#!/usr/bin/env python
# coding: utf-8
"""Load the GSE5859 series matrix and the per-sample ethnicity table.

Exported from a Jupyter notebook: the ``get_ipython()`` calls only work
when the script is run under IPython, and the bare expressions (``df``,
``matrix.head()``, ...) are the notebook's cell outputs -- they have no
visible effect when run as a plain script.
"""

# In[1]:

get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# In[3]:

# Peek at the first 35 lines of the raw file; the read below skips 34 of them.
get_ipython().system('head -n 35 GSE5859_series_matrix.txt')


# In[4]:

# low_memory=False makes pandas parse the file in one pass rather than in
# chunks, so each column gets a single inferred dtype.
df = pd.read_table('GSE5859_series_matrix.txt', skiprows=34, low_memory=False)
df


# In[5]:

# Discard the first 40 parsed rows.
# NOTE(review): presumably these are the remaining per-sample metadata rows
# that precede the expression values -- confirm against the file.
matrix = df.iloc[40:]
new_columns = list(matrix.columns)


# In[6]:

# Rename only the first column; the other labels are kept unchanged.
new_columns[0] = 'Probe'


# In[7]:

matrix.columns = new_columns


# In[8]:

matrix.head()


# In[9]:

# Display the matrix with a fresh 0-based index.  ``drop=True`` discards the
# old index directly, replacing ``reset_index().drop('index', 1)``, whose
# positional ``axis`` argument is rejected by pandas >= 2.0.  The result is
# not assigned, so ``matrix`` itself keeps its original index.
matrix.reset_index(drop=True)


# In[40]:

ethnicity = pd.read_csv('ethnicity.csv')


# In[43]:

# sample_id is the first eight characters of each filename.
# NOTE(review): presumably that prefix is the sample accession used as the
# matrix column labels -- confirm before joining on it.
# The vectorised ``.str`` slice yields NaN for a missing filename instead of
# raising.
ethnicity['sample_id'] = ethnicity['filename'].str[:8]
ethnicity


# In[ ]: