In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
In [3]:
!head -n 35 GSE5859_series_matrix.txt 
!Series_title	"Allelic Differences Account for Gene Expression Differences Among Population."
!Series_geo_accession	"GSE5859"
!Series_status	"Public on Jan 07 2007"
!Series_submission_date	"Sep 18 2006"
!Series_last_update_date	"Jul 08 2016"
!Series_pubmed_id	"17206142"
!Series_summary	"Expression level of genes in lymphoblasts from individuals in three HapMap populations (CEU, CHB, JPT) were compared.  More than 1,000 genes were found to be significantly different (Pc<0.05) in mean expression level between the CEU and CHB+JPT samples."
!Series_summary	"Keywords: Comparison of Gene Expression Profiles from Lymphoblastoid cells"
!Series_overall_design	"Gene expression analysis using Affymetrix Human Focus arrays; comparison of expression levels of genes by t-test."
!Series_type	"Expression profiling by array"
!Series_contributor	"Richard,S,Spielman"
!Series_contributor	"Laurel,A,Bastone"
!Series_contributor	"Joshua,B,Burdick"
!Series_contributor	"Michael,P,Morley"
!Series_contributor	"Warren,J,Ewens"
!Series_contributor	"Vivian,G,Cheung"
!Series_sample_id	"GSM25349 GSM25350 GSM25356 GSM25357 GSM25358 GSM25359 GSM25360 GSM25361 GSM25377 GSM25378 GSM25385 GSM25386 GSM25399 GSM25400 GSM25401 GSM25402 GSM25409 GSM25410 GSM25426 GSM25427 GSM25479 GSM25480 GSM25481 GSM25482 GSM25524 GSM25525 GSM25526 GSM25527 GSM25528 GSM25529 GSM25530 GSM25531 GSM25540 GSM25541 GSM25542 GSM25543 GSM25548 GSM25549 GSM25550 GSM25551 GSM25552 GSM25553 GSM25561 GSM25562 GSM25563 GSM25564 GSM25565 GSM25566 GSM25568 GSM25569 GSM25570 GSM25571 GSM25578 GSM25579 GSM25580 GSM25581 GSM25624 GSM25625 GSM25626 GSM25627 GSM25628 GSM25629 GSM25630 GSM25631 GSM25632 GSM25633 GSM25634 GSM25635 GSM25656 GSM25657 GSM25658 GSM25659 GSM25660 GSM25661 GSM25662 GSM25663 GSM25680 GSM25681 GSM25682 GSM25683 GSM25684 GSM25685 GSM25686 GSM25687 GSM48650 GSM48651 GSM48652 GSM48653 GSM48654 GSM48655 GSM48656 GSM48657 GSM48658 GSM48660 GSM48661 GSM48662 GSM48663 GSM48664 GSM48665 GSM136441 GSM136442 GSM136443 GSM136444 GSM136445 GSM136506 GSM136507 GSM136508 GSM136509 GSM136510 GSM136514 GSM136515 GSM136516 GSM136517 GSM136518 GSM136520 GSM136521 GSM136522 GSM136523 GSM136524 GSM136528 GSM136529 GSM136530 GSM136531 GSM136532 GSM136534 GSM136535 GSM136536 GSM136537 GSM136538 GSM136539 GSM136540 GSM136541 GSM136542 GSM136543 GSM136544 GSM136545 GSM136546 GSM136547 GSM136548 GSM136549 GSM136550 GSM136551 GSM136552 GSM136553 GSM136555 GSM136556 GSM136557 GSM136558 GSM136559 GSM136563 GSM136564 GSM136565 GSM136566 GSM136567 GSM136568 GSM136569 GSM136570 GSM136571 GSM136572 GSM136573 GSM136574 GSM136575 GSM136576 GSM136577 GSM136658 GSM136659 GSM136660 GSM136661 GSM136662 GSM136663 GSM136664 GSM136665 GSM136666 GSM136667 GSM136668 GSM136669 GSM136670 GSM136671 GSM136672 GSM136673 GSM136674 GSM136675 GSM136676 GSM136677 GSM136705 GSM136706 GSM136707 GSM136708 GSM136709 GSM136710 GSM136711 GSM136712 GSM136713 GSM136714 GSM136715 GSM136716 GSM136717 GSM136718 GSM136719 GSM136720 GSM136721 GSM136722 GSM136723 GSM136724 GSM136725 GSM136726 GSM136727 GSM136729 "
!Series_contact_name	"Michael,Patrick,Morley"
!Series_contact_email	"[email protected]"
!Series_contact_phone	"215-898-2026"
!Series_contact_department	"Penn Cardiovascular Institute"
!Series_contact_institute	"Perelman School of Medicine at the University of Pennsylvania"
!Series_contact_address	"3400 Civic Center Blvd, Bldg 421"
!Series_contact_city	"Philadelphia"
!Series_contact_state	"PA"
!Series_contact_zip/postal_code	"19104"
!Series_contact_country	"USA"
!Series_supplementary_file	"ftp://ftp.ncbi.nlm.nih.gov/pub/geo/DATA/supplementary/series/GSE5859/GSE5859_RAW.tar"
!Series_platform_id	"GPL201"
!Series_platform_taxid	"9606"
!Series_sample_taxid	"9606"
!Series_relation	"BioProject: http://www.ncbi.nlm.nih.gov/bioproject/PRJNA97265"

!Sample_title	"GM06985_rep1"	"GM06985_rep2"	"GM06993_rep1"	"GM06993_rep2"	"GM06994_rep1"	"GM06994_rep2"	"GM07000_rep1"	"GM07000_rep2"	"GM07022_rep1"	"GM07022_rep2"	"GM07034_rep1"	"GM07034_rep2"	"GM07055_rep1"	"GM07055_rep2"	"GM07056_rep1"	"GM07056_rep2"	"GM07345_rep1"	"GM07345_rep2"	"GM07357_rep1"	"GM07357_rep2"	"GM11881_rep1"	"GM11881_rep2"	"GM11882_rep1"	"GM11882_rep2"	"GM11992_rep1"	"GM11992_rep2"	"GM11993_rep1"	"GM11993_rep2"	"GM11994_rep1"	"GM11994_rep2"	"GM11995_rep1"	"GM11995_rep2"	"GM12043_rep1"	"GM12043_rep2"	"GM12044_rep1"	"GM12044_rep2"	"GM12144_rep1"	"GM12144_rep2"	"GM12145_rep1"	"GM12145_rep2"	"GM12146_rep1"	"GM12146_rep2"	"GM12154_rep1"	"GM12154_rep2"	"GM12155_rep1"	"GM12155_rep2"	"GM12156_rep1"	"GM12156_rep2"	"GM12236_rep1"	"GM12236_rep2"	"GM12239_rep1"	"GM12239_rep2"	"GM12248_rep1"	"GM12248_rep2"	"GM12249_rep1"	"GM12249_rep2"	"GM12750_rep1"	"GM12750_rep2"	"GM12751_rep1"	"GM12751_rep2"	"GM12760_rep1"	"GM12760_rep2"	"GM12761_rep1"	"GM12761_rep2"	"GM12762_rep1"	"GM12762_rep2"	"GM12763_rep1"	"GM12763_rep2"	"GM12812_rep1"	"GM12812_rep2"	"GM12813_rep1"	"GM12813_rep2"	"GM12814_rep1"	"GM12814_rep2"	"GM12815_rep1"	"GM12815_rep2"	"GM12872_rep1"	"GM12872_rep2"	"GM12873_rep1"	"GM12873_rep2"	"GM12874_rep1"	"GM12874_rep2"	"GM12875_rep1"	"GM12875_rep2"	"GM11829_rep1"	"GM11830_rep1"	"GM11831_rep1"	"GM11832_rep1"	"GM11839_rep1"	"GM12003_rep1"	"GM12004_rep1"	"GM12005_rep1"	"GM12006_rep1"	"GM12057_rep1"	"GM12234_rep1"	"GM12716_rep1"	"GM12717_rep1"	"GM12891_rep1"	"GM12892_rep1"	"GM18956"	"GM18942"	"GM18944"	"GM18945"	"GM18949"	"GM18952"	"GM18943"	"GM18947"	"GM18948"	"GM18953"	"GM18961"	"GM18970"	"GM18540"	"GM18542"	"GM18550"	"GM18552"	"GM18555"	"GM18959"	"GM18960"	"GM18965"	"GM18966"	"GM18967"	"GM18968"	"GM18971"	"GM18972"	"GM18973"	"GM18974"	"GM18975"	"GM18976"	"GM18978"	"GM18980"	"GM18969"	"GM18987"	"GM18990"	"GM18991"	"GM18992"	"GM18995"	"GM19005"	"GM18524"	"GM18529"	"GM18532"	"GM18547"	"GM18561"	"GM18994"	"GM18526"	"GM18537"	"GM18593"	"GM18603"	"GM18605"	"GM18545"	"GM18558"	"GM18563"	"GM18566"	"GM18577"	"GM18582"	"GM18592"	"GM18999"	"GM19000"	"GM19003"	"GM19007"	"GM18573"	"GM18579"	"GM18576"	"GM18964"	"GM18981"	"GM18622"	"GM18594"	"GM18611"	"GM18564"	"GM18612"	"GM18621"	"GM18623"	"GM18624"	"GM18632"	"GM18635"	"GM18636"	"GM18637"	"GM18620"	"GM18562"	"GM18570"	"GM18608"	"GM17733"	"GM17734"	"GM17735"	"GM17736"	"GM17737"	"GM17738"	"GM17739"	"GM17740"	"GM17742"	"GM17741"	"GM17743"	"GM17744"	"GM17745"	"GM17746"	"GM17747"	"GM17749"	"GM17752"	"GM17753"	"GM17754"	"GM17755"	"GM17756"	"GM17757"	"GM17759"	"GM17761"	"GM11840"	"GM12056"	"GM12264"	"GM18940"
!Sample_geo_accession	"GSM25349"	"GSM25350"	"GSM25356"	"GSM25357"	"GSM25358"	"GSM25359"	"GSM25360"	"GSM25361"	"GSM25377"	"GSM25378"	"GSM25385"	"GSM25386"	"GSM25399"	"GSM25400"	"GSM25401"	"GSM25402"	"GSM25409"	"GSM25410"	"GSM25426"	"GSM25427"	"GSM25479"	"GSM25480"	"GSM25481"	"GSM25482"	"GSM25524"	"GSM25525"	"GSM25526"	"GSM25527"	"GSM25528"	"GSM25529"	"GSM25530"	"GSM25531"	"GSM25540"	"GSM25541"	"GSM25542"	"GSM25543"	"GSM25548"	"GSM25549"	"GSM25550"	"GSM25551"	"GSM25552"	"GSM25553"	"GSM25561"	"GSM25562"	"GSM25563"	"GSM25564"	"GSM25565"	"GSM25566"	"GSM25568"	"GSM25569"	"GSM25570"	"GSM25571"	"GSM25578"	"GSM25579"	"GSM25580"	"GSM25581"	"GSM25624"	"GSM25625"	"GSM25626"	"GSM25627"	"GSM25628"	"GSM25629"	"GSM25630"	"GSM25631"	"GSM25632"	"GSM25633"	"GSM25634"	"GSM25635"	"GSM25656"	"GSM25657"	"GSM25658"	"GSM25659"	"GSM25660"	"GSM25661"	"GSM25662"	"GSM25663"	"GSM25680"	"GSM25681"	"GSM25682"	"GSM25683"	"GSM25684"	"GSM25685"	"GSM25686"	"GSM25687"	"GSM48650"	"GSM48651"	"GSM48652"	"GSM48653"	"GSM48654"	"GSM48655"	"GSM48656"	"GSM48657"	"GSM48658"	"GSM48660"	"GSM48661"	"GSM48662"	"GSM48663"	"GSM48664"	"GSM48665"	"GSM136441"	"GSM136442"	"GSM136443"	"GSM136444"	"GSM136445"	"GSM136506"	"GSM136507"	"GSM136508"	"GSM136509"	"GSM136510"	"GSM136514"	"GSM136515"	"GSM136516"	"GSM136517"	"GSM136518"	"GSM136520"	"GSM136521"	"GSM136522"	"GSM136523"	"GSM136524"	"GSM136528"	"GSM136529"	"GSM136530"	"GSM136531"	"GSM136532"	"GSM136534"	"GSM136535"	"GSM136536"	"GSM136537"	"GSM136538"	"GSM136539"	"GSM136540"	"GSM136541"	"GSM136542"	"GSM136543"	"GSM136544"	"GSM136545"	"GSM136546"	"GSM136547"	"GSM136548"	"GSM136549"	"GSM136550"	"GSM136551"	"GSM136552"	"GSM136553"	"GSM136555"	"GSM136556"	"GSM136557"	"GSM136558"	"GSM136559"	"GSM136563"	"GSM136564"	"GSM136565"	"GSM136566"	"GSM136567"	"GSM136568"	"GSM136569"	"GSM136570"	"GSM136571"	"GSM136572"	"GSM136573"	"GSM136574"	"GSM136575"	"GSM136576"	"GSM136577"	"GSM136658"	"GSM136659"	"GSM136660"	"GSM136661"	"GSM136662"	"GSM136663"	"GSM136664"	"GSM136665"	"GSM136666"	"GSM136667"	"GSM136668"	"GSM136669"	"GSM136670"	"GSM136671"	"GSM136672"	"GSM136673"	"GSM136674"	"GSM136675"	"GSM136676"	"GSM136677"	"GSM136705"	"GSM136706"	"GSM136707"	"GSM136708"	"GSM136709"	"GSM136710"	"GSM136711"	"GSM136712"	"GSM136713"	"GSM136714"	"GSM136715"	"GSM136716"	"GSM136717"	"GSM136718"	"GSM136719"	"GSM136720"	"GSM136721"	"GSM136722"	"GSM136723"	"GSM136724"	"GSM136725"	"GSM136726"	"GSM136727"	"GSM136729"
In [4]:
df = pd.read_table('GSE5859_series_matrix.txt', skiprows=34, low_memory=False)
df
Out[4]:
!Sample_geo_accession GSM25349 GSM25350 GSM25356 GSM25357 GSM25358 GSM25359 GSM25360 GSM25361 GSM25377 ... GSM136719 GSM136720 GSM136721 GSM136722 GSM136723 GSM136724 GSM136725 GSM136726 GSM136727 GSM136729
0 !Sample_status Public on Aug 12 2004 Public on Aug 12 2004 Public on Aug 12 2004 Public on Aug 12 2004 Public on Aug 12 2004 Public on Aug 12 2004 Public on Aug 12 2004 Public on Aug 12 2004 Public on Aug 12 2004 ... Public on Jan 07 2007 Public on Jan 07 2007 Public on Jan 07 2007 Public on Jan 07 2007 Public on Jan 07 2007 Public on Jan 07 2007 Public on Jan 07 2007 Public on Jan 07 2007 Public on Jan 07 2007 Public on Jan 07 2007
1 !Sample_submission_date Jun 17 2004 Jun 17 2004 Jun 17 2004 Jun 17 2004 Jun 17 2004 Jun 17 2004 Jun 17 2004 Jun 17 2004 Jun 17 2004 ... Sep 18 2006 Sep 18 2006 Sep 18 2006 Sep 18 2006 Sep 18 2006 Sep 18 2006 Sep 18 2006 Sep 18 2006 Sep 18 2006 Sep 18 2006
2 !Sample_last_update_date Nov 30 2005 Nov 30 2005 Nov 30 2005 Nov 30 2005 Nov 30 2005 Nov 30 2005 Nov 30 2005 Nov 30 2005 Nov 30 2005 ... Jan 04 2007 Jan 04 2007 Jan 04 2007 Jan 04 2007 Jan 04 2007 Jan 04 2007 Jan 04 2007 Jan 04 2007 Jan 04 2007 Jan 04 2007
3 !Sample_type RNA RNA RNA RNA RNA RNA RNA RNA RNA ... RNA RNA RNA RNA RNA RNA RNA RNA RNA RNA
4 !Sample_channel_count 1 1 1 1 1 1 1 1 1 ... 1 1 1 1 1 1 1 1 1 1
5 !Sample_source_name_ch1 Lymphoblastoid cell line Lymphoblastoid cell line Lymphoblastoid cell line Lymphoblastoid cell line Lymphoblastoid cell line Lymphoblastoid cell line Lymphoblastoid cell line Lymphoblastoid cell line Lymphoblastoid cell line ... lymphoblastoid cell lines from the CEU, CHB an... lymphoblastoid cell lines from the CEU, CHB an... lymphoblastoid cell lines from the CEU, CHB an... lymphoblastoid cell lines from the CEU, CHB an... lymphoblastoid cell lines from the CEU, CHB an... lymphoblastoid cell lines from the CEU, CHB an... lymphoblastoid cell lines from the CEU, CHB an... lymphoblastoid cell lines from the CEU, CHB an... lymphoblastoid cell lines from the CEU, CHB an... lymphoblastoid cell lines from the CEU, CHB an...
6 !Sample_organism_ch1 Homo sapiens Homo sapiens Homo sapiens Homo sapiens Homo sapiens Homo sapiens Homo sapiens Homo sapiens Homo sapiens ... Homo sapiens Homo sapiens Homo sapiens Homo sapiens Homo sapiens Homo sapiens Homo sapiens Homo sapiens Homo sapiens Homo sapiens
7 !Sample_characteristics_ch1 Coriell cell line repository identifier: GM06985 Coriell cell line repository identifier: GM06985 Coriell cell line repository identifier: GM06993 Coriell cell line repository identifier: GM06993 Coriell cell line repository identifier: GM06994 Coriell cell line repository identifier: GM06994 Coriell cell line repository identifier: GM07000 Coriell cell line repository identifier: GM07000 Coriell cell line repository identifier: GM07022 ... lymphoblastoid cell line lymphoblastoid cell line lymphoblastoid cell line lymphoblastoid cell line lymphoblastoid cell line lymphoblastoid cell line lymphoblastoid cell line lymphoblastoid cell line lymphoblastoid cell line lymphoblastoid cell line
8 !Sample_characteristics_ch1 http://locus.umdnj.edu/nigms/nigms_cgi/display... http://locus.umdnj.edu/nigms/nigms_cgi/display... http://locus.umdnj.edu/nigms/nigms_cgi/display... http://locus.umdnj.edu/nigms/nigms_cgi/display... http://locus.umdnj.edu/nigms/nigms_cgi/display... http://locus.umdnj.edu/nigms/nigms_cgi/display... http://locus.umdnj.edu/nigms/nigms_cgi/display... http://locus.umdnj.edu/nigms/nigms_cgi/display... http://locus.umdnj.edu/nigms/nigms_cgi/display... ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
9 !Sample_characteristics_ch1 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10 !Sample_characteristics_ch1 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
11 !Sample_characteristics_ch1 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
12 !Sample_characteristics_ch1 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
13 !Sample_biomaterial_provider_ch1 Coriell Cell Repositories Coriell Cell Repositories Coriell Cell Repositories Coriell Cell Repositories Coriell Cell Repositories Coriell Cell Repositories Coriell Cell Repositories Coriell Cell Repositories Coriell Cell Repositories ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
14 !Sample_biomaterial_provider_ch1 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
15 !Sample_biomaterial_provider_ch1 Coriell Coriell Coriell Coriell Coriell Coriell Coriell Coriell Coriell ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
16 !Sample_molecule_ch1 total RNA total RNA total RNA total RNA total RNA total RNA total RNA total RNA total RNA ... total RNA total RNA total RNA total RNA total RNA total RNA total RNA total RNA total RNA total RNA
17 !Sample_label_ch1 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... biotin biotin biotin biotin biotin biotin biotin biotin biotin biotin
18 !Sample_label_protocol_ch1 NaN NaN NaN NaN NaN NaN NaN NaN NaN ... standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures
19 !Sample_taxid_ch1 9606 9606 9606 9606 9606 9606 9606 9606 9606 ... 9606 9606 9606 9606 9606 9606 9606 9606 9606 9606
20 !Sample_hyb_protocol NaN NaN NaN NaN NaN NaN NaN NaN NaN ... standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures
21 !Sample_scan_protocol NaN NaN NaN NaN NaN NaN NaN NaN NaN ... standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures standard Affymetrix procedures
22 !Sample_description Sample description = RNA was extracted from ly... Sample description = RNA was extracted from ly... Sample description = RNA was extracted from ly... Sample description = RNA was extracted from ly... Sample description = RNA was extracted from ly... Sample description = RNA was extracted from ly... Sample description = RNA was extracted from ly... Sample description = RNA was extracted from ly... Sample description = RNA was extracted from ly... ... The cells were grown at a density of 5 x 105 c... The cells were grown at a density of 5 x 105 c... The cells were grown at a density of 5 x 105 c... The cells were grown at a density of 5 x 105 c... The cells were grown at a density of 5 x 105 c... The cells were grown at a density of 5 x 105 c... The cells were grown at a density of 5 x 105 c... The cells were grown at a density of 5 x 105 c... The cells were grown at a density of 5 x 105 c... The cells were grown at a density of 5 x 105 c...
23 !Sample_data_processing NaN NaN NaN NaN NaN NaN NaN NaN NaN ... Affymetrix Microarray Suite version 5.0 Affymetrix Microarray Suite version 5.0 Affymetrix Microarray Suite version 5.0 Affymetrix Microarray Suite version 5.0 Affymetrix Microarray Suite version 5.0 Affymetrix Microarray Suite version 5.0 Affymetrix Microarray Suite version 5.0 Affymetrix Microarray Suite version 5.0 Affymetrix Microarray Suite version 5.0 Affymetrix Microarray Suite version 5.0
24 !Sample_platform_id GPL201 GPL201 GPL201 GPL201 GPL201 GPL201 GPL201 GPL201 GPL201 ... GPL201 GPL201 GPL201 GPL201 GPL201 GPL201 GPL201 GPL201 GPL201 GPL201
25 !Sample_contact_name Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley ... Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley Michael,Patrick,Morley
26 !Sample_contact_email [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] ... [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected] [email protected]
27 !Sample_contact_phone 215-898-2026 215-898-2026 215-898-2026 215-898-2026 215-898-2026 215-898-2026 215-898-2026 215-898-2026 215-898-2026 ... 215-898-2026 215-898-2026 215-898-2026 215-898-2026 215-898-2026 215-898-2026 215-898-2026 215-898-2026 215-898-2026 215-898-2026
28 !Sample_contact_department Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute ... Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute Penn Cardiovascular Institute
29 !Sample_contact_institute Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ... Perelman School of Medicine at the University ...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8804 AFFX-HUMISGF3A/M97935_MA_at 2119.2 1852.2 799.8 1011.7 600.1 588.7 676.9 1039.7 1109.1 ... 3289.38 1830.6 3108.71 3062.72 2792.42 2550.21 1969.34 2991.8 1639.34 1781.9
8805 AFFX-HUMISGF3A/M97935_MB_at 998.9 645.2 454.4 411.2 363.3 327.2 301.3 272.3 581.4 ... 1838.74 698.4 1665.11 1481.87 1465.12 1281.93 1191.78 2031.46 734.099 1099.17
8806 AFFX-HUMRGE/M10098_3_at 45.7 75.2 31 353.5 108.4 287.9 91.7 38.6 21.4 ... 76.8999 85.9002 38.3999 113.3 57.4 175.101 53.4001 13.4 16 77.9999
8807 AFFX-HUMRGE/M10098_5_at 11.4 109.8 77.6 99.2 75.4 134.5 8.9 9.8 28.1 ... 28.5 5.69999 11.1 23.4001 4.79999 73.6999 5.10001 3.2 25.5001 4.39999
8808 AFFX-HUMRGE/M10098_M_at 56.1 57.3 104.8 201.9 71.8 189 39.7 13 63 ... 30.4 51.5 32.9999 28.7 23.9 122.5 6.5 34.8999 5.60001 7.79998
8809 AFFX-M27830_3_at 42.1 183.5 41.3 51.9 71.1 24.6 26.1 34.6 50.8 ... 52.9002 50.2002 39.9001 103.4 47.8 120 38.9999 42.9001 31.0001 43.6001
8810 AFFX-M27830_5_at 203 132.5 223.7 214.5 175.2 384.5 237.7 200.5 137.5 ... 151.5 166.5 171 184 131.9 267.201 41.7001 57.8999 179.7 236.201
8811 AFFX-M27830_M_at 1332 1404.2 864.7 459.7 942.6 1549.3 1923.3 1180.4 461.4 ... 464.701 623.001 402.7 552.599 367.801 731.701 154.1 94.0998 414.699 577.001
8812 AFFX-r2-Bs-dap-3_at 15.3 2.4 30.6 5.9 3.5 7.4 9.7 3.3 60.5 ... 5.49999 1.7 1.5 8 2.80001 10.2 3.39999 1.5 1.1 1.6
8813 AFFX-r2-Bs-dap-5_at 3.5 7.1 3.9 3.9 1.3 4.5 27.3 8.1 4.4 ... 3.2 10.2 4.39999 2.39999 1.7 5.10001 3.70001 2.89999 13.7 2.5
8814 AFFX-r2-Bs-dap-M_at 8.5 3.3 2.1 53.3 3.2 17.2 25.6 3.2 3.4 ... 10.4 0.7 0.8 1.1 2.19999 5.79999 0.299999 1.3 3.1 1.2
8815 AFFX-r2-Bs-lys-3_at 47.1 55.3 11.9 49.1 17.6 9.8 5.9 40.3 13.1 ... 20.9 11.5 16.3 15.1 32 20.0999 9.10002 27.6001 10.1 15.8999
8816 AFFX-r2-Bs-lys-5_at 8.2 13.3 1.9 26.6 6.9 2 16.4 2.7 5.4 ... 0.6 3.70001 2.99999 23.8 1.6 1.6 8.4 1.4 1.9 1.1
8817 AFFX-r2-Bs-lys-M_at 5.1 39.4 35.7 43.3 18 31.7 17.9 19 63.5 ... 4.30001 31.8999 1.1 4.79999 18.5 6.1 1.4 25.0001 6.60002 32.6
8818 AFFX-r2-Bs-phe-3_at 77.1 61.4 9.2 79.6 45.3 27.3 16.6 27.3 43.7 ... 57.2999 35.1 44 27.3 55.5002 47.8999 5.00001 14.5 7.89999 49.4
8819 AFFX-r2-Bs-phe-5_at 6.4 6.7 1.8 1.3 14.8 4.5 16.5 3.3 47.6 ... 10.1 13.6 5.49999 1.5 13.6 9.10002 24.4 15.5 3.70001 1.3
8820 AFFX-r2-Bs-phe-M_at 10 5.8 2.8 10.2 1.5 40 39 57.5 61.6 ... 1.1 2.89999 2.19999 3.2 9.50002 12.7 5.29999 21.4 3.39999 24.6001
8821 AFFX-r2-Bs-thr-3_s_at 12.2 9 4 11.3 3.4 3.6 3.5 6.3 14 ... 7.30002 2.99999 4 24.2999 13.1 11.5 25.6 8.19998 19.9 5.19999
8822 AFFX-r2-Bs-thr-5_s_at 55.8 49.3 17.8 38.6 97.6 38.2 43.2 56.8 37.8 ... 48.4999 76.0001 52.6 72.8002 61.7998 42.9999 31.5999 36.1001 60.4999 84.0002
8823 AFFX-r2-Bs-thr-M_s_at 81.3 15.3 42.1 48.2 60.2 8 41.8 13.1 42 ... 75.3001 15.1 12.2 47.8 51.6 62.6998 24.7 45.1001 42.0001 49.6001
8824 AFFX-r2-Ec-bioB-3_at 259 266.7 664.5 148.4 281.6 349.9 469.3 152.9 674.7 ... 126.7 99.4997 76.7998 152.8 160.8 118.3 105.6 75.6999 120.3 203.401
8825 AFFX-r2-Ec-bioB-5_at 286.5 185.5 572.9 123.2 251.6 360.4 432.7 183.5 677.2 ... 162.3 96.9998 144.1 158.3 126.5 170.7 100.9 98.9001 105.7 160.6
8826 AFFX-r2-Ec-bioB-M_at 370.1 250 806.4 136.9 332.3 452.4 543.7 168.4 932.8 ... 180.9 130.9 155.9 198 140.3 212.3 121.6 76.7998 103.7 191.3
8827 AFFX-r2-Ec-bioC-3_at 1353.8 1032.4 2539.7 646.1 1303.8 1805.9 2030.1 777.4 3466.8 ... 478.299 549.7 516.201 586.2 528.2 700.499 379.201 262.499 460.399 637.902
8828 AFFX-r2-Ec-bioC-5_at 1351.9 1015.2 2340.9 439.1 1097.6 1527.3 2097.1 696.4 3240.3 ... 525.898 548.501 619.899 705.201 642.601 899.8 420.4 225.7 478.2 540.099
8829 AFFX-r2-Ec-bioD-3_at 4456.8 3188.6 7993.1 1845.3 3985.8 4621.8 6283.4 2112.3 11969.8 ... 7876.29 8033.43 7867.01 9144.57 7400.46 12090.6 6472.02 4458.06 8437.5 7842.51
8830 AFFX-r2-Ec-bioD-5_at 3634.6 2813.5 6777.1 1340 3385.2 3853.5 5323.2 1690.8 8892.4 ... 7070.52 7290.49 7330.01 7994.54 5365.14 10405.8 5860.87 3727.52 7137.98 7780.25
8831 AFFX-r2-P1-cre-3_at 15062.4 10854.6 19995.1 6494 12609.5 15177.5 17468.9 6847.4 27817.6 ... 19.3999 9.69997 38.6 9.10002 22.9001 8.29999 21.7 10.2 33.4001 11.7
8832 AFFX-r2-P1-cre-5_at 17679.5 13325.2 20964.6 7346.5 15322.4 15541.8 24135.4 8037.3 32044.7 ... 6.60002 2 12.5 4 12.7 7.89999 6.39999 4.59999 4.39999 0.8
8833 !series_matrix_table_end NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

8834 rows × 209 columns

In [5]:
matrix  = df.iloc[40:]
new_columns = [a for a in matrix.columns]
In [6]:
new_columns[0] = 'Probe'
In [7]:
matrix.columns = new_columns
In [8]:
matrix.head()
Out[8]:
Probe GSM25349 GSM25350 GSM25356 GSM25357 GSM25358 GSM25359 GSM25360 GSM25361 GSM25377 ... GSM136719 GSM136720 GSM136721 GSM136722 GSM136723 GSM136724 GSM136725 GSM136726 GSM136727 GSM136729
40 1007_s_at 329.9 408.1 258.2 262 299.2 318.1 338.7 333.2 419.5 ... 325.301 342.899 533.498 384.7 394.899 417.699 380.6 422.8 232.401 248.3
41 1053_at 419.1 378.6 359.9 612.1 297.4 305.7 315 363.8 424.8 ... 345.301 537.001 376.201 406.9 347.799 426.201 438.201 431.1 296.1 235.1
42 117_at 70.8 47.9 107.9 114.3 111.2 74.3 107.2 95.6 297.8 ... 172.6 23.5 63.2998 115.4 182.4 87.6001 80.5002 58 152.1 124.6
43 121_at 908.2 798.4 720.5 798 614.6 503.3 467.5 405.8 894.9 ... 930.602 917.3 794.801 967.099 924.898 950.9 666.601 797.699 787.202 631.299
44 1255_g_at 50.9 8 12.9 55.4 27.3 28.9 13.5 13.1 93 ... 7.6 28.5 22.7 8.19998 33.2 23.5 40.7001 19.3 50.6999 5.69999

5 rows × 209 columns

In [9]:
matrix.reset_index().drop('index', 1)
Out[9]:
Probe GSM25349 GSM25350 GSM25356 GSM25357 GSM25358 GSM25359 GSM25360 GSM25361 GSM25377 ... GSM136719 GSM136720 GSM136721 GSM136722 GSM136723 GSM136724 GSM136725 GSM136726 GSM136727 GSM136729
0 1007_s_at 329.9 408.1 258.2 262 299.2 318.1 338.7 333.2 419.5 ... 325.301 342.899 533.498 384.7 394.899 417.699 380.6 422.8 232.401 248.3
1 1053_at 419.1 378.6 359.9 612.1 297.4 305.7 315 363.8 424.8 ... 345.301 537.001 376.201 406.9 347.799 426.201 438.201 431.1 296.1 235.1
2 117_at 70.8 47.9 107.9 114.3 111.2 74.3 107.2 95.6 297.8 ... 172.6 23.5 63.2998 115.4 182.4 87.6001 80.5002 58 152.1 124.6
3 121_at 908.2 798.4 720.5 798 614.6 503.3 467.5 405.8 894.9 ... 930.602 917.3 794.801 967.099 924.898 950.9 666.601 797.699 787.202 631.299
4 1255_g_at 50.9 8 12.9 55.4 27.3 28.9 13.5 13.1 93 ... 7.6 28.5 22.7 8.19998 33.2 23.5 40.7001 19.3 50.6999 5.69999
5 1294_at 510.5 575.6 485.7 345.3 668.4 585.5 989.8 1219.4 648.2 ... 453.601 491 540.2 493.799 401.101 637.601 505.799 350.799 362.3 528.999
6 1316_at 102.6 107.9 55 117.2 54.1 75.8 114 78.8 133.3 ... 73.8999 86.6003 66.3001 86.2002 58.1002 87.2002 87.9999 71.6001 27.8999 41.5
7 1320_at 9.3 70.7 10.1 27 5.1 48.2 10.3 43.9 63.5 ... 9.79999 12.7 14.3 66.4999 20.2 10.4 48.8 31.0001 14.7 37.9001
8 1431_at 43.6 37 23 58 36.3 21.4 61 18.7 61.5 ... 35.9 34.9999 39.8001 38.1 40.9 45.8999 21 31.0001 26.1999 22.7
9 1438_at 48.5 27.8 58.2 48.7 10.9 15.1 51.6 29 81.6 ... 64.6998 58.7998 10.5 29.8 17.2 53 13 13.9 15.8 12.6
10 1487_at 350.7 294.4 366.6 365.3 338.4 272.3 375.8 339.8 552.9 ... 331.001 247.199 296.4 255.1 260.899 294.199 308.099 371.601 511.199 567.002
11 1494_f_at 99 144.8 182.1 224.2 148.6 125.6 112.4 162.9 64.2 ... 188.799 158.2 117.2 197.099 160.4 185.501 207.4 180.401 113.3 152.8
12 1598_g_at 260.9 353.4 238.2 224.9 111.5 182.9 184.2 180.9 309.7 ... 397.2 312.201 244.8 391.3 333.4 293.399 259 188 270.199 200.899
13 160020_at 237.4 263.5 152.4 234.4 132.9 127 134.2 150.4 324.9 ... 240.9 224.301 200.1 152.9 169.9 276.8 224.2 185.6 263.4 182.2
14 1729_at 816.9 665.3 459.6 414.4 452.9 427.1 598 735.4 393.4 ... 630.901 853.602 471.6 340 496.6 411.401 528.5 541.201 528.5 348.301
15 1773_at 202.9 174 70.6 155.7 11.6 79.8 26.3 92.2 70.3 ... 180.7 172.4 114.5 136.3 181.001 165 97.5 120.2 168.501 136.3
16 177_at 153.5 144.5 30.1 92.8 52.5 46.6 213 166 71.6 ... 52.2001 196.3 163.8 123.8 210.001 131 261.999 362.099 56.5 130.3
17 1861_at 188.2 204.6 103.7 180.8 104.6 140.5 118.1 111.9 122.4 ... 138.7 144.9 176.4 159.5 161.2 154.3 123.6 145.3 171 163.4
18 200000_s_at 2768.4 2605.6 2058.8 1920.5 2008.9 2357.6 2047.1 2100.4 2312.2 ... 2069.83 1912.44 2248.11 2151.76 2356.61 2005.44 3063.57 2094.22 2377.45 2558.71
19 200001_at 2503.7 2881.4 2201.8 2580.7 2401.8 2063.1 2041.9 2682.6 2887.1 ... 2450.4 2804.64 2373.5 2690.39 2848.53 1494.97 3290.98 3073.78 4708.66 2606.68
20 200002_at 23153.8 24351.2 17455.6 12257.8 13091.4 16095.4 15706.6 16687.8 17705.1 ... 14858.5 17264.2 17396.4 15608 15528.2 22217.4 18027.4 8777.53 15172.8 15202.3
21 200003_s_at 23458.2 24298.2 19064.6 15105.3 14796.8 19148.1 19736.3 19082.2 14373.1 ... 17049 17845.8 16900.8 18612.7 16814.3 22028.7 15497 8648.29 13095.6 16126
22 200004_at 4972.2 4920.7 6542.7 6028.4 6322.2 6092.4 4675.1 3975.2 6239.3 ... 6386.9 4874.37 6469.78 6144.16 5810.72 5000.65 4649.95 5662.02 6929.81 7499.09
23 200005_at 6600.9 7526.9 4004.2 4411.3 3659.7 3415.9 3555.5 4098.4 4211 ... 4296.66 4468.27 4277.94 3944.43 3787.68 5230.02 4507.46 4657.69 4719.11 4481.3
24 200006_at 9246.7 9750.1 8519.6 8718.4 8974.7 9062.8 9375.2 7783.4 8460.5 ... 7211.58 7143.92 7567.49 7316.81 7878.47 6987.69 8897.6 7650.83 7316.81 7349.85
25 200007_at 4917.4 4763.4 5024.4 5265.1 5759.1 5963.1 7255.8 5863 4530.8 ... 5305.23 5259.83 4899.1 5232.92 5756.2 4769.1 4969.9 3872.1 4407.67 4308.59
26 200008_s_at 4002.4 4280.6 1811.7 1516.4 1397.5 1027.1 845.6 1404.9 2183 ... 3744.09 3615.8 3451.48 3299.66 2911 2722.85 2065.11 2083.37 2721.34 2745.78
27 200009_at 6565.1 6577.5 4809.4 5354.1 5849.7 6040.3 4885 5074.3 5612.3 ... 5574.78 6901.53 6481.9 5895.1 5822 7148.38 5256.91 4724.35 6202.78 5387.87
28 200010_at 16488.9 15388.9 13689.6 10883.2 12046.5 13666.3 12881.1 10966.5 15697.7 ... 14788.6 14455.2 14359.3 14784.5 14610.3 19753.2 13155.7 8780.58 13566.9 12124.2
29 200011_s_at 1532 1347.8 1136.8 1260.5 1792.5 1704.5 1786.7 1894 1778.3 ... 2032.16 1947.76 1681.34 1953.17 2920.5 1604.6 2473.61 2934.3 2557.82 1795.16
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8764 AFFX-HUMISGF3A/M97935_MA_at 2119.2 1852.2 799.8 1011.7 600.1 588.7 676.9 1039.7 1109.1 ... 3289.38 1830.6 3108.71 3062.72 2792.42 2550.21 1969.34 2991.8 1639.34 1781.9
8765 AFFX-HUMISGF3A/M97935_MB_at 998.9 645.2 454.4 411.2 363.3 327.2 301.3 272.3 581.4 ... 1838.74 698.4 1665.11 1481.87 1465.12 1281.93 1191.78 2031.46 734.099 1099.17
8766 AFFX-HUMRGE/M10098_3_at 45.7 75.2 31 353.5 108.4 287.9 91.7 38.6 21.4 ... 76.8999 85.9002 38.3999 113.3 57.4 175.101 53.4001 13.4 16 77.9999
8767 AFFX-HUMRGE/M10098_5_at 11.4 109.8 77.6 99.2 75.4 134.5 8.9 9.8 28.1 ... 28.5 5.69999 11.1 23.4001 4.79999 73.6999 5.10001 3.2 25.5001 4.39999
8768 AFFX-HUMRGE/M10098_M_at 56.1 57.3 104.8 201.9 71.8 189 39.7 13 63 ... 30.4 51.5 32.9999 28.7 23.9 122.5 6.5 34.8999 5.60001 7.79998
8769 AFFX-M27830_3_at 42.1 183.5 41.3 51.9 71.1 24.6 26.1 34.6 50.8 ... 52.9002 50.2002 39.9001 103.4 47.8 120 38.9999 42.9001 31.0001 43.6001
8770 AFFX-M27830_5_at 203 132.5 223.7 214.5 175.2 384.5 237.7 200.5 137.5 ... 151.5 166.5 171 184 131.9 267.201 41.7001 57.8999 179.7 236.201
8771 AFFX-M27830_M_at 1332 1404.2 864.7 459.7 942.6 1549.3 1923.3 1180.4 461.4 ... 464.701 623.001 402.7 552.599 367.801 731.701 154.1 94.0998 414.699 577.001
8772 AFFX-r2-Bs-dap-3_at 15.3 2.4 30.6 5.9 3.5 7.4 9.7 3.3 60.5 ... 5.49999 1.7 1.5 8 2.80001 10.2 3.39999 1.5 1.1 1.6
8773 AFFX-r2-Bs-dap-5_at 3.5 7.1 3.9 3.9 1.3 4.5 27.3 8.1 4.4 ... 3.2 10.2 4.39999 2.39999 1.7 5.10001 3.70001 2.89999 13.7 2.5
8774 AFFX-r2-Bs-dap-M_at 8.5 3.3 2.1 53.3 3.2 17.2 25.6 3.2 3.4 ... 10.4 0.7 0.8 1.1 2.19999 5.79999 0.299999 1.3 3.1 1.2
8775 AFFX-r2-Bs-lys-3_at 47.1 55.3 11.9 49.1 17.6 9.8 5.9 40.3 13.1 ... 20.9 11.5 16.3 15.1 32 20.0999 9.10002 27.6001 10.1 15.8999
8776 AFFX-r2-Bs-lys-5_at 8.2 13.3 1.9 26.6 6.9 2 16.4 2.7 5.4 ... 0.6 3.70001 2.99999 23.8 1.6 1.6 8.4 1.4 1.9 1.1
8777 AFFX-r2-Bs-lys-M_at 5.1 39.4 35.7 43.3 18 31.7 17.9 19 63.5 ... 4.30001 31.8999 1.1 4.79999 18.5 6.1 1.4 25.0001 6.60002 32.6
8778 AFFX-r2-Bs-phe-3_at 77.1 61.4 9.2 79.6 45.3 27.3 16.6 27.3 43.7 ... 57.2999 35.1 44 27.3 55.5002 47.8999 5.00001 14.5 7.89999 49.4
8779 AFFX-r2-Bs-phe-5_at 6.4 6.7 1.8 1.3 14.8 4.5 16.5 3.3 47.6 ... 10.1 13.6 5.49999 1.5 13.6 9.10002 24.4 15.5 3.70001 1.3
8780 AFFX-r2-Bs-phe-M_at 10 5.8 2.8 10.2 1.5 40 39 57.5 61.6 ... 1.1 2.89999 2.19999 3.2 9.50002 12.7 5.29999 21.4 3.39999 24.6001
8781 AFFX-r2-Bs-thr-3_s_at 12.2 9 4 11.3 3.4 3.6 3.5 6.3 14 ... 7.30002 2.99999 4 24.2999 13.1 11.5 25.6 8.19998 19.9 5.19999
8782 AFFX-r2-Bs-thr-5_s_at 55.8 49.3 17.8 38.6 97.6 38.2 43.2 56.8 37.8 ... 48.4999 76.0001 52.6 72.8002 61.7998 42.9999 31.5999 36.1001 60.4999 84.0002
8783 AFFX-r2-Bs-thr-M_s_at 81.3 15.3 42.1 48.2 60.2 8 41.8 13.1 42 ... 75.3001 15.1 12.2 47.8 51.6 62.6998 24.7 45.1001 42.0001 49.6001
8784 AFFX-r2-Ec-bioB-3_at 259 266.7 664.5 148.4 281.6 349.9 469.3 152.9 674.7 ... 126.7 99.4997 76.7998 152.8 160.8 118.3 105.6 75.6999 120.3 203.401
8785 AFFX-r2-Ec-bioB-5_at 286.5 185.5 572.9 123.2 251.6 360.4 432.7 183.5 677.2 ... 162.3 96.9998 144.1 158.3 126.5 170.7 100.9 98.9001 105.7 160.6
8786 AFFX-r2-Ec-bioB-M_at 370.1 250 806.4 136.9 332.3 452.4 543.7 168.4 932.8 ... 180.9 130.9 155.9 198 140.3 212.3 121.6 76.7998 103.7 191.3
8787 AFFX-r2-Ec-bioC-3_at 1353.8 1032.4 2539.7 646.1 1303.8 1805.9 2030.1 777.4 3466.8 ... 478.299 549.7 516.201 586.2 528.2 700.499 379.201 262.499 460.399 637.902
8788 AFFX-r2-Ec-bioC-5_at 1351.9 1015.2 2340.9 439.1 1097.6 1527.3 2097.1 696.4 3240.3 ... 525.898 548.501 619.899 705.201 642.601 899.8 420.4 225.7 478.2 540.099
8789 AFFX-r2-Ec-bioD-3_at 4456.8 3188.6 7993.1 1845.3 3985.8 4621.8 6283.4 2112.3 11969.8 ... 7876.29 8033.43 7867.01 9144.57 7400.46 12090.6 6472.02 4458.06 8437.5 7842.51
8790 AFFX-r2-Ec-bioD-5_at 3634.6 2813.5 6777.1 1340 3385.2 3853.5 5323.2 1690.8 8892.4 ... 7070.52 7290.49 7330.01 7994.54 5365.14 10405.8 5860.87 3727.52 7137.98 7780.25
8791 AFFX-r2-P1-cre-3_at 15062.4 10854.6 19995.1 6494 12609.5 15177.5 17468.9 6847.4 27817.6 ... 19.3999 9.69997 38.6 9.10002 22.9001 8.29999 21.7 10.2 33.4001 11.7
8792 AFFX-r2-P1-cre-5_at 17679.5 13325.2 20964.6 7346.5 15322.4 15541.8 24135.4 8037.3 32044.7 ... 6.60002 2 12.5 4 12.7 7.89999 6.39999 4.59999 4.39999 0.8
8793 !series_matrix_table_end NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

8794 rows × 209 columns

In [40]:
ethnicity = pd.read_csv('ethnicity.csv')
In [43]:
ethnicity['sample_id'] = ethnicity['filename'].apply(lambda x: x[:8])
ethnicity
Out[43]:
Unnamed: 0 ethnicity filename sample_id
0 1 CEU GSM25349.CEL.gz GSM25349
1 2 CEU GSM25350.CEL.gz GSM25350
2 3 CEU GSM25356.CEL.gz GSM25356
3 4 CEU GSM25357.CEL.gz GSM25357
4 5 CEU GSM25358.CEL.gz GSM25358
5 6 CEU GSM25359.CEL.gz GSM25359
6 7 CEU GSM25360.CEL.gz GSM25360
7 8 CEU GSM25361.CEL.gz GSM25361
8 9 CEU GSM25377.CEL.gz GSM25377
9 10 CEU GSM25378.CEL.gz GSM25378
10 11 CEU GSM25385.CEL.gz GSM25385
11 12 CEU GSM25386.CEL.gz GSM25386
12 13 CEU GSM25399.CEL.gz GSM25399
13 14 CEU GSM25400.CEL.gz GSM25400
14 15 CEU GSM25401.CEL.gz GSM25401
15 16 CEU GSM25402.CEL.gz GSM25402
16 17 CEU GSM25409.CEL.gz GSM25409
17 18 CEU GSM25410.CEL.gz GSM25410
18 19 CEU GSM25426.CEL.gz GSM25426
19 20 CEU GSM25427.CEL.gz GSM25427
20 21 CEU GSM25479.CEL.gz GSM25479
21 22 CEU GSM25480.CEL.gz GSM25480
22 23 CEU GSM25481.CEL.gz GSM25481
23 24 CEU GSM25482.CEL.gz GSM25482
24 25 CEU GSM25524.CEL.gz GSM25524
25 26 CEU GSM25525.CEL.gz GSM25525
26 27 CEU GSM25526.CEL.gz GSM25526
27 28 CEU GSM25527.CEL.gz GSM25527
28 29 CEU GSM25528.CEL.gz GSM25528
29 30 CEU GSM25529.CEL.gz GSM25529
... ... ... ... ...
178 179 ASN GSM136672.CEL.gz GSM13667
179 180 ASN GSM136673.CEL.gz GSM13667
180 181 HAN GSM136674.CEL.gz GSM13667
181 182 HAN GSM136675.CEL.gz GSM13667
182 183 HAN GSM136676.CEL.gz GSM13667
183 184 HAN GSM136677.CEL.gz GSM13667
184 185 HAN GSM136705.CEL.gz GSM13670
185 186 HAN GSM136706.CEL.gz GSM13670
186 187 HAN GSM136707.CEL.gz GSM13670
187 188 HAN GSM136708.CEL.gz GSM13670
188 189 HAN GSM136709.CEL.gz GSM13670
189 190 HAN GSM136710.CEL.gz GSM13671
190 191 HAN GSM136711.CEL.gz GSM13671
191 192 HAN GSM136712.CEL.gz GSM13671
192 193 HAN GSM136713.CEL.gz GSM13671
193 194 HAN GSM136714.CEL.gz GSM13671
194 195 HAN GSM136715.CEL.gz GSM13671
195 196 HAN GSM136716.CEL.gz GSM13671
196 197 HAN GSM136717.CEL.gz GSM13671
197 198 HAN GSM136718.CEL.gz GSM13671
198 199 HAN GSM136719.CEL.gz GSM13671
199 200 HAN GSM136720.CEL.gz GSM13672
200 201 HAN GSM136721.CEL.gz GSM13672
201 202 HAN GSM136722.CEL.gz GSM13672
202 203 HAN GSM136723.CEL.gz GSM13672
203 204 HAN GSM136724.CEL.gz GSM13672
204 205 CEU GSM136725.CEL.gz GSM13672
205 206 CEU GSM136726.CEL.gz GSM13672
206 207 CEU GSM136727.CEL.gz GSM13672
207 208 ASN GSM136729.CEL.gz GSM13672

208 rows × 4 columns

In [ ]: