1KG VCF analysis

1KG VCF 파일의 일부 (22번 염색체에서 5천 개 변이 추출)

In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

VCF 읽기

In [7]:
# Peek at the top of the VCF: the first 19 lines are the 18 '##' meta-information
# lines followed by the tab-separated '#CHROM ...' column header row.
!head -n 19 only_rs_22-5000.vcf
##fileformat=VCFv4.0
##filedat=20101112
##datarelease=20100804
##samples=629
##description="Where BI calls are present, genotypes and alleles are from BI.  In there absence, UM genotypes are used.  If neither are available, no genotype information is present and the alleles are from the NCBI calls."
##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth (only filtered reads used for calling)">
##FORMAT=<ID=GL,Number=3,Type=Float,Description="Log-scaled likelihoods for AA,AB,BB genotypes where A=ref and B=alt; not applicable if site is not biallelic">
##FORMAT=<ID=GQ,Number=1,Type=Float,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
##FORMAT=<ID=GD,Number=1,Type=Float,Description="Genotype dosage.  Expected count of non-ref alleles [0,2]">
##FORMAT=<ID=OG,Number=1,Type=String,Description="Original Genotype input to Beagle">
##INFO=<ID=AF,Number=.,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
##INFO=<ID=CB,Number=.,Type=String,Description="List of centres that called, UM (University of Michigan), BI (Broad Institute), BC (Boston College), NCBI">
##INFO=<ID=EUR_R2,Number=1,Type=Float,Description="R2 From Beagle based on European Samples">
##INFO=<ID=AFR_R2,Number=1,Type=Float,Description="R2 From Beagle based on AFRICAN Samples">
##INFO=<ID=ASN_R2,Number=1,Type=Float,Description="R2 From Beagle based on Asian Samples">
#CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	HG00098	HG00100	HG00106	HG00112	HG00114	HG00116	HG00117	HG00118	HG00119	HG00120	HG00122	HG00123	HG00124	HG00126	HG00131	HG00141	HG00142	HG00143	HG00144	HG00145	HG00146	HG00147	HG00148	HG00149	HG00150	HG00151	HG00152	HG00153	HG00156	HG00158	HG00159	HG00160	HG00171	HG00173	HG00174	HG00176	HG00177	HG00178	HG00179	HG00180	HG00181	HG00182	HG00183	HG00185	HG00186	HG00187	HG00188	HG00189	HG00190	HG00231	HG00239	HG00242	HG00243	HG00244	HG00245	HG00247	HG00258	HG00262	HG00264	HG00265	HG00266	HG00267	HG00269	HG00270	HG00272	HG00306	HG00308	HG00311	HG00312	HG00357	HG00361	HG00366	HG00367	HG00368	HG00369	HG00372	HG00373	HG00377	HG00380	HG00403	HG00404	HG00406	HG00407	HG00445	HG00446	HG00452	HG00457	HG00553	HG00554	HG00559	HG00560	HG00565	HG00566	HG00577	HG00578	HG00592	HG00593	HG00596	HG00610	HG00611	HG00625	HG00626	HG00628	HG00629	HG00634	HG00635	HG00637	HG00638	HG00640	NA06984	NA06985	NA06986	NA06989	NA06994	NA07000	NA07037	NA07048	NA07051	NA07056	NA07346	NA07347	NA07357	NA10847	NA10851	NA11829	NA11830	NA11831	NA11832	NA11840	NA11843	NA11881	NA11892	NA11893	NA11894	NA11918	NA11919	NA11920	NA11930	NA11931	NA11932	NA11933	NA11992	NA11993	NA11994	NA11995	NA12003	NA12004	NA12005	NA12006	NA12043	NA12044	NA12045	NA12046	NA12058	NA12144	NA12154	NA12155	NA12156	NA12249	NA12272	NA12273	NA12275	NA12287	NA12340	NA12341	NA12342	NA12347	NA12348	NA12383	NA12399	NA12400	NA12413	NA12414	NA12489	NA12546	NA12716	NA12717	NA12718	NA12749	NA12750	NA12751	NA12761	NA12762	NA12763	NA12775	NA12776	NA12777	NA12778	NA12812	NA12813	NA12814	NA12815	NA12828	NA12830	NA12872	NA12873	NA12874	NA12889	NA12890	NA18486	NA18487	NA18489	NA18498	NA18499	NA18501	NA18502	NA18504	NA18505	NA18507	NA18508	NA18510	NA18511	NA18516	NA18517	NA18519	NA18520	NA18522	NA18523	NA18525	NA18526	NA18527	NA18532	NA18535	NA18537	NA18538	NA18539	NA18541	NA18542	NA18545	NA18547	NA18550	NA18552	NA18553	NA18555	NA18558	NA18560	NA18561	NA18562	NA18563	NA18564	NA18565	NA18566	NA18567	NA18570	
NA18571	NA18572	NA18573	NA18574	NA18576	NA18577	NA18579	NA18582	NA18592	NA18593	NA18603	NA18605	NA18608	NA18609	NA18611	NA18612	NA18614	NA18615	NA18616	NA18617	NA18618	NA18619	NA18620	NA18621	NA18622	NA18623	NA18624	NA18625	NA18626	NA18627	NA18628	NA18630	NA18631	NA18632	NA18633	NA18634	NA18636	NA18638	NA18640	NA18642	NA18643	NA18745	NA18853	NA18856	NA18858	NA18861	NA18867	NA18868	NA18870	NA18871	NA18873	NA18874	NA18907	NA18908	NA18909	NA18910	NA18912	NA18916	NA18940	NA18941	NA18942	NA18943	NA18944	NA18945	NA18947	NA18948	NA18949	NA18950	NA18951	NA18952	NA18953	NA18955	NA18956	NA18959	NA18960	NA18961	NA18963	NA18964	NA18965	NA18967	NA18968	NA18970	NA18971	NA18972	NA18973	NA18974	NA18975	NA18976	NA18977	NA18979	NA18980	NA18981	NA18982	NA18983	NA18984	NA18985	NA18986	NA18987	NA18988	NA18989	NA18990	NA18997	NA18999	NA19000	NA19001	NA19002	NA19003	NA19004	NA19005	NA19007	NA19009	NA19010	NA19012	NA19027	NA19044	NA19054	NA19055	NA19056	NA19057	NA19058	NA19059	NA19060	NA19062	NA19063	NA19064	NA19065	NA19066	NA19067	NA19068	NA19070	NA19072	NA19074	NA19075	NA19076	NA19077	NA19078	NA19079	NA19082	NA19083	NA19084	NA19085	NA19086	NA19087	NA19088	NA19093	NA19098	NA19099	NA19102	NA19107	NA19108	NA19113	NA19114	NA19116	NA19119	NA19129	NA19130	NA19131	NA19137	NA19138	NA19141	NA19143	NA19144	NA19147	NA19152	NA19153	NA19159	NA19160	NA19171	NA19172	NA19184	NA19189	NA19190	NA19200	NA19201	NA19204	NA19206	NA19207	NA19209	NA19210	NA19213	NA19225	NA19235	NA19236	NA19247	NA19248	NA19256	NA19257	NA19311	NA19312	NA19313	NA19314	NA19332	NA19334	NA19338	NA19346	NA19347	NA19350	NA19355	NA19359	NA19360	NA19371	NA19372	NA19375	NA19376	NA19377	NA19379	NA19381	NA19382	NA19383	NA19384	NA19385	NA19390	NA19391	NA19393	NA19394	NA19395	NA19397	NA19398	NA19399	NA19401	NA19404	NA19428	NA19429	NA19434	NA19435	NA19436	NA19437	NA19438	NA19439	NA19440	NA19443	NA19444	NA19445	NA19446	NA19448	NA19449	NA19451	NA19452	NA19453	NA19455	NA19456	NA19457	NA19461	NA19462	NA19463	NA19466	NA19467	NA19469	NA19471	NA19472	
NA19473	NA19474	NA19625	NA19648	NA19649	NA19651	NA19652	NA19654	NA19655	NA19658	NA19660	NA19661	NA19678	NA19684	NA19685	NA19700	NA19701	NA19703	NA19704	NA19707	NA19712	NA19713	NA19720	NA19722	NA19723	NA19725	NA19726	NA19818	NA19819	NA19834	NA19835	NA19900	NA19901	NA19904	NA19908	NA19909	NA19914	NA19916	NA19917	NA19920	NA19921	NA19982	NA20414	NA20502	NA20505	NA20508	NA20509	NA20510	NA20512	NA20515	NA20516	NA20517	NA20518	NA20519	NA20520	NA20521	NA20522	NA20524	NA20525	NA20526	NA20527	NA20528	NA20529	NA20530	NA20531	NA20532	NA20533	NA20534	NA20535	NA20536	NA20537	NA20538	NA20539	NA20540	NA20541	NA20542	NA20543	NA20544	NA20581	NA20582	NA20585	NA20586	NA20588	NA20589	NA20752	NA20753	NA20754	NA20755	NA20756	NA20757	NA20758	NA20759	NA20760	NA20761	NA20765	NA20769	NA20770	NA20771	NA20772	NA20773	NA20774	NA20775	NA20778	NA20783	NA20785	NA20786	NA20787	NA20790	NA20792	NA20795	NA20796	NA20797	NA20798	NA20799	NA20800	NA20801	NA20802	NA20803	NA20804	NA20805	NA20806	NA20807	NA20808	NA20809	NA20810	NA20811	NA20812	NA20813	NA20814	NA20815	NA20816	NA20818	NA20819	NA20826	NA20828
In [9]:
# Load the VCF body into a DataFrame.
# The file starts with '##' meta-information lines, then a single '#CHROM ...'
# line holding the column names. Count the '##' lines instead of hard-coding
# skiprows=18, so the '#CHROM' row is always the first line pandas sees, even
# if the header length differs.
vcf_path = 'only_rs_22-5000.vcf'
n_meta_lines = 0
with open(vcf_path, encoding='utf-8') as vcf_file:
    for line in vcf_file:
        if not line.startswith('##'):
            # First non-meta line is the '#CHROM' column header; stop here.
            break
        n_meta_lines += 1

df = pd.read_table(vcf_path, skiprows=n_meta_lines)
df
Out[9]:
#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT HG00098 ... NA20811 NA20812 NA20813 NA20814 NA20815 NA20816 NA20818 NA20819 NA20826 NA20828
0 22 50328528 rs116643340 G C . PASS DP=269;AF=0.010;CB=BI,NCBI;AFR_R2=0.516 GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
1 22 50328670 rs114196268 G A . PASS DP=1089;AF=0.007;CB=UM,BI,BC,NCBI;AFR_R2=0.465 GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
2 22 50328731 rs28608875 C T . PASS DP=907;AF=0.055;CB=UM,BI,BC,NCBI;EUR_R2=0.751;... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:19.48:./. ... 0|0:3,0:3:.:-0.00,-0.90,-10.06:31.02:./. 0|0:7,0:5:.:-0.01,-1.51,-14.93:25.32:./. 0|0:2,0:2:.:-0.00,-0.60,-6.66:25.58:./. 0|0:1,0:1:.:-0.00,-0.30,-3.47:22.60:./. 0|0:3,0:3:.:-0.00,-0.90,-9.31:28.57:./. 1|0:3,0:3:.:-0.00,-0.90,-10.19:3.84:0/0 0|0:3,0:3:.:-0.00,-0.90,-9.86:28.57:./. 0|1:2,0:2:.:-0.00,-0.60,-7.19:5.60:0/0 0|0:4,0:3:.:-0.00,-0.90,-10.16:18.81:./. 0|0:5,0:3:.:-0.00,-0.91,-9.26:31.14:./.
3 22 50329119 rs56008904 C T . PASS DP=141;AF=0.390;CB=BI,BC;EUR_R2=0.145;AFR_R2=0... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:3.49:./. ... 1|0:.:.:.:.,.,.:3.36:./. 0|0:1,0:1:.:-0.00,-0.30,-3.17:3.87:./. 0|0:.:.:.:.,.,.:3.54:./. 0|0:.:.:.:.,.,.:3.54:./. 0|0:1,0:1:.:-0.00,-0.30,-2.37:5.94:./. 1|0:0,1:1:.:-3.87,-0.30,-0.00:4.71:1/1 0|0:.:.:.:.,.,.:3.54:./. 1|1:.:.:.:.,.,.:1.71:./. 1|0:.:.:.:.,.,.:3.22:./. 1|0:.:.:.:.,.,.:3.36:./.
4 22 50329830 rs79860705 G A . PASS DP=373;AF=0.107;CB=UM,BI,BC,NCBI;EUR_R2=0.507 GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:9.26:./. ... 0|0:1,0:1:.:-0.00,-0.30,-3.47:11.92:./. 1|0:1,2:2:.:-6.43,-0.60,-0.00:6.99:1/1 0|0:1,0:1:.:-0.00,-0.30,-2.50:12.12:./. 0|0:.:.:.:.,.,.:9.26:./. 0|0:1,0:1:.:-0.00,-0.30,-2.80:12.12:./. 0|0:3,0:3:.:-0.00,-0.90,-9.43:5.77:./. 0|0:.:.:.:.,.,.:9.26:./. 0|1:.:.:.:.,.,.:5.52:./. 0|0:3,1:3:.:-0.00,-0.91,-8.99:9.32:./. 0|0:1,0:1:.:-0.00,-0.30,-3.30:11.93:./.
5 22 50330186 rs73444002 G T . PASS DP=1024;AF=0.026;CB=UM,BI,BC,NCBI;EUR_R2=0.498... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:27.01:./. ... 0|0:2,0:2:.:-0.00,-0.60,-6.86:19.92:./. 0|0:2,0:2:.:-0.00,-0.60,-6.01:41.55:./. 0|0:3,0:3:.:-0.00,-0.90,-10.67:42.22:./. 0|0:7,0:5:.:-0.01,-1.51,-14.76:46.99:./. 0|0:3,0:3:.:-0.00,-0.90,-10.26:41.55:./. 0|0:2,0:2:.:-0.00,-0.60,-6.90:35.23:./. 0|0:.:.:.:.,.,.:32.76:./. 0|0:2,0:1:.:-0.00,-0.30,-3.50:15.52:./. 0|0:5,0:5:.:-0.00,-1.51,-17.40:27.64:./. 0|0:1,0:1:.:-0.00,-0.30,-3.56:16.98:./.
6 22 50330345 rs28652639 G A . PASS DP=1477;AF=0.016;CB=UM,BI,BC,NCBI;EUR_R2=0.605 GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:11.33:./. ... 0|0:3,0:3:.:-0.00,-0.90,-10.19:18.58:./. 0|0:3,0:2:.:-0.00,-0.60,-5.89:20.27:./. 0|0:3,0:3:.:-0.00,-0.90,-10.99:20.27:./. 0|0:.:.:.:.,.,.:11.47:./. 1|0:2,3:5:.:-9.59,-1.51,-6.07:60:./. 0|0:6,0:6:.:-0.00,-1.81,-21.59:32.22:./. 0|0:.:.:.:.,.,.:11.44:./. 0|0:1,0:1:.:-0.00,-0.30,-3.70:13.38:./. 0|0:8,0:6:.:-0.01,-1.81,-18.99:26.82:./. 0|0:4,0:2:.:-0.00,-0.60,-7.09:15.63:./.
7 22 50330546 rs28516848 T G . PASS DP=1517;AF=0.061;CB=UM,BI,BC,NCBI;EUR_R2=0.773... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:21.35:./. ... 0|0:.:.:.:.,.,.:21.73:./. 0|0:6,0:6:.:-0.00,-1.81,-19.50:45.23:./. 0|0:5,0:5:.:-0.00,-1.51,-18.62:39.59:./. 0|0:6,0:6:.:-0.00,-1.81,-21.26:43.01:./. 0|0:3,0:3:.:-0.00,-0.90,-10.43:33.87:./. 0|0:5,0:5:.:-0.00,-1.51,-17.46:43.01:./. 0|0:.:.:.:.,.,.:24.67:./. 1|0:1,1:2:.:-2.80,-0.60,-2.94:43.98:./. 0|0:3,0:3:.:-0.00,-0.90,-10.93:36.20:./. 0|0:3,0:2:.:-0.00,-0.60,-7.49:30.22:./.
8 22 50331032 rs115322211 G A . PASS DP=1465;AF=0.013;CB=UM,BI,BC,NCBI;AFR_R2=0.711 GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
9 22 50331164 rs28690400 T C . PASS DP=1538;AF=0.299;CB=UM,BI,BC,NCBI;EUR_R2=0.929... GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-3.46:23.76:./. ... 0|0:2,0:2:.:-0.00,-0.60,-6.43:24.96:./. 1|0:4,2:4:.:-2.80,-1.21,-10.48:50:./. 0|0:9,0:9:.:-0.01,-2.72,-31.86:60:./. 0|0:2,0:2:.:-0.00,-0.61,-6.82:50:./. 0|0:4,0:4:.:-0.00,-1.21,-14.35:60:./. 1|0:3,3:5:.:-5.97,-1.51,-11.20:60:./. 0|0:1,0:1:.:-0.00,-0.30,-3.57:43.98:./. 1|1:0,3:3:.:-8.76,-0.91,-0.01:37.21:./. 1|0:3,1:4:.:-3.20,-1.21,-11.00:22.73:./. 0|0:8,0:6:.:-0.00,-1.81,-22.28:42.22:./.
10 22 50331300 rs73891184 T C . PASS DP=1799;AF=0.020;CB=UM,BI,BC,NCBI;AFR_R2=0.759 GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
11 22 50331475 rs76433322 A G . PASS DP=2162;AF=0.009;CB=UM,BI,BC,NCBI;AFR_R2=0.732 GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
12 22 50331622 rs28393180 C T . PASS DP=1952;AF=0.052;CB=UM,BI,BC,NCBI;EUR_R2=0.68;... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:24.96:./. ... 0|0:6,0:6:.:-0.01,-1.81,-17.47:60:./. 0|0:13,0:11:.:-0.02,-3.33,-30.65:60:./. 0|0:16,0:16:.:-0.02,-4.83,-49.96:60:./. 0|0:3,0:2:.:-0.00,-0.60,-5.66:50:./. 0|0:8,0:7:.:-0.01,-2.11,-20.85:43.01:./. 0|0:1,0:1:.:-0.00,-0.30,-3.10:60:./. 0|0:1,0:1:.:-0.00,-0.30,-2.90:28.96:./. 0|0:3,0:3:.:-0.00,-0.90,-9.33:60:./. 0|0:8,0:7:.:-0.01,-2.11,-22.39:60:./. 0|0:6,0:4:.:-0.00,-1.21,-12.77:60:./.
13 22 50332005 rs28460675 G T . PASS DP=2228;AF=0.144;CB=UM,BI,BC,NCBI;EUR_R2=0.918... GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-2.40:17.77:./. ... 1|0:1,5:5:.:-13.57,-1.51,-2.80:50:./. 0|0:6,0:4:.:-0.01,-1.21,-11.77:50:./. 0|0:18,0:17:.:-0.02,-5.13,-53.88:60:./. 0|0:6,0:4:.:-0.00,-1.21,-11.66:46.99:./. 0|0:6,0:5:.:-0.01,-1.51,-16.23:26.78:./. 0|0:5,0:5:.:-0.01,-1.51,-15.63:60:./. 0|0:2,0:1:.:-0.00,-0.30,-3.06:28.54:./. 1|0:4,1:5:.:-3.90,-1.51,-12.42:43.01:./. 0|0:6,0:6:.:-0.01,-1.81,-17.97:46.99:./. 1|0:5,2:6:.:-3.90,-1.81,-15.46:39.21:./.
14 22 50332022 rs28470489 T A . PASS DP=2210;AF=0.438;CB=UM,BI,BC,NCBI;EUR_R2=0.972... GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-2.94:16.89:./. ... 1|0:2,3:5:.:-10.04,-1.51,-6.48:60:./. 1|0:4,3:6:.:-6.94,-1.81,-13.38:60:./. 0|0:17,0:16:.:-0.01,-4.82,-58.10:60:./. 0|0:5,0:5:.:-0.00,-1.51,-18.06:45.23:./. 0|0:5,0:5:.:-0.00,-1.51,-19.09:50:./. 1|0:4,5:8:.:-14.36,-2.41,-13.48:60:./. 0|0:1,0:1:.:-0.00,-0.30,-3.70:27.93:./. 1|1:0,7:6:.:-20.45,-1.81,-0.00:60:./. 1|0:2,3:5:.:-10.48,-1.51,-6.60:60:./. 1|0:6,3:9:.:-11.04,-2.71,-20.50:60:./.
15 22 50332097 rs77228592 C T . PASS DP=2104;AF=0.020;CB=UM,BI,BC,NCBI;EUR_R2=0.483... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:21.60:./. ... 0|0:2,0:2:.:-0.00,-0.60,-6.06:30.56:./. 0|0:10,0:7:.:-0.01,-2.11,-21.06:45.23:./. 0|0:19,0:19:.:-0.02,-5.73,-60.41:60:./. 0|0:10,0:8:.:-0.01,-2.41,-26.40:45.23:./. 0|0:4,0:2:.:-0.00,-0.60,-5.46:27.93:./. 0|0:4,0:4:.:-0.00,-1.21,-12.20:36.58:./. 0|0:.:.:.:.,.,.:21.56:./. 0|0:8,0:6:.:-0.01,-1.81,-17.99:60:./. 0|0:4,0:3:.:-0.00,-0.90,-9.76:33.57:./. 0|0:5,0:5:.:-0.00,-1.51,-16.43:39.59:./.
16 22 50332242 rs28451805 A G . PASS DP=3101;AF=0.426;CB=UM,BI,BC,NCBI;EUR_R2=0.969... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:17.48:./. ... 1|0:7,3:9:.:-8.00,-2.72,-23.94:60:./. 1|0:7,2:9:.:-6.07,-2.72,-23.15:60:./. 0|0:31,0:30:.:-0.02,-9.04,-109.61:60:./. 0|0:11,0:11:.:-0.01,-3.32,-40.19:60:./. 0|0:18,0:16:.:-0.02,-4.83,-57.14:60:./. 1|0:7,5:12:.:-18.26,-3.62,-24.95:60:./. 0|0:7,0:7:.:-0.01,-2.12,-22.43:50:./. 1|1:0,4:4:.:-14.33,-1.20,-0.00:33.28:./. 1|0:13,6:19:.:-23.89,-5.73,-47.23:60:./. 1|0:11,2:12:.:-7.08,-3.62,-35.26:60:./.
17 22 50332324 rs28523661 T C . PASS DP=3435;AF=0.587;CB=UM,BI,BC,NCBI;EUR_R2=0.755... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:4.64:./. ... 1|1:0,15:14:.:-45.50,-4.22,-0.02:60:./. 1|0:14,13:24:.:-32.97,-7.24,-45.55:60:./. 0|1:25,24:44:.:-64.95,-13.26,-90.40:60:./. 0|0:10,0:9:.:-0.01,-2.72,-32.25:30.71:./. 0|0:17,0:17:.:-0.02,-5.13,-60.28:60:./. 1|1:0,12:10:.:-31.71,-3.01,-0.01:23.48:./. 0|1:3,2:4:.:-2.67,-1.21,-10.78:11.43:./. 1|1:0,2:2:.:-7.36,-0.60,-0.00:18.29:./. 1|0:19,12:28:.:-40.16,-8.44,-62.12:60:./. 1|0:12,3:15:.:-10.84,-4.52,-45.63:60:./.
18 22 50332377 rs114353012 C T . PASS DP=3208;AF=0.003;CB=UM,BC,NCBI GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
19 22 50332472 rs115364094 C G . PASS DP=3376;AF=0.003;CB=UM,BI,BC,NCBI;AFR_R2=0.824 GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
20 22 50333130 rs77002698 G A . PASS DP=1951;AF=0.081;CB=UM,BI,BC,NCBI;EUR_R2=0.886... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:30.97:./. ... 0|0:3,0:2:.:-0.00,-0.60,-6.16:40.46:./. 1|0:6,3:6:.:-7.14,-1.81,-12.28:60:./. 0|0:10,0:10:.:-0.01,-3.01,-32.30:60:./. 0|0:8,0:5:.:-0.00,-1.51,-16.60:46.99:./. 0|0:4,0:3:.:-0.00,-0.91,-9.36:41.55:./. 1|0:7,5:8:.:-3.76,-2.41,-22.63:28.66:./. 0|0:2,0:2:.:-0.00,-0.60,-6.06:37.45:./. 0|1:2,2:2:.:-7.23,-0.60,-0.00:60:1/1 1|0:7,3:7:.:-11.48,-2.11,-11.99:60:./. 0|0:9,0:7:.:-0.01,-2.11,-21.89:60:./.
21 22 50333253 rs116837852 G A . PASS DP=2488;AF=0.010;CB=UM,BI,BC,NCBI;AFR_R2=0.822 GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
22 22 50333415 rs4073387 G A . PASS DP=2144;AF=0.087;CB=UM,BI,BC,NCBI;EUR_R2=0.967... GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-2.80:26.09:./. ... 0|0:4,0:3:.:-0.00,-0.90,-9.03:34.81:./. 1|0:4,4:6:.:-14.77,-1.81,-5.67:60:./. 0|0:6,0:6:.:-0.01,-1.81,-18.69:40.97:./. 0|0:4,0:3:.:-0.00,-0.90,-9.66:32.29:./. 0|0:6,0:6:.:-0.01,-1.81,-17.65:41.55:./. 1|0:6,3:9:.:-10.60,-2.72,-17.67:60:./. 0|0:4,0:4:.:-0.01,-1.21,-11.86:35.09:./. 0|1:1,0:1:.:-0.00,-0.30,-2.80:26.13:0/0 1|0:4,3:7:.:-11.69,-2.11,-12.10:60:./. 0|0:6,0:5:.:-0.00,-1.51,-16.25:41.55:./.
23 22 50333697 rs4073388 C G . PASS DP=1957;AF=0.073;CB=UM,BI,BC,NCBI;EUR_R2=0.885... GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-4.05:23.48:./. ... 0|0:4,0:3:.:-0.00,-0.90,-11.94:31.87:./. 1|0:5,2:5:.:-4.20,-1.51,-14.08:40.46:./. 0|0:10,0:8:.:-0.01,-2.42,-31.35:43.98:./. 0|0:2,0:1:.:-0.00,-0.30,-3.89:23.68:./. 0|0:1,0:1:.:-0.00,-0.30,-3.89:23.97:./. 1|0:2,3:4:.:-7.20,-1.21,-7.09:60:./. 0|0:2,0:1:.:-0.00,-0.30,-3.69:23.29:./. 0|1:.:.:.:.,.,.:11.50:./. 1|0:1,3:3:.:-7.84,-0.91,-3.79:50:./. 0|0:5,0:4:.:-0.00,-1.21,-15.77:35.69:./.
24 22 50333748 rs114900897 C T . PASS DP=2417;AF=0.022;CB=UM,BI,BC,NCBI;AFR_R2=0.818 GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
25 22 50334208 rs4073885 C T . PASS DP=2555;AF=0.022;CB=UM,BI,BC,NCBI;AFR_R2=0.75 GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
26 22 50334409 rs28376035 A G . PASS DP=2092;AF=0.059;CB=UM,BI;EUR_R2=0.815;AFR_R2=... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:31.43:./. ... 0|0:5,0:2:.:-0.00,-0.60,-8.23:40.46:./. 1|0:5,4:6:.:-5.50,-1.81,-13.56:46.99:./. 0|0:6,0:3:.:-0.01,-0.91,-9.48:40.46:./. 0|0:9,0:5:.:-0.00,-1.51,-19.71:46.99:./. 0|0:8,0:5:.:-0.00,-1.51,-18.92:46.99:./. 1|0:7,2:4:.:-5.99,-1.21,-7.33:60:./. 0|0:7,0:4:.:-0.01,-1.21,-12.22:43.01:./. 0|1:7,0:1:.:-0.00,-0.30,-3.97:7.52:0/0 1|0:3,5:5:.:-9.76,-1.51,-7.94:60:./. 0|0:11,0:7:.:-0.01,-2.11,-26.55:60:./.
27 22 50334415 rs28405006 C T . PASS DP=2200;AF=0.055;CB=UM,BI,BC;AFR_R2=0.651 GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:60:./. ... 0|0:4,0:2:.:-0.00,-0.60,-6.74:60:./. 0|0:10,0:6:.:-0.01,-1.81,-18.07:60:./. 0|0:7,0:5:.:-0.01,-1.51,-15.08:60:./. 0|0:10,0:6:.:-0.00,-1.81,-19.77:60:./. 0|0:7,0:3:.:-0.00,-0.90,-9.83:60:./. 0|0:11,0:5:.:-0.00,-1.51,-16.20:60:./. 0|0:6,0:3:.:-0.01,-0.91,-8.59:60:./. 0|0:6,0:3:.:-0.00,-0.91,-9.43:60:./. 0|0:9,0:6:.:-0.01,-1.81,-18.67:60:./. 0|0:12,0:8:.:-0.01,-2.41,-25.90:60:./.
28 22 50334443 rs28612446 T C . PASS DP=2398;AF=0.069;CB=UM,BI,BC,NCBI;EUR_R2=0.907... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:28.39:./. ... 0|0:7,0:7:.:-0.00,-2.11,-26.13:50:./. 1|0:7,5:8:.:-11.93,-2.42,-13.84:60:./. 0|0:7,1:6:.:-0.00,-1.81,-22.49:46.99:./. 0|0:11,0:11:.:-0.01,-3.32,-40.97:60:./. 0|0:5,0:3:.:-0.00,-0.90,-10.89:37.21:./. 1|0:8,5:10:.:-9.36,-3.01,-26.04:60:./. 0|0:7,0:5:.:-0.00,-1.51,-18.51:43.01:./. 0|1:4,2:4:.:-2.57,-1.21,-11.88:23.38:./. 1|0:3,3:5:.:-9.29,-1.51,-7.43:60:./. 0|0:8,0:5:.:-0.00,-1.51,-19.78:46.99:./.
29 22 50334680 rs62234028 A G . PASS DP=2743;AF=0.099;CB=UM,BI,BC,NCBI;EUR_R2=0.98;... GT:AD:DP:GD:GL:GQ:OG 0|0:2,0:2:.:-0.00,-0.60,-8.02:39.21:./. ... 0|0:5,0:5:.:-0.00,-1.51,-18.40:60:./. 1|0:1,5:5:.:-10.79,-1.51,-3.68:60:./. 0|0:11,0:9:.:-0.00,-2.71,-33.87:60:./. 0|0:5,0:5:.:-0.00,-1.51,-19.51:50:./. 0|0:3,0:3:.:-0.00,-0.90,-11.89:45.23:./. 1|0:7,3:9:.:-6.00,-2.71,-25.63:60:./. 0|0:1,0:1:.:-0.00,-0.30,-2.86:38.24:./. 0|1:0,1:1:.:-2.77,-0.30,-0.00:42.22:1/1 1|0:3,4:6:.:-9.73,-1.81,-11.39:60:./. 0|0:2,0:2:.:-0.00,-0.60,-7.92:43.98:./.
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
4970 22 51217134 rs117417021 A G . PASS DP=1636;AF=0.080;CB=BI,BC;EUR_R2=0.423;AFR_R2=... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:6.73:./. ... 0|0:.:.:.:.,.,.:9.80:./. 0|0:4,1:3:.:-0.00,-0.91,-10.29:22.85:./. 0|0:.:.:.:.,.,.:8.07:./. 0|0:3,0:3:.:-0.00,-0.90,-11.08:20.32:./. 0|0:4,0:3:.:-0.00,-0.90,-11.29:22.06:./. 0|0:.:.:.:.,.,.:5.15:./. 0|0:.:.:.:.,.,.:6.92:./. 0|0:2,0:2:.:-0.00,-0.60,-7.62:15.65:./. 0|0:4,0:4:.:-0.00,-1.20,-16.16:24.93:./. 0|0:6,0:6:.:-0.00,-1.81,-23.76:32.01:./.
4971 22 51217954 rs9616974 G A . PASS DP=2572;AF=0.066;CB=UM,BI;EUR_R2=0.925;AFR_R2=... GT:AD:DP:GD:GL:GQ:OG 0|0:2,0:2:.:-0.00,-0.60,-6.04:10.47:./. ... 0|0:8,0:3:.:-0.00,-0.90,-11.16:40.97:./. 0|0:4,0:1:.:-0.00,-0.30,-3.20:34.32:./. 0|1:2,1:2:.:-2.87,-0.61,-3.47:34.81:./. 0|0:5,0:1:.:-0.00,-0.30,-4.00:34.56:./. 0|0:3,0:3:.:-0.00,-0.90,-9.81:40.46:./. 0|0:12,0:4:.:-0.01,-1.21,-12.57:45.23:./. 0|0:2,0:3:.:-2.80,-3.31,-8.58:26.29:./. 0|0:2,0:1:.:-0.00,-0.30,-3.80:34.69:./. 0|0:10,0:7:.:-0.00,-2.11,-25.06:50:./. 0|0:.:.:.:.,.,.:31.14:./.
4972 22 51218224 rs9616975 C A . PASS DP=2741;AF=0.050;CB=UM,BI,BC,NCBI;EUR_R2=0.699... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:12.69:./. ... 0|0:10,0:4:.:-0.00,-1.21,-13.47:43.98:./. 0|0:9,0:4:.:-0.00,-1.21,-12.66:43.01:./. 0|1:4,3:4:.:-5.23,-1.21,-6.86:60:./. 0|0:.:.:.:.,.,.:31.74:./. 0|0:.:.:.:.,.,.:31.49:./. 0|0:7,0:3:.:-0.00,-0.90,-9.92:40:./. 0|0:.:.:.:.,.,.:27.19:./. 0|0:.:.:.:.,.,.:30.71:./. 0|0:11,0:4:.:-0.00,-1.21,-12.40:43.98:./. 0|0:.:.:.:.,.,.:30.86:./.
4973 22 51218615 rs117845379 T A . PASS DP=2071;AF=0.067;CB=UM,BI,BC,NCBI;EUR_R2=0.765... GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-4.00:11.69:./. ... 0|0:10,0:9:.:-0.01,-2.71,-29.75:60:./. 0|0:7,0:6:.:-0.00,-1.81,-20.37:50:./. 0|1:6,1:7:.:-3.70,-2.11,-20.97:21.16:./. 0|0:2,0:2:.:-0.00,-0.60,-7.10:36.99:./. 0|0:3,0:3:.:-0.00,-0.91,-9.40:38.24:./. 0|0:10,0:9:.:-0.01,-2.71,-30.25:60:./. 0|0:7,0:5:.:-0.01,-1.51,-15.97:35.23:./. 0|0:3,0:3:.:-0.00,-0.90,-9.98:40.46:./. 0|0:3,0:3:.:-0.00,-0.90,-10.44:40.46:./. 0|0:5,0:5:.:-0.01,-1.51,-16.19:46.99:./.
4974 22 51218957 rs116323389 A G . PASS DP=3192;AF=0.015;CB=UM,BI,BC,NCBI;AFR_R2=0.867 GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-4.07:33.57:./. ... 0|0:4,0:4:.:-0.00,-1.20,-15.63:37.45:./. 0|0:4,0:4:.:-0.00,-1.21,-13.55:38.86:./. 0|0:4,0:4:.:-0.00,-1.21,-13.56:40.97:./. 0|0:8,0:8:.:-0.00,-2.41,-30.60:50:./. 0|0:7,0:7:.:-0.00,-2.11,-26.21:46.99:./. 0|0:6,0:5:.:-0.00,-1.51,-18.50:41.55:./. 0|0:1,0:1:.:-0.00,-0.30,-3.76:32.52:./. 0|0:4,0:4:.:-0.00,-1.21,-13.35:38.24:./. 0|0:10,0:10:.:-0.00,-3.01,-37.71:60:./. 0|0:8,0:7:.:-0.00,-2.11,-26.22:46.99:./.
4975 22 51219006 rs28729663 G A . PASS DP=3270;AF=0.200;CB=UM,BI,BC,NCBI;EUR_R2=0.871... GT:AD:DP:GD:GL:GQ:OG 0|1:0,1:1:.:-3.77,-0.30,-0.00:7.51:1/1 ... 0|0:8,0:7:.:-0.00,-2.11,-22.66:35.69:./. 0|0:6,0:5:.:-0.01,-1.51,-14.95:30:./. 0|1:3,2:5:.:-7.21,-1.51,-10.29:60:./. 0|0:8,0:10:.:-6.24,-8.71,-30.87:40:./. 0|0:4,0:4:.:-0.00,-1.21,-12.30:26.99:./. 0|0:6,0:5:.:-0.00,-1.51,-15.80:29.96:./. 1|0:2,1:3:.:-3.87,-0.90,-6.49:38.54:./. 0|0:3,0:3:.:-0.00,-0.90,-9.73:23.74:./. 0|0:9,0:9:.:-0.01,-2.71,-29.33:42.22:./. 0|0:5,0:5:.:-0.01,-1.51,-15.33:30.71:./.
4976 22 51219216 rs6010087 G A . PASS DP=3115;AF=0.002;CB=UM,BI,NCBI;AFR_R2=0.794 GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
4977 22 51219382 rs118066233 G A . PASS DP=2889;AF=0.000;CB=BI,BC,NCBI;AFR_R2=0.957 GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-3.50:60:./. ... 0|0:8,0:8:.:-0.00,-2.41,-26.36:60:./. 0|0:7,0:6:.:-0.01,-1.81,-17.14:60:./. 0|0:9,0:9:.:-0.00,-2.71,-29.83:60:./. 0|0:2,0:1:.:-0.00,-0.30,-3.30:60:./. 0|0:3,0:3:.:-0.00,-0.90,-10.36:60:./. 0|0:1,0:1:.:-0.00,-0.30,-3.40:60:./. 0|0:3,0:2:.:-0.00,-0.60,-6.34:60:./. 0|0:2,0:2:.:-0.00,-0.60,-7.06:60:./. 0|0:4,0:4:.:-0.00,-1.20,-15.28:60:./. 0|0:3,0:2:.:-0.00,-0.60,-6.44:60:./.
4978 22 51219387 rs9616832 T C . PASS DP=2796;AF=0.069;CB=UM,BI,BC,NCBI;EUR_R2=0.952... GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-4.06:10.25:./. ... 0|0:10,0:9:.:-0.00,-2.71,-34.55:60:./. 0|0:7,0:6:.:-0.00,-1.81,-21.99:50:./. 0|1:7,2:7:.:-3.17,-2.11,-23.27:46.99:./. 0|0:2,0:1:.:-0.00,-0.30,-3.86:34.69:./. 0|0:3,0:3:.:-0.00,-0.90,-11.58:40.46:./. 0|0:1,0:1:.:-0.00,-0.30,-3.86:38.24:./. 0|0:3,0:3:.:-0.00,-0.90,-11.69:26.80:./. 0|0:3,0:2:.:-0.00,-0.60,-7.12:37.96:./. 0|0:4,0:4:.:-0.00,-1.20,-15.82:43.98:./. 0|0:3,0:3:.:-0.00,-0.90,-11.40:39.59:./.
4979 22 51220441 rs76759269 T C . PASS DP=1857;AF=1.000;CB=UM,BI,BC GT:AD:DP:GD:GL:GQ:OG 1|1:0,1:1:.:-2.80,-0.30,-0.00:36.58:./. ... 1|1:0,2:2:.:-5.96,-0.60,-0.00:38.24:./. 1|1:0,7:5:.:-14.73,-1.51,-0.01:46.99:./. 1|1:0,13:9:.:-27.58,-2.71,-0.01:60:./. 1|1:0,8:3:.:-8.83,-0.90,-0.00:40.97:./. 1|1:0,5:4:.:-11.73,-1.21,-0.01:43.98:./. 1|1:0,6:6:.:-18.17,-1.81,-0.01:50:./. 1|1:0,1:1:.:-3.07,-0.30,-0.00:36.20:./. 1|1:0,7:5:.:-15.10,-1.51,-0.00:46.99:./. 1|1:0,4:4:.:-12.56,-4.54,-3.95:37.96:./. 1|1:0,4:3:.:-8.93,-0.91,-0.00:40.97:./.
4980 22 51220848 rs9616981 C A . PASS DP=3053;AF=0.126;CB=UM,BI,BC;EUR_R2=0.607;AFR_... GT:AD:DP:GD:GL:GQ:OG 0|0:1,2:1:.:-0.00,-0.30,-3.66:10.88:./. ... 0|0:14,1:14:.:-0.01,-4.22,-44.95:60:./. 0|0:7,0:7:.:-0.01,-2.11,-20.53:31.80:./. 0|0:5,1:5:.:-0.00,-1.51,-16.56:18.25:./. 0|0:2,0:1:.:-0.00,-0.30,-3.46:12.84:./. 0|0:6,0:5:.:-0.00,-1.51,-16.37:25.21:./. 0|0:17,1:17:.:-0.01,-5.12,-55.22:60:./. 0|0:5,0:4:.:-0.00,-1.21,-11.96:20.55:./. 0|0:2,0:2:.:-0.00,-0.60,-6.56:16.09:./. 0|0:12,0:12:.:-0.00,-3.61,-41.70:46.99:./. 0|0:9,2:9:.:-0.01,-2.71,-30.09:37.96:./.
4981 22 51220938 rs78802599 A G . PASS DP=4787;AF=0.220;CB=BI,BC,NCBI;EUR_R2=0.667;AF... GT:AD:DP:GD:GL:GQ:OG 0|0:1,1:1:.:-0.00,-0.30,-4.07:6.93:./. ... 0|1:5,1:6:.:-3.40,-1.81,-19.62:12.98:./. 0|0:6,1:6:.:-0.00,-1.81,-20.77:21.40:./. 0|0:11,2:11:.:-3.30,-3.32,-37.84:3.99:./. 1|0:4,2:3:.:-3.50,-0.90,-8.03:22.92:./. 0|0:13,0:13:.:-0.01,-3.92,-48.33:60:./. 0|0:13,3:13:.:-3.17,-3.92,-44.70:11.14:./. 0|0:3,0:3:.:-0.00,-0.90,-11.18:12.39:./. 0|0:2,0:2:.:-0.00,-0.60,-7.53:9.72:./. 0|0:9,0:9:.:-0.00,-2.71,-35.56:30.41:./. 0|0:6,1:6:.:-0.00,-1.81,-24.48:21.42:./.
4982 22 51221731 rs115055839 T C . PASS DP=2518;AF=0.059;CB=UM,BI,BC,NCBI;EUR_R2=0.807... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:10.76:./. ... 0|0:4,0:4:.:-0.01,-1.21,-14.17:31.74:./. 0|0:8,0:8:.:-0.01,-2.41,-27.82:45.23:./. 0|1:7,11:17:.:-32.13,-5.12,-26.47:60:./. 0|0:2,0:1:.:-0.00,-0.30,-3.17:19.80:./. 0|0:7,0:7:.:-0.00,-2.11,-26.33:42.22:./. 0|0:5,0:5:.:-0.00,-1.51,-18.14:36.78:./. 0|0:9,0:9:.:-0.01,-2.72,-31.78:45.23:./. 0|0:7,0:5:.:-0.01,-1.51,-16.91:34.32:./. 0|0:12,0:12:.:-0.01,-3.62,-44.65:60:./. 0|0:10,0:9:.:-0.01,-2.72,-32.28:50:./.
4983 22 51222052 rs2879915 G A . PASS DP=979;AF=0.190;CB=UM,BI,BC,NCBI;EUR_R2=0.314;... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:3.02:./. ... 1|1:2,4:3:.:-9.18,-0.91,-0.01:4:./. 0|0:3,0:1:.:-0.00,-0.30,-3.07:5.90:./. 0|1:2,2:2:.:-3.76,-0.60,-2.70:26.09:./. 1|0:2,5:2:.:-7.52,-0.60,-0.00:3.70:1/1 0|0:2,0:2:.:-0.00,-0.60,-6.46:8.15:./. 1|1:0,6:4:.:-14.32,-1.21,-0.00:6.43:./. 0|0:.:.:.:.,.,.:3.07:./. 0|1:5,1:5:.:-3.36,-1.51,-12.50:17.28:./. 1|0:0,1:1:.:-3.86,-0.30,-0.00:5.98:1/1 0|0:10,0:8:.:-0.01,-2.41,-24.49:25.70:./.
4984 22 51222100 rs114553188 G T . PASS DP=840;AF=0.082;CB=UM,BI;EUR_R2=0.393;AFR_R2=0... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:9.42:./. ... 0|0:.:.:.:.,.,.:9.82:./. 0|0:4,0:3:.:-0.00,-0.90,-9.57:18.88:./. 0|0:5,0:4:.:-0.01,-1.21,-11.76:21.34:./. 0|0:4,0:1:.:-0.00,-0.30,-2.70:13.09:./. 0|0:2,0:2:.:-0.00,-0.60,-7.11:15.88:./. 0|0:3,0:3:.:-0.00,-0.90,-8.86:18.88:./. 1|0:.:.:.:.,.,.:1.94:./. 0|0:3,1:2:.:-0.00,-0.60,-7.21:15.75:./. 0|0:2,0:2:.:-0.00,-0.60,-6.06:15.99:./. 0|0:5,0:4:.:-0.01,-1.21,-12.22:21.89:./.
4985 22 51223848 rs2519470 C G . PASS DP=2632;AF=0.020;CB=BI,BC;AFR_R2=0.782 GT:AD:DP:GD:GL:GQ:OG 0|0:2,0:1:.:-0.00,-0.30,-4.49:60:./. ... 0|0:6,1:5:.:-0.01,-1.51,-19.73:60:./. 0|0:8,0:7:.:-0.01,-2.11,-27.90:60:./. 0|0:9,0:7:.:-0.01,-2.11,-28.36:60:./. 0|0:6,0:5:.:-0.00,-1.51,-20.18:60:./. 0|0:6,0:5:.:-0.00,-1.51,-20.12:60:./. 0|0:7,0:6:.:-0.00,-1.81,-25.01:60:./. 0|0:7,0:5:.:-0.01,-1.51,-19.17:60:./. 0|0:5,0:5:.:-0.01,-1.51,-18.37:60:./. 0|0:12,0:12:.:-0.01,-3.62,-49.18:60:./. 0|0:3,0:3:.:-0.00,-0.91,-11.78:60:./.
4986 22 51224208 rs116656403 G A . PASS DP=3100;AF=0.024;CB=UM,BI,NCBI;AFR_R2=0.936 GT:AD:DP:GD:GL:GQ:OG 0|0:2,0:2:.:-0.00,-0.60,-7.06:37.45:./. ... 0|0:3,0:3:.:-0.00,-0.90,-9.43:40.46:./. 0|0:9,0:9:.:-0.01,-2.72,-28.91:60:./. 0|0:11,0:11:.:-0.01,-3.32,-36.33:60:./. 0|0:4,0:2:.:-0.00,-0.60,-6.46:37.45:./. 0|0:5,0:4:.:-0.00,-1.21,-13.36:43.98:./. 0|0:14,0:13:.:-0.02,-3.92,-40.44:60:./. 0|0:4,0:2:.:-0.00,-0.60,-6.64:37.45:./. 0|0:1,0:1:.:-0.00,-0.30,-3.40:34.56:./. 0|0:8,0:8:.:-0.00,-2.41,-26.71:60:./. 0|0:6,0:5:.:-0.00,-1.51,-16.03:46.99:./.
4987 22 51224947 rs6009969 A G . PASS DP=3023;AF=0.078;CB=UM,BI,BC;EUR_R2=0.346;AFR_... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:6.17:./. ... 0|0:.:.:.:.,.,.:6.17:./. 0|0:.:.:.:.,.,.:6.17:./. 0|0:2,2:1:.:-0.00,-0.30,-3.16:8.88:./. 0|0:.:.:.:.,.,.:6.17:./. 0|0:.:.:.:.,.,.:6.17:./. 0|0:5,0:1:.:-0.00,-0.30,-4.07:8.88:./. 0|0:4,0:2:.:-0.00,-0.60,-7.33:11.59:./. 0|0:5,0:2:.:-0.00,-0.60,-7.11:11.59:./. 0|0:10,0:5:.:-0.00,-1.51,-19.50:20.41:./. 0|0:.:.:.:.,.,.:6.17:./.
4988 22 51224993 rs2595141 G C . PASS DP=2322;AF=0.020;CB=BI,BC;EUR_R2=0.59;AFR_R2=0... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:20.55:./. ... 0|0:.:.:.:.,.,.:20.55:./. 0|0:6,1:4:.:-0.00,-1.21,-16.22:32.60:./. 0|0:2,1:2:.:-0.00,-0.60,-8.84:26.54:./. 0|0:5,0:3:.:-0.00,-0.90,-13.43:29.51:./. 0|0:4,0:3:.:-0.00,-0.90,-13.43:29.51:./. 0|0:6,0:6:.:-0.00,-1.81,-24.96:38.54:./. 0|0:3,0:3:.:-0.00,-0.90,-12.78:29.51:./. 0|0:.:.:.:.,.,.:20.55:./. 0|0:10,0:8:.:-0.00,-2.41,-35.26:45.23:./. 0|0:2,2:1:.:-0.00,-0.30,-3.85:23.54:./.
4989 22 51227891 rs6010091 G A . PASS DP=1953;AF=0.264;CB=UM,BI,BC;EUR_R2=0.429;AFR_... GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-2.90:6.41:./. ... 0|0:.:.:.:.,.,.:2.29:./. 0|0:4,0:2:.:-0.00,-0.60,-5.54:12.11:./. 0|0:.:.:.:.,.,.:1.97:./. 0|0:.:.:.:.,.,.:4.76:./. 1|0:0,1:1:.:-3.66,-0.30,-0.00:9.76:1/1 1|1:0,6:1:.:-3.66,-0.30,-0.00:2.23:./. 1|0:0,3:2:.:-7.61,-0.60,-0.00:5.61:1/1 0|0:2,0:1:.:-0.00,-0.30,-3.30:8.43:./. 0|0:6,3:6:.:-0.01,-1.81,-18.29:18.11:./. 0|0:4,0:2:.:-0.00,-0.60,-6.46:11.17:./.
4990 22 51227937 rs6009970 A C . PASS DP=1066;AF=0.997;CB=UM,BI;EUR_R2=0.21;AFR_R2=0... GT:AD:DP:GD:GL:GQ:OG 1|1:.:.:.:.,.,.:12.86:./. ... 1|1:.:.:.:.,.,.:12.22:./. 1|1:.:.:.:.,.,.:12.97:./. 1|1:.:.:.:.,.,.:13.98:./. 1|1:.:.:.:.,.,.:11.92:./. 1|1:.:.:.:.,.,.:7.93:./. 1|1:.:.:.:.,.,.:12.42:./. 1|1:.:.:.:.,.,.:7.31:./. 1|1:.:.:.:.,.,.:12.38:./. 1|1:.:.:.:.,.,.:13.35:./. 1|1:.:.:.:.,.,.:13.02:./.
4991 22 51229805 rs9616985 T C . PASS DP=3300;AF=0.085;CB=UM,BI,BC;EUR_R2=0.724;AFR_... GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-4.17:10.71:./. ... 0|0:6,1:5:.:-0.00,-1.51,-18.51:23.10:./. 0|0:12,1:12:.:-0.01,-3.62,-42.46:45.23:./. 0|1:6,4:6:.:-9.86,-1.81,-11.68:60:./. 0|0:10,0:5:.:-0.00,-1.51,-18.61:23.21:./. 0|0:10,0:8:.:-0.01,-2.41,-28.27:32.01:./. 0|0:9,0:8:.:-0.01,-2.41,-28.88:31.94:./. 0|0:14,0:12:.:-0.01,-3.62,-44.52:45.23:./. 0|0:6,0:5:.:-0.00,-1.51,-18.92:23.31:./. 0|0:8,0:6:.:-0.00,-1.81,-23.46:26.11:./. 0|0:10,0:7:.:-0.00,-2.11,-26.83:29.28:./.
4992 22 51233312 rs62240043 A G . PASS DP=2252;AF=0.078;CB=UM,BI;EUR_R2=0.321;AFR_R2=... GT:AD:DP:GD:GL:GQ:OG 0|0:2,0:2:.:-0.00,-0.60,-7.61:13.84:./. ... 1|1:0,2:1:.:-3.17,-0.30,-0.00:3.28:./. 0|0:2,0:1:.:-0.00,-0.30,-3.36:11.02:./. 0|1:6,2:1:.:-0.00,-0.30,-3.17:2.96:0/0 0|0:1,0:1:.:-0.00,-0.30,-3.07:11.02:./. 0|1:3,4:1:.:-0.00,-0.30,-3.66:2.84:0/0 0|1:2,4:1:.:-2.77,-0.30,-0.00:10.71:1/1 0|0:.:.:.:.,.,.:8.23:./. 0|0:6,0:3:.:-0.00,-0.91,-10.07:16.85:./. 0|0:8,1:1:.:-0.00,-0.30,-3.76:12.02:./. 0|0:5,0:1:.:-0.00,-0.30,-3.86:12.68:./.
4993 22 51233347 rs62240044 T C . PASS DP=2566;AF=0.010;CB=UM,BI;EUR_R2=0.072;AFR_R2=... GT:AD:DP:GD:GL:GQ:OG 0|0:2,0:2:.:-0.00,-0.60,-8.23:10.09:./. ... 0|0:.:.:.:.,.,.:4.91:./. 0|0:.:.:.:.,.,.:4.91:./. 0|0:.:.:.:.,.,.:4.91:./. 0|0:.:.:.:.,.,.:4.91:./. 0|0:.:.:.:.,.,.:4.91:./. 1|0:5,3:1:.:-3.10,-0.30,-0.00:7.46:1/1 0|0:.:.:.:.,.,.:4.91:./. 0|0:.:.:.:.,.,.:4.91:./. 0|0:.:.:.:.,.,.:4.91:./. 0|0:.:.:.:.,.,.:4.91:./.
4994 22 51234159 rs8138356 T A . PASS DP=2851;AF=0.021;CB=UM,BI;EUR_R2=0.548;AFR_R2=... GT:AD:DP:GD:GL:GQ:OG 0|0:2,0:2:.:-0.00,-0.60,-7.64:27.72:./. ... 0|0:5,0:2:.:-0.00,-0.60,-7.70:27.72:./. 0|0:5,0:1:.:-0.00,-0.30,-3.40:24.74:./. 0|0:10,0:5:.:-0.00,-1.51,-17.03:36.78:./. 0|0:10,0:4:.:-0.00,-1.21,-13.99:33.77:./. 0|0:10,0:7:.:-0.01,-2.11,-23.25:43.01:./. 0|0:7,0:1:.:-0.00,-0.30,-3.60:24.74:./. 0|0:5,0:1:.:-0.00,-0.30,-3.40:24.74:./. 0|0:7,0:3:.:-0.00,-0.90,-9.84:30.71:./. 0|0:4,0:3:.:-0.00,-0.90,-10.24:30.71:./. 0|0:5,0:1:.:-0.00,-0.30,-3.44:24.74:./.
4995 22 51234199 rs6010092 T C . PASS DP=2463;AF=0.006;CB=UM,BI;EUR_R2=0.648;AFR_R2=... GT:AD:DP:GD:GL:GQ:OG 0|0:1,0:1:.:-0.00,-0.30,-3.96:25.48:./. ... 0|0:1,0:1:.:-0.00,-0.30,-3.97:25.48:./. 0|0:6,0:6:.:-0.00,-1.81,-21.60:40.46:./. 0|0:11,0:11:.:-0.01,-3.32,-41.93:60:./. 0|0:2,0:2:.:-0.00,-0.60,-7.62:28.48:./. 0|0:11,0:9:.:-0.01,-2.72,-32.46:50:./. 0|0:11,0:10:.:-0.00,-3.01,-36.92:50:./. 0|0:6,0:5:.:-0.01,-1.51,-17.11:37.45:./. 0|0:8,0:7:.:-0.00,-2.11,-25.95:43.98:./. 0|0:10,0:10:.:-0.00,-3.01,-38.82:50:./. 0|0:8,0:8:.:-0.00,-2.41,-29.28:46.99:./.
4996 22 51234632 rs6010093 G C . PASS DP=2243;AF=0.003;CB=UM,BI;AFR_R2=0.658 GT:AD:DP:GD:GL:GQ:OG ./.:.:.:.:.,.,.:.:./. ... ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./. ./.:.:.:.:.,.,.:.:./.
4997 22 51235344 rs114148499 G A . PASS DP=2322;AF=0.030;CB=UM,BI,BC;EUR_R2=0.548;AFR_... GT:AD:DP:GD:GL:GQ:OG 0|0:2,0:2:.:-0.00,-0.60,-7.26:14.83:./. ... 0|0:2,0:2:.:-0.00,-0.60,-7.19:14.62:./. 0|0:4,0:3:.:-0.00,-0.91,-8.43:17.62:./. 0|0:4,0:3:.:-0.00,-0.90,-9.11:17.63:./. 0|0:3,0:1:.:-0.00,-0.30,-3.50:11.85:./. 0|0:3,0:3:.:-0.00,-0.90,-9.03:17.60:./. 0|0:6,0:3:.:-0.00,-0.90,-9.63:17.62:./. 0|0:2,0:2:.:-0.00,-0.60,-7.49:14.71:./. 0|0:.:.:.:.,.,.:9:./. 0|0:.:.:.:.,.,.:9.08:./. 0|0:.:.:.:.,.,.:8.86:./.
4998 22 51237063 rs3896457 T C . PASS DP=2410;AF=0.136;CB=UM,BI;EUR_R2=0.25;AFR_R2=0.46 GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:3.27:./. ... 0|0:.:.:.:.,.,.:3.06:./. 0|0:.:.:.:.,.,.:3.06:./. 0|0:.:.:.:.,.,.:3.06:./. 0|1:2,1:1:.:-3.37,-0.30,-0.00:5.39:1/1 0|0:.:.:.:.,.,.:3.06:./. 0|0:.:.:.:.,.,.:3.06:./. 1|1:1,2:2:.:-5.96,-0.60,-0.00:2.80:./. 0|0:.:.:.:.,.,.:3.06:./. 0|1:4,2:1:.:-3.27,-0.30,-0.00:5.39:1/1 0|0:.:.:.:.,.,.:3.06:./.
4999 22 51238831 rs62240048 C T . PASS DP=2392;AF=0.268;CB=UM,BI;EUR_R2=0.392;AFR_R2=... GT:AD:DP:GD:GL:GQ:OG 0|0:.:.:.:.,.,.:8.45:./. ... 0|0:4,2:4:.:-0.00,-1.21,-12.40:20.10:./. 0|0:.:.:.:.,.,.:8.45:./. 0|0:3,4:2:.:-0.00,-0.60,-6.14:14.13:./. 0|1:0,2:1:.:-3.97,-0.30,-0.00:11.31:1/1 0|0:6,2:4:.:-0.01,-1.21,-11.53:20:./. 0|0:8,1:6:.:-0.01,-1.81,-18.14:25.97:./. 0|0:1,3:1:.:-0.00,-0.30,-2.77:11.29:./. 0|0:2,2:1:.:-0.00,-0.30,-2.97:11.29:./. 0|0:7,0:5:.:-0.01,-1.51,-14.73:22.97:./. 0|0:5,0:3:.:-0.00,-0.91,-8.93:17.14:./.

5000 rows × 638 columns

In [14]:
# Everything from column position 9 onwards is taken as a per-sample column
# (assumes df still has the nine fixed VCF fields first -- TODO confirm).
sample_ids = df.columns[9:].tolist()
len(sample_ids)
Out[14]:
629

유전자형을 숫자로 표시 (AA:0, AB:1, BB:2)

In [15]:
# One derived column name per sample, of the form "<sample id> num".
sample_ids_number = list(map('{} num'.format, sample_ids))

def get_number(ref, alt):
    """Add two allele codes given as strings.

    Despite the parameter names, the caller in this notebook passes the
    first and third characters of a sample field (e.g. '0' and '1' taken
    from '0|1'), so the result is 0, 1 or 2 for codes '0'/'1'.

    Args:
        ref: first allele code, as a string.
        alt: second allele code, as a string.

    Returns:
        int(ref) + int(alt), or np.nan when ``alt`` contains ',' or when
        either argument contains '.'.

    Raises:
        ValueError: if an argument passes the checks above but is not a
            valid integer literal.
    """
    usable = not (',' in alt or '.' in ref or '.' in alt)
    if usable:
        return int(ref) + int(alt)
    return np.nan

def call_genotype_number(row):
    """Convert one row's per-sample fields into numeric genotype values.

    For every non-empty name in the module-level ``sample_ids`` list, the
    characters at positions 0 and 2 of ``row[name]`` are handed to
    ``get_number`` (presumably the two alleles of a genotype such as
    '0|1' -- TODO confirm against the VCF FORMAT column).

    Args:
        row: one row of the variant table, indexable by sample id.

    Returns:
        pd.Series of the resulting numbers, indexed by the module-level
        ``sample_ids_number`` labels.
    """
    numbers = []
    for sample in sample_ids:
        # Falsy (e.g. empty) sample names are skipped; if any were skipped the
        # values would no longer line up with sample_ids_number.
        if not sample:
            continue
        genotype = row[sample]
        numbers.append(get_number(genotype[0], genotype[2]))
    return pd.Series(numbers, index=sample_ids_number)

# Compute the numeric genotypes row by row, then attach them to df under the
# "<sample id> num" column names.
genotype_numbers = df.apply(call_genotype_number, axis=1)
df[sample_ids_number] = genotype_numbers
In [21]:
# Keep only the numeric genotype columns and flip the table so that each
# "<sample id> num" label becomes a row.
matrix = df.loc[:, sample_ids_number].transpose()
matrix
Out[21]:
0 1 2 3 4 5 6 7 8 9 ... 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999
HG00098 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00100 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00106 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0
HG00112 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00114 num NaN NaN 1.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0
HG00116 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 2.0 0.0
HG00117 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00118 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00119 num NaN NaN 0.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0
HG00120 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 1.0 2.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0
HG00122 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00123 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00124 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 2.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00126 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00131 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0
HG00141 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 1.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00142 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 1.0
HG00143 num NaN NaN 1.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00144 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00145 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00146 num NaN NaN 0.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 2.0 0.0 2.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0
HG00147 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00148 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0
HG00149 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 1.0
HG00150 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 1.0
HG00151 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
HG00152 num NaN NaN 0.0 1.0 0.0 0.0 0.0 1.0 NaN 1.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0
HG00153 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0
HG00156 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0
HG00158 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 1.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
NA20786 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0
NA20787 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 1.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20790 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20792 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20795 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 2.0 1.0 0.0 0.0 NaN 0.0 1.0 0.0
NA20796 num NaN NaN 0.0 2.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20797 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20798 num NaN NaN 0.0 1.0 0.0 0.0 0.0 1.0 NaN 1.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0
NA20799 num NaN NaN 0.0 1.0 0.0 0.0 1.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20800 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0
NA20801 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20802 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20803 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 1.0 1.0 0.0
NA20804 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20805 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20806 num NaN NaN 0.0 1.0 0.0 0.0 0.0 1.0 NaN 1.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0
NA20807 num NaN NaN 1.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 2.0 0.0 1.0 1.0 0.0 0.0 NaN 0.0 1.0 0.0
NA20808 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20809 num NaN NaN 1.0 2.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 2.0 0.0 1.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0
NA20810 num NaN NaN 2.0 2.0 2.0 0.0 0.0 0.0 NaN 2.0 ... 2.0 1.0 0.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0
NA20811 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 2.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20812 num NaN NaN 0.0 0.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20813 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 1.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20814 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0
NA20815 num NaN NaN 0.0 0.0 0.0 0.0 1.0 0.0 NaN 0.0 ... 2.0 0.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20816 num NaN NaN 1.0 1.0 0.0 0.0 0.0 0.0 NaN 1.0 ... 2.0 0.0 1.0 1.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20818 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 2.0 0.0
NA20819 num NaN NaN 1.0 2.0 1.0 0.0 0.0 1.0 NaN 2.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0
NA20826 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 1.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0
NA20828 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 2.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0

629 rows × 5000 columns

샘플 인종 정보

In [16]:
# Sample sheet, read from an Excel file located next to the notebook.
sample_info_path = '1KG_sample_info.xlsx'
sample_df = pd.read_excel(sample_info_path)
sample_df
Out[16]:
Sample Family ID Population Population Description Gender Relationship Unexpected Parent/Child Non Paternity Siblings Grandparents Avuncular Half Siblings Unknown Second Order Third Order Other Comments
0 HG00096 HG00096 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 HG00097 HG00097 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 HG00098 HG00098 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 HG00099 HG00099 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 HG00100 HG00100 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
5 HG00101 HG00101 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
6 HG00102 HG00102 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
7 HG00103 HG00103 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
8 HG00104 HG00104 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
9 HG00105 HG00105 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
10 HG00106 HG00106 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
11 HG00107 HG00107 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
12 HG00108 HG00108 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
13 HG00109 HG00109 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
14 HG00110 HG00110 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
15 HG00111 HG00111 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
16 HG00112 HG00112 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
17 HG00113 HG00113 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
18 HG00114 HG00114 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
19 HG00115 HG00115 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
20 HG00116 HG00116 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
21 HG00117 HG00117 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
22 HG00118 HG00118 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
23 HG00119 HG00119 GBR British in England and Scotland male NaN NaN NaN NaN NaN NaN NaN HG00124 NaN NaN
24 HG00120 HG00120 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
25 HG00121 HG00121 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
26 HG00122 HG00122 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
27 HG00123 HG00123 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
28 HG00124 HG00124 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN HG00119 NaN NaN
29 HG00125 HG00125 GBR British in England and Scotland female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3470 NA21109 NA21109 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3471 NA21110 NA21110 GIH Gujarati Indian in Houston,TX female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3472 NA21111 NA21111 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3473 NA21112 NA21112 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3474 NA21113 NA21113 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3475 NA21114 NA21114 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NA21134 NaN
3476 NA21115 NA21115 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3477 NA21116 NA21116 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3478 NA21117 NA21117 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3479 NA21118 NA21118 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3480 NA21119 NA21119 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3481 NA21120 NA21120 GIH Gujarati Indian in Houston,TX female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3482 NA21121 NA21121 GIH Gujarati Indian in Houston,TX female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3483 NA21122 NA21122 GIH Gujarati Indian in Houston,TX female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3484 NA21123 NA21123 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3485 NA21124 NA21124 GIH Gujarati Indian in Houston,TX male unrel NaN NaN NaN NaN NaN NaN NaN NaN NaN
3486 NA21125 NA21125 GIH Gujarati Indian in Houston,TX female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3487 NA21126 NA21126 GIH Gujarati Indian in Houston,TX male unrel NaN NaN NaN NaN NaN NaN NaN NaN NaN
3488 NA21127 NA21127 GIH Gujarati Indian in Houston,TX male unrel NaN NaN NaN NaN NaN NaN NaN NaN NaN
3489 NA21128 NA21128 GIH Gujarati Indian in Houston,TX male unrel NaN NaN NaN NaN NaN NaN NaN NaN NaN
3490 NA21129 NA21129 GIH Gujarati Indian in Houston,TX male unrel NaN NaN NaN NaN NaN NaN NaN NaN NaN
3491 NA21130 NA21130 GIH Gujarati Indian in Houston,TX male unrel NaN NaN NaN NaN NaN NaN NaN NaN NaN
3492 NA21133 NA21133 GIH Gujarati Indian in Houston,TX male unrel NaN NaN NaN NaN NaN NaN NaN NaN NaN
3493 NA21134 NA21134 GIH Gujarati Indian in Houston,TX male NaN NaN NaN NaN NaN NaN NaN NaN NA21114 NaN
3494 NA21135 NA21135 GIH Gujarati Indian in Houston,TX male unrel NaN NaN NaN NaN NaN NaN NaN NaN NaN
3495 NA21137 NA21137 GIH Gujarati Indian in Houston,TX female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3496 NA21141 NA21141 GIH Gujarati Indian in Houston,TX female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3497 NA21142 NA21142 GIH Gujarati Indian in Houston,TX female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3498 NA21143 NA21143 GIH Gujarati Indian in Houston,TX female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3499 NA21144 NA21144 GIH Gujarati Indian in Houston,TX female NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

3500 rows × 15 columns

In [29]:
# Lookup table: value of the 'Sample' column -> value of the 'Population'
# column. If a sample appears more than once, the last row wins.
sample_dict = dict(zip(sample_df['Sample'], sample_df['Population']))

sample_dict
Out[29]:
{'HG03351': 'ESN',
 'HG01677': 'IBS',
 'HG01789': 'GBR',
 'HG03510': 'ESN',
 'HG00641': 'PUR',
 'HG01596': 'KHV',
 'NA11839': 'CEU',
 'HG02786': 'PJL',
 'HG02733': 'PJL',
 'HG02382': 'CDX',
 'HG00105': 'GBR',
 'HG00664': 'CHS',
 'NA20292': 'ASW',
 'NA19154': 'YRI',
 'HG02406': 'CDX',
 'HG03393': 'MSL',
 'HG01531': 'IBS',
 'NA19788': 'MXL',
 'HG03269': 'ESN',
 'HG02451': 'ACB',
 'HG03778': 'ITU',
 'NA12812': 'CEU',
 'NA20753': 'TSI',
 'HG01675': 'IBS',
 'NA20333': 'ASW',
 'HG01858': 'KHV',
 'HG03863': 'ITU',
 'HG04156': 'BEB',
 'NA19086': 'JPT',
 'NA20533': 'TSI',
 'HG01439': 'CLM',
 'HG03095': 'MSL',
 'HG02721': 'GWD',
 'NA20905': 'GIH',
 'NA10856': 'CEU',
 'NA18582': 'CHB',
 'NA20805': 'TSI',
 'NA18592': 'CHB',
 'NA18544': 'CHB',
 'HG00369': 'FIN',
 'HG03490': 'PJL',
 'NA20512': 'TSI',
 'HG00578': 'CHS',
 'HG03995': 'STU',
 'NA19141': 'YRI',
 'HG00584': 'CHS',
 'HG02255': 'ACB',
 'NA21125': 'GIH',
 'NA18951': 'JPT',
 'HG03104': 'ESN',
 'HG03556': 'MSL',
 'HG01891': 'ACB',
 'HG03166': 'ESN',
 'HG03947': 'STU',
 'HG01377': 'CLM',
 'HG00372': 'FIN',
 'HG01840': 'KHV',
 'HG03896': 'STU',
 'NA20355': 'ASW',
 'HG01710': 'IBS',
 'HG02184': 'CDX',
 'HG01174': 'PUR',
 'HG02568': 'GWD',
 'HG02861': 'GWD',
 'HG02231': 'IBS',
 'HG02256': 'ACB',
 'HG02489': 'ACB',
 'HG00101': 'GBR',
 'NA19307': 'LWK',
 'HG02442': 'ACB',
 'NA19775': 'MXL',
 'NA21091': 'GIH',
 'NA19784': 'MXL',
 'HG01777': 'IBS',
 'HG01273': 'CLM',
 'HG03066': 'MSL',
 'HG03788': 'ITU',
 'HG01870': 'KHV',
 'NA19836': 'ASW',
 'NA19789': 'MXL',
 'NA20530': 'TSI',
 'HG03452': 'MSL',
 'HG01167': 'PUR',
 'HG03058': 'MSL',
 'NA12864': 'CEU',
 'HG03701': 'PJL',
 'HG00139': 'GBR',
 'HG02106': 'PEL',
 'HG02004': 'PEL',
 'NA20357': 'ASW',
 'HG01928': 'PEL',
 'NA20876': 'GIH',
 'HG02490': 'PJL',
 'HG00447': 'CHS',
 'HG02455': 'ACB',
 'HG01950': 'PEL',
 'HG01379': 'CLM',
 'HG03928': 'BEB',
 'NA10850': 'CEU',
 'HG04202': 'ITU',
 'NA21144': 'GIH',
 'HG02879': 'GWD',
 'NA20816': 'TSI',
 'HG03857': 'STU',
 'HG00685': 'CHS',
 'HG02612': 'GWD',
 'HG02102': 'PEL',
 'NA12056': 'CEU',
 'HG02924': 'ESN',
 'HG00594': 'CHS',
 'HG03901': 'BEB',
 'HG01369': 'CLM',
 'HG03164': 'ESN',
 'NA20276': 'ASW',
 'NA19384': 'LWK',
 'NA20787': 'TSI',
 'NA19762': 'MXL',
 'NA18623': 'CHB',
 'HG03742': 'ITU',
 'HG01031': 'CDX',
 'HG01280': 'CLM',
 'HG01347': 'CLM',
 'HG03762': 'PJL',
 'NA18933': 'YRI',
 'NA12828': 'CEU',
 'NA18935': 'YRI',
 'HG02577': 'ACB',
 'HG03906': 'BEB',
 'HG01394': 'PUR',
 'NA19024': 'LWK',
 'NA19019': 'LWK',
 'HG03091': 'MSL',
 'HG01028': 'CDX',
 'NA19678': 'MXL',
 'NA19771': 'MXL',
 'HG03972': 'ITU',
 'HG00157': 'GBR',
 'HG00156': 'GBR',
 'HG03593': 'BEB',
 'NA19440': 'LWK',
 'HG04115': 'STU',
 'NA19680': 'MXL',
 'HG01334': 'GBR',
 'HG00614': 'CHS',
 'HG04302': 'GBR',
 'HG02775': 'PJL',
 'NA20364': 'ASW',
 'NA19377': 'LWK',
 'NA19787': 'MXL',
 'NA21105': 'GIH',
 'HG01105': 'PUR',
 'HG03137': 'ESN',
 'HG02501': 'ACB',
 'HG03657': 'PJL',
 'HG01590': 'PJL',
 'HG03679': 'STU',
 'HG00308': 'FIN',
 'HG00600': 'CHS',
 'HG00844': 'CDX',
 'NA19056': 'JPT',
 'NA06986': 'CEU',
 'NA19900': 'ASW',
 'HG03115': 'ESN',
 'NA19077': 'JPT',
 'NA19444': 'LWK',
 'HG01081': 'PUR',
 'HG02030': 'KHV',
 'HG02397': 'CDX',
 'NA20581': 'TSI',
 'HG02132': 'KHV',
 'HG02012': 'ACB',
 'HG03228': 'PJL',
 'NA12892': 'CEU',
 'HG02020': 'KHV',
 'HG00444': 'CHS',
 'HG03940': 'BEB',
 'HG04118': 'ITU',
 'NA20891': 'GIH',
 'HG03840': 'STU',
 'HG03683': 'STU',
 'HG01790': 'GBR',
 'HG01308': 'PUR',
 'HG00310': 'FIN',
 'NA19097': 'YRI',
 'HG03693': 'STU',
 'HG01134': 'CLM',
 'HG00622': 'CHS',
 'HG02477': 'ACB',
 'HG02085': 'KHV',
 'NA12740': 'CEU',
 'HG03026': 'GWD',
 'HG03752': 'STU',
 'HG01630': 'IBS',
 'NA20881': 'GIH',
 'NA18534': 'CHB',
 'NA20882': 'GIH',
 'NA07349': 'CEU',
 'HG03583': 'MSL',
 'NA07014': 'CEU',
 'HG03806': 'BEB',
 'HG01926': 'PEL',
 'HG01890': 'ACB',
 'NA19085': 'JPT',
 'HG03419': 'MSL',
 'HG01029': 'CDX',
 'NA19428': 'LWK',
 'HG02077': 'KHV',
 'NA10864': 'CEU',
 'HG03445': 'MSL',
 'HG03754': 'STU',
 'HG03381': 'MSL',
 'HG02387': 'CDX',
 'HG01981': 'PEL',
 'HG01176': 'PUR',
 'NA20291': 'ASW',
 'HG01150': 'CLM',
 'HG01050': 'PUR',
 'NA19030': 'LWK',
 'HG01783': 'IBS',
 'HG03160': 'ESN',
 'HG02810': 'GWD',
 'HG00407': 'CHS',
 'HG01605': 'IBS',
 'HG02685': 'PJL',
 'HG00306': 'FIN',
 'NA19140': 'YRI',
 'HG04080': 'ITU',
 'NA12717': 'CEU',
 'NA18626': 'CHB',
 'HG01254': 'CLM',
 'HG02139': 'KHV',
 'HG03604': 'BEB',
 'HG03660': 'PJL',
 'HG02358': 'CDX',
 'NA19084': 'JPT',
 'NA18505': 'YRI',
 'HG00867': 'CDX',
 'NA20520': 'TSI',
 'HG03982': 'STU',
 'HG03874': 'ITU',
 'HG02013': 'ACB',
 'HG00187': 'FIN',
 'HG00250': 'GBR',
 'NA19116': 'YRI',
 'HG02505': 'ACB',
 'NA20312': 'ASW',
 'HG03167': 'ESN',
 'NA19792': 'MXL',
 'NA21142': 'GIH',
 'HG00124': 'GBR',
 'NA21143': 'GIH',
 'HG02716': 'GWD',
 'HG00864': 'CDX',
 'HG00100': 'GBR',
 'HG02526': 'KHV',
 'NA19160': 'YRI',
 'NA19662': 'MXL',
 'HG02464': 'GWD',
 'NA19122': 'YRI',
 'HG02722': 'GWD',
 'HG00376': 'FIN',
 'HG03832': 'BEB',
 'HG03473': 'MSL',
 'HG01258': 'CLM',
 'NA20871': 'GIH',
 'HG03733': 'STU',
 'HG04003': 'STU',
 'NA18794': 'CHB',
 'HG03339': 'ESN',
 'HG02597': 'PJL',
 'HG01702': 'IBS',
 'HG00599': 'CHS',
 'HG03944': 'STU',
 'NA20845': 'GIH',
 'HG00596': 'CHS',
 'HG02891': 'GWD',
 'HG00110': 'GBR',
 'HG01454': 'CLM',
 'HG00730': 'CHS',
 'HG02811': 'GWD',
 'NA20785': 'TSI',
 'HG00632': 'CHS',
 'NA20504': 'TSI',
 'HG03571': 'MSL',
 'HG00135': 'GBR',
 'NA20875': 'GIH',
 'HG00245': 'GBR',
 'HG01667': 'IBS',
 'HG04171': 'BEB',
 'HG02318': 'ACB',
 'NA20346': 'ASW',
 'HG00428': 'CHS',
 'HG03721': 'ITU',
 'HG02570': 'GWD',
 'NA20518': 'TSI',
 'NA19079': 'JPT',
 'HG00672': 'CHS',
 'NA12282': 'CEU',
 'HG02323': 'ACB',
 'NA19682': 'MXL',
 'HG03056': 'MSL',
 'HG03361': 'ESN',
 'NA21088': 'GIH',
 'HG02584': 'GWD',
 'NA18967': 'JPT',
 'NA19463': 'LWK',
 'HG02768': 'GWD',
 'HG03654': 'PJL',
 'HG03129': 'ESN',
 'NA18498': 'YRI',
 'NA20525': 'TSI',
 'HG02040': 'KHV',
 'HG02058': 'KHV',
 'NA20587': 'TSI',
 'HG04206': 'ITU',
 'HG03629': 'PJL',
 'HG01532': 'IBS',
 'HG03451': 'MSL',
 'NA06994': 'CEU',
 'HG02623': 'GWD',
 'NA20870': 'GIH',
 'HG02880': 'GWD',
 'NA18628': 'CHB',
 'HG04056': 'ITU',
 'NA18867': 'YRI',
 'HG00103': 'GBR',
 'HG02152': 'CDX',
 'HG04070': 'ITU',
 'HG02799': 'GWD',
 'HG01149': 'CLM',
 'HG03508': 'ESN',
 'HG02660': 'PJL',
 'NA19776': 'MXL',
 'HG03055': 'MSL',
 'HG01998': 'PEL',
 'HG01247': 'PUR',
 'NA20821': 'TSI',
 'HG03804': 'BEB',
 'HG01279': 'CLM',
 'HG03465': 'MSL',
 'HG03474': 'MSL',
 'HG03868': 'ITU',
 'NA19324': 'LWK',
 'HG02156': 'CDX',
 'NA18941': 'JPT',
 'HG03248': 'GWD',
 'HG01433': 'CLM',
 'NA10840': 'CEU',
 'NA20541': 'TSI',
 'HG03116': 'ESN',
 'HG01686': 'IBS',
 'HG02046': 'KHV',
 'HG03303': 'ESN',
 'HG03741': 'STU',
 'HG03297': 'ESN',
 'HG02367': 'CDX',
 'HG03799': 'BEB',
 'HG01357': 'CLM',
 'NA18630': 'CHB',
 'HG04208': 'STU',
 'NA21086': 'GIH',
 'HG03082': 'MSL',
 'HG02462': 'GWD',
 'HG01073': 'PUR',
 'NA19652': 'MXL',
 'HG03985': 'STU',
 'HG01775': 'IBS',
 'HG03691': 'STU',
 'NA18577': 'CHB',
 'NA18881': 'YRI',
 'NA20831': 'TSI',
 'NA19394': 'LWK',
 'HG01880': 'ACB',
 'HG02304': 'PEL',
 'NA20300': 'ASW',
 'HG02379': 'CDX',
 'NA20511': 'TSI',
 'HG00592': 'CHS',
 'HG00188': 'FIN',
 'NA19063': 'JPT',
 'HG03729': 'ITU',
 'NA19132': 'YRI',
 'NA18966': 'JPT',
 'NA20810': 'TSI',
 'HG02312': 'PEL',
 'HG01965': 'PEL',
 'NA19159': 'YRI',
 'NA12815': 'CEU',
 'HG01997': 'PEL',
 'HG01187': 'PUR',
 'NA12154': 'CEU',
 'HG02537': 'ACB',
 'HG03908': 'BEB',
 'NA19834': 'ASW',
 'HG01942': 'PEL',
 'HG03669': 'PJL',
 'NA19683': 'MXL',
 'HG02006': 'PEL',
 'HG02661': 'PJL',
 'NA19055': 'JPT',
 'HG01361': 'CLM',
 'HG03126': 'ESN',
 'HG03802': 'BEB',
 'HG01489': 'CLM',
 'NA18615': 'CHB',
 'NA18944': 'JPT',
 'HG00189': 'FIN',
 'HG00633': 'CHS',
 'HG01106': 'PUR',
 'NA20754': 'TSI',
 'NA19161': 'YRI',
 'HG02725': 'PJL',
 'HG00351': 'FIN',
 'NA20815': 'TSI',
 'NA12058': 'CEU',
 'HG02150': 'PEL',
 'HG00249': 'GBR',
 'HG00465': 'CHS',
 'HG00453': 'CHS',
 'HG00178': 'FIN',
 'HG02075': 'KHV',
 'HG02347': 'PEL',
 'HG03814': 'BEB',
 'NA12336': 'CEU',
 'HG00690': 'CHS',
 'HG02890': 'GWD',
 'HG03743': 'STU',
 'NA18485': 'YRI',
 'HG01968': 'PEL',
 'HG01441': 'CLM',
 'HG00625': 'CHS',
 'HG03793': 'BEB',
 'HG02471': 'ACB',
 'NA07029': 'CEU',
 'HG00619': 'CHS',
 'HG04164': 'BEB',
 'NA18617': 'CHB',
 'HG02947': 'ESN',
 'HG03128': 'ESN',
 'NA20296': 'ASW',
 'HG02821': 'GWD',
 'NA18536': 'CHB',
 'HG00129': 'GBR',
 'HG00321': 'FIN',
 'HG03522': 'ESN',
 'NA19316': 'LWK',
 'HG03808': 'BEB',
 'HG01774': 'IBS',
 'HG03926': 'BEB',
 'HG03376': 'MSL',
 'NA19461': 'LWK',
 'NA20585': 'TSI',
 'HG00141': 'GBR',
 'NA19127': 'YRI',
 'HG02466': 'GWD',
 'HG01367': 'CLM',
 'HG02272': 'PEL',
 'HG02317': 'ACB',
 'HG03492': 'PJL',
 'NA20519': 'TSI',
 'HG01954': 'PEL',
 'HG03700': 'PJL',
 'HG01868': 'KHV',
 'NA19012': 'JPT',
 'NA19149': 'YRI',
 'NA19190': 'YRI',
 'HG01048': 'PUR',
 'HG02385': 'CDX',
 'HG03858': 'STU',
 'NA20786': 'TSI',
 'NA19448': 'LWK',
 'NA18917': 'YRI',
 'HG04194': 'BEB',
 'NA19002': 'JPT',
 'HG03909': 'BEB',
 'HG04191': 'BEB',
 'HG00866': 'CDX',
 'NA19332': 'LWK',
 'HG04036': 'STU',
 'HG02166': 'CDX',
 'HG02851': 'GWD',
 'NA19908': 'ASW',
 'HG03027': 'GWD',
 'NA20529': 'TSI',
 'NA20854': 'GIH',
 'HG00472': 'CHS',
 'NA18486': 'YRI',
 'HG01748': 'IBS',
 'HG04141': 'BEB',
 'HG02134': 'KHV',
 'NA12877': 'CEU',
 'HG02521': 'KHV',
 'HG03062': 'MSL',
 'HG02737': 'PJL',
 'HG03969': 'ITU',
 'HG01346': 'CLM',
 'HG01883': 'ACB',
 'HG02495': 'PJL',
 'HG02727': 'PJL',
 'HG04015': 'ITU',
 'HG01769': 'IBS',
 'HG01305': 'PUR',
 'HG00530': 'CHS',
 'HG00280': 'FIN',
 'NA18538': 'CHB',
 'HG01453': 'CLM',
 'HG01261': 'CLM',
 'NA07345': 'CEU',
 'HG03720': 'ITU',
 'HG01845': 'KHV',
 'HG03022': 'PJL',
 'HG03266': 'ESN',
 'HG01248': 'PUR',
 'HG02798': 'GWD',
 'HG03783': 'ITU',
 'NA18948': 'JPT',
 'NA21129': 'GIH',
 'NA12762': 'CEU',
 'HG01626': 'IBS',
 'NA19027': 'LWK',
 'NA20321': 'ASW',
 'NA20869': 'GIH',
 'HG03785': 'ITU',
 'HG02885': 'GWD',
 'HG00113': 'GBR',
 'HG00607': 'CHS',
 'HG03454': 'MSL',
 'HG03824': 'BEB',
 'NA20535': 'TSI',
 'HG02555': 'ACB',
 'NA19731': 'MXL',
 'HG02770': 'GWD',
 'HG03744': 'STU',
 'HG01323': 'PUR',
 'HG03702': 'PJL',
 'HG04058': 'ITU',
 'NA18878': 'YRI',
 'NA18497': 'YRI',
 'HG03251': 'GWD',
 'HG02130': 'KHV',
 'HG00313': 'FIN',
 'NA18985': 'JPT',
 'HG02780': 'PJL',
 'HG03617': 'BEB',
 'HG00364': 'FIN',
 'NA18621': 'CHB',
 'HG01849': 'KHV',
 'HG01948': 'PEL',
 'HG01960': 'ACB',
 'NA12275': 'CEU',
 'HG02889': 'GWD',
 'HG01253': 'CLM',
 'HG00699': 'CHS',
 'HG01342': 'CLM',
 'NA18970': 'JPT',
 'NA20302': 'ASW',
 'HG00375': 'FIN',
 'HG02410': 'CDX',
 'HG01914': 'ACB',
 'HG03127': 'ESN',
 'HG03134': 'ESN',
 'HG03517': 'ESN',
 'HG02807': 'GWD',
 'HG03789': 'ITU',
 'NA12767': 'CEU',
 'NA10855': 'CEU',
 'NA19663': 'MXL',
 'HG02808': 'GWD',
 'NA18643': 'CHB',
 'HG00315': 'FIN',
 'HG02217': 'IBS',
 'NA20540': 'TSI',
 'HG02419': 'ACB',
 'HG02127': 'KHV',
 'HG00556': 'CHS',
 'HG00182': 'FIN',
 'HG01551': 'CLM',
 'HG00738': 'PUR',
 'HG03760': 'STU',
 'HG03344': 'ESN',
 'HG04002': 'ITU',
 'HG02439': 'ACB',
 'NA20861': 'GIH',
 'HG03124': 'ESN',
 'HG03978': 'ITU',
 'NA18525': 'CHB',
 'HG00422': 'CHS',
 'HG02511': 'ACB',
 'HG02596': 'GWD',
 'HG00362': 'FIN',
 'HG01566': 'PEL',
 'NA19471': 'LWK',
 'HG00450': 'CHS',
 'HG00149': 'GBR',
 'NA20414': 'ASW',
 'HG02595': 'GWD',
 'HG02073': 'KHV',
 'HG02610': 'GWD',
 'NA19430': 'LWK',
 'HG00099': 'GBR',
 'HG00579': 'CHS',
 'HG03844': 'STU',
 'HG01536': 'IBS',
 'HG00104': 'GBR',
 'HG00566': 'CHS',
 'HG03385': 'MSL',
 'NA19254': 'YRI',
 'HG03953': 'STU',
 'HG03919': 'BEB',
 'HG00533': 'CHS',
 'HG02736': 'PJL',
 'NA19153': 'YRI',
 'HG02089': 'PEL',
 'HG03910': 'BEB',
 'HG02356': 'CDX',
 'NA19984': 'ASW',
 'NA20313': 'ASW',
 'HG03949': 'STU',
 'NA12329': 'CEU',
 'NA19214': 'YRI',
 'HG01359': 'CLM',
 'NA19902': 'ASW',
 'NA19671': 'MXL',
 'HG03920': 'BEB',
 'HG03766': 'PJL',
 'HG01241': 'PUR',
 'HG02624': 'GWD',
 'NA10859': 'CEU',
 'NA18960': 'JPT',
 'NA18558': 'CHB',
 'NA19723': 'MXL',
 'HG04076': 'ITU',
 'HG00285': 'FIN',
 'NA19025': 'LWK',
 'HG01063': 'PUR',
 'NA19704': 'ASW',
 'HG01138': 'CLM',
 'NA19755': 'MXL',
 'HG00709': 'CHS',
 'NA19017': 'LWK',
 'HG01122': 'CLM',
 'HG03457': 'MSL',
 'NA07000': 'CEU',
 'HG02800': 'GWD',
 'HG02557': 'ACB',
 'HG01068': 'PUR',
 'HG01850': 'KHV',
 'NA12739': 'CEU',
 'HG03008': 'BEB',
 'HG00611': 'CHS',
 'HG02178': 'CDX',
 'NA18566': 'CHB',
 'HG00623': 'CHS',
 'HG02008': 'PEL',
 'HG01988': 'ACB',
 'NA11832': 'CEU',
 'NA20126': 'ASW',
 'NA19138': 'YRI',
 'NA18565': 'CHB',
 'NA18606': 'CHB',
 'NA20902': 'GIH',
 'HG03021': 'PJL',
 'HG01967': 'PEL',
 'HG03130': 'ESN',
 'NA20828': 'TSI',
 'HG01502': 'IBS',
 'HG00111': 'GBR',
 'HG02698': 'PJL',
 'NA19714': 'ASW',
 'NA19659': 'MXL',
 'HG00638': 'PUR',
 'HG01284': 'CLM',
 'HG02731': 'PJL',
 'HG02757': 'GWD',
 'HG01969': 'PEL',
 'HG01508': 'IBS',
 'HG02971': 'ESN',
 'NA19746': 'MXL',
 'NA19914': 'ASW',
 'NA19338': 'LWK',
 'HG03237': 'PJL',
 'HG00409': 'CHS',
 'HG01985': 'ACB',
 'HG01477': 'CLM',
 'HG03625': 'PJL',
 'HG03870': 'ITU',
 'NA19438': 'LWK',
 'HG01302': 'PUR',
 'HG02285': 'PEL',
 'HG01108': 'PUR',
 'HG00978': 'CDX',
 'HG01274': 'CLM',
 'HG02613': 'GWD',
 'HG03225': 'MSL',
 'HG03135': 'ESN',
 'NA19319': 'LWK',
 'NA12753': 'CEU',
 'HG00589': 'CHS',
 'HG02964': 'ESN',
 'NA20829': 'TSI',
 'NA21111': 'GIH',
 'HG02250': 'CDX',
 'HG02337': 'ACB',
 'NA11917': 'CEU',
 'NA19390': 'LWK',
 'HG00357': 'FIN',
 'HG02237': 'IBS',
 'HG03194': 'ESN',
 'NA19648': 'MXL',
 'HG01761': 'IBS',
 'HG03831': 'BEB',
 'HG01757': 'IBS',
 'HG00628': 'CHS',
 'HG00634': 'CHS',
 'NA11840': 'CEU',
 'HG03247': 'GWD',
 'HG03539': 'GWD',
 'HG04181': 'BEB',
 'NA18911': 'YRI',
 'HG01936': 'PEL',
 'NA19328': 'LWK',
 'HG02652': 'PJL',
 'NA18591': 'CHB',
 'HG00463': 'CHS',
 'HG01669': 'IBS',
 'HG01943': 'PEL',
 'HG03578': 'MSL',
 'HG02724': 'PJL',
 'HG03455': 'MSL',
 'NA18860': 'YRI',
 'HG03391': 'MSL',
 'NA19173': 'YRI',
 'HG02146': 'PEL',
 'HG00356': 'FIN',
 'HG00142': 'GBR',
 'HG03767': 'PJL',
 'HG02267': 'PEL',
 'HG03063': 'MSL',
 'NA06991': 'CEU',
 'NA12865': 'CEU',
 'NA19247': 'YRI',
 'NA20802': 'TSI',
 'HG01620': 'IBS',
 'HG03293': 'ESN',
 'HG01183': 'PUR',
 'NA20299': 'ASW',
 'HG03446': 'MSL',
 'HG00277': 'FIN',
 'HG01175': 'PUR',
 'HG00320': 'FIN',
 'HG03717': 'ITU',
 'NA18564': 'CHB',
 'HG01627': 'IBS',
 'HG03488': 'PJL',
 'HG01951': 'PEL',
 'HG00154': 'GBR',
 'HG02330': 'ACB',
 'HG02965': 'ESN',
 'HG03486': 'MSL',
 'NA19090': 'JPT',
 'HG03963': 'ITU',
 'NA19238': 'YRI',
 'HG00983': 'CDX',
 'HG02980': 'ESN',
 'HG01136': 'CLM',
 'HG02982': 'GWD',
 'NA19093': 'YRI',
 'HG02476': 'ACB',
 'NA12249': 'CEU',
 'HG01706': 'IBS',
 'NA19778': 'MXL',
 'NA20910': 'GIH',
 'NA12043': 'CEU',
 'NA18542': 'CHB',
 'HG00328': 'FIN',
 'NA18934': 'YRI',
 'NA18923': 'YRI',
 'NA12750': 'CEU',
 'HG00116': 'GBR',
 'HG03879': 'ITU',
 'HG02148': 'PEL',
 'NA12044': 'CEU',
 'NA19764': 'MXL',
 'HG03354': 'ESN',
 'HG01126': 'CLM',
 'HG04114': 'STU',
 'HG01934': 'PEL',
 'HG00502': 'CHS',
 'HG01841': 'KHV',
 'HG00403': 'CHS',
 'HG00532': 'CHS',
 'NA18979': 'JPT',
 'HG01052': 'PUR',
 'NA19828': 'ASW',
 'HG02380': 'CDX',
 'HG03732': 'ITU',
 'HG02547': 'ACB',
 'NA20863': 'GIH',
 'HG03547': 'MSL',
 'NA19347': 'LWK',
 'HG03052': 'MSL',
 'HG03703': 'PJL',
 'NA20314': 'ASW',
 'NA21118': 'GIH',
 'HG04228': 'STU',
 'HG02108': 'ACB',
 'NA11891': 'CEU',
 'NA19749': 'MXL',
 'NA18521': 'YRI',
 'NA19685': 'MXL',
 'NA19171': 'YRI',
 'HG00126': 'GBR',
 'NA20277': 'ASW',
 'HG02233': 'IBS',
 'NA18865': 'YRI',
 'NA19664': 'MXL',
 'HG03017': 'PJL',
 'HG00353': 'FIN',
 'NA10854': 'CEU',
 'NA18504': 'YRI',
 'HG03816': 'BEB',
 'NA21087': 'GIH',
 'NA21099': 'GIH',
 'HG01984': 'PEL',
 'HG03121': 'ESN',
 'NA18632': 'CHB',
 'HG03240': 'GWD',
 'HG00476': 'CHS',
 'HG02360': 'CDX',
 'HG03491': 'PJL',
 'HG01946': 'PEL',
 'HG01987': 'ACB',
 'HG03639': 'PJL',
 'NA19360': 'LWK',
 'HG00381': 'FIN',
 'NA20798': 'TSI',
 'HG01556': 'CLM',
 'HG02377': 'CDX',
 'HG02131': 'KHV',
 'NA18852': 'YRI',
 'HG03513': 'ESN',
 'HG04217': 'ITU',
 'HG03054': 'MSL',
 'NA19235': 'YRI',
 'HG01684': 'IBS',
 'HG02262': 'PEL',
 'HG00612': 'CHS',
 'NA21137': 'GIH',
 'NA18978': 'JPT',
 'NA20362': 'ASW',
 'HG02224': 'IBS',
 'HG03161': 'ESN',
 'HG01978': 'PEL',
 'HG04024': 'ITU',
 'HG00627': 'CHS',
 'HG03079': 'MSL',
 'NA19730': 'MXL',
 'HG02883': 'GWD',
 'HG02541': 'ACB',
 'HG00543': 'CHS',
 'NA19248': 'YRI',
 'NA18795': 'CHB',
 'HG01679': 'IBS',
 'HG02259': 'PEL',
 'NA20297': 'ASW',
 'HG02479': 'ACB',
 'HG02692': 'PJL',
 'HG02491': 'PJL',
 'NA18748': 'CHB',
 'NA20806': 'TSI',
 'HG00475': 'CHS',
 'NA21100': 'GIH',
 'NA19436': 'LWK',
 'NA10845': 'CEU',
 'NA20772': 'TSI',
 'HG02079': 'KHV',
 'HG00330': 'FIN',
 'HG02398': 'CDX',
 'NA18983': 'JPT',
 'HG01631': 'IBS',
 'HG01510': 'IBS',
 'HG03199': 'ESN',
 'HG01681': 'IBS',
 'HG01550': 'CLM',
 'NA20895': 'GIH',
 'NA12827': 'CEU',
 'HG02605': 'PJL',
 'HG03196': 'ESN',
 'NA12751': 'CEU',
 'HG02252': 'PEL',
 'HG03842': 'STU',
 'HG04200': 'ITU',
 'HG03846': 'STU',
 'NA19774': 'MXL',
 'HG02715': 'GWD',
 'NA19705': 'ASW',
 'HG03797': 'BEB',
 'HG03618': 'PJL',
 'NA20849': 'GIH',
 'HG01878': 'KHV',
 'HG03433': 'MSL',
 'NA19797': 'MXL',
 'HG00480': 'CHS',
 'HG02276': 'PEL',
 'HG01971': 'PEL',
 'HG03635': 'PJL',
 'NA18954': 'JPT',
 'HG03651': 'PJL',
 'NA10851': 'CEU',
 'NA18519': 'YRI',
 'NA19759': 'MXL',
 'NA20515': 'TSI',
 'HG03746': 'STU',
 'NA10860': 'CEU',
 'HG01809': 'CDX',
 'HG00251': 'GBR',
 'HG02567': 'GWD',
 'HG01088': 'PUR',
 'HG01484': 'CLM',
 'NA19658': 'MXL',
 'NA19174': 'YRI',
 'HG04235': 'ITU',
 'HG01694': 'IBS',
 'HG03761': 'PJL',
 'HG04155': 'BEB',
 'HG01982': 'PEL',
 'HG00642': 'PUR',
 'HG03040': 'GWD',
 'NA12155': 'CEU',
 'HG01920': 'PEL',
 'HG02219': 'IBS',
 'NA11933': 'CEU',
 'HG03371': 'ESN',
 'HG03472': 'MSL',
 'NA20807': 'TSI',
 'NA21128': 'GIH',
 'HG01459': 'CLM',
 'NA20507': 'TSI',
 'HG03549': 'MSL',
 'HG02644': 'GWD',
 'NA18986': 'JPT',
 'NA18532': 'CHB',
 'NA18642': 'CHB',
 'NA19204': 'YRI',
 'HG00635': 'CHS',
 'HG02025': 'KHV',
 'NA18507': 'YRI',
 'HG00267': 'FIN',
 'HG01415': 'PUR',
 'NA21116': 'GIH',
 'HG02761': 'GWD',
 'HG01892': 'PEL',
 'NA18561': 'CHB',
 'HG01843': 'KHV',
 'HG01526': 'IBS',
 'HG02869': 'GWD',
 'HG00122': 'GBR',
 'HG02820': 'GWD',
 'HG03133': 'ESN',
 'HG02179': 'CDX',
 'HG02260': 'PEL',
 'HG01457': 'CLM',
 'HG00326': 'FIN',
 'NA12829': 'CEU',
 'HG01704': 'IBS',
 'NA19003': 'JPT',
 'NA18570': 'CHB',
 'HG03709': 'PJL',
 'HG01871': 'KHV',
 'HG03939': 'BEB',
 'HG03594': 'BEB',
 'HG02391': 'CDX',
 'HG02429': 'ACB',
 'HG00629': 'CHS',
 'NA18537': 'CHB',
 'HG00238': 'GBR',
 'HG01705': 'IBS',
 'NA18740': 'CHB',
 'HG01799': 'CDX',
 'HG01249': 'PUR',
 'HG01490': 'CLM',
 'NA19004': 'JPT',
 'HG02140': 'KHV',
 'HG03584': 'MSL',
 'HG03714': 'ITU',
 'NA07346': 'CEU',
 'NA19373': 'LWK',
 'HG00663': 'CHS',
 'NA20864': 'GIH',
 'NA20287': 'ASW',
 'HG04038': 'STU',
 'NA21102': 'GIH',
 'HG03200': 'ESN',
 'HG00130': 'GBR',
 'HG02603': 'PJL',
 'HG00281': 'FIN',
 'NA18624': 'CHB',
 'HG02525': 'KHV',
 'HG00694': 'CHS',
 'HG00636': 'CHS',
 ...}
In [82]:
# Scratch example dict; the lookup below does not use it.
a = dict(Korea=90, English=100)
# A key that is absent from sample_dict falls back to the supplied default (NaN).
sample_dict.get('HG02276--', np.nan)
Out[82]:
nan

유전자형 정보 행렬 만들기

In [32]:
# Label every row with its population code.  The lookup key is the first
# whitespace-separated token of the row label; labels that are not in
# sample_dict get NaN.  Only the index is needed, so iterate over it directly
# instead of using a row-wise apply, which builds a Series for every row
# just to read its name.
matrix['Population'] = [
    sample_dict.get(label.split()[0], np.nan) for label in matrix.index
]
matrix
Out[32]:
0 1 2 3 4 5 6 7 8 9 ... 4991 4992 4993 4994 4995 4996 4997 4998 4999 Population
HG00098 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00100 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00106 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00112 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00114 num NaN NaN 1.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00116 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 1.0 0.0 0.0 0.0 NaN 1.0 2.0 0.0 GBR
HG00117 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00118 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00119 num NaN NaN 0.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0 GBR
HG00120 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 2.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00122 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00123 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00124 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 2.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00126 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00131 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0 GBR
HG00141 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00142 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 1.0 GBR
HG00143 num NaN NaN 1.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00144 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00145 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00146 num NaN NaN 0.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 2.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00147 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00148 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00149 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 1.0 GBR
HG00150 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 1.0 GBR
HG00151 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00152 num NaN NaN 0.0 1.0 0.0 0.0 0.0 1.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00153 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00156 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0 GBR
HG00158 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 1.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
NA20786 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 TSI
NA20787 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20790 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20792 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20795 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 2.0 1.0 0.0 0.0 NaN 0.0 1.0 0.0 TSI
NA20796 num NaN NaN 0.0 2.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20797 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20798 num NaN NaN 0.0 1.0 0.0 0.0 0.0 1.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0 TSI
NA20799 num NaN NaN 0.0 1.0 0.0 0.0 1.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20800 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0 TSI
NA20801 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20802 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20803 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 1.0 1.0 0.0 TSI
NA20804 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20805 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20806 num NaN NaN 0.0 1.0 0.0 0.0 0.0 1.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0 TSI
NA20807 num NaN NaN 1.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 1.0 1.0 0.0 0.0 NaN 0.0 1.0 0.0 TSI
NA20808 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20809 num NaN NaN 1.0 2.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 1.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0 TSI
NA20810 num NaN NaN 2.0 2.0 2.0 0.0 0.0 0.0 NaN 2.0 ... 1.0 0.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0 TSI
NA20811 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 2.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20812 num NaN NaN 0.0 0.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20813 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20814 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0 TSI
NA20815 num NaN NaN 0.0 0.0 0.0 0.0 1.0 0.0 NaN 0.0 ... 0.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20816 num NaN NaN 1.0 1.0 0.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 1.0 1.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20818 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 2.0 0.0 TSI
NA20819 num NaN NaN 1.0 2.0 1.0 0.0 0.0 1.0 NaN 2.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20826 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 TSI
NA20828 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI

629 rows × 5001 columns

In [45]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from cycler import cycler

def get_pca_df(x):
    """Append the first two principal-component scores to ``x``.

    Every column of ``x`` is standardized with ``StandardScaler`` and the
    standardized table is reduced to two components with ``PCA``.

    Parameters
    ----------
    x : pandas.DataFrame
        Input table; all of its columns are scaled and decomposed.

    Returns
    -------
    pandas.DataFrame
        The columns of ``x`` followed by two extra columns, ``'C1'`` and
        ``'C2'``, holding the component scores of each row.
    """
    scaled = StandardScaler().fit_transform(x)
    components = PCA(n_components=2).fit_transform(scaled)
    # Give the scores the index of ``x`` so the concat lines rows up by label.
    scores = pd.DataFrame(components, index=x.index, columns=['C1', 'C2'])
    return pd.concat([x, scores], axis=1)
    
def draw_groups(groups, loc=None):
    """Scatter-plot the 'C1'/'C2' columns of each group in its own colour.

    Parameters
    ----------
    groups : sized iterable of (name, DataFrame) pairs
        For example the result of ``DataFrame.groupby``; every frame must
        have ``'C1'`` and ``'C2'`` columns.  ``name`` becomes the legend label.
    loc : str or int, optional
        Legend location, passed straight to ``Axes.legend``.
    """
    fig, ax = plt.subplots(figsize=(12, 10))
    ax.margins(0.15)

    # Sample evenly spaced colours from a matplotlib colormap instead of
    # calling pandas' private ``pd.tools.plotting._get_standard_colors``,
    # which is not public API and does not exist in newer pandas releases.
    n_groups = len(groups)
    cmap = plt.get_cmap('gist_rainbow')
    # max(..., 1) keeps the single-group case from dividing by zero.
    colors = [cmap(i / max(n_groups - 1, 1)) for i in range(n_groups)]

    for (name, group), color in zip(groups, colors):
        ax.plot(group['C1'], group['C2'], marker='o', linestyle='', ms=8,
                color=color, label=name)

    ax.set_xlabel('C1')
    ax.set_ylabel('C2')
    ax.legend(numpoints=1, loc=loc)
In [46]:
# Preview of the PCA input: every column except 'Population', keeping only
# the columns that contain no NaN.  'Population' is dropped by name rather
# than with `.ix[:, :-1]`: `.ix` has been removed from pandas, and the slice
# silently assumed that 'Population' is the last column.
matrix.drop('Population', axis=1).dropna(axis=1)
Out[46]:
2 3 5 7 9 12 13 14 16 17 ... 4988 4989 4990 4991 4992 4993 4995 4997 4998 4999
HG00098 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00100 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00106 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
HG00112 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00114 num 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
HG00116 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 2.0 1.0 1.0 0.0 0.0 1.0 2.0 0.0
HG00117 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00118 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 1.0 2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00119 num 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0
HG00120 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 2.0 1.0 2.0 0.0 0.0 0.0 1.0 0.0
HG00122 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00123 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00124 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0
HG00126 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00131 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0
HG00141 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 2.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0
HG00142 num 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 ... 1.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0
HG00143 num 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00144 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00145 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00146 num 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 0.0 1.0 2.0 0.0 2.0 0.0 0.0 0.0 1.0 0.0
HG00147 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00148 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
HG00149 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0
HG00150 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0
HG00151 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
HG00152 num 0.0 1.0 0.0 1.0 1.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
HG00153 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
HG00156 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0
HG00158 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 2.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
NA20786 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
NA20787 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0
NA20790 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0
NA20792 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
NA20795 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 2.0 1.0 0.0 0.0 1.0 0.0
NA20796 num 0.0 2.0 0.0 0.0 1.0 0.0 1.0 2.0 2.0 2.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
NA20797 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
NA20798 num 0.0 1.0 0.0 1.0 1.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0
NA20799 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
NA20800 num 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0
NA20801 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
NA20802 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
NA20803 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 2.0 2.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0
NA20804 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
NA20805 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
NA20806 num 0.0 1.0 0.0 1.0 1.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 2.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0
NA20807 num 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 2.0 ... 0.0 2.0 2.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0
NA20808 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
NA20809 num 1.0 2.0 0.0 0.0 1.0 0.0 1.0 2.0 2.0 2.0 ... 0.0 0.0 2.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0
NA20810 num 2.0 2.0 0.0 0.0 2.0 0.0 0.0 2.0 2.0 2.0 ... 0.0 0.0 2.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0
NA20811 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 2.0 ... 0.0 0.0 2.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0
NA20812 num 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
NA20813 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 0.0 2.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0
NA20814 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0
NA20815 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 1.0 2.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0
NA20816 num 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 2.0 ... 0.0 2.0 2.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0
NA20818 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 1.0 2.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0
NA20819 num 1.0 2.0 0.0 1.0 2.0 0.0 1.0 2.0 2.0 2.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
NA20826 num 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0
NA20828 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

629 rows × 2827 columns

In [47]:
# Run the PCA on the NaN-free columns.  'Population' is dropped by name
# instead of via the removed `.ix` indexer, which also relied on it being
# the last column; the axis is passed by keyword because newer pandas no
# longer accepts it positionally.
pca_df = get_pca_df(matrix.drop('Population', axis=1).dropna(axis=1))
# Re-attach the population labels (aligned on the shared index) for grouping.
pca_df['Population'] = matrix['Population']
In [48]:
# One colour per population, legend in the upper-right corner.
draw_groups(pca_df.groupby('Population'), loc='upper right')
In [49]:
# Display the full matrix again (all columns, including 'Population').
matrix
Out[49]:
0 1 2 3 4 5 6 7 8 9 ... 4991 4992 4993 4994 4995 4996 4997 4998 4999 Population
HG00098 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00100 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00106 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00112 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00114 num NaN NaN 1.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00116 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 1.0 0.0 0.0 0.0 NaN 1.0 2.0 0.0 GBR
HG00117 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00118 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00119 num NaN NaN 0.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0 GBR
HG00120 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 2.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00122 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00123 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00124 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 2.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00126 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00131 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0 GBR
HG00141 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00142 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 1.0 GBR
HG00143 num NaN NaN 1.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00144 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00145 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00146 num NaN NaN 0.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 2.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00147 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00148 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00149 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 1.0 GBR
HG00150 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 1.0 GBR
HG00151 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 GBR
HG00152 num NaN NaN 0.0 1.0 0.0 0.0 0.0 1.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00153 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
HG00156 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0 GBR
HG00158 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 1.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 GBR
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
NA20786 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 TSI
NA20787 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20790 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20792 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20795 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 2.0 1.0 0.0 0.0 NaN 0.0 1.0 0.0 TSI
NA20796 num NaN NaN 0.0 2.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20797 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20798 num NaN NaN 0.0 1.0 0.0 0.0 0.0 1.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0 TSI
NA20799 num NaN NaN 0.0 1.0 0.0 0.0 1.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20800 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0 TSI
NA20801 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20802 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20803 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 1.0 1.0 0.0 TSI
NA20804 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20805 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20806 num NaN NaN 0.0 1.0 0.0 0.0 0.0 1.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0 TSI
NA20807 num NaN NaN 1.0 1.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 1.0 1.0 0.0 0.0 NaN 0.0 1.0 0.0 TSI
NA20808 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20809 num NaN NaN 1.0 2.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 1.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0 TSI
NA20810 num NaN NaN 2.0 2.0 2.0 0.0 0.0 0.0 NaN 2.0 ... 1.0 0.0 0.0 0.0 0.0 NaN 1.0 0.0 0.0 TSI
NA20811 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 2.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20812 num NaN NaN 0.0 0.0 1.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20813 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 1.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20814 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 1.0 TSI
NA20815 num NaN NaN 0.0 0.0 0.0 0.0 1.0 0.0 NaN 0.0 ... 0.0 1.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20816 num NaN NaN 1.0 1.0 0.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 1.0 1.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20818 num NaN NaN 0.0 0.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 2.0 0.0 TSI
NA20819 num NaN NaN 1.0 2.0 1.0 0.0 0.0 1.0 NaN 2.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI
NA20826 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 1.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 1.0 0.0 TSI
NA20828 num NaN NaN 0.0 1.0 0.0 0.0 0.0 0.0 NaN 0.0 ... 0.0 0.0 0.0 0.0 0.0 NaN 0.0 0.0 0.0 TSI

629 rows × 5001 columns

In [54]:
# Keep only the columns without any NaN.  The axis is passed by keyword:
# the positional form `dropna(1)` is rejected by newer pandas releases.
matrix2 = matrix.dropna(axis=1)

CEU와 CHB의 변이 차이

In [55]:
# Rows of the NaN-free matrix whose 'Population' label is CEU / CHB.
ceu = matrix2.loc[matrix2['Population'].eq('CEU')]
chb = matrix2.loc[matrix2['Population'].eq('CHB')]
In [70]:
# Row labels belonging to each of the two populations, as plain lists.
ceu_ids, chb_ids = ceu.index.tolist(), chb.index.tolist()
matrix2
Out[70]:
2 3 5 7 9 12 13 14 16 17 ... 4989 4990 4991 4992 4993 4995 4997 4998 4999 Population
HG00098 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00100 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00106 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 GBR
HG00112 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00114 num 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 1.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 GBR
HG00116 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 2.0 1.0 1.0 0.0 0.0 1.0 2.0 0.0 GBR
HG00117 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00118 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 1.0 2.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00119 num 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 GBR
HG00120 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 2.0 2.0 1.0 2.0 0.0 0.0 0.0 1.0 0.0 GBR
HG00122 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00123 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00124 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00126 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00131 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 GBR
HG00141 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 2.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00142 num 0.0 1.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 GBR
HG00143 num 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00144 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00145 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00146 num 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 1.0 2.0 0.0 2.0 0.0 0.0 0.0 1.0 0.0 GBR
HG00147 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00148 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 GBR
HG00149 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 GBR
HG00150 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 GBR
HG00151 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 GBR
HG00152 num 0.0 1.0 0.0 1.0 1.0 0.0 1.0 1.0 1.0 1.0 ... 1.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 GBR
HG00153 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 GBR
HG00156 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 GBR
HG00158 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 2.0 0.0 1.0 0.0 0.0 0.0 1.0 0.0 GBR
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
NA20786 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 TSI
NA20787 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 2.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20790 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20792 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20795 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 2.0 1.0 0.0 0.0 1.0 0.0 TSI
NA20796 num 0.0 2.0 0.0 0.0 1.0 0.0 1.0 2.0 2.0 2.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20797 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20798 num 0.0 1.0 0.0 1.0 1.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 TSI
NA20799 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20800 num 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 TSI
NA20801 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20802 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 1.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20803 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 2.0 2.0 0.0 0.0 0.0 0.0 1.0 1.0 0.0 TSI
NA20804 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20805 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20806 num 0.0 1.0 0.0 1.0 1.0 0.0 1.0 1.0 1.0 1.0 ... 2.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 TSI
NA20807 num 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 2.0 ... 2.0 2.0 0.0 1.0 1.0 0.0 0.0 1.0 0.0 TSI
NA20808 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20809 num 1.0 2.0 0.0 0.0 1.0 0.0 1.0 2.0 2.0 2.0 ... 0.0 2.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 TSI
NA20810 num 2.0 2.0 0.0 0.0 2.0 0.0 0.0 2.0 2.0 2.0 ... 0.0 2.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 TSI
NA20811 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 2.0 ... 0.0 2.0 0.0 2.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20812 num 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20813 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 0.0 2.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20814 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 1.0 TSI
NA20815 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 2.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20816 num 1.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 2.0 ... 2.0 2.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 TSI
NA20818 num 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 ... 1.0 2.0 0.0 0.0 0.0 0.0 0.0 2.0 0.0 TSI
NA20819 num 1.0 2.0 0.0 1.0 2.0 0.0 1.0 2.0 2.0 2.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI
NA20826 num 0.0 1.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 TSI
NA20828 num 0.0 1.0 0.0 0.0 0.0 0.0 1.0 1.0 1.0 1.0 ... 0.0 2.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 TSI

629 rows × 2828 columns

In [78]:
def get_ttest_pvalue(s):
    """Return the two-sample t-test p-value between the CEU and CHB rows of ``s``.

    The two groups are selected with the module-level label lists ``ceu_ids``
    and ``chb_ids``.

    Parameters
    ----------
    s : pandas.Series
        One column of the matrix, indexed by the same labels that
        ``ceu_ids`` / ``chb_ids`` contain.

    Returns
    -------
    float
        The p-value from ``stats.ttest_ind``, or NaN when either group is
        empty or the column cannot be converted to numbers (for example a
        column of text labels).
    """
    # Select the groups outside the try block so that a bad id list surfaces
    # as an error instead of silently turning every result into NaN.
    x1 = s[ceu_ids]
    x2 = s[chb_ids]
    if len(x1) == 0 or len(x2) == 0:
        return np.nan
    try:
        # Converting up front rejects text columns before they reach scipy,
        # which would otherwise warn that it cannot check them for NaN.
        return stats.ttest_ind(x1.astype(float), x2.astype(float))[1]
    except (TypeError, ValueError):
        # Only "this column is not numeric" is treated as best-effort NaN;
        # anything else (including KeyboardInterrupt) propagates.
        return np.nan

# Column-wise apply: one p-value per column of matrix2.  The text-valued
# 'Population' column cannot be t-tested, so its entry in the result is NaN.
matrix2.apply(get_ttest_pvalue)
/usr/local/lib/python3.5/site-packages/scipy/stats/stats.py:249: RuntimeWarning: The input array could not be properly checked for nan values. nan values will be ignored.
  "values. nan values will be ignored.", RuntimeWarning)
Out[78]:
2             3.001240e-01
3             5.077094e-10
5             4.455631e-02
7             4.395198e-01
9             8.239539e-02
12            1.957032e-02
13            9.170527e-02
14            1.132260e-02
16            1.626908e-02
17            7.837922e-02
20            2.717679e-03
22            1.749647e-03
23            4.161932e-03
26            7.064017e-03
27            8.737629e-03
28            9.362104e-03
29            1.749647e-03
30            1.749647e-03
32            2.101233e-01
34            2.717679e-03
40            1.110325e-03
42            2.717679e-03
49            4.072045e-01
55            3.639410e-03
59            8.399446e-03
61            1.221159e-01
62            4.455631e-02
64            4.404638e-01
66            1.208496e-01
67                     NaN
                  ...     
4966          5.805121e-04
4967          4.568664e-03
4968          5.023331e-05
4969          7.493982e-01
4970          6.569898e-02
4971          1.413589e-02
4972          9.131017e-02
4973          3.059390e-01
4975          3.121692e-03
4977          1.028203e-01
4978          9.459553e-03
4979                   NaN
4980          1.952565e-01
4981          3.024057e-01
4982          8.373549e-03
4983          3.364626e-01
4985          3.869401e-03
4986          2.512300e-01
4987          2.254631e-02
4988          1.957032e-02
4989          1.893101e-01
4990          2.185872e-01
4991          2.113554e-01
4992          4.729785e-02
4993          1.028203e-01
4995                   NaN
4997          8.821178e-02
4998          4.157188e-01
4999          3.026712e-02
Population             NaN
dtype: float64
In [ ]: