%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import scipy
Defining the column names:
#names = ["Individual"] + ["PC" + str(i) for i in range(1, 5)] + ["Population"]
names = ["Individual", "PC1", "PC2", "PC3", "PC4", "Population"]
Loading the eigenVec file from the pca run
pcaDat = pd.read_csv("/home/training/share/pca_results/pca.WestEurasia.evec",
delim_whitespace=True, skiprows=1, names=names)
Looking at the data, we find that it is a matrix, with each individual on one row, and the columns denoting the first 10 principal components. The last column contains the population for each individual:
pcaDat
Individual | PC1 | PC2 | PC3 | PC4 | Population | |
---|---|---|---|---|---|---|
0 | Yuk_009 | 0.0123 | 0.1252 | 0.1147 | 0.0567 | Yukagir |
1 | Yuk_025 | 0.0120 | 0.1258 | 0.1168 | 0.0576 | Yukagir |
2 | Yuk_022 | 0.0136 | 0.1303 | 0.1186 | 0.0564 | Yukagir |
3 | Yuk_020 | 0.0170 | 0.1278 | 0.1176 | 0.0584 | Yukagir |
4 | MC_40 | 0.0183 | 0.1226 | 0.1123 | 0.0537 | Chukchi |
5 | Yuk_024 | 0.0144 | 0.1271 | 0.1124 | 0.0584 | Yukagir |
6 | Yuk_023 | 0.0124 | 0.1348 | 0.1238 | 0.0642 | Yukagir |
7 | MC_16 | 0.0144 | 0.1266 | 0.1169 | 0.0541 | Chukchi |
8 | MC_15 | 0.0146 | 0.1250 | 0.1119 | 0.0559 | Chukchi |
9 | MC_18 | 0.0175 | 0.1238 | 0.1167 | 0.0523 | Chukchi |
10 | Yuk_004 | 0.0110 | 0.1273 | 0.1117 | 0.0573 | Yukagir |
11 | MC_08 | 0.0187 | 0.1253 | 0.1185 | 0.0564 | Chukchi |
12 | Nov_005 | 0.0152 | 0.1349 | 0.1285 | 0.0618 | Nganasan |
13 | MC_25 | 0.0182 | 0.1258 | 0.1196 | 0.0532 | Chukchi |
14 | Yuk_019 | 0.0161 | 0.1327 | 0.1229 | 0.0617 | Yukagir |
15 | Yuk_011 | 0.0152 | 0.1217 | 0.1148 | 0.0569 | Yukagir |
16 | Sesk_47 | 0.0167 | 0.1241 | 0.1177 | 0.0549 | Chukchi1 |
17 | MC_17 | 0.0180 | 0.1268 | 0.1147 | 0.0544 | Chukchi |
18 | Yuk_021 | 0.0141 | 0.1329 | 0.1210 | 0.0653 | Yukagir |
19 | MC_06 | 0.0159 | 0.1264 | 0.1135 | 0.0557 | Chukchi |
20 | MC_38 | 0.0178 | 0.1240 | 0.1143 | 0.0534 | Chukchi |
21 | MC_14 | 0.0165 | 0.1238 | 0.1114 | 0.0524 | Chukchi |
22 | Ul5 | 0.0070 | 0.1306 | 0.1144 | 0.0540 | Ulchi |
23 | Ul31 | 0.0056 | 0.1289 | 0.1182 | 0.0550 | Ulchi |
24 | Ul65 | 0.0051 | 0.1331 | 0.1117 | 0.0599 | Ulchi |
25 | Tuba12 | 0.0172 | 0.0906 | 0.0790 | 0.0362 | Tubalar |
26 | Tuba20 | 0.0129 | 0.0894 | 0.0767 | 0.0308 | Tubalar |
27 | Nel19 | 0.0273 | 0.0605 | 0.0608 | 0.0333 | Yukagir |
28 | Nlk16 | 0.0217 | 0.0744 | 0.0753 | 0.0360 | Even |
29 | Kor66 | 0.0148 | 0.1259 | 0.1157 | 0.0531 | Koryak |
... | ... | ... | ... | ... | ... | ... |
1259 | I0429 | 0.0413 | 0.0447 | 0.0440 | 0.0098 | Yamnaya_Samara |
1260 | I0438 | 0.0384 | 0.0497 | 0.0399 | 0.0020 | Yamnaya_Samara |
1261 | I0585 | 0.0770 | -0.0424 | 0.0372 | 0.0355 | WHG |
1262 | I0797 | -0.0101 | -0.0452 | -0.0342 | -0.0124 | LBK_EN |
1263 | I0795 | -0.0057 | -0.0495 | -0.0429 | 0.0098 | LBK_EN |
1264 | I0022 | -0.0133 | -0.0433 | -0.0356 | -0.0089 | LBK_EN |
1265 | I0026 | -0.0142 | -0.0438 | -0.0430 | -0.0027 | LBK_EN |
1266 | I1507 | 0.0866 | -0.0455 | 0.0393 | 0.0311 | WHG |
1267 | I0025 | -0.0103 | -0.0449 | -0.0404 | -0.0023 | LBK_EN |
1268 | I0443 | 0.0350 | 0.0401 | 0.0412 | 0.0028 | Yamnaya_Samara |
1269 | I0054 | -0.0054 | -0.0413 | -0.0410 | -0.0124 | LBK_EN |
1270 | I0046 | -0.0066 | -0.0446 | -0.0386 | -0.0092 | LBK_EN |
1271 | I0048 | -0.0128 | -0.0367 | -0.0388 | -0.0129 | LBK_EN |
1272 | I0056 | -0.0067 | -0.0472 | -0.0388 | -0.0054 | LBK_EN |
1273 | I0057 | -0.0113 | -0.0442 | -0.0357 | -0.0008 | LBK_EN |
1274 | I0100 | -0.0063 | -0.0455 | -0.0410 | -0.0051 | LBK_EN |
1275 | I0659 | -0.0084 | -0.0437 | -0.0431 | -0.0099 | LBK_EN |
1276 | I0821 | -0.0071 | -0.0428 | -0.0380 | -0.0103 | LBK_EN |
1277 | I1550 | -0.0107 | -0.0386 | -0.0402 | -0.0039 | LBK_EN |
1278 | BOO001 | 0.0399 | 0.0760 | 0.0915 | 0.0453 | BolshoyOleniOstrov |
1279 | BOO002 | 0.0445 | 0.0735 | 0.0925 | 0.0379 | BolshoyOleniOstrov |
1280 | BOO003 | 0.0466 | 0.0765 | 0.0862 | 0.0415 | BolshoyOleniOstrov |
1281 | BOO004 | 0.0411 | 0.0723 | 0.0938 | 0.0419 | BolshoyOleniOstrov |
1282 | BOO005 | 0.0461 | 0.0731 | 0.0909 | 0.0401 | BolshoyOleniOstrov |
1283 | BOO006 | 0.0394 | 0.0917 | 0.1002 | 0.0438 | BolshoyOleniOstrov |
1284 | CHV001 | 0.0441 | 0.0331 | 0.0587 | 0.0325 | ChalmnyVarre |
1285 | CHV002 | 0.0442 | 0.0351 | 0.0610 | 0.0373 | ChalmnyVarre |
1286 | JK1968 | 0.0398 | 0.0385 | 0.0661 | 0.0299 | Levanluhta |
1287 | JK1970 | 0.0408 | 0.0466 | 0.0600 | 0.0363 | Levanluhta |
1288 | JK2065 | 0.0392 | -0.0065 | 0.0195 | 0.0043 | JK2065 |
1289 rows × 6 columns
We can quickly plot the first two PCs for all individuals:
plt.figure(figsize=(10, 10))
plt.scatter(x=pcaDat["PC1"], y=pcaDat["PC2"])
plt.xlabel("PC1");
plt.ylabel("PC2");
which is not very helpful, because we can't see where each population falls. We can highlight a few populations to get a bit more of a feeling:
plt.figure(figsize=(10, 10))
plt.scatter(x=pcaDat["PC1"], y=pcaDat["PC2"], label="")
for pop in ["Finnish", "Sardinian", "Armenian", "BedouinB"]:
d = pcaDat[pcaDat["Population"] == pop]
plt.scatter(x=d["PC1"], y=d["PC2"], label=pop)
plt.legend()
plt.xlabel("PC1");
plt.ylabel("PC2");
We can improve the plot further by plotting only the population that we had in the population list. So we need to load the population list and while we're at it, also add color- and symbol numbers for plotting.
popListDat = pd.read_csv("/home/training/share/WestEurasia.poplist.txt",
names=["Population"]).sort_values(by="Population")
nPops = len(popListDat)
nCols = 8
nSymbols = int(nPops / nCols)
colorIndices = [int(i / nSymbols) for i in range(nPops)]
symbolIndices = [i % nSymbols for i in range(nPops)]
popListDat = popListDat.assign(colorIndex=colorIndices, symbolIndex=symbolIndices)
popListDat
Population | colorIndex | symbolIndex | |
---|---|---|---|
1 | Abkhasian | 0 | 0 |
2 | Adygei | 0 | 1 |
3 | Albanian | 0 | 2 |
4 | Armenian | 0 | 3 |
5 | Assyrian | 0 | 4 |
6 | Balkar | 0 | 5 |
7 | Basque | 0 | 6 |
8 | BedouinA | 0 | 7 |
9 | BedouinB | 1 | 0 |
10 | Belarusian | 1 | 1 |
11 | Bulgarian | 1 | 2 |
12 | Canary_Islander | 1 | 3 |
13 | Chechen | 1 | 4 |
0 | Chuvash | 1 | 5 |
14 | Croatian | 1 | 6 |
15 | Cypriot | 1 | 7 |
16 | Czech | 2 | 0 |
17 | Druze | 2 | 1 |
18 | English | 2 | 2 |
19 | Estonian | 2 | 3 |
20 | Finnish | 2 | 4 |
21 | French | 2 | 5 |
22 | Georgian | 2 | 6 |
23 | German | 2 | 7 |
24 | Greek | 3 | 0 |
25 | Hungarian | 3 | 1 |
26 | Icelandic | 3 | 2 |
27 | Iranian | 3 | 3 |
28 | Irish | 3 | 4 |
29 | Irish_Ulster | 3 | 5 |
... | ... | ... | ... |
38 | Jew_Tunisian | 4 | 6 |
39 | Jew_Turkish | 4 | 7 |
40 | Jew_Yemenite | 5 | 0 |
41 | Jordanian | 5 | 1 |
42 | Kumyk | 5 | 2 |
44 | Lebanese | 5 | 3 |
43 | Lebanese_Christian | 5 | 4 |
45 | Lebanese_Muslim | 5 | 5 |
46 | Lezgin | 5 | 6 |
47 | Lithuanian | 5 | 7 |
48 | Maltese | 6 | 0 |
49 | Mordovian | 6 | 1 |
50 | North_Ossetian | 6 | 2 |
51 | Norwegian | 6 | 3 |
52 | Orcadian | 6 | 4 |
53 | Palestinian | 6 | 5 |
54 | Polish | 6 | 6 |
55 | Romanian | 6 | 7 |
56 | Russian | 7 | 0 |
57 | Sardinian | 7 | 1 |
58 | Saudi | 7 | 2 |
59 | Scottish | 7 | 3 |
60 | Shetlandic | 7 | 4 |
61 | Sicilian | 7 | 5 |
62 | Sorb | 7 | 6 |
64 | Spanish | 7 | 7 |
63 | Spanish_North | 8 | 0 |
65 | Syrian | 8 | 1 |
66 | Turkish | 8 | 2 |
67 | Ukrainian | 8 | 3 |
68 rows × 3 columns
and we end up with only 720 points. Note that I'm here flipping the x axis to make the correlation to Geography more obvious. We can now plot all points with colors and symbols:
plt.figure(figsize=(10,10))
symbolVec = ["8", "s", "p", "P", "*", "h", "H", "+", "x", "X", "D", "d"]
colorVec = [u'#1f77b4', u'#ff7f0e', u'#2ca02c', u'#d62728', u'#9467bd',
u'#8c564b', u'#e377c2', u'#7f7f7f', u'#bcbd22', u'#17becf']
for i, row in popListDat.iterrows():
d = pcaDat[pcaDat.Population == row["Population"]]
plt.scatter(x=-d["PC1"], y=d["PC2"], c=colorVec[row["colorIndex"]],
marker=symbolVec[row["symbolIndex"]], label=row["Population"])
plt.xlabel("PC1");
plt.ylabel("PC2");
plt.legend(loc=(1.1, 0), ncol=3)
<matplotlib.legend.Legend at 0x7f70862abba8>
Adding ancient populations:
plt.figure(figsize=(10,10))
symbolVec = ["8", "s", "p", "P", "*", "h", "H", "+", "x", "X", "D", "d", "v", "<", ">", "^"]
colorVec = [u'#1f77b4', u'#ff7f0e', u'#2ca02c', u'#d62728', u'#9467bd',
u'#8c564b', u'#e377c2', u'#7f7f7f', u'#bcbd22', u'#17becf']
for i, row in popListDat.iterrows():
d = pcaDat[pcaDat.Population == row["Population"]]
plt.scatter(x=-d["PC1"], y=d["PC2"], c=colorVec[row["colorIndex"]],
marker=symbolVec[row["symbolIndex"]], label=row["Population"])
for i, pop in enumerate(["Levanluhta", "Saami.DG", "BolshoyOleniOstrov", "Yamnaya_Samara", "LBK_EN", "WHG"]):
d = pcaDat[pcaDat.Population == pop]
plt.scatter(x=-d["PC1"], y=d["PC2"], c="black", marker=symbolVec[i], label=pop)
plt.xlabel("PC1");
plt.ylabel("PC2");
plt.legend(loc=(1.1, 0), ncol=3)
<matplotlib.legend.Legend at 0x7f704c7136a0>
popListDat = pd.read_csv("/home/training/share/AllEurasia.poplist.txt",
names=["Population"]).sort_values(by="Population")
nPops = len(popListDat)
nCols = 9
nSymbols = int(nPops / nCols)
colorIndices = [int(i / nSymbols) for i in range(nPops)]
symbolIndices = [i % nSymbols for i in range(nPops)]
popListDat = popListDat.assign(colorIndex=colorIndices, symbolIndex=symbolIndices)
popListDat
Population | colorIndex | symbolIndex | |
---|---|---|---|
0 | Abkhasian | 0 | 0 |
1 | Adygei | 0 | 1 |
2 | Albanian | 0 | 2 |
3 | Aleut | 0 | 3 |
4 | Aleut_Tlingit | 0 | 4 |
5 | Altaian | 0 | 5 |
6 | Ami | 0 | 6 |
7 | Armenian | 0 | 7 |
8 | Assyrian | 0 | 8 |
9 | Atayal | 0 | 9 |
10 | Avar | 0 | 10 |
11 | Azeri | 0 | 11 |
12 | Balkar | 0 | 12 |
13 | Basque | 1 | 0 |
14 | BedouinA | 1 | 1 |
15 | BedouinB | 1 | 2 |
16 | Belarusian | 1 | 3 |
17 | Borneo | 1 | 4 |
18 | Bulgarian | 1 | 5 |
19 | Buryat | 1 | 6 |
20 | Cambodian | 1 | 7 |
21 | Chechen | 1 | 8 |
22 | Chukchi | 1 | 9 |
23 | Chukchi1 | 1 | 10 |
24 | Chuvash | 1 | 11 |
25 | Croatian | 1 | 12 |
26 | Cypriot | 2 | 0 |
27 | Czech | 2 | 1 |
28 | Dai | 2 | 2 |
29 | Daur | 2 | 3 |
... | ... | ... | ... |
89 | Saami.DG | 6 | 11 |
90 | Saami_WGA | 6 | 12 |
91 | Sardinian | 7 | 0 |
92 | Saudi | 7 | 1 |
93 | Scottish | 7 | 2 |
94 | Selkup | 7 | 3 |
95 | Semende | 7 | 4 |
96 | She | 7 | 5 |
97 | Sherpa.DG | 7 | 6 |
98 | Sicilian | 7 | 7 |
99 | Spanish | 7 | 8 |
100 | Spanish_North | 7 | 9 |
101 | Syrian | 7 | 10 |
102 | Tajik | 7 | 11 |
103 | Thai | 7 | 12 |
104 | Tibetan.DG | 8 | 0 |
105 | Tu | 8 | 1 |
106 | Tubalar | 8 | 2 |
107 | Tujia | 8 | 3 |
108 | Turkish | 8 | 4 |
109 | Turkmen | 8 | 5 |
110 | Tuvinian | 8 | 6 |
111 | Ukrainian | 8 | 7 |
112 | Ulchi | 8 | 8 |
113 | Uygur | 8 | 9 |
114 | Uzbek | 8 | 10 |
115 | Xibo | 8 | 11 |
116 | Yakut | 8 | 12 |
117 | Yi | 9 | 0 |
118 | Yukagir | 9 | 1 |
119 rows × 3 columns
pcaDat = pd.read_csv("/home/training/share/pca_results/pca.AllEurasia.evec",
delim_whitespace=True, skiprows=1, names=names)
plt.figure(figsize=(10,10))
symbolVec = ["8", "s", "p", "P", "*", "h", "H", "+", "x", "X", "D", "d", "v", "<", ">", "^"]
colorVec = [u'#1f77b4', u'#ff7f0e', u'#2ca02c', u'#d62728', u'#9467bd',
u'#8c564b', u'#e377c2', u'#7f7f7f', u'#bcbd22', u'#17becf']
for i, row in popListDat.iterrows():
d = pcaDat[pcaDat.Population == row["Population"]]
plt.scatter(x=-d["PC1"], y=d["PC2"], c=colorVec[row["colorIndex"]],
marker=symbolVec[row["symbolIndex"]], label=row["Population"])
for i, pop in enumerate(["Levanluhta", "Saami.DG", "BolshoyOleniOstrov", "Yamnaya_Samara", "LBK_EN", "WHG"]):
d = pcaDat[pcaDat.Population == pop]
plt.scatter(x=-d["PC1"], y=d["PC2"], c="black", marker=symbolVec[i], label=pop)
plt.xlabel("PC1");
plt.ylabel("PC2");
plt.legend(loc=(1.1, 0), ncol=3)
<matplotlib.legend.Legend at 0x7f7086207a58>
f3dat_han = pd.read_csv("/home/training/work/share/solutions/outgroupF3_results_Han.txt",
delim_whitespace=True,
names=["dummy", "A", "B", "C", "F3", "stderr", "Z", "nSNPs"])
f3dat_han
dummy | A | B | C | F3 | stderr | Z | nSNPs | |
---|---|---|---|---|---|---|---|---|
0 | result: | Han | Chuvash | Mbuti | 0.233652 | 0.002072 | 112.782 | 502678 |
1 | result: | Han | Albanian | Mbuti | 0.215629 | 0.002029 | 106.291 | 501734 |
2 | result: | Han | Armenian | Mbuti | 0.213724 | 0.001963 | 108.882 | 504370 |
3 | result: | Han | Bulgarian | Mbuti | 0.216193 | 0.001979 | 109.266 | 504310 |
4 | result: | Han | Czech | Mbuti | 0.218060 | 0.002002 | 108.939 | 504089 |
5 | result: | Han | Druze | Mbuti | 0.209551 | 0.001919 | 109.205 | 510853 |
6 | result: | Han | English | Mbuti | 0.216959 | 0.001973 | 109.954 | 504161 |
7 | result: | Han | Estonian | Mbuti | 0.220730 | 0.002019 | 109.332 | 503503 |
8 | result: | Han | Finnish | Mbuti | 0.223447 | 0.002044 | 109.345 | 502217 |
9 | result: | Han | French | Mbuti | 0.216623 | 0.001969 | 110.012 | 509613 |
10 | result: | Han | Georgian | Mbuti | 0.214295 | 0.001935 | 110.721 | 503598 |
11 | result: | Han | Greek | Mbuti | 0.215203 | 0.001984 | 108.465 | 507475 |
12 | result: | Han | Hungarian | Mbuti | 0.217894 | 0.001999 | 109.004 | 507409 |
13 | result: | Han | Icelandic | Mbuti | 0.218683 | 0.002015 | 108.553 | 504655 |
14 | result: | Han | Italian_North | Mbuti | 0.215332 | 0.001978 | 108.854 | 507589 |
15 | result: | Han | Italian_South | Mbuti | 0.211787 | 0.002271 | 93.265 | 492400 |
16 | result: | Han | Lithuanian | Mbuti | 0.219615 | 0.002032 | 108.098 | 503681 |
17 | result: | Han | Maltese | Mbuti | 0.210359 | 0.001956 | 107.542 | 503985 |
18 | result: | Han | Mordovian | Mbuti | 0.223469 | 0.002008 | 111.296 | 503441 |
19 | result: | Han | Norwegian | Mbuti | 0.218873 | 0.002023 | 108.197 | 504621 |
20 | result: | Han | Orcadian | Mbuti | 0.217773 | 0.002014 | 108.115 | 504993 |
21 | result: | Han | Russian | Mbuti | 0.223993 | 0.001995 | 112.274 | 506525 |
22 | result: | Han | Sardinian | Mbuti | 0.213230 | 0.001980 | 107.711 | 508413 |
23 | result: | Han | Scottish | Mbuti | 0.218489 | 0.002039 | 107.145 | 499784 |
24 | result: | Han | Sicilian | Mbuti | 0.212272 | 0.001975 | 107.486 | 505477 |
25 | result: | Han | Spanish_North | Mbuti | 0.215885 | 0.002029 | 106.383 | 500853 |
26 | result: | Han | Spanish | Mbuti | 0.213869 | 0.001975 | 108.297 | 513648 |
27 | result: | Han | Ukrainian | Mbuti | 0.218716 | 0.002007 | 108.950 | 503981 |
28 | result: | Han | Levanluhta | Mbuti | 0.236252 | 0.002383 | 99.123 | 263049 |
29 | result: | Han | BolshoyOleniOstrov | Mbuti | 0.247814 | 0.002177 | 113.849 | 457102 |
30 | result: | Han | ChalmnyVarre | Mbuti | 0.233499 | 0.002304 | 101.345 | 366220 |
31 | result: | Han | Saami.DG | Mbuti | 0.236198 | 0.002274 | 103.852 | 489038 |
d=f3dat_han.sort_values(by="F3")
y = range(len(d))
plt.figure(figsize=(6, 8))
plt.errorbar(d["F3"], y, xerr=d["stderr"], fmt='o')
plt.yticks(y, d["B"]);
plt.xlabel("F3(Han, Test; Mbuti)");
We're loading the two outgroupF3 datasets:
outgroupf3dat_Han = pd.read_csv("/home/training/work/share/solutions/outgroupF3_results_Han.txt",
delim_whitespace=True,
names=["dummy", "A", "B", "C", "F3", "stderr", "Z", "nSNPs"])
outgroupf3dat_MA1 = pd.read_csv("/home/training/work/share/solutions/outgroupF3_results_MA1.txt",
delim_whitespace=True,
names=["dummy", "A", "B", "C", "F3", "stderr", "Z", "nSNPs"])
outgroupf3dat_merged = outgroupf3dat_Han.merge(outgroupf3dat_MA1, on="B", suffixes=("_Han", "_MA1"))
outgroupf3dat_merged
dummy_Han | A_Han | B | C_Han | F3_Han | stderr_Han | Z_Han | nSNPs_Han | dummy_MA1 | A_MA1 | C_MA1 | F3_MA1 | stderr_MA1 | Z_MA1 | nSNPs_MA1 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | result: | Han | Chuvash | Mbuti | 0.233652 | 0.002072 | 112.782 | 502678 | result: | MA1_HG.SG | Mbuti | 0.243818 | 0.002349 | 103.781 | 350484 |
1 | result: | Han | Albanian | Mbuti | 0.215629 | 0.002029 | 106.291 | 501734 | result: | MA1_HG.SG | Mbuti | 0.236494 | 0.002296 | 103.008 | 344332 |
2 | result: | Han | Armenian | Mbuti | 0.213724 | 0.001963 | 108.882 | 504370 | result: | MA1_HG.SG | Mbuti | 0.231399 | 0.002264 | 102.229 | 349612 |
3 | result: | Han | Bulgarian | Mbuti | 0.216193 | 0.001979 | 109.266 | 504310 | result: | MA1_HG.SG | Mbuti | 0.237498 | 0.002281 | 104.103 | 349800 |
4 | result: | Han | Czech | Mbuti | 0.218060 | 0.002002 | 108.939 | 504089 | result: | MA1_HG.SG | Mbuti | 0.243224 | 0.002328 | 104.457 | 349553 |
5 | result: | Han | Druze | Mbuti | 0.209551 | 0.001919 | 109.205 | 510853 | result: | MA1_HG.SG | Mbuti | 0.226740 | 0.002197 | 103.193 | 359004 |
6 | result: | Han | English | Mbuti | 0.216959 | 0.001973 | 109.954 | 504161 | result: | MA1_HG.SG | Mbuti | 0.243135 | 0.002317 | 104.941 | 349321 |
7 | result: | Han | Estonian | Mbuti | 0.220730 | 0.002019 | 109.332 | 503503 | result: | MA1_HG.SG | Mbuti | 0.247065 | 0.002362 | 104.619 | 348861 |
8 | result: | Han | Finnish | Mbuti | 0.223447 | 0.002044 | 109.345 | 502217 | result: | MA1_HG.SG | Mbuti | 0.245684 | 0.002379 | 103.266 | 347208 |
9 | result: | Han | French | Mbuti | 0.216623 | 0.001969 | 110.012 | 509613 | result: | MA1_HG.SG | Mbuti | 0.240235 | 0.002269 | 105.886 | 357842 |
10 | result: | Han | Georgian | Mbuti | 0.214295 | 0.001935 | 110.721 | 503598 | result: | MA1_HG.SG | Mbuti | 0.232645 | 0.002253 | 103.243 | 349082 |
11 | result: | Han | Greek | Mbuti | 0.215203 | 0.001984 | 108.465 | 507475 | result: | MA1_HG.SG | Mbuti | 0.236566 | 0.002280 | 103.757 | 355261 |
12 | result: | Han | Hungarian | Mbuti | 0.217894 | 0.001999 | 109.004 | 507409 | result: | MA1_HG.SG | Mbuti | 0.241720 | 0.002313 | 104.483 | 355340 |
13 | result: | Han | Icelandic | Mbuti | 0.218683 | 0.002015 | 108.553 | 504655 | result: | MA1_HG.SG | Mbuti | 0.244488 | 0.002386 | 102.481 | 350287 |
14 | result: | Han | Italian_North | Mbuti | 0.215332 | 0.001978 | 108.854 | 507589 | result: | MA1_HG.SG | Mbuti | 0.236407 | 0.002273 | 104.002 | 354999 |
15 | result: | Han | Italian_South | Mbuti | 0.211787 | 0.002271 | 93.265 | 492400 | result: | MA1_HG.SG | Mbuti | 0.230839 | 0.002767 | 83.427 | 321217 |
16 | result: | Han | Lithuanian | Mbuti | 0.219615 | 0.002032 | 108.098 | 503681 | result: | MA1_HG.SG | Mbuti | 0.246864 | 0.002403 | 102.718 | 348656 |
17 | result: | Han | Maltese | Mbuti | 0.210359 | 0.001956 | 107.542 | 503985 | result: | MA1_HG.SG | Mbuti | 0.230200 | 0.002259 | 101.903 | 347725 |
18 | result: | Han | Mordovian | Mbuti | 0.223469 | 0.002008 | 111.296 | 503441 | result: | MA1_HG.SG | Mbuti | 0.245284 | 0.002346 | 104.571 | 350058 |
19 | result: | Han | Norwegian | Mbuti | 0.218873 | 0.002023 | 108.197 | 504621 | result: | MA1_HG.SG | Mbuti | 0.243930 | 0.002301 | 106.031 | 350182 |
20 | result: | Han | Orcadian | Mbuti | 0.217773 | 0.002014 | 108.115 | 504993 | result: | MA1_HG.SG | Mbuti | 0.243614 | 0.002320 | 105.008 | 351053 |
21 | result: | Han | Russian | Mbuti | 0.223993 | 0.001995 | 112.274 | 506525 | result: | MA1_HG.SG | Mbuti | 0.245212 | 0.002298 | 106.698 | 355953 |
22 | result: | Han | Sardinian | Mbuti | 0.213230 | 0.001980 | 107.711 | 508413 | result: | MA1_HG.SG | Mbuti | 0.231967 | 0.002264 | 102.449 | 355548 |
23 | result: | Han | Scottish | Mbuti | 0.218489 | 0.002039 | 107.145 | 499784 | result: | MA1_HG.SG | Mbuti | 0.244598 | 0.002434 | 100.512 | 339441 |
24 | result: | Han | Sicilian | Mbuti | 0.212272 | 0.001975 | 107.486 | 505477 | result: | MA1_HG.SG | Mbuti | 0.231141 | 0.002260 | 102.297 | 351028 |
25 | result: | Han | Spanish_North | Mbuti | 0.215885 | 0.002029 | 106.383 | 500853 | result: | MA1_HG.SG | Mbuti | 0.238479 | 0.002426 | 98.319 | 341661 |
26 | result: | Han | Spanish | Mbuti | 0.213869 | 0.001975 | 108.297 | 513648 | result: | MA1_HG.SG | Mbuti | 0.235386 | 0.002257 | 104.293 | 361951 |
27 | result: | Han | Ukrainian | Mbuti | 0.218716 | 0.002007 | 108.950 | 503981 | result: | MA1_HG.SG | Mbuti | 0.243551 | 0.002345 | 103.881 | 348948 |
28 | result: | Han | Levanluhta | Mbuti | 0.236252 | 0.002383 | 99.123 | 263049 | result: | MA1_HG.SG | Mbuti | 0.247704 | 0.003055 | 81.090 | 172055 |
29 | result: | Han | BolshoyOleniOstrov | Mbuti | 0.247814 | 0.002177 | 113.849 | 457102 | result: | MA1_HG.SG | Mbuti | 0.256041 | 0.002624 | 97.561 | 305851 |
30 | result: | Han | ChalmnyVarre | Mbuti | 0.233499 | 0.002304 | 101.345 | 366220 | result: | MA1_HG.SG | Mbuti | 0.249619 | 0.002862 | 87.212 | 239594 |
31 | result: | Han | Saami.DG | Mbuti | 0.236198 | 0.002274 | 103.852 | 489038 | result: | MA1_HG.SG | Mbuti | 0.251530 | 0.002622 | 95.922 | 326072 |
And we can now plot the two F3 statistics against each other:
plt.figure(figsize=(10, 10))
plt.scatter(x=outgroupf3dat_merged["F3_Han"], y=outgroupf3dat_merged["F3_MA1"])
plt.xlabel("F3(Test, Han; Mbuti)");
plt.ylabel("F3(Test, MA1; Mbuti)");
This isn't very useful yet, as we need to again label the points
plt.figure(figsize=(10, 10))
plt.scatter(x=outgroupf3dat_merged["F3_Han"], y=outgroupf3dat_merged["F3_MA1"])
for i, row in outgroupf3dat_merged.iterrows():
plt.annotate(row["B"], (row["F3_Han"], row["F3_MA1"]))
plt.xlabel("F3(Test, Han; Mbuti)");
plt.ylabel("F3(Test, MA1; Mbuti)");