import pandas as pd
import os
# Work from the directory that holds the per-species KOG summary tables.
os.chdir("/home/zhanglab1/ndong/Lymnaea_CNS_transcriptome_files/7_Interspecies_comparison/7f_KOG_comparisons")

# One tab-separated summary file per species; each is read with its 'KOG'
# column as the row index.
filelist = [
    "MM_KOG_summary.txt",
    "XT_KOG_summary.txt",
    "DR_KOG_summary.txt",
    "LS_KOG_summary.txt",
    "DM_KOG_summary.txt",
    "CE_KOG_summary.txt",
]

data = None
for summary_file in filelist:
    species_table = pd.read_csv(summary_file, sep='\t', index_col='KOG')
    # The first table seeds the result; each later table is joined onto it
    # by index (DataFrame.join defaults to a left join, so the first file's
    # rows determine the rows of the combined table).
    data = species_table if data is None else data.join(species_table)

# NOTE(review): the category letters are assigned by row position, not by KOG
# name, so this relies on the first file listing exactly 25 rows in this
# order (there is no "X" category) -- confirm against the input files.
data["Category"] = list("ABCDEFGHIJKLMNOPQRSTUVWYZ")
print(data.columns)
Index(['MM_Percentage', 'XT_Percentage', 'DR_Percentage', 'LS_Percentage', 'DM_Percentage', 'CE_Percentage', 'Category'], dtype='object')
%get data --from Python3
# `data` is the table requested from the Python3 kernel by the magic above.
# Evaluating it on its own line displays it in the cell output.
data
# Save the table as a tab-separated file, relative to the working directory.
write.table(x = data, file = "KOG_summary.tsv", sep = "\t")
| | MM_Percentage | XT_Percentage | DR_Percentage | LS_Percentage | DM_Percentage | CE_Percentage | Category |
---|---|---|---|---|---|---|---|
RNA processing and modification | 3.6242961 | 3.3408356 | 3.1163875 | 3.1594416 | 3.9281110 | 3.6816505 | A |
Chromatin structure and dynamics | 1.7940467 | 1.5262344 | 1.5016607 | 1.6327863 | 1.7365587 | 1.8617913 | B |
Energy production and conversion | 2.0353982 | 1.9623013 | 2.0033920 | 2.3348845 | 3.3972852 | 3.2371687 | C |
Cell cycle control | 2.5905068 | 2.6375018 | 2.4273903 | 2.9390154 | 2.2143020 | 2.6165716 | D |
Amino acid transport and metabolism | 1.9951730 | 2.0818681 | 2.1376581 | 2.8737040 | 3.2987033 | 2.4572291 | E |
Nucleotide transport and metabolism | 1.0257442 | 1.1323674 | 1.0105293 | 1.0858029 | 1.1223174 | 0.9141228 | F |
Carbohydrate transport and metabolism | 2.1520515 | 2.5320017 | 2.3885238 | 3.1594416 | 3.1546220 | 3.1197585 | G |
Coenzyme transport and metabolism | 0.6315366 | 0.6259671 | 0.5829977 | 0.8408850 | 0.8569045 | 0.6205971 | H |
Lipid transport and metabolism | 2.8238134 | 2.9469686 | 2.5828563 | 3.0696383 | 3.5413665 | 3.6564911 | I |
Translation, ribosomal structure and biogenesis | 2.9283990 | 3.0102687 | 2.4415236 | 3.1920973 | 4.0115265 | 4.1932238 | J |
Transcription | 10.0965406 | 8.6791391 | 10.9356229 | 8.1802596 | 8.2278001 | 8.0845354 | K |
Replication, recombination and repair | 1.5607401 | 1.9552680 | 1.5475938 | 2.3348845 | 1.7517252 | 2.0966119 | L |
Cell wall/membrane/envelope biogenesis | 1.0418343 | 1.1534674 | 0.9822627 | 2.1144583 | 1.0844013 | 0.7631667 | M |
Cell motility | 0.2131939 | 0.1969335 | 0.2261324 | 0.1714426 | 0.1819974 | 0.1425696 | N |
Posttranslational modification | 7.6548673 | 8.0531720 | 7.1938379 | 8.1149482 | 7.8638053 | 8.5625629 | O |
Inorganic ion transport and metabolism | 2.5905068 | 2.9469686 | 2.8584552 | 2.5798024 | 2.9119587 | 3.5642402 | P |
Secondary metabolites | 0.7079646 | 1.2519342 | 1.0281959 | 1.7144257 | 1.4787291 | 1.1153975 | Q |
General function prediction only | 12.3209976 | 12.3224082 | 11.8896191 | 13.2582252 | 13.2099795 | 11.8584368 | R |
Function unknown | 6.8664521 | 6.5621044 | 6.8546392 | 6.7352437 | 7.3784788 | 7.0110701 | S |
Signal transduction mechanisms | 20.8165728 | 20.6780138 | 21.9807787 | 18.3606825 | 16.9788428 | 17.4941295 | T |
Intracellular trafficking | 5.9493162 | 5.7040371 | 5.6992439 | 4.9881623 | 5.3234246 | 4.9815498 | U |
Defense mechanisms | 0.8809332 | 0.8721339 | 0.8055968 | 0.8408850 | 0.6824903 | 0.7967125 | V |
Extracellular structures | 1.6934835 | 2.2366015 | 2.1482581 | 1.8450486 | 1.2360658 | 2.5327071 | W |
Nuclear structure | 0.5430410 | 0.5978337 | 0.6077309 | 0.4735080 | 0.3943277 | 0.4780275 | Y |
Cytoskeleton | 5.4625905 | 4.9936700 | 5.0491131 | 4.0003266 | 4.0342762 | 4.1596780 | Z |