"""Turn a tree table into node and link lists and write an output file.

The script reads ``gh43treetable.txt`` as strings, names its five columns
``ancid``, ``desc1``, ``desc2``, ``branchlength1`` and ``branchlength2``,
cleans the three ID columns, collects the distinct IDs, pairs every
``ancid`` with each of its two descendants, and writes the file named by
``gexf_filename``.
"""
import os

import pandas as pd

json_filename = 'x.json'  # accidentally erased json part, re-integrate later
gexf_filename = 'gh43.gexf'

COLUMN_NAMES = ['ancid', 'desc1', 'desc2', 'branchlength1', 'branchlength2']
ID_FIELDS = ['ancid', 'desc1', 'desc2']


def clean_id(raw_id):
    """Return *raw_id* without outer whitespace and with spaces as underscores."""
    return raw_id.strip().replace(" ", "_")


def normalize_id_columns(df):
    """Clean every ID column of *df* in place and return *df*.

    Each column is reassigned as a whole.  Writing single cells through
    ``df[field][idx] = ...`` is chained assignment, which pandas does not
    guarantee to propagate back to the frame.

    Raises:
        AttributeError: if an ID cell is not a string (e.g. a missing value).
    """
    for field in ID_FIELDS:
        df[field] = df[field].map(clean_id)
    return df


def collect_ids(df):
    """Return the values of the ID columns row by row, duplicates included."""
    ids = []
    for row_ids in zip(*(df[field] for field in ID_FIELDS)):
        ids.extend(row_ids)
    return ids


def unique_in_order(items):
    """Return the distinct *items* in order of first appearance.

    Unlike ``list(set(items))`` the result is the same on every run, so the
    position of an ID stays stable between executions.
    """
    seen = set()
    unique = []
    for item in items:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique


def build_link_list(df):
    """Return ``[ancid, descendant]`` pairs: two per row, desc1 before desc2."""
    links = []
    for ancestor, first, second in zip(df['ancid'], df['desc1'], df['desc2']):
        links.append([ancestor, first])
        links.append([ancestor, second])
    return links


def write_gexf(path, id_list, link_list):
    """Write the output file at *path* in a single pass.

    Args:
        path: file to create or overwrite; its name is also written into
            the file body.
        id_list: one line is written per entry.
        link_list: one line is written per entry.
    """
    # NOTE(review): the literals below hold only whitespace and "Gexf.net".
    # The GEXF/XML markup (and presumably the per-node / per-link values)
    # appears to have been lost from this file -- restore it before relying
    # on the output.  The strings are reproduced unchanged here.
    # NOTE(review): one write per loop iteration is assumed; the original
    # layout did not show where each loop body ended -- confirm.
    with open(path, "w") as f:
        f.write('\n \n Gexf.net\n')
        f.write(' ')
        f.write(path)
        f.write('\n \n \n')
        f.write(' \n')
        for _ in id_list:
            f.write(' \n')
        f.write(' \n')
        f.write(' \n')
        for _ in link_list:
            f.write(' \n')
        f.write(' \n')
        f.write(' \n')
        f.write('\n')


if __name__ == "__main__":
    # Alternative working directory used on another machine:
    # os.chdir("C:/Users/David/Documents/Dropbox")
    os.chdir("C:/_Dropbox/Dropbox")

    df = pd.read_csv("gh43treetable.txt", dtype=str)
    # A flat list keeps the columns a plain Index; a nested list would
    # create a MultiIndex and break the df['ancid'] style lookups below.
    df.columns = COLUMN_NAMES

    # replace spaces with underscores, then make a list of all ids with
    # duplicates included, then remove duplicates
    normalize_id_columns(df)
    id_list = collect_ids(df)
    print(len(id_list))
    id_list = unique_in_order(id_list)
    print(len(id_list))
    if len(df) > 2:  # sample row for eyeballing; skipped for tiny tables
        print(df.iloc[2])
    print(id_list[:10])

    # make dicts of IDs and positions in id_list
    # (neither dict is read in the visible part of this script)
    dict_pti = dict(enumerate(id_list))
    dict_itp = {node_id: pos for pos, node_id in enumerate(id_list)}

    # make link list
    link_list = build_link_list(df)
    print(link_list[:10])

    # write gexf
    write_gexf(gexf_filename, id_list, link_list)