# Tab-separated input file; each line holds: insect order, [species],
# CYP name, count.
fn = 'insect_p450.txt'

# Count the lines of the input. The context manager closes the handle, and
# the result is printed explicitly because a bare expression is only echoed
# by an interactive session, not by a script.
with open(fn) as fh:
    n_lines = sum(1 for _ in fh)
print(n_lines)
5357
# Preview the head of the input (indices 0 through 101) as (index, raw line)
# tuples. Printing an explicit tuple gives the same output on Python 2 and 3.
with open(fn) as fh:
    for i, line in enumerate(fh):
        print((i, line))
        if i > 100:
            break
(0, 'Blattodea\t[Blattella germanica]\tCYP15A1\t1\n') (1, 'Blattodea\t[Diploptera punctata]\tCYP15A1\t1\n') (2, 'Blattodea\t[Diploptera punctata]\tCYP9E1\t1\n') (3, 'Blattodea\t[Blaberus discoidalis]\tCYP4C1\t1\n') (4, 'Blattodea\t[Blattella germanica]\tCYP4G19\t1\n') (5, 'Blattodea\t[Blattella germanica]\tCYP4C1\t2\n') (6, 'Blattodea\t[Blattella germanica]\tCYP9E2\t1\n') (7, 'Blattodea\t[Blattella germanica]\tCYP6K1\t1\n') (8, 'Blattodea\t[Blattella germanica]\tCYP6J1\t1\n') (9, 'Blattodea\t[Blattella germanica]\tCYP6L1\t1\n') (10, 'Blattodea\t[Diploptera punctata]\tCYP4C1\t1\n') (11, 'Blattodea\t[Diploptera punctata]\tCYP4C5\t1\n') (12, 'Blattodea\t[Diploptera punctata]\tCYP4C4\t1\n') (13, 'Blattodea\t[Diploptera punctata]\tCYP4C3\t1\n') (14, 'Coleoptera\t[Anoplophora glabripennis]\tCYP12H2\t2\n') (15, 'Coleoptera\t[Anoplophora glabripennis]\tCYP9D4\t1\n') (16, 'Coleoptera\t[Anoplophora glabripennis]\tCYP4V18\t1\n') (17, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6M11\t6\n') (18, 'Coleoptera\t[Anoplophora glabripennis]\tCYP9D1\t1\n') (19, 'Coleoptera\t[Anoplophora glabripennis]\tCYP18A1\t1\n') (20, 'Coleoptera\t[Anoplophora glabripennis]\tCYP9J24\t1\n') (21, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6B26\t1\n') (22, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6N11\t3\n') (23, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6N6\t1\n') (24, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6B10\t1\n') (25, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6P5\t1\n') (26, 'Coleoptera\t[Anoplophora glabripennis]\tCYP4Q1\t1\n') (27, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6F1\t2\n') (28, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6Y2\t2\n') (29, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6M7\t2\n') (30, 'Coleoptera\t[Anoplophora glabripennis]\tCYP12H5\t1\n') (31, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6N12\t3\n') (32, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6S2\t1\n') (33, 'Coleoptera\t[Anoplophora glabripennis]\tCYP4Q2\t1\n') (34, 'Coleoptera\t[Anoplophora 
glabripennis]\tCYP6M3\t3\n') (35, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6A26\t1\n') (36, 'Coleoptera\t[Anoplophora glabripennis]\tCYP9E2\t1\n') (37, 'Coleoptera\t[Anoplophora glabripennis]\tCYP9D5\t4\n') (38, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6N9\t2\n') (39, 'Coleoptera\t[Anoplophora glabripennis]\tCYP4Q4\t3\n') (40, 'Coleoptera\t[Anoplophora glabripennis]\tCYP4D8\t1\n') (41, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6S1\t2\n') (42, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6K1\t1\n') (43, 'Coleoptera\t[Anoplophora glabripennis]\tCYP4V4\t1\n') (44, 'Coleoptera\t[Anoplophora glabripennis]\tCYP12F2\t1\n') (45, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6L1\t1\n') (46, 'Coleoptera\t[Anoplophora glabripennis]\tCYP9J17\t1\n') (47, 'Coleoptera\t[Anoplophora glabripennis]\tCYP12H1\t1\n') (48, 'Coleoptera\t[Anoplophora glabripennis]\tCYP6P10\t1\n') (49, 'Coleoptera\t[Brassicogethes aeneus]\tCYP6L1\t2\n') (50, 'Coleoptera\t[Brassicogethes aeneus]\tCYP4Q4\t2\n') (51, 'Coleoptera\t[Brassicogethes aeneus]\tCYP4V20\t1\n') (52, 'Coleoptera\t[Brassicogethes aeneus]\tCYP4Q7\t1\n') (53, 'Coleoptera\t[Brassicogethes aeneus]\tCYP4V3\t1\n') (54, 'Coleoptera\t[Brassicogethes aeneus]\tCYP4V22\t1\n') (55, 'Coleoptera\t[Brassicogethes aeneus]\tCYP4H10\t1\n') (56, 'Coleoptera\t[Brassicogethes aeneus]\tCYP6M11\t1\n') (57, 'Coleoptera\t[Brontispa longissima]\tCYP4U1\t3\n') (58, 'Coleoptera\t[Brontispa longissima]\tCYP4H14\t1\n') (59, 'Coleoptera\t[Brontispa longissima]\tCYP4V20\t1\n') (60, 'Coleoptera\t[Hypothenemus hampei]\tCYP6Y3\t1\n') (61, 'Coleoptera\t[Hypothenemus hampei]\tCYP6M7\t1\n') (62, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP9D5\t2\n') (63, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6N9\t2\n') (64, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6M4\t2\n') (65, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP4G29\t1\n') (66, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP4H10\t3\n') (67, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP4D2\t5\n') (68, 
'Coleoptera\t[Leptinotarsa decemlineata]\tCYP4G34\t1\n') (69, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP4Q10\t1\n') (70, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP4Q11\t1\n') (71, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6N29\t2\n') (72, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6N12\t5\n') (73, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6N1\t1\n') (74, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP9A20\t5\n') (75, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP9A21\t1\n') (76, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP9D4\t2\n') (77, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP9J2\t3\n') (78, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP9J15\t1\n') (79, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP9E2\t3\n') (80, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP12J1\t1\n') (81, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP12J2\t1\n') (82, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP4G16\t1\n') (83, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP18A1\t2\n') (84, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP15A1\t1\n') (85, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP1B1\t1\n') (86, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6G4\t1\n') (87, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP4C1\t2\n') (88, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6P5\t1\n') (89, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP12H2\t1\n') (90, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP49A1\t4\n') (91, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP12H6\t1\n') (92, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP12F2\t1\n') (93, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP24A1\t1\n') (94, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6N28\t1\n') (95, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6N11\t1\n') (96, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6M11\t3\n') (97, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6M5\t1\n') (98, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6P15\t1\n') (99, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6F1\t2\n') (100, 
'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6N17\t1\n') (101, 'Coleoptera\t[Leptinotarsa decemlineata]\tCYP6M9\t1\n')
# Total count (4th column) per insect order (1st column).
cnt_insect_prot = {}
with open(fn) as fh:
    for line in fh:
        insect_ord, insect_spe, cyp, cnt = line.strip().split('\t')
        # dict.get replaces the Python 2-only has_key() test.
        cnt_insect_prot[insect_ord] = cnt_insect_prot.get(insect_ord, 0) + int(cnt)
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import locale
def draw_pie_chart(title, dictionary):
    """Draw and display a pie chart of the counts in *dictionary*.

    Wedges are laid out in ascending order of count, starting at 90 degrees.
    A wedge smaller than 2% of the total gets an empty label and no
    percentage text; larger wedges are labelled "<key>, <count>" with the
    count formatted using locale digit grouping.

    Args:
        title: Text used as the chart title.
        dictionary: Mapping from label to a numeric count.
    """
    try:
        locale.setlocale(locale.LC_NUMERIC, 'ja_JP')
    except locale.Error:
        # The ja_JP locale is not installed on every system; keep the
        # current locale instead of aborting the plot.
        pass

    # Built-in sum() accepts a dict view on both Python 2 and Python 3.
    total = sum(dictionary.values())

    # plt.pie cycles through the colour sequence on its own, so the palette
    # does not need to be repeated (the old repetition produced an empty
    # list for fewer than five entries).
    colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral', 'orange']

    labels = []
    sizes = []
    for k, v in sorted(dictionary.items(), key=lambda x: x[1]):
        sizes.append(v)
        if v < (total * 0.02):
            # Suppress labels on tiny wedges to avoid overlapping text.
            labels.append('')
        else:
            label = "{0}, {1}".format(k, locale.format_string('%d', v, True))
            labels.append(label)

    plt.pie(sizes, labels=labels, colors=colors,
            # Percentages are shown only for wedges of at least 2%.
            autopct=lambda p: '{:.1f}%'.format(p) if p >= 2 else '',
            shadow=False, startangle=90)
    plt.axis('equal')
    plt.title(title)
    plt.show()
draw_pie_chart("CYP", cnt_insect_prot)

# Collect the distinct species (2nd column) seen for each insect order,
# keeping first-seen order.
lst_insect_spe = {}
with open(fn) as fh:
    for line in fh:
        insect_ord, insect_spe, cyp, cnt = line.strip().split('\t')
        species = lst_insect_spe.setdefault(insect_ord, [])
        if insect_spe not in species:
            species.append(insect_spe)

# Number of distinct species per order.
cnt_insect_spe = {}
for k in lst_insect_spe:
    # Explicit tuple so the output looks the same on Python 2 and 3.
    print((k, len(lst_insect_spe[k])))
    cnt_insect_spe[k] = len(lst_insect_spe[k])
('Blattodea', 3) ('Hemiptera', 25) ('Lepidoptera', 42) ('Coleoptera', 23) ('Psocodea', 2) ('Orthoptera', 3) ('Hymenoptera', 50) ('Thysanoptera', 2) ('Diptera', 72)
draw_pie_chart("Species with CYP", cnt_insect_spe)

# Collect the distinct genera seen for each insect order. The genus is the
# first whitespace-separated token of the species column (it still carries
# the leading '[' of the bracketed name, which does not affect uniqueness).
lst_insect_genus = {}
with open(fn) as fh:
    for line in fh:
        insect_ord, insect_spe, cyp, cnt = line.strip().split('\t')
        insect_genus = insect_spe.split()[0]
        genera = lst_insect_genus.setdefault(insect_ord, [])
        if insect_genus not in genera:
            genera.append(insect_genus)

# Number of distinct genera per order.
cnt_insect_genus = {}
for k in lst_insect_genus:
    # Explicit tuple so the output looks the same on Python 2 and 3.
    print((k, len(lst_insect_genus[k])))
    cnt_insect_genus[k] = len(lst_insect_genus[k])
('Blattodea', 3) ('Hemiptera', 20) ('Lepidoptera', 29) ('Coleoptera', 17) ('Psocodea', 1) ('Orthoptera', 2) ('Hymenoptera', 33) ('Thysanoptera', 2) ('Diptera', 20)
draw_pie_chart("Genus CYP", cnt_insect_genus)

# Total count per full CYP name (3rd column), across all species.
cypd = {}
with open(fn) as fh:
    for line in fh:
        insect_order, insect_spe, cyp, cnt = line.strip().split('\t')
        cypd[cyp] = cypd.get(cyp, 0) + int(cnt)
draw_pie_chart("CYP", cypd)
import re

# Total count per CYP name with the trailing digits removed
# (e.g. 'CYP15A1' -> 'CYP15A').
cypd = {}
with open(fn) as fh:
    for line in fh:
        insect_order, insect_spe, cyp, cnt = line.strip().split('\t')
        cyp = re.sub(r'\d+$', '', cyp)
        cypd[cyp] = cypd.get(cyp, 0) + int(cnt)
draw_pie_chart("CYPs", cypd)
import re

# Total count per CYP name with the trailing "<capital letter><digits>"
# removed (e.g. 'CYP15A1' -> 'CYP15').
cypd = {}
with open(fn) as fh:
    for line in fh:
        insect_order, insect_spe, cyp, cnt = line.strip().split('\t')
        cyp = re.sub(r'[A-Z]\d+$', '', cyp)
        cypd[cyp] = cypd.get(cyp, 0) + int(cnt)
draw_pie_chart("CYPs", cypd)
import re

# Totals at three naming levels, over every line of the input:
#   cypd1: name minus trailing "<capital letter><digits>" ('CYP15A1' -> 'CYP15')
#   cypd2: name minus trailing digits                     ('CYP15A1' -> 'CYP15A')
#   cypd3: full CYP name
cypd1 = {}
cypd2 = {}
cypd3 = {}
with open(fn) as fh:
    for line in fh:
        insect_order, insect_spe, cyp, cnt = line.strip().split('\t')
        cyp1 = re.sub(r'[A-Z]\d+$', '', cyp)
        cyp2 = re.sub(r'\d+$', '', cyp)
        for totals, key in ((cypd1, cyp1), (cypd2, cyp2), (cypd3, cyp)):
            totals[key] = totals.get(key, 0) + int(cnt)
def draw_pie_chart2(title, dictionary):
    """Draw a pie chart of the counts in *dictionary* on the current axes.

    Wedges are laid out clockwise in descending order of count, starting at
    90 degrees, with labels placed inside the pie (labeldistance=0.5). A
    wedge smaller than 2% of the total gets an empty label; larger wedges
    are labelled "<key>, <count>" with locale digit grouping. No percentage
    text is drawn and the figure is not shown, so the caller can compose
    several charts as subplots.

    Args:
        title: Text used as the chart title.
        dictionary: Mapping from label to a numeric count.
    """
    try:
        locale.setlocale(locale.LC_NUMERIC, 'ja_JP')
    except locale.Error:
        # The ja_JP locale is not installed on every system; keep the
        # current locale instead of aborting the plot.
        pass

    # Built-in sum() accepts a dict view on both Python 2 and Python 3.
    total = sum(dictionary.values())

    # plt.pie cycles through the colour sequence on its own, so the palette
    # does not need to be repeated (the old repetition produced an empty
    # list for fewer than five entries).
    colors = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral', 'orange']

    labels = []
    sizes = []
    for k, v in sorted(dictionary.items(), key=lambda x: x[1], reverse=True):
        sizes.append(v)
        if v < (total * 0.02):
            # Suppress labels on tiny wedges to avoid overlapping text.
            labels.append('')
        else:
            label = "{0}, {1}".format(k, locale.format_string('%d', v, True))
            labels.append(label)

    plt.pie(sizes, labels=labels, colors=colors,
            labeldistance=0.5,
            counterclock=False,
            shadow=False, startangle=90)
    plt.axis('equal')
    plt.title(title)
# One row of three pies: the three naming-level totals side by side in a
# 2x3 subplot grid (positions 1 to 3).
plt.figure(figsize=(20, 15))
for position, counts in enumerate((cypd1, cypd2, cypd3), 1):
    plt.subplot(2, 3, position)
    draw_pie_chart2("All", counts)
import re

# Same three naming-level totals as for the whole data set, restricted to
# lines whose species column is exactly "[Bombyx mori]".
cypd1 = {}
cypd2 = {}
cypd3 = {}
with open(fn) as fh:
    for line in fh:
        insect_order, insect_spe, cyp, cnt = line.strip().split('\t')
        if insect_spe != "[Bombyx mori]":
            continue
        cyp1 = re.sub(r'[A-Z]\d+$', '', cyp)
        cyp2 = re.sub(r'\d+$', '', cyp)
        for totals, key in ((cypd1, cyp1), (cypd2, cyp2), (cypd3, cyp)):
            totals[key] = totals.get(key, 0) + int(cnt)

# Three pies side by side in a 2x3 subplot grid (positions 1 to 3).
plt.figure(figsize=(20, 15))
for position, counts in enumerate((cypd1, cypd2, cypd3), 1):
    plt.subplot(2, 3, position)
    draw_pie_chart2("B.mori", counts)
import re

# Same three naming-level totals as for the whole data set, restricted to
# lines whose species column is exactly "[Papilio xuthus]".
cypd1 = {}
cypd2 = {}
cypd3 = {}
with open(fn) as fh:
    for line in fh:
        insect_order, insect_spe, cyp, cnt = line.strip().split('\t')
        if insect_spe != "[Papilio xuthus]":
            continue
        cyp1 = re.sub(r'[A-Z]\d+$', '', cyp)
        cyp2 = re.sub(r'\d+$', '', cyp)
        for totals, key in ((cypd1, cyp1), (cypd2, cyp2), (cypd3, cyp)):
            totals[key] = totals.get(key, 0) + int(cnt)

# Three pies side by side in a 2x3 subplot grid (positions 1 to 3).
plt.figure(figsize=(20, 15))
for position, counts in enumerate((cypd1, cypd2, cypd3), 1):
    plt.subplot(2, 3, position)
    draw_pie_chart2("P.xuthus", counts)
import re

# Nested totals: insect order -> CYP key -> summed count, at three naming
# levels:
#   cypd1: name minus trailing "<capital letter><digits>" ('CYP15A1' -> 'CYP15')
#   cypd2: name minus trailing digits                     ('CYP15A1' -> 'CYP15A')
#   cypd3: full CYP name
cypd1 = {}
cypd2 = {}
cypd3 = {}
with open(fn) as fh:
    for line in fh:
        insect_order, insect_spe, cyp, cnt = line.strip().split('\t')
        cyp1 = re.sub(r'[A-Z]\d+$', '', cyp)
        cyp2 = re.sub(r'\d+$', '', cyp)
        for per_order, key in ((cypd1, cyp1), (cypd2, cyp2), (cypd3, cyp)):
            # setdefault creates the inner dict the first time an order is seen.
            counts = per_order.setdefault(insect_order, {})
            counts[key] = counts.get(key, 0) + int(cnt)
# Orders to plot, one subplot row each, with the three naming levels as
# columns.
insect_orders = ['Diptera', 'Coleoptera', 'Hemiptera', 'Hymenoptera', 'Lepidoptera']
plt.figure(figsize=(15, 10 * len(insect_orders)))
index = 0
for insect_order in insect_orders:
    for per_order in (cypd1, cypd2, cypd3):
        index += 1
        # The grid has one row per plotted order, matching the figure height
        # above. Sizing it by the number of orders in the data left empty
        # rows at the bottom of the figure.
        plt.subplot(len(insect_orders), 3, index)
        draw_pie_chart2(insect_order, per_order[insect_order])