import numpy as np
import pylab as pb
import GPy
import Bio.Affy.CelFile as ss
from xlrd import open_workbook
import pickle
%pylab inline
Populating the interactive namespace from numpy and matplotlib
num_genes=10000
from xlrd import open_workbook
wb = open_workbook('ann1.xlsx')
ann=np.repeat("ccccccccccccc",num_genes*2).flatten()
ann=ann.reshape(num_genes,2)
for ss in wb.sheets():
for row in range(1,num_genes+1):
ann[row-1,0] = ss.cell(row,0).value
ann[row-1,1] = ss.cell(row,1).value
#print ann[row,1]
#print ann[36,0], ann[36,1]
def maxx(odds):
if odds[0]<0.5 :
if odds[3]<0.5:
if odds[5]<0.5:
return 4
else :
return 3
elif odds[4]<0.5:
return 4
else : return 2
elif odds[1]<0.5:
if odds[5]<0.5:
return 4
else :
return 3
elif odds[2]<0.5:
return 4
else : return 1
max1=np.zeros(num_genes).flatten()
num_genes=1000
import pickle
kk=np.repeat('cccccccccccccc',(num_genes+1)*25).reshape(num_genes+1,25)
odds=np.zeros(6)
kk[0,:]=['Prob_ID','Gene symbol', 'likelihood value C1','likelihood value C2','likelihood value C3','likelihood value C4','propab. C1','propab.C2','propab. C3'
,'propab. C4','ODDS C1/c2','ODDS C1/c3','ODDS C1/c4' ,'ODDS . C2/c3',' ODDS . C2/c4','ODDS . C3/c4', ' logit C1/c2',' logit C1/c3',' logit C1/c4'
,' logit C2/c3',' logit C2/c4',' logit C3/c4','best model','Gene No.','model 1 or 2' ]
new=[]
m=[0,1,2,3]
for j in range(0,4):
new.append(numpy.load("Case{}_models.npy".format(j+1)))
norm_case=np.zeros(4*num_genes).reshape(4,num_genes)
for i in range(0,num_genes):
for j in range(0,4):
f= open('Case{}_models{}/model_{}.pickle'.format(j+1,int (new[j][i,0]),int (new[j][i,1])),'r' )
m[j]=pickle.load(f).log_likelihood()
m1=np.exp(m)
prop=m1/np.sum(m1)
l=0
for k in range(0,3):
for j in range(k+1,4):
odds[l]=(prop[k]/prop[j])/(1+(prop[k]/prop[j]))
l=l+1
logit=np.log(odds)
max1[i]=int(maxx(odds)) # find the best model by it's Odds value
kk[i+1,:]=[ann[i,0],ann[i,1],m1[0],m1[1],m1[2],m1[3],prop[0],prop[1],prop[2],prop[3],
odds[0],odds[1],odds[2],odds[3],odds[4],odds[5],logit[0],logit[1],logit[2],logit[3]
,logit[4],logit[5] ,int(max1[i]),i ,int(new[int(max1[i])-1][i,0])]
/Users/suraAlrashid/anaconda/lib/python2.7/site-packages/GPy/kern/parts/prod.py:122: RuntimeWarning: invalid value encountered in equal self._params == self._get_params().copy()
for i in range(0,25):
print kk[0,i], '\t:\t', kk[1,i]
Prob_ID : 1415670_at Gene symbol : Copg likelihood val : 1.57926678831e likelihood val : 1.5148095285e- likelihood val : 8.09685974376e likelihood val : 3.91435140224e propab. C1 : 0.154541559933 propab.C2 : 0.014823399647 propab. C3 : 0.792330557839 propab. C4 : 0.038304482580 ODDS C1/c2 : 0.912476584977 ODDS C1/c3 : 0.163212705319 ODDS C1/c4 : 0.801372731941 ODDS . C2/c3 : 0.018365021332 ODDS . C2/c4 : 0.279013561732 ODDS . C3/c4 : 0.953885303754 logit C1/c2 : -0.09159285415 logit C1/c3 : -1.81270098826 logit C1/c4 : -0.22142910688 logit C2/c3 : -3.99730743813 logit C2/c4 : -1.27649488998 logit C3/c4 : -0.04721184143 best model : 3 Gene No. : 0 model 1 or 2 : 1
numpy.save("All_Prop_models.npy", kk)
import csv
with open('All_Prop_models.csv', 'wb') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=' ')
for rowx in kk:
spamwriter.writerow(rowx)
kk=numpy.load("All_Prop_models.npy")
s=np.repeat('ccccccccc',25)
for i in range(1,500):
for j in range(i+1,num_genes+1):
if kk[i,int(kk[i,22])+5]<kk[j,int(kk[j,22])+5]:
#print i , j
s[:]=kk[i,:]
kk[i,:]=kk[j,:]
kk[j,:]=s[:]
#print kk[i,:],kk[j,:]
import csv
with open('After_Ranking_Prop_models.csv', 'wb') as csvfile:
spamwriter = csv.writer(csvfile, delimiter=' ')
for rowx in kk:
spamwriter.writerow(rowx)
# another way to save file
numpy.save("After_Ranking_Prop_models.npy", kk)
kk=numpy.load("After_Ranking_Prop_models.npy")
list_Ntg_Ntg_down=[22,71,0,89,74,39,6,52,56,75,18,50,8,29,57,63,4,36,12,190,697,513,264,600,105,808,836,412,11,259,357,925,245,830,427,162,617,339,792,900,612,665,608,881,463,129,338,787,968,649]
list_Ntg_Ntg_up=[269,402,396,651,519,712,222,761,959,639,140,701,236,466]
list_Ntg_SOD1=[484,994,920,858,737,390]
list_same_behaver =[430,318,242,352,967,195]
x=0
for i in range(0,1000):
if int(kk[i+1,23]) in list_Ntg_Ntg_down :
f=open('Case{}_models{}/model_{}.pickle'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])),'r')
mm= pickle.load(f)
mm.plot(fixed_inputs=[(1, 0), (2, 0)],which_data_rows=range(16),linecol='red')#,fillcol='red')
mm.plot(fixed_inputs=[(1, 0), (2, 1)],which_data_rows=range(16,32), linecol='blue',ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 0)], which_data_rows=range(32,48),linecol='green',resolution=8, ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 1)], which_data_rows=range(48,64),linecol='yellow',ax=pb.gca())
pb.text(140, -.1,'Case{}_models{}/model_{}'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])), fontsize = 12, color = 'b')
pb.xlabel('The time series ')
pb.ylabel('Gene expression {} \n for each mutation for each strain '.format(kk[i+1,23]))
pb.title( ' The gene "{}" and the probe_ID "{}"'.format( kk[i+1,1] ,kk[i+1,0]))
#savefig('only_g93A/fig{}.jpg'.format(i))
for i in range(0,1000):
if int(kk[i+1,23]) in list_Ntg_Ntg_up :
f=open('Case{}_models{}/model_{}.pickle'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])),'r')
mm= pickle.load(f)
mm.plot(fixed_inputs=[(1, 0), (2, 0)],which_data_rows=range(16),linecol='red')#,fillcol='red')
mm.plot(fixed_inputs=[(1, 0), (2, 1)],which_data_rows=range(16,32), linecol='blue',ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 0)], which_data_rows=range(32,48),linecol='green',resolution=8, ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 1)], which_data_rows=range(48,64),linecol='yellow',ax=pb.gca())
pb.text(140, -.1,'Case{}_models{}/model_{}'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])), fontsize = 12, color = 'b')
pb.xlabel('The time series ')
pb.ylabel('Gene expression {} \n for each mutation for each strain '.format(kk[i+1,23]))
pb.title( ' The gene "{}" and the probe_ID "{}"'.format( kk[i+1,1] ,kk[i+1,0]))
#savefig('only_g93A/fig{}.jpg'.format(i))
for i in range(0,1000):
if int(kk[i+1,23]) in list_Ntg_SOD1 :
f=open('Case{}_models{}/model_{}.pickle'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])),'r')
mm= pickle.load(f)
mm.plot(fixed_inputs=[(1, 0), (2, 0)],which_data_rows=range(16),linecol='red')#,fillcol='red')
mm.plot(fixed_inputs=[(1, 0), (2, 1)],which_data_rows=range(16,32), linecol='blue',ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 0)], which_data_rows=range(32,48),linecol='green',resolution=8, ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 1)], which_data_rows=range(48,64),linecol='yellow',ax=pb.gca())
pb.text(140, -.1,'Case{}_models{}/model_{}'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])), fontsize = 12, color = 'b')
pb.xlabel('The time series ')
pb.ylabel('Gene expression {} \n for each mutation for each strain '.format(kk[i+1,23]))
pb.title( ' The gene "{}" and the probe_ID "{}"'.format( kk[i+1,1] ,kk[i+1,0]))
#savefig('only_g93A/fig{}.jpg'.format(i))
for i in range(0,1000):
if int(kk[i+1,23]) in list_same_behaver :
f=open('Case{}_models{}/model_{}.pickle'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])),'r')
mm= pickle.load(f)
mm.plot(fixed_inputs=[(1, 0), (2, 0)],which_data_rows=range(16),linecol='red')#,fillcol='red')
mm.plot(fixed_inputs=[(1, 0), (2, 1)],which_data_rows=range(16,32), linecol='blue',ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 0)], which_data_rows=range(32,48),linecol='green',resolution=8, ax=pb.gca())
mm.plot(fixed_inputs=[(1, 1), (2, 1)], which_data_rows=range(48,64),linecol='yellow',ax=pb.gca())
pb.text(140, -.1,'Case{}_models{}/model_{}'.format(int(kk[i+1,22]),int(kk[i+1,24]),int(kk[i+1,23])), fontsize = 12, color = 'b')
pb.xlabel('The time series ')
pb.ylabel('Gene expression {} \n for each mutation for each strain '.format(kk[i+1,23]))
pb.title( ' The gene "{}" and the probe_ID "{}"'.format( kk[i+1,1] ,kk[i+1,0]))
#savefig('only_g93A/fig{}.jpg'.format(i))