In [1]:
from trnlib.omeORM import *
from sqlalchemy import func
from IPython.core import display
from IPython.core.display import HTML
from numpy import random, average
from pandas import *

import pymongo
import simplejson as json
import math
import os


# Shared handles used by the cells below.
# `Session` is not imported by name in this notebook, so it presumably comes from the
# `trnlib.omeORM` star import -- TODO confirm.
ome = Session()

# pymongo.Connection was removed in PyMongo 3.0; MongoClient (available since 2.4) is its
# replacement.  Connection is only looked up when the installed driver has no MongoClient.
connection = (getattr(pymongo, 'MongoClient', None) or pymongo.Connection)()

# Collection holding the per-probe documents queried by get_chip_data().
db_gff = connection.gff_database
gff_data = db_gff.gff_data

# First Condition row for glucose grown anaerobically / with nitrate as electron acceptor.
anaerobic = ome.query(Condition).filter_by(carbon_source='glucose',eacceptor='anaerobic').first()
nitrate = ome.query(Condition).filter_by(carbon_source='glucose',eacceptor='NO3').first()

def html_table(matrix):
    """Render a sequence of rows as a bordered HTML table.

    Each row of `matrix` becomes one <tr>, each entry of the row one <td>.
    Entries are converted with str() and inserted as-is (no HTML escaping),
    so an entry that is already markup is rendered as markup.

    Returns an IPython HTML display object wrapping the table markup.
    """
    pieces = ['<table border="1">\n']
    for cells in matrix:
        rendered_cells = ''.join('<td>' + str(cell) + '</td>' for cell in cells)
        pieces.append('<tr>' + rendered_cells + '</tr>')
    pieces.append('</table>')
    return HTML(''.join(pieces))
In [2]:
def get_chip_data(target = 'ArcA',
                  carbon_source = 'glucose',
                  nitrogen_source = 'NH4Cl',
                  eacceptor = 'anaerobic',
                  replicates = [1,2,3],
                  leftpos = 0,
                  rightpos = 200,
                  strand = ['+','-'],
                  value = 0.0):
    """Fetch the probe documents stored in `gff_data` for one target and window.

    The growth condition is the first Condition row matching carbon_source,
    nitrogen_source and eacceptor.  The GFFExperiment rows for `target` under
    that condition, restricted to `replicates`, provide the experiment ids.

    A probe document is returned when all of the following hold:
      * its "leftpos" is >= `leftpos` and its "rightpos" is <= `rightpos`
      * its "strand" is one of `strand`
      * its "value" is >= `value`
      * its "experiment_wid" is one of the selected experiment ids

    Returns a list of the matching documents, in the order the cursor yields them.
    """
    condition_filter = and_(Condition.carbon_source == carbon_source,
                            Condition.nitrogen_source == nitrogen_source,
                            Condition.eacceptor == eacceptor)
    condition = ome.query(Condition).filter(condition_filter).first()

    experiment_filter = and_(GFFExperiment.target == target,
                             GFFExperiment.condition == condition,
                             GFFExperiment.replicate.in_(replicates))
    experiment_rows = ome.query(GFFExperiment.wid).filter(experiment_filter).all()
    gff_exp_wids = [row[0] for row in experiment_rows]

    probe_query = {"$and": [
        {"leftpos": {"$gte": leftpos}},
        {"rightpos": {"$lte": rightpos}},
        {"strand": {"$in": strand}},
        {"value": {"$gte": value}},
        {"experiment_wid": {"$in": gff_exp_wids}},
    ]}
    return list(gff_data.find(probe_query))



def nimblegen_AUC(signals):
    """Accumulate an area-style score over consecutive signal values.

    The score is the first value plus, for every later value ``b`` that
    follows a value ``a``:

        b / 2        when the signal falls (a > b)
        b - a / 2    otherwise (rising or flat)

    `signals` is any iterable of numbers and is left unmodified (earlier
    versions popped the first two items off the caller's list).  An empty
    input yields 0 and a single value yields that value, instead of raising
    IndexError.

    Note: `/` is used as written; under Python 2 integer inputs are therefore
    halved with floor division.
    """
    # Work on a private copy so the caller's list is never consumed.
    values = list(signals)
    if len(values) < 2:
        return values[0] if values else 0

    a, b = values[0], values[1]
    # First pair: both heights, minus half of the lower one.
    if a > b:
        AUC = (a + b) - b / 2
    else:
        AUC = (a + b) - a / 2

    a = b
    for b in values[2:]:
        if a > b:
            AUC += b / 2
        else:
            AUC += b - a / 2
        a = b
    return AUC
In [3]:
cpge = ChipPeakGeneExpression

# One entry per (target, electron acceptor) pair; the string "<target> <eacceptor>"
# is the key used inside every per-gene dict below.
target_conditions = [('ArcA','anaerobic'),('ArcA','NO3'),('Fnr','anaerobic'),('Fnr','NO3')]
heatmap_keys = [t + ' ' + e for t, e in target_conditions]
# Value stored for a gene/condition pair that has no (significant) record.
placeholder_value = 0.1

# data  : gene name -> {key: signed fold change}, filled only when fdr < 0.05
# data2 : gene name -> {key: ChIP value},         filled for every record
data = {}
data2 = {}
for target,eacceptor in target_conditions:
    key = target + ' ' + eacceptor
    for x in ome.query(cpge).filter(and_(cpge.target == target, cpge.eacceptor == eacceptor)).all():
        # The real per-peak value, nimblegen_AUC over get_chip_data(target=x.target,
        # eacceptor=x.eacceptor, leftpos=x.leftpos, rightpos=x.rightpos), is disabled;
        # every record currently contributes a constant.
        ChIP_value = 1.
        # Flip the sign of the fold change when target1 is not the wild type.
        if x.target1 != 'wt': inv = -1.0
        else: inv = 1

        # Create both per-gene dicts on first sight of the gene.  This replaces the
        # nested bare `except:` blocks, which also swallowed unrelated errors.
        if x.gene_name not in data:
            data[x.gene_name] = dict.fromkeys(heatmap_keys, placeholder_value)
            data2[x.gene_name] = dict.fromkeys(heatmap_keys, placeholder_value)

        if x.fdr < .05: data[x.gene_name][key] = x.fold_change*inv
        data2[x.gene_name][key] = ChIP_value


data_frame = DataFrame(data)
data_frame2 = DataFrame(data2)
In [ ]:
def write_heatmap_json(genes, name):
    """Write the heat-map values for `genes` to the file `<name>.json`.

    For each gene the label is the name of the first TU whose genes contain
    it; the gene name itself is used when that TU name is the empty string or
    when no TU row is found.  The values come from the module-level
    `data_frame` ("data") and `data_frame2` ("data2"), so every gene must be a
    column of both frames.

    genes -- iterable of gene names
    name  -- output path without the ".json" extension

    Any existing `<name>.json` is removed first.  Returns None.
    """
    name_map = {}
    for gene_name in genes:
        gene = ome.query(Gene).filter_by(name=gene_name).first()
        tu_row = ome.query(TU.name).filter(TU.genes.contains(gene)).first()
        # first() returns None when nothing matches; treat that like an empty TU name
        # instead of failing on None[0].
        x = tu_row[0] if tu_row is not None else ''
        if x == '': x = gene_name
        name_map[gene_name] = x

    data_json = json.dumps({"genes":[{"name": name_map[gene_name] , "data": list(data_frame[gene_name].values),
                                                     "data2": list(data_frame2[gene_name].values)}
                                 for gene_name in genes],
                            "headers":[{"name" : "ArcA anaerobic"},{"name" : "ArcA NO3"},{"name" : "Fnr anaerobic"},{"name" : "Fnr NO3"}]})

    path = name+'.json'
    # Only a failed removal (typically: the file does not exist yet) is ignored.
    try: os.remove(path)
    except OSError: pass
    # The context manager closes (and flushes) the handle even if the write fails.
    with open(path,'w') as handle:
        handle.write(data_json)
   
    
# Put the rows of both frames in the order of the "headers" list written by
# write_heatmap_json.  DataFrame.reindex_axis is deprecated (and removed in pandas 1.0);
# reindex(index=...) is the equivalent call for axis 0.
heatmap_rows = Index(['ArcA anaerobic', 'ArcA NO3', 'Fnr anaerobic', 'Fnr NO3'], dtype=object)
data_frame = data_frame.reindex(index=heatmap_rows)
data_frame2 = data_frame2.reindex(index=heatmap_rows)

# One JSON file per category.  flatten_to_genes and the *_tu dictionaries are defined in
# cells further down this notebook and must have been run before this cell.
write_heatmap_json(flatten_to_genes(catabolism_tu),'catabolism')
write_heatmap_json(flatten_to_genes(anabolism_tu),'anabolism')
write_heatmap_json(flatten_to_genes(chemiosmosis_tu),'chemiosmosis')
In [29]:
%%timeit
# Times one probe lookup plus AUC for the window 449500-452000.
# NOTE(review): the divisor 6 presumably is 3 replicates x 2 strands -- confirm.
nimblegen_AUC([x['value'] for x in get_chip_data(leftpos=449500,rightpos=452000,replicates=[1,2,3],strand=['+','-'])])/6
1 loops, best of 3: 713 ms per loop
In [4]:
def flatten_to_genes(category):
    """Flatten a category dictionary into a single list of gene names.

    `category` maps a label either directly to a sequence of gene names or to
    a nested dict that maps sub-labels to sequences of gene names.  The result
    contains every gene name in iteration order; duplicates are kept.

    The nesting is detected with an explicit type check rather than a bare
    `except:`, so a malformed entry raises instead of being silently turned
    into a bogus gene name.
    """
    genes = []
    for label in category:
        members = category[label]
        if isinstance(members, dict):
            for sub_label in members:
                genes.extend(members[sub_label])
        else:
            genes.extend(members)
    return genes
                
In [5]:
# Map every gene of catabolism_tu to the name of the first TU that contains it,
# falling back to the gene name when that TU name is empty.
# catabolism_tu must already be defined when this cell runs.
genes = flatten_to_genes(catabolism_tu)
name_map = {}
for gene_name in genes:
    gene_row = ome.query(Gene).filter_by(name=gene_name).first()
    tu_names = ome.query(TU.name).filter(TU.genes.contains(gene_row))
    x = tu_names.first()[0]
    if x == '':
        x = gene_name
    name_map[gene_name] = x
name_map
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-5-b706e81067c2> in <module>()
----> 1 genes = flatten_to_genes(catabolism_tu)
      2 name_map = {}
      3 for gene_name in genes:
      4     x = ome.query(TU.name).filter(TU.genes.contains(ome.query(Gene).filter_by(name=gene_name).first())).first()[0]
      5     if x == '': x = gene_name

NameError: name 'catabolism_tu' is not defined
In [ ]:
# Gene names grouped by catabolic function.  A value is either a list of gene names or
# a dict of sub-category -> list; flatten_to_genes() accepts both shapes.
# 'dppB' was listed twice in transport/amino_acid; the repeated entry is removed.
catabolism = {
    "transport": {
        "carbohydrate": ['mglA','mglB','mglC','ugpB','ugpA','ugpE','ugpC','kgtP','dcuA','focA',
                         'uraA','ptsG','lldP','exuT'],
        "amino_acid":   ['metQ','metI','metN','aroP','dppA','dppB','dppC','dppD','dppF',
                         'oppA','oppB','oppC','oppD','oppF','sstT','hisJ','hisQ',
                         'hisM','hisP','potF','potG','potH','potI','potA','potB','potC','potD',
                         'argT','cycA','putP','tyrP','gabP'],
        "nucleotides":  ['nupC'],
        "lipids":       ['fadL']
    },
    "recycling": {
        "carbohydrate": ['envC','mltA','pldA','ugpQ','arnA','manA','paaK','mhpC','mhpF'],
        "amino_acid":   ['clpA','clpS','rnpA','pepN','pepQ','ybiS','yhbU','ygjG','gcvA','gcvB',
                         'glpG','pepT','astC','astD','astE'],
        "lipids":       ['fadI','fadJ','fadD','fadE','fadB','fadA','fadH'],
        "nucleotides":  ['xseA','csrB']
    },
    "glycolysis":       ['fsaA','dhaK','dhaL','dhaM','gapA','gpmM','eno','pykA','aceE','aceF',
                         'adhE','lpd','acs'],
    "TCA":              ['gltA','acnB','icd','sucA','sucB','sucC','sucD','sdhA',
                         'sdhB','sdhC','sdhD','frdA','frdB','frdC','frdD','fumA',
                         'fumB','mdh','glcD','glcB','glcG','aldA'],
    "accessory":        ['thiE','thiF','thiG','thiH','thiS'],
    "regulators": {
        "carbohydrate": ['lldR','aer','mhpR','caiF','puuR','pdhR','glcC']
    }
}
In [ ]:
# Gene names grouped by anabolic function.  A value is either a list of gene names or
# a dict of sub-category -> list; flatten_to_genes() turns the whole structure into one list.
anabolism = {
                "macromolecular_synthesis": ['nrdA','nrdB','ndk','nudE','nudJ','upp','nrdD','nrdG','ung',
                                             'dbpA','rimP','rplY','rpsU','ddlA'],
                "biosynthesis": {
                    "nucleotides":          ['sthA','pntA','pntB','pncB','purA','guaA','guaB','ravA',
                                             'viaA','katG','pyrD','purM','purN'],
                    "amino_acids":          ['carA','carB','thiC','thrA','thrB','thrC','gabD','gabT',
                                             'speA','puuA','puuD','putA','mhpD','mhpE','glpE','gltB',
                                             'gltD','gltF','argG'],
                    "lipids":               ['acpP','tpx']
                                },
                "regulators":               ['bssR','rcsB','rcsD','uspA','uspB','uspF','rho','nusA','infA','infB','rutR']
            }
                                         
In [ ]:
# Gene names grouped by chemiosmotic function ("ETC" is split into sub-categories).
# Some names also appear in `catabolism` / `anabolism` (e.g. 'mdh', 'glpE'), so the
# concatenated flattened list built from the three dictionaries contains repeats.
chemiosmosis = {
                 "ETC": {
                     "dehydrogenases":       ['fdnH','fdnI','fdoG','fdoH','fdoI','hybC','hybO','glpD',
                                              'hybA','hybB','hybD','hybE','hybF','hybG','nuoA','nuoB',
                                              'nuoC','nuoE','nuoF','nuoG','nuoH','nuoI','nuoJ','nuoK',
                                              'nuoL','nuoM','nuoN','ndh','lldD','mdh','sdhA','sdhB','sdhC','sdhD'],
                     "oxidoreductases":      ['lhgO','mqo','frdA','frdB','frdC','frdD','dmsA','dmsB','dmsC',
                                              'ynfE','ynfF','ynfG','ynfH','nirB','nirD','nirC','nrfA',
                                              'nrfB','nrfC','nrfD','nrfF','narG','narH','narI','cyoA','cyoB',
                                              'cyoC','cyoD','cyoE','cydA','cydB','hcr'],
                     "accessory":            ['dmsD','ynjE','moaA','moaB','moaC','moaD','moaE','fhuF',
                                              'nikA','nikB','nikC','nikD','nikE','hcp','dsbB','msrB','glpE']
                        },
                 "fermentation":             ['pflB','yfiD','pta','ackA','adhE','focA'],
                 "ion_pumps":                ['hemG','feoA','feoB','feoC','dcuA','dcuB','osmY','cydD','cydC',
                                              'ompC','ompW','pitA','kdpC','narK','narJ','cirA','gsiA',
                                              'gsiB','gsiC','gsiD','mntH'],
                 "motility":                 ['flhC','flhD'],
                 "regulators":               ['gadX','gadW','nikR','rstA','rstB','narX','narL','glpR']
               }
                 
                  
In [52]:
# Show the `info` text stored for the gene named 'yghB'.
ome.query(Gene.info).filter_by(name='yghB').all()
Out[52]:
[(u'conserved inner membrane protein')]
In [60]:
# Significant (fdr < 0.05) ArcA/Fnr records whose gene annotation mentions "membrane"
# and whose gene is not in the list `a`, ordered by gene name.
# `a` is assigned in another cell and must exist before this one runs.
membrane_hits = (ome.query(cpge)
                    .join(Gene, Gene.bnum == cpge.bnum)
                    .filter(Gene.info.ilike('%membrane%'))
                    .filter(and_(cpge.target.in_(['ArcA','Fnr']), cpge.fdr < .05))
                    .filter(not_(cpge.gene_name.in_(a)))
                    .order_by(cpge.gene_name)
                    .all())
# One "bnum<TAB>gene_name" string per record.
b = []
for x in membrane_hits:
    b.append(x.bnum+'\t'+x.gene_name)
In [61]:
# Print each "bnum<TAB>gene_name" string of `b` on its own line (Python 2 print statement).
for x in b: print x
b1445	ydcX
b1750	ydjX
b1752	ydjZ
b1929	yedE
b2295	yfbV
b2298	yfcC
b3009	yghB
b3485	yhhJ
b3964	yijD
b4068	yjcH
b4068	yjcH
b4176	yjeT
b1332	ynaJ
b3002	yqhA
In [9]:
# `a`: every gene name already assigned to one of the three functional dictionaries.
a = []
for functional_category in (catabolism, anabolism, chemiosmosis):
    a.extend(flatten_to_genes(functional_category))

cpge = ChipPeakGeneExpression
# Significant (fdr < 0.05) ArcA/Fnr records for genes that are NOT in `a`,
# ordered by gene name; the result list is the cell's displayed value.
significant_hit = and_(cpge.target.in_(['ArcA','Fnr']), cpge.fdr < .05)
(ome.query(cpge)
    .filter(significant_hit)
    .filter(not_(cpge.gene_name.in_(a)))
    .order_by(cpge.gene_name)
    .all())
Out[9]:
[ChipPeakGeneExpression: Fnr (activator) for Gene: arcA (b4401) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.58, fdr=0.02),
 ChipPeakGeneExpression: ArcA (activator) for Gene: arcA (b4401) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change -5.15, fdr=3.04e-08),
 ChipPeakGeneExpression: ArcA (activator) for Gene: arcA (b4401) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 11.27, fdr=1.35e-07),
 ChipPeakGeneExpression: ArcA (activator) for Gene: argR (b3237) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.42, fdr=4.33e-03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: betI (b0313) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 1.39, fdr=2.03e-04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: caiA (b0039) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.39, fdr=0.04),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: csiD (b2659) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -2.32, fdr=1.26e-04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: cspA (b3556) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.48, fdr=0.03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: cysG (b3368) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.47, fdr=0.04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: dnaG (b3066) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.39, fdr=5.33e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: fabD (b1092) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.19, fdr=8.42e-04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: fabF (b1095) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.43, fdr=0.03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: fabG (b1093) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.12, fdr=0.04),
 ChipPeakGeneExpression: Fnr (activator) for Gene: fnr (b1334) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 10.95, fdr=2.35e-06),
 ChipPeakGeneExpression: Fnr (activator) for Gene: fnr (b1334) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 3.32, fdr=1.88e-05),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: folE (b2153) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.48, fdr=3.40e-04),
 ChipPeakGeneExpression: Fnr (repressor) for Gene: groL (b4143) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change -0.45, fdr=0.02),
 ChipPeakGeneExpression: Fnr (activator) for Gene: hsdM (b4349) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.34, fdr=0.03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: hsdR (b4350) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 5.08, fdr=7.97e-04),
 ChipPeakGeneExpression: Fnr (activator) for Gene: hsdS (b4348) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.50, fdr=7.83e-03),
 ChipPeakGeneExpression: Fnr (repressor) for Gene: iraP (b0382) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change -2.32, fdr=0.02),
 ChipPeakGeneExpression: ArcA (activator) for Gene: ispH (b0029) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.35, fdr=2.95e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: leuD (b0071) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.21, fdr=0.03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: leuL (b0075) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.76, fdr=0.02),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: macA (b0878) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.59, fdr=7.29e-03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: mcbA (b0806) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.68, fdr=0.02),
 ChipPeakGeneExpression: Fnr (activator) for Gene: metV (b2816) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 0.80, fdr=0.02),
 ChipPeakGeneExpression: Fnr (activator) for Gene: metW (b2815) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 0.86, fdr=2.87e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: metZ (b2814) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 0.78, fdr=5.61e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: micF (b4439) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.64, fdr=0.02),
 ChipPeakGeneExpression: ArcA (activator) for Gene: mutM (b3635) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.63, fdr=0.02),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: phoH (b1020) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 4.48, fdr=9.38e-08),
 ChipPeakGeneExpression: Fnr (activator) for Gene: pnp (b3164) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.24, fdr=0.02),
 ChipPeakGeneExpression: ArcA (activator) for Gene: pnp (b3164) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.12, fdr=0.04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: pnp (b3164) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.12, fdr=0.04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: psrD (b4418) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.39, fdr=8.82e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: rbfA (b3167) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.32, fdr=6.93e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: rbfA (b3167) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.11, fdr=0.03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: recF (b3700) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.46, fdr=0.03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: recF (b3700) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.64, fdr=0.03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: ribF (b0025) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.12, fdr=8.16e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: rplU (b3186) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.23, fdr=0.04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: rpmA (b3185) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.45, fdr=0.02),
 ChipPeakGeneExpression: ArcA (activator) for Gene: rpmB (b3637) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.21, fdr=0.04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: rpmG (b3636) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.37, fdr=0.03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: rpmH (b3703) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.24, fdr=0.03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: rpmH (b3703) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.15, fdr=0.02),
 ChipPeakGeneExpression: ArcA (activator) for Gene: rpsO (b3165) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.36, fdr=0.05),
 ChipPeakGeneExpression: ArcA (activator) for Gene: rpsO (b3165) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.36, fdr=0.05),
 ChipPeakGeneExpression: ArcA (activator) for Gene: rpsT (b0023) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.52, fdr=0.01),
 ChipPeakGeneExpression: Fnr (activator) for Gene: secG (b3175) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.46, fdr=7.54e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: secG (b3175) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.56, fdr=0.02),
 ChipPeakGeneExpression: ArcA (activator) for Gene: suhB (b2533) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.44, fdr=0.01),
 ChipPeakGeneExpression: ArcA (activator) for Gene: thrL (b0001) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 1.12, fdr=0.03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: truB (b3166) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.65, fdr=4.29e-03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: uxaC (b3092) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.75, fdr=0.03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: xthA (b1749) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.28, fdr=0.03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: ybfA/EG11521 (b0699) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 2.00, fdr=5.20e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: ybfA/EG11521 (b0699) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change -0.85, fdr=6.61e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: ybhK/G6402 (b0780) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.74, fdr=3.21e-04),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: ybiU/G6424 (b0821) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -1.84, fdr=4.21e-04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: ybjX/G6460 (b0877) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 1.03, fdr=4.82e-04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: yceD/EG11119 (b1088) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.24, fdr=0.01),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ydcX/G6756 (b1445) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.75, fdr=0.02),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ydfZ/G6815 (b1541) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 12.27, fdr=2.55e-10),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ydfZ/G6815 (b1541) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 10.15, fdr=1.16e-03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: ydgC/G6863 (b1607) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -1.54, fdr=1.71e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ydhW/G6900 (b1672) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 0.97, fdr=0.05),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ydhY/G6902 (b1674) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 1.42, fdr=4.14e-04),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ydjX/G6945 (b1750) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 4.50, fdr=2.10e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ydjY/G6946 (b1751) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 5.56, fdr=7.03e-04),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ydjZ/G6947 (b1752) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 4.43, fdr=6.84e-06),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yeaD/G6966 (b1780) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 1.40, fdr=0.04),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yeaD/G6966 (b1780) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 1.19, fdr=4.80e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yecH/G7036 (b1906) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 5.85, fdr=1.47e-05),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yecH/G7036 (b1906) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 4.39, fdr=2.35e-03),
 ChipPeakGeneExpression: Fnr (repressor) for Gene: yedE/EG11660 (b1929) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change -1.12, fdr=4.91e-03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yeiG/EG12026 (b2154) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.34, fdr=4.68e-03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yejG/EG12042 (b2181) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 6.12, fdr=2.05e-09),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yejG/EG12042 (b2181) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -4.00, fdr=3.62e-04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: yfbM/G7179 (b2272) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.72, fdr=0.01),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yfbV/G7189 (b2295) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 0.65, fdr=1.37e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yfcC/G7190 (b2298) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 2.85, fdr=1.29e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yfcZ/G7214 (b2343) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 1.02, fdr=6.26e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yfcZ/G7214 (b2343) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 1.89, fdr=4.46e-03),
 ChipPeakGeneExpression: Fnr (repressor) for Gene: yffL/G7273 (b2443) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.95, fdr=0.01),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yffS/G7280 (b2450) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.32, fdr=3.95e-03),
 ChipPeakGeneExpression: Fnr (repressor) for Gene: yffS/G7280 (b2450) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change -0.32, fdr=0.04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: yfiF/EG11786 (b2581) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change -0.26, fdr=0.04),
 ChipPeakGeneExpression: Fnr (repressor) for Gene: yghA/EG11292 (b3003) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.95, fdr=0.03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yghB/EG11824 (b3009) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 1.12, fdr=8.67e-04),
 ChipPeakGeneExpression: ArcA (activator) for Gene: ygjD/EG11171 (b3064) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.31, fdr=0.04),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yhbS/G7650 (b3156) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 1.13, fdr=3.50e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yhbS/G7650 (b3156) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 0.41, fdr=0.01),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yhbT/G7651 (b3157) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 2.02, fdr=7.04e-07),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yhbT/G7651 (b3157) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.47, fdr=0.01),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yhbV/G7653 (b3159) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 6.60, fdr=2.11e-05),
 ChipPeakGeneExpression: ArcA (activator) for Gene: yhgE/G7745 (b3402) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.73, fdr=1.53e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: yhhJ/EG11767 (b3485) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.36, fdr=0.05),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yhjE/EG12249 (b3523) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 3.75, fdr=4.63e-05),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yhjE/EG12249 (b3523) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -1.92, fdr=2.22e-04),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yiaG/EG12624 (b3555) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.32, fdr=0.02),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yibN/EG12295 (b3611) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 0.75, fdr=1.01e-03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yieE/EG11722 (b3712) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.40, fdr=0.02),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yigI/EG11467 (b3820) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 5.06, fdr=2.67e-12),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yigZ/EG11484 (b3848) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.41, fdr=1.46e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: yijD/EG11395 (b3964) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.48, fdr=4.27e-03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yjcH/EG11943 (b4068) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -4.58, fdr=5.88e-04),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yjcH/EG11943 (b4068) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 3.12, fdr=7.31e-05),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yjeT/G7844 (b4176) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.67, fdr=0.03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yjiR/G7936 (b4340) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -1.88, fdr=1.42e-03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: ynaJ/G6668 (b1332) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 4.05, fdr=6.36e-06),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yncD/G6762 (b1451) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -2.16, fdr=1.06e-03),
 ChipPeakGeneExpression: ArcA (repressor) for Gene: yncE/G6763 (b1452) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -4.65, fdr=1.43e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ynfK/G6851 (b1593) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 3.91, fdr=1.22e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ynfK/G6851 (b1593) on Condition: C:glucose, N:NH4Cl, e:anaerobic (fold change 5.05, fdr=2.09e-06),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ynjA/G6948 (b1753) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 1.97, fdr=1.30e-03),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ynjB/G6949 (b1754) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 2.14, fdr=9.41e-04),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ynjC/G6950 (b1755) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 3.02, fdr=2.29e-04),
 ChipPeakGeneExpression: Fnr (activator) for Gene: ynjD/G6951 (b1756) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 2.26, fdr=2.11e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: ypdA/G7243 (b2380) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.66, fdr=6.12e-03),
 ChipPeakGeneExpression: ArcA (activator) for Gene: ypdB/G7244 (b2381) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.45, fdr=0.02),
 ChipPeakGeneExpression: ArcA (activator) for Gene: ypdC/G7245 (b2382) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.39, fdr=0.02),
 ChipPeakGeneExpression: ArcA (activator) for Gene: yqcA/G7448 (b2790) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.36, fdr=0.04),
 ChipPeakGeneExpression: Fnr (activator) for Gene: yqgC/G7522 (b2940) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change 0.47, fdr=0.03),
 ChipPeakGeneExpression: Fnr (repressor) for Gene: yqhA/G7559 (b3002) on Condition: C:glucose, N:NH4Cl, e:NO3 (fold change -0.36, fdr=0.02)]
In [9]:
# Shorter variant of `catabolism` with the same category keys but fewer genes per list.
# It is flattened and passed to write_heatmap_json(), which labels each gene with a TU name.
# NOTE(review): presumably one representative gene per transcription unit -- confirm.
catabolism_tu = {
                "transport": {
                    "carbohydrate": ['mglB','ugpE','kgtP','lldP','exuT','focA','uraA','ptsG','dcuA'],
                    "amino_acid":   ['potB','metN','aroP','dppD','oppF','sstT','hisJ','potF','argT',
                                     'cycA','putP','gabP','tyrP'],
                    "nucleotides":  ['nupC'],
                    "lipids":       ['fadL']
                             },
                "recycling": {
                    "carbohydrate": ['envC','mltA','pldA','ugpQ','mhpF','arnA'],
                    "amino_acid":   ['clpA','ygjG','pepN','pepQ','ybiS','yhbU','rnpA','gcvA','gcvB',
                                     'astC',],
                    "lipids":       ['fadJ','fadD','fadE','fadB','fadH']
                             },
                "glycolysis":       ['dhaK','gapA','gpmM','pykA','aceE',
                                     'adhE','lpd','acs'],
                "TCA":              ['gltA','acnB','icd','sucD','sdhC','frdD','fumA','mdh','glcD','aldA'],
                "accessory":        ['thiE'],
                "regulators": {
                    "carbohydrate": ['aer','lldR','mhpR','glcC','puuR','pdhR','caiF']
                              }
                }
In [10]:
# Shorter variant of `anabolism` with the same category keys but fewer genes per list.
# It is flattened and passed to write_heatmap_json(), which labels each gene with a TU name.
# NOTE(review): presumably one representative gene per transcription unit -- confirm.
anabolism_tu = {
                "macromolecular_synthesis": ['ndk','nudE','nrdA','nrdD','upp','ung','dbpA','rimP','rplY','rpsU','ddlA'],
                "biosynthesis": {
                    "nucleotides":          ['sthA','pntB','pncB','purA','purN','guaB','ravA','katG','pyrD'],
                    "amino_acids":          ['carA','thiC','gabT','puuD','putA','mhpD','thrA','gltB','argG','speA'],
                    "lipids":               ['tpx']
                                
                                },
                "regulators":               ['uspA','uspB','bssR','uspF','rcsD','rho','nusA','infA','infB']
            }
                
In [11]:
# Shorter variant of `chemiosmosis` with the same category keys but fewer genes per list.
# It is flattened and passed to write_heatmap_json(), which labels each gene with a TU name.
# NOTE(review): presumably one representative gene per transcription unit -- confirm.
chemiosmosis_tu = {
                 "ETC": {
                     "dehydrogenases":       ['fdnI','fdoG','hybO','nuoH','lldD','mdh','sdhC'],
                     "oxidoreductases":      ['lhgO','mqo','frdD','dmsA','ynfE','nirD','nrfB','narH','cyoA','cydA','hcr'],
                     "accessory":            ['msrB','glpE','fhuF','ynjE','moaD','nikB','hcp','dsbB']
                        },
                 "fermentation":             ['pflB','yfiD','pta','ackA','adhE','focA'],
                 "ion_pumps":                ['hemG','dcuB','cydD','ompC','kdpC','pitA','narK','narJ',
                                              'osmY','cirA','feoB','mntH','gsiD'],
                 "motility":                 ['flhD'],
                 "regulators":               ['gadW','glpR','narL','nikR','rstA']
               }
                 
                  
                                              
In [ ]:
# Scratch cell: two bare list expressions that are never assigned to a name.  Each is
# followed by a trailing comma, so each statement evaluates to a one-element tuple.
['fdnH','fdnI','fdoG','fdoH','fdoI','hybC','hybO',
                                              'hybA','hybB','nuoA','nuoB','nuoC','nuoD','nuoE','nuoF',
                                              'nuoG','nuoH','nuoI','nuoJ','nuoK','nuoL','nuoM','nuoN',
                                              'ndh','lldD','mdh','sdhA','sdhB','sdhC','sdhD'],

['hemG','feoA','feoB','dcuA','dcuB','osmY','cydD','cydC',
                                              'ompC','ompW','pitA','kdpC','kdpD','kdpE'],
In [20]:
# Mean width (rightpos - leftpos) over the distinct (leftpos, rightpos) pairs in ChipPeakGene.
avg_peak_width = average([x[0] for x in ome.query(ChipPeakGene.rightpos-ChipPeakGene.leftpos).
                                            group_by(ChipPeakGene.leftpos, ChipPeakGene.rightpos).all()])

# Exclusive upper bound for the random window centres.  randint documents integer bounds,
# so the float literal is converted explicitly.
# NOTE(review): 4.6e6 is presumably the approximate genome length in bp -- confirm.
max_center = int(4.6e6)
n_random_windows = 1000
# NOTE(review): the divisor presumably is 3 replicates x 2 strands, matching the
# get_chip_data defaults -- confirm.
signals_per_window = 6

# Fixed seed so the sampled windows (and therefore width_aucs) are the same on every run.
random.seed(0)

# AUC of randomly placed windows of average peak width.
half_width = avg_peak_width/2
width_aucs = []
for center in random.randint(0, max_center, n_random_windows):
    window_values = [z['value'] for z in get_chip_data(leftpos=center-half_width,
                                                       rightpos=center+half_width)]
    width_aucs.append(nimblegen_AUC(window_values)/signals_per_window)
In [ ]:
 
In [36]:
# NOTE(review): chem_genes, cat_genes and ana_genes are not assigned in any cell visible
# in this notebook; this cell depends on state from cells that are no longer present.
len(chem_genes+cat_genes+ana_genes)
Out[36]:
192
In [ ]:
# `a`: every gene name already assigned to one of the three functional dictionaries.
a = flatten_to_genes(catabolism)+flatten_to_genes(anabolism)+flatten_to_genes(chemiosmosis)
# Hand-maintained gene lists.  Together with `a` they are subtracted from the columns of
# data_frame below, so the table only shows genes that are in none of these lists.
# Some names repeat within `unknown` (e.g. 'yhjE', 'yhiL', 'ydhY'); this is harmless
# because the lists are only used through set().
unknown =      ['yncD','yncE','rpmA','yciC','yciB','yecH','ypdC','ypdB','yfaL','eno','yjjY',
                'yhjE','ynjB','ynjA','mutM','pck','rpmG','plsX','rpmF','ypdA','gmhB','yibN',
                'ynjC','yejG','yfiF','ynjD','ybfA','yceI','mcbA','yqcC','yffL','yffS','ydcX',
                'ileS','ychE','fkpB','rbbA','yrfF','ybiT','ydgH','rmf','phnJ','metV','metW',
                'nohB','yddG','aqpZ','yhjE','yfbM','ydjY','ydjZ','ydjX','yceD','yfcC','yfcZ',
                'yfbV','fsaA','yhgE','yieE','ygjD','yjcH','ssrA','yhiL','slyA','yjeH','yffR',
                'yghB','ynaJ','ybiU','rplU','ydgC','yeaD','yhiI','paaH','yhiL','phoH','ydhY',
                'yhiN','metK','yccA','trmJ','yijD','rpoD','uspC','yfdZ','rimO','lpcA','metZ',
                'yqgB','ybjX','yhbS','yhbT','ybjD','yhbV','yiaG','yjiR','ynfK','ybhK','yqhA',
                'yghA','ydhI','yqgC','yhhJ','yqcA','yjeT','yigI','ydhY','yedE','csiD']
redox_exotic = ['yieF']
unexpressed = ['rhoL','pyrG','avtA','lspA','rhlB','ydhJ','trkH','rnpB','anmK','phnD','phnO',
               'leuL','leuA','leuB','leuC','leuD','ydcY','rluE','paaJ','ubiC','iaaA','caiB',
               'kup','asd','fabR','truC','trxA','rutG','fabH','dnaA','dnaN']
unrelated = ['hsdM']
contamination = ['fabD','fabG','psrD']
# Show the row labels of data_frame.
print(data_frame.index)

# Genes that already appear in one of the functional or hand-maintained lists.
already_classified = set(a+unknown+redox_exotic+unexpressed+unrelated+contamination)

# One table row per remaining data_frame column:
# (gene HTML representation, that column's values, gene info text).
table = []
for x in set(data_frame.columns).difference(already_classified):
    gene = ome.query(Gene).filter_by(name=x).first()
    row = (gene._repr_html_(), data_frame[x].values, gene.info)
    table.append(row)
html_table(table)