For more information, please see: http://david.abcc.ncifcrf.gov/content.jsp?file=WS.html
Available functions:
authenticate()
addList(inputIds, idType, listName, listType)
getAllAnnotationCategoryNames()
getAllListNames()
getAllPopulationNames()
getChartReport(threshold, count)
getConversionTypes()
getCurrentList()
getCurrentSpecies()
getCurrentPopulation()
getDefaultCategoryNames()
getGeneClusterReport(overlap, initialSeed, finalSeed, linkage, kappa)
# Install the two third-party packages this notebook relies on:
# suds (the SOAP client used below to talk to the DAVID web service)
# and python-nvd3 (used below to draw the pie charts).
!pip install --user --quiet suds
!pip install --user --quiet python-nvd3
# Set your registered email address here; it is passed to the
# service's authenticate() call further down.
email = ''
import sys
import pandas
from StringIO import StringIO
from suds.client import Client
# WSDL document of the DAVID web service.
david_wsdl_url = 'http://david.abcc.ncifcrf.gov/webservice/services/DAVIDWebService?wsdl'
# Build the suds client from the WSDL; the remote functions are reached
# through `client.service`.
client = Client(david_wsdl_url)
# Authenticate with the email address configured above; the service's
# reply is kept in `registered`.
registered = client.service.authenticate(email)
# Set your input data here; for example, in Galaxy you could enter `get(4)`.
# The file is parsed as tab-separated text, and its 'Entry' column is the
# one submitted to DAVID later on.
uniprot = pandas.read_csv('/home/bag/Downloads/uniprot-cytochrome.tab', sep='\t')
%%javascript
// Register the location of d3 (v3) with RequireJS under the module name "d3".
// NOTE(review): presumably required so the nvd3 charts below can load d3 - confirm.
require.config({paths: {d3: "//d3js.org/d3.v3.min"}});
from IPython.display import HTML
from nvd3 import pieChart
import nvd3
# Prepare the notebook for nvd3 charts by initialising its JavaScript.
# NOTE(review): use_remote=True presumably loads the assets from a remote
# host instead of local copies - confirm against the python-nvd3 docs.
nvd3.ipynb.initialize_javascript(use_remote=True)
# define a plotting function based on d3.js and nvd3
def pie_chart(x, y, name='piechart'):
    """
    Build an nvd3 pie chart and return its HTML content.

    x    -- sequence forwarded as the chart's x data (the call sites in
            this notebook pass the slice labels here)
    y    -- sequence forwarded as the chart's y data (the call sites in
            this notebook pass the scores here)
    name -- chart name; it has to be unique per plot, otherwise one plot
            overwrites the other
    """
    # Tooltip text: nothing before the value, " score" after it.
    tooltip_format = {"tooltip": {"y_start": "", "y_end": " score"}}

    pie = pieChart(name=name, color_category='category20c', height=650, width=650)
    pie.set_containerheader("\n\n<h2>PieChart</h2>\n\n")
    pie.add_serie(y=y, x=x, extra=tooltip_format)

    # htmlcontent is read only after buildcontent() has been called.
    pie.buildcontent()
    return pie.htmlcontent
def david_setup(input_ids, id_type='UNIPROT_ACCESSION',
                bg_ids=None, bg_name='IPython_bg_name',
                list_name='IPython_example_list', category=''):
    """
    Upload an ID list (and optionally a background list) to DAVID.

    Works on the module-level suds ``client`` and prints the value returned
    by each ``addList`` call as "Percentage mapped".

    input_ids -- iterable of ID strings; joined with ',' before upload
    id_type   -- identifier type handed to ``addList``
    bg_ids    -- optional iterable of background ID strings; ``None`` or an
                 empty iterable means no background list is uploaded
    bg_name   -- name given to the background list
    list_name -- name given to the input list
    category  -- value handed to ``setCategories``

    possible categories:
    * BBID,GOTERM_CC_FAT,BIOCARTA,GOTERM_MF_FAT,SMART,COG_ONTOLOGY,SP_PIR_KEYWORDS,
    KEGG_PATHWAY,INTERPRO,UP_SEQ_FEATURE,OMIM_DISEASE,GOTERM_BP_FAT,PIR_SUPERFAMILY

    Returns the ``client.service`` object so reports can be requested from it.
    """
    david = client.service

    # Join both lists up front. Testing the joined string (instead of the
    # raw container) avoids the ambiguous truth value of containers such as
    # a pandas Series, and bg_ids=None replaces the mutable [] default.
    id_string = ','.join(input_ids)
    bg_string = ','.join(bg_ids) if bg_ids is not None else ''

    # List type 0 is used for the input list, 1 for the background list.
    # The parenthesised single-argument print prints the same on Python 2
    # and is valid Python 3 syntax.
    mapped = david.addList(id_string, id_type, list_name, 0)
    print('Percentage mapped: %s' % mapped)
    if bg_string:
        mapped_bg = david.addList(bg_string, id_type, bg_name, 1)
        print('Percentage mapped (background): %s' % mapped_bg)

    david.setCategories(category)
    return david
def report_to_table(request):
    """
    Converts a DAVID report to a pandas DataFrame.

    request -- iterable of report rows; each row is passed through
               ``dict()``, so it must be a mapping or an iterable of
               (key, value) pairs

    Returns a DataFrame with one line per report row and one column per
    key. An empty report yields an empty DataFrame.
    """
    # Hand the list of records straight to the constructor; no throwaway
    # DataFrame instance is needed just to reach a classmethod.
    return pandas.DataFrame([dict(row) for row in request])
# Submit the first 100 values of the 'Entry' column as UniProt accessions
# and restrict the annotation categories to GOTERM_CC_FAT.
david = david_setup(uniprot['Entry'][:100], 'UNIPROT_ACCESSION', category='GOTERM_CC_FAT')
Percentage mapped: 0.99
# Arguments for getChartReport(threshold, count): `thd` is the threshold,
# `ct` the count.
ct = 2
thd = 0.1
request = david.getChartReport(thd, ct)
table = report_to_table(request)
# Display only the selected report columns.
table[['categoryName','termName', 'listHits', 'percent', 'ease', 'foldEnrichment', 'benjamini']]
categoryName | termName | listHits | percent | ease | foldEnrichment | benjamini | |
---|---|---|---|---|---|---|---|
0 | GOTERM_CC_FAT | GO:0005739~mitochondrion | 59 | 60.204082 | 1.250867e-40 | 7.708658 | 1.676161e-38 |
1 | GOTERM_CC_FAT | GO:0005740~mitochondrial envelope | 44 | 44.897959 | 2.134157e-40 | 14.914028 | 1.429885e-38 |
2 | GOTERM_CC_FAT | GO:0031966~mitochondrial membrane | 42 | 42.857143 | 1.408322e-38 | 15.139425 | 6.290504e-37 |
3 | GOTERM_CC_FAT | GO:0031090~organelle membrane | 56 | 57.142857 | 1.448194e-36 | 7.256610 | 4.851449e-35 |
4 | GOTERM_CC_FAT | GO:0044429~mitochondrial part | 45 | 45.918367 | 3.648952e-35 | 10.741176 | 9.779191e-34 |
5 | GOTERM_CC_FAT | GO:0031967~organelle envelope | 45 | 45.918367 | 2.176204e-34 | 10.308065 | 4.860189e-33 |
6 | GOTERM_CC_FAT | GO:0031975~envelope | 45 | 45.918367 | 2.501831e-34 | 10.274920 | 4.789220e-33 |
7 | GOTERM_CC_FAT | GO:0005743~mitochondrial inner membrane | 29 | 29.591837 | 3.294510e-24 | 13.459622 | 5.518305e-23 |
8 | GOTERM_CC_FAT | GO:0019866~organelle inner membrane | 29 | 29.591837 | 2.454137e-23 | 12.518676 | 3.653938e-22 |
9 | GOTERM_CC_FAT | GO:0070469~respiratory chain | 14 | 14.285714 | 3.276517e-15 | 26.510815 | 4.463097e-14 |
10 | GOTERM_CC_FAT | GO:0005789~endoplasmic reticulum membrane | 17 | 17.346939 | 4.233412e-11 | 8.975382 | 5.157067e-10 |
11 | GOTERM_CC_FAT | GO:0044455~mitochondrial membrane part | 13 | 13.265306 | 5.626435e-11 | 14.770311 | 6.282850e-10 |
12 | GOTERM_CC_FAT | GO:0042175~nuclear envelope-endoplasmic reticu... | 17 | 17.346939 | 9.522033e-11 | 8.501330 | 9.815013e-10 |
13 | GOTERM_CC_FAT | GO:0005746~mitochondrial respiratory chain | 10 | 10.204082 | 5.125087e-10 | 22.190972 | 4.905440e-09 |
14 | GOTERM_CC_FAT | GO:0044432~endoplasmic reticulum part | 17 | 17.346939 | 1.800255e-09 | 6.957861 | 1.608228e-08 |
15 | GOTERM_CC_FAT | GO:0005792~microsome | 14 | 14.285714 | 8.898764e-09 | 8.389498 | 7.452714e-08 |
16 | GOTERM_CC_FAT | GO:0005741~mitochondrial outer membrane | 10 | 10.204082 | 1.134508e-08 | 15.780247 | 8.942589e-08 |
17 | GOTERM_CC_FAT | GO:0042598~vesicular fraction | 14 | 14.285714 | 1.261559e-08 | 8.148816 | 9.391605e-08 |
18 | GOTERM_CC_FAT | GO:0031968~organelle outer membrane | 10 | 10.204082 | 4.077342e-08 | 13.655983 | 2.875599e-07 |
19 | GOTERM_CC_FAT | GO:0019867~outer membrane | 10 | 10.204082 | 5.674731e-08 | 13.150206 | 3.802069e-07 |
20 | GOTERM_CC_FAT | GO:0005783~endoplasmic reticulum | 23 | 23.469388 | 4.543511e-07 | 3.402616 | 2.899189e-06 |
21 | GOTERM_CC_FAT | GO:0012505~endomembrane system | 19 | 19.387755 | 5.859228e-06 | 3.450668 | 3.568749e-05 |
22 | GOTERM_CC_FAT | GO:0005750~mitochondrial respiratory chain com... | 4 | 4.081633 | 6.428897e-06 | 94.681481 | 3.745473e-05 |
23 | GOTERM_CC_FAT | GO:0045275~respiratory chain complex III | 4 | 4.081633 | 6.428897e-06 | 94.681481 | 3.745473e-05 |
24 | GOTERM_CC_FAT | GO:0000267~cell fraction | 22 | 22.448980 | 1.239361e-05 | 2.885031 | 6.919571e-05 |
25 | GOTERM_CC_FAT | GO:0005624~membrane fraction | 17 | 17.346939 | 1.289663e-04 | 2.984398 | 6.910649e-04 |
26 | GOTERM_CC_FAT | GO:0005626~insoluble fraction | 17 | 17.346939 | 1.961841e-04 | 2.877685 | 1.010691e-03 |
27 | GOTERM_CC_FAT | GO:0019898~extrinsic to membrane | 11 | 11.224490 | 2.235796e-03 | 3.162438 | 1.104712e-02 |
28 | GOTERM_CC_FAT | GO:0005829~cytosol | 18 | 18.367347 | 9.570662e-03 | 1.922105 | 4.498004e-02 |
29 | GOTERM_CC_FAT | GO:0043020~NADPH oxidase complex | 2 | 2.040816 | 6.096751e-02 | 31.560494 | 2.522341e-01 |
30 | GOTERM_CC_FAT | GO:0032592~integral to mitochondrial membrane | 2 | 2.040816 | 8.046160e-02 | 23.670370 | 3.124904e-01 |
# Arguments for getGeneClusterReport(overlap, initialSeed, finalSeed,
# linkage, kappa), given in that order.
overlap = 2
initialSeed = 2
finalSeed = 1
linkage = 1
kappa = 1
request = david.getGeneClusterReport(overlap, initialSeed, finalSeed, linkage, kappa)
table = report_to_table(request)
# Display the cluster name and score columns.
table[['name', 'score']]
name | score | |
---|---|---|
0 | Gene Cluster 2 | 33.474752 |
1 | Gene Cluster 5 | 33.474752 |
2 | Gene Cluster 25 | 33.385599 |
3 | Gene Cluster 12 | 31.105503 |
4 | Gene Cluster 24 | 28.330592 |
5 | Gene Cluster 21 | 27.484442 |
6 | Gene Cluster 6 | 25.763687 |
7 | Gene Cluster 16 | 25.763687 |
8 | Gene Cluster 10 | 24.133724 |
9 | Gene Cluster 9 | 23.591595 |
10 | Gene Cluster 13 | 23.391020 |
11 | Gene Cluster 19 | 23.227674 |
12 | Gene Cluster 20 | 23.227674 |
13 | Gene Cluster 15 | 22.794789 |
14 | Gene Cluster 1 | 22.590638 |
15 | Gene Cluster 4 | 22.129459 |
16 | Gene Cluster 7 | 21.967561 |
17 | Gene Cluster 14 | 21.967561 |
18 | Gene Cluster 23 | 21.967561 |
19 | Gene Cluster 18 | 13.502007 |
20 | Gene Cluster 17 | 11.906495 |
21 | Gene Cluster 22 | 11.247333 |
22 | Gene Cluster 3 | 10.775179 |
23 | Gene Cluster 8 | 2.359300 |
24 | Gene Cluster 11 | 0.783642 |
# Term clustering with overlap and both seed values set to 3; the five
# values are handed to getTermClusterReport in the order listed here.
overlap = 3
initialSeed = 3
finalSeed = 3
linkage = 0.5
kappa = 50
request = david.getTermClusterReport(overlap, initialSeed, finalSeed, linkage, kappa)
table = report_to_table(request)
# Display the cluster name and score columns.
table[['name', 'score']]
name | score | |
---|---|---|
0 | GO:0005739~mitochondrion | 33.450913 |
1 | GO:0070469~respiratory chain | 11.341551 |
2 | GO:0005746~mitochondrial respiratory chain | 6.558009 |
3 | GO:0005789~endoplasmic reticulum membrane | 6.528910 |
4 | GO:0005741~mitochondrial outer membrane | 6.149982 |
5 | GO:0043025~cell soma | 0.597256 |
6 | GO:0031410~cytoplasmic vesicle | 0.058873 |
7 | GO:0005654~nucleoplasm | 0.007770 |
8 | GO:0005615~extracellular space | 0.007698 |
9 | GO:0005887~integral to plasma membrane | 0.000484 |
# Draw the cluster names and scores as a pie chart. The explicit name
# "relaxed" keeps this chart distinct from the one drawn with pie_chart's
# default name further down.
HTML(pie_chart(table['name'], table['score'], name="relaxed"))
# Same term clustering as before, but with overlap and both seed values
# raised to 5; linkage and kappa are unchanged.
overlap = 5
initialSeed = 5
finalSeed = 5
linkage = 0.5
kappa = 50
request = david.getTermClusterReport(overlap, initialSeed, finalSeed, linkage, kappa)
table = report_to_table(request)
# Parenthesised single-argument print: prints the same on Python 2 and is
# valid Python 3 syntax.
print(table[['name', 'score']])
name score 0 GO:0005739~mitochondrion 33.450913 1 GO:0005789~endoplasmic reticulum membrane 6.528910
# Draw the current table as a pie chart. No name is given, so pie_chart's
# default 'piechart' is used, which differs from the "relaxed" chart above.
HTML(pie_chart(table['name'], table['score']))