This notebook takes a list of gene symbols and queries the Image Data Resource (IDR) for phenotypes associated with those genes in high-content screens.
import json
import csv
import pandas as pd
import requests
# Index page of the IDR web client; requested once when the session is opened.
INDEX_PAGE = "https://idr.openmicroscopy.org/webclient/?experimenter=-1"

# Create the HTTP session and issue a first GET against the index page.
# The `session` name is used again further down to query the IDR API.
with requests.Session() as session:
    response = session.send(
        session.prepare_request(requests.Request('GET', INDEX_PAGE)))
    # raise_for_status() only raises for 4xx/5xx replies and is a no-op
    # otherwise, so no explicit status-code comparison is needed.
    response.raise_for_status()
# Gene identifiers to look up in the IDR.
#
# To read the list from a file instead (one identifier per line),
# uncomment the next two lines and comment out the literal list below:
# with open('./includes/FiveExampleGenes.txt') as f:
#     genes = f.read().splitlines()
genes = [
    'ASH2L',
    'ash2',
    '85441',
]

# Show the first few entries to confirm the gene list is populated.
genes[:5]
['ASH2L', 'ash2', '85441']
# URL templates for the IDR endpoints queried below. The placeholders are
# filled in with str.format() before each request.

# Containers annotated with {key}={value}; the reply's 'screens' list is used.
SCREENS_PROJECTS_URL = (
    "https://idr.openmicroscopy.org"
    "/mapr/api/{key}/?value={value}"
)
# Plates of one screen matching {key}={value}; the reply's 'plates' list is used.
PLATES_URL = (
    "https://idr.openmicroscopy.org"
    "/mapr/api/{key}/plates/?value={value}&id={screen_id}"
)
# Images under one parent node matching {key}={value}; 'images' list is used.
IMAGES_URL = (
    "https://idr.openmicroscopy.org"
    "/mapr/api/{key}/images/?value={value}&node={parent_type}&id={parent_id}"
)
# Map annotations attached to one image; the reply's 'annotations' list is used.
ATTRIBUTES_URL = (
    "https://idr.openmicroscopy.org"
    "/webclient/api/annotations/?type=map&image={image_id}"
)
The results are written to a temporary .csv file and then loaded into a pandas DataFrame for display.
# Annotation key used to search the IDR (substituted for {key} in the URLs).
attr_type = "gene"

# Map-annotation keys of interest, grouped by category. Only annotation
# entries whose key appears in the "phenotype" tuple are processed.
attr_keys = dict(
    phenotype=(
        "Phenotype",
        "Phenotype Term Name",
        "Phenotype Term Accession",
        "Phenotype Term Accession URL",
    ),
)
from tempfile import NamedTemporaryFile

# Column order of the CSV file and of the resulting DataFrame.
fieldnames = [
    'Gene', 'Screen', 'Plate', 'Image',
    'Phenotype', 'Phenotype Term Name', 'Phenotype Term Accession',
    'Phenotype Term Accession URL']

# The three annotation keys that together describe one ontology mapping
# of a phenotype.
ontList = ('Phenotype Term Name',
           'Phenotype Term Accession',
           'Phenotype Term Accession URL')


def _get_json(url):
    """Fetch ``url`` with the shared session and return the decoded JSON body.

    Raises ``requests.HTTPError`` for a 4xx/5xx reply, so a server-side
    failure is reported as such rather than as a JSON decoding error or a
    missing key further down.
    """
    reply = session.get(url)
    reply.raise_for_status()
    return reply.json()


def _phenotype_rows(values, context):
    """Yield the CSV rows described by one map annotation.

    ``values`` is the annotation's list of ``[key, value]`` entries and
    ``context`` holds the Gene/Screen/Plate/Image columns shared by every
    row of the image.

    Nothing is yielded when the annotation has no 'Phenotype' entry. A
    phenotype without ontology terms yields one row; otherwise one row is
    yielded per group of three consecutive ontology entries (term name,
    accession, accession URL). A trailing incomplete group still yields a
    row, with the missing columns left blank by the CSV writer, so the
    phenotype is never silently dropped.
    """
    phenotype = {}
    terms = []  # (key, value) pairs of ontology entries, in annotation order
    for entry in values:
        key = str(entry[0])
        if key not in attr_keys['phenotype']:
            continue
        if key == 'Phenotype':
            phenotype[key] = entry[1]
        elif key in ontList:
            terms.append((key, str(entry[1])))

    if not phenotype:
        return

    base = dict(context)
    base.update(phenotype)
    if not terms:  # phenotype without any ontology mapping
        yield base
        return

    group_size = len(ontList)
    for start in range(0, len(terms), group_size):
        # Start from a fresh copy so values of a previous mapping cannot
        # leak into the next row.
        row = dict(base)
        row.update(terms[start:start + group_size])
        yield row


# newline="" is what the csv module expects of files it writes to; the
# explicit encoding matches the UTF-8 default of pandas.read_csv.
# NOTE: re-opening a NamedTemporaryFile by name while it is still open works
# on Unix but not on Windows.
with NamedTemporaryFile("w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()

    # Walk gene -> screens -> plates -> images -> map annotations.
    for gene in genes:
        screens_url = SCREENS_PROJECTS_URL.format(key=attr_type, value=gene)
        for screen in _get_json(screens_url)['screens']:
            plates_url = PLATES_URL.format(
                key=attr_type, value=gene, screen_id=screen['id'])
            for plate in _get_json(plates_url)['plates']:
                images_url = IMAGES_URL.format(
                    key=attr_type, value=gene,
                    parent_type='plate', parent_id=plate['id'])
                for image in _get_json(images_url)['images']:
                    context = {'Gene': gene,
                               'Screen': screen['name'],
                               'Plate': plate['name'],
                               'Image': image['id']}
                    annotations_url = ATTRIBUTES_URL.format(
                        image_id=image['id'])
                    for annotation in _get_json(
                            annotations_url)['annotations']:
                        writer.writerows(
                            _phenotype_rows(annotation['values'], context))

    # Push buffered rows to disk before the file is read back by name;
    # without this the tail of the data (or all of it) would be missing.
    csvfile.flush()
    df = pd.read_csv(csvfile.name)

# view what is in the csv file (displaying the first 10 rows alone)
df.head(10)
Gene | Screen | Plate | Image | Phenotype | Phenotype Term Name | Phenotype Term Accession | Phenotype Term Accession URL | |
---|---|---|---|---|---|---|---|---|
0 | ASH2L | idr0012-fuchs-cellmorph/screenA (2) | HT28 | 1830141 | elongated cells | elongated cell phenotype | CMPO_0000077 | http://www.ebi.ac.uk/cmpo/CMPO_0000077 |
1 | ASH2L | idr0012-fuchs-cellmorph/screenA (2) | HT28 | 1830140 | elongated cells | elongated cell phenotype | CMPO_0000077 | http://www.ebi.ac.uk/cmpo/CMPO_0000077 |
2 | ash2 | idr0001-graml-sysgro/screenA (60) | JL_120809_S14B | 1239832 | abnormal microtubule cytoskeleton morphology d... | abnormal microtubule cytoskeleton morphology d... | CMPO_0000438 | http://www.ebi.ac.uk/cmpo/CMPO_0000438 |
3 | ash2 | idr0001-graml-sysgro/screenA (60) | JL_120809_S14B | 1239830 | abnormal microtubule cytoskeleton morphology d... | abnormal microtubule cytoskeleton morphology d... | CMPO_0000438 | http://www.ebi.ac.uk/cmpo/CMPO_0000438 |
4 | ash2 | idr0001-graml-sysgro/screenA (60) | JL_120809_S14B | 1239833 | abnormal microtubule cytoskeleton morphology d... | abnormal microtubule cytoskeleton morphology d... | CMPO_0000438 | http://www.ebi.ac.uk/cmpo/CMPO_0000438 |
5 | ash2 | idr0001-graml-sysgro/screenA (60) | JL_120809_S14B | 1239834 | abnormal microtubule cytoskeleton morphology d... | abnormal microtubule cytoskeleton morphology d... | CMPO_0000438 | http://www.ebi.ac.uk/cmpo/CMPO_0000438 |
6 | ash2 | idr0001-graml-sysgro/screenA (60) | JL_120809_S14B | 1239831 | abnormal microtubule cytoskeleton morphology d... | abnormal microtubule cytoskeleton morphology d... | CMPO_0000438 | http://www.ebi.ac.uk/cmpo/CMPO_0000438 |
7 | ash2 | idr0001-graml-sysgro/screenA (60) | JL_120809_S14B | 1239835 | abnormal microtubule cytoskeleton morphology d... | abnormal microtubule cytoskeleton morphology d... | CMPO_0000438 | http://www.ebi.ac.uk/cmpo/CMPO_0000438 |
8 | ash2 | idr0001-graml-sysgro/screenA (60) | JL_121215_J4_1 | 1269432 | abnormal microtubule cytoskeleton morphology d... | abnormal microtubule cytoskeleton morphology d... | CMPO_0000438 | http://www.ebi.ac.uk/cmpo/CMPO_0000438 |
9 | ash2 | idr0001-graml-sysgro/screenA (60) | JL_121215_J4_1 | 1269435 | abnormal microtubule cytoskeleton morphology d... | abnormal microtubule cytoskeleton morphology d... | CMPO_0000438 | http://www.ebi.ac.uk/cmpo/CMPO_0000438 |
License
Copyright (C) 2017 University of Dundee. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.