#!/usr/bin/env python
# coding: utf-8

# # Datasets2Tools API Manual
# **Denis Torre**
#
# *September 20, 2017*
#
# ## 1. Overview
# This notebook explains how to extract data from the Datasets2Tools API using Python. The notebook can be downloaded at the following GitHub page: https://github.com/denis-torre/datasets2tools/tree/master/api.
#
# ##### Basics
# - The Datasets2Tools search API can be accessed at the following URL: http://amp.pharm.mssm.edu/datasets2tools/api/search.
# - Searches are refined by adding several parameters, which are explained in more detail below.
# - The API returns a list of JSON objects containing information about the search results.
#
# ##### Object Types
# The Datasets2Tools API can be used to search three types of objects:
# - **Canned Analyses** (http://amp.pharm.mssm.edu/datasets2tools/api/search?object_type=canned_analysis)
# - **Datasets** (http://amp.pharm.mssm.edu/datasets2tools/api/search?object_type=dataset)
# - **Tools** (http://amp.pharm.mssm.edu/datasets2tools/api/search?object_type=tool)
#
# More detailed explanation on searching these objects is available below.
#
# ##### Demo
# Here is an example of search results for the analyses endpoint.

# In[1]:


# Modules used by the demo
import json
import requests
import pandas as pd

# Endpoint of the search API
url = 'http://amp.pharm.mssm.edu/datasets2tools/api/search'

# Query: the first 5 canned analyses
data = {
    'object_type': 'canned_analysis',
    'page_size': 5,
}

# Send the query; the options travel as URL query parameters
response = requests.post(url, params=data)

# Decode the JSON payload
results = json.loads(response.text)

# Tabulate the results (the trailing expression displays the table in a notebook)
results_dataframe = pd.DataFrame(results)
results_dataframe


# ## 2. Search Examples
# For convenience, we define a function to search the API and return a pandas DataFrame.
# In[2]:


# Import modules
import json
import requests
import pandas as pd


def search_datasets2tools(search_options):
    """Search the Datasets2Tools API and return the results as a DataFrame.

    Args:
        search_options: dict of search parameters, sent to the API as URL
            query parameters. Its 'object_type' value selects the index
            column ('<object_type>_accession') of the returned table; the
            examples in this notebook use 'canned_analysis', 'dataset' and
            'tool'.

    Returns:
        A pandas DataFrame indexed by '<object_type>_accession', or the
        string 'Sorry, there has been an error.' if the response cannot be
        decoded as JSON, converted to a DataFrame, or indexed (for example
        when 'object_type' is missing from search_options or the accession
        column is absent from the results).

    Raises:
        Whatever requests.post raises (e.g. on connection failure); the
        request itself is deliberately left outside the error handler.
    """
    # Get API URL
    url = 'http://amp.pharm.mssm.edu/datasets2tools/api/search'

    # Get response
    response = requests.post(url, params=search_options)

    try:
        # Read response
        results_dict = json.loads(response.text)

        # Convert to dataframe
        results_dataframe = pd.DataFrame(results_dict)

        # Set index
        index_column = search_options['object_type'] + '_accession'
        return results_dataframe.set_index(index_column)
    except Exception:
        # A bare `except:` would also swallow KeyboardInterrupt and
        # SystemExit; Exception keeps the error-message contract for every
        # ordinary failure while letting interpreter-level signals through.
        return 'Sorry, there has been an error.'


# ### 2.1 Canned Analyses
# We can search canned analyses by text, dataset, tool, or metadata tags.
#
# ##### 2.1.1 By Text
# Search all canned analyses that contain the keyword *prostate cancer*.

# In[3]:


results = search_datasets2tools({'object_type': 'canned_analysis',
                                 'q': 'prostate cancer'})
results.head()


# ##### 2.1.2 By Dataset
# Search all canned analyses associated with GEO dataset GSE775.

# In[4]:


results = search_datasets2tools({'object_type': 'canned_analysis',
                                 'dataset_accession': 'GSE775'})
results.head()


# ##### 2.1.3 By Tool
# Search all canned analyses generated by Enrichr.

# In[5]:


results = search_datasets2tools({'object_type': 'canned_analysis',
                                 'tool_name': 'Enrichr'})
results.head()


# ##### 2.1.4 By Metadata
# Search all canned analyses with the *colon cancer* disease name.

# In[6]:


results = search_datasets2tools({'object_type': 'canned_analysis',
                                 'disease_name': 'colon cancer'})
results.head()


# ##### 2.1.5 Combined Search
# Search all analyses generated by Enrichr on dataset GSE31106, where the geneset is upregulated.

# In[7]:


results = search_datasets2tools({'object_type': 'canned_analysis',
                                 'tool_name': 'Enrichr',
                                 'dataset_accession': 'GSE31106',
                                 'geneset': 'upregulated'})
results.head()


# ### 2.2 Datasets
# We can search datasets by accession, text-based search, names of tools which have analyzed them, or accessions of canned analyses generated using them.
#
# ##### 2.2.1 By Accession
# Search dataset GSE775.

# In[8]:


results = search_datasets2tools({
    'object_type': 'dataset',
    'dataset_accession': 'GSE775',
})
results.head()


# ##### 2.2.2 By Text
# Search all datasets which contain the keyword *asthma*.

# In[9]:


results = search_datasets2tools({
    'object_type': 'dataset',
    'q': 'asthma',
})
results.head()


# ##### 2.2.3 By Tool
# Search all datasets which have been analyzed by Enrichr.

# In[10]:


results = search_datasets2tools({
    'object_type': 'dataset',
    'tool_name': 'Enrichr',
})
results.head()


# ##### 2.2.4 By Canned Analysis
# Search all datasets which have been used to generate canned analysis DCA00000002.

# In[11]:


results = search_datasets2tools({
    'object_type': 'dataset',
    'canned_analysis_accession': 'DCA00000002',
})
results.head()


# ### 2.3 Tools
# We can search tools by name, text-based search, accessions of analyzed datasets, or accessions of canned analyses generated using them.
#
# ##### 2.3.1 By Name
# Search ARCHS4.

# In[12]:


results = search_datasets2tools({
    'object_type': 'tool',
    'tool_name': 'ARCHS4',
})
results.head()


# ##### 2.3.2 By Text
# Search all tools which contain the keyword *enrichment*.

# In[13]:


results = search_datasets2tools({
    'object_type': 'tool',
    'q': 'enrichment',
})
results.head()


# ##### 2.3.3 By Dataset
# Search all tools which have analyzed GEO dataset GSE775.

# In[14]:


results = search_datasets2tools({
    'object_type': 'tool',
    'dataset_accession': 'GSE775',
})
results.head()


# ##### 2.3.4 By Canned Analysis
# Search all tools which have been used to generate canned analysis DCA00000002.

# In[15]:


results = search_datasets2tools({
    'object_type': 'tool',
    'canned_analysis_accession': 'DCA00000002',
})
results.head()


# In[ ]: