#!/usr/bin/env python
# coding: utf-8
# ### Preparations
# Let's start by making some necessary imports and definitions. You have to install `requests` first by running `pip install --user requests`.
# In[33]:
import requests
import pprint
import sys
GRAPHQL = 'http://api.catalysis-hub.org/graphql'
def fetch(query, timeout=60):
    """Send a GraphQL query to the Catalysis Hub API and return its data.

    Args:
        query: The GraphQL query document, as a string.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` would wait indefinitely on a stalled
            connection.

    Returns:
        The value stored under the ``'data'`` key of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
        KeyError: If the JSON response has no ``'data'`` key.
    """
    response = requests.get(GRAPHQL, params={'query': query}, timeout=timeout)
    # Fail loudly on HTTP errors instead of tripping over the body below.
    response.raise_for_status()
    return response.json()['data']
# ### List of Publications
# Let's start flexing our querying muscles by querying a list of publications.
# In[21]:
# Ask for the bibliographic fields of the publications the server returns.
raw_publications = fetch("""{publications {
edges {
node {
id
authors
title
journal
year
doi
}
}
}}
""")['publications']['edges']
# Every edge wraps its record in a 'node' entry; unwrap them into a flat list.
publications = [edge['node'] for edge in raw_publications]
pprint.pprint(publications[:3])
# We only show the first 3 results here for brevity, but you can retrieve the full list by removing the `[:3]` slice. The `['edges']['node']` nesting may seem a little annoying, but it will allow us to page through responses that would be too large for a single request, as we will see below.
# ### Query Reactions
# Next, let's query some reactions. This is the same type of query as you would get from the Reaction Energetics App. Let's get all energies that end with `CO` adsorbed on the surface and some Palladium in the surface. The tilde (`~`) before the `Pd` indicates that the field only has to contain `Pd`. If you want the exact match, drop the tilde. Here we have also limited the query to the first 10 results (`first: 10`).
# In[25]:
# Query the first 10 reactions whose products match "CO" and whose chemical
# composition contains "Pd". The result is printed explicitly: as a bare
# expression it is only displayed in an interactive session and is silently
# discarded when this file runs as a script.
pprint.pprint(fetch("""
{reactions(first: 10, products:"CO", chemicalComposition:"~Pd") {
totalCount
pageInfo {
hasNextPage
hasPreviousPage
startCursor
endCursor
}
edges {
node {
reactants
products
Equation
reactionEnergy
chemicalComposition
}
}
}}
"""))
# ### Query Systems
# Next up is `systems`. We use a different filter to select energies > -14 eV, so that should give us H or H2 at best.
# In[26]:
# Query the first 100 systems whose energy is greater than -14 (the `op`
# argument selects the comparison). The result is printed explicitly: as a
# bare expression it is only displayed in an interactive session and is
# silently discarded when this file runs as a script.
pprint.pprint(fetch("""
{systems(first: 100, energy: -14, op:">") {
totalCount
edges {
node {
id
Formula
Cifdata
energy
calculatorParameters
}
}
}}
"""))
# ### Combining Queries and Stepping Through Large Queries
# The main tables that `catalysis-hub.org` offers are `reactions`, `systems`, and `publications`. Often it is useful to query more than one table at once (i.e. an SQL join) to filter one table but get the data from a different table associated with it. For example, we may want to filter for a certain type of reaction and get the structures associated with it.
# In[28]:
# Fetch a single reaction (first: 1) together with its associated systems,
# each rendered as a VASP input file.
reaction_systems = fetch("""{reactions(first: 1, after:"", products:"CO", chemicalComposition:"~Pd") {
totalCount
pageInfo {
hasNextPage
hasPreviousPage
startCursor
endCursor
}
edges {
node {
id
reactants
products
Equation
reactionEnergy
chemicalComposition
systems{
InputFile(format:"vasp")
}
}
}
}}
""")
# Print explicitly: a bare `reaction_systems` expression shows nothing when
# this file runs as a script.
pprint.pprint(reaction_systems)
# One constraint we have to work with is that our server times out requests after 30 seconds (this gives others a chance to query, too). Especially when generating a lot of structures, we can quickly run into this limitation. To get around this, we can use the `pageInfo` attributes as well as the `first` and `after` keywords to roll our own pagination and combine the whole list. We will use a simple loop that doesn't end on its own and `break` out of it when the `pageInfo` indicates that we are done. To step through a large query, do this:
# In[39]:
# Collect all matching reactions, keyed by their id, by requesting 5 at a
# time and feeding each page's end cursor into the next request.
end_cursor = ''
reaction_systems = {}
while True:
    # The cursor is spliced into the `after` argument; the first request is
    # made with the empty cursor.
    response = fetch("{reactions(first: 5, after:\"" + end_cursor + """", products:"CO", chemicalComposition:"~Pd") {
totalCount
pageInfo {
hasNextPage
hasPreviousPage
startCursor
endCursor
}
edges {
node {
id
reactants
products
Equation
reactionEnergy
chemicalComposition
systems{
InputFile(format:"vasp")
}
}
}
}}""")
    for edge in response['reactions']['edges']:
        reaction_systems[edge['node']['id']] = edge['node']
    # Book-keeping for pagination
    if not response['reactions']['pageInfo']['hasNextPage']:
        sys.stdout.write(' Done!\n')
        break
    end_cursor = response['reactions']['pageInfo']['endCursor']
    sys.stdout.write('.')
    # Flush so each progress dot shows up as its page arrives rather than
    # all at once when the final newline is written.
    sys.stdout.flush()
# In[42]:
# Report how many distinct reactions were collected. `len` works on the dict
# directly, and printing makes the count visible when run as a script.
print(len(reaction_systems))
# Now we can do further analysis with this combined data set. Note that some reaction energies do not contain geometries (especially older ones). For purely technical reasons they have a placeholder geometry with only one Hydrogen atom and a `1x1x1` Angstrom unit cell.
# ### More Resources
# In order to quickly test which queries are possible, we have a GraphiQL Interface. You can write your own queries and GraphiQL will try to complete your keywords. Once you are happy with the results, you can copy the query back into e.g. a Jupyter Notebook for further analysis. Also check out our Documentation for a complete reference of the database schema and more tutorials and examples.
# In[ ]: