#!/usr/bin/env python
# coding: utf-8

# ### Preparations
# Let's start by making some necessary imports and definitions. You have to
# install `requests` first by running `pip install --user requests`.

# In[33]:


import pprint
import sys

import requests

GRAPHQL = 'http://api.catalysis-hub.org/graphql'


def fetch(query, timeout=60):
    """Send a GraphQL query to the Catalysis Hub API and return its data.

    Args:
        query: The GraphQL query document as a string.
        timeout: Seconds to wait for the server before giving up. The
            tutorial text further down states that the server aborts
            requests after 30 seconds, so the default leaves some headroom
            while still preventing the script from hanging forever.

    Returns:
        The value stored under the ``data`` key of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an HTTP error status.
        RuntimeError: If the JSON response carries no ``data`` payload; the
            message includes whatever the response lists under ``errors``.
    """
    response = requests.get(GRAPHQL, params={'query': query}, timeout=timeout)
    # Fail early with a clear HTTP error instead of trying to parse an
    # error page as JSON.
    response.raise_for_status()
    payload = response.json()
    data = payload.get('data')
    if data is None:
        raise RuntimeError(
            'GraphQL query returned no data: {}'.format(payload.get('errors'))
        )
    return data


# ### List of Publications
# Let's start flexing our querying muscles by querying a list of publications.

# In[21]:


raw_publications = fetch("""{publications {
  edges {
    node {
      id
      authors
      title
      journal
      year
      doi
    }
  }
}}
""")['publications']['edges']
# Every edge wraps the actual record in a 'node' entry; unwrap them.
publications = [edge['node'] for edge in raw_publications]
pprint.pprint(publications[:3])


# We only show the first 3 results here for brevity, but you can of course
# retrieve the full list by removing the `[:3]` slice. The
# `['edges']['node']` may seem a little annoying, but it will allow us to page
# through responses that would be too large for a single request, as we will
# see below.

# ### Query Reactions
# Next, let's query some reactions. This is the same type of query as you
# would get from the Reaction Energetics App. Let's get all energies that end
# with `CO` adsorbed on the surface and some Palladium in the surface. The
# tilde (`~`) before the `Pd` indicates that the field only has to contain
# `Pd`. If you want the exact match, drop the tilde. Here we only ask for the
# first 10 matches (`first: 10`).

# In[25]:


# In a notebook the value of this expression is displayed as the cell output;
# when running as a plain script, wrap it in pprint.pprint(...) to see it.
fetch("""
{reactions(first: 10, products:"CO", chemicalComposition:"~Pd") {
  totalCount
  pageInfo {
    hasNextPage
    hasPreviousPage
    startCursor
    endCursor
  }
  edges {
    node {
      reactants
      products
      Equation
      reactionEnergy
      chemicalComposition
    }
  }
}}
""")


# ### Query Systems
# Next up is `systems`. We use a different filter to filter for
# energies > -14 eV. So that should give us H or H2 at best.
# In[26]:


# `fetch` is the GraphQL helper from the Preparations section of this file.
# In a notebook the value of this expression is displayed as the cell output;
# when running as a plain script, wrap it in pprint.pprint(...) to see it.
fetch("""
{systems(first: 100, energy: -14, op:">") {
  totalCount
  edges {
    node {
      id
      Formula
      Cifdata
      energy
      calculatorParameters
    }
  }
}}
""")


# ### Combining Queries and Stepping Through Large Queries
# The main tables that `catalysis-hub.org` offers are `reactions`, `systems`,
# and `publications`. Often it is useful to query more than one table at once
# (i.e. SQL join) to filter one table but get the data from a different table
# associated with it. For example, we may want to filter for a certain type of
# reaction and get the structures associated with it.

# In[28]:


# Nesting `systems` inside the reaction node performs the join: every reaction
# comes back together with the input files of its associated structures.
reaction_systems = fetch("""{reactions(first: 1, after:"", products:"CO", chemicalComposition:"~Pd") {
  totalCount
  pageInfo {
    hasNextPage
    hasPreviousPage
    startCursor
    endCursor
  }
  edges {
    node {
      id
      reactants
      products
      Equation
      reactionEnergy
      chemicalComposition
      systems {
        InputFile(format:"vasp")
      }
    }
  }
}}
""")
# Displayed as the cell output in a notebook; a no-op in a plain script.
reaction_systems


# One constraint we have to work with is that our server times out requests
# after 30 seconds (this gives others a chance to query, too). Especially when
# generating a lot of structures we can quickly run into this limitation. To
# get around this we can use the `pageInfo` attributes as well as the `first`
# and `after` keywords to roll our own pagination and combine the whole list.
# We will write a simple loop that never ends on its own and `break` out of
# it when the `pageInfo` indicates that we are done.
# To step through a large query, do this:

# In[39]:


# Query for one page of results. The `%s` placeholder receives the cursor of
# the last item already fetched; an empty cursor starts from the beginning.
PAGED_REACTIONS_QUERY = """{reactions(first: 5, after:"%s", products:"CO", chemicalComposition:"~Pd") {
  totalCount
  pageInfo {
    hasNextPage
    hasPreviousPage
    startCursor
    endCursor
  }
  edges {
    node {
      id
      reactants
      products
      Equation
      reactionEnergy
      chemicalComposition
      systems {
        InputFile(format:"vasp")
      }
    }
  }
}}"""

end_cursor = ''
reaction_systems = {}
while True:
    # `fetch` is the GraphQL helper from the Preparations section of this
    # file; each call retrieves the next page of at most 5 reactions.
    response = fetch(PAGED_REACTIONS_QUERY % end_cursor)
    reactions = response['reactions']

    # Key the collected nodes by reaction id so pages merge into one mapping.
    for edge in reactions['edges']:
        reaction_systems[edge['node']['id']] = edge['node']

    # Book-keeping for pagination
    page_info = reactions['pageInfo']
    if not page_info['hasNextPage']:
        sys.stdout.write(' Done!\n')
        break
    end_cursor = page_info['endCursor']
    sys.stdout.write('.')
    # Flush so each progress dot shows up while the loop is still running.
    sys.stdout.flush()


# In[42]:


# Number of distinct reactions collected across all pages (displayed as the
# cell output in a notebook; a no-op in a plain script).
len(reaction_systems)


# Now we can do further analysis with this combined data set. Note that some
# reaction energies do not contain geometries (especially older ones). For
# purely technical reasons they have a placeholder geometry with only one
# Hydrogen atom and a `1x1x1` Angstrom unit cell.

# ### More Resources
# In order to quickly test which queries are possible, we have a GraphiQL
# Interface. You can write your own queries and GraphiQL will try to complete
# your keywords. Once you are happy with the results, you can copy the query
# back into e.g. a Jupyter Notebook for further analysis. Also check out our
# Documentation for a complete reference of the database schema and more
# tutorials and examples.

# In[ ]: