#!/usr/bin/env python
# coding: utf-8

# # Gathering and Investigating Materials Project Data
#
# This notebook shows how you can use `requests` and `pandas` to gather and explore your data. Often you will need to supply your data by other methods.
#
# The API that we will be using is the Materials Project. Link to the [api description](https://materialsproject.org/docs/api#materials_.28calculated_materials_data.29)
#
# ![Materials Project](../images/materials_project.png)

# In[1]:


import requests

# Root of the v2 REST API. Note the trailing slash when building URLs from it.
base_url = 'https://materialsproject.org/rest/v2/'


# # Getting Materials Project API Key
#
# This [link](https://www.materialsproject.org/open) details the steps necessary.
#
# 1. Visit [dashboard](https://materialsproject.org/dashboard) you may need to login
# 2. Generate API key if it has not already been generated and set `API_KEY` to this value.
#
# The subprocess method is a way that I store my passwords on my computer and will not work for you.
#
# Afterwards in the next cell we will test that our API key works.
#
# This is done by performing a `GET` or `POST` request to `https://www.materialsproject.org/rest/v1/api_check`.

# In[2]:


import subprocess

# Strip surrounding whitespace: a trailing newline in the command output
# would otherwise become part of the `X-API-KEY` header value.
API_KEY = subprocess.check_output(
    'gopass www/materialsproject.com apikey'.split()
).decode('utf-8').strip()
# API_KEY = ""

# One shared session so every request carries the API key header.
session = requests.Session()
session.headers.update({'X-API-KEY': API_KEY})


# In[3]:


# for some reason the v2 API does not include an API check method??
response = session.get('https://www.materialsproject.org/rest/v1/api_check')
data = response.json()
print(data)
if not data['api_key_valid']:
    raise ValueError('You are not authenticated!')


# # Materials Project API
#
# The materials project provides a RESTful API for getting material properties which is detailed [here](https://www.materialsproject.org/docs/api#materials_.28calculated_materials_data.29).
#
# If you have followed the steps above you should be ready to parse materials project data.
#
# A RESTful API is a nice way to expose data over the web. While they provide convenient methods for getting each individual material property they have a limit of 500 queries per day so we need to be efficient in our queries. To do this we will use the `npquery` to get properties in batch.
#
# Let's start by getting a list of materials that are composed of a chosen set of elements (the call below uses `Fe`, `O`, `Ni`, `He`, `Zn`, `Cu`). This does not affect your API limit

# In[20]:


def _materials_url(identifier, suffix):
    """Build a `materials` endpoint URL for *identifier*.

    `base_url` may or may not end with a slash; it is normalised here so the
    resulting URL never contains an accidental `//` path segment.
    """
    return f"{base_url.rstrip('/')}/materials/{identifier}/{suffix}"


def _get_optional_vasp_property(mid, prop):
    """Fetch the optional VASP sub-resource *prop* for material *mid*.

    Returns the `response` payload, or `None` when the API reports
    `valid_response` as false.
    """
    data = session.get(_materials_url(mid, f'vasp/{prop}')).json()
    return data['response'] if data['valid_response'] else None


def get_materials(elements):
    """Return the material ids found for the given element symbols.

    The symbols are joined with `-` to form the query, and the number of
    matches is printed as a side effect.
    """
    elements_str = '-'.join(elements)
    response = session.get(_materials_url(elements_str, 'mids'))
    data = response.json()
    print(f'Found {len(data["response"])} Materials in the Materials Project with the elements: {elements}')
    return data['response']


def get_material_experimental_properties(mid):
    """Return the first experimental-properties record for material *mid*.

    Prints both the raw response body and the parsed record. Raises
    `IndexError` when the `response` list is empty.
    """
    response = session.get(_materials_url(mid, 'exp/'))
    print(response.content)
    data = response.json()['response'][0]
    print(data)
    return data


def get_material_vasp_properties(mid, piezoelectric=False, dielelectric=False):
    """Return the first VASP-properties record for material *mid*.

    When `piezoelectric` / `dielelectric` are true, an extra request is made
    for each and the result is stored under the `'piezoelectric'` /
    `'dielelectric'` key (`None` if the API flags the response as invalid).
    Raises `IndexError` when the base `response` list is empty.
    """
    response = session.get(_materials_url(mid, 'vasp/'))
    material_data = response.json()['response'][0]

    if piezoelectric:
        material_data['piezoelectric'] = _get_optional_vasp_property(mid, 'piezo')

    if dielelectric:
        material_data['dielelectric'] = _get_optional_vasp_property(mid, 'diel')

    return material_data


# In[27]:


material_ids = get_materials(['Fe', 'O', 'Ni', 'He', 'Zn', 'Cu'])


# # Basic VASP properties
#
# Includes:
#
# - `energy`, `energy_per_atom`, `volume`, `formation_energy_per_atom`, `nsites`, `unit_cell_formula`, `pretty_formula`, `e_above_hull`, `spacegroup`, `icsd_ids`, `cif`,
#
# - properties: `band_gap`, `density`, `energy`, `energy_per_atom`, `formation_energy_per_atom`,
# `elasticity`, `total_magnetization`
#
# But some properties are still not included:
#
# - `piezo`, `diel`

# In[86]:


# MgO
material_id = 'mp-1265'
# Na2O
material_id = 'mp-776952'


# In[26]:


data = get_material_vasp_properties(material_id, piezoelectric=True, dielelectric=True)
data.keys()


# # Basic Experimental properties
#
# Turns out to be thermochemical data and not worth looking at

# In[ ]:


get_material_experimental_properties(material_id)


# # Let's gather the material data
#
# The Materials Project definitely is not enforcing their `500` materials per day rate limit.
#
# Also if you have a query that returns more than 3,000 materials it fails. That is why some are commented out.

# In[29]:


# Cache of downloaded records keyed by material id, so re-running the
# download cell does not fetch the same material twice.
materials_data = {}


# In[56]:


# Lets just grab a bunch of materials
# (one row per period of the periodic table; commented rows are excluded)
material_ids = get_materials(['H', 'He',
                              #'Li', 'Be',
                              #'B', 'C', 'N', 'O',
                              #'F', 'Ne',
                              #'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar'
                              'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn',
                              # 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr',
                             ])
print('Number of materials', len(material_ids))


# In[57]:


# store the results
for mid in material_ids:
    # Skip anything already downloaded in an earlier run of this cell.
    if mid in materials_data:
        continue
    materials_data[mid] = get_material_vasp_properties(mid)


# In[58]:


len(materials_data)


# # Save all of the downloaded data to a json file

# In[59]:


import json


# In[60]:


# Use a context manager so the file is flushed and closed deterministically.
with open('mpdata.json', 'w') as json_file:
    json.dump(materials_data, json_file)


# In[61]:


get_ipython().system(' du -sh *')