#!/usr/bin/env python
# coding: utf-8

# ## Advanced Visualization

# *authors: Alireza Faghaninia, Alex Dunn, Joseph Montoya, Daniel Dopp*

# This notebook was last updated 11/15/18 for version 0.4.5 of matminer.
# 
# **Note that in order to get the in-line plotting to work, you might need to start Jupyter notebook with a higher data rate, e.g., ``jupyter notebook --NotebookApp.iopub_data_rate_limit=1.0e10``. We recommend you do this before starting.**
# 
# 
# **A [Citrine](https://citrination.com) API key is required to load the data for this notebook (can be found under account settings). Set the ``CITRINE_API`` environment variable or add the API key as an argument to ``CitrineDataRetrieval()``.** (See the data retrieval notebook for reference.)

# This notebook illustrates a few more advanced examples of matminer's visualization features.  Note that these examples and a few additional ones are included in script form in the [matminer_examples](https://github.com/hackingmaterials/matminer_examples/tree/master/figrecipes) repository.

# In[1]:


import pprint

import pandas as pd
from pymatgen.core.composition import Composition
from figrecipes import PlotlyFig
from matminer.datasets import load_dataset
from matminer.data_retrieval.retrieve_Citrine import CitrineDataRetrieval


# ### Plotting thermoelectric data
# 
# This example generates a scatter plot of the properties of thermoelectric materials based on the data
# available in http://www.mrl.ucsb.edu:8080/datamine/thermoelectric.jsp
# The data is extracted via Citrine data retrieval tools. The dataset
# id on Citrine is 150557

# In[2]:


# GET DATA
# Note that your Citrine API key must be set as the CITRINE_API
# environment variable or as an argument to the CitrineDataRetrieval() constructor
cdr = CitrineDataRetrieval()
df_te = cdr.get_dataframe(criteria={'data_type': 'experimental', 'data_set_id': 150557},
                          properties=['Seebeck coefficient'], secondary_fields=True)

# CLEAN AND PRUNE DATA
# Keep the chemical formula (a text label used for hover labels) together with
# the measured properties; only the measured properties are converted to
# numeric data types.
label_col = 'chemicalFormula'
numeric_cols = ['Electrical resistivity', 'Seebeck coefficient',
                'Thermal conductivity', 'Thermoelectric figure of merit (zT)']
df_te = df_te[[label_col] + numeric_cols].copy()

# errors='coerce' turns unparseable entries into NaN. NaN fails every
# comparison, so rows with an unparseable resistivity or Seebeck value are
# dropped by the filters below. (errors='ignore' would instead leave the whole
# column as strings, which breaks the numeric comparisons, and it is
# deprecated in recent pandas releases.)
df_te[numeric_cols] = df_te[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Filter data based on resistivities strictly between 0.0005 and 0.1 and
# Seebeck coefficients whose magnitude is below 500, and simplify zT naming
resistivity = df_te['Electrical resistivity']
df_te = df_te[(5e-4 < resistivity) & (resistivity < 0.1)]
df_te = df_te[df_te['Seebeck coefficient'].abs() < 500]
df_te = df_te.rename(columns={'Thermoelectric figure of merit (zT)': 'zT'})

# GENERATE PLOTS
pf = PlotlyFig(df_te, x_scale='log', fontfamily='Times New Roman',
               hovercolor='white', x_title='Electrical Resistivity (cm/S)',
               y_title='Seebeck Coefficient (uV/K)',
               colorbar_title='Thermal Conductivity (W/m.K)',
               mode='notebook')

# Point labels, marker sizes and marker colors are taken from dataframe columns
pf.xy((df_te['Electrical resistivity'], df_te['Seebeck coefficient']),
      labels='chemicalFormula',
      sizes='zT',
      colors='Thermal conductivity',
      color_range=[0, 5])


# ### Plotting modes
# 
# PlotlyFig may use a number of plotting modes, which are illustrated in the following examples.  First we set up the figure:

# In[3]:


# The dataset is downloaded from an external repository the first time it is loaded
df = load_dataset("elastic_tensor_2015")

# Both axes are logarithmic; 'offline' mode is requested explicitly here
pf = PlotlyFig(
    df,
    title='Elastic data',
    mode='offline',
    x_scale='log',
    y_scale='log',
)


# * offline - creates a local html file. Note that offline mode in plotly disables LaTeX and some fonts on some systems by default. For the full-featured Plotly experience, please use the Plotly online mode. Offline is the default mode.

# In[4]:


# Let's plot offline (the default) first. An html file will be created.
xy_data = (df['poisson_ratio'], df['elastic_anisotropy'])
pf.xy(xy_data, labels='formula')


# In[5]:


# Save the figure under an explicit filename, without showing the offline plot
pf.set_arguments(
    show_offline_plot=False,
    filename="myplot.html",
)
pf.xy(
    (df['poisson_ratio'], df['elastic_anisotropy']),
    labels='formula',
)


# * static - creates a single image file. Use height and width to specify the size of the image desired. api_key and username are required for static plotting mode.

# In[6]:


# Uncomment and set your Plotly API information to plot in static mode
# pf.set_arguments(mode='static', api_key=YOUR_API_KEY,
#                 username=YOUR_USERNAME,
#                 filename="my_PlotlyFig_plot.jpeg")
# pf.xy([('poisson_ratio', 'elastic_anisotropy')], labels='formula')


# * online - Opens the figure in the Plotly online module.

# In[7]:


# pf.set_arguments(mode='online')
# pf.xy([('poisson_ratio', 'elastic_anisotropy')], labels='formula')


# * notebook - Opens the figure in a Jupyter/IPython notebook.

# In[6]:


# Switch the plotting mode to 'notebook' and draw the scatter plot again
pf.set_arguments(mode='notebook')
pf.xy(
    (df['poisson_ratio'], df['elastic_anisotropy']),
    labels='formula',
)


# * return - Pass "return_plot=True" into any plotting method - Returns the figure as a 'bare-bones' dictionary. This can then be edited and passed into 'create_plot' of PlotlyFig or used directly with plotly.

# In[7]:


# Ask for the figure to be returned (return_plot=True) and inspect its contents
fig = pf.xy(
    (df['poisson_ratio'], df['elastic_anisotropy']),
    labels='formula',
    return_plot=True,
)
print("Here's our returned figure!")
pprint.pprint(fig)


# In[8]:


# Edit the returned figure, then plot it using the 'notebook' plot mode:
layout = fig['layout']
layout['hoverlabel']['bgcolor'] = 'pink'
layout['title'] = 'My Custom Elastic Data Figure'
pf.set_arguments(mode='notebook')
pf.create_plot(fig)


# ### Formatting
# 
# PlotlyFig provides a set of arguments which make setting up good
# looking Plotly templates quicker and easier.
# 
# Most formatting options can be set through the initializer of PlotlyFig.
# These options will remain the same for all figures produced, but you can
# change some common formatting options after instantiating a PlotlyFig
# object using set_arguments.
# 
# Chart-specific formatting options can be passed to plotting methods.

# In[9]:


# Formatting options handed to the PlotlyFig initializer; they stay in effect
# for every figure produced by this object.
format_options = {
    # 'api_key': api_key,
    # 'username': username,
    'mode': 'notebook',
    # Titles
    'title': 'Comparison of Bulk Modulus and Shear Modulus',
    'x_title': 'Shear modulus (GPa)',
    'y_title': 'Bulk modulus (GPa)',
    'colorbar_title': 'Poisson Ratio',
    # Fonts and ticks
    'fontfamily': 'Raleway',
    'fontscale': 0.75,
    'fontcolor': '#283747',
    'ticksize': 30,
    # Colors and hover behavior
    'colorscale': "Reds",
    'hovercolor': 'white',
    'hoverinfo': 'text',
    'bgcolor': '#F4F6F6',
    # Spacing
    'margins': 110,
    'pad': 10,
}
pf = PlotlyFig(df=df, **format_options)

# Chart-specific options (labels, colors) go to the plotting method itself
pf.xy(
    (df['G_VRH'], df['K_VRH']),
    labels='material_id',
    colors='poisson_ratio',
)


# LaTeX labels are also supported, but only in online/static mode.

# In[12]:


# We can also use LaTeX if we use Plotly online/static
# pf.set_arguments(title="$\\text{Origin of Poisson Ratio } \\nu $",
#                  y_title='$K_{VRH} \\text{(GPa)}$',
#                  x_title='$G_{VRH} \\text{(GPa)}$',
#                  colorbar_title='$\\nu$',
#                  api_key=YOUR_API_KEY, username=YOUR_USERNAME)
# pf.xy(('G_VRH', 'K_VRH'), labels='material_id', colors='poisson_ratio')