#!/usr/bin/env python
# coding: utf-8
#
#
# # Activation impacts on fast cloud responses in a coupled aerosol-climate model ...
#
# ## ... and nifty visualization / analysis tools
# ### or - *Why I don't have cool results to show you (yet)*
# **Daniel Rothenberg (darothen@mit.edu)**
#
# Joint Program Student Luncheon, July 16, 2015
#
#
#
#
#
#
#
# # Python for Scientists
#
# - Mature (well-documented, efficient, easy-to-use) data analysis packages
# - Portable ecosystem - any machine, any operating system
# - Huge userbase
# - Many cool tools in active development
# - Supports many different development styles
#     - scripting (editing files from terminal/shell)
#     - notebook environment
#     - full-blown IDEs for software development - [Spyder](https://github.com/spyder-ide/spyder), [PyCharm](https://www.jetbrains.com/pycharm/), etc.
#
# ## Python helps create reproducible, verifiable science
# # Reproducibility (1) - Version Control
# In[12]:
import subprocess
# In[16]:
def get_git_versioning():
    """Return the short hash of the currently checked-out git commit.

    Runs ``git rev-parse --short HEAD`` in the current working directory.

    Returns
    -------
    str
        The abbreviated commit hash, with surrounding whitespace removed.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``git`` command exits with a non-zero status.
    """
    raw = subprocess.check_output(
        ['git', 'rev-parse', '--short', 'HEAD']
    )
    # check_output hands back bytes on Python 3; decode so that callers which
    # format or str() the result get 'ee8a2e9' rather than "b'ee8a2e9'".
    return raw.decode('utf-8').strip()
# Report which commit the rest of this session is running against.
head_commit = get_git_versioning()
print("Current HEAD git commit: ", str(head_commit))
# **Fetch *git* commit at any time, attach it as metadata in whatever figure or output file you create**
# But why should you care about this?
#
# In[35]:
# Show the two most recent commits of the repository in the current directory.
# `get_ipython` is not defined in this file; it is only available when the
# script runs inside an IPython/Jupyter session.
# NOTE(review): the execution counters suggest the chdir cell below (In[19])
# ran before this one (In[35]) in the original notebook; executed top to
# bottom, this command runs in the starting directory instead - confirm intent.
get_ipython().system('git log -2')
# In[19]:
import os
# Remember the starting directory. `pwd` is not referenced again in the
# visible source, so nothing switches back to it.
pwd = os.getcwd()
# Hard-coded, machine-specific checkout location; later relative paths and
# shell commands resolve against it.
os.chdir("/Users/daniel/workspace/Research/marc_aie")
# **Oh no, I broke something!**
# In[30]:
# Diff marc_aie/convert.py between commits ee8a2e9 and ebe1ce2.
get_ipython().system('git diff ee8a2e9 ebe1ce2 marc_aie/convert.py')
# ## But, I can't make my *[insert-super-secret-project]* public!
#
# # Reproducibility (2) - Environments
# ## Common Problems
#
# - Something changed in a version of a toolkit or a package and now I get different answers!
# - It's worse, my code doesn't even run any more!
# - Someone forgot to tell me that I need package *xyz:v.a.b.c* but it won't compile on my machine!
# - It's worse, it has a conflicting dependency with another package that I **really** need!
# ## Solution - Package Managers
#
# ### [`conda`](http://conda.pydata.org/docs/)
#
# - A Python package manager with sophisticated environment management (à la `virtualenv`)
# - Maintain minimal Python installation for a given project
# - Distribute and automatically build your dependencies
# - Automatically comes with [Anaconda Python distribution](http://www.continuum.io/anaconda) and [Miniconda](http://conda.pydata.org/miniconda.html)
# - Social site [binstar](https://binstar.org) for contributing packages
#
# ### [`Docker`](https://www.docker.com/)
#
# - Full-stack software management
# - Rapidly re-deploy your entire working environment to a new machine (local, supercomputer, distributed)
# **environment.yml**
#
# ```yaml
# name: marc_aie
# channels:
# - unidata
# - scitools
# dependencies:
# - cartopy>=0.12
# - ipython>=3.2.0
# - ipython-notebook
# - matplotlib
# - netcdf4
# - numpy
# - python=3.4
# - seaborn
# - xlrd
# - xray>=0.5
# ```
# **Create environment based on `environment.yml`**
#
# ```bash
# cd [my_repo_dir]
# conda env create
# ```
# **Activate environment**
#
# ```bash
# source activate marc_aie
# ```
# In[45]:
# The three commands below are shell escapes; they need an IPython session
# (for `get_ipython`) and the `conda` / `binstar` executables on the PATH.
# List the conda environments known to this installation.
get_ipython().system('conda info -e')
# In[46]:
# List the packages installed in the currently active environment.
get_ipython().system('conda list')
# In[48]:
# Search binstar for conda-type packages matching "cartopy".
get_ipython().system('binstar search -t conda cartopy')
#
# # Visualizations
#
#
# **[Cartopy](http://scitools.org.uk/cartopy/docs/latest/) is a powerful wrapper for matplotlib enabling cartographic/geographic transformations of your data**
# In[74]:
import marc_aie as ma
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')
import numpy as np


def _time_mean_ts(ds):
    """Return the 'TS' variable of ``ds`` averaged over its ``time`` dimension."""
    return ds['TS'].mean("time")


# Build the "TS" variable wrapper, load its datasets, and reduce each one
# to its time mean.
v = ma.CESMVar("TS")
v.load_datasets()
v.apply(_time_mean_ts)

# Pick out the ('arg_comp', 'F2000') entry for the map drawn in the next cell.
data = v.data['arg_comp', 'F2000']
print(data)
# In[83]:
# NOTE(review): `ccrs` is not imported anywhere in the visible source -
# presumably `cartopy.crs`; confirm and import it before running as a script.
map_projection = ccrs.PlateCarree()
# 23 evenly spaced contour levels from 210 to 320 (a step of 5).
contour_levels = np.linspace(210, 320, 23)

fig = plt.figure(figsize=(12, 5))
ax = fig.add_subplot(111, projection=map_projection)
gp = ma.global_plot(
    data,
    ax=ax,
    cmap="cubehelix_r",
    levels=contour_levels,
)
# No mappable is passed, so pyplot uses whatever it considers current -
# presumably the artist drawn by ma.global_plot; confirm.
cb = plt.colorbar(orientation='horizontal')
title_text = v.long_name + " (%s)" % v.units
_ = plt.title(title_text)
# ## Under the hood
#
#
#
# ([link to xray docs](http://xray.readthedocs.org/en/stable/))
# ## `xray`
#
# - Powerful toolkit for accessing, manipulating, and wrangling NetCDF / HDF5 data
# - Maintained by employees from [**THE CLIMATE CORPORATION**](https://www.climate.com/)
# - Under active development
# - Cool features:
#     - lazy evaluation system; doesn't do any numerical work until it *absolutely* needs to
#     - implements [`dask`](http://dask.pydata.org/en/latest/) out-of-core computation library
#     - extends pythonic interfaces for arrays and maps
#     - serialize to/from NetCDF
# ### Simple xray example
# In[90]:
# Rebinds `v` and `data`, replacing the objects created for the map above.
v = ma.CESMVar("TS")
v.load_datasets()
# `data` is indexed by 'TS' and selected along `aer` further down, so
# create_master presumably merges the loaded datasets into one object -
# confirm against ma.create_master.
data = ma.create_master(v)
# In[91]:
print(data)
# In[106]:
def pd_minus_pi(ds):
    """Return the ``aer='F2000'`` selection of ``ds`` minus its ``aer='F1850'`` selection."""
    return ds.sel(aer='F2000') - ds.sel(aer='F1850')


# Difference the two `aer` cases, average over longitude, then average over
# time within each season.
ts_difference = data['TS'].pipe(pd_minus_pi)
zonal_mean = ts_difference.mean('lon')
a = zonal_mean.groupby('time.season').mean('time')
print(a)
# In[120]:
import seaborn as sns

sns.set(style='ticks')

# One line per (season, act) pair, drawn against latitude with the
# 'Paired' palette active.
with sns.color_palette('Paired'):
    for seas in a.season:
        for act in a.act:
            d = a.sel(season=seas, act=act)
            line_label = "%s - %s" % (act.values, seas.values)
            plt.plot(d.lat, d, label=line_label)

# NOTE(review): the source lost its indentation, so whether these two calls
# originally sat inside the `with` block cannot be recovered; neither call
# takes its behaviour from the palette context in this code.
sns.despine(offset=10)
plt.legend(loc='best')