#!/usr/bin/env python # coding: utf-8 # # Table of Contents #

1  Analysis of the NASA GISTEMP Datasets
2  Notebook Setup
3  Global Data: Combined Land-Surface Air and Sea-Surface Water Temperature Anomalies (Land-Ocean Temperature Index, LOTI)
3.1  Download and parse the files
3.1.1  Initial Data Inspection
3.2  Interactive LOTI plots
3.2.1  Impact of Smoothing
3.2.2  Seasonal data
4  Spatially Resolved LOTI Data
4.1  Load Datasets
4.2  Plot LOTI Projections
4.3  Plot radial bar chart
5  Cleanup
#
#

Analysis of the NASA GISTEMP Datasets

#
by
#
Andreas Putz
#
# In[ ]: # In[1]: import IPython from IPython.display import HTML, display # In[ ]: # The purpose of this notebook is to reproduce and improve on some graphs and movies I have recently encountered on social media such as LinkedIn and others. One of the most recent was a post on LinkedIn (https://www.linkedin.com/feed/update/urn:li:activity:6300917710327029760/) showing this video: # # # # # # #
#

# Originally I liked this graph, until I looked a bit closer: #

    #
  1. The bars should start at the 0 ring
  2. #
  3. The global graph does not have any scales
  4. #
  5. No definition of temperature anomaly
  6. #
  7. How do I reproduce this from the data source cited?
  8. #
#

# The datasource cited is **Land-Ocean Temperature Index, ERSSTv4, 1200km smoothing**, which can be found here https://data.giss.nasa.gov/gistemp/ . #

# # In this notebook, we will take a first look at the GISTEMP temperature anomaly datasets. These datasest denote the temperature deviation with respect to a reference time periode. For the sake of this notebook, we will not concern ourselves with how the anomaly data is prepred. This is a story for another article. # # Notebook Setup # # This section deals with the module imports for this notebook. This section needs to complete for the notebook to work correctly. # # My anaconda setup: # * Python 3.5 environment # * Anaconda notebook extension + community notebook extensions # * conda-forge channel activated # In[2]: import IPython from IPython.display import HTML, display import datetime from ipywidgets import interact, interactive, fixed, interact_manual import ipywidgets as widgets import scipy as sp import pandas as pd import pylab as plt import matplotlib.mlab as mlab from matplotlib.dates import drange, MonthLocator get_ipython().run_line_magic('matplotlib', 'notebook') # Modules to create nice colormaps and earth projections try: from mpl_toolkits.basemap import Basemap import cmocean except Exception as ex: print('Module basemap,cmocean not installed, please use pip or conda to install basemap and cmocean') print(ex) import urllib.request import os import sys import gzip import zipfile import shutil try: import plotly import plotly.plotly as py from plotly.graph_objs import * plotly.offline.init_notebook_mode(connected=True) #plotly.offline.init_notebook_mode() print('Plotly Version: ', plotly.__version__) except: print('Plotly not installed correctly') try: import netCDF4 except Exception as ex: print('netCDF4 is not correctly installed. Please us pip or conda to install.') print(ex) # In[3]: get_ipython().run_cell_magic('html', '', '\n') # In[4]: print('Notebook Executed:\t ' + str(datetime.datetime.now())) print('='*80) print('Python Version:') print('-'*80) print(sys.version) print('='*80) # # Global Data: Combined Land-Surface Air and Sea-Surface Water Temperature Anomalies (Land-Ocean Temperature Index, LOTI) # The **LOTI** (**L**and-**O**cean **T**emperature **I**ndex) uses a more complicated averaging scheme between the ocean and land data to account for the higher heat capactity of water. According to NASA this leads to a slight underestimation of cooling and warming trends due to the dampening effect of the oceans. The two input datasets for the LOTI calculations are: # * **S**urface **A**ir **T**emperatures (SATs) measured by weather stations worldwide, and # * **S**ea **S**urface **T**emperatures (SST) measured by ships, buoys and more recentely satellite data. # # The units for the LOTI are in degC and denote the temperature deviation with respect to a referece temperature.Currently the reference temperature is the mean temperature from 1951 to 1980. Nasa calls this quantity the *temperature anomaly*. # # Nasa executes the avaeraging procedure every month and make the resulting datafiles available. For this section, three files were analyzed: # * The global mean fore each month since 1880: GLB.Ts+dSST.csv # * The northern hemisphere mean for each month since 1880: NH.Ts+dSST.csv # * The sourthern hemisphere mean for each month since 1880: SH.Ts+dSST.csv # # Source: https://data.giss.nasa.gov/gistemp/faq # ## Download and parse the files # In[5]: months = {} months['all']= ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'] months['Q1'] = ['Jan','Feb','Mar'] months['Q2'] = ['Apr','May','Jun'] months['Q3'] = ['Jul','Aug','Sep'] months['Q4'] = ['Oct','Nov','Dec'] # In[6]: try: urllib.request.urlretrieve('https://data.giss.nasa.gov/gistemp/tabledata_v3/GLB.Ts+dSST.csv', 'GLB.Ts+dSST.csv') urllib.request.urlretrieve('https://data.giss.nasa.gov/gistemp/tabledata_v3/NH.Ts+dSST.csv', 'NH.Ts+dSST.csv') urllib.request.urlretrieve('https://data.giss.nasa.gov/gistemp/tabledata_v3/SH.Ts+dSST.csv', 'SH.Ts+dSST.csv') print('Download successful') except: print('Data cold not be retrieved !!') # In[7]: LOTI = {} LOTI["GLOBAL"] = pd.read_csv('GLB.Ts+dSST.csv',header=1,na_values='***',index_col='Year') LOTI["NH"] = pd.read_csv('NH.Ts+dSST.csv',header=1,na_values='***',index_col='Year') LOTI["SH"] = pd.read_csv('SH.Ts+dSST.csv',header=1,na_values='***',index_col='Year') # ### Initial Data Inspection # In[8]: df = LOTI['GLOBAL'] df[-10:] # In[9]: plt.figure(1,figsize=(10,6)) plt.plot(df.index,df[months['all']].mean(axis=1),'k',label='mean') plt.plot(df.index,df[months['all']].max(axis=1),'r--',label='max',alpha=0.3) plt.plot(df.index,df[months['all']].min(axis=1),'b--',label='min',alpha=0.3) ax = plt.gca() ax.fill_between(df.index,df[months['all']].min(axis=1),df[months['all']].max(axis=1),facecolor='k',alpha=0.3) plt.ylabel('LOTI [degC]') plt.xlabel('Year') plt.title('Global LOTI Data') plt.legend() # ## Interactive LOTI plots # # ### Impact of Smoothing # # Plots of the LOTI with different smoothing intervals for various seasonal means. # In[24]: #widget_sel_file = widgets.Dropdown(description='File') #widget_sel_file.options = list(LOTI.keys()) #display(widget_sel_file) widget_sel_avg = widgets.Dropdown(description='Yearly Average Type') widget_sel_avg.options = list(months.keys()) widget_sel_avg.value = 'all' display(widget_sel_avg) widget_sel_window = widgets.IntSlider(description='Averiging Window Size [Years]',min=1,max=20,value=10) display(widget_sel_window) widget_graph = widgets.Button(description = 'Make Graph') display(widget_graph) isfirst = True def on_button_clicked(button): global isfirst plt.figure('LOTI - Averaging Study') i = 1 windowsize = widget_sel_window.value avg_time = months[widget_sel_avg.value] ymax = 0; ymin = 0 for key, df in LOTI.items(): ymax=max(max(df[avg_time].mean(axis=1)),ymax) ymin=min(min(df[avg_time].mean(axis=1)),ymin) for key in LOTI.keys(): df = LOTI[key] plt.subplot(1,3,i) if isfirst: plt.plot(df.index,df[avg_time].mean(axis=1),'k',label='Ann. Mean') plt.plot(df.index, df[avg_time].mean(axis=1).rolling(window=windowsize,center=False).mean(), label=str(windowsize) + ' yr, ' + widget_sel_avg.value) ax = plt.gca() ax.fill_between(df.index, df[avg_time].mean(axis=1).rolling(window=windowsize,center=False).mean()-df[avg_time].mean(axis=1).rolling(window=windowsize,center=False).std(), df[avg_time].mean(axis=1).rolling(window=windowsize,center=False).mean()+df[avg_time].mean(axis=1).rolling(window=windowsize,center=False).std(), alpha=0.2) ax.axvspan(1951,1980,facecolor='green',alpha=0.3) if i==1: plt.ylabel('LOTI [degC]') plt.xlabel('Year') plt.legend() plt.title('Dataset: ' + key) plt.ylim(ymin,ymax) i += 1 isfirst = False plt.figure('LOTI - Averaging Study',figsize=(9.5,4)) widget_graph.on_click(on_button_clicked) # ### Seasonal data # # In adition to the averaged data, we can also look at the monthly temperature variations. # In[11]: plt.figure('Linegraphs - LOTI Monthly',figsize=(9.5,3)) i=1 for key, df in LOTI.items(): plt.subplot(1,3,i) for year in df.index: plt.plot(df.loc[year][months['all']].tolist()) i += 1 # In[12]: plt.figure(4,figsize=(9.5,3)) i=1 for key, df in LOTI.items(): plt.subplot(1,3,i) for year in df.index: plt.plot(df.loc[year][months['all']].tolist()) i += 1 # In[13]: plt.figure('Heatmap - Discrete',figsize=(9.5,4)) i=1 mylimits=[0,0] for key, df in LOTI.items(): mylimits[0] = min([mylimits[0],df.min().min()]) mylimits[1] = max([mylimits[1],df.max().max()]) mymax = max(sp.absolute(mylimits)) for key, df in LOTI.items(): plt.subplot(1,3,i) ax=plt.imshow(df[months['all']].as_matrix(),aspect='auto',vmin=-mymax,vmax=mymax, cmap=cmocean.cm.balance,interpolation=None) if i == 1: plt.ylabel('Year since 1880') ax.axes.get_yaxis().set_visible(True) else: ax.axes.get_yaxis().set_visible(False) plt.title(key) plt.xlabel('Month') i += 1 plt.colorbar() # In[14]: fig = plt.figure('Heatmap - Interpolated',figsize=(9.5,4)) i=1 mylimits=[0,0] for key, df in LOTI.items(): mylimits[0] = min([mylimits[0],df.min().min()]) mylimits[1] = max([mylimits[1],df.max().max()]) mymax = max(sp.absolute(mylimits)) for key, df in LOTI.items(): plt.subplot(1,3,i) ax = plt.imshow(df[months['all']].as_matrix(),aspect='auto',vmin=-mymax,vmax=mymax, cmap=cmocean.cm.balance,interpolation='bilinear') if i == 1: plt.ylabel('Year since 1880') ax.axes.get_yaxis().set_visible(True) else: ax.axes.get_yaxis().set_visible(False) plt.title(key) plt.xlabel('Month') i += 1 plt.colorbar() # # Spatially Resolved LOTI Data # # In addition to the globally averaged LOTI data, NASA also provies a locally resolved dataset, retrievable at *https://data.giss.nasa.gov/pub/gistemp/gistemp1200_ERSSTv5.nc.gz*. # # The dataset is stored as *NETCDF4* file, containing the following datasets: # * *lat* # * *lon* # * *time* # * *time_bnds*, and # * *tempanomaly* # # To visualize this dataset within this notebook, I followed several websites: # # * Howto load NETCDF files with python: http://www.hydro.washington.edu/~jhamman/hydro-logic/blog/2013/10/12/plot-netcdf-data/ # * Projections of earth: http://scitools.org.uk/cartopy/docs/latest/crs/projections.html # * Colorschemes: http://matplotlib.org/cmocean/ # ## Load Datasets # In[15]: try: flocation = 'https://data.giss.nasa.gov/pub/gistemp/gistemp1200_ERSSTv5.nc.gz' fname = 'gistemp1200_ERSSTv5.nc.gz' urllib.request.urlretrieve(flocation, fname) print('Download successful') except Exception as ex: print('Data cold not be retrieved !!') print(ex) # In[16]: with gzip.open(fname, 'rb') as f_in, open(fname[:-3], 'wb') as f_out: f_out.write(f_in.read()) # In[17]: mycdf = netCDF4.Dataset(fname[:-3]) # In[18]: print('File History: ', mycdf.history) print('File Title: ', mycdf.title) print('Variables: ', mycdf.variables.keys()) mycdf.variables # In[19]: lons = mycdf.variables['lon'][:] lats = mycdf.variables['lat'][:] loti = mycdf.variables['tempanomaly'] deltat = mycdf.variables['time'] # ## Plot LOTI Projections # In[25]: # ------------------------------------------- # Setup: Widgets # ------------------------------------------- widget_sel_window = widgets.IntSlider(description='Averiging Window Size [Years]',min=0,max=len(deltat)-1,value=0) display(widget_sel_window) widget_graph = widgets.Button(description = 'Make Graph') display(widget_graph) isfirst = True # ------------------------------------------- # Setup: Plot # ------------------------------------------- m = Basemap(projection='cyl',llcrnrlat=-90,urcrnrlat=90,\ llcrnrlon=-180,urcrnrlon=180,resolution='l') # Create Meshgrid instance lon, lat = sp.meshgrid(lons, lats) xi, yi = m(lon, lat) # ------------------------------------------- # Drawing Function # ------------------------------------------- def on_button_clicked(button): plt.figure('Local LOTI - Monthly Data') myindex = widget_sel_window.value cs = m.pcolor(xi,yi,loti[myindex,:,:],vmin=-16,vmax=16,cmap=cmocean.cm.balance) # Add Grid Lines m.drawparallels(sp.arange(-80., 81., 20.), labels=[1,0,0,0], fontsize=10) m.drawmeridians(sp.arange(-180., 181., 40.), labels=[0,0,0,1], fontsize=10) # Add Coastlines, States, and Country Boundaries m.drawcoastlines() #m.drawstates() m.drawcountries() #m.fillcontinents(color='coral',lake_color='blue') #m.drawmapboundary(fill_color='aqua') cbar = m.colorbar(cs, location='bottom', pad="10%") cbar.set_label('degC') tmp = sp.datetime64('1800-01-01 00:00:00')+sp.timedelta64(int(deltat[myindex]),'D') plt.title('LOTI Temperature Anomaly (' + str(tmp) + ')') plt.show() plt.figure('Local LOTI - Monthly Data',figsize=(9,4)) widget_graph.on_click(on_button_clicked) # In[26]: # ------------------------------------------- # Setup: Widgets # ------------------------------------------- widget_sel_window = widgets.IntSlider(description='Averiging Window Size [Years]',min=0,max=len(deltat)//12,value=0) display(widget_sel_window) widget_graph = widgets.Button(description = 'Make Graph') display(widget_graph) isfirst = True # ------------------------------------------- # Setup: Plot # ------------------------------------------- m = Basemap(projection='cyl',llcrnrlat=-90,urcrnrlat=90,\ llcrnrlon=-180,urcrnrlon=180,resolution='l') # Create Meshgrid instance lon, lat = sp.meshgrid(lons, lats) xi, yi = m(lon, lat) # ------------------------------------------- # Drawing Function # ------------------------------------------- def on_button_clicked(button): plt.figure('Local LOTI - Annual Average') plt.clf() myindex = widget_sel_window.value mywindow = [myindex*12,myindex*12+11] myvalues = loti[mywindow[0]:mywindow[-1],:,:].mean(axis=0) vmax = max((abs(myvalues.max()),abs(myvalues.min()))) cs = m.pcolor(xi,yi,myvalues,vmin=-vmax,vmax=vmax,cmap=cmocean.cm.balance) # Add Grid Lines m.drawparallels(sp.arange(-80., 81., 20.), labels=[1,0,0,0], fontsize=10) m.drawmeridians(sp.arange(-180., 181., 40.), labels=[0,0,0,1], fontsize=10) # Add Coastlines, States, and Country Boundaries m.drawcoastlines() #m.drawstates() m.drawcountries() #m.fillcontinents(color='coral',lake_color='blue') #m.drawmapboundary(fill_color='aqua') cbar = m.colorbar(cs, location='bottom', pad="10%") cbar.set_label('degC') tmp0 = sp.datetime64('1800-01-01 00:00:00')+sp.timedelta64(int(deltat[mywindow[0]]),'D') tmp0 = pd.to_datetime(tmp0).strftime('%Y-%m') try: tmp1 = sp.datetime64('1800-01-01 00:00:00')+sp.timedelta64(int(deltat[mywindow[1]]),'D') tmp1 = pd.to_datetime(tmp1).strftime('%Y-%m') plt.title('LOTI Temperature Anomaly (' + tmp0 + ' to ' + tmp1 + ')' + ',\nAlgebraic Mean: ' + str(myvalues.mean())) except: tmp1 = sp.datetime64('1800-01-01 00:00:00')+sp.timedelta64(int(deltat[-1]),'D') tmp1 = pd.to_datetime(tmp1).strftime('%Y-%m') plt.title('LOTI Temperature Anomaly (' + tmp0 + ' to ' + tmp1 + ')' + '\nAlgebaric Mean: ' + str(myvalues.mean())) plt.show() plt.figure('Local LOTI - Annual Average',figsize=(9,4)) widget_graph.on_click(on_button_clicked) # ## Plot radial bar chart # The final part to pthe puzzle and very much work in progress. # In[22]: plt.figure() N = 80 bottom = 0 max_height = 4 theta = sp.linspace(0.0, 2 * sp.pi, N, endpoint=False) radii = max_height*(sp.random.rand(N)-0.5) width = (2*sp.pi) / N bottom = radii*0. bottom[radii<0] = radii[radii<0] top = radii top[radii<0] = 0. ax = plt.subplot(111, polar=True) bars = ax.bar(theta, top, width=width, bottom=bottom) # Use custom colors and opacity for r, bar in zip(radii, bars): bar.set_facecolor(plt.cm.jet(r / 10.)) bar.set_alpha(0.8) plt.show() # # Cleanup # In[23]: # os.remove('*.csv')