#!/usr/bin/env python
# coding: utf-8

# GHCN V2 Temperatures ANOM (C) CR 1200KM 1880-present
#
# GLOBAL Temperature Anomalies in .01 C base period: 1951-1980
#
# http://climatecode.org/

# In[1]:


import os

import git

# Clone ccc-gistemp and madqc if they are not already present.
if not os.path.exists('ccc-gistemp'):
    git.Git().clone('https://github.com/ClimateCodeFoundation/ccc-gistemp.git')

if not os.path.exists('madqc'):
    git.Git().clone('https://github.com/ClimateCodeFoundation/madqc.git')


# It seems that
#
# http://data.giss.nasa.gov/gistemp/sources_v3/GISTEMPv3_sources.tar.gz
#
# and
#
# http://data.giss.nasa.gov/pub/gistemp/SBBX.ERSST.gz
#
# are down, so let's use a local copy instead.

# In[2]:


get_ipython().system('mkdir -p ccc-gistemp/input')
get_ipython().system('cp GISTEMPv3_sources.tar.gz SBBX.ERSST.gz ccc-gistemp/input')


# In[3]:


get_ipython().run_line_magic('cd', 'ccc-gistemp/')


# We don't really need `pypy` for the fetch phase, but the code is Python 2 and the notebook is Python 3, so this is just a lazy way to call py2k code from a py3k notebook ;-p
#
# PS: we are also using the International Surface Temperature Initiative data (ISTI).

# In[4]:


get_ipython().system('pypy tool/fetch.py isti')


# QC the ISTI data.

# In[5]:


get_ipython().system('../madqc/mad.py --progress input/isti.merged.dat')


# We need to copy the ISTI data into the `input` directory.

# In[6]:


get_ipython().system('cp isti.merged.qc.dat input/isti.merged.qc.dat')
get_ipython().system('cp input/isti.merged.inv input/isti.merged.qc.inv')


# Here is where `pypy` is really needed: this step takes ~35 minutes on vanilla `python` but only ~100 seconds on `pypy`.

# In[7]:


# Run gistemp steps 0-1 and 3-5, using the QC'd ISTI data as the land source.
get_ipython().system("pypy tool/run.py -p 'data_sources=isti.merged.qc.dat;element=TAVG' -s 0-1,3-5")


# Python `gistemp` saves the results in the same format as the Fortran program, but it ships with `gistemp2csv.py` to make it easier to read the data with `pandas`.

# In[8]:


get_ipython().system('pypy tool/gistemp2csv.py result/*.txt')


# In[9]:


import pandas as pd

# The CSV has a 3-line header and uses '*****'/'****' as missing-value markers.
df = pd.read_csv(
    'result/landGLB.Ts.GHCN.CL.PA.csv',
    skiprows=3,
    index_col=0,
    na_values=('*****', '****'),
)


# Let's use `sklearn` to compute the full trend...

# In[10]:


from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score

reg0 = linear_model.LinearRegression()

series0 = df['J-D'].dropna()
y = series0.values
X = series0.index.values[:, None]

reg0.fit(X, y)
y_pred0 = reg0.predict(X)

# mean_squared_error gives the MSE of the fit; r2_score gives the R² used in the plot legend.
mse0 = mean_squared_error(y, y_pred0)
var0 = r2_score(y, y_pred0)


# ... and the trend over the past 30 years.

# In[11]:


reg1 = linear_model.LinearRegression()

# series1 already contains only the last 30 years, so we fit and score on it directly.
series1 = df['J-D'].dropna().iloc[-30:]
y = series1.values
X = series1.index.values[:, None]

reg1.fit(X, y)
y_pred1 = reg1.predict(X)

mse1 = mean_squared_error(y, y_pred1)
var1 = r2_score(y, y_pred1)


# In[12]:


get_ipython().run_line_magic('matplotlib', 'inline')

ax = df.plot.line(y='J-D', figsize=(9, 9), legend=None)
ax.plot(series0.index, y_pred0, 'r--')
ax.plot(series1.index, y_pred1, 'r')
ax.set_xlim([1879, 2018])

leg = f"""Trend in ℃/century (R²)
Full: {reg0.coef_[0]*100:0.2f} ({var0:0.2f})
30-year: {reg1.coef_[0]*100:0.2f} ({var1:0.2f})
"""

ax.text(0.10, 0.75, leg, transform=ax.transAxes);
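
# As a quick follow-up (not part of the original notebook), we can echo the fitted slopes and R² values as plain text, as a sanity check on the numbers shown in the plot legend. This sketch only uses variables already defined above (`reg0`, `reg1`, `var0`, `var1`) and mirrors the legend's per-century scaling.

# In[13]:


# Print the fitted trends computed above; `coef_[0] * 100` converts the
# per-year slope to a per-century value, with the same scaling as the legend.
print(f'Full record trend: {reg0.coef_[0] * 100:0.2f} per century (R² = {var0:0.2f})')
print(f'30-year trend:     {reg1.coef_[0] * 100:0.2f} per century (R² = {var1:0.2f})')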