In this lesson, you will learn about the following:
from IPython.display import HTML

# Placeholder anchor for the Ajenti admin panel; the script below fills in
# the real address on the client side.
admin_link_markup = """
<a id="admin_link" target="_blank" href="#">Ajenti Administration Interface</a>
<p>User: root<br> Password: admin</p>
"""
# Rewrites the anchor's href in the browser, pointing it at port 8000 of
# whatever hostname is serving this notebook.
href_rewrite_script = """
<script type="text/Javascript">
document.getElementById('admin_link').href = "https://" + window.location.hostname + ":8000"
</script>
"""
HTML(admin_link_markup + href_rewrite_script)
User: root
Password: admin
In classification, we train a predictive model to produce a class or a category. Regression is used when you need a predictive model that produces numeric values instead of classes. For example, you would use a classification algorithm to predict the probability of rain, but use a regression algorithm to predict the temperature.
We will be using data from the Climatic Research Unit (CRU) of the University of East Anglia (UEA).
You can find the dataset on their website:
http://www.cru.uea.ac.uk/cru/data/temperature/
http://www.cru.uea.ac.uk/cru/data/temperature/CRUTEM4-gl.dat
This data was processed using OpenOffice Calc (Similar to Excel).
This file has fixed-width columns, so we use fixed-width columns when opening/importing this data.
Column names are added to row number 1
We add two new columns:
This was calculated using this formula:
This was calculated using this formula:
You should filter out all rows with a value of 1 in that column. This is what you end up with:
Copy your data and paste it into a new file. Then, remove the Odd/Even column and save your file as a CSV file.
The final file is available on your system in this relative path "data/temp_data.csv"
import numpy as np
import pandas as pd
import sklearn
import matplotlib.pyplot as plt
# Load the preprocessed CRUTEM4 data (Year, 12 month columns, Average, TSI, CO2, CH4).
# NOTE(review): the prose above says "data/temp_data.csv" -- confirm the
# "_features" file is the intended one.
csv_data = pd.read_csv("data/temp_data_features.csv")
# Scatter + connecting line of the annual average anomaly, colour-coded by value.
plt.figure(figsize=(20,5))
plt.scatter(x=csv_data["Year"], y=csv_data["Average"], marker="o", s=50, c=csv_data["Average"])
plt.plot(csv_data["Year"], csv_data["Average"], label="Annual Global Average Anomaly", alpha=0.4, linewidth=2, c="grey")
# Horizontal line at 0 marks the anomaly baseline.
plt.hlines(0,min(csv_data["Year"])-3,max(csv_data["Year"])+5)
plt.legend(loc="best")
plt.xlim(min(csv_data["Year"])-3, max(csv_data["Year"])+5)
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
plt.colorbar()
plt.grid()
plt.show()
# Bar version of the annual series: red bars above zero, blue below.
plt.figure(figsize=(20,5))
plt.bar(
csv_data["Year"],
csv_data["Average"],
width=0.7,
edgecolor="none",
color=(csv_data["Average"]>0).map({True: 'r', False: 'b'}),
label="Annual Average Global Anomaly",
)
# Zero baseline for reference.
plt.hlines(0,min(csv_data["Year"])-3,max(csv_data["Year"])+5)
plt.legend(loc="best")
plt.xlim(min(csv_data["Year"])-3, max(csv_data["Year"])+5)
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
plt.grid()
plt.show()
# Final Record is not complete so average of the last year is not reliable
# (the output table below shows 2014 with NaN from May onward, so its
# "Average" covers only four months).
csv_data[-1:]
Year | Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec | Average | TSI | CO2 | CH4 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
163 | 2014 | 0.95 | 0.408 | 0.929 | 1.048 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0.83375 | NaN | NaN | NaN |
1 rows × 17 columns
# Prepare monthly data: drop the non-month columns in one call (clearer than
# five chained positional-axis drops), flatten row-major so the months stay
# chronological, and drop the NaN placeholders of the incomplete final year.
monthly_temp = csv_data.drop(["Year", "Average", "TSI", "CO2", "CH4"], axis=1)
monthly_temp = pd.Series(np.ravel(monthly_temp)).dropna()
# Fractional-year x positions: 12 samples per year, starting at 1851.
month_index = list((monthly_temp.index/12.) + 1851)
# Scatter plot of every monthly anomaly, colour-coded by value.
plt.figure(figsize=(15,8))
plt.scatter(
x=month_index,
y=monthly_temp,
marker="o",
label="Monthly Average Global Anomaly",
c=monthly_temp,
alpha=0.6
)
plt.colorbar()
plt.legend(loc="lower right")
plt.xlim(min(month_index)-3,max(month_index)+5)
plt.ylim(min(monthly_temp),max(monthly_temp))
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
plt.grid()
plt.show()
# Bar version of the monthly series: red above the zero baseline, blue below.
plt.figure(figsize=(20,5))
plt.bar(month_index, monthly_temp, width=0.1, edgecolor="none", color=(monthly_temp>0).map({True: 'r', False: 'b'}),
label="Monthly Average Global Anomaly")
plt.hlines(0,min(month_index)-1,max(month_index)+1)
plt.legend(loc="best")
plt.xlim(min(month_index)-1, max(month_index)+1)
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
plt.grid()
plt.show()
# Annual regression target and features.
annual_temp = csv_data["Average"]
annual_index = list(csv_data["Year"].values)            # scalar years, for plotting
annual_index_feature = list(csv_data[["Year"]].values)  # 2-D [[year], ...] rows as sklearn expects
# Prediction range: every year from the first observation to 10 years past the
# last. Use the scalar year list here -- min()/max() over the 2-D feature rows
# would hand 1-element numpy arrays to range().
prediction_annual_index = [[year] for year in range(int(min(annual_index)), int(max(annual_index)) + 10)]
# Code source: Jaques Grobler
# License: BSD 3 clause
from sklearn import linear_model
# Create linear regression object
regr = linear_model.LinearRegression()
# Train the model using the training sets
regr.fit(annual_index_feature, annual_temp)
# The coefficients -- %-formatted print() so the line runs under Python 2 and
# Python 3 alike, consistent with the print() calls below.
print('Coefficients: %s' % regr.coef_)
# The mean square error
print("Residual sum of squares: %.2f"
      % np.mean((regr.predict(annual_index_feature) - annual_temp) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(annual_index_feature, annual_temp))
# Plot outputs: anomaly bars (red above zero, blue below) with the fitted line
# extended 10 years past the data.
plt.figure(figsize=(20,5))
plt.bar(annual_index, annual_temp, width=0.7, edgecolor="none", color=(annual_temp>0).map({True: 'r', False: 'b'}),
        label="Annual Average Global Anomaly", alpha=0.3)
plt.plot(prediction_annual_index[:], regr.predict(prediction_annual_index[:]), color='green',
         linewidth=3, alpha=1.0, label="Linear Regression")
plt.grid()
plt.xlim(np.min(annual_index_feature), np.max(annual_index_feature)+5)
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
plt.legend(loc="best")
plt.show()
Coefficients: [ 0.00651672] Residual sum of squares: 0.06 Variance score: 0.60
# Monthly features as 2-D rows; prediction range is the same timestamps
# shifted 5 years forward.
month_index_feature = [[item] for item in month_index]
prediction_month_index = [[item[0] + 5] for item in month_index_feature]
# Code source: Jaques Grobler
# License: BSD 3 clause
from sklearn import linear_model
regr = linear_model.LinearRegression()
# Train the model using the training sets
regr.fit(month_index_feature, monthly_temp)
# The coefficients -- %-formatted print() so the line also runs under
# Python 3, consistent with the print() calls below.
print('Coefficients: %s' % regr.coef_)
# The mean square error
print("Residual sum of squares: %.2f"
      % np.mean((regr.predict(month_index_feature) - monthly_temp) ** 2))
# Explained variance score: 1 is perfect prediction
print('Variance score: %.2f' % regr.score(month_index_feature, monthly_temp))
# Plot outputs
plt.figure(figsize=(20,5))
plt.bar(month_index, monthly_temp, width=0.1, edgecolor="none", color=(monthly_temp>0).map({True: 'r', False: 'b'}),
        label="Monthly Average Global Anomaly", alpha=0.1)
plt.plot(month_index, regr.predict(month_index_feature), color='black',
         linewidth=3, alpha=0.5, label="Linear Regression")
# Show the last 5 years of the shifted range as the out-of-sample prediction.
plt.plot(prediction_month_index[-5*12:], regr.predict(prediction_month_index[-5*12:]), color='green',
         linewidth=3, alpha=1.0, label="Linear Regression Prediction")
plt.grid()
plt.xlim(np.min(month_index_feature), np.max(month_index_feature)+5)
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
plt.legend(loc="best")
plt.show()
Coefficients: [ 0.00645693] Residual sum of squares: 0.16 Variance score: 0.37
from sklearn.svm import SVR
# Compare a linear-kernel SVR against three RBF-kernel configurations with
# different C / gamma / epsilon trade-offs, all fitted to the annual series.
regr_linear = SVR(kernel="linear")
regr_rbf_1 = SVR(kernel="rbf", C=100.0, gamma=0.004, epsilon=0.01)
regr_rbf_2 = SVR(kernel="rbf", C=10.0, gamma=0.0001, epsilon=0.01)
regr_rbf_3 = SVR(kernel="rbf", C=1.0, gamma=0.0002, epsilon=0.1)
# Train the model using the training sets
regr_linear.fit(annual_index_feature, annual_temp)
regr_rbf_1.fit(annual_index_feature, annual_temp)
regr_rbf_2.fit(annual_index_feature, annual_temp)
regr_rbf_3.fit(annual_index_feature, annual_temp)
# The coefficients
#print 'Coefficients:', regr.coef_
# The mean square error (reported for the first RBF model only)
print("Residual sum of squares: %.2f"
% np.mean((regr_rbf_1.predict(annual_index_feature) - annual_temp) ** 2))
# Explained variance score: 1 is perfect prediction
print('score1: %.2f' % regr_rbf_1.score(annual_index_feature, annual_temp))
print('score2: %.2f' % regr_rbf_2.score(annual_index_feature, annual_temp))
print('score3: %.2f' % regr_rbf_3.score(annual_index_feature, annual_temp))
# Plot outputs: all four fits over the anomaly bars.
plt.figure(figsize=(20,5))
plt.bar(annual_index, annual_temp, width=0.7, edgecolor="none", color=(annual_temp>0).map({True: 'r', False: 'b'}),
label="Annual Average Global Anomaly", alpha=0.3)
plt.plot(prediction_annual_index[:], regr_linear.predict(prediction_annual_index[:]), color='green',
linewidth=3, alpha=0.5, label="Linear Prediction")
plt.plot(prediction_annual_index[:], regr_rbf_1.predict(prediction_annual_index[:]), color='blue',
linewidth=3, alpha=0.5, label="RBF1 Prediction")
plt.plot(prediction_annual_index[:], regr_rbf_2.predict(prediction_annual_index[:]), color='orange',
linewidth=3, alpha=0.5, label="RBF2 Prediction")
plt.plot(prediction_annual_index[:], regr_rbf_3.predict(prediction_annual_index[:]), color='red',
linewidth=3, alpha=0.5, label="RBF3 Prediction")
plt.grid()
plt.xlim(np.min(annual_index_feature), np.max(annual_index_feature)+10)
plt.xticks(np.arange(np.min(annual_index_feature), np.max(annual_index_feature)+10, 10))
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
plt.legend(loc="best")
plt.show()
Residual sum of squares: 0.02 score1: 0.87 score2: 0.82 score3: 0.82
from sklearn.svm import SVR
# NOTE(review): sklearn.grid_search is the pre-0.18 module path; newer
# releases use sklearn.model_selection.
from sklearn.grid_search import GridSearchCV
# Coarse grid search over C and gamma for an RBF SVR, scored by R^2.
regr_rbf = SVR(kernel="rbf")
C = [100, 10, 1]
gamma = [0.005, 0.004, 0.003, 0.002, 0.001]
epsilon=[0.01]
parameters = {"C":C, "gamma":gamma, "epsilon":epsilon}
gs = GridSearchCV(regr_rbf, parameters, scoring="r2")
gs.fit(annual_index_feature, annual_temp)
# %-formatted print() runs under both Python 2 and 3 (the bare print
# statement used before is Python-2-only).
print("Best Estimator:\n%s" % gs.best_estimator_)
Best Estimator: SVR(C=1, cache_size=200, coef0=0.0, degree=3, epsilon=0.01, gamma=0.001, kernel=rbf, max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
from sklearn.grid_search import GridSearchCV
# Refinement pass: search +/-10% around the best C and gamma found above,
# stepping in 1% increments of each value.
regr_rbf = SVR(kernel="rbf")
C = np.arange(gs.best_estimator_.C * 0.9, gs.best_estimator_.C * 1.1, gs.best_estimator_.C * 0.01)
gamma = np.arange(gs.best_estimator_.gamma * 0.9, gs.best_estimator_.gamma * 1.1, gs.best_estimator_.gamma * 0.01)
parameters = {"C":C, "gamma":gamma}
gs = GridSearchCV(regr_rbf, parameters, scoring="r2")
gs.fit(annual_index_feature, annual_temp)
# Portable print() form (the original bare print statement is Python-2-only).
print("Best Estimator:\n%s" % gs.best_estimator_)
Best Estimator: SVR(C=0.93, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma=0.0009, kernel=rbf, max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
from sklearn.svm import SVR
# Evaluate and plot the tuned estimator from the refinement grid search above.
regr_rbf = gs.best_estimator_
# The coefficients
#print 'Coefficients:', regr.coef_
# The mean square error
print("Residual sum of squares: %.2f"
% np.mean((regr_rbf.predict(annual_index_feature) - annual_temp) ** 2))
# Explained variance score: 1 is perfect prediction
print('score: %.2f' % regr_rbf.score(annual_index_feature, annual_temp))
# Plot outputs
plt.figure(figsize=(20,5))
plt.bar(annual_index, annual_temp, width=0.7, edgecolor="none", color=(annual_temp>0).map({True: 'r', False: 'b'}),
label="Annual Average Global Anomaly", alpha=0.3)
plt.plot(prediction_annual_index[:], regr_rbf.predict(prediction_annual_index[:]), color='black',
linewidth=3, alpha=0.7, label="Best RBF Prediction")
plt.grid()
# Title shows the winning hyper-parameters (repr of the estimator).
plt.title(regr_rbf)
plt.xlim(np.min(annual_index_feature)+1, np.max(annual_index_feature)+10)
plt.xticks(np.arange(np.min(annual_index_feature)-1, np.max(annual_index_feature)+10, 10))
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
plt.legend(loc="best")
plt.show()
Residual sum of squares: 0.02 score: 0.84
There are three features that we can add to the temperature data to build a better predictive model. We will look at greenhouse gases (CO2 and CH4) and solar activity. So we will use the following extra features:
This data is from the following sources:
Dr. Pieter Tans, NOAA/ESRL (www.esrl.noaa.gov/gmd/ccgg/trends/) and Dr. Ralph Keeling, Scripps Institution of Oceanography (scrippsco2.ucsd.edu/).
The Solar Radiation and Climate Experiment (SORCE) is a NASA-sponsored satellite mission that is providing state-of-the-art measurements of incoming x-ray, ultraviolet, visible, near-infrared, and total solar radiation. The measurements provided by SORCE specifically address long-term climate change, natural variability and enhanced climate prediction, and atmospheric ozone and UV-B radiation. These measurements are critical to studies of the Sun; its effect on our Earth system; and its influence on humankind.
import math

def average(x):
    """Return the arithmetic mean of a non-empty sequence as a float.

    Raises ValueError on an empty sequence. (The original used ``assert``,
    which is silently stripped under ``python -O``.)
    """
    if len(x) == 0:
        raise ValueError("average() of an empty sequence")
    return float(sum(x)) / len(x)

def pearson_def(x, y):
    """Return the Pearson correlation coefficient of paired samples x and y.

    Accepts any equal-length indexable sequences (lists or numpy arrays --
    the callers below pass ``.values`` arrays). Raises ValueError on length
    mismatch or empty input instead of using strippable asserts.
    """
    if len(x) != len(y):
        raise ValueError("x and y must have the same length")
    n = len(x)
    if n == 0:
        raise ValueError("x and y must be non-empty")
    avg_x = average(x)
    avg_y = average(y)
    diffprod = 0
    xdiff2 = 0
    ydiff2 = 0
    for idx in range(n):
        xdiff = x[idx] - avg_x
        ydiff = y[idx] - avg_y
        diffprod += xdiff * ydiff
        xdiff2 += xdiff * xdiff
        ydiff2 += ydiff * ydiff
    # Textbook formula: covariance over the product of standard deviations.
    # A zero-variance input divides by zero here, as in the original.
    return diffprod / math.sqrt(xdiff2 * ydiff2)
import scipy as sp
# Overlay the TSI reconstruction on the annual anomaly bars using twin y axes.
plt.figure(figsize=(20,5))
plt.bar(annual_index, annual_temp, width=0.7, edgecolor="none", color=(annual_temp>0).map({True: 'r', False: 'b'}),
        label="Annual Average Global Anomaly", alpha=0.2)
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
tsi_ax = plt.twinx()
tsi_ax.plot(csv_data["Year"], csv_data["TSI"], linewidth=3, c="orange", alpha=1.0)
plt.ylabel(u"TSI Reconstruction from IPCC AR5")
plt.legend(loc="best")
plt.grid()
plt.xlim(np.min(annual_index_feature)+1, np.max(annual_index_feature)+10)
plt.show()
# Pearson correlation as a percentage, rounded to 0.1%. Rows where either
# series is NaN are dropped first. print() form runs under Python 2 and 3.
print("Correlation between TSI and Temperature: %s%%" % (round(1000*pearson_def(
    csv_data[["Average","TSI"]].dropna()["Average"].values,
    csv_data[["Average","TSI"]].dropna()["TSI"].values))/10))
/usr/lib/pymodules/python2.7/matplotlib/axes.py:4747: UserWarning: No labeled objects found. Use label='...' kwarg on individual plots. warnings.warn("No labeled objects found. "
Correlation between TSI and Temperature: 35.2%
# Overlay the CO2 series on the anomaly bars (restricted to years where CO2
# data exists) using twin y axes.
plt.figure(figsize=(20,5))
plt.bar(csv_data[["Year", "Average","CO2"]].dropna()["Year"],
        csv_data[["Year", "Average","CO2"]].dropna()["Average"],
        width=0.7, edgecolor="none",
        color=(csv_data[["Year", "Average","CO2"]].dropna()["Average"]>0).map({True: 'r', False: 'b'}),
        label="Annual Average Global Anomaly", alpha=0.2)
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
co2_ax = plt.twinx()
co2_ax.plot(csv_data["Year"], csv_data["CO2"], linewidth=3, c="g", alpha=.8)
plt.ylabel(u"CO2 CCGG (In Situ) ppm")
plt.legend(loc="best")
plt.grid()
plt.xlim(np.min(csv_data[["Year", "Average","CO2"]].dropna()["Year"]),
         np.max(csv_data[["Year", "Average","CO2"]].dropna()["Year"]))
plt.show()
# Fixed the copy-pasted label: this correlation is CO2 vs temperature, not TSI.
print("Correlation between CO2 and Temperature: %s%%" % (round(1000*pearson_def(
    csv_data[["Average","CO2"]].dropna()["Average"].values,
    csv_data[["Average","CO2"]].dropna()["CO2"].values))/10))
Correlation between TSI and Temperature: 91.0%
# Overlay the CH4 series on the anomaly bars (restricted to years where CH4
# data exists) using twin y axes.
plt.figure(figsize=(20,5))
plt.bar(csv_data[["Year", "Average","CH4"]].dropna()["Year"],
        csv_data[["Year", "Average","CH4"]].dropna()["Average"],
        width=0.7, edgecolor="none",
        color=(csv_data[["Year", "Average","CH4"]].dropna()["Average"]>0).map({True: 'r', False: 'b'}),
        label="Annual Average Global Anomaly", alpha=0.3)
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
ch4_ax = plt.twinx()
ch4_ax.plot(csv_data["Year"], csv_data["CH4"], linewidth=3, c="b", alpha=.8)
plt.ylabel(u"CH4 CCGG (Individual Flasks) ppb")
plt.legend(loc="best")
plt.grid()
plt.xlim(np.min(csv_data[["Year", "Average","CH4"]].dropna()["Year"]),
         np.max(csv_data[["Year", "Average","CH4"]].dropna()["Year"]))
plt.show()
# Fixed the copy-pasted label: this correlation is CH4 vs temperature, not TSI.
print("Correlation between CH4 and Temperature: %s%%" % (round(1000*pearson_def(
    csv_data[["Average","CH4"]].dropna()["Average"].values,
    csv_data[["Average","CH4"]].dropna()["CH4"].values))/10))
Correlation between TSI and Temperature: 80.6%
# Fit two RBF trend models to the TSI series: gamma_1 is large so the fit
# follows the faster oscillation; gamma_2 is small so it captures only the
# long-term trend.
regr_rbf = SVR(kernel="rbf")
C = [30]
gamma_1 = [0.015]
gamma_2 = [0.005, 0.004, 0.003, 0.002, 0.001, 0.0009, 0.0008, 0.0007, 0.0006, 0.0005, 0.0004, 0.0003, 0.0002, 0.0001]
epsilon=[0.01, 0.001]
parameters_1 = {"C":C, "gamma":gamma_1, "epsilon":epsilon}
parameters_2 = {"C":C, "gamma":gamma_2, "epsilon":epsilon}
gs_1 = GridSearchCV(regr_rbf, parameters_1, scoring="r2")
gs_2 = GridSearchCV(regr_rbf, parameters_2, scoring="r2")
gs_1.fit(csv_data[["Year","TSI"]].dropna()[["Year"]], csv_data[["Year","TSI"]].dropna()["TSI"])
gs_2.fit(csv_data[["Year","TSI"]].dropna()[["Year"]], csv_data[["Year","TSI"]].dropna()["TSI"])
# %-formatted print() runs under both Python 2 and 3.
print("Best Estimator:\n%s" % gs_1.best_estimator_)
print("Best Estimator:\n%s" % gs_2.best_estimator_)
Best Estimator: SVR(C=30, cache_size=200, coef0=0.0, degree=3, epsilon=0.01, gamma=0.015, kernel=rbf, max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False) Best Estimator: SVR(C=30, cache_size=200, coef0=0.0, degree=3, epsilon=0.01, gamma=0.0001, kernel=rbf, max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
# Rebuild the feature range over the TSI years, extended 10 years past the
# data. NOTE: this rebinds annual_index_feature, shadowing the earlier
# temperature-years version.
annual_index_feature = np.arange(np.min(csv_data[["Year","TSI"]].dropna()["Year"]),
np.max(csv_data[["Year","TSI"]].dropna()["Year"])+10)
annual_index_feature = [[item] for item in annual_index_feature]
plt.figure(figsize=(20,5))
plt.bar(annual_index, annual_temp, width=0.7, edgecolor="none", color=(annual_temp>0).map({True: 'r', False: 'b'}),
label="Annual Average Global Anomaly", alpha=0.3)
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
# Second y axis: raw TSI plus the two fitted trends from the grid searches above.
tsi_ax = plt.twinx()
tsi_ax.plot(csv_data["Year"], csv_data["TSI"], "--", linewidth=3, c="gray", label="TSI")
tsi_ax.plot(annual_index_feature,
gs_1.best_estimator_.predict(annual_index_feature),
linewidth=3, c="green", label="TSI Short-term Trend", alpha=0.4)
tsi_ax.plot(annual_index_feature,
gs_2.best_estimator_.predict(annual_index_feature),
linewidth=3, c="blue", label="TSI Long-term Trend", alpha=0.4)
plt.ylabel(u"TSI Reconstruction from IPCC AR5")
plt.legend(loc="upper left")
plt.xlim(np.min(annual_index_feature)-1, np.max(annual_index_feature))
plt.title("Total Solar Irradiance (TSI) with a short term and long term predictions")
plt.xticks(np.arange(np.min(annual_index_feature)-1, np.max(annual_index_feature), 10))
plt.grid()
plt.show()
st_prediction = gs_1.best_estimator_.predict(annual_index_feature)
lt_prediction = gs_2.best_estimator_.predict(annual_index_feature)
# Wavelength estimate: twice the year gap between the predicted extremes.
# print() form runs under both Python 2 and 3.
print(u"Long-Term Wave Length \u2248 (%s - %s) * 2 \u2248 %s" % (annual_index_feature[np.argmax(lt_prediction)][0],
      annual_index_feature[np.argmin(lt_prediction)][0],
      (annual_index_feature[np.argmax(lt_prediction)][0]-annual_index_feature[np.argmin(lt_prediction)][0])*2
))
# NOTE(review): argmin/argmax over "Year" itself is monotone, so these just
# pick the endpoints of rows 5..12 (1856 and 1863). Presumably the extremes
# of st_prediction were intended -- confirm before trusting this figure.
st_min = csv_data["Year"][np.argmin(csv_data["Year"][5:13])]
st_max = csv_data["Year"][np.argmax(csv_data["Year"][5:13])]
print(u"Short-Term Wave Length \u2248 (%s - %s) * 2 \u2248 %s" % (st_max,
      st_min,
      (st_max-st_min)*2
))
Long-Term Wave Length ≈ (1973 - 1882) * 2 ≈ 182 Short-Term Wave Length ≈ (1863 - 1856) * 2 ≈ 14
# Grid search an RBF trend model for the CO2 series over a wide C/gamma range.
regr_rbf = SVR(kernel="rbf")
C = [1,10,20,30,50,100,1000]
gamma_2 = [0.01, 0.005, 0.004, 0.003, 0.002, 0.001, 0.0009, 0.0008, 0.0007, 0.0006, 0.0005, 0.0004, 0.0003, 0.0002, 0.0001,
           0.00009,0.00008,0.00007,0.00006,0.00005,0.00004,0.00003,0.00002,0.00001]
epsilon=[0.01, 0.001]
parameters_2 = {"C":C, "gamma":gamma_2, "epsilon":epsilon}
gs_2 = GridSearchCV(regr_rbf, parameters_2, scoring="r2")
gs_2.fit(csv_data[["Year","CO2"]].dropna()[["Year"]], csv_data[["Year","CO2"]].dropna()["CO2"])
# %-formatted print() runs under both Python 2 and 3.
print("Best Estimator:\n%s" % gs_2.best_estimator_)
Best Estimator: SVR(C=1000, cache_size=200, coef0=0.0, degree=3, epsilon=0.001, gamma=5e-05, kernel=rbf, max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
# Feature range over the CO2 years, extended 10 years past the data
# (rebinds annual_index_feature again).
annual_index_feature = np.arange(np.min(csv_data[["Year","CO2"]].dropna()["Year"]),
np.max(csv_data[["Year","CO2"]].dropna()["Year"])+10)
annual_index_feature = [[item] for item in annual_index_feature]
plt.figure(figsize=(20,5))
plt.bar(annual_index, annual_temp, width=0.7, edgecolor="none", color=(annual_temp>0).map({True: 'r', False: 'b'}),
label="Annual Average Global Anomaly", alpha=0.3)
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
# Second y axis: raw CO2 plus the fitted trend from the grid search above.
tsi_ax = plt.twinx()
tsi_ax.plot(csv_data["Year"], csv_data["CO2"], "--", linewidth=3, c="gray", label="CO2")
tsi_ax.plot(annual_index_feature,
gs_2.best_estimator_.predict(annual_index_feature),
linewidth=3, c="blue", label="CO2 Trend", alpha=0.4)
plt.ylabel(u"CO2 CCGG (In Situ) ppm")
plt.legend(loc="upper left")
plt.xlim(np.min(annual_index_feature)-1, np.max(annual_index_feature))
plt.title("CO2 with trend")
plt.xticks(np.arange(np.min(annual_index_feature)-1, np.max(annual_index_feature), 10))
plt.grid()
plt.show()
# Grid search an RBF trend model for the CH4 series.
regr_rbf = SVR(kernel="rbf")
C = [1,10,20,30,40,50,60,70,80,90,100,1000]
gamma_2 = [0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,0.09, 0.005, 0.004, 0.003, 0.002, 0.001]
epsilon=[0.01, 0.001, 0.0001, 0.00001]
parameters_2 = {"C":C, "gamma":gamma_2, "epsilon":epsilon}
gs_2 = GridSearchCV(regr_rbf, parameters_2, scoring="r2")
gs_2.fit(csv_data[["Year","CH4"]].dropna()[["Year"]], csv_data[["Year","CH4"]].dropna()["CH4"])
# %-formatted print() runs under both Python 2 and 3.
print("Best Estimator:\n%s" % gs_2.best_estimator_)
Best Estimator: SVR(C=1000, cache_size=200, coef0=0.0, degree=3, epsilon=0.0001, gamma=0.004, kernel=rbf, max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
# Feature range over the CH4 years, extended 10 years past the data
# (rebinds annual_index_feature again).
annual_index_feature = np.arange(np.min(csv_data[["Year","CH4"]].dropna()["Year"]),
np.max(csv_data[["Year","CH4"]].dropna()["Year"])+10)
annual_index_feature = [[item] for item in annual_index_feature]
plt.figure(figsize=(20,5))
plt.bar(annual_index, annual_temp, width=0.7, edgecolor="none", color=(annual_temp>0).map({True: 'r', False: 'b'}),
label="Annual Average Global Anomaly", alpha=0.3)
plt.ylabel(u"CRUTEM4 Temperature Anomaly (\u00B0C)")
# Second y axis: raw CH4 plus the fitted trend from the grid search above.
tsi_ax = plt.twinx()
tsi_ax.plot(csv_data["Year"], csv_data["CH4"], "--", linewidth=3, c="gray", label="CH4")
tsi_ax.plot(annual_index_feature,
gs_2.best_estimator_.predict(annual_index_feature),
linewidth=3, c="blue", label="CH4 Trend", alpha=0.4)
plt.ylabel(u"CH4 CCGG (Individual Flasks) ppb")
plt.legend(loc="upper left")
plt.xlim(np.min(annual_index_feature)-1, np.max(annual_index_feature))
plt.title("Methane (CH4) with trend")
plt.xticks(np.arange(np.min(annual_index_feature)-1, np.max(annual_index_feature), 10))
plt.grid()
plt.show()
For questions please leave them on:
Previous Lesson - Introduction to Machine Learning
In the next lesson: