#!/usr/bin/env python
# coding: utf-8

# # Makeover Monday, 9 April 2018
#
# Analysis and visualisation of Arctic sea ice extent for
# [Makeover Monday](http://www.makeovermonday.co.uk).
#
# Data from [US National Snow & Ice data centre](https://nsidc.org/) via
# [Makeover Monday](http://www.makeovermonday.co.uk/data/).

# In[1]:

import collections
from datetime import datetime

import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats

# `get_ipython` only exists inside an IPython/Jupyter session. Skip the inline
# backend setup when this file is run as a plain script, instead of dying with
# a NameError before any analysis happens.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass


# # Read the dataset
# Rename the columns while we're here.

# In[2]:

sea_ice_raw = pd.read_excel('Arctic Sea Ice Extent.xlsx')
sea_ice_raw.columns = ['date', 'extent']
sea_ice_raw.head()


# In[3]:

sea_ice_raw.tail()


# In[4]:

sea_ice_raw.dtypes


# In[5]:

sea_ice_raw.extent.plot()


# # Prepare for reshaping
# Annotate each entry in the dataset with the year, month, and day of year.

# In[6]:

# The `.dt` accessor is used on `date`, so that column must hold datetime
# values for these three lines to work.
sea_ice_raw['year'] = sea_ice_raw['date'].dt.year
sea_ice_raw['doy'] = sea_ice_raw['date'].dt.dayofyear
sea_ice_raw['month'] = sea_ice_raw['date'].dt.month
sea_ice_raw.head()


# # One column per year, one row per day
# Create a DataFrame with one column per year, and one row per day number.
#
# Plot the data, with each line being a separate year. Pick out the years
# 1988 and 2017 in colour; the years in between are drawn in light grey.

# In[7]:

sea_ice_day_year = pd.pivot_table(
    sea_ice_raw, index='doy', columns='year', values='extent')


# In[8]:

# Grey background lines first (kept out of the legend), then the two
# highlighted years drawn on the same axes.
ax = sea_ice_day_year.loc[:, 1989:2016].plot(
    legend=None, label='_nolegend_', color='lightgrey')
sea_ice_day_year[1988].plot(color='blue', ax=ax, legend=True)
sea_ice_day_year[2017].plot(color='red', ax=ax, legend=True)


# # One column per year, one row per month
# Aggregate with median, to give an overall value for each month.
# In[9]:

# Pass the aggregation by name: handing pandas the `np.median` callable emits
# a FutureWarning on recent versions and is slated to change meaning.
sea_ice_month_year = pd.pivot_table(
    sea_ice_raw, index='month', columns='year', values='extent',
    aggfunc='median')


# In[10]:

sea_ice_month_year


# ## Medians for each month
# These are the median values of ice extent for each month, over the whole
# dataset.

# In[11]:

# Computed once and reused below as the per-month baseline.
sea_ice_month_median = sea_ice_month_year.median(axis=1)
sea_ice_month_median


# Now find the difference of each month's extent from the median for that
# month over all years.

# In[12]:

# axis=0 aligns the baseline on the month index, so every year column has the
# same per-month median subtracted from it.
sea_ice_month_year_diff = sea_ice_month_year.subtract(
    sea_ice_month_median, axis=0)
sea_ice_month_year_diff


# In[13]:

sea_ice_month_year_diff.loc[:, 1988:2017]


# ## Quick plot of each year
# How each year varies against the median values for each month.

# In[14]:

sea_ice_month_year_diff.plot(
    legend=None, subplots=True, layout=(7, 6), sharey=True,
    figsize=(15, 15), kind='bar');


# Plot just the last 30 complete years.

# In[15]:

sea_ice_month_year_diff.loc[:, 1988:2017].plot(
    legend=None, subplots=True, layout=(6, 5), sharey=True,
    figsize=(15, 15), kind='bar');


# Similar plot, with each month in a subplot, trend over years.

# In[16]:

sea_ice_month_year_diff.T.plot(
    legend=None, subplots=True, layout=(3, 4), sharey=True,
    figsize=(15, 15), kind='line');


# ## Colouring the data
# Pandas doesn't make it easy to have each bar in a chart have a different
# colour depending on its value. Therefore, we'll create a separate dataframe
# with the colour that each data point should be plotted with.
#
# To start with, normalise the data, so that the data ranges from 0 to 1.

# In[17]:

# Min-max scaling over the whole table (all months, all years), so colours are
# comparable between subplots.
sid_max = sea_ice_month_year_diff.max().max()
sid_min = sea_ice_month_year_diff.min().min()
sea_ice_month_year_dnorm = (
    (sea_ice_month_year_diff - sid_min) / (sid_max - sid_min))
sea_ice_month_year_dnorm


# Now use the `matplotlib` color map to find the colour of each value.
#
# Note that we're doing `1 - value` to make the map scale from cyan at high
# values to magenta at low ones, the opposite of what it normally does.
# In[18]:

# `matplotlib.cm.get_cmap` was removed in matplotlib 3.9; `pyplot.get_cmap`
# is available on both old and new releases.
cmap = plt.get_cmap('cool')
# Map every cell to a colour, column by column. `Series.map` is used rather
# than `DataFrame.applymap`, which is deprecated in recent pandas.
sea_ice_month_year_colour = (1 - sea_ice_month_year_dnorm).apply(
    lambda column: column.map(cmap))
sea_ice_month_year_colour


# Quick check it works for one chart.

# In[19]:

plt.bar(sea_ice_month_year_diff.index,
        sea_ice_month_year_diff[2017],
        color=sea_ice_month_year_colour[2017])


# Now replot the data for each year, but using the colours we manually
# specified.

# In[29]:

f, axes = plt.subplots(6, 5, sharey=True, figsize=(15, 15),
                       facecolor='darkslateblue')
# NOTE(review): the title says "30-year average", but the baseline subtracted
# when building `sea_ice_month_year_diff` is a per-month median over every
# year column in the dataset -- confirm which wording is intended.
plt.suptitle('Changes in sea ice extent vs 30-year average\nEach bar is one month',
             color='lightsteelblue', fontsize=24)
# The 6 x 5 grid is walked row by row, one panel per year from 1988 to 2017.
for ax, yr in zip(axes.flat, range(1988, 2018)):
    ax.bar(sea_ice_month_year_diff.index,
           sea_ice_month_year_diff[yr],
           color=sea_ice_month_year_colour[yr])
    ax.set_frame_on(False)
    ax.set_axis_off()
    ax.set_title(str(yr), color='lightsteelblue')


# In[30]:

f.savefig("changes-per-year.png", facecolor=f.get_facecolor(), transparent=True)


# And again, plot for each month, one bar per year.
#
# Restrict to just the past 30 years of full data.

# In[21]:

f, axes = plt.subplots(3, 4, sharey=True, figsize=(15, 15),
                       facecolor='darkslateblue')
# Slice and transpose once (rows = years, columns = months) instead of
# repeating the same work for every panel.
recent_diff = sea_ice_month_year_diff.loc[:, 1988:2017].T
recent_colour = sea_ice_month_year_colour.loc[:, 1988:2017].T
# The 3 x 4 grid is walked row by row, so panel order is month 1 to 12.
for mt, ax in enumerate(axes.flat, start=1):
    ax.bar(recent_diff.index, recent_diff[mt], color=recent_colour[mt])
    ax.set_frame_on(False)
    ax.set_axis_off()
    # Any non-leap year works here; only the month name is used.
    ax.set_title(datetime(1900, mt, 1).strftime('%B'), color='lightsteelblue')


# In[ ]: