Let's go

In [1]:
import pandas
In [2]:
raw_df = pandas.read_excel('cpi.xlsx', sheetname=None)
In [3]:
type(raw_df)
Out[3]:
dict
In [4]:
type(raw_df['2004'])
Out[4]:
pandas.core.frame.DataFrame
In [5]:
raw_df
Out[5]:
{'2004':    January  February   March   April     May   June   July  August  September  \
 0    151.8     154.6  156.47  158.12  160.73  163.1  162.1   168.6        172   
 
    October  November  December  
 0    173.4     171.6     172.2  ,
 '2005':    January  February  March  April    May   June   July  August  September  \
 0    174.4     176.1  178.6  183.4  184.5  182.5  181.1   180.2      179.4   
 
    October  November  December  
 0    179.8       182     185.2  ,
 '2006':    January  February  March  April    May   June   July  August  September  \
 0    201.3     209.3  212.8  210.7  208.6  202.5  199.5   200.9      204.2   
 
    October  November  December  
 0      208     208.6     214.1  ,
 '2007':    January  February  March   April    May   June   July  August  September  \
 0    220.7     223.6  225.3  222.61  221.8  224.9  226.6  225.79     228.16   
 
    October  November  December  
 0   229.99    233.28    239.81  ,
 '2008':    January  February   March   April     May    June    July  August  \
 0   260.94    266.37  274.55  281.88  291.79  290.75  286.62  288.06   
 
    September  October  November  December  
 0     292.57   295.38    301.79    306.28  ,
 '2009':    January  February   March   April     May    June    July  August  \
 0   318.02    333.21  345.33  355.36  348.76  342.38  337.59  341.19   
 
    September  October  November  December  
 0     140.51   141.21     141.5     141.9  ,
 '2010':    January  February  March  April    May   June  July  August  September  \
 0      142     105.2    105  105.6  105.8  105.3   106  106.25     106.74   
 
    October  November  December  
 0   106.97    107.86    109.38  ,
 '2011':    January  February   March   April     May    June    July  August  \
 0   110.57    112.05  114.62  118.29  119.48  120.91  122.44  123.97   
 
    September  October  November  December  
 0     125.23    127.2    129.13    130.09  ,
 '2012':    January  February   March   April     May    June    July  August  \
 0   130.82    130.76  132.51  133.74  134.09  133.06  131.92  131.51   
 
    September  October  November  December  
 0     131.89   132.46    133.33    134.25  ,
 '2013':    January  February   March   April     May    June    July  August  \
 0   135.62    136.59  137.96  139.28  139.52  139.59  139.87  140.29   
 
    September  October  November  December  
 0     142.82   142.75    143.14    143.85  ,
 '2014':    January  February   March  April    May    June   July  August  September  \
 0    145.4    145.95  146.61  148.2  149.7  149.91  150.6  152.02     152.24   
 
    October  November  December  
 0   151.92    151.85    152.51  ,
 '2015':    January  February   March  April     May    June    July  August  \
 0   153.43    154.14  155.86  158.7  159.98  160.46  160.57   160.9   
 
    September  October  November  December  
 0     161.33   162.13    162.97    164.72  ,
 '2016':    January  February   March   April     May    June    July  August  \
 0   165.37    164.67  165.92  167.07  167.99  169.76  170.84  170.97   
 
    September  October  November  December  
 0     171.56   172.62    173.85    175.18  ,
 '2017':    January  February   March  April  May  June  July  August  September  \
 0   176.93    179.98  182.98    NaN  NaN   NaN   NaN     NaN        NaN   
 
    October  November  December  
 0      NaN       NaN       NaN  }
In [6]:
df = pandas.concat(raw_df.values(), keys=raw_df.keys())
In [7]:
df
Out[7]:
January February March April May June July August September October November December
2006 0 201.30 209.30 212.80 210.70 208.60 202.50 199.50 200.90 204.20 208.00 208.60 214.10
2015 0 153.43 154.14 155.86 158.70 159.98 160.46 160.57 160.90 161.33 162.13 162.97 164.72
2017 0 176.93 179.98 182.98 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2008 0 260.94 266.37 274.55 281.88 291.79 290.75 286.62 288.06 292.57 295.38 301.79 306.28
2005 0 174.40 176.10 178.60 183.40 184.50 182.50 181.10 180.20 179.40 179.80 182.00 185.20
2009 0 318.02 333.21 345.33 355.36 348.76 342.38 337.59 341.19 140.51 141.21 141.50 141.90
2016 0 165.37 164.67 165.92 167.07 167.99 169.76 170.84 170.97 171.56 172.62 173.85 175.18
2013 0 135.62 136.59 137.96 139.28 139.52 139.59 139.87 140.29 142.82 142.75 143.14 143.85
2010 0 142.00 105.20 105.00 105.60 105.80 105.30 106.00 106.25 106.74 106.97 107.86 109.38
2011 0 110.57 112.05 114.62 118.29 119.48 120.91 122.44 123.97 125.23 127.20 129.13 130.09
2014 0 145.40 145.95 146.61 148.20 149.70 149.91 150.60 152.02 152.24 151.92 151.85 152.51
2007 0 220.70 223.60 225.30 222.61 221.80 224.90 226.60 225.79 228.16 229.99 233.28 239.81
2012 0 130.82 130.76 132.51 133.74 134.09 133.06 131.92 131.51 131.89 132.46 133.33 134.25
2004 0 151.80 154.60 156.47 158.12 160.73 163.10 162.10 168.60 172.00 173.40 171.60 172.20
In [8]:
df.index
Out[8]:
MultiIndex(levels=[['2006', '2015', '2017', '2008', '2005', '2009', '2016', '2013', '2010', '2011', '2014', '2007', '2012', '2004'], [0]],
           labels=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
In [9]:
df.index = df.index.droplevel(1)
In [10]:
df.index
Out[10]:
Index(['2006', '2015', '2017', '2008', '2005', '2009', '2016', '2013', '2010',
       '2011', '2014', '2007', '2012', '2004'],
      dtype='object')
In [11]:
df
Out[11]:
January February March April May June July August September October November December
2006 201.30 209.30 212.80 210.70 208.60 202.50 199.50 200.90 204.20 208.00 208.60 214.10
2015 153.43 154.14 155.86 158.70 159.98 160.46 160.57 160.90 161.33 162.13 162.97 164.72
2017 176.93 179.98 182.98 NaN NaN NaN NaN NaN NaN NaN NaN NaN
2008 260.94 266.37 274.55 281.88 291.79 290.75 286.62 288.06 292.57 295.38 301.79 306.28
2005 174.40 176.10 178.60 183.40 184.50 182.50 181.10 180.20 179.40 179.80 182.00 185.20
2009 318.02 333.21 345.33 355.36 348.76 342.38 337.59 341.19 140.51 141.21 141.50 141.90
2016 165.37 164.67 165.92 167.07 167.99 169.76 170.84 170.97 171.56 172.62 173.85 175.18
2013 135.62 136.59 137.96 139.28 139.52 139.59 139.87 140.29 142.82 142.75 143.14 143.85
2010 142.00 105.20 105.00 105.60 105.80 105.30 106.00 106.25 106.74 106.97 107.86 109.38
2011 110.57 112.05 114.62 118.29 119.48 120.91 122.44 123.97 125.23 127.20 129.13 130.09
2014 145.40 145.95 146.61 148.20 149.70 149.91 150.60 152.02 152.24 151.92 151.85 152.51
2007 220.70 223.60 225.30 222.61 221.80 224.90 226.60 225.79 228.16 229.99 233.28 239.81
2012 130.82 130.76 132.51 133.74 134.09 133.06 131.92 131.51 131.89 132.46 133.33 134.25
2004 151.80 154.60 156.47 158.12 160.73 163.10 162.10 168.60 172.00 173.40 171.60 172.20
In [12]:
df.sort_index()
Out[12]:
January February March April May June July August September October November December
2004 151.80 154.60 156.47 158.12 160.73 163.10 162.10 168.60 172.00 173.40 171.60 172.20
2005 174.40 176.10 178.60 183.40 184.50 182.50 181.10 180.20 179.40 179.80 182.00 185.20
2006 201.30 209.30 212.80 210.70 208.60 202.50 199.50 200.90 204.20 208.00 208.60 214.10
2007 220.70 223.60 225.30 222.61 221.80 224.90 226.60 225.79 228.16 229.99 233.28 239.81
2008 260.94 266.37 274.55 281.88 291.79 290.75 286.62 288.06 292.57 295.38 301.79 306.28
2009 318.02 333.21 345.33 355.36 348.76 342.38 337.59 341.19 140.51 141.21 141.50 141.90
2010 142.00 105.20 105.00 105.60 105.80 105.30 106.00 106.25 106.74 106.97 107.86 109.38
2011 110.57 112.05 114.62 118.29 119.48 120.91 122.44 123.97 125.23 127.20 129.13 130.09
2012 130.82 130.76 132.51 133.74 134.09 133.06 131.92 131.51 131.89 132.46 133.33 134.25
2013 135.62 136.59 137.96 139.28 139.52 139.59 139.87 140.29 142.82 142.75 143.14 143.85
2014 145.40 145.95 146.61 148.20 149.70 149.91 150.60 152.02 152.24 151.92 151.85 152.51
2015 153.43 154.14 155.86 158.70 159.98 160.46 160.57 160.90 161.33 162.13 162.97 164.72
2016 165.37 164.67 165.92 167.07 167.99 169.76 170.84 170.97 171.56 172.62 173.85 175.18
2017 176.93 179.98 182.98 NaN NaN NaN NaN NaN NaN NaN NaN NaN
In [13]:
# The number of baskets changed from February 2010, so lets get data from Feb 2010 onwards and plot that
new_df = df.loc[['2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017']]
In [14]:
new_df
Out[14]:
January February March April May June July August September October November December
2010 142.00 105.20 105.00 105.60 105.80 105.30 106.00 106.25 106.74 106.97 107.86 109.38
2011 110.57 112.05 114.62 118.29 119.48 120.91 122.44 123.97 125.23 127.20 129.13 130.09
2012 130.82 130.76 132.51 133.74 134.09 133.06 131.92 131.51 131.89 132.46 133.33 134.25
2013 135.62 136.59 137.96 139.28 139.52 139.59 139.87 140.29 142.82 142.75 143.14 143.85
2014 145.40 145.95 146.61 148.20 149.70 149.91 150.60 152.02 152.24 151.92 151.85 152.51
2015 153.43 154.14 155.86 158.70 159.98 160.46 160.57 160.90 161.33 162.13 162.97 164.72
2016 165.37 164.67 165.92 167.07 167.99 169.76 170.84 170.97 171.56 172.62 173.85 175.18
2017 176.93 179.98 182.98 NaN NaN NaN NaN NaN NaN NaN NaN NaN
In [15]:
new_df.set_value('2010', 'January', 0)
Out[15]:
January February March April May June July August September October November December
2010 0.00 105.20 105.00 105.60 105.80 105.30 106.00 106.25 106.74 106.97 107.86 109.38
2011 110.57 112.05 114.62 118.29 119.48 120.91 122.44 123.97 125.23 127.20 129.13 130.09
2012 130.82 130.76 132.51 133.74 134.09 133.06 131.92 131.51 131.89 132.46 133.33 134.25
2013 135.62 136.59 137.96 139.28 139.52 139.59 139.87 140.29 142.82 142.75 143.14 143.85
2014 145.40 145.95 146.61 148.20 149.70 149.91 150.60 152.02 152.24 151.92 151.85 152.51
2015 153.43 154.14 155.86 158.70 159.98 160.46 160.57 160.90 161.33 162.13 162.97 164.72
2016 165.37 164.67 165.92 167.07 167.99 169.76 170.84 170.97 171.56 172.62 173.85 175.18
2017 176.93 179.98 182.98 NaN NaN NaN NaN NaN NaN NaN NaN NaN
In [16]:
# Set all NaNs to 0
new_df.fillna(0)
Out[16]:
January February March April May June July August September October November December
2010 0.00 105.20 105.00 105.60 105.80 105.30 106.00 106.25 106.74 106.97 107.86 109.38
2011 110.57 112.05 114.62 118.29 119.48 120.91 122.44 123.97 125.23 127.20 129.13 130.09
2012 130.82 130.76 132.51 133.74 134.09 133.06 131.92 131.51 131.89 132.46 133.33 134.25
2013 135.62 136.59 137.96 139.28 139.52 139.59 139.87 140.29 142.82 142.75 143.14 143.85
2014 145.40 145.95 146.61 148.20 149.70 149.91 150.60 152.02 152.24 151.92 151.85 152.51
2015 153.43 154.14 155.86 158.70 159.98 160.46 160.57 160.90 161.33 162.13 162.97 164.72
2016 165.37 164.67 165.92 167.07 167.99 169.76 170.84 170.97 171.56 172.62 173.85 175.18
2017 176.93 179.98 182.98 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
In [17]:
%matplotlib inline
new_df.plot(kind='bar', figsize=(30,10))
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdc4b7d0128>
In [22]:
# Now to add a title and increase fontsise
new_df.plot(
    kind='bar',
    figsize=(30,10),
    title='Kenyan Consimer Price Index 2010-2017',
    fontsize=14
)
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fdc4a805dd8>