import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import display
%matplotlib inline
# Make every figure drawn later in the notebook large by default.
plt.rcParams['figure.figsize'] = (20.0, 10.0)

# Source CSV (presumably a Tabula extraction, judging by the file name).
# Several headers contain embedded carriage returns, so the raw column
# names are spelled out once here and reused below.
rhi_csv = 'tabula-RHI-beneficiaries-non-domestic-individuals-companies.csv'
raw_date_col = 'Date of\rApplication'
raw_cash_col = 'Amount of payments\rmade to 28 February\r2017 (£)*'

df = pd.read_csv(rhi_csv)

# Application dates are written day/month/year; parse them into timestamps.
df['Date'] = pd.to_datetime(df[raw_date_col], format='%d/%m/%Y')

# Remove pound signs and thousands separators, then convert to float.
cash_text = df[raw_cash_col].replace('[£,]', '', regex=True)
df['Cash'] = cash_text.astype(float)

# The raw text columns are now superseded by 'Date' and 'Cash'.
df.drop(columns=[raw_date_col, raw_cash_col], inplace=True)

# Give the remaining columns short, single-line names.
df.rename(
    columns={
        'Name': 'Company',
        'Business or\rInstallation\rLocation': 'Postcode',
        'Technology Type': 'Type',
        'Installation\rCapacity\r(kWth)': 'Capacity (kWth)',
    },
    inplace=True,
)

# Quick sanity check: first rows, then the resulting column dtypes.
for preview in (df.head(), df.dtypes):
    display(preview)
| | Company | Postcode | Type | Capacity (kWth) | Date | Cash |
---|---|---|---|---|---|---|
0 | Aaron Newell | BT39 | Solid Biomass Boiler | 60.0 | 2015-09-04 | 19084.69 |
1 | Acheson & Glover Precast Ltd | BT75 | Solid Biomass Boiler | 99.0 | 2015-11-06 | 27600.66 |
2 | Acheson & Glover Precast Ltd | BT75 | Solid Biomass Boiler | 99.0 | 2015-11-06 | 30507.19 |
3 | Acheson & Glover Precast Ltd | BT75 | Solid Biomass Boiler | 99.0 | 2015-11-06 | 34416.23 |
4 | Acheson & Glover Precast Ltd | BT75 | Solid Biomass Boiler | 99.0 | 2015-07-13 | 50543.44 |
Company                    object
Postcode                   object
Type                       object
Capacity (kWth)           float64
Date               datetime64[ns]
Cash                      float64
dtype: object
# Show rows with no postcode, to spot records that did not load as complete rows.
df.loc[df['Postcode'].isna()]
| | Company | Postcode | Type | Capacity (kWth) | Date | Cash |
---|---|---|---|---|---|---|
1157 | NaN | NaN | (GSHP) | NaN | NaT | NaN |
# Sum of the Cash column across all rows (pandas skips NaN entries by default).
df['Cash'].sum()
60295840.10000002