import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
Opening Zeppelin hourly data from 2010 and 2011
# Hourly Zeppelin aerosol size-distribution observations, one CSV per year.
filenam1 = '/home/2daa7756-2d5725-2d4dfb-2db0ff-2d5e0a6858a009/shared-ns1000k/inputs//Aerosol_sizedist_obs/Zeppelin_2010_hourly.csv'
filenam2 = '/home/2daa7756-2d5725-2d4dfb-2db0ff-2d5e0a6858a009/shared-ns1000k/inputs//Aerosol_sizedist_obs/Zeppelin_2011_hourly.csv'
flist = [filenam1, filenam2]


def mydateparser(x):
    """Parse combined 'year month day hour minute' text into datetimes.

    ``x`` is the text pandas builds from the five date columns listed in
    ``parse_dates``; ``pd.to_datetime`` accepts either a single string or an
    array of such strings.  It replaces ``pd.datetime.strptime``, which
    relied on the ``pd.datetime`` alias that newer pandas no longer provides.
    """
    return pd.to_datetime(x, format="%Y %m %d %H %M")


# The parser must exist before the files are read, so it is defined above.
# The five columns '0', '0.1', '0.2', '0.3', '0.4' are merged into a single
# datetime column by read_csv.
# NOTE(review): `date_parser` and nested `parse_dates` lists are deprecated in
# pandas >= 2.0; they are kept because the recorded tracebacks show this
# notebook running on a Python 3.7 pandas install.
ldf = [
    pd.read_csv(
        f,
        parse_dates=[['0', '0.1', '0.2', '0.3', '0.4']],
        date_parser=mydateparser,
    )
    for f in flist
]

# Stack the yearly tables row-wise into one frame.  The former follow-up read
# of the undefined name `filenam` is removed: it raised NameError and would
# have replaced the concatenated frame with a single file.
data = pd.concat(ldf, axis=0)
#data.head()
#data.info()
# Name the combined timestamp column 'date' and use it as the row index.
data = data.rename(columns={'0_0.1_0.2_0.3_0.4': 'date'}).set_index('date')
# Remove the last column ('0.6').  errors='ignore' keeps the drop from
# failing when the column is already absent; repeating the drop otherwise
# raises KeyError "['0.6'] not found in axis".
data = data.drop(columns='0.6', errors='ignore')
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-124-dd6d76ed93db> in <module> 1 #remove last column ----> 2 data.drop(labels='0.6', axis=1, inplace=True) 3 /opt/conda/lib/python3.7/site-packages/pandas/core/frame.py in drop(self, labels, axis, index, columns, level, inplace, errors) 4100 level=level, 4101 inplace=inplace, -> 4102 errors=errors, 4103 ) 4104 /opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in drop(self, labels, axis, index, columns, level, inplace, errors) 3912 for axis, labels in axes.items(): 3913 if labels is not None: -> 3914 obj = obj._drop_axis(labels, axis, level=level, errors=errors) 3915 3916 if inplace: /opt/conda/lib/python3.7/site-packages/pandas/core/generic.py in _drop_axis(self, labels, axis, level, errors) 3944 new_axis = axis.drop(labels, level=level, errors=errors) 3945 else: -> 3946 new_axis = axis.drop(labels, errors=errors) 3947 result = self.reindex(**{axis_name: new_axis}) 3948 /opt/conda/lib/python3.7/site-packages/pandas/core/indexes/base.py in drop(self, labels, errors) 5338 if mask.any(): 5339 if errors != "ignore": -> 5340 raise KeyError("{} not found in axis".format(labels[mask])) 5341 indexer = indexer[~mask] 5342 return self.delete(indexer) KeyError: "['0.6'] not found in axis"
# Wide figure (number 1) holding a single axes.
fig = plt.figure(1, figsize=(20, 5))
ax = plt.subplot(111)
# Heatmap of the transposed table with its row order reversed; the colour
# scale is limited to the range 0-200.
sns.heatmap(
    data.T.iloc[::-1],
    vmin=0,
    vmax=200,
    cmap='jet',
    ax=ax,
)
<matplotlib.axes._subplots.AxesSubplot at 0x7fe2be952e10>
# Show the second column by position (labelled 20 in the recorded output).
data.iloc[:,1]
date 2010-01-01 00:00:00 9.2141 2010-01-01 01:00:00 7.4015 2010-01-01 02:00:00 8.2293 2010-01-01 03:00:00 6.0217 2010-01-01 04:00:00 9.8351 ... 2010-12-31 19:00:00 -999.0000 2010-12-31 20:00:00 -999.0000 2010-12-31 21:00:00 -999.0000 2010-12-31 22:00:00 -999.0000 2010-12-31 23:00:00 15.7530 Name: 20, Length: 8760, dtype: float64
# Replace every -999 value in the data with NaN.
data = data.replace(to_replace=-999, value=np.nan)
# Display the resulting table.
data
0.5 | 20 | 22.44 | 25.179 | 28.251 | 31.698 | 35.566 | 39.905 | 44.774 | 50.238 | ... | 178.25 | 200 | 224.4 | 251.79 | 282.51 | 316.98 | 355.66 | 399.05 | 447.74 | 502.38 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
date | |||||||||||||||||||||
2010-01-01 00:00:00 | 127.010 | 9.2141 | 13.3240 | 18.261 | 23.245 | 30.351 | 39.4690 | 47.835 | 47.8340 | 42.7440 | ... | 112.370 | 115.400 | 168.740 | 298.220 | 358.390 | 247.520 | 125.5700 | 54.1360 | 22.3210 | 9.6607 |
2010-01-01 01:00:00 | 110.150 | 7.4015 | 12.8350 | 18.326 | 23.133 | 26.412 | 31.9880 | 38.002 | 38.1110 | 38.6730 | ... | 107.270 | 101.910 | 140.920 | 244.340 | 294.450 | 218.560 | 115.0300 | 49.6960 | 20.9440 | 9.4524 |
2010-01-01 02:00:00 | 98.864 | 8.2293 | 10.1860 | 14.526 | 19.346 | 23.288 | 30.8280 | 32.256 | 33.3850 | 34.3440 | ... | 90.091 | 88.761 | 121.910 | 207.210 | 267.870 | 196.080 | 101.8700 | 45.4790 | 20.8550 | 9.9242 |
2010-01-01 03:00:00 | 103.960 | 6.0217 | 10.0140 | 14.795 | 21.470 | 26.444 | 28.6210 | 31.333 | 31.4070 | 35.1930 | ... | 99.499 | 93.949 | 133.170 | 231.840 | 290.350 | 209.660 | 110.8000 | 50.5580 | 23.2000 | 11.1620 |
2010-01-01 04:00:00 | 109.060 | 9.8351 | 12.5260 | 15.748 | 19.991 | 24.453 | 30.3280 | 33.429 | 36.5330 | 36.4650 | ... | 102.890 | 106.060 | 142.540 | 243.290 | 307.780 | 215.960 | 115.7900 | 54.9650 | 24.6220 | 12.0210 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2011-12-31 19:00:00 | 33.237 | 4.5237 | 8.2171 | 12.178 | 13.667 | 15.023 | 15.4340 | 10.998 | 9.2830 | 8.3625 | ... | 66.587 | 74.784 | 73.260 | 58.339 | 35.515 | 19.784 | 12.6390 | 5.4085 | 3.2198 | 4.0654 |
2011-12-31 20:00:00 | 36.031 | 7.5069 | 11.4000 | 15.513 | 16.042 | 16.272 | 15.7030 | 12.395 | 11.8270 | 12.5400 | ... | 65.547 | 67.752 | 62.612 | 48.068 | 31.817 | 18.368 | 8.8337 | 7.7784 | 6.9098 | 5.9238 |
2011-12-31 21:00:00 | 38.550 | 4.4545 | 9.5881 | 15.145 | 18.117 | 17.796 | 13.6140 | 11.582 | 9.2801 | 8.4581 | ... | 81.728 | 91.344 | 84.708 | 53.480 | 37.032 | 22.753 | 10.9250 | 9.4918 | 7.4926 | 5.1997 |
2011-12-31 22:00:00 | 38.347 | 4.6390 | 8.8948 | 13.311 | 12.494 | 11.177 | 9.7762 | 11.722 | 10.7130 | 9.8151 | ... | 72.779 | 84.278 | 82.020 | 57.439 | 37.294 | 20.893 | 9.2273 | 6.8520 | 7.2738 | 8.6812 |
2011-12-31 23:00:00 | 46.968 | 11.0710 | 12.0850 | 13.415 | 18.010 | 20.073 | 18.3820 | 13.677 | 13.1990 | 14.4220 | ... | 91.955 | 91.564 | 84.006 | 65.885 | 39.213 | 21.230 | 13.9460 | 10.3320 | 8.6885 | 8.1243 |
14702 rows × 30 columns
# Select only the columns at positions 1-8 (described in the original notes
# as the 20-50 nm range) and add them up row by row.
# min_count=1 makes a row whose selected values are all NaN sum to NaN
# instead of 0.0, so missing hours are not mistaken for zero values and the
# sum agrees with the mean below.
small_particle_data = data.iloc[:, 1:9].sum(axis=1, min_count=1)
# Row-wise mean over the same columns (NaN values are skipped).
small_particle_data_mean = data.iloc[:, 1:9].mean(axis=1)
# Draw the row-wise mean as a blue line in the top slot of a 2x1 grid.
fig = plt.figure()
ax = fig.add_subplot(211)
[line] = ax.plot(small_particle_data_mean, color='blue', linewidth=2)
# Uncomment to switch the y-axis to a logarithmic scale.
#ax.set_yscale('log')
# Display the plotted series.
small_particle_data_mean
date 2010-01-01 00:00:00 28.691637 2010-01-01 01:00:00 24.526063 2010-01-01 02:00:00 21.505537 2010-01-01 03:00:00 21.263212 2010-01-01 04:00:00 22.855387 ... 2010-12-31 19:00:00 NaN 2010-12-31 20:00:00 NaN 2010-12-31 21:00:00 NaN 2010-12-31 22:00:00 NaN 2010-12-31 23:00:00 33.761125 Length: 8760, dtype: float64
# Resample the row-wise mean into monthly bins ('M'), average each bin, and
# plot the resulting monthly series.
small_particle_data_mean.resample('M').mean().plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fe2c2efe748>
# Column labels: year, month, and the monthly-mean variable.
Y = 'y'
M = 'm'
var = 'spdm'

# Monthly means of the row-wise mean series.
_se = small_particle_data_mean.resample('M').mean()

# One-column table of the monthly means, extended with the month and year
# taken from the datetime index, then re-indexed by month.
_df = (
    pd.DataFrame(_se, columns=[var])
    .assign(**{
        M: lambda frame: frame.index.month,
        Y: lambda frame: frame.index.year,
    })
    .set_index(M)
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-202-bf7276411ff0> in <module> ----> 1 _df = pd.DataFrame(_se,columns=[var]) 2 _df[M] = _df.index.month 3 _df[Y] = _df.index.year 4 5 _df= _df.set_index(M) NameError: name 'var' is not defined
# Group the monthly table by the year column 'y' and plot 'spdm' once per
# group; each group's plot call is made with subplots=True.
_df.groupby('y').plot(y='spdm',subplots=True)
y 2010 [AxesSubplot(0.125,0.2;0.775x0.68)] 2011 [AxesSubplot(0.125,0.2;0.775x0.68)] dtype: object
# Plot 'spdm' against the frame's index.
# NOTE(review): color='y' is a matplotlib colour code, not the year column
# 'y' -- confirm that a per-year split was not intended here.
_df.plot(y='spdm',color='y')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe21ef6c6a0>
# 'm' is the index of _df (it was moved there with set_index), so it is not
# available as a column for x=.  Turn the index back into a column first.
_df.reset_index().plot(x='m', y='spdm')
<matplotlib.axes._subplots.AxesSubplot at 0x7fe21f46e080>
# Overlay one line per year on a shared axes, month on the x-axis.
ax = plt.axes()
# 'm' is the index of _df, so expose it as a column before plotting with x='m'.
_by_month = _df.reset_index()
for _year in (2010, 2011):
    _by_month[_by_month['y'] == _year].plot(
        x='m', y='spdm', ax=ax, label=str(_year)
    )
<matplotlib.axes._subplots.AxesSubplot at 0x7fe21f0b7160>
# Display the monthly table with its index moved back into an ordinary column.
_df.reset_index()
m | spdm | y | |
---|---|---|---|
0 | 1 | 34.447331 | 2010 |
1 | 2 | 65.437523 | 2010 |
2 | 3 | 56.889464 | 2010 |
3 | 4 | 158.139479 | 2010 |
4 | 5 | 557.717221 | 2010 |
5 | 6 | 564.419334 | 2010 |
6 | 7 | 457.328173 | 2010 |
7 | 8 | 297.233322 | 2010 |
8 | 9 | 65.094502 | 2010 |
9 | 10 | 52.024606 | 2010 |
10 | 11 | 23.716328 | 2010 |
11 | 12 | 33.761125 | 2010 |
12 | 1 | NaN | 2011 |
13 | 2 | NaN | 2011 |
14 | 3 | NaN | 2011 |
15 | 4 | 245.573784 | 2011 |
16 | 5 | 170.079196 | 2011 |
17 | 6 | 546.593581 | 2011 |
18 | 7 | 738.481650 | 2011 |
19 | 8 | 351.219401 | 2011 |
20 | 9 | 241.201983 | 2011 |
21 | 10 | 26.122856 | 2011 |
22 | 11 | 21.797788 | 2011 |
23 | 12 | 43.021865 | 2011 |
# Flat copy of the monthly table with the index restored as a column.
_df1 = _df.reset_index()
# Index by (month, year), then pivot the year level out into the columns so
# that each year gets its own 'spdm' column.
_df2 = _df1.set_index(['m', 'y'])
_df2 = _df2.unstack(level='y')
MultiIndex([('spdm', 2010), ('spdm', 2011)], names=[None, 'y'])
# _gr is not assigned anywhere earlier in this file, so define it here.
# NOTE(review): presumably the per-year grouping of _df -- the recorded
# output has one entry each for 2010 and 2011; confirm.
_gr = _df.groupby('y')
_gr.plot(y='spdm', subplots=True)
y 2010 [AxesSubplot(0.125,0.2;0.775x0.68)] 2011 [AxesSubplot(0.125,0.2;0.775x0.68)] dtype: object