In [1]:
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
In [4]:
p='E:/Onedrive - Lancaster University/Datarepo/szekelydata/klima/'
In [5]:
stations=pd.read_csv(p+'stations.csv')
stations=stations.set_index('ID')
stations.to_csv('data/stations.csv')

0: NoCloud 1: Cloud 4: Fog 6: Rain 7: Snow 11: Hail

In [6]:
weather={
2:1,
3:7,
5:6,
8:6,
9:6,
100:0,
101:0,
102:0,
103:0,
104:0,
105:0,
106:0,
107:0,
108:0,
109:0,
110:4,
111:4,
112:4,
113:1,
114:6,
115:6,
116:6,
117:6,
118:6,
119:1,
120:7,
121:6,
122:7,
123:6,
124:11,
125:6,
126:7,
127:11,
128:4,
129:6,
130:1,
131:1,
132:1,
133:1,
134:1,
135:1,
136:7,
137:7,
138:7,
139:7,
140:4,
141:4,
142:4,
143:4,
144:4,
145:4,
146:4,
147:4,
148:4,
149:4,
150:6,
151:6,
152:6,
153:6,
154:6,
155:6,
156:6,
157:6,
158:6,
159:6,
160:6,
161:6,
162:6,
163:6,
164:6,
165:6,
166:11,
167:11,
168:6,
169:6,
170:7,
171:7,
172:7,
173:7,
174:7,
175:7,
176:7,
177:7,
178:7,
179:7,
180:6,
181:6,
182:6,
183:7,
184:7,
185:7,
186:7,
187:7,
188:7,
189:11,
190:11,
191:6,
192:6,
193:7,
194:7,
195:6,
196:11,
197:6,
198:1,
199:11}
In [7]:
measures=['XTEMP','XSPD','XPCP','XSD','XVSB','YFOG','YPCP','YSNW','YHAL']
ycolumns={1:'YCLD',4:'YFOG',6:'YPCP',7:'YSNW',10:'YCLR',11:'YHAL'}
In [8]:
def load_data(stn,d='high_res',p=p,stations=stations,verbose=True):
    if verbose:
        print('loading...',stn,stations.loc[int(stn)]['LOC'],d)
    df=pd.read_csv(p+'/'+d+'/export/'+stn+'.csv',dtype={' FRSHTT':str})
    df.columns=[i.strip() for i in df.columns]
    df['time']=pd.to_datetime(df['time'])
    df['XTEMP']=(pd.to_numeric(df['TEMP'], errors='coerce').replace(9999.9,np.nan)-32)*5/9 #Fahrenheit to Celsiu
    if d=='high_res':
        df['XSPD']=pd.to_numeric(df['SPD'], errors='coerce')*1.61 #MPH to Km/h
        df['XVSB']=(pd.to_numeric(df['VSB'], errors='coerce')*1.61).apply(lambda x: min(x,10)) #miles to Km, max VSB=20Km
        df['XPCP']=pd.to_numeric(df['PCP06'], errors='coerce')*25.4 #inch to mm
        df['XSD']=pd.to_numeric(df['SD'], errors='coerce')*25.4 #inch to mm
        
        df['PCP01']=pd.to_numeric(df['PCP01'], errors='coerce')
        df['PCP06']=pd.to_numeric(df['PCP06'], errors='coerce')
        df['PCP24']=pd.to_numeric(df['PCP24'], errors='coerce')
        df['PCPXX']=pd.to_numeric(df['PCPXX'], errors='coerce')
        df['PCP06'].loc[~df['PCP06'].isnull()] = 6
        df['PCPXX'].loc[~df['PCPXX'].isnull()] = 6
        df['PCP01'].loc[~df['PCP01'].isnull()] = 6
        df['PCP24'].loc[~df['PCP24'].isnull()] = 6
        df['AW']=pd.to_numeric(df['AW'], errors='coerce')+100
        df['MW']=pd.to_numeric(df['MW'], errors='coerce')+100
        df['W']=pd.to_numeric(df['W'], errors='coerce')
        dz=df[['PCP01','PCP06','PCP24','PCPXX','AW','MW','W']]
        df['W']=dz.ffill(axis=1)['W'].replace(weather).replace(0,10)
        dz=df.groupby(['time','W']).count()['TEMP'].unstack().fillna(0)
        dz.columns=[ycolumns[i] for i in dz.columns]
        df=df.set_index('time').join(dz).reset_index()
    else:
        df['year']=df['time'].dt.year
        df['month']=df['time'].dt.month
        df['day']=df['time'].dt.day
        df['hour']=df['time'].dt.hour
        df['XSPD']=pd.to_numeric(df['WDSP'], errors='coerce').replace(999.9,np.nan)*1.85 #knots to Km/h
        df['XVSB']=(pd.to_numeric(df['VISIB'], errors='coerce').replace(999.9,np.nan)*1.61).apply(lambda x: min(x,10)) #miles to Km, max VSB=20Km
        df['XPCP']=pd.to_numeric(df['PRCP'].str[:-1], errors='coerce').replace(99.99,np.nan)*25.4 #inch to mm
        df['XSD']=pd.to_numeric(df['SNDP'], errors='coerce').replace(999.9,np.nan)*25.4 #inch to mm
        
        df['YFOG']=pd.to_numeric(df['FRSHTT'].str[0], errors='coerce')
        df['YPCP']=pd.to_numeric(df['FRSHTT'].str[1], errors='coerce')
        df['YSNW']=pd.to_numeric(df['FRSHTT'].str[2], errors='coerce')
        df['YHAL']=pd.to_numeric(df['FRSHTT'].str[3], errors='coerce')
    for m in measures:
        if m not in df.columns:
            df[m]=np.nan
    return df[['time','year','month','day','hour']+measures].set_index('time')
In [9]:
# stn='154200' #aurel vlaicu
stn='151700' #mciuc
daily=load_data(stn,'daily')
hires=load_data(stn,'high_res')
loading... 151700 MIERCUREA CIUC daily
loading... 151700 MIERCUREA CIUC high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,22,23,26) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
C:\ProgramData\Anaconda3\lib\site-packages\pandas\core\indexing.py:205: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)
In [10]:
#keep only days with at least 4 readings (usually at 6,12,18,24) to false means
filt=hires.groupby(['year','month','day'])[['hour']].nunique()
filt2=filt[filt>=4].dropna().reset_index()
filt3=pd.to_datetime(dict(year=filt2['year'], month=filt2['month'], day=filt2['day'])).values
In [11]:
def comparison_getter(measure,daily=daily,hires=hires,filt3=filt3):
    if type(measure)!=list:
        measure=[measure]
        
    d=daily[measure]
    h=hires.groupby(['year','month','day']).mean()[measure]
    ymeasures=[m for m in measure if 'Y' in m]
    h[ymeasures]=h[ymeasures][h[ymeasures]==0].fillna(1)
    h=h.reset_index()
    h['time']=pd.to_datetime(dict(year=h['year'], month=h['month'], day=h['day'])).values
    h=h.set_index('time')[measure]
    h=h.loc[filt3]
    return d,h

def comaprison_plotter(measure,daily=daily,hires=hires,filt3=filt3):
    d,h=comparison_getter(measure,daily,hires,filt3)
    d.columns=['d']
    h.columns=['h']
    x=h.join(d,how='outer').dropna()
    x['diff']=(x['h']-x['d'])
    fig,ax=plt.subplots(1,3,figsize=(15,4))
    x['diff'].plot(ax=ax[0],title='diff')
    x['h'].plot(ax=ax[1],title='high res')
    x['d'].plot(ax=ax[2],title='daily')
    return x,d,h
In [12]:
comaprison_plotter('XTEMP');
In [13]:
comaprison_plotter('XSPD');
In [14]:
x,d,h=comaprison_plotter('XPCP')
In [15]:
x,d,h=comaprison_plotter('XSD');
In [16]:
x,d,h=comaprison_plotter('XVSB');
In [17]:
x,d,h=comaprison_plotter('YSNW');
In [18]:
x,d,h=comaprison_plotter('YPCP');
In [19]:
x,d,h=comaprison_plotter('YHAL');
In [20]:
hu=['127720',
'128050',
'128120',
'128220',
'128250',
'128300',
'128390',
'128430',
'128510',
'128600',
'128820',
'128920',
'129100',
'129150',
'129420',
'129600',
'129700',
'129820',
'129920',
'129350']
ro=['150040',
'150100',
'150140',
'150150',
'150200',
'150230',
'150800',
'150850',
'150900',
'151080',
'151200',
'151450',
'151500',
'151700',
'151970',
'152000',
'152300',
'152350',
'152470',
'152600',
'152800',
'152920',
'153100',
'153350',
'153460',
'153500',
'153600',
'154100',
'154200',
'154210',
'154500',
'154600',
'154700',
'154800',
'154810',
'154990']
In [21]:
hs=[]
ds=[]
for stn in ro:
    try:
        d_ok=True
        daily=load_data(stn,'daily')
    except:
        d_ok=False
        print('failed')
    try:
        h_ok=True
        hires=load_data(stn,'high_res')
    except:
        h_ok=False
        print('failed')
    #keep only days with at least 4 readings (usually at 6,12,18,24) to false means
    filt=hires.groupby(['year','month','day'])[['hour']].nunique()
    filt2=filt[filt>=4].dropna().reset_index()
    filt3=pd.to_datetime(dict(year=filt2['year'], month=filt2['month'], day=filt2['day'])).values
    d,h=comparison_getter(measures,daily,hires,filt3)
    d['ID']=stn
    h['ID']=stn
    if d_ok: ds.append(d)
    if h_ok: hs.append(h)
ds=pd.concat(ds)
hs=pd.concat(hs)
loading... 150040 SIGHETUL MARMATIEI daily
loading... 150040 SIGHETUL MARMATIEI high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:21: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
loading... 150100 SATU MARE daily
loading... 150100 SATU MARE high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,13,22,23) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 150140 TAUTII MAGHERAUS daily
loading... 150140 TAUTII MAGHERAUS high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,13) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 150150 OCNA SUGATAG daily
loading... 150150 OCNA SUGATAG high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22,23) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 150200 BOTOSANI daily
loading... 150200 BOTOSANI high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22,24,26) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 150230 STEFAN CEL MARE daily
loading... 150230 STEFAN CEL MARE high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,7,13,22,25) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 150800 ORADEA daily
loading... 150800 ORADEA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,13,22) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 150850 BISTRITA daily
loading... 150850 BISTRITA high_res
loading... 150900 IASI daily
loading... 150900 IASI high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,22,23) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 151080 CEAHLAU TOACA daily
loading... 151080 CEAHLAU TOACA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,22) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 151200 CLUJ NAPOCA daily
loading... 151200 CLUJ NAPOCA high_res
loading... 151450 VIDRASAU daily
loading... 151450 VIDRASAU high_res
loading... 151500 BACAU daily
loading... 151500 BACAU high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,23) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 151700 MIERCUREA CIUC daily
loading... 151700 MIERCUREA CIUC high_res
loading... 151970 BARLAD daily
loading... 151970 BARLAD high_res
loading... 152000 ARAD daily
loading... 152000 ARAD high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,22,23) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 152300 DEVA daily
loading... 152300 DEVA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22,23,24,26) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 152350 FAGARAS daily
loading... 152350 FAGARAS high_res
loading... 152470 TRAIAN VUIA daily
loading... 152470 TRAIAN VUIA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,13,22,23,25) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 152600 SIBIU daily
loading... 152600 SIBIU high_res
loading... 152800 VARFU OMU daily
loading... 152800 VARFU OMU high_res
loading... 152920 CARANSEBES daily
loading... 152920 CARANSEBES high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22,24) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 153100 GALATI daily
loading... 153100 GALATI high_res
loading... 153350 CATALOI daily
loading... 153350 CATALOI high_res
loading... 153460 RAMNICU VALCEA daily
loading... 153460 RAMNICU VALCEA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,22,23,24,26) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 153500 BUZAU daily
loading... 153500 BUZAU high_res
loading... 153600 SULINA daily
loading... 153600 SULINA high_res
loading... 154100 DROBETA TURNU SEVERIN daily
loading... 154100 DROBETA TURNU SEVERIN high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (12,22,23,24,26) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 154200 AUREL VLAICU daily
loading... 154200 AUREL VLAICU high_res
loading... 154210 HENRI COANDA daily
loading... 154210 HENRI COANDA high_res
loading... 154500 CRAIOVA daily
loading... 154500 CRAIOVA high_res
loading... 154600 CALARASI daily
loading... 154600 CALARASI high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,22,24,26) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 154700 ROSIORII DE VEDE daily
loading... 154700 ROSIORII DE VEDE high_res
loading... 154800 CONSTANTA daily
loading... 154800 CONSTANTA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (12,22,24,26) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 154810 MIHAIL KOGALNICEANU daily
loading... 154810 MIHAIL KOGALNICEANU high_res
loading... 154990 MANGALIA daily
loading... 154990 MANGALIA high_res
In [22]:
ds.to_csv('data/ro_ds.csv')
hs.to_csv('data/ro_hs.csv')
In [23]:
hs=[]
ds=[]
for stn in hu:
    try:
        d_ok=True
        daily=load_data(stn,'daily')
    except:
        d_ok=False
        print('failed')
    try:
        h_ok=True
        hires=load_data(stn,'high_res')
    except:
        h_ok=False
        print('failed')
    #keep only days with at least 4 readings (usually at 6,12,18,24) to false means
    filt=hires.groupby(['year','month','day'])[['hour']].nunique()
    filt2=filt[filt>=4].dropna().reset_index()
    filt3=pd.to_datetime(dict(year=filt2['year'], month=filt2['month'], day=filt2['day'])).values
    d,h=comparison_getter(measures,daily,hires,filt3)
    d['ID']=stn
    h['ID']=stn
    if d_ok: ds.append(d)
    if h_ok: hs.append(h)
ds=pd.concat(ds)
hs=pd.concat(hs)
loading... 127720 MISKOLC daily
loading... 127720 MISKOLC high_res
C:\ProgramData\Anaconda3\lib\site-packages\ipykernel_launcher.py:21: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
loading... 128050 SOPRON daily
loading... 128050 SOPRON high_res
loading... 128120 SZOMBATHELY ARPT / VAS daily
loading... 128120 SZOMBATHELY ARPT / VAS high_res
loading... 128220 GYOR daily
loading... 128220 GYOR high_res
loading... 128250 PAPA daily
loading... 128250 PAPA high_res
loading... 128300 VESZPREM/SZENTKIRALYSZABADJA daily
loading... 128300 VESZPREM/SZENTKIRALYSZABADJA high_res
loading... 128390 FERIHEGY daily
loading... 128390 FERIHEGY high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,13,22,23,25) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 128430 BUDAPEST/PESTSZENTLORINC daily
loading... 128430 BUDAPEST/PESTSZENTLORINC high_res
loading... 128510 KEKESTETO daily
loading... 128510 KEKESTETO high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (26) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 128600 SZOLNOK daily
loading... 128600 SZOLNOK high_res
loading... 128820 DEBRECEN daily
loading... 128820 DEBRECEN high_res
loading... 128920 NYIREGYHAZA daily
loading... 128920 NYIREGYHAZA high_res
loading... 129100 SZENTGOTTHARD/FARKASFA daily
loading... 129100 SZENTGOTTHARD/FARKASFA high_res
loading... 129150 ZALAEGERSZEG/ANDRASHIDA daily
loading... 129150 ZALAEGERSZEG/ANDRASHIDA high_res
loading... 129420 PECS SOUTH daily
loading... 129420 PECS SOUTH high_res
loading... 129600 BAJA daily
loading... 129600 BAJA high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (22,23,26) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
loading... 129700 KECSKEMET daily
loading... 129700 KECSKEMET high_res
loading... 129820 SZEGED (AUT) daily
loading... 129820 SZEGED (AUT) high_res
loading... 129920 BEKESCSABA daily
loading... 129920 BEKESCSABA high_res
loading... 129350 SIOFOK daily
loading... 129350 SIOFOK high_res
C:\ProgramData\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py:3249: DtypeWarning: Columns (5,12,13,21,22,23,24,26) have mixed types. Specify dtype option on import or set low_memory=False.
  if (await self.run_code(code, result,  async_=asy)):
In [24]:
ds.to_csv('data/hu_ds.csv')
hs.to_csv('data/hu_hs.csv')