#!/usr/bin/env python
# coding: utf-8

# In[1]:


import pandas as pd, numpy as np
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[42]:


# Raw file identifiers, one per downloaded data file. Each id is the base name
# of a '<id>.txt' file under daily/raw/ro/ or daily/raw/hu/ respectively.
# NOTE(review): the folder names suggest ro = Romania and hu = Hungary, and the
# 'CDO' prefix looks like a data-order id -- confirm against the data source.
ro = [
    'CDO7252998062998',
    'CDO5064618063001',
    'CDO3042698063020',
    'CDO7893378063026',
    'CDO4604228063028',
    'CDO7821968063031',
    'CDO5072238063046',
    'CDO4981038063054',
    'CDO4725178063056',
    'CDO5209078063060',
    'CDO699718063062',  # NOTE(review): one digit shorter than the others -- verify
    'CDO4894288063064',
    'CDO1632508063066',
    'CDO8765068063068',
    'CDO9993348063070',
]
hu = [
    'CDO5941998062972',
    'CDO5285728062974',
    'CDO3021588062978',
    'CDO9675788062981',
]


# In[43]:


# Root folder of the data repository; every file path used in this script is
# built by appending a relative path to this string (note the trailing slash).
p = 'C:/Users/csala/Onedrive - Lancaster University/Datarepo/szekelydata/klima/'


# In[44]:


# Load stations.csv from the root folder.
# NOTE(review): presumably the station metadata table -- confirm its schema.
stations = pd.read_csv(f"{p}stations.csv")


# In[76]:


# Read every raw daily file -- the 'hu' folder first, then the 'ro' folder --
# and stack all of them into one DataFrame called `dfs`.
# ' FRSHTT' and ' YEARMODA' are read as text so pandas does not convert them
# to numbers (the leading spaces are part of the raw column headers).
raw_dtypes = {' FRSHTT': str, ' YEARMODA': str}

dfs = []
for country, file_ids in (('hu', hu), ('ro', ro)):
    for i in file_ids:
        df = pd.read_csv(p + 'daily/raw/' + country + '/' + i + '.txt', dtype=raw_dtypes)
        dfs.append(df)
        print(i)  # progress indicator: id of the file just loaded

# The per-file row indices are kept as they are (no ignore_index), so index
# values repeat across files in the combined frame.
dfs = pd.concat(dfs)


# In[88]:


# Manual patches for malformed date strings in the raw data: each key is a
# stripped ' YEARMODA' value as it appears in the files, each value is the
# full YYYYMMDD string it should be read as.
year_fixer = {'199710': '19971001'}


# In[89]:


# Build the 'time' column: strip the padding whitespace, apply the patches
# (Series.replace with a dict swaps whole cell values, not substrings), then
# parse the result as YYYYMMDD dates.
raw_dates = dfs[' YEARMODA'].str.strip()
patched_dates = raw_dates.replace(year_fixer)
dfs['time'] = pd.to_datetime(patched_dates, format='%Y%m%d')


# In[91]:


# Peek at the first rows (only rendered when run as a notebook cell).
dfs.head()


# In[92]:


# Quick visual check: plot the '   TEMP' column over time for one station.
stn = 151700
station_mask = dfs['STN---'] == stn
d = dfs[station_mask]
temp_by_time = d.set_index('time')['   TEMP']
temp_by_time.plot()


# In[93]:


# Write one CSV per station id into daily/export/, named '<station id>.csv'.
# Station ids are visited in order of first appearance in `dfs`.
export_dir = p + 'daily/export/'
for stn in dfs['STN---'].unique():
    d = dfs.loc[dfs['STN---'] == stn]
    d.to_csv(f"{export_dir}{stn}.csv")
    print(stn)  # progress indicator: station just exported


# Determine the 50 stations with the most dated records and list their ids in ascending order

# In[120]:


# Count the rows with a non-null 'time' per station, keep the 50 stations with
# the highest counts, and print their ids in ascending order, each formatted
# as a quoted, comma-terminated line ready to paste into a Python list.
records_per_station = dfs.groupby('STN---')['time'].count()
busiest = records_per_station.sort_values(ascending=False).head(50)
for i in np.sort(busiest.index):
    print(f"'{i}',")


# In[ ]: