#!/usr/bin/env python
# coding: utf-8
"""Merge raw daily climate station files and export one CSV per station.

Notebook-style script. When executed (as a script or inside IPython) it:

1. reads ``stations.csv`` and every raw ``.txt`` file listed in ``hu`` and
   ``ro`` from below the data directory ``p``;
2. concatenates them and derives a ``time`` column from `` YEARMODA``;
3. plots `` TEMP`` for one station;
4. writes one CSV per distinct ``STN---`` value to ``daily/export/``;
5. prints the (up to) 50 stations with the most records.

Column names carry the leading space found in the raw file headers.
The helper functions are importable without triggering any file access.
"""

import numpy as np
import pandas as pd

# Raw file identifiers (file name without ".txt") under daily/raw/ro/.
ro = [
    'CDO7252998062998', 'CDO5064618063001', 'CDO3042698063020',
    'CDO7893378063026', 'CDO4604228063028', 'CDO7821968063031',
    'CDO5072238063046', 'CDO4981038063054', 'CDO4725178063056',
    'CDO5209078063060', 'CDO699718063062', 'CDO4894288063064',
    'CDO1632508063066', 'CDO8765068063068', 'CDO9993348063070',
]
# Raw file identifiers under daily/raw/hu/.
hu = [
    'CDO5941998062972', 'CDO5285728062974', 'CDO3021588062978',
    'CDO9675788062981',
]

# Data directory; must end with a slash because paths are built by
# plain string concatenation.
p = 'C:/Users/csala/Onedrive - Lancaster University/Datarepo/szekelydata/klima/'

# Whole-value corrections for malformed YEARMODA entries: '199710' has no
# day part and would not match the '%Y%m%d' format, so it is pinned to
# the first of the month.
year_fixer = {'199710': '19971001'}


def read_station_files(base, country, ids):
    """Read the raw daily file of every id in *ids* for one country.

    Args:
        base: Data directory, ending with a path separator.
        country: Sub-directory of ``daily/raw/`` (``'hu'`` or ``'ro'``).
        ids: Iterable of file identifiers (file names without ``.txt``).

    Returns:
        A list with one DataFrame per id, in the order of *ids*.
    """
    frames = []
    for file_id in ids:
        frames.append(
            pd.read_csv(
                base + 'daily/raw/' + country + '/' + file_id + '.txt',
                # Keep flag and date columns as text so leading zeros and
                # malformed dates survive parsing untouched.
                dtype={' FRSHTT': str, ' YEARMODA': str},
            )
        )
        print(file_id)  # progress indicator
    return frames


def parse_yearmoda(values, fixes=None):
    """Convert a Series of ``YYYYMMDD`` strings to datetimes.

    Args:
        values: String Series; surrounding whitespace is stripped.
        fixes: Optional ``{bad_value: good_value}`` mapping applied to whole
            (stripped) values before parsing.

    Returns:
        A datetime Series with the same index as *values*.

    Raises:
        ValueError: If a value still does not match ``%Y%m%d`` after fixes.
    """
    cleaned = values.str.strip()
    if fixes:
        cleaned = cleaned.replace(fixes)
    return pd.to_datetime(cleaned, format='%Y%m%d')


def busiest_stations(frame, top=50):
    """Return the ids of the *top* stations with the most non-null ``time``
    rows, sorted ascending by station id.

    Args:
        frame: DataFrame with ``STN---`` and ``time`` columns.
        top: Maximum number of stations to keep.

    Returns:
        A NumPy array of station ids.
    """
    counts = frame.groupby('STN---').count()['time']
    return np.sort(counts.sort_values(ascending=False).head(top).index)


if __name__ == "__main__":
    # Plotting setup is only needed when the analysis actually runs.
    import matplotlib.pyplot as plt

    # `get_ipython` exists only inside IPython/Jupyter; under the plain
    # interpreter named in the shebang it would raise NameError.
    try:
        _shell = get_ipython()
    except NameError:
        _shell = None
    if _shell is not None:
        _shell.run_line_magic('matplotlib', 'inline')

    stations = pd.read_csv(p + 'stations.csv')

    # Hungarian files first, then Romanian, matching the original order.
    dfs = read_station_files(p, 'hu', hu)
    dfs += read_station_files(p, 'ro', ro)
    dfs = pd.concat(dfs)

    dfs['time'] = parse_yearmoda(dfs[' YEARMODA'], year_fixer)

    dfs.head()  # notebook preview; no visible effect in a plain script

    # Quick visual sanity check on a single station.
    stn = 151700
    d = dfs[dfs['STN---'] == stn]
    d.set_index('time')[' TEMP'].plot()

    # One CSV per station id.
    for stn in dfs['STN---'].unique():
        d = dfs[dfs['STN---'] == stn]
        d.to_csv(p + 'daily/export/' + str(stn) + '.csv')
        print(stn)

    # Determine most frequent: print ids as quoted, comma-terminated
    # lines ready to paste into a Python list.
    for i in busiest_stations(dfs, 50):
        print("'" + str(i) + "',")