#!/usr/bin/env python
# coding: utf-8

# In[7]:


import pandas as pd, numpy as np
from scipy import stats
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[88]:


stations = pd.read_csv('data/stations.csv').set_index('ID')


# Setup plot params

# In[89]:


def get_country(c, h='hs', plot=False):
    """Load the daily station data for country code `c` and add calendar columns."""
    if c == 'huro':
        hu = pd.read_csv('data/hu_' + h + '.csv')  # daily data, Hungary
        ro = pd.read_csv('data/ro_' + h + '.csv')  # daily data, Romania
        df = pd.concat([hu, ro])
    else:
        df = pd.read_csv('data/' + c + '_' + h + '.csv')  # daily data
    # df = pd.read_csv('data/' + c + '_hs.csv')  # high-res data
    df['time'] = pd.to_datetime(df['time'])
    df['year'] = df['time'].dt.year
    df['month'] = df['time'].dt.month
    df['day'] = df['time'].dt.day
    df['hour'] = df['time'].dt.hour
    df = df.set_index('time').sort_index()
    if plot:
        # number of distinct stations reporting per year
        df.groupby('year').nunique()['ID'].plot()
    return df


# Re-run this section for `ro`, `hu` and `huro`

# In[380]:


# Run first with c='ro' and c='hu' to populate df_ro / df_hu, then with 'huro' to combine.
c = 'huro'
if c != 'huro':
    df = get_country(c, plot=True)
    df_ro = df.copy()
    # df_hu = df.copy()
else:
    df = pd.concat([df_ro, df_hu])


# In[382]:


import math

def haversine(coord1, coord2):
    """Great-circle distance between two (lat, lon) pairs, in kilometers."""
    R = 6372800  # Earth radius in meters
    lat1, lon1 = coord1
    lat2, lon2 = coord2
    phi1, phi2 = math.radians(lat1), math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlambda = math.radians(lon2 - lon1)
    a = math.sin(dphi / 2)**2 + \
        math.cos(phi1) * math.cos(phi2) * math.sin(dlambda / 2)**2
    return 2 * R * math.atan2(math.sqrt(a), math.sqrt(1 - a)) / 1000


# In[383]:


# Map stations closer than 10 km to a single canonical ID.
coerce = {}
for i1 in range(len(stations.index)):
    station1 = stations.index[i1]
    for i2 in range(i1, len(stations.index)):
        station2 = stations.index[i2]
        if station1 != station2:
            if haversine(stations.loc[station1][['LAT', 'LON']].values,
                         stations.loc[station2][['LAT', 'LON']].values) < 10:
                print(station1, station2, stations.loc[station1]['LOC'], stations.loc[station2]['LOC'])
                if station2 not in coerce:
                    if station1 in coerce:
                        coerce[station2] = coerce[station1]
                    else:
                        coerce[station2] = station1


# In[384]:


df['ID'] = df['ID'].replace(coerce)


# In[385]:


station_list = df['ID'].unique()


# In[386]:


# Repeat the near-duplicate check on the coerced ID list.
coerce = {}
for i1 in range(len(station_list)):
    station1 = station_list[i1]
    for i2 in range(i1, len(station_list)):
        station2 = station_list[i2]
        if station1 != station2:
            if haversine(stations.loc[station1][['LAT', 'LON']].values,
                         stations.loc[station2][['LAT', 'LON']].values) < 10:
                print(station1, station2, stations.loc[station1]['LOC'], stations.loc[station2]['LOC'])
                if station2 not in coerce:
                    if station1 in coerce:
                        coerce[station2] = coerce[station1]
                    else:
                        coerce[station2] = station1


# If nothing is found, continue; otherwise loop back and re-run the coercion.
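# The two near-duplicate passes above can also be folded into a single fixpoint
# loop, so the "re-run until nothing is found" step is automatic. A minimal
# sketch, assuming the `stations`, `df` and `haversine` defined above; the
# helper name `coerce_pass` is illustrative, not part of the original notebook.

# In[ ]:


def coerce_pass(ids):
    # One merge pass: map each station to an earlier station within 10 km.
    found = {}
    for i1 in range(len(ids)):
        for i2 in range(i1 + 1, len(ids)):
            s1, s2 = ids[i1], ids[i2]
            if haversine(stations.loc[s1][['LAT', 'LON']].values,
                         stations.loc[s2][['LAT', 'LON']].values) < 10:
                if s2 not in found:
                    found[s2] = found.get(s1, s1)  # chain to s1's canonical ID
    return found


mapping = coerce_pass(list(df['ID'].unique()))
while mapping:  # each pass strictly shrinks the set of IDs, so this terminates
    df['ID'] = df['ID'].replace(mapping)
    mapping = coerce_pass(list(df['ID'].unique()))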
# In[398]:


keys = ['XTEMP', 'XSPD', 'XVSB']
dz = df.groupby(['ID', 'month', 'hour', 'year']).median()[keys]  # per-station medians by year
dzm = dz.groupby(['ID', 'month', 'hour']).median()[keys]         # baseline over all years
# ...or a baseline over the last decade (2009-2019)
dzm10 = df[df['year'].isin(range(2009, 2020))].groupby(['ID', 'month', 'hour']).median()[keys]
dzn = dz - dzm      # anomalies against the all-years baseline
dzn10 = dz - dzm10  # anomalies against the 2009-2019 baseline
dzp = dzn.groupby(['ID', 'year']).median()
dzp10 = dzn10.groupby(['ID', 'year']).median()

# get outliers: drop station-years whose temperature anomaly z-score exceeds 3
dws = []
for station in dzp.index.get_level_values(0).unique():
    dw = dzp.loc[station][['XTEMP']]
    dw['z'] = np.abs(stats.zscore(dw).flatten())
    dw = dw[dw['z'] < 3]
    dw['ID'] = station
    dws.append(dw)
dws = pd.concat(dws)

# slice under new index
dzp = dzp.loc[dws.reset_index().set_index(['ID', 'year']).index]
dzp10 = dzp10.loc[dws.reset_index().set_index(['ID', 'year']).index]

# export: rescale each station's anomalies to [-1, 1]
dzr = (((dzp - dzp.groupby('ID').min()) / (dzp.groupby('ID').max() - dzp.groupby('ID').min())) * 2) - 1
dzr.columns = [i.replace('X', 'N') for i in dzr.columns]
dzp10.columns = [i + '10' for i in dzp10.columns]
dzq = dzp.join(dzp10).join(dzr).join(stations[['LOC', 'LAT', 'LON', 'ELEVATION']])


# In[399]:


# Export stations with more than `i` years of data; the 40-year cut is the default set.
for i in [10, 20, 30, 40, 50, 60]:
    dk = dzq.reset_index().set_index('ID').loc[dzp.groupby(['ID']).count()['XTEMP'] > i]
    dk.to_csv('stripes/' + c + '_' + str(i) + '.csv')
    if i == 40:
        dk.to_csv('stripes/' + c + '.csv')
        translated_stations = dk.index.unique()


# Stations - only for `huro`

# In[370]:


import json


# In[371]:


namer = json.loads(open('data/namer.json', 'r').read())


# In[393]:


# Print template entries for station names not yet translated.
for i in stations.loc[translated_stations]['LOC'].values:
    if i not in namer:
        print('"' + i + '":"' + i.capitalize() + '",')


# In[394]:


namer2 = {
    "BAISOARA": "Băișoara",
    "AVRAMENI": "Avrameni",
    "BAIA MARE/MAGHERUSI": "Nagybánya - Miszmogyorós",
    "BISTRITA": "Beszterce",
    "BARNOVA": "Iași / Bârnova",
    "BATOS": "Bátos",
    "BARLAD": "Bârlad",
    "ARAD": "Arad",
    "ALBA IULIA": "Gyulafehérvár",
    "BLAJ": "Balázsfalva",
    "BARAOLT": "Barót",
    "ADJUD": "Adjud",
    "BALINTESTI": "Bălintești",
    "BALEA LAC": "Bâlea-tó",
    "BISOCA": "Bisoca",
    "BANLOC": "Bánlak",
    "APA NEAGRA": "Apa Neagră",
    "BAILE HERCULANE": "Herkulesfürdő",
    "BERZASCA": "Berzasca",
    "BACLES": "Bâcleș",
    "BAILESTI": "Băilești",
    "ADAMCLISI": "Adamclisi",
    "ALEXANDRIA": "Alexandria",
    "BECHET": "Bechet",
    "DARABANI": "Darabani",
    "BORCEA FETESTI AIR BASE": "Fetești légibázis",
    "SIGHETUL MARMATIEI": "Máramarossziget",
    "RADAUTI": "Rădăuți",
    "DOROHOI": "Dorohoi",
    "SUCEAVA/SALCEA": "Suceava",
    "BALATON": "Balaton",
    "PECS": "Pécs",
    "CEAHLAU": "Csalhó",
    "POSTAVARU": "Postăvaru-csúcs",
    "CRAIOVA": "Craiova",
    "TURNU-MAGURELE": "Turnu Măgurele"}


# In[395]:


namer.update(namer2)


# In[397]:


open('data/namer.json', 'w').write(json.dumps(namer))


# Carpet plots

# In[404]:


# Quick look at one station's monthly anomalies (uses `station` from the loop above).
dzn10.loc[station].groupby(['year', 'month']).median()[['XTEMP']].reset_index()


# In[410]:


import seaborn as sns


# In[499]:


for station in df['ID'].unique():
    # year x month matrix of temperature anomalies for one station
    dy = dzn.loc[station].groupby(['year', 'month']).median()[['XTEMP']].unstack().T
    # fill gap years with NaN so the carpet has a continuous year axis
    for i in range(min(dy.columns) + 1, max(dy.columns)):
        if i not in dy.columns:
            dy[i] = np.nan
    dy = dy[range(min(dy.columns), max(dy.columns) + 1)]
    dy = dy[dy > dy.min().min() * 0.7]
    dy=dy[dy
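# For reference, a minimal sketch of how one station's year-by-month anomaly
# matrix can be drawn as a carpet plot with seaborn, assuming the `dzn` frame
# built above; the colormap, figure size and labels are illustrative choices,
# not the original notebook's.

# In[ ]:


station = df['ID'].unique()[0]
# rows = month, columns = year, values = median XTEMP anomaly
dy = dzn.loc[station].groupby(['year', 'month']).median()['XTEMP'].unstack().T
fig, ax = plt.subplots(figsize=(12, 4))
sns.heatmap(dy, cmap='RdBu_r', center=0, ax=ax,
            cbar_kws={'label': 'XTEMP anomaly'})
ax.set_title(str(stations.loc[station]['LOC']))
ax.set_xlabel('year')
ax.set_ylabel('month')
plt.show()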