#!/usr/bin/env python
# coding: utf-8
# # Storytelling Visualization:
EUR-USD exchange rate - CORONA, Presidencies influence
# ## Introduction
# #### BACKGROUND:
# For this guided project we used a data provided by the European Central Bank that was gathered and structured by D.Chemkaeva into a *.csv file.
# Link: https://www.kaggle.com/lsind18/euro-exchange-daily-rates-19992020
#
# #### SCOPE:
# The Scope of this guided project to find in a first step some ideas for storytelling vizualisations:
# - (4.1) dollar exchange rate during corona pademy 2020 vs 2016-2019
# - (4.2) dollar exchange rate for the last 3 presidencies (bush, obama, trump)
#
# We then set up graphs, that show the influence of CORONA pandemy (4.1) and evolution during presidencies (4.2) on the EUR-USD exchange rate.
# Import Packages
# In[2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as style
from datetime import datetime
from pandas.tseries.offsets import DateOffset
get_ipython().run_line_magic('matplotlib', 'inline')
# Load CSV data of exchange rates 1999-2021
# In[3]:
exchange_rates = pd.read_csv('euro-daily-hist_1999_2020.csv')
# ## 1. First Glance at Data Set
# In[4]:
exchange_rates.head()
# In[5]:
exchange_rates.tail()
# In[6]:
exchange_rates.info()
# ## 2. Data Cleaning [US Dollar]
# In[7]:
exchange_rates.rename(columns={'[US dollar ]': 'US_dollar',
'Period\\Unit:': 'Time'},
inplace=True)
exchange_rates['Time'] = pd.to_datetime(exchange_rates['Time'])
exchange_rates.sort_values('Time', inplace=True)
exchange_rates.reset_index(drop=True, inplace=True)
# Re-Index with Time/datetime format column
# In[8]:
euro_to_dollar = exchange_rates.set_index(['Time'])
# Check content of US_dollar column
# In[9]:
euro_to_dollar['US_dollar'].value_counts()
# #### Observations:
# - it contains 62 times an empty string marked as '-'
# Drop empty rows and convert to float
# In[10]:
euro_to_dollar.drop(euro_to_dollar[euro_to_dollar['US_dollar']=='-'].index,inplace=True)
euro_to_dollar['US_dollar'] = euro_to_dollar['US_dollar'].astype(float)
# ## 3. Calculate moving average of US_dollar on 30 days basis with rolling() function
# In[11]:
euro_to_dollar['rolling_mean'] = euro_to_dollar['US_dollar'].rolling(30).mean()
# In[12]:
plt.subplot(2,1,1)
plt.plot(euro_to_dollar['US_dollar'])
plt.subplot(2,1,2)
plt.plot(euro_to_dollar['rolling_mean'])
plt.show()
# ## 4. story telling GRAPH ideas:
# - (4.1) dollar exchange rate during corona pademy 2020 vs 2016-2019
# - (4.2) dollar exchange rate for the last 3 presidencies (bush, obama, trump)
# - dollar exchange rate during financial crisis 2007/08 vs 2006,2009
# - dollar exchange rate before/after 2000 internet bubble crash
#
# In[13]:
style.use('fivethirtyeight')
# ### 4.1 EUR/USD - Research of yearly growth indicator
# In[14]:
def avg_diff(vect):
return np.median(np.gradient(vect))
# In[15]:
def p2p_max_min(vect):
return np.max(vect)-np.min(vect)
# In[16]:
def month_sum_var(vect):
return np.sum(np.diff(vect))
# In[17]:
def month_sum_abs_var(vect):
return np.sum(np.abs(np.diff(vect)))
# In[18]:
def delta_per(vect):
return vect[-1]-vect[0]
# In[19]:
fig, ax = plt.subplots(figsize=(9, 5))
ax.axvline(datetime(2020,8,1),linewidth=85,
color='#acb0a9',alpha=.5)
ax.plot(euro_to_dollar['2016':'2020-12'].rolling_mean,
color='grey')
ax.plot(euro_to_dollar['2020-03':'2020-12'].rolling_mean)
# TITLE and SUBTITLE
ax.text(datetime(2015,5,1),1.30,'EURO to US Dollar Exchange Rate',
size=17,weight='bold')
ax.text(datetime(2015,5,1),1.28,'Exchange rate change during CORONA pandemy')
## Summed monthly variations per year
#calculate monthly variation
yrly_var_month = pd.DataFrame([])
for yr in ['2016','2017','2018','2019','2020']:
grouped = euro_to_dollar[yr].groupby(pd.PeriodIndex(euro_to_dollar[yr].index, freq="M"))['rolling_mean']
yrly_var_month[yr] = grouped.agg(delta_per).values
yrly_var = yrly_var_month.sum() # sum up monthly variations to yearly
yrly_max = yrly_var.max() # get max value of over all years
# graph yearly variation
y_offs = 0.95
for yr in yrly_var.keys(): # loop over years
if yrly_var[yr]>=0: # for positive yearly variation -> blue bar
# blue positive bar
ax.plot([datetime(int(yr),7,1),
datetime(int(yr),7,1) + np.round(yrly_var[yr]/yrly_max*30)*DateOffset(days=6)],
y_offs*np.array([1,1]),
linewidth=18, color='#033BC7')
#grey vertical line
ax.plot([datetime(int(yr),7,1),
datetime(int(yr),7,1)],
[y_offs-0.015,y_offs+0.015],linewidth=2,color='grey')
else: # for negative yearly variation -> red bar
# red bar
ax.plot([datetime(int(yr),7,1) + np.round(yrly_var[yr]/yrly_max*30)*DateOffset(days=6),
datetime(int(yr),7,1)],
y_offs*np.array([1,1]),
linewidth=18, color='#af0b1e')
#grey vertical line
ax.plot([datetime(int(yr),7,1),
datetime(int(yr),7,1)],
[y_offs-0.015,y_offs+0.015],linewidth=2,color='grey')
ax.text(datetime(2015,5,1),0.98,'Yearly change rate')
# SPLIT LINE
ax.axhline(1,color='grey',linewidth=2)
# Remove Ylabels in Yearly change rate bar section
for k,label in enumerate(ax.get_yticklabels()):
if k<3:
label.set_visible(False)
# FOOTER LINE
ax.text(datetime(2015,5,1), 0.87, '©DATAQUEST' + ' '*35 + 'Source: European Central Bank Data, prepared by D.Chemkaeva',
color = '#f0f0f0', backgroundcolor = '#4d4d4d', size=12)
# Set Ygrid
ax.grid(False)
ax.grid(True,axis='x')
plt.show()
# #### Observations:
# - we observe a high increase during the CORONA Pandemy (2020/03 -12), that was only exceeded in 2017 Trump's 1st year (https://voxeu.org/article/dollar-euro-exchange-rate-2016-2018)
#
# ### 4.2 EUR/USD - compare presidency effect on exchange rate
# In[20]:
fig, ax = plt.subplots(figsize=(9, 5))
#ax.axvline(datetime(2020,8,1),linewidth=85,
# color='#acb0a9',alpha=.5)
ax.text(datetime(2003,1,1),1.7,'Bush',
size=17, weight='bold',
color='magenta')
ax.text(datetime(2002,6,1),1.65,'(2001 - 2009)',
size=10, weight='bold',
color='grey')
ax.plot(euro_to_dollar['2001':'2009'].rolling_mean,
color='magenta')
ax.axvline(datetime(2009,1,1),linewidth=3,
color='#acb0a9',alpha=.5)
ax.text(datetime(2012,1,1),1.7,'Obama',
size=17, weight='bold',
color='orange')
ax.text(datetime(2011,11,1),1.65,'(2009 - 2017)',
size=10, weight='bold',
color='grey')
ax.plot(euro_to_dollar['2009':'2017'].rolling_mean,
color='orange')
ax.axvline(datetime(2017,1,1),linewidth=3,
color='#acb0a9',alpha=.5)
ax.text(datetime(2019,1,1),1.7,'Trump',
size=17, weight='bold',
color='deepskyblue')
ax.text(datetime(2018,10,1),1.65,'(2017 - 2021)',
size=10, weight='bold',
color='grey')
ax.plot(euro_to_dollar['2017':'2021'].rolling_mean,
color='deepskyblue')
# Bush presidency
bush_pres_month = euro_to_dollar['2001':'2009'].groupby(pd.PeriodIndex(euro_to_dollar['2001':'2009'].index, freq="M"))['rolling_mean'].agg(delta_per).values
bush_pres = bush_pres_month.sum()
# Obama presidency
obama_pres_month = euro_to_dollar['2009':'2017'].groupby(pd.PeriodIndex(euro_to_dollar['2009':'2017'].index, freq="M"))['rolling_mean'].agg(delta_per).values
obama_pres = obama_pres_month.sum()
# Trump presidency
trump_pres_month = euro_to_dollar['2017':'2021'].groupby(pd.PeriodIndex(euro_to_dollar['2017':'2021'].index, freq="M"))['rolling_mean'].agg(delta_per).values
trump_pres = trump_pres_month.sum()
# Trump presidency excluding CORONA PANDEMY
trump_pres_month_nocovid = euro_to_dollar['2017':'2020-03-01'].groupby(pd.PeriodIndex(euro_to_dollar['2017':'2020-03-01'].index, freq="M"))['rolling_mean'].agg(delta_per).values
trump_pres_nocovid = trump_pres_month_nocovid.sum()
y_offs = 0.6
yrly_var = {2005: bush_pres,
2012: obama_pres,
2019: trump_pres}
yrly_max = np.max(np.abs(np.array(list(yrly_var.values())))) # max(yrly_var.values())
for yr in yrly_var.keys():
if yrly_var[yr]>=0:
ax.plot([datetime(int(yr),1,1),
datetime(int(yr),1,1) +
np.round(yrly_var[yr]/yrly_max*48)*DateOffset(months=1)],
y_offs*np.array([1,1]),
linewidth=18, color='#033BC7')
#grey vertical line
ax.plot([datetime(int(yr),1,1),
datetime(int(yr),1,1)],
[y_offs-0.040,y_offs+0.040],linewidth=2,color='grey')
else:
ax.plot([datetime(int(yr),1,1) +
np.round(yrly_var[yr]/yrly_max*48)*DateOffset(months=1),
datetime(int(yr),1,1)],
y_offs*np.array([1,1]),
linewidth=18, color='#af0b1e')
#grey vertical line
ax.plot([datetime(int(yr),1,1),
datetime(int(yr),1,1)],
[y_offs-0.040,y_offs+0.040],linewidth=2,color='grey')
# SPLIT LINE
ax.axhline(0.8,color='grey',linewidth=2)
ax.text(datetime(1999,1,1),0.72,'Presidency change rate')
# Remove Ylabels in Yearly change rate bar section
for k,label in enumerate(ax.get_yticklabels()):
if k<3:
label.set_visible(False)
# FOOTER LINE
ax.text(datetime(1999,1,1), 0.3, '©DATAQUEST' + ' '*35 + 'Source: European Central Bank Data, prepared by D.Chemkaeva',
color = '#f0f0f0', backgroundcolor = '#4d4d4d', size=12)
# set Xticks and labels
x_dates=[]
for yr in ['2001','2009','2017','2021']:
x_dates.append(datetime(int(yr),1,1))
ax.set_xticks(x_dates)
ax.set_xticklabels(['2001','2009','2017','2021'])
ax.grid(False)
ax.grid(True,axis='y')
plt.show()
# #### Observations:
# - The BUSH presidency showed the highest increase in the exchange rate
# - The TRUMP presidency shows an increase of roughly 27% [55%*] of the BUSH presidency
# - /!\ excluding the period of the CORONA PANADEMY from the TRUMP presidency, the increase shrinks to roughly 8% [16%*] of the BUSH era
#
# .* doubled values, to match time spans of presidency between BUSH and TRUMP
# Calculate TRUMP variations without CORONA PANDEMY
# In[21]:
# Obama presidency
obama_pres_month = euro_to_dollar['2009':'2017'].groupby(pd.PeriodIndex(euro_to_dollar['2009':'2017'].index, freq="M"))['rolling_mean'].agg(delta_per).values
obama_pres = obama_pres_month.sum()
# Trump presidency
trump_pres_month = euro_to_dollar['2017':'2021'].groupby(pd.PeriodIndex(euro_to_dollar['2017':'2021'].index, freq="M"))['rolling_mean'].agg(delta_per).values
trump_pres = trump_pres_month.sum()
# Trump presidency
trump_pres_month_nocovid = euro_to_dollar['2017':'2020-03-01'].groupby(pd.PeriodIndex(euro_to_dollar['2017':'2020-03-01'].index, freq="M"))['rolling_mean'].agg(delta_per).values
trump_pres_nocovid = trump_pres_month_nocovid.sum()
y_offs = 0.6
yrly_var = {2005: bush_pres,
2012: obama_pres,
2019: trump_pres,
2020: trump_pres_nocovid}
yrly_max = np.max(np.abs(np.array(list(yrly_var.values()))))
# In[22]:
yrly_var[2020]/51*120/yrly_max
# # Conclusion
# * I worked out two storytelling vizualisations, displaying EUR-USD exchange rate for a given period together with an indicator of rate variation for that given period
# * (4.1 - CORONA pandemy) we observe a strong increase of the exchange rate, similar level as during TRUMP'S 1st year in office
# * (4.2 - presidencies influence) The Bush presidencies shows a contiunous increase of the exchange rate, where the Obama presidencies show a continous decline (not reach back the initial level befor BUSH). The Trump presidency shows an increase of the exchange rate, that shrinks to 8% of the Bush era if we exclude the Corona pandemy
# ### Annex - try outs
# In[32]:
yr='2020'
for yr in ['2016','2017','2018','2019','2020']:
grouped = euro_to_dollar.loc[yr].groupby(pd.PeriodIndex(euro_to_dollar.loc[yr].index, freq="M"))['rolling_mean']
plt.bar(yr,grouped.agg(delta_per).sum())
grouped = euro_to_dollar['2020-03':'2020-12'].groupby(pd.PeriodIndex(euro_to_dollar['2020-03':'2020-12'].index, freq="M"))['rolling_mean']
#correct 2020 as it contains only 9 months
plt.bar(yr,grouped.agg(delta_per).sum()/9*12)
plt.show()
# In[28]:
yr='2020'
for yr in ['2016','2017','2018','2019','2020']:
grouped = euro_to_dollar[yr].groupby(pd.PeriodIndex(euro_to_dollar[yr].index, freq="M"))['rolling_mean']
#avg_rate = grouped.agg(p2p_max_min)
#avg_rate = grouped.agg(month_sum_var)
plt.bar(yr,grouped.agg(delta_per).sum())
#plt.bar(yr,grouped.agg(month_sum_var).sum())
#plt.plot(np.arange(12)+1,avg_rate.values,label=yr+'- '+str(np.round(avg_rate.sum()*1000)/1000))
grouped = euro_to_dollar['2020-03':'2020-12'].groupby(pd.PeriodIndex(euro_to_dollar['2020-03':'2020-12'].index, freq="M"))['rolling_mean']
plt.bar(yr,grouped.agg(delta_per).sum()/9*12)
plt.show()
# In[33]:
#grouped = euro_to_dollar[euro_to_dollar[datetime(2016)]].groupby(pd.PeriodIndex(euro_to_dollar.index, freq="M"))['rolling_mean']
plt.figure()
for yr in ['2016','2017','2018','2019','2020']:
grouped = euro_to_dollar.loc[yr].groupby(pd.PeriodIndex(euro_to_dollar.loc[yr].index, freq="M"))['US_dollar']
plt.plot(yr,grouped.agg(avg_diff).values.max(),'s')
plt.plot(yr,grouped.agg(avg_diff).values.mean(),'o')
plt.plot(yr,grouped.agg(avg_diff).values.min(),'v')
# In[34]:
#grouped = euro_to_dollar[euro_to_dollar[datetime(2016)]].groupby(pd.PeriodIndex(euro_to_dollar.index, freq="M"))['rolling_mean']
plt.figure()
for yr in ['2016','2017','2018','2019','2020']:
grouped = euro_to_dollar.loc[yr].groupby(pd.PeriodIndex(euro_to_dollar.loc[yr].index, freq="M"))['US_dollar']
#plt.bar(yr,grouped.agg(avg_diff))
plt.plot(np.arange(12)+1,grouped.agg(avg_diff).values,label=yr)
plt.legend()
# In[35]:
for yr in ['2016','2017','2018','2019','2020']:
grouped = euro_to_dollar.loc[yr].groupby(pd.PeriodIndex(euro_to_dollar.loc[yr].index, freq="M"))['rolling_mean']
#avg_rate = grouped.agg(p2p_max_min)
#avg_rate = grouped.agg(month_sum_var)
plt.bar(yr,grouped.agg(delta_per).sum())