For this guided project we used data provided by the European Central Bank, gathered and structured into a CSV file by D. Chemkaeva. Link: https://www.kaggle.com/lsind18/euro-exchange-daily-rates-19992020
The scope of this guided project is, as a first step, to find some ideas for storytelling visualizations:
Import Packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.style as style
from datetime import datetime
from pandas.tseries.offsets import DateOffset
%matplotlib inline
Load CSV data of exchange rates 1999-2021
# Load the ECB daily exchange-rate history (EUR against ~40 currencies, 1999-2021).
exchange_rates = pd.read_csv('euro-daily-hist_1999_2020.csv')
# Peek at the first rows; the file is ordered newest-first (2021-01-08 on top).
exchange_rates.head()
Period\Unit: | [Australian dollar ] | [Bulgarian lev ] | [Brazilian real ] | [Canadian dollar ] | [Swiss franc ] | [Chinese yuan renminbi ] | [Cypriot pound ] | [Czech koruna ] | [Danish krone ] | ... | [Romanian leu ] | [Russian rouble ] | [Swedish krona ] | [Singapore dollar ] | [Slovenian tolar ] | [Slovak koruna ] | [Thai baht ] | [Turkish lira ] | [US dollar ] | [South African rand ] | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2021-01-08 | 1.5758 | 1.9558 | 6.5748 | 1.5543 | 1.0827 | 7.9184 | NaN | 26.163 | 7.4369 | ... | 4.8708 | 90.8000 | 10.0510 | 1.6228 | NaN | NaN | 36.8480 | 9.0146 | 1.2250 | 18.7212 |
1 | 2021-01-07 | 1.5836 | 1.9558 | 6.5172 | 1.5601 | 1.0833 | 7.9392 | NaN | 26.147 | 7.4392 | ... | 4.8712 | 91.2000 | 10.0575 | 1.6253 | NaN | NaN | 36.8590 | 8.9987 | 1.2276 | 18.7919 |
2 | 2021-01-06 | 1.5824 | 1.9558 | 6.5119 | 1.5640 | 1.0821 | 7.9653 | NaN | 26.145 | 7.4393 | ... | 4.8720 | 90.8175 | 10.0653 | 1.6246 | NaN | NaN | 36.9210 | 9.0554 | 1.2338 | 18.5123 |
3 | 2021-01-05 | 1.5927 | 1.9558 | 6.5517 | 1.5651 | 1.0803 | 7.9315 | NaN | 26.227 | 7.4387 | ... | 4.8721 | 91.6715 | 10.0570 | 1.6180 | NaN | NaN | 36.7760 | 9.0694 | 1.2271 | 18.4194 |
4 | 2021-01-04 | 1.5928 | 1.9558 | 6.3241 | 1.5621 | 1.0811 | 7.9484 | NaN | 26.141 | 7.4379 | ... | 4.8713 | 90.3420 | 10.0895 | 1.6198 | NaN | NaN | 36.7280 | 9.0579 | 1.2296 | 17.9214 |
5 rows × 41 columns
# Peek at the last rows: the oldest records (January 1999) of the newest-first file.
exchange_rates.tail()
Period\Unit: | [Australian dollar ] | [Bulgarian lev ] | [Brazilian real ] | [Canadian dollar ] | [Swiss franc ] | [Chinese yuan renminbi ] | [Cypriot pound ] | [Czech koruna ] | [Danish krone ] | ... | [Romanian leu ] | [Russian rouble ] | [Swedish krona ] | [Singapore dollar ] | [Slovenian tolar ] | [Slovak koruna ] | [Thai baht ] | [Turkish lira ] | [US dollar ] | [South African rand ] | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5694 | 1999-01-08 | 1.8406 | NaN | NaN | 1.7643 | 1.6138 | NaN | 0.58187 | 34.938 | 7.4433 | ... | 1.3143 | 27.2075 | 9.1650 | 1.9537 | 188.8400 | 42.560 | 42.5590 | 0.3718 | 1.1659 | 6.7855 |
5695 | 1999-01-07 | 1.8474 | NaN | NaN | 1.7602 | 1.6165 | NaN | 0.58187 | 34.886 | 7.4431 | ... | 1.3092 | 26.9876 | 9.1800 | 1.9436 | 188.8000 | 42.765 | 42.1678 | 0.3701 | 1.1632 | 6.8283 |
5696 | 1999-01-06 | 1.8820 | NaN | NaN | 1.7711 | 1.6116 | NaN | 0.58200 | 34.850 | 7.4452 | ... | 1.3168 | 27.4315 | 9.3050 | 1.9699 | 188.7000 | 42.778 | 42.6949 | 0.3722 | 1.1743 | 6.7307 |
5697 | 1999-01-05 | 1.8944 | NaN | NaN | 1.7965 | 1.6123 | NaN | 0.58230 | 34.917 | 7.4495 | ... | 1.3168 | 26.5876 | 9.4025 | 1.9655 | 188.7750 | 42.848 | 42.5048 | 0.3728 | 1.1790 | 6.7975 |
5698 | 1999-01-04 | 1.9100 | NaN | NaN | 1.8004 | 1.6168 | NaN | 0.58231 | 35.107 | 7.4501 | ... | 1.3111 | 25.2875 | 9.4696 | 1.9554 | 189.0450 | 42.991 | 42.6799 | 0.3723 | 1.1789 | 6.9358 |
5 rows × 41 columns
# Column overview: most rate columns have object dtype (they contain non-numeric
# placeholders such as '-'), so they need cleaning before numeric analysis.
exchange_rates.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5699 entries, 0 to 5698 Data columns (total 41 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Period\Unit: 5699 non-null object 1 [Australian dollar ] 5699 non-null object 2 [Bulgarian lev ] 5297 non-null object 3 [Brazilian real ] 5431 non-null object 4 [Canadian dollar ] 5699 non-null object 5 [Swiss franc ] 5699 non-null object 6 [Chinese yuan renminbi ] 5431 non-null object 7 [Cypriot pound ] 2346 non-null object 8 [Czech koruna ] 5699 non-null object 9 [Danish krone ] 5699 non-null object 10 [Estonian kroon ] 3130 non-null object 11 [UK pound sterling ] 5699 non-null object 12 [Greek drachma ] 520 non-null object 13 [Hong Kong dollar ] 5699 non-null object 14 [Croatian kuna ] 5431 non-null object 15 [Hungarian forint ] 5699 non-null object 16 [Indonesian rupiah ] 5699 non-null object 17 [Israeli shekel ] 5431 non-null object 18 [Indian rupee ] 5431 non-null object 19 [Iceland krona ] 3292 non-null float64 20 [Japanese yen ] 5699 non-null object 21 [Korean won ] 5699 non-null object 22 [Lithuanian litas ] 4159 non-null object 23 [Latvian lats ] 3904 non-null object 24 [Maltese lira ] 2346 non-null object 25 [Mexican peso ] 5699 non-null object 26 [Malaysian ringgit ] 5699 non-null object 27 [Norwegian krone ] 5699 non-null object 28 [New Zealand dollar ] 5699 non-null object 29 [Philippine peso ] 5699 non-null object 30 [Polish zloty ] 5699 non-null object 31 [Romanian leu ] 5637 non-null float64 32 [Russian rouble ] 5699 non-null object 33 [Swedish krona ] 5699 non-null object 34 [Singapore dollar ] 5699 non-null object 35 [Slovenian tolar ] 2085 non-null object 36 [Slovak koruna ] 2608 non-null object 37 [Thai baht ] 5699 non-null object 38 [Turkish lira ] 5637 non-null float64 39 [US dollar ] 5699 non-null object 40 [South African rand ] 5699 non-null object dtypes: float64(3), object(38) memory usage: 1.8+ MB
# Give the two columns of interest short names, parse the dates, and put the
# frame in chronological order with a fresh 0..n-1 index.
column_map = {'[US dollar ]': 'US_dollar',
              'Period\\Unit:': 'Time'}
exchange_rates = exchange_rates.rename(columns=column_map)
exchange_rates['Time'] = pd.to_datetime(exchange_rates['Time'])
# ignore_index=True replaces the separate reset_index(drop=True) step.
exchange_rates = exchange_rates.sort_values('Time', ignore_index=True)
Re-index the data frame on the Time column (datetime format)
# Index on the datetime column so rows can be selected by date strings
# (e.g. .loc['2016':'2020-12']) in the analysis cells below.
euro_to_dollar = exchange_rates.set_index(['Time'])
Check content of US_dollar column
# Inspect the raw values: 62 rows hold the placeholder '-' instead of a rate,
# which is why the column is still object dtype.
euro_to_dollar['US_dollar'].value_counts()
- 62 1.2276 9 1.1215 8 1.1305 7 1.1346 6 .. 1.2005 1 0.9276 1 0.8386 1 1.3067 1 1.2620 1 Name: US_dollar, Length: 3528, dtype: int64
Drop the placeholder ('-') rows and convert the rates to float
# Remove the rows whose rate is the '-' placeholder, make the column numeric,
# and add a 30-day rolling mean that smooths out daily noise.
placeholder = euro_to_dollar['US_dollar'] == '-'
euro_to_dollar = euro_to_dollar.loc[~placeholder].copy()
euro_to_dollar['US_dollar'] = pd.to_numeric(euro_to_dollar['US_dollar'])
euro_to_dollar['rolling_mean'] = euro_to_dollar['US_dollar'].rolling(30).mean()
# Sanity-check plot: raw daily rate (top) vs. the 30-day rolling mean (bottom).
plt.subplot(2,1,1)
plt.plot(euro_to_dollar['US_dollar'])
plt.subplot(2,1,2)
plt.plot(euro_to_dollar['rolling_mean'])
plt.show()
# Use the FiveThirtyEight look for all subsequent figures.
style.use('fivethirtyeight')
def avg_diff(vect):
    """Median of the pointwise gradient of *vect* (typical per-step change)."""
    gradient = np.gradient(vect)
    return np.median(gradient)
def p2p_max_min(vect):
    """Peak-to-peak range of *vect*: maximum minus minimum."""
    return np.ptp(vect)
def month_sum_var(vect):
    """Sum of successive differences of *vect* (telescopes to last - first)."""
    deltas = np.diff(vect)
    return deltas.sum()
def month_sum_abs_var(vect):
    """Total absolute movement of *vect*: sum of |successive differences|."""
    deltas = np.diff(vect)
    return np.abs(deltas).sum()
def delta_per(vect):
    """Net change over the period: last element minus first element."""
    first, last = vect[0], vect[-1]
    return last - first
# Figure: EUR/USD rolling mean 2016-2020 with the pandemic period highlighted.
fig, ax = plt.subplots(figsize=(9, 5))
# A very wide (85 pt) translucent vertical line acts as a grey band marking
# the pandemic period around 2020-08.
ax.axvline(datetime(2020,8,1),linewidth=85,
           color='#acb0a9',alpha=.5)
# Full 2016-2020 rolling mean in grey ...
ax.plot(euro_to_dollar['2016':'2020-12'].rolling_mean,
        color='grey')
# ... with the pandemic months (2020-03 onwards) re-plotted in the default colour.
ax.plot(euro_to_dollar['2020-03':'2020-12'].rolling_mean)
# TITLE and SUBTITLE
ax.text(datetime(2015,5,1),1.30,'EURO to US Dollar Exchange Rate',
        size=17,weight='bold')
ax.text(datetime(2015,5,1),1.28,'Exchange rate change during CORONA pandemy')
## Summed monthly variations per year
# Build one column per year holding the per-month net change of the 30-day
# rolling mean (delta_per = last - first within each month).
yrly_var_month = pd.DataFrame([])
for yr in ['2016', '2017', '2018', '2019', '2020']:
    # Row-select by year string via .loc (consistent with the later cells):
    # bare euro_to_dollar[yr] is a COLUMN lookup in pandas >= 2.0 and raises
    # KeyError; the old row-selection fallback was deprecated and removed.
    year_rows = euro_to_dollar.loc[yr]
    grouped = year_rows.groupby(pd.PeriodIndex(year_rows.index, freq="M"))['rolling_mean']
    yrly_var_month[yr] = grouped.agg(delta_per).values
yrly_var = yrly_var_month.sum()  # sum up monthly variations to yearly net change
yrly_max = yrly_var.max()        # largest yearly change, used to scale bar lengths
# graph yearly variation: one horizontal bar per year, anchored at 1 July.
y_offs = 0.95  # vertical (data-coordinate) position of the bar row
for yr in yrly_var.keys():  # loop over years
    if yrly_var[yr]>=0:  # for positive yearly variation -> blue bar
        # blue positive bar; length scaled by the year's share of yrly_max
        # (up to ~30 steps of 6 days each)
        ax.plot([datetime(int(yr),7,1),
                 datetime(int(yr),7,1) + np.round(yrly_var[yr]/yrly_max*30)*DateOffset(days=6)],
                y_offs*np.array([1,1]),
                linewidth=18, color='#033BC7')
        # grey vertical line marking the bar's anchor point
        ax.plot([datetime(int(yr),7,1),
                 datetime(int(yr),7,1)],
                [y_offs-0.015,y_offs+0.015],linewidth=2,color='grey')
    else:  # for negative yearly variation -> red bar
        # red bar drawn leftwards from the anchor (variation is negative)
        ax.plot([datetime(int(yr),7,1) + np.round(yrly_var[yr]/yrly_max*30)*DateOffset(days=6),
                 datetime(int(yr),7,1)],
                y_offs*np.array([1,1]),
                linewidth=18, color='#af0b1e')
        # grey vertical line marking the bar's anchor point
        ax.plot([datetime(int(yr),7,1),
                 datetime(int(yr),7,1)],
                [y_offs-0.015,y_offs+0.015],linewidth=2,color='grey')
# caption for the bar section
ax.text(datetime(2015,5,1),0.98,'Yearly change rate')
# SPLIT LINE separating the rate curve (top) from the bar section (bottom)
ax.axhline(1,color='grey',linewidth=2)
# Remove Ylabels in Yearly change rate bar section (first three tick labels)
for k,label in enumerate(ax.get_yticklabels()):
    if k<3:
        label.set_visible(False)
# FOOTER LINE with attribution and data source
ax.text(datetime(2015,5,1), 0.87, '©DATAQUEST' + ' '*35 + 'Source: European Central Bank Data, prepared by D.Chemkaeva',
        color = '#f0f0f0', backgroundcolor = '#4d4d4d', size=12)
# Set Ygrid: keep only vertical grid lines
ax.grid(False)
ax.grid(True,axis='x')
plt.show()
# Figure: EUR/USD rolling mean coloured by US presidency (Bush / Obama / Trump).
fig, ax = plt.subplots(figsize=(9, 5))
#ax.axvline(datetime(2020,8,1),linewidth=85,
#           color='#acb0a9',alpha=.5)
# Bush segment: name label, term label, and the 2001-2009 slice in magenta.
ax.text(datetime(2003,1,1),1.7,'Bush',
        size=17, weight='bold',
        color='magenta')
ax.text(datetime(2002,6,1),1.65,'(2001 - 2009)',
        size=10, weight='bold',
        color='grey')
ax.plot(euro_to_dollar['2001':'2009'].rolling_mean,
        color='magenta')
# grey separator at the Bush -> Obama handover
ax.axvline(datetime(2009,1,1),linewidth=3,
           color='#acb0a9',alpha=.5)
# Obama segment (2009-2017) in orange.
ax.text(datetime(2012,1,1),1.7,'Obama',
        size=17, weight='bold',
        color='orange')
ax.text(datetime(2011,11,1),1.65,'(2009 - 2017)',
        size=10, weight='bold',
        color='grey')
ax.plot(euro_to_dollar['2009':'2017'].rolling_mean,
        color='orange')
# grey separator at the Obama -> Trump handover
ax.axvline(datetime(2017,1,1),linewidth=3,
           color='#acb0a9',alpha=.5)
# Trump segment (2017-2021) in deep sky blue.
ax.text(datetime(2019,1,1),1.7,'Trump',
        size=17, weight='bold',
        color='deepskyblue')
ax.text(datetime(2018,10,1),1.65,'(2017 - 2021)',
        size=10, weight='bold',
        color='grey')
ax.plot(euro_to_dollar['2017':'2021'].rolling_mean,
        color='deepskyblue')
def _monthly_deltas(frame):
    """Per-month net change (last - first) of the 30-day rolling mean in *frame*."""
    grouped = frame.groupby(pd.PeriodIndex(frame.index, freq="M"))['rolling_mean']
    return grouped.agg(delta_per).values

# Net EUR/USD change per presidency = sum of the monthly deltas.
# .loc is used for label-based row slicing, consistent with the later cells.
# Bush presidency
bush_pres_month = _monthly_deltas(euro_to_dollar.loc['2001':'2009'])
bush_pres = bush_pres_month.sum()
# Obama presidency
obama_pres_month = _monthly_deltas(euro_to_dollar.loc['2009':'2017'])
obama_pres = obama_pres_month.sum()
# Trump presidency
trump_pres_month = _monthly_deltas(euro_to_dollar.loc['2017':'2021'])
trump_pres = trump_pres_month.sum()
# Trump presidency excluding CORONA PANDEMY (cut off at 2020-03-01)
trump_pres_month_nocovid = _monthly_deltas(euro_to_dollar.loc['2017':'2020-03-01'])
trump_pres_nocovid = trump_pres_month_nocovid.sum()
# Bar anchors: one representative year per presidency -> its net change value.
y_offs = 0.6  # vertical position of the presidency bar row
yrly_var = {2005: bush_pres,
            2012: obama_pres,
            2019: trump_pres}
# Scale by the largest ABSOLUTE change so negative bars are sized correctly.
yrly_max = np.max(np.abs(np.array(list(yrly_var.values()))))  # max(yrly_var.values())
# One horizontal bar per presidency, anchored at 1 January of the chosen year;
# length scaled by the presidency's net change (up to 48 month-steps).
for yr in yrly_var.keys():
    if yrly_var[yr]>=0:
        # blue bar extending rightwards for a net rise
        ax.plot([datetime(int(yr),1,1),
                 datetime(int(yr),1,1) +
                 np.round(yrly_var[yr]/yrly_max*48)*DateOffset(months=1)],
                y_offs*np.array([1,1]),
                linewidth=18, color='#033BC7')
        # grey vertical line marking the anchor
        ax.plot([datetime(int(yr),1,1),
                 datetime(int(yr),1,1)],
                [y_offs-0.040,y_offs+0.040],linewidth=2,color='grey')
    else:
        # red bar extending leftwards for a net fall
        ax.plot([datetime(int(yr),1,1) +
                 np.round(yrly_var[yr]/yrly_max*48)*DateOffset(months=1),
                 datetime(int(yr),1,1)],
                y_offs*np.array([1,1]),
                linewidth=18, color='#af0b1e')
        # grey vertical line marking the anchor
        ax.plot([datetime(int(yr),1,1),
                 datetime(int(yr),1,1)],
                [y_offs-0.040,y_offs+0.040],linewidth=2,color='grey')
# SPLIT LINE between the rate curves and the bar section
ax.axhline(0.8,color='grey',linewidth=2)
ax.text(datetime(1999,1,1),0.72,'Presidency change rate')
# Remove Ylabels in Yearly change rate bar section (first three tick labels)
for k,label in enumerate(ax.get_yticklabels()):
    if k<3:
        label.set_visible(False)
# FOOTER LINE with attribution and data source
ax.text(datetime(1999,1,1), 0.3, '©DATAQUEST' + ' '*35 + 'Source: European Central Bank Data, prepared by D.Chemkaeva',
        color = '#f0f0f0', backgroundcolor = '#4d4d4d', size=12)
# set Xticks and labels at the presidency boundaries
x_dates=[]
for yr in ['2001','2009','2017','2021']:
    x_dates.append(datetime(int(yr),1,1))
ax.set_xticks(x_dates)
ax.set_xticklabels(['2001','2009','2017','2021'])
# keep only horizontal grid lines
ax.grid(False)
ax.grid(True,axis='y')
plt.show()
Calculate the Trump-era variations excluding the COVID-19 pandemic
# NOTE(review): this cell recomputes the Obama/Trump aggregates from the earlier
# cells unchanged; only the extra 2020 no-COVID entry in yrly_var and the final
# ratio are new.
# Obama presidency
obama_pres_month = euro_to_dollar['2009':'2017'].groupby(pd.PeriodIndex(euro_to_dollar['2009':'2017'].index, freq="M"))['rolling_mean'].agg(delta_per).values
obama_pres = obama_pres_month.sum()
# Trump presidency
trump_pres_month = euro_to_dollar['2017':'2021'].groupby(pd.PeriodIndex(euro_to_dollar['2017':'2021'].index, freq="M"))['rolling_mean'].agg(delta_per).values
trump_pres = trump_pres_month.sum()
# Trump presidency excluding the COVID-19 period (cut off at 2020-03-01)
trump_pres_month_nocovid = euro_to_dollar['2017':'2020-03-01'].groupby(pd.PeriodIndex(euro_to_dollar['2017':'2020-03-01'].index, freq="M"))['rolling_mean'].agg(delta_per).values
trump_pres_nocovid = trump_pres_month_nocovid.sum()
y_offs = 0.6
yrly_var = {2005: bush_pres,
            2012: obama_pres,
            2019: trump_pres,
            2020: trump_pres_nocovid}
yrly_max = np.max(np.abs(np.array(list(yrly_var.values()))))
# Scaled bar-length ratio for the 2020 no-COVID entry (notebook cell output).
yrly_var[2020]/51*120/yrly_max
0.1638235715872292
yr='2020'
# Bar chart: yearly net change of the rolling mean, 2016-2020.
for yr in ['2016','2017','2018','2019','2020']:
    grouped = euro_to_dollar.loc[yr].groupby(pd.PeriodIndex(euro_to_dollar.loc[yr].index, freq="M"))['rolling_mean']
    plt.bar(yr,grouped.agg(delta_per).sum())
grouped = euro_to_dollar['2020-03':'2020-12'].groupby(pd.PeriodIndex(euro_to_dollar['2020-03':'2020-12'].index, freq="M"))['rolling_mean']
#correct 2020 as it contains only 9 months
# NOTE(review): the 2020-03..2020-12 slice spans 10 calendar months but the
# annualization divides by 9 -- confirm which month count is intended.
plt.bar(yr,grouped.agg(delta_per).sum()/9*12)
plt.show()
yr = '2020'
# Bar chart of the yearly net change of the rolling mean (duplicate of the
# previous cell, kept for experimentation).
for yr in ['2016', '2017', '2018', '2019', '2020']:
    # Row-select by year string via .loc: bare euro_to_dollar[yr] is a COLUMN
    # lookup in pandas >= 2.0 and raises KeyError (row fallback was removed).
    year_rows = euro_to_dollar.loc[yr]
    grouped = year_rows.groupby(pd.PeriodIndex(year_rows.index, freq="M"))['rolling_mean']
    plt.bar(yr, grouped.agg(delta_per).sum())
# correct 2020 as it contains only part of the year
# NOTE(review): Mar-Dec spans 10 months but the factor divides by 9 -- confirm.
partial_2020 = euro_to_dollar.loc['2020-03':'2020-12']
grouped = partial_2020.groupby(pd.PeriodIndex(partial_2020.index, freq="M"))['rolling_mean']
plt.bar(yr, grouped.agg(delta_per).sum()/9*12)
plt.show()
#grouped = euro_to_dollar[euro_to_dollar[datetime(2016)]].groupby(pd.PeriodIndex(euro_to_dollar.index, freq="M"))['rolling_mean']
# Per-year summary of the monthly avg_diff (median daily gradient) of the raw
# rate: worst month (square), mean of months (circle), best month (triangle).
plt.figure()
for yr in ['2016', '2017', '2018', '2019', '2020']:
    year_rows = euro_to_dollar.loc[yr]
    grouped = year_rows.groupby(pd.PeriodIndex(year_rows.index, freq="M"))['US_dollar']
    # Aggregate once per year instead of three times (same values, less work).
    monthly_stats = grouped.agg(avg_diff).values
    plt.plot(yr, monthly_stats.max(), 's')
    plt.plot(yr, monthly_stats.mean(), 'o')
    plt.plot(yr, monthly_stats.min(), 'v')
#grouped = euro_to_dollar[euro_to_dollar[datetime(2016)]].groupby(pd.PeriodIndex(euro_to_dollar.index, freq="M"))['rolling_mean']
# Monthly avg_diff curves of the raw rate, one line per year (2016-2020).
plt.figure()
for yr in ['2016','2017','2018','2019','2020']:
    grouped = euro_to_dollar.loc[yr].groupby(pd.PeriodIndex(euro_to_dollar.loc[yr].index, freq="M"))['US_dollar']
    #plt.bar(yr,grouped.agg(avg_diff))
    # x axis: month number 1..12
    plt.plot(np.arange(12)+1,grouped.agg(avg_diff).values,label=yr)
plt.legend()
<matplotlib.legend.Legend at 0x7f9637a0e340>
# Final check: yearly net change bars from the rolling mean (same aggregation
# as the earlier bar-chart cells).
for yr in ['2016','2017','2018','2019','2020']:
    grouped = euro_to_dollar.loc[yr].groupby(pd.PeriodIndex(euro_to_dollar.loc[yr].index, freq="M"))['rolling_mean']
    #avg_rate = grouped.agg(p2p_max_min)
    #avg_rate = grouped.agg(month_sum_var)
    plt.bar(yr,grouped.agg(delta_per).sum())