import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.style as style
%matplotlib inline
exchange_rates = pd.read_csv('euro-daily-hist_1999_2020.csv')
print(exchange_rates.info())
print(exchange_rates.head())
print(exchange_rates.tail())
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5699 entries, 0 to 5698 Data columns (total 41 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Period\Unit: 5699 non-null object 1 [Australian dollar ] 5699 non-null object 2 [Bulgarian lev ] 5297 non-null object 3 [Brazilian real ] 5431 non-null object 4 [Canadian dollar ] 5699 non-null object 5 [Swiss franc ] 5699 non-null object 6 [Chinese yuan renminbi ] 5431 non-null object 7 [Cypriot pound ] 2346 non-null object 8 [Czech koruna ] 5699 non-null object 9 [Danish krone ] 5699 non-null object 10 [Estonian kroon ] 3130 non-null object 11 [UK pound sterling ] 5699 non-null object 12 [Greek drachma ] 520 non-null object 13 [Hong Kong dollar ] 5699 non-null object 14 [Croatian kuna ] 5431 non-null object 15 [Hungarian forint ] 5699 non-null object 16 [Indonesian rupiah ] 5699 non-null object 17 [Israeli shekel ] 5431 non-null object 18 [Indian rupee ] 5431 non-null object 19 [Iceland krona ] 3292 non-null float64 20 [Japanese yen ] 5699 non-null object 21 [Korean won ] 5699 non-null object 22 [Lithuanian litas ] 4159 non-null object 23 [Latvian lats ] 3904 non-null object 24 [Maltese lira ] 2346 non-null object 25 [Mexican peso ] 5699 non-null object 26 [Malaysian ringgit ] 5699 non-null object 27 [Norwegian krone ] 5699 non-null object 28 [New Zealand dollar ] 5699 non-null object 29 [Philippine peso ] 5699 non-null object 30 [Polish zloty ] 5699 non-null object 31 [Romanian leu ] 5637 non-null float64 32 [Russian rouble ] 5699 non-null object 33 [Swedish krona ] 5699 non-null object 34 [Singapore dollar ] 5699 non-null object 35 [Slovenian tolar ] 2085 non-null object 36 [Slovak koruna ] 2608 non-null object 37 [Thai baht ] 5699 non-null object 38 [Turkish lira ] 5637 non-null float64 39 [US dollar ] 5699 non-null object 40 [South African rand ] 5699 non-null object dtypes: float64(3), object(38) memory usage: 1.8+ MB None Period\Unit: [Australian dollar ] [Bulgarian lev ] [Brazilian real ] \ 0 2021-01-08 1.5758 1.9558 6.5748 1 2021-01-07 1.5836 1.9558 6.5172 2 2021-01-06 1.5824 1.9558 6.5119 3 2021-01-05 1.5927 1.9558 6.5517 4 2021-01-04 1.5928 1.9558 6.3241 [Canadian dollar ] [Swiss franc ] [Chinese yuan renminbi ] [Cypriot pound ] \ 0 1.5543 1.0827 7.9184 NaN 1 1.5601 1.0833 7.9392 NaN 2 1.5640 1.0821 7.9653 NaN 3 1.5651 1.0803 7.9315 NaN 4 1.5621 1.0811 7.9484 NaN [Czech koruna ] [Danish krone ] ... [Romanian leu ] [Russian rouble ] \ 0 26.163 7.4369 ... 4.8708 90.8000 1 26.147 7.4392 ... 4.8712 91.2000 2 26.145 7.4393 ... 4.8720 90.8175 3 26.227 7.4387 ... 4.8721 91.6715 4 26.141 7.4379 ... 4.8713 90.3420 [Swedish krona ] [Singapore dollar ] [Slovenian tolar ] [Slovak koruna ] \ 0 10.0510 1.6228 NaN NaN 1 10.0575 1.6253 NaN NaN 2 10.0653 1.6246 NaN NaN 3 10.0570 1.6180 NaN NaN 4 10.0895 1.6198 NaN NaN [Thai baht ] [Turkish lira ] [US dollar ] [South African rand ] 0 36.8480 9.0146 1.2250 18.7212 1 36.8590 8.9987 1.2276 18.7919 2 36.9210 9.0554 1.2338 18.5123 3 36.7760 9.0694 1.2271 18.4194 4 36.7280 9.0579 1.2296 17.9214 [5 rows x 41 columns] Period\Unit: [Australian dollar ] [Bulgarian lev ] [Brazilian real ] \ 5694 1999-01-08 1.8406 NaN NaN 5695 1999-01-07 1.8474 NaN NaN 5696 1999-01-06 1.8820 NaN NaN 5697 1999-01-05 1.8944 NaN NaN 5698 1999-01-04 1.9100 NaN NaN [Canadian dollar ] [Swiss franc ] [Chinese yuan renminbi ] \ 5694 1.7643 1.6138 NaN 5695 1.7602 1.6165 NaN 5696 1.7711 1.6116 NaN 5697 1.7965 1.6123 NaN 5698 1.8004 1.6168 NaN [Cypriot pound ] [Czech koruna ] [Danish krone ] ... [Romanian leu ] \ 5694 0.58187 34.938 7.4433 ... 1.3143 5695 0.58187 34.886 7.4431 ... 1.3092 5696 0.58200 34.850 7.4452 ... 1.3168 5697 0.58230 34.917 7.4495 ... 1.3168 5698 0.58231 35.107 7.4501 ... 1.3111 [Russian rouble ] [Swedish krona ] [Singapore dollar ] \ 5694 27.2075 9.1650 1.9537 5695 26.9876 9.1800 1.9436 5696 27.4315 9.3050 1.9699 5697 26.5876 9.4025 1.9655 5698 25.2875 9.4696 1.9554 [Slovenian tolar ] [Slovak koruna ] [Thai baht ] [Turkish lira ] \ 5694 188.8400 42.560 42.5590 0.3718 5695 188.8000 42.765 42.1678 0.3701 5696 188.7000 42.778 42.6949 0.3722 5697 188.7750 42.848 42.5048 0.3728 5698 189.0450 42.991 42.6799 0.3723 [US dollar ] [South African rand ] 5694 1.1659 6.7855 5695 1.1632 6.8283 5696 1.1743 6.7307 5697 1.1790 6.7975 5698 1.1789 6.9358 [5 rows x 41 columns]
PROJECT: Storytelling Data Visualization on Exchange Rates The project consists on applying data explanatory techniques. The dataset consists of 40 columns and 5699 rows. Some of the currencies contain considerable amounts of missing values (more than 90%). Eventhough the datatype looks as a float most of them are recorded as an object (str)
#Cleaning the data to isolate the variation between the dollar and euro acrosstime
exchange_rates.rename(columns = {'[US dollar ]':'US_dollar','Period\\Unit:':'Time'}, inplace = True)
exchange_rates['Time'] = pd.to_datetime(exchange_rates['Time'])
exchange_rates.sort_values('Time', inplace = True)
exchange_rates.reset_index(drop=True, inplace=True)
euro_to_dollar = exchange_rates[['Time', 'US_dollar']].copy()
euro_to_dollar['US_dollar'].value_counts()
euro_to_dollar = euro_to_dollar[euro_to_dollar['US_dollar'] != '-']
euro_to_dollar['US_dollar']= euro_to_dollar['US_dollar'].astype('float')
euro_to_dollar.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 5637 entries, 0 to 5698 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Time 5637 non-null datetime64[ns] 1 US_dollar 5637 non-null float64 dtypes: datetime64[ns](1), float64(1) memory usage: 132.1 KB
plt.plot(euro_to_dollar['Time'], euro_to_dollar['US_dollar'])
plt.show()
##Generating a moving mean to soften the variation in the data.
euro_to_dollar['rolling_mean'] = euro_to_dollar['US_dollar'].rolling(30).mean()
plt.plot(euro_to_dollar['Time'], euro_to_dollar['rolling_mean'])
plt.show()
STORYLINE: The story I will try to explain is comparing the euro-dollar rate change under the last three US presidents:
##Create 3 different dataframe for each presidential term
bush_term = euro_to_dollar[(euro_to_dollar['Time'].dt.year >= 2001) & (euro_to_dollar['Time'].dt.year < 2009)]
obama_term = euro_to_dollar[(euro_to_dollar['Time'].dt.year >= 2009) & (euro_to_dollar['Time'].dt.year < 2017)]
trump_term = euro_to_dollar[(euro_to_dollar['Time'].dt.year >= 2017) & (euro_to_dollar['Time'].dt.year < 2021)]
all_terms = euro_to_dollar[(euro_to_dollar['Time'].dt.year >= 2001) & (euro_to_dollar['Time'].dt.year < 2021)]
##print(bush_term.info())
##print(obama_term.info())
##print(trump_term.info())
print(all_terms.info())
<class 'pandas.core.frame.DataFrame'> Int64Index: 5118 entries, 521 to 5693 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Time 5118 non-null datetime64[ns] 1 US_dollar 5118 non-null float64 2 rolling_mean 5118 non-null float64 dtypes: datetime64[ns](1), float64(2) memory usage: 159.9 KB None
##Method 1 of creating grid charts
##Create a grid chart (1 row, 3 columns)to separate the behavior in each term
fig, (ax1, ax2, ax3) = plt.subplots(1,3,figsize = (16,4))
axes = [ax1, ax2, ax3]
terms = [bush_term, obama_term, trump_term]
color = ['#BF5FFF', '#ffa500', '#00B2EE']
style.use('fivethirtyeight')
for ax, t, c in zip(axes, terms, color):
ax.plot(t['Time'], t['rolling_mean'], color = c)
ax.grid(alpha = 0.5)
ax.set_ylim(0.8,1.7)
ax.set_yticks([1.0, 1.2, 1.4, 1.6])
ax.set_yticklabels(['1.0','1.2','1.4','1.6'], alpha = 0.3)
##Bush Grid
ax1.set_xticklabels(['','2001','', '2003','','2005','','2007','','2009'], alpha = 0.3)
ax1.text(731482,1.8, 'BUSH', fontsize = 18, weight = 'bold', color = '#BF5FFF')
ax1.text(731200,1.72, '(2001-2009)', alpha = 0.3, weight = 'bold')
##Obama Grid
ax2.set_xticklabels(['','2009','', '2011','','2013','','2015','','2017'], alpha = 0.3)
ax2.text(734303,1.8, 'OBAMA', fontsize = 18, weight = 'bold', color = '#ffa500')
ax2.text(734121,1.72, '(2009-2017)', alpha = 0.3, weight = 'bold')
##Trump Grid
ax3.set_xticklabels(['','2017','', '2018','','2019','','2020','','2021'], alpha = 0.3)
ax3.text(736778,1.8, 'TRUMP', fontsize = 18, weight = 'bold', color = '#00B2EE')
ax3.text(736678,1.72, '(2017-2021)', alpha = 0.3, weight = 'bold')
##Adding Titles and Subtitles
ax1.text(732000,2.0, 'EURO-ISD rate averaged 1.22 under the last three US presidents', fontsize = 20, weight = 'bold')
ax2.text(730000,1.9, 'EURO-ISD exchange rates under George W. Bush(2001-2009), Barack Obama(2009-2017), and Donald Trump(2017-2021)', fontsize = 16)
plt.show()
#Creating the grid for the sum of all terms
plt.figure(figsize=(10,4))
ax4 = plt.subplot(2,1,2)
ax4.grid(alpha = 0.5)
ax4.set_ylim(0.8,1.7)
ax4.set_yticks([1.0, 1.2, 1.4, 1.6])
ax4.set_yticklabels(['1.0','1.2','1.4','1.6'], alpha = 0.3)
ax4.plot(bush_term['Time'], bush_term['rolling_mean'], color = '#BF5FFF' )
ax4.plot(obama_term['Time'], obama_term['rolling_mean'], color = '#ffa500')
ax4.plot(trump_term['Time'], trump_term['rolling_mean'], color = '#00B2EE')
ax4.set_xticklabels([])
ax4.text(729000, 0.5, '@DATAQUEST' + ' '*103 + 'Source: European Central Bank',
color = '#f0f0f0', backgroundcolor = '#4d4d4d',
size=14)
plt.show()
##Method 2 of creating grid charts
##Create a grid chart (2 row, 3 columns)to separate the behavior in each term and one last graph for all terms
style.use('fivethirtyeight')
plt.figure(figsize=(12,6))
ax1 = plt.subplot(2,3,1)
ax2 = plt.subplot(2,3,2)
ax3 = plt.subplot(2,3,3)
ax4 = plt.subplot(2,1,2)
axes = [ax1, ax2, ax3, ax4]
for ax in axes:
ax.grid(alpha = 0.5)
ax.set_ylim(0.8,1.7)
ax.set_yticks([1.0, 1.2, 1.4, 1.6])
ax.set_yticklabels(['1.0','1.2','1.4','1.6'], alpha = 0.3)
##Bush Grid
ax1.plot(bush_term['Time'], bush_term['rolling_mean'], color ='#BF5FFF')
ax1.set_xticklabels(['','2001','', '2003','','2005','','2007','','2009'], alpha = 0.3)
ax1.text(731482,1.8, 'BUSH', fontsize = 16, weight = 'bold', color = '#BF5FFF')
ax1.text(731200,1.72, '(2001-2009)', alpha = 0.3, weight = 'bold')
##Obama Grid
ax2.plot(obama_term['Time'], obama_term['rolling_mean'], color = '#ffa500')
ax2.set_xticklabels(['','2009','', '2011','','2013','','2015','','2017'], alpha = 0.3)
ax2.text(734303,1.8, 'OBAMA', fontsize = 16, weight = 'bold', color = '#ffa500')
ax2.text(734121,1.72, '(2009-2017)', alpha = 0.3, weight = 'bold')
##Trump Grid
ax3.plot(trump_term['Time'], trump_term['rolling_mean'], color = '#00B2EE')
ax3.set_xticklabels(['','2017','', '2018','','2019','','2020','','2021'], alpha = 0.3)
ax3.text(736778,1.8, 'TRUMP', fontsize = 16, weight = 'bold', color = '#00B2EE')
ax3.text(736678,1.72, '(2017-2021)', alpha = 0.3, weight = 'bold')
##Adding Titles and Subtitles
ax1.text(730000,2.0, 'EURO-ISD rate averaged 1.22 under the last three US presidents', fontsize = 20, weight = 'bold')
ax1.text(730000,1.9, 'EURO-ISD exchange rates under George W. Bush(2001-2009), Barack Obama(2009-2017), and Donald Trump(2017-2021)', fontsize = 14)
#All term
ax4.plot(bush_term['Time'], bush_term['rolling_mean'], color = '#BF5FFF' )
ax4.plot(obama_term['Time'], obama_term['rolling_mean'], color = '#ffa500')
ax4.plot(trump_term['Time'], trump_term['rolling_mean'], color = '#00B2EE')
ax4.set_xticklabels([])
ax4.text(729000, 0.5, '@DATAQUEST' + ' '*140 + 'Source: European Central Bank',
color = '#f0f0f0', backgroundcolor = '#4d4d4d',
size=14)
plt.show()
CONCLUSION: I decided to develop the grid in 2 methods:
Both ways have their pros and cons, but I would prefer to go with the first method.