Animated Visualizations in Python Using Matplotlib

Importing Necessary Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
%matplotlib widget

Getting Data

In [2]:
# Download the cleaned COVID-19 CSV from GitHub; -O sets the name of the local output file.
!wget -O covid_19_clean_complete.csv https://raw.githubusercontent.com/imdevskp/covid_19_jhu_data_web_scrap_and_cleaning/master/covid_19_clean_complete.csv
--2020-05-16 13:02:50--  https://raw.githubusercontent.com/imdevskp/covid_19_jhu_data_web_scrap_and_cleaning/master/covid_19_clean_complete.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.64.133, 151.101.192.133, 151.101.0.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.64.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1369217 (1.3M) [text/plain]
Saving to: ‘covid_19_clean_complete.csv’

covid_19_clean_comp 100%[===================>]   1.31M  18.1KB/s    in 2m 9s   

2020-05-16 13:05:11 (10.3 KB/s) - ‘covid_19_clean_complete.csv’ saved [1369217/1369217]

Processing Data

In [3]:
# Preview the first rows of the raw file, with the Date column parsed as datetimes.
pd.read_csv(
    'covid_19_clean_complete.csv',
    parse_dates=['Date'],
).head()
Out[3]:
Province/State Country/Region Lat Long Date Confirmed Deaths Recovered
0 NaN Afghanistan 33.0000 65.0000 2020-01-22 0 0 0
1 NaN Albania 41.1533 20.1683 2020-01-22 0 0 0
2 NaN Algeria 28.0339 1.6596 2020-01-22 0 0 0
3 NaN Andorra 42.5063 1.5218 2020-01-22 0 0 0
4 NaN Angola -11.2027 17.8739 2020-01-22 0 0 0
In [4]:
# Build the per-date "top 10 countries by confirmed cases" series in one chain,
# so covid_df is bound exactly once and the cell is safe to re-run.
covid_df = (
    # Read the dataset, parsing Date so it groups and indexes as datetimes.
    pd.read_csv('covid_19_clean_complete.csv', parse_dates=['Date'])
    # Total by date and country (the file also carries a Province/State column,
    # so one country can span several rows per date).
    .groupby(['Date', 'Country/Region'])
    # Use the string 'sum' rather than np.sum: recent pandas deprecates passing
    # NumPy callables to agg and emits a FutureWarning for them.
    .agg({'Confirmed': 'sum'})
    # Get the top 10 countries by confirmed cases for each date.
    .groupby('Date')['Confirmed']
    .nlargest(10)
    # nlargest prepends the group key, which duplicates Date in the index;
    # drop the copy so the index is (Date, Country/Region).
    .reset_index(level=1, drop=True)
)
In [5]:
# Show the first 20 rows, i.e. the top-10 lists for the first two dates.
covid_df.head(n=20)
Out[5]:
Date        Country/Region
2020-01-22  China             548
            Japan               2
            Thailand            2
            South Korea         1
            Taiwan*             1
            US                  1
            Afghanistan         0
            Albania             0
            Algeria             0
            Andorra             0
2020-01-23  China             643
            Thailand            3
            Japan               2
            Vietnam             2
            Singapore           1
            South Korea         1
            Taiwan*             1
            US                  1
            Afghanistan         0
            Albania             0
Name: Confirmed, dtype: int64

Visualization

Choosing a color for each country

In [6]:
# Every index entry of covid_df is a (date, country) pair. Collect the country
# names and de-duplicate them, since a country appears once per date it ranks on.
countries = np.unique([name for _, name in covid_df.index])
# Sample the 'tab20' colormap at evenly spaced points, one per country,
# and build a name -> RGBA lookup used when colouring the bars.
cmap = plt.get_cmap('tab20')
sample_points = np.linspace(0, 1, len(countries))
colors = cmap(sample_points)
color_dict = {country: color for country, color in zip(countries, colors)}

Plotting Bar Chart

In [10]:
# Snapshot for a single day, drawn as a horizontal bar chart.
date = pd.to_datetime('4/22/20',format="%m/%d/%y")
# Rows for that day: the index holds country names, the values hold the counts.
snapshot = covid_df.loc[date]
xvals = snapshot.index
data = snapshot.values
# Work on an explicit Axes in a fresh figure instead of pyplot's implicit one.
ax = plt.figure().add_subplot()
bars = ax.barh(xvals, data, color=[color_dict[country] for country in xvals])
ax.figure.suptitle('Cumulative Confirmed Covid-19 Cases')
ax.set_title(date.strftime("%d %b %Y"))
# barh places the first bar at the bottom; flip the axis so the largest is on top.
ax.invert_yaxis()

Improving the plot

In [11]:
# Same single-day snapshot as before, now with a decluttered presentation.
date = pd.to_datetime('4/22/20',format="%m/%d/%y")
# Rows for that day: the index holds country names, the values hold the counts.
snapshot = covid_df.loc[date]
xvals = snapshot.index
data = snapshot.values
# Work on an explicit Axes in a fresh figure instead of pyplot's implicit one.
ax = plt.figure().add_subplot()
bars = ax.barh(xvals, data, color=[color_dict[country] for country in xvals])
ax.figure.suptitle('Cumulative Confirmed Covid-19 Cases')
ax.set_title(date.strftime("%d %b %Y"))
# barh places the first bar at the bottom; flip the axis so the largest is on top.
ax.invert_yaxis()

# Hide the frame around the plot
for side in ax.spines:
    ax.spines[side].set_visible(False)
# Drop the tick marks on both axes and the numeric labels on the X-axis
ax.tick_params(left=False, bottom=False, labelbottom=False)
# Write each value next to the end of its bar instead
for bar in bars:
    width = bar.get_width()
    mid_y = bar.get_y() + bar.get_height() / 2
    ax.text(width, mid_y, '  ' + str(width), va='center')

Animating the plot

In [12]:
# Each unique date we have in covid_df (np.unique already returns them sorted)
dates = np.unique(covid_df.index.get_level_values(level=0))
n = len(dates)

def update(curr):
    """Redraw the top-10 bar chart for the date at position ``curr`` in ``dates``.

    Used as the frame callback of the FuncAnimation created below, which is
    configured with ``frames=n`` and therefore passes curr = 0 .. n-1. No
    explicit stop is needed: ``repeat=False`` ends the animation after the
    last frame.

    Parameters
    ----------
    curr : int
        Frame number; index into the module-level ``dates`` array.
    """
    # Draw on the animation's own figure rather than on whichever figure
    # pyplot currently treats as active (another cell may have changed it).
    ax = fig.gca()
    ax.clear()
    date = dates[curr]
    day = covid_df.loc[date]
    xvals = day.index
    data = day.values
    bars = ax.barh(xvals, data, color=[color_dict[country] for country in xvals])
    fig.suptitle('Cumulative Confirmed Covid-19 Cases')
    # dates holds numpy datetimes; convert to a pandas Timestamp to format it.
    ax.set_title(pd.to_datetime(date).strftime("%d %b %Y"))
    # Clearing the axes resets its orientation, so flip it again every frame
    # to keep the largest bar on top.
    ax.invert_yaxis()
    # Removing borders
    for spine in ax.spines.values():
        spine.set_visible(False)
    # Removing tick marks and values on the X-axis
    ax.tick_params(left=False, bottom=False, labelbottom=False)
    # Labelling the bars directly
    for bar in bars:
        ax.text(bar.get_width(), bar.get_y() + bar.get_height()/2, '  ' + str(bar.get_width()), va='center')

# figsize sets the canvas size (width, height in inches), not the margins.
fig = plt.figure(figsize=[11,5])
a = animation.FuncAnimation(fig, update, interval=100, frames=n, repeat=False)

Saving the animation

In [13]:
# Render all frames and write the animation to final.mp4 in the working directory.
# NOTE(review): .mp4 output presumably needs an ffmpeg binary visible to matplotlib — confirm.
a.save(filename='final.mp4')