%matplotlib inline
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib import pylab as lab
import matplotlib as mp
# Load the 2018 passport scores. `names=` is passed without `header=`, so
# pandas treats every line of the CSV (the first one included) as a data row.
# Keying the frame by country moves 'Country' out of the columns and into the index.
df = pd.read_csv('./passport_scores_2018.csv', names=['Country', 'Rank']).set_index('Country')

# Make sure Rank is numeric; to_numeric raises if a value cannot be parsed.
df['Rank'] = pd.to_numeric(df['Rank'])
# Pie chart of the first 20 rows of df, one wedge per country.
# NOTE(review): calling these the "top 20" relies on the CSV already being
# ordered best-first -- confirm against the data file.
top_20 = df.head(n=20)
labels = top_20.index.values
ranks = top_20.Rank.values

# One offset per wedge actually plotted. ax.pie raises ValueError when explode
# and the wedge values differ in length, which a literal 20 would trigger for
# a frame with fewer than 20 rows.
explode = tuple(0.1 for _ in ranks)

# Sum of the plotted values; autopct uses it to turn percentages back into scores.
total = ranks.sum()

# Create the figure at its final size so tight_layout() computes the layout
# for the size that is actually rendered.
fig1, ax1 = plt.subplots(figsize=(10, 5))
ax1.pie(
    ranks,
    explode=explode,
    labels=labels,
    # autopct receives each wedge's share in percent; scale it back to the raw value.
    autopct=lambda p: '{:.0f}'.format(p * total / 100),
    shadow=True,
    startangle=90,
)
ax1.axis('equal')  # equal aspect ratio keeps the pie circular
plt.tight_layout()

# `fig` is kept as an alias of the figure in case later cells refer to it.
fig = fig1
# Histogram over the first 20 rows of df: how many of those countries fall
# into each score range.
first_20 = df.head(20)
first_20.plot.hist(subplots=True, figsize=(20, 10))
array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f019b41a7f0>], dtype=object)
# Pie chart of the last 20 rows of df, one wedge per country.
# NOTE(review): calling these the "last 20 by passport score" relies on the
# row order of the CSV -- confirm against the data file.
bottom_20 = df.tail(n=20)
labels = bottom_20.index.values
ranks = bottom_20.Rank.values

# One offset per wedge actually plotted. ax.pie raises ValueError when explode
# and the wedge values differ in length, which a literal 20 would trigger for
# a frame with fewer than 20 rows.
explode = tuple(0.1 for _ in ranks)

# Total the plotted values themselves. Slicing df from len(df)-20 would use a
# negative start for frames shorter than 20 rows and pick different rows than
# tail(), making the wedge labels disagree with the wedges.
total = ranks.sum()

# Create the figure at its final size so tight_layout() computes the layout
# for the size that is actually rendered.
fig1, ax1 = plt.subplots(figsize=(10, 7))
ax1.pie(
    ranks,
    explode=explode,
    labels=labels,
    # autopct receives each wedge's share in percent; scale it back to the raw value.
    autopct=lambda p: '{:.0f}'.format(p * total / 100),
    shadow=True,
    startangle=90,
)
ax1.axis('equal')  # equal aspect ratio keeps the pie circular
plt.tight_layout()

# `fig` is kept as an alias of the figure in case later cells refer to it.
fig = fig1
# Distribution of passport scores over every row of df, grouped into
# histogram buckets.
df.plot.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x7f0181e7e400>
# Summary statistics (count, mean, std, min, quartiles, max) for the Rank column.
df['Rank'].describe()
count 199.000000 mean 96.532663 std 44.002384 min 25.000000 25% 56.000000 50% 84.000000 75% 140.000000 max 164.000000 Name: Rank, dtype: float64
# Number of countries sharing each distinct Rank value; value_counts() orders
# the result from the most common value to the least common.
bucket_counts = df.Rank.value_counts()

# Bar chart of the 60 most common values. The counts computed above are reused
# rather than running value_counts() over the column a second time.
bucket_counts.head(n=60).plot(kind='bar', figsize=(10, 10))
<matplotlib.axes._subplots.AxesSubplot at 0x7f0180c12f98>
# Bar chart of only those scores that are shared by more than one country.
# bucket_counts is indexed by score, so a boolean mask on the counts selects
# the rows directly; looking the series up by its own index first is a no-op.
bucket_counts.loc[bucket_counts > 1].plot(kind='bar', figsize=(10, 6))
<matplotlib.axes._subplots.AxesSubplot at 0x7f01805dde10>