In this project, we will practice improving plot aesthetics and making visualizations more effective at communicating our objective.
We will be using data compiled by Randal Olson on the percentage of bachelor's degrees awarded to women in the USA. The raw data can be found on his website.
# Modules
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
# Data
# Read the CSV into a DataFrame. Later cells use its 'Year' column for the
# x-axis and one column per degree category for the y-axis.
csv_path = 'percent-bachelors-degrees-women-usa.csv'
women_degrees = pd.read_csv(csv_path)
# Color Palettes
# RGB triples scaled from 0-255 down to the 0-1 floats matplotlib expects.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']

# One row of line charts, one per STEM category. Each chart shows the women's
# share and its complement (100 - share), which is plotted as the men's share.
fig = plt.figure(figsize=(18, 3))
for sp, category in enumerate(stem_cats):
    ax = fig.add_subplot(1, len(stem_cats), sp + 1)
    women_share = women_degrees[category]
    ax.plot(women_degrees['Year'], women_share,
            c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_share,
            c=cb_orange, label='Men', linewidth=3)

    # Remove the box around each chart.
    for spine in ax.spines.values():
        spine.set_visible(False)

    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(category)
    # Booleans, not the strings "off"/"on": recent matplotlib releases treat
    # any non-empty string as true, so "off" would leave the ticks visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

    # Label the lines directly on the first and last charts only.
    if sp == 0:
        ax.text(2005, 87, 'Men')
        ax.text(2002, 8, 'Women')
    elif sp == len(stem_cats) - 1:
        ax.text(2005, 62, 'Men')
        ax.text(2001, 35, 'Women')
plt.show()
The objective is to plot all three categories of degrees, each ordered in descending order by the percentage of degrees awarded to women.
# Degree Categories
# NOTE(review): the accompanying text says each list is in descending order of
# the share of degrees awarded to women; the order is hard-coded here, not
# computed from the data.
stem_cats = ['Psychology', 'Biology', 'Math and Statistics',
             'Physical Sciences', 'Computer Science', 'Engineering']
lib_arts_cats = ['Foreign Languages', 'English',
                 'Communications and Journalism', 'Art and Performance',
                 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration',
              'Education', 'Agriculture', 'Business', 'Architecture']

fig = plt.figure(figsize=(20, 20))

# One subplot column per degree group: STEM, liberal arts, other.
# Each entry is (categories, annotate the last chart of the column?).
# The liberal-arts column has only five charts and labels just its top chart.
degree_columns = [
    (stem_cats, True),
    (lib_arts_cats, False),
    (other_cats, True),
]
n_rows, n_cols = 6, 3

for col, (cats, annotate_last) in enumerate(degree_columns):
    for row, category in enumerate(cats):
        # Subplot numbers run left-to-right, then top-to-bottom, starting at 1.
        ax = fig.add_subplot(n_rows, n_cols, row * n_cols + col + 1)
        women_share = women_degrees[category]
        ax.plot(women_degrees['Year'], women_share,
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_share,
                c=cb_orange, label='Men', linewidth=3)

        # Remove spines
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Axis limits and title
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Booleans, not the strings 'off'/'on': recent matplotlib releases
        # treat any non-empty string as true, so 'off' would keep the ticks.
        ax.tick_params(top=False, bottom=False, right=False, left=False)

        # Annotate the top chart of every column and, where requested, the
        # last chart of the column.
        if row == 0:
            ax.text(2003, 85, 'Women')
            ax.text(2005, 10, 'Men')
        elif annotate_last and row == len(cats) - 1:
            ax.text(2005, 87, 'Men')
            ax.text(2003, 7, 'Women')
plt.show()
To declutter the figure, remove the repeating x-axis labels (years) from all plots except the bottom subplot in each column.
fig = plt.figure(figsize=(20, 20))

# One subplot column per degree group: STEM, liberal arts, other.
# Each entry is (categories, annotate the last chart of the column?).
# The liberal-arts column has only five charts and labels just its top chart.
degree_columns = [
    (stem_cats, True),
    (lib_arts_cats, False),
    (other_cats, True),
]
n_rows, n_cols = 6, 3

for col, (cats, annotate_last) in enumerate(degree_columns):
    for row, category in enumerate(cats):
        is_last = row == len(cats) - 1
        # Subplot numbers run left-to-right, then top-to-bottom, starting at 1.
        ax = fig.add_subplot(n_rows, n_cols, row * n_cols + col + 1)
        women_share = women_degrees[category]
        ax.plot(women_degrees['Year'], women_share,
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_share,
                c=cb_orange, label='Men', linewidth=3)

        # Remove spines
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Axis limits and title
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Hide every tick mark, and keep the year labels only on the last
        # chart of each column. Booleans are required: recent matplotlib
        # releases treat the strings 'off'/'on' as plain truthy values.
        ax.tick_params(top=False, bottom=False, right=False, left=False,
                       labelbottom=is_last)

        # Annotate the top chart of every column and, where requested, the
        # last chart of the column.
        if row == 0:
            ax.text(2003, 85, 'Women')
            ax.text(2005, 10, 'Men')
        elif annotate_last and is_last:
            ax.text(2005, 87, 'Men')
            ax.text(2003, 7, 'Women')
plt.show()
Further reduce clutter in our figure by removing most y-axis tick labels and retaining only 0 and 100.
fig = plt.figure(figsize=(20, 20))

# One subplot column per degree group: STEM, liberal arts, other.
# Each entry is (categories, annotate the last chart of the column?).
# The liberal-arts column has only five charts and labels just its top chart.
degree_columns = [
    (stem_cats, True),
    (lib_arts_cats, False),
    (other_cats, True),
]
n_rows, n_cols = 6, 3

for col, (cats, annotate_last) in enumerate(degree_columns):
    for row, category in enumerate(cats):
        is_last = row == len(cats) - 1
        # Subplot numbers run left-to-right, then top-to-bottom, starting at 1.
        ax = fig.add_subplot(n_rows, n_cols, row * n_cols + col + 1)
        women_share = women_degrees[category]
        ax.plot(women_degrees['Year'], women_share,
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_share,
                c=cb_orange, label='Men', linewidth=3)

        # Remove spines
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Axis limits and title; keep only the 0 and 100 y-axis ticks.
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_yticks([0, 100])
        ax.set_title(category)
        # Hide every tick mark, and keep the year labels only on the last
        # chart of each column. Booleans are required: recent matplotlib
        # releases treat the strings 'off'/'on' as plain truthy values.
        ax.tick_params(top=False, bottom=False, right=False, left=False,
                       labelbottom=is_last)

        # Annotate the top chart of every column and, where requested, the
        # last chart of the column.
        if row == 0:
            ax.text(2003, 85, 'Women')
            ax.text(2005, 10, 'Men')
        elif annotate_last and is_last:
            ax.text(2005, 87, 'Men')
            ax.text(2003, 7, 'Women')
plt.show()
Add a horizontal gray line at the 50 mark on the y-axis to help readers easily see the trend in the gender gap for each degree.
fig = plt.figure(figsize=(20, 20))

# One subplot column per degree group: STEM, liberal arts, other.
# The liberal-arts column has five charts; the other two have six.
degree_columns = [stem_cats, lib_arts_cats, other_cats]
n_rows, n_cols = 6, 3
midline_gray = (171/255, 171/255, 171/255)

for col, cats in enumerate(degree_columns):
    for row, category in enumerate(cats):
        is_last = row == len(cats) - 1
        # Subplot numbers run left-to-right, then top-to-bottom, starting at 1.
        ax = fig.add_subplot(n_rows, n_cols, row * n_cols + col + 1)
        women_share = women_degrees[category]
        ax.plot(women_degrees['Year'], women_share,
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_share,
                c=cb_orange, label='Men', linewidth=3)

        # Remove spines
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Axis limits and title; keep only the 0 and 100 y-axis ticks.
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_yticks([0, 100])
        ax.set_title(category)
        # Hide every tick mark, and keep the year labels only on the last
        # chart of each column. Booleans are required: recent matplotlib
        # releases treat the strings 'off'/'on' as plain truthy values.
        ax.tick_params(top=False, bottom=False, right=False, left=False,
                       labelbottom=is_last)

        # Faint horizontal line at 50, where both shares are equal.
        ax.axhline(y=50, c=midline_gray, alpha=0.3)

        # Annotate the top and last chart of every column.
        if row == 0:
            ax.text(2003, 85, 'Women')
            ax.text(2005, 10, 'Men')
        elif is_last:
            ax.text(2005, 87, 'Men')
            ax.text(2003, 7, 'Women')
plt.show()
Save the figure we generated to a separate file. See the documentation for details.
fig = plt.figure(figsize=(20, 20), dpi=300)

# One subplot column per degree group: STEM, liberal arts, other.
# The liberal-arts column has five charts; the other two have six.
degree_columns = [stem_cats, lib_arts_cats, other_cats]
n_rows, n_cols = 6, 3
midline_gray = (171/255, 171/255, 171/255)

for col, cats in enumerate(degree_columns):
    for row, category in enumerate(cats):
        is_last = row == len(cats) - 1
        # Subplot numbers run left-to-right, then top-to-bottom, starting at 1.
        ax = fig.add_subplot(n_rows, n_cols, row * n_cols + col + 1)
        women_share = women_degrees[category]
        ax.plot(women_degrees['Year'], women_share,
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_share,
                c=cb_orange, label='Men', linewidth=3)

        # Remove spines
        for spine in ax.spines.values():
            spine.set_visible(False)

        # Axis limits and title; keep only the 0 and 100 y-axis ticks.
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_yticks([0, 100])
        ax.set_title(category)
        # Hide every tick mark, and keep the year labels only on the last
        # chart of each column. Booleans are required: recent matplotlib
        # releases treat the strings 'off'/'on' as plain truthy values.
        ax.tick_params(top=False, bottom=False, right=False, left=False,
                       labelbottom=is_last)

        # Faint horizontal line at 50, where both shares are equal.
        ax.axhline(y=50, c=midline_gray, alpha=0.3)

        # Annotate the top and last chart of every column.
        if row == 0:
            ax.text(2003, 85, 'Women')
            ax.text(2005, 10, 'Men')
        elif is_last:
            ax.text(2005, 87, 'Men')
            ax.text(2003, 7, 'Women')

# Title and save through the figure object itself, before show().
fig.suptitle('Gender Gap In College Degrees', fontsize=20)
fig.savefig('gender_degrees.png')
plt.show()