A visualization of the gender gap in 17 different degree fields from 1970 to 2012.
The Department of Education Statistics releases a data set annually containing the percentage of bachelor's degrees granted to women from 1970 to 2012. The data set is broken up into 17 categories of degrees, with each column as a separate category.
Below, I plot a line chart for each column, using the plot-styling techniques available in the Python libraries Matplotlib and Pandas.
# Import modules
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
# Load the data set: a 'Year' column plus one column per degree category,
# each holding the percentage of bachelor's degrees awarded to women.
women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')

# Line colours, written as 0-255 channels and scaled to the 0-1 RGB range
cb_dark_blue = tuple(channel / 255 for channel in (0, 107, 164))
cb_orange = tuple(channel / 255 for channel in (255, 128, 14))

# Degree categories grouped by the grid column they are drawn in
stem_cats = [
    'Engineering',
    'Computer Science',
    'Psychology',
    'Biology',
    'Physical Sciences',
    'Math and Statistics',
]
lib_arts_cats = [
    'Foreign Languages',
    'English',
    'Communications and Journalism',
    'Art and Performance',
    'Social Sciences and History',
]
other_cats = [
    'Health Professions',
    'Public Administration',
    'Education',
    'Agriculture',
    'Business',
    'Architecture',
]

# A single tall figure that holds every chart
fig = plt.figure(figsize=(12, 20))
# Left-hand column of the 6x3 grid: one chart per STEM degree category.
# Subplot slots are numbered row by row, so the left column is 1, 4, 7, ... 16.

# Light gray for the 50% guide line (all three RGB channels equal).
cb_light_gray = (171/255, 171/255, 171/255)

for sp, category in enumerate(stem_cats):
    position = 3 * sp + 1
    is_top_chart = sp == 0
    is_bottom_chart = sp == len(stem_cats) - 1

    ax = fig.add_subplot(6, 3, position)
    # The column holds the women's share; the men's share is its complement.
    ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)

    # Remove spines
    for side in ("right", "left", "top", "bottom"):
        ax.spines[side].set_visible(False)

    # Set x and y axis limits
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    # Only label 0 and 100 on y axis
    ax.set_yticks([0, 100])
    # Horizontal line at 50%, where both shares are equal
    ax.axhline(50, c=cb_light_gray, alpha=0.3)

    # Hide every tick mark; keep the x tick labels only on the bottom chart
    ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom_chart)

    # Set title
    ax.set_title(category)

    # Label the lines on top and bottom charts
    if is_top_chart:
        ax.text(2000, 90, 'Men')
        ax.text(1990, 7, 'Women')
    elif is_bottom_chart:
        ax.text(1975, 63, 'Men')
        ax.text(1980, 36, 'Women')
# Right-hand column of the 6x3 grid: one chart per "other" degree category.
# Subplot slots are numbered row by row, so the right column is 3, 6, 9, ... 18.

# Light gray for the 50% guide line (all three RGB channels equal).
cb_light_gray = (171/255, 171/255, 171/255)

for tp, category in enumerate(other_cats):
    position_1 = 3 * tp + 3
    is_top_chart = tp == 0
    is_bottom_chart = tp == len(other_cats) - 1

    ax = fig.add_subplot(6, 3, position_1)
    # The column holds the women's share; the men's share is its complement.
    ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)

    # Remove spines
    for side in ("right", "left", "top", "bottom"):
        ax.spines[side].set_visible(False)

    # Set x and y axis limits
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    # Only label 0 and 100 on y axis
    ax.set_yticks([0, 100])
    # Horizontal line at 50%, where both shares are equal
    ax.axhline(50, c=cb_light_gray, alpha=0.3)

    # Hide every tick mark; keep the x tick labels only on the bottom chart
    ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom_chart)

    # Set title
    ax.set_title(category)

    # Label the lines on top and bottom charts
    if is_top_chart:
        ax.text(1995, 90, 'Women')
        ax.text(1995, 7, 'Men')
    elif is_bottom_chart:
        ax.text(2000, 75, 'Men')
        ax.text(2000, 25, 'Women')
# Middle column of the 6x3 grid: one chart per liberal-arts degree category.
# Subplot slots are numbered row by row, so the middle column is 2, 5, 8, 11, 14.

# Light gray for the 50% guide line (all three RGB channels equal).
cb_light_gray = (171/255, 171/255, 171/255)

for up, category in enumerate(lib_arts_cats):
    position_2 = 3 * up + 2
    is_top_chart = up == 0
    is_bottom_chart = up == len(lib_arts_cats) - 1

    ax = fig.add_subplot(6, 3, position_2)
    # The column holds the women's share; the men's share is its complement.
    ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)

    # Hide every tick mark; keep the x tick labels only on the bottom chart
    ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom_chart)

    # Remove spines
    for side in ("right", "left", "top", "bottom"):
        ax.spines[side].set_visible(False)

    # Set x and y axis limits
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    # Only label 0 and 100 on y axis
    ax.set_yticks([0, 100])

    # Set title
    ax.set_title(category)

    # Horizontal line at 50%, where both shares are equal
    ax.axhline(50, c=cb_light_gray, alpha=0.3)

    # Label the lines in top chart
    if is_top_chart:
        ax.text(1990, 80, 'Women')
        ax.text(1990, 20, 'Men')
# Write the finished 6x3 grid of charts to a PNG file (path is relative to the
# current working directory).
# NOTE(review): the save is done before show(); presumably this order matters
# because show() may leave the figure unavailable on some backends. Confirm
# before reordering.
plt.savefig("gender_degrees.png")
# Display the figure
plt.show()