Introduction
The Department of Education Statistics releases a data set annually containing the percentage of bachelor's degrees granted to women from 1970 to 2012. The data set is broken up into 17 categories of degrees, with each column as a separate category.
Randal Olson, a data scientist at University of Pennsylvania, has cleaned the data set and made it available on his personal website. The dataset Randal compiled can be downloaded here.
Randal compiled this data set to explore the gender gap in STEM (science, technology, engineering, and mathematics) fields. This gap is often reported in the news, and not everyone agrees that there is a gap.
In this project, we'll explore how to communicate the nuanced narrative of the gender gap using effective data visualization, and we'll also visualize the gender gap across all college degree categories.
STEM Degree Comparison
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
# Load the cleaned data set. The code below relies on a 'Year' column plus one
# column per degree category holding the percentage of degrees earned by women.
women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')

# Line colours as RGB tuples scaled to the 0-1 range Matplotlib expects.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']

# Direct line labels, keyed by panel index, as (x, y, text). Only the first
# and last panels are labelled so the row stays uncluttered.
stem_line_labels = {
    0: [(2005, 87, 'Men'), (2002, 8, 'Women')],
    5: [(2005, 62, 'Men'), (2001, 35, 'Women')],
}

# One row of six small multiples, one panel per STEM category.
fig = plt.figure(figsize=(18, 3))
for sp, category in enumerate(stem_cats):
    ax = fig.add_subplot(1, 6, sp + 1)
    women_share = women_degrees[category]
    ax.plot(women_degrees['Year'], women_share, c=cb_dark_blue, label='Women', linewidth=3)
    # The men's share is the complement of the women's percentage.
    ax.plot(women_degrees['Year'], 100 - women_share, c=cb_orange, label='Men', linewidth=3)

    # Remove chart junk: no frame and no tick marks.
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(category)
    # tick_params expects real booleans. Newer Matplotlib releases no longer
    # translate the strings "on"/"off", and a non-empty string is truthy, so
    # "off" would leave the tick marks visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

    for x, y, text in stem_line_labels.get(sp, []):
        ax.text(x, y, text)
plt.show()
We can conclude that Computer Science and Engineering have large gender gaps, while the gaps in Biology and in Math and Statistics are quite small.
Comparing across all degree categories
# Degree categories grouped by field; each list becomes one column of the grid.
stem_cats = ['Psychology', 'Biology', 'Math and Statistics', 'Physical Sciences', 'Computer Science', 'Engineering']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

# Direct line labels keyed by (column, row) as (x, y, text). Only the top
# panel of each column and the bottom panel of the six-row columns get labels.
line_labels = {
    (0, 0): [(2003, 85, 'Women'), (2005, 10, 'Men')],
    (0, 5): [(2005, 87, 'Men'), (2003, 7, 'Women')],
    (1, 0): [(2003, 80, 'Women'), (2005, 15, 'Men')],
    (2, 0): [(2003, 90, 'Women'), (2005, 5, 'Men')],
    (2, 5): [(2005, 60, 'Men'), (2003, 30, 'Women')],
}

# 6x3 grid: STEM in column 0, liberal arts in column 1, other degrees in
# column 2. Liberal arts has five categories, so its sixth cell stays empty.
fig = plt.figure(figsize=(18, 21))
for col, categories in enumerate([stem_cats, lib_arts_cats, other_cats]):
    for row, category in enumerate(categories):
        # Subplot numbers run row-major, starting at 1.
        ax = fig.add_subplot(6, 3, 3 * row + col + 1)
        women_share = women_degrees[category]
        ax.plot(women_degrees['Year'], women_share, c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_share, c=cb_orange, label='Men', linewidth=3)

        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Real booleans are required: the string "off" is truthy and newer
        # Matplotlib releases no longer translate it, so ticks would stay on.
        ax.tick_params(bottom=False, top=False, left=False, right=False)

        for x, y, text in line_labels.get((col, row), []):
            ax.text(x, y, text)
plt.show()
Hiding x-axis labels
# Same 6x3 grid as before, but x-axis tick labels are shown only on the
# bottom-most panel of each column so stacked panels do not repeat the years.
panel_labels = {
    (0, 0): [(2003, 85, 'Women'), (2005, 10, 'Men')],
    (0, 5): [(2005, 87, 'Men'), (2003, 7, 'Women')],
    (1, 0): [(2003, 80, 'Women'), (2005, 15, 'Men')],
    (2, 0): [(2003, 90, 'Women'), (2005, 5, 'Men')],
    (2, 5): [(2005, 60, 'Men'), (2003, 30, 'Women')],
}

fig = plt.figure(figsize=(18, 21))
for col, categories in enumerate([stem_cats, lib_arts_cats, other_cats]):
    bottom_row = len(categories) - 1
    for row, category in enumerate(categories):
        ax = fig.add_subplot(6, 3, 3 * row + col + 1)
        women_share = women_degrees[category]
        ax.plot(women_degrees['Year'], women_share, c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_share, c=cb_orange, label='Men', linewidth=3)

        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Booleans, not "on"/"off" strings: a non-empty string is truthy, so
        # labelbottom='off' would not hide the labels on newer Matplotlib.
        ax.tick_params(
            bottom=False, top=False, left=False, right=False,
            labelbottom=(row == bottom_row),
        )

        for x, y, text in panel_labels.get((col, row), []):
            ax.text(x, y, text)
plt.show()
Setting y-axis labels
# Same grid again, now with the y-axis reduced to its two end points (0 and
# 100) so each panel carries just enough scale information.
panel_labels = {
    (0, 0): [(2003, 85, 'Women'), (2005, 10, 'Men')],
    (0, 5): [(2005, 87, 'Men'), (2003, 7, 'Women')],
    (1, 0): [(2003, 80, 'Women'), (2005, 15, 'Men')],
    (2, 0): [(2003, 90, 'Women'), (2005, 5, 'Men')],
    (2, 5): [(2005, 60, 'Men'), (2003, 30, 'Women')],
}

fig = plt.figure(figsize=(18, 21))
for col, categories in enumerate([stem_cats, lib_arts_cats, other_cats]):
    bottom_row = len(categories) - 1
    for row, category in enumerate(categories):
        ax = fig.add_subplot(6, 3, 3 * row + col + 1)
        women_share = women_degrees[category]
        ax.plot(women_degrees['Year'], women_share, c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_share, c=cb_orange, label='Men', linewidth=3)

        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Year labels only on the last panel of each column. Booleans are
        # used because the strings "on"/"off" are both truthy.
        ax.tick_params(
            bottom=False, top=False, left=False, right=False,
            labelbottom=(row == bottom_row),
        )
        ax.set_yticks([0, 100])

        for x, y, text in panel_labels.get((col, row), []):
            ax.text(x, y, text)
plt.show()
Adding a horizontal line
# Same grid, plus a faint horizontal line at 50% in every panel so the point
# of gender parity is visible at a glance.
cb_gray = (171/255, 171/255, 171/255)
panel_labels = {
    (0, 0): [(2003, 85, 'Women'), (2005, 10, 'Men')],
    (0, 5): [(2005, 87, 'Men'), (2003, 7, 'Women')],
    (1, 0): [(2003, 80, 'Women'), (2005, 15, 'Men')],
    (2, 0): [(2003, 90, 'Women'), (2005, 5, 'Men')],
    (2, 5): [(2005, 60, 'Men'), (2003, 30, 'Women')],
}

fig = plt.figure(figsize=(18, 21))
for col, categories in enumerate([stem_cats, lib_arts_cats, other_cats]):
    bottom_row = len(categories) - 1
    for row, category in enumerate(categories):
        ax = fig.add_subplot(6, 3, 3 * row + col + 1)
        women_share = women_degrees[category]
        ax.plot(women_degrees['Year'], women_share, c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_share, c=cb_orange, label='Men', linewidth=3)

        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Booleans instead of "on"/"off": both strings are truthy, so the
        # string form cannot switch ticks or labels off on newer Matplotlib.
        ax.tick_params(
            bottom=False, top=False, left=False, right=False,
            labelbottom=(row == bottom_row),
        )
        ax.set_yticks([0, 100])
        # Semi-transparent 50% reference line.
        ax.axhline(50, c=cb_gray, alpha=0.3)

        for x, y, text in panel_labels.get((col, row), []):
            ax.text(x, y, text)
plt.show()
Exporting to a file
# Final version of the grid: rendered once more and written to
# 'gender_degrees.png' before being displayed.
cb_gray = (171/255, 171/255, 171/255)
panel_labels = {
    (0, 0): [(2003, 85, 'Women'), (2005, 10, 'Men')],
    (0, 5): [(2005, 87, 'Men'), (2003, 7, 'Women')],
    (1, 0): [(2003, 80, 'Women'), (2005, 15, 'Men')],
    (2, 0): [(2003, 90, 'Women'), (2005, 5, 'Men')],
    (2, 5): [(2005, 60, 'Men'), (2003, 30, 'Women')],
}

fig = plt.figure(figsize=(18, 21))
for col, categories in enumerate([stem_cats, lib_arts_cats, other_cats]):
    bottom_row = len(categories) - 1
    for row, category in enumerate(categories):
        ax = fig.add_subplot(6, 3, 3 * row + col + 1)
        women_share = women_degrees[category]
        ax.plot(women_degrees['Year'], women_share, c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_share, c=cb_orange, label='Men', linewidth=3)

        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Booleans instead of "on"/"off": both strings are truthy, so the
        # string form cannot switch ticks or labels off on newer Matplotlib.
        ax.tick_params(
            bottom=False, top=False, left=False, right=False,
            labelbottom=(row == bottom_row),
        )
        ax.set_yticks([0, 100])
        ax.axhline(50, c=cb_gray, alpha=0.3)

        for x, y, text in panel_labels.get((col, row), []):
            ax.text(x, y, text)

# Save before show(): the save must happen while this figure is still the
# current one.
plt.savefig('gender_degrees.png')
plt.show()
Conclusion
For STEM Degrees:
Computer Science and Engineering have big gender gaps, while the gaps in Biology and in Math and Statistics are quite small.
For Liberal Arts Degrees:
degree. Only Social Sciences and History would have about the same number of women and men. Overall the gender gaps for liberal arts degrees are not as wide as in STEM degrees.
For Other Degrees:
Education with females taking up a higher proportion of the students. For other courses like Agriculture, Business and Architecture, the gender gaps are not significant.