#!/usr/bin/env python
# coding: utf-8

# # Visualizing The Gender Gap In College Degrees
#
# In this project we will be working on visualizing the data from
# [The Department of Education Statistics](http://nces.ed.gov/programs/digest/2013menu_tables.asp).
#
# The department releases a data set annually containing the percentage of
# bachelor's degrees granted to women from 1970 to 2012. The data set is broken
# up into 17 categories of degrees, with each column as a separate category.
# The dataset for this project, compiled by Randal Olson, a data scientist at
# the University of Pennsylvania, can be downloaded
# [here](http://www.randalolson.com/wp-content/uploads/percent-bachelors-degrees-women-usa.csv).
#
# ### Aim
# To compare the gender gap in all degree categories using data visualization.

# ### 1. Open the dataset and visualize the gap across STEM fields

# In[11]:


get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import matplotlib.pyplot as plt

women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')

# Line colours as RGB fractions in the 0-1 range.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']

fig = plt.figure(figsize=(18, 3))

# One chart per STEM category, laid out in a single row.
for sp, cat in enumerate(stem_cats):
    ax = fig.add_subplot(1, 6, sp + 1)
    # The men's share is plotted as the complement of the women's share.
    ax.plot(women_degrees['Year'], women_degrees[cat], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_degrees[cat], c=cb_orange, label='Men', linewidth=3)
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(cat)
    # Real booleans are required here: the "on"/"off" strings were deprecated
    # in matplotlib 2.2, and a non-empty string such as "off" is truthy.
    ax.tick_params(bottom=False, top=False, left=False, right=False)

    # Only the first and last charts carry in-plot line labels.
    if sp == 0:
        ax.text(2005, 87, 'Men')
        ax.text(2002, 8, 'Women')
        ax.legend(loc='best')
    elif sp == 5:
        ax.text(2005, 62, 'Men')
        ax.text(2001, 35, 'Women')
plt.show()


# ### 2.
# Compare all degrees

# In[12]:


fig = plt.figure(figsize=(18, 20))
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

# Grid columns, left to right: STEM, other, liberal arts. The liberal arts
# list has five entries, so the last cell of the third column stays empty.
columns = [stem_cats, other_cats, lib_arts_cats]

for col, cats in enumerate(columns):
    for row, cat in enumerate(cats):
        # Subplot numbers run row by row across the 6x3 grid, starting at 1.
        ax = fig.add_subplot(6, 3, row * 3 + col + 1)
        # The men's share is plotted as the complement of the women's share.
        ax.plot(women_degrees['Year'], women_degrees[cat], c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[cat], c=cb_orange, label='Men', linewidth=3)
        ax.set_title(cat)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)

        # In-plot line labels go on the top and bottom chart of each column.
        # NOTE(review): the same label coordinates are reused for all three
        # columns - confirm they sit next to the right line outside STEM.
        if row == 0:
            ax.text(2005, 87, 'Men')
            ax.text(2002, 8, 'Women')
            ax.legend(loc='best')
        elif row == len(cats) - 1:
            ax.text(2005, 62, 'Men')
            ax.text(2001, 35, 'Women')
            # x-axis labels are wanted on the bottommost plot of each column
            ax.tick_params(labelbottom=True)

plt.show()


# ### 3. Hide x-axis labels
#
# To hide the x-axis labels, we turn `labelbottom` off in `Axes.tick_params()`:
#
#
# ```
# ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=False)
# ```
#
# (Pass real booleans: the "on"/"off" strings were deprecated in matplotlib 2.2,
# and a non-empty string such as "off" is otherwise truthy.)
#
# To declutter the charts, we are disabling the x-axis labels of all the line
# charts except the bottommost line chart in each column.
#

# In[13]:


fig = plt.figure(figsize=(18, 20))
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

# Grid columns, left to right: STEM, other, liberal arts. The liberal arts
# list has five entries, so the last cell of the third column stays empty.
columns = [stem_cats, other_cats, lib_arts_cats]

for col, cats in enumerate(columns):
    for row, cat in enumerate(cats):
        # Subplot numbers run row by row across the 6x3 grid, starting at 1.
        ax = fig.add_subplot(6, 3, row * 3 + col + 1)
        # The men's share is plotted as the complement of the women's share.
        ax.plot(women_degrees['Year'], women_degrees[cat], c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[cat], c=cb_orange, label='Men', linewidth=3)
        ax.set_title(cat)
        # Hide tick marks and x-axis labels. Real booleans are required: the
        # "on"/"off" strings were deprecated in matplotlib 2.2, and a
        # non-empty string such as "off" is truthy.
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=False)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)

        # In-plot line labels go on the top and bottom chart of each column.
        # NOTE(review): the same label coordinates are reused for all three
        # columns - confirm they sit next to the right line outside STEM.
        if row == 0:
            ax.text(2005, 87, 'Men')
            ax.text(2002, 8, 'Women')
            ax.legend(loc='best')
        elif row == len(cats) - 1:
            ax.text(2005, 62, 'Men')
            ax.text(2001, 35, 'Women')
            # show x-axis labels only on the bottommost plot of each column
            ax.tick_params(labelbottom=True)

plt.show()


# ### 4.
# Setting y-axis labels
#
# For all the plots, we are setting y-axis labels to `0` and `100`

# In[14]:


fig = plt.figure(figsize=(18, 20))
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

# Grid columns, left to right: STEM, other, liberal arts. The liberal arts
# list has five entries, so the last cell of the third column stays empty.
columns = [stem_cats, other_cats, lib_arts_cats]

for col, cats in enumerate(columns):
    for row, cat in enumerate(cats):
        # Subplot numbers run row by row across the 6x3 grid, starting at 1.
        ax = fig.add_subplot(6, 3, row * 3 + col + 1)
        # The men's share is plotted as the complement of the women's share.
        ax.plot(women_degrees['Year'], women_degrees[cat], c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[cat], c=cb_orange, label='Men', linewidth=3)
        ax.set_title(cat)
        # Hide tick marks and x-axis labels. Real booleans are required: the
        # "on"/"off" strings were deprecated in matplotlib 2.2, and a
        # non-empty string such as "off" is truthy.
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=False)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        # set starting and ending labels (0 and 100)
        ax.set_yticks([0, 100])

        # In-plot line labels go on the top and bottom chart of each column.
        # NOTE(review): the same label coordinates are reused for all three
        # columns - confirm they sit next to the right line outside STEM.
        if row == 0:
            ax.text(2005, 87, 'Men')
            ax.text(2002, 8, 'Women')
            ax.legend(loc='best')
        elif row == len(cats) - 1:
            ax.text(2005, 62, 'Men')
            ax.text(2001, 35, 'Women')
            # show x-axis labels only on the bottommost plot of each column
            ax.tick_params(labelbottom=True)

plt.show()


# ### 5.
# Adding a horizontal line
#
# For all plots, to generate a horizontal line we use ``` Axes.axhline() ```

# In[15]:


fig = plt.figure(figsize=(18, 20))
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

# Grid columns, left to right: STEM, other, liberal arts. The liberal arts
# list has five entries, so the last cell of the third column stays empty.
columns = [stem_cats, other_cats, lib_arts_cats]

for col, cats in enumerate(columns):
    for row, cat in enumerate(cats):
        # Subplot numbers run row by row across the 6x3 grid, starting at 1.
        ax = fig.add_subplot(6, 3, row * 3 + col + 1)
        # The men's share is plotted as the complement of the women's share.
        ax.plot(women_degrees['Year'], women_degrees[cat], c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[cat], c=cb_orange, label='Men', linewidth=3)
        ax.set_title(cat)
        # Hide tick marks and x-axis labels. Real booleans are required: the
        # "on"/"off" strings were deprecated in matplotlib 2.2, and a
        # non-empty string such as "off" is truthy.
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=False)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        # set starting and ending labels (0 and 100)
        ax.set_yticks([0, 100])
        # add a faint horizontal line at the 50% mark
        ax.axhline(50, c=(171/255, 171/255, 171/255), alpha=0.3)

        # In-plot line labels go on the top and bottom chart of each column.
        # NOTE(review): the same label coordinates are reused for all three
        # columns - confirm they sit next to the right line outside STEM.
        if row == 0:
            ax.text(2005, 87, 'Men')
            ax.text(2002, 8, 'Women')
            ax.legend(loc='best')
        elif row == len(cats) - 1:
            ax.text(2005, 62, 'Men')
            ax.text(2001, 35, 'Women')
            # show x-axis labels only on the bottommost plot of each column
            ax.tick_params(labelbottom=True)

plt.show()


# ### 6.
# Export the plot as a `.png` file

# In[16]:


fig = plt.figure(figsize=(18, 20))
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

# Grid columns, left to right: STEM, other, liberal arts. The liberal arts
# list has five entries, so the last cell of the third column stays empty.
columns = [stem_cats, other_cats, lib_arts_cats]

for col, cats in enumerate(columns):
    for row, cat in enumerate(cats):
        # Subplot numbers run row by row across the 6x3 grid, starting at 1.
        ax = fig.add_subplot(6, 3, row * 3 + col + 1)
        # The men's share is plotted as the complement of the women's share.
        ax.plot(women_degrees['Year'], women_degrees[cat], c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[cat], c=cb_orange, label='Men', linewidth=3)
        ax.set_title(cat)
        # Hide tick marks and x-axis labels. Real booleans are required: the
        # "on"/"off" strings were deprecated in matplotlib 2.2, and a
        # non-empty string such as "off" is truthy.
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=False)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        # set starting and ending labels (0 and 100)
        ax.set_yticks([0, 100])
        # add a faint horizontal line at the 50% mark
        ax.axhline(50, c=(171/255, 171/255, 171/255), alpha=0.3)

        # In-plot line labels go on the top and bottom chart of each column.
        # No legend is drawn in the exported figure: the first column had
        # already dropped it, and the text labels identify the lines.
        # NOTE(review): the same label coordinates are reused for all three
        # columns - confirm they sit next to the right line outside STEM.
        if row == 0:
            ax.text(2005, 87, 'Men')
            ax.text(2002, 8, 'Women')
        elif row == len(cats) - 1:
            ax.text(2005, 62, 'Men')
            ax.text(2001, 35, 'Women')
            # show x-axis labels only on the bottommost plot of each column
            ax.tick_params(labelbottom=True)

# Save before plt.show() so the file is written from the fully drawn figure.
fig.savefig("gender_degrees.png")
plt.show()