#!/usr/bin/env python
# coding: utf-8

# # *Visualizing The Gender Gap In College Degrees*
#
# **Here we have a dataset compiled by Randal Olson, a data scientist at the University of
# Pennsylvania. Through this
# [data set](http://www.randalolson.com/wp-content/uploads/percent-bachelors-degrees-women-usa.csv)
# we will explore the gender gap in STEM fields (Science, Technology, Engineering, and
# Mathematics). This gap is reported on often in the news and not everyone agrees that there
# is a gap.**
#
# **Here, we'll explore how we can communicate the nuanced narrative of the gender gap using
# effective data visualization.**

# ### Introduction to Data

# In[16]:


get_ipython().run_line_magic('matplotlib', 'inline')
import pandas as pd
import matplotlib.pyplot as plt

women_degrees = pd.read_csv('percent-bachelors-degrees-women-usa.csv')
women_degrees.head()


# In[2]:


women_degrees.tail()


# The data above shows the percentage of Bachelor's degrees awarded to women, per subject,
# from 1970 to 2011.
#
# ### Visualizing the Gender Gap

# In[3]:


# The men's share is the complement of the women's share, so it is derived as 100 - women.
years = women_degrees['Year']
biology_women = women_degrees['Biology']

fig = plt.figure()
plt.plot(years, biology_women, c='blue', label='Women')
plt.plot(years, 100 - biology_women, c='green', label='Men')
plt.title("Percentage of Biology Degrees Awarded By Gender")
plt.legend(loc="upper right")
plt.show()


# ### Hiding Tick-Marks

# In[6]:


# Keyword arguments that switch off the tick marks on all four sides of an axes.
hidden_ticks = {'top': False, 'bottom': False, 'right': False, 'left': False}

fig = plt.figure()
plt.plot(years, biology_women, label='Women', c='blue')
plt.plot(years, 100 - biology_women, c='green', label='Men')
# Hiding Tick Marks
plt.tick_params(**hidden_ticks)
plt.title("Percentage of Biology Degrees Awarded By Gender")
plt.legend(loc="upper right")
plt.show()


# ### Hiding Spines

# In[10]:


fig, ax = plt.subplots()
ax.plot(years, biology_women, label='Women', c='blue')
ax.plot(years, 100 - biology_women, label='Men', c='green')
ax.tick_params(**hidden_ticks)

# Removing Spines. Each spine could also be hidden explicitly:
#   ax.spines['right'].set_visible(False)
#   ax.spines['top'].set_visible(False)
#   ax.spines['bottom'].set_visible(False)
#   ax.spines['left'].set_visible(False)
# Either approach works; the loop below covers all four at once.
for spine in ax.spines.values():
    spine.set_visible(False)

ax.legend(loc='upper right')
ax.set_title('Percentage of Biology Degrees Awarded By Gender')
plt.savefig('new.png')
plt.show()


# ### Comparing Gender Gap Across Categories

# In[11]:


major_cats = ['Biology', 'Computer Science', 'Engineering', 'Math and Statistics']
fig = plt.figure(figsize=(12, 12))

# One subplot per category, laid out on a 2x2 grid (subplot positions are 1-based).
for position, category in enumerate(major_cats, start=1):
    ax = fig.add_subplot(2, 2, position)
    women_pct = women_degrees[category]
    ax.plot(women_degrees['Year'], women_pct, c='blue', label='Women')
    ax.plot(women_degrees['Year'], 100 - women_pct, c='green', label='Men')

    # Formatting Axes Objects
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.tick_params(**hidden_ticks)
    ax.set_title(category)

# Calling pyplot.legend() here adds the legend to the last subplot that was created.
plt.legend(loc='upper right')
plt.show()


# We can conclude that Computer Science and Engineering have big gender gaps, while the gap in
# Biology and Math and Statistics is quite small. In addition, the first two degree categories
# are dominated by men while the latter degree categories are much more balanced.
#
# ---
# In order to publish the data visualizations that we create, we have to be mindful of color
# blindness.
#
# Thankfully, there are color palettes we can use that are friendly for people with color
# blindness. One of them is called Color Blind 10 and was released by Tableau, the company that
# makes the data visualization platform of the same name.
# We can navigate to this
# [page](http://tableaufriction.blogspot.ro/2012/11/finally-you-can-use-tableau-data-colors.html)
# and select the Color Blind 10 option from the list of palettes to see the ten colors included
# in the palette.

# ### Setting Line Color using RGB

# In[14]:


# The color for each line is assigned here, as RGB components scaled to the 0-1 range.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

# Keyword arguments that switch off the tick marks on all four sides of an axes.
no_ticks = {'top': False, 'bottom': False, 'right': False, 'left': False}

fig = plt.figure(figsize=(12, 12))

for position, category in enumerate(major_cats, start=1):
    ax = fig.add_subplot(2, 2, position)
    women_pct = women_degrees[category]
    ax.plot(women_degrees['Year'], women_pct, c=cb_dark_blue, label='Women')
    ax.plot(women_degrees['Year'], 100 - women_pct, c=cb_orange, label='Men')

    # Formatting Axes Objects
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(category)
    ax.tick_params(**no_ticks)

plt.legend(loc='upper right')
plt.show()


# ### Setting Line Width

# In[17]:


cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

fig = plt.figure(figsize=(12, 12))

for position, category in enumerate(major_cats, start=1):
    ax = fig.add_subplot(2, 2, position)
    women_pct = women_degrees[category]
    # Set the line width when specifying how each line should look.
    ax.plot(women_degrees['Year'], women_pct, c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_pct, c=cb_orange, label='Men', linewidth=3)

    # Formatting Axes Objects
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(category)
    ax.tick_params(**no_ticks)

plt.legend(loc='upper right')
plt.show()


# ### Improving the Layout and Ordering

# In[29]:


stem_cats = ['Engineering', 'Computer Science', 'Physical Sciences', 'Math and Statistics', 'Biology', 'Psychology']
fig = plt.figure(figsize=(18, 3))

# All six STEM charts share a single row (1x6 grid).
for position, category in enumerate(stem_cats, start=1):
    ax = fig.add_subplot(1, 6, position)
    women_pct = women_degrees[category]
    ax.plot(women_degrees['Year'], women_pct, c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_pct, c=cb_orange, label='Men', linewidth=3)

    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(category)
    ax.tick_params(**no_ticks)

plt.legend(loc='upper right')
plt.show()


# ### Replacing the Legend with Annotations

# In[30]:


cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)

# Text annotations as (x, y, label), keyed by 0-based chart index. Only the leftmost (0) and
# rightmost (5) charts are labelled; the other charts get no annotation.
edge_labels = {
    0: ((2005, 87, 'Men'), (2002, 8, 'Women')),
    5: ((2004, 13, 'Men'), (1999, 83, 'Women')),
}

fig = plt.figure(figsize=(18, 3))

for index, category in enumerate(stem_cats):
    ax = fig.add_subplot(1, 6, index + 1)
    women_pct = women_degrees[category]
    ax.plot(women_degrees['Year'], women_pct, c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_pct, c=cb_orange, label='Men', linewidth=3)

    for x_pos, y_pos, text in edge_labels.get(index, ()):
        ax.text(x_pos, y_pos, text)

    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(category)
    ax.tick_params(**no_ticks)

plt.show()


# ### Comparing Across All
# Degrees
#
# - We will generate a 6 row by 3 column grid of subplots.
# - We will generate line charts for both male and female percentages:
#     - In the first column, for every degree in stem_cats.
#     - In the second column, for every degree in lib_arts_cats.
#     - In the third column, for every degree in other_cats.
# - Add text annotations for Women and Men in the topmost and bottommost plots of each column.

# In[47]:


stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

fig = plt.figure(figsize=(15, 18))


def create_axes_objects(axes_range, column, data, Annot_1_M, Annot_1_F, Annot_2_M, Annot_2_F):
    """Draw one column of women/men line charts on the module-level ``fig``.

    Charts are placed on a 6-row by 3-column subplot grid; chart number ``row`` of the column
    goes to grid position ``row * 3 + column``. The function reads the module-level names
    ``fig``, ``women_degrees``, ``cb_dark_blue`` and ``cb_orange`` when it is called.

    Args:
        axes_range: Number of charts to draw; ``data`` is indexed from 0 to ``axes_range - 1``.
        column: 1-based column of the grid to fill.
        data: Sequence of ``women_degrees`` column names, one per chart, top to bottom.
        Annot_1_M: y position of the 'Men' label on the topmost chart.
        Annot_1_F: y position of the 'Women' label on the topmost chart.
        Annot_2_M: y position of the 'Men' label on the bottommost chart.
        Annot_2_F: y position of the 'Women' label on the bottommost chart.
    """
    last_row = axes_range - 1

    for row in range(axes_range):
        # Plotting Data
        ax = fig.add_subplot(6, 3, row * 3 + column)
        category = data[row]
        ax.plot(women_degrees['Year'], women_degrees[category],
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category],
                c=cb_orange, label='Men', linewidth=3)

        # Formatting Axes Objects
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        ax.tick_params(bottom=False, top=False, left=False, right=False)

        # Only the first and last charts of the column carry text labels.
        if row == 0:
            men_y, women_y = Annot_1_M, Annot_1_F
        elif row == last_row:
            men_y, women_y = Annot_2_M, Annot_2_F
        else:
            continue
        ax.text(2006, men_y, 'Men')
        ax.text(2003, women_y, 'Women')


create_axes_objects(6, 1, stem_cats, 87, 8, 62, 35)
create_axes_objects(5, 2, lib_arts_cats, 20, 75, 56, 38)
create_axes_objects(6, 3, other_cats, 7, 90, 62, 36)


# ### Hiding x-axis Labels
#
# - We will disable the x-axis labels for all line charts except the bottommost line charts in
#   each column.
# In[48]:


stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

fig = plt.figure(figsize=(15, 18))


def create_axes_objects(axes_range, column, data, Annot_1_M, Annot_1_F, Annot_2_M, Annot_2_F):
    """Draw one column of women/men line charts, labelling the x-axis only on the last chart.

    Charts are placed on a 6-row by 3-column subplot grid; chart number ``num`` of the column
    goes to grid position ``num * 3 + column``. The function reads the module-level names
    ``fig``, ``women_degrees``, ``cb_dark_blue`` and ``cb_orange`` when it is called.

    Args:
        axes_range: Number of charts to draw; ``data`` is indexed from 0 to ``axes_range - 1``.
        column: 1-based column of the grid to fill.
        data: Sequence of ``women_degrees`` column names, one per chart, top to bottom.
        Annot_1_M: y position of the 'Men' label on the topmost chart.
        Annot_1_F: y position of the 'Women' label on the topmost chart.
        Annot_2_M: y position of the 'Men' label on the bottommost chart.
        Annot_2_F: y position of the 'Women' label on the bottommost chart.
    """
    bottom_row = axes_range - 1

    for num in range(axes_range):
        # Plotting Data
        ax = fig.add_subplot(6, 3, (num * 3) + column)
        category = data[num]
        ax.plot(women_degrees['Year'], women_degrees[category],
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category],
                c=cb_orange, label='Men', linewidth=3)

        # Formatting Axes Objects
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)

        # Hide the x-axis labels on every chart except the bottommost one. The flag is
        # computed independently of the annotation branches below so that a single-chart
        # column (axes_range == 1), whose only chart is both topmost and bottommost,
        # still gets its x-axis labels.
        is_bottom = num == bottom_row
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom)

        # Text labels go on the first and last charts only. For a single-chart column the
        # topmost positions (Annot_1_*) are used.
        if num == 0:
            ax.text(2006, Annot_1_M, 'Men')
            ax.text(2003, Annot_1_F, 'Women')
        elif is_bottom:
            ax.text(2006, Annot_2_M, 'Men')
            ax.text(2003, Annot_2_F, 'Women')


create_axes_objects(6, 1, stem_cats, 87, 8, 62, 35)
create_axes_objects(5, 2, lib_arts_cats, 20, 75, 56, 38)
create_axes_objects(6, 3, other_cats, 7, 90, 62, 36)


# ### Setting y-axis Labels
#
# - For all plots, we will enable just the y-axis labels at 0 and 100.
# In[49]:


stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

fig = plt.figure(figsize=(15, 18))


def create_axes_objects(axes_range, column, data, Annot_1_M, Annot_1_F, Annot_2_M, Annot_2_F):
    """Draw one column of women/men line charts with y-axis ticks at 0 and 100 only.

    Charts are placed on a 6-row by 3-column subplot grid; chart number ``num`` of the column
    goes to grid position ``num * 3 + column``. The function reads the module-level names
    ``fig``, ``women_degrees``, ``cb_dark_blue`` and ``cb_orange`` when it is called.

    Args:
        axes_range: Number of charts to draw; ``data`` is indexed from 0 to ``axes_range - 1``.
        column: 1-based column of the grid to fill.
        data: Sequence of ``women_degrees`` column names, one per chart, top to bottom.
        Annot_1_M: y position of the 'Men' label on the topmost chart.
        Annot_1_F: y position of the 'Women' label on the topmost chart.
        Annot_2_M: y position of the 'Men' label on the bottommost chart.
        Annot_2_F: y position of the 'Women' label on the bottommost chart.
    """
    bottom_row = axes_range - 1

    for num in range(axes_range):
        # Plotting Data
        ax = fig.add_subplot(6, 3, (num * 3) + column)
        category = data[num]
        ax.plot(women_degrees['Year'], women_degrees[category],
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category],
                c=cb_orange, label='Men', linewidth=3)

        # Formatting Axes Objects
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Setting y-axis labels for all charts at just 0 and 100
        ax.set_yticks([0, 100])

        # x-axis labels appear on the bottommost chart only. The flag is independent of the
        # annotation branches so a single-chart column (axes_range == 1) is still labelled.
        is_bottom = num == bottom_row
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom)

        # Text labels go on the first and last charts only. For a single-chart column the
        # topmost positions (Annot_1_*) are used.
        if num == 0:
            ax.text(2006, Annot_1_M, 'Men')
            ax.text(2003, Annot_1_F, 'Women')
        elif is_bottom:
            ax.text(2006, Annot_2_M, 'Men')
            ax.text(2003, Annot_2_F, 'Women')


create_axes_objects(6, 1, stem_cats, 87, 8, 62, 35)
create_axes_objects(5, 2, lib_arts_cats, 20, 75, 56, 38)
create_axes_objects(6, 3, other_cats, 7, 90, 62, 36)


# ### Adding A Horizontal Line
#
# - For all plots, we will generate a horizontal line with the following properties:
#     - Starts at the y-axis position 50
#     - Set to the third color (light gray) in the Color Blind 10 palette
#     - Has a transparency of 0.3

# In[50]:


stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

fig = plt.figure(figsize=(15, 18))


def create_axes_objects(axes_range, column, data, Annot_1_M, Annot_1_F, Annot_2_M, Annot_2_F):
    """Draw one column of women/men line charts with a faint reference line at 50%.

    Charts are placed on a 6-row by 3-column subplot grid; chart number ``num`` of the column
    goes to grid position ``num * 3 + column``. The function reads the module-level names
    ``fig``, ``women_degrees``, ``cb_dark_blue`` and ``cb_orange`` when it is called.

    Args:
        axes_range: Number of charts to draw; ``data`` is indexed from 0 to ``axes_range - 1``.
        column: 1-based column of the grid to fill.
        data: Sequence of ``women_degrees`` column names, one per chart, top to bottom.
        Annot_1_M: y position of the 'Men' label on the topmost chart.
        Annot_1_F: y position of the 'Women' label on the topmost chart.
        Annot_2_M: y position of the 'Men' label on the bottommost chart.
        Annot_2_F: y position of the 'Women' label on the bottommost chart.
    """
    bottom_row = axes_range - 1

    for num in range(axes_range):
        # Plotting Data
        ax = fig.add_subplot(6, 3, (num * 3) + column)
        category = data[num]
        ax.plot(women_degrees['Year'], women_degrees[category],
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category],
                c=cb_orange, label='Men', linewidth=3)

        # Formatting Axes Objects
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        ax.set_yticks([0, 100])
        # Generating Horizontal Line at position 50, with transparency 0.3, in light gray
        ax.axhline(50, c=(171/255, 171/255, 171/255), alpha=0.3)

        # x-axis labels appear on the bottommost chart only. The flag is independent of the
        # annotation branches so a single-chart column (axes_range == 1) is still labelled.
        is_bottom = num == bottom_row
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom)

        # Text labels go on the first and last charts only. For a single-chart column the
        # topmost positions (Annot_1_*) are used.
        if num == 0:
            ax.text(2006, Annot_1_M, 'Men')
            ax.text(2003, Annot_1_F, 'Women')
        elif is_bottom:
            ax.text(2006, Annot_2_M, 'Men')
            ax.text(2003, Annot_2_F, 'Women')


create_axes_objects(6, 1, stem_cats, 87, 8, 62, 35)
create_axes_objects(5, 2, lib_arts_cats, 20, 75, 56, 38)
create_axes_objects(6, 3, other_cats, 7, 90, 62, 36)


# ### Exporting A File
# - Finally, we will export the figure containing all of the line charts to *"gender_degrees.png"*.
# In[51]:


stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']
lib_arts_cats = ['Foreign Languages', 'English', 'Communications and Journalism', 'Art and Performance', 'Social Sciences and History']
other_cats = ['Health Professions', 'Public Administration', 'Education', 'Agriculture','Business', 'Architecture']

import matplotlib as mpb

fig = plt.figure(figsize=(15, 18))


def create_axes_objects(axes_range, column, data, Annot_1_M, Annot_1_F, Annot_2_M, Annot_2_F):
    """Draw one column of women/men line charts for the exported figure.

    Charts are placed on a 6-row by 3-column subplot grid; chart number ``num`` of the column
    goes to grid position ``num * 3 + column``. The function reads the module-level names
    ``fig``, ``women_degrees``, ``cb_dark_blue`` and ``cb_orange`` when it is called.

    Args:
        axes_range: Number of charts to draw; ``data`` is indexed from 0 to ``axes_range - 1``.
        column: 1-based column of the grid to fill.
        data: Sequence of ``women_degrees`` column names, one per chart, top to bottom.
        Annot_1_M: y position of the 'Men' label on the topmost chart.
        Annot_1_F: y position of the 'Women' label on the topmost chart.
        Annot_2_M: y position of the 'Men' label on the bottommost chart.
        Annot_2_F: y position of the 'Women' label on the bottommost chart.
    """
    bottom_row = axes_range - 1

    for num in range(axes_range):
        # Plotting Data
        ax = fig.add_subplot(6, 3, (num * 3) + column)
        category = data[num]
        ax.plot(women_degrees['Year'], women_degrees[category],
                c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category],
                c=cb_orange, label='Men', linewidth=3)

        # Formatting Axes Objects
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        ax.set_yticks([0, 100])
        # Generating Horizontal Line at position 50, with transparency 0.3, in light gray
        ax.axhline(50, c=(171/255, 171/255, 171/255), alpha=0.3)

        # x-axis labels appear on the bottommost chart only. The flag is independent of the
        # annotation branches so a single-chart column (axes_range == 1) is still labelled.
        is_bottom = num == bottom_row
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom)

        # Text labels go on the first and last charts only. For a single-chart column the
        # topmost positions (Annot_1_*) are used.
        if num == 0:
            ax.text(2006, Annot_1_M, 'Men')
            ax.text(2003, Annot_1_F, 'Women')
        elif is_bottom:
            ax.text(2006, Annot_2_M, 'Men')
            ax.text(2003, Annot_2_F, 'Women')


create_axes_objects(6, 1, stem_cats, 87, 8, 62, 35)
create_axes_objects(5, 2, lib_arts_cats, 20, 75, 56, 38)
create_axes_objects(6, 3, other_cats, 7, 90, 62, 36)

# Debugging aid: uncomment to see which matplotlib backend is active.
# print(mpb.get_backend())

# Save before show(): the figure is written to disk while it is still the current figure.
plt.savefig("gender_degrees.png")
plt.show()