This dataset contains the percentage of bachelor's degrees granted to women from 1970 to 2012. The figures were released by the Department of Education Statistics and compiled by a data scientist, who made the dataset public on his personal website, in order to explore the gender gap in STEM (science, technology, engineering, and mathematics) fields.
Pandas, Matplotlib, Python
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
# Load the women's bachelor's-degree percentages CSV from its local download location.
women_degrees_file_path = r'C:\Users\anuja\Downloads\percent-bachelors-degrees-women-usa (1).csv'
women_degrees = pd.read_csv(
    women_degrees_file_path,
    encoding='Latin-1',
)
# Line colors as RGB tuples, with 0-255 channel values scaled to the 0-1 floats matplotlib expects.
cb_dark_blue = (0/255, 107/255, 164/255)
cb_orange = (255/255, 128/255, 14/255)
stem_cats = ['Engineering', 'Computer Science', 'Psychology', 'Biology', 'Physical Sciences', 'Math and Statistics']

# Draw the six STEM categories side by side in a single row.
fig = plt.figure(figsize=(18, 3))
for sp, category in enumerate(stem_cats):
    ax = fig.add_subplot(1, 6, sp + 1)
    # The women's share is read from the dataset; the men's share is its complement to 100.
    ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
    ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
    for spine in ax.spines.values():
        spine.set_visible(False)
    ax.set_xlim(1968, 2011)
    ax.set_ylim(0, 100)
    ax.set_title(category)
    # Use real booleans: the string "off" is truthy, so current matplotlib
    # releases would leave the tick marks visible.
    ax.tick_params(bottom=False, top=False, left=False, right=False)
    # Annotate the lines directly on the leftmost and rightmost charts only.
    if sp == 0:
        ax.text(2005, 87, 'Men')
        ax.text(2002, 8, 'Women')
    elif sp == 5:
        ax.text(2005, 62, 'Men')
        ax.text(2001, 35, 'Women')
plt.show()
women_degrees.columns
Index(['Year', 'Agriculture', 'Architecture', 'Art and Performance', 'Biology', 'Business', 'Communications and Journalism', 'Computer Science', 'Education', 'Engineering', 'English', 'Foreign Languages', 'Health Professions', 'Math and Statistics', 'Physical Sciences', 'Psychology', 'Public Administration', 'Social Sciences and History'], dtype='object')
len(women_degrees.columns)
18
Because there are seventeen degrees that we need to generate line charts for, we'll use a subplot grid layout of 6 rows by 3 columns. We can then group the degrees into STEM, liberal arts, and other, in the following way:
# Degree categories grouped by field; each list supplies one column of the
# subplot grid, in top-to-bottom order.
stem_cats = [
    'Psychology',
    'Biology',
    'Math and Statistics',
    'Physical Sciences',
    'Computer Science',
    'Engineering',
]
lib_arts_cats = [
    'Foreign Languages',
    'English',
    'Communications and Journalism',
    'Art and Performance',
    'Social Sciences and History',
]
other_cats = [
    'Health Professions',
    'Public Administration',
    'Education',
    'Agriculture',
    'Business',
    'Architecture',
]
fig = plt.figure(figsize=(16, 20))

# One entry per grid column: the degree categories to chart (top to bottom)
# and the 'Women'/'Men' text annotations to place, keyed by row index.
grid_columns = [
    # First column of line charts. STEM degrees.
    (stem_cats, {
        0: [(2003, 85, 'Women'), (2005, 10, 'Men')],
        5: [(2005, 87, 'Men'), (2003, 7, 'Women')],
    }),
    # Second column of line charts. Liberal arts degrees.
    (lib_arts_cats, {
        0: [(2003, 78, 'Women'), (2005, 18, 'Men')],
    }),
    # Third column of line charts. Other degrees.
    (other_cats, {
        0: [(2003, 90, 'Women'), (2005, 5, 'Men')],
        5: [(2005, 62, 'Men'), (2003, 30, 'Women')],
    }),
]

for col, (cats, annotations) in enumerate(grid_columns):
    for cat_index, category in enumerate(cats):
        # Subplot positions are numbered row by row, three per row.
        ax = fig.add_subplot(6, 3, cat_index * 3 + col + 1)
        # The women's share is read from the dataset; the men's share is its complement to 100.
        ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Use real booleans: the string "off" is truthy, so current matplotlib
        # releases would leave the tick marks visible.
        ax.tick_params(bottom=False, top=False, left=False, right=False)
        for x, y, text in annotations.get(cat_index, []):
            ax.text(x, y, text)

plt.show()
Turning off the `labelbottom` option of `Axes.tick_params()` hides the x-axis labels; doing so for every line chart removes them all. We can then re-enable the x-axis labels for the bottommost line chart in each column:
# Re-enable the x-axis tick labels on this Axes (a real boolean rather than the legacy 'on' string).
ax.tick_params(labelbottom=True)
fig = plt.figure(figsize=(16, 20))

# One entry per grid column: the degree categories to chart (top to bottom)
# and the 'Women'/'Men' text annotations to place, keyed by row index.
grid_columns = [
    # First column of line charts. STEM degrees.
    (stem_cats, {
        0: [(2003, 85, 'Women'), (2005, 10, 'Men')],
        5: [(2005, 87, 'Men'), (2003, 7, 'Women')],
    }),
    # Second column of line charts. Liberal arts degrees.
    (lib_arts_cats, {
        0: [(2003, 78, 'Women'), (2005, 18, 'Men')],
    }),
    # Third column of line charts. Other degrees.
    (other_cats, {
        0: [(2003, 90, 'Women'), (2005, 5, 'Men')],
        5: [(2005, 62, 'Men'), (2003, 30, 'Women')],
    }),
]

for col, (cats, annotations) in enumerate(grid_columns):
    for cat_index, category in enumerate(cats):
        # Subplot positions are numbered row by row, three per row.
        ax = fig.add_subplot(6, 3, cat_index * 3 + col + 1)
        # The women's share is read from the dataset; the men's share is its complement to 100.
        ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Only the bottommost chart of each column keeps its x-axis labels.
        # Real booleans are required: the strings "off"/"on" are both truthy,
        # so current matplotlib releases would hide nothing.
        is_bottom_chart = cat_index == len(cats) - 1
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom_chart)
        for x, y, text in annotations.get(cat_index, []):
            ax.text(x, y, text)

plt.show()
We can use the Axes.set_yticks() method to specify which labels we want displayed. The following code enables just the 0 and 100 labels to be displayed:
# Keep only the lowest and highest percentage ticks on the y-axis.
y_tick_positions = [0, 100]
ax.set_yticks(y_tick_positions)
fig = plt.figure(figsize=(16, 20))

# One entry per grid column: the degree categories to chart (top to bottom)
# and the 'Women'/'Men' text annotations to place, keyed by row index.
grid_columns = [
    # First column of line charts. STEM degrees.
    (stem_cats, {
        0: [(2003, 85, 'Women'), (2005, 10, 'Men')],
        5: [(2005, 87, 'Men'), (2003, 7, 'Women')],
    }),
    # Second column of line charts. Liberal arts degrees.
    (lib_arts_cats, {
        0: [(2003, 78, 'Women'), (2005, 18, 'Men')],
    }),
    # Third column of line charts. Other degrees.
    (other_cats, {
        0: [(2003, 90, 'Women'), (2005, 5, 'Men')],
        5: [(2005, 62, 'Men'), (2003, 30, 'Women')],
    }),
]

for col, (cats, annotations) in enumerate(grid_columns):
    for cat_index, category in enumerate(cats):
        # Subplot positions are numbered row by row, three per row.
        ax = fig.add_subplot(6, 3, cat_index * 3 + col + 1)
        # The women's share is read from the dataset; the men's share is its complement to 100.
        ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Only the bottommost chart of each column keeps its x-axis labels.
        # Real booleans are required: the strings "off"/"on" are both truthy,
        # so current matplotlib releases would hide nothing.
        is_bottom_chart = cat_index == len(cats) - 1
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom_chart)
        # Show only the 0 and 100 labels on the y-axis.
        ax.set_yticks([0, 100])
        for x, y, text in annotations.get(cat_index, []):
            ax.text(x, y, text)

plt.show()
We can generate a horizontal line across an entire subplot using the Axes.axhline() method. Its main parameter is the y-axis location at which the line is drawn (it defaults to 0 when omitted):
# Draw a horizontal line across the subplot at the 50 mark.
ax.axhline(y=50)
We can set the color using the c parameter and the transparency using the alpha parameter. The value passed in to the alpha parameter must range between 0 and 1:
# Same line at the 50 mark, drawn in a semi-transparent gray.
ax.axhline(y=50, color=(171/255, 171/255, 171/255), alpha=0.3)
For this horizontal line we use the gray from the Color Blind 10 palette, which has an RGB value of (171, 171, 171).
fig = plt.figure(figsize=(16, 20))

# One entry per grid column: the degree categories to chart (top to bottom)
# and the 'Women'/'Men' text annotations to place, keyed by row index.
grid_columns = [
    # First column of line charts. STEM degrees.
    (stem_cats, {
        0: [(2003, 85, 'Women'), (2005, 10, 'Men')],
        5: [(2005, 87, 'Men'), (2003, 7, 'Women')],
    }),
    # Second column of line charts. Liberal arts degrees.
    (lib_arts_cats, {
        0: [(2003, 78, 'Women'), (2005, 18, 'Men')],
    }),
    # Third column of line charts. Other degrees.
    (other_cats, {
        0: [(2003, 90, 'Women'), (2005, 5, 'Men')],
        5: [(2005, 62, 'Men'), (2003, 30, 'Women')],
    }),
]

for col, (cats, annotations) in enumerate(grid_columns):
    for cat_index, category in enumerate(cats):
        # Subplot positions are numbered row by row, three per row.
        ax = fig.add_subplot(6, 3, cat_index * 3 + col + 1)
        # The women's share is read from the dataset; the men's share is its complement to 100.
        ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Only the bottommost chart of each column keeps its x-axis labels.
        # Real booleans are required: the strings "off"/"on" are both truthy,
        # so current matplotlib releases would hide nothing.
        is_bottom_chart = cat_index == len(cats) - 1
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom_chart)
        # Faint gray reference line at the 50 mark.
        ax.axhline(50, c=(171/255, 171/255, 171/255), alpha=0.3)
        # Show only the 0 and 100 labels on the y-axis.
        ax.set_yticks([0, 100])
        for x, y, text in annotations.get(cat_index, []):
            ax.text(x, y, text)

plt.show()
Export the figure to a file before calling pyplot.show(), using either Figure.savefig() or pyplot.savefig().
Example - fig.savefig("gender_degrees.png")
or
Example - plt.plot(women_degrees['Year'], women_degrees['Biology'])
plt.savefig('biology_degrees.png')
The image will be exported into the same folder that our Jupyter Notebook server is running in.
Exporting plots we create using matplotlib allows us to use them in Word documents, Powerpoint presentations, and even in emails.
fig = plt.figure(figsize=(16, 20))

# One entry per grid column: the degree categories to chart (top to bottom)
# and the 'Women'/'Men' text annotations to place, keyed by row index.
grid_columns = [
    # First column of line charts. STEM degrees.
    (stem_cats, {
        0: [(2003, 85, 'Women'), (2005, 10, 'Men')],
        5: [(2005, 87, 'Men'), (2003, 7, 'Women')],
    }),
    # Second column of line charts. Liberal arts degrees.
    (lib_arts_cats, {
        0: [(2003, 78, 'Women'), (2005, 18, 'Men')],
    }),
    # Third column of line charts. Other degrees.
    (other_cats, {
        0: [(2003, 90, 'Women'), (2005, 5, 'Men')],
        5: [(2005, 62, 'Men'), (2003, 30, 'Women')],
    }),
]

for col, (cats, annotations) in enumerate(grid_columns):
    for cat_index, category in enumerate(cats):
        # Subplot positions are numbered row by row, three per row.
        ax = fig.add_subplot(6, 3, cat_index * 3 + col + 1)
        # The women's share is read from the dataset; the men's share is its complement to 100.
        ax.plot(women_degrees['Year'], women_degrees[category], c=cb_dark_blue, label='Women', linewidth=3)
        ax.plot(women_degrees['Year'], 100 - women_degrees[category], c=cb_orange, label='Men', linewidth=3)
        for spine in ax.spines.values():
            spine.set_visible(False)
        ax.set_xlim(1968, 2011)
        ax.set_ylim(0, 100)
        ax.set_title(category)
        # Only the bottommost chart of each column keeps its x-axis labels.
        # Real booleans are required: the strings "off"/"on" are both truthy,
        # so current matplotlib releases would hide nothing.
        is_bottom_chart = cat_index == len(cats) - 1
        ax.tick_params(bottom=False, top=False, left=False, right=False, labelbottom=is_bottom_chart)
        # Faint gray reference line at the 50 mark.
        ax.axhline(50, c=(171/255, 171/255, 171/255), alpha=0.3)
        # Show only the 0 and 100 labels on the y-axis.
        ax.set_yticks([0, 100])
        for x, y, text in annotations.get(cat_index, []):
            ax.text(x, y, text)

# Export file before calling pyplot.show()
fig.savefig("gender_degrees.png")
plt.show()
Using the export feature, we obtained the final image, which visually compares the gender gap across degree categories in the dataset after applying the Matplotlib features described above.