Data collected from https://www.nsf.gov/statistics/nsf13327/content.cfm?pub_id=4266&id=2
import pandas as pd
import numpy as np
import seaborn as sns
%pylab inline
# Use seaborn's "darkgrid" theme for the plots drawn below.
sns.set(style="darkgrid")

# Load the degree counts.  "Academic year ending" is deliberately kept as an
# ordinary column (the frame keeps its default integer index): later cells
# select the year column by name before plotting.  A bare
# `df.set_index(...)` call here would be a no-op anyway, because set_index
# returns a new frame instead of modifying `df` in place.
df = pd.read_csv('datas.csv')

# Preview the first six rows.
df.head(6)
Populating the interactive namespace from numpy and matplotlib
Academic year ending | All recipients Bachelor's | Male Bachelor's | Female Bachelor's | All recipients Master's | Male Master's | Female Master's | All recipients Doctoratea | Male Doctoratea | Female Doctoratea | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1966 | 89 | 76 | 13 | 238 | 221 | 17 | 0 | 0 | 0 |
1 | 1967 | 222 | 198 | 24 | 449 | 423 | 26 | 0 | 0 | 0 |
2 | 1968 | 459 | 404 | 55 | 548 | 518 | 30 | 0 | 0 | 0 |
3 | 1969 | 933 | 812 | 121 | 1012 | 939 | 73 | 0 | 0 | 0 |
4 | 1970 | 1544 | 1345 | 199 | 1459 | 1324 | 135 | 0 | 0 | 0 |
5 | 1971 | 2388 | 2064 | 324 | 1588 | 1424 | 164 | 0 | 0 | 0 |
# Gender split inside each degree level, as a percentage of that level's
# recipients.  Rows where a level has zero recipients come out as NaN (0/0).
for level in ("Bachelor's", "Master's", "Doctoratea"):
    level_recipients = df["All recipients " + level]
    for sex in ("Male", "Female"):
        df["Percent " + sex + " " + level] = df[sex + " " + level] / level_recipients * 100

# Head counts summed over the three degree levels.
for group, total_column in (
    ("All recipients", "total"),
    ("Male", "Male total"),
    ("Female", "Female total"),
):
    df[total_column] = (
        df[group + " Bachelor's"]
        + df[group + " Master's"]
        + df[group + " Doctoratea"]
    )

# Gender split of the combined totals.
for sex in ("Male", "Female"):
    df["Percent " + sex + " total"] = df[sex + " total"] * 100 / df["total"]
# Differences between consecutive years.
def _change_from_previous_row(counts):
    """Return how much each value of *counts* changed versus the row before it.

    The first row has no predecessor, so its change is reported as 0.  The
    result is cast back to the dtype of *counts* so that integer counts yield
    integer differences.
    """
    return counts.diff().fillna(0).astype(counts.dtype)


# One "Difference <column>" column per count column, created in this order so
# the resulting column layout of `df` stays the same.
for column in (
    "All recipients Bachelor's",
    "Male Bachelor's",
    "Female Bachelor's",
    "All recipients Master's",
    "Male Master's",
    "Female Master's",
    "All recipients Doctoratea",
    "Male Doctoratea",
    "Female Doctoratea",
    "total",
    "Male total",
    "Female total",
):
    df["Difference " + column] = _change_from_previous_row(df[column])

# Replace every remaining NaN with 0 (e.g. percentage columns computed for a
# degree level that had zero recipients in a given year).
df = df.fillna(0)

# "Academic year ending" stays a regular column; the plotting cells select it
# by name and only turn it into the index on a temporary copy.
df.head()
Academic year ending | All recipients Bachelor's | Male Bachelor's | Female Bachelor's | All recipients Master's | Male Master's | Female Master's | All recipients Doctoratea | Male Doctoratea | Female Doctoratea | ... | Difference Female Bachelor's | Difference All recipients Master's | Difference Male Master's | Difference Female Master's | Difference All recipients Doctoratea | Difference Male Doctoratea | Difference Female Doctoratea | Difference total | Difference Male total | Difference Female total | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1966 | 89 | 76 | 13 | 238 | 221 | 17 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1967 | 222 | 198 | 24 | 449 | 423 | 26 | 0 | 0 | 0 | ... | 11 | 211 | 202 | 9 | 0 | 0 | 0 | 344 | 324 | 20 |
2 | 1968 | 459 | 404 | 55 | 548 | 518 | 30 | 0 | 0 | 0 | ... | 31 | 99 | 95 | 4 | 0 | 0 | 0 | 336 | 301 | 35 |
3 | 1969 | 933 | 812 | 121 | 1012 | 939 | 73 | 0 | 0 | 0 | ... | 66 | 464 | 421 | 43 | 0 | 0 | 0 | 938 | 829 | 109 |
4 | 1970 | 1544 | 1345 | 199 | 1459 | 1324 | 135 | 0 | 0 | 0 | ... | 78 | 447 | 385 | 62 | 0 | 0 | 0 | 1058 | 918 | 140 |
5 rows × 33 columns
# Share of all degrees (every level combined) earned by women and by men,
# one line per gender, plotted against the academic year.
df_p_total = df[[
    "Academic year ending",
    "Percent Female total",
    "Percent Male total",
]]

# Large figure so the lines stay readable.
pylab.rcParams['figure.figsize'] = (20, 18)

# The year becomes the index so that it is used as the x axis.
df_p_total.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fd5907e9ef0>
# Number of degrees (every level combined) earned by women and by men,
# plotted against the academic year.
df_total = df[[
    "Academic year ending",
    "Female total",
    "Male total",
]]

# Large figure so the lines stay readable.
pylab.rcParams['figure.figsize'] = (20, 18)

# The year becomes the index so that it is used as the x axis.
df_total.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fd56419f390>
# Preview the first five rows of the enriched frame.
df.head(n=5)
Academic year ending | All recipients Bachelor's | Male Bachelor's | Female Bachelor's | All recipients Master's | Male Master's | Female Master's | All recipients Doctoratea | Male Doctoratea | Female Doctoratea | ... | Difference Female Bachelor's | Difference All recipients Master's | Difference Male Master's | Difference Female Master's | Difference All recipients Doctoratea | Difference Male Doctoratea | Difference Female Doctoratea | Difference total | Difference Male total | Difference Female total | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1966 | 89 | 76 | 13 | 238 | 221 | 17 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1967 | 222 | 198 | 24 | 449 | 423 | 26 | 0 | 0 | 0 | ... | 11 | 211 | 202 | 9 | 0 | 0 | 0 | 344 | 324 | 20 |
2 | 1968 | 459 | 404 | 55 | 548 | 518 | 30 | 0 | 0 | 0 | ... | 31 | 99 | 95 | 4 | 0 | 0 | 0 | 336 | 301 | 35 |
3 | 1969 | 933 | 812 | 121 | 1012 | 939 | 73 | 0 | 0 | 0 | ... | 66 | 464 | 421 | 43 | 0 | 0 | 0 | 938 | 829 | 109 |
4 | 1970 | 1544 | 1345 | 199 | 1459 | 1324 | 135 | 0 | 0 | 0 | ... | 78 | 447 | 385 | 62 | 0 | 0 | 0 | 1058 | 918 | 140 |
5 rows × 33 columns
# Percentage of degrees earned by women: one line per degree level plus one
# for all levels combined, plotted against the academic year.
df_p_female = df[[
    "Academic year ending",
    "Percent Female Bachelor's",
    "Percent Female Master's",
    "Percent Female Doctoratea",
    "Percent Female total",
]]

# Large figure so the lines stay readable.
pylab.rcParams['figure.figsize'] = (20, 18)

# The year becomes the index so that it is used as the x axis.
df_p_female.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fd56498b630>
# Number of degrees earned by women: one line per degree level plus one for
# all levels combined, plotted against the academic year.
df_female = df[[
    "Academic year ending",
    "Female Bachelor's",
    "Female Master's",
    "Female Doctoratea",
    "Female total",
]]

# Large figure so the lines stay readable.
pylab.rcParams['figure.figsize'] = (20, 18)

# The year becomes the index so that it is used as the x axis.
df_female.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fd5648a59b0>
# Number of degrees earned by men: one line per degree level plus one for
# all levels combined, plotted against the academic year.
df_male = df[[
    "Academic year ending",
    "Male Bachelor's",
    "Male Master's",
    "Male Doctoratea",
    "Male total",
]]

# Large figure so the lines stay readable.
pylab.rcParams['figure.figsize'] = (20, 18)

# The year becomes the index so that it is used as the x axis.
df_male.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fd564847a58>
# Preview the first five rows again before plotting the difference columns.
df.head(n=5)
Academic year ending | All recipients Bachelor's | Male Bachelor's | Female Bachelor's | All recipients Master's | Male Master's | Female Master's | All recipients Doctoratea | Male Doctoratea | Female Doctoratea | ... | Difference Female Bachelor's | Difference All recipients Master's | Difference Male Master's | Difference Female Master's | Difference All recipients Doctoratea | Difference Male Doctoratea | Difference Female Doctoratea | Difference total | Difference Male total | Difference Female total | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1966 | 89 | 76 | 13 | 238 | 221 | 17 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1967 | 222 | 198 | 24 | 449 | 423 | 26 | 0 | 0 | 0 | ... | 11 | 211 | 202 | 9 | 0 | 0 | 0 | 344 | 324 | 20 |
2 | 1968 | 459 | 404 | 55 | 548 | 518 | 30 | 0 | 0 | 0 | ... | 31 | 99 | 95 | 4 | 0 | 0 | 0 | 336 | 301 | 35 |
3 | 1969 | 933 | 812 | 121 | 1012 | 939 | 73 | 0 | 0 | 0 | ... | 66 | 464 | 421 | 43 | 0 | 0 | 0 | 938 | 829 | 109 |
4 | 1970 | 1544 | 1345 | 199 | 1459 | 1324 | 135 | 0 | 0 | 0 | ... | 78 | 447 | 385 | 62 | 0 | 0 | 0 | 1058 | 918 | 140 |
5 rows × 33 columns
# Year-over-year change in the number of degrees earned by men: one line per
# degree level plus one for all levels combined.
df_male = df[[
    "Academic year ending",
    "Difference Male Bachelor's",
    "Difference Male Master's",
    "Difference Male Doctoratea",
    "Difference Male total",
]]

# Large figure so the lines stay readable.
pylab.rcParams['figure.figsize'] = (20, 18)

# The year becomes the index so that it is used as the x axis.
df_male.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fd56466aac8>
# Year-over-year change in the number of degrees earned by women: one line
# per degree level plus one for all levels combined.
# NOTE: despite its name, `df_male` holds the *female* difference columns here.
df_male = df[[
    "Academic year ending",
    "Difference Female Bachelor's",
    "Difference Female Master's",
    "Difference Female Doctoratea",
    "Difference Female total",
]]

# Large figure so the lines stay readable.
pylab.rcParams['figure.figsize'] = (20, 18)

# The year becomes the index so that it is used as the x axis.
df_male.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fd5644aa080>
# Year-over-year change in Bachelor's degrees, men versus women.
# NOTE: despite its name, `df_male` holds both the male and the female
# Bachelor's difference columns here.
df_male = df[[
    "Academic year ending",
    "Difference Male Bachelor's",
    "Difference Female Bachelor's",
]]

# Large figure so the lines stay readable.
pylab.rcParams['figure.figsize'] = (20, 18)

# The year becomes the index so that it is used as the x axis.
df_male.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7fd5646cfda0>