Data collected from https://www.nsf.gov/statistics/nsf13327/content.cfm?pub_id=4266&id=2
import pandas as pd
import numpy as np
import seaborn as sns
%pylab inline
# Use seaborn's dark-grid theme for the figures drawn below.
sns.set(style="darkgrid")

# Load the degree counts from the CSV file.
df = pd.read_csv('datasbio.csv')

# "Academic year ending" is intentionally kept as a regular column (the frame
# keeps its default 0..n-1 row index): the plotting cells select that column
# by name and only turn it into the index on their own temporary copies.
# A bare `df.set_index([...])` here would return a new frame and leave `df`
# unchanged, so it is not called.
df.head(6)
Populating the interactive namespace from numpy and matplotlib
Academic year ending | All recipients Bachelor's | Male Bachelor's | Female Bachelor's | All recipients Master's | Male Master's | Female Master's | All recipients Doctoratea | Male Doctoratea | Female Doctoratea | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 1966 | 23477 | 16143 | 7334 | 4224 | 3077 | 1147 | 2135 | 1818 | 317 |
1 | 1967 | 25476 | 17686 | 7790 | 4988 | 3707 | 1281 | 2360 | 1971 | 389 |
2 | 1968 | 28710 | 19993 | 8717 | 5517 | 3963 | 1554 | 2827 | 2351 | 476 |
3 | 1969 | 32388 | 22663 | 9725 | 5765 | 4096 | 1669 | 3092 | 2566 | 526 |
4 | 1970 | 34303 | 24061 | 10242 | 5835 | 3991 | 1844 | 3361 | 2846 | 515 |
5 | 1971 | 36033 | 25462 | 10571 | 5756 | 3813 | 1943 | 3654 | 3023 | 631 |
# Share of each sex among the recipients of every degree level, in percent.
for level in ("Bachelor's", "Master's", "Doctoratea"):
    recipients = df["All recipients " + level]
    for sex in ("Male", "Female"):
        df["Percent " + sex + " " + level] = df[sex + " " + level] / recipients * 100

# Head counts summed over the three degree levels, per group.
for summed, group in (
    ("total", "All recipients"),
    ("Male total", "Male"),
    ("Female total", "Female"),
):
    df[summed] = (
        df[group + " Bachelor's"]
        + df[group + " Master's"]
        + df[group + " Doctoratea"]
    )

# Share of each sex in the combined head count, in percent.
for sex in ("Male", "Female"):
    df["Percent " + sex + " total"] = df[sex + " total"] * 100 / df["total"]
# Year-over-year change of every head-count column.
# For each listed column a new "Difference <column>" column is added whose
# row i holds value[i] - value[i-1]; the first row has no predecessor, so its
# difference is set to 0.
tracked_columns = [
    "All recipients Bachelor's", "Male Bachelor's", "Female Bachelor's",
    "All recipients Master's", "Male Master's", "Female Master's",
    "All recipients Doctoratea", "Male Doctoratea", "Female Doctoratea",
    "total", "Male total", "Female total",
]
for column in tracked_columns:
    counts = df[column].values
    # np.diff gives the consecutive differences (one element shorter than the
    # column); the leading 0 fills the first row and keeps the length aligned.
    df["Difference " + column] = np.concatenate(([0], np.diff(counts)))
# Replace every missing value in the table with 0.
df = df.fillna(0)

# "Academic year ending" stays a regular column (default row index): the
# plotting cells select it by name. A bare `df.set_index([...])` would return
# a new frame without modifying `df`, so it is not called here.
df.head()
Academic year ending | All recipients Bachelor's | Male Bachelor's | Female Bachelor's | All recipients Master's | Male Master's | Female Master's | All recipients Doctoratea | Male Doctoratea | Female Doctoratea | ... | Difference Female Bachelor's | Difference All recipients Master's | Difference Male Master's | Difference Female Master's | Difference All recipients Doctoratea | Difference Male Doctoratea | Difference Female Doctoratea | Difference total | Difference Male total | Difference Female total | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1966 | 23477 | 16143 | 7334 | 4224 | 3077 | 1147 | 2135 | 1818 | 317 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1967 | 25476 | 17686 | 7790 | 4988 | 3707 | 1281 | 2360 | 1971 | 389 | ... | 456 | 764 | 630 | 134 | 225 | 153 | 72 | 2988 | 2326 | 662 |
2 | 1968 | 28710 | 19993 | 8717 | 5517 | 3963 | 1554 | 2827 | 2351 | 476 | ... | 927 | 529 | 256 | 273 | 467 | 380 | 87 | 4230 | 2943 | 1287 |
3 | 1969 | 32388 | 22663 | 9725 | 5765 | 4096 | 1669 | 3092 | 2566 | 526 | ... | 1008 | 248 | 133 | 115 | 265 | 215 | 50 | 4191 | 3018 | 1173 |
4 | 1970 | 34303 | 24061 | 10242 | 5835 | 3991 | 1844 | 3361 | 2846 | 515 | ... | 517 | 70 | -105 | 175 | 269 | 280 | -11 | 2254 | 1573 | 681 |
5 rows × 33 columns
# Plot the overall female vs. male share (percent) against the academic year.
pylab.rcParams['figure.figsize'] = (20, 18)
df_p_total = df.loc[
    :,
    ["Academic year ending", "Percent Female total", "Percent Male total"],
]
df_p_total.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f685878ce48>
# Plot the overall female vs. male head counts against the academic year.
pylab.rcParams['figure.figsize'] = (20, 18)
df_total = df.loc[
    :,
    ["Academic year ending", "Female total", "Male total"],
]
df_total.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f68586c8940>
# Preview the first five rows of the enriched table.
df.head(n=5)
Academic year ending | All recipients Bachelor's | Male Bachelor's | Female Bachelor's | All recipients Master's | Male Master's | Female Master's | All recipients Doctoratea | Male Doctoratea | Female Doctoratea | ... | Difference Female Bachelor's | Difference All recipients Master's | Difference Male Master's | Difference Female Master's | Difference All recipients Doctoratea | Difference Male Doctoratea | Difference Female Doctoratea | Difference total | Difference Male total | Difference Female total | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1966 | 23477 | 16143 | 7334 | 4224 | 3077 | 1147 | 2135 | 1818 | 317 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1967 | 25476 | 17686 | 7790 | 4988 | 3707 | 1281 | 2360 | 1971 | 389 | ... | 456 | 764 | 630 | 134 | 225 | 153 | 72 | 2988 | 2326 | 662 |
2 | 1968 | 28710 | 19993 | 8717 | 5517 | 3963 | 1554 | 2827 | 2351 | 476 | ... | 927 | 529 | 256 | 273 | 467 | 380 | 87 | 4230 | 2943 | 1287 |
3 | 1969 | 32388 | 22663 | 9725 | 5765 | 4096 | 1669 | 3092 | 2566 | 526 | ... | 1008 | 248 | 133 | 115 | 265 | 215 | 50 | 4191 | 3018 | 1173 |
4 | 1970 | 34303 | 24061 | 10242 | 5835 | 3991 | 1844 | 3361 | 2846 | 515 | ... | 517 | 70 | -105 | 175 | 269 | 280 | -11 | 2254 | 1573 | 681 |
5 rows × 33 columns
# Plot the female share (percent) for each degree level and overall,
# against the academic year.
pylab.rcParams['figure.figsize'] = (20, 18)
df_p_female = df.loc[
    :,
    [
        "Academic year ending",
        "Percent Female Bachelor's",
        "Percent Female Master's",
        "Percent Female Doctoratea",
        "Percent Female total",
    ],
]
df_p_female.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f68586c8ba8>
# Plot the female head counts for each degree level and overall,
# against the academic year.
pylab.rcParams['figure.figsize'] = (20, 18)
df_female = df.loc[
    :,
    [
        "Academic year ending",
        "Female Bachelor's",
        "Female Master's",
        "Female Doctoratea",
        "Female total",
    ],
]
df_female.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f685857a3c8>
# Plot the male head counts for each degree level and overall,
# against the academic year.
pylab.rcParams['figure.figsize'] = (20, 18)
df_male = df.loc[
    :,
    [
        "Academic year ending",
        "Male Bachelor's",
        "Male Master's",
        "Male Doctoratea",
        "Male total",
    ],
]
df_male.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f68584fe748>
# Preview the first five rows of the table again before the difference plots.
df.head(n=5)
Academic year ending | All recipients Bachelor's | Male Bachelor's | Female Bachelor's | All recipients Master's | Male Master's | Female Master's | All recipients Doctoratea | Male Doctoratea | Female Doctoratea | ... | Difference Female Bachelor's | Difference All recipients Master's | Difference Male Master's | Difference Female Master's | Difference All recipients Doctoratea | Difference Male Doctoratea | Difference Female Doctoratea | Difference total | Difference Male total | Difference Female total | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1966 | 23477 | 16143 | 7334 | 4224 | 3077 | 1147 | 2135 | 1818 | 317 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 1967 | 25476 | 17686 | 7790 | 4988 | 3707 | 1281 | 2360 | 1971 | 389 | ... | 456 | 764 | 630 | 134 | 225 | 153 | 72 | 2988 | 2326 | 662 |
2 | 1968 | 28710 | 19993 | 8717 | 5517 | 3963 | 1554 | 2827 | 2351 | 476 | ... | 927 | 529 | 256 | 273 | 467 | 380 | 87 | 4230 | 2943 | 1287 |
3 | 1969 | 32388 | 22663 | 9725 | 5765 | 4096 | 1669 | 3092 | 2566 | 526 | ... | 1008 | 248 | 133 | 115 | 265 | 215 | 50 | 4191 | 3018 | 1173 |
4 | 1970 | 34303 | 24061 | 10242 | 5835 | 3991 | 1844 | 3361 | 2846 | 515 | ... | 517 | 70 | -105 | 175 | 269 | 280 | -11 | 2254 | 1573 | 681 |
5 rows × 33 columns
# Plot the year-over-year change in male head counts for each degree level
# and overall, against the academic year.
pylab.rcParams['figure.figsize'] = (20, 18)
df_male = df.loc[
    :,
    [
        "Academic year ending",
        "Difference Male Bachelor's",
        "Difference Male Master's",
        "Difference Male Doctoratea",
        "Difference Male total",
    ],
]
df_male.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f6857f09da0>
# Plot the year-over-year change in female head counts for each degree level
# and overall, against the academic year.
# NOTE: despite its name, `df_male` holds the female difference columns here.
pylab.rcParams['figure.figsize'] = (20, 18)
df_male = df.loc[
    :,
    [
        "Academic year ending",
        "Difference Female Bachelor's",
        "Difference Female Master's",
        "Difference Female Doctoratea",
        "Difference Female total",
    ],
]
df_male.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f68584090f0>
# Compare the year-over-year change in male and female Bachelor's head
# counts, against the academic year.
# NOTE: despite its name, `df_male` holds both the male and female columns here.
pylab.rcParams['figure.figsize'] = (20, 18)
df_male = df.loc[
    :,
    [
        "Academic year ending",
        "Difference Male Bachelor's",
        "Difference Female Bachelor's",
    ],
]
df_male.set_index("Academic year ending").plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7f68581ab048>