In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
%pylab inline


sns.set(style="darkgrid")

# Load the degree-recipient counts from the CSV file.
# "Academic year ending" is intentionally kept as an ordinary column: the
# plotting cells further down select it by name and only then turn it into
# the index.  Note that DataFrame.set_index returns a new frame, so calling
# it here without assigning the result would not change `df` at all.
df = pd.read_csv('datasbio.csv')
df.head(6)
Populating the interactive namespace from numpy and matplotlib
Out[1]:
Academic year ending All recipients Bachelor's Male Bachelor's Female Bachelor's All recipients Master's Male Master's Female Master's All recipients Doctoratea Male Doctoratea Female Doctoratea
0 1966 23477 16143 7334 4224 3077 1147 2135 1818 317
1 1967 25476 17686 7790 4988 3707 1281 2360 1971 389
2 1968 28710 19993 8717 5517 3963 1554 2827 2351 476
3 1969 32388 22663 9725 5765 4096 1669 3092 2566 526
4 1970 34303 24061 10242 5835 3991 1844 3361 2846 515
5 1971 36033 25462 10571 5756 3813 1943 3654 3023 631
In [2]:
# Derived columns: percentage share of each sex per degree level, totals
# across the three degree levels, and row-to-row differences of every
# count column.

DEGREE_LEVELS = ["Bachelor's", "Master's", "Doctoratea"]
SEXES = ["Male", "Female"]


def year_over_year_change(counts):
    """Return each row's change relative to the previous row.

    The first row has no predecessor and is reported as 0.  The result is
    cast back to the dtype of ``counts`` so integer counts give integer
    differences.  ``Series.diff`` works on row position, so this does not
    depend on the frame having a default 0..n-1 index.
    """
    return counts.diff().fillna(0).astype(counts.dtype)


# Share of each sex among all recipients of a degree level, in percent.
for degree in DEGREE_LEVELS:
    recipients = df["All recipients " + degree]
    for sex in SEXES:
        df["Percent " + sex + " " + degree] = df[sex + " " + degree] / recipients * 100

# Totals over the three degree levels.
df["total"] = (
    df["All recipients Bachelor's"]
    + df["All recipients Master's"]
    + df["All recipients Doctoratea"]
)
for sex in SEXES:
    df[sex + " total"] = (
        df[sex + " Bachelor's"] + df[sex + " Master's"] + df[sex + " Doctoratea"]
    )
for sex in SEXES:
    df["Percent " + sex + " total"] = df[sex + " total"] * 100 / df["total"]

# Differences between consecutive rows for every count column, in the order
# "All recipients", "Male", "Female" per degree level, followed by the totals.
count_columns = [
    group + " " + degree
    for degree in DEGREE_LEVELS
    for group in ["All recipients"] + SEXES
] + ["total", "Male total", "Female total"]
for column in count_columns:
    df["Difference " + column] = year_over_year_change(df[column])

# Replace any remaining missing values (e.g. a 0/0 percentage) with 0.
df = df.fillna(0)
df.head()
Out[2]:
Academic year ending All recipients Bachelor's Male Bachelor's Female Bachelor's All recipients Master's Male Master's Female Master's All recipients Doctoratea Male Doctoratea Female Doctoratea ... Difference Female Bachelor's Difference All recipients Master's Difference Male Master's Difference Female Master's Difference All recipients Doctoratea Difference Male Doctoratea Difference Female Doctoratea Difference total Difference Male total Difference Female total
0 1966 23477 16143 7334 4224 3077 1147 2135 1818 317 ... 0 0 0 0 0 0 0 0 0 0
1 1967 25476 17686 7790 4988 3707 1281 2360 1971 389 ... 456 764 630 134 225 153 72 2988 2326 662
2 1968 28710 19993 8717 5517 3963 1554 2827 2351 476 ... 927 529 256 273 467 380 87 4230 2943 1287
3 1969 32388 22663 9725 5765 4096 1669 3092 2566 526 ... 1008 248 133 115 265 215 50 4191 3018 1173
4 1970 34303 24061 10242 5835 3991 1844 3361 2846 515 ... 517 70 -105 175 269 280 -11 2254 1573 681

5 rows × 33 columns

In [3]:
# Percentage of all degrees (the three levels combined) going to women and
# to men, plotted against the academic year.
pylab.rcParams.update({'figure.figsize': (20, 18)})
df_p_total = df.loc[:, ["Academic year ending", "Percent Female total", "Percent Male total"]]
df_p_total.set_index("Academic year ending").plot(kind="line")
Out[3]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f685878ce48>
In [4]:
# Number of degrees (the three levels combined) awarded to women and to men,
# plotted against the academic year.
pylab.rcParams.update({'figure.figsize': (20, 18)})
df_total = df.loc[:, ["Academic year ending", "Female total", "Male total"]]
df_total.set_index("Academic year ending").plot(kind="line")
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f68586c8940>
In [5]:
# Preview the first five rows of the frame, derived columns included.
df.head(n=5)
Out[5]:
Academic year ending All recipients Bachelor's Male Bachelor's Female Bachelor's All recipients Master's Male Master's Female Master's All recipients Doctoratea Male Doctoratea Female Doctoratea ... Difference Female Bachelor's Difference All recipients Master's Difference Male Master's Difference Female Master's Difference All recipients Doctoratea Difference Male Doctoratea Difference Female Doctoratea Difference total Difference Male total Difference Female total
0 1966 23477 16143 7334 4224 3077 1147 2135 1818 317 ... 0 0 0 0 0 0 0 0 0 0
1 1967 25476 17686 7790 4988 3707 1281 2360 1971 389 ... 456 764 630 134 225 153 72 2988 2326 662
2 1968 28710 19993 8717 5517 3963 1554 2827 2351 476 ... 927 529 256 273 467 380 87 4230 2943 1287
3 1969 32388 22663 9725 5765 4096 1669 3092 2566 526 ... 1008 248 133 115 265 215 50 4191 3018 1173
4 1970 34303 24061 10242 5835 3991 1844 3361 2846 515 ... 517 70 -105 175 269 280 -11 2254 1573 681

5 rows × 33 columns

In [6]:
# Percentage of degrees going to women, per degree level and overall,
# plotted against the academic year.
pylab.rcParams.update({'figure.figsize': (20, 18)})
df_p_female = df.loc[
    :,
    [
        "Academic year ending",
        "Percent Female Bachelor's",
        "Percent Female Master's",
        "Percent Female Doctoratea",
        "Percent Female total",
    ],
]
df_p_female.set_index("Academic year ending").plot(kind="line")
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f68586c8ba8>
In [7]:
# Number of degrees awarded to women, per degree level and overall,
# plotted against the academic year.
pylab.rcParams.update({'figure.figsize': (20, 18)})
df_female = df.loc[
    :,
    [
        "Academic year ending",
        "Female Bachelor's",
        "Female Master's",
        "Female Doctoratea",
        "Female total",
    ],
]
df_female.set_index("Academic year ending").plot(kind="line")
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f685857a3c8>
In [8]:
# Number of degrees awarded to men, per degree level and overall,
# plotted against the academic year.
pylab.rcParams.update({'figure.figsize': (20, 18)})
df_male = df.loc[
    :,
    [
        "Academic year ending",
        "Male Bachelor's",
        "Male Master's",
        "Male Doctoratea",
        "Male total",
    ],
]
df_male.set_index("Academic year ending").plot(kind="line")
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f68584fe748>
In [9]:
# Preview the first five rows of the frame again before the difference plots.
df.head(n=5)
Out[9]:
Academic year ending All recipients Bachelor's Male Bachelor's Female Bachelor's All recipients Master's Male Master's Female Master's All recipients Doctoratea Male Doctoratea Female Doctoratea ... Difference Female Bachelor's Difference All recipients Master's Difference Male Master's Difference Female Master's Difference All recipients Doctoratea Difference Male Doctoratea Difference Female Doctoratea Difference total Difference Male total Difference Female total
0 1966 23477 16143 7334 4224 3077 1147 2135 1818 317 ... 0 0 0 0 0 0 0 0 0 0
1 1967 25476 17686 7790 4988 3707 1281 2360 1971 389 ... 456 764 630 134 225 153 72 2988 2326 662
2 1968 28710 19993 8717 5517 3963 1554 2827 2351 476 ... 927 529 256 273 467 380 87 4230 2943 1287
3 1969 32388 22663 9725 5765 4096 1669 3092 2566 526 ... 1008 248 133 115 265 215 50 4191 3018 1173
4 1970 34303 24061 10242 5835 3991 1844 3361 2846 515 ... 517 70 -105 175 269 280 -11 2254 1573 681

5 rows × 33 columns

In [10]:
pylab.rcParams['figure.figsize'] = (20, 18)
# Row-to-row (year-over-year) change in degrees awarded to men, per degree
# level and overall.  Kept under its own name so the male-count frame
# `df_male` from the earlier plotting cell is not overwritten.
df_diff_male = df[["Academic year ending", "Difference Male Bachelor's", "Difference Male Master's", "Difference Male Doctoratea", "Difference Male total"]]
df_diff_male.set_index("Academic year ending").plot()
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f6857f09da0>
In [11]:
pylab.rcParams['figure.figsize'] = (20, 18)
# Row-to-row (year-over-year) change in degrees awarded to women, per degree
# level and overall.  Named for what it holds: these are the "Female"
# difference columns, so the frame must not be called `df_male`.
df_diff_female = df[["Academic year ending", "Difference Female Bachelor's", "Difference Female Master's", "Difference Female Doctoratea", "Difference Female total"]]
df_diff_female.set_index("Academic year ending").plot()
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f68584090f0>
In [12]:
pylab.rcParams['figure.figsize'] = (20, 18)
# Row-to-row (year-over-year) change in bachelor's degrees, men versus women.
# The frame holds both sexes, so it gets its own name instead of `df_male`.
df_diff_bachelors = df[["Academic year ending", "Difference Male Bachelor's", "Difference Female Bachelor's"]]
df_diff_bachelors.set_index("Academic year ending").plot()
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f68581ab048>