#!/usr/bin/env python
# coding: utf-8

# Degrees awarded by sex and degree level, per academic year.
#
# Data collected from
# https://www.nsf.gov/statistics/nsf13327/content.cfm?pub_id=4266&id=2
#
# Notebook export: it must run under IPython, because `get_ipython()` and the
# `pylab` name (injected by the %pylab magic) only exist there.
#
# NOTE: %pylab star-imports NumPy/Matplotlib names into the namespace and
# replaces builtins such as `sum`, `any` and `all` with the NumPy versions.
# The code below deliberately avoids those builtins.

# In[1]:

import pandas as pd
import numpy as np
import seaborn as sns

get_ipython().run_line_magic('pylab', 'inline')

sns.set(style="darkgrid")

YEAR_COLUMN = "Academic year ending"
# Degree-level suffixes exactly as they are spelled in the CSV header.
DEGREE_LEVELS = ("Bachelor's", "Master's", "Doctoratea")


def plot_by_year(frame, columns):
    """Plot `columns` of `frame` against the academic year.

    Returns the selected sub-frame (year column + `columns`, year still a
    regular column) so that callers can keep it for later inspection.
    """
    subset = frame[[YEAR_COLUMN] + list(columns)]
    subset.set_index(YEAR_COLUMN).plot()
    return subset


df = pd.read_csv('datasbio.csv')
# The year is kept as a regular column: every plot below selects it by name
# and indexes on it locally. (The original called df.set_index(...) here and
# discarded the result, which had no effect.)
df.head(6)


# In[2]:

# Percentage of each degree level awarded to men / women.
for level in DEGREE_LEVELS:
    level_all = df["All recipients " + level]
    for sex in ("Male", "Female"):
        df["Percent " + sex + " " + level] = df[sex + " " + level] / level_all * 100

# Totals across the three degree levels.
for group, total_name in (("All recipients", "total"),
                          ("Male", "Male total"),
                          ("Female", "Female total")):
    df[total_name] = (df[group + " Bachelor's"]
                      + df[group + " Master's"]
                      + df[group + " Doctoratea"])

for sex in ("Male", "Female"):
    df["Percent " + sex + " total"] = df[sex + " total"] * 100 / df["total"]

# Year-over-year differences: row i minus row i-1, with 0 for the first row.
difference_sources = [
    group + " " + level
    for level in DEGREE_LEVELS
    for group in ("All recipients", "Male", "Female")
] + ["total", "Male total", "Female total"]

for column in difference_sources:
    # diff() leaves NaN in the first row; fill it with 0 and cast back so an
    # integer count column yields integer differences.
    df["Difference " + column] = (
        df[column].diff().fillna(0).astype(df[column].dtype)
    )

df = df.fillna(0)
df.head()


# In[3]:

# One figure size for every plot below.
pylab.rcParams['figure.figsize'] = (20, 18)

# Percentage of all degrees: female vs. male.
df_p_total = plot_by_year(df, ["Percent Female total", "Percent Male total"])


# In[4]:

# Absolute number of degrees: female vs. male.
df_total = plot_by_year(df, ["Female total", "Male total"])


# In[5]:

df.head()


# In[6]:

# Female share per degree level and overall.
df_p_female = plot_by_year(df, [
    "Percent Female Bachelor's",
    "Percent Female Master's",
    "Percent Female Doctoratea",
    "Percent Female total",
])


# In[7]:

# Female degree counts per level and overall.
df_female = plot_by_year(df, [
    "Female Bachelor's",
    "Female Master's",
    "Female Doctoratea",
    "Female total",
])


# In[8]:

# Male degree counts per level and overall.
df_male = plot_by_year(df, [
    "Male Bachelor's",
    "Male Master's",
    "Male Doctoratea",
    "Male total",
])


# In[9]:

df.head()


# In[10]:

# Year-over-year change in male degree counts.
# The name `df_male` is reused in the next three cells, as in the original
# notebook, so the final module-level bindings are unchanged.
df_male = plot_by_year(df, [
    "Difference Male Bachelor's",
    "Difference Male Master's",
    "Difference Male Doctoratea",
    "Difference Male total",
])


# In[11]:

# Year-over-year change in female degree counts.
df_male = plot_by_year(df, [
    "Difference Female Bachelor's",
    "Difference Female Master's",
    "Difference Female Doctoratea",
    "Difference Female total",
])


# In[12]:

# Year-over-year change in Bachelor's degrees: male vs. female.
df_male = plot_by_year(df, [
    "Difference Male Bachelor's",
    "Difference Female Bachelor's",
])