#!/usr/bin/env python
# coding: utf-8

# In[1]:

# Import dependencies.
import pandas as pd
import matplotlib.pyplot as plt

plt.style.use('fivethirtyeight')
# `get_ipython` is not imported anywhere in this file: it is only available
# when the script runs inside an IPython/Jupyter session.
get_ipython().run_line_magic('matplotlib', 'inline')


# The code below is cited from Brandon Rhodes. His Pandas Tutorial is very
# helpful and teaches the core features of the Pandas library.
#
# - Tutorial: https://www.youtube.com/watch?v=5JnMutdy6Fw
# - Github: https://github.com/brandon-rhodes/pycon-pandas-tutorial
#
# I like using this CSS style in my projects because it makes it easy on the
# eye to tell data, index and header apart. It comes in very handy, especially
# when you have multiple indexes with headers.

# In[2]:

from IPython.display import HTML

# Read both stylesheets inside a `with` block so the file handles are closed.
with open('style-table.css') as table_css, \
        open('style-notebook.css') as notebook_css:
    css = table_css.read() + notebook_css.read()
# The CSS must be wrapped in a <style> element; formatting it into an empty
# template would silently discard it.
HTML('<style>{}</style>'.format(css))


# In[3]:

# Assign the dataframe to a variable called df.
df = pd.read_csv('NationalNames.csv', index_col='Id')
df.head()


def _total_count(names_df, name):
    """Return the sum of the 'Count' column over all rows whose 'Name' is *name*."""
    return names_df.loc[names_df['Name'] == name, 'Count'].sum()


def _yearly_counts(names_df, name):
    """Return a frame indexed by 'Year' holding the summed 'Count' for *name*.

    Only the 'Year' and 'Count' columns are kept before grouping so that the
    string columns never take part in the aggregation.
    """
    selected = names_df.loc[names_df['Name'] == name, ['Year', 'Count']]
    return selected.groupby('Year').sum()


# In[4]:

# Get some information about our dataset.
df.info()


# There are 1,825,433 rows in our dataset.

# In[5]:

df['Year'].min()


# In[6]:

df['Year'].max()


# Our dataset has names from 1880 to 2014.

# In[7]:

# Total count recorded for female names (sum of 'Count' where Gender is 'F').
f = df.loc[df['Gender'] == 'F', 'Count'].sum()
f


# In[8]:

# Total count recorded for male names (sum of 'Count' where Gender is 'M').
m = df.loc[df['Gender'] == 'M', 'Count'].sum()
m


# In[9]:

# Total count over both genders.
f + m


# In[10]:

# Difference between the male and female totals.
m - f


# In[11]:

# How many babies were named Nicole over the whole dataset?
Nicole = _total_count(df, 'Nicole')
Nicole


# In[12]:

_yearly_counts(df, 'Nicole').plot(grid=True, figsize=(10, 5))
plt.xlabel('Year')
plt.ylabel('Number of Names')
plt.title('Number of Nicole in the history of the US')
plt.xlim(1880, 2016);


# In[13]:

# How many babies were named Numan over the whole dataset?
Numan = _total_count(df, 'Numan')
Numan


# In[14]:

# Same column selection as the other name plots: only 'Count' is summed per
# year, instead of summing every column of the filtered frame.
_yearly_counts(df, 'Numan').plot(kind='bar', figsize=(10, 5))
plt.xlabel('Year')
plt.ylabel('Number of Names')
plt.title('Number of Numans in the history of the US');


# In[15]:

# How many babies were named Esma over the whole dataset?
Esma = _total_count(df, 'Esma')
Esma


# In[16]:

_yearly_counts(df, 'Esma').plot(grid=True, figsize=(10, 5))
plt.xlabel('Year')
plt.ylabel('Number of Names')
plt.title('Number of Esma in the history of the US')
plt.xlim(1880, 2016);


# In[ ]: