#!/usr/bin/env python
# coding: utf-8

# # Exercise 02.1
# 
# Create a function that receives two inputs, a and b, and returns the product of the a-th and b-th decimal digits of pi.
# 
# ```
# e.g.,
# pi = 3.14159
# if a = 2 and b = 4
# result = 4 * 5
# result = 20
# ```
# 
# Caveats:
# - a and b are between 1 and 15
# - Decimal positions 1 and 2 are 1 and 4, respectively (remember that Python starts indexing at 0).

# In[ ]:


from math import pi
def mult_dec_pi(a, b):
    """Return the product of the a-th and b-th decimal digits of pi.

    Positions are 1-based: position 1 is the first digit after the decimal
    point, so for 3.14159... positions 1, 2, 3, 4, 5 hold 1, 4, 1, 5, 9.

    Args:
        a: Position of the first decimal digit, an integer from 1 to 15.
        b: Position of the second decimal digit, an integer from 1 to 15.

    Returns:
        The product of the two digits as a float, or the string 'Error'
        when either position is not an integer between 1 and 15.
    """
    # Fixed-point formatting pins the digit count to the 15 decimals the
    # exercise allows, independent of how the float would otherwise print.
    decimals = '{:.15f}'.format(pi).split('.')[1]

    for position in (a, b):
        if not isinstance(position, int) or not 1 <= position <= len(decimals):
            return 'Error'

    # Shift by one: the exercise counts from 1, Python strings from 0.
    return float(int(decimals[a - 1]) * int(decimals[b - 1]))


# In[ ]:


# Decimals 2 and 4 of pi are 4 and 5.
mult_dec_pi(a=2, b=4)
# Expected value per the exercise statement: 20.0


# In[ ]:


# Decimals 5 and 10 of pi are 9 and 5.
mult_dec_pi(a=5, b=10)
# Expected value per the exercise statement: 45.0


# In[ ]:


# Decimals 14 and 1 of pi are 9 and 1.
mult_dec_pi(a=14, b=1)
# Expected value per the exercise statement: 9.0


# In[ ]:


# Decimals 6 and 8 of pi are 2 and 5.
mult_dec_pi(a=6, b=8)
# Expected value per the exercise statement: 10.0


# In[ ]:


# Bonus: a=16 is outside the allowed 1..15 range, so the call should report
# an error instead of a product.
mult_dec_pi(a=16, b=4)
# Expected value per the exercise statement: 'Error'


# # Exercise 02.2
# 
# Using the given dataset, estimate a linear regression between Employed and GNP.
# 
# $$Employed = b_0 + b_1 * GNP $$
# 
# $$\hat b = (X^TX)^{-1}X^TY$$
# $$Y = Employed$$
# $$X = [1  \quad GNP]$$

# In[13]:


try:
    # Render matplotlib figures inline when running under IPython/Jupyter.
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    # get_ipython is only defined inside an IPython session. When this file
    # runs as a plain script the magic is skipped and matplotlib keeps its
    # default backend.
    pass
import numpy as np
import matplotlib.pyplot as plt
# Inline copy of the dataset: a header row followed by one
# "Year,Employed,GNP" record per line.
raw_data = """
Year,Employed,GNP
1947,60.323,234.289
1948,61.122,259.426
1949,60.171,258.054
1950,61.187,284.599
1951,63.221,328.975
1952,63.639,346.999
1953,64.989,365.385
1954,63.761,363.112
1955,66.019,397.469
1956,67.857,419.18
1957,68.169,442.769
1958,66.513,444.546
1959,68.655,482.704
1960,69.564,502.601
1961,69.331,518.173
1962,70.551,554.894"""

# The literal opens with a newline, so splitlines() yields an empty string
# and then the header; skip both and keep only the numeric records.
data = [line.split(',') for line in raw_data.splitlines()[2:]]
# Use the builtin float: the np.float alias was removed from NumPy and raises
# AttributeError on current releases. The resulting dtype is the same float64.
data = np.array(data, dtype=float)
# Number of observations (rows); columns are Year, Employed, GNP.
n_obs = data.shape[0]
# Scatter of Employed (column 1) against GNP (column 2); 'bo' draws the
# points as blue circles with no connecting line.
gnp, employed = data[:, 2], data[:, 1]
plt.plot(gnp, employed, 'bo')
plt.xlabel("GNP")
plt.ylabel("Employed")


# # Exercise 02.3
# 
# Analyze the baby names dataset using pandas

# In[7]:


import pandas as pd
# Load dataset
import zipfile
# Read the CSV member straight out of the zip archive. Both the archive and
# the member handle are context-managed so neither stays open after loading.
with zipfile.ZipFile('../datasets/baby-names2.csv.zip', 'r') as z:
    with z.open('baby-names2.csv') as f:
        # pd.read_csv is the public entry point for comma-separated data.
        names = pd.read_csv(f, sep=',')


# In[8]:


# Show the first rows of the full table.
names.head()


# In[9]:


# Show the first rows whose 'year' column equals 1993.
names.loc[names['year'] == 1993].head()


# ### segment the data into boy and girl names

# In[11]:


# Split on the 'sex' column; .copy() makes each subset an independent
# DataFrame rather than a selection tied to `names`.
boys = names.loc[names['sex'] == 'boy'].copy()
girls = names.loc[names['sex'] == 'girl'].copy()


# ### Analyzing the popularity of a name over time

# In[14]:


# Every row of the boys table recorded for the name "William".
william = boys.loc[boys['name'] == 'William']

# Plot the 'prop' column (presumably the name's share of births; confirm
# against the dataset) by row position, then label every fifth tick with the
# matching 'year' value. Assumes rows are in chronological order; confirm.
plt.plot(range(len(william)), william['prop'])
plt.xticks(
    range(len(william))[::5],
    william['year'].values[::5],
    rotation='vertical',
)
plt.ylim([0, 0.1])
plt.show()


# In[15]:


# Every row of the boys table recorded for the name "Daniel".
Daniel = boys.loc[boys['name'] == 'Daniel']

# Plot the 'prop' column (presumably the name's share of births; confirm
# against the dataset) by row position, then label every fifth tick with the
# matching 'year' value. Assumes rows are in chronological order; confirm.
plt.plot(range(len(Daniel)), Daniel['prop'])
plt.xticks(
    range(len(Daniel))[::5],
    Daniel['year'].values[::5],
    rotation='vertical',
)
plt.ylim([0, 0.1])
plt.show()


# # Exercise 02.3
# 
# Which boy name has been the most popular in each decade?

# In[ ]:


# # Exercise 02.4
# 
# Which has been the most popular girl name?
# 

# In[ ]:


# # Exercise 02.5
# 
# What is the most popular new girl name? (A "new" name is one that appears only in the 2000s.)

# In[ ]: