#!/usr/bin/env python
# coding: utf-8

# # Exercise 02.1
#
# Create a function that receives two inputs a and b, and returns the product
# of the a-th decimal of pi and the b-th decimal of pi.
#
# ```
# i.e,
# pi = 3.14159
# if a = 2 and b = 4
# result = 4 * 5
# result = 20
# ```
#
# Caveats:
# - a and b are between 1 and 15
# - decimal positions 1 and 2 are 1 and 4, respectively.
#   (remember that Python starts indexing at 0)

# In[ ]:


import zipfile
from math import pi

# The 15 digits after the decimal point of math.pi: '141592653589793'.
_PI_DECIMALS = format(pi, '.15f').split('.')[1]


def mult_dec_pi(a, b):
    """Return the product of the a-th and b-th decimal digits of pi.

    Positions are 1-based: position 1 is the first digit after the decimal
    point (1), position 2 is the second (4), and so on up to position 15.

    Args:
        a: Position of the first decimal digit, an integer from 1 to 15.
        b: Position of the second decimal digit, an integer from 1 to 15.

    Returns:
        The product of the two digits as a float (e.g. ``20.0``), or the
        string ``'Error'`` when either position is not an integer within
        the supported range (the behaviour asked for by the bonus cell).
    """
    last_position = len(_PI_DECIMALS)
    for position in (a, b):
        if not isinstance(position, int) or not 1 <= position <= last_position:
            return 'Error'
    # Positions are 1-based, string indices are 0-based.
    return float(int(_PI_DECIMALS[a - 1]) * int(_PI_DECIMALS[b - 1]))


# In[ ]:


mult_dec_pi(a=2, b=4)  # 20.0


# In[ ]:


mult_dec_pi(a=5, b=10)  # 45.0


# In[ ]:


mult_dec_pi(a=14, b=1)  # 9.0


# In[ ]:


mult_dec_pi(a=6, b=8)  # 10.0


# In[ ]:


# Bonus
mult_dec_pi(a=16, b=4)  # 'Error'


# # Exercise 02.2
#
# Using the given dataset. Estimate a linear regression between Employed and GNP.
#
# $$Employed = b_0 + b_1 * GNP $$
#
# $$\hat b = (X^TX)^{-1}X^TY$$
# $$Y = Employed$$
# $$X = [1 \quad GNP]$$

# In[13]:


# Import data
raw_data = """
Year,Employed,GNP
1947,60.323,234.289
1948,61.122,259.426
1949,60.171,258.054
1950,61.187,284.599
1951,63.221,328.975
1952,63.639,346.999
1953,64.989,365.385
1954,63.761,363.112
1955,66.019,397.469
1956,67.857,419.18
1957,68.169,442.769
1958,66.513,444.546
1959,68.655,482.704
1960,69.564,502.601
1961,69.331,518.173
1962,70.551,554.894"""


# Everything below needs third-party packages, an optional IPython shell and
# the dataset on disk, so it only runs when this file is executed as a script
# or notebook. Importing the module for `mult_dec_pi` needs the standard
# library alone.
if __name__ == "__main__":
    import matplotlib.pyplot as plt
    import numpy as np

    # The inline-plot magic only exists inside IPython/Jupyter; under a plain
    # `python` run `get_ipython` is not defined and the magic is skipped.
    try:
        ipython_shell = get_ipython()
    except NameError:
        ipython_shell = None
    if ipython_shell is not None:
        ipython_shell.run_line_magic('matplotlib', 'inline')

    # Strip the surrounding blank line, skip the header row and split each
    # record into its Year, Employed and GNP fields.
    data = [line.split(',') for line in raw_data.strip().splitlines()[1:]]
    # `np.float` was removed from NumPy; the builtin `float` is the same dtype.
    data = np.array(data, dtype=float)
    n_obs = data.shape[0]

    # Column 2 is GNP (x axis), column 1 is Employed (y axis).
    plt.plot(data[:, 2], data[:, 1], 'bo')
    plt.xlabel("GNP")
    plt.ylabel("Employed")

    # # Exercise 02.3
    #
    # Analyze the baby names dataset using pandas

    # In[7]:

    import pandas as pd

    # Load dataset
    with zipfile.ZipFile('../datasets/baby-names2.csv.zip', 'r') as z:
        # Close the member handle as soon as pandas has read it.
        with z.open('baby-names2.csv') as f:
            names = pd.read_csv(f)

    # In[8]:

    names.head()

    # In[9]:

    names[names.year == 1993].head()

    # ### segment the data into boy and girl names

    # In[11]:

    boys = names[names.sex == 'boy'].copy()
    girls = names[names.sex == 'girl'].copy()

    # ### Analyzing the popularity of a name over time

    # In[14]:

    william = boys[boys['name'] == 'William']
    plt.plot(range(william.shape[0]), william['prop'])
    # Label every 5th row position with the year of that row.
    plt.xticks(
        range(william.shape[0])[::5],
        william['year'].values[::5],
        rotation='vertical',
    )
    plt.ylim([0, 0.1])
    plt.show()

    # In[15]:

    Daniel = boys[boys['name'] == 'Daniel']
    plt.plot(range(Daniel.shape[0]), Daniel['prop'])
    # Label every 5th row position with the year of that row.
    plt.xticks(
        range(Daniel.shape[0])[::5],
        Daniel['year'].values[::5],
        rotation='vertical',
    )
    plt.ylim([0, 0.1])
    plt.show()

    # # Exercise 02.3
    #
    # Which has been the most popular boy name every decade?

    # In[ ]:

    # # Exercise 02.4
    #
    # Which has been the most popular girl name?
    #

    # In[ ]:

    # # Exercise 02.5
    #
    # What is the most popular new girl name? (new is a name that appears only in the 2000's)

    # In[ ]: