Lecture 6

In [ ]:
from datascience import *
import numpy as np

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plots
# Apply the 'fivethirtyeight' style sheet to every plot drawn after this cell.
plots.style.use('fivethirtyeight')

Arrays and lists

In [ ]:
# Build an array of four street names and display it.
streets = make_array('Bancroft', 'Durant', 'Channing', 'Haste')
streets
In [ ]:
# Two-column table: each street name paired with an integer 0..3
# (np.arange(4)) labelled 'Blocks from Campus'.
southside = (
    Table()
    .with_column('Street', streets)
    .with_column('Blocks from Campus', np.arange(4))
)
southside
In [ ]:
# Inspect what kind of object a single table row (the first one) is.
type(southside.row(0))
In [ ]:
# Rebind `southside` to a table that also contains the row ('Dwight', 4).
# Because the name is reassigned from itself, running this cell a second time
# adds a second 'Dwight' row; run it once per fresh kernel.
southside = southside.with_row(['Dwight', 4])
southside
In [ ]:
# Add a 'One-Way' column. The list has five entries: one for each of the four
# original streets plus the 'Dwight' row added in the previous cell, so this
# cell depends on that cell having run exactly once.
southside = southside.with_column('One-Way', ['Yes', 'Yes', 'No', 'Yes', 'Yes'])
southside
In [ ]:
# Pull the 'One-Way' column back out of the table and display its values.
southside.column('One-Way')

Discussion

In [ ]:
# Load the salary data from 'nba_salaries.csv' (path is relative to the
# notebook's working directory) and display it.
nba = Table.read_table('nba_salaries.csv')
nba
In [ ]:
# Rename the column at index 3 to 'SALARY', then remove the 'TEAM' column.
# This rebinds `nba` from itself, so it is meant to run once after loading:
# on a second run 'TEAM' has already been dropped.
nba = (
    nba
    .relabeled(3, 'SALARY')
    .drop('TEAM')
)
nba
In [ ]:
# Create an array containing the names of all point guards (PG) who make more
# than $15M/year. This version addresses columns by index.
# NOTE(review): indices 1, 2 and 0 presumably correspond to POSITION, SALARY
# and PLAYER (compare the label-based cell that follows); this relies on the
# column order left after 'TEAM' was dropped. The threshold 15 assumes SALARY
# is expressed in millions of dollars; confirm against the CSV.
nba.where(1, 'PG').where(2, are.above(15)).column(0)
In [ ]:
# Names of players whose POSITION is 'PG' and whose SALARY is above 15,
# selecting columns by label rather than by index.
nba.where('POSITION', 'PG').where('SALARY', are.above(15)).column('PLAYER')
In [ ]:
# The table returned by with_row is not assigned to any name, so the lookup on
# the next line runs against `nba` exactly as it was before this cell.
# NOTE(review): this looks like a deliberate demonstration that with_row does
# not change `nba` itself (the following cell keeps the result in `nba2`);
# confirm before "fixing" it.
nba.with_row(['Bernie', 'Mascot', 100])
nba.where('PLAYER', are.containing('Bern'))
In [ ]:
# Keep the table returned by with_row under a new name, then search that table
# for players whose name contains 'Bern'.
nba2 = nba.with_row(['Bernie', 'Mascot', 100])
nba2.where('PLAYER', are.containing('Bern'))

Census

In [ ]:
# Load the full table from 'nc-est2014-agesex-res.csv' (path is relative to
# the notebook's working directory) and display it.
full = Table.read_table('nc-est2014-agesex-res.csv')
full
In [ ]:
# Keep only the four columns used in the rest of the notebook.
partial = full.select('SEX', 'AGE', 'POPESTIMATE2010', 'POPESTIMATE2014')
partial
In [ ]:
# Shorten the two population-estimate labels to just their years, renaming
# both columns in a single relabeled call (old labels paired with new labels).
simple = partial.relabeled(
    ['POPESTIMATE2010', 'POPESTIMATE2014'],
    ['2010', '2014'],
)
simple
In [ ]:
# Show the rows with the largest AGE values first.
simple.sort('AGE', descending=True)
In [ ]:
# Keep only rows whose AGE is below 999, then re-check the largest ages.
# NOTE(review): 999 is presumably a sentinel AGE code for an all-ages total
# row rather than a real age; confirm against the dataset documentation.
no_999 = simple.where('AGE', are.below(999))
no_999.sort("AGE", descending=True)
In [ ]:
# Split the table by SEX code: 0 -> everyone, 1 -> males, 2 -> females.
# Each piece drops the now-constant SEX column.
everyone, males, females = (
    no_999.where('SEX', sex_code).drop('SEX')
    for sex_code in (0, 1, 2)
)
In [ ]:
# Display the rows that had SEX code 2 (the SEX column itself was dropped).
females
In [ ]:
# Rows of `females` ordered by the '2014' column, largest values first.
females.sort('2014', descending=True)
In [ ]:
# Rows of `males` ordered by the '2014' column, largest values first.
males.sort('2014', descending=True)
In [ ]:
# Side-by-side '2014' values by age for males and females.
# `with_columns` (plural) is the method that accepts several label/values
# pairs; `with_column` (singular) adds a single column per call.
# Ages are taken from `males`; this assumes `males` and `females` list the
# same ages in the same row order. TODO confirm against the source data.
pop_2014 = Table().with_columns(
    'Age', males.column('AGE'),
    'Males', males.column('2014'),
    'Females', females.column('2014')
)
In [ ]:
# Display the combined Age / Males / Females table.
pop_2014
In [ ]:
# Females as a percentage of (Males + Females), computed element-wise per row,
# then attached to a copy of the table as a 'Percent Female' column.
percent_females = (
    100 * pop_2014.column('Females')
    / (pop_2014.column('Males') + pop_2014.column('Females'))
)
counts_and_percents = pop_2014.with_column(
    'Percent Female', percent_females
)
In [ ]:
# Display the counts together with the derived 'Percent Female' column.
counts_and_percents
In [ ]:
 

Line Graphs

In [ ]:
# Line graph of 'Percent Female' against 'Age'.
counts_and_percents.plot('Age', 'Percent Female')
In [ ]:
# Show the table again before plotting it.
pop_2014
In [ ]:
# Line graph with 'Age' on the horizontal axis; no second label is given, so
# the remaining columns (Males and Females) are what gets plotted.
# NOTE(review): confirm against the Table.plot documentation.
pop_2014.plot('Age')
In [ ]:
# Rows of pop_2014 whose 'Age' satisfies are.between(65, 75).
# NOTE(review): check in the predicate documentation whether each endpoint is
# inclusive or exclusive.
pop_2014.where('Age', are.between(65, 75))
In [ ]:
# Subtract each of the integers 67..72 from 2014 (element-wise).
# Presumably the approximate birth years of people aged 67-72 in 2014.
2014 - np.arange(67, 73)
In [ ]:
# Display the rows that had SEX code 0 (the SEX column itself was dropped).
everyone
In [ ]:
# Rebind `everyone` to a table with an added 'Change' column: the '2014' value
# minus the '2010' value, computed element-wise per row.
everyone = everyone.with_column(
    'Change', everyone.column('2014') - everyone.column('2010')
)
In [ ]:
# Rows with the largest 'Change' first. Requires the 'Change' column added in
# the previous cell.
everyone.sort('Change', descending=True)
In [ ]:
# Growth Rate = ('2014' / '2010') ** (1/4) - 1, computed per row. The exponent
# 1/4 matches the four-year gap between the two column labels, which makes
# this an annualized (per-year) rate.
# The resulting table is only displayed; `everyone` itself is not rebound.
everyone.with_column(
    'Growth Rate', (everyone.column('2014')/everyone.column('2010')) ** (1/4) - 1
).sort('Growth Rate', descending=True)
In [ ]: