from datascience import *
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')
# A single list may mix value types: here a str, an int, a float and a bool.
simple_list = [
    'hello',
    7,
    3.14,
    True,
]
simple_list
# A list element can itself be a whole array, sitting next to a string and
# a number in the same list.
my_array = make_array(1, 2, 3)
crowded_list = [
    my_array,
    'what is going on',
    -10,
]
crowded_list
# Column values may be supplied as a plain Python list.
Table().with_columns('Numbers', [1, 2, 3])

# Begin with a table that has column labels but no rows yet ...
drinks = Table(['Drink', 'Cafe', 'Price'])
drinks

# ... then append the rows; each inner list holds one row's values in
# label order (Drink, Cafe, Price).
drinks = drinks.with_rows([
    ['Milk Tea', 'Asha', 5.5],
    ['Espresso', 'Strada', 1.75],
    ['Latte', 'Strada', 3.25],
    ['Espresso', 'FSM', 2],
])
drinks
# Read the welcome-survey responses and preview the first three rows.
survey = Table.read_table('welcome_survey_v4.csv')
survey.show(3)

# Group on a single column: first with the default aggregation, then
# passing np.average as the function used to combine each group's values.
survey.group('Sleep position').show()
survey.group('Sleep position', collect=np.average)
# Select just the grouping column and the column to average before
# grouping, so np.average is applied only to 'Hours of sleep'.
# (The original line opened a parenthesis that was never closed, which made
# the whole file a SyntaxError; the expression is now properly balanced.)
(survey.select('Sleep position', 'Hours of sleep')
 .group('Sleep position', np.average))
# Cross-classify by two columns at once by passing a list of labels.
survey.group(['Handedness', 'Sleep position']).show()

# pivot takes the same two columns, one as `columns` and one as `rows`.
survey.pivot('Sleep position', 'Handedness')

# Supplying a values column together with a collect function combines the
# 'Hours of sleep' entries of each subgroup with np.average.
survey.pivot(
    'Sleep position',
    'Handedness',
    values='Hours of sleep',
    collect=np.average,
)

# The same averages via select + group on both columns.
(
    survey
    .select('Handedness', 'Sleep position', 'Hours of sleep')
    .group(['Handedness', 'Sleep position'], np.average)
    .show()
)
# Here, pivot doesn't know how to combine all the hours of sleep
# for each subgroup of students: a values column is given without a
# collect function. This call is a deliberate demonstration of that
# failure, so the TypeError is caught and printed rather than left to stop
# every statement that follows it.
try:
    survey.pivot('Sleep position', 'Handedness', 'Hours of sleep')
except TypeError as err:
    print('pivot failed:', err)
# Load the skyscraper data, add an 'age' column computed as 2020 minus the
# 'completed' column (the reference year is hard-coded), then drop
# 'completed' itself.
sky = Table.read_table('skyscrapers_v2.csv')
sky = (
    sky
    .with_column('age', 2020 - sky.column('completed'))
    .drop('completed')
)
sky.show(3)
# 1. For each city, what’s the tallest building for each material?
# 2. For each city, what’s the height difference between the tallest
# steel building and the tallest concrete building?
# Don't read ahead until you try the challenge questions yourself first!
# Question 1, answered with group: the max height for every
# (city, material) pair, one pair per row.
(
    sky
    .select('material', 'city', 'height')
    .group(['city', 'material'], max)
)

# The same maxima via pivot, with 'material' as the columns argument and
# 'city' as the rows argument.
sky_p = sky.pivot('material', 'city', values='height', collect=max)
sky_p.show()
# Question 2: absolute difference between the 'steel' and 'concrete'
# columns of the pivoted table, stored as a new 'difference' column.
sky_p = sky_p.with_column(
    'difference',
    np.abs(sky_p.column('steel') - sky_p.column('concrete')),
)
sky_p

# Largest difference first.
sky_p.sort('difference', descending=True)
# Generate a table of the names of the oldest buildings for each
# material for each city:
# Show the drinks table again before joining it with another table.
drinks

# Coupons per cafe; note that 'Asha' appears twice in 'Location'.
discounts = Table().with_columns(
    'Coupon % off', make_array(10, 25, 5),
    'Location', make_array('Asha', 'Strada', 'Asha'),
)
discounts
# Match each drink's 'Cafe' against the coupon table's 'Location'.
combined = drinks.join('Cafe', discounts, other_label='Location')
combined

# Convert the percentage off into the fraction of the price still paid,
# then apply it to every price.
discounted_frac = 1 - combined.column('Coupon % off') / 100
combined.with_column(
    'Discounted Price',
    combined.column('Price') * discounted_frac,
)

# A table can also be joined with itself on the same column.
drinks.join('Cafe', drinks, other_label='Cafe')