In [ ]:
from datascience import *
import numpy as np

Maps

In [ ]:
# Load the station table and rename its 'name' column to 'labels', the label
# under which the mapping cells below select it.
stations = (
    Table.read_table('station.csv')
    .relabel("name", "labels")
)
stations
In [ ]:
# Draw one marker per station from its 'lat', 'long' and 'labels' columns.
Marker.map_table(
    stations.select('lat', 'long', 'labels')
)
In [ ]:
# The same stations drawn as circles. The keyword arguments "radius=" and
# "color=" set the size and color used for the circles.
Circle.map_table(
    stations.select('lat', 'long', 'labels'),
    radius=150,
    color='green',
)
In [ ]:
# Keep only trips whose 'Duration' is below 1800, narrow the table to three
# columns picked by position, and rename the first two to 'Start' and 'End'.
# NOTE(review): positions 3, 6 and 1 are presumably the start-station,
# end-station and duration columns of trip.csv - confirm against the header.
trip = (
    Table.read_table('trip.csv')
    .where('Duration', are.below(1800))
    .select(3, 6, 1)
    .relabeled(0, 'Start')
    .relabeled(1, 'End')
)
In [ ]:
# Count the trips leaving each start station, largest count first.
starts = (
    trip.group('Start')
    .sort('count', descending=True)
)
starts
In [ ]:
# Attach the start counts to the station rows by matching the 'labels'
# column of stations against the 'Start' column of starts.
station_starts = stations.join(
    'labels',
    starts,
    'Start',
)
station_starts
In [ ]:
# Group the stations by their 'landmark' value: one row per landmark together
# with a 'count' of the stations that share it.
landmarks = stations.group('landmark')
landmarks
In [ ]:
# Add a 'colors' column holding one color name per landmark row. Five names
# are listed, so this assumes the landmarks table has five rows.
landmark_colors = make_array('blue', 'red', 'yellow', 'orange', 'purple')
landmarks = landmarks.with_column('colors', landmark_colors)
landmarks
In [ ]:
# Bring the per-landmark information onto every station row by joining on
# 'landmark'. The landmark 'count' column is dropped before the join.
station_starts = station_starts.join(
    'landmark',
    landmarks.drop('count'),
)
station_starts
In [ ]:
# Add an 'areas' column: each station's start count divided by 10.
# NOTE(review): presumably Circle.map_table uses 'areas' to size each circle.
station_starts = station_starts.with_column(
    'areas', station_starts.column('count') / 10
)
station_starts
In [ ]:
# Map the stations as circles, passing along the 'colors' and 'areas'
# columns built in the preceding cells.
Circle.map_table(
    station_starts.select('lat', 'long', 'labels', 'colors', 'areas')
)

Table examples

In [ ]:
# A small menu table: each row is a drink, the cafe selling it, and its price.
drink_rows = [
    ['Milk Tea', 'Tea One', 4],
    ['Espresso', 'Nefeli', 2],
    ['Coffee', 'Nefeli', 3],
    ['Espresso', "Abe's", 2],
]
drinks = Table(['Drink', 'Cafe', 'Price']).with_rows(drink_rows)
drinks
In [ ]:
# Coupons: each row pairs a percentage off with the location it applies to.
coupon_percents = make_array(5, 50, 25)
coupon_locations = make_array('Tea One', 'Nefeli', 'Tea One')
discounts = Table().with_columns(
    'Coupon % off', coupon_percents,
    'Location', coupon_locations,
)
discounts
In [ ]:
# Discussion question: generate a table with one row per cafe that has the
# name and discounted price of its cheapest discounted drink.

# Step 1: link (join) each drink with the coupons for its cafe.
combined = drinks.join('Cafe', discounts, 'Location')

# Step 2: turn each coupon percentage into the fraction of the price still
# paid, and apply it to the drink price.
fraction_paid = 1 - combined.column('Coupon % off') / 100
discounted_prices = combined.column('Price') * fraction_paid

discounted_drinks = combined.with_column('Discounted price', discounted_prices)
discounted_drinks
# Step 3 (sorting to pick one row per cafe) is done in the cells that follow.
In [ ]:
# Correct: sort by 'Discounted price' before taking distinct cafes, so the
# cheaper drink (Espresso) is the one shown.
(
    discounted_drinks
    .sort('Discounted price')
    .sort('Cafe', distinct=True)
)
In [ ]:
# Incorrect: the table needs to be sorted by "Discounted price" first;
# taking distinct cafes alone does not choose the row by price.
discounted_drinks.sort('Cafe', distinct=True) 
In [ ]:
# Incorrect: min is taken over each column separately, so the drink name
# shown is the alphabetically first one (Coffee), not the cheapest drink.
discounted_drinks.group('Cafe', min) 

Spring 2016 Midterm, Question 2(b)

Challenge yourself and try to solve these on your own before looking at the solutions!

In [ ]:
# Reload the trip data and build a three-column table for the questions
# below, using the short labels 'Start', 'End' and 'Duration'.
trip0 = Table.read_table("trip.csv")
trip = Table().with_columns(
    "Start",    trip0.column("Start Station"),
    "End",      trip0.column("End Station"),
    "Duration", trip0.column("Duration"),
)
trip.show(3)
In [ ]:
# The name of the station where the most rentals ended 
#(assume no ties).
In [ ]:
 
In [ ]:
# The number of stations for which the average duration of rentals
# ending at that station was more than 10 minutes.
In [ ]:
 
In [ ]:
# The number of stations that have more than 500 starts 
# AND more than 500 ends
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
# The name of the station where the most rentals ended (assume no ties).
# Count the rentals per end station, put the largest count in the first row,
# and read the station name from that row.
(
    trip.group('End')
    .sort('count', descending=True)
    .column('End')
    .item(0)
)
In [ ]:
# The number of stations for which the average duration of rentals ending
# at that station was more than 10 minutes.
#
# 1. Group by end station, averaging the other columns with np.average.
# 2. Keep the rows whose value in column 2 is above 10*60.
#    NOTE(review): column 2 is presumably the averaged 'Duration', in
#    seconds - confirm against the grouped table.
# 3. Count the rows that remain.
(
    trip.group('End', np.average)
    .where(2, are.above(10 * 60))
    .num_rows
)
In [ ]:
# The number of stations that have more than 500 starts
# AND more than 500 ends.

# Rentals per start station, relabeled so the key column is 'Station'.
starting = (
    trip.group('Start')
    .relabeled('Start', 'Station')
    .relabeled('count', 'Start count')
)

# Rentals per end station, relabeled the same way.
ending = (
    trip.group('End')
    .relabeled('End', 'Station')
    .relabeled('count', 'End count')
)

# Join the two tables on 'Station', keep the rows that pass both
# thresholds, and count them.
(
    starting.join('Station', ending)
    .where('Start count', are.above(500))
    .where('End count', are.above(500))
    .num_rows
)
In [ ]:
 

Comparison

In [ ]:
# A comparison is an expression; this one checks whether 3 is greater than 1.
3 > 1
In [ ]:
# Ask for the type of the value a comparison produces.
type(3 > 1)
In [ ]:
# 3 is not less than 1, so this comparison is False.
3 < 1
In [ ]:
# A Boolean value can also be written directly as a literal.
True
In [ ]:
# A double equals sign tests whether two values are equal.
3 == 3
In [ ]:
# A single `=` is assignment, not comparison, so `3 = 3` is not valid Python
# (use `==` to compare). The snippet is compiled from a string so that the
# SyntaxError can be caught and shown here; run directly, it would stop a
# Restart & Run All at this cell.
try:
    compile('3 = 3', '<comparison demo>', 'exec')
except SyntaxError as error:
    syntax_message = error.msg
    print('SyntaxError:', syntax_message)
In [ ]:
# Two names used by the comparison examples in the following cells.
x, y = 14, 3
In [ ]:
# Names can be compared just like literal numbers.
x > 10
In [ ]:
# Comparisons can be chained: x must be above 12 and below 18.
12 < x < 18
In [ ]:
# The lower-bound half of the chained comparison.
12 < x
In [ ]:
# The upper-bound half of the chained comparison.
x < 18
In [ ]:
# Arithmetic is evaluated before comparison: x-y is checked against both bounds.
12 < x-y < 18
In [ ]:
# `and` is True only when both comparisons are True.
x > 10 and y > 5
In [ ]:
 

Comparisons with arrays

In [ ]:
# A small array of strings used by the array comparison examples.
pets = make_array(
    'cat', 'dog', 'cat',
    'cat', 'dog', 'rabbit',
)
pets
In [ ]:
# Comparing an array with a single value compares each element in turn.
pets == 'dog'
In [ ]:
# Counting by hand: 1 for each 'dog' in pets and 0 for anything else, added up.
0 + 1 + 0 + 0 + 1 + 0
In [ ]:
# The same pattern written with True/False in place of 1/0, then summed.
sum(make_array(False, True, False, False, True, False))
In [ ]:
# Sum the element-wise comparison to count the entries equal to 'dog'.
sum(pets == 'dog')
In [ ]:
# np.count_nonzero counts the entries of the comparison that are not zero/False.
np.count_nonzero(pets == 'dog')
In [ ]:
# Strings can be compared with > as well; here each element is compared with 'cat'.
pets > 'cat'
In [ ]:
# Count the entries of pets that compare greater than 'cat'.
sum(pets > 'cat')
In [ ]:
 
In [ ]:
# Compare two strings where the first is a prefix of the second.
"cat" < "catastrophe"

Predicates and advanced where

In [ ]:
# A one-column table holding the values of np.arange(1, 9) under 'Semester'.
terms = Table().with_column(
    'Semester', np.arange(1, 9)
)
terms
In [ ]:
# Keep the rows whose 'Semester' value is above 6.
terms.where('Semester', are.above(6))
In [ ]:
# Give the predicate a name so it can be reused in later cells.
is_senior = are.above(6)
In [ ]:
# The named predicate can be called on a single value.
is_senior(4)
In [ ]:
def also_is_senior(x):
    """Return whether ``x`` is greater than 6."""
    senior_cutoff = 6
    return senior_cutoff < x
In [ ]:
# Try the hand-written function on one value; 5 is not greater than 6.
also_is_senior(5)
In [ ]:
# Call also_is_senior on the values of the 'Semester' column.
terms.apply(also_is_senior, 'Semester')
In [ ]:
# where with a predicate built inline.
terms.where('Semester', are.above(6))
In [ ]:
# where with the predicate stored in the name is_senior.
terms.where('Semester', is_senior)
In [ ]:
# where with an ordinary one-argument function in place of a predicate.
terms.where('Semester', also_is_senior)
In [ ]:
# where given the result of apply directly, with no column label or predicate.
terms.where(terms.apply(also_is_senior, 'Semester'))
In [ ]: