In [ ]:

from datascience import *
import numpy as np

Maps¶

In [ ]:

# Read the station data and rename its "name" column to "labels",
# the label the map cells below select together with 'lat' and 'long'.
stations = Table.read_table('station.csv')
stations = stations.relabel("name", "labels")
stations

In [ ]:

# Draw one map marker per station from its coordinates and label.
Marker.map_table(
    stations.select('lat', 'long', 'labels')
)

In [ ]:

# Same stations drawn as circles; the "radius=" and "color=" keyword
# arguments change the size and color of the circles.
Circle.map_table(
    stations.select('lat', 'long', 'labels'),
    radius=150,
    color='green',
)

In [ ]:

# Keep only trips whose 'Duration' is below 1800 (presumably seconds, i.e.
# 30 minutes -- a later cell uses 10*60 for "10 minutes"), then keep
# columns 3, 6 and 1 and rename the first two to 'Start' and 'End'.
trip = (
    Table.read_table('trip.csv')
    .where('Duration', are.below(1800))
    .select(3, 6, 1)
    .relabeled(0, 'Start')
    .relabeled(1, 'End')
)

In [ ]:

# Number of trips leaving each start station, largest count first.
starts = (
    trip
    .group('Start')
    .sort('count', descending=True)
)
starts

In [ ]:

# Attach the start counts to the station rows: a station's 'labels' value
# is matched against the 'Start' value in `starts`.
station_starts = stations.join(
    'labels',
    starts,
    'Start',
)
station_starts

In [ ]:

# Count how many stations share each distinct 'landmark' value.
landmarks = stations.group('landmark')
landmarks

In [ ]:

# Give every landmark row its own color. Five colors are listed, so this
# presumably relies on `landmarks` having exactly five rows -- confirm
# against the table displayed by the previous cell.
landmarks = landmarks.with_column(
    'colors', make_array('blue', 'red', 'yellow', 'orange', 'purple'),
)
landmarks

In [ ]:

# Bring each landmark's color onto its stations. The landmark table's
# 'count' column is dropped first, so the join only contributes 'colors'.
station_starts = station_starts.join(
    'landmark',
    landmarks.drop('count'),
)
station_starts

In [ ]:

# Add an 'areas' column: each station's start count divided by 10.
station_starts = station_starts.with_column(
    'areas', station_starts.column('count')/10,
)
station_starts

In [ ]:

# Map the stations as circles, passing the per-landmark 'colors' and the
# count-based 'areas' columns along with the coordinates and labels.
Circle.map_table(
    station_starts.select('lat', 'long', 'labels', 'colors', 'areas')
)

Table examples¶

In [ ]:

# Small example table: one row per drink offered at a cafe, with its price.
# 'Espresso' appears twice (at Nefeli and at Abe's).
drinks = Table(['Drink', 'Cafe', 'Price']).with_rows([
    ['Milk Tea', 'Tea One', 4],
    ['Espresso', 'Nefeli',  2],
    ['Coffee',   'Nefeli',  3],
    ['Espresso', "Abe's",   2]
])
drinks

In [ ]:

# Coupons by location: 'Tea One' has two coupons (5% and 25%) and
# 'Nefeli' has one (50%).
discounts = Table().with_columns(
    'Coupon % off', make_array(5, 50, 25),
    'Location', make_array('Tea One', 'Nefeli', 'Tea One')
)
discounts

In [ ]:

# Discussion question: generate a table with one row per cafe that has the
# name and discounted price of its cheapest discounted drink.

# Step 1: pair every drink with each coupon whose 'Location' is its 'Cafe'.
# NOTE(review): "Abe's" has no row in `discounts`, so it presumably drops
# out of the joined table -- confirm against the displayed result.
combined = drinks.join('Cafe', discounts, 'Location')

# Step 2: apply each coupon. 'Coupon % off' is a percentage, hence the /100.
discounted_prices = combined.column('Price') * (1 - combined.column('Coupon % off')/100)

# Step 3: keep the discounted price alongside the original columns.
discounted_drinks = combined.with_column('Discounted price', discounted_prices)
discounted_drinks
# Picking the cheapest drink per cafe (the sorting step) is done in the
# cells that follow.

In [ ]:

# Correct: ordering by discounted price first means the row kept for each
# cafe by the distinct sort is its cheapest one (Espresso is cheaper).
(
    discounted_drinks
    .sort('Discounted price')
    .sort('Cafe', distinct=True)
)

In [ ]:

#Incorrect - need to sort by "Discounted price" first
# Without that first sort, the row kept for each cafe is not necessarily
# the one with the lowest discounted price.
discounted_drinks.sort('Cafe', distinct=True) 

In [ ]:

#Incorrect, Coffee is first alphabetically
# `min` is taken over each column on its own, so the drink name shown for a
# cafe is the alphabetically smallest one, not the drink with the lowest price.
discounted_drinks.group('Cafe', min) 

Spring 2016 Midterm, Question 2(b)¶

Challenge yourself and try to solve these on your own before looking at the solutions!

In [ ]:

# Reload the full trip data and rebuild `trip` (replacing the table bound
# to that name earlier) from three of its columns under shorter labels.
trip0 = Table.read_table("trip.csv")
trip = Table().with_columns(
    "Start", trip0.column("Start Station"),
    "End", trip0.column("End Station"),
    "Duration", trip0.column("Duration"),
)
trip.show(3)

In [ ]:

# The name of the station where the most rentals ended 
#(assume no ties).

In [ ]:

# The number of stations for which the average duration ending 
# at that station was more than 10 minutes.

In [ ]:

# The number of stations that have more than 500 starts 
# AND more than 500 ends

In [ ]:

# The name of the station where the most rentals ended (assume no ties).
# Count rentals per end station, put the largest count first, then read
# the first value of the first column (the station name) from the result.
(
    trip
    .group('End')
    .sort('count', descending=True)
    .column(0)
    .item(0)
)

In [ ]:

# The number of stations for which the average duration ending
# at that station was more than 10 minutes.

# Average the trips grouped by end station, keep the stations whose value
# in column 2 is above 10*60 (ten minutes expressed in seconds), and count
# the rows that remain.
# NOTE(review): column index 2 is presumably the averaged 'Duration' column
# ('End' being 0 and the averaged 'Start' being 1) -- confirm.
(
    trip
    .group('End', np.average)
    .where(2, are.above(10*60))
    .num_rows
)

In [ ]:

# The number of stations that have more than 500 starts
# AND more than 500 ends.

# Start counts per station, relabeled to 'Station' / 'Start count'.
starting = (
    trip.group('Start')
    .relabeled('count', 'Start count')
    .relabeled('Start', 'Station')
)
# End counts per station, relabeled the same way so both share 'Station'.
ending = (
    trip.group('End')
    .relabeled('count', 'End count')
    .relabeled('End', 'Station')
)
# Join the two on 'Station', keep rows above both thresholds, count them.
(
    starting
    .join('Station', ending)
    .where('Start count', are.above(500))
    .where('End count', are.above(500))
    .num_rows
)

In [ ]:

Comparison¶

In [ ]:

# A comparison is an expression with a value; this one is True.
3 > 1

In [ ]:

# The value produced by a comparison is a bool.
type(3 > 1)

In [ ]:

# 3 is not less than 1, so this is False.
3 < 1

In [ ]:

# True (like False) can also be written directly as a value.
True

In [ ]:

# == tests equality; this is True.
3 == 3

In [ ]:

# Intentional error: a single = is assignment, and a number cannot be
# assigned to, so this cell raises a SyntaxError. Use == to compare.
3 = 3

In [ ]:

# Names used by the comparison cells that follow.
x = 14
y = 3

In [ ]:

# Comparisons work on names too; with x = 14 this is True.
x > 10

In [ ]:

# Comparisons can be chained: True when x is strictly between 12 and 18.
12 < x < 18

In [ ]:

# The lower-bound half of the chained comparison 12 < x < 18.
12 < x

In [ ]:

# The upper-bound half of the chained comparison 12 < x < 18.
x < 18

In [ ]:

# Arithmetic is evaluated before the comparisons: with x = 14 and y = 3,
# x-y is 11, which is not above 12, so this is False.
12 < x-y < 18

In [ ]:

# `and` is True only when both comparisons are True; with y = 3 the second
# one is False, so the whole expression is False.
x > 10 and y > 5

In [ ]:

Comparisons with arrays¶

In [ ]:

# An array of strings used for the element-by-element comparisons below.
pets = make_array('cat', 'dog', 'cat', 'cat', 'dog', 'rabbit')
pets

In [ ]:

# Comparing an array with a single value compares every element, giving
# an array of bools (True where the element is 'dog').
pets == 'dog'

In [ ]:

# Counting the dogs by hand: 1 for each 'dog' in `pets`, 0 otherwise.
0 + 1 + 0 + 0 + 1 + 0

In [ ]:

# When summed, True counts as 1 and False as 0.
sum(make_array(False, True, False, False, True, False))

In [ ]:

# Summing a comparison therefore counts the elements that match.
sum(pets == 'dog')

In [ ]:

# np.count_nonzero counts the True entries of the comparison directly.
np.count_nonzero(pets == 'dog')

In [ ]:

# Strings are ordered alphabetically, so this marks the elements that
# come after 'cat'.
pets > 'cat'

In [ ]:

# Number of elements of `pets` that come after 'cat' alphabetically.
sum(pets > 'cat')

In [ ]:

# A string is smaller than any longer string that starts with it,
# so this is True.
"cat" < "catastrophe"

Predicates and advanced `where`¶

In [ ]:

# One-column table holding the semester numbers 1 through 8.
terms = Table().with_column('Semester', np.arange(1, 9))
terms

In [ ]:

# Keep only the rows whose 'Semester' value is above 6.
terms.where('Semester', are.above(6))

In [ ]:

# The predicate built by are.above(6) can be stored under a name.
is_senior = are.above(6)

In [ ]:

# A predicate can be called directly on a single value; 4 is not above 6.
is_senior(4)

In [ ]:

def also_is_senior(x):
    """Return whether ``x`` is strictly greater than 6.

    A hand-written function that makes the same "above 6" test as the
    ``are.above(6)`` predicate used elsewhere in this notebook.
    """
    senior_cutoff = 6
    return x > senior_cutoff

In [ ]:

# 5 is not greater than 6, so this is False.
also_is_senior(5)

In [ ]:

# Call also_is_senior on every 'Semester' value and collect the results.
terms.apply(also_is_senior, 'Semester')

In [ ]:

# Filtering with the built-in predicate written inline.
terms.where('Semester', are.above(6))

In [ ]:

# The same filter using the predicate stored in `is_senior`.
terms.where('Semester', is_senior)

In [ ]:

# An ordinary one-argument function is passed here in place of a predicate.
terms.where('Semester', also_is_senior)

In [ ]:

# Here `where` is given the results of apply (one value per row) instead of
# a column label and predicate.
terms.where(terms.apply(also_is_senior, 'Semester'))

In [ ]:

Maps¶

Table examples¶

Spring 2016 Midterm, Question 2(b)¶

Comparison¶

Comparisons with arrays¶

Predicates and advanced where¶

Predicates and advanced `where`¶