from datascience import *
import numpy as np
# Load the station table and rename its 'name' column to 'labels', which is
# the column name selected for the map calls below.
stations = (
    Table.read_table('station.csv')
    .relabel("name", "labels")
)
stations

# The three columns handed to both map helpers.
station_map_columns = ('lat', 'long', 'labels')

# One marker per station.
Marker.map_table(stations.select(*station_map_columns))

# The same stations drawn as circles; change the size or color of the
# circles with "radius=" or "color=".
Circle.map_table(
    stations.select(*station_map_columns),
    radius=150,
    color='green',
)
# Read the trips, keep those whose 'Duration' is below 1800, and reduce the
# table to three columns picked by position (3, 6, 1). The first two of the
# selected columns are renamed to 'Start' and 'End'.
trip = (
    Table.read_table('trip.csv')
    .where('Duration', are.below(1800))
    .select(3, 6, 1)
    .relabeled(0, 'Start')
    .relabeled(1, 'End')
)

# Number of trips per start station, largest count first.
starts = (
    trip
    .group('Start')
    .sort('count', descending=True)
)
starts

# Attach the start counts to the station rows by matching the stations'
# 'labels' column against the 'Start' column of the counts.
station_starts = stations.join('labels', starts, 'Start')
station_starts
# One row per distinct 'landmark' value, with a 'count' column from group().
landmarks = stations.group('landmark')
landmarks

# Give every landmark row its own color. Five colors are listed, so this
# assumes the grouped table has exactly five rows.
landmarks = landmarks.with_columns(
    'colors', make_array('blue', 'red', 'yellow', 'orange', 'purple'),
)
landmarks

# Bring the landmark color onto each station row; 'count' is dropped from
# the landmark table before joining on 'landmark'.
station_starts = station_starts.join('landmark', landmarks.drop('count'))
station_starts

# Derive a circle area for each station from its start count.
station_starts = station_starts.with_columns(
    'areas', station_starts.column('count') / 10,
)
station_starts

# Draw the stations using the per-row 'colors' and 'areas' columns.
Circle.map_table(
    station_starts.select('lat', 'long', 'labels', 'colors', 'areas')
)
# Menu table: one row per drink offered at a cafe, with its price.
drinks = Table(['Drink', 'Cafe', 'Price']).with_rows(
    [
        ['Milk Tea', 'Tea One', 4],
        ['Espresso', 'Nefeli', 2],
        ['Coffee', 'Nefeli', 3],
        ['Espresso', "Abe's", 2],
    ]
)
drinks

# Coupon table: a percentage off and the location it applies to.
# 'Tea One' appears twice, i.e. it has two different coupons.
discounts = Table().with_columns(
    'Coupon % off', make_array(5, 50, 25),
    'Location', make_array('Tea One', 'Nefeli', 'Tea One'),
)
discounts
# Discussion question: generate a table with one row per cafe that has the
# name and discounted price of its cheapest discounted drink.

# Step 1: link (join) drinks with discounts, matching 'Cafe' to 'Location'.
combined = drinks.join('Cafe', discounts, 'Location')

# Step 2: compute the discounted price of every joined row.
discounted_prices = (
    combined.column('Price') * (1 - combined.column('Coupon % off') / 100)
)
discounted_drinks = combined.with_columns(
    'Discounted price', discounted_prices,
)
discounted_drinks

# Step 3: sort.
# Correct: sort by 'Discounted price' first, then keep one row per cafe.
# Espresso is cheaper.
(
    discounted_drinks
    .sort('Discounted price')
    .sort('Cafe', distinct=True)
)

# Incorrect: the table needs to be sorted by "Discounted price" first.
discounted_drinks.sort('Cafe', distinct=True)

# Incorrect: Coffee is first alphabetically.
discounted_drinks.group('Cafe', min)
# Challenge yourself and try to solve these on your own before looking at the solutions!
# Re-read the full trip file and rebuild `trip` with just the three columns
# the exercises below use, under shorter labels.
trip0 = Table.read_table("trip.csv")
trip = Table().with_columns(
    "Start", trip0.column("Start Station"),
    "End", trip0.column("End Station"),
    "Duration", trip0.column("Duration"),
)

# Preview the first three rows.
trip.show(3)
# The name of the station where the most rentals ended
#(assume no ties).
# The number of stations for which the average duration ending
# at that station was more than 10 minutes.
# The number of stations that have more than 500 starts
# AND more than 500 ends
# Solution 1: the name of the station where the most rentals ended
# (assume no ties).
(
    trip
    .group('End')                      # end counts per station
    .sort('count', descending=True)    # highest end count first
    .column(0)                         # station names in that order
    .item(0)                           # the top one
)

# Solution 2: the number of stations for which the average duration ending
# at that station was more than 10 minutes.
(
    trip
    .group('End', np.average)          # average of each column per end station
    .where(2, are.above(10 * 60))      # column at index 2 above 10*60
    .num_rows                          # count the stations that remain
)

# Solution 3: the number of stations that have more than 500 starts
# AND more than 500 ends.
# Start counts, keyed by a shared 'Station' label.
starting = (
    trip
    .group('Start')
    .relabeled('count', 'Start count')
    .relabeled('Start', 'Station')
)
# End counts, keyed the same way.
ending = (
    trip
    .group('End')
    .relabeled('count', 'End count')
    .relabeled('End', 'Station')
)
# Combine them with join, apply both thresholds, and count the rows.
(
    starting
    .join('Station', ending)
    .where('Start count', are.above(500))
    .where('End count', are.above(500))
    .num_rows
)
# Comparison demos: each bare expression below evaluates to a bool.
3 > 1
type(3 > 1)
3 < 1
True
3 == 3
# Counter-example, kept commented out so the file still parses:
#   3 = 3
# "=" is assignment, not comparison, and a literal cannot be assigned to, so
# that line is a SyntaxError. Use "==" (as above) to test equality.
x = 14
y = 3
x > 10
# Chained comparison: equivalent to (12 < x) and (x < 18).
12 < x < 18
12 < x
x < 18
# The arithmetic is evaluated before the comparisons.
12 < x-y < 18
# Both conditions must hold for `and` to produce True.
x > 10 and y > 5
# Comparison demos on an array of strings.
pets = make_array('cat', 'dog', 'cat', 'cat', 'dog', 'rabbit')
pets
# Comparing the array to a single value compares every element.
pets == 'dog'
# The same dog/not-dog pattern written out as 1s and 0s ...
0 + 1 + 0 + 0 + 1 + 0
# ... and as booleans: True counts as 1 and False as 0 when summed.
sum(make_array(False, True, False, False, True, False))
# So summing a comparison counts the matching elements.
sum(pets == 'dog')
# np.count_nonzero counts the True entries directly.
np.count_nonzero(pets == 'dog')
# Ordering comparisons also work on strings.
pets > 'cat'
sum(pets > 'cat')
# A string that is a proper prefix of another compares as smaller.
"cat" < "catastrophe"
# --- where ---
# A one-column table of semester numbers 1 through 8 (np.arange excludes 9).
terms = Table().with_column('Semester', np.arange(1, 9))
terms
# Keep only the rows whose 'Semester' value is above 6.
terms.where('Semester', are.above(6))
# An `are.above(...)` predicate can be stored in a name and called directly
# on a single value.
is_senior = are.above(6)
is_senior(4)
def also_is_senior(x):
    """Return whether ``x`` is strictly greater than 6.

    A hand-written predicate using the same threshold as ``are.above(6)``.

    Args:
        x: A value that supports ``>`` comparison with an int.

    Returns:
        The result of ``x > 6``.
    """
    return x > 6
# Call the hand-written predicate on a single value.
also_is_senior(5)
# Apply the predicate to every value in the 'Semester' column.
terms.apply(also_is_senior, 'Semester')
# Four ways of passing a "Semester above 6" condition to `where`:
# 1. a built-in `are` predicate,
terms.where('Semester', are.above(6))
# 2. the same kind of predicate stored under a name,
terms.where('Semester', is_senior)
# 3. an ordinary function used as the predicate,
terms.where('Semester', also_is_senior)
# 4. the result of applying the function to the column, passed directly.
terms.where(terms.apply(also_is_senior, 'Semester'))