#!/usr/bin/env python
# coding: utf-8

# Notebook export: bike-share station maps and small Table join examples.
# Bare expressions (e.g. a table name alone on a line) are notebook cell
# outputs; they display nothing when this file is run as a plain script.

# In[ ]:


from datascience import *
import numpy as np


# ## Maps

# In[ ]:


# `relabeled` returns a relabeled copy, consistent with the other
# relabeling calls in this file.
stations = Table.read_table('station.csv').relabeled('name', 'labels')
stations


# In[ ]:


Marker.map_table(stations.select('lat', 'long', 'labels'))


# In[ ]:


#Change size or color of circles with "radius=" or "color="
Circle.map_table(
    stations.select('lat', 'long', 'labels'),
    radius=150,
    color='green',
)


# In[ ]:


# Keep only trips with Duration below 1800, then pick three columns by
# position. Columns 3 and 6 are relabeled 'Start' and 'End'.
# NOTE(review): the positions depend on the layout of trip.csv - confirm.
trip = (
    Table.read_table('trip.csv')
    .where('Duration', are.below(1800))
    .select(3, 6, 1)
    .relabeled(0, 'Start')
    .relabeled(1, 'End')
)


# In[ ]:


# Number of trips per start station, busiest first.
starts = trip.group('Start').sort('count', descending=True)
starts


# In[ ]:


# Attach the start counts to the station rows (station label == start name).
station_starts = stations.join('labels', starts, 'Start')
station_starts


# In[ ]:


landmarks = stations.group('landmark')
landmarks


# In[ ]:


# One color per landmark group.
# NOTE(review): assumes station.csv has exactly five distinct landmarks;
# a different number would not match the five-element color array.
landmarks = landmarks.with_column(
    'colors', make_array('blue', 'red', 'yellow', 'orange', 'purple')
)
landmarks


# In[ ]:


# Drop the landmark 'count' so it does not clash with the trip 'count'.
station_starts = station_starts.join('landmark', landmarks.drop('count'))
station_starts


# In[ ]:


# Scale the circle areas down from the raw start counts.
station_starts = station_starts.with_column(
    'areas', station_starts.column('count')/10
)
station_starts


# In[ ]:


Circle.map_table(
    station_starts.select('lat', 'long', 'labels', 'colors', 'areas')
)


# ## Table examples

# In[ ]:


drinks = Table(['Drink', 'Cafe', 'Price']).with_rows([
    ['Milk Tea', 'Tea One', 4],
    ['Espresso', 'Nefeli', 2],
    ['Coffee', 'Nefeli', 3],
    ['Espresso', "Abe's", 2]
])
drinks


# In[ ]:


discounts = Table().with_columns(
    'Coupon % off', make_array(5, 50, 25),
    'Location', make_array('Tea One', 'Nefeli', 'Tea One')
)
discounts


# In[ ]:


#Discussion question: Generate a table with one row per cafe that
#has the name and discounted price of its cheapest discounted drink

# Link (join) drinks with discounts
combined = drinks.join('Cafe', discounts, 'Location')

# Compute discounted prices
discounted_prices = combined.column('Price') * (1 - combined.column('Coupon % off')/100)

discounted_drinks = combined.with_column(
    'Discounted price', discounted_prices
)
discounted_drinks

# Sort
# In[ ]:


#Correct, Espresso is cheaper
# Sorting by price first means the row kept for each cafe by
# `distinct=True` is its cheapest discounted drink.
discounted_drinks.sort('Discounted price').sort('Cafe', distinct=True)


# In[ ]:


#Incorrect - need to sort by "Discounted price" first
discounted_drinks.sort('Cafe', distinct=True)


# In[ ]:


#Incorrect, Coffee is first alphabetically
# `min` is applied to every column independently, so the drink name and
# the price shown for a cafe need not come from the same row.
discounted_drinks.group('Cafe', min)


# ## Spring 2016 Midterm, Question 2(b)

# Challenge yourself and try to solve these on your own before looking at the solutions!

# In[ ]:


# Rebuild `trip` from the raw file with just the three columns needed.
trip0 = Table.read_table("trip.csv")
trip = Table().with_columns(
    "Start", trip0.column("Start Station"),
    "End", trip0.column("End Station"),
    "Duration", trip0.column("Duration"))
trip.show(3)


# In[ ]:


# The name of the station where the most rentals ended
#(assume no ties).


# In[ ]:





# In[ ]:


# The number of stations for which the average duration ending
# at that station was more than 10 minutes.


# In[ ]:





# In[ ]:


# The number of stations that have more than 500 starts
# AND more than 500 ends


# In[ ]:





# In[ ]:





# In[ ]:





# In[ ]:


# The name of the station where the most rentals ended (assume no ties).
# First, find end counts
# Then, find the station with the highest end count
trip.group('End').sort('count', descending=True).column(0).item(0)


# In[ ]:


# The number of stations for which the average duration ending
# at that station was more than 10 minutes.
# First, find the average duration of the trips ending at each station
# Then, keep the ones above 10 minutes
# Then, count them
# Column index 2 of the grouped table is the averaged Duration column
# (after 'End' and the averaged 'Start' column); 10*60 expresses the
# 10-minute threshold in seconds.
trip.group('End', np.average).where(2, are.above(10*60)).num_rows


# In[ ]:


# The number of stations that have more than 500 starts
# AND more than 500 ends

# First, find the start counts
starting = (
    trip.group('Start')
    .relabeled('count', 'Start count')
    .relabeled('Start', 'Station')
)

# Then, find the end counts
ending = (
    trip.group('End')
    .relabeled('count', 'End count')
    .relabeled('End', 'Station')
)

# Combine them with join
(
    starting.join('Station', ending)
    .where('Start count', are.above(500))
    .where('End count', are.above(500))
    .num_rows
)


# In[ ]:





# ## Comparison ##

# In[ ]:


3 > 1


# In[ ]:


type(3 > 1)


# In[ ]:


3 < 1


# In[ ]:


True


# In[ ]:


3 == 3


# In[ ]:


# 3 = 3
# NOTE: intentionally left commented out. `=` is assignment, not
# comparison, so `3 = 3` is a SyntaxError; as live code it would prevent
# this entire script from compiling. Use `==` to compare (see above).


# In[ ]:


x = 14
y = 3


# In[ ]:


x > 10


# In[ ]:


12 < x < 18


# In[ ]:


12 < x


# In[ ]:


x < 18


# In[ ]:


12 < x-y < 18


# In[ ]:


x > 10 and y > 5


# In[ ]:





# ## Comparisons with arrays

# In[ ]:


pets = make_array('cat', 'dog', 'cat', 'cat', 'dog', 'rabbit')
pets


# In[ ]:


# Comparing an array to a value compares element by element.
pets == 'dog'


# In[ ]:


0 + 1 + 0 + 0 + 1 + 0


# In[ ]:


# True counts as 1 and False as 0 when summed.
sum(make_array(False, True, False, False, True, False))


# In[ ]:


sum(pets == 'dog')


# In[ ]:


np.count_nonzero(pets == 'dog')


# In[ ]:


# Strings compare alphabetically.
pets > 'cat'


# In[ ]:


sum(pets > 'cat')


# In[ ]:





# In[ ]:


"cat" < "catastrophe"


# ## Predicates and advanced `where`

# In[ ]:


terms = Table().with_column('Semester', np.arange(1, 9))
terms


# In[ ]:


terms.where('Semester', are.above(6))


# In[ ]:


# A predicate built by `are.above` is itself a callable.
is_senior = are.above(6)


# In[ ]:


is_senior(4)


# In[ ]:


def also_is_senior(x):
    """Return True when semester number `x` is greater than 6."""
    return x > 6


# In[ ]:


also_is_senior(5)


# In[ ]:


terms.apply(also_is_senior, 'Semester')


# In[ ]:


terms.where('Semester', are.above(6))


# In[ ]:


terms.where('Semester', is_senior)


# In[ ]:


terms.where('Semester', also_is_senior)


# In[ ]:


# `where` also accepts an array of booleans, one per row.
terms.where(terms.apply(also_is_senior, 'Semester'))


# In[ ]: