#!/usr/bin/env python
# coding: utf-8

# In[ ]:


from datascience import *
import numpy as np


# ## Maps

# In[ ]:


# Load the station list and rename its "name" column to "labels", the column
# name the map cells below select alongside 'lat' and 'long'.
stations = (
    Table.read_table('station.csv')
    .relabel("name", "labels")
)
stations


# In[ ]:


# Draw one marker per station from its latitude, longitude and label.
Marker.map_table(
    stations.select('lat', 'long', 'labels')
)


# In[ ]:


# Same stations drawn as circles; "radius=" and "color=" change the size and
# color of every circle.
Circle.map_table(
    stations.select('lat', 'long', 'labels'),
    radius=150,
    color='green',
)


# In[ ]:


# Keep only trips whose Duration is below 1800 (presumably seconds, i.e. 30
# minutes -- later cells treat 10*60 as ten minutes), then pick three columns
# by position and relabel the first two as 'Start' and 'End'.
# NOTE(review): positions 3, 6, 1 are presumably the start station, end
# station and duration columns of trip.csv -- confirm against the file.
trip = (
    Table.read_table('trip.csv')
    .where('Duration', are.below(1800))
    .select(3, 6, 1)
    .relabeled(0, 'Start')
    .relabeled(1, 'End')
)


# In[ ]:


# Number of trips per start station, busiest station first.
starts = (
    trip
    .group('Start')
    .sort('count', descending=True)
)
starts


# In[ ]:


# Attach each station's start count by matching stations' 'labels' column
# against the 'Start' column of the counts table.
station_starts = stations.join(
    'labels',
    starts,
    'Start',
)
station_starts


# In[ ]:


# One row per distinct value of the 'landmark' column, with a station count.
landmarks = stations.group('landmark')
landmarks


# In[ ]:


# Give every landmark its own color.
# NOTE(review): five colors are listed, so this assumes the grouped table has
# exactly five rows -- confirm against station.csv.
landmarks = landmarks.with_column(
    'colors', make_array('blue', 'red', 'yellow', 'orange', 'purple'),
)
landmarks


# In[ ]:


# Bring the landmark color onto each station row; the per-landmark 'count'
# column is dropped first so only the color is added.
station_starts = station_starts.join(
    'landmark',
    landmarks.drop('count'),
)
station_starts


# In[ ]:


# Circle area for each station: its start count scaled down by a factor of 10.
station_starts = station_starts.with_column(
    'areas', station_starts.column('count') / 10,
)
station_starts


# In[ ]:


# Map the stations as circles, passing the color and area columns along with
# the position and label columns.
Circle.map_table(
    station_starts.select(
        'lat',
        'long',
        'labels',
        'colors',
        'areas',
    )
)


# ## Table examples

# In[ ]:


# Small menu table: one row per (drink, cafe) pair with its price.
drinks = Table(['Drink', 'Cafe', 'Price']).with_rows(
    [
        ['Milk Tea', 'Tea One', 4],
        ['Espresso', 'Nefeli',  2],
        ['Coffee',   'Nefeli',  3],
        ['Espresso', "Abe's",   2],
    ]
)
drinks


# In[ ]:


# Coupons: a percentage off and the cafe ('Location') where each one applies.
# 'Tea One' appears twice, so it has two different coupons.
discounts = Table().with_columns(
    'Coupon % off',
    make_array(5, 50, 25),
    'Location',
    make_array('Tea One', 'Nefeli', 'Tea One'),
)
discounts


# In[ ]:


# Discussion question: generate a table with one row per cafe that has the
# name and discounted price of its cheapest discounted drink.

# Step 1: link (join) drinks with discounts, matching drinks' 'Cafe' column
# against discounts' 'Location' column.
combined = drinks.join(
    'Cafe',
    discounts,
    'Location',
)

# Step 2: compute the discounted prices -- the price times the fraction that
# remains after taking the coupon percentage off.
discounted_prices = (
    combined.column('Price')
    * (1 - combined.column('Coupon % off') / 100)
)

discounted_drinks = combined.with_column(
    'Discounted price',
    discounted_prices,
)
discounted_drinks
# Step 3 (sorting) is done in the cells below.


# In[ ]:


# Correct: Espresso is cheaper. Sorting by discounted price first puts each
# cafe's cheapest row ahead of its other rows before the distinct sort on
# 'Cafe' reduces the table to one row per cafe.
(
    discounted_drinks
    .sort('Discounted price')
    .sort('Cafe', distinct=True)
)


# In[ ]:


# Incorrect: the rows need to be sorted by "Discounted price" first; without
# that, the row kept for each cafe is not necessarily its cheapest one.
discounted_drinks.sort(
    'Cafe',
    distinct=True,
)


# In[ ]:


# Incorrect: the result shows Coffee because it is first alphabetically, not
# because it is the cheapest drink.
discounted_drinks.group(
    'Cafe',
    min,
)


# ## Spring 2016 Midterm, Question 2(b)

# Challenge yourself and try to solve these on your own before looking at the solutions!

# In[ ]:


# Re-read the raw trip data and build a three-column table with short labels:
# start station, end station and trip duration.
trip0 = Table.read_table("trip.csv")
trip = Table().with_columns(
    "Start",
    trip0.column("Start Station"),
    "End",
    trip0.column("End Station"),
    "Duration",
    trip0.column("Duration"),
)
# Preview the first three rows.
trip.show(3)


# In[ ]:


# The name of the station where the most rentals ended 
#(assume no ties).


# In[ ]:


# In[ ]:


# The number of stations for which the average duration ending 
# at that station was more than 10 minutes.


# In[ ]:


# In[ ]:


# The number of stations that have more than 500 starts 
# AND more than 500 ends


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# Question: the name of the station where the most rentals ended
# (assume no ties).
# Count the trips ending at each station, put the largest count first, then
# read the station name from the first column of the first row.
(
    trip
    .group('End')
    .sort('count', descending=True)
    .column(0)
    .item(0)
)


# In[ ]:


# Question: the number of stations for which the average duration ending
# at that station was more than 10 minutes.

# Average every column per end station, keep the stations whose average is
# above 10 minutes (10 * 60, so Duration is presumably in seconds), and
# count the rows that remain.
# NOTE(review): column index 2 is presumably the averaged Duration column,
# given that trip's columns are Start, End, Duration -- confirm.
(
    trip
    .group('End', np.average)
    .where(2, are.above(10 * 60))
    .num_rows
)


# In[ ]:


# Question: the number of stations that have more than 500 starts
# AND more than 500 ends.
# Start counts per station, relabeled so both tables share a 'Station' column.
starting = (
    trip
    .group('Start')
    .relabeled('count', 'Start count')
    .relabeled('Start', 'Station')
)
# End counts per station, relabeled the same way.
ending = (
    trip
    .group('End')
    .relabeled('count', 'End count')
    .relabeled('End', 'Station')
)
# Join the two on 'Station', apply both thresholds, and count the rows left.
(
    starting
    .join('Station', ending)
    .where('Start count', are.above(500))
    .where('End count', are.above(500))
    .num_rows
)


# In[ ]:


# ## Comparison ##

# In[ ]:


# A comparison is an expression whose value is a boolean: 3 is greater than
# 1, so this is True.
3 > 1


# In[ ]:


# The value produced by a comparison has type bool.
type(3 > 1)


# In[ ]:


# False: 3 is not less than 1.
3 < 1


# In[ ]:


# True (like False) can also be written directly as a literal.
True


# In[ ]:


# == tests equality and produces a boolean; this is True.
3 == 3


# In[ ]:


# A single = is assignment, not comparison. `3 = 3` is a SyntaxError because
# a literal cannot be assigned to, and as live code it prevents this entire
# file from being parsed. The statement is therefore kept only as a comment.
# 3 = 3


# In[ ]:


# Values used by the comparisons in the following cells.
x = 14
y = 3


# In[ ]:


# True: 14 is greater than 10.
x > 10


# In[ ]:


# Chained comparison, equivalent to (12 < x) and (x < 18); True for x = 14.
12 < x < 18


# In[ ]:


# First half of the chain on its own: True.
12 < x


# In[ ]:


# Second half of the chain on its own: True.
x < 18


# In[ ]:


# x - y is 11, which is not greater than 12, so the chain is False.
12 < x-y < 18


# In[ ]:


# `and` needs both sides to be true; y > 5 is False (y is 3), so the whole
# expression is False even though x > 10 is True.
x > 10 and y > 5


# In[ ]:


# ## Comparisons with arrays

# In[ ]:


# Six pet names in one array. make_array comes from the datascience
# star-import; presumably it returns a NumPy array, which is what makes the
# elementwise comparisons below work -- confirm against the library docs.
pets = make_array('cat', 'dog', 'cat', 'cat', 'dog', 'rabbit')
pets


# In[ ]:


# Comparing the array with a string compares every element, giving one
# True/False per pet.
pets == 'dog'


# In[ ]:


# The same result written as 0s and 1s: only positions 1 and 4 are 'dog'.
0 + 1 + 0 + 0 + 1 + 0


# In[ ]:


# Summing booleans counts the True values (True adds as 1, False as 0).
sum(make_array(False, True, False, False, True, False))


# In[ ]:


# Number of dogs in the array: 2.
sum(pets == 'dog')


# In[ ]:


# The same count using NumPy's count_nonzero.
np.count_nonzero(pets == 'dog')


# In[ ]:


# Strings are ordered alphabetically (lexicographically), so this marks the
# pets that come after 'cat': 'dog' and 'rabbit'.
pets > 'cat'


# In[ ]:


# How many pets come after 'cat': 3 (two dogs and the rabbit).
sum(pets > 'cat')


# In[ ]:


# In[ ]:


# A string that is a proper prefix of another sorts before it, so this is True.
"cat" < "catastrophe"


# ## Predicates and advanced `where`

# In[ ]:


# One-column table of semester numbers; np.arange(1, 9) yields 1 through 8.
terms = Table().with_column('Semester', np.arange(1, 9))
terms


# In[ ]:


# Keep the rows whose Semester value is above 6.
terms.where('Semester', are.above(6))


# In[ ]:


# The predicate built by are.above(6) can be stored under a name and reused.
is_senior = are.above(6)


# In[ ]:


# The stored predicate is called directly on a single value here; 4 is not
# above 6, so the expected result is False.
is_senior(4)


# In[ ]:


def also_is_senior(x):
    """Return whether ``x`` is strictly greater than 6.

    Plain-function version of the "above 6" test, applied to semester
    numbers elsewhere in this file. Exactly 6 is not included.
    """
    senior_cutoff = 6
    return senior_cutoff < x


# In[ ]:


# 5 is not greater than 6, so this is False.
also_is_senior(5)


# In[ ]:


# Call the function on every value of the 'Semester' column, producing one
# result per row.
terms.apply(also_is_senior, 'Semester')


# In[ ]:


# The cells below write the same row filter in four ways.
# 1) Built-in predicate passed inline.
terms.where('Semester', are.above(6))


# In[ ]:


# 2) The same predicate, passed through the name it was stored under.
terms.where('Semester', is_senior)


# In[ ]:


# 3) An ordinary function that returns True/False, used as the predicate.
terms.where('Semester', also_is_senior)


# In[ ]:


# 4) The per-row True/False results computed up front with apply and passed
#    to where as its only argument.
terms.where(terms.apply(also_is_senior, 'Semester'))


# In[ ]: