In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import folium as fm
import geopy

from IPython.display import HTML

from sklearn import linear_model, naive_bayes, feature_selection, metrics, tree
from sklearn.cross_validation import train_test_split

%matplotlib inline
In [34]:
# Load the pickled tips feature table, then append one indicator (dummy)
# column per zipcode so later cells can select rows by zipcode column.
tips_features = pd.read_pickle('./dumps/tips_complete_features.pkl')

# Cast ZIPCODE to int first so the dummy column labels are integer zipcodes.
zipcode_as_int = tips_features['ZIPCODE'].astype(int)
zipcode_dummies = pd.get_dummies(zipcode_as_int)

tips_adj_df = tips_features.join(zipcode_dummies)

Predicting Grade "A" Restaurants by zipcode

In [35]:
# Fit one Multinomial Naive Bayes classifier per zipcode to predict `grade_A`
# and record, for each zipcode, the model score and the number of tips used.
zip_score_list_A = []

for zipcode in zipcode_dummies.columns.values:
    # Keep only the rows whose indicator column for this zipcode is set.
    tip_zip_subset = tips_adj_df[tips_adj_df[zipcode] == 1]

    # Positional column slice. `.iloc` replaces `.ix`, which is deprecated and
    # was removed in pandas 1.0; `.iloc` is available in old and new pandas.
    # NOTE(review): presumably positions 34..288 are the feature columns -- confirm.
    X_adjs = tip_zip_subset.iloc[:, 34:289]
    y = tip_zip_subset['grade_A']

    clf = naive_bayes.MultinomialNB()
    clf = clf.fit(X_adjs, y)
    # NOTE: scored on the same rows the model was fitted on, so this is an
    # in-sample score, not a held-out estimate.
    score = clf.score(X_adjs, y)

    zip_score_list_A.append({'zipcode': zipcode, 'score': score, 'tip_count': len(tip_zip_subset)})
In [36]:
# Turn the per-zipcode records (keys: zipcode, score, tip_count) into a
# DataFrame with one row per zipcode for mapping.
tip_by_zip_df_A = pd.DataFrame(zip_score_list_A)
In [37]:
# Choropleth of the Grade "A" model score for every zipcode, written to an
# HTML file. The map object is named `zip_map_all_A` instead of `map` so the
# builtin `map()` is not shadowed for the rest of the kernel session.
zip_map_all_A = fm.Map(location=[40.714623, -74.006605], zoom_start=11, tiles='Stamen Toner')
# Bind each polygon to its row in the score table: the geojson's
# `postalCode` property is matched against the `zipcode` column and
# shaded by `score` using the bins in `threshold_scale`.
zip_map_all_A.geo_json(geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',
                       data=tip_by_zip_df_A, columns=['zipcode', 'score'],
                       key_on='feature.properties.postalCode',
                       threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],
                       fill_color='RdPu', fill_opacity=0.65, line_opacity=0.5)
zip_map_all_A.create_map(path='foursquare_zips_all_A.html')
In [38]:
# Embed the saved map page in the notebook output through an iframe.
HTML('<iframe src=foursquare_zips_all_A.html width=1000 height = 500></iframe>')
Out[38]:
In [39]:
# Same Grade "A" choropleth, restricted to zipcodes with at least 50 tips so
# that scores computed from very few rows are left off the map.
tip_threshold_by_zip_A_df = tip_by_zip_df_A[tip_by_zip_df_A['tip_count'] >= 50]

# Named `zip_map_threshold_A` instead of `map` so the builtin `map()` is not
# shadowed for the rest of the kernel session.
zip_map_threshold_A = fm.Map(location=[40.714623, -74.006605], zoom_start=11, tiles='Stamen Toner')
zip_map_threshold_A.geo_json(geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',
                             data=tip_threshold_by_zip_A_df, columns=['zipcode', 'score'],
                             key_on='feature.properties.postalCode',
                             threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],
                             fill_color='RdPu', fill_opacity=0.65, line_opacity=0.5)
zip_map_threshold_A.create_map(path='foursquare_zips_threshold_A.html')

# Last expression of the cell: embeds the saved map page in the output.
HTML('<iframe src=foursquare_zips_threshold_A.html width=1000 height = 500></iframe>')
Out[39]:

Predicting Grade "C" Restaurants by zipcode

In [40]:
# Fit one Multinomial Naive Bayes classifier per zipcode to predict `grade_C`
# and record, for each zipcode, the model score and the number of tips used.
zip_score_list_C = []

for zipcode in zipcode_dummies.columns.values:
    # Keep only the rows whose indicator column for this zipcode is set.
    tip_zip_subset = tips_adj_df[tips_adj_df[zipcode] == 1]

    # Positional column slice. `.iloc` replaces `.ix`, which is deprecated and
    # was removed in pandas 1.0; `.iloc` is available in old and new pandas.
    # NOTE(review): presumably positions 34..288 are the feature columns -- confirm.
    X_adjs = tip_zip_subset.iloc[:, 34:289]
    y = tip_zip_subset['grade_C']

    clf = naive_bayes.MultinomialNB()
    clf = clf.fit(X_adjs, y)
    # NOTE: scored on the same rows the model was fitted on, so this is an
    # in-sample score, not a held-out estimate.
    score = clf.score(X_adjs, y)

    zip_score_list_C.append({'zipcode': zipcode, 'score': score, 'tip_count': len(tip_zip_subset)})
In [41]:
# Turn the per-zipcode Grade "C" records (keys: zipcode, score, tip_count)
# into a DataFrame with one row per zipcode.
tip_by_zip_C_df = pd.DataFrame(zip_score_list_C)

# Choropleth of the Grade "C" model score for every zipcode. Named
# `zip_map_all_C` instead of `map` so the builtin `map()` is not shadowed
# for the rest of the kernel session.
zip_map_all_C = fm.Map(location=[40.714623, -74.006605], zoom_start=11, tiles='Stamen Toner')
zip_map_all_C.geo_json(geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',
                       data=tip_by_zip_C_df, columns=['zipcode', 'score'],
                       key_on='feature.properties.postalCode',
                       threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],
                       fill_color='BuPu', fill_opacity=0.65, line_opacity=0.5)
zip_map_all_C.create_map(path='foursquare_zips_all_C.html')

# Last expression of the cell: embeds the saved map page in the output.
HTML('<iframe src=foursquare_zips_all_C.html width=1000 height = 500></iframe>')
Out[41]:
In [42]:
# Same Grade "C" choropleth, restricted to zipcodes with at least 50 tips so
# that scores computed from very few rows are left off the map.
tip_threshold_by_zip_C_df = tip_by_zip_C_df[tip_by_zip_C_df['tip_count'] >= 50]

# Named `zip_map_threshold_C` instead of `map` so the builtin `map()` is not
# shadowed for the rest of the kernel session.
zip_map_threshold_C = fm.Map(location=[40.714623, -74.006605], zoom_start=11, tiles='Stamen Toner')
zip_map_threshold_C.geo_json(geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',
                             data=tip_threshold_by_zip_C_df, columns=['zipcode', 'score'],
                             key_on='feature.properties.postalCode',
                             threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],
                             fill_color='BuPu', fill_opacity=0.65, line_opacity=0.5)
zip_map_threshold_C.create_map(path='foursquare_zips_threshold_C.html')

# Last expression of the cell: embeds the saved map page in the output.
HTML('<iframe src=foursquare_zips_threshold_C.html width=1000 height = 500></iframe>')
Out[42]: