import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium as fm
import geopy
from IPython.display import HTML
from sklearn import linear_model, naive_bayes, feature_selection, metrics, tree
from sklearn.cross_validation import train_test_split
%matplotlib inline
# Load the pickled tips feature table.
# NOTE(review): unpickling can execute arbitrary code -- only load dumps
# produced by a trusted source.
tips_adj_df = pd.read_pickle('./dumps/tips_complete_features.pkl')

# Build one indicator column per distinct (integer-cast) ZIPCODE value and
# append those columns to the feature table, aligned on the row index.
# The resulting dummy columns are labelled by the integer zip codes.
zipcode_as_int = tips_adj_df['ZIPCODE'].astype(int)
zipcode_dummies = pd.get_dummies(zipcode_as_int)
tips_adj_df = tips_adj_df.join(zipcode_dummies)
# Fit one Multinomial Naive Bayes classifier per zip code, predicting the
# `grade_A` column, and record each model's score together with the number
# of rows (tips) available for that zip code.
zip_score_list_A = []
for zipcode in zipcode_dummies.columns.values:
    # Rows whose indicator column for this zip code is set.
    tip_zip_subset = tips_adj_df[tips_adj_df[zipcode] == 1]

    # Feature matrix: columns 34..288 selected by position.
    # `.iloc` is used because the legacy `.ix` indexer was deprecated in
    # pandas 0.20 and removed in 1.0; `.iloc` is always positional.
    # NOTE(review): presumably these are the adjective count features --
    # confirm the column range against the pickled frame's layout.
    X_adjs = tip_zip_subset.iloc[:, 34:289]
    y = tip_zip_subset['grade_A']

    clf = naive_bayes.MultinomialNB()
    clf = clf.fit(X_adjs, y)

    # NOTE: the model is scored on the same rows it was fitted on, so this
    # is training accuracy, not a held-out estimate.
    score = clf.score(X_adjs, y)

    zip_score_list_A.append({'zipcode': zipcode, 'score': score, 'tip_count': len(tip_zip_subset)})

# One row per zip code with columns 'zipcode', 'score' and 'tip_count'.
tip_by_zip_df_A = pd.DataFrame(zip_score_list_A)
# Choropleth of the per-zip-code grade-A scores for every zip code, drawn
# over the zip-code polygons in the GeoJSON file and written to an HTML file.
# NOTE(review): `map` shadows the builtin of the same name; the name is kept
# because it is a module-level binding other cells may rely on.
map = fm.Map(
    location=[40.714623, -74.006605],
    zoom_start=11,
    tiles='Stamen Toner',
)

# Layer settings: 'zipcode' values are matched against each feature's
# `properties.postalCode`, and 'score' drives the fill colour.
all_zips_layer_A = dict(
    geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',
    data=tip_by_zip_df_A,
    columns=['zipcode', 'score'],
    key_on='feature.properties.postalCode',
    threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],
    fill_color='RdPu',
    fill_opacity=0.65,
    line_opacity=0.5,
)
map.geo_json(**all_zips_layer_A)
map.create_map(path='foursquare_zips_all_A.html')

# NOTE(review): the HTML payload is an empty string, so nothing is embedded
# here -- presumably markup referencing the saved map was intended; confirm.
HTML('')
# Restrict the grade-A results to zip codes with at least 50 tips, then draw
# the same choropleth for that subset and write it to its own HTML file.
has_enough_tips_A = tip_by_zip_df_A['tip_count'] >= 50
tip_threshold_by_zip_A_df = tip_by_zip_df_A[has_enough_tips_A]

# NOTE(review): `map` shadows the builtin of the same name; the name is kept
# because it is a module-level binding other cells may rely on.
map = fm.Map(
    location=[40.714623, -74.006605],
    zoom_start=11,
    tiles='Stamen Toner',
)

# Layer settings: 'zipcode' values are matched against each feature's
# `properties.postalCode`, and 'score' drives the fill colour.
threshold_layer_A = dict(
    geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',
    data=tip_threshold_by_zip_A_df,
    columns=['zipcode', 'score'],
    key_on='feature.properties.postalCode',
    threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],
    fill_color='RdPu',
    fill_opacity=0.65,
    line_opacity=0.5,
)
map.geo_json(**threshold_layer_A)
map.create_map(path='foursquare_zips_threshold_A.html')

# NOTE(review): the HTML payload is an empty string, so nothing is embedded
# here -- presumably markup referencing the saved map was intended; confirm.
HTML('')
# Fit one Multinomial Naive Bayes classifier per zip code, predicting the
# `grade_C` column, and record each model's score together with the number
# of rows (tips) available for that zip code.
zip_score_list_C = []
for zipcode in zipcode_dummies.columns.values:
    # Rows whose indicator column for this zip code is set.
    tip_zip_subset = tips_adj_df[tips_adj_df[zipcode] == 1]

    # Feature matrix: columns 34..288 selected by position.
    # `.iloc` is used because the legacy `.ix` indexer was deprecated in
    # pandas 0.20 and removed in 1.0; `.iloc` is always positional.
    # NOTE(review): presumably these are the adjective count features --
    # confirm the column range against the pickled frame's layout.
    X_adjs = tip_zip_subset.iloc[:, 34:289]
    y = tip_zip_subset['grade_C']

    clf = naive_bayes.MultinomialNB()
    clf = clf.fit(X_adjs, y)

    # NOTE: the model is scored on the same rows it was fitted on, so this
    # is training accuracy, not a held-out estimate.
    score = clf.score(X_adjs, y)

    zip_score_list_C.append({'zipcode': zipcode, 'score': score, 'tip_count': len(tip_zip_subset)})

# One row per zip code with columns 'zipcode', 'score' and 'tip_count'.
tip_by_zip_C_df = pd.DataFrame(zip_score_list_C)
# Choropleth of the per-zip-code grade-C scores for every zip code, drawn
# over the zip-code polygons in the GeoJSON file and written to an HTML file.
# NOTE(review): `map` shadows the builtin of the same name; the name is kept
# because it is a module-level binding other cells may rely on.
map = fm.Map(
    location=[40.714623, -74.006605],
    zoom_start=11,
    tiles='Stamen Toner',
)

# Layer settings: 'zipcode' values are matched against each feature's
# `properties.postalCode`, and 'score' drives the fill colour.
all_zips_layer_C = dict(
    geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',
    data=tip_by_zip_C_df,
    columns=['zipcode', 'score'],
    key_on='feature.properties.postalCode',
    threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],
    fill_color='BuPu',
    fill_opacity=0.65,
    line_opacity=0.5,
)
map.geo_json(**all_zips_layer_C)
map.create_map(path='foursquare_zips_all_C.html')

# NOTE(review): the HTML payload is an empty string, so nothing is embedded
# here -- presumably markup referencing the saved map was intended; confirm.
HTML('')
# Restrict the grade-C results to zip codes with at least 50 tips, then draw
# the same choropleth for that subset and write it to its own HTML file.
has_enough_tips_C = tip_by_zip_C_df['tip_count'] >= 50
tip_threshold_by_zip_C_df = tip_by_zip_C_df[has_enough_tips_C]

# NOTE(review): `map` shadows the builtin of the same name; the name is kept
# because it is a module-level binding other cells may rely on.
map = fm.Map(
    location=[40.714623, -74.006605],
    zoom_start=11,
    tiles='Stamen Toner',
)

# Layer settings: 'zipcode' values are matched against each feature's
# `properties.postalCode`, and 'score' drives the fill colour.
threshold_layer_C = dict(
    geo_path='./nyc-zip-code-tabulation-areas-polygons.geojson',
    data=tip_threshold_by_zip_C_df,
    columns=['zipcode', 'score'],
    key_on='feature.properties.postalCode',
    threshold_scale=[0.50, 0.8, 0.85, 0.90, 0.95, 0.99],
    fill_color='BuPu',
    fill_opacity=0.65,
    line_opacity=0.5,
)
map.geo_json(**threshold_layer_C)
map.create_map(path='foursquare_zips_threshold_C.html')

# NOTE(review): the HTML payload is an empty string, so nothing is embedded
# here -- presumably markup referencing the saved map was intended; confirm.
HTML('')