import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import folium as fm
import geopy
from IPython.display import HTML
from sklearn import linear_model, naive_bayes, feature_selection, metrics, tree

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # older scikit-learn releases only ship the legacy module
    from sklearn.cross_validation import train_test_split

# Equivalent of the notebook magic `%matplotlib inline`, written as plain
# Python so the file still parses (and is a no-op) outside IPython.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# Positional column range used as the classifier's feature matrix.
# NOTE(review): presumably the adjective-count features (the original local
# was named `X_adjs`) -- confirm against the pickled frame's column layout.
FEATURE_COL_START = 34
FEATURE_COL_STOP = 289

# Shared settings for every choropleth rendered below.
MAP_CENTER = (40.714623, -74.006605)
MAP_ZOOM = 11
MAP_TILES = 'Stamen Toner'
ZIP_GEOJSON_PATH = './nyc-zip-code-tabulation-areas-polygons.geojson'
SCORE_THRESHOLDS = (0.50, 0.8, 0.85, 0.90, 0.95, 0.99)

# ZIP codes with fewer tips than this are dropped from the "threshold" maps.
MIN_TIP_COUNT = 50


def _score_zipcodes(tips_df, zipcodes, target_column):
    """Fit one MultinomialNB per ZIP code and record its accuracy.

    For each ZIP code, the rows of ``tips_df`` whose dummy column for that
    ZIP equals 1 are selected, a classifier is fitted on the feature columns
    against ``target_column``, and the classifier is scored on those same
    rows (i.e. the score is training accuracy, not a held-out estimate).

    Args:
        tips_df: Frame holding the feature columns, the target column and one
            dummy column per ZIP code.
        zipcodes: Iterable of ZIP-code dummy column labels.
        target_column: Name of the label column (e.g. ``'grade_A'``).

    Returns:
        A list of dicts with keys ``'zipcode'``, ``'score'``, ``'tip_count'``.
    """
    results = []
    for zipcode in zipcodes:
        subset = tips_df[tips_df[zipcode] == 1]
        # `.iloc` replaces the removed `.ix` indexer and makes the positional
        # intent of the column slice explicit.
        features = subset.iloc[:, FEATURE_COL_START:FEATURE_COL_STOP]
        target = subset[target_column]
        clf = naive_bayes.MultinomialNB().fit(features, target)
        results.append({'zipcode': zipcode,
                        'score': clf.score(features, target),
                        'tip_count': len(subset)})
    return results


def _save_zip_choropleth(score_df, fill_color, out_path):
    """Render per-ZIP scores as a choropleth and write it to ``out_path``.

    Args:
        score_df: Frame with at least ``'zipcode'`` and ``'score'`` columns.
        fill_color: Colour-scheme name passed to folium (e.g. ``'RdPu'``).
        out_path: Destination HTML file.
    """
    # NOTE(review): `geo_json` / `create_map` are the legacy folium API; newer
    # folium releases expose choropleth layers and `Map.save` instead --
    # confirm the pinned folium version before upgrading.
    zip_map = fm.Map(location=list(MAP_CENTER), zoom_start=MAP_ZOOM,
                     tiles=MAP_TILES)
    zip_map.geo_json(geo_path=ZIP_GEOJSON_PATH,
                     data=score_df,
                     columns=['zipcode', 'score'],
                     key_on='feature.properties.postalCode',
                     threshold_scale=list(SCORE_THRESHOLDS),
                     fill_color=fill_color,
                     fill_opacity=0.65,
                     line_opacity=0.5)
    zip_map.create_map(path=out_path)


# NOTE: unpickling can execute arbitrary code; only load dumps you produced.
tips_adj_df = pd.read_pickle('./dumps/tips_complete_features.pkl')

# One indicator column per ZIP code, appended to the tips frame.
zipcode_dummies = pd.get_dummies(tips_adj_df['ZIPCODE'].astype(int))
tips_adj_df = tips_adj_df.join(zipcode_dummies)

# --- grade A -----------------------------------------------------------------
zip_score_list_A = _score_zipcodes(
    tips_adj_df, zipcode_dummies.columns.values, 'grade_A')
tip_by_zip_df_A = pd.DataFrame(zip_score_list_A)
_save_zip_choropleth(tip_by_zip_df_A, 'RdPu', 'foursquare_zips_all_A.html')
HTML('')

tip_threshold_by_zip_A_df = tip_by_zip_df_A[
    tip_by_zip_df_A['tip_count'] >= MIN_TIP_COUNT]
_save_zip_choropleth(tip_threshold_by_zip_A_df, 'RdPu',
                     'foursquare_zips_threshold_A.html')
HTML('')

# --- grade C -----------------------------------------------------------------
zip_score_list_C = _score_zipcodes(
    tips_adj_df, zipcode_dummies.columns.values, 'grade_C')
tip_by_zip_C_df = pd.DataFrame(zip_score_list_C)
_save_zip_choropleth(tip_by_zip_C_df, 'BuPu', 'foursquare_zips_all_C.html')
HTML('')

tip_threshold_by_zip_C_df = tip_by_zip_C_df[
    tip_by_zip_C_df['tip_count'] >= MIN_TIP_COUNT]
_save_zip_choropleth(tip_threshold_by_zip_C_df, 'BuPu',
                     'foursquare_zips_threshold_C.html')
HTML('')