"""Explore AirBnB review scores for Washington, D.C. listings.

Notebook-style script: it reads ``data/listings.csv``, keeps the listings
that have a complete set of review scores, maps the number of listings per
neighborhood with folium, plots the average score per review category, and
exports the review-related fields to ``data/reviewed_listings.csv``.

Bare expressions such as ``grouped`` or ``HTML('')`` are notebook display
statements; they have no visible effect when run as a plain script.
"""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from IPython.display import HTML

pd.set_option('display.float_format', lambda x: '%.2f' % x)

try:
    # Programmatic form of the `%matplotlib inline` magic (this is what
    # nbconvert emits); `get_ipython` only exists inside IPython/Jupyter.
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass  # plain Python interpreter: there is no inline backend to enable

colpal = sns.color_palette("hls", 7)
sns.set(palette=colpal, style='ticks',
        rc={"figure.figsize": (7.75, 5), 'savefig.dpi': 150})

# read data in
df = pd.read_csv('data/listings.csv', index_col='id')
print('Number of records:', df.shape[0])
print('Number of columns:', df.shape[1])
print(', '.join(df.columns))  # see the columns starting with review_score...

# pre-process the data
# Columns that must be present (non-null) for a listing to be analysed.
# The original built this list with `.split('\t')` on a space-separated
# string, which yields one bogus column name; an explicit list avoids that.
revcols = [
    'host_id',
    'host_listings_count',
    'number_of_reviews',
    'review_scores_rating',
    'review_scores_accuracy',
    'review_scores_cleanliness',
    'review_scores_checkin',
    'review_scores_communication',
    'review_scores_location',
    'review_scores_value',
]
# `DataFrame.sort` was removed from pandas; `sort_values` is its replacement.
df = df.dropna(subset=revcols).sort_values('review_scores_rating',
                                           ascending=False)
# Shorten 'review_scores_<x>' to '<x>' (rating, accuracy, cleanliness, ...).
df = df.rename(columns={c: c.split('_')[-1] for c in revcols[3:]})
df = df.rename(columns={'neighbourhood_cleansed': 'neighborhood'})
# From here on `revcols` holds only the six category scores; the overall
# 'rating' column (index 3) is deliberately left out.
revcols = [c.split('_')[-1] for c in revcols[4:]]

# number of listings per neighborhood
# Counting the non-null 'rating' values per neighborhood gives the number of
# listings, because rows with missing scores were dropped above.
grouped = (
    pd.DataFrame(df.pivot_table(index='neighborhood', values='rating',
                                aggfunc='count'))
    .sort_values('rating', ascending=False)
    .reset_index()
)
grouped

# Appendix A
# number of listings per neighborhood
mapname = 'neighborhoods'
m = folium.Map(location=[38.92, -77.03], zoom_start=12, tiles='OpenStreetMap')
# NOTE(review): `Map.geo_json` / `Map.create_map` belong to early folium
# releases; newer releases appear to expose `folium.Choropleth` and
# `Map.save` instead. Left unchanged because the installed folium version is
# not visible from this file -- confirm before upgrading.
m.geo_json(geo_path='neighbourhoods.geojson',
           data=grouped,
           data_out=mapname + '.json',
           columns=['neighborhood', 'rating'],
           key_on='feature.properties.neighbourhood',
           fill_color='PuRd',
           fill_opacity=0.7,
           line_opacity=0.2,
           legend_name='AirBnB listings per neighborhood')
m.create_map(path=mapname + '.html')
HTML('')

# describe the data
# Average score per review category, highest first. `Series.order` was
# removed from pandas; `sort_values` is its replacement.
dd = (df[revcols].sum() / len(df)).sort_values(ascending=False)
ax = sns.barplot(y=dd.index, x=dd.values)
# Alternative pandas-only rendering of the same chart:
# ax = dd.sort_values().plot(kind='barh', color=sns.color_palette('hls'), figsize=(5, 3))
ax.set(xlim=(9, 9.81), xlabel='Average Review Score',
       title='AirBnB \nListings in Washington, D.C.');
sns.despine(trim=True)
plt.gcf().savefig('docs/review_scores.png', bbox_inches='tight')
df.shape

# export related fields to a csv file
df[revcols] = df[revcols].astype(int)
df[['host_id'] + revcols + ['number_of_reviews', 'listing_url', 'name',
                            'neighborhood', 'room_type', 'description',
                            'latitude', 'longitude']].to_csv(
    'data/reviewed_listings.csv', encoding='utf8')

# Ratings (stars)...
#
# onclick...
#
# JavaScript handler used by the demo page, kept here for reference only (it
# is not Python and is not executed by this script). It reads the search text
# and six rating inputs, doubles each rating, and passes them as WFS
# `viewparams` to `queryWFS`.
#
#     function change() {
#         var query = document.getElementById('myTextField').value;
#         var cleanliness = $('#cleanliness').val()*2;
#         var accuracy = $('#accuracy').val()*2;
#         var communication = $('#communication').val()*2;
#         var checkin = $('#checkin').val()*2;
#         var loc = $('#loc').val()*2;
#         var val = $('#val').val()*2;
#         console.log(cleanliness);
#         if (query.length == 0) {
#             alert('Nothing to search for...');
#             return;
#         }
#         group.removeLayer(geojsonlayer);
#         viewparam = {
#             viewparams: "word:" + query + ";cleanliness:" + cleanliness + ";accuracy:" + accuracy + ";communication:" + communication + ";checkin:" + checkin + ";loc:" + loc + ";val:" + val
#         };
#         parameters = L.Util.extend(defaultParameters, viewparam);
#         URL = owsrootUrl + L.Util.getParamString(parameters);
#         console.log("Geoserver URL: " + URL);
#         queryWFS(URL);
#     }
#
# and the demo...
HTML('')