import pandas as pd
# Render floats with two decimal places wherever pandas formats them for display.
pd.set_option('display.float_format', lambda x: '%.2f' % x)
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
# Plot styling used by every figure below: a seven-color "hls" palette, the
# 'ticks' style, a 7.75 x 5 default figure size, and 150 dpi for saved figures.
colpal = sns.color_palette("hls", 7)
sns.set(palette=colpal, style='ticks', rc={"figure.figsize":(7.75,5),'savefig.dpi':150})
import folium
from IPython.display import HTML
# Load the listings table, keyed by listing id, and report its dimensions.
df = pd.read_csv('data/listings.csv', index_col='id')
print('Number of records:', len(df.index))
print('Number of columns:', len(df.columns))
# Column inventory -- the review_scores_* columns are the ones used below.
print(', '.join(df.columns))
# pre-process the data
# Columns a listing must have to be kept: three host/count fields followed by
# the seven review_scores_* fields. The names are separated by spaces, so the
# literal is split on any whitespace; splitting on '\t' returned the whole
# string as a single (non-existent) column name.
revcols = (
    'host_id host_listings_count number_of_reviews '
    'review_scores_rating review_scores_accuracy review_scores_cleanliness '
    'review_scores_checkin review_scores_communication '
    'review_scores_location review_scores_value'
).split()
# Drop listings missing any of those fields and order by overall rating,
# best first. sort_values is used because DataFrame.sort is not available in
# current pandas releases.
df = df.dropna(subset=revcols).sort_values('review_scores_rating', ascending=False)
# Shorten review_scores_<name> to <name> (rating, accuracy, ..., value).
df = df.rename(columns={c: c.split('_')[-1] for c in revcols[3:]})
df = df.rename(columns={'neighbourhood_cleansed': 'neighborhood'})
# From here on revcols holds only the six shortened category-score names
# (accuracy ... value); the overall 'rating' column is deliberately excluded.
revcols = [c.split('_')[-1] for c in revcols[4:]]
# number of listings per neighborhood
# Count the non-null 'rating' values in each neighborhood and list the
# neighborhoods from most to fewest. sort_values is used because
# DataFrame.sort is not available in current pandas releases.
grouped = pd.DataFrame(
    df.pivot_table(index='neighborhood', values='rating', aggfunc='count')
).sort_values('rating', ascending=False).reset_index()
grouped # Appendix A
# Choropleth map: shade each neighborhood by its count from `grouped`.
mapname = 'neighborhoods'
m = folium.Map(
    location=[38.92, -77.03],
    zoom_start=12,
    tiles='OpenStreetMap',
)
# NOTE(review): geo_json / create_map look like the legacy folium API --
# confirm they exist in the installed folium version.
m.geo_json(
    geo_path='neighbourhoods.geojson',
    data=grouped,
    data_out=mapname + '.json',
    columns=['neighborhood', 'rating'],
    key_on='feature.properties.neighbourhood',
    fill_color='PuRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='AirBnB listings per neighborhood',
)
m.create_map(path=mapname + '.html')
# NOTE(review): the HTML payload is an empty string, so this displays nothing;
# presumably the markup embedding the saved map was lost -- confirm.
HTML('')
# describe the data
# Average score per review category (column sum divided by the number of
# listings), highest first. Series.sort_values is used because Series.order
# is not available in current pandas releases.
dd = (df[revcols].sum()/len(df)).sort_values(ascending=False)
# Horizontal bars: one per category, bar length = average score.
ax = sns.barplot(y=dd.index,x=dd.values)
# The x-axis is clipped to 9-9.81 so the differences between categories are
# visible; the trailing semicolon suppresses the notebook's echo of the result.
ax.set(xlim=(9,9.81),xlabel='Average Review Score',title='AirBnB Listings in Washington, D.C.');
sns.despine(trim=True)
plt.gcf().savefig('docs/review_scores.png',bbox_inches='tight')
# Display the (rows, columns) of the filtered table.
df.shape
# Write the review-related fields to a CSV file.
# The category scores are cast to integers before export.
df[revcols] = df[revcols].astype(int)
export_columns = (
    ['host_id']
    + revcols
    + ['number_of_reviews', 'listing_url', 'name', 'neighborhood',
       'room_type', 'description', 'latitude', 'longitude']
)
df[export_columns].to_csv('data/reviewed_listings.csv', encoding='utf8')
# Ratings (stars)...
#
# onclick...
/**
 * Re-run the WFS query from the search box and the six rating inputs.
 *
 * Reads the text in #myTextField and the values of #cleanliness, #accuracy,
 * #communication, #checkin, #loc and #val (each multiplied by 2), packs them
 * into a `viewparams` string, removes the current `geojsonlayer` from
 * `group`, and passes the resulting URL to queryWFS().
 *
 * If the search box is empty, shows an alert and returns without changing
 * the map.
 */
function change() {
    var query = document.getElementById('myTextField').value;

    // Validate first so an empty search does no other work.
    if (query.length === 0) {
        alert('Nothing to search for...');
        return;
    }

    // viewparams separates pairs with ';' (and GeoServer also treats ',' as
    // special), so a raw ';' or ',' typed by the user would corrupt or
    // override the other parameters. Backslash-escape them.
    var word = query.replace(/([;,])/g, '\\$1');

    // Each rating input value is doubled before being sent.
    // NOTE(review): presumably maps a 0-5 star input onto 0-10 scores -- confirm.
    var cleanliness = $('#cleanliness').val() * 2;
    var accuracy = $('#accuracy').val() * 2;
    var communication = $('#communication').val() * 2;
    var checkin = $('#checkin').val() * 2;
    var loc = $('#loc').val() * 2;
    var val = $('#val').val() * 2;
    console.log(cleanliness);

    group.removeLayer(geojsonlayer);

    // NOTE(review): viewparam, parameters and URL are assigned without `var`,
    // exactly as before, so they resolve to outer/global variables (and `URL`
    // shadows the built-in URL constructor). Confirm nothing else reads them
    // before making them local.
    viewparam = {
        viewparams: "word:" + word +
            ";cleanliness:" + cleanliness +
            ";accuracy:" + accuracy +
            ";communication:" + communication +
            ";checkin:" + checkin +
            ";loc:" + loc +
            ";val:" + val
    };
    // L.Util.extend merges into its first argument, so defaultParameters
    // itself receives the new viewparams value.
    parameters = L.Util.extend(defaultParameters, viewparam);
    URL = owsrootUrl + L.Util.getParamString(parameters);
    console.log("Geoserver URL: " + URL);
    queryWFS(URL);
}
# and the demo...
# NOTE(review): the HTML payload is an empty string, so this displays nothing;
# presumably the markup embedding the demo was lost -- confirm.
HTML('')