Get some listings from leboncoin.fr
import hashlib
import urllib
import unidecode
import numpy as np
from string import atof,atoi
from pprint import pprint
from bs4 import BeautifulSoup as BS
from datetime import datetime, timedelta
# ---------------------------------------
# get the 1st 100
base_url='https://www.leboncoin.fr/locations/offres/ile_de_france/paris/'
# ----------------------------------------
# a shortcut
urlget = urllib.urlopen
The default search criteria are flats with size > 30 sqm, in Paris.
The function returns the BeautifulSoup-parsed HTML of a given results page; each page contains a list of flats.
def get_page_from_url(pgnr,base_url):
    CL_paris_apa_html = "{0:s}?o={1:d}&sqs=2&ret=1&ret=2&ret=5".format(base_url,pgnr)
    print CL_paris_apa_html
    try:
        xpage = urlget(CL_paris_apa_html)
        CL_html = xpage.read()
        encoding = xpage.info()['content-type'].split('=')[-1]
    except IOError: # in case of network error
        print 'IOError'
        CL_html = ''
    try:
        return BS(CL_html.decode(encoding,'replace'),'lxml')
    except UnboundLocalError: # encoding was never set (network error)
        return BS(CL_html.decode('latin-1','replace'),'lxml')
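The query string can also be built with urllib.urlencode; a minimal sketch, under my reading of the search form (o is the page number, sqs seems to encode the minimum-surface criterion, and the repeated ret values the property types — assumptions, not a documented API):

from urllib import urlencode
# a list of tuples, so the repeated 'ret' key survives encoding
params = [('o', 2), ('sqs', 2), ('ret', 1), ('ret', 2), ('ret', 5)]
print base_url + '?' + urlencode(params)
# https://www.leboncoin.fr/locations/offres/ile_de_france/paris/?o=2&sqs=2&ret=1&ret=2&ret=5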
human_page=get_page_from_url(2,base_url)
https://www.leboncoin.fr/locations/offres/ile_de_france/paris/?o=2&sqs=2&ret=1&ret=2&ret=5
From each page given by the previous function, we record the list of flats, with their publication dates and pids.
def get_listings_from_html(human_page,base_url):
    today = datetime.now()
    yesterday = today - timedelta(days=1)
    yesterday = yesterday.strftime('%d/%m/%y')
    today = today.strftime('%d/%m/%y')
    apartments = human_page.findAll('a',attrs={'class':'list_item'})
    results = []
    for item in apartments:
        try:
            title = unidecode.unidecode(''.join(item.select('h2.item_title')[0].contents).strip())
        except IndexError:
            continue
        insert_time = item.select('aside.item_absolute > p.item_supp')
        if not len(insert_time):
            continue
        insert_time = insert_time[0].contents[-1].strip().replace(u"Aujourd'hui",today)
        insert_time = insert_time.replace(u"Hier",yesterday)
        url = 'http:'+item.attrs['href']
        pid = url.split('/')[-1].split('.')[0]
        results.append({'insert_time':insert_time,'pid':pid,'url':url,'title':title})
        md5_input = title
        results[-1].update({'md5sum':hashlib.md5(md5_input).hexdigest()})
    return results
apas=get_listings_from_html(human_page,base_url)
# apas[1]
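Each record is a small dict; a hypothetical example of its shape (all values invented for illustration):

pprint(apas[0])
# hypothetical output, values invented:
# {'insert_time': u'24/06/17, 14:36',
#  'md5sum': '<hex md5 of the title>',
#  'pid': '1234567890',
#  'title': 'T2 buttes chaumont',
#  'url': 'http://www.leboncoin.fr/locations/1234567890.htm?ca=12_s'}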
We have the urls of all flats listed on CL (leboncoin).. well, we have the functions to get the urls; the actual work will be done at the end. Finally, we can get the information out of each flat's individual page. This is really boring, but needed. The result is a list of features for each flat. These features will need further refinement, which I will do in future notebooks.
def select2text(din):
    basic_string = [' '.join(list(j.stripped_strings)) for j in din]
    # basic_string = [unidecode.unidecode(j) for j in basic_string]
    return basic_string
def make_col_name(nm):
    nm = unidecode.unidecode(nm)
    nm = nm.title()
    for sub in " ,./;'[]\<>?:{}|=+!@$%^&*()_-#":
        nm = nm.replace(sub,'')
    if nm[:2]=='Pi' and nm[-2:]=='es':
        nm = 'Pieces'
    if 'Meubl' in nm:
        nm = 'Meuble'
    return nm.strip()
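A quick sanity check of the label normalization, using the raw labels the site displays:

print make_col_name(u'Loyer mensuel')          # LoyerMensuel
print make_col_name(u'Pièces')                 # Pieces
print make_col_name(u'Meublé / Non meublé')    # Meuble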
# functions to process the raw features record
from collections import defaultdict as DDict
def default():
    return lambda out:out
def loyer(out):
    # process LoyerMensuel
    lm = out['LoyerMensuel']
    lm = make_col_name(lm)
    eur = lm.split('Eur')[0].replace(' ','')
    out['LoyerMensuel'] = atof(eur)
    # add a field
    out['ChargesComprises'] = False
    if 'ChargesComprises' in lm:
        out['ChargesComprises'] = True
    return out
def surface(out):
    # process Surface
    sr = out['Surface']
    sr = atof(sr.strip().split('m')[0])
    out['Surface'] = sr
    return out
def meublenonmeuble(out):
    # process Meuble
    mnm = out['Meuble']
    if 'Non' in mnm:
        out['Meuble'] = False
    else:
        out['Meuble'] = True
    return out
def ville(out):
    # process Ville
    vl = out['Ville']
    zipcode = atoi(vl.split()[-1])
    # NB: 75116 yields 116; folded back to 16 later by sistema_arrondissement
    arrondissement = zipcode - 75000
    out['Arrondissement'] = arrondissement
    return out
unrawify_dict=DDict(default)
unrawify_dict['Ville']=ville
unrawify_dict['Meuble']=meublenonmeuble
unrawify_dict['Surface']=surface
unrawify_dict['LoyerMensuel']=loyer
def unrawify_apas(indict):
    outdict = indict.copy()
    for j in indict.keys():
        outdict.update(unrawify_dict[j](indict))
    return outdict
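A minimal round trip, with a hand-made raw record (values invented, but shaped like the site's):

raw = {'LoyerMensuel': u'1 200 € Charges comprises',
       'Surface': u'45 m2',
       'Meuble': u'Non meublé',
       'Ville': u'Paris 75011'}
pprint(unrawify_apas(raw))
# {'Arrondissement': 11,
#  'ChargesComprises': True,
#  'LoyerMensuel': 1200.0,
#  'Meuble': False,
#  'Surface': 45.0,
#  'Ville': u'Paris 75011'}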
def apafeatures(apainfo):
    url = apainfo['url']
    apa = BS(urlget(url).read(),'lxml')
    col = select2text(apa.select('section.properties .property'))
    col = map(make_col_name,col)
    val = select2text(apa.select('section.properties .value'))
    lines = min(len(col),len(val))
    out = {}
    for i in xrange(lines):
        out[col[i]] = val[i]
    out = unrawify_apas(out)
    if len(val)!=len(col):
        out['Problematic'] = True
    else:
        out['Problematic'] = False
    out.update(apainfo)
    return out
As the docstring says: a sort of memoization for apafeatures, specialized for the flat records. This class saves its cache as a pickle file.
import cPickle as pkl
import os
import time
class get_features_cache(object):
    """
    sort of memoizing for apafeatures, specialized for the flat record
    """
    def __init__(self,fname,maxdt=-1):
        self.fname = fname
        try:
            self.db = pkl.load(open(fname,'rb'))
        except IOError:
            self.db = []
        self.clids = [j['md5sum'] for j in self.db]
        self.dirty = 0
        self.upd_time = time.strftime('%H:%M %d/%m/%y')
    def __call__(self,apainfo):
        url = apainfo['url']
        clid = apainfo['md5sum']
        ins_time = apainfo['insert_time']
        # check if app was already retrieved
        if clid in self.clids:
            #TODO check if retrieved version is too old/invalid
            return {'data':self.db[self.clids.index(clid)],'from_cache':True}
        else:
            #retrieve data, store, update self.clids, and finally return
            out = apafeatures(apainfo)
            self.clids.append(clid)
            self.db.append(out)
            self.dirty = 1
            self.upd_time = time.strftime('%H:%M %d/%m/%y')
            return {'data':out,'from_cache':False}
    def __len__(self):
        return len(self.db)
    def __del__(self):
        self._save()
    def _save(self):
        if self.dirty:
            print "saving apas db"
            print "db rows: {0:d}".format(len(self.db))
            pkl.dump(self.db,open(self.fname,'wb'))
            self.dirty = 0
    def upd_date(self):
        return self.upd_time
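A sketch of the intended usage (assuming an empty cache and the apas list from above; the second call is served from the cache without touching the network):

apagetter = get_features_cache(fname='data/LBClocations.pkl')
first = apagetter(apas[0])    # downloads the flat page: first['from_cache'] is False
again = apagetter(apas[0])    # same md5sum, served from self.db: again['from_cache'] is True
apagetter._save()             # persist explicitly (also triggered by __del__)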
This is a class that behaves like a function (see its __call__ method): it waits a random time before retrieving the requested URL. The waiting time is exponentially distributed, i.e. it is the waiting time between events of a Poisson process.
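Concretely, poisson() below uses inverse-CDF sampling: if $U \sim \mathrm{Uniform}(0,1)$, then $T = -\ln(1-U)/\lambda$ satisfies $P(T \le t) = 1 - e^{-\lambda t}$, i.e. $T$ is exponentially distributed with rate $\lambda$ and mean $1/\lambda$.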
# ----------------------------------------------------------
# Poissonian waiting time in the urlget function
class urlgetter(object):
    def __init__(self,waiting_time):
        # NB: used below as the rate lambda, so the mean wait is 1/waiting_time
        self.mean = waiting_time
    def __call__(self,url):
        import time
        # waiting time [s]
        wt = self.poisson()
        time.sleep(wt)
        return urllib.urlopen(url)
    def poisson(self):
        # inverse-CDF sampling of an exponential with rate self.mean
        from math import log
        from random import random
        return -log(1.0 - random()) / self.mean
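A quick check that the constructor argument is the rate (events per second), not the mean wait:

getter = urlgetter(2.0)   # rate 2 events/s, so the mean wait should be ~0.5 s
waits = [getter.poisson() for _ in xrange(10000)]
print sum(waits)/len(waits)   # ~0.5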
The point of all these efforts: the loop that rules them all, where the work actually gets done.
import random
pages = range(1,7)
apagetter = get_features_cache(fname='data/LBClocations.pkl',)
print 'The {2:s} the db {0:s} contains {1:d} locations'.format(apagetter.fname,len(apagetter),apagetter.upd_date())
urlget = urlgetter(1/.5)  # rebinds urlget, so every download below waits ~0.5 s on average
failed = []
flats = []
class Found(Exception): pass  # raise inside the loop to bail out early
try:
    for page in pages:
        human_page = get_page_from_url(page,base_url)
        apas = get_listings_from_html(human_page,base_url)
        print 'page', page
        if not len(apas): break
        for count,apa in enumerate(apas):
            try:
                last = apagetter(apa)
            except Exception,msg:
                failed.append(apa)
                print 'failed #{1:d} {0:s}'.format(apa,count)
                print msg
except Found:
    pass
apagetter._save()
print 'last downloaded record:'
print 'title:',last['data']['title']
print 'insert time:',last['data']['insert_time']
The 09:44 26/06/17 the db data/LBClocations.pkl contains 2227 locations
https://www.leboncoin.fr/locations/offres/ile_de_france/paris/?o=1&sqs=2&ret=1&ret=2&ret=5
page 1
https://www.leboncoin.fr/locations/offres/ile_de_france/paris/?o=2&sqs=2&ret=1&ret=2&ret=5
page 2
https://www.leboncoin.fr/locations/offres/ile_de_france/paris/?o=3&sqs=2&ret=1&ret=2&ret=5
page 3
https://www.leboncoin.fr/locations/offres/ile_de_france/paris/?o=4&sqs=2&ret=1&ret=2&ret=5
page 4
https://www.leboncoin.fr/locations/offres/ile_de_france/paris/?o=5&sqs=2&ret=1&ret=2&ret=5
page 5
https://www.leboncoin.fr/locations/offres/ile_de_france/paris/?o=6&sqs=2&ret=1&ret=2&ret=5
page 6
last downloaded record:
title: T2 buttes haumont
insert time: 24 juin, 14:36
flats = apagetter.db
len(flats)
2227
%pylab inline
import pandas as pd
import seaborn as sns
from datetime import datetime,date
df = pd.DataFrame(flats)
df.columns
Populating the interactive namespace from numpy and matplotlib
Index([u'Arrondissement', u'ChargesComprises', u'ClasseEnergie', u'ClasseInergie', u'ClasseShchnergie', u'ClasseYnergie', u'Description', u'Ges', u'LoyerMensuel', u'Meuble', u'Pieces', u'Problematic', u'Reference', u'Surface', u'TypeDeBien', u'Ville', u'insert_time', u'md5sum', u'pid', u'title', u'url'], dtype='object')
# https://stackoverflow.com/questions/26294333/parse-french-date-in-python
import dateparser
def correct_instime_format(j):
    if not isinstance(j,date):
        j = unidecode.unidecode(j)
        return dateparser.parse(j)
    return j
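A quick check on the two date shapes we expect; the exact datetimes depend on dateparser's language detection and settings, so take the outputs as indicative:

print correct_instime_format(u'24 juin, 14:36')    # -> datetime(2017, 6, 24, 14, 36) or similar
print correct_instime_format(u'26/06/17, 09:15')   # -> datetime(2017, 6, 26, 9, 15) or similar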
df['insert_time']=df['insert_time'].apply(correct_instime_format)
#######################################################################
def safe_conv_pos_int(x):
    try:
        return int(x)
    except ValueError:
        return -1
df['Pieces']=df['Pieces'].apply(safe_conv_pos_int)
df_all=df.copy() #backup
#######################################################################
# we only care about 2-pieces flats
df=df[df['Pieces']==2]
#######################################################################
# drop listings cheaper than 500 eu (junk) or larger than 70 sqm
df=df[(df.LoyerMensuel>500) & (df.Surface<70)]
print "*"*100
print "{0:d} flats recorded with 2 pieces, of size < 70sqm, and price >500eu".format(df.shape[0])
print "*"*100
****************************************************************************************************
935 flats recorded with 2 pieces, of size < 70sqm, and price >500eu
****************************************************************************************************
df['aux'] = pd.to_datetime(df['insert_time'])
df['weekday']=df['aux'].apply(lambda x:x.weekday())
df['ins_hour']=df['aux'].apply(lambda x:x.hour)
fig,(ax,bx)=sns.plt.subplots(1,2,figsize=(15,5))
sns.countplot(x='weekday',data=df,ax=ax)
ax.xaxis.set_ticklabels(['Mon','Tue','Wed','Thu','Fri','Sat','Sun'])
sns.countplot(x='ins_hour',data=df,ax=bx,order=np.roll(range(0,24),-6))
<matplotlib.axes._subplots.AxesSubplot at 0x7f5b0a6f1bd0>
print 'Max surface:',df.Surface.max()
print 'Min surface:',df.Surface.min()
print 'Min price:',df.LoyerMensuel.min()
sns.plt.hist(df.Surface.tolist(),bins=np.arange(0-2.5,int(df.Surface.max())+2.5,5))
sns.plt.xlabel('Surface');
Max surface: 69.0
Min surface: 25.0
Min price: 543.0
print ', '.join(df.columns)
Arrondissement, ChargesComprises, ClasseEnergie, ClasseInergie, ClasseShchnergie, ClasseYnergie, Description, Ges, LoyerMensuel, Meuble, Pieces, Problematic, Reference, Surface, TypeDeBien, Ville, insert_time, md5sum, pid, title, url, aux, weekday, ins_hour
i = [10,15,20,25,30,35,40]
o = sns.np.digitize(i,bins=sns.np.arange(0,130,5))
print 'Surface binning legend'
for p,q in zip(i,o):
    print p,'sqm -> #',q
Surface binning legend
10 sqm -> # 3
15 sqm -> # 4
20 sqm -> # 5
25 sqm -> # 6
30 sqm -> # 7
35 sqm -> # 8
40 sqm -> # 9
#######################################################################
df['price_bin']=sns.np.digitize(df.LoyerMensuel,bins=sns.np.arange(0,21*250,250))
df['sqm_bin'] = sns.np.digitize(df.Surface,bins=sns.np.arange(0,130,5))
df['size_sqm']= 5*(df['sqm_bin']-1)
df['price_sqm'] = df.LoyerMensuel/df.Surface
df = df[(df['price_sqm'] < 200)]
#######################################################################
mapges2int={'A':1,'B':2,'C':3,'D':4,'E':5,'F':6,'G':7,'H':8,'I':9,'N':10,'V':11,'X':12}
def safe_ges(x):
    try:
        return x.strip()[0]
    except AttributeError:  # NaN or missing value
        return 'X'
df['Ges_lit']=df['Ges'].apply(safe_ges)
df['Ges_int']=df['Ges_lit'].apply(lambda x:mapges2int[x])
########################################################################
df['ClasseEnergie_lit']=df['ClasseEnergie'].apply(safe_ges)
df['ClasseEnergie_int']=df['ClasseEnergie_lit'].apply(lambda x:mapges2int[x])
########################################################################
dfCC=df[df['ChargesComprises']==1]
dfNC=df[df['ChargesComprises']==0]
df=dfCC  # from here on, work with charges-included listings only
########################################################################
def sistema_arrondissement(x):
    # fold remainders above 100 (e.g. zipcode 75116 -> 116) back to the arrondissement number
    if x>100:
        return int(x-100)
    else:
        return int(x)
df['Arrondissement']=df['Arrondissement'].apply(sistema_arrondissement)
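The ville() step above left 75116 (the 16e's second postal code) as 116; a check that both shapes land on the same arrondissement:

print sistema_arrondissement(116)   # 16, from zipcode 75116
print sistema_arrondissement(16)    # 16, from zipcode 75016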
########################################################################
# sorting by average price per arrondissement
def mean_outliers(x):
    # mean of the values within one standard deviation of the raw mean
    _m = x.mean()
    _v = x.std()
    return x[(x>_m-_v) & (x<_m+_v)].mean()
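A tiny worked example of this one-sigma trimmed mean:

x = pd.Series([10., 11., 12., 100.])
print x.mean()            # 33.25, dragged up by the outlier
print mean_outliers(x)    # 11.0: the 100 falls outside mean +/- std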
sorted_district_by_aveprice = []
__tmp = []
for c in df.groupby('Arrondissement'):
    # __tmp.append((c[0],c[1].price_sqm.mean()))
    __tmp.append((c[0],mean_outliers(c[1].price_sqm)))
__tmp = np.asarray(__tmp)
__sidx = np.argsort(__tmp[:,1])
sorted_district_by_aveprice = __tmp[__sidx][:,0]
sorted_district_by_aveprice_labels = ['%d'%j for j in sorted_district_by_aveprice]
sorted_ges_labels = sorted(list(df['Ges_lit'].unique()))
#######################################################################
df['Meuble_int']=df['Meuble'].apply(safe_conv_pos_int)
#######################################################################
df.to_pickle("data/lbc_pandas.pkl")
fig,(ax,bx)=sns.plt.subplots(1,2,figsize=(15,5))
sns.countplot(x='ClasseEnergie_lit',data=df,order=sorted_ges_labels,hue='Meuble',ax=ax)
sns.countplot(x='Arrondissement',data=df,ax=bx)
fig,(ax,bx)=sns.plt.subplots(1,2,figsize=(15,5))
sns.countplot(x='size_sqm',data=df,hue='Meuble',ax=ax)
<matplotlib.axes._subplots.AxesSubplot at 0x7f5ad5ea79d0>
Remember that the surface and price bins are:
sqm_bin_palette=sns.cubehelix_palette(9, start=1, rot=.1)
# sns.palplot(sqm_bin_palette)
sns.set_palette('muted')
fig,(ax,bx)=sns.plt.subplots(1,2,figsize=(15,5))
sns.swarmplot(x='size_sqm',y='price_sqm',data=dfCC,ax=ax,palette='muted',hue='Meuble')
sns.boxplot(x='size_sqm',y='price_sqm',data=dfCC,hue='Meuble',ax=bx);
#####################################################################
fig,ax=sns.plt.subplots(1,1,figsize=(15,5))
sns.swarmplot(y=u'price_sqm',x=u'Arrondissement',data=df[df['Meuble']==True],order=sorted_district_by_aveprice,\
ax=ax,size=7,palette=sqm_bin_palette,hue='size_sqm')
sns.swarmplot(y=u'price_sqm',x=u'Arrondissement',data=df[df['Meuble']==False],order=sorted_district_by_aveprice,ax=ax,\
size=2.5,color='#ff0000')
_=ax.xaxis.set_ticklabels(sorted_district_by_aveprice_labels)
#####################################################################
fig,(ax,bx)=sns.plt.subplots(1,2,figsize=(15,5))
sns.swarmplot(y=u'size_sqm',x=u'ClasseEnergie_lit',data=dfCC,order=sorted_ges_labels,ax=bx)
sns.swarmplot(y=u'price_sqm',x=u'ClasseEnergie_lit',data=dfCC[dfCC['Meuble']==False],order=sorted_ges_labels,ax=ax,\
size=2.5,color='#ff0000')
sns.swarmplot(y=u'price_sqm',x=u'ClasseEnergie_lit',data=dfCC[dfCC['Meuble']==True],order=sorted_ges_labels,ax=ax,\
palette=sqm_bin_palette,hue='sqm_bin');
#####################################################################
line="#"*90
print line
print '* color code by sqm_bin too should give the trend of the 1st plot'
print '* # of records',df.shape[0]
print ' /20',df.shape[0]/20.
print ' /15',df.shape[0]/15.
print line
##########################################################################################
* color code by sqm_bin too should give the trend of the 1st plot
* # of records 907
 /20 45.35
 /15 60.4666666667
##########################################################################################
fig,(ax,bx)=sns.plt.subplots(1,2,figsize=(15,5))
range = sns.plt.np.arange  # NB: shadows the builtin range from here on
ax.hist2d(df['Arrondissement'],df['LoyerMensuel'],cmap='Blues',bins=[range(1,21)-.5,range(500,2500,100)-50]);
bx.hist2d(df['Arrondissement'],df['size_sqm'],cmap='Blues',bins=[range(1,21)-.5,sns.plt.np.arange(5*5,15*5,5)-2.5]);
ax.set_xlabel('Arrondissement')
ax.set_ylabel('LoyerMensuel')
bx.set_xlabel('Arrondissement')
bx.set_ylabel('size_sqm')
<matplotlib.text.Text at 0x7f5ad4b52690>
careof = ['Surface','price_sqm','Arrondissement','Meuble_int','weekday','ins_hour']
ylbl = ['LoyerMensuel',]
z = df[careof+ylbl].dropna()
print z.shape
(907, 7)
import qgrid
# qgrid.nbinstall(overwrite=True)
What's the probability that a flat has a given price, given its features?
There is still too little data to compute $\int \mathrm{price\_sqm}\; P(\mathrm{price\_sqm} \mid Arr, Meuble, sqm)\; \mathrm{d}\,\mathrm{price\_sqm}$, so I use the simpler conditionings on $(Arr, Meuble)$ or $(Arr, sqm)$, which need fewer classes.
A similar analysis could be tried with a naive Bayes or a logit model (TODO).
df['Arr_Meu_cls']=0
df['Arr_Sqm_cls']=0
def average_flat(to):
    def f(group):
        median_price = group.price_sqm.median()
        # label the flats pricier (per sqm) than their group's median
        group.loc[group.price_sqm > median_price, to] = 1
        return group
    return f
df = df.groupby(('Arrondissement','Meuble')).apply(average_flat('Arr_Meu_cls'))
df = df.groupby(('Arrondissement','size_sqm')).apply(average_flat('Arr_Sqm_cls'))
careof = ['LoyerMensuel','Surface','price_sqm','Arrondissement','Meuble','ClasseEnergie_lit','url']
qgrid.show_grid(df[df.Arr_Meu_cls>0][careof])
print df[df.Arr_Meu_cls>0].shape[0]*1./df.shape[0]
0.469680264609
careof = ['LoyerMensuel','Surface','price_sqm','Arrondissement','Meuble_int','weekday','ins_hour']
ylbl = ['Arr_Sqm_cls',]
z = df[careof+ylbl].dropna()
print z.shape
(885, 8)
Since the rent of a flat must be smaller than 1/3 of what the tenants earn (as a couple), we also get the distribution of salaries expected for people living in Paris:
$ price < (salary_1 + salary_2)/3 $
Assuming that $salary_1 = salary_2$ (because of highly qualified jobs), we get $salary_1 > \frac{3}{2}\,price$.
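For example, a hypothetical rent of 1200 €/month would imply $salary_1 > \frac{3}{2}\cdot 1200 = 1800$ € net per month for each partner.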
def plot_salaries(df,Meuble=0):
    fig,(ax,bx)=sns.plt.subplots(1,2,figsize=(15,5))
    ifs = (df.Meuble==Meuble)
    salary1 = df[ifs].LoyerMensuel.values*3/2.
    bins = range(0,6000,100)  # NB: range is np.arange here (shadowed above)
    ax.hist(salary1,bins=bins);
    ax.axvline(salary1.mean(),color='w')
    ax.hist(salary1*2,bins=bins);
    ax.axvline(salary1.mean()*2,color='w')
    ax.set_xlabel('blue mean salary 1p, green 1+1p')
    mean_price_per_month = df[ifs].LoyerMensuel.values.mean()
    price_per_month = df[ifs].LoyerMensuel.values
    min_surf = 7
    max_surf = 10+1
    ifs = ifs & (df['sqm_bin']<max_surf) & (df['sqm_bin']>min_surf)  # bins 8-10, i.e. 35-50 sqm
    price_per_month_select = df[ifs].LoyerMensuel.values
    bx.hist(price_per_month,bins=bins);
    bx.hist(price_per_month_select,bins=bins);
    bx.axvline(mean_price_per_month,color='w')
    bx.set_xlabel('blue: mean price per month (2pieces), green: 35<sqm<50' )
    bx.text(mean_price_per_month,-2,u'{0:.0f}€'.format(mean_price_per_month),)
    fig.suptitle('Meuble:{0:d}'.format(Meuble))
plot_salaries(df,Meuble=1)
plot_salaries(df,Meuble=0)
# Download shapes of paris arrondissements
!wget -c https://raw.githubusercontent.com/gregoiredavid/france-geojson/master/departements/75/communes.geojson -O data/75.geojson
--2017-06-26 09:48:44--  https://raw.githubusercontent.com/gregoiredavid/france-geojson/master/departements/75/communes.geojson
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.120.133
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.120.133|:443... connected.
HTTP request sent, awaiting response... 404 Not Found
2017-06-26 09:48:45 ERROR 404: Not Found.
from ipyleaflet import (
Map,
Marker,
TileLayer, ImageOverlay,
Polyline, Polygon, Rectangle, Circle, CircleMarker,
GeoJSON,
DrawControl
)
import json
import matplotlib as mpl
with open('data/75.geojson') as f:
    data = json.load(f)
def Gnormalize(x):
    """
    normalizes the 1D array/list x to be between 0. and 1.
    """
    x = sns.plt.np.asarray(x)
    x = x.copy()
    x -= x.min()
    x /= x.max()
    return x
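A quick check (the input should be floats: the in-place /= would truncate an integer array):

print Gnormalize([10., 20., 30.])   # [ 0.   0.5  1. ]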
We will assign a color code to each arrondissement, based on the average price.
from collections import defaultdict
def ddict(x,default=lambda : np.nan):
    """
    returns a default dict with content dict(x)
    """
    out = defaultdict(default)
    out.update(x)
    return out
tmp = []
for c in df[(df.Surface<50) & (df.Meuble_int==1)].groupby('Arrondissement'):
    tmp.append((c[0],mean_outliers(c[1].price_sqm)))
tmp = np.asarray(tmp)
aveprice_prenorm = ddict(tmp)
# normalize prices to compute colors per arrondissmement
tmp=tmp.T
tmp[1]=Gnormalize(tmp[1])
map_palette=sns.diverging_palette(220, 20, n=len(tmp),as_cmap=1)
_ = [(i,mpl.colors.rgb2hex(map_palette(j))) for i,j in tmp.T]
aveprice_color = ddict(_)
# flats_per_arrondissement = []
# for j in df[df['Surface']<50].groupby('Arrondissement'):
# flats_per_arrondissement.append([ float(j[0]),len(j[1])])
# flats_per_arrondissement = np.asarray(flats_per_arrondissement)
# flats_per_arrondissement[:,1] = Gnormalize(flats_per_arrondissement[:,1])
# flats_per_arrondissement = dict(flats_per_arrondissement)
for j,feature in enumerate(data['features']):
    where = atof(feature['properties']['code'])-75100
    price = aveprice_prenorm[where]
    if np.isnan(price):  # `not price` would miss NaN, since bool(nan) is True
        continue
    # price = flats_per_arrondissement[where]
    color = aveprice_color[where]
    feature['properties']['style'] = {'color':color, 'weight': .5, 'fillColor':color, 'fillOpacity':0.75}
    feature['properties']['district'] = 'Arrondissement: {0:.0f}, price: {1:.0f}eu/sqm'.format(where,price)
layer = GeoJSON(data=data, hover_style={'fillOpacity': 1})
import sys
def hover_handler(event=None, id=None, properties=None):
    sys.stdout.write("\r" + properties['district'])
    sys.stdout.flush()
center = [48.85906816414895, 2.343006134033203]
zoom = 13
m = Map(center=center, zoom=zoom)
layer.on_hover(hover_handler)
m.add_layer(layer)
m.height='500px'
print 'price per sqm (<50sqm, meuble\') per district'
print 'You need to run the notebook to see the map :/'
m
from IPython.display import HTML
import urllib2
style=open('style.css','r').read()
HTML("""
<style>{0}</style>
""".format(style))