# import libraries and read in the csv file
import re as re, pandas as pd, numpy as np, requests, json
# Load the listings and preview the first five rows.
df = pd.read_csv('bay.csv')
print(df.head())

# Price: remove '$' characters from both ends, then convert to float64.
df['price'] = df['price'].str.strip('$').astype('float64')

# Neighborhood: trim surrounding whitespace, then '(' and then ')' from the ends.
df['neighborhood'] = (
    df['neighborhood']
    .str.strip()
    .str.strip('(')
    .str.strip(')')
)

# Date: split on whitespace once and reuse the tokens. The first token is
# kept as text (month); the second and third are cast to int32 (day, year).
date_parts = df['date'].str.split()
df['month'] = date_parts.str[0]
df['day'] = date_parts.str[1].astype('int32')
df['year'] = date_parts.str[2].astype('int32')
# The original column is no longer needed once it has been broken out.
df.drop(columns='date', inplace=True)
def clean_br(value):
    """Extract the bedroom count that precedes ``'br'`` in a listing string.

    The input is presumably housing text shaped like ``'/ 2br - 1000ft2 -'``
    (TODO confirm against the CSV); only the run of digits directly before
    ``'br'`` is used, so surrounding separators do not matter.

    Args:
        value: Raw cell value. Anything that is not a ``str`` is treated as
            missing.

    Returns:
        The bedroom count as an ``int``, or ``None`` when ``value`` is not a
        string or contains no ``<digits>br`` token.
    """
    if not isinstance(value, str):
        return None
    # Anchor on the digits themselves rather than slicing from a fixed offset
    # after '/': str.find() returns -1 when '/' is absent, which would shift
    # the slice and drop or mangle the number. The lookbehind refuses to
    # match the tail of a longer number such as the '5' in '1.5br'.
    found = re.search(r'(?<![\d.,])(\d+)\s*br', value)
    return int(found.group(1)) if found else None
# Run clean_br over every entry of the bedrooms column, then store the
# results back under the same column name.
bedroom_counts = df['bedrooms'].map(clean_br)
df['bedrooms'] = bedroom_counts
def clean_sqft(value):
    """Extract the square footage that precedes ``'ft'`` in a listing string.

    The input is presumably housing text shaped like ``'/ 2br - 1000ft2 -'``
    or ``'/ 1000ft2 -'`` (TODO confirm against the CSV); only the run of
    digits directly before ``'ft'`` is used, so it does not matter whether a
    bedroom token comes first.

    Args:
        value: Raw cell value. Anything that is not a ``str`` is treated as
            missing.

    Returns:
        The square footage as an ``int``, or ``None`` when ``value`` is not a
        string or contains no ``<digits>ft`` token.
    """
    if not isinstance(value, str):
        return None
    # Anchor on the digits themselves rather than slicing from an offset
    # after '/' or '-': str.find() returns -1 when the separator is absent,
    # and a 'br' elsewhere in the text would pick the wrong separator. The
    # lookbehind refuses to match the tail of a longer number such as the
    # '200' in '1,200ft'; such values yield None instead of a wrong figure.
    found = re.search(r'(?<![\d.,])(\d+)\s*ft', value)
    return int(found.group(1)) if found else None
# Run clean_sqft over every entry of the sqft column and store the results
# back under the same column name.
df['sqft'] = df['sqft'].map(clean_sqft)
# Preview the first rows after cleaning. A bare `df.head()` expression is
# only rendered by an interactive session; run as a script its result is
# discarded, so print it like the preview after loading.
print(df.head())