#!/usr/bin/env python # coding: utf-8 # In[96]: # Import quandl to retrieve data # Pandas and matplotlib standard imports for data analysis import quandl import pandas as pd import matplotlib.pyplot as plt get_ipython().run_line_magic('matplotlib', 'inline') # Use Plotly for better graphs import plotly.offline as plotly import plotly.graph_objs as go # Offline Plotting plotly.init_notebook_mode(connected=True) # In[97]: # Constants # Populate your own quandl API Token # Populate your own mapbox API Token quandl.ApiConfig.api_key = {YOUR API KEY} mapbox_access_token = {YOUR API KEY} # Start date and end date should be the same start_date = '2018-06-30' end_date = '2018-06-30' # Specify Zillow Indicator we want to graph indicator = 'MRPFAH' # Specify the county and state we want to generate the map for county = 'Alameda' state = 'CA' # In[98]: # We are going to fetch the data for each zip code in the county and add that to a single dataframe def get_city_data(cities, indicator): df_array = [] for city, code in cities.items(): quandl_db_code = f'ZILLOW/Z{code}_{indicator}' try: data = quandl.get(quandl_db_code, start_date=start_date, end_date=end_date) data.rename(index=str, columns={'Value': city}, inplace=True) data.index = pd.to_datetime(data.index, format='%Y/%m/%d') df_array.append(data) except: print(f'Error fetching for: {quandl_db_code}') ret = pd.concat(df_array, axis=1) return ret # In[99]: # Here we are going to figure out what zip codes are in the specified county # We will also use the logitude / latitude data for plotting ZIP_CODE_DTYPE = {'zip_code': str} ZIP_CODES = pd.read_csv('zip_codes_states.csv', dtype=ZIP_CODE_DTYPE) county_df = ZIP_CODES['county'] == county state_df = ZIP_CODES['state'] == state data = ZIP_CODES[county_df & state_df] # In[100]: data.index = data['zip_code'] # In[101]: # We will call our function to fetch the data from quandl # There will be errors for data we cannot retrieve real_estate_data = get_city_data(data['zip_code'], indicator) # In[102]: # Here we are just doing some transformation and massaging of the data we get back # If you want more information please run the notebook on your own transformed_real_estate_data = real_estate_data.transpose() column_name = f'{indicator} {end_date}' # In[103]: transformed_real_estate_data[column_name] = transformed_real_estate_data[end_date].astype('float') # In[104]: concat_data = pd.concat([data, transformed_real_estate_data[column_name]], axis=1, sort=True) # In[105]: concat_data.dropna(inplace=True) # In[106]: concat_data['text'] = concat_data['zip_code'] + ' ' + concat_data[column_name].astype(str) # In[107]: df = concat_data # In[108]: # Here is our final result dataframe # We can see for each zip code we have the longitude and latitude data # As well as the Zillow Indicator and text label df.head() # In[109]: # Now it's just a matter of plotting using the Plotly Mapbox graph plot_data = [ go.Scattermapbox( lon = df['longitude'], lat = df['latitude'], mode='markers', marker=dict( reversescale = True, autocolorscale = False, size=17, opacity=0.85, colorscale = 'YlOrRd', cmin = df[column_name].min(), color = df[column_name], cmax = df[column_name].max(), colorbar=dict( title="Price" ) ), text = df['text'], hoverinfo= 'text' )] layout = go.Layout( title= f'{indicator} in {county}, {state} {end_date}', autosize=True, hovermode='closest', showlegend=False, mapbox=dict( accesstoken=mapbox_access_token, bearing=0, center=dict( lat=df.iloc[0]['latitude'], lon=df.iloc[0]['longitude'] ), pitch=0, zoom=8, style='light' ), ) # In[110]: # Finally calling our plot function after declaring the layout fig = dict( data=plot_data, layout=layout ) plotly.iplot( fig, filename=f'{indicator} {county} {state} {end_date}') # In[111]: # Output to div for blog post # plotly.plot(fig, filename=f'{indicator} {county} {state} {end_date}', include_plotlyjs=False, output_type='div') # In[ ]: