#!/usr/bin/env python # coding: utf-8 # # A LISA map of spatial clusters in `#Brexit` # # Sources: # # * [Referendum data](http://www.electoralcommission.org.uk/find-information-by-subject/elections-and-referendums/upcoming-elections-and-referendums/eu-referendum/electorate-and-count-information). # * [ONS Local Authority District boundaries](https://data.gov.uk/dataset/local-authority-districts-december-2014-full-extent-boundaries-in-great-britain). # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt import geopandas as gpd import pandas as pd import pysal as ps import urllib2 # ## Load and link data # # The official referendum results are available from the Electoral Commission: # In[2]: ref_link = "http://www.electoralcommission.org.uk/__data/assets/file/0014/212135/EU-referendum-result-data.csv" # This is a `csv` that may be read directly from the EC server: # In[3]: ref = pd.read_csv(ref_link, index_col='Area_Code') # Results are available at the Local Authority level, which we can link to the ONS official boundaries. The `GeoJSON` needs to be downloaded locally to read it: # In[4]: lad_link = 'http://opengeography.ons.opendata.arcgis.com/datasets/943be9957ccf419399bfafc2ad92bc0d_0.geojson' # Download the file buf = urllib2.urlopen(lad_link) lads = buf.read() tmp = open('lads.geojson', 'w') tmp.write(lads) tmp.close() # Now we can read it up: # In[5]: lads = gpd.read_file('lads.geojson', driver='GeoJSON').set_index('LAD14CD') # And plot it: # In[6]: lads.plot(linewidth=0); # And now we can simply link the two sides, referendum and spatial data: # In[7]: refl = gpd.GeoDataFrame(lads.join(ref), crs=lads.crs)\ .to_crs(epsg=4277) # ## LISA analysis # * Spatial weights: we'll go for standard queen contiguity. # In[8]: # Write out to a temporaty shapefile refl.to_file('tmp') # Create weights wq = ps.queen_from_shapefile('tmp/tmp.shp') # This might not work on Windows get_ipython().system(' rm -r tmp') # Because of islands, we create a Knn-1 matrix as well: # In[9]: pts = refl.centroid xys = pd.np.array([(pt.x, pt.y) for pt in pts]) knn1 = ps.knnW_from_array(xys, k=1) # And now we join both using a `union`: # In[10]: w = ps.weights.Wsets.w_union(wq, knn1) w.transform = 'R' # * LISA computation # In[11]: lisa = ps.Moran_Local(refl['Pct_Leave'].values, w, permutations=999) # Now let us pull out areas with statistically significant spatial clustering (at the 5% level): # In[12]: refl['significant'] = lisa.p_sim < 0.05 refl['quadrant'] = lisa.q # At this point, we are ready to visualize the output! # In[13]: # Setup the figure and axis f, ax = plt.subplots(1, figsize=(18, 18)) # Plot HH clusters hh = refl.loc[(refl['quadrant']==1) & (refl['significant']==True), 'geometry'] hh.plot(ax=ax, color='red', linewidth=0.1, edgecolor='w') # Plot LL clusters hh = refl.loc[(refl['quadrant']==3) & (refl['significant']==True), 'geometry'] hh.plot(ax=ax, color='blue', linewidth=0.1, edgecolor='w') # Plot LH clusters hh = refl.loc[(refl['quadrant']==2) & (refl['significant']==True), 'geometry'] hh.plot(ax=ax, color='#83cef4', linewidth=0.1, edgecolor='w') # Plot HL clusters hh = refl.loc[(refl['quadrant']==4) & (refl['significant']==True), 'geometry'] hh.plot(ax=ax, color='#e59696', linewidth=0.1, edgecolor='w') # Non-significant ns = refl.loc[refl['significant']!=True, 'geometry'] ns.plot(ax=ax, color='0.75', linewidth=0.1, edgecolor='w') # Style and draw f.suptitle('LISA of % for Leave', size=30) f.set_facecolor('1') ax.set_axis_off() plt.axis('equal') plt.show()