#!/usr/bin/env python
# coding: utf-8

# # A LISA map of spatial clusters in `#Brexit`
# 
# Sources:
# 
# * [Referendum data](http://www.electoralcommission.org.uk/find-information-by-subject/elections-and-referendums/upcoming-elections-and-referendums/eu-referendum/electorate-and-count-information).
# * [ONS Local Authority District boundaries](https://data.gov.uk/dataset/local-authority-districts-december-2014-full-extent-boundaries-in-great-britain).

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')

import matplotlib.pyplot as plt
import geopandas as gpd
import pandas as pd
import pysal as ps
import urllib2


# ## Load and link data
# 
# The official referendum results are available from the Electoral Commission:

# In[2]:


ref_link = "http://www.electoralcommission.org.uk/__data/assets/file/0014/212135/EU-referendum-result-data.csv"


# This is a `csv` that may be read directly from the EC server:

# In[3]:


ref = pd.read_csv(ref_link, index_col='Area_Code')


# Results are available at the Local Authority level, which we can link to the ONS official boundaries. The `GeoJSON` needs to be downloaded locally to read it:

# In[4]:


lad_link = 'http://opengeography.ons.opendata.arcgis.com/datasets/943be9957ccf419399bfafc2ad92bc0d_0.geojson'
# Download the file
buf = urllib2.urlopen(lad_link)
lads = buf.read()
tmp = open('lads.geojson', 'w')
tmp.write(lads)
tmp.close()


# Now we can read it up:

# In[5]:


lads = gpd.read_file('lads.geojson', driver='GeoJSON').set_index('LAD14CD')


# And plot it:

# In[6]:


lads.plot(linewidth=0);


# And now we can simply link the two sides, referendum and spatial data:

# In[7]:


refl = gpd.GeoDataFrame(lads.join(ref), crs=lads.crs)\
            .to_crs(epsg=4277)


# ## LISA analysis

# * Spatial weights: we'll go for standard queen contiguity.

# In[8]:


# Write out to a temporaty shapefile
refl.to_file('tmp')
# Create weights
wq = ps.queen_from_shapefile('tmp/tmp.shp')
# This might not work on Windows
get_ipython().system(' rm -r tmp')


# Because of islands, we create a Knn-1 matrix as well:

# In[9]:


pts = refl.centroid

xys = pd.np.array([(pt.x, pt.y) for pt in pts])
knn1 = ps.knnW_from_array(xys, k=1)


# And now we join both using a `union`:

# In[10]:


w = ps.weights.Wsets.w_union(wq, knn1)
w.transform = 'R'


# * LISA computation

# In[11]:


lisa = ps.Moran_Local(refl['Pct_Leave'].values, w, permutations=999)


# Now let us pull out areas with statistically significant spatial clustering (at the 5% level):

# In[12]:


refl['significant'] = lisa.p_sim < 0.05
refl['quadrant'] = lisa.q


# At this point, we are ready to visualize the output!

# In[13]:


# Setup the figure and axis
f, ax = plt.subplots(1, figsize=(18, 18))
# Plot HH clusters
hh = refl.loc[(refl['quadrant']==1) & (refl['significant']==True), 'geometry']
hh.plot(ax=ax, color='red', linewidth=0.1, edgecolor='w')
# Plot LL clusters
hh = refl.loc[(refl['quadrant']==3) & (refl['significant']==True), 'geometry']
hh.plot(ax=ax, color='blue', linewidth=0.1, edgecolor='w')
# Plot LH clusters
hh = refl.loc[(refl['quadrant']==2) & (refl['significant']==True), 'geometry']
hh.plot(ax=ax, color='#83cef4', linewidth=0.1, edgecolor='w')
# Plot HL clusters
hh = refl.loc[(refl['quadrant']==4) & (refl['significant']==True), 'geometry']
hh.plot(ax=ax, color='#e59696', linewidth=0.1, edgecolor='w')
# Non-significant
ns = refl.loc[refl['significant']!=True, 'geometry']
ns.plot(ax=ax, color='0.75', linewidth=0.1, edgecolor='w')
# Style and draw
f.suptitle('LISA of % for Leave', size=30)
f.set_facecolor('1')
ax.set_axis_off()
plt.axis('equal')
plt.show()