# pyspatial imports
import os
import sys
import glob
import numpy as np
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import geopandas as gpd
import rasterio as rio
from rasterio.plot import show
from rasterio.plot import plotting_extent
from rasterio.warp import calculate_default_transform, reproject, Resampling
from shapely.geometry import Polygon, mapping
from rasterio.mask import mask
import georasters as gr
import folium
import mplleaflet
import pysal as ps
import esda
import libpysal as lps
from pysal.lib.weights.weights import W
from pysal.viz.splot.esda import lisa_cluster
import pysal.viz.splot as esda_viz
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline
# run for jupyter notebook
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'
#%%
root = '~/Desktop/atlas'
%cd $root
raw = gpd.read_file('data/poverty_exports_v1_aggregatesee_export.geojson')
/home/alal/Desktop/atlas
lev0 = raw.loc[raw.LEVEL == 'LV0']
lev2 = raw.loc[raw.LEVEL == 'LV2']
lev2.head()
id | CC_2 | ENGTYPE_1 | ENGTYPE_2 | GID_0 | GID_1 | GID_2 | HASC_2 | LEVEL | NAME_0 | ... | V_1_6_consumptions_2003-2006_sum | V_1_6_consumptions_2003-2018_4year_roc_mean | V_1_6_consumptions_2003-2018_4year_roc_sum | V_1_6_consumptions_2007-2010_mean | V_1_6_consumptions_2007-2010_sum | V_1_6_consumptions_2011-2014_mean | V_1_6_consumptions_2011-2014_sum | V_1_6_consumptions_2015-2018_mean | V_1_6_consumptions_2015-2018_sum | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 000058e3213c88e2b2b4 | Province | Filled | LV2 | Western Sahara | ... | 14308.652402 | -0.221642 | -29004.454629 | 4.088659 | 30561.685828 | 3.887429 | 29057.538720 | 3.906107 | 29197.155510 | POLYGON ((-17.10542533932115 20.84151027780265... | |||||
3 | 0000f2d0855e25d2a53b | Province | Filled | LV2 | Western Sahara | ... | 9178.143105 | 0.173301 | 10142.209942 | 3.775642 | 14802.854392 | 4.279764 | 16779.323147 | 4.241402 | 16628.920911 | POLYGON ((-15.0003991189957 24.5872451550492, ... | |||||
5 | 000001ab73d83f47ec14 | Department | LV2 | Mauritania | ... | 12781.034856 | 0.246567 | 17579.683774 | 3.769391 | 16811.764722 | 4.415786 | 19694.734050 | 4.488398 | 20018.587421 | POLYGON ((-13.14573182804001 22.73141093570878... | ||||||
8 | 00004f874dd66803f44d | Department | LV2 | Mauritania | ... | 0.000000 | NaN | 0.000000 | NaN | 0.000000 | NaN | 0.000000 | NaN | 0.000000 | POLYGON ((-12.47445736618586 22.58293696719922... | ||||||
9 | 000082f372b2719bf354 | Department | LV2 | Mauritania | ... | 658.909221 | -0.075290 | -436.876867 | 2.586953 | 981.776122 | 2.399081 | 910.476785 | 2.399081 | 910.476785 | POLYGON ((-12.00560651522347 24.99913908979712... |
5 rows × 46 columns
from pysal.lib.weights.contiguity import Queen
from pysal.lib import examples
from pysal.explore.esda.moran import Moran_Local
from pysal.viz.splot.esda import plot_local_autocorrelation
nga = lev2.loc[:, ['NAME_0', 'NAME_2', 'V_1_1_asset_wealth_2003-2006_mean','V_1_1_asset_wealth_2007-2010_mean','V_1_1_asset_wealth_2011-2014_mean',
'V_1_1_asset_wealth_2015-2018_mean', 'V_1_1_asset_wealth_2003-2018_4year_roc_mean', 'geometry']]
nga.columns = ['country', 'dist', 'wealth_03_06','wealth_07_10','wealth_11_14','wealth_15_18', 'growth', 'geometry']
nga.dropna(inplace = True)
%time w = lps.weights.Queen.from_dataframe(nga)
CPU times: user 56.4 s, sys: 1.28 s, total: 57.7 s Wall time: 58.2 s
/home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/libpysal/weights/weights.py:168: UserWarning: There are 8 disconnected observations Island ids: 479, 481, 1477, 3472, 4267, 5484, 5499, 5500 " Island ids: %s" % ', '.join(str(island) for island in self.islands))
Row-standardise spatial weight matrix such that spatial lag is average among neighbours.
w.transform = 'R'
('WARNING: ', 479, ' is an island (no neighbors)') ('WARNING: ', 481, ' is an island (no neighbors)') ('WARNING: ', 1477, ' is an island (no neighbors)') ('WARNING: ', 3472, ' is an island (no neighbors)') ('WARNING: ', 4267, ' is an island (no neighbors)') ('WARNING: ', 5484, ' is an island (no neighbors)') ('WARNING: ', 5499, ' is an island (no neighbors)') ('WARNING: ', 5500, ' is an island (no neighbors)')
queen_card = pd.Series(w.cardinalities)
# number of neighbours
sns.distplot(queen_card, bins= 10)
<matplotlib.axes._subplots.AxesSubplot at 0x7f78f2e13710>
Sumstats on number of neighbours
w.mean_neighbors
w.min_neighbors
w.max_neighbors
w.islands
5.7087159658450135
0
26
[479, 481, 1477, 3472, 4267, 5484, 5499, 5500]
nga['splag_growth'] = lps.weights.lag_spatial(w, nga['growth'])
mi = esda.moran.Moran(nga.growth, w)
mi.I
mi.p_sim
0.5164074939263005
0.001
Measure of spatial correlation
$$I = \frac{N}{W} \frac{\sum_i \sum_j w_{ij} (x_i - \bar{x} ) ( x_j - \bar{x} ) }{ \sum_i (x_i - \bar{x})^2 }$$where $N$ is the total number of units, $x$ is the variable of interest, $w_{ij}$ is the spatial weight between units $i$ and $j$, and $W$ is the sum of all weights $w_{ij}$
$I \in [-1, 1]$. Under null of no spatial correlation, $E(I) = \frac{-1}{N-1} \rightarrow 0$ with large $N$.
from splot.esda import plot_moran
plot_moran(mi)
(<Figure size 720x288 with 2 Axes>, array([<matplotlib.axes._subplots.AxesSubplot object at 0x7f78f28d7fd0>, <matplotlib.axes._subplots.AxesSubplot object at 0x7f78f28608d0>], dtype=object))
Vanilla Moran Plot
y = nga.growth
li = esda.moran.Moran_Local(y, w, permutations = 1000)
/home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/esda/moran.py:886: RuntimeWarning: invalid value encountered in true_divide self.z_sim = (self.Is - self.EI_sim) / self.seI_sim /home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:901: RuntimeWarning: invalid value encountered in greater return (a < x) & (x < b) /home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:901: RuntimeWarning: invalid value encountered in less return (a < x) & (x < b) /home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:1807: RuntimeWarning: invalid value encountered in greater_equal cond2 = (x >= _b) & cond0
sig = 1 * (li.p_sim < 0.05)
hotspot = 1 * (sig * li.q==1)
coldspot = 3 * (sig * li.q==3)
doughnut = 2 * (sig * li.q==2)
diamond = 4 * (sig * li.q==4)
spots = hotspot + coldspot + doughnut + diamond
spot_labels = [ '0 ns', '1 HH', '2 LH', '3 LL', '4 HL']
labels = [spot_labels[i] for i in spots]
f,ax = plt.subplots(3,1, figsize=(100,80))
nga.plot(column='growth', ax=ax[0], cmap='viridis')
lev0.plot(facecolor = 'none', edgecolor = 'r', ax = ax[0])
ax[0].axis(nga.total_bounds[np.asarray([0,2,1,3])])
ax[0].set_title("growth")
nga.plot(column='splag_growth', ax=ax[1], cmap='viridis')
lev0.plot(facecolor = 'none', edgecolor = 'r', ax = ax[1])
ax[1].axis(nga.total_bounds[np.asarray([0,2,1,3])])
ax[1].set_title("Spatial Lag growth")
lisa_cluster(li, nga, ax = ax[2])
lev0.plot(facecolor = 'none', edgecolor = 'k', ax = ax[2])
ax[2].set_title("Local Indicators of Spatial Autocorrelation")
ax[0].axis('off')
ax[1].axis('off')
ax[2].axis('off')
<matplotlib.axes._subplots.AxesSubplot at 0x7f78f2ea4b00>
<matplotlib.axes._subplots.AxesSubplot at 0x7f78f2ea4b00>
array([-17.16054202, 51.41569248, -34.83513231, 37.54208835])
Text(0.5, 1, 'growth')
<matplotlib.axes._subplots.AxesSubplot at 0x7f78f2ebb8d0>
<matplotlib.axes._subplots.AxesSubplot at 0x7f78f2ebb8d0>
array([-17.16054202, 51.41569248, -34.83513231, 37.54208835])
Text(0.5, 1, 'Spatial Lag growth')
(<Figure size 7200x5760 with 3 Axes>, <matplotlib.axes._subplots.AxesSubplot at 0x7f78f2d5fbe0>)
<matplotlib.axes._subplots.AxesSubplot at 0x7f78f2d5fbe0>
Text(0.5, 1, 'Local Indicators of Spatial Autocorrelation')
(-17.16054202489259, 51.41569248364537, -34.83513231327589, 37.54208835439316)
(-17.16054202489259, 51.41569248364537, -34.83513231327589, 37.54208835439316)
(-20.978013869660703, 54.8630118338028, -38.453993346659345, 41.16094938777661)
nga.to_file('data/pov_exp_l2.geojson', driver = 'GeoJSON')
def moran_scatterplotter(cname, colname = 'growth', figsize = (30, 10), cmap = 'viridis'):
gdf = nga.loc[nga.country == cname, [colname, 'geometry']]
y = gdf[colname]
w = Queen.from_dataframe(gdf)
# row-standardise
w.transform = 'r'
moran_loc = Moran_Local(y, w)
fig = plot_local_autocorrelation(moran_loc, gdf, colname, p=0.05, figsize = figsize, cmap = cmap)
fig[0].suptitle(f'Spatial Autocorrelation in {colname} in {cname}')
plt.show()
countries = nga.country.unique().tolist()
omit_list = ['Spain', 'Western Sahara']
countries = [x for x in countries if x not in omit_list]
list(map(moran_scatterplotter, countries))
/home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/pysal/lib/weights/weights.py:168: UserWarning: There are 2 disconnected observations Island ids: 1, 3 " Island ids: %s" % ', '.join(str(island) for island in self.islands)) /home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/pysal/explore/esda/moran.py:895: RuntimeWarning: invalid value encountered in true_divide self.z_sim = (self.Is - self.EI_sim) / self.seI_sim /home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:901: RuntimeWarning: invalid value encountered in greater return (a < x) & (x < b) /home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:901: RuntimeWarning: invalid value encountered in less return (a < x) & (x < b) /home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/scipy/stats/_distn_infrastructure.py:1807: RuntimeWarning: invalid value encountered in greater_equal cond2 = (x >= _b) & cond0
('WARNING: ', 1, ' is an island (no neighbors)') ('WARNING: ', 3, ' is an island (no neighbors)')
/home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/pysal/lib/weights/contiguity.py:184: UserWarning: There is one disconnected observation (no neighbors). Island id: 14 W.__init__(self, neighbors, ids=ids, **kw)
('WARNING: ', 14, ' is an island (no neighbors)')
/home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/pysal/lib/weights/contiguity.py:184: UserWarning: There is one disconnected observation (no neighbors). Island id: 225 W.__init__(self, neighbors, ids=ids, **kw)
('WARNING: ', 225, ' is an island (no neighbors)')
/home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/pysal/lib/weights/weights.py:170: UserWarning: The weights matrix is not fully connected. There are 2 components warnings.warn("The weights matrix is not fully connected. There are %d components" % self.n_components)
/home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/pysal/lib/weights/weights.py:168: UserWarning: There are 3 disconnected observations Island ids: 122, 136, 137 " Island ids: %s" % ', '.join(str(island) for island in self.islands))
('WARNING: ', 122, ' is an island (no neighbors)') ('WARNING: ', 136, ' is an island (no neighbors)') ('WARNING: ', 137, ' is an island (no neighbors)')
/home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/pysal/lib/weights/contiguity.py:184: UserWarning: There is one disconnected observation (no neighbors). Island id: 156 W.__init__(self, neighbors, ids=ids, **kw)
('WARNING: ', 156, ' is an island (no neighbors)')
/home/alal/anaconda3/envs/gds/lib/python3.6/site-packages/pysal/lib/weights/contiguity.py:184: UserWarning: There is one disconnected observation (no neighbors). Island id: 128 W.__init__(self, neighbors, ids=ids, **kw)
('WARNING: ', 128, ' is an island (no neighbors)')
[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]