%matplotlib inline
from IPython.display import IFrame, Image
import contextily as ctx
import geopandas as gpd
import numpy as np
import pandas as pd
import pysal as ps
import matplotlib.pyplot as plt
/Users/dani/anaconda/envs/gds/lib/python3.6/site-packages/pysal/__init__.py:65: VisibleDeprecationWarning: PySAL's API will be changed on 2018-12-31. The last release made with this API is version 1.14.4. A preview of the next API version is provided in the `pysal` 2.0 prelease candidate. The API changes and a guide on how to change imports is provided at https://pysal.org/about ), VisibleDeprecationWarning)
url = ('https://impakter.com/wp-content/uploads/'\
'2017/12/favela-et-condos-de-luxe-au-bresil'\
'-noah-addis-corbis-768x510.jpg')
Image(url=url)
url = ('https://www.gov.uk/government/uploads/system/'\
'uploads/attachment_data/file/467774/'\
'File_7_ID_2015_All_ranks__deciles_and'\
'_scores_for_the_Indices_of_Deprivation_'\
'_and_population_denominators.csv')
imd = pd.read_csv(url, index_col=0)
imd.head()
LSOA name (2011) | Local Authority District code (2013) | Local Authority District name (2013) | Index of Multiple Deprivation (IMD) Score | Index of Multiple Deprivation (IMD) Rank (where 1 is most deprived) | Index of Multiple Deprivation (IMD) Decile (where 1 is most deprived 10% of LSOAs) | Income Score (rate) | Income Rank (where 1 is most deprived) | Income Decile (where 1 is most deprived 10% of LSOAs) | Employment Score (rate) | ... | Indoors Sub-domain Rank (where 1 is most deprived) | Indoors Sub-domain Decile (where 1 is most deprived 10% of LSOAs) | Outdoors Sub-domain Score | Outdoors Sub-domain Rank (where 1 is most deprived) | Outdoors Sub-domain Decile (where 1 is most deprived 10% of LSOAs) | Total population: mid 2012 (excluding prisoners) | Dependent Children aged 0-15: mid 2012 (excluding prisoners) | Population aged 16-59: mid 2012 (excluding prisoners) | Older population aged 60 and over: mid 2012 (excluding prisoners) | Working age population 18-59/64: for use with Employment Deprivation Domain (excluding prisoners) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
LSOA code (2011) | |||||||||||||||||||||
E01031349 | Adur 001A | E07000223 | Adur | 12.389 | 21352 | 7 | 0.096 | 18992 | 6 | 0.083 | ... | 20379 | 7 | 0.312 | 11318 | 4 | 1318 | 206 | 694 | 418 | 702.75 |
E01031350 | Adur 001B | E07000223 | Adur | 28.619 | 8864 | 3 | 0.187 | 9233 | 3 | 0.162 | ... | 16285 | 5 | 0.234 | 12445 | 4 | 1212 | 232 | 712 | 268 | 720.75 |
E01031351 | Adur 001C | E07000223 | Adur | 11.713 | 22143 | 7 | 0.065 | 24539 | 8 | 0.066 | ... | 25054 | 8 | 0.208 | 12820 | 4 | 1577 | 290 | 829 | 458 | 838.25 |
E01031352 | Adur 001D | E07000223 | Adur | 16.446 | 17252 | 6 | 0.117 | 16087 | 5 | 0.113 | ... | 24455 | 8 | 0.109 | 14350 | 5 | 1453 | 233 | 739 | 481 | 748.25 |
E01031370 | Adur 001E | E07000223 | Adur | 18.265 | 15643 | 5 | 0.102 | 17918 | 6 | 0.115 | ... | 20214 | 7 | 0.321 | 11202 | 4 | 1443 | 306 | 799 | 338 | 795.50 |
5 rows × 56 columns
url = ('http://geoportal1-ons.opendata.arcgis.com/'\
'datasets/da831f80764346889837c72508f046fa_3')
IFrame(url, 600, 400)
url = ('http://geoportal1-ons.opendata.arcgis.com/datasets'\
'/da831f80764346889837c72508f046fa_3.geojson')
%time lsoa = gpd.read_file(url).set_index('lsoa11cd')
CPU times: user 3.44 s, sys: 383 ms, total: 3.82 s Wall time: 21.2 s
lsoa.head()
objectid | lsoa11nm | lsoa11nmw | st_areashape | st_lengthshape | geometry | |
---|---|---|---|---|---|---|
lsoa11cd | ||||||
E01013150 | 1 | North East Lincolnshire 013B | North East Lincolnshire 013B | 2.842166e+05 | 2286.649368 | POLYGON ((-0.1361269459310195 53.5564371900317... |
E01015580 | 2 | Swindon 007D | Swindon 007D | 4.534211e+05 | 3426.828755 | POLYGON ((-1.785727243039375 51.57653396919847... |
E01023601 | 3 | North Hertfordshire 010A | North Hertfordshire 010A | 2.801437e+05 | 2228.370570 | POLYGON ((-0.2742822481507952 51.9490044407343... |
E01024217 | 4 | Dover 005B | Dover 005B | 7.351294e+06 | 13012.376562 | POLYGON ((1.40257897665496 51.23871265508399, ... |
E01000722 | 5 | Bromley 023A | Bromley 023A | 5.691620e+05 | 3524.308527 | POLYGON ((0.1146063324029615 51.39254569837126... |
db = lsoa.join(imd)\
.loc[:, ['LSOA name (2011)',
'Index of Multiple Deprivation (IMD) Score',
'Index of Multiple Deprivation (IMD) Rank (where 1 is most deprived)',
'geometry']]\
.dropna()\
.to_crs(epsg=3857)
db.info()
<class 'geopandas.geodataframe.GeoDataFrame'> Index: 32844 entries, E01013150 to E01033556 Data columns (total 4 columns): LSOA name (2011) 32844 non-null object Index of Multiple Deprivation (IMD) Score 32844 non-null float64 Index of Multiple Deprivation (IMD) Rank (where 1 is most deprived) 32844 non-null float64 geometry 32844 non-null object dtypes: float64(2), object(2) memory usage: 1.3+ MB
ax = db.plot(column='Index of Multiple Deprivation (IMD) Score',
scheme='quantiles', legend=True,
figsize=(12, 12))
ctx.add_basemap(ax, url=ctx.sources.ST_TONER_BACKGROUND,
alpha=0.25);
/Users/dani/anaconda/envs/gds/lib/python3.6/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result. return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
%time w = ps.weights.Queen.from_dataframe(db)
CPU times: user 11.1 s, sys: 112 ms, total: 11.2 s Wall time: 11.3 s
/Users/dani/anaconda/envs/gds/lib/python3.6/site-packages/pysal/weights/weights.py:186: UserWarning: There is one disconnected observation (no neighbors) warnings.warn("There is one disconnected observation (no neighbors)") /Users/dani/anaconda/envs/gds/lib/python3.6/site-packages/pysal/weights/weights.py:187: UserWarning: Island id: E01019077 warnings.warn("Island id: %s" % str(self.islands[0]))
def disparity(row):
id = row.name
neis = db.loc[w.neighbors[id],
'Index of Multiple Deprivation (IMD) Score']
disp = neis - row['Index of Multiple Deprivation (IMD) Score']
disp = disp.reset_index()
disp['lsoa11cd'] = disp['lsoa11cd'].apply(
lambda x: '-'.join(
pd.Series([x, id]).sort_values())
)
return disp
tst = disparity(db.iloc[0, :])
tst
lsoa11cd | Index of Multiple Deprivation (IMD) Score | |
---|---|---|
0 | E01013150-E01013228 | 1.990 |
1 | E01013150-E01013226 | -3.159 |
2 | E01013149-E01013150 | -2.496 |
3 | E01013148-E01013150 | -4.380 |
%time disps = pd.concat(db.apply(disparity, axis=1).values)
disps.head()
CPU times: user 2min 23s, sys: 593 ms, total: 2min 24s Wall time: 2min 25s
lsoa11cd | Index of Multiple Deprivation (IMD) Score | |
---|---|---|
0 | E01013150-E01013228 | 1.990 |
1 | E01013150-E01013226 | -3.159 |
2 | E01013149-E01013150 | -2.496 |
3 | E01013148-E01013150 | -4.380 |
0 | E01015580-E01015582 | -12.062 |
disps.shape
(192584, 2)
disps = disps.drop_duplicates(subset='lsoa11cd')
disps.shape
(96292, 2)
disps['abs_diff'] = np.abs(disps['Index of Multiple Deprivation (IMD) Score'])
disps.head()
lsoa11cd | Index of Multiple Deprivation (IMD) Score | abs_diff | |
---|---|---|---|
0 | E01013150-E01013228 | 1.990 | 1.990 |
1 | E01013150-E01013226 | -3.159 | 3.159 |
2 | E01013149-E01013150 | -2.496 | 2.496 |
3 | E01013148-E01013150 | -4.380 | 4.380 |
0 | E01015580-E01015582 | -12.062 | 12.062 |
top = disps.sort_values('abs_diff', ascending=False).head()
top
lsoa11cd | Index of Multiple Deprivation (IMD) Score | abs_diff | |
---|---|---|---|
5 | E01008978-E01032145 | 74.645 | 74.645 |
7 | E01012721-E01025569 | 73.216 | 73.216 |
0 | E01013818-E01028121 | -70.560 | 70.560 |
0 | E01013190-E01013208 | -69.397 | 69.397 |
4 | E01013185-E01013208 | -68.847 | 68.847 |
def map_disparity(pair, saveto=None):
sub = db.loc[pair.split('-'), :]
least, most = sub.sort_values('Index of Multiple Deprivation (IMD) Score').index
f, ax = plt.subplots(1, figsize=(20, 20))
sub.loc[[most], :].plot(color='red', ax=ax, alpha=0.35)
sub.loc[[least], :].plot(color='blue', ax=ax, alpha=0.35)
ctx.add_basemap(ax, url=ctx.sources.ST_TONER,
alpha=0.75, zoom=14)
most_name = db.loc[most, 'LSOA name (2011)']
least_name = db.loc[least, 'LSOA name (2011)']
most_imd = np.round(db.loc[most, 'Index of Multiple Deprivation (IMD) Score'], 2)
least_imd = np.round(db.loc[least, 'Index of Multiple Deprivation (IMD) Score'], 2)
title = (f"Red: {most_name} (IMD: {most_imd}) | "\
f"Blue: {least_name} (IMD: {least_imd})")
ax.set_title(title, fontsize=20)
ax.set_axis_off()
if saveto is not None:
plt.savefig(saveto, dpi=300)
return plt.show()
Top disparity maps:
for pair in top['lsoa11cd']:
map_disparity(pair)