the shape files can be downloaded at
http://www.gadm.org/country
for the data see also
http://www.naturalearthdata.com/
take a look at the following example
http://www.geophysique.be/2011/01/27/matplotlib-basemap-tutorial-07-shapefiles-unleached/
import pandas
import pylab
from mpl_toolkits.basemap import Basemap
import matplotlib as mpl
/usr/local/lib/python2.7/dist-packages/pytz/__init__.py:35: UserWarning: Module dap was already imported from None, but /usr/lib/python2.7/dist-packages is being added to sys.path from pkg_resources import resource_stream
def chart_info(Series_data, shapefile, extension, shape_key,
title='', cmap = ['b','purple','r']):
"""create a basemap from a shapefile and the information contained in a series coded by region name
Arguments
===========
Series_data: a pandas Series
This series should contains the data for each region with the same code that can be found under the shapefile
shapefile: string
The location of the shapefile that contains the information about the geography
extension: 4-tuple
the extension of the projection, in the format north, south, east, west
shape_key: string
tha name of the field in the shape file that indicate the region. The values of this field
should match those on the Series_data
title: string, optional
title of the plot
cmap: pylab colormap or list of colour names, optional
this gives the colormap that will be used to colorize the plot
Returns
===========
ax: pylab.Axes
the axes on which the plot has been drawn
"""
ax = pylab.gca()
# create the colormap if a list of names is given, otherwise
# use the given colormap
lscm = matplotlib.colors.LinearSegmentedColormap
if isinstance(cmap,(list,tuple)):
cmap = lscm.from_list('mycm',cmap)
#create a new basemap with the given extension
#TODO: allow to create more general projections
north, south, east, west = extension
m = Basemap(llcrnrlon=west,llcrnrlat=south,urcrnrlon=east,urcrnrlat=north,
projection='lcc',lat_0=(south+north)/2, lon_0=(east+west)/2)
#use basemap the read and draw the shapefile
#it will add two variables to the basemap, m.states and m.states_info
m.readshapefile(shapefile,'states',drawbounds=True);
#find minimum and maximum of the dataset to normalize the colors
max_pop = Series_data.max()*1.0
min_pop = Series_data.min()*1.0
# cycle through states, color each one.
# m.states contains the lines of the borders
# m.states_info contains the info on the region, like the name
for state_borders, state_info in zip(m.states, m.states_info):
statename = state_info[shape_key]
#skip those that aren't in the dataset without complaints
if statename not in Series_data:
continue
#set the color for each region
pop = Series_data[statename]
color = cmap( (pop-min_pop) / (max_pop-min_pop) )
#extract the x and y of the countours and plot them
xx,yy = zip(*state_borders)
patches = ax.fill(xx,yy,facecolor=color,edgecolor=color)
ax.set_title(title);
#generate a sintetic colorbar starting from the maximum and minimum of the dataset
axc, kw = matplotlib.colorbar.make_axes(ax)
norm = mpl.colors.Normalize(vmin=min_pop, vmax=max_pop)
cb1 = mpl.colorbar.ColorbarBase(axc, cmap=cmap, norm=norm)
return ax
try:
data_USA = pandas.read_csv('./USAinfo/unemployment_2011.csv')
except:
data_USA = pandas.read_csv("http://stats202.stanford.edu/data/unemployment_2011.csv",sep='|')
data_USA.to_csv('./USAinfo/unemployment_2011.csv')
data_USA[:5]
Unnamed: 0 | code | state_fips | county_fips | period | civilian | employed | unemployed | rate | county | state | state_code | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0 | CN010010 | 1 | 1 | Jun-11 | 26328 | 23987 | 2341 | 8.9 | autauga | alabama | AL |
1 | 1 | PA011000 | 1 | 3 | Jun-11 | 88382 | 80841 | 7541 | 8.5 | baldwin | alabama | AL |
2 | 2 | CN010050 | 1 | 5 | Jun-11 | 9957 | 8774 | 1183 | 11.9 | barbour | alabama | AL |
3 | 3 | CN010070 | 1 | 7 | Jun-11 | 9360 | 8307 | 1053 | 11.3 | bibb | alabama | AL |
4 | 4 | CN010090 | 1 | 9 | Jun-11 | 26573 | 24168 | 2405 | 9.1 | blount | alabama | AL |
reduced = data_USA[data_USA.period=='May-12'].groupby('state').sum()[['civilian','employed','unemployed']]
reduced['ratio'] = reduced['unemployed'] / reduced['civilian']
reduced.index = [ i.title() for i in reduced.index ]
reduced[:5]
civilian | employed | unemployed | ratio | |
---|---|---|---|---|
Alabama | 2150161 | 1989181 | 160980 | 0.074869 |
Alaska | 367279 | 341506 | 25773 | 0.070173 |
Arizona | 3026485 | 2778186 | 248299 | 0.082042 |
Arkansas | 1392559 | 1290240 | 102319 | 0.073476 |
California | 18431866 | 16519142 | 1912724 | 0.103773 |
popdensity_USA = reduced['ratio']
#fig, ax = pylab.subplots(1,figsize=(8,6))
chart_info(popdensity_USA, shapefile='./USAmaps/st99_d00', cmap = pylab.cm.hot,
extension=(49, 22, -64, -119), shape_key='NAME', title='Unemployment fraction for state')
<matplotlib.axes.AxesSubplot at 0x4181810>
http://www.eea.europa.eu/data-and-maps/data/eea-reference-grids/
http://www3.istat.it/ambiente/cartografia/
riferiti alla zona:
ED_1950_UTM Zona 32
latitudini (prese da wikipedia):
47° 05' 31" di latitudine Nord
35° 29' 24" di latitudine Nord
18°31'18" di longitudine Est
6° 37'32" di longitudine Est
infomazioni trovate su:
http://www.istat.it/it/archivio/16777
convertite in utf-8 con:
iconv -f LATIN1 -t UTF-8 -o output_file.csv Archivio_unico_indicatori_regionali.csv
data_ITA = pandas.read_csv('./ITAinfo/output_file.csv',sep=';')
interesse = 'Indice di criminalità diffusa (1)'
#interesse = 'Indice di criminalità diffusa (2)'
#interesse = 'Indice di criminalità organizzata'
#interesse = 'Indice di criminalità violenta'
#interesse = 'Indice di criminalità minorile (escluso il furto)'
#interesse = 'Indice di criminalità minorile'
#interesse = 'Percezione delle famiglie del rischio di criminalità nella zona in cui vivono'
#interesse = 'Indice di microcriminalità nelle città (1)'
#interesse = 'Indice di microcriminalità nelle città (2)'
regions = pandas.Index( s.lower() for s in data_ITA['DESCRIZIONE_RIPARTIZIONE'].unique()[:20] )
data_redux = data_ITA[data_ITA['TITOLO'] == interesse]
data_redux = data_redux[['DESCRIZIONE_RIPARTIZIONE', 'ANNO_RIFERIMENTO', 'VALORE']]
data_redux['VALORE'] = data_redux['VALORE'].str.replace(',','.').apply(float)
data_redux['ANNO_RIFERIMENTO'] = data_redux['ANNO_RIFERIMENTO'].apply(int)
data_redux['DESCRIZIONE_RIPARTIZIONE'] = data_redux['DESCRIZIONE_RIPARTIZIONE'].str.lower()
data_redux.columns = ['region', 'year', 'rate']
data_redux = data_redux.groupby('region').mean()
data_redux = data_redux.ix[regions]['rate']
keys = ['Abruzzo', 'Basilicata', 'Calabria', 'Campania', 'Emilia-Romagna', 'Friuli-Venezia Giulia', 'Lazio', 'Liguria',
'Lombardia', 'Marche', 'Molise', 'Piemonte', 'Apulia', 'Sardegna', 'Sicily', 'Toscana', 'Trentino-Alto Adige', 'Umbria', "Valle d'Aosta", 'Veneto']
popdensity_ITA = pandas.Series({ new: data_redux[old] for old, new in zip(sorted(data_redux.keys()),keys) })
titolo = data_ITA[data_ITA['TITOLO'] == interesse].SOTTOTITOLO.unique()[0].decode('utf8')
#fig, ax =pylab.subplots(1,figsize=(12,9))
chart_info(popdensity_ITA, shapefile='./ITAmaps/amministrativa/ITA_adm1',
extension=(47, 36, 20, 7), shape_key='NAME_1', title=titolo)
<matplotlib.axes.AxesSubplot at 0x506de50>
shapefiles found on:
http://epp.eurostat.ec.europa.eu/portal/page/portal/gisco_Geographical_information_maps/popups/references/administrative_units_statistical_units_1
copre tutto il mondo, in realtà...
data found on:
https://ec.europa.eu/digital-agenda/en/download-data
http://epp.eurostat.ec.europa.eu/portal/page/portal/eurostat/home/
conv = { i: lambda s: s.split()[0] for i in range(1995,2012)}
data_EUR = pandas.read_csv("./EURinfo/GDP per capita in PPS.tsv", sep='[\t,]', na_values=':', skiprows=1,
converters=conv, names=['indicator','aggregatore','country']+range(1995,2012))
data_EUR = data_EUR.set_index('country')
popdensity_EUR = data_EUR[2011].apply(float)
#fig, ax =pylab.subplots(1,figsize=(8,6))
chart_info(popdensity_EUR, shapefile='./EURmaps/CNTR_2010_60M_SH/data/CNTR_RG_60M_2010', cmap = ['r','y','g','c','b'],
extension=(70, 32, 45, -7), shape_key='CNTR_ID', title='GDP per capita in PPS')
<matplotlib.axes.AxesSubplot at 0x506d9d0>