Jeffrey S. Whitaker Phone : (303)497-6313
Meteorologist FAX : (303)497-6449
NOAA/OAR/CDC R/CDC1 Email :
Jeffrey.S.Whitaker-32lpuo7BZBA@xxxxxxxxxxxxxxxx
325 Broadway Office : Skaggs Research Cntr 1D-124
Boulder, CO, USA 80303-3328 Web : http://tinyurl.com/5telg

http://osdir.com/ml/python.matplotlib.general/2005-10/msg00029.htmlvedi anche:

http://www.stanford.edu/class/stats202/unemployment.html
import pandas
import pylab
from mpl_toolkits.basemap import Basemap
import matplotlib as mpl

def chart_info(Series_data, shapefile, extension, shape_key, 
                title='', cmap = ['b','purple','r']):
    """create a basemap from a shapefile and the information contained in a series coded by region name
    
    Arguments
    ===========
    Series_data: a pandas Series
                This series should contains the data for each region with the same code that can be found under the shapefile
    shapefile: string
                The location of the shapefile that contains the information about the geography
    extension: 4-tuple
                the extension of the projection, in the format north, south, east, west
    shape_key: string
                tha name of the field in the shape file that indicate the region. The values of this field
                should match those on the Series_data
    title: string, optional
                title of the plot
    cmap: pylab colormap or list of colour names, optional
                this gives the colormap that will be used to colorize the plot
    
    Returns
    ===========
    ax: pylab.Axes
                the axes on which the plot has been drawn
    """
    ax = pylab.gca()
    # create the colormap if a list of names is given, otherwise
    # use the given colormap
    lscm = matplotlib.colors.LinearSegmentedColormap
    if isinstance(cmap,(list,tuple)):
        cmap = lscm.from_list('mycm',cmap)
    #create a new basemap with the given extension
    #TODO: allow to create more general projections
    north, south, east, west = extension
    m = Basemap(llcrnrlon=west,llcrnrlat=south,urcrnrlon=east,urcrnrlat=north,
            projection='lcc',lat_0=(south+north)/2, lon_0=(east+west)/2)
    #use basemap the read and draw the shapefile
    #it will add two variables to the basemap, m.states and m.states_info
    m.readshapefile(shapefile,'states',drawbounds=True);
    #find minimum and maximum of the dataset to normalize the colors
    max_pop = Series_data.max()*1.0
    min_pop = Series_data.min()*1.0
    # cycle through states, color each one.
    # m.states contains the lines of the borders
    # m.states_info contains the info on the region, like the name
    for state_borders, state_info in zip(m.states, m.states_info):
        statename = state_info[shape_key]
        #skip those that aren't in the dataset without complaints
        if statename not in Series_data:
            continue
        #set the color for each region
        pop = Series_data[statename]
        color =  cmap( (pop-min_pop) / (max_pop-min_pop) )
        #extract the x and y of the countours and plot them
        xx,yy = zip(*state_borders)
        patches = ax.fill(xx,yy,facecolor=color,edgecolor=color)
    ax.set_title(title);
    #generate a sintetic colorbar starting from the maximum and minimum of the dataset
    axc, kw = matplotlib.colorbar.make_axes(ax)
    norm = mpl.colors.Normalize(vmin=min_pop, vmax=max_pop)
    cb1 = mpl.colorbar.ColorbarBase(axc, cmap=cmap, norm=norm)
    return ax

try:
    data_USA = pandas.read_csv('./USAinfo/unemployment_2011.csv')
except:
    data_USA = pandas.read_csv("http://stats202.stanford.edu/data/unemployment_2011.csv",sep='|')
    data_USA.to_csv('./USAinfo/unemployment_2011.csv')

data_USA[:5]

reduced = data_USA[data_USA.period=='May-12'].groupby('state').sum()[['civilian','employed','unemployed']]
reduced['ratio'] =  reduced['unemployed'] / reduced['civilian']
reduced.index = [ i.title() for i in reduced.index ]

reduced[:5]

popdensity_USA = reduced['ratio']

#fig, ax = pylab.subplots(1,figsize=(8,6))
chart_info(popdensity_USA, shapefile='./USAmaps/st99_d00', cmap = pylab.cm.hot,
            extension=(49, 22, -64, -119), shape_key='NAME', title='Unemployment fraction for state')

data_ITA = pandas.read_csv('./ITAinfo/output_file.csv',sep=';')

interesse = 'Indice di criminalità diffusa (1)'
#interesse = 'Indice di criminalità diffusa (2)'
#interesse = 'Indice di criminalità organizzata'
#interesse = 'Indice di criminalità violenta'
#interesse = 'Indice di criminalità minorile (escluso il furto)'
#interesse = 'Indice di criminalità minorile'
#interesse = 'Percezione delle famiglie del rischio di criminalità nella zona in cui vivono'
#interesse = 'Indice di microcriminalità nelle città (1)'
#interesse = 'Indice di microcriminalità nelle città (2)'


regions = pandas.Index( s.lower() for s in data_ITA['DESCRIZIONE_RIPARTIZIONE'].unique()[:20] )

data_redux = data_ITA[data_ITA['TITOLO'] == interesse]
data_redux = data_redux[['DESCRIZIONE_RIPARTIZIONE', 'ANNO_RIFERIMENTO', 'VALORE']]
data_redux['VALORE'] = data_redux['VALORE'].str.replace(',','.').apply(float)
data_redux['ANNO_RIFERIMENTO'] = data_redux['ANNO_RIFERIMENTO'].apply(int)
data_redux['DESCRIZIONE_RIPARTIZIONE'] = data_redux['DESCRIZIONE_RIPARTIZIONE'].str.lower()
data_redux.columns = ['region', 'year', 'rate']
data_redux = data_redux.groupby('region').mean()
data_redux = data_redux.ix[regions]['rate']

keys = ['Abruzzo', 'Basilicata', 'Calabria', 'Campania', 'Emilia-Romagna', 'Friuli-Venezia Giulia', 'Lazio', 'Liguria', 
'Lombardia', 'Marche', 'Molise', 'Piemonte', 'Apulia', 'Sardegna', 'Sicily', 'Toscana', 'Trentino-Alto Adige', 'Umbria', "Valle d'Aosta", 'Veneto']

popdensity_ITA = pandas.Series({ new: data_redux[old] for old, new in zip(sorted(data_redux.keys()),keys) })

titolo = data_ITA[data_ITA['TITOLO'] == interesse].SOTTOTITOLO.unique()[0].decode('utf8')

#fig, ax =pylab.subplots(1,figsize=(12,9))
chart_info(popdensity_ITA, shapefile='./ITAmaps/amministrativa/ITA_adm1',
            extension=(47, 36, 20, 7), shape_key='NAME_1', title=titolo)

conv = { i: lambda s: s.split()[0] for i in range(1995,2012)}
data_EUR = pandas.read_csv("./EURinfo/GDP per capita in PPS.tsv", sep='[\t,]', na_values=':', skiprows=1,
                         converters=conv, names=['indicator','aggregatore','country']+range(1995,2012))
data_EUR = data_EUR.set_index('country')

popdensity_EUR = data_EUR[2011].apply(float)

#fig, ax =pylab.subplots(1,figsize=(8,6))
chart_info(popdensity_EUR, shapefile='./EURmaps/CNTR_2010_60M_SH/data/CNTR_RG_60M_2010', cmap = ['r','y','g','c','b'],
            extension=(70, 32, 45, -7), shape_key='CNTR_ID', title='GDP per capita in PPS')