# -*- coding: utf-8 -*-
#
# Jeffrey S. Whitaker          Phone  : (303)497-6313
# Meteorologist                FAX    : (303)497-6449
# NOAA/OAR/CDC R/CDC1          Email  : Jeffrey.S.Whitaker-32lpuo7BZBA@xxxxxxxxxxxxxxxx
# 325 Broadway                 Office : Skaggs Research Cntr 1D-124
# Boulder, CO, USA 80303-3328  Web    : http://tinyurl.com/5telg
#
# http://osdir.com/ml/python.matplotlib.general/2005-10/msg00029.html
# see also: http://www.stanford.edu/class/stats202/unemployment.html

import pandas
import pylab
from mpl_toolkits.basemap import Basemap
import matplotlib as mpl


def chart_info(Series_data, shapefile, extension, shape_key, title='',
               cmap=('b', 'purple', 'r')):
    """Draw a shapefile on a Basemap and colour each region by a Series value.

    Parameters
    ----------
    Series_data : pandas.Series
        One value per region, indexed by the same region codes that the
        shapefile stores under ``shape_key``.
    shapefile : str
        Path of the shapefile (as accepted by ``Basemap.readshapefile``).
    extension : 4-tuple
        Map extent, in the order ``(north, south, east, west)``.
    shape_key : str
        Name of the shapefile field that identifies the region. Its values
        are looked up in the index of ``Series_data``.
    title : str, optional
        Title of the plot.
    cmap : colormap or list/tuple of colour names, optional
        Colormap used to colour the regions. A list or tuple of colour
        names is turned into a linear segmented colormap.

    Returns
    -------
    ax : the current pylab axes, on which the map has been drawn.
    """
    ax = pylab.gca()

    # Build a colormap when a sequence of colour names is given; otherwise
    # use the colormap that was passed in.
    if isinstance(cmap, (list, tuple)):
        cmap = mpl.colors.LinearSegmentedColormap.from_list('mycm', cmap)

    # Create a new basemap with the given extent.
    # TODO: allow more general projections.
    north, south, east, west = extension
    m = Basemap(llcrnrlon=west, llcrnrlat=south,
                urcrnrlon=east, urcrnrlat=north,
                projection='lcc',
                # 2.0 keeps the centre at the true midpoint even where "/"
                # is integer division (Python 2).
                lat_0=(south + north) / 2.0,
                lon_0=(east + west) / 2.0)

    # Read and draw the shapefile; this adds two attributes to the basemap,
    # m.states (border lines) and m.states_info (per-shape fields).
    m.readshapefile(shapefile, 'states', drawbounds=True)

    # Minimum and maximum of the dataset, used to normalise the colours.
    min_pop = Series_data.min() * 1.0
    max_pop = Series_data.max() * 1.0
    span = max_pop - min_pop

    for state_borders, state_info in zip(m.states, m.states_info):
        statename = state_info[shape_key]
        # Silently skip shapes that have no entry in the dataset.
        if statename not in Series_data:
            continue
        pop = Series_data[statename]
        # When every value is identical there is no range to scale over;
        # use the low end of the colormap instead of dividing by zero.
        fraction = (pop - min_pop) / span if span else 0.0
        color = cmap(fraction)
        # Split the border into x and y coordinates and fill the polygon.
        xx, yy = zip(*state_borders)
        ax.fill(xx, yy, facecolor=color, edgecolor=color)

    ax.set_title(title)

    # Stand-alone colorbar spanning the minimum and maximum of the dataset.
    axc, _ = mpl.colorbar.make_axes(ax)
    norm = mpl.colors.Normalize(vmin=min_pop, vmax=max_pop)
    mpl.colorbar.ColorbarBase(axc, cmap=cmap, norm=norm)
    return ax


# ---------------------------------------------------------------------------
# USA: unemployment fraction per state
# ---------------------------------------------------------------------------
try:
    data_USA = pandas.read_csv('./USAinfo/unemployment_2011.csv')
except (IOError, OSError):
    # No readable local copy: download the data and cache it for next time.
    data_USA = pandas.read_csv(
        "http://stats202.stanford.edu/data/unemployment_2011.csv", sep='|')
    data_USA.to_csv('./USAinfo/unemployment_2011.csv')
data_USA[:5]  # preview expression; has no effect when run as a plain script

# Keep the 'May-12' rows and sum the three count columns per state.
may_2012 = data_USA[data_USA.period == 'May-12']
reduced = may_2012.groupby('state')[['civilian', 'employed', 'unemployed']].sum()
reduced['ratio'] = reduced['unemployed'] / reduced['civilian']
# Title-case the state names before they are matched against the shapefile
# field 'NAME'.
reduced.index = [i.title() for i in reduced.index]
reduced[:5]  # preview expression; has no effect when run as a plain script

popdensity_USA = reduced['ratio']
#fig, ax = pylab.subplots(1,figsize=(8,6))
chart_info(popdensity_USA,
           shapefile='./USAmaps/st99_d00',
           cmap=pylab.cm.hot,
           extension=(49, 22, -64, -119),
           shape_key='NAME',
           title='Unemployment fraction for state')

# ---------------------------------------------------------------------------
# ITA: crime indicators per region
# ---------------------------------------------------------------------------
data_ITA = pandas.read_csv('./ITAinfo/output_file.csv', sep=';')

# Indicator to plot (compared against the 'TITOLO' column); the alternatives
# are kept commented out.
interesse = 'Indice di criminalità diffusa (1)'
#interesse = 'Indice di criminalità diffusa (2)'
#interesse = 'Indice di criminalità organizzata'
#interesse = 'Indice di criminalità violenta'
#interesse = 'Indice di criminalità minorile (escluso il furto)'
#interesse = 'Indice di criminalità minorile'
#interesse = 'Percezione delle famiglie del rischio di criminalità nella zona in cui vivono'
#interesse = 'Indice di microcriminalità nelle città (1)'
#interesse = 'Indice di microcriminalità nelle città (2)'

# Lower-cased names of the first 20 distinct 'DESCRIZIONE_RIPARTIZIONE' values.
regions = pandas.Index(
    [s.lower() for s in data_ITA['DESCRIZIONE_RIPARTIZIONE'].unique()[:20]])

# Rows of the selected indicator, reduced to area / year / value. The copy
# keeps the column assignments below from writing into a view of data_ITA.
data_redux = data_ITA[data_ITA['TITOLO'] == interesse]
data_redux = data_redux[['DESCRIZIONE_RIPARTIZIONE',
                         'ANNO_RIFERIMENTO',
                         'VALORE']].copy()
# Values use a decimal comma; convert them to float.
data_redux['VALORE'] = data_redux['VALORE'].str.replace(',','.').apply(float)
data_redux['ANNO_RIFERIMENTO'] = data_redux['ANNO_RIFERIMENTO'].apply(int)
data_redux['DESCRIZIONE_RIPARTIZIONE'] = (
    data_redux['DESCRIZIONE_RIPARTIZIONE'].str.lower())
data_redux.columns = ['region', 'year', 'rate']

# Mean over all rows of each region, then keep only the labels in `regions`.
# reindex() looks rows up by label and yields NaN for a missing label; it
# replaces the `.ix` indexer, which no longer exists in current pandas.
data_redux = data_redux.groupby('region').mean()
data_redux = data_redux.reindex(regions)['rate']

# Region names to use as the new index (matched against the shapefile field
# 'NAME_1'). They are paired by position with the alphabetically sorted
# labels of data_redux, so the order of this list matters.
keys = ['Abruzzo', 'Basilicata', 'Calabria', 'Campania', 'Emilia-Romagna',
        'Friuli-Venezia Giulia', 'Lazio', 'Liguria', 'Lombardia', 'Marche',
        'Molise', 'Piemonte', 'Apulia', 'Sardegna', 'Sicily', 'Toscana',
        'Trentino-Alto Adige', 'Umbria', "Valle d'Aosta", 'Veneto']
popdensity_ITA = pandas.Series({
    new: data_redux[old]
    for old, new in zip(sorted(data_redux.keys()), keys)
})

# Plot title: first distinct 'SOTTOTITOLO' of the selected indicator.
titolo = data_ITA[data_ITA['TITOLO'] == interesse].SOTTOTITOLO.unique()[0]
if isinstance(titolo, bytes):
    # Byte strings (Python 2 `str`) are decoded; text strings are used as is.
    titolo = titolo.decode('utf8')

#fig, ax =pylab.subplots(1,figsize=(12,9))
chart_info(popdensity_ITA,
           shapefile='./ITAmaps/amministrativa/ITA_adm1',
           extension=(47, 36, 20, 7),
           shape_key='NAME_1',
           title=titolo)

# ---------------------------------------------------------------------------
# EUR: GDP per capita in PPS
# ---------------------------------------------------------------------------
# list() so the year labels can be concatenated with the other column names
# on Python 3 as well, where range() is not a list.
years = list(range(1995, 2012))
# Converter for every year column: keep only the first whitespace-separated
# token of the cell.
conv = {year: lambda s: s.split()[0] for year in years}
data_EUR = pandas.read_csv("./EURinfo/GDP per capita in PPS.tsv",
                           sep='[\t,]',
                           na_values=':',
                           skiprows=1,
                           converters=conv,
                           names=['indicator', 'aggregatore', 'country'] + years)
data_EUR = data_EUR.set_index('country')
popdensity_EUR = data_EUR[2011].apply(float)

#fig, ax =pylab.subplots(1,figsize=(8,6))
chart_info(popdensity_EUR,
           shapefile='./EURmaps/CNTR_2010_60M_SH/data/CNTR_RG_60M_2010',
           cmap=['r', 'y', 'g', 'c', 'b'],
           extension=(70, 32, 45, -7),
           shape_key='CNTR_ID',
           title='GDP per capita in PPS')