"""Map the GDP-weighted geographic centre of the world for each year of a GDP table.

Notebook-style script.  It loads a per-country GDP table and a table of
country centroid coordinates, joins them on the country name, and plots one
marker per year column at the GDP-weighted mean of the centroid longitudes
and latitudes.
"""
from IPython.display import HTML, display
from matplotlib import cm
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import numpy as np
import pandas as pd

# ``%matplotlib inline`` is IPython-only syntax and makes the file unparsable
# as Python.  Calling the magic through the shell object is equivalent inside
# IPython/Jupyter and is skipped when no IPython shell is present.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# --------------------------------------------------------------------------
# GDP table
# --------------------------------------------------------------------------
# ``skipfooter`` is only implemented by the python parser engine, so request
# it explicitly instead of relying on the automatic fallback.
gdp = pd.read_csv('horizontal-file_03-2007 - GDP.csv', skipfooter=4,
                  thousands=',', engine='python')
gdp.iloc[:6, :10]  # notebook preview; no effect when run as a script
gdp.rename(columns={'Unnamed: 0': 'Country'}, inplace=True)
gdp.Country[:10].values  # notebook preview
gdp['Country'] = gdp['Country'].str.strip()
gdp.set_index('Country', inplace=True)

# Keep only cells that were parsed as float64; everything else becomes NaN.
gdp_fill = gdp.applymap(lambda v: v if isinstance(v, np.float64) else np.nan)
gdp_fill = gdp_fill.astype(float)
# Seed the first column with 0 so the forward fill along each row always has
# a starting value, then carry the last known value forward across columns.
gdp_fill.iloc[:, 0] = gdp_fill.iloc[:, 0].fillna(0)
gdp_fill = gdp_fill.ffill(axis=1)
gdp_fill.iloc[:5, :10]  # notebook preview

# --------------------------------------------------------------------------
# Country centroid coordinates
# --------------------------------------------------------------------------
coord = pd.read_table('country_centroids_primary.csv', sep='\t')
coord.rename(columns={'SHORT_NAME': 'Country'}, inplace=True)
coord.loc[:5]  # notebook preview

print("Has coordinates but no GDP: %s"
      % (sorted(set(coord.Country) - set(gdp_fill.index)),))
print("\nHas GDP but no coordinates: %s"
      % (sorted(set(gdp_fill.index) - set(coord.Country)),))

# Align the coordinate table's country name with the one used by the GDP table.
coord.loc[coord.Country == 'Russia', 'Country'] = 'Russian Federation'

# Where the raw table has no value for the Russian Federation, use the
# (filled) 'Total Former USSR' figure instead.  A single ``.loc`` assignment
# is used so the write is guaranteed to land in ``gdp_fill`` itself.
rf = gdp.loc['Russian Federation']
ussr = gdp_fill.loc['Total Former USSR']
rf_missing = rf.isnull().values
gdp_fill.loc['Russian Federation', rf_missing] = ussr[rf_missing].values

coord.set_index('Country', inplace=True)
joined = pd.merge(gdp_fill, coord, left_index=True, right_index=True,
                  how='inner', sort=True)
joined.columns[187:191]  # notebook preview

N_years = 189


def _reporting_counts():
    """Return the number of non-null cells in each column of the raw ``gdp`` table."""
    return gdp.notnull().sum(axis=0).values


def _years_with_data(candidates):
    """Return the column positions in *candidates* whose non-null count exceeds ``threshold``."""
    counts = _reporting_counts()
    return [x for x in candidates if counts[x] > threshold]


def _draw_world(figsize, llon, llat, ulon, ulat):
    """Create a titled figure of the given size and draw the styled Mercator base map on it."""
    plt.figure(figsize=figsize)
    plt.title('World\'s Center of Economic Activity (years 1-2003)', fontsize=24)
    # Initialize the map and configure the style
    world = Basemap(resolution='l', projection='merc', area_thresh=10000,
                    llcrnrlon=llon, llcrnrlat=llat,
                    urcrnrlon=ulon, urcrnrlat=ulat)
    world.drawcoastlines(linewidth=0.1)
    world.drawcountries(linewidth=0.1)
    world.drawlsmask(land_color='#E1E1D1', ocean_color='#F0F0E8')
    return world


def _plot_centers(world, years, markersize):
    """Plot one marker per column position in *years*; return ``(label, x, y)`` per marker.

    ``x`` and ``y`` are in the map's projection coordinates.
    """
    n = len(years)
    points = []
    for c, i in enumerate(years):
        year_gdp = joined.iloc[:, i]
        # Weight each country's coordinates by its GDP.  Summing the raw
        # arrays lets a NaN propagate instead of being silently skipped.
        total = year_gdp.values.sum()
        lon = (year_gdp * joined.LONG).values.sum() / total
        lat = (year_gdp * joined.LAT).values.sum() / total
        # Convert the centre of mass to map coordinates.
        x, y = world(lon, lat)
        label = joined.columns[i]
        world.plot(x, y, 'o', color=cm.Spectral(float(c) / n),
                   markersize=markersize, label=label)
        points.append((label, x, y))
    return points


plt.plot(_reporting_counts()[:N_years])

threshold = 14


def plot1(llon, llat, ulon, ulat):
    """Plot the GDP-weighted centre for every year column with enough data, with a legend.

    Args:
        llon: Longitude of the map's lower-left corner.
        llat: Latitude of the map's lower-left corner.
        ulon: Longitude of the map's upper-right corner.
        ulat: Latitude of the map's upper-right corner.

    Returns:
        The ``Basemap`` instance the markers were drawn on.
    """
    world = _draw_world((20, 12), llon, llat, ulon, ulat)

    # indices of year columns with enough GDP data
    years = _years_with_data(range(N_years))
    _plot_centers(world, years, markersize=10)

    # Pick the first 4 points and then every 20th for the legend; positions
    # beyond the available handles are dropped rather than raising.
    handles, labels = plt.gca().get_legend_handles_labels()
    wanted = list(range(4)) + list(range(5, len(years), 20))
    picks = [i for i in wanted if i < len(handles)]
    legend = plt.legend([handles[i] for i in picks],
                        [labels[i] for i in picks],
                        title='Year', fontsize=16, numpoints=1)
    plt.setp(legend.get_title(), fontsize=18)
    return world


plot1(-110, -40, 140, 65)

plot1(-10, 35, 30, 45)


def plot2(llon, llat, ulon, ulat):
    """Plot the GDP-weighted centre for a thinned set of years, annotating selected markers.

    The candidate columns are the first 45, then every 5th from position 50,
    plus the last one; those without enough data are dropped.

    Args:
        llon: Longitude of the map's lower-left corner.
        llat: Latitude of the map's lower-left corner.
        ulon: Longitude of the map's upper-right corner.
        ulat: Latitude of the map's upper-right corner.

    Returns:
        The ``Basemap`` instance the markers were drawn on.
    """
    world = _draw_world((20, 15), llon, llat, ulon, ulat)

    candidates = list(range(45)) + list(range(50, N_years, 5)) + [N_years - 1]
    years = _years_with_data(candidates)

    # Positions (within the plotted sequence) of the markers that get a text label.
    annotated = {0, 1, 3, 5, 7, 11, 18, 24, 28, 36}
    points = _plot_centers(world, years, markersize=12)
    for c, (label, x, y) in enumerate(points):
        if c in annotated:
            plt.annotate(label, xy=(x - 150000, y + 140000),
                         family='cursive', size=23)
    return world


plot2(-20, 20, 80, 50)

# CSS styling within IPython notebook
with open('custom.css') as css_file:
    display(HTML(css_file.read()))