Image(filename='homicides_month_hour_heatmap.png') Image(filename='personal_model_comparison.png') Image(filename='2014_homicide_predictions.png') Image(filename='personal_crime_rates_down.png') import pandas as pd import statsmodels.api as sm import statsmodels.formula.api as smf from statsmodels.sandbox.regression.predstd import wls_prediction_std import seaborn as sb from collections import Counter from IPython.core.display import Image import scipy.stats as stats for i in range(2001,2015): with open('{0}.csv'.format(str(i)),'wb') as f: data = urllib2.urlopen('http://www.wunderground.com/history/airport/ORD/{0}/1/1/CustomHistory.html?dayend=1&monthend=1&yearend={1}&format=1'.format(str(i),str(i+1))).read() f.write(data) weather = list() for i in range(2001,2015): weather.append(pd.read_csv('{0}.csv'.format(str(i)),skiprows=1)) weather_df = pd.concat(weather) weather_df.set_index('CST',inplace=True) weather_df.index = pd.to_datetime(weather_df.index,unit='D') weather_df.to_csv('weather.csv') weather_df = pd.read_csv('weather.csv',low_memory=False,index_col=0) weather_df.index = pd.to_datetime(weather_df.index,unit='D') weather_df['PrecipitationIn'] = weather_df['PrecipitationIn'].replace('T',np.nan) weather_df['PrecipitationIn'] = weather_df['PrecipitationIn'].dropna().astype(float) weather_df.tail() weather_df.columns crime_df = pd.read_csv('all_crime.csv') crime_df['Datetime'] = pd.to_datetime(crime_df['Date'],format="%m/%d/%Y %I:%M:%S %p") crime_df['Date'] = crime_df['Datetime'].apply(lambda x:x.date()) crime_df['Weekday'] = crime_df['Datetime'].apply(lambda x:x.weekday()) crime_df['Hour'] = crime_df['Datetime'].apply(lambda x:x.hour) crime_df['Day'] = crime_df['Datetime'].apply(lambda x:x.day) crime_df['Week'] = crime_df['Datetime'].apply(lambda x:x.week) crime_df['Month'] = crime_df['Datetime'].apply(lambda x:x.month) crime_df.head() dict(Counter(crime_df['Primary Type'])) personal_crimes = ['ASSAULT','BATTERY','CRIM SEXUAL ASSAULT','HOMICIDE'] property_crimes = ['ARSON','BURGLARY','MOTOR VEHICLE THEFT','ROBBERY','THEFT'] arson_gb = crime_df[crime_df['Primary Type'] == 'ARSON'].groupby('Date')['ID'].agg(len) assault_gb = crime_df[crime_df['Primary Type'] == 'ASSAULT'].groupby('Date')['ID'].agg(len) battery_gb = crime_df[crime_df['Primary Type'] == 'BATTERY'].groupby('Date')['ID'].agg(len) burglary_gb = crime_df[crime_df['Primary Type'] == 'BURGLARY'].groupby('Date')['ID'].agg(len) homicide_gb = crime_df[crime_df['Primary Type'] == 'HOMICIDE'].groupby('Date')['ID'].agg(len) sexual_assault_gb = crime_df[crime_df['Primary Type'] == 'CRIM SEXUAL ASSAULT'].groupby('Date')['ID'].agg(len) robbery_gb = crime_df[crime_df['Primary Type'] == 'ROBBERY'].groupby('Date')['ID'].agg(len) theft_gb = crime_df[crime_df['Primary Type'] == 'THEFT'].groupby('Date')['ID'].agg(len) vehicle_theft_gb = crime_df[crime_df['Primary Type'] == 'MOTOR VEHICLE THEFT'].groupby('Date')['ID'].agg(len) personal_gb = crime_df[crime_df['Primary Type'].isin(personal_crimes)].groupby('Date')['ID'].agg(len) property_gb = crime_df[crime_df['Primary Type'].isin(property_crimes)].groupby('Date')['ID'].agg(len) arson_gb.index = pd.to_datetime(arson_gb.index,unit='D') assault_gb.index = pd.to_datetime(assault_gb.index,unit='D') battery_gb.index = pd.to_datetime(battery_gb.index,unit='D') burglary_gb.index = pd.to_datetime(burglary_gb.index,unit='D') homicide_gb.index = pd.to_datetime(homicide_gb.index,unit='D') sexual_assault_gb.index = pd.to_datetime(sexual_assault_gb.index,unit='D') robbery_gb.index = pd.to_datetime(robbery_gb.index,unit='D') theft_gb.index = pd.to_datetime(theft_gb.index,unit='D') vehicle_theft_gb.index = pd.to_datetime(vehicle_theft_gb.index,unit='D') personal_gb.index = pd.to_datetime(personal_gb.index,unit='D') property_gb.index = pd.to_datetime(property_gb.index,unit='D') ts = pd.DataFrame({'Arson':arson_gb.ix[:'2014-3-31'], 'Assault':assault_gb.ix[:'2014-3-31'], 'Battery':battery_gb.ix[:'2014-3-31'], 'Burglary':burglary_gb.ix[:'2014-3-31'], 'Homicide':homicide_gb.ix[:'2014-3-31'], 'Sexual_assault':sexual_assault_gb.ix[:'2014-3-31'], 'Robbery':robbery_gb.ix[:'2014-3-31'], 'Vehicle_theft':vehicle_theft_gb.ix[:'2014-3-31'], 'Theft':theft_gb.ix[:'2014-3-31'], 'Personal':personal_gb.ix[:'2014-3-31'], 'Property':property_gb.ix[:'2014-3-31'], 'Temperature':weather_df['Mean TemperatureF'].ix[:'2014-3-31'], 'Binned temperature':weather_df['Mean TemperatureF'].ix[:'2014-3-31']//10.*10, 'Humidity':weather_df[' Mean Humidity'].ix[:'2014-3-31'], 'Precipitation':weather_df['PrecipitationIn'].ix[:'2014-3-31'] }) ts['Time'] = range((max(ts.index)-min(ts.index)).days+1) ts.reset_index(inplace=True) ts.set_index('index',drop=False,inplace=True) ts['Weekday'] = ts['index'].apply(lambda x:x.weekday()) ts['Hour'] = ts['index'].apply(lambda x:x.hour) ts['Week'] = ts['index'].apply(lambda x:x.week) ts['Month'] = ts['index'].apply(lambda x:x.month) ts['Year'] = ts['index'].apply(lambda x:x.year) ts['Weekend'] = ts['Weekday'].isin([5,6]).astype(int) # adapted from http://matplotlib.org/examples/api/barchart_demo.html def autolabel(rects): max_height = max([rect.get_height() for rect in rects if hasattr(rect,'get_height') and not np.isnan(rect.get_height())]) for rect in rects: if hasattr(rect,'get_height'): height = rect.get_height() if not np.isnan(height): ax.text(rect.get_x()+rect.get_width()/2., height-.05*max_height, '%d'%int(height), ha='center', va='bottom',color='w') figsize(12,6) ts2 = pd.DataFrame({'Arson':arson_gb/float(arson_gb.max()), 'Assault':assault_gb/float(assault_gb.max()), 'Battery':battery_gb/float(battery_gb.max()), 'Burglary':burglary_gb/float(burglary_gb.max()), 'Homicide':homicide_gb/float(homicide_gb.max()), 'Sexual assault':sexual_assault_gb/float(sexual_assault_gb.max()), 'Robbery':robbery_gb/float(robbery_gb.max()), 'Theft':theft_gb/float(theft_gb.max()),}) ax = ts2.resample('M').plot(lw=4,alpha=.75,colormap='hsv') ax.set_ylabel('Incidents (normalized)',fontsize=18) #ax.right_ax.set_ylabel('Temperature (F)',fontsize=18) ax.set_xlabel('Time',fontsize=18) ax.set_ylim((0,1)) #ax.set_yscale('log') ax.grid(False,which='minor') plt.axvline('2014-3-1',c='k',ls='--') ax.legend(loc='upper center',ncol=4) figsize(8,6) #ts.corr().columns a = np.array(ts[['Arson','Burglary','Robbery','Theft','Assault','Battery','Homicide','Sexual_assault','Temperature','Humidity','Precipitation']].resample('M').corr()) np.fill_diagonal(a,0) plt.pcolor(a,cmap='RdBu_r',edgecolors='k') plt.xlim((0,11)) plt.ylim((0,11)) plt.xticks(arange(.5,11.5),['Arson','Burglary','Robbery','Theft','Assault','Battery','Homicide','Sexual assault','Temperature','Humidity','Precipitation'],rotation=90,fontsize=15) plt.yticks(arange(.5,11.5),['Arson','Burglary','Robbery','Theft','Assault','Battery','Homicide','Sexual assault','Temperature','Humidity','Precipitation'],fontsize=15) plt.title('Correlation between crime occurences',fontsize=20) plt.colorbar() plt.grid(b=True,which='major',alpha=.5) figsize(12,6) ts3 = pd.DataFrame({'Personal':personal_gb/float(personal_gb.max()), 'Property':property_gb/float(property_gb.max()) }) ax = ts3.resample('M').plot(lw=4,alpha=.75,colormap='jet') ax.set_ylabel('Incidents (normalized)',fontsize=18) #ax.right_ax.set_ylabel('Temperature (F)',fontsize=18) ax.set_xlabel('Time',fontsize=18) ax.set_ylim((0,1)) #ax.set_yscale('log') ax.grid(False,which='minor') ax.legend(loc='upper center',ncol=4,fontsize=18) ax = ts.boxplot(['Personal','Property'],by='Binned temperature') ax[0].set_ylabel('Number of crimes',fontsize=18) ax[0].set_xlabel('Temperature (F)',fontsize=18) ax[0].set_title('Personal crimes',fontsize=15) ax[1].set_xlabel('Temperature (F)',fontsize=18) ax[1].set_title('Property crimes',fontsize=15) plt.suptitle('Crime increases with temperature',fontsize=20) #plt.xticks(plt.xticks()[0],arange(-10,100,10),fontsize=15) var = 'Personal' ct1 = ts.groupby(['Temperature','Humidity'])[var].agg(np.sum).reset_index() array1 = np.array(pd.pivot_table(ct1,values=var,rows='Temperature',cols='Humidity').fillna(0)) ct2 = ts.groupby(['Temperature','Humidity'])[var].agg(len).reset_index() array2 = np.array(pd.pivot_table(ct2,values=var,rows='Temperature',cols='Humidity').fillna(0)) normalized_crime = array1/array2 plt.imshow(normalized_crime,cmap='RdBu_r',origin='lower',label='Count') #plt.legend(loc='upper right',bbox_to_anchor=(1,.5)) plt.xlabel('Humidity',fontsize=18) plt.ylabel('Temperature',fontsize=18) plt.title('{0} crimes by temperature and humidity'.format(var),fontsize=24) plt.colorbar() var = 'Property' ct1 = ts.groupby(['Temperature','Humidity'])[var].agg(np.sum).reset_index() array1 = np.array(pd.pivot_table(ct1,values=var,rows='Temperature',cols='Humidity').fillna(0)) ct2 = ts.groupby(['Temperature','Humidity'])[var].agg(len).reset_index() array2 = np.array(pd.pivot_table(ct2,values=var,rows='Temperature',cols='Humidity').fillna(0)) normalized_crime = array1/array2 plt.imshow(normalized_crime,cmap='RdBu_r',origin='lower',label='Count') #plt.legend(loc='upper right',bbox_to_anchor=(1,.5)) plt.xlabel('Humidity',fontsize=18) plt.ylabel('Temperature',fontsize=18) plt.title('{0} crimes by temperature and humidity'.format(var),fontsize=24) plt.colorbar() crime_df_gb_personal = crime_df[crime_df['Primary Type'].isin(personal_crimes)] crime_df_gb_property = crime_df[crime_df['Primary Type'].isin(property_crimes)] bar_df = pd.DataFrame() bar_df['Personal'] = crime_df_gb_personal.groupby('Month')['ID'].agg(len)/len(crime_df_gb_personal)*100 bar_df['Property'] = crime_df_gb_property.groupby('Month')['ID'].agg(len)/len(crime_df_gb_property)*100 ax = bar_df.plot(kind='bar') plt.title('Crimes peak in summer',fontsize=20) plt.xlabel('Month',fontsize=18) plt.xticks(plt.xticks()[0],['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],fontsize=15,rotation=0) plt.ylabel('Percentage of crimes',fontsize=18) plt.yticks(fontsize=15) plt.ylim((0,12)) plt.legend(fontsize=15,ncol=2,loc='upper center') autolabel(ax.patches) bar_df = pd.DataFrame() bar_df['Personal'] = crime_df_gb_personal.groupby('Hour')['ID'].agg(len)/len(crime_df_gb_personal)*100 bar_df['Property'] = crime_df_gb_property.groupby('Hour')['ID'].agg(len)/len(crime_df_gb_property)*100 ax = bar_df.plot(kind='bar') plt.title('Crimes peak in afternoon',fontsize=20) plt.xlabel('Hour of day',fontsize=18) plt.xticks(fontsize=15,rotation=0) plt.ylabel('Percentage of crimes',fontsize=18) plt.yticks(fontsize=15) plt.legend(fontsize=15,ncol=2,loc='upper center') autolabel(ax.patches) bar_df = pd.DataFrame() bar_df['Personal'] = crime_df_gb_personal.groupby('Weekday')['ID'].agg(len)/len(crime_df_gb_personal)*100 bar_df['Property'] = crime_df_gb_property.groupby('Weekday')['ID'].agg(len)/len(crime_df_gb_property)*100 ax = bar_df.plot(kind='bar') plt.title('Property crimes fall,\npersonal crimes rise over weekends',fontsize=20) plt.xlabel('Day of week',fontsize=18) plt.xticks(plt.xticks()[0],['Mon','Tue','Wed','Thu','Fri','Sat','Sun'],fontsize=15,rotation=0) plt.ylabel('Percentage of crimes',fontsize=18) plt.yticks(fontsize=15) plt.ylim((0,18)) plt.legend(fontsize=15,ncol=2,loc='upper center') autolabel(ax.patches) ct = crime_df[crime_df['Primary Type'] == 'ASSAULT'].groupby(['Month','Hour'])['ID'].agg(len).reset_index() plt.imshow(np.array(pd.pivot_table(ct,values='ID',rows='Month',cols='Hour')),cmap='RdBu_r',label='Count') #plt.legend(loc='upper right',bbox_to_anchor=(1,.5)) plt.xlabel('Hour of day',fontsize=18) plt.ylabel('Month of year',fontsize=18) plt.title('Assault by time of day and year',fontsize=24) plt.colorbar() ct = crime_df[crime_df['Primary Type'] == 'ASSAULT'].groupby(['Weekday','Hour'])['ID'].agg(len).reset_index() plt.imshow(np.array(pd.pivot_table(ct,values='ID',rows='Weekday',cols='Hour')),cmap='RdBu_r',label='Count') #plt.legend(loc='upper right',bbox_to_anchor=(1,.5)) plt.xlabel('Hour of day',fontsize=18) plt.ylabel('Day of week',fontsize=18) plt.title('Assault by time of day and week',fontsize=24) plt.yticks(arange(0,7),['Mon','Tue','Wed','Thu','Fri','Sat','Sun']) plt.colorbar() ct = crime_df[crime_df['Primary Type'] == 'BATTERY'].groupby(['Month','Hour'])['ID'].agg(len).reset_index() plt.imshow(np.array(pd.pivot_table(ct,values='ID',rows='Month',cols='Hour')),cmap='RdBu_r',label='Count') #plt.legend(loc='upper right',bbox_to_anchor=(1,.5)) plt.xlabel('Hour of day',fontsize=18) plt.ylabel('Month of year',fontsize=18) plt.title('Battery by time of day and year',fontsize=24) plt.colorbar() ct = crime_df[crime_df['Primary Type'] == 'SEX OFFENSE'].groupby(['Month','Hour'])['ID'].agg(len).reset_index() plt.imshow(np.array(pd.pivot_table(ct,values='ID',rows='Month',cols='Hour')),cmap='RdBu_r',label='Count') #plt.legend(loc='upper right',bbox_to_anchor=(1,.5)) plt.xlabel('Hour of day',fontsize=18) plt.ylabel('Month of year',fontsize=18) plt.title('Sexual assault by time of day and year',fontsize=24) plt.colorbar() ct = crime_df[crime_df['Primary Type'] == 'ROBBERY'].groupby(['Month','Hour'])['ID'].agg(len).reset_index() plt.imshow(np.array(pd.pivot_table(ct,values='ID',rows='Month',cols='Hour')),cmap='RdBu_r',label='Count') #plt.legend(loc='upper right',bbox_to_anchor=(1,.5)) plt.xlabel('Hour of day',fontsize=18) plt.ylabel('Month of year',fontsize=18) plt.title('Robbery by time of day and year',fontsize=24) plt.colorbar() ct = crime_df[crime_df['Primary Type'] == 'BURGLARY'].groupby(['Month','Hour'])['ID'].agg(len).reset_index() plt.imshow(np.array(pd.pivot_table(ct,values='ID',rows='Month',cols='Hour')),cmap='RdBu_r',label='Count') #plt.legend(loc='upper right',bbox_to_anchor=(1,.5)) plt.xlabel('Hour of day',fontsize=18) plt.ylabel('Month of year',fontsize=18) plt.title('Burglary by time of day and year',fontsize=24) plt.colorbar() ct = crime_df[crime_df['Primary Type'] == 'HOMICIDE'].groupby(['Month','Hour'])['ID'].agg(len).reset_index() plt.imshow(np.array(pd.pivot_table(ct,values='ID',rows='Month',cols='Hour')),cmap='RdBu_r',label='Count') #plt.legend(loc='upper right',bbox_to_anchor=(1,.5)) plt.xlabel('Hour of day',fontsize=18) plt.ylabel('Month of year',fontsize=18) plt.title('Homicides by time of day and year',fontsize=24) plt.colorbar() ax = ts[['Temperature','Assault']].resample('M').plot(secondary_y=['Assault'],lw=4) ax.set_ylabel(u'Temperature (\N{DEGREE SIGN}F)',fontsize=18) ax.right_ax.set_ylabel('Assaults',fontsize=18) ax.set_xlabel('Time',fontsize=18) ax.set_ylim((0,100)) plt.axvspan(xmin='2014-3-1',xmax=ts.index.max(),color='k',alpha=.1) plt.title(u'Assaults, 2001\N{EN DASH}present',fontsize=24) plt.autoscale() ax = ts[['Temperature','Assault']].resample('W').ix['2013-1-1':].plot(secondary_y=['Assault'],lw=4) ax.set_ylabel(u'Temperature (\N{DEGREE SIGN}F)',fontsize=18) ax.right_ax.set_ylabel('Incidents',fontsize=18) ax.set_xlabel('Time',fontsize=18) #ax.grid(False,which='minor') plt.axvspan(xmin='2014-3-1',xmax=ts.index.max(),color='k',alpha=.1) plt.title(u'Assaults, 2013\N{EN DASH}present',fontsize=24) plt.autoscale() ax = ts[['Temperature','Homicide']].resample('M').plot(secondary_y=['Homicide'],lw=4,alpha=.75) ax.set_ylabel(u'Temperature (\N{DEGREE SIGN}F)',fontsize=18) ax.right_ax.set_ylabel('Homicides',fontsize=18) ax.set_xlabel('Time',fontsize=18) #ax.set_yticklabels(plt.yticks()[0],range(0,90,10)) plt.axvspan(xmin='2014-3-1',xmax=ts.index.max(),color='k',alpha=.1) plt.title(u'Homicides, 2001\N{EN DASH}present',fontsize=24) ax = ts[['Temperature','Homicide']].resample('W').ix['2013-1-1':].plot(secondary_y=['Homicide'],lw=4,alpha=.75) ax.set_ylabel(u'Temperature (\N{DEGREE SIGN}F)',fontsize=18) ax.right_ax.set_ylabel('Homicides',fontsize=18) ax.set_xlabel('Time',fontsize=18) #ax.set_yticklabels(plt.yticks()[0],range(0,90,10)) plt.axvspan(xmin='2014-3-1',xmax=ts.index.max(),color='k',alpha=.1) plt.title(u'Homicides, 2013\N{EN DASH}present',fontsize=24) d = ts['Temperature'].dropna() n = len(d) ps = np.abs(np.fft.fft(d)**2) plt.plot(ps,lw=4) plt.yscale('log') plt.xscale('log') plt.xlim((10,100)) x = plt.xticks()[0] plt.xticks(x,(n/x).round(1)) plt.axvline(n/365,ls='--',c='c') # Annual signal plt.grid(False,which='Minor') plt.xlabel('Frequency (days)',fontsize=18) plt.ylabel('Power',fontsize=18) plt.title('Cycles of temperatures',fontsize=24) d = ts['Assault'].dropna() n = len(d) ps = np.abs(np.fft.fft(d)**2) plt.plot(ps,lw=4,c='g',alpha=.75) plt.yscale('log') plt.xscale('log') plt.xlim((10,100)) x = plt.xticks()[0] plt.xticks(x,(n/x).round(1)) plt.axvline(n/365,ls='--',c='c') # Annual signal plt.axvline(n/182,ls='--',c='m') # Quarterly signal plt.axvline(n/7,ls='--',c='y') # Weekly signal plt.grid(False,which='Minor') plt.xlabel('Frequency (days)',fontsize=18) plt.ylabel('Power',fontsize=18) plt.title('Cycles of assaults',fontsize=24) d = ts['Assault'].resample('W',how=np.sum).dropna() n = len(d) ps = np.abs(np.fft.fft(d)**2) plt.plot(ps,lw=4,c='g',alpha=.75) plt.yscale('log') plt.xscale('log') plt.xlim((10,100)) x = plt.xticks()[0] plt.xticks(x,(n/x).round(1)) plt.axvline(n/52.,ls='--',c='c') # Annual signal plt.axvline(n/26,ls='--',c='m') # Quarterly signal plt.axvline(n,ls='--',c='y') # Weekly signal plt.grid(False,which='Minor') plt.xlabel('Frequency (weeks)',fontsize=18) plt.ylabel('Power',fontsize=18) plt.title('Cycles of assaults',fontsize=24) d = ts['Homicide'].dropna() n = len(d) ps = np.abs(np.fft.fft(d)**2) plt.plot(ps,lw=4,c='r',alpha=.75) plt.yscale('log') plt.xscale('log') plt.xlim((10,100)) x = plt.xticks()[0] plt.xticks(x,(n/x).round(1)) plt.axvline(n/365,ls='--',c='c') # Annual signal plt.axvline(n/90,ls='--',c='m') # Quarterly signal plt.axvline(n/7,ls='--',c='y') # Weekly signal plt.axvline(n/255,ls='--',c='k') # ??? signal plt.grid(False,which='Minor') plt.xlabel('Frequency (days)',fontsize=18) plt.ylabel('Power',fontsize=18) plt.title('Cycles of homicides',fontsize=24) d = ts['Homicide'].resample('W').dropna() n = len(d) ps = np.abs(np.fft.fft(d)**2) plt.plot(ps,lw=4,c='r',alpha=.75) plt.yscale('log') plt.xscale('log') plt.xlim((10,100)) x = plt.xticks()[0] plt.xticks(x,(n/x).round(1)) plt.axvline(n/52.,ls='--',c='c') # Annual signal #plt.axvline(n/13.,ls='--',c='m') # Quarterly signal plt.axvline(n/1.,ls='--',c='y') # Weekly signal plt.grid(False,which='Minor') plt.xlabel('Frequency (weeks)',fontsize=18) plt.ylabel('Power',fontsize=18) plt.title('Cycles of homicides',fontsize=24) # from http://nbviewer.ipython.org/gist/keflavich/4042018 from scipy.optimize import curve_fit def sinfunc(x,a,b,c): return a*np.sin(2*np.pi/365.*x+b)+c y2001 = np.array(ts['Temperature']['2001-1-1':'2001-12-31']) x = arange(len(y2001)) fitpars,covmat = curve_fit(f=sinfunc,xdata=x,ydata=y2001) plt.scatter(x,y2001) y = sinfunc(x,*fitpars) plt.plot(x,y,'r-',lw=8,alpha=.5) plt.ylabel('Temperature',fontsize=18) plt.xlabel('Day',fontsize=18) plt.xlim((0,365)) for y in range(2002,2014): y_i = np.array(ts['Temperature']['{0}-1-1'.format(y):'{0}-12-31'.format(y)]) x = arange(len(y_i)) try: fitpars_i,covmat = curve_fit(f=sinfunc,xdata=x,ydata=y_i) except RuntimeError: print "Couldn't find estimates for {0}".format(y) pass fitpars = np.vstack([fitpars,fitpars_i]) avg_params = fitpars.mean(axis=0) sin_df = pd.DataFrame(index=ts.index) t = len(sin_df) sin_df['sin'] = sinfunc(arange(t),*avg_params) sin_df['Temp'] = ts['Temperature'] sin_df['error'] = sin_df['sin'] - sin_df['Temp'] ax = sin_df.plot(legend=False,subplots=True) print u"The average error across all observations is: {0}\N{DEGREE SIGN}F".format(round(sin_df['error'].mean(),2)) lm_assault = smf.ols('Assault ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() lm_assault_x = pd.DataFrame({'Date':pd.date_range('2001-01-01','2014-12-31')}) lm_assault_x['Temperature'] = sinfunc(arange(len(lm_assault_x)),*avg_params) lm_assault_x['Month'] = lm_assault_x['Date'].apply(lambda x:x.month) lm_assault_x['Week'] = lm_assault_x['Date'].apply(lambda x:x.week) lm_assault_x['Weekday'] = lm_assault_x['Date'].apply(lambda x:x.weekday()) lm_assault_x['Time'] = arange(len(lm_assault_x)) lm_assault_x = lm_assault_x.set_index('Date') std,lower,upper = wls_prediction_std(lm_assault) start_date = '2013-01-01' assault_pred = pd.DataFrame(index = pd.date_range(start_date,'2014-12-31')) assault_pred['Predictions'] = lm_assault.predict(lm_assault_x[start_date:]) assault_pred['Observations'] = ts['Assault'].ix[start_date:] assault_pred['Lower CI'] = assault_pred['Predictions']-2*std.mean() assault_pred['Upper CI'] = assault_pred['Predictions']+2*std.mean() assault_pred[['Lower CI','Upper CI']].resample('W').plot(c='g',lw=1,alpha=.75) assault_pred['Predictions'].resample('W').plot(c='g',lw=4,alpha=.75,label='Predictions') #plt.plot(assault_pred.index,assault_pred['Lower_CI']) plt.scatter(assault_pred.index,assault_pred['Observations'],c='g',alpha=.25,label='Observations') plt.ylabel('Number of assaults',fontsize=18) plt.xlabel('Time',fontsize=18) plt.axvspan(xmin='2014-3-1',xmax=assault_pred.index.max(),color='k',alpha=.1) plt.legend() lm_homicide = smf.ols('Homicide ~ Temperature + Time + C(Week) + C(Weekday)',data=ts).fit() lm_homicide_x = pd.DataFrame({'Date':pd.date_range('2001-01-01','2014-12-31')}) lm_homicide_x['Temperature'] = sinfunc(arange(len(lm_homicide_x)),*avg_params) lm_homicide_x['Month'] = lm_homicide_x['Date'].apply(lambda x:x.month) lm_homicide_x['Week'] = lm_homicide_x['Date'].apply(lambda x:x.week) lm_homicide_x['Weekday'] = lm_homicide_x['Date'].apply(lambda x:x.weekday()) lm_homicide_x['Time'] = arange(len(lm_homicide_x)) lm_homicide_x = lm_homicide_x.set_index('Date') std,lower,upper = wls_prediction_std(lm_homicide) start_date = '2001-01-01' homicide_pred = pd.DataFrame(index = pd.date_range(start_date,'2014-12-31')) homicide_pred['Predictions'] = np.floor(lm_homicide.predict(lm_homicide_x.ix[start_date:])) homicide_pred['Observations'] = ts['Homicide'].ix[start_date:] homicide_pred['Lower CI'] = homicide_pred['Predictions'] - 2*std.mean() homicide_pred['Upper CI'] = homicide_pred['Predictions'] + 2*std.mean() homicide_pred[['Lower CI','Upper CI']].resample('W').plot(c='r',lw=1,alpha=.75) homicide_pred['Predictions'].resample('W').plot(c='r',lw=4,alpha=.75,label='Predictions') #plt.plot(assault_pred.index,assault_pred['Lower_CI']) plt.scatter(homicide_pred.index,homicide_pred['Observations'],c='r',alpha=.25,label='Observations') plt.ylabel('Number of homicides',fontsize=18) plt.xlabel('Time',fontsize=18) plt.legend() plt.axvspan(xmin='2014-3-1',xmax=assault_pred.index.max(),color='k',alpha=.1) plt.axhline(0,c='k',lw=4) plt.title('Comparison of model predictions and observed data',fontsize=24) (homicide_pred['Observations'] - homicide_pred['Predictions']).resample('M').ix[:'2014-3-31'].plot(lw=2,c='r') plt.axhline(y=0,c='k',lw=2,alpha=.75) plt.ylabel('Observed error from predictions',fontsize=18) plt.xlabel('Time',fontsize=18) plt.title('Homicides following model since 2013',fontsize=24) bar_df = pd.DataFrame() bar_df['Predictions'] = homicide_pred['Predictions'].ix['2013-1-1':'2013-12-31'].groupby(lambda x:x.month).agg(np.sum) bar_df['Observations'] = ts['Homicide'].ix['2013-1-1':'2013-12-31'].groupby(lambda x:x.month).agg(np.sum) ax = bar_df.plot(kind='bar') plt.xticks(plt.xticks()[0],['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],fontsize=15,rotation=0) plt.title('2013 homicide estimates',fontsize=24) plt.ylabel('Number of homicides',fontsize=18) plt.xlabel('Month',fontsize=18) ax.legend(fontsize=15,ncol=2,loc='upper center') plt.ylim((0,60)) autolabel(ax.patches) print "The model predicts {0} homicides. {1} homicides were reported.".format(round(bar_df['Predictions'].sum()), bar_df['Observations'].sum()) lm_personal = smf.ols('Personal ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() lm_personal_x = pd.DataFrame({'Date':pd.date_range('2001-01-01','2014-12-31')}) lm_personal_x['Temperature'] = sinfunc(arange(len(lm_personal_x)),*avg_params) lm_personal_x['Month'] = lm_personal_x['Date'].apply(lambda x:x.month) lm_personal_x['Week'] = lm_personal_x['Date'].apply(lambda x:x.week) lm_personal_x['Weekday'] = lm_personal_x['Date'].apply(lambda x:x.weekday()) lm_personal_x['Time'] = arange(len(lm_personal_x)) lm_personal_x = lm_personal_x.set_index('Date') std,lower,upper = wls_prediction_std(lm_personal) start_date = '2001-01-01' personal_pred = pd.DataFrame(index = pd.date_range(start_date,'2014-12-31')) personal_pred['Predictions'] = lm_personal.predict(lm_personal_x[start_date:]) personal_pred['Observations'] = ts['Personal'].ix[start_date:] personal_pred['Lower CI'] = personal_pred['Predictions']-2*std.mean() personal_pred['Upper CI'] = personal_pred['Predictions']+2*std.mean() personal_pred[['Lower CI','Upper CI']].resample('W').plot(c='b',lw=1,alpha=.75) personal_pred['Predictions'].resample('W').plot(c='b',lw=4,alpha=.75,label='Predictions') #plt.plot(assault_pred.index,assault_pred['Lower_CI']) plt.scatter(personal_pred.index,personal_pred['Observations'],c='b',alpha=.25,label='Observations') plt.ylabel('Number of personal crimes',fontsize=18) plt.xlabel('Time',fontsize=18) plt.axvspan(xmin='2014-3-1',xmax=personal_pred.index.max(),color='k',alpha=.1) plt.legend() plt.title('Comparison of model predictions and observed data',fontsize=24) (personal_pred['Observations'] - personal_pred['Predictions']).resample('M').ix[:'2014-3-31'].plot(lw=2,c='b') plt.axhline(y=0,c='k',lw=2,alpha=.75) plt.ylabel('Observed error from predictions',fontsize=18) plt.xlabel('Time',fontsize=18) plt.title('Personal crimes below expectations since 2013',fontsize=24) figsize(12,6) bar_df = pd.DataFrame() bar_df['Predictions'] = personal_pred['Predictions'].ix['2013-1-1':'2013-12-31'].groupby(lambda x:x.month).agg(np.sum) bar_df['Observations'] = ts['Personal'].ix['2013-1-1':'2013-12-31'].groupby(lambda x:x.month).agg(np.sum) ax = bar_df.plot(kind='bar') plt.xticks(plt.xticks()[0],['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],fontsize=15,rotation=0) plt.title('2013 personal crime estimates',fontsize=24) plt.ylabel('Number of crimes',fontsize=18) plt.xlabel('Month',fontsize=18) ax.legend(fontsize=15,ncol=2,loc='upper center') plt.ylim((0,10000)) autolabel(ax.patches) print "The model predicted {0} personal crimes in 2013. {1} personal crimes occurred in 2013.".format(round(bar_df['Predictions'].sum()), bar_df['Observations'].sum()) lm_property = smf.ols('Property ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() lm_property_x = pd.DataFrame({'Date':pd.date_range('2001-01-01','2014-12-31')}) lm_property_x['Temperature'] = sinfunc(arange(len(lm_property_x)),*avg_params) lm_property_x['Month'] = lm_property_x['Date'].apply(lambda x:x.month) lm_property_x['Week'] = lm_property_x['Date'].apply(lambda x:x.week) lm_property_x['Weekday'] = lm_property_x['Date'].apply(lambda x:x.weekday()) lm_property_x['Time'] = arange(len(lm_property_x)) lm_property_x = lm_property_x.set_index('Date') std,lower,upper = wls_prediction_std(lm_property) start_date = '2001-01-01' property_pred = pd.DataFrame(index = pd.date_range(start_date,'2014-12-31')) property_pred['Predictions'] = lm_property.predict(lm_property_x[start_date:]) property_pred['Observations'] = ts['Property'].ix[start_date:] property_pred['Lower CI'] = property_pred['Predictions'] - 2*std.mean() property_pred['Upper CI'] = property_pred['Predictions'] + 2*std.mean() property_pred[['Lower CI','Upper CI']].resample('W').plot(c='g',lw=1,alpha=.75) property_pred['Predictions'].resample('W').plot(c='g',lw=4,alpha=.75,label='Predictions') #plt.plot(assault_pred.index,assault_pred['Lower_CI']) plt.scatter(property_pred.index,property_pred['Observations'],c='g',alpha=.25,label='Observations') plt.ylabel('Number of property crimes',fontsize=18) plt.xlabel('Time',fontsize=18) plt.axvspan(xmin='2014-3-1',xmax=property_pred.index.max(),color='k',alpha=.1) plt.legend() plt.title('Comparison of model predictions and observed data',fontsize=24) (property_pred['Observations'] - property_pred['Predictions']).resample('M').ix[:'2014-3-31'].plot(lw=2,c='g') plt.axhline(y=0,c='k',lw=2,alpha=.75) plt.ylabel('Observed error from predictions',fontsize=18) plt.xlabel('Time',fontsize=18) plt.title('Property crimes plummet below expectations after 2013',fontsize=24) bar_df = pd.DataFrame() bar_df['Predictions'] = property_pred['Predictions'].ix['2013-1-1':'2013-12-31'].groupby(lambda x:x.month).agg(np.sum) bar_df['Observations'] = ts['Property'].ix['2013-1-1':'2013-12-31'].groupby(lambda x:x.month).agg(np.sum) ax = bar_df.plot(kind='bar') plt.xticks(plt.xticks()[0],['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],fontsize=15,rotation=0) plt.title('2013 property crime estimates',fontsize=24) plt.ylabel('Number of crimes',fontsize=18) plt.xlabel('Month',fontsize=18) ax.legend(fontsize=15,ncol=2,loc='upper center') plt.ylim((0,14000)) autolabel(ax.patches) print "The model predicted {0} property crimes in 2013. {1} property crimes occurred in 2013.".format(round(bar_df['Predictions'].sum()), bar_df['Observations'].sum()) bar_df = pd.DataFrame() bar_df['Predictions'] = homicide_pred['Predictions'].ix['2014-1-1':].groupby(lambda x:x.month).agg(np.sum) bar_df['Observations'] = ts['Homicide'].ix['2014-1-1':'2014-3-31'].groupby(lambda x:x.month).agg(np.sum) ax = bar_df.plot(kind='bar') plt.xticks(plt.xticks()[0],['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],fontsize=15,rotation=0) plt.axvspan(xmin=3,xmax=13,color='k',alpha=.1) plt.title('2014 homicide estimates',fontsize=24) plt.ylabel('Number of homicides',fontsize=18) plt.xlabel('Month',fontsize=18) ax.legend(fontsize=15,ncol=2,loc='upper center') plt.ylim((0,60)) autolabel(ax.patches) print "The model predicts {0} homicides in 2014.".format(round(bar_df['Predictions'].sum())) bar_df = pd.DataFrame() bar_df['Predictions'] = personal_pred['Predictions'].ix['2014-1-1':].groupby(lambda x:x.month).agg(np.sum) bar_df['Observations'] = ts['Personal'].ix['2014-1-1':'2014-3-31'].groupby(lambda x:x.month).agg(np.sum) ax = bar_df.plot(kind='bar') plt.xticks(plt.xticks()[0],['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],fontsize=15,rotation=0) plt.axvspan(xmin=3,xmax=13,color='k',alpha=.1) plt.title('2014 personal crime estimates',fontsize=24) plt.ylabel('Number of crimes',fontsize=18) plt.xlabel('Month',fontsize=18) ax.legend(fontsize=15,ncol=2,loc='upper center') plt.ylim((0,10000)) autolabel(ax.patches) print "The model predicts {0} personal crimes in 2014.".format(round(bar_df['Predictions'].sum())) bar_df = pd.DataFrame() bar_df['Predictions'] = property_pred['Predictions'].ix['2014-1-1':].groupby(lambda x:x.month).agg(np.sum) bar_df['Observations'] = ts['Property'].ix['2014-1-1':'2014-3-31'].groupby(lambda x:x.month).agg(np.sum) ax = bar_df.plot(kind='bar') plt.xticks(plt.xticks()[0],['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],fontsize=15,rotation=0) plt.axvspan(xmin=3,xmax=13,color='k',alpha=.1) plt.title('2014 property crime estimates',fontsize=24) plt.ylabel('Number of crimes',fontsize=18) plt.xlabel('Month',fontsize=18) ax.legend(fontsize=15,ncol=2,loc='upper center') plt.ylim((0,14000)) autolabel(ax.patches) print "The model predicts {0} property crimes in 2014.".format(round(bar_df['Predictions'].sum())) property_crimes_ts = ['Arson','Burglary','Vehicle_theft','Robbery','Theft'] ax = ts[property_crimes_ts].resample('M').plot(lw=4) ax.set_yscale('log') ax.grid(False,which='minor') ax.legend(loc='upper center',ncol=5,fontsize=14) ax.set_ylabel('Number of crimes',fontsize=18) ax.set_xlabel('Time',fontsize=18) lm_x = pd.DataFrame({'Date':pd.date_range('2001-01-01','2014-12-31')}) lm_x['Temperature'] = sinfunc(arange(len(lm_x)),*avg_params) lm_x['Month'] = lm_x['Date'].apply(lambda x:x.month) lm_x['Week'] = lm_x['Date'].apply(lambda x:x.week) lm_x['Weekday'] = lm_x['Date'].apply(lambda x:x.weekday()) lm_x['Time'] = arange(len(lm_x)) lm_x = lm_x.set_index('Date') lm_arson = smf.ols('Arson ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() lm_burglary = smf.ols('Burglary ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() lm_robbery = smf.ols('Robbery ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() lm_theft = smf.ols('Theft ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() lm_vehicle_theft = smf.ols('Vehicle_theft ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() arson_std,lower,upper = wls_prediction_std(lm_arson) burglary_std,lower,upper = wls_prediction_std(lm_burglary) robbery_std,lower,upper = wls_prediction_std(lm_robbery) theft_std,lower,upper = wls_prediction_std(lm_theft) vehicle_theft_std,lower,upper = wls_prediction_std(lm_vehicle_theft) start_date = '2001-01-01' property_pred2 = pd.DataFrame(index = pd.date_range(start_date,'2014-12-31')) property_pred2['Arson_prediction'] = lm_arson.predict(lm_x[start_date:]) property_pred2['Burglary_prediction'] = lm_burglary.predict(lm_x[start_date:]) property_pred2['Robbery_prediction'] = lm_robbery.predict(lm_x[start_date:]) property_pred2['Theft_prediction'] = lm_theft.predict(lm_x[start_date:]) property_pred2['Vehicle_theft_prediction'] = lm_vehicle_theft.predict(lm_x[start_date:]) property_pred2['Arson_observation'] = ts['Arson'].ix[start_date:] property_pred2['Burglary_observation'] = ts['Burglary'].ix[start_date:] property_pred2['Robbery_observation'] = ts['Robbery'].ix[start_date:] property_pred2['Theft_observation'] = ts['Theft'].ix[start_date:] property_pred2['Vehicle_theft_observation'] = ts['Vehicle_theft'].ix[start_date:] property_pred2['Arson_error'] = property_pred2['Arson_observation'] - property_pred2['Arson_prediction'] property_pred2['Burglary_error'] = property_pred2['Burglary_observation'] - property_pred2['Burglary_prediction'] property_pred2['Robbery_error'] = property_pred2['Robbery_observation'] - property_pred2['Robbery_prediction'] property_pred2['Theft_error'] = property_pred2['Theft_observation'] - property_pred2['Theft_prediction'] property_pred2['Vehicle_theft_error'] = property_pred2['Vehicle_theft_observation'] - property_pred2['Vehicle_theft_prediction'] ax = property_pred2[['Arson_error','Burglary_error','Robbery_error','Theft_error','Vehicle_theft_error']].ix['2010-1-1':'2014-3-31'].resample('M',np.sum).plot(lw=4) #ax.set_ylim((-40,40)) ax.set_ylabel('Observed error from predictions',fontsize=18) ax.set_xlabel('Time',fontsize=18) ax.set_title('Most property crimes below predictions since Q2 2013',fontsize=24) ax.legend(['Arson','Burglary','Robbery','Theft','Vehicle theft'],fontsize=12,ncol=5,loc='upper center') personal_crimes_ts = ['Assault','Battery','Sexual_assault','Homicide'] ax = ts[personal_crimes_ts].resample('M',np.sum).plot(lw=4) ax.set_yscale('log') ax.grid(False,which='minor') ax.legend(loc='upper center',ncol=5,fontsize=14) ax.set_ylabel('Number of crimes',fontsize=18) ax.set_xlabel('Time',fontsize=18) ax.set_ylim((10e0,10e4)) lm_x = pd.DataFrame({'Date':pd.date_range('2001-01-01','2014-12-31')}) lm_x['Temperature'] = sinfunc(arange(len(lm_x)),*avg_params) lm_x['Month'] = lm_x['Date'].apply(lambda x:x.month) lm_x['Week'] = lm_x['Date'].apply(lambda x:x.week) lm_x['Weekday'] = lm_x['Date'].apply(lambda x:x.weekday()) lm_x['Time'] = arange(len(lm_x)) lm_x = lm_x.set_index('Date') lm_assault = smf.ols('Assault ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() lm_battery = smf.ols('Battery ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() lm_sexual_assault = smf.ols('Sexual_assault ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() lm_homicide = smf.ols('Homicide ~ Temperature + Time + C(Week) + C(Weekday)',data=ts.dropna()).fit() start_date = '2001-01-01' personal_pred2 = pd.DataFrame(index = pd.date_range(start_date,'2014-12-31')) personal_pred2['Assault_prediction'] = lm_assault.predict(lm_x[start_date:]) personal_pred2['Battery_prediction'] = lm_battery.predict(lm_x[start_date:]) personal_pred2['Sexual_assault_prediction'] = lm_sexual_assault.predict(lm_x[start_date:]) personal_pred2['Homicide_prediction'] = np.floor(lm_homicide.predict(lm_x[start_date:])) # Minority Effect red ball personal_pred2['Assault_observation'] = ts['Assault'].ix[start_date:] personal_pred2['Battery_observation'] = ts['Battery'].ix[start_date:] personal_pred2['Sexual_assault_observation'] = ts['Sexual_assault'].ix[start_date:] personal_pred2['Homicide_observation'] = ts['Homicide'].ix[start_date:] personal_pred2['Assault_error'] = personal_pred2['Assault_observation'] - personal_pred2['Assault_prediction'] personal_pred2['Battery_error'] = personal_pred2['Battery_observation'] - personal_pred2['Battery_prediction'] personal_pred2['Sexual_assault_error'] = personal_pred2['Sexual_assault_observation'] - personal_pred2['Sexual_assault_prediction'] personal_pred2['Homicide_error'] = personal_pred2['Homicide_observation'] - personal_pred2['Homicide_prediction'] ax = personal_pred2[['Assault_error','Battery_error','Sexual_assault_error','Homicide_error']].ix['2010-1-1':'2014-3-31'].resample('M',np.sum).plot(lw=4) #ax.set_ylim((-40,40)) ax.set_ylabel('Observed error from predictions',fontsize=18) ax.set_xlabel('Time',fontsize=18) ax.set_title('Most personal crimes below predictions since Q2 2013',fontsize=24) ax.legend(['Assault','Battery','Sexual assault','Homicide'],fontsize=12,ncol=5,loc='upper center') # From: http://gis.chicagopolice.org/pdfs/district_beat.pdf Image(url='http://i.imgur.com/KMMkatm.png') homicide_df = crime_df[crime_df['Primary Type'] == "HOMICIDE"] homicide_gb_district_df = pd.DataFrame() for year in range(2003,2014): homicide_gb_district_df[year] = homicide_df[homicide_df['Year'] == year].groupby('District')['ID'].agg(len).ix[:25] district_homicide_change_df = pd.DataFrame() district_homicide_change_df[u'2010\N{EN DASH}2012 change'] = (homicide_gb_district_df[2012]-homicide_gb_district_df[2010])/homicide_gb_district_df[2010]*100 district_homicide_change_df['2013 change'] = (homicide_gb_district_df[2013]-homicide_gb_district_df[2012])/homicide_gb_district_df[2012]*100 districts = [int(i) for i in list(homicide_gb_district_df.T.columns)] ax = district_homicide_change_df.plot(kind='bar') ax.set_xticklabels(districts,rotation=0) ax.legend(ncol=3,fontsize=15,loc='upper center') ax.set_ylim((-100,500)) ax.set_ylabel("Percentage change",fontsize=18) ax.set_xlabel("CPD District",fontsize=18) ax.set_title("Homicide rates across districts fall\nin 2013 despite significant increases in recent years",fontsize=24) property_gb_district_df = pd.DataFrame() for year in range(2003,2014): property_gb_district_df[year] = crime_df_gb_property[crime_df_gb_property['Year'] == year].groupby('District')['ID'].agg(len).ix[:25] district_property_change_df = pd.DataFrame() district_property_change_df[u'2010\N{EN DASH}2012 change'] = (property_gb_district_df[2012]-property_gb_district_df[2010])/property_gb_district_df[2010]*100 district_property_change_df['2013 change'] = (property_gb_district_df[2013]-property_gb_district_df[2012])/property_gb_district_df[2012]*100 districts = [int(i) for i in list(property_gb_district_df.T.columns)] ax = district_property_change_df.plot(kind='bar') ax.set_xticklabels(districts,rotation=0) ax.legend(ncol=3,fontsize=15,loc='upper center') ax.set_ylim((-30,30)) ax.set_ylabel("Percentage change",fontsize=18) ax.set_xlabel("CPD District",fontsize=18) ax.set_title("Property crime rates across districts fall faster\nin 2013 than previous 3 years combined",fontsize=24) personal_gb_district_df = pd.DataFrame() for year in range(2003,2014): personal_gb_district_df[year] = crime_df_gb_personal[crime_df_gb_personal['Year'] == year].groupby('District')['ID'].agg(len).ix[:25] district_personal_change_df = pd.DataFrame() district_personal_change_df[u'2010\N{EN DASH}2012 change'] = (personal_gb_district_df[2012]-personal_gb_district_df[2010])/personal_gb_district_df[2010]*100 district_personal_change_df['2013 change'] = (personal_gb_district_df[2013]-personal_gb_district_df[2012])/personal_gb_district_df[2012]*100 districts = [int(i) for i in list(district_personal_change_df.T.columns)] ax = district_personal_change_df.plot(kind='bar') ax.set_xticklabels(districts,rotation=0) ax.legend(ncol=3,fontsize=15,loc='upper center') ax.set_ylim((-30,20)) ax.set_ylabel("Percentage change",fontsize=18) ax.set_xlabel("CPD District",fontsize=18) ax.set_title("Personal crime rates across districts fall faster\nin 2013 than previous 3 years combined",fontsize=24) (homicide_gb_district_df.T.pct_change().ix[2004:2012].dropna(axis=1)*100).plot(xlim=(-200,200),kind='kde',colormap='jet',alpha=.75,lw=2) for i,district in enumerate(districts): plt.axvline(x=homicide_gb_district_df.T.pct_change().ix[2013][district]*100,c='k') plt.legend(districts,title='District',ncol=2,fontsize=12) plt.ylabel('Count of changes',fontsize=18) plt.xlabel('Year-over-year percentage change in crime',fontsize=18) plt.title('Homicide changes spread across district distributions',fontsize=24) (property_gb_district_df.T.pct_change().ix[2004:2012].dropna(axis=1)*100).plot(kind='kde',colormap='jet',alpha=.75,lw=2) for i,district in enumerate(districts): plt.axvline(x=property_gb_district_df.T.pct_change().ix[2013][district]*100,c='k') plt.legend(districts,title='District',ncol=2,fontsize=12) plt.ylabel('Count of changes',fontsize=18) plt.xlabel('Year-over-year percentage change in crime',fontsize=18) plt.title('Decrease in 2013 property crime rates across districts\nis lower than normal',fontsize=24) (personal_gb_district_df.T.pct_change().ix[2004:2012].dropna(axis=1)*100).plot(kind='kde',colormap='jet',alpha=.75,lw=2) for i,district in enumerate(districts): plt.axvline(x=personal_gb_district_df.T.pct_change().ix[2013][district]*100,c='k') plt.legend(districts,title='District',ncol=2,fontsize=12) plt.ylabel('Count of changes',fontsize=18) plt.xlabel('Year-over-year percentage change in crime',fontsize=18) plt.title('Decrease in 2013 personal crime rates across districts\nis lower than normal',fontsize=24) homicide_gb_district_df.T.pct_change().ix[2004:2012].dropna(axis=1).apply(np.std,axis=1).plot(kind='kde',colormap='jet',alpha=.75,lw=2) plt.axvline(x=np.std(homicide_gb_district_df.T.pct_change().ix[2013]),c='k') plt.ylabel('Count of changes',fontsize=18) plt.xlabel('Spread of changes in property crime rates across districts',fontsize=18) plt.title('The spread of changes in homicide rates\nis less than previous years',fontsize=24) property_gb_district_df.T.pct_change().ix[2004:2012].dropna(axis=1).apply(np.std,axis=1).plot(kind='kde',colormap='jet',alpha=.75,lw=2) plt.axvline(x=np.std(personal_gb_district_df.T.pct_change().ix[2013]),c='k') plt.ylabel('Count of changes',fontsize=18) plt.xlabel('Spread of changes in property crime rates across districts',fontsize=18) plt.title('The spread of changes in property crime rates\nis less than previous years',fontsize=24) personal_gb_district_df.T.pct_change().ix[2004:2012].dropna(axis=1).apply(np.std,axis=1).plot(kind='kde',colormap='jet',alpha=.75,lw=2) plt.axvline(x=np.std(personal_gb_district_df.T.pct_change().ix[2013]),c='k') plt.ylabel('Count of changes',fontsize=18) plt.xlabel('Spread of changes in personal crime rates across districts',fontsize=18) plt.title('But the spread of changes in personal crime rates\ndoes not differ from previous years',fontsize=24) for district in districts: print district, stats.ttest_1samp(property_gb_district_df.T.pct_change().ix[2004:2012][district].values,property_gb_district_df.T.pct_change().ix[2013][district])