from pandas import * import matplotlib.pyplot as plt import datetime from IPython.core.display import Image, HTML from pandas.core.datetools import * from numpy import arange,array,ones#,random,linalg from scipy import stats figsize(12,10) pandas.__version__ #matplotlib.__version__ pandas.set_option('display.height', 500) pandas.set_option('display.max_rows', 500) pandas.set_option('display.max_columns', 500) pandas.set_option('display.width', 500) pandas.set_option('display.mpl_style', False) eq_data = read_excel('eq_disasters_by_year®ion.xlsx', 0, index_col=None, parse_dates=[1]) eq_data.drop(['id','Affected','Impacted'],axis=1,inplace=True) eq_data.head() storm_data = read_excel('storm_disasters_by_year®ion.xlsx', 0, index_col=None, parse_dates=[1]) storm_data.drop(['id','Affected','Impacted'],axis=1,inplace=True) #storm_data.drop(['id','Affected','Impacted', 'Homeless','Killed','Injured'],axis=1,inplace=True) storm_data.head() eq_dmg = eq_data.ix[eq_data['Damage'] > 1000000] # Number over $1billion print "Number of earthquake disasters over $1billion since 1900 = ", len(eq_dmg) eq_kill = eq_data.ix[eq_data['Killed'] > 10000] # Number over 10,000 print "Number of earthquake disasters with over 10,000 dead since 1900 = ", len(eq_kill) eq_inj = eq_data.ix[eq_data['Injured'] > 10000] # Number over 10,000 print "Number of earthquake disasters with over 10,000 injured since 1900 = ", len(eq_inj) eq_home = eq_data.ix[eq_data['Homeless'] > 10000] # Number over 10,000 print "Number of earthquake disasters with over 10,000 homeless since 1900 = ", len(eq_home) storm_dmg = storm_data.ix[storm_data['Damage'] > 1000000] # Number over $1billion print "Number of storm disasters over $1billion since 1900 = ", len(storm_dmg) storm_kill = storm_data.ix[storm_data['Killed'] > 10000] # Number over 10,000 print "Number of storm disasters with over 10,000 dead since 1900 = ", len(storm_kill) storm_inj = storm_data.ix[storm_data['Injured'] > 10000] # Number over 10,000 print "Number of storm disasters with over 10,000 injured since 1900 = ", len(storm_inj) storm_home = storm_data.ix[storm_data['Homeless'] > 10000] # Number over 10,000 print "Number of storm disasters with over 10,000 homeless since 1900 = ", len(storm_home) eq_region = eq_data.groupby('Region') eq_region_max = eq_region.max() eq_max_occur_region = eq_region_max['Occurrence'].idxmax() eq_max_occur_count = eq_region['Occurrence'].max().max() print 'Region with maximum earthquake disaster occurrences:', eq_max_occur_region, '=', eq_max_occur_count eq_max_kill_region = eq_region_max['Killed'].idxmax() eq_max_kill_count = eq_region['Killed'].max().max() print 'Region with maximum earthquake disaster deaths:', eq_max_kill_region, '=', eq_max_kill_count eq_max_inj_region = eq_region_max['Injured'].idxmax() eq_max_inj_count = eq_region['Injured'].max().max() print 'Region with maximum earthquake disaster injuries:', eq_max_inj_region, '=', eq_max_inj_count eq_max_home_region = eq_region_max['Homeless'].idxmax() eq_max_home_count = eq_region['Homeless'].max().max() print 'Region with maximum earthquake disaster homeless population:', eq_max_home_region, '=', eq_max_home_count eq_max_dmg_region = eq_region_max['Damage'].idxmax() eq_max_dmg_count = eq_region['Damage'].max().max() print 'Region with maximum earthquake disaster damage ($):', eq_max_dmg_region, '=', eq_max_dmg_count storm_region = storm_data.groupby('Region') storm_region_max = storm_region.max() storm_max_occur_region = storm_region_max['Occurrence'].idxmax() storm_max_occur_count = storm_region['Occurrence'].max().max() print 'Region with maximum storm disaster occurrences:', storm_max_occur_region, '=', storm_max_occur_count storm_max_kill_region = storm_region_max['Killed'].idxmax() storm_max_kill_count = storm_region['Killed'].max().max() print 'Region with maximum storm disaster deaths:', storm_max_kill_region, '=', storm_max_kill_count storm_max_inj_region = storm_region_max['Injured'].idxmax() storm_max_inj_count = storm_region['Injured'].max().max() print 'Region with maximum storm disaster injuries:', storm_max_inj_region, '=', storm_max_inj_count storm_max_home_region = storm_region_max['Homeless'].idxmax() storm_max_home_count = storm_region['Homeless'].max().max() print 'Region with maximum storm disaster homeless population:', storm_max_home_region, '=', storm_max_home_count storm_max_dmg_region = storm_region_max['Damage'].idxmax() storm_max_dmg_count = storm_region['Damage'].max().max() print 'Region with maximum storm disaster damage ($):', storm_max_dmg_region, '=', storm_max_dmg_count eq_yr = eq_data.groupby('Year') eq_yr_sum = eq_yr.sum() storm_yr = storm_data.groupby('Year') storm_yr_sum = storm_yr.sum() #storm_eq_yr_sum = concat([eq_yr_sum, storm_yr_sum]) storm_eq_yr_sum = concat([eq_yr_sum.rename(columns={'Occurrence': 'Occurrence (Earthquake)', 'Killed': 'Killed (Earthquake)', 'Injured': 'Injured (Earthquake)', 'Damage': 'Damage (Earthquake)', 'Homeless': 'Homeless (Earthquake)'}), storm_yr_sum.rename(columns={'Occurrence': 'Occurrence (Storm)', 'Killed': 'Killed (Storm)', 'Injured': 'Injured (Storm)', 'Damage': 'Damage (Storm)', 'Homeless': 'Homeless (Storm)'})], axis=1) print 'Earthquake Spearman correlation = ', eq_yr_sum['Occurrence'].corr(eq_yr_sum['Damage'], method='spearman') print 'Storm Spearman correlation = ', storm_yr_sum['Occurrence'].corr(storm_yr_sum['Damage'], method='spearman') print 'Earthquake Spearman correlation from 1980 = ', eq_yr_sum['1980-01-01':'2013-01-01']['Occurrence'].corr(eq_yr_sum['1980-01-01':'2013-01-01']['Damage'], method='spearman') print 'Storm Spearman correlation from 1980 = ', storm_yr_sum['1980-01-01':'2013-01-01']['Occurrence'].corr(storm_yr_sum['1980-01-01':'2013-01-01']['Damage'], method='spearman') print 'Earthquake Spearman correlation (rolling mean) = ', rolling_mean(eq_yr_sum['Occurrence'], 3, center=True).corr(rolling_mean(eq_yr_sum['Damage'], 3, center=True), method='spearman') print 'Storm Spearman correlation (rolling mean) = ', rolling_mean(storm_yr_sum['Occurrence'], 3, center=True).corr(rolling_mean(storm_yr_sum['Damage'], 3, center=True), method='spearman') print 'Earthquake Spearman correlation from 1980 (rolling mean) = ', rolling_mean(eq_yr_sum['1980-01-01':'2013-01-01']['Occurrence'], 3, center=True).corr(rolling_mean(eq_yr_sum['1980-01-01':'2013-01-01']['Damage'], 3, center=True), method='spearman') print 'Storm Spearman correlation from 1980 (rolling mean) = ', rolling_mean(storm_yr_sum['1980-01-01':'2013-01-01']['Occurrence'], 3, center=True).corr(rolling_mean(storm_yr_sum['1980-01-01':'2013-01-01']['Damage'], 3, center=True), method='spearman') print 'Earthquake Pearson correlation = ', eq_yr_sum['Occurrence'].corr(eq_yr_sum['Damage'], method='pearson') print 'Storm Pearson correlation = ', storm_yr_sum['Occurrence'].corr(storm_yr_sum['Damage'], method='pearson') print 'Earthquake Pearson correlation from 1980 = ', eq_yr_sum['1980-01-01':'2013-01-01']['Occurrence'].corr(eq_yr_sum['1980-01-01':'2013-01-01']['Damage'], method='pearson') print 'Storm Pearson correlation from 1980 = ', storm_yr_sum['1980-01-01':'2013-01-01']['Occurrence'].corr(storm_yr_sum['1980-01-01':'2013-01-01']['Damage'], method='pearson') print 'Earthquake Pearson correlation (rolling mean) = ', rolling_mean(eq_yr_sum['Occurrence'], 3, center=True).corr(rolling_mean(eq_yr_sum['Damage'], 3, center=True), method='pearson') print 'Storm Pearson correlation (rolling mean) = ', rolling_mean(storm_yr_sum['Occurrence'], 3, center=True).corr(rolling_mean(storm_yr_sum['Damage'], 3, center=True), method='pearson') print 'Earthquake Pearson correlation from 1980 (rolling mean) = ', rolling_mean(eq_yr_sum['1980-01-01':'2013-01-01']['Occurrence'], 3, center=True).corr(rolling_mean(eq_yr_sum['1980-01-01':'2013-01-01']['Damage'], 3, center=True), method='pearson') print 'Storm Pearson correlation from 1980 (rolling mean) = ', rolling_mean(storm_yr_sum['1980-01-01':'2013-01-01']['Occurrence'], 3, center=True).corr(rolling_mean(storm_yr_sum['1980-01-01':'2013-01-01']['Damage'], 3, center=True), method='pearson') print 'Occurrence Spearman correlation = ', eq_yr_sum['Occurrence'].corr(storm_yr_sum['Occurrence'], method='spearman') print 'Damage Spearman correlation = ', eq_yr_sum['Damage'].corr(storm_yr_sum['Damage'], method='spearman') print 'Occurrence Spearman correlation from 1980 = ', eq_yr_sum['1980-01-01':'2013-01-01']['Occurrence'].corr(storm_yr_sum['1980-01-01':'2013-01-01']['Occurrence'], method='spearman') print 'Damage Spearman correlation from 1980 = ', eq_yr_sum['1980-01-01':'2013-01-01']['Damage'].corr(storm_yr_sum['1980-01-01':'2013-01-01']['Damage'], method='spearman') print 'Occurrence Spearman correlation (rolling mean) = ', rolling_mean(eq_yr_sum['Occurrence'], 3, center=True).corr(rolling_mean(storm_yr_sum['Occurrence'], 3, center=True), method='spearman') print 'Damage Spearman correlation (rolling mean) = ', rolling_mean(eq_yr_sum['Damage'], 3, center=True).corr(rolling_mean(storm_yr_sum['Damage'], 3, center=True), method='spearman') print 'Occurrence Spearman correlation from 1980 (rolling mean) = ', rolling_mean(eq_yr_sum['1980-01-01':'2013-01-01']['Occurrence'], 3, center=True).corr(rolling_mean(storm_yr_sum['1980-01-01':'2013-01-01']['Occurrence'], 3, center=True), method='spearman') print 'Damage Spearman correlation from 1980 (rolling mean) = ', rolling_mean(eq_yr_sum['1980-01-01':'2013-01-01']['Damage'], 3, center=True).corr(rolling_mean(storm_yr_sum['1980-01-01':'2013-01-01']['Damage'], 3, center=True), method='spearman') print 'Occurrence Pearson correlation = ', eq_yr_sum['Occurrence'].corr(storm_yr_sum['Occurrence'], method='pearson') print 'Damage Pearson correlation = ', eq_yr_sum['Damage'].corr(storm_yr_sum['Damage'], method='pearson') print 'Occurrence Pearson correlation from 1980 = ', eq_yr_sum['1980-01-01':'2013-01-01']['Occurrence'].corr(storm_yr_sum['1980-01-01':'2013-01-01']['Occurrence'], method='pearson') print 'Damage Pearson correlation from 1980 = ', eq_yr_sum['1980-01-01':'2013-01-01']['Damage'].corr(storm_yr_sum['1980-01-01':'2013-01-01']['Damage'], method='pearson') print 'Occurrence Pearson correlation (rolling mean) = ', rolling_mean(eq_yr_sum['Occurrence'], 3, center=True).corr(rolling_mean(storm_yr_sum['Occurrence'], 3, center=True), method='pearson') print 'Damage Pearson correlation (rolling mean) = ', rolling_mean(eq_yr_sum['Damage'], 3, center=True).corr(rolling_mean(storm_yr_sum['Damage'], 3, center=True), method='pearson') print 'Occurrence Pearson correlation from 1980 (rolling mean) = ', rolling_mean(eq_yr_sum['1980-01-01':'2013-01-01']['Occurrence'], 3, center=True).corr(rolling_mean(storm_yr_sum['1980-01-01':'2013-01-01']['Occurrence'], 3, center=True), method='pearson') print 'Damage Pearson correlation from 1980 (rolling mean) = ', rolling_mean(eq_yr_sum['1980-01-01':'2013-01-01']['Damage'], 3, center=True).corr(rolling_mean(storm_yr_sum['1980-01-01':'2013-01-01']['Damage'], 3, center=True), method='pearson') fig, ax1 = subplots() ax1.plot(eq_yr_sum.index, eq_yr_sum['Occurrence'], label='Earthquake Occurrences', color='g') ax1.plot(storm_yr_sum.index, storm_yr_sum['Occurrence'], label='Storm Occurrences', color='m') ax1.tick_params(labelsize=12) ax1.set_xlabel('Year', size='16') ax1.set_ylabel('Occurrences (EMDAT criteria)', size='16') legend(loc='upper left') ax2 = ax1.twinx() ax2.plot(eq_yr_sum.index, eq_yr_sum['Damage'], label='Earthquake Losses', color='g', linestyle='--') ax2.plot(storm_yr_sum.index, storm_yr_sum['Damage'], label='Storm Losses', color='m', linestyle='--') ax2.ticklabel_format(style='plain', axis='y') ax2.tick_params(labelsize=12) ax2.set_ylabel('Losses ($)', size='16') #xlim(['1980-01-01', '2015-01-01']) legend(loc='upper right') fig, ax1 = subplots() ax1.plot(eq_yr_sum.index, rolling_mean(eq_yr_sum['Occurrence'], 3), label='Earthquake Occurrences', color='g') ax1.plot(storm_yr_sum.index, rolling_mean(storm_yr_sum['Occurrence'], 3), label='Storm Occurrences', color='m') ax1.tick_params(labelsize=12) ax1.set_xlabel('Year', size='16') ax1.set_ylabel('Occurrences (EMDAT criteria)', size='16') legend(loc='upper left') ax2 = ax1.twinx() ax2.plot(eq_yr_sum.index, rolling_mean(eq_yr_sum['Damage'], 3), label='Earthquake Losses', color='g', linestyle='--') ax2.plot(storm_yr_sum.index, rolling_mean(storm_yr_sum['Damage'], 3), label='Storm Losses', color='m', linestyle='--') ax2.ticklabel_format(style='plain', axis='y') ax2.tick_params(labelsize=12) ax2.set_ylabel('Losses ($)', size='16') xlim(['1980-01-01', '2015-01-01']) legend(loc='upper right') storm_eq_occur_diff = (storm_yr_sum['Occurrence'] - eq_yr_sum['Occurrence'] ) storm_eq_dmg_diff = (storm_yr_sum['Damage'] - eq_yr_sum['Damage'] ) storm_eq_occur_diff = rolling_mean(storm_yr_sum['Occurrence'] - eq_yr_sum['Occurrence'], 3, center=True) storm_eq_dmg_diff = rolling_mean(storm_yr_sum['Damage'] - eq_yr_sum['Damage'], 3, center=True ) fig, ax1 = subplots() ax1.plot(storm_eq_occur_diff.index, storm_eq_occur_diff, label='Occurrences', color='g') ax1.tick_params(labelsize=12) ax1.set_xlabel('Year', size='16') ax1.set_ylabel('Storm - Earthquake Occurrences (EMDAT criteria)', size='16') legend(loc='upper left') ax2 = ax1.twinx() ax2.plot(storm_eq_occur_diff.index, storm_eq_dmg_diff, label='Damage', color='m') ax2.ticklabel_format(style='plain', axis='y') ax2.tick_params(labelsize=12) ax2.set_ylabel('Storm - Earthquake Losses ($)', size='16') xlim(['1980-01-01', '2015-01-01']) legend(loc='upper right') fig, ax1 = subplots() ax1.plot(storm_eq_dmg_diff.index, storm_eq_occur_diff, label='Occurrences', color='g') ax1.tick_params(labelsize=12) ax1.set_xlabel('Year', size='16') ax1.set_ylabel('Storm - Earthquake Occurrences (EMDAT criteria)', size='16') legend(loc='upper left') ax2 = ax1.twinx() ax2.plot(storm_eq_dmg_diff.index, storm_eq_dmg_diff, label='Damage ($)', color='m') ax2.ticklabel_format(style='plain', axis='y') ax2.tick_params(labelsize=12) ax2.set_ylabel('Storm - Earthquake Losses ($)', size='16') #xlim(['1980-01-01', '2015-01-01']) legend(loc='upper right') storm_eq_kill_diff = rolling_mean(storm_yr_sum['Killed'] - eq_yr_sum['Killed'], 3, center=True ) storm_eq_inj_diff = rolling_mean(storm_yr_sum['Injured'] - eq_yr_sum['Injured'], 3, center=True ) storm_eq_occur_diff = rolling_mean(storm_yr_sum['Occurrence'] - eq_yr_sum['Occurrence'], 3, center=True) storm_eq_home_diff = rolling_mean(storm_yr_sum['Homeless'] - eq_yr_sum['Homeless'], 3, center=True ) occur_diff_roll = rolling_mean(storm_eq_occur_diff, 3, center=True).dropna() dmg_diff_roll = rolling_mean(storm_eq_dmg_diff, 3, center=True).dropna() kill_diff_roll = rolling_mean(storm_eq_kill_diff, 3, center=True).dropna() inj_diff_roll = rolling_mean(storm_eq_inj_diff, 3, center=True).dropna() home_diff_roll = rolling_mean(storm_eq_home_diff, 3, center=True).dropna() years = occur_diff_roll.index.year occur_slope, occur_int, occur_r, occur_p, occur_std = stats.linregress(years,occur_diff_roll) occur_trend = occur_slope*years+occur_int dmg_slope, dmg_int, dmg_r, dmg_p, dmg_std = stats.linregress(years,dmg_diff_roll) dmg_trend = dmg_slope*years+dmg_int kill_slope, kill_int, kill_r, kill_p, kill_std = stats.linregress(years,kill_diff_roll) kill_trend = kill_slope*years+kill_int inj_slope, inj_int, inj_r, inj_p, inj_std = stats.linregress(years,inj_diff_roll) inj_trend = inj_slope*years+inj_int home_slope, home_int, home_r, home_p, home_std = stats.linregress(years,home_diff_roll) home_trend = home_slope*years+home_int fig, ax = subplots(5, sharex=True) ax[0].plot(storm_eq_occur_diff.index, storm_eq_occur_diff, label='Occurrence', color='k') ax[0].plot(occur_diff_roll.index,occur_trend, linestyle='--', linewidth=1, color='k') ax[0].set_title('Differences between disaster indicators (storm - earthquake)') ax[0].set_ylabel('Occurrences') ax[1].plot(storm_eq_dmg_diff.index, storm_eq_dmg_diff, label='Damage', color='k') ax[1].plot(dmg_diff_roll.index,dmg_trend, linestyle='--', linewidth=1, color='k') ax[1].set_ylabel('Losses ($)') ax[2].plot(storm_eq_kill_diff.index, storm_eq_kill_diff, label='Killed', color='k') ax[2].plot(kill_diff_roll.index,kill_trend, linestyle='--', linewidth=1, color='k') ax[2].set_ylabel('Deaths') ax[3].plot(storm_eq_inj_diff.index, storm_eq_inj_diff, label='Injured', color='k') ax[3].plot(inj_diff_roll.index,inj_trend, linestyle='--', linewidth=1, color='k') ax[3].set_ylabel('Injuries') ax[4].plot(storm_eq_home_diff.index, storm_eq_home_diff, label='Homeless', color='k') ax[4].plot(occur_diff_roll.index,home_trend, linestyle='--', linewidth=1, color='k') ax[4].set_ylabel('Homeless') #xlim(['1980-01-01', '2015-01-01']) occur_diff_roll = rolling_mean(storm_eq_occur_diff['1980-01-01':'2015-01-01'], 3, center=True).dropna() dmg_diff_roll = rolling_mean(storm_eq_dmg_diff['1980-01-01':'2015-01-01'], 3, center=True).dropna() kill_diff_roll = rolling_mean(storm_eq_kill_diff['1980-01-01':'2015-01-01'], 3, center=True).dropna() inj_diff_roll = rolling_mean(storm_eq_inj_diff['1980-01-01':'2015-01-01'], 3, center=True).dropna() home_diff_roll = rolling_mean(storm_eq_home_diff['1980-01-01':'2015-01-01'], 3, center=True).dropna() years = occur_diff_roll.index.year occur_slope, occur_int, occur_r, occur_p, occur_std = stats.linregress(years,occur_diff_roll) occur_trend = occur_slope*years+occur_int dmg_slope, dmg_int, dmg_r, dmg_p, dmg_std = stats.linregress(years,dmg_diff_roll) dmg_trend = dmg_slope*years+dmg_int kill_slope, kill_int, kill_r, kill_p, kill_std = stats.linregress(years,kill_diff_roll) kill_trend = kill_slope*years+kill_int inj_slope, inj_int, inj_r, inj_p, inj_std = stats.linregress(years,inj_diff_roll) inj_trend = inj_slope*years+inj_int home_slope, home_int, home_r, home_p, home_std = stats.linregress(years,home_diff_roll) home_trend = home_slope*years+home_int fig, ax = subplots(5, sharex=True) ax[0].plot(storm_eq_occur_diff.index, storm_eq_occur_diff, label='Occurrence', color='k') ax[0].plot(occur_diff_roll.index,occur_trend, linestyle='--', linewidth=1, color='k') ax[0].set_title('Differences between disaster indicators (storm - earthquake)') ax[0].set_ylabel('Occurrences') ax[1].plot(storm_eq_dmg_diff.index, storm_eq_dmg_diff, label='Damage', color='k') ax[1].plot(dmg_diff_roll.index,dmg_trend, linestyle='--', linewidth=1, color='k') ax[1].set_ylabel('Losses ($)') ax[2].plot(storm_eq_kill_diff.index, storm_eq_kill_diff, label='Killed', color='k') ax[2].plot(kill_diff_roll.index,kill_trend, linestyle='--', linewidth=1, color='k') ax[2].set_ylabel('Deaths') ax[3].plot(storm_eq_inj_diff.index, storm_eq_inj_diff, label='Injured', color='k') ax[3].plot(inj_diff_roll.index,inj_trend, linestyle='--', linewidth=1, color='k') ax[3].set_ylabel('Injuries') ax[4].plot(storm_eq_home_diff.index, storm_eq_home_diff, label='Homeless', color='k') ax[4].plot(occur_diff_roll.index,home_trend, linestyle='--', linewidth=1, color='k') ax[4].set_ylabel('Homeless') xlim(['1980-01-01', '2015-01-01'])