import statsmodels.api as sm
import patsy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Load the processed data set; every later cell works off this frame.
RR = pd.read_csv("RR-processed.csv")
# Number of rows available for each country.
RR.groupby("Country").size()
Country Australia 64 Austria 62 Belgium 63 Canada 64 Denmark 56 Finland 64 France 54 Germany 59 Greece 40 Ireland 63 Italy 59 Japan 54 Netherlands 53 New Zealand 64 Norway 64 Portugal 58 Spain 42 Sweden 64 UK 64 US 64
# Labels for the four debt/GDP buckets, in the same order as the bin edges.
bins = ["0-30%", "30-60%", "60-90%", "Above 90%"]
# np.digitize numbers intervals from 1, so subtract 1 to index into `bins`.
# NOTE(review): a debtgdp value below 0 would give index -1 and silently pick
# the last label, and a NaN would presumably give an out-of-range index --
# confirm the column contains neither.
RR['dgcat'] = [
    bins[code]
    for code in np.digitize(RR.debtgdp, [0, 30, 60, 90, np.inf]) - 1
]

# Same bucketing with the top bucket split at 120%.
bins = ["0-30%", "30-60%", "60-90%", "90-120%", "Above 120%"]
RR['dgcat2'] = [
    bins[code]
    for code in np.digitize(RR.debtgdp, [0, 30, 60, 90, 120, np.inf]) - 1
]

# OLS of dRGDP on the four-bucket category (patsy expands dgcat to dummies).
y, X = patsy.dmatrices('dRGDP ~ dgcat', data=RR[['dRGDP', 'dgcat']].dropna())
# Parenthesised single-argument print runs on both Python 2 and Python 3.
print(sm.OLS(y, X).fit().summary())
OLS Regression Results ============================================================================== Dep. Variable: dRGDP R-squared: 0.045 Model: OLS Adj. R-squared: 0.042 Method: Least Squares F-statistic: 18.36 Date: Thu, 18 Apr 2013 Prob (F-statistic): 1.22e-11 Time: 21:14:59 Log-Likelihood: -2927.9 No. Observations: 1175 AIC: 5864. Df Residuals: 1171 BIC: 5884. Df Model: 3 ====================================================================================== coef std err t P>|t| [95.0% Conf. Int.] -------------------------------------------------------------------------------------- Intercept 4.1735 0.142 29.413 0.000 3.895 4.452 dgcat[T.30-60%] -1.0814 0.199 -5.429 0.000 -1.472 -0.691 dgcat[T.60-90%] -0.9869 0.251 -3.931 0.000 -1.479 -0.494 dgcat[T.Above 90%] -2.0056 0.313 -6.403 0.000 -2.620 -1.391 ============================================================================== Omnibus: 208.322 Durbin-Watson: 1.385 Prob(Omnibus): 0.000 Jarque-Bera (JB): 1757.480 Skew: 0.558 Prob(JB): 0.00 Kurtosis: 8.887 Cond. No. 4.57 ==============================================================================
# Same regression as above, using the five-bucket category dgcat2.
y2, X2 = patsy.dmatrices('dRGDP ~ dgcat2', data=RR[['dRGDP', 'dgcat2']].dropna())
# Parenthesised single-argument print runs on both Python 2 and Python 3.
print(sm.OLS(y2, X2).fit().summary())
OLS Regression Results ============================================================================== Dep. Variable: dRGDP R-squared: 0.046 Model: OLS Adj. R-squared: 0.043 Method: Least Squares F-statistic: 14.24 Date: Thu, 18 Apr 2013 Prob (F-statistic): 2.36e-11 Time: 21:14:59 Log-Likelihood: -2926.9 No. Observations: 1175 AIC: 5864. Df Residuals: 1170 BIC: 5889. Df Model: 4 ======================================================================================== coef std err t P>|t| [95.0% Conf. Int.] ---------------------------------------------------------------------------------------- Intercept 4.1735 0.142 29.423 0.000 3.895 4.452 dgcat2[T.30-60%] -1.0814 0.199 -5.431 0.000 -1.472 -0.691 dgcat2[T.60-90%] -0.9869 0.251 -3.933 0.000 -1.479 -0.495 dgcat2[T.90-120%] -1.7676 0.359 -4.929 0.000 -2.471 -1.064 dgcat2[T.Above 120%] -2.6120 0.545 -4.796 0.000 -3.680 -1.543 ============================================================================== Omnibus: 210.356 Durbin-Watson: 1.388 Prob(Omnibus): 0.000 Jarque-Bera (JB): 1756.317 Skew: 0.570 Prob(JB): 0.00 Kurtosis: 8.880 Cond. No. 7.10 ==============================================================================
## Country-Year average by debtgdp ("correct weights")
# Mean of dRGDP over all rows falling in each dgcat bucket.
RR.groupby('dgcat')['dRGDP'].mean()
dgcat 0-30% 4.173523 30-60% 3.092145 60-90% 3.186575 Above 90% 2.167972
## Averaged Country averages by debtgdp ("equal weights")
# Mean dRGDP for each (Country, dgcat) pair, pivoted so that rows are
# countries and columns are buckets; empty cells show up as NaN.
RR.groupby(['Country', 'dgcat'])['dRGDP'].mean().unstack()
dgcat | 0-30% | 30-60% | 60-90% | Above 90% |
---|---|---|---|---|
Country | ||||
Australia | 3.205885 | 4.947205 | 4.042175 | 3.774250 |
Austria | 5.207527 | 3.256526 | -3.824000 | NaN |
Belgium | NaN | 4.191655 | 3.079868 | 2.566828 |
Canada | 2.515704 | 3.525446 | 4.523574 | 2.956640 |
Denmark | 3.518584 | 1.700034 | 2.391147 | NaN |
Finland | 3.817029 | 2.418535 | 5.488887 | NaN |
France | 5.058476 | 2.616159 | 3.019631 | NaN |
Germany | 3.873759 | 0.875803 | NaN | NaN |
Greece | 4.001282 | 0.340200 | 2.696000 | 2.910632 |
Ireland | 4.209251 | 4.452167 | 3.950139 | 2.428571 |
Italy | 5.352632 | 2.054284 | 1.771529 | 1.028900 |
Japan | 7.331001 | 3.957143 | 1.008411 | 0.687258 |
Netherlands | 4.082614 | 2.620772 | 1.070436 | NaN |
New Zealand | 2.465556 | 2.889572 | 3.883683 | 2.574727 |
Norway | 3.400122 | 5.108289 | 10.201270 | NaN |
Portugal | 4.451419 | 3.549482 | 1.893899 | NaN |
Spain | 1.549332 | 3.398669 | 4.156250 | NaN |
Sweden | 3.567385 | 2.932237 | 2.665824 | NaN |
UK | NaN | 2.231213 | 2.522133 | 2.399096 |
US | NaN | 3.370208 | 3.264068 | -1.988893 |
## Country-Year average by debtgdp ("correct weights") expanded categories
# Mean of dRGDP over all rows falling in each dgcat2 bucket.
RR.groupby('dgcat2')['dRGDP'].mean()
dgcat2 0-30% 4.173523 30-60% 3.092145 60-90% 3.186575 90-120% 2.405934 Above 120% 1.561553
## Averaged Country averages by debtgdp ("equal weights")
# Mean dRGDP for each (Country, dgcat2) pair, pivoted so that rows are
# countries and columns are the expanded buckets.
RR.groupby(['Country', 'dgcat2'])['dRGDP'].mean().unstack()
dgcat2 | 0-30% | 30-60% | 60-90% | 90-120% | Above 120% |
---|---|---|---|---|---|
Country | |||||
Australia | 3.205885 | 4.947205 | 4.042175 | 6.920201 | 2.987763 |
Austria | 5.207527 | 3.256526 | -3.824000 | NaN | NaN |
Belgium | NaN | 4.191655 | 3.079868 | 2.702629 | -0.692378 |
Canada | 2.515704 | 3.525446 | 4.523574 | 4.544839 | 0.574341 |
Denmark | 3.518584 | 1.700034 | 2.391147 | NaN | NaN |
Finland | 3.817029 | 2.418535 | 5.488887 | NaN | NaN |
France | 5.058476 | 2.616159 | 3.019631 | NaN | NaN |
Germany | 3.873759 | 0.875803 | NaN | NaN | NaN |
Greece | 4.001282 | 0.340200 | 2.696000 | 2.910632 | NaN |
Ireland | 4.209251 | 4.452167 | 3.950139 | 2.428571 | NaN |
Italy | 5.352632 | 2.054284 | 1.771529 | 1.028900 | NaN |
Japan | 7.331001 | 3.957143 | 1.008411 | 1.359564 | 0.537857 |
Netherlands | 4.082614 | 2.620772 | 1.070436 | NaN | NaN |
New Zealand | 2.465556 | 2.889572 | 3.883683 | -2.256588 | 9.821699 |
Norway | 3.400122 | 5.108289 | 10.201270 | NaN | NaN |
Portugal | 4.451419 | 3.549482 | 1.893899 | NaN | NaN |
Spain | 1.549332 | 3.398669 | 4.156250 | NaN | NaN |
Sweden | 3.567385 | 2.932237 | 2.665824 | NaN | NaN |
UK | NaN | 2.231213 | 2.522133 | 3.303428 | 1.871568 |
US | NaN | 3.370208 | 3.264068 | 0.995529 | -10.942159 |
# Mask of the rows to leave out: the early years of three countries.
# `&` already binds tighter than `|`; the extra parentheses only make the
# grouping explicit.
idx = (
    ((RR.Country == 'New Zealand') & (RR.Year < 1950))
    | ((RR.Country == 'Australia') & (RR.Year < 1951))
    | ((RR.Country == 'Canada') & (RR.Year < 1951))
)
# Keep everything that is not flagged by the mask.
RR_selective = RR[~idx]
# Bucket means of dRGDP after the exclusion.
RR_selective.groupby('dgcat')['dRGDP'].mean()
dgcat 0-30% 4.173523 30-60% 3.092145 60-90% 3.186575 Above 90% 1.919934
# Column means of the selective sample.  The frame also holds string columns
# (Country, dgcat, dgcat2); ask for numeric columns explicitly instead of
# relying on non-numeric ones being dropped silently, which newer pandas
# versions no longer do.
RR_selective.mean(numeric_only=True)
Unnamed: 0 5.916408e+02 Year 1.979633e+03 Debt 1.621458e+07 RGDP 2.369173e+05 GDP 1.957611e+05 dRGDP 3.408270e+00 GDPI 5.034180e+01 GDP1 1.470725e+07 GDP2 1.824865e+07 RGDP1 1.425590e+07 RGDP2 3.072470e+07 GDPI1 5.589542e+02 GDPI2 8.690705e+01 Infl 5.632643e+00 Debt1 5.625405e+05 Debt2 1.050730e+05 Debtalt 1.006665e+07 GDP2alt 4.450790e+05 GDPalt 2.079659e+06 RGDP2alt 1.033331e+05 debtgdp 4.530375e+01 GDP3 7.689113e+04 GNI 5.156239e+08 lRGDP 2.351587e+05 lRGDP1 1.405853e+07 lRGDP2 3.047113e+07
# Country x dgcat table of mean dRGDP for the selective sample.
RR_selective.groupby(['Country', 'dgcat'])['dRGDP'].mean().unstack()
dgcat | 0-30% | 30-60% | 60-90% | Above 90% |
---|---|---|---|---|
Country | ||||
Australia | 3.205885 | 4.947205 | 4.042175 | NaN |
Austria | 5.207527 | 3.256526 | -3.824000 | NaN |
Belgium | NaN | 4.191655 | 3.079868 | 2.566828 |
Canada | 2.515704 | 3.525446 | 4.523574 | NaN |
Denmark | 3.518584 | 1.700034 | 2.391147 | NaN |
Finland | 3.817029 | 2.418535 | 5.488887 | NaN |
France | 5.058476 | 2.616159 | 3.019631 | NaN |
Germany | 3.873759 | 0.875803 | NaN | NaN |
Greece | 4.001282 | 0.340200 | 2.696000 | 2.910632 |
Ireland | 4.209251 | 4.452167 | 3.950139 | 2.428571 |
Italy | 5.352632 | 2.054284 | 1.771529 | 1.028900 |
Japan | 7.331001 | 3.957143 | 1.008411 | 0.687258 |
Netherlands | 4.082614 | 2.620772 | 1.070436 | NaN |
New Zealand | 2.465556 | 2.889572 | 3.883683 | -7.635102 |
Norway | 3.400122 | 5.108289 | 10.201270 | NaN |
Portugal | 4.451419 | 3.549482 | 1.893899 | NaN |
Spain | 1.549332 | 3.398669 | 4.156250 | NaN |
Sweden | 3.567385 | 2.932237 | 2.665824 | NaN |
UK | NaN | 2.231213 | 2.522133 | 2.399096 |
US | NaN | 3.370208 | 3.264068 | -1.988893 |
# Countries left out of this variant of the sample.
drop = ["Australia", "Austria", "Belgium", "Canada", "Denmark"]
# True for every row whose country is NOT in `drop`.
idx = [country not in drop for country in RR_selective.Country]
RR_selective_spreadsheet = RR_selective[idx]
# Group by the subset's own dgcat column rather than RR.dgcat, so the result
# does not depend on implicit index alignment with the unfiltered frame.
RR_selective_spreadsheet.dRGDP.groupby(RR_selective_spreadsheet.dgcat).mean()
dgcat 0-30% 4.236391 30-60% 2.958902 60-90% 3.160164 Above 90% 1.692155
# Work on a copy so the frame it was derived from stays untouched.
RR_selective_spreadsheet_transcription = RR_selective_spreadsheet.copy()
# Single-step .loc assignment instead of chained `frame.RGDP[mask] = ...`,
# which may write to a temporary and leave the frame unchanged.
# NOTE(review): this overwrites the RGDP column for all New Zealand rows; it
# does not touch dRGDP, so the bucket means computed below are the same as
# before the assignment.  Confirm whether this line is still wanted.
RR_selective_spreadsheet_transcription.loc[
    RR_selective_spreadsheet_transcription.Country == 'New Zealand', 'RGDP'
] = -7.9
# Group by the copy's own dgcat column rather than the unfiltered RR.dgcat.
RR_selective_spreadsheet_transcription.dRGDP.groupby(
    RR_selective_spreadsheet_transcription.dgcat
).mean()
dgcat 0-30% 4.236391 30-60% 2.958902 60-90% 3.160164 Above 90% 1.692155
# Short aliases for the two grouping columns of the transcription frame.
a, b = (
    RR_selective_spreadsheet_transcription.Country,
    RR_selective_spreadsheet_transcription.dgcat,
)
# Bucket means of dRGDP, grouped by the frame's own dgcat column.
RR_selective_spreadsheet_transcription.dRGDP.groupby(b).mean()
dgcat 0-30% 4.236391 30-60% 2.958902 60-90% 3.160164 Above 90% 1.692155
# Country x dgcat table of mean dRGDP (a = Country, b = dgcat).
published_means = RR_selective_spreadsheet_transcription.dRGDP.groupby([a, b]).mean().unstack()
# Overwrite one cell of the table with -7.9.  Label-based .loc replaces the
# .ix indexer, which no longer exists in current pandas.
published_means.loc['New Zealand', 'Above 90%'] = -7.9
# Unweighted average of the country means within each bucket (NaN cells are
# skipped by mean()).
published_means.mean()
dgcat 0-30% 4.089220 30-60% 2.854316 60-90% 3.399440 Above 90% -0.062062
# Median of dRGDP over all rows in each dgcat bucket.
RR.groupby('dgcat')['dRGDP'].median() # Correct, equal weight
dgcat 0-30% 4.145376 30-60% 3.104629 60-90% 2.897829 Above 90% 2.335324
# Median of dRGDP over all rows in each dgcat2 bucket.
RR.groupby('dgcat2')['dRGDP'].median() # Correct, expanded categories, equal weight
dgcat2 0-30% 4.145376 30-60% 3.104629 60-90% 2.897829 90-120% 2.373340 Above 120% 2.039469
# Rows per (Country, dgcat) pair, summed over countries: the number of
# observations in each bucket for the full sample.
RR.groupby(['Country', 'dgcat']).size().unstack().sum()
dgcat 0-30% 426 30-60% 439 60-90% 200 Above 90% 110
# Observations per bucket in the selective sample.  Group by the subset's own
# columns instead of RR.Country / RR.dgcat so the count does not rely on
# implicit index alignment with the unfiltered frame.
RR_selective.groupby(['Country', 'dgcat']).size().unstack().sum()
dgcat 0-30% 426 30-60% 439 60-90% 200 Above 90% 96
# Observations per bucket after also dropping the five excluded countries.
# Group by the subset's own columns instead of RR.Country / RR.dgcat so the
# count does not rely on implicit index alignment with the unfiltered frame.
RR_selective_spreadsheet.groupby(['Country', 'dgcat']).size().unstack().sum()
dgcat 0-30% 329 30-60% 324 60-90% 138 Above 90% 71
# One array of dRGDP values per four-bucket label, in label order.
labels = ["0-30%", "30-60%", "60-90%", "Above 90%"]
dat = [np.array(RR.dRGDP[RR.dgcat == label]) for label in labels]
# Draw the violin plot; printing the return value shows the figure's repr.
# Parenthesised single-argument print runs on both Python 2 and Python 3.
print(sm.graphics.violinplot(dat, labels=labels))
Figure(480x320)
# One array of dRGDP values per five-bucket label, in label order.
labels = ["0-30%", "30-60%", "60-90%", "90-120%", "Above 120%"]
dat = [np.array(RR.dRGDP[RR.dgcat2 == label]) for label in labels]
# Draw the violin plot; printing the return value shows the figure's repr.
# Parenthesised single-argument print runs on both Python 2 and Python 3.
print(sm.graphics.violinplot(dat, labels=labels))
Figure(480x320)
# Start years 1950, 1960, ..., 2000.
years = range(1950, 2001, 10)


def f(x):
    """Return ``(x, means)`` for the rows of RR with ``Year >= x``.

    ``means`` is the mean of dRGDP within each dgcat bucket, computed on
    that restricted sample.
    """
    # Filter once and reuse the subset for both the values and the grouper.
    recent = RR[RR.Year >= x]
    return (x, recent.dRGDP.groupby(recent.dgcat).mean())


[f(x) for x in years]
[(1950, dgcat 0-30% 4.135295 30-60% 2.980839 60-90% 3.100982 Above 90% 2.121852), (1960, dgcat 0-30% 3.895619 30-60% 2.909601 60-90% 2.779663 Above 90% 2.074064), (1970, dgcat 0-30% 3.145224 30-60% 2.644951 60-90% 2.559289 Above 90% 1.959229), (1980, dgcat 0-30% 2.541408 30-60% 2.451346 60-90% 2.435681 Above 90% 1.959229), (1990, dgcat 0-30% 2.669334 30-60% 2.403622 60-90% 2.457587 Above 90% 1.823201), (2000, dgcat 0-30% 2.747593 30-60% 1.881735 60-90% 1.290506 Above 90% 1.745087)]
# Previous row's dRGDP within the same country.  GroupBy.shift keeps the
# original row index, so the result assigns straight back onto RR; going
# through apply() can return a group-keyed index that no longer lines up.
# NOTE(review): assumes rows are ordered by Year within each country --
# confirm against the CSV.
RR['dRGDP_lag'] = RR.groupby('Country')['dRGDP'].shift()
# OLS of dRGDP on the debt bucket plus lagged dRGDP; rows without a lag
# (the first row of each country) are removed by dropna().
y, X = patsy.dmatrices(
    'dRGDP ~ dgcat + dRGDP_lag',
    data=RR[['dRGDP', 'dgcat', 'dRGDP_lag']].dropna(),
)
# Parenthesised single-argument print runs on both Python 2 and Python 3.
print(sm.OLS(y, X).fit().summary())
OLS Regression Results ============================================================================== Dep. Variable: dRGDP R-squared: 0.182 Model: OLS Adj. R-squared: 0.179 Method: Least Squares F-statistic: 63.84 Date: Thu, 18 Apr 2013 Prob (F-statistic): 8.90e-49 Time: 21:15:00 Log-Likelihood: -2723.0 No. Observations: 1155 AIC: 5456. Df Residuals: 1150 BIC: 5481. Df Model: 4 ====================================================================================== coef std err t P>|t| [95.0% Conf. Int.] -------------------------------------------------------------------------------------- Intercept 2.5045 0.173 14.461 0.000 2.165 2.844 dgcat[T.30-60%] -0.6746 0.179 -3.778 0.000 -1.025 -0.324 dgcat[T.60-90%] -0.5782 0.224 -2.585 0.010 -1.017 -0.139 dgcat[T.Above 90%] -1.0539 0.287 -3.668 0.000 -1.618 -0.490 dRGDP_lag 0.3715 0.027 13.668 0.000 0.318 0.425 ============================================================================== Omnibus: 111.755 Durbin-Watson: 2.108 Prob(Omnibus): 0.000 Jarque-Bera (JB): 577.799 Skew: -0.272 Prob(JB): 3.41e-126 Kurtosis: 6.422 Cond. No. 20.6 ==============================================================================
# Same lagged regression with a dummy for each country added.
y, X = patsy.dmatrices(
    'dRGDP ~ dgcat + dRGDP_lag + Country',
    data=RR[['dRGDP', 'dgcat', 'dRGDP_lag', 'Country']].dropna(),
)
# Parenthesised single-argument print runs on both Python 2 and Python 3.
print(sm.OLS(y, X).fit().summary())
OLS Regression Results ============================================================================== Dep. Variable: dRGDP R-squared: 0.195 Model: OLS Adj. R-squared: 0.178 Method: Least Squares F-statistic: 11.89 Date: Thu, 18 Apr 2013 Prob (F-statistic): 1.84e-39 Time: 21:15:00 Log-Likelihood: -2713.7 No. Observations: 1155 AIC: 5475. Df Residuals: 1131 BIC: 5597. Df Model: 23 ========================================================================================== coef std err t P>|t| [95.0% Conf. Int.] ------------------------------------------------------------------------------------------ Intercept 2.8787 0.355 8.118 0.000 2.183 3.574 dgcat[T.30-60%] -0.8192 0.208 -3.943 0.000 -1.227 -0.412 dgcat[T.60-90%] -0.8052 0.258 -3.115 0.002 -1.312 -0.298 dgcat[T.Above 90%] -1.2941 0.339 -3.817 0.000 -1.959 -0.629 Country[T.Austria] -0.2098 0.463 -0.454 0.650 -1.117 0.698 Country[T.Belgium] -0.0507 0.480 -0.106 0.916 -0.993 0.891 Country[T.Canada] 0.3403 0.469 0.725 0.468 -0.580 1.261 Country[T.Denmark] -0.8121 0.476 -1.707 0.088 -1.746 0.121 Country[T.Finland] -0.4436 0.458 -0.969 0.333 -1.342 0.455 Country[T.France] -0.0764 0.479 -0.159 0.873 -1.017 0.864 Country[T.Germany] -0.7266 0.469 -1.548 0.122 -1.648 0.195 Country[T.Greece] -0.3446 0.538 -0.640 0.522 -1.401 0.711 Country[T.Ireland] 0.3872 0.471 0.822 0.411 -0.537 1.312 Country[T.Italy] -0.3490 0.469 -0.743 0.457 -1.270 0.572 Country[T.Japan] 0.4994 0.482 1.036 0.301 -0.447 1.445 Country[T.Netherlands] -0.4065 0.486 -0.837 0.403 -1.360 0.547 Country[T.New Zealand] -0.2624 0.465 -0.565 0.572 -1.174 0.649 Country[T.Norway] -0.3741 0.459 -0.814 0.416 -1.276 0.527 Country[T.Portugal] -0.0705 0.469 -0.150 0.881 -0.992 0.851 Country[T.Spain] -0.1694 0.528 -0.321 0.748 -1.205 0.866 Country[T.Sweden] -0.4845 0.462 -1.050 0.294 -1.390 0.421 Country[T.UK] -0.3898 0.474 -0.823 0.411 -1.319 0.540 Country[T.US] 0.0911 0.471 0.193 0.847 -0.833 1.015 dRGDP_lag 0.3515 0.028 12.701 0.000 0.297 0.406 
============================================================================== Omnibus: 111.745 Durbin-Watson: 2.106 Prob(Omnibus): 0.000 Jarque-Bera (JB): 575.683 Skew: -0.274 Prob(JB): 9.82e-126 Kurtosis: 6.415 Cond. No. 91.7 ==============================================================================