Reinhart-Rogoff replication

In [2]:
import statsmodels.api as sm
import patsy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

RR = pd.read_csv('RR-processed.csv')

Number of observations per country

In [3]:
RR.groupby('Country').size()
Out[3]:
Country
Australia      64
Austria        62
Belgium        63
Canada         64
Denmark        56
Finland        64
France         54
Germany        59
Greece         40
Ireland        63
Italy          59
Japan          54
Netherlands    53
New Zealand    64
Norway         64
Portugal       58
Spain          42
Sweden         64
UK             64
US             64

Bins

In [4]:
bins = ["0-30%","30-60%","60-90%","Above 90%"]
RR['dgcat'] = np.digitize(RR.debtgdp, [0,30,60,90,np.inf]) - 1
RR.dgcat = [bins[x] for x in RR.dgcat]

bins = ["0-30%","30-60%","60-90%","90-120%","Above 120%"]
RR['dgcat2'] = np.digitize(RR.debtgdp, [0,30,60,90,120,np.inf]) - 1
RR.dgcat2 = [bins[x] for x in RR.dgcat2]

Regression analysis

In [5]:
y,X = patsy.dmatrices('dRGDP ~ dgcat', data=RR[['dRGDP', 'dgcat']].dropna())
print sm.OLS(y,X).fit().summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  dRGDP   R-squared:                       0.045
Model:                            OLS   Adj. R-squared:                  0.042
Method:                 Least Squares   F-statistic:                     18.36
Date:                Thu, 18 Apr 2013   Prob (F-statistic):           1.22e-11
Time:                        21:14:59   Log-Likelihood:                -2927.9
No. Observations:                1175   AIC:                             5864.
Df Residuals:                    1171   BIC:                             5884.
Df Model:                           3                                         
======================================================================================
                         coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
Intercept              4.1735      0.142     29.413      0.000         3.895     4.452
dgcat[T.30-60%]       -1.0814      0.199     -5.429      0.000        -1.472    -0.691
dgcat[T.60-90%]       -0.9869      0.251     -3.931      0.000        -1.479    -0.494
dgcat[T.Above 90%]    -2.0056      0.313     -6.403      0.000        -2.620    -1.391
==============================================================================
Omnibus:                      208.322   Durbin-Watson:                   1.385
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1757.480
Skew:                           0.558   Prob(JB):                         0.00
Kurtosis:                       8.887   Cond. No.                         4.57
==============================================================================
In [6]:
y2,X2 = patsy.dmatrices('dRGDP ~ dgcat2', data=RR[['dRGDP', 'dgcat2']].dropna())
print sm.OLS(y2,X2).fit().summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  dRGDP   R-squared:                       0.046
Model:                            OLS   Adj. R-squared:                  0.043
Method:                 Least Squares   F-statistic:                     14.24
Date:                Thu, 18 Apr 2013   Prob (F-statistic):           2.36e-11
Time:                        21:14:59   Log-Likelihood:                -2926.9
No. Observations:                1175   AIC:                             5864.
Df Residuals:                    1170   BIC:                             5889.
Df Model:                           4                                         
========================================================================================
                           coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------
Intercept                4.1735      0.142     29.423      0.000         3.895     4.452
dgcat2[T.30-60%]        -1.0814      0.199     -5.431      0.000        -1.472    -0.691
dgcat2[T.60-90%]        -0.9869      0.251     -3.933      0.000        -1.479    -0.495
dgcat2[T.90-120%]       -1.7676      0.359     -4.929      0.000        -2.471    -1.064
dgcat2[T.Above 120%]    -2.6120      0.545     -4.796      0.000        -3.680    -1.543
==============================================================================
Omnibus:                      210.356   Durbin-Watson:                   1.388
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1756.317
Skew:                           0.570   Prob(JB):                         0.00
Kurtosis:                       8.880   Cond. No.                         7.10
==============================================================================

Table 3 Corrected

In [7]:
## Country-Year average by debtgdp ("correct weights")
RR.dRGDP.groupby(RR.dgcat).mean()
Out[7]:
dgcat
0-30%        4.173523
30-60%       3.092145
60-90%       3.186575
Above 90%    2.167972
In [8]:
## Averaged Country averages by debtgdp ("equal weights")
RR.dRGDP.groupby([RR.Country, RR.dgcat]).mean().unstack()
Out[8]:
dgcat 0-30% 30-60% 60-90% Above 90%
Country
Australia 3.205885 4.947205 4.042175 3.774250
Austria 5.207527 3.256526 -3.824000 NaN
Belgium NaN 4.191655 3.079868 2.566828
Canada 2.515704 3.525446 4.523574 2.956640
Denmark 3.518584 1.700034 2.391147 NaN
Finland 3.817029 2.418535 5.488887 NaN
France 5.058476 2.616159 3.019631 NaN
Germany 3.873759 0.875803 NaN NaN
Greece 4.001282 0.340200 2.696000 2.910632
Ireland 4.209251 4.452167 3.950139 2.428571
Italy 5.352632 2.054284 1.771529 1.028900
Japan 7.331001 3.957143 1.008411 0.687258
Netherlands 4.082614 2.620772 1.070436 NaN
New Zealand 2.465556 2.889572 3.883683 2.574727
Norway 3.400122 5.108289 10.201270 NaN
Portugal 4.451419 3.549482 1.893899 NaN
Spain 1.549332 3.398669 4.156250 NaN
Sweden 3.567385 2.932237 2.665824 NaN
UK NaN 2.231213 2.522133 2.399096
US NaN 3.370208 3.264068 -1.988893
In [9]:
## Country-Year average by debtgdp ("correct weights") expanded categories
RR.dRGDP.groupby(RR.dgcat2).mean()
Out[9]:
dgcat2
0-30%         4.173523
30-60%        3.092145
60-90%        3.186575
90-120%       2.405934
Above 120%    1.561553
In [10]:
## Averaged Country averages by debtgdp ("equal weights")
RR.dRGDP.groupby([RR.Country, RR.dgcat2]).mean().unstack()
Out[10]:
dgcat2 0-30% 30-60% 60-90% 90-120% Above 120%
Country
Australia 3.205885 4.947205 4.042175 6.920201 2.987763
Austria 5.207527 3.256526 -3.824000 NaN NaN
Belgium NaN 4.191655 3.079868 2.702629 -0.692378
Canada 2.515704 3.525446 4.523574 4.544839 0.574341
Denmark 3.518584 1.700034 2.391147 NaN NaN
Finland 3.817029 2.418535 5.488887 NaN NaN
France 5.058476 2.616159 3.019631 NaN NaN
Germany 3.873759 0.875803 NaN NaN NaN
Greece 4.001282 0.340200 2.696000 2.910632 NaN
Ireland 4.209251 4.452167 3.950139 2.428571 NaN
Italy 5.352632 2.054284 1.771529 1.028900 NaN
Japan 7.331001 3.957143 1.008411 1.359564 0.537857
Netherlands 4.082614 2.620772 1.070436 NaN NaN
New Zealand 2.465556 2.889572 3.883683 -2.256588 9.821699
Norway 3.400122 5.108289 10.201270 NaN NaN
Portugal 4.451419 3.549482 1.893899 NaN NaN
Spain 1.549332 3.398669 4.156250 NaN NaN
Sweden 3.567385 2.932237 2.665824 NaN NaN
UK NaN 2.231213 2.522133 3.303428 1.871568
US NaN 3.370208 3.264068 0.995529 -10.942159

Selective treatment of early years

In [11]:
idx = (RR.Country == 'New Zealand') & (RR.Year < 1950) | (RR.Country == 'Australia') & (RR.Year < 1951) | (RR.Country == 'Canada') & (RR.Year < 1951) 
RR_selective = RR[idx == False]
RR_selective.dRGDP.groupby(RR_selective.dgcat).mean()
Out[11]:
dgcat
0-30%        4.173523
30-60%       3.092145
60-90%       3.186575
Above 90%    1.919934

Equal weights

Table 3 Weights,Exclusion

In [12]:
RR_selective.mean()
Out[12]:
Unnamed: 0    5.916408e+02
Year          1.979633e+03
Debt          1.621458e+07
RGDP          2.369173e+05
GDP           1.957611e+05
dRGDP         3.408270e+00
GDPI          5.034180e+01
GDP1          1.470725e+07
GDP2          1.824865e+07
RGDP1         1.425590e+07
RGDP2         3.072470e+07
GDPI1         5.589542e+02
GDPI2         8.690705e+01
Infl          5.632643e+00
Debt1         5.625405e+05
Debt2         1.050730e+05
Debtalt       1.006665e+07
GDP2alt       4.450790e+05
GDPalt        2.079659e+06
RGDP2alt      1.033331e+05
debtgdp       4.530375e+01
GDP3          7.689113e+04
GNI           5.156239e+08
lRGDP         2.351587e+05
lRGDP1        1.405853e+07
lRGDP2        3.047113e+07

Correct weights

Table 3 Selective years exclusion

In [13]:
RR_selective.dRGDP.groupby([RR_selective.Country, RR_selective.dgcat]).mean().unstack()
Out[13]:
dgcat 0-30% 30-60% 60-90% Above 90%
Country
Australia 3.205885 4.947205 4.042175 NaN
Austria 5.207527 3.256526 -3.824000 NaN
Belgium NaN 4.191655 3.079868 2.566828
Canada 2.515704 3.525446 4.523574 NaN
Denmark 3.518584 1.700034 2.391147 NaN
Finland 3.817029 2.418535 5.488887 NaN
France 5.058476 2.616159 3.019631 NaN
Germany 3.873759 0.875803 NaN NaN
Greece 4.001282 0.340200 2.696000 2.910632
Ireland 4.209251 4.452167 3.950139 2.428571
Italy 5.352632 2.054284 1.771529 1.028900
Japan 7.331001 3.957143 1.008411 0.687258
Netherlands 4.082614 2.620772 1.070436 NaN
New Zealand 2.465556 2.889572 3.883683 -7.635102
Norway 3.400122 5.108289 10.201270 NaN
Portugal 4.451419 3.549482 1.893899 NaN
Spain 1.549332 3.398669 4.156250 NaN
Sweden 3.567385 2.932237 2.665824 NaN
UK NaN 2.231213 2.522133 2.399096
US NaN 3.370208 3.264068 -1.988893

And dropping because of spreadsheet error

In [14]:
drop = ["Australia","Austria","Belgium","Canada","Denmark"]
idx = [False if x in drop else True for x in RR_selective.Country]
RR_selective_spreadsheet = RR_selective[idx]
RR_selective_spreadsheet.dRGDP.groupby(RR.dgcat).mean()
Out[14]:
dgcat
0-30%        4.236391
30-60%       2.958902
60-90%       3.160164
Above 90%    1.692155

New Zealand transcription error

In [15]:
RR_selective_spreadsheet_transcription = RR_selective_spreadsheet.copy()
RR_selective_spreadsheet_transcription.RGDP[RR_selective_spreadsheet_transcription.Country=='New Zealand'] = -7.9
RR_selective_spreadsheet_transcription.dRGDP.groupby(RR.dgcat).mean()
Out[15]:
dgcat
0-30%        4.236391
30-60%       2.958902
60-90%       3.160164
Above 90%    1.692155
In [16]:
a = RR_selective_spreadsheet_transcription.Country
b = RR_selective_spreadsheet_transcription.dgcat
RR_selective_spreadsheet_transcription.dRGDP.groupby(b).mean()
Out[16]:
dgcat
0-30%        4.236391
30-60%       2.958902
60-90%       3.160164
Above 90%    1.692155
In [17]:
published_means = RR_selective_spreadsheet_transcription.dRGDP.groupby([a,b]).mean().unstack()
published_means.ix['New Zealand', 'Above 90%'] = -7.9
published_means.mean()
Out[17]:
dgcat
0-30%        4.089220
30-60%       2.854316
60-90%       3.399440
Above 90%   -0.062062

Medians

In [18]:
RR.dRGDP.groupby(RR.dgcat).median() # Correct, equal weight
Out[18]:
dgcat
0-30%        4.145376
30-60%       3.104629
60-90%       2.897829
Above 90%    2.335324
In [19]:
RR.dRGDP.groupby(RR.dgcat2).median() # Correct, expanded categories, equal weight
Out[19]:
dgcat2
0-30%         4.145376
30-60%        3.104629
60-90%        2.897829
90-120%       2.373340
Above 120%    2.039469

Counts of years

In [20]:
RR.Country.groupby([RR.Country, RR.dgcat]).size().unstack().sum()
Out[20]:
dgcat
0-30%        426
30-60%       439
60-90%       200
Above 90%    110
In [21]:
RR_selective.Country.groupby([RR.Country, RR.dgcat]).size().unstack().sum()
Out[21]:
dgcat
0-30%        426
30-60%       439
60-90%       200
Above 90%     96
In [22]:
RR_selective_spreadsheet.Country.groupby([RR.Country, RR.dgcat]).size().unstack().sum()
Out[22]:
dgcat
0-30%        329
30-60%       324
60-90%       138
Above 90%     71

Categorical scatterplot

In [23]:
labels = ["0-30%","30-60%","60-90%","Above 90%"]
dat = [np.array(RR.dRGDP[RR.dgcat==x]) for x in labels]
print sm.graphics.violinplot(dat, labels=labels)
Figure(480x320)
In [24]:
labels = ["0-30%","30-60%","60-90%","90-120%","Above 120%"]
dat = [np.array(RR.dRGDP[RR.dgcat2==x]) for x in labels]
print sm.graphics.violinplot(dat, labels=labels)
Figure(480x320)

Country-Year average by debtgdp for more recent samples

In [25]:
years = range(1950, 2001, 10)
f = lambda x: (x, RR[RR.Year >= x].dRGDP.groupby(RR[RR.Year >= x].dgcat).mean())
[f(x) for x in years]
Out[25]:
[(1950,
  dgcat
0-30%        4.135295
30-60%       2.980839
60-90%       3.100982
Above 90%    2.121852),
 (1960,
  dgcat
0-30%        3.895619
30-60%       2.909601
60-90%       2.779663
Above 90%    2.074064),
 (1970,
  dgcat
0-30%        3.145224
30-60%       2.644951
60-90%       2.559289
Above 90%    1.959229),
 (1980,
  dgcat
0-30%        2.541408
30-60%       2.451346
60-90%       2.435681
Above 90%    1.959229),
 (1990,
  dgcat
0-30%        2.669334
30-60%       2.403622
60-90%       2.457587
Above 90%    1.823201),
 (2000,
  dgcat
0-30%        2.747593
30-60%       1.881735
60-90%       1.290506
Above 90%    1.745087)]

Lagged dependent variable

In [26]:
RR['dRGDP_lag'] = RR.dRGDP.groupby(RR.Country).apply(lambda x: x.shift())
y,X = patsy.dmatrices('dRGDP ~ dgcat + dRGDP_lag', data=RR[['dRGDP', 'dgcat', 'dRGDP_lag']].dropna())
print sm.OLS(y,X).fit().summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  dRGDP   R-squared:                       0.182
Model:                            OLS   Adj. R-squared:                  0.179
Method:                 Least Squares   F-statistic:                     63.84
Date:                Thu, 18 Apr 2013   Prob (F-statistic):           8.90e-49
Time:                        21:15:00   Log-Likelihood:                -2723.0
No. Observations:                1155   AIC:                             5456.
Df Residuals:                    1150   BIC:                             5481.
Df Model:                           4                                         
======================================================================================
                         coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
Intercept              2.5045      0.173     14.461      0.000         2.165     2.844
dgcat[T.30-60%]       -0.6746      0.179     -3.778      0.000        -1.025    -0.324
dgcat[T.60-90%]       -0.5782      0.224     -2.585      0.010        -1.017    -0.139
dgcat[T.Above 90%]    -1.0539      0.287     -3.668      0.000        -1.618    -0.490
dRGDP_lag              0.3715      0.027     13.668      0.000         0.318     0.425
==============================================================================
Omnibus:                      111.755   Durbin-Watson:                   2.108
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              577.799
Skew:                          -0.272   Prob(JB):                    3.41e-126
Kurtosis:                       6.422   Cond. No.                         20.6
==============================================================================

Fixed effects

In [27]:
y,X = patsy.dmatrices('dRGDP ~ dgcat + dRGDP_lag + Country', data=RR[['dRGDP', 'dgcat', 'dRGDP_lag', 'Country']].dropna())
print sm.OLS(y,X).fit().summary()
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                  dRGDP   R-squared:                       0.195
Model:                            OLS   Adj. R-squared:                  0.178
Method:                 Least Squares   F-statistic:                     11.89
Date:                Thu, 18 Apr 2013   Prob (F-statistic):           1.84e-39
Time:                        21:15:00   Log-Likelihood:                -2713.7
No. Observations:                1155   AIC:                             5475.
Df Residuals:                    1131   BIC:                             5597.
Df Model:                          23                                         
==========================================================================================
                             coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
Intercept                  2.8787      0.355      8.118      0.000         2.183     3.574
dgcat[T.30-60%]           -0.8192      0.208     -3.943      0.000        -1.227    -0.412
dgcat[T.60-90%]           -0.8052      0.258     -3.115      0.002        -1.312    -0.298
dgcat[T.Above 90%]        -1.2941      0.339     -3.817      0.000        -1.959    -0.629
Country[T.Austria]        -0.2098      0.463     -0.454      0.650        -1.117     0.698
Country[T.Belgium]        -0.0507      0.480     -0.106      0.916        -0.993     0.891
Country[T.Canada]          0.3403      0.469      0.725      0.468        -0.580     1.261
Country[T.Denmark]        -0.8121      0.476     -1.707      0.088        -1.746     0.121
Country[T.Finland]        -0.4436      0.458     -0.969      0.333        -1.342     0.455
Country[T.France]         -0.0764      0.479     -0.159      0.873        -1.017     0.864
Country[T.Germany]        -0.7266      0.469     -1.548      0.122        -1.648     0.195
Country[T.Greece]         -0.3446      0.538     -0.640      0.522        -1.401     0.711
Country[T.Ireland]         0.3872      0.471      0.822      0.411        -0.537     1.312
Country[T.Italy]          -0.3490      0.469     -0.743      0.457        -1.270     0.572
Country[T.Japan]           0.4994      0.482      1.036      0.301        -0.447     1.445
Country[T.Netherlands]    -0.4065      0.486     -0.837      0.403        -1.360     0.547
Country[T.New Zealand]    -0.2624      0.465     -0.565      0.572        -1.174     0.649
Country[T.Norway]         -0.3741      0.459     -0.814      0.416        -1.276     0.527
Country[T.Portugal]       -0.0705      0.469     -0.150      0.881        -0.992     0.851
Country[T.Spain]          -0.1694      0.528     -0.321      0.748        -1.205     0.866
Country[T.Sweden]         -0.4845      0.462     -1.050      0.294        -1.390     0.421
Country[T.UK]             -0.3898      0.474     -0.823      0.411        -1.319     0.540
Country[T.US]              0.0911      0.471      0.193      0.847        -0.833     1.015
dRGDP_lag                  0.3515      0.028     12.701      0.000         0.297     0.406
==============================================================================
Omnibus:                      111.745   Durbin-Watson:                   2.106
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              575.683
Skew:                          -0.274   Prob(JB):                    9.82e-126
Kurtosis:                       6.415   Cond. No.                         91.7
==============================================================================
In [27]: