# Reinhart-Rogoff replication¶

In [2]:
import statsmodels.api as sm
import patsy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt



## Number of observations per country¶

In [3]:
# Number of rows per country.
# NOTE(review): RR is not created in any cell shown here (numbering starts at
# In [2]) -- presumably a DataFrame loaded in an earlier cell; confirm.
RR.groupby('Country').size()

Out[3]:
Country
Australia      64
Austria        62
Belgium        63
Denmark        56
Finland        64
France         54
Germany        59
Greece         40
Ireland        63
Italy          59
Japan          54
Netherlands    53
New Zealand    64
Norway         64
Portugal       58
Spain          42
Sweden         64
UK             64
US             64

## Bins¶

In [4]:
def debt_category(debtgdp, edges, labels):
    """Label each debt/GDP value with the bin it falls into.

    Bin i covers the half-open interval [edges[i], edges[i+1]), which is
    np.digitize's default (right=False) convention.  A value that lands in no
    bin (below edges[0], NaN, or not below edges[-1]) is labelled NaN rather
    than wrapping around to the last label through a negative index or
    raising IndexError.

    Parameters
    ----------
    debtgdp : sequence of numbers (debt as a percentage of GDP)
    edges   : increasing bin edges, one more than there are labels
    labels  : one label string per bin

    Returns
    -------
    list with one entry per input value: a label string or NaN
    """
    codes = np.digitize(debtgdp, edges) - 1
    return [labels[c] if 0 <= c < len(labels) else np.nan for c in codes]

# Four-way split used for the main tables.
bins = ["0-30%","30-60%","60-90%","Above 90%"]
RR['dgcat'] = debt_category(RR.debtgdp, [0,30,60,90,np.inf], bins)

# Five-way split: the top bin is divided at 120%.
bins = ["0-30%","30-60%","60-90%","90-120%","Above 120%"]
RR['dgcat2'] = debt_category(RR.debtgdp, [0,30,60,90,120,np.inf], bins)


## Regression analysis¶

In [5]:
# OLS of dRGDP on the dgcat dummies; rows missing either column are dropped
# first.  Per the summary printed below, "0-30%" is the reference category.
y, X = patsy.dmatrices('dRGDP ~ dgcat', data=RR[['dRGDP', 'dgcat']].dropna())
# print(...) with a single argument runs on both Python 2 and Python 3.
print(sm.OLS(y, X).fit().summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                  dRGDP   R-squared:                       0.045
Method:                 Least Squares   F-statistic:                     18.36
Date:                Thu, 18 Apr 2013   Prob (F-statistic):           1.22e-11
Time:                        21:14:59   Log-Likelihood:                -2927.9
No. Observations:                1175   AIC:                             5864.
Df Residuals:                    1171   BIC:                             5884.
Df Model:                           3
======================================================================================
coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
Intercept              4.1735      0.142     29.413      0.000         3.895     4.452
dgcat[T.30-60%]       -1.0814      0.199     -5.429      0.000        -1.472    -0.691
dgcat[T.60-90%]       -0.9869      0.251     -3.931      0.000        -1.479    -0.494
dgcat[T.Above 90%]    -2.0056      0.313     -6.403      0.000        -2.620    -1.391
==============================================================================
Omnibus:                      208.322   Durbin-Watson:                   1.385
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1757.480
Skew:                           0.558   Prob(JB):                         0.00
Kurtosis:                       8.887   Cond. No.                         4.57
==============================================================================

In [6]:
# Same regression as above, using the five-way dgcat2 split; rows missing
# either column are dropped first.
y2, X2 = patsy.dmatrices('dRGDP ~ dgcat2', data=RR[['dRGDP', 'dgcat2']].dropna())
# print(...) with a single argument runs on both Python 2 and Python 3.
print(sm.OLS(y2, X2).fit().summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                  dRGDP   R-squared:                       0.046
Method:                 Least Squares   F-statistic:                     14.24
Date:                Thu, 18 Apr 2013   Prob (F-statistic):           2.36e-11
Time:                        21:14:59   Log-Likelihood:                -2926.9
No. Observations:                1175   AIC:                             5864.
Df Residuals:                    1170   BIC:                             5889.
Df Model:                           4
========================================================================================
coef    std err          t      P>|t|      [95.0% Conf. Int.]
----------------------------------------------------------------------------------------
Intercept                4.1735      0.142     29.423      0.000         3.895     4.452
dgcat2[T.30-60%]        -1.0814      0.199     -5.431      0.000        -1.472    -0.691
dgcat2[T.60-90%]        -0.9869      0.251     -3.933      0.000        -1.479    -0.495
dgcat2[T.90-120%]       -1.7676      0.359     -4.929      0.000        -2.471    -1.064
dgcat2[T.Above 120%]    -2.6120      0.545     -4.796      0.000        -3.680    -1.543
==============================================================================
Omnibus:                      210.356   Durbin-Watson:                   1.388
Prob(Omnibus):                  0.000   Jarque-Bera (JB):             1756.317
Skew:                           0.570   Prob(JB):                         0.00
Kurtosis:                       8.880   Cond. No.                         7.10
==============================================================================


## Table 3 Corrected¶

In [7]:
## Mean dRGDP over all country-year rows in each debt/GDP bin ("correct weights")
RR.groupby('dgcat')['dRGDP'].mean()

Out[7]:
dgcat
0-30%        4.173523
30-60%       3.092145
60-90%       3.186575
Above 90%    2.167972
In [8]:
## Mean dRGDP per country within each debt/GDP bin (one row per country);
## these per-country means are the inputs to the "equal weights" average
RR.groupby(['Country', 'dgcat'])['dRGDP'].mean().unstack()

Out[8]:
dgcat 0-30% 30-60% 60-90% Above 90%
Country
Australia 3.205885 4.947205 4.042175 3.774250
Austria 5.207527 3.256526 -3.824000 NaN
Belgium NaN 4.191655 3.079868 2.566828
Denmark 3.518584 1.700034 2.391147 NaN
Finland 3.817029 2.418535 5.488887 NaN
France 5.058476 2.616159 3.019631 NaN
Germany 3.873759 0.875803 NaN NaN
Greece 4.001282 0.340200 2.696000 2.910632
Ireland 4.209251 4.452167 3.950139 2.428571
Italy 5.352632 2.054284 1.771529 1.028900
Japan 7.331001 3.957143 1.008411 0.687258
Netherlands 4.082614 2.620772 1.070436 NaN
New Zealand 2.465556 2.889572 3.883683 2.574727
Norway 3.400122 5.108289 10.201270 NaN
Portugal 4.451419 3.549482 1.893899 NaN
Spain 1.549332 3.398669 4.156250 NaN
Sweden 3.567385 2.932237 2.665824 NaN
UK NaN 2.231213 2.522133 2.399096
US NaN 3.370208 3.264068 -1.988893
In [9]:
## Mean dRGDP over all country-year rows in each bin of the five-way split
## ("correct weights", expanded categories)
RR.groupby('dgcat2')['dRGDP'].mean()

Out[9]:
dgcat2
0-30%         4.173523
30-60%        3.092145
60-90%        3.186575
90-120%       2.405934
Above 120%    1.561553
In [10]:
## Mean dRGDP per country within each bin of the five-way split (one row per
## country); these per-country means are the inputs to the "equal weights" average
RR.groupby(['Country', 'dgcat2'])['dRGDP'].mean().unstack()

Out[10]:
dgcat2 0-30% 30-60% 60-90% 90-120% Above 120%
Country
Australia 3.205885 4.947205 4.042175 6.920201 2.987763
Austria 5.207527 3.256526 -3.824000 NaN NaN
Belgium NaN 4.191655 3.079868 2.702629 -0.692378
Canada 2.515704 3.525446 4.523574 4.544839 0.574341
Denmark 3.518584 1.700034 2.391147 NaN NaN
Finland 3.817029 2.418535 5.488887 NaN NaN
France 5.058476 2.616159 3.019631 NaN NaN
Germany 3.873759 0.875803 NaN NaN NaN
Greece 4.001282 0.340200 2.696000 2.910632 NaN
Ireland 4.209251 4.452167 3.950139 2.428571 NaN
Italy 5.352632 2.054284 1.771529 1.028900 NaN
Japan 7.331001 3.957143 1.008411 1.359564 0.537857
Netherlands 4.082614 2.620772 1.070436 NaN NaN
New Zealand 2.465556 2.889572 3.883683 -2.256588 9.821699
Norway 3.400122 5.108289 10.201270 NaN NaN
Portugal 4.451419 3.549482 1.893899 NaN NaN
Spain 1.549332 3.398669 4.156250 NaN NaN
Sweden 3.567385 2.932237 2.665824 NaN NaN
UK NaN 2.231213 2.522133 3.303428 1.871568
US NaN 3.370208 3.264068 0.995529 -10.942159

## Selective treatment of early years¶

In [11]:
# Rows to leave out: New Zealand before 1950, Australia and Canada before 1951.
early_new_zealand = (RR.Country == 'New Zealand') & (RR.Year < 1950)
early_australia = (RR.Country == 'Australia') & (RR.Year < 1951)
early_canada = (RR.Country == 'Canada') & (RR.Year < 1951)
idx = early_new_zealand | early_australia | early_canada

# Keep every row that is not flagged, then average dRGDP per debt/GDP bin.
RR_selective = RR[~idx]
RR_selective.groupby('dgcat')['dRGDP'].mean()

Out[11]:
dgcat
0-30%        4.173523
30-60%       3.092145
60-90%       3.186575
Above 90%    1.919934

## Table 3 Weights, Exclusion¶

In [12]:
# Column-wise mean of RR_selective (one value per numeric column).
# NOTE(review): the section heading refers to Table 3, but this expression is
# not broken down by dgcat or Country -- confirm this is the intended output.
RR_selective.mean()

Out[12]:
Unnamed: 0    5.916408e+02
Year          1.979633e+03
Debt          1.621458e+07
RGDP          2.369173e+05
GDP           1.957611e+05
dRGDP         3.408270e+00
GDPI          5.034180e+01
GDP1          1.470725e+07
GDP2          1.824865e+07
RGDP1         1.425590e+07
RGDP2         3.072470e+07
GDPI1         5.589542e+02
GDPI2         8.690705e+01
Infl          5.632643e+00
Debt1         5.625405e+05
Debt2         1.050730e+05
Debtalt       1.006665e+07
GDP2alt       4.450790e+05
GDPalt        2.079659e+06
RGDP2alt      1.033331e+05
debtgdp       4.530375e+01
GDP3          7.689113e+04
GNI           5.156239e+08
lRGDP         2.351587e+05
lRGDP1        1.405853e+07
lRGDP2        3.047113e+07

## Table 3 Selective years exclusion¶

In [13]:
# Mean dRGDP per country within each debt/GDP bin, after the early-year rows
# have been removed (one row per country, one column per bin).
RR_selective.groupby(['Country', 'dgcat'])['dRGDP'].mean().unstack()

Out[13]:
dgcat 0-30% 30-60% 60-90% Above 90%
Country
Australia 3.205885 4.947205 4.042175 NaN
Austria 5.207527 3.256526 -3.824000 NaN
Belgium NaN 4.191655 3.079868 2.566828
Denmark 3.518584 1.700034 2.391147 NaN
Finland 3.817029 2.418535 5.488887 NaN
France 5.058476 2.616159 3.019631 NaN
Germany 3.873759 0.875803 NaN NaN
Greece 4.001282 0.340200 2.696000 2.910632
Ireland 4.209251 4.452167 3.950139 2.428571
Italy 5.352632 2.054284 1.771529 1.028900
Japan 7.331001 3.957143 1.008411 0.687258
Netherlands 4.082614 2.620772 1.070436 NaN
New Zealand 2.465556 2.889572 3.883683 -7.635102
Norway 3.400122 5.108289 10.201270 NaN
Portugal 4.451419 3.549482 1.893899 NaN
Spain 1.549332 3.398669 4.156250 NaN
Sweden 3.567385 2.932237 2.665824 NaN
UK NaN 2.231213 2.522133 2.399096
US NaN 3.370208 3.264068 -1.988893

## And dropping because of spreadsheet error¶

In [14]:
# Countries to leave out of the sample in this step.
drop = ["Australia","Austria","Belgium","Canada","Denmark"]
# True for rows whose country is NOT in `drop`.
idx = ~RR_selective.Country.isin(drop)

# Apply the mask: later cells read RR_selective_spreadsheet.
RR_selective_spreadsheet = RR_selective[idx]

# Mean dRGDP per debt/GDP bin for the reduced sample.
# NOTE(review): inferred from the dgcat-indexed Out[14]; confirm the weighting.
RR_selective_spreadsheet.dRGDP.groupby(RR_selective_spreadsheet.dgcat).mean()

Out[14]:
dgcat
0-30%        4.236391
30-60%       2.958902
60-90%       3.160164
Above 90%    1.692155

## New Zealand transcription error¶

In [15]:
# Independent copy (via .copy()) of RR_selective_spreadsheet, used by the
# cells in this section.
RR_selective_spreadsheet_transcription = RR_selective_spreadsheet.copy()

Out[15]:
dgcat
0-30%        4.236391
30-60%       2.958902
60-90%       3.160164
Above 90%    1.692155
In [16]:
# Grouping keys used by groupby([a, b]) in In [17]: country and debt/GDP bin.
# `b` must be the dgcat column because the unstacked result is indexed there
# by bin label ('Above 90%').
a = RR_selective_spreadsheet_transcription.Country
b = RR_selective_spreadsheet_transcription.dgcat

Out[16]:
dgcat
0-30%        4.236391
30-60%       2.958902
60-90%       3.160164
Above 90%    1.692155
In [17]:
# Per-country mean dRGDP in each debt/GDP bin (rows: country, columns: bin).
published_means = RR_selective_spreadsheet_transcription.dRGDP.groupby([a,b]).mean().unstack()
# Overwrite New Zealand's "Above 90%" cell with -7.9.  Label-based .loc
# replaces .ix, which has been removed from pandas.
published_means.loc['New Zealand', 'Above 90%'] = -7.9
# Unweighted average across countries for each bin.
published_means.mean()

Out[17]:
dgcat
0-30%        4.089220
30-60%       2.854316
60-90%       3.399440
Above 90%   -0.062062

## Medians¶

In [18]:
# Median dRGDP per debt/GDP bin over all country-year rows, i.e. every
# observation counts equally (the weighting labelled "correct" in Table 3).
RR.groupby('dgcat')['dRGDP'].median()

Out[18]:
dgcat
0-30%        4.145376
30-60%       3.104629
60-90%       2.897829
Above 90%    2.335324
In [19]:
# Median dRGDP per bin of the five-way split over all country-year rows,
# every observation counting equally.
RR.groupby('dgcat2')['dRGDP'].median()

Out[19]:
dgcat2
0-30%         4.145376
30-60%        3.104629
60-90%        2.897829
90-120%       2.373340
Above 120%    2.039469

## Counts of years¶

In [20]:
# Country-year rows per debt/GDP bin: count per (country, bin), then sum over countries.
RR.groupby(['Country', 'dgcat']).size().unstack().sum()

Out[20]:
dgcat
0-30%        426
30-60%       439
60-90%       200
Above 90%    110
In [21]:
# Country-year rows per debt/GDP bin in RR_selective.  The keys come from
# RR_selective itself, so the count does not depend on index alignment with RR.
RR_selective.groupby(['Country', 'dgcat']).size().unstack().sum()

Out[21]:
dgcat
0-30%        426
30-60%       439
60-90%       200
Above 90%     96
In [22]:
# Country-year rows per debt/GDP bin in RR_selective_spreadsheet.  The keys
# come from that frame itself, so the count does not depend on index
# alignment with RR.
RR_selective_spreadsheet.groupby(['Country', 'dgcat']).size().unstack().sum()

Out[22]:
dgcat
0-30%        329
30-60%       324
60-90%       138
Above 90%     71

## Categorical scatterplot (violin plots)¶

In [23]:
# One array of dRGDP values per debt/GDP bin, in label order, drawn as violins.
labels = ["0-30%","30-60%","60-90%","Above 90%"]
dat = [np.array(RR.dRGDP[RR.dgcat==x]) for x in labels]
# print(...) with a single argument runs on both Python 2 and Python 3.
print(sm.graphics.violinplot(dat, labels=labels))

Figure(480x320)

In [24]:
# Same plot for the five-way split: one array of dRGDP values per dgcat2 bin.
labels = ["0-30%","30-60%","60-90%","90-120%","Above 120%"]
dat = [np.array(RR.dRGDP[RR.dgcat2==x]) for x in labels]
# print(...) with a single argument runs on both Python 2 and Python 3.
print(sm.graphics.violinplot(dat, labels=labels))

Figure(480x320)


## Country-Year average by debtgdp for more recent samples¶

In [25]:
# Sample start years: 1950, 1960, ..., 2000.
years = range(1950, 2001, 10)

def f(x):
    """Return (x, mean dRGDP per dgcat) using only rows with Year >= x."""
    recent = RR[RR.Year >= x]
    return (x, recent.dRGDP.groupby(recent.dgcat).mean())

[f(x) for x in years]

Out[25]:
[(1950,
dgcat
0-30%        4.135295
30-60%       2.980839
60-90%       3.100982
Above 90%    2.121852),
(1960,
dgcat
0-30%        3.895619
30-60%       2.909601
60-90%       2.779663
Above 90%    2.074064),
(1970,
dgcat
0-30%        3.145224
30-60%       2.644951
60-90%       2.559289
Above 90%    1.959229),
(1980,
dgcat
0-30%        2.541408
30-60%       2.451346
60-90%       2.435681
Above 90%    1.959229),
(1990,
dgcat
0-30%        2.669334
30-60%       2.403622
60-90%       2.457587
Above 90%    1.823201),
(2000,
dgcat
0-30%        2.747593
30-60%       1.881735
60-90%       1.290506
Above 90%    1.745087)]

# Lagged dependent variable¶

In [26]:
# Previous row's dRGDP within each country.  GroupBy.shift() keeps RR's index,
# so the result aligns on assignment; the apply(lambda ...) form may return
# group-keyed results on newer pandas that no longer align with RR.
# NOTE(review): this is "last year's growth" only if rows are sorted by Year
# within each country and no years are missing -- confirm.
RR['dRGDP_lag'] = RR.dRGDP.groupby(RR.Country).shift()
# OLS of dRGDP on the dgcat dummies plus lagged growth; rows with any missing
# value (including each country's first row, which has no lag) are dropped.
y, X = patsy.dmatrices('dRGDP ~ dgcat + dRGDP_lag', data=RR[['dRGDP', 'dgcat', 'dRGDP_lag']].dropna())
# print(...) with a single argument runs on both Python 2 and Python 3.
print(sm.OLS(y, X).fit().summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                  dRGDP   R-squared:                       0.182
Method:                 Least Squares   F-statistic:                     63.84
Date:                Thu, 18 Apr 2013   Prob (F-statistic):           8.90e-49
Time:                        21:15:00   Log-Likelihood:                -2723.0
No. Observations:                1155   AIC:                             5456.
Df Residuals:                    1150   BIC:                             5481.
Df Model:                           4
======================================================================================
coef    std err          t      P>|t|      [95.0% Conf. Int.]
--------------------------------------------------------------------------------------
Intercept              2.5045      0.173     14.461      0.000         2.165     2.844
dgcat[T.30-60%]       -0.6746      0.179     -3.778      0.000        -1.025    -0.324
dgcat[T.60-90%]       -0.5782      0.224     -2.585      0.010        -1.017    -0.139
dgcat[T.Above 90%]    -1.0539      0.287     -3.668      0.000        -1.618    -0.490
dRGDP_lag              0.3715      0.027     13.668      0.000         0.318     0.425
==============================================================================
Omnibus:                      111.755   Durbin-Watson:                   2.108
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              577.799
Skew:                          -0.272   Prob(JB):                    3.41e-126
Kurtosis:                       6.422   Cond. No.                         20.6
==============================================================================


# Fixed effects¶

In [27]:
# Adds Country dummies (country fixed effects) to the lagged-growth model;
# rows with a missing value in any of the four columns are dropped first.
y, X = patsy.dmatrices('dRGDP ~ dgcat + dRGDP_lag + Country', data=RR[['dRGDP', 'dgcat', 'dRGDP_lag', 'Country']].dropna())
# print(...) with a single argument runs on both Python 2 and Python 3.
print(sm.OLS(y, X).fit().summary())

                            OLS Regression Results
==============================================================================
Dep. Variable:                  dRGDP   R-squared:                       0.195
Method:                 Least Squares   F-statistic:                     11.89
Date:                Thu, 18 Apr 2013   Prob (F-statistic):           1.84e-39
Time:                        21:15:00   Log-Likelihood:                -2713.7
No. Observations:                1155   AIC:                             5475.
Df Residuals:                    1131   BIC:                             5597.
Df Model:                          23
==========================================================================================
coef    std err          t      P>|t|      [95.0% Conf. Int.]
------------------------------------------------------------------------------------------
Intercept                  2.8787      0.355      8.118      0.000         2.183     3.574
dgcat[T.30-60%]           -0.8192      0.208     -3.943      0.000        -1.227    -0.412
dgcat[T.60-90%]           -0.8052      0.258     -3.115      0.002        -1.312    -0.298
dgcat[T.Above 90%]        -1.2941      0.339     -3.817      0.000        -1.959    -0.629
Country[T.Austria]        -0.2098      0.463     -0.454      0.650        -1.117     0.698
Country[T.Belgium]        -0.0507      0.480     -0.106      0.916        -0.993     0.891
Country[T.Canada]          0.3403      0.469      0.725      0.468        -0.580     1.261
Country[T.Denmark]        -0.8121      0.476     -1.707      0.088        -1.746     0.121
Country[T.Finland]        -0.4436      0.458     -0.969      0.333        -1.342     0.455
Country[T.France]         -0.0764      0.479     -0.159      0.873        -1.017     0.864
Country[T.Germany]        -0.7266      0.469     -1.548      0.122        -1.648     0.195
Country[T.Greece]         -0.3446      0.538     -0.640      0.522        -1.401     0.711
Country[T.Ireland]         0.3872      0.471      0.822      0.411        -0.537     1.312
Country[T.Italy]          -0.3490      0.469     -0.743      0.457        -1.270     0.572
Country[T.Japan]           0.4994      0.482      1.036      0.301        -0.447     1.445
Country[T.Netherlands]    -0.4065      0.486     -0.837      0.403        -1.360     0.547
Country[T.New Zealand]    -0.2624      0.465     -0.565      0.572        -1.174     0.649
Country[T.Norway]         -0.3741      0.459     -0.814      0.416        -1.276     0.527
Country[T.Portugal]       -0.0705      0.469     -0.150      0.881        -0.992     0.851
Country[T.Spain]          -0.1694      0.528     -0.321      0.748        -1.205     0.866
Country[T.Sweden]         -0.4845      0.462     -1.050      0.294        -1.390     0.421
Country[T.UK]             -0.3898      0.474     -0.823      0.411        -1.319     0.540
Country[T.US]              0.0911      0.471      0.193      0.847        -0.833     1.015
dRGDP_lag                  0.3515      0.028     12.701      0.000         0.297     0.406
==============================================================================
Omnibus:                      111.745   Durbin-Watson:                   2.106
Prob(Omnibus):                  0.000   Jarque-Bera (JB):              575.683
Skew:                          -0.274   Prob(JB):                    9.82e-126
Kurtosis:                       6.415   Cond. No.                         91.7
==============================================================================

In [27]: