import pandas as pd
import numpy as np
import microdf as mdf
import matplotlib.pyplot as plt
import matplotlib as mpl
import os
# Path to the input CSV.
ASEC_F = '~/data/asec/2019/pppub19.csv'

# SPM fields, listed without their 'SPM_' prefix.
SPM_COLS = ['povthreshold', 'resources', 'poor', 'numper', 'numkids',
            'numadults', 'id', 'weight']
# Remaining fields, requested under these exact names.
OTHER_COLS = ['A_AGE', 'MARSUPWT', 'UC_VAL']

# Full list of columns to request: the prefixed, upper-cased SPM names
# followed by the other fields.
cols = ['SPM_' + name.upper() for name in SPM_COLS]
cols.extend(OTHER_COLS)

# Load only the requested columns; `raw` stays untouched and all further
# work happens on an independent copy.
raw = pd.read_csv(ASEC_F, usecols=cols)
df = raw.copy()
# Lower-case every column name so later code can use e.g. `df.uc_val`.
df.columns = [name.lower() for name in df.columns]
Add the true weights by dividing the raw weight columns by 100.
# Scaled weights: `w` is derived from marsupwt and `spm_w` from spm_weight,
# each divided by 100.
df['w'] = df['marsupwt'] / 100.
df['spm_w'] = df['spm_weight'] / 100.
# Weighted sum of uc_val (weight column `w`), divided by 1e9 for display.
mdf.weighted_sum(df, 'uc_val', 'w') / 1e9
15.499327626209999
# Flat amount credited to each person with positive uc_val.
# NOTE(review): presumably 600 per week for 17 weeks - confirm.
UI_BONUS = 600 * 17

# Person-level bonus: UI_BONUS where uc_val is positive, otherwise 0.
has_uc = df['uc_val'] > 0
df['ui_bonus'] = np.where(has_uc, UI_BONUS, 0)

# One row per spm_id carrying the unit-level columns; duplicate rows
# (repeated across the persons of a unit) are dropped before indexing.
unit_cols = ['spm_resources', 'spm_povthreshold', 'spm_id', 'spm_numper',
             'spm_w']
spmu_raw = df[unit_cols].drop_duplicates().set_index('spm_id')

# Sum the bonus within each spm_id, then attach the unit-level columns.
spmu = df.groupby('spm_id')[['ui_bonus']].sum().join(spmu_raw)
spmu
spm_id | ui_bonus | spm_resources | spm_povthreshold | spm_numper | spm_w |
---|---|---|---|---|---|
4001 | 0 | 12961 | 10080 | 1 | 2031.67 |
6001 | 0 | 13292 | 11483 | 1 | 1232.04 |
7001 | 0 | 10232 | 11483 | 1 | 1209.17 |
8001 | 0 | 20084 | 16190 | 2 | 1146.23 |
13001 | 0 | 21534 | 11546 | 1 | 1587.98 |
... | ... | ... | ... | ... | ... |
94629001 | 0 | 70975 | 17526 | 1 | 444.01 |
94630001 | 0 | 28273 | 20304 | 2 | 412.47 |
94631001 | 0 | 40796 | 46249 | 6 | 515.55 |
94632001 | 10200 | 48348 | 54069 | 6 | 391.67 |
94633001 | 0 | 35551 | 24711 | 2 | 448.29 |
71517 rows × 5 columns
# Weighted sum of ui_bonus over the rows of spmu, weighted by spm_w. This
# is the total amount that the flat per-person payment computed further
# down is sized to match.
budget = mdf.weighted_sum(spmu, 'ui_bonus', 'spm_w')
# Same figure divided by 1e9 for display.
budget / 1e9
32.08249503
# Weighted head count: persons per unit times the unit weight, summed over
# all rows of spmu.
weighted_persons = (spmu['spm_numper'] * spmu['spm_w']).sum()
# Flat per-person amount that spends exactly `budget`.
ubi = budget / weighted_persons
ubi
101.28110034605258
# Unit-level amount: the per-person figure once for each person in the unit.
spmu['ubi'] = spmu['spm_numper'] * ubi
# Sanity check: the weighted total should match `budget`, so this difference
# should be zero up to floating-point error.
mdf.weighted_sum(spmu, 'ubi', 'spm_w') - budget
-3.814697265625e-06
Add new resource columns and merge back to person level dataset.
# Unit resources under each policy: the baseline plus the unit's summed UI
# bonus, or plus the unit's flat per-person payment total.
spmu['spm_resources_ui'] = spmu['spm_resources'] + spmu['ui_bonus']
spmu['spm_resources_ubi'] = spmu['spm_resources'] + spmu['ubi']

# Bring the two new unit-level columns back onto the person-level frame,
# matching on spm_id.
policy_resource_cols = ['spm_resources_ui', 'spm_resources_ubi']
df = df.merge(spmu[policy_resource_cols], on='spm_id')

# 1/0 flag per person: 1 when the policy's resources are strictly below the
# poverty threshold.
for policy in ('ui', 'ubi'):
    below_threshold = df['spm_resources_' + policy] < df['spm_povthreshold']
    df['spm_poor_' + policy] = np.where(below_threshold, 1, 0)

# Weighted mean (weight column `w`) of each poverty flag.
pov_rate_base = mdf.weighted_mean(df, 'spm_poor', 'w')
pov_rate_ui = mdf.weighted_mean(df, 'spm_poor_ui', 'w')
pov_rate_ubi = mdf.weighted_mean(df, 'spm_poor_ubi', 'w')

# Drop in the rate under the flat payment relative to the drop under the
# UI bonus.
ubi_rate_drop = pov_rate_base - pov_rate_ubi
ui_rate_drop = pov_rate_base - pov_rate_ui
ubi_rate_drop / ui_rate_drop
2.3919225609540185
Poverty gap.
def pov_gap(resource_col):
    """Return the weighted total poverty gap, divided by 1e9.

    Reads the module-level ``spmu`` frame. For each row the shortfall is
    ``spm_povthreshold`` minus the value in ``resource_col``, floored at
    zero; shortfalls are multiplied by ``spm_w`` and summed.

    Args:
        resource_col: Name of the ``spmu`` column holding resources.

    Returns:
        The weighted sum of shortfalls divided by 1e9.
    """
    shortfall = (spmu['spm_povthreshold'] - spmu[resource_col]).clip(lower=0)
    weighted_total = (shortfall * spmu['spm_w']).sum()
    return weighted_total / 1e9
# Poverty gap under the baseline, the UI bonus and the flat payment.
pov_gap_base, pov_gap_ui, pov_gap_ubi = (
    pov_gap(resource_col)
    for resource_col in ('spm_resources', 'spm_resources_ui',
                         'spm_resources_ubi')
)
# Gap reduction under the flat payment relative to the reduction under the
# UI bonus.
(pov_gap_base - pov_gap_ubi) / (pov_gap_base - pov_gap_ui)
3.1953888839221003
def pov_rate(df):
    """Return the weighted mean of ``spm_poor`` in ``df``, weighted by ``w``.

    Args:
        df: Frame containing ``spm_poor`` and ``w`` columns.
    """
    rate = mdf.weighted_mean(df, 'spm_poor', 'w')
    return rate
# Baseline poverty rate among persons with positive uc_val.
pov_rate(df.loc[df['uc_val'] > 0])
0.07913886915242613
# Baseline poverty rate among persons whose uc_val is exactly zero.
pov_rate(df.loc[df['uc_val'] == 0])
0.12781609087159032
Please don’t resent our lowest paid workers in America for getting $600.
# Resources expressed as a multiple of the poverty threshold.
spmu['pov_ratio'] = spmu['spm_resources'] / spmu['spm_povthreshold']

# Independent copies of the units with a positive summed bonus and of the
# units whose summed bonus is zero.
bonus_positive = spmu['ui_bonus'] > 0
bonus_zero = spmu['ui_bonus'] == 0
spmu_ui = spmu.loc[bonus_positive].copy()
spmu_noui = spmu.loc[bonus_zero].copy()

# Weighted median ratio over all units.
mdf.weighted_median(spmu, 'pov_ratio', 'spm_w')
2.349907929357153
# Weighted median pov_ratio (weight column spm_w) among the units in spmu_ui,
# i.e. those with a positive ui_bonus.
mdf.weighted_median(spmu_ui, 'pov_ratio', 'spm_w')
2.421105938846996
# Weighted median pov_ratio (weight column spm_w) among the units in
# spmu_noui, i.e. those whose ui_bonus is zero.
mdf.weighted_median(spmu_noui, 'pov_ratio', 'spm_w')
2.348263751046827
Highly significant (2nd element is p value).
from statsmodels.stats import weightstats

# Two-sample t-test on pov_ratio between units with and without a UI bonus.
#
# statsmodels treats `weights` as case (frequency) weights, so the effective
# number of observations is the sum of the weights. Passing the raw spm_w
# population weights therefore makes the test behave as if every person-unit
# in the population had been sampled (degrees of freedom in the hundreds of
# millions) and grossly overstates significance. Any result or significance
# claim recorded from the raw-weight version should be re-checked.
#
# Rescale each group's weights so they sum to that group's number of sampled
# units: the weighted means are unchanged, while the degrees of freedom now
# reflect the actual sample size.
# NOTE(review): this is still an approximation - it ignores the survey design
# (replicate weights would be needed for design-based standard errors).
ui_case_w = spmu_ui.spm_w * len(spmu_ui) / spmu_ui.spm_w.sum()
noui_case_w = spmu_noui.spm_w * len(spmu_noui) / spmu_noui.spm_w.sum()

# Returns (t statistic, p-value, degrees of freedom).
weightstats.ttest_ind(spmu_ui.pov_ratio, spmu_noui.pov_ratio,
                      weights=(ui_case_w, noui_case_w))
(-108.25803113968871, 0.0, 135408586.06)