#!/usr/bin/env python
# coding: utf-8

# # EDS Case Study
#
# Exploring changes in political beliefs
#
# Allen Downey
#
# [MIT License](https://en.wikipedia.org/wiki/MIT_License)

# In[1]:


import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns

from utils import decorate


# ## Loading and validation
#

# In[2]:


gss = pd.read_hdf('eds.gss.hdf5', 'gss')
gss.shape


# In[3]:


def replace_invalid(df):
    """Recode selected response columns of `df`, modifying `df` itself.

    The columns `trust`, `helpful`, `racpres`, `owngun`, `fepol` and
    `sexeduc` are recoded with 1 -> 1, 2 -> 0, 3 -> 0.5.  The column
    `fair` is recoded with 1 -> 0, 2 -> 1, 3 -> 0.5.  Values that are
    not keys of the mapping are left as they are.

    df: DataFrame that contains all of the columns named above

    returns: None
    """
    # recode so depends is in the middle
    d = {1: 1, 2: 0, 3: 0.5}
    for col in ['trust', 'helpful', 'racpres', 'owngun', 'fepol', 'sexeduc']:
        # Assign the recoded column back instead of calling
        # `df.col.replace(d, inplace=True)`: an in-place call on a selected
        # column is chained assignment and leaves `df` unchanged when
        # pandas Copy-on-Write is active.
        df[col] = df[col].replace(d)

    # `fair` maps codes 1 and 2 the other way around
    d = {1: 0, 2: 1, 3: 0.5}
    df['fair'] = df['fair'].replace(d)


replace_invalid(gss)


# In[4]:


# NOTE(review): `vars` shadows the builtin of the same name and is not
# referenced in the visible part of this file; the name is kept in case
# something outside this view relies on it.
vars = ['year', 'id_', 'agewed', 'divorce', 'sibs', 'childs', 'age', 'educ',
        'paeduc', 'maeduc', 'speduc', 'degree', 'padeg', 'madeg', 'spdeg',
        'sex', 'race', 'res16', 'reg16', 'srcbelt', 'partyid', 'pres04',
        'pres08', 'pres12', 'polviews', 'natspac', 'natenvir', 'natheal',
        'natcity', 'natcrime', 'natdrug', 'nateduc', 'natrace', 'natarms',
        'nataid', 'natfare', 'spkath', 'colath', 'libath', 'spkhomo',
        'colhomo', 'libhomo', 'cappun', 'gunlaw', 'grass', 'relig', 'fund',
        'attend', 'reliten', 'postlife', 'pray', 'relig16', 'fund16',
        'sprel16', 'prayer', 'bible', 'racmar', 'racpres', 'affrmact',
        'happy', 'hapmar', 'health', 'life', 'helpful', 'fair', 'trust',
        'conclerg', 'coneduc', 'confed', 'conpress', 'conjudge', 'conlegis',
        'conarmy', 'satjob', 'class_', 'satfin', 'finrela', 'union_',
        'fepol', 'abany', 'chldidel', 'sexeduc', 'premarsx', 'xmarsex',
        'homosex', 'spanking', 'fear', 'owngun', 'pistol', 'hunt', 'phone',
        'memchurh', 'realinc', 'cohort', 'marcohrt', 'ballot', 'wtssall',
        'adults', 'compuse', 'databank', 'wtssnr', 'spkrac', 'spkcom',
        'spkmil', 'spkmslm']


# ### Validating `polviews`
#
# `polviews` contains responses to the [following
# question](https://gssdataexplorer.norc.org/projects/52787/variables/178/vshow)
#
# > We hear a lot of talk these days about liberals and conservatives.
# I'm going to show you a seven-point scale on which the political views that people might hold are arranged from extremely liberal--point 1--to extremely conservative--point 7. Where would you place yourself on this scale?
#
# The following function takes a variable column and returns a series of values and their frequencies.

# In[8]:


def values(series):
    """Count how often each value occurs in `series`.

    series: pandas Series

    returns: Series mapping each value to its frequency, sorted by value
    """
    counts = series.value_counts()
    return counts.sort_index()


# In[9]:


values(gss['polviews'])


# In[11]:


gss74 = (gss['year'] == 1974)
gss74.sum()


# In[13]:


values(gss.loc[gss74, 'polviews'])


# In[14]:


by_year = gss.groupby('year')
by_year


# In[20]:


series = by_year['polviews'].mean()
series.plot(label='polviews')
decorate(
    ylabel='Mean (7 point scale)',
    title='Mean of polviews',
)


# In[30]:


series = by_year['polviews'].std()
series.plot(color='C1', label='polviews')
decorate(
    ylabel='Standard deviation (7 point scale)',
    title='Standard deviation of polviews',
)


# ### Local regression

# In[31]:


from statsmodels.nonparametric.smoothers_lowess import lowess


def make_lowess(series):
    """Smooth `series` with `lowess`, using its index as x and its values as y.

    series: pandas Series

    returns: Series built from the two columns of the `lowess` result:
             the first column becomes the index, the second the values
    """
    fitted = lowess(series.values, series.index.values)
    xs = fitted[:, 0]
    ys = fitted[:, 1]
    return pd.Series(ys, index=xs)


# In[32]:


palette = sns.color_palette('muted', 5)
sns.palplot(palette)


# In[33]:


colors = {
    'Conservative': palette[3],
    'Moderate': palette[4],
    'Liberal': palette[0],
}


# In[47]:


def plot_series_lowess(series, color):
    """Plot `series` as markers, then its lowess-smoothed curve as a line.

    series: pandas Series
    color: color passed to both plot calls
    """
    # raw data: markers only (line width 0), half transparent
    series.plot(lw=0, marker='o', color=color, alpha=0.5)
    # smoothed curve, plotted with the label '_'
    make_lowess(series).plot(label='_', color=color)


# In[35]:


series = by_year['polviews'].mean()
plot_series_lowess(series, 'C0')
decorate(
    ylabel='Mean (7 point scale)',
    title='Mean of polviews',
)


# In[36]:


series = by_year['polviews'].std()
plot_series_lowess(series, color='C1')
decorate(
    ylabel='Standard deviation (7 point scale)',
    title='Standard deviation of polviews',
)


# ### 3-point scale
#
# To
# make it easier to visualize groups, I'm going to lump the 7-point scale into a 3-point scale.
#
# With this scale, there are roughly the same number of people in each group.

# In[39]:


# replace 7 point scale with 3 point scale
d = {1: 'Liberal', 2: 'Liberal', 3: 'Liberal',
     4: 'Moderate',
     5: 'Conservative', 6: 'Conservative', 7: 'Conservative'}
gss['polviews3'] = gss.polviews.replace(d)
values(gss['polviews3'])


# In[42]:


def count_by_year(gss, varname):
    """Count the non-null responses to `varname` for each value and year.

    gss: DataFrame with a 'year' column and a `varname` column
    varname: string name of the column to count

    returns: DataFrame with one row per year and one column per value of
             `varname`; years in which any column has a zero or missing
             count are dropped
    """
    grouped = gss.groupby([varname, 'year'])
    count = grouped[varname].count().unstack(level=0)

    # note: the following is not ideal, because it does not
    # distinguish 0 from NA
    count = count.replace(0, np.nan).dropna()
    return count


polviews3_count = count_by_year(gss, 'polviews3')
polviews3_count.head()


# In[43]:


total = polviews3_count.sum(axis=1)
total.head()


# In[45]:


polviews3_prop = polviews3_count.div(total, axis=0)
polviews3_prop.head()


# In[48]:


def plot_columns_lowess(df, columns, colors):
    """Plot each listed column of `df` with `plot_series_lowess`.

    df: DataFrame
    columns: sequence of column names to plot, in plotting order
    colors: mapping from column name to color
    """
    for col in columns:
        series = df[col]
        plot_series_lowess(series, colors[col])


# In[49]:


columns = ['Moderate', 'Liberal', 'Conservative']
plot_columns_lowess(polviews3_prop, columns, colors)
decorate(xlabel='Year',
         ylabel='Proportion',
         title='Fraction of people with each political view',
         xlim=[1970, 2020])


# In[ ]:


# ## Trust

# In[ ]:


trust_mean = gss.groupby('year').trust.mean().dropna()
trust_mean.head()


# In[ ]:


plot_series_lowess(trust_mean, 'C1')
decorate(xlabel='Year',
         ylabel='Fraction',
         title='Can people be trusted',
         xlim=[1970, 2020])


# In[ ]:


# `grouped` used to exist only as a local variable inside count_by_year,
# so using it at module level raised NameError.  Define it here, keyed
# the same way (value first, then year) so that unstack(level=0) yields
# one column per polviews3 group and one row per year.
grouped = gss.groupby(['polviews3', 'year'])

trust_by_polviews3 = grouped.trust.mean().dropna().unstack(level=0)
trust_by_polviews3.head()


# In[ ]:


columns = ['Liberal', 'Conservative', 'Moderate']
plot_columns_lowess(trust_by_polviews3, columns, colors)
decorate(xlabel='Year',
         ylabel='Proportion',
         title='Can people be trusted?',
         xlim=[1970, 2020])


# ## Fair
#
# https://gssdataexplorer.norc.org/projects/52787/variables/440/vshow
#
# Do you think most people would try to take advantage of you if they got a chance, or would
# they try to be fair?
#

# In[ ]:


# yearly mean of `fair`, with empty years removed
fair_mean = gss.groupby('year')['fair'].mean()
fair_mean = fair_mean.dropna()

plot_series_lowess(fair_mean, 'C1')
decorate(
    xlabel='Year',
    ylabel='Proportion',
    title='Would people try to be fair?',
    xlim=[1970, 2020],
)


# In[ ]:


# NOTE(review): `grouped` is not defined in this section; the
# unstack(level=0) below assumes a module-level groupby on
# ['polviews3', 'year'] -- confirm it is defined earlier in the file.
fair_by_polviews3 = (
    grouped['fair']
    .mean()
    .dropna()
    .unstack(level=0)
)
fair_by_polviews3.head()


# In[ ]:


columns = ['Liberal', 'Moderate', 'Conservative']
plot_columns_lowess(fair_by_polviews3, columns, colors)
decorate(
    xlabel='Year',
    ylabel='Proportion',
    title='Would people try to be fair?',
    xlim=[1970, 2020],
)


# In[ ]:


# ## helpful
#
# https://gssdataexplorer.norc.org/projects/52787/variables/439/vshow
#
# Would you say that most of the time people try to be helpful, or that they are mostly just looking out for themselves?
#

# In[ ]:


# yearly mean of `helpful`, with empty years removed
helpful_mean = gss.groupby('year')['helpful'].mean()
helpful_mean = helpful_mean.dropna()

plot_series_lowess(helpful_mean, 'C1')
decorate(
    xlabel='Year',
    ylabel='Proportion',
    title='People try to be helpful?',
    xlim=[1970, 2020],
)


# In[ ]:


helpful_by_polviews3 = (
    grouped['helpful']
    .mean()
    .dropna()
    .unstack(level=0)
)
helpful_by_polviews3.head()


# In[ ]:


columns = ['Liberal', 'Moderate', 'Conservative']
plot_columns_lowess(helpful_by_polviews3, columns, colors)
decorate(
    xlabel='Year',
    ylabel='Proportion',
    title='People try to be helpful?',
    xlim=[1970, 2020],
)


# In[ ]:


# In[ ]:


# ## homosex
#
# https://gssdataexplorer.norc.org/projects/52787/variables/634/vshow
#
# What about sexual relations between two adults of the same sex--do you think it is always wrong, almost always wrong, wrong only sometimes, or not wrong at all?
#

# In[ ]:


varname = 'homosex'
title = 'Sexual relations between adults of the same sex'

# NOTE(review): `homosex` is not one of the columns recoded by
# replace_invalid, so this is a mean of the stored response codes; the
# 'Approval' / 'Proportion' axis labels may not describe it -- confirm.
mean = gss.groupby('year')[varname].mean()
mean = mean.dropna()

plot_series_lowess(mean, 'C1')
decorate(
    xlabel='Year',
    ylabel='Approval',
    title=title,
    xlim=[1970, 2020],
)


# In[ ]:


# NOTE(review): `grouped` is not defined in this section; the
# unstack(level=0) below assumes a module-level groupby on
# ['polviews3', 'year'] -- confirm it is defined earlier in the file.
by_polviews3 = (
    grouped[varname]
    .mean()
    .dropna()
    .unstack(level=0)
)
by_polviews3.head()


# In[ ]:


columns = ['Liberal', 'Moderate', 'Conservative']
plot_columns_lowess(by_polviews3, columns, colors)
decorate(
    xlabel='Year',
    ylabel='Proportion',
    title=title,
    xlim=[1970, 2020],
)


# In[ ]:


# ## racpres
#
# https://gssdataexplorer.norc.org/projects/52787/variables/400/vshow
#
#
# If your party nominated a (Negro/Black/African-American) for President, would you vote for him if he were qualified for the job?
#
#

# In[ ]:


varname = 'racpres'
title = 'Would you vote for a black person for President'

mean = gss.groupby('year')[varname].mean()
mean = mean.dropna()

plot_series_lowess(mean, 'C1')
decorate(
    xlabel='Year',
    ylabel='Approval',
    title=title,
    xlim=[1970, 2020],
)


# In[ ]:


by_polviews3 = (
    grouped[varname]
    .mean()
    .dropna()
    .unstack(level=0)
)
by_polviews3.head()


# In[ ]:


columns = ['Liberal', 'Moderate', 'Conservative']
plot_columns_lowess(by_polviews3, columns, colors)
decorate(
    xlabel='Year',
    ylabel='Proportion',
    title=title,
    xlim=[1970, 2020],
)


# In[ ]:


# ## owngun
#
# https://gssdataexplorer.norc.org/projects/52787/variables/679/vshow
#
# Do you happen to have in your home (IF HOUSE: or garage) any guns or revolvers?
#
#
#

# In[ ]:


varname = 'owngun'
title = 'Have gun in home'

# yearly mean of the selected column, with empty years removed
mean = gss.groupby('year')[varname].mean()
mean = mean.dropna()

plot_series_lowess(mean, 'C1')
decorate(
    xlabel='Year',
    ylabel='Approval',
    title=title,
    xlim=[1970, 2020],
)


# In[ ]:


# NOTE(review): `grouped` is not defined in this section; the
# unstack(level=0) below assumes a module-level groupby on
# ['polviews3', 'year'] -- confirm it is defined earlier in the file.
by_polviews3 = (
    grouped[varname]
    .mean()
    .dropna()
    .unstack(level=0)
)
by_polviews3.head()


# In[ ]:


columns = ['Liberal', 'Moderate', 'Conservative']
plot_columns_lowess(by_polviews3, columns, colors)
decorate(
    xlabel='Year',
    ylabel='Proportion',
    title=title,
    xlim=[1970, 2020],
)


# In[ ]:


# ## fepol
#
# https://gssdataexplorer.norc.org/projects/52787/variables/591/vshow
#
# Tell me if you agree or disagree with this statement: Most men are better suited emotionally for politics than are most women.
#

# In[ ]:


varname = 'fepol'
title = 'Men better suited for politics'

mean = gss.groupby('year')[varname].mean()
mean = mean.dropna()

plot_series_lowess(mean, 'C1')
decorate(
    xlabel='Year',
    ylabel='Approval',
    title=title,
    xlim=[1970, 2020],
)


# In[ ]:


by_polviews3 = (
    grouped[varname]
    .mean()
    .dropna()
    .unstack(level=0)
)
by_polviews3.head()


# In[ ]:


columns = ['Liberal', 'Moderate', 'Conservative']
plot_columns_lowess(by_polviews3, columns, colors)
decorate(
    xlabel='Year',
    ylabel='Proportion',
    title=title,
    xlim=[1970, 2020],
)


# In[ ]:


# ## sexeduc
#
# https://gssdataexplorer.norc.org/projects/52787/variables/626/vshow
#
# Would you be for or against sex education in the public schools?
#
#
#

# In[ ]:


varname = 'sexeduc'
title = 'Favor sex education in the public schools?'

mean = gss.groupby('year')[varname].mean()
mean = mean.dropna()

plot_series_lowess(mean, 'C1')
decorate(
    xlabel='Year',
    ylabel='Favor',
    title=title,
    xlim=[1970, 2020],
)


# In[ ]:


by_polviews3 = (
    grouped[varname]
    .mean()
    .dropna()
    .unstack(level=0)
)
by_polviews3.head()


# In[ ]:


columns = ['Liberal', 'Moderate', 'Conservative']
plot_columns_lowess(by_polviews3, columns, colors)
decorate(
    xlabel='Year',
    ylabel='Favor',
    title=title,
    xlim=[1970, 2020],
)


# In[ ]: