Last time we explored groupby operations in Pandas using Census SF1 data. Today we continue with our Census data exploration, turning to the Public Use Microdata, or PUMS. These are much larger files and at the individual person and household level. They are widely used to do analysis of the demographic and social characteristics of the population, so provide a good opportunity to explore these data using the methods we have been learning: filtering, string manipulation, groupby operations and others.
%matplotlib inline
import pandas as pd
import pylab as P
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.cm as cm, matplotlib.font_manager as fm
sns.set(style="darkgrid")
(with credits to Aksel Olsen, from whom a substantial amount of the material for today's session is adapted)
Data source: Census ACS 1-year PUMS data, 2012 release
The 2012 PUMS household and person tables for California were downloaded from:
Persons: census ftp
Households: census ftp
Note - these are large files and take time to download and to load into memory. If you have any trouble downloading from the Census ftp site using the links above, these two files are also in our files directory on bCourses.
pums_h = pd.read_csv('data/csv_hca/ss12hca.csv')
#pums_h=pd.read_csv('data/pums_h_sf_2012.csv')
pums_h.index=pums_h.SERIALNO
pums_h.head()
RT | SERIALNO | DIVISION | PUMA | REGION | ST | ADJHSG | ADJINC | WGTP | NP | ... | wgtp71 | wgtp72 | wgtp73 | wgtp74 | wgtp75 | wgtp76 | wgtp77 | wgtp78 | wgtp79 | wgtp80 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
SERIALNO | |||||||||||||||||||||
11 | H | 11 | 9 | 8508 | 4 | 6 | 1000000 | 1010207 | 110 | 2 | ... | 176 | 178 | 120 | 99 | 173 | 33 | 129 | 35 | 34 | 130 |
19 | H | 19 | 9 | 7502 | 4 | 6 | 1000000 | 1010207 | 180 | 0 | ... | 308 | 184 | 288 | 52 | 185 | 337 | 288 | 188 | 190 | 197 |
22 | H | 22 | 9 | 8105 | 4 | 6 | 1000000 | 1010207 | 75 | 1 | ... | 78 | 127 | 125 | 110 | 72 | 20 | 73 | 135 | 83 | 143 |
31 | H | 31 | 9 | 1305 | 4 | 6 | 1000000 | 1010207 | 76 | 2 | ... | 88 | 24 | 75 | 76 | 131 | 140 | 20 | 29 | 130 | 141 |
40 | H | 40 | 9 | 2901 | 4 | 6 | 1000000 | 1010207 | 12 | 2 | ... | 12 | 24 | 20 | 21 | 12 | 10 | 12 | 13 | 12 | 3 |
5 rows × 206 columns
print(list(pums_h.columns))
['RT', 'SERIALNO', 'DIVISION', 'PUMA', 'REGION', 'ST', 'ADJHSG', 'ADJINC', 'WGTP', 'NP', 'TYPE', 'ACR', 'AGS', 'BATH', 'BDSP', 'BLD', 'BUS', 'CONP', 'ELEP', 'FS', 'FULP', 'GASP', 'HFL', 'INSP', 'MHP', 'MRGI', 'MRGP', 'MRGT', 'MRGX', 'REFR', 'RMSP', 'RNTM', 'RNTP', 'RWAT', 'SINK', 'SMP', 'STOV', 'TEL', 'TEN', 'TOIL', 'VACS', 'VALP', 'VEH', 'WATP', 'YBL', 'FES', 'FFINCP', 'FGRNTP', 'FHINCP', 'FINCP', 'FPARC', 'FSMOCP', 'GRNTP', 'GRPIP', 'HHL', 'HHT', 'HINCP', 'HUGCL', 'HUPAC', 'HUPAOC', 'HUPARC', 'KIT', 'LNGI', 'MULTG', 'MV', 'NOC', 'NPF', 'NPP', 'NR', 'NRC', 'OCPIP', 'PARTNER', 'PLM', 'PSF', 'R18', 'R60', 'R65', 'RESMODE', 'SMOCP', 'SMX', 'SRNT', 'SVAL', 'TAXP', 'WIF', 'WKEXREL', 'WORKSTAT', 'FACRP', 'FAGSP', 'FBATHP', 'FBDSP', 'FBLDP', 'FBUSP', 'FCONP', 'FELEP', 'FFSP', 'FFULP', 'FGASP', 'FHFLP', 'FINSP', 'FKITP', 'FMHP', 'FMRGIP', 'FMRGP', 'FMRGTP', 'FMRGXP', 'FMVP', 'FPLMP', 'FREFRP', 'FRMSP', 'FRNTMP', 'FRNTP', 'FRWATP', 'FSINKP', 'FSMP', 'FSMXHP', 'FSMXSP', 'FSTOVP', 'FTAXP', 'FTELP', 'FTENP', 'FTOILP', 'FVACSP', 'FVALP', 'FVEHP', 'FWATP', 'FYBLP', 'wgtp1', 'wgtp2', 'wgtp3', 'wgtp4', 'wgtp5', 'wgtp6', 'wgtp7', 'wgtp8', 'wgtp9', 'wgtp10', 'wgtp11', 'wgtp12', 'wgtp13', 'wgtp14', 'wgtp15', 'wgtp16', 'wgtp17', 'wgtp18', 'wgtp19', 'wgtp20', 'wgtp21', 'wgtp22', 'wgtp23', 'wgtp24', 'wgtp25', 'wgtp26', 'wgtp27', 'wgtp28', 'wgtp29', 'wgtp30', 'wgtp31', 'wgtp32', 'wgtp33', 'wgtp34', 'wgtp35', 'wgtp36', 'wgtp37', 'wgtp38', 'wgtp39', 'wgtp40', 'wgtp41', 'wgtp42', 'wgtp43', 'wgtp44', 'wgtp45', 'wgtp46', 'wgtp47', 'wgtp48', 'wgtp49', 'wgtp50', 'wgtp51', 'wgtp52', 'wgtp53', 'wgtp54', 'wgtp55', 'wgtp56', 'wgtp57', 'wgtp58', 'wgtp59', 'wgtp60', 'wgtp61', 'wgtp62', 'wgtp63', 'wgtp64', 'wgtp65', 'wgtp66', 'wgtp67', 'wgtp68', 'wgtp69', 'wgtp70', 'wgtp71', 'wgtp72', 'wgtp73', 'wgtp74', 'wgtp75', 'wgtp76', 'wgtp77', 'wgtp78', 'wgtp79', 'wgtp80']
#pums_p=pd.read_csv('data/pums_p_sf_2012.csv')
pums_p = pd.read_csv('data/csv_pca/ss12pca.csv')
pums_p.head()
RT | SERIALNO | SPORDER | PUMA | ST | ADJINC | PWGTP | AGEP | CIT | CITWP | ... | pwgtp71 | pwgtp72 | pwgtp73 | pwgtp74 | pwgtp75 | pwgtp76 | pwgtp77 | pwgtp78 | pwgtp79 | pwgtp80 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | P | 11 | 1 | 8508 | 6 | 1010207 | 109 | 63 | 1 | NaN | ... | 176 | 178 | 120 | 100 | 173 | 32 | 129 | 35 | 34 | 129 |
1 | P | 11 | 2 | 8508 | 6 | 1010207 | 117 | 60 | 1 | NaN | ... | 224 | 192 | 124 | 106 | 161 | 36 | 132 | 30 | 35 | 137 |
2 | P | 22 | 1 | 8105 | 6 | 1010207 | 75 | 79 | 1 | NaN | ... | 79 | 127 | 125 | 110 | 72 | 20 | 72 | 134 | 83 | 143 |
3 | P | 31 | 1 | 1305 | 6 | 1010207 | 76 | 49 | 1 | NaN | ... | 88 | 24 | 75 | 75 | 131 | 139 | 19 | 28 | 131 | 141 |
4 | P | 31 | 2 | 1305 | 6 | 1010207 | 98 | 48 | 1 | NaN | ... | 129 | 26 | 81 | 93 | 179 | 173 | 25 | 41 | 164 | 198 |
5 rows × 286 columns
print(list(pums_p.columns))
['RT', 'SERIALNO', 'SPORDER', 'PUMA', 'ST', 'ADJINC', 'PWGTP', 'AGEP', 'CIT', 'CITWP', 'COW', 'DDRS', 'DEAR', 'DEYE', 'DOUT', 'DPHY', 'DRAT', 'DRATX', 'DREM', 'ENG', 'FER', 'GCL', 'GCM', 'GCR', 'HINS1', 'HINS2', 'HINS3', 'HINS4', 'HINS5', 'HINS6', 'HINS7', 'INTP', 'JWMNP', 'JWRIP', 'JWTR', 'LANX', 'MAR', 'MARHD', 'MARHM', 'MARHT', 'MARHW', 'MARHYP', 'MIG', 'MIL', 'MLPA', 'MLPB', 'MLPC', 'MLPD', 'MLPE', 'MLPF', 'MLPG', 'MLPH', 'MLPI', 'MLPJ', 'MLPK', 'NWAB', 'NWAV', 'NWLA', 'NWLK', 'NWRE', 'OIP', 'PAP', 'RELP', 'RETP', 'SCH', 'SCHG', 'SCHL', 'SEMP', 'SEX', 'SSIP', 'SSP', 'WAGP', 'WKHP', 'WKL', 'WKW', 'WRK', 'YOEP', 'ANC', 'ANC1P', 'ANC2P', 'DECADE', 'DIS', 'DRIVESP', 'ESP', 'ESR', 'FOD1P', 'FOD2P', 'HICOV', 'HISP', 'INDP', 'JWAP', 'JWDP', 'LANP', 'MIGPUMA', 'MIGSP', 'MSP', 'NAICSP', 'NATIVITY', 'NOP', 'OC', 'OCCP', 'PAOC', 'PERNP', 'PINCP', 'POBP', 'POVPIP', 'POWPUMA', 'POWSP', 'PRIVCOV', 'PUBCOV', 'QTRBIR', 'RAC1P', 'RAC2P', 'RAC3P', 'RACAIAN', 'RACASN', 'RACBLK', 'RACNH', 'RACNUM', 'RACPI', 'RACSOR', 'RACWHT', 'RC', 'SCIENGP', 'SCIENGRLP', 'SFN', 'SFR', 'SOCP', 'SSPA', 'VPS', 'WAOB', 'FAGEP', 'FANCP', 'FCITP', 'FCITWP', 'FCOWP', 'FDDRSP', 'FDEARP', 'FDEYEP', 'FDISP', 'FDOUTP', 'FDPHYP', 'FDRATP', 'FDRATXP', 'FDREMP', 'FENGP', 'FESRP', 'FFERP', 'FFODP', 'FGCLP', 'FGCMP', 'FGCRP', 'FHINS1P', 'FHINS2P', 'FHINS3C', 'FHINS3P', 'FHINS4C', 'FHINS4P', 'FHINS5C', 'FHINS5P', 'FHINS6P', 'FHINS7P', 'FHISP', 'FINDP', 'FINTP', 'FJWDP', 'FJWMNP', 'FJWRIP', 'FJWTRP', 'FLANP', 'FLANXP', 'FMARHDP', 'FMARHMP', 'FMARHTP', 'FMARHWP', 'FMARHYP', 'FMARP', 'FMIGP', 'FMIGSP', 'FMILPP', 'FMILSP', 'FOCCP', 'FOIP', 'FPAP', 'FPERNP', 'FPINCP', 'FPOBP', 'FPOWSP', 'FPRIVCOVP', 'FPUBCOVP', 'FRACP', 'FRELP', 'FRETP', 'FSCHGP', 'FSCHLP', 'FSCHP', 'FSEMP', 'FSEXP', 'FSSIP', 'FSSP', 'FWAGP', 'FWKHP', 'FWKLP', 'FWKWP', 'FWRKP', 'FYOEP', 'pwgtp1', 'pwgtp2', 'pwgtp3', 'pwgtp4', 'pwgtp5', 'pwgtp6', 'pwgtp7', 'pwgtp8', 'pwgtp9', 'pwgtp10', 'pwgtp11', 'pwgtp12', 'pwgtp13', 'pwgtp14', 'pwgtp15', 'pwgtp16', 'pwgtp17', 'pwgtp18', 'pwgtp19', 'pwgtp20', 'pwgtp21', 'pwgtp22', 'pwgtp23', 'pwgtp24', 'pwgtp25', 'pwgtp26', 'pwgtp27', 'pwgtp28', 'pwgtp29', 'pwgtp30', 'pwgtp31', 'pwgtp32', 'pwgtp33', 'pwgtp34', 'pwgtp35', 'pwgtp36', 'pwgtp37', 'pwgtp38', 'pwgtp39', 'pwgtp40', 'pwgtp41', 'pwgtp42', 'pwgtp43', 'pwgtp44', 'pwgtp45', 'pwgtp46', 'pwgtp47', 'pwgtp48', 'pwgtp49', 'pwgtp50', 'pwgtp51', 'pwgtp52', 'pwgtp53', 'pwgtp54', 'pwgtp55', 'pwgtp56', 'pwgtp57', 'pwgtp58', 'pwgtp59', 'pwgtp60', 'pwgtp61', 'pwgtp62', 'pwgtp63', 'pwgtp64', 'pwgtp65', 'pwgtp66', 'pwgtp67', 'pwgtp68', 'pwgtp69', 'pwgtp70', 'pwgtp71', 'pwgtp72', 'pwgtp73', 'pwgtp74', 'pwgtp75', 'pwgtp76', 'pwgtp77', 'pwgtp78', 'pwgtp79', 'pwgtp80']
print("there are {} observations of {} variables in the housing table ".format(pums_h.shape[0],pums_h.shape[1]))
print("there are {} observations of {} variables in the population table ".format(pums_p.shape[0],pums_p.shape[1]))
there are 153115 observations of 206 variables in the housing table there are 368047 observations of 286 variables in the population table
Per the Census Bureau's quick facts sheet, there were about 38 million people and 13.8 million housing units in California in 2012.
Thus, we have:
Let's do a few statistics--how many men and women in the Golden State? To answer this, we first need the code book, which we will need regularly for these types of exercises. Which table do we need?
We need the population table. As ever, refer to the data dictionary. Search for gender, male, female, or sex and see what appears.
pums_p.SEX
0 1 1 2 2 2 3 1 4 2 5 2 6 2 7 2 8 1 9 1 10 1 11 1 12 2 13 1 14 1 15 1 16 2 17 2 18 2 19 1 20 1 21 2 22 2 23 2 24 1 25 1 26 2 27 2 28 1 29 1 .. 368017 1 368018 2 368019 2 368020 1 368021 1 368022 1 368023 2 368024 2 368025 1 368026 2 368027 2 368028 2 368029 2 368030 1 368031 1 368032 2 368033 1 368034 1 368035 2 368036 1 368037 2 368038 1 368039 2 368040 1 368041 1 368042 2 368043 1 368044 2 368045 1 368046 2 Name: SEX, Length: 368047, dtype: int64
pums_p.SEX.value_counts() #.map({'1':"Male",'2':"Female"})
2 NaN 1 NaN Name: SEX, dtype: object
So, there are 181,000 1's and 186,000 2's. Done?
No, we need to remember that these records have sample weights associated with them, allowing us to make population estimates. Think of the weights as roughly inversely proportional to the sampling probability. They should hover around 100, given the sample rate is around 1%. But some groups are much more likely to respond to surveys than others. The Census Bureau employs such information when designing the survey and sample rates for specific sub-groups.
To get population estimates, we typically need to sum the weights.
pop_by_gender = pums_p.groupby(['SEX'])['PWGTP'].agg(['sum'])
pop_by_gender
sum | |
---|---|
SEX | |
1 | 18910864 |
2 | 19130566 |
pop_by_gender.plot(kind='barh')
<matplotlib.axes._subplots.AxesSubplot at 0x1197c8278>
We want to keep just the Bay Area records for now. So what geographic identifiers do we have in the PUMS files?
bayareafips ={'001':'Alameda', '013':'Contra Costa', '041':'Marin', '055':'Napa', '075':'San Francisco', '081':'San Mateo', '085':'Santa Clara', '095':'Sonoma', '097':'Solano'}
bayareafips.keys()
dict_keys(['085', '013', '055', '041', '001', '097', '075', '081', '095'])
pums_p.PUMA.unique().shape
(265,)
(Incidentally, since there are 265 PUMA areas in the Bay Area, it means they easily satisfy the 100,000 population per PUMA area required for the Census confidentiality conditions)
To know which PUMAS are in which counties, we need to get a correspondence. Or we can cheat--SF PUMAS provided below.
sfPUMAS = [k for k in range(7501,7508)]
sfPUMAS
[7501, 7502, 7503, 7504, 7505, 7506, 7507]
pums_p_sf=pums_p.loc[pums_p.PUMA.isin(sfPUMAS)]
pums_p_sf.shape
(7311, 286)
pums_h_sf=pums_h.loc[pums_h.PUMA.isin(sfPUMAS)]
pums_h_sf.shape
(3586, 206)
Down to several thousand records now. How many records in each PUMA?
## records per puma
pums_p_sf.groupby(['PUMA'])['PWGTP'].agg(['size'])
size | |
---|---|
PUMA | |
7501 | 1231 |
7502 | 896 |
7503 | 971 |
7504 | 955 |
7505 | 969 |
7506 | 1063 |
7507 | 1226 |
What is the 2012 population of San Francisco based on these data?
sf_pop = pums_p_sf.PWGTP.sum()
sf_pop
824855
Data comes with numeric codes. Let's assign labels to a few as well as create some dummies.
PUMS variables are not exposed in the census API (yet, at least). Instead, grab variable labels--pasted directly from PDF data dictionary.
Wrapper here reads the pasted strings, cleans them a bit and tries to stuff into a dictionary.
def map_value_labels(codes_pasted):
code = []
for item in codes_pasted.strip('\n').replace(' .',',').split('\n'):
id = int(item.split(',')[0])
val = item.split(',')[1]
code.append([id,val])
return dict(code)
def assign_value_labels(df,value_map,var):
target = var+'L'
if target in df.columns:
df[target].replace(df[var].map(value_map))
else:
df[target] = df[var].map(value_map)
print(df[target].value_counts())
Paste the codes, store in a variable as a string.
codessex ="""
1 .Male
2 .Female
"""
## we ship the blank category here--Ideally fix, but currently most vars are loaded as integers, so 'bb' won't work here
## RACE
## note that these are not actually verbatim categories from the code book. Instead, this collapses categories.
codesrace ="""
1 .White
2 .Black
3 .Other
4 .Other
5 .Other
6 .Asian
7 .Other
8 .Other
9 .Two + Races
"""
## TENURE
codesten="""
1 .Owned with mortgage or loan (include home equity loans)
2 .Owned free and clear
3 .Rented
4 .Occupied without payment of rent
"""
## YEAR BUILT
codeybl="""
01 .1939 or earlier
02 .1940 to 1959
03 .1940 to 1959
04 .1960 to 1979
05 .1960 to 1979
06 .1980 to 1999
07 .1980 to 1999
08 .2000 to 2011
09 .2000 to 2011
10 .2000 to 2011
11 .2000 to 2011
12 .2000 to 2011
13 .2000 to 2011
14 .2000 to 2011
15 .2000 to 2011
"""
## YEAR MOVED INTO UNIT
codemovein="""
1 .12 months or less
2 .13 to 23 months
3 .2 to 4 years
4 .5 to 9 years
5 .10 to 19 years
6 .20 to 29 years
7 .30 years or more
"""
Then, grab those strings, and call the two wrapper functions
2. race_value_map
in the example below stores the mapping dictionary
3. assign_value_labels
applies those value labels to the passed dataframe in the source variable name, with an _l
(for label) appended.
var = 'SEX'
value_map = map_value_labels(codessex)
assign_value_labels(pums_p,value_map,var)
Female 186615 Male 181432 Name: SEXL, dtype: int64
var = 'RAC1P'
value_map = map_value_labels(codesrace)
assign_value_labels(pums_p,value_map,var)
White 233425 Asian 51761 Other 46025 Black 20146 Two + Races 16690 Name: RAC1PL, dtype: int64
var = 'MV'
value_map = map_value_labels(codemovein)
assign_value_labels(pums_h,value_map,var)
5 to 9 years 30248 10 to 19 years 28520 2 to 4 years 23960 12 months or less 16910 13 to 23 months 16562 30 years or more 15107 20 to 29 years 13988 Name: MVL, dtype: int64
var = 'TEN'
value_map = map_value_labels(codesten)
assign_value_labels(pums_h,value_map,var)
Owned with mortgage or loan (include home equity loans) 55072 Rented 49868 Owned free and clear 22270 Occupied without payment of rent 2044 Name: TENL, dtype: int64
var = 'YBL'
value_map = map_value_labels(codeybl)
assign_value_labels(pums_h,value_map,var)
1960 to 1979 43970 1980 to 1999 34802 1940 to 1959 29456 2000 to 2011 15354 1939 or earlier 13446 Name: YBLL, dtype: int64
## Bedrooms
def brclassifier(val):
if val < 1: return "Studio"
elif val <=1: return "1 bedroom"
elif val <=2: return "2 bedrooms"
#elif val <=3: return "3 bedrooms"
#elif val <=4: return "4 bedrooms"
elif val >=3: return "3 or more bedrooms"
pums_h['BDSP_l']=pums_h.BDSP.apply(lambda x: brclassifier(x))
pums_h.BDSP_l.value_counts()
3 or more bedrooms 78247 2 bedrooms 36841 1 bedroom 18010 Studio 3976 Name: BDSP_l, dtype: int64
def age_mapping_dyn(x):
## ordinal var
breaks = [0,18,34,59,79,99]
## given the breaks above, what is the interval between each?
diff=[breaks[i+1]-breaks[i] for i in range(len(breaks)-1)]
## make tuples of breaks and distance--we need this to generate a list of age ranges
joint = zip(breaks,diff)
l=['%s-%s'%(yr[0],yr[0]+yr[1]-1) for yr in joint]
# Which bin in breaks does the age fall in?
cnt=0
for k in breaks:
#print k
if x<k:
break
else:
cnt+=1
index = 0 if cnt==0 else cnt-1
## the bin id is then used to select the appropriate age range
return l[index]
age_mapping_dyn(67)
'59-78'
pums_p['AGEPL']=pums_p.AGEP.map(age_mapping_dyn)
pums_p.AGEPL.head()
0 59-78 1 59-78 2 79-98 3 34-58 4 34-58 Name: AGEPL, dtype: object
pums_p.AGEPL.value_counts()
34-58 124688 0-17 83553 18-33 78013 59-78 64487 79-98 17306 Name: AGEPL, dtype: int64
First, we create a new variable, called newcomer, referring to folks who moved to SF during the past year. The relevant variable is MIGPUMA
. How does it work? There are lots of missing values--most folks do not move during a given year.
(Codes differ from PUMA designations, as it happens. Refer to code list here:)
pums_p['MIGPUMA'].dtypes
dtype('float64')
pums_p.MIGPUMA.value_counts().head(8)
3700.0 11612 7300.0 3520 5900.0 3235 6500.0 2696 1.0 2367 7100.0 2236 100.0 2235 6700.0 2141 Name: MIGPUMA, dtype: int64
pums_p['newcomer'], pums_h['newcomer']=0,0
pums_p.loc[(pums_p.MIGPUMA!=7500)&(pums_p.MIGPUMA>0),['newcomer']]=1
pums_p['newcomer']=pums_p['newcomer'].map({0:'existing resident',1:'new resident'})
pums_p['newcomer'].value_counts()
existing resident 317362 new resident 50685 Name: newcomer, dtype: int64
pums_p.loc[:,['newcomer','SEXL','PWGTP']].groupby(['newcomer','SEXL']).sum().unstack().plot(kind='barh')
<matplotlib.axes._subplots.AxesSubplot at 0x11af796d8>
Let's add some age information to movers, picking arbitrary age breaks for categorization. How do recent arrivals compare to current residents on their age profile?
pums_p.loc[:,['newcomer','AGEPL','PWGTP']].groupby(['newcomer','AGEPL']).sum()
PWGTP | ||
---|---|---|
newcomer | AGEPL | |
existing resident | 0-17 | 7882541 |
18-33 | 6709605 | |
34-58 | 11216011 | |
59-78 | 5187931 | |
79-98 | 1319422 | |
new resident | 0-17 | 1347003 |
18-33 | 2263351 | |
34-58 | 1622568 | |
59-78 | 390474 | |
79-98 | 102524 |
pums_p.loc[:,['newcomer','AGEPL','PWGTP']].groupby(['newcomer','AGEPL']).sum().unstack().plot(kind='barh')
<matplotlib.axes._subplots.AxesSubplot at 0x11e930eb8>
How do recent arrivals compare to current residents in racial profile?
new_by_race = pums_p.loc[:,['newcomer','RAC1PL','PWGTP']].groupby(['newcomer','RAC1PL']).sum()
new_by_race
PWGTP | ||
---|---|---|
newcomer | RAC1PL | |
existing resident | Asian | 4388282 |
Black | 1846543 | |
Other | 4475976 | |
Two + Races | 1388573 | |
White | 20216136 | |
new resident | Asian | 738948 |
Black | 422679 | |
Other | 863996 | |
Two + Races | 303564 | |
White | 3396733 |
new_by_race.unstack().plot(kind='barh')
<matplotlib.axes._subplots.AxesSubplot at 0x11ea1f7f0>
Now we use MIGPUMA
on the person table to determine a newcomer household (filtering on selected pums_p.SERIALNO values). All handshakes make use of this variable.
Notice the nesting of pums_p
criteria inside a pums_h
call.
pums_h['newcomer'] = 0
pums_h.loc[pums_p.loc[(pums_p.MIGPUMA!=7500)&(pums_p.MIGPUMA>0),'SERIALNO'].tolist(),'newcomer']=1
pums_h['recent']=pums_h['newcomer'].map({0:'existing resident',1:'new resident'})
pums_h.recent.value_counts()
existing resident 125077 new resident 28038 Name: recent, dtype: int64
Let's look at how recent arrival households compare to existing resident households in terms of the vintage of housing they live in.
hsg_by_bltyr = pums_h.loc[:,['recent','YBLL','WGTP']].groupby(['recent','YBLL']).sum()
hsg_by_bltyr.unstack().plot(kind='barh')
hsg_by_bltyr
WGTP | ||
---|---|---|
recent | YBLL | |
existing resident | 1939 or earlier | 1125958 |
1940 to 1959 | 2399213 | |
1960 to 1979 | 3618134 | |
1980 to 1999 | 2951629 | |
2000 to 2011 | 1391347 | |
new resident | 1939 or earlier | 181456 |
1940 to 1959 | 371722 | |
1960 to 1979 | 712846 | |
1980 to 1999 | 608062 | |
2000 to 2011 | 342896 |
Let's get some additional practice with groupby operations, using our rental listings data.
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
rents = pd.read_csv('data/rent_bay_filtered.csv').dropna()
for name in rents.columns:
print(name+',',)
Unnamed: 0, neighborhood, title, price, bedrooms, pid, longitude, subregion, link, latitude, sqft, month, day, year, blockfips, countyfips, county_x, tractfips, logrecno, fipsblock, state, county_y, tract, blkgrp, block, arealand, P0010001, H00010001, outlier, view, walk,
Generate a groupby object called 'grouped' grouping on county_x
gr = rents.groupby('county_x')
Now compute the mean price for each county_x
mean_prices = gr['price'].mean()
Plot this as a bar chart
mean_prices.plot(kind='bar')
<matplotlib.axes._subplots.AxesSubplot at 0x11b5d0160>
Compute the maximum price per county_x
max_prices = gr['price'].max()
Plot this as a horizontal bar chart
max_prices.plot(kind='barh')
<matplotlib.axes._subplots.AxesSubplot at 0x118a9cb00>
Compute a groupby object using county, and within county_x, by number of bedrooms.
gr2 = rents.groupby(['county_x', 'bedrooms'])
Compute the mean price and sqft in each groupby value (county_x and bedroom)
means = gr2[['price', 'sqft']].mean()
Use the 'unstack' method to present the results as a two-dimensional table
means = means.unstack()
Plot this as a horizontal bar chart (increase figsize to 10,10 to make this legible)
means.plot(kind='barh', edgecolor='None', figsize=(10,10))
<matplotlib.axes._subplots.AxesSubplot at 0x11e244a90>
Do the same tabulation (no plot), but presenting the 10th percentile values instead of means
gr2[['price', 'sqft']].quantile(0.9)
0.9 | price | sqft | |
---|---|---|---|
county_x | bedrooms | ||
Alameda | 0.0 | 1510.8 | 7500.0 |
1.0 | 1855.0 | 840.0 | |
2.0 | 2929.0 | 1214.0 | |
3.0 | 3191.4 | 1791.7 | |
4.0 | 3565.0 | 2873.2 | |
5.0 | 5197.0 | 3440.0 | |
8.0 | 9995.0 | 2500.0 | |
Contra Costa | 0.0 | 695.0 | 450.0 |
1.0 | 1861.8 | 810.4 | |
2.0 | 2384.4 | 1254.8 | |
3.0 | 2503.5 | 1849.0 | |
4.0 | 3437.0 | 2757.8 | |
5.0 | 2583.5 | 3319.5 | |
Marin | 0.0 | 1495.0 | 460.0 |
1.0 | 2514.0 | 884.0 | |
2.0 | 3620.0 | 1205.0 | |
3.0 | 4950.0 | 2028.0 | |
4.0 | 7950.0 | 3397.0 | |
5.0 | 8120.0 | 3460.0 | |
Napa | 0.0 | 668.0 | 366.4 |
1.0 | 2160.0 | 1100.0 | |
2.0 | 2250.0 | 1579.5 | |
3.0 | 2840.0 | 2400.0 | |
4.0 | 3060.0 | 2760.0 | |
5.0 | 2650.0 | 2502.0 | |
San Francisco | 0.0 | 3124.0 | 589.5 |
1.0 | 3900.0 | 1000.0 | |
2.0 | 5995.0 | 1500.0 | |
3.0 | 9375.0 | 2350.0 | |
4.0 | 8345.0 | 2300.0 | |
San Mateo | 0.0 | 2183.0 | 600.0 |
1.0 | 2711.0 | 850.0 | |
2.0 | 3293.0 | 1250.0 | |
3.0 | 4643.4 | 1950.0 | |
4.0 | 8000.0 | 3000.0 | |
5.0 | 6950.0 | 2805.0 | |
6.0 | 7475.0 | 2770.0 | |
Santa Clara | 0.0 | 2675.0 | 622.8 |
1.0 | 2361.5 | 860.3 | |
2.0 | 3292.8 | 1215.6 | |
3.0 | 4840.0 | 1896.4 | |
4.0 | 5810.0 | 2918.7 | |
5.0 | 7500.0 | 3535.0 | |
Solano | 0.0 | 1215.4 | 816.0 |
1.0 | 1248.0 | 792.0 | |
2.0 | 1539.5 | 1167.2 | |
3.0 | 1868.0 | 1704.8 | |
4.0 | 2256.0 | 1997.2 | |
5.0 | 1938.5 | 1816.4 | |
Sonoma | 0.0 | 2245.0 | 1430.0 |
1.0 | 1485.2 | 851.8 | |
2.0 | 1896.6 | 1367.5 | |
3.0 | 2840.0 | 2035.8 | |
4.0 | 4680.0 | 5134.0 | |
5.0 | 3210.0 | 2576.1 |
Do the same, but use describe to get the full statistical profile
gr2[['price', 'sqft']].describe()
price | sqft | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | mean | std | min | 25% | 50% | 75% | max | count | mean | std | min | 25% | 50% | 75% | max | ||
county_x | bedrooms | ||||||||||||||||
Alameda | 0.0 | 8.0 | 1253.875000 | 325.730141 | 650.0 | 1087.50 | 1397.5 | 1500.00 | 1536.0 | 8.0 | 2206.875000 | 3269.982457 | 200.0 | 393.75 | 552.5 | 2343.75 | 7500.0 |
1.0 | 47.0 | 1544.297872 | 277.325418 | 825.0 | 1387.50 | 1575.0 | 1742.50 | 2065.0 | 47.0 | 716.234043 | 140.061200 | 500.0 | 650.00 | 701.0 | 777.00 | 1275.0 | |
2.0 | 76.0 | 2160.250000 | 683.561616 | 1164.0 | 1837.50 | 2000.0 | 2350.75 | 6000.0 | 76.0 | 1024.342105 | 240.547074 | 530.0 | 865.25 | 973.0 | 1120.00 | 2200.0 | |
3.0 | 40.0 | 2424.900000 | 592.837497 | 1150.0 | 2087.25 | 2350.0 | 2722.00 | 4250.0 | 40.0 | 1388.175000 | 273.663736 | 865.0 | 1181.50 | 1400.0 | 1500.00 | 1980.0 | |
4.0 | 10.0 | 3197.500000 | 398.338563 | 2800.0 | 3000.00 | 3000.0 | 3350.00 | 4150.0 | 10.0 | 2345.000000 | 964.455748 | 1530.0 | 1889.25 | 2139.0 | 2372.25 | 4900.0 | |
5.0 | 5.0 | 3689.000000 | 1414.118100 | 2500.0 | 2700.00 | 3250.0 | 4000.00 | 5995.0 | 5.0 | 2350.600000 | 1011.672279 | 1510.0 | 1672.00 | 1971.0 | 2600.00 | 4000.0 | |
8.0 | 1.0 | 9995.000000 | NaN | 9995.0 | 9995.00 | 9995.0 | 9995.00 | 9995.0 | 1.0 | 2500.000000 | NaN | 2500.0 | 2500.00 | 2500.0 | 2500.00 | 2500.0 | |
Contra Costa | 0.0 | 1.0 | 695.000000 | NaN | 695.0 | 695.00 | 695.0 | 695.00 | 695.0 | 1.0 | 450.000000 | NaN | 450.0 | 450.00 | 450.0 | 450.00 | 450.0 |
1.0 | 28.0 | 1465.428571 | 378.384620 | 795.0 | 1247.50 | 1591.5 | 1767.00 | 1952.0 | 28.0 | 708.964286 | 135.913502 | 500.0 | 607.50 | 726.0 | 750.00 | 1200.0 | |
2.0 | 45.0 | 1787.666667 | 489.008598 | 795.0 | 1425.00 | 1850.0 | 2290.00 | 2580.0 | 45.0 | 1023.844444 | 210.165514 | 725.0 | 900.00 | 976.0 | 1100.00 | 1625.0 | |
3.0 | 24.0 | 1881.000000 | 428.841310 | 1245.0 | 1650.00 | 1772.5 | 2050.00 | 2900.0 | 24.0 | 1501.750000 | 303.163215 | 1080.0 | 1300.00 | 1486.5 | 1671.00 | 2200.0 | |
4.0 | 28.0 | 2427.500000 | 870.420822 | 1595.0 | 1890.00 | 2045.0 | 2473.75 | 5200.0 | 28.0 | 2210.607143 | 471.061633 | 1325.0 | 1967.00 | 2213.5 | 2365.75 | 3350.0 | |
5.0 | 8.0 | 2218.125000 | 324.674975 | 1900.0 | 2000.00 | 2125.0 | 2300.00 | 2895.0 | 8.0 | 2729.250000 | 517.936221 | 2050.0 | 2313.25 | 2823.0 | 3071.25 | 3400.0 | |
Marin | 0.0 | 9.0 | 1334.444444 | 138.416140 | 1125.0 | 1195.00 | 1400.0 | 1400.00 | 1495.0 | 9.0 | 406.111111 | 66.038709 | 250.0 | 390.00 | 440.0 | 440.00 | 460.0 |
1.0 | 39.0 | 1956.358974 | 488.900242 | 975.0 | 1695.00 | 1862.0 | 2313.50 | 3200.0 | 39.0 | 728.307692 | 134.389488 | 480.0 | 647.50 | 700.0 | 790.50 | 1200.0 | |
2.0 | 50.0 | 2563.340000 | 629.862784 | 1665.0 | 2100.00 | 2335.5 | 2895.00 | 3995.0 | 50.0 | 1016.640000 | 181.252193 | 706.0 | 900.00 | 1000.0 | 1100.00 | 1650.0 | |
3.0 | 31.0 | 3507.548387 | 1246.130460 | 1671.0 | 2700.00 | 3290.0 | 3750.00 | 7850.0 | 31.0 | 1630.870968 | 506.368360 | 1100.0 | 1298.00 | 1510.0 | 1777.00 | 3400.0 | |
4.0 | 11.0 | 5800.000000 | 1701.029100 | 3000.0 | 4575.00 | 5850.0 | 6975.00 | 8500.0 | 11.0 | 2750.363636 | 843.509843 | 1640.0 | 2469.50 | 2660.0 | 2890.00 | 4781.0 | |
5.0 | 5.0 | 6220.000000 | 1797.776404 | 4200.0 | 5000.00 | 5700.0 | 8000.00 | 8200.0 | 5.0 | 3050.000000 | 409.267639 | 2500.0 | 2850.00 | 3000.0 | 3400.00 | 3500.0 | |
Napa | 0.0 | 2.0 | 668.000000 | 0.000000 | 668.0 | 668.00 | 668.0 | 668.00 | 668.0 | 2.0 | 332.000000 | 60.811183 | 289.0 | 310.50 | 332.0 | 353.50 | 375.0 |
1.0 | 5.0 | 1550.600000 | 547.732873 | 1015.0 | 1199.00 | 1289.0 | 1950.00 | 2300.0 | 5.0 | 876.000000 | 222.693736 | 650.0 | 685.00 | 895.0 | 950.00 | 1200.0 | |
2.0 | 14.0 | 1857.500000 | 378.314836 | 1295.0 | 1547.50 | 1835.0 | 2200.00 | 2500.0 | 14.0 | 1179.214286 | 289.904541 | 895.0 | 912.00 | 1107.0 | 1400.00 | 1700.0 | |
3.0 | 18.0 | 2277.777778 | 647.897555 | 1400.0 | 1887.50 | 2150.0 | 2400.00 | 4200.0 | 18.0 | 1756.833333 | 482.312841 | 1100.0 | 1400.25 | 1602.0 | 2300.00 | 2500.0 | |
4.0 | 9.0 | 2766.666667 | 531.507291 | 2250.0 | 2500.00 | 2650.0 | 2800.00 | 4100.0 | 9.0 | 2216.000000 | 471.055729 | 1559.0 | 2000.00 | 2100.0 | 2433.00 | 3000.0 | |
5.0 | 1.0 | 2650.000000 | NaN | 2650.0 | 2650.00 | 2650.0 | 2650.00 | 2650.0 | 1.0 | 2502.000000 | NaN | 2502.0 | 2502.00 | 2502.0 | 2502.00 | 2502.0 | |
San Francisco | 0.0 | 38.0 | 2407.842105 | 760.625330 | 1000.0 | 2018.75 | 2486.0 | 2721.00 | 5250.0 | 38.0 | 527.789474 | 261.674665 | 180.0 | 457.00 | 500.0 | 566.50 | 2000.0 |
1.0 | 110.0 | 3100.772727 | 795.718897 | 850.0 | 2721.25 | 3027.0 | 3434.00 | 5800.0 | 110.0 | 775.954545 | 354.385639 | 200.0 | 644.00 | 722.5 | 838.75 | 3850.0 | |
2.0 | 91.0 | 4252.824176 | 1270.897571 | 1800.0 | 3500.00 | 4200.0 | 4825.00 | 8450.0 | 91.0 | 1174.945055 | 369.086029 | 550.0 | 959.00 | 1104.0 | 1300.00 | 3000.0 | |
3.0 | 36.0 | 6141.361111 | 2198.891372 | 3000.0 | 4843.75 | 5200.0 | 8373.75 | 9999.0 | 36.0 | 1712.611111 | 440.570557 | 955.0 | 1500.00 | 1553.5 | 1843.75 | 2800.0 | |
4.0 | 12.0 | 5740.833333 | 1917.243326 | 3500.0 | 4423.75 | 5450.0 | 6200.00 | 9950.0 | 12.0 | 1877.083333 | 384.091243 | 1250.0 | 1625.00 | 1922.0 | 2112.50 | 2448.0 | |
San Mateo | 0.0 | 21.0 | 1768.000000 | 354.834891 | 995.0 | 1490.00 | 1800.0 | 1937.00 | 2400.0 | 21.0 | 496.047619 | 106.154829 | 275.0 | 421.00 | 500.0 | 567.00 | 750.0 |
1.0 | 131.0 | 2189.694656 | 420.669172 | 1250.0 | 1936.50 | 2150.0 | 2372.50 | 5200.0 | 131.0 | 712.694656 | 115.311211 | 350.0 | 640.00 | 700.0 | 756.00 | 1200.0 | |
2.0 | 168.0 | 2729.851190 | 482.890420 | 1877.0 | 2393.00 | 2700.0 | 3000.00 | 3990.0 | 168.0 | 1066.589286 | 287.045560 | 762.0 | 950.00 | 1020.0 | 1109.50 | 3953.0 | |
3.0 | 49.0 | 3529.795918 | 866.515171 | 1500.0 | 3000.00 | 3400.0 | 4000.00 | 5999.0 | 49.0 | 1582.612245 | 408.990211 | 980.0 | 1339.00 | 1500.0 | 1800.00 | 3000.0 | |
4.0 | 18.0 | 4953.611111 | 1735.069761 | 3500.0 | 3825.00 | 4085.0 | 5362.50 | 8950.0 | 18.0 | 2384.000000 | 472.837243 | 1470.0 | 2025.00 | 2350.0 | 2725.00 | 3170.0 | |
5.0 | 4.0 | 4949.750000 | 2076.238646 | 3500.0 | 3724.25 | 4149.5 | 5375.00 | 8000.0 | 4.0 | 2437.500000 | 381.608438 | 2200.0 | 2200.00 | 2275.0 | 2512.50 | 3000.0 | |
6.0 | 1.0 | 7475.000000 | NaN | 7475.0 | 7475.00 | 7475.0 | 7475.00 | 7475.0 | 1.0 | 2770.000000 | NaN | 2770.0 | 2770.00 | 2770.0 | 2770.00 | 2770.0 | |
Santa Clara | 0.0 | 17.0 | 1820.941176 | 548.683933 | 1150.0 | 1295.00 | 1825.0 | 2000.00 | 2950.0 | 17.0 | 567.647059 | 346.368832 | 300.0 | 418.00 | 500.0 | 565.00 | 1862.0 |
1.0 | 188.0 | 1934.521277 | 417.776115 | 925.0 | 1650.00 | 1895.0 | 2165.75 | 3850.0 | 188.0 | 721.361702 | 123.136145 | 400.0 | 650.00 | 702.5 | 792.25 | 1100.0 | |
2.0 | 222.0 | 2514.918919 | 627.283126 | 1250.0 | 2096.25 | 2395.0 | 2800.00 | 5364.0 | 222.0 | 1063.000000 | 425.803388 | 420.0 | 915.75 | 1018.0 | 1100.00 | 6500.0 | |
3.0 | 82.0 | 3236.292683 | 918.608334 | 1100.0 | 2591.25 | 3190.0 | 3503.75 | 5600.0 | 82.0 | 1506.804878 | 304.671344 | 1000.0 | 1275.50 | 1430.5 | 1700.00 | 2256.0 | |
4.0 | 28.0 | 3952.142857 | 1654.920513 | 2450.0 | 2950.00 | 3400.0 | 4237.50 | 9000.0 | 28.0 | 2195.000000 | 632.814524 | 1450.0 | 1784.25 | 2047.5 | 2563.00 | 3917.0 | |
5.0 | 11.0 | 5659.090909 | 1720.293315 | 3000.0 | 4350.00 | 6100.0 | 7025.00 | 8000.0 | 11.0 | 3025.272727 | 563.240285 | 1850.0 | 2900.50 | 3100.0 | 3250.00 | 4000.0 | |
Solano | 0.0 | 3.0 | 904.333333 | 377.433879 | 650.0 | 687.50 | 725.0 | 1031.50 | 1338.0 | 3.0 | 635.000000 | 217.772817 | 450.0 | 515.00 | 580.0 | 727.50 | 875.0 |
1.0 | 15.0 | 993.800000 | 202.676096 | 715.0 | 800.50 | 985.0 | 1148.00 | 1289.0 | 15.0 | 703.933333 | 127.387747 | 450.0 | 639.50 | 708.0 | 735.00 | 1050.0 | |
2.0 | 28.0 | 1262.571429 | 250.698052 | 895.0 | 1048.00 | 1270.0 | 1499.00 | 1875.0 | 28.0 | 987.892857 | 184.534919 | 675.0 | 870.00 | 939.0 | 1063.00 | 1550.0 | |
3.0 | 39.0 | 1632.179487 | 252.796972 | 1150.0 | 1505.00 | 1695.0 | 1700.00 | 2300.0 | 39.0 | 1432.358974 | 254.968469 | 1020.0 | 1280.00 | 1450.0 | 1563.00 | 2164.0 | |
4.0 | 19.0 | 1846.842105 | 305.819530 | 1250.0 | 1710.00 | 1800.0 | 2047.50 | 2400.0 | 19.0 | 1666.947368 | 312.334129 | 1300.0 | 1462.00 | 1558.0 | 1764.00 | 2598.0 | |
5.0 | 2.0 | 1692.500000 | 434.870670 | 1385.0 | 1538.75 | 1692.5 | 1846.25 | 2000.0 | 2.0 | 1698.000000 | 209.303607 | 1550.0 | 1624.00 | 1698.0 | 1772.00 | 1846.0 | |
Sonoma | 0.0 | 4.0 | 1612.500000 | 642.099421 | 1050.0 | 1200.00 | 1450.0 | 1862.50 | 2500.0 | 4.0 | 935.000000 | 517.268467 | 600.0 | 630.00 | 720.0 | 1025.00 | 1700.0 |
1.0 | 35.0 | 1297.000000 | 236.807566 | 800.0 | 1199.00 | 1345.0 | 1415.00 | 1900.0 | 35.0 | 670.428571 | 162.252397 | 200.0 | 605.00 | 661.0 | 720.00 | 1100.0 | |
2.0 | 62.0 | 1557.919355 | 277.942989 | 1150.0 | 1350.00 | 1500.0 | 1693.75 | 2249.0 | 62.0 | 1016.274194 | 226.862143 | 580.0 | 891.00 | 1000.0 | 1028.00 | 1742.0 | |
3.0 | 37.0 | 2161.081081 | 534.670375 | 1550.0 | 1750.00 | 2000.0 | 2400.00 | 3900.0 | 37.0 | 1582.702703 | 527.924335 | 950.0 | 1293.00 | 1456.0 | 1700.00 | 4000.0 | |
4.0 | 14.0 | 2962.500000 | 982.576578 | 2200.0 | 2356.25 | 2600.0 | 2850.00 | 5100.0 | 14.0 | 2766.142857 | 1350.843033 | 1350.0 | 2097.00 | 2200.0 | 2800.00 | 5620.0 | |
5.0 | 4.0 | 2948.750000 | 331.420554 | 2500.0 | 2871.25 | 2997.5 | 3075.00 | 3300.0 | 4.0 | 2252.500000 | 412.567974 | 1700.0 | 2056.25 | 2360.5 | 2556.75 | 2589.0 |
Do the same, presenting the count of how many nonmissing price values there are in each group
gr2[['price', 'sqft']].count()
price | sqft | ||
---|---|---|---|
county_x | bedrooms | ||
Alameda | 0.0 | 8 | 8 |
1.0 | 47 | 47 | |
2.0 | 76 | 76 | |
3.0 | 40 | 40 | |
4.0 | 10 | 10 | |
5.0 | 5 | 5 | |
8.0 | 1 | 1 | |
Contra Costa | 0.0 | 1 | 1 |
1.0 | 28 | 28 | |
2.0 | 45 | 45 | |
3.0 | 24 | 24 | |
4.0 | 28 | 28 | |
5.0 | 8 | 8 | |
Marin | 0.0 | 9 | 9 |
1.0 | 39 | 39 | |
2.0 | 50 | 50 | |
3.0 | 31 | 31 | |
4.0 | 11 | 11 | |
5.0 | 5 | 5 | |
Napa | 0.0 | 2 | 2 |
1.0 | 5 | 5 | |
2.0 | 14 | 14 | |
3.0 | 18 | 18 | |
4.0 | 9 | 9 | |
5.0 | 1 | 1 | |
San Francisco | 0.0 | 38 | 38 |
1.0 | 110 | 110 | |
2.0 | 91 | 91 | |
3.0 | 36 | 36 | |
4.0 | 12 | 12 | |
San Mateo | 0.0 | 21 | 21 |
1.0 | 131 | 131 | |
2.0 | 168 | 168 | |
3.0 | 49 | 49 | |
4.0 | 18 | 18 | |
5.0 | 4 | 4 | |
6.0 | 1 | 1 | |
Santa Clara | 0.0 | 17 | 17 |
1.0 | 188 | 188 | |
2.0 | 222 | 222 | |
3.0 | 82 | 82 | |
4.0 | 28 | 28 | |
5.0 | 11 | 11 | |
Solano | 0.0 | 3 | 3 |
1.0 | 15 | 15 | |
2.0 | 28 | 28 | |
3.0 | 39 | 39 | |
4.0 | 19 | 19 | |
5.0 | 2 | 2 | |
Sonoma | 0.0 | 4 | 4 |
1.0 | 35 | 35 | |
2.0 | 62 | 62 | |
3.0 | 37 | 37 | |
4.0 | 14 | 14 | |
5.0 | 4 | 4 |
Define a function called 'q75_q25' that computes the difference between the 75th percentile and the 25th percentile, and analyze this value in the groupby
def q75_q25(x):
return x.quantile(0.75)-x.quantile(0.25)
gr2.agg(q75_q25)
H00010001 | P0010001 | Unnamed: 0 | arealand | blkgrp | block | blockfips | county_y | countyfips | day | ... | outlier | pid | price | sqft | state | tract | tractfips | view | walk | year | ||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
county_x | bedrooms | |||||||||||||||||||||
Alameda | 0.0 | 68.50 | 160.75 | 206.25 | 3875.75 | 1.25 | 1270.50 | 2.057492e+08 | 0 | 0 | 0.00 | ... | False | 19042273.50 | 412.50 | 1950.00 | 0 | 20575.00 | 20575.00 | 0.00 | 0.00 | 0 |
1.0 | 203.50 | 477.00 | 313.50 | 80951.00 | 1.00 | 1013.00 | 2.736255e+08 | 0 | 0 | 0.00 | ... | False | 701582.00 | 355.00 | 127.00 | 0 | 27362.50 | 27362.50 | 0.00 | 0.00 | 0 | |
2.0 | 182.25 | 450.00 | 374.00 | 66438.75 | 2.00 | 1990.50 | 1.729702e+08 | 0 | 0 | 0.00 | ... | False | 618314.00 | 513.25 | 254.75 | 0 | 17297.00 | 17297.00 | 0.00 | 0.00 | 0 | |
3.0 | 74.50 | 162.25 | 316.25 | 89642.75 | 2.00 | 1985.75 | 1.829703e+08 | 0 | 0 | 0.00 | ... | False | 7547054.75 | 634.75 | 318.50 | 0 | 18297.00 | 18297.00 | 0.00 | 0.00 | 0 | |
4.0 | 57.50 | 236.25 | 108.75 | 1122543.50 | 1.00 | 985.50 | 1.042568e+08 | 0 | 0 | 0.00 | ... | False | 10686532.25 | 350.00 | 483.00 | 0 | 10425.75 | 10425.75 | 0.00 | 0.00 | 0 | |
5.0 | 25.00 | 32.00 | 259.00 | 674.00 | 1.00 | 992.00 | 2.013490e+08 | 0 | 0 | 0.00 | ... | False | 10255358.00 | 1300.00 | 928.00 | 0 | 20135.00 | 20135.00 | 0.00 | 0.00 | 0 | |
8.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.000000e+00 | 0 | 0 | 0.00 | ... | False | 0.00 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0 | |
Contra Costa | 0.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.000000e+00 | 0 | 0 | 0.00 | ... | False | 0.00 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0 |
1.0 | 224.50 | 381.75 | 455.00 | 103756.50 | 2.00 | 1981.00 | 1.815160e+08 | 0 | 0 | 0.00 | ... | False | 700160.25 | 519.50 | 142.50 | 0 | 18151.50 | 18151.50 | 0.00 | 0.00 | 0 | |
2.0 | 189.00 | 336.00 | 385.00 | 105835.00 | 2.00 | 1981.00 | 2.400190e+08 | 0 | 0 | 0.00 | ... | False | 9982799.00 | 865.00 | 200.00 | 0 | 24002.00 | 24002.00 | 0.00 | 0.00 | 0 | |
3.0 | 114.25 | 301.00 | 371.00 | 100900.00 | 2.00 | 1993.00 | 3.442647e+08 | 0 | 0 | 0.00 | ... | False | 12974118.25 | 400.00 | 371.00 | 0 | 34426.25 | 34426.25 | 0.00 | 0.00 | 0 | |
4.0 | 62.00 | 167.25 | 427.75 | 80769.00 | 2.00 | 1983.75 | 4.760705e+08 | 0 | 0 | 0.00 | ... | False | 13009397.50 | 583.75 | 398.75 | 0 | 47607.00 | 47607.00 | 0.00 | 0.00 | 0 | |
5.0 | 40.00 | 152.75 | 413.25 | 92070.50 | 1.25 | 1264.50 | 5.192875e+08 | 0 | 0 | 0.00 | ... | False | 25805052.75 | 300.00 | 758.00 | 0 | 51928.75 | 51928.75 | 0.00 | 0.00 | 0 | |
Marin | 0.0 | 29.00 | 123.00 | 309.00 | 178239.00 | 2.00 | 1993.00 | 1.919920e+08 | 0 | 0 | 2.00 | ... | False | 15485038.00 | 205.00 | 50.00 | 0 | 19199.00 | 19199.00 | 0.00 | 0.00 | 0 |
1.0 | 196.50 | 167.00 | 282.50 | 214368.00 | 1.00 | 1007.50 | 1.859895e+08 | 0 | 0 | 2.00 | ... | False | 4770607.00 | 618.50 | 143.00 | 0 | 18599.00 | 18599.00 | 0.00 | 0.00 | 0 | |
2.0 | 169.00 | 309.25 | 424.00 | 273517.75 | 1.00 | 1032.00 | 1.927432e+08 | 0 | 0 | 2.00 | ... | False | 6745285.75 | 795.00 | 200.00 | 0 | 19274.25 | 19274.25 | 1.00 | 0.00 | 0 | |
3.0 | 96.50 | 316.50 | 417.50 | 217281.00 | 1.50 | 1514.00 | 1.804950e+08 | 0 | 0 | 2.00 | ... | False | 8868676.50 | 1050.00 | 479.00 | 0 | 18049.50 | 18049.50 | 0.00 | 0.00 | 0 | |
4.0 | 38.00 | 69.00 | 387.50 | 378416.00 | 1.50 | 1500.50 | 1.349995e+08 | 0 | 0 | 3.00 | ... | False | 17604397.00 | 2400.00 | 420.50 | 0 | 13500.00 | 13500.00 | 0.00 | 0.00 | 0 | |
5.0 | 516.00 | 1031.00 | 284.00 | 1390271.00 | 2.00 | 1992.00 | 7.000201e+07 | 0 | 0 | 2.00 | ... | False | 2671410.00 | 3000.00 | 550.00 | 0 | 7000.00 | 7000.00 | 0.00 | 0.00 | 0 | |
Napa | 0.0 | 0.00 | 0.00 | 0.50 | 0.00 | 0.00 | 0.00 | 0.000000e+00 | 0 | 0 | 0.50 | ... | False | 620599.00 | 0.00 | 43.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0 |
1.0 | 10.00 | 0.00 | 316.00 | 30019538.00 | 1.00 | 983.00 | 8.981013e+06 | 0 | 0 | 2.00 | ... | False | 3290248.00 | 751.00 | 265.00 | 0 | 898.00 | 898.00 | 0.00 | 0.00 | 0 | |
2.0 | 71.50 | 206.00 | 204.75 | 24564.00 | 2.00 | 1996.75 | 2.998005e+06 | 0 | 0 | 1.75 | ... | False | 32873503.25 | 652.50 | 488.00 | 0 | 300.00 | 300.00 | 0.00 | 0.00 | 0 | |
3.0 | 66.00 | 143.25 | 387.50 | 1316652.50 | 0.75 | 743.00 | 1.324276e+07 | 0 | 0 | 2.00 | ... | False | 15134398.25 | 512.50 | 899.75 | 0 | 1324.25 | 1324.25 | 0.00 | 0.75 | 0 | |
4.0 | 108.00 | 245.00 | 87.00 | 202302.00 | 2.00 | 1989.00 | 2.039982e+06 | 0 | 0 | 2.00 | ... | False | 43865122.00 | 300.00 | 433.00 | 0 | 204.00 | 204.00 | 0.00 | 0.00 | 0 | |
5.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.000000e+00 | 0 | 0 | 0.00 | ... | False | 0.00 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0 | |
San Francisco | 0.0 | 331.75 | 417.00 | 251.50 | 4418.00 | 1.00 | 1004.50 | 5.424751e+07 | 0 | 0 | 1.00 | ... | False | 1924268.75 | 702.25 | 109.50 | 0 | 5424.75 | 5424.75 | 0.00 | 0.00 | 0 |
1.0 | 240.50 | 346.00 | 328.50 | 12857.50 | 1.00 | 1005.75 | 2.842588e+08 | 0 | 0 | 1.00 | ... | False | 9672271.00 | 712.75 | 194.75 | 0 | 28425.75 | 28425.75 | 0.00 | 0.00 | 0 | |
2.0 | 137.50 | 250.00 | 420.00 | 8035.50 | 2.00 | 1989.00 | 1.985095e+08 | 0 | 0 | 1.00 | ... | False | 12489279.00 | 1325.00 | 341.00 | 0 | 19851.00 | 19851.00 | 1.00 | 0.00 | 0 | |
3.0 | 79.75 | 188.25 | 512.00 | 6635.25 | 1.00 | 998.00 | 1.802560e+08 | 0 | 0 | 1.00 | ... | False | 15020026.50 | 3530.00 | 343.75 | 0 | 18025.50 | 18025.50 | 0.25 | 0.00 | 0 | |
4.0 | 65.25 | 80.25 | 428.50 | 4508.25 | 3.00 | 2901.50 | 1.392585e+08 | 0 | 0 | 1.00 | ... | False | 22522950.75 | 1776.25 | 487.50 | 0 | 13925.50 | 13925.50 | 0.25 | 0.00 | 0 | |
San Mateo | 0.0 | 145.00 | 236.00 | 197.00 | 33686.00 | 4.00 | 3992.00 | 5.499798e+07 | 0 | 0 | 1.00 | ... | False | 1712959.00 | 447.00 | 146.00 | 0 | 5500.00 | 5500.00 | 0.00 | 0.00 | 0 |
1.0 | 196.00 | 357.00 | 299.00 | 107714.50 | 2.00 | 1994.00 | 3.249949e+07 | 0 | 0 | 1.00 | ... | False | 2490272.50 | 436.00 | 116.00 | 0 | 3250.00 | 3250.00 | 0.00 | 0.00 | 0 | |
2.0 | 166.50 | 325.50 | 293.50 | 79144.75 | 2.00 | 1995.00 | 4.303099e+07 | 0 | 0 | 1.00 | ... | False | 3487538.00 | 607.00 | 159.50 | 0 | 4303.00 | 4303.00 | 0.00 | 0.00 | 0 | |
3.0 | 83.00 | 200.00 | 506.00 | 66796.00 | 2.00 | 2003.00 | 6.400200e+07 | 0 | 0 | 1.00 | ... | False | 8882570.00 | 1000.00 | 461.00 | 0 | 6400.00 | 6400.00 | 0.00 | 0.00 | 0 | |
4.0 | 37.25 | 111.75 | 162.00 | 114244.75 | 1.75 | 1695.50 | 8.150094e+07 | 0 | 0 | 0.75 | ... | False | 12131167.25 | 1537.50 | 700.00 | 0 | 8150.00 | 8150.00 | 1.00 | 0.00 | 0 | |
5.0 | 78.50 | 285.25 | 30.25 | 488784.00 | 0.25 | 281.00 | 6.125078e+07 | 0 | 0 | 0.25 | ... | False | 9736122.50 | 1650.75 | 312.50 | 0 | 6125.00 | 6125.00 | 0.25 | 0.00 | 0 | |
6.0 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.000000e+00 | 0 | 0 | 0.00 | ... | False | 0.00 | 0.00 | 0.00 | 0 | 0.00 | 0.00 | 0.00 | 0.00 | 0 | |
Santa Clara | 0.0 | 116.00 | 208.00 | 1534.00 | 19217.00 | 2.00 | 1984.00 | 4.395198e+07 | 0 | 0 | 1.00 | ... | False | 12721875.00 | 705.00 | 147.00 | 0 | 4395.00 | 4395.00 | 0.00 | 0.00 | 0 |
1.0 | 197.75 | 400.25 | 790.75 | 66853.00 | 2.00 | 1989.25 | 4.395025e+07 | 0 | 0 | 0.00 | ... | False | 2000267.75 | 515.75 | 142.25 | 0 | 4395.00 | 4395.00 | 0.00 | 0.00 | 0 | |
2.0 | 175.75 | 385.75 | 563.00 | 74347.25 | 2.00 | 1990.75 | 4.376051e+07 | 0 | 0 | 0.00 | ... | False | 6955655.50 | 703.75 | 184.25 | 0 | 4376.00 | 4376.00 | 0.00 | 0.00 | 0 | |
3.0 | 94.50 | 328.75 | 579.50 | 48872.75 | 2.00 | 1982.75 | 6.053275e+07 | 0 | 0 | 0.00 | ... | False | 11797676.00 | 912.50 | 424.50 | 0 | 6053.25 | 6053.25 | 0.00 | 0.00 | 0 | |
4.0 | 70.50 | 148.00 | 551.00 | 89766.25 | 2.00 | 2009.00 | 7.790950e+07 | 0 | 0 | 0.00 | ... | False | 3658682.75 | 1287.50 | 778.75 | 0 | 7791.00 | 7791.00 | 0.00 | 0.00 | 0 | |
5.0 | 75.50 | 172.00 | 1438.00 | 414410.50 | 2.00 | 1993.50 | 3.051451e+07 | 0 | 0 | 1.00 | ... | False | 27962078.50 | 2675.00 | 349.50 | 0 | 3051.50 | 3051.50 | 0.00 | 0.00 | 0 | |
Solano | 0.0 | 48.00 | 140.00 | 137.00 | 56334.00 | 0.50 | 504.00 | 7.059992e+06 | 0 | 0 | 0.00 | ... | False | 15289585.00 | 344.00 | 212.50 | 0 | 706.00 | 706.00 | 0.00 | 0.00 | 0 |
1.0 | 95.50 | 275.00 | 297.50 | 44846.00 | 0.50 | 514.00 | 5.960003e+06 | 0 | 0 | 0.00 | ... | False | 606794.50 | 347.50 | 95.50 | 0 | 596.00 | 596.00 | 0.00 | 0.00 | 0 | |
2.0 | 86.00 | 169.25 | 257.75 | 41409.00 | 1.00 | 1004.25 | 7.377498e+06 | 0 | 0 | 0.00 | ... | False | 14046236.00 | 451.00 | 193.00 | 0 | 737.75 | 737.75 | 0.00 | 0.00 | 0 | |
3.0 | 21.00 | 84.00 | 294.50 | 39584.00 | 1.00 | 1005.50 | 1.008599e+07 | 0 | 0 | 0.00 | ... | False | 20250047.00 | 195.00 | 283.00 | 0 | 1008.50 | 1008.50 | 0.00 | 0.00 | 0 | |
4.0 | 39.50 | 105.00 | 336.50 | 34231.50 | 1.00 | 1001.00 | 8.005999e+06 | 0 | 0 | 0.00 | ... | False | 25628366.00 | 337.50 | 302.00 | 0 | 800.50 | 800.50 | 0.00 | 0.00 | 0 | |
5.0 | 16.50 | 46.00 | 18.00 | 19238.00 | 0.50 | 495.50 | 5.474504e+06 | 0 | 0 | 0.00 | ... | False | 8809319.00 | 307.50 | 148.00 | 0 | 547.50 | 547.50 | 0.00 | 0.00 | 0 | |
Sonoma | 0.0 | 43.25 | 63.25 | 85.50 | 1373290.25 | 0.50 | 522.75 | 1.828529e+06 | 0 | 0 | 0.50 | ... | False | 4202789.00 | 662.50 | 395.00 | 0 | 182.75 | 182.75 | 0.00 | 0.00 | 0 |
1.0 | 67.50 | 108.00 | 269.00 | 98943.50 | 3.00 | 2993.00 | 1.748651e+07 | 0 | 0 | 2.00 | ... | False | 7403740.00 | 216.00 | 115.00 | 0 | 1748.50 | 1748.50 | 0.00 | 0.00 | 0 | |
2.0 | 84.00 | 202.75 | 370.50 | 244785.00 | 3.00 | 2998.00 | 1.775400e+07 | 0 | 0 | 2.00 | ... | False | 11027031.75 | 343.75 | 137.00 | 0 | 1775.25 | 1775.25 | 0.00 | 0.00 | 0 | |
3.0 | 44.00 | 115.00 | 246.00 | 117787.00 | 2.00 | 1998.00 | 2.500300e+07 | 0 | 0 | 1.00 | ... | False | 12972428.00 | 650.00 | 407.00 | 0 | 2500.00 | 2500.00 | 0.00 | 0.00 | 0 | |
4.0 | 41.75 | 129.50 | 233.50 | 46760.25 | 1.75 | 1744.00 | 1.871301e+07 | 0 | 0 | 1.75 | ... | False | 11341200.00 | 493.75 | 703.00 | 0 | 1871.50 | 1871.50 | 0.00 | 0.00 | 0 | |
5.0 | 59.00 | 98.25 | 198.50 | 397540.25 | 1.25 | 1262.00 | 2.399127e+07 | 0 | 0 | 1.25 | ... | False | 3643638.00 | 203.75 | 500.50 | 0 | 2399.00 | 2399.00 | 0.00 | 0.00 | 0 |
55 rows × 24 columns
Create a list of functions to apply, using min, median, max, and q75_q25 for price and sqft by county_x, and bedrooms
f = ['min', 'median', 'max', q75_q25]
gr2.agg(f)
Unnamed: 0 | price | pid | ... | outlier | view | walk | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
min | median | max | q75_q25 | min | median | max | q75_q25 | min | median | ... | max | q75_q25 | min | median | max | q75_q25 | min | median | max | q75_q25 | ||
county_x | bedrooms | |||||||||||||||||||||
Alameda | 0.0 | 1419 | 1504.5 | 1893 | 206.25 | 650.0 | 1397.5 | 1536.0 | 412.50 | 4028984168 | 4.076076e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 |
1.0 | 1141 | 1591.0 | 2146 | 313.50 | 825.0 | 1575.0 | 2065.0 | 355.00 | 4033519017 | 4.076038e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 1 | 0.00 | |
2.0 | 1030 | 1567.0 | 2149 | 374.00 | 1164.0 | 2000.0 | 6000.0 | 513.25 | 4030650511 | 4.076081e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
3.0 | 1009 | 1594.5 | 2020 | 316.25 | 1150.0 | 2350.0 | 4250.0 | 634.75 | 4012621573 | 4.076045e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
4.0 | 1034 | 1685.5 | 1965 | 108.75 | 2800.0 | 3000.0 | 4150.0 | 350.00 | 4060709125 | 4.073022e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
5.0 | 1184 | 1491.0 | 2015 | 259.00 | 2500.0 | 3250.0 | 5995.0 | 1300.00 | 4056233066 | 4.075765e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 0 | 0.00 | |
8.0 | 1278 | 1278.0 | 1278 | 0.00 | 9995.0 | 9995.0 | 9995.0 | 0.00 | 4076580106 | 4.076580e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
Contra Costa | 0.0 | 1436 | 1436.0 | 1436 | 0.00 | 695.0 | 695.0 | 695.0 | 0.00 | 4052331595 | 4.052332e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 1 | 1 | 1 | 0.00 |
1.0 | 1007 | 1442.5 | 2005 | 455.00 | 795.0 | 1591.5 | 1952.0 | 519.50 | 4014589145 | 4.076203e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
2.0 | 1008 | 1544.0 | 2014 | 385.00 | 795.0 | 1850.0 | 2580.0 | 865.00 | 4014234759 | 4.076073e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
3.0 | 1035 | 1543.5 | 2076 | 371.00 | 1245.0 | 1772.5 | 2900.0 | 400.00 | 4022685227 | 4.075703e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
4.0 | 1011 | 1578.5 | 2147 | 427.75 | 1595.0 | 2045.0 | 5200.0 | 583.75 | 4016769441 | 4.076052e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 0 | 0.00 | |
5.0 | 1291 | 1567.0 | 1895 | 413.25 | 1900.0 | 2125.0 | 2895.0 | 300.00 | 4021722521 | 4.057808e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 0 | 0.00 | |
Marin | 0.0 | 1784 | 2054.0 | 2366 | 309.00 | 1125.0 | 1400.0 | 1495.0 | 205.00 | 4039708493 | 4.065259e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 0 | 0.00 |
1.0 | 1795 | 2033.0 | 2722 | 282.50 | 975.0 | 1862.0 | 3200.0 | 618.50 | 4049261309 | 4.074121e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
2.0 | 1741 | 2196.5 | 2726 | 424.00 | 1665.0 | 2335.5 | 3995.0 | 795.00 | 4018255201 | 4.070919e+09 | ... | False | False | 0 | 0 | 1 | 1.00 | 0 | 0 | 1 | 0.00 | |
3.0 | 1742 | 2299.0 | 2724 | 417.50 | 1671.0 | 3290.0 | 7850.0 | 1050.00 | 4013956927 | 4.070811e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 0 | 0.00 | |
4.0 | 1979 | 2330.0 | 2867 | 387.50 | 3000.0 | 5850.0 | 8500.0 | 2400.00 | 4024826967 | 4.068957e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 0 | 0.00 | |
5.0 | 1792 | 2171.0 | 2633 | 284.00 | 4200.0 | 5700.0 | 8200.0 | 3000.00 | 4069165655 | 4.071153e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 0 | 0.00 | |
Napa | 0.0 | 1922 | 1922.5 | 1923 | 0.50 | 668.0 | 668.0 | 668.0 | 0.00 | 4073943819 | 4.074564e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 |
1.0 | 1856 | 2121.0 | 2344 | 316.00 | 1015.0 | 1289.0 | 2300.0 | 751.00 | 4071388326 | 4.074047e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
2.0 | 1739 | 2173.0 | 2380 | 204.75 | 1295.0 | 1835.0 | 2500.0 | 652.50 | 4014809974 | 4.071540e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
3.0 | 1855 | 2109.5 | 2634 | 387.50 | 1400.0 | 2150.0 | 4200.0 | 512.50 | 4012930613 | 4.071377e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.75 | |
4.0 | 1930 | 2288.0 | 2368 | 87.00 | 2250.0 | 2650.0 | 4100.0 | 300.00 | 4021769495 | 4.067778e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
5.0 | 2376 | 2376.0 | 2376 | 0.00 | 2650.0 | 2650.0 | 2650.0 | 0.00 | 4058033895 | 4.058034e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
San Francisco | 0.0 | 26 | 191.5 | 1150 | 251.50 | 1000.0 | 2486.0 | 5250.0 | 702.25 | 4038003428 | 4.075042e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 |
1.0 | 7 | 371.5 | 1229 | 328.50 | 850.0 | 3027.0 | 5800.0 | 712.75 | 4012054934 | 4.073881e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
2.0 | 8 | 478.0 | 1148 | 420.00 | 1800.0 | 4200.0 | 8450.0 | 1325.00 | 4012705894 | 4.073902e+09 | ... | False | False | 0 | 0 | 1 | 1.00 | 0 | 0 | 0 | 0.00 | |
3.0 | 39 | 380.0 | 930 | 512.00 | 3000.0 | 5200.0 | 9999.0 | 3530.00 | 4024785317 | 4.064493e+09 | ... | False | False | 0 | 0 | 1 | 0.25 | 0 | 0 | 1 | 0.00 | |
4.0 | 82 | 532.5 | 1227 | 428.50 | 3500.0 | 5450.0 | 9950.0 | 1776.25 | 4025500393 | 4.061193e+09 | ... | False | False | 0 | 0 | 1 | 0.25 | 0 | 0 | 0 | 0.00 | |
San Mateo | 0.0 | 58 | 2780.0 | 2949 | 197.00 | 995.0 | 1800.0 | 2400.0 | 447.00 | 4073190555 | 4.074070e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 1 | 0.00 |
1.0 | 56 | 2693.0 | 3087 | 299.00 | 1250.0 | 2150.0 | 5200.0 | 436.00 | 4019210216 | 4.073924e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
2.0 | 1249 | 2700.0 | 3086 | 293.50 | 1877.0 | 2700.0 | 3990.0 | 607.00 | 4028397524 | 4.074092e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
3.0 | 64 | 2729.0 | 3082 | 506.00 | 1500.0 | 3400.0 | 5999.0 | 1000.00 | 4013352351 | 4.073636e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
4.0 | 74 | 2919.5 | 3081 | 162.00 | 3500.0 | 4085.0 | 8950.0 | 1537.50 | 4034829995 | 4.072421e+09 | ... | False | False | 0 | 0 | 1 | 1.00 | 0 | 0 | 0 | 0.00 | |
5.0 | 2947 | 2995.5 | 3059 | 30.25 | 3500.0 | 4149.5 | 8000.0 | 1650.75 | 4037041032 | 4.072226e+09 | ... | False | False | 0 | 0 | 1 | 0.25 | 0 | 0 | 0 | 0.00 | |
6.0 | 2809 | 2809.0 | 2809 | 0.00 | 7475.0 | 7475.0 | 7475.0 | 0.00 | 4076500244 | 4.076500e+09 | ... | False | False | 1 | 1 | 1 | 0.00 | 0 | 0 | 0 | 0.00 | |
Santa Clara | 0.0 | 914 | 1336.0 | 3015 | 1534.00 | 1150.0 | 1825.0 | 2950.0 | 705.00 | 4033822934 | 4.074216e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 |
1.0 | 548 | 1074.0 | 3046 | 790.75 | 925.0 | 1895.0 | 3850.0 | 515.75 | 4013946156 | 4.075598e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
2.0 | 552 | 994.0 | 3079 | 563.00 | 1250.0 | 2395.0 | 5364.0 | 703.75 | 4012126585 | 4.075490e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
3.0 | 566 | 1131.5 | 3075 | 579.50 | 1100.0 | 3190.0 | 5600.0 | 912.50 | 4016438052 | 4.074660e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
4.0 | 575 | 1029.5 | 3072 | 551.00 | 2450.0 | 3400.0 | 9000.0 | 1287.50 | 4014283167 | 4.075958e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 0 | 0.00 | |
5.0 | 676 | 1617.0 | 3051 | 1438.00 | 3000.0 | 6100.0 | 8000.0 | 2675.00 | 4024547288 | 4.073086e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
Solano | 0.0 | 1402 | 1433.0 | 1676 | 137.00 | 650.0 | 725.0 | 1338.0 | 344.00 | 4045907186 | 4.076329e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 |
1.0 | 1135 | 1659.0 | 2009 | 297.50 | 715.0 | 985.0 | 1289.0 | 347.50 | 4036829865 | 4.076069e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
2.0 | 1130 | 1578.0 | 2070 | 257.75 | 895.0 | 1270.0 | 1875.0 | 451.00 | 4029844077 | 4.075965e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 1 | 0.00 | |
3.0 | 1010 | 1301.0 | 2072 | 294.50 | 1150.0 | 1695.0 | 2300.0 | 195.00 | 4016793447 | 4.063394e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
4.0 | 1017 | 1609.0 | 2150 | 336.50 | 1250.0 | 1800.0 | 2400.0 | 337.50 | 4016764915 | 4.065565e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
5.0 | 1274 | 1292.0 | 1310 | 18.00 | 1385.0 | 1692.5 | 2000.0 | 307.50 | 4049390958 | 4.058200e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
Sonoma | 0.0 | 2101 | 2239.0 | 2341 | 85.50 | 1050.0 | 1450.0 | 2500.0 | 662.50 | 4065288341 | 4.073230e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 |
1.0 | 1746 | 2228.0 | 2639 | 269.00 | 800.0 | 1345.0 | 1900.0 | 216.00 | 4050263814 | 4.071389e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 1 | 0.00 | |
2.0 | 1831 | 2144.0 | 2723 | 370.50 | 1150.0 | 1500.0 | 2249.0 | 343.75 | 4012162253 | 4.072091e+09 | ... | False | False | 0 | 0 | 1 | 0.00 | 0 | 0 | 1 | 0.00 | |
3.0 | 1832 | 2247.0 | 2720 | 246.00 | 1550.0 | 2000.0 | 3900.0 | 650.00 | 4015136072 | 4.069944e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 1 | 0.00 | |
4.0 | 1738 | 2198.5 | 2515 | 233.50 | 2200.0 | 2600.0 | 5100.0 | 493.75 | 4036541365 | 4.065635e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 | |
5.0 | 1924 | 2177.5 | 2283 | 198.50 | 2500.0 | 2997.5 | 3300.0 | 203.75 | 4071096399 | 4.072884e+09 | ... | False | False | 0 | 0 | 0 | 0.00 | 0 | 0 | 0 | 0.00 |
55 rows × 96 columns
Use the pivot_table syntax to create a table of max prices by county_x by bedrooms
rents.pivot_table(index='county_x', columns='bedrooms', values='price')