true
, false
, null
) or string?null
?Torque -- point type geometry
w
, make a torque category mapChoropleth (any type of geometry) or Bubble (point type)
%pylab inline
import pandas as pd
from urllib.request import urlopen
from scipy.stats import describe as desc
from pandas import DataFrame, Series
import json
debug = False
def getRowInfo(acct,tableName):
return 'http://' + acct + '.cartodb.com/api/v2/sql?q=SELECT%20*%20FROM%20' + tableName + '%20LIMIT%200&format=json'
def mkUrl(acct,tableName,cols='*',limit='',fileType='csv'):
"""
acct: cartodb account name
tableName: name of table in account
geom: geometry type for normalization purposes (optional)
cols: columns of data to pull (optional, default: * (all columns))
limit: number of rows requested (optional, default: all rows)
fileType: format of returned data (optional, default: csv)
"""
stem = 'http://' + acct + '.cartodb.com/api/v2/sql?q='
params = '&format=' + fileType + \
'&filename=' + tableName
query = 'SELECT%20' + cols + '%20FROM%20' + tableName + '%20' + limit
url = stem + query + params
return url
def normalizeCols(cols):
m = {ord(','): '/a.area,'}
updatedCols = cols.translate(m) + '/a.area'
return updatedCols
def mkGeoUrl(acct,tableName):
return 'http://' + acct + '.cartodb.com/api/v2/sql' \
'?q=SELECT%20ST_GeometryType(the_geom)%20geometry_type%20FROM%20' + tableName + '%20WHERE%20the_geom%20IS%20NOT%20NULL%20LIMIT%201' \
'&format=json'
def simplifyType(g):
return {
'st_multipolygon': 'polygon',
'st_polygon': 'polygon',
'st_multilinestring': 'line',
'st_linestring': 'line',
'st_multipoint': 'point',
'st_point': 'point'
}[g.lower()]
Populating the interactive namespace from numpy and matplotlib
WARNING: pylab import has clobbered these variables: ['sum'] `%matplotlib` prevents importing * from pylab and numpy
mkUrl('eschbacher','all_month',cols='mag,depth,place',fileType='json')
'http://eschbacher.cartodb.com/api/v2/sql?q=SELECT%20mag,depth,place%20FROM%20all_month%20&format=json&filename=all_month'
testUrl1 = 'http://eschbacher.cartodb.com/api/v2/sql?q=SELECT%20mag,depth,place%20FROM%20all_month%20&format=json&filename=all_month'
testUrl2 = 'http://eschbacher.cartodb.com/api/v2/sql?q=WITH%20a%20AS%20(SELECT%201e6*ST_Area(the_geom::geography)%20area%20FROM%20all_month)%20SELECT%20mag/a.area,depth/a.area,place/a.area%20FROM%20all_month,a%20&format=csv&filename=all_month'
assert mkUrl('eschbacher','all_month',cols='mag,depth,place',fileType='json') == testUrl1
assert normalizeCols('mag,depth,place') == 'mag/a.area,depth/a.area,place/a.area'
acctName = 'common-data'
tableName = 'mnmappluto'
fileName = 'mnbuildings'
# Grab column information
url = getRowInfo(acctName,tableName)
if debug: print(url)
response = urlopen(url).readall().decode('utf-8')
column_data = json.loads(response)
# Filter out the string and boolean columns
str_columns = []
for c in column_data['fields']:
if ( column_data['fields'][c]['type'] in ('string','boolean') ):
str_columns.append(c)
print("Columns (" + str(len(str_columns)) + ") of type 'string' or 'boolean':\n",str_columns)
Columns (40) of type 'string' or 'boolean': ['firecomp', 'ownername', 'taxmap', 'zonedist3', 'builtcode', 'schooldist', 'spdist2', 'ltdheight', 'lottype', 'areasource', 'appdate', 'splitzone', 'ownertype', 'address', 'borough', 'overlay2', 'irrlotcode', 'bsmtcode', 'landmark', 'overlay1', 'cb2010', 'zonedist1', 'ct2010', 'plutomapid', 'bldgclass', 'zmcode', 'zonedist2', 'allzoning2', 'proxcode', 'zonemap', 'histdist', 'edesignum', 'zonedist4', 'version', 'tract2010', 'allzoning1', 'spdist1', 'sanborn', 'landuse', 'ext']
# Create API request
a = mkUrl(acctName,tableName,cols=','.join(str_columns))
print(a)
http://common-data.cartodb.com/api/v2/sql?q=SELECT%20firecomp,ownername,taxmap,zonedist3,builtcode,schooldist,spdist2,ltdheight,lottype,areasource,appdate,splitzone,ownertype,address,borough,overlay2,irrlotcode,bsmtcode,landmark,overlay1,cb2010,zonedist1,ct2010,plutomapid,bldgclass,zmcode,zonedist2,allzoning2,proxcode,zonemap,histdist,edesignum,zonedist4,version,tract2010,allzoning1,spdist1,sanborn,landuse,ext%20FROM%20mnmappluto%20&format=csv&filename=mnmappluto
# Retrieve only approved columns
ad = pd.read_csv(a)
ad.head()
firecomp | ownername | taxmap | zonedist3 | builtcode | schooldist | spdist2 | ltdheight | lottype | areasource | ... | histdist | edesignum | zonedist4 | version | tract2010 | allzoning1 | spdist1 | sanborn | landuse | ext | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | E095 | PARKS AND RECREATION | 10803 | NaN | E | 6 | NaN | NaN | 5 | 7 | ... | NaN | NaN | NaN | 14v1 | 287 | PARK | NaN | 112 036 | 9 | NaN |
1 | E008 | NEW 56-79 IG ASSOCIAT | 10505 | NaN | NaN | 2 | NaN | NaN | 5 | 7 | ... | NaN | NaN | NaN | 14v1 | 11203 | C5-2/MID | MID | 106E044 | 4 | NaN |
2 | L005 | ANDROMACHE CAPSIS | 10210 | NaN | E | 2 | NaN | NaN | 5 | 7 | ... | Greenwich Village | NaN | NaN | 14v1 | 73 | R6 | NaN | 103 015 | 2 | E |
3 | E074 | WILKEY, WAYNE C. | 10408 | NaN | E | 3 | NaN | NaN | 5 | 7 | ... | Upper West Side/Central Park West | NaN | NaN | 14v1 | 165 | R8B | NaN | 107S012 | 1 | NaN |
4 | E053 | 169-175 OPERATING LLC | 10603 | NaN | NaN | 4 | NaN | NaN | 5 | 7 | ... | NaN | NaN | NaN | 14v1 | 166 | C1-5/R7-2 | NaN | 108S046 | 2 | NaN |
5 rows × 40 columns
metrics = DataFrame({'uniques': [len(ad[c].unique()) for c in ad],
'perc_nulls': [ad[c].isnull().sum() / len(ad) for c in ad]
},
index=ad.columns)
metrics.sort('uniques')
perc_nulls | uniques | |
---|---|---|
version | 0.000000 | 1 |
borough | 0.000000 | 1 |
builtcode | 0.461226 | 2 |
zmcode | 0.975402 | 2 |
plutomapid | 0.000000 | 2 |
ltdheight | 0.979622 | 3 |
irrlotcode | 0.001516 | 3 |
splitzone | 0.001516 | 3 |
ext | 0.800233 | 4 |
proxcode | 0.001516 | 5 |
areasource | 0.000000 | 5 |
overlay2 | 0.999324 | 5 |
zonedist4 | 0.999883 | 6 |
ownertype | 0.584425 | 6 |
bsmtcode | 0.001516 | 7 |
overlay1 | 0.852600 | 8 |
schooldist | 0.002425 | 8 |
spdist2 | 0.998881 | 10 |
lottype | 0.001516 | 11 |
landuse | 0.003987 | 12 |
zonemap | 0.000000 | 20 |
spdist1 | 0.814269 | 27 |
zonedist3 | 0.997832 | 39 |
histdist | 0.747284 | 73 |
firecomp | 0.003357 | 77 |
taxmap | 0.001212 | 79 |
zonedist2 | 0.913896 | 85 |
zonedist1 | 0.001329 | 96 |
allzoning2 | 0.913896 | 119 |
edesignum | 0.960084 | 126 |
cb2010 | 0.008837 | 141 |
bldgclass | 0.001516 | 196 |
ct2010 | 0.003987 | 289 |
tract2010 | 0.000000 | 289 |
allzoning1 | 0.001329 | 325 |
landmark | 0.979855 | 767 |
sanborn | 0.001212 | 1077 |
appdate | 0.898088 | 1396 |
ownername | 0.034111 | 33355 |
address | 0.001539 | 42276 |
Making the cut means:
nan
\`null`s is smaller than 80%decision = (metrics['uniques'] >= 2) & (metrics['uniques'] <= 10) & (metrics['perc_nulls'] < 0.8)
winners = [c for c in metrics[decision].index]
metrics[decision].sort('uniques')
perc_nulls | uniques | |
---|---|---|
builtcode | 0.461226 | 2 |
plutomapid | 0.000000 | 2 |
splitzone | 0.001516 | 3 |
irrlotcode | 0.001516 | 3 |
areasource | 0.000000 | 5 |
proxcode | 0.001516 | 5 |
ownertype | 0.584425 | 6 |
bsmtcode | 0.001516 | 7 |
schooldist | 0.002425 | 8 |
print(sorted(winners))
['areasource', 'bsmtcode', 'builtcode', 'irrlotcode', 'ownertype', 'plutomapid', 'proxcode', 'schooldist', 'splitzone']
These Maps are here:
plutomapid
: 2 uniques and no nulls.Solution: We should next filter out data that is very strongly in one column. 95% threshold?
lottype
: failed because of 11 uniques -- visually interesting and the rest of the data can be accessed through infowindowsSolution: See if a large proportion is in the first seven modes instead of too strongly weighting the number of unique entries. Maybe 85-90% in the first seven modes?
ad_win = ad[winners]
maxval = 0
for c in ad:
temp = 100.0 * ad[c].value_counts(dropna=False) / len(ad[c])
if len(temp) < 10:
maxval = len(temp)
else:
maxval = 10
sval = sum(temp.head(maxval))
print(c,sval,'in',maxval,'bins')
print(temp)
# print(100.0 * temp / len(ad[c]))
firecomp 23.336442061086505 in 10 bins E022 2.772208 E074 2.518069 L011 2.483096 L025 2.452786 E044 2.392166 L013 2.231289 E039 2.217300 L030 2.212637 E054 2.098391 L026 1.958498 L005 1.949172 E076 1.918862 E028 1.918862 Q018 1.918862 L022 1.834927 E053 1.806948 L040 1.797622 L012 1.718349 L014 1.713686 E008 1.655398 E024 1.641408 L028 1.552810 E058 1.543483 L035 1.527163 L016 1.527163 L018 1.527163 L023 1.517836 L007 1.445558 L034 1.424574 E005 1.417580 ... L006 1.126137 E003 1.095827 E015 1.091163 L009 1.065516 E069 1.044533 E021 1.016554 E037 0.990907 E023 0.983912 E093 0.958265 L002 0.958265 E014 0.944276 L008 0.939613 E095 0.890651 L024 0.841688 L021 0.825367 L004 0.816041 L036 0.799720 E065 0.757752 E007 0.729774 E026 0.634180 L001 0.620191 E006 0.536256 E081 0.510609 L010 0.384705 E034 0.373047 NaN 0.335743 L015 0.303101 E010 0.265796 E004 0.263465 E260 0.034973 dtype: float64 ownername 7.318722312893448 in 10 bins NaN 3.411052 PARKS AND RECREATION 0.965260 HOUSING PRESERVATION 0.690138 OWNER / AGENT 0.487293 NEW YORK CITY HOUSING 0.417347 NYC DEPARTMENT OF EDU 0.382374 TRUSTEES OF COLUMBIA 0.382374 DEPT OF SMALL BUSINES 0.207508 D C A S 0.205176 NEW YORK UNIVERSITY 0.170203 CITY OF NEW YORK 0.139893 FIRE DEPARTMENT 0.109583 WILLIAM GOTTLIEB REAL 0.093262 DEPT OF TRANSPORTATIO 0.093262 POLICE DEPARTMENT 0.086267 NYC TRANSIT AUTHORITY 0.076941 THE TRUSTEES OF COLUM 0.074609 ROYAL CHARTER PROPERT 0.069946 PORT AUTHORITY OF NY 0.065283 YESHIVA UNIVERSITY 0.065283 JOINTLY OWNED PLAYGRO 0.065283 NEW YORK PUBLIC LIBRA 0.062952 SANITATION 0.062952 THE CITY OF NEW YORK 0.062952 NEW YORK CITY HSG AUT 0.058289 THE MANHATTAN LAND TR 0.055957 MSMC RESIDENTIAL REAL 0.055957 NYC HOUSING AUTHORITY 0.053626 DEPT OF GENERAL SERVI 0.051294 PORT OF NY AUTH 0.048962 ... ST BRIDGETS RC CHURCH 0.002332 729 WEST 186TH STREET 0.002332 PSA LESAGE L.L.C. 0.002332 MARIS JAMES 0.002332 ARCHIVE PRESERVATIONC 0.002332 223 FIRST AVE REALTY 0.002332 1133 5 AVE CORP 0.002332 TAI FOOK CORP 0.002332 CORFU REALTY LLC 0.002332 STREETER, CHARLES 0.002332 BICICI, GONUL 0.002332 SEICHO-NO-IE INC 0.002332 DAVDISON MONTEZ C 0.002332 153 REALTY CORP 0.002332 VORNADO LF 50 WEST 57 0.002332 425 REALTY CO 0.002332 ROBERT A HELLERMAN 0.002332 SPRICE INC 0.002332 307 EAST 77 LLC 0.002332 APOLLO REALTY CORP 0.002332 NEW YRK STUDIO SCHLET 0.002332 12 EAST 78TH STREET L 0.002332 MCBRIDE, TINA 0.002332 RIVERA, IVETTE K 0.002332 BWLOW, LLC 0.002332 GRAMERCY FIVE 0.002332 BRIDGE TOWER PLACE CO 0.002332 LOCAL 798 REALTY CORP 0.002332 BANNISTER TERRANCE 0.002332 TWO SPRING ASSOCIATES 0.002332 dtype: float64 taxmap 26.56096992305899 in 10 bins 10206 3.177897 10710 3.161576 10306 2.905106 10205 2.867801 10105 2.595010 10704 2.429471 10803 2.378177 10514 2.352530 10207 2.350198 10409 2.343204 10406 2.296573 10603 2.210305 10703 2.154348 10408 2.112381 10304 2.035439 10307 2.021450 10607 1.956167 10210 1.930520 10107 1.900210 10801 1.888552 10804 1.830263 10509 1.767312 10508 1.702028 10208 1.674050 10203 1.669387 10709 1.604104 10516 1.541152 10308 1.529494 10705 1.517836 10506 1.471205 ... 10802 0.832362 10201 0.825367 10405 0.799720 10309 0.778736 10701 0.774073 10507 0.769410 10104 0.729774 10604 0.708790 10101 0.669154 10202 0.638844 10503 0.636512 10611 0.608533 10609 0.589881 10402 0.587550 10301 0.554908 10606 0.550245 10610 0.543250 10501 0.529261 10401 0.454651 10102 0.405689 10103 0.317090 10602 0.298438 10505 0.291443 10504 0.275122 10502 0.219165 10106 0.128235 1 0.123572 NaN 0.121240 10601 0.116577 10310 0.100256 dtype: float64 zonedist3 99.89974352996036 in 10 bins NaN 99.783166 R8B 0.032642 C6-6 0.016321 R8 0.011658 C6-1 0.009326 R10 0.009326 C2-8 0.009326 R9X 0.009326 R7-2 0.009326 C1-9 0.009326 R6 0.006995 C5-2.5 0.006995 C6-2A 0.006995 C1-6 0.004663 C1-9A 0.004663 R9A 0.004663 R9 0.004663 R8A 0.004663 C1-7 0.004663 C1-6A 0.004663 C5-2 0.004663 R10A 0.002332 M1-5M 0.002332 C6-4X 0.002332 C6-4A 0.002332 C6-6.5 0.002332 C6-3X 0.002332 C6-3A 0.002332 C1-8X 0.002332 C6-4 0.002332 C6-4M 0.002332 C6-5 0.002332 M1-5A 0.002332 C4-7 0.002332 R7B 0.002332 C5-1 0.002332 C8-3 0.002332 R6A 0.002332 R7A 0.002332 dtype: float64 builtcode 100.0 in 2 bins E 53.877361 NaN 46.122639 dtype: float64 schooldist 100.00000000000001 in 8 bins 2 52.315225 3 14.411285 6 10.074609 5 9.995337 1 7.211471 4 5.238983 10 0.510609 NaN 0.242481 dtype: float64 spdist2 99.99999999999999 in 10 bins NaN 99.888086 MID 0.039636 TA 0.023315 PI 0.018652 MP 0.016321 125 0.004663 MX-15 0.002332 HY 0.002332 EC-3 0.002332 GC 0.002332 dtype: float64 ltdheight 100.00000000000001 in 3 bins NaN 97.962229 LH-1A 1.897878 LH-1 0.139893 dtype: float64 lottype 99.9906738167405 in 10 bins 5 76.675216 3 17.209140 0 2.839823 4 1.841921 1 0.594544 2 0.317090 9 0.163208 6 0.153882 NaN 0.151550 8 0.044299 7 0.009326 dtype: float64 areasource 100.0 in 5 bins 7 97.094894 4 2.692935 0 0.153882 1 0.053626 2 0.004663 dtype: float64 appdate 90.4336675215668 in 10 bins NaN 89.808813 12/25/1986 0.102588 03/15/2001 0.100256 01/07/1987 0.069946 01/03/1995 0.067615 11/15/1990 0.065283 11/30/1987 0.060620 12/31/1987 0.053626 01/08/2003 0.053626 05/11/2004 0.051294 12/31/1988 0.051294 07/11/2013 0.048962 10/07/1988 0.046631 07/22/1998 0.046631 08/18/1998 0.046631 02/08/1988 0.044299 02/02/2001 0.044299 01/08/2008 0.041968 01/06/1986 0.039636 04/20/1987 0.039636 11/01/1988 0.032642 03/21/2000 0.032642 11/12/2008 0.032642 12/08/1988 0.032642 12/29/2006 0.032642 04/11/2000 0.032642 06/08/2007 0.032642 12/20/1999 0.030310 12/31/1985 0.030310 07/22/1989 0.030310 ... 02/07/1996 0.002332 03/07/2006 0.002332 03/05/2009 0.002332 08/03/2012 0.002332 04/19/2012 0.002332 10/28/2008 0.002332 09/24/2004 0.002332 01/27/2010 0.002332 11/12/1986 0.002332 04/14/1993 0.002332 09/17/2002 0.002332 02/20/1998 0.002332 02/05/1996 0.002332 11/22/1995 0.002332 11/09/1999 0.002332 10/07/2011 0.002332 09/20/2011 0.002332 09/05/2012 0.002332 09/06/2011 0.002332 03/26/2012 0.002332 02/25/2013 0.002332 12/12/2008 0.002332 03/18/1988 0.002332 08/14/2002 0.002332 07/23/2009 0.002332 03/31/2005 0.002332 09/07/2011 0.002332 04/30/2013 0.002332 11/05/1993 0.002332 07/29/2013 0.002332 dtype: float64 splitzone 100.0 in 3 bins N 91.168104 Y 8.680345 NaN 0.151550 dtype: float64 ownertype 100.00000000000001 in 6 bins NaN 58.442527 P 33.947307 C 3.359758 X 3.275822 O 0.939613 M 0.034973 dtype: float64 address 0.5362555374213103 in 10 bins NaN 0.153882 F D R DRIVE 0.062952 MARGINAL STREET 0.062952 RIVERSIDE DRIVE 0.048962 ST NICHOLAS AVENUE 0.041968 2 AVENUE 0.037305 BROADWAY 0.034973 12 AVENUE 0.032642 HARLEM RIVER DRIVE 0.030310 7 AVENUE 0.030310 WEST 138 STREET 0.023315 EAST 8 STREET 0.023315 1 AVENUE 0.023315 EAST 5 STREET 0.023315 EAST 4 STREET 0.023315 8 AVENUE 0.020984 9 AVENUE 0.018652 AMSTERDAM AVENUE 0.018652 WEST 126 STREET 0.018652 EAST 6 STREET 0.018652 EAST 112 STREET 0.016321 RIVERVIEW TERRACE 0.016321 FT WASHINGTON AVENUE 0.016321 WEST END AVENUE 0.013989 CABRINI BOULEVARD 0.013989 WEST 135 STREET 0.013989 AVENUE C 0.013989 10 AVENUE 0.013989 3 AVENUE 0.011658 SOUTH STREET 0.011658 ... 107 EAST 7 STREET 0.002332 262 WEST 136 STREET 0.002332 132 WEST 87 STREET 0.002332 764 10 AVENUE 0.002332 419 EAST 84 STREET 0.002332 26 BEDFORD STREET 0.002332 46 WEST 111 STREET 0.002332 278 MOTT STREET 0.002332 8 WEST 38 STREET 0.002332 157 WEST 72 STREET 0.002332 47 ESSEX STREET 0.002332 259 WEST 95 STREET 0.002332 129 EAST 56 STREET 0.002332 4 ALLEN STREET 0.002332 617 WEST 168 STREET 0.002332 79 LAIGHT STREET 0.002332 23 EAST 92 STREET 0.002332 48 EAST 74 STREET 0.002332 346 LEXINGTON AVENUE 0.002332 29 EAST 2 STREET 0.002332 747 9 AVENUE 0.002332 117 WEST 96 STREET 0.002332 177 EAST HOUSTON STREET 0.002332 7 ST LUKES PLACE 0.002332 360 WEST 53 STREET 0.002332 14 EAST 77 STREET 0.002332 863 9 AVENUE 0.002332 112 WEST 78 STREET 0.002332 125 WEST 116 STREET 0.002332 108 1 AVENUE 0.002332 dtype: float64 borough 100.0 in 1 bins MN 100 dtype: float64 overlay2 100.0 in 5 bins NaN 99.932385 C2-5 0.025647 C1-5 0.023315 C2-4 0.016321 C1-4 0.002332 dtype: float64 irrlotcode 100.0 in 3 bins N 76.495687 Y 23.352763 NaN 0.151550 dtype: float64 bsmtcode 100.0 in 7 bins 2 52.830497 5 25.483796 1 16.775472 0 4.593145 NaN 0.151550 4 0.107251 3 0.058289 dtype: float64 landmark 98.17906271858241 in 10 bins NaN 97.985544 ASTOR ROW 0.062952 CITY AND SUBURBAN HOMES COMPANY 0.039636 437-459 WEST 24TH STREET HOUSES 0.025647 25-41 HARRISON STREET 0.018652 ROCKEFELLER CENTER VARIOUS SITES 0.013989 SIDEWALK CLOCK 0.009326 LA GRANGE TERRACE 0.009326 GEORGE F. BAKER JR., HOUSE COMPLEX 0.006995 COLUMBIA UNIV / VARIOUS SITES 0.006995 CITY COLLEGE VARIOUS SITES 0.006995 ST. CECILIA'S CH & REGINA ANGELORUM 0.004663 CHURCH OF THE INCARNATION 0.004663 CHURCH OF THE HOLY TRINITY COMPLEX 0.004663 EDWARD RIDLEY & SONS DEPARTMENT STR 0.004663 HENRY STREET SETTLEMENT 0.004663 AHRENS BLDG 0.004663 NY DAILY NEWS BLDG 0.004663 NEW YORK LIFE INSURANCE CO BUILDING 0.004663 TRINITY SCHOOL 0.004663 VILLARD HOUSES 0.004663 EAST RIVER HOUSES 0.004663 RUSSELL SAGE FOUNDATN BLDG & ANNEX 0.004663 UNIVERSITY VILLAGE 0.004663 SCRIBNER BUILDING 0.004663 UNITED STATES CUSTOM HOUSE 0.004663 JUDSON MEM CHURCH, TOWER, AND HALL 0.004663 GRACE CHURCH (EPISCOPAL) COMPLEX 0.004663 NEW YORK STOCK EXCHANGE 0.004663 BEAUX-ARTS APARTMENT 0.004663 ... 157 EAST 78 STREET HOUSE 0.002332 EMPIRE STATE BUILDING 0.002332 CHELSEA APARTMENTS 0.002332 502 CANAL STREET BLDG 0.002332 BANK OF THE METROPOLIS 0.002332 NY FREE CIRC LIB, OTTENDORFER BR 0.002332 FEDERAL RESERVE BANK OF NEW YORK 0.002332 PANHELLIC TOWER 0.002332 12 WEST 129TH STREET HOUSE 0.002332 63 NASSAU ST BLDG 0.002332 FORWARD BUILDING 0.002332 FREE PUBLIC BATHS & GYM AT E 54 ST 0.002332 GEORGE S. BOWDOIN STABLE 0.002332 SHUBERT THEATER 0.002332 ST. JEAN BAPTISTE R.C. CHURCH 0.002332 103 RIVERSIDE DRIVE HOUSE 0.002332 CBS BUILDING 0.002332 HORN & HARDART AUTOMAT 0.002332 RIVERSIDE PARK AND DRIVE 0.002332 SEAGRAM BUILDING 0.002332 HOTEL MANSFIELD 0.002332 ST. THOMAS CHURCH & PARISH HOUSE 0.002332 CENTURY APARTMENTS 0.002332 335 WEST 85 STREET HOUSE 0.002332 240 CENTRAL PARK SOUTH APARTMENTS 0.002332 PARAMOUNT HOTEL 0.002332 SAMUEL TREDWELL SKIDMORE HOUSE 0.002332 CONSOLIDATED EDISON BUILDING 0.002332 GENERAL ULYSSES S. GRANT TOMB 0.002332 BROWN BUILDING 0.002332 dtype: float64 overlay1 99.99999999999999 in 8 bins NaN 85.259967 C1-5 6.267195 C1-4 4.583819 C2-5 2.464444 C2-4 1.382607 C1-3 0.037305 C2-1 0.002332 C1-2 0.002332 dtype: float64 cb2010 63.527628817906276 in 10 bins 1000 9.580322 2000 9.065050 3000 7.276754 1001 6.852413 2001 6.717183 4000 6.001399 3001 5.376545 5000 4.665423 1002 4.404290 4001 3.588249 6000 2.905106 2002 2.825834 1003 2.357193 3002 2.119375 5001 1.986477 1005 1.639077 1004 1.620424 2003 1.592446 7000 1.482863 3003 1.457216 6001 1.345302 4002 1.294008 NaN 0.883656 1006 0.848683 2004 0.713453 1007 0.680811 1008 0.634180 1009 0.599207 4003 0.573560 2005 0.552576 ... 3017 0.009326 2017 0.009326 2021 0.009326 3012 0.006995 1023 0.006995 6017 0.006995 2020 0.006995 1031 0.004663 8002 0.004663 4022 0.004663 6018 0.004663 1024 0.004663 2018 0.004663 6019 0.002332 2 0.002332 5011 0.002332 1027 0.002332 1036 0.002332 1047 0.002332 6016 0.002332 5010 0.002332 2019 0.002332 6010 0.002332 3021 0.002332 3023 0.002332 4014 0.002332 4019 0.002332 5007 0.002332 5009 0.002332 8003 0.002332 dtype: float64 zonedist1 64.70739100023314 in 10 bins R7-2 19.237585 R8B 16.971322 R7A 6.208907 R8 6.180928 C6-2A 3.084635 R6 3.068314 R8A 2.609000 R7B 2.553043 R6A 2.436465 R10 2.357193 C1-9 2.242947 R10A 1.979482 M1-6 1.678713 C6-4 1.373280 C6-2 1.359291 M1-5B 1.349965 C5-3 1.314992 C6-1 1.128468 C1-6 1.065516 C4-4A 0.953602 C6-1G 0.916298 C5-2 0.911634 C2-8 0.909303 C1-6A 0.890651 C1-8X 0.708790 C5-5 0.701795 C5-1 0.678480 C6-3A 0.666822 M1-5A 0.624854 PARK 0.620191 ... NaN 0.132898 C6-5.5 0.128235 R9X 0.125903 C4-5A 0.123572 M1-4 0.118909 M1-6D 0.114246 C6-7 0.109583 C6-9 0.100256 BPC 0.100256 M1-2 0.100256 M1-5/R7-2 0.079273 C2-6A 0.069946 C2-7 0.069946 C4-6 0.060620 M3-1 0.058289 R7D 0.058289 C6-6.5 0.055957 C4-5X 0.051294 R10H 0.039636 M3-2 0.032642 C5-1A 0.030310 C1-8 0.027979 R7-1 0.027979 M2-1 0.025647 M1-5/R7X 0.018652 C4-2F 0.013989 M2-2 0.011658 R3-2 0.006995 R8X 0.006995 ZNA 0.002332 dtype: float64 ct2010 9.104686407087899 in 10 bins 38.00 1.058522 73.00 1.030543 228.00 0.913966 208.00 0.899977 67.00 0.892982 226.00 0.881324 220.00 0.871998 33.00 0.862672 41.00 0.853346 77.00 0.839356 173.00 0.825367 227.00 0.818373 49.00 0.804383 34.00 0.788062 169.00 0.774073 127.00 0.769410 161.00 0.760084 188.00 0.760084 138.00 0.757752 71.00 0.755421 18.00 0.755421 157.00 0.750758 89.00 0.743763 63.00 0.734437 98.00 0.732105 16.00 0.732105 200.00 0.720448 99.00 0.713453 40.00 0.704127 122.00 0.694801 ... 317.03 0.046631 156.02 0.046631 62.00 0.044299 267.00 0.041968 86.01 0.039636 214.00 0.037305 223.02 0.037305 147.00 0.034973 238.01 0.030310 217.03 0.027979 14.01 0.025647 197.01 0.020984 210.00 0.016321 10.02 0.016321 10.01 0.013989 44.00 0.013989 243.02 0.013989 297.00 0.013989 24.00 0.011658 311.00 0.011658 240.00 0.006995 20.00 0.006995 25.00 0.006995 86.02 0.006995 238.02 0.004663 60.00 0.004663 319.00 0.004663 1.00 0.004663 143.00 0.002332 5.00 0.002332 dtype: float64 plutomapid 100.0 in 2 bins 1 99.84845 3 0.15155 dtype: float64 bldgclass 47.710422009792495 in 10 bins C7 10.538587 C1 5.954768 C4 5.791560 C5 4.968524 D4 4.383306 C0 3.499650 C6 3.413383 RM 3.397062 K9 3.044999 A4 2.718582 O9 2.606668 V1 2.508743 D1 2.403824 C3 1.846584 B1 1.806948 D7 1.790627 D9 1.669387 C2 1.454885 M1 1.450221 S9 1.433901 B9 1.368617 S5 1.242714 S3 1.224062 D6 1.191420 K2 1.123805 K1 1.100490 A9 1.077174 L8 1.025880 D3 0.995570 O3 0.941945 ... Y9 0.009326 Z6 0.009326 E3 0.009326 J2 0.009326 RZ 0.009326 V8 0.009326 RD 0.009326 F1 0.006995 R3 0.006995 H7 0.006995 Q3 0.006995 N4 0.006995 F2 0.006995 T1 0.004663 G3 0.004663 A3 0.004663 G0 0.004663 RW 0.004663 K6 0.004663 E4 0.004663 Z2 0.004663 I2 0.004663 Z7 0.004663 N1 0.002332 Q7 0.002332 Q8 0.002332 A0 0.002332 F4 0.002332 I3 0.002332 RG 0.002332 dtype: float64 zmcode 100.0 in 2 bins NaN 97.540219 Y 2.459781 dtype: float64 zonedist2 96.20191186756819 in 10 bins NaN 91.389601 R8B 1.993472 R7A 0.473304 R7-2 0.445325 R6 0.415015 R8 0.401026 R10 0.303101 R8A 0.296106 C6-2A 0.249475 C5-2.5 0.235486 C1-9 0.219165 C5-3 0.209839 R10A 0.207508 C6-1 0.188855 C6-6 0.158545 C1-6 0.146887 C6-4 0.132898 M1-6 0.123572 C6-4A 0.111914 C5-1 0.111914 C6-2 0.107251 C1-8X 0.104920 R7B 0.100256 C2-6 0.095593 C2-8 0.093262 C6-3A 0.079273 C5-2 0.074609 C6-4.5 0.072278 C1-6A 0.067615 R6A 0.067615 ... M1-5B 0.023315 R7X 0.023315 M1-4 0.023315 R9X 0.020984 C1-8A 0.020984 C8-4 0.020984 R10H 0.020984 C2-7A 0.018652 C6-7T 0.016321 C6-3X 0.016321 R8X 0.016321 C5-P 0.013989 C1-7A 0.011658 M1-5/R7-2 0.009326 C1-8 0.009326 M1-2 0.009326 C5-1A 0.009326 M1-6D 0.006995 C2-7 0.006995 R5 0.004663 C3 0.004663 C4-2F 0.004663 C6-4X 0.004663 R7D 0.004663 M3-1 0.004663 R7-1 0.002332 R6B 0.002332 M2-1 0.002332 M1-5/R7X 0.002332 C5-5 0.002332 dtype: float64 allzoning2 96.04569829797155 in 10 bins NaN 91.389601 R8B 1.949172 R7A 0.435999 R7-2 0.433668 R6 0.415015 R8 0.391700 R10 0.291443 R8A 0.256470 C6-2A 0.249475 C5-2.5 0.233155 C1-9 0.216834 C5-3 0.209839 C6-1 0.188855 R10A 0.184192 C6-6 0.153882 C1-6 0.146887 C6-4 0.125903 M1-6 0.121240 C6-4A 0.111914 C1-8X 0.104920 C6-2 0.102588 R7B 0.100256 C5-1 0.097925 C2-6 0.095593 C2-8 0.093262 C6-3A 0.079273 C5-2 0.074609 C6-4.5 0.069946 C1-6A 0.067615 C6-1G 0.065283 ... C1-4/R8 0.004663 C4-2F 0.004663 C6-4/MID 0.004663 C3 0.004663 C1-4/R7-2 0.004663 R8B/PI 0.004663 C6-6/MID 0.004663 M3-1 0.004663 R6B 0.002332 C2-5/R8 0.002332 C1-5/R9 0.002332 C2-4/R7D 0.002332 C1-9/TA 0.002332 C1-4/R6A 0.002332 C1-5/R8 0.002332 C2-4/R8A 0.002332 R7-1 0.002332 C4-6A/EC-3 0.002332 R10/MP 0.002332 R7D 0.002332 C5-5 0.002332 M1-5/R7X 0.002332 C6-4/HY 0.002332 M1-6/GC 0.002332 C5-2.5/MID 0.002332 R8B/TA 0.002332 M1-5/R7-2/MX-15 0.002332 C2-4/R7A 0.002332 C6-4.5/TA 0.002332 M2-1 0.002332 dtype: float64 proxcode 100.00000000000001 in 5 bins 3 57.976218 0 21.186757 2 16.094661 1 4.590814 NaN 0.151550 dtype: float64 zonemap 91.8489158311961 in 10 bins 12c 14.868268 8d 14.227093 8c 13.844719 6a 12.096060 6b 9.279552 5d 7.666123 12a 6.724178 9a 5.134064 3b 4.740033 3a 3.268827 8b 2.569363 12d 1.942178 12b 1.734670 5c 1.144789 1d 0.494288 1b 0.212171 9b 0.020984 8a 0.016321 3c 0.011658 16a 0.004663 dtype: float64 histdist 90.11890883655863 in 10 bins NaN 74.728375 Greenwich Village 4.390301 Upper West Side/Central Park West 4.341338 Upper East Side 2.259268 SoHo-Cast Iron 1.039869 Ladies' Mile 0.823036 East Village/ Lower East Side 0.706458 Expanded Carnegie Hill 0.624854 Riverside-West End 0.606202 Mount Morris Park 0.599207 Greenwich Village Historic Dist. Ext. II 0.508277 West End-Collegiate Historic Dist. Ext. 0.496619 Hamilton Heights 0.473304 South Village 0.468641 Hamilton Heights/Sugar Hill 0.435999 Riverside-West End Historic Dist. Ext. I 0.433668 Tribeca East 0.429004 Tribeca West 0.415015 St. Nicholas 0.363721 Chelsea 0.340406 West End - Collegiate 0.335743 SoHo-Cast Iron Historic District Ext. 0.310096 Metropolitan Museum 0.305433 NoHo 0.249475 Chelsea Historic District Extension 0.249475 Hamilton Heights/Sugar Hill Northwest 0.249475 Madison Square North 0.219165 Carnegie Hill 0.214502 Treadwell Farm 0.177197 Murray Hill 0.177197 ... Greenwich Village Historic Dist. Ext. 0.086267 Riverside Drive-West 80th-81 Street 0.083936 West 71st Street 0.079273 Hamilton Heights/Sugar Hill Northeast 0.074609 St. Mark's 0.072278 Riverside - West 105th Street 0.069946 East 10th Street 0.060620 West Chelsea 0.055957 Tribeca South Extension 0.053626 Henderson Place 0.053626 Audubon Park 0.048962 MacDougal-Sullivan Gardens 0.048962 Tudor City 0.048962 Turtle Bay Gardens 0.044299 Hamilton Heights/Sugar Hill District Ext 0.034973 Stone Street 0.034973 South Street Seaport Extension 0.027979 Murray Hill Historic District Extension 0.027979 Fraunces Tavern Block 0.025647 Lamartine Place 0.025647 African Burial Ground & The Commons 0.025647 Sniffen Court 0.023315 East 17th Street/Irving Place 0.023315 Weehawken Street 0.023315 Hardenbergh / Rhinelander 0.016321 Audubon Terrace 0.016321 St. Mark's Extension 0.004663 Gramercy Park Extension 0.002332 Ellis Island 0.002332 Governors Island 0.002332 dtype: float64 edesignum 98.878526463045 in 10 bins NaN 96.008394 E-216 1.074843 E-137 0.596876 E-201 0.279785 E-142 0.261133 E-257 0.165540 E-288 0.139893 E-284 0.121240 E-268 0.118909 E-114 0.111914 E-116 0.083936 E-182 0.069946 E-92 0.060620 E-54 0.055957 E-276 0.053626 E-188 0.044299 E-208 0.037305 R-204 0.034973 E-50 0.032642 R-142 0.027979 E-4 0.025647 E-32 0.025647 E-131 0.020984 E-66 0.020984 E-204 0.020984 E-162 0.020984 E-120 0.018652 E-254 0.016321 R-145 0.016321 E-62 0.016321 ... R-9 0.002332 R-43 0.002332 R-38 0.002332 R-59 0.002332 R-76 0.002332 E-88 0.002332 R-77 0.002332 R-22 0.002332 R-141 0.002332 E-325 0.002332 R-137 0.002332 E-87 0.002332 E-256 0.002332 E-333 0.002332 R-91 0.002332 E-172 0.002332 R-201 0.002332 R-124 0.002332 R-143 0.002332 E-78 0.002332 R-5 0.002332 R-87 0.002332 R-199 0.002332 R-158 0.002332 R-178 0.002332 R-11 0.002332 R-61 0.002332 E-312 0.002332 E-34 0.002332 R-36 0.002332 dtype: float64 zonedist4 99.99999999999997 in 6 bins NaN 99.988342 C6-7 0.002332 C1-8A 0.002332 C1-9 0.002332 R8B 0.002332 R8A 0.002332 dtype: float64 version 100.0 in 1 bins 14v1 100 dtype: float64 tract2010 9.104686407087899 in 10 bins 38 1.058522 73 1.030543 228 0.913966 208 0.899977 67 0.892982 226 0.881324 220 0.871998 33 0.862672 41 0.853346 77 0.839356 173 0.825367 227 0.818373 49 0.804383 34 0.788062 169 0.774073 127 0.769410 161 0.760084 188 0.760084 138 0.757752 18 0.755421 71 0.755421 157 0.750758 89 0.743763 63 0.734437 16 0.732105 98 0.732105 200 0.720448 99 0.713453 40 0.704127 122 0.694801 ... 15602 0.046631 31703 0.046631 62 0.044299 267 0.041968 8601 0.039636 214 0.037305 22302 0.037305 147 0.034973 23801 0.030310 21703 0.027979 1401 0.025647 19701 0.020984 1002 0.016321 210 0.016321 24302 0.013989 297 0.013989 1001 0.013989 44 0.013989 24 0.011658 311 0.011658 8602 0.006995 25 0.006995 240 0.006995 20 0.006995 60 0.004663 1 0.004663 23802 0.004663 319 0.004663 143 0.002332 5 0.002332 dtype: float64 allzoning1 48.5287945908137 in 10 bins R8B 14.304034 R7-2 13.658195 R7A 3.709489 R8 3.163908 C1-4/R7-2 2.916764 R6 2.907438 R7B 2.287246 R6A 1.932851 C6-2A 1.839590 C1-9 1.809280 R10A 1.517836 R8B/LH-1A 1.489858 C1-5/R7-2 1.473537 R8/CL 1.359291 M1-5B 1.349965 C5-3/MID 1.175099 C1-5/R7A 1.105153 C1-6 1.065516 C6-2A/TMU 1.049196 M1-6 1.021217 C2-4/R7-2 1.009559 C1-5/R8A 0.855677 R10 0.844020 C5-2 0.839356 C1-6A 0.839356 C4-4A 0.825367 C6-1 0.813709 C6-1G 0.748426 R8A 0.741432 R10/PI 0.729774 ... C2-5/R8B/EC-2 0.002332 C1-4/R7B 0.002332 R10A/EC-3 0.002332 C6-3X/CL 0.002332 M3-2/CL 0.002332 C1-2/R7-2 0.002332 C1-5/R10/PI 0.002332 R7A/125 0.002332 C1-5/R10A/LH-1A 0.002332 C5-2.5 0.002332 R7-2/PC 0.002332 M1-6/CL 0.002332 C4-6/PI 0.002332 C1-4/R7-2/PC 0.002332 C2-5/C4-2F 0.002332 C1-3/R7-2 0.002332 C6-2/HY 0.002332 C4-5/SRI 0.002332 R8/EC-3 0.002332 C2-5/R10/TA 0.002332 ZNA 0.002332 M1-4/TA 0.002332 C2-1/R8 0.002332 C1-5/C4-4D 0.002332 C2-5/C4-7 0.002332 R3-2/GI 0.002332 R8/LM 0.002332 C6-3X/US 0.002332 R8/EC-2 0.002332 C1-9/PI 0.002332 dtype: float64 spdist1 96.0690137561203 in 10 bins NaN 81.426906 MID 3.541618 CL 3.142924 LM 1.326650 TMU 1.314992 LI 1.298671 TA 1.245045 PI 1.058522 MP 0.858009 125 0.855677 HY 0.704127 EC-2 0.671485 WCH 0.522266 GC 0.519935 EC-3 0.391700 HSQ 0.254138 L 0.202844 LC 0.172534 MMU 0.151550 US 0.102588 BPC 0.100256 MX-15 0.086267 MX-6 0.020984 U 0.018652 SRI 0.004663 PC 0.004663 GI 0.002332 dtype: float64 sanborn 5.080438330613197 in 10 bins 103 015 0.636512 111N050 0.582886 103 016 0.561903 102 019 0.536256 111S025 0.515272 103 004 0.482630 111S007 0.456983 107N090 0.454651 108N077 0.429004 103 017 0.424341 103 005 0.424341 103 007 0.419678 102 029 0.412684 102 020 0.410352 101S029 0.410352 101S032 0.408021 107N074 0.403357 102 022 0.398694 102 018 0.391700 102 005 0.389368 101N051 0.389368 103 019 0.384705 106E046 0.380042 107S005 0.373047 111N056 0.370716 101N076 0.361390 107S024 0.361390 108N087 0.359058 103 038 0.354395 104 019 0.354395 ... 18S 018 0.002332 16W 20 0.002332 15N 043 0.002332 105 042 0.002332 11N 100 0.002332 101N/069 0.002332 102 007 0.002332 17S 031 0.002332 16E 065 0.002332 17N 092 0.002332 102 050 0.002332 111N 051 0.002332 18N 074 0.002332 15S 030 0.002332 16E 039 0.002332 18N 069 0.002332 11S 001 0.002332 17S 045 0.002332 16W 022 0.002332 107N066 0.002332 17S 018 0.002332 15N 056 0.002332 18S 026 0.002332 111N062 0.002332 16E 042 0.002332 18S 024 0.002332 18N 058 0.002332 101N 081 0.002332 16W 031 0.002332 18S 045 0.002332 dtype: float64 landuse 98.68733970622523 in 10 bins 2 27.892283 4 23.975286 5 12.639310 3 11.611098 1 8.976451 8 5.775239 11 2.704593 6 2.343204 10 1.704360 7 1.065516 9 0.913966 NaN 0.398694 dtype: float64 ext 100.0 in 4 bins NaN 80.023315 E 19.536022 G 0.268128 EG 0.172534 dtype: float64
temp.head(10)
10
## Number patterns
for c in ad
i = 1
sumvals = 0
for r in ad_win[c].value_counts(dropna=False):
print('i',i)
print('sum',sumvals)
if (i > 7):
break
print(r)
sumvals = sumvals + r
i = i + 1
print(sumvals / len(ad_win))
File "<ipython-input-48-11790661e844>", line 3 for c in ad ^ SyntaxError: invalid syntax
Options
acctName = 'common-data'
tableName = 'tract_2010census_dp1' # 'all_day'
fileName = 'mnbuildings'
# Grab column data
url = mkUrl(acctName,tableName,limit='LIMIT%200',fileType='json')
response = urlopen(url).readall().decode('utf-8')
column_data = json.loads(response)
# Filter out the good ones, exclude bookkeeping ones
num_columns = []
banned = ['longitude','lat','latitude','long','lon','cartodb_id']
for c in column_data['fields']:
if (column_data['fields'][c]['type'] == 'number') & (c not in banned):
num_columns.append(c)
print("Columns (" + str(len(num_columns)) + ") of type 'number':\n",num_columns)
Columns (190) of type 'number': ['dp0010038', 'dp0010025', 'dp0080003', 'shape_area', 'dp0120008', 'dp0010054', 'dp0120013', 'dp0010024', 'dp0180006', 'dp0040002', 'dp0020003', 'dp0090002', 'dp0010026', 'dp0230001', 'dp0010019', 'dp0120016', 'dp0080021', 'dp0110013', 'dp0010030', 'dp0100003', 'dp0100006', 'dp0010006', 'dp0080015', 'dp0100004', 'dp0180008', 'dp0130013', 'dp0080009', 'dp0010012', 'dp0080019', 'dp0180002', 'dp0030002', 'dp0210002', 'dp0130001', 'dp0080018', 'dp0010037', 'dp0010003', 'dp0170001', 'dp0010042', 'dp0080001', 'dp0090003', 'dp0080006', 'dp0010056', 'dp0010009', 'dp0040001', 'dp0110012', 'dp0130007', 'dp0130008', 'dp0220001', 'dp0010044', 'dp0110006', 'dp0160001', 'dp0010032', 'dp0080008', 'dp0090004', 'dp0010013', 'dp0010001', 'aland10', 'dp0090006', 'dp0010007', 'dp0120015', 'dp0080004', 'dp0130009', 'dp0010039', 'dp0110002', 'dp0010034', 'dp0110014', 'dp0080020', 'dp0120020', 'dp0110003', 'dp0120012', 'dp0130014', 'dp0010035', 'dp0130015', 'dp0120004', 'dp0110004', 'dp0010016', 'dp0100001', 'dp0080002', 'dp0110016', 'dp0100002', 'dp0010008', 'dp0080014', 'dp0130012', 'dp0110007', 'dp0010047', 'dp0010036', 'dp0120003', 'dp0010046', 'dp0180004', 'dp0120007', 'dp0010053', 'dp0050001', 'dp0010018', 'dp0130011', 'dp0120019', 'dp0120014', 'dp0010014', 'dp0130006', 'dp0070003', 'dp0010010', 'dp0130010', 'dp0060001', 'dp0060002', 'dp0010043', 'dp0010020', 'dp0080016', 'dp0080010', 'dp0010057', 'dp0110008', 'dp0080013', 'dp0010022', 'dp0130005', 'dp0080007', 'dp0010049', 'dp0230002', 'dp0180005', 'dp0110010', 'dp0080017', 'dp0110005', 'dp0010031', 'dp0070002', 'dp0190001', 'dp0010048', 'dp0120017', 'dp0130002', 'dp0150001', 'dp0120010', 'dp0030003', 'dp0010052', 'dp0010051', 'dp0010027', 'dp0010023', 'shape_leng', 'dp0080005', 'dp0010011', 'dp0010055', 'dp0220002', 'dp0120005', 'dp0200001', 'dp0180003', 'dp0010004', 'dp0080012', 'dp0120001', 'dp0010029', 'dp0030001', 'dp0180001', 'dp0010028', 'dp0020002', 'dp0070001', 'dp0110009', 'dp0010015', 'awater10', 'dp0050002', 'dp0080022', 'dp0140001', 'dp0010050', 'dp0020001', 'dp0110001', 'dp0080023', 'dp0110015', 'dp0090001', 'dp0110017', 'dp0120002', 'dp0180007', 'dp0010017', 'dp0050003', 'dp0100005', 'dp0180009', 'dp0130004', 'dp0090005', 'dp0060003', 'dp0010045', 'dp0210003', 'dp0210001', 'dp0130003', 'dp0010002', 'dp0110011', 'dp0120006', 'dp0080011', 'dp0120018', 'dp0010021', 'dp0010041', 'dp0080024', 'dp0010033', 'dp0120009', 'dp0010005', 'dp0010040', 'dp0100007', 'dp0040003', 'dp0120011']
# Create API request
num_url = mkUrl(acctName,tableName,cols=','.join(num_columns))
print(num_url)
http://common-data.cartodb.com/api/v2/sql?q=SELECT%20dp0010038,dp0010025,dp0080003,shape_area,dp0120008,dp0010054,dp0120013,dp0010024,dp0180006,dp0040002,dp0020003,dp0090002,dp0010026,dp0230001,dp0010019,dp0120016,dp0080021,dp0110013,dp0010030,dp0100003,dp0100006,dp0010006,dp0080015,dp0100004,dp0180008,dp0130013,dp0080009,dp0010012,dp0080019,dp0180002,dp0030002,dp0210002,dp0130001,dp0080018,dp0010037,dp0010003,dp0170001,dp0010042,dp0080001,dp0090003,dp0080006,dp0010056,dp0010009,dp0040001,dp0110012,dp0130007,dp0130008,dp0220001,dp0010044,dp0110006,dp0160001,dp0010032,dp0080008,dp0090004,dp0010013,dp0010001,aland10,dp0090006,dp0010007,dp0120015,dp0080004,dp0130009,dp0010039,dp0110002,dp0010034,dp0110014,dp0080020,dp0120020,dp0110003,dp0120012,dp0130014,dp0010035,dp0130015,dp0120004,dp0110004,dp0010016,dp0100001,dp0080002,dp0110016,dp0100002,dp0010008,dp0080014,dp0130012,dp0110007,dp0010047,dp0010036,dp0120003,dp0010046,dp0180004,dp0120007,dp0010053,dp0050001,dp0010018,dp0130011,dp0120019,dp0120014,dp0010014,dp0130006,dp0070003,dp0010010,dp0130010,dp0060001,dp0060002,dp0010043,dp0010020,dp0080016,dp0080010,dp0010057,dp0110008,dp0080013,dp0010022,dp0130005,dp0080007,dp0010049,dp0230002,dp0180005,dp0110010,dp0080017,dp0110005,dp0010031,dp0070002,dp0190001,dp0010048,dp0120017,dp0130002,dp0150001,dp0120010,dp0030003,dp0010052,dp0010051,dp0010027,dp0010023,shape_leng,dp0080005,dp0010011,dp0010055,dp0220002,dp0120005,dp0200001,dp0180003,dp0010004,dp0080012,dp0120001,dp0010029,dp0030001,dp0180001,dp0010028,dp0020002,dp0070001,dp0110009,dp0010015,awater10,dp0050002,dp0080022,dp0140001,dp0010050,dp0020001,dp0110001,dp0080023,dp0110015,dp0090001,dp0110017,dp0120002,dp0180007,dp0010017,dp0050003,dp0100005,dp0180009,dp0130004,dp0090005,dp0060003,dp0010045,dp0210003,dp0210001,dp0130003,dp0010002,dp0110011,dp0120006,dp0080011,dp0120018,dp0010021,dp0010041,dp0080024,dp0010033,dp0120009,dp0010005,dp0010040,dp0100007,dp0040003,dp0120011%20FROM%20tract_2010census_dp1%20&format=csv&filename=tract_2010census_dp1
# Retrieve only approved columns
choro_df = pd.read_csv(num_url)
choro_df.head()
dp0010038 | dp0010025 | dp0080003 | shape_area | dp0120008 | dp0010054 | dp0120013 | dp0010024 | dp0180006 | dp0040002 | ... | dp0010021 | dp0010041 | dp0080024 | dp0010033 | dp0120009 | dp0010005 | dp0010040 | dp0100007 | dp0040003 | dp0120011 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 12 | 39 | 903 | 0.023157 | 50 | 24 | 28 | 43 | 16 | 494 | ... | 47 | 42 | 14 | 45 | 15 | 91 | 41 | 760 | 452 | 3 |
1 | 12 | 58 | 1145 | 0.022899 | 84 | 39 | 32 | 80 | 5 | 696 | ... | 48 | 68 | 1 | 54 | 8 | 162 | 64 | 1908 | 740 | 3 |
2 | 13 | 59 | 113 | 0.051102 | 129 | 34 | 36 | 72 | 5 | 644 | ... | 49 | 50 | 0 | 76 | 25 | 130 | 36 | 1742 | 736 | 3 |
3 | 6 | 88 | 2410 | 0.022929 | 109 | 55 | 39 | 124 | 20 | 1022 | ... | 82 | 84 | 5 | 89 | 21 | 224 | 80 | 2656 | 1004 | 6 |
4 | 13 | 87 | 3466 | 0.024620 | 91 | 66 | 55 | 127 | 21 | 1353 | ... | 108 | 104 | 0 | 111 | 21 | 246 | 87 | 3554 | 1379 | 16 |
5 rows × 190 columns
Notes about metrics:
blddepth
peaks at small values and get smaller for larger values on it's histogram. It has a strong positive skewness.num_metrics = DataFrame({'uniques': [len(choro_df[c].unique()) for c in choro_df],
'skew': [choro_df[x].skew() for x in choro_df],
'kurtosis': [choro_df[x].kurtosis() for x in choro_df],
'mean': [choro_df[x].mean() for x in choro_df],
'median': [choro_df[x].median() for x in choro_df],
'stddev': [choro_df[x].std() for x in choro_df]
},
index=choro_df.columns)
l = len(choro_df)
cols = []
vals = []
perc_nulls = []
## Does 98% of the data lie in one column?
for c in choro_df:
cols.append(c)
perc_nulls.append(choro_df[c].isnull().sum())
v, b = np.histogram(choro_df[c],bins=7)
if (v / l > 0.98).any():
vals.append(True)
else:
vals.append(False)
## Are there outliers?
## Modality classes: L,J,A,U,F, etc. from:
# http://en.wikipedia.org/wiki/Multimodal_distribution#Galtung.27s_classification
num_metrics['num_98'] = Series(vals, index=cols)
num_metrics['perc_nulls'] = Series(perc_nulls, index=cols)
num_metrics['skew_abs'] = abs(num_metrics['skew'])
num_metrics.sort('uniques')
kurtosis | mean | median | skew | stddev | uniques | num_98 | perc_nulls | skew_abs | |
---|---|---|---|---|---|---|---|---|---|
dp0110007 | 1090.051574 | 7.936272e-01 | 0.000000 | 23.793517 | 3.249843e+00 | 80 | True | 0 | 23.793517 |
dp0180005 | 373.721466 | 2.848491e+00 | 2.000000 | 11.264553 | 4.311877e+00 | 89 | True | 0 | 11.264553 |
dp0080016 | 226.983745 | 1.193752e+00 | 0.000000 | 11.058767 | 4.041861e+00 | 89 | True | 0 | 11.058767 |
dp0180007 | 328.919793 | 5.800303e+00 | 4.000000 | 9.501844 | 6.794094e+00 | 100 | True | 0 | 9.501844 |
dp0110006 | 113.353006 | 2.878692e+00 | 1.000000 | 7.405749 | 6.370228e+00 | 116 | True | 0 | 7.405749 |
dp0120012 | 81.197670 | 1.087440e+01 | 9.000000 | 5.179308 | 9.984859e+00 | 148 | True | 0 | 5.179308 |
dp0120011 | 15.737702 | 1.798600e+01 | 15.000000 | 2.473861 | 1.498486e+01 | 168 | False | 0 | 2.473861 |
dp0080017 | 2287.626375 | 1.481946e+00 | 0.000000 | 35.057540 | 1.086091e+01 | 185 | True | 0 | 35.057540 |
dp0130007 | 3.372249 | 3.810706e+01 | 34.000000 | 1.218209 | 2.364486e+01 | 210 | False | 0 | 1.218209 |
dp0110005 | 229.117683 | 9.522175e+00 | 3.000000 | 8.626220 | 1.852039e+01 | 224 | True | 0 | 8.626220 |
dp0080018 | 1802.998144 | 2.514932e+00 | 0.000000 | 33.379875 | 1.724488e+01 | 237 | True | 0 | 33.379875 |
dp0080015 | 2961.908993 | 2.111646e+00 | 0.000000 | 44.962195 | 2.687508e+01 | 248 | True | 0 | 44.962195 |
dp0080023 | 30.026605 | 2.564477e+01 | 18.000000 | 3.311457 | 2.589451e+01 | 261 | False | 0 | 3.311457 |
dp0010038 | 39.372412 | 2.449912e+01 | 19.000000 | 4.028911 | 2.348700e+01 | 272 | True | 0 | 4.028911 |
dp0010037 | 32.778076 | 3.137694e+01 | 27.000000 | 3.428233 | 2.485339e+01 | 283 | False | 0 | 3.428233 |
dp0110016 | 462.797619 | 8.179387e+00 | 4.000000 | 17.193662 | 2.029264e+01 | 288 | True | 0 | 17.193662 |
dp0130013 | 20.487712 | 4.347170e+01 | 38.000000 | 2.470508 | 3.027230e+01 | 301 | False | 0 | 2.470508 |
dp0180006 | 13458.623590 | 2.601251e+01 | 20.000000 | 78.464933 | 3.508246e+01 | 310 | True | 0 | 78.464933 |
dp0080024 | 13.249179 | 2.385725e+01 | 11.000000 | 2.983644 | 3.404140e+01 | 314 | False | 0 | 2.983644 |
dp0010036 | 67.796789 | 4.359359e+01 | 38.000000 | 4.053310 | 3.163648e+01 | 334 | True | 0 | 4.053310 |
dp0130006 | 2.761241 | 7.908724e+01 | 73.000000 | 1.039986 | 4.345997e+01 | 343 | False | 0 | 1.039986 |
dp0120009 | 18.445596 | 4.034645e+01 | 32.000000 | 2.937590 | 3.422038e+01 | 346 | False | 0 | 2.937590 |
dp0170001 | 16.000052 | 3.088786e+00 | 3.050000 | -1.820353 | 4.879877e-01 | 347 | False | 0 | 1.820353 |
dp0080021 | 75.050724 | 1.938075e+01 | 13.000000 | 6.513502 | 2.717276e+01 | 356 | True | 0 | 6.513502 |
dp0080022 | 44.027825 | 2.195287e+01 | 11.000000 | 4.841899 | 3.284507e+01 | 372 | False | 0 | 4.841899 |
dp0010056 | 22.805548 | 4.713887e+01 | 39.000000 | 2.867766 | 3.662365e+01 | 372 | False | 0 | 2.867766 |
dp0010055 | 23.860877 | 5.665417e+01 | 50.000000 | 2.664802 | 3.865580e+01 | 374 | False | 0 | 2.664802 |
dp0010035 | 313.775984 | 5.818360e+01 | 52.000000 | 7.175303 | 4.028918e+01 | 386 | True | 0 | 7.175303 |
dp0190001 | 219.448891 | 2.692885e+00 | 2.000000 | 11.203075 | 3.402280e+00 | 394 | True | 0 | 11.203075 |
dp0110015 | 1326.637549 | 6.508648e+00 | 1.000000 | 29.473036 | 4.252941e+01 | 399 | True | 0 | 29.473036 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
dp0030003 | 7.005388 | 1.709627e+03 | 1628.000000 | 1.217666 | 7.889298e+02 | 4347 | False | 0 | 1.217666 |
dp0080004 | 15.326906 | 5.322940e+02 | 161.000000 | 3.148669 | 8.826150e+02 | 4384 | False | 0 | 3.148669 |
dp0100003 | 22.353046 | 4.298292e+02 | 86.000000 | 3.984308 | 8.925489e+02 | 4396 | False | 0 | 3.984308 |
dp0090002 | 14.946514 | 5.752683e+02 | 199.000000 | 3.082220 | 9.037400e+02 | 4440 | False | 0 | 3.082220 |
dp0180001 | 11.561820 | 1.801866e+03 | 1706.000000 | 1.515716 | 8.372058e+02 | 4609 | False | 0 | 1.515716 |
dp0010020 | 12.758546 | 2.075167e+03 | 1954.000000 | 1.664084 | 9.968753e+02 | 5269 | False | 0 | 1.664084 |
dp0220002 | 7.215450 | 1.358132e+03 | 1092.000000 | 1.843635 | 1.057676e+03 | 5275 | False | 0 | 1.843635 |
dp0010039 | 7.636998 | 2.147305e+03 | 2035.000000 | 1.358731 | 1.015637e+03 | 5329 | False | 0 | 1.358731 |
dp0110002 | 12.064221 | 7.319539e+02 | 245.000000 | 3.029683 | 1.188849e+03 | 5530 | False | 0 | 3.029683 |
dp0100002 | 12.064221 | 7.319539e+02 | 245.000000 | 3.029683 | 1.188849e+03 | 5530 | False | 0 | 3.029683 |
dp0050001 | 7.686683 | 3.021660e+03 | 2878.000000 | 1.255534 | 1.381101e+03 | 6911 | False | 0 | 1.255534 |
dp0040001 | 8.077503 | 3.207840e+03 | 3049.000000 | 1.291386 | 1.465524e+03 | 7230 | False | 0 | 1.291386 |
dp0030001 | 7.756515 | 3.327093e+03 | 3163.000000 | 1.277770 | 1.519866e+03 | 7435 | False | 0 | 1.277770 |
dp0110011 | 3.060764 | 2.659989e+03 | 2510.000000 | 0.902027 | 1.826100e+03 | 7820 | False | 0 | 0.902027 |
dp0220001 | 7.249132 | 2.755892e+03 | 2528.000000 | 1.455898 | 1.733654e+03 | 7880 | False | 0 | 1.455898 |
dp0080003 | 4.709129 | 3.059084e+03 | 2898.000000 | 1.016043 | 1.806901e+03 | 8176 | False | 0 | 1.016043 |
dp0090001 | 4.914701 | 3.161661e+03 | 3006.000000 | 1.023439 | 1.836126e+03 | 8343 | False | 0 | 1.023439 |
dp0110010 | 5.219241 | 3.490518e+03 | 3309.000000 | 1.087046 | 1.888966e+03 | 8791 | False | 0 | 1.087046 |
dp0100007 | 5.219241 | 3.490518e+03 | 3309.000000 | 1.087046 | 1.888966e+03 | 8791 | False | 0 | 1.087046 |
dp0080002 | 8.354084 | 4.099079e+03 | 3878.500000 | 1.416297 | 1.922690e+03 | 8940 | False | 0 | 1.416297 |
dp0120002 | 7.616482 | 4.114025e+03 | 3897.000000 | 1.356676 | 1.972544e+03 | 9134 | False | 0 | 1.356676 |
dp0010001 | 8.521491 | 4.222471e+03 | 3993.000000 | 1.426214 | 1.982711e+03 | 9139 | False | 0 | 1.426214 |
dp0120001 | 8.521491 | 4.222471e+03 | 3993.000000 | 1.426214 | 1.982711e+03 | 9139 | False | 0 | 1.426214 |
dp0110001 | 8.521491 | 4.222471e+03 | 3993.000000 | 1.426214 | 1.982711e+03 | 9139 | False | 0 | 1.426214 |
dp0080001 | 8.521491 | 4.222471e+03 | 3993.000000 | 1.426214 | 1.982711e+03 | 9139 | False | 0 | 1.426214 |
dp0100001 | 8.521491 | 4.222471e+03 | 3993.000000 | 1.426214 | 1.982711e+03 | 9139 | False | 0 | 1.426214 |
awater10 | 5352.079314 | 9.335530e+06 | 28094.000000 | 65.107902 | 2.224993e+08 | 43861 | True | 0 | 65.107902 |
aland10 | 10267.494460 | 1.237326e+08 | 4823677.500000 | 82.910446 | 1.394208e+09 | 73466 | True | 0 | 82.910446 |
shape_area | 17582.803674 | 1.577478e-02 | 0.000531 | 113.082679 | 2.878312e-01 | 73986 | True | 0 | 113.082679 |
shape_leng | 1430.414824 | 3.270051e-01 | 0.116105 | 27.084948 | 7.794102e-01 | 74002 | True | 0 | 27.084948 |
190 rows × 9 columns
# answers = {}
# for c in cols:
# msg = "do you like " + c + "'s map? "
# ans = input(msg)
# print('you answered',ans)
# answers[c] = ans
answers = {'aland10': 'y',
'awater10': 'y',
'dp0010001': 'y',
'dp0010002': 'y',
'dp0010003': 'y',
'dp0010004': 'n',
'dp0010005': 'y',
'dp0010006': 'y',
'dp0010007': 'y',
'dp0010008': 'y',
'dp0010009': 'y',
'dp0010010': 'y',
'dp0010011': 'y',
'dp0010012': 'y',
'dp0010013': 'y',
'dp0010014': 'y',
'dp0010015': 'y',
'dp0010016': 'y',
'dp0010017': 'y',
'dp0010018': 'y',
'dp0010019': 'n',
'dp0010020': 'y',
'dp0010021': 'y',
'dp0010022': 'y',
'dp0010023': 'y',
'dp0010024': 'y',
'dp0010025': 'y',
'dp0010026': 'y',
'dp0010027': 'y',
'dp0010028': 'y',
'dp0010029': 'y',
'dp0010030': 'y',
'dp0010031': 'y',
'dp0010032': 'y',
'dp0010033': 'y',
'dp0010034': 'y',
'dp0010035': 'n',
'dp0010036': 'y',
'dp0010037': 'n',
'dp0010038': 'n',
'dp0010039': 'y',
'dp0010040': 'y',
'dp0010041': 'y',
'dp0010042': 'y',
'dp0010043': 'y',
'dp0010044': 'n',
'dp0010045': 'y',
'dp0010046': 'y',
'dp0010047': 'y',
'dp0010048': 'y',
'dp0010049': 'y',
'dp0010050': 'y',
'dp0010051': 'y',
'dp0010052': 'y',
'dp0010053': 'y',
'dp0010054': 'y',
'dp0010055': 'y',
'dp0010056': 'y',
'dp0010057': 'y',
'dp0020001': 'y',
'dp0020002': 'y',
'dp0020003': 'y',
'dp0030001': 'y',
'dp0030002': 'y',
'dp0030003': 'y',
'dp0040001': 'y',
'dp0040002': 'y',
'dp0040003': 'y',
'dp0050001': 'y',
'dp0050002': 'y',
'dp0050003': 'y',
'dp0060001': 'y',
'dp0060002': 'y',
'dp0060003': 'y',
'dp0070001': 'y',
'dp0070002': 'y',
'dp0070003': 'y',
'dp0080001': 'y',
'dp0080002': 'y',
'dp0080003': 'y',
'dp0080004': 'y',
'dp0080005': 'y',
'dp0080006': 'y',
'dp0080007': 'n',
'dp0080008': 'n',
'dp0080009': 'n',
'dp0080010': 'n',
'dp0080011': 'n',
'dp0080012': 'n',
'dp0080013': 'n',
'dp0080014': 'n',
'dp0080015': 'n',
'dp0080016': 'n',
'dp0080017': 'n',
'dp0080018': 'n',
'dp0080019': 'n',
'dp0080020': 'n',
'dp0080021': 'y',
'dp0080022': 'n',
'dp0080023': 'n',
'dp0080024': 'n',
'dp0090001': 'y',
'dp0090002': 'y',
'dp0090003': 'y',
'dp0090004': 'n',
'dp0090005': 'n',
'dp0090006': 'y',
'dp0100001': 'y',
'dp0100002': 'y',
'dp0100003': 'y',
'dp0100004': 'n',
'dp0100005': 'n',
'dp0100006': 'n',
'dp0100007': 'y',
'dp0110001': 'y',
'dp0110002': 'y',
'dp0110003': 'y',
'dp0110004': 'n',
'dp0110005': 'y',
'dp0110006': 'n',
'dp0110007': 'n',
'dp0110008': 'y',
'dp0110009': 'n',
'dp0110010': 'y',
'dp0110011': 'y',
'dp0110012': 'y',
'dp0110013': 'y',
'dp0110014': 'n',
'dp0110015': 'n',
'dp0110016': 'n',
'dp0110017': 'y',
'dp0120001': 'y',
'dp0120002': 'y',
'dp0120003': 'y',
'dp0120004': 'y',
'dp0120005': 'y',
'dp0120006': 'y',
'dp0120007': 'y',
'dp0120008': 'y',
'dp0120009': 'y',
'dp0120010': 'y',
'dp0120011': 'y',
'dp0120012': 'y',
'dp0120013': 'y',
'dp0120014': 'y',
'dp0120015': 'y',
'dp0120016': 'y',
'dp0120017': 'y',
'dp0120018': 'n',
'dp0120019': 'n',
'dp0120020': 'n',
'dp0130001': 'y',
'dp0130002': 'y',
'dp0130003': 'y',
'dp0130004': 'y',
'dp0130005': 'y',
'dp0130006': 'n',
'dp0130007': 'y',
'dp0130008': 'y',
'dp0130009': 'y',
'dp0130010': 'y',
'dp0130011': 'y',
'dp0130012': 'y',
'dp0130013': 'y',
'dp0130014': 'y',
'dp0130015': 'y',
'dp0140001': 'n',
'dp0150001': 'y',
'dp0160001': 'y',
'dp0170001': 'y',
'dp0180001': 'y',
'dp0180002': 'y',
'dp0180003': 'y',
'dp0180004': 'n',
'dp0180005': 'n',
'dp0180006': 'y',
'dp0180007': 'y',
'dp0180008': 'y',
'dp0180009': 'y',
'dp0190001': 'n',
'dp0200001': 'y',
'dp0210001': 'y',
'dp0210002': 'y',
'dp0210003': 'y',
'dp0220001': 'y',
'dp0220002': 'y',
'dp0230001': 'y',
'dp0230002': 'y',
'shape_area': 'y',
'shape_leng': 'y'}
num_metrics['approve'] = Series(answers)
len(num_metrics['approve'])
190
colors = {'y': 'b', 'n': 'r'}
df = num_metrics.copy()
df["Color"] = df['approve'].apply(lambda x: colors[x])
df
df.plot(x='kurtosis',y='skew',kind='scatter',c=df.Color,figsize=(12,12),logx=True)
<matplotlib.axes._subplots.AxesSubplot at 0x105e5ccf8>
df.groupby('approve')['kurtosis'].mean()
approve n 461.306406 y 409.846145 Name: kurtosis, dtype: float64
k_max = num_metrics['kurtosis'].max() - 0.05 * abs(num_metrics['kurtosis'].max()) ## make it a little smaller
k_min = 1.05 * num_metrics['kurtosis'].min() ## make it a little larger
t = num_metrics['kurtosis'].groupby(pd.cut(num_metrics["kurtosis"], np.arange(k_min, k_max, 20)))
print('kurtosis ranges\n-------- ------')
for a in t.grouper.levels[0].values:
if a in t.indices:
print(a,': ',', '.join(t.get_group(a).index.values),'\n')
kurtosis ranges -------- ------ (2.667, 22.667] : dp0080003, dp0120008, dp0120013, dp0040002, dp0020003, dp0090002, dp0230001, dp0010030, dp0100003, dp0130013, dp0010012, dp0080019, dp0180002, dp0030002, dp0210002, dp0130001, dp0010003, dp0170001, dp0010042, dp0080001, dp0010009, dp0040001, dp0110012, dp0130007, dp0130008, dp0220001, dp0160001, dp0010013, dp0010001, dp0090006, dp0010007, dp0080004, dp0130009, dp0010039, dp0110002, dp0130014, dp0120004, dp0100001, dp0080002, dp0100002, dp0010008, dp0130012, dp0010047, dp0120003, dp0010046, dp0120007, dp0050001, dp0130011, dp0130006, dp0010010, dp0130010, dp0010020, dp0110008, dp0010022, dp0130005, dp0010049, dp0230002, dp0110010, dp0010031, dp0010048, dp0130002, dp0030003, dp0010051, dp0010027, dp0010023, dp0010011, dp0220002, dp0120005, dp0010004, dp0120001, dp0010029, dp0030001, dp0180001, dp0010028, dp0020002, dp0110009, dp0050002, dp0140001, dp0010050, dp0020001, dp0110001, dp0090001, dp0120002, dp0050003, dp0180009, dp0130004, dp0010045, dp0210003, dp0210001, dp0130003, dp0010002, dp0110011, dp0120006, dp0010021, dp0010041, dp0080024, dp0120009, dp0010040, dp0100007, dp0040003, dp0120011 (22.667, 42.667] : dp0010038, dp0010026, dp0010019, dp0010037, dp0010056, dp0110003, dp0130015, dp0010018, dp0010057, dp0120010, dp0010055, dp0200001, dp0080023, dp0010017 (42.667, 62.667] : dp0080006, dp0090004, dp0110014, dp0070003, dp0080022 (62.667, 82.667] : dp0080021, dp0100006, dp0010044, dp0120012, dp0010036, dp0180004, dp0150001, dp0060003, dp0010033 (82.667, 102.667] : dp0070001 (102.667, 122.667] : dp0010054, dp0010024, dp0100004, dp0110006, dp0080020, dp0060001, dp0010005 (122.667, 142.667] : dp0010014, dp0010043 (142.667, 162.667] : dp0070002, dp0110017 (162.667, 182.667] : dp0060002, dp0080013, dp0010052, dp0180003 (182.667, 202.667] : dp0010006, dp0080007 (202.667, 222.667] : dp0180008, dp0010016, dp0120014, dp0190001 (222.667, 242.667] : dp0110004, dp0080016, dp0110005 (262.667, 282.667] : dp0120020 (302.667, 322.667] : dp0080008, dp0010035 (322.667, 342.667] : dp0180007 (362.667, 382.667] : dp0120015, dp0180005 (382.667, 402.667] : dp0080009 (402.667, 422.667] : dp0090003, dp0010053, dp0100005 (422.667, 442.667] : dp0120016 (462.667, 482.667] : dp0110016, dp0010015, dp0120018 (482.667, 502.667] : dp0080011 (502.667, 522.667] : dp0010034, dp0080012 (522.667, 542.667] : dp0080010, dp0080005 (562.667, 582.667] : dp0110013 (722.667, 742.667] : dp0010025 (982.667, 1002.667] : dp0090005 (1082.667, 1102.667] : dp0110007 (1302.667, 1322.667] : dp0080014 (1322.667, 1342.667] : dp0110015 (1422.667, 1442.667] : shape_leng (1562.667, 1582.667] : dp0120019 (1802.667, 1822.667] : dp0080018 (2282.667, 2302.667] : dp0080017 (2842.667, 2862.667] : dp0120017 (2942.667, 2962.667] : dp0080015 (5342.667, 5362.667] : awater10 (10262.667, 10282.667] : aland10 (13442.667, 13462.667] : dp0180006
df.plot(subplots=True,bins=7,layout=(9,5),figsize=(21,21),kind='hist',logy=True,legend=False);
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-185-a73e470ef1e0> in <module>() ----> 1 df.plot(subplots=True,bins=7,layout=(9,5),figsize=(21,21),kind='hist',logy=True,legend=False); /opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/tools/plotting.py in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds) 2483 yerr=yerr, xerr=xerr, 2484 secondary_y=secondary_y, sort_columns=sort_columns, -> 2485 **kwds) 2486 2487 /opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/tools/plotting.py in _plot(data, x, y, subplots, ax, kind, **kwds) 2323 plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds) 2324 -> 2325 plot_obj.generate() 2326 plot_obj.draw() 2327 return plot_obj.result /opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/tools/plotting.py in generate(self) 918 919 def generate(self): --> 920 self._args_adjust() 921 self._compute_plot_data() 922 self._setup_subplots() /opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/pandas/tools/plotting.py in _args_adjust(self) 1941 hist, self.bins = np.histogram(values, bins=self.bins, 1942 range=self.kwds.get('range', None), -> 1943 weights=self.kwds.get('weights', None)) 1944 1945 if com.is_list_like(self.bottom): /opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/numpy/lib/function_base.py in histogram(a, bins, range, normed, weights, density) 185 range = (0, 1) 186 else: --> 187 range = (a.min(), a.max()) 188 mn, mx = [mi + 0.0 for mi in range] 189 if mn == mx: /opt/local/Library/Frameworks/Python.framework/Versions/3.4/lib/python3.4/site-packages/numpy/core/_methods.py in _amin(a, axis, out, keepdims) 27 28 def _amin(a, axis=None, out=None, keepdims=False): ---> 29 return umr_minimum(a, axis, None, out, keepdims) 30 31 def _sum(a, axis=None, dtype=None, out=None, keepdims=False): TypeError: unorderable types: bool() <= str()
column_names = mkUrl(acctName,tableName,limit='LIMIT%200')
print(column_names)
t = pd.read_csv(column_names)
banned = ['the_geom','the_geom_webmercator','cartodb_id','created_at','updated_at']
cols = []
for c in t:
if (c not in banned) & (t[c].dtype != 'object'):
cols.append(c)
print(','.join(cols))
http://common-data.cartodb.com/api/v2/sql?q=SELECT%20*%20FROM%20all_day%20LIMIT%200&format=csv&filename=all_day
Get more indepth stats using scipy.stats.desc
## Give back the stats of each column (via scipy.stats)
print('\t'.join(['clmn','num','minmax','mean','var','skew','kurtosis']))
for c in df:
n, mm, mu, s, sk, k = desc(df[c].replace([np.inf, -np.inf], np.nan).dropna())
print(c,'\t','\t'.join(str(x) for x in [n,mm,mu,s,sk,k]))
clmn num minmax mean var skew kurtosis depth 222 (0.0, 208.62) 17.0649261261 798.823978299 3.5681803892920207 16.147488224314706 mag 222 (-0.10000000000000001, 5.5999999999999996) 1.81531531532 1.58595713995 1.246953727971373 1.192239989861104 nst 153 (3.0, 82.0) 21.0849673203 242.565101479 1.5853127360965464 2.669515568094612 gap 183 (23.0, 319.0) 107.90704918 3858.77095168 1.3178788032001225 1.2217761865370296 dmin 183 (0.00059239999999999998, 11.324000000000002) 0.402881441694 1.59172067029 5.736046566233354 37.96420844347665 rms 219 (0.01, 2.6800000000000002) 0.31820456621 0.120610742548 2.3009398871390228 9.156679301879363
s = np.random.poisson(20,100000)
count, bins, ignored = plt.hist(s, bins=44, normed=True)
plt.show()
a, m = 3., 1. # shape and mode
s = np.random.pareto(a,100000) + m
count, bins, ignored = plt.hist(s, 100, normed=True)
a = 5. # shape
samples = 100000
s = np.random.power(a, samples)
count, bins, ignored = plt.hist(s, bins=75)
mu, sigma = 0, 0.1 # mean and standard deviation
s = np.random.normal(mu, sigma, 100000)
count, bins, ignored = plt.hist(np.pi + s, bins=100, normed=True)
a = 2. # parameter
s = np.random.zipf(a, 100000)
count, bins, ignored = plt.hist(s[s<50], bins=75, normed=True)
How to identify distributions of data?