import os, re from pandas import * from geopandas import * pylab.rcParams['savefig.dpi'] = 100 shpfile = 'building_footprints_shape_webmercator_12-14/building_1214.shp' bldgs = GeoDataFrame.from_file(shpfile) print 'number of rows:\t' + str(len(bldgs)) print 'columns:\t' + str(bldgs.columns) print 'crs:\t\t' + str(bldgs.crs) # projection info for the geometry data def boronum(bbl): return int(bbl[0]) boro = bldgs['BBL'].apply(boronum) bldgs['boro'] = boro bldgs = bldgs[['BBL','boro','GROUND_ELE','HEIGHT_ROO','Shape_Area','Shape_Leng','geometry']] bldgs.set_index('BBL', inplace=True) bldgs.sort(inplace=True) bldgs.head() bldgs['geometry_latlon'] = bldgs['geometry'].to_crs(epsg=4326) bldgs.head() # notes for further investigation of the conversion issue: # GeoPandas uses pyproj; the code below provides an initial suggestion that something might be amiss with the units handling # #import pyproj #x1 = -8239895.129312261 #y1 = 4966979.423426831 #wgs84 = pyproj.Proj("+init=EPSG:4326") # LatLon with WGS84 datum used by GPS units and Google Earth #epsg2263 = pyproj.Proj("+init=EPSG:2263",preserve_units=True) # local NYC coordinate system #print pyproj.transform(epsg2263, wgs84, x1, y1) #conv = 0.3048 #print pyproj.transform(epsg2263, wgs84, x1*conv, y1*conv) # # outputs: # (-112.67577507532987, 48.093410208201874) # (-87.23011424552195, 43.594327921961934) bldgs.head(500).plot() bldgs[bldgs.boro == 1].plot() filename = 'Energy_and_Water_Data_Disclosure_for_Local_Law_84__2012_.csv' cleanedcsv = '' multilinekeeper = '' with open(filename,'rU') as f: for line in f: line = re.sub(' +',' ',line) line = re.sub(', ',',',line) if multilinekeeper == '': if len(re.findall('"',line)) == 1: multilinekeeper = line.strip() else: cleanedcsv += line else: if '"' in line: cleanedcsv += multilinekeeper + line multilinekeeper = '' else: multilinekeeper += ' ' + line.strip() with open('cleaned_' + filename,'w') as f: f.write(cleanedcsv) energy = read_csv('cleaned_' + filename) print 'number of rows:\t' + str(len(energy)) print 'columns:' print energy.columns energy['BBL'] = energy['BBL'].astype(str) energy = energy.ix[:,[0,1,2,7,8,11,12,13,14,15]] energy.set_index('BBL', inplace=True) energy.sort(inplace=True) energy.head() energy.plot(x=2,y=5,kind='scatter') bldgsenergy = merge(bldgs, energy, left_index=True, right_index=True, how='outer') print 'number of rows:\t' + str(len(bldgsenergy)) print 'columns:' print bldgsenergy.columns bldgsenergy.ix[:,['HEIGHT_ROO','geometry','geometry_latlon','Site EUI(kBtu/ft2)','Total GHG Emissions(MtCO2e)', 'Number of Buildings']].head() def inbldgs(i): return i in bldgs.index def inenergy(i): return i in energy.index bldgsenergy['geometry_avail'] = Series(bldgsenergy.index,index=bldgsenergy.index).apply(inbldgs) bldgsenergy['energy_avail'] = Series(bldgsenergy.index,index=bldgsenergy.index).apply(inenergy) bldgsenergy.ix[:,['geometry_avail','energy_avail','HEIGHT_ROO','geometry','geometry_latlon','Site EUI(kBtu/ft2)', 'Total GHG Emissions(MtCO2e)','Number of Buildings']].head() print 'number of rows in bldgs:\t\t\t\t\t' + str(len(bldgs)) print 'number of rows in energy:\t\t\t\t\t' + str(len(energy)) print 'number of rows in bldgsenergy:\t\t\t\t\t' + str(len(bldgsenergy)) print 'number of rows in bldgsenergy with geometry:\t\t\t' + str(len(bldgsenergy[bldgsenergy['geometry_avail']])) print 'number of rows in bldgsenergy with energy:\t\t\t' + str(len(bldgsenergy[bldgsenergy['energy_avail']])) print 'number of rows in bldgsenergy with both gometry and energy:\t' + \ str(len(bldgsenergy[bldgsenergy['energy_avail'] & bldgsenergy['geometry_avail']])) bldgsenergygeo = GeoDataFrame(bldgsenergy[bldgsenergy['geometry_avail'] & bldgsenergy['energy_avail']]) bldgsenergygeo.head(500).plot() bldgsenergygeo.rename(columns={'geometry':'geometryfeet'}, inplace=True) bldgsenergygeo.rename(columns={'geometry_latlon':'geometry'}, inplace=True) bldgsenergygeo.head(500).plot() bldgsenergygeo['Address'] = bldgsenergygeo['Street Number'] + ' ' + bldgsenergygeo['Street Name'] bldgsenergygeo.columns bldgsenergygeo = bldgsenergygeo.ix[:,[0,2,6,9,10,11,12,13,14,18]] bldgsenergygeo.head() bldgsenergygeomidtown = bldgsenergygeo[(bldgsenergygeo.boro == 1) & (bldgsenergygeo.geometry.bounds['miny'] < 48.129) & (bldgsenergygeo.geometry.bounds['maxy'] > 48.117) ] print len(bldgsenergygeomidtown) bldgsenergygeomidtown.head() with open('bldgsenergygeo.json','w') as f: f.write(bldgsenergygeomidtown.to_json()) os.system('topojson -p -o bldgsenergytopo.json bldgsenergygeo.json')