import arcpy as ARCPY
import arcpy.da as DA
inputFC = r'../data/CA_Polygons.shp'
fieldNames = ['PCR2000', 'POP2000', 'PERCNOHS']
tab = DA.TableToNumPyArray(inputFC, fieldNames)
print(tab)
[(1.30711491471, 1449840, 37.0) (0.949885742909, 1209, 38.3) (0.814953582949, 35153, 41.4) (0.763664715408, 203807, 42.9) (0.867384795299, 40645, 48.1) (0.813593366955, 18817, 44.3) (1.5038884687, 952810, 32.1) (0.612516024774, 27440, 50.0) (1.23401339141, 157162, 36.3) (0.758988619134, 801288, 47.3) (0.666868957024, 26416, 46.4) (0.791296201062, 126462, 44.3) (0.625993159441, 142410, 56.9) (0.838241159573, 17955, 38.3) (0.709664321446, 663803, 48.3) (0.558059878736, 129835, 50.9) (0.795906059028, 58538, 49.6) (0.59516742156, 33719, 43.3) (0.98591007865, 9538191, 38.0) (0.626400277293, 123587, 58.4) (2.30772253595, 247520, 20.9) (0.763503906916, 17101, 44.5) (0.856496344376, 86407, 43.1) (0.659142469682, 212258, 49.3) (0.696038086886, 9406, 40.1) (0.906145315496, 12921, 31.1) (1.04151492927, 402990, 37.5) (1.28209385709, 124565, 36.6) (1.06427625094, 92520, 39.0) (1.2656840848, 2854513, 29.5) (1.25503460751, 251012, 36.9) (0.871632187308, 20754, 39.4) (0.809361084377, 1558985, 41.6) (0.970339838629, 1229940, 34.2) (0.97698382771, 53781, 55.9) (0.746540559206, 1718037, 43.3) (1.11463689739, 2827366, 34.7) (1.87815228123, 777885, 38.2) (0.829782920213, 567885, 50.6) (0.94607588002, 247839, 40.2) (2.00222842151, 707820, 28.5) (1.0993818875, 399990, 28.7) (1.83427209221, 1684947, 31.0) (1.35152165122, 255835, 36.9) (0.83764148855, 163615, 41.1) (0.788376107359, 3574, 41.7) (0.771636055401, 44218, 43.7) (0.93624192685, 396974, 37.3) (1.24377672351, 460421, 36.4) (0.8013076198, 449471, 49.6) (0.844111222501, 79192, 42.7) (0.64214819647, 56170, 44.2) (0.676074973366, 12980, 43.3) (0.662256627258, 368627, 55.8) (0.800930337009, 54658, 41.2) (1.13167786774, 756506, 36.2) (0.894016777176, 169835, 38.4) (0.644656909946, 60372, 43.4)]
Cursors and DataAccess are not assured to read attributes in order.
Keeps track of the shapes and their attributes so that one can create output features w/o post-joins.
Unique ID works with Spatial Weights Formats in ArcGIS, PySAL, R, Matlab, GeoDa etc..
import SSDataObject as SSDO
import os as OS
inputFC = r'../data/CA_Polygons.shp'
fullFC = OS.path.abspath(inputFC)
fullPath, fcName = OS.path.split(fullFC)
ssdo = SSDO.SSDataObject(inputFC)
uniqueIDField = "MYID"
fieldNames = ['PCR2010', 'POP2010', 'PERCNOHS']
ssdo.obtainData(uniqueIDField, fieldNames)
ssdo = SSDO.SSDataObject(inputFC)
ssdo.obtainData("MYID", fieldNames)
print(ssdo.fields['POP2010'].data)
[1513043 1162 38026 220000 45485 21475 1052605 28634 181151 933168 28112 134762 175136 18519 842813 153183 64753 34811 9826773 151468 252789 18267 87812 256754 9695 14224 416366 136840 98765 3017598 350233 19949 2202978 1422094 55520 2042027 3105115 805340 687661 270112 719604 424630 1786267 262880 177308 3229 44897 414125 484712 515420 94859 63637 13768 443509 55221 825445 201105 72364]
import pandas as PANDAS
df = ssdo.getDataFrame()
print(df)
PCR2010 PERCNOHS POP2010 158 1.206422 37.0 1513043 159 1.079837 38.3 1162 160 0.886305 41.4 38026 161 0.816018 42.9 220000 162 0.877746 48.1 45485 163 1.130213 44.3 21475 164 1.391533 32.1 1052605 165 0.675817 50.0 28634 166 1.209810 36.3 181151 167 0.775344 47.3 933168 168 0.832085 46.4 28112 169 0.818885 44.3 134762 170 0.685975 56.9 175136 171 0.931374 38.3 18519 172 0.742849 48.3 842813 173 0.666717 50.9 153183 174 0.816463 49.6 64753 175 0.692517 43.3 34811 176 1.048479 38.0 9826773 177 0.646304 58.4 151468 178 2.080740 20.9 252789 179 0.839421 44.5 18267 180 0.890045 43.1 87812 181 0.685651 49.3 256754 182 0.849918 40.1 9695 183 0.984652 31.1 14224 184 1.022456 37.5 416366 185 1.249204 36.6 136840 186 1.081794 39.0 98765 187 1.250990 29.5 3017598 188 1.179444 36.9 350233 189 0.973480 39.4 19949 190 0.733145 41.6 2202978 191 0.945829 34.2 1422094 192 0.889421 55.9 55520 193 0.742851 43.3 2042027 194 1.144717 34.7 3105115 195 1.760952 38.2 805340 196 0.771014 50.6 687661 197 0.978288 40.2 270112 198 1.705111 28.5 719604 199 1.100690 28.7 424630 200 1.455588 31.0 1786267 201 1.189412 36.9 262880 202 0.879299 41.1 177308 203 0.833698 41.7 3229 204 0.842544 43.7 44897 205 0.951722 37.3 414125 206 1.108553 36.4 484712 207 0.782673 49.6 515420 208 0.837630 42.7 94859 209 0.664463 44.2 63637 210 0.739619 43.3 13768 211 0.703468 55.8 443509 212 0.897840 41.2 55221 213 1.120266 36.2 825445 214 0.915843 38.4 201105 215 0.747482 43.4 72364
import numpy as NUM
import scipy.cluster.vq as CLUST
import arcgisscripting as ARC
X = df.as_matrix()
whiteData = CLUST.whiten(X)
centers, distortion = CLUST.kmeans(whiteData, 6)
groups = ARC._ss.closest_centroid(whiteData, centers)
print(groups)
[1 1 2 2 2 1 3 0 1 0 2 2 4 1 0 4 0 2 5 4 3 2 2 0 2 1 1 1 1 3 1 1 0 1 4 0 3 3 0 1 3 1 3 1 2 2 2 1 1 0 2 2 2 4 2 1 1 2]
C:\ArcGIS\bin\Python\envs\arcgispro-py3\lib\site-packages\ipykernel\__main__.py:7: DeprecationWarning: PyArray_FromDims: use PyArray_SimpleNew. C:\ArcGIS\bin\Python\envs\arcgispro-py3\lib\site-packages\ipykernel\__main__.py:7: DeprecationWarning: PyArray_FromDimsAndDataAndDescr: use PyArray_NewFromDescr.
import pysal as PYSAL
import pysal2ArcGIS as PYSAL_UTILS
swmFile = OS.path.join(fullPath, "rook_bin.swm")
w = PYSAL_UTILS.swm2Weights(ssdo, swmFile)
maxp = PYSAL.region.Maxp(w, X[:,0:2], 3000000., floor_variable = X[:,2])
maxpGroups = NUM.empty((ssdo.numObs,), int)
for regionID, orderIDs in enumerate(maxp.regions):
maxpGroups[orderIDs] = regionID
maxpGroups
array([2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 2, 2, 5, 2, 1, 1, 2, 2, 4, 1, 0, 2, 2, 1, 2, 2, 4, 0, 2, 7, 2, 2, 5, 0, 1, 5, 6, 3, 2, 4, 3, 4, 3, 4, 2, 2, 2, 0, 0, 1, 0, 2, 2, 1, 2, 4, 0, 2])
import Partition as PART
skater = PART.Partition(ssdo, fieldNames, spaceConcept = "GET_SPATIAL_WEIGHTS_FROM_FILE",
weightsFile = swmFile, kPartitions = 6)
print(skater.partition)
[1 4 4 4 5 4 1 4 4 5 4 4 5 4 5 5 4 4 0 5 2 5 4 5 4 4 5 4 4 3 4 4 5 4 5 5 3 1 5 5 1 5 1 5 4 4 4 4 4 5 4 4 4 5 4 5 4 4]
ARCPY.env.overwriteOutput = True
outputFC = r'../data/cluster_output.shp'
outK = SSDO.CandidateField('KMEANS', 'LONG', groups + 1)
outMax = SSDO.CandidateField('MAXP', 'LONG', maxpGroups + 1)
outSKATER = SSDO.CandidateField('SKATER', 'LONG', skater.partitionOutput)
outFields = {'KMEANS': outK, 'MAXP': outMax, 'SKATER': outSKATER}
appendFields = fieldNames + ["NEW_NAME"]
ssdo.output2NewFC(outputFC, outFields, appendFields = appendFields)