import arcpy as ARCPY
import arcpy.da as DA
inputFC = r'../data/CA_Polygons.shp'
fieldNames = ['PCR2000', 'POP2000', 'PERCNOHS']
tab = DA.TableToNumPyArray(inputFC, fieldNames)
print(tab)
[(1.30711491471, 1449840, 37.0) (0.949885742909, 1209, 38.3) (0.814953582949, 35153, 41.4) (0.763664715408, 203807, 42.9) (0.867384795299, 40645, 48.1) (0.813593366955, 18817, 44.3) (1.5038884687, 952810, 32.1) (0.612516024774, 27440, 50.0) (1.23401339141, 157162, 36.3) (0.758988619134, 801288, 47.3) (0.666868957024, 26416, 46.4) (0.791296201062, 126462, 44.3) (0.625993159441, 142410, 56.9) (0.838241159573, 17955, 38.3) (0.709664321446, 663803, 48.3) (0.558059878736, 129835, 50.9) (0.795906059028, 58538, 49.6) (0.59516742156, 33719, 43.3) (0.98591007865, 9538191, 38.0) (0.626400277293, 123587, 58.4) (2.30772253595, 247520, 20.9) (0.763503906916, 17101, 44.5) (0.856496344376, 86407, 43.1) (0.659142469682, 212258, 49.3) (0.696038086886, 9406, 40.1) (0.906145315496, 12921, 31.1) (1.04151492927, 402990, 37.5) (1.28209385709, 124565, 36.6) (1.06427625094, 92520, 39.0) (1.2656840848, 2854513, 29.5) (1.25503460751, 251012, 36.9) (0.871632187308, 20754, 39.4) (0.809361084377, 1558985, 41.6) (0.970339838629, 1229940, 34.2) (0.97698382771, 53781, 55.9) (0.746540559206, 1718037, 43.3) (1.11463689739, 2827366, 34.7) (1.87815228123, 777885, 38.2) (0.829782920213, 567885, 50.6) (0.94607588002, 247839, 40.2) (2.00222842151, 707820, 28.5) (1.0993818875, 399990, 28.7) (1.83427209221, 1684947, 31.0) (1.35152165122, 255835, 36.9) (0.83764148855, 163615, 41.1) (0.788376107359, 3574, 41.7) (0.771636055401, 44218, 43.7) (0.93624192685, 396974, 37.3) (1.24377672351, 460421, 36.4) (0.8013076198, 449471, 49.6) (0.844111222501, 79192, 42.7) (0.64214819647, 56170, 44.2) (0.676074973366, 12980, 43.3) (0.662256627258, 368627, 55.8) (0.800930337009, 54658, 41.2) (1.13167786774, 756506, 36.2) (0.894016777176, 169835, 38.4) (0.644656909946, 60372, 43.4)]
Cursors and DataAccess are not assured to read attributes in order.
Keeps track of the shapes and their attributes so that one can create output features w/o post-joins.
Unique ID works with Spatial Weights Formats in ArcGIS, PySAL, R, Matlab, GeoDa etc..
import SSDataObject as SSDO
ssdo = SSDO.SSDataObject(inputFC)
ssdo.obtainData("MYID", fieldNames)
print(ssdo.fields['PCR2000'].data)
[ 1.30711491 0.94988574 0.81495358 0.76366472 0.8673848 0.81359337 1.50388847 0.61251602 1.23401339 0.75898862 0.66686896 0.7912962 0.62599316 0.83824116 0.70966432 0.55805988 0.79590606 0.59516742 0.98591008 0.62640028 2.30772254 0.76350391 0.85649634 0.65914247 0.69603809 0.90614532 1.04151493 1.28209386 1.06427625 1.26568408 1.25503461 0.87163219 0.80936108 0.97033984 0.97698383 0.74654056 1.1146369 1.87815228 0.82978292 0.94607588 2.00222842 1.09938189 1.83427209 1.35152165 0.83764149 0.78837611 0.77163606 0.93624193 1.24377672 0.80130762 0.84411122 0.6421482 0.67607497 0.66225663 0.80093034 1.13167787 0.89401678 0.64465691]
import pandas as PANDAS
df = ssdo.getDataFrame()
print(df)
PCR2000 PERCNOHS POP2000 158 1.307115 37.0 1449840 159 0.949886 38.3 1209 160 0.814954 41.4 35153 161 0.763665 42.9 203807 162 0.867385 48.1 40645 163 0.813593 44.3 18817 164 1.503888 32.1 952810 165 0.612516 50.0 27440 166 1.234013 36.3 157162 167 0.758989 47.3 801288 168 0.666869 46.4 26416 169 0.791296 44.3 126462 170 0.625993 56.9 142410 171 0.838241 38.3 17955 172 0.709664 48.3 663803 173 0.558060 50.9 129835 174 0.795906 49.6 58538 175 0.595167 43.3 33719 176 0.985910 38.0 9538191 177 0.626400 58.4 123587 178 2.307723 20.9 247520 179 0.763504 44.5 17101 180 0.856496 43.1 86407 181 0.659142 49.3 212258 182 0.696038 40.1 9406 183 0.906145 31.1 12921 184 1.041515 37.5 402990 185 1.282094 36.6 124565 186 1.064276 39.0 92520 187 1.265684 29.5 2854513 188 1.255035 36.9 251012 189 0.871632 39.4 20754 190 0.809361 41.6 1558985 191 0.970340 34.2 1229940 192 0.976984 55.9 53781 193 0.746541 43.3 1718037 194 1.114637 34.7 2827366 195 1.878152 38.2 777885 196 0.829783 50.6 567885 197 0.946076 40.2 247839 198 2.002228 28.5 707820 199 1.099382 28.7 399990 200 1.834272 31.0 1684947 201 1.351522 36.9 255835 202 0.837641 41.1 163615 203 0.788376 41.7 3574 204 0.771636 43.7 44218 205 0.936242 37.3 396974 206 1.243777 36.4 460421 207 0.801308 49.6 449471 208 0.844111 42.7 79192 209 0.642148 44.2 56170 210 0.676075 43.3 12980 211 0.662257 55.8 368627 212 0.800930 41.2 54658 213 1.131678 36.2 756506 214 0.894017 38.4 169835 215 0.644657 43.4 60372
import numpy as NUM
import scipy.cluster.vq as CLUST
import arcgisscripting as ARC
X = df.as_matrix()
whiteData = CLUST.whiten(X)
centers, distortion = CLUST.kmeans(whiteData, 6)
groups = ARC._ss.closest_centroid(whiteData, centers)
print(groups)
[4 3 3 3 2 3 4 2 4 2 2 3 1 3 2 2 2 3 5 1 0 3 3 2 3 4 4 4 3 4 4 3 3 4 1 2 4 0 2 3 0 4 0 4 3 3 3 3 4 2 3 3 3 1 3 4 3 3]
import pysal as PYSAL
import WeightsUtilities as WU
import SSUtilities as UTILS
def swm2pysal(swmfile):
neighbors = {}
weights = {}
swm = WU.SWMReader(swmfile)
N = swm.numObs
for r in UTILS.ssRange(N):
info = swm.swm.readEntry()
masterID, nn, nhs, whs, sumUnstandard = info
if nn != 0:
neighbors[masterID] = nhs
weights[masterID] = whs
swm.close()
ids = list(neighbors.keys())
ids.sort()
w = PYSAL.W(neighbors, weights, ids)
return w
swmFile = r"C:\Data\Conferences\esri_stat_summit_16\zzQueen.swm"
w = swm2pysal(swmFile)
maxp = PYSAL.region.Maxp(w, X[:,0:2], 3000000., floor_variable = X[:,2])
maxpGroups = NUM.empty((ssdo.numObs,), int)
for regionID, masterIDs in enumerate(maxp.regions):
orderIDs = [ssdo.master2Order[i] for i in masterIDs]
maxpGroups[orderIDs] = regionID
print((regionID, orderIDs))
(0, [0, 38, 33, 6]) (1, [44, 24, 46, 31, 51, 45, 10, 52, 22, 16, 7, 11, 27, 5, 48, 56, 57, 47, 30, 20, 17, 28, 50, 8, 3, 1, 54, 2, 21]) (2, [40, 37, 43, 42, 26]) (3, [13, 35, 29, 25]) (4, [41, 55, 18]) (5, [39, 14, 53, 15, 34, 9, 23, 19, 49, 4]) (6, [12, 32, 36])
import Partition as PART
skater = PART.Partition(ssdo, fieldNames, spaceConcept = "GET_SPATIAL_WEIGHTS_FROM_FILE",
weightsFile = swmFile, kPartitions = 6)
print(skater.partition)
[1 4 4 4 5 4 1 4 4 5 4 4 5 4 5 5 4 4 0 5 2 5 4 5 4 4 5 4 4 3 4 4 5 4 5 5 3 1 5 5 1 5 1 1 4 4 4 4 4 5 4 4 4 5 4 5 4 4]
ARCPY.env.overwriteOutput = True
outputFC = r'C:\Data\Conferences\esri_stat_summit_16\PYDemo\PYDemo.gdb\cluster_output'
outK = SSDO.CandidateField('KMEANS', 'LONG', groups + 1)
outMax = SSDO.CandidateField('MAXP', 'LONG', maxpGroups + 1)
outSKATER = SSDO.CandidateField('SKATER', 'LONG', skater.partitionOutput)
outFields = {'KMEANS': outK, 'MAXP': outMax, 'SKATER': outSKATER}
appendFields = fieldNames + ["NEW_NAME"]
ssdo.output2NewFC(outputFC, outFields, appendFields = appendFields)