#!/usr/bin/env python
# coding: utf-8

# # Using the Spatial Statistics Data Object (SSDataObject) Makes Feature IO Simple
# - SSDataObject does the read/write and accounting of feature/attribute and NumPy Array order
# - Write/Utilize methods that take NumPy Arrays

# ## Using NumPy as the common denominator
#
# - Could use the ArcPy Data Access Module directly, but there are a host of issues/information one must take into account:
#     * How to deal with projections and other environment settings?
#     * How Cursors affect the accounting of features?
#     * How to deal with bad records/bad data and error handling?
#     * How to honor/account for full field object control?
#     * How do I create output features that correspond to my inputs?
#         - Points are easy, what about Polygons and Polylines?
# - Spatial Statistics Data Object (SSDataObject)
#     * Almost 30 Spatial Statistics Tools written in Python that ${\bf{must}}$ behave like traditional GP Tools
#     * Use SSDataObject and your code should adhere

# ## The Data Analysis Python Modules
#
# - [PANDAS (Python Data Analysis Library)](http://pandas.pydata.org/)
#
# - [SciPy (Scientific Python)](http://www.scipy.org/)
#
# - [PySAL (Python Spatial Analysis Library)](https://geodacenter.asu.edu/pysal)

# ## Basic Imports

# In[15]:


import arcpy as ARCPY
import numpy as NUM
import SSDataObject as SSDO


# ## Initialize and Load Fields into Spatial Statistics Data Object
# - The Unique ID Field ("MYID" in this example) will keep track of the order of your features
#     * You can use ```ssdo.oidName``` as your Unique ID Field
#     * You have no control over Object ID Fields. It is quick, assures "uniqueness", but you can't assume they will not get "scrambled" during copies.
#     * To assure full control I advocate the "Add Field (LONG)" --> "Calculate Field (From Object ID)" workflow.

# In[16]:


inputFC = r'../data/CA_Polygons.shp'
ssdo = SSDO.SSDataObject(inputFC)
ssdo.obtainData(
    "MYID",
    ['GROWTH', 'LOGPCR69', 'PERCNOHS', 'POP1969'],
)
df = ssdo.getDataFrame()
print(df.head())


# ## You can get your data using the core NumPy Arrays
# - Use ```.data``` to get the native data type
# - Use the ```returnDouble()``` function to cast explicitly to float

# In[17]:


pop69 = ssdo.fields['POP1969']
nativePop69 = pop69.data
floatPop69 = pop69.returnDouble()
print(floatPop69[0:5])


# ## You can get your data in a PANDAS Data Frame
# - Note the Unique ID Field is used as the Index

# In[18]:


df = ssdo.getDataFrame()
print(df.head())


# ## By default the SSDataObject only stores the centroids of the features

# In[19]:


# Column 0 is stored as 'XCoords' and column 1 as 'YCoords'.
centroids = ssdo.xyCoords
df['XCoords'] = centroids[:, 0]
df['YCoords'] = centroids[:, 1]
print(df.head())


# ## You can get the core ArcPy Geometries if desired
# - Set ```requireGeometry = True```

# In[20]:


ssdo = SSDO.SSDataObject(inputFC)
ssdo.obtainData(
    "MYID",
    ['GROWTH', 'LOGPCR69', 'PERCNOHS', 'POP1969'],
    requireGeometry=True,
)
df = ssdo.getDataFrame()
# dtype=object keeps each entry of ssdo.shapes as a single array element.
shapes = NUM.array(ssdo.shapes, dtype=object)
df['shapes'] = shapes
print(df.head())


# ## Coming Soon... ArcPy Geometry Data Frame Integration
# - In conjunction with the ArcGIS Python SDK
# - Spatial operators on ArcGIS Data Frames: selection, clip, intersection etc.

# ## Creating Output Feature Classes
# - Simple Example: Adding a field of random standard normal values to your input/output
# - ```appendFields``` can be used to copy over any fields from the input whether you read them into the SSDataObject or not.
# - E.g. 'NEW_NAME' was never read into Python but it will be copied to the output. This can save you a lot of memory.

# In[21]:


import numpy.random as RAND
import os as OS

ARCPY.env.overwriteOutput = True

# One standard-normal draw per observation held by the SSDataObject.
outArray = RAND.normal(loc=0, scale=1, size=(ssdo.numObs,))
outField = SSDO.CandidateField(
    'STDNORM',
    'DOUBLE',
    outArray,
    alias='Standard Normal',
)
# Output fields are keyed by their field name.
outDict = {outField.name: outField}

outputFC = OS.path.abspath(r'../data/testMyOutput.shp')
ssdo.output2NewFC(
    outputFC,
    outDict,
    appendFields=['GROWTH', 'PERCNOHS', 'NEW_NAME'],
)


# In[ ]: