from osgeo import gdal ds = gdal.Open("./data/current/pmean_sumrc.img") if ds is None: raise Exception("No such raster") a = ds.GetRasterBand(1).ReadAsArray() del ds type(a), a.shape import rasterio with rasterio.open("./data/current/pmean_sumrc.img") as ds: b = ds.read_band(1) type(b), b.shape %pylab inline import os figsize(6,6) def plotit(x, title, cmap="Blues"): plt.imshow(x, cmap=cmap, interpolation='nearest') plt.colorbar() plt.title(title) plt.show() plotit(b, "Summer Precip") with rasterio.open('./data/current/iso_zns3-27.img') as ds: # geotransform defines the grid dimensions # in a geographic reference system gt = ds.transform crs = ds.crs_wkt x = ds.read_band(1) plotit(x, "Ag Zones", cmap="Set3") print("upper left coordinates:", gt[0], gt[3]) print("cell size:", gt[1], gt[5]) print("coordinate reference system:", crs) print x.shape print x.flatten().shape print np.array([x.flatten(), x.flatten()]).T.shape # Explanatory data # explanatory_fields = "tmin12c tmax8c p_ph_c pmean_wntrc pmean_sumrc irr_lands gt_demc grwsnc d2u2c".split() explanatory_rasters = [ './data/current/tmin12c.img', # Min December Temperature './data/current/tmax8c.img', # Max August Temperature './data/current/p_ph_c.img', # Soil pH './data/current/pmean_wntrc.img', # Mean Winter Precipitation './data/current/pmean_sumrc.img', # Mean Summer Precipitation './data/current/irr_lands.img', # Irrigated Lands './data/current/gt_demc.img', # Elevation './data/current/grwsnc.img', # Growing Season './data/current/d2u2c.img' # Distance to Urban Areas ] # Response data (i.e. Agricultural Zones) response_raster = './data/current/iso_zns3-27.img' # Load the training rasters using the sampled subset from pyimpute import load_training_rasters train_xs, train_y = load_training_rasters(response_raster, explanatory_rasters) train_xs.shape, train_y.shape from pyimpute import stratified_sample_raster selected = stratified_sample_raster(response_raster, min_sample_proportion=0.5) train_xs, train_y = load_training_rasters(response_raster, explanatory_rasters, selected) train_xs.shape, train_y.shape from sklearn.ensemble import RandomForestClassifier clf = RandomForestClassifier() clf.fit(train_xs, train_y) # Cross validate from sklearn import cross_validation k = 4 scores = cross_validation.cross_val_score(clf, train_xs, train_y, cv=k) print("%d-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)" % (k, scores.mean() * 100, scores.std() * 200)) import pandas as pd fimp = dict(zip(explanatory_rasters, clf.feature_importances_)) fimp_df = pd.DataFrame.from_dict([fimp,]) fimp_df.plot(kind="bar", figsize=(8,8)) from sklearn.svm import SVC from sklearn.tree import DecisionTreeClassifier from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier from sklearn.naive_bayes import GaussianNB from sklearn.lda import LDA from sklearn.qda import QDA from sklearn.linear_model import LogisticRegression from sklearn.neighbors import KNeighborsClassifier from sklearn.dummy import DummyClassifier from sklearn.ensemble import ExtraTreesClassifier import time classifiers = [ ('LDA ', LDA()), ('tree ', DecisionTreeClassifier()), ('Bayes ', GaussianNB()), ('kNN ', KNeighborsClassifier(n_neighbors=6, weights="distance")), #('SVC-RBC ', SVC(kernel="rbf", gamma=2, C=1)), #('logit ', LogisticRegression()), ('rf ', RandomForestClassifier(n_estimators=20, n_jobs=3)), ('extrarf ', ExtraTreesClassifier(n_estimators=20, n_jobs=3)), ] for name, classifier in classifiers: print name start = time.time() classifier.fit(train_xs, train_y) scores = cross_validation.cross_val_score(clf, train_xs, train_y, cv=k) print "\t%d-fold Cross Validation Accuracy: %0.2f (+/- %0.2f)" % (k, scores.mean() * 100, scores.std() * 200) print "\t", time.time() - start, "seconds" from pyimpute import load_targets target_xs, raster_info = load_targets(explanatory_rasters) print(raster_info) print(target_xs.shape) agzones = clf.predict(target_xs) print(agzones.shape) print(agzones[9668]) print(agzones.reshape(raster_info['shape']).shape) from pyimpute import impute impute(target_xs, clf, raster_info, outdir="_aez_output_current", class_prob=True, certainty=True) !ls -1 "./_aez_output_current" with rasterio.open("./_aez_output_current/responses.img") as ds: zones = ds.read_band(1) with rasterio.open("./data/current/iso_zns3-27.img") as ds: zones_orig = ds.read_band(1) plotit(zones, "Ag Zones", cmap='Set3') plotit(zones_orig, "Ag Zones", cmap="Set3") explanatory_2070_rasters = [ './data/2070s_HadGEM3_rcp85/tmin12c.img', # Min December Temperature './data/2070s_HadGEM3_rcp85/tmax8c.img', # Max August Temperature './data/current/p_ph_c.img', # Soil pH './data/2070s_HadGEM3_rcp85/pmean_wntrc.img', # Mean Winter Precipitation './data/2070s_HadGEM3_rcp85/pmean_sumrc.img', # Mean Summer Precipitation './data/current/irr_lands.img', # Irrigated Lands './data/current/gt_demc.img', # Elevation './data/2070s_HadGEM3_rcp85/grwsnc.img', # Growing Season './data/current/d2u2c.img' # Distance to Urban Areas ] target_xs, raster_info = load_targets(explanatory_2070_rasters) impute(target_xs, clf, raster_info, outdir="_aez_output_2070s_HadGEM3_rcp85", class_prob=True, certainty=True) !ls -1 "./_aez_output_2070s_HadGEM3_rcp85" plotit(zones, "Ag Zones", cmap='Set3') with rasterio.open("./_aez_output_2070s_HadGEM3_rcp85/responses.img") as ds: future_zones = ds.read_band(1) plotit(future_zones, "Ag Zones 2070", cmap='Set3') diff = np.not_equal(future_zones, zones) plotit(diff, "Shift in Ag Zones by 2070") with rasterio.open("./_aez_output_2070s_HadGEM3_rcp85/probability_127.img") as ds: future_zone127 = ds.read_band(1) plotit(future_zone127[0:80,0:75], "Zone 127 in 2070", cmap="Greys") with rasterio.open("./_aez_output_2070s_HadGEM3_rcp85/certainty.img") as ds: certainty = ds.read_band(1) plotit(certainty, "Overall Certainty", cmap="RdYlBu") # image of BLM agreement map from IPython.core.display import Image Image(filename='./img/dougfir_viability.png') %timeit impute(target_xs, clf, raster_info, outdir="_test_default", class_prob=True, certainty=True) %timeit impute(target_xs, clf, raster_info, linechunk=1, outdir="_test_lc1", class_prob=True, certainty=True) Image("./img/Rplot01.png") Image("./img/Rplot02.png") Image("./img/Rplot.png") from geopandas import GeoDataFrame points = './data/dougfir/DF_PNW_prj.shp' dougfir = GeoDataFrame.from_file(points) dougfir['GRIDCODE'][10:19] Image("./img/dfpoints.png") # load training from vector dataset from pyimpute import load_training_vector train_xs, train_y = load_training_vector(points, explanatory_rasters, response_field='GRIDCODE') train_xs.shape, train_y.shape clf = RandomForestClassifier() clf.fit(train_xs, train_y) impute(target_xs, clf, raster_info, outdir="_df_output_2070", class_prob=True, certainty=True) with rasterio.open("./_df_output_2070/probability_1.img") as ds: future_df = ds.read_band(1) plotit(future_df[25:125,10:80], "Douglas-fir - 2070 Viable Range", cmap="Greens")