"""Guided tour of DistArray's global API, shown side by side with NumPy.

The script walks through, in order: array attributes, ufuncs and
arithmetic, distributions, the Context object, parallel file I/O,
reductions, running user functions through ``context.apply``, slicing
and the ``__distarray__`` protocol.

Bare expression statements (e.g. ``nparr`` or ``darr.toarray()``) are
kept from the original interactive session; they are evaluated but
their values are not displayed when the file runs as a plain script.

NOTE(review): this presumably needs a running DistArray engine cluster
for ``Context()`` to connect to -- confirm before running.
"""
# Keeps the multi-argument print calls below producing the same output
# on Python 2 as the original print statements did.
from __future__ import print_function

# some utility imports
from pprint import pprint

from matplotlib import pyplot as plt

# main imports
import numpy
import distarray
import distarray.globalapi as da
from distarray.globalapi import Context, Distribution
from distarray.plotting import plot_array_distribution

numpy.set_printoptions(precision=2)  # display formatting

nparr = numpy.random.random((4, 5))
nparr

# NumPy array attributes
print("type:", type(nparr))
print("dtype:", nparr.dtype)
print("ndim:", nparr.ndim)
print("shape:", nparr.shape)
print("itemsize:", nparr.itemsize)
print("nbytes:", nparr.nbytes)

context = Context()
darr = context.fromarray(nparr)
darr

# parts of the array are stored on each engine
for i, a in enumerate(darr.get_localarrays()):
    print(i, a)

# DistArray attributes
print("type:", type(darr))
print("dtype:", darr.dtype)
print("ndim:", darr.ndim)
print("shape:", darr.shape)
print("itemsize:", darr.itemsize)
print("nbytes:", darr.nbytes)

# with some extra...
print("targets:", darr.targets)
print("context:", darr.context)
print("distribution:", darr.distribution)

## NumPy ##
numpy.sin(nparr)

## DistArray ##
da.sin(darr)
da.sin(darr).toarray()

## NumPy ##
nparr + nparr

## DistArray ##
darr + darr
(darr + darr).toarray()

# Distributions control which processes own which (global) indices
distribution = darr.distribution

# this is a 2D distribution
pprint(distribution.maps)

# setup
# NOTE(review): four process coordinates are listed, which presumably
# assumes four engines -- adjust if the cluster size differs.
process_coords = [(0, 0), (1, 0), (2, 0), (3, 0)]
plot_array_distribution(darr, process_coords, cell_label=False, legend=True)

distribution.maps[0].bounds

# the above is the default, you can make more complex distributions
distribution = Distribution.from_shape(context, (64, 64), dist=('b', 'c'))
a = context.zeros(distribution, dtype='int32')
plot_array_distribution(a, process_coords, cell_label=False, legend=True)

# Context objects manage the setup and communication of the worker processes
# for DistArray objects.
print("targets:", context.targets)
print("comm:", context.comm)

# load .npy files in parallel
numpy.save("/tmp/outfile.npy", nparr)
distribution = Distribution.from_shape(context, nparr.shape)
new_darr = context.load_npy("/tmp/outfile.npy", distribution)
new_darr

# save DistArrays to .hdf5 files in parallel
context.save_hdf5("/tmp/outfile.hdf5", darr, mode='w')

# load DistArrays from .hdf5 files in parallel (using h5py)
context.load_hdf5("/tmp/outfile.hdf5", distribution)

# save to .dnpy (a built-in flat-file format based on .npy)
context.save_dnpy("/tmp/outfile", darr)

# load from .dnpy
context.load_dnpy("/tmp/outfile")

## NumPy ##
print("sum:", nparr.sum())
print("sum over an axis:", nparr.sum(axis=1))

## DistArray ##
print("sum:", darr.sum(), darr.sum().toarray())
print("sum over an axis:", darr.sum(axis=1), darr.sum(axis=1).toarray())


def get_local_random():
    """Return one random integer from ``numpy.random.randint(10)``."""
    # Imported locally rather than relying on the module-level import;
    # presumably because the function body executes on the engines when
    # run through ``context.apply`` -- TODO confirm.
    import numpy
    return numpy.random.randint(10)


context.apply(get_local_random)


def get_local_var(darr):
    """Return ``darr.ndarray.var()`` for the array object passed in.

    NOTE(review): when called through ``context.apply`` with
    ``darr.key``, the argument is presumably the engine-local piece of
    the distributed array -- confirm against the DistArray docs.
    """
    return darr.ndarray.var()


context.apply(get_local_var, args=(darr.key,))

# as a reminder
darr.toarray()
darr.get_localshapes()

# take a column slice
darr_view = darr[:, 3]
print(darr_view)
print(darr_view.toarray())

# changes in the view change the original
darr_view[3] = -0.99
print("view:")
print(darr_view.toarray())
print("original:")
print(darr.toarray())

# a more complex slice
print(darr[:, 2::2])
print(darr[:-1, 2::2].toarray())


def return_protocol_structure(darr):
    """Return the result of calling ``darr.__distarray__()``."""
    return darr.__distarray__()


context.apply(return_protocol_structure, args=(darr.key,))