#!/usr/bin/env python # coding: utf-8 # ![](./scipy-numpy.png) # # NumPy arrays: Creation and handling # # *Computer tools for Astronomers* # # This first part is an enhanced version of the slide about fundamental array handling in Numpy/SciPy. # # Basically, all I have done here is to add some plots that visualize the arrays with colors, because it makes it clearer what happens when we perform the different operations .For thispurpose, I am introducing the function `imshow()` from Matplotlib. We shall look more at plotting later; for now it is enough to know that `imshow()` can translate the value of each array element into a color, which can make it quite a bit easier to see whatis going on in the array than just looking at a bunch of numbers. # # # > **Please note**: I am using the notebook interface to write this, since it is great for making # > demos of all the feature. However, I recommend that you try the code below in the terminal or QtConsole # > interface, as these have some features that this doesn't. # > For example, the plots will show up in an interactive window that allow, panning zooming, etc., unlike this # > interface that produces static raster images of the plots. # In[ ]: import scipy as sp import matplotlib.pyplot as plt from scipy import random as ra # Purely laziness # Only do this if you are working in the Notebook interface: get_ipython().run_line_magic('matplotlib', 'inline') # ### Array construction # # The simplest way to define an array is by feeding it a list of numbers. # In[ ]: a = sp.array([1., 2., 3.]) print a # If you want a 2D array, feed it a list of lists of numbers: # In[ ]: a = sp.array([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]) print a # NB! In the above, the line breaks are only there to look good. We could just as well have written: # ```python # a = sp.array([[1., 2., 3.], [4., 5., 6.], [7., 8., 9.]]) # ``` # Now I have been reassigning values to the variable name '`a`' a few times. Let's keep it the next time: # In[ ]: a = sp.zeros((8, 8)); print a # What we wrote here was "create an array of $8 \times 8$ elements and set them all to zero". # # There's another convenience function called `ones()`, which can also be called with a tuple of numbers: # In[ ]: b = sp.ones((8, 8)) print b # Besides, there are also the convenience functions `zeros_like()` and `ones_like()` that take another array as the input and returns an array of zeros resp. ones of the same size as the input array: # In[ ]: b = sp.ones_like(a) print(b) # An array can be reshaped to any shape you want, as long as it has the correct number of elements: # In[ ]: b = sp.ones_like(a).reshape((4, 16)) print b # In[ ]: print(b.transpose()) # Or b.T = easier to write but less explicit. b = b.reshape((8, 8)) # Just to get the good ol' `b` array back. # There are a number of different random number generating functions, the simplest of which is `scipy.random.random()`. # Since in the imports section we have already abbreviated `scipy.random` to `ra`, we can write this as: # ```python # c = ra.random(b.shape) # ``` # Remember, you can always get help for a function by writing e.g. `ra.random?` in your IPython console. # Printing an array of random floating point numbers between zero and one doesn't show much, so we will visualize the array with a plotting function instead. Try running the below commands a few times to convince yourself that it is in fact a pretty well randomized set you get: # In[ ]: # This cell should be ignored or deleted... get_ipython().run_line_magic('pinfo', 'plt.colormaps') # In[ ]: plt.set_cmap('hot') c = ra.random(b.shape) * .5 plt.matshow(c) plt.colorbar() # plt.show() # Remember to write this when calling it from the terminal # Another convenience function gives you the Identity matrix, usually called $I$. Since `I` is a bad name for a function, they called it something very similar yet not the same: # In[ ]: d = sp.eye(b.shape[0]); print(d) plt.matshow(d); plt.colorbar() #b.shape # Arribute #b.max() # Method # ### Array vs. matrix # # Unlike e.g. Matlab$^®$, NumPy doesn't perform matrix operations by default, but only when you specifically ask about it. If we use the arrays `c` and `d` as examples, the difference is quite clear. Taking the sum of the two is the same in both languages, as the matrix sum is just defined as the element-wise sum like it is explained in wikipedia: # # ![Matrix sum](https://upload.wikimedia.org/math/7/e/c/7ec5cd9a9e5ef6cf052bfeed0cb8f0b3.png) # In our case, the sum of `c` and `d`, hardly surprising, becomes: # In[ ]: plt.matshow(c + d, interpolation='nearest'); plt.colorbar() # However, in in NumPy our array is ***not treated as a matrix unless we explicitly do this***. # # Here's the difference (by the way, compare the diagonals): # In[ ]: array_product = c * d # Element-wise product matrix_product = sp.dot(d, c) # Matrix product # And now some plotting magic: plt.subplot(121) plt.imshow(array_product, interpolation='nearest') plt.title('Element wise `Array` product \n default in NumPy') plt.subplot(122) plt.imshow(matrix_product, interpolation='nearest') plt.title('`Matrix` product \n default in Matlab/Octave') # plt.show() # Let's call `c` and `d` interpreted as matrices $C$ and $D$, respectively. # Since $D$ is the identity matrix, it should come as no surprise that $D C = C$. # # ### Reshaping # # As mentioned, arrays are easily reshaped, as long as you don't mess up the total number of elements: # In[ ]: e = sp.arange(64).reshape(1, -1) # '-1' here means 'as many as it takes'. plt.imshow(e, interpolation='nearest') # plt.show() # In[ ]: e = sp.arange(64).reshape((8, 8)) # Same array, different shape! plt.imshow(e, interpolation='nearest'); plt.colorbar() # In[ ]: f = e.reshape((4, 16)) plt.imshow(f, interpolation='nearest') plt.colorbar(orientation='horizontal') # ### On loops vs. arrays # # NumPy is optimized for performing element-wise operations on arrays in parallel. # This means that you get both cleaner code and much faster computations if you utilize vectorization well. # To give a completely unscientific idea about the kind of speed you gain, I have written my own function which raises an array`a` to the `x`th power, by looping through the array's indices $(i, j)$ and raise the elements **one by one**, where NumPy does it with all of them in parallel: # In[ ]: def powerit2d(indata, expon): # Version for 2D array for i in range(indata.shape[0]): for j in range(indata.shape[1]): indata[i, j] = indata[i, j] ** expon return indata def powerit3d(indata, expon): # Version for 3D array for i in range(indata.shape[0]): for j in range(indata.shape[1]): for k in range(indata.shape[2]): indata[i, j, k] = indata[i, j, k] ** expon return indata aa = ra.random((20, 20)) aaa = ra.random((20, 20, 20)) # We now time the different methods operating on the 2D array. Note that different runs will give different results, this is *not* a strict benchmarking but only to give some rough feeling of the difference: # In[ ]: get_ipython().run_line_magic('timeit', 'aa ** 2000') get_ipython().run_line_magic('timeit', 'powerit2d(aa, 2000)') # A bit less than a factor of 50 in difference. Now for the 3D array: # In[ ]: get_ipython().run_line_magic('timeit', 'aaa ** 2000') get_ipython().run_line_magic('timeit', 'powerit3d(aaa, 2000)') # ...which gives a slightly larger difference. # # **We really want to utilize NumPy's array optimization when we can.** # ## Slicing & Dicing # # *A short description of this section could be "How to elegantly select subsets of your dataset"*. # # Try to see what you get out of the following manipulations, and understand what they do. If you learn them, you can work very efficiently with $N$-simensional datasets: # In[ ]: print(e) # In[ ]: print(e[0, 0]) # In[ ]: print(e[:, 2:3]) # In these intervals, the last index value is not included. # In[ ]: print(e[2:4, :]) # In[ ]: print(e[2:4]) # Think a bit about this one. # In[ ]: print(e[2:-1, 0]) print(e[2:-1, 0:1]) # What's the difference here? # In[ ]: print e print(e[::-1, 0:2]) # In[ ]: print(e[1:6:2, :]) # In[ ]: print(e[::2, :]) # You can of course always assign one of these subsets to a new variable: # In[ ]: f = e[1:6:2, :] # Etc. # ### Logical indexing: # # This is a way to perform operations on elements in your array when you don't know exactly where they are, but you know that they live up to a certain logical criterion, e.g. below we say "please show me all elements in the array `g` for which we know that the element is an even number" # In[ ]: g = sp.arange(64).reshape((8, 8)) evens = sp.where(g % 2 == 0); print g[evens] # The `where()` function can be used to pick elements by pretty complex criteria: # In[ ]: my_indices = sp.where(((g > 10) & (g < 20)) | (g > 45)) # To see exactly what that last one did, we'll try to "color" the elements that it selected and plot the array: # In[ ]: h = sp.ones_like(d); h[my_indices] = 0. print(h) plt.imshow(h, interpolation='nearest'); plt.colorbar() # In[ ]: pp = sp.random.random((8, 8)) idx = sp.where(pp > .75) qq = sp.zeros_like(pp) qq[idx] = 1 plt.figure() plt.gcf().set_size_inches(9,4) plt.subplot(1, 2, 1) plt.imshow(pp, interpolation='nearest')#; plt.colorbar() plt.subplot(1, 2, 2) plt.imshow(qq, interpolation='nearest') # ## Learning more: # # There's a NumPy tutorial at the [SciPy Wiki](http://wiki.scipy.org/Tentative_NumPy_Tutorial). # # For users of Matlab and Octave, the web site [NumPy for Matlab users](http://wiki.scipy.org/NumPy_for_Matlab_Users) on the same wiki could be very useful. # # There's also a [NumPy for IDL users](http://wiki.scipy.org/NumPy_for_Matlab_Users) page which could possibly be quite useful also if you never used IDL. # ## A couple of more general videos about Python i Science # A video about how to use IPython for several steps in your work flow: Computations, plotting, writing papers etc. # In[1]: from IPython.display import YouTubeVideo YouTubeVideo('iwVvqwLDsJo') # A talk by an astronomer at Berkeley about how Python can be used for everything from running a remote telescope over auto-detection of interesting discoveries to the publication process. # In[2]: YouTubeVideo('mLuIB8aW2KA') # In[ ]: