#!/usr/bin/env python
# coding: utf-8

# Notebook export: advanced NumPy (array internals, reshaping, joining,
# splitting, repeating, fancy-indexing equivalents).
#
# Bare expressions such as ``arr`` are notebook display cells: they are
# evaluated but print nothing when this file is run as a plain script.

# In[1]:

import numpy as np
import pandas as pd

# matplotlib is only used here to set the default figure size; none of the
# visible cells plot, so a missing matplotlib should not abort the script.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None
else:
    plt.rc('figure', figsize=(10, 6))

# Remember the display limit so the final cell of the notebook can restore it.
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_columns = 20
pd.options.display.max_rows = 20
pd.options.display.max_colwidth = 80
np.set_printoptions(precision=4, suppress=True)

# In[2]:

# Seeded generator so the random examples below are reproducible.
rng = np.random.default_rng(seed=12345)

# In[3]:

np.ones((10, 5)).shape

# In[4]:

# Strides: bytes to step in each dimension to reach the next element.
np.ones((3, 4, 5), dtype=np.float64).strides

# In[5]:

# dtype hierarchy checks via the abstract parent types.
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)
np.issubdtype(ints.dtype, np.integer)
np.issubdtype(floats.dtype, np.floating)

# In[6]:

np.float64.mro()

# In[7]:

np.issubdtype(ints.dtype, np.number)

# In[8]:

# Reshaping a 1-D array into 2-D.
arr = np.arange(8)
arr
arr.reshape((4, 2))

# In[9]:

arr.reshape((4, 2)).reshape((2, 4))

# In[10]:

# -1 asks NumPy to infer that dimension from the data size.
arr = np.arange(15)
arr.reshape((5, -1))

# In[11]:

# Another array's shape tuple can be passed straight to reshape.
other_arr = np.ones((3, 5))
other_arr.shape
arr.reshape(other_arr.shape)

# In[12]:

# Flattening back to 1-D: ravel ...
arr = np.arange(15).reshape((5, 3))
arr
arr.ravel()

# In[13]:

# ... and flatten (always returns a copy).
arr.flatten()

# In[14]:

# Default (row-major) order versus 'F' (column-major) order.
arr = np.arange(12).reshape((3, 4))
arr
arr.ravel()
arr.ravel('F')

# In[15]:

# Joining arrays along an existing axis.
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
np.concatenate([arr1, arr2], axis=0)
np.concatenate([arr1, arr2], axis=1)

# In[16]:

# Convenience wrappers for the two concatenations above.
np.vstack((arr1, arr2))
np.hstack((arr1, arr2))

# In[17]:

# Split rows at indices 1 and 3 -> pieces of 1, 2 and 2 rows.
arr = rng.standard_normal((5, 2))
arr
first, second, third = np.split(arr, [1, 3])
first
second
third

# In[18]:

# r_ / c_ stacking helpers (row-wise and column-wise).
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = rng.standard_normal((3, 2))
np.r_[arr1, arr2]
np.c_[np.r_[arr1, arr2], arr]

# In[19]:

# Slices inside c_ are expanded to ranges.
np.c_[1:6, -10:-5]

# In[20]:

# repeat: replicate each element a fixed number of times.
arr = np.arange(3)
arr
arr.repeat(3)

# In[21]:

# Per-element repeat counts.
arr.repeat([2, 3, 4])

# In[22]:

# Repeating along an axis of a 2-D array.
arr = rng.standard_normal((2, 2))
arr
arr.repeat(2, axis=0)

# In[23]:

arr.repeat([2, 3], axis=0)
arr.repeat([2, 3], axis=1)

# In[24]:

# tile: lay copies of the whole array side by side.
arr
np.tile(arr, 2)

# In[25]:

# A tuple gives the tiling layout per axis.
arr
np.tile(arr, (2, 1))
np.tile(arr, (3, 2))

# In[26]:

# Fancy indexing with an integer list; `arr` and `inds` are reused by the
# next cell.
arr = np.arange(10) * 100
inds = [7, 1, 2, 6]
arr[inds]

# In[27]:
# take/put: method equivalents of fancy-index read and in-place assignment.
arr.take(inds)
arr.put(inds, 42)
arr
arr.put(inds, [40, 41, 42, 43])
arr

# In[28]:

# take along another axis.
inds = [2, 0, 2, 1]
arr = rng.standard_normal((2, 4))
arr
arr.take(inds, axis=1)

# In[29]:

# Broadcasting a scalar across an array.
arr = np.arange(5)
arr
arr * 4

# In[30]:

# Demean each column: the (3,) column means broadcast across the 4 rows.
arr = rng.standard_normal((4, 3))
arr.mean(0)
demeaned = arr - arr.mean(0)
demeaned
demeaned.mean(0)

# In[31]:

# Demean each row: the (4,) row means must be reshaped to (4, 1) first.
arr
row_means = arr.mean(1)
row_means.shape
row_means.reshape((4, 1))
demeaned = arr - row_means.reshape((4, 1))
demeaned.mean(1)

# In[32]:

# Deliberate failure: shapes (4, 3) and (4,) cannot be broadcast together.
# In a notebook the error is just displayed; in a script an uncaught
# ValueError would stop every later cell, so it is caught and reported.
try:
    arr - arr.mean(1)
except ValueError as exc:
    print(f"broadcasting error (expected): {exc}")

# In[33]:

# The working form of the cell above.
arr - arr.mean(1).reshape((4, 1))

# In[34]:

# np.newaxis inserts a length-1 axis at the given position.
arr = np.zeros((4, 4))
arr_3d = arr[:, np.newaxis, :]
arr_3d.shape
arr_1d = rng.standard_normal(3)
arr_1d[:, np.newaxis]
arr_1d[np.newaxis, :]

# In[35]:

# Demean along the last axis of a 3-D array.
arr = rng.standard_normal((3, 4, 5))
depth_means = arr.mean(2)
depth_means
depth_means.shape
demeaned = arr - depth_means[:, :, np.newaxis]
demeaned.mean(2)

# In[36]:

# Assignment through a slice broadcasts the right-hand side.
arr = np.zeros((4, 3))
arr[:] = 5
arr

# In[37]:

# A (4, 1) column, then a (2, 1) nested list, broadcast across the columns.
col = np.array([1.28, -0.42, 0.44, 1.6])
arr[:] = col[:, np.newaxis]
arr
arr[:2] = [[-1.37], [0.509]]
arr

# In[38]:

# ufunc.reduce: np.add.reduce is the same aggregation as sum().
arr = np.arange(10)
np.add.reduce(arr)
arr.sum()

# In[39]:

my_rng = np.random.default_rng(12346)  # for reproducibility
arr = my_rng.standard_normal((5, 5))
arr
arr[::2].sort(1)  # sort a few rows
arr[:, :-1] < arr[:, 1:]
# True for each row whose values are strictly increasing.
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

# In[40]:

# ufunc.accumulate: running totals along axis 1.
arr = np.arange(15).reshape((3, 5))
np.add.accumulate(arr, axis=1)

# In[41]:

# ufunc.outer: pairwise products.
arr = np.arange(3).repeat([1, 2, 2])
arr
np.multiply.outer(arr, np.arange(5))

# In[42]:

# The outer result's shape is x.shape + y.shape.
x, y = rng.standard_normal((3, 4)), rng.standard_normal(5)
result = np.subtract.outer(x, y)
result.shape

# In[43]:

# ufunc.reduceat: sums over arr[0:5], arr[5:8] and arr[8:].
arr = np.arange(10)
np.add.reduceat(arr, [0, 5, 8])

# In[44]:

arr = np.multiply.outer(np.arange(4), np.arange(5))
arr
np.add.reduceat(arr, [0, 2, 4], axis=1)

# In[45]:


def add_elements(x, y):
    """Return ``x + y``; used below to build custom ufunc-like callables."""
    return x + y


# frompyfunc: 2 inputs, 1 output; the result is an object-dtype array.
add_them = np.frompyfunc(add_elements, 2, 1)
add_them(np.arange(8), np.arange(8))

# In[46]:

# vectorize lets the output dtype be specified.
add_them = np.vectorize(add_elements, otypes=[np.float64])
add_them(np.arange(8), np.arange(8))

# In[47]:

# Timing comparison: Python-level wrapper versus the built-in ufunc.
# NOTE: the get_ipython() calls in this file require running under IPython.
arr = rng.standard_normal(10000)
get_ipython().run_line_magic('timeit', 'add_them(arr, arr)')
get_ipython().run_line_magic('timeit', 'np.add(arr, arr)')

# In[48]:

# Structured arrays: a dtype given as a list of (field name, type) tuples.
dtype = [('x', np.float64), ('y', np.int32)]
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr

# In[49]:

sarr[0]
sarr[0]['y']

# In[50]:

sarr['x']

# In[51]:

# A field can itself hold a fixed-size sub-array (here 3 int64 values).
dtype = [('x', np.int64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr

# In[52]:

arr[0]['x']

# In[53]:

arr['x']

# In[54]:

# Nested dtype: field 'x' is itself a record with fields 'a' and 'b'.
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)
data['x']
data['y']
data['x']['a']

# In[55]:

# ndarray.sort sorts in place.
arr = rng.standard_normal(6)
arr.sort()
arr

# In[56]:

arr = rng.standard_normal((3, 5))
arr
arr[:, 0].sort()  # Sort first column values in place
arr

# In[57]:

# np.sort returns a sorted copy and leaves `arr` unchanged.
arr = rng.standard_normal(5)
arr
np.sort(arr)
arr

# In[58]:

# Sort each row independently.
arr = rng.standard_normal((3, 5))
arr
arr.sort(axis=1)
arr

# In[59]:

# Descending order via a reversed view of the sorted rows.
arr[:, ::-1]

# In[60]:

# Indirect sort: argsort returns the indices that would sort the array.
values = np.array([5, 0, 1, 3, 2])
indexer = values.argsort()
indexer
values[indexer]

# In[61]:

# Reorder the columns of a 2-D array by the values in its first row.
arr = rng.standard_normal((3, 5))
arr[0] = values
arr
arr[:, arr[0].argsort()]

# In[62]:

# lexsort: the LAST key passed (last_name) is the primary sort key.
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
sorter = np.lexsort((first_name, last_name))
sorter
list(zip(last_name[sorter], first_name[sorter]))

# In[63]:

# Stable sort: equal keys keep their original relative order.
values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])
key = np.array([2, 2, 1, 1, 1])
indexer = key.argsort(kind='mergesort')
indexer
values.take(indexer)

# In[64]:

# Partial sort: after partition(arr, 3) the first three elements are the
# three smallest values (in no particular order).
rng = np.random.default_rng(12345)
arr = rng.standard_normal(20)
arr
np.partition(arr, 3)

# In[65]:

indices = np.argpartition(arr, 3)
indices
arr.take(indices)

# In[66]:

# searchsorted: binary search for the insertion point in a sorted array.
arr = np.array([0, 1, 7, 12, 15])
arr.searchsorted(9)

# In[67]:

arr.searchsorted([0, 8, 11, 16])

# In[68]:

# side='right' returns the position after any run of equal values.
arr = np.array([0, 0, 0, 1, 1, 1, 1])
arr.searchsorted([0, 1])
arr.searchsorted([0, 1], side='right')

# In[69]:

# Bin the data by bucket edges using searchsorted.
data = np.floor(rng.uniform(0, 10000, size=50))
bins = np.array([0, 100, 1000, 5000, 10000])
data

# In[70]:

labels = bins.searchsorted(data)
labels

# In[71]:

# Mean of the data within each bin label.
pd.Series(data).groupby(labels).mean()

# In[72]:

import numpy as np


def mean_distance(x, y):
    """Return the mean of the element-wise differences ``x[i] - y[i]``.

    Despite the name, the differences are signed (no absolute value or
    square is taken).

    Parameters
    ----------
    x, y : indexable sequences
        Only the first ``len(x)`` elements of ``y`` are read.

    Raises
    ------
    ZeroDivisionError
        If ``x`` is empty.
    IndexError
        If ``y`` is shorter than ``x``.
    """
    nx = len(x)
    result = 0.0
    # Explicit index loop (rather than zip) so that a shorter ``y`` raises
    # IndexError instead of being silently truncated.
    for i in range(nx):
        result += x[i] - y[i]
    return result / nx

# In[73]:

# Memory-mapped file: 10000 x 10000 float64 values (800 MB nominal size).
# mode='w+' creates the file, overwriting any existing 'mymmap'.
mmap = np.memmap('mymmap', dtype='float64', mode='w+',
                 shape=(10000, 10000))
mmap

# In[74]:

# Slicing a memmap gives a view onto the data on disk.
section = mmap[:5]

# In[75]:

# Assign through the view, then flush the changes to disk.
section[:] = rng.standard_normal((5, 10000))
mmap.flush()
mmap
del mmap

# In[76]:

# Reopen the existing file; dtype and shape must be given again.
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))
mmap

# In[77]:

# Drop the reference, then remove the backing file (shell `rm`).
get_ipython().run_line_magic('xdel', 'mmap')
get_ipython().system('rm mymmap')

# In[78]:

# Memory layout: C (row-major) versus Fortran (column-major) order.
arr_c = np.ones((100, 10000), order='C')
arr_f = np.ones((100, 10000), order='F')
arr_c.flags
arr_f.flags
arr_f.flags.f_contiguous

# In[79]:

# Time a row-wise sum on each layout.
get_ipython().run_line_magic('timeit', 'arr_c.sum(1)')
get_ipython().run_line_magic('timeit', 'arr_f.sum(1)')

# In[80]:

# Copy into C order and inspect the flags.
arr_f.copy('C').flags

# In[81]:

# Contiguity flags of a row slice versus a column slice.
arr_c[:50].flags.contiguous
arr_c[:, :50].flags

# In[82]:

get_ipython().run_line_magic('xdel', 'arr_c')
get_ipython().run_line_magic('xdel', 'arr_f')

# In[83]:

# In[84]:

# Restore the pandas display limit saved in the first cell.
pd.options.display.max_rows = PREVIOUS_MAX_ROWS