#!/usr/bin/env python # coding: utf-8 # # Chapter 10: Sparse matrices and graphs # Robert Johansson # # Source code listings for [Numerical Python - Scientific Computing and Data Science Applications with Numpy, SciPy and Matplotlib](https://www.apress.com/us/book/9781484242452) (ISBN 978-1-484242-45-2). # In[1]: get_ipython().run_line_magic('matplotlib', 'inline') get_ipython().run_line_magic('config', "InlineBackend.figure_format='retina'") # In[2]: import matplotlib.pyplot as plt import matplotlib as mpl # mpl.rcParams['text.usetex'] = True mpl.rcParams['mathtext.fontset'] = 'stix' mpl.rcParams['font.family'] = 'serif' mpl.rcParams['font.sans-serif'] = 'stix' # In[3]: import scipy.sparse as sp # In[4]: import scipy.sparse.linalg # In[5]: import numpy as np # In[6]: import scipy.linalg as la # In[7]: import networkx as nx # ## Coordinate list format # In[8]: values = [1, 2, 3, 4] # In[9]: rows = [0, 1, 2, 3] # In[10]: cols = [1, 3, 2, 0] # In[11]: A = sp.coo_matrix((values, (rows, cols)), shape=[4, 4]) # In[12]: A.todense() # In[13]: A # In[14]: A.shape, A.size, A.dtype, A.ndim # In[15]: A.nnz, A.data # In[16]: A.row # In[17]: A.col # In[18]: A.tocsr() # In[19]: A.toarray() # In[20]: A.todense() # Not all sparse matrix formats supports indexing: # In[21]: A[1, 2] # In[22]: A.tobsr()[1, 2] # But some do: # In[23]: A.tocsr()[1, 2] # In[24]: A.tolil()[1:3, 3] # ## CSR # In[25]: A = np.array([[1, 2, 0, 0], [0, 3, 4, 0], [0, 0, 5, 6], [7, 0, 8, 9]]); A # In[26]: A = sp.csr_matrix(A) # In[27]: A.data # In[28]: A.indices # In[29]: A.indptr # In[30]: i = 2 # In[31]: A.indptr[i], A.indptr[i+1]-1 # In[32]: A.indices[A.indptr[i]:A.indptr[i+1]] # In[33]: A.data[A.indptr[i]:A.indptr[i+1]] # ## Functions for constructing sparse matrices # In[34]: N = 10 # In[35]: A = -2 * sp.eye(N) + sp.eye(N, k=1) + sp.eye(N, k=-1) # In[36]: A # In[37]: A.todense() # In[38]: fig, ax = plt.subplots() ax.spy(A) fig.savefig("ch10-sparse-matrix-1.pdf"); # In[39]: A = sp.diags([1,-2,1], [1,0,-1], shape=[N, N], format='csc') # In[40]: A # In[41]: fig, ax = plt.subplots() ax.spy(A); # In[42]: B = sp.diags([1, 1], [-1, 1], shape=[3,3]) # In[43]: B # In[44]: C = sp.kron(A, B, format='csr') C # In[45]: fig, (ax_A, ax_B, ax_C) = plt.subplots(1, 3, figsize=(12, 4)) ax_A.spy(A) ax_B.spy(B) ax_C.spy(C) fig.savefig("ch10-sparse-matrix-2.pdf"); # ## Sparse linear algebra # In[46]: N = 10 # In[47]: A = sp.diags([1, -2, 1], [1, 0, -1], shape=[N, N], format='csc') # In[48]: b = -np.ones(N) # In[49]: x = sp.linalg.spsolve(A, b) # In[50]: x # In[51]: np.linalg.solve(A.todense(), b) # In[52]: lu = sp.linalg.splu(A) # In[53]: lu.L # In[54]: lu.perm_r # In[55]: lu.U # In[56]: def sp_permute(A, perm_r, perm_c): """ permute rows and columns of A """ M, N = A.shape # row permumation matrix Pr = sp.coo_matrix((np.ones(M), (perm_r, np.arange(N)))).tocsr() # column permutation matrix Pc = sp.coo_matrix((np.ones(M), (np.arange(M), perm_c))).tocsr() return Pr.T * A * Pc.T # In[57]: sp_permute(lu.L * lu.U, lu.perm_r, lu.perm_c) - A # In[58]: fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 4)) ax1.spy(lu.L) ax2.spy(lu.U) ax3.spy(A) # In[59]: x = lu.solve(b) # In[60]: x # In[61]: # use_umfpack=True is only effective if scikit-umfpack is installed # (in which case UMFPACK is the default solver) x = sp.linalg.spsolve(A, b, use_umfpack=True) # In[62]: x # In[63]: x, info = sp.linalg.cg(A, b) # In[64]: x # In[65]: x, info = sp.linalg.bicgstab(A, b) # In[66]: x # In[67]: # atol argument is a recent addition x, info = sp.linalg.lgmres(A, b, atol=1e-5) # In[68]: x # In[69]: N = 25 # ### An example of a matrix reording method: Reverse Cuthil McKee # In[70]: A = sp.diags([1, -2, 1], [8, 0, -8], shape=[N, N], format='csc') # In[71]: perm = sp.csgraph.reverse_cuthill_mckee(A) perm # In[72]: fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4)) ax1.spy(A) ax2.spy(sp_permute(A, perm, perm)) # ### Performance comparison sparse/dense # In[73]: # compare performance of solving Ax=b vs system size N, # where A is the sparse matrix for the 1d poisson problem import time def setup(N): A = sp.diags([1,-2,1], [1,0,-1], shape=[N, N], format='csr') b = -np.ones(N) return A, A.todense(), b reps = 10 N_vec = np.arange(2, 300, 1) t_sparse = np.empty(len(N_vec)) t_dense = np.empty(len(N_vec)) for idx, N in enumerate(N_vec): A, A_dense, b = setup(N) t = time.time() for r in range(reps): x = np.linalg.solve(A_dense, b) t_dense[idx] = (time.time() - t)/reps t = time.time() for r in range(reps): x = sp.linalg.spsolve(A, b, use_umfpack=True) t_sparse[idx] = (time.time() - t)/reps fig, ax = plt.subplots(figsize=(8, 4)) ax.plot(N_vec, t_dense * 1e3, '.-', label="dense") ax.plot(N_vec, t_sparse * 1e3, '.-', label="sparse") ax.set_xlabel(r"$N$", fontsize=16) ax.set_ylabel("elapsed time (ms)", fontsize=16) ax.legend(loc=0) fig.tight_layout() fig.savefig("ch10-sparse-vs-dense.pdf") # ### Eigenvalue problems # In[74]: N = 10 # In[75]: A = sp.diags([1, -2, 1], [1, 0, -1], shape=[N, N], format='csc') # In[76]: evals, evecs = sp.linalg.eigs(A, k=4, which='LM') # In[77]: evals # In[78]: np.allclose(A.dot(evecs[:,0]), evals[0] * evecs[:,0]) # In[79]: evals, evecs = sp.linalg.eigsh(A, k=4, which='LM') # In[80]: evals # In[81]: evals, evecs = sp.linalg.eigs(A, k=4, which='SR') # In[82]: evals # In[83]: np.real(evals).argsort() # In[84]: def sp_eigs_sorted(A, k=6, which='SR'): """ compute and return eigenvalues sorted by real value """ evals, evecs = sp.linalg.eigs(A, k=k, which=which) idx = np.real(evals).argsort() return evals[idx], evecs[idx] # In[85]: evals, evecs = sp_eigs_sorted(A, k=4, which='SM') # In[86]: evals # #### Random matrix example # In[87]: N = 100 # In[88]: x_vec = np.linspace(0, 1, 50) # In[89]: # seed sp.rand with random_state to obtain a reproducible result M1 = sp.rand(N, N, density=0.2, random_state=112312321) # M1 = M1 + M1.conj().T M2 = sp.rand(N, N, density=0.2, random_state=984592134) # M2 = M2 + M2.conj().T # In[90]: evals = np.array([sp_eigs_sorted((1-x)*M1 + x*M2, k=25)[0] for x in x_vec]) # In[91]: fig, ax = plt.subplots(figsize=(8, 4)) for idx in range(evals.shape[1]): ax.plot(x_vec, np.real(evals[:,idx]), lw=0.5) ax.set_xlabel(r"$x$", fontsize=16) ax.set_ylabel(r"eig.vals. of $(1-x)M_1+xM_2$", fontsize=16) fig.tight_layout() fig.savefig("ch10-sparse-eigs.pdf") # ## Graphs # In[92]: g = nx.Graph() # In[93]: g.add_node(1) # In[94]: g.nodes() # In[95]: g.add_nodes_from([3, 4, 5]) # In[96]: g.nodes() # In[97]: g.add_edge(1, 2) # In[98]: g.edges() # In[99]: g.add_edges_from([(3, 4), (5, 6)]) # In[100]: g.edges() # In[101]: g.add_weighted_edges_from([(1, 3, 1.5), (3, 5, 2.5)]) # In[102]: g.edges() # In[103]: g.edges(data=True) # In[104]: g.add_weighted_edges_from([(6, 7, 1.5)]) # In[105]: g.nodes() # In[106]: g.edges() # In[107]: import numpy as np # In[108]: import json # In[109]: with open("tokyo-metro.json") as f: data = json.load(f) # In[110]: data.keys() # In[111]: data["C"] # In[112]: # data # In[113]: g = nx.Graph() for line in data.values(): g.add_weighted_edges_from(line["travel_times"]) g.add_edges_from(line["transfers"]) # In[114]: for n1, n2 in g.edges(): g[n1][n2]["transfer"] = "weight" not in g[n1][n2] # In[115]: g.number_of_nodes() # In[116]: list(g.nodes())[:5] # In[117]: g.number_of_edges() # In[118]: list(g.edges())[:5] # In[119]: on_foot = [edge for edge in g.edges() if g.get_edge_data(*edge)["transfer"]] # In[120]: on_train = [edge for edge in g.edges() if not g.get_edge_data(*edge)["transfer"]] # In[121]: colors = [data[n[0].upper()]["color"] for n in g.nodes()] # In[122]: # from networkx.drawing.nx_agraph import graphviz_layout # In[124]: fig, ax = plt.subplots(1, 1, figsize=(14, 10)) pos = nx.drawing.nx_agraph.graphviz_layout(g, prog="neato") nx.draw(g, pos, ax=ax, node_size=200, node_color=colors) nx.draw_networkx_labels(g, pos=pos, ax=ax, font_size=6) nx.draw_networkx_edges(g, pos=pos, ax=ax, edgelist=on_train, width=2) nx.draw_networkx_edges(g, pos=pos, ax=ax, edgelist=on_foot, edge_color="blue") # removing the default axis on all sides: for side in ['bottom','right','top','left']: ax.spines[side].set_visible(False) # removing the axis labels and ticks ax.set_xticks([]) ax.set_yticks([]) ax.xaxis.set_ticks_position('none') ax.yaxis.set_ticks_position('none') fig.savefig("ch10-metro-graph.pdf") fig.savefig("ch10-metro-graph.png") fig.tight_layout() # In[125]: g.degree() # In[126]: d_max = max(d for (n, d) in g.degree()) # In[127]: [(n, d) for (n, d) in g.degree() if d == d_max] # In[128]: p = nx.shortest_path(g, "Y24", "C19") # In[129]: np.array(p) # In[130]: np.sum([g[p[n]][p[n+1]]["weight"] for n in range(len(p)-1) if "weight" in g[p[n]][p[n+1]]]) # In[131]: h = g.copy() # In[132]: for n1, n2 in h.edges(): if "transfer" in h[n1][n2]: h[n1][n2]["weight"] = 5 # In[133]: p = nx.shortest_path(h, "Y24", "C19") # In[134]: np.array(p) # In[135]: np.sum([h[p[n]][p[n+1]]["weight"] for n in range(len(p)-1)]) # In[136]: p = nx.shortest_path(h, "Z1", "H16") # In[137]: np.sum([h[p[n]][p[n+1]]["weight"] for n in range(len(p)-1)]) # In[138]: A = nx.to_scipy_sparse_matrix(g) # In[139]: A # In[140]: perm = sp.csgraph.reverse_cuthill_mckee(A) # In[141]: fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4)) ax1.spy(A, markersize=2) ax2.spy(sp_permute(A, perm, perm), markersize=2) fig.tight_layout() fig.savefig("ch12-rcm-graph.pdf") # ## Versions # In[142]: get_ipython().run_line_magic('reload_ext', 'version_information') # In[143]: get_ipython().run_line_magic('version_information', 'numpy, scipy, matplotlib, networkx, pygraphviz')