#!/usr/bin/env python
# coding: utf-8

# # Chapter 10: Sparse matrices and graphs

# Robert Johansson
# 
# Source code listings for [Numerical Python - Scientific Computing and Data Science Applications with Numpy, SciPy and Matplotlib](https://www.apress.com/us/book/9781484242452) (ISBN 978-1-484242-45-2).

# In[1]:


get_ipython().run_line_magic('matplotlib', 'inline')
get_ipython().run_line_magic('config', "InlineBackend.figure_format='retina'")


# In[2]:


import matplotlib.pyplot as plt
import matplotlib as mpl
# mpl.rcParams['text.usetex'] = True
mpl.rcParams['mathtext.fontset'] = 'stix'
mpl.rcParams['font.family'] = 'serif'
mpl.rcParams['font.sans-serif'] = 'stix'


# In[3]:


import scipy.sparse as sp


# In[4]:


import scipy.sparse.linalg


# In[5]:


import numpy as np


# In[6]:


import scipy.linalg as la


# In[7]:


import networkx as nx


# ## Coordinate list format

# In[8]:


values = [1, 2, 3, 4]


# In[9]:


rows = [0, 1, 2, 3]


# In[10]:


cols = [1, 3, 2, 0]


# In[11]:


A = sp.coo_matrix((values, (rows, cols)), shape=[4, 4])


# In[12]:


A.todense()


# In[13]:


A


# In[14]:


A.shape, A.size, A.dtype, A.ndim


# In[15]:


A.nnz, A.data


# In[16]:


A.row


# In[17]:


A.col


# In[18]:


A.tocsr()


# In[19]:


A.toarray()


# In[20]:


A.todense()


# Not all sparse matrix formats supports indexing:

# In[21]:


A[1, 2]


# In[22]:


A.tobsr()[1, 2]


# But some do:

# In[23]:


A.tocsr()[1, 2]


# In[24]:


A.tolil()[1:3, 3]


# ## CSR

# In[25]:


A = np.array([[1, 2, 0, 0], [0, 3, 4, 0], [0, 0, 5, 6], [7, 0, 8, 9]]); A


# In[26]:


A = sp.csr_matrix(A)


# In[27]:


A.data


# In[28]:


A.indices


# In[29]:


A.indptr


# In[30]:


i = 2


# In[31]:


A.indptr[i], A.indptr[i+1]-1


# In[32]:


A.indices[A.indptr[i]:A.indptr[i+1]]


# In[33]:


A.data[A.indptr[i]:A.indptr[i+1]]


# ## Functions for constructing sparse matrices

# In[34]:


N = 10


# In[35]:


A = -2 * sp.eye(N) + sp.eye(N, k=1) + sp.eye(N, k=-1)


# In[36]:


A


# In[37]:


A.todense()


# In[38]:


fig, ax = plt.subplots()
ax.spy(A)
fig.savefig("ch10-sparse-matrix-1.pdf");


# In[39]:


A = sp.diags([1,-2,1], [1,0,-1], shape=[N, N], format='csc')


# In[40]:


A


# In[41]:


fig, ax = plt.subplots()
ax.spy(A);


# In[42]:


B = sp.diags([1, 1], [-1, 1], shape=[3,3])


# In[43]:


B


# In[44]:


C = sp.kron(A, B, format='csr')
C


# In[45]:


fig, (ax_A, ax_B, ax_C) = plt.subplots(1, 3, figsize=(12, 4))
ax_A.spy(A)
ax_B.spy(B)
ax_C.spy(C)
fig.savefig("ch10-sparse-matrix-2.pdf");


# ## Sparse linear algebra

# In[46]:


N = 10


# In[47]:


A = sp.diags([1, -2, 1], [1, 0, -1], shape=[N, N], format='csc')


# In[48]:


b = -np.ones(N)


# In[49]:


x = sp.linalg.spsolve(A, b)


# In[50]:


x


# In[51]:


np.linalg.solve(A.todense(), b)


# In[52]:


lu = sp.linalg.splu(A)


# In[53]:


lu.L


# In[54]:


lu.perm_r


# In[55]:


lu.U


# In[56]:


def sp_permute(A, perm_r, perm_c):
    """ permute rows and columns of A """
    M, N = A.shape
    # row permumation matrix
    Pr = sp.coo_matrix((np.ones(M), (perm_r, np.arange(N)))).tocsr()
    # column permutation matrix
    Pc = sp.coo_matrix((np.ones(M), (np.arange(M), perm_c))).tocsr()
    return Pr.T * A * Pc.T


# In[57]:


sp_permute(lu.L * lu.U, lu.perm_r, lu.perm_c) - A


# In[58]:


fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 4))
ax1.spy(lu.L)
ax2.spy(lu.U)
ax3.spy(A)


# In[59]:


x = lu.solve(b)


# In[60]:


x


# In[61]:


# use_umfpack=True is only effective if scikit-umfpack is installed
# (in which case UMFPACK is the default solver)
x = sp.linalg.spsolve(A, b, use_umfpack=True)


# In[62]:


x


# In[63]:


x, info = sp.linalg.cg(A, b)


# In[64]:


x


# In[65]:


x, info = sp.linalg.bicgstab(A, b)


# In[66]:


x


# In[67]:


# atol argument is a recent addition
x, info = sp.linalg.lgmres(A, b, atol=1e-5)


# In[68]:


x


# In[69]:


N = 25


# ### An example of a matrix reording method: Reverse Cuthil McKee

# In[70]:


A = sp.diags([1, -2, 1], [8, 0, -8], shape=[N, N], format='csc')


# In[71]:


perm = sp.csgraph.reverse_cuthill_mckee(A)
perm


# In[72]:


fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
ax1.spy(A)
ax2.spy(sp_permute(A, perm, perm))


# ### Performance comparison sparse/dense

# In[73]:


# compare performance of solving Ax=b vs system size N,
# where A is the sparse matrix for the 1d poisson problem
import time

def setup(N):
    A = sp.diags([1,-2,1], [1,0,-1], shape=[N, N], format='csr')
    b = -np.ones(N)
    return A, A.todense(), b

reps = 10
N_vec = np.arange(2, 300, 1)
t_sparse = np.empty(len(N_vec))
t_dense = np.empty(len(N_vec))
for idx, N in enumerate(N_vec):
    A, A_dense, b = setup(N)
    t = time.time()
    for r in range(reps):
        x = np.linalg.solve(A_dense, b)
    t_dense[idx] = (time.time() - t)/reps
    t = time.time()
    for r in range(reps):
        x = sp.linalg.spsolve(A, b, use_umfpack=True)
    t_sparse[idx] = (time.time() - t)/reps
    
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(N_vec, t_dense * 1e3, '.-', label="dense")
ax.plot(N_vec, t_sparse * 1e3, '.-', label="sparse")
ax.set_xlabel(r"$N$", fontsize=16)
ax.set_ylabel("elapsed time (ms)", fontsize=16)
ax.legend(loc=0)
fig.tight_layout()
fig.savefig("ch10-sparse-vs-dense.pdf")


# ### Eigenvalue problems

# In[74]:


N = 10


# In[75]:


A = sp.diags([1, -2, 1], [1, 0, -1], shape=[N, N], format='csc')


# In[76]:


evals, evecs = sp.linalg.eigs(A, k=4, which='LM')


# In[77]:


evals


# In[78]:


np.allclose(A.dot(evecs[:,0]), evals[0] * evecs[:,0])


# In[79]:


evals, evecs = sp.linalg.eigsh(A, k=4, which='LM')


# In[80]:


evals


# In[81]:


evals, evecs = sp.linalg.eigs(A, k=4, which='SR')


# In[82]:


evals


# In[83]:


np.real(evals).argsort()


# In[84]:


def sp_eigs_sorted(A, k=6, which='SR'):
    """ compute and return eigenvalues sorted by real value """
    evals, evecs = sp.linalg.eigs(A, k=k, which=which)
    idx = np.real(evals).argsort()
    return evals[idx], evecs[idx]


# In[85]:


evals, evecs = sp_eigs_sorted(A, k=4, which='SM')


# In[86]:


evals


# #### Random matrix example

# In[87]:


N = 100


# In[88]:


x_vec = np.linspace(0, 1, 50)


# In[89]:


# seed sp.rand with random_state to obtain a reproducible result
M1 = sp.rand(N, N, density=0.2, random_state=112312321)
# M1 = M1 + M1.conj().T
M2 = sp.rand(N, N, density=0.2, random_state=984592134)
# M2 = M2 + M2.conj().T


# In[90]:


evals = np.array([sp_eigs_sorted((1-x)*M1 + x*M2, k=25)[0] for x in x_vec])


# In[91]:


fig, ax = plt.subplots(figsize=(8, 4))

for idx in range(evals.shape[1]):
    ax.plot(x_vec, np.real(evals[:,idx]), lw=0.5)

ax.set_xlabel(r"$x$", fontsize=16)
ax.set_ylabel(r"eig.vals. of $(1-x)M_1+xM_2$", fontsize=16)

fig.tight_layout()
fig.savefig("ch10-sparse-eigs.pdf")


# ## Graphs

# In[92]:


g = nx.Graph()


# In[93]:


g.add_node(1)


# In[94]:


g.nodes()


# In[95]:


g.add_nodes_from([3, 4, 5])


# In[96]:


g.nodes()


# In[97]:


g.add_edge(1, 2)


# In[98]:


g.edges()


# In[99]:


g.add_edges_from([(3, 4), (5, 6)])


# In[100]:


g.edges()


# In[101]:


g.add_weighted_edges_from([(1, 3, 1.5), (3, 5, 2.5)])


# In[102]:


g.edges()


# In[103]:


g.edges(data=True)


# In[104]:


g.add_weighted_edges_from([(6, 7, 1.5)])


# In[105]:


g.nodes()


# In[106]:


g.edges()


# In[107]:


import numpy as np


# In[108]:


import json


# In[109]:


with open("tokyo-metro.json") as f:
    data = json.load(f)


# In[110]:


data.keys()


# In[111]:


data["C"]


# In[112]:


# data


# In[113]:


g = nx.Graph()

for line in data.values():
    g.add_weighted_edges_from(line["travel_times"])
    g.add_edges_from(line["transfers"])


# In[114]:


for n1, n2 in g.edges():
    g[n1][n2]["transfer"] = "weight" not in g[n1][n2]


# In[115]:


g.number_of_nodes()


# In[116]:


list(g.nodes())[:5]


# In[117]:


g.number_of_edges()


# In[118]:


list(g.edges())[:5]


# In[119]:


on_foot = [edge for edge in g.edges() if g.get_edge_data(*edge)["transfer"]]


# In[120]:


on_train = [edge for edge in g.edges() if not g.get_edge_data(*edge)["transfer"]]


# In[121]:


colors = [data[n[0].upper()]["color"] for n in g.nodes()]


# In[122]:


# from networkx.drawing.nx_agraph import graphviz_layout


# In[124]:


fig, ax = plt.subplots(1, 1, figsize=(14, 10))

pos = nx.drawing.nx_agraph.graphviz_layout(g, prog="neato")
nx.draw(g, pos, ax=ax, node_size=200, node_color=colors)
nx.draw_networkx_labels(g, pos=pos, ax=ax, font_size=6)
nx.draw_networkx_edges(g, pos=pos, ax=ax, edgelist=on_train, width=2)
nx.draw_networkx_edges(g, pos=pos, ax=ax, edgelist=on_foot, edge_color="blue")

# removing the default axis on all sides:
for side in ['bottom','right','top','left']:
    ax.spines[side].set_visible(False)

# removing the axis labels and ticks
ax.set_xticks([])
ax.set_yticks([])
ax.xaxis.set_ticks_position('none')
ax.yaxis.set_ticks_position('none')

fig.savefig("ch10-metro-graph.pdf")
fig.savefig("ch10-metro-graph.png")
fig.tight_layout()


# In[125]:


g.degree()


# In[126]:


d_max = max(d for (n, d) in g.degree())


# In[127]:


[(n, d) for (n, d) in g.degree() if d == d_max]


# In[128]:


p = nx.shortest_path(g, "Y24", "C19")


# In[129]:


np.array(p)


# In[130]:


np.sum([g[p[n]][p[n+1]]["weight"] for n in range(len(p)-1) if "weight" in g[p[n]][p[n+1]]])


# In[131]:


h = g.copy()


# In[132]:


for n1, n2 in h.edges():
    if "transfer" in h[n1][n2]:
        h[n1][n2]["weight"] = 5


# In[133]:


p = nx.shortest_path(h, "Y24", "C19")


# In[134]:


np.array(p)


# In[135]:


np.sum([h[p[n]][p[n+1]]["weight"] for n in range(len(p)-1)])


# In[136]:


p = nx.shortest_path(h, "Z1", "H16")


# In[137]:


np.sum([h[p[n]][p[n+1]]["weight"] for n in range(len(p)-1)])


# In[138]:


A = nx.to_scipy_sparse_matrix(g)


# In[139]:


A


# In[140]:


perm = sp.csgraph.reverse_cuthill_mckee(A)


# In[141]:


fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
ax1.spy(A, markersize=2)
ax2.spy(sp_permute(A, perm, perm), markersize=2)
fig.tight_layout()
fig.savefig("ch12-rcm-graph.pdf")


# ## Versions

# In[142]:


get_ipython().run_line_magic('reload_ext', 'version_information')


# In[143]:


get_ipython().run_line_magic('version_information', 'numpy, scipy, matplotlib, networkx, pygraphviz')