Refer to
compressed.py
and
csc.py
in scipy.sparse
.
import scipy.sparse
def csr_matrix_indices(S):
"""
Return a list of the indices of nonzero entries of a csr_matrix S
"""
major_dim, minor_dim = S.shape
minor_indices = S.indices
major_indices = np.empty(len(minor_indices), dtype=S.indices.dtype)
scipy.sparse._sparsetools.expandptr(major_dim, S.indptr, major_indices)
return zip(major_indices, minor_indices)
def csr_matrix_indices_gen(S):
"""
Generate the indices of nonzero entries of a csr_matrix S
"""
major_dim, minor_dim = S.shape
minor_indices = S.indices
for i in range(major_dim):
for j in range(S.indptr[i], S.indptr[i+1]):
major_index, minor_index = i, minor_indices[j]
yield major_index, minor_index
import numpy as np
n = 4
A = np.random.randint(2, size=(n, n))
A_csr = scipy.sparse.csr_matrix(A > 0)
A
array([[1, 0, 1, 0], [1, 1, 0, 0], [1, 1, 1, 0], [0, 0, 1, 0]])
for x in csr_matrix_indices(A_csr):
print x
(0, 0) (0, 2) (1, 0) (1, 1) (2, 0) (2, 1) (2, 2) (3, 2)
for x in csr_matrix_indices_gen(A_csr):
print x
(0, 0) (0, 2) (1, 0) (1, 1) (2, 0) (2, 1) (2, 2) (3, 2)
nonzero
method of csr_matrix
calls the tocoo
method;
source codenonzero
method of csc_matrix
involves swapping;
source codeThey are expected to be slower.
for x in zip(A_csr.nonzero()[0], A_csr.nonzero()[1]):
print x
(0, 0) (0, 2) (1, 0) (1, 1) (2, 0) (2, 1) (2, 2) (3, 2)
for x in zip(A_csr.nonzero()[0], A_csr.nonzero()[1]):
print x
(0, 0) (0, 2) (1, 0) (1, 1) (2, 0) (2, 1) (2, 2) (3, 2)
A_csc = scipy.sparse.csc_matrix(A > 0)
for x in zip(A_csc.nonzero()[0], A_csc.nonzero()[1]):
print x
(0, 0) (0, 2) (1, 0) (1, 1) (2, 0) (2, 1) (2, 2) (3, 2)
sizes = [10, 50, 100, 1000]
matrices = []
for n in sizes:
A = np.random.randint(2, size=(n, n))
matrix = {
'n': n,
'dense': A,
'csr': scipy.sparse.csr_matrix(A > 0),
'csc': scipy.sparse.csc_matrix(A > 0)
}
matrices.append(matrix)
print matrices[0]['csr'].todense()
[[False True True False False False False False True False] [ True True False True True True True False True False] [ True True True True False False False True True True] [False True False False False True False True False True] [ True True True False True True True True False True] [False True False False False False True True False True] [ True False False False True True True False False False] [False False False True False True True True True True] [ True False False False True False False True True False] [False False False True False True False True True False]]
for matrix in matrices:
print '{0} x {1}'.format(matrix['n'], matrix['n'])
%timeit for x in zip(matrix['csr'].nonzero()[0], matrix['csr'].nonzero()[1]): x
%timeit for x in zip(matrix['csr'].tocoo().row, matrix['csr'].tocoo().col): x
%timeit for x in zip(matrix['csc'].nonzero()[0], matrix['csc'].nonzero()[1]): x
%timeit for x in np.nditer(np.where(matrix['dense'] > 0)): x
%timeit for x in csr_matrix_indices(matrix['csr']): x
%timeit for x in csr_matrix_indices_gen(matrix['csr']): x
10 x 10 1000 loops, best of 3: 232 µs per loop 1000 loops, best of 3: 216 µs per loop 10000 loops, best of 3: 41 µs per loop 10000 loops, best of 3: 24.2 µs per loop 10000 loops, best of 3: 20.7 µs per loop 10000 loops, best of 3: 29.6 µs per loop 50 x 50 1000 loops, best of 3: 597 µs per loop 1000 loops, best of 3: 587 µs per loop 1000 loops, best of 3: 530 µs per loop 1000 loops, best of 3: 411 µs per loop 1000 loops, best of 3: 359 µs per loop 1000 loops, best of 3: 442 µs per loop 100 x 100 1000 loops, best of 3: 1.78 ms per loop 1000 loops, best of 3: 1.73 ms per loop 100 loops, best of 3: 2.33 ms per loop 1000 loops, best of 3: 1.73 ms per loop 1000 loops, best of 3: 1.54 ms per loop 1000 loops, best of 3: 1.67 ms per loop 1000 x 1000 1 loops, best of 3: 203 ms per loop 1 loops, best of 3: 204 ms per loop 1 loops, best of 3: 328 ms per loop 10 loops, best of 3: 168 ms per loop 1 loops, best of 3: 190 ms per loop 10 loops, best of 3: 150 ms per loop
sizes = [10, 50, 100, 1000]
matrices = []
for n in sizes:
A = np.ones((n, n))
matrix = {
'n': n,
'dense': A,
'csr': scipy.sparse.csr_matrix(A > 0),
'csc': scipy.sparse.csc_matrix(A > 0)
}
matrices.append(matrix)
for matrix in matrices:
print '{0} x {1}'.format(matrix['n'], matrix['n'])
%timeit for x in zip(matrix['csr'].nonzero()[0], matrix['csr'].nonzero()[1]): x
%timeit for x in zip(matrix['csr'].tocoo().row, matrix['csr'].tocoo().col): x
%timeit for x in zip(matrix['csc'].nonzero()[0], matrix['csc'].nonzero()[1]): x
%timeit for x in np.nditer(np.where(matrix['dense'] > 0)): x
%timeit for x in csr_matrix_indices(matrix['csr']): x
%timeit for x in csr_matrix_indices_gen(matrix['csr']): x
10 x 10 1000 loops, best of 3: 249 µs per loop 1000 loops, best of 3: 235 µs per loop 10000 loops, best of 3: 58.5 µs per loop 10000 loops, best of 3: 40.8 µs per loop 10000 loops, best of 3: 35.6 µs per loop 10000 loops, best of 3: 45.6 µs per loop 50 x 50 1000 loops, best of 3: 1.01 ms per loop 1000 loops, best of 3: 953 µs per loop 1000 loops, best of 3: 1.09 ms per loop 1000 loops, best of 3: 821 µs per loop 1000 loops, best of 3: 727 µs per loop 1000 loops, best of 3: 813 µs per loop 100 x 100 100 loops, best of 3: 3.37 ms per loop 100 loops, best of 3: 3.29 ms per loop 100 loops, best of 3: 4.56 ms per loop 100 loops, best of 3: 3.19 ms per loop 100 loops, best of 3: 2.87 ms per loop 100 loops, best of 3: 3.17 ms per loop 1000 x 1000 1 loops, best of 3: 425 ms per loop 1 loops, best of 3: 410 ms per loop 1 loops, best of 3: 692 ms per loop 1 loops, best of 3: 320 ms per loop 1 loops, best of 3: 386 ms per loop 1 loops, best of 3: 300 ms per loop