%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D
def axisEqual3D(ax):
extents = np.array([getattr(ax, 'get_{}lim'.format(dim))() for dim in 'xyz'])
sz = extents[:,1] - extents[:,0]
centers = np.mean(extents, axis=1)
maxsize = max(abs(sz))
r = maxsize/2
for ctr, dim in zip(centers, 'xyz'):
getattr(ax, 'set_{}lim'.format(dim))(ctr - r/3, ctr + r/3)
def plotmy3Ddata(input_data,title='',view=[35,-50]):
dim,num_samples = input_data.shape
num_centroids = dim/3;
x_ind = range(num_centroids)
y_ind = range(num_centroids,num_centroids*2)
z_ind = range(num_centroids*2,num_centroids*3)
fig = plt.figure(figsize=(16, 10));
ax = fig.add_subplot(111, projection='3d');
colors = iter(cm.rainbow(np.linspace(0, 1, num_samples)))
for col in range(num_samples):
c_color = next(colors)
ax.scatter( input_data[x_ind,col],
input_data[y_ind,col],
input_data[z_ind,col],
color=c_color,
s=16,
marker='.' )
ax.plot(input_data[x_ind,col], input_data[y_ind,col], input_data[z_ind,col], color=c_color)
ax.set_title(title,fontsize=20,x=0.65,y=0.95)
axisEqual3D(ax)
ax.view_init(view[0],view[1])
plt.show()
# Implementation of the Umeyama method for computing rigid-body transformations
def umeyama_rigid(X, Y):
# Get dimension and number of points
m, n = X.shape
# Demean the point sets X and Y
X_mean = X.mean(1) #MODEL ANSWER
Y_mean = Y.mean(1) #MODEL ANSWER
X_demean = X - np.tile(X_mean, (n, 1)).T #MODEL ANSWER
Y_demean = Y - np.tile(Y_mean, (n, 1)).T #MODEL ANSWER
# Computing matrix XY' using demeaned point sets
XY = np.dot(X_demean, Y_demean.T) #MODEL ANSWER
# Singular value decomposition
U,D,V = np.linalg.svd(XY,full_matrices=True,compute_uv=True)
V=V.T.copy()
# Determine rotation
R = np.dot( V, U.T) #MODEL ANSWER
# Determine translation
t = Y_mean - np.dot(R, X_mean) #MODEL ANSWER
return R,t
# Implementation of the Umeyama method for computing similarity transformations
def umeyama_similarity(X, Y):
# Get dimension and number of points
m, n = X.shape
# Demean the point sets X and Y
X_mean = X.mean(1) #MODEL ANSWER
Y_mean = Y.mean(1) #MODEL ANSWER
X_demean = X - np.tile(X_mean, (n, 1)).T #MODEL ANSWER
Y_demean = Y - np.tile(Y_mean, (n, 1)).T #MODEL ANSWER
# Computing matrix XY' using demeaned and NORMALISED point sets (divide by the number of points n)
# See Equation (38) in the paper
XY = np.dot(X_demean, Y_demean.T) / n #MODEL ANSWER
# Determine variances of points X and Y, see Equation (36),(37) in the paper
X_var = np.mean(np.sum(X_demean*X_demean, 0))
Y_var = np.mean(np.sum(Y_demean*Y_demean, 0))
# Singular value decomposition
U,D,V = np.linalg.svd(XY,full_matrices=True,compute_uv=True)
V=V.T.copy()
# Determine rotation
R = np.dot( V, U.T) #MODEL ANSWER
# Determine the scaling, see Equation (42) in the paper (assume S to be the identity matrix, so ignore)
c = np.trace(np.diag(D)) / X_var #MODEL ANSWER
# Determine translation, see Equation (41) in the paper
t = Y_mean - c * np.dot(R, X_mean) #MODEL ANSWER
return R,c,t
train_data = np.genfromtxt('./data/spine_training.txt')
dimension,num_examples_train = train_data.shape
num_centroids = dimension/3;
x_ind = range(num_centroids)
y_ind = range(num_centroids,num_centroids*2)
z_ind = range(num_centroids*2,num_centroids*3)
print 'num of dimension: %d' % dimension
print 'num of training examples: %d' % num_examples_train
plotmy3Ddata(train_data,title='Original Training Data')
num of dimension: 78 num of training examples: 200
# pre-processing: align shapes - three variants
# 1: center-of-mass alignment
# 2: rigid-body alignment
# 3: similarity alignment
align = 1;
aligned_data = np.zeros(train_data.shape);
if align == 1:
cx = train_data[x_ind,:];
cy = train_data[y_ind,:];
cz = train_data[z_ind,:];
aligned_data[x_ind,:] = cx - np.tile(np.mean(cx,axis=0),(num_centroids,1));
aligned_data[y_ind,:] = cy - np.tile(np.mean(cy,axis=0),(num_centroids,1));
aligned_data[z_ind,:] = cz - np.tile(np.mean(cz,axis=0),(num_centroids,1));
elif align == 2:
ref_shape = train_data[:,0].reshape((3, num_centroids))
for ii in range(1,num_examples_train):
mov_shape = train_data[:,ii].reshape((3, num_centroids))
R, t = umeyama_rigid(mov_shape,ref_shape)
mov_shape = np.dot(R,mov_shape) + np.tile(t, (num_centroids, 1)).transpose()
aligned_data[:,ii] = mov_shape.reshape((3*num_centroids,1)).flatten()
aligned_data[:,0] = train_data[:,0];
elif align == 3:
ref_shape = train_data[:,0].reshape((3, num_centroids))
for ii in range(1,num_examples_train):
mov_shape = train_data[:,ii].reshape((3, num_centroids))
R, c, t = umeyama_similarity(mov_shape,ref_shape)
mov_shape = c * np.dot(R,mov_shape) + np.tile(t, (num_centroids, 1)).transpose()
aligned_data[:,ii] = mov_shape.reshape((3*num_centroids,1)).flatten()
aligned_data[:,0] = train_data[:,0];
plotmy3Ddata(aligned_data,title='Aligned Training Data')
# Perform Shape PCA
# compute the mean over all samples
aligned_data_mean = np.mean(aligned_data,axis=1);
# demean data
aligned_data_m = aligned_data - np.transpose(np.tile(aligned_data_mean,(num_examples_train,1)))
# get eigenvectors and eigenvalues via SVD
U, D, V = np.linalg.svd( aligned_data_m / np.sqrt(num_examples_train-1), full_matrices=True)
D = np.multiply(D, D)
# get the retained variance with increasing number of modes
retained_variance = np.cumsum(D) / np.sum(D);
# get number of modes to retain 95% of variance (from 0 to num_modes_95)
num_modes_95 = next(idx for idx,v in enumerate(retained_variance) if v > 0.95)
# get number of modes to retain 99% of variance (from 0 to num_modes_99)
num_modes_99 = next(idx for idx,v in enumerate(retained_variance) if v > 0.99)
fig = plt.figure(figsize=(12, 8));
ax = fig.add_subplot(111);
ax.plot([num_modes_95,num_centroids*3],[0.95,0.95],color='r',linestyle='--',linewidth=1.5)
ax.text(dimension+3, 0.95,'95%', fontsize=15)
ax.plot([num_modes_99,num_centroids*3],[0.99,0.99],color='b',linestyle='--',linewidth=1.5)
ax.text(dimension+3, 0.99,'99%', fontsize=15)
ax.plot([num_modes_95,num_modes_95],[0,0.95],color='r',linestyle='--',linewidth=1.5)
ax.plot([num_modes_99,num_modes_99],[0,0.99],color='b',linestyle='--',linewidth=1.5)
ax.plot(retained_variance,color='k',linewidth=1.5)
ax.grid(True)
ax.tick_params(axis='both', which='major', labelsize=15)
x1,x2,y1,y2 = plt.axis()
plt.axis((x1,x2,0,1.1))
ax.set_title("Modes vs Variance: ( {0} |95%) / ( {1} |99%)".format(num_modes_95,num_modes_99),fontsize=16)
plt.show()
#plot variation of mean shape for the modes of 95% variance
fig = plt.figure(figsize=(15, 2.5*num_modes_95));
for ii in range(num_modes_95+1):
# add and subtract 3 times the standard deviation (which contains 99.7% of the data)
sp = aligned_data_mean + U[:,ii] * np.sqrt(D[ii]) * 3
sn = aligned_data_mean - U[:,ii] * np.sqrt(D[ii]) * 3
ax = fig.add_subplot(np.ceil((num_modes_95+1)/float(2)),2,ii+1, projection='3d');
ax.scatter(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind], s=70, marker='.', color='k')
ax.scatter(sp[x_ind], sp[y_ind], sp[z_ind], s=70, marker='.', color='b')
ax.scatter(sn[x_ind], sn[y_ind], sn[z_ind], s=70, marker='.', color='b')
ax.plot(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind], color='k')
ax.plot(sp[x_ind], sp[y_ind], sp[z_ind], color='b')
ax.plot(sn[x_ind], sn[y_ind], sn[z_ind], color='b')
ax.set_title('Mode {0}'.format(ii),fontsize=16,x=0.65)
ax.view_init(45,-45)
axisEqual3D(ax)
plt.show()
#random sampling of a few shapes from PCA model
#Here we show that PCA gives us a generative model
fig = plt.figure(figsize=(16, 10));
scaling = 2;
std_dev = np.sqrt(D[:num_modes_95+1]);
num_shapes = 5
ax = fig.add_subplot(111, projection='3d');
ax.scatter(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind], s=70, marker='.', color='k')
ax.plot(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind], color='k', label='mean shape')
colors = iter(cm.rainbow(np.linspace(0, 1, num_shapes)))
for ii in range(num_shapes):
c_color = next(colors)
coeffs = np.multiply(np.random.normal(size=[num_modes_95+1]),std_dev*scaling)
random_shape = aligned_data_mean - np.dot(U[:,:num_modes_95+1],coeffs);
ax.scatter(random_shape[x_ind],random_shape[y_ind],random_shape[z_ind], s=70, marker='.', color=c_color);
ax.plot( random_shape[x_ind], random_shape[y_ind], random_shape[z_ind],
color=c_color, label='random shape {0}'.format(ii))
ax.set_title('Mean Shape and Random Shapes',fontsize=16,x=0.65);
axisEqual3D(ax)
ax.view_init(35,-50)
plt.legend( loc='upper right' )
plt.show()
# project shapes from a patient database and determine abnormal cases
# Here we show that the PCA model can be used for abnormality detection
test_data = np.genfromtxt('./data/spine_testing.txt')
num_examples_test = test_data.shape[1]
plotmy3Ddata(test_data,title='Original Testing Data',view=[35,20])
# align test data in the same way as training data
aligned_testing_data = np.zeros(test_data.shape)
if align == 1:
cx = test_data[x_ind,:];
cy = test_data[y_ind,:];
cz = test_data[z_ind,:];
aligned_testing_data[x_ind,:] = cx - np.tile(np.mean(cx,axis=0),(num_centroids,1));
aligned_testing_data[y_ind,:] = cy - np.tile(np.mean(cy,axis=0),(num_centroids,1));
aligned_testing_data[z_ind,:] = cz - np.tile(np.mean(cz,axis=0),(num_centroids,1));
elif align == 2:
ref_shape = train_data[:,0].reshape((3, num_centroids))
for ii in range(0,num_examples_test):
mov_shape = test_data[:,ii].reshape((3, num_centroids))
R, t = umeyama_rigid(mov_shape,ref_shape)
mov_shape = np.dot(R,mov_shape) + np.tile(t, (num_centroids, 1)).transpose()
aligned_testing_data[:,ii] = mov_shape.reshape((3*num_centroids,1)).flatten()
elif align == 3:
ref_shape = train_data[:,0].reshape((3, num_centroids))
for ii in range(0,num_examples_test):
mov_shape = test_data[:,ii].reshape((3, num_centroids))
R, c, t = umeyama_similarity(mov_shape,ref_shape)
mov_shape = c * np.dot(R,mov_shape) + np.tile(t, (num_centroids, 1)).transpose()
aligned_testing_data[:,ii] = mov_shape.reshape((3*num_centroids,1)).flatten()
plotmy3Ddata(aligned_testing_data,title='Aligned Testing Data',view=[35,20])
# project test and training data on the PCA shape model
coeffs_test = np.dot( np.transpose(U),
aligned_testing_data - np.transpose(np.tile(aligned_data_mean,(num_examples_test,1))))
coeffs_train = np.dot( np.transpose(U),
aligned_data - np.transpose(np.tile(aligned_data_mean,(num_examples_train,1))))
# compute per subject probability, consider only the 95% variance modes
variance = D[:num_modes_95+1];
p_test = np.prod(np.exp(np.divide(- 0.5 * np.power(coeffs_test[:num_modes_95+1,:],2),
np.transpose(np.tile(variance*2,(num_examples_test,1))))),axis=0)
p_test,ind_test = np.sort(p_test),np.argsort(p_test)
p_train = np.prod(np.exp(np.divide(- 0.5 * np.power(coeffs_train[:num_modes_95+1,:],2),
np.transpose(np.tile(variance*2,(num_examples_train,1))))),axis=0)
p_train,ind_train = np.sort(p_train),np.argsort(p_train)
# plot sorted probabilities
fig = plt.figure(figsize=(12, 8));
ax = fig.add_subplot(111);
ax.plot(p_train,color='b',linestyle='',marker='+',linewidth=1.5,label='training data')
ax.plot(p_test,color='r',linestyle='',marker='+',linewidth=1.5,label='testing data')
ax.grid(True)
ax.tick_params(axis='both', which='major', labelsize=15)
ax.set_title('Sorted shape probabilities',fontsize=16)
ax.set_xlabel('Sample Points',fontsize=16)
ax.set_ylabel('Similarity between the mean shape and sample shape',fontsize=16)
plt.legend( loc='upper left',prop={'size':16} )
plt.show()
# plot the 5 most abnormal ones from the patient dataset
fig = plt.figure(figsize=(16, 10));
num_shapes = 5
ax = fig.add_subplot(111, projection='3d');
ax.scatter(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind], s=70, marker='.', color='k')
ax.plot(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind], color='k', label='mean shape')
colors = iter(cm.rainbow(np.linspace(0, 1, num_shapes)))
for ii in range(num_shapes):
c_color = next(colors)
abnormal_shape = aligned_testing_data[:,ind_test[ii]]
ax.scatter(abnormal_shape[x_ind],abnormal_shape[y_ind],abnormal_shape[z_ind], s=70, marker='.', color=c_color)
ax.plot( abnormal_shape[x_ind], abnormal_shape[y_ind], abnormal_shape[z_ind],
color=c_color, label='abnormal shape {0}'.format(ii))
ax.set_title('Mean Shape and Abnormal Shapes',fontsize=16,x=0.65)
axisEqual3D(ax)
ax.view_init(35,-50)
plt.legend( loc='upper right' )
plt.show()