%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.mplot3d import Axes3D


def axisEqual3D(ax):
    # force equal scaling on all three axes of a 3D plot
    extents = np.array([getattr(ax, 'get_{}lim'.format(dim))() for dim in 'xyz'])
    sz = extents[:, 1] - extents[:, 0]
    centers = np.mean(extents, axis=1)
    maxsize = max(abs(sz))
    r = maxsize / 2
    for ctr, dim in zip(centers, 'xyz'):
        getattr(ax, 'set_{}lim'.format(dim))(ctr - r / 3, ctr + r / 3)


def plotmy3Ddata(input_data, title='', view=[35, -50]):
    # each column of input_data is one shape, stored as [x_1..x_k, y_1..y_k, z_1..z_k]
    dim, num_samples = input_data.shape
    num_centroids = dim // 3
    x_ind = range(num_centroids)
    y_ind = range(num_centroids, num_centroids * 2)
    z_ind = range(num_centroids * 2, num_centroids * 3)
    fig = plt.figure(figsize=(16, 10))
    ax = fig.add_subplot(111, projection='3d')
    colors = iter(cm.rainbow(np.linspace(0, 1, num_samples)))
    for col in range(num_samples):
        c_color = next(colors)
        ax.scatter(input_data[x_ind, col], input_data[y_ind, col], input_data[z_ind, col],
                   color=c_color, s=16, marker='.')
        ax.plot(input_data[x_ind, col], input_data[y_ind, col], input_data[z_ind, col],
                color=c_color)
    ax.set_title(title, fontsize=20, x=0.65, y=0.95)
    axisEqual3D(ax)
    ax.view_init(view[0], view[1])
    plt.show()


# Implementation of the Umeyama method for computing rigid-body transformations
def umeyama_rigid(X, Y):
    # Get dimension and number of points
    m, n = X.shape

    # Demean the point sets X and Y
    X_mean = X.mean(1)                          # MODEL ANSWER
    Y_mean = Y.mean(1)                          # MODEL ANSWER
    X_demean = X - np.tile(X_mean, (n, 1)).T    # MODEL ANSWER
    Y_demean = Y - np.tile(Y_mean, (n, 1)).T    # MODEL ANSWER

    # Compute matrix XY' using the demeaned point sets
    XY = np.dot(X_demean, Y_demean.T)           # MODEL ANSWER

    # Singular value decomposition
    U, D, V = np.linalg.svd(XY, full_matrices=True, compute_uv=True)
    V = V.T.copy()

    # Determine rotation
    R = np.dot(V, U.T)                          # MODEL ANSWER

    # Determine translation
    t = Y_mean - np.dot(R, X_mean)              # MODEL ANSWER

    return R, t


# Implementation of the Umeyama method for computing similarity transformations
def umeyama_similarity(X, Y):
    # Get dimension and number of points
    m, n = X.shape

    # Demean the point sets X and Y
    X_mean = X.mean(1)                          # MODEL ANSWER
    Y_mean = Y.mean(1)                          # MODEL ANSWER
    X_demean = X - np.tile(X_mean, (n, 1)).T    # MODEL ANSWER
    Y_demean = Y - np.tile(Y_mean, (n, 1)).T    # MODEL ANSWER

    # Compute matrix XY' using the demeaned and NORMALISED point sets
    # (divide by the number of points n), see Equation (38) in the paper
    XY = np.dot(X_demean, Y_demean.T) / n       # MODEL ANSWER

    # Determine the variances of the point sets X and Y, see Equations (36),(37) in the paper
    X_var = np.mean(np.sum(X_demean * X_demean, 0))
    Y_var = np.mean(np.sum(Y_demean * Y_demean, 0))

    # Singular value decomposition
    U, D, V = np.linalg.svd(XY, full_matrices=True, compute_uv=True)
    V = V.T.copy()

    # Determine rotation
    R = np.dot(V, U.T)                          # MODEL ANSWER

    # Determine the scaling, see Equation (42) in the paper
    # (S is assumed to be the identity matrix, so it is ignored here)
    c = np.trace(np.diag(D)) / X_var            # MODEL ANSWER

    # Determine translation, see Equation (41) in the paper
    t = Y_mean - c * np.dot(R, X_mean)          # MODEL ANSWER

    return R, c, t
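# Note: both functions above drop the diagonal matrix S from Umeyama's solution and can
# therefore return a reflection (det(R) = -1) for degenerate or noisy point configurations.
# The helper below is a minimal sketch (not part of the original exercise) of a rigid variant
# with the determinant correction from Umeyama's theorem; the name umeyama_rigid_safe is made
# up here for illustration.
def umeyama_rigid_safe(X, Y):
    m, n = X.shape
    X_mean, Y_mean = X.mean(1), Y.mean(1)
    X_demean = X - np.tile(X_mean, (n, 1)).T
    Y_demean = Y - np.tile(Y_mean, (n, 1)).T
    U, D, V = np.linalg.svd(np.dot(X_demean, Y_demean.T))
    V = V.T.copy()
    # S flips the last axis whenever V U^T would be a reflection rather than a rotation
    S = np.eye(m)
    S[-1, -1] = 1.0 if np.linalg.det(np.dot(V, U.T)) >= 0 else -1.0
    R = np.dot(V, np.dot(S, U.T))
    t = Y_mean - np.dot(R, X_mean)
    return R, t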
# Load the training data: each column is one spine shape given by vertebral centroids,
# stored as [x_1..x_k, y_1..y_k, z_1..z_k]
train_data = np.genfromtxt('./data/spine_training.txt')
dimension, num_examples_train = train_data.shape
num_centroids = dimension // 3
x_ind = range(num_centroids)
y_ind = range(num_centroids, num_centroids * 2)
z_ind = range(num_centroids * 2, num_centroids * 3)

print('number of dimensions: %d' % dimension)
print('number of training examples: %d' % num_examples_train)

plotmy3Ddata(train_data, title='Original Training Data')

# Pre-processing: align the shapes - three variants
#   1: center-of-mass alignment
#   2: rigid-body alignment
#   3: similarity alignment
align = 1

aligned_data = np.zeros(train_data.shape)

if align == 1:
    # subtract the per-shape center of mass from each coordinate
    cx = train_data[x_ind, :]
    cy = train_data[y_ind, :]
    cz = train_data[z_ind, :]
    aligned_data[x_ind, :] = cx - np.tile(np.mean(cx, axis=0), (num_centroids, 1))
    aligned_data[y_ind, :] = cy - np.tile(np.mean(cy, axis=0), (num_centroids, 1))
    aligned_data[z_ind, :] = cz - np.tile(np.mean(cz, axis=0), (num_centroids, 1))
elif align == 2:
    # rigid-body alignment of every shape to the first training shape
    ref_shape = train_data[:, 0].reshape((3, num_centroids))
    for ii in range(1, num_examples_train):
        mov_shape = train_data[:, ii].reshape((3, num_centroids))
        R, t = umeyama_rigid(mov_shape, ref_shape)
        mov_shape = np.dot(R, mov_shape) + np.tile(t, (num_centroids, 1)).transpose()
        aligned_data[:, ii] = mov_shape.reshape((3 * num_centroids, 1)).flatten()
    aligned_data[:, 0] = train_data[:, 0]
elif align == 3:
    # similarity alignment (rotation, translation and scaling) to the first training shape
    ref_shape = train_data[:, 0].reshape((3, num_centroids))
    for ii in range(1, num_examples_train):
        mov_shape = train_data[:, ii].reshape((3, num_centroids))
        R, c, t = umeyama_similarity(mov_shape, ref_shape)
        mov_shape = c * np.dot(R, mov_shape) + np.tile(t, (num_centroids, 1)).transpose()
        aligned_data[:, ii] = mov_shape.reshape((3 * num_centroids, 1)).flatten()
    aligned_data[:, 0] = train_data[:, 0]

plotmy3Ddata(aligned_data, title='Aligned Training Data')
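# Variants 2 and 3 align every shape to the first training example, so the result depends on
# that arbitrary choice of reference. A common alternative is generalised Procrustes analysis,
# which repeatedly aligns all shapes to their current mean. The function below is only a sketch
# of that idea (not part of the original exercise) and reuses umeyama_rigid from above.
def procrustes_align_to_mean(data, num_centroids, num_iters=5):
    aligned = data.copy()
    num_samples = data.shape[1]
    for _ in range(num_iters):
        # the current mean shape acts as the reference for this iteration
        mean_shape = np.mean(aligned, axis=1).reshape((3, num_centroids))
        for ii in range(num_samples):
            mov = aligned[:, ii].reshape((3, num_centroids))
            R, t = umeyama_rigid(mov, mean_shape)
            aligned[:, ii] = (np.dot(R, mov) + np.tile(t, (num_centroids, 1)).T).flatten()
    return aligned

# usage sketch (not run here): aligned_data = procrustes_align_to_mean(train_data, num_centroids)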
# Perform Shape PCA

# compute the mean shape over all training samples
aligned_data_mean = np.mean(aligned_data, axis=1)

# demean the data
aligned_data_m = aligned_data - np.transpose(np.tile(aligned_data_mean, (num_examples_train, 1)))

# get eigenvectors and eigenvalues via SVD of the scaled, demeaned data matrix
U, D, V = np.linalg.svd(aligned_data_m / np.sqrt(num_examples_train - 1), full_matrices=True)
# the squared singular values are the eigenvalues of the sample covariance matrix
D = np.multiply(D, D)

# retained variance with increasing number of modes
retained_variance = np.cumsum(D) / np.sum(D)

# number of modes needed to retain 95% of the variance (modes 0 to num_modes_95)
num_modes_95 = next(idx for idx, v in enumerate(retained_variance) if v > 0.95)
# number of modes needed to retain 99% of the variance (modes 0 to num_modes_99)
num_modes_99 = next(idx for idx, v in enumerate(retained_variance) if v > 0.99)

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax.plot([num_modes_95, num_centroids * 3], [0.95, 0.95], color='r', linestyle='--', linewidth=1.5)
ax.text(dimension + 3, 0.95, '95%', fontsize=15)
ax.plot([num_modes_99, num_centroids * 3], [0.99, 0.99], color='b', linestyle='--', linewidth=1.5)
ax.text(dimension + 3, 0.99, '99%', fontsize=15)
ax.plot([num_modes_95, num_modes_95], [0, 0.95], color='r', linestyle='--', linewidth=1.5)
ax.plot([num_modes_99, num_modes_99], [0, 0.99], color='b', linestyle='--', linewidth=1.5)
ax.plot(retained_variance, color='k', linewidth=1.5)
ax.grid(True)
ax.tick_params(axis='both', which='major', labelsize=15)
x1, x2, y1, y2 = plt.axis()
plt.axis((x1, x2, 0, 1.1))
ax.set_title("Modes vs Variance: ({0} | 95%) / ({1} | 99%)".format(num_modes_95, num_modes_99), fontsize=16)
plt.show()

# plot the variation of the mean shape along each mode covering 95% of the variance
fig = plt.figure(figsize=(15, 2.5 * num_modes_95))
for ii in range(num_modes_95 + 1):
    # add and subtract 3 standard deviations (which covers 99.7% of the data along each mode)
    sp = aligned_data_mean + U[:, ii] * np.sqrt(D[ii]) * 3
    sn = aligned_data_mean - U[:, ii] * np.sqrt(D[ii]) * 3
    ax = fig.add_subplot(int(np.ceil((num_modes_95 + 1) / 2.0)), 2, ii + 1, projection='3d')
    ax.scatter(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind],
               s=70, marker='.', color='k')
    ax.scatter(sp[x_ind], sp[y_ind], sp[z_ind], s=70, marker='.', color='b')
    ax.scatter(sn[x_ind], sn[y_ind], sn[z_ind], s=70, marker='.', color='b')
    ax.plot(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind], color='k')
    ax.plot(sp[x_ind], sp[y_ind], sp[z_ind], color='b')
    ax.plot(sn[x_ind], sn[y_ind], sn[z_ind], color='b')
    ax.set_title('Mode {0}'.format(ii), fontsize=16, x=0.65)
    ax.view_init(45, -45)
    axisEqual3D(ax)
plt.show()

# random sampling of a few shapes from the PCA model
# this shows that PCA gives us a generative model
fig = plt.figure(figsize=(16, 10))
scaling = 2
std_dev = np.sqrt(D[:num_modes_95 + 1])
num_shapes = 5
ax = fig.add_subplot(111, projection='3d')
ax.scatter(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind],
           s=70, marker='.', color='k')
ax.plot(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind],
        color='k', label='mean shape')
colors = iter(cm.rainbow(np.linspace(0, 1, num_shapes)))
for ii in range(num_shapes):
    c_color = next(colors)
    # draw random mode coefficients from a (scaled) normal distribution
    coeffs = np.multiply(np.random.normal(size=[num_modes_95 + 1]), std_dev * scaling)
    random_shape = aligned_data_mean - np.dot(U[:, :num_modes_95 + 1], coeffs)
    ax.scatter(random_shape[x_ind], random_shape[y_ind], random_shape[z_ind],
               s=70, marker='.', color=c_color)
    ax.plot(random_shape[x_ind], random_shape[y_ind], random_shape[z_ind],
            color=c_color, label='random shape {0}'.format(ii))
ax.set_title('Mean Shape and Random Shapes', fontsize=16, x=0.65)
axisEqual3D(ax)
ax.view_init(35, -50)
plt.legend(loc='upper right')
plt.show()

# project shapes from a patient database and determine abnormal cases
# this shows that the PCA model can be used for abnormality detection
test_data = np.genfromtxt('./data/spine_testing.txt')
num_examples_test = test_data.shape[1]

plotmy3Ddata(test_data, title='Original Testing Data', view=[35, 20])

# align the test data in the same way as the training data
aligned_testing_data = np.zeros(test_data.shape)

if align == 1:
    cx = test_data[x_ind, :]
    cy = test_data[y_ind, :]
    cz = test_data[z_ind, :]
    aligned_testing_data[x_ind, :] = cx - np.tile(np.mean(cx, axis=0), (num_centroids, 1))
    aligned_testing_data[y_ind, :] = cy - np.tile(np.mean(cy, axis=0), (num_centroids, 1))
    aligned_testing_data[z_ind, :] = cz - np.tile(np.mean(cz, axis=0), (num_centroids, 1))
elif align == 2:
    ref_shape = train_data[:, 0].reshape((3, num_centroids))
    for ii in range(0, num_examples_test):
        mov_shape = test_data[:, ii].reshape((3, num_centroids))
        R, t = umeyama_rigid(mov_shape, ref_shape)
        mov_shape = np.dot(R, mov_shape) + np.tile(t, (num_centroids, 1)).transpose()
        aligned_testing_data[:, ii] = mov_shape.reshape((3 * num_centroids, 1)).flatten()
elif align == 3:
    ref_shape = train_data[:, 0].reshape((3, num_centroids))
    for ii in range(0, num_examples_test):
        mov_shape = test_data[:, ii].reshape((3, num_centroids))
        R, c, t = umeyama_similarity(mov_shape, ref_shape)
        mov_shape = c * np.dot(R, mov_shape) + np.tile(t, (num_centroids, 1)).transpose()
        aligned_testing_data[:, ii] = mov_shape.reshape((3 * num_centroids, 1)).flatten()

plotmy3Ddata(aligned_testing_data, title='Aligned Testing Data', view=[35, 20])

# project the test and training data onto the PCA shape model
coeffs_test = np.dot(np.transpose(U),
                     aligned_testing_data - np.transpose(np.tile(aligned_data_mean, (num_examples_test, 1))))
coeffs_train = np.dot(np.transpose(U),
                      aligned_data - np.transpose(np.tile(aligned_data_mean, (num_examples_train, 1))))
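# The same modes can also be used in the other direction: projecting a shape onto the retained
# modes and mapping it back gives a model-constrained reconstruction, which shows how much of an
# individual shape the model can explain. The helper below is a sketch only (not part of the
# original exercise) and reuses U, aligned_data_mean and num_modes_95 defined above.
def reconstruct_from_modes(shape, num_modes):
    # project onto the first `num_modes` principal modes ...
    b = np.dot(U[:, :num_modes].T, shape - aligned_data_mean)
    # ... and reconstruct the shape from those mode coefficients
    return aligned_data_mean + np.dot(U[:, :num_modes], b)

# usage sketch (not run here): compare one test shape to its model-constrained reconstruction
# recon = reconstruct_from_modes(aligned_testing_data[:, 0], num_modes_95 + 1)
# print('residual norm: %.3f' % np.linalg.norm(aligned_testing_data[:, 0] - recon))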
# compute a per-subject probability, considering only the modes covering 95% of the variance;
# each mode coefficient b_i is scored with an (unnormalised) Gaussian exp(-b_i^2 / (2 * sigma_i^2))
variance = D[:num_modes_95 + 1]

p_test = np.prod(np.exp(-0.5 * np.power(coeffs_test[:num_modes_95 + 1, :], 2) /
                        np.transpose(np.tile(variance, (num_examples_test, 1)))), axis=0)
p_test, ind_test = np.sort(p_test), np.argsort(p_test)

p_train = np.prod(np.exp(-0.5 * np.power(coeffs_train[:num_modes_95 + 1, :], 2) /
                         np.transpose(np.tile(variance, (num_examples_train, 1)))), axis=0)
p_train, ind_train = np.sort(p_train), np.argsort(p_train)

# plot the sorted probabilities
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111)
ax.plot(p_train, color='b', linestyle='', marker='+', linewidth=1.5, label='training data')
ax.plot(p_test, color='r', linestyle='', marker='+', linewidth=1.5, label='testing data')
ax.grid(True)
ax.tick_params(axis='both', which='major', labelsize=15)
ax.set_title('Sorted shape probabilities', fontsize=16)
ax.set_xlabel('Sample points', fontsize=16)
ax.set_ylabel('Shape probability under the PCA model (unnormalised)', fontsize=16)
plt.legend(loc='upper left', prop={'size': 16})
plt.show()

# plot the 5 most abnormal shapes from the patient dataset
fig = plt.figure(figsize=(16, 10))
num_shapes = 5
ax = fig.add_subplot(111, projection='3d')
ax.scatter(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind],
           s=70, marker='.', color='k')
ax.plot(aligned_data_mean[x_ind], aligned_data_mean[y_ind], aligned_data_mean[z_ind],
        color='k', label='mean shape')
colors = iter(cm.rainbow(np.linspace(0, 1, num_shapes)))
for ii in range(num_shapes):
    c_color = next(colors)
    # ind_test is sorted by ascending probability, so the first entries are the most abnormal shapes
    abnormal_shape = aligned_testing_data[:, ind_test[ii]]
    ax.scatter(abnormal_shape[x_ind], abnormal_shape[y_ind], abnormal_shape[z_ind],
               s=70, marker='.', color=c_color)
    ax.plot(abnormal_shape[x_ind], abnormal_shape[y_ind], abnormal_shape[z_ind],
            color=c_color, label='abnormal shape {0}'.format(ii))
ax.set_title('Mean Shape and Abnormal Shapes', fontsize=16, x=0.65)
axisEqual3D(ax)
ax.view_init(35, -50)
plt.legend(loc='upper right')
plt.show()
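# The product of per-mode Gaussians above can underflow when many modes are retained. A common,
# numerically safer alternative is to rank shapes by their Mahalanobis distance in coefficient
# space, which gives the same ordering (it is proportional to the negative log of the score above).
# This is a sketch only (not part of the original exercise), reusing coeffs_test and variance.
# d^2 = sum_i b_i^2 / sigma_i^2, where a larger distance means a more abnormal shape
maha_test = np.sum(np.power(coeffs_test[:num_modes_95 + 1, :], 2) /
                   np.transpose(np.tile(variance, (num_examples_test, 1))), axis=0)
# sorting by descending Mahalanobis distance matches sorting by ascending probability
ind_test_maha = np.argsort(-maha_test)
print('most abnormal test shapes (indices): %s' % ind_test_maha[:5])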