import numpy as np
from numpy import random
from matplotlib.pyplot import figure, subplot, cm

from scipy.io import loadmat
from modshogun import RealFeatures, MulticlassLabels

# load the dataset
dataset = loadmat('../../../data/multiclass/usps.mat')
Xall = dataset['data']

# the usps dataset has the digits labeled from 1 to 10
# we'll subtract 1 to put them in the 0-9 range instead
Yall = np.array(dataset['label'].squeeze(), dtype=np.double) - 1

# 7000 examples for training
Xtrain = RealFeatures(Xall[:,0:7000])
Ytrain = MulticlassLabels(Yall[0:7000])

# the rest for testing
Xtest = RealFeatures(Xall[:,7000:])
Ytest = MulticlassLabels(Yall[7000:])

from modshogun import NeuralLayers, DeepAutoencoder

# 256-dimensional input (16x16 images), a 512-128-512 stack of rectified linear
# hidden layers, and a 256-dimensional linear output layer for reconstruction
layers = NeuralLayers()
layers = layers.input(256).rectified_linear(512).rectified_linear(128).rectified_linear(512).linear(256).done()

ae = DeepAutoencoder(layers)

from modshogun import AENT_DROPOUT, NNOM_GRADIENT_DESCENT, MSG_INFO

# pre-training parameters, applied to each autoencoder in the stack
ae.pt_noise_type.set_const(AENT_DROPOUT)                    # use dropout noise
ae.pt_noise_parameter.set_const(0.5)                        # each input has a 50% chance of being set to zero
ae.pt_optimization_method.set_const(NNOM_GRADIENT_DESCENT)  # train using gradient descent
ae.pt_gd_learning_rate.set_const(0.01)
ae.pt_gd_mini_batch_size.set_const(128)
ae.pt_max_num_epochs.set_const(100)
ae.pt_epsilon.set_const(0.0)                                # disable automatic convergence testing

# allow INFO messages to be printed to the console, useful for monitoring training progress
ae.io.set_loglevel(MSG_INFO)

# start pre-training. this might take some time
ae.pre_train(Xtrain)

# fine-tuning parameters, applied to the deep autoencoder as a whole
ae.noise_type = AENT_DROPOUT    # same noise type we used for pre-training
ae.noise_parameter = 0.5
ae.max_num_epochs = 100
ae.optimization_method = NNOM_GRADIENT_DESCENT
ae.gd_mini_batch_size = 128
ae.gd_learning_rate = 0.0001
ae.epsilon = 0.0

# start fine-tuning. this might take some time
_ = ae.train(Xtrain)

# get a 50-example subset of the test set
subset = Xtest[:,0:50].copy()

# corrupt the first 25 examples with multiplicative noise
subset[:,0:25] *= (random.random((256,25)) > 0.5)

# corrupt the other 25 examples with additive noise
subset[:,25:50] += random.random((256,25))

# obtain the reconstructions
reconstructed_subset = ae.reconstruct(RealFeatures(subset))

# plot the corrupted data and the reconstructions side by side
figure(figsize=(10,10))
for i in range(50):
    ax1 = subplot(10,10,i*2+1)
    ax1.imshow(subset[:,i].reshape((16,16)), interpolation='nearest', cmap=cm.Greys_r)
    ax1.set_xticks([])
    ax1.set_yticks([])

    ax2 = subplot(10,10,i*2+2)
    ax2.imshow(reconstructed_subset[:,i].reshape((16,16)), interpolation='nearest', cmap=cm.Greys_r)
    ax2.set_xticks([])
    ax2.set_yticks([])

# obtain the weights matrix of the first hidden layer
# the 512 is the number of biases in the layer (512 neurons), which we skip over
# the transpose is because numpy stores matrices in row-major format, and Shogun stores
# them in column-major format
w1 = ae.get_layer_parameters(1)[512:].reshape(256,512).T

# visualize the weights between the first 100 neurons in the hidden layer
# and the neurons in the input layer
figure(figsize=(10,10))
for i in range(100):
    ax1 = subplot(10,10,i+1)
    ax1.imshow(w1[i,:].reshape((16,16)), interpolation='nearest', cmap=cm.Greys_r)
    ax1.set_xticks([])
    ax1.set_yticks([])

from modshogun import NeuralSoftmaxLayer

# convert the autoencoder's encoding layers into a neural network
# with a 10-class softmax output layer for supervised classification
nn = ae.convert_to_neural_network(NeuralSoftmaxLayer(10))

nn.max_num_epochs = 50

nn.io.set_loglevel(MSG_INFO)

nn.set_labels(Ytrain)
_ = nn.train(Xtrain)

from modshogun import MulticlassAccuracy

predictions = nn.apply_multiclass(Xtest)
accuracy = MulticlassAccuracy().evaluate(predictions, Ytest) * 100

print "Classification accuracy on the test set =", accuracy, "%"
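
# --- optional sanity check (not part of the original walkthrough) ---
# A minimal, hedged sketch of how one might quantify reconstruction quality on the
# corrupted subset instead of judging it only by eye. It assumes the features returned
# by ae.reconstruct() expose get_feature_matrix() for pulling the data back into numpy;
# if your Shogun build uses a different accessor, adjust accordingly.
clean = Xall[:,7000:7050]                               # the uncorrupted test examples
recon = reconstructed_subset.get_feature_matrix()       # reconstructions as a numpy array (assumed accessor)
mse_per_example = np.mean((recon - clean)**2, axis=0)   # mean squared error per image
print "Average reconstruction MSE over the 50 corrupted examples =", np.mean(mse_per_example)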