#!/usr/bin/env python
# coding: utf-8

# Here the accuracy and computation time of the training of simple
# fully-connected neural networks using numpy and pytorch implementations,
# applied to the MNIST data set, are compared. The Adam optimization
# algorithm in numpy and pytorch are compared, as well as the Scaled
# Conjugate Gradient optimization algorithm in numpy.
#
# Additional comments and explanations will be added shortly. Suggestions
# and corrections are welcome.

# In[ ]:


get_ipython().system('nvidia-smi')


# In[ ]:


import numpy as np
import pickle
import gzip
import json
import time
import sys
import subprocess

# For my numpy neural network implementation
import neuralnetworks as nn

# for pytorch
import os
import torch
import torch.nn as tnn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from torch.autograd import Variable

# for reading and plotting results
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[ ]:


######################################################################
## Read mnist data into dataTrain

# NOTE: pickle executes arbitrary code on load; only use a trusted
# mnist.pkl.gz.
with gzip.open('mnist.pkl.gz', 'rb') as f:
    train_set, valid_set, test_set = pickle.load(f, encoding='latin1')

Xtrain = np.array(train_set[0])
Xval = np.array(valid_set[0])
Xtest = np.array(test_set[0])

# Targets are stored as column vectors so they can be stacked next to X.
Ttrain = np.array(train_set[1]).reshape((-1, 1))
Tval = np.array(valid_set[1]).reshape((-1, 1))
Ttest = np.array(test_set[1]).reshape((-1, 1))

# Fold the validation split into the training data,
# to match with main-gpu.py in res/pytorch...01../feed.../main-gpu.py
Xtrain = np.vstack((Xtrain, Xval))
Ttrain = np.vstack((Ttrain, Tval))

# Inputs and target share one array (target is the last column) so that
# shuffling rows keeps each sample paired with its label.
dataTrain = np.hstack((Xtrain, Ttrain))  # so we can shuffle the order
dataTrain = dataTrain.astype(np.float32)
nSamples = dataTrain.shape[0]


# In[ ]:


######################################################################
## Write results

def writeResults(filename, timeAcc, label):
    """Append one labelled series of (seconds, error) pairs to a results file.

    The record format is the one plotFromFile reads back: a label line, a
    line holding the number of pairs, then one '<seconds> <error>' line per
    pair.

    Args:
        filename: path of the file to append to, or the literal string
            'stdout' to print the record instead.
        timeAcc: sequence of two-element sequences
            [elapsed seconds, error fraction].
        label: text identifying the run.
    """
    lines = [label + '\n', str(len(timeAcc)) + '\n']
    lines.extend('{:.2f} {:.3f}\n'.format(ta[0], ta[1]) for ta in timeAcc)
    if filename == 'stdout':
        sys.stdout.writelines(lines)
    else:
        # The context manager closes the file even if a write fails.
        with open(filename, 'a') as f:
            f.writelines(lines)


# In[ ]:


def runnumpy(batchSize=None, numEpochs=10, hidden=None, nIterations=100,
             useRelu=False, useAdam=False):
    """Train the numpy network on MNIST and record test error over time.

    After every epoch the test-set error fraction and the elapsed wall-clock
    time are recorded; the series is appended to the module-level
    resultsFilename and, for numEpochs <= 10, also printed.

    Args:
        batchSize: mini-batch size, or None to train on all samples at once.
        numEpochs: number of training rounds, i.e. the number of points at
            which the test error is measured.
        hidden: list of hidden layer sizes; defaults to [100].
        nIterations: optimizer iterations per call to nnet.train.
        useRelu: passed to the network constructor; the label reads ReLU
            when True and Tanh otherwise.
        useAdam: passed to nnet.train; the label reads Adam when True and
            SCG otherwise.
    """
    hidden = [100] if hidden is None else list(hidden)

    label = 'Numpy '
    label += 'Adam' if useAdam else 'SCG'
    label += ' batch {} epochs {:d} hids {} nIter {:d}'.format(
        batchSize, numEpochs, hidden, nIterations)
    label += ' ReLU ' if useRelu else ' Tanh '
    label += time.strftime('%m/%d/%y-%H:%M')

    Xtrain = dataTrain[:, :-1]
    Ttrain = dataTrain[:, -1:]

    nnet = nn.NeuralNetworkClassifier([Xtrain.shape[1]] + hidden + [10],
                                      np.arange(10), useRelu=useRelu)
    # NOT STANDARDIZING THE INPUTS!!!
    nnet.setStandardize(False)

    secsAcc = []

    # numEpochs determines number of breaks during training to calculate
    # test error
    if batchSize is None:
        startTime = time.time()
        for _ in range(numEpochs):
            # useAdam is forwarded so the optimizer matches the label, as in
            # the mini-batch branch below.
            nnet.train(Xtrain, Ttrain, nIterations=nIterations,
                       verbose=False, useAdam=useAdam)
            ptest = nnet.use(Xtest)
            secsAcc.append([time.time() - startTime,
                            np.mean(ptest != Ttest)])
    else:
        # numpy on batches
        nSamples = dataTrain.shape[0]
        if nSamples % batchSize != 0:
            print('WARNING: nSamples {} is not divisible by batchSize {}'.format(
                nSamples, batchSize))
        startTime = time.time()
        for _ in range(numEpochs):
            # Shuffles the rows of the module-level array in place.
            np.random.shuffle(dataTrain)
            for traini in range(0, nSamples, batchSize):
                Xbatch = dataTrain[traini:traini + batchSize, :-1]
                Tbatch = dataTrain[traini:traini + batchSize, -1:]
                nnet.train(Xbatch, Tbatch, restart=True,
                           nIterations=nIterations, useAdam=useAdam)
            ptest = nnet.use(Xtest)
            secsAcc.append([time.time() - startTime,
                            np.mean(ptest != Ttest)])

    writeResults(resultsFilename, secsAcc, label)
    if numEpochs <= 10:
        writeResults('stdout', secsAcc, label)


# In[ ]:


def runpytorch(batchSize=100, numEpochs=10, hidden=None, learningRate=0.001,
               nIterations=100, useRelu=True, useGPU=False):
    """Train a pytorch network on MNIST with Adam and record test error.

    After every epoch the test-set error fraction and the elapsed wall-clock
    time are recorded; the series is appended to the module-level
    resultsFilename and, for numEpochs <= 10, also printed. When useGPU is
    True and CUDA is not available, a message is printed and nothing is run.

    Args:
        batchSize: mini-batch size for both data loaders.
        numEpochs: number of passes over the training set.
        hidden: list of one or two hidden layer sizes; defaults to [100].
        learningRate: Adam learning rate.
        nIterations: optimizer steps taken on each mini-batch.
        useRelu: use ReLU activations when True, Tanh otherwise.
        useGPU: run the model and the batches on CUDA device 0.
    """
    hidden = [100] if hidden is None else list(hidden)

    if useGPU:
        os.environ['CUDA_VISIBLE_DEVICES'] = '0'
        print('torch.cuda.is_available() is', torch.cuda.is_available())
        if not torch.cuda.is_available():
            print('GPU is not available. Not running sgd pytorch cpu')
            return

    label = 'Pytorch '
    if useGPU:
        label += 'GPU '
    label += 'Adam batch {} epochs {:d} lr {:.6f} hids {} nIter {:d}'.format(
        batchSize, numEpochs, learningRate, hidden, nIterations)
    label += ' ReLU ' if useRelu else ' Tanh '
    label += time.strftime('%m/%d/%y-%H:%M')

    # Neural Network Model (1 or 2 hidden layers)
    class Net(tnn.Module):
        def __init__(self, input_size, hidden_size, num_classes):
            super(Net, self).__init__()
            self.hidden_size = hidden_size
            self.fc1 = tnn.Linear(input_size, hidden_size[0])
            self.relu = tnn.ReLU() if useRelu else tnn.Tanh()
            if len(hidden_size) > 1:
                self.fc2 = tnn.Linear(hidden_size[0], hidden_size[1])
                self.relu2 = tnn.ReLU() if useRelu else tnn.Tanh()
                self.fc3 = tnn.Linear(hidden_size[1], num_classes)
            else:
                self.fc3 = tnn.Linear(hidden_size[0], num_classes)

        def forward(self, x):
            out = self.fc1(x)
            out = self.relu(out)
            if len(self.hidden_size) > 1:
                out = self.fc2(out)
                out = self.relu2(out)
            out = self.fc3(out)
            return out

    train_dataset = dsets.MNIST(root='./data',
                                train=True,
                                transform=transforms.ToTensor(),
                                download=True)

    test_dataset = dsets.MNIST(root='./data',
                               train=False,
                               transform=transforms.ToTensor())

    # Data Loader (Input Pipeline)
    # shuffle=True makes the loader reorder the training set every epoch.
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batchSize,
                                               shuffle=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batchSize,
                                              shuffle=False)

    num_classes = 10
    net = Net(784, hidden, num_classes)
    if useGPU:
        net.cuda()

    # Loss and Optimizer
    criterion = tnn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=learningRate)

    nTrain = len(train_dataset)
    if nTrain % batchSize != 0:
        print('WARNING: nSamples {} is not divisible by batchSize {}'.format(
            nTrain, batchSize))

    secsAcc = []
    startTime = time.time()
    for epoch in range(numEpochs):
        for i, (images, labels) in enumerate(train_loader):
            # Flatten each 28x28 image into a 784-element row.
            images = Variable(images.view(-1, 28 * 28))
            labels = Variable(labels)
            if useGPU:
                images = images.cuda()
                labels = labels.cuda()

            # Forward + Backward + Optimize
            for _ in range(nIterations):
                optimizer.zero_grad()  # zero the gradient buffer
                outputs = net(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

        # Evaluate on the full test set after each epoch.
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = Variable(images.view(-1, 28 * 28))
            if useGPU:
                images = images.cuda()
            outputs = net(images)
            _, predicted = torch.max(outputs.data, 1)
            if useGPU:
                # labels stay on the CPU, so compare there.
                predicted = predicted.cpu()
            total += labels.size(0)
            # int() keeps the counter a plain Python int whether sum()
            # returns a number or a 0-dim tensor.
            correct += int((predicted == labels).sum())

        secsAcc.append([time.time() - startTime, (total - correct) / total])

    writeResults(resultsFilename, secsAcc, label)
    if numEpochs <= 10:
        writeResults('stdout', secsAcc, label)


# In[ ]:


resultsFilename = 'test.results'
# Start from a fresh results file; it is fine if none exists yet.
try:
    os.remove(resultsFilename)
except FileNotFoundError:
    pass

hidden = [500, 500]  # can contain one or two ints, for one or two hidden layers
batchSize = 100
numEpochs = 50


# In[ ]:


runpytorch(batchSize=batchSize, numEpochs=numEpochs, hidden=hidden,
           learningRate=0.001, nIterations=1, useRelu=False, useGPU=True)


# In[ ]:


# NOTE(review): this call is identical to the last runpytorch call below;
# possibly useGPU=True was intended here - confirm.
runpytorch(batchSize=batchSize, numEpochs=numEpochs, hidden=hidden,
           learningRate=0.001, nIterations=1, useRelu=True, useGPU=False)


# In[ ]:


runpytorch(batchSize=batchSize, numEpochs=numEpochs, hidden=hidden,
           learningRate=0.001, nIterations=1, useRelu=False, useGPU=False)


# In[ ]:


runpytorch(batchSize=batchSize, numEpochs=numEpochs, hidden=hidden,
           learningRate=0.001, nIterations=1, useRelu=True, useGPU=False)


# In[ ]:


def plotFromFile(filename='test.results'):
    """Plot every error-versus-time series stored in a results file.

    The file is expected in the format produced by writeResults: a label
    line, a count line, then that many '<seconds> <error>' lines, repeated.
    A later record with the same label replaces an earlier one. Curves are
    drawn on the current matplotlib figure in sorted label order.

    Args:
        filename: path of the results file to read.
    """
    results = {}
    with open(filename, 'r') as f:
        while True:
            label = f.readline()
            if not label:
                # readline returns '' only at end of file.
                break
            n = int(f.readline())
            secsAcc = [[float(s) for s in f.readline().split()]
                       for _ in range(n)]
            # Drop the line terminator so it does not end up in the legend.
            results[label.rstrip('\n')] = secsAcc

    markers = ['s', '8', '>', '^', '<', 'v', 'o', 'X', 'P', 'd', 'h', '*',
               'p', 'D', 'H']
    orderedLabels = sorted(results)
    print(orderedLabels)
    for mi, key in enumerate(orderedLabels):
        value = np.array(results[key])
        # Markers are reused cyclically when there are more series than
        # markers.
        plt.plot(value[:, 0], value[:, 1], '-',
                 marker=markers[mi % len(markers)],
                 label=key, lw=4, markersize=15)
    plt.xlabel('Seconds')
    plt.ylabel('Fraction of test samples incorrectly classified')
    plt.legend()


# In[ ]:


# cat test.results


# In[ ]:


plt.figure(figsize=(20, 12))
plotFromFile('test.results')


# In[ ]:


runnumpy(batchSize=batchSize, numEpochs=numEpochs, hidden=hidden,
         nIterations=1, useRelu=False, useAdam=True)
runnumpy(batchSize=batchSize, numEpochs=numEpochs, hidden=hidden,
         nIterations=1, useRelu=False, useAdam=False)
runnumpy(batchSize=batchSize, numEpochs=numEpochs, hidden=hidden,
         nIterations=1, useRelu=True, useAdam=True)
runnumpy(batchSize=batchSize, numEpochs=numEpochs, hidden=hidden,
         nIterations=1, useRelu=True, useAdam=False)


# In[ ]:


runnumpy(batchSize=None, numEpochs=50, hidden=hidden,
         nIterations=10, useRelu=False, useAdam=True)
runnumpy(batchSize=None, numEpochs=50, hidden=hidden,
         nIterations=10, useRelu=False, useAdam=False)
runnumpy(batchSize=None, numEpochs=50, hidden=hidden,
         nIterations=10, useRelu=True, useAdam=True)
runnumpy(batchSize=None, numEpochs=50, hidden=hidden,
         nIterations=10, useRelu=True, useAdam=False)


# In[ ]:


plt.figure(figsize=(20, 12))
plotFromFile('test.results')


# In[ ]:


plt.figure(figsize=(20, 12))
plotFromFile('test.results')
plt.ylim(0.01, 0.125)
# plt.xlim(0,40)


# In[ ]:


plt.figure(figsize=(20, 12))
plotFromFile('test.results')
plt.ylim(0.01, 0.04)
# plt.xlim(0,40)


# In[ ]: