using Distributions
using DelimitedFiles
using LinearAlgebra
using CuArrays
# Helper to easily switch between GPU and CPU execution.
use_gpu = true
# NOTE(review): the original called `gpu(x)`, which is a Flux.jl export; only
# CuArrays is loaded in this file, and its array-conversion function is `cu`.
# TODO confirm against the rest of the project before relying on the GPU path.
todevice(x) = use_gpu ? cu(x) : x
# Logistic (sigmoid) activation function: maps any real x into (0, 1).
# (Removed stray REPL output line that followed the definition.)
sigmoid(x) = 1 / (1 + exp(-x))
"""
Three-layer (input → hidden → output) feed-forward neural network.

Holds the layer sizes, the two link-weight matrices and the learning rate.
Declared `mutable` because `Train!` rebinds `wih`/`who` on every update step.
"""
mutable struct NetInfo
    # Number of nodes in the input, hidden and output layers.
    inodes::Int
    hnodes::Int
    onodes::Int
    # Link weight matrices (Matrix{T} is an alias for Array{T, 2}).
    wih::Matrix{Float64} # input layer  → hidden layer, size (hnodes, inodes)
    who::Matrix{Float64} # hidden layer → output layer, size (onodes, hnodes)
    # Learning rate used by `Train!`.
    lr::Float64
end

"""
    InitNet(inodes, hnodes, onodes, lr) -> NetInfo

Factory: build a `NetInfo` with weights sampled from a zero-mean normal
distribution with standard deviation `n^-0.5`, where `n` is the size of the
layer the weights feed into (same initialization the original code drew via
`Distributions.Normal`; `randn(...) .* σ` is the stdlib-only equivalent).
"""
function InitNet(inodes::Int, hnodes::Int, onodes::Int, lr::Float64)
    wih = randn(hnodes, inodes) .* hnodes^(-0.5)
    who = randn(onodes, hnodes) .* onodes^(-0.5)
    return NetInfo(inodes, hnodes, onodes, wih, who, lr)
end
"""
    Query(net::NetInfo, inputs) -> Matrix

Forward pass: propagate `inputs` — an `(inodes × batch)` column matrix —
through the hidden and output layers, applying the sigmoid activation after
each, and return the final output signals. Accepts any real-valued matrix
(backward compatible with the original `Array{Float64, 2}` restriction).
"""
function Query(net::NetInfo, inputs::AbstractMatrix{<:Real})
    # Signals entering / leaving the hidden layer.
    hidden_inputs = net.wih * inputs
    hidden_outputs = sigmoid.(hidden_inputs)
    # Signals entering / leaving the output layer.
    final_inputs = net.who * hidden_outputs
    final_outputs = sigmoid.(final_inputs)
    return final_outputs
end
"""
    Train!(net::NetInfo, inputs, targets)

Perform one gradient-descent step on `net` for a single example (or batch):
forward-propagate `inputs`, compare against `targets`, then update `net.who`
and `net.wih` in place. Returns `nothing` (mutating `!` convention; the
original implicitly returned the new `wih`, which no caller used).
"""
function Train!(net::NetInfo, inputs::AbstractMatrix{<:Real}, targets::AbstractMatrix{<:Real})
    # PART 1: forward pass, identical to `Query`.
    hidden_inputs = net.wih * inputs
    hidden_outputs = sigmoid.(hidden_inputs)
    final_inputs = net.who * hidden_outputs
    final_outputs = sigmoid.(final_inputs)
    # PART 2: backpropagate the error to guide the weight updates.
    # Output layer error = (target - actual).
    output_errors = targets - final_outputs
    # Hidden error uses the *current* who, before the update below.
    hidden_errors = net.who' * output_errors
    # Weight update: lr · (error ⊙ σ ⊙ (1 − σ)) · previous_layer_outputsᵀ
    net.who += net.lr .* (output_errors .* final_outputs .* (1.0 .- final_outputs)) * hidden_outputs'
    net.wih += net.lr .* (hidden_errors .* hidden_outputs .* (1.0 .- hidden_outputs)) * inputs'
    return nothing
end
# Network hyper-parameters.
input_nodes = 784    # 28×28 MNIST pixels per image
hidden_nodes = 200
output_nodes = 10    # digits 0–9
learning_rate = 0.1
epochs = 5
# BUG FIX: the original passed `input_nodes` as the hidden-layer size,
# silently building a 784-node hidden layer instead of the intended 200.
net_test = InitNet(input_nodes, hidden_nodes, output_nodes, learning_rate);
# Import training set (CSV, one image per row: label followed by 784 pixels).
training_data_file = readdlm("mnist_dataset/mnist_train.csv", ',');
# Import test set (same layout).
test_data_file = readdlm("mnist_dataset/mnist_test.csv", ',');
# Data set sizes (standard MNIST split: 60k train, 10k test).
train_data_size = 60000
test_data_size = 10000
# NOTE(review): removed duplicate redefinitions of `use_gpu`, `todevice` and
# `epochs` that appeared here — identical definitions already exist earlier
# in the file (leftover REPL-session residue).
# Cycle training: `epochs` full passes over the training set.
@time for e = 1:epochs
    # Train the neural network on every record.
    for record = 1:train_data_size
        # Rescale pixel values from 0–255 into 0.01–1.0 (avoids zero inputs,
        # which would kill the corresponding weight gradients).
        inputs = (training_data_file[record, 2:end] ./ 255.0 .* 0.99) .+ 0.01
        inputs = reshape(inputs, input_nodes, 1) # column matrix for the net
        # Soft one-hot target: 0.01 everywhere, 0.99 at the true label.
        targets = zeros(output_nodes) .+ 0.01
        targets[round(Int, training_data_file[record, 1]) + 1] = 0.99
        targets = reshape(targets, output_nodes, 1) # was hard-coded 10
        Train!(net_test, inputs, targets)
    end
end
# Effect test: score the trained network on the held-out test set.
scorecard = Int[] # typed vector — the original untyped `[]` is Vector{Any}
for record = 1:test_data_size
    correct_label = Int(test_data_file[record, 1])
    inputs = (test_data_file[record, 2:end] ./ 255.0 .* 0.99) .+ 0.01
    inputs = reshape(inputs, input_nodes, 1) # column matrix for the net
    outputs = Query(net_test, inputs)
    # findmax on a matrix yields (value, CartesianIndex); the row index is
    # the predicted class (1-based).
    label = findmax(outputs)[2][1]
    # Network rows are 1-based; MNIST labels are 0-based.
    push!(scorecard, label - 1 == correct_label ? 1 : 0)
end
# Accuracy
print("performance = ", sum(scorecard) / length(scorecard))