using Pkg
Pkg.activate("../../../lessons/")
Pkg.instantiate();

using CSV
using DataFrames
using LinearAlgebra
using Distributions
using StatsFuns
using RxInfer
using Plots
default(label="", margin=10Plots.pt)

# Read CSV file
df = DataFrame(CSV.File("../datasets/stock_exchange.csv"))

# Count number of samples
time_period = 436:536
num_samples = length(time_period)

# Extract columns
dates_num = 1:num_samples
dates_str = df[time_period,1]
stock_val = df[time_period,2]

# Set xticks
xtick_points = Int64.(round.(range(1, stop=num_samples, length=5)))

# Scatter exchange levels
scatter(dates_num, 
        stock_val, 
        color="black",
        label="", 
        ylabel="Stock Market Levels", 
        xlabel="time (days)",
        xticks=(xtick_points, [dates_str[i] for i in xtick_points]), 
        size=(800,300))

@model function linear_regression(μ_θ, Σ_θ, σ2_y; N=1)
    "Bayesian linear regression"
    
    # Allocate data variables
    X = datavar(Vector{Float64}, N)
    y = datavar(Float64, N)
    
    # Prior distribution of coefficients
    θ ~ MvNormalMeanCovariance(μ_θ, Σ_θ)
    
    for i = 1:N

        # Likelihood of i-th sample
        y[i] ~ NormalMeanVariance(dot(θ,X[i]), σ2_y)
        
    end
    return y, X, θ
end

# Prior parameters
μ_θ, Σ_θ = (zeros(2), diagm(ones(2)))

# Likelihood variance
σ2_y = 1.0;

# Call inference function
results = inference(
    model       = linear_regression(μ_θ, Σ_θ, σ2_y, N=num_samples),
    data        = (y = stock_val, X = [[dates_num[i], 1.0] for i in 1:num_samples]),
    returnvars  = (θ = KeepLast()),
)

# Extract posterior weights 
post_θ = results.posteriors[:θ]

# Define ranges for plot
x1 = range(-1.5, length=500, stop=1.5)
x2 = range(-2.5, length=500, stop=2.5)

# Draw contour plots of distributions
prior_θ = MvNormal(μ_θ, Σ_θ)
p1a = contour(x1, x2, (x1,x2) -> pdf(prior_θ, [x1,x2]), xlabel="θ1", ylabel="θ2", title="prior", label="")
p1b = contour(x1, x2, (x1,x2) -> pdf(post_θ, [x1,x2]), xlabel="θ1", title="posterior", label="")
plot(p1a, p1b, size=(900,300))

# Extract estimated weights
θ_MAP = mode(post_θ)

# Report results
println("Slope coefficient = "*string(θ_MAP[1]))
println("Intercept coefficient = "*string(θ_MAP[2]))

# Make predictions
regression_estimated = dates_num * θ_MAP[1] .+ θ_MAP[2];

# Visualize observations
scatter(dates_num, stock_val, color="black", xticks=(xtick_points, [dates_str[i] for i in xtick_points]), label="observations", legend=:topleft)

# Overlay regression function
plot!(dates_num, regression_estimated, color="blue", label="regression", linewidth=2, size=(800,300))

# Read CSV file
df = DataFrame(CSV.File("../datasets/diagnosis_train.csv"))

# Split dataframe into features and labels
features_train = Matrix(df[:,1:5])
labels_train = Vector(df[:,6])

# Store number of features
num_features = size(features_train,2)

# Number of training samples
num_train = size(features_train,1);

plot(xlabel="feature1", ylabel="feature2", size=(800,400))
scatter!(features_train[labels_train .== 0, 1], features_train[labels_train .== 0, 2], color="blue", label="Healthy Controls")
scatter!(features_train[labels_train .== 1, 1], features_train[labels_train .== 1, 2], color="red",  label="COPD patients")

# Parameters for priors
μ_θ = zeros(num_features+1,)
Σ_θ = diagm(ones(num_features+1));

@model function linear_classification(μ_θ, Σ_θ; N=1)
    "Bayesian classification model"
    
    # Allocate data variables
    X = datavar(Vector{Float64}, N)
    y = datavar(Float64, N)
    
    # Weight prior distribution
    θ ~ MvNormalMeanCovariance(μ_θ, Σ_θ)
    
    # Binary likelihood
    for i = 1:N
        y[i] ~ Probit(dot(θ, X[i]))
    end
    
    return y, X, θ
end

results = inference(
    model       = linear_classification(μ_θ, Σ_θ, N=num_train),
    data        = (y = labels_train, X = [[features_train[i,:]; 1.0] for i in 1:num_train]),
    returnvars  = (θ = KeepLast()),
    iterations  = 10,
)

# Coefficients to visualize
cix = [1,2]

# Reduce posterior distribution to chosen dimensions
m_cix = mean(results.posteriors[:θ])[cix]
S_cix = cov( results.posteriors[:θ])[cix,cix]
post_θ = MvNormal(m_cix, S_cix)

# Reduce prior distribution to chosen dimensions
prior_θ = MvNormal(μ_θ[cix], Σ_θ[cix,cix])

# Define ranges for plot
x1 = range(-1., length=500, stop=1.)
x2 = range(-1., length=500, stop=1.)

# Draw contour plots of distributions
p1a = contour(x1, x2, (x1,x2) -> pdf(prior_θ, [x1,x2]), xlabel="θ1", ylabel="θ2", title="prior", label="")
p1b = contour(x1, x2, (x1,x2) -> pdf(post_θ, [x1,x2]), xlabel="θ1", title="posterior", label="")
plot(p1a, p1b, size=(900,300))

# Read CSV file
df = DataFrame(CSV.File("../datasets/diagnosis_test.csv"))

# Split dataframe into features and labels
features_test = Matrix(df[:,1:5])
labels_test = Vector(df[:,6])

# Number of test samples
num_test = size(features_test,1);

# Extract MAP estimate of classification parameters
θ_MAP = mode(results.posteriors[:θ])

# Compute dot product between parameters and test data
fθ_pred = [features_test ones(num_test,)] * θ_MAP

# Predict labels through probit
labels_pred = round.(normcdf.(fθ_pred));

# Compute classification accuracy of test data
accuracy_test = mean(labels_test .== labels_pred)

# Report result
println("Test Accuracy = "*string(accuracy_test*100)*"%")