using DataFrames, CSV, LinearAlgebra
using Distributions            # MvNormal, pdf
using PyPlot                   # subplot, title, tight_layout
include("scripts/gmm_plot.jl") # Holds the plotting function plotGMM()

# Load the Old Faithful data and arrange it as a 2×N matrix (one observation per column)
old_faithful = CSV.read("datasets/old_faithful.csv", DataFrame)
X = convert(Matrix{Float64}, [old_faithful[:,1] old_faithful[:,2]]')
N = size(X, 2)

# Initialize the GMM. We assume 2 clusters.
clusters = [MvNormal([4.;60.], [.5 0;0 10^2]); MvNormal([2.;80.], [.5 0;0 10^2])]
π_hat = [0.5; 0.5]                         # Mixing weights
γ = fill!(Matrix{Float64}(undef,2,N), NaN) # Responsibilities (row per cluster)

# Define functions for updating the responsibilities and the parameters
function updateResponsibilities!(X, clusters, π_hat, γ)
    # Expectation step: update γ
    norm = [pdf(clusters[1], X) pdf(clusters[2], X)] * π_hat # Mixture density per data point
    γ[1,:] = (π_hat[1] * pdf(clusters[1],X) ./ norm)'
    γ[2,:] = 1 .- γ[1,:]
end

function updateParameters!(X, clusters, π_hat, γ)
    # Maximization step: update π_hat and clusters using ML estimation
    m = sum(γ, dims=2)          # Effective number of points assigned to each cluster
    π_hat[:] = vec(m) / N       # Update in place so the caller sees the new mixing weights
    μ_hat = (X * γ') ./ m'      # Responsibility-weighted means (one column per cluster)
    for k=1:2
        Z = (X .- μ_hat[:,k])
        Σ_k = Symmetric(((Z .* (γ[k,:])') * Z') / m[k]) # Responsibility-weighted covariance
        clusters[k] = MvNormal(μ_hat[:,k], Matrix(Σ_k))
    end
end

# Execute the algorithm: iteratively update responsibilities and parameters
subplot(2,3,1); plotGMM(X, clusters, γ); title("Initial situation")
updateResponsibilities!(X, clusters, π_hat, γ)
subplot(2,3,2); plotGMM(X, clusters, γ); title("After first E-step")
updateParameters!(X, clusters, π_hat, γ)
subplot(2,3,3); plotGMM(X, clusters, γ); title("After first M-step")
iter_counter = 1
for i=1:3
    for j=1:i+1
        updateResponsibilities!(X, clusters, π_hat, γ)
        updateParameters!(X, clusters, π_hat, γ)
        global iter_counter += 1 # `global` is needed when this loop runs at the top level of a script
    end
    subplot(2,3,3+i); plotGMM(X, clusters, γ); title("After $(iter_counter) iterations")
end
PyPlot.tight_layout()
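For reference, the two update functions above implement the standard EM equations for a two-component Gaussian mixture; the notation below is the usual textbook form rather than anything taken from the script, with $\gamma_{kn}$, $m_k$, $\hat{\pi}_k$, $\hat{\mu}_k$ and $\hat{\Sigma}_k$ corresponding to `γ`, `m`, `π_hat`, `μ_hat` and `Σ_k` in the code:

$$
\gamma_{kn} \;=\; \frac{\hat{\pi}_k\,\mathcal{N}(x_n \mid \hat{\mu}_k, \hat{\Sigma}_k)}{\sum_{j=1}^{2} \hat{\pi}_j\,\mathcal{N}(x_n \mid \hat{\mu}_j, \hat{\Sigma}_j)} \qquad \text{(E-step)}
$$

$$
m_k = \sum_{n=1}^{N} \gamma_{kn}\,, \qquad
\hat{\pi}_k = \frac{m_k}{N}\,, \qquad
\hat{\mu}_k = \frac{1}{m_k}\sum_{n=1}^{N} \gamma_{kn}\, x_n\,, \qquad
\hat{\Sigma}_k = \frac{1}{m_k}\sum_{n=1}^{N} \gamma_{kn}\,(x_n - \hat{\mu}_k)(x_n - \hat{\mu}_k)^{\top} \qquad \text{(M-step)}
$$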