# Demo: probabilistic PCA (pPCA) on the Tobamovirus data set, first on the data as
# loaded and then on a copy in which entries have been replaced by NaN at random.
#
# `readDataSet` and `pPCA` come from scripts/pca_demo_helpers.jl.

using LinearAlgebra
using PyPlot

include("scripts/pca_demo_helpers.jl")

"""
    plotprojection(Z, heading)

Open a new figure and show the columns of `Z` as numbered labels, using row 1 as the
horizontal and row 2 as the vertical coordinate. `heading` is used as the plot title.
"""
function plotprojection(Z, heading)
    # Start a fresh figure so consecutive projections do not end up on the same axes.
    figure()
    # The "w" format string draws in white; the points are shown through their text
    # labels below (presumably this call is only here to set the axis limits).
    plot(Z[1, :], Z[2, :], "w")
    for n in axes(Z, 2)
        # put a label on the position of the data point
        PyPlot.text(Z[1, n], Z[2, n], string(n), fontsize=10)
    end
    title(heading, fontsize=10)
    return nothing
end

# --- pPCA on the data set as loaded ---------------------------------------------------
X = readDataSet("datasets/virus3.dat")

# uses EM, implemented in scripts/pca_demo_helpers.jl.
# Feel free to try more/less dimensions.
(θ, Z) = pPCA(convert(Matrix, X'), 2)
plotprojection(
    Z,
    "Projection of Tobamovirus data set on two dimensions (numbers correspond to data points)",
)

# --- pPCA on a randomly corrupted copy ------------------------------------------------
# Convert to a floating point matrix so we can use NaN to indicate missing values.
# `convert` may hand back `X` itself when no conversion is needed, so copy explicitly
# to keep the NaN assignment below from also modifying `X`.
X_corrupt = copy(convert(Matrix{Float64}, X))
# Each entry is selected independently when its uniform random draw falls below 0.2.
indices = findall(rand(Float64, size(X)) .< 0.2)
X_corrupt[indices] .= NaN
println(X_corrupt)

# Perform pPCA on the corrupted data set
(θ, Z) = pPCA(convert(Matrix, X_corrupt'), 2)
plotprojection(Z, "Projection of CORRUPTED Tobamovirus data set on two dimensions")

# Render the stylesheet as HTML output (path is relative to the working directory).
open("../../styles/aipstyle.html") do f
    display("text/html", read(f, String))
end