# k-means experiments: plain k-means, k-means++ seeding, and a kernel (RBF) variant.
#
# The script runs three experiments in sequence. Each experiment leaves its results
# in the globals `X`, `n`, `k`, `μx_old`, `μy_old`, `clusters`, `error_arr`,
# `plot_data_arr`, `μx_arr`, `μy_arr`, `plotdata`, `x_arr` and `y_arr`.

using Plots
gr()

"""
    input_data(path="./dataset/data_3.txt")

Read a text file containing one 2-D point per line (two whitespace-separated numbers)
and return the points as a matrix with one row per point and two columns.

Blank lines are skipped. Columns after the second are ignored.

# Throws
- `ArgumentError` if a non-blank line has fewer than two columns.
"""
function input_data(path::AbstractString="./dataset/data_3.txt")
    xs = Float64[]
    ys = Float64[]
    open(path, "r") do fp
        for (lineno, line) in enumerate(eachline(fp))
            # `split` without a delimiter tolerates tabs and repeated spaces.
            fields = split(line)
            isempty(fields) && continue
            if length(fields) < 2
                throw(ArgumentError("$path:$lineno: expected two columns, got $(repr(line))"))
            end
            push!(xs, parse(Float64, fields[1]))
            push!(ys, parse(Float64, fields[2]))
        end
    end
    return [xs ys]
end

"""
    dist(μx, μy, xx, xy)

Return the Euclidean distance between `(μx, μy)` and `(xx, xy)`.

The computation is broadcast, so any argument may be a scalar or an array.
"""
function dist(μx, μy, xx, xy)
    return sqrt.((xx .- μx) .^ 2 .+ (μy .- xy) .^ 2)
end

"""
    kernel(X; σ=4.0)

Return the Gaussian (RBF) Gram matrix of the rows of `X`:
`G[i, j] = exp(-‖X[i, :] - X[j, :]‖² / (2σ²))`, with ones on the diagonal.

# Throws
- `ArgumentError` if `σ` is not positive.
"""
function kernel(X; σ=4.0)
    σ > 0 || throw(ArgumentError("σ must be positive, got $σ"))
    m = size(X, 1)
    base = 2.0 * σ^2
    G = Matrix{Float64}(undef, m, m)
    for i in 1:m
        G[i, i] = 1.0  # distance of a point to itself is zero
        for j in (i + 1):m
            sq = 0.0
            for c in axes(X, 2)
                sq += (X[i, c] - X[j, c])^2
            end
            G[i, j] = exp(-sq / base)
            G[j, i] = G[i, j]  # the Gram matrix is symmetric
        end
    end
    return G
end

# Choose `k` initial centroids from the rows of `X` with k-means++ seeding.
# Only the first two columns of `X` are used. Returns `(seed_x, seed_y)`.
function _kmeanspp_seeds(X, k)
    npoints = size(X, 1)
    seed_x = Float64[]
    seed_y = Float64[]
    if k < 1
        return (seed_x, seed_y)
    end
    npoints > 0 || throw(ArgumentError("cannot seed centroids from an empty data set"))

    # The first centroid is a data point chosen uniformly at random.
    pick = rand(1:npoints)
    push!(seed_x, X[pick, 1])
    push!(seed_y, X[pick, 2])

    while length(seed_x) < k
        # Squared distance from every point to its nearest already-chosen centroid.
        d2 = [minimum(dist(X[j, 1], X[j, 2], seed_x, seed_y))^2 for j in 1:npoints]
        total = sum(d2)
        if total > 0
            # Sample an index with probability proportional to d2. The clamp guards
            # against the cumulative sum ending marginally below the random draw.
            cum_probs = cumsum(d2 ./ total)
            pick = clamp(searchsortedfirst(cum_probs, rand()), 1, npoints)
        else
            # Every point coincides with a centroid already: fall back to uniform.
            pick = rand(1:npoints)
        end
        push!(seed_x, X[pick, 1])
        push!(seed_y, X[pick, 2])
    end
    return (seed_x, seed_y)
end

"""
    centroid()

Append `k` k-means++ initial centroids, drawn from the rows of the global `X`, to the
global vectors `μx_old` and `μy_old` (which the script empties beforehand).
"""
function centroid()
    seed_x, seed_y = _kmeanspp_seeds(X, k)
    append!(μx_old, seed_x)
    append!(μy_old, seed_y)
    return nothing
end

# Run Lloyd's algorithm on the first two columns of `X`, starting from the centroids
# `(μx0[c], μy0[c])`. Returns a named tuple with:
#   clusters      - final cluster index of every row of `X`
#   plot_data_arr - per iteration, `X` with the labels (as strings) appended as the
#                   last column
#   μx_arr/μy_arr - per iteration, the centroids used for that iteration's assignment
#   error_arr     - per iteration and per centroid, how far the centroid moved
#   μx/μy         - final centroids
function _lloyd(X, μx0, μy0; maxiter=1000)
    length(μx0) == length(μy0) ||
        throw(DimensionMismatch("centroid x and y vectors differ in length"))
    isempty(μx0) && throw(ArgumentError("at least one centroid is required"))

    μx = collect(Float64, μx0)
    μy = collect(Float64, μy0)
    labels = Int[]
    snapshots = Matrix{Any}[]
    μx_history = Vector{Float64}[]
    μy_history = Vector{Float64}[]
    errors = Float64[]
    converged = false

    for _ in 1:maxiter
        # Assignment step: every point goes to its nearest centroid
        # (ties resolve to the lowest centroid index).
        labels = Int[argmin(dist(μx, μy, X[i, 1], X[i, 2])) for i in axes(X, 1)]
        push!(snapshots, [X string.(labels)])

        # Update step: move every centroid to the mean of its members.
        new_x = similar(μx)
        new_y = similar(μy)
        for c in eachindex(μx)
            members = findall(==(c), labels)
            if isempty(members)
                # An empty cluster keeps its centroid instead of collapsing to (0, 0).
                new_x[c] = μx[c]
                new_y[c] = μy[c]
            else
                new_x[c] = sum(view(X, members, 1)) / length(members)
                new_y[c] = sum(view(X, members, 2)) / length(members)
            end
        end

        moves = dist(new_x, new_y, μx, μy)
        append!(errors, moves)
        push!(μx_history, μx)
        push!(μy_history, μy)
        μx, μy = new_x, new_y

        # Converged only when *no* centroid moved, not just the last one.
        if all(iszero, moves)
            converged = true
            break
        end
    end
    converged || @warn "k-means did not converge within $maxiter iterations"

    return (
        clusters=labels,
        plot_data_arr=snapshots,
        μx_arr=μx_history,
        μy_arr=μy_history,
        error_arr=errors,
        μx=μx,
        μy=μy,
    )
end

"""
    result(cluster_num)

Split the points in the global `plotdata` by cluster label and push, for each cluster
`1:cluster_num`, the member x coordinates onto the global `x_arr` and the member
y coordinates onto the global `y_arr` (e.g. three clusters add three vectors to each).

Labels are read from the last column of `plotdata`; coordinates from columns 1 and 2.
"""
function result(cluster_num)
    labels = plotdata[:, end]
    for p in 1:cluster_num
        members = findall(==(string(p)), labels)
        push!(x_arr, Float64[plotdata[i, 1] for i in members])
        push!(y_arr, Float64[plotdata[i, 2] for i in members])
    end
    return nothing
end

# Build an animation with one frame per recorded iteration: the points of each cluster
# as separate series plus the centroids that produced that assignment.
function _animate_history(cluster_num, history, μxs, μys)
    anim = @animate for m in eachindex(history)
        snapshot = history[m]
        labels = snapshot[:, end]
        xs = Vector{Float64}[]
        ys = Vector{Float64}[]
        for p in 1:cluster_num
            members = findall(==(string(p)), labels)
            push!(xs, Float64[snapshot[i, 1] for i in members])
            push!(ys, Float64[snapshot[i, 2] for i in members])
        end
        scatter(xs, ys, legend=false)
        scatter!(μxs[m], μys[m], label="center", legend=false)
    end every 1
    return anim
end

"""
    result2(cluster_num)

Render the iteration history stored in the globals `plot_data_arr`, `μx_arr` and
`μy_arr` as a 1 fps GIF and return the value of `gif`.
"""
function result2(cluster_num)
    anim = _animate_history(cluster_num, plot_data_arr, μx_arr, μy_arr)
    # NOTE(review): absolute, machine-specific output path — confirm before running.
    return gif(anim, "/Users/noriakioshita/Github/julia/ml_page/julia/kmeans.gif", fps=1)
end

# ---------------------------------------------------------------------------
# Experiment 1: k-means with uniformly random initial centroids
# ---------------------------------------------------------------------------

X = input_data()
n = size(X, 1)

# Number of centroids.
k = 2

# Initial centroids: drawn at random (step 0.1) from the bounding box of the data.
μx_old = Float64[]
μy_old = Float64[]
xlo, xhi = extrema(X[:, 1])
ylo, yhi = extrema(X[:, 2])
for _ in 1:k
    push!(μx_old, rand(xlo:0.1:xhi))
    push!(μy_old, rand(ylo:0.1:yhi))
end

# Iterate until the centroids stop moving; keep the history for the GIF.
fit = _lloyd(X, μx_old, μy_old)
clusters, error_arr = fit.clusters, fit.error_arr
plot_data_arr, μx_arr, μy_arr = fit.plot_data_arr, fit.μx_arr, fit.μy_arr
μx_old, μy_old = fit.μx, fit.μy

x_arr = Vector{Float64}[]
y_arr = Vector{Float64}[]
plotdata = [X string.(clusters)]
result(k)

plot(error_arr)
scatter(x_arr, y_arr, legend=false)
scatter!(μx_old, μy_old, label="center", legend=false)

result2(k)

# ---------------------------------------------------------------------------
# Experiment 2: k-means++ (distance-weighted initial centroids)
# ---------------------------------------------------------------------------

X = input_data("./dataset/data_4.txt")
n = size(X, 1)

# Number of centroids.
k = 9

μx_old = Float64[]
μy_old = Float64[]
centroid()
print(μx_old)

# From here on the procedure is identical to plain k-means.
fit = _lloyd(X, μx_old, μy_old)
clusters, error_arr = fit.clusters, fit.error_arr
plot_data_arr, μx_arr, μy_arr = fit.plot_data_arr, fit.μx_arr, fit.μy_arr
μx_old, μy_old = fit.μx, fit.μy

x_arr = Vector{Float64}[]
y_arr = Vector{Float64}[]
plotdata = [X string.(clusters)]
result(k)

plot(error_arr)
scatter(x_arr, y_arr, legend=false)
scatter!(μx_old, μy_old, label="center", legend=false)

# Redefined on purpose: this experiment writes a different file and prints the
# animation object afterwards.
function result2(cluster_num)
    anim = _animate_history(cluster_num, plot_data_arr, μx_arr, μy_arr)
    # NOTE(review): absolute, machine-specific output path — confirm before running.
    gif(anim, "/Users/noriakioshita/Github/julia/ml_page/julia/kmeans_plus.gif", fps=1)
    print(anim)
end

result2(k)

# ---------------------------------------------------------------------------
# Experiment 3: k-means++ on the RBF Gram matrix of the data
# ---------------------------------------------------------------------------

X = input_data("./dataset/data_4.txt")
n = size(X, 1)

# Number of centroids.
k = 9

μx_old = Float64[]
μy_old = Float64[]

Gram = kernel(X)
# `X` deliberately refers to the same array as `Gram` from here on.
# NOTE(review): seeding and clustering below only look at the first two columns of
# the Gram matrix; confirm this is the intended "kernel k-means" formulation.
X = Gram
println(X)
centroid()
print(μx_old)

# From here on the procedure is identical to plain k-means.
fit = _lloyd(X, μx_old, μy_old)
clusters, error_arr = fit.clusters, fit.error_arr
plot_data_arr, μx_arr, μy_arr = fit.plot_data_arr, fit.μx_arr, fit.μy_arr
μx_old, μy_old = fit.μx, fit.μy

x_arr = Vector{Float64}[]
y_arr = Vector{Float64}[]
plotdata = [X string.(clusters)]
result(k)

n  # notebook display of the number of points

# NOTE(review): this overwrites every off-diagonal entry of `Gram` (and therefore of
# `X`, which is the same array) with 2.0. It looks like leftover notebook
# experimentation; kept to preserve the script's final state.
for i in 1:n
    for j in (i + 1):n
        Gram[i, j] = 2.0
        Gram[j, i] = 2.0
    end
end
Gram