import numpy as np
from scipy.spatial.distance import cdist,pdist, squareform
from sklearn.datasets import load_iris
Xorig,yorig = load_iris(return_X_y=True)
y = np.concatenate( (yorig[yorig==0], yorig[yorig==1]), axis=0)
y[y==0] = -1.
X = np.concatenate((Xorig[np.where(yorig==0)[0],:],Xorig[np.where(yorig==1)[0],:]),axis=0)
KK = np.matrix([[np.dot(X[i,:],X[j,:]) for i in range(X.shape[0])]for j in range(X.shape[0])])
print(np.dot(X[0,:],X[0,:]), '\n', squareform(pdist(X, lambda x,y:np.dot(x,y)))) # must be equal but...
if not all(np.equal(squareform(pdist(X, lambda x,y:np.dot(x,y))), KK)):
print('pdist provides zero elements for the diagnal elements of the output.')
if all(np.equal(cdist(X, X, lambda x,y:np.dot(x,y)), KK)):
print('cdist is acceptable, but slow.')
40.26 [[ 0. 37.49 37.03 ... 48.05 39.18 44.87] [37.49 0. 34.49 ... 45.36 36.91 42.33] [37.03 34.49 0. ... 44.27 36.09 41.34] ... [48.05 45.36 44.27 ... 0. 53.2 62.78] [39.18 36.91 36.09 ... 53.2 0. 49.8 ] [44.87 42.33 41.34 ... 62.78 49.8 0. ]] pdist provides zero elements for the diagnal elements of the output. cdist is acceptable, but slow.
# ラグランジュ乗数を二次計画法(Quadratic Programming)で求める
N = X.shape[0]
K = np.zeros((N,N))
K_x = squareform(pdist(X, lambda x,y:np.dot(x,y)))
for i in range(N):
for j in range(N):
K[i, j] = y[i] * y[j] * np.dot(X[i,:],X[j,:])
# L_dual = gamma * alpha - alpha.T * (y.T * K * y) * alpha -> max
# - L_dual -> min in solvers.qp
Q = cvxopt.matrix(K)
p = cvxopt.matrix(-np.ones(N)) # -1がN個の列ベクトル
G = cvxopt.matrix(np.diag([-1.0]*N)) # 対角成分が-1のNxN行列
h = cvxopt.matrix(np.zeros(N)) # 0がN個の列ベクトル
A = cvxopt.matrix(array(y.tolist()).astype('double'), (1,N))# N個の教師信号が要素の行ベクトル(1xN)
# Cast A to double type, thanks to a post on stack overflow https://stackoverflow.com/questions/36510859/cvxopt-qp-solver-typeerror-a-must-be-a-d-matrix-with-1000-columns
b = cvxopt.matrix(0.0) # 定数0.0
sol = cvxopt.solvers.qp(Q, p, G, h, A, b) # 二次計画法でラグランジュ乗数aを求める
a = array(sol['x']).reshape(N) # 'x'がaに対応する
print(a)
pcost dcost gap pres dres 0: -3.9465e+00 -6.9439e+00 2e+02 1e+01 2e+00 1: -1.4432e+00 -2.0830e+00 2e+01 1e+00 1e-01 2: -3.2790e-01 -1.3980e+00 2e+00 5e-02 5e-03 3: -5.1882e-01 -7.6752e-01 3e-01 4e-03 5e-04 4: -5.8889e-01 -8.1845e-01 3e-01 3e-03 3e-04 5: -7.1313e-01 -7.5571e-01 4e-02 2e-04 2e-05 6: -7.4690e-01 -7.4817e-01 1e-03 1e-06 1e-07 7: -7.4805e-01 -7.4806e-01 1e-05 1e-08 2e-09 8: -7.4806e-01 -7.4806e-01 1e-07 1e-10 2e-11 Optimal solution found. [2.24761794e-09 4.09846137e-09 2.38370393e-09 4.80162818e-09 2.05193612e-09 3.53769742e-09 2.58427175e-09 3.07558369e-09 4.65617861e-09 4.21695047e-09 2.31827654e-09 4.08141748e-09 3.53204549e-09 1.81086236e-09 1.39151465e-09 1.66336890e-09 1.66502410e-09 2.42549572e-09 3.72787825e-09 2.22327217e-09 6.91253905e-09 2.65134220e-09 1.23379963e-09 6.71332988e-01 8.96125917e-07 1.44144088e-08 6.00850626e-09 2.77761071e-09 2.48841471e-09 6.23200906e-09 8.73647367e-09 4.10038009e-09 1.66529977e-09 1.51538144e-09 4.98468690e-09 2.03448823e-09 1.96517255e-09 1.91842436e-09 2.89891210e-09 3.10373766e-09 2.01062404e-09 7.67238122e-02 2.32688598e-09 7.27925672e-09 1.31044187e-08 4.76233807e-09 2.49810991e-09 2.95683358e-09 2.29919433e-09 2.72037255e-09 7.72610987e-10 8.79501736e-10 6.53318498e-10 1.00790612e-09 7.21754720e-10 8.32220663e-10 7.75383305e-10 4.54589082e-09 7.78502688e-10 1.31160073e-09 1.74207133e-09 1.05262300e-09 1.04985551e-09 7.28852053e-10 2.45299073e-09 9.31357247e-10 8.47958560e-10 1.23691146e-09 6.72038323e-10 1.32311107e-09 6.95684275e-10 1.24239270e-09 5.93801058e-10 7.40955572e-10 9.74641661e-10 8.99430639e-10 6.53877169e-10 5.92273078e-10 8.06505799e-10 2.85841995e-09 1.40759495e-09 1.70753937e-09 1.40122721e-09 5.70808264e-10 8.56014999e-10 9.35129509e-10 7.36386413e-10 7.42965486e-10 1.28069903e-09 1.09956701e-09 8.61392780e-10 7.97719715e-10 1.18752595e-09 3.78344175e-09 1.00538007e-09 1.20115997e-09 1.09546862e-09 9.82758941e-10 7.48057813e-01 1.14675695e-09]
threshold = .00001
SV = np.where(a>=threshold)[0]
SV
array([23, 41, 98])
import numpy as np
from scipy.linalg import norm
import cvxopt
import cvxopt.solvers
from pylab import *
N = 100 # データ数
def f(x1, w, b):
return - (w[0] / w[1]) * x1 - (b / w[1])
def kernel(x, y):
return np.dot(x, y) # 線形カーネル
# 訓練データを作成
cls1 = []
cls2 = []
mean1 = [-1, 2]
mean2 = [1, -1]
cov = [[1.0,0.8], [0.8, 1.0]]
half_n = int(N/2)
cls1.extend(np.random.multivariate_normal(mean1, cov, half_n))
cls2.extend(np.random.multivariate_normal(mean2, cov, half_n))
# データ行列Xを作成
X = vstack((cls1, cls2))
# ラベルtを作成
t = []
for i in range(half_n):
t.append(1.0) # クラス1
for i in range(half_n):
t.append(-1.0) # クラス2
t = array(t)
# ラグランジュ乗数を二次計画法(Quadratic Programming)で求める
K = np.zeros((N, N))
for i in range(N):
for j in range(N):
K[i, j] = t[i] * t[j] * kernel(X[i], X[j])
# L_dual = gamma * alpha - alpha.T * (y.T * K * y) * alpha -> max
# - L_dual -> min in solvers.qp
Q = cvxopt.matrix(K)
p = cvxopt.matrix(-np.ones(N)) # -1がN個の列ベクトル
G = cvxopt.matrix(np.diag([-1.0]*N)) # 対角成分が-1のNxN行列
h = cvxopt.matrix(np.zeros(N)) # 0がN個の列ベクトル
A = cvxopt.matrix(t, (1,N)) # N個の教師信号が要素の行ベクトル(1xN)
b = cvxopt.matrix(0.0) # 定数0.0
sol = cvxopt.solvers.qp(Q, p, G, h, A, b) # 二次計画法でラグランジュ乗数aを求める
a = array(sol['x']).reshape(N) # 'x'がaに対応する
# print( a)
# サポートベクトルのインデックスを抽出
S = []
for i in range(len(a)):
if a[i] < 0.00001: continue
S.append(i)
# wを計算
w = np.zeros(2)
for n in S:
w += a[n] * t[n] * X[n]
# bを計算
sum = 0
for n in S:
temp = 0
for m in S:
temp += a[m] * t[m] * kernel(X[n], X[m])
sum += (t[n] - temp)
b = sum / len(S)
print( S, b)
# 訓練データを描画
x1, x2 = np.array(cls1).transpose()
plot(x1, x2, 'rx')
x1, x2 = np.array(cls2).transpose()
plot(x1, x2, 'bx')
# サポートベクトルを描画
for n in S:
scatter(X[n,0], X[n,1], s=80, c='c', marker='o')
# 識別境界を描画
x1 = np.linspace(-6, 6, 1000)
x2 = [f(x, w, b) for x in x1]
plot(x1, x2, 'g-')
xlim(-6, 6)
ylim(-6, 6)
show()
pcost dcost gap pres dres 0: -7.3675e+00 -1.4603e+01 3e+02 2e+01 2e+00 1: -1.0653e+01 -9.8805e+00 1e+02 7e+00 8e-01 2: -2.6799e+01 -1.4985e+01 1e+02 4e+00 5e-01 3: -2.4022e+01 -8.9103e+00 4e+01 1e+00 1e-01 4: -5.0380e+00 -5.6137e+00 4e+00 6e-02 7e-03 5: -4.5350e+00 -4.6529e+00 1e-01 7e-04 8e-05 6: -4.6178e+00 -4.6203e+00 3e-03 8e-06 9e-07 7: -4.6199e+00 -4.6199e+00 3e-05 8e-08 9e-09 8: -4.6199e+00 -4.6199e+00 3e-07 8e-10 9e-11 Optimal solution found. [28, 94] -1.8540954210053662
S
[34, 68, 93]