In [1]:

using Gadfly
using DataFrames

In [2]:

# Label each sample point (one per row of `data`) with +1.0 or -1.0 according to its
# position relative to a spiral.
#
# For every row, with r the Euclidean norm of the row and θ the polar angle of its
# first two entries, d = mod(r - θ/2π, 1) is computed; the label is +1.0 when
# d >= 0.5 and -1.0 otherwise.
#
# `data`: matrix with at least two columns (x in column 1, y in column 2).
# Returns a vector holding one label per row of `data`.
function makelabels(data::AbstractMatrix)
    return map(1:size(data, 1)) do i
        r = sqrt(sum(abs2, data[i, :]))
        # angle(complex(x, y)) is the two-argument arctangent of (y, x); only scalar
        # functions are used so nothing depends on implicitly vectorized math.
        θ = angle(complex(data[i, 1], data[i, 2]))
        d = mod(r - θ / (2π), 1.0)
        s = sign(2d - 1)
        # d == 0.5 gives a sign of 0; count that boundary case as the positive class.
        s == 0 ? one(s) : s
    end
end

Out[2]:

makelabels (generic function with 1 method)

In [3]:

# Generate sample (training) data: draw `N` points uniformly from the square
# [-1, 1) × [-1, 1) and label them with `makelabels`.
# Returns a DataFrame with columns `x`, `y` (coordinates) and `c` (label).
function makedata(N::Int=1000)
    pts = rand(N, 2) .* 2.0 .- 1.0
    cls = makelabels(pts)
    xcol = vec(pts[:, 1])
    ycol = vec(pts[:, 2])
    return DataFrame(x=xcol, y=ycol, c=vec(cls))
end

Out[3]:

makedata (generic function with 2 methods)

In [4]:

data = makedata(1000)

Out[4]:

	x	y	c
1	0.38180505663952236	0.9905464063891887	1.0
2	0.7335323272468997	-0.8054871559563419	-1.0
3	-0.9678779706343863	0.6384841090731945	1.0
4	-0.2561152174200445	0.49395503584433964	-1.0
5	0.1899006218163084	-0.6021796696084123	1.0
6	-0.49271553858433803	-0.6721542135490504	-1.0
7	-0.4023007857844809	0.1730269762206249	-1.0
8	-0.7506390247805568	0.47423807623021164	-1.0
9	-0.5544015432164011	-0.007436882065078976	-1.0
10	0.668011831664217	0.20775069367404742	1.0
11	-0.11765415839762294	-0.3048174973163382	1.0
12	0.0635803539535309	-0.9301882824507244	-1.0
13	-0.4482198212901025	-0.49588431966644775	-1.0
14	-0.0792536250447009	-0.7087850780321849	1.0
15	-0.6477727675982017	0.07315996086304866	-1.0
16	0.4207662709640596	-0.8192022084605708	-1.0
17	-0.5839458536748774	-0.6877689915499148	-1.0
18	-0.4684066089028365	0.051711658117823855	1.0
19	-0.4986847297130299	0.38990798852357056	-1.0
20	0.7485546510180225	0.9476121216412619	-1.0
21	0.1760008792238703	-0.7894631030266352	-1.0
22	0.8170279732076415	0.27875454441004655	1.0
23	-0.8106935788315881	0.5423738917181034	1.0
24	0.16356918260737352	0.8596095420688465	1.0
25	0.7724330139077722	-0.2668039805969391	1.0
26	-0.6055859887123347	0.2781313302924149	-1.0
27	-0.5531076795586691	0.9915532368547093	1.0
28	0.8832958310592054	-0.8316945229141703	-1.0
29	-0.6568027666324392	-0.8890466801722687	-1.0
30	0.6809165451585435	-0.2505752131724277	1.0
⋮	⋮	⋮	⋮

In [5]:

# Build a plot layer that draws a spiral as a black path.
#
# The curve is sampled at 401 evenly spaced parameter values t in [-l, l] and traced as
# (x + a*t*cos(2πt), y + a*|t|*sin(2πt)).
#
# `x`, `y`: offset added to every point of the curve.
# `a`: scale factor applied to the curve.
# `l`: half-width of the parameter interval.
function spirallayer(x::Float64=0.0, y::Float64=0.0, a::Float64=1.0, l::Float64=1.5)
    ts = linspace(-l, l, 401)
    phase = 2π .* ts
    xs = x .+ a .* ts .* cos(phase)
    ys = y .+ a .* abs(ts) .* sin(phase)
    return layer(x=xs, y=ys, Geom.path, Theme(default_color=colorant"black"))
end

Out[5]:

spirallayer (generic function with 5 methods)

In [6]:

# Scatter-plot the rows of `data` (columns `x`, `y` and class `c`) coloured by class,
# with the curve from `spirallayer(0.0, 0.0, 1.0)` overlaid, on the square
# [-1, 1] × [-1, 1].
#
# The two manual colours are ordered by the class of the first row: red first when
# `data[1, :c] < 1`, blue first otherwise (presumably so each class keeps the same
# colour whichever class appears first — confirm against Gadfly's level ordering).
function dispresult(data)
    if data[1, :c] < 1
        colscale = Scale.color_discrete_manual(colorant"red",colorant"blue")
    else
        colscale = Scale.color_discrete_manual(colorant"blue",colorant"red")
    end
    points = layer(data, x=:x, y=:y, color=:c, Geom.point)
    spiral = spirallayer(0.0, 0.0, 1.0)
    viewport = Coord.cartesian(xmin=-1.0, xmax=1.0, ymin=-1.0, ymax=1.0)
    return plot(points, spiral, colscale, viewport)
end

Out[6]:

dispresult (generic function with 1 method)

In [7]:

dispresult(data)

Out[7]:

AdaBoost¶

In [8]:

# Split the DataFrame into a plain feature matrix X (columns 1-2: x, y) and a label
# vector Y (column 3: c) for the classifiers below.
X = convert(Array, data[:,1:2]); Y = collect(data[:, 3]);

In [9]:

Out[9]:

1000×2 Array{Float64,2}:
  0.381805    0.990546  
  0.733532   -0.805487  
 -0.967878    0.638484  
 -0.256115    0.493955  
  0.189901   -0.60218   
 -0.492716   -0.672154  
 -0.402301    0.173027  
 -0.750639    0.474238  
 -0.554402   -0.00743688
  0.668012    0.207751  
 -0.117654   -0.304817  
  0.0635804  -0.930188  
 -0.44822    -0.495884  
  ⋮                     
  0.963771    0.428773  
  0.2735     -0.285869  
  0.155255   -0.369354  
  0.878592   -0.199518  
 -0.506498    0.683235  
  0.408383    0.289063  
 -0.468245   -0.850559  
  0.647787   -0.870747  
 -0.231584    0.244115  
 -0.818909   -0.0248224 
  0.644493    0.669608  
 -0.9977     -0.941009

In [10]:

Out[10]:

1000-element Array{Float64,1}:
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  ⋮  
  1.0
  1.0
  1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
 -1.0
  1.0
  1.0

In [11]:

include("adaboost.jl")

Decision Stump (t=1000)¶

In [12]:

# Fit AdaBoost on (X, Y) with t=1000 and no `wc` argument; the displayed result uses
# DecisionStumpWeakClassifier. (`t` is presumably the number of boosting rounds —
# confirm in adaboost.jl.)
H = adaboost(X, Y; t=1000)

Out[12]:

AdaBoost{Float64,DecisionStumpWeakClassifier{Float64}}([0.188191,0.236826,0.194064,0.103144,0.12273,0.15296,0.143989,0.097593,0.0910932,0.143874  …  0.0136665,0.0136258,0.0110542,0.0156314,0.0148444,0.0138454,0.0135426,0.0163845,0.0152943,0.0142186],DecisionStumpWeakClassifier{Float64}[DecisionStumpWeakClassifier{Float64}(2,1.0,0.509896),DecisionStumpWeakClassifier{Float64}(2,1.0,-0.717636),DecisionStumpWeakClassifier{Float64}(1,1.0,0.380964),DecisionStumpWeakClassifier{Float64}(1,1.0,-0.428321),DecisionStumpWeakClassifier{Float64}(1,-1.0,-0.878973),DecisionStumpWeakClassifier{Float64}(1,-1.0,0.852034),DecisionStumpWeakClassifier{Float64}(2,1.0,0.744298),DecisionStumpWeakClassifier{Float64}(1,1.0,-0.502475),DecisionStumpWeakClassifier{Float64}(1,-1.0,-0.0451205),DecisionStumpWeakClassifier{Float64}(2,-1.0,0.228064)  …  DecisionStumpWeakClassifier{Float64}(1,1.0,0.85737),DecisionStumpWeakClassifier{Float64}(1,-1.0,0.881831),DecisionStumpWeakClassifier{Float64}(2,-1.0,-0.954017),DecisionStumpWeakClassifier{Float64}(2,1.0,-0.709505),DecisionStumpWeakClassifier{Float64}(1,1.0,0.903471),DecisionStumpWeakClassifier{Float64}(1,-1.0,0.881831),DecisionStumpWeakClassifier{Float64}(1,1.0,0.903471),DecisionStumpWeakClassifier{Float64}(2,-1.0,0.825494),DecisionStumpWeakClassifier{Float64}(1,-1.0,-0.648173),DecisionStumpWeakClassifier{Float64}(1,1.0,-0.575658)])

In [13]:

length(H.alpha)

Out[13]:

In [14]:

Yd = H(X)

Out[14]:

1000×1 Array{Float64,2}:
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  ⋮  
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
 -1.0

In [15]:

dispresult(DataFrame(x=X[:,1], y=X[:,2], c=vec(Yd)))

Out[15]:

In [16]:

# Build the 2×2 confusion matrix [tp fp; fn tn] for labels taking the values +1 / -1.
#
# `Y`:  true labels.
# `Yd`: predicted labels. Both arguments are flattened first, so a vector and an
#       N×1 matrix (as returned by the classifiers in this notebook) can be mixed.
# Returns a 2×2 matrix: row 1 holds the samples predicted +1 (tp, fp), row 2 the
# samples predicted -1 (fn, tn); column 1 holds truly positive samples, column 2
# truly negative ones.
# Throws DimensionMismatch when `Y` and `Yd` differ in number of elements.
function calc_confusionmatrix(Y, Yd)
    truth = vec(Y)
    pred = vec(Yd)
    length(truth) == length(pred) ||
        throw(DimensionMismatch("Y and Yd must have the same number of elements"))
    predpos = truth[pred .== 1]
    predneg = truth[pred .== -1]
    # For a ±1 label y, (1 + y)/2 is 1 when y == +1 and 0 when y == -1;
    # (1 - y)/2 is the reverse, so the sums below are counts.
    tp = sum(1 .+ predpos) / 2
    fp = sum(1 .- predpos) / 2
    tn = sum(1 .- predneg) / 2
    fn = sum(1 .+ predneg) / 2
    return [tp fp; fn tn]
end

Out[16]:

calc_confusionmatrix (generic function with 1 method)

In [17]:

# Compute (accuracy, precision, recall, F-measure) from true labels `Y` and predicted
# labels `Yd`, using the counts returned by `calc_confusionmatrix`.
function calc_mlmetrics(Y, Yd)
    counts = vec(calc_confusionmatrix(Y, Yd))
    # Column-major flattening of [tp fp; fn tn] yields tp, fn, fp, tn.
    tp = counts[1]
    fn = counts[2]
    fp = counts[3]
    tn = counts[4]
    total = tp + fp + fn + tn
    accuracy = (tp + tn) / total
    precision_score = tp / (tp + fp)
    recall_score = tp / (tp + fn)
    harmonic_num = 2 * precision_score * recall_score
    f_measure = harmonic_num / (precision_score + recall_score)
    return (accuracy, precision_score, recall_score, f_measure)
end

Out[17]:

calc_mlmetrics (generic function with 1 method)

In [18]:

calc_confusionmatrix(Y, Yd)

Out[18]:

2×2 Array{Float64,2}:
 408.0  101.0
  98.0  393.0

In [19]:

# Training-set metrics for H: predictions Yd = H(X) against the true labels Y.
(a, p, r, f) = calc_mlmetrics(Y, Yd)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.801000
precision: 0.801572
recall: 0.806324
f_measure: 0.803941

In [20]:

# Draw 100 new points uniformly from [-1, 1) × [-1, 1), classify them with H and plot
# the predictions.
# NOTE(review): x_test / y_test are overwritten by a new random draw in every model's
# evaluation cell, so test metrics of different models come from different samples.
x_test = rand(100, 2) .* 2.0 .- 1.0
y_test = H(x_test)
dispresult(DataFrame(x=x_test[:,1], y=x_test[:,2], c=vec(y_test)))

Out[20]:

In [21]:

# Test-sample metrics for H: predictions y_test against labels recomputed with
# makelabels for the same points.
(a, p, r, f) = calc_mlmetrics(makelabels(x_test), y_test)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.700000
precision: 0.700000
recall: 0.700000
f_measure: 0.700000

Decision Stump (t=2000)¶

In [22]:

# Same setup as the t=1000 run (no `wc` argument, decision stumps in the displayed
# result), but with t=2000.
H2k = adaboost(X, Y; t=2000)

Out[22]:

AdaBoost{Float64,DecisionStumpWeakClassifier{Float64}}([0.188191,0.232909,0.0911539,0.175994,0.164423,0.135608,0.155039,0.097304,0.135661,0.114414  …  0.0114101,0.0135136,0.0127485,0.0112635,0.0119819,0.0117974,0.0110987,0.0111608,0.0110473,0.0132695],DecisionStumpWeakClassifier{Float64}[DecisionStumpWeakClassifier{Float64}(2,1.0,0.509896),DecisionStumpWeakClassifier{Float64}(1,1.0,-0.428321),DecisionStumpWeakClassifier{Float64}(1,1.0,0.455402),DecisionStumpWeakClassifier{Float64}(2,1.0,-0.717636),DecisionStumpWeakClassifier{Float64}(2,1.0,0.744298),DecisionStumpWeakClassifier{Float64}(1,-1.0,0.852034),DecisionStumpWeakClassifier{Float64}(1,-1.0,-0.878973),DecisionStumpWeakClassifier{Float64}(1,-1.0,0.852034),DecisionStumpWeakClassifier{Float64}(2,-1.0,0.228064),DecisionStumpWeakClassifier{Float64}(2,1.0,0.744298)  …  DecisionStumpWeakClassifier{Float64}(2,1.0,-0.989386),DecisionStumpWeakClassifier{Float64}(2,-1.0,-0.904355),DecisionStumpWeakClassifier{Float64}(1,-1.0,0.950563),DecisionStumpWeakClassifier{Float64}(1,1.0,0.944574),DecisionStumpWeakClassifier{Float64}(2,1.0,-0.91595),DecisionStumpWeakClassifier{Float64}(1,1.0,0.981522),DecisionStumpWeakClassifier{Float64}(2,1.0,-0.980124),DecisionStumpWeakClassifier{Float64}(1,-1.0,-0.990485),DecisionStumpWeakClassifier{Float64}(2,1.0,-0.989386),DecisionStumpWeakClassifier{Float64}(2,-1.0,-0.954017)])

In [23]:

length(H2k.alpha)

Out[23]:

In [24]:

Yd2k = H2k(X)

Out[24]:

1000×1 Array{Float64,2}:
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  ⋮  
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
 -1.0

In [25]:

dispresult(DataFrame(x=X[:,1], y=X[:,2], c=vec(Yd2k)))

Out[25]:

In [26]:

calc_confusionmatrix(Y, Yd2k)

Out[26]:

2×2 Array{Float64,2}:
 418.0   90.0
  88.0  404.0

In [27]:

# Training-set metrics for H2k: predictions Yd2k = H2k(X) against the true labels Y.
(a, p, r, f) = calc_mlmetrics(Y, Yd2k)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.822000
precision: 0.822835
recall: 0.826087
f_measure: 0.824458

In [28]:

# Draw 100 new points uniformly from [-1, 1) × [-1, 1), classify them with H2k and
# plot the predictions.
# NOTE(review): this overwrites x_test / y_test with a new random draw, so the test
# metrics below are not computed on the same sample as those of the other models.
x_test = rand(100, 2) .* 2.0 .- 1.0
y_test = H2k(x_test)
dispresult(DataFrame(x=x_test[:,1], y=x_test[:,2], c=vec(y_test)))

Out[28]:

In [29]:

# Test-sample metrics for H2k: predictions y_test against labels recomputed with
# makelabels for the same points.
(a, p, r, f) = calc_mlmetrics(makelabels(x_test), y_test)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.690000
precision: 0.636364
recall: 0.760870
f_measure: 0.693069

Beveled Stump¶

In [30]:

include("beveleddecisionstumpweakclassifier.jl")

Beveled Stump (t=100)¶

In [31]:

# Fit AdaBoost on (X, Y) with BeveledDecisionStumpWeakClassifier as the weak
# classifier and t=100.
Hb = adaboost(X, Y; t=100, wc=BeveledDecisionStumpWeakClassifier)

Out[31]:

AdaBoost{Float64,BeveledDecisionStumpWeakClassifier{Float64}}([0.147051,0.270244,0.275764,0.191788,0.132136,0.159215,0.13237,0.169408,0.141089,0.19097  …  0.1193,0.132391,0.0962243,0.110626,0.121419,0.107507,0.0884583,0.105509,0.131003,0.123395],BeveledDecisionStumpWeakClassifier{Float64}[BeveledDecisionStumpWeakClassifier{Float64}([-0.550473,0.860318],0.832594),BeveledDecisionStumpWeakClassifier{Float64}([-0.0884629,0.82473],-0.623709),BeveledDecisionStumpWeakClassifier{Float64}([0.57209,0.872162],0.608784),BeveledDecisionStumpWeakClassifier{Float64}([0.431862,0.0366313],-0.202186),BeveledDecisionStumpWeakClassifier{Float64}([-0.851118,0.573833],0.932783),BeveledDecisionStumpWeakClassifier{Float64}([-0.544234,0.029751],-0.494053),BeveledDecisionStumpWeakClassifier{Float64}([-0.665673,-0.957775],1.27889),BeveledDecisionStumpWeakClassifier{Float64}([-0.829175,0.459068],-0.840543),BeveledDecisionStumpWeakClassifier{Float64}([-0.354876,-0.534968],0.707904),BeveledDecisionStumpWeakClassifier{Float64}([0.656779,0.695513],-0.607332)  …  BeveledDecisionStumpWeakClassifier{Float64}([-0.918991,-0.695198],-1.25031),BeveledDecisionStumpWeakClassifier{Float64}([-0.359115,0.774094],0.522427),BeveledDecisionStumpWeakClassifier{Float64}([0.962898,0.00127196],-0.364344),BeveledDecisionStumpWeakClassifier{Float64}([-0.186434,-0.793125],-0.190065),BeveledDecisionStumpWeakClassifier{Float64}([-0.947432,0.4731],0.948585),BeveledDecisionStumpWeakClassifier{Float64}([-0.0160832,-0.53733],-0.500641),BeveledDecisionStumpWeakClassifier{Float64}([-0.0651143,0.785215],0.611826),BeveledDecisionStumpWeakClassifier{Float64}([-0.37968,0.735116],-0.627051),BeveledDecisionStumpWeakClassifier{Float64}([0.997093,-0.755483],0.531511),BeveledDecisionStumpWeakClassifier{Float64}([-0.49677,-0.152931],0.00459947)])

In [32]:

length(Hb.alpha)

Out[32]:

In [33]:

Ydb = Hb(X)

Out[33]:

1000×1 Array{Float64,2}:
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  ⋮  
  1.0
  1.0
  1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
  1.0

In [34]:

dispresult(DataFrame(x=X[:,1], y=X[:,2], c=vec(Ydb)))

Out[34]:

In [35]:

# Training-set metrics for Hb: predictions Ydb = Hb(X) against the true labels Y.
(a, p, r, f) = calc_mlmetrics(Y, Ydb)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.978000
precision: 0.972656
recall: 0.984190
f_measure: 0.978389

In [36]:

# Draw 100 new points uniformly from [-1, 1) × [-1, 1), classify them with Hb and
# plot the predictions.
# NOTE(review): this overwrites x_test / y_test with a new random draw, so the test
# metrics below are not computed on the same sample as those of the other models.
x_test = rand(100, 2) .* 2.0 .- 1.0
y_test = Hb(x_test)
dispresult(DataFrame(x=x_test[:,1], y=x_test[:,2], c=vec(y_test)))

Out[36]:

In [37]:

# Test-sample metrics for Hb: predictions y_test against labels recomputed with
# makelabels for the same points.
(a, p, r, f) = calc_mlmetrics(makelabels(x_test), y_test)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.940000
precision: 0.918367
recall: 0.957447
f_measure: 0.937500

Beveled Stump (t=200)¶

In [38]:

# Same beveled-stump setup as the t=100 run, but with t=200.
Hb2h = adaboost(X, Y; t=200, wc=BeveledDecisionStumpWeakClassifier)

Out[38]:

AdaBoost{Float64,BeveledDecisionStumpWeakClassifier{Float64}}([0.134811,0.258179,0.156224,0.115033,0.158012,0.193748,0.12347,0.173406,0.261998,0.25791  …  0.132571,0.112608,0.121843,0.107423,0.127721,0.100551,0.108749,0.103524,0.0989798,0.144762],BeveledDecisionStumpWeakClassifier{Float64}[BeveledDecisionStumpWeakClassifier{Float64}([-0.721013,0.855502],0.85171),BeveledDecisionStumpWeakClassifier{Float64}([0.261933,0.571929],-0.289927),BeveledDecisionStumpWeakClassifier{Float64}([0.923771,0.208048],0.495129),BeveledDecisionStumpWeakClassifier{Float64}([0.0479147,0.484883],-0.336506),BeveledDecisionStumpWeakClassifier{Float64}([0.672136,0.972659],0.707604),BeveledDecisionStumpWeakClassifier{Float64}([-0.0947732,-0.933046],-0.141899),BeveledDecisionStumpWeakClassifier{Float64}([0.838946,0.765744],-0.700248),BeveledDecisionStumpWeakClassifier{Float64}([-0.286338,0.605569],0.522256),BeveledDecisionStumpWeakClassifier{Float64}([-0.941615,0.508435],-0.957077),BeveledDecisionStumpWeakClassifier{Float64}([0.47229,-0.776823],0.121168)  …  BeveledDecisionStumpWeakClassifier{Float64}([0.279398,0.199705],0.204649),BeveledDecisionStumpWeakClassifier{Float64}([-0.203063,0.17769],-0.211548),BeveledDecisionStumpWeakClassifier{Float64}([-0.475133,-0.58616],0.0925267),BeveledDecisionStumpWeakClassifier{Float64}([0.390934,-0.0370233],-0.164852),BeveledDecisionStumpWeakClassifier{Float64}([-0.357482,0.139733],0.35681),BeveledDecisionStumpWeakClassifier{Float64}([-0.72058,0.918458],-0.997222),BeveledDecisionStumpWeakClassifier{Float64}([-0.844543,-0.0347637],0.805367),BeveledDecisionStumpWeakClassifier{Float64}([0.785783,-0.600253],-1.34543),BeveledDecisionStumpWeakClassifier{Float64}([0.454884,0.808659],0.621566),BeveledDecisionStumpWeakClassifier{Float64}([0.454108,0.191542],-0.255589)])

In [39]:

length(Hb2h.alpha)

Out[39]:

In [40]:

Ydb2h = Hb2h(X)

Out[40]:

1000×1 Array{Float64,2}:
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  ⋮  
  1.0
  1.0
  1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
 -1.0
  1.0
  1.0

In [41]:

dispresult(DataFrame(x=X[:,1], y=X[:,2], c=vec(Ydb2h)))

Out[41]:

In [42]:

# Training-set metrics for Hb2h: predictions Ydb2h = Hb2h(X) against the true labels Y.
(a, p, r, f) = calc_mlmetrics(Y, Ydb2h)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.990000
precision: 0.992063
recall: 0.988142
f_measure: 0.990099

In [43]:

# Draw 100 new points uniformly from [-1, 1) × [-1, 1), classify them with Hb2h and
# plot the predictions.
# NOTE(review): this overwrites x_test / y_test with a new random draw, so the test
# metrics below are not computed on the same sample as those of the other models.
x_test = rand(100, 2) .* 2.0 .- 1.0
y_test = Hb2h(x_test)
dispresult(DataFrame(x=x_test[:,1], y=x_test[:,2], c=vec(y_test)))

Out[43]:

In [44]:

# Test-sample metrics for Hb2h: predictions y_test against labels recomputed with
# makelabels for the same points.
(a, p, r, f) = calc_mlmetrics(makelabels(x_test), y_test)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.950000
precision: 0.961538
recall: 0.943396
f_measure: 0.952381

With DecisionTree¶

In [45]:

include("decisiontreeweakclassifier.jl")

Decision Tree (t=100, max_depth=3)¶

In [46]:

# Fit AdaBoost on (X, Y) with DecisionTreeWeakClassifier as the weak classifier and
# t=100. `maxdepth` is not passed here; the section heading says max_depth=3,
# presumably the default — confirm in decisiontreeweakclassifier.jl.
Ht = adaboost(X, Y; t=100, wc=DecisionTreeWeakClassifier)

Out[46]:

AdaBoost{Float64,DecisionTreeWeakClassifier{Float64}}([0.440429,0.345589,0.544707,0.424655,0.379719,0.256735,0.318183,0.229867,0.254043,0.240014  …  0.0890981,0.206681,0.0781879,0.150555,0.140002,0.135528,0.219566,0.103254,0.165641,0.125415],DecisionTreeWeakClassifier{Float64}[DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.254641,DTreeNode{Float64}(-1.0,2,0.509896,DTreeNode{Float64}(-1.0,2,-0.96856,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,2,0.515432,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0))),DTreeNode{Float64}(1.0,2,-0.709505,DTreeLeaf{Float64}(-1.0),DTreeNode{Float64}(1.0,2,0.977816,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.744298,DTreeNode{Float64}(-1.0,1,-0.905877,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,1,0.714061,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.228064,DTreeNode{Float64}(1.0,1,-0.504523,DTreeNode{Float64}(-1.0,2,-0.701133,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,1,0.852884,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))),DTreeNode{Float64}(-1.0,1,0.371575,DTreeNode{Float64}(-1.0,1,-0.684666,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,2,0.825494,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.751215,DTreeNode{Float64}(-1.0,1,0.502206,DTreeNode{Float64}(-1.0,2,-0.18811,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,2,-0.4404,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0))),DTreeLeaf{Float64}(1.0))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.878973,DTreeLeaf{Float64}(1.0),DTreeNode{Float64}(-1.0,2,-0.474168,DTreeNode{Float64}(-1.0,1,0.981697,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,1,-0.0447778,DTreeLeaf{Float64}(1.
0),DTreeLeaf{Float64}(-1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.717636,DTreeNode{Float64}(-1.0,2,-0.972643,DTreeNode{Float64}(1.0,1,0.924166,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeLeaf{Float64}(-1.0)),DTreeLeaf{Float64}(1.0))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.594097,DTreeNode{Float64}(-1.0,2,-0.972643,DTreeNode{Float64}(1.0,2,-0.989386,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,1,0.457856,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0))),DTreeLeaf{Float64}(1.0))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.870848,DTreeNode{Float64}(-1.0,2,0.228064,DTreeNode{Float64}(1.0,1,-0.504523,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,1,0.687623,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.509896,DTreeLeaf{Float64}(-1.0),DTreeNode{Float64}(1.0,1,0.746374,DTreeLeaf{Float64}(1.0),DTreeNode{Float64}(-1.0,2,0.572229,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.5451,DTreeNode{Float64}(1.0,1,0.977875,DTreeLeaf{Float64}(1.0),DTreeNode{Float64}(-1.0,2,-0.844933,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))),DTreeNode{Float64}(-1.0,2,0.751215,DTreeLeaf{Float64}(-1.0),DTreeNode{Float64}(1.0,1,0.714061,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))))  …  
DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.780023,DTreeNode{Float64}(1.0,2,0.196278,DTreeNode{Float64}(-1.0,1,-0.991537,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,1,0.981522,DTreeLeaf{Float64}(-1.0),DTreeNode{Float64}(1.0,1,0.98416,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.274138,DTreeNode{Float64}(-1.0,2,-0.752054,DTreeNode{Float64}(1.0,1,-0.632499,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,2,0.577061,DTreeNode{Float64}(1.0,1,-0.502475,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,1,-0.614312,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,0.939354,DTreeNode{Float64}(1.0,2,-0.758105,DTreeNode{Float64}(-1.0,2,-0.959789,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,2,0.983809,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))),DTreeNode{Float64}(-1.0,1,0.981522,DTreeLeaf{Float64}(-1.0),DTreeNode{Float64}(1.0,1,0.98416,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.18811,DTreeNode{Float64}(1.0,2,-0.717636,DTreeNode{Float64}(-1.0,1,-0.718576,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,2,0.449946,DTreeLeaf{Float64}(-1.0),DTreeNode{Float64}(1.0,1,0.224453,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,0.273727,DTreeLeaf{Float64}(1.0),DTreeNode{Float64}(-1.0,2,0.528258,DTreeNode{Float64}(-1.0,1,0.944574,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,2,0.700986,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.15478,DTreeNode{Float64}(-1.0,2,-0.724556,DTreeNode{Float64}(1
.0,1,0.0844386,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,2,0.115787,DTreeLeaf{Float64}(1.0),DTreeNode{Float64}(-1.0,2,0.795276,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,0.365588,DTreeNode{Float64}(-1.0,2,0.00263185,DTreeNode{Float64}(-1.0,2,-0.658653,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,2,0.555691,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))),DTreeNode{Float64}(1.0,1,0.939354,DTreeNode{Float64}(1.0,2,-0.546977,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,2,0.466746,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.0292521,DTreeNode{Float64}(1.0,2,-0.607234,DTreeNode{Float64}(-1.0,2,-0.901062,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,1,-0.980848,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0))),DTreeNode{Float64}(-1.0,1,0.981522,DTreeLeaf{Float64}(-1.0),DTreeNode{Float64}(1.0,1,0.98416,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,0.0187958,DTreeNode{Float64}(-1.0,2,0.733746,DTreeNode{Float64}(-1.0,2,-0.96856,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,2,-0.0926104,DTreeNode{Float64}(1.0,2,-0.758105,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,1,0.327421,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,0.779287,DTreeNode{Float64}(1.0,2,-0.758105,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,1,0.944574,DTreeNode{Float64}(-1.0,2,-0.243177,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,1,0.977716,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))))])

In [47]:

length(Ht.alpha)

Out[47]:

In [48]:

Ydt = Ht(X)

Out[48]:

1000×1 Array{Float64,2}:
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  ⋮  
  1.0
  1.0
  1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
 -1.0
  1.0
  1.0

In [49]:

dispresult(DataFrame(x=X[:,1], y=X[:,2], c=vec(Ydt)))

Out[49]:

In [50]:

# Training-set metrics for Ht: predictions Ydt = Ht(X) against the true labels Y.
(a, p, r, f) = calc_mlmetrics(Y, Ydt)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.996000
precision: 0.994094
recall: 0.998024
f_measure: 0.996055

In [51]:

# Draw 100 new points uniformly from [-1, 1) × [-1, 1), classify them with Ht and
# plot the predictions.
# NOTE(review): this overwrites x_test / y_test with a new random draw, so the test
# metrics below are not computed on the same sample as those of the other models.
x_test = rand(100, 2) .* 2.0 .- 1.0
y_test = Ht(x_test)
dispresult(DataFrame(x=x_test[:,1], y=x_test[:,2], c=vec(y_test)))

Out[51]:

In [52]:

# Test-sample metrics for Ht: predictions y_test against labels recomputed with
# makelabels for the same points.
(a, p, r, f) = calc_mlmetrics(makelabels(x_test), y_test)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.950000
precision: 0.981132
recall: 0.928571
f_measure: 0.954128

Decision Tree (t=100, max_depth=2)¶

In [53]:

# Same decision-tree setup as Ht (t=100), but with maxdepth=2 passed explicitly.
Ht2d = adaboost(X, Y; t=100, wc=DecisionTreeWeakClassifier, maxdepth=2)

Out[53]:

AdaBoost{Float64,DecisionTreeWeakClassifier{Float64}}([0.331647,0.240541,0.232847,0.301052,0.273562,0.203332,0.197905,0.142434,0.180336,0.183854  …  0.095477,0.0998118,0.19328,0.126841,0.123101,0.0971369,0.0699668,0.085581,0.0810726,0.0668229],DecisionTreeWeakClassifier{Float64}[DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.509896,DTreeNode{Float64}(-1.0,1,0.455402,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,2,0.515198,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,0.852034,DTreeNode{Float64}(1.0,1,-0.428321,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeLeaf{Float64}(-1.0))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.709505,DTreeNode{Float64}(-1.0,2,-0.972643,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,2,0.228064,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.736248,DTreeNode{Float64}(-1.0,1,-0.878973,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,1,0.714061,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.77307,DTreeNode{Float64}(-1.0,2,-0.954017,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeLeaf{Float64}(1.0))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.744298,DTreeNode{Float64}(-1.0,1,0.502206,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,1,0.714061,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.980124,DTreeLeaf{Float64}(-1.0),DTreeNode{Float64}(1.0,1,0.724792,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.686505,DTreeNode{Float64}(1.0,2,0.313981,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeLeaf{Float64}(-1.0))),DecisionTreeWeakClassifie
r{Float64}(DTreeNode{Float64}(1.0,2,-0.777742,DTreeNode{Float64}(-1.0,1,-0.632499,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,2,-0.143292,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.474168,DTreeNode{Float64}(-1.0,2,-0.904355,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeLeaf{Float64}(1.0)))  …  DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,0.263483,DTreeLeaf{Float64}(1.0),DTreeNode{Float64}(-1.0,2,0.262973,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.878973,DTreeNode{Float64}(1.0,2,-0.575679,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(-1.0,2,0.991177,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.643145,DTreeNode{Float64}(-1.0,1,-0.632499,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,1,-0.428321,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.380551,DTreeNode{Float64}(1.0,2,-0.27047,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,1,0.981522,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,0.746672,DTreeLeaf{Float64}(1.0),DTreeNode{Float64}(-1.0,2,0.467597,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.608342,DTreeNode{Float64}(1.0,2,-0.777742,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,2,0.991177,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,0.881831,DTreeNode{Float64}(1.0,2,0.115787,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeLeaf{Float64}(-1.0))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.751202,DTreeLea
f{Float64}(1.0),DTreeNode{Float64}(-1.0,2,-0.545574,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.658653,DTreeNode{Float64}(1.0,2,-0.989386,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,1,-0.905877,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.104912,DTreeNode{Float64}(-1.0,2,-0.959789,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,1,0.977716,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))])

In [54]:

length(Ht2d.alpha)

Out[54]:

In [55]:

Ydt2d = Ht2d(X)

Out[55]:

1000×1 Array{Float64,2}:
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  ⋮  
  1.0
  1.0
  1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
  1.0

In [56]:

dispresult(DataFrame(x=X[:,1], y=X[:,2], c=vec(Ydt2d)))

Out[56]:

In [57]:

# Training-set metrics for Ht2d: predictions Ydt2d = Ht2d(X) against the true labels Y.
(a, p, r, f) = calc_mlmetrics(Y, Ydt2d)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.950000
precision: 0.954183
recall: 0.946640
f_measure: 0.950397

In [58]:

# Draw 100 new points uniformly from [-1, 1) × [-1, 1), classify them with Ht2d and
# plot the predictions.
# NOTE(review): this overwrites x_test / y_test with a new random draw, so the test
# metrics below are not computed on the same sample as those of the other models.
x_test = rand(100, 2) .* 2.0 .- 1.0
y_test = Ht2d(x_test)
dispresult(DataFrame(x=x_test[:,1], y=x_test[:,2], c=vec(y_test)))

Out[58]:

In [59]:

# Test-sample metrics for Ht2d: predictions y_test against labels recomputed with
# makelabels for the same points.
(a, p, r, f) = calc_mlmetrics(makelabels(x_test), y_test)

@printf("accuracy: %.06f\n", a)
@printf("precision: %.06f\n", p)
@printf("recall: %.06f\n", r)
@printf("f_measure: %.06f\n", f)

accuracy: 0.940000
precision: 0.977778
recall: 0.897959
f_measure: 0.936170

Additional Features¶

In [60]:

# Extend the two raw coordinates with quadratic features: each row becomes
# [x, y, x², y², x·y].  The interaction column is an explicit column product instead
# of `reducedim(*, X, 2)`, so the cell no longer depends on `reducedim` (dropped from
# later Julia releases).  Assumes `X` has exactly two columns, as the 1000×5 result
# shown below implies.
X2 = hcat(X, X.^2, X[:, 1] .* X[:, 2])

Out[60]:

1000×5 Array{Float64,2}:
  0.381805    0.990546    0.145775    0.981182     0.378196  
  0.733532   -0.805487    0.53807     0.64881     -0.590851  
 -0.967878    0.638484    0.936788    0.407662    -0.617975  
 -0.256115    0.493955    0.065595    0.243992    -0.126509  
  0.189901   -0.60218     0.0360622   0.36262     -0.114354  
 -0.492716   -0.672154    0.242769    0.451791     0.331181  
 -0.402301    0.173027    0.161846    0.0299383   -0.0696089 
 -0.750639    0.474238    0.563459    0.224902    -0.355982  
 -0.554402   -0.00743688  0.307361    5.53072e-5   0.00412302
  0.668012    0.207751    0.44624     0.0431604    0.13878   
 -0.117654   -0.304817    0.0138425   0.0929137    0.035863  
  0.0635804  -0.930188    0.00404246  0.86525     -0.0591417 
 -0.44822    -0.495884    0.200901    0.245901     0.222265  
  ⋮                                                          
  0.963771    0.428773    0.928855    0.183846     0.413239  
  0.2735     -0.285869    0.0748022   0.0817211   -0.0781852 
  0.155255   -0.369354    0.0241042   0.136422    -0.0573441 
  0.878592   -0.199518    0.771923    0.0398074   -0.175295  
 -0.506498    0.683235    0.25654     0.46681     -0.346057  
  0.408383    0.289063    0.166777    0.0835577    0.118049  
 -0.468245   -0.850559    0.219253    0.723451     0.39827   
  0.647787   -0.870747    0.419628    0.758201    -0.564059  
 -0.231584    0.244115    0.0536313   0.059592    -0.0565331 
 -0.818909   -0.0248224   0.670611    0.00061615   0.0203272 
  0.644493    0.669608    0.415372    0.448375     0.431558  
 -0.9977     -0.941009    0.995404    0.885497     0.938844

Decision Stump (t=1000)¶

In [61]:

# Fit AdaBoost on the expanded feature matrix `X2` with `t=1000`, leaving the weak
# classifier at its default (the displayed result shows DecisionStumpWeakClassifier).
H2 = adaboost(X2, Y; t=1000)

Out[61]:

AdaBoost{Float64,DecisionStumpWeakClassifier{Float64}}([0.0460325,0.185354,0.14605,0.18055,0.180284,0.139224,0.06146,0.0716916,0.125182,0.126542  …  0.0209131,0.0169005,0.0176732,0.0199021,0.0179122,0.0189123,0.0182897,0.0166503,0.0133249,0.0197383],DecisionStumpWeakClassifier{Float64}[DecisionStumpWeakClassifier{Float64}(5,-1.0,-0.021462),DecisionStumpWeakClassifier{Float64}(2,1.0,0.515198),DecisionStumpWeakClassifier{Float64}(5,-1.0,0.672651),DecisionStumpWeakClassifier{Float64}(1,1.0,0.365588),DecisionStumpWeakClassifier{Float64}(2,1.0,-0.717636),DecisionStumpWeakClassifier{Float64}(1,-1.0,-0.878973),DecisionStumpWeakClassifier{Float64}(4,1.0,0.00620339),DecisionStumpWeakClassifier{Float64}(4,-1.0,0.149681),DecisionStumpWeakClassifier{Float64}(2,1.0,0.744298),DecisionStumpWeakClassifier{Float64}(5,1.0,-0.235001)  …  DecisionStumpWeakClassifier{Float64}(1,-1.0,-0.878973),DecisionStumpWeakClassifier{Float64}(1,1.0,-0.980848),DecisionStumpWeakClassifier{Float64}(3,1.0,0.922192),DecisionStumpWeakClassifier{Float64}(5,1.0,-0.365571),DecisionStumpWeakClassifier{Float64}(4,-1.0,0.00423528),DecisionStumpWeakClassifier{Float64}(2,-1.0,-0.0926104),DecisionStumpWeakClassifier{Float64}(4,1.0,0.00062403),DecisionStumpWeakClassifier{Float64}(3,-1.0,0.0139063),DecisionStumpWeakClassifier{Float64}(1,1.0,-0.980848),DecisionStumpWeakClassifier{Float64}(1,-1.0,-0.990485)])

In [62]:

# Size of the ensemble's `alpha` vector (presumably one weight per boosting round — confirm).
length(H2.alpha)

Out[62]:

In [63]:

# Apply the ensemble `H2` to the expanded inputs `X2`; the result is reused by the
# plot and metrics cells that follow.
Yd2 = H2(X2)

Out[63]:

1000×1 Array{Float64,2}:
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  ⋮  
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
 -1.0

In [64]:

# Show the predictions against the two raw coordinates in `X` (not the expanded
# `X2`) by passing the labelled table to `dispresult`.
DataFrame(x = X[:, 1], y = X[:, 2], c = vec(Yd2)) |> dispresult

Out[64]:

In [65]:

# Score the predictions `Yd2` against the labels `Y` and print the four values
# returned by `calc_mlmetrics`, one per line.
a, p, r, f = calc_mlmetrics(Y, Yd2)

@printf "accuracy: %.06f\n" a
@printf "precision: %.06f\n" p
@printf "recall: %.06f\n" r
@printf "f_measure: %.06f\n" f

accuracy: 0.840000
precision: 0.848790
recall: 0.832016
f_measure: 0.840319

In [66]:

# Draw 100 fresh points in [-1, 1) × [-1, 1) and expand them to the five-column
# layout [x, y, x², y², x·y] that `H2` was fitted on.
x_test = rand(100, 2) .* 2.0 .- 1.0
# The x·y column is an explicit column product rather than `reducedim(*, x_test, 2)`,
# which later Julia releases no longer provide; the values are the same.
x_test2 = hcat(x_test, x_test.^2, x_test[:, 1] .* x_test[:, 2])
y_test = H2(x_test2)
# Display against the raw coordinates, not the expanded features.
dispresult(DataFrame(x=x_test[:,1], y=x_test[:,2], c=vec(y_test)))

Out[66]:

In [67]:

# Score the predictions on the fresh sample; the reference labels are recomputed
# from the raw points with `makelabels`.
a, p, r, f = calc_mlmetrics(makelabels(x_test), y_test)

@printf "accuracy: %.06f\n" a
@printf "precision: %.06f\n" p
@printf "recall: %.06f\n" r
@printf "f_measure: %.06f\n" f

accuracy: 0.720000
precision: 0.685185
recall: 0.770833
f_measure: 0.725490

Beveled Stump (t=100)¶

In [68]:

# Fit AdaBoost on the expanded feature matrix `X2` with `t=100`, using
# BeveledDecisionStumpWeakClassifier as the weak-classifier type.
H2b = adaboost(X2, Y; t=100, wc=BeveledDecisionStumpWeakClassifier)

Out[68]:

AdaBoost{Float64,BeveledDecisionStumpWeakClassifier{Float64}}([0.116525,0.235644,0.270388,0.204673,0.29648,0.27773,0.161874,0.228241,0.227682,0.125  …  0.156864,0.21178,0.133913,0.116127,0.117578,0.182353,0.100741,0.110266,0.113749,0.225046],BeveledDecisionStumpWeakClassifier{Float64}[BeveledDecisionStumpWeakClassifier{Float64}([-0.445067,0.585157,-0.876724,-0.115326,0.471897],1.32477),BeveledDecisionStumpWeakClassifier{Float64}([0.947061,0.103608,-0.961654,-0.615689,-0.173344],0.293904),BeveledDecisionStumpWeakClassifier{Float64}([-0.480942,0.740757,-0.725534,0.279109,-0.606455],-0.624892),BeveledDecisionStumpWeakClassifier{Float64}([0.527187,0.892778,-0.0200872,-0.2068,-0.212955],0.811584),BeveledDecisionStumpWeakClassifier{Float64}([0.8414,0.0309458,-0.00767811,0.77942,-0.915443],-0.391795),BeveledDecisionStumpWeakClassifier{Float64}([-0.945423,-0.444783,-0.131473,-0.368581,-0.402222],0.0502661),BeveledDecisionStumpWeakClassifier{Float64}([0.598392,-0.905309,0.24139,-0.366103,-0.329221],0.15317),BeveledDecisionStumpWeakClassifier{Float64}([0.600577,0.907212,0.699775,0.452862,0.906019],-1.01333),BeveledDecisionStumpWeakClassifier{Float64}([0.106678,0.800255,-0.957537,0.957028,0.753049],0.722838),BeveledDecisionStumpWeakClassifier{Float64}([0.425672,0.0454405,0.165763,0.764473,-0.591033],-0.26994)  …  
BeveledDecisionStumpWeakClassifier{Float64}([-0.359494,-0.978341,0.655672,0.877565,-0.97124],-0.374221),BeveledDecisionStumpWeakClassifier{Float64}([0.262922,-0.59721,-0.838904,0.805415,0.602023],0.27721),BeveledDecisionStumpWeakClassifier{Float64}([0.589477,-0.13325,0.28918,0.496289,-0.133592],-0.314672),BeveledDecisionStumpWeakClassifier{Float64}([-0.339465,0.713285,-0.967329,0.83883,-0.730143],-0.00317837),BeveledDecisionStumpWeakClassifier{Float64}([0.0746756,0.647013,0.480865,0.38588,0.474868],-0.408356),BeveledDecisionStumpWeakClassifier{Float64}([-0.341939,-0.542254,0.352006,-0.385168,-0.467056],0.0982439),BeveledDecisionStumpWeakClassifier{Float64}([0.807311,0.789229,-0.598132,-0.279942,-0.743558],-0.209001),BeveledDecisionStumpWeakClassifier{Float64}([-0.57307,-0.66207,0.51953,-0.587289,0.236097],0.127418),BeveledDecisionStumpWeakClassifier{Float64}([-0.822626,0.638581,0.694476,-0.557719,-0.0261417],0.0932249),BeveledDecisionStumpWeakClassifier{Float64}([-0.300768,0.0362653,0.922275,0.99279,-0.391153],-1.18075)])

In [69]:

# Size of the ensemble's `alpha` vector (presumably one weight per boosting round — confirm).
length(H2b.alpha)

Out[69]:

In [70]:

# Apply the ensemble `H2b` to the expanded inputs `X2`; the result is reused by the
# plot and metrics cells that follow.
Yd2b = H2b(X2)

Out[70]:

1000×1 Array{Float64,2}:
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  ⋮  
  1.0
  1.0
  1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
 -1.0
  1.0
  1.0

In [71]:

# Show the predictions against the two raw coordinates in `X` (not the expanded
# `X2`) by passing the labelled table to `dispresult`.
DataFrame(x = X[:, 1], y = X[:, 2], c = vec(Yd2b)) |> dispresult

Out[71]:

In [72]:

# Score the predictions `Yd2b` against the labels `Y` and print the four values
# returned by `calc_mlmetrics`, one per line.
a, p, r, f = calc_mlmetrics(Y, Yd2b)

@printf "accuracy: %.06f\n" a
@printf "precision: %.06f\n" p
@printf "recall: %.06f\n" r
@printf "f_measure: %.06f\n" f

accuracy: 0.980000
precision: 0.989919
recall: 0.970356
f_measure: 0.980040

In [73]:

# Draw 100 fresh points in [-1, 1) × [-1, 1) and expand them to the five-column
# layout [x, y, x², y², x·y] that `H2b` was fitted on.
x_test = rand(100, 2) .* 2.0 .- 1.0
# The x·y column is an explicit column product rather than `reducedim(*, x_test, 2)`,
# which later Julia releases no longer provide; the values are the same.
x_test2 = hcat(x_test, x_test.^2, x_test[:, 1] .* x_test[:, 2])
y_test = H2b(x_test2)
# Display against the raw coordinates, not the expanded features.
dispresult(DataFrame(x=x_test[:,1], y=x_test[:,2], c=vec(y_test)))

Out[73]:

In [74]:

# Score the predictions on the fresh sample; the reference labels are recomputed
# from the raw points with `makelabels`.
a, p, r, f = calc_mlmetrics(makelabels(x_test), y_test)

@printf "accuracy: %.06f\n" a
@printf "precision: %.06f\n" p
@printf "recall: %.06f\n" r
@printf "f_measure: %.06f\n" f

accuracy: 0.890000
precision: 0.939394
recall: 0.775000
f_measure: 0.849315

Decision Tree (t=100, maxdepth=2)¶

In [75]:

# Fit AdaBoost on the expanded feature matrix `X2` with `t=100`, using
# DecisionTreeWeakClassifier with `maxdepth=2` as the weak classifier.
H2t = adaboost(X2, Y; t=100, wc=DecisionTreeWeakClassifier, maxdepth=2)

Out[75]:

AdaBoost{Float64,DecisionTreeWeakClassifier{Float64}}([0.255413,0.201622,0.21577,0.261643,0.121079,0.0876513,0.21402,0.165936,0.0978114,0.0837865  …  0.105944,0.113723,0.165807,0.193165,0.110964,0.113512,0.132862,0.209217,0.0887083,0.200869],DecisionTreeWeakClassifier{Float64}[DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.254641,DTreeNode{Float64}(-1.0,3,0.772594,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,3,0.725962,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,3,0.144819,DTreeLeaf{Float64}(-1.0),DTreeNode{Float64}(1.0,1,-0.502475,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.509896,DTreeLeaf{Float64}(-1.0),DTreeNode{Float64}(1.0,3,0.000323783,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.717636,DTreeNode{Float64}(-1.0,4,0.946036,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,3,0.95593,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.914296,DTreeNode{Float64}(1.0,2,0.0405514,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeLeaf{Float64}(-1.0))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,3,0.00264797,DTreeNode{Float64}(-1.0,3,1.84327e-6,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,5,0.672651,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.0451205,DTreeNode{Float64}(1.0,4,0.537129,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,5,0.185197,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,5,0.177503,DTreeLeaf{Float64}(1.0),DTreeNode{Float64}(-1.0,5,0.895117,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClass
ifier{Float64}(DTreeNode{Float64}(1.0,4,0.149681,DTreeNode{Float64}(1.0,5,-0.230739,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,4,0.905591,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,5,-0.0141381,DTreeNode{Float64}(1.0,5,-0.235001,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,4,0.0205328,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0))))  …  DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,4,0.0353854,DTreeNode{Float64}(-1.0,1,0.514379,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,4,0.984653,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,4,0.0471988,DTreeNode{Float64}(1.0,4,0.00062403,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,3,0.81626,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,0.744298,DTreeNode{Float64}(-1.0,5,0.201032,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,1,0.714061,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,5,0.21587,DTreeNode{Float64}(1.0,4,4.52642e-7,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(-1.0,1,0.00247774,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,-0.878973,DTreeLeaf{Float64}(1.0),DTreeNode{Float64}(-1.0,4,0.00151145,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,4,0.00620339,DTreeNode{Float64}(-1.0,1,0.506852,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,2,0.115787,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,4,0.0353854,DTreeNode{Float64}(-1.0,1,0.066303,DTreeLeaf{Float64}(1.0),DTr
eeLeaf{Float64}(-1.0)),DTreeNode{Float64}(1.0,3,0.657726,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,1,0.365588,DTreeNode{Float64}(-1.0,2,0.736248,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,2,-0.251552,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,3,8.17244e-5,DTreeNode{Float64}(-1.0,5,0.00299301,DTreeLeaf{Float64}(-1.0),DTreeLeaf{Float64}(1.0)),DTreeNode{Float64}(1.0,4,0.981434,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)))),DecisionTreeWeakClassifier{Float64}(DTreeNode{Float64}(1.0,2,-0.293657,DTreeNode{Float64}(1.0,3,0.182186,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0)),DTreeNode{Float64}(-1.0,1,-0.228967,DTreeLeaf{Float64}(1.0),DTreeLeaf{Float64}(-1.0))))])

In [76]:

# Size of the ensemble's `alpha` vector (presumably one weight per boosting round — confirm).
length(H2t.alpha)

Out[76]:

In [77]:

# Apply the ensemble `H2t` to the expanded inputs `X2`; the result is reused by the
# plot and metrics cells that follow.
Yd2t = H2t(X2)

Out[77]:

1000×1 Array{Float64,2}:
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
  1.0
 -1.0
 -1.0
  1.0
  1.0
 -1.0
 -1.0
  ⋮  
  1.0
  1.0
  1.0
  1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
 -1.0
  1.0
  1.0

In [78]:

# Show the predictions against the two raw coordinates in `X` (not the expanded
# `X2`) by passing the labelled table to `dispresult`.
DataFrame(x = X[:, 1], y = X[:, 2], c = vec(Yd2t)) |> dispresult

Out[78]:

In [79]:

# Score the predictions `Yd2t` against the labels `Y` and print the four values
# returned by `calc_mlmetrics`, one per line.
a, p, r, f = calc_mlmetrics(Y, Yd2t)

@printf "accuracy: %.06f\n" a
@printf "precision: %.06f\n" p
@printf "recall: %.06f\n" r
@printf "f_measure: %.06f\n" f

accuracy: 0.960000
precision: 0.960474
recall: 0.960474
f_measure: 0.960474

In [80]:

# Draw 100 fresh points in [-1, 1) × [-1, 1) and expand them to the five-column
# layout [x, y, x², y², x·y] that `H2t` was fitted on.
x_test = rand(100, 2) .* 2.0 .- 1.0
# The x·y column is an explicit column product rather than `reducedim(*, x_test, 2)`,
# which later Julia releases no longer provide; the values are the same.
x_test2 = hcat(x_test, x_test.^2, x_test[:, 1] .* x_test[:, 2])
y_test = H2t(x_test2)
# Display against the raw coordinates, not the expanded features.
dispresult(DataFrame(x=x_test[:,1], y=x_test[:,2], c=vec(y_test)))

Out[80]:

In [81]:

# Score the predictions on the fresh sample; the reference labels are recomputed
# from the raw points with `makelabels`.
a, p, r, f = calc_mlmetrics(makelabels(x_test), y_test)

@printf "accuracy: %.06f\n" a
@printf "precision: %.06f\n" p
@printf "recall: %.06f\n" r
@printf "f_measure: %.06f\n" f

accuracy: 0.890000
precision: 0.854545
recall: 0.940000
f_measure: 0.895238

TimeIt (fit)¶

Decision Stump (t=1000)¶

In [82]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time a single AdaBoost fit on (X, Y) with t=1000 and the default weak classifier.
@time adaboost(X, Y; t=1000);

  0.155572 seconds (59.16 k allocations: 179.065 MB, 18.35% gc time)

Decision Stump (t=2000)¶

In [83]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time a single AdaBoost fit on (X, Y) with t=2000 and the default weak classifier.
@time adaboost(X, Y; t=2000);

  0.427481 seconds (118.03 k allocations: 358.090 MB, 11.37% gc time)

Beveled Stump (t=100)¶

In [84]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time a single AdaBoost fit on (X, Y) with t=100 beveled decision stumps.
@time adaboost(X, Y; t=100, wc=BeveledDecisionStumpWeakClassifier);

  0.020341 seconds (6.42 k allocations: 17.191 MB)

Beveled Stump (t=200)¶

In [85]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time a single AdaBoost fit on (X, Y) with t=200 beveled decision stumps.
@time adaboost(X, Y; t=200, wc=BeveledDecisionStumpWeakClassifier);

  0.032742 seconds (12.83 k allocations: 34.358 MB, 6.91% gc time)

Decision Tree (t=100, maxdepth=3)¶

In [86]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time a single AdaBoost fit on (X, Y) with t=100 decision trees.  No `maxdepth` is
# passed, so adaboost's default applies.
# NOTE(review): the section heading says maxdepth=3 — confirm that this is the default.
@time adaboost(X, Y; t=100, wc=DecisionTreeWeakClassifier);

  0.065813 seconds (57.44 k allocations: 54.248 MB, 12.39% gc time)

Decision Tree (t=100, maxdepth=2)¶

In [87]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time a single AdaBoost fit on (X, Y) with t=100 decision trees limited to maxdepth=2.
@time adaboost(X, Y; t=100, wc=DecisionTreeWeakClassifier, maxdepth=2);

  0.034316 seconds (26.79 k allocations: 37.628 MB, 9.32% gc time)

TimeIt (predict)¶

Decision Stump (t=1000)¶

In [88]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time one prediction pass over `X` with `H` (fitted in an earlier cell; per the
# heading, the t=1000 decision-stump ensemble).
@time H(X);

  0.135507 seconds (14.54 k allocations: 8.029 MB)

Decision Stump (t=2000)¶

In [89]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time one prediction pass over `X` with `H2k` (fitted in an earlier cell; per the
# heading, the t=2000 decision-stump ensemble).
@time H2k(X);

  0.088738 seconds (14.54 k allocations: 15.659 MB)

Beveled Stump (t=100)¶

In [90]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time one prediction pass over `X` with `Hb` (fitted in an earlier cell; per the
# heading, the t=100 beveled-stump ensemble).
@time Hb(X);

  0.101591 seconds (415.54 k allocations: 7.251 MB)

Beveled Stump (t=200)¶

In [91]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time one prediction pass over `X` with `Hb2h` (fitted in an earlier cell; per the
# heading, the t=200 beveled-stump ensemble).
@time Hb2h(X);

  0.126774 seconds (815.54 k allocations: 14.224 MB)

Decision Tree (t=100, maxdepth=3)¶

In [92]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time one prediction pass over `X` with `Ht` (fitted in an earlier cell; per the
# heading, the t=100, maxdepth=3 decision-tree ensemble).
@time Ht(X);

  0.102042 seconds (372.97 k allocations: 6.602 MB)

Decision Tree (t=100, maxdepth=2)¶

In [93]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time one prediction pass over `X` with `Ht2d` (per the heading, the t=100,
# maxdepth=2 decision-tree ensemble).
@time Ht2d(X);

  0.061358 seconds (304.78 k allocations: 5.561 MB)

Additional Features¶

In [94]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time a single AdaBoost fit on the expanded features `X2` with t=1000 and the
# default weak classifier.
@time adaboost(X2, Y; t=1000);

  0.150262 seconds (59.03 k allocations: 179.057 MB, 14.92% gc time)

In [95]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time a single AdaBoost fit on the expanded features `X2` with t=100 beveled
# decision stumps.
@time adaboost(X2, Y; t=100, wc=BeveledDecisionStumpWeakClassifier);

  0.018559 seconds (6.42 k allocations: 17.203 MB)

In [96]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time a single AdaBoost fit on the expanded features `X2` with t=100 decision trees.
# NOTE(review): no `maxdepth` is passed here, whereas `H2t` (whose prediction is
# timed below) was fitted with maxdepth=2 — confirm which configuration is intended.
@time adaboost(X2, Y; t=100, wc=DecisionTreeWeakClassifier);

  0.112001 seconds (61.34 k allocations: 62.557 MB, 9.61% gc time)

In [97]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time one prediction pass over the expanded features `X2` with `H2`
# (the t=1000 decision-stump ensemble fitted on `X2`).
@time H2(X2);

  0.044224 seconds (14.54 k allocations: 8.029 MB)

In [98]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time one prediction pass over the expanded features `X2` with `H2b`
# (the t=100 beveled-stump ensemble fitted on `X2`).
@time H2b(X2);

  0.050282 seconds (415.54 k allocations: 7.251 MB)

In [99]:

# Collect garbage first, presumably so leftovers from earlier cells are not collected
# during the timed call.
Base.gc(true);
# Time one prediction pass over the expanded features `X2` with `H2t`
# (the t=100, maxdepth=2 decision-tree ensemble fitted on `X2`).
@time H2t(X2);

  0.055222 seconds (311.29 k allocations: 5.660 MB)

In [ ]: