using DataFrames, RDatasets
# Load the "VietNamI" table from the "Ecdat" collection into `vn`.
vn = dataset("Ecdat","VietNamI")
| | Pharvis | LnhhExp | Age | Sex | Married | Educ | Illness | Injury | Illdays | Actdays | Insurance | Commune |
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 2.730363 | 3.7612 | male | 1 | 2 | 1 | 0 | 7 | 0 | 0 | 192 |
2 | 0 | 2.737248 | 2.944439 | female | 0 | 0 | 1 | 0 | 4 | 0 | 0 | 167 |
3 | 0 | 2.266935 | 2.56495 | male | 0 | 4 | 0 | 0 | 0 | 0 | 1 | 76 |
4 | 1 | 2.392753 | 3.637586 | female | 1 | 3 | 1 | 0 | 3 | 0 | 1 | 123 |
5 | 1 | 3.105335 | 3.295837 | male | 1 | 3 | 1 | 0 | 10 | 0 | 0 | 148 |
6 | 0 | 3.760884 | 3.367296 | male | 1 | 9 | 0 | 0 | 0 | 0 | 1 | 20 |
7 | 0 | 3.155609 | 3.663562 | female | 1 | 2 | 0 | 0 | 0 | 0 | 1 | 40 |
8 | 0 | 3.724682 | 2.197225 | male | 0 | 5 | 0 | 0 | 0 | 0 | 1 | 57 |
9 | 2 | 2.861691 | 3.7612 | female | 1 | 2 | 2 | 0 | 4 | 0 | 0 | 49 |
10 | 3 | 2.615077 | 4.234107 | male | 1 | 0 | 1 | 0 | 7 | 0 | 0 | 170 |
11 | 1 | 2.653243 | 2.772589 | male | 0 | 4 | 1 | 0 | 1 | 0 | 0 | 40 |
12 | 1 | 2.139857 | 3.663562 | female | 1 | 2 | 2 | 0 | 5 | 0 | 0 | 127 |
13 | 2 | 2.625683 | 3.555348 | female | 1 | 3 | 1 | 0 | 3 | 0 | 0 | 106 |
14 | 1 | 2.767746 | 1.94591 | female | 0 | 5 | 2 | 0 | 3 | 0 | 0 | 168 |
15 | 0 | 2.871242 | 2.302585 | male | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 185 |
16 | 2 | 1.983811 | 3.135494 | male | 1 | 4 | 3 | 0 | 10 | 0 | 0 | 41 |
17 | 0 | 1.260201 | 3.091043 | male | 0 | 4 | 0 | 0 | 0 | 0 | 0 | 106 |
18 | 0 | 2.298178 | 3.401197 | male | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 141 |
19 | 3 | 2.132508 | 3.332205 | male | 1 | 4 | 3 | 0 | 10 | 0 | 0 | 61 |
20 | 0 | 2.000231 | 3.258096 | male | 1 | 3 | 0 | 0 | 0 | 0 | 0 | 159 |
21 | 0 | 2.018375 | 3.218876 | male | 1 | 6 | 1 | 0 | 7 | 0 | 0 | 121 |
22 | 1 | 1.886142 | 2.639057 | female | 0 | 2 | 1 | 0 | 3 | 0 | 0 | 56 |
23 | 10 | 2.953125 | 1.098612 | female | 0 | 2 | 2 | 0 | 4 | 0 | 0 | 34 |
24 | 0 | 2.378481 | 3.526361 | male | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 127 |
25 | 0 | 1.574376 | 3.555348 | male | 1 | 2 | 0 | 0 | 0 | 0 | 0 | 135 |
26 | 3 | 2.495514 | 3.637586 | female | 1 | 3 | 1 | 0 | 5 | 0 | 0 | 147 |
27 | 1 | 2.318077 | 3.7612 | male | 1 | 4 | 1 | 0 | 5 | 0 | 0 | 94 |
28 | 0 | 2.029045 | 4.248495 | female | 1 | 3 | 1 | 0 | 30 | 0 | 0 | 125 |
29 | 1 | 1.788754 | 3.610918 | female | 1 | 3 | 3 | 0 | 3 | 0 | 0 | 79 |
30 | 0 | 2.091107 | 2.079442 | female | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 143 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
# Pull out the Illdays column; it is converted into the prediction target
# (`signals`) further down.
days_ill = vn[:Illdays]
27765-element DataArray{Int32,1}: 7 4 0 3 10 0 0 0 4 7 1 5 3 ⋮ 3 10 0 1 0 0 30 3 0 0 20 7
# Drop the target column from `vn` in place so that the feature array built
# from `vn` later does not contain the value being predicted.
delete!(vn,:Illdays)
| | Pharvis | LnhhExp | Age | Sex | Married | Educ | Illness | Injury | Actdays | Insurance | Commune |
---|---|---|---|---|---|---|---|---|---|---|---|
1 | 0 | 2.730363 | 3.7612 | male | 1 | 2 | 1 | 0 | 0 | 0 | 192 |
2 | 0 | 2.737248 | 2.944439 | female | 0 | 0 | 1 | 0 | 0 | 0 | 167 |
3 | 0 | 2.266935 | 2.56495 | male | 0 | 4 | 0 | 0 | 0 | 1 | 76 |
4 | 1 | 2.392753 | 3.637586 | female | 1 | 3 | 1 | 0 | 0 | 1 | 123 |
5 | 1 | 3.105335 | 3.295837 | male | 1 | 3 | 1 | 0 | 0 | 0 | 148 |
6 | 0 | 3.760884 | 3.367296 | male | 1 | 9 | 0 | 0 | 0 | 1 | 20 |
7 | 0 | 3.155609 | 3.663562 | female | 1 | 2 | 0 | 0 | 0 | 1 | 40 |
8 | 0 | 3.724682 | 2.197225 | male | 0 | 5 | 0 | 0 | 0 | 1 | 57 |
9 | 2 | 2.861691 | 3.7612 | female | 1 | 2 | 2 | 0 | 0 | 0 | 49 |
10 | 3 | 2.615077 | 4.234107 | male | 1 | 0 | 1 | 0 | 0 | 0 | 170 |
11 | 1 | 2.653243 | 2.772589 | male | 0 | 4 | 1 | 0 | 0 | 0 | 40 |
12 | 1 | 2.139857 | 3.663562 | female | 1 | 2 | 2 | 0 | 0 | 0 | 127 |
13 | 2 | 2.625683 | 3.555348 | female | 1 | 3 | 1 | 0 | 0 | 0 | 106 |
14 | 1 | 2.767746 | 1.94591 | female | 0 | 5 | 2 | 0 | 0 | 0 | 168 |
15 | 0 | 2.871242 | 2.302585 | male | 0 | 0 | 0 | 0 | 0 | 0 | 185 |
16 | 2 | 1.983811 | 3.135494 | male | 1 | 4 | 3 | 0 | 0 | 0 | 41 |
17 | 0 | 1.260201 | 3.091043 | male | 0 | 4 | 0 | 0 | 0 | 0 | 106 |
18 | 0 | 2.298178 | 3.401197 | male | 1 | 2 | 0 | 0 | 0 | 0 | 141 |
19 | 3 | 2.132508 | 3.332205 | male | 1 | 4 | 3 | 0 | 0 | 0 | 61 |
20 | 0 | 2.000231 | 3.258096 | male | 1 | 3 | 0 | 0 | 0 | 0 | 159 |
21 | 0 | 2.018375 | 3.218876 | male | 1 | 6 | 1 | 0 | 0 | 0 | 121 |
22 | 1 | 1.886142 | 2.639057 | female | 0 | 2 | 1 | 0 | 0 | 0 | 56 |
23 | 10 | 2.953125 | 1.098612 | female | 0 | 2 | 2 | 0 | 0 | 0 | 34 |
24 | 0 | 2.378481 | 3.526361 | male | 1 | 0 | 0 | 0 | 0 | 0 | 127 |
25 | 0 | 1.574376 | 3.555348 | male | 1 | 2 | 0 | 0 | 0 | 0 | 135 |
26 | 3 | 2.495514 | 3.637586 | female | 1 | 3 | 1 | 0 | 0 | 0 | 147 |
27 | 1 | 2.318077 | 3.7612 | male | 1 | 4 | 1 | 0 | 0 | 0 | 94 |
28 | 0 | 2.029045 | 4.248495 | female | 1 | 3 | 1 | 0 | 0 | 0 | 125 |
29 | 1 | 1.788754 | 3.610918 | female | 1 | 3 | 3 | 0 | 0 | 0 | 79 |
30 | 0 | 2.091107 | 2.079442 | female | 0 | 0 | 0 | 0 | 0 | 0 | 143 |
⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
# One-time install, if the package is not present yet:
# Pkg.add("DecisionTree")
using DecisionTree
# Target values: the Illdays column saved in `days_ill`, converted to a plain Array.
signals = convert(Array,days_ill)
# Feature array: the remaining columns of `vn`, converted to a plain Array.
# NOTE(review): `vn` still holds the textual Sex column, so the resulting
# array presumably has a mixed/Any element type -- confirm.
features = convert(Array,vn);
# List the available build_forest signatures before calling it.
methods(build_forest)
signals: array of the signal values we want to predict
features: the corresponding feature array that describes those signals
features used: the number of features considered for each split (branch) of a tree
number of trees: the number of trees in the forest; a larger number takes longer, but could be more accurate
sampling rate: a number lowered from 1.0 to favor minority signals
# Train a forest that predicts `signals` from `features`.
# Features tried per split: the square root of the number of feature columns.
nsubfeatures = int(sqrt(size(features, 2)))
# Remaining arguments: 10 trees and a sampling rate of 0.9.
model = build_forest(signals, features, nsubfeatures, 10, 0.9)
Ensemble of Decision Trees Trees: 10 Avg Leaves: 3674.7 Avg Depth: 24.3
# Run the trained forest over the feature array.
# NOTE(review): these are the same rows the forest was built from, so the
# predictions are in-sample rather than a held-out evaluation.
predictions = apply_forest(model,features)
27765-element Array{Any,1}: 7 3 0 3 10 0 0 0 4 7 2 5 3 ⋮ 3 10 0 1 0 0 30 3 0 0 20 7
# Tabulate actual (`signals`) against predicted values.
# NOTE(review): `predictions` was computed on the training features, so the
# accuracy and kappa printed below measure fit to the training data, not
# generalization.
confusion_matrix(signals,predictions)
32x32 Array{Int64,2}: 16566 0 5 10 1 1 0 … 0 0 0 0 0 0 0 0 1 0 1 290 68 68 16 5 0 0 0 0 0 0 0 0 0 0 0 1 4 1383 114 41 7 0 0 0 0 1 0 0 0 0 0 0 0 2 87 1856 45 7 2 0 0 0 0 0 0 1 0 0 0 1 1 32 95 1129 4 1 0 0 0 1 0 0 1 0 0 0 1 0 56 143 46 841 1 … 0 0 0 0 0 0 1 0 0 0 1 0 23 55 26 8 287 0 0 0 0 0 0 2 0 0 0 1 0 24 66 16 8 0 0 0 0 0 0 0 0 0 0 0 0 0 4 16 12 3 1 0 0 0 0 0 0 0 0 0 0 0 0 1 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 40 18 5 1 … 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 2 4 6 1 0 0 0 0 0 0 0 1 0 0 0 ⋮ ⋮ ⋱ ⋮ ⋮ 1 3 7 23 13 6 0 … 0 0 0 1 0 0 3 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 0 0 0 0 0 1 2 0 … 0 0 0 56 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0 0 0 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 0 0 0 0 0 0 13 22 4 3 0 0 0 0 0 0 0 274 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 7 18 6 4 1 … 0 0 0 0 0 0 0 0 153 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
Classes: {0,1,2,3,4,5,6,7,8,9 … 22,23,24,25,26,27,28,29,30,60} Matrix: Accuracy: 0.9280388978930308 Kappa: 0.8846820611436211