In [1]:
import random
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
In [2]:
iris = datasets.load_iris()
X, y = iris.data, iris.target
print(X.shape, y.shape)
(150, 4) (150,)
In [3]:
def simulate(X, y, n=100):
    X_new, y_new = [], []
    Xmax = np.max(X, axis=0)
    Xmin = np.min(X, axis=0)
    for t in range(n):
        tmp = []
        for i in range(X.shape[1]):
            rg = np.linspace(Xmin[i], Xmax[i], 30)
            choice = round(np.random.choice(rg), 1)
            tmp.append(choice)
        X_new.append(tmp)
        y_new.append(random.choice(list(set(y))))
    return np.array(X_new), np.array(y_new) 
In [4]:
X_new, y_new = simulate(X, y, 1000)
In [5]:
X_all = np.concatenate((X, X_new), axis=0)
y_all = np.concatenate((y, y_new), axis=0)
print(X_all.shape, y_all.shape)
(1150, 4) (1150,)
In [6]:
X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=0.2, random_state=42)
X_train.shape, y_train.shape, X_test.shape, y_test.shape
Out[6]:
((920, 4), (920,), (230, 4), (230,))
In [7]:
m1 = svm.SVC(gamma='auto')
m2 = GaussianNB()
m1.fit(X_train, y_train);
m2.fit(X_train, y_train);
m1.score(X_test, y_test), m2.score(X_test, y_test)
Out[7]:
(0.3869565217391304, 0.30434782608695654)
In [8]:
def boostrap(test, m1, m2, b=10, n=10, delta=0.05):
    ds = []
    X_test, y_test = test
    for i in range(b):
        choices = random.choices(range(X_test.shape[0]), k=n)
        _X_test = X_test[choices, :]
        _y_test = y_test[choices]
        dm1 = m1.score(_X_test, _y_test)
        dm2 = m2.score(_X_test, _y_test)
        d = dm1 - dm2
        ds.append(d)
    p = sum([_ for _ in ds if _ > 2*delta])/b
    return p
In [9]:
boostrap((X_test, y_test), m1, m2, 30, 10, 0.025)
# 不能拒绝原假设:m1 在双边置信度 0.05 下不比 m2 好,所以 m1 不比 m2 好
Out[9]:
0.1166666666666667
In [10]:
boostrap((X_test, y_test), m1, m2, 30, 10, 0.05)
# 拒绝原假设:m1 在双边置信度 0.1 下不比 m2 好,所以 m1 比 m2 好
Out[10]:
0.08666666666666666
In [ ]: