In [1]:

import random
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.naive_bayes import GaussianNB

In [2]:

iris = datasets.load_iris()
X, y = iris.data, iris.target
print(X.shape, y.shape)

(150, 4) (150,)

In [3]:

def simulate(X, y, n=100):
    X_new, y_new = [], []
    Xmax = np.max(X, axis=0)
    Xmin = np.min(X, axis=0)
    for t in range(n):
        tmp = []
        for i in range(X.shape[1]):
            rg = np.linspace(Xmin[i], Xmax[i], 30)
            choice = round(np.random.choice(rg), 1)
            tmp.append(choice)
        X_new.append(tmp)
        y_new.append(random.choice(list(set(y))))
    return np.array(X_new), np.array(y_new) 

In [4]:

X_new, y_new = simulate(X, y, 1000)

In [5]:

X_all = np.concatenate((X, X_new), axis=0)
y_all = np.concatenate((y, y_new), axis=0)
print(X_all.shape, y_all.shape)

(1150, 4) (1150,)

In [6]:

X_train, X_test, y_train, y_test = train_test_split(X_all, y_all, test_size=0.2, random_state=42)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

Out[6]:

((920, 4), (920,), (230, 4), (230,))

In [7]:

m1 = svm.SVC(gamma='auto')
m2 = GaussianNB()
m1.fit(X_train, y_train);
m2.fit(X_train, y_train);
m1.score(X_test, y_test), m2.score(X_test, y_test)

Out[7]:

(0.3869565217391304, 0.30434782608695654)

In [8]:

def boostrap(test, m1, m2, b=10, n=10, delta=0.05):
    ds = []
    X_test, y_test = test
    for i in range(b):
        choices = random.choices(range(X_test.shape[0]), k=n)
        _X_test = X_test[choices, :]
        _y_test = y_test[choices]
        dm1 = m1.score(_X_test, _y_test)
        dm2 = m2.score(_X_test, _y_test)
        d = dm1 - dm2
        ds.append(d)
    p = sum([_ for _ in ds if _ > 2*delta])/b
    return p

In [9]:

boostrap((X_test, y_test), m1, m2, 30, 10, 0.025)
# 不能拒绝原假设：m1 在双边置信度 0.05 下不比 m2 好，所以 m1 不比 m2 好

Out[9]:

0.1166666666666667

In [10]:

boostrap((X_test, y_test), m1, m2, 30, 10, 0.05)
# 拒绝原假设：m1 在双边置信度 0.1 下不比 m2 好，所以 m1 比 m2 好

Out[10]:

0.08666666666666666

In [ ]: