In [1]:
import mvpa2
import pylab as pl
import numpy as np
from mvpa2.misc.data_generators import normal_feature_dataset
from mvpa2.clfs.svm import LinearCSVMC
from mvpa2.generators.partition import NFoldPartitioner
from mvpa2.measures.base import CrossValidation
from mvpa2.mappers.zscore import zscore
In [16]:
# Seed PyMVPA with a fixed value so that re-running the notebook
# regenerates the same dataset.
mvpa2.seed(1)

# Base dataset with no signal (snr=0): 2 labels, 100 samples per label,
# 2 features, both listed as non-bogus.
ds_noise = normal_feature_dataset(
    perlabel=100,
    nlabels=2,
    nfeatures=2,
    snr=0,
    nonbogus_features=[0, 1]
)

# signal levels
sigs = [0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0]
In [22]:
# Linear C-SVM with a high C to make the margin harder; training_stats is
# enabled so the training accuracy can be read back after fitting.
clf = LinearCSVMC(C=1000, enable_ca=['training_stats'])
cve = CrossValidation(clf, NFoldPartitioner(), enable_ca='stats')
sana = clf.get_sensitivity_analyzer(postproc=None)

# Per-signal-level results, filled in the same order as `sigs`.
rs = []
errors, training_errors = [], []

for sig in sigs:
    ds = ds_noise.copy()
    # introduce signal into the first feature
    ds.samples[ds.T == 'L1', 0] += sig

    # Cross-validated error first, then the sensitivity analysis; the
    # training error is read from the classifier after the latter.
    error = np.mean(cve(ds))
    sa = sana(ds)
    training_error = 1 - clf.ca.training_stats.stats['ACC']

    errors.append(error)
    training_errors.append(training_error)

    w = sa.samples[0]
    # np.asscalar was deprecated in NumPy 1.16 and removed in 1.23;
    # .item() is the supported way to extract the single value.
    b = np.asarray(sa.sa.biases).item()
    w_norm = np.linalg.norm(w)
    # width each way
    r = 1. / w_norm

    # Scalar x-range taken from the plain samples array, so that `x` is
    # always 1-D regardless of how np.linspace treats array endpoints.
    feature0 = ds.samples[:, 0]
    x = np.linspace(feature0.min(), feature0.max(), 20)

    # Lines where w[0]*x + w[1]*y - b equals 0 (boundary) and +1 / -1 (margins).
    offset = w[0] * x - b
    y = -offset / w[1]
    y1 = (1 - offset) / w[1]
    y2 = (-1 - offset) / w[1]

    pl.figure(figsize=(10, 4))

    for t, c in zip(ds.UT, ['r', 'b']):
        ds_ = ds[ds.T == t]
        # pass plain arrays rather than Dataset objects to matplotlib
        pl.scatter(ds_.samples[:, 0], ds_.samples[:, 1], c=c)
    # draw the hyperplane
    pl.plot(x, y)
    pl.plot(x, y1, '--')
    pl.plot(x, y2, '--')
    pl.title("SIGNAL: %.2f training_error: %.2f error: %.2f |w|: %.2f r=%.2f"
             % (sig, training_error, error, w_norm, r))
    ca = pl.gca()
    ca.set_xlim((-2, 4))
    ca.set_ylim((-1.2, 1.2))
    pl.show()
    rs.append(r)
So how does the width of the margin depend on the signal level?
In [24]:
# Summary figure: margin half-width and both error rates against signal level.
pl.figure()
curves = [
    (rs, "Margin width of %s" % clf),
    (errors, "Gener. error"),
    (training_errors, "Training error"),
]
for values, caption in curves:
    pl.plot(sigs, values, label=caption)
pl.xlabel("SIGNAL")
pl.legend()
pl.show()
In [ ]: