$\newcommand{\Reals}{\mathbb{R}} \newcommand{\Nats}{\mathbb{N}} \newcommand{\PDK}{{k}} \newcommand{\IS}{\mathcal{X}} \newcommand{\FM}{\Phi} \newcommand{\Gram}{G} \newcommand{\RKHS}{\mathcal{H}} \newcommand{\prodDot}[2]{\left\langle#1,#2\right\rangle} \DeclareMathOperator*{\argmin}{arg\,min} \DeclareMathOperator*{\argmax}{arg\,max}$
### FIRST SOME CODE ###
from __future__ import division, print_function, absolute_import
from IPython.display import SVG, display, Image, HTML
import numpy as np, scipy as sp, pylab as pl, matplotlib.pyplot as plt, scipy.stats as stats, sklearn, sklearn.datasets
from scipy.spatial.distance import squareform, pdist, cdist
import rkhs_operators as ro
import distributions as dist #commit 480cf98 of https://github.com/ingmarschuster/distributions
pl.style.use(u'seaborn-talk')
display(Image(filename="GP_uq.png", width=630)) #source: http://scikit-learn.org/0.17/modules/gaussian_process.html
display(Image(filename="SVM.png", width=700)) #source: https://en.wikipedia.org/wiki/Support_vector_machine
display(Image(filename="monomials_small.jpg", width=800)) #source: Berhard Schölkopf
Draw samples $x_i$ from two distributions $p_0$ and $p_1$, and label $l_i = 0$ if $x_i$ was generated by $p_0$, $l_i = 1$ otherwise.
figkw = {"figsize":(4,4), "dpi":150}
np.random.seed(5)
samps_per_distr = 20
data = np.vstack([stats.multivariate_normal(np.array([-2, 0]), np.eye(2)*1.5).rvs(samps_per_distr),
                  stats.multivariate_normal(np.array([2, 0]), np.eye(2)*1.5).rvs(samps_per_distr)])
distr_idx = np.r_[[0]*samps_per_distr, [1]*samps_per_distr]
f = pl.figure(**figkw);
for (idx, c, marker) in [(0, 'r', (0, 3, 0)), (1, "b", "x")]:
    pl.scatter(*data[distr_idx == idx, :].T, c=c, marker=marker, alpha=0.4)
pl.show()
(remember in $\Reals^2$ canonically: $\prodDot{a}{b} = a_1 b_1+a_2 b_2 $)
pl.figure(**figkw)
for (idx, c, marker) in [(0, 'r', (0, 3, 0)), (1, "b", "x")]:
    pl.scatter(*data[distr_idx == idx, :].T, c=c, marker=marker, alpha=0.2)
    pl.arrow(0, 0, *data[distr_idx == idx, :].mean(0), head_width=0.3, width=0.05,
             head_length=0.3, fc=c, ec=c)
pl.title(r"Mean embeddings for $\Phi(x)=x$");
pl.figure(**figkw)
for (idx, c, marker) in [(0, 'r', (0, 3, 0)), (1, "b", "x")]:
    pl.scatter(*data[distr_idx == idx, :].T, c=c, marker=marker, alpha=0.2)
    pl.arrow(0, 0, *data[distr_idx == idx, :].mean(0), head_width=0.3, width=0.05,
             head_length=0.3, fc=c, ec=c)
pl.title(r"Mean embeddings for $\Phi(x)=x$");
pl.scatter(np.ones(1), np.ones(1), c='k', marker='D', alpha=0.8);
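As a quick illustration (a sketch added here, not in the original code): with the identity feature map $\Phi(x)=x$, the black diamond at $(1,1)$ can be classified by comparing its inner products with the two sample means.
# Sketch: classify the test point by inner products with the two mean vectors
test_point = np.ones(2)
mean0 = data[distr_idx == 0, :].mean(0)
mean1 = data[distr_idx == 1, :].mean(0)
score = test_point.dot(mean0) - test_point.dot(mean1)
print("assigned to class", 0 if score > 0 else 1)  # larger inner product wins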
# Some plotting code
def apply_to_mg(func, *mg):
    # apply a function to all points on a 2D meshgrid
    x = np.vstack([e.flat for e in mg]).T
    return np.array([func(i.reshape((1, 2))) for i in x]).reshape(mg[0].shape)
def plot_with_contour(samps, data_idx, cont_func, method_name=None, delta=0.025,
                      pl=pl, colormesh_cmap=pl.cm.Pastel2, contour_classif=True):
    # evaluate cont_func on a grid covering the samples
    x = np.arange(samps.T[0].min() - delta, samps.T[0].max() + delta, delta)
    y = np.arange(samps.T[1].min() - delta, samps.T[1].max() + delta, delta)
    X, Y = np.meshgrid(x, y)
    Z = apply_to_mg(cont_func, X, Y)
    Z = Z.reshape(X.shape)

    fig = pl.figure(**figkw)
    if colormesh_cmap is not None:
        bound = np.abs(Z).max()
        pl.pcolor(X, Y, Z, cmap=colormesh_cmap, alpha=0.5, edgecolors=None,
                  vmin=-bound, vmax=bound)
    if contour_classif is True:
        # decision boundary: zero level set of the classification score
        pl.contour(X, Y, Z, colors=['k'], alpha=0.5, linestyles=['--'],
                   levels=[0], linewidths=0.7)
    else:
        pl.contour(X, Y, Z, linewidths=0.7)
    if method_name is not None:
        pl.title(method_name)
    for (idx, c, marker) in [(0, 'r', (0, 3, 0)), (1, "b", "x")]:
        pl.scatter(*samps[data_idx == idx, :].T, c=c, marker=marker, alpha=0.4)
    pl.show()
    pl.close()
est_dens_1 = dist.mixt(2, [dist.mvnorm(x, np.eye(2)*0.1) for x in data[:4]], [1./4]*4)
plot_with_contour(data, distr_idx,
                  lambda x: np.exp(est_dens_1.logpdf(x)),
                  colormesh_cmap=None, contour_classif=False)
est_dens_1 = dist.mixt(2, [dist.mvnorm(x, np.eye(2)*0.1) for x in data[:samps_per_distr]],
                       [1./samps_per_distr]*samps_per_distr)
plot_with_contour(data, distr_idx,
                  lambda x: np.exp(est_dens_1.logpdf(x)),
                  colormesh_cmap=None, contour_classif=False)
If all Gram matrices $\Gram$ with entries $\Gram_{ij} = \PDK(x_i, x_j)$ for $x_1, \dots, x_m \in \IS$ are symmetric positive semidefinite, then $\PDK$ is a psd kernel.
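A quick empirical check (illustrative only, added here; the Gaussian kernel with bandwidth 0.3 is assumed): the Gram matrix on random points is symmetric with nonnegative eigenvalues.
# Illustration: a Gaussian kernel Gram matrix is symmetric psd
X = np.random.randn(50, 2)
G = np.exp(-squareform(pdist(X, 'sqeuclidean')) / (2 * 0.3**2))
assert np.allclose(G, G.T)
print("smallest eigenvalue:", np.linalg.eigvalsh(G).min())  # >= 0 up to rounding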
* can measure angles in the new space $$\prodDot{g}{f}_\RKHS = \cos(\angle[g,f])~\|g\|_\RKHS ~\|f\|_\RKHS$$
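For instance (a sketch added here, Gaussian kernel with bandwidth 0.3 assumed), the cosine of the angle between the two empirical mean embeddings follows from kernel evaluations alone, since $\prodDot{\mu_0}{\mu_1}_\RKHS$ is the mean of the cross Gram matrix:
# Sketch: angle between empirical mean embeddings via Gram matrices
def gauss_gram(A, B, sigma=0.3):
    return np.exp(-cdist(A, B, 'sqeuclidean') / (2 * sigma**2))

X0, X1 = data[distr_idx == 0, :], data[distr_idx == 1, :]
ip = gauss_gram(X0, X1).mean()              # <mu_0, mu_1>_H
norm0 = np.sqrt(gauss_gram(X0, X0).mean())  # ||mu_0||_H
norm1 = np.sqrt(gauss_gram(X1, X1).mean())  # ||mu_1||_H
print("cos(angle):", ip / (norm0 * norm1))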
Plan
* pick $\FM(x) = \PDK(\cdot, x)$
* let linear combinations of features also be in $\RKHS$: $$f(\cdot)=\sum_{i=1}^m a_i \PDK(\cdot, x_i) \in \RKHS$$ for $a_i \in \Reals$ (see the evaluation sketch after this list)
* $\RKHS$ is a vector space over $\Reals$: if $f(\cdot)$ and $g(\cdot)$ are functions from $\IS$ to $\Reals$, then so are $a\,f(\cdot)$ for $a \in \Reals$ and $f(\cdot)+g(\cdot)$
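To make the second point concrete, a minimal sketch (added here; coefficients $a_i$ and expansion points chosen arbitrarily, Gaussian kernel assumed) of evaluating such an $f$ at a point:
# Sketch: evaluate f(y) = sum_i a_i k(y, x_i) for a Gaussian kernel
def eval_rkhs_function(y, X, a, sigma=0.3):
    sq_dists = ((X - y)**2).sum(1)
    return a.dot(np.exp(-sq_dists / (2 * sigma**2)))

X_pts = np.random.randn(5, 2)   # expansion points x_i
coeff = np.random.randn(5)      # coefficients a_i
print(eval_rkhs_function(np.zeros(2), X_pts, coeff))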
$$\widehat{p}_0 = \frac{1}{N_0} \sum_{l_i = 0} \mathcal{N}(\cdot; x_i,\Sigma) ~~~~~~~~ \mu_{0} = \frac{1}{N_0} \sum_{l_i = 0} \PDK(\cdot, x_i)$$
if $\PDK$ is a Gaussian density with covariance $\Sigma$, the two expressions are identical as functions
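To see this numerically (a sketch added here, evaluating both expressions at the origin with $\Sigma = 0.1\,I$):
# Sketch: with a Gaussian-density kernel, mu_0 evaluated at a point
# equals the kernel density estimate at that point
X0 = data[distr_idx == 0, :]
Sigma = np.eye(2) * 0.1
point = np.zeros(2)
kde_val = np.mean([stats.multivariate_normal(x, Sigma).pdf(point) for x in X0])
norm_const = 1.0 / (2 * np.pi * np.sqrt(np.linalg.det(Sigma)))
kme_val = np.mean(norm_const * np.exp(-0.5 * ((X0 - point)**2).sum(1) / 0.1))
print(np.allclose(kde_val, kme_val))  # True: same weighted kernel sum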
Let's look at an example classification output
class KMEclassification(object):
    def __init__(self, samps1, samps2, kernel):
        self.de1 = ro.RKHSDensityEstimator(samps1, kernel, 0.1)
        self.de2 = ro.RKHSDensityEstimator(samps2, kernel, 0.1)

    def classification_score(self, test):
        # positive score: test point is closer to samps1's mean embedding
        return self.de1.eval_kme(test) - self.de2.eval_kme(test)
data, distr_idx = sklearn.datasets.make_circles(n_samples=400, factor=.3, noise=.05)
kc = KMEclassification(data[distr_idx==0,:], data[distr_idx==1,:], ro.LinearKernel())
plot_with_contour(data, distr_idx, kc.classification_score, 'Inner Product classif. '+"Linear", pl = plt, contour_classif = True, colormesh_cmap = pl.cm.bwr)
kc = KMEclassification(data[distr_idx==0,:], data[distr_idx==1,:], ro.GaussianKernel(0.3))
plot_with_contour(data, distr_idx, kc.classification_score, 'Inner Product classif. '+"Gaussian", pl = plt, contour_classif = True, colormesh_cmap = pl.cm.bwr)
The RKHS distance between two mean embeddings, $\|\mu_0 - \mu_1\|_\RKHS$, is called the maximum mean discrepancy (MMD)
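As a sketch (added here; Gaussian kernel with bandwidth 0.3 assumed, and this is the biased V-statistic estimate), the squared MMD between two samples can be estimated from Gram matrix means:
# Sketch: biased (V-statistic) estimate of squared MMD between two samples
def mmd2(X, Y, sigma=0.3):
    k = lambda A, B: np.exp(-cdist(A, B, 'sqeuclidean') / (2 * sigma**2))
    return k(X, X).mean() + k(Y, Y).mean() - 2 * k(X, Y).mean()

print(mmd2(data[distr_idx == 0, :], data[distr_idx == 1, :]))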
An example
out_samps = data[distr_idx==0,:1] + 1
inp_samps = data[distr_idx==0,1:] + 1
def plot_mean_embedding(cme, inp_samps, out_samps, p1=0., p2=1., offset=0.5):
    x = np.linspace(inp_samps.min() - offset, inp_samps.max() + offset, 200)
    fig = pl.figure(figsize=(10, 5))
    ax = [pl.subplot2grid((2, 2), (0, 1)),
          pl.subplot2grid((2, 2), (0, 0), rowspan=2),
          pl.subplot2grid((2, 2), (1, 1))]
    ax[1].scatter(out_samps, inp_samps, alpha=0.3, color='r')
    ax[1].set_xlabel('Output')
    ax[1].set_ylabel('Input')
    ax[1].axhline(p1, 0, 8, color='g', linestyle='--')
    ax[1].axhline(p2, 0, 8, color='b', linestyle='--')
    ax[1].set_title("%d input-output pairs" % len(out_samps))
    ax[1].set_yticks((p1, p2))
    e = cme.lhood(np.array([[p1], [p2]]), x[:, None]).T
    #ax[0].plot(x, d[0], '-', label='cond. density')
    ax[2].plot(x, e[0], 'g--', label='cond. mean emb.')
    ax[2].set_title(r"p(outp | inp=%.1f)" % p1)
    ax[0].plot(x, e[1], 'b--', label='cond. mean emb.')
    ax[0].set_title(r"p(outp | inp=%.1f)" % p2)
    #ax[2].legend(loc='best')
    fig.tight_layout()
cme = ro.ConditionMeanEmbedding(inp_samps, out_samps, ro.GaussianKernel(0.3), ro.GaussianKernel(0.3), 5)
plot_mean_embedding(cme, inp_samps, out_samps, 0.3, 2.)
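For reference, a minimal sketch of the standard regularized conditional mean embedding estimator (assumed, not verified, to be what ro.ConditionMeanEmbedding computes; bandwidths 0.3 and regularization 5 match the call above): $\mu_{Y|X=x} = \sum_i \alpha_i(x)\, \PDK(\cdot, y_i)$ with $\alpha(x) = (\Gram_X + \lambda I)^{-1} k_X(x)$.
# Sketch: regularized conditional mean embedding estimator,
# mu_{Y|X=x}(y) = k_Y(y, Ytr)^T (K_X + lam*I)^{-1} k_X(Xtr, x)
def cme_eval(Xtr, Ytr, x, ys, sigma=0.3, lam=5.0):
    k = lambda A, B: np.exp(-cdist(A, B, 'sqeuclidean') / (2 * sigma**2))
    alpha = np.linalg.solve(k(Xtr, Xtr) + lam * np.eye(len(Xtr)), k(Xtr, x))
    return k(ys, Ytr).dot(alpha)

# evaluate the embedding of p(outp | inp=0.3) on a grid of output values
out_grid = np.linspace(-1, 3, 200)[:, None]
emb_vals = cme_eval(inp_samps, out_samps, np.array([[0.3]]), out_grid)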
HTML('<iframe width="560" height="315" src="https://www.youtube.com/embed/MpzaCCbX-z4?rel=0&showinfo=0&start=148" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>')
display(Image(filename="Pendulum_eigenfunctions.png", width=700))
display(Image(filename="KeywordClustering.png", width=700))
Get slides at ingmarschuster.com