from IPython.parallel import Client
# Connect to the IPython.parallel cluster and build the two views used by
# this notebook: a load-balanced view and a direct view over all engines.
client = Client()
lb_view = client.load_balanced_view()
dv = client[:]

# Size of the direct view, as a quick check that the engines are reachable.
print(len(dv))
4
%%px
import mahotas as mh
import numpy as np
from os import path
import os
from os import path
import mahotas as mh
import pylab as pl
import pandas as pd
import cPickle
import mlpy
from sklearn.utils import shuffle
from PIL import Image
%pylab inline --no-import-all
Populating the interactive namespace from numpy and matplotlib
## collect the full path of every file in the CIFAR-10 training directory
image_dir = path.abspath('../../tutorials/data/cifar10/train/')
img_files = [path.join(image_dir, file_name)
             for file_name in os.listdir(image_dir)]
def load_img(img_path):
    """Read one image file and pair its pixels with its numeric id.

    The id is the file name without directory and extension, converted to
    ``int`` (so the name must be purely numeric, e.g. ``123.png``).

    Returns a ``(img_id, pixels)`` tuple where ``pixels`` is the image as a
    numpy array, loaded through PIL with its colour channels kept as-is.
    """
    ## Colour is kept on purpose; a gray-only variant would convert here,
    ## e.g. with mh.colors.rgb2gray(mh.imread(img_path)).
    pixels = np.asarray(Image.open(img_path))
    file_name = path.basename(img_path)
    stem, _extension = path.splitext(file_name)
    return (int(stem), pixels)
## Images are loaded serially here; the parallel alternative would be
## images = lb_view.map(load_img, img_files, block=True)
id_img_pairs = map(load_img, img_files)

# Order the images by their numeric id and stack them into a single array.
id_img_pairs = sorted(id_img_pairs, key=lambda pair: pair[0])
images = np.asarray([pixels for (_img_id, pixels) in id_img_pairs])

# Labels are taken in CSV row order.
# NOTE(review): assumes the CSV rows follow ascending image id -- confirm.
label_table = pd.read_csv('../../tutorials/data/cifar10/trainLabels.csv')
labels = np.asarray(label_table.label)

print(labels.shape)
print(images.shape)
(50000,) (50000, 32, 32, 3)
## work on a random subset of the images
subset_sz = 5000

# Shuffle images and labels together so each image keeps its label, then
# keep the first subset_sz of them. No random_state is passed, so a
# different subset is drawn on every run.
images, labels = shuffle(images, labels)
images, labels = images[:subset_sz], labels[:subset_sz]

print(images.shape)
print(labels.shape)
(5000, 32, 32, 3) (5000,)
def dtw_gray_dist(imga, imgb):
    """DTW distance between the gray-scale versions of two RGB images.

    Each image is converted to gray with ``mh.colors.rgb2gray``, flattened
    to a 1-D sequence, and the two sequences are compared with
    ``mlpy.dtw_std``, whose result is returned.
    """
    # Imported inside the function so it does not rely on mlpy being present
    # in the namespace where the function runs.
    import mlpy
    gray_a = mh.colors.rgb2gray(imga).ravel()
    gray_b = mh.colors.rgb2gray(imgb).ravel()
    return mlpy.dtw_std(gray_a, gray_b)
def dtw_rgb_dist(imga, imgb):
    """DTW distance between two images using all three colour channels.

    For each image, channels 0, 1 and 2 are flattened one after another and
    joined into a single 1-D sequence; the two sequences are then compared
    with ``mlpy.dtw_std``, whose result is returned.
    """
    # Imported inside the function so it does not rely on these modules
    # being present in the namespace where the function runs.
    import mlpy
    import numpy as np

    def channels_end_to_end(img):
        # channel 0 flattened, then channel 1, then channel 2
        planes = []
        for channel in range(3):
            planes.append(img[:, :, channel].ravel())
        return np.concatenate(planes)

    return mlpy.dtw_std(channels_end_to_end(imga), channels_end_to_end(imgb))
def dtw_3_dist(imga, imgb, col=10):
    """DTW distance between a single pixel column of two images.

    For channels 0, 1 and 2, the pixels at second-axis index ``col`` are
    taken over the whole first axis; the three strips are joined end to end
    and the two resulting 1-D sequences are compared with ``mlpy.dtw_std``.

    imga, imgb -- image arrays with at least three channels on the last
                  axis (assumed to be indexed [row, column, channel])
    col        -- index of the column to sample; defaults to 10, the value
                  that used to be hard-coded

    Returns the value of ``mlpy.dtw_std`` for the two sequences.
    """
    ## Method used in the original chapter; it doesn't feel right though,
    ## because only one fixed column of each image takes part in the
    ## comparison. (The earlier note here asked "why index 0?", while the
    ## code has always sampled column 10.)

    # Imported locally, like dtw_gray_dist and dtw_rgb_dist do, so the
    # function does not rely on these modules being present in the
    # namespace where it runs (e.g. on an IPython.parallel engine).
    import mlpy
    import numpy as np
    xa = np.concatenate([imga[:, col, i] for i in range(3)])
    xb = np.concatenate([imgb[:, col, i] for i in range(3)])
    return mlpy.dtw_std(xa, xb)
def dtw_r_dist(imga, imgb):
    """DTW distance between the red channels of two RGB images.

    Channel 0 of each image is flattened to a 1-D sequence and the two
    sequences are compared with ``mlpy.dtw_std``, whose result is returned.
    """
    # Imported locally, like dtw_gray_dist and dtw_rgb_dist do, so the
    # function does not rely on mlpy being present in the namespace where
    # it runs (e.g. on an IPython.parallel engine).
    import mlpy
    ## red only
    red_a = imga[:, :, 0].ravel()
    red_b = imgb[:, :, 0].ravel()
    return mlpy.dtw_std(red_a, red_b)
## nearest neighbours of reference image 1 under the gray-scale DTW distance
iref = 1
ref_img = images[iref]

# (index, distance) for every image in the subset -- the reference image
# itself included -- ordered from closest to farthest.
ref_dtw_dists = sorted(
    ((i, dtw_gray_dist(ref_img, img)) for i, img in enumerate(images)),
    key=lambda pair: pair[1])

# Thumbnail of the reference image, titled with its label.
pl.figure(figsize=(1, 1))
pl.imshow(ref_img, cmap=pl.cm.gray)
pl.title(labels[iref])

# pl.subplots opens its own figure; calling pl.figure() first would only
# leave an empty figure behind.
fig, axes = pl.subplots(nrows=3, ncols=3, figsize=(5, 5))
axes = axes.ravel()
# zip stops after the nine axes, so only the nine closest images are drawn.
for ax, (i, dist) in zip(axes, ref_dtw_dists):
    ax.imshow(images[i], cmap=pl.cm.gray)
    ax.set_title("%s = %g" % (labels[i], dist))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
<matplotlib.figure.Figure at 0x110f0c9d0>
## nearest neighbours of reference image 100 under the gray-scale DTW distance
iref = 100
ref_img = images[iref]

# (index, distance) for every image in the subset -- the reference image
# itself included -- ordered from closest to farthest.
ref_dtw_dists = sorted(
    ((i, dtw_gray_dist(ref_img, img)) for i, img in enumerate(images)),
    key=lambda pair: pair[1])

# Thumbnail of the reference image, titled with its label.
pl.figure(figsize=(1, 1))
pl.imshow(ref_img, cmap=pl.cm.gray)
pl.title(labels[iref])

# pl.subplots opens its own figure; calling pl.figure() first would only
# leave an empty figure behind.
fig, axes = pl.subplots(nrows=3, ncols=3, figsize=(5, 5))
axes = axes.ravel()
# zip stops after the nine axes, so only the nine closest images are drawn.
for ax, (i, dist) in zip(axes, ref_dtw_dists):
    ax.imshow(images[i], cmap=pl.cm.gray)
    ax.set_title("%s = %g" % (labels[i], dist))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
<matplotlib.figure.Figure at 0x1111c7290>
## nearest neighbours of reference image 1 under the three-channel DTW distance
iref = 1
ref_img = images[iref]

# (index, distance) for every image in the subset -- the reference image
# itself included -- ordered from closest to farthest.
ref_dtw_dists = sorted(
    ((i, dtw_rgb_dist(ref_img, img)) for i, img in enumerate(images)),
    key=lambda pair: pair[1])

# Thumbnail of the reference image, titled with its label.
pl.figure(figsize=(1, 1))
pl.imshow(ref_img, cmap=pl.cm.gray)
pl.title(labels[iref])

# pl.subplots opens its own figure; calling pl.figure() first would only
# leave an empty figure behind.
fig, axes = pl.subplots(nrows=3, ncols=3, figsize=(5, 5))
axes = axes.ravel()
# zip stops after the nine axes, so only the nine closest images are drawn.
for ax, (i, dist) in zip(axes, ref_dtw_dists):
    ax.imshow(images[i], cmap=pl.cm.gray)
    ax.set_title("%s = %g" % (labels[i], dist))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
<matplotlib.figure.Figure at 0x1111be690>
## nearest neighbours of reference image 100 under the three-channel DTW distance
iref = 100
ref_img = images[iref]

# (index, distance) for every image in the subset -- the reference image
# itself included -- ordered from closest to farthest.
ref_dtw_dists = sorted(
    ((i, dtw_rgb_dist(ref_img, img)) for i, img in enumerate(images)),
    key=lambda pair: pair[1])

# Thumbnail of the reference image, titled with its label.
pl.figure(figsize=(1, 1))
pl.imshow(ref_img, cmap=pl.cm.gray)
pl.title(labels[iref])

# pl.subplots opens its own figure; calling pl.figure() first would only
# leave an empty figure behind.
fig, axes = pl.subplots(nrows=3, ncols=3, figsize=(5, 5))
axes = axes.ravel()
# zip stops after the nine axes, so only the nine closest images are drawn.
for ax, (i, dist) in zip(axes, ref_dtw_dists):
    ax.imshow(images[i], cmap=pl.cm.gray)
    ax.set_title("%s = %g" % (labels[i], dist))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
<matplotlib.figure.Figure at 0x1112ab810>
## nearest neighbours of reference image 1 under the single-column DTW distance
iref = 1
ref_img = images[iref]

# (index, distance) for every image in the subset -- the reference image
# itself included -- ordered from closest to farthest.
ref_dtw_dists = sorted(
    ((i, dtw_3_dist(ref_img, img)) for i, img in enumerate(images)),
    key=lambda pair: pair[1])

# Thumbnail of the reference image, titled with its label.
pl.figure(figsize=(1, 1))
pl.imshow(ref_img, cmap=pl.cm.gray)
pl.title(labels[iref])

# pl.subplots opens its own figure; calling pl.figure() first would only
# leave an empty figure behind.
fig, axes = pl.subplots(nrows=3, ncols=3, figsize=(5, 5))
axes = axes.ravel()
# zip stops after the nine axes, so only the nine closest images are drawn.
for ax, (i, dist) in zip(axes, ref_dtw_dists):
    ax.imshow(images[i], cmap=pl.cm.gray)
    ax.set_title("%s = %g" % (labels[i], dist))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
<matplotlib.figure.Figure at 0x11125c5d0>
## nearest neighbours of reference image 100 under the single-column DTW distance
iref = 100
ref_img = images[iref]

# (index, distance) for every image in the subset -- the reference image
# itself included -- ordered from closest to farthest.
ref_dtw_dists = sorted(
    ((i, dtw_3_dist(ref_img, img)) for i, img in enumerate(images)),
    key=lambda pair: pair[1])

# Thumbnail of the reference image, titled with its label.
pl.figure(figsize=(1, 1))
pl.imshow(ref_img, cmap=pl.cm.gray)
pl.title(labels[iref])

# pl.subplots opens its own figure; calling pl.figure() first would only
# leave an empty figure behind.
fig, axes = pl.subplots(nrows=3, ncols=3, figsize=(5, 5))
axes = axes.ravel()
# zip stops after the nine axes, so only the nine closest images are drawn.
for ax, (i, dist) in zip(axes, ref_dtw_dists):
    ax.imshow(images[i], cmap=pl.cm.gray)
    ax.set_title("%s = %g" % (labels[i], dist))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
<matplotlib.figure.Figure at 0x1110c2950>
## nearest neighbours of reference image 1 under the red-channel DTW distance
iref = 1
ref_img = images[iref]

# (index, distance) for every image in the subset -- the reference image
# itself included -- ordered from closest to farthest.
ref_dtw_dists = sorted(
    ((i, dtw_r_dist(ref_img, img)) for i, img in enumerate(images)),
    key=lambda pair: pair[1])

# Thumbnail of the reference image, titled with its label.
pl.figure(figsize=(1, 1))
pl.imshow(ref_img, cmap=pl.cm.gray)
pl.title(labels[iref])

# pl.subplots opens its own figure; calling pl.figure() first would only
# leave an empty figure behind.
fig, axes = pl.subplots(nrows=3, ncols=3, figsize=(5, 5))
axes = axes.ravel()
# zip stops after the nine axes, so only the nine closest images are drawn.
for ax, (i, dist) in zip(axes, ref_dtw_dists):
    ax.imshow(images[i], cmap=pl.cm.gray)
    ax.set_title("%s = %g" % (labels[i], dist))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
<matplotlib.figure.Figure at 0x111602710>
## nearest neighbours of reference image 100 under the red-channel DTW distance
iref = 100
ref_img = images[iref]

# (index, distance) for every image in the subset -- the reference image
# itself included -- ordered from closest to farthest.
ref_dtw_dists = sorted(
    ((i, dtw_r_dist(ref_img, img)) for i, img in enumerate(images)),
    key=lambda pair: pair[1])

# Thumbnail of the reference image, titled with its label.
pl.figure(figsize=(1, 1))
pl.imshow(ref_img, cmap=pl.cm.gray)
pl.title(labels[iref])

# pl.subplots opens its own figure; calling pl.figure() first would only
# leave an empty figure behind.
fig, axes = pl.subplots(nrows=3, ncols=3, figsize=(5, 5))
axes = axes.ravel()
# zip stops after the nine axes, so only the nine closest images are drawn.
for ax, (i, dist) in zip(axes, ref_dtw_dists):
    ax.imshow(images[i], cmap=pl.cm.gray)
    ax.set_title("%s = %g" % (labels[i], dist))
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
<matplotlib.figure.Figure at 0x111607e50>
*** Conclusion - the DTW method is NOT that useful on the CIFAR data, although it could be better than Euclidean distance. Using the color channels usually gives slightly (but not substantially) better results than gray (probably because the gray values are floats, whereas DTW may work better on integers??) ***
*** Due to the poor performance, I didn't go on to test it with a custom-kernel SVM or a poor man's version of kernel approximation ***