In [ ]:
# Directory that holds the dataset files read by this notebook and receives
# the model and prediction files it writes.
data_root = 'data/drums/'
In [ ]:
%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import pickle
import json
from os.path import join
from utils import *
from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Input
from keras.layers.core import Dense, Reshape, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
from keras.optimizers import SGD
from keras.utils import np_utils
In [ ]:
%time fingerprints = np.load(join(data_root, 'fingerprints.npy'))
img_rows, img_cols = fingerprints.shape[1:]
print fingerprints.shape
In [ ]:
# Load the label <-> sample lookup tables and the synset names.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# run this on files that come from a trusted source.
with open(join(data_root, 'labels_to_samples.pkl'), 'rb') as f:
    labels_to_samples = pickle.load(f)
with open(join(data_root, 'samples_to_labels.pkl'), 'rb') as f:
    samples_to_labels = pickle.load(f)
# Open the JSON file in a context manager so its handle is closed right away
# instead of being left to the garbage collector.
with open(join(data_root, 'synsets.json')) as f:
    synsets = json.load(f)
# One output class per synset entry.
nb_classes = len(synsets)
In [ ]:
def get_data(data, labels_to_samples, samples_to_labels, limit_per_class=100):
    """Take a random sample of the data with even chunks for each class.

    The total number of returned samples can be larger than ``len(data)``
    because a sample may be listed under multiple labels, and it can be less
    than ``limit_per_class * nb_classes`` because some classes might have
    fewer than ``limit_per_class`` samples.

    Parameters
    ----------
    data : indexable
        Container of samples; indexed with the entries of `labels_to_samples`.
    labels_to_samples : sequence of sequences
        One collection of sample indices per class. It is NOT modified:
        shuffling happens on a private copy of each collection.
    samples_to_labels : indexable
        Maps a sample index to the class column(s) to set in the target row
        (used directly as a column index into the target matrix).
    limit_per_class : int
        Maximum number of samples drawn for each class.

    Returns
    -------
    (X_train, y_train)
        `X_train` is ``np.asarray`` of the selected samples, grouped by class
        in the order of `labels_to_samples`. `y_train` is a float32 array of
        shape ``(len(X_train), len(labels_to_samples))`` with 1.0 in the
        label column(s) of each row.
    """
    X_train = []
    labels_train = []
    for samples in labels_to_samples:
        # Shuffle a copy: shuffling `samples` itself would silently reorder
        # the caller's lookup table on every call.
        shuffled = list(samples)
        np.random.shuffle(shuffled)
        for i in shuffled[:limit_per_class]:
            X_train.append(data[i])
            labels_train.append(samples_to_labels[i])
    X_train = np.asarray(X_train)
    nb_train = len(X_train)
    nb_classes = len(labels_to_samples)
    y_train = np.zeros((nb_train, nb_classes), dtype=np.float32)
    for i, w in enumerate(labels_train):
        # `w` may be a single column or a list of columns (multi-label).
        y_train[i, w] = 1.
    return X_train, y_train
In [ ]:
# Draw one class-balanced sample as a sanity check of get_data.
X_train, y_train = get_data(fingerprints, labels_to_samples, samples_to_labels)
# Report dtype, shape and value range of the inputs, then dtype and shape of the targets.
print(X_train.dtype, X_train.shape, X_train.min(), X_train.max())
print(y_train.dtype, y_train.shape)
In [ ]:
# Convolutional classifier built with the Keras functional API.
# NOTE(review): the Reshape inserts a singleton axis in front of (rows, cols),
# which looks like channels-first image ordering -- confirm against the
# configured Keras image dim ordering.
inputs = Input(shape=(img_rows, img_cols))
x = Reshape((1, img_rows, img_cols))(inputs)

# Two convolutional stages that differ only in the number of filters:
# conv (same padding) -> conv -> 2x2 max-pool -> batch norm -> dropout.
for nb_filters in (32, 64):
    x = Convolution2D(nb_filters, 3, 3, border_mode='same', activation='relu')(x)
    x = Convolution2D(nb_filters, 3, 3, activation='relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.25)(x)

x = Flatten()(x)
# Keep a handle on the 512-unit dense layer output so a second model can
# expose it as an encoding of the input.
encoded = Dense(512, activation='relu')(x)
x = BatchNormalization()(encoded)
x = Dropout(0.5)(x)
x = Dense(nb_classes, activation='softmax')(x)

classifier = Model(input=inputs, output=x)
classifier.compile(optimizer='rmsprop', loss='categorical_crossentropy')
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
classifier.summary()
In [ ]:
from keras import backend as K

# Training schedule: the model is fitted on `nb_slices` freshly drawn,
# class-balanced subsets; each subset is trained for `nb_epoch` epochs and the
# learning rate is multiplied by `decay` after every subset.
batch_size = 32
nb_epoch = 10
nb_slices = 48
lr = 0.001
decay = 0.99

# Options shared by every fit() call below.
fit_options = dict(batch_size=batch_size,
                   nb_epoch=nb_epoch,
                   verbose=1,
                   shuffle=True)

for slice_index in range(nb_slices):
    # Resample so each slice sees a different random subset of every class.
    X_train, y_train = get_data(fingerprints, labels_to_samples, samples_to_labels)
    print('Learning rate', lr)
    # Push the current rate into the already-compiled optimizer.
    K.set_value(classifier.optimizer.lr, lr)
    classifier.fit(X_train, y_train, **fit_options)
    lr *= decay
In [ ]:
# save the network, it can be used later to predict labels for new sounds
# The architecture goes to JSON and the weights to HDF5. The JSON file is
# written inside a context manager so it is flushed and closed immediately
# rather than whenever the file object happens to be collected.
with open(join(data_root, 'classifier.json'), 'w') as f:
    f.write(classifier.to_json())
classifier.save_weights(join(data_root, 'classifier.h5'))
In [ ]:
# Run the classifier over every fingerprint and store its outputs on disk.
predicted_labels_path = join(data_root, 'predicted_labels.npy')
predicted_labels = classifier.predict(fingerprints)
np.save(predicted_labels_path, predicted_labels)
In [ ]:
# Build a second model that shares the classifier's layers but stops at the
# `encoded` dense layer, then store its output for every fingerprint.
encoder = Model(input=inputs, output=encoded)
predicted_encoding_path = join(data_root, 'predicted_encoding.npy')
predicted_encoding = encoder.predict(fingerprints)
np.save(predicted_encoding_path, predicted_encoding)
In [ ]:
# Plot the encodings of the first three fingerprints, one curve per sample
# (the transpose puts the encoding dimension on the x axis).
fig, ax = plt.subplots(figsize=(30, 2))
ax.plot(predicted_encoding[:3].T)
plt.show()
In [ ]:
# show some examples of predicted labels
# red lines indicate the real labels
# Draw the examples from the whole dataset: the indices are sample indices
# into `fingerprints`, not class indices, so they must range over
# len(fingerprints) rather than nb_classes.
indices = np.random.permutation(len(fingerprints))
for i in indices[:10]:
    # predict() expects a batch axis, so wrap the single sample.
    cur = fingerprints[i].reshape(1, img_rows, img_cols)
    cl = classifier.predict(cur, verbose=0)
    plt.figure(figsize=(30, 2))
    for j in samples_to_labels[i]:
        # Offset by half a bar so the marker sits on the bar of class j.
        plt.axvline(j + .5, c='red')
        print(', '.join(synsets[j]))
    # Reuse the prediction computed above instead of running the model twice.
    plt.bar(np.arange(nb_classes), cl[0])
    plt.xlim([0, nb_classes])
    plt.show()
In [ ]:
# Show a mosaic of the fingerprints that belong to a few chosen synsets,
# preceded by the synset's names and its number of samples.
for label in (1, 5, 10):
    sample_ids = labels_to_samples[label]
    count = len(sample_ids)
    print('%s %d' % (', '.join(synsets[label]), count))
    if count == 0:
        # Nothing to draw for an empty class.
        continue
    mosaic = make_mosaic(fingerprints[sample_ids])
    show_array(255 * mosaic)