#!/usr/bin/env python
# coding: utf-8

# In[ ]:

data_root = 'data/drums/'

# In[ ]:

get_ipython().run_line_magic('matplotlib', 'inline')
from matplotlib import pyplot as plt
import numpy as np
import pickle
import json
from os.path import join
from utils import *
# note: this notebook uses the Keras 1.x API (Convolution2D, border_mode, nb_epoch)
from keras.models import Model
from keras.layers import Input
from keras.layers.core import Dense, Reshape, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization

# In[ ]:

get_ipython().run_line_magic('time', "fingerprints = np.load(join(data_root, 'fingerprints.npy'))")
img_rows, img_cols = fingerprints.shape[1:]
print(fingerprints.shape)

# In[ ]:

with open(join(data_root, 'labels_to_samples.pkl'), 'rb') as f:
    labels_to_samples = pickle.load(f)
with open(join(data_root, 'samples_to_labels.pkl'), 'rb') as f:
    samples_to_labels = pickle.load(f)
with open(join(data_root, 'synsets.json')) as f:
    synsets = json.load(f)
nb_classes = len(synsets)

# In[ ]:

# take a sample of the data with evenly sized chunks for each class.
# the total number of samples can be larger than len(data) because some samples
# appear under multiple labels, and it can be less than limit_per_class * nb_classes
# because some classes have less data than limit_per_class.
def get_data(data, labels_to_samples, samples_to_labels, limit_per_class=100):
    X_train = []
    labels_train = []
    for samples in labels_to_samples:
        np.random.shuffle(samples)
        for i in samples[:limit_per_class]:
            X_train.append(data[i])
            labels_train.append(samples_to_labels[i])
    X_train = np.asarray(X_train)
    nb_train = len(X_train)
    nb_classes = len(labels_to_samples)
    # multi-hot targets: a sample can belong to several synsets at once
    y_train = np.zeros((nb_train, nb_classes), dtype=np.float32)
    for i, w in enumerate(labels_train):
        y_train[i, w] = 1.
    return X_train, y_train
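# In[ ]:

# a quick sanity check of the sampling above (not in the original notebook).
# since y_train is multi-hot, column sums count how often each class appears in
# a slice: counts are roughly limit_per_class, but can exceed it when samples
# carry multiple labels (each multi-label row sets several columns) and fall
# below it for classes with fewer samples than the limit.
X_check, y_check = get_data(fingerprints, labels_to_samples, samples_to_labels, limit_per_class=100)
counts = y_check.sum(axis=0)
print(len(X_check), 'rows sampled from', len(fingerprints), 'fingerprints')
print('per-class counts: min', counts.min(), 'max', counts.max())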
# In[ ]:

X_train, y_train = get_data(fingerprints, labels_to_samples, samples_to_labels)
print(X_train.dtype, X_train.shape, X_train.min(), X_train.max())
print(y_train.dtype, y_train.shape)

# In[ ]:

inputs = Input(shape=(img_rows, img_cols))
# add a single channel up front (Theano/channels-first dim ordering)
x = Reshape((1, img_rows, img_cols))(inputs)
x = Convolution2D(32, 3, 3, border_mode='same', activation='relu')(x)
x = Convolution2D(32, 3, 3, activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = BatchNormalization()(x)
x = Dropout(0.25)(x)
x = Convolution2D(64, 3, 3, border_mode='same', activation='relu')(x)
x = Convolution2D(64, 3, 3, activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = BatchNormalization()(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
# keep a handle on the 512-dimensional bottleneck so it can be reused as an encoder below
encoded = Dense(512, activation='relu')(x)
x = BatchNormalization()(encoded)
x = Dropout(0.5)(x)
x = Dense(nb_classes, activation='softmax')(x)
classifier = Model(input=inputs, output=x)
classifier.compile(optimizer='rmsprop', loss='categorical_crossentropy')
classifier.summary()

# In[ ]:

# train on a fresh class-balanced slice of the data each round,
# decaying the learning rate between rounds
from keras import backend as K
batch_size = 32
nb_epoch = 10
nb_slices = 48
lr = 0.001
decay = 0.99
for d in range(nb_slices):
    X_train, y_train = get_data(fingerprints, labels_to_samples, samples_to_labels)
    print('Learning rate', lr)
    K.set_value(classifier.optimizer.lr, lr)
    classifier.fit(X_train, y_train,
                   batch_size=batch_size, nb_epoch=nb_epoch,
                   verbose=1, shuffle=True)
    lr *= decay

# In[ ]:

# save the network so it can be used later to predict labels for new sounds
with open(join(data_root, 'classifier.json'), 'w') as f:
    f.write(classifier.to_json())
classifier.save_weights(join(data_root, 'classifier.h5'))

# In[ ]:

# make label predictions
predicted_labels = classifier.predict(fingerprints)
np.save(join(data_root, 'predicted_labels.npy'), predicted_labels)

# In[ ]:

# make encoding predictions: reuse the trained layers up to the bottleneck
encoder = Model(input=inputs, output=encoded)
predicted_encoding = encoder.predict(fingerprints)
np.save(join(data_root, 'predicted_encoding.npy'), predicted_encoding)

# In[ ]:

# show some examples of predicted encodings
plt.figure(figsize=(30, 2))
plt.plot(predicted_encoding[:3].T)
plt.show()

# In[ ]:

# show some examples of predicted labels
# red lines indicate the real labels
indices = np.arange(len(fingerprints))
np.random.shuffle(indices)
for i in indices[:10]:
    cur = fingerprints[i].reshape(1, img_rows, img_cols)
    cl = classifier.predict(cur, verbose=0)
    plt.figure(figsize=(30, 2))
    for j in samples_to_labels[i]:
        plt.axvline(j + .5, c='red')
        print(', '.join(synsets[j]))
    plt.bar(np.arange(nb_classes), cl[0])
    plt.xlim([0, nb_classes])
    plt.show()

# In[ ]:

# show a mosaic of fingerprints for a few synsets
for label in [1, 5, 10]:
    samples = labels_to_samples[label]
    total = len(samples)
    print(', '.join(synsets[label]), total)
    if total > 0:
        show_array(255 * make_mosaic(fingerprints[samples]))
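# In[ ]:

# a minimal sketch (not part of the original notebook) of reloading the saved
# network to label new sounds later. `new_fingerprints` is a hypothetical array
# with the same (n, img_rows, img_cols) shape as `fingerprints`.
from keras.models import model_from_json
with open(join(data_root, 'classifier.json')) as f:
    restored = model_from_json(f.read())
restored.load_weights(join(data_root, 'classifier.h5'))
# compiling is only needed to continue training or evaluate losses
restored.compile(optimizer='rmsprop', loss='categorical_crossentropy')
# new_labels = restored.predict(new_fingerprints)  # new_fingerprints is hypothetical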