from keras.preprocessing import sequence
from keras.models import *
from keras.layers import *
from keras.callbacks import *
from keras.datasets import imdb
from keras.engine import Layer, InputSpec
import tensorflow as tf
class KMaxPooling(Layer):
    """
    K-max pooling layer: keeps the k highest activations along the last
    dimension of a 3-D input and returns them flattened.

    Callers permute their tensors to (batch, features, steps) first so the
    pooling runs over the time/steps axis. Output shape: (batch, features * k).
    TensorFlow backend only.
    """
    def __init__(self, k=1, **kwargs):
        super().__init__(**kwargs)
        self.input_spec = InputSpec(ndim=3)
        self.k = k

    def compute_output_shape(self, input_shape):
        # one row of k maxima per feature channel, flattened
        return (input_shape[0], input_shape[1] * self.k)

    def call(self, inputs):
        # top_k works along the last axis; [0] keeps the values tensor and
        # drops the indices tensor.
        top_k = tf.nn.top_k(inputs, k=self.k, sorted=True)[0]
        # Flatten so the runtime tensor actually matches compute_output_shape.
        # (The original returned the 3-D (batch, features, k) tensor here and
        # silently relied on a downstream Reshape; element order is unchanged
        # by this reshape, so existing Reshape((k, -1)) callers still work.)
        return tf.reshape(top_k, (tf.shape(inputs)[0], -1))

    def get_config(self):
        # expose `k` so the layer survives model save/load round-trips
        config = super().get_config()
        config['k'] = self.k
        return config
# Using TensorFlow backend.
# imdb dataset in Keras
num_words = 5000
# Sequence length every review is padded/truncated to, and embedding size.
max_len = 300
embedding_dim = 50
# Load IMDB reviews pre-encoded as word-index sequences, keeping only the
# `num_words` most frequent words (rarer words are dropped by the loader).
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=num_words)
# Pad (or truncate) every review to exactly max_len tokens.
X_train = sequence.pad_sequences(X_train, maxlen=max_len)
X_test = sequence.pad_sequences(X_test, maxlen=max_len)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)
# (25000, 300) (25000, 300) (25000,) (25000,)
def basic_dynamic_cnn(k=5, ksize=30):
    """Build a one-convolution dynamic CNN for binary sentiment classification.

    Parameters
    ----------
    k : int
        Number of activations kept per feature map by k-max pooling.
    ksize : int
        Convolution kernel width. Padding of ksize - 1 on both sides gives a
        "wide" convolution (the original hard-coded 29/30; this generalizes
        it while keeping the same default behavior).

    Returns
    -------
    A compiled Keras Sequential model (binary cross-entropy, Adam).
    """
    model = Sequential()
    # Embed each word index into a dense embedding_dim-vector
    model.add(Embedding(num_words, embedding_dim, input_length=max_len))
    # Wide convolution: pad by ksize - 1 so every alignment of the kernel
    # with the sequence is produced
    model.add(ZeroPadding1D(ksize - 1))
    model.add(Conv1D(embedding_dim, ksize, activation='relu'))
    # k-max pooling over the time axis (move it last, pool, restore layout)
    model.add(Permute((2, 1)))
    model.add(KMaxPooling(k))
    model.add(Reshape((k, -1)))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
# NOTE(review): this rebinds the name `basic_dynamic_cnn` from the builder
# function to a Model instance — after this line the builder can no longer be
# called; later `basic_dynamic_cnn()` calls depend on the function being
# redefined first (a notebook execution-order artifact).
basic_dynamic_cnn = basic_dynamic_cnn()
basic_dynamic_cnn.summary()
# _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= embedding_13 (Embedding) (None, 300, 50) 250000 _________________________________________________________________ zero_padding1d_13 (ZeroPaddi (None, 358, 50) 0 _________________________________________________________________ conv1d_12 (Conv1D) (None, 329, 50) 75050 _________________________________________________________________ permute_7 (Permute) (None, 50, 329) 0 _________________________________________________________________ k_max_pooling_9 (KMaxPooling (None, 250) 0 _________________________________________________________________ reshape_7 (Reshape) (None, 5, 50) 0 _________________________________________________________________ flatten_5 (Flatten) (None, 250) 0 _________________________________________________________________ dense_5 (Dense) (None, 1) 251 ================================================================= Total params: 325,301 Trainable params: 325,301 Non-trainable params: 0 _________________________________________________________________
# Checkpoint only the weights with the best validation accuracy seen so far.
callbacks = [ModelCheckpoint(filepath = 'best_model.hdf5', monitor='val_acc', verbose=1, save_best_only = True, mode='max')]
# Train for 10 epochs, holding out 20% of the training set for validation.
history = basic_dynamic_cnn.fit(X_train, y_train, callbacks = callbacks, epochs = 10, validation_split = 0.2, batch_size = 200)
# NOTE(review): the original called `basic_dynamic_cnn()` here, but that name
# was rebound to the trained Model instance above, so a straight top-to-bottom
# run raised. Restore the checkpointed best weights into the existing model
# instead — load_weights overwrites every weight, so evaluation is identical.
basic_dynamic_cnn_best_model = basic_dynamic_cnn
basic_dynamic_cnn_best_model.load_weights('best_model.hdf5')
basic_dynamic_cnn_best_model.compile(optimizer = 'Adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# Evaluate on the held-out test set; results = [loss, accuracy]
results = basic_dynamic_cnn_best_model.evaluate(X_test, y_test)
print('Test accuracy: ', results[1])
# 24832/25000 [============================>.] - ETA: 0s  Test accuracy: 0.87084
def basic_dynamic_cnn(k=5, ksize=30):
    """Build a one-convolution dynamic CNN for binary sentiment classification.

    (Duplicate of the earlier definition — the notebook redefined the builder
    after shadowing its name with a Model instance.)

    Parameters
    ----------
    k : int
        Number of activations kept per feature map by k-max pooling.
    ksize : int
        Convolution kernel width; padding of ksize - 1 yields a "wide"
        convolution (generalizes the hard-coded 29/30, same default behavior).

    Returns
    -------
    A compiled Keras Sequential model (binary cross-entropy, Adam).
    """
    model = Sequential()
    # Embed each word index into a dense embedding_dim-vector
    model.add(Embedding(num_words, embedding_dim, input_length=max_len))
    # Wide convolution: pad by ksize - 1 on each side
    model.add(ZeroPadding1D(ksize - 1))
    model.add(Conv1D(embedding_dim, ksize, activation='relu'))
    # k-max pooling over the time axis (moved last via Permute)
    model.add(Permute((2, 1)))
    model.add(KMaxPooling(k))
    model.add(Reshape((k, -1)))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
# two kinds of k's and kernel sizes, one pair per convolution/pooling stage
def two_conv_dynamic_cnn(k1=20, k2=10, ksize1=20, ksize2=30):
    """Dynamic CNN with two stacked wide-convolution + k-max-pooling stages.

    Parameters
    ----------
    k1, k2 : int
        Activations kept by the first / second k-max pooling.
    ksize1, ksize2 : int
        Kernel widths of the first / second convolution.

    Fix: the original signature declared the last parameter as `ksize` while
    the body referenced `ksize2`, raising NameError on any call; the parameter
    is renamed to match the body (defaults and call sites are unchanged).

    Returns a compiled Keras functional Model.
    """
    inputs = Input(shape=(X_train.shape[-1],))
    embed = Embedding(num_words, embedding_dim, input_length=max_len)(inputs)
    # --- stage 1: wide convolution + k1-max pooling ---
    padded = ZeroPadding1D(ksize1 - 1)(embed)
    conv1 = Conv1D(embedding_dim, ksize1, activation='relu')(padded)
    permuted = Permute((2, 1))(conv1)
    kmaxpool1 = KMaxPooling(k1)(permuted)
    kmaxpool1 = Reshape((k1, -1))(kmaxpool1)
    # --- stage 2: wide convolution + k2-max pooling ---
    padded = ZeroPadding1D(ksize2 - 1)(kmaxpool1)
    conv2 = Conv1D(embedding_dim, ksize2, activation='relu')(padded)
    permuted = Permute((2, 1))(conv2)
    kmaxpool2 = KMaxPooling(k2)(permuted)
    kmaxpool2 = Reshape((k2, -1))(kmaxpool2)
    flattened = Flatten()(kmaxpool2)
    outputs = Dense(1, activation='sigmoid')(flattened)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
# NOTE(review): rebinds `two_conv_dynamic_cnn` from the builder function to a
# Model instance; the builder cannot be called again after this line.
two_conv_dynamic_cnn = two_conv_dynamic_cnn()
two_conv_dynamic_cnn.summary()
# _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= input_13 (InputLayer) (None, 300) 0 _________________________________________________________________ embedding_28 (Embedding) (None, 300, 50) 250000 _________________________________________________________________ zero_padding1d_30 (ZeroPaddi (None, 338, 50) 0 _________________________________________________________________ conv1d_29 (Conv1D) (None, 319, 50) 50050 _________________________________________________________________ permute_21 (Permute) (None, 50, 319) 0 _________________________________________________________________ k_max_pooling_23 (KMaxPoolin (None, 1000) 0 _________________________________________________________________ reshape_21 (Reshape) (None, 20, 50) 0 _________________________________________________________________ zero_padding1d_31 (ZeroPaddi (None, 78, 50) 0 _________________________________________________________________ conv1d_30 (Conv1D) (None, 49, 50) 75050 _________________________________________________________________ permute_22 (Permute) (None, 50, 49) 0 _________________________________________________________________ k_max_pooling_24 (KMaxPoolin (None, 500) 0 _________________________________________________________________ reshape_22 (Reshape) (None, 10, 50) 0 _________________________________________________________________ flatten_11 (Flatten) (None, 500) 0 _________________________________________________________________ dense_11 (Dense) (None, 1) 501 ================================================================= Total params: 375,601 Trainable params: 375,601 Non-trainable params: 0 _________________________________________________________________
# Checkpoint only the weights with the best validation accuracy seen so far.
callbacks = [ModelCheckpoint(filepath = 'best_model.hdf5', monitor='val_acc', verbose=1, save_best_only = True, mode='max')]
# Train for 10 epochs, holding out 20% of the training set for validation.
history = two_conv_dynamic_cnn.fit(X_train, y_train, callbacks = callbacks, epochs = 10, validation_split = 0.2, batch_size = 200)
# NOTE(review): the original called `two_conv_dynamic_cnn()` here, but that
# name was rebound to the trained Model instance above, so the call raised.
# Restore the checkpointed best weights into the existing model instead —
# load_weights overwrites every weight, so evaluation is identical.
two_conv_dynamic_cnn_best_model = two_conv_dynamic_cnn
two_conv_dynamic_cnn_best_model.load_weights('best_model.hdf5')
two_conv_dynamic_cnn_best_model.compile(optimizer = 'Adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# Evaluate on the held-out test set; results = [loss, accuracy]
results = two_conv_dynamic_cnn_best_model.evaluate(X_test, y_test)
print('Test accuracy: ', results[1])
# 24960/25000 [============================>.] - ETA: 0s  Test accuracy: 0.8832
def two_feature_map_dynamic_cnn(k1=20, k2=10, ksize1=20, ksize2=30):
    """Dynamic CNN with two parallel feature maps, each a two-stage
    wide-convolution + k-max-pooling pipeline, concatenated before the
    sigmoid output.

    Parameters
    ----------
    k1, k2 : int
        Activations kept by the first / second k-max pooling in each branch.
    ksize1, ksize2 : int
        Kernel widths of the first / second convolution in each branch.

    Fix: the original signature declared the last parameter as `ksize` while
    the body referenced `ksize2`, raising NameError on any call; the parameter
    is renamed to match the body (defaults and call sites are unchanged).

    Returns a compiled Keras functional Model.
    """
    inputs = Input(shape=(X_train.shape[-1],))
    embed = Embedding(num_words, embedding_dim, input_length=max_len)(inputs)
    branch_outputs = []
    # Two independent feature maps (separate weights) built from the same
    # embedding; each branch repeats the conv -> k-max-pool stack twice.
    for _ in range(2):
        padded = ZeroPadding1D(ksize1 - 1)(embed)
        conv1 = Conv1D(embedding_dim, ksize1, activation='relu')(padded)
        permuted = Permute((2, 1))(conv1)
        kmaxpool1 = KMaxPooling(k1)(permuted)
        kmaxpool1 = Reshape((k1, -1))(kmaxpool1)
        padded = ZeroPadding1D(ksize2 - 1)(kmaxpool1)
        conv2 = Conv1D(embedding_dim, ksize2, activation='relu')(padded)
        permuted = Permute((2, 1))(conv2)
        kmaxpool2 = KMaxPooling(k2)(permuted)
        kmaxpool2 = Reshape((k2, -1))(kmaxpool2)
        branch_outputs.append(Flatten()(kmaxpool2))
    merged = concatenate(branch_outputs)
    outputs = Dense(1, activation='sigmoid')(merged)
    model = Model(inputs=inputs, outputs=outputs)
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
# NOTE(review): rebinds `two_feature_map_dynamic_cnn` from the builder
# function to a Model instance; the builder cannot be called again after this.
two_feature_map_dynamic_cnn = two_feature_map_dynamic_cnn()
two_feature_map_dynamic_cnn.summary()
# __________________________________________________________________________________________________ Layer (type) Output Shape Param # Connected to ================================================================================================== input_15 (InputLayer) (None, 300) 0 __________________________________________________________________________________________________ embedding_30 (Embedding) (None, 300, 50) 250000 input_15[0][0] __________________________________________________________________________________________________ zero_padding1d_34 (ZeroPadding1 (None, 338, 50) 0 embedding_30[0][0] __________________________________________________________________________________________________ zero_padding1d_36 (ZeroPadding1 (None, 338, 50) 0 embedding_30[0][0] __________________________________________________________________________________________________ conv1d_33 (Conv1D) (None, 319, 50) 50050 zero_padding1d_34[0][0] __________________________________________________________________________________________________ conv1d_35 (Conv1D) (None, 319, 50) 50050 zero_padding1d_36[0][0] __________________________________________________________________________________________________ permute_25 (Permute) (None, 50, 319) 0 conv1d_33[0][0] __________________________________________________________________________________________________ permute_27 (Permute) (None, 50, 319) 0 conv1d_35[0][0] __________________________________________________________________________________________________ k_max_pooling_27 (KMaxPooling) (None, 1000) 0 permute_25[0][0] __________________________________________________________________________________________________ k_max_pooling_29 (KMaxPooling) (None, 1000) 0 permute_27[0][0] __________________________________________________________________________________________________ reshape_25 (Reshape) (None, 20, 50) 0 k_max_pooling_27[0][0] __________________________________________________________________________________________________
# reshape_27 (Reshape) (None, 20, 50) 0 k_max_pooling_29[0][0] __________________________________________________________________________________________________ zero_padding1d_35 (ZeroPadding1 (None, 78, 50) 0 reshape_25[0][0] __________________________________________________________________________________________________ zero_padding1d_37 (ZeroPadding1 (None, 78, 50) 0 reshape_27[0][0] __________________________________________________________________________________________________ conv1d_34 (Conv1D) (None, 49, 50) 75050 zero_padding1d_35[0][0] __________________________________________________________________________________________________ conv1d_36 (Conv1D) (None, 49, 50) 75050 zero_padding1d_37[0][0] __________________________________________________________________________________________________ permute_26 (Permute) (None, 50, 49) 0 conv1d_34[0][0] __________________________________________________________________________________________________ permute_28 (Permute) (None, 50, 49) 0 conv1d_36[0][0] __________________________________________________________________________________________________ k_max_pooling_28 (KMaxPooling) (None, 500) 0 permute_26[0][0] __________________________________________________________________________________________________ k_max_pooling_30 (KMaxPooling) (None, 500) 0 permute_28[0][0] __________________________________________________________________________________________________ reshape_26 (Reshape) (None, 10, 50) 0 k_max_pooling_28[0][0] __________________________________________________________________________________________________ reshape_28 (Reshape) (None, 10, 50) 0 k_max_pooling_30[0][0] __________________________________________________________________________________________________ flatten_13 (Flatten) (None, 500) 0 reshape_26[0][0] __________________________________________________________________________________________________ flatten_14 (Flatten) (None, 500) 0 reshape_28[0][0]
# __________________________________________________________________________________________________ concatenate_1 (Concatenate) (None, 1000) 0 flatten_13[0][0] flatten_14[0][0] __________________________________________________________________________________________________ dense_13 (Dense) (None, 1) 1001 concatenate_1[0][0] ================================================================================================== Total params: 501,201 Trainable params: 501,201 Non-trainable params: 0 __________________________________________________________________________________________________
# Checkpoint only the weights with the best validation accuracy seen so far.
callbacks = [ModelCheckpoint(filepath = 'best_model.hdf5', monitor='val_acc', verbose=1, save_best_only = True, mode='max')]
# Train for 10 epochs, holding out 20% of the training set for validation.
history = two_feature_map_dynamic_cnn.fit(X_train, y_train, callbacks = callbacks, epochs = 10, validation_split = 0.2, batch_size = 200)
# NOTE(review): the original called `two_feature_map_dynamic_cnn()` here, but
# that name was rebound to the trained Model instance above, so the call
# raised. Restore the checkpointed best weights into the existing model
# instead — load_weights overwrites every weight, so evaluation is identical.
two_feature_map_dynamic_cnn_best_model = two_feature_map_dynamic_cnn
two_feature_map_dynamic_cnn_best_model.load_weights('best_model.hdf5')
two_feature_map_dynamic_cnn_best_model.compile(optimizer = 'Adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
# Evaluate on the held-out test set; results = [loss, accuracy]
results = two_feature_map_dynamic_cnn_best_model.evaluate(X_test, y_test)
print('Test accuracy: ', results[1])
# 24960/25000 [============================>.] - ETA: 0s  Test accuracy: 0.88092