#!/usr/bin/env python
# coding: utf-8

# Notebook export: trains and evaluates a small LSTM sentiment classifier on
# the IMDB movie-review dataset bundled with Keras.
#
# NOTE(review): `nb_words`, `test_split`, `class_mode`, `show_accuracy` and
# `nb_epoch` look like keyword arguments from an early Keras release; they are
# left unchanged here -- confirm the installed Keras version before renaming.

# In[5]:

from keras.preprocessing import sequence
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from keras.datasets import imdb


# # IMDB Movie reviews sentiment classification - Keras (Backend: TensorFlow)

# ## IMDB Movie reviews Data
#
# Uses the dataset provided by Keras:
# http://keras.io/datasets/#imdb-movie-reviews-sentiment-classification

# In[6]:

# Vocabulary size: passed to the loader and reused as the Embedding input dim.
max_features = 20000
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features,
                                                      test_split=0.2)
# Parenthesised single-argument print works on both Python 2 and Python 3.
print('train sequences: {0}'.format(len(X_train)))
print('test sequences: {0}'.format(len(X_test)))


# In[8]:

# Contents of the data.
# Each review is encoded as a sequence of word indices.
# Indices are assigned by frequency, so e.g. the 3rd most frequent word
# gets index 3.
# (Bare expression: only displays a value when run as a notebook cell.)
X_train[0][:10]


# In[10]:

# Limit each text to a maximum length of 100.
maxlen = 100
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape: {0}'.format(X_train.shape))
print('X_test shape: {0}'.format(X_test.shape))


# ## Implementing

# In[11]:

# Embedding -> LSTM -> Dropout -> single sigmoid output unit.
model = Sequential()
model.add(Embedding(max_features, 128, input_length=maxlen))
model.add(LSTM(128))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))


# ## Training

# In[12]:

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              class_mode="binary")


# In[13]:

# Single source of truth for the batch size used by both fit and evaluate.
batch_size = 32
model.fit(X_train, y_train,
          batch_size=batch_size,
          nb_epoch=3,
          validation_data=(X_test, y_test),
          show_accuracy=True)


# ## Evaluating

# In[14]:

print(model.evaluate(X_test, y_test,
                     batch_size=batch_size,
                     show_accuracy=True))

#