!wget https://ceb.nlm.nih.gov/proj/malaria/cell_images.zip
--2019-04-24 17:33:58-- https://ceb.nlm.nih.gov/proj/malaria/cell_images.zip Resolving ceb.nlm.nih.gov (ceb.nlm.nih.gov)... 130.14.52.15, 2607:f220:41e:7052::15 Connecting to ceb.nlm.nih.gov (ceb.nlm.nih.gov)|130.14.52.15|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 353452851 (337M) [application/zip] Saving to: ‘cell_images.zip’ cell_images.zip 100%[===================>] 337.08M 71.4MB/s in 4.5s 2019-04-24 17:34:03 (75.1 MB/s) - ‘cell_images.zip’ saved [353452851/353452851]
!unzip -q cell_images.zip
!mv cell_images training
!mkdir -p validation/Parasitized && mkdir -p validation/Uninfected
import numpy as np
import keras
from keras import backend as K
from keras.optimizers import Adam, SGD
import random
from keras.layers import Flatten, Dropout, BatchNormalization, Reshape, GlobalAveragePooling2D
from keras.layers import Dense, Dropout, Input
from keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from keras.models import Model
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, LearningRateScheduler
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt
import pandas as pd
from keras.preprocessing import image
from sklearn.model_selection import train_test_split
import os
import shutil
import matplotlib.image as mpimg
%matplotlib inline
Using TensorFlow backend.
total_images = 27558
validation_size = 0.20  # fraction of each class held out for validation

training_parasitized_folder_name = "training/Parasitized"
training_uninfected_folder_name = "training/Uninfected"
validation_parasitized_folder_name = "validation/Parasitized"
validation_uninfected_folder_name = "validation/Uninfected"

parasitized_folder = os.listdir(training_parasitized_folder_name)
uninfected_folder = os.listdir(training_uninfected_folder_name)

# Number of images moved out of EACH class folder into validation/.
# Uses the declared validation_size constant instead of repeating the
# 0.20 literal (the original duplicated the magic number).
validation_folder_size = int(len(parasitized_folder) * validation_size)

def _move_to_validation(image_names, source_folder, destination_folder):
    """Move the named image files from source_folder into destination_folder."""
    for image_name in image_names:
        shutil.move(f'{source_folder}/{image_name}', destination_folder)

_move_to_validation(parasitized_folder[:validation_folder_size],
                    training_parasitized_folder_name,
                    validation_parasitized_folder_name)
_move_to_validation(uninfected_folder[:validation_folder_size],
                    training_uninfected_folder_name,
                    validation_uninfected_folder_name)
def plot_images(image_index, folder_name, image_name, title):
    """Render one cell image into subplot `image_index + 1` of the global `figure`.

    Parameters:
        image_index: zero-based position on the 2x3 grid.
        folder_name: directory containing the image file.
        image_name: file name of the image to display.
        title: subplot title (class name).
    """
    sp = figure.add_subplot(2, 3, image_index + 1)
    sp.axis('Off')
    image_path = f'{folder_name}/{image_name}'
    # Original line was `image_loaded = img=mpimg.imread(...)`, a stray
    # chained assignment that leaked an unused `img` global — removed.
    image_loaded = mpimg.imread(image_path)
    sp.set_title(title, fontsize=16)
    plt.imshow(image_loaded, interpolation=None)
# Preview the first three samples of each class on a single 2x3 grid.
figure = plt.figure(figsize=(12, 6))

parasitized_images = os.listdir("training/Parasitized")[:3]
uninfected_images = os.listdir("training/Uninfected")[:3]

for idx, sample_name in enumerate(parasitized_images):
    plot_images(idx, training_parasitized_folder_name, sample_name, "Parasitized")
for idx, sample_name in enumerate(uninfected_images):
    plot_images(idx + 3, training_uninfected_folder_name, sample_name, "Uninfected")
# Input pipeline configuration.
batch_size = 32
img_size = 96
input_img_size = (96, 96, 3)
num_classes = 2
# Training generator: MobileNetV2 preprocessing plus light geometric
# augmentation (small rotations/shifts/zoom/shear and horizontal flips).
train_generator = ImageDataGenerator(
preprocessing_function=preprocess_input,
rotation_range=20,
zoom_range=0.05,
width_shift_range=0.05,
height_shift_range=0.05,
shear_range=0.05,
horizontal_flip=True,
fill_mode="nearest").flow_from_directory(
"training",
target_size=(img_size, img_size),
color_mode="rgb",
shuffle=True,
batch_size=batch_size)
# Validation generator: same preprocessing, no augmentation; shuffle=False
# so predictions stay aligned with val_generator.classes.
val_generator = ImageDataGenerator(
preprocessing_function=preprocess_input).flow_from_directory(
"validation",
target_size=(img_size, img_size),
color_mode="rgb",
shuffle=False,
batch_size=batch_size)
Found 22048 images belonging to 2 classes. Found 5510 images belonging to 2 classes.
# Batches per epoch: images remaining after the validation split, per batch.
# Floor division on ints already yields an int, so no explicit cast is needed.
train_steps = (total_images - 2 * validation_folder_size) // batch_size
# Validation batches cover the held-out images from both class folders.
val_steps = (2 * validation_folder_size) // batch_size
def create_model():
    """Build a MobileNetV2-based two-class classifier for 96x96 RGB cell images.

    Returns:
        An uncompiled keras Model mapping (96, 96, 3) inputs to a 2-unit
        softmax over {Parasitized, Uninfected}.
    """
    input_tensor = Input(shape=input_img_size)
    # alpha=1.5 widens every layer by 50%; weights=None trains the backbone
    # from scratch. Because no weights are loaded, there is nothing to
    # freeze — the original per-layer `trainable = True` loop was redundant
    # (layers are trainable by default) and has been removed.
    mobile_model = MobileNetV2(
        weights=None,
        input_tensor=input_tensor,
        input_shape=input_img_size,
        alpha=1.5,
        include_top=False)
    classification_layer = Flatten()(mobile_model.output)
    classification_layer = Dense(256, activation='relu')(classification_layer)
    # Dropout regularizes the fully-connected head.
    classification_layer = Dropout(0.5)(classification_layer)
    predictions = Dense(activation="softmax", units=num_classes)(classification_layer)
    return Model(inputs=input_tensor, outputs=predictions)
learning_rate = 3e-4
epochs = 30

def polynomial_decay(epoch):
    """Decay the base learning rate linearly to zero over `epochs`.

    With power == 1.0 the polynomial schedule reduces to linear decay:
    lr(epoch) = learning_rate * (1 - epoch / epochs).
    """
    power = 1.0
    remaining_fraction = 1 - (epoch / float(epochs))
    return learning_rate * remaining_fraction ** power
# Checkpoint filename template. The original used '|' as the separator,
# which is an invalid filename character on Windows and did not match the
# '_'-separated name used later when the best weights are reloaded;
# '_' is used instead.
weights_name = "epoch={epoch:02d}_accuracy={val_acc:.4f}.h5"
# Keep only weights (not the full model) for the best val_acc seen so far.
checkpoint = ModelCheckpoint(weights_name, monitor="val_acc", verbose=1,
                             save_best_only=True, save_weights_only=True,
                             mode="max", period=1)
# Apply the linear (power=1 polynomial) learning-rate schedule each epoch.
lr_decay = LearningRateScheduler(polynomial_decay)
optimizer = SGD(lr=learning_rate, momentum=0.9)
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version. Instructions for updating: Colocations handled automatically by placer.
model = create_model()
# The head is a 2-unit softmax and flow_from_directory defaults to
# class_mode="categorical" (one-hot labels), so categorical_crossentropy
# is the matching loss; the original binary_crossentropy applied
# element-wise BCE to a softmax pair, which mis-reports the loss.
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
# Train with the augmented generator; the checkpoint callback saves the
# best-val_acc weights and lr_decay applies the linear schedule per epoch.
# The returned History object is plotted below.
trained_model = model.fit_generator(train_generator,
epochs=epochs,
steps_per_epoch=train_steps,
callbacks=[checkpoint, lr_decay],
validation_data=val_generator,
validation_steps=val_steps,
verbose=1)
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. Epoch 1/30 688/688 [==============================] - 159s 232ms/step - loss: 0.6550 - acc: 0.6229 - val_loss: 0.7740 - val_acc: 0.5692 Epoch 00001: val_acc improved from -inf to 0.56922, saving model to epoch=01|accuracy=0.5692.h5 Epoch 2/30 688/688 [==============================] - 149s 216ms/step - loss: 0.3988 - acc: 0.8191 - val_loss: 3.6536 - val_acc: 0.5035 Epoch 00002: val_acc did not improve from 0.56922 Epoch 3/30 688/688 [==============================] - 149s 216ms/step - loss: 0.2010 - acc: 0.9298 - val_loss: 0.3059 - val_acc: 0.8491 Epoch 00003: val_acc improved from 0.56922 to 0.84909, saving model to epoch=03|accuracy=0.8491.h5 Epoch 4/30 688/688 [==============================] - 149s 216ms/step - loss: 0.1681 - acc: 0.9458 - val_loss: 0.2279 - val_acc: 0.9150 Epoch 00004: val_acc improved from 0.84909 to 0.91496, saving model to epoch=04|accuracy=0.9150.h5 Epoch 5/30 688/688 [==============================] - 150s 218ms/step - loss: 0.1566 - acc: 0.9486 - val_loss: 0.1422 - val_acc: 0.9544 Epoch 00005: val_acc improved from 0.91496 to 0.95438, saving model to epoch=05|accuracy=0.9544.h5 Epoch 6/30 688/688 [==============================] - 149s 216ms/step - loss: 0.1473 - acc: 0.9513 - val_loss: 0.1454 - val_acc: 0.9505 Epoch 00006: val_acc did not improve from 0.95438 Epoch 7/30 688/688 [==============================] - 148s 214ms/step - loss: 0.1396 - acc: 0.9538 - val_loss: 0.1340 - val_acc: 0.9560 Epoch 00007: val_acc improved from 0.95438 to 0.95602, saving model to epoch=07|accuracy=0.9560.h5 Epoch 8/30 688/688 [==============================] - 149s 217ms/step - loss: 0.1358 - acc: 0.9558 - val_loss: 0.1300 - val_acc: 0.9597 Epoch 00008: val_acc improved from 0.95602 to 0.95967, saving 
model to epoch=08|accuracy=0.9597.h5 Epoch 9/30 688/688 [==============================] - 148s 215ms/step - loss: 0.1320 - acc: 0.9571 - val_loss: 0.1482 - val_acc: 0.9504 Epoch 00009: val_acc did not improve from 0.95967 Epoch 10/30 688/688 [==============================] - 150s 218ms/step - loss: 0.1290 - acc: 0.9570 - val_loss: 0.1204 - val_acc: 0.9593 Epoch 00010: val_acc did not improve from 0.95967 Epoch 11/30 688/688 [==============================] - 149s 216ms/step - loss: 0.1274 - acc: 0.9577 - val_loss: 0.1252 - val_acc: 0.9578 Epoch 00011: val_acc did not improve from 0.95967 Epoch 12/30 688/688 [==============================] - 147s 214ms/step - loss: 0.1256 - acc: 0.9587 - val_loss: 0.1221 - val_acc: 0.9609 Epoch 00012: val_acc improved from 0.95967 to 0.96095, saving model to epoch=12|accuracy=0.9609.h5 Epoch 13/30 688/688 [==============================] - 148s 215ms/step - loss: 0.1216 - acc: 0.9591 - val_loss: 0.1202 - val_acc: 0.9599 Epoch 00013: val_acc did not improve from 0.96095 Epoch 14/30 688/688 [==============================] - 148s 215ms/step - loss: 0.1199 - acc: 0.9592 - val_loss: 0.1153 - val_acc: 0.9608 Epoch 00014: val_acc did not improve from 0.96095 Epoch 15/30 688/688 [==============================] - 147s 214ms/step - loss: 0.1181 - acc: 0.9597 - val_loss: 0.1186 - val_acc: 0.9613 Epoch 00015: val_acc improved from 0.96095 to 0.96131, saving model to epoch=15|accuracy=0.9613.h5 Epoch 16/30 688/688 [==============================] - 148s 215ms/step - loss: 0.1144 - acc: 0.9617 - val_loss: 0.1143 - val_acc: 0.9613 Epoch 00016: val_acc did not improve from 0.96131 Epoch 17/30 688/688 [==============================] - 148s 215ms/step - loss: 0.1150 - acc: 0.9612 - val_loss: 0.1158 - val_acc: 0.9624 Epoch 00017: val_acc improved from 0.96131 to 0.96241, saving model to epoch=17|accuracy=0.9624.h5 Epoch 18/30 688/688 [==============================] - 148s 215ms/step - loss: 0.1136 - acc: 0.9612 - val_loss: 0.1122 - val_acc: 
0.9626 Epoch 00018: val_acc improved from 0.96241 to 0.96259, saving model to epoch=18|accuracy=0.9626.h5 Epoch 19/30 688/688 [==============================] - 150s 217ms/step - loss: 0.1100 - acc: 0.9622 - val_loss: 0.1133 - val_acc: 0.9626 Epoch 00019: val_acc did not improve from 0.96259 Epoch 20/30 688/688 [==============================] - 148s 216ms/step - loss: 0.1124 - acc: 0.9620 - val_loss: 0.1102 - val_acc: 0.9635 Epoch 00020: val_acc improved from 0.96259 to 0.96350, saving model to epoch=20|accuracy=0.9635.h5 Epoch 21/30 688/688 [==============================] - 147s 213ms/step - loss: 0.1094 - acc: 0.9615 - val_loss: 0.1120 - val_acc: 0.9637 Epoch 00021: val_acc improved from 0.96350 to 0.96369, saving model to epoch=21|accuracy=0.9637.h5 Epoch 22/30 688/688 [==============================] - 148s 214ms/step - loss: 0.1102 - acc: 0.9626 - val_loss: 0.1090 - val_acc: 0.9633 Epoch 00022: val_acc did not improve from 0.96369 Epoch 23/30 688/688 [==============================] - 147s 214ms/step - loss: 0.1086 - acc: 0.9619 - val_loss: 0.1061 - val_acc: 0.9641 Epoch 00023: val_acc improved from 0.96369 to 0.96405, saving model to epoch=23|accuracy=0.9641.h5 Epoch 24/30 688/688 [==============================] - 147s 214ms/step - loss: 0.1051 - acc: 0.9627 - val_loss: 0.1067 - val_acc: 0.9646 Epoch 00024: val_acc improved from 0.96405 to 0.96460, saving model to epoch=24|accuracy=0.9646.h5 Epoch 25/30 688/688 [==============================] - 151s 220ms/step - loss: 0.1046 - acc: 0.9638 - val_loss: 0.1106 - val_acc: 0.9628 Epoch 00025: val_acc did not improve from 0.96460 Epoch 26/30 688/688 [==============================] - 147s 214ms/step - loss: 0.1022 - acc: 0.9636 - val_loss: 0.1074 - val_acc: 0.9639 Epoch 00026: val_acc did not improve from 0.96460 Epoch 27/30 688/688 [==============================] - 149s 216ms/step - loss: 0.1051 - acc: 0.9639 - val_loss: 0.1098 - val_acc: 0.9622 Epoch 00027: val_acc did not improve from 0.96460 Epoch 28/30 
688/688 [==============================] - 149s 216ms/step - loss: 0.1006 - acc: 0.9647 - val_loss: 0.1097 - val_acc: 0.9620 Epoch 00028: val_acc did not improve from 0.96460 Epoch 29/30 688/688 [==============================] - 150s 218ms/step - loss: 0.1017 - acc: 0.9643 - val_loss: 0.1045 - val_acc: 0.9641 Epoch 00029: val_acc did not improve from 0.96460 Epoch 30/30 688/688 [==============================] - 150s 218ms/step - loss: 0.1022 - acc: 0.9642 - val_loss: 0.1032 - val_acc: 0.9639 Epoch 00030: val_acc did not improve from 0.96460
def plot_validation_training(metric, trained_model):
    """Plot the training vs. validation curve for `metric` from a History object."""
    history = trained_model.history
    training_values = history[metric]
    validation_values = history[f'val_{metric}']
    epoch_axis = range(len(training_values))

    plt.plot(epoch_axis, training_values, 'b', label=f'Training {metric}')
    plt.plot(epoch_axis, validation_values, 'r', label=f'Validation {metric}')
    plt.ylim(bottom=0)
    plt.xlabel('Epochs ', fontsize=16)
    plt.ylabel(metric, fontsize=16)
    # Loss decreases (legend fits top-right); accuracy increases (bottom-right).
    plt.legend(loc='upper right' if metric == "loss" else 'lower right')
    plt.title(f'Training and validation {metric}', fontsize=20)
    plt.show()

plot_validation_training("loss", trained_model)
plot_validation_training("acc", trained_model)
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        # Scale each row to sum to 1 (per-true-class proportions).
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    ticks = np.arange(len(classes))
    plt.xticks(ticks, classes, rotation=45)
    plt.yticks(ticks, classes)

    value_format = '.2f' if normalize else 'd'
    # Cells darker than half the max get white text for contrast.
    threshold = cm.max() / 2.
    for row, col in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(col, row, format(cm[row, col], value_format),
                 horizontalalignment="center",
                 color="white" if cm[row, col] > threshold else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
# Restore the best checkpoint (epoch 24, val_acc 0.9646) before evaluating.
# NOTE(review): the checkpoint template above writes names with '|'
# separators while this path uses '_' — presumably the file was renamed by
# hand; confirm this filename matches an existing checkpoint on disk.
model.load_weights("epoch=24_accuracy=0.9646.h5")
# Rewind the generator so predictions align with val_generator.classes.
val_generator.reset()
classes_predicted = model.predict_generator(val_generator, steps=val_steps, verbose=1)
172/172 [==============================] - 10s 61ms/step
# Inspect how many predictions were produced (val_steps * batch_size).
len(classes_predicted)
5504
# Collapse softmax probabilities to class indices. (Despite the name,
# `real_classes` holds the model's PREDICTED classes; the name is kept
# because later cells reference it.)
real_classes = np.argmax(classes_predicted, axis=1)
# Ground-truth labels, truncated to the number of predictions actually
# produced (val_steps * batch_size can be smaller than the full validation
# set). Replaces the original hard-coded 5504 with a derived length.
val_labels = val_generator.classes
val_labels = val_labels[:len(real_classes)]
len(val_labels), len(real_classes)
(5504, 5504)
# Inspect the folder-name -> label-index mapping chosen by flow_from_directory.
val_generator.class_indices
{'Parasitized': 0, 'Uninfected': 1}
# Display names ordered to match the generator's class indices
# ({'Parasitized': 0, 'Uninfected': 1} per the cell above).
classes_names = ["Parasitized", "Uninfected"]
cm = confusion_matrix(val_labels, real_classes, labels=range(num_classes))
plot_confusion_matrix(cm, classes_names)
Confusion matrix, without normalization [[2642 114] [ 80 2668]]
# Sensitivity = recall on the Parasitized class (row 0): TP / (TP + FN).
sensitivity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
print(f'sensitivity: {sensitivity}')
# Specificity = recall on the Uninfected class (row 1): TN / (TN + FP).
specificity = cm[1, 1] / (cm[1, 1] + cm[1, 0])
# Fixed the "specifity" typo in the printed label.
print(f'specificity: {specificity}')
sensitivity: 0.9586357039187228 specifity: 0.9708879184861717
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 on the validation predictions.
print(classification_report(val_labels, real_classes, target_names=classes_names))
precision recall f1-score support Parasitized 0.97 0.96 0.96 2756 Uninfected 0.96 0.97 0.96 2748 micro avg 0.96 0.96 0.96 5504 macro avg 0.96 0.96 0.96 5504 weighted avg 0.96 0.96 0.96 5504
!pip uninstall keras-vis
Uninstalling keras-vis-0.4.1: Would remove: /usr/local/lib/python3.6/dist-packages/docs/* /usr/local/lib/python3.6/dist-packages/keras_vis-0.4.1.dist-info/* /usr/local/lib/python3.6/dist-packages/vis/* Would not remove (might be manually added): /usr/local/lib/python3.6/dist-packages/docs/autogen.py Proceed (y/n)? y Successfully uninstalled keras-vis-0.4.1
!pip install git+https://github.com/raghakot/keras-vis.git -U
from vis.visualization import visualize_saliency, overlay
from vis.utils import utils
from keras import activations
# Re-list the (post-split) training folders and pick one sample per class
# for the saliency / CAM visualizations below.
parasitized_folder = os.listdir("training/Parasitized")
uninfected_folder = os.listdir("training/Uninfected")
parasitized_image_path = f'{training_parasitized_folder_name}/{parasitized_folder[20]}'
uninfected_image_path = f'{training_uninfected_folder_name}/{uninfected_folder[20]}'
# keras-vis loader: these images feed the vis.* functions and plotting.
parasitized_image = utils.load_img(parasitized_image_path, target_size=(96, 96))
uninfected_image = utils.load_img(uninfected_image_path, target_size=(96, 96))
/usr/local/lib/python3.6/dist-packages/skimage/transform/_warps.py:105: UserWarning: The default mode, 'constant', will be changed to 'reflect' in skimage 0.15. warn("The default mode, 'constant', will be changed to 'reflect' in " /usr/local/lib/python3.6/dist-packages/skimage/transform/_warps.py:110: UserWarning: Anti-aliasing will be enabled by default in skimage 0.15 to avoid aliasing artifacts when down-sampling images. warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
# Build the model-ready batch: load -> img_to_array -> MobileNetV2
# preprocessing. The original code loaded images into *_image_model but
# then ran img_to_array/preprocess_input on the keras-vis images instead,
# discarding both the image.load_img results and the img_to_array output.
parasitized_image_model = image.img_to_array(
    image.load_img(parasitized_image_path, target_size=(96, 96)))
parasitized_image_model = preprocess_input(parasitized_image_model)
uninfected_image_model = image.img_to_array(
    image.load_img(uninfected_image_path, target_size=(96, 96)))
uninfected_image_model = preprocess_input(uninfected_image_model)
preprocessed_images = np.array([parasitized_image_model, uninfected_image_model])
# Predict both samples and show each alongside its true and predicted class.
predictions = model.predict(preprocessed_images)
predicted_classes = np.argmax(predictions, axis=1)

images_array = [parasitized_image, uninfected_image]
fig, ax = plt.subplots(1, 2, figsize=(8, 8))
fig.suptitle('Normal Images')
for i, sample in enumerate(images_array):
    guess = predicted_classes[i]
    ax[i].set_title(f'class: {classes_names[i]}, pred: {classes_names[guess]}')
    ax[i].imshow(sample)
# Vanilla saliency maps w.r.t. the final dense layer, one map per class.
layer_idx = utils.find_layer_idx(model, 'dense_2')

fig, ax = plt.subplots(1, 2, figsize=(8, 8))
fig.suptitle('Gradient maps')
for i, sample in enumerate(images_array):
    saliency = visualize_saliency(model, layer_idx, filter_indices=i, seed_input=sample)
    ax[i].set_title(classes_names[i])
    ax[i].imshow(saliency, cmap='jet')
# Repeat the saliency maps with guided and rectified backprop modifiers.
for modifier in ['guided', 'relu']:
    plt.figure()
    f, ax = plt.subplots(1, 2)
    plt.suptitle(modifier)
    for i, sample in enumerate(images_array):
        saliency = visualize_saliency(model, layer_idx, filter_indices=i,
                                      seed_input=sample, backprop_modifier=modifier)
        ax[i].imshow(saliency, cmap='jet')
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
import matplotlib.cm as cm
from vis.visualization import visualize_cam

# Grad-CAM heatmaps from the last batch-norm before the classifier head,
# overlaid on the source images for each backprop modifier.
penultimate_layer = utils.find_layer_idx(model, 'Conv_1_bn')

for modifier in [None, 'guided', 'relu']:
    plt.figure()
    f, ax = plt.subplots(1, 2)
    plt.suptitle("vanilla" if modifier is None else modifier)
    for i, sample in enumerate(images_array):
        cam_map = visualize_cam(model, layer_idx, filter_indices=i,
                                seed_input=sample,
                                penultimate_layer_idx=penultimate_layer,
                                backprop_modifier=modifier)
        jet_heatmap = np.uint8(cm.jet(cam_map)[..., :3] * 255)
        ax[i].imshow(overlay(jet_heatmap, sample))
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
# Persist the architecture as JSON (weights are saved separately by the
# checkpoint callback).
with open("malaria_model.json", "w") as json_file:
    json_file.write(model.to_json())