!mkdir -p ~/.kaggle/
mv kaggle.json ~/.kaggle/
!kaggle datasets download -d paultimothymooney/breast-histopathology-images
Warning: Your Kaggle API key is readable by other users on this system! To fix this, you can run 'chmod 600 /root/.kaggle/kaggle.json' Downloading breast-histopathology-images.zip to /content 100% 1.48G/1.49G [00:23<00:00, 101MB/s] 100% 1.49G/1.49G [00:23<00:00, 67.3MB/s]
# Create the target folder and unpack the downloaded archive in place.
!mkdir images
!unzip -q breast-histopathology-images.zip
# IDC_regular_ps50_idx5.zip is presumably shipped inside the archive above
# (TODO confirm); its contents are extracted into images/.
!unzip -q IDC_regular_ps50_idx5.zip -d images
import numpy as np
import keras
from keras import backend as K
from keras.optimizers import Adam, SGD
from keras.layers import Flatten, Dropout, BatchNormalization, Reshape, GlobalAveragePooling2D
from keras.layers import Dense, Dropout, Input
from keras.applications.mobilenet_v2 import MobileNetV2, preprocess_input
from keras.models import Model
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, LearningRateScheduler
from keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix
import itertools
import matplotlib.pyplot as plt
import pandas as pd
from keras.preprocessing import image
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
import matplotlib.image as mpimg
import shutil
import os
Using TensorFlow backend.
# Take the first entry under images/ and sample three file names from its
# `0` (benign) and `1` (malignant) sub-folders for a quick visual check.
images_folder = os.listdir("images")
first_folder = images_folder[0]

benign_images_first_folder = f'images/{first_folder}/0'
malignant_images_first_folder = f'images/{first_folder}/1'

benign_images = os.listdir(benign_images_first_folder)[:3]
malignant_images = os.listdir(malignant_images_first_folder)[:3]
def plot_images(image_index, folder_name, image_name, title):
    """Draw one image into a slot of the 2x3 grid on the global ``figure``.

    Args:
        image_index: Zero-based slot in the grid (0-5).
        folder_name: Directory that contains the image.
        image_name: File name of the image inside ``folder_name``.
        title: Title shown above the image.
    """
    # ``figure`` is a module-level Figure that must exist before the call.
    sp = figure.add_subplot(2, 3, image_index + 1)
    sp.axis('Off')
    sp.set_title(title, fontsize=16)
    image_loaded = mpimg.imread(f'{folder_name}/{image_name}')
    # Draw on the subplot itself rather than on pyplot's implicit current axes.
    sp.imshow(image_loaded, interpolation=None)
# One figure with two rows of three: benign samples fill slots 0-2,
# malignant samples fill slots 3-5.
figure = plt.figure(figsize=(12, 6))
gallery_rows = (
    (0, benign_images_first_folder, benign_images, "Benign"),
    (3, malignant_images_first_folder, malignant_images, "Malignant"),
)
for first_slot, row_folder, row_images, row_title in gallery_rows:
    for image_index, image_name in enumerate(row_images, start=first_slot):
        plot_images(image_index, row_folder, image_name, row_title)
!mkdir training && mkdir validation
!mkdir training/benign && mkdir training/malignant
!mkdir validation/benign && mkdir validation/malignant
# Flatten images/<folder>/{0,1}/* into one folder per class:
# everything under `0` goes to training/benign, everything under `1`
# to training/malignant.
benign_images_training_folder_name = "training/benign"
malignant_images_training_folder_name = "training/malignant"

for folder in images_folder:
    class_moves = [
        (f'images/{folder}/0', benign_images_training_folder_name),
        (f'images/{folder}/1', malignant_images_training_folder_name),
    ]
    # List both class folders up front, before anything is moved.
    pending = [(src, dst, os.listdir(src)) for src, dst in class_moves]
    for src, dst, file_names in pending:
        for file_name in file_names:
            shutil.move(f'{src}/{file_name}', dst)
# From here on benign_images / malignant_images hold file COUNTS
# (they were lists of sample file names earlier in the notebook).
benign_images = len(os.listdir(benign_images_training_folder_name))
malignant_images = len(os.listdir(malignant_images_training_folder_name))
total_images = benign_images + malignant_images

print(
    f'Total images: {total_images}',
    f'Benign images: {benign_images}',
    f'Malignant images: {malignant_images}',
    sep='\n',
)
Total images: 277524 Benign images: 198738 Malignant images: 78786
# Fraction of every class that is held out for validation.
validation_size = 0.20

# Per-class hold-out counts, truncated towards zero. Both are derived from
# validation_size so the fraction is configured in exactly one place.
benign_validation_folder_size = int(benign_images * validation_size)
malignant_validation_folder_size = int(malignant_images * validation_size)

print(f'Benign validation size: {benign_validation_folder_size}')
print(f'Malignant validation size: {malignant_validation_folder_size}')
Benign validation size: 39747 Malignant validation size: 15757
# Move the first N files of each class listing from training/ to validation/.
# NOTE(review): the split follows os.listdir order, so it is neither shuffled
# nor grouped by source folder — confirm this is acceptable for evaluation.
benign_images_validation_folder_name = "validation/benign"
malignant_images_validation_folder_name = "validation/malignant"

benign_images_training_folder = os.listdir(benign_images_training_folder_name)
malignant_images_training_folder = os.listdir(malignant_images_training_folder_name)

split_plan = (
    (benign_images_training_folder_name,
     benign_images_training_folder[:benign_validation_folder_size],
     benign_images_validation_folder_name),
    (malignant_images_training_folder_name,
     malignant_images_training_folder[:malignant_validation_folder_size],
     malignant_images_validation_folder_name),
)
for source_folder, held_out_names, target_folder in split_plan:
    for image_name in held_out_names:
        shutil.move(f'{source_folder}/{image_name}', target_folder)

# Re-count what is on disk after the move.
training_folder_size = sum(
    len(os.listdir(class_folder))
    for class_folder in (benign_images_training_folder_name,
                         malignant_images_training_folder_name)
)
validation_folder_size = sum(
    len(os.listdir(class_folder))
    for class_folder in (benign_images_validation_folder_name,
                         malignant_images_validation_folder_name)
)
print(f'Training folder size: {training_folder_size}')
print(f'Validation folder size: {validation_folder_size}')
Training folder size: 222020 Validation folder size: 55504
# Input pipeline / model shape settings shared by the generators and the model.
img_size = 48          # images are resized to img_size x img_size
batch_size = 128
num_classes = 2
# Three-channel input of img_size x img_size pixels.
input_img_size = (img_size, img_size, 3)
# Training pipeline: MobileNetV2 preprocessing plus light geometric
# augmentation, read from the class sub-folders of training/.
# class_mode is left at the library default.
train_augmenter = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    rotation_range=20,
    shear_range=0.05,
    zoom_range=0.05,
    width_shift_range=0.05,
    height_shift_range=0.05,
    fill_mode="nearest",
)
train_generator = train_augmenter.flow_from_directory(
    "training",
    target_size=(img_size, img_size),
    color_mode="rgb",
    batch_size=batch_size,
    shuffle=True,
)
# Validation pipeline: same preprocessing, no augmentation. shuffle=False
# keeps the batch order fixed so predictions can be lined up with
# val_generator.classes later on.
val_preprocessor = ImageDataGenerator(preprocessing_function=preprocess_input)
val_generator = val_preprocessor.flow_from_directory(
    "validation",
    target_size=(img_size, img_size),
    color_mode="rgb",
    batch_size=batch_size,
    shuffle=False,
)
Found 222020 images belonging to 2 classes. Found 55504 images belonging to 2 classes.
# Number of WHOLE batches per pass; floor division means a trailing partial
# batch is not counted.
train_steps, val_steps = (
    int(folder_size // batch_size)
    for folder_size in (training_folder_size, validation_folder_size)
)
train_steps, val_steps
(1734, 433)
def create_model():
    """Build the classifier: a MobileNetV2 backbone topped by a small dense head.

    Reads the module-level ``input_img_size`` and ``num_classes``.

    Returns:
        An uncompiled Keras ``Model`` mapping the input tensor to softmax
        scores over ``num_classes`` classes.
    """
    input_tensor = Input(shape=input_img_size)

    # Backbone without its classification top and without pre-trained weights.
    backbone = MobileNetV2(
        include_top=False,
        weights=None,
        alpha=1.5,
        input_shape=input_img_size,
        input_tensor=input_tensor)

    # Every backbone layer takes part in training.
    for backbone_layer in backbone.layers:
        backbone_layer.trainable = True

    # Head: flatten -> 256-unit ReLU -> dropout -> softmax.
    # Layer creation order is kept as-is because later cells look a layer up
    # by its auto-generated name.
    head = Flatten()(backbone.output)
    head = Dense(256, activation='relu')(head)
    head = Dropout(0.5)(head)
    predictions = Dense(units=num_classes, activation="softmax")(head)

    return Model(inputs=input_tensor, outputs=predictions)
# Initial learning rate and total number of training epochs.
learning_rate = 3e-4
epochs = 20


def polynomial_decay(epoch):
    """Return the learning rate to use for ``epoch``.

    The rate starts at ``learning_rate`` for epoch 0 and shrinks with the
    remaining fraction of training, reaching 0 at ``epoch == epochs``.
    With the exponent fixed at 1.0 the decay is linear.

    Args:
        epoch: Zero-based epoch index.

    Returns:
        The decayed learning rate as a float.
    """
    power = 1.0
    remaining_fraction = 1 - epoch / float(epochs)
    return learning_rate * remaining_fraction ** power
# Class labels of every image (validation + training), used to derive
# 'balanced' weights that counter the benign/malignant imbalance.
y_true = np.concatenate((val_generator.classes, train_generator.classes))
# Keyword arguments: newer scikit-learn releases no longer accept `classes`
# and `y` positionally, while older releases accept the keywords as well.
cw = compute_class_weight(class_weight='balanced', classes=np.unique(y_true), y=y_true)
# Map class index -> weight, the form passed to Keras as `class_weight`.
class_weights = dict(enumerate(cw))
class_weights
{0: 0.6982157413277783, 1: 1.761251999086132}
# Checkpoint file name template. An underscore separates the two fields so the
# saved files match the name loaded later in this notebook
# ('epoch=15_accuracy=0.8687.h5') and contain no shell/filesystem-unfriendly '|'.
weights_name = "epoch={epoch:02d}_accuracy={val_acc:.4f}.h5"
# Keep only weights that improve validation accuracy.
checkpoint = ModelCheckpoint(weights_name, monitor="val_acc", verbose=1, save_best_only=True,
                             save_weights_only=True, mode="max", period=1)
# Per-epoch learning rate taken from polynomial_decay.
lr_decay = LearningRateScheduler(polynomial_decay)
optimizer = SGD(lr=learning_rate, momentum=0.9)
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version. Instructions for updating: Colocations handled automatically by placer.
model = create_model()
# The head is a two-unit softmax and the generators are left at their default
# class_mode (one-hot labels per the Keras docs), so categorical cross-entropy
# is the matching loss. For exactly two complementary outputs it yields the
# same value as the previous binary_crossentropy, but states the intent and
# stays correct if num_classes ever changes.
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['acc'])
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be removed in a future version. Instructions for updating: Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
# Train on the augmented generator with class weighting, validating on the
# held-out generator after each epoch. The returned object carries the
# per-epoch metric history used by the plots below.
trained_model = model.fit_generator(
    train_generator,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=val_generator,
    validation_steps=val_steps,
    class_weight=class_weights,
    callbacks=[checkpoint, lr_decay],
    verbose=1,
)
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version. Instructions for updating: Use tf.cast instead. Epoch 1/20 1734/1734 [==============================] - 1073s 619ms/step - loss: 0.4558 - acc: 0.8023 - val_loss: 0.3677 - val_acc: 0.8448 Epoch 00001: val_acc improved from -inf to 0.84478, saving model to epoch=01|accuracy=0.8448.h5 Epoch 2/20 1734/1734 [==============================] - 1044s 602ms/step - loss: 0.4011 - acc: 0.8272 - val_loss: 0.4111 - val_acc: 0.8396 Epoch 00002: val_acc did not improve from 0.84478 Epoch 3/20 1734/1734 [==============================] - 1045s 603ms/step - loss: 0.3841 - acc: 0.8336 - val_loss: 0.3658 - val_acc: 0.8501 Epoch 00003: val_acc improved from 0.84478 to 0.85013, saving model to epoch=03|accuracy=0.8501.h5 Epoch 4/20 1734/1734 [==============================] - 1040s 600ms/step - loss: 0.3748 - acc: 0.8370 - val_loss: 0.4060 - val_acc: 0.8155 Epoch 00004: val_acc did not improve from 0.85013 Epoch 5/20 1734/1734 [==============================] - 1049s 605ms/step - loss: 0.3676 - acc: 0.8411 - val_loss: 0.3786 - val_acc: 0.8337 Epoch 00005: val_acc did not improve from 0.85013 Epoch 6/20 1734/1734 [==============================] - 1044s 602ms/step - loss: 0.3633 - acc: 0.8420 - val_loss: 0.3482 - val_acc: 0.8598 Epoch 00006: val_acc improved from 0.85013 to 0.85978, saving model to epoch=06|accuracy=0.8598.h5 Epoch 7/20 1734/1734 [==============================] - 1048s 605ms/step - loss: 0.3596 - acc: 0.8437 - val_loss: 0.3772 - val_acc: 0.8391 Epoch 00007: val_acc did not improve from 0.85978 Epoch 8/20 1734/1734 [==============================] - 1049s 605ms/step - loss: 0.3548 - acc: 0.8457 - val_loss: 0.3271 - val_acc: 0.8615 Epoch 00008: val_acc improved from 0.85978 to 0.86146, saving model to epoch=08|accuracy=0.8615.h5 Epoch 9/20 1734/1734 
[==============================] - 1053s 607ms/step - loss: 0.3512 - acc: 0.8464 - val_loss: 0.3410 - val_acc: 0.8553 Epoch 00009: val_acc did not improve from 0.86146 Epoch 10/20 1734/1734 [==============================] - 1050s 605ms/step - loss: 0.3491 - acc: 0.8469 - val_loss: 0.3256 - val_acc: 0.8644 Epoch 00010: val_acc improved from 0.86146 to 0.86440, saving model to epoch=10|accuracy=0.8644.h5 Epoch 11/20 1734/1734 [==============================] - 1052s 607ms/step - loss: 0.3451 - acc: 0.8487 - val_loss: 0.3150 - val_acc: 0.8650 Epoch 00011: val_acc improved from 0.86440 to 0.86505, saving model to epoch=11|accuracy=0.8650.h5 Epoch 12/20 1734/1734 [==============================] - 1067s 615ms/step - loss: 0.3431 - acc: 0.8497 - val_loss: 0.3537 - val_acc: 0.8466 Epoch 00012: val_acc did not improve from 0.86505 Epoch 13/20 1734/1734 [==============================] - 1069s 617ms/step - loss: 0.3408 - acc: 0.8514 - val_loss: 0.3410 - val_acc: 0.8544 Epoch 00013: val_acc did not improve from 0.86505 Epoch 14/20 1734/1734 [==============================] - 1049s 605ms/step - loss: 0.3390 - acc: 0.8512 - val_loss: 0.3256 - val_acc: 0.8603 Epoch 00014: val_acc did not improve from 0.86505 Epoch 15/20 1734/1734 [==============================] - 1058s 610ms/step - loss: 0.3366 - acc: 0.8522 - val_loss: 0.3063 - val_acc: 0.8687 Epoch 00015: val_acc improved from 0.86505 to 0.86872, saving model to epoch=15|accuracy=0.8687.h5 Epoch 16/20 1734/1734 [==============================] - 1045s 603ms/step - loss: 0.3357 - acc: 0.8530 - val_loss: 0.3421 - val_acc: 0.8530 Epoch 00016: val_acc did not improve from 0.86872 Epoch 17/20 1734/1734 [==============================] - 1043s 601ms/step - loss: 0.3349 - acc: 0.8533 - val_loss: 0.3329 - val_acc: 0.8576 Epoch 00017: val_acc did not improve from 0.86872 Epoch 18/20 1734/1734 [==============================] - 1045s 603ms/step - loss: 0.3336 - acc: 0.8537 - val_loss: 0.3246 - val_acc: 0.8605 Epoch 00018: val_acc did 
not improve from 0.86872 Epoch 19/20 1734/1734 [==============================] - 1045s 603ms/step - loss: 0.3327 - acc: 0.8545 - val_loss: 0.3354 - val_acc: 0.8548 Epoch 00019: val_acc did not improve from 0.86872 Epoch 20/20 1734/1734 [==============================] - 1051s 606ms/step - loss: 0.3321 - acc: 0.8544 - val_loss: 0.3277 - val_acc: 0.8581 Epoch 00020: val_acc did not improve from 0.86872
def plot_validation_training(metric, trained_model):
    """Plot the per-epoch training and validation curves of one metric.

    Args:
        metric: Key into ``trained_model.history``; the validation series is
            read from ``val_<metric>``.
        trained_model: Object exposing a ``history`` mapping of metric name
            to a per-epoch sequence of values.
    """
    history = trained_model.history
    validation_metric = history[f'val_{metric}']
    training_metric = history[metric]
    epoch_axis = range(len(training_metric))

    curves = (
        (training_metric, 'b', f'Training {metric}'),
        (validation_metric, 'r', f'Validation {metric}'),
    )
    for series, line_style, label in curves:
        plt.plot(epoch_axis, series, line_style, label=label)

    plt.ylim(bottom=0)
    plt.xlabel('Epochs ', fontsize=16)
    plt.ylabel(metric, fontsize=16)
    # Loss curves fall, so their legend sits top-right; other metrics bottom-right.
    plt.legend(loc='upper right' if metric == "loss" else 'lower right')
    plt.title(f'Training and validation {metric}', fontsize = 20)
    plt.show()
# One figure per tracked metric, loss first.
for tracked_metric in ("loss", "acc"):
    plot_validation_training(tracked_metric, trained_model)
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """Print a one-line banner and draw ``cm`` as an annotated heat map.

    Args:
        cm: Confusion matrix as a 2-D array (rows are labelled "True label",
            columns "Predicted label").
        classes: Tick labels, one per class.
        normalize: When true, each row is divided by its sum before plotting
            and cells are shown with two decimals instead of integer counts.
        title: Plot title.
        cmap: Matplotlib colour map for the heat map.
    """
    if not normalize:
        print('Confusion matrix, without normalization')
    else:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()

    tick_positions = np.arange(len(classes))
    plt.xticks(tick_positions, classes, rotation=45)
    plt.yticks(tick_positions, classes)

    cell_format = '.2f' if normalize else 'd'
    # Cells above half the maximum get white text for contrast.
    half_max = cm.max() / 2.
    for row, col in np.ndindex(cm.shape[0], cm.shape[1]):
        text_colour = "white" if cm[row, col] > half_max else "black"
        plt.text(col, row, format(cm[row, col], cell_format),
                 horizontalalignment="center",
                 color=text_colour)

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
# Restore the checkpoint chosen for evaluation.
model.load_weights('epoch=15_accuracy=0.8687.h5')
# Rewind the generator so predictions start from its first batch.
val_generator.reset()
# Predict val_steps batches of the validation generator.
classes_predicted = model.predict_generator(val_generator, steps=val_steps, verbose=1)
433/433 [==============================] - 77s 177ms/step
# Number of samples that actually received a prediction.
len(classes_predicted)
55424
# NOTE: despite its name, real_classes holds the PREDICTED class index
# (argmax over the model output), not the ground truth.
real_classes = np.argmax(classes_predicted, axis=1)
# Class index of every validation image as reported by the generator.
val_labels = val_generator.classes
len(val_labels)
55504
# Prediction ran for val_steps whole batches only, so there are fewer
# predictions than labels. Trim the labels to the number of predictions
# instead of hard-coding that count (the generator is not shuffled, so the
# leading labels line up with the predictions).
val_labels = val_labels[:len(real_classes)]
len(val_labels), len(real_classes)
(55424, 55424)
# Show the folder-name -> class-index mapping used by the generator.
val_generator.class_indices
{'benign': 0, 'malignant': 1}
# Display names, listed in class-index order (index 0 first).
classes_names = ["Benign", "Malignant"]
# First argument is the true labels, second the predictions
# (real_classes holds the predicted indices).
cm = confusion_matrix(val_labels, real_classes, labels=range(num_classes))
plot_confusion_matrix(cm, classes_names)
Confusion matrix, without normalization
# cm rows are true classes, columns predicted classes; index 0 is benign and
# index 1 is malignant, the positive class.
# Sensitivity = recall of the positive (malignant) class: TP / (TP + FN).
sensitivity = cm[1, 1] / (cm[1, 1] + cm[1, 0])
print(f'sensitivity: {sensitivity}')
# Specificity = recall of the negative (benign) class: TN / (TN + FP).
specificity = cm[0, 0] / (cm[0, 0] + cm[0, 1])
print(f'specifity: {specificity}')
sensitivity: 0.8568193825949129 specifity: 0.8617720227084263
from sklearn.metrics import classification_report
# Per-class precision / recall / F1; first argument is the true labels,
# second the predicted indices (stored in real_classes).
report = classification_report(val_labels, real_classes, target_names=classes_names)
print(report)
precision recall f1-score support Benign 0.94 0.86 0.90 39747 Malignant 0.70 0.86 0.77 15677 micro avg 0.86 0.86 0.86 55424 macro avg 0.82 0.86 0.84 55424 weighted avg 0.87 0.86 0.86 55424
!pip uninstall keras-vis
Uninstalling keras-vis-0.4.1: Would remove: /usr/local/lib/python3.6/dist-packages/docs/* /usr/local/lib/python3.6/dist-packages/keras_vis-0.4.1.dist-info/* /usr/local/lib/python3.6/dist-packages/vis/* Would not remove (might be manually added): /usr/local/lib/python3.6/dist-packages/docs/autogen.py Proceed (y/n)? y Successfully uninstalled keras-vis-0.4.1
# Install keras-vis straight from its GitHub repository (-U upgrades if present).
!pip install git+https://github.com/raghakot/keras-vis.git -U
from vis.visualization import visualize_saliency, overlay
from vis.utils import utils
from keras import activations
# Pick one benign and one malignant training image (fixed positions in the
# folder listings) and load them at the model's 48x48 input size for display.
benign_folder_name = "training/benign"
malignant_folder_name = "training/malignant"

benign_folder = os.listdir(benign_folder_name)
malignant_folder = os.listdir(malignant_folder_name)

benign_image_path = f'{benign_folder_name}/{benign_folder[20]}'
malignant_image_path = f'{malignant_folder_name}/{malignant_folder[58]}'

benign_image = utils.load_img(benign_image_path, target_size=(48, 48))
malignant_image = utils.load_img(malignant_image_path, target_size=(48, 48))
/usr/local/lib/python3.6/dist-packages/skimage/transform/_warps.py:105: UserWarning: The default mode, 'constant', will be changed to 'reflect' in skimage 0.15. warn("The default mode, 'constant', will be changed to 'reflect' in " /usr/local/lib/python3.6/dist-packages/skimage/transform/_warps.py:110: UserWarning: Anti-aliasing will be enabled by default in skimage 0.15 to avoid aliasing artifacts when down-sampling images. warn("Anti-aliasing will be enabled by default in skimage 0.15 to "
# Model-ready copies of the two sample images: load at 48x48, convert to an
# array, then apply the same MobileNetV2 preprocessing used by the generators.
benign_image_model, malignant_image_model = (
    preprocess_input(image.img_to_array(image.load_img(sample_path, target_size=(48, 48))))
    for sample_path in (benign_image_path, malignant_image_path)
)

# Batch of two: index 0 = benign sample, index 1 = malignant sample.
preprocessed_images = np.array([benign_image_model, malignant_image_model])
predictions = model.predict(preprocessed_images)
predicted_classes = np.argmax(predictions, axis=1)

# Display versions of the samples, in the same order as the batch.
images_array = [benign_image, malignant_image]
# Show both samples side by side, titled with the true and predicted class.
# The true class is taken from the position in images_array
# (0 = benign, 1 = malignant).
fig, ax = plt.subplots(1, 2, figsize=(8, 8))
fig.suptitle('Normal Images')
for index, img in enumerate(images_array):
    predicted_name = classes_names[predicted_classes[index]]
    panel = ax[index]
    panel.set_title(f'class: {classes_names[index]}, pred: {predicted_name}')
    panel.imshow(img)
# Index of the final (softmax) Dense layer, looked up by its auto-generated name.
layer_idx = utils.find_layer_idx(model, 'dense_2')

fig, ax = plt.subplots(1, 2, figsize=(8, 8))
fig.suptitle('Gradient maps')
for index, img in enumerate(images_array):
    # Feed the network the preprocessed sample (same scaling as in training and
    # in model.predict above), not the raw display image. filter_indices=index
    # selects the output unit of the sample's own class.
    grads = visualize_saliency(model, layer_idx, filter_indices=index,
                               seed_input=preprocessed_images[index])
    ax[index].set_title(classes_names[index])
    ax[index].imshow(grads, cmap='jet')
# Saliency maps again, once per backprop modifier.
for modifier in ['guided', 'relu']:
    # plt.subplots already creates the figure; a separate plt.figure() call
    # only left an empty extra figure behind.
    f, ax = plt.subplots(1, 2)
    plt.suptitle(modifier)
    for index, img in enumerate(images_array):
        # Seed with the preprocessed sample so the network sees the same
        # input scaling it was trained on.
        grads = visualize_saliency(model, layer_idx, filter_indices=index,
                                   seed_input=preprocessed_images[index],
                                   backprop_modifier=modifier)
        ax[index].imshow(grads, cmap='jet')
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
import matplotlib.cm as cm
from vis.visualization import visualize_cam
# Layer whose feature maps the class activation map is computed from.
penultimate_layer = utils.find_layer_idx(model, 'Conv_1_bn')

for modifier in [None, 'guided', 'relu']:
    # plt.subplots already creates the figure; a separate plt.figure() call
    # only left an empty extra figure behind.
    f, ax = plt.subplots(1, 2)
    plt.suptitle("vanilla" if modifier is None else modifier)
    for index, img in enumerate(images_array):
        # The network is seeded with the preprocessed sample (training-time
        # scaling); the raw image is used only as the overlay background.
        grads = visualize_cam(model, layer_idx, filter_indices=index,
                              seed_input=preprocessed_images[index],
                              penultimate_layer_idx=penultimate_layer,
                              backprop_modifier=modifier)
        # Colour-map the heat map to 8-bit RGB (alpha channel dropped).
        jet_heatmap = np.uint8(cm.jet(grads)[..., :3] * 255)
        ax[index].imshow(overlay(jet_heatmap, img))
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
<Figure size 432x288 with 0 Axes>
# Serialise the model to JSON via model.to_json() and write it to disk.
model_json = model.to_json()
with open("breast_cancer_model.json", "w") as architecture_file:
    architecture_file.write(model_json)