import cv2
import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
from keras.models import Model
from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Activation, UpSampling2D, BatchNormalization
from keras.optimizers import RMSprop
from keras.losses import binary_crossentropy
import keras.backend as K
from sklearn.model_selection import train_test_split
# (notebook output) Using TensorFlow backend.
import matplotlib.pyplot as plt
# %matplotlib inline  (IPython magic — only valid inside a notebook)
def dice_loss(y_true, y_pred):
    """Soft Dice coefficient between ground truth and prediction.

    NOTE: despite the name, this returns the Dice *coefficient* (higher is
    better, in (0, 1]); `bce_dice_loss` converts it to a loss via ``1 - dice``.
    The name is kept for compatibility with existing callers/metrics.

    The ``smooth`` term avoids division by zero and softens the gradient
    when both masks are empty.
    """
    smooth = 1.
    yt = K.flatten(y_true)
    yp = K.flatten(y_pred)
    overlap = K.sum(yt * yp)
    denom = K.sum(yt) + K.sum(yp) + smooth
    return (2. * overlap + smooth) / denom
def bce_dice_loss(y_true, y_pred):
    """Combined loss: binary cross-entropy plus (1 - Dice coefficient)."""
    dice_term = 1 - dice_loss(y_true, y_pred)
    bce_term = binary_crossentropy(y_true, y_pred)
    return bce_term + dice_term
def unet_down_one_block(inputs, num_filters):
    """One U-Net encoder block: two (Conv3x3 -> BatchNorm -> ReLU) stages.

    Pooling is applied separately by `unet_max_pool` so the pre-pool tensor
    can be kept as a skip connection.
    """
    x = inputs
    for _ in range(2):
        x = Conv2D(num_filters, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
    return x
def unet_max_pool(inputs):
    """Halve the spatial resolution with a 2x2, stride-2 max pool."""
    return MaxPooling2D((2, 2), strides=(2, 2))(inputs)
def unet_up_one_block(up_input, down_input, num_filters):
    """One U-Net decoder block.

    Upsamples `up_input` 2x, concatenates the matching encoder skip tensor
    `down_input` along the channel axis, then applies three
    (Conv3x3 -> BatchNorm -> ReLU) stages.
    """
    x = UpSampling2D((2, 2))(up_input)
    x = concatenate([down_input, x], axis=3)
    for _ in range(3):
        x = Conv2D(num_filters, (3, 3), padding='same')(x)
        x = BatchNormalization()(x)
        x = Activation('relu')(x)
    return x
def get_unet(input_shape = (256, 256, 3),
             num_classes = 1,
             initial_filters = 32,
             central_filters = 1024):
    """Build and compile a U-Net.

    The encoder doubles the filter count each level, starting at
    ``initial_filters``, until the bottleneck reaches ``central_filters``;
    the decoder mirrors it back down, consuming the stored skip tensors.
    Compiled with RMSprop on BCE+Dice loss, reporting the Dice coefficient
    (via the misnamed `dice_loss`) as the metric.
    """
    net_input = Input(shape=input_shape)
    skips = []  # pre-pool encoder outputs, deepest last
    x = net_input
    filters = initial_filters

    # Encoder: down-blocks + pooling until the next level would be the bottleneck.
    while 2 * filters <= central_filters:
        block = unet_down_one_block(x, filters)
        skips.append(block)
        x = unet_max_pool(block)
        filters *= 2

    # Bottleneck (no pooling, no skip).
    x = unet_down_one_block(x, filters)

    # Decoder: mirror the encoder, popping skips deepest-first.
    filters //= 2
    while filters >= initial_filters:
        x = unet_up_one_block(x, skips.pop(), filters)
        filters //= 2

    classify = Conv2D(num_classes, (1, 1), activation='sigmoid')(x)
    model = Model(inputs=net_input, outputs=classify)
    model.compile(optimizer=RMSprop(lr=0.0001),
                  loss=bce_dice_loss,
                  metrics=[dice_loss])
    return model
# Two ensemble members: one fed 512x512 downscaled images, one fed two
# 1024x1024 crops of the full-resolution image (see the inference loop below).
# Fewer initial filters are used at higher resolution to fit in memory.
model_512 = get_unet(input_shape=(512,512,3), initial_filters=16)
model_512.load_weights(filepath='weights/best_weights_512_2.hdf5')
model_fullres = get_unet(input_shape=(1024,1024,3), initial_filters=8)
model_fullres.load_weights(filepath='weights/best_weights_fullres3.hdf5')
from tqdm import tqdm

# The sample submission lists every test image; strip the extension to get
# bare ids, and keep the full '<id>.jpg' filenames for the submission file.
df_test = pd.read_csv('data/sample_submission.csv')
ids_test = df_test['img'].map(lambda s: s.split('.')[0])
names = ['{}.jpg'.format(img_id) for img_id in ids_test]
# adapted from https://www.kaggle.com/stainsby/fast-tested-rle
def run_length_encode(mask):
    '''Run-length encode a binary mask for submission.

    mask: numpy array, 1 - mask, 0 - background (any shape; flattened
    row-major). Returns the RLE as a string of space-separated
    "start length" pairs with 1-indexed start positions, '' for an
    empty mask.

    Fix over the original fast RLE: the mask is zero-padded on both ends
    before looking for transitions, so a run that starts at the very first
    pixel (or ends at the very last) is encoded correctly instead of being
    dropped/truncated. Output is unchanged for masks whose borders are
    background.
    '''
    flat = np.asarray(mask).flatten()
    padded = np.concatenate(([0], flat, [0]))
    # Transition indices; +1 converts to 1-indexed pixel positions.
    runs = np.where(padded[1:] != padded[:-1])[0] + 1
    # Even entries are run starts, odd entries are run ends -> lengths.
    runs[1::2] -= runs[0::2]
    return ' '.join(str(r) for r in runs)
# Accumulators / knobs for the inference loop below.
rles = []
val_batch_size = 8
threshold = 0.5
import gzip
import pickle
# Pre-computed crop bounding boxes per test image id.
# NOTE(review): pickle.load is only safe because this file is produced
# locally by an earlier step — never unpickle untrusted data.
f = gzip.open('test_bboxes.pklz','rb')
test_bboxes = pickle.load(f)
f.close()
# Inference: for each batch of test images, predict with both models,
# stitch/resize everything back to the native 1280x1918 resolution,
# average the two probability maps, threshold, and RLE-encode.
# (Restores the loop structure lost in the notebook export; also avoids
# shadowing the builtin `id`.)
for start in tqdm(range(0, len(ids_test), val_batch_size)):
    end = min(start + val_batch_size, len(ids_test))
    ids_test_batch = ids_test[start:end]

    # Load the original-resolution images for this batch.
    this_batch_imgs = []
    for img_id in ids_test_batch.values:
        this_batch_imgs.append(cv2.imread('data/test/{}.jpg'.format(img_id)))

    # --- Model 1: whole image downscaled to 512x512. ---
    m512_batch = [cv2.resize(img, (512, 512)) for img in this_batch_imgs]
    preds_512 = model_512.predict_on_batch(np.array(m512_batch, np.float32) / 255)
    preds_512 = np.squeeze(preds_512, axis=3)

    # --- Model 2: two overlapping 1024-wide crops per image (the second
    # crop is mirrored so both look like "left" crops to the network). ---
    mfull_batch = []
    for img_id, img in zip(ids_test_batch.values, this_batch_imgs):
        img_1_box, img_2_box = test_bboxes[img_id][0], test_bboxes[img_id][1]
        # boxes appear to be (x0, x1, y0, y1), inclusive — TODO confirm
        img_1 = np.copy(img[img_1_box[2]:(img_1_box[3]+1), img_1_box[0]:(img_1_box[1]+1), :])
        img_2 = np.copy(img[img_2_box[2]:(img_2_box[3]+1), img_2_box[0]:(img_2_box[1]+1), :])
        img_2 = cv2.flip(img_2, 1)  # horizontal mirror
        mfull_batch.append(img_1)
        mfull_batch.append(img_2)
    preds_full = model_fullres.predict_on_batch(np.array(mfull_batch, np.float32) / 255)
    preds_full = np.squeeze(preds_full, axis=3)

    # Stitch each pair of crop predictions back into a full-width mask,
    # averaging the 130-column overlap (894 + 130 + 894 = 1918).
    final_preds_full = []
    for i, img_id in enumerate(ids_test_batch.values):
        this_bbox = test_bboxes[img_id]
        img_1_box, img_2_box = this_bbox[0], this_bbox[1]
        pred_1 = np.copy(preds_full[i * 2, :, :])
        pred_2 = cv2.flip(np.copy(preds_full[i * 2 + 1, :, :]), 1)  # un-mirror
        left_part = np.copy(pred_1[:, 0:894])
        middle_part = np.add(np.copy(pred_1[:, 894:1024]), np.copy(pred_2[:, 0:130])) / 2
        right_part = np.copy(pred_2[:, 130:1024])
        all_parts = np.concatenate((left_part, middle_part, right_part), axis=1)
        # Zero-pad top/bottom where the crops did not span all 1280 rows.
        if img_1_box[2] != 0:
            top_part = np.zeros(shape=(img_1_box[2], 1918), dtype=np.float32)
            all_parts = np.concatenate((top_part, all_parts), axis=0)
        if img_1_box[3] != 1279:
            bottom_part = np.zeros(shape=(1279 - img_1_box[3], 1918), dtype=np.float32)
            all_parts = np.concatenate((all_parts, bottom_part), axis=0)
        final_preds_full.append(all_parts)

    # Upscale the 512 predictions to native resolution (cv2 dsize is (w, h)).
    final_preds_512 = []
    for i in range(len(ids_test_batch.values)):
        final_preds_512.append(cv2.resize(np.copy(preds_512[i, :, :]), (1918, 1280)))

    # Average ensemble, threshold, encode.
    for p512, pfull in zip(final_preds_512, final_preds_full):
        prob = np.add(p512, pfull) / 2
        rles.append(run_length_encode(prob > threshold))
# (notebook output) 100%|==========| 12508/12508 [12:25:18<00:00, 3.54s/it]
# Write the gzipped Kaggle submission: one row per test image,
# 'img' filename and its run-length-encoded mask.
print("Generating submission file...")
df = pd.DataFrame({'img': names, 'rle_mask': rles})
df.to_csv('submit/submission9.csv.gz', index=False, compression='gzip')
print("Completed!")
# (notebook output) Generating submission file... Completed!