#!/usr/bin/env python
# coding: utf-8

# # Fine-tuning ImageNet model for Dogs and Cats categorization
#
# This tutorial shows how to fine-tune a pre-trained SqueezeNet model for dogs-and-cats categorization on a GPU using the Caffe2 framework.

# ## Prerequisite
#
# This notebook assumes an nvidia-docker environment. Install `nvidia-docker` on a GPU host, then launch a `caffe2ai/caffe2` instance to run this notebook.
#
# ```bash
# nvidia-docker run -it --rm -p 8888:8888 caffe2ai/caffe2:c2v0.8.1.cuda8.cudnn7.ubuntu16.04 \
#     jupyter notebook --no-browser --ip "*" --allow-root
# ```

# ## Getting dogs-vs-cats dataset
#
# We use the dogs-vs-cats dataset from Kaggle. Create a Kaggle account, log in, and download the dataset from the following URL:
#
# https://www.kaggle.com/c/dogs-vs-cats/data
#
# Once you have downloaded the dataset, unarchive the files and organize the data in the following location.
#
# ```
# mkdir dogs-vs-cats/
# unzip train.zip -d dogs-vs-cats/
# unzip test1.zip -d dogs-vs-cats/
# rm train.zip test1.zip
# ```

# ## Prepare pre-trained model
#
# Caffe2 v0.8.1 has a bug in its model download module. Fix the code and get SqueezeNet as follows.
#
# ```
# sed -i "s,s3.amazonaws.com/caffe2/,s3.amazonaws.com/download.caffe2.ai/," \
#     /usr/local/caffe2/python/models/download.py
# python -m caffe2.python.models.download -i squeezenet
# ```

# ## Data pre-processing
#
# The following code transforms an image to the desired input format for SqueezeNet.
# In[1]:

import numpy as np
import skimage.io
import skimage.transform


def rescale(img, input_height, input_width):
    """Resize ``img`` so that its shorter side matches the target size.

    The aspect ratio of the input is preserved; the longer side is left
    larger than the target so that ``crop_center`` can trim it afterwards.

    Args:
        img: Image array whose first two axes are (height, width).
        input_height: Target height in pixels.
        input_width: Target width in pixels.

    Returns:
        The resized image as returned by ``skimage.transform.resize``.
    """
    aspect = img.shape[1] / float(img.shape[0])
    # skimage expects output_shape as (rows, cols), i.e. (height, width).
    if aspect > 1:
        # Landscape: pin the height, let the width grow with the aspect ratio.
        output_shape = (input_height, int(aspect * input_height))
    elif aspect < 1:
        # Portrait: pin the width, let the height grow with the aspect ratio.
        output_shape = (int(input_width / aspect), input_width)
    else:
        output_shape = (input_height, input_width)
    return skimage.transform.resize(img, output_shape)


def crop_center(img, cropx, cropy):
    """Return the central ``cropy`` x ``cropx`` window of ``img``.

    Args:
        img: Image array whose first two axes are (height, width); any
            trailing axes (e.g. channels) are kept untouched.
        cropx: Width of the crop in pixels.
        cropy: Height of the crop in pixels.

    Returns:
        A view of ``img`` restricted to the central window. If the image is
        smaller than the requested crop along an axis, the whole axis is
        returned.
    """
    height, width = img.shape[:2]
    # Clamp at zero: a negative start would wrap around and slice the wrong
    # region when the image is smaller than the crop.
    startx = max(width // 2 - (cropx // 2), 0)
    starty = max(height // 2 - (cropy // 2), 0)
    return img[starty:starty + cropy, startx:startx + cropx]


def prepare_image(img_path):
    """Load an image file and convert it to the network input format.

    The image is scaled and center-cropped to 227x227, reordered from HWC to
    CHW, flipped from RGB to BGR, and shifted so that values lie in
    [-128, 127].

    Args:
        img_path: Path of the image file to read.

    Returns:
        A ``float32`` array of shape (3, 227, 227).
    """
    img = skimage.io.imread(img_path)
    img = skimage.img_as_float(img)
    if img.ndim == 2:
        # Grayscale file: replicate the single plane into three channels so
        # the channel handling below applies.
        img = np.stack((img,) * 3, axis=-1)
    img = rescale(img, 227, 227)
    img = crop_center(img, 227, 227)
    img = img.swapaxes(1, 2).swapaxes(0, 1)  # HWC to CHW dimension
    img = img[(2, 1, 0), :, :]  # RGB to BGR color order
    img = img * 255 - 128  # Subtract mean = 128
    return img.astype(np.float32)


# ## Dataset loader
#
# During training, we will use the following object to read batches of image and label pairs.
# In[2]:

import glob
import os
import random

import numpy as np


def make_batch(iterable, batch_size=1):
    """Yield consecutive slices of ``iterable`` of at most ``batch_size`` items.

    Args:
        iterable: A sized, sliceable sequence (e.g. a list).
        batch_size: Maximum number of items per slice.

    Yields:
        Slices of ``iterable``; the last one may be shorter than
        ``batch_size``.
    """
    length = len(iterable)
    for index in range(0, length, batch_size):
        # Slicing clamps at the end of the sequence, so no explicit min().
        yield iterable[index:index + batch_size]


class DogsCatsDataset(object):
    """Dogs and cats dataset reader.

    Image files are discovered as ``<data_dir>/<split>/*.jpg``. The label is
    taken from the part of the file name before the first dot ("dog" -> 0,
    "cat" -> 1); any other prefix is labelled -1.
    """

    def __init__(self, split="train", data_dir="dogs-vs-cats/"):
        """Index the image files of one split.

        Args:
            split: Sub-directory of ``data_dir`` to read.
            data_dir: Root directory of the dataset.
        """
        self.categories = {"dog": 0, "cat": 1}
        # glob returns files in arbitrary order; sort so indices are stable
        # across runs and machines.
        self.image_files = sorted(
            glob.glob(os.path.join(data_dir, split, "*.jpg")))
        self.labels = [
            self.categories.get(
                os.path.basename(path).strip().split(".")[0], -1)
            for path in self.image_files
        ]

    def __getitem__(self, index):
        """Return the pre-processed image and the label at ``index``."""
        image = prepare_image(self.image_files[index])
        label = self.labels[index]
        return image, label

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.labels)

    def read(self, batch_size=50, shuffle=True):
        """Read (image, label) pairs in batches.

        Args:
            batch_size: Maximum number of samples per batch.
            shuffle: Whether to visit the samples in random order.

        Yields:
            Tuples ``(images, labels)`` where ``images`` is a ``float32``
            array stacked along a new first axis and ``labels`` is a 1-D
            ``int32`` array of the same length. The final batch is shorter
            than ``batch_size`` when the dataset size is not a multiple of it.
        """
        order = list(range(len(self)))
        if shuffle:
            random.shuffle(order)
        for batch in make_batch(order, batch_size):
            images, labels = [], []
            for index in batch:
                image, label = self[index]
                images.append(image)
                labels.append(label)
            # Size the label array from the batch itself: reshaping to
            # (batch_size,) raises on a trailing partial batch.
            yield (np.stack(images).astype(np.float32),
                   np.asarray(labels, dtype=np.int32))


# ## Model construction
#
# In the following, we will modify the SqueezeNet structure so that we can apply the pre-trained model to the dogs-vs-cats categorization problem. We do not really need to change the kind of operators in the prediction module but need to change how to initialize model parameters as well as the output shape. In Caffe2, the net definition is given as a protocol buffer. We can directly modify the protocol buffer and add any other necessary operators using model_helper.
#
# The basic steps are:
#
# 1. Load predict_net.
# 2. Load init_net, add them to the model parameter list, and replace how to initialize the parameters.
# 3. Add any training operators such as gradients and optimizers.
# In[3]:

from caffe2.python import core, workspace, model_helper, optimizer, brew
from caffe2.python.modeling import initializers
from caffe2.python.modeling.parameter_info import ParameterTags
from caffe2.proto import caffe2_pb2

PREDICT_NET = "/usr/local/caffe2/python/models/squeezenet/predict_net.pb"
INIT_NET = "/usr/local/caffe2/python/models/squeezenet/init_net.pb"


def AddPredictNet(model, predict_net_path):
    """Load a serialized predict net and install it as ``model.net``.

    Args:
        model: The model helper whose ``net`` attribute is replaced.
        predict_net_path: Path of the serialized ``NetDef`` file.
    """
    predict_net_proto = caffe2_pb2.NetDef()
    with open(predict_net_path, "rb") as f:
        predict_net_proto.ParseFromString(f.read())
    model.net = core.Net(predict_net_proto)
    # Fix dimension incompatibility: squeeze axes 2 and 3 of "softmaxout"
    # into a new "softmax" blob.
    model.Squeeze("softmaxout", "softmax", dims=[2, 3])


def AddInitNet(model, init_net_path, out_dim=2, params_to_learn=None):
    """Load a serialized init net and re-initialize the conv10 layer.

    Every initializer whose output is selected by ``params_to_learn`` is
    registered on ``model`` as a parameter. The stored initializers for
    ``conv10_w`` and ``conv10_b`` are dropped and replaced by fresh fills
    sized for ``out_dim`` outputs.

    Args:
        model: The model helper whose ``param_init_net`` is replaced.
        init_net_path: Path of the serialized ``NetDef`` file.
        out_dim: Number of output channels of the new conv10 layer.
        params_to_learn: Names of the parameters to register for learning,
            or ``None`` to register every parameter in the init net.
    """
    init_net_proto = caffe2_pb2.NetDef()
    with open(init_net_path, "rb") as f:
        init_net_proto.ParseFromString(f.read())

    # Define params to learn in the model.
    for op in init_net_proto.op:
        param_name = op.output[0]
        if params_to_learn is None or param_name in params_to_learn:
            tags = (ParameterTags.WEIGHT if param_name.endswith("_w")
                    else ParameterTags.BIAS)
            model.create_param(
                param_name=param_name,
                # NOTE(review): passes the op's first argument proto as the
                # shape; presumably that is the fill op's "shape" argument --
                # confirm.
                shape=op.arg[0],
                initializer=initializers.ExternalInitializer(),
                tags=tags,
            )

    # Remove the stored conv10_w / conv10_b initializers. They are located by
    # output name rather than by a fixed position in the op list, and deleted
    # back to front so the remaining indices stay valid.
    replaced_params = ("conv10_w", "conv10_b")
    for index in reversed(range(len(init_net_proto.op))):
        if init_net_proto.op[index].output[0] in replaced_params:
            del init_net_proto.op[index]

    # Add new initializers for conv10_w, conv10_b
    model.param_init_net = core.Net(init_net_proto)
    model.param_init_net.XavierFill(
        [], "conv10_w", shape=[out_dim, 512, 1, 1])
    model.param_init_net.ConstantFill([], "conv10_b", shape=[out_dim])


def AddTrainingOperators(model, softmax, label):
    """Add loss, accuracy, gradient and SGD operators to ``model``.

    Args:
        model: The model helper to extend.
        softmax: Name of the blob holding the predicted probabilities.
        label: Name of the blob holding the target labels.
    """
    xent = model.LabelCrossEntropy([softmax, label], "xent")
    loss = model.AveragedLoss(xent, "loss")
    brew.accuracy(model, [softmax, label], "accuracy")
    model.AddGradientOperators([loss])
    opt = optimizer.build_sgd(model, base_learning_rate=0.1)
    # NOTE(review): build_sgd may already attach the optimizer to every
    # parameter returned by GetOptimizationParamInfo(); if so, this loop adds
    # a second update per parameter -- confirm against the Caffe2 version in
    # use before changing it.
    for param in model.GetOptimizationParamInfo():
        opt(model.net, model.param_init_net, param)


train_model = model_helper.ModelHelper("train_net")
AddPredictNet(train_model, PREDICT_NET)
# Use params_to_learn=None to learn everything.
AddInitNet(train_model, INIT_NET, params_to_learn=["conv10_w", "conv10_b"])
AddTrainingOperators(train_model, "softmax", "label")


# ## Running in GPU
#
# In this version of Caffe2, operators with an empty `device_option` run on the CPU. We have to manually remove the per-op device option and specify a global `device_option` like below.

# In[4]:


def SetDeviceOption(model, device_option):
    """Apply one net-wide device option to both nets of ``model``.

    For ``model.net`` and ``model.param_init_net``, the net-level device
    option is overwritten with ``device_option`` and the per-operator
    ``device_option`` and ``engine`` fields are cleared, except on the
    operators that produce "optimizer_iteration" or "iteration_mutex".
    Each net attribute is then replaced by a new ``core.Net`` built from the
    edited definition.

    Args:
        model: The model helper whose nets are rewritten.
        device_option: The ``DeviceOption`` proto to copy onto each net.
    """
    # Clear op-specific device options and set global device option.
    for net in ("net", "param_init_net"):
        net_def = getattr(model, net).Proto()
        net_def.device_option.CopyFrom(device_option)
        for op in net_def.op:
            # Some operators are CPU-only.
            if op.output[0] not in ("optimizer_iteration", "iteration_mutex"):
                op.ClearField("device_option")
                op.ClearField("engine")
        setattr(model, net, core.Net(net_def))


device_option = caffe2_pb2.DeviceOption()
device_option.device_type = caffe2_pb2.CUDA
device_option.cuda_gpu_id = 0

SetDeviceOption(train_model, device_option)


# ## Running fine-tuning
#
# Now that we have specified the fine-tuning model, let's start the process using the GPU.

# In[5]:

workspace.ResetWorkspace()

# Initialization: feed a single sample so that the "data" and "label" blobs
# exist before the nets are initialized and created.
train_dataset = DogsCatsDataset("train")
for image, label in train_dataset.read(batch_size=1):
    workspace.FeedBlob("data", image, device_option=device_option)
    workspace.FeedBlob("label", label, device_option=device_option)
    break
workspace.RunNetOnce(train_model.param_init_net)
workspace.CreateNet(train_model.net, overwrite=True)

# Main loop.
batch_size = 50
print_freq = 50
losses = []  # One loss value per training iteration, across all epochs.
for epoch in range(5):
    for index, (image, label) in enumerate(train_dataset.read(batch_size)):
        workspace.FeedBlob("data", image, device_option=device_option)
        workspace.FeedBlob("label", label, device_option=device_option)
        workspace.RunNet(train_model.net)
        accuracy = float(workspace.FetchBlob("accuracy"))
        loss = workspace.FetchBlob("loss").mean()
        losses.append(loss)
        if index % print_freq == 0:
            print("[{}][{}/{}] loss={}, accuracy={}".format(
                epoch, index, len(train_dataset) // batch_size,
                loss, accuracy))


# ## Checking learning progress
#
# Let's plot the loss over iterations. Note this is very simple plotting. Perhaps it is better to use a monitoring tool such as [visdom](https://github.com/facebookresearch/visdom).

# In[6]:

import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

plt.plot(losses)
plt.xlabel("iterations")
plt.ylabel("loss")
plt.grid(True)


# ## Deploy model
#
# Once fine-tuning is finished, we can apply the learned model to a new input. First instantiate a deploy model.

# In[7]:

deploy_model = model_helper.ModelHelper("deploy_net")
AddPredictNet(deploy_model, PREDICT_NET)
SetDeviceOption(deploy_model, device_option)


# We can pre-process and feed a test image to the deploy model. The prediction result is given as probability in NxC format, where C is in `["dog", "cat"]` order.

# In[8]:

test_dataset = DogsCatsDataset("test1")
image, _ = test_dataset[0]
image = image[np.newaxis, :]  # Add a leading batch axis of size 1.
workspace.FeedBlob("data", image, device_option=device_option)
workspace.RunNetOnce(deploy_model.net)
result = workspace.FetchBlob("softmax")[0]

skimage.io.imshow(test_dataset.image_files[0])
print("dog={:g} cat={:g}".format(result[0], result[1]))