#!/usr/bin/env python
# coding: utf-8

# # Multi-label classification
# Previously, we classified images into one of 2 classes: dogs and cats. But what if an image belongs to multiple classes at once?
#
# The process is largely the same in terms of fast.ai, but there are a few differences.
#
# ## 1. Softmax --> Sigmoid
# Instead of using a `softmax` activation function to evaluate our classes, we'll use a __sigmoid__ function. Softmax wants to pick one thing -- remember, the $e^{activation}$ means differences between activations are greatly accentuated, so one class tends to dominate. Sigmoid works better for multi-label problems because it maps each class's activation independently to a value between 0 and 1, so several classes can score highly at once. (There is a quick numeric comparison of the two further down, just before we define `get_data`.)

# In[1]:

# The sigmoid function, aka the logistic function
# Sigmoid output is close to 0 for negative inputs, and close to 1 for positive inputs
get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import numpy as np

def sigmoid(z):
    # Apply sigmoid activation function
    return 1/(1+np.exp(-z))

test_input = np.arange(-6, 6, 0.01)
plt.plot(test_input, sigmoid(test_input), linewidth=2)
plt.grid(True)

# ## 2. Read from CSV
# Previously we were able to use a Keras-style setup -- put each image in a folder named after its class. Since each image can now belong to multiple classes, we'd have to copy each image into several folders, which isn't practical or optimal. So instead we read our labels from a .csv file.
#
# ## Note on one-hot encoding
# In single-label classification, our data might be one-hot encoded. Imagine we have a bunch of data like this:

# In[2]:

# The second row indicates which class this example is -- there is a 1 in the column for its class
data = np.array([["Dog", "Cat", "Frog", "Bird"],
                 [0, 0, 1, 0]])
data

# Obviously it would be a lot more efficient to just store the index of each class:

# In[6]:

data = np.array([["Dog", "Cat", "Frog", "Bird"],
                 [0, 1, 2, 3]])
data

# So if our data is tagged with `2`, we know it's a frog, etc. Note that even if our data is tagged like this, `PyTorch` will turn it into one-hot encoded vectors.
#
# More info: https://hackernoon.com/what-is-one-hot-encoding-why-and-when-do-you-have-to-use-it-e3c6186d008f

# # Doing the training

# In[3]:

# Set up notebook auto-reloading and inline plotting
get_ipython().run_line_magic('reload_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')
get_ipython().run_line_magic('matplotlib', 'inline')

# In[4]:

from fastai.conv_learner import *

# In[5]:

PATH = 'data/planet/'

# In[6]:

get_ipython().system('ls {PATH}')

# In[7]:

# Recall from previous lessons that we had our confusion matrix showing false negatives, false positives, etc.
# f2 is the evaluation metric defined by the contest (an F-beta score with beta=2) to measure how good our model is.
# You can always define your own evaluation function.
from planet import f2

metrics = [f2]
f_model = resnet34

# In[8]:

label_csv = f'{PATH}train_v2.csv'
# Subtract 1 for the header row
data_size = len(list(open(label_csv)))-1
# Set aside a random 20% of the rows as validation data
val_idxs = get_cv_idxs(data_size)
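# Since our labels now live in a CSV rather than in folder names, it's worth peeking at the file before handing it to fast.ai. The next cell is an added illustration, and it assumes the standard Kaggle Planet layout for train_v2.csv: a header row of `image_name,tags`, then one row per image with space-separated tags.

# In[ ]:

# Added sketch: print the first few rows of the label file
with open(label_csv) as f:
    for _ in range(4):
        print(f.readline().strip())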
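# And here is the quick softmax vs. sigmoid comparison promised in section 1. This cell is an added illustration, not part of the original lesson: it applies both functions to the same activations, showing that softmax forces the classes to compete (the outputs must sum to 1), while sigmoid scores each class independently, so several can be "on" at once.

# In[ ]:

# Added sketch: compare softmax and sigmoid on the same activations
def softmax(z):
    # Exponentiate (shifted for numerical stability) and normalize to sum to 1
    e = np.exp(z - np.max(z))
    return e / e.sum()

activations = np.array([1.0, 3.0, 0.2, -1.0])
print("softmax:", softmax(activations))  # one class dominates; outputs sum to 1
print("sigmoid:", sigmoid(activations))  # independent scores; two classes are above 0.5 here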
# In[9]:

# Set up our get_data function
# This sets up our transforms and returns an ImageClassifierData object which we can use to browse the data, etc.
# Notice this time we can use the top-down transforms, since satellite images are still valid when flipped or rotated
def get_data(image_size):
    transforms = tfms_from_model(f_model, image_size, aug_tfms=transforms_top_down, max_zoom=1.05)
    return ImageClassifierData.from_csv(PATH, "train-jpg", label_csv, tfms=transforms,
                                        suffix=".jpg", val_idxs=val_idxs, test_name="test-jpg")

# In[10]:

# Now set up our data using the function we just defined
data = get_data(256)

# Now we will explore our data some more. There are a few concepts from `PyTorch` that it's worth knowing about:
#
# * `Dataset` (ds) = indexable; gives us back a single object (e.g., a single image and its labels)
# * `DataLoader` (dl) = iterable; gives us back a single mini-batch (e.g., of images)

# In[11]:

# So this line of code will get a single mini-batch: convert the data loader to an iterator, then get the next item from that iterator
images, labels = next(iter(data.val_dl))

# In[12]:

# We can see that each image gets a vector of labels, with a 1 in each position where the image has that classification
labels

# In[13]:

# Note the shape too:
# the batch size is 64, and we have 17 different labels
labels.shape

# In[14]:

# Let's take a look at the first image
# Because the image is just a matrix of numbers, we can multiply by 1.4 to make it brighter
plt.imshow(data.val_ds.denorm(to_np(images))[0]*1.4);

# In[15]:

# And to see which labels this image has
list(zip(data.classes, labels[0]))

# In[16]:

# Start off training on small images
image_size = 64

# In[17]:

data = get_data(image_size)

# In[18]:

# Pre-resize all of our images to at most 1.3x the target size and cache them in "tmp" --
# this saves decoding the full-size JPEGs on every epoch
data = data.resize(int(image_size*1.3), "tmp")

# In[20]:

# Now we want to find the learning rate
# First set up our model
learn = ConvLearner.pretrained(f_model, data, metrics=metrics)

# In[47]:

learning_rate_finder = learn.lr_find()
learn.sched.plot()

# In[22]:

# Note that the loss is decreasing fastest at a learning rate of around 0.2
learning_rate = 0.2

# In[23]:

# Do some training for the final layers
# We now know that when we train, what we are doing is setting the kernel (image feature filter) values
# and the numbers in the weight matrices of the fully connected layers
learn.fit(learning_rate, 3, cycle_len=1, cycle_mult=2)

# In[24]:

# The satellite images are quite different from the ImageNet images that this network was pre-trained on.
# Therefore, we use comparatively higher learning rates for the earlier layers than in our cats/dogs example --
# the early layers need to change more here
differential_learning_rates = np.array([learning_rate/9, learning_rate/3, learning_rate])

# In[26]:

# Unfreeze the earlier layers so we can train them with our differential learning rates
learn.unfreeze()
learn.fit(differential_learning_rates, 3, cycle_len=1, cycle_mult=2)

# In[27]:

# Save the weights
learn.save(f'{image_size}')

# In[28]:

# Take a look -- we can see the loss decreasing as we train
learn.sched.plot_loss()

# In[29]:

# Now we train again with slightly larger images.
# Starting small and stepping up the image size gives the network effectively new data to learn from,
# which speeds up early training and helps avoid overfitting
image_size = 128

# In[30]:

data = get_data(image_size)
learn.set_data(data)
learn.freeze()
learn.fit(learning_rate, 3, cycle_len=1, cycle_mult=3)

# In[31]:

# Now tweak the earlier layers again
learn.unfreeze()
learn.fit(differential_learning_rates, 3, cycle_len=1, cycle_mult=3)

# In[32]:

learn.save(f'{image_size}')

# In[33]:

# Now do it AGAIN, with even bigger images
image_size = 256

# In[35]:

data = get_data(image_size)
learn.set_data(data)
learn.freeze()
learn.fit(learning_rate, 3, cycle_len=1, cycle_mult=2)
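# Before the final unfreeze-and-fit below, a note on how long these SGDR schedules actually run: `fit` runs the given number of cycles, and `cycle_mult` makes each cycle that many times longer than the previous one. The helper below is an added sketch of that arithmetic (`total_epochs` is a hypothetical name, not a fast.ai function):

# In[ ]:

# Added sketch: total epochs run by an SGDR schedule of n_cycles cycles
def total_epochs(n_cycles, cycle_len, cycle_mult):
    # Cycle lengths are cycle_len, cycle_len*cycle_mult, cycle_len*cycle_mult**2, ...
    return sum(cycle_len * cycle_mult**i for i in range(n_cycles))

print(total_epochs(3, cycle_len=1, cycle_mult=2))  # 1+2+4 = 7 epochs, like the fit call above
print(total_epochs(3, cycle_len=3, cycle_mult=2))  # 3+6+12 = 21 epochs, like the fit call below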
# In[36]:

# (Interrupted the kernel here when it started running for 21 epochs. This looks like it is not a fast.ai bug:
# with 3 cycles, cycle_len=3 and cycle_mult=2, the cycles run for 3+6+12 = 21 epochs, as the sketch above shows.)
learn.unfreeze()
learn.fit(differential_learning_rates, 3, cycle_len=3, cycle_mult=2)

# In[37]:

learn.save(f'{image_size}')

# In[38]:

# Finally, get our predictions using test time augmentation
# Reminder: TTA = Test Time Augmentation -- apply the data augmentation transforms to the validation/test
# images as well, predict on each augmented version, and average the results
multi_label_classification_predictions, target_values = learn.TTA()

# In[39]:

predictions = np.mean(multi_label_classification_predictions, 0)

# In[44]:

# predictions contains the predicted probability for each of our 17 classes
# (see the thresholding sketch at the end of the notebook for turning these into actual tag lists)
predictions[:5]

# In[45]:

# target_values contains the actual correct classes for each image
target_values[:5]

# In[40]:

f2(predictions, target_values)

# # Note on structured/unstructured data
# In the next lesson, we will look at processing structured and unstructured data.
#
# These are not 100% standardized terms, but generally we think of them like this:
#
# 1. __Unstructured data__: Every "piece" of the data objects we're processing is the same kind of thing. For example, in an image, everything is a pixel.
#
# 2. __Structured data__: The parts of the data objects mean different things. For example, if we are processing sales reports, one column could contain profit, another weather data, another quarterly dates, etc.
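# Appendix: the thresholding sketch referenced above. Since sigmoid scores each class independently, we turn the per-class probabilities into tag predictions by keeping every class above a threshold. These cells are added illustrations: the 0.2 threshold is an assumed value worth tuning on the validation set, and the contest's f2 helper may well pick its threshold differently.

# In[ ]:

# Added sketch: convert per-class probabilities into tag lists with a fixed threshold
threshold = 0.2  # assumed value, not from the lesson
for image_probabilities in predictions[:5]:
    tags = [c for c, p in zip(data.classes, image_probabilities) if p > threshold]
    print(" ".join(tags))

# In[ ]:

# Added sketch: an F2-style score computed directly with scikit-learn, for comparison with f2 above
from sklearn.metrics import fbeta_score
fbeta_score(target_values, predictions > threshold, beta=2, average='samples')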