import pandas as pd
import numpy as np
import os
import cv2
import random
import matplotlib.pyplot as plt
import torch
from torch.utils.data import Dataset
import albumentations
# download data: https://www.kaggle.com/c/bengaliai-cv19/data
# Directory that holds the Kaggle competition files.
data_dir = '../input/'

# The training images ship as four parquet shards, numbered 0 through 3.
shard_ids = range(4)
files_train = [f'train_image_data_{fid}.parquet' for fid in shard_ids]
print(files_train)
['train_image_data_0.parquet', 'train_image_data_1.parquet', 'train_image_data_2.parquet', 'train_image_data_3.parquet']
# Load only the first shard; the remaining shards are not read here.
first_shard = files_train[0]
F = os.path.join(data_dir, first_shard)
train0 = pd.read_parquet(F)
# Notebook-style preview of the first rows (the value is discarded in a script).
train0.head()
image_id | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | ... | 32322 | 32323 | 32324 | 32325 | 32326 | 32327 | 32328 | 32329 | 32330 | 32331 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Train_0 | 254 | 253 | 252 | 253 | 251 | 252 | 253 | 251 | 251 | ... | 253 | 253 | 253 | 253 | 253 | 253 | 253 | 253 | 253 | 251 |
1 | Train_1 | 251 | 244 | 238 | 245 | 248 | 246 | 246 | 247 | 251 | ... | 255 | 255 | 255 | 255 | 255 | 255 | 255 | 255 | 255 | 254 |
2 | Train_2 | 251 | 250 | 249 | 250 | 249 | 245 | 247 | 252 | 252 | ... | 254 | 253 | 252 | 252 | 253 | 253 | 253 | 253 | 251 | 249 |
3 | Train_3 | 247 | 247 | 249 | 253 | 253 | 252 | 251 | 251 | 250 | ... | 254 | 254 | 254 | 254 | 254 | 253 | 253 | 252 | 251 | 252 |
4 | Train_4 | 249 | 248 | 246 | 246 | 248 | 244 | 242 | 242 | 229 | ... | 255 | 255 | 255 | 255 | 255 | 255 | 255 | 255 | 255 | 255 |
5 rows × 32333 columns
# Report the (rows, columns) shape of the first training shard.
shard_index = 0
print('Train Image Data{} Shape is : {}'.format(shard_index, train0.shape))
Train Image Data0 Shape is : (50210, 32333)
데이터가 크기 때문에, 전체 데이터를 읽을 때는 parquet으로 읽는 것보다 feather 형식으로 읽는 것이 30배 정도 더 빠릅니다.
하지만 일부의 데이터만 볼 것이기 때문에 parquet으로 읽어서 진행하겠습니다.
먼저 augmentation을 진행하기에 앞서, Dataset을 만들 것입니다.
이미지 사이즈는 137 * 236 (높이 * 너비)입니다.
class BengaliDataset(Dataset):
    """Dataset that turns rows of flattened pixel values into 2-D uint8 images.

    Each row of ``df`` is reshaped to ``(img_height, img_width)``, inverted
    (``255 - pixel``) and rescaled so that its brightest pixel becomes 255.

    Args:
        df: DataFrame whose rows each hold ``img_height * img_width`` pixel
            values. Any non-pixel column (e.g. an id) must already have been
            moved out of the columns, because the whole row is reshaped.
        img_height: Number of rows in each image.
        img_width: Number of columns in each image.
    """

    def __init__(self, df, img_height, img_width):
        self.df = df
        self.img_height = img_height
        self.img_width = img_width

    def __len__(self):
        """Return the number of images (rows of ``df``)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return image ``idx`` as an inverted, contrast-stretched uint8 array."""
        pixels = self.df.iloc[idx].values.astype(np.uint8)
        img = 255 - pixels.reshape(self.img_height, self.img_width)
        peak = img.max()
        if peak == 0:
            # A blank image (every source pixel 255) has nothing to stretch;
            # returning it as-is avoids a division by zero and NaN pixels.
            return img
        return (img * (255.0 / peak)).astype(np.uint8)
# Image geometry used when reshaping each flattened row.
HEIGHT = 137
WIDTH = 236

# Move the id column into the index so that only pixel columns remain.
train0.set_index('image_id', inplace=True)
image = BengaliDataset(train0, img_height=HEIGHT, img_width=WIDTH)

# Show the first five images side by side, without augmentation.
nrow, ncol = 1, 5
fig, axs = plt.subplots(nrow, ncol, figsize=(20, 10))
axs = axs.flatten()
for i in range(len(axs)):
    ax = axs[i]
    img = image[i]
    ax.imshow(img)
    ax.set_title('label: Original')
    ax.axis('off')
plt.tight_layout()
Blur the input image using a random-sized kernel.
임의크기의 커널을 사용하여 이미지를 흐리게 만듭니다.
import albumentations as A

# Blur with a randomly sized kernel. p=1.0 guarantees that every image shown
# in the right-hand column really is blurred; with p=0.5 roughly half of the
# panels titled "Blur Image" would be unchanged copies of the original.
aug = A.Blur(p=1.0)

# One row per sample: original on the left, augmented result on the right.
nrow, ncol = 5, 2
fig, axs = plt.subplots(nrow, ncol, figsize=(20, 10))
for n in range(nrow):
    img = image[n]
    aug_image = aug(image=img)['image']
    axs[n, 0].imshow(img)
    axs[n, 0].set_title('label: Original')
    axs[n, 0].axis('off')
    axs[n, 1].imshow(aug_image)
    axs[n, 1].set_title('label: Blur Image')
    axs[n, 1].axis('off')
plt.tight_layout()
Apply gaussian noise to the input image.
이미지에 noise를 더하여, 좀 더 robust한 결과를 만들도록 합니다.
import albumentations as A

# The dataset yields uint8 images, so the noise variance must be given on the
# 0-255 pixel scale. The previous value (5/255, about 0.02) is a [0, 1]-scale
# setting and produced noise far below one grey level, i.e. no visible change.
# NOTE(review): (10.0, 50.0) is the documented default of the legacy
# ``var_limit`` API - confirm against the installed albumentations version.
aug = A.GaussNoise(var_limit=(10.0, 50.0), p=1.0)

# One row per sample: original on the left, noisy result on the right.
nrow, ncol = 5, 2
fig, axs = plt.subplots(nrow, ncol, figsize=(20, 10))
for n in range(nrow):
    img = image[n]
    aug_image = aug(image=img)['image']
    axs[n, 0].imshow(img)
    axs[n, 0].set_title('label: Original')
    axs[n, 0].axis('off')
    axs[n, 1].imshow(aug_image)
    axs[n, 1].set_title('label: Gauss Noise')
    axs[n, 1].axis('off')
plt.tight_layout()
Coarse dropout of square regions in the image.
이미지에 Dropout을 적용한다고 생각하시면 될 것 같습니다.
import albumentations as A

# Erase up to eight square patches of at most 20x20 pixels from every image.
aug = A.Cutout(num_holes=8, max_h_size=20, max_w_size=20, p=1.0)

# One row per sample: original on the left, augmented result on the right.
nrow, ncol = 5, 2
fig, axs = plt.subplots(nrow, ncol, figsize=(20, 10))
for n, (orig_ax, aug_ax) in enumerate(axs):
    img = image[n]
    aug_image = aug(image=img)['image']
    panels = (
        (orig_ax, img, 'label: Original'),
        (aug_ax, aug_image, 'label: Cut Out'),
    )
    for ax, picture, title in panels:
        ax.imshow(picture)
        ax.set_title(title)
        ax.axis('off')
plt.tight_layout()
Randomly change brightness and contrast of the input image.
이미지의 밝기와 대비를 임의로 변경합니다.
import albumentations as A

# Randomly perturb brightness and contrast of every image (p=1.0).
aug = A.RandomBrightnessContrast(p=1.0)

# One row per sample: original on the left, augmented result on the right.
nrow, ncol = 5, 2
fig, axs = plt.subplots(nrow, ncol, figsize=(20, 10))
for n, (orig_ax, aug_ax) in enumerate(axs):
    img = image[n]
    aug_image = aug(image=img)['image']
    panels = (
        (orig_ax, img, 'label: Original'),
        (aug_ax, aug_image, 'label: RandomBrightnessContrast'),
    )
    for ax, picture, title in panels:
        ax.imshow(picture)
        ax.set_title(title)
        ax.axis('off')
plt.tight_layout()
Randomly apply affine transforms: translate, scale and rotate the input image.
이미지의 크기나 회전을 임의로 변형시킵니다.
import albumentations as A

# Random translation, zoom and rotation, applied to every image (p=1.0).
aug = A.ShiftScaleRotate(
    shift_limit=0.0625,
    scale_limit=0.1,
    rotate_limit=30,
    p=1.0,
)

# One row per sample: original on the left, augmented result on the right.
nrow, ncol = 5, 2
fig, axs = plt.subplots(nrow, ncol, figsize=(20, 10))
for n in range(nrow):
    img = image[n]
    aug_image = aug(image=img)['image']
    axs[n, 0].imshow(img)
    axs[n, 0].set_title('label: Original')
    axs[n, 0].axis('off')
    axs[n, 1].imshow(aug_image)
    # Title previously misspelled the transform name as "ShiftSclaeRotate".
    axs[n, 1].set_title('label: ShiftScaleRotate')
    axs[n, 1].axis('off')
plt.tight_layout()
Place a regular grid of points on the input and randomly move the neighbourhood of these points around via affine transformations.
이미지의 격자 내에서 점의 주변을 임의로 이동시킵니다.
import albumentations as A

# Local grid-based warping, applied to every image (p=1.0).
aug = A.IAAPiecewiseAffine(p=1.0)

# One row per sample: original on the left, augmented result on the right.
nrow, ncol = 5, 2
fig, axs = plt.subplots(nrow, ncol, figsize=(20, 10))
for n, (orig_ax, aug_ax) in enumerate(axs):
    img = image[n]
    aug_image = aug(image=img)['image']
    panels = (
        (orig_ax, img, 'label: Original'),
        (aug_ax, aug_image, 'label: Affine'),
    )
    for ax, picture, title in panels:
        ax.imshow(picture)
        ax.set_title(title)
        ax.axis('off')
plt.tight_layout()
Albumentations를 기반으로 AugMix가 어떤 것인지 알아보겠습니다.
먼저 아래의 사진 4장을 보면서, 대략적인 감을 잡으실 수 있을겁니다.
CutOut - 이미지의 임의의 부분을 제거
MixUp - 이미지 간 확률적으로 두 이미지를 섞습니다.
CutMix - CutOut + MixUp
AugMix - 위에서 소개한 Augmentation기법들을 섞습니다.
CIFAR-10 실험결과, AugMix는 다른 기법들보다 우수하게 성적을 냈다고 합니다.
Augmix: https://arxiv.org/abs/1912.02781
Official implementation: https://github.com/google-research/augmix
from albumentations import HorizontalFlip
from albumentations.core.transforms_interface import ImageOnlyTransform
class AugMix(ImageOnlyTransform):
    """Blend an image with a weighted mix of randomly augmented copies (AugMix).

    ``width`` augmentation chains are built from the input image. Each chain
    applies ``depth`` operations drawn at random (with replacement) from
    ``augmentations``. The chain outputs are combined with Dirichlet-distributed
    weights, and that mixture is blended with the original image using a
    Beta-distributed coefficient. Both sets of weights are re-sampled on every
    call, so each image receives its own mix.

    Args:
        width (int): Number of augmentation chains that are mixed together.
        depth (int): Number of operations per chain. A value below 1 (e.g. -1)
            enables stochastic depth, drawn uniformly from [1, 3] per chain.
        alpha (float): Concentration parameter of the Dirichlet and Beta
            distributions.
        augmentations (list of augmentations): Transforms to draw operations
            from. Each must accept ``image=`` and return a dict with an
            ``"image"`` entry. Defaults to ``[HorizontalFlip()]``.
        always_apply (bool): Forwarded to ``ImageOnlyTransform``.
        p (float): Forwarded to ``ImageOnlyTransform``.

    Targets:
        image

    Image types:
        uint8, float32

    Raises:
        ValueError: If ``augmentations`` is an empty sequence.

    Reference:
        | https://arxiv.org/abs/1912.02781
        | https://github.com/google-research/augmix
    """

    def __init__(self, width=2, depth=2, alpha=0.5, augmentations=None, always_apply=False, p=0.5):
        super(AugMix, self).__init__(always_apply, p)
        self.width = width
        self.depth = depth
        self.alpha = alpha
        # Build the default per instance: a list literal in the signature would
        # be created once and shared by every AugMix object.
        if augmentations is None:
            augmentations = [HorizontalFlip()]
        self.augmentations = list(augmentations)
        if not self.augmentations:
            raise ValueError("augmentations must contain at least one transform")

    def apply_op(self, image, op):
        """Run a single transform ``op`` on ``image`` and return the new image."""
        return op(image=image)["image"]

    def apply(self, img, **params):
        """Return the AugMix result for ``img`` with the same dtype as ``img``."""
        # Fresh mixing weights for every image; sampling them once in __init__
        # would give every sample the identical mix.
        chain_weights = np.float32(np.random.dirichlet([self.alpha] * self.width))
        blend = np.float32(np.random.beta(self.alpha, self.alpha))

        # Accumulate in float32: an accumulator of the input's integer dtype
        # would truncate every weighted contribution as it is added.
        mix = np.zeros(img.shape, dtype=np.float32)
        for weight in chain_weights:
            image_aug = img.copy()
            depth = self.depth if self.depth > 0 else np.random.randint(1, 4)
            for _ in range(depth):
                # Pick by index so the transform objects are never coerced
                # into a NumPy array.
                op = self.augmentations[np.random.randint(len(self.augmentations))]
                image_aug = self.apply_op(image_aug, op)
            mix += weight * image_aug.astype(np.float32)

        mixed = (1 - blend) * img.astype(np.float32) + blend * mix
        if np.issubdtype(img.dtype, np.integer):
            # Clip to the valid range of the input's own integer type instead
            # of always assuming 8 bits.
            limits = np.iinfo(img.dtype)
            mixed = np.clip(mixed, limits.min, limits.max)
        return mixed.astype(img.dtype)

    def get_transform_init_args_names(self):
        """Return the names of the init arguments reported for serialization."""
        return ("width", "depth", "alpha")
# Pool of operations AugMix may draw from; each is forced on
# (always_apply=True) so that a drawn operation is never silently skipped.
augs = [
    transform_cls(always_apply=True)
    for transform_cls in (
        A.HorizontalFlip,
        A.Blur,
        A.ShiftScaleRotate,
        A.GaussNoise,
        A.Cutout,
        A.IAAPiecewiseAffine,
    )
]

# Training pipeline: a single AugMix step mixing three chains of two ops each.
transforms_train = albumentations.Compose(
    [AugMix(width=3, depth=2, alpha=.4, p=1., augmentations=augs)]
)
class Aug_BengaliDataset(Dataset):
    """Dataset of 2-D uint8 images built from flattened rows, optionally augmented.

    Each row of ``df`` is reshaped to ``(img_height, img_width)``, inverted
    (``255 - pixel``) and rescaled so that its brightest pixel becomes 255.
    If ``transform`` is given it is then called as ``transform(image=img)`` and
    the ``'image'`` entry of its result is returned.

    Args:
        df: DataFrame whose rows each hold ``img_height * img_width`` pixel
            values. Any non-pixel column (e.g. an id) must already have been
            moved out of the columns, because the whole row is reshaped.
        img_height: Number of rows in each image.
        img_width: Number of columns in each image.
        transform: Optional callable taking ``image=`` and returning a mapping
            with an ``'image'`` key; ``None`` disables augmentation.
    """

    def __init__(self, df, img_height, img_width, transform=None):
        self.df = df
        self.img_height = img_height
        self.img_width = img_width
        self.transform = transform

    def __len__(self):
        """Return the number of images (rows of ``df``)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return image ``idx``, preprocessed and (if configured) augmented."""
        pixels = self.df.iloc[idx].values.astype(np.uint8)
        img = 255 - pixels.reshape(self.img_height, self.img_width)
        peak = img.max()
        if peak > 0:
            # Stretch contrast so the brightest pixel is 255. Blank images
            # (peak == 0) are skipped to avoid dividing by zero.
            img = (img * (255.0 / peak)).astype(np.uint8)
        if self.transform is not None:
            img = self.transform(image=img)['image']
        return img
# Same data as `image`, but every item is passed through the AugMix pipeline.
aug_image = Aug_BengaliDataset(
    train0,
    img_height=HEIGHT,
    img_width=WIDTH,
    transform=transforms_train,
)

# One row per sample: original on the left, AugMix result on the right.
nrow, ncol = 5, 2
fig, axs = plt.subplots(nrow, ncol, figsize=(15, 10))
for n, (orig_ax, mix_ax) in enumerate(axs):
    img = image[n]
    aug_img = aug_image[n]
    panels = (
        (orig_ax, img, 'label: Original'),
        (mix_ax, aug_img, 'label: Augmix'),
    )
    for ax, picture, title in panels:
        ax.imshow(picture)
        ax.set_title(title)
        ax.axis('off')
plt.tight_layout()
plt.show()