image_id	0	1	2	3	4	5	6	7	8	...	32322	32323	32324	32325	32326	32327	32328	32329	32330	32331
0	Train_0	254	253	252	253	251	252	253	251	251	...	253	253	253	253	253	253	253	253	253	251
1	Train_1	251	244	238	245	248	246	246	247	251	...	255	255	255	255	255	255	255	255	255	254
2	Train_2	251	250	249	250	249	245	247	252	252	...	254	253	252	252	253	253	253	253	251	249
3	Train_3	247	247	249	253	253	252	251	251	250	...	254	254	254	254	254	253	253	252	251	252
4	Train_4	249	248	246	246	248	244	242	242	229	...	255	255	255	255	255	255	255	255	255	255

5 rows × 32333 columns

In [5]:

# Report the (rows, columns) shape of the first training parquet chunk.
shape_report = 'Train Image Data{} Shape is : {}'.format(0, train0.shape)
print(shape_report)

Train Image Data0 Shape is : (50210, 32333)

데이터가 크기 때문에, 전체 데이터를 읽을 때는 parquet으로 읽는 것보다 feather 형식으로 읽는 것이 30배 정도 더 빠릅니다.

하지만 일부의 데이터만 볼 것이기 때문에 parquet으로 읽어서 진행하겠습니다.

먼저 augmentation을 진행하기 앞서, Dataset을 만들것입니다.

사이즈는 137 * 236 입니다.

In [6]:

class BengaliDataset(Dataset):
    """Dataset that turns each row of a pixel table into a 2-D uint8 image.

    Every row of ``df`` must hold ``img_height * img_width`` pixel values.
    The pixels are inverted (``255 - value``) and then stretched so that the
    brightest pixel of the inverted image becomes 255.

    Args:
        df: Table with one flattened image per row; every column of a row is
            read as a pixel value.
        img_height: Number of rows in the reshaped image.
        img_width: Number of columns in the reshaped image.
    """

    def __init__(self, df, img_height, img_width):
        self.df = df
        self.img_height = img_height
        self.img_width = img_width

    def __len__(self):
        """Return the number of rows (images) in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the image at positional index ``idx``.

        Returns:
            ``np.ndarray`` of shape ``(img_height, img_width)`` and dtype uint8.
        """
        img = self.df.iloc[idx].values.astype(np.uint8)
        img = 255 - img.reshape(self.img_height, self.img_width)

        peak = img.max()
        if peak == 0:
            # A completely white source image inverts to all zeros. There is
            # nothing to stretch, and 255.0 / 0 would yield inf/NaN that is
            # then cast to uint8 with undefined results.
            return img

        return (img * (255.0 / peak)).astype(np.uint8)

In [7]:

# Image geometry: 137 * 236 = 32332, one value per pixel column (0..32331).
HEIGHT = 137
WIDTH = 236

# Move the string id out of the data columns so every remaining column is a
# pixel. The guard keeps the cell re-runnable: once 'image_id' is the index it
# is no longer a column and a second set_index would raise KeyError.
if 'image_id' in train0.columns:
    train0.set_index('image_id', inplace=True)
image = BengaliDataset(train0, img_height=HEIGHT, img_width=WIDTH)

Albumentation Tutorial¶

Original¶

In [8]:

# Show the first few preprocessed samples side by side, without augmentation.
nrow, ncol = 1, 5

fig, axs = plt.subplots(nrows=nrow, ncols=ncol, figsize=(20, 10))
axs = axs.flatten()

for i, ax in zip(range(nrow * ncol), axs):
    img = image[i]
    ax.imshow(img)
    ax.set_title('label: Original')
    ax.axis('off')

plt.tight_layout()

Blur¶

Blur the input image using a random-sized kernel.

임의크기의 커널을 사용하여 이미지를 흐리게 만듭니다.

In [9]:

import albumentations as A

# p=1.0 so that every right-hand panel really is blurred. With p=0.5 roughly
# half of the panels titled "Blur Image" would show the untouched original.
aug = A.Blur(p=1.0)

nrow, ncol = 5, 2
fig, axs = plt.subplots(nrow, ncol, figsize=(20, 10))

# One figure row per sample: original on the left, augmented on the right.
for n, (left_ax, right_ax) in enumerate(axs):
    img = image[n]
    aug_image = aug(image=img)['image']

    for ax, picture, caption in (
        (left_ax, img, 'label: Original'),
        (right_ax, aug_image, 'label: Blur Image'),
    ):
        ax.imshow(picture)
        ax.set_title(caption)
        ax.axis('off')

plt.tight_layout()

Noise¶

Apply gaussian noise to the input image.

이미지에 noise를 더하여, 좀 더 robust한 결과를 만들도록 합니다.

In [10]:

import albumentations as A

# The dataset yields uint8 images, so the noise variance must be expressed on
# the 0-255 pixel scale. The previous value (5/255) is a [0, 1]-scale setting
# whose noise is too small to survive the cast back to uint8, leaving the
# "Gauss Noise" panels identical to the originals. (10.0, 50.0) is the
# library's documented default range.
aug = A.GaussNoise(var_limit=(10.0, 50.0), p=1.0)

nrow, ncol = 5, 2
fig, axs = plt.subplots(nrow, ncol, figsize=(20, 10))

# One figure row per sample: original on the left, augmented on the right.
for n, (left_ax, right_ax) in enumerate(axs):
    img = image[n]
    aug_image = aug(image=img)['image']

    for ax, picture, caption in (
        (left_ax, img, 'label: Original'),
        (right_ax, aug_image, 'label: Gauss Noise'),
    ):
        ax.imshow(picture)
        ax.set_title(caption)
        ax.axis('off')

plt.tight_layout()

Cut Out¶

Coarse dropout of square regions in the image.

이미지에 Dropout을 적용한다고 생각하시면 될 것 같습니다.

In [11]:

import albumentations as A

# Cutout configured with 8 holes of at most 20x20 pixels, applied to every sample.
aug = A.Cutout(num_holes=8, max_h_size=20, max_w_size=20, p=1.0)

nrow, ncol = 5, 2
fig, axs = plt.subplots(nrows=nrow, ncols=ncol, figsize=(20, 10))

# One figure row per sample: original on the left, augmented on the right.
for n, (left_ax, right_ax) in enumerate(axs):
    img = image[n]
    aug_image = aug(image=img)['image']

    for ax, picture, caption in (
        (left_ax, img, 'label: Original'),
        (right_ax, aug_image, 'label: Cut Out'),
    ):
        ax.imshow(picture)
        ax.set_title(caption)
        ax.axis('off')

plt.tight_layout()

Brightness, Contrast¶

Randomly change brightness and contrast of the input image.

이미지의 밝기와 대비를 임의로 변경합니다.

In [12]:

import albumentations as A

# Random brightness/contrast change with library defaults, applied to every sample.
aug = A.RandomBrightnessContrast(p=1.0)

nrow, ncol = 5, 2
fig, axs = plt.subplots(nrows=nrow, ncols=ncol, figsize=(20, 10))

# One figure row per sample: original on the left, augmented on the right.
for n, (left_ax, right_ax) in enumerate(axs):
    img = image[n]
    aug_image = aug(image=img)['image']

    panels = (
        (left_ax, img, 'label: Original'),
        (right_ax, aug_image, 'label: RandomBrightnessContrast'),
    )
    for ax, picture, caption in panels:
        ax.imshow(picture)
        ax.set_title(caption)
        ax.axis('off')

plt.tight_layout()

Scale, Rotate¶

Randomly apply affine transforms: translate, scale and rotate the input

이미지의 크기나 회전을 임의로 변형시킵니다.

In [13]:

import albumentations as A

# Random shift, scale and rotation, applied to every sample.
aug = A.ShiftScaleRotate(
    shift_limit=0.0625,
    scale_limit=0.1,
    rotate_limit=30,
    p=1.0,
)

nrow, ncol = 5, 2
fig, axs = plt.subplots(nrow, ncol, figsize=(20, 10))

# One figure row per sample: original on the left, augmented on the right.
for n, (left_ax, right_ax) in enumerate(axs):
    img = image[n]
    aug_image = aug(image=img)['image']

    for ax, picture, caption in (
        (left_ax, img, 'label: Original'),
        # Title previously read "ShiftSclaeRotate" (typo).
        (right_ax, aug_image, 'label: ShiftScaleRotate'),
    ):
        ax.imshow(picture)
        ax.set_title(caption)
        ax.axis('off')

plt.tight_layout()

Affine¶

Place a regular grid of points on the input and randomly move the neighbourhood of these points around via affine transformations.

이미지의 격자 내에서 점의 주변을 임의로 이동시킵니다.

In [14]:

import albumentations as A

# Piecewise-affine distortion with library defaults, applied to every sample.
aug = A.IAAPiecewiseAffine(p=1.0)

nrow, ncol = 5, 2
fig, axs = plt.subplots(nrows=nrow, ncols=ncol, figsize=(20, 10))

# One figure row per sample: original on the left, augmented on the right.
for n, (left_ax, right_ax) in enumerate(axs):
    img = image[n]
    aug_image = aug(image=img)['image']

    panels = (
        (left_ax, img, 'label: Original'),
        (right_ax, aug_image, 'label: Affine'),
    )
    for ax, picture, caption in panels:
        ax.imshow(picture)
        ax.set_title(caption)
        ax.axis('off')

plt.tight_layout()

AugMix¶

Albumentations를 기반으로 AugMix가 어떤 것인지 알아보겠습니다.

먼저 아래의 사진 4장을 보면서, 대략적인 감을 잡으실 수 있을겁니다.

CutOut - 이미지의 임의의 부분을 제거

MixUp - 이미지 간 확률적으로 두 이미지를 섞습니다.

CutMix - CutOut + MixUp

AugMix - 위에서 소개한 Augmentation기법들을 섞습니다.

CIFAR-10 실험결과, AugMix는 다른 기법들보다 우수하게 성적을 냈다고 합니다.

Augmix: https://arxiv.org/abs/1912.02781

Official implementation: https://github.com/google-research/augmix

In [16]:

from albumentations import HorizontalFlip
from albumentations.core.transforms_interface import ImageOnlyTransform

class AugMix(ImageOnlyTransform):
    """Mix several randomly augmented copies of an image (AugMix).

    ``width`` augmentation chains are built from the input image. Each chain
    applies ``depth`` operations drawn at random from ``augmentations``. The
    chains are combined with Dirichlet(alpha) weights, and the combination is
    blended with the original image using a Beta(alpha, alpha) coefficient.
    Fresh weights are drawn for every call to ``apply``.

    Args:
        width (int): Number of augmentation chains to mix.
        depth (int): Number of operations per chain. A negative value draws
            the depth uniformly from [1, 3] for every chain.
        alpha (float): Concentration parameter of the Dirichlet and Beta
            distributions.
        augmentations (list): Operations to sample from; each is invoked as
            ``op(image=img)["image"]``. Defaults to ``[HorizontalFlip()]``.
        always_apply (bool): Forwarded to ``ImageOnlyTransform``.
        p (float): Forwarded to ``ImageOnlyTransform``.

    Attributes:
        ws, m: The most recently sampled chain weights and blend coefficient.

    Targets:
        image
    Image types:
        uint8, float32

    Reference:
    |  https://arxiv.org/abs/1912.02781
    |  https://github.com/google-research/augmix
    """

    def __init__(self, width=2, depth=2, alpha=0.5, augmentations=None, always_apply=False, p=0.5):
        super(AugMix, self).__init__(always_apply, p)
        self.width = width
        self.depth = depth
        self.alpha = alpha
        # Build the default per instance: a list literal in the signature
        # would be shared by every AugMix created without this argument.
        self.augmentations = [HorizontalFlip()] if augmentations is None else augmentations
        self.ws, self.m = self._sample_mixing_weights()

    def _sample_mixing_weights(self):
        """Draw one set of chain weights and one blend coefficient."""
        ws = np.float32(np.random.dirichlet([self.alpha] * self.width))
        m = np.float32(np.random.beta(self.alpha, self.alpha))
        return ws, m

    def apply_op(self, image, op):
        """Run a single augmentation ``op`` on ``image`` and return the result."""
        image = op(image=image)["image"]
        return image

    def apply(self, img, **params):
        """Return the AugMix blend of ``img``, with the same dtype as ``img``."""
        # Resample per image: weights fixed at construction time would give
        # every image the exact same mixture.
        ws, m = self._sample_mixing_weights()
        self.ws, self.m = ws, m

        # Accumulate in floating point. Adding weighted chains into an
        # integer buffer would truncate every partial sum.
        work_dtype = np.result_type(img.dtype, np.float32)
        mix = np.zeros(img.shape, dtype=work_dtype)

        for weight in ws:
            image_aug = img.copy()
            # Negative depth means "stochastic depth in [1, 3]".
            depth = self.depth if self.depth >= 0 else np.random.randint(1, 4)

            for _ in range(depth):
                op = self.augmentations[np.random.randint(len(self.augmentations))]
                image_aug = self.apply_op(image_aug, op)

            mix += weight * image_aug.astype(work_dtype)

        mixed = (1 - m) * img.astype(work_dtype) + m * mix
        if np.issubdtype(img.dtype, np.integer):
            # Clip to the input dtype's own range before casting back, so
            # wider integer images are not squeezed into 0..255.
            bounds = np.iinfo(img.dtype)
            mixed = np.clip(mixed, bounds.min, bounds.max)
        return mixed.astype(img.dtype)

    def get_transform_init_args_names(self):
        """Names of the constructor arguments reported for serialization."""
        return ("width", "depth", "alpha")

In [21]:

# Pool of operations the AugMix chains sample from. Each one is created with
# always_apply=True.
augs = [
    A.HorizontalFlip(always_apply=True),
    A.Blur(always_apply=True),
    A.ShiftScaleRotate(always_apply=True),
    A.GaussNoise(always_apply=True),
    A.Cutout(always_apply=True),
    A.IAAPiecewiseAffine(always_apply=True),
]

# Use the `A` alias like the rest of the notebook; the bare name
# `albumentations` is not bound by any import visible in these cells.
transforms_train = A.Compose([
    AugMix(width=3, depth=2, alpha=.4, p=1., augmentations=augs),
])

In [22]:

class Aug_BengaliDataset(Dataset):
    """Pixel-table dataset with an optional augmentation step.

    Every row of ``df`` must hold ``img_height * img_width`` pixel values.
    The pixels are inverted (``255 - value``), stretched so that the brightest
    pixel becomes 255, and then passed through ``transform`` if one is given.

    Args:
        df: Table with one flattened image per row; every column of a row is
            read as a pixel value.
        img_height: Number of rows in the reshaped image.
        img_width: Number of columns in the reshaped image.
        transform: Optional callable invoked as ``transform(image=img)`` that
            returns a mapping with the result under the ``'image'`` key.
    """

    def __init__(self, df, img_height, img_width, transform=None):
        self.df = df
        self.img_height = img_height
        self.img_width = img_width
        self.transform = transform

    def __len__(self):
        """Return the number of rows (images) in the table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the (optionally augmented) image at positional index ``idx``."""
        img = self.df.iloc[idx].values.astype(np.uint8)
        img = 255 - img.reshape(self.img_height, self.img_width)

        peak = img.max()
        if peak != 0:
            img = (img * (255.0 / peak)).astype(np.uint8)
        # else: a completely white source image inverts to all zeros. It is
        # left as-is because 255.0 / 0 would produce inf/NaN that is then cast
        # to uint8 with undefined results.

        if self.transform is None:
            return img
        return self.transform(image=img)['image']

In [23]:

# Same table as `image`, but every sample goes through the AugMix pipeline.
aug_image = Aug_BengaliDataset(
    df=train0,
    img_height=HEIGHT,
    img_width=WIDTH,
    transform=transforms_train,
)

In [24]:

# Compare each original sample (left) with its AugMix version (right).
nrow, ncol = 5, 2
fig, axs = plt.subplots(nrows=nrow, ncols=ncol, figsize=(15, 10))

for n, (left_ax, right_ax) in enumerate(axs):
    img = image[n]
    aug_img = aug_image[n]

    for ax, picture, caption in (
        (left_ax, img, 'label: Original'),
        (right_ax, aug_img, 'label: Augmix'),
    ):
        ax.imshow(picture)
        ax.set_title(caption)
        ax.axis('off')

plt.tight_layout()
plt.show()

Table of Contents