%reload_ext autoreload
%autoreload 2

from nb_005 import *
import pandas as pd

PATH = Path('data/faces/')

get_image_files

img_fns = get_image_files(PATH)
len(img_fns)

poses = pd.read_csv(PATH/'face_landmarks.csv')
poses.head()

pose_dict = {o[0]:o[1:].astype(np.float32) for o in poses.values}

def scale_coords(x, size, to_unit=True):
    "Scale pixel coords `x` to the [-1,1] range (or back), `size` being (height, width)."
    s = tensor([size[1]/2, size[0]/2])[None]
    return (x/s - 1) if to_unit else ((x+1) * s)

pose_pnts = []
for i, fname in enumerate(img_fns):
    size = open_image(fname).size
    coords = tensor(pose_dict[fname.name]).view(-1,2)
    pose_pnts.append(scale_coords(coords, size))

len(pose_pnts)
pose_pnts[0].shape, pose_pnts[0][0]

def show_pose(img, pnts, ax=None):
    if ax is None: _,ax = plt.subplots()
    img.show(ax, hide_axis=False)
    pnts = scale_coords(pnts, img.size, to_unit=False)
    ax.scatter(pnts[:, 0], pnts[:, 1], s=10, marker='.', c='r')

img = open_image(img_fns[0])
show_pose(img, pose_pnts[0])

@dataclass
class CoordTargetDataset(DatasetBase):
    x:List[Path]; y:List[List[float]]
    def __post_init__(self): assert len(self.x)==len(self.y)
    def __getitem__(self, i): return open_image(self.x[i]), self.y[i]

img_ds = CoordTargetDataset(img_fns, pose_pnts)

def affine_inv_mult(c, m, shape):
    "Applies the inverse affine transform described in m"
    size = c.size()
    _,h,w = shape
    m[0,1] *= h/w
    m[1,0] *= w/h
    c = c.view(-1,2)
    a = torch.inverse(m[:2,:2].t())
    c = torch.addmm(-torch.mv(a, m[:2,2]), c, a)
    return c.view(size)

def apply_pad_coords(c, padding_mode='reflect', **kwargs):
    "Apply the padding mode to the coords"
    if padding_mode=='zeros' or padding_mode=='border':
        mask = (c[:,0] >= -1) * (c[:,0] <= 1) * (c[:,1] >= -1) * (c[:,1] <= 1)
        return c[mask]
    elif padding_mode=='reflect':
        c[c < -1] = -2 - c[c < -1]
        c[c > 1]  =  2 - c[c > 1]
        return c

#export
class ImagePoints():
    "Wrap points in [-1,1] coords so they go through the same transform pipeline as `Image`."
    def __init__(self, pts, shape):
        self._flow = pts
        self._affine_mat = None
        self.sample_kwargs = {}
        self._shape = shape

    @property
    def shape(self): return self._shape
    def __repr__(self): return f'{self.__class__.__name__} ({self.shape})'

    @property
    def px(self): return self.flow
    @px.setter
    def px(self,v): self._px = v

    @property
    def flow(self):
        # Lazily apply the accumulated affine matrix (inverted) and the padding mode to the points.
        if self._flow is None: self._flow = affine_grid(self.shape)
        if self._affine_mat is not None:
            self._flow = affine_inv_mult(self._flow, self._affine_mat, self._shape)
            self._flow = apply_pad_coords(self._flow, **self.sample_kwargs)
            self._affine_mat = None
        return self._flow
    @flow.setter
    def flow(self,v): self._flow = v

    def lighting(self, func, *args, **kwargs): return self #TODO: special func for points
    def pixel(self, func, *args, **kwargs):
        self.px = func(self.px, *args, **kwargs)
        return self
    def coord(self, func, *args, **kwargs):
        self.flow = func(self.flow, self.shape, *args, invert=True, **kwargs)
        return self
    def affine(self, func, *args, **kwargs):
        m = func(*args, **kwargs)
        self.affine_mat = self.affine_mat @ self._flow.new(m)
        return self
    def set_sample(self, **kwargs):
        self.sample_kwargs = kwargs
        return self
    def resize(self, size):
        assert self._flow is None
        if isinstance(size, int): size = (self.shape[0], size, size)
        self.flow = affine_grid(size)
        return self

    @property
    def affine_mat(self):
        if self._affine_mat is None: self._affine_mat = self._flow.new(torch.eye(3))
        return self._affine_mat
    @affine_mat.setter
    def affine_mat(self,v): self._affine_mat = v

    def clone(self): return self.__class__(self.flow.clone(), self.shape)
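# A quick check of scale_coords with made-up numbers (not from the original notebook):
# assuming `size` is (height, width), a point on the bottom-right corner of a 120x160
# image should map to (1,1) in grid coords and back to its original pixel position.
pts  = tensor([[160., 120.], [80., 60.]])            # (x,y) pixel coords in a 120x160 image
unit = scale_coords(pts, (120, 160))                 # -> [[1., 1.], [0., 0.]]
back = scale_coords(unit, (120, 160), to_unit=False)
assert torch.allclose(back, pts)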
def apply_tfms(tfms, x, do_resolve=True, xtra=None, size=None, is_coord=False, x_sz=None, **kwargs):
    if not tfms: return x
    if not xtra: xtra = {}
    tfms = sorted(listify(tfms), key=lambda o: o.tfm.order)
    if do_resolve: resolve_tfms(tfms)
    x = Image(x.clone()) if not is_coord else ImagePoints(x.clone(), x_sz)
    if kwargs: x.set_sample(**kwargs)
    if size:   x.resize(size)
    for tfm in tfms:
        if tfm.tfm in xtra: x = tfm(x, **xtra[tfm.tfm])
        else:               x = tfm(x)
    return x.px

TfmY = IntEnum('TfmY', 'No Pixel Mask Coord BBox')

class DatasetTfm(Dataset):
    def __init__(self, ds:Dataset, tfms:Collection[Callable]=None, tfm_y:TfmY=TfmY.No, **kwargs):
        self.ds,self.tfms,self.tfm_y,self.kwargs = ds,tfms,tfm_y,kwargs

    def __len__(self): return len(self.ds)
    def __getattr__(self, k): return getattr(self.ds, k)

    def __getitem__(self, idx):
        if isinstance(idx, tuple): idx,xtra = idx
        else: xtra = {}
        x,y = self.ds[idx]
        if self.tfm_y == TfmY.Coord: x_sz = x.size()
        x = apply_tfms(self.tfms, x, **{**self.kwargs, **xtra})
        if self.tfm_y == TfmY.Coord:
            y = apply_tfms(self.tfms, y, do_resolve=False, is_coord=True, x_sz=x_sz, **{**self.kwargs, **xtra})
        return x, y

tfms = [rotate(degrees=(-30,30.))]
tfm_ds = DatasetTfm(img_ds, tfms, TfmY.Coord, padding_mode='zeros')

def show_pose(img, pnts, ax=None):
    if ax is None: _,ax = plt.subplots()
    ax.imshow(img.numpy().transpose(1,2,0))
    w,h = img.size(2),img.size(1)
    if len(pnts) != 0:
        pnts = (pnts.numpy() + 1) * np.array([w/2,h/2])[None]
        ax.scatter(pnts[:, 0], pnts[:, 1], s=10, marker='.', c='r')

_,axs = plt.subplots(1,4, figsize=(12,6))
for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)

tfms = [zoom(scale=(1,2), row_pct=(0,1), col_pct=(0,1))]
tfm_ds = DatasetTfm(img_ds, tfms, TfmY.Coord, padding_mode='reflect')
_,axs = plt.subplots(1,4, figsize=(12,6))
for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)

def apply_pad_coords(c, padding_mode):
    "Apply the padding mode to the coords"
    if padding_mode=='zeros' or padding_mode=='border':
        mask = (c[:,0] >= -1) * (c[:,0] <= 1) * (c[:,1] >= -1) * (c[:,1] <= 1)
        return c[mask]
    elif padding_mode=='reflect':
        c[c < -1] = -2 - c[c < -1]
        c[c > 1]  =  2 - c[c > 1]
        return c

def affine_inv_mult(c, m):
    "Applies the inverse affine transform described in m"
    size = c.size()
    c = c.view(-1,2)
    a = torch.inverse(m[:2,:2].t())
    c = torch.addmm(-torch.mv(a, m[:2,2]), c, a)
    return c.view(size)
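# Small sanity check with a hand-built matrix (not from the original notebook): with a
# horizontal-flip matrix, affine_inv_mult should simply negate the x coordinate of every
# point, which is exactly what the flip_lr transform does to coords further down.
m = torch.eye(3)
m[0,0] = -1.                                   # horizontal flip in [-1,1] grid coords
pts = tensor([[0.5, 0.25], [-1., 1.]])
assert torch.allclose(affine_inv_mult(pts, m), tensor([[-0.5, 0.25], [1., 1.]]))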
@dataclass
class TargetCoords():
    coords:Tensor
    img_size:torch.Size

    def size(self, i=None) -> Union[int,torch.Size]:
        return self.img_size if i is None else self.img_size[i]

    def clone(self): return TargetCoords(self.coords.clone(), self.img_size)

tc = TargetCoords(pose_pnts[0], open_image(img_fns[0]).size())
tc.size(), tc.size(1)

TfmY = IntEnum('TfmY', 'No Pixel Mask Coord BBox')

class DatasetTfm(Dataset):
    def __init__(self, ds:Dataset, tfms:Collection[Callable]=None, tfm_y:TfmY=TfmY.No, **kwargs):
        self.ds,self.tfms,self.tfm_y,self.kwargs = ds,tfms,tfm_y,kwargs

    def __len__(self): return len(self.ds)
    def __getattr__(self, k): return getattr(self.ds, k)

    def __getitem__(self, idx):
        if isinstance(idx, tuple): idx,xtra = idx
        else: xtra = {}
        x,y = self.ds[idx]
        if self.tfm_y == TfmY.Coord: y = TargetCoords(y, x.size())
        x = apply_tfms(self.tfms, x, **{**self.kwargs, **xtra})
        if self.tfm_y == TfmY.Coord:
            y = apply_tfms(self.tfms, y, do_resolve=False, is_coord=True, x_sz=x.size(), **{**self.kwargs, **xtra})
        return x, y

TfmY = IntEnum('TfmY', 'No Pixel Mask Coord BBox')

class TfmDataset(Dataset):
    def __init__(self, ds:Dataset, tfms:Collection[Callable]=None, tfm_y:TfmY=TfmY.No, **kwargs):
        self.ds,self.tfms,self.tfm_y,self.kwargs = ds,tfms,tfm_y,kwargs

    def __len__(self): return len(self.ds)

    def __getitem__(self, idx):
        x,y = self.ds[idx]
        if self.tfms is not None:
            tfm = apply_tfms(self.tfms)
            if self.tfm_y == TfmY.Coord: y = TargetCoords(y, x.size())
            x = tfm(x, **self.kwargs)
            if self.tfm_y != TfmY.No and y is not None:
                coord,seg = (self.tfm_y == TfmY.Coord),(self.tfm_y == TfmY.Mask)
                y = tfm(y, segmentation=seg, is_coord=coord, **self.kwargs).coords
        return x,y

def get_resize_target(img_sz, crop_target, do_crop=False):
    if crop_target is None: return None
    ch,r,c = img_sz
    target_r,target_c = crop_target
    ratio = (min if do_crop else max)(r/target_r, c/target_c)
    return ch, round(r/ratio), round(c/ratio)
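# Worked example with made-up sizes (not from the notebook): resizing a 3x192x128 image
# toward a 100x100 crop target. Without cropping we shrink by the larger ratio so the
# result fits inside the target; with do_crop=True we shrink by the smaller ratio so the
# target can then be cropped out of the result.
get_resize_target((3,192,128), (100,100))                # -> (3, 100, 67)
get_resize_target((3,192,128), (100,100), do_crop=True)  # -> (3, 150, 100)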
def _apply_affine(img, size=None, padding_mode='reflect', do_crop=False, aspect=None, mult=32,
                  mats=None, func=None, crop_func=None, is_coord=False, **kwargs):
    if size is not None and not is_listy(size):
        size = listify(size,2) if aspect is None else get_crop_target(size, aspect, mult)
    if (not mats) and func is None and size is None: return img
    resize_target = get_resize_target(img.size(), size, do_crop=do_crop)
    # For a coord target we transform the points themselves; for an image we build the sampling grid.
    c = img.coords if is_coord else affine_grid(img, torch.eye(3), size=resize_target)
    if func is not None: c = func(c, img.size(), invert=is_coord)
    if mats:
        m = affines_mat(mats)
        c = affine_mult(c, img.new_tensor(m)) if not is_coord else affine_inv_mult(c, img.coords.new_tensor(m))
    if is_coord:
        if resize_target is None: resize_target = img.size()
        res = TargetCoords(apply_pad_coords(c, padding_mode=padding_mode), torch.Size(resize_target))
    else: res = grid_sample(img, c, padding_mode=padding_mode, **kwargs)
    if padding_mode=='zeros': padding_mode='constant'
    if crop_func is not None: res = crop_func(res, size=size, padding_mode=padding_mode, is_coord=is_coord)
    return res

def apply_affine(mats=None, func=None, crop_func=None):
    return partial(_apply_affine, mats=mats, func=func, crop_func=crop_func)

nb_003a.apply_affine = apply_affine

tfms = [rotate(degrees=(-30,30.))]
tfm_ds = TfmDataset(img_ds, tfms, TfmY.Coord, padding_mode='zeros')

def show_pose(img, pnts, ax=None):
    if ax is None: _,ax = plt.subplots()
    ax.imshow(img.numpy().transpose(1,2,0))
    w,h = img.size(2),img.size(1)
    if len(pnts) != 0:
        pnts = (pnts.numpy() + 1) * np.array([w/2,h/2])[None]
        ax.scatter(pnts[:, 0], pnts[:, 1], s=10, marker='.', c='r')

_,axs = plt.subplots(1,4, figsize=(12,6))
for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)

tfms = [zoom_tfm(scale=(1,2), row_pct=(0,1), col_pct=(0,1))]
tfm_ds = TfmDataset(img_ds, tfms, TfmY.Coord, padding_mode='zeros', size=100, do_crop=True)
_,axs = plt.subplots(1,4, figsize=(12,6))
for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)

def _apply_tfm_func(pixel_func, lighting_func, affine_func, start_func, x,
                    segmentation=False, is_coord=False, **kwargs):
    if not np.any([pixel_func,lighting_func,affine_func,start_func]): return x
    x = x.clone()
    if start_func  is not None: x = start_func(x, is_coord=is_coord)
    if affine_func is not None: x = affine_func(x, is_coord=is_coord, **kwargs)
    # Lighting transforms don't make sense for masks or coord targets.
    if lighting_func is not None and not segmentation and not is_coord: x = lighting_func(x)
    if pixel_func is not None: x = pixel_func(x, is_coord=is_coord)
    return x

def apply_tfms(tfms):
    resolve_tfms(tfms)
    grouped_tfms = dict_groupby(listify(tfms), lambda o: o.tfm_type)
    start_tfms,affine_tfms,coord_tfms,pixel_tfms,lighting_tfms,crop_tfms = [
        grouped_tfms.get(o) for o in TfmType]
    lighting_func = apply_lighting(compose(lighting_tfms))
    mats = [o() for o in listify(affine_tfms)]
    affine_func = apply_affine(mats, func=compose(coord_tfms), crop_func=compose(crop_tfms))
    return partial(_apply_tfm_func, compose(pixel_tfms), lighting_func, affine_func, compose(start_tfms))

nb_003a.apply_tfms = apply_tfms

def pad_coord(x:TargetCoords, row_pad:int, col_pad:int):
    c,h,w = x.size()
    pad = torch.Tensor([w/(w + 2*col_pad), h/(h + 2*row_pad)])
    new_sz = torch.Size([c, h+2*row_pad, w+2*col_pad])
    return TargetCoords(x.coords.mul_(pad[None]), new_sz)

@reg_transform
def pad(x, padding, mode='reflect', is_coord=False) -> TfmType.Start:
    if is_coord: return pad_coord(x, padding, padding)
    else:        return F.pad(x[None], (padding,)*4, mode=mode)[0]

tfms = [pad_tfm(padding=100)]
tfm_ds = TfmDataset(img_ds, tfms, TfmY.Coord)
_,axs = plt.subplots(1,4, figsize=(12,6))
for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)

def crop_coords(x:TargetCoords, row:int, col:int, rows:int, cols:int):
    ch,h,w = x.size()
    c = x.coords
    c.mul_(torch.Tensor([w/cols, h/rows])[None])
    c.add_(-1 + torch.Tensor([w/cols - 2*col/cols, h/rows - 2*row/rows])[None])
    c = apply_pad_coords(c, padding_mode='zeros')
    new_sz = torch.Size([ch, rows, cols])
    return TargetCoords(c, new_sz)

@reg_transform
def crop(x, size, is_coord=False, row_pct:uniform=0.5, col_pct:uniform=0.5) -> TfmType.Pixel:
    size = listify(size,2)
    rows,cols = size
    row = int((x.size(1)-rows+1) * row_pct)
    col = int((x.size(2)-cols+1) * col_pct)
    if is_coord: return crop_coords(x, row, col, rows, cols)
    else:        return x[:, row:row+rows, col:col+cols].contiguous()

tfms = [crop_tfm(size=200, row_pct=(0,1), col_pct=(0,1))]
tfm_ds = TfmDataset(img_ds, tfms, TfmY.Coord)
_,axs = plt.subplots(1,4, figsize=(12,6))
for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)

@reg_transform
def crop_pad(x, size, padding_mode='reflect', is_coord=False,
             row_pct:uniform=0.5, col_pct:uniform=0.5) -> TfmType.Crop:
    size = listify(size,2)
    rows,cols = size
    if x.size(1)<rows or x.size(2)<cols:
        # Assumed completion: pad first when the input is smaller than the target,
        # mirroring the `pad` and `crop` transforms defined above.
        row_pad = max((rows - x.size(1) + 1) // 2, 0)
        col_pad = max((cols - x.size(2) + 1) // 2, 0)
        if is_coord: x = pad_coord(x, row_pad, col_pad)
        else:        x = F.pad(x[None], (col_pad,col_pad,row_pad,row_pad), mode=padding_mode)[0]
    row = int((x.size(1)-rows+1) * row_pct)
    col = int((x.size(2)-cols+1) * col_pct)
    if is_coord: return crop_coords(x, row, col, rows, cols)
    else:        return x[:, row:row+rows, col:col+cols].contiguous()

# Assumed signature: like the other coord transforms, `tilt` receives the coords,
# the image size and the `invert` flag from `_apply_affine`.
@reg_transform
def tilt(c, img_size, direction:rand_int, invert=False, magnitude:uniform=0) -> TfmType.Coord:
    orig_pts = [[-1,-1], [-1,1], [1,-1], [1,1]]
    if   direction == 0: targ_pts = [[-1,-1], [-1,1], [1,-1-magnitude], [1,1+magnitude]]
    elif direction == 1: targ_pts = [[-1,-1-magnitude], [-1,1+magnitude], [1,-1], [1,1]]
    elif direction == 2: targ_pts = [[-1,-1], [-1-magnitude,1], [1,-1], [1+magnitude,1]]
    elif direction == 3: targ_pts = [[-1-magnitude,-1], [-1,1], [1+magnitude,-1], [1,1]]
    coeffs = find_coeffs(targ_pts, orig_pts) if invert else find_coeffs(orig_pts, targ_pts)
    return apply_perspective(c, coeffs)

tfms = [tilt_tfm(direction=(0,3), magnitude=(-0.4,0.4))]
tfm_ds = TfmDataset(img_ds, tfms, TfmY.Coord, padding_mode='zeros', do_crop=True, size=100)
_,axs = plt.subplots(1,4, figsize=(12,6))
for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)

def flip_lr_coord(x):
    x.coords[:,0] = -x.coords[:,0]
    return x

@reg_transform
def flip_lr(x, is_coord=False) -> TfmType.Pixel:
    if is_coord: return flip_lr_coord(x)
    else:        return x.flip(2)

tfms = [flip_lr_tfm(p=0.5)]
tfm_ds = TfmDataset(img_ds, tfms, TfmY.Coord, padding_mode='zeros', do_crop=True, size=100)
_,axs = plt.subplots(1,4, figsize=(12,6))
for i,ax in enumerate(axs): show_pose(*tfm_ds[0], ax=ax)
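# One last numeric check with made-up sizes (not from the notebook): padding a 3x100x100
# target by 100 pixels on every side. A point on the old right/bottom edge (grid coord 1)
# should end up at grid coord 1/3 of the new 300x300 image, i.e. at pixel 200, exactly
# where the old edge now sits after 100 pixels of padding.
tc_pad = TargetCoords(tensor([[1., 1.]]), torch.Size([3,100,100]))
padded = pad_coord(tc_pad, 100, 100)
padded.size(), padded.coords                   # -> torch.Size([3, 300, 300]), tensor([[0.3333, 0.3333]])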