Source code for doctr.transforms.modules.base

# Copyright (C) 2021-2023, Mindee.

# This program is licensed under the Apache License 2.0.
# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.

import math
import random
from typing import Any, Callable, Dict, List, Tuple

import numpy as np

from doctr.utils.repr import NestedObject

from .. import functional as F

__all__ = ["SampleCompose", "ImageTransform", "ColorInversion", "OneOf", "RandomApply", "RandomRotate", "RandomCrop"]


class SampleCompose(NestedObject):
    """Implements a wrapper that will apply transformations sequentially on both image and target

    .. tabs::

        .. tab:: TensorFlow

            .. code:: python

                >>> import numpy as np
                >>> import tensorflow as tf
                >>> from doctr.transforms import SampleCompose, ImageTransform, ColorInversion, RandomRotate
                >>> transfo = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)])
                >>> out, out_boxes = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), np.zeros((2, 4)))

        .. tab:: PyTorch

            .. code:: python

                >>> import numpy as np
                >>> import torch
                >>> from doctr.transforms import SampleCompose, ImageTransform, ColorInversion, RandomRotate
                >>> transfos = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)])
                >>> out, out_boxes = transfos(torch.rand(8, 64, 64, 3), np.zeros((2, 4)))

    Args:
        transforms: list of transformation modules
    """

    _children_names: List[str] = ["sample_transforms"]

    def __init__(self, transforms: List[Callable[[Any, Any], Tuple[Any, Any]]]) -> None:
        self.sample_transforms = transforms

    def __call__(self, x: Any, target: Any) -> Tuple[Any, Any]:
        for t in self.sample_transforms:
            x, target = t(x, target)

        return x, target


class ImageTransform(NestedObject):
    """Implements a transform wrapper to turn an image-only transformation into an image+target transform

    .. tabs::

        .. tab:: TensorFlow

            .. code:: python

                >>> import tensorflow as tf
                >>> from doctr.transforms import ImageTransform, ColorInversion
                >>> transfo = ImageTransform(ColorInversion((32, 32)))
                >>> out, _ = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1), None)

        .. tab:: PyTorch

            .. code:: python

                >>> import torch
                >>> from doctr.transforms import ImageTransform, ColorInversion
                >>> transfo = ImageTransform(ColorInversion((32, 32)))
                >>> out, _ = transfo(torch.rand(8, 64, 64, 3), None)

    Args:
        transform: the image transformation module to wrap
    """

    _children_names: List[str] = ["img_transform"]

    def __init__(self, transform: Callable[[Any], Any]) -> None:
        self.img_transform = transform

    def __call__(self, img: Any, target: Any) -> Tuple[Any, Any]:
        img = self.img_transform(img)
        return img, target


[docs]class ColorInversion(NestedObject): """Applies the following tranformation to a tensor (image or batch of images): convert to grayscale, colorize (shift 0-values randomly), and then invert colors .. tabs:: .. tab:: TensorFlow .. code:: python >>> import tensorflow as tf >>> from doctr.transforms import ColorInversion >>> transfo = ColorInversion(min_val=0.6) >>> out = transfo(tf.random.uniform(shape=[8, 64, 64, 3], minval=0, maxval=1)) .. tab:: PyTorch .. code:: python >>> import torch >>> from doctr.transforms import ColorInversion >>> transfo = ColorInversion(min_val=0.6) >>> out = transfo(torch.rand(8, 64, 64, 3)) Args: min_val: range [min_val, 1] to colorize RGB pixels """ def __init__(self, min_val: float = 0.5) -> None: self.min_val = min_val def extra_repr(self) -> str: return f"min_val={self.min_val}" def __call__(self, img: Any) -> Any: return F.invert_colors(img, self.min_val)
[docs]class OneOf(NestedObject): """Randomly apply one of the input transformations .. tabs:: .. tab:: TensorFlow .. code:: python >>> import tensorflow as tf >>> from doctr.transforms import OneOf >>> transfo = OneOf([JpegQuality(), Gamma()]) >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)) .. tab:: PyTorch .. code:: python >>> import torch >>> from doctr.transforms import OneOf >>> transfo = OneOf([JpegQuality(), Gamma()]) >>> out = transfo(torch.rand(1, 64, 64, 3)) Args: transforms: list of transformations, one only will be picked """ _children_names: List[str] = ["transforms"] def __init__(self, transforms: List[Callable[[Any], Any]]) -> None: self.transforms = transforms def __call__(self, img: Any) -> Any: # Pick transformation transfo = self.transforms[int(random.random() * len(self.transforms))] # Apply return transfo(img)
[docs]class RandomApply(NestedObject): """Apply with a probability p the input transformation .. tabs:: .. tab:: TensorFlow .. code:: python >>> import tensorflow as tf >>> from doctr.transforms import RandomApply >>> transfo = RandomApply(Gamma(), p=.5) >>> out = transfo(tf.random.uniform(shape=[64, 64, 3], minval=0, maxval=1)) .. tab:: PyTorch .. code:: python >>> import torch >>> from doctr.transforms import RandomApply >>> transfo = RandomApply(Gamma(), p=.5) >>> out = transfo(torch.rand(1, 64, 64, 3)) Args: transform: transformation to apply p: probability to apply """ def __init__(self, transform: Callable[[Any], Any], p: float = 0.5) -> None: self.transform = transform self.p = p def extra_repr(self) -> str: return f"transform={self.transform}, p={self.p}" def __call__(self, img: Any) -> Any: if random.random() < self.p: return self.transform(img) return img
[docs]class RandomRotate(NestedObject): """Randomly rotate a tensor image and its boxes .. image:: https://doctr-static.mindee.com/models?id=v0.4.0/rotation_illustration.png&src=0 :align: center Args: max_angle: maximum angle for rotation, in degrees. Angles will be uniformly picked in [-max_angle, max_angle] expand: whether the image should be padded before the rotation """ def __init__(self, max_angle: float = 5.0, expand: bool = False) -> None: self.max_angle = max_angle self.expand = expand def extra_repr(self) -> str: return f"max_angle={self.max_angle}, expand={self.expand}" def __call__(self, img: Any, target: np.ndarray) -> Tuple[Any, np.ndarray]: angle = random.uniform(-self.max_angle, self.max_angle) r_img, r_polys = F.rotate_sample(img, target, angle, self.expand) # Removes deleted boxes is_kept = (r_polys.max(1) > r_polys.min(1)).sum(1) == 2 return r_img, r_polys[is_kept]
[docs]class RandomCrop(NestedObject): """Randomly crop a tensor image and its boxes Args: scale: tuple of floats, relative (min_area, max_area) of the crop ratio: tuple of float, relative (min_ratio, max_ratio) where ratio = h/w """ def __init__(self, scale: Tuple[float, float] = (0.08, 1.0), ratio: Tuple[float, float] = (0.75, 1.33)) -> None: self.scale = scale self.ratio = ratio def extra_repr(self) -> str: return f"scale={self.scale}, ratio={self.ratio}" def __call__(self, img: Any, target: Dict[str, np.ndarray]) -> Tuple[Any, Dict[str, np.ndarray]]: scale = random.uniform(self.scale[0], self.scale[1]) ratio = random.uniform(self.ratio[0], self.ratio[1]) # Those might overflow crop_h = math.sqrt(scale * ratio) crop_w = math.sqrt(scale / ratio) xmin, ymin = random.uniform(0, 1 - crop_w), random.uniform(0, 1 - crop_h) xmax, ymax = xmin + crop_w, ymin + crop_h # Clip them xmin, ymin = max(xmin, 0), max(ymin, 0) xmax, ymax = min(xmax, 1), min(ymax, 1) croped_img, crop_boxes = F.crop_detection(img, target["boxes"], (xmin, ymin, xmax, ymax)) return croped_img, dict(boxes=crop_boxes)