RLE-Foundation · Heodel · Nov 23, 2023 · Nov 23, 2023
diff --git a/rllte/xplore/augmentation/cyc_aug.py b/rllte/xplore/augmentation/cyc_aug.py
@@ -0,0 +1,65 @@
+# =============================================================================
+# MIT License
+
+# Copyright (c) 2023 Reinforcement Learning Evolution Foundation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# =============================================================================
+
+
+import torch as th
+import torch.nn.functional as F
+from rllte.common.prototype import BaseAugmentation
+from .pad_crop import PadCrop
+from .pad_resize import PadResizePlus
+
+class PeriodicPadCropResize(BaseAugmentation):
+    """Chain PadCrop and PadResizePlus repeatedly over a batch of images.
+
+    Args:
+        pad (int): Padding size handed to ``PadCrop``.
+        highest_pad_strength (int): Maximum padding strength handed to
+            ``PadResizePlus``.
+        T (int): Number of cycles; one cycle is a ``PadCrop`` pass followed
+            by a ``PadResizePlus`` pass.
+    """
+
+    def __init__(self, pad: int, highest_pad_strength: int, T: int) -> None:
+        super().__init__()
+        # Both sub-augmentations are built once and reused on every call.
+        self.pad_crop = PadCrop(pad)
+        self.pad_resize_plus = PadResizePlus(highest_pad_strength)
+        self.T = T
+
+    def forward(self, x: th.Tensor) -> th.Tensor:
+        """Run ``self.T`` crop-then-resize cycles on the input.
+
+        Args:
+            x (th.Tensor): The input images to be transformed.
+
+        Returns:
+            th.Tensor: The images after all cycles; ``x`` itself is returned
+                when ``self.T`` yields no iterations.
+        """
+        out = x
+        for _ in range(self.T):
+            # One cycle: random shift first, then random pad/crop/resize.
+            out = self.pad_resize_plus(self.pad_crop(out))
+        return out
+
diff --git a/rllte/xplore/augmentation/pad_crop.py b/rllte/xplore/augmentation/pad_crop.py
@@ -0,0 +1,79 @@
+# =============================================================================
+# MIT License
+
+# Copyright (c) 2023 Reinforcement Learning Evolution Foundation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# =============================================================================
+
+
+import torch as th
+import torch.nn.functional as F
+from rllte.common.prototype import BaseAugmentation
+
+class PadCrop(BaseAugmentation):
+    """Random shift augmentation for image-based observations.
+
+    Each image is replicate-padded by ``pad`` pixels on every side and a
+    window of the original size is sampled at a random integer offset.
+
+    Args:
+        pad (int): Padding size to apply before shifting; must be >= 0.
+
+    Raises:
+        ValueError: If ``pad`` is negative.
+    """
+
+    def __init__(self, pad: int) -> None:
+        super().__init__()
+        if pad < 0:
+            raise ValueError(f"pad must be non-negative, got {pad}.")
+        self.pad = pad
+
+    def forward(self, x: th.Tensor) -> th.Tensor:
+        """Apply an independent random shift to every image in the batch.
+
+        Args:
+            x (th.Tensor): Input images, unpacked as (n, c, h, w). Height and
+                width may differ.
+
+        Returns:
+            th.Tensor: Shifted images with the same (n, c, h, w) shape.
+        """
+        n, _, h, w = x.size()
+        pad = self.pad
+        h_pad, w_pad = h + 2 * pad, w + 2 * pad
+        x = F.pad(x, (pad, pad, pad, pad), 'replicate')
+
+        # Normalized pixel-centre coordinates (align_corners=False) of the
+        # top-left h x w window of the padded image.
+        eps_x, eps_y = 1.0 / w_pad, 1.0 / h_pad
+        xs = th.linspace(-1.0 + eps_x, 1.0 - eps_x, w_pad, device=x.device, dtype=x.dtype)[:w]
+        ys = th.linspace(-1.0 + eps_y, 1.0 - eps_y, h_pad, device=x.device, dtype=x.dtype)[:h]
+        base_grid = th.stack([xs.expand(h, w), ys.unsqueeze(1).expand(h, w)], dim=-1)
+
+        # Integer pixel shifts in [0, 2 * pad] per image and axis, converted
+        # to normalized units (one pixel spans 2 / size along each axis).
+        shift = th.randint(0, 2 * pad + 1, size=(n, 1, 1, 2), device=x.device, dtype=x.dtype)
+        shift[..., 0] *= 2.0 / w_pad
+        shift[..., 1] *= 2.0 / h_pad
+
+        # Broadcasting yields an (n, h, w, 2) grid with (x, y) in the last dim.
+        grid = base_grid + shift
+        return F.grid_sample(x, grid, padding_mode='zeros', align_corners=False)
diff --git a/rllte/xplore/augmentation/pad_resize.py b/rllte/xplore/augmentation/pad_resize.py
@@ -0,0 +1,111 @@
+# =============================================================================
+# MIT License
+
+# Copyright (c) 2023 Reinforcement Learning Evolution Foundation
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+# =============================================================================
+
+
+import torch as th
+import torch.nn.functional as F
+import torchvision.transforms as T
+from rllte.common.prototype import BaseAugmentation
+
+
+
+class PadResizePlus(BaseAugmentation):
+    """Pad, crop and resize augmentation for image-based observations.
+
+    Each call zero-pads the batch by a random (pad_x, pad_y), samples a
+    randomly placed window of size (h + pad_y, w + pad_x) from the padded
+    images, and resizes that window back to the input height and width.
+
+    Args:
+        highest_pad_strength (int): The maximum strength of padding; must
+            be >= 0.
+
+    Raises:
+        ValueError: If ``highest_pad_strength`` is negative.
+    """
+
+    def __init__(self, highest_pad_strength: int) -> None:
+        super().__init__()
+        if highest_pad_strength < 0:
+            raise ValueError(
+                f"highest_pad_strength must be non-negative, got {highest_pad_strength}."
+            )
+        self.highest_pad_strength = highest_pad_strength
+
+    def crop(self, imgs: th.Tensor, pad_x: int, pad_y: int) -> th.Tensor:
+        """Sample a randomly placed window from already padded images.
+
+        Args:
+            imgs (th.Tensor): Padded images, unpacked as (n, c, h_pad, w_pad).
+            pad_x (int): Padding applied on each side along the width.
+            pad_y (int): Padding applied on each side along the height.
+
+        Returns:
+            th.Tensor: Windows of size (h_pad - pad_y, w_pad - pad_x), taken
+                at an independent random offset for every image.
+        """
+        n, _, h_pad, w_pad = imgs.size()
+        crop_x = w_pad - pad_x
+        crop_y = h_pad - pad_y
+
+        # Normalized pixel-centre coordinates of the top-left crop window.
+        eps_x = 1.0 / w_pad
+        eps_y = 1.0 / h_pad
+        x_range = th.linspace(-1.0 + eps_x, 1.0 - eps_x, w_pad, device=imgs.device, dtype=imgs.dtype)[:crop_x]
+        y_range = th.linspace(-1.0 + eps_y, 1.0 - eps_y, h_pad, device=imgs.device, dtype=imgs.dtype)[:crop_y]
+        # Broadcast by hand: th.meshgrid without ``indexing`` emits a
+        # deprecation warning on recent PyTorch releases.
+        grid_x = x_range.unsqueeze(0).expand(y_range.numel(), -1)
+        grid_y = y_range.unsqueeze(1).expand(-1, x_range.numel())
+        base_grid = th.stack([grid_x, grid_y], dim=-1)
+
+        # Integer pixel offsets in [0, pad] per image, in normalized units.
+        shift_x = th.randint(0, pad_x + 1, size=(n, 1, 1, 1), device=imgs.device, dtype=imgs.dtype)
+        shift_y = th.randint(0, pad_y + 1, size=(n, 1, 1, 1), device=imgs.device, dtype=imgs.dtype)
+        shift_x *= 2.0 / w_pad
+        shift_y *= 2.0 / h_pad
+        grid = base_grid + th.cat([shift_x, shift_y], dim=-1)
+
+        # Defaults spelled out so the unset-align_corners warning is not raised.
+        return F.grid_sample(imgs, grid, padding_mode='zeros', align_corners=False)
+
+    def forward(self, imgs: th.Tensor) -> th.Tensor:
+        """Apply random zero-padding, random cropping and resizing.
+
+        Args:
+            imgs (th.Tensor): Input images, unpacked as (n, c, h, w).
+
+        Returns:
+            th.Tensor: Processed images resized to (h, w).
+        """
+        # Draw a total strength, then split it between the two axes.
+        strength = th.randint(0, self.highest_pad_strength + 1, (1,)).item()
+        _, _, h, w = imgs.shape
+        pad_x = th.randint(0, strength + 1, (1,)).item()
+        pad_y = strength - pad_x
+
+        # Constant (zero) padding: pad_x left/right, pad_y top/bottom.
+        padded = F.pad(imgs, (pad_x, pad_x, pad_y, pad_y))
+        cropped = self.crop(padded, pad_x, pad_y)
+        return T.Resize(size=(h, w))(cropped)