feat: v3det dataset

LutingWang · Oct 15, 2024 · b1646a3 · b1646a3
1 parent 932de71
commit b1646a3
Show file tree

Hide file tree

Showing 8 changed files with 69 additions and 63 deletions.
diff --git a/docs/source/data/v3det.rst b/docs/source/data/v3det.rst
@@ -11,24 +11,20 @@ https://v3det.openxlab.org.cn/
     git lfs install
     git clone https://huggingface.co/datasets/yhcao/${repo}.git
     cd ${repo}
-        git apply v3det.txt
         python v3det_exemplar_image_download.py
         python v3det_image_download.py
         python v3det_text_image_download.py
     cd ..
 
     mkdir v3det && cd v3det
-        ln -s ../${repo}/V3Det/images train
-        ln -s ../${repo}/V3Det/test
-        mkdir annotations & cd annotations
-            ln -s ../../${repo}/*.json .
+        ln -s ${PWD}/../${repo}/V3Det/images
+        mkdir annotations && cd annotations
+            ln -s ${PWD}/../../${repo}/*.json .
         cd ..
     cd ..
 
     cd ..
 
-.. literalinclude:: v3det.txt
-
 .. code::
 
     data/v3det/
diff --git a/docs/source/data/v3det.txt b/docs/source/data/v3det.txt
diff --git a/docs/source/pretrained/clip.py b/docs/source/pretrained/clip.py
@@ -5,11 +5,11 @@
 from einops.layers.torch import Rearrange
 
 import todd.tasks.natural_language_processing as nlp
-from todd.datasets import CLIP_MEAN, CLIP_STD, COCODataset
+from todd.datasets import CLIP_MEAN, CLIP_STD, coco_url
 from todd.models.modules import CLIPText, CLIPViT
 from todd.utils import get_image
 
-url = COCODataset.url('val', 2017, 39769)
+url = coco_url('val', 2017, 39769)  # pylint: disable=invalid-name
 image = get_image(url)
 
 transforms = tf_v2.Compose([

diff --git a/docs/source/pretrained/dino.py b/docs/source/pretrained/dino.py
@@ -4,11 +4,11 @@
 import torchvision.transforms.v2 as tf_v2
 from einops.layers.torch import Rearrange
 
-from todd.datasets import IMAGENET_MEAN, IMAGENET_STD, COCODataset
+from todd.datasets import IMAGENET_MEAN, IMAGENET_STD, coco_url
 from todd.models.modules import DINO, DINOv2
 from todd.utils import get_image
 
-url = COCODataset.url('val', 2017, 39769)
+url = coco_url('val', 2017, 39769)  # pylint: disable=invalid-name
 image = get_image(url)
 
 transforms = tf_v2.Compose([

diff --git a/todd/datasets/__init__.py b/todd/datasets/__init__.py
@@ -12,4 +12,5 @@
 from .registries import *
 from .sa_med2d import *
 from .satin import *
+from .v3det import *
 from .zip import *
diff --git a/todd/datasets/coco.py b/todd/datasets/coco.py
@@ -1,4 +1,5 @@
 __all__ = [
+    'coco_url',
     'COCODataset',
 ]
 
@@ -36,6 +37,10 @@
 Year = Literal[2014, 2017]
 
 
+def coco_url(split: Split, year: Year, id_: int) -> str:
+    """Build the download URL of a single COCO image.
+
+    Args:
+        split: dataset split the image belongs to.
+        year: release year of the dataset.
+        id_: numeric image id; zero-padded to 12 digits in the file name.
+
+    Returns:
+        ``URL`` followed by ``<split><year>/<file name>.jpg``.
+    """
+    # The 2014 release names its files ``COCO_<split><year>_<id>.jpg``,
+    # whereas later releases use the bare zero-padded id.
+    # NOTE(review): assumes ``URL`` is the official COCO image host, which
+    # mirrors the on-disk file names - confirm against the constant.
+    prefix = f'COCO_{split}{year}_' if year == 2014 else ''
+    return f'{URL}{split}{year}/{prefix}{id_:012d}.jpg'
+
+
 class BaseKeys(KeysProtocol[str], ABC):
 
     def __init__(self, image_ids: Iterable[int], suffix: str) -> None:
@@ -84,8 +89,12 @@ def load(
         annotation: '_Annotation',
         categories: Mapping[int, int],
     ) -> Self:
+        mask = (
+            torch.from_numpy(coco.annToMask(annotation))
+            if 'segmentation' in annotation else torch.zeros(1)
+        )
         return cls(
-            torch.from_numpy(coco.annToMask(annotation)),
+            mask,
             annotation['area'],
             bool(annotation['iscrowd']),
             cast('BBox', annotation['bbox']),
@@ -175,10 +184,6 @@ class COCODataset(BaseDataset[COCO, T]):
     DATA_ROOT = pathlib.Path('data/coco')
     ANNOTATIONS_ROOT = DATA_ROOT / 'annotations'
 
-    @classmethod
-    def url(cls, split: Split, year: Year, id_: int) -> str:
-        return f'{URL}{split}{year}/{id_:012d}.jpg'
-
     def __init__(
         self,
         *args,

diff --git a/todd/datasets/lvis.py b/todd/datasets/lvis.py
@@ -24,6 +24,7 @@
     from todd.tasks.object_detection import BBox, FlattenBBoxesXYWH
 
 Split = Literal['train', 'val', 'minival']
+Version = Literal['v0.5', 'v1']
 
 
 class Keys(COCOKeys):
@@ -115,7 +116,7 @@ def __init__(
         self,
         *args,
         split: Split,
-        version: Literal['v0.5', 'v1'] = 'v1',
+        version: Version = 'v1',
         access_layer: PILAccessLayer | None = None,
         annotations_file: pathlib.Path | str | None = None,
         **kwargs,

diff --git a/todd/datasets/v3det.py b/todd/datasets/v3det.py
@@ -0,0 +1,49 @@
+__all__ = [
+    'V3DetDataset',
+]
+
+import pathlib
+from typing import Literal
+
+from ..registries import DatasetRegistry
+from .access_layers.pil import PILAccessLayer
+from .coco import COCODataset
+
+Split = Literal['train', 'val']
+Year = Literal[2023]
+Version = Literal['v1']
+
+
+@DatasetRegistry.register_()
+class V3DetDataset(COCODataset):
+    """V3Det dataset, loaded through the COCO dataset implementation.
+
+    Data is looked up under ``data/v3det`` and annotation files under
+    ``data/v3det/annotations`` unless the caller overrides them.
+    """
+
+    DATA_ROOT = pathlib.Path('data/v3det')
+    ANNOTATIONS_ROOT = DATA_ROOT / 'annotations'
+
+    def __init__(
+        self,
+        *args,
+        split: Split,
+        year: Year = 2023,
+        version: Version = 'v1',
+        access_layer: PILAccessLayer | None = None,
+        annotations_file: pathlib.Path | str | None = None,
+        **kwargs,
+    ) -> None:
+        """Fill in V3Det defaults and delegate to the parent initializer.
+
+        Args:
+            *args: positional arguments forwarded to the parent class.
+            split: dataset split, ``'train'`` or ``'val'``.
+            year: release year; only used to name the default annotation
+                file.
+            version: release version; only used to name the default
+                annotation file.
+            access_layer: image access layer. When ``None``, a
+                ``PILAccessLayer`` rooted at ``DATA_ROOT`` with suffix
+                ``'jpg'`` is created.
+            annotations_file: annotation file path. When ``None``, it is
+                ``ANNOTATIONS_ROOT / 'v3det_{year}_{version}_{split}.json'``.
+            **kwargs: keyword arguments forwarded to the parent class.
+        """
+        if access_layer is None:
+            default_root = str(self.DATA_ROOT)
+            access_layer = PILAccessLayer(data_root=default_root, suffix='jpg')
+
+        if annotations_file is None:
+            file_name = f'v3det_{year}_{version}_{split}.json'
+            annotations_file = self.ANNOTATIONS_ROOT / file_name
+
+        # `year` and `version` only select the default annotation file; they
+        # are deliberately not forwarded to the parent initializer.
+        super().__init__(
+            *args,
+            split=split,
+            access_layer=access_layer,
+            annotations_file=annotations_file,
+            **kwargs,
+        )