From 6a6e3f282b2752c4a0f58d3c65181aa29f16c2d2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicolas=20K=C3=A4nzig?= <36882833+nkaenzig@users.noreply.github.com>
Date: Tue, 4 Feb 2025 11:59:54 +0100
Subject: [PATCH] Add `UNITOPatho` dataset (#750)

---
 .../offline/classification/unitopatho.yaml    | 110 ++++++++++++
 .../online/classification/unitopatho.yaml     |  90 ++++++++++
 docs/datasets/index.md                        |  15 +-
 docs/datasets/unitopatho.md                   |  68 ++++++++
 mkdocs.yml                                    |   1 +
 src/eva/vision/data/datasets/__init__.py      |   2 +
 .../data/datasets/classification/__init__.py  |   3 +
 .../datasets/classification/unitopatho.py     | 159 ++++++++++++++++++
 ...0_crop_sk00003_(12824,33871,1812,1812).png |   3 +
 ...1_crop_sk00026_(15904,10751,1812,1812).png |   3 +
 ...02_crop_sk00033_(7688,23775,1812,1812).png |   3 +
 ...000_crop_sk00021_(9060,9947,1812,1812).png |   3 +
 .../vision/datasets/unitopatho/800/test.csv   |   3 +
 .../vision/datasets/unitopatho/800/train.csv  |   3 +
 .../classification/test_unitopatho.py         |  45 +++++
 tests/eva/vision/test_vision_cli.py           |   4 +
 16 files changed, 508 insertions(+), 7 deletions(-)
 create mode 100644 configs/vision/pathology/offline/classification/unitopatho.yaml
 create mode 100644 configs/vision/pathology/online/classification/unitopatho.yaml
 create mode 100644 docs/datasets/unitopatho.md
 create mode 100644 src/eva/vision/data/datasets/classification/unitopatho.py
 create mode 100644 tests/eva/assets/vision/datasets/unitopatho/800/HP/149-B3-HP.ndpi_ROI__mpp0.44_reg000_crop_sk00003_(12824,33871,1812,1812).png
 create mode 100644 tests/eva/assets/vision/datasets/unitopatho/800/NORM/188-B4-NORM.ndpi_ROI__mpp0.44_reg001_crop_sk00026_(15904,10751,1812,1812).png
 create mode 100644 tests/eva/assets/vision/datasets/unitopatho/800/TVA.LG/243-B5-TVALG.ndpi_ROI__mpp0.44_reg002_crop_sk00033_(7688,23775,1812,1812).png
 create mode 100644 tests/eva/assets/vision/datasets/unitopatho/800/TVA.LG/TVA.LG CASO 2 - 2018-12-04 13.19.16.ndpi_ROI__mpp0.44_reg000_crop_sk00021_(9060,9947,1812,1812).png
 create mode 100644 tests/eva/assets/vision/datasets/unitopatho/800/test.csv
 create mode 100644 tests/eva/assets/vision/datasets/unitopatho/800/train.csv
 create mode 100644 tests/eva/vision/data/datasets/classification/test_unitopatho.py

diff --git a/configs/vision/pathology/offline/classification/unitopatho.yaml b/configs/vision/pathology/offline/classification/unitopatho.yaml
new file mode 100644
index 00000000..2bd064a7
--- /dev/null
+++ b/configs/vision/pathology/offline/classification/unitopatho.yaml
@@ -0,0 +1,110 @@
+---
+trainer:
+  class_path: eva.Trainer
+  init_args:
+    n_runs: &N_RUNS ${oc.env:N_RUNS, 5}
+    default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:MODEL_NAME, dino_vits16}/offline/unitopatho}
+    max_steps: &MAX_STEPS ${oc.env:MAX_STEPS, 12500}
+    checkpoint_type: ${oc.env:CHECKPOINT_TYPE, best}
+    callbacks:
+      - class_path: eva.callbacks.ConfigurationLogger
+      - class_path: lightning.pytorch.callbacks.TQDMProgressBar
+        init_args:
+          refresh_rate: ${oc.env:TQDM_REFRESH_RATE, 1}
+      - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+        init_args:
+          logging_interval: epoch
+      - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+        init_args:
+          filename: best
+          save_last: true
+          save_top_k: 1
+          monitor: &MONITOR_METRIC ${oc.env:MONITOR_METRIC, val/MulticlassAccuracy}
+          mode: &MONITOR_METRIC_MODE ${oc.env:MONITOR_METRIC_MODE, max}
+      - class_path: lightning.pytorch.callbacks.EarlyStopping
+        init_args:
+          min_delta: 0
+          patience: ${oc.env:PATIENCE, 45}
+          monitor: *MONITOR_METRIC
+          mode: *MONITOR_METRIC_MODE
+      - class_path: eva.callbacks.ClassificationEmbeddingsWriter
+        init_args:
+          output_dir: &DATASET_EMBEDDINGS_ROOT ${oc.env:EMBEDDINGS_ROOT, ./data/embeddings}/${oc.env:MODEL_NAME, dino_vits16}/unitopatho
+          dataloader_idx_map:
+            0: train
+            1: val
+          backbone:
+            class_path: eva.vision.models.ModelFromRegistry
+            init_args:
+              model_name: ${oc.env:MODEL_NAME, universal/vit_small_patch16_224_dino}
+              model_extra_kwargs: ${oc.env:MODEL_EXTRA_KWARGS, null}
+          overwrite: false
+    logger:
+      - class_path: lightning.pytorch.loggers.TensorBoardLogger
+        init_args:
+          save_dir: *OUTPUT_ROOT
+          name: ""
+model:
+  class_path: eva.HeadModule
+  init_args:
+    head:
+      class_path: torch.nn.Linear
+      init_args:
+        in_features: ${oc.env:IN_FEATURES, 384}
+        out_features: &NUM_CLASSES 6
+    criterion: torch.nn.CrossEntropyLoss
+    optimizer:
+      class_path: torch.optim.AdamW
+      init_args:
+        lr: ${oc.env:LR_VALUE, 0.0003}
+    lr_scheduler:
+      class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+      init_args:
+        T_max: *MAX_STEPS
+        eta_min: 0.0
+    metrics:
+      common:
+        - class_path: eva.metrics.AverageLoss
+        - class_path: eva.metrics.MulticlassClassificationMetrics
+          init_args:
+            num_classes: *NUM_CLASSES
+data:
+  class_path: eva.DataModule
+  init_args:
+    datasets:
+      train:
+        class_path: eva.datasets.EmbeddingsClassificationDataset
+        init_args: &DATASET_ARGS
+          root: *DATASET_EMBEDDINGS_ROOT
+          manifest_file: manifest.csv
+          split: train
+      val:
+        class_path: eva.datasets.EmbeddingsClassificationDataset
+        init_args:
+          <<: *DATASET_ARGS
+          split: val
+      predict:
+        - class_path: eva.vision.datasets.UniToPatho
+          init_args: &PREDICT_DATASET_ARGS
+            root: ${oc.env:DATA_ROOT, ./data/unitopatho}
+            split: train
+            transforms:
+              class_path: eva.vision.data.transforms.common.ResizeAndCrop
+              init_args:
+                mean: ${oc.env:NORMALIZE_MEAN, [0.485, 0.456, 0.406]}
+                std: ${oc.env:NORMALIZE_STD, [0.229, 0.224, 0.225]}
+        - class_path: eva.vision.datasets.UniToPatho
+          init_args:
+            <<: *PREDICT_DATASET_ARGS
+            split: val
+    dataloaders:
+      train:
+        batch_size: &BATCH_SIZE ${oc.env:BATCH_SIZE, 256}
+        num_workers: &N_DATA_WORKERS ${oc.env:N_DATA_WORKERS, 4}
+        shuffle: true
+      val:
+        batch_size: *BATCH_SIZE
+        num_workers: *N_DATA_WORKERS
+      predict:
+        batch_size: &PREDICT_BATCH_SIZE ${oc.env:PREDICT_BATCH_SIZE, 64}
+        num_workers: *N_DATA_WORKERS
diff --git a/configs/vision/pathology/online/classification/unitopatho.yaml b/configs/vision/pathology/online/classification/unitopatho.yaml
new file mode 100644
index 00000000..167876e3
--- /dev/null
+++ b/configs/vision/pathology/online/classification/unitopatho.yaml
@@ -0,0 +1,90 @@
+---
+trainer:
+  class_path: eva.Trainer
+  init_args:
+    n_runs: &N_RUNS ${oc.env:N_RUNS, 5}
+    default_root_dir: &OUTPUT_ROOT ${oc.env:OUTPUT_ROOT, logs/${oc.env:MODEL_NAME, dino_vits16}/online/unitopatho}
+    max_steps: &MAX_STEPS ${oc.env:MAX_STEPS, 12500}
+    checkpoint_type: ${oc.env:CHECKPOINT_TYPE, best}
+    callbacks:
+      - class_path: eva.callbacks.ConfigurationLogger
+      - class_path: lightning.pytorch.callbacks.TQDMProgressBar
+        init_args:
+          refresh_rate: ${oc.env:TQDM_REFRESH_RATE, 1}
+      - class_path: lightning.pytorch.callbacks.LearningRateMonitor
+        init_args:
+          logging_interval: epoch
+      - class_path: lightning.pytorch.callbacks.ModelCheckpoint
+        init_args:
+          filename: best
+          save_last: true
+          save_top_k: 1
+          monitor: &MONITOR_METRIC ${oc.env:MONITOR_METRIC, val/MulticlassAccuracy}
+          mode: &MONITOR_METRIC_MODE ${oc.env:MONITOR_METRIC_MODE, max}
+      - class_path: lightning.pytorch.callbacks.EarlyStopping
+        init_args:
+          min_delta: 0
+          patience: ${oc.env:PATIENCE, 45}
+          monitor: *MONITOR_METRIC
+          mode: *MONITOR_METRIC_MODE
+    logger:
+      - class_path: lightning.pytorch.loggers.TensorBoardLogger
+        init_args:
+          save_dir: *OUTPUT_ROOT
+          name: ""
+model:
+  class_path: eva.HeadModule
+  init_args:
+    backbone:
+      class_path: eva.vision.models.ModelFromRegistry
+      init_args:
+        model_name: ${oc.env:MODEL_NAME, universal/vit_small_patch16_224_dino}
+        model_extra_kwargs: ${oc.env:MODEL_EXTRA_KWARGS, null}
+    head:
+      class_path: torch.nn.Linear
+      init_args:
+        in_features: ${oc.env:IN_FEATURES, 384}
+        out_features: &NUM_CLASSES 6
+    criterion: torch.nn.CrossEntropyLoss
+    optimizer:
+      class_path: torch.optim.AdamW
+      init_args:
+        lr: ${oc.env:LR_VALUE, 0.0003}
+    lr_scheduler:
+      class_path: torch.optim.lr_scheduler.CosineAnnealingLR
+      init_args:
+        T_max: *MAX_STEPS
+        eta_min: 0.0
+    metrics:
+      common:
+        - class_path: eva.metrics.AverageLoss
+        - class_path: eva.metrics.MulticlassClassificationMetrics
+          init_args:
+            num_classes: *NUM_CLASSES
+data:
+  class_path: eva.DataModule
+  init_args:
+    datasets:
+      train:
+        class_path: eva.vision.datasets.UniToPatho
+        init_args: &DATASET_ARGS
+          root: ${oc.env:DATA_ROOT, ./data/unitopatho}
+          split: train
+          transforms:
+            class_path: eva.vision.data.transforms.common.ResizeAndCrop
+            init_args:
+              mean: ${oc.env:NORMALIZE_MEAN, [0.485, 0.456, 0.406]}
+              std: ${oc.env:NORMALIZE_STD, [0.229, 0.224, 0.225]}
+      val:
+        class_path: eva.vision.datasets.UniToPatho
+        init_args:
+          <<: *DATASET_ARGS
+          split: val
+    dataloaders:
+      train:
+        batch_size: &BATCH_SIZE ${oc.env:BATCH_SIZE, 256}
+        num_workers: &N_DATA_WORKERS ${oc.env:N_DATA_WORKERS, 4}
+        shuffle: true
+      val:
+        batch_size: *BATCH_SIZE
+        num_workers: *N_DATA_WORKERS
diff --git a/docs/datasets/index.md b/docs/datasets/index.md
index 1c799d7a..5cd3838a 100644
--- a/docs/datasets/index.md
+++ b/docs/datasets/index.md
@@ -7,17 +7,18 @@
 ### Whole Slide (WSI) and microscopy image datasets
 
 #### Patch-level
 
-| Dataset                            | #Patches | Patch Size | Magnification (μm/px)  | Task                       | Tissue Type      |
-|------------------------------------|----------|------------|------------------------|----------------------------|------------------|
-| [BACH](bach.md)                    | 400      | 2048x1536  | 20x (0.5)              | Classification (4 classes) | Breast           |
+| Dataset                            | #Patches | Patch Size  | Magnification (μm/px)  | Task                       | Tissue Type      |
+|------------------------------------|----------|-------------|------------------------|----------------------------|------------------|
+| [BACH](bach.md)                    | 400      | 2048x1536   | 20x (0.5)              | Classification (4 classes) | Breast           |
 | [BRACS](bracs.md)                  | 4539     | variable   | 40x (0.25)             | Classification (7 classes) | Breast           |
 | [BreakHis](breakhis.md)            | 1995     | 700x460    | 40x (0.25)             | Classification (8 classes) | Breast           |
-| [CRC](crc.md)                      | 107,180  | 224x224    | 20x (0.5)              | Classification (9 classes) | Colorectal       |
+| [CRC](crc.md)                      | 107,180  | 224x224     | 20x (0.5)              | Classification (9 classes) | Colorectal       |
 | [GleasonArvaniti](crc.md)          | 22,752   | 750x750    | 40x (0.23)             | Classification (4 classes) | Prostate         |
-| [PatchCamelyon](patch_camelyon.md) | 327,680  | 96x96      | 10x (1.0) \*           | Classification (2 classes) | Breast           |
-| [MHIST](mhist.md)                  | 3,152    | 224x224    | 5x (2.0) \*            | Classification (2 classes) | Colorectal Polyp |
+| [PatchCamelyon](patch_camelyon.md) | 327,680  | 96x96       | 10x (1.0) \*           | Classification (2 classes) | Breast           |
+| [MHIST](mhist.md)                  | 3,152    | 224x224     | 5x (2.0) \*            | Classification (2 classes) | Colorectal Polyp |
+| [UniToPatho](unitopatho.md)        | 8669     | 1812x1812   | 20x (0.4415)           | Classification (6 classes) | Colorectal Polyp |
 | [MoNuSAC](monusac.md)              | 294      | 113x81 - 1398x1956 | 40x (0.25) | Segmentation (4 classes) | Multi-Organ Cell Type (Breast, Kidney, Lung and Prostate) |
-| [CoNSeP](consep.md)                | 41       | 1000x1000  | 40x (0.25) \*          | Segmentation (8 classes)   | Colorectal Nuclear |
+| [CoNSeP](consep.md)                | 41       | 1000x1000   | 40x (0.25) \*          | Segmentation (8 classes)   | Colorectal Nuclear |
 
 \* Downsampled from 40x (0.25 μm/px) to increase the field of view.
 
diff --git a/docs/datasets/unitopatho.md b/docs/datasets/unitopatho.md
new file mode 100644
index 00000000..def14796
--- /dev/null
+++ b/docs/datasets/unitopatho.md
@@ -0,0 +1,68 @@
+# UniToPatho
+
+UniToPatho is an annotated dataset of 9536 hematoxylin and eosin stained patches extracted from 292 whole-slide images, intended for training deep neural networks for colorectal polyp classification and adenoma grading. The slides were acquired with a Hamamatsu Nanozoomer S210 scanner at 20x magnification (0.4415 μm/px). Each slide belongs to a different patient and is annotated by expert pathologists according to the following six classes:
+
+- NORM - Normal tissue;
+- HP - Hyperplastic Polyp;
+- TA.HG - Tubular Adenoma, High-Grade dysplasia;
+- TA.LG - Tubular Adenoma, Low-Grade dysplasia;
+- TVA.HG - Tubulo-Villous Adenoma, High-Grade dysplasia;
+- TVA.LG - Tubulo-Villous Adenoma, Low-Grade dysplasia.
+
+For this benchmark we use only the `800` subset, which contains 8669 images of resolution 1812x1812 (the `7000` subset contains much larger images and would therefore be difficult to handle as a patch classification task).
+
+## Raw data
+
+### Key stats
+
+|                                         |                                        |
+|-----------------------------------------|----------------------------------------|
+| **Modality**                            | Vision (WSI patches)                   |
+| **Task**                                | Multiclass classification (6 classes)  |
+| **Cancer type**                         | Colorectal                             |
+| **Data size**                           | 48.37 GB                               |
+| **Image dimension**                     | 1812 x 1812                            |
+| **Magnification (μm/px)**               | 20x (0.4415)                           |
+| **Magnification after resize (μm/px)**  | 2.5x (3.57) \*                         |
+| **Files format**                        | `png`                                  |
+| **Number of images**                    | 8669                                   |
+
+\* Assuming the default 224x224 input size: resizing 1812 px to 224 px scales the pixel spacing by 1812/224 ≈ 8.1, giving 0.4415 μm/px × 8.1 ≈ 3.57 μm/px (20x / 8.1 ≈ 2.5x).
+
+### Splits
+
+The data source provides train/validation splits:
+
+| Splits   | Train         | Validation    |
+|----------|---------------|---------------|
+| #Samples | 6270 (72.33%) | 2399 (27.67%) |
+
+The dataset authors provide only these two splits, which is why we don't report performance on a separate third test split.
+
+### Organization
+
+The UniToPatho data is organized as follows (note that we use only the `800` subset):
+
+```
+unitopatho
+├── 800
+│   ├── train.csv
+│   ├── test.csv
+│   ├── HP                # one folder per class
+│   ├── NORM
+│   ├── TA.HG
+│   ├── ...
+```
+
+## Download and preprocessing
+
+The `UniToPatho` dataset class doesn't download the data during runtime, so the data must be downloaded manually from [the official source](https://ieee-dataport.org/open-access/unitopatho).
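+
+A minimal usage sketch of the dataset class added in this PR (the `root` value is an assumption; point it at the directory that contains the extracted `800` folder):
+
+```python
+from eva.vision.data import datasets
+
+dataset = datasets.UniToPatho(root="./data/unitopatho", split="val")
+dataset.prepare_data()  # verifies that the manually downloaded data exists
+dataset.configure()     # builds the split indices from train.csv / test.csv
+image, target, metadata = dataset[0]
+```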
+
+## Relevant links
+
+* [GitHub Repo](https://github.com/EIDOSLAB/UNITOPATHO)
+
+## License
+
+[CC BY 4.0](https://creativecommons.org/licenses/by/4.0/)
\ No newline at end of file
diff --git a/mkdocs.yml b/mkdocs.yml
index bd843cf4..7458bcea 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -90,6 +90,7 @@ nav:
       - GleasonArvaniti: datasets/gleason_arvaniti.md
       - MHIST: datasets/mhist.md
       - PatchCamelyon: datasets/patch_camelyon.md
+      - UniToPatho: datasets/unitopatho.md
       - MoNuSAC: datasets/monusac.md
       - CoNSeP: datasets/consep.md
       - BCSS: datasets/bcss.md
diff --git a/src/eva/vision/data/datasets/__init__.py b/src/eva/vision/data/datasets/__init__.py
index 9195857d..e1a665c5 100644
--- a/src/eva/vision/data/datasets/__init__.py
+++ b/src/eva/vision/data/datasets/__init__.py
@@ -11,6 +11,7 @@
     GleasonArvaniti,
     PANDASmall,
     PatchCamelyon,
+    UniToPatho,
     WsiClassificationDataset,
 )
 from eva.vision.data.datasets.segmentation import (
@@ -38,6 +39,7 @@
     "PANDASmall",
     "Camelyon16",
     "PatchCamelyon",
+    "UniToPatho",
     "WsiClassificationDataset",
     "CoNSeP",
     "EmbeddingsSegmentationDataset",
diff --git a/src/eva/vision/data/datasets/classification/__init__.py b/src/eva/vision/data/datasets/classification/__init__.py
index 3989c2bd..cd60020d 100644
--- a/src/eva/vision/data/datasets/classification/__init__.py
+++ b/src/eva/vision/data/datasets/classification/__init__.py
@@ -9,6 +9,7 @@
 from eva.vision.data.datasets.classification.mhist import MHIST
 from eva.vision.data.datasets.classification.panda import PANDA, PANDASmall
 from eva.vision.data.datasets.classification.patch_camelyon import PatchCamelyon
+from eva.vision.data.datasets.classification.unitopatho import UniToPatho
 from eva.vision.data.datasets.classification.wsi import WsiClassificationDataset
 
 __all__ = [
@@ -20,7 +21,9 @@
     "GleasonArvaniti",
     "MHIST",
     "PatchCamelyon",
+    "UniToPatho",
     "WsiClassificationDataset",
     "PANDA",
     "PANDASmall",
+    "Camelyon16",
 ]
diff --git a/src/eva/vision/data/datasets/classification/unitopatho.py b/src/eva/vision/data/datasets/classification/unitopatho.py
new file mode 100644
index 00000000..2f039f10
--- /dev/null
+++ b/src/eva/vision/data/datasets/classification/unitopatho.py
@@ -0,0 +1,159 @@
+"""UniToPatho dataset class."""
+
+import functools
+import glob
+import os
+from typing import Callable, Dict, List, Literal
+
+import pandas as pd
+import torch
+from torchvision import tv_tensors
+from typing_extensions import override
+
+from eva.vision.data.datasets import _validators
+from eva.vision.data.datasets.classification import base
+from eva.vision.utils import io
+
+
+class UniToPatho(base.ImageClassification):
+    """Dataset class for UniToPatho images and corresponding targets."""
+
+    _expected_dataset_lengths: Dict[str | None, int] = {
+        "train": 6270,
+        "val": 2399,
+        None: 8669,
+    }
+    """Expected dataset lengths for the splits and complete dataset."""
+
+    _license: str = "CC BY 4.0 (https://creativecommons.org/licenses/by/4.0/)"
+    """Dataset license."""
+
+    def __init__(
+        self,
+        root: str,
+        split: Literal["train", "val"] | None = None,
+        transforms: Callable | None = None,
+    ) -> None:
+        """Initialize the dataset.
+
+        The dataset is split into train and validation by taking into account
+        the patient IDs to avoid any data leakage.
+
+        Args:
+            root: Path to the root directory of the dataset.
+            split: Dataset split to use. If `None`, the entire dataset is used.
+            transforms: A function/transform which returns a transformed
+                version of the raw data samples.
+        """
+        super().__init__(transforms=transforms)
+
+        self._root = root
+        self._split = split
+
+        self._indices: List[int] = []
+
+    @property
+    @override
+    def classes(self) -> List[str]:
+        return ["HP", "NORM", "TA.HG", "TA.LG", "TVA.HG", "TVA.LG"]
+
+    @property
+    @override
+    def class_to_idx(self) -> Dict[str, int]:
+        return {"HP": 0, "NORM": 1, "TA.HG": 2, "TA.LG": 3, "TVA.HG": 4, "TVA.LG": 5}
+
+    @property
+    def _dataset_path(self) -> str:
+        """Returns the path of the image data of the dataset."""
+        return os.path.join(self._root, "800")
+
+    @functools.cached_property
+    def _image_files(self) -> List[str]:
+        """Return the list of image files in the dataset.
+
+        Returns:
+            List of image file paths.
+        """
+        files_pattern = os.path.join(self._dataset_path, "**/*.png")
+        image_files = list(glob.glob(files_pattern, recursive=True))
+        return sorted(image_files)
+
+    @functools.cached_property
+    def _manifest(self) -> pd.DataFrame:
+        """Returns the train.csv and test.csv files as a single dataframe."""
+        df_train = pd.read_csv(os.path.join(self._dataset_path, "train.csv"))
+        df_val = pd.read_csv(os.path.join(self._dataset_path, "test.csv"))
+        df_train["split"], df_val["split"] = "train", "val"
+        return pd.concat([df_train, df_val], axis=0).set_index("image_id")
+
+    @override
+    def filename(self, index: int) -> str:
+        image_path = self._image_files[self._indices[index]]
+        return os.path.relpath(image_path, self._dataset_path)
+
+    @override
+    def prepare_data(self) -> None:
+        _validators.check_dataset_exists(self._root, True)
+
+    @override
+    def configure(self) -> None:
+        self._indices = self._make_indices()
+
+    @override
+    def validate(self) -> None:
+        _validators.check_dataset_integrity(
+            self,
+            length=self._expected_dataset_lengths[self._split],
+            n_classes=6,
+            first_and_last_labels=("HP", "TVA.LG"),
+        )
+
+    @override
+    def load_image(self, index: int) -> tv_tensors.Image:
+        image_path = self._image_files[self._indices[index]]
+        return io.read_image_as_tensor(image_path)
+
+    @override
+    def load_target(self, index: int) -> torch.Tensor:
+        target = self._extract_class(self._image_files[self._indices[index]])
+        return torch.tensor(target, dtype=torch.long)
+
+    @override
+    def __len__(self) -> int:
+        return len(self._indices)
+
+    def _print_license(self) -> None:
+        """Prints the dataset license."""
+        print(f"Dataset license: {self._license}")
+
+    def _extract_image_id(self, image_file: str) -> str:
+        """Extracts the image_id from the file name."""
+        return os.path.basename(image_file)
+
+    def _extract_class(self, file: str) -> int:
+        """Extracts the target class from the manifest via the file's image_id."""
+        image_id = self._extract_image_id(file)
+        return int(self._manifest.at[image_id, "top_label"])
+
+    def _make_indices(self) -> List[int]:
+        """Builds the dataset indices for the specified split."""
+        train_indices = []
+        val_indices = []
+
+        for index, image_file in enumerate(self._image_files):
+            image_id = self._extract_image_id(image_file)
+            split = self._manifest.at[image_id, "split"]
+
+            if split == "train":
+                train_indices.append(index)
+            elif split == "val":
+                val_indices.append(index)
+            else:
+                raise ValueError(f"Invalid split value found: {split}")
+
+        split_indices = {
+            "train": train_indices,
+            "val": val_indices,
+            None: train_indices + val_indices,
+        }
+
+        return split_indices[self._split]
diff --git a/tests/eva/assets/vision/datasets/unitopatho/800/HP/149-B3-HP.ndpi_ROI__mpp0.44_reg000_crop_sk00003_(12824,33871,1812,1812).png b/tests/eva/assets/vision/datasets/unitopatho/800/HP/149-B3-HP.ndpi_ROI__mpp0.44_reg000_crop_sk00003_(12824,33871,1812,1812).png
new file mode 100644
index 00000000..41530578
--- /dev/null
+++ b/tests/eva/assets/vision/datasets/unitopatho/800/HP/149-B3-HP.ndpi_ROI__mpp0.44_reg000_crop_sk00003_(12824,33871,1812,1812).png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:add11ec1847cd9260972c1e502743ef1122590f6c77c280a394bf7f9ec298aeb
+size 3997
diff --git a/tests/eva/assets/vision/datasets/unitopatho/800/NORM/188-B4-NORM.ndpi_ROI__mpp0.44_reg001_crop_sk00026_(15904,10751,1812,1812).png b/tests/eva/assets/vision/datasets/unitopatho/800/NORM/188-B4-NORM.ndpi_ROI__mpp0.44_reg001_crop_sk00026_(15904,10751,1812,1812).png
new file mode 100644
index 00000000..41530578
--- /dev/null
+++ b/tests/eva/assets/vision/datasets/unitopatho/800/NORM/188-B4-NORM.ndpi_ROI__mpp0.44_reg001_crop_sk00026_(15904,10751,1812,1812).png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:add11ec1847cd9260972c1e502743ef1122590f6c77c280a394bf7f9ec298aeb
+size 3997
diff --git a/tests/eva/assets/vision/datasets/unitopatho/800/TVA.LG/243-B5-TVALG.ndpi_ROI__mpp0.44_reg002_crop_sk00033_(7688,23775,1812,1812).png b/tests/eva/assets/vision/datasets/unitopatho/800/TVA.LG/243-B5-TVALG.ndpi_ROI__mpp0.44_reg002_crop_sk00033_(7688,23775,1812,1812).png
new file mode 100644
index 00000000..41530578
--- /dev/null
+++ b/tests/eva/assets/vision/datasets/unitopatho/800/TVA.LG/243-B5-TVALG.ndpi_ROI__mpp0.44_reg002_crop_sk00033_(7688,23775,1812,1812).png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:add11ec1847cd9260972c1e502743ef1122590f6c77c280a394bf7f9ec298aeb
+size 3997
diff --git a/tests/eva/assets/vision/datasets/unitopatho/800/TVA.LG/TVA.LG CASO 2 - 2018-12-04 13.19.16.ndpi_ROI__mpp0.44_reg000_crop_sk00021_(9060,9947,1812,1812).png b/tests/eva/assets/vision/datasets/unitopatho/800/TVA.LG/TVA.LG CASO 2 - 2018-12-04 13.19.16.ndpi_ROI__mpp0.44_reg000_crop_sk00021_(9060,9947,1812,1812).png
new file mode 100644
index 00000000..41530578
--- /dev/null
+++ b/tests/eva/assets/vision/datasets/unitopatho/800/TVA.LG/TVA.LG CASO 2 - 2018-12-04 13.19.16.ndpi_ROI__mpp0.44_reg000_crop_sk00021_(9060,9947,1812,1812).png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:add11ec1847cd9260972c1e502743ef1122590f6c77c280a394bf7f9ec298aeb
+size 3997
diff --git a/tests/eva/assets/vision/datasets/unitopatho/800/test.csv b/tests/eva/assets/vision/datasets/unitopatho/800/test.csv
new file mode 100644
index 00000000..da319e61
--- /dev/null
+++ b/tests/eva/assets/vision/datasets/unitopatho/800/test.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b09850ac7b197941e6221f172e3a4f15867e859eac931045b80086477b3ad4c
+size 366
diff --git a/tests/eva/assets/vision/datasets/unitopatho/800/train.csv b/tests/eva/assets/vision/datasets/unitopatho/800/train.csv
new file mode 100644
index 00000000..630f2052
--- /dev/null
+++ b/tests/eva/assets/vision/datasets/unitopatho/800/train.csv
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe035b1a10c4e5996529fb88e1775a88772eb96aaa643d7c7b078857e5a5941e
+size 401
diff --git a/tests/eva/vision/data/datasets/classification/test_unitopatho.py b/tests/eva/vision/data/datasets/classification/test_unitopatho.py
new file mode 100644
index 00000000..c9533d9e
--- /dev/null
+++ b/tests/eva/vision/data/datasets/classification/test_unitopatho.py
@@ -0,0 +1,45 @@
+"""UniToPatho dataset tests."""
+
+import os
+from typing import Literal
+
+import pytest
+import torch
+from torchvision import tv_tensors
+
+from eva.vision.data import datasets
+
+
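+# NOTE: `split` is not an argument of the test function below; pytest resolves it
+# from the parametrize call into the `unitopatho_dataset` fixture, which requests it.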
+@pytest.mark.parametrize(
+    "split, index",
+    [
+        ("train", 0),
+        ("train", 1),
+        ("val", 0),
+        ("val", 1),
+    ],
+)
+def test_sample(unitopatho_dataset: datasets.UniToPatho, index: int) -> None:
+    """Tests the format of a dataset sample."""
+    sample = unitopatho_dataset[index]
+    # assert data sample is a tuple
+    assert isinstance(sample, tuple)
+    assert len(sample) == 3
+    # assert the format of the `image` and `target`
+    image, target, _ = sample
+    assert isinstance(image, tv_tensors.Image)
+    assert image.shape == (3, 40, 40)
+    assert isinstance(target, torch.Tensor)
+    assert target in [0, 1, 2, 3, 4, 5]
+
+
+@pytest.fixture(scope="function")
+def unitopatho_dataset(split: Literal["train", "val"], assets_path: str) -> datasets.UniToPatho:
+    """UniToPatho dataset fixture."""
+    dataset = datasets.UniToPatho(
+        root=os.path.join(assets_path, "vision", "datasets", "unitopatho"),
+        split=split,
+    )
+    dataset.prepare_data()
+    dataset.configure()
+    return dataset
diff --git a/tests/eva/vision/test_vision_cli.py b/tests/eva/vision/test_vision_cli.py
index b7fa1a54..4fecfceb 100644
--- a/tests/eva/vision/test_vision_cli.py
+++ b/tests/eva/vision/test_vision_cli.py
@@ -17,11 +17,13 @@
     # | online
     # classification
     "configs/vision/pathology/online/classification/bach.yaml",
+    "configs/vision/pathology/online/classification/bracs.yaml",
     "configs/vision/pathology/online/classification/breakhis.yaml",
     "configs/vision/pathology/online/classification/crc.yaml",
     "configs/vision/pathology/online/classification/gleason_arvaniti.yaml",
     "configs/vision/pathology/online/classification/mhist.yaml",
     "configs/vision/pathology/online/classification/patch_camelyon.yaml",
+    "configs/vision/pathology/online/classification/unitopatho.yaml",
     # segmentation
     "configs/vision/pathology/online/segmentation/bcss.yaml",
     "configs/vision/pathology/online/segmentation/consep.yaml",
@@ -31,6 +33,7 @@
     # | offline
     # classification
     "configs/vision/pathology/offline/classification/bach.yaml",
+    "configs/vision/pathology/offline/classification/bracs.yaml",
     "configs/vision/pathology/offline/classification/breakhis.yaml",
     "configs/vision/pathology/offline/classification/camelyon16.yaml",
     "configs/vision/pathology/offline/classification/crc.yaml",
     "configs/vision/pathology/offline/classification/gleason_arvaniti.yaml",
     "configs/vision/pathology/offline/classification/mhist.yaml",
     "configs/vision/pathology/offline/classification/panda.yaml",
     "configs/vision/pathology/offline/classification/patch_camelyon.yaml",
+    "configs/vision/pathology/offline/classification/unitopatho.yaml",
     # segmentation
     "configs/vision/pathology/offline/segmentation/bcss.yaml",
     "configs/vision/pathology/offline/segmentation/consep.yaml",