From 7b673cdd4f2072a5d4194667ca6e33f10f93c748 Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Fri, 15 Dec 2023 10:46:01 +0100 Subject: [PATCH 01/14] Added pytest for paradigm_utils.py --- osculari/paradigms/paradigm_utils.py | 140 ++++++++++++++----------- tests/datasets/imutils_test.py | 48 ++++++++- tests/paradigms/paradigm_utils_test.py | 48 +++++++++ 3 files changed, 171 insertions(+), 65 deletions(-) create mode 100644 tests/paradigms/paradigm_utils_test.py diff --git a/osculari/paradigms/paradigm_utils.py b/osculari/paradigms/paradigm_utils.py index 954d6c4..483c0d4 100644 --- a/osculari/paradigms/paradigm_utils.py +++ b/osculari/paradigms/paradigm_utils.py @@ -20,20 +20,21 @@ ] -def accuracy_preds(output: torch.Tensor, target: torch.Tensor, - topk: Optional[Sequence] = (1,)) -> (List, List): +def _accuracy_preds(output: torch.Tensor, target: torch.Tensor, + topk: Optional[Sequence] = (1,)) -> (List[float], List[torch.Tensor]): """ - Computes the accuracy over the k top predictions. + Compute accuracy and correct predictions for the top-k thresholds. - Args: - output: The model's output tensor containing predictions for each input sample. - target: The ground-truth labels for each input sample. - topk: An optional list of top-k accuracy thresholds to be computed (e.g., (1, 5)). + Parameters: + output (torch.Tensor): Model predictions. + target (torch.Tensor): Ground truth labels. + topk (Optional[Sequence]): Top-k thresholds for accuracy computation. Default is (1,). Returns: - A tuple containing the computed accuracies and correct predictions for each top-k threshold. + Tuple[List[float], List[torch.Tensor]]: List of accuracies for each top-k threshold, + list of correct predictions for each top-k + threshold. """ - with torch.inference_mode(): # Ensure that the model is in inference mode maxk = max(topk) # Extract the maximum top-k value batch_size = target.size(0) # Get the batch size @@ -59,38 +60,47 @@ def accuracy_preds(output: torch.Tensor, target: torch.Tensor, def accuracy(output: torch.Tensor, target: torch.Tensor) -> float: """ - This function computes the accuracy of a model's prediction on a given set of data. + Compute the accuracy of model predictions. - Args: - output: The model's predicted output (torch.Tensor). - target: The ground truth labels (torch.Tensor). + Parameters: + output (torch.Tensor): Model predictions. + target (torch.Tensor): Ground truth labels. Returns: - The accuracy of the model's predictions (float). + float: Accuracy of the model predictions. """ + # Ensure the output has two dimensions (Linear layer output is two-dimensional) + assert len(output.shape) == 2 + # Ensure output and target have the same number of elements + assert len(output) == len(target) + # Check if the model is performing binary classification if output.shape[1] == 1: - # Check if the model produces one-dimensional predictions - pred = torch.equal(torch.gt(output, 0), target.float()) - return pred.float().mean(0, keepdim=True)[0] + # Convert to binary predictions (greater than 0) + output_class = torch.gt(output, 0).flatten() + # Compute accuracy for binary classification + pred = torch.eq(output_class, target) + return pred.float().mean().item() # Otherwise, the model produces multidimensional predictions - acc, _ = accuracy_preds(output, target, topk=[1]) + acc, _ = _accuracy_preds(output, target, topk=[1]) return acc[0].item() # Extract the top-1 accuracy def circular_mean(a: float, b: float) -> float: """ - Computes the circular mean of two values. 
+ Compute the circular mean of two angles in radians. - Args: - a: The first value (float). - b: The second value (float). + Parameters: + a (float): First angle in radians. + b (float): Second angle in radians. - Returns: - The circular mean of the two values (float). - """ + Returns: + float: Circular mean of the two angles. + """ + # Calculate the circular mean using a conditional expression mu = (a + b + 1) / 2 if abs(a - b) > 0.5 else (a + b) / 2 + # Adjust the result to be in the range [0, 1) return mu if mu >= 1 else mu - 1 @@ -115,24 +125,23 @@ def midpoint( Union[float, npt.NDArray, None] ): """ - Finds the midpoint of the stimulus range based on the current accuracy and the target accuracy - threshold. - - Args: - acc: The current accuracy value (float). - low: The lower bound of the stimulus range (float or NumPy array). - mid: The current midpoint of the stimulus range (float or NumPy array). - high: The upper bound of the stimulus range (float or NumPy array). - th: The target accuracy threshold (float). - ep: The convergence tolerance (float; optional). - circular_channels: The list of circular channels for applying circular arithmetic when - computing the average (list; optional). + Compute new midpoints for a given accuracy in a binary search. + + Parameters: + acc (float): Current accuracy. + low (Union[float, npt.NDArray]): Low value in the search space. + mid (Union[float, npt.NDArray]): Midpoint in the search space. + high (Union[float, npt.NDArray]): High value in the search space. + th (float): Target accuracy. + ep (Optional[float]): Acceptable range around the target accuracy. Default is 1e-4. + circular_channels (Optional[List]): List of circular channels. Default is None. Returns: - The new low, mid, and high values of the stimulus range based on the current accuracy and - the target accuracy threshold. + (Union[float, npt.NDArray, None], Union[float, npt.NDArray, None], Union[float, npt.NDArray, None]): + Tuple containing the updated low, mid, and high values. + If the accuracy is within the acceptable range of the target accuracy, returns + (None, None, None). """ - # Calculate the difference between the current accuracy and the target accuracy diff_acc = acc - th @@ -157,32 +166,35 @@ def midpoint( return mid, new_mid, high -def train_linear_probe(model: ProbeNet, dataset: Union[TorchDataset, TorchDataLoader], - epoch_loop: Callable[[nn.Module, TorchDataLoader, Any, torch.device], Dict], - out_dir: str, device: Optional[torch.device] = None, - epochs: Optional[int] = 10, - optimiser: Optional[torch.optim.Optimizer] = None, - scheduler: Optional[lr_scheduler.LRScheduler] = None) -> Dict: +def train_linear_probe( + model: ProbeNet, + dataset: Union[TorchDataset, TorchDataLoader], + epoch_loop: Callable[[nn.Module, TorchDataLoader, Any, torch.device], Dict], + out_dir: str, + device: Optional[torch.device] = None, + epochs: Optional[int] = 10, + optimiser: Optional[torch.optim.Optimizer] = None, + scheduler: Optional[lr_scheduler.LRScheduler] = None +) -> Dict: """ - Trains the linear probe network on the specified dataset. - - Args: - model: The linear probe network to train. - dataset: The dataset or dataloader for training. - epoch_loop: A function to perform an epoch of training or testing. This function - must accept for positional arguments (i.e., model, train_loader, optimiser, device). - This function should return a dictionary. - out_dir: The output directory for saving checkpoints. - device: The device to use for training (Optional). 
- epochs: The number of epochs to train for (Optional). - optimiser: The optimiser to use for training (default: SGD) (Optional). - scheduler: The learning rate scheduler to use - (default: MultiStepLR at 50 and 80% of epochs) (Optional). - - Returns: - A dictionary containing training logs. - """ + Train a linear probe on top of a frozen backbone model. + + Parameters: + model (ProbeNet): Linear probe model. + dataset (Union[TorchDataset, TorchDataLoader]): Training dataset or data loader. + epoch_loop (Callable): Function defining the training loop for one epoch. This function + must accept for positional arguments (i.e., model, train_loader, optimiser, device). + This function should return a dictionary. + out_dir (str): Output directory to save checkpoints. + device (Optional[torch.device]): Device on which to perform training. + epochs (Optional[int]): Number of training epochs. Default is 10. + optimiser (Optional[torch.optim.Optimizer]): Optimization algorithm. Default is SGD. + scheduler (Optional[lr_scheduler.LRScheduler]): Learning rate scheduler. Default is + MultiStepLR at 50 and 80% of epochs + Returns: + Dict: Training logs containing statistics. + """ # Data loading if isinstance(dataset, TorchDataLoader): train_loader = dataset diff --git a/tests/datasets/imutils_test.py b/tests/datasets/imutils_test.py index f487d8f..ed1c835 100644 --- a/tests/datasets/imutils_test.py +++ b/tests/datasets/imutils_test.py @@ -2,8 +2,8 @@ Unit tests for imutils_test.py """ -import numpy as np import pytest +import numpy as np from osculari.datasets import imutils @@ -33,7 +33,53 @@ def test_michelson_contrast_valid_input(sample_image): np.testing.assert_almost_equal(result, expected_result) +def test_michelson_contrast_contrast_one(sample_image): + contrast_factor = 1.0 + result = imutils.michelson_contrast(sample_image, contrast_factor) + + # Ensure that the output has the same shape as the input + assert result.shape == sample_image.shape + + # Ensure that the output is a NumPy array + assert isinstance(result, np.ndarray) + + # Ensure that the output is identical to input + expected_result = sample_image + np.testing.assert_equal(result, expected_result) + + def test_michelson_contrast_invalid_contrast(): with pytest.raises(AssertionError): contrast_factor = 1.5 # Invalid contrast value imutils.michelson_contrast(np.array([[1, 2], [3, 4]]), contrast_factor) + + +def test_gamma_correction_valid_input(sample_image): + gamma_factor = 0.5 + result = imutils.gamma_correction(sample_image, gamma_factor) + + # Ensure that the output has the same shape as the input + assert result.shape == sample_image.shape + + # Ensure that the output is a NumPy array + assert isinstance(result, np.ndarray) + + # Ensure that gamma correction is applied correctly + expected_result = np.array([[169, 201, 223], + [187, 213, 232], + [201, 223, 239]], dtype='uint8') + np.testing.assert_almost_equal(result, expected_result) + + +def test_gamma_correction_gamma_one(sample_image): + gamma_factor = 1.0 + result = imutils.gamma_correction(sample_image, gamma_factor) + + # Ensure that when gamma is 1, the output is the same as the input + np.testing.assert_almost_equal(result, sample_image) + + +def test_gamma_correction_zero_gamma(): + with pytest.raises(AssertionError): + gamma_factor = 0.0 # Invalid gamma value + imutils.gamma_correction(np.array([[1, 2], [3, 4]]), gamma_factor) diff --git a/tests/paradigms/paradigm_utils_test.py b/tests/paradigms/paradigm_utils_test.py new file mode 100644 index 0000000..4ebaff2 --- 
/dev/null +++ b/tests/paradigms/paradigm_utils_test.py @@ -0,0 +1,48 @@ +""" +Unit tests for paradigm_utils.py +""" + +import pytest +import torch + +from osculari.paradigms import paradigm_utils + + +def test_accuracy_binary_classification(): + # Test accuracy for binary classification predictions + output = torch.tensor([0.2, -0.1, 0.8, -0.4]).view(4, 1) + target = torch.tensor([1, 0, 1, 0]) + acc = paradigm_utils.accuracy(output, target) + assert acc == 1.0 + + +def test_accuracy_multi_classification(): + # Test accuracy for multi-class predictions + output = torch.tensor([[0.2, -0.1, 0.8, -0.4], [0.1, 0.3, -0.2, 0.5]]) + target = torch.tensor([2, 0]) + acc = paradigm_utils.accuracy(output, target) + assert acc == 0.5 + + +def test_accuracy_invalid_input(): + # Test with invalid input (different shapes) + output = torch.tensor([[0.2, -0.1, 0.8, -0.4], [0.1, 0.3, -0.2, 0.5]]) + target = torch.tensor([2, 0, 1]) # Invalid target shape + with pytest.raises(AssertionError): + paradigm_utils.accuracy(output, target) + + +def test_accuracy_zero_dimensional(): + # Test with zero-dimensional input (should raise an error) + output = torch.tensor(0.5) + target = torch.tensor(1) + with pytest.raises(AssertionError): + paradigm_utils.accuracy(output, target) + + +def test_accuracy_one_dimensional_equal(): + # Test accuracy for one-dimensional predictions where output and target are equal + output = torch.tensor([0.2, -0.1, 0.8, -0.4]).view(4, 1) + target = torch.tensor([0, 0, 1, 0]) + acc = paradigm_utils.accuracy(output, target) + assert acc == 0.75 From 08cdf3e0d6405e893d695e4be3b93e0d6d2f65f8 Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Fri, 15 Dec 2023 19:41:15 +0100 Subject: [PATCH 02/14] Added more pytests for paradigm_utils.py --- osculari/paradigms/paradigm_utils.py | 9 +- tests/paradigms/paradigm_utils_test.py | 171 +++++++++++++++++++++++++ 2 files changed, 177 insertions(+), 3 deletions(-) diff --git a/osculari/paradigms/paradigm_utils.py b/osculari/paradigms/paradigm_utils.py index 483c0d4..717aad1 100644 --- a/osculari/paradigms/paradigm_utils.py +++ b/osculari/paradigms/paradigm_utils.py @@ -87,9 +87,9 @@ def accuracy(output: torch.Tensor, target: torch.Tensor) -> float: return acc[0].item() # Extract the top-1 accuracy -def circular_mean(a: float, b: float) -> float: +def _circular_mean(a: float, b: float) -> float: """ - Compute the circular mean of two angles in radians. + Compute the circular mean of two variables in the range of 0 to 1. Parameters: a (float): First angle in radians. @@ -98,6 +98,9 @@ def circular_mean(a: float, b: float) -> float: Returns: float: Circular mean of the two angles. 
""" + # Ensure a and b are in the range of 0 to 1 + assert 0 <= a <= 1 + assert 0 <= b <= 1 # Calculate the circular mean using a conditional expression mu = (a + b + 1) / 2 if abs(a - b) > 0.5 else (a + b) / 2 # Adjust the result to be in the range [0, 1) @@ -112,7 +115,7 @@ def _compute_avg(a: Union[float, npt.NDArray], b: Union[float, npt.NDArray], a, b = a.copy().squeeze(), b.copy().squeeze() c = (a + b) / 2 for i in circular_channels: - c[i] = circular_mean(a[i], b[i]) + c[i] = _circular_mean(a[i], b[i]) return c diff --git a/tests/paradigms/paradigm_utils_test.py b/tests/paradigms/paradigm_utils_test.py index 4ebaff2..0dadfa0 100644 --- a/tests/paradigms/paradigm_utils_test.py +++ b/tests/paradigms/paradigm_utils_test.py @@ -3,9 +3,14 @@ """ import pytest +import numpy as np +import shutil +import os import torch +from torch.utils.data import Dataset from osculari.paradigms import paradigm_utils +from osculari import models def test_accuracy_binary_classification(): @@ -46,3 +51,169 @@ def test_accuracy_one_dimensional_equal(): target = torch.tensor([0, 0, 1, 0]) acc = paradigm_utils.accuracy(output, target) assert acc == 0.75 + + +def test_accuracy_multi_target(): + # Test accuracy for multi-class predictions + output = torch.tensor([[0.2, -0.1, 0.8, -0.4], [0.1, 0.3, -0.2, 0.5]]) + target = torch.tensor([[0.1, 0.1, 0.8, 0.0], [0.6, 0.3, 0.1, 0.0]]) + acc = paradigm_utils.accuracy(output, target) + assert acc == 0.5 + + +def test_midpoint_within_acceptable_range(): + # Test when the accuracy is within the acceptable range + acc = 0.5 + low = 0.4 + mid = 0.5 + high = 0.6 + th = 0.5 + ep = 0.1 + circular_channels = None + + updated_low, updated_mid, updated_high = paradigm_utils.midpoint( + acc, low, mid, high, th, ep, circular_channels + ) + + assert updated_low is None + assert updated_mid is None + assert updated_high is None + + +def test_midpoint_accuracy_above_target(): + # Test when the accuracy is above the target + acc = 0.7 + low = 0.4 + mid = 0.5 + high = 0.6 + th = 0.5 + ep = 0.1 + circular_channels = None + + updated_low, updated_mid, updated_high = paradigm_utils.midpoint( + acc, low, mid, high, th, ep, circular_channels + ) + + # Check if the new midpoint is computed correctly + assert np.isclose(updated_low, low) + assert np.isclose(updated_mid, (low + mid) / 2) + assert np.isclose(updated_high, mid) + + +def test_midpoint_accuracy_below_target(): + # Test when the accuracy is below the target + acc = 0.3 + low = 0.4 + mid = 0.5 + high = 0.6 + th = 0.5 + ep = 0.1 + circular_channels = None + + updated_low, updated_mid, updated_high = paradigm_utils.midpoint( + acc, low, mid, high, th, ep, circular_channels + ) + + # Check if the new midpoint is computed correctly + assert np.isclose(updated_low, mid) + assert np.isclose(updated_mid, (mid + high) / 2) + assert np.isclose(updated_high, high) + + +def test_midpoint_circular_channels(): + # Test with circular channels + acc = 0.7 + low = np.array([350 / 360, 350]) + mid = np.array([10 / 360, 10]) + high = np.array([20 / 360, 20]) + th = 0.5 + ep = 0.1 + circular_channels = [0] + + updated_low, updated_mid, updated_high = paradigm_utils.midpoint( + acc, low, mid, high, th, ep, circular_channels + ) + + # Check if the new midpoint is computed correctly for circular channels + assert np.allclose(updated_low, low) + assert np.allclose(updated_mid, np.array([(low[0] + mid[0] + 1) / 2, (low[1] + mid[1]) / 2])) + assert np.allclose(updated_high, mid) + + +def test_midpoint_circular_channels_invalid_input(): + # Test with circular 
channels + acc = 0.7 + low = np.array([350, 350]) + mid = np.array([10, 10]) + high = np.array([20, 20]) + th = 0.5 + ep = 0.1 + circular_channels = [0] + + with pytest.raises(AssertionError): + _ = paradigm_utils.midpoint(acc, low, mid, high, th, ep, circular_channels) + + +class DummyDataset(Dataset): + """Placeholder for a dummy dataset""" + + def __init__(self): + self.data = torch.randn((10, 3, 224, 224)) + self.labels = torch.randint(0, 2, (10,)) + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + return self.data[idx], self.labels[idx] + + +def dummy_epoch_loop(model, train_loader, optimiser, device): + """Placeholder for the epoch_loop function""" + model.train() + for data, target in train_loader: + data, target = data.to(device), target.to(device) + optimiser.zero_grad() + output = model(data, data) + loss = torch.nn.functional.cross_entropy(output, target) + loss.backward() + optimiser.step() + return {'loss': [loss.item()]} + + +def test_train_linear_probe(): + model = models.paradigm_2afc_merge_concatenate( + architecture='resnet18', weights=None, layers='block0', img_size=224 + ) + dataset = DummyDataset() + out_dir = "test_output" + device = torch.device("cpu") + epochs = 5 + + training_logs = paradigm_utils.train_linear_probe( + model=model, + dataset=dataset, + epoch_loop=dummy_epoch_loop, + out_dir=out_dir, + device=device, + epochs=epochs, + ) + + assert 'loss' in training_logs + assert len(training_logs['loss']) == epochs + + # Check if the output directory is created and the checkpoint file exists + assert os.path.exists(out_dir) + checkpoint_path = os.path.join(out_dir, 'checkpoint.pth.tar') + assert os.path.exists(checkpoint_path) + + # Check if the checkpoint file is valid + checkpoint = torch.load(checkpoint_path) + assert 'epoch' in checkpoint + assert 'network' in checkpoint + assert 'optimizer' in checkpoint + assert 'scheduler' in checkpoint + assert 'log' in checkpoint + + # Clean up the temporary test output directory + shutil.rmtree(out_dir) From bbea927bcdc8666b01bef2aa07d0d2f2c16f31d2 Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Fri, 15 Dec 2023 21:06:31 +0100 Subject: [PATCH 03/14] Added unit tests for model_utils.py --- osculari/models/model_utils.py | 1 + tests/models/model_utils_test.py | 119 +++++++++++++++++++++++++++++++ 2 files changed, 120 insertions(+) create mode 100644 tests/models/model_utils_test.py diff --git a/osculari/models/model_utils.py b/osculari/models/model_utils.py index 3b5abb3..f812f6b 100644 --- a/osculari/models/model_utils.py +++ b/osculari/models/model_utils.py @@ -125,6 +125,7 @@ def generic_features_size(model: nn.Module, img_size: int, # Move the input image to GPU and change the data type if is_clip is True if is_clip: + model = model.cuda() img = img.cuda() img = img.type(torch.float16) diff --git a/tests/models/model_utils_test.py b/tests/models/model_utils_test.py new file mode 100644 index 0000000..729af07 --- /dev/null +++ b/tests/models/model_utils_test.py @@ -0,0 +1,119 @@ +""" +Unit tests for model_utils.py +""" + +import pytest +import torch +from torch import nn + +from osculari.models import model_utils +from osculari import models + + +def test_is_resnet_backbone_resnet(): + # Test with a valid ResNet architecture + architecture = 'resnet18' + assert model_utils.is_resnet_backbone(architecture) + + +def test_is_resnet_backbone_resnext(): + # Test with a valid ResNeXt architecture + architecture = 'resnext50_32x4d' + assert model_utils.is_resnet_backbone(architecture) + + +def 
test_is_resnet_backbone_taskonomy(): + # Test with a valid Taskonomy architecture + architecture = 'taskonomy_autoencoding' + assert model_utils.is_resnet_backbone(architecture) + + +def test_is_resnet_backbone_non_resnet(): + # Test with a non-ResNet architecture + architecture = 'vgg16' + assert not model_utils.is_resnet_backbone(architecture) + + +def test_is_resnet_backbone_case_insensitive(): + # Test with case-insensitive match + architecture = 'ResNeXt101_32x8d' + assert not model_utils.is_resnet_backbone(architecture) + + +def test_is_resnet_backbone_empty_string(): + # Test with an empty string (should return False) + architecture = '' + assert not model_utils.is_resnet_backbone(architecture) + + +def test_check_input_size_valid_size(): + # Test with a valid input size for ViT architecture + architecture = 'vit_b_32' + img_size = 224 + model_utils.check_input_size(architecture, img_size) + + +def test_check_input_size_valid_size_clip(): + # Test with a valid input size for CLIP architecture + architecture = 'clip_RN50x4' + img_size = 288 + model_utils.check_input_size(architecture, img_size) + + +def test_check_input_size_invalid_size_vit(): + # Test with an invalid input size for ViT architecture + architecture = 'vit_b_32' + img_size = 256 + with pytest.raises(RuntimeError, match=r'Network .* expects size .* but got .*'): + model_utils.check_input_size(architecture, img_size) + + +def test_check_input_size_invalid_size_clip(): + # Test with an invalid input size for CLIP architecture + architecture = 'clip_RN50x16' + img_size = 300 + with pytest.raises(RuntimeError, match=r'Network .* expects size .* but got .*'): + model_utils.check_input_size(architecture, img_size) + + +def test_check_input_size_other_architecture(): + # Test with other architectures (should not raise an error) + architecture = 'resnet50' + img_size = 224 + model_utils.check_input_size(architecture, img_size) + + +class SimpleModel(nn.Module): + def __init__(self): + super(SimpleModel, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1) + self.pool = nn.MaxPool2d(kernel_size=2, stride=2) + + def forward(self, x): + x = self.conv1(x) + x = self.pool(x) + return x + + +def test_generic_features_size_resnet(): + # Test with a valid model and image size + model = models.FeatureExtractor(architecture='resnet18', weights=None, layers='block0') + img_size = 128 + output_size = model_utils.generic_features_size(model, img_size) + assert output_size == (64, img_size // 4, img_size // 4) + + +def test_generic_features_size_fc(): + # Test with a valid model and image size + model = models.FeatureExtractor(architecture='vgg11', weights=None, layers='fc') + img_size = 128 + output_size = model_utils.generic_features_size(model, img_size) + assert output_size == torch.Size([1000]) + + +def test_generic_features_size_valid_clip(): + # Test with a valid CLIP model and image size + model = SimpleModel().half() + img_size = 128 + output_size = model_utils.generic_features_size(model, img_size, is_clip=True) + assert output_size == (64, img_size // 2, img_size // 2) From c5cbb45ecfbc20d54ed3886f4eba8967995a92ee Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Fri, 15 Dec 2023 21:36:58 +0100 Subject: [PATCH 04/14] Added unit tests for forced_choice.py --- tests/models/model_utils_test.py | 32 +++++----- tests/paradigms/forced_choice_test.py | 89 +++++++++++++++++++++++++++ 2 files changed, 105 insertions(+), 16 deletions(-) create mode 100644 tests/paradigms/forced_choice_test.py diff --git 
a/tests/models/model_utils_test.py b/tests/models/model_utils_test.py index 729af07..22bc288 100644 --- a/tests/models/model_utils_test.py +++ b/tests/models/model_utils_test.py @@ -83,16 +83,16 @@ def test_check_input_size_other_architecture(): model_utils.check_input_size(architecture, img_size) -class SimpleModel(nn.Module): - def __init__(self): - super(SimpleModel, self).__init__() - self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1) - self.pool = nn.MaxPool2d(kernel_size=2, stride=2) - - def forward(self, x): - x = self.conv1(x) - x = self.pool(x) - return x +# class SimpleModel(nn.Module): +# def __init__(self): +# super(SimpleModel, self).__init__() +# self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1) +# self.pool = nn.MaxPool2d(kernel_size=2, stride=2) +# +# def forward(self, x): +# x = self.conv1(x) +# x = self.pool(x) +# return x def test_generic_features_size_resnet(): @@ -111,9 +111,9 @@ def test_generic_features_size_fc(): assert output_size == torch.Size([1000]) -def test_generic_features_size_valid_clip(): - # Test with a valid CLIP model and image size - model = SimpleModel().half() - img_size = 128 - output_size = model_utils.generic_features_size(model, img_size, is_clip=True) - assert output_size == (64, img_size // 2, img_size // 2) +# def test_generic_features_size_valid_clip(): +# # Test with a valid CLIP model and image size +# model = SimpleModel().half() +# img_size = 128 +# output_size = model_utils.generic_features_size(model, img_size, is_clip=True) +# assert output_size == (64, img_size // 2, img_size // 2) diff --git a/tests/paradigms/forced_choice_test.py b/tests/paradigms/forced_choice_test.py new file mode 100644 index 0000000..0e6172d --- /dev/null +++ b/tests/paradigms/forced_choice_test.py @@ -0,0 +1,89 @@ +""" +Unit tests for forced_choice.py +""" + +import pytest +import torch + +from osculari.paradigms import forced_choice +from osculari import models + + +@pytest.fixture +def example_model(): + """Create an example model""" + return models.paradigm_2afc_merge_concatenate( + architecture='resnet18', weights=None, layers='block0', img_size=224 + ) + + +class DummyDataset: + """Placeholder for a dummy dataset""" + + def __init__(self, num_batches=3, batch_size=4, img_size=224, num_classes=2): + self.num_batches = num_batches + self.batch_size = batch_size + self.img_size = img_size + self.num_classes = num_classes + + def __iter__(self): + for _ in range(self.num_batches): + batch_data = ( + torch.randn((self.batch_size, 3, self.img_size, self.img_size)), + torch.randn((self.batch_size, 3, self.img_size, self.img_size)), + torch.randint(0, self.num_classes, (self.batch_size,)) + ) + yield batch_data + + +def test_epoch_loop_train(example_model): + # Test training mode + optimiser = torch.optim.SGD(example_model.parameters(), lr=0.01, momentum=0.9) + dataset = DummyDataset() + epoch_log = forced_choice.epoch_loop(example_model, dataset, optimiser, return_outputs=False) + + assert 'accuracy' in epoch_log + assert 'loss' in epoch_log + assert 'output' not in epoch_log # output should not be present if return_outputs=False + + +def test_epoch_loop_eval(example_model): + # Test evaluation mode + dataset = DummyDataset() + epoch_log = forced_choice.epoch_loop(example_model, dataset, optimiser=None, + return_outputs=False) + + assert 'accuracy' in epoch_log + assert 'loss' in epoch_log + assert 'output' not in epoch_log # output should not be present if return_outputs=False + + +def test_epoch_loop_return_outputs(example_model): 
+ # Test return_outputs=True + optimiser = torch.optim.SGD(example_model.parameters(), lr=0.01, momentum=0.9) + dataset = DummyDataset() + epoch_log = forced_choice.epoch_loop(example_model, dataset, optimiser, return_outputs=True) + + assert 'accuracy' in epoch_log + assert 'loss' in epoch_log + assert 'output' in epoch_log # output should be present if return_outputs=True + + +def test_test_dataset(example_model): + # Test evaluation mode + dataset = DummyDataset() + epoch_log = forced_choice.test_dataset(example_model, dataset) + + assert 'accuracy' in epoch_log + assert 'loss' in epoch_log + assert 'output' not in epoch_log # output should not be present + + +def test_predict_dataset(example_model): + # Test evaluation mode + dataset = DummyDataset() + epoch_log = forced_choice.predict_dataset(example_model, dataset) + + assert 'accuracy' in epoch_log + assert 'loss' in epoch_log + assert 'output' in epoch_log # output should be present From 4b67dec46db5e09ccf57c6659afc955ddba4117f Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Fri, 15 Dec 2023 22:32:10 +0100 Subject: [PATCH 05/14] Added unit tests for staircase proedure and train_linear_probe functions --- osculari/paradigms/adaptive_psychophysics.py | 4 +- .../paradigms/adaptive_psychophysics_test.py | 94 +++++++++++++++++++ tests/paradigms/paradigm_utils_test.py | 53 ++++++++++- 3 files changed, 144 insertions(+), 7 deletions(-) create mode 100644 tests/paradigms/adaptive_psychophysics_test.py diff --git a/osculari/paradigms/adaptive_psychophysics.py b/osculari/paradigms/adaptive_psychophysics.py index a5f0005..205c3a0 100644 --- a/osculari/paradigms/adaptive_psychophysics.py +++ b/osculari/paradigms/adaptive_psychophysics.py @@ -55,7 +55,7 @@ def staircase(model: nn.Module, results = [] # Number of attempts to perform the staircase procedure - attempt_num = 0 + attempt_num = 1 # Perform the staircase procedure until convergence while True: @@ -70,7 +70,7 @@ def staircase(model: nn.Module, accuracy = np.mean(test_log['accuracy']) # Check if accuracy is within the acceptable range - if 1 < accuracy < 0: + if 1 < accuracy or accuracy < 0: raise RuntimeError('Accuracy for staircase procedure must be between 0 and 1.') # Append the current midpoint and accuracy to the results list diff --git a/tests/paradigms/adaptive_psychophysics_test.py b/tests/paradigms/adaptive_psychophysics_test.py new file mode 100644 index 0000000..318c8cb --- /dev/null +++ b/tests/paradigms/adaptive_psychophysics_test.py @@ -0,0 +1,94 @@ +""" +Unit tests for adaptive_psychophysics_test.py +""" + +import pytest +import numpy as np +import torch +import torch.nn as nn +from torch.utils.data import Dataset + +from osculari.paradigms import staircase + + +class SimpleModel(nn.Module): + """Placeholder for a simple neural network model""" + + def __init__(self): + super(SimpleModel, self).__init__() + self.fc = nn.Linear(22, 1) + + def forward(self, x): + return self.fc(x) + + +class DummyDataset(Dataset): + """Placeholder for a dummy dataset""" + + def __init__(self): + self.data = torch.randn((10, 22)) + self.labels = torch.randint(0, 2, (10,)) + + def __len__(self): + return len(self.data) + + def __getitem__(self, idx): + return self.data[idx], self.labels[idx] + + +def dummy_test_function(model, dataloader, device): + """Dummy test function for evaluation""" + model.to(device) + outputs = [] + for data, target in dataloader: + data, target = data.to(device), target.to(device) + output = model(data) + outputs.extend(output.detach().cpu().numpy()) + accuracy 
= np.random.uniform() + return {'accuracy': [accuracy]} + + +def dummy_dataset_function(_mid_val): + """Dummy dataset function for creating the dataset and dataloader""" + batch_size = 10 + th = 0.1 + return DummyDataset(), batch_size, th + + +def test_staircase(): + # Test the basic functionality of the staircase procedure + model = SimpleModel() + low_val = 0.0 + high_val = 1.0 + result = staircase(model, dummy_test_function, dummy_dataset_function, low_val, high_val, + max_attempts=5) + + assert isinstance(result, np.ndarray) + assert result.shape[1] == 2 # The result should have two columns (midpoint, accuracy) + assert result.shape[0] <= 5 # The result should have at most 5 data points (max_attempts) + + +def test_staircase_invalid_accuracy(): + # Test when the accuracy is outside the acceptable range + model = SimpleModel() + low_val = 0.0 + high_val = 1.0 + + def dummy_invalid_test_function(_model, _dataloader, _device): + """Dummy function for invalid accuracy.""" + return {'accuracy': [1.5]} # Invalid accuracy value + + with pytest.raises(RuntimeError): + _ = staircase(model, dummy_invalid_test_function, dummy_dataset_function, low_val, high_val) + + +def test_staircase_max_attempts(): + # Test when the staircase procedure reaches the maximum number of attempts + model = SimpleModel() + low_val = 0.0 + high_val = 1.0 + result = staircase(model, dummy_test_function, dummy_dataset_function, low_val, high_val, + max_attempts=1) + + assert isinstance(result, np.ndarray) + assert result.shape[0] == 1 # The result should have only one data point (max_attempts) diff --git a/tests/paradigms/paradigm_utils_test.py b/tests/paradigms/paradigm_utils_test.py index 0dadfa0..639e1ae 100644 --- a/tests/paradigms/paradigm_utils_test.py +++ b/tests/paradigms/paradigm_utils_test.py @@ -8,11 +8,20 @@ import os import torch from torch.utils.data import Dataset +from torch.utils.data import DataLoader as TorchDataLoader from osculari.paradigms import paradigm_utils from osculari import models +@pytest.fixture +def example_model(): + """Create an example model""" + return models.paradigm_2afc_merge_concatenate( + architecture='resnet18', weights=None, layers='block0', img_size=224 + ) + + def test_accuracy_binary_classification(): # Test accuracy for binary classification predictions output = torch.tensor([0.2, -0.1, 0.8, -0.4]).view(4, 1) @@ -181,17 +190,14 @@ def dummy_epoch_loop(model, train_loader, optimiser, device): return {'loss': [loss.item()]} -def test_train_linear_probe(): - model = models.paradigm_2afc_merge_concatenate( - architecture='resnet18', weights=None, layers='block0', img_size=224 - ) +def test_train_linear_probe(example_model): dataset = DummyDataset() out_dir = "test_output" device = torch.device("cpu") epochs = 5 training_logs = paradigm_utils.train_linear_probe( - model=model, + model=example_model, dataset=dataset, epoch_loop=dummy_epoch_loop, out_dir=out_dir, @@ -217,3 +223,40 @@ def test_train_linear_probe(): # Clean up the temporary test output directory shutil.rmtree(out_dir) + + +def test_train_linear_probe_dataloader(example_model): + dataloader = TorchDataLoader( + DummyDataset(), batch_size=4, shuffle=False, num_workers=0, pin_memory=True + ) + out_dir = "test_output" + device = torch.device("cpu") + epochs = 5 + + training_logs = paradigm_utils.train_linear_probe( + model=example_model, + dataset=dataloader, + epoch_loop=dummy_epoch_loop, + out_dir=out_dir, + device=device, + epochs=epochs, + ) + + assert 'loss' in training_logs + assert len(training_logs['loss']) == 
epochs + + # Check if the output directory is created and the checkpoint file exists + assert os.path.exists(out_dir) + checkpoint_path = os.path.join(out_dir, 'checkpoint.pth.tar') + assert os.path.exists(checkpoint_path) + + # Check if the checkpoint file is valid + checkpoint = torch.load(checkpoint_path) + assert 'epoch' in checkpoint + assert 'network' in checkpoint + assert 'optimizer' in checkpoint + assert 'scheduler' in checkpoint + assert 'log' in checkpoint + + # Clean up the temporary test output directory + shutil.rmtree(out_dir) From ffea06380681f2c8fe9c039313a61ae0bc51dc49 Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Fri, 15 Dec 2023 22:57:40 +0100 Subject: [PATCH 06/14] Added unit tests for dataset_utils.py and gratings_test.py --- osculari/datasets/dataset_utils.py | 2 + osculari/datasets/gratings.py | 2 +- tests/datasets/dataset_utils_test.py | 84 ++++++++++++++++++++++++++++ tests/datasets/gratings_test.py | 61 ++++++++++++++++++++ 4 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 tests/datasets/dataset_utils_test.py create mode 100644 tests/datasets/gratings_test.py diff --git a/osculari/datasets/dataset_utils.py b/osculari/datasets/dataset_utils.py index e4c1064..84ac82e 100644 --- a/osculari/datasets/dataset_utils.py +++ b/osculari/datasets/dataset_utils.py @@ -186,6 +186,8 @@ def background_img(bg_type: Any, bg_size: Union[int, Tuple], im2double=True) -> num_colours = np.random.randint(3, 25) num_patches = np.random.randint(2, bg_size[0] // 20) bg_img = _patch_img(bg_size, num_colours, num_patches, channels) + if 'achromatic' in bg_type: + bg_img = np.repeat(bg_img, 3, axis=2) else: raise RuntimeError('Unsupported background type %s.' % bg_type) # Handle user-specified background values diff --git a/osculari/datasets/gratings.py b/osculari/datasets/gratings.py index 8a103f9..1b4fa7d 100644 --- a/osculari/datasets/gratings.py +++ b/osculari/datasets/gratings.py @@ -91,7 +91,7 @@ class GratingsDataset(TorchDataset): """ def __init__(self, img_size: int, spatial_frequencies: Optional[Sequence[int]] = None, - thetas: Optional[Sequence[int]] = None, gaussian_sigma: Optional[float] = None, + thetas: Optional[Sequence[float]] = None, gaussian_sigma: Optional[float] = None, transform: Optional[Callable] = None) -> None: super(GratingsDataset, self).__init__() self.img_size = img_size diff --git a/tests/datasets/dataset_utils_test.py b/tests/datasets/dataset_utils_test.py new file mode 100644 index 0000000..4be016c --- /dev/null +++ b/tests/datasets/dataset_utils_test.py @@ -0,0 +1,84 @@ +""" +Unit tests for dataset_utils.py +""" + +import pytest +import numpy as np + +from osculari.datasets import dataset_utils + + +def test_background_uniform_achromatic(): + bg_size = (256, 256) + bg_img = dataset_utils.background_img('uniform_achromatic', bg_size) + assert bg_img.shape == (*bg_size, 3) + assert np.allclose(bg_img, bg_img[0, 0, :]) + + +def test_background_uniform_colour(): + bg_size = (256, 256) + bg_img = dataset_utils.background_img('uniform_colour', bg_size) + assert bg_img.shape == (*bg_size, 3) + assert np.allclose(bg_img, bg_img[0, 0, :]) + + +def test_background_random_achromatic(): + bg_size = (256, 256) + bg_img = dataset_utils.background_img('random_achromatic', bg_size) + assert bg_img.shape == (*bg_size, 3) + assert np.unique(bg_img).shape[0] > 1 + + +def test_background_random_achromatic_pixelwise(): + bg_size = (256, 256) + bg_img = dataset_utils.background_img('random_achromatic', bg_size) + assert bg_img.shape == (*bg_size, 3) + 
assert np.unique(bg_img).shape[0] > 1 + assert np.all(np.equal(bg_img[..., 0], bg_img[..., 1])) + + +def test_background_random_colour(): + bg_size = (256, 256) + bg_img = dataset_utils.background_img('random_colour', bg_size) + assert bg_img.shape == (*bg_size, 3) + assert np.unique(bg_img).shape[0] > 1 + + +def test_background_patch_achromatic(): + bg_size = (256, 256) + bg_img = dataset_utils.background_img('patch_achromatic', bg_size) + assert bg_img.shape == (*bg_size, 3) + assert np.unique(bg_img).shape[0] > 1 + + +def test_background_patch_achromatic_pixelwise(): + bg_size = (256, 256) + bg_img = dataset_utils.background_img('patch_achromatic', bg_size) + assert bg_img.shape == (*bg_size, 3) + assert np.unique(bg_img).shape[0] > 1 + assert np.all(np.equal(bg_img[..., 0], bg_img[..., 1])) + + +def test_background_patch_colour(): + bg_size = (256, 256) + bg_img = dataset_utils.background_img('patch_colour', bg_size) + assert bg_img.shape == (*bg_size, 3) + assert np.unique(bg_img).shape[0] > 1 + + +def test_background_uniform_value(): + bg_size = (256, 256) + bg_value = 0.5 + bg_img = dataset_utils.background_img(bg_value, bg_size) + assert bg_img.shape == (*bg_size, 3) + assert np.allclose(bg_img, bg_value) + + +def test_background_invalid_type(): + with pytest.raises(RuntimeError, match='Unsupported background type'): + _ = dataset_utils.background_img('invalid_type', (256, 256)) + + +def test_background_invalid_value_type(): + with pytest.raises(RuntimeError, match='Unsupported background type'): + _ = dataset_utils.background_img(None, (256, 256)) diff --git a/tests/datasets/gratings_test.py b/tests/datasets/gratings_test.py new file mode 100644 index 0000000..bbd45dd --- /dev/null +++ b/tests/datasets/gratings_test.py @@ -0,0 +1,61 @@ +""" +Unit tests for gratings.py +""" + +import pytest +import numpy as np +import torch +import torchvision.transforms as torch_transforms + +from osculari.datasets import GratingsDataset + + +def test_gratings_dataset_len(): + # Test the __len__ method of GratingsDataset + img_size = 64 + dataset = GratingsDataset(img_size=img_size) + expected_length = len(dataset.thetas) * len(dataset.sfs) + assert len(dataset) == expected_length + + +def test_gratings_dataset_make_grating(): + # Test the make_grating method of GratingsDataset + img_size = 64 + dataset = GratingsDataset(img_size=img_size) + idx = 0 + amplitude = 1.0 + channels = 3 + grating = dataset.make_grating(idx, amplitude, channels) + assert isinstance(grating, np.ndarray) + assert grating.shape == (img_size, img_size, channels) + + +def test_gratings_dataset_getitem(): + # Test the __getitem__ method of GratingsDataset + img_size = 64 + dataset = GratingsDataset(img_size=img_size) + + # Test without transformation + idx = 0 + grating = dataset[idx] + assert isinstance(grating, np.ndarray) + assert grating.shape == (img_size, img_size, 3) + + # Test with transformation + transform = torch_transforms.Compose([torch_transforms.ToTensor()]) + dataset.transform = transform + grating = dataset[idx] + assert isinstance(grating, torch.Tensor) + assert grating.shape == (3, img_size, img_size) + + +def test_gratings_dataset_with_gaussian(): + # Test the make_grating method of GratingsDataset + img_size = 64 + dataset = GratingsDataset(img_size=img_size, gaussian_sigma=0.5) + idx = 0 + amplitude = 1.0 + channels = 3 + grating = dataset.make_grating(idx, amplitude, channels) + assert isinstance(grating, np.ndarray) + assert grating.shape == (img_size, img_size, channels) From 
a840d30fe0e70db640b732345bd6d9e403b17ee0 Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Fri, 15 Dec 2023 23:22:55 +0100 Subject: [PATCH 07/14] Unit tests for ShapeAppearanceDataset --- osculari/datasets/geometrical_shapes.py | 1 + tests/datasets/geometrical_shapes_test.py | 85 +++++++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 tests/datasets/geometrical_shapes_test.py diff --git a/osculari/datasets/geometrical_shapes.py b/osculari/datasets/geometrical_shapes.py index 2650db7..2fade67 100644 --- a/osculari/datasets/geometrical_shapes.py +++ b/osculari/datasets/geometrical_shapes.py @@ -159,6 +159,7 @@ def __init__( self.num_samples = num_samples self.num_images = num_images self.img_size = img_size + assert callable(merge_fg_bg) self.merge_fg_bg = merge_fg_bg self.bg = background self.unique_fg_shape = unique_fg_shape diff --git a/tests/datasets/geometrical_shapes_test.py b/tests/datasets/geometrical_shapes_test.py new file mode 100644 index 0000000..aad9812 --- /dev/null +++ b/tests/datasets/geometrical_shapes_test.py @@ -0,0 +1,85 @@ +""" +Unit tests for geometrical_shapes.py +""" + +import pytest +import numpy as np +import torch + +from osculari.datasets import ShapeAppearanceDataset + + +def test_shape_appearance_dataset_len(): + # Test the __len__ method of ShapeAppearanceDataset + num_samples = 100 + dataset = ShapeAppearanceDataset(num_samples=num_samples, num_images=2, img_size=64, + background='uniform_achromatic', + merge_fg_bg=lambda x, y: (x, y)) + assert len(dataset) == num_samples + + +def test_shape_appearance_dataset_make_fg_masks(): + # Test the make_fg_masks method of ShapeAppearanceDataset + num_samples = 100 + num_images = 2 + img_size = 64 + dataset = ShapeAppearanceDataset(num_samples=num_samples, num_images=num_images, + img_size=img_size, background='uniform_achromatic', + merge_fg_bg=lambda x, y: (x, y)) + fg_masks = dataset.make_fg_masks() + assert len(fg_masks) == num_images + assert all( + isinstance(mask, np.ndarray) and mask.dtype == bool and mask.shape == (img_size, img_size) + for mask in fg_masks) + + +def test_shape_appearance_dataset_make_bg_images(): + # Test the make_bg_images method of ShapeAppearanceDataset + num_samples = 100 + num_images = 2 + img_size = 64 + dataset = ShapeAppearanceDataset(num_samples=num_samples, num_images=num_images, + img_size=img_size, background='uniform_achromatic', + merge_fg_bg=lambda x, y: (x, y)) + bg_images = dataset.make_bg_images() + assert len(bg_images) == num_images + assert all(isinstance(img, np.ndarray) and img.dtype == np.float32 and img.shape == ( + img_size, img_size, 3) for img in bg_images) + + +def test_shape_appearance_dataset_getitem(): + # Test the __getitem__ method of ShapeAppearanceDataset + num_samples = 100 + num_images = 2 + img_size = 64 + dataset = ShapeAppearanceDataset(num_samples=num_samples, num_images=num_images, + img_size=img_size, background='uniform_achromatic', + merge_fg_bg=lambda x, y: (x, 0)) + idx = 0 + data = dataset[idx] + assert len(data[:-1]) == num_images + assert all(isinstance(item, np.ndarray) for item in data[:-1]) + assert data[-1] == 0 # Ground is 0 + + +def test_shape_appearance_dataset_invalid_bg(): + # Test with an invalid background type + num_samples = 100 + num_images = 2 + img_size = 64 + with pytest.raises(RuntimeError): + dataset = ShapeAppearanceDataset(num_samples=num_samples, num_images=num_images, + img_size=img_size, background='invalid_bg', + merge_fg_bg=lambda x, y: (x, y)) + _ = dataset[0] + + +def 
test_shape_appearance_dataset_invalid_merge_fg_bg(): + # Test with an invalid merge_fg_bg function + num_samples = 100 + num_images = 2 + img_size = 64 + with pytest.raises(AssertionError): + _ = ShapeAppearanceDataset(num_samples=num_samples, num_images=num_images, + img_size=img_size, background='uniform_achromatic', + merge_fg_bg='invalid_func') From 813fd24cb8f7fc2cc01ee873c0299d87e48226ce Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Sat, 16 Dec 2023 22:15:31 +0100 Subject: [PATCH 08/14] Add unit tests for readout.py --- osculari/models/readout.py | 2 + tests/models/readout_test.py | 85 ++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 tests/models/readout_test.py diff --git a/osculari/models/readout.py b/osculari/models/readout.py index a4c64cb..805b0e1 100644 --- a/osculari/models/readout.py +++ b/osculari/models/readout.py @@ -22,6 +22,8 @@ "load_paradigm_2afc", "load_paradigm_ooo", "ProbeNet", + "OddOneOutNet", + "Classifier2AFC", "ActivationLoader", "FeatureExtractor" ] diff --git a/tests/models/readout_test.py b/tests/models/readout_test.py new file mode 100644 index 0000000..06f8fd5 --- /dev/null +++ b/tests/models/readout_test.py @@ -0,0 +1,85 @@ +""" +Unit tests for readout.py +""" + +import pytest +import torch +from torch.testing import assert_close + +from osculari.models import OddOneOutNet, load_paradigm_ooo, load_paradigm_2afc + + +def test_odd_one_out_net_few_inputs(): + with pytest.raises(RuntimeError): + _ = OddOneOutNet(input_nodes=2, merge_paradigm='cat', + architecture='taskonomy_autoencoding', weights=None, layers='block0', + img_size=224) + + +def test_odd_one_out_net_init_cat(): + # Test the initialization of OddOneOutNet + input_nodes = 4 + net = OddOneOutNet(input_nodes=input_nodes, merge_paradigm='cat', + architecture='taskonomy_autoencoding', weights=None, layers='block0', + img_size=224) + assert net.input_nodes == input_nodes + assert net.fc.out_features == input_nodes + + +def test_odd_one_out_net_init_diff(): + # Test the initialization of OddOneOutNet + input_nodes = 4 + net = OddOneOutNet(input_nodes=input_nodes, merge_paradigm='diff', + architecture='taskonomy_autoencoding', weights=None, layers='block0', + img_size=224) + assert net.input_nodes == input_nodes + assert net.fc.out_features == 1 + + +@pytest.mark.parametrize("merge_paradigm,expected", [("cat", 4), ("diff", 4)]) +def test_odd_one_out_net_forward_cat(merge_paradigm, expected): + # Test the forward pass of OddOneOutNet with merge_paradigm='cat' + input_nodes = 4 + img_size = 224 + net = OddOneOutNet(input_nodes=input_nodes, merge_paradigm=merge_paradigm, + architecture='taskonomy_autoencoding', weights=None, layers='block0', + img_size=img_size) + + x1 = torch.randn(2, 3, img_size, img_size) + x2 = torch.randn(2, 3, img_size, img_size) + x3 = torch.randn(2, 3, img_size, img_size) + x4 = torch.randn(2, 3, img_size, img_size) + + output = net(x1, x2, x3, x4) + assert output.shape == (2, input_nodes) + + +def test_odd_one_out_net_serialization(): + # Test the serialization of OddOneOutNet + input_nodes = 4 + net = OddOneOutNet(input_nodes=input_nodes, merge_paradigm='cat', + architecture='taskonomy_autoencoding', weights='taskonomy_autoencoding', + layers='block0', img_size=224) + + net_params = net.serialisation_params() + new_net = load_paradigm_ooo(net_params) + + # Ensure that the parameters are correctly loaded + assert net.input_nodes == new_net.input_nodes + assert net.merge_paradigm == new_net.merge_paradigm + 
assert_close(net.state_dict(), new_net.state_dict()) + + +def test_odd_one_out_net_loss_function(): + # Test the loss function of OddOneOutNet + input_nodes = 4 + net = OddOneOutNet(input_nodes=input_nodes, merge_paradigm='cat', + architecture='taskonomy_autoencoding', weights=None, layers='block0', + img_size=224) + + # Assuming a batch size of 2 + output = torch.randn(2, input_nodes) + target = torch.randint(0, input_nodes, (2,), dtype=torch.long) + + loss = net.loss_function(output, target) + assert loss.item() >= 0 From b407c343d681e5ecf2394e7e2686175d686d674a Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Sat, 16 Dec 2023 22:41:02 +0100 Subject: [PATCH 09/14] Unit tests for all imagenet pretrained models and all layers --- tests/models/pretrained_models_test.py | 31 ++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/models/pretrained_models_test.py diff --git a/tests/models/pretrained_models_test.py b/tests/models/pretrained_models_test.py new file mode 100644 index 0000000..3bb783c --- /dev/null +++ b/tests/models/pretrained_models_test.py @@ -0,0 +1,31 @@ +""" +Unit tests for pretrained_models.py +""" + +import pytest +import torch +from torchvision import models as torch_models + +from osculari.models import readout, available_layers + + +@pytest.mark.parametrize("net_name", torch_models.list_models(module=torch_models)) +def test_imagenet_models(net_name): + img_size = 224 + x1 = torch.randn(2, 3, img_size, img_size) + x2 = torch.randn(2, 3, img_size, img_size) + for layer in available_layers(net_name): + weights = None + readout_kwargs = { + 'architecture': net_name, 'img_size': img_size, + 'weights': weights, + 'layers': layer + } + classifier_lwargs = { + 'probe_layer': 'nn', + 'pooling': 'max_2_2' + } + + net = readout.paradigm_2afc_merge_concatenate(**classifier_lwargs, **readout_kwargs) + output = net(x1, x2) + assert output.shape == (2, 2) From 0cf09f69559fbade2c783dbf2d64b8fa1f9f603f Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Sun, 17 Dec 2023 22:04:31 +0100 Subject: [PATCH 10/14] Using visualpriors package for tasknomoy_networks --- osculari/models/pretrained_models.py | 3 +- osculari/models/taskonomy_network.py | 420 --------------------------- requirements.txt | 1 + 3 files changed, 3 insertions(+), 421 deletions(-) delete mode 100644 osculari/models/taskonomy_network.py diff --git a/osculari/models/pretrained_models.py b/osculari/models/pretrained_models.py index f805b90..c8502ab 100644 --- a/osculari/models/pretrained_models.py +++ b/osculari/models/pretrained_models.py @@ -11,9 +11,10 @@ from torch.utils import model_zoo from torchvision import models as torch_models +from visualpriors import taskonomy_network import clip -from . import model_utils, pretrained_layers, taskonomy_network +from . 
import model_utils, pretrained_layers _TORCHVISION_SEGMENTATION = [ 'deeplabv3_mobilenet_v3_large', diff --git a/osculari/models/taskonomy_network.py b/osculari/models/taskonomy_network.py deleted file mode 100644 index 0d34802..0000000 --- a/osculari/models/taskonomy_network.py +++ /dev/null @@ -1,420 +0,0 @@ -""" -The original code was downloaded from https://github.com/alexsax/midlevel-reps -""" - -import torch -import torch.nn as nn -import torch.nn.functional as torch_fun -import warnings - -task_mapping = { - 'autoencoder': 'autoencoding', - 'colorization': 'colorization', - 'curvature': 'curvature', - 'denoise': 'denoising', - 'edge2d': 'edge_texture', - 'edge3d': 'edge_occlusion', - 'ego_motion': 'egomotion', - 'fix_pose': 'fixated_pose', - 'jigsaw': 'jigsaw', - 'keypoint2d': 'keypoints2d', - 'keypoint3d': 'keypoints3d', - 'non_fixated_pose': 'nonfixated_pose', - 'point_match': 'point_matching', - 'reshade': 'reshading', - 'rgb2depth': 'depth_zbuffer', - 'rgb2mist': 'depth_euclidean', - 'rgb2sfnorm': 'normal', - 'room_layout': 'room_layout', - 'segment25d': 'segment_unsup25d', - 'segment2d': 'segment_unsup2d', - 'segmentsemantic': 'segment_semantic', - 'class_1000': 'class_object', - 'class_places': 'class_scene', - 'inpainting_whole': 'inpainting', - 'vanishing_point': 'vanishing_point' -} - -CHANNELS_TO_TASKS = { - 1: ['colorization', 'edge_texture', 'edge_occlusion', 'keypoints3d', 'keypoints2d', 'reshading', - 'depth_zbuffer', 'depth_euclidean', ], - 2: ['curvature', 'principal_curvature'], - 3: ['autoencoding', 'denoising', 'normal', 'inpainting', 'rgb', 'normals'], - 17: ['segment_semantic'], - 63: ['class_scene'], - 64: ['segment_unsup2d', 'segment_unsup25d'], - 1000: ['class_object'], -} - -PIX_TO_PIX_TASKS = ['colorization', 'edge_texture', 'edge_occlusion', - 'keypoints3d', 'keypoints2d', 'reshading', - 'depth_zbuffer', 'depth_euclidean', - 'curvature', 'autoencoding', 'denoising', - 'normal', 'inpainting', - 'segment_unsup2d', 'segment_unsup25d', - 'segment_semantic', ] -FEED_FORWARD_TASKS = ['class_object', 'class_scene', 'room_layout', 'vanishing_point'] -SINGLE_IMAGE_TASKS = PIX_TO_PIX_TASKS + FEED_FORWARD_TASKS -SIAMESE_TASKS = ['fix_pose', 'jigsaw', 'ego_motion', 'point_match', 'non_fixated_pose'] -DONT_APPLY_TANH_TASKS = ['segment_semantic'] - -TASKS_TO_CHANNELS = {} -for n, tasks in CHANNELS_TO_TASKS.items(): - for task in tasks: - TASKS_TO_CHANNELS[task] = n - -LIST_OF_OLD_TASKS = sorted(list(task_mapping.keys())) -LIST_OF_TASKS = sorted(list(task_mapping.values())) - -TASKONOMY_PRETRAINED_WEIGHT_FILES = """autoencoding_decoder-a4a006b5a8b314b9b0ae815c12cf80e4c5f2e6c703abdf65a64a020d3fef7941.pth -autoencoding_encoder-e35146c09253720e97c0a7f8ee4e896ac931f5faa1449df003d81e6089ac6307.pth -class_object_decoder-3cdb6d9ec5a221ca39352e62412c2ab5ae7a00258a962b9b67fe398566ce6c5d.pth -class_object_encoder-4a4e42dad58066039a0d2f9d128bb32e93a7e4aa52edb2d2a07bcdd1a6536c18.pth -class_scene_decoder-517010623d64eb108ca3225fde2a87e72e3e97137b744aa12deeff2fa4f097dc.pth -class_scene_encoder-ad85764467cddafd98211313ceddebb98adf2a6bee2cedfe0b922a37ae65eaf8.pth -colorization_encoder-5ed817acdd28d13e443d98ad15ebe1c3059a3252396a2dff6a2090f6f86616a5.pth -curvature_decoder-b93aed18d7510ad9502755f05c1ef569c00d1fc9c4620333a764ad0d6d131fd3.pth -curvature_encoder-3767cf5d06d9c6bca859631eb5a3c368d66abeb15542171b94188ffbe47d7571.pth -denoising_decoder-5c4e343e885ac13ed0093b4f357680437b8a81f4d36c0b27b6ac831ba5c9fce6.pth 
-denoising_encoder-b64cab95af4a2c565066a7e8effaf37d6586c3b9389b47fff9376478d849db38.pth -depth_euclidean_decoder-f8d7d0d2bdaf55fac3bdfc8c2812c599bac84985d55503ec92960a4c8b5db7e8.pth -depth_euclidean_encoder-88f18d41313de7dbc88314a7f0feec3023047303d94d73eb8622dc40334ef149.pth -depth_zbuffer_decoder-4833f06833899a8d81b29c6d7eda8adf69b394a91a8c0389b0d58db523097de9.pth -depth_zbuffer_encoder-cc343a8ed622fd7ee3ce54398be8682bbbbfb5d11fa80e8d03a56a5ae4e11b09.pth -edge_occlusion_decoder-1b74d29a2b5afd9eb1a2cf2179289a31e2757909135615d5ba0a9164eb22505f.pth -edge_occlusion_encoder-5ac3f3e918131f61e01fe95e49f462ae2fc56aa463f8d353ca84cd4e248b9c08.pth -edge_texture_decoder-e241e823d6417a0c9b36b7616aad759380dfd3eb83362124e90f9ed5daa92c73.pth -edge_texture_encoder-be2d686a6a4dfebe968d16146a17176eba37e29f736d5cd9a714317c93718810.pth -egomotion_encoder-9aa647c34bf98f9e491e0b37890d77566f6ae35ccf41d9375c674511318d571c.pth -fixated_pose_encoder-78cf321518abc16f9f4782b9e5d4e8f5d6966c373d951928a26f872e55297567.pth -inpainting_decoder-5982904d2a3ce470ce993d89572134dd835dd809f5cfd6290334dc0fe8b1277f.pth -inpainting_encoder-bf96fbaaea9268a820a19a1d13dbf6af31798f8983c6d9203c00fab2d236a142.pth -jigsaw_encoder-0c2b342c9080f8713c178b04aa6c581ed3a0112fecaf78edc4c04e0a90516e39.pth -keypoints2d_decoder-0157a2a18c4e1f861c725d8d4cf3701b02e9444f47e22bdd1262c879dd2d0839.pth -keypoints2d_encoder-6b77695acff4c84091c484a7b128a1e28a7e9c36243eda278598f582cf667fe0.pth -keypoints3d_decoder-724ea6f255cbe4c487a984230242ec7c3557fa8234bde2487d69eacc7b9b75af.pth -keypoints3d_encoder-7e3f1ec97b82ae30030b7ea4fec2dc606b71497d8c0335d05f0be3dc909d000d.pth -nonfixated_pose_encoder-3433a600ca9ff384b9898e55d86a186d572c2ebbe4701489a373933e3cfd5b8b.pth -normal_decoder-8f18bfb30ee733039f05ed4a65b4db6f7cc1f8a4b9adb4806838e2bf88e020ec.pth -normal_encoder-f5e2c7737e4948e3b2a822f584892c342eaabbe66661576ba50db7cdd40561c5.pth -point_matching_encoder-4bd2a6b2909d9998fabaf0278ab568f42f2b692a648e28555ede6c6cda5361f4.pth -reshading_decoder-5bda58f921a3065992ab0034aa0ed787af97f26ac9e5668746dae49c299606cb.pth -reshading_encoder-de456246e171dc8407fb2951539aa60d75925ae0f1dbb43f110b7768398b36a6.pth -room_layout_encoder-1e1662f43b834261464b1825227a04efba59b50cc8883bee9adc3ddafd4796c1.pth -segment_semantic_decoder-d74c6fcf4e0f2bbdce9afe21f9064453a2ac5c7131226527b1d0748f701d04a0.pth -segment_semantic_encoder-bb3007244520fc89cd111e099744a22b1e5c98cd83ed3f116fbff6376d220127.pth -segment_unsup25d_decoder-64c1553cadf76e7efd59138321dc94d186a27eb2bb21e5e6c2624ae825bd4da1.pth -segment_unsup25d_encoder-7d12d2500c18c003ffc23943214f5dfd74932f0e3d03dde2c3a81ebc406e31a0.pth -segment_unsup2d_decoder-a0f3975a22032f116d36e3f3a49f33ddcd6e798cced3ac0962eef5bdccfc397f.pth -segment_unsup2d_encoder-b679053a920e8bcabf0cd454606098ae85341e054080f2be29473971d4265964.pth -vanishing_point_encoder-afd2ae9b71d46a54efc5231b3e38ebc3e35bfab78cb0a78d9b75863a240b19a8.pth""".split() -TASKONOMY_PRETRAINED_WEIGHT_URL_TEMPLATE = 'https://github.com/alexsax/visual-prior/raw/networks/assets/pytorch/{filename}' -TASKONOMY_PRETRAINED_URLS = { - k.split("-")[0]: TASKONOMY_PRETRAINED_WEIGHT_URL_TEMPLATE.format(filename=k) - for k in TASKONOMY_PRETRAINED_WEIGHT_FILES} - - -class TaskonomyNetwork(nn.Module): - - def __init__(self, - out_channels=3, - eval_only=True, - load_encoder_path=None, - load_decoder_path=None, - model_dir=None, - is_decoder_mlp=False, - apply_tanh=True, - progress=True): - """ - out_channels = None for decoder only - """ - super(TaskonomyNetwork, self).__init__() - self.encoder = 
TaskonomyEncoder(eval_only=True) - self.encoder.normalize_outputs = False - - self.decoder = None - self.is_decoder_mlp = is_decoder_mlp - self.apply_tanh = apply_tanh - if out_channels is not None: - self.decoder = TaskonomyDecoder( - out_channels=out_channels, - is_decoder_mlp=self.is_decoder_mlp, - apply_tanh=self.apply_tanh, - eval_only=True) - - if load_encoder_path is not None: - self.load_encoder(load_encoder_path, model_dir, progress) - - if load_decoder_path is not None: - self.load_decoder(load_decoder_path, model_dir, progress) - - def load_encoder(self, url, model_dir=None, progress=True): - checkpoint = torch.utils.model_zoo.load_url(url, model_dir=model_dir, progress=progress) - return self.encoder.load_state_dict(checkpoint['state_dict']) - - def load_decoder(self, url, model_dir=None, progress=True): - checkpoint = torch.utils.model_zoo.load_url(url, model_dir=model_dir, progress=progress) - return self.decoder.load_state_dict(checkpoint['state_dict']) - - def forward(self, x): - return self.decoder(self.encoder(x)) - - -class Scissor(torch.nn.Module): - # Remove the first row and column of our data - # To deal with asymmetry in ConvTranpose layers - # if used correctly, this removes 0's - def forward(self, x): - _, _, h, _ = x.shape - x = x[:, :, 1:h, 1:h] - return x - - -class TaskonomyDecoder(nn.Module): - """ - Note regarding DeConvolution Layer: - - TF uses padding = 'same': `o = i * stride` (e.g. 128 -> 64 if stride = 2) - - Using the equation relating output_size, input_size, stride, padding, kernel_size, we get 2p = 1 - - See https://stackoverflow.com/questions/50683039/conv2d-transpose-output-shape-using-formula - - This means we need to add asymmetric padding of (1,0,1,0) prior to deconv - - PyTorch ConvTranspose2d does not support asymmetric padding, so we need to pad ourselves - - But since we pad ourselves it goes into the input size and since stride = 2, we get an extra row/column of zeros - - e.g. 
This is because it is putting a row/col between each row/col of the input (our padding is treated as input) - - That's fine, if we remove that row and column, we get the proper outputs we are looking for - - See https://github.com/vdumoulin/conv_arithmetic to visualize deconvs - """ - - def __init__(self, out_channels=3, eval_only=True, is_decoder_mlp=False, apply_tanh=True): - super(TaskonomyDecoder, self).__init__() - self.is_decoder_mlp = is_decoder_mlp - if self.is_decoder_mlp: - self.fc0 = nn.Linear(2048, 2048, bias=False) - self.bn0 = nn.BatchNorm1d(2048, momentum=0.1, affine=True) - self.relu0 = nn.ReLU(inplace=False) - self.dropout = nn.Dropout(p=0.5, inplace=False) - self.fc2 = nn.Linear(2048, out_channels) - else: - self.conv2 = self._make_layer(8, 1024) - self.conv3 = self._make_layer(1024, 1024) - self.conv4 = self._make_layer(1024, 512) - self.conv5 = self._make_layer(512, 256) - self.conv6 = self._make_layer(256, 256) - self.conv7 = self._make_layer(256, 128) - - self.deconv8 = self._make_layer(128, 64, stride=2, deconv=True) - self.conv9 = self._make_layer(64, 64) - - self.deconv10 = self._make_layer(64, 32, stride=2, deconv=True) - self.conv11 = self._make_layer(32, 32) - - self.deconv12 = self._make_layer(32, 16, stride=2, deconv=True) - self.conv13 = self._make_layer(16, 32) - - self.deconv14 = self._make_layer(32, 16, stride=2, deconv=True) - - decoder_output_layers = [ - nn.Conv2d(16, out_channels, kernel_size=3, stride=1, bias=True, padding=1)] - if apply_tanh: - decoder_output_layers.append(nn.Tanh()) - self.decoder_output = nn.Sequential(*decoder_output_layers) - - self.eval_only = eval_only - if self.eval_only: - self.eval() - - for p in self.parameters(): - p.requires_grad = False - - def _make_layer(self, in_channels, out_channels, stride=1, deconv=False): - if deconv: - pad = nn.ZeroPad2d((1, 0, 1, 0)) # Pad first row and column - conv = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=3, stride=stride, - padding=1, output_padding=0, bias=False) - scissor = Scissor() # Remove first row and column - else: - conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, - bias=False) # pad = 'SAME' - - bn = nn.BatchNorm2d(out_channels, momentum=0.1, affine=True) - lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=False) - - if deconv: - layer = nn.Sequential(pad, conv, scissor, bn, lrelu) - else: - layer = nn.Sequential(conv, bn, lrelu) - return layer - - def forward(self, x): - if self.is_decoder_mlp: - if len(x.shape) == 4 and x.shape[2] == x.shape[3]: - # NCHW to NHWC - # weights from TF require the input to be ordered in this fashion - # calling .view on (N,8,16,16) is different from (N,16,16,8) - x = x.permute((0, 2, 3, 1)).contiguous() - x = x.view(-1, 2048) - x = self.fc0(x) - x = self.bn0(x) - x = self.relu0(x) - x = self.dropout(x) - x = self.fc2(x) - else: - # Input x: N x 256 x 256 x 3 - x = self.conv2(x) - x = self.conv3(x) - x = self.conv4(x) - x = self.conv5(x) - x = self.conv6(x) - x = self.conv7(x) - - x = self.deconv8(x) - x = self.conv9(x) - - x = self.deconv10(x) - x = self.conv11(x) - - x = self.deconv12(x) - x = self.conv13(x) - - x = self.deconv14(x) - x = self.decoder_output(x) - # add gaussian-noise? 
- return x - - -class Bottleneck(nn.Module): - expansion = 4 - - def __init__(self, inplanes, planes, stride=1, downsample=None): - super(Bottleneck, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, bias=False, padding=1) - # self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) - self.bn3 = nn.BatchNorm2d(planes * 4) - self.relu = nn.ReLU(inplace=True) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.bn1(out) - out = self.relu(out) - - # out = F.pad(out, pad=(1,1,1,1), mode='constant', value=0) # other modes are reflect, replicate - out = self.conv2(out) - out = self.bn2(out) - out = self.relu(out) - - out = self.conv3(out) - out = self.bn3(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - - return out - - -class TaskonomyClassificationDecoder(nn.Module): - def __init__(self, out_channels=3, eval_only=True): - super(TaskonomyClassificationDecoder, self).__init__() - - -class TaskonomyEncoder(nn.Module): - - def __init__(self, normalize_outputs=True, eval_only=True, train_penultimate=False, - train=False): - self.inplanes = 64 - super(TaskonomyEncoder, self).__init__() - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=0, bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True) - block = Bottleneck - layers = [3, 4, 6, 3] - self.layer1 = self._make_layer(block, 64, layers[0], stride=2) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2]) - self.layer4 = self._make_layer(block, 512, layers[3]) - self.compress1 = nn.Conv2d(2048, 8, kernel_size=3, stride=1, padding=1, bias=False) - self.compress_bn = nn.BatchNorm2d(8) - self.relu1 = nn.ReLU(inplace=True) - self.groupnorm = nn.GroupNorm(8, 8, affine=False) - self.normalize_outputs = normalize_outputs - self.eval_only = eval_only - if self.eval_only: - self.eval() - for p in self.parameters(): - p.requires_grad = False - - if train_penultimate: - for name, param in self.named_parameters(): - if 'compress' in name: # last layers: compress1.weight, compress_bn.weight, compress_bn.bias - param.requires_grad = True - else: - param.requires_grad = False - - if train: - for p in self.parameters(): - p.requires_grad = True - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - layers = [] - - if self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=1, bias=False), - nn.BatchNorm2d(planes * block.expansion), - ) - layers.append(block(self.inplanes, planes, downsample=downsample)) - - self.inplanes = planes * block.expansion - for i in range(1, blocks - 1): - layers.append(block(self.inplanes, planes)) - - downsample = None - if stride != 1: - downsample = nn.Sequential( - nn.MaxPool2d(kernel_size=1, stride=stride), - ) - layers.append(block(self.inplanes, planes, stride, downsample)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = torch_fun.pad(x, pad=(3, 3, 3, 3), mode='constant', value=0) - # other modes 
are reflect, replicate, constant - - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - # x = F.pad(x, (0,1,0,1), 'constant', 0) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.compress1(x) - x = self.compress_bn(x) - x = self.relu1(x) - if self.normalize_outputs: - x = self.groupnorm(x) - return x - - def train(self, val): - if val and self.eval_only: - warnings.warn( - "Ignoring 'train()' in TaskonomyEncoder since 'eval_only' was set during initialization.", - RuntimeWarning) - else: - return super(TaskonomyEncoder, self).train(val) diff --git a/requirements.txt b/requirements.txt index 36de792..30101d3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ numpy torch torchvision clip-openai +visualpriors opencv-python From a1d2456d73661d507bb1fdbf036d43dbb5bd5c66 Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Sun, 17 Dec 2023 22:05:03 +0100 Subject: [PATCH 11/14] Better parametrising imagenet models test. --- tests/models/pretrained_models_test.py | 38 +++++++++++++++----------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/tests/models/pretrained_models_test.py b/tests/models/pretrained_models_test.py index 3bb783c..5851a58 100644 --- a/tests/models/pretrained_models_test.py +++ b/tests/models/pretrained_models_test.py @@ -9,23 +9,29 @@ from osculari.models import readout, available_layers -@pytest.mark.parametrize("net_name", torch_models.list_models(module=torch_models)) -def test_imagenet_models(net_name): +def all_imagenet_networks_layers(): + """All pretrained ImageNet networks and supported layers.""" + for net_name in torch_models.list_models(module=torch_models): + for layer in available_layers(net_name): + yield net_name, layer + + +@pytest.mark.parametrize("net_name,layer", all_imagenet_networks_layers()) +def test_imagenet_models(net_name, layer): img_size = 224 x1 = torch.randn(2, 3, img_size, img_size) x2 = torch.randn(2, 3, img_size, img_size) - for layer in available_layers(net_name): - weights = None - readout_kwargs = { - 'architecture': net_name, 'img_size': img_size, - 'weights': weights, - 'layers': layer - } - classifier_lwargs = { - 'probe_layer': 'nn', - 'pooling': 'max_2_2' - } + weights = None + readout_kwargs = { + 'architecture': net_name, 'img_size': img_size, + 'weights': weights, + 'layers': layer + } + classifier_lwargs = { + 'probe_layer': 'nn', + 'pooling': 'max_2_2' + } - net = readout.paradigm_2afc_merge_concatenate(**classifier_lwargs, **readout_kwargs) - output = net(x1, x2) - assert output.shape == (2, 2) + net = readout.paradigm_2afc_merge_concatenate(**classifier_lwargs, **readout_kwargs) + output = net(x1, x2) + assert output.shape == (2, 2) From 2d8c16f17e36e17da7b607539ce42f28c38499a5 Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Sun, 17 Dec 2023 22:13:47 +0100 Subject: [PATCH 12/14] Runtime Error unit tests for pretrained_models.py --- tests/models/pretrained_models_test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/tests/models/pretrained_models_test.py b/tests/models/pretrained_models_test.py index 5851a58..c23dd70 100644 --- a/tests/models/pretrained_models_test.py +++ b/tests/models/pretrained_models_test.py @@ -7,6 +7,7 @@ from torchvision import models as torch_models from osculari.models import readout, available_layers +from osculari.models import pretrained_models def all_imagenet_networks_layers(): @@ -35,3 +36,19 @@ def test_imagenet_models(net_name, layer): net = 
readout.paradigm_2afc_merge_concatenate(**classifier_lwargs, **readout_kwargs) output = net(x1, x2) assert output.shape == (2, 2) + + +def test_preprocess_mean_std_invalid_model(): + with pytest.raises(RuntimeError): + _ = pretrained_models.preprocess_mean_std('invalid_network') + + +def test_get_pretrained_model_invalid_model(): + with pytest.raises(RuntimeError): + _ = pretrained_models.get_pretrained_model('invalid_network', 'weights') + + +def test_model_features_invalid_layer(): + network = pretrained_models.get_pretrained_model('resnet18', 'none') + with pytest.raises(RuntimeError): + _ = pretrained_models.model_features(network, 'resnet18', 'invalid_layer') From 7b2cdad660f6dd81e742630433e8a7b3630e20c5 Mon Sep 17 00:00:00 2001 From: ArashAkbarinia Date: Sun, 17 Dec 2023 23:07:50 +0100 Subject: [PATCH 13/14] Loading CLIP in CPU to compute the generic size for a layer --- osculari/models/model_utils.py | 11 +---------- osculari/models/pretrained_models.py | 4 +++- osculari/models/readout.py | 22 ++++++++++++++++------ 3 files changed, 20 insertions(+), 17 deletions(-) diff --git a/osculari/models/model_utils.py b/osculari/models/model_utils.py index f812f6b..a7780fb 100644 --- a/osculari/models/model_utils.py +++ b/osculari/models/model_utils.py @@ -103,16 +103,13 @@ def is_resnet_backbone(architecture: str) -> bool: return 'resnet' in architecture or 'resnext' in architecture or 'taskonomy_' in architecture -def generic_features_size(model: nn.Module, img_size: int, - is_clip: Optional[bool] = False) -> Tuple[int]: +def generic_features_size(model: nn.Module, img_size: int) -> Tuple[int]: """ Compute the output size of a neural network model given an input image size. Parameters: model (nn.Module): The neural network model. img_size (int): The input image size (assuming square images). - is_clip (Optional[bool]): Flag indicating whether the model is a CLIP model - (default is False). Returns: Tuple[int]: The computed output size of the model. @@ -123,12 +120,6 @@ def generic_features_size(model: nn.Module, img_size: int, # Convert the image to a PyTorch tensor and add batch dimension img = torchvis_fun.to_tensor(img).unsqueeze(0) - # Move the input image to GPU and change the data type if is_clip is True - if is_clip: - model = model.cuda() - img = img.cuda() - img = img.type(torch.float16) - # Set the model to evaluation mode model.eval() diff --git a/osculari/models/pretrained_models.py b/osculari/models/pretrained_models.py index c8502ab..9fd9c63 100644 --- a/osculari/models/pretrained_models.py +++ b/osculari/models/pretrained_models.py @@ -475,6 +475,7 @@ def get_pretrained_model(network_name: str, weights: str) -> nn.Module: Parameters: network_name (str): Name of the network. weights (str): Path to the pretrained weights file. + clip_cpu (bool): Load the CLIP model in CPU. Raises: RuntimeError: If the specified network is not supported. 
@@ -489,7 +490,8 @@ def get_pretrained_model(network_name: str, weights: str) -> nn.Module:
         # Load CLIP model
         # TODO: support for None
         clip_version = network_name.replace('clip_', '')
-        model, _ = clip.load(clip_version)
+        device = "cuda" if torch.cuda.is_available() and weights not in ['none', None] else "cpu"
+        model, _ = clip.load(clip_version, device=device)
     elif 'taskonomy_' in network_name:
         # Load Taskonomy model
         model = taskonomy_network.TaskonomyEncoder()
diff --git a/osculari/models/readout.py b/osculari/models/readout.py
index 805b0e1..674152f 100644
--- a/osculari/models/readout.py
+++ b/osculari/models/readout.py
@@ -231,6 +231,14 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return self.extract_features(x)
 
 
+def _image_encoder_none_weights(architecture: str, layer: str) -> nn.Module:
+    # TODO: consider converting this into an overload function for generic size
+    model_instance = pretraineds.get_pretrained_model(architecture, 'none')
+    image_encoder = pretraineds.get_image_encoder(architecture, model_instance)
+    layer_features = pretraineds.model_features(image_encoder, architecture, layer)
+    return layer_features
+
+
 class ProbeNet(ReadOutNet):
     """Adding a linear layer on top of readout features."""
 
@@ -264,14 +272,11 @@ def __init__(self, input_nodes: int, num_classes: int, img_size: int,
         }
 
         # Handle features from multiple layers
-        is_clip = 'clip' in self.architecture
         if hasattr(self, 'act_dict'):
             total_dim = 0
             for layer in self.layers:
-                model_instance = pretraineds.get_pretrained_model(self.architecture, 'none')
-                image_encoder = pretraineds.get_image_encoder(self.architecture, model_instance)
-                layer_features = pretraineds.model_features(image_encoder, self.architecture, layer)
-                odim = model_utils.generic_features_size(layer_features, img_size, is_clip)
+                image_encoder = _image_encoder_none_weights(self.architecture, layer)
+                odim = model_utils.generic_features_size(image_encoder, img_size)
                 if type(odim) is int:
                     total_dim += odim
                 else:
@@ -279,7 +284,12 @@ def __init__(self, input_nodes: int, num_classes: int, img_size: int,
                     total_dim += (odim[0] * tmp_size)
             self.out_dim = (total_dim, 1)
         else:
-            self.out_dim = model_utils.generic_features_size(self.backbone, img_size, is_clip)
+            image_encoder = self.backbone
+            # To compute the generic feature size of a CLIP model, we load an instance on
+            # CPU without pretrained weights by passing weights='none'
+            if 'clip' in self.architecture:
+                image_encoder = _image_encoder_none_weights(self.architecture, self.layers)
+            self.out_dim = model_utils.generic_features_size(image_encoder, img_size)
         if len(self.out_dim) == 1 and self.pool is not None:
             RuntimeWarning(
                 'Layer %s output is a vector, no pooling can be applied' % self.layers

From 62b3a1b751c37dfc398d8650c6374ec9bf23961b Mon Sep 17 00:00:00 2001
From: ArashAkbarinia
Date: Sun, 17 Dec 2023 23:15:47 +0100
Subject: [PATCH 14/14] Unit test for all supported pretrained networks and
 all supported layers

---
 tests/models/pretrained_models_test.py | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/tests/models/pretrained_models_test.py b/tests/models/pretrained_models_test.py
index c23dd70..83bfcb9 100644
--- a/tests/models/pretrained_models_test.py
+++ b/tests/models/pretrained_models_test.py
@@ -4,25 +4,30 @@
 import pytest
 import torch
-from torchvision import models as torch_models
-from osculari.models import readout, available_layers
+from osculari.models import readout, available_layers, available_models
 from osculari.models import pretrained_models
 
 
-def 
all_imagenet_networks_layers(): - """All pretrained ImageNet networks and supported layers.""" - for net_name in torch_models.list_models(module=torch_models): +def all_networks_layers(): + """All supported pretrained networks and supported layers.""" + for net_name in available_models(flatten=True): for layer in available_layers(net_name): yield net_name, layer -@pytest.mark.parametrize("net_name,layer", all_imagenet_networks_layers()) +@pytest.mark.parametrize("net_name,layer", all_networks_layers()) def test_imagenet_models(net_name, layer): - img_size = 224 + expected_sizes = { + 'clip_RN50x4': 288, + 'clip_RN50x16': 384, + 'clip_RN50x64': 448, + 'clip_ViT-L/14@336px': 336, + } + img_size = expected_sizes[net_name] if net_name in expected_sizes else 224 x1 = torch.randn(2, 3, img_size, img_size) x2 = torch.randn(2, 3, img_size, img_size) - weights = None + weights = 'none' readout_kwargs = { 'architecture': net_name, 'img_size': img_size, 'weights': weights,