Merged to main branch #17

Merged 14 commits on Dec 18, 2023
2 changes: 2 additions & 0 deletions osculari/datasets/dataset_utils.py
@@ -186,6 +186,8 @@ def background_img(bg_type: Any, bg_size: Union[int, Tuple], im2double=True) ->
num_colours = np.random.randint(3, 25)
num_patches = np.random.randint(2, bg_size[0] // 20)
bg_img = _patch_img(bg_size, num_colours, num_patches, channels)
if 'achromatic' in bg_type:
bg_img = np.repeat(bg_img, 3, axis=2)
else:
raise RuntimeError('Unsupported background type %s.' % bg_type)
# Handle user-specified background values
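For reference, a minimal standalone sketch of what the new achromatic branch above does; the array shapes are illustrative assumptions, not values taken from the library:

```python
import numpy as np

# A single-channel patchy background of shape (H, W, 1) is repeated along
# the channel axis so the result is a 3-channel, achromatic (grey) image.
bg_img = np.random.randint(0, 256, size=(128, 128, 1)).astype(np.uint8)
bg_rgb = np.repeat(bg_img, 3, axis=2)

assert bg_rgb.shape == (128, 128, 3)
# All three channels are identical copies of the original patch image.
assert np.array_equal(bg_rgb[..., 0], bg_rgb[..., 2])
```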
1 change: 1 addition & 0 deletions osculari/datasets/geometrical_shapes.py
@@ -159,6 +159,7 @@ def __init__(
self.num_samples = num_samples
self.num_images = num_images
self.img_size = img_size
assert callable(merge_fg_bg)
self.merge_fg_bg = merge_fg_bg
self.bg = background
self.unique_fg_shape = unique_fg_shape
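As a usage sketch, the new assertion simply guards against non-callable arguments; the exact signature expected of `merge_fg_bg` is not shown in this diff, so the blend below is an assumption for illustration:

```python
import numpy as np

def merge_fg_bg(fg: np.ndarray, bg: np.ndarray) -> np.ndarray:
    # Hypothetical merge function: a fixed 50/50 alpha blend of the
    # foreground shape image and the background image.
    return 0.5 * fg + 0.5 * bg

# The added `assert callable(merge_fg_bg)` accepts this function object but
# would fail fast for, e.g., a string or None passed by mistake.
assert callable(merge_fg_bg)
assert not callable('merge_fg_bg')
```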
2 changes: 1 addition & 1 deletion osculari/datasets/gratings.py
@@ -91,7 +91,7 @@ class GratingsDataset(TorchDataset):
"""

def __init__(self, img_size: int, spatial_frequencies: Optional[Sequence[int]] = None,
thetas: Optional[Sequence[int]] = None, gaussian_sigma: Optional[float] = None,
thetas: Optional[Sequence[float]] = None, gaussian_sigma: Optional[float] = None,
transform: Optional[Callable] = None) -> None:
super(GratingsDataset, self).__init__()
self.img_size = img_size
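With the corrected type hint, orientations can be arbitrary floats rather than integers. A construction sketch, assuming `GratingsDataset` is importable from `osculari.datasets` and that `thetas` are orientations in radians:

```python
import numpy as np
from osculari.datasets import GratingsDataset  # import path assumed

# Eight evenly spaced float orientations; an int-only hint would have been
# misleading for values such as pi / 8.
thetas = np.linspace(0, np.pi, num=8, endpoint=False).tolist()
dataset = GratingsDataset(img_size=224, spatial_frequencies=[2, 4, 8], thetas=thetas)
```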
10 changes: 1 addition & 9 deletions osculari/models/model_utils.py
@@ -103,16 +103,13 @@ def is_resnet_backbone(architecture: str) -> bool:
return 'resnet' in architecture or 'resnext' in architecture or 'taskonomy_' in architecture


def generic_features_size(model: nn.Module, img_size: int,
is_clip: Optional[bool] = False) -> Tuple[int]:
def generic_features_size(model: nn.Module, img_size: int) -> Tuple[int]:
"""
Compute the output size of a neural network model given an input image size.

Parameters:
model (nn.Module): The neural network model.
img_size (int): The input image size (assuming square images).
is_clip (Optional[bool]): Flag indicating whether the model is a CLIP model
(default is False).

Returns:
Tuple[int]: The computed output size of the model.
@@ -123,11 +120,6 @@ def generic_features_size(model: nn.Module, img_size: int,
# Convert the image to a PyTorch tensor and add batch dimension
img = torchvis_fun.to_tensor(img).unsqueeze(0)

# Move the input image to GPU and change the data type if is_clip is True
if is_clip:
img = img.cuda()
img = img.type(torch.float16)

# Set the model to evaluation mode
model.eval()

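A short usage sketch of the simplified helper, which is now device-agnostic (it no longer moves the probe image to the GPU or casts it to float16 for CLIP backbones); the import path and the printed shape are assumptions:

```python
import torch.nn as nn
from torchvision import models

from osculari.models import model_utils  # import path assumed

# Strip the classifier head of a torchvision ResNet-18 and measure the
# feature size it produces for a 224x224 input, entirely on the CPU.
backbone = nn.Sequential(*list(models.resnet18(weights=None).children())[:-2])
out_dim = model_utils.generic_features_size(backbone, img_size=224)
print(out_dim)  # expected to be roughly (512, 7, 7) for this backbone
```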
7 changes: 5 additions & 2 deletions osculari/models/pretrained_models.py
@@ -11,9 +11,10 @@
from torch.utils import model_zoo

from torchvision import models as torch_models
from visualpriors import taskonomy_network
import clip

from . import model_utils, pretrained_layers, taskonomy_network
from . import model_utils, pretrained_layers

_TORCHVISION_SEGMENTATION = [
'deeplabv3_mobilenet_v3_large',
@@ -474,6 +475,7 @@ def get_pretrained_model(network_name: str, weights: str) -> nn.Module:
Parameters:
network_name (str): Name of the network.
weights (str): Path to the pretrained weights file.
        clip_cpu (bool): Load the CLIP model on the CPU.

Raises:
RuntimeError: If the specified network is not supported.
@@ -488,7 +490,8 @@ def get_pretrained_model(network_name: str, weights: str) -> nn.Module:
# Load CLIP model
# TODO: support for None
clip_version = network_name.replace('clip_', '')
model, _ = clip.load(clip_version)
device = "cuda" if torch.cuda.is_available() and weights not in ['none', None] else "cpu"
model, _ = clip.load(clip_version, device=device)
elif 'taskonomy_' in network_name:
# Load Taskonomy model
model = taskonomy_network.TaskonomyEncoder()
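A standalone sketch of the new device-selection logic for CLIP (the wrapper function name is hypothetical; `clip.load` is the OpenAI CLIP API already used in the diff):

```python
import torch
import clip  # OpenAI CLIP package, as imported in the diff

def load_clip_backbone(network_name: str, weights) -> torch.nn.Module:
    # Hypothetical wrapper mirroring the changed branch: CLIP is only placed
    # on the GPU when real weights are requested and CUDA is available;
    # weights='none' keeps it on the CPU so that feature sizes can be probed
    # without a GPU.
    clip_version = network_name.replace('clip_', '')
    device = "cuda" if torch.cuda.is_available() and weights not in ['none', None] else "cpu"
    model, _preprocess = clip.load(clip_version, device=device)
    return model

# model = load_clip_backbone('clip_ViT-B/32', weights='none')  # stays on the CPU
```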
24 changes: 18 additions & 6 deletions osculari/models/readout.py
@@ -22,6 +22,8 @@
"load_paradigm_2afc",
"load_paradigm_ooo",
"ProbeNet",
"OddOneOutNet",
"Classifier2AFC",
"ActivationLoader",
"FeatureExtractor"
]
@@ -229,6 +231,14 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
return self.extract_features(x)


def _image_encoder_none_weights(architecture: str, layer: str) -> nn.Module:
    # TODO: consider converting this into an overload function for generic size
model_instance = pretraineds.get_pretrained_model(architecture, 'none')
image_encoder = pretraineds.get_image_encoder(architecture, model_instance)
layer_features = pretraineds.model_features(image_encoder, architecture, layer)
return layer_features


class ProbeNet(ReadOutNet):
"""Adding a linear layer on top of readout features."""

@@ -262,22 +272,24 @@ def __init__(self, input_nodes: int, num_classes: int, img_size: int,
}

# Handle features from multiple layers
is_clip = 'clip' in self.architecture
if hasattr(self, 'act_dict'):
total_dim = 0
for layer in self.layers:
model_instance = pretraineds.get_pretrained_model(self.architecture, 'none')
image_encoder = pretraineds.get_image_encoder(self.architecture, model_instance)
layer_features = pretraineds.model_features(image_encoder, self.architecture, layer)
odim = model_utils.generic_features_size(layer_features, img_size, is_clip)
image_encoder = _image_encoder_none_weights(self.architecture, layer)
odim = model_utils.generic_features_size(image_encoder, img_size)
if type(odim) is int:
total_dim += odim
else:
tmp_size = 1 if len(odim) < 3 else np.prod(self.pool['size']) * self.pool['num']
total_dim += (odim[0] * tmp_size)
self.out_dim = (total_dim, 1)
else:
self.out_dim = model_utils.generic_features_size(self.backbone, img_size, is_clip)
image_encoder = self.backbone
            # To compute the output size of the CLIP model, we load a weight-free
            # instance on the CPU by passing weights='none'
if 'clip' in self.architecture:
image_encoder = _image_encoder_none_weights(self.architecture, self.layers)
self.out_dim = model_utils.generic_features_size(image_encoder, img_size)
if len(self.out_dim) == 1 and self.pool is not None:
RuntimeWarning(
'Layer %s output is a vector, no pooling can be applied' % self.layers
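A sketch of the pattern this refactor follows: feature sizes are probed with a weight-free ('none') copy of the requested layer, built on the CPU, so CLIP backbones never need a GPU at construction time. The architecture and layer names below are assumptions for illustration, and the private helper is imported only to show the internal pattern:

```python
import numpy as np

from osculari.models import model_utils  # import paths assumed
from osculari.models.readout import _image_encoder_none_weights

# Build a weight-free encoder for one layer and measure its output shape
# with a dummy image; ProbeNet accumulates such shapes into out_dim.
encoder = _image_encoder_none_weights('resnet50', 'layer3')
odim = model_utils.generic_features_size(encoder, img_size=224)
total_dim = odim[0] * int(np.prod(odim[1:])) if len(odim) > 1 else odim[0]
print(odim, total_dim)
```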