torchreid/utils/feature_extractor.py

from __future__ import absolute_import

import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image

from ptflops import get_model_complexity_info

from torchreid.models import build_model
from torchreid.utils import check_isfile, load_pretrained_weights
from scripts.default_config import model_kwargs, get_default_config, merge_from_files_with_base


class FeatureExtractor(object):
    """A simple API for feature extraction.

    FeatureExtractor can be used like a python function, which
    accepts input of the following types:
        - a list of strings (image paths)
        - a list of numpy.ndarray each with shape (H, W, C)
        - a single string (image path)
        - a single numpy.ndarray with shape (H, W, C)
        - a torch.Tensor with shape (B, C, H, W) or (C, H, W)

    Returned is a torch tensor with shape (B, D) where D is the
    feature dimension.

    Args:
        config_path (str): path to model configuration file.
        model_path (str): path to model weights.
        image_size (sequence or int): image height and width.
        device (str): 'cpu' or 'cuda' (could be specific gpu devices).
        verbose (bool): show model details.

    Examples::

        from torchreid.utils import FeatureExtractor

        extractor = FeatureExtractor(
            model_name='osnet_x1_0',
            model_path='a/b/c/model.pth.tar',
            device='cuda'
        )

        image_list = [
            'a/b/c/image001.jpg',
            'a/b/c/image002.jpg',
            'a/b/c/image003.jpg',
            'a/b/c/image004.jpg',
            'a/b/c/image005.jpg'
        ]

        features = extractor(image_list)
        print(features.shape) # output (5, 512)
    """

    def __init__(
        self,
        config_path='',
        model_path='',
        device='cuda',
        verbose=True
    ):
        # Build model
        cfg = get_default_config()
        merge_from_files_with_base(cfg, config_path)
        cfg.use_gpu = device.startswith('cuda')
        model = build_model(**model_kwargs(cfg, 1))
        model.eval()

        image_size = (cfg.data.height, cfg.data.width)
        flops, num_params = get_model_complexity_info(model, (3, image_size[0], image_size[1]),
                                                      as_strings=False, verbose=False, print_per_layer_stat=False)

        if verbose:
            print('Model: {}'.format(cfg.model.name))
            print('- params: {:,}'.format(num_params))
            print('- flops: {:,}'.format(flops))

        if model_path and check_isfile(model_path):
            load_pretrained_weights(model, model_path)

        # Build transform functions
        transforms = []
        transforms += [T.Resize(image_size)]
        transforms += [T.ToTensor()]
        print(cfg.data.norm_mean, cfg.data.norm_std)
        transforms += [T.Normalize(mean=cfg.data.norm_mean, std=cfg.data.norm_std)]
        preprocess = T.Compose(transforms)

        to_pil = T.ToPILImage()

        device = torch.device(device)
        model.to(device)

        # Class attributes
        self.model = model
        self.preprocess = preprocess
        self.to_pil = to_pil
        self.device = device

    def __call__(self, input):
        if isinstance(input, list):
            images = []

            for element in input:
                if isinstance(element, str):
                    image = Image.open(element).convert('RGB')

                elif isinstance(element, np.ndarray):
                    image = self.to_pil(element)

                else:
                    raise TypeError(
                        'Type of each element must belong to [str | numpy.ndarray]'
                    )

                image = self.preprocess(image)
                images.append(image)

            images = torch.stack(images, dim=0)
            images = images.to(self.device)

        elif isinstance(input, str):
            image = Image.open(input).convert('RGB')
            image = self.preprocess(image)
            images = image.unsqueeze(0).to(self.device)

        elif isinstance(input, np.ndarray):
            image = self.to_pil(input)
            image = self.preprocess(image)
            images = image.unsqueeze(0).to(self.device)

        elif isinstance(input, torch.Tensor):
            if input.dim() == 3:
                input = input.unsqueeze(0)
            images = input.to(self.device)

        else:
            raise NotImplementedError

        with torch.no_grad():
            features = self.model(images)

        return features