# confidence_test.py

import argparse
import datetime
import os
import random
import time
import warnings
from pathlib import Path

import cv2
import numpy as np
import torch
import torchvision.transforms as standard_transforms
from PIL import Image

from crowd_datasets import build_dataset
from engine import *
from models import build_model

warnings.filterwarnings("ignore")


def get_args_parser():
    """Build the parser holding the options used by the evaluation script.

    The parser is created with ``add_help=False`` so it can be supplied as a
    ``parents=[...]`` entry of another ``ArgumentParser`` without both of them
    registering ``-h/--help``.

    Returns:
        argparse.ArgumentParser: parser defining ``--backbone``, ``--row``,
        ``--line``, ``--output_dir``, ``--weight_path`` and ``--gpu_id``.
    """
    # (flag, keyword arguments for add_argument), in registration order.
    option_table = (
        # * Backbone
        (
            "--backbone",
            {
                "default": "vgg16_bn",
                "type": str,
                "help": "name of the convolutional backbone to use",
            },
        ),
        # Anchor-point grid
        (
            "--row",
            {"default": 2, "type": int, "help": "row number of anchor points"},
        ),
        (
            "--line",
            {"default": 2, "type": int, "help": "line number of anchor points"},
        ),
        # Input / output locations
        ("--output_dir", {"default": "", "help": "path where to save"}),
        (
            "--weight_path",
            {"default": "", "help": "path where the trained weights saved"},
        ),
        # Device selection
        (
            "--gpu_id",
            {"default": 0, "type": int, "help": "the gpu used for evaluation"},
        ),
    )

    options = argparse.ArgumentParser(
        prog="Set parameters for P2PNet evaluation", add_help=False
    )
    for flag, settings in option_table:
        options.add_argument(flag, **settings)
    return options


def main(args, debug=False):
    """Run the model on one image and save a visualization of its predictions.

    Steps: build the model with ``build_model(args)``, optionally load a
    checkpoint, run a single forward pass on the hard-coded test image, keep
    the predicted points whose softmax score at class index 1 exceeds 0.5,
    draw them as filled red circles and write ``pred<count>.jpg`` into
    ``args.output_dir``.

    Args:
        args: Parsed command-line namespace. This function reads ``gpu_id``,
            ``weight_path`` and ``output_dir`` and forwards the whole
            namespace to ``build_model``.
        debug: Unused; kept so existing callers keep working.

    Raises:
        OSError: If the visualization image could not be written.
    """
    # Restrict the visible GPUs; this only takes effect if CUDA has not been
    # initialised yet in this process.
    os.environ["CUDA_VISIBLE_DEVICES"] = "{}".format(args.gpu_id)

    print(args)
    # Fall back to the CPU instead of crashing on machines without CUDA.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # get the P2PNet
    model = build_model(args)
    # move to the selected device
    model.to(device)
    # load trained model; --weight_path defaults to "", so test truthiness
    # rather than `is not None` (which would try to load from an empty path).
    if args.weight_path:
        checkpoint = torch.load(args.weight_path, map_location="cpu")
        model.load_state_dict(checkpoint["model"])
    # convert to eval mode
    model.eval()
    # create the pre-processing transform
    transform = standard_transforms.Compose(
        [
            standard_transforms.ToTensor(),
            standard_transforms.Normalize(
                mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
            ),
        ]
    )

    # set your image path here
    img_path = "./dataroot/worm_dataset/test/images/IMG_24.png"
    # load the image
    img_raw = Image.open(img_path).convert("RGB")
    # Round each side down to a multiple of 128, but never to 0: images
    # smaller than 128 px would otherwise make resize() fail.
    width, height = img_raw.size
    new_width = max(width // 128, 1) * 128
    new_height = max(height // 128, 1) * 128
    img_raw = img_raw.resize((new_width, new_height), Image.LANCZOS)
    # pre-processing: the transform already returns a tensor, so only a batch
    # dimension has to be added.
    samples = transform(img_raw).unsqueeze(0).to(device)

    # run inference; gradients are not needed for evaluation
    with torch.no_grad():
        outputs = model(samples)

    print(outputs["pred_logits"])
    print(outputs["pred_logits"].size())

    # Score of class index 1 for every prediction of the first batch element.
    # NOTE(review): presumably index 1 is the "object" class - confirm against
    # the model definition.
    outputs_scores = torch.nn.functional.softmax(outputs["pred_logits"], -1)[:, :, 1][0]
    outputs_points = outputs["pred_points"][0]
    print("\nFinal")
    print(outputs_scores)
    print(torch.max(outputs_scores))

    threshold = 0.5
    # filter the predictions (mask computed once and reused for the count)
    keep = outputs_scores > threshold
    points = outputs_points[keep].detach().cpu().numpy().tolist()
    print("number of points within threshold", len(points))
    predict_cnt = int(keep.sum())

    # draw the predictions on a BGR copy of the resized image
    size = 5
    img_to_draw = cv2.cvtColor(np.array(img_raw), cv2.COLOR_RGB2BGR)
    for p in points:
        img_to_draw = cv2.circle(
            img_to_draw, (int(p[0]), int(p[1])), size, (0, 0, 255), -1
        )

    # save the visualized image; cv2.imwrite reports failure through its
    # return value, so create the directory first and check the result.
    if args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)
    out_path = os.path.join(args.output_dir, "pred{}.jpg".format(predict_cnt))
    if not cv2.imwrite(out_path, img_to_draw):
        raise OSError("failed to write visualization to {}".format(out_path))


if __name__ == "__main__":
    # Wrap the shared option definitions in a top-level parser (this one keeps
    # argparse's default -h/--help), parse the command line and run main().
    main(
        argparse.ArgumentParser(
            prog="P2PNet evaluation script", parents=[get_args_parser()]
        ).parse_args()
    )