[detection] Add YOLOX

wuhanstudio · Nov 26, 2023 · e2c7535 · e2c7535
1 parent c54131e
commit e2c7535
Show file tree

Hide file tree

Showing 85 changed files with 8,673 additions and 43 deletions.
diff --git a/examples/inference/yolox_demo.py b/examples/inference/yolox_demo.py
@@ -0,0 +1,73 @@
+import cv2
+
+from what.models.detection.datasets.coco import COCO_CLASS_NAMES
+from what.models.detection.utils.box_utils import draw_bounding_boxes
+
+from what.models.detection.yolox.yolox_x import YOLOX_X
+from what.models.detection.yolox.yolox_l import YOLOX_L
+from what.models.detection.yolox.yolox_m import YOLOX_M
+from what.models.detection.yolox.yolox_s import YOLOX_S
+
+from what.cli.model import *
+from what.utils.file import get_file
+
+# The YOLOX rows of what_model_list.
+# NOTE(review): the 9:13 slice is position-dependent — confirm it still selects
+# the four YOLOX entries whenever what_model_list changes.
+what_yolox_model_list = what_model_list[9:13]
+
+# Detector class for each position in what_yolox_model_list (same mapping as
+# the former if-chain: 0 -> X, 1 -> L, 2 -> M, 3 -> S).
+YOLOX_MODEL_CLASSES = (YOLOX_X, YOLOX_L, YOLOX_M, YOLOX_S)
+
+video = input(f"Please input the OpenCV capture device (e.g. 0, 1, 2): ")
+
+while not video.isdigit():
+    video = input(f"Please input the OpenCV capture device (e.g. 0, 1, 2): ")
+
+# Capture from camera
+cap = cv2.VideoCapture(int(video))
+#cap.set(3, 1920)
+#cap.set(4, 1080)
+
+# Check what_model_list for all supported models
+index = 0
+
+# Download the model first if not exists
+WHAT_YOLOX_MODEL_FILE = what_yolox_model_list[index][WHAT_MODEL_FILE_INDEX]
+WHAT_YOLOX_MODEL_URL  = what_yolox_model_list[index][WHAT_MODEL_URL_INDEX]
+WHAT_YOLOX_MODEL_HASH = what_yolox_model_list[index][WHAT_MODEL_HASH_INDEX]
+
+# NOTE(review): `os` is not imported explicitly in this script; presumably it
+# arrives through the star import from what.cli.model — confirm.
+model_path = os.path.join(WHAT_MODEL_PATH, WHAT_YOLOX_MODEL_FILE)
+
+if not os.path.isfile(model_path):
+    get_file(WHAT_YOLOX_MODEL_FILE,
+             WHAT_MODEL_PATH,
+             WHAT_YOLOX_MODEL_URL,
+             WHAT_YOLOX_MODEL_HASH)
+
+model = YOLOX_MODEL_CLASSES[index](COCO_CLASS_NAMES, model_path)
+
+try:
+    # Fail fast: a device that did not open would otherwise make the loop
+    # below spin forever on empty frames with no way to quit.
+    if not cap.isOpened():
+        raise RuntimeError(f"Cannot open OpenCV capture device {video}")
+
+    while True:
+        _, orig_image = cap.read()
+        if orig_image is None:
+            continue
+
+        # Image preprocessing
+        image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
+
+        # Run inference
+        images, boxes, labels, probs = model.predict(image)
+        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+        # Draw bounding boxes onto the image (drawn in place on `image`).
+        # Skip frames without detections, like the other demos do.
+        if len(boxes) > 0:
+            draw_bounding_boxes(image, boxes, labels, model.class_names, probs)
+
+        cv2.imshow('YOLOX', image)
+
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+finally:
+    # Release the device and close the window even if inference raised.
+    cap.release()
+    cv2.destroyAllWindows()
diff --git a/pyproject.toml b/pyproject.toml
@@ -12,6 +12,7 @@ requires = [
     "matplotlib",
     "click",
     "progressbar",
+    "loguru"
 ]
 build-backend = "setuptools.build_meta"
 

diff --git a/requirements.txt b/requirements.txt
@@ -11,3 +11,4 @@ tensorflow
 matplotlib
 pandas
 progressbar
+loguru
diff --git a/setup.py b/setup.py
@@ -33,6 +33,7 @@ def get_version(rel_path):
     "pandas",
     "click",
     "progressbar",
+    "loguru",
 ]
 
 setuptools.setup(

diff --git a/what/cli/example.py b/what/cli/example.py
@@ -1,6 +1,7 @@
 # Model Inference
 from what.examples.yolov3_demo import yolov3_inference_demo
 from what.examples.yolov4_demo import yolov4_inference_demo
+from what.examples.yolox_demo import yolox_inference_demo
 from what.examples.faster_rcnn_demo import frcnn_inference_demo
 from what.examples.mobilenet_ssd_demo import mobilenet_ssd_inference_demo
 
@@ -16,6 +17,7 @@
 what_example_list = [
     ('    Yolov3 Demo    ', ' Model Inference ', 'Yolov3 Object Detection.', yolov3_inference_demo),
     ('    Yolov4 Demo    ', ' Model Inference ', 'Yolov4 Object Detection.', yolov4_inference_demo),
+    ('     YoloX Demo    ', ' Model Inference ', 'YoloX Object Detection.', yolox_inference_demo),
     ('  FasterRCNN Demo  ', ' Model Inference ', 'FRCNN Object Detection.', frcnn_inference_demo),
     ('MobileNet SSD Demo', ' Model Inference ', 'MobileNet SSD Object Detection.', mobilenet_ssd_inference_demo),
     (' TOG Attack Demo ', 'Adversarial Attack', 'Real-time TOG Attack against Yolov3 Tiny.', yolov3_pcb_attack_demo),

diff --git a/what/cli/model.py b/what/cli/model.py
@@ -19,5 +19,9 @@
     ('YOLOv4 Tiny (    Darknet    )', 'Object Detection', 'YOLOv4 Tiny pretrained on MS COCO dataset.', 'yolov4-tiny.h5', 'https://wuhanstudio.nyc3.cdn.digitaloceanspaces.com/what/yolov4-tiny.h5', '867f54dced382170538a9ca2374e14e778f80d4abd6011652b911b6aca77384e'),
     ('SSD         ( MobileNet  v1 )', 'Object Detection', 'SSD pretrained on VOC-2012 dataset.', 'mobilenet-v1-ssd-mp-0_675.pth', 'https://wuhanstudio.nyc3.cdn.digitaloceanspaces.com/what/mobilenet-v1-ssd-mp-0_675.pth', '58694cafa60456eeab4e81ae50ff49a01c46ab387bfea5200f047143ecd973a9'),
     ('SSD         ( MobileNet  v2 )', 'Object Detection', 'SSD pretrained on VOC-2012 dataset.', 'mobilenet-v2-ssd-lite-mp-0_686.pth', 'https://wuhanstudio.nyc3.cdn.digitaloceanspaces.com/what/mobilenet-v2-ssd-lite-mp-0_686.pth', 'b0d1ac2cdbf3c241ba837f51eeebc565ea37b95b7258e2604506a2f991e398a4'),
-    ('FasterRCNN  (     VGG16     )', 'Object Detection', 'Faster-RCNN pretrained on VOC-2012 dataset.', 'fasterrcnn_12211511_0.701052458187_torchvision_pretrain.pth', 'https://wuhanstudio.nyc3.cdn.digitaloceanspaces.com/what/fasterrcnn_12211511_0.701052458187_torchvision_pretrain.pth', '3fd279284b536da3eac754404779e32e2e9fdd82d8511bbc7f6c50e14f0c69d2')
+    ('FasterRCNN  (     VGG16     )', 'Object Detection', 'Faster-RCNN pretrained on VOC-2012 dataset.', 'fasterrcnn_12211511_0.701052458187_torchvision_pretrain.pth', 'https://wuhanstudio.nyc3.cdn.digitaloceanspaces.com/what/fasterrcnn_12211511_0.701052458187_torchvision_pretrain.pth', '3fd279284b536da3eac754404779e32e2e9fdd82d8511bbc7f6c50e14f0c69d2'),
+    ('YOLOX X-Large                ', 'Object Detection', 'YOLOX-X pretrained on MS COCO dataset.', 'yolox-x.pth', 'https://wuhanstudio.nyc3.cdn.digitaloceanspaces.com/what/yolox_x.pth', '5652330b6ae860043f091b8f550a60c10e1129f416edfdb65c259be6caf355cf'),
+    ('YOLOX Large                  ', 'Object Detection', 'YOLOX-L pretrained on MS COCO dataset.', 'yolox-l.pth', 'https://wuhanstudio.nyc3.cdn.digitaloceanspaces.com/what/yolox_l.pth', '1e6b7fa6240375370b2a8a8eab9066b3cdd43fd1d0bfa8d2027fd3a51def2917'),
+    ('YOLOX Medium                 ', 'Object Detection', 'YOLOX-M pretrained on MS COCO dataset.', 'yolox-m.pth', 'https://wuhanstudio.nyc3.cdn.digitaloceanspaces.com/what/yolox_m.pth', '60076992b32da82951c90cfa7bd6ab70eba9eda243e08b940a396f60ac2d19b6'),
+    ('YOLOX Small                  ', 'Object Detection', 'YOLOX-S pretrained on MS COCO dataset.', 'yolox-s.pth', 'https://wuhanstudio.nyc3.cdn.digitaloceanspaces.com/what/yolox_s.pth', 'f55ded7181e1b0c13285c56e7790b8f0e8f8db590fe4edb37f0b7f345c913a30'),
 ]
diff --git a/what/examples/faster_rcnn_demo.py b/what/examples/faster_rcnn_demo.py
@@ -69,11 +69,12 @@ def frcnn_inference_demo():
         boxes[:, 2] = box_w / width
         boxes[:, 3] = box_h / height
 
-        output = draw_bounding_boxes(orig_image,
-                boxes,
-                labels[0],
-                VOC_CLASS_NAMES[1:],
-                scores[0])
+        if len(boxes) > 0:
+            output = draw_bounding_boxes(orig_image,
+                    boxes,
+                    labels[0],
+                    VOC_CLASS_NAMES[1:],
+                    scores[0])
 
         cv2.imshow('Faster RCNN Demo', output)
 

diff --git a/what/examples/mobilenet_ssd_demo.py b/what/examples/mobilenet_ssd_demo.py
@@ -78,7 +78,8 @@ def mobilenet_ssd_inference_demo():
             # Draw bounding boxes onto the image
             height, width, _ = image.shape
 
-            output = draw_bounding_boxes(image, boxes, labels, model.class_names, probs);
+            if len(boxes) > 0:
+                output = draw_bounding_boxes(image, boxes, labels, model.class_names, probs);
 
             cv2.imshow('MobileNet SSD Demo', output)
 

diff --git a/what/examples/yolov3_demo.py b/what/examples/yolov3_demo.py
@@ -74,7 +74,8 @@ def yolov3_inference_demo():
             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
 
             # Draw bounding boxes onto the image
-            output = draw_bounding_boxes(image, boxes, labels, model.class_names, probs);
+            if len(boxes) > 0:
+                output = draw_bounding_boxes(image, boxes, labels, model.class_names, probs);
 
             cv2.imshow('YOLOv3 Demo', image)
 

diff --git a/what/examples/yolov3_pcb_attack_demo.py b/what/examples/yolov3_pcb_attack_demo.py
@@ -111,7 +111,8 @@ def yolov3_pcb_attack_demo():
         #     logger.info(f"{classes[labels[i]]}: {probs[i]:.2f}")
 
         out_img = cv2.cvtColor(out_img, cv2.COLOR_RGB2BGR)
-        out_img = draw_bounding_boxes(out_img, boxes, labels, classes, probs);
+        if len(boxes) > 0:
+            out_img = draw_bounding_boxes(out_img, boxes, labels, classes, probs);
 
         cv2.namedWindow("result", cv2.WINDOW_NORMAL)
         cv2.imshow("result", out_img)

diff --git a/what/examples/yolov3_tog_attack_demo.py b/what/examples/yolov3_tog_attack_demo.py
@@ -108,7 +108,8 @@ def yolov3_tog_attack_demo():
         out_img = (out_img * 255.0).astype(np.uint8)
 
         out_img = cv2.cvtColor(out_img, cv2.COLOR_RGB2BGR)
-        out_img = draw_bounding_boxes(out_img, boxes, labels, classes, probs);
+        if len(boxes) > 0:
+            out_img = draw_bounding_boxes(out_img, boxes, labels, classes, probs);
 
         cv2.namedWindow("result", cv2.WINDOW_NORMAL)
         cv2.imshow("result", out_img)

diff --git a/what/examples/yolov4_demo.py b/what/examples/yolov4_demo.py
@@ -74,7 +74,8 @@ def yolov4_inference_demo():
             image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
 
             # Draw bounding boxes onto the image
-            output = draw_bounding_boxes(image, boxes, labels, model.class_names, probs);
+            if len(boxes) > 0:
+                output = draw_bounding_boxes(image, boxes, labels, model.class_names, probs);
 
             cv2.imshow('YOLOv4 Demo', image)
 

diff --git a/what/examples/yolox_demo.py b/what/examples/yolox_demo.py
@@ -0,0 +1,101 @@
+import cv2
+import os.path
+
+from what.models.detection.datasets.coco import COCO_CLASS_NAMES
+from what.models.detection.utils.box_utils import draw_bounding_boxes
+
+from what.models.detection.yolox.yolox_x import YOLOX_X
+from what.models.detection.yolox.yolox_l import YOLOX_L
+from what.models.detection.yolox.yolox_m import YOLOX_M
+from what.models.detection.yolox.yolox_s import YOLOX_S
+
+from what.cli.model import *
+
+from what.utils.file import get_file
+
+# Rows of what_model_list offered by this demo.
+# NOTE(review): the 9:13 slice assumes the four YOLOX models sit at exactly
+# those positions — confirm against what/cli/model.py.
+what_yolox_model_list = what_model_list[9:13]
+
+def yolox_inference_demo():
+    """Run an interactive YOLOX object-detection demo on an OpenCV capture device.
+
+    Prints the entries of what_yolox_model_list, asks for a 1-based model
+    index and a capture device index, downloads the selected weights if the
+    file is not present, and then shows the detections frame by frame until
+    'q' is pressed. Errors raised while capturing or predicting are printed
+    rather than propagated.
+    """
+
+    max_len = max(len(x[WHAT_MODEL_NAME_INDEX]) for x in what_yolox_model_list)
+    # The rows are named `entry` so that `model` only ever refers to the detector.
+    for i, entry in enumerate(what_yolox_model_list, start=1):
+        if os.path.isfile(os.path.join(WHAT_MODEL_PATH, entry[WHAT_MODEL_FILE_INDEX])):
+            downloaded = 'x'
+        else:
+            downloaded = ' '
+        print('[{}] {} : {:<{w}s}\t{}\t{}'.format(downloaded, i, entry[WHAT_MODEL_NAME_INDEX], entry[WHAT_MODEL_TYPE_INDEX], entry[WHAT_MODEL_DESC_INDEX], w=max_len))
+
+    index = input(f"Please input the model index: ")
+    # The menu is 1-based: reject 0 as well as anything past the end of the
+    # list, otherwise 0 would turn into index -1 and select the wrong row.
+    while not index.isdigit() or not (1 <= int(index) <= len(what_yolox_model_list)):
+        index = input(f"Model [{index}] does not exist. Please try again: ")
+
+    index = int(index) - 1
+
+    # Download the model first if not exists
+    WHAT_YOLOX_MODEL_FILE = what_yolox_model_list[index][WHAT_MODEL_FILE_INDEX]
+    WHAT_YOLOX_MODEL_URL  = what_yolox_model_list[index][WHAT_MODEL_URL_INDEX]
+    WHAT_YOLOX_MODEL_HASH = what_yolox_model_list[index][WHAT_MODEL_HASH_INDEX]
+
+    model_path = os.path.join(WHAT_MODEL_PATH, WHAT_YOLOX_MODEL_FILE)
+
+    if not os.path.isfile(model_path):
+        get_file(WHAT_YOLOX_MODEL_FILE,
+                WHAT_MODEL_PATH,
+                WHAT_YOLOX_MODEL_URL,
+                WHAT_YOLOX_MODEL_HASH)
+
+    # Detector class per menu position: 0 -> X, 1 -> L, 2 -> M, 3 -> S.
+    model_classes = (YOLOX_X, YOLOX_L, YOLOX_M, YOLOX_S)
+    model = model_classes[index](COCO_CLASS_NAMES, model_path)
+
+    video = input(f"Please input the OpenCV capture device (e.g. 0, 1, 2): ")
+
+    # Only numeric device indices are accepted, so `video` is always a digit
+    # string once this loop ends.
+    while not video.isdigit():
+        video = input(f"Please input the OpenCV capture device (e.g. 0, 1, 2): ")
+
+    cap = None
+    try:
+        # Capture from camera
+        cap = cv2.VideoCapture(int(video))
+
+        # Fail fast: a device that did not open would otherwise make the loop
+        # below spin forever on empty frames with no way to quit.
+        if not cap.isOpened():
+            raise RuntimeError(f"Cannot open OpenCV capture device {video}")
+
+        #cap.set(3, 1920)
+        #cap.set(4, 1080)
+
+        while True:
+            _, orig_image = cap.read()
+            if orig_image is None:
+                continue
+
+            # Image preprocessing
+            image = cv2.cvtColor(orig_image, cv2.COLOR_BGR2RGB)
+
+            # Run inference
+            images, boxes, labels, probs = model.predict(image)
+            image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+
+            # Draw bounding boxes onto the image (drawn in place on `image`)
+            if len(boxes) > 0:
+                draw_bounding_boxes(image, boxes, labels, model.class_names, probs)
+
+            cv2.imshow('YOLOX Demo', image)
+
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+
+    except Exception as e:
+        print(e)
+
+    finally:
+        # Release the device and close the window on every exit path,
+        # including when capturing or inference raised.
+        if cap is not None:
+            cap.release()
+        cv2.destroyAllWindows()
+
+
+# Start the demo only when this file is executed directly, not when imported.
+if __name__ == "__main__":
+    yolox_inference_demo()
diff --git a/what/models/detection/__init__.py b/what/models/detection/__init__.py
@@ -9,3 +9,4 @@
 from what.models.detection import yolo
 from what.models.detection import ssd
 from what.models.detection import frcnn
+from what.models.detection import yolox
diff --git a/what/models/detection/utils/box_utils.py b/what/models/detection/utils/box_utils.py
@@ -7,36 +7,37 @@ def draw_bounding_boxes(image, boxes, labels, class_names, probs):
         assert(boxes.shape[1] == 4)
         boxes = to_numpy(boxes)
 
-    # (x, y, w, h) --> (x1, y1, x2, y2)
-    height, width, _ = image.shape
-    for box in boxes:
-        box[0] *= width
-        box[1] *= height
-        box[2] *= width 
-        box[3] *= height
-
-        # From center to top left
-        box[0] -= box[2] / 2
-        box[1] -= box[3] / 2
-
-        # From width and height to x2 and y2
-        box[2] += box[0]
-        box[3] += box[1]
-
-    # Draw bounding boxes and labels
-    for i in range(boxes.shape[0]):
-        box = boxes[i]
-        label = f"{class_names[labels[i]]}: {probs[i]:.2f}"
-        # print(label)
-
-        # Draw bounding boxes
-        cv2.rectangle(image, (int(box[0].item()), int(box[1].item())), (int(box[2].item()), int(box[3].item())), (255, 255, 0), 4)
-
-        # Draw labels
-        cv2.putText(image, label,
-                    (int(box[0]+20), int(box[1]+40)),
-                    cv2.FONT_HERSHEY_SIMPLEX,
-                    1,  # font scale
-                    (255, 0, 255),
-                    2)  # line type
+        # (x, y, w, h) --> (x1, y1, x2, y2)
+        height, width, _ = image.shape
+        for box in boxes:
+            box[0] *= width
+            box[1] *= height
+            box[2] *= width 
+            box[3] *= height
+
+            # From center to top left
+            box[0] -= box[2] / 2
+            box[1] -= box[3] / 2
+
+            # From width and height to x2 and y2
+            box[2] += box[0]
+            box[3] += box[1]
+
+        # Draw bounding boxes and labels
+        for i in range(boxes.shape[0]):
+            box = boxes[i]
+            label = f"{class_names[labels[i]]}: {probs[i]:.2f}"
+            # print(label)
+
+            # Draw bounding boxes
+            cv2.rectangle(image, (int(box[0].item()), int(box[1].item())), (int(box[2].item()), int(box[3].item())), (255, 255, 0), 4)
+
+            # Draw labels
+            cv2.putText(image, label,
+                        (int(box[0]+20), int(box[1]+40)),
+                        cv2.FONT_HERSHEY_SIMPLEX,
+                        1,  # font scale
+                        (255, 0, 255),
+                        2)  # line type
+
     return image
diff --git a/what/models/detection/yolox/__init__.py b/what/models/detection/yolox/__init__.py
@@ -0,0 +1,18 @@
+r'''
+This module implements the YOLOX object detection model.
+
+<br />
+
+## what.models.detection.yolox.yolox_x
+## what.models.detection.yolox.yolox_m
+## what.models.detection.yolox.yolox_l
+## what.models.detection.yolox.yolox_s
+
+'''
+
+from what.models.detection.yolox.yolox_x import YOLOX_X
+from what.models.detection.yolox.yolox_m import YOLOX_M
+from what.models.detection.yolox.yolox_l import YOLOX_L
+from what.models.detection.yolox.yolox_s import YOLOX_S
+
+__all__ = ["YOLOX_X", "YOLOX_M", "YOLOX_L", "YOLOX_S"]
diff --git a/what/models/detection/yolox/core/__init__.py b/what/models/detection/yolox/core/__init__.py
@@ -0,0 +1,6 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# Copyright (c) Megvii, Inc. and its affiliates.
+
+from .launch import launch
+from .trainer import Trainer
-Original file line number
+Diff line change
@@ Expand Up / @@ -11,3 +11,4 @@ tensorflow @@
     matplotlib
     pandas
     progressbar
+    loguru