diff --git a/README.md b/README.md
index 92d8ee93..47d1deab 100644
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ English | [简体中文](README_cn.md)
-
+cp
## YOLOv6
Implementation of paper:
diff --git a/configs/base/README.md b/configs/base/README.md
deleted file mode 100644
index 77ef5a4e..00000000
--- a/configs/base/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-## YOLOv6 base model
-
-English | [简体中文](./README_cn.md)
-
-### Features
-
-- Use only regular convolution and Relu activation functions.
-
-- Apply CSP (1/2 channel dim) blocks in the network structure, except for Nano base model.
-
-Advantage:
-- Adopt a unified network structure and configuration, and the accuracy loss of the PTQ 8-bit quantization model is negligible.
-- Suitable for users who are just getting started or who need to apply, optimize and deploy an 8-bit quantization model quickly and frequently.
-
-
-### Performance
-
-| Model | Size | mAP<sup>val</sup><br/>0.5:0.95 | Speed<sup>T4</sup><br/>TRT FP16 b1<br/>(FPS) | Speed<sup>T4</sup><br/>TRT FP16 b32<br/>(FPS) | Speed<sup>T4</sup><br/>TRT INT8 b1<br/>(FPS) | Speed<sup>T4</sup><br/>TRT INT8 b32<br/>(FPS) | Params<br/>(M) | FLOPs<br/>(G) |
-| :--------------------------------------------------------------------------------------------- | --- | ----------------- | ----- | ---- | ---- | ---- | ----- | ------ |
-| [**YOLOv6-N-base**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6n_base.pt) | 640 | 36.6<sup>distill</sup> | 727 | 1302 | 814 | 1805 | 4.65 | 11.46 |
-| [**YOLOv6-S-base**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6s_base.pt) | 640 | 45.3<sup>distill</sup> | 346 | 525 | 487 | 908 | 13.14 | 30.6 |
-| [**YOLOv6-M-base**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6m_base.pt) | 640 | 49.4<sup>distill</sup> | 179 | 245 | 284 | 439 | 28.33 | 72.30 |
-| [**YOLOv6-L-base**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6l_base.pt) | 640 | 51.1<sup>distill</sup> | 116 | 157 | 196 | 288 | 59.61 | 150.89 |
-
-- Speed is tested with TensorRT 8.2.4.2 on T4.
-- The processes of model training, evaluation, and inference are the same as the original ones. For details, please refer to [this README](https://github.com/meituan/YOLOv6#quick-start).
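For reference, the base configs deleted in this patch are plain Python modules whose top-level `model`, `solver` and `data_aug` dicts are consumed by the training entry point. Below is a minimal sketch of loading such a file as a namespace; it assumes a generic `importlib`-based loader rather than the repository's actual config class.

```python
# Minimal sketch (not the repository's actual loader): read a YOLOv6-style
# config file and expose its model/solver/data_aug dicts as module attributes.
import importlib.util

def load_config(path: str):
    spec = importlib.util.spec_from_file_location("yolov6_config", path)
    cfg = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(cfg)  # executes the config, defining model/solver/data_aug
    return cfg

# Example (assuming a local copy of the file still exists):
# cfg = load_config("configs/base/yolov6s_base.py")
# print(cfg.model["backbone"]["type"], cfg.solver["lr0"])
```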
diff --git a/configs/base/README_cn.md b/configs/base/README_cn.md
deleted file mode 100644
index b6b01d14..00000000
--- a/configs/base/README_cn.md
+++ /dev/null
@@ -1,25 +0,0 @@
-## YOLOv6 基础版模型
-
-简体中文 | [English](./README.md)
-
-### 模型特点
-
-- 仅使用常规卷积和Relu激活函数
-
-- 网络结构均采用CSP (1/2通道) block,Nano网络除外。
-
-优势:
-- 采用统一的网络结构和配置,且 PTQ 8位量化模型精度损失较小,适合刚入门或有快速迭代部署8位量化模型需求的用户。
-
-
-### 模型指标
-
-| 模型 | 尺寸 | mAP<sup>val</sup><br/>0.5:0.95 | 速度<sup>T4</sup><br/>TRT FP16 b1<br/>(FPS) | 速度<sup>T4</sup><br/>TRT FP16 b32<br/>(FPS) | 速度<sup>T4</sup><br/>TRT INT8 b1<br/>(FPS) | 速度<sup>T4</sup><br/>TRT INT8 b32<br/>(FPS) | Params<br/>(M) | FLOPs<br/>(G) |
-| :--------------------------------------------------------------------------------------------- | --- | ----------------- | ----- | ---- | ---- | ---- | ----- | ------ |
-| [**YOLOv6-N-base**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6n_base.pt) | 640 | 36.6<sup>distill</sup> | 727 | 1302 | 814 | 1805 | 4.65 | 11.46 |
-| [**YOLOv6-S-base**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6s_base.pt) | 640 | 45.3<sup>distill</sup> | 346 | 525 | 487 | 908 | 13.14 | 30.6 |
-| [**YOLOv6-M-base**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6m_base.pt) | 640 | 49.4<sup>distill</sup> | 179 | 245 | 284 | 439 | 28.33 | 72.30 |
-| [**YOLOv6-L-base**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6l_base.pt) | 640 | 51.1<sup>distill</sup> | 116 | 157 | 196 | 288 | 59.61 | 150.89 |
-
-- 速度是在 T4 上测试的,TensorRT 版本为 8.4.2.4;
-- 模型训练、评估、推理流程与原来保持一致,具体可参考 [首页 README 文档](https://github.com/meituan/YOLOv6/blob/main/README_cn.md#%E5%BF%AB%E9%80%9F%E5%BC%80%E5%A7%8B)。
diff --git a/configs/base/yolov6l_base_finetune.py b/configs/base/yolov6l_base_finetune.py
deleted file mode 100644
index 7e8dc062..00000000
--- a/configs/base/yolov6l_base_finetune.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# YOLOv6 large base model
-model = dict(
- type='YOLOv6l_base',
- depth_multiple=1.0,
- width_multiple=1.0,
- pretrained=None,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 2.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
-training_mode = "conv_relu"
diff --git a/configs/base/yolov6m_base_finetune.py b/configs/base/yolov6m_base_finetune.py
deleted file mode 100644
index af5449ec..00000000
--- a/configs/base/yolov6m_base_finetune.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# YOLOv6m medium/large base model
-model = dict(
- type='YOLOv6m_base',
- pretrained=None,
- depth_multiple=0.80,
- width_multiple=0.75,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 0.8,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
-training_mode = "conv_relu"
diff --git a/configs/base/yolov6n_base.py b/configs/base/yolov6n_base.py
deleted file mode 100644
index 8340ca60..00000000
--- a/configs/base/yolov6n_base.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# YOLOv6s nano base model
-model = dict(
- type='YOLOv6n_base',
- pretrained=None,
- depth_multiple=0.33,
- width_multiple=0.25,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- fuse_P2=True,
- cspsppf=True,
- ),
- neck=dict(
- type='RepBiFPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True, # set to True if you want to further train with distillation
- reg_max=16, # set to 16 if you want to further train with distillation
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-training_mode = "conv_relu"
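The head comments in this config couple `use_dfl` and `reg_max` (DFL enabled needs `reg_max=16`, DFL disabled needs `reg_max=0`). A hypothetical sanity check, not part of the repository, that enforces this pairing:

```python
# Hypothetical validation of the use_dfl / reg_max pairing described in the
# config comments: DFL on -> reg_max=16, DFL off -> reg_max=0.
def check_dfl_settings(head_cfg: dict) -> None:
    use_dfl = head_cfg.get("use_dfl", False)
    reg_max = head_cfg.get("reg_max", 0)
    if use_dfl and reg_max != 16:
        raise ValueError("use_dfl=True requires reg_max=16")
    if not use_dfl and reg_max != 0:
        raise ValueError("use_dfl=False requires reg_max=0")

# check_dfl_settings(model["head"])  # passes for the config above (True, 16)
```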
diff --git a/configs/base/yolov6n_base_finetune.py b/configs/base/yolov6n_base_finetune.py
deleted file mode 100644
index 593c3def..00000000
--- a/configs/base/yolov6n_base_finetune.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# YOLOv6s nano base model
-model = dict(
- type='YOLOv6n_base',
- pretrained=None,
- depth_multiple=0.33,
- width_multiple=0.25,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- fuse_P2=True,
- cspsppf=True,
- ),
- neck=dict(
- type='RepBiFPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=False, # set to True if you want to further train with distillation
- reg_max=0, # set to 16 if you want to further train with distillation
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
-training_mode = "conv_relu"
diff --git a/configs/base/yolov6s_base.py b/configs/base/yolov6s_base.py
deleted file mode 100644
index 4e28c178..00000000
--- a/configs/base/yolov6s_base.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# YOLOv6s small base model
-model = dict(
- type='YOLOv6s_base',
- pretrained=None,
- depth_multiple=0.70,
- width_multiple=0.50,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- cspsppf=True,
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',#CSPRepPANNeck
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True, # set to True if you want to further train with distillation
- reg_max=16, # set to 16 if you want to further train with distillation
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-training_mode = "conv_relu"
diff --git a/configs/base/yolov6s_base_finetune.py b/configs/base/yolov6s_base_finetune.py
deleted file mode 100644
index eb4d2159..00000000
--- a/configs/base/yolov6s_base_finetune.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# YOLOv6s small base model
-model = dict(
- type='YOLOv6s_base',
- pretrained=None,
- depth_multiple=0.70,
- width_multiple=0.50,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- cspsppf=True,
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=False, # set to True if you want to further train with distillation
- reg_max=0, # set to 16 if you want to further train with distillation
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
-training_mode = "conv_relu"
diff --git a/configs/experiment/eval_640_repro.py b/configs/experiment/eval_640_repro.py
deleted file mode 100644
index 1f6a6217..00000000
--- a/configs/experiment/eval_640_repro.py
+++ /dev/null
@@ -1,79 +0,0 @@
-# eval params for different scales
-
-eval_params = dict(
- default = dict(
- img_size=640,
- shrink_size=2,
- infer_on_rect=False,
- ),
- yolov6n = dict(
- img_size=640,
- shrink_size=4,
- infer_on_rect=False,
- ),
- yolov6t = dict(
- img_size=640,
- shrink_size=6,
- infer_on_rect=False,
- ),
- yolov6s = dict(
- img_size=640,
- shrink_size=6,
- infer_on_rect=False,
- ),
- yolov6m = dict(
- img_size=640,
- shrink_size=4,
- infer_on_rect=False,
- ),
- yolov6l = dict(
- img_size=640,
- shrink_size=4,
- infer_on_rect=False,
- ),
- yolov6l_relu = dict(
- img_size=640,
- shrink_size=2,
- infer_on_rect=False,
- ),
- yolov6n6 = dict(
- img_size=1280,
- shrink_size=17,
- infer_on_rect=False,
- ),
- yolov6s6 = dict(
- img_size=1280,
- shrink_size=8,
- infer_on_rect=False,
- ),
- yolov6m6 = dict(
- img_size=1280,
- shrink_size=64,
- infer_on_rect=False,
- ),
- yolov6l6 = dict(
- img_size=1280,
- shrink_size=41,
- infer_on_rect=False,
- ),
- yolov6s_mbla = dict(
- img_size=640,
- shrink_size=7,
- infer_on_rect=False,
- ),
- yolov6m_mbla = dict(
- img_size=640,
- shrink_size=7,
- infer_on_rect=False,
- ),
- yolov6l_mbla = dict(
- img_size=640,
- shrink_size=7,
- infer_on_rect=False,
- ),
- yolov6x_mbla = dict(
- img_size=640,
- shrink_size=3,
- infer_on_rect=False,
- )
-)
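The `eval_params` dict above stores per-scale evaluation settings keyed by model name, with `default` as a fallback. A small hypothetical lookup helper (the repository's actual selection logic may differ):

```python
# Hypothetical lookup: merge the 'default' entry with any model-specific override.
def resolve_eval_params(eval_params: dict, model_name: str) -> dict:
    params = dict(eval_params["default"])           # start from the defaults
    params.update(eval_params.get(model_name, {}))  # unknown names keep the defaults
    return params

# resolve_eval_params(eval_params, "yolov6m6")
# -> {'img_size': 1280, 'shrink_size': 64, 'infer_on_rect': False}
```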
diff --git a/configs/experiment/yolov6n_with_eval_params.py b/configs/experiment/yolov6n_with_eval_params.py
deleted file mode 100644
index e7366b33..00000000
--- a/configs/experiment/yolov6n_with_eval_params.py
+++ /dev/null
@@ -1,76 +0,0 @@
-# YOLOv6n model with eval params (used during training)
-model = dict(
- type='YOLOv6n',
- pretrained=None,
- depth_multiple=0.33,
- width_multiple=0.25,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.02, #0.01 # 0.02
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-
-# Eval params used when evaluating the model.
-# If an eval_params item is a list, e.g. conf_thres=[0.03, 0.03],
-# the first value is used in train.py and the second in eval.py.
-eval_params = dict(
- batch_size=None, #None means it will be the same as the per-device batch size * 2
- img_size=None, #None means it will be the same as the training image size
- conf_thres=0.03,
- iou_thres=0.65,
-
- #padding and scale coord
- shrink_size=None, # None means the image will not be shrunk.
- infer_on_rect=True,
-
- #metric
- verbose=False,
- do_coco_metric=True,
- do_pr_metric=False,
- plot_curve=False,
- plot_confusion_matrix=False
-)
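As the comments in this config describe, an `eval_params` entry may be either a scalar or a two-element list whose first value is used during training and second during standalone evaluation. A hypothetical helper illustrating that convention:

```python
# Hypothetical helper for the list-vs-scalar convention documented above:
# a 2-element list gives separate train-time and eval-time values.
def pick_eval_value(value, for_training: bool):
    if isinstance(value, (list, tuple)) and len(value) == 2:
        return value[0] if for_training else value[1]
    return value  # scalars (and None) are shared by train.py and eval.py

# pick_eval_value([0.03, 0.01], for_training=True)   -> 0.03  (train.py)
# pick_eval_value([0.03, 0.01], for_training=False)  -> 0.01  (eval.py)
# pick_eval_value(0.65, for_training=False)          -> 0.65
```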
diff --git a/configs/experiment/yolov6s_csp_scaled.py b/configs/experiment/yolov6s_csp_scaled.py
deleted file mode 100644
index ba28843a..00000000
--- a/configs/experiment/yolov6s_csp_scaled.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# YOLOv6m model
-model = dict(
- type='YOLOv6s_csp',
- pretrained=None,
- depth_multiple=0.70,
- width_multiple=0.50,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- ),
- neck=dict(
- type='CSPRepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- iou_type='giou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver=dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.9,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.1,
-)
diff --git a/configs/experiment/yolov6t.py b/configs/experiment/yolov6t.py
deleted file mode 100644
index afacd436..00000000
--- a/configs/experiment/yolov6t.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# YOLOv6t model
-model = dict(
- type='YOLOv6t',
- pretrained=None,
- depth_multiple=0.33,
- width_multiple=0.375,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
diff --git a/configs/experiment/yolov6t_csp_scaled.py b/configs/experiment/yolov6t_csp_scaled.py
deleted file mode 100644
index e8ba99a9..00000000
--- a/configs/experiment/yolov6t_csp_scaled.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# YOLOv6n model
-model = dict(
- type='YOLOv6n_csp',
- pretrained=None,
- depth_multiple=0.60,
- width_multiple=0.50,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- ),
- neck=dict(
- type='CSPRepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- iou_type='giou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver=dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.9,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.1,
-)
diff --git a/configs/experiment/yolov6t_finetune.py b/configs/experiment/yolov6t_finetune.py
deleted file mode 100644
index 8be47416..00000000
--- a/configs/experiment/yolov6t_finetune.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# YOLOv6t model
-model = dict(
- type='YOLOv6t',
- pretrained='weights/yolov6t.pt',
- depth_multiple=0.33,
- width_multiple=0.375,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
diff --git a/configs/mbla/README.md b/configs/mbla/README.md
deleted file mode 100644
index d163124d..00000000
--- a/configs/mbla/README.md
+++ /dev/null
@@ -1,28 +0,0 @@
-## YOLOv6 mbla model
-
-English | [简体中文](./README_cn.md)
-
-### Features
-
-- Apply MBLABlock (Multi-Branch Layer Aggregation Block) blocks in the network structure.
-
-Advantage:
-- Adopt a unified network structure and configuration.
-
-- Better performance for the small model compared to the YOLOv6 3.0 release.
-
-- Better performance compared to the YOLOv6 3.0 base models.
-
-
-
-### Performance
-
-| Model | Size | mAP<sup>val</sup><br/>0.5:0.95 | Speed<sup>T4</sup><br/>trt fp16 b1<br/>(fps) | Speed<sup>T4</sup><br/>trt fp16 b32<br/>(fps) | Params<br/>(M) | FLOPs<br/>(G) |
-| :----------------------------------------------------------- | -------- | :----------------------- | -------------------------------------- | --------------------------------------- | -------------------- | ------------------- |
-| [**YOLOv6-S-mbla**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6s_mbla.pt) | 640 | 47.0<sup>distill</sup> | 300 | 424 | 11.6 | 29.8 |
-| [**YOLOv6-M-mbla**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6m_mbla.pt) | 640 | 50.3<sup>distill</sup> | 168 | 216 | 26.1 | 66.7 |
-| [**YOLOv6-L-mbla**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6l_base.pt) | 640 | 52.0<sup>distill</sup> | 129 | 154 | 46.3 | 118.2 |
-| [**YOLOv6-X-base**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6x_base.pt) | 640 | 53.5<sup>distill</sup> | 78 | 94 | 78.8 | 199.0 |
-
-- Speed is tested with TensorRT 8.4.2.4 on T4.
-- The processes of model training, evaluation, and inference are the same as the original ones. For details, please refer to [this README](https://github.com/meituan/YOLOv6#quick-start).
diff --git a/configs/mbla/README_cn.md b/configs/mbla/README_cn.md
deleted file mode 100644
index ad399fe0..00000000
--- a/configs/mbla/README_cn.md
+++ /dev/null
@@ -1,26 +0,0 @@
-## YOLOv6 MBLA版模型
-
-简体中文 | [English](./README.md)
-
-### 模型特点
-
-- 网络主体结构均采用MBLABlock(Multi Branch Layer Aggregation Block)
-
-优势:
-- 采用统一的网络结构和配置
-
-- 相比3.0版本在s尺度效果提升,相比3.0base版本各尺度效果提升
-
-
-
-### 模型指标
-
-| 模型 | 输入尺寸 | mAP<sup>val</sup><br/>0.5:0.95 | 速度<sup>T4</sup><br/>trt fp16 b1<br/>(fps) | 速度<sup>T4</sup><br/>trt fp16 b32<br/>(fps) | Params<br/>(M) | FLOPs<br/>(G) |
-| :----------------------------------------------------------- | -------- | :----------------------- | -------------------------------------- | --------------------------------------- | -------------------- | ------------------- |
-| [**YOLOv6-S-mbla**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6s_mbla.pt) | 640 | 47.0<sup>distill</sup> | 300 | 424 | 11.6 | 29.8 |
-| [**YOLOv6-M-mbla**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6m_mbla.pt) | 640 | 50.3<sup>distill</sup> | 168 | 216 | 26.1 | 66.7 |
-| [**YOLOv6-L-mbla**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6l_base.pt) | 640 | 52.0<sup>distill</sup> | 129 | 154 | 46.3 | 118.2 |
-| [**YOLOv6-X-base**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6x_base.pt) | 640 | 53.5<sup>distill</sup> | 78 | 94 | 78.8 | 199.0 |
-
-- 速度是在 T4 上测试的,TensorRT 版本为 8.4.2.4;
-- 模型训练、评估、推理流程与原来保持一致,具体可参考 [首页 README 文档](https://github.com/meituan/YOLOv6/blob/main/README_cn.md#%E5%BF%AB%E9%80%9F%E5%BC%80%E5%A7%8B)。
diff --git a/configs/mbla/yolov6l_mbla_finetune.py b/configs/mbla/yolov6l_mbla_finetune.py
deleted file mode 100644
index 6ea88967..00000000
--- a/configs/mbla/yolov6l_mbla_finetune.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# YOLOv6l model
-model = dict(
- type='YOLOv6l_mbla',
- pretrained=None,
- depth_multiple=0.5,
- width_multiple=1.0,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 4, 8, 8, 4],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- stage_block_type="MBLABlock",
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[8, 8, 8, 8],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- stage_block_type="MBLABlock",
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 2.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver=dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
-
-training_mode = "conv_silu"
diff --git a/configs/mbla/yolov6m_mbla.py b/configs/mbla/yolov6m_mbla.py
deleted file mode 100644
index f84fc43d..00000000
--- a/configs/mbla/yolov6m_mbla.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# YOLOv6l model
-model = dict(
- type='YOLOv6m_mbla',
- pretrained=None,
- depth_multiple=0.5,
- width_multiple=0.75,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 4, 8, 8, 4],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- stage_block_type="MBLABlock",
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[8, 8, 8, 8],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- stage_block_type="MBLABlock",
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 2.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver=dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.9,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.1,
-)
-
-training_mode = "conv_silu"
diff --git a/configs/mbla/yolov6m_mbla_finetune.py b/configs/mbla/yolov6m_mbla_finetune.py
deleted file mode 100644
index aa0bc816..00000000
--- a/configs/mbla/yolov6m_mbla_finetune.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# YOLOv6l model
-model = dict(
- type='YOLOv6m_mbla',
- pretrained=None,
- depth_multiple=0.5,
- width_multiple=0.75,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 4, 8, 8, 4],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- stage_block_type="MBLABlock",
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[8, 8, 8, 8],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- stage_block_type="MBLABlock",
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 2.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver=dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
-
-training_mode = "conv_silu"
diff --git a/configs/mbla/yolov6s_mbla.py b/configs/mbla/yolov6s_mbla.py
deleted file mode 100644
index eedc76ee..00000000
--- a/configs/mbla/yolov6s_mbla.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# YOLOv6l model
-model = dict(
- type='YOLOv6s_mbla',
- pretrained=None,
- depth_multiple=0.5,
- width_multiple=0.5,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 4, 8, 8, 4],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- stage_block_type="MBLABlock",
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[8, 8, 8, 8],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- stage_block_type="MBLABlock",
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 2.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver=dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.9,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.1,
-)
-
-training_mode = "conv_silu"
diff --git a/configs/mbla/yolov6s_mbla_finetune.py b/configs/mbla/yolov6s_mbla_finetune.py
deleted file mode 100644
index a9812c71..00000000
--- a/configs/mbla/yolov6s_mbla_finetune.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# YOLOv6l model
-model = dict(
- type='YOLOv6s_mbla',
- pretrained=None,
- depth_multiple=0.5,
- width_multiple=0.5,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 4, 8, 8, 4],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- stage_block_type="MBLABlock",
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[8, 8, 8, 8],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- stage_block_type="MBLABlock",
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 2.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver=dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
-
-training_mode = "conv_silu"
diff --git a/configs/mbla/yolov6x_mbla.py b/configs/mbla/yolov6x_mbla.py
deleted file mode 100644
index b7b9703c..00000000
--- a/configs/mbla/yolov6x_mbla.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# YOLOv6l model
-model = dict(
- type='YOLOv6x_mbla',
- pretrained=None,
- depth_multiple=1.0,
- width_multiple=1.0,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 4, 8, 8, 4],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- stage_block_type="MBLABlock",
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[8, 8, 8, 8],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- stage_block_type="MBLABlock",
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 2.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver=dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.9,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.1,
-)
-
-training_mode = "conv_silu"
diff --git a/configs/mbla/yolov6x_mbla_finetune.py b/configs/mbla/yolov6x_mbla_finetune.py
deleted file mode 100644
index 65c57cb2..00000000
--- a/configs/mbla/yolov6x_mbla_finetune.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# YOLOv6l model
-model = dict(
- type='YOLOv6x_mbla',
- pretrained=None,
- depth_multiple=1.0,
- width_multiple=1.0,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 4, 8, 8, 4],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- stage_block_type="MBLABlock",
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[8, 8, 8, 8],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- stage_block_type="MBLABlock",
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 2.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver=dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
-
-training_mode = "conv_silu"
diff --git a/configs/qarepvgg/README.md b/configs/qarepvgg/README.md
deleted file mode 100644
index 81b130d2..00000000
--- a/configs/qarepvgg/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-## YOLOv6 base model
-
-English | [简体中文](./README_cn.md)
-
-### Features
-
-- This is a RepOpt-version implementation of YOLOv6 according to [QARepVGG](https://arxiv.org/abs/2212.01593).
-
-- The QARep version models have slightly lower float accuracy on COCO than the RepVGG version models, but achieve much higher quantized accuracy.
-
-- The INT8 accuracies listed were obtained using a simple PTQ process, as implemented in the [`onnx_to_trt.py`](../../deploy/TensorRT/onnx_to_trt.py) script. However, higher accuracies could be achieved using Quantization-Aware Training (QAT) due to the specific architecture design of the QARepVGG model.
-
-### Performance
-
-| Model | Size | Float<br/>mAP<sup>val</sup><br/>0.5:0.95 | INT8<br/>mAP<sup>val</sup><br/>0.5:0.95 | Speed<sup>T4</sup><br/>trt fp16 b32<br/>(fps) | Speed<sup>T4</sup><br/>trt int8 b32<br/>(fps) | Params<br/>(M) | FLOPs<br/>(G) |
-| :----------------------------------------------------------- | -------- | :----------------------- | -------------------------------------- | --------------------------------------- | -------------------- | ------------------- | -------------------- |
-| [**YOLOv6-N**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6n.pt) | 640 | 37.5 | 34.3 | 1286 | 1773 |4.7 | 11.4 |
-| [**YOLOv6-N-qa**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6n_qa.pt) | 640 | 37.1 | 36.4 | 1286 | 1773 | 4.7 | 11.4 |
-| [**YOLOv6-S**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6s.pt) | 640 | 45.0 | 41.3 | 513 | 1117 | 18.5 | 45.3 |
-| [**YOLOv6-S-qa**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6s_qa.pt) | 640 | 44.7 | 44.0 | 513 | 1117 | 18.5 | 45.3 |
-| [**YOLOv6-M**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6m.pt) | 640 | 50.0 | 48.1 | 250 | 439 | 34.9 | 85.8 |
-| [**YOLOv6-M-qa**](https://github.com/meituan/YOLOv6/releases/download/0.3.0/yolov6m_qa.pt) | 640 | 49.7 | 49.4 | 250 | 439 | 34.9 | 85.8 |
-
-- Speed is tested with TensorRT 8.4 on T4.
-- We have not conducted experiments on the YOLOv6-L model since it does not use the RepVGG architecture.
-- The processes of model training, evaluation, and inference are the same as the original ones. For details, please refer to [this README](https://github.com/meituan/YOLOv6#quick-start).
diff --git a/configs/repopt/yolov6_tiny_hs.py b/configs/repopt/yolov6_tiny_hs.py
deleted file mode 100644
index 70a74279..00000000
--- a/configs/repopt/yolov6_tiny_hs.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# YOLOv6t model
-model = dict(
- type='YOLOv6t',
- pretrained=None,
- depth_multiple=0.33,
- width_multiple=0.375,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-
-# Choose Rep-block by the training Mode, choices=["repvgg", "hyper-search", "repopt"]
-training_mode='hyper_search'
diff --git a/configs/repopt/yolov6_tiny_opt.py b/configs/repopt/yolov6_tiny_opt.py
deleted file mode 100644
index 95dbf317..00000000
--- a/configs/repopt/yolov6_tiny_opt.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# YOLOv6t model
-model = dict(
- type='YOLOv6t',
- pretrained=None,
- scales='../yolov6_assert/v6t_v2_scale_last.pt',
- depth_multiple=0.33,
- width_multiple=0.375,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-# Choose Rep-block by the training Mode, choices=["repvgg", "hyper-search", "repopt"]
-training_mode='repopt'
diff --git a/configs/repopt/yolov6_tiny_opt_qat.py b/configs/repopt/yolov6_tiny_opt_qat.py
deleted file mode 100644
index 701bf4f1..00000000
--- a/configs/repopt/yolov6_tiny_opt_qat.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# YOLOv6t model
-model = dict(
- type='YOLOv6t',
- pretrained='./assets/v6s_t.pt',
- scales='./assets/v6t_v2_scale_last.pt',
- depth_multiple=0.33,
- width_multiple=0.375,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='siou',
- use_dfl=False,
- reg_max=0, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.00001,
- lrf=0.001,
- momentum=0.937,
- weight_decay=0.00005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-
-ptq = dict(
- num_bits = 8,
- calib_batches = 4,
- # 'max', 'histogram'
- calib_method = 'max',
- # 'entropy', 'percentile', 'mse'
- histogram_amax_method='entropy',
- histogram_amax_percentile=99.99,
- calib_output_path='./',
- sensitive_layers_skip=False,
- sensitive_layers_list=[],
-)
-
-qat = dict(
- calib_pt = './assets/v6s_t_calib_max.pt',
- sensitive_layers_skip = False,
- sensitive_layers_list=[],
-)
-
-# Choose Rep-block by the training Mode, choices=["repvgg", "hyper-search", "repopt"]
-training_mode='repopt'
diff --git a/configs/repopt/yolov6n_hs.py b/configs/repopt/yolov6n_hs.py
deleted file mode 100644
index 67607ba2..00000000
--- a/configs/repopt/yolov6n_hs.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# YOLOv6n model
-model = dict(
- type='YOLOv6n',
- pretrained=None,
- depth_multiple=0.33,
- width_multiple=0.25,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.02, #0.01 # 0.02
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-
-# Choose Rep-block by the training Mode, choices=["repvgg", "hyper-search", "repopt"]
-training_mode='hyper_search'
diff --git a/configs/repopt/yolov6n_opt.py b/configs/repopt/yolov6n_opt.py
deleted file mode 100644
index 9b3db4fb..00000000
--- a/configs/repopt/yolov6n_opt.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# YOLOv6n model
-model = dict(
- type='YOLOv6n',
- pretrained=None,
- scales='../yolov6_assert/v6n_v2_scale_last.pt',
- depth_multiple=0.33,
- width_multiple=0.25,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.02, #0.01 # 0.02
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-# Choose Rep-block by the training Mode, choices=["repvgg", "hyper-search", "repopt"]
-training_mode='repopt'
diff --git a/configs/repopt/yolov6n_opt_qat.py b/configs/repopt/yolov6n_opt_qat.py
deleted file mode 100644
index 4e76dfd3..00000000
--- a/configs/repopt/yolov6n_opt_qat.py
+++ /dev/null
@@ -1,82 +0,0 @@
-# YOLOv6n model
-model = dict(
- type='YOLOv6n',
- pretrained='./assets/v6s_n.pt',
- scales='./assets/v6n_v2_scale_last.pt',
- depth_multiple=0.33,
- width_multiple=0.25,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='siou',
- use_dfl=False,
- reg_max=0, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.00001, #0.01 # 0.02
- lrf=0.001,
- momentum=0.937,
- weight_decay=0.00005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-
-ptq = dict(
- num_bits = 8,
- calib_batches = 4,
- # 'max', 'histogram'
- calib_method = 'max',
- # 'entropy', 'percentile', 'mse'
- histogram_amax_method='entropy',
- histogram_amax_percentile=99.99,
- calib_output_path='./',
- sensitive_layers_skip=False,
- sensitive_layers_list=[],
-)
-
-qat = dict(
- calib_pt = './assets/v6s_n_calib_max.pt',
- sensitive_layers_skip = False,
- sensitive_layers_list=[],
-)
-# Choose Rep-block by the training Mode, choices=["repvgg", "hyper-search", "repopt"]
-training_mode='repopt'
diff --git a/configs/repopt/yolov6s_hs.py b/configs/repopt/yolov6s_hs.py
deleted file mode 100644
index 60c7286a..00000000
--- a/configs/repopt/yolov6s_hs.py
+++ /dev/null
@@ -1,59 +0,0 @@
-# YOLOv6s model
-model = dict(
- type='YOLOv6s',
- pretrained=None,
- depth_multiple=0.33,
- width_multiple=0.50,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=False,
- reg_max=0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-
-# Choose Rep-block by the training Mode, choices=["repvgg", "hyper-search", "repopt"]
-training_mode='hyper_search'
diff --git a/configs/repopt/yolov6s_opt.py b/configs/repopt/yolov6s_opt.py
deleted file mode 100644
index 2676eb4f..00000000
--- a/configs/repopt/yolov6s_opt.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# YOLOv6s model
-model = dict(
- type='YOLOv6s',
- pretrained=None,
- scales='../yolov6_assert/v6s_v2_scale.pt',
- depth_multiple=0.33,
- width_multiple=0.50,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=False,
- reg_max=0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-
-# Choose Rep-block by the training Mode, choices=["repvgg", "hyper-search", "repopt"]
-training_mode='repopt'
diff --git a/configs/repopt/yolov6s_opt_qat.py b/configs/repopt/yolov6s_opt_qat.py
deleted file mode 100644
index a41ea085..00000000
--- a/configs/repopt/yolov6s_opt_qat.py
+++ /dev/null
@@ -1,113 +0,0 @@
-# YOLOv6s model
-model = dict(
- type='YOLOv6s',
- pretrained='./assets/yolov6s_v2_reopt_43.1.pt',
- scales='./assets/yolov6s_v2_scale.pt',
- depth_multiple=0.33,
- width_multiple=0.50,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- ),
- neck=dict(
- type='RepPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=1,
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type = 'giou',
- use_dfl = False,
- reg_max = 0, # if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.00001,
- lrf=0.001,
- momentum=0.937,
- weight_decay=0.00005,
- warmup_epochs=3,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
-
-ptq = dict(
- num_bits = 8,
- calib_batches = 4,
- # 'max', 'histogram'
- calib_method = 'histogram',
- # 'entropy', 'percentile', 'mse'
- histogram_amax_method='entropy',
- histogram_amax_percentile=99.99,
- calib_output_path='./',
- sensitive_layers_skip=False,
- sensitive_layers_list=['detect.stems.0.conv',
- 'detect.stems.1.conv',
- 'detect.stems.2.conv',
- 'detect.cls_convs.0.conv',
- 'detect.cls_convs.1.conv',
- 'detect.cls_convs.2.conv',
- 'detect.reg_convs.0.conv',
- 'detect.reg_convs.1.conv',
- 'detect.reg_convs.2.conv',
- 'detect.cls_preds.0',
- 'detect.cls_preds.1',
- 'detect.cls_preds.2',
- 'detect.reg_preds.0',
- 'detect.reg_preds.1',
- 'detect.reg_preds.2',
- ],
-)
-
-qat = dict(
- calib_pt = './assets/yolov6s_v2_reopt_43.1_calib_histogram.pt',
- sensitive_layers_skip = False,
- sensitive_layers_list=['detect.stems.0.conv',
- 'detect.stems.1.conv',
- 'detect.stems.2.conv',
- 'detect.cls_convs.0.conv',
- 'detect.cls_convs.1.conv',
- 'detect.cls_convs.2.conv',
- 'detect.reg_convs.0.conv',
- 'detect.reg_convs.1.conv',
- 'detect.reg_convs.2.conv',
- 'detect.cls_preds.0',
- 'detect.cls_preds.1',
- 'detect.cls_preds.2',
- 'detect.reg_preds.0',
- 'detect.reg_preds.1',
- 'detect.reg_preds.2',
- ],
-)
-
-# Choose Rep-block by the training Mode, choices=["repvgg", "hyper-search", "repopt"]
-training_mode='repopt'
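The `ptq`/`qat` dicts in this config list detection-head layers that can optionally be excluded from INT8 quantization. Below is a rough sketch of how `sensitive_layers_skip` and `sensitive_layers_list` might be applied; it assumes quantizer wrappers expose a `disable()` method (as NVIDIA's pytorch-quantization `TensorQuantizer` does) and is not the repository's actual QAT code.

```python
# Rough sketch (assumptions noted above): keep "sensitive" layers in float by
# disabling any quantizer modules registered under their names.
import torch.nn as nn

def skip_sensitive_layers(model: nn.Module, qat_cfg: dict) -> None:
    if not qat_cfg.get("sensitive_layers_skip", False):
        return  # quantize everything
    sensitive = set(qat_cfg.get("sensitive_layers_list", []))
    for name, module in model.named_modules():
        # e.g. "detect.cls_preds.0._input_quantizer" sits under "detect.cls_preds.0"
        if any(name == s or name.startswith(s + ".") for s in sensitive):
            if hasattr(module, "disable"):
                module.disable()
```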
diff --git a/configs/yolov6l.py b/configs/solo/yolov6l_solo.py
similarity index 92%
rename from configs/yolov6l.py
rename to configs/solo/yolov6l_solo.py
index bfa6728b..caabc1f4 100644
--- a/configs/yolov6l.py
+++ b/configs/solo/yolov6l_solo.py
@@ -1,4 +1,4 @@
-# YOLOv6l model
+# YOLOv6l-seg model
model = dict(
type='YOLOv6l',
pretrained=None,
@@ -22,6 +22,10 @@
in_channels=[128, 256, 512],
num_layers=3,
begin_indices=24,
+ npr=256,
+ nm=64,
+ isseg=True,
+ issolo=True,
anchors=3,
anchors_init=[[10,13, 19,19, 33,23],
[30,61, 59,59, 59,119],
@@ -45,7 +49,7 @@
lr0=0.01,
lrf=0.01,
momentum=0.937,
- weight_decay=0.0005,
+ weight_decay=0.001,
warmup_epochs=3.0,
warmup_momentum=0.8,
warmup_bias_lr=0.1
diff --git a/configs/qarepvgg/yolov6m_qa.py b/configs/solo/yolov6m_solo.py
similarity index 92%
rename from configs/qarepvgg/yolov6m_qa.py
rename to configs/solo/yolov6m_solo.py
index c0690f15..84e73c0f 100644
--- a/configs/qarepvgg/yolov6m_qa.py
+++ b/configs/solo/yolov6m_solo.py
@@ -1,4 +1,4 @@
-# YOLOv6m model
+# YOLOv6m-seg model
model = dict(
type='YOLOv6m',
pretrained=None,
@@ -22,6 +22,10 @@
in_channels=[128, 256, 512],
num_layers=3,
begin_indices=24,
+ npr=256,
+ nm=64,
+ isseg=True,
+ issolo=True,
anchors=3,
anchors_init=[[10,13, 19,19, 33,23],
[30,61, 59,59, 59,119],
@@ -45,7 +49,7 @@
lr0=0.01,
lrf=0.01,
momentum=0.937,
- weight_decay=0.0005,
+ weight_decay=0.001,
warmup_epochs=3.0,
warmup_momentum=0.8,
warmup_bias_lr=0.1
@@ -64,5 +68,3 @@
mosaic=1.0,
mixup=0.1,
)
-
-training_mode='qarepvggv2'
diff --git a/configs/yolov6n.py b/configs/solo/yolov6n_solo.py
similarity index 92%
rename from configs/yolov6n.py
rename to configs/solo/yolov6n_solo.py
index 74f9386d..6392ceb4 100644
--- a/configs/yolov6n.py
+++ b/configs/solo/yolov6n_solo.py
@@ -1,4 +1,4 @@
-# YOLOv6n model
+# YOLOv6n-seg model
model = dict(
type='YOLOv6n',
pretrained=None,
@@ -21,6 +21,10 @@
in_channels=[128, 256, 512],
num_layers=3,
begin_indices=24,
+ npr=256,
+ nm=64,
+ isseg=True,
+ issolo=True,
anchors=3,
anchors_init=[[10,13, 19,19, 33,23],
[30,61, 59,59, 59,119],
@@ -44,7 +48,7 @@
lr0=0.02,
lrf=0.01,
momentum=0.937,
- weight_decay=0.0005,
+ weight_decay=0.001,
warmup_epochs=3.0,
warmup_momentum=0.8,
warmup_bias_lr=0.1
diff --git a/configs/qarepvgg/yolov6s_qa.py b/configs/solo/yolov6s_solo.py
similarity index 94%
rename from configs/qarepvgg/yolov6s_qa.py
rename to configs/solo/yolov6s_solo.py
index 3051679a..c2499ba3 100644
--- a/configs/qarepvgg/yolov6s_qa.py
+++ b/configs/solo/yolov6s_solo.py
@@ -1,4 +1,4 @@
-# YOLOv6s model
+# YOLOv6s-seg model
model = dict(
type='YOLOv6s',
pretrained=None,
@@ -21,6 +21,10 @@
in_channels=[128, 256, 512],
num_layers=3,
begin_indices=24,
+ npr=256,
+ nm=64,
+ isseg=True,
+ issolo=True,
anchors=3,
anchors_init=[[10,13, 19,19, 33,23],
[30,61, 59,59, 59,119],
@@ -63,5 +67,3 @@
mosaic=1.0,
mixup=0.0,
)
-
-training_mode='qarepvggv2'
diff --git a/configs/base/yolov6m_base.py b/configs/solo/yolov6x_solo.py
similarity index 81%
rename from configs/base/yolov6m_base.py
rename to configs/solo/yolov6x_solo.py
index 5670f096..57a175ab 100644
--- a/configs/base/yolov6m_base.py
+++ b/configs/solo/yolov6x_solo.py
@@ -1,9 +1,9 @@
-# YOLOv6m medium/large base model
+# YOLOv6x-seg model
model = dict(
- type='YOLOv6m_base',
+ type='YOLOv6x',
pretrained=None,
- depth_multiple=0.80,
- width_multiple=0.75,
+ depth_multiple=1.33,
+ width_multiple=1.25,
backbone=dict(
type='CSPBepBackbone',
num_repeats=[1, 6, 12, 18, 6],
@@ -22,6 +22,10 @@
in_channels=[128, 256, 512],
num_layers=3,
begin_indices=24,
+ npr=256,
+ nm=64,
+ isseg=True,
+ issolo=True,
anchors=3,
anchors_init=[[10,13, 19,19, 33,23],
[30,61, 59,59, 59,119],
@@ -33,7 +37,7 @@
use_dfl=True,
reg_max=16, #if use_dfl is False, please set reg_max to 0
distill_weight={
- 'class': 0.8,
+ 'class': 2.0,
'dfl': 1.0,
},
)
@@ -45,7 +49,7 @@
lr0=0.01,
lrf=0.01,
momentum=0.937,
- weight_decay=0.0005,
+ weight_decay=0.0015,
warmup_epochs=3.0,
warmup_momentum=0.8,
warmup_bias_lr=0.1
@@ -64,4 +68,5 @@
mosaic=1.0,
mixup=0.1,
)
-training_mode = "conv_relu"
+training_mode = "conv_silu"
+# use normal conv to speed up training and further improve accuracy.
diff --git a/configs/yolov6_lite/README.md b/configs/yolov6_lite/README.md
deleted file mode 100644
index 170d12d9..00000000
--- a/configs/yolov6_lite/README.md
+++ /dev/null
@@ -1,22 +0,0 @@
-## YOLOv6Lite model
-
-English | [简体中文](./README_cn.md)
-
-## Mobile Benchmark
-| Model | Size | mAP<sup>val</sup><br/>0.5:0.95 | sm8350<br/>(ms) | mt6853<br/>(ms) | sdm660<br/>(ms) | Params<br/>(M) | FLOPs<br/>(G) |
-| :----------------------------------------------------------- | ---- | -------------------- | -------------------- | -------------------- | -------------------- | -------------------- | -------------------- |
-| [**YOLOv6Lite-S**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6lite_s.pt) | 320*320 | 22.4 | 7.99 | 11.99 | 41.86 | 0.55 | 0.56 |
-| [**YOLOv6Lite-M**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6lite_m.pt) | 320*320 | 25.1 | 9.08 | 13.27 | 47.95 | 0.79 | 0.67 |
-| [**YOLOv6Lite-L**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6lite_l.pt) | 320*320 | 28.0 | 11.37 | 16.20 | 61.40 | 1.09 | 0.87 |
-| [**YOLOv6Lite-L**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6lite_l.pt) | 320*192 | 25.0 | 7.02 | 9.66 | 36.13 | 1.09 | 0.52 |
-| [**YOLOv6Lite-L**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6lite_l.pt) | 224*128 | 18.9 | 3.63 | 4.99 | 17.76 | 1.09 | 0.24 |
-
-
-Table Notes
-
-- From the perspective of model size and input image ratio, we have built a series of models on the mobile terminal to facilitate flexible applications in different scenarios.
-- All checkpoints are trained with 400 epochs without distillation.
-- Results of the mAP and speed are evaluated on [COCO val2017](https://cocodataset.org/#download) dataset, and the input resolution is the Size in the table.
-- Speed is tested on MNN 2.3.0 AArch64 with 2 threads by arm82 acceleration. The inference warm-up is performed 10 times, and the cycle is performed 100 times.
-- Qualcomm 888(sm8350), Dimensity 720(mt6853) and Qualcomm 660(sdm660) correspond to chips with different performances at the high, middle and low end respectively, which can be used as a reference for model capabilities under different chips.
-- Refer to [Test NCNN Speed](./docs/Test_NCNN_speed.md) tutorial to reproduce the NCNN speed results of YOLOv6Lite.
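The benchmark protocol described in the notes above (2 threads, 10 warm-up runs, 100 timed runs) amounts to a simple timing loop. The sketch below is illustrative only; `run_inference` is a placeholder callable standing in for an MNN session, not an actual MNN API call:

```python
import time

def measure_latency_ms(run_inference, warmup=10, iters=100):
    """Average per-inference latency in milliseconds (warm-up runs discarded)."""
    for _ in range(warmup):
        run_inference()
    start = time.perf_counter()
    for _ in range(iters):
        run_inference()
    return (time.perf_counter() - start) * 1000.0 / iters
```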
diff --git a/configs/yolov6_lite/README_cn.md b/configs/yolov6_lite/README_cn.md
deleted file mode 100644
index 23dd715e..00000000
--- a/configs/yolov6_lite/README_cn.md
+++ /dev/null
@@ -1,23 +0,0 @@
-## YOLOv6 轻量级模型
-
-简体中文 | [English](./README.md)
-
-## 移动端模型指标
-
-| 模型 | 输入尺寸 | mAPval 0.5:0.95 | sm8350 (ms) | mt6853 (ms) | sdm660 (ms) | Params (M) | FLOPs (G) |
-| :----------------------------------------------------------- | ---- | -------------------- | -------------------- | -------------------- | -------------------- | -------------------- | -------------------- |
-| [**YOLOv6Lite-S**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6lite_s.pt) | 320*320 | 22.4 | 7.99 | 11.99 | 41.86 | 0.55 | 0.56 |
-| [**YOLOv6Lite-M**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6lite_m.pt) | 320*320 | 25.1 | 9.08 | 13.27 | 47.95 | 0.79 | 0.67 |
-| [**YOLOv6Lite-L**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6lite_l.pt) | 320*320 | 28.0 | 11.37 | 16.20 | 61.40 | 1.09 | 0.87 |
-| [**YOLOv6Lite-L**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6lite_l.pt) | 320*192 | 25.0 | 7.02 | 9.66 | 36.13 | 1.09 | 0.52 |
-| [**YOLOv6Lite-L**](https://github.com/meituan/YOLOv6/releases/download/0.4.0/yolov6lite_l.pt) | 224*128 | 18.9 | 3.63 | 4.99 | 17.76 | 1.09 | 0.24 |
-
-
-表格笔记
-
-- 从模型尺寸和输入图片比例两种角度,在构建了移动端系列模型,方便不同场景下的灵活应用。
-- 所有权重都经过 400 个 epoch 的训练,并且没有使用蒸馏技术。
-- mAP 和速度指标是在 COCO val2017 数据集上评估的,输入分辨率为表格中对应展示的。
-- 使用 MNN 2.3.0 AArch64 进行速度测试。测速时,采用2个线程,并开启arm82加速,推理预热10次,循环100次。
-- 高通888(sm8350)、天玑720(mt6853)和高通660(sdm660)分别对应高中低端不同性能的芯片,可以作为不同芯片下机型能力的参考。
-- [NCNN 速度测试](./docs/Test_NCNN_speed.md)教程可以帮助展示及复现 YOLOv6Lite 的 NCNN 速度结果。
diff --git a/configs/yolov6_lite/yolov6_lite_l.py b/configs/yolov6_lite/yolov6_lite_l.py
deleted file mode 100644
index 212c8c73..00000000
--- a/configs/yolov6_lite/yolov6_lite_l.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# YOLOv6-lite-l model
-model = dict(
- type='YOLOv6-lite-l',
- pretrained=None,
- width_multiple=1.5,
- backbone=dict(
- type='Lite_EffiBackbone',
- num_repeats=[1, 3, 7, 3],
- out_channels=[24, 32, 64, 128, 256],
- scale_size=0.5,
- ),
- neck=dict(
- type='Lite_EffiNeck',
- in_channels=[256, 128, 64],
- unified_channels=96
- ),
- head=dict(
- type='Lite_EffideHead',
- in_channels=[96, 96, 96, 96],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.1 * 4,
- lrf=0.01,
- momentum=0.9,
- weight_decay=0.00004,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
diff --git a/configs/yolov6_lite/yolov6_lite_l_finetune.py b/configs/yolov6_lite/yolov6_lite_l_finetune.py
deleted file mode 100644
index 48315c4d..00000000
--- a/configs/yolov6_lite/yolov6_lite_l_finetune.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# YOLOv6-lite-l model
-model = dict(
- type='YOLOv6-lite-l',
- pretrained='weights/yolov6lite_l.pt',
- width_multiple=1.5,
- backbone=dict(
- type='Lite_EffiBackbone',
- num_repeats=[1, 3, 7, 3],
- out_channels=[24, 32, 64, 128, 256],
- scale_size=0.5,
- ),
- neck=dict(
- type='Lite_EffiNeck',
- in_channels=[256, 128, 64],
- unified_channels=96
- ),
- head=dict(
- type='Lite_EffideHead',
- in_channels=[96, 96, 96, 96],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
diff --git a/configs/yolov6_lite/yolov6_lite_m.py b/configs/yolov6_lite/yolov6_lite_m.py
deleted file mode 100644
index 8f0de368..00000000
--- a/configs/yolov6_lite/yolov6_lite_m.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# YOLOv6-lite-m model
-model = dict(
- type='YOLOv6-lite-m',
- pretrained=None,
- width_multiple=1.1,
- backbone=dict(
- type='Lite_EffiBackbone',
- num_repeats=[1, 3, 7, 3],
- out_channels=[24, 32, 64, 128, 256],
- scale_size=0.5,
- ),
- neck=dict(
- type='Lite_EffiNeck',
- in_channels=[256, 128, 64],
- unified_channels=96
- ),
- head=dict(
- type='Lite_EffideHead',
- in_channels=[96, 96, 96, 96],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.1 * 4,
- lrf=0.01,
- momentum=0.9,
- weight_decay=0.00004,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
diff --git a/configs/yolov6_lite/yolov6_lite_m_finetune.py b/configs/yolov6_lite/yolov6_lite_m_finetune.py
deleted file mode 100644
index 108adda5..00000000
--- a/configs/yolov6_lite/yolov6_lite_m_finetune.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# YOLOv6-lite-m model
-model = dict(
- type='YOLOv6-lite-m',
- pretrained='weights/yolov6lite_m.pt',
- width_multiple=1.1,
- backbone=dict(
- type='Lite_EffiBackbone',
- num_repeats=[1, 3, 7, 3],
- out_channels=[24, 32, 64, 128, 256],
- scale_size=0.5,
- ),
- neck=dict(
- type='Lite_EffiNeck',
- in_channels=[256, 128, 64],
- unified_channels=96
- ),
- head=dict(
- type='Lite_EffideHead',
- in_channels=[96, 96, 96, 96],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
diff --git a/configs/yolov6_lite/yolov6_lite_s.py b/configs/yolov6_lite/yolov6_lite_s.py
deleted file mode 100644
index 42a52e37..00000000
--- a/configs/yolov6_lite/yolov6_lite_s.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# YOLOv6-lite-s model
-model = dict(
- type='YOLOv6-lite-s',
- pretrained=None,
- width_multiple=0.7,
- backbone=dict(
- type='Lite_EffiBackbone',
- num_repeats=[1, 3, 7, 3],
- out_channels=[24, 32, 64, 128, 256],
- scale_size=0.5,
- ),
- neck=dict(
- type='Lite_EffiNeck',
- in_channels=[256, 128, 64],
- unified_channels=96
- ),
- head=dict(
- type='Lite_EffideHead',
- in_channels=[96, 96, 96, 96],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.1 * 4,
- lrf=0.01,
- momentum=0.9,
- weight_decay=0.00004,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
diff --git a/configs/yolov6_lite/yolov6_lite_s_finetune.py b/configs/yolov6_lite/yolov6_lite_s_finetune.py
deleted file mode 100644
index befee2ce..00000000
--- a/configs/yolov6_lite/yolov6_lite_s_finetune.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# YOLOv6-lite-s model
-model = dict(
- type='YOLOv6-lite-s',
- pretrained='weights/yolov6lite_s.pt',
- width_multiple=0.7,
- backbone=dict(
- type='Lite_EffiBackbone',
- num_repeats=[1, 3, 7, 3],
- out_channels=[24, 32, 64, 128, 256],
- scale_size=0.5,
- ),
- neck=dict(
- type='Lite_EffiNeck',
- in_channels=[256, 128, 64],
- unified_channels=96
- ),
- head=dict(
- type='Lite_EffideHead',
- in_channels=[96, 96, 96, 96],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
diff --git a/configs/yolov6l6.py b/configs/yolov6l6.py
deleted file mode 100644
index 3bb77c5f..00000000
--- a/configs/yolov6l6.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# YOLOv6l6 model
-model = dict(
- type='YOLOv6l6',
- pretrained=None,
- depth_multiple=1.0,
- width_multiple=1.0,
- backbone=dict(
- type='CSPBepBackbone_P6',
- num_repeats=[1, 6, 12, 18, 6, 6],
- out_channels=[64, 128, 256, 512, 768, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- ),
- neck=dict(
- type='CSPRepBiFPANNeck_P6',
- num_repeats=[12, 12, 12, 12, 12, 12],
- out_channels=[512, 256, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512, 1024],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.9,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.2,
-)
-training_mode = "conv_silu"
diff --git a/configs/yolov6l6_finetune.py b/configs/yolov6l6_finetune.py
deleted file mode 100644
index 2ffb8ada..00000000
--- a/configs/yolov6l6_finetune.py
+++ /dev/null
@@ -1,62 +0,0 @@
-# YOLOv6l6 model
-model = dict(
- type='YOLOv6l6',
- pretrained='weights/yolov6l6.pt',
- depth_multiple=1.0,
- width_multiple=1.0,
- backbone=dict(
- type='CSPBepBackbone_P6',
- num_repeats=[1, 6, 12, 18, 6, 6],
- out_channels=[64, 128, 256, 512, 768, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- ),
- neck=dict(
- type='CSPRepBiFPANNeck_P6',
- num_repeats=[12, 12, 12, 12, 12, 12],
- out_channels=[512, 256, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512, 1024],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
-training_mode = "conv_silu"
diff --git a/configs/yolov6l_finetune.py b/configs/yolov6l_finetune.py
deleted file mode 100644
index 9b301233..00000000
--- a/configs/yolov6l_finetune.py
+++ /dev/null
@@ -1,68 +0,0 @@
-# YOLOv6l model
-model = dict(
- type='YOLOv6l',
- pretrained='weights/yolov6l.pt',
- depth_multiple=1.0,
- width_multiple=1.0,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(1)/2,
- fuse_P2=True,
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(1)/2,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 2.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
-training_mode = "conv_silu"
-# use normal conv to speed up training and further improve accuracy.
diff --git a/configs/base/yolov6l_base.py b/configs/yolov6l_seg.py
similarity index 85%
rename from configs/base/yolov6l_base.py
rename to configs/yolov6l_seg.py
index ef2dbbb2..2ed9211f 100644
--- a/configs/base/yolov6l_base.py
+++ b/configs/yolov6l_seg.py
@@ -1,6 +1,6 @@
-# YOLOv6l large base model
+# YOLOv6l-seg model
model = dict(
- type='YOLOv6l_base',
+ type='YOLOv6l',
pretrained=None,
depth_multiple=1.0,
width_multiple=1.0,
@@ -22,6 +22,10 @@
in_channels=[128, 256, 512],
num_layers=3,
begin_indices=24,
+ npr=256,
+ nm=32,
+ isseg=True,
+ issolo=False,
anchors=3,
anchors_init=[[10,13, 19,19, 33,23],
[30,61, 59,59, 59,119],
@@ -45,7 +49,7 @@
lr0=0.01,
lrf=0.01,
momentum=0.937,
- weight_decay=0.0005,
+ weight_decay=0.001,
warmup_epochs=3.0,
warmup_momentum=0.8,
warmup_bias_lr=0.1
@@ -64,4 +68,5 @@
mosaic=1.0,
mixup=0.1,
)
-training_mode = "conv_relu"
+training_mode = "conv_silu"
+# use normal conv to speed up training and further improve accuracy.
diff --git a/configs/yolov6m6.py b/configs/yolov6m6.py
deleted file mode 100644
index e741bbc0..00000000
--- a/configs/yolov6m6.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# YOLOv6m6 model
-model = dict(
- type='YOLOv6m6',
- pretrained=None,
- depth_multiple=0.60,
- width_multiple=0.75,
- backbone=dict(
- type='CSPBepBackbone_P6',
- num_repeats=[1, 6, 12, 18, 6, 6],
- out_channels=[64, 128, 256, 512, 768, 1024],
- csp_e=float(2)/3,
- fuse_P2=True,
- ),
- neck=dict(
- type='CSPRepBiFPANNeck_P6',
- num_repeats=[12, 12, 12, 12, 12, 12],
- out_channels=[512, 256, 128, 256, 512, 1024],
- csp_e=float(2)/3,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512, 1024],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.9,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.1,
-)
diff --git a/configs/yolov6m6_finetune.py b/configs/yolov6m6_finetune.py
deleted file mode 100644
index 83760d3a..00000000
--- a/configs/yolov6m6_finetune.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# YOLOv6m6 model
-model = dict(
- type='YOLOv6m6',
- pretrained='weights/yolov6m6.pt',
- depth_multiple=0.60,
- width_multiple=0.75,
- backbone=dict(
- type='CSPBepBackbone_P6',
- num_repeats=[1, 6, 12, 18, 6, 6],
- out_channels=[64, 128, 256, 512, 768, 1024],
- csp_e=float(2)/3,
- fuse_P2=True,
- ),
- neck=dict(
- type='CSPRepBiFPANNeck_P6',
- num_repeats=[12, 12, 12, 12, 12, 12],
- out_channels=[512, 256, 128, 256, 512, 1024],
- csp_e=float(2)/3,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512, 1024],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
diff --git a/configs/yolov6m_finetune.py b/configs/yolov6m_finetune.py
deleted file mode 100644
index cfe0fa93..00000000
--- a/configs/yolov6m_finetune.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# YOLOv6m model
-model = dict(
- type='YOLOv6m',
- pretrained='weights/yolov6m.pt',
- depth_multiple=0.60,
- width_multiple=0.75,
- backbone=dict(
- type='CSPBepBackbone',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- csp_e=float(2)/3,
- fuse_P2=True,
- ),
- neck=dict(
- type='CSPRepBiFPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- csp_e=float(2)/3,
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=True,
- reg_max=16, #if use_dfl is False, please set reg_max to 0
- distill_weight={
- 'class': 0.8,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
diff --git a/configs/yolov6m.py b/configs/yolov6m_seg.py
similarity index 92%
rename from configs/yolov6m.py
rename to configs/yolov6m_seg.py
index 29fae396..d8660be3 100644
--- a/configs/yolov6m.py
+++ b/configs/yolov6m_seg.py
@@ -1,4 +1,4 @@
-# YOLOv6m model
+# YOLOv6m-seg model
model = dict(
type='YOLOv6m',
pretrained=None,
@@ -22,6 +22,10 @@
in_channels=[128, 256, 512],
num_layers=3,
begin_indices=24,
+ npr=256,
+ nm=32,
+ isseg=True,
+ issolo=False,
anchors=3,
anchors_init=[[10,13, 19,19, 33,23],
[30,61, 59,59, 59,119],
@@ -45,7 +49,7 @@
lr0=0.01,
lrf=0.01,
momentum=0.937,
- weight_decay=0.0005,
+ weight_decay=0.001,
warmup_epochs=3.0,
warmup_momentum=0.8,
warmup_bias_lr=0.1
diff --git a/configs/yolov6n6.py b/configs/yolov6n6.py
deleted file mode 100644
index 0abe3a44..00000000
--- a/configs/yolov6n6.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# YOLOv6n model
-model = dict(
- type='YOLOv6n6',
- pretrained=None,
- depth_multiple=0.33,
- width_multiple=0.25,
- backbone=dict(
- type='EfficientRep6',
- num_repeats=[1, 6, 12, 18, 6, 6],
- out_channels=[64, 128, 256, 512, 768, 1024],
- fuse_P2=True, # if use RepBiFPANNeck6, please set fuse_P2 to True.
- cspsppf=True,
- ),
- neck=dict(
- type='RepBiFPANNeck6',
- num_repeats=[12, 12, 12, 12, 12, 12],
- out_channels=[512, 256, 128, 256, 512, 1024],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512, 1024],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.02,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
diff --git a/configs/yolov6n6_finetune.py b/configs/yolov6n6_finetune.py
deleted file mode 100644
index 01100f0f..00000000
--- a/configs/yolov6n6_finetune.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# YOLOv6n model
-model = dict(
- type='YOLOv6n6',
- pretrained='weights/yolov6n6.pt',
- depth_multiple=0.33,
- width_multiple=0.25,
- backbone=dict(
- type='EfficientRep6',
- num_repeats=[1, 6, 12, 18, 6, 6],
- out_channels=[64, 128, 256, 512, 768, 1024],
- fuse_P2=True, # if use RepBiFPANNeck6, please set fuse_P2 to True.
- cspsppf=True,
- ),
- neck=dict(
- type='RepBiFPANNeck6',
- num_repeats=[12, 12, 12, 12, 12, 12],
- out_channels=[512, 256, 128, 256, 512, 1024],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512, 1024],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='siou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
diff --git a/configs/yolov6n_finetune.py b/configs/yolov6n_finetune.py
deleted file mode 100644
index 03b6d1ba..00000000
--- a/configs/yolov6n_finetune.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# YOLOv6s model
-model = dict(
- type='YOLOv6n',
- pretrained='weights/yolov6n.pt',
- depth_multiple=0.33,
- width_multiple=0.25,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- fuse_P2=True,
- cspsppf=True,
- ),
- neck=dict(
- type='RepBiFPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='siou',
- use_dfl=False, # set to True if you want to further train with distillation
- reg_max=0, # set to 16 if you want to further train with distillation
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
diff --git a/configs/qarepvgg/yolov6n_qa.py b/configs/yolov6n_seg.py
similarity index 92%
rename from configs/qarepvgg/yolov6n_qa.py
rename to configs/yolov6n_seg.py
index b42d9ddb..94b42ed1 100644
--- a/configs/qarepvgg/yolov6n_qa.py
+++ b/configs/yolov6n_seg.py
@@ -1,4 +1,4 @@
-# YOLOv6s model
+# YOLOv6n-seg model
model = dict(
type='YOLOv6n',
pretrained=None,
@@ -21,6 +21,10 @@
in_channels=[128, 256, 512],
num_layers=3,
begin_indices=24,
+ npr=256,
+ nm=32,
+ isseg=True,
+ issolo=False,
anchors=3,
anchors_init=[[10,13, 19,19, 33,23],
[30,61, 59,59, 59,119],
@@ -44,7 +48,7 @@
lr0=0.02,
lrf=0.01,
momentum=0.937,
- weight_decay=0.0005,
+ weight_decay=0.001,
warmup_epochs=3.0,
warmup_momentum=0.8,
warmup_bias_lr=0.1
@@ -63,4 +67,3 @@
mosaic=1.0,
mixup=0.0,
)
-training_mode='qarepvggv2'
diff --git a/configs/yolov6s6.py b/configs/yolov6s6.py
deleted file mode 100644
index 091bfffc..00000000
--- a/configs/yolov6s6.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# YOLOv6n model
-model = dict(
- type='YOLOv6s6',
- pretrained=None,
- depth_multiple=0.33,
- width_multiple=0.50,
- backbone=dict(
- type='EfficientRep6',
- num_repeats=[1, 6, 12, 18, 6, 6],
- out_channels=[64, 128, 256, 512, 768, 1024],
- fuse_P2=True, # if use RepBiFPANNeck6, please set fuse_P2 to True.
- cspsppf=True,
- ),
- neck=dict(
- type='RepBiFPANNeck6',
- num_repeats=[12, 12, 12, 12, 12, 12],
- out_channels=[512, 256, 128, 256, 512, 1024],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512, 1024],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='giou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.01,
- lrf=0.01,
- momentum=0.937,
- weight_decay=0.0005,
- warmup_epochs=3.0,
- warmup_momentum=0.8,
- warmup_bias_lr=0.1
-)
-
-data_aug = dict(
- hsv_h=0.015,
- hsv_s=0.7,
- hsv_v=0.4,
- degrees=0.0,
- translate=0.1,
- scale=0.5,
- shear=0.0,
- flipud=0.0,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.0,
-)
diff --git a/configs/yolov6s6_finetune.py b/configs/yolov6s6_finetune.py
deleted file mode 100644
index a22697ed..00000000
--- a/configs/yolov6s6_finetune.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# YOLOv6n model
-model = dict(
- type='YOLOv6s6',
- pretrained='weights/yolov6s6.pt',
- depth_multiple=0.33,
- width_multiple=0.50,
- backbone=dict(
- type='EfficientRep6',
- num_repeats=[1, 6, 12, 18, 6, 6],
- out_channels=[64, 128, 256, 512, 768, 1024],
- fuse_P2=True, # if use RepBiFPANNeck6, please set fuse_P2 to True.
- cspsppf=True,
- ),
- neck=dict(
- type='RepBiFPANNeck6',
- num_repeats=[12, 12, 12, 12, 12, 12],
- out_channels=[512, 256, 128, 256, 512, 1024],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512, 1024],
- num_layers=4,
- anchors=1,
- strides=[8, 16, 32, 64],
- atss_warmup_epoch=4,
- iou_type='giou',
- use_dfl=False,
- reg_max=0 #if use_dfl is False, please set reg_max to 0
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
diff --git a/configs/yolov6s_finetune.py b/configs/yolov6s_finetune.py
deleted file mode 100644
index d6fb27fe..00000000
--- a/configs/yolov6s_finetune.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# YOLOv6s model
-model = dict(
- type='YOLOv6s',
- pretrained='weights/yolov6s.pt',
- depth_multiple=0.33,
- width_multiple=0.50,
- backbone=dict(
- type='EfficientRep',
- num_repeats=[1, 6, 12, 18, 6],
- out_channels=[64, 128, 256, 512, 1024],
- fuse_P2=True,
- cspsppf=True,
- ),
- neck=dict(
- type='RepBiFPANNeck',
- num_repeats=[12, 12, 12, 12],
- out_channels=[256, 128, 128, 256, 256, 512],
- ),
- head=dict(
- type='EffiDeHead',
- in_channels=[128, 256, 512],
- num_layers=3,
- begin_indices=24,
- anchors=3,
- anchors_init=[[10,13, 19,19, 33,23],
- [30,61, 59,59, 59,119],
- [116,90, 185,185, 373,326]],
- out_indices=[17, 20, 23],
- strides=[8, 16, 32],
- atss_warmup_epoch=0,
- iou_type='giou',
- use_dfl=False, # set to True if you want to further train with distillation
- reg_max=0, # set to 16 if you want to further train with distillation
- distill_weight={
- 'class': 1.0,
- 'dfl': 1.0,
- },
- )
-)
-
-solver = dict(
- optim='SGD',
- lr_scheduler='Cosine',
- lr0=0.0032,
- lrf=0.12,
- momentum=0.843,
- weight_decay=0.00036,
- warmup_epochs=2.0,
- warmup_momentum=0.5,
- warmup_bias_lr=0.05
-)
-
-data_aug = dict(
- hsv_h=0.0138,
- hsv_s=0.664,
- hsv_v=0.464,
- degrees=0.373,
- translate=0.245,
- scale=0.898,
- shear=0.602,
- flipud=0.00856,
- fliplr=0.5,
- mosaic=1.0,
- mixup=0.243,
-)
diff --git a/configs/yolov6s.py b/configs/yolov6s_seg.py
similarity index 92%
rename from configs/yolov6s.py
rename to configs/yolov6s_seg.py
index 8d8b6739..c4274ccc 100644
--- a/configs/yolov6s.py
+++ b/configs/yolov6s_seg.py
@@ -1,4 +1,4 @@
-# YOLOv6s model
+# YOLOv6s-seg model
model = dict(
type='YOLOv6s',
pretrained=None,
@@ -21,6 +21,10 @@
in_channels=[128, 256, 512],
num_layers=3,
begin_indices=24,
+ npr=256,
+ nm=32,
+ isseg=True,
+ issolo=False,
anchors=3,
anchors_init=[[10,13, 19,19, 33,23],
[30,61, 59,59, 59,119],
@@ -44,7 +48,7 @@
lr0=0.01,
lrf=0.01,
momentum=0.937,
- weight_decay=0.0005,
+ weight_decay=0.001,
warmup_epochs=3.0,
warmup_momentum=0.8,
warmup_bias_lr=0.1
diff --git a/configs/mbla/yolov6l_mbla.py b/configs/yolov6x_seg.py
similarity index 79%
rename from configs/mbla/yolov6l_mbla.py
rename to configs/yolov6x_seg.py
index 7534b705..3ef53e50 100644
--- a/configs/mbla/yolov6l_mbla.py
+++ b/configs/yolov6x_seg.py
@@ -1,29 +1,31 @@
-# YOLOv6l model
+# YOLOv6x-seg model
model = dict(
- type='YOLOv6l_mbla',
+    type='YOLOv6x',
pretrained=None,
- depth_multiple=0.5,
- width_multiple=1.0,
+ depth_multiple=1.33,
+ width_multiple=1.25,
backbone=dict(
type='CSPBepBackbone',
- num_repeats=[1, 4, 8, 8, 4],
+ num_repeats=[1, 6, 12, 18, 6],
out_channels=[64, 128, 256, 512, 1024],
csp_e=float(1)/2,
fuse_P2=True,
- stage_block_type="MBLABlock",
),
neck=dict(
type='CSPRepBiFPANNeck',
- num_repeats=[8, 8, 8, 8],
+ num_repeats=[12, 12, 12, 12],
out_channels=[256, 128, 128, 256, 256, 512],
csp_e=float(1)/2,
- stage_block_type="MBLABlock",
),
head=dict(
type='EffiDeHead',
in_channels=[128, 256, 512],
num_layers=3,
begin_indices=24,
+ npr=256,
+ nm=32,
+ isseg=True,
+ issolo=False,
anchors=3,
anchors_init=[[10,13, 19,19, 33,23],
[30,61, 59,59, 59,119],
@@ -47,7 +49,7 @@
lr0=0.01,
lrf=0.01,
momentum=0.937,
- weight_decay=0.0005,
+ weight_decay=0.001,
warmup_epochs=3.0,
warmup_momentum=0.8,
warmup_bias_lr=0.1
@@ -66,5 +68,5 @@
mosaic=1.0,
mixup=0.1,
)
-
training_mode = "conv_silu"
+# use normal conv to speed up training and further improve accuracy.
diff --git a/data/coco.yaml b/data/coco.yaml
index d20d411e..8ce2676d 100644
--- a/data/coco.yaml
+++ b/data/coco.yaml
@@ -1,13 +1,11 @@
# COCO 2017 dataset http://cocodataset.org
-train: ../coco/images/train2017 # 118287 images
-val: ../coco/images/val2017 # 5000 images
-test: ../coco/images/test2017
-anno_path: ../coco/annotations/instances_val2017.json
+train: ./data/coco/images/train2017 # 118287 images
+val: ./data/coco/images/val2017 # 5000 images
+# test: ./data/coco/images/val2017
# number of classes
nc: 80
-# whether it is coco dataset, only coco dataset should be set to True.
-is_coco: True
+
# class names
names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
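With these edits, `data/coco.yaml` keeps only the `train`/`val` paths (now relative to the repo root), `nc`, and `names`, and drops the `is_coco` flag and annotation path. A quick sanity check of the file (illustrative, assuming PyYAML is installed and the repo root is the working directory):

```python
import yaml

with open('data/coco.yaml') as f:
    data_cfg = yaml.safe_load(f)

print(data_cfg['train'])                       # ./data/coco/images/train2017
print(data_cfg['nc'], len(data_cfg['names']))  # expect 80 80
assert 'is_coco' not in data_cfg               # flag removed by this patch
```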
diff --git a/data/images/000000056350.jpg b/data/images/000000056350.jpg
new file mode 100644
index 00000000..0c95d084
Binary files /dev/null and b/data/images/000000056350.jpg differ
diff --git a/data/images/9_Press_Conference_Press_Conference_9_946.jpg b/data/images/9_Press_Conference_Press_Conference_9_946.jpg
new file mode 100644
index 00000000..aa342667
Binary files /dev/null and b/data/images/9_Press_Conference_Press_Conference_9_946.jpg differ
diff --git a/deploy/ONNX/README.md b/deploy/ONNX/README.md
index d42f3c8c..c3a618cb 100644
--- a/deploy/ONNX/README.md
+++ b/deploy/ONNX/README.md
@@ -33,15 +33,6 @@ python ./deploy/ONNX/export_onnx.py \
- `--conf-thres` : Confidence threshold for NMS algorithm.
- `--device` : Export device. Cuda device : 0 or 0,1,2,3 ... , CPU : cpu .
-## Download
-
-* [YOLOv6-N](https://github.com/meituan/YOLOv6/releases/download/0.2.0/yolov6n.onnx)
-* [YOLOv6-T](https://github.com/meituan/YOLOv6/releases/download/0.2.0/yolov6t.onnx)
-* [YOLOv6-S](https://github.com/meituan/YOLOv6/releases/download/0.2.0/yolov6s.onnx)
-* [YOLOv6-M](https://github.com/meituan/YOLOv6/releases/download/0.2.0/yolov6m.onnx)
-* [YOLOv6-L-ReLU](https://github.com/meituan/YOLOv6/releases/download/0.2.0/yolov6l_relu.onnx)
-* [YOLOv6-L](https://github.com/meituan/YOLOv6/releases/download/0.2.0/yolov6l.onnx)
-
## End2End export
diff --git a/deploy/ONNX/export_onnx.py b/deploy/ONNX/export_onnx.py
index ba7440ae..85368c85 100644
--- a/deploy/ONNX/export_onnx.py
+++ b/deploy/ONNX/export_onnx.py
@@ -22,7 +22,7 @@
if __name__ == '__main__':
parser = argparse.ArgumentParser()
- parser.add_argument('--weights', type=str, default='./yolov6s.pt', help='weights path')
+ parser.add_argument('--weights', type=str, default='./weights/best_ckpt.pt', help='weights path')
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size, the order is: height width') # height, width
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
parser.add_argument('--half', action='store_true', help='FP16 half-precision export')
diff --git a/tools/eval.py b/tools/eval.py
index 5543029c..7814639e 100644
--- a/tools/eval.py
+++ b/tools/eval.py
@@ -23,13 +23,13 @@ def boolean_string(s):
def get_args_parser(add_help=True):
     parser = argparse.ArgumentParser(description='YOLOv6 PyTorch Evaluating', add_help=add_help)
parser.add_argument('--data', type=str, default='./data/coco.yaml', help='dataset.yaml path')
- parser.add_argument('--weights', type=str, default='./weights/yolov6s.pt', help='model.pt path(s)')
- parser.add_argument('--batch-size', type=int, default=32, help='batch size')
+ parser.add_argument('--weights', type=str, default='./checkpoints/yolov6n_yol.pt', help='model.pt path(s)')
+ parser.add_argument('--batch-size', type=int, default=2, help='batch size')
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
parser.add_argument('--conf-thres', type=float, default=0.03, help='confidence threshold')
parser.add_argument('--iou-thres', type=float, default=0.65, help='NMS IoU threshold')
parser.add_argument('--task', default='val', help='val, test, or speed')
- parser.add_argument('--device', default='0', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+ parser.add_argument('--device', default='4', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--half', default=False, action='store_true', help='whether to use fp16 infer')
parser.add_argument('--save_dir', type=str, default='runs/val/', help='evaluation save dir')
parser.add_argument('--name', type=str, default='exp', help='save evaluation results to save_dir/name')
@@ -37,8 +37,8 @@ def get_args_parser(add_help=True):
parser.add_argument('--infer_on_rect', default=True, type=boolean_string, help='default to run with rectangle image to boost speed.')
parser.add_argument('--reproduce_640_eval', default=False, action='store_true', help='whether to reproduce 640 infer result, overwrite some config')
parser.add_argument('--eval_config_file', type=str, default='./configs/experiment/eval_640_repro.py', help='config file for repro 640 infer result')
- parser.add_argument('--do_coco_metric', default=True, type=boolean_string, help='whether to use pycocotool to metric, set False to close')
- parser.add_argument('--do_pr_metric', default=False, type=boolean_string, help='whether to calculate precision, recall and F1, n, set False to close')
+ parser.add_argument('--do_coco_metric', default=False, type=boolean_string, help='whether to use pycocotool to metric, set False to close')
+ parser.add_argument('--do_pr_metric', default=True, type=boolean_string, help='whether to calculate precision, recall and F1, n, set False to close')
parser.add_argument('--plot_curve', default=True, type=boolean_string, help='whether to save plots in savedir when do pr metric, set False to close')
parser.add_argument('--plot_confusion_matrix', default=False, action='store_true', help='whether to save confusion matrix plots when do pr metric, might cause no harm warning print')
parser.add_argument('--verbose', default=False, action='store_true', help='whether to print metric on each class')
@@ -46,6 +46,7 @@ def get_args_parser(add_help=True):
parser.add_argument('--specific-shape', action='store_true', help='rectangular training')
parser.add_argument('--height', type=int, default=None, help='image height of model input')
parser.add_argument('--width', type=int, default=None, help='image width of model input')
+ parser.add_argument('--issolo', default=False, type=boolean_string, help='is solo format')
args = parser.parse_args()
if args.config_file:
@@ -113,7 +114,8 @@ def run(data,
config_file=None,
specific_shape=False,
height=640,
- width=640
+ width=640,
+ issolo=False
):
""" Run the evaluation process
@@ -155,9 +157,11 @@ def run(data,
# eval
model.eval()
- pred_result, vis_outputs, vis_paths = val.predict_model(model, dataloader, task)
- eval_result = val.eval_model(pred_result, model, dataloader, task)
- return eval_result, vis_outputs, vis_paths
+ pred_result, _, __= val.predict_model(model, dataloader, task, issolo=issolo)
+ return pred_result, _, __
+ #raise ValueError("..")
+ #eval_result = val.eval_model(pred_result, model, dataloader, task)
+ #return eval_result, vis_outputs, vis_paths
def main(args):
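With `eval_model()` bypassed, the evaluation entry point now hands back the raw `predict_model()` outputs instead of COCO metrics. A hypothetical call into the modified `run()` (keyword names are assumed to mirror the CLI flags above; the checkpoint path is a placeholder):

```python
from tools.eval import run

# Assumed keyword names mirror the CLI flags; adjust paths to real files.
pred_result, vis_outputs, vis_paths = run(
    data='./data/coco.yaml',
    weights='./checkpoints/yolov6n_yol.pt',
    batch_size=2,
    img_size=640,
    task='val',
    device='0',
    issolo=False,   # set True when evaluating a SOLO-style (*_solo) model
)
# pred_result holds raw predictions; COCO mAP is no longer computed here.
```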
diff --git a/tools/infer.py b/tools/infer.py
index 95b3fdc7..cb051112 100644
--- a/tools/infer.py
+++ b/tools/infer.py
@@ -17,11 +17,11 @@
def get_args_parser(add_help=True):
parser = argparse.ArgumentParser(description='YOLOv6 PyTorch Inference.', add_help=add_help)
- parser.add_argument('--weights', type=str, default='weights/yolov6s.pt', help='model path(s) for inference.')
- parser.add_argument('--source', type=str, default='data/images', help='the source path, e.g. image-file/dir.')
+ parser.add_argument('--weights', type=str, default='./checkpoints/yolov6n_yol.pt', help='model path(s) for inference.')
+ parser.add_argument('--source', type=str, default='./data/images', help='the source path, e.g. image-file/dir.')
parser.add_argument('--webcam', action='store_true', help='whether to use webcam.')
- parser.add_argument('--webcam-addr', type=str, default='0', help='the web camera address, local camera or rtsp address.')
- parser.add_argument('--yaml', type=str, default='data/coco.yaml', help='data yaml file.')
+ parser.add_argument('--webcam-addr', type=str, default='6', help='the web camera address, local camera or rtsp address.')
+ parser.add_argument('--yaml', type=str, default='data/test.yaml', help='data yaml file.')
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='the image-size(h,w) in inference size.')
parser.add_argument('--conf-thres', type=float, default=0.4, help='confidence threshold for inference.')
parser.add_argument('--iou-thres', type=float, default=0.45, help='NMS IoU threshold for inference.')
@@ -29,7 +29,7 @@ def get_args_parser(add_help=True):
parser.add_argument('--device', default='0', help='device to run our model i.e. 0 or 0,1,2,3 or cpu.')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt.')
parser.add_argument('--not-save-img', action='store_true', help='do not save visuallized inference results.')
- parser.add_argument('--save-dir', type=str, help='directory to save predictions in. See --save-txt.')
+ parser.add_argument('--save-dir', type=str, default='./runs/inference', help='directory to save predictions in. See --save-txt.')
parser.add_argument('--view-img', action='store_true', help='show inference results')
parser.add_argument('--classes', nargs='+', type=int, help='filter by classes, e.g. --classes 0, or --classes 0 2 3.')
parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS.')
@@ -38,6 +38,7 @@ def get_args_parser(add_help=True):
parser.add_argument('--hide-labels', default=False, action='store_true', help='hide labels.')
parser.add_argument('--hide-conf', default=False, action='store_true', help='hide confidences.')
parser.add_argument('--half', action='store_true', help='whether to use FP16 half-precision inference.')
+ parser.add_argument('--issolo', action='store_true', help='solo structure or not')
args = parser.parse_args()
LOGGER.info(args)
@@ -66,6 +67,7 @@ def run(weights=osp.join(ROOT, 'yolov6s.pt'),
hide_labels=False,
hide_conf=False,
half=False,
+ issolo=False
):
""" Inference process, supporting inference on one image file or directory which containing images.
Args:
@@ -105,7 +107,7 @@ def run(weights=osp.join(ROOT, 'yolov6s.pt'),
# Inference
inferer = Inferer(source, webcam, webcam_addr, weights, device, yaml, img_size, half)
- inferer.infer(conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, save_txt, not not_save_img, hide_labels, hide_conf, view_img)
+ inferer.infer(conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, save_txt, not not_save_img, hide_labels, hide_conf, view_img, issolo=issolo)
if save_txt or not not_save_img:
LOGGER.info(f"Results saved to {save_dir}")
diff --git a/tools/train.py b/tools/train.py
index 635c68e4..9771e562 100644
--- a/tools/train.py
+++ b/tools/train.py
@@ -25,10 +25,10 @@
def get_args_parser(add_help=True):
parser = argparse.ArgumentParser(description='YOLOv6 PyTorch Training', add_help=add_help)
parser.add_argument('--data-path', default='./data/coco.yaml', type=str, help='path of dataset')
- parser.add_argument('--conf-file', default='./configs/yolov6n.py', type=str, help='experiments description file')
+ parser.add_argument('--conf-file', default='./configs/yolov6s.py', type=str, help='experiments description file')
parser.add_argument('--img-size', default=640, type=int, help='train, val image size (pixels)')
parser.add_argument('--rect', action='store_true', help='whether to use rectangular training, default is False')
- parser.add_argument('--batch-size', default=32, type=int, help='total batch size for all GPUs')
+ parser.add_argument('--batch-size', default=16, type=int, help='total batch size for all GPUs')
parser.add_argument('--epochs', default=400, type=int, help='number of total epochs to run')
parser.add_argument('--workers', default=8, type=int, help='number of data loading workers (default: 8)')
parser.add_argument('--device', default='0', type=str, help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
@@ -45,7 +45,7 @@ def get_args_parser(add_help=True):
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter')
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume the most recent training')
parser.add_argument('--write_trainbatch_tb', action='store_true', help='write train_batch image to tensorboard once an epoch, may slightly slower train speed if open')
- parser.add_argument('--stop_aug_last_n_epoch', default=15, type=int, help='stop strong aug at last n epoch, neg value not stop, default 15')
+ parser.add_argument('--stop_aug_last_n_epoch', default=-1, type=int, help='stop strong aug at last n epoch, neg value not stop, default 15')
parser.add_argument('--save_ckpt_on_last_n_epoch', default=-1, type=int, help='save last n epoch even not best or last, neg value not save')
parser.add_argument('--distill', action='store_true', help='distill or not')
parser.add_argument('--distill_feat', action='store_true', help='distill featmap or not')
@@ -54,7 +54,7 @@ def get_args_parser(add_help=True):
parser.add_argument('--teacher_model_path', type=str, default=None, help='teacher model path')
parser.add_argument('--temperature', type=int, default=20, help='distill temperature')
parser.add_argument('--fuse_ab', action='store_true', help='fuse ab branch in training process or not')
- parser.add_argument('--bs_per_gpu', default=32, type=int, help='batch size per GPU for auto-rescale learning rate, set to 16 for P6 models')
+ parser.add_argument('--bs_per_gpu', default=8, type=int, help='batch size per GPU for auto-rescale learning rate, set to 16 for P6 models')
parser.add_argument('--specific-shape', action='store_true', help='rectangular training')
parser.add_argument('--height', type=int, default=None, help='image height of model input')
parser.add_argument('--width', type=int, default=None, help='image width of model input')
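The new training defaults (smaller batch size, strong augmentation kept until the end, `bs_per_gpu=8`) pair with the renamed `*_seg` configs. A hypothetical way to assemble the arguments, assuming `get_args_parser()` returns the parser as in upstream YOLOv6; all paths are placeholders:

```python
from tools.train import get_args_parser

args = get_args_parser().parse_args([
    '--conf-file', './configs/yolov6s_seg.py',   # one of the renamed seg configs
    '--data-path', './data/coco.yaml',
    '--batch-size', '16',
    '--bs_per_gpu', '8',
    '--device', '0',
])
print(args.conf_file, args.batch_size, args.stop_aug_last_n_epoch)
```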
diff --git a/yolov6/assigners/anchor_generator.py b/yolov6/assigners/anchor_generator.py
index c8276418..3a41e0ba 100644
--- a/yolov6/assigners/anchor_generator.py
+++ b/yolov6/assigners/anchor_generator.py
@@ -1,7 +1,5 @@
import torch
-from yolov6.utils.general import check_version
-torch_1_10_plus = check_version(torch.__version__, minimum='1.10.0')
def generate_anchors(feats, fpn_strides, grid_cell_size=5.0, grid_cell_offset=0.5, device='cpu', is_eval=False, mode='af'):
'''Generate anchors from features.'''
@@ -15,7 +13,10 @@ def generate_anchors(feats, fpn_strides, grid_cell_size=5.0, grid_cell_offset=0.
_, _, h, w = feats[i].shape
shift_x = torch.arange(end=w, device=device) + grid_cell_offset
shift_y = torch.arange(end=h, device=device) + grid_cell_offset
- shift_y, shift_x = torch.meshgrid(shift_y, shift_x, indexing='ij') if torch_1_10_plus else torch.meshgrid(shift_y, shift_x)
+ try:
+ shift_y, shift_x = torch.meshgrid(shift_y, shift_x, indexing='ij')
+ except:
+ shift_y, shift_x = torch.meshgrid(shift_y, shift_x)
anchor_point = torch.stack(
[shift_x, shift_y], axis=-1).to(torch.float)
if mode == 'af': # anchor-free
@@ -37,7 +38,10 @@ def generate_anchors(feats, fpn_strides, grid_cell_size=5.0, grid_cell_offset=0.
cell_half_size = grid_cell_size * stride * 0.5
shift_x = (torch.arange(end=w, device=device) + grid_cell_offset) * stride
shift_y = (torch.arange(end=h, device=device) + grid_cell_offset) * stride
- shift_y, shift_x = torch.meshgrid(shift_y, shift_x, indexing='ij') if torch_1_10_plus else torch.meshgrid(shift_y, shift_x)
+ try:
+ shift_y, shift_x = torch.meshgrid(shift_y, shift_x, indexing='ij')
+ except:
+ shift_y, shift_x = torch.meshgrid(shift_y, shift_x)
anchor = torch.stack(
[
shift_x - cell_half_size, shift_y - cell_half_size,
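The try/except replaces the explicit version check: PyTorch 1.10 and later accept the `indexing` keyword, while older releases raise `TypeError`. A minimal standalone check of the behaviour the fallback relies on:

```python
import torch

ys, xs = torch.arange(3), torch.arange(4)
try:
    gy, gx = torch.meshgrid(ys, xs, indexing='ij')   # PyTorch >= 1.10
except TypeError:
    gy, gx = torch.meshgrid(ys, xs)                  # older PyTorch, same 'ij' layout
print(gy.shape, gx.shape)  # torch.Size([3, 4]) torch.Size([3, 4])
```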
diff --git a/yolov6/assigners/atss_assigner.py b/yolov6/assigners/atss_assigner.py
index 12a5f243..c1d51e74 100644
--- a/yolov6/assigners/atss_assigner.py
+++ b/yolov6/assigners/atss_assigner.py
@@ -21,7 +21,8 @@ def forward(self,
gt_labels,
gt_bboxes,
mask_gt,
- pd_bboxes):
+ pd_bboxes,
+ gt_segmasks):
r"""This code is based on
https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
@@ -47,7 +48,8 @@ def forward(self,
return torch.full( [self.bs, self.n_anchors], self.bg_idx).to(device), \
torch.zeros([self.bs, self.n_anchors, 4]).to(device), \
torch.zeros([self.bs, self.n_anchors, self.num_classes]).to(device), \
- torch.zeros([self.bs, self.n_anchors]).to(device)
+ torch.zeros([self.bs, self.n_anchors]).to(device), \
+ torch.zeros(*pd_bboxes.shape[:2], 40, 40)
overlaps = iou2d_calculator(gt_bboxes.reshape([-1, 4]), anc_bboxes)
@@ -74,7 +76,7 @@ def forward(self,
mask_pos, overlaps, self.n_max_boxes)
# assigned target
- target_labels, target_bboxes, target_scores = self.get_targets(
+ target_labels, target_bboxes, target_scores, target_segmasks = self.get_targets(
gt_labels, gt_bboxes, target_gt_idx, fg_mask)
# soft label with iou
@@ -83,7 +85,7 @@ def forward(self,
ious = ious.max(axis=-2)[0].unsqueeze(-1)
target_scores *= ious
- return target_labels.long(), target_bboxes, target_scores, fg_mask.bool()
+ return target_labels.long(), target_bboxes, target_scores, fg_mask.bool(), target_segmasks
def select_topk_candidates(self,
distances,
@@ -139,7 +141,8 @@ def get_targets(self,
gt_labels,
gt_bboxes,
target_gt_idx,
- fg_mask):
+ fg_mask,
+ gt_segmasks):
# assigned target labels
batch_idx = torch.arange(self.bs, dtype=gt_labels.dtype, device=gt_labels.device)
@@ -158,4 +161,7 @@ def get_targets(self,
target_scores = F.one_hot(target_labels.long(), self.num_classes + 1).float()
target_scores = target_scores[:, :, :self.num_classes]
- return target_labels, target_bboxes, target_scores
+ m_shape = gt_segmasks.shape[-2:]
+ target_segmasks = gt_segmasks.reshape([-1, m_shape[0], m_shape[1]])[target_gt_idx.flatten()]
+
+ return target_labels, target_bboxes, target_scores, target_segmasks
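The new `target_segmasks` gather mirrors how target boxes are selected: `target_gt_idx` already carries the per-batch offset, so indexing the flattened mask tensor yields one ground-truth mask per anchor. A toy illustration (not repo code):

```python
import torch

bs, n_max_boxes, n_anchors, h, w = 2, 3, 4, 8, 8
gt_segmasks = torch.rand(bs, n_max_boxes, h, w)
target_gt_idx = torch.randint(0, n_max_boxes, (bs, n_anchors))

# offset indices so they address the flattened (bs * n_max_boxes) mask list
target_gt_idx = target_gt_idx + torch.arange(bs).view(-1, 1) * n_max_boxes

m_shape = gt_segmasks.shape[-2:]
target_segmasks = gt_segmasks.reshape(-1, m_shape[0], m_shape[1])[target_gt_idx.flatten()]
print(target_segmasks.shape)  # torch.Size([8, 8, 8]): one (8, 8) mask per anchor
```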
diff --git a/yolov6/assigners/atss_assigner_seg.py b/yolov6/assigners/atss_assigner_seg.py
new file mode 100644
index 00000000..bf844387
--- /dev/null
+++ b/yolov6/assigners/atss_assigner_seg.py
@@ -0,0 +1,166 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from yolov6.assigners.iou2d_calculator import iou2d_calculator
+from yolov6.assigners.assigner_utils import dist_calculator, select_candidates_in_gts, select_highest_overlaps, iou_calculator
+
+class ATSSAssigner(nn.Module):
+ '''Adaptive Training Sample Selection Assigner'''
+ def __init__(self,
+ topk=9,
+ num_classes=80):
+ super(ATSSAssigner, self).__init__()
+ self.topk = topk
+ self.num_classes = num_classes
+ self.bg_idx = num_classes
+
+ @torch.no_grad()
+ def forward(self,
+ anc_bboxes,
+ n_level_bboxes,
+ gt_labels,
+ gt_bboxes,
+ mask_gt,
+ pd_bboxes,
+ gt_segmasks):
+ r"""This code is based on
+ https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/atss_assigner.py
+
+ Args:
+ anc_bboxes (Tensor): shape(num_total_anchors, 4)
+ n_level_bboxes (List):len(3)
+ gt_labels (Tensor): shape(bs, n_max_boxes, 1)
+ gt_bboxes (Tensor): shape(bs, n_max_boxes, 4)
+ mask_gt (Tensor): shape(bs, n_max_boxes, 1)
+ pd_bboxes (Tensor): shape(bs, n_max_boxes, 4)
+ Returns:
+ target_labels (Tensor): shape(bs, num_total_anchors)
+ target_bboxes (Tensor): shape(bs, num_total_anchors, 4)
+ target_scores (Tensor): shape(bs, num_total_anchors, num_classes)
+ fg_mask (Tensor): shape(bs, num_total_anchors)
+ """
+ self.n_anchors = anc_bboxes.size(0)
+ self.bs = gt_bboxes.size(0)
+ self.n_max_boxes = gt_bboxes.size(1)
+
+ if self.n_max_boxes == 0:
+ device = gt_bboxes.device
+ return torch.full( [self.bs, self.n_anchors], self.bg_idx).to(device), \
+ torch.zeros([self.bs, self.n_anchors, 4]).to(device), \
+ torch.zeros([self.bs, self.n_anchors, self.num_classes]).to(device), \
+                   torch.zeros([self.bs, self.n_anchors]).to(device), \
+                   torch.zeros(*pd_bboxes.shape[:2], 40, 40)
+
+
+ overlaps = iou2d_calculator(gt_bboxes.reshape([-1, 4]), anc_bboxes)
+ overlaps = overlaps.reshape([self.bs, -1, self.n_anchors])
+
+ distances, ac_points = dist_calculator(gt_bboxes.reshape([-1, 4]), anc_bboxes)
+ distances = distances.reshape([self.bs, -1, self.n_anchors])
+
+ is_in_candidate, candidate_idxs = self.select_topk_candidates(
+ distances, n_level_bboxes, mask_gt)
+
+ overlaps_thr_per_gt, iou_candidates = self.thres_calculator(
+ is_in_candidate, candidate_idxs, overlaps)
+
+ # select candidates iou >= threshold as positive
+ is_pos = torch.where(
+ iou_candidates > overlaps_thr_per_gt.repeat([1, 1, self.n_anchors]),
+ is_in_candidate, torch.zeros_like(is_in_candidate))
+
+ is_in_gts = select_candidates_in_gts(ac_points, gt_bboxes)
+ mask_pos = is_pos * is_in_gts * mask_gt
+
+ target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(
+ mask_pos, overlaps, self.n_max_boxes)
+
+ # assigned target
+ target_labels, target_bboxes, target_scores, target_segmasks = self.get_targets(
+ gt_labels, gt_bboxes, target_gt_idx, fg_mask, gt_segmasks)
+
+ # soft label with iou
+ if pd_bboxes is not None:
+ ious = iou_calculator(gt_bboxes, pd_bboxes) * mask_pos
+ ious = ious.max(axis=-2)[0].unsqueeze(-1)
+ target_scores *= ious
+
+ return target_labels.long(), target_bboxes, target_scores, fg_mask.bool(), target_segmasks
+
+ def select_topk_candidates(self,
+ distances,
+ n_level_bboxes,
+ mask_gt):
+
+ mask_gt = mask_gt.repeat(1, 1, self.topk).bool()
+ level_distances = torch.split(distances, n_level_bboxes, dim=-1)
+ is_in_candidate_list = []
+ candidate_idxs = []
+ start_idx = 0
+ for per_level_distances, per_level_boxes in zip(level_distances, n_level_bboxes):
+
+ end_idx = start_idx + per_level_boxes
+ selected_k = min(self.topk, per_level_boxes)
+ _, per_level_topk_idxs = per_level_distances.topk(selected_k, dim=-1, largest=False)
+ candidate_idxs.append(per_level_topk_idxs + start_idx)
+ per_level_topk_idxs = torch.where(mask_gt,
+ per_level_topk_idxs, torch.zeros_like(per_level_topk_idxs))
+ is_in_candidate = F.one_hot(per_level_topk_idxs, per_level_boxes).sum(dim=-2)
+ is_in_candidate = torch.where(is_in_candidate > 1,
+ torch.zeros_like(is_in_candidate), is_in_candidate)
+ is_in_candidate_list.append(is_in_candidate.to(distances.dtype))
+ start_idx = end_idx
+
+ is_in_candidate_list = torch.cat(is_in_candidate_list, dim=-1)
+ candidate_idxs = torch.cat(candidate_idxs, dim=-1)
+
+ return is_in_candidate_list, candidate_idxs
+
+ def thres_calculator(self,
+ is_in_candidate,
+ candidate_idxs,
+ overlaps):
+
+ n_bs_max_boxes = self.bs * self.n_max_boxes
+ _candidate_overlaps = torch.where(is_in_candidate > 0,
+ overlaps, torch.zeros_like(overlaps))
+ candidate_idxs = candidate_idxs.reshape([n_bs_max_boxes, -1])
+ assist_idxs = self.n_anchors * torch.arange(n_bs_max_boxes, device=candidate_idxs.device)
+ assist_idxs = assist_idxs[:,None]
+        flatten_idxs = candidate_idxs + assist_idxs
+        candidate_overlaps = _candidate_overlaps.reshape(-1)[flatten_idxs]
+ candidate_overlaps = candidate_overlaps.reshape([self.bs, self.n_max_boxes, -1])
+
+ overlaps_mean_per_gt = candidate_overlaps.mean(axis=-1, keepdim=True)
+ overlaps_std_per_gt = candidate_overlaps.std(axis=-1, keepdim=True)
+ overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt
+
+ return overlaps_thr_per_gt, _candidate_overlaps
+
+ def get_targets(self,
+ gt_labels,
+ gt_bboxes,
+ target_gt_idx,
+ fg_mask,
+ gt_segmasks):
+
+ # assigned target labels
+ batch_idx = torch.arange(self.bs, dtype=gt_labels.dtype, device=gt_labels.device)
+ batch_idx = batch_idx[...,None]
+ target_gt_idx = (target_gt_idx + batch_idx * self.n_max_boxes).long()
+ target_labels = gt_labels.flatten()[target_gt_idx.flatten()]
+ target_labels = target_labels.reshape([self.bs, self.n_anchors])
+ target_labels = torch.where(fg_mask > 0,
+ target_labels, torch.full_like(target_labels, self.bg_idx))
+
+ # assigned target boxes
+ target_bboxes = gt_bboxes.reshape([-1, 4])[target_gt_idx.flatten()]
+ target_bboxes = target_bboxes.reshape([self.bs, self.n_anchors, 4])
+
+ # assigned target scores
+ target_scores = F.one_hot(target_labels.long(), self.num_classes + 1).float()
+ target_scores = target_scores[:, :, :self.num_classes]
+
+ m_shape = gt_segmasks.shape[-2:]
+ target_segmasks = gt_segmasks.reshape([-1, m_shape[0], m_shape[1]])[target_gt_idx.flatten()]
+
+ return target_labels, target_bboxes, target_scores, target_segmasks
diff --git a/yolov6/assigners/tal_assigner.py b/yolov6/assigners/tal_assigner.py
index 45008f5a..d1bd404a 100644
--- a/yolov6/assigners/tal_assigner.py
+++ b/yolov6/assigners/tal_assigner.py
@@ -25,7 +25,8 @@ def forward(self,
anc_points,
gt_labels,
gt_bboxes,
- mask_gt):
+ mask_gt,
+ gt_segmasks):
"""This code referenced to
https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py
@@ -50,10 +51,11 @@ def forward(self,
return torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), \
torch.zeros_like(pd_bboxes).to(device), \
torch.zeros_like(pd_scores).to(device), \
- torch.zeros_like(pd_scores[..., 0]).to(device)
+ torch.zeros_like(pd_scores[..., 0]).to(device), \
+ torch.zeros(*pd_bboxes.shape[:2], 40, 40)
cycle, step, self.bs = (1, self.bs, self.bs) if self.n_max_boxes <= 100 else (self.bs, 1, 1)
- target_labels_lst, target_bboxes_lst, target_scores_lst, fg_mask_lst = [], [], [], []
+ target_labels_lst, target_bboxes_lst, target_scores_lst, fg_mask_lst, target_segmasks_lst = [], [], [], [], []
# loop batch dim in case of numerous object box
for i in range(cycle):
start, end = i*step, (i+1)*step
@@ -62,6 +64,7 @@ def forward(self,
gt_labels_ = gt_labels[start:end, ...]
gt_bboxes_ = gt_bboxes[start:end, ...]
mask_gt_ = mask_gt[start:end, ...]
+ gt_segmasks_ = gt_segmasks[start:end, ...]
mask_pos, align_metric, overlaps = self.get_pos_mask(
pd_scores_, pd_bboxes_, gt_labels_, gt_bboxes_, anc_points, mask_gt_)
@@ -70,8 +73,8 @@ def forward(self,
mask_pos, overlaps, self.n_max_boxes)
# assigned target
- target_labels, target_bboxes, target_scores = self.get_targets(
- gt_labels_, gt_bboxes_, target_gt_idx, fg_mask)
+ target_labels, target_bboxes, target_scores, target_segmasks = self.get_targets(
+ gt_labels_, gt_bboxes_, target_gt_idx, fg_mask, gt_segmasks_)
# normalize
align_metric *= mask_pos
@@ -85,14 +88,16 @@ def forward(self,
target_bboxes_lst.append(target_bboxes)
target_scores_lst.append(target_scores)
fg_mask_lst.append(fg_mask)
+ target_segmasks_lst.append(target_segmasks)
# concat
target_labels = torch.cat(target_labels_lst, 0)
target_bboxes = torch.cat(target_bboxes_lst, 0)
target_scores = torch.cat(target_scores_lst, 0)
fg_mask = torch.cat(fg_mask_lst, 0)
+ target_segmasks = torch.cat(target_segmasks_lst, 0)
- return target_labels, target_bboxes, target_scores, fg_mask.bool()
+ return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_segmasks
def get_pos_mask(self,
pd_scores,
@@ -153,7 +158,8 @@ def get_targets(self,
gt_labels,
gt_bboxes,
target_gt_idx,
- fg_mask):
+ fg_mask,
+ gt_segmasks):
# assigned target labels
batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[...,None]
@@ -169,5 +175,8 @@ def get_targets(self,
fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes)
target_scores = torch.where(fg_scores_mask > 0, target_scores,
torch.full_like(target_scores, 0))
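+ # assigned target segmentation masks, gathered with the same per-batch GT indices as boxes and labels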
+ m_shape = gt_segmasks.shape[-2:]
+ target_segmasks = gt_segmasks.reshape([-1, m_shape[0], m_shape[1]])[target_gt_idx]
- return target_labels, target_bboxes, target_scores
+
+ return target_labels, target_bboxes, target_scores, target_segmasks
diff --git a/yolov6/assigners/tal_assigner_seg.py b/yolov6/assigners/tal_assigner_seg.py
new file mode 100644
index 00000000..057c718b
--- /dev/null
+++ b/yolov6/assigners/tal_assigner_seg.py
@@ -0,0 +1,185 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from yolov6.assigners.assigner_utils import select_candidates_in_gts, select_highest_overlaps, iou_calculator, dist_calculator
+
+class TaskAlignedAssigner(nn.Module):
+ def __init__(self,
+ topk=13,
+ num_classes=80,
+ alpha=1.0,
+ beta=6.0,
+ eps=1e-9):
+ super(TaskAlignedAssigner, self).__init__()
+ self.topk = topk
+ self.num_classes = num_classes
+ self.bg_idx = num_classes
+ self.alpha = alpha
+ self.beta = beta
+ self.eps = eps
+
+ @torch.no_grad()
+ def forward(self,
+ pd_scores,
+ pd_bboxes,
+ anc_points,
+ gt_labels,
+ gt_bboxes,
+ mask_gt,
+ gt_segmasks):
+ """This code referenced to
+ https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py
+
+ Args:
+ pd_scores (Tensor): shape(bs, num_total_anchors, num_classes)
+ pd_bboxes (Tensor): shape(bs, num_total_anchors, 4)
+ anc_points (Tensor): shape(num_total_anchors, 2)
+ gt_labels (Tensor): shape(bs, n_max_boxes, 1)
+ gt_bboxes (Tensor): shape(bs, n_max_boxes, 4)
+ mask_gt (Tensor): shape(bs, n_max_boxes, 1)
+ gt_segmasks (Tensor): per-GT segmentation masks (unused in this variant; assignment indices are returned instead)
+ Returns:
+ target_labels (Tensor): shape(bs, num_total_anchors)
+ target_bboxes (Tensor): shape(bs, num_total_anchors, 4)
+ target_scores (Tensor): shape(bs, num_total_anchors, num_classes)
+ fg_mask (Tensor): shape(bs, num_total_anchors)
+ idx_lst (list[Tensor]): assigned GT indices per batch chunk, used downstream to gather segmentation targets
+ """
+ self.bs = pd_scores.size(0)
+ self.n_max_boxes = gt_bboxes.size(1)
+
+ if self.n_max_boxes == 0:
+ device = gt_bboxes.device
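+ # no GT boxes in this batch: return all-background targets and an empty list in place of assignment indices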
+ return torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), \
+ torch.zeros_like(pd_bboxes).to(device), \
+ torch.zeros_like(pd_scores).to(device), \
+ torch.zeros_like(pd_scores[..., 0]).to(device), \
+ []
+ #torch.zeros(*pd_bboxes.shape[:2]).to(device)
+
+
+ # cycle, step, self.bs = (1, self.bs, self.bs) if self.n_max_boxes <= 100 else (self.bs, 1, 1)
+ cycle, step, self.bs = (1, self.bs, self.bs)
+ target_labels_lst, target_bboxes_lst, target_scores_lst, fg_mask_lst, idx_lst = [], [], [], [], []
+ # loop batch dim in case of numerous object box
+ for i in range(cycle):
+ start, end = i*step, (i+1)*step
+ pd_scores_ = pd_scores[start:end, ...]
+ pd_bboxes_ = pd_bboxes[start:end, ...]
+ gt_labels_ = gt_labels[start:end, ...]
+ gt_bboxes_ = gt_bboxes[start:end, ...]
+ mask_gt_ = mask_gt[start:end, ...]
+ # gt_segmasks_ = gt_segmasks[start:end, ...]
+
+ mask_pos, align_metric, overlaps = self.get_pos_mask(
+ pd_scores_, pd_bboxes_, gt_labels_, gt_bboxes_, anc_points, mask_gt_)
+
+ target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(
+ mask_pos, overlaps, self.n_max_boxes)
+
+ # assigned target
+ target_labels, target_bboxes, target_scores, idx = self.get_targets(
+ gt_labels_, gt_bboxes_, target_gt_idx, fg_mask)
+
+ # normalize
+ align_metric *= mask_pos
+ pos_align_metrics = align_metric.max(axis=-1, keepdim=True)[0]
+ pos_overlaps = (overlaps * mask_pos).max(axis=-1, keepdim=True)[0]
+ norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).max(-2)[0].unsqueeze(-1)
+ target_scores = target_scores * norm_align_metric
+
+ # append
+ target_labels_lst.append(target_labels)
+ idx_lst.append(idx)
+ target_bboxes_lst.append(target_bboxes)
+ target_scores_lst.append(target_scores)
+ fg_mask_lst.append(fg_mask)
+ # target_segmasks_lst.append(target_segmasks)
+
+ # concat
+ target_labels = torch.cat(target_labels_lst, 0)
+ target_bboxes = torch.cat(target_bboxes_lst, 0)
+ target_scores = torch.cat(target_scores_lst, 0)
+ fg_mask = torch.cat(fg_mask_lst, 0)
+ # target_segmasks = torch.cat(target_segmasks_lst, 0)
+
+ return target_labels, target_bboxes, target_scores, fg_mask.bool(), idx_lst
+
+ def get_pos_mask(self,
+ pd_scores,
+ pd_bboxes,
+ gt_labels,
+ gt_bboxes,
+ anc_points,
+ mask_gt):
+
+ # get anchor_align metric
+ align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes)
+ # get in_gts mask
+ mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes)
+ # get topk_metric mask
+ mask_topk = self.select_topk_candidates(
+ align_metric * mask_in_gts, topk_mask=mask_gt.repeat([1, 1, self.topk]).bool())
+ # merge all mask to a final mask
+ mask_pos = mask_topk * mask_in_gts * mask_gt
+
+ return mask_pos, align_metric, overlaps
+
+ def get_box_metrics(self,
+ pd_scores,
+ pd_bboxes,
+ gt_labels,
+ gt_bboxes):
+
+ pd_scores = pd_scores.permute(0, 2, 1)
+ gt_labels = gt_labels.to(torch.long)
+ ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long)
+ ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes)
+ ind[1] = gt_labels.squeeze(-1)
+ bbox_scores = pd_scores[ind[0], ind[1]]
+
+ overlaps = iou_calculator(gt_bboxes, pd_bboxes)
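+ # task-aligned metric: classification score ** alpha * IoU ** beta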
+ align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta)
+
+ return align_metric, overlaps
+
+ def select_topk_candidates(self,
+ metrics,
+ largest=True,
+ topk_mask=None):
+
+ num_anchors = metrics.shape[-1]
+ topk_metrics, topk_idxs = torch.topk(
+ metrics, self.topk, axis=-1, largest=largest)
+ if topk_mask is None:
+ topk_mask = (topk_metrics.max(axis=-1, keepdim=True) > self.eps).tile(
+ [1, 1, self.topk])
+ topk_idxs = torch.where(topk_mask, topk_idxs, torch.zeros_like(topk_idxs))
+ is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
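+ # a count larger than one can only come from the masked entries forced to index 0 above, so drop those anchors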
+ is_in_topk = torch.where(is_in_topk > 1,
+ torch.zeros_like(is_in_topk), is_in_topk)
+ return is_in_topk.to(metrics.dtype)
+
+ def get_targets(self,
+ gt_labels,
+ gt_bboxes,
+ target_gt_idx,
+ fg_mask):
+
+ # assigned target labels
+ batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[...,None]
+ target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes
+ target_labels = gt_labels.long().flatten()[target_gt_idx]
+
+ # assigned target boxes
+ target_bboxes = gt_bboxes.reshape([-1, 4])[target_gt_idx]
+
+ # assigned target scores
+ target_labels[target_labels<0] = 0
+ target_scores = F.one_hot(target_labels, self.num_classes)
+ fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes)
+ target_scores = torch.where(fg_scores_mask > 0, target_scores,
+ torch.full_like(target_scores, 0))
+ # m_shape = gt_segmasks.shape[-2:]
+ # target_segmasks = gt_segmasks.reshape([-1, m_shape[0], m_shape[1]])[target_gt_idx]
+
+
+ return target_labels, target_bboxes, target_scores, target_gt_idx
diff --git a/yolov6/assigners/tal_assigner_seg2.py b/yolov6/assigners/tal_assigner_seg2.py
new file mode 100644
index 00000000..aa1101cd
--- /dev/null
+++ b/yolov6/assigners/tal_assigner_seg2.py
@@ -0,0 +1,183 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from yolov6.assigners.assigner_utils import select_candidates_in_gts, select_highest_overlaps, iou_calculator, dist_calculator
+
+class TaskAlignedAssigner(nn.Module):
+ def __init__(self,
+ topk=13,
+ num_classes=80,
+ alpha=1.0,
+ beta=6.0,
+ eps=1e-9):
+ super(TaskAlignedAssigner, self).__init__()
+ self.topk = topk
+ self.num_classes = num_classes
+ self.bg_idx = num_classes
+ self.alpha = alpha
+ self.beta = beta
+ self.eps = eps
+
+ @torch.no_grad()
+ def forward(self,
+ pd_scores,
+ pd_bboxes,
+ anc_points,
+ gt_labels,
+ gt_bboxes,
+ mask_gt,
+ gt_segmasks):
+ """This code referenced to
+ https://github.com/Nioolek/PPYOLOE_pytorch/blob/master/ppyoloe/assigner/tal_assigner.py
+
+ Args:
+ pd_scores (Tensor): shape(bs, num_total_anchors, num_classes)
+ pd_bboxes (Tensor): shape(bs, num_total_anchors, 4)
+ anc_points (Tensor): shape(num_total_anchors, 2)
+ gt_labels (Tensor): shape(bs, n_max_boxes, 1)
+ gt_bboxes (Tensor): shape(bs, n_max_boxes, 4)
+ mask_gt (Tensor): shape(bs, n_max_boxes, 1)
+ gt_segmasks (Tensor): per-GT segmentation masks, shape(bs, n_max_boxes, mask_h, mask_w)
+ Returns:
+ target_labels (Tensor): shape(bs, num_total_anchors)
+ target_bboxes (Tensor): shape(bs, num_total_anchors, 4)
+ target_scores (Tensor): shape(bs, num_total_anchors, num_classes)
+ fg_mask (Tensor): shape(bs, num_total_anchors)
+ target_segmasks (Tensor): shape(bs, num_total_anchors, mask_h, mask_w)
+ """
+ self.bs = pd_scores.size(0)
+ self.n_max_boxes = gt_bboxes.size(1)
+
+ if self.n_max_boxes == 0:
+ device = gt_bboxes.device
+ return torch.full_like(pd_scores[..., 0], self.bg_idx).to(device), \
+ torch.zeros_like(pd_bboxes).to(device), \
+ torch.zeros_like(pd_scores).to(device), \
+ torch.zeros_like(pd_scores[..., 0]).to(device), \
+ torch.zeros(*pd_bboxes.shape[:2], 40, 40)
+
+ cycle, step, self.bs = (1, self.bs, self.bs) if self.n_max_boxes <= 100 else (self.bs, 1, 1)
+ target_labels_lst, target_bboxes_lst, target_scores_lst, fg_mask_lst, target_segmasks_lst = [], [], [], [], []
+ # loop batch dim in case of numerous object box
+ for i in range(cycle):
+ start, end = i*step, (i+1)*step
+ pd_scores_ = pd_scores[start:end, ...]
+ pd_bboxes_ = pd_bboxes[start:end, ...]
+ gt_labels_ = gt_labels[start:end, ...]
+ gt_bboxes_ = gt_bboxes[start:end, ...]
+ mask_gt_ = mask_gt[start:end, ...]
+ gt_segmasks_ = gt_segmasks[start:end, ...]
+
+ mask_pos, align_metric, overlaps = self.get_pos_mask(
+ pd_scores_, pd_bboxes_, gt_labels_, gt_bboxes_, anc_points, mask_gt_)
+
+ target_gt_idx, fg_mask, mask_pos = select_highest_overlaps(
+ mask_pos, overlaps, self.n_max_boxes)
+
+ # assigned target
+ target_labels, target_bboxes, target_scores, target_segmasks = self.get_targets(
+ gt_labels_, gt_bboxes_, target_gt_idx, fg_mask, gt_segmasks_)
+
+ # normalize
+ align_metric *= mask_pos
+ pos_align_metrics = align_metric.max(axis=-1, keepdim=True)[0]
+ pos_overlaps = (overlaps * mask_pos).max(axis=-1, keepdim=True)[0]
+ norm_align_metric = (align_metric * pos_overlaps / (pos_align_metrics + self.eps)).max(-2)[0].unsqueeze(-1)
+ target_scores = target_scores * norm_align_metric
+
+ # append
+ target_labels_lst.append(target_labels)
+ target_bboxes_lst.append(target_bboxes)
+ target_scores_lst.append(target_scores)
+ fg_mask_lst.append(fg_mask)
+ target_segmasks_lst.append(target_segmasks)
+
+ # concat
+ target_labels = torch.cat(target_labels_lst, 0)
+ target_bboxes = torch.cat(target_bboxes_lst, 0)
+ target_scores = torch.cat(target_scores_lst, 0)
+ fg_mask = torch.cat(fg_mask_lst, 0)
+ target_segmasks = torch.cat(target_segmasks_lst, 0)
+
+ return target_labels, target_bboxes, target_scores, fg_mask.bool(), target_segmasks
+
+ def get_pos_mask(self,
+ pd_scores,
+ pd_bboxes,
+ gt_labels,
+ gt_bboxes,
+ anc_points,
+ mask_gt):
+
+ # get anchor_align metric
+ align_metric, overlaps = self.get_box_metrics(pd_scores, pd_bboxes, gt_labels, gt_bboxes)
+ # get in_gts mask
+ mask_in_gts = select_candidates_in_gts(anc_points, gt_bboxes)
+ # get topk_metric mask
+ mask_topk = self.select_topk_candidates(
+ align_metric * mask_in_gts, topk_mask=mask_gt.repeat([1, 1, self.topk]).bool())
+ # merge all mask to a final mask
+ mask_pos = mask_topk * mask_in_gts * mask_gt
+
+ return mask_pos, align_metric, overlaps
+
+ def get_box_metrics(self,
+ pd_scores,
+ pd_bboxes,
+ gt_labels,
+ gt_bboxes):
+
+ pd_scores = pd_scores.permute(0, 2, 1)
+ gt_labels = gt_labels.to(torch.long)
+ ind = torch.zeros([2, self.bs, self.n_max_boxes], dtype=torch.long)
+ ind[0] = torch.arange(end=self.bs).view(-1, 1).repeat(1, self.n_max_boxes)
+ ind[1] = gt_labels.squeeze(-1)
+ bbox_scores = pd_scores[ind[0], ind[1]]
+
+ overlaps = iou_calculator(gt_bboxes, pd_bboxes)
+ align_metric = bbox_scores.pow(self.alpha) * overlaps.pow(self.beta)
+
+ return align_metric, overlaps
+
+ def select_topk_candidates(self,
+ metrics,
+ largest=True,
+ topk_mask=None):
+
+ num_anchors = metrics.shape[-1]
+ topk_metrics, topk_idxs = torch.topk(
+ metrics, self.topk, axis=-1, largest=largest)
+ if topk_mask is None:
+ topk_mask = (topk_metrics.max(axis=-1, keepdim=True) > self.eps).tile(
+ [1, 1, self.topk])
+ topk_idxs = torch.where(topk_mask, topk_idxs, torch.zeros_like(topk_idxs))
+ is_in_topk = F.one_hot(topk_idxs, num_anchors).sum(axis=-2)
+ is_in_topk = torch.where(is_in_topk > 1,
+ torch.zeros_like(is_in_topk), is_in_topk)
+ return is_in_topk.to(metrics.dtype)
+
+ def get_targets(self,
+ gt_labels,
+ gt_bboxes,
+ target_gt_idx,
+ fg_mask,
+ gt_segmasks):
+
+ # assigned target labels
+ batch_ind = torch.arange(end=self.bs, dtype=torch.int64, device=gt_labels.device)[...,None]
+ target_gt_idx = target_gt_idx + batch_ind * self.n_max_boxes
+ target_labels = gt_labels.long().flatten()[target_gt_idx]
+
+ # assigned target boxes
+ target_bboxes = gt_bboxes.reshape([-1, 4])[target_gt_idx]
+
+ # assigned target scores
+ target_labels[target_labels<0] = 0
+ target_scores = F.one_hot(target_labels, self.num_classes)
+ fg_scores_mask = fg_mask[:, :, None].repeat(1, 1, self.num_classes)
+ target_scores = torch.where(fg_scores_mask > 0, target_scores,
+ torch.full_like(target_scores, 0))
+ m_shape = gt_segmasks.shape[-2:]
+ target_segmasks = gt_segmasks.reshape([-1, m_shape[0], m_shape[1]])[target_gt_idx]
+ print(target_gt_idx.shape, fg_mask.shape)
+
+
+ return target_labels, target_bboxes, target_scores, target_segmasks
diff --git a/yolov6/core/engine.py b/yolov6/core/engine.py
index 10545135..663a0812 100644
--- a/yolov6/core/engine.py
+++ b/yolov6/core/engine.py
@@ -21,7 +21,6 @@
from yolov6.models.yolo import build_model
from yolov6.models.yolo_lite import build_model as build_lite_model
-from yolov6.models.losses.loss import ComputeLoss as ComputeLoss
from yolov6.models.losses.loss_fuseab import ComputeLoss as ComputeLoss_ab
from yolov6.models.losses.loss_distill import ComputeLoss as ComputeLoss_distill
from yolov6.models.losses.loss_distill_ns import ComputeLoss as ComputeLoss_distill_ns
@@ -35,6 +34,8 @@
from yolov6.utils.general import download_ckpt
+
+
class Trainer:
def __init__(self, args, cfg, device):
self.args = args
@@ -42,6 +43,8 @@ def __init__(self, args, cfg, device):
self.device = device
self.max_epoch = args.epochs
+
+
if args.resume:
self.ckpt = torch.load(args.resume, map_location='cpu')
@@ -105,8 +108,8 @@ def __init__(self, args, cfg, device):
self.height = args.height
self.width = args.width
- self.loss_num = 3
- self.loss_info = ['Epoch', 'lr', 'iou_loss', 'dfl_loss', 'cls_loss']
+ self.loss_num = 4
+ self.loss_info = ['Epoch', 'lr', 'iou_loss', 'dfl_loss', 'cls_loss', "seg_loss"]
if self.args.distill:
self.loss_num += 1
self.loss_info += ['cwd_loss']
@@ -140,7 +143,9 @@ def train_one_epoch(self, epoch_num):
# Training one batch data.
def train_in_steps(self, epoch_num, step_num):
- images, targets = self.prepro_data(self.batch_data, self.device)
+ # torch.cuda.synchronize()
+ # qq1 = time.time()
+ images, targets, segmasks = self.prepro_data(self.batch_data, self.device)
# plot train_batch and save to tensorboard once an epoch
if self.write_trainbatch_tb and self.main_process and self.step == 0:
self.plot_train_batch(images, targets)
@@ -149,7 +154,11 @@ def train_in_steps(self, epoch_num, step_num):
# forward
with amp.autocast(enabled=self.device != 'cpu'):
_, _, batch_height, batch_width = images.shape
+ # torch.cuda.synchronize()
+ # qq2 = time.time()
preds, s_featmaps = self.model(images)
+ # torch.cuda.synchronize()
+ # qq3 = time.time()
if self.args.distill:
with torch.no_grad():
t_preds, t_featmaps = self.teacher_model(images)
@@ -159,18 +168,21 @@ def train_in_steps(self, epoch_num, step_num):
batch_height, batch_width)
elif self.args.fuse_ab:
- total_loss, loss_items = self.compute_loss((preds[0],preds[3],preds[4]), targets, epoch_num,
- step_num, batch_height, batch_width) # YOLOv6_af
- total_loss_ab, loss_items_ab = self.compute_loss_ab(preds[:3], targets, epoch_num, step_num,
- batch_height, batch_width) # YOLOv6_ab
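+ # preds[5] / preds[6] are the extra segmentation head outputs consumed by the seg-aware losses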
+ total_loss, loss_items = self.compute_loss((preds[0],preds[3],preds[4], preds[5]), targets, epoch_num,
+ step_num, batch_height, batch_width, segmasks) # YOLOv6_af
+ total_loss_ab, loss_items_ab = self.compute_loss_ab((preds[0],preds[1],preds[2], preds[6]), targets, epoch_num, step_num,
+ batch_height, batch_width, segmasks) # YOLOv6_ab
total_loss += total_loss_ab
loss_items += loss_items_ab
else:
- total_loss, loss_items = self.compute_loss(preds, targets, epoch_num, step_num,
- batch_height, batch_width) # YOLOv6_af
+ total_loss, loss_items = self.compute_loss((preds[0],preds[3],preds[4], preds[5]), targets, epoch_num, step_num,
+ batch_height, batch_width, segmasks, img=images) # YOLOv6_af
if self.rank != -1:
total_loss *= self.world_size
+ # torch.cuda.synchronize()
+ # qq4 = time.time()
# backward
+ # print("prepare : {}s | model : {}s | loss : {}s".format(qq2 - qq1, qq3 - qq2, qq4 - qq3))
self.scaler.scale(total_loss).backward()
self.loss_items = loss_items
self.update_optimizer()
@@ -186,12 +198,12 @@ def after_epoch(self):
is_val_epoch = (remaining_epochs == 0) or ((not self.args.eval_final_only) and ((self.epoch + 1) % eval_interval == 0))
if is_val_epoch:
self.eval_model()
- self.ap = self.evaluate_results[1]
+ self.ap = self.evaluate_results[3]
self.best_ap = max(self.ap, self.best_ap)
# save ckpt
ckpt = {
- 'model': deepcopy(de_parallel(self.model)).half(),
- 'ema': deepcopy(self.ema.ema).half(),
+ 'model': deepcopy(de_parallel(self.model)),
+ 'ema': deepcopy(self.ema.ema),
'updates': self.ema.updates,
'optimizer': self.optimizer.state_dict(),
'scheduler': self.scheduler.state_dict(),
@@ -231,7 +243,10 @@ def eval_model(self):
task='train',
specific_shape=self.specific_shape,
height=self.height,
- width=self.width
+ width=self.width,
+ do_pr_metric=True,
+ do_coco_metric=False,
+ issolo=self.cfg.model.head.issolo
)
else:
def get_cfg_value(cfg_dict, value_str, default_value):
@@ -263,10 +278,10 @@ def get_cfg_value(cfg_dict, value_str, default_value):
width=self.width
)
- LOGGER.info(f"Epoch: {self.epoch} | mAP@0.5: {results[0]} | mAP@0.50:0.95: {results[1]}")
- self.evaluate_results = results[:2]
+ LOGGER.info(f"Epoch: {self.epoch} | box_mAP@0.5: {results[0]} | box_mAP@0.50:0.95: {results[1]} | mask_mAP@0.5: {results[2]} | mask_mAP@0.50:0.95: {results[3]}")
+ self.evaluate_results = [results[1], results[3]]
# plot validation predictions
- self.plot_val_pred(vis_outputs, vis_paths)
+ # self.plot_val_pred(vis_outputs, vis_paths)
def before_train_loop(self):
@@ -286,6 +301,10 @@ def before_train_loop(self):
self.best_ap = self.evaluate_results[1]
self.best_stop_strong_aug_ap = self.evaluate_results[1]
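+ # choose the segmentation loss implementation at runtime from the head type (issolo) in the model config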
+ if self.cfg.model.head.issolo:
+ from yolov6.models.losses.seg_loss_solo_main import ComputeLoss as ComputeLoss
+ else:
+ from yolov6.models.losses.seg_loss import ComputeLoss as ComputeLoss
self.compute_loss = ComputeLoss(num_classes=self.data_dict['nc'],
ori_img_size=self.img_size,
@@ -293,6 +312,7 @@ def before_train_loop(self):
use_dfl=self.cfg.model.head.use_dfl,
reg_max=self.cfg.model.head.reg_max,
iou_type=self.cfg.model.head.iou_type,
+ nm=self.cfg.model.head.nm,
fpn_strides=self.cfg.model.head.strides)
if self.args.fuse_ab:
@@ -305,7 +325,7 @@ def before_train_loop(self):
fpn_strides=self.cfg.model.head.strides,
)
if self.args.distill :
- if self.cfg.model.type in ['YOLOv6n','YOLOv6s']:
+ if self.cfg.model.type in ['YOLOv6n','YOLOv6s']:
Loss_distill_func = ComputeLoss_distill_ns
else:
Loss_distill_func = ComputeLoss_distill
@@ -404,7 +424,8 @@ def get_data_loader(args, cfg, data_dict):
def prepro_data(batch_data, device):
images = batch_data[0].to(device, non_blocking=True).float() / 255
targets = batch_data[1].to(device)
- return images, targets
+ segmask = batch_data[4].to(device)
+ return images, targets, segmask
def get_model(self, args, cfg, nc, device):
if 'YOLOv6-lite' in cfg.model.type:
@@ -588,4 +609,4 @@ def quant_setup(self, model, cfg, device):
# QAT flow load calibrated model
assert cfg.qat.calib_pt is not None, 'Please provide calibrated model'
model.load_state_dict(torch.load(cfg.qat.calib_pt)['model'].float().state_dict())
- model.to(device)
+ model.to(device)
\ No newline at end of file
diff --git a/yolov6/core/evaler.py b/yolov6/core/evaler.py
index e79f51be..15c8bc76 100644
--- a/yolov6/core/evaler.py
+++ b/yolov6/core/evaler.py
@@ -7,13 +7,19 @@
import torch
import yaml
from pathlib import Path
+import cv2
+from multiprocessing.pool import ThreadPool
+
+
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
+import torch.nn.functional as F
+
from yolov6.data.data_load import create_dataloader
from yolov6.utils.events import LOGGER, NCOLS
-from yolov6.utils.nms import non_max_suppression
+from yolov6.utils.nms import non_max_suppression_seg, non_max_suppression_seg_solo
from yolov6.utils.general import download_ckpt
from yolov6.utils.checkpoint import load_checkpoint
from yolov6.utils.torch_utils import time_sync, get_model_info
@@ -87,24 +93,25 @@ def init_data(self, dataloader, task):
self.is_coco = self.data.get("is_coco", False)
self.ids = self.coco80_to_coco91_class() if self.is_coco else list(range(1000))
if task != 'train':
+ pad = 0.0
eval_hyp = {
"shrink_size":self.shrink_size,
}
rect = self.infer_on_rect
- pad = 0.5 if rect else 0.0
dataloader = create_dataloader(self.data[task if task in ('train', 'val', 'test') else 'val'],
- self.img_size, self.batch_size, self.stride, hyp=eval_hyp, check_labels=True, pad=pad, rect=rect,
+ self.img_size, self.batch_size, self.stride, hyp=eval_hyp, check_labels=True, pad=0.5, rect=True,
data_dict=self.data, task=task, specific_shape=self.specific_shape, height=self.height, width=self.width)[0]
return dataloader
- def predict_model(self, model, dataloader, task):
+ def predict_model(self, model, dataloader, task, issolo=False, weight_nums=66, bias_nums=1, dyconv_channels=66):
'''Model prediction
Predicts the whole dataset and gets the prediced results and inference time.
'''
self.speed_result = torch.zeros(4, device=self.device)
pred_results = []
pbar = tqdm(dataloader, desc=f"Inferencing model in {task} datasets.", ncols=NCOLS)
-
+ weight_nums = [weight_nums]
+ bias_nums = [bias_nums]
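+ # parse_dynamic_params expects per-layer lists of weight/bias sizes, so wrap the scalar arguments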
# whether to compute metric and plot PR curve and P、R、F1 curve under iou50 match rule
if self.do_pr_metric:
stats, ap = [], []
@@ -115,7 +122,7 @@ def predict_model(self, model, dataloader, task):
from yolov6.utils.metrics import ConfusionMatrix
confusion_matrix = ConfusionMatrix(nc=model.nc)
- for i, (imgs, targets, paths, shapes) in enumerate(pbar):
+ for i, (imgs, targets, paths, shapes, masks) in enumerate(pbar):
# pre-process
t1 = time_sync()
imgs = imgs.to(self.device, non_blocking=True)
@@ -125,12 +132,23 @@ def predict_model(self, model, dataloader, task):
# Inference
t2 = time_sync()
- outputs, _ = model(imgs)
+ toutputs, _ = model(imgs)
self.speed_result[2] += time_sync() - t2 # inference time
# post-process
t3 = time_sync()
- outputs = non_max_suppression(outputs, self.conf_thres, self.iou_thres, multi_label=True)
+ if not issolo:
+ loutputs = non_max_suppression_seg(toutputs, self.conf_thres, self.iou_thres, multi_label=True)
+ else:
+ loutputs = non_max_suppression_seg_solo(toutputs, self.conf_thres, self.iou_thres, multi_label=True)
+ protos = toutputs[1][0]
+ segments = []
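+ # keep the full NMS rows (with mask coefficients) for mask decoding and the first 6 columns (xyxy, conf, cls) for box evaluation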
+ segconf = [loutputs[li][..., 0:] for li in range(len(loutputs))]
+ outputs = [loutputs[li][..., :6] for li in range(len(loutputs))]
+ if not issolo:
+ segments = [self.handle_proto_test([protos[li].reshape(1, *(protos[li].shape[-3:]))], segconf[li], imgs.shape[-2:]) for li in range(len(loutputs))]
+ else:
+ segments = [self.handle_proto_solo([protos[li].reshape(1, *(protos[li].shape[-3:]))], segconf[li], imgs.shape[-2:], weight_sums=weight_nums, bias_sums=bias_nums, dyconv=dyconv_channels) for li in range(len(loutputs))]
self.speed_result[3] += time_sync() - t3 # post-process time
self.speed_result[0] += len(outputs)
@@ -139,7 +157,7 @@ def predict_model(self, model, dataloader, task):
eval_outputs = copy.deepcopy([x.detach().cpu() for x in outputs])
# save result
- pred_results.extend(self.convert_to_coco_format(outputs, imgs, paths, shapes, self.ids))
+ # pred_results.extend(self.convert_to_coco_format_seg(outputs, imgs, paths, shapes, self.ids, segments))
# for tensorboard visualization, maximum images to show: 8
if i == 0:
@@ -153,25 +171,29 @@ def predict_model(self, model, dataloader, task):
# Statistics per image
# This code is based on
# https://github.com/ultralytics/yolov5/blob/master/val.py
- for si, pred in enumerate(eval_outputs):
+ for si, (pred, pred_masks) in enumerate(zip(eval_outputs, segments)):
labels = targets[targets[:, 0] == si, 1:]
nl = len(labels)
tcls = labels[:, 0].tolist() if nl else [] # target class
seen += 1
+ correct_masks = torch.zeros(len(pred), niou, dtype=torch.bool) # init
+ correct = torch.zeros(len(pred), niou, dtype=torch.bool) # init
if len(pred) == 0:
if nl:
- stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
+ stats.append((correct_masks, correct, torch.Tensor(), torch.Tensor(), tcls))
continue
+ # Masks
+ midx = targets[:, 0] == si
+ gt_masks = masks[midx]
# Predictions
predn = pred.clone()
self.scale_coords(imgs[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]) # native-space pred
# Assign all predictions as incorrect
- correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool)
+
if nl:
-
from yolov6.utils.nms import xywh2xyxy
# target boxes
@@ -183,49 +205,122 @@ def predict_model(self, model, dataloader, task):
labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels
- from yolov6.utils.metrics import process_batch
+ from yolov6.utils.metrics import process_batch
correct = process_batch(predn, labelsn, iouv)
+ correct_masks = process_batch(predn, labelsn, iouv, pred_masks, gt_masks, overlap=False, masks=True)
if self.plot_confusion_matrix:
confusion_matrix.process_batch(predn, labelsn)
# Append statistics (correct, conf, pcls, tcls)
- stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
+
+
+ stats.append((correct_masks.cpu(), correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))
if self.do_pr_metric:
# Compute statistics
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats) and stats[0].any():
-
- from yolov6.utils.metrics import ap_per_class
- p, r, ap, f1, ap_class = ap_per_class(*stats, plot=self.plot_curve, save_dir=self.save_dir, names=model.names)
- AP50_F1_max_idx = len(f1.mean(0)) - f1.mean(0)[::-1].argmax() -1
- LOGGER.info(f"IOU 50 best mF1 thershold near {AP50_F1_max_idx/1000.0}.")
- ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95
- mp, mr, map50, map = p[:, AP50_F1_max_idx].mean(), r[:, AP50_F1_max_idx].mean(), ap50.mean(), ap.mean()
- nt = np.bincount(stats[3].astype(np.int64), minlength=model.nc) # number of targets per class
+ from yolov6.utils.metrics import ap_per_class_box_and_mask, Metrics
+ metrics = Metrics()
+ # v5 method
+ results = ap_per_class_box_and_mask(*stats, plot=self.plot_curve, save_dir=self.save_dir, names=model.names)
+ metrics.update(results)
+ nt = np.bincount(stats[4].astype(np.int64), minlength=model.nc) # number of targets per class
# Print results
- s = ('%-16s' + '%12s' * 7) % ('Class', 'Images', 'Labels', 'P@.5iou', 'R@.5iou', 'F1@.5iou', 'mAP@.5', 'mAP@.5:.95')
+ s = ('%22s' + '%15s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Mask(P', 'R',
+ 'mAP50', 'mAP50-95)')
LOGGER.info(s)
- pf = '%-16s' + '%12i' * 2 + '%12.3g' * 5 # print format
- LOGGER.info(pf % ('all', seen, nt.sum(), mp, mr, f1.mean(0)[AP50_F1_max_idx], map50, map))
-
- self.pr_metric_result = (map50, map)
-
- # Print results per class
- if self.verbose and model.nc > 1:
- for i, c in enumerate(ap_class):
- LOGGER.info(pf % (model.names[c], seen, nt[c], p[i, AP50_F1_max_idx], r[i, AP50_F1_max_idx],
- f1[i, AP50_F1_max_idx], ap50[i], ap[i]))
+ pf = '%22s' + '%15i' * 2 + '%11.5g' * 8 # print format
+ mr = metrics.mean_results()
+ LOGGER.info(pf % ('all', seen, nt.sum(), *mr))
+ return [mr[2], mr[3], mr[6], mr[7]], [], []
if self.plot_confusion_matrix:
confusion_matrix.plot(save_dir=self.save_dir, names=list(model.names))
else:
- LOGGER.info("Calculate metric failed, might check dataset.")
- self.pr_metric_result = (0.0, 0.0)
+ return [0, 0, 0, 0], [], []
+
+ return pred_results
- return pred_results, vis_outputs, vis_paths
+ def parse_dynamic_params(self, flatten_kernels, weight_nums, bias_nums, dyconv_channels):
+ """split kernel head prediction to conv weight and bias."""
+ n_inst = flatten_kernels.size(0)
+ n_layers = len(weight_nums)
+ params_splits = list(
+ torch.split_with_sizes(
+ flatten_kernels, weight_nums + bias_nums, dim=1))
+ weight_splits = params_splits[:n_layers]
+ bias_splits = params_splits[n_layers:]
+ for i in range(n_layers):
+ if i < n_layers - 1:
+ weight_splits[i] = weight_splits[i].reshape(
+ n_inst * dyconv_channels, -1, 1, 1)
+ bias_splits[i] = bias_splits[i].reshape(n_inst *
+ dyconv_channels)
+ else:
+ weight_splits[i] = weight_splits[i].reshape(n_inst, -1, 1, 1)
+ bias_splits[i] = bias_splits[i].reshape(n_inst)
+
+ return weight_splits, bias_splits
+
+ def handle_proto_solo(self, proto_list, oconfs, imgshape, weight_sums=66, bias_sums=1, dyconv=66, img_orishape=None):
+ '''
+ proto_list: [(bs, 32, w, h), ...]
+ conf: (bs, l, 33) -> which_proto, 32
+ '''
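+ # SOLO/CondInst-style mask decoding: append normalized x/y coordinate channels to the prototype map,
+ # then run the per-instance dynamic 1x1 convs predicted by the detection head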
+ def handle_proto_coord(proto):
+ _ = proto.shape[-2:]
+ x = torch.arange(0, 1, step = 1 / _[1]).unsqueeze(0).unsqueeze(0).repeat(1, _[0], 1).to(proto.dtype).to(proto.device)
+ y = torch.arange(0, 1, step = 1 / _[0]).unsqueeze(0).T.unsqueeze(0).repeat(1, 1, _[1]).to(proto.dtype).to(proto.device)
+ return torch.cat([proto, x, y]).reshape(1, -1, *_)
+
+ def crop_mask(masks, boxes):
+ """
+ "Crop" predicted masks by zeroing out everything not in the predicted bbox.
+ Vectorized by Chong (thanks Chong).
+
+ Args:
+ - masks should be a size [n, h, w] tensor of masks
+ - boxes should be a size [n, 4] tensor of bbox coords in relative point form
+ """
+
+ n, h, w = masks.shape
+ x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n)
+ r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1)
+ c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1)
+ return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+ conf = oconfs[..., 6:]
+ if conf.shape[0] == 0:
+ return None
+
+ xyxy = oconfs[..., :4]
+ confs = conf[..., 1:]
+ proto = proto_list[0][0]
+ proto = handle_proto_coord(proto)
+ s = proto.shape[-2:]
+ num_inst = confs.shape[0]
+ proto = proto.reshape(1, -1, *proto.shape[-2:])
+ weights, biases = self.parse_dynamic_params(confs, weight_nums=weight_sums, bias_nums=bias_sums, dyconv_channels=dyconv)
+ n_layers = len(weights)
+ for i, (weight, bias) in enumerate(zip(weights, biases)):
+ x = F.conv2d(
+ proto, weight, bias=bias, stride=1, padding=0, groups=1)
+ if i < n_layers - 1:
+ x = F.relu(x)
+ x = x.reshape(num_inst, *proto.shape[-2:]).unsqueeze(0)
+ seg = x.sigmoid()
+ masks = F.interpolate(seg, imgshape, mode='bilinear', align_corners=False)[0]
+ if img_orishape:
+ masks_ori = F.interpolate(seg, img_orishape, mode='nearest')[0]
+ else:
+ masks_ori = None
+ masks = crop_mask(masks, xyxy).gt_(0.5)
+ return masks
+
def eval_model(self, pred_results, model, dataloader, task):
@@ -282,7 +377,8 @@ def eval_model(self, pred_results, model, dataloader, task):
label_count_dicts[nc_i]["images"].add(ann_i["image_id"])
label_count_dicts[nc_i]["anns"] += 1
- s = ('%-16s' + '%12s' * 7) % ('Class', 'Labeled_images', 'Labels', 'P@.5iou', 'R@.5iou', 'F1@.5iou', 'mAP@.5', 'mAP@.5:.95')
+ s = ('%22s' + '%11s' * 10) % ('Class', 'Images', 'Instances', 'Box(P', 'R', 'mAP50', 'mAP50-95)', 'Mask(P', 'R',
+ 'mAP50', 'mAP50-95)')
LOGGER.info(s)
#IOU , all p, all cats, all gt, maxdet 100
coco_p = cocoEval.eval['precision']
@@ -383,6 +479,51 @@ def convert_to_coco_format(self, outputs, imgs, paths, shapes, ids):
pred_results.append(pred_data)
return pred_results
+ def convert_to_coco_format_seg(self, outputs, imgs, paths, shapes, ids, masks):
+
+ from pycocotools.mask import encode
+ import time
+
+ def single_encode(x):
+ rle = encode(np.asarray(x[:, :, None], order='F', dtype='uint8'))[0]
+ rle['counts'] = rle['counts'].decode('utf-8')
+ return rle
+
+
+ pred_results = []
+ for i, pred in enumerate(outputs):
+ if len(pred) == 0:
+ continue
+ pred_masks = masks[i].cpu().numpy()
+ pred_masks = np.transpose(pred_masks, (2, 0, 1))
+ a = time.time()
+ with ThreadPool(64) as pool:
+ rles = pool.map(single_encode, pred_masks)
+ print("rle time")
+ b = time.time()
+ path, shape = Path(paths[i]), shapes[i][0]
+ self.scale_coords(imgs[i].shape[1:], pred[:, :4], shape, shapes[i][1])
+ image_id = int(path.stem) if self.is_coco else path.stem
+ bboxes = self.box_convert(pred[:, 0:4])
+ bboxes[:, :2] -= bboxes[:, 2:] / 2
+ cls = pred[:, 5]
+ scores = pred[:, 4]
+ for ind in range(pred.shape[0]):
+ category_id = ids[int(cls[ind])]
+ bbox = [round(x, 3) for x in bboxes[ind].tolist()]
+ score = round(scores[ind].item(), 5)
+ pred_data = {
+ "image_id": image_id,
+ "category_id": category_id,
+ "bbox": bbox,
+ "score": score,
+ 'segmentation': rles[ind]
+ }
+ pred_results.append(pred_data)
+ c = time.time()
+ print(b-a, c-b)
+ return pred_results
+
@staticmethod
def check_task(task):
if task not in ['train', 'val', 'test', 'speed']:
@@ -543,3 +684,48 @@ def convert_to_coco_format_trt(nums, boxes, scores, classes, paths, shapes, ids)
pred_results.extend(convert_to_coco_format_trt(nums, boxes, scores, classes, paths, shapes, self.ids))
self.speed_result[0] += self.batch_size
return dataloader, pred_results
+
+
+
+ @staticmethod
+ def handle_proto_test(proto_list, oconfs, imgshape, img_orishape=None):
+ '''
+ proto_list: [(bs, 32, w, h), ...]
+ conf: (bs, l, 33) -> which_proto, 32
+ '''
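+ # YOLACT-style masks: linearly combine the prototype maps with each instance's mask coefficients,
+ # apply sigmoid, upsample to the network input size, and crop to the predicted box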
+
+
+ def crop_mask(masks, boxes):
+ """
+ "Crop" predicted masks by zeroing out everything not in the predicted bbox.
+ Vectorized by Chong (thanks Chong).
+
+ Args:
+ - masks should be a size [n, h, w] tensor of masks
+ - boxes should be a size [n, 4] tensor of bbox coords in relative point form
+ """
+
+ n, h, w = masks.shape
+ x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n)
+ r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1)
+ c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1)
+ return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+ conf = oconfs[..., 6:]
+ if conf.shape[0] == 0:
+ return None
+
+ xyxy = oconfs[..., :4]
+ confs = conf[..., 1:]
+ proto = proto_list[0]
+
+ s = proto.shape[-2:]
+ seg = ((confs@proto.reshape(proto.shape[0], proto.shape[1], -1)).reshape(proto.shape[0], confs.shape[0], *s))
+ seg = seg.sigmoid()
+ masks = F.interpolate(seg, imgshape, mode='bilinear', align_corners=False)[0]
+ if img_orishape:
+ masks_ori = F.interpolate(seg, img_orishape, mode='nearest')[0]
+ else:
+ masks_ori = None
+ masks = crop_mask(masks, xyxy).gt_(0.5)
+ return masks
diff --git a/yolov6/core/inferer.py b/yolov6/core/inferer.py
index cea6586d..3fef6b35 100644
--- a/yolov6/core/inferer.py
+++ b/yolov6/core/inferer.py
@@ -13,11 +13,13 @@
from PIL import ImageFont
from collections import deque
+import torch.nn.functional as F
+
from yolov6.utils.events import LOGGER, load_yaml
from yolov6.layers.common import DetectBackend
from yolov6.data.data_augment import letterbox
from yolov6.data.datasets import LoadData
-from yolov6.utils.nms import non_max_suppression
+from yolov6.utils.nms import non_max_suppression_seg, non_max_suppression_seg_solo
from yolov6.utils.torch_utils import get_model_info
class Inferer:
@@ -67,10 +69,13 @@ def model_switch(self, model, img_size):
LOGGER.info("Switch model to deploy modality.")
- def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, save_txt, save_img, hide_labels, hide_conf, view_img=True):
+ def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir, save_txt, save_img, hide_labels, hide_conf, view_img=True, issolo=True, weight_nums=66, bias_nums=1, dyconv_channels=66):
''' Model Inference and results visualization '''
vid_path, vid_writer, windows = None, None, []
+ print(issolo)
fps_calculator = CalcFPS()
+ weight_nums = [weight_nums]
+ bias_nums = [bias_nums]
for img_src, img_path, vid_cap in tqdm(self.files):
img, img_src = self.process_image(img_src, self.img_size, self.stride, self.half)
img = img.to(self.device)
@@ -79,15 +84,31 @@ def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir,
# expand for batch dim
t1 = time.time()
pred_results = self.model(img)
- det = non_max_suppression(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)[0]
+ if not issolo:
+ loutputs = non_max_suppression_seg(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
+ else:
+ loutputs = non_max_suppression_seg_solo(pred_results, conf_thres, iou_thres, classes, agnostic_nms, max_det=max_det)
+ protos = pred_results[1][0]
+ segments = []
+ print(len(loutputs))
+ segconf = [loutputs[li][..., 0:] for li in range(len(loutputs))]
+ det = [loutputs[li][..., :6] for li in range(len(loutputs))][0]
+ if not issolo:
+ segments = [self.handle_proto_test([protos[li].reshape(1, *(protos[li].shape[-3:]))], segconf[li], img.shape[-2:]) for li in range(len(loutputs))][0]
+ else:
+ segments = [self.handle_proto_solo([protos[li].reshape(1, *(protos[li].shape[-3:]))], segconf[li], img.shape[-2:], weight_sums=weight_nums, bias_sums=bias_nums, dyconv=dyconv_channels) for li in range(len(loutputs))][0]
t2 = time.time()
+
+
if self.webcam:
save_path = osp.join(save_dir, self.webcam_addr)
txt_path = osp.join(save_dir, self.webcam_addr)
else:
# Create output files in nested dirs that mirrors the structure of the images' dirs
- rel_path = osp.relpath(osp.dirname(img_path), osp.dirname(self.source))
+ print(osp.dirname(img_path))
+ print(osp.dirname(self.source))
+ rel_path = "test"
save_path = osp.join(save_dir, rel_path, osp.basename(img_path)) # im.jpg
txt_path = osp.join(save_dir, rel_path, 'labels', osp.splitext(osp.basename(img_path))[0])
os.makedirs(osp.join(save_dir, rel_path), exist_ok=True)
@@ -98,9 +119,14 @@ def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir,
# check image and font
assert img_ori.data.contiguous, 'Image needs to be contiguous. Please apply to input images with np.ascontiguousarray(im).'
self.font_check()
-
if len(det):
det[:, :4] = self.rescale(img.shape[2:], det[:, :4], img_src.shape).round()
+
+
+ ii = 0
+ segments = self.rescale_mask(img.shape[2:], segments.cpu().numpy(), img_src.shape)
+ print(segments.shape)
+ segments = segments.transpose(2, 0, 1)
for *xyxy, conf, cls in reversed(det):
if save_txt: # Write to file
xywh = (self.box_convert(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
@@ -109,13 +135,16 @@ def infer(self, conf_thres, iou_thres, classes, agnostic_nms, max_det, save_dir,
f.write(('%g ' * len(line)).rstrip() % line + '\n')
if save_img:
+ print(cls)
class_num = int(cls) # integer class
label = None if hide_labels else (self.class_names[class_num] if hide_conf else f'{self.class_names[class_num]} {conf:.2f}')
- self.plot_box_and_label(img_ori, max(round(sum(img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=self.generate_colors(class_num, True))
+ img_ori = self.plot_box_and_label(img_ori, max(round(sum(img_ori.shape) / 2 * 0.003), 2), xyxy, label, color=self.generate_colors(class_num, True), segment=segments[ii])
+ ii += 1
img_src = np.asarray(img_ori)
+
# FPS counter
fps_calculator.update(1.0 / (t2 - t1))
avg_fps = fps_calculator.accumulate()
@@ -187,6 +216,21 @@ def rescale(ori_shape, boxes, target_shape):
return boxes
+ @staticmethod
+ def rescale_mask(ori_shape, masks, target_shape):
+ '''Rescale the output to the original image shape'''
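+ # undo the letterbox: strip the padded border at network resolution, then resize the mask stack back to the source image size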
+ ratio = min(ori_shape[0] / target_shape[0], ori_shape[1] / target_shape[1])
+ padding = int((ori_shape[1] - target_shape[1] * ratio) / 2), int((ori_shape[0] - target_shape[0] * ratio) / 2)
+
+
+ masks = masks[:, padding[1]: ori_shape[0]- padding[1], padding[0]: ori_shape[1] - padding[0]]
+ masks = masks.transpose(1, 2, 0)
+ masks = cv2.resize(masks, target_shape[:2][::-1])
+ if len(masks.shape) == 2:
+ masks = masks.reshape(*masks.shape, 1)
+
+ return masks
+
def check_img_size(self, img_size, s=32, floor=0):
"""Make sure image size is a multiple of stride s in each dimension, and return a new shape list of image."""
if isinstance(img_size, int): # integer i.e. img_size=640
@@ -204,6 +248,200 @@ def make_divisible(self, x, divisor):
# Upward revision the value x to make it evenly divisible by the divisor.
return math.ceil(x / divisor) * divisor
+ @staticmethod
+ def handle_proto(proto_list, oconfs, imgshape, det):
+ '''
+ proto_list: [(bs, 32, w, h), ...]
+ conf: (bs, l, 33) -> which_proto, 32
+ '''
+ def crop_mask(masks, boxes):
+ """
+ "Crop" predicted masks by zeroing out everything not in the predicted bbox.
+ Vectorized by Chong (thanks Chong).
+
+ Args:
+ - masks should be a size [n, h, w] tensor of masks
+ - boxes should be a size [n, 4] tensor of bbox coords in relative point form
+ """
+
+ n, h, w = masks.shape
+ x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n)
+ r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1)
+ c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1)
+ return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+ conf = oconfs[..., 6:]
+
+ xyxy = oconfs[..., :4]
+ which_proto = conf[..., 0]
+ confs = conf[..., 1:]
+ res = []
+ protos = proto_list[0]
+ for i, proto in enumerate([protos, protos, protos]):
+ s = proto.shape[-2:]
+ tconfs = confs[which_proto[..., 0] == i]
+ if tconfs.shape[0] == 0:
+ continue
+ tseg = ((tconfs@proto.reshape(proto.shape[0], proto.shape[1], -1)).reshape(proto.shape[0], tconfs.shape[1], *s))
+ print("a:")
+ print(which_proto[..., 0] == i)
+ tseg=tseg.sigmoid()
+ masks = F.interpolate(tseg, imgshape, mode='nearest')[0]
+ #return masks
+ print(xyxy[which_proto[..., 0] == i][0].shape)
+ masks = crop_mask(masks, xyxy[which_proto[..., 0] == i][0])[0]
+ res.append(masks.gt_(0.5))
+ return torch.cat(res, dim = 0), xyxy[which_proto[..., 0] == i][0]
+
+
+ @staticmethod
+ def handle_proto_test(proto_list, oconfs, imgshape, img_orishape=None):
+ '''
+ proto_list: [(bs, 32, w, h), ...]
+ conf: (bs, l, 33) -> which_proto, 32
+ '''
+ def crop_mask(masks, boxes):
+ """
+ "Crop" predicted masks by zeroing out everything not in the predicted bbox.
+ Vectorized by Chong (thanks Chong).
+
+ Args:
+ - masks should be a size [n, h, w] tensor of masks
+ - boxes should be a size [n, 4] tensor of bbox coords in relative point form
+ """
+
+ n, h, w = masks.shape
+ x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n)
+ r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1)
+ c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1)
+ return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+ conf = oconfs[..., 6:]
+ if conf.shape[0] == 0:
+ return None
+
+ xyxy = oconfs[..., :4]
+ confs = conf[..., 1:]
+ proto = proto_list[0]
+ s = proto.shape[-2:]
+ seg = ((confs@proto.reshape(proto.shape[0], proto.shape[1], -1)).reshape(proto.shape[0], confs.shape[0], *s))
+ seg = seg.sigmoid()
+ masks = F.interpolate(seg, imgshape, mode='bilinear', align_corners=False)[0]
+ if img_orishape:
+ masks_ori = F.interpolate(seg, img_orishape, mode='nearest')[0]
+ else:
+ masks_ori = None
+ masks = crop_mask(masks, xyxy).gt_(0.5)
+ return masks
+
+ # def handle_proto_solo(self, proto_list, oconfs, imgshape, weight_sums=66, bias_sums=66, dyconv=66, img_orishape=None):
+ # '''
+ # proto_list: [(bs, 32, w, h), ...]
+ # conf: (bs, l, 33) -> which_proto, 32
+ # '''
+ # def crop_mask(masks, boxes):
+ # """
+ # "Crop" predicted masks by zeroing out everything not in the predicted bbox.
+ # Vectorized by Chong (thanks Chong).
+
+ # Args:
+ # - masks should be a size [n, h, w] tensor of masks
+ # - boxes should be a size [n, 4] tensor of bbox coords in relative point form
+ # """
+
+ # n, h, w = masks.shape
+ # x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n)
+ # r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1)
+ # c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1)
+ # return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+ # conf = oconfs[..., 6:]
+ # if conf.shape[0] == 0:
+ # return None
+
+ # xyxy = oconfs[..., :4]
+ # confs = conf[..., 1:]
+ # proto = proto_list[0]
+ # s = proto.shape[-2:]
+ # num_inst = confs.shape[0]
+ # proto = proto.reshape(1, -1, *proto.shape[-2:])
+ # proto = proto.repeat(num_inst, 1, 1, 1)
+ # weights, biases = self.parse_dynamic_params(confs, weight_nums=weight_sums, bias_nums=bias_sums, dyconv_channels=dyconv)
+ # n_layers = len(weights)
+ # for i, (weight, bias) in enumerate(zip(weights, biases)):
+ # x = F.conv2d(
+ # proto, weight, bias=bias, stride=1, padding=0, groups=num_inst)
+ # if i < n_layers - 1:
+ # x = F.relu(x)
+ # x = x.reshape(num_inst, *proto.shape[-2:])
+ # seg = x.sigmoid()
+ # masks = F.interpolate(seg, imgshape, mode='bilinear', align_corners=False)[0]
+ # if img_orishape:
+ # masks_ori = F.interpolate(seg, img_orishape, mode='nearest')[0]
+ # else:
+ # masks_ori = None
+ # masks = crop_mask(masks, xyxy).gt_(0.5)
+ # return masks
+ def handle_proto_solo(self, proto_list, oconfs, imgshape, weight_sums=66, bias_sums=1, dyconv=66, img_orishape=None):
+ '''
+ proto_list: [(bs, 32, w, h), ...]
+ conf: (bs, l, 33) -> which_proto, 32
+ '''
+ def handle_proto_coord(proto):
+ _ = proto.shape[-2:]
+ x = torch.arange(0, 1, step = 1 / _[1]).unsqueeze(0).unsqueeze(0).repeat(1, _[0], 1).to(proto.dtype).to(proto.device)
+ y = torch.arange(0, 1, step = 1 / _[0]).unsqueeze(0).T.unsqueeze(0).repeat(1, 1, _[1]).to(proto.dtype).to(proto.device)
+ return torch.cat([proto, x, y]).reshape(1, -1, *_)
+
+ def crop_mask(masks, boxes):
+ """
+ "Crop" predicted masks by zeroing out everything not in the predicted bbox.
+ Vectorized by Chong (thanks Chong).
+
+ Args:
+ - masks should be a size [n, h, w] tensor of masks
+ - boxes should be a size [n, 4] tensor of bbox coords in relative point form
+ """
+
+ n, h, w = masks.shape
+ x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n)
+ r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1)
+ c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # cols shape(h,1,1)
+ return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+ conf = oconfs[..., 6:]
+ if conf.shape[0] == 0:
+ return None
+
+ xyxy = oconfs[..., :4]
+ confs = conf[..., 1:]
+ proto = proto_list[0][0]
+ proto = handle_proto_coord(proto)
+ s = proto.shape[-2:]
+ num_inst = confs.shape[0]
+ proto = proto.reshape(1, -1, *proto.shape[-2:])
+ weights, biases = self.parse_dynamic_params(confs, weight_nums=weight_sums, bias_nums=bias_sums, dyconv_channels=dyconv)
+ n_layers = len(weights)
+ for i, (weight, bias) in enumerate(zip(weights, biases)):
+ x = F.conv2d(
+ proto, weight, bias=bias, stride=1, padding=0, groups=1)
+ if i < n_layers - 1:
+ x = F.relu(x)
+ x = x.reshape(num_inst, *proto.shape[-2:]).unsqueeze(0)
+ seg = x.sigmoid()
+ masks = F.interpolate(seg, imgshape, mode='bilinear', align_corners=False)[0]
+ if img_orishape:
+ masks_ori = F.interpolate(seg, img_orishape, mode='nearest')[0]
+ else:
+ masks_ori = None
+ masks = crop_mask(masks, xyxy).gt_(0.5)
+ return masks
+
+
+
+
+
@staticmethod
def draw_text(
img,
@@ -237,9 +475,10 @@ def draw_text(
return text_size
@staticmethod
- def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255), font=cv2.FONT_HERSHEY_COMPLEX):
+ def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_color=(255, 255, 255), font=cv2.FONT_HERSHEY_COMPLEX, segment=None):
# Add one xyxy box to image with label
p1, p2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3]))
+ common_color = [[128,0,0], [255,0,0],[255,0,255],[255,102,0],[51,51,0],[0,51,0],[51,204,204],[0,128,128],[0,204,255]]
cv2.rectangle(image, p1, p2, color, thickness=lw, lineType=cv2.LINE_AA)
if label:
tf = max(lw - 1, 1) # font thickness
@@ -249,6 +488,13 @@ def plot_box_and_label(image, lw, box, label='', color=(128, 128, 128), txt_colo
cv2.rectangle(image, p1, p2, color, -1, cv2.LINE_AA) # filled
cv2.putText(image, label, (p1[0], p1[1] - 2 if outside else p1[1] + h + 2), font, lw / 3, txt_color,
thickness=tf, lineType=cv2.LINE_AA)
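+ # optionally alpha-blend the instance mask onto the image with a randomly chosen color from common_color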
+ if segment is not None:
+ import random
+ ii=random.randint(0, len(common_color)-1)
+ colr = np.asarray(common_color[ii])
+ colr = colr.reshape(1,3).repeat((image.shape[0] * image.shape[1]), axis = 0).reshape(image.shape[0], image.shape[1], 3)
+ image = cv2.addWeighted(image, 1, (colr * segment.reshape(*segment.shape[:2], 1)).astype(image.dtype), 0.8, 1)
+ return image
@staticmethod
def font_check(font='./yolov6/utils/Arial.ttf', size=10):
@@ -280,6 +526,27 @@ def generate_colors(i, bgr=False):
num = len(palette)
color = palette[int(i) % num]
return (color[2], color[1], color[0]) if bgr else color
+
+ def parse_dynamic_params(self, flatten_kernels, weight_nums, bias_nums, dyconv_channels):
+ """split kernel head prediction to conv weight and bias."""
+ n_inst = flatten_kernels.size(0)
+ n_layers = len(weight_nums)
+ params_splits = list(
+ torch.split_with_sizes(
+ flatten_kernels, weight_nums + bias_nums, dim=1))
+ weight_splits = params_splits[:n_layers]
+ bias_splits = params_splits[n_layers:]
+ for i in range(n_layers):
+ if i < n_layers - 1:
+ weight_splits[i] = weight_splits[i].reshape(
+ n_inst * dyconv_channels, -1, 1, 1)
+ bias_splits[i] = bias_splits[i].reshape(n_inst *
+ dyconv_channels)
+ else:
+ weight_splits[i] = weight_splits[i].reshape(n_inst, -1, 1, 1)
+ bias_splits[i] = bias_splits[i].reshape(n_inst)
+
+ return weight_splits, bias_splits
class CalcFPS:
def __init__(self, nsamples: int = 50):
diff --git a/yolov6/data/data_augment.py b/yolov6/data/data_augment.py
index 45df88e6..e21c3873 100644
--- a/yolov6/data/data_augment.py
+++ b/yolov6/data/data_augment.py
@@ -26,7 +26,7 @@ def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed
-def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=False, scaleup=True, stride=32):
'''Resize and pad image while meeting stride-multiple constraints.'''
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
@@ -51,19 +51,22 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleu
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
+
return im, r, (left, top)
-def mixup(im, labels, im2, labels2):
+def mixup(im, labels, segments, im2, labels2, segments2):
'''Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf.'''
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
im = (im * r + im2 * (1 - r)).astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
- return im, labels
+ segments = np.concatenate((segments, segments2), 0)
+ return im, labels, segments
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
@@ -78,19 +81,17 @@ def random_affine(img, labels=(), degrees=10, translate=.1, scale=.1, shear=10,
new_shape=(640, 640)):
'''Applies Random affine transformation.'''
n = len(labels)
- if isinstance(new_shape, int):
- height = width = new_shape
- else:
- height, width = new_shape
+ height, width = new_shape
M, s = get_transform_matrix(img.shape[:2], (height, width), degrees, scale, shear, translate)
if (M != np.eye(3)).any(): # image changed
img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
# Transform label coordinates
+ new_segments = []
if n:
new = np.zeros((n, 4))
-
+
xy = np.ones((n * 4, 3))
xy[:, :2] = labels[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = xy @ M.T # transform
@@ -113,6 +114,7 @@ def random_affine(img, labels=(), degrees=10, translate=.1, scale=.1, shear=10,
return img, labels
+
def get_transform_matrix(img_shape, new_shape, degrees, scale, shear, translate):
new_height, new_width = new_shape
# Center
@@ -147,6 +149,7 @@ def mosaic_augmentation(shape, imgs, hs, ws, labels, hyp, specific_shape = False
'''Applies Mosaic augmentation.'''
assert len(imgs) == 4, "Mosaic augmentation of current version only supports 4 images."
labels4 = []
+
if not specific_shape:
if isinstance(shape, list) or isinstance(shape, np.ndarray):
target_height, target_width = shape
@@ -180,15 +183,18 @@ def mosaic_augmentation(shape, imgs, hs, ws, labels, hyp, specific_shape = False
# Labels
labels_per_img = labels[i].copy()
+
if labels_per_img.size:
boxes = np.copy(labels_per_img[:, 1:])
boxes[:, 0] = w * (labels_per_img[:, 1] - labels_per_img[:, 3] / 2) + padw # top left x
boxes[:, 1] = h * (labels_per_img[:, 2] - labels_per_img[:, 4] / 2) + padh # top left y
boxes[:, 2] = w * (labels_per_img[:, 1] + labels_per_img[:, 3] / 2) + padw # bottom right x
boxes[:, 3] = h * (labels_per_img[:, 2] + labels_per_img[:, 4] / 2) + padh # bottom right y
+
labels_per_img[:, 1:] = boxes
labels4.append(labels_per_img)
+
# Concat/clip labels
labels4 = np.concatenate(labels4, 0)
@@ -196,6 +202,7 @@ def mosaic_augmentation(shape, imgs, hs, ws, labels, hyp, specific_shape = False
# np.clip(x, 0, 2 * s, out=x)
labels4[:, 1::2] = np.clip(labels4[:, 1::2], 0, 2 * target_width)
labels4[:, 2::2] = np.clip(labels4[:, 2::2], 0, 2 * target_height)
+
# Augment
img4, labels4 = random_affine(img4, labels4,
@@ -205,4 +212,4 @@ def mosaic_augmentation(shape, imgs, hs, ws, labels, hyp, specific_shape = False
shear=hyp['shear'],
new_shape=(target_height, target_width))
- return img4, labels4
+ return img4, labels4
\ No newline at end of file
diff --git a/yolov6/data/data_load.py b/yolov6/data/data_load.py
index e68e8d71..923ab1f2 100644
--- a/yolov6/data/data_load.py
+++ b/yolov6/data/data_load.py
@@ -7,7 +7,7 @@
import torch.distributed as dist
from torch.utils.data import dataloader, distributed
-from .datasets import TrainValDataset
+from .seg_datasets import TrainValDataset
from yolov6.utils.events import LOGGER
from yolov6.utils.torch_utils import torch_distributed_zero_first
diff --git a/yolov6/data/seg_data_augment.py b/yolov6/data/seg_data_augment.py
new file mode 100644
index 00000000..6a2c87b6
--- /dev/null
+++ b/yolov6/data/seg_data_augment.py
@@ -0,0 +1,298 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+# This code is based on
+# https://github.com/ultralytics/yolov5/blob/master/utils/dataloaders.py
+
+import math
+import random
+
+import cv2
+import numpy as np
+
+
+def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
+ '''HSV color-space augmentation.'''
+ if hgain or sgain or vgain:
+ r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
+ hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
+ dtype = im.dtype # uint8
+
+ x = np.arange(0, 256, dtype=r.dtype)
+ lut_hue = ((x * r[0]) % 180).astype(dtype)
+ lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
+ lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
+
+ im_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val)))
+ cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR, dst=im) # no return needed
+
+
+def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
+ '''Resize and pad image while meeting stride-multiple constraints.'''
+ shape = im.shape[:2] # current shape [height, width]
+ if isinstance(new_shape, int):
+ new_shape = (new_shape, new_shape)
+ elif isinstance(new_shape, list) and len(new_shape) == 1:
+ new_shape = (new_shape[0], new_shape[0])
+
+ # Scale ratio (new / old)
+ r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+ if not scaleup: # only scale down, do not scale up (for better val mAP)
+ r = min(r, 1.0)
+
+ # Compute padding
+ new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
+ dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
+
+ if auto: # minimum rectangle
+ dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
+
+ dw /= 2 # divide padding into 2 sides
+ dh /= 2
+
+ if shape[::-1] != new_unpad: # resize
+ im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
+ top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+ left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+ im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
+
+ return im, r, (left, top)
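+    # Illustrative example: for a 720x1280 (h, w) input and new_shape=(640, 640) with
+    # auto=True and stride=32, r = 0.5, new_unpad = (640, 360), dw, dh = 0, 280 -> mod 32
+    # -> 0, 24, so 12 px of gray padding is added to the top and bottom (output 384x640).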
+
+
+def mixup(im, labels, segments, im2, labels2, segments2):
+ # Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
+ r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
+ im = (im * r + im2 * (1 - r)).astype(np.uint8)
+ labels = np.concatenate((labels, labels2), 0)
+ segments = np.concatenate((segments, segments2), 0)
+ return im, labels, segments
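+    # Note: Beta(32, 32) concentrates the mixup ratio near 0.5, so both images contribute
+    # roughly equally; the labels and segments of the two images are simply concatenated.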
+
+
+def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
+ '''Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio.'''
+ w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
+ w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
+ ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
+    return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr)  # candidates
+
+
+def random_affine(img, labels=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10,
+ new_shape=(640, 640), task=""):
+ '''Applies Random affine transformation.'''
+ n = len(labels)
+ if isinstance(new_shape, int):
+ new_shape = (new_shape, new_shape)
+ height, width = new_shape
+ # print(height, width, (height, width))
+
+ M, s = get_transform_matrix(img.shape[:2], (height, width), degrees, scale, shear, translate)
+ if (M != np.eye(3)).any(): # image changed
+ img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(114, 114, 114))
+
+ new_segments = []
+ # Transform label coordinates
+ if n:
+ new = np.zeros((n, 4))
+ segments = resample_segments(segments)
+ for i, segment in enumerate(segments):
+ xy = np.ones((len(segment), 3))
+ xy[:, :2] = segment
+ xy = xy @ M.T # transform
+ xy = (xy[:, :2])
+
+ # clip
+ new[i] = segment2box(xy, width, height)
+ new_segments.append(xy)
+ i = box_candidates(box1=labels[:, 1:5].T * s, box2=new.T, area_thr=0.01)
+ if task!="val":
+ labels = labels[i]
+ labels[:, 1:5] = new[i]
+ new_segments = np.array(new_segments)[i]
+ else:
+ labels[:, 1:5] = new
+ new_segments = np.array(new_segments)
+ return img, labels, new_segments
+
+def copy_paste(im, labels, segments, p=0.5):
+ # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
+ n = len(segments)
+ if p and n:
+ h, w, c = im.shape # height, width, channels
+ im_new = np.zeros(im.shape, np.uint8)
+ for j in random.sample(range(n), k=round(p * n)):
+ l, s = labels[j], segments[j]
+ box = w - l[3], l[2], w - l[1], l[4]
+ ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
+ if (ioa < 0.30).all(): # allow 30% obscuration of existing labels
+ labels = np.concatenate((labels, [[l[0], *box]]), 0)
+ segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
+ cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (1, 1, 1), cv2.FILLED)
+ result = cv2.flip(im, 1) # augment segments (flip left-right)
+ i = cv2.flip(im_new, 1).astype(bool)
+ im[i] = result[i] # cv2.imwrite('debug.jpg', im) # debug
+
+ return im, labels, segments
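+    # Sketch of the approach: a random subset of instances is mirrored left-right and pasted
+    # back onto the image when the mirrored box overlaps existing boxes by less than 30% IoA;
+    # the mirrored boxes and segments are appended to labels/segments.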
+
+def bbox_ioa(box1, box2, eps=1e-7):
+ """ Returns the intersection over box2 area given box1, box2. Boxes are x1y1x2y2
+ box1: np.array of shape(4)
+ box2: np.array of shape(nx4)
+ returns: np.array of shape(n)
+ """
+
+ # Get the coordinates of bounding boxes
+ b1_x1, b1_y1, b1_x2, b1_y2 = box1
+ b2_x1, b2_y1, b2_x2, b2_y2 = box2.T
+
+ # Intersection area
+ inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
+ (np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
+
+ # box2 area
+ box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps
+
+ # Intersection over box2 area
+ return inter_area / box2_area
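+    # Example: box1 = [0, 0, 10, 10] and box2 = [[5, 5, 15, 15]] intersect over a 5x5 area,
+    # so bbox_ioa returns about 25 / 100 = 0.25 (intersection divided by box2 area).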
+
+
+def regen_labels(labels=None, segments=None, new_shape=(640, 640)):
+ '''Applies Random affine transformation.'''
+ n = len(segments)
+ if isinstance(new_shape, int):
+ new_shape = (new_shape, new_shape)
+ height, width = new_shape
+
+ new_segments = []
+ # Transform label coordinates
+ if n:
+ new = np.zeros((n, 4))
+ segments = resample_segments(segments)
+ for i, segment in enumerate(segments):
+ new[i] = segment2box(segment, width, height)
+ new_segments.append(segment)
+        labels[:, 1:5] = new
+        new_segments = np.array(new_segments)
+
+ return labels, new_segments
+
+def resample_segments(segments, n=1000):
+ # Up-sample an (n,2) segment
+ for i, s in enumerate(segments):
+ s = np.concatenate((s, s[0:1, :]), axis=0)
+ x = np.linspace(0, len(s) - 1, n)
+ xp = np.arange(len(s))
+ segments[i] = np.concatenate([np.interp(x, xp, s[:, i]) for i in range(2)]).reshape(2, -1).T # segment xy
+ return segments
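+    # Each polygon is closed (its first point is appended) and linearly re-interpolated to a
+    # fixed n=1000 points, so downstream code always sees equally sized (n, 2) segments.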
+
+
+def get_transform_matrix(img_shape, new_shape, degrees, scale, shear, translate):
+ new_height, new_width = new_shape
+ # print(new_height, new_width)
+ # Center
+ C = np.eye(3)
+ C[0, 2] = -img_shape[1] / 2 # x translation (pixels)
+ C[1, 2] = -img_shape[0] / 2 # y translation (pixels)
+
+ # Rotation and Scale
+ R = np.eye(3)
+ a = random.uniform(-degrees, degrees)
+ # a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
+ s = random.uniform(1 - scale, 1 + scale)
+ # s = 2 ** random.uniform(-scale, scale)
+ R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
+
+ # Shear
+ S = np.eye(3)
+ S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
+ S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
+
+ # Translation
+ T = np.eye(3)
+ T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * new_width # x translation (pixels)
+    T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * new_height  # y translation (pixels)
+
+ # Combined rotation matrix
+ M = T @ S @ R @ C # order of operations (right to left) is IMPORTANT
+ return M, s
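+    # The combined matrix maps original-image coordinates into the augmented image: first
+    # center (C), then rotate/scale (R), shear (S), and finally translate (T), i.e.
+    # p' = T @ S @ R @ C @ p for homogeneous points p.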
+
+
+def mosaic_augmentation(shape, imgs, hs, ws, labels, segments, hyp, specific_shape = False, target_height=640, target_width=640):
+ '''Applies Mosaic augmentation.'''
+ assert len(imgs) == 4, "Mosaic augmentation of current version only supports 4 images."
+ labels4 = []
+ segments4 = []
+ if not specific_shape:
+ if isinstance(shape, list) or isinstance(shape, np.ndarray):
+ target_height, target_width = shape
+ else:
+ target_height = target_width = shape
+
+ yc, xc = (int(random.uniform(x//2, 3*x//2)) for x in (target_height, target_width) ) # mosaic center x, y
+
+ for i in range(len(imgs)):
+ # Load image
+ img, h, w = imgs[i], hs[i], ws[i]
+ # place img in img4
+ if i == 0: # top left
+ img4 = np.full((target_height * 2, target_width * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
+
+ x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
+ elif i == 1: # top right
+ x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, target_width * 2), yc
+ x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
+ elif i == 2: # bottom left
+ x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(target_height * 2, yc + h)
+ x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
+ elif i == 3: # bottom right
+ x1a, y1a, x2a, y2a = xc, yc, min(xc + w, target_width * 2), min(target_height * 2, yc + h)
+ x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
+
+ img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
+ padw = x1a - x1b
+ padh = y1a - y1b
+
+ # Labels
+ labels_per_img = labels[i].copy()
+ segments_per_img = segments[i].copy()
+ if labels_per_img.size:
+ boxes = np.copy(labels_per_img[:, 1:])
+ boxes[:, 0] = w * (labels_per_img[:, 1] - labels_per_img[:, 3] / 2) + padw # top left x
+ boxes[:, 1] = h * (labels_per_img[:, 2] - labels_per_img[:, 4] / 2) + padh # top left y
+ boxes[:, 2] = w * (labels_per_img[:, 1] + labels_per_img[:, 3] / 2) + padw # bottom right x
+ boxes[:, 3] = h * (labels_per_img[:, 2] + labels_per_img[:, 4] / 2) + padh # bottom right y
+ for __ in range(len(segments_per_img)):
+ segments_per_img[__][:, 0] = w * segments_per_img[__][:, 0] + padw
+ segments_per_img[__][:, 1] = h * segments_per_img[__][:, 1] + padh
+ labels_per_img[:, 1:] = boxes
+
+ labels4.append(labels_per_img)
+ segments4.extend(segments_per_img)
+
+ # Concat/clip labels
+ labels4 = np.concatenate(labels4, 0)
+ # for x in (labels4[:, 1:]):
+ # np.clip(x, 0, 2 * s, out=x)
+ labels4[:, 1::2] = np.clip(labels4[:, 1::2], 0, 2 * target_width)
+ labels4[:, 2::2] = np.clip(labels4[:, 2::2], 0, 2 * target_height)
+ for __ in range(len(segments4)):
+ segments4[__][:, 0] = np.clip(segments4[__][:, 0], 0, 2 * target_width)
+ segments4[__][:, 1] = np.clip(segments4[__][:, 1], 0, 2 * target_height)
+
+    # Augment
+    # Note: the random affine transform is applied by the caller (get_mosaic in
+    # seg_datasets.py) after copy_paste, so the mosaic result is returned here untouched.
+    return img4, labels4, segments4
+
+def segment2box(segment, width=640, height=640):
+ # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)
+ x, y = segment.T # segment xy
+ inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
+ x, y, = x[inside], y[inside]
+ return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy
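+    # Points outside the image are discarded before taking the min/max, so the resulting box
+    # is clipped to the visible part of the polygon; if no points survive, a zero box is returned.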
+
diff --git a/yolov6/data/seg_datasets.py b/yolov6/data/seg_datasets.py
new file mode 100644
index 00000000..8cca6513
--- /dev/null
+++ b/yolov6/data/seg_datasets.py
@@ -0,0 +1,859 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+import glob
+from io import UnsupportedOperation
+import os
+import os.path as osp
+import random
+import json
+import time
+import hashlib
+from pathlib import Path
+import copy
+
+from multiprocessing.pool import Pool
+
+import cv2
+import numpy as np
+from tqdm import tqdm
+from PIL import ExifTags, Image, ImageOps
+
+import torch
+from torch.utils.data import Dataset
+import torch.distributed as dist
+
+from .seg_data_augment import (
+ augment_hsv,
+ letterbox,
+ mixup,
+ random_affine,
+ mosaic_augmentation,
+ copy_paste
+)
+from yolov6.utils.events import LOGGER
+import pickle
+
+
+# Parameters
+IMG_FORMATS = ["bmp", "jpg", "jpeg", "png", "tif", "tiff", "dng", "webp", "mpo"]
+VID_FORMATS = ["mp4", "mov", "avi", "mkv"]
+IMG_FORMATS.extend([f.upper() for f in IMG_FORMATS])
+VID_FORMATS.extend([f.upper() for f in VID_FORMATS])
+# Get orientation exif tag
+for k, v in ExifTags.TAGS.items():
+ if v == "Orientation":
+ ORIENTATION = k
+ break
+
+def img2label_paths(img_paths):
+ # Define label paths as a function of image paths
+ sa, sb = f'{os.sep}images{os.sep}', f'{os.sep}labels{os.sep}' # /images/, /labels/ substrings
+ return [sb.join(x.rsplit(sa, 1)).rsplit('.', 1)[0] + '.txt' for x in img_paths]
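+    # e.g. (illustrative path) '/data/images/train/0001.jpg' -> '/data/labels/train/0001.txt'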
+
+class TrainValDataset(Dataset):
+ '''YOLOv6 train_loader/val_loader, loads images and labels for training and validation.'''
+ def __init__(
+ self,
+ img_dir,
+ img_size=640,
+ batch_size=16,
+ augment=False,
+ hyp=None,
+ rect=False,
+ check_images=False,
+ check_labels=False,
+ stride=32,
+ pad=0.0,
+ rank=-1,
+ data_dict=None,
+ task="train",
+ specific_shape = False,
+ height=1088,
+ width=1920,
+ downsample_ratio=4,
+ overlap=False
+ ):
+ assert task.lower() in ("train", "val", "test", "speed"), f"Not supported task: {task}"
+
+ t1 = time.time()
+ self.__dict__.update(locals())
+ if task.lower()!="train":
+ self.downsample_ratio = 1
+ self.main_process = self.rank in (-1, 0)
+ self.task = self.task.capitalize()
+ self.class_names = data_dict["names"]
+ self.img_paths, self.labels = self.get_imgs_labels(self.img_dir)
+ self.labels, self.segments = self.get_segment(self.labels)
+
+ self.rect = rect
+ self.specific_shape = specific_shape
+ self.target_height = height
+ self.target_width = width
+ if self.rect:
+ shapes = [self.img_info[p]["shape"] for p in self.img_paths]
+ self.shapes = np.array(shapes, dtype=np.float64)
+ if dist.is_initialized():
+ # in DDP mode, we need to make sure all images within batch_size * gpu_num
+                # will be resized and padded to the same shape.
+ sample_batch_size = self.batch_size * dist.get_world_size()
+ else:
+ sample_batch_size = self.batch_size
+ self.batch_indices = np.floor(
+ np.arange(len(shapes)) / sample_batch_size
+ ).astype(
+ np.int_
+ ) # batch indices of each image
+
+ self.sort_files_shapes()
+
+ t2 = time.time()
+ if self.main_process:
+ LOGGER.info(f"%.1fs for dataset initialization." % (t2 - t1))
+
+ def __len__(self):
+ """Get the length of dataset"""
+ return len(self.img_paths)
+
+ def __getitem__(self, index):
+ """Fetching a data sample for a given key.
+ This function applies mosaic and mixup augments during training.
+ During validation, letterbox augment is applied.
+ """
+ target_shape = (
+ (self.target_height, self.target_width) if self.specific_shape else
+ self.batch_shapes[self.batch_indices[index]] if self.rect
+ else self.img_size
+ )
+
+ # Mosaic Augmentation
+ if self.augment and random.random() < self.hyp["mosaic"]:
+ img, labels, segments = self.get_mosaic(index, target_shape)
+ shapes = None
+
+
+ # MixUp augmentation
+ if random.random() < self.hyp["mixup"]:
+ img_other, labels_other, segments_other = self.get_mosaic(
+ random.randint(0, len(self.img_paths) - 1), target_shape
+ )
+ img, labels, segments = mixup(img, labels, segments, img_other, labels_other, segments_other) # To Change
+
+ else:
+ # Load image
+ if self.hyp and "shrink_size" in self.hyp:
+ img, (h0, w0), (h, w) = self.load_image(index, self.hyp["shrink_size"])
+ else:
+ img, (h0, w0), (h, w) = self.load_image(index)
+
+ # letterbox
+ img, ratio, pad = letterbox(img, target_shape, auto=False, scaleup=self.augment)
+ shapes = (h0, w0), ((h * ratio / h0, w * ratio / w0), pad) # for COCO mAP rescaling
+ labels = copy.deepcopy(self.labels[index])
+ segments = copy.deepcopy(self.segments[index])
+
+ if labels.size:
+ w *= ratio
+ h *= ratio
+ # new boxes
+ boxes = np.copy(labels[:, 1:5])
+ boxes[:, 0] = (
+ w * (labels[:, 1] - labels[:, 3] / 2) + pad[0]
+ ) # top left x
+ boxes[:, 1] = (
+ h * (labels[:, 2] - labels[:, 4] / 2) + pad[1]
+ ) # top left y
+ boxes[:, 2] = (
+ w * (labels[:, 1] + labels[:, 3] / 2) + pad[0]
+ ) # bottom right x
+ boxes[:, 3] = (
+ h * (labels[:, 2] + labels[:, 4] / 2) + pad[1]
+ ) # bottom right y
+ labels[:, 1:] = boxes
+
+ if len(segments):
+ for i_s in range(len(segments)):
+ segments[i_s][:, 0] = segments[i_s][:, 0] * ratio * w + pad[0]
+ segments[i_s][:, 1] = segments[i_s][:, 1] * ratio * h + pad[1]
+
+ if self.augment:
+ img, labels, segments = random_affine(
+ img,
+ labels,
+ segments,
+ degrees=self.hyp["degrees"],
+ translate=self.hyp["translate"],
+ scale=self.hyp["scale"],
+ shear=self.hyp["shear"],
+ new_shape=target_shape,
+ )
+ else:
+ img, labels, segments = random_affine(
+ img,
+ labels,
+ segments,
+ degrees=0,
+ translate=0,
+ scale=0,
+ shear=0,
+ new_shape=target_shape,
+ task="val"
+ )
+
+
+ if len(labels):
+ h, w = img.shape[:2]
+
+ labels[:, [1, 3]] = labels[:, [1, 3]].clip(0, w - 1e-3) # x1, x2
+ labels[:, [2, 4]] = labels[:, [2, 4]].clip(0, h - 1e-3) # y1, y2
+
+ boxes = np.copy(labels[:, 1:])
+ boxes[:, 0] = ((labels[:, 1] + labels[:, 3]) / 2) / w # x center
+ boxes[:, 1] = ((labels[:, 2] + labels[:, 4]) / 2) / h # y center
+ boxes[:, 2] = (labels[:, 3] - labels[:, 1]) / w # width
+ boxes[:, 3] = (labels[:, 4] - labels[:, 2]) / h # height
+ labels[:, 1:] = boxes
+ lindex = labels[:, 0] >= 0
+ masks = self.polygons2masks(img.shape[:2], segments, color=1, downsample_ratio=self.downsample_ratio)
+ labels = labels[lindex]
+ masks = masks[lindex]
+
+ else:
+ masks = np.asarray([])
+
+ if self.augment:
+ img, labels, masks = self.general_augment(img, labels, masks.transpose(1, 2, 0) if masks.shape[0]!=0 else masks)
+
+ masks_out = (torch.from_numpy(masks.copy()) if len(masks) else torch.zeros(1 if self.overlap else len(labels), img.shape[0] //
+ self.downsample_ratio, img.shape[1] //
+ self.downsample_ratio))
+
+ labels_out = torch.zeros((len(labels), 6))
+ if len(labels):
+ labels_out[:, 1:] = torch.from_numpy(labels)
+
+ # Convert
+ # self.drawit(img, labels, masks, self.img_paths[index], self.task)
+ img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
+ img = np.ascontiguousarray(img)
+ return torch.from_numpy(img), labels_out, self.img_paths[index], shapes, masks_out
+
+ def load_image(self, index, shrink_size=None):
+ """Load image.
+        This function loads an image with cv2 and resizes it to the target shape (img_size) while keeping the aspect ratio.
+
+ Returns:
+ Image, original shape of image, resized image shape
+ """
+ path = self.img_paths[index]
+ try:
+ im = cv2.imread(path)
+            assert im is not None, f"opencv cannot read image correctly or {path} does not exist"
+ except:
+ im = cv2.cvtColor(np.asarray(Image.open(path)), cv2.COLOR_RGB2BGR)
+ assert im is not None, f"Image Not Found {path}, workdir: {os.getcwd()}"
+
+ h0, w0 = im.shape[:2] # origin shape
+ if self.specific_shape:
+ # keep ratio resize
+ ratio = min(self.target_width / w0, self.target_height / h0)
+
+ elif shrink_size:
+ ratio = (self.img_size - shrink_size) / max(h0, w0)
+
+ else:
+ ratio = self.img_size / max(h0, w0)
+
+ if ratio != 1:
+ im = cv2.resize(
+ im,
+ (int(w0 * ratio), int(h0 * ratio)),
+ interpolation=cv2.INTER_AREA
+ if ratio < 1 and not self.augment
+ else cv2.INTER_LINEAR,
+ )
+ return im, (h0, w0), im.shape[:2]
+
+ @staticmethod
+ def collate_fn(batch):
+ """Merges a list of samples to form a mini-batch of Tensor(s)"""
+ img, label, path, shapes, masks = zip(*batch)
+ for i, l in enumerate(label):
+ l[:, 0] = i # add target image index for build_targets()
+ return torch.stack(img, 0), torch.cat(label, 0), path, shapes, torch.cat(masks, 0)
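+        # Column 0 of each label row is overwritten with the sample's index in the batch so
+        # targets can be matched back to their image; images are stacked, while labels and
+        # masks are concatenated along dim 0.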
+
+ @staticmethod
+ def get_segment(labels):
+ rlabels = []
+ segments = []
+ if len(labels) == 0:
+            return [], []  # no labels at all: empty label and segment lists
+ for label in labels:
+            z1 = []  # box labels
+            z2 = []  # segment polygons
+ for l in label:
+ z1.append(np.asarray(l[:5]).reshape(1, 5).astype(np.float32))
+ z2.append(np.asarray(l[1:]).reshape(-1, 2).astype(np.float32))
+ if z1:
+ rlabels.append(np.concatenate(z1, axis = 0))
+ segments.append(z2)
+ else:
+ t = np.zeros((1, 5), dtype = np.float32)
+ t[..., 0]= -1
+ rlabels.append(t)
+ segments.append([np.zeros((2, 2), dtype = np.float32)])
+ return rlabels, segments
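+        # For each label line, the first five fields are kept as a (cls, x, y, w, h)-style row
+        # and all fields after the class id are reshaped into (N, 2) polygon points. Images
+        # without labels get a dummy row with class -1 and a tiny zero polygon so downstream
+        # code always receives non-empty arrays (class -1 rows are filtered out later).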
+
+
+
+
+ def get_imgs_labels(self, img_dirs):
+ if not isinstance(img_dirs, list):
+ img_dirs = [img_dirs]
+ # we store the cache img file in the first directory of img_dirs
+ valid_img_record = osp.join(
+ osp.dirname(img_dirs[0]), "." + osp.basename(img_dirs[0]) + "_cache.json"
+ )
+ NUM_THREADS = min(8, os.cpu_count())
+ img_paths = []
+ for img_dir in img_dirs:
+ assert osp.exists(img_dir), f"{img_dir} is an invalid directory path!"
+ img_paths += glob.glob(osp.join(img_dir, "**/*"), recursive=True)
+
+ img_paths = sorted(
+ p for p in img_paths if p.split(".")[-1].lower() in IMG_FORMATS and os.path.isfile(p)
+ )
+
+ assert img_paths, f"No images found in {img_dir}."
+ img_hash = self.get_hash(img_paths)
+        LOGGER.info(f'img record information path is: {valid_img_record}')
+ if osp.exists(valid_img_record):
+ with open(valid_img_record, "r") as f:
+ cache_info = json.load(f)
+ if "image_hash" in cache_info and cache_info["image_hash"] == img_hash:
+ img_info = cache_info["information"]
+ else:
+ self.check_images = True
+ else:
+ self.check_images = True
+
+ # check images
+ if self.check_images and self.main_process:
+ img_info = {}
+ nc, msgs = 0, [] # number corrupt, messages
+ LOGGER.info(
+ f"{self.task}: Checking formats of images with {NUM_THREADS} process(es): "
+ )
+ with Pool(NUM_THREADS) as pool:
+ pbar = tqdm(
+ pool.imap(TrainValDataset.check_image, img_paths),
+ total=len(img_paths),
+ )
+ for img_path, shape_per_img, nc_per_img, msg in pbar:
+ if nc_per_img == 0: # not corrupted
+ img_info[img_path] = {"shape": shape_per_img}
+ nc += nc_per_img
+ if msg:
+ msgs.append(msg)
+ pbar.desc = f"{nc} image(s) corrupted"
+ pbar.close()
+ if msgs:
+ LOGGER.info("\n".join(msgs))
+
+ cache_info = {"information": img_info, "image_hash": img_hash}
+ # save valid image paths.
+ with open(valid_img_record, "w") as f:
+ json.dump(cache_info, f)
+
+ # check and load anns
+
+ img_paths = list(img_info.keys())
+ label_paths = img2label_paths(img_paths)
+ assert label_paths, f"No labels found."
+ label_hash = self.get_hash(label_paths)
+ if "label_hash" not in cache_info or cache_info["label_hash"] != label_hash:
+ self.check_labels = True
+
+ if self.check_labels:
+ cache_info["label_hash"] = label_hash
+            nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
+ LOGGER.info(
+ f"{self.task}: Checking formats of labels with {NUM_THREADS} process(es): "
+ )
+ with Pool(NUM_THREADS) as pool:
+ pbar = pool.imap(
+ TrainValDataset.check_label_files, zip(img_paths, label_paths)
+ )
+ pbar = tqdm(pbar, total=len(label_paths)) if self.main_process else pbar
+ for (
+ img_path,
+ labels_per_file,
+ nc_per_file,
+ nm_per_file,
+ nf_per_file,
+ ne_per_file,
+ msg,
+ ) in pbar:
+ if nc_per_file == 0:
+ img_info[img_path]["labels"] = labels_per_file
+ else:
+ img_info.pop(img_path)
+ nc += nc_per_file
+ nm += nm_per_file
+ nf += nf_per_file
+ ne += ne_per_file
+ if msg:
+ msgs.append(msg)
+ if self.main_process:
+ pbar.desc = f"{nf} label(s) found, {nm} label(s) missing, {ne} label(s) empty, {nc} invalid label files"
+ if self.main_process:
+ pbar.close()
+ with open(valid_img_record, "w") as f:
+ json.dump(cache_info, f)
+ if msgs:
+ LOGGER.info("\n".join(msgs))
+ if nf == 0:
+ LOGGER.warning(
+ f"WARNING: No labels found in {osp.dirname(img_paths[0])}. "
+ )
+
+ if self.task.lower() == "val":
+ if self.data_dict.get("is_coco", False): # use original json file when evaluating on coco dataset.
+ assert osp.exists(self.data_dict["anno_path"]), "Eval on coco dataset must provide valid path of the annotation file in config file: data/coco.yaml"
+ else:
+ assert (
+ self.class_names
+ ), "Class names is required when converting labels to coco format for evaluating."
+ save_dir = osp.join(osp.dirname(osp.dirname(img_dirs[0])), "annotations")
+ if not osp.exists(save_dir):
+ os.mkdir(save_dir)
+ save_path = osp.join(
+ save_dir, "instances_" + osp.basename(img_dirs[0]) + ".json"
+ )
+ TrainValDataset.generate_coco_format_labels(
+ img_info, self.class_names, save_path
+ )
+
+ # img_paths, labels = list(
+ # zip(
+ # *[
+ # (
+ # img_path,
+ # np.array(info["labels"], dtype=np.float32)
+ # if info["labels"]
+ # else np.zeros((0, 5), dtype=np.float32),
+ # )
+ # for img_path, info in img_info.items()
+ # ]
+ # )
+ # )
+ img_paths, labels = list(
+ zip(
+ *[
+ (
+ img_path,
+ info["labels"]
+ if info["labels"]
+ else [],
+ )
+ for img_path, info in img_info.items()
+ ]
+ )
+ )
+ self.img_info = img_info
+ LOGGER.info(
+ f"{self.task}: Final numbers of valid images: {len(img_paths)}/ labels: {len(labels)}. "
+ )
+ return img_paths, labels
+
+ def get_mosaic(self, index, shape):
+ """Gets images and labels after mosaic augments"""
+ indices = [index] + random.choices(
+ range(0, len(self.img_paths)), k=3
+ ) # 3 additional image indices
+ random.shuffle(indices)
+ imgs, hs, ws, labels, segments = [], [], [], [], []
+ for index in indices:
+ img, _, (h, w) = self.load_image(index)
+ labels_per_img = self.labels[index]
+ segments_per_img = copy.deepcopy(self.segments[index])
+ imgs.append(img)
+ hs.append(h)
+ ws.append(w)
+ labels.append(labels_per_img)
+ segments.append(segments_per_img)
+ img, labels, segments = mosaic_augmentation(shape, imgs, hs, ws, labels, segments, self.hyp, self.specific_shape, self.target_height, self.target_width)
+ img, labels, segments = copy_paste(img, labels, segments, 0)
+ img, labels, segments = random_affine(img, labels, segments,
+ degrees=self.hyp['degrees'],
+ translate=self.hyp['translate'],
+ scale=self.hyp['scale'],
+ shear=self.hyp['shear'],
+ new_shape=shape if not self.specific_shape else (self.target_height, self.target_width))
+ return img, labels, segments
+
+ def general_augment(self, img, labels, segments):
+ """Gets images and labels after general augment
+        This function applies HSV, random up-down flip and random left-right flip augments.
+ """
+ nl = len(labels)
+
+ # HSV color-space
+ augment_hsv(
+ img,
+ hgain=self.hyp["hsv_h"],
+ sgain=self.hyp["hsv_s"],
+ vgain=self.hyp["hsv_v"],
+ )
+
+ # Flip up-down
+ if random.random() < self.hyp["flipud"]:
+ img = np.flipud(img)
+ if nl:
+ segments = np.flipud(segments)
+ labels[:, 2] = 1 - labels[:, 2]
+
+ # Flip left-right
+ if random.random() < self.hyp["fliplr"]:
+ img = np.fliplr(img)
+ if nl:
+ segments = np.fliplr(segments)
+ labels[:, 1] = 1 - labels[:, 1]
+
+ return img, labels, segments.transpose(2, 0, 1) if segments.shape[0]!=0 else segments
+
+ def sort_files_shapes(self):
+ '''Sort by aspect ratio.'''
+ batch_num = self.batch_indices[-1] + 1
+ s = self.shapes # [height, width]
+ ar = s[:, 1] / s[:, 0] # aspect ratio
+ irect = ar.argsort()
+ self.img_paths = [self.img_paths[i] for i in irect]
+ self.labels = [self.labels[i] for i in irect]
+ self.segments = [self.segments[i] for i in irect]
+ self.shapes = s[irect] # wh
+ ar = ar[irect]
+
+ # Set training image shapes
+ shapes = [[1, 1]] * batch_num
+ for i in range(batch_num):
+ ari = ar[self.batch_indices == i]
+ mini, maxi = ari.min(), ari.max()
+ if maxi < 1:
+ shapes[i] = [1, maxi]
+ elif mini > 1:
+ shapes[i] = [1 / mini, 1]
+ self.batch_shapes = (
+ np.ceil(np.array(shapes) * self.img_size / self.stride + self.pad).astype(
+ np.int_
+ )
+ * self.stride
+ )
+
+ @staticmethod
+ def check_image(im_file):
+ '''Verify an image.'''
+ nc, msg = 0, ""
+ try:
+ im = Image.open(im_file)
+ im.verify() # PIL verify
+ im = Image.open(im_file) # need to reload the image after using verify()
+ shape = (im.height, im.width) # (height, width)
+ try:
+ im_exif = im._getexif()
+ if im_exif and ORIENTATION in im_exif:
+ rotation = im_exif[ORIENTATION]
+ if rotation in (6, 8):
+ shape = (shape[1], shape[0])
+ except:
+ im_exif = None
+
+ assert (shape[0] > 9) & (shape[1] > 9), f"image size {shape} <10 pixels"
+ assert im.format.lower() in IMG_FORMATS, f"invalid image format {im.format}"
+ if im.format.lower() in ("jpg", "jpeg"):
+ with open(im_file, "rb") as f:
+ f.seek(-2, 2)
+ if f.read() != b"\xff\xd9": # corrupt JPEG
+ ImageOps.exif_transpose(Image.open(im_file)).save(
+ im_file, "JPEG", subsampling=0, quality=100
+ )
+ msg += f"WARNING: {im_file}: corrupt JPEG restored and saved"
+ return im_file, shape, nc, msg
+ except Exception as e:
+ nc = 1
+ msg = f"WARNING: {im_file}: ignoring corrupt image: {e}"
+ return im_file, None, nc, msg
+
+ @staticmethod
+ def xyn2xy(x, w=640, h=640, padw=0, padh=0):
+ # Convert normalized segments into pixel segments, shape (n,2)
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+ y[..., 0] = w * x[..., 0] + padw # top left x
+ y[..., 1] = h * x[..., 1] + padh # top left y
+ return y
+
+ @staticmethod
+ def drawit(img, labels, masks, imgname = "", task = ""):
+        # Debug helper: draw the boxes and masks onto a copy of the image and save it for inspection.
+        # Visualization is skipped for the Val task (known to be buggy there).
+ if task == "Val":
+ return 0
+ import copy
+
+ spsp = copy.deepcopy(img)
+ for label in labels:
+ xy = label[1:3] * np.asarray(img.shape[:2])[::-1]
+ wh = label[3:5] * np.asarray(img.shape[:2])[::-1]
+ pt1 = (xy - wh / 2).astype(np.int_)
+ pt2 = (xy + wh / 2).astype(np.int_)
+ cv2.rectangle(spsp, pt1, pt2, (0,255,255), 1)
+ ssss = random.randint(0,100000000)
+ for mask in masks:
+ if mask.shape[:2]!=(img.shape[0], img.shape[1]):
+ m = cv2.resize(mask,(img.shape[0], img.shape[1]))
+ else:
+ m = mask
+ m = m.reshape(img.shape[0], img.shape[1], 1)
+ q = np.ones((img.shape[0], img.shape[1], 1), dtype = np.int_) * 255 * m
+ q = q * m
+ s = np.zeros((img.shape[0], img.shape[1], 2))
+ s = np.concatenate([s, q], axis = 2)
+ spsp = cv2.addWeighted(spsp, 1, s.astype(np.int_), 0.5, 0, dtype=cv2.CV_8U)
+ print(img.shape, labels.shape, masks.shape)
+ try:
+ print(cv2.imwrite("/home/hadoop-seccv/ssd/wangzhaonian/yolov6_seg/test_img/{}.jpg".format(ssss), spsp))
+ print(imgname, ssss, len(labels), len(masks))
+ except:
+ print("?")
+
+
+ @staticmethod
+ def check_label_files(args):
+ img_path, lb_path = args
+        nm, nf, ne, nc, msg = 0, 0, 0, 0, ""  # number missing, found, empty, corrupt, message
+ try:
+ if osp.exists(lb_path):
+ nf = 1 # label found
+ with open(lb_path, "r") as f:
+ labels = [
+ x.split() for x in f.read().strip().splitlines() if len(x) > 5 # get which has seg
+ ]
+ # labels = np.array(labels, dtype=np.float32)
+ if len(labels):
+ # assert all(
+ # len(l) >= 5 for l in labels
+ # ), f"{lb_path}: wrong label format."
+ # assert (
+ # labels >= 0
+ # ).all(), f"{lb_path}: Label values error: all values in label file must > 0"
+ # assert (
+ # labels[:, 1:] <= 1
+ # ).all(), f"{lb_path}: Label values error: all coordinates must be normalized"
+
+ # _, indices = np.unique(labels, axis=0, return_index=True)
+ # if len(indices) < len(labels): # duplicate row check
+ # labels = labels[indices] # remove duplicates
+ # msg += f"WARNING: {lb_path}: {len(labels) - len(indices)} duplicate labels removed"
+ # labels = labels.tolist()
+ _t = 0
+ else:
+ ne = 1 # label empty
+ labels = []
+ else:
+ nm = 1 # label missing
+ labels = []
+ return img_path, labels, nc, nm, nf, ne, msg
+ except Exception as e:
+ nc = 1
+ msg = f"WARNING: {lb_path}: ignoring invalid labels: {e}"
+ return img_path, None, nc, nm, nf, ne, msg
+
+ @staticmethod
+ def polygon2mask(img_size, polygons, color=1, downsample_ratio=1):
+ mask = np.zeros(img_size, dtype=np.uint8)
+ polygons = np.asarray(polygons)
+ polygons = polygons.astype(np.int32)
+ shape = polygons.shape
+ polygons = polygons.reshape(shape[0], -1, 2)
+ cv2.fillPoly(mask, polygons, color=color)
+ nh, nw = (img_size[0] // downsample_ratio, img_size[1] // downsample_ratio)
+        # NOTE: calling fillPoly first and only then resizing keeps the loss calculation
+        # consistent with the mask-ratio=1 case.
+ mask = cv2.resize(mask, (nw, nh))
+ return mask
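+        # The polygon is rasterized at full resolution with cv2.fillPoly and only afterwards
+        # downsampled by downsample_ratio (see the NOTE above).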
+
+ def polygons2masks(self, img_size, polygons, color, downsample_ratio=1):
+ """
+ Args:
+ img_size (tuple): The image size.
+ polygons (list[np.ndarray]): each polygon is [N, M],
+ N is the number of polygons,
+ M is the number of points(Be divided by 2).
+ """
+ masks = []
+ for si in range(len(polygons)):
+ mask = self.polygon2mask(img_size, [polygons[si].reshape(-1)], color, downsample_ratio)
+ masks.append(mask)
+ return np.array(masks)
+
+
+ def polygons2masks_overlap(self, img_size, segments, downsample_ratio=1):
+        """Return a single overlap mask of shape (img_size // downsample_ratio) where each pixel stores an instance index."""
+ masks = np.zeros((img_size[0] // downsample_ratio, img_size[1] // downsample_ratio),
+ dtype=np.int32 if len(segments) > 255 else np.uint8)
+ areas = []
+ ms = []
+ for si in range(len(segments)):
+ mask = self.polygon2mask(
+ img_size,
+ [segments[si].reshape(-1)],
+ downsample_ratio=downsample_ratio,
+ color=1,
+ )
+ ms.append(mask)
+ areas.append(mask.sum())
+ areas = np.asarray(areas)
+ index = np.argsort(-areas)
+ ms = np.array(ms)[index]
+ for i in range(len(segments)):
+ mask = ms[i] * (i + 1)
+ masks = masks + mask
+ masks = np.clip(masks, a_min=0, a_max=i + 1)
+ return masks, index
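+        # "Overlap" encoding: instead of one binary mask per instance, a single map stores an
+        # instance index (1..N) per pixel; masks are drawn from largest to smallest area, so
+        # smaller instances remain visible on top of larger ones.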
+
+ @staticmethod
+ def generate_coco_format_labels(img_info, class_names, save_path):
+ # for evaluation with pycocotools
+ dataset = {"categories": [], "annotations": [], "images": []}
+ for i, class_name in enumerate(class_names):
+ dataset["categories"].append(
+ {"id": i, "name": class_name, "supercategory": ""}
+ )
+
+ ann_id = 0
+ LOGGER.info(f"Convert to COCO format")
+ for i, (img_path, info) in enumerate(tqdm(img_info.items())):
+ labels = info["labels"] if info["labels"] else []
+ img_id = osp.splitext(osp.basename(img_path))[0]
+ img_h, img_w = info["shape"]
+ dataset["images"].append(
+ {
+ "file_name": os.path.basename(img_path),
+ "id": img_id,
+ "width": img_w,
+ "height": img_h,
+ }
+ )
+ if labels:
+ for label in labels:
+ c, x, y, w, h = label[:5]
+ c, x, y, w, h = float(c), float(x), float(y), float(w), float(h)
+ seg = np.asarray(label[5:]).astype(np.float32)
+ seg = seg.reshape(-1, 2)
+ #breakpoint()
+ seg = seg * np.asarray([img_w, img_h])
+ seg = seg.reshape(-1)
+ # convert x,y,w,h to x1,y1,x2,y2
+ x1 = (x - w / 2) * img_w
+ y1 = (y - h / 2) * img_h
+ x2 = (x + w / 2) * img_w
+ y2 = (y + h / 2) * img_h
+ # cls_id starts from 0
+ cls_id = int(c)
+ w = max(0, x2 - x1)
+ h = max(0, y2 - y1)
+ dataset["annotations"].append(
+ {
+ "area": h * w,
+ "bbox": [x1, y1, w, h],
+ "category_id": cls_id,
+ "id": ann_id,
+ "image_id": img_id,
+ "iscrowd": 0,
+ # mask
+ "segmentation": list(seg),
+ }
+ )
+ ann_id += 1
+
+ with open(save_path, "w") as f:
+ json.dump(dataset, f)
+ LOGGER.info(
+            f"Convert to COCO format finished. Results saved in {save_path}"
+ )
+
+ @staticmethod
+ def get_hash(paths):
+ """Get the hash value of paths"""
+ assert isinstance(paths, list), "Only support list currently."
+ h = hashlib.md5("".join(paths).encode())
+ return h.hexdigest()
+
+
+class LoadData:
+ def __init__(self, path, webcam, webcam_addr):
+ self.webcam = webcam
+ self.webcam_addr = webcam_addr
+ if webcam: # if use web camera
+ imgp = []
+ vidp = [int(webcam_addr) if webcam_addr.isdigit() else webcam_addr]
+ else:
+ p = str(Path(path).resolve()) # os-agnostic absolute path
+ if os.path.isdir(p):
+ files = sorted(glob.glob(os.path.join(p, '**/*.*'), recursive=True)) # dir
+ elif os.path.isfile(p):
+ files = [p] # files
+ else:
+ raise FileNotFoundError(f'Invalid path {p}')
+ imgp = [i for i in files if i.split('.')[-1] in IMG_FORMATS]
+ vidp = [v for v in files if v.split('.')[-1] in VID_FORMATS]
+ self.files = imgp + vidp
+ self.nf = len(self.files)
+ self.type = 'image'
+ if len(vidp) > 0:
+ self.add_video(vidp[0]) # new video
+ else:
+ self.cap = None
+
+ # @staticmethod
+ def checkext(self, path):
+ if self.webcam:
+ file_type = 'video'
+ else:
+ file_type = 'image' if path.split('.')[-1].lower() in IMG_FORMATS else 'video'
+ return file_type
+
+ def __iter__(self):
+ self.count = 0
+ return self
+
+ def __next__(self):
+ if self.count == self.nf:
+ raise StopIteration
+ path = self.files[self.count]
+ if self.checkext(path) == 'video':
+ self.type = 'video'
+ ret_val, img = self.cap.read()
+ while not ret_val:
+ self.count += 1
+ self.cap.release()
+ if self.count == self.nf: # last video
+ raise StopIteration
+ path = self.files[self.count]
+ self.add_video(path)
+ ret_val, img = self.cap.read()
+ else:
+ # Read image
+ self.count += 1
+ img = cv2.imread(path) # BGR
+ return img, path, self.cap
+
+ def add_video(self, path):
+ self.frame = 0
+ self.cap = cv2.VideoCapture(path)
+ self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+
+ def __len__(self):
+ return self.nf # number of files
diff --git a/yolov6/models/efficientrep.py b/yolov6/models/efficientrep.py
index 5d0de7ce..4ca75083 100644
--- a/yolov6/models/efficientrep.py
+++ b/yolov6/models/efficientrep.py
@@ -387,20 +387,11 @@ def __init__(
block=RepVGGBlock,
csp_e=float(1)/2,
fuse_P2=False,
- cspsppf=False,
- stage_block_type="BepC3"
+ cspsppf=False
):
super().__init__()
assert channels_list is not None
assert num_repeats is not None
-
- if stage_block_type == "BepC3":
- stage_block = BepC3
- elif stage_block_type == "MBLABlock":
- stage_block = MBLABlock
- else:
- raise NotImplementedError
-
self.fuse_P2 = fuse_P2
self.stem = block(
@@ -417,7 +408,7 @@ def __init__(
kernel_size=3,
stride=2
),
- stage_block(
+ BepC3(
in_channels=channels_list[1],
out_channels=channels_list[1],
n=num_repeats[1],
@@ -433,7 +424,7 @@ def __init__(
kernel_size=3,
stride=2
),
- stage_block(
+ BepC3(
in_channels=channels_list[2],
out_channels=channels_list[2],
n=num_repeats[2],
@@ -449,7 +440,7 @@ def __init__(
kernel_size=3,
stride=2
),
- stage_block(
+ BepC3(
in_channels=channels_list[3],
out_channels=channels_list[3],
n=num_repeats[3],
@@ -469,7 +460,7 @@ def __init__(
kernel_size=3,
stride=2,
),
- stage_block(
+ BepC3(
in_channels=channels_list[4],
out_channels=channels_list[4],
n=num_repeats[4],
@@ -484,7 +475,7 @@ def __init__(
kernel_size=3,
stride=2,
),
- stage_block(
+ BepC3(
in_channels=channels_list[5],
out_channels=channels_list[5],
n=num_repeats[5],
diff --git a/yolov6/models/effidehead_seg.py b/yolov6/models/effidehead_seg.py
new file mode 100644
index 00000000..2bfe9843
--- /dev/null
+++ b/yolov6/models/effidehead_seg.py
@@ -0,0 +1,452 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+from yolov6.layers.common import *
+from yolov6.assigners.anchor_generator import generate_anchors
+from yolov6.utils.general import dist2bbox
+
+
+class Detect(nn.Module):
+ export = False
+ '''Efficient Decoupled Head
+    With hardware-aware design, the decoupled head is optimized with
+    hybrid-channel methods.
+ '''
+ def __init__(self, num_classes=80, num_layers=3, inplace=True, head_layers=None, reg_mask=None, use_dfl=True, reg_max=16, nm=32): # detection layer
+ # nm: number of masks
+ super().__init__()
+ assert head_layers is not None
+ assert reg_mask is not None
+ self.nc = num_classes # number of classes
+ self.no = num_classes + 5 + nm # number of outputs per anchor
+ self.nl = num_layers # number of detection layers
+ self.nm = nm
+ self.grid = [torch.zeros(1)] * num_layers
+ self.prior_prob = 1e-2
+ self.inplace = inplace
+ stride = [8, 16, 32] if num_layers == 3 else [8, 16, 32, 64] # strides computed during build
+ self.stride = torch.tensor(stride)
+ self.use_dfl = use_dfl
+ self.reg_max = reg_max
+ self.proj_conv = nn.Conv2d(self.reg_max + 1, 1, 1, bias=False)
+ self.grid_cell_offset = 0.5
+ self.grid_cell_size = 5.0
+
+ # Init decouple head
+ self.stems = nn.ModuleList()
+ self.cls_convs = nn.ModuleList()
+ self.reg_convs = nn.ModuleList()
+ self.cls_preds = nn.ModuleList()
+ self.reg_preds = nn.ModuleList()
+
+ # Efficient decoupled head layers
+ for i in range(num_layers):
+ idx = i*5
+ self.stems.append(head_layers[idx])
+ self.cls_convs.append(head_layers[idx+1])
+ self.reg_convs.append(head_layers[idx+2])
+ self.cls_preds.append(head_layers[idx+3])
+ self.reg_preds.append(head_layers[idx+4])
+
+ def initialize_biases(self):
+
+ for conv in self.cls_preds:
+ b = conv.bias.view(-1, )
+ b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob))
+ conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+ w = conv.weight
+ w.data.fill_(0.)
+ conv.weight = torch.nn.Parameter(w, requires_grad=True)
+
+ for conv in self.reg_preds:
+ b = conv.bias.view(-1, )
+ b.data.fill_(1.0)
+ conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+ w = conv.weight
+ w.data.fill_(0.)
+ conv.weight = torch.nn.Parameter(w, requires_grad=True)
+
+ self.proj = nn.Parameter(torch.linspace(0, self.reg_max, self.reg_max + 1), requires_grad=False)
+ self.proj_conv.weight = nn.Parameter(self.proj.view([1, self.reg_max + 1, 1, 1]).clone().detach(),
+ requires_grad=False)
+
+ def forward(self, x):
+ if self.training:
+ cls_score_list = []
+ reg_distri_list = []
+
+ for i in range(self.nl):
+ x[i] = self.stems[i](x[i])
+ cls_x = x[i]
+ reg_x = x[i]
+ cls_feat = self.cls_convs[i](cls_x)
+ cls_output = self.cls_preds[i](cls_feat)
+ reg_feat = self.reg_convs[i](reg_x)
+ reg_output = self.reg_preds[i](reg_feat)
+
+ cls_output = torch.sigmoid(cls_output)
+ cls_score_list.append(cls_output.flatten(2).permute((0, 2, 1)))
+ reg_distri_list.append(reg_output.flatten(2).permute((0, 2, 1)))
+
+ cls_score_list = torch.cat(cls_score_list, axis=1)
+ reg_distri_list = torch.cat(reg_distri_list, axis=1)
+
+ return x, cls_score_list, reg_distri_list
+ else:
+ cls_score_list = []
+ reg_dist_list = []
+
+ for i in range(self.nl):
+ b, _, h, w = x[i].shape
+ l = h * w
+ x[i] = self.stems[i](x[i])
+ cls_x = x[i]
+ reg_x = x[i]
+ cls_feat = self.cls_convs[i](cls_x)
+ cls_output = self.cls_preds[i](cls_feat)
+ reg_feat = self.reg_convs[i](reg_x)
+ reg_output = self.reg_preds[i](reg_feat)
+
+ if self.use_dfl:
+ reg_output = reg_output.reshape([-1, 4, self.reg_max + 1, l]).permute(0, 2, 1, 3)
+ reg_output = self.proj_conv(F.softmax(reg_output, dim=1))
+
+ cls_output = torch.sigmoid(cls_output)
+
+ if self.export:
+ cls_score_list.append(cls_output)
+ reg_dist_list.append(reg_output)
+ else:
+ cls_score_list.append(cls_output.reshape([b, self.nc, l]))
+ reg_dist_list.append(reg_output.reshape([b, 4, l]))
+
+ if self.export:
+ return tuple(torch.cat([cls, reg], 1) for cls, reg in zip(cls_score_list, reg_dist_list))
+
+ cls_score_list = torch.cat(cls_score_list, axis=-1).permute(0, 2, 1)
+ reg_dist_list = torch.cat(reg_dist_list, axis=-1).permute(0, 2, 1)
+
+
+ anchor_points, stride_tensor = generate_anchors(
+ x, self.stride, self.grid_cell_size, self.grid_cell_offset, device=x[0].device, is_eval=True, mode='af')
+
+ pred_bboxes = dist2bbox(reg_dist_list, anchor_points, box_format='xywh')
+ pred_bboxes *= stride_tensor
+ return torch.cat(
+ [
+ pred_bboxes,
+ torch.ones((b, pred_bboxes.shape[1], 1), device=pred_bboxes.device, dtype=pred_bboxes.dtype),
+ cls_score_list
+ ],
+ axis=-1)
+
+def build_seg_layer(channels_list, num_anchors, num_classes, reg_max=16, num_layers=3):
+
+ chx = [6, 8, 10] if num_layers == 3 else [8, 9, 10, 11]
+
+ head_layers = nn.Sequential(
+ # stem0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred0
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred0
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=4 * (reg_max + num_anchors),
+ kernel_size=1
+ ),
+ # stem1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred1
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred1
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=4 * (reg_max + num_anchors),
+ kernel_size=1
+ ),
+ # stem2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred2
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred2
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=4 * (reg_max + num_anchors),
+ kernel_size=1
+ )
+ )
+
+ if num_layers == 4:
+ head_layers.add_module('stem3',
+ # stem3
+ ConvBNSiLU(
+ in_channels=channels_list[chx[3]],
+ out_channels=channels_list[chx[3]],
+ kernel_size=1,
+ stride=1
+ )
+ )
+ head_layers.add_module('cls_conv3',
+ # cls_conv3
+ ConvBNSiLU(
+ in_channels=channels_list[chx[3]],
+ out_channels=channels_list[chx[3]],
+ kernel_size=3,
+ stride=1
+ )
+ )
+ head_layers.add_module('reg_conv3',
+ # reg_conv3
+ ConvBNSiLU(
+ in_channels=channels_list[chx[3]],
+ out_channels=channels_list[chx[3]],
+ kernel_size=3,
+ stride=1
+ )
+ )
+ head_layers.add_module('cls_pred3',
+ # cls_pred3
+ nn.Conv2d(
+ in_channels=channels_list[chx[3]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ )
+ )
+ head_layers.add_module('reg_pred3',
+ # reg_pred3
+ nn.Conv2d(
+ in_channels=channels_list[chx[3]],
+ out_channels=4 * (reg_max + num_anchors),
+ kernel_size=1
+ )
+ )
+
+ return head_layers
+
+
+######
+
+
+def build_effidehead_layer(channels_list, num_anchors, num_classes, reg_max=16, num_layers=3):
+
+ chx = [6, 8, 10] if num_layers == 3 else [8, 9, 10, 11]
+
+ head_layers = nn.Sequential(
+ # stem0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred0
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred0
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=4 * (reg_max + num_anchors),
+ kernel_size=1
+ ),
+ # stem1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred1
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred1
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=4 * (reg_max + num_anchors),
+ kernel_size=1
+ ),
+ # stem2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred2
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred2
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=4 * (reg_max + num_anchors),
+ kernel_size=1
+ )
+ )
+
+ if num_layers == 4:
+ head_layers.add_module('stem3',
+ # stem3
+ ConvBNSiLU(
+ in_channels=channels_list[chx[3]],
+ out_channels=channels_list[chx[3]],
+ kernel_size=1,
+ stride=1
+ )
+ )
+ head_layers.add_module('cls_conv3',
+ # cls_conv3
+ ConvBNSiLU(
+ in_channels=channels_list[chx[3]],
+ out_channels=channels_list[chx[3]],
+ kernel_size=3,
+ stride=1
+ )
+ )
+ head_layers.add_module('reg_conv3',
+ # reg_conv3
+ ConvBNSiLU(
+ in_channels=channels_list[chx[3]],
+ out_channels=channels_list[chx[3]],
+ kernel_size=3,
+ stride=1
+ )
+ )
+ head_layers.add_module('cls_pred3',
+ # cls_pred3
+ nn.Conv2d(
+ in_channels=channels_list[chx[3]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ )
+ )
+ head_layers.add_module('reg_pred3',
+ # reg_pred3
+ nn.Conv2d(
+ in_channels=channels_list[chx[3]],
+ out_channels=4 * (reg_max + num_anchors),
+ kernel_size=1
+ )
+ )
+
+ return head_layers
diff --git a/yolov6/models/heads/effidehead_fuseab_seg.py b/yolov6/models/heads/effidehead_fuseab_seg.py
new file mode 100644
index 00000000..80272928
--- /dev/null
+++ b/yolov6/models/heads/effidehead_fuseab_seg.py
@@ -0,0 +1,551 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+from yolov6.layers.common import *
+from yolov6.assigners.anchor_generator import generate_anchors
+from yolov6.utils.general import dist2bbox
+
+
+class Detect(nn.Module):
+ export = False
+    '''Efficient Decoupled Head for fusing anchor-based branches.
+ '''
+ def __init__(self, num_classes=80, anchors=None, num_layers=3, inplace=True, head_layers=None, reg_mask=None, use_dfl=True, reg_max=16, nm=32, fuse_ab=False): # detection layer
+ super().__init__()
+ assert head_layers is not None
+ assert reg_mask is not None
+ self.nc = num_classes # number of classes
+ self.no = num_classes + 5 + nm # number of outputs per anchor
+ self.nl = num_layers # number of detection layers
+ self.nm = nm # number of masks
+ if isinstance(anchors, (list, tuple)):
+ self.na = len(anchors[0]) // 2
+ else:
+ self.na = anchors
+ self.grid = [torch.zeros(1)] * num_layers
+ self.fuse_ab = fuse_ab
+ self.prior_prob = 1e-2
+ self.inplace = inplace
+ stride = [8, 16, 32] if num_layers == 3 else [8, 16, 32, 64] # strides computed during build
+ self.stride = torch.tensor(stride)
+ self.use_dfl = use_dfl
+ self.reg_max = reg_max
+ self.proj_conv = nn.Conv2d(self.reg_max + 1, 1, 1, bias=False)
+ self.grid_cell_offset = 0.5
+ self.grid_cell_size = 5.0
+ self.anchors_init= ((torch.tensor(anchors) / self.stride[:,None])).reshape(self.nl, self.na, 2)
+ self.reg_mask = reg_mask
+
+ # Init decouple head
+ self.stems = nn.ModuleList()
+ self.cls_convs = nn.ModuleList()
+ self.reg_convs = nn.ModuleList()
+ self.seg_convs = nn.ModuleList()
+ self.cls_preds = nn.ModuleList()
+ self.reg_preds = nn.ModuleList()
+ self.seg_preds = nn.ModuleList()
+ self.cls_preds_ab = nn.ModuleList()
+ self.reg_preds_ab = nn.ModuleList()
+ self.seg_preds_ab = nn.ModuleList()
+ self.seg_proto = nn.ModuleList()
+ self.seg_proto.append(reg_mask[0])
+
+
+ # Efficient decoupled head layers
+ for i in range(num_layers):
+ idx = i*10
+ self.stems.append(head_layers[idx])
+ self.cls_convs.append(head_layers[idx+1])
+ self.reg_convs.append(head_layers[idx+2])
+ self.seg_convs.append(head_layers[idx+3])
+ self.cls_preds.append(head_layers[idx+4])
+ self.reg_preds.append(head_layers[idx+5])
+ self.seg_preds.append(head_layers[idx+6])
+ if self.fuse_ab:
+ self.cls_preds_ab.append(head_layers[idx+7])
+ self.reg_preds_ab.append(head_layers[idx+8])
+ self.seg_preds_ab.append(head_layers[idx+9])
+
+
+ def initialize_biases(self):
+
+ for conv in self.cls_preds:
+ b = conv.bias.view(-1, )
+ b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob))
+ conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+ w = conv.weight
+ w.data.fill_(0.)
+ conv.weight = torch.nn.Parameter(w, requires_grad=True)
+
+ if self.fuse_ab:
+ for conv in self.cls_preds_ab:
+ b = conv.bias.view(-1, )
+ b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob))
+ conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+ w = conv.weight
+ w.data.fill_(0.)
+ conv.weight = torch.nn.Parameter(w, requires_grad=True)
+
+ for conv in self.reg_preds:
+ b = conv.bias.view(-1, )
+ b.data.fill_(1.0)
+ conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+ w = conv.weight
+ w.data.fill_(0.)
+ conv.weight = torch.nn.Parameter(w, requires_grad=True)
+
+ if self.fuse_ab:
+ for conv in self.reg_preds_ab:
+ b = conv.bias.view(-1, )
+ b.data.fill_(1.0)
+ conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+ w = conv.weight
+ w.data.fill_(0.)
+ conv.weight = torch.nn.Parameter(w, requires_grad=True)
+
+ self.proj = nn.Parameter(torch.linspace(0, self.reg_max, self.reg_max + 1), requires_grad=False)
+ self.proj_conv.weight = nn.Parameter(self.proj.view([1, self.reg_max + 1, 1, 1]).clone().detach(),
+ requires_grad=False)
+
+ def handleseg_af(self, sgot_lst, sg_msk_lst):
+ '''
+        sg_msk_lst: list of seg masks (prototypes), each of shape (bs, 32, w, h)
+        sgot_lst: list of seg output coefficients, each of shape (bs, n, 32)
+ '''
+ mask_res = []
+ for i in range(len(sgot_lst)):
+ sgot = sgot_lst[i]
+ sg_msk = sg_msk_lst[i]
+ t_mask_res = []
+ for j in range(sgot.shape[0]):
+ sgot_t = sgot[j] # (n, 32)
+ sg_msk_t = sg_msk[j] # (32, w, h)
+ m_t = (sgot_t@sg_msk_t.reshape(32, -1)).reshape(-1, *sg_msk_t.shape[1:])
+ m_t = m_t.unsqueeze(0)
+ t_mask_res.append(m_t)
+ mask_res.append(torch.cat(t_mask_res, 0).flatten(0,1))
+ return mask_res
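+        # Each instance mask is a linear combination of the 32 prototype channels weighted by
+        # that instance's coefficients (prototype-based masks in the spirit of YOLACT):
+        # (n, 32) @ (32, h*w) -> (n, h*w), reshaped to (n, h, w).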
+
+ def handleseg_ab(self, sgot_lst, sg_msk_lst):
+ '''
+        sg_msk_lst: list of seg masks (prototypes), each of shape (bs, 32, w, h)
+        sgot_lst: list of seg output coefficients, each of shape (bs, num_of_anchors, h, w, 32)
+        sgot.flatten(1, 3) -> shape (bs, num_of_anchors * h * w, 32)
+        per sample: (num_of_anchors * h * w, 32) @ (32, w0 * h0), reshaped to (num_of_anchors * h * w, w0, h0)
+ '''
+ mask_res = []
+ for i in range(len(sgot_lst)):
+ sgot = sgot_lst[i]
+ sg_msk = sg_msk_lst[i]
+ s_shape = sgot.shape[1:4]
+ sgot = sgot.flatten(1, 3)
+ t_mask_res = []
+ for j in range(sgot.shape[0]):
+ sgot_t = sgot[j] # (n, 32)
+ sg_msk_t = sg_msk[j] # (32, w, h)
+ m_t = (sgot_t@sg_msk_t.reshape(32, -1)).reshape(-1, *sg_msk_t.shape[1:])
+ m_t = m_t.unsqueeze(0)
+ t_mask_res.append(m_t)
+ mask_res.append(torch.cat(t_mask_res, 0).flatten(0,1))
+ return mask_res
+
+
+
+
+
+ def forward(self, x):
+ if self.training:
+ device = x[0].device
+ cls_score_list_af = []
+ reg_dist_list_af = []
+ cls_score_list_ab = []
+ reg_dist_list_ab = []
+ seg_conf_list_af = []
+ seg_conf_list_ab = []
+ seg_list = []
+ af_seg_list = []
+ ab_seg_list = []
+
+ seg_mask = self.seg_proto[0](x[0])
+ seg_list.append(seg_mask)
+
+
+
+ for i in range(self.nl):
+ b, _, h, w = x[i].shape
+ l = h * w
+
+
+ x[i] = self.stems[i](x[i])
+
+
+ cls_x = x[i]
+ reg_x = x[i]
+ seg_x = x[i]
+
+ cls_feat = self.cls_convs[i](cls_x)
+ reg_feat = self.reg_convs[i](reg_x)
+ seg_feat = self.seg_convs[i](seg_x)
+
+ #anchor_base
+ if self.fuse_ab:
+ cls_output_ab = self.cls_preds_ab[i](cls_feat)
+ reg_output_ab = self.reg_preds_ab[i](reg_feat)
+ seg_output_ab = self.seg_preds_ab[i](seg_feat)
+
+ cls_output_ab = torch.sigmoid(cls_output_ab)
+ seg_output_ab = torch.sigmoid(seg_output_ab)
+ if self.fuse_ab:
+ seg_conf_list_ab.append(seg_output_ab.reshape(b, self.na, -1, h, w).permute(0,1,3,4,2))
+ cls_output_ab = cls_output_ab.reshape(b, self.na, -1, h, w).permute(0,1,3,4,2)
+ cls_score_list_ab.append(cls_output_ab.flatten(1,3))
+
+
+ reg_output_ab = reg_output_ab.reshape(b, self.na, -1, h, w).permute(0,1,3,4,2)
+ reg_output_ab[..., 2:4] = ((reg_output_ab[..., 2:4].sigmoid() * 2) ** 2 ) * (self.anchors_init[i].reshape(1, self.na, 1, 1, 2).to(device))
+ reg_dist_list_ab.append(reg_output_ab.flatten(1,3))
+
+ #anchor_free
+ cls_output_af = self.cls_preds[i](cls_feat)
+ reg_output_af = self.reg_preds[i](reg_feat)
+ seg_output_af = self.seg_preds[i](seg_feat)
+
+ cls_output_af = torch.sigmoid(cls_output_af)
+ # seg_output_af = torch.sigmoid(seg_output_af)
+ seg_conf_list_af.append(seg_output_af.flatten(2).permute((0, 2, 1)))
+
+ cls_score_list_af.append(cls_output_af.flatten(2).permute((0, 2, 1)))
+ reg_dist_list_af.append(reg_output_af.flatten(2).permute((0, 2, 1)))
+
+            # fuse_ab is not supported yet, so the anchor-based seg handling below is disabled
+ if False:
+ ab_seg_list = self.handleseg_ab(seg_conf_list_ab, seg_list) if self.fuse_ab else []
+ cls_score_list_ab = torch.cat(cls_score_list_ab, axis=1)
+ reg_dist_list_ab = torch.cat(reg_dist_list_ab, axis=1)
+ cls_score_list_af = torch.cat(cls_score_list_af, axis=1)
+ reg_dist_list_af = torch.cat(reg_dist_list_af, axis=1)
+
+ return x, cls_score_list_ab, reg_dist_list_ab, cls_score_list_af, reg_dist_list_af, [seg_conf_list_af, seg_list], ab_seg_list
+
+ else:
+ device = x[0].device
+ cls_score_list_af = []
+ reg_dist_list_af = []
+ seg_list = []
+ seg_conf_list_af = []
+ seg_mask = self.seg_proto[0](x[0])
+ seg_list.append(seg_mask)
+
+ for i in range(self.nl):
+ b, _, h, w = x[i].shape
+ l = h * w
+
+
+ x[i] = self.stems[i](x[i])
+
+ cls_x = x[i]
+ reg_x = x[i]
+ seg_x = x[i]
+
+ cls_feat = self.cls_convs[i](cls_x)
+ reg_feat = self.reg_convs[i](reg_x)
+ seg_feat = self.seg_convs[i](seg_x)
+
+ #anchor_free
+ cls_output_af = self.cls_preds[i](cls_feat)
+ reg_output_af = self.reg_preds[i](reg_feat)
+ seg_output_af = self.seg_preds[i](seg_feat)
+
+ if self.use_dfl:
+ reg_output_af = reg_output_af.reshape([-1, 4, self.reg_max + 1, l]).permute(0, 2, 1, 3)
+ reg_output_af = self.proj_conv(F.softmax(reg_output_af, dim=1))
+
+ cls_output_af = torch.sigmoid(cls_output_af)
+ # seg_output_af = torch.sigmoid(seg_output_af)
+ proto_no = (torch.ones(b, 1, l) * i).cuda()
+
+
+ if self.export:
+ cls_score_list_af.append(cls_output_af)
+ reg_dist_list_af.append(reg_output_af)
+ seg_conf_list_af.append(seg_output_af)
+ else:
+ cls_score_list_af.append(cls_output_af.reshape([b, self.nc, l]))
+ reg_dist_list_af.append(reg_output_af.reshape([b, 4, l]))
+ seg_conf_list_af.append(torch.cat([proto_no, seg_output_af.reshape([b, 32, l])], axis = 1)) #[which_proto, (32...)]
+
+ if self.export:
+ return tuple(torch.cat([cls, reg, seg], 1) for cls, reg, seg in zip(cls_score_list_af, reg_dist_list_af, seg_conf_list_af)), seg_list[0]
+
+ cls_score_list_af = torch.cat(cls_score_list_af, axis=-1).permute(0, 2, 1)
+ reg_dist_list_af = torch.cat(reg_dist_list_af, axis=-1).permute(0, 2, 1)
+ seg_conf_list_af = torch.cat(seg_conf_list_af, axis=-1).permute(0, 2, 1)
+
+
+
+ #anchor_free
+ anchor_points_af, stride_tensor_af = generate_anchors(
+ x, self.stride, self.grid_cell_size, self.grid_cell_offset, device=x[0].device, is_eval=True, mode='af')
+
+ pred_bboxes_af = dist2bbox(reg_dist_list_af, anchor_points_af, box_format='xywh')
+ pred_bboxes_af *= stride_tensor_af
+
+ pred_bboxes = pred_bboxes_af
+ cls_score_list = cls_score_list_af
+
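+            # Inference output per anchor is [x, y, w, h, objectness placeholder (=1), class scores];
+            # instance masks are assembled downstream from seg_conf_list_af (per-anchor coefficients)
+            # and seg_list (prototype masks).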
+ return torch.cat(
+ [
+ pred_bboxes,
+ torch.ones((b, pred_bboxes.shape[1], 1), device=pred_bboxes.device, dtype=pred_bboxes.dtype),
+ cls_score_list
+ ],
+ axis=-1), seg_list, seg_conf_list_af
+
+class Proto(nn.Module):
+    # Borrowed from YOLOv5
+    def __init__(self, num_layers, channels_list, pos, c_=256, c2=32):  # num layers, channels list, level index, proto channels, mask channels
+ super().__init__()
+ chx = [6, 8, 10] if num_layers == 3 else [8, 9, 10, 11]
+ c1 = channels_list[chx[pos]]
+ self.cv1 = Conv(c1, c_, k=3)
+ self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
+ self.cv2 = Conv(c_, c_, k=3)
+ self.cv3 = Conv(c_, c2)
+
+ def forward(self, x):
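+        # Shape sketch (assuming x is (b, c1, h, w)): cv1 keeps the spatial size, the nearest
+        # upsample doubles it, so the returned prototype tensor is (b, c2, 2h, 2w).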
+ return self.cv3(self.cv2(self.upsample(self.cv1(x))))
+
+def autopad(k, p=None, d=1): # kernel, padding, dilation
+ # Pad to 'same' shape outputs
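+    # e.g. autopad(3) -> 1, autopad(5) -> 2, and autopad(3, d=2) -> 2 (a dilated 3x3 kernel
+    # covers the same extent as a 5x5 one)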
+ if d > 1:
+ k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
+ if p is None:
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
+ return p
+
+
+class Conv(nn.Module):
+ # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
+ default_act = nn.SiLU() # default activation
+
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
+ super().__init__()
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
+ self.bn = nn.BatchNorm2d(c2)
+ self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
+
+ def forward(self, x):
+ return self.act(self.bn(self.conv(x)))
+
+ def forward_fuse(self, x):
+ return self.act(self.conv(x))
+
+
+def build_effidehead_layer(channels_list, num_anchors, num_classes, reg_max=16, num_layers=3, num_masks=32, fuse_ab=False):
+
+ chx = [6, 8, 10] if num_layers == 3 else [8, 9, 10, 11]
+
+ head_layers = nn.Sequential(
+ # stem0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=3,
+ stride=1
+ ),
+ # seg_conv0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred0_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=num_classes,
+ kernel_size=1
+ ),
+ # reg_pred0_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=4 * (reg_max + 1),
+ kernel_size=1
+ ),
+ # seg_pred0_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=num_masks,
+ kernel_size=1
+ ),
+ # cls_pred0_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred0_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=4 * num_anchors,
+ kernel_size=1
+ ),
+ # seg_pred0_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=num_masks * num_anchors,
+ kernel_size=1
+ ),
+ # stem1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=3,
+ stride=1
+ ),
+ # seg_conv1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred1_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=num_classes,
+ kernel_size=1
+ ),
+ # reg_pred1_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=4 * (reg_max + 1),
+ kernel_size=1
+ ),
+ # seg_pred1_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=num_masks,
+ kernel_size=1
+ ),
+ # cls_pred1_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred1_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=4 * num_anchors,
+ kernel_size=1
+ ),
+ # seg_pred1_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=num_masks * num_anchors,
+ kernel_size=1
+ ),
+ # stem2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=3,
+ stride=1
+ ),
+ # seg_conv2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred2_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=num_classes,
+ kernel_size=1
+ ),
+ # reg_pred2_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=4 * (reg_max + 1),
+ kernel_size=1
+ ),
+ # seg_pred2_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=num_masks,
+ kernel_size=1
+ ),
+ # cls_pred2_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred2_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=4 * num_anchors,
+ kernel_size=1
+ ),
+ # seg_pred2_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=num_masks * num_anchors,
+ kernel_size=1
+ ),
+ )
+
+ return head_layers
+
+
+
+
+
+
+
diff --git a/yolov6/models/heads/effidehead_fuseab_seg_solo.py b/yolov6/models/heads/effidehead_fuseab_seg_solo.py
new file mode 100644
index 00000000..61bd1328
--- /dev/null
+++ b/yolov6/models/heads/effidehead_fuseab_seg_solo.py
@@ -0,0 +1,540 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import math
+from yolov6.layers.common import *
+from yolov6.assigners.anchor_generator import generate_anchors
+from yolov6.utils.general import dist2bbox
+
+
+class Detect(nn.Module):
+    '''Efficient Decoupled Head for fusing anchor-based branches.'''
+    export = False
+ def __init__(self, num_classes=80, anchors=None, num_layers=3, inplace=True, head_layers=None, reg_mask=None, use_dfl=True, reg_max=16, nm=32, fuse_ab=False): # detection layer
+ super().__init__()
+ assert head_layers is not None
+ assert reg_mask is not None
+ self.nc = num_classes # number of classes
+ self.no = num_classes + 5 + nm # number of outputs per anchor
+ self.nl = num_layers # number of detection layers
+ self.nm = nm # number of masks
+ if isinstance(anchors, (list, tuple)):
+ self.na = len(anchors[0]) // 2
+ else:
+ self.na = anchors
+ self.grid = [torch.zeros(1)] * num_layers
+ self.fuse_ab = fuse_ab
+ self.prior_prob = 1e-2
+ self.inplace = inplace
+ stride = [8, 16, 32] if num_layers == 3 else [8, 16, 32, 64] # strides computed during build
+ self.stride = torch.tensor(stride)
+ self.use_dfl = use_dfl
+ self.reg_max = reg_max
+ self.proj_conv = nn.Conv2d(self.reg_max + 1, 1, 1, bias=False)
+ self.grid_cell_offset = 0.5
+ self.grid_cell_size = 5.0
+ self.anchors_init= ((torch.tensor(anchors) / self.stride[:,None])).reshape(self.nl, self.na, 2)
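+        # Anchor w/h priors rescaled to feature-map units (divided by each level's stride) and
+        # reshaped to (num_levels, num_anchors, 2); only the optional fuse_ab branch in forward()
+        # uses them.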
+ self.reg_mask = reg_mask
+
+ # Init decouple head
+ self.stems = nn.ModuleList()
+ self.cls_convs = nn.ModuleList()
+ self.reg_convs = nn.ModuleList()
+ self.seg_convs = nn.ModuleList()
+ self.cls_preds = nn.ModuleList()
+ self.reg_preds = nn.ModuleList()
+ self.seg_preds = nn.ModuleList()
+ self.cls_preds_ab = nn.ModuleList()
+ self.reg_preds_ab = nn.ModuleList()
+ self.seg_preds_ab = nn.ModuleList()
+ self.seg_proto = nn.ModuleList()
+ self.seg_proto.append(reg_mask[0])
+ self.seg_proto.append(reg_mask[1])
+ self.seg_proto.append(reg_mask[2])
+
+
+ # Efficient decoupled head layers
+ for i in range(num_layers):
+ idx = i*10
+ self.stems.append(head_layers[idx])
+ self.cls_convs.append(head_layers[idx+1])
+ self.reg_convs.append(head_layers[idx+2])
+ self.seg_convs.append(head_layers[idx+3])
+ self.cls_preds.append(head_layers[idx+4])
+ self.reg_preds.append(head_layers[idx+5])
+ self.seg_preds.append(head_layers[idx+6])
+ if self.fuse_ab:
+ self.cls_preds_ab.append(head_layers[idx+7])
+ self.reg_preds_ab.append(head_layers[idx+8])
+ self.seg_preds_ab.append(head_layers[idx+9])
+
+
+ def initialize_biases(self):
+
+ for conv in self.cls_preds:
+ b = conv.bias.view(-1, )
+ b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob))
+ conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+ w = conv.weight
+ w.data.fill_(0.)
+ conv.weight = torch.nn.Parameter(w, requires_grad=True)
+
+ if self.fuse_ab:
+ for conv in self.cls_preds_ab:
+ b = conv.bias.view(-1, )
+ b.data.fill_(-math.log((1 - self.prior_prob) / self.prior_prob))
+ conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+ w = conv.weight
+ w.data.fill_(0.)
+ conv.weight = torch.nn.Parameter(w, requires_grad=True)
+
+ for conv in self.reg_preds:
+ b = conv.bias.view(-1, )
+ b.data.fill_(1.0)
+ conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+ w = conv.weight
+ w.data.fill_(0.)
+ conv.weight = torch.nn.Parameter(w, requires_grad=True)
+
+ if self.fuse_ab:
+ for conv in self.reg_preds_ab:
+ b = conv.bias.view(-1, )
+ b.data.fill_(1.0)
+ conv.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+ w = conv.weight
+ w.data.fill_(0.)
+ conv.weight = torch.nn.Parameter(w, requires_grad=True)
+
+ self.proj = nn.Parameter(torch.linspace(0, self.reg_max, self.reg_max + 1), requires_grad=False)
+ self.proj_conv.weight = nn.Parameter(self.proj.view([1, self.reg_max + 1, 1, 1]).clone().detach(),
+ requires_grad=False)
+
+
+ def handleseg_ab(self, sgot_lst, sg_msk_lst):
+ '''
+ sg_msk_lst --> lst sg_msk: segmask: Shape(bs, 32, w, h)
+ sgot_lst --> lst sgot: seg_output_conf: Shape(bs, num_of_anchors, h, w, 32)
+ sgot.flatten(1, 3) -> Shape(bs, n*num_of_anchors, 32)
+        for j in range(bs) -> (n*num_of_anchors, 32) @ (32, w0*h0) -> reshaped to (n*num_of_anchors, w0, h0)
+ '''
+ mask_res = []
+ for i in range(len(sgot_lst)):
+ sgot = sgot_lst[i]
+ sg_msk = sg_msk_lst[i]
+ s_shape = sgot.shape[1:4]
+ sgot = sgot.flatten(1, 3)
+ t_mask_res = []
+ for j in range(sgot.shape[0]):
+ sgot_t = sgot[j] # (n, 32)
+ sg_msk_t = sg_msk[j] # (32, w, h)
+ m_t = (sgot_t@sg_msk_t.reshape(self.nm, -1)).reshape(-1, *sg_msk_t.shape[1:])
+ m_t = m_t.unsqueeze(0)
+ t_mask_res.append(m_t)
+ mask_res.append(torch.cat(t_mask_res, 0).flatten(0,1))
+ return mask_res
+
+
+
+
+
+ def forward(self, x):
+ if self.training:
+ device = x[0].device
+ cls_score_list_af = []
+ reg_dist_list_af = []
+ cls_score_list_ab = []
+ reg_dist_list_ab = []
+ seg_conf_list_af = []
+ seg_conf_list_ab = []
+ seg_list = []
+ af_seg_list = []
+ ab_seg_list = []
+
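+            # Prototype masks are predicted from all three FPN levels and summed; this assumes the
+            # three Proto modules produce outputs of the same spatial size (via their upsample
+            # factors) so the element-wise sum is valid.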
+ s1 = self.seg_proto[0](x[0])
+ s2 = self.seg_proto[1](x[1])
+ s3 = self.seg_proto[2](x[2])
+ seg_mask = s1 + s2 + s3
+ seg_list.append(seg_mask)
+
+
+
+ for i in range(self.nl):
+ b, _, h, w = x[i].shape
+ l = h * w
+
+
+ x[i] = self.stems[i](x[i])
+
+
+ cls_x = x[i]
+ reg_x = x[i]
+ seg_x = x[i]
+
+ cls_feat = self.cls_convs[i](cls_x)
+ reg_feat = self.reg_convs[i](reg_x)
+ seg_feat = self.seg_convs[i](seg_x)
+
+ #anchor_base
+ if self.fuse_ab:
+ cls_output_ab = self.cls_preds_ab[i](cls_feat)
+ reg_output_ab = self.reg_preds_ab[i](reg_feat)
+ seg_output_ab = self.seg_preds_ab[i](seg_feat)
+
+                    cls_output_ab = torch.sigmoid(cls_output_ab)
+                    seg_output_ab = torch.sigmoid(seg_output_ab)
+ if self.fuse_ab:
+ seg_conf_list_ab.append(seg_output_ab.reshape(b, self.na, -1, h, w).permute(0,1,3,4,2))
+ cls_output_ab = cls_output_ab.reshape(b, self.na, -1, h, w).permute(0,1,3,4,2)
+ cls_score_list_ab.append(cls_output_ab.flatten(1,3))
+
+
+ reg_output_ab = reg_output_ab.reshape(b, self.na, -1, h, w).permute(0,1,3,4,2)
+ reg_output_ab[..., 2:4] = ((reg_output_ab[..., 2:4].sigmoid() * 2) ** 2 ) * (self.anchors_init[i].reshape(1, self.na, 1, 1, 2).to(device))
+ reg_dist_list_ab.append(reg_output_ab.flatten(1,3))
+
+ #anchor_free
+ cls_output_af = self.cls_preds[i](cls_feat)
+ reg_output_af = self.reg_preds[i](reg_feat)
+ seg_output_af = self.seg_preds[i](seg_feat)
+
+ cls_output_af = torch.sigmoid(cls_output_af)
+ # seg_output_af = torch.sigmoid(seg_output_af)
+ seg_conf_list_af.append(seg_output_af.flatten(2).permute((0, 2, 1)))
+
+ cls_score_list_af.append(cls_output_af.flatten(2).permute((0, 2, 1)))
+ reg_dist_list_af.append(reg_output_af.flatten(2).permute((0, 2, 1)))
+
+            # fuse_ab is not supported for segmentation yet
+ if False:
+ ab_seg_list = self.handleseg_ab(seg_conf_list_ab, seg_list) if self.fuse_ab else []
+ cls_score_list_ab = torch.cat(cls_score_list_ab, axis=1)
+ reg_dist_list_ab = torch.cat(reg_dist_list_ab, axis=1)
+ cls_score_list_af = torch.cat(cls_score_list_af, axis=1)
+ reg_dist_list_af = torch.cat(reg_dist_list_af, axis=1)
+
+ return x, cls_score_list_ab, reg_dist_list_ab, cls_score_list_af, reg_dist_list_af, [seg_conf_list_af, seg_list], ab_seg_list
+
+ else:
+ device = x[0].device
+ cls_score_list_af = []
+ reg_dist_list_af = []
+ seg_list = []
+ seg_conf_list_af = []
+ s1 = self.seg_proto[0](x[0])
+ s2 = self.seg_proto[1](x[1])
+ s3 = self.seg_proto[2](x[2])
+ seg_mask = s1 + s2 + s3
+ seg_list.append(seg_mask)
+
+ for i in range(self.nl):
+ b, _, h, w = x[i].shape
+ l = h * w
+
+
+ x[i] = self.stems[i](x[i])
+
+ cls_x = x[i]
+ reg_x = x[i]
+ seg_x = x[i]
+
+ cls_feat = self.cls_convs[i](cls_x)
+ reg_feat = self.reg_convs[i](reg_x)
+ seg_feat = self.seg_convs[i](seg_x)
+
+ #anchor_free
+ cls_output_af = self.cls_preds[i](cls_feat)
+ reg_output_af = self.reg_preds[i](reg_feat)
+ seg_output_af = self.seg_preds[i](seg_feat)
+
+ if self.use_dfl:
+ reg_output_af = reg_output_af.reshape([-1, 4, self.reg_max + 1, l]).permute(0, 2, 1, 3)
+ reg_output_af = self.proj_conv(F.softmax(reg_output_af, dim=1))
+
+ cls_output_af = torch.sigmoid(cls_output_af)
+ proto_no = (torch.ones(b, 1, l) * i).cuda()
+
+
+ if self.export:
+ cls_score_list_af.append(cls_output_af)
+ reg_dist_list_af.append(reg_output_af)
+ seg_conf_list_af.append(seg_output_af)
+ else:
+ cls_score_list_af.append(cls_output_af.reshape([b, self.nc, l]))
+ reg_dist_list_af.append(reg_output_af.reshape([b, 4, l]))
+                    seg_conf_list_af.append(torch.cat([proto_no, seg_output_af.reshape([b, 67, l])], axis = 1)) # [which_proto, 67-dim dynamic-conv kernel coefficients]
+
+ if self.export:
+ return tuple(torch.cat([cls, reg, seg], 1) for cls, reg, seg in zip(cls_score_list_af, reg_dist_list_af, seg_conf_list_af)), seg_list[0]
+
+ cls_score_list_af = torch.cat(cls_score_list_af, axis=-1).permute(0, 2, 1)
+ reg_dist_list_af = torch.cat(reg_dist_list_af, axis=-1).permute(0, 2, 1)
+ seg_conf_list_af = torch.cat(seg_conf_list_af, axis=-1).permute(0, 2, 1)
+
+
+
+ #anchor_free
+ anchor_points_af, stride_tensor_af = generate_anchors(
+ x, self.stride, self.grid_cell_size, self.grid_cell_offset, device=x[0].device, is_eval=True, mode='af')
+
+ pred_bboxes_af = dist2bbox(reg_dist_list_af, anchor_points_af, box_format='xywh')
+ pred_bboxes_af *= stride_tensor_af
+
+ pred_bboxes = pred_bboxes_af
+ cls_score_list = cls_score_list_af
+
+ return torch.cat(
+ [
+ pred_bboxes,
+ torch.ones((b, pred_bboxes.shape[1], 1), device=pred_bboxes.device, dtype=pred_bboxes.dtype),
+ cls_score_list
+ ],
+ axis=-1), seg_list, seg_conf_list_af
+
+class Proto(nn.Module):
+ # Borrowed from YOLOv5
+    def __init__(self, num_layers, channels_list, pos, c_=256, c2=64, scale_factor=2):  # num layers, channels list, level index, proto channels, mask channels, upsample factor
+ super().__init__()
+ chx = [6, 8, 10] if num_layers == 3 else [8, 9, 10, 11]
+ c1 = channels_list[chx[pos]]
+ self.cv1 = Conv(c1, c_, k=3)
+ self.upsample = nn.Upsample(scale_factor=scale_factor, mode='nearest')
+ self.cv2 = Conv(c_, c_, k=3)
+ self.cv3 = Conv(c_, c2)
+
+ def forward(self, x):
+ return self.cv3(self.cv2(self.upsample(self.cv1(x))))
+
+def autopad(k, p=None, d=1): # kernel, padding, dilation
+ # Pad to 'same' shape outputs
+ if d > 1:
+ k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k] # actual kernel-size
+ if p is None:
+ p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad
+ return p
+
+
+class Conv(nn.Module):
+ # Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
+ default_act = nn.SiLU() # default activation
+
+ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
+ super().__init__()
+ self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
+ self.bn = nn.BatchNorm2d(c2)
+ self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
+
+ def forward(self, x):
+ return self.act(self.bn(self.conv(x)))
+
+ def forward_fuse(self, x):
+ return self.act(self.conv(x))
+
+
+def build_effidehead_layer(channels_list, num_anchors, num_classes, reg_max=16, num_layers=3, num_masks=67, fuse_ab=False):
+
+ chx = [6, 8, 10] if num_layers == 3 else [8, 9, 10, 11]
+
+ head_layers = nn.Sequential(
+ # stem0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=3,
+ stride=1
+ ),
+ # seg_conv0
+ ConvBNSiLU(
+ in_channels=channels_list[chx[0]],
+ out_channels=channels_list[chx[0]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred0_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=num_classes,
+ kernel_size=1
+ ),
+ # reg_pred0_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=4 * (reg_max + 1),
+ kernel_size=1
+ ),
+ # seg_pred0_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=num_masks,
+ kernel_size=1
+ ),
+ # cls_pred0_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred0_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=4 * num_anchors,
+ kernel_size=1
+ ),
+ # seg_pred0_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[0]],
+ out_channels=num_masks * num_anchors,
+ kernel_size=1
+ ),
+ # stem1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=3,
+ stride=1
+ ),
+ # seg_conv1
+ ConvBNSiLU(
+ in_channels=channels_list[chx[1]],
+ out_channels=channels_list[chx[1]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred1_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=num_classes,
+ kernel_size=1
+ ),
+ # reg_pred1_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=4 * (reg_max + 1),
+ kernel_size=1
+ ),
+ # seg_pred1_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=num_masks,
+ kernel_size=1
+ ),
+ # cls_pred1_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred1_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=4 * num_anchors,
+ kernel_size=1
+ ),
+ # seg_pred1_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[1]],
+ out_channels=num_masks * num_anchors,
+ kernel_size=1
+ ),
+ # stem2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=1,
+ stride=1
+ ),
+ # cls_conv2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=3,
+ stride=1
+ ),
+ # reg_conv2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=3,
+ stride=1
+ ),
+ # seg_conv2
+ ConvBNSiLU(
+ in_channels=channels_list[chx[2]],
+ out_channels=channels_list[chx[2]],
+ kernel_size=3,
+ stride=1
+ ),
+ # cls_pred2_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=num_classes,
+ kernel_size=1
+ ),
+ # reg_pred2_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=4 * (reg_max + 1),
+ kernel_size=1
+ ),
+ # seg_pred2_af
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=num_masks,
+ kernel_size=1
+ ),
+ # cls_pred2_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=num_classes * num_anchors,
+ kernel_size=1
+ ),
+ # reg_pred2_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=4 * num_anchors,
+ kernel_size=1
+ ),
+ # seg_pred2_3ab
+ nn.Conv2d(
+ in_channels=channels_list[chx[2]],
+ out_channels=num_masks * num_anchors,
+ kernel_size=1
+ ),
+ )
+
+ return head_layers
+
+
+
+
+
+
+
diff --git a/yolov6/models/losses/loss.py b/yolov6/models/losses/loss.py
index ec534923..c4fe8d87 100644
--- a/yolov6/models/losses/loss.py
+++ b/yolov6/models/losses/loss.py
@@ -30,8 +30,6 @@ def __init__(self,
):
self.fpn_strides = fpn_strides
- self.cached_feat_sizes = [torch.Size([0, 0]) for _ in fpn_strides]
- self.cached_anchors = None
self.grid_cell_size = grid_cell_size
self.grid_cell_offset = grid_cell_offset
self.num_classes = num_classes
@@ -60,13 +58,8 @@ def __call__(
):
feats, pred_scores, pred_distri = outputs
- if all(feat.shape[2:] == cfsize for feat, cfsize in zip(feats, self.cached_feat_sizes)):
- anchors, anchor_points, n_anchors_list, stride_tensor = self.cached_anchors
- else:
- self.cached_feat_sizes = [feat.shape[2:] for feat in feats]
- anchors, anchor_points, n_anchors_list, stride_tensor = \
- generate_anchors(feats, self.fpn_strides, self.grid_cell_size, self.grid_cell_offset, device=feats[0].device)
- self.cached_anchors = anchors, anchor_points, n_anchors_list, stride_tensor
+ anchors, anchor_points, n_anchors_list, stride_tensor = \
+ generate_anchors(feats, self.fpn_strides, self.grid_cell_size, self.grid_cell_offset, device=feats[0].device)
assert pred_scores.type() == pred_distri.type()
gt_bboxes_scale = torch.tensor([batch_width, batch_height, batch_width, batch_height]).type_as(pred_scores)
diff --git a/yolov6/models/losses/seg_loss.py b/yolov6/models/losses/seg_loss.py
new file mode 100644
index 00000000..04a25ecd
--- /dev/null
+++ b/yolov6/models/losses/seg_loss.py
@@ -0,0 +1,532 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+import torch
+import torch.nn as nn
+import numpy as np
+import torch.nn.functional as F
+from yolov6.assigners.anchor_generator import generate_anchors
+from yolov6.utils.general import dist2bbox, bbox2dist, xywh2xyxy, box_iou
+from yolov6.utils.figure_iou import IOUloss
+from yolov6.assigners.atss_assigner_seg import ATSSAssigner
+from yolov6.assigners.tal_assigner_seg import TaskAlignedAssigner
+import time
+import pickle
+
+class ComputeLoss:
+ '''Loss computation func.'''
+ def __init__(self,
+ fpn_strides=[8, 16, 32],
+ grid_cell_size=5.0,
+ grid_cell_offset=0.5,
+ num_classes=80,
+ ori_img_size=640,
+ warmup_epoch=4,
+ use_dfl=True,
+ reg_max=16,
+ nm=32,
+ iou_type='giou',
+ loss_weight={
+ 'class': 1.0,
+ 'iou': 2.5,
+ 'dfl': 0.5,
+ 'seg': 2.5},
+ ):
+
+ self.fpn_strides = fpn_strides
+ self.grid_cell_size = grid_cell_size
+ self.grid_cell_offset = grid_cell_offset
+ self.num_classes = num_classes
+ self.ori_img_size = ori_img_size
+ self.nm = nm
+ self.tt = nm
+ self.warmup_epoch = warmup_epoch
+ self.warmup_assigner = ATSSAssigner(9, num_classes=self.num_classes)
+ self.formal_assigner = TaskAlignedAssigner(topk=13, num_classes=self.num_classes, alpha=1.0, beta=6.0)
+
+ self.use_dfl = use_dfl
+ self.reg_max = reg_max
+ self.proj = nn.Parameter(torch.linspace(0, self.reg_max, self.reg_max + 1), requires_grad=False)
+ self.iou_type = iou_type
+ self.varifocal_loss = VarifocalLoss().cuda()
+ self.bbox_loss = BboxLoss(self.num_classes, self.reg_max, self.use_dfl, self.iou_type).cuda()
+ self.loss_weight = loss_weight
+
+ def __call__(
+ self,
+ outputs,
+ targets,
+ epoch_num,
+ step_num,
+ batch_height,
+ batch_width,
+ segmasks,
+ img=None,
+ ):
+
+ feats, pred_scores, pred_distri, pred_seg = outputs # seg_list:shape(3)(b, nm, mw, mh) seg_conf_list:shape(3):(b, l ,nm)
+ seg_cf, seg_proto = pred_seg
+ anchors, anchor_points, n_anchors_list, stride_tensor = \
+ generate_anchors(feats, self.fpn_strides, self.grid_cell_size, self.grid_cell_offset, device=feats[0].device)
+
+ assert pred_scores.type() == pred_distri.type()
+ gt_bboxes_scale = torch.tensor([batch_width, batch_height, batch_width, batch_height]).type_as(pred_scores)
+ batch_size = pred_scores.shape[0]
+
+        targets, gt_segmasks = self.preprocess(targets, batch_size, gt_bboxes_scale, segmasks)
+ gt_labels = targets[:, :, :1]
+ gt_bboxes = targets[:, :, 1:] #xyxy
+ mask_gt = (gt_bboxes.sum(-1, keepdim=True) > 0).float()
+
+ anchor_points_s = anchor_points / stride_tensor
+ pred_bboxes = self.bbox_decode(anchor_points_s, pred_distri) #xyxy
+ try:
+ if epoch_num < self.warmup_epoch:
+ target_labels, target_bboxes, target_scores, fg_mask, target_segmasks = \
+ self.warmup_assigner(
+ anchors,
+ n_anchors_list,
+ gt_labels,
+ gt_bboxes,
+ mask_gt,
+ pred_bboxes.detach() * stride_tensor,
+ gt_segmasks)
+ else:
+ target_labels, target_bboxes, target_scores, fg_mask, idx_lst = \
+ self.formal_assigner(
+ pred_scores.detach(),
+ pred_bboxes.detach() * stride_tensor,
+ anchor_points,
+ gt_labels,
+ gt_bboxes,
+ mask_gt,
+ gt_segmasks)
+
+ except RuntimeError:
+ print(
+ "OOM RuntimeError is raised due to the huge memory cost during label assignment. \
+ CPU mode is applied in this batch. If you want to avoid this issue, \
+ try to reduce the batch size or image size."
+ )
+ torch.cuda.empty_cache()
+ print("------------CPU Mode for This Batch-------------")
+ if epoch_num < self.warmup_epoch:
+ _anchors = anchors.cpu().float()
+ _n_anchors_list = n_anchors_list
+ _gt_labels = gt_labels.cpu().float()
+ _gt_bboxes = gt_bboxes.cpu().float()
+ _mask_gt = mask_gt.cpu().float()
+ _pred_bboxes = pred_bboxes.detach().cpu().float()
+ _stride_tensor = stride_tensor.cpu().float()
+ _segmasks = gt_segmasks.cpu().float()
+
+ target_labels, target_bboxes, target_scores, fg_mask, target_segmasks = \
+ self.warmup_assigner(
+ _anchors,
+ _n_anchors_list,
+ _gt_labels,
+ _gt_bboxes,
+ _mask_gt,
+ _pred_bboxes * _stride_tensor,
+ _segmasks)
+
+ else:
+ _pred_scores = pred_scores.detach().cpu().float()
+ _pred_bboxes = pred_bboxes.detach().cpu().float()
+ _anchor_points = anchor_points.cpu().float()
+ _gt_labels = gt_labels.cpu().float()
+ _gt_bboxes = gt_bboxes.cpu().float()
+ _mask_gt = mask_gt.cpu().float()
+ _stride_tensor = stride_tensor.cpu().float()
+ _segmasks = gt_segmasks.cpu().float()
+
+ target_labels, target_bboxes, target_scores, fg_mask, idx_lst = \
+ self.formal_assigner(
+ _pred_scores,
+ _pred_bboxes * _stride_tensor,
+ _anchor_points,
+ _gt_labels,
+ _gt_bboxes,
+ _mask_gt,
+ _segmasks)
+
+ target_labels = target_labels.cuda()
+ target_bboxes = target_bboxes.cuda()
+ target_scores = target_scores.cuda()
+ fg_mask = fg_mask.cuda()
+            idx_lst = [t.cuda() for t in idx_lst]
+
+
+ if step_num % 10 == 0:
+ torch.cuda.empty_cache()
+
+ # rescale bbox
+ target_bboxes /= stride_tensor
+
+ # cls loss
+ target_labels = torch.where(fg_mask > 0, target_labels, torch.full_like(target_labels, self.num_classes))
+ one_hot_label = F.one_hot(target_labels.long(), self.num_classes + 1)[..., :-1]
+ loss_cls = self.varifocal_loss(pred_scores, target_scores, one_hot_label)
+
+
+ target_scores_sum = target_scores.sum()
+
+        # avoid dividing by zero, which would make the loss inf or nan;
+        # if target_scores_sum is 0, loss_cls is already 0
+ if target_scores_sum > 1:
+ loss_cls /= target_scores_sum
+
+ # bbox loss
+ loss_iou, loss_dfl = self.bbox_loss(pred_distri, pred_bboxes, anchor_points_s, target_bboxes,
+ target_scores, target_scores_sum, fg_mask)
+
+ loss_seg = self.mask_loss(gt_segmasks, seg_cf, seg_proto, target_bboxes, fg_mask, idx_lst, target_scores, target_scores_sum)
+
+ loss = self.loss_weight['class'] * loss_cls + \
+ self.loss_weight['iou'] * loss_iou + \
+ self.loss_weight['dfl'] * loss_dfl + \
+ self.loss_weight['seg'] * loss_seg
+
+
+ return loss, \
+ torch.cat(((self.loss_weight['iou'] * loss_iou).unsqueeze(0),
+ (self.loss_weight['dfl'] * loss_dfl).unsqueeze(0),
+ (self.loss_weight['class'] * loss_cls).unsqueeze(0),
+ (self.loss_weight['seg'] * loss_seg).unsqueeze(0))).detach()
+
+ def preprocess(self, targets, batch_size, scale_tensor, segmask):
+ targets_list = np.zeros((batch_size, 1, 5)).tolist()
+ cu = []
+ already = []
+ # seg_list = np.zeros((batch_size, 1, *segmask.shape[1:])).tolist()
+ for i, item in enumerate(targets.cpu().numpy().tolist()):
+ index = int(item[0])
+ targets_list[index].append(item[1:])
+ if index not in already:
+ already.append(index)
+ cu.append(i)
+ cu.append(segmask.shape[0])
+ max_len = max((len(l) for l in targets_list))
+ segmasks = torch.zeros(batch_size, max_len - 1, segmask.shape[-2], segmask.shape[-1]).cuda()
+ if len(already) != 0:
+ for i in range(len(already)):
+ j = already[i]
+ start = cu[i]
+ end = cu[i+1]
+ segmasks[j, : end - start] = segmask[start: end].clone()
+ targets = torch.from_numpy(np.array(list(map(lambda l:l + [[-1,0,0,0,0]]*(max_len - len(l)), targets_list)))[:,1:,:]).to(targets.device)
+
+ batch_target = targets[:, :, 1:5].mul_(scale_tensor)
+ targets[..., 1:] = xywh2xyxy(batch_target)
+ return targets, segmasks
+
+ def bbox_decode(self, anchor_points, pred_dist):
+ if self.use_dfl:
+ batch_size, n_anchors, _ = pred_dist.shape
+ pred_dist = F.softmax(pred_dist.view(batch_size, n_anchors, 4, self.reg_max + 1), dim=-1).matmul(self.proj.to(pred_dist.device))
+ return dist2bbox(pred_dist, anchor_points)
+
+ def mask_loss(self, gt_segmasks, seg_cf, seg_proto, txyxy_ori, fg_mask, idx_lst, target_scores=None, target_scores_sum=None):
+ # pred_mask_lst -> list
+ '''
+ pred_mask -> Shape(n1, w, h)
+ gt_mask -> Shape(n, img_w, img_h)
+ xyxy -> Shape(n, 4)
+ sum(n1, n2, n3, ...) = n
+        torch.abs((xyxy[..., 2] - xyxy[..., 0]) * (xyxy[..., 3] - xyxy[..., 1])) -> area
+ fg_mask --> (bs, tsize)
+ idx -> (bs, tsize)
+ gt_segmasks -> (bs, labelsize, w, h)
+ '''
+ sl = 0
+ sl2 = 0
+ bl = [2, 4, 8]
+ num_pos = fg_mask.sum()
+ tloss = torch.zeros(1).float().cuda()
+ if num_pos<=0:
+ for ipred in seg_proto:
+ tloss += (ipred.sum() * 0.)
+ for ipred in seg_cf:
+ tloss += (ipred.sum() * 0.)
+ return tloss[0]
+
+
+ xyxy_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4])
+        mtarget_scores = target_scores.sum(-1) # (bs, n_total_anchors)
+
+ sl = 0
+ qf = len(idx_lst) == 1 and len(idx_lst[0].shape) == 2
+ if qf:
+ idx_lst = idx_lst[0]
+ for j in range(len(seg_cf)):
+ ishape = 0
+ pshape = 0
+
+ iseg_proto = seg_proto[0] # (bs, 32, h, w)
+ bs = iseg_proto.shape[0]
+ iseg_cf = seg_cf[j] # (bs, part_n, 32)
+
+ pshape = iseg_proto.shape[-1]
+ ishape = iseg_cf.shape[1] # (1) = part_n
+ idx = idx_lst[:, sl: sl + ishape] # (bs, part_n)
+
+ ifg_mask = fg_mask[:, sl: sl + ishape] # (n) --> (bs, part_n)
+ itarget_scores = mtarget_scores[:, sl: sl + ishape]
+ if ifg_mask.sum() <= 0:
+ tloss += (iseg_proto.sum() * 0.)
+ tloss += (iseg_cf.sum() * 0.)
+ continue
+ target_sg = []
+ pred_sg = []
+ ixyxy_lst = []
+ mask_weight = []
+ for i in range(bs):
+ idx_thisbatch = torch.masked_select(idx[i], ifg_mask[i]) #(casize)
+ igt_segmasks = gt_segmasks.reshape(-1, *gt_segmasks.shape[-2:])[idx_thisbatch] # (?1, h?, w?) --> (?2, h?, w?)
+ imask_weight = torch.masked_select(itarget_scores[i], ifg_mask[i]).unsqueeze(-1)
+ mask_weight.append(imask_weight)
+ target_sg.append(igt_segmasks)
+ tiseg_cf = torch.masked_select(iseg_cf[i], ifg_mask[i].unsqueeze(-1).repeat(1, self.tt)) # (?2, 32)
+ tiseg_cf = tiseg_cf.reshape(-1, self.tt)
+ ipred_seg = (tiseg_cf@iseg_proto[i].reshape(self.tt, -1)).reshape(-1, pshape, pshape) # (?2, h, w)
+ ixyxy = torch.masked_select(txyxy_ori[i, sl: sl + ishape], xyxy_mask[i, sl: sl + ishape, :]).reshape(-1, 4) # (n, 4) --> (part_n, 4) --> (?2, 4)
+ ixyxy_lst.append(ixyxy)
+ pred_sg.append(ipred_seg)
+
+
+
+
+ bxyxy = torch.cat(ixyxy_lst, dim = 0) * bl[j]
+ bpred_seg = torch.cat(pred_sg, dim = 0)
+ bgt_seg = torch.cat(target_sg, dim = 0)
+ masks_weight = torch.cat(mask_weight, dim = 0).reshape(-1)
+ if tuple(bgt_seg.shape[-2:]) != (pshape, pshape): # downsample
+ bgt_seg = F.interpolate(bgt_seg[None], (pshape, pshape), mode='nearest')[0]
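+            # Per-instance BCE is cropped to the target box, averaged over pixels and divided by the
+            # box area expressed relative to the prototype map (hence the two divisions by pshape),
+            # similar to the YOLOv5 segment loss from which crop_mask is borrowed.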
+ area = torch.abs((bxyxy[..., 2] - bxyxy[..., 0]) * (bxyxy[..., 3] - bxyxy[..., 1]))
+ area = area / (pshape)
+ area = area / (pshape)
+
+
+
+
+
+ sl += ishape
+ loss = F.binary_cross_entropy_with_logits(bpred_seg, bgt_seg, reduction='none')
+
+ loss = (self.crop_mask(loss, bxyxy).mean(dim=(1, 2)) / area) * masks_weight
+ loss = loss.sum()
+ tloss += loss
+ if target_scores_sum > 1:
+ tloss[0] = tloss[0] / target_scores_sum
+ return tloss[0] / len(seg_cf)
+
+
+ @staticmethod
+ def crop_mask(masks, boxes):
+ """
+ "Crop" predicted masks by zeroing out everything not in the predicted bbox.
+ Vectorized by Chong (thanks Chong).
+
+ Args:
+ - masks should be a size [n, h, w] tensor of masks
+ - boxes should be a size [n, 4] tensor of bbox coords in relative point form
+ """
+
+ n, h, w = masks.shape
+        x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # each of shape (n, 1, 1)
+        r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # column indices, shape (1, 1, w)
+        c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # row indices, shape (1, h, 1)
+
+ return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+
+class VarifocalLoss(nn.Module):
+ def __init__(self):
+ super(VarifocalLoss, self).__init__()
+
+ def forward(self, pred_score,gt_score, label, alpha=0.75, gamma=2.0):
+
+ weight = alpha * pred_score.pow(gamma) * (1 - label) + gt_score * label
+ with torch.cuda.amp.autocast(enabled=False):
+ loss = (F.binary_cross_entropy(pred_score.float(), gt_score.float(), reduction='none') * weight).sum()
+
+ return loss
+
+
+class BboxLoss(nn.Module):
+ def __init__(self, num_classes, reg_max, use_dfl=False, iou_type='giou'):
+ super(BboxLoss, self).__init__()
+ self.num_classes = num_classes
+ self.iou_loss = IOUloss(box_format='xyxy', iou_type=iou_type, eps=1e-10)
+ self.reg_max = reg_max
+ self.use_dfl = use_dfl
+
+ def forward(self, pred_dist, pred_bboxes, anchor_points,
+ target_bboxes, target_scores, target_scores_sum, fg_mask):
+
+ # select positive samples mask
+ num_pos = fg_mask.sum()
+ if num_pos > 0:
+ # iou loss
+ bbox_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4])
+ pred_bboxes_pos = torch.masked_select(pred_bboxes,
+ bbox_mask).reshape([-1, 4])
+ target_bboxes_pos = torch.masked_select(
+ target_bboxes, bbox_mask).reshape([-1, 4])
+ bbox_weight = torch.masked_select(
+ target_scores.sum(-1), fg_mask).unsqueeze(-1)
+ loss_iou = self.iou_loss(pred_bboxes_pos,
+ target_bboxes_pos) * bbox_weight
+ if target_scores_sum > 1:
+ loss_iou = loss_iou.sum() / target_scores_sum
+ else:
+ loss_iou = loss_iou.sum()
+
+ # dfl loss
+ if self.use_dfl:
+ dist_mask = fg_mask.unsqueeze(-1).repeat(
+ [1, 1, (self.reg_max + 1) * 4])
+ pred_dist_pos = torch.masked_select(
+ pred_dist, dist_mask).reshape([-1, 4, self.reg_max + 1])
+ target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max)
+ target_ltrb_pos = torch.masked_select(
+ target_ltrb, bbox_mask).reshape([-1, 4])
+ loss_dfl = self._df_loss(pred_dist_pos,
+ target_ltrb_pos) * bbox_weight
+ if target_scores_sum > 1:
+ loss_dfl = loss_dfl.sum() / target_scores_sum
+ else:
+ loss_dfl = loss_dfl.sum()
+ else:
+ loss_dfl = pred_dist.sum() * 0.
+
+ else:
+ loss_iou = pred_dist.sum() * 0.
+ loss_dfl = pred_dist.sum() * 0.
+
+ return loss_iou, loss_dfl
+
+ def _df_loss(self, pred_dist, target):
+ target_left = target.to(torch.long)
+ target_right = target_left + 1
+ weight_left = target_right.to(torch.float) - target
+ weight_right = 1 - weight_left
+ loss_left = F.cross_entropy(
+ pred_dist.view(-1, self.reg_max + 1), target_left.view(-1), reduction='none').view(
+ target_left.shape) * weight_left
+ loss_right = F.cross_entropy(
+ pred_dist.view(-1, self.reg_max + 1), target_right.view(-1), reduction='none').view(
+ target_left.shape) * weight_right
+ return (loss_left + loss_right).mean(-1, keepdim=True)
+
+def dice_loss(pred,
+ target,
+ weight=None,
+ eps=1e-3,
+ reduction='mean',
+ naive_dice=False,
+ avg_factor=None):
+ """Calculate dice loss, there are two forms of dice loss is supported:
+
+ - the one proposed in `V-Net: Fully Convolutional Neural
+ Networks for Volumetric Medical Image Segmentation
+ `_.
+ - the dice loss in which the power of the number in the
+ denominator is the first power instead of the second
+ power.
+
+ Args:
+ pred (torch.Tensor): The prediction, has a shape (n, *)
+ target (torch.Tensor): The learning label of the prediction,
+ shape (n, *), same shape of pred.
+ weight (torch.Tensor, optional): The weight of loss for each
+ prediction, has a shape (n,). Defaults to None.
+ eps (float): Avoid dividing by zero. Default: 1e-3.
+ reduction (str, optional): The method used to reduce the loss into
+ a scalar. Defaults to 'mean'.
+ Options are "none", "mean" and "sum".
+ naive_dice (bool, optional): If false, use the dice
+ loss defined in the V-Net paper, otherwise, use the
+ naive dice loss in which the power of the number in the
+ denominator is the first power instead of the second
+            power. Defaults to False.
+ avg_factor (int, optional): Average factor that is used to average
+ the loss. Defaults to None.
+ """
+
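+    # Illustrative check (not from the original code): for a binary prediction identical to the
+    # target, a == b == c, so with naive_dice=True d = (2a + eps) / (2a + eps) = 1 and the loss is
+    # 0; for a completely disjoint prediction a = 0, so d is close to 0 and the loss close to 1.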
+ input = pred.flatten(1)
+ target = target.flatten(1).float()
+
+ a = torch.sum(input * target, 1)
+ if naive_dice:
+ b = torch.sum(input, 1)
+ c = torch.sum(target, 1)
+ d = (2 * a + eps) / (b + c + eps)
+ else:
+ b = torch.sum(input * input, 1) + eps
+ c = torch.sum(target * target, 1) + eps
+ d = (2 * a) / (b + c)
+
+ loss = 1 - d
+ if weight is not None:
+ assert weight.ndim == loss.ndim
+ assert len(weight) == len(pred)
+ loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
+ return loss
+
+def weight_reduce_loss(loss,
+ weight=None,
+ reduction='mean',
+ avg_factor=None):
+ """Apply element-wise weight and reduce loss.
+
+ Args:
+ loss (Tensor): Element-wise loss.
+ weight (Optional[Tensor], optional): Element-wise weights.
+ Defaults to None.
+ reduction (str, optional): Same as built-in losses of PyTorch.
+ Defaults to 'mean'.
+ avg_factor (Optional[float], optional): Average factor when
+ computing the mean of losses. Defaults to None.
+
+ Returns:
+ Tensor: Processed loss values.
+ """
+ # if weight is specified, apply element-wise weight
+ if weight is not None:
+ loss = loss * weight
+
+ # if avg_factor is not specified, just reduce the loss
+ if avg_factor is None:
+ loss = reduce_loss(loss, reduction)
+ else:
+ # if reduction is mean, then average the loss by avg_factor
+ if reduction == 'mean':
+ # Avoid causing ZeroDivisionError when avg_factor is 0.0,
+ # i.e., all labels of an image belong to ignore index.
+ eps = torch.finfo(torch.float32).eps
+ loss = loss.sum() / (avg_factor + eps)
+ # if reduction is 'none', then do nothing, otherwise raise an error
+ elif reduction != 'none':
+ raise ValueError('avg_factor can not be used with reduction="sum"')
+ return loss
+
+def reduce_loss(loss, reduction):
+ """Reduce loss as specified.
+
+ Args:
+ loss (Tensor): Elementwise loss tensor.
+ reduction (str): Options are "none", "mean" and "sum".
+
+ Return:
+ Tensor: Reduced loss tensor.
+ """
+ reduction_enum = F._Reduction.get_enum(reduction)
+ # none: 0, elementwise_mean:1, sum: 2
+ if reduction_enum == 0:
+ return loss
+ elif reduction_enum == 1:
+ return loss.mean()
+ elif reduction_enum == 2:
+ return loss.sum()
\ No newline at end of file
diff --git a/yolov6/models/losses/seg_loss_solo_main.py b/yolov6/models/losses/seg_loss_solo_main.py
new file mode 100644
index 00000000..3a329beb
--- /dev/null
+++ b/yolov6/models/losses/seg_loss_solo_main.py
@@ -0,0 +1,583 @@
+#!/usr/bin/env python3
+# -*- coding:utf-8 -*-
+
+import torch
+import torch.nn as nn
+import numpy as np
+import torch.nn.functional as F
+from yolov6.assigners.anchor_generator import generate_anchors
+from yolov6.utils.general import dist2bbox, bbox2dist, xywh2xyxy, box_iou
+from yolov6.utils.figure_iou import IOUloss
+from yolov6.assigners.atss_assigner_seg import ATSSAssigner
+from yolov6.assigners.tal_assigner_seg import TaskAlignedAssigner
+import time
+import pickle
+
+class ComputeLoss:
+ '''Loss computation func.'''
+ def __init__(self,
+ fpn_strides=[8, 16, 32],
+ grid_cell_size=5.0,
+ grid_cell_offset=0.5,
+ num_classes=80,
+ ori_img_size=640,
+ warmup_epoch=4,
+ use_dfl=True,
+ reg_max=16,
+ weight_nums = 66,
+ bias_nums = 1,
+ nm = 64,
+ dyconv_channels = 66,
+ iou_type='giou',
+ loss_weight={
+ 'class': 1.0,
+ 'iou': 2.5,
+ 'dfl': 0.5,
+ 'seg': 2.5},
+ ):
+
+ self.fpn_strides = fpn_strides
+ self.grid_cell_size = grid_cell_size
+ self.grid_cell_offset = grid_cell_offset
+ self.num_classes = num_classes
+ self.ori_img_size = ori_img_size
+ self.nm = nm
+ self.tt = nm + bias_nums + 2
+ self.weight_nums = [nm + 2]
+ self.bias_nums = [bias_nums]
+ self.dyconv_channels = dyconv_channels
+
+ self.warmup_epoch = warmup_epoch
+ self.warmup_assigner = ATSSAssigner(9, num_classes=self.num_classes)
+ self.formal_assigner = TaskAlignedAssigner(topk=13, num_classes=self.num_classes, alpha=1.0, beta=6.0)
+
+ self.use_dfl = use_dfl
+ self.reg_max = reg_max
+ self.proj = nn.Parameter(torch.linspace(0, self.reg_max, self.reg_max + 1), requires_grad=False)
+ self.iou_type = iou_type
+ self.varifocal_loss = VarifocalLoss().cuda()
+ self.bbox_loss = BboxLoss(self.num_classes, self.reg_max, self.use_dfl, self.iou_type).cuda()
+ self.loss_weight = loss_weight
+ self.dice = True
+
+ def parse_dynamic_params(self, flatten_kernels):
+ """split kernel head prediction to conv weight and bias."""
+ n_inst = flatten_kernels.size(0)
+ n_layers = len(self.weight_nums)
+ params_splits = list(
+ torch.split_with_sizes(
+ flatten_kernels, self.weight_nums + self.bias_nums, dim=1))
+ weight_splits = params_splits[:n_layers]
+ bias_splits = params_splits[n_layers:]
+ for i in range(n_layers):
+ if i < n_layers - 1:
+ weight_splits[i] = weight_splits[i].reshape(
+ n_inst * self.dyconv_channels, -1, 1, 1)
+ bias_splits[i] = bias_splits[i].reshape(n_inst *
+ self.dyconv_channels)
+ else:
+ weight_splits[i] = weight_splits[i].reshape(n_inst, -1, 1, 1)
+ bias_splits[i] = bias_splits[i].reshape(n_inst)
+
+ return weight_splits, bias_splits
+
+ def handle_proto_coord(self, proto):
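+        # Appends two normalized coordinate channels (x and y in [0, 1)) to the prototype map,
+        # CoordConv-style, so the per-instance dynamic convolution can make use of position.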
+ _ = proto.shape[-1]
+ x = torch.arange(0, 1, step = 1 / _).unsqueeze(0).unsqueeze(0).repeat(1, _, 1).to(proto.dtype).to(proto.device)
+ y = torch.arange(0, 1, step = 1 / _).unsqueeze(0).T.unsqueeze(0).repeat(1, 1, _).to(proto.dtype).to(proto.device)
+ return torch.cat([proto, x, y]).reshape(1, -1, _, _)
+
+ def __call__(
+ self,
+ outputs,
+ targets,
+ epoch_num,
+ step_num,
+ batch_height,
+ batch_width,
+ segmasks,
+ img=None,
+ ):
+
+
+ feats, pred_scores, pred_distri, pred_seg = outputs # seg_list:shape(3)(b, nm, mw, mh) seg_conf_list:shape(3):(b, l ,nm)
+ seg_cf, seg_proto = pred_seg
+ anchors, anchor_points, n_anchors_list, stride_tensor = \
+ generate_anchors(feats, self.fpn_strides, self.grid_cell_size, self.grid_cell_offset, device=feats[0].device)
+
+ assert pred_scores.type() == pred_distri.type()
+ gt_bboxes_scale = torch.tensor([batch_width, batch_height, batch_width, batch_height]).type_as(pred_scores)
+ batch_size = pred_scores.shape[0]
+
+        targets, gt_segmasks = self.preprocess(targets, batch_size, gt_bboxes_scale, segmasks)
+ gt_labels = targets[:, :, :1]
+ gt_bboxes = targets[:, :, 1:] #xyxy
+ mask_gt = (gt_bboxes.sum(-1, keepdim=True) > 0).float()
+
+
+ # pboxes
+ anchor_points_s = anchor_points / stride_tensor
+ pred_bboxes = self.bbox_decode(anchor_points_s, pred_distri) #xyxy
+
+
+ try:
+ if epoch_num < self.warmup_epoch:
+ target_labels, target_bboxes, target_scores, fg_mask, target_segmasks = \
+ self.warmup_assigner(
+ anchors,
+ n_anchors_list,
+ gt_labels,
+ gt_bboxes,
+ mask_gt,
+ pred_bboxes.detach() * stride_tensor,
+ gt_segmasks)
+ else:
+ target_labels, target_bboxes, target_scores, fg_mask, idx_lst = \
+ self.formal_assigner(
+ pred_scores.detach(),
+ pred_bboxes.detach() * stride_tensor,
+ anchor_points,
+ gt_labels,
+ gt_bboxes,
+ mask_gt,
+ gt_segmasks)
+
+ except RuntimeError:
+ print(
+ "OOM RuntimeError is raised due to the huge memory cost during label assignment. \
+ CPU mode is applied in this batch. If you want to avoid this issue, \
+ try to reduce the batch size or image size."
+ )
+ torch.cuda.empty_cache()
+ print("------------CPU Mode for This Batch-------------")
+ if epoch_num < self.warmup_epoch:
+ _anchors = anchors.cpu().float()
+ _n_anchors_list = n_anchors_list
+ _gt_labels = gt_labels.cpu().float()
+ _gt_bboxes = gt_bboxes.cpu().float()
+ _mask_gt = mask_gt.cpu().float()
+ _pred_bboxes = pred_bboxes.detach().cpu().float()
+ _stride_tensor = stride_tensor.cpu().float()
+ _segmasks = gt_segmasks.cpu().float()
+
+ target_labels, target_bboxes, target_scores, fg_mask, target_segmasks = \
+ self.warmup_assigner(
+ _anchors,
+ _n_anchors_list,
+ _gt_labels,
+ _gt_bboxes,
+ _mask_gt,
+ _pred_bboxes * _stride_tensor,
+ _segmasks)
+
+ else:
+ _pred_scores = pred_scores.detach().cpu().float()
+ _pred_bboxes = pred_bboxes.detach().cpu().float()
+ _anchor_points = anchor_points.cpu().float()
+ _gt_labels = gt_labels.cpu().float()
+ _gt_bboxes = gt_bboxes.cpu().float()
+ _mask_gt = mask_gt.cpu().float()
+ _stride_tensor = stride_tensor.cpu().float()
+ _segmasks = gt_segmasks.cpu().float()
+
+ target_labels, target_bboxes, target_scores, fg_mask, idx_lst = \
+ self.formal_assigner(
+ _pred_scores,
+ _pred_bboxes * _stride_tensor,
+ _anchor_points,
+ _gt_labels,
+ _gt_bboxes,
+ _mask_gt,
+ _segmasks)
+
+ target_labels = target_labels.cuda()
+ target_bboxes = target_bboxes.cuda()
+ target_scores = target_scores.cuda()
+ fg_mask = fg_mask.cuda()
+            idx_lst = [t.cuda() for t in idx_lst]
+
+ if step_num % 10 == 0:
+ torch.cuda.empty_cache()
+
+ # rescale bbox
+ target_bboxes /= stride_tensor
+
+ # cls loss
+ target_labels = torch.where(fg_mask > 0, target_labels, torch.full_like(target_labels, self.num_classes))
+ one_hot_label = F.one_hot(target_labels.long(), self.num_classes + 1)[..., :-1]
+ loss_cls = self.varifocal_loss(pred_scores, target_scores, one_hot_label)
+
+
+ target_scores_sum = target_scores.sum()
+
+
+ if target_scores_sum > 1:
+ loss_cls /= target_scores_sum
+
+ # bbox loss
+ loss_iou, loss_dfl = self.bbox_loss(pred_distri, pred_bboxes, anchor_points_s, target_bboxes,
+ target_scores, target_scores_sum, fg_mask)
+
+ loss_seg = self.mask_loss(gt_segmasks, seg_cf, seg_proto, target_bboxes, fg_mask, idx_lst, target_scores, target_scores_sum, epoch=0)
+
+ loss = self.loss_weight['class'] * loss_cls + \
+ self.loss_weight['iou'] * loss_iou + \
+ self.loss_weight['dfl'] * loss_dfl + \
+ self.loss_weight['seg'] * loss_seg
+
+
+ return loss, \
+ torch.cat(((self.loss_weight['iou'] * loss_iou).unsqueeze(0),
+ (self.loss_weight['dfl'] * loss_dfl).unsqueeze(0),
+ (self.loss_weight['class'] * loss_cls).unsqueeze(0),
+ (self.loss_weight['seg'] * loss_seg).unsqueeze(0))).detach()
+
+ def preprocess(self, targets, batch_size, scale_tensor, segmask):
+ targets_list = np.zeros((batch_size, 1, 5)).tolist()
+ cu = []
+ already = []
+ for i, item in enumerate(targets.cpu().numpy().tolist()):
+ index = int(item[0])
+ targets_list[index].append(item[1:])
+ if index not in already:
+ already.append(index)
+ cu.append(i)
+ cu.append(segmask.shape[0])
+ max_len = max((len(l) for l in targets_list))
+ segmasks = torch.zeros(batch_size, max_len - 1, segmask.shape[-2], segmask.shape[-1]).cuda()
+ if len(already) != 0:
+ for i in range(len(already)):
+ j = already[i]
+ start = cu[i]
+ end = cu[i+1]
+ segmasks[j, : end - start] = segmask[start: end].clone()
+ targets = torch.from_numpy(np.array(list(map(lambda l:l + [[-1,0,0,0,0]]*(max_len - len(l)), targets_list)))[:,1:,:]).to(targets.device)
+
+ batch_target = targets[:, :, 1:5].mul_(scale_tensor)
+ targets[..., 1:] = xywh2xyxy(batch_target)
+ return targets, segmasks
+
+ def bbox_decode(self, anchor_points, pred_dist):
+ if self.use_dfl:
+ batch_size, n_anchors, _ = pred_dist.shape
+ pred_dist = F.softmax(pred_dist.view(batch_size, n_anchors, 4, self.reg_max + 1), dim=-1).matmul(self.proj.to(pred_dist.device))
+ return dist2bbox(pred_dist, anchor_points)
+
+ def mask_loss(self, gt_segmasks, seg_cf, seg_proto, txyxy_ori_s, fg_mask, idx_lst, target_scores=None, target_scores_sum=None, epoch=0):
+ # pred_mask_lst -> list
+ '''
+ pred_mask -> Shape(n1, w, h)
+ gt_mask -> Shape(n, img_w, img_h)
+ xyxy -> Shape(n, 4)
+ sum(n1, n2, n3, ...) = n
+        torch.abs((xyxy[..., 2] - xyxy[..., 0]) * (xyxy[..., 3] - xyxy[..., 1])) -> area
+ fg_mask --> (bs, tsize)
+ idx -> (bs, tsize)
+ gt_segmasks -> (bs, labelsize, w, h)
+ '''
+ sl = 0
+ sl2 = 0
+ bl = [2, 4, 8]
+ num_pos = fg_mask.sum()
+ tloss = torch.zeros(1).float().cuda()
+ if num_pos<=0:
+ for ipred in seg_proto:
+ tloss += (ipred.sum() * 0.)
+ for ipred in seg_cf:
+ tloss += (ipred.sum() * 0.)
+ return tloss[0]
+
+
+ xyxy_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4])
+        mtarget_scores = target_scores.sum(-1) # (bs, n_total_anchors)
+
+ sl = 0
+ qf = len(idx_lst) == 1 and len(idx_lst[0].shape) == 2
+ if qf:
+ idx_lst = idx_lst[0]
+ _ = [_i.shape[1] for _i in seg_cf]
+ sp = [2, 4, 8]
+ fpn = []
+ for i in range(0, 3):
+ fpn.extend([sp[i]] * _[i])
+ fpn = torch.Tensor(fpn).unsqueeze(-1).cuda()
+ txyxy_ori = txyxy_ori_s * fpn.unsqueeze(0).repeat(seg_cf[0].shape[0], 1, 1)
+ iseg_cf = torch.cat(seg_cf, axis = 1)
+ iseg_proto = seg_proto[0] # (bs, 32, h, w)
+ bs = iseg_proto.shape[0]
+ if fg_mask.sum()<=0:
+ tloss += (iseg_proto.sum() * 0.)
+ tloss += (iseg_cf.sum() * 0.)
+ return tloss[0]
+
+ pshape = iseg_proto.shape[-1]
+ ishape = iseg_cf.shape[1] # (1) = part_n
+ idx = idx_lst[:, :] # (bs, part_n)
+
+ ifg_mask = fg_mask[:, :] # (n) --> (bs, part_n)
+ itarget_scores = mtarget_scores[:, :]
+ target_sg = []
+ pred_sg = []
+ ixyxy_lst = []
+ mask_weight = []
+ for i in range(bs):
+ siproto = self.handle_proto_coord(iseg_proto[i])
+ iproto = siproto.reshape(1, -1, *siproto.shape[-2:])
+ idx_thisbatch = torch.masked_select(idx[i], ifg_mask[i]) #(casize)
+ igt_segmasks = gt_segmasks.reshape(-1, *gt_segmasks.shape[-2:])[idx_thisbatch] # (?1, h?, w?) --> (?2, h?, w?)
+ imask_weight = torch.masked_select(itarget_scores[i], ifg_mask[i]).unsqueeze(-1)
+ tiseg_cf = torch.masked_select(iseg_cf[i], ifg_mask[i].unsqueeze(-1).repeat(1, self.tt)) # (?2, 32)
+ tiseg_cf = tiseg_cf.reshape(-1, self.tt)
+ num_inst = tiseg_cf.shape[0]
+ if num_inst == 0:
+ tloss[0] += (tiseg_cf.sum() * 0.)
+ continue
+ mask_weight.append(imask_weight)
+ target_sg.append(igt_segmasks)
+ weights, biases = self.parse_dynamic_params(tiseg_cf)
+ n_layers = len(weights)
+ for _i, (weight, bias) in enumerate(zip(weights, biases)):
+ x = F.conv2d(
+ iproto, weight, bias=bias, stride=1, padding=0, groups=1)
+ if _i < n_layers - 1:
+ x = F.relu(x)
+ x = x.reshape(num_inst, *iproto.shape[-2:])
+ ixyxy = torch.masked_select(txyxy_ori[i, :], xyxy_mask[i, :, :]).reshape(-1, 4) # (n, 4) --> (part_n, 4) --> (?2, 4)
+ ixyxy_lst.append(ixyxy)
+ pred_sg.append(x)
+ bxyxy = torch.cat(ixyxy_lst, dim = 0)
+ bpred_seg = torch.cat(pred_sg, dim = 0)
+ bgt_seg = torch.cat(target_sg, dim = 0)
+ masks_weight = torch.cat(mask_weight, dim = 0).reshape(-1)
+ if tuple(bgt_seg.shape[-2:]) != (pshape, pshape): # downsample
+ bgt_seg = F.interpolate(bgt_seg[None], (pshape, pshape), mode='nearest')[0]
+ area = torch.abs((bxyxy[..., 2] - bxyxy[..., 0]) * (bxyxy[..., 3] - bxyxy[..., 1]))
+ area = area / (pshape)
+ area = area / (pshape)
+
+ if not self.dice:
+ loss = F.binary_cross_entropy_with_logits(bpred_seg, bgt_seg, reduction='none')
+ loss = (self.crop_mask(loss, bxyxy).mean(dim=(1, 2)) / area) * masks_weight
+ loss = loss.sum()
+ tloss += loss
+ if target_scores_sum > 1:
+ tloss[0] = tloss[0] / target_scores_sum
+ return tloss[0] / len(seg_cf)
+ else:
+ bpred_seg = bpred_seg.sigmoid()
+ if epoch <= 160:
+ loss = dice_loss(bpred_seg, bgt_seg, masks_weight, reduction='mean', avg_factor=target_scores_sum if target_scores_sum > 1 else 1)
+ else:
+ loss = dice_loss(bpred_seg, bgt_seg, reduction='mean')
+ tloss += loss
+ return tloss[0]
+
+ @staticmethod
+ def crop_mask(masks, boxes):
+ """
+ "Crop" predicted masks by zeroing out everything not in the predicted bbox.
+ Vectorized by Chong (thanks Chong).
+
+ Args:
+ - masks should be a size [n, h, w] tensor of masks
+ - boxes should be a size [n, 4] tensor of bbox coords in relative point form
+ """
+
+ n, h, w = masks.shape
+        x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # each of shape (n, 1, 1)
+        r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # column indices, shape (1, 1, w)
+        c = torch.arange(h, device=masks.device, dtype=x1.dtype)[None, :, None] # row indices, shape (1, h, 1)
+
+ return masks * ((r >= x1) * (r < x2) * (c >= y1) * (c < y2))
+
+
+class VarifocalLoss(nn.Module):
+ def __init__(self):
+ super(VarifocalLoss, self).__init__()
+
+ def forward(self, pred_score,gt_score, label, alpha=0.75, gamma=2.0):
+
+ weight = alpha * pred_score.pow(gamma) * (1 - label) + gt_score * label
+ with torch.cuda.amp.autocast(enabled=False):
+ loss = (F.binary_cross_entropy(pred_score.float(), gt_score.float(), reduction='none') * weight).sum()
+
+ return loss
+
+
+class BboxLoss(nn.Module):
+ def __init__(self, num_classes, reg_max, use_dfl=False, iou_type='giou'):
+ super(BboxLoss, self).__init__()
+ self.num_classes = num_classes
+ self.iou_loss = IOUloss(box_format='xyxy', iou_type=iou_type, eps=1e-10)
+ self.reg_max = reg_max
+ self.use_dfl = use_dfl
+
+ def forward(self, pred_dist, pred_bboxes, anchor_points,
+ target_bboxes, target_scores, target_scores_sum, fg_mask):
+
+ # select positive samples mask
+ num_pos = fg_mask.sum()
+ if num_pos > 0:
+ # iou loss
+ bbox_mask = fg_mask.unsqueeze(-1).repeat([1, 1, 4])
+ pred_bboxes_pos = torch.masked_select(pred_bboxes,
+ bbox_mask).reshape([-1, 4])
+ target_bboxes_pos = torch.masked_select(
+ target_bboxes, bbox_mask).reshape([-1, 4])
+ bbox_weight = torch.masked_select(
+ target_scores.sum(-1), fg_mask).unsqueeze(-1)
+ loss_iou = self.iou_loss(pred_bboxes_pos,
+ target_bboxes_pos) * bbox_weight
+ if target_scores_sum > 1:
+ loss_iou = loss_iou.sum() / target_scores_sum
+ else:
+ loss_iou = loss_iou.sum()
+
+ # dfl loss
+ if self.use_dfl:
+ dist_mask = fg_mask.unsqueeze(-1).repeat(
+ [1, 1, (self.reg_max + 1) * 4])
+ pred_dist_pos = torch.masked_select(
+ pred_dist, dist_mask).reshape([-1, 4, self.reg_max + 1])
+ target_ltrb = bbox2dist(anchor_points, target_bboxes, self.reg_max)
+ target_ltrb_pos = torch.masked_select(
+ target_ltrb, bbox_mask).reshape([-1, 4])
+ loss_dfl = self._df_loss(pred_dist_pos,
+ target_ltrb_pos) * bbox_weight
+ if target_scores_sum > 1:
+ loss_dfl = loss_dfl.sum() / target_scores_sum
+ else:
+ loss_dfl = loss_dfl.sum()
+ else:
+ loss_dfl = pred_dist.sum() * 0.
+
+ else:
+ loss_iou = pred_dist.sum() * 0.
+ loss_dfl = pred_dist.sum() * 0.
+
+ return loss_iou, loss_dfl
+
+ def _df_loss(self, pred_dist, target):
+ target_left = target.to(torch.long)
+ target_right = target_left + 1
+ weight_left = target_right.to(torch.float) - target
+ weight_right = 1 - weight_left
+ loss_left = F.cross_entropy(
+ pred_dist.view(-1, self.reg_max + 1), target_left.view(-1), reduction='none').view(
+ target_left.shape) * weight_left
+ loss_right = F.cross_entropy(
+ pred_dist.view(-1, self.reg_max + 1), target_right.view(-1), reduction='none').view(
+ target_left.shape) * weight_right
+ return (loss_left + loss_right).mean(-1, keepdim=True)
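+
+ # Note: DFL treats each box side's distance as a distribution over the integer bins
+ # {0..reg_max}; a fractional target such as 2.3 is supervised as
+ # 0.7 * CE(bin 2) + 0.3 * CE(bin 3), i.e. linear interpolation between its two
+ # neighbouring bins.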
+
+def dice_loss(pred,
+ target,
+ weight=None,
+ eps=1e-3,
+ reduction='mean',
+ naive_dice=False,
+ avg_factor=None):
+ """Calculate dice loss, there are two forms of dice loss is supported:
+ Borrowed from MMDetection
+ - the one proposed in `V-Net: Fully Convolutional Neural
+ Networks for Volumetric Medical Image Segmentation
+ `_.
+ - the dice loss in which the power of the number in the
+ denominator is the first power instead of the second
+ power.
+
+ Args:
+ pred (torch.Tensor): The prediction, has a shape (n, *)
+ target (torch.Tensor): The learning label of the prediction,
+ shape (n, *), same shape of pred.
+ weight (torch.Tensor, optional): The weight of loss for each
+ prediction, has a shape (n,). Defaults to None.
+ eps (float): Avoid dividing by zero. Default: 1e-3.
+ reduction (str, optional): The method used to reduce the loss into
+ a scalar. Defaults to 'mean'.
+ Options are "none", "mean" and "sum".
+ naive_dice (bool, optional): If false, use the dice
+ loss defined in the V-Net paper, otherwise, use the
+ naive dice loss in which the power of the number in the
+ denominator is the first power instead of the second
+ power. Defaults to False.
+ avg_factor (int, optional): Average factor that is used to average
+ the loss. Defaults to None.
+ """
+
+ input = pred.flatten(1)
+ target = target.flatten(1).float()
+
+ a = torch.sum(input * target, 1)
+ if naive_dice:
+ b = torch.sum(input, 1)
+ c = torch.sum(target, 1)
+ d = (2 * a + eps) / (b + c + eps)
+ else:
+ b = torch.sum(input * input, 1) + eps
+ c = torch.sum(target * target, 1) + eps
+ d = (2 * a) / (b + c)
+
+ loss = 1 - d
+ if weight is not None:
+ assert weight.ndim == loss.ndim
+ assert len(weight) == len(pred)
+ # the reduction is applied even when weight is None, so `reduction` is always honoured
+ loss = weight_reduce_loss(loss, weight, reduction, avg_factor)
+ return loss
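+
+# Example (illustrative sketch): dice loss between predicted probabilities and binary
+# targets; the caller above applies sigmoid() to the predictions first.
+#   p = torch.rand(2, 16)                        # (n, h*w) predicted probabilities
+#   t = (torch.rand(2, 16) > 0.5).float()        # (n, h*w) binary target masks
+#   w = torch.ones(2)                            # per-instance weights
+#   dice_loss(p, t, weight=w, reduction='mean')  # scalar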
+
+def weight_reduce_loss(loss,
+ weight=None,
+ reduction='none',
+ avg_factor=None):
+ """Apply element-wise weight and reduce loss.
+
+ Args:
+ loss (Tensor): Element-wise loss.
+ weight (Optional[Tensor], optional): Element-wise weights.
+ Defaults to None.
+ reduction (str, optional): Same as built-in losses of PyTorch.
+ Defaults to 'none'.
+ avg_factor (Optional[float], optional): Average factor when
+ computing the mean of losses. Defaults to None.
+
+ Returns:
+ Tensor: Processed loss values.
+ """
+ # if weight is specified, apply element-wise weight
+ if weight is not None:
+ loss = loss * weight
+
+ # if avg_factor is not specified, just reduce the loss
+ if avg_factor is None:
+ loss = reduce_loss(loss, reduction)
+ else:
+ # if reduction is mean, then average the loss by avg_factor
+ if reduction == 'mean':
+ # Avoid causing ZeroDivisionError when avg_factor is 0.0,
+ # i.e., all labels of an image belong to ignore index.
+ eps = torch.finfo(torch.float32).eps
+ loss = loss.sum() / (avg_factor + eps)
+ # if reduction is 'none', then do nothing, otherwise raise an error
+ elif reduction != 'none':
+ raise ValueError('avg_factor can not be used with reduction="sum"')
+ return loss
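+
+# Example (illustrative): with reduction='mean' and an explicit avg_factor, the weighted
+# loss is summed and divided by avg_factor instead of by the number of elements.
+#   l = torch.ones(4)
+#   w = torch.tensor([1., 1., 0., 0.])
+#   weight_reduce_loss(l, w, reduction='mean', avg_factor=2)   # -> ~1.0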
+
+def reduce_loss(loss, reduction):
+ """Reduce loss as specified.
+
+ Args:
+ loss (Tensor): Elementwise loss tensor.
+ reduction (str): Options are "none", "mean" and "sum".
+
+ Return:
+ Tensor: Reduced loss tensor.
+ """
+ reduction_enum = F._Reduction.get_enum(reduction)
+ # none: 0, elementwise_mean:1, sum: 2
+ if reduction_enum == 0:
+ return loss
+ elif reduction_enum == 1:
+ return loss.mean()
+ elif reduction_enum == 2:
+ return loss.sum()
\ No newline at end of file
diff --git a/yolov6/models/reppan.py b/yolov6/models/reppan.py
index 2114f521..820f4211 100644
--- a/yolov6/models/reppan.py
+++ b/yolov6/models/reppan.py
@@ -551,22 +551,14 @@ def __init__(
channels_list=None,
num_repeats=None,
block=BottleRep,
- csp_e=float(1)/2,
- stage_block_type="BepC3"
+ csp_e=float(1)/2
):
super().__init__()
- if stage_block_type == "BepC3":
- stage_block = BepC3
- elif stage_block_type == "MBLABlock":
- stage_block = MBLABlock
- else:
- raise NotImplementedError
-
assert channels_list is not None
assert num_repeats is not None
- self.Rep_p4 = stage_block(
+ self.Rep_p4 = BepC3(
in_channels=channels_list[3] + channels_list[5], # 512 + 256
out_channels=channels_list[5], # 256
n=num_repeats[5],
@@ -574,7 +566,7 @@ def __init__(
block=block
)
- self.Rep_p3 = stage_block(
+ self.Rep_p3 = BepC3(
in_channels=channels_list[2] + channels_list[6], # 256 + 128
out_channels=channels_list[6], # 128
n=num_repeats[6],
@@ -582,7 +574,7 @@ def __init__(
block=block
)
- self.Rep_n3 = stage_block(
+ self.Rep_n3 = BepC3(
in_channels=channels_list[6] + channels_list[7], # 128 + 128
out_channels=channels_list[8], # 256
n=num_repeats[7],
@@ -590,7 +582,7 @@ def __init__(
block=block
)
- self.Rep_n4 = stage_block(
+ self.Rep_n4 = BepC3(
in_channels=channels_list[5] + channels_list[9], # 256 + 256
out_channels=channels_list[10], # 512
n=num_repeats[8],
@@ -795,21 +787,13 @@ def __init__(
channels_list=None,
num_repeats=None,
block=BottleRep,
- csp_e=float(1)/2,
- stage_block_type="BepC3"
+ csp_e=float(1)/2
):
super().__init__()
assert channels_list is not None
assert num_repeats is not None
- if stage_block_type == "BepC3":
- stage_block = BepC3
- elif stage_block_type == "MBLABlock":
- stage_block = MBLABlock
- else:
- raise NotImplementedError
-
self.reduce_layer0 = ConvBNReLU(
in_channels=channels_list[5], # 1024
out_channels=channels_list[6], # 512
@@ -822,7 +806,7 @@ def __init__(
out_channels=channels_list[6], # 512
)
- self.Rep_p5 = stage_block(
+ self.Rep_p5 = BepC3(
in_channels=channels_list[4] + channels_list[6], # 768 + 512
out_channels=channels_list[6], # 512
n=num_repeats[6],
@@ -842,7 +826,7 @@ def __init__(
out_channels=channels_list[7] # 256
)
- self.Rep_p4 = stage_block(
+ self.Rep_p4 = BepC3(
in_channels=channels_list[3] + channels_list[7], # 512 + 256
out_channels=channels_list[7], # 256
n=num_repeats[7],
@@ -862,7 +846,7 @@ def __init__(
out_channels=channels_list[8] # 128
)
- self.Rep_p3 = stage_block(
+ self.Rep_p3 = BepC3(
in_channels=channels_list[2] + channels_list[8], # 256 + 128
out_channels=channels_list[8], # 128
n=num_repeats[8],
@@ -877,7 +861,7 @@ def __init__(
stride=2
)
- self.Rep_n4 = stage_block(
+ self.Rep_n4 = BepC3(
in_channels=channels_list[8] + channels_list[8], # 128 + 128
out_channels=channels_list[9], # 256
n=num_repeats[9],
@@ -892,7 +876,7 @@ def __init__(
stride=2
)
- self.Rep_n5 = stage_block(
+ self.Rep_n5 = BepC3(
in_channels=channels_list[7] + channels_list[9], # 256 + 256
out_channels=channels_list[10], # 512
n=num_repeats[10],
@@ -907,7 +891,7 @@ def __init__(
stride=2
)
- self.Rep_n6 = stage_block(
+ self.Rep_n6 = BepC3(
in_channels=channels_list[6] + channels_list[10], # 512 + 512
out_channels=channels_list[11], # 1024
n=num_repeats[11],
@@ -962,21 +946,13 @@ def __init__(
channels_list=None,
num_repeats=None,
block=BottleRep,
- csp_e=float(1)/2,
- stage_block_type="BepC3"
+ csp_e=float(1)/2
):
super().__init__()
assert channels_list is not None
assert num_repeats is not None
- if stage_block_type == "BepC3":
- stage_block = BepC3
- elif stage_block_type == "MBLABlock":
- stage_block = MBLABlock
- else:
- raise NotImplementedError
-
self.reduce_layer0 = ConvBNReLU(
in_channels=channels_list[5], # 1024
out_channels=channels_list[6], # 512
@@ -989,7 +965,7 @@ def __init__(
out_channels=channels_list[6], # 512
)
- self.Rep_p5 = stage_block(
+ self.Rep_p5 = BepC3(
in_channels=channels_list[6], # 512
out_channels=channels_list[6], # 512
n=num_repeats[6],
@@ -1009,7 +985,7 @@ def __init__(
out_channels=channels_list[7], # 256
)
- self.Rep_p4 = stage_block(
+ self.Rep_p4 = BepC3(
in_channels=channels_list[7], # 256
out_channels=channels_list[7], # 256
n=num_repeats[7],
@@ -1029,7 +1005,7 @@ def __init__(
out_channels=channels_list[8], # 128
)
- self.Rep_p3 = stage_block(
+ self.Rep_p3 = BepC3(
in_channels=channels_list[8], # 128
out_channels=channels_list[8], # 128
n=num_repeats[8],
@@ -1044,7 +1020,7 @@ def __init__(
stride=2
)
- self.Rep_n4 = stage_block(
+ self.Rep_n4 = BepC3(
in_channels=channels_list[8] + channels_list[8], # 128 + 128
out_channels=channels_list[9], # 256
n=num_repeats[9],
@@ -1059,7 +1035,7 @@ def __init__(
stride=2
)
- self.Rep_n5 = stage_block(
+ self.Rep_n5 = BepC3(
in_channels=channels_list[7] + channels_list[9], # 256 + 256
out_channels=channels_list[10], # 512
n=num_repeats[10],
@@ -1074,7 +1050,7 @@ def __init__(
stride=2
)
- self.Rep_n6 = stage_block(
+ self.Rep_n6 = BepC3(
in_channels=channels_list[6] + channels_list[10], # 512 + 512
out_channels=channels_list[11], # 1024
n=num_repeats[11],
diff --git a/yolov6/models/yolo.py b/yolov6/models/yolo.py
index 2f37f1b1..5e121b79 100644
--- a/yolov6/models/yolo.py
+++ b/yolov6/models/yolo.py
@@ -63,6 +63,11 @@ def build_network(config, channels, num_classes, num_layers, fuse_ab=False, dist
channels_list_neck = config.model.neck.out_channels
use_dfl = config.model.head.use_dfl
reg_max = config.model.head.reg_max
+ issolo = config.model.head.issolo
+ isseg = config.model.head.isseg
+ npr = config.model.head.npr
+ npr = make_divisible(npr * width_mul, 8)
+ nm = config.model.head.nm
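+ # The fields above come from the model config; an assumed, illustrative fragment
+ # (attribute names follow the reads above, the values are placeholders):
+ #   model.head.isseg = True      # build an instance-segmentation head
+ #   model.head.issolo = False    # False: proto/coefficient head, True: SOLO-style head
+ #   model.head.nm = 32           # number of mask coefficients
+ #   model.head.npr = 256         # proto channels (scaled by width_mul above)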
num_repeat = [(max(round(i * depth_mul), 1) if i > 1 else i) for i in (num_repeat_backbone + num_repeat_neck)]
channels_list = [make_divisible(i * width_mul, 8) for i in (channels_list_backbone + channels_list_neck)]
@@ -110,8 +115,20 @@ def build_network(config, channels, num_classes, num_layers, fuse_ab=False, dist
num_repeats=num_repeat,
block=block
)
-
- if distill_ns:
+ if isseg:
+ if issolo:
+ from yolov6.models.heads.effidehead_fuseab_seg_solo import Detect, build_effidehead_layer, Proto
+ anchors_init = config.model.head.anchors_init
+ head_layers = build_effidehead_layer(channels_list, 3, num_classes, reg_max=reg_max, num_layers=num_layers, num_masks=nm + 2 + 1, fuse_ab=fuse_ab)
+ reg_masks = [Proto(num_layers, channels_list, 0, npr, nm, scale_factor=2), Proto(num_layers, channels_list, 1, npr, nm, scale_factor=4), Proto(num_layers, channels_list, 2, npr, nm, scale_factor=8)]
+ head = Detect(num_classes, anchors_init, num_layers, head_layers=head_layers, use_dfl=use_dfl, reg_mask=reg_masks, fuse_ab=fuse_ab, nm=nm + 2 + 1)
+ else:
+ from yolov6.models.heads.effidehead_fuseab_seg import Detect, build_effidehead_layer, Proto
+ anchors_init = config.model.head.anchors_init
+ head_layers = build_effidehead_layer(channels_list, 3, num_classes, reg_max=reg_max, num_layers=num_layers, num_masks=nm, fuse_ab=fuse_ab)
+ reg_masks = [Proto(num_layers, channels_list, 0, npr, nm)]
+ head = Detect(num_classes, anchors_init, num_layers, head_layers=head_layers, use_dfl=use_dfl, reg_mask=reg_masks, fuse_ab=fuse_ab)
+ elif distill_ns:
from yolov6.models.heads.effidehead_distill_ns import Detect, build_effidehead_layer
if num_layers != 3:
LOGGER.error('ERROR in: Distill mode not fit on n/s models with P6 head.\n')
diff --git a/yolov6/utils/general.py b/yolov6/utils/general.py
index cb4418cd..e144f95d 100644
--- a/yolov6/utils/general.py
+++ b/yolov6/utils/general.py
@@ -5,7 +5,6 @@
import math
import torch
import requests
-import pkg_resources as pkg
from pathlib import Path
from yolov6.utils.events import LOGGER
@@ -94,7 +93,6 @@ def download_ckpt(path):
LOGGER.info(f"checkpoint {basename} not exist, try to downloaded it from github.")
# need to update the link with every release
url = f"https://github.com/meituan/YOLOv6/releases/download/0.4.0/{basename}"
- LOGGER.warning(f"downloading url is: {url}, pealse make sure the version of the downloading model is correspoing to the code version!")
r = requests.get(url, allow_redirects=True)
assert r.status_code == 200, "Unable to download checkpoints, manually download it"
open(path, 'wb').write(r.content)
@@ -115,13 +113,3 @@ def check_img_size(imgsz, s=32, floor=0):
if new_size != imgsz:
LOGGER.warning(f'--img-size {imgsz} must be multiple of max stride {s}, updating to {new_size}')
return new_size
-
-
-def check_version(current='0.0.0', minimum='0.0.0', name='version ', pinned=False, hard=False, verbose=False):
- # Check whether the package's version is match the required version.
- current, minimum = (pkg.parse_version(x) for x in (current, minimum))
- result = (current == minimum) if pinned else (current >= minimum) # bool
- if hard:
- info = f'⚠️ {name}{minimum} is required by YOLOv6, but {name}{current} is currently installed'
- assert result, info # assert minimum version requirement
- return result
diff --git a/yolov6/utils/metrics.py b/yolov6/utils/metrics.py
index cbfa130e..c54b4f8a 100644
--- a/yolov6/utils/metrics.py
+++ b/yolov6/utils/metrics.py
@@ -9,8 +9,9 @@
import torch
import warnings
from . import general
+import torch.nn.functional as F
-def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()):
+def ap_per_class_v6(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), prefix = ''):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
@@ -57,7 +58,7 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names
# AP from recall-precision curve
for j in range(tp.shape[1]):
- ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
+ ap[ci, j], mpre, mrec = compute_ap_v6(recall[:, j], precision[:, j])
if plot and j == 0:
py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
@@ -71,8 +72,112 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names
# i = f1.mean(0).argmax() # max F1 index
# return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32')
- return p, r, ap, f1, unique_classes.astype('int32')
+ AP50_F1_max_idx = len(f1.mean(0)) - f1.mean(0)[::-1].argmax() -1
+ ap50, ap = ap[:, 0], ap.mean(1)
+ mp, mr, map50, map = p[:, AP50_F1_max_idx].mean(), r[:, AP50_F1_max_idx].mean(), ap50.mean(), ap.mean()
+ return mp, mr, map50, map, AP50_F1_max_idx
+
+def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=(), eps=1e-16, prefix=''):
+ """ Compute the average precision, given the recall and precision curves.
+ Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
+ # Arguments
+ tp: True positives (nparray, nx1 or nx10).
+ conf: Objectness value from 0-1 (nparray).
+ pred_cls: Predicted object classes (nparray).
+ target_cls: True object classes (nparray).
+ plot: Plot precision-recall curve at mAP@0.5
+ save_dir: Plot save directory
+ # Returns
+ The average precision as computed in py-faster-rcnn.
+ """
+
+ # Sort by objectness
+ i = np.argsort(-conf)
+ tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
+
+ # Find unique classes
+ unique_classes, nt = np.unique(target_cls, return_counts=True)
+ nc = unique_classes.shape[0] # number of classes, number of detections
+
+ # Create Precision-Recall curve and compute AP for each class
+ px, py = np.linspace(0, 1, 1000), [] # for plotting
+ ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
+ for ci, c in enumerate(unique_classes):
+ i = pred_cls == c
+ n_l = nt[ci] # number of labels
+ n_p = i.sum() # number of predictions
+ if n_p == 0 or n_l == 0:
+ continue
+
+ # Accumulate FPs and TPs
+ fpc = (1 - tp[i]).cumsum(0)
+ tpc = tp[i].cumsum(0)
+
+ # Recall
+ recall = tpc / (n_l + eps) # recall curve
+ r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases
+
+ # Precision
+ precision = tpc / (tpc + fpc) # precision curve
+ p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # p at pr_score
+
+ # AP from recall-precision curve
+ for j in range(tp.shape[1]):
+ ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
+ if plot and j == 0:
+ py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5
+
+ # Compute F1 (harmonic mean of precision and recall)
+ f1 = 2 * p * r / (p + r + eps)
+ plot = False
+ if plot:
+ names = [v for k, v in names.items() if k in unique_classes] # list: only classes that have data
+ names = dict(enumerate(names)) # to dict
+ plot_pr_curve(px, py, ap, Path(save_dir) / f'{prefix}PR_curve.png', names)
+ plot_mc_curve(px, f1, Path(save_dir) / f'{prefix}F1_curve.png', names, ylabel='F1')
+ plot_mc_curve(px, p, Path(save_dir) / f'{prefix}P_curve.png', names, ylabel='Precision')
+ plot_mc_curve(px, r, Path(save_dir) / f'{prefix}R_curve.png', names, ylabel='Recall')
+
+ i = smooth(f1.mean(0), 0.1).argmax() # max F1 index
+ p, r, f1 = p[:, i], r[:, i], f1[:, i]
+ tp = (r * nt).round() # true positives
+ fp = (tp / (p + eps) - tp).round() # false positives
+ return tp, fp, p, r, f1, ap, unique_classes.astype(int)
+
+def smooth(y, f=0.05):
+ # Box filter of fraction f
+ nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd)
+ p = np.ones(nf // 2) # ones padding
+ yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded
+ return np.convolve(yp, np.ones(nf) / nf, mode='valid') # y-smoothed
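+
+# Example (illustrative): smooth the mean-F1-vs-confidence curve before taking its
+# argmax, as done in ap_per_class above.
+#   f1_mean = np.random.rand(1000)
+#   best_conf_idx = smooth(f1_mean, 0.1).argmax()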
+
+
+def compute_ap_v6(recall, precision):
+ """ Compute the average precision, given the recall and precision curves
+ # Arguments
+ recall: The recall curve (list)
+ precision: The precision curve (list)
+ # Returns
+ Average precision, precision curve, recall curve
+ """
+
+ # Append sentinel values to beginning and end
+ mrec = np.concatenate(([0.0], recall, [1.0]))
+ mpre = np.concatenate(([1.0], precision, [0.0]))
+
+ # Compute the precision envelope
+ mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))
+
+ # Integrate area under curve
+ method = 'interp' # methods: 'continuous', 'interp'
+ if method == 'interp':
+ x = np.linspace(0, 1, 101) # 101-point interp (COCO)
+ ap = np.trapz(np.interp(x, mrec, mpre), x) # integrate
+ else: # 'continuous'
+ i = np.where(mrec[1:] != mrec[:-1])[0] # points where x axis (recall) changes
+ ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) # area under curve
+
+ return ap, mpre, mrec
def compute_ap(recall, precision):
""" Compute the average precision, given the recall and precision curves
@@ -101,7 +206,6 @@ def compute_ap(recall, precision):
return ap, mpre, mrec
-# Plots ----------------------------------------------------------------------------------------------------------------
def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()):
# Precision-recall curve
@@ -142,17 +246,54 @@ def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence'
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
fig.savefig(Path(save_dir), dpi=250)
-def process_batch(detections, labels, iouv):
+# def process_batch(detections, labels, iouv):
+# """
+# Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+# Arguments:
+# detections (Array[N, 6]), x1, y1, x2, y2, conf, class
+# labels (Array[M, 5]), class, x1, y1, x2, y2
+# Returns:
+# correct (Array[N, 10]), for 10 IoU levels
+# """
+# correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
+# iou = general.box_iou(labels[:, 1:], detections[:, :4])
+# correct_class = labels[:, 0:1] == detections[:, 5]
+# for i in range(len(iouv)):
+# x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match
+# if x[0].shape[0]:
+# matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou]
+# if x[0].shape[0] > 1:
+# matches = matches[matches[:, 2].argsort()[::-1]]
+# matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+# # matches = matches[matches[:, 2].argsort()[::-1]]
+# matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+# correct[matches[:, 1].astype(int), i] = True
+# return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
+
+def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False):
"""
- Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+ Return correct prediction matrix
Arguments:
- detections (Array[N, 6]), x1, y1, x2, y2, conf, class
- labels (Array[M, 5]), class, x1, y1, x2, y2
+ detections (array[N, 6]), x1, y1, x2, y2, conf, class
+ labels (array[M, 5]), class, x1, y1, x2, y2
+ pred_masks (tensor[N, h, w], optional), predicted masks for the N detections (used when masks=True)
+ gt_masks (tensor, optional), ground-truth masks; a single index-encoded mask when overlap=True
+ overlap (bool), whether gt_masks encodes all instances in one mask via per-instance indices
+ masks (bool), if True, match detections to labels by mask IoU instead of box IoU
Returns:
- correct (Array[N, 10]), for 10 IoU levels
+ correct (array[N, 10]), for 10 IoU levels
"""
+ if masks:
+ gt_masks = gt_masks.to(pred_masks.device)
+ if overlap:
+ nl = len(labels)
+ index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
+ gt_masks = gt_masks.repeat(nl, 1, 1) # shape(1,640,640) -> (n,640,640)
+ gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
+ if gt_masks.shape[1:] != pred_masks.shape[1:]:
+ gt_masks = F.interpolate(gt_masks[None].to(torch.float32), pred_masks.shape[1:], mode='bilinear', align_corners=False)[0]
+ gt_masks = gt_masks.gt_(0.5)
+ iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1).float(), pred_masks.view(pred_masks.shape[0], -1)).to(iouv.device)
+ else: # boxes
+ iou = box_iou(labels[:, 1:], detections[:, :4]).to(iouv.device)
+
correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
- iou = general.box_iou(labels[:, 1:], detections[:, :4])
correct_class = labels[:, 0:1] == detections[:, 5]
for i in range(len(iouv)):
x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match
@@ -256,3 +397,232 @@ def plot(self, normalize=True, save_dir='', names=()):
def print(self):
for i in range(self.nc + 1):
print(' '.join(map(str, self.matrix[i])))
+
+
+def ap_per_class_box_and_mask(
+ tp_m,
+ tp_b,
+ conf,
+ pred_cls,
+ target_cls,
+ plot=False,
+ save_dir='.',
+ names=(),
+ is_v6=False
+):
+ """
+ Args:
+ tp_b: tp of boxes.
+ tp_m: tp of masks.
+ other arguments: see `ap_per_class`.
+ """
+ if not is_v6:
+ results_boxes = ap_per_class(tp_b,
+ conf,
+ pred_cls,
+ target_cls,
+ plot=plot,
+ save_dir=save_dir,
+ names=names,
+ prefix='Box')[2:]
+ results_masks = ap_per_class(tp_m,
+ conf,
+ pred_cls,
+ target_cls,
+ plot=plot,
+ save_dir=save_dir,
+ names=names,
+ prefix='Mask')[2:]
+
+ results = {
+ 'boxes': {
+ 'p': results_boxes[0],
+ 'r': results_boxes[1],
+ 'ap': results_boxes[3],
+ 'f1': results_boxes[2],
+ 'ap_class': results_boxes[4]},
+ 'masks': {
+ 'p': results_masks[0],
+ 'r': results_masks[1],
+ 'ap': results_masks[3],
+ 'f1': results_masks[2],
+ 'ap_class': results_masks[4]}}
+ return results
+ else:
+ results_boxes = ap_per_class_v6(tp_b,
+ conf,
+ pred_cls,
+ target_cls,
+ plot=plot,
+ save_dir=save_dir,
+ names=names,
+ prefix='Box')
+ results_masks = ap_per_class(tp_m,
+ conf,
+ pred_cls,
+ target_cls,
+ plot=plot,
+ save_dir=save_dir,
+ names=names,
+ prefix='Mask')
+ return results_boxes, results_masks
+
+class Metric:
+
+ def __init__(self) -> None:
+ self.p = [] # (nc, )
+ self.r = [] # (nc, )
+ self.f1 = [] # (nc, )
+ self.all_ap = [] # (nc, 10)
+ self.ap_class_index = [] # (nc, )
+
+ @property
+ def ap50(self):
+ """AP@0.5 of all classes.
+ Return:
+ (nc, ) or [].
+ """
+ return self.all_ap[:, 0] if len(self.all_ap) else []
+
+ @property
+ def ap(self):
+ """AP@0.5:0.95
+ Return:
+ (nc, ) or [].
+ """
+ return self.all_ap.mean(1) if len(self.all_ap) else []
+
+ @property
+ def mp(self):
+ """mean precision of all classes.
+ Return:
+ float.
+ """
+ return self.p.mean() if len(self.p) else 0.0
+
+ @property
+ def mr(self):
+ """mean recall of all classes.
+ Return:
+ float.
+ """
+ return self.r.mean() if len(self.r) else 0.0
+
+ @property
+ def map50(self):
+ """Mean AP@0.5 of all classes.
+ Return:
+ float.
+ """
+ return self.all_ap[:, 0].mean() if len(self.all_ap) else 0.0
+
+ @property
+ def map(self):
+ """Mean AP@0.5:0.95 of all classes.
+ Return:
+ float.
+ """
+ return self.all_ap.mean() if len(self.all_ap) else 0.0
+
+ def mean_results(self):
+ """Mean of results, return mp, mr, map50, map"""
+ return (self.mp, self.mr, self.map50, self.map)
+
+ def class_result(self, i):
+ """class-aware result, return p[i], r[i], ap50[i], ap[i]"""
+ return (self.p[i], self.r[i], self.ap50[i], self.ap[i])
+
+ def get_maps(self, nc):
+ maps = np.zeros(nc) + self.map
+ for i, c in enumerate(self.ap_class_index):
+ maps[c] = self.ap[i]
+ return maps
+
+ def update(self, results):
+ """
+ Args:
+ results: tuple(p, r, ap, f1, ap_class)
+ """
+ p, r, all_ap, f1, ap_class_index = results
+ self.p = p
+ self.r = r
+ self.all_ap = all_ap
+ self.f1 = f1
+ self.ap_class_index = ap_class_index
+
+
+class Metrics:
+ """Metric for boxes and masks."""
+
+ def __init__(self) -> None:
+ self.metric_box = Metric()
+ self.metric_mask = Metric()
+
+ def update(self, results):
+ """
+ Args:
+ results: Dict{'boxes': Dict{}, 'masks': Dict{}}
+ """
+ self.metric_box.update(list(results['boxes'].values()))
+ self.metric_mask.update(list(results['masks'].values()))
+
+ def mean_results(self):
+ return self.metric_box.mean_results() + self.metric_mask.mean_results()
+
+ def class_result(self, i):
+ return self.metric_box.class_result(i) + self.metric_mask.class_result(i)
+
+ def get_maps(self, nc):
+ return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc)
+
+ @property
+ def ap_class_index(self):
+ # boxes and masks have the same ap_class_index
+ return self.metric_box.ap_class_index
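+
+ # Example (illustrative; tp_m, tp_b, conf, pred_cls, target_cls are assumed arrays
+ # collected during a validation run):
+ #   metrics = Metrics()
+ #   results = ap_per_class_box_and_mask(tp_m, tp_b, conf, pred_cls, target_cls)
+ #   metrics.update(results)
+ #   mp_b, mr_b, map50_b, map_b, mp_m, mr_m, map50_m, map_m = metrics.mean_results()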
+
+def mask_iou(mask1, mask2, eps=1e-7):
+ """
+ mask1: [N, n], N is the number of predicted objects
+ mask2: [M, n], M is the number of gt objects
+ Note: n means image_w x image_h
+
+ return: masks iou, [N, M]
+ """
+ mask1 = mask1.float()
+ intersection = torch.matmul(mask1, mask2.t()).clamp(0)
+ union = (mask1.sum(1)[:, None] + mask2.sum(1)[None]) - intersection # (area1 + area2) - intersection
+ return intersection / (union + eps)
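+
+# Example (illustrative): pairwise IoU between 2 predicted and 3 gt binary masks,
+# each flattened to (num_masks, h*w).
+#   pm = (torch.rand(2, 160 * 160) > 0.5).float()
+#   gm = (torch.rand(3, 160 * 160) > 0.5).float()
+#   mask_iou(pm, gm)    # shape (2, 3)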
+
+
+def masks_iou(mask1, mask2, eps=1e-7):
+ """
+ mask1: [N, n], N is the number of predicted objects
+ mask2: [N, n], N is the number of gt objects (paired one-to-one with mask1)
+ Note: n means image_w x image_h
+
+ return: masks iou, (N, )
+ """
+ intersection = (mask1 * mask2).sum(1).clamp(0) # (N, )
+ union = (mask1.sum(1) + mask2.sum(1))[None] - intersection # (area1 + area2) - intersection
+ return intersection / (union + eps)
+
+def box_iou(box1, box2, eps=1e-7):
+ # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+ """
+ Return intersection-over-union (Jaccard index) of boxes.
+ Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+ Arguments:
+ box1 (Tensor[N, 4])
+ box2 (Tensor[M, 4])
+ Returns:
+ iou (Tensor[N, M]): the NxM matrix containing the pairwise
+ IoU values for every element in boxes1 and boxes2
+ """
+
+ # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
+ (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
+ inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
+
+ # IoU = inter / (area1 + area2 - inter)
+ return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)
\ No newline at end of file
diff --git a/yolov6/utils/nms.py b/yolov6/utils/nms.py
index 0f812642..c7369ba0 100644
--- a/yolov6/utils/nms.py
+++ b/yolov6/utils/nms.py
@@ -103,3 +103,164 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
break # time limit exceeded
return output
+
+
+def non_max_suppression_seg(predictions, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300):
+ """Runs Non-Maximum Suppression (NMS) on inference results.
+ This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775
+ Args:
+ predictions: (tuple), raw model outputs; predictions[0] has shape [bs, N, 5 + num_classes]
+ (N is the number of bboxes) and predictions[2] holds the per-box mask terms.
+ conf_thres: (float) confidence threshold.
+ iou_thres: (float) iou threshold.
+ classes: (None or list[int]), if a list is provided, nms only keeps the classes you provide.
+ agnostic: (bool), when set to True, class-agnostic nms is used; otherwise, nms is done per class.
+ multi_label: (bool), when set to True, one box can have multiple labels; otherwise, one box has only one label.
+ max_det: (int), max number of output bboxes.
+
+ Returns:
+ list of detections, each item is one tensor with shape (num_boxes, 6 + num_mask_terms): [xyxy, conf, cls, mask terms].
+ """
+ prediction = predictions[0]
+ confs = predictions[2] # (bs, which_proto, fs)
+ prediction = torch.cat([prediction, confs], axis=2)# (bs, l ,5 + num_classes + 33)
+
+ num_classes = prediction.shape[2] - 5 - 33 # number of classes
+ pred_candidates = torch.logical_and(prediction[..., 4] > conf_thres, torch.max(prediction[..., 5: 5 + num_classes], axis=-1)[0] > conf_thres) # candidates
+ # Check the parameters.
+ assert 0 <= conf_thres <= 1, f'conf_thresh must be in 0.0 to 1.0, however {conf_thres} is provided.'
+ assert 0 <= iou_thres <= 1, f'iou_thres must be in 0.0 to 1.0, however {iou_thres} is provided.'
+
+ # Function settings.
+ max_wh = 4096 # maximum box width and height
+ max_nms = 30000 # maximum number of boxes put into torchvision.ops.nms()
+ time_limit = 10.0 # quit the function when nms time exceeds this limit.
+ multi_label &= num_classes > 1 # multiple labels per box
+
+ tik = time.time()
+ output = [torch.zeros((0, 6 + 33), device=prediction.device)] * prediction.shape[0]
+ for img_idx, x in enumerate(prediction): # image index, image inference
+ x = x[pred_candidates[img_idx]] # confidence
+
+ # If no box remains, skip the next process.
+ if not x.shape[0]:
+ continue
+
+ # confidence multiply the objectness
+ x[:, 5: 5 + num_classes] *= x[:, 4:5] # conf = obj_conf * cls_conf
+
+ # (center x, center y, width, height) to (x1, y1, x2, y2)
+ box = xywh2xyxy(x[:, :4])
+ segconf = x[:, 5 + num_classes: ]
+
+ # Detections matrix's shape is (n,6), each row represents (xyxy, conf, cls)
+ if multi_label:
+ box_idx, class_idx = (x[:, 5: 5 + num_classes] > conf_thres).nonzero(as_tuple=False).T
+ x = torch.cat((box[box_idx], x[box_idx, class_idx + 5, None], class_idx[:, None].float(), segconf[box_idx]), 1)
+ else: # Only keep the class with highest scores.
+ conf, class_idx = x[:, 5: 5 + num_classes].max(1, keepdim=True)
+ x = torch.cat((box, conf, class_idx.float(), segconf), 1)[conf.view(-1) > conf_thres]
+
+ # Filter by class, only keep boxes whose category is in classes.
+ if classes is not None:
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+ # Check shape
+ num_box = x.shape[0] # number of boxes
+ if not num_box: # no boxes kept.
+ continue
+ elif num_box > max_nms: # exceeds the maximum number of boxes.
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
+
+ # Batched NMS
+ class_offset = x[:, 5:6] * (0 if agnostic else max_wh) # classes
+ boxes, scores = x[:, :4] + class_offset, x[:, 4] # boxes (offset by class), scores
+ keep_box_idx = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
+ if keep_box_idx.shape[0] > max_det: # limit detections
+ keep_box_idx = keep_box_idx[:max_det]
+
+ output[img_idx] = x[keep_box_idx]
+ if (time.time() - tik) > time_limit:
+ print(f'WARNING: NMS time exceeds the limit of {time_limit}s.')
+ break # time limit exceeded
+
+ return output
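+
+# Example (illustrative, assumed shapes): `predictions` is the raw seg-model output, where
+# predictions[0] is (bs, n_anchors, 5 + nc) and predictions[2] carries the extra per-box
+# mask terms, which pass through NMS untouched.
+#   dets = non_max_suppression_seg(predictions, conf_thres=0.25, iou_thres=0.45)
+#   per_img = dets[0]                                           # (num_kept, 6 + num_mask_terms)
+#   boxes, conf, cls = per_img[:, :4], per_img[:, 4], per_img[:, 5]
+#   mask_terms = per_img[:, 6:]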
+
+def non_max_suppression_seg_solo(predictions, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300):
+ """Runs Non-Maximum Suppression (NMS) on inference results.
+ This code is borrowed from: https://github.com/ultralytics/yolov5/blob/47233e1698b89fc437a4fb9463c815e9171be955/utils/general.py#L775
+ Args:
+ predictions: (tuple), raw model outputs; predictions[0] has shape [bs, N, 5 + num_classes]
+ (N is the number of bboxes) and predictions[2] holds the per-box mask terms.
+ conf_thres: (float) confidence threshold.
+ iou_thres: (float) iou threshold.
+ classes: (None or list[int]), if a list is provided, nms only keeps the classes you provide.
+ agnostic: (bool), when set to True, class-agnostic nms is used; otherwise, nms is done per class.
+ multi_label: (bool), when set to True, one box can have multiple labels; otherwise, one box has only one label.
+ max_det: (int), max number of output bboxes.
+
+ Returns:
+ list of detections, each item is one tensor with shape (num_boxes, 6 + num_mask_terms): [xyxy, conf, cls, mask terms].
+ """
+ prediction = predictions[0]
+ confs = predictions[2] # (bs, which_proto, fs)
+ prediction = torch.cat([prediction, confs], axis=2)# (bs, l ,5 + num_classes + 68)
+
+ num_classes = prediction.shape[2] - 5 - 68 # number of classes
+ pred_candidates = torch.logical_and(prediction[..., 4] > conf_thres, torch.max(prediction[..., 5: 5 + num_classes], axis=-1)[0] > conf_thres) # candidates
+ # Check the parameters.
+ assert 0 <= conf_thres <= 1, f'conf_thresh must be in 0.0 to 1.0, however {conf_thres} is provided.'
+ assert 0 <= iou_thres <= 1, f'iou_thres must be in 0.0 to 1.0, however {iou_thres} is provided.'
+
+ # Function settings.
+ max_wh = 4096 # maximum box width and height
+ max_nms = 30000 # maximum number of boxes put into torchvision.ops.nms()
+ time_limit = 10.0 # quit the function when nms time exceeds this limit.
+ multi_label &= num_classes > 1 # multiple labels per box
+
+ tik = time.time()
+ output = [torch.zeros((0, 6 + 68), device=prediction.device)] * prediction.shape[0]
+ for img_idx, x in enumerate(prediction): # image index, image inference
+ x = x[pred_candidates[img_idx]] # confidence
+
+ # If no box remains, skip the next process.
+ if not x.shape[0]:
+ continue
+
+ # confidence multiply the objectness
+ x[:, 5: 5 + num_classes] *= x[:, 4:5] # conf = obj_conf * cls_conf
+
+ # (center x, center y, width, height) to (x1, y1, x2, y2)
+ box = xywh2xyxy(x[:, :4])
+ segconf = x[:, 5 + num_classes: ]
+
+ # Detections matrix's shape is (n,6), each row represents (xyxy, conf, cls)
+ if multi_label:
+ box_idx, class_idx = (x[:, 5: 5 + num_classes] > conf_thres).nonzero(as_tuple=False).T
+ x = torch.cat((box[box_idx], x[box_idx, class_idx + 5, None], class_idx[:, None].float(), segconf[box_idx]), 1)
+ else: # Only keep the class with highest scores.
+ conf, class_idx = x[:, 5: 5 + num_classes].max(1, keepdim=True)
+ x = torch.cat((box, conf, class_idx.float(), segconf), 1)[conf.view(-1) > conf_thres]
+
+ # Filter by class, only keep boxes whose category is in classes.
+ if classes is not None:
+ x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
+
+ # Check shape
+ num_box = x.shape[0] # number of boxes
+ if not num_box: # no boxes kept.
+ continue
+ elif num_box > max_nms: # exceeds the maximum number of boxes.
+ x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
+
+ # Batched NMS
+ class_offset = x[:, 5:6] * (0 if agnostic else max_wh) # classes
+ boxes, scores = x[:, :4] + class_offset, x[:, 4] # boxes (offset by class), scores
+ keep_box_idx = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
+ if keep_box_idx.shape[0] > max_det: # limit detections
+ keep_box_idx = keep_box_idx[:max_det]
+
+ output[img_idx] = x[keep_box_idx]
+ if (time.time() - tik) > time_limit:
+ print(f'WARNING: NMS time exceeds the limit of {time_limit}s.')
+ break # time limit exceeded
+
+ return output
diff --git a/yolov6/utils/test1.py b/yolov6/utils/test1.py
new file mode 100644
index 00000000..246494f2
--- /dev/null
+++ b/yolov6/utils/test1.py
@@ -0,0 +1,23 @@
+def process_batch(detections, labels, iouv):
+ """
+ Return correct predictions matrix. Both sets of boxes are in (x1, y1, x2, y2) format.
+ Arguments:
+ detections (Array[N, 6]), x1, y1, x2, y2, conf, class
+ labels (Array[M, 5]), class, x1, y1, x2, y2
+ Returns:
+ correct (Array[N, 10]), for 10 IoU levels
+ """
+ correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
+ iou = general.box_iou(labels[:, 1:], detections[:, :4])
+ correct_class = labels[:, 0:1] == detections[:, 5]
+ for i in range(len(iouv)):
+ x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match
+ if x[0].shape[0]:
+ matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou]
+ if x[0].shape[0] > 1:
+ matches = matches[matches[:, 2].argsort()[::-1]]
+ matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+ # matches = matches[matches[:, 2].argsort()[::-1]]
+ matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+ correct[matches[:, 1].astype(int), i] = True
+ return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
\ No newline at end of file
diff --git a/yolov6/utils/test2.py b/yolov6/utils/test2.py
new file mode 100644
index 00000000..f21ad021
--- /dev/null
+++ b/yolov6/utils/test2.py
@@ -0,0 +1,37 @@
+def process_batch(detections, labels, iouv, pred_masks=None, gt_masks=None, overlap=False, masks=False):
+ """
+ Return correct prediction matrix
+ Arguments:
+ detections (array[N, 6]), x1, y1, x2, y2, conf, class
+ labels (array[M, 5]), class, x1, y1, x2, y2
+ Returns:
+ correct (array[N, 10]), for 10 IoU levels
+ """
+ if masks:
+ gt_masks = gt_masks.to(pred_masks.device)
+ if overlap:
+ nl = len(labels)
+ index = torch.arange(nl, device=gt_masks.device).view(nl, 1, 1) + 1
+ gt_masks = gt_masks.repeat(nl, 1, 1) # shape(1,640,640) -> (n,640,640)
+ gt_masks = torch.where(gt_masks == index, 1.0, 0.0)
+ if gt_masks.shape[1:] != pred_masks.shape[1:]:
+ gt_masks = F.interpolate(gt_masks[None].to(torch.float32), pred_masks.shape[1:], mode='bilinear', align_corners=False)[0]
+ gt_masks = gt_masks.gt_(0.5)
+ iou = mask_iou(gt_masks.view(gt_masks.shape[0], -1), pred_masks.view(pred_masks.shape[0], -1)).to(iouv.device)
+ else: # boxes
+ iou = box_iou(labels[:, 1:], detections[:, :4]).to(iouv.device)
+
+ correct = np.zeros((detections.shape[0], iouv.shape[0])).astype(bool)
+ correct_class = labels[:, 0:1] == detections[:, 5]
+ for i in range(len(iouv)):
+ x = torch.where((iou >= iouv[i]) & correct_class) # IoU > threshold and classes match
+ if x[0].shape[0]:
+ matches = torch.cat((torch.stack(x, 1), iou[x[0], x[1]][:, None]), 1).cpu().numpy() # [label, detect, iou]
+ if x[0].shape[0] > 1:
+ matches = matches[matches[:, 2].argsort()[::-1]]
+ matches = matches[np.unique(matches[:, 1], return_index=True)[1]]
+ # matches = matches[matches[:, 2].argsort()[::-1]]
+ matches = matches[np.unique(matches[:, 0], return_index=True)[1]]
+ correct[matches[:, 1].astype(int), i] = True
+ return torch.tensor(correct, dtype=torch.bool, device=iouv.device)
\ No newline at end of file