From 627e00cb14de3ef043327b3c7881597ccf83e691 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Mon, 6 Nov 2023 16:29:19 +0800 Subject: [PATCH] Support ODinW and evaluate (#11105) --- ...ss_swin-t_a_fpn_dyhead_pretrain_odinw13.py | 338 ++++++++ ...ss_swin-t_a_fpn_dyhead_pretrain_odinw35.py | 771 ++++++++++++++++++ configs/odinw/override_category.py | 96 +++ docs/zh_cn/user_guides/dataset_prepare.md | 55 ++ mmdet/datasets/base_det_dataset.py | 7 + mmdet/datasets/coco.py | 1 + mmdet/datasets/dataset_wrappers.py | 8 + mmdet/evaluation/__init__.py | 1 + mmdet/evaluation/evaluator/__init__.py | 4 + .../evaluator/multi_datasets_evaluator.py | 111 +++ mmdet/models/detectors/glip.py | 81 +- 11 files changed, 1452 insertions(+), 21 deletions(-) create mode 100644 configs/odinw/glip_atss_swin-t_a_fpn_dyhead_pretrain_odinw13.py create mode 100644 configs/odinw/glip_atss_swin-t_a_fpn_dyhead_pretrain_odinw35.py create mode 100644 configs/odinw/override_category.py create mode 100644 mmdet/evaluation/evaluator/__init__.py create mode 100644 mmdet/evaluation/evaluator/multi_datasets_evaluator.py diff --git a/configs/odinw/glip_atss_swin-t_a_fpn_dyhead_pretrain_odinw13.py b/configs/odinw/glip_atss_swin-t_a_fpn_dyhead_pretrain_odinw13.py new file mode 100644 index 00000000000..6c2cc0c6f09 --- /dev/null +++ b/configs/odinw/glip_atss_swin-t_a_fpn_dyhead_pretrain_odinw13.py @@ -0,0 +1,338 @@ +_base_ = '../glip/glip_atss_swin-t_a_fpn_dyhead_pretrain_obj365.py' + +dataset_type = 'CocoDataset' +data_root = 'data/odinw/' + +base_test_pipeline = _base_.test_pipeline +base_test_pipeline[-1]['meta_keys'] = ('img_id', 'img_path', 'ori_shape', + 'img_shape', 'scale_factor', 'text', + 'custom_entities', 'caption_prompt') + +# ---------------------1 AerialMaritimeDrone---------------------# +class_name = ('boat', 'car', 'dock', 'jetski', 'lift') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AerialMaritimeDrone/large/' +dataset_AerialMaritimeDrone = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + test_mode=True, + pipeline=base_test_pipeline, + return_classes=True) +val_evaluator_AerialMaritimeDrone = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------2 Aquarium---------------------# +class_name = ('fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish', + 'stingray') +metainfo = dict(classes=class_name) +_data_root = data_root + 'Aquarium/Aquarium Combined.v2-raw-1024.coco/' + +caption_prompt = None +# caption_prompt = { +# 'penguin': { +# 'suffix': ', which is black and white' +# }, +# 'puffin': { +# 'suffix': ' with orange beaks' +# }, +# 'stingray': { +# 'suffix': ' which is flat and round' +# }, +# } +dataset_Aquarium = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_Aquarium = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------3 CottontailRabbits---------------------# +class_name = ('Cottontail-Rabbit', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'CottontailRabbits/' + +caption_prompt = 
None +# caption_prompt = {'Cottontail-Rabbit': {'name': 'rabbit'}} + +dataset_CottontailRabbits = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_CottontailRabbits = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------4 EgoHands---------------------# +class_name = ('hand', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'EgoHands/generic/' + +caption_prompt = None +# caption_prompt = {'hand': {'suffix': ' of a person'}} + +dataset_EgoHands = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_EgoHands = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------5 NorthAmericaMushrooms---------------------# +class_name = ('CoW', 'chanterelle') +metainfo = dict(classes=class_name) +_data_root = data_root + 'NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/' # noqa + +caption_prompt = None +# caption_prompt = { +# 'CoW': { +# 'name': 'flat mushroom' +# }, +# 'chanterelle': { +# 'name': 'yellow mushroom' +# } +# } + +dataset_NorthAmericaMushrooms = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_NorthAmericaMushrooms = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------6 Packages---------------------# +class_name = ('package', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Packages/Raw/' + +caption_prompt = None +# caption_prompt = { +# 'package': { +# 'prefix': 'there is a ', +# 'suffix': ' on the porch' +# } +# } + +dataset_Packages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_Packages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------7 PascalVOC---------------------# +class_name = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', + 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor') +metainfo = dict(classes=class_name) +_data_root = data_root + 'PascalVOC/' +dataset_PascalVOC = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_PascalVOC = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------8 
pistols---------------------# +class_name = ('pistol', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pistols/export/' +dataset_pistols = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pistols = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------9 pothole---------------------# +class_name = ('pothole', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pothole/' + +caption_prompt = None +# caption_prompt = { +# 'pothole': { +# 'prefix': 'there are some ', +# 'name': 'holes', +# 'suffix': ' on the road' +# } +# } + +dataset_pothole = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pothole = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------10 Raccoon---------------------# +class_name = ('raccoon', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Raccoon/Raccoon.v2-raw.coco/' +dataset_Raccoon = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_Raccoon = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------11 ShellfishOpenImages---------------------# +class_name = ('Crab', 'Lobster', 'Shrimp') +metainfo = dict(classes=class_name) +_data_root = data_root + 'ShellfishOpenImages/raw/' +dataset_ShellfishOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ShellfishOpenImages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------12 thermalDogsAndPeople---------------------# +class_name = ('dog', 'person') +metainfo = dict(classes=class_name) +_data_root = data_root + 'thermalDogsAndPeople/' +dataset_thermalDogsAndPeople = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_thermalDogsAndPeople = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------13 VehiclesOpenImages---------------------# +class_name = ('Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck') +metainfo = dict(classes=class_name) +_data_root = data_root + 'VehiclesOpenImages/416x416/' +dataset_VehiclesOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_VehiclesOpenImages = 
dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# --------------------- Config---------------------# +dataset_prefixes = [ + 'AerialMaritimeDrone', 'Aquarium', 'CottontailRabbits', 'EgoHands', + 'NorthAmericaMushrooms', 'Packages', 'PascalVOC', 'pistols', 'pothole', + 'Raccoon', 'ShellfishOpenImages', 'thermalDogsAndPeople', + 'VehiclesOpenImages' +] +datasets = [ + dataset_AerialMaritimeDrone, dataset_Aquarium, dataset_CottontailRabbits, + dataset_EgoHands, dataset_NorthAmericaMushrooms, dataset_Packages, + dataset_PascalVOC, dataset_pistols, dataset_pothole, dataset_Raccoon, + dataset_ShellfishOpenImages, dataset_thermalDogsAndPeople, + dataset_VehiclesOpenImages +] +metrics = [ + val_evaluator_AerialMaritimeDrone, val_evaluator_Aquarium, + val_evaluator_CottontailRabbits, val_evaluator_EgoHands, + val_evaluator_NorthAmericaMushrooms, val_evaluator_Packages, + val_evaluator_PascalVOC, val_evaluator_pistols, val_evaluator_pothole, + val_evaluator_Raccoon, val_evaluator_ShellfishOpenImages, + val_evaluator_thermalDogsAndPeople, val_evaluator_VehiclesOpenImages +] + +# -------------------------------------------------# +val_dataloader = dict( + dataset=dict(_delete_=True, type='ConcatDataset', datasets=datasets)) +test_dataloader = val_dataloader + +val_evaluator = dict( + _delete_=True, + type='MultiDatasetsEvaluator', + metrics=metrics, + dataset_prefixes=dataset_prefixes) +test_evaluator = val_evaluator diff --git a/configs/odinw/glip_atss_swin-t_a_fpn_dyhead_pretrain_odinw35.py b/configs/odinw/glip_atss_swin-t_a_fpn_dyhead_pretrain_odinw35.py new file mode 100644 index 00000000000..292c5ec1639 --- /dev/null +++ b/configs/odinw/glip_atss_swin-t_a_fpn_dyhead_pretrain_odinw35.py @@ -0,0 +1,771 @@ +_base_ = '../glip/glip_atss_swin-t_a_fpn_dyhead_pretrain_obj365.py' + +dataset_type = 'CocoDataset' +data_root = 'data/odinw/' + +# ---------------------1 AerialMaritimeDrone_large---------------------# +class_name = ('boat', 'car', 'dock', 'jetski', 'lift') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AerialMaritimeDrone/large/' +dataset_AerialMaritimeDrone_large = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_AerialMaritimeDrone_large = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------2 AerialMaritimeDrone_tiled---------------------# +class_name = ('boat', 'car', 'dock', 'jetski', 'lift') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AerialMaritimeDrone/tiled/' +dataset_AerialMaritimeDrone_tiled = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_AerialMaritimeDrone_tiled = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------3 AmericanSignLanguageLetters---------------------# +class_name = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AmericanSignLanguageLetters/American 
Sign Language Letters.v1-v1.coco/' # noqa +dataset_AmericanSignLanguageLetters = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_AmericanSignLanguageLetters = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------4 Aquarium---------------------# +class_name = ('fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish', + 'stingray') +metainfo = dict(classes=class_name) +_data_root = data_root + 'Aquarium/Aquarium Combined.v2-raw-1024.coco/' +dataset_Aquarium = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_Aquarium = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------5 BCCD---------------------# +class_name = ('Platelets', 'RBC', 'WBC') +metainfo = dict(classes=class_name) +_data_root = data_root + 'BCCD/BCCD.v3-raw.coco/' +dataset_BCCD = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_BCCD = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------6 boggleBoards---------------------# +class_name = ('Q', 'a', 'an', 'b', 'c', 'd', 'e', 'er', 'f', 'g', 'h', 'he', + 'i', 'in', 'j', 'k', 'l', 'm', 'n', 'o', 'o ', 'p', 'q', 'qu', + 'r', 's', 't', 't\\', 'th', 'u', 'v', 'w', 'wild', 'x', 'y', 'z') +metainfo = dict(classes=class_name) +_data_root = data_root + 'boggleBoards/416x416AutoOrient/export/' +dataset_boggleBoards = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_boggleBoards = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------7 brackishUnderwater---------------------# +class_name = ('crab', 'fish', 'jellyfish', 'shrimp', 'small_fish', 'starfish') +metainfo = dict(classes=class_name) +_data_root = data_root + 'brackishUnderwater/960x540/' +dataset_brackishUnderwater = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_brackishUnderwater = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------8 ChessPieces---------------------# +class_name = (' ', 'black bishop', 'black king', 'black knight', 'black pawn', + 'black queen', 'black rook', 'white bishop', 'white king', + 'white knight', 'white pawn', 'white queen', 'white rook') +metainfo = dict(classes=class_name) +_data_root = data_root + 'ChessPieces/Chess Pieces.v23-raw.coco/' +dataset_ChessPieces = dict( + type=dataset_type, + 
metainfo=metainfo, + data_root=_data_root, + ann_file='valid/new_annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ChessPieces = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/new_annotations_without_background.json', + metric='bbox') + +# ---------------------9 CottontailRabbits---------------------# +class_name = ('Cottontail-Rabbit', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'CottontailRabbits/' +dataset_CottontailRabbits = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_CottontailRabbits = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------10 dice---------------------# +class_name = ('1', '2', '3', '4', '5', '6') +metainfo = dict(classes=class_name) +_data_root = data_root + 'dice/mediumColor/export/' +dataset_dice = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_dice = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------11 DroneControl---------------------# +class_name = ('follow', 'follow_hand', 'land', 'land_hand', 'null', 'object', + 'takeoff', 'takeoff-hand') +metainfo = dict(classes=class_name) +_data_root = data_root + 'DroneControl/Drone Control.v3-raw.coco/' +dataset_DroneControl = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_DroneControl = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------12 EgoHands_generic---------------------# +class_name = ('hand', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'EgoHands/generic/' +dataset_EgoHands_generic = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_EgoHands_generic = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------13 EgoHands_specific---------------------# +class_name = ('myleft', 'myright', 'yourleft', 'yourright') +metainfo = dict(classes=class_name) +_data_root = data_root + 'EgoHands/specific/' +dataset_EgoHands_specific = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_EgoHands_specific = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------14 HardHatWorkers---------------------# +class_name = ('head', 'helmet', 'person') +metainfo = 
dict(classes=class_name) +_data_root = data_root + 'HardHatWorkers/raw/' +dataset_HardHatWorkers = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_HardHatWorkers = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------15 MaskWearing---------------------# +class_name = ('mask', 'no-mask') +metainfo = dict(classes=class_name) +_data_root = data_root + 'MaskWearing/raw/' +dataset_MaskWearing = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_MaskWearing = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------16 MountainDewCommercial---------------------# +class_name = ('bottle', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'MountainDewCommercial/' +dataset_MountainDewCommercial = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_MountainDewCommercial = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------17 NorthAmericaMushrooms---------------------# +class_name = ('flat mushroom', 'yellow mushroom') +metainfo = dict(classes=class_name) +_data_root = data_root + 'NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/' # noqa +dataset_NorthAmericaMushrooms = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/new_annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_NorthAmericaMushrooms = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/new_annotations_without_background.json', + metric='bbox') + +# ---------------------18 openPoetryVision---------------------# +class_name = ('American Typewriter', 'Andale Mono', 'Apple Chancery', 'Arial', + 'Avenir', 'Baskerville', 'Big Caslon', 'Bradley Hand', + 'Brush Script MT', 'Chalkboard', 'Comic Sans MS', 'Copperplate', + 'Courier', 'Didot', 'Futura', 'Geneva', 'Georgia', 'Gill Sans', + 'Helvetica', 'Herculanum', 'Impact', 'Kefa', 'Lucida Grande', + 'Luminari', 'Marker Felt', 'Menlo', 'Monaco', 'Noteworthy', + 'Optima', 'PT Sans', 'PT Serif', 'Palatino', 'Papyrus', + 'Phosphate', 'Rockwell', 'SF Pro', 'SignPainter', 'Skia', + 'Snell Roundhand', 'Tahoma', 'Times New Roman', 'Trebuchet MS', + 'Verdana') +metainfo = dict(classes=class_name) +_data_root = data_root + 'openPoetryVision/512x512/' +dataset_openPoetryVision = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_openPoetryVision = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------19 
OxfordPets_by_breed---------------------# +class_name = ('cat-Abyssinian', 'cat-Bengal', 'cat-Birman', 'cat-Bombay', + 'cat-British_Shorthair', 'cat-Egyptian_Mau', 'cat-Maine_Coon', + 'cat-Persian', 'cat-Ragdoll', 'cat-Russian_Blue', 'cat-Siamese', + 'cat-Sphynx', 'dog-american_bulldog', + 'dog-american_pit_bull_terrier', 'dog-basset_hound', + 'dog-beagle', 'dog-boxer', 'dog-chihuahua', + 'dog-english_cocker_spaniel', 'dog-english_setter', + 'dog-german_shorthaired', 'dog-great_pyrenees', 'dog-havanese', + 'dog-japanese_chin', 'dog-keeshond', 'dog-leonberger', + 'dog-miniature_pinscher', 'dog-newfoundland', 'dog-pomeranian', + 'dog-pug', 'dog-saint_bernard', 'dog-samoyed', + 'dog-scottish_terrier', 'dog-shiba_inu', + 'dog-staffordshire_bull_terrier', 'dog-wheaten_terrier', + 'dog-yorkshire_terrier') +metainfo = dict(classes=class_name) +_data_root = data_root + 'OxfordPets/by-breed/' # noqa +dataset_OxfordPets_by_breed = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_OxfordPets_by_breed = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------20 OxfordPets_by_species---------------------# +class_name = ('cat', 'dog') +metainfo = dict(classes=class_name) +_data_root = data_root + 'OxfordPets/by-species/' # noqa +dataset_OxfordPets_by_species = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_OxfordPets_by_species = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------21 PKLot---------------------# +class_name = ('space-empty', 'space-occupied') +metainfo = dict(classes=class_name) +_data_root = data_root + 'PKLot/640/' # noqa +dataset_PKLot = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_PKLot = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------22 Packages---------------------# +class_name = ('package', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Packages/Raw/' +dataset_Packages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_Packages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------23 PascalVOC---------------------# +class_name = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', + 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor') +metainfo = dict(classes=class_name) +_data_root = data_root + 'PascalVOC/' +dataset_PascalVOC = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + 
ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_PascalVOC = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------24 pistols---------------------# +class_name = ('pistol', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pistols/export/' +dataset_pistols = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pistols = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------25 plantdoc---------------------# +class_name = ('Apple Scab Leaf', 'Apple leaf', 'Apple rust leaf', + 'Bell_pepper leaf', 'Bell_pepper leaf spot', 'Blueberry leaf', + 'Cherry leaf', 'Corn Gray leaf spot', 'Corn leaf blight', + 'Corn rust leaf', 'Peach leaf', 'Potato leaf', + 'Potato leaf early blight', 'Potato leaf late blight', + 'Raspberry leaf', 'Soyabean leaf', 'Soybean leaf', + 'Squash Powdery mildew leaf', 'Strawberry leaf', + 'Tomato Early blight leaf', 'Tomato Septoria leaf spot', + 'Tomato leaf', 'Tomato leaf bacterial spot', + 'Tomato leaf late blight', 'Tomato leaf mosaic virus', + 'Tomato leaf yellow virus', 'Tomato mold leaf', + 'Tomato two spotted spider mites leaf', 'grape leaf', + 'grape leaf black rot') +metainfo = dict(classes=class_name) +_data_root = data_root + 'plantdoc/416x416/' +dataset_plantdoc = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_plantdoc = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------26 pothole---------------------# +class_name = ('pothole', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pothole/' +dataset_pothole = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pothole = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------27 Raccoon---------------------# +class_name = ('raccoon', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Raccoon/Raccoon.v2-raw.coco/' +dataset_Raccoon = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_Raccoon = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------28 selfdrivingCar---------------------# +class_name = ('biker', 'car', 'pedestrian', 'trafficLight', + 'trafficLight-Green', 'trafficLight-GreenLeft', + 'trafficLight-Red', 'trafficLight-RedLeft', + 'trafficLight-Yellow', 'trafficLight-YellowLeft', 'truck') +metainfo = dict(classes=class_name) +_data_root = 
data_root + 'selfdrivingCar/fixedLarge/export/' +dataset_selfdrivingCar = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_selfdrivingCar = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------29 ShellfishOpenImages---------------------# +class_name = ('Crab', 'Lobster', 'Shrimp') +metainfo = dict(classes=class_name) +_data_root = data_root + 'ShellfishOpenImages/raw/' +dataset_ShellfishOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ShellfishOpenImages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------30 ThermalCheetah---------------------# +class_name = ('cheetah', 'human') +metainfo = dict(classes=class_name) +_data_root = data_root + 'ThermalCheetah/' +dataset_ThermalCheetah = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ThermalCheetah = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------31 thermalDogsAndPeople---------------------# +class_name = ('dog', 'person') +metainfo = dict(classes=class_name) +_data_root = data_root + 'thermalDogsAndPeople/' +dataset_thermalDogsAndPeople = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_thermalDogsAndPeople = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------32 UnoCards---------------------# +class_name = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', + '12', '13', '14') +metainfo = dict(classes=class_name) +_data_root = data_root + 'UnoCards/raw/' +dataset_UnoCards = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_UnoCards = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------33 VehiclesOpenImages---------------------# +class_name = ('Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck') +metainfo = dict(classes=class_name) +_data_root = data_root + 'VehiclesOpenImages/416x416/' +dataset_VehiclesOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_VehiclesOpenImages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# 
---------------------34 WildfireSmoke---------------------# +class_name = ('smoke', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'WildfireSmoke/' +dataset_WildfireSmoke = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_WildfireSmoke = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------35 websiteScreenshots---------------------# +class_name = ('button', 'field', 'heading', 'iframe', 'image', 'label', 'link', + 'text') +metainfo = dict(classes=class_name) +_data_root = data_root + 'websiteScreenshots/' +dataset_websiteScreenshots = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_websiteScreenshots = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# --------------------- Config---------------------# +dataset_prefixes = [ + 'AerialMaritimeDrone_large', + 'AerialMaritimeDrone_tiled', + 'AmericanSignLanguageLetters', + 'Aquarium', + 'BCCD', + 'boggleBoards', + 'brackishUnderwater', + 'ChessPieces', + 'CottontailRabbits', + 'dice', + 'DroneControl', + 'EgoHands_generic', + 'EgoHands_specific', + 'HardHatWorkers', + 'MaskWearing', + 'MountainDewCommercial', + 'NorthAmericaMushrooms', + 'openPoetryVision', + 'OxfordPets_by_breed', + 'OxfordPets_by_species', + 'PKLot', + 'Packages', + 'PascalVOC', + 'pistols', + 'plantdoc', + 'pothole', + 'Raccoons', + 'selfdrivingCar', + 'ShellfishOpenImages', + 'ThermalCheetah', + 'thermalDogsAndPeople', + 'UnoCards', + 'VehiclesOpenImages', + 'WildfireSmoke', + 'websiteScreenshots', +] + +datasets = [ + dataset_AerialMaritimeDrone_large, dataset_AerialMaritimeDrone_tiled, + dataset_AmericanSignLanguageLetters, dataset_Aquarium, dataset_BCCD, + dataset_boggleBoards, dataset_brackishUnderwater, dataset_ChessPieces, + dataset_CottontailRabbits, dataset_dice, dataset_DroneControl, + dataset_EgoHands_generic, dataset_EgoHands_specific, + dataset_HardHatWorkers, dataset_MaskWearing, dataset_MountainDewCommercial, + dataset_NorthAmericaMushrooms, dataset_openPoetryVision, + dataset_OxfordPets_by_breed, dataset_OxfordPets_by_species, dataset_PKLot, + dataset_Packages, dataset_PascalVOC, dataset_pistols, dataset_plantdoc, + dataset_pothole, dataset_Raccoon, dataset_selfdrivingCar, + dataset_ShellfishOpenImages, dataset_ThermalCheetah, + dataset_thermalDogsAndPeople, dataset_UnoCards, dataset_VehiclesOpenImages, + dataset_WildfireSmoke, dataset_websiteScreenshots +] + +metrics = [ + val_evaluator_AerialMaritimeDrone_large, + val_evaluator_AerialMaritimeDrone_tiled, + val_evaluator_AmericanSignLanguageLetters, val_evaluator_Aquarium, + val_evaluator_BCCD, val_evaluator_boggleBoards, + val_evaluator_brackishUnderwater, val_evaluator_ChessPieces, + val_evaluator_CottontailRabbits, val_evaluator_dice, + val_evaluator_DroneControl, val_evaluator_EgoHands_generic, + val_evaluator_EgoHands_specific, val_evaluator_HardHatWorkers, + val_evaluator_MaskWearing, val_evaluator_MountainDewCommercial, + val_evaluator_NorthAmericaMushrooms, val_evaluator_openPoetryVision, + val_evaluator_OxfordPets_by_breed, 
val_evaluator_OxfordPets_by_species, + val_evaluator_PKLot, val_evaluator_Packages, val_evaluator_PascalVOC, + val_evaluator_pistols, val_evaluator_plantdoc, val_evaluator_pothole, + val_evaluator_Raccoon, val_evaluator_selfdrivingCar, + val_evaluator_ShellfishOpenImages, val_evaluator_ThermalCheetah, + val_evaluator_thermalDogsAndPeople, val_evaluator_UnoCards, + val_evaluator_VehiclesOpenImages, val_evaluator_WildfireSmoke, + val_evaluator_websiteScreenshots +] + +# -------------------------------------------------# +val_dataloader = dict( + dataset=dict(_delete_=True, type='ConcatDataset', datasets=datasets)) +test_dataloader = val_dataloader + +val_evaluator = dict( + _delete_=True, + type='MultiDatasetsEvaluator', + metrics=metrics, + dataset_prefixes=dataset_prefixes) +test_evaluator = val_evaluator diff --git a/configs/odinw/override_category.py b/configs/odinw/override_category.py new file mode 100644 index 00000000000..7641c837053 --- /dev/null +++ b/configs/odinw/override_category.py @@ -0,0 +1,96 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse + +import mmengine + + +def parse_args(): + parser = argparse.ArgumentParser(description='Override Category') + parser.add_argument('data_root') + return parser.parse_args() + + +def main(): + args = parse_args() + + ChessPieces = [{ + 'id': 1, + 'name': ' ', + 'supercategory': 'pieces' + }, { + 'id': 2, + 'name': 'black bishop', + 'supercategory': 'pieces' + }, { + 'id': 3, + 'name': 'black king', + 'supercategory': 'pieces' + }, { + 'id': 4, + 'name': 'black knight', + 'supercategory': 'pieces' + }, { + 'id': 5, + 'name': 'black pawn', + 'supercategory': 'pieces' + }, { + 'id': 6, + 'name': 'black queen', + 'supercategory': 'pieces' + }, { + 'id': 7, + 'name': 'black rook', + 'supercategory': 'pieces' + }, { + 'id': 8, + 'name': 'white bishop', + 'supercategory': 'pieces' + }, { + 'id': 9, + 'name': 'white king', + 'supercategory': 'pieces' + }, { + 'id': 10, + 'name': 'white knight', + 'supercategory': 'pieces' + }, { + 'id': 11, + 'name': 'white pawn', + 'supercategory': 'pieces' + }, { + 'id': 12, + 'name': 'white queen', + 'supercategory': 'pieces' + }, { + 'id': 13, + 'name': 'white rook', + 'supercategory': 'pieces' + }] + + _data_root = args.data_root + 'ChessPieces/Chess Pieces.v23-raw.coco/' + json_data = mmengine.load(_data_root + + 'valid/annotations_without_background.json') + json_data['categories'] = ChessPieces + mmengine.dump(json_data, + _data_root + 'valid/new_annotations_without_background.json') + + NorthAmericaMushrooms = [{ + 'id': 1, + 'name': 'flat mushroom', + 'supercategory': 'mushroom' + }, { + 'id': 2, + 'name': 'yellow mushroom', + 'supercategory': 'mushroom' + }] + + _data_root = args.data_root + 'NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/' # noqa + json_data = mmengine.load(_data_root + + 'valid/annotations_without_background.json') + json_data['categories'] = NorthAmericaMushrooms + mmengine.dump(json_data, + _data_root + 'valid/new_annotations_without_background.json') + + +if __name__ == '__main__': + main() diff --git a/docs/zh_cn/user_guides/dataset_prepare.md b/docs/zh_cn/user_guides/dataset_prepare.md index a8bf32011a7..1caad856af0 100644 --- a/docs/zh_cn/user_guides/dataset_prepare.md +++ b/docs/zh_cn/user_guides/dataset_prepare.md @@ -305,3 +305,58 @@ mim download mmdet --dataset voc2012 # download coco2017 and preprocess by MIM mim download mmdet --dataset coco2017 ``` + +### ODinW 数据集准备 + +ODinW 数据集来自 GLIP 论文,用于评估预训练模型泛化性能。一共包括 ODinW-13 和 ODinW-35 两个版本,其中 
ODinW-35 包括了 ODinW-13 的所有数据。 目前数据托管在 [huggingface](https://huggingface.co/GLIPModel/GLIP) + +请确保你提前安装好了 [git lfs](https://git-lfs.com), 然后按照如下命令下载 + +```shell +cd mmdetection + +git lfs install +# 我们不需要下载权重 +GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/GLIPModel/GLIP + +cd GLIP +git lfs pull --include="odinw_35" +``` + +下载完成后,目录结构如下所示: + +```text +mmdetection +├── GLIP +| ├── odinw_35 +| | ├── AerialMaritimeDrone.zip +| | ├── AmericanSignLanguageLetters.zip +... +``` + +你可以采用如下命令全部解压并移动到 `mmdetection/data` 路径下: + +```shell +#!/bin/bash + +folder="./GLIP/odinw_35/" + +find "$folder" -type f -name "*.zip" | while read -r file; do + unzip "$file" -d "$(dirname "$file")" +done + +mv GLIP/odinw_35 data/ +``` + +最终结构如下所示: + +```text +mmdetection +├── tools +├── configs +├── data +| ├── odinw_35 +| | ├── AerialMaritimeDrone +... +│ ├── coco +``` diff --git a/mmdet/datasets/base_det_dataset.py b/mmdet/datasets/base_det_dataset.py index 57bc7098387..8b3876d5c06 100644 --- a/mmdet/datasets/base_det_dataset.py +++ b/mmdet/datasets/base_det_dataset.py @@ -21,6 +21,8 @@ class BaseDetDataset(BaseDataset): corresponding backend. Defaults to None. return_classes (bool): Whether to return class information for open vocabulary-based algorithms. Defaults to False. + caption_prompt (dict, optional): Prompt for captioning. + Defaults to None. """ def __init__(self, @@ -30,11 +32,16 @@ def __init__(self, file_client_args: dict = None, backend_args: dict = None, return_classes: bool = False, + caption_prompt: Optional[dict] = None, **kwargs) -> None: self.seg_map_suffix = seg_map_suffix self.proposal_file = proposal_file self.backend_args = backend_args self.return_classes = return_classes + self.caption_prompt = caption_prompt + if self.caption_prompt is not None: + assert self.return_classes, \ + 'return_classes must be True when using caption_prompt' if file_client_args is not None: raise RuntimeError( 'The `file_client_args` is deprecated, ' diff --git a/mmdet/datasets/coco.py b/mmdet/datasets/coco.py index 277b75988da..1cf21c4e667 100644 --- a/mmdet/datasets/coco.py +++ b/mmdet/datasets/coco.py @@ -129,6 +129,7 @@ def parse_data_info(self, raw_data_info: dict) -> Union[dict, List[dict]]: if self.return_classes: data_info['text'] = self.metainfo['classes'] + data_info['caption_prompt'] = self.caption_prompt data_info['custom_entities'] = True instances = [] diff --git a/mmdet/datasets/dataset_wrappers.py b/mmdet/datasets/dataset_wrappers.py index e651e2b9902..d4e26e07c0f 100644 --- a/mmdet/datasets/dataset_wrappers.py +++ b/mmdet/datasets/dataset_wrappers.py @@ -247,6 +247,14 @@ def __init__(self, if not lazy_init: self.full_init() + if is_all_same: + self._metainfo.update( + dict(cumulative_sizes=self.cumulative_sizes)) + else: + for i, dataset in enumerate(self.datasets): + self._metainfo[i].update( + dict(cumulative_sizes=self.cumulative_sizes)) + def get_dataset_source(self, idx: int) -> int: dataset_idx, _ = self._get_ori_dataset_idx(idx) return dataset_idx diff --git a/mmdet/evaluation/__init__.py b/mmdet/evaluation/__init__.py index f70dc226d30..126dea092eb 100644 --- a/mmdet/evaluation/__init__.py +++ b/mmdet/evaluation/__init__.py @@ -1,3 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+from .evaluator import * # noqa: F401,F403 from .functional import * # noqa: F401,F403 from .metrics import * # noqa: F401,F403 diff --git a/mmdet/evaluation/evaluator/__init__.py b/mmdet/evaluation/evaluator/__init__.py new file mode 100644 index 00000000000..6b13fe99548 --- /dev/null +++ b/mmdet/evaluation/evaluator/__init__.py @@ -0,0 +1,4 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from .multi_datasets_evaluator import MultiDatasetsEvaluator + +__all__ = ['MultiDatasetsEvaluator'] diff --git a/mmdet/evaluation/evaluator/multi_datasets_evaluator.py b/mmdet/evaluation/evaluator/multi_datasets_evaluator.py new file mode 100644 index 00000000000..5cff1cf210e --- /dev/null +++ b/mmdet/evaluation/evaluator/multi_datasets_evaluator.py @@ -0,0 +1,111 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import warnings +from collections import OrderedDict +from typing import Sequence, Union + +from mmengine.dist import (broadcast_object_list, collect_results, + is_main_process) +from mmengine.evaluator import BaseMetric, Evaluator +from mmengine.evaluator.metric import _to_cpu +from mmengine.registry import EVALUATOR + +from mmdet.utils import ConfigType + + +@EVALUATOR.register_module() +class MultiDatasetsEvaluator(Evaluator): + """Wrapper class to compose class: `ConcatDataset` and multiple + :class:`BaseMetric` instances. + The metrics will be evaluated on each dataset slice separately. The name of + the each metric is the concatenation of the dataset prefix, the metric + prefix and the key of metric - e.g. + `dataset_prefix/metric_prefix/accuracy`. + + Args: + metrics (dict or BaseMetric or Sequence): The config of metrics. + dataset_prefixes (Sequence[str]): The prefix of each dataset. The + length of this sequence should be the same as the length of the + datasets. + """ + + def __init__(self, metrics: Union[ConfigType, BaseMetric, Sequence], + dataset_prefixes: Sequence[str]) -> None: + super().__init__(metrics) + self.dataset_prefixes = dataset_prefixes + self._setups = False + + def _get_cumulative_sizes(self): + # ConcatDataset have a property `cumulative_sizes` + if isinstance(self.dataset_meta, Sequence): + dataset_slices = self.dataset_meta[0]['cumulative_sizes'] + if not self._setups: + self._setups = True + for dataset_meta, metric in zip(self.dataset_meta, + self.metrics): + metric.dataset_meta = dataset_meta + else: + dataset_slices = self.dataset_meta['cumulative_sizes'] + return dataset_slices + + def evaluate(self, size: int) -> dict: + """Invoke ``evaluate`` method of each metric and collect the metrics + dictionary. + + Args: + size (int): Length of the entire validation dataset. When batch + size > 1, the dataloader may pad some data samples to make + sure all ranks have the same length of dataset slice. The + ``collect_results`` function will drop the padded data based on + this size. + + Returns: + dict: Evaluation results of all metrics. The keys are the names + of the metrics, and the values are corresponding results. + """ + metrics_results = OrderedDict() + dataset_slices = self._get_cumulative_sizes() + assert len(dataset_slices) == len(self.dataset_prefixes) + + for dataset_prefix, start, end, metric in zip( + self.dataset_prefixes, [0] + dataset_slices[:-1], + dataset_slices, self.metrics): + if len(metric.results) == 0: + warnings.warn( + f'{metric.__class__.__name__} got empty `self.results`.' 
+ 'Please ensure that the processed results are properly ' + 'added into `self.results` in `process` method.') + + results = collect_results(metric.results, size, + metric.collect_device) + + if is_main_process(): + # cast all tensors in results list to cpu + results = _to_cpu(results) + _metrics = metric.compute_metrics( + results[start:end]) # type: ignore + + if metric.prefix: + final_prefix = '/'.join((dataset_prefix, metric.prefix)) + else: + final_prefix = dataset_prefix + print(f'================{final_prefix}================') + metric_results = { + '/'.join((final_prefix, k)): v + for k, v in _metrics.items() + } + + # Check metric name conflicts + for name in metric_results.keys(): + if name in metrics_results: + raise ValueError( + 'There are multiple evaluation results with ' + f'the same metric name {name}. Please make ' + 'sure all metrics have different prefixes.') + metrics_results.update(metric_results) + metric.results.clear() + if is_main_process(): + metrics_results = [metrics_results] + else: + metrics_results = [None] # type: ignore + broadcast_object_list(metrics_results) + return metrics_results[0] diff --git a/mmdet/models/detectors/glip.py b/mmdet/models/detectors/glip.py index e076a55fe20..13cfea960a8 100644 --- a/mmdet/models/detectors/glip.py +++ b/mmdet/models/detectors/glip.py @@ -1,7 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. import re import warnings -from typing import Tuple, Union +from typing import Optional, Tuple, Union import torch from torch import Tensor @@ -208,9 +208,11 @@ def __init__(self, self._special_tokens = '. ' def get_tokens_and_prompts( - self, - original_caption: Union[str, list, tuple], - custom_entities: bool = False) -> Tuple[dict, str, list, list]: + self, + original_caption: Union[str, list, tuple], + custom_entities: bool = False, + enhanced_text_prompts: Optional[ConfigType] = None + ) -> Tuple[dict, str, list, list]: """Get the tokens positive and prompts for the caption.""" if isinstance(original_caption, (list, tuple)) or custom_entities: if custom_entities and isinstance(original_caption, str): @@ -219,15 +221,45 @@ def get_tokens_and_prompts( original_caption = list( filter(lambda x: len(x) > 0, original_caption)) - caption_string = '' - tokens_positive = [] - for idx, word in enumerate(original_caption): - tokens_positive.append( - [[len(caption_string), - len(caption_string) + len(word)]]) - caption_string += word - if idx != len(original_caption) - 1: - caption_string += self._special_tokens + if custom_entities and enhanced_text_prompts is not None: + caption_string = '' + tokens_positive = [] + for idx, word in enumerate(original_caption): + if word in enhanced_text_prompts: + enhanced_text_dict = enhanced_text_prompts[word] + if 'prefix' in enhanced_text_dict: + caption_string += enhanced_text_dict['prefix'] + start_i = len(caption_string) + if 'name' in enhanced_text_dict: + caption_string += enhanced_text_dict['name'] + else: + caption_string += word + end_i = len(caption_string) + tokens_positive.append([[start_i, end_i]]) + + if 'suffix' in enhanced_text_dict: + caption_string += enhanced_text_dict['suffix'] + else: + tokens_positive.append([[ + len(caption_string), + len(caption_string) + len(word) + ]]) + caption_string += word + + if idx != len(original_caption) - 1: + caption_string += self._special_tokens + else: + caption_string = '' + tokens_positive = [] + for idx, word in enumerate(original_caption): + tokens_positive.append([[ + len(caption_string), + len(caption_string) + len(word) + ]]) + 
caption_string += word + if idx != len(original_caption) - 1: + caption_string += self._special_tokens + tokenized = self.language_model.tokenizer([caption_string], return_tensors='pt') entities = original_caption @@ -248,12 +280,14 @@ def get_positive_map(self, tokenized, tokens_positive): return positive_map_label_to_token, positive_map def get_tokens_positive_and_prompts( - self, - original_caption: Union[str, list, tuple], - custom_entities: bool = False) -> Tuple[dict, str, Tensor, list]: + self, + original_caption: Union[str, list, tuple], + custom_entities: bool = False, + enhanced_text_prompt: Optional[ConfigType] = None + ) -> Tuple[dict, str, Tensor, list]: tokenized, caption_string, tokens_positive, entities = \ self.get_tokens_and_prompts( - original_caption, custom_entities) + original_caption, custom_entities, enhanced_text_prompt) positive_map_label_to_token, positive_map = self.get_positive_map( tokenized, tokens_positive) return positive_map_label_to_token, caption_string, \ @@ -345,6 +379,9 @@ def predict(self, text_prompts = [ data_samples.text for data_samples in batch_data_samples ] + enhanced_text_prompts = [ + data_samples.caption_prompt for data_samples in batch_data_samples + ] if 'custom_entities' in batch_data_samples[0]: # Assuming that the `custom_entities` flag @@ -357,14 +394,16 @@ def predict(self, # All the text prompts are the same, # so there is no need to calculate them multiple times. _positive_maps_and_prompts = [ - self.get_tokens_positive_and_prompts(text_prompts[0], - custom_entities) + self.get_tokens_positive_and_prompts( + text_prompts[0], custom_entities, enhanced_text_prompts[0]) ] * len(batch_inputs) else: _positive_maps_and_prompts = [ self.get_tokens_positive_and_prompts(text_prompt, - custom_entities) - for text_prompt in text_prompts + custom_entities, + enhanced_text_prompt) + for text_prompt, enhanced_text_prompt in zip( + text_prompts, enhanced_text_prompts) ] token_positive_maps, text_prompts, _, entities = zip(
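
The per-dataset `caption_prompt` dicts wired through `BaseDetDataset`, `CocoDataset` and the ODinW configs above are consumed by the new branch in `GLIP.get_tokens_and_prompts`. Below is a minimal, self-contained sketch of that prompt-enhancement logic, written outside mmdet with illustrative names (`build_enhanced_caption`, `SEP`); it is a reading aid for the diff, not the library API.

```python
# Sketch only: mirrors the enhanced-prompt branch added to
# GLIP.get_tokens_and_prompts, using illustrative (non-mmdet) names.
from typing import Dict, List, Optional, Tuple

SEP = '. '  # GLIP joins class phrases with '. ' (self._special_tokens)


def build_enhanced_caption(
    classes: List[str],
    caption_prompt: Optional[Dict[str, dict]] = None,
) -> Tuple[str, List[List[List[int]]]]:
    """Return the caption string and per-class character spans."""
    caption = ''
    tokens_positive = []
    for idx, word in enumerate(classes):
        enhanced = (caption_prompt or {}).get(word)
        if enhanced is not None:
            caption += enhanced.get('prefix', '')
            start = len(caption)
            # 'name' optionally replaces the raw category word in the prompt
            caption += enhanced.get('name', word)
            tokens_positive.append([[start, len(caption)]])
            caption += enhanced.get('suffix', '')
        else:
            tokens_positive.append(
                [[len(caption), len(caption) + len(word)]])
            caption += word
        if idx != len(classes) - 1:
            caption += SEP
    return caption, tokens_positive


if __name__ == '__main__':
    prompt = {'package': {'prefix': 'there is a ', 'suffix': ' on the porch'}}
    print(build_enhanced_caption(['package', 'person'], prompt))
    # ('there is a package on the porch. person', [[[11, 18]], [[33, 39]]])
```

On the evaluation side, `MultiDatasetsEvaluator` slices the concatenated results by the wrapped datasets' `cumulative_sizes` and reports each metric under `dataset_prefix/metric_prefix/key`; assuming `CocoMetric`'s default `coco` prefix, the ODinW-13 config above would therefore report keys such as `Aquarium/coco/bbox_mAP`.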