From d49fde85822df771a8f6c66b06d0c7bc8b06543f Mon Sep 17 00:00:00 2001 From: lufficc Date: Sat, 22 Jun 2019 00:07:29 +0800 Subject: [PATCH 01/24] Refactoring --- ....yaml => vgg_ssd300_coco_trainval35k.yaml} | 0 ...0_voc0712.yaml => vgg_ssd300_voc0712.yaml} | 0 ....yaml => vgg_ssd512_coco_trainval35k.yaml} | 2 + ...2_voc0712.yaml => vgg_ssd512_voc0712.yaml} | 2 + demo.py | 58 +++++---- eval_ssd.py | 52 ++++++-- ssd/config/defaults.py | 7 + ssd/data/build.py | 55 ++++++++ ssd/data/datasets/__init__.py | 17 ++- .../datasets/{coco_dataset.py => coco.py} | 13 +- ssd/data/datasets/evaluation/__init__.py | 25 +--- ssd/data/datasets/evaluation/coco/__init__.py | 13 +- ssd/data/datasets/evaluation/voc/__init__.py | 10 +- ssd/data/datasets/{voc_dataset.py => voc.py} | 16 ++- ssd/data/samplers/__init__.py | 3 +- ssd/data/samplers/distributed.py | 66 ++++++++++ ssd/data/transforms/__init__.py | 34 +++++ ssd/data/transforms/target_transform.py | 25 ++++ ssd/{ => data}/transforms/transforms.py | 0 ssd/engine/inference.py | 94 +++++--------- ssd/engine/trainer.py | 66 +++++----- ssd/{module => layers}/__init__.py | 0 .../anchors}/__init__.py | 0 ssd/{module => modeling/anchors}/prior_box.py | 8 +- ssd/modeling/backbone/__init__.py | 9 ++ ssd/modeling/backbone/mobilenet.py | 0 ssd/modeling/{vgg_ssd.py => backbone/vgg.py} | 98 +++++++++----- ssd/modeling/data_preprocessing.py | 59 --------- ssd/modeling/detector/__init__.py | 10 ++ ssd/modeling/detector/ssd_detector.py | 19 +++ ssd/modeling/detector_head/__init__.py | 6 + ssd/modeling/detector_head/detector_head.py | 71 +++++++++++ ssd/modeling/detector_head/inference.py | 53 ++++++++ .../loss.py} | 0 ssd/modeling/post_processor.py | 91 ------------- ssd/modeling/predictor.py | 29 ----- ssd/modeling/ssd.py | 120 ------------------ ssd/solver/__init__.py | 0 ssd/solver/build.py | 16 +++ ssd/{utils => solver}/lr_scheduler.py | 0 ssd/structures/__init__.py | 0 ssd/structures/container.py | 49 +++++++ ssd/utils/checkpoint.py | 94 
++++++++++++++ .../{distributed_util.py => dist_util.py} | 0 ssd/utils/misc.py | 22 ++-- ssd/utils/model_zoo.py | 67 ++++++++++ ssd/utils/viz.py | 97 -------------- train_ssd.py | 83 ++++-------- 48 files changed, 883 insertions(+), 676 deletions(-) rename configs/{ssd300_coco_trainval35k.yaml => vgg_ssd300_coco_trainval35k.yaml} (100%) rename configs/{ssd300_voc0712.yaml => vgg_ssd300_voc0712.yaml} (100%) rename configs/{ssd512_coco_trainval35k.yaml => vgg_ssd512_coco_trainval35k.yaml} (90%) rename configs/{ssd512_voc0712.yaml => vgg_ssd512_voc0712.yaml} (89%) create mode 100644 ssd/data/build.py rename ssd/data/datasets/{coco_dataset.py => coco.py} (92%) rename ssd/data/datasets/{voc_dataset.py => voc.py} (86%) create mode 100644 ssd/data/samplers/distributed.py create mode 100644 ssd/data/transforms/__init__.py create mode 100644 ssd/data/transforms/target_transform.py rename ssd/{ => data}/transforms/transforms.py (100%) rename ssd/{module => layers}/__init__.py (100%) rename ssd/{transforms => modeling/anchors}/__init__.py (100%) rename ssd/{module => modeling/anchors}/prior_box.py (92%) create mode 100644 ssd/modeling/backbone/__init__.py create mode 100644 ssd/modeling/backbone/mobilenet.py rename ssd/modeling/{vgg_ssd.py => backbone/vgg.py} (56%) delete mode 100644 ssd/modeling/data_preprocessing.py create mode 100644 ssd/modeling/detector/__init__.py create mode 100644 ssd/modeling/detector/ssd_detector.py create mode 100644 ssd/modeling/detector_head/__init__.py create mode 100644 ssd/modeling/detector_head/detector_head.py create mode 100644 ssd/modeling/detector_head/inference.py rename ssd/modeling/{multibox_loss.py => detector_head/loss.py} (100%) delete mode 100644 ssd/modeling/post_processor.py delete mode 100644 ssd/modeling/predictor.py delete mode 100644 ssd/modeling/ssd.py create mode 100644 ssd/solver/__init__.py create mode 100644 ssd/solver/build.py rename ssd/{utils => solver}/lr_scheduler.py (100%) create mode 100644 
ssd/structures/__init__.py create mode 100644 ssd/structures/container.py create mode 100644 ssd/utils/checkpoint.py rename ssd/utils/{distributed_util.py => dist_util.py} (100%) create mode 100644 ssd/utils/model_zoo.py delete mode 100644 ssd/utils/viz.py diff --git a/configs/ssd300_coco_trainval35k.yaml b/configs/vgg_ssd300_coco_trainval35k.yaml similarity index 100% rename from configs/ssd300_coco_trainval35k.yaml rename to configs/vgg_ssd300_coco_trainval35k.yaml diff --git a/configs/ssd300_voc0712.yaml b/configs/vgg_ssd300_voc0712.yaml similarity index 100% rename from configs/ssd300_voc0712.yaml rename to configs/vgg_ssd300_voc0712.yaml diff --git a/configs/ssd512_coco_trainval35k.yaml b/configs/vgg_ssd512_coco_trainval35k.yaml similarity index 90% rename from configs/ssd512_coco_trainval35k.yaml rename to configs/vgg_ssd512_coco_trainval35k.yaml index 6ed2aaf9..c1c28710 100644 --- a/configs/ssd512_coco_trainval35k.yaml +++ b/configs/vgg_ssd512_coco_trainval35k.yaml @@ -1,5 +1,7 @@ MODEL: NUM_CLASSES: 81 + BACKBONE: + OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256) PRIORS: FEATURE_MAPS: [64, 32, 16, 8, 4, 2, 1] STRIDES: [8, 16, 32, 64, 128, 256, 512] diff --git a/configs/ssd512_voc0712.yaml b/configs/vgg_ssd512_voc0712.yaml similarity index 89% rename from configs/ssd512_voc0712.yaml rename to configs/vgg_ssd512_voc0712.yaml index 1aaf04f0..72c53eb6 100644 --- a/configs/ssd512_voc0712.yaml +++ b/configs/vgg_ssd512_voc0712.yaml @@ -1,5 +1,7 @@ MODEL: NUM_CLASSES: 21 + BACKBONE: + OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256) PRIORS: FEATURE_MAPS: [64, 32, 16, 8, 4, 2, 1] STRIDES: [8, 16, 32, 64, 128, 256, 512] diff --git a/demo.py b/demo.py index 96c26589..dd03bda0 100644 --- a/demo.py +++ b/demo.py @@ -4,47 +4,55 @@ import torch from PIL import Image from tqdm import tqdm +from vizer.draw import draw_boxes + from ssd.config import cfg from ssd.data.datasets import COCODataset, VOCDataset -from ssd.modeling.predictor import Predictor -from 
ssd.modeling.vgg_ssd import build_ssd_model import argparse import numpy as np -from ssd.utils.viz import draw_bounding_boxes +from ssd.data.transforms import build_transforms +from ssd.modeling.detector import build_detection_model +from ssd.utils import mkdir +from ssd.utils.checkpoint import CheckPointer -def run_demo(cfg, weights_file, iou_threshold, score_threshold, images_dir, output_dir, dataset_type): +def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type): if dataset_type == "voc": class_names = VOCDataset.class_names elif dataset_type == 'coco': class_names = COCODataset.class_names else: raise NotImplementedError('Not implemented now.') - device = torch.device(cfg.MODEL.DEVICE) - model = build_ssd_model(cfg) - model.load(weights_file) - print('Loaded weights from {}.'.format(weights_file)) + + model = build_detection_model(cfg) model = model.to(device) - predictor = Predictor(cfg=cfg, - model=model, - iou_threshold=iou_threshold, - score_threshold=score_threshold, - device=device) - cpu_device = torch.device("cpu") + checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR) + checkpointer.load(ckpt, use_latest=ckpt is None) image_paths = glob.glob(os.path.join(images_dir, '*.jpg')) + mkdir(output_dir) - if not os.path.exists(output_dir): - os.makedirs(output_dir) - + cpu_device = torch.device("cpu") + transforms = build_transforms(cfg, is_train=False) + model.eval() for image_path in tqdm(image_paths): - image = Image.open(image_path).convert("RGB") - image = np.array(image) - output = predictor.predict(image) - boxes, labels, scores = [o.to(cpu_device).numpy() for o in output] - drawn_image = draw_bounding_boxes(image, boxes, labels, scores, class_names).astype(np.uint8) + image = np.array(Image.open(image_path).convert("RGB")) + height, width, _ = image.shape + images = transforms(image).unsqueeze(0) + + result = model(images)[0] + result = result.resize((width, height)).to(cpu_device).numpy() + boxes, labels, scores = 
result['boxes'], result['labels'], result['scores'] + + indices = scores > score_threshold + + boxes = boxes[indices] + labels = labels[indices] + scores = scores[indices] + + drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8) image_name = os.path.basename(image_path) Image.fromarray(drawn_image).save(os.path.join(output_dir, image_name)) @@ -58,8 +66,7 @@ def main(): help="path to config file", type=str, ) - parser.add_argument("--weights", type=str, help="Trained weights.") - parser.add_argument("--iou_threshold", type=float, default=0.5) + parser.add_argument("--ckpt", type=str, default=None, help="Trained weights.") parser.add_argument("--score_threshold", type=float, default=0.5) parser.add_argument("--images_dir", default='demo', type=str, help='Specify a image dir to do prediction.') parser.add_argument("--output_dir", default='demo/result', type=str, help='Specify a image dir to save predicted images.') @@ -85,8 +92,7 @@ def main(): print("Running with config:\n{}".format(cfg)) run_demo(cfg=cfg, - weights_file=args.weights, - iou_threshold=args.iou_threshold, + ckpt=args.ckpt, score_threshold=args.score_threshold, images_dir=args.images_dir, output_dir=args.output_dir, diff --git a/eval_ssd.py b/eval_ssd.py index 3f612b01..d73a883b 100644 --- a/eval_ssd.py +++ b/eval_ssd.py @@ -6,22 +6,41 @@ import torch.utils.data from ssd.config import cfg -from ssd.engine.inference import do_evaluation -from ssd.modeling.vgg_ssd import build_ssd_model -from ssd.utils import distributed_util +from ssd.data.build import make_data_loader +from ssd.engine.inference import inference +from ssd.modeling.detector import build_detection_model +from ssd.utils import dist_util, mkdir +from ssd.utils.checkpoint import CheckPointer +from ssd.utils.dist_util import synchronize from ssd.utils.logger import setup_logger -def evaluation(cfg, weights_file, output_dir, distributed): - if not os.path.exists(output_dir): - os.makedirs(output_dir) 
+@torch.no_grad() +def do_evaluation(cfg, model, distributed): + if isinstance(model, torch.nn.parallel.DistributedDataParallel): + model = model.module + model.eval() device = torch.device(cfg.MODEL.DEVICE) - model = build_ssd_model(cfg) - model.load(weights_file) + data_loaders_val = make_data_loader(cfg, is_train=False, distributed=distributed) + eval_results = [] + for dataset_name, data_loader in zip(cfg.DATASETS.TEST, data_loaders_val): + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) + if not os.path.exists(output_folder): + mkdir(output_folder) + eval_result = inference(model, data_loader, dataset_name, device, output_folder) + eval_results.append(eval_result) + return eval_results + + +def evaluation(cfg, ckpt, distributed): logger = logging.getLogger("SSD.inference") - logger.info('Loaded weights from {}.'.format(weights_file)) + + model = build_detection_model(cfg) + checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR, logger=logger) + device = torch.device(cfg.MODEL.DEVICE) model.to(device) - do_evaluation(cfg, model, output_dir, distributed) + checkpointer.load(ckpt, use_latest=ckpt is None) + do_evaluation(cfg, model, distributed) def main(): @@ -34,7 +53,13 @@ def main(): type=str, ) parser.add_argument("--local_rank", type=int, default=0) - parser.add_argument("--weights", type=str, help="Trained weights.") + parser.add_argument( + "--ckpt", + help="The path to the checkpoint for test, default is the latest checkpoint.", + default=None, + type=str, + ) + parser.add_argument("--output_dir", default="eval_results", type=str, help="The directory to store evaluation results.") parser.add_argument( @@ -55,12 +80,13 @@ def main(): if distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend="nccl", init_method="env://") + synchronize() cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() - logger = setup_logger("SSD", distributed_util.get_rank()) + logger = 
setup_logger("SSD", dist_util.get_rank()) logger.info("Using {} GPUs".format(num_gpus)) logger.info(args) @@ -69,7 +95,7 @@ def main(): config_str = "\n" + cf.read() logger.info(config_str) logger.info("Running with config:\n{}".format(cfg)) - evaluation(cfg, weights_file=args.weights, output_dir=args.output_dir, distributed=distributed) + evaluation(cfg, ckpt=args.ckpt, distributed=distributed) if __name__ == '__main__': diff --git a/ssd/config/defaults.py b/ssd/config/defaults.py index c68ab668..7adb5dba 100644 --- a/ssd/config/defaults.py +++ b/ssd/config/defaults.py @@ -3,6 +3,7 @@ _C = CN() _C.MODEL = CN() +_C.MODEL.META_ARCHITECTURE = 'SSDDetector' _C.MODEL.DEVICE = "cuda" # match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5) _C.MODEL.THRESHOLD = 0.5 @@ -11,6 +12,11 @@ _C.MODEL.NEG_POS_RATIO = 3 _C.MODEL.CENTER_VARIANCE = 0.1 _C.MODEL.SIZE_VARIANCE = 0.2 + +_C.MODEL.BACKBONE = CN() +_C.MODEL.BACKBONE.NAME = 'vgg' +_C.MODEL.BACKBONE.OUT_CHANNELS = (512, 1024, 512, 256, 256, 256) + # ----------------------------------------------------------------------------- # PRIORS # ----------------------------------------------------------------------------- @@ -67,5 +73,6 @@ # change MAX_PER_CLASS to 400 as official caffe code will slightly increase mAP(0.8025=>0.8063, 0.7783=>0.7798) _C.TEST.MAX_PER_CLASS = 200 _C.TEST.MAX_PER_IMAGE = -1 +_C.TEST.BATCH_SIZE = 10 _C.OUTPUT_DIR = 'output' diff --git a/ssd/data/build.py b/ssd/data/build.py new file mode 100644 index 00000000..41277d4f --- /dev/null +++ b/ssd/data/build.py @@ -0,0 +1,55 @@ +import torch +from torch.utils.data import DataLoader +from torch.utils.data.dataloader import default_collate + +from ssd.data import samplers +from ssd.data.datasets import build_dataset +from ssd.data.transforms import build_transforms, build_target_transform +from ssd.structures.container import Container + + +class BatchCollator: + + def __call__(self, batch): + transposed_batch = 
list(zip(*batch)) + images = default_collate(transposed_batch[0]) + img_ids = default_collate(transposed_batch[2]) + + list_targets = transposed_batch[1] + targets = Container( + **{key: default_collate([d[key] for d in list_targets]) for key in list_targets[0]} + ) + return images, targets, img_ids + + +def make_data_loader(cfg, is_train=True, distributed=False, max_iter=None, start_iter=0): + train_transform = build_transforms(cfg, is_train=True) + target_transform = build_target_transform(cfg) + dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST + datasets = build_dataset(dataset_list, transform=train_transform, target_transform=target_transform, is_train=is_train) + + shuffle = is_train or distributed + + data_loaders = [] + + for dataset in datasets: + if distributed: + sampler = samplers.DistributedSampler(dataset, shuffle=shuffle) + elif shuffle: + sampler = torch.utils.data.RandomSampler(dataset) + else: + sampler = torch.utils.data.sampler.SequentialSampler(dataset) + + batch_size = cfg.SOLVER.BATCH_SIZE if is_train else cfg.TEST.BATCH_SIZE + batch_sampler = torch.utils.data.sampler.BatchSampler(sampler=sampler, batch_size=batch_size, drop_last=False) + if max_iter is not None: + batch_sampler = samplers.IterationBasedBatchSampler(batch_sampler, num_iterations=max_iter, start_iter=start_iter) + + data_loader = DataLoader(dataset, num_workers=6, batch_sampler=batch_sampler, pin_memory=True, collate_fn=BatchCollator()) + data_loaders.append(data_loader) + + if is_train: + # during training, a single (possibly concatenated) data_loader is returned + assert len(data_loaders) == 1 + return data_loaders[0] + return data_loaders diff --git a/ssd/data/datasets/__init__.py b/ssd/data/datasets/__init__.py index 94cb3205..5e1e6ae0 100644 --- a/ssd/data/datasets/__init__.py +++ b/ssd/data/datasets/__init__.py @@ -1,8 +1,8 @@ from torch.utils.data import ConcatDataset from ssd.config.path_catlog import DatasetCatalog -from .voc_dataset import 
VOCDataset -from .coco_dataset import COCODataset +from .voc import VOCDataset +from .coco import COCODataset _DATASETS = { 'VOCDataset': VOCDataset, @@ -10,7 +10,7 @@ } -def build_dataset(dataset_list, transform=None, target_transform=None, is_test=False): +def build_dataset(dataset_list, transform=None, target_transform=None, is_train=True): assert len(dataset_list) > 0 datasets = [] for dataset_name in dataset_list: @@ -20,17 +20,16 @@ def build_dataset(dataset_list, transform=None, target_transform=None, is_test=F args['transform'] = transform args['target_transform'] = target_transform if factory == VOCDataset: - args['keep_difficult'] = is_test + args['keep_difficult'] = not is_train elif factory == COCODataset: - args['remove_empty'] = not is_test + args['remove_empty'] = is_train dataset = factory(**args) datasets.append(dataset) # for testing, return a list of datasets - if is_test: + if not is_train: return datasets + dataset = datasets[0] if len(datasets) > 1: dataset = ConcatDataset(datasets) - else: - dataset = datasets[0] - return dataset + return [dataset] diff --git a/ssd/data/datasets/coco_dataset.py b/ssd/data/datasets/coco.py similarity index 92% rename from ssd/data/datasets/coco_dataset.py rename to ssd/data/datasets/coco.py index 2309fa06..16c93ab9 100644 --- a/ssd/data/datasets/coco_dataset.py +++ b/ssd/data/datasets/coco.py @@ -3,6 +3,8 @@ import numpy as np from PIL import Image +from ssd.structures.container import Container + class COCODataset(torch.utils.data.Dataset): class_names = ('__background__', @@ -47,7 +49,11 @@ def __getitem__(self, index): image, boxes, labels = self.transform(image, boxes, labels) if self.target_transform: boxes, labels = self.target_transform(boxes, labels) - return image, boxes, labels + targets = Container( + boxes=boxes, + labels=labels, + ) + return image, targets, index def get_image(self, index): image_id = self.ids[index] @@ -80,6 +86,11 @@ def _xywh2xyxy(self, box): x1, y1, w, h = box return [x1, y1, 
x1 + w, y1 + h] + def get_img_info(self, index): + image_id = self.ids[index] + img_data = self.coco.imgs[image_id] + return img_data + def _read_image(self, image_id): file_name = self.coco.loadImgs(image_id)[0]['file_name'] image_file = os.path.join(self.data_dir, file_name) diff --git a/ssd/data/datasets/evaluation/__init__.py b/ssd/data/datasets/evaluation/__init__.py index bb15304b..08174093 100644 --- a/ssd/data/datasets/evaluation/__init__.py +++ b/ssd/data/datasets/evaluation/__init__.py @@ -3,25 +3,6 @@ from .voc import voc_evaluation -class EvaluationMetrics: - def __init__(self, dataset, evaluation_result): - if isinstance(dataset, VOCDataset): - self._parse_pascal_eval_metrics(evaluation_result) - elif isinstance(dataset, COCODataset): - self._parse_coco_eval_metrics(evaluation_result) - - def _parse_coco_eval_metrics(self, evaluation_result): - self.info = {'AP_IoU=0.50:0.95': evaluation_result.stats[0], - 'AP_IoU=0.50': evaluation_result.stats[1], - 'AP_IoU=0.75': evaluation_result.stats[2]} - - def _parse_pascal_eval_metrics(self, evaluation_result): - self.info = {'mAP': evaluation_result['map']} - - def get_printable_metrics(self): - return self.info - - def evaluate(dataset, predictions, output_dir): """evaluate dataset using different methods based on dataset type. 
Args: @@ -36,10 +17,8 @@ def evaluate(dataset, predictions, output_dir): dataset=dataset, predictions=predictions, output_dir=output_dir ) if isinstance(dataset, VOCDataset): - evaluation_result = voc_evaluation(**args) + return voc_evaluation(**args) elif isinstance(dataset, COCODataset): - evaluation_result = coco_evaluation(**args) + return coco_evaluation(**args) else: raise NotImplementedError - - return EvaluationMetrics(dataset, evaluation_result) diff --git a/ssd/data/datasets/evaluation/coco/__init__.py b/ssd/data/datasets/evaluation/coco/__init__.py index 35ae6b15..d306a2fc 100644 --- a/ssd/data/datasets/evaluation/coco/__init__.py +++ b/ssd/data/datasets/evaluation/coco/__init__.py @@ -5,7 +5,11 @@ def coco_evaluation(dataset, predictions, output_dir): coco_results = [] - for i, (boxes, labels, scores) in enumerate(predictions): + for i, prediction in enumerate(predictions): + img_info = dataset.get_img_info(i) + prediction = prediction.resize((img_info['width'], img_info['height'])).numpy() + boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores'] + image_id, annotation = dataset.get_annotation(i) class_mapper = dataset.contiguous_id_to_coco_id if labels.shape[0] == 0: @@ -38,4 +42,9 @@ def coco_evaluation(dataset, predictions, output_dir): coco_eval.evaluate() coco_eval.accumulate() coco_eval.summarize() - return coco_eval + + keys = ["AP", "AP50", "AP75", "APs", "APm", "APl"] + metrics = {} + for i, key in enumerate(keys): + metrics[key] = coco_eval.stats[i] + return dict(metrics=metrics) diff --git a/ssd/data/datasets/evaluation/voc/__init__.py b/ssd/data/datasets/evaluation/voc/__init__.py index 61740eda..5272eecd 100644 --- a/ssd/data/datasets/evaluation/voc/__init__.py +++ b/ssd/data/datasets/evaluation/voc/__init__.py @@ -18,13 +18,17 @@ def voc_evaluation(dataset, predictions, output_dir): gt_difficults = [] for i in range(len(dataset)): + prediction = predictions[i] image_id, annotation = 
dataset.get_annotation(i) gt_boxes, gt_labels, is_difficult = annotation gt_boxes_list.append(gt_boxes) gt_labels_list.append(gt_labels) gt_difficults.append(is_difficult.astype(np.bool)) - boxes, labels, scores = predictions[i] + img_info = dataset.get_img_info(i) + prediction = prediction.resize((img_info['width'], img_info['height'])).numpy() + boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores'] + pred_boxes_list.append(boxes) pred_labels_list.append(labels) pred_scores_list.append(scores) @@ -38,12 +42,14 @@ def voc_evaluation(dataset, predictions, output_dir): use_07_metric=True) logger = logging.getLogger("SSD.inference") result_str = "mAP: {:.4f}\n".format(result["map"]) + metrics = {'mAP': result["map"]} for i, ap in enumerate(result["ap"]): if i == 0: # skip background continue + metrics[class_names[i]] = ap result_str += "{:<16}: {:.4f}\n".format(class_names[i], ap) logger.info(result_str) result_path = os.path.join(output_dir, "result_{}.txt".format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) with open(result_path, "w") as f: f.write(result_str) - return result + return dict(metrics=metrics) diff --git a/ssd/data/datasets/voc_dataset.py b/ssd/data/datasets/voc.py similarity index 86% rename from ssd/data/datasets/voc_dataset.py rename to ssd/data/datasets/voc.py index 34744faa..5e54f64f 100644 --- a/ssd/data/datasets/voc_dataset.py +++ b/ssd/data/datasets/voc.py @@ -4,6 +4,8 @@ import xml.etree.ElementTree as ET from PIL import Image +from ssd.structures.container import Container + class VOCDataset(torch.utils.data.Dataset): class_names = ('__background__', @@ -40,7 +42,11 @@ def __getitem__(self, index): image, boxes, labels = self.transform(image, boxes, labels) if self.target_transform: boxes, labels = self.target_transform(boxes, labels) - return image, boxes, labels + targets = Container( + boxes=boxes, + labels=labels, + ) + return image, targets, index def get_image(self, index): image_id = self.ids[index] 
@@ -87,6 +93,14 @@ def _get_annotation(self, image_id): np.array(labels, dtype=np.int64), np.array(is_difficult, dtype=np.uint8)) + def get_img_info(self, index): + img_id = self.ids[index] + annotation_file = os.path.join(self.data_dir, "Annotations", "%s.xml" % img_id) + anno = ET.parse(annotation_file).getroot() + size = anno.find("size") + im_info = tuple(map(int, (size.find("height").text, size.find("width").text))) + return {"height": im_info[0], "width": im_info[1]} + def _read_image(self, image_id): image_file = os.path.join(self.data_dir, "JPEGImages", "%s.jpg" % image_id) image = Image.open(image_file).convert("RGB") diff --git a/ssd/data/samplers/__init__.py b/ssd/data/samplers/__init__.py index 9d0a05a4..3534a1f9 100644 --- a/ssd/data/samplers/__init__.py +++ b/ssd/data/samplers/__init__.py @@ -1,3 +1,4 @@ from .iteration_based_batch_sampler import IterationBasedBatchSampler +from .distributed import DistributedSampler -__all__ = ['IterationBasedBatchSampler'] +__all__ = ['IterationBasedBatchSampler', 'DistributedSampler'] diff --git a/ssd/data/samplers/distributed.py b/ssd/data/samplers/distributed.py new file mode 100644 index 00000000..33295898 --- /dev/null +++ b/ssd/data/samplers/distributed.py @@ -0,0 +1,66 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +# Code is copy-pasted exactly as in torch.utils.data.distributed. +# FIXME remove this once c10d fixes the bug it has +import math +import torch +import torch.distributed as dist +from torch.utils.data.sampler import Sampler + + +class DistributedSampler(Sampler): + """Sampler that restricts data loading to a subset of the dataset. + It is especially useful in conjunction with + :class:`torch.nn.parallel.DistributedDataParallel`. In such case, each + process can pass a DistributedSampler instance as a DataLoader sampler, + and load a subset of the original dataset that is exclusive to it. + .. note:: + Dataset is assumed to be of constant size. 
+ Arguments: + dataset: Dataset used for sampling. + num_replicas (optional): Number of processes participating in + distributed training. + rank (optional): Rank of the current process within num_replicas. + """ + + def __init__(self, dataset, num_replicas=None, rank=None, shuffle=True): + if num_replicas is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + num_replicas = dist.get_world_size() + if rank is None: + if not dist.is_available(): + raise RuntimeError("Requires distributed package to be available") + rank = dist.get_rank() + self.dataset = dataset + self.num_replicas = num_replicas + self.rank = rank + self.epoch = 0 + self.num_samples = int(math.ceil(len(self.dataset) * 1.0 / self.num_replicas)) + self.total_size = self.num_samples * self.num_replicas + self.shuffle = shuffle + + def __iter__(self): + if self.shuffle: + # deterministically shuffle based on epoch + g = torch.Generator() + g.manual_seed(self.epoch) + indices = torch.randperm(len(self.dataset), generator=g).tolist() + else: + indices = torch.arange(len(self.dataset)).tolist() + + # add extra samples to make it evenly divisible + indices += indices[: (self.total_size - len(indices))] + assert len(indices) == self.total_size + + # subsample + offset = self.num_samples * self.rank + indices = indices[offset: offset + self.num_samples] + assert len(indices) == self.num_samples + + return iter(indices) + + def __len__(self): + return self.num_samples + + def set_epoch(self, epoch): + self.epoch = epoch diff --git a/ssd/data/transforms/__init__.py b/ssd/data/transforms/__init__.py new file mode 100644 index 00000000..a3643578 --- /dev/null +++ b/ssd/data/transforms/__init__.py @@ -0,0 +1,34 @@ +from ssd.modeling.anchors.prior_box import PriorBox +from .target_transform import SSDTargetTransform +from .transforms import * + + +def build_transforms(cfg, is_train=True): + if is_train: + transform = [ + ConvertFromInts(), + 
PhotometricDistort(), + Expand(cfg.INPUT.PIXEL_MEAN), + RandomSampleCrop(), + RandomMirror(), + ToPercentCoords(), + Resize(cfg.INPUT.IMAGE_SIZE), + SubtractMeans(cfg.INPUT.PIXEL_MEAN), + ToTensor(), + ] + else: + transform = [ + Resize(cfg.INPUT.IMAGE_SIZE), + SubtractMeans(cfg.INPUT.PIXEL_MEAN), + ToTensor() + ] + transform = Compose(transform) + return transform + + +def build_target_transform(cfg): + transform = SSDTargetTransform(PriorBox(cfg)(), + cfg.MODEL.CENTER_VARIANCE, + cfg.MODEL.SIZE_VARIANCE, + cfg.MODEL.THRESHOLD) + return transform diff --git a/ssd/data/transforms/target_transform.py b/ssd/data/transforms/target_transform.py new file mode 100644 index 00000000..3e893774 --- /dev/null +++ b/ssd/data/transforms/target_transform.py @@ -0,0 +1,25 @@ +import numpy as np +import torch + +from ssd.utils import box_utils + + +class SSDTargetTransform: + def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold): + self.center_form_priors = center_form_priors + self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors) + self.center_variance = center_variance + self.size_variance = size_variance + self.iou_threshold = iou_threshold + + def __call__(self, gt_boxes, gt_labels): + if type(gt_boxes) is np.ndarray: + gt_boxes = torch.from_numpy(gt_boxes) + if type(gt_labels) is np.ndarray: + gt_labels = torch.from_numpy(gt_labels) + boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels, + self.corner_form_priors, self.iou_threshold) + boxes = box_utils.corner_form_to_center_form(boxes) + locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance) + + return locations, labels diff --git a/ssd/transforms/transforms.py b/ssd/data/transforms/transforms.py similarity index 100% rename from ssd/transforms/transforms.py rename to ssd/data/transforms/transforms.py diff --git a/ssd/engine/inference.py b/ssd/engine/inference.py index e2893ebf..16ff7baa 
100644 --- a/ssd/engine/inference.py +++ b/ssd/engine/inference.py @@ -4,17 +4,15 @@ import torch import torch.utils.data from tqdm import tqdm -from ssd.data.datasets import build_dataset from ssd.data.datasets.evaluation import evaluate -from ssd.modeling.predictor import Predictor -from ssd.modeling.ssd import SSD -from ssd.utils import distributed_util +from ssd.utils import dist_util +from ssd.utils.dist_util import synchronize, is_main_process def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu): - all_predictions = distributed_util.all_gather(predictions_per_gpu) - if not distributed_util.is_main_process(): + all_predictions = dist_util.all_gather(predictions_per_gpu) + if not dist_util.is_main_process(): return # merge the list of dicts predictions = {} @@ -34,64 +32,34 @@ def _accumulate_predictions_from_multiple_gpus(predictions_per_gpu): return predictions -def _evaluation(cfg, dataset_name, test_dataset, predictor, distributed, output_dir): - """ Perform evaluating on one dataset - Args: - cfg: - dataset_name: dataset's name - test_dataset: Dataset object - predictor: Predictor object, used to to prediction. - distributed: whether distributed evaluating or not - output_dir: path to save prediction results - Returns: - evaluate result - """ - cpu_device = torch.device("cpu") - logger = logging.getLogger("SSD.inference") - logger.info("Evaluating {} dataset({} images):".format(dataset_name, len(test_dataset))) - indices = list(range(len(test_dataset))) - if distributed: - indices = indices[distributed_util.get_rank()::distributed_util.get_world_size()] - - # show progress bar only on main process. 
- progress_bar = tqdm if distributed_util.is_main_process() else iter - logger.info('Progress on {} 0:'.format(cfg.MODEL.DEVICE.upper())) - predictions = {} - for i in progress_bar(indices): - image = test_dataset.get_image(i) - output = predictor.predict(image) - boxes, labels, scores = [o.to(cpu_device).numpy() for o in output] - predictions[i] = (boxes, labels, scores) - distributed_util.synchronize() - predictions = _accumulate_predictions_from_multiple_gpus(predictions) - if not distributed_util.is_main_process(): - return +def compute_on_dataset(model, data_loader, device): + results_dict = {} + for batch in tqdm(data_loader): + images, targets, image_ids = batch + cpu_device = torch.device("cpu") + with torch.no_grad(): + outputs = model(images.to(device)) - final_output_dir = os.path.join(output_dir, dataset_name) - if not os.path.exists(final_output_dir): - os.makedirs(final_output_dir) - torch.save(predictions, os.path.join(final_output_dir, 'predictions.pth')) - return evaluate(dataset=test_dataset, predictions=predictions, output_dir=final_output_dir) + outputs = [o.to(cpu_device) for o in outputs] + results_dict.update( + {img_id: result for img_id, result in zip(image_ids, outputs)} + ) + return results_dict -def do_evaluation(cfg, model, output_dir, distributed): - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model = model.module - assert isinstance(model, SSD), 'Wrong module.' - test_datasets = build_dataset(dataset_list=cfg.DATASETS.TEST, is_test=True) - device = torch.device(cfg.MODEL.DEVICE) - model.eval() - predictor = Predictor(cfg=cfg, - model=model, - iou_threshold=cfg.TEST.NMS_THRESHOLD, - score_threshold=cfg.TEST.CONFIDENCE_THRESHOLD, - device=device) - # evaluate all test datasets. 
+def inference(model, data_loader, dataset_name, device, output_folder=None, use_cached=False): + dataset = data_loader.dataset logger = logging.getLogger("SSD.inference") - logger.info('Will evaluate {} dataset(s):'.format(len(test_datasets))) - metrics = {} - for dataset_name, test_dataset in zip(cfg.DATASETS.TEST, test_datasets): - metric = _evaluation(cfg, dataset_name, test_dataset, predictor, distributed, output_dir) - metrics[dataset_name] = metric - distributed_util.synchronize() - return metrics + logger.info("Evaluating {} dataset({} images):".format(dataset_name, len(dataset))) + predictions_path = os.path.join(output_folder, 'predictions.pth') + if use_cached and os.path.exists(predictions_path): + predictions = torch.load(predictions_path, map_location='cpu') + else: + predictions = compute_on_dataset(model, data_loader, device) + synchronize() + predictions = _accumulate_predictions_from_multiple_gpus(predictions) + if not is_main_process(): + return + if output_folder: + torch.save(predictions, predictions_path) + return evaluate(dataset=dataset, predictions=predictions, output_dir=output_folder) diff --git a/ssd/engine/trainer.py b/ssd/engine/trainer.py index ba4c0914..078a44b2 100644 --- a/ssd/engine/trainer.py +++ b/ssd/engine/trainer.py @@ -1,13 +1,23 @@ +import collections import datetime import logging import os import time import torch import torch.distributed as dist -from torch.nn.parallel import DistributedDataParallel -from ssd.engine.inference import do_evaluation -from ssd.utils import distributed_util +from eval_ssd import do_evaluation +from ssd.utils import dist_util + + +def write_metric(eval_result, prefix, summary_writer, global_step): + for key in eval_result: + value = eval_result[key] + tag = '{}/{}'.format(prefix, key) + if isinstance(value, collections.Mapping): + write_metric(value, tag, summary_writer, global_step) + else: + summary_writer.add_scalar(tag, value, global_step=global_step) def reduce_loss_dict(loss_dict): @@ 
-16,7 +26,7 @@ def reduce_loss_dict(loss_dict): 0 has the averaged results. Returns a dict with the same fields as loss_dict, after reduction. """ - world_size = distributed_util.get_world_size() + world_size = dist_util.get_world_size() if world_size < 2: return loss_dict with torch.no_grad(): @@ -35,45 +45,40 @@ def reduce_loss_dict(loss_dict): return reduced_losses -def _save_model(logger, model, model_path): - vgg_model = model - if isinstance(model, DistributedDataParallel): - vgg_model = model.module - vgg_model.save(model_path) - logger.info("Saved checkpoint to {}".format(model_path)) - - def do_train(cfg, model, data_loader, optimizer, scheduler, + checkpointer, device, + arguments, args): logger = logging.getLogger("SSD.trainer") - logger.info("Start training") + logger.info("Start training...") model.train() - save_to_disk = distributed_util.get_rank() == 0 + save_to_disk = dist_util.get_rank() == 0 if args.use_tensorboard and save_to_disk: import tensorboardX - summary_writer = tensorboardX.SummaryWriter(log_dir=cfg.OUTPUT_DIR) + summary_writer = tensorboardX.SummaryWriter(log_dir=os.path.join(cfg.OUTPUT_DIR, 'tf_logs')) else: summary_writer = None max_iter = len(data_loader) + start_iter = arguments["iteration"] start_training_time = time.time() trained_time = 0 tic = time.time() end = time.time() - for iteration, (images, boxes, labels) in enumerate(data_loader): + for iteration, (images, targets, _) in enumerate(data_loader, start_iter): iteration = iteration + 1 + arguments["iteration"] = iteration scheduler.step() images = images.to(device) - boxes = boxes.to(device) - labels = labels.to(device) + targets = targets.to(device) optimizer.zero_grad() - loss_dict = model(images, targets=(boxes, labels)) + loss_dict = model(images, targets=targets) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) @@ -87,7 +92,7 @@ def do_train(cfg, model, if iteration % args.log_step == 0: eta_seconds = int((trained_time 
/ iteration) * (max_iter - iteration)) log_str = [ - "Iter: {:06d}, Lr: {:.5f}, Cost: {:.2f}s, Eta: {}".format(iteration, + "iter: {:06d}, lr: {:.5f}, cost: {:.2f}s, eta: {}".format(iteration, optimizer.param_groups[0]['lr'], time.time() - tic, str(datetime.timedelta(seconds=eta_seconds))), "total_loss: {:.3f}".format(losses_reduced.item()) @@ -105,22 +110,17 @@ def do_train(cfg, model, tic = time.time() - if save_to_disk and iteration % args.save_step == 0: - model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_iteration_{:06d}.pth".format(cfg.INPUT.IMAGE_SIZE, iteration)) - _save_model(logger, model, model_path) - # Do eval when training, to trace the mAP changes and see performance improved whether or nor + if iteration % args.save_step == 0: + checkpointer.save("model_{:06d}".format(iteration), **arguments) + if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter: - dataset_metrics = do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed) - if summary_writer: - global_step = iteration - for dataset_name, metrics in dataset_metrics.items(): - for metric_name, metric_value in metrics.get_printable_metrics().items(): - summary_writer.add_scalar('/'.join(['val', dataset_name, metric_name]), metric_value, global_step=global_step) + eval_results = do_evaluation(cfg, model, distributed=args.distributed) + if dist_util.get_rank() and summary_writer: + for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST): + write_metric(eval_result['metrics'], 'metrics/' + dataset, summary_writer, iteration) model.train() - if save_to_disk: - model_path = os.path.join(cfg.OUTPUT_DIR, "ssd{}_vgg_final.pth".format(cfg.INPUT.IMAGE_SIZE)) - _save_model(logger, model, model_path) + checkpointer.save("model_final.pth", **arguments) # compute training time total_training_time = int(time.time() - start_training_time) total_time_str = str(datetime.timedelta(seconds=total_training_time)) diff --git a/ssd/module/__init__.py 
b/ssd/layers/__init__.py similarity index 100% rename from ssd/module/__init__.py rename to ssd/layers/__init__.py diff --git a/ssd/transforms/__init__.py b/ssd/modeling/anchors/__init__.py similarity index 100% rename from ssd/transforms/__init__.py rename to ssd/modeling/anchors/__init__.py diff --git a/ssd/module/prior_box.py b/ssd/modeling/anchors/prior_box.py similarity index 92% rename from ssd/module/prior_box.py rename to ssd/modeling/anchors/prior_box.py index 0fec3f01..9f15207d 100644 --- a/ssd/module/prior_box.py +++ b/ssd/modeling/anchors/prior_box.py @@ -1,13 +1,11 @@ from itertools import product import torch -import torch.nn as nn from math import sqrt -class PriorBox(nn.Module): +class PriorBox: def __init__(self, cfg): - super(PriorBox, self).__init__() self.image_size = cfg.INPUT.IMAGE_SIZE prior_config = cfg.MODEL.PRIORS self.feature_maps = prior_config.FEATURE_MAPS @@ -17,7 +15,7 @@ def __init__(self, cfg): self.aspect_ratios = prior_config.ASPECT_RATIOS self.clip = prior_config.CLIP - def forward(self): + def __call__(self): """Generate SSD Prior Boxes. It returns the center, height and width of the priors. 
The values are relative to the image size Returns: @@ -50,7 +48,7 @@ def forward(self): priors.append([cx, cy, w * ratio, h / ratio]) priors.append([cx, cy, w / ratio, h * ratio]) - priors = torch.Tensor(priors) + priors = torch.tensor(priors) if self.clip: priors.clamp_(max=1, min=0) return priors diff --git a/ssd/modeling/backbone/__init__.py b/ssd/modeling/backbone/__init__.py new file mode 100644 index 00000000..c97f36cf --- /dev/null +++ b/ssd/modeling/backbone/__init__.py @@ -0,0 +1,9 @@ +from .vgg import vgg + +BACKBONES = { + 'vgg': vgg, +} + + +def build_backbone(cfg): + return BACKBONES[cfg.MODEL.BACKBONE.NAME](cfg) diff --git a/ssd/modeling/backbone/mobilenet.py b/ssd/modeling/backbone/mobilenet.py new file mode 100644 index 00000000..e69de29b diff --git a/ssd/modeling/vgg_ssd.py b/ssd/modeling/backbone/vgg.py similarity index 56% rename from ssd/modeling/vgg_ssd.py rename to ssd/modeling/backbone/vgg.py index fe27eaac..25746e4f 100644 --- a/ssd/modeling/vgg_ssd.py +++ b/ssd/modeling/backbone/vgg.py @@ -1,5 +1,12 @@ import torch.nn as nn -from ssd.modeling.ssd import SSD +import torch.nn.functional as F + +from ssd.layers import L2Norm +from ssd.utils.model_zoo import load_state_dict_from_url + +model_urls = { + 'vgg': 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth', +} # borrowed from https://github.com/amdegroot/ssd.pytorch/blob/master/ssd.py @@ -62,34 +69,61 @@ def add_header(vgg, extra_layers, boxes_per_location, num_classes): return regression_headers, classification_headers -def build_ssd_model(cfg): - num_classes = cfg.MODEL.NUM_CLASSES - size = cfg.INPUT.IMAGE_SIZE - vgg_base = { - '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', - 512, 512, 512], - '512': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', - 512, 512, 512], - } - extras_base = { - '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], - '512': [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256], - } - - 
boxes_per_location = cfg.MODEL.PRIORS.BOXES_PER_LOCATION - - vgg_config = vgg_base[str(size)] - extras_config = extras_base[str(size)] - - vgg = nn.ModuleList(add_vgg(vgg_config)) - extras = nn.ModuleList(add_extras(extras_config, i=1024, size=size)) - - regression_headers, classification_headers = add_header(vgg, extras, boxes_per_location, num_classes=num_classes) - regression_headers = nn.ModuleList(regression_headers) - classification_headers = nn.ModuleList(classification_headers) - - return SSD(cfg=cfg, - vgg=vgg, - extras=extras, - classification_headers=classification_headers, - regression_headers=regression_headers) +vgg_base = { + '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', + 512, 512, 512], + '512': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M', + 512, 512, 512], +} +extras_base = { + '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], + '512': [256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256], +} + + +class VGG(nn.Module): + def __init__(self, cfg): + super().__init__() + size = cfg.INPUT.IMAGE_SIZE + vgg_config = vgg_base[str(size)] + extras_config = extras_base[str(size)] + + self.vgg = nn.ModuleList(add_vgg(vgg_config)) + self.extras = nn.ModuleList(add_extras(extras_config, i=1024, size=size)) + self.l2_norm = L2Norm(512, scale=20) + self.reset_parameters() + + def reset_parameters(self): + for m in self.extras.modules(): + if isinstance(m, nn.Conv2d): + nn.init.xavier_uniform_(m.weight) + nn.init.zeros_(m.bias) + + def init_from_pretrain(self, state_dict): + self.vgg.load_state_dict(state_dict) + + def forward(self, x): + features = [] + for i in range(23): + x = self.vgg[i](x) + s = self.l2_norm(x) # Conv4_3 L2 normalization + features.append(s) + + # apply vgg up to fc7 + for i in range(23, len(self.vgg)): + x = self.vgg[i](x) + features.append(x) + + for k, v in enumerate(self.extras): + x = F.relu(v(x), inplace=True) + if k % 2 == 1: + features.append(x) + + return 
tuple(features) + + +def vgg(cfg, pretrained=True): + model = VGG(cfg) + if pretrained: + model.init_from_pretrain(load_state_dict_from_url(model_urls['vgg'])) + return model diff --git a/ssd/modeling/data_preprocessing.py b/ssd/modeling/data_preprocessing.py deleted file mode 100644 index d0d7de4f..00000000 --- a/ssd/modeling/data_preprocessing.py +++ /dev/null @@ -1,59 +0,0 @@ -from ..transforms.transforms import * - - -class TrainAugmentation: - def __init__(self, size, mean=0): - """ - Args: - size: the size the of final image. - mean: mean pixel value per channel. - """ - self.mean = mean - self.size = size - self.augment = Compose([ - ConvertFromInts(), - PhotometricDistort(), - Expand(self.mean), - RandomSampleCrop(), - RandomMirror(), - ToPercentCoords(), - Resize(self.size), - SubtractMeans(self.mean), - ToTensor(), - ]) - - def __call__(self, img, boxes, labels): - """ - - Args: - img: the output of cv.imread in RGB layout. - boxes: boundding boxes in the form of (x1, y1, x2, y2). - labels: labels of boxes. 
- """ - return self.augment(img, boxes, labels) - - -class TestTransform: - def __init__(self, size, mean=0.0): - self.transform = Compose([ - ToPercentCoords(), - Resize(size), - SubtractMeans(mean), - ToTensor(), - ]) - - def __call__(self, image, boxes, labels): - return self.transform(image, boxes, labels) - - -class PredictionTransform: - def __init__(self, size, mean=0.0): - self.transform = Compose([ - Resize(size), - SubtractMeans(mean), - ToTensor() - ]) - - def __call__(self, image): - image, _, _ = self.transform(image) - return image diff --git a/ssd/modeling/detector/__init__.py b/ssd/modeling/detector/__init__.py new file mode 100644 index 00000000..bd31f3e3 --- /dev/null +++ b/ssd/modeling/detector/__init__.py @@ -0,0 +1,10 @@ +from .ssd_detector import SSDDetector + +_DETECTION_META_ARCHITECTURES = { + "SSDDetector": SSDDetector +} + + +def build_detection_model(cfg): + meta_arch = _DETECTION_META_ARCHITECTURES[cfg.MODEL.META_ARCHITECTURE] + return meta_arch(cfg) diff --git a/ssd/modeling/detector/ssd_detector.py b/ssd/modeling/detector/ssd_detector.py new file mode 100644 index 00000000..f6ac4777 --- /dev/null +++ b/ssd/modeling/detector/ssd_detector.py @@ -0,0 +1,19 @@ +from torch import nn + +from ssd.modeling.backbone import build_backbone +from ssd.modeling.detector_head import build_detector_head + + +class SSDDetector(nn.Module): + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + self.backbone = build_backbone(cfg) + self.detector_head = build_detector_head(cfg) + + def forward(self, images, targets=None): + features = self.backbone(images) + detections, detector_losses = self.detector_head(features, targets) + if self.training: + return detector_losses + return detections diff --git a/ssd/modeling/detector_head/__init__.py b/ssd/modeling/detector_head/__init__.py new file mode 100644 index 00000000..8bbe9315 --- /dev/null +++ b/ssd/modeling/detector_head/__init__.py @@ -0,0 +1,6 @@ +from .detector_head import SSDHeader + + 
+def build_detector_head(cfg): + # TODO: make it more general + return SSDHeader(cfg) diff --git a/ssd/modeling/detector_head/detector_head.py b/ssd/modeling/detector_head/detector_head.py new file mode 100644 index 00000000..9ff74d4f --- /dev/null +++ b/ssd/modeling/detector_head/detector_head.py @@ -0,0 +1,71 @@ +import torch +from torch import nn +import torch.nn.functional as F + +from ssd.modeling.anchors.prior_box import PriorBox +from ssd.utils import box_utils +from .inference import PostProcessor +from .loss import MultiBoxLoss + + +class SSDHeader(nn.Module): + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + self.cls_headers = nn.ModuleList() + self.reg_headers = nn.ModuleList() + for boxes_per_location, out_channels in zip(cfg.MODEL.PRIORS.BOXES_PER_LOCATION, cfg.MODEL.BACKBONE.OUT_CHANNELS): + self.cls_headers.append( + nn.Conv2d(out_channels, boxes_per_location * cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) + ) + self.reg_headers.append( + nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) + ) + self.loss_evaluator = MultiBoxLoss(neg_pos_ratio=cfg.MODEL.NEG_POS_RATIO) + self.post_processor = PostProcessor(cfg) + self.priors = None + self.reset_parameters() + + def reset_parameters(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.xavier_uniform_(m.weight) + nn.init.zeros_(m.bias) + + def forward(self, features, targets=None): + confidences = [] + locations = [] + for feature, cls_header, reg_header in zip(features, self.cls_headers, self.reg_headers): + confidences.append(cls_header(feature).permute(0, 2, 3, 1).contiguous()) + locations.append(reg_header(feature).permute(0, 2, 3, 1).contiguous()) + + batch_size = features[0].shape[0] + confidences = torch.cat([c.view(c.shape[0], -1) for c in confidences], dim=1).view(batch_size, -1, self.cfg.MODEL.NUM_CLASSES) + locations = torch.cat([l.view(l.shape[0], -1) for l in locations], dim=1).view(batch_size, -1, 4) + + if 
self.training: + return self._forward_train(confidences, locations, targets) + else: + return self._forward_test(confidences, locations) + + def _forward_train(self, confidences, locations, targets): + gt_boxes, gt_labels = targets['boxes'], targets['labels'] + reg_loss, cls_loss = self.loss_evaluator(confidences, locations, gt_labels, gt_boxes) + loss_dict = dict( + reg_loss=reg_loss, + cls_loss=cls_loss, + ) + detections = (confidences, locations) + return detections, loss_dict + + def _forward_test(self, confidences, locations): + if self.priors is None: + self.priors = PriorBox(self.cfg)().to(locations.device) + scores = F.softmax(confidences, dim=2) + boxes = box_utils.convert_locations_to_boxes( + locations, self.priors, self.cfg.MODEL.CENTER_VARIANCE, self.cfg.MODEL.SIZE_VARIANCE + ) + boxes = box_utils.center_form_to_corner_form(boxes) + detections = (scores, boxes) + detections = self.post_processor(detections) + return detections, {} diff --git a/ssd/modeling/detector_head/inference.py b/ssd/modeling/detector_head/inference.py new file mode 100644 index 00000000..d87250f0 --- /dev/null +++ b/ssd/modeling/detector_head/inference.py @@ -0,0 +1,53 @@ +import torch + +from ssd.structures.container import Container +from ssd.utils.nms import boxes_nms + + +class PostProcessor: + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + + def __call__(self, detections): + batches_scores, batches_boxes = detections + device = batches_scores.device + batch_size = batches_scores.size(0) + results = [] + for batch_id in range(batch_size): + processed_boxes = [] + processed_scores = [] + processed_labels = [] + + scores, boxes = batches_scores[batch_id], batches_boxes[batch_id] # (N, #CLS) (N, 4) + for class_id in range(1, scores.size(1)): # skip background + per_cls_scores = scores[:, class_id] + mask = per_cls_scores > self.cfg.TEST.CONFIDENCE_THRESHOLD + per_cls_scores = per_cls_scores[mask] + if per_cls_scores.numel() == 0: + continue + per_cls_boxes = 
boxes[mask, :] * self.cfg.INPUT.IMAGE_SIZE + keep = boxes_nms(per_cls_boxes, per_cls_scores, self.cfg.TEST.NMS_THRESHOLD, self.cfg.TEST.MAX_PER_CLASS) + + nmsed_boxes = per_cls_boxes[keep, :] + nmsed_labels = torch.tensor([class_id] * keep.size(0), device=device) + nmsed_scores = per_cls_scores[keep] + + processed_boxes.append(nmsed_boxes) + processed_scores.append(nmsed_scores) + processed_labels.append(nmsed_labels) + + if len(processed_boxes) == 0: + processed_boxes = torch.empty(0, 4) + processed_labels = torch.empty(0) + processed_scores = torch.empty(0) + else: + processed_boxes = torch.cat(processed_boxes, 0) + processed_labels = torch.cat(processed_labels, 0) + processed_scores = torch.cat(processed_scores, 0) + + container = Container(boxes=processed_boxes, labels=processed_labels, scores=processed_scores) + container.img_width = self.cfg.INPUT.IMAGE_SIZE + container.img_height = self.cfg.INPUT.IMAGE_SIZE + results.append(container) + return results diff --git a/ssd/modeling/multibox_loss.py b/ssd/modeling/detector_head/loss.py similarity index 100% rename from ssd/modeling/multibox_loss.py rename to ssd/modeling/detector_head/loss.py diff --git a/ssd/modeling/post_processor.py b/ssd/modeling/post_processor.py deleted file mode 100644 index cdd6e84a..00000000 --- a/ssd/modeling/post_processor.py +++ /dev/null @@ -1,91 +0,0 @@ -import torch - -from ssd.utils.nms import boxes_nms - - -class PostProcessor: - def __init__(self, - iou_threshold, - score_threshold, - image_size, - max_per_class=200, - max_per_image=-1): - self.confidence_threshold = score_threshold - self.iou_threshold = iou_threshold - self.width = image_size - self.height = image_size - self.max_per_class = max_per_class - self.max_per_image = max_per_image - - def __call__(self, confidences, locations, width=None, height=None, batch_ids=None): - """filter result using nms - Args: - confidences: (batch_size, num_priors, num_classes) - locations: (batch_size, num_priors, 4) - width(int): 
un-normalized using width - height(int): un-normalized using height - batch_ids: which batch to filter ? - Returns: - List[(boxes, labels, scores)], - boxes: (n, 4) - labels: (n, ) - scores: (n, ) - """ - if width is None: - width = self.width - if height is None: - height = self.height - - batch_size = confidences.size(0) - if batch_ids is None: - batch_ids = torch.arange(batch_size, device=confidences.device) - else: - batch_ids = torch.tensor(batch_ids, device=confidences.device) - - locations = locations[batch_ids] - confidences = confidences[batch_ids] - - results = [] - for decoded_boxes, scores in zip(locations, confidences): - # per batch - filtered_boxes = [] - filtered_labels = [] - filtered_probs = [] - for class_index in range(1, scores.size(1)): - probs = scores[:, class_index] - mask = probs > self.confidence_threshold - probs = probs[mask] - if probs.size(0) == 0: - continue - boxes = decoded_boxes[mask, :] - boxes[:, 0] *= width - boxes[:, 2] *= width - boxes[:, 1] *= height - boxes[:, 3] *= height - - keep = boxes_nms(boxes, probs, self.iou_threshold, self.max_per_class) - - boxes = boxes[keep, :] - labels = torch.tensor([class_index] * keep.size(0)) - probs = probs[keep] - - filtered_boxes.append(boxes) - filtered_labels.append(labels) - filtered_probs.append(probs) - - # no object detected - if len(filtered_boxes) == 0: - filtered_boxes = torch.empty(0, 4) - filtered_labels = torch.empty(0) - filtered_probs = torch.empty(0) - else: # cat all result - filtered_boxes = torch.cat(filtered_boxes, 0) - filtered_labels = torch.cat(filtered_labels, 0) - filtered_probs = torch.cat(filtered_probs, 0) - if 0 < self.max_per_image < filtered_probs.size(0): - keep = torch.argsort(filtered_probs, dim=0, descending=True)[:self.max_per_image] - filtered_boxes = filtered_boxes[keep, :] - filtered_labels = filtered_labels[keep] - filtered_probs = filtered_probs[keep] - results.append((filtered_boxes, filtered_labels, filtered_probs)) - return results diff --git 
a/ssd/modeling/predictor.py b/ssd/modeling/predictor.py deleted file mode 100644 index 364c8a90..00000000 --- a/ssd/modeling/predictor.py +++ /dev/null @@ -1,29 +0,0 @@ -import torch - -from ssd.modeling.post_processor import PostProcessor -from .data_preprocessing import PredictionTransform - - -class Predictor: - def __init__(self, cfg, model, iou_threshold, score_threshold, device): - self.cfg = cfg - self.model = model - self.transform = PredictionTransform(cfg.INPUT.IMAGE_SIZE, cfg.INPUT.PIXEL_MEAN) - self.post_processor = PostProcessor(iou_threshold=iou_threshold, - score_threshold=score_threshold, - image_size=cfg.INPUT.IMAGE_SIZE, - max_per_class=cfg.TEST.MAX_PER_CLASS, - max_per_image=cfg.TEST.MAX_PER_IMAGE) - self.device = device - self.model.eval() - - def predict(self, image): - height, width, _ = image.shape - image = self.transform(image) - images = image.unsqueeze(0) - images = images.to(self.device) - with torch.no_grad(): - scores, boxes = self.model(images) - results = self.post_processor(scores, boxes, width=width, height=height) - boxes, labels, scores = results[0] - return boxes, labels, scores diff --git a/ssd/modeling/ssd.py b/ssd/modeling/ssd.py deleted file mode 100644 index 1acef26f..00000000 --- a/ssd/modeling/ssd.py +++ /dev/null @@ -1,120 +0,0 @@ -import torch.nn as nn -import torch -import numpy as np -import torch.nn.functional as F - -from ssd.modeling.multibox_loss import MultiBoxLoss -from ssd.module import L2Norm -from ssd.module.prior_box import PriorBox -from ssd.utils import box_utils - - -class SSD(nn.Module): - def __init__(self, cfg, - vgg: nn.ModuleList, - extras: nn.ModuleList, - classification_headers: nn.ModuleList, - regression_headers: nn.ModuleList): - """Compose a SSD model using the given components. 
- """ - super(SSD, self).__init__() - self.cfg = cfg - self.num_classes = cfg.MODEL.NUM_CLASSES - self.vgg = vgg - self.extras = extras - self.classification_headers = classification_headers - self.regression_headers = regression_headers - self.l2_norm = L2Norm(512, scale=20) - self.criterion = MultiBoxLoss(neg_pos_ratio=cfg.MODEL.NEG_POS_RATIO) - self.priors = None - self.reset_parameters() - - def reset_parameters(self): - def weights_init(m): - if isinstance(m, nn.Conv2d): - nn.init.xavier_uniform_(m.weight) - nn.init.zeros_(m.bias) - - self.vgg.apply(weights_init) - self.extras.apply(weights_init) - self.classification_headers.apply(weights_init) - self.regression_headers.apply(weights_init) - - def forward(self, x, targets=None): - sources = [] - confidences = [] - locations = [] - for i in range(23): - x = self.vgg[i](x) - s = self.l2_norm(x) # Conv4_3 L2 normalization - sources.append(s) - - # apply vgg up to fc7 - for i in range(23, len(self.vgg)): - x = self.vgg[i](x) - sources.append(x) - - for k, v in enumerate(self.extras): - x = F.relu(v(x), inplace=True) - if k % 2 == 1: - sources.append(x) - - for (x, l, c) in zip(sources, self.regression_headers, self.classification_headers): - locations.append(l(x).permute(0, 2, 3, 1).contiguous()) - confidences.append(c(x).permute(0, 2, 3, 1).contiguous()) - - confidences = torch.cat([o.view(o.size(0), -1) for o in confidences], 1) - locations = torch.cat([o.view(o.size(0), -1) for o in locations], 1) - - confidences = confidences.view(confidences.size(0), -1, self.num_classes) - locations = locations.view(locations.size(0), -1, 4) - - if not self.training: - # when evaluating, decode predictions - if self.priors is None: - self.priors = PriorBox(self.cfg)().to(locations.device) - confidences = F.softmax(confidences, dim=2) - boxes = box_utils.convert_locations_to_boxes( - locations, self.priors, self.cfg.MODEL.CENTER_VARIANCE, self.cfg.MODEL.SIZE_VARIANCE - ) - boxes = box_utils.center_form_to_corner_form(boxes) 
- return confidences, boxes - else: - # when training, compute losses - gt_boxes, gt_labels = targets - regression_loss, classification_loss = self.criterion(confidences, locations, gt_labels, gt_boxes) - loss_dict = dict( - regression_loss=regression_loss, - classification_loss=classification_loss, - ) - return loss_dict - - def init_from_base_net(self, model): - vgg_weights = torch.load(model, map_location=lambda storage, loc: storage) - self.vgg.load_state_dict(vgg_weights, strict=True) - - def load(self, model): - self.load_state_dict(torch.load(model, map_location=lambda storage, loc: storage)) - - def save(self, model_path): - torch.save(self.state_dict(), model_path) - - -class MatchPrior(object): - def __init__(self, center_form_priors, center_variance, size_variance, iou_threshold): - self.center_form_priors = center_form_priors - self.corner_form_priors = box_utils.center_form_to_corner_form(center_form_priors) - self.center_variance = center_variance - self.size_variance = size_variance - self.iou_threshold = iou_threshold - - def __call__(self, gt_boxes, gt_labels): - if type(gt_boxes) is np.ndarray: - gt_boxes = torch.from_numpy(gt_boxes) - if type(gt_labels) is np.ndarray: - gt_labels = torch.from_numpy(gt_labels) - boxes, labels = box_utils.assign_priors(gt_boxes, gt_labels, - self.corner_form_priors, self.iou_threshold) - boxes = box_utils.corner_form_to_center_form(boxes) - locations = box_utils.convert_boxes_to_locations(boxes, self.center_form_priors, self.center_variance, self.size_variance) - return locations, labels diff --git a/ssd/solver/__init__.py b/ssd/solver/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ssd/solver/build.py b/ssd/solver/build.py new file mode 100644 index 00000000..da008920 --- /dev/null +++ b/ssd/solver/build.py @@ -0,0 +1,16 @@ +import torch + +from .lr_scheduler import WarmupMultiStepLR + + +def make_optimizer(cfg, model, lr=None): + lr = cfg.SOLVER.BASE_LR if lr is None else lr + return 
torch.optim.SGD(model.parameters(), lr=lr, momentum=cfg.SOLVER.MOMENTUM, weight_decay=cfg.SOLVER.WEIGHT_DECAY) + + +def make_lr_scheduler(cfg, optimizer, milestones=None): + return WarmupMultiStepLR(optimizer=optimizer, + milestones=cfg.SOLVER.LR_STEPS if milestones is None else milestones, + gamma=cfg.SOLVER.GAMMA, + warmup_factor=cfg.SOLVER.WARMUP_FACTOR, + warmup_iters=cfg.SOLVER.WARMUP_ITERS) diff --git a/ssd/utils/lr_scheduler.py b/ssd/solver/lr_scheduler.py similarity index 100% rename from ssd/utils/lr_scheduler.py rename to ssd/solver/lr_scheduler.py diff --git a/ssd/structures/__init__.py b/ssd/structures/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ssd/structures/container.py b/ssd/structures/container.py new file mode 100644 index 00000000..abaf0142 --- /dev/null +++ b/ssd/structures/container.py @@ -0,0 +1,49 @@ +from typing import Mapping + +from matplotlib import collections + + +class Container: + def __init__(self, **kwargs): + self._data_dict = {**kwargs} + + def __setattr__(self, key, value): + object.__setattr__(self, key, value) + + def __getitem__(self, key): + return self._data_dict[key] + + def __iter__(self): + return self._data_dict.__iter__() + + def __setitem__(self, key, value): + self._data_dict[key] = value + + def to(self, *args, **kwargs): + keys = list(self._data_dict.keys()) + for key in keys: + value = self._data_dict[key] + if hasattr(value, 'to'): + self._data_dict[key] = value.to(*args, **kwargs) + return self + + def numpy(self): + keys = list(self._data_dict.keys()) + for key in keys: + value = self._data_dict[key] + if hasattr(value, 'numpy'): + self._data_dict[key] = value.numpy() + return self + + def resize(self, size): + img_width = getattr(self, 'img_width', -1) + img_height = getattr(self, 'img_height', -1) + if img_width > 0 and img_height > 0: + new_width, new_height = size + if 'boxes' in self._data_dict: + self._data_dict['boxes'][:, 0::2] = self._data_dict['boxes'][:, 0::2] / img_width * 
new_width + self._data_dict['boxes'][:, 1::2] = self._data_dict['boxes'][:, 1::2] / img_height * new_height + return self + + def __repr__(self): + return self._data_dict.__repr__() diff --git a/ssd/utils/checkpoint.py b/ssd/utils/checkpoint.py new file mode 100644 index 00000000..145ef0f7 --- /dev/null +++ b/ssd/utils/checkpoint.py @@ -0,0 +1,94 @@ +import logging +import os + +import torch +from torch.nn.parallel import DistributedDataParallel + + +class CheckPointer: + def __init__(self, + model, + optimizer=None, + scheduler=None, + save_dir="", + save_to_disk=None, + logger=None): + self.model = model + self.optimizer = optimizer + self.scheduler = scheduler + self.save_dir = save_dir + self.save_to_disk = save_to_disk + if logger is None: + logger = logging.getLogger(__name__) + self.logger = logger + + def save(self, name, **kwargs): + if not self.save_dir: + return + + if not self.save_to_disk: + return + + data = {} + if isinstance(self.model, DistributedDataParallel): + data['model'] = self.model.module.state_dict() + else: + data['model'] = self.model.state_dict() + if self.optimizer is not None: + data["optimizer"] = self.optimizer.state_dict() + if self.scheduler is not None: + data["scheduler"] = self.scheduler.state_dict() + data.update(kwargs) + + save_file = os.path.join(self.save_dir, "{}.pth".format(name)) + self.logger.info("Saving checkpoint to {}".format(save_file)) + torch.save(data, save_file) + + self.tag_last_checkpoint(save_file) + + def load(self, f=None, use_latest=True): + if self.has_checkpoint() and use_latest: + # override argument with existing checkpoint + f = self.get_checkpoint_file() + if not f: + # no checkpoint could be found + self.logger.info("No checkpoint found.") + return {} + + self.logger.info("Loading checkpoint from {}".format(f)) + checkpoint = torch.load(f, map_location=torch.device("cpu")) + model = self.model + if isinstance(model, DistributedDataParallel): + model = self.model.module + + 
model.load_state_dict(checkpoint.pop("model")) + if "optimizer" in checkpoint and self.optimizer: + self.logger.info("Loading optimizer from {}".format(f)) + self.optimizer.load_state_dict(checkpoint.pop("optimizer")) + if "scheduler" in checkpoint and self.scheduler: + self.logger.info("Loading scheduler from {}".format(f)) + self.scheduler.load_state_dict(checkpoint.pop("scheduler")) + + # return any further checkpoint data + return checkpoint + + def get_checkpoint_file(self): + save_file = os.path.join(self.save_dir, "last_checkpoint.txt") + try: + with open(save_file, "r") as f: + last_saved = f.read() + last_saved = last_saved.strip() + except IOError: + # if file doesn't exist, maybe because it has just been + # deleted by a separate process + last_saved = "" + return last_saved + + def has_checkpoint(self): + save_file = os.path.join(self.save_dir, "last_checkpoint.txt") + return os.path.exists(save_file) + + def tag_last_checkpoint(self, last_filename): + save_file = os.path.join(self.save_dir, "last_checkpoint.txt") + with open(save_file, "w") as f: + f.write(last_filename) diff --git a/ssd/utils/distributed_util.py b/ssd/utils/dist_util.py similarity index 100% rename from ssd/utils/distributed_util.py rename to ssd/utils/dist_util.py diff --git a/ssd/utils/misc.py b/ssd/utils/misc.py index 9b821954..e01fb2ae 100644 --- a/ssd/utils/misc.py +++ b/ssd/utils/misc.py @@ -1,20 +1,14 @@ -import torch +import errno +import os def str2bool(s): return s.lower() in ('true', '1') -def save_checkpoint(epoch, net_state_dict, optimizer_state_dict, best_score, checkpoint_path, model_path): - torch.save({ - 'epoch': epoch, - 'model': net_state_dict, - 'optimizer': optimizer_state_dict, - 'best_score': best_score - }, checkpoint_path) - torch.save(net_state_dict, model_path) - - -def freeze_net_layers(net): - for param in net.parameters(): - param.requires_grad = False +def mkdir(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + 
raise diff --git a/ssd/utils/model_zoo.py b/ssd/utils/model_zoo.py new file mode 100644 index 00000000..c1206f1f --- /dev/null +++ b/ssd/utils/model_zoo.py @@ -0,0 +1,67 @@ +# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. +import os +import sys + +import torch + +from ssd.utils.dist_util import is_main_process, synchronize + +try: + from torch.hub import _download_url_to_file + from torch.hub import urlparse + from torch.hub import HASH_REGEX +except ImportError: + from torch.utils.model_zoo import _download_url_to_file + from torch.utils.model_zoo import urlparse + from torch.utils.model_zoo import HASH_REGEX + + +# very similar to https://github.com/pytorch/pytorch/blob/master/torch/utils/model_zoo.py +# but with a few improvements and modifications +def cache_url(url, model_dir=None, progress=True): + r"""Loads the Torch serialized object at the given URL. + If the object is already present in `model_dir`, it's deserialized and + returned. The filename part of the URL should follow the naming convention + ``filename-.ext`` where ```` is the first eight or more + digits of the SHA256 hash of the contents of the file. The hash is used to + ensure unique names and to verify the contents of the file. + The default value of `model_dir` is ``$TORCH_HOME/models`` where + ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be + overridden with the ``$TORCH_MODEL_ZOO`` environment variable. 
+ Args: + url (string): URL of the object to download + model_dir (string, optional): directory in which to save the object + progress (bool, optional): whether or not to display a progress bar to stderr + Example: + >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth') + """ + if model_dir is None: + torch_home = os.path.expanduser(os.getenv("TORCH_HOME", "~/.torch")) + model_dir = os.getenv("TORCH_MODEL_ZOO", os.path.join(torch_home, "models")) + if not os.path.exists(model_dir): + os.makedirs(model_dir) + parts = urlparse(url) + filename = os.path.basename(parts.path) + if filename == "model_final.pkl": + # workaround as pre-trained Caffe2 models from Detectron have all the same filename + # so make the full path the filename by replacing / with _ + filename = parts.path.replace("/", "_") + cached_file = os.path.join(model_dir, filename) + if not os.path.exists(cached_file) and is_main_process(): + sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file)) + hash_prefix = HASH_REGEX.search(filename) + if hash_prefix is not None: + hash_prefix = hash_prefix.group(1) + # workaround: Caffe2 models don't have a hash, but follow the R-50 convention, + # which matches the hash PyTorch uses. 
So we skip the hash matching + # if the hash_prefix is less than 6 characters + if len(hash_prefix) < 6: + hash_prefix = None + _download_url_to_file(url, cached_file, hash_prefix, progress=progress) + synchronize() + return cached_file + + +def load_state_dict_from_url(url, map_location='cpu'): + cached_file = cache_url(url) + return torch.load(cached_file, map_location=map_location) diff --git a/ssd/utils/viz.py b/ssd/utils/viz.py deleted file mode 100644 index fb615224..00000000 --- a/ssd/utils/viz.py +++ /dev/null @@ -1,97 +0,0 @@ -import numpy as np -from six.moves import range -import PIL.Image as Image -import PIL.ImageDraw as ImageDraw -import PIL.ImageFont as ImageFont - -STANDARD_COLORS = [ - 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', - 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', - 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', - 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', - 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', - 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', - 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', - 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', - 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', - 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', - 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', - 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', - 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', - 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', - 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', - 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', - 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', - 
'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', - 'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', - 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', - 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', - 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', - 'WhiteSmoke', 'Yellow', 'YellowGreen' -] - -NUM_COLORS = len(STANDARD_COLORS) - -try: - FONT = ImageFont.truetype('arial.ttf', 24) -except IOError: - FONT = ImageFont.load_default() - - -def _draw_single_box(image, xmin, ymin, xmax, ymax, color='black', display_str=None, font=None, thickness=2): - draw = ImageDraw.Draw(image) - left, right, top, bottom = xmin, xmax, ymin, ymax - draw.line([(left, top), (left, bottom), (right, bottom), (right, top), (left, top)], width=thickness, fill=color) - if display_str is not None: - text_bottom = bottom - # Reverse list and print from bottom to top. - text_width, text_height = font.getsize(display_str) - margin = np.ceil(0.05 * text_height) - draw.rectangle( - [(left, text_bottom - text_height - 2 * margin), (left + text_width, text_bottom)], fill=color) - draw.text((left + margin, text_bottom - text_height - margin), - display_str, - fill='black', - font=font) - - return image - - -def draw_bounding_boxes(image, boxes, labels=None, probs=None, class_name_map=None): - """Draw bboxes(labels, probs) on image - Args: - image: numpy array image, shape should be (height, width, channel) - boxes: bboxes, shape should be (N, 4), and each row is (xmin, ymin, xmax, ymax) - labels: labels, shape: (N, ) - probs: label scores, shape: (N, ), can be False/True or None - class_name_map: list or dict, map class id to class name for visualization. - can be False/True or None - Returns: - An image with information drawn on it. 
- """ - num_boxes = boxes.shape[0] - gt_boxes_new = boxes.copy() - draw_image = Image.fromarray(np.uint8(image)) - for i in range(num_boxes): - display_str = None - this_class = 0 - if labels is not None: - this_class = labels[i] - class_name = class_name_map[this_class] if class_name_map is not None else str(this_class) - class_name = class_name.decode('utf-8') if isinstance(class_name, bytes) else class_name - if probs is not None: - prob = probs[i] - display_str = '{}:{:.2f}'.format(class_name, prob) - else: - display_str = class_name - draw_image = _draw_single_box(image=draw_image, - xmin=gt_boxes_new[i, 0], - ymin=gt_boxes_new[i, 1], - xmax=gt_boxes_new[i, 2], - ymax=gt_boxes_new[i, 3], - color=STANDARD_COLORS[this_class % NUM_COLORS], - display_str=display_str, - font=FONT) - - image = np.array(draw_image, dtype=np.float32) - return image diff --git a/train_ssd.py b/train_ssd.py index 885c8108..a7a3857e 100644 --- a/train_ssd.py +++ b/train_ssd.py @@ -4,71 +4,45 @@ import torch import torch.distributed as dist -from torch.utils.data import DataLoader +from eval_ssd import do_evaluation from ssd.config import cfg -from ssd.data import samplers -from ssd.data.datasets import build_dataset -from ssd.engine.inference import do_evaluation +from ssd.data.build import make_data_loader from ssd.engine.trainer import do_train -from ssd.modeling.data_preprocessing import TrainAugmentation -from ssd.modeling.ssd import MatchPrior -from ssd.modeling.vgg_ssd import build_ssd_model -from ssd.module.prior_box import PriorBox -from ssd.utils import distributed_util +from ssd.modeling.detector import build_detection_model +from ssd.solver.build import make_optimizer, make_lr_scheduler +from ssd.utils import dist_util +from ssd.utils.checkpoint import CheckPointer +from ssd.utils.dist_util import synchronize from ssd.utils.logger import setup_logger -from ssd.utils.lr_scheduler import WarmupMultiStepLR from ssd.utils.misc import str2bool def train(cfg, args): logger = 
logging.getLogger('SSD.trainer') - # ----------------------------------------------------------------------------- - # Model - # ----------------------------------------------------------------------------- - model = build_ssd_model(cfg) + model = build_detection_model(cfg) device = torch.device(cfg.MODEL.DEVICE) model.to(device) - if args.resume: - logger.info("Resume from the model {}".format(args.resume)) - model.load(args.resume) - else: - logger.info("Init from base net {}".format(args.vgg)) - model.init_from_base_net(args.vgg) if args.distributed: model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.local_rank], output_device=args.local_rank) - # ----------------------------------------------------------------------------- - # Optimizer - # ----------------------------------------------------------------------------- + lr = cfg.SOLVER.LR * args.num_gpus # scale by num gpus - optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=cfg.SOLVER.MOMENTUM, weight_decay=cfg.SOLVER.WEIGHT_DECAY) + optimizer = make_optimizer(cfg, model, lr) - # ----------------------------------------------------------------------------- - # Scheduler - # ----------------------------------------------------------------------------- milestones = [step // args.num_gpus for step in cfg.SOLVER.LR_STEPS] - scheduler = WarmupMultiStepLR(optimizer=optimizer, - milestones=milestones, - gamma=cfg.SOLVER.GAMMA, - warmup_factor=cfg.SOLVER.WARMUP_FACTOR, - warmup_iters=cfg.SOLVER.WARMUP_ITERS) - - # ----------------------------------------------------------------------------- - # Dataset - # ----------------------------------------------------------------------------- - train_transform = TrainAugmentation(cfg.INPUT.IMAGE_SIZE, cfg.INPUT.PIXEL_MEAN) - target_transform = MatchPrior(PriorBox(cfg)(), cfg.MODEL.CENTER_VARIANCE, cfg.MODEL.SIZE_VARIANCE, cfg.MODEL.THRESHOLD) - train_dataset = build_dataset(dataset_list=cfg.DATASETS.TRAIN, transform=train_transform, 
target_transform=target_transform) - logger.info("Train dataset size: {}".format(len(train_dataset))) - if args.distributed: - sampler = torch.utils.data.DistributedSampler(train_dataset) - else: - sampler = torch.utils.data.RandomSampler(train_dataset) - batch_sampler = torch.utils.data.sampler.BatchSampler(sampler=sampler, batch_size=cfg.SOLVER.BATCH_SIZE, drop_last=False) - batch_sampler = samplers.IterationBasedBatchSampler(batch_sampler, num_iterations=cfg.SOLVER.MAX_ITER // args.num_gpus) - train_loader = DataLoader(train_dataset, num_workers=4, batch_sampler=batch_sampler, pin_memory=True) + scheduler = make_lr_scheduler(cfg, optimizer, milestones) + + arguments = {"iteration": 0} + save_to_disk = dist_util.get_rank() == 0 + checkpointer = CheckPointer(model, optimizer, scheduler, cfg.OUTPUT_DIR, save_to_disk, logger) + extra_checkpoint_data = checkpointer.load() + arguments.update(extra_checkpoint_data) + + max_iter = cfg.SOLVER.MAX_ITER // args.num_gpus + train_loader = make_data_loader(cfg, is_train=True, distributed=args.distributed, max_iter=max_iter, start_iter=arguments['iteration']) - return do_train(cfg, model, train_loader, optimizer, scheduler, device, args) + model = do_train(cfg, model, train_loader, optimizer, scheduler, checkpointer, device, arguments, args) + return model def main(): @@ -81,11 +55,9 @@ def main(): type=str, ) parser.add_argument("--local_rank", type=int, default=0) - parser.add_argument('--vgg', help='Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth') - parser.add_argument('--resume', default=None, type=str, help='Checkpoint state_dict file to resume training from') - parser.add_argument('--log_step', default=50, type=int, help='Print logs every log_step') - parser.add_argument('--save_step', default=5000, type=int, help='Save checkpoint every save_step') - parser.add_argument('--eval_step', default=5000, type=int, help='Evaluate dataset every eval_step, disabled when 
eval_step < 0') + parser.add_argument('--log_step', default=20, type=int, help='Print logs every log_step') + parser.add_argument('--save_step', default=2500, type=int, help='Save checkpoint every save_step') + parser.add_argument('--eval_step', default=2500, type=int, help='Evaluate dataset every eval_step, disabled when eval_step < 0') parser.add_argument('--use_tensorboard', default=True, type=str2bool) parser.add_argument( "--skip-test", @@ -111,8 +83,9 @@ def main(): if args.distributed: torch.cuda.set_device(args.local_rank) torch.distributed.init_process_group(backend="nccl", init_method="env://") + synchronize() - logger = setup_logger("SSD", distributed_util.get_rank()) + logger = setup_logger("SSD", dist_util.get_rank()) logger.info("Using {} GPUs".format(num_gpus)) logger.info(args) @@ -131,7 +104,7 @@ def main(): if not args.skip_test: logger.info('Start evaluating...') torch.cuda.empty_cache() # speed up evaluating after training finished - do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed) + do_evaluation(cfg, model, distributed=args.distributed) if __name__ == '__main__': From c76787533123c1c1a6aaaf12828e42becbbeef6f Mon Sep 17 00:00:00 2001 From: lufficc Date: Sat, 22 Jun 2019 19:07:11 +0800 Subject: [PATCH 02/24] fix eval bug, optimize --- configs/mobilenet_v2_ssd_voc0712.yaml | 17 +++ configs/vgg_ssd300_coco_trainval35k.yaml | 4 +- configs/vgg_ssd512_coco_trainval35k.yaml | 4 +- demo.py | 7 +- ssd/config/defaults.py | 6 +- ssd/data/build.py | 19 +-- ssd/data/datasets/coco.py | 7 -- ssd/data/datasets/evaluation/voc/__init__.py | 2 +- ssd/data/datasets/voc.py | 7 -- ssd/engine/trainer.py | 4 +- ssd/modeling/backbone/__init__.py | 27 +++- ssd/modeling/backbone/mobilenet.py | 123 +++++++++++++++++++ ssd/modeling/backbone/vgg.py | 8 -- ssd/modeling/detector_head/detector_head.py | 41 ++----- ssd/modeling/detector_head/inference.py | 34 +++-- ssd/modeling/detector_head/predictor.py | 73 +++++++++++ ssd/modeling/registry.py | 4 + 
ssd/structures/container.py | 26 ++-- ssd/utils/python_nms.py | 5 +- ssd/utils/registry.py | 40 ++++++ train_ssd.py | 2 +- 21 files changed, 354 insertions(+), 106 deletions(-) create mode 100644 configs/mobilenet_v2_ssd_voc0712.yaml create mode 100644 ssd/modeling/detector_head/predictor.py create mode 100644 ssd/modeling/registry.py create mode 100644 ssd/utils/registry.py diff --git a/configs/mobilenet_v2_ssd_voc0712.yaml b/configs/mobilenet_v2_ssd_voc0712.yaml new file mode 100644 index 00000000..92e5ea03 --- /dev/null +++ b/configs/mobilenet_v2_ssd_voc0712.yaml @@ -0,0 +1,17 @@ +MODEL: + NUM_CLASSES: 21 + PREDICTOR: 'SSDLitePredictor' + BACKBONE: + NAME: 'mobilenet_v2' + OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256) +INPUT: + IMAGE_SIZE: 300 +DATASETS: + TRAIN: ("voc_2007_trainval", "voc_2012_trainval") + TEST: ("voc_2007_test", ) +SOLVER: + MAX_ITER: 120000 + LR_STEPS: [80000, 100000] + GAMMA: 0.1 + BATCH_SIZE: 32 + LR: 1e-3 diff --git a/configs/vgg_ssd300_coco_trainval35k.yaml b/configs/vgg_ssd300_coco_trainval35k.yaml index cfb7613f..9ebbeef7 100644 --- a/configs/vgg_ssd300_coco_trainval35k.yaml +++ b/configs/vgg_ssd300_coco_trainval35k.yaml @@ -18,5 +18,5 @@ SOLVER: GAMMA: 0.1 BATCH_SIZE: 32 LR: 1e-3 -TEST: - MAX_PER_CLASS: 400 \ No newline at end of file + +OUTPUT_DIR: 'output_vgg_ssd300_coco' \ No newline at end of file diff --git a/configs/vgg_ssd512_coco_trainval35k.yaml b/configs/vgg_ssd512_coco_trainval35k.yaml index c1c28710..77dbd010 100644 --- a/configs/vgg_ssd512_coco_trainval35k.yaml +++ b/configs/vgg_ssd512_coco_trainval35k.yaml @@ -19,6 +19,4 @@ SOLVER: LR_STEPS: [280000, 360000] GAMMA: 0.1 BATCH_SIZE: 24 - LR: 1e-3 -TEST: - MAX_PER_CLASS: 400 \ No newline at end of file + LR: 1e-3 \ No newline at end of file diff --git a/demo.py b/demo.py index dd03bda0..5fa2145b 100644 --- a/demo.py +++ b/demo.py @@ -17,6 +17,7 @@ from ssd.utils.checkpoint import CheckPointer +@torch.no_grad() def run_demo(cfg, ckpt, score_threshold, images_dir, 
output_dir, dataset_type): if dataset_type == "voc": class_names = VOCDataset.class_names @@ -40,9 +41,9 @@ def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type): for image_path in tqdm(image_paths): image = np.array(Image.open(image_path).convert("RGB")) height, width, _ = image.shape - images = transforms(image).unsqueeze(0) + images = transforms(image)[0].unsqueeze(0) - result = model(images)[0] + result = model(images.to(device))[0] result = result.resize((width, height)).to(cpu_device).numpy() boxes, labels, scores = result['boxes'], result['labels'], result['scores'] @@ -54,7 +55,7 @@ def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type): drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8) image_name = os.path.basename(image_path) - Image.fromarray(drawn_image).save(os.path.join(output_dir, image_name)) + Image.fromarray(drawn_image).save(os.path.join(output_dir, '{}_demo.jpg'.format(image_name.split('.')[0]))) def main(): diff --git a/ssd/config/defaults.py b/ssd/config/defaults.py index 7adb5dba..c03bdb76 100644 --- a/ssd/config/defaults.py +++ b/ssd/config/defaults.py @@ -4,6 +4,7 @@ _C.MODEL = CN() _C.MODEL.META_ARCHITECTURE = 'SSDDetector' +_C.MODEL.PREDICTOR = 'SSDPredictor' _C.MODEL.DEVICE = "cuda" # match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5) _C.MODEL.THRESHOLD = 0.5 @@ -70,9 +71,8 @@ _C.TEST = CN() _C.TEST.NMS_THRESHOLD = 0.45 _C.TEST.CONFIDENCE_THRESHOLD = 0.01 -# change MAX_PER_CLASS to 400 as official caffe code will slightly increase mAP(0.8025=>0.8063, 0.7783=>0.7798) -_C.TEST.MAX_PER_CLASS = 200 -_C.TEST.MAX_PER_IMAGE = -1 +_C.TEST.MAX_PER_CLASS = -1 +_C.TEST.MAX_PER_IMAGE = 100 _C.TEST.BATCH_SIZE = 10 _C.OUTPUT_DIR = 'output' diff --git a/ssd/data/build.py b/ssd/data/build.py index 41277d4f..b8e26173 100644 --- a/ssd/data/build.py +++ b/ssd/data/build.py @@ -9,22 +9,27 @@ class BatchCollator: + def __init__(self, 
is_train=True): + self.is_train = is_train def __call__(self, batch): transposed_batch = list(zip(*batch)) images = default_collate(transposed_batch[0]) img_ids = default_collate(transposed_batch[2]) - list_targets = transposed_batch[1] - targets = Container( - **{key: default_collate([d[key] for d in list_targets]) for key in list_targets[0]} - ) + if self.is_train: + list_targets = transposed_batch[1] + targets = Container( + {key: default_collate([d[key] for d in list_targets]) for key in list_targets[0]} + ) + else: + targets = None return images, targets, img_ids def make_data_loader(cfg, is_train=True, distributed=False, max_iter=None, start_iter=0): - train_transform = build_transforms(cfg, is_train=True) - target_transform = build_target_transform(cfg) + train_transform = build_transforms(cfg, is_train=is_train) + target_transform = build_target_transform(cfg) if is_train else None dataset_list = cfg.DATASETS.TRAIN if is_train else cfg.DATASETS.TEST datasets = build_dataset(dataset_list, transform=train_transform, target_transform=target_transform, is_train=is_train) @@ -45,7 +50,7 @@ def make_data_loader(cfg, is_train=True, distributed=False, max_iter=None, start if max_iter is not None: batch_sampler = samplers.IterationBasedBatchSampler(batch_sampler, num_iterations=max_iter, start_iter=start_iter) - data_loader = DataLoader(dataset, num_workers=6, batch_sampler=batch_sampler, pin_memory=True, collate_fn=BatchCollator()) + data_loader = DataLoader(dataset, num_workers=6, batch_sampler=batch_sampler, pin_memory=True, collate_fn=BatchCollator(is_train)) data_loaders.append(data_loader) if is_train: diff --git a/ssd/data/datasets/coco.py b/ssd/data/datasets/coco.py index 16c93ab9..f1af3a25 100644 --- a/ssd/data/datasets/coco.py +++ b/ssd/data/datasets/coco.py @@ -55,13 +55,6 @@ def __getitem__(self, index): ) return image, targets, index - def get_image(self, index): - image_id = self.ids[index] - image = self._read_image(image_id) - if self.transform: - 
image, _ = self.transform(image) - return image - def get_annotation(self, index): image_id = self.ids[index] return image_id, self._get_annotation(image_id) diff --git a/ssd/data/datasets/evaluation/voc/__init__.py b/ssd/data/datasets/evaluation/voc/__init__.py index 5272eecd..77eb9f2e 100644 --- a/ssd/data/datasets/evaluation/voc/__init__.py +++ b/ssd/data/datasets/evaluation/voc/__init__.py @@ -18,7 +18,6 @@ def voc_evaluation(dataset, predictions, output_dir): gt_difficults = [] for i in range(len(dataset)): - prediction = predictions[i] image_id, annotation = dataset.get_annotation(i) gt_boxes, gt_labels, is_difficult = annotation gt_boxes_list.append(gt_boxes) @@ -26,6 +25,7 @@ def voc_evaluation(dataset, predictions, output_dir): gt_difficults.append(is_difficult.astype(np.bool)) img_info = dataset.get_img_info(i) + prediction = predictions[i] prediction = prediction.resize((img_info['width'], img_info['height'])).numpy() boxes, labels, scores = prediction['boxes'], prediction['labels'], prediction['scores'] diff --git a/ssd/data/datasets/voc.py b/ssd/data/datasets/voc.py index 5e54f64f..37168905 100644 --- a/ssd/data/datasets/voc.py +++ b/ssd/data/datasets/voc.py @@ -48,13 +48,6 @@ def __getitem__(self, index): ) return image, targets, index - def get_image(self, index): - image_id = self.ids[index] - image = self._read_image(image_id) - if self.transform: - image, _ = self.transform(image) - return image - def get_annotation(self, index): image_id = self.ids[index] return image_id, self._get_annotation(image_id) diff --git a/ssd/engine/trainer.py b/ssd/engine/trainer.py index 078a44b2..8de39bbb 100644 --- a/ssd/engine/trainer.py +++ b/ssd/engine/trainer.py @@ -115,12 +115,12 @@ def do_train(cfg, model, if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter: eval_results = do_evaluation(cfg, model, distributed=args.distributed) - if dist_util.get_rank() and summary_writer: + if dist_util.get_rank() == 0 and summary_writer: 
for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST): write_metric(eval_result['metrics'], 'metrics/' + dataset, summary_writer, iteration) model.train() - checkpointer.save("model_final.pth", **arguments) + checkpointer.save("model_final", **arguments) # compute training time total_training_time = int(time.time() - start_training_time) total_time_str = str(datetime.timedelta(seconds=total_training_time)) diff --git a/ssd/modeling/backbone/__init__.py b/ssd/modeling/backbone/__init__.py index c97f36cf..4f45d648 100644 --- a/ssd/modeling/backbone/__init__.py +++ b/ssd/modeling/backbone/__init__.py @@ -1,9 +1,26 @@ -from .vgg import vgg +from ssd.modeling import registry +from ssd.utils.model_zoo import load_state_dict_from_url +from .vgg import VGG +from .vgg import model_urls as vgg_model_urls +from .mobilenet import MobileNetV2 +from .mobilenet import model_urls as mobilenet_model_urls -BACKBONES = { - 'vgg': vgg, -} + +@registry.BACKBONES.register('vgg') +def vgg(cfg, pretrained=True): + model = VGG(cfg) + if pretrained: + model.init_from_pretrain(load_state_dict_from_url(vgg_model_urls['vgg'])) + return model + + +@registry.BACKBONES.register('mobilenet_v2') +def mobilenet_v2(cfg, pretrained=False): + model = MobileNetV2() + if pretrained: + model.load_state_dict(load_state_dict_from_url(mobilenet_model_urls['mobilenet_v2'])) + return model def build_backbone(cfg): - return BACKBONES[cfg.MODEL.BACKBONE.NAME](cfg) + return registry.BACKBONES[cfg.MODEL.BACKBONE.NAME](cfg) diff --git a/ssd/modeling/backbone/mobilenet.py b/ssd/modeling/backbone/mobilenet.py index e69de29b..85591de4 100644 --- a/ssd/modeling/backbone/mobilenet.py +++ b/ssd/modeling/backbone/mobilenet.py @@ -0,0 +1,123 @@ +from torch import nn + +model_urls = { + 'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', +} + + +class ConvBNReLU(nn.Sequential): + def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1): + padding = (kernel_size - 
1) // 2 + super(ConvBNReLU, self).__init__( + nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False), + nn.BatchNorm2d(out_planes), + nn.ReLU6(inplace=True) + ) + + +class InvertedResidual(nn.Module): + def __init__(self, inp, oup, stride, expand_ratio): + super(InvertedResidual, self).__init__() + self.stride = stride + assert stride in [1, 2] + + hidden_dim = int(round(inp * expand_ratio)) + self.use_res_connect = self.stride == 1 and inp == oup + + layers = [] + if expand_ratio != 1: + # pw + layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1)) + layers.extend([ + # dw + ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), + # pw-linear + nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), + nn.BatchNorm2d(oup), + ]) + self.conv = nn.Sequential(*layers) + + def forward(self, x): + if self.use_res_connect: + return x + self.conv(x) + else: + return self.conv(x) + + +class MobileNetV2(nn.Module): + def __init__(self, width_mult=1.0, inverted_residual_setting=None): + super(MobileNetV2, self).__init__() + block = InvertedResidual + input_channel = 32 + last_channel = 1280 + + if inverted_residual_setting is None: + inverted_residual_setting = [ + # t, c, n, s + [1, 16, 1, 1], + [6, 24, 2, 2], + [6, 32, 3, 2], + [6, 64, 4, 2], + [6, 96, 3, 1], + [6, 160, 3, 2], + [6, 320, 1, 1], + ] + + # only check the first element, assuming user knows t,c,n,s are required + if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4: + raise ValueError("inverted_residual_setting should be non-empty " + "or a 4-element list, got {}".format(inverted_residual_setting)) + + # building first layer + input_channel = int(input_channel * width_mult) + self.last_channel = int(last_channel * max(1.0, width_mult)) + features = [ConvBNReLU(3, input_channel, stride=2)] + # building inverted residual blocks + for t, c, n, s in inverted_residual_setting: + output_channel = int(c * width_mult) + for i in range(n): + 
stride = s if i == 0 else 1 + features.append(block(input_channel, output_channel, stride, expand_ratio=t)) + input_channel = output_channel + # building last several layers + features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1)) + # make it nn.Sequential + self.features = nn.Sequential(*features) + self.extras = nn.ModuleList([ + InvertedResidual(1280, 512, 2, 0.2), + InvertedResidual(512, 256, 2, 0.25), + InvertedResidual(256, 256, 2, 0.5), + InvertedResidual(256, 64, 2, 0.25) + ]) + + self.reset_parameters() + + def reset_parameters(self): + # weight initialization + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.kaiming_normal_(m.weight, mode='fan_out') + if m.bias is not None: + nn.init.zeros_(m.bias) + elif isinstance(m, nn.BatchNorm2d): + nn.init.ones_(m.weight) + nn.init.zeros_(m.bias) + elif isinstance(m, nn.Linear): + nn.init.normal_(m.weight, 0, 0.01) + nn.init.zeros_(m.bias) + + def forward(self, x): + features = [] + for i in range(15): + x = self.features[i](x) + features.append(x) + + for i in range(15, len(self.features)): + x = self.features[i](x) + features.append(x) + + for i in range(len(self.extras)): + x = self.extras[i](x) + features.append(x) + + return tuple(features) diff --git a/ssd/modeling/backbone/vgg.py b/ssd/modeling/backbone/vgg.py index 25746e4f..17bf2813 100644 --- a/ssd/modeling/backbone/vgg.py +++ b/ssd/modeling/backbone/vgg.py @@ -2,7 +2,6 @@ import torch.nn.functional as F from ssd.layers import L2Norm -from ssd.utils.model_zoo import load_state_dict_from_url model_urls = { 'vgg': 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth', @@ -120,10 +119,3 @@ def forward(self, x): features.append(x) return tuple(features) - - -def vgg(cfg, pretrained=True): - model = VGG(cfg) - if pretrained: - model.init_from_pretrain(load_state_dict_from_url(model_urls['vgg'])) - return model diff --git a/ssd/modeling/detector_head/detector_head.py b/ssd/modeling/detector_head/detector_head.py 
index 9ff74d4f..65fc9aa6 100644 --- a/ssd/modeling/detector_head/detector_head.py +++ b/ssd/modeling/detector_head/detector_head.py @@ -1,8 +1,8 @@ -import torch from torch import nn import torch.nn.functional as F from ssd.modeling.anchors.prior_box import PriorBox +from ssd.modeling.detector_head.predictor import make_predictor from ssd.utils import box_utils from .inference import PostProcessor from .loss import MultiBoxLoss @@ -12,15 +12,7 @@ class SSDHeader(nn.Module): def __init__(self, cfg): super().__init__() self.cfg = cfg - self.cls_headers = nn.ModuleList() - self.reg_headers = nn.ModuleList() - for boxes_per_location, out_channels in zip(cfg.MODEL.PRIORS.BOXES_PER_LOCATION, cfg.MODEL.BACKBONE.OUT_CHANNELS): - self.cls_headers.append( - nn.Conv2d(out_channels, boxes_per_location * cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) - ) - self.reg_headers.append( - nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) - ) + self.predictor = make_predictor(cfg) self.loss_evaluator = MultiBoxLoss(neg_pos_ratio=cfg.MODEL.NEG_POS_RATIO) self.post_processor = PostProcessor(cfg) self.priors = None @@ -33,37 +25,28 @@ def reset_parameters(self): nn.init.zeros_(m.bias) def forward(self, features, targets=None): - confidences = [] - locations = [] - for feature, cls_header, reg_header in zip(features, self.cls_headers, self.reg_headers): - confidences.append(cls_header(feature).permute(0, 2, 3, 1).contiguous()) - locations.append(reg_header(feature).permute(0, 2, 3, 1).contiguous()) - - batch_size = features[0].shape[0] - confidences = torch.cat([c.view(c.shape[0], -1) for c in confidences], dim=1).view(batch_size, -1, self.cfg.MODEL.NUM_CLASSES) - locations = torch.cat([l.view(l.shape[0], -1) for l in locations], dim=1).view(batch_size, -1, 4) - + cls_logits, bbox_pred = self.predictor(features) if self.training: - return self._forward_train(confidences, locations, targets) + return self._forward_train(cls_logits, bbox_pred, 
targets) else: - return self._forward_test(confidences, locations) + return self._forward_test(cls_logits, bbox_pred) - def _forward_train(self, confidences, locations, targets): + def _forward_train(self, cls_logits, bbox_pred, targets): gt_boxes, gt_labels = targets['boxes'], targets['labels'] - reg_loss, cls_loss = self.loss_evaluator(confidences, locations, gt_labels, gt_boxes) + reg_loss, cls_loss = self.loss_evaluator(cls_logits, bbox_pred, gt_labels, gt_boxes) loss_dict = dict( reg_loss=reg_loss, cls_loss=cls_loss, ) - detections = (confidences, locations) + detections = (cls_logits, bbox_pred) return detections, loss_dict - def _forward_test(self, confidences, locations): + def _forward_test(self, cls_logits, bbox_pred): if self.priors is None: - self.priors = PriorBox(self.cfg)().to(locations.device) - scores = F.softmax(confidences, dim=2) + self.priors = PriorBox(self.cfg)().to(bbox_pred.device) + scores = F.softmax(cls_logits, dim=2) boxes = box_utils.convert_locations_to_boxes( - locations, self.priors, self.cfg.MODEL.CENTER_VARIANCE, self.cfg.MODEL.SIZE_VARIANCE + bbox_pred, self.priors, self.cfg.MODEL.CENTER_VARIANCE, self.cfg.MODEL.SIZE_VARIANCE ) boxes = box_utils.center_form_to_corner_form(boxes) detections = (scores, boxes) diff --git a/ssd/modeling/detector_head/inference.py b/ssd/modeling/detector_head/inference.py index d87250f0..facfe8f3 100644 --- a/ssd/modeling/detector_head/inference.py +++ b/ssd/modeling/detector_head/inference.py @@ -8,6 +8,8 @@ class PostProcessor: def __init__(self, cfg): super().__init__() self.cfg = cfg + self.width = cfg.INPUT.IMAGE_SIZE + self.height = cfg.INPUT.IMAGE_SIZE def __call__(self, detections): batches_scores, batches_boxes = detections @@ -19,19 +21,22 @@ def __call__(self, detections): processed_scores = [] processed_labels = [] - scores, boxes = batches_scores[batch_id], batches_boxes[batch_id] # (N, #CLS) (N, 4) - for class_id in range(1, scores.size(1)): # skip background - per_cls_scores = scores[:, 
class_id] - mask = per_cls_scores > self.cfg.TEST.CONFIDENCE_THRESHOLD - per_cls_scores = per_cls_scores[mask] - if per_cls_scores.numel() == 0: + per_img_scores, per_img_boxes = batches_scores[batch_id], batches_boxes[batch_id] # (N, #CLS) (N, 4) + for class_id in range(1, per_img_scores.size(1)): # skip background + scores = per_img_scores[:, class_id] + mask = scores > self.cfg.TEST.CONFIDENCE_THRESHOLD + scores = scores[mask] + if scores.size(0) == 0: continue - per_cls_boxes = boxes[mask, :] * self.cfg.INPUT.IMAGE_SIZE - keep = boxes_nms(per_cls_boxes, per_cls_scores, self.cfg.TEST.NMS_THRESHOLD, self.cfg.TEST.MAX_PER_CLASS) + boxes = per_img_boxes[mask, :] + boxes[:, 0::2] *= self.width + boxes[:, 1::2] *= self.height - nmsed_boxes = per_cls_boxes[keep, :] + keep = boxes_nms(boxes, scores, self.cfg.TEST.NMS_THRESHOLD, self.cfg.TEST.MAX_PER_CLASS) + + nmsed_boxes = boxes[keep, :] nmsed_labels = torch.tensor([class_id] * keep.size(0), device=device) - nmsed_scores = per_cls_scores[keep] + nmsed_scores = scores[keep] processed_boxes.append(nmsed_boxes) processed_scores.append(nmsed_scores) @@ -46,8 +51,13 @@ def __call__(self, detections): processed_labels = torch.cat(processed_labels, 0) processed_scores = torch.cat(processed_scores, 0) + if processed_boxes.size(0) > self.cfg.TEST.MAX_PER_IMAGE > 0: + processed_scores, keep = torch.topk(processed_scores, k=self.cfg.TEST.MAX_PER_IMAGE) + processed_boxes = processed_boxes[keep, :] + processed_labels = processed_labels[keep] + container = Container(boxes=processed_boxes, labels=processed_labels, scores=processed_scores) - container.img_width = self.cfg.INPUT.IMAGE_SIZE - container.img_height = self.cfg.INPUT.IMAGE_SIZE + container.img_width = self.width + container.img_height = self.height results.append(container) return results diff --git a/ssd/modeling/detector_head/predictor.py b/ssd/modeling/detector_head/predictor.py new file mode 100644 index 00000000..6f563746 --- /dev/null +++ 
b/ssd/modeling/detector_head/predictor.py @@ -0,0 +1,73 @@ +import torch +from torch import nn + +from ssd.modeling import registry + + +class Predictor(nn.Module): + def __init__(self, cfg): + super().__init__() + self.cfg = cfg + self.cls_headers = nn.ModuleList() + self.reg_headers = nn.ModuleList() + for boxes_per_location, out_channels in zip(cfg.MODEL.PRIORS.BOXES_PER_LOCATION, cfg.MODEL.BACKBONE.OUT_CHANNELS): + self.cls_headers.append(self.cls_block(out_channels, boxes_per_location)) + self.reg_headers.append(self.reg_block(out_channels, boxes_per_location)) + self.reset_parameters() + + def cls_block(self, out_channels, boxes_per_location): + raise NotImplementedError + + def reg_block(self, out_channels, boxes_per_location): + raise NotImplementedError + + def reset_parameters(self): + for m in self.modules(): + if isinstance(m, nn.Conv2d): + nn.init.xavier_uniform_(m.weight) + nn.init.zeros_(m.bias) + + def forward(self, features): + cls_logits = [] + bbox_pred = [] + for feature, cls_header, reg_header in zip(features, self.cls_headers, self.reg_headers): + cls_logits.append(cls_header(feature).permute(0, 2, 3, 1).contiguous()) + bbox_pred.append(reg_header(feature).permute(0, 2, 3, 1).contiguous()) + + batch_size = features[0].shape[0] + cls_logits = torch.cat([c.view(c.shape[0], -1) for c in cls_logits], dim=1).view(batch_size, -1, self.cfg.MODEL.NUM_CLASSES) + bbox_pred = torch.cat([l.view(l.shape[0], -1) for l in bbox_pred], dim=1).view(batch_size, -1, 4) + + return cls_logits, bbox_pred + + +@registry.PREDICTORS.register('SSDPredictor') +class SSDPredictor(Predictor): + def cls_block(self, out_channels, boxes_per_location): + return nn.Conv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) + + def reg_block(self, out_channels, boxes_per_location): + return nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) + + +def SeperableConv2d(in_channels, out_channels, 
kernel_size=1, stride=1, padding=0, onnx_compatible=False): + ReLU = nn.ReLU if onnx_compatible else nn.ReLU6 + return nn.Sequential( + nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, groups=in_channels, stride=stride, padding=padding), + nn.BatchNorm2d(in_channels), + ReLU(), + nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1), + ) + + +@registry.PREDICTORS.register('SSDLitePredictor') +class SSDLitePredictor(Predictor): + def cls_block(self, out_channels, boxes_per_location): + return SeperableConv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) + + def reg_block(self, out_channels, boxes_per_location): + return SeperableConv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) + + +def make_predictor(cfg): + return registry.PREDICTORS[cfg.MODEL.PREDICTOR](cfg) diff --git a/ssd/modeling/registry.py b/ssd/modeling/registry.py new file mode 100644 index 00000000..50c5d7a8 --- /dev/null +++ b/ssd/modeling/registry.py @@ -0,0 +1,4 @@ +from ssd.utils.registry import Registry + +BACKBONES = Registry() +PREDICTORS = Registry() diff --git a/ssd/structures/container.py b/ssd/structures/container.py index abaf0142..57f22376 100644 --- a/ssd/structures/container.py +++ b/ssd/structures/container.py @@ -1,11 +1,6 @@ -from typing import Mapping - -from matplotlib import collections - - class Container: - def __init__(self, **kwargs): - self._data_dict = {**kwargs} + def __init__(self, *args, **kwargs): + self._data_dict = dict(*args, **kwargs) def __setattr__(self, key, value): object.__setattr__(self, key, value) @@ -36,13 +31,20 @@ def numpy(self): return self def resize(self, size): + """resize boxes + Args: + size: (width, height) + Returns: + self + """ img_width = getattr(self, 'img_width', -1) img_height = getattr(self, 'img_height', -1) - if img_width > 0 and img_height > 0: - new_width, new_height = size - if 'boxes' in 
self._data_dict: - self._data_dict['boxes'][:, 0::2] = self._data_dict['boxes'][:, 0::2] / img_width * new_width - self._data_dict['boxes'][:, 1::2] = self._data_dict['boxes'][:, 1::2] / img_height * new_height + assert img_width > 0 and img_height > 0 + assert 'boxes' in self._data_dict + boxes = self._data_dict['boxes'] + new_width, new_height = size + boxes[:, 0::2] *= (new_width / img_width) + boxes[:, 1::2] *= (new_height / img_height) return self def __repr__(self): diff --git a/ssd/utils/python_nms.py b/ssd/utils/python_nms.py index 896554d7..7860ce56 100644 --- a/ssd/utils/python_nms.py +++ b/ssd/utils/python_nms.py @@ -2,14 +2,13 @@ import numpy as np -def python_nms(boxes, scores, nms_thresh, max_count=-1): +def python_nms(boxes, scores, nms_thresh): """ Performs non-maximum suppression using numpy Args: boxes(Tensor): `xyxy` mode boxes, use absolute coordinates(not support relative coordinates), shape is (n, 4) scores(Tensor): scores, shape is (n, ) nms_thresh(float): thresh - max_count (int): if > 0, then only the top max_proposals are kept after non-maximum suppression Returns: indices kept. """ @@ -56,7 +55,5 @@ def python_nms(boxes, scores, nms_thresh, max_count=-1): if ovr >= nms_thresh: suppressed[j] = True keep = np.nonzero(suppressed == 0)[0] - if max_count > 0: - keep = keep[:max_count] keep = torch.from_numpy(keep).to(origin_device) return keep diff --git a/ssd/utils/registry.py b/ssd/utils/registry.py new file mode 100644 index 00000000..194b0787 --- /dev/null +++ b/ssd/utils/registry.py @@ -0,0 +1,40 @@ +def _register_generic(module_dict, module_name, module): + assert module_name not in module_dict + module_dict[module_name] = module + + +class Registry(dict): + """ + A helper class for managing registering modules, it extends a dictionary + and provides a register functions. + Eg. 
creating a registry: + some_registry = Registry({"default": default_module}) + There're two ways of registering new modules: + 1): normal way is just calling register function: + def foo(): + ... + some_registry.register("foo_module", foo) + 2): used as decorator when declaring the module: + @some_registry.register("foo_module") + @some_registry.register("foo_module_nickname") + def foo(): + ... + Access of module is just like using a dictionary, eg: + f = some_registry["foo_module"] + """ + + def __init__(self, *args, **kwargs): + super(Registry, self).__init__(*args, **kwargs) + + def register(self, module_name, module=None): + # used as function call + if module is not None: + _register_generic(self, module_name, module) + return + + # used as decorator + def register_fn(fn): + _register_generic(self, module_name, fn) + return fn + + return register_fn diff --git a/train_ssd.py b/train_ssd.py index a7a3857e..6ffd19e2 100644 --- a/train_ssd.py +++ b/train_ssd.py @@ -55,7 +55,7 @@ def main(): type=str, ) parser.add_argument("--local_rank", type=int, default=0) - parser.add_argument('--log_step', default=20, type=int, help='Print logs every log_step') + parser.add_argument('--log_step', default=10, type=int, help='Print logs every log_step') parser.add_argument('--save_step', default=2500, type=int, help='Save checkpoint every save_step') parser.add_argument('--eval_step', default=2500, type=int, help='Evaluate dataset every eval_step, disabled when eval_step < 0') parser.add_argument('--use_tensorboard', default=True, type=str2bool) From 85fb495c646bfbed2084e53eec2ad0c5f8947711 Mon Sep 17 00:00:00 2001 From: lufficc Date: Sat, 22 Jun 2019 20:41:10 +0800 Subject: [PATCH 03/24] add mobilenet_v2 --- configs/mobilenet_v2_ssd_voc0712.yaml | 13 +++++++-- configs/vgg_ssd300_coco_trainval35k.yaml | 2 +- configs/vgg_ssd300_voc0712.yaml | 2 ++ configs/vgg_ssd512_coco_trainval35k.yaml | 4 ++- configs/vgg_ssd512_voc0712.yaml | 4 ++- ssd/config/defaults.py | 2 +- 
ssd/modeling/backbone/mobilenet.py | 4 +-- ssd/modeling/detector_head/detector_head.py | 7 ----- ssd/modeling/detector_head/predictor.py | 32 ++++++++++++--------- 9 files changed, 42 insertions(+), 28 deletions(-) diff --git a/configs/mobilenet_v2_ssd_voc0712.yaml b/configs/mobilenet_v2_ssd_voc0712.yaml index 92e5ea03..09d9c499 100644 --- a/configs/mobilenet_v2_ssd_voc0712.yaml +++ b/configs/mobilenet_v2_ssd_voc0712.yaml @@ -3,9 +3,16 @@ MODEL: PREDICTOR: 'SSDLitePredictor' BACKBONE: NAME: 'mobilenet_v2' - OUT_CHANNELS: (512, 1024, 512, 256, 256, 256, 256) + OUT_CHANNELS: (96, 1280, 512, 256, 256, 64) + PRIORS: + FEATURE_MAPS: [20, 10, 5, 3, 2, 1] + STRIDES: [16, 32, 64, 100, 150, 300] + MIN_SIZES: [60, 105, 150, 195, 240, 285] + MAX_SIZES: [105, 150, 195, 240, 285, 330] + ASPECT_RATIOS: [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]] + BOXES_PER_LOCATION: [6, 6, 6, 6, 6, 6] INPUT: - IMAGE_SIZE: 300 + IMAGE_SIZE: 320 DATASETS: TRAIN: ("voc_2007_trainval", "voc_2012_trainval") TEST: ("voc_2007_test", ) @@ -15,3 +22,5 @@ SOLVER: GAMMA: 0.1 BATCH_SIZE: 32 LR: 1e-3 + +OUTPUT_DIR: 'outputs/mobilenet_v2_ssd_voc0712' \ No newline at end of file diff --git a/configs/vgg_ssd300_coco_trainval35k.yaml b/configs/vgg_ssd300_coco_trainval35k.yaml index 9ebbeef7..52a15062 100644 --- a/configs/vgg_ssd300_coco_trainval35k.yaml +++ b/configs/vgg_ssd300_coco_trainval35k.yaml @@ -19,4 +19,4 @@ SOLVER: BATCH_SIZE: 32 LR: 1e-3 -OUTPUT_DIR: 'output_vgg_ssd300_coco' \ No newline at end of file +OUTPUT_DIR: 'outputs/vgg_ssd300_coco_trainval35k' \ No newline at end of file diff --git a/configs/vgg_ssd300_voc0712.yaml b/configs/vgg_ssd300_voc0712.yaml index 5653e555..a5a17a4e 100644 --- a/configs/vgg_ssd300_voc0712.yaml +++ b/configs/vgg_ssd300_voc0712.yaml @@ -11,3 +11,5 @@ SOLVER: GAMMA: 0.1 BATCH_SIZE: 32 LR: 1e-3 + +OUTPUT_DIR: 'outputs/vgg_ssd300_voc0712' \ No newline at end of file diff --git a/configs/vgg_ssd512_coco_trainval35k.yaml b/configs/vgg_ssd512_coco_trainval35k.yaml index 
77dbd010..589e32d2 100644 --- a/configs/vgg_ssd512_coco_trainval35k.yaml +++ b/configs/vgg_ssd512_coco_trainval35k.yaml @@ -19,4 +19,6 @@ SOLVER: LR_STEPS: [280000, 360000] GAMMA: 0.1 BATCH_SIZE: 24 - LR: 1e-3 \ No newline at end of file + LR: 1e-3 + +OUTPUT_DIR: 'outputs/vgg_ssd512_coco_trainval35k' \ No newline at end of file diff --git a/configs/vgg_ssd512_voc0712.yaml b/configs/vgg_ssd512_voc0712.yaml index 72c53eb6..a48d557e 100644 --- a/configs/vgg_ssd512_voc0712.yaml +++ b/configs/vgg_ssd512_voc0712.yaml @@ -19,4 +19,6 @@ SOLVER: LR_STEPS: [80000, 100000] GAMMA: 0.1 BATCH_SIZE: 24 - LR: 1e-3 \ No newline at end of file + LR: 1e-3 + +OUTPUT_DIR: 'outputs/vgg_ssd512_voc0712' \ No newline at end of file diff --git a/ssd/config/defaults.py b/ssd/config/defaults.py index c03bdb76..bebbe6f7 100644 --- a/ssd/config/defaults.py +++ b/ssd/config/defaults.py @@ -75,4 +75,4 @@ _C.TEST.MAX_PER_IMAGE = 100 _C.TEST.BATCH_SIZE = 10 -_C.OUTPUT_DIR = 'output' +_C.OUTPUT_DIR = 'outputs' diff --git a/ssd/modeling/backbone/mobilenet.py b/ssd/modeling/backbone/mobilenet.py index 85591de4..4d0ded7c 100644 --- a/ssd/modeling/backbone/mobilenet.py +++ b/ssd/modeling/backbone/mobilenet.py @@ -108,11 +108,11 @@ def reset_parameters(self): def forward(self, x): features = [] - for i in range(15): + for i in range(14): x = self.features[i](x) features.append(x) - for i in range(15, len(self.features)): + for i in range(14, len(self.features)): x = self.features[i](x) features.append(x) diff --git a/ssd/modeling/detector_head/detector_head.py b/ssd/modeling/detector_head/detector_head.py index 65fc9aa6..063b8575 100644 --- a/ssd/modeling/detector_head/detector_head.py +++ b/ssd/modeling/detector_head/detector_head.py @@ -16,13 +16,6 @@ def __init__(self, cfg): self.loss_evaluator = MultiBoxLoss(neg_pos_ratio=cfg.MODEL.NEG_POS_RATIO) self.post_processor = PostProcessor(cfg) self.priors = None - self.reset_parameters() - - def reset_parameters(self): - for m in self.modules(): - if 
isinstance(m, nn.Conv2d): - nn.init.xavier_uniform_(m.weight) - nn.init.zeros_(m.bias) def forward(self, features, targets=None): cls_logits, bbox_pred = self.predictor(features) diff --git a/ssd/modeling/detector_head/predictor.py b/ssd/modeling/detector_head/predictor.py index 6f563746..7e0b97da 100644 --- a/ssd/modeling/detector_head/predictor.py +++ b/ssd/modeling/detector_head/predictor.py @@ -10,15 +10,15 @@ def __init__(self, cfg): self.cfg = cfg self.cls_headers = nn.ModuleList() self.reg_headers = nn.ModuleList() - for boxes_per_location, out_channels in zip(cfg.MODEL.PRIORS.BOXES_PER_LOCATION, cfg.MODEL.BACKBONE.OUT_CHANNELS): - self.cls_headers.append(self.cls_block(out_channels, boxes_per_location)) - self.reg_headers.append(self.reg_block(out_channels, boxes_per_location)) + for level, (boxes_per_location, out_channels) in enumerate(zip(cfg.MODEL.PRIORS.BOXES_PER_LOCATION, cfg.MODEL.BACKBONE.OUT_CHANNELS)): + self.cls_headers.append(self.cls_block(level, out_channels, boxes_per_location)) + self.reg_headers.append(self.reg_block(level, out_channels, boxes_per_location)) self.reset_parameters() - def cls_block(self, out_channels, boxes_per_location): + def cls_block(self, level, out_channels, boxes_per_location): raise NotImplementedError - def reg_block(self, out_channels, boxes_per_location): + def reg_block(self, level, out_channels, boxes_per_location): raise NotImplementedError def reset_parameters(self): @@ -43,14 +43,14 @@ def forward(self, features): @registry.PREDICTORS.register('SSDPredictor') class SSDPredictor(Predictor): - def cls_block(self, out_channels, boxes_per_location): + def cls_block(self, level, out_channels, boxes_per_location): return nn.Conv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) - def reg_block(self, out_channels, boxes_per_location): + def reg_block(self, level, out_channels, boxes_per_location): return nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=3, 
stride=1, padding=1) -def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, onnx_compatible=False): +def SeparableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, onnx_compatible=False): ReLU = nn.ReLU if onnx_compatible else nn.ReLU6 return nn.Sequential( nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, groups=in_channels, stride=stride, padding=padding), @@ -62,11 +62,17 @@ def SeperableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding= @registry.PREDICTORS.register('SSDLitePredictor') class SSDLitePredictor(Predictor): - def cls_block(self, out_channels, boxes_per_location): - return SeperableConv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) - - def reg_block(self, out_channels, boxes_per_location): - return SeperableConv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) + def cls_block(self, level, out_channels, boxes_per_location): + num_levels = len(self.cfg.MODEL.BACKBONE.OUT_CHANNELS) + if level == num_levels - 1: + return nn.Conv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=1) + return SeparableConv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) + + def reg_block(self, level, out_channels, boxes_per_location): + num_levels = len(self.cfg.MODEL.BACKBONE.OUT_CHANNELS) + if level == num_levels - 1: + return nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=1) + return SeparableConv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) def make_predictor(cfg): From a14635948d20541b853435d2ff5ec8da9114ad04 Mon Sep 17 00:00:00 2001 From: lufficc Date: Sat, 22 Jun 2019 23:36:15 +0800 Subject: [PATCH 04/24] rename --- configs/mobilenet_v2_ssd_voc0712.yaml | 2 +- ssd/config/defaults.py | 2 +- ssd/modeling/box_head/__init__.py | 6 ++++++ .../detector_head.py => 
box_head/box_head.py} | 6 +++--- .../predictor.py => box_head/box_predictor.py} | 14 +++++++------- .../{detector_head => box_head}/inference.py | 0 ssd/modeling/{detector_head => box_head}/loss.py | 0 ssd/modeling/detector/ssd_detector.py | 6 +++--- ssd/modeling/detector_head/__init__.py | 6 ------ ssd/modeling/registry.py | 2 +- 10 files changed, 22 insertions(+), 22 deletions(-) create mode 100644 ssd/modeling/box_head/__init__.py rename ssd/modeling/{detector_head/detector_head.py => box_head/box_head.py} (91%) rename ssd/modeling/{detector_head/predictor.py => box_head/box_predictor.py} (91%) rename ssd/modeling/{detector_head => box_head}/inference.py (100%) rename ssd/modeling/{detector_head => box_head}/loss.py (100%) delete mode 100644 ssd/modeling/detector_head/__init__.py diff --git a/configs/mobilenet_v2_ssd_voc0712.yaml b/configs/mobilenet_v2_ssd_voc0712.yaml index 09d9c499..e50229ec 100644 --- a/configs/mobilenet_v2_ssd_voc0712.yaml +++ b/configs/mobilenet_v2_ssd_voc0712.yaml @@ -1,6 +1,6 @@ MODEL: NUM_CLASSES: 21 - PREDICTOR: 'SSDLitePredictor' + PREDICTOR: 'SSDLiteBoxPredictor' BACKBONE: NAME: 'mobilenet_v2' OUT_CHANNELS: (96, 1280, 512, 256, 256, 64) diff --git a/ssd/config/defaults.py b/ssd/config/defaults.py index bebbe6f7..bc80182a 100644 --- a/ssd/config/defaults.py +++ b/ssd/config/defaults.py @@ -4,7 +4,7 @@ _C.MODEL = CN() _C.MODEL.META_ARCHITECTURE = 'SSDDetector' -_C.MODEL.PREDICTOR = 'SSDPredictor' +_C.MODEL.PREDICTOR = 'SSDBoxPredictor' _C.MODEL.DEVICE = "cuda" # match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5) _C.MODEL.THRESHOLD = 0.5 diff --git a/ssd/modeling/box_head/__init__.py b/ssd/modeling/box_head/__init__.py new file mode 100644 index 00000000..26e71b6e --- /dev/null +++ b/ssd/modeling/box_head/__init__.py @@ -0,0 +1,6 @@ +from .box_head import SSDBoxHead + + +def build_box_head(cfg): + # TODO: make it more general + return SSDBoxHead(cfg) diff --git 
a/ssd/modeling/detector_head/detector_head.py b/ssd/modeling/box_head/box_head.py similarity index 91% rename from ssd/modeling/detector_head/detector_head.py rename to ssd/modeling/box_head/box_head.py index 063b8575..e0708f93 100644 --- a/ssd/modeling/detector_head/detector_head.py +++ b/ssd/modeling/box_head/box_head.py @@ -2,17 +2,17 @@ import torch.nn.functional as F from ssd.modeling.anchors.prior_box import PriorBox -from ssd.modeling.detector_head.predictor import make_predictor +from ssd.modeling.box_head.box_predictor import make_box_predictor from ssd.utils import box_utils from .inference import PostProcessor from .loss import MultiBoxLoss -class SSDHeader(nn.Module): +class SSDBoxHead(nn.Module): def __init__(self, cfg): super().__init__() self.cfg = cfg - self.predictor = make_predictor(cfg) + self.predictor = make_box_predictor(cfg) self.loss_evaluator = MultiBoxLoss(neg_pos_ratio=cfg.MODEL.NEG_POS_RATIO) self.post_processor = PostProcessor(cfg) self.priors = None diff --git a/ssd/modeling/detector_head/predictor.py b/ssd/modeling/box_head/box_predictor.py similarity index 91% rename from ssd/modeling/detector_head/predictor.py rename to ssd/modeling/box_head/box_predictor.py index 7e0b97da..54c500db 100644 --- a/ssd/modeling/detector_head/predictor.py +++ b/ssd/modeling/box_head/box_predictor.py @@ -4,7 +4,7 @@ from ssd.modeling import registry -class Predictor(nn.Module): +class BoxPredictor(nn.Module): def __init__(self, cfg): super().__init__() self.cfg = cfg @@ -41,8 +41,8 @@ def forward(self, features): return cls_logits, bbox_pred -@registry.PREDICTORS.register('SSDPredictor') -class SSDPredictor(Predictor): +@registry.BOX_PREDICTORS.register('SSDBoxPredictor') +class SSDBoxPredictor(BoxPredictor): def cls_block(self, level, out_channels, boxes_per_location): return nn.Conv2d(out_channels, boxes_per_location * self.cfg.MODEL.NUM_CLASSES, kernel_size=3, stride=1, padding=1) @@ -60,8 +60,8 @@ def SeparableConv2d(in_channels, out_channels, 
kernel_size=1, stride=1, padding= ) -@registry.PREDICTORS.register('SSDLitePredictor') -class SSDLitePredictor(Predictor): +@registry.BOX_PREDICTORS.register('SSDLiteBoxPredictor') +class SSDLiteBoxPredictor(BoxPredictor): def cls_block(self, level, out_channels, boxes_per_location): num_levels = len(self.cfg.MODEL.BACKBONE.OUT_CHANNELS) if level == num_levels - 1: @@ -75,5 +75,5 @@ def reg_block(self, level, out_channels, boxes_per_location): return SeparableConv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) -def make_predictor(cfg): - return registry.PREDICTORS[cfg.MODEL.PREDICTOR](cfg) +def make_box_predictor(cfg): + return registry.BOX_PREDICTORS[cfg.MODEL.PREDICTOR](cfg) diff --git a/ssd/modeling/detector_head/inference.py b/ssd/modeling/box_head/inference.py similarity index 100% rename from ssd/modeling/detector_head/inference.py rename to ssd/modeling/box_head/inference.py diff --git a/ssd/modeling/detector_head/loss.py b/ssd/modeling/box_head/loss.py similarity index 100% rename from ssd/modeling/detector_head/loss.py rename to ssd/modeling/box_head/loss.py diff --git a/ssd/modeling/detector/ssd_detector.py b/ssd/modeling/detector/ssd_detector.py index f6ac4777..c43a4a68 100644 --- a/ssd/modeling/detector/ssd_detector.py +++ b/ssd/modeling/detector/ssd_detector.py @@ -1,7 +1,7 @@ from torch import nn from ssd.modeling.backbone import build_backbone -from ssd.modeling.detector_head import build_detector_head +from ssd.modeling.box_head import build_box_head class SSDDetector(nn.Module): @@ -9,11 +9,11 @@ def __init__(self, cfg): super().__init__() self.cfg = cfg self.backbone = build_backbone(cfg) - self.detector_head = build_detector_head(cfg) + self.box_head = build_box_head(cfg) def forward(self, images, targets=None): features = self.backbone(images) - detections, detector_losses = self.detector_head(features, targets) + detections, detector_losses = self.box_head(features, targets) if self.training: return detector_losses 
return detections diff --git a/ssd/modeling/detector_head/__init__.py b/ssd/modeling/detector_head/__init__.py deleted file mode 100644 index 8bbe9315..00000000 --- a/ssd/modeling/detector_head/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from .detector_head import SSDHeader - - -def build_detector_head(cfg): - # TODO: make it more general - return SSDHeader(cfg) diff --git a/ssd/modeling/registry.py b/ssd/modeling/registry.py index 50c5d7a8..b7c122b3 100644 --- a/ssd/modeling/registry.py +++ b/ssd/modeling/registry.py @@ -1,4 +1,4 @@ from ssd.utils.registry import Registry BACKBONES = Registry() -PREDICTORS = Registry() +BOX_PREDICTORS = Registry() From 56b8adaf12aabb53abd5f66e990296293f6c85f5 Mon Sep 17 00:00:00 2001 From: lufficc Date: Sat, 22 Jun 2019 23:47:50 +0800 Subject: [PATCH 05/24] add pretrained config --- ssd/config/defaults.py | 1 + ssd/modeling/backbone/__init__.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/ssd/config/defaults.py b/ssd/config/defaults.py index bc80182a..e9d45b35 100644 --- a/ssd/config/defaults.py +++ b/ssd/config/defaults.py @@ -17,6 +17,7 @@ _C.MODEL.BACKBONE = CN() _C.MODEL.BACKBONE.NAME = 'vgg' _C.MODEL.BACKBONE.OUT_CHANNELS = (512, 1024, 512, 256, 256, 256) +_C.MODEL.BACKBONE.PRETRAINED = True # ----------------------------------------------------------------------------- # PRIORS diff --git a/ssd/modeling/backbone/__init__.py b/ssd/modeling/backbone/__init__.py index 4f45d648..8fd775f1 100644 --- a/ssd/modeling/backbone/__init__.py +++ b/ssd/modeling/backbone/__init__.py @@ -15,12 +15,12 @@ def vgg(cfg, pretrained=True): @registry.BACKBONES.register('mobilenet_v2') -def mobilenet_v2(cfg, pretrained=False): +def mobilenet_v2(cfg, pretrained=True): model = MobileNetV2() if pretrained: - model.load_state_dict(load_state_dict_from_url(mobilenet_model_urls['mobilenet_v2'])) + model.load_state_dict(load_state_dict_from_url(mobilenet_model_urls['mobilenet_v2']), strict=False) return model def 
build_backbone(cfg): - return registry.BACKBONES[cfg.MODEL.BACKBONE.NAME](cfg) + return registry.BACKBONES[cfg.MODEL.BACKBONE.NAME](cfg, cfg.MODEL.BACKBONE.PRETRAINED) From a7c326110bd8c068e8fe2626aea08b3f2b6b31af Mon Sep 17 00:00:00 2001 From: lufficc Date: Sat, 22 Jun 2019 23:59:35 +0800 Subject: [PATCH 06/24] optimize registry --- ssd/modeling/backbone/__init__.py | 19 +------------------ ssd/modeling/backbone/mobilenet.py | 11 +++++++++++ ssd/modeling/backbone/vgg.py | 10 ++++++++++ ssd/modeling/box_head/__init__.py | 5 ++++- ssd/modeling/box_head/box_head.py | 2 ++ ssd/modeling/registry.py | 1 + 6 files changed, 29 insertions(+), 19 deletions(-) diff --git a/ssd/modeling/backbone/__init__.py b/ssd/modeling/backbone/__init__.py index 8fd775f1..885c6564 100644 --- a/ssd/modeling/backbone/__init__.py +++ b/ssd/modeling/backbone/__init__.py @@ -1,25 +1,8 @@ from ssd.modeling import registry -from ssd.utils.model_zoo import load_state_dict_from_url from .vgg import VGG -from .vgg import model_urls as vgg_model_urls from .mobilenet import MobileNetV2 -from .mobilenet import model_urls as mobilenet_model_urls - -@registry.BACKBONES.register('vgg') -def vgg(cfg, pretrained=True): - model = VGG(cfg) - if pretrained: - model.init_from_pretrain(load_state_dict_from_url(vgg_model_urls['vgg'])) - return model - - -@registry.BACKBONES.register('mobilenet_v2') -def mobilenet_v2(cfg, pretrained=True): - model = MobileNetV2() - if pretrained: - model.load_state_dict(load_state_dict_from_url(mobilenet_model_urls['mobilenet_v2']), strict=False) - return model +__all__ = ['VGG', 'MobileNetV2'] def build_backbone(cfg): diff --git a/ssd/modeling/backbone/mobilenet.py b/ssd/modeling/backbone/mobilenet.py index 4d0ded7c..88cfd435 100644 --- a/ssd/modeling/backbone/mobilenet.py +++ b/ssd/modeling/backbone/mobilenet.py @@ -1,5 +1,8 @@ from torch import nn +from ssd.modeling import registry +from ssd.utils.model_zoo import load_state_dict_from_url + model_urls = { 'mobilenet_v2': 
'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth', } @@ -121,3 +124,11 @@ def forward(self, x): features.append(x) return tuple(features) + + +@registry.BACKBONES.register('mobilenet_v2') +def mobilenet_v2(cfg, pretrained=True): + model = MobileNetV2() + if pretrained: + model.load_state_dict(load_state_dict_from_url(model_urls['mobilenet_v2']), strict=False) + return model diff --git a/ssd/modeling/backbone/vgg.py b/ssd/modeling/backbone/vgg.py index 17bf2813..f70109e1 100644 --- a/ssd/modeling/backbone/vgg.py +++ b/ssd/modeling/backbone/vgg.py @@ -2,6 +2,8 @@ import torch.nn.functional as F from ssd.layers import L2Norm +from ssd.modeling import registry +from ssd.utils.model_zoo import load_state_dict_from_url model_urls = { 'vgg': 'https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth', @@ -119,3 +121,11 @@ def forward(self, x): features.append(x) return tuple(features) + + +@registry.BACKBONES.register('vgg') +def vgg(cfg, pretrained=True): + model = VGG(cfg) + if pretrained: + model.init_from_pretrain(load_state_dict_from_url(model_urls['vgg'])) + return model diff --git a/ssd/modeling/box_head/__init__.py b/ssd/modeling/box_head/__init__.py index 26e71b6e..9439842e 100644 --- a/ssd/modeling/box_head/__init__.py +++ b/ssd/modeling/box_head/__init__.py @@ -1,6 +1,9 @@ +from ssd.modeling import registry from .box_head import SSDBoxHead +__all__ = ['SSDBoxHead'] + def build_box_head(cfg): # TODO: make it more general - return SSDBoxHead(cfg) + return registry.BOX_HEADS['SSDBoxHead'](cfg) diff --git a/ssd/modeling/box_head/box_head.py b/ssd/modeling/box_head/box_head.py index e0708f93..582a570f 100644 --- a/ssd/modeling/box_head/box_head.py +++ b/ssd/modeling/box_head/box_head.py @@ -1,6 +1,7 @@ from torch import nn import torch.nn.functional as F +from ssd.modeling import registry from ssd.modeling.anchors.prior_box import PriorBox from ssd.modeling.box_head.box_predictor import make_box_predictor from ssd.utils import box_utils @@ -8,6 
+9,7 @@ from .loss import MultiBoxLoss +@registry.BOX_HEADS.register('SSDBoxHead') class SSDBoxHead(nn.Module): def __init__(self, cfg): super().__init__() diff --git a/ssd/modeling/registry.py b/ssd/modeling/registry.py index b7c122b3..51cdfa43 100644 --- a/ssd/modeling/registry.py +++ b/ssd/modeling/registry.py @@ -1,4 +1,5 @@ from ssd.utils.registry import Registry BACKBONES = Registry() +BOX_HEADS = Registry() BOX_PREDICTORS = Registry() From 4d7c37f3c676d031ea4bd6fd65dcc58a3f80c7bb Mon Sep 17 00:00:00 2001 From: lufficc Date: Sun, 23 Jun 2019 11:57:27 +0800 Subject: [PATCH 07/24] move SeparableConv2d to ssd.layers --- ssd/layers/__init__.py | 3 +++ ssd/layers/separable_conv.py | 17 +++++++++++++++++ ssd/modeling/box_head/box_predictor.py | 11 +---------- 3 files changed, 21 insertions(+), 10 deletions(-) create mode 100644 ssd/layers/separable_conv.py diff --git a/ssd/layers/__init__.py b/ssd/layers/__init__.py index 45f9d968..ae69db64 100644 --- a/ssd/layers/__init__.py +++ b/ssd/layers/__init__.py @@ -1,6 +1,9 @@ import torch import torch.nn as nn import torch.nn.init as init +from .separable_conv import SeparableConv2d + +__all__ = ['L2Norm', 'SeparableConv2d'] class L2Norm(nn.Module): diff --git a/ssd/layers/separable_conv.py b/ssd/layers/separable_conv.py new file mode 100644 index 00000000..8712d363 --- /dev/null +++ b/ssd/layers/separable_conv.py @@ -0,0 +1,17 @@ +from torch import nn + + +class SeparableConv2d(nn.Module): + def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding=0, onnx_compatible=False): + super().__init__() + ReLU = nn.ReLU if onnx_compatible else nn.ReLU6 + self.conv = nn.Sequential( + nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, + groups=in_channels, stride=stride, padding=padding), + nn.BatchNorm2d(in_channels), + ReLU(), + nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1), + ) + + def forward(self, x): + return self.conv(x) diff --git 
a/ssd/modeling/box_head/box_predictor.py b/ssd/modeling/box_head/box_predictor.py index 54c500db..f2ab797a 100644 --- a/ssd/modeling/box_head/box_predictor.py +++ b/ssd/modeling/box_head/box_predictor.py @@ -1,6 +1,7 @@ import torch from torch import nn +from ssd.layers import SeparableConv2d from ssd.modeling import registry @@ -50,16 +51,6 @@ def reg_block(self, level, out_channels, boxes_per_location): return nn.Conv2d(out_channels, boxes_per_location * 4, kernel_size=3, stride=1, padding=1) -def SeparableConv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, onnx_compatible=False): - ReLU = nn.ReLU if onnx_compatible else nn.ReLU6 - return nn.Sequential( - nn.Conv2d(in_channels=in_channels, out_channels=in_channels, kernel_size=kernel_size, groups=in_channels, stride=stride, padding=padding), - nn.BatchNorm2d(in_channels), - ReLU(), - nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=1), - ) - - @registry.BOX_PREDICTORS.register('SSDLiteBoxPredictor') class SSDLiteBoxPredictor(BoxPredictor): def cls_block(self, level, out_channels, boxes_per_location): From 1afc200ba5562e48649d361179302458cd6e3c30 Mon Sep 17 00:00:00 2001 From: lufficc Date: Sun, 23 Jun 2019 12:03:54 +0800 Subject: [PATCH 08/24] add build_** to __all__ --- ssd/modeling/backbone/__init__.py | 2 +- ssd/modeling/box_head/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ssd/modeling/backbone/__init__.py b/ssd/modeling/backbone/__init__.py index 885c6564..91ab280d 100644 --- a/ssd/modeling/backbone/__init__.py +++ b/ssd/modeling/backbone/__init__.py @@ -2,7 +2,7 @@ from .vgg import VGG from .mobilenet import MobileNetV2 -__all__ = ['VGG', 'MobileNetV2'] +__all__ = ['build_backbone', 'VGG', 'MobileNetV2'] def build_backbone(cfg): diff --git a/ssd/modeling/box_head/__init__.py b/ssd/modeling/box_head/__init__.py index 9439842e..57308c00 100644 --- a/ssd/modeling/box_head/__init__.py +++ b/ssd/modeling/box_head/__init__.py @@ -1,7 
+1,7 @@ from ssd.modeling import registry from .box_head import SSDBoxHead -__all__ = ['SSDBoxHead'] +__all__ = ['build_box_head', 'SSDBoxHead'] def build_box_head(cfg): From c1761e17ff844b0e18cb1de2d3a543e17d950603 Mon Sep 17 00:00:00 2001 From: lufficc Date: Sun, 23 Jun 2019 12:27:32 +0800 Subject: [PATCH 09/24] rename train and eval file --- ssd/engine/inference.py | 21 ++++++++++++++++++++- ssd/engine/trainer.py | 2 +- eval_ssd.py => test.py | 22 ++-------------------- train_ssd.py => train.py | 2 +- 4 files changed, 24 insertions(+), 23 deletions(-) rename eval_ssd.py => test.py (75%) rename train_ssd.py => train.py (98%) diff --git a/ssd/engine/inference.py b/ssd/engine/inference.py index 16ff7baa..4de0acd9 100644 --- a/ssd/engine/inference.py +++ b/ssd/engine/inference.py @@ -4,9 +4,11 @@ import torch import torch.utils.data from tqdm import tqdm + +from ssd.data.build import make_data_loader from ssd.data.datasets.evaluation import evaluate -from ssd.utils import dist_util +from ssd.utils import dist_util, mkdir from ssd.utils.dist_util import synchronize, is_main_process @@ -63,3 +65,20 @@ def inference(model, data_loader, dataset_name, device, output_folder=None, use_ if output_folder: torch.save(predictions, predictions_path) return evaluate(dataset=dataset, predictions=predictions, output_dir=output_folder) + + +@torch.no_grad() +def do_evaluation(cfg, model, distributed): + if isinstance(model, torch.nn.parallel.DistributedDataParallel): + model = model.module + model.eval() + device = torch.device(cfg.MODEL.DEVICE) + data_loaders_val = make_data_loader(cfg, is_train=False, distributed=distributed) + eval_results = [] + for dataset_name, data_loader in zip(cfg.DATASETS.TEST, data_loaders_val): + output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) + if not os.path.exists(output_folder): + mkdir(output_folder) + eval_result = inference(model, data_loader, dataset_name, device, output_folder) + eval_results.append(eval_result) + return 
eval_results diff --git a/ssd/engine/trainer.py b/ssd/engine/trainer.py index 8de39bbb..d86a50dc 100644 --- a/ssd/engine/trainer.py +++ b/ssd/engine/trainer.py @@ -6,7 +6,7 @@ import torch import torch.distributed as dist -from eval_ssd import do_evaluation +from ssd.engine.inference import do_evaluation from ssd.utils import dist_util diff --git a/eval_ssd.py b/test.py similarity index 75% rename from eval_ssd.py rename to test.py index d73a883b..8547eabb 100644 --- a/eval_ssd.py +++ b/test.py @@ -6,32 +6,14 @@ import torch.utils.data from ssd.config import cfg -from ssd.data.build import make_data_loader -from ssd.engine.inference import inference +from ssd.engine.inference import do_evaluation from ssd.modeling.detector import build_detection_model -from ssd.utils import dist_util, mkdir +from ssd.utils import dist_util from ssd.utils.checkpoint import CheckPointer from ssd.utils.dist_util import synchronize from ssd.utils.logger import setup_logger -@torch.no_grad() -def do_evaluation(cfg, model, distributed): - if isinstance(model, torch.nn.parallel.DistributedDataParallel): - model = model.module - model.eval() - device = torch.device(cfg.MODEL.DEVICE) - data_loaders_val = make_data_loader(cfg, is_train=False, distributed=distributed) - eval_results = [] - for dataset_name, data_loader in zip(cfg.DATASETS.TEST, data_loaders_val): - output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) - if not os.path.exists(output_folder): - mkdir(output_folder) - eval_result = inference(model, data_loader, dataset_name, device, output_folder) - eval_results.append(eval_result) - return eval_results - - def evaluation(cfg, ckpt, distributed): logger = logging.getLogger("SSD.inference") diff --git a/train_ssd.py b/train.py similarity index 98% rename from train_ssd.py rename to train.py index 6ffd19e2..e26f73cc 100644 --- a/train_ssd.py +++ b/train.py @@ -5,7 +5,7 @@ import torch import torch.distributed as dist -from eval_ssd import do_evaluation +from 
ssd.engine.inference import do_evaluation from ssd.config import cfg from ssd.data.build import make_data_loader from ssd.engine.trainer import do_train From fcf4e3a1be05e042db0fe4a14bb07f074d0f9612 Mon Sep 17 00:00:00 2001 From: lufficc Date: Sun, 23 Jun 2019 12:31:41 +0800 Subject: [PATCH 10/24] update README --- README.md | 84 ++++--------------------------------------------------- 1 file changed, 5 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index cf0d4d14..0a81ae48 100644 --- a/README.md +++ b/README.md @@ -121,14 +121,14 @@ Where `COCO_ROOT` default is `datasets` folder in current project, you can creat ```bash # for example, train SSD300: -python train_ssd.py --config-file configs/ssd300_voc0712.yaml --vgg vgg16_reducedfc.pth +python train.py --config-file configs/vgg_ssd300_voc0712.yaml ``` ### Multi-GPU training ```bash # for example, train SSD300 with 4 GPUs: export NGPUS=4 -python -m torch.distributed.launch --nproc_per_node=$NGPUS train_ssd.py --config-file configs/ssd300_voc0712.yaml --vgg vgg16_reducedfc.pth +python -m torch.distributed.launch --nproc_per_node=$NGPUS train.py --config-file configs/vgg_ssd300_voc0712.yaml ``` The configuration files that I provide assume that we are running on single GPU. When changing number of GPUs, hyper-parameter (lr, max_iter, ...) will also changed according to this paper: [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677). The pre-trained vgg weights can be downloaded here: https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth. 
@@ -139,7 +139,7 @@ The pre-trained vgg weights can be downloaded here: https://s3.amazonaws.com/amd ```bash # for example, evaluate SSD300: -python eval_ssd.py --config-file configs/ssd300_voc0712.yaml --weights /path/to/trained_ssd300_weights.pth +python test.py --config-file configs/vgg_ssd300_voc0712.yaml ``` ### Multi-GPU evaluating @@ -147,14 +147,14 @@ python eval_ssd.py --config-file configs/ssd300_voc0712.yaml --weights /path/to/ ```bash # for example, evaluate SSD300 with 4 GPUs: export NGPUS=4 -python -m torch.distributed.launch --nproc_per_node=$NGPUS eval_ssd.py --config-file configs/ssd300_voc0712.yaml --weights /path/to/trained_ssd300_weights.pth +python -m torch.distributed.launch --nproc_per_node=$NGPUS test.py --config-file configs/vgg_ssd300_voc0712.yaml ``` ## Demo Predicting image in a folder is simple: ```bash -python demo.py --config-file configs/ssd300_voc0712.yaml --weights path/to/trained/weights.pth --images_dir demo +python demo.py --config-file configs/ssd300_voc0712.yaml --images_dir demo ``` Then the predicted images with boxes, scores and label names will saved to `demo/result` folder. @@ -182,79 +182,5 @@ Currently, I provide weights trained as follows: | SSD300* | 77.8 | 25.5 | | SSD512* | 80.2 | - | -### Details: - - - - - - - - - - - - - - - - - - - - -
VOC2007 testCOCO 2014 minival
SSD300*
mAP: 0.7783
-aeroplane       : 0.8252
-bicycle         : 0.8445
-bird            : 0.7597
-boat            : 0.7102
-bottle          : 0.5275
-bus             : 0.8643
-car             : 0.8660
-cat             : 0.8741
-chair           : 0.6179
-cow             : 0.8279
-diningtable     : 0.7862
-dog             : 0.8519
-horse           : 0.8630
-motorbike       : 0.8515
-person          : 0.8024
-pottedplant     : 0.5079
-sheep           : 0.7685
-sofa            : 0.7926
-train           : 0.8704
-tvmonitor       : 0.7554
Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.229
-Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.388
-Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.240
-Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.068
-Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.244
-Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.366
-Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.231
-Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.336
-Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.368
-Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.150
-Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.404
-Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.522
SSD512*
mAP: 0.8025
-aeroplane       : 0.8582
-bicycle         : 0.8710
-bird            : 0.8192
-boat            : 0.7410
-bottle          : 0.5894
-bus             : 0.8755
-car             : 0.8856
-cat             : 0.8926
-chair           : 0.6589
-cow             : 0.8634
-diningtable     : 0.7676
-dog             : 0.8707
-horse           : 0.8806
-motorbike       : 0.8512
-person          : 0.8316
-pottedplant     : 0.5238
-sheep           : 0.8191
-sofa            : 0.7915
-train           : 0.8735
-tvmonitor       : 0.7866
-
- ## Troubleshooting If you have issues running or compiling this code, we have compiled a list of common issues in [TROUBLESHOOTING.md](TROUBLESHOOTING.md). If your issue is not present there, please feel free to open a new issue. \ No newline at end of file From cd3b16f2c108ed8c30a2208e1c4089c13b2b1970 Mon Sep 17 00:00:00 2001 From: lufficc Date: Sun, 23 Jun 2019 13:03:26 +0800 Subject: [PATCH 11/24] optimize do_train --- ssd/engine/trainer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ssd/engine/trainer.py b/ssd/engine/trainer.py index d86a50dc..44a6620e 100644 --- a/ssd/engine/trainer.py +++ b/ssd/engine/trainer.py @@ -74,19 +74,20 @@ def do_train(cfg, model, iteration = iteration + 1 arguments["iteration"] = iteration scheduler.step() + images = images.to(device) targets = targets.to(device) - - optimizer.zero_grad() loss_dict = model(images, targets=targets) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) - loss = sum(loss for loss in loss_dict.values()) + + optimizer.zero_grad() loss.backward() optimizer.step() + trained_time += time.time() - end end = time.time() if iteration % args.log_step == 0: @@ -100,6 +101,7 @@ def do_train(cfg, model, for loss_name, loss_item in loss_dict_reduced.items(): log_str.append("{}: {:.3f}".format(loss_name, loss_item.item())) log_str = ', '.join(log_str) + # TODO: use MetricLogger to log logger.info(log_str) if summary_writer: global_step = iteration From 0fc0e1a377c03079b57570794a98ed7f0d9d067e Mon Sep 17 00:00:00 2001 From: lufficc Date: Sun, 23 Jun 2019 14:54:10 +0800 Subject: [PATCH 12/24] add data_loader cfg --- {output => outputs}/.gitignore | 0 ssd/config/defaults.py | 11 +++++++++++ ssd/data/build.py | 3 ++- 3 files changed, 13 insertions(+), 1 deletion(-) rename {output => outputs}/.gitignore (100%) diff --git a/output/.gitignore b/outputs/.gitignore similarity index 
100% rename from output/.gitignore rename to outputs/.gitignore diff --git a/ssd/config/defaults.py b/ssd/config/defaults.py index e9d45b35..34ce5aee 100644 --- a/ssd/config/defaults.py +++ b/ssd/config/defaults.py @@ -14,6 +14,9 @@ _C.MODEL.CENTER_VARIANCE = 0.1 _C.MODEL.SIZE_VARIANCE = 0.2 +# ---------------------------------------------------------------------------- # +# Backbone +# ---------------------------------------------------------------------------- # _C.MODEL.BACKBONE = CN() _C.MODEL.BACKBONE.NAME = 'vgg' _C.MODEL.BACKBONE.OUT_CHANNELS = (512, 1024, 512, 256, 256, 256) @@ -51,6 +54,14 @@ # List of the dataset names for testing, as present in paths_catalog.py _C.DATASETS.TEST = () +# ----------------------------------------------------------------------------- +# DataLoader +# ----------------------------------------------------------------------------- +_C.DATA_LOADER = CN() +# Number of data loading threads +_C.DATA_LOADER.NUM_WORKERS = 4 +_C.DATA_LOADER.PIN_MEMORY = True + # ---------------------------------------------------------------------------- # # Solver # ---------------------------------------------------------------------------- # diff --git a/ssd/data/build.py b/ssd/data/build.py index b8e26173..0e5d4e2b 100644 --- a/ssd/data/build.py +++ b/ssd/data/build.py @@ -50,7 +50,8 @@ def make_data_loader(cfg, is_train=True, distributed=False, max_iter=None, start if max_iter is not None: batch_sampler = samplers.IterationBasedBatchSampler(batch_sampler, num_iterations=max_iter, start_iter=start_iter) - data_loader = DataLoader(dataset, num_workers=6, batch_sampler=batch_sampler, pin_memory=True, collate_fn=BatchCollator(is_train)) + data_loader = DataLoader(dataset, num_workers=cfg.DATA_LOADER.NUM_WORKERS, batch_sampler=batch_sampler, + pin_memory=cfg.DATA_LOADER.PIN_MEMORY, collate_fn=BatchCollator(is_train)) data_loaders.append(data_loader) if is_train: From b65560304dc61e63fa45e5a26fee930661c6e998 Mon Sep 17 00:00:00 2001 From: lufficc Date: 
Sun, 23 Jun 2019 15:50:42 +0800 Subject: [PATCH 13/24] add MetricLogger --- ssd/engine/trainer.py | 46 ++++++++++++++------------ ssd/utils/metric_logger.py | 66 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 20 deletions(-) create mode 100644 ssd/utils/metric_logger.py diff --git a/ssd/engine/trainer.py b/ssd/engine/trainer.py index 44a6620e..88c5a28a 100644 --- a/ssd/engine/trainer.py +++ b/ssd/engine/trainer.py @@ -8,6 +8,7 @@ from ssd.engine.inference import do_evaluation from ssd.utils import dist_util +from ssd.utils.metric_logger import MetricLogger def write_metric(eval_result, prefix, summary_writer, global_step): @@ -54,7 +55,9 @@ def do_train(cfg, model, arguments, args): logger = logging.getLogger("SSD.trainer") - logger.info("Start training...") + logger.info("Start training ...") + meters = MetricLogger() + model.train() save_to_disk = dist_util.get_rank() == 0 if args.use_tensorboard and save_to_disk: @@ -67,8 +70,6 @@ def do_train(cfg, model, max_iter = len(data_loader) start_iter = arguments["iteration"] start_training_time = time.time() - trained_time = 0 - tic = time.time() end = time.time() for iteration, (images, targets, _) in enumerate(data_loader, start_iter): iteration = iteration + 1 @@ -78,31 +79,38 @@ def do_train(cfg, model, images = images.to(device) targets = targets.to(device) loss_dict = model(images, targets=targets) + loss = sum(loss for loss in loss_dict.values()) # reduce losses over all GPUs for logging purposes loss_dict_reduced = reduce_loss_dict(loss_dict) losses_reduced = sum(loss for loss in loss_dict_reduced.values()) - loss = sum(loss for loss in loss_dict.values()) + meters.update(total_loss=losses_reduced, **loss_dict_reduced) optimizer.zero_grad() loss.backward() optimizer.step() - trained_time += time.time() - end + batch_time = time.time() - end end = time.time() + meters.update(time=batch_time) if iteration % args.log_step == 0: - eta_seconds = int((trained_time / iteration) * (max_iter - 
iteration)) - log_str = [ - "iter: {:06d}, lr: {:.5f}, cost: {:.2f}s, eta: {}".format(iteration, - optimizer.param_groups[0]['lr'], - time.time() - tic, str(datetime.timedelta(seconds=eta_seconds))), - "total_loss: {:.3f}".format(losses_reduced.item()) - ] - for loss_name, loss_item in loss_dict_reduced.items(): - log_str.append("{}: {:.3f}".format(loss_name, loss_item.item())) - log_str = ', '.join(log_str) - # TODO: use MetricLogger to log - logger.info(log_str) + eta_seconds = meters.time.global_avg * (max_iter - iteration) + eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) + logger.info( + meters.delimiter.join([ + "iter: {iter:06d}", + "lr: {lr:.5f}", + '{meters}', + "eta: {eta}", + 'mem: {mem}M', + ]).format( + iter=iteration, + lr=optimizer.param_groups[0]['lr'], + meters=str(meters), + eta=eta_string, + mem=round(torch.cuda.max_memory_allocated() / 1024.0 / 1024.0), + ) + ) if summary_writer: global_step = iteration summary_writer.add_scalar('losses/total_loss', losses_reduced, global_step=global_step) @@ -110,8 +118,6 @@ def do_train(cfg, model, summary_writer.add_scalar('losses/{}'.format(loss_name), loss_item, global_step=global_step) summary_writer.add_scalar('lr', optimizer.param_groups[0]['lr'], global_step=global_step) - tic = time.time() - if iteration % args.save_step == 0: checkpointer.save("model_{:06d}".format(iteration), **arguments) @@ -120,7 +126,7 @@ def do_train(cfg, model, if dist_util.get_rank() == 0 and summary_writer: for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST): write_metric(eval_result['metrics'], 'metrics/' + dataset, summary_writer, iteration) - model.train() + model.train() # *IMPORTANT*: change to train mode after eval. 
checkpointer.save("model_final", **arguments) # compute training time diff --git a/ssd/utils/metric_logger.py b/ssd/utils/metric_logger.py new file mode 100644 index 00000000..ce5cfd88 --- /dev/null +++ b/ssd/utils/metric_logger.py @@ -0,0 +1,66 @@ +from collections import deque, defaultdict +import numpy as np +import torch + + +class SmoothedValue: + """Track a series of values and provide access to smoothed values over a + window or the global series average. + """ + + def __init__(self, window_size=10): + self.deque = deque(maxlen=window_size) + self.value = np.nan + self.series = [] + self.total = 0.0 + self.count = 0 + + def update(self, value): + self.deque.append(value) + self.series.append(value) + self.count += 1 + self.total += value + self.value = value + + @property + def median(self): + values = np.array(self.deque) + return np.median(values) + + @property + def avg(self): + values = np.array(self.deque) + return np.mean(values) + + @property + def global_avg(self): + return self.total / self.count + + +class MetricLogger: + def __init__(self, delimiter=", "): + self.meters = defaultdict(SmoothedValue) + self.delimiter = delimiter + + def update(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, torch.Tensor): + v = v.item() + assert isinstance(v, (float, int)) + self.meters[k].update(v) + + def __getattr__(self, attr): + if attr in self.meters: + return self.meters[attr] + if attr in self.__dict__: + return self.__dict__[attr] + raise AttributeError("'{}' object has no attribute '{}'".format( + type(self).__name__, attr)) + + def __str__(self): + loss_str = [] + for name, meter in self.meters.items(): + loss_str.append( + "{}: {:.3f} ({:.3f})".format(name, meter.avg, meter.global_avg) + ) + return self.delimiter.join(loss_str) From fd255fc364e17b49195f4825bea602d25a965b99 Mon Sep 17 00:00:00 2001 From: lufficc Date: Sun, 23 Jun 2019 17:30:02 +0800 Subject: [PATCH 14/24] save log to file --- ssd/data/datasets/evaluation/coco/__init__.py 
| 1 + ssd/utils/logger.py | 8 +++++++- test.py | 2 +- train.py | 13 ++++++++----- 4 files changed, 17 insertions(+), 7 deletions(-) diff --git a/ssd/data/datasets/evaluation/coco/__init__.py b/ssd/data/datasets/evaluation/coco/__init__.py index d306a2fc..29d6dc33 100644 --- a/ssd/data/datasets/evaluation/coco/__init__.py +++ b/ssd/data/datasets/evaluation/coco/__init__.py @@ -47,4 +47,5 @@ def coco_evaluation(dataset, predictions, output_dir): metrics = {} for i, key in enumerate(keys): metrics[key] = coco_eval.stats[i] + logger.info('{:<10}: {}'.format(key, round(coco_eval.stats[i], 3))) return dict(metrics=metrics) diff --git a/ssd/utils/logger.py b/ssd/utils/logger.py index 083fedde..9e617a1a 100644 --- a/ssd/utils/logger.py +++ b/ssd/utils/logger.py @@ -1,8 +1,9 @@ import logging +import os import sys -def setup_logger(name, distributed_rank): +def setup_logger(name, distributed_rank, save_dir=None): logger = logging.getLogger(name) logger.setLevel(logging.DEBUG) # don't log results for the non-master process @@ -13,4 +14,9 @@ def setup_logger(name, distributed_rank): formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s") stream_handler.setFormatter(formatter) logger.addHandler(stream_handler) + if save_dir: + fh = logging.FileHandler(os.path.join(save_dir, 'log.txt')) + fh.setLevel(logging.DEBUG) + fh.setFormatter(formatter) + logger.addHandler(fh) return logger diff --git a/test.py b/test.py index 8547eabb..eed7ecb6 100644 --- a/test.py +++ b/test.py @@ -68,7 +68,7 @@ def main(): cfg.merge_from_list(args.opts) cfg.freeze() - logger = setup_logger("SSD", dist_util.get_rank()) + logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR) logger.info("Using {} GPUs".format(num_gpus)) logger.info(args) diff --git a/train.py b/train.py index e26f73cc..4f31a0b0 100644 --- a/train.py +++ b/train.py @@ -11,7 +11,7 @@ from ssd.engine.trainer import do_train from ssd.modeling.detector import build_detection_model from ssd.solver.build 
import make_optimizer, make_lr_scheduler -from ssd.utils import dist_util +from ssd.utils import dist_util, mkdir from ssd.utils.checkpoint import CheckPointer from ssd.utils.dist_util import synchronize from ssd.utils.logger import setup_logger @@ -85,14 +85,17 @@ def main(): torch.distributed.init_process_group(backend="nccl", init_method="env://") synchronize() - logger = setup_logger("SSD", dist_util.get_rank()) - logger.info("Using {} GPUs".format(num_gpus)) - logger.info(args) - cfg.merge_from_file(args.config_file) cfg.merge_from_list(args.opts) cfg.freeze() + if cfg.OUTPUT_DIR: + mkdir(cfg.OUTPUT_DIR) + + logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR) + logger.info("Using {} GPUs".format(num_gpus)) + logger.info(args) + logger.info("Loaded configuration file {}".format(args.config_file)) with open(args.config_file, "r") as cf: config_str = "\n" + cf.read() From f4611e1d12f6346c8c77e5f20a036a4ff784db91 Mon Sep 17 00:00:00 2001 From: lufficc Date: Sun, 23 Jun 2019 22:33:08 +0800 Subject: [PATCH 15/24] optimize demo --- demo.py | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/demo.py b/demo.py index 5fa2145b..aa72dcbb 100644 --- a/demo.py +++ b/demo.py @@ -1,9 +1,9 @@ import glob import os +import time import torch from PIL import Image -from tqdm import tqdm from vizer.draw import draw_boxes from ssd.config import cfg @@ -31,6 +31,8 @@ def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type): model = model.to(device) checkpointer = CheckPointer(model, save_dir=cfg.OUTPUT_DIR) checkpointer.load(ckpt, use_latest=ckpt is None) + weight_file = ckpt if ckpt else checkpointer.get_checkpoint_file() + print('Loaded weights from {}'.format(weight_file)) image_paths = glob.glob(os.path.join(images_dir, '*.jpg')) mkdir(output_dir) @@ -38,24 +40,38 @@ def run_demo(cfg, ckpt, score_threshold, images_dir, output_dir, dataset_type): cpu_device = torch.device("cpu") transforms = 
build_transforms(cfg, is_train=False) model.eval() - for image_path in tqdm(image_paths): + for i, image_path in enumerate(image_paths): + start = time.time() + image_name = os.path.basename(image_path) + image = np.array(Image.open(image_path).convert("RGB")) - height, width, _ = image.shape + height, width = image.shape[:2] images = transforms(image)[0].unsqueeze(0) + load_time = time.time() - start + start = time.time() result = model(images.to(device))[0] + inference_time = time.time() - start + result = result.resize((width, height)).to(cpu_device).numpy() boxes, labels, scores = result['boxes'], result['labels'], result['scores'] indices = scores > score_threshold - boxes = boxes[indices] labels = labels[indices] scores = scores[indices] + meters = ' | '.join( + [ + 'objects {:02d}'.format(len(boxes)), + 'load {:03d}ms'.format(round(load_time * 1000)), + 'inference {:03d}ms'.format(round(inference_time * 1000)), + 'FPS {}'.format(round(1.0 / inference_time)) + ] + ) + print('({:04d}/{:04d}) {}: {}'.format(i + 1, len(image_paths), image_name, meters)) drawn_image = draw_boxes(image, boxes, labels, scores, class_names).astype(np.uint8) - image_name = os.path.basename(image_path) - Image.fromarray(drawn_image).save(os.path.join(output_dir, '{}_demo.jpg'.format(image_name.split('.')[0]))) + Image.fromarray(drawn_image).save(os.path.join(output_dir, image_name)) def main(): @@ -68,7 +84,7 @@ def main(): type=str, ) parser.add_argument("--ckpt", type=str, default=None, help="Trained weights.") - parser.add_argument("--score_threshold", type=float, default=0.5) + parser.add_argument("--score_threshold", type=float, default=0.7) parser.add_argument("--images_dir", default='demo', type=str, help='Specify a image dir to do prediction.') parser.add_argument("--output_dir", default='demo/result', type=str, help='Specify a image dir to save predicted images.') parser.add_argument("--dataset_type", default="voc", type=str, help='Specify dataset type. 
Currently support voc and coco.') From 891f9bc94620e7eb71ac37603ec3a1cf1bb6d75c Mon Sep 17 00:00:00 2001 From: lufficc Date: Mon, 24 Jun 2019 10:03:29 +0800 Subject: [PATCH 16/24] rename PREDICTOR to BOX_PREDICTOR --- configs/mobilenet_v2_ssd_voc0712.yaml | 2 +- ssd/config/defaults.py | 2 +- ssd/modeling/box_head/box_predictor.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/mobilenet_v2_ssd_voc0712.yaml b/configs/mobilenet_v2_ssd_voc0712.yaml index e50229ec..d8cdbbd6 100644 --- a/configs/mobilenet_v2_ssd_voc0712.yaml +++ b/configs/mobilenet_v2_ssd_voc0712.yaml @@ -1,6 +1,6 @@ MODEL: NUM_CLASSES: 21 - PREDICTOR: 'SSDLiteBoxPredictor' + BOX_PREDICTOR: 'SSDLiteBoxPredictor' BACKBONE: NAME: 'mobilenet_v2' OUT_CHANNELS: (96, 1280, 512, 256, 256, 64) diff --git a/ssd/config/defaults.py b/ssd/config/defaults.py index 34ce5aee..e005e861 100644 --- a/ssd/config/defaults.py +++ b/ssd/config/defaults.py @@ -4,7 +4,7 @@ _C.MODEL = CN() _C.MODEL.META_ARCHITECTURE = 'SSDDetector' -_C.MODEL.PREDICTOR = 'SSDBoxPredictor' +_C.MODEL.BOX_PREDICTOR = 'SSDBoxPredictor' _C.MODEL.DEVICE = "cuda" # match default boxes to any ground truth with jaccard overlap higher than a threshold (0.5) _C.MODEL.THRESHOLD = 0.5 diff --git a/ssd/modeling/box_head/box_predictor.py b/ssd/modeling/box_head/box_predictor.py index f2ab797a..2b613d57 100644 --- a/ssd/modeling/box_head/box_predictor.py +++ b/ssd/modeling/box_head/box_predictor.py @@ -67,4 +67,4 @@ def reg_block(self, level, out_channels, boxes_per_location): def make_box_predictor(cfg): - return registry.BOX_PREDICTORS[cfg.MODEL.PREDICTOR](cfg) + return registry.BOX_PREDICTORS[cfg.MODEL.BOX_PREDICTOR](cfg) From 711f3e30ffe97ba30e37989c5c71c42f15d52313 Mon Sep 17 00:00:00 2001 From: lufficc Date: Mon, 24 Jun 2019 10:40:33 +0800 Subject: [PATCH 17/24] checkpoint support url --- ssd/utils/checkpoint.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/ssd/utils/checkpoint.py 
b/ssd/utils/checkpoint.py index 145ef0f7..12468011 100644 --- a/ssd/utils/checkpoint.py +++ b/ssd/utils/checkpoint.py @@ -4,8 +4,12 @@ import torch from torch.nn.parallel import DistributedDataParallel +from ssd.utils.model_zoo import cache_url + class CheckPointer: + _last_checkpoint_name = 'last_checkpoint.txt' + def __init__(self, model, optimizer=None, @@ -56,7 +60,7 @@ def load(self, f=None, use_latest=True): return {} self.logger.info("Loading checkpoint from {}".format(f)) - checkpoint = torch.load(f, map_location=torch.device("cpu")) + checkpoint = self._load_file(f) model = self.model if isinstance(model, DistributedDataParallel): model = self.model.module @@ -73,7 +77,7 @@ def load(self, f=None, use_latest=True): return checkpoint def get_checkpoint_file(self): - save_file = os.path.join(self.save_dir, "last_checkpoint.txt") + save_file = os.path.join(self.save_dir, self._last_checkpoint_name) try: with open(save_file, "r") as f: last_saved = f.read() @@ -85,10 +89,19 @@ def get_checkpoint_file(self): return last_saved def has_checkpoint(self): - save_file = os.path.join(self.save_dir, "last_checkpoint.txt") + save_file = os.path.join(self.save_dir, self._last_checkpoint_name) return os.path.exists(save_file) def tag_last_checkpoint(self, last_filename): - save_file = os.path.join(self.save_dir, "last_checkpoint.txt") + save_file = os.path.join(self.save_dir, self._last_checkpoint_name) with open(save_file, "w") as f: f.write(last_filename) + + def _load_file(self, f): + # download url files + if f.startswith("http"): + # if the file is a url path, download it and cache it + cached_f = cache_url(f) + self.logger.info("url {} cached in {}".format(f, cached_f)) + f = cached_f + return torch.load(f, map_location=torch.device("cpu")) From 436331b128fe0ca36efa5b2c988b5f84ff290e88 Mon Sep 17 00:00:00 2001 From: lufficc Date: Mon, 24 Jun 2019 13:56:21 +0800 Subject: [PATCH 18/24] add kwargs --- ssd/data/datasets/evaluation/__init__.py | 4 ++-- 
ssd/data/datasets/evaluation/coco/__init__.py | 13 ++++++++++++- ssd/data/datasets/evaluation/voc/__init__.py | 9 +++++++-- ssd/engine/inference.py | 8 ++++---- ssd/engine/trainer.py | 2 +- 5 files changed, 26 insertions(+), 10 deletions(-) diff --git a/ssd/data/datasets/evaluation/__init__.py b/ssd/data/datasets/evaluation/__init__.py index 08174093..f9c09400 100644 --- a/ssd/data/datasets/evaluation/__init__.py +++ b/ssd/data/datasets/evaluation/__init__.py @@ -3,7 +3,7 @@ from .voc import voc_evaluation -def evaluate(dataset, predictions, output_dir): +def evaluate(dataset, predictions, output_dir, **kwargs): """evaluate dataset using different methods based on dataset type. Args: dataset: Dataset object @@ -14,7 +14,7 @@ def evaluate(dataset, predictions, output_dir): evaluation result """ args = dict( - dataset=dataset, predictions=predictions, output_dir=output_dir + dataset=dataset, predictions=predictions, output_dir=output_dir, **kwargs, ) if isinstance(dataset, VOCDataset): return voc_evaluation(**args) diff --git a/ssd/data/datasets/evaluation/coco/__init__.py b/ssd/data/datasets/evaluation/coco/__init__.py index 29d6dc33..4acba8f3 100644 --- a/ssd/data/datasets/evaluation/coco/__init__.py +++ b/ssd/data/datasets/evaluation/coco/__init__.py @@ -1,9 +1,10 @@ import json import logging import os +from datetime import datetime -def coco_evaluation(dataset, predictions, output_dir): +def coco_evaluation(dataset, predictions, output_dir, iteration=None): coco_results = [] for i, prediction in enumerate(predictions): img_info = dataset.get_img_info(i) @@ -43,9 +44,19 @@ def coco_evaluation(dataset, predictions, output_dir): coco_eval.accumulate() coco_eval.summarize() + result_strings = [] keys = ["AP", "AP50", "AP75", "APs", "APm", "APl"] metrics = {} for i, key in enumerate(keys): metrics[key] = coco_eval.stats[i] logger.info('{:<10}: {}'.format(key, round(coco_eval.stats[i], 3))) + result_strings.append('{:<10}: {}'.format(key, round(coco_eval.stats[i], 
3))) + + if iteration is not None: + result_path = os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration)) + else: + result_path = os.path.join(output_dir, 'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) + with open(result_path, "w") as f: + f.write('\n'.join(result_strings)) + return dict(metrics=metrics) diff --git a/ssd/data/datasets/evaluation/voc/__init__.py b/ssd/data/datasets/evaluation/voc/__init__.py index 77eb9f2e..924abec0 100644 --- a/ssd/data/datasets/evaluation/voc/__init__.py +++ b/ssd/data/datasets/evaluation/voc/__init__.py @@ -7,7 +7,7 @@ from .eval_detection_voc import eval_detection_voc -def voc_evaluation(dataset, predictions, output_dir): +def voc_evaluation(dataset, predictions, output_dir, iteration=None): class_names = dataset.class_names pred_boxes_list = [] @@ -49,7 +49,12 @@ def voc_evaluation(dataset, predictions, output_dir): metrics[class_names[i]] = ap result_str += "{:<16}: {:.4f}\n".format(class_names[i], ap) logger.info(result_str) - result_path = os.path.join(output_dir, "result_{}.txt".format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) + + if iteration is not None: + result_path = os.path.join(output_dir, 'result_{:07d}.txt'.format(iteration)) + else: + result_path = os.path.join(output_dir, 'result_{}.txt'.format(datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))) with open(result_path, "w") as f: f.write(result_str) + return dict(metrics=metrics) diff --git a/ssd/engine/inference.py b/ssd/engine/inference.py index 4de0acd9..4075d051 100644 --- a/ssd/engine/inference.py +++ b/ssd/engine/inference.py @@ -49,7 +49,7 @@ def compute_on_dataset(model, data_loader, device): return results_dict -def inference(model, data_loader, dataset_name, device, output_folder=None, use_cached=False): +def inference(model, data_loader, dataset_name, device, output_folder=None, use_cached=False, **kwargs): dataset = data_loader.dataset logger = logging.getLogger("SSD.inference") logger.info("Evaluating {} dataset({} 
images):".format(dataset_name, len(dataset))) @@ -64,11 +64,11 @@ def inference(model, data_loader, dataset_name, device, output_folder=None, use_ return if output_folder: torch.save(predictions, predictions_path) - return evaluate(dataset=dataset, predictions=predictions, output_dir=output_folder) + return evaluate(dataset=dataset, predictions=predictions, output_dir=output_folder, **kwargs) @torch.no_grad() -def do_evaluation(cfg, model, distributed): +def do_evaluation(cfg, model, distributed, **kwargs): if isinstance(model, torch.nn.parallel.DistributedDataParallel): model = model.module model.eval() @@ -79,6 +79,6 @@ def do_evaluation(cfg, model, distributed): output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name) if not os.path.exists(output_folder): mkdir(output_folder) - eval_result = inference(model, data_loader, dataset_name, device, output_folder) + eval_result = inference(model, data_loader, dataset_name, device, output_folder, **kwargs) eval_results.append(eval_result) return eval_results diff --git a/ssd/engine/trainer.py b/ssd/engine/trainer.py index 88c5a28a..f0337d61 100644 --- a/ssd/engine/trainer.py +++ b/ssd/engine/trainer.py @@ -122,7 +122,7 @@ def do_train(cfg, model, checkpointer.save("model_{:06d}".format(iteration), **arguments) if args.eval_step > 0 and iteration % args.eval_step == 0 and not iteration == max_iter: - eval_results = do_evaluation(cfg, model, distributed=args.distributed) + eval_results = do_evaluation(cfg, model, distributed=args.distributed, iteration=iteration) if dist_util.get_rank() == 0 and summary_writer: for eval_result, dataset in zip(eval_results, cfg.DATASETS.TEST): write_metric(eval_result['metrics'], 'metrics/' + dataset, summary_writer, iteration) From 685250f5ee3ba20f6dd796a6730c9e8a0b450064 Mon Sep 17 00:00:00 2001 From: lufficc Date: Tue, 25 Jun 2019 19:29:59 +0800 Subject: [PATCH 19/24] update container --- ssd/structures/container.py | 21 ++++++++++++--------- 1 file changed, 12 
insertions(+), 9 deletions(-) diff --git a/ssd/structures/container.py b/ssd/structures/container.py index 57f22376..0c9b183d 100644 --- a/ssd/structures/container.py +++ b/ssd/structures/container.py @@ -1,4 +1,9 @@ class Container: + """ + Help class from manage boxes, labels, etc... + Not inherit dict due to `default_collate` will modify dict's subclass to dict. + """ + def __init__(self, *args, **kwargs): self._data_dict = dict(*args, **kwargs) @@ -14,21 +19,19 @@ def __iter__(self): def __setitem__(self, key, value): self._data_dict[key] = value - def to(self, *args, **kwargs): + def _call(self, name, *args, **kwargs): keys = list(self._data_dict.keys()) for key in keys: value = self._data_dict[key] - if hasattr(value, 'to'): - self._data_dict[key] = value.to(*args, **kwargs) + if hasattr(value, name): + self._data_dict[key] = getattr(value, name)(*args, **kwargs) return self + def to(self, *args, **kwargs): + return self._call('to', *args, **kwargs) + def numpy(self): - keys = list(self._data_dict.keys()) - for key in keys: - value = self._data_dict[key] - if hasattr(value, 'numpy'): - self._data_dict[key] = value.numpy() - return self + return self._call('numpy') def resize(self, size): """resize boxes From 18ab222e1faf9351aba957cc44be17621d7425e5 Mon Sep 17 00:00:00 2001 From: lufficc Date: Sat, 29 Jun 2019 13:19:27 +0800 Subject: [PATCH 20/24] update README --- README.md | 16 ++++++++-------- ...712.yaml => mobilenet_v2_ssd320_voc0712.yaml} | 0 ssd/structures/container.py | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) rename configs/{mobilenet_v2_ssd_voc0712.yaml => mobilenet_v2_ssd320_voc0712.yaml} (100%) diff --git a/README.md b/README.md index 0a81ae48..36bf725c 100644 --- a/README.md +++ b/README.md @@ -14,14 +14,14 @@ This repository implements [SSD (Single Shot MultiBox Detector)](https://arxiv.o ## Highlights -- PyTorch 1.0 -- GPU/CPU NMS -- Multi-GPU training and inference -- Modular -- Visualization(Support Tensorboard) -- CPU support 
for inference -- Evaluating during training -- Metrics Visualization +- **PyTorch 1.0**: Support PyTorch 1.0 or higher. +- **Multi-GPU training and inference**: We use `DistributedDataParallel`, you can train or test with arbitrary GPU(s), the training schema will change accordingly. +- **Modular**: And you own modules without pain.We abstract `backbone`,`Detector`, `BoxHead`, `BoxPredictor` etc... You can replace every component with your own code without change the code base.For example, You can add EfficientNet as backbone with change code, just add `efficient_net.py` and register it, specific it in the config file, It's done! +- **CPU support for inference**: runs on CPU in inference time. +- *Smooth and enjoyable training procedure*: we save the state of model, optimizer, scheduler, training iter, you can stop your training and resume training exactly from the save point without change your training `CMD`. +- **Batched inference**: can perform inference using multiple images per batch per GPU +- **Evaluating during training**: eval you model every `eval_step` to check performance improving or not. +- **Metrics Visualization**: visualize metrics details in tensorboard, like AP, APl, APm and APs for COCO dataset or mAP and 20 categories' AP for VOC dataset. ## Installation ### Requirements diff --git a/configs/mobilenet_v2_ssd_voc0712.yaml b/configs/mobilenet_v2_ssd320_voc0712.yaml similarity index 100% rename from configs/mobilenet_v2_ssd_voc0712.yaml rename to configs/mobilenet_v2_ssd320_voc0712.yaml diff --git a/ssd/structures/container.py b/ssd/structures/container.py index 0c9b183d..7a05e205 100644 --- a/ssd/structures/container.py +++ b/ssd/structures/container.py @@ -1,7 +1,7 @@ class Container: """ - Help class from manage boxes, labels, etc... - Not inherit dict due to `default_collate` will modify dict's subclass to dict. + Help class for manage boxes, labels, etc... + Not inherit dict due to `default_collate` will change dict's subclass to dict. 
""" def __init__(self, *args, **kwargs): From 2c47244a4c81e24c29b432858574e045caf4f36a Mon Sep 17 00:00:00 2001 From: lufficc Date: Sat, 29 Jun 2019 15:02:15 +0800 Subject: [PATCH 21/24] update README --- README.md | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/README.md b/README.md index 36bf725c..aa523882 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,10 @@ This repository implements [SSD (Single Shot MultiBox Detector)](https://arxiv.o - **PyTorch 1.0**: Support PyTorch 1.0 or higher. - **Multi-GPU training and inference**: We use `DistributedDataParallel`, you can train or test with arbitrary GPU(s), the training schema will change accordingly. -- **Modular**: And you own modules without pain.We abstract `backbone`,`Detector`, `BoxHead`, `BoxPredictor` etc... You can replace every component with your own code without change the code base.For example, You can add EfficientNet as backbone with change code, just add `efficient_net.py` and register it, specific it in the config file, It's done! +- **Modular**: And you own modules without pain. We abstract `backbone`,`Detector`, `BoxHead`, `BoxPredictor`, etc. You can replace every component with your own code without change the code base. For example, You can add [EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch) as backbone, just add `efficient_net.py` and register it, specific it in the config file, It's done! - **CPU support for inference**: runs on CPU in inference time. -- *Smooth and enjoyable training procedure*: we save the state of model, optimizer, scheduler, training iter, you can stop your training and resume training exactly from the save point without change your training `CMD`. 
-- **Batched inference**: can perform inference using multiple images per batch per GPU +- **Smooth and enjoyable training procedure**: we save the state of model, optimizer, scheduler, training iter, you can stop your training and resume training exactly from the save point without change your training `CMD`. +- **Batched inference**: can perform inference using multiple images per batch per GPU. - **Evaluating during training**: eval you model every `eval_step` to check performance improving or not. - **Metrics Visualization**: visualize metrics details in tensorboard, like AP, APl, APm and APs for COCO dataset or mAP and 20 categories' AP for VOC dataset. @@ -27,27 +27,20 @@ This repository implements [SSD (Single Shot MultiBox Detector)](https://arxiv.o ### Requirements 1. Python3 -1. PyTorch 1.0 +1. PyTorch 1.0 or higher 1. yacs +1. [Vizer](https://github.com/lufficc/Vizer) 1. GCC >= 4.9 1. OpenCV + ### Step-by-step installation ```bash -# First, make sure that your conda is setup properly with the right environment -# for that, check that `which conda`, `which pip` and `which python` points to the -# right path. From a clean conda env, this is what you need to do. -# But if you don't use conda, it's OK. Just pip install necessary packages. - -conda create --name SSD -source activate SSD - -# follow PyTorch installation in https://pytorch.org/get-started/locally/ -conda install pytorch torchvision -c pytorch - -pip install yacs tqdm -conda install opencv +git clone https://github.com/lufficc/SSD.git +cd SSD +#Required packages +pip install torch torchvision yacs tqdm opencv-python vizer # Optional packages # If you want visualize loss curve. Default is enabled. Disable by using --use_tensorboard 0 when training. @@ -58,14 +51,11 @@ cd ~/github git clone https://github.com/cocodataset/cocoapi.git cd cocoapi/PythonAPI python setup.py build_ext install - -# Finally, download the pre-trained vgg weights. 
-wget https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth ``` ### Build -NMS build is not necessary, as we provide a python-like nms, but is 2x slower than build-version. +NMS build is not necessary, as we provide a python-like nms, but is very slower than build-version. ```bash # For faster inference you need to build nms, this is needed when evaluating. Only training doesn't need this. cd ext @@ -158,13 +148,6 @@ python demo.py --config-file configs/ssd300_voc0712.yaml --images_dir demo ``` Then the predicted images with boxes, scores and label names will saved to `demo/result` folder. -Currently, I provide weights trained as follows: - -| | Weights | -| :-----: | :----------: | -| SSD300* | [ssd300_voc0712_mAP77.83.pth(100 MB)](https://github.com/lufficc/SSD/releases/download/v1.0.1/ssd300_voc0712_mAP77.83.pth) | -| SSD512* | [ssd512_voc0712_mAP80.25.pth(104 MB)](https://github.com/lufficc/SSD/releases/download/v1.0.1/ssd512_voc0712_mAP80.25.pth) | - ## Performance ### Origin Paper: From 29ba9af0d4b5e5938a063f114c6ad149f1ea5d83 Mon Sep 17 00:00:00 2001 From: lufficc Date: Tue, 2 Jul 2019 00:24:53 +0800 Subject: [PATCH 22/24] add efficient_net --- configs/efficient_net_b3_ssd300_voc0712.yaml | 18 ++ ssd/config/defaults.py | 2 +- ssd/modeling/backbone/__init__.py | 3 +- .../backbone/efficient_net/__init__.py | 14 + .../backbone/efficient_net/efficient_net.py | 231 ++++++++++++++++ ssd/modeling/backbone/efficient_net/utils.py | 247 ++++++++++++++++++ ssd/solver/lr_scheduler.py | 31 ++- 7 files changed, 535 insertions(+), 11 deletions(-) create mode 100644 configs/efficient_net_b3_ssd300_voc0712.yaml create mode 100644 ssd/modeling/backbone/efficient_net/__init__.py create mode 100644 ssd/modeling/backbone/efficient_net/efficient_net.py create mode 100644 ssd/modeling/backbone/efficient_net/utils.py diff --git a/configs/efficient_net_b3_ssd300_voc0712.yaml b/configs/efficient_net_b3_ssd300_voc0712.yaml new file mode 100644 index 00000000..46e9c7ff --- 
/dev/null +++ b/configs/efficient_net_b3_ssd300_voc0712.yaml @@ -0,0 +1,18 @@ +MODEL: + NUM_CLASSES: 21 + BACKBONE: + NAME: 'efficient_net-b3' + OUT_CHANNELS: (48, 136, 384, 256, 256, 256) +INPUT: + IMAGE_SIZE: 300 +DATASETS: + TRAIN: ("voc_2007_trainval", "voc_2012_trainval") + TEST: ("voc_2007_test", ) +SOLVER: + MAX_ITER: 160000 + LR_STEPS: [105000, 135000] + GAMMA: 0.1 + BATCH_SIZE: 24 + LR: 1e-3 + +OUTPUT_DIR: 'outputs/efficient_net_b3_ssd300_voc0712' \ No newline at end of file diff --git a/ssd/config/defaults.py b/ssd/config/defaults.py index e005e861..f4c11573 100644 --- a/ssd/config/defaults.py +++ b/ssd/config/defaults.py @@ -59,7 +59,7 @@ # ----------------------------------------------------------------------------- _C.DATA_LOADER = CN() # Number of data loading threads -_C.DATA_LOADER.NUM_WORKERS = 4 +_C.DATA_LOADER.NUM_WORKERS = 8 _C.DATA_LOADER.PIN_MEMORY = True # ---------------------------------------------------------------------------- # diff --git a/ssd/modeling/backbone/__init__.py b/ssd/modeling/backbone/__init__.py index 91ab280d..fd608233 100644 --- a/ssd/modeling/backbone/__init__.py +++ b/ssd/modeling/backbone/__init__.py @@ -1,8 +1,9 @@ from ssd.modeling import registry from .vgg import VGG from .mobilenet import MobileNetV2 +from .efficient_net import EfficientNet -__all__ = ['build_backbone', 'VGG', 'MobileNetV2'] +__all__ = ['build_backbone', 'VGG', 'MobileNetV2', 'EfficientNet'] def build_backbone(cfg): diff --git a/ssd/modeling/backbone/efficient_net/__init__.py b/ssd/modeling/backbone/efficient_net/__init__.py new file mode 100644 index 00000000..f62199a4 --- /dev/null +++ b/ssd/modeling/backbone/efficient_net/__init__.py @@ -0,0 +1,14 @@ +from ssd.modeling import registry +from .efficient_net import EfficientNet + +__all__ = ['efficient_net_b3', 'EfficientNet'] + + +@registry.BACKBONES.register('efficient_net-b3') +def efficient_net_b3(cfg, pretrained=True): + if pretrained: + model = EfficientNet.from_pretrained('efficientnet-b3') 
+ else: + model = EfficientNet.from_name('efficientnet-b3') + + return model diff --git a/ssd/modeling/backbone/efficient_net/efficient_net.py b/ssd/modeling/backbone/efficient_net/efficient_net.py new file mode 100644 index 00000000..cf61b354 --- /dev/null +++ b/ssd/modeling/backbone/efficient_net/efficient_net.py @@ -0,0 +1,231 @@ +import torch +from torch import nn +from torch.nn import functional as F +from .utils import ( + relu_fn, + round_filters, + round_repeats, + drop_connect, + Conv2dSamePadding, + get_model_params, + efficientnet_params, + load_pretrained_weights, +) + +INDICES = { + 'efficientnet-b3': [7, 17, 25] +} + +EXTRAS = { + 'efficientnet-b3': [ + # in, out, k, s, p + [(384, 128, 1, 1, 0), (128, 256, 3, 2, 1)], # 5 x 5 + [(256, 128, 1, 1, 0), (128, 256, 3, 1, 0)], # 3 x 3 + [(256, 128, 1, 1, 0), (128, 256, 3, 1, 0)], # 1 x 1 + + ] +} + + +def add_extras(cfgs): + extras = nn.ModuleList() + for cfg in cfgs: + extra = [] + for prams in cfg: + in_channels, out_channels, kernel_size, stride, padding = prams + extra.append(nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)) + extra.append(nn.ReLU()) + extras.append(nn.Sequential(*extra)) + return extras + + +class MBConvBlock(nn.Module): + """ + Mobile Inverted Residual Bottleneck Block + + Args: + block_args (namedtuple): BlockArgs, see above + global_params (namedtuple): GlobalParam, see above + + Attributes: + has_se (bool): Whether the block contains a Squeeze and Excitation layer. 
+ """ + + def __init__(self, block_args, global_params): + super().__init__() + self._block_args = block_args + self._bn_mom = 1 - global_params.batch_norm_momentum + self._bn_eps = global_params.batch_norm_epsilon + self.has_se = (self._block_args.se_ratio is not None) and (0 < self._block_args.se_ratio <= 1) + self.id_skip = block_args.id_skip # skip connection and drop connect + + # Expansion phase + inp = self._block_args.input_filters # number of input channels + oup = self._block_args.input_filters * self._block_args.expand_ratio # number of output channels + if self._block_args.expand_ratio != 1: + self._expand_conv = Conv2dSamePadding(in_channels=inp, out_channels=oup, kernel_size=1, bias=False) + self._bn0 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) + + # Depthwise convolution phase + k = self._block_args.kernel_size + s = self._block_args.stride + self._depthwise_conv = Conv2dSamePadding( + in_channels=oup, out_channels=oup, groups=oup, # groups makes it depthwise + kernel_size=k, stride=s, bias=False) + self._bn1 = nn.BatchNorm2d(num_features=oup, momentum=self._bn_mom, eps=self._bn_eps) + + # Squeeze and Excitation layer, if desired + if self.has_se: + num_squeezed_channels = max(1, int(self._block_args.input_filters * self._block_args.se_ratio)) + self._se_reduce = Conv2dSamePadding(in_channels=oup, out_channels=num_squeezed_channels, kernel_size=1) + self._se_expand = Conv2dSamePadding(in_channels=num_squeezed_channels, out_channels=oup, kernel_size=1) + + # Output phase + final_oup = self._block_args.output_filters + self._project_conv = Conv2dSamePadding(in_channels=oup, out_channels=final_oup, kernel_size=1, bias=False) + self._bn2 = nn.BatchNorm2d(num_features=final_oup, momentum=self._bn_mom, eps=self._bn_eps) + + def forward(self, inputs, drop_connect_rate=None): + """ + :param inputs: input tensor + :param drop_connect_rate: drop connect rate (float, between 0 and 1) + :return: output of block + """ + + # 
Expansion and Depthwise Convolution + x = inputs + if self._block_args.expand_ratio != 1: + x = relu_fn(self._bn0(self._expand_conv(inputs))) + x = relu_fn(self._bn1(self._depthwise_conv(x))) + + # Squeeze and Excitation + if self.has_se: + x_squeezed = F.adaptive_avg_pool2d(x, 1) + x_squeezed = self._se_expand(relu_fn(self._se_reduce(x_squeezed))) + x = torch.sigmoid(x_squeezed) * x + + x = self._bn2(self._project_conv(x)) + + # Skip connection and drop connect + input_filters, output_filters = self._block_args.input_filters, self._block_args.output_filters + if self.id_skip and self._block_args.stride == 1 and input_filters == output_filters: + if drop_connect_rate: + x = drop_connect(x, p=drop_connect_rate, training=self.training) + x = x + inputs # skip connection + return x + + +class EfficientNet(nn.Module): + """ + An EfficientNet model. Most easily loaded with the .from_name or .from_pretrained methods + + Args: + blocks_args (list): A list of BlockArgs to construct blocks + global_params (namedtuple): A set of GlobalParams shared between blocks + + Example: + model = EfficientNet.from_pretrained('efficientnet-b0') + + """ + + def __init__(self, model_name, blocks_args=None, global_params=None): + super().__init__() + self.indices = INDICES[model_name] + self.extras = add_extras(EXTRAS[model_name]) + assert isinstance(blocks_args, list), 'blocks_args should be a list' + assert len(blocks_args) > 0, 'block args must be greater than 0' + self._global_params = global_params + self._blocks_args = blocks_args + + # Batch norm parameters + bn_mom = 1 - self._global_params.batch_norm_momentum + bn_eps = self._global_params.batch_norm_epsilon + + # Stem + in_channels = 3 # rgb + out_channels = round_filters(32, self._global_params) # number of output channels + self._conv_stem = Conv2dSamePadding(in_channels, out_channels, kernel_size=3, stride=2, bias=False) + self._bn0 = nn.BatchNorm2d(num_features=out_channels, momentum=bn_mom, eps=bn_eps) + + # Build blocks + 
self._blocks = nn.ModuleList([]) + for block_args in self._blocks_args: + + # Update block input and output filters based on depth multiplier. + block_args = block_args._replace( + input_filters=round_filters(block_args.input_filters, self._global_params), + output_filters=round_filters(block_args.output_filters, self._global_params), + num_repeat=round_repeats(block_args.num_repeat, self._global_params) + ) + + # The first block needs to take care of stride and filter size increase. + self._blocks.append(MBConvBlock(block_args, self._global_params)) + if block_args.num_repeat > 1: + block_args = block_args._replace(input_filters=block_args.output_filters, stride=1) + for _ in range(block_args.num_repeat - 1): + self._blocks.append(MBConvBlock(block_args, self._global_params)) + self.reset_parameters() + + def reset_parameters(self): + for m in self.extras.modules(): + if isinstance(m, nn.Conv2d): + nn.init.xavier_uniform_(m.weight) + nn.init.zeros_(m.bias) + + def extract_features(self, inputs): + """ Returns output of the final convolution layer """ + + # Stem + x = relu_fn(self._bn0(self._conv_stem(inputs))) + + features = [] + + # Blocks + for idx, block in enumerate(self._blocks): + drop_connect_rate = self._global_params.drop_connect_rate + if drop_connect_rate: + drop_connect_rate *= float(idx) / len(self._blocks) + x = block(x, drop_connect_rate) + if idx in self.indices: + features.append(x) + + return x, features + + def forward(self, inputs): + """ Calls extract_features to extract features, applies final linear layer, and returns logits. 
""" + + # Convolution layers + x, features = self.extract_features(inputs) + + for layer in self.extras: + x = layer(x) + features.append(x) + + return tuple(features) + + @classmethod + def from_name(cls, model_name, override_params=None): + cls._check_model_name_is_valid(model_name) + blocks_args, global_params = get_model_params(model_name, override_params) + return EfficientNet(model_name, blocks_args, global_params) + + @classmethod + def from_pretrained(cls, model_name): + model = EfficientNet.from_name(model_name) + load_pretrained_weights(model, model_name) + return model + + @classmethod + def get_image_size(cls, model_name): + cls._check_model_name_is_valid(model_name) + _, _, res, _ = efficientnet_params(model_name) + return res + + @classmethod + def _check_model_name_is_valid(cls, model_name, also_need_pretrained_weights=False): + """ Validates model name. None that pretrained weights are only available for + the first four models (efficientnet-b{i} for i in 0,1,2,3) at the moment. """ + num_models = 4 if also_need_pretrained_weights else 8 + valid_models = ['efficientnet_b' + str(i) for i in range(num_models)] + if model_name.replace('-', '_') not in valid_models: + raise ValueError('model_name should be one of: ' + ', '.join(valid_models)) diff --git a/ssd/modeling/backbone/efficient_net/utils.py b/ssd/modeling/backbone/efficient_net/utils.py new file mode 100644 index 00000000..e6972d66 --- /dev/null +++ b/ssd/modeling/backbone/efficient_net/utils.py @@ -0,0 +1,247 @@ +""" +This file contains helper functions for building the model and for loading model parameters. +These helper functions are built to mirror those in the official TensorFlow implementation. 
+""" + +import re +import math +import collections +import torch +from torch import nn +from torch.nn import functional as F +from ssd.utils.model_zoo import load_state_dict_from_url + +######################################################################## +############### HELPERS FUNCTIONS FOR MODEL ARCHITECTURE ############### +######################################################################## + + +# Parameters for the entire model (stem, all blocks, and head) + +GlobalParams = collections.namedtuple('GlobalParams', [ + 'batch_norm_momentum', 'batch_norm_epsilon', 'dropout_rate', + 'num_classes', 'width_coefficient', 'depth_coefficient', + 'depth_divisor', 'min_depth', 'drop_connect_rate', ]) + +# Parameters for an individual model block +BlockArgs = collections.namedtuple('BlockArgs', [ + 'kernel_size', 'num_repeat', 'input_filters', 'output_filters', + 'expand_ratio', 'id_skip', 'stride', 'se_ratio']) + +# Change namedtuple defaults +GlobalParams.__new__.__defaults__ = (None,) * len(GlobalParams._fields) +BlockArgs.__new__.__defaults__ = (None,) * len(BlockArgs._fields) + + +def relu_fn(x): + """ Swish activation function """ + return x * torch.sigmoid(x) + + +def round_filters(filters, global_params): + """ Calculate and round number of filters based on depth multiplier. """ + multiplier = global_params.width_coefficient + if not multiplier: + return filters + divisor = global_params.depth_divisor + min_depth = global_params.min_depth + filters *= multiplier + min_depth = min_depth or divisor + new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor) + if new_filters < 0.9 * filters: # prevent rounding by more than 10% + new_filters += divisor + return int(new_filters) + + +def round_repeats(repeats, global_params): + """ Round number of filters based on depth multiplier. 
""" + multiplier = global_params.depth_coefficient + if not multiplier: + return repeats + return int(math.ceil(multiplier * repeats)) + + +def drop_connect(inputs, p, training): + """ Drop connect. """ + if not training: return inputs + batch_size = inputs.shape[0] + keep_prob = 1 - p + random_tensor = keep_prob + random_tensor += torch.rand([batch_size, 1, 1, 1], dtype=inputs.dtype, device=inputs.device) + binary_tensor = torch.floor(random_tensor) + output = inputs / keep_prob * binary_tensor + return output + + +class Conv2dSamePadding(nn.Conv2d): + """ 2D Convolutions like TensorFlow """ + + def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True): + super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) + self.stride = self.stride if len(self.stride) == 2 else [self.stride[0]] * 2 + + def forward(self, x): + ih, iw = x.size()[-2:] + kh, kw = self.weight.size()[-2:] + sh, sw = self.stride + oh, ow = math.ceil(ih / sh), math.ceil(iw / sw) + pad_h = max((oh - 1) * self.stride[0] + (kh - 1) * self.dilation[0] + 1 - ih, 0) + pad_w = max((ow - 1) * self.stride[1] + (kw - 1) * self.dilation[1] + 1 - iw, 0) + if pad_h > 0 or pad_w > 0: + x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) + return F.conv2d(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) + + +######################################################################## +############## HELPERS FUNCTIONS FOR LOADING MODEL PARAMS ############## +######################################################################## + + +def efficientnet_params(model_name): + """ Map EfficientNet model name to parameter coefficients. 
""" + params_dict = { + # Coefficients: width,depth,res,dropout + 'efficientnet-b0': (1.0, 1.0, 224, 0.2), + 'efficientnet-b1': (1.0, 1.1, 240, 0.2), + 'efficientnet-b2': (1.1, 1.2, 260, 0.3), + 'efficientnet-b3': (1.2, 1.4, 300, 0.3), + 'efficientnet-b4': (1.4, 1.8, 380, 0.4), + 'efficientnet-b5': (1.6, 2.2, 456, 0.4), + 'efficientnet-b6': (1.8, 2.6, 528, 0.5), + 'efficientnet-b7': (2.0, 3.1, 600, 0.5), + } + return params_dict[model_name] + + +class BlockDecoder(object): + """ Block Decoder for readability, straight from the official TensorFlow repository """ + + @staticmethod + def _decode_block_string(block_string): + """ Gets a block through a string notation of arguments. """ + assert isinstance(block_string, str) + + ops = block_string.split('_') + options = {} + for op in ops: + splits = re.split(r'(\d.*)', op) + if len(splits) >= 2: + key, value = splits[:2] + options[key] = value + + # Check stride + assert (('s' in options and len(options['s']) == 1) or + (len(options['s']) == 2 and options['s'][0] == options['s'][1])) + + return BlockArgs( + kernel_size=int(options['k']), + num_repeat=int(options['r']), + input_filters=int(options['i']), + output_filters=int(options['o']), + expand_ratio=int(options['e']), + id_skip=('noskip' not in block_string), + se_ratio=float(options['se']) if 'se' in options else None, + stride=[int(options['s'][0])]) + + @staticmethod + def _encode_block_string(block): + """Encodes a block to a string.""" + args = [ + 'r%d' % block.num_repeat, + 'k%d' % block.kernel_size, + 's%d%d' % (block.strides[0], block.strides[1]), + 'e%s' % block.expand_ratio, + 'i%d' % block.input_filters, + 'o%d' % block.output_filters + ] + if 0 < block.se_ratio <= 1: + args.append('se%s' % block.se_ratio) + if block.id_skip is False: + args.append('noskip') + return '_'.join(args) + + @staticmethod + def decode(string_list): + """ + Decodes a list of string notations to specify blocks inside the network. 
+ + :param string_list: a list of strings, each string is a notation of block + :return: a list of BlockArgs namedtuples of block args + """ + assert isinstance(string_list, list) + blocks_args = [] + for block_string in string_list: + blocks_args.append(BlockDecoder._decode_block_string(block_string)) + return blocks_args + + @staticmethod + def encode(blocks_args): + """ + Encodes a list of BlockArgs to a list of strings. + + :param blocks_args: a list of BlockArgs namedtuples of block args + :return: a list of strings, each string is a notation of block + """ + block_strings = [] + for block in blocks_args: + block_strings.append(BlockDecoder._encode_block_string(block)) + return block_strings + + +def efficientnet(width_coefficient=None, depth_coefficient=None, + dropout_rate=0.2, drop_connect_rate=0.2): + """ Creates an efficientnet model. """ + + blocks_args = [ + 'r1_k3_s11_e1_i32_o16_se0.25', 'r2_k3_s22_e6_i16_o24_se0.25', + 'r2_k5_s22_e6_i24_o40_se0.25', 'r3_k3_s22_e6_i40_o80_se0.25', + 'r3_k5_s11_e6_i80_o112_se0.25', 'r4_k5_s22_e6_i112_o192_se0.25', + 'r1_k3_s11_e6_i192_o320_se0.25', + ] + blocks_args = BlockDecoder.decode(blocks_args) + + global_params = GlobalParams( + batch_norm_momentum=0.99, + batch_norm_epsilon=1e-3, + dropout_rate=dropout_rate, + drop_connect_rate=drop_connect_rate, + # data_format='channels_last', # removed, this is always true in PyTorch + num_classes=1000, + width_coefficient=width_coefficient, + depth_coefficient=depth_coefficient, + depth_divisor=8, + min_depth=None + ) + + return blocks_args, global_params + + +def get_model_params(model_name, override_params): + """ Get the block args and global params for a given model """ + if model_name.startswith('efficientnet'): + w, d, _, p = efficientnet_params(model_name) + # note: all models have drop connect rate = 0.2 + blocks_args, global_params = efficientnet(width_coefficient=w, depth_coefficient=d, dropout_rate=p) + else: + raise NotImplementedError('model name is not
pre-defined: %s' % model_name) + if override_params: + # ValueError will be raised here if override_params has fields not included in global_params. + global_params = global_params._replace(**override_params) + return blocks_args, global_params + + +url_map = { + 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet-b0-08094119.pth', + 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet-b1-dbc7070a.pth', + 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet-b2-27687264.pth', + 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet-b3-c8376fa2.pth', + 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet-b4-e116e8b3.pth', + 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet-b5-586e6cc6.pth', +} + + +def load_pretrained_weights(model, model_name): + """ Loads pretrained weights, and downloads if loading for the first time. """ + state_dict = load_state_dict_from_url(url_map[model_name]) + model.load_state_dict(state_dict, strict=False) + print('Loaded pretrained weights for {}'.format(model_name)) diff --git a/ssd/solver/lr_scheduler.py b/ssd/solver/lr_scheduler.py index 0bb6e4c6..2d60af7d 100644 --- a/ssd/solver/lr_scheduler.py +++ b/ssd/solver/lr_scheduler.py @@ -1,18 +1,31 @@ -from torch.optim.lr_scheduler import MultiStepLR +from bisect import bisect_right +from torch.optim.lr_scheduler import _LRScheduler -class WarmupMultiStepLR(MultiStepLR): + +class WarmupMultiStepLR(_LRScheduler): def __init__(self, optimizer, milestones, gamma=0.1, warmup_factor=1.0 / 3, warmup_iters=500, last_epoch=-1): + if not list(milestones) == sorted(milestones): + raise ValueError( + "Milestones should be a list of" " increasing integers. 
Got {}", + milestones, + ) + + self.milestones = milestones + self.gamma = gamma self.warmup_factor = warmup_factor self.warmup_iters = warmup_iters - super().__init__(optimizer, milestones, gamma, last_epoch) + super().__init__(optimizer, last_epoch) def get_lr(self): - if self.last_epoch <= self.warmup_iters: - alpha = self.last_epoch / self.warmup_iters + warmup_factor = 1 + if self.last_epoch < self.warmup_iters: + alpha = float(self.last_epoch) / self.warmup_iters warmup_factor = self.warmup_factor * (1 - alpha) + alpha - return [lr * warmup_factor for lr in self.base_lrs] - else: - lr = super().get_lr() - return lr + return [ + base_lr + * warmup_factor + * self.gamma ** bisect_right(self.milestones, self.last_epoch) + for base_lr in self.base_lrs + ] From 5c6c0719239695969256e29ba7c10faf9a6d57a4 Mon Sep 17 00:00:00 2001 From: lufficc Date: Tue, 2 Jul 2019 00:54:23 +0800 Subject: [PATCH 23/24] update README --- README.md | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index aa523882..488402ea 100644 --- a/README.md +++ b/README.md @@ -16,13 +16,13 @@ This repository implements [SSD (Single Shot MultiBox Detector)](https://arxiv.o - **PyTorch 1.0**: Support PyTorch 1.0 or higher. - **Multi-GPU training and inference**: We use `DistributedDataParallel`, you can train or test with arbitrary GPU(s), the training schema will change accordingly. -- **Modular**: And you own modules without pain. We abstract `backbone`,`Detector`, `BoxHead`, `BoxPredictor`, etc. You can replace every component with your own code without change the code base. For example, You can add [EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch) as backbone, just add `efficient_net.py` and register it, specific it in the config file, It's done! +- **Modular**: And you own modules without pain. We abstract `backbone`,`Detector`, `BoxHead`, `BoxPredictor`, etc. 
You can replace every component with your own code without changing the code base. For example, You can add [EfficientNet](https://github.com/lukemelas/EfficientNet-PyTorch) as backbone, just add `efficient_net.py` (ALREADY ADDED) and register it, specify it in the config file, It's done! - **CPU support for inference**: runs on CPU in inference time. - **Smooth and enjoyable training procedure**: we save the state of model, optimizer, scheduler, training iter, you can stop your training and resume training exactly from the save point without change your training `CMD`. - **Batched inference**: can perform inference using multiple images per batch per GPU. - **Evaluating during training**: eval you model every `eval_step` to check performance improving or not. - **Metrics Visualization**: visualize metrics details in tensorboard, like AP, APl, APm and APs for COCO dataset or mAP and 20 categories' AP for VOC dataset. - + +- **Auto download**: load pre-trained weights from URL and cache it. ## Installation ### Requirements @@ -121,7 +121,6 @@ export NGPUS=4 python -m torch.distributed.launch --nproc_per_node=$NGPUS train.py --config-file configs/vgg_ssd300_voc0712.yaml ``` The configuration files that I provide assume that we are running on single GPU. When changing number of GPUs, hyper-parameter (lr, max_iter, ...) will also changed according to this paper: [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677). -The pre-trained vgg weights can be downloaded here: https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth.
## Evaluate @@ -144,26 +143,34 @@ python -m torch.distributed.launch --nproc_per_node=$NGPUS test.py --config-file Predicting image in a folder is simple: ```bash -python demo.py --config-file configs/ssd300_voc0712.yaml --images_dir demo +python demo.py --config-file configs/vgg_ssd300_voc0712.yaml --images_dir demo ``` Then the predicted images with boxes, scores and label names will saved to `demo/result` folder. -## Performance +## MODEL ZOO ### Origin Paper: | | VOC2007 test | coco test-dev2015 | | :-----: | :----------: | :----------: | -| Train | 07+12 | trainval35k | | SSD300* | 77.2 | 25.1 | | SSD512* | 79.8 | 28.8 | -### Our Implementation: +### COCO: + +| Backbone | Input Size | box AP | Model Size | Download | +| :------------: | :----------:| :--------------------------: | :--------: | :-------: | +| VGG16 | 300 | 25.2 | 274.5MB | | +| VGG16 | 512 | xx.x | xxx.xMB | | +| Mobilenet V2 | 320 | xx.x | xxx.xMB | | + +### PASCAL VOC: -| | VOC2007 test | COCO 2014 minival | -| :-----: | :----------: | :----------------------------------: | -| Train | 07+12 | trainval35k | -| SSD300* | 77.8 | 25.5 | -| SSD512* | 80.2 | - | +| Backbone | Input Size | mAP | Model Size | Download | +| :--------------: | :----------:| :--------------------------: | :--------: | :-------: | +| VGG16 | 300 | 77.6 | 210.3MB | | +| VGG16 | 512 | xx.x | xxx.xMB | | +| Mobilenet V2 | 320 | 68.8 | 26.8MB | | +| EfficientNet-B3 | 300 | 73.9 | 101.8MB | | ## Troubleshooting If you have issues running or compiling this code, we have compiled a list of common issues in [TROUBLESHOOTING.md](TROUBLESHOOTING.md). If your issue is not present there, please feel free to open a new issue. 
\ No newline at end of file From e0135b0b5e66f390e58ec9bcf857fc3d74fd8fde Mon Sep 17 00:00:00 2001 From: lufficc Date: Tue, 2 Jul 2019 01:02:58 +0800 Subject: [PATCH 24/24] update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 488402ea..43df8c14 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ python train.py --config-file configs/vgg_ssd300_voc0712.yaml ```bash # for example, train SSD300 with 4 GPUs: export NGPUS=4 -python -m torch.distributed.launch --nproc_per_node=$NGPUS train.py --config-file configs/vgg_ssd300_voc0712.yaml +python -m torch.distributed.launch --nproc_per_node=$NGPUS train.py --config-file configs/vgg_ssd300_voc0712.yaml SOLVER.WARMUP_FACTOR 0.03333 SOLVER.WARMUP_ITERS 1000 ``` The configuration files that I provide assume that we are running on single GPU. When changing number of GPUs, hyper-parameter (lr, max_iter, ...) will also changed according to this paper: [Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour](https://arxiv.org/abs/1706.02677).