diff --git a/export.py b/export.py
index bcbf84a90077..2a035736d1f2 100644
--- a/export.py
+++ b/export.py
@@ -500,9 +500,9 @@ def load_checkpoint(
     # load sparseml recipe for applying pruning and quantization
     checkpoint_recipe = train_recipe = None
     if resume:
-        train_recipe = ckpt['recipe'] if ('recipe' in ckpt) else None
-    elif ckpt['recipe'] or recipe:
-        train_recipe, checkpoint_recipe = recipe, ckpt['recipe']
+        train_recipe = ckpt.get('recipe')
+    elif recipe or ckpt.get('recipe'):
+        train_recipe, checkpoint_recipe = recipe, ckpt.get('recipe')
     sparseml_wrapper = SparseMLWrapper(model.model if val_type else model, checkpoint_recipe, train_recipe)
     exclude_anchors = train_type and (cfg or hyp.get('anchors')) and not resume
 
diff --git a/train.py b/train.py
index 738155ad1f77..8ec3c4533a47 100644
--- a/train.py
+++ b/train.py
@@ -87,9 +87,9 @@ def train(hyp, # path/to/hyp.yaml or hyp dictionary
     # Save run settings
     if not evolve:
         with open(save_dir / 'hyp.yaml', 'w') as f:
-            yaml.dump(hyp, f, sort_keys=False)
+            yaml.safe_dump(hyp, f, sort_keys=False)
         with open(save_dir / 'opt.yaml', 'w') as f:
-            yaml.dump(vars(opt), f, sort_keys=False)
+            yaml.safe_dump(vars(opt), f, sort_keys=False)
 
     # Loggers
     data_dict = None
@@ -492,7 +492,11 @@ def parse_opt(known=False):
     parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs, -1 for autobatch')
     parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=640, help='train, val image size (pixels)')
     parser.add_argument('--rect', action='store_true', help='rectangular training')
-    parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
+    parser.add_argument('--resume',
+                        nargs='?',
+                        const=True,
+                        default=False,
+                        help='resume most recent training. When true, ignores --recipe arg and re-uses saved recipe (if exists)')
     parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
     parser.add_argument('--noval', action='store_true', help='only validate final epoch')
     parser.add_argument('--noautoanchor', action='store_true', help='disable AutoAnchor')
@@ -542,7 +546,7 @@ def main(opt, callbacks=Callbacks()):
         ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path
         assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
         with open(Path(ckpt).parent.parent / 'opt.yaml', errors='ignore') as f:
-            opt = argparse.Namespace(**yaml.load(f, Loader=yaml.SafeLoader)) # replace
+            opt = argparse.Namespace(**yaml.safe_load(f)) # replace
         opt.cfg, opt.weights, opt.resume = '', ckpt, True # reinstate
         LOGGER.info(f'Resuming training from {ckpt}')
     else:
@@ -553,7 +557,7 @@
         if opt.project == str(ROOT / 'runs/train'): # if default project name, rename to runs/evolve
             opt.project = str(ROOT / 'runs/evolve')
         opt.exist_ok, opt.resume = opt.resume, False # pass resume to exist_ok and disable resume
-        opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok) # increment run
+        opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)) # increment run
 
     # DDP mode
     device = select_device(opt.device, batch_size=opt.batch_size)
diff --git a/utils/sparse.py b/utils/sparse.py
index 6004223e2cc4..34a55029a8b6 100644
--- a/utils/sparse.py
+++ b/utils/sparse.py
@@ -57,9 +57,6 @@ def state_dict(self):
         }
 
     def apply_checkpoint_structure(self):
-        if not self.enabled:
-            return
-
         if self.checkpoint_manager:
             self.checkpoint_manager.apply_structure(self.model, math.inf)
 