diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 1d3c164984ea..d967c9314b10 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -2611,7 +2611,9 @@ def _load_best_model(self): load_result = model.load_state_dict(state_dict, False) if not is_sagemaker_mp_enabled() and has_been_loaded: self._issue_warnings_after_load(load_result) - elif os.path.exists(os.path.join(self.state.best_model_checkpoint, WEIGHTS_INDEX_NAME)): + elif os.path.exists(os.path.join(self.state.best_model_checkpoint, SAFE_WEIGHTS_INDEX_NAME)) or os.path.exists( + os.path.join(self.state.best_model_checkpoint, WEIGHTS_INDEX_NAME) + ): load_result = load_sharded_checkpoint( model, self.state.best_model_checkpoint, strict=is_sagemaker_mp_enabled() )