From 3be34bad1134fe24ae897298793dfea1ce7b3c76 Mon Sep 17 00:00:00 2001
From: LutingWang <2457348692@qq.com>
Date: Thu, 14 Mar 2024 12:20:11 +0800
Subject: [PATCH] feat: define holder mixin

---
 todd/runners/callbacks/checkpoint.py  |   18 +-
 todd/runners/callbacks/composed.py    |    2 +-
 todd/runners/callbacks/git.py         |    6 +-
 todd/runners/callbacks/interval.py    |    2 +-
 todd/runners/callbacks/log.py         |   16 +-
 todd/runners/callbacks/lr.py          |    8 +-
 todd/runners/callbacks/monitor.py     |    4 +-
 todd/runners/callbacks/tensorboard.py |    2 +-
 todd/runners/epoch_based_trainer.py   |    2 +-
 todd/runners/iter_based_trainer.py    |    2 +-
 todd/runners/strategies/base.py       |   10 +-
 todd/runners/strategies/ddp.py        |    2 +-
 todd/runners/strategies/fsdp.py       |    8 +-
 todd/runners/utils.py                 |   47 +-
 todd/utils/__init__.py                |    1 +
 todd/utils/constants.py               |   11 +
 todd/utils/mixins.py                  |   16 +-
 tutorials/runners.ipynb               | 1244 +++++++++++++++----------
 18 files changed, 833 insertions(+), 568 deletions(-)
 create mode 100644 todd/utils/constants.py

diff --git a/todd/runners/callbacks/checkpoint.py b/todd/runners/callbacks/checkpoint.py
index 09548a21..caba779a 100644
--- a/todd/runners/callbacks/checkpoint.py
+++ b/todd/runners/callbacks/checkpoint.py
@@ -36,27 +36,27 @@ def __init__(
 
     def init(self, *args, **kwargs) -> None:
         super().init(*args, **kwargs)
-        self._checkpoint_dir = self._runner.work_dir / 'checkpoints'
+        self._checkpoint_dir = self.runner.work_dir / 'checkpoints'
         self._latest_checkpoint_dir = self._checkpoint_dir / 'latest'
 
         self._checkpoint_dir.mkdir(parents=True, exist_ok=True)
 
-        if self._runner._auto_resume and self._latest_checkpoint_dir.exists():
+        if self.runner._auto_resume and self._latest_checkpoint_dir.exists():
             load_from = self._latest_checkpoint_dir
-        elif self._runner.load_from is not None:
-            load_from = pathlib.Path(self._runner.load_from)
+        elif self.runner.load_from is not None:
+            load_from = pathlib.Path(self.runner.load_from)
             assert load_from.exists()
         else:
             load_from = None
 
         if load_from is not None:
             if get_rank() == 0:
-                self._runner.logger.info("Loading from %s", load_from)
+                self.runner.logger.info("Loading from %s", load_from)
             state_dict = {
                 f.stem: torch.load(f, 'cpu')
                 for f in load_from.glob('*.pth')
             }
-            self._runner.load_state_dict(state_dict, **self._load_state_dict)
+            self.runner.load_state_dict(state_dict, **self._load_state_dict)
 
     @property
     def checkpoint_dir(self) -> pathlib.Path:
@@ -71,13 +71,13 @@ def _work_dir(self, name: str) -> pathlib.Path:
 
     def _save(self, name: str) -> None:
         # for FSDP, all ranks should call state dict
-        state_dict = self._runner.state_dict(**self._state_dict)
+        state_dict = self.runner.state_dict(**self._state_dict)
 
         if get_rank() != 0:
             return
         work_dir = self._work_dir(name)
         work_dir.mkdir(parents=True, exist_ok=True)
-        self._runner.logger.info("Saving state dict to %s", work_dir)
+        self.runner.logger.info("Saving state dict to %s", work_dir)
         for k, v in state_dict.items():
             torch.save(v, work_dir / f'{k}.pth')
 
@@ -88,7 +88,7 @@ def _save(self, name: str) -> None:
     def after_run_iter(self, batch, memo: Memo) -> None:
         super().after_run_iter(batch, memo)
         if self._should_run_iter():
-            self._save(f'iter_{self._runner.iter_}')
+            self._save(f'iter_{self.runner.iter_}')
 
     def after_run_epoch(self, epoch_memo: Memo, memo: Memo) -> None:
         super().after_run_epoch(epoch_memo, memo)
diff --git a/todd/runners/callbacks/composed.py b/todd/runners/callbacks/composed.py
index a1c1638d..83bec29b 100644
--- a/todd/runners/callbacks/composed.py
+++ b/todd/runners/callbacks/composed.py
@@ -21,7 +21,7 @@ def __init__(self, *args, callbacks: Iterable[Config], **kwargs) -> None:
         super().__init__(*args, **kwargs)
         priorities = [c.pop('priority', dict()) for c in callbacks]
         queue = [
-            CallbackRegistry.build(c, runner=self._runner) for c in callbacks
+            CallbackRegistry.build(c, runner=self.runner) for c in callbacks
         ]
         self._priority_queue: PriorityQueue[KT, BaseCallback] = \
             PriorityQueue(priorities, queue)
diff --git a/todd/runners/callbacks/git.py b/todd/runners/callbacks/git.py
index ce642acd..ab90b0fe 100644
--- a/todd/runners/callbacks/git.py
+++ b/todd/runners/callbacks/git.py
@@ -33,10 +33,10 @@ def init(self, *args, **kwargs) -> None:
                 diff = subprocess_run(args_)
             except subprocess.CalledProcessError as e:
                 diff = str(e)
-                self._runner.logger.error(e)
+                self.runner.logger.error(e)
             else:
                 file = (
-                    self._runner.work_dir / f'git_diff_{get_timestamp()}.log'
+                    self.runner.work_dir / f'git_diff_{get_timestamp()}.log'
                 )
-                self._runner.logger.info('Saving git diff to %s', file)
+                self.runner.logger.info('Saving git diff to %s', file)
                 file.write_text(diff)
diff --git a/todd/runners/callbacks/interval.py b/todd/runners/callbacks/interval.py
index 8eb9625d..489dd21f 100644
--- a/todd/runners/callbacks/interval.py
+++ b/todd/runners/callbacks/interval.py
@@ -22,7 +22,7 @@ def __should_run(self, step: int) -> bool:
         return self._interval > 0 and step % self._interval == 0
 
     def _should_run_iter(self) -> bool:
-        return not self._by_epoch and self.__should_run(self._runner.iter_)
+        return not self._by_epoch and self.__should_run(self.runner.iter_)
 
     def _should_run_epoch(self) -> bool:
         return (
diff --git a/todd/runners/callbacks/log.py b/todd/runners/callbacks/log.py
index 8f884f54..263e6b45 100644
--- a/todd/runners/callbacks/log.py
+++ b/todd/runners/callbacks/log.py
@@ -43,24 +43,24 @@ def init(self, *args, **kwargs) -> None:
         if get_rank() > 0:
             return
         if self._with_file_handler:
-            file = self._runner.work_dir / f'{get_timestamp()}.log'
+            file = self.runner.work_dir / f'{get_timestamp()}.log'
             handler = logging.FileHandler(file)
             handler.setFormatter(Formatter())
-            self._runner.logger.addHandler(handler)
+            self.runner.logger.addHandler(handler)
         if self._collect_env is not None:
             from ...base import (  # noqa: E501 pylint: disable=import-outside-toplevel
                 collect_env,
             )
             env = collect_env(**self._collect_env)
-            self._runner.logger.info(env)
+            self.runner.logger.info(env)
 
     def before_run(self, memo: Memo) -> None:
         super().before_run(memo)
         self._eta: BaseETA | None = (
             None if self._eta_config is None else ETARegistry.build(
                 self._eta_config,
-                start=self._runner.iter_ - 1,
-                end=self._runner.iters,
+                start=self.runner.iter_ - 1,
+                end=self.runner.iters,
             )
         )
 
@@ -73,10 +73,10 @@ def after_run_iter(self, batch, memo: Memo) -> None:
         super().after_run_iter(batch, memo)
         if 'log' not in memo:
             return
-        prefix = f"Iter [{self._runner.iter_}/{self._runner.iters}] "
+        prefix = f"Iter [{self.runner.iter_}/{self.runner.iters}] "
 
         if self._eta is not None:
-            eta = self._eta(self._runner.iter_)
+            eta = self._eta(self.runner.iter_)
             eta = round(eta)
             prefix += f"ETA {str(datetime.timedelta(seconds=eta))} "
 
@@ -90,7 +90,7 @@ def after_run_iter(self, batch, memo: Memo) -> None:
 
         log: dict[str, Any] = memo.pop('log')
         message = ' '.join(f'{k}={v}' for k, v in log.items() if v is not None)
-        self._runner.logger.info(prefix + message)
+        self.runner.logger.info(prefix + message)
 
     def before_run_epoch(self, epoch_memo: Memo, memo: Memo) -> None:
         super().before_run_epoch(epoch_memo, memo)
diff --git a/todd/runners/callbacks/lr.py b/todd/runners/callbacks/lr.py
index 977558b1..6a597c9a 100644
--- a/todd/runners/callbacks/lr.py
+++ b/todd/runners/callbacks/lr.py
@@ -26,7 +26,7 @@ def __init__(
         **kwargs,
     ) -> None:
         super().__init__(*args, interval=interval, **kwargs)
-        assert isinstance(self._runner, Trainer)
+        assert isinstance(self.runner, Trainer)
         self._lr_scheduler_config = lr_scheduler
 
     def init(self, *args, **kwargs) -> None:
@@ -34,7 +34,7 @@ def init(self, *args, **kwargs) -> None:
         self._build_lr_scheduler()
 
     def _build_lr_scheduler(self) -> None:
-        runner = cast(Trainer, self._runner)
+        runner = cast(Trainer, self.runner)
         self._lr_scheduler: torch.optim.lr_scheduler.LRScheduler = \
             LRSchedulerRegistry.build(
                 self._lr_scheduler_config,
@@ -75,11 +75,11 @@ class LRScaleCallback(BaseCallback):
 
     def __init__(self, *args, lr_scaler: Config, **kwargs) -> None:
         super().__init__(*args, **kwargs)
-        assert isinstance(self._runner, Trainer)
+        assert isinstance(self.runner, Trainer)
         self._lr_scaler_config = lr_scaler
 
     def _scale_lr(self, config: Config) -> None:
-        runner = cast(Trainer, self._runner)
+        runner = cast(Trainer, self.runner)
         assert runner.dataloader.batch_size is not None
         base_batch_size = config.base_batch_size
         batch_size = get_world_size() * runner.dataloader.batch_size
diff --git a/todd/runners/callbacks/monitor.py b/todd/runners/callbacks/monitor.py
index 98aabf2b..08954774 100644
--- a/todd/runners/callbacks/monitor.py
+++ b/todd/runners/callbacks/monitor.py
@@ -37,8 +37,8 @@ def run_iter_context(
     ) -> None:
         super().run_iter_context(exit_stack, batch, memo)
         context = Context(
-            self._runner.logger,
-            iter_=self._runner.iter_,
+            self.runner.logger,
+            iter_=self.runner.iter_,
             batch=batch,
             memo=memo,
         )
diff --git a/todd/runners/callbacks/tensorboard.py b/todd/runners/callbacks/tensorboard.py
index 65eb9bc5..804f907e 100644
--- a/todd/runners/callbacks/tensorboard.py
+++ b/todd/runners/callbacks/tensorboard.py
@@ -31,7 +31,7 @@ def init(self, *args, **kwargs) -> None:
         super().init(*args, **kwargs)
         if get_rank() > 0:
             return
-        log_dir = self._runner.work_dir / 'tensorboard'
+        log_dir = self.runner.work_dir / 'tensorboard'
         self._summary_writer = SummaryWriter(
             log_dir,
             **self._summary_writer_config,
diff --git a/todd/runners/epoch_based_trainer.py b/todd/runners/epoch_based_trainer.py
index bc463465..8358d003 100644
--- a/todd/runners/epoch_based_trainer.py
+++ b/todd/runners/epoch_based_trainer.py
@@ -18,7 +18,7 @@
 
 
 @RunnerRegistry.register_()
-class EpochBasedTrainer(Trainer):
+class EpochBasedTrainer(Trainer[T]):
 
     def __init__(self, *args, epochs: int, **kwargs) -> None:
         super().__init__(*args, **kwargs)
diff --git a/todd/runners/iter_based_trainer.py b/todd/runners/iter_based_trainer.py
index 107c894c..7a212fa6 100644
--- a/todd/runners/iter_based_trainer.py
+++ b/todd/runners/iter_based_trainer.py
@@ -16,7 +16,7 @@
 
 
 @RunnerRegistry.register_()
-class IterBasedTrainer(Trainer):
+class IterBasedTrainer(Trainer[T]):
 
     def __init__(self, *args, iters: int, **kwargs) -> None:
         super().__init__(*args, **kwargs)
diff --git a/todd/runners/strategies/base.py b/todd/runners/strategies/base.py
index a97d30fb..d6c2a748 100644
--- a/todd/runners/strategies/base.py
+++ b/todd/runners/strategies/base.py
@@ -2,7 +2,7 @@
     'BaseStrategy',
 ]
 
-from typing import Any, Generic, Mapping, TypeVar, cast
+from typing import Any, Mapping, TypeVar, cast
 
 import torch
 from torch import nn
@@ -15,7 +15,7 @@
 
 
 @StrategyRegistry.register_()
-class BaseStrategy(RunnerHolderMixin, StateDictMixin, Generic[T]):
+class BaseStrategy(RunnerHolderMixin[T], StateDictMixin):
 
     def __init__(
         self,
@@ -45,7 +45,7 @@ def build_optimizer(self, config: Config) -> torch.optim.Optimizer:
 
     @property
     def module(self) -> nn.Module:
-        return self._runner.model
+        return self.runner.model
 
     def model_state_dict(self, *args, **kwargs) -> dict[str, Any]:
         return self.module.state_dict(*args, **kwargs)
@@ -62,7 +62,7 @@ def load_model_state_dict(
             **kwargs,
         )
         if get_rank() == 0:
-            self._runner.logger.info(incompatible_keys)
+            self.runner.logger.info(incompatible_keys)
 
     def load_model_from(
         self,
@@ -77,7 +77,7 @@ def load_model_from(
         model_state_dict = dict()
         for f_ in f_list:
             if get_rank() == 0:
-                self._runner.logger.info("Loading model from %s", f_)
+                self.runner.logger.info("Loading model from %s", f_)
             model_state_dict.update(torch.load(f_, 'cpu'))
         self.load_model_state_dict(model_state_dict, *args, **kwargs)
 
diff --git a/todd/runners/strategies/ddp.py b/todd/runners/strategies/ddp.py
index 3367c2ab..e2e9fd67 100644
--- a/todd/runners/strategies/ddp.py
+++ b/todd/runners/strategies/ddp.py
@@ -23,4 +23,4 @@ def wrap_model(self, model: nn.Module, config: Config) -> T:
 
     @property
     def module(self) -> nn.Module:
-        return self._runner.model.module
+        return self.runner.model.module
diff --git a/todd/runners/strategies/fsdp.py b/todd/runners/strategies/fsdp.py
index fa71105a..42eb809d 100644
--- a/todd/runners/strategies/fsdp.py
+++ b/todd/runners/strategies/fsdp.py
@@ -26,13 +26,13 @@ def wrap_model(self, model: nn.Module, config: Config) -> T:
 
     @property
     def module(self) -> nn.Module:
-        return self._runner.model.module
+        return self.runner.model.module
 
     def build_optimizer(self, config: Config) -> torch.optim.Optimizer:
-        return OptimizerRegistry.build(config, model=self._runner.model)
+        return OptimizerRegistry.build(config, model=self.runner.model)
 
     def model_state_dict(self, *args, **kwargs) -> dict[str, Any]:
-        return self._runner.model.state_dict(*args, **kwargs)
+        return self.runner.model.state_dict(*args, **kwargs)
 
     def load_model_state_dict(
         self,
@@ -40,7 +40,7 @@ def load_model_state_dict(
         *args,
         **kwargs,
     ) -> None:
-        self._runner.model.load_state_dict(state_dict, *args, **kwargs)
+        self.runner.model.load_state_dict(state_dict, *args, **kwargs)
 
     def optim_state_dict(
         self,
diff --git a/todd/runners/utils.py b/todd/runners/utils.py
index 0f257830..9c726054 100644
--- a/todd/runners/utils.py
+++ b/todd/runners/utils.py
@@ -2,42 +2,45 @@
     'RunnerHolderMixin',
 ]
 
-import weakref
-from typing import cast
+from typing import TypeVar
 
+from torch import nn
+
+from ..utils import HolderMixin
 from .base import BaseRunner
 from .epoch_based_trainer import EpochBasedTrainer
 from .iter_based_trainer import IterBasedTrainer
 from .trainer import Trainer
 from .validator import Validator
 
+T = TypeVar('T', bound=nn.Module)
+
 
-class RunnerHolderMixin:
+class RunnerHolderMixin(HolderMixin[BaseRunner[T]]):
 
-    def __init__(self, *args, runner: BaseRunner, **kwargs) -> None:
-        super().__init__(*args, **kwargs)
-        runner_proxy = (
-            runner if isinstance(runner, weakref.ProxyTypes) else
-            weakref.proxy(runner)
-        )
-        self._runner = cast(BaseRunner, runner_proxy)
+    def __init__(self, *args, runner: BaseRunner[T], **kwargs) -> None:
+        super().__init__(*args, instance=runner, **kwargs)
+
+    @property
+    def runner(self) -> BaseRunner[T]:
+        return self._instance
 
     @property
-    def trainer(self) -> Trainer:
-        assert isinstance(self._runner, Trainer)
-        return self._runner
+    def trainer(self) -> Trainer[T]:
+        assert isinstance(self._instance, Trainer)
+        return self._instance
 
     @property
-    def validator(self) -> Validator:
-        assert isinstance(self._runner, Validator)
-        return self._runner
+    def validator(self) -> Validator[T]:
+        assert isinstance(self._instance, Validator)
+        return self._instance
 
     @property
-    def iter_based_trainer(self) -> IterBasedTrainer:
-        assert isinstance(self._runner, IterBasedTrainer)
-        return self._runner
+    def iter_based_trainer(self) -> IterBasedTrainer[T]:
+        assert isinstance(self._instance, IterBasedTrainer)
+        return self._instance
 
     @property
-    def epoch_based_trainer(self) -> EpochBasedTrainer:
-        assert isinstance(self._runner, EpochBasedTrainer)
-        return self._runner
+    def epoch_based_trainer(self) -> EpochBasedTrainer[T]:
+        assert isinstance(self._instance, EpochBasedTrainer)
+        return self._instance
diff --git a/todd/utils/__init__.py b/todd/utils/__init__.py
index 85d1d65f..199b7dfb 100644
--- a/todd/utils/__init__.py
+++ b/todd/utils/__init__.py
@@ -1,3 +1,4 @@
+from .constants import *
 from .enums import *
 from .generic_tensors import *
 from .metas import *
diff --git a/todd/utils/constants.py b/todd/utils/constants.py
new file mode 100644
index 00000000..e225d6e2
--- /dev/null
+++ b/todd/utils/constants.py
@@ -0,0 +1,11 @@
+__all__ = [
+    'IMAGENET_MEAN',
+    'IMAGENET_STD',
+    'IMAGENET_MEAN_255',
+    'IMAGENET_STD_255',
+]
+
+IMAGENET_MEAN = (0.485, 0.456, 0.406)
+IMAGENET_STD = (0.229, 0.224, 0.225)
+IMAGENET_MEAN_255 = tuple(x * 255 for x in IMAGENET_MEAN)
+IMAGENET_STD_255 = tuple(x * 255 for x in IMAGENET_STD)
diff --git a/todd/utils/mixins.py b/todd/utils/mixins.py
index 9574ea16..3e9a3ccb 100644
--- a/todd/utils/mixins.py
+++ b/todd/utils/mixins.py
@@ -1,8 +1,12 @@
 __all__ = [
     'StateDictMixin',
+    'HolderMixin',
 ]
 
-from typing import Any, Mapping
+import weakref
+from typing import Any, Generic, Mapping, TypeVar, cast
+
+T = TypeVar('T')
 
 
 class StateDictMixin:
@@ -19,4 +23,12 @@ def load_state_dict(
         pass
 
 
-# TODO: define holder mixin
+class HolderMixin(Generic[T]):
+
+    def __init__(self, *args, instance: T, **kwargs) -> None:
+        super().__init__(*args, **kwargs)
+        instance_proxy = (
+            instance if isinstance(instance, weakref.ProxyTypes) else
+            weakref.proxy(instance)
+        )
+        self._instance = cast(T, instance_proxy)
diff --git a/tutorials/runners.ipynb b/tutorials/runners.ipynb
index 4975962e..c64e02d6 100644
--- a/tutorials/runners.ipynb
+++ b/tutorials/runners.ipynb
@@ -42,7 +42,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2024-02-23 18:26:49,338 35174:140704541179520][patches.py:9 todd <module>] INFO: `ipdb` is installed. Using it for debugging.\n",
+      "[2024-03-14 12:17:47,682 62058:140704275689088][patches.py:9 todd <module>] INFO: `ipdb` is installed. Using it for debugging.\n",
       "/Users/bytedance/.local/share/virtualenvs/todd-ARrcnwyq/lib/python3.11/site-packages/mmcv/__init__.py:20: UserWarning: On January 1, 2023, MMCV will release v2.0.0, in which it will remove components related to the training process and add a data transformation module. In addition, it will rename the package names mmcv to mmcv-lite and mmcv-full to mmcv. See https://github.com/open-mmlab/mmcv/blob/master/docs/en/compatibility.md for more details.\n",
       "  warnings.warn(\n"
      ]
@@ -184,7 +184,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:26:50,988 35174:140704541179520][base.py:57 todd.Validator.validator __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n"
+      "\u001b[2m[2024-03-14 12:17:49,375 62058:140704275689088][base.py:56 todd.Validator.validator __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n"
      ]
     },
     {
@@ -192,7 +192,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpl9zy4w3o\u001b[0m\n",
+      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpjms97dk6\u001b[0m\n",
       "└── \u001b[1;36mvalidator\u001b[0m\n",
       "\n",
       "2 directories, 0 files\n"
@@ -226,11 +226,11 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:26:51,346 35174:140704541179520][base.py:57 todd.Validator.validator __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:26:51,388 35174:140704541179520][log.py:93 todd.Validator.validator after_run_iter] INFO: Iter [5/20] batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
-      "[2024-02-23 18:26:51,392 35174:140704541179520][log.py:93 todd.Validator.validator after_run_iter] INFO: Iter [10/20] batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
-      "[2024-02-23 18:26:51,395 35174:140704541179520][log.py:93 todd.Validator.validator after_run_iter] INFO: Iter [15/20] batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
-      "[2024-02-23 18:26:51,398 35174:140704541179520][log.py:93 todd.Validator.validator after_run_iter] INFO: Iter [20/20] batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
+      "\u001b[2m[2024-03-14 12:17:49,723 62058:140704275689088][base.py:56 todd.Validator.validator __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:17:49,728 62058:140704275689088][log.py:93 todd.Validator.validator after_run_iter] INFO: Iter [5/20] batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
+      "[2024-03-14 12:17:49,731 62058:140704275689088][log.py:93 todd.Validator.validator after_run_iter] INFO: Iter [10/20] batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
+      "[2024-03-14 12:17:49,734 62058:140704275689088][log.py:93 todd.Validator.validator after_run_iter] INFO: Iter [15/20] batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
+      "[2024-03-14 12:17:49,736 62058:140704275689088][log.py:93 todd.Validator.validator after_run_iter] INFO: Iter [20/20] batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
      ]
     },
     {
@@ -238,7 +238,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpy_xrnx2v\u001b[0m\n",
+      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpv4q6cgrv\u001b[0m\n",
       "└── \u001b[1;36mvalidator\u001b[0m\n",
       "\n",
       "2 directories, 0 files\n"
@@ -288,15 +288,15 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:26:51,739 35174:140704541179520][base.py:57 todd.IterBasedTrainer.iter_based_trainer __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:26:51,743 35174:140704541179520][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [1/8] batch={'x': tensor([1, 3]), 'y': tensor([2, 6])} weight=0.000 loss=4.000\n",
-      "[2024-02-23 18:26:51,744 35174:140704541179520][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [2/8] batch={'x': tensor([5, 8]), 'y': tensor([10, 16])} weight=0.000 loss=13.000\n",
-      "[2024-02-23 18:26:51,745 35174:140704541179520][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [3/8] batch={'x': tensor([4, 2]), 'y': tensor([8, 4])} weight=0.000 loss=6.000\n",
-      "[2024-02-23 18:26:51,748 35174:140704541179520][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [4/8] batch={'x': tensor([ 6, 10]), 'y': tensor([12, 20])} weight=0.000 loss=16.000\n",
-      "[2024-02-23 18:26:51,749 35174:140704541179520][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [5/8] batch={'x': tensor([7, 9]), 'y': tensor([14, 18])} weight=0.000 loss=16.000\n",
-      "[2024-02-23 18:26:51,751 35174:140704541179520][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [6/8] batch={'x': tensor([5, 7]), 'y': tensor([10, 14])} weight=0.000 loss=12.000\n",
-      "[2024-02-23 18:26:51,753 35174:140704541179520][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [7/8] batch={'x': tensor([3, 4]), 'y': tensor([6, 8])} weight=0.000 loss=7.000\n",
-      "[2024-02-23 18:26:51,755 35174:140704541179520][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [8/8] batch={'x': tensor([2, 6]), 'y': tensor([ 4, 12])} weight=0.000 loss=8.000\n"
+      "\u001b[2m[2024-03-14 12:17:50,032 62058:140704275689088][base.py:56 todd.IterBasedTrainer.iter_based_trainer __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:17:50,035 62058:140704275689088][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [1/8] batch={'x': tensor([ 7, 10]), 'y': tensor([14, 20])} weight=0.000 loss=17.000\n",
+      "[2024-03-14 12:17:50,037 62058:140704275689088][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [2/8] batch={'x': tensor([4, 6]), 'y': tensor([ 8, 12])} weight=0.000 loss=10.000\n",
+      "[2024-03-14 12:17:50,038 62058:140704275689088][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [3/8] batch={'x': tensor([3, 8]), 'y': tensor([ 6, 16])} weight=0.000 loss=11.000\n",
+      "[2024-03-14 12:17:50,039 62058:140704275689088][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [4/8] batch={'x': tensor([9, 2]), 'y': tensor([18,  4])} weight=0.000 loss=11.000\n",
+      "[2024-03-14 12:17:50,041 62058:140704275689088][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [5/8] batch={'x': tensor([5, 1]), 'y': tensor([10,  2])} weight=0.000 loss=6.000\n",
+      "[2024-03-14 12:17:50,043 62058:140704275689088][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [6/8] batch={'x': tensor([8, 1]), 'y': tensor([16,  2])} weight=0.000 loss=9.000\n",
+      "[2024-03-14 12:17:50,044 62058:140704275689088][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [7/8] batch={'x': tensor([ 5, 10]), 'y': tensor([10, 20])} weight=0.000 loss=15.000\n",
+      "[2024-03-14 12:17:50,046 62058:140704275689088][log.py:93 todd.IterBasedTrainer.iter_based_trainer after_run_iter] INFO: Iter [8/8] batch={'x': tensor([2, 4]), 'y': tensor([4, 8])} weight=0.000 loss=6.000\n"
      ]
     }
    ],
@@ -337,25 +337,25 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:26:51,767 35174:140704541179520][base.py:57 todd.EpochBasedTrainer.epoch_based_trainer __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:26:51,768 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.epoch_based_trainer before_run_epoch] INFO: Epoch [1/3]\n",
-      "[2024-02-23 18:26:51,771 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [1/15] batch={'x': tensor([4, 9]), 'y': tensor([ 8, 18])} weight=0.000 loss=13.000\n",
-      "[2024-02-23 18:26:51,772 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [2/15] batch={'x': tensor([7, 3]), 'y': tensor([14,  6])} weight=0.000 loss=10.000\n",
-      "[2024-02-23 18:26:51,774 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [3/15] batch={'x': tensor([10,  2]), 'y': tensor([20,  4])} weight=0.000 loss=12.000\n",
-      "[2024-02-23 18:26:51,775 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [4/15] batch={'x': tensor([8, 6]), 'y': tensor([16, 12])} weight=0.000 loss=14.000\n",
-      "[2024-02-23 18:26:51,777 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [5/15] batch={'x': tensor([1, 5]), 'y': tensor([ 2, 10])} weight=0.000 loss=6.000\n",
-      "[2024-02-23 18:26:51,777 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.epoch_based_trainer before_run_epoch] INFO: Epoch [2/3]\n",
-      "[2024-02-23 18:26:51,779 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [6/15] batch={'x': tensor([5, 7]), 'y': tensor([10, 14])} weight=0.000 loss=12.000\n",
-      "[2024-02-23 18:26:51,780 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [7/15] batch={'x': tensor([3, 2]), 'y': tensor([6, 4])} weight=0.000 loss=5.000\n",
-      "[2024-02-23 18:26:51,782 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [8/15] batch={'x': tensor([4, 8]), 'y': tensor([ 8, 16])} weight=0.000 loss=12.000\n",
-      "[2024-02-23 18:26:51,783 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [9/15] batch={'x': tensor([9, 1]), 'y': tensor([18,  2])} weight=0.000 loss=10.000\n",
-      "[2024-02-23 18:26:51,785 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [10/15] batch={'x': tensor([ 6, 10]), 'y': tensor([12, 20])} weight=0.000 loss=16.000\n",
-      "[2024-02-23 18:26:51,786 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.epoch_based_trainer before_run_epoch] INFO: Epoch [3/3]\n",
-      "[2024-02-23 18:26:51,788 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [11/15] batch={'x': tensor([10,  9]), 'y': tensor([20, 18])} weight=0.000 loss=19.000\n",
-      "[2024-02-23 18:26:51,790 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [12/15] batch={'x': tensor([7, 5]), 'y': tensor([14, 10])} weight=0.000 loss=12.000\n",
-      "[2024-02-23 18:26:51,793 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [13/15] batch={'x': tensor([8, 4]), 'y': tensor([16,  8])} weight=0.000 loss=12.000\n",
-      "[2024-02-23 18:26:51,794 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [14/15] batch={'x': tensor([3, 6]), 'y': tensor([ 6, 12])} weight=0.000 loss=9.000\n",
-      "[2024-02-23 18:26:51,796 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [15/15] batch={'x': tensor([1, 2]), 'y': tensor([2, 4])} weight=0.000 loss=3.000\n"
+      "\u001b[2m[2024-03-14 12:17:50,057 62058:140704275689088][base.py:56 todd.EpochBasedTrainer.epoch_based_trainer __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:17:50,058 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.epoch_based_trainer before_run_epoch] INFO: Epoch [1/3]\n",
+      "[2024-03-14 12:17:50,061 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [1/15] batch={'x': tensor([4, 1]), 'y': tensor([8, 2])} weight=0.000 loss=5.000\n",
+      "[2024-03-14 12:17:50,064 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [2/15] batch={'x': tensor([8, 6]), 'y': tensor([16, 12])} weight=0.000 loss=14.000\n",
+      "[2024-03-14 12:17:50,066 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [3/15] batch={'x': tensor([5, 9]), 'y': tensor([10, 18])} weight=0.000 loss=14.000\n",
+      "[2024-03-14 12:17:50,068 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [4/15] batch={'x': tensor([ 7, 10]), 'y': tensor([14, 20])} weight=0.000 loss=17.000\n",
+      "[2024-03-14 12:17:50,071 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [5/15] batch={'x': tensor([2, 3]), 'y': tensor([4, 6])} weight=0.000 loss=5.000\n",
+      "[2024-03-14 12:17:50,073 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.epoch_based_trainer before_run_epoch] INFO: Epoch [2/3]\n",
+      "[2024-03-14 12:17:50,075 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [6/15] batch={'x': tensor([7, 8]), 'y': tensor([14, 16])} weight=0.000 loss=15.000\n",
+      "[2024-03-14 12:17:50,077 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [7/15] batch={'x': tensor([1, 6]), 'y': tensor([ 2, 12])} weight=0.000 loss=7.000\n",
+      "[2024-03-14 12:17:50,079 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [8/15] batch={'x': tensor([2, 4]), 'y': tensor([4, 8])} weight=0.000 loss=6.000\n",
+      "[2024-03-14 12:17:50,081 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [9/15] batch={'x': tensor([10,  9]), 'y': tensor([20, 18])} weight=0.000 loss=19.000\n",
+      "[2024-03-14 12:17:50,082 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [10/15] batch={'x': tensor([3, 5]), 'y': tensor([ 6, 10])} weight=0.000 loss=8.000\n",
+      "[2024-03-14 12:17:50,084 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.epoch_based_trainer before_run_epoch] INFO: Epoch [3/3]\n",
+      "[2024-03-14 12:17:50,086 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [11/15] batch={'x': tensor([3, 6]), 'y': tensor([ 6, 12])} weight=0.000 loss=9.000\n",
+      "[2024-03-14 12:17:50,088 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [12/15] batch={'x': tensor([2, 4]), 'y': tensor([4, 8])} weight=0.000 loss=6.000\n",
+      "[2024-03-14 12:17:50,090 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [13/15] batch={'x': tensor([5, 8]), 'y': tensor([10, 16])} weight=0.000 loss=13.000\n",
+      "[2024-03-14 12:17:50,091 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [14/15] batch={'x': tensor([10,  7]), 'y': tensor([20, 14])} weight=0.000 loss=17.000\n",
+      "[2024-03-14 12:17:50,093 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.epoch_based_trainer after_run_iter] INFO: Iter [15/15] batch={'x': tensor([9, 1]), 'y': tensor([18,  2])} weight=0.000 loss=10.000\n"
      ]
     }
    ],
@@ -402,7 +402,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2024-02-23 18:26:51,926 35174:140704541179520][log.py:55 todd.Validator.log_callback init] INFO: \n",
+      "[2024-03-14 12:17:50,216 62058:140704275689088][log.py:55 todd.Validator.log_callback init] INFO: \n",
       "platform: macOS-14.0\n",
       "nvidia_smi: None\n",
       "python_version: 3.11.7 (main, Dec  4 2023, 18:10:11) [Clang 15.0.0 (clang-1500.1.0.2.5)]\n",
@@ -411,20 +411,30 @@
       "opencv_version: 4.7.0\n",
       "todd_version: 0.4.0\n",
       "cuda_home: None\n",
-      "git_commit_id: 8dccf62\n",
+      "git_commit_id: 0a7955a\n",
       "git_status: \n",
-      "M pyproject.toml\n",
-      " M todd/runners/base.py\n",
+      "M todd/runners/callbacks/checkpoint.py\n",
+      " M todd/runners/callbacks/composed.py\n",
+      " M todd/runners/callbacks/git.py\n",
+      " M todd/runners/callbacks/interval.py\n",
+      " M todd/runners/callbacks/log.py\n",
+      " M todd/runners/callbacks/lr.py\n",
+      " M todd/runners/callbacks/monitor.py\n",
+      " M todd/runners/callbacks/tensorboard.py\n",
       " M todd/runners/epoch_based_trainer.py\n",
       " M todd/runners/iter_based_trainer.py\n",
-      " M todd/runners/trainer.py\n",
-      " M todd/runners/validator.py\n",
-      " M todd/utils/torch.py\n",
-      "\u001b[2m[2024-02-23 18:26:51,927 35174:140704541179520][base.py:57 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:26:51,931 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
-      "[2024-02-23 18:26:51,934 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
-      "[2024-02-23 18:26:51,937 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
-      "[2024-02-23 18:26:51,939 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
+      " M todd/runners/strategies/base.py\n",
+      " M todd/runners/strategies/ddp.py\n",
+      " M todd/runners/strategies/fsdp.py\n",
+      " M todd/runners/utils.py\n",
+      " M todd/utils/__init__.py\n",
+      " M todd/utils/mixins.py\n",
+      "?? todd/utils/constants.py\n",
+      "\u001b[2m[2024-03-14 12:17:50,217 62058:140704275689088][base.py:56 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:17:50,221 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
+      "[2024-03-14 12:17:50,240 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
+      "[2024-03-14 12:17:50,247 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
+      "[2024-03-14 12:17:50,262 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
      ]
     }
    ],
@@ -461,11 +471,11 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:26:51,979 35174:140704541179520][base.py:57 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:26:51,987 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
-      "[2024-02-23 18:26:51,991 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
-      "[2024-02-23 18:26:52,005 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
-      "[2024-02-23 18:26:52,032 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
+      "\u001b[2m[2024-03-14 12:17:50,291 62058:140704275689088][base.py:56 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:17:50,294 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
+      "[2024-03-14 12:17:50,296 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
+      "[2024-03-14 12:17:50,299 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
+      "[2024-03-14 12:17:50,301 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
      ]
     },
     {
@@ -473,17 +483,17 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpe_f9kq5u\u001b[0m\n",
+      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpypx13e0i\u001b[0m\n",
       "└── \u001b[1;36mlog_callback\u001b[0m\n",
-      "    └── 2024-02-23T18-26-51_978897-08-00.log\n",
+      "    └── 2024-03-14T12-17-50_290843-08-00.log\n",
       "\n",
       "2 directories, 1 file\n",
       "\n",
-      "[2024-02-23 18:26:51,979 35174:140704541179520][base.py:57 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\n",
-      "[2024-02-23 18:26:51,987 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
-      "[2024-02-23 18:26:51,991 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
-      "[2024-02-23 18:26:52,005 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
-      "[2024-02-23 18:26:52,032 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
+      "[2024-03-14 12:17:50,291 62058:140704275689088][base.py:56 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\n",
+      "[2024-03-14 12:17:50,294 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
+      "[2024-03-14 12:17:50,296 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
+      "[2024-03-14 12:17:50,299 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
+      "[2024-03-14 12:17:50,301 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
      ]
     }
    ],
@@ -523,11 +533,11 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:26:52,618 35174:140704541179520][base.py:57 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:26:53,137 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] ETA 0:00:01 batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
-      "[2024-02-23 18:26:53,652 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] ETA 0:00:01 batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
-      "[2024-02-23 18:26:54,162 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] ETA 0:00:00 batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
-      "[2024-02-23 18:26:54,678 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] ETA 0:00:00 batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
+      "\u001b[2m[2024-03-14 12:17:50,870 62058:140704275689088][base.py:56 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:17:51,391 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] ETA 0:00:01 batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
+      "[2024-03-14 12:17:51,912 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] ETA 0:00:01 batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
+      "[2024-03-14 12:17:52,431 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] ETA 0:00:00 batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
+      "[2024-03-14 12:17:52,945 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] ETA 0:00:00 batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
      ]
     }
    ],
@@ -567,11 +577,11 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:26:54,690 35174:140704541179520][base.py:57 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:26:56,210 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] ETA 0:00:04 batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
-      "[2024-02-23 18:27:00,228 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] ETA 0:00:05 batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
-      "[2024-02-23 18:27:05,240 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] ETA 0:00:03 batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
-      "[2024-02-23 18:27:10,261 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] ETA 0:00:00 batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
+      "\u001b[2m[2024-03-14 12:17:52,955 62058:140704275689088][base.py:56 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:17:54,468 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] ETA 0:00:04 batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
+      "[2024-03-14 12:17:58,481 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] ETA 0:00:05 batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
+      "[2024-03-14 12:18:03,499 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] ETA 0:00:03 batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
+      "[2024-03-14 12:18:08,518 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] ETA 0:00:00 batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
      ]
     }
    ],
@@ -611,7 +621,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2024-02-23 18:27:10,384 35174:140704541179520][log.py:55 todd.Validator.log_callback init] INFO: \n",
+      "[2024-03-14 12:18:08,647 62058:140704275689088][log.py:55 todd.Validator.log_callback init] INFO: \n",
       "platform: macOS-14.0\n",
       "nvidia_smi: None\n",
       "python_version: 3.11.7 (main, Dec  4 2023, 18:10:11) [Clang 15.0.0 (clang-1500.1.0.2.5)]\n",
@@ -620,20 +630,30 @@
       "opencv_version: 4.7.0\n",
       "todd_version: 0.4.0\n",
       "cuda_home: None\n",
-      "git_commit_id: 8dccf62\n",
+      "git_commit_id: 0a7955a\n",
       "git_status: \n",
-      "M pyproject.toml\n",
-      " M todd/runners/base.py\n",
+      "M todd/runners/callbacks/checkpoint.py\n",
+      " M todd/runners/callbacks/composed.py\n",
+      " M todd/runners/callbacks/git.py\n",
+      " M todd/runners/callbacks/interval.py\n",
+      " M todd/runners/callbacks/log.py\n",
+      " M todd/runners/callbacks/lr.py\n",
+      " M todd/runners/callbacks/monitor.py\n",
+      " M todd/runners/callbacks/tensorboard.py\n",
       " M todd/runners/epoch_based_trainer.py\n",
       " M todd/runners/iter_based_trainer.py\n",
-      " M todd/runners/trainer.py\n",
-      " M todd/runners/validator.py\n",
-      " M todd/utils/torch.py\n",
-      "\u001b[2m[2024-02-23 18:27:10,386 35174:140704541179520][base.py:57 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:10,390 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] ETA 0:00:00 batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
-      "[2024-02-23 18:27:10,393 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] ETA 0:00:00 batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
-      "[2024-02-23 18:27:10,395 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] ETA 0:00:00 batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
-      "[2024-02-23 18:27:10,398 35174:140704541179520][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] ETA 0:00:00 batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
+      " M todd/runners/strategies/base.py\n",
+      " M todd/runners/strategies/ddp.py\n",
+      " M todd/runners/strategies/fsdp.py\n",
+      " M todd/runners/utils.py\n",
+      " M todd/utils/__init__.py\n",
+      " M todd/utils/mixins.py\n",
+      "?? todd/utils/constants.py\n",
+      "\u001b[2m[2024-03-14 12:18:08,648 62058:140704275689088][base.py:56 todd.Validator.log_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:08,653 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [5/20] ETA 0:00:00 batch={'x': tensor([5]), 'y': tensor([10])} weight=0.000 loss=10.000\n",
+      "[2024-03-14 12:18:08,655 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [10/20] ETA 0:00:00 batch={'x': tensor([10]), 'y': tensor([20])} weight=0.000 loss=20.000\n",
+      "[2024-03-14 12:18:08,658 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [15/20] ETA 0:00:00 batch={'x': tensor([15]), 'y': tensor([30])} weight=0.000 loss=30.000\n",
+      "[2024-03-14 12:18:08,660 62058:140704275689088][log.py:93 todd.Validator.log_callback after_run_iter] INFO: Iter [20/20] ETA 0:00:00 batch={'x': tensor([20]), 'y': tensor([40])} weight=0.000 loss=40.000\n"
      ]
     }
    ],
@@ -679,8 +699,8 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2024-02-23 18:27:10,464 35174:140704541179520][git.py:41 todd.Validator.git_callback init] INFO: Saving git diff to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpk3uvcez8/git_callback/git_diff_2024-02-23T18-27-10_464496-08-00.log\n",
-      "\u001b[2m[2024-02-23 18:27:10,467 35174:140704541179520][base.py:57 todd.Validator.git_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n"
+      "[2024-03-14 12:18:08,732 62058:140704275689088][git.py:41 todd.Validator.git_callback init] INFO: Saving git diff to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpotx2rbek/git_callback/git_diff_2024-03-14T12-18-08_732484-08-00.log\n",
+      "\u001b[2m[2024-03-14 12:18:08,735 62058:140704275689088][base.py:56 todd.Validator.git_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n"
      ]
     },
     {
@@ -688,249 +708,461 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "diff --git a/pyproject.toml b/pyproject.toml\n",
-      "index aa38442..487cb54 100644\n",
-      "--- a/pyproject.toml\n",
-      "+++ b/pyproject.toml\n",
-      "@@ -133,6 +133,7 @@ module = [\n",
-      "     'ipdb.*',\n",
-      "     'mmcv.*',\n",
-      "     'pptx.*',\n",
-      "+    'setuptools.*',\n",
-      "     'torchvision.*',\n",
-      "     'yapf.*',\n",
-      " ]\n",
-      "diff --git a/todd/runners/base.py b/todd/runners/base.py\n",
-      "index fda811f..e16bbfd 100644\n",
-      "--- a/todd/runners/base.py\n",
-      "+++ b/todd/runners/base.py\n",
-      "@@ -2,6 +2,7 @@ __all__ = [\n",
-      "     'BaseRunner',\n",
-      " ]\n",
+      "diff --git a/todd/runners/callbacks/checkpoint.py b/todd/runners/callbacks/checkpoint.py\n",
+      "index 09548a2..caba779 100644\n",
+      "--- a/todd/runners/callbacks/checkpoint.py\n",
+      "+++ b/todd/runners/callbacks/checkpoint.py\n",
+      "@@ -36,27 +36,27 @@ class CheckpointCallback(IntervalMixin, BaseCallback):\n",
       " \n",
-      "+from abc import abstractmethod\n",
-      " import contextlib\n",
-      " import getpass\n",
-      " import logging\n",
-      "@@ -105,8 +106,9 @@ class BaseRunner(StateDictMixin, Generic[T]):\n",
-      "         return self._logger\n",
+      "     def init(self, *args, **kwargs) -> None:\n",
+      "         super().init(*args, **kwargs)\n",
+      "-        self._checkpoint_dir = self._runner.work_dir / 'checkpoints'\n",
+      "+        self._checkpoint_dir = self.runner.work_dir / 'checkpoints'\n",
+      "         self._latest_checkpoint_dir = self._checkpoint_dir / 'latest'\n",
       " \n",
-      "     @property\n",
-      "+    @abstractmethod\n",
-      "     def iters(self) -> int:\n",
-      "-        return len(self._dataloader)\n",
-      "+        pass\n",
+      "         self._checkpoint_dir.mkdir(parents=True, exist_ok=True)\n",
       " \n",
-      "     def _build_strategy(\n",
-      "         self,\n",
-      "@@ -255,7 +257,7 @@ class BaseRunner(StateDictMixin, Generic[T]):\n",
-      "         return memo\n",
+      "-        if self._runner._auto_resume and self._latest_checkpoint_dir.exists():\n",
+      "+        if self.runner._auto_resume and self._latest_checkpoint_dir.exists():\n",
+      "             load_from = self._latest_checkpoint_dir\n",
+      "-        elif self._runner.load_from is not None:\n",
+      "-            load_from = pathlib.Path(self._runner.load_from)\n",
+      "+        elif self.runner.load_from is not None:\n",
+      "+            load_from = pathlib.Path(self.runner.load_from)\n",
+      "             assert load_from.exists()\n",
+      "         else:\n",
+      "             load_from = None\n",
       " \n",
-      "     def _setup(self) -> Memo:\n",
-      "-        return dict(dataloader=iter(self._dataloader))\n",
-      "+        return dict()\n",
+      "         if load_from is not None:\n",
+      "             if get_rank() == 0:\n",
+      "-                self._runner.logger.info(\"Loading from %s\", load_from)\n",
+      "+                self.runner.logger.info(\"Loading from %s\", load_from)\n",
+      "             state_dict = {\n",
+      "                 f.stem: torch.load(f, 'cpu')\n",
+      "                 for f in load_from.glob('*.pth')\n",
+      "             }\n",
+      "-            self._runner.load_state_dict(state_dict, **self._load_state_dict)\n",
+      "+            self.runner.load_state_dict(state_dict, **self._load_state_dict)\n",
       " \n",
-      "     def _teardown(self, memo: Memo) -> None:\n",
-      "         pass\n",
-      "diff --git a/todd/runners/epoch_based_trainer.py b/todd/runners/epoch_based_trainer.py\n",
-      "index 73ff6b7..8c2a517 100644\n",
-      "--- a/todd/runners/epoch_based_trainer.py\n",
-      "+++ b/todd/runners/epoch_based_trainer.py\n",
-      "@@ -9,6 +9,8 @@ from typing import TypeVar\n",
+      "     @property\n",
+      "     def checkpoint_dir(self) -> pathlib.Path:\n",
+      "@@ -71,13 +71,13 @@ class CheckpointCallback(IntervalMixin, BaseCallback):\n",
       " \n",
-      " from torch import nn\n",
+      "     def _save(self, name: str) -> None:\n",
+      "         # for FSDP, all ranks should call state dict\n",
+      "-        state_dict = self._runner.state_dict(**self._state_dict)\n",
+      "+        state_dict = self.runner.state_dict(**self._state_dict)\n",
       " \n",
-      "+from ..utils import set_epoch\n",
-      "+\n",
-      " from ..base import RunnerRegistry\n",
-      " from .trainer import Trainer\n",
-      " from .types import Memo\n",
-      "@@ -23,17 +25,9 @@ class EpochBasedTrainer(Trainer):\n",
+      "         if get_rank() != 0:\n",
+      "             return\n",
+      "         work_dir = self._work_dir(name)\n",
+      "         work_dir.mkdir(parents=True, exist_ok=True)\n",
+      "-        self._runner.logger.info(\"Saving state dict to %s\", work_dir)\n",
+      "+        self.runner.logger.info(\"Saving state dict to %s\", work_dir)\n",
+      "         for k, v in state_dict.items():\n",
+      "             torch.save(v, work_dir / f'{k}.pth')\n",
+      " \n",
+      "@@ -88,7 +88,7 @@ class CheckpointCallback(IntervalMixin, BaseCallback):\n",
+      "     def after_run_iter(self, batch, memo: Memo) -> None:\n",
+      "         super().after_run_iter(batch, memo)\n",
+      "         if self._should_run_iter():\n",
+      "-            self._save(f'iter_{self._runner.iter_}')\n",
+      "+            self._save(f'iter_{self.runner.iter_}')\n",
+      " \n",
+      "     def after_run_epoch(self, epoch_memo: Memo, memo: Memo) -> None:\n",
+      "         super().after_run_epoch(epoch_memo, memo)\n",
+      "diff --git a/todd/runners/callbacks/composed.py b/todd/runners/callbacks/composed.py\n",
+      "index a1c1638..83bec29 100644\n",
+      "--- a/todd/runners/callbacks/composed.py\n",
+      "+++ b/todd/runners/callbacks/composed.py\n",
+      "@@ -21,7 +21,7 @@ class ComposedCallback(BaseCallback):\n",
       "         super().__init__(*args, **kwargs)\n",
-      "         self._epochs = epochs\n",
+      "         priorities = [c.pop('priority', dict()) for c in callbacks]\n",
+      "         queue = [\n",
+      "-            CallbackRegistry.build(c, runner=self._runner) for c in callbacks\n",
+      "+            CallbackRegistry.build(c, runner=self.runner) for c in callbacks\n",
+      "         ]\n",
+      "         self._priority_queue: PriorityQueue[KT, BaseCallback] = \\\n",
+      "             PriorityQueue(priorities, queue)\n",
+      "diff --git a/todd/runners/callbacks/git.py b/todd/runners/callbacks/git.py\n",
+      "index ce642ac..ab90b0f 100644\n",
+      "--- a/todd/runners/callbacks/git.py\n",
+      "+++ b/todd/runners/callbacks/git.py\n",
+      "@@ -33,10 +33,10 @@ class GitCallback(BaseCallback):\n",
+      "                 diff = subprocess_run(args_)\n",
+      "             except subprocess.CalledProcessError as e:\n",
+      "                 diff = str(e)\n",
+      "-                self._runner.logger.error(e)\n",
+      "+                self.runner.logger.error(e)\n",
+      "             else:\n",
+      "                 file = (\n",
+      "-                    self._runner.work_dir / f'git_diff_{get_timestamp()}.log'\n",
+      "+                    self.runner.work_dir / f'git_diff_{get_timestamp()}.log'\n",
+      "                 )\n",
+      "-                self._runner.logger.info('Saving git diff to %s', file)\n",
+      "+                self.runner.logger.info('Saving git diff to %s', file)\n",
+      "                 file.write_text(diff)\n",
+      "diff --git a/todd/runners/callbacks/interval.py b/todd/runners/callbacks/interval.py\n",
+      "index 8eb9625..489dd21 100644\n",
+      "--- a/todd/runners/callbacks/interval.py\n",
+      "+++ b/todd/runners/callbacks/interval.py\n",
+      "@@ -22,7 +22,7 @@ class IntervalMixin(BaseCallback):\n",
+      "         return self._interval > 0 and step % self._interval == 0\n",
       " \n",
-      "-    @property\n",
-      "-    def epoch(self) -> int:\n",
-      "-        return self._iter // super().iters\n",
-      "-\n",
-      "-    @property\n",
-      "-    def inner_iter(self) -> int:\n",
-      "-        return self._iter % super().iters\n",
-      "-\n",
-      "     @property\n",
-      "     def iters(self) -> int:\n",
-      "-        return super().iters * self._epochs\n",
-      "+        return self.iters_per_epoch * self._epochs\n",
+      "     def _should_run_iter(self) -> bool:\n",
+      "-        return not self._by_epoch and self.__should_run(self._runner.iter_)\n",
+      "+        return not self._by_epoch and self.__should_run(self.runner.iter_)\n",
       " \n",
-      "     @property\n",
-      "     def epochs(self) -> int:\n",
-      "@@ -43,22 +37,13 @@ class EpochBasedTrainer(Trainer):\n",
-      "         return super()._run(epoch_memo)\n",
+      "     def _should_run_epoch(self) -> bool:\n",
+      "         return (\n",
+      "diff --git a/todd/runners/callbacks/log.py b/todd/runners/callbacks/log.py\n",
+      "index 8f884f5..263e6b4 100644\n",
+      "--- a/todd/runners/callbacks/log.py\n",
+      "+++ b/todd/runners/callbacks/log.py\n",
+      "@@ -43,24 +43,24 @@ class LogCallback(IntervalMixin, BaseCallback):\n",
+      "         if get_rank() > 0:\n",
+      "             return\n",
+      "         if self._with_file_handler:\n",
+      "-            file = self._runner.work_dir / f'{get_timestamp()}.log'\n",
+      "+            file = self.runner.work_dir / f'{get_timestamp()}.log'\n",
+      "             handler = logging.FileHandler(file)\n",
+      "             handler.setFormatter(Formatter())\n",
+      "-            self._runner.logger.addHandler(handler)\n",
+      "+            self.runner.logger.addHandler(handler)\n",
+      "         if self._collect_env is not None:\n",
+      "             from ...base import (  # noqa: E501 pylint: disable=import-outside-toplevel\n",
+      "                 collect_env,\n",
+      "             )\n",
+      "             env = collect_env(**self._collect_env)\n",
+      "-            self._runner.logger.info(env)\n",
+      "+            self.runner.logger.info(env)\n",
       " \n",
-      "     def _setup_epoch(self, memo: Memo) -> Memo:\n",
-      "-        samplers = [\n",
-      "-            self._dataloader.sampler,\n",
-      "-            self._dataloader.batch_sampler,\n",
-      "-            getattr(self._dataloader.batch_sampler, 'sampler', None),\n",
-      "-        ]\n",
-      "-        for sampler in samplers:\n",
-      "-            if (set_epoch := getattr(sampler, 'set_epoch', None)) is not None:\n",
-      "-                set_epoch(self.epoch)\n",
-      "         epoch_memo = super()._setup()\n",
-      "-        dataloader = epoch_memo['dataloader']\n",
-      "-        dataloader = itertools.islice(\n",
-      "-            dataloader,\n",
-      "-            super().iters - self.inner_iter,\n",
-      "-        )\n",
-      "+        set_epoch(self._dataloader, self.epoch)\n",
-      "         epoch_memo.update(\n",
-      "-            dataloader=dataloader,\n",
-      "+            dataloader=(\n",
-      "+                itertools.islice(self._dataloader, self.inner_iter, None)\n",
-      "+                if self.inner_iter > 0 else self._dataloader\n",
-      "+            ),\n",
-      "             epoch=defaultdict(list),\n",
+      "     def before_run(self, memo: Memo) -> None:\n",
+      "         super().before_run(memo)\n",
+      "         self._eta: BaseETA | None = (\n",
+      "             None if self._eta_config is None else ETARegistry.build(\n",
+      "                 self._eta_config,\n",
+      "-                start=self._runner.iter_ - 1,\n",
+      "-                end=self._runner.iters,\n",
+      "+                start=self.runner.iter_ - 1,\n",
+      "+                end=self.runner.iters,\n",
+      "             )\n",
       "         )\n",
-      "         return epoch_memo\n",
+      " \n",
+      "@@ -73,10 +73,10 @@ class LogCallback(IntervalMixin, BaseCallback):\n",
+      "         super().after_run_iter(batch, memo)\n",
+      "         if 'log' not in memo:\n",
+      "             return\n",
+      "-        prefix = f\"Iter [{self._runner.iter_}/{self._runner.iters}] \"\n",
+      "+        prefix = f\"Iter [{self.runner.iter_}/{self.runner.iters}] \"\n",
+      " \n",
+      "         if self._eta is not None:\n",
+      "-            eta = self._eta(self._runner.iter_)\n",
+      "+            eta = self._eta(self.runner.iter_)\n",
+      "             eta = round(eta)\n",
+      "             prefix += f\"ETA {str(datetime.timedelta(seconds=eta))} \"\n",
+      " \n",
+      "@@ -90,7 +90,7 @@ class LogCallback(IntervalMixin, BaseCallback):\n",
+      " \n",
+      "         log: dict[str, Any] = memo.pop('log')\n",
+      "         message = ' '.join(f'{k}={v}' for k, v in log.items() if v is not None)\n",
+      "-        self._runner.logger.info(prefix + message)\n",
+      "+        self.runner.logger.info(prefix + message)\n",
+      " \n",
+      "     def before_run_epoch(self, epoch_memo: Memo, memo: Memo) -> None:\n",
+      "         super().before_run_epoch(epoch_memo, memo)\n",
+      "diff --git a/todd/runners/callbacks/lr.py b/todd/runners/callbacks/lr.py\n",
+      "index 977558b..6a597c9 100644\n",
+      "--- a/todd/runners/callbacks/lr.py\n",
+      "+++ b/todd/runners/callbacks/lr.py\n",
+      "@@ -26,7 +26,7 @@ class LRScheduleCallback(IntervalMixin, BaseCallback):\n",
+      "         **kwargs,\n",
+      "     ) -> None:\n",
+      "         super().__init__(*args, interval=interval, **kwargs)\n",
+      "-        assert isinstance(self._runner, Trainer)\n",
+      "+        assert isinstance(self.runner, Trainer)\n",
+      "         self._lr_scheduler_config = lr_scheduler\n",
+      " \n",
+      "     def init(self, *args, **kwargs) -> None:\n",
+      "@@ -34,7 +34,7 @@ class LRScheduleCallback(IntervalMixin, BaseCallback):\n",
+      "         self._build_lr_scheduler()\n",
+      " \n",
+      "     def _build_lr_scheduler(self) -> None:\n",
+      "-        runner = cast(Trainer, self._runner)\n",
+      "+        runner = cast(Trainer, self.runner)\n",
+      "         self._lr_scheduler: torch.optim.lr_scheduler.LRScheduler = \\\n",
+      "             LRSchedulerRegistry.build(\n",
+      "                 self._lr_scheduler_config,\n",
+      "@@ -75,11 +75,11 @@ class LRScaleCallback(BaseCallback):\n",
+      " \n",
+      "     def __init__(self, *args, lr_scaler: Config, **kwargs) -> None:\n",
+      "         super().__init__(*args, **kwargs)\n",
+      "-        assert isinstance(self._runner, Trainer)\n",
+      "+        assert isinstance(self.runner, Trainer)\n",
+      "         self._lr_scaler_config = lr_scaler\n",
+      " \n",
+      "     def _scale_lr(self, config: Config) -> None:\n",
+      "-        runner = cast(Trainer, self._runner)\n",
+      "+        runner = cast(Trainer, self.runner)\n",
+      "         assert runner.dataloader.batch_size is not None\n",
+      "         base_batch_size = config.base_batch_size\n",
+      "         batch_size = get_world_size() * runner.dataloader.batch_size\n",
+      "diff --git a/todd/runners/callbacks/monitor.py b/todd/runners/callbacks/monitor.py\n",
+      "index 98aabf2..0895477 100644\n",
+      "--- a/todd/runners/callbacks/monitor.py\n",
+      "+++ b/todd/runners/callbacks/monitor.py\n",
+      "@@ -37,8 +37,8 @@ class MonitorCallback(BaseCallback):\n",
+      "     ) -> None:\n",
+      "         super().run_iter_context(exit_stack, batch, memo)\n",
+      "         context = Context(\n",
+      "-            self._runner.logger,\n",
+      "-            iter_=self._runner.iter_,\n",
+      "+            self.runner.logger,\n",
+      "+            iter_=self.runner.iter_,\n",
+      "             batch=batch,\n",
+      "             memo=memo,\n",
+      "         )\n",
+      "diff --git a/todd/runners/callbacks/tensorboard.py b/todd/runners/callbacks/tensorboard.py\n",
+      "index 65eb9bc..804f907 100644\n",
+      "--- a/todd/runners/callbacks/tensorboard.py\n",
+      "+++ b/todd/runners/callbacks/tensorboard.py\n",
+      "@@ -31,7 +31,7 @@ class TensorBoardCallback(IntervalMixin, BaseCallback):\n",
+      "         super().init(*args, **kwargs)\n",
+      "         if get_rank() > 0:\n",
+      "             return\n",
+      "-        log_dir = self._runner.work_dir / 'tensorboard'\n",
+      "+        log_dir = self.runner.work_dir / 'tensorboard'\n",
+      "         self._summary_writer = SummaryWriter(\n",
+      "             log_dir,\n",
+      "             **self._summary_writer_config,\n",
+      "diff --git a/todd/runners/epoch_based_trainer.py b/todd/runners/epoch_based_trainer.py\n",
+      "index bc46346..8358d00 100644\n",
+      "--- a/todd/runners/epoch_based_trainer.py\n",
+      "+++ b/todd/runners/epoch_based_trainer.py\n",
+      "@@ -18,7 +18,7 @@ T = TypeVar('T', bound=nn.Module)\n",
+      " \n",
+      " \n",
+      " @RunnerRegistry.register_()\n",
+      "-class EpochBasedTrainer(Trainer):\n",
+      "+class EpochBasedTrainer(Trainer[T]):\n",
+      " \n",
+      "     def __init__(self, *args, epochs: int, **kwargs) -> None:\n",
+      "         super().__init__(*args, **kwargs)\n",
       "diff --git a/todd/runners/iter_based_trainer.py b/todd/runners/iter_based_trainer.py\n",
-      "index bc507f1..2f69a9d 100644\n",
+      "index 107c894..7a212fa 100644\n",
       "--- a/todd/runners/iter_based_trainer.py\n",
       "+++ b/todd/runners/iter_based_trainer.py\n",
-      "@@ -3,10 +3,12 @@ __all__ = [\n",
+      "@@ -16,7 +16,7 @@ T = TypeVar('T', bound=nn.Module)\n",
+      " \n",
+      " \n",
+      " @RunnerRegistry.register_()\n",
+      "-class IterBasedTrainer(Trainer):\n",
+      "+class IterBasedTrainer(Trainer[T]):\n",
+      " \n",
+      "     def __init__(self, *args, iters: int, **kwargs) -> None:\n",
+      "         super().__init__(*args, **kwargs)\n",
+      "diff --git a/todd/runners/strategies/base.py b/todd/runners/strategies/base.py\n",
+      "index a97d30f..d6c2a74 100644\n",
+      "--- a/todd/runners/strategies/base.py\n",
+      "+++ b/todd/runners/strategies/base.py\n",
+      "@@ -2,7 +2,7 @@ __all__ = [\n",
+      "     'BaseStrategy',\n",
       " ]\n",
       " \n",
-      " import itertools\n",
-      "-from typing import TypeVar\n",
-      "+from typing import Any, Generator, TypeVar\n",
+      "-from typing import Any, Generic, Mapping, TypeVar, cast\n",
+      "+from typing import Any, Mapping, TypeVar, cast\n",
       " \n",
+      " import torch\n",
       " from torch import nn\n",
+      "@@ -15,7 +15,7 @@ T = TypeVar('T', bound=nn.Module)\n",
       " \n",
-      "+from ..utils.torch import set_epoch\n",
-      "+\n",
-      " from ..base import RunnerRegistry\n",
-      " from .trainer import Trainer\n",
-      " from .types import Memo\n",
-      "@@ -26,10 +28,23 @@ class IterBasedTrainer(Trainer):\n",
-      "     def iters(self) -> int:\n",
-      "         return self._iters\n",
       " \n",
-      "+    def _iterate_dataloader(self) -> Generator[Any, None, None]:\n",
-      "+        if self.inner_iter > 0:\n",
-      "+            set_epoch(self._dataloader, self.epoch)\n",
-      "+            yield from itertools.islice(\n",
-      "+                self._dataloader,\n",
-      "+                self.inner_iter,\n",
-      "+                self.iters - self.iters_per_epoch * self.epoch,\n",
-      "+            )\n",
-      "+        while self._iter < self.iters:\n",
-      "+            assert self.inner_iter == 0\n",
-      "+            set_epoch(self._dataloader, self.epoch)\n",
-      "+            yield from itertools.islice(\n",
-      "+                self._dataloader,\n",
-      "+                self.iters - self._iter,\n",
-      "+            )\n",
-      "+\n",
-      "     def _setup(self) -> Memo:\n",
-      "         memo = super()._setup()\n",
-      "-        dataloader = memo['dataloader']\n",
-      "-        dataloader = itertools.cycle(dataloader)\n",
-      "-        dataloader = itertools.islice(dataloader, self._iters - self._iter)\n",
-      "-        memo['dataloader'] = dataloader\n",
-      "+        memo['dataloader'] = self._iterate_dataloader()\n",
-      "         return memo\n",
-      "diff --git a/todd/runners/trainer.py b/todd/runners/trainer.py\n",
-      "index 40ecbe6..449c8b7 100644\n",
-      "--- a/todd/runners/trainer.py\n",
-      "+++ b/todd/runners/trainer.py\n",
-      "@@ -2,6 +2,7 @@ __all__ = [\n",
-      "     'Trainer',\n",
-      " ]\n",
+      " @StrategyRegistry.register_()\n",
+      "-class BaseStrategy(RunnerHolderMixin, StateDictMixin, Generic[T]):\n",
+      "+class BaseStrategy(RunnerHolderMixin[T], StateDictMixin):\n",
+      " \n",
+      "     def __init__(\n",
+      "         self,\n",
+      "@@ -45,7 +45,7 @@ class BaseStrategy(RunnerHolderMixin, StateDictMixin, Generic[T]):\n",
       " \n",
-      "+from abc import ABC\n",
-      " from typing import Any, Mapping, TypeVar\n",
+      "     @property\n",
+      "     def module(self) -> nn.Module:\n",
+      "-        return self._runner.model\n",
+      "+        return self.runner.model\n",
       " \n",
-      " import torch\n",
-      "@@ -15,7 +16,19 @@ T = TypeVar('T', bound=nn.Module)\n",
+      "     def model_state_dict(self, *args, **kwargs) -> dict[str, Any]:\n",
+      "         return self.module.state_dict(*args, **kwargs)\n",
+      "@@ -62,7 +62,7 @@ class BaseStrategy(RunnerHolderMixin, StateDictMixin, Generic[T]):\n",
+      "             **kwargs,\n",
+      "         )\n",
+      "         if get_rank() == 0:\n",
+      "-            self._runner.logger.info(incompatible_keys)\n",
+      "+            self.runner.logger.info(incompatible_keys)\n",
       " \n",
+      "     def load_model_from(\n",
+      "         self,\n",
+      "@@ -77,7 +77,7 @@ class BaseStrategy(RunnerHolderMixin, StateDictMixin, Generic[T]):\n",
+      "         model_state_dict = dict()\n",
+      "         for f_ in f_list:\n",
+      "             if get_rank() == 0:\n",
+      "-                self._runner.logger.info(\"Loading model from %s\", f_)\n",
+      "+                self.runner.logger.info(\"Loading model from %s\", f_)\n",
+      "             model_state_dict.update(torch.load(f_, 'cpu'))\n",
+      "         self.load_model_state_dict(model_state_dict, *args, **kwargs)\n",
       " \n",
-      " @RunnerRegistry.register_()\n",
-      "-class Trainer(BaseRunner[T]):\n",
-      "+class Trainer(BaseRunner[T], ABC):\n",
+      "diff --git a/todd/runners/strategies/ddp.py b/todd/runners/strategies/ddp.py\n",
+      "index 3367c2a..e2e9fd6 100644\n",
+      "--- a/todd/runners/strategies/ddp.py\n",
+      "+++ b/todd/runners/strategies/ddp.py\n",
+      "@@ -23,4 +23,4 @@ class DDPStrategy(CUDAStrategy[T]):\n",
+      " \n",
+      "     @property\n",
+      "     def module(self) -> nn.Module:\n",
+      "-        return self._runner.model.module\n",
+      "+        return self.runner.model.module\n",
+      "diff --git a/todd/runners/strategies/fsdp.py b/todd/runners/strategies/fsdp.py\n",
+      "index fa71105..42eb809 100644\n",
+      "--- a/todd/runners/strategies/fsdp.py\n",
+      "+++ b/todd/runners/strategies/fsdp.py\n",
+      "@@ -26,13 +26,13 @@ class FSDPStrategy(CUDAStrategy[T]):\n",
+      " \n",
+      "     @property\n",
+      "     def module(self) -> nn.Module:\n",
+      "-        return self._runner.model.module\n",
+      "+        return self.runner.model.module\n",
+      " \n",
+      "     def build_optimizer(self, config: Config) -> torch.optim.Optimizer:\n",
+      "-        return OptimizerRegistry.build(config, model=self._runner.model)\n",
+      "+        return OptimizerRegistry.build(config, model=self.runner.model)\n",
+      " \n",
+      "     def model_state_dict(self, *args, **kwargs) -> dict[str, Any]:\n",
+      "-        return self._runner.model.state_dict(*args, **kwargs)\n",
+      "+        return self.runner.model.state_dict(*args, **kwargs)\n",
+      " \n",
+      "     def load_model_state_dict(\n",
+      "         self,\n",
+      "@@ -40,7 +40,7 @@ class FSDPStrategy(CUDAStrategy[T]):\n",
+      "         *args,\n",
+      "         **kwargs,\n",
+      "     ) -> None:\n",
+      "-        self._runner.model.load_state_dict(state_dict, *args, **kwargs)\n",
+      "+        self.runner.model.load_state_dict(state_dict, *args, **kwargs)\n",
+      " \n",
+      "     def optim_state_dict(\n",
+      "         self,\n",
+      "diff --git a/todd/runners/utils.py b/todd/runners/utils.py\n",
+      "index 0f25783..9c72605 100644\n",
+      "--- a/todd/runners/utils.py\n",
+      "+++ b/todd/runners/utils.py\n",
+      "@@ -2,42 +2,45 @@ __all__ = [\n",
+      "     'RunnerHolderMixin',\n",
+      " ]\n",
+      " \n",
+      "-import weakref\n",
+      "-from typing import cast\n",
+      "+from typing import TypeVar\n",
+      " \n",
+      "+from torch import nn\n",
       "+\n",
-      "+    @property\n",
-      "+    def iters_per_epoch(self) -> int:\n",
-      "+        return len(self._dataloader)\n",
+      "+from ..utils import HolderMixin\n",
+      " from .base import BaseRunner\n",
+      " from .epoch_based_trainer import EpochBasedTrainer\n",
+      " from .iter_based_trainer import IterBasedTrainer\n",
+      " from .trainer import Trainer\n",
+      " from .validator import Validator\n",
+      " \n",
+      "+T = TypeVar('T', bound=nn.Module)\n",
       "+\n",
-      "+    @property\n",
-      "+    def inner_iter(self) -> int:\n",
-      "+        return self._iter % self.iters_per_epoch\n",
+      " \n",
+      "-class RunnerHolderMixin:\n",
+      "+class RunnerHolderMixin(HolderMixin[BaseRunner[T]]):\n",
+      " \n",
+      "-    def __init__(self, *args, runner: BaseRunner, **kwargs) -> None:\n",
+      "-        super().__init__(*args, **kwargs)\n",
+      "-        runner_proxy = (\n",
+      "-            runner if isinstance(runner, weakref.ProxyTypes) else\n",
+      "-            weakref.proxy(runner)\n",
+      "-        )\n",
+      "-        self._runner = cast(BaseRunner, runner_proxy)\n",
+      "+    def __init__(self, *args, runner: BaseRunner[T], **kwargs) -> None:\n",
+      "+        super().__init__(*args, instance=runner, **kwargs)\n",
       "+\n",
       "+    @property\n",
-      "+    def epoch(self) -> int:\n",
-      "+        return self._iter // self.iters_per_epoch\n",
+      "+    def runner(self) -> BaseRunner[T]:\n",
+      "+        return self._instance\n",
       " \n",
       "     @property\n",
-      "     def optimizer(self) -> torch.optim.Optimizer:\n",
-      "diff --git a/todd/runners/validator.py b/todd/runners/validator.py\n",
-      "index a70132a..c55f18b 100644\n",
-      "--- a/todd/runners/validator.py\n",
-      "+++ b/todd/runners/validator.py\n",
-      "@@ -16,9 +16,15 @@ T = TypeVar('T', bound=nn.Module)\n",
-      " @RunnerRegistry.register_()\n",
-      " class Validator(BaseRunner[T]):\n",
+      "-    def trainer(self) -> Trainer:\n",
+      "-        assert isinstance(self._runner, Trainer)\n",
+      "-        return self._runner\n",
+      "+    def trainer(self) -> Trainer[T]:\n",
+      "+        assert isinstance(self._instance, Trainer)\n",
+      "+        return self._instance\n",
       " \n",
-      "+    @property\n",
-      "+    def iters(self) -> int:\n",
-      "+        return len(self._dataloader)\n",
-      "+\n",
-      "     def _setup(self) -> Memo:\n",
-      "         self._model.eval()\n",
-      "-        return super()._setup()\n",
-      "+        memo = super()._setup()\n",
-      "+        memo['dataloader'] = self._dataloader\n",
-      "+        return memo\n",
+      "     @property\n",
+      "-    def validator(self) -> Validator:\n",
+      "-        assert isinstance(self._runner, Validator)\n",
+      "-        return self._runner\n",
+      "+    def validator(self) -> Validator[T]:\n",
+      "+        assert isinstance(self._instance, Validator)\n",
+      "+        return self._instance\n",
       " \n",
-      "     @torch.no_grad()\n",
-      "     def run(self) -> Memo:\n",
-      "diff --git a/todd/utils/torch.py b/todd/utils/torch.py\n",
-      "index 05ec37b..f574616 100644\n",
-      "--- a/todd/utils/torch.py\n",
-      "+++ b/todd/utils/torch.py\n",
-      "@@ -5,6 +5,7 @@ __all__ = [\n",
-      "     'all_gather',\n",
-      "     'all_gather_',\n",
-      "     'all_sync',\n",
-      "+    'set_epoch',\n",
-      "     'Shape',\n",
-      "     'ModuleList',\n",
-      "     'ModuleDict',\n",
-      "@@ -20,6 +21,7 @@ from typing import TYPE_CHECKING\n",
-      " import torch\n",
-      " import torch.distributed as dist\n",
-      " from torch import nn\n",
-      "+from torch.utils.data import DataLoader\n",
+      "     @property\n",
+      "-    def iter_based_trainer(self) -> IterBasedTrainer:\n",
+      "-        assert isinstance(self._runner, IterBasedTrainer)\n",
+      "-        return self._runner\n",
+      "+    def iter_based_trainer(self) -> IterBasedTrainer[T]:\n",
+      "+        assert isinstance(self._instance, IterBasedTrainer)\n",
+      "+        return self._instance\n",
       " \n",
+      "     @property\n",
+      "-    def epoch_based_trainer(self) -> EpochBasedTrainer:\n",
+      "-        assert isinstance(self._runner, EpochBasedTrainer)\n",
+      "-        return self._runner\n",
+      "+    def epoch_based_trainer(self) -> EpochBasedTrainer[T]:\n",
+      "+        assert isinstance(self._instance, EpochBasedTrainer)\n",
+      "+        return self._instance\n",
+      "diff --git a/todd/utils/__init__.py b/todd/utils/__init__.py\n",
+      "index 85d1d65..199b7df 100644\n",
+      "--- a/todd/utils/__init__.py\n",
+      "+++ b/todd/utils/__init__.py\n",
+      "@@ -1,3 +1,4 @@\n",
+      "+from .constants import *\n",
+      " from .enums import *\n",
+      " from .generic_tensors import *\n",
+      " from .metas import *\n",
+      "diff --git a/todd/utils/mixins.py b/todd/utils/mixins.py\n",
+      "index 9574ea1..3e9a3cc 100644\n",
+      "--- a/todd/utils/mixins.py\n",
+      "+++ b/todd/utils/mixins.py\n",
+      "@@ -1,8 +1,12 @@\n",
+      " __all__ = [\n",
+      "     'StateDictMixin',\n",
+      "+    'HolderMixin',\n",
+      " ]\n",
       " \n",
-      " def get_rank(*args, **kwargs) -> int:\n",
-      "@@ -101,6 +103,18 @@ def all_sync(x: torch.Tensor) -> bool:\n",
-      "     return torch.allclose(x, x_prime)\n",
+      "-from typing import Any, Mapping\n",
+      "+import weakref\n",
+      "+from typing import Any, Generic, Mapping, TypeVar, cast\n",
+      "+\n",
+      "+T = TypeVar('T')\n",
       " \n",
       " \n",
-      "+def set_epoch(dataloader: DataLoader, epoch: int) -> None:\n",
-      "+    samplers = [\n",
-      "+        dataloader.sampler,\n",
-      "+        dataloader.batch_sampler,\n",
-      "+        getattr(dataloader.batch_sampler, 'sampler', None),\n",
-      "+    ]\n",
-      "+    for sampler in samplers:\n",
-      "+        set_epoch_ = getattr(sampler, 'set_epoch', None)\n",
-      "+        if set_epoch_ is not None:\n",
-      "+            set_epoch_(epoch)\n",
-      "+\n",
-      "+\n",
-      " class Shape:\n",
+      " class StateDictMixin:\n",
+      "@@ -19,4 +23,12 @@ class StateDictMixin:\n",
+      "         pass\n",
       " \n",
-      "     @classmethod\n"
+      " \n",
+      "-# TODO: define holder mixin\n",
+      "+class HolderMixin(Generic[T]):\n",
+      "+\n",
+      "+    def __init__(self, *args, instance: T, **kwargs) -> None:\n",
+      "+        super().__init__(*args, **kwargs)\n",
+      "+        instance_proxy = (\n",
+      "+            instance if isinstance(instance, weakref.ProxyTypes) else\n",
+      "+            weakref.proxy(instance)\n",
+      "+        )\n",
+      "+        self._instance = cast(T, instance_proxy)\n"
      ]
     }
    ],
@@ -972,15 +1204,15 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:27:10,779 35174:140704541179520][base.py:57 todd.IterBasedTrainer.optimize_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:10,784 35174:140704541179520][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [1/8] batch={'x': tensor([5, 8]), 'y': tensor([10, 16])} weight=0.000 loss=13.000\n",
-      "[2024-02-23 18:27:10,786 35174:140704541179520][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [2/8] batch={'x': tensor([9, 1]), 'y': tensor([18,  2])} weight=0.032 loss=9.838\n",
-      "[2024-02-23 18:27:10,787 35174:140704541179520][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [3/8] batch={'x': tensor([6, 3]), 'y': tensor([12,  6])} weight=0.057 loss=8.741\n",
-      "[2024-02-23 18:27:10,789 35174:140704541179520][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [4/8] batch={'x': tensor([10,  2]), 'y': tensor([20,  4])} weight=0.080 loss=11.520\n",
-      "[2024-02-23 18:27:10,791 35174:140704541179520][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [5/8] batch={'x': tensor([7, 4]), 'y': tensor([14,  8])} weight=0.110 loss=10.395\n",
-      "[2024-02-23 18:27:10,794 35174:140704541179520][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [6/8] batch={'x': tensor([ 8, 10]), 'y': tensor([16, 20])} weight=0.138 loss=16.763\n",
-      "[2024-02-23 18:27:10,796 35174:140704541179520][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [7/8] batch={'x': tensor([3, 5]), 'y': tensor([ 6, 10])} weight=0.183 loss=7.270\n",
-      "[2024-02-23 18:27:10,798 35174:140704541179520][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [8/8] batch={'x': tensor([2, 1]), 'y': tensor([4, 2])} weight=0.203 loss=2.696\n"
+      "\u001b[2m[2024-03-14 12:18:09,033 62058:140704275689088][base.py:56 todd.IterBasedTrainer.optimize_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:09,037 62058:140704275689088][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [1/8] batch={'x': tensor([4, 7]), 'y': tensor([ 8, 14])} weight=0.000 loss=11.000\n",
+      "[2024-03-14 12:18:09,040 62058:140704275689088][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [2/8] batch={'x': tensor([6, 3]), 'y': tensor([12,  6])} weight=0.027 loss=8.876\n",
+      "[2024-03-14 12:18:09,042 62058:140704275689088][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [3/8] batch={'x': tensor([1, 2]), 'y': tensor([2, 4])} weight=0.050 loss=2.925\n",
+      "[2024-03-14 12:18:09,043 62058:140704275689088][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [4/8] batch={'x': tensor([ 5, 10]), 'y': tensor([10, 20])} weight=0.057 loss=14.569\n",
+      "[2024-03-14 12:18:09,045 62058:140704275689088][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [5/8] batch={'x': tensor([8, 9]), 'y': tensor([16, 18])} weight=0.095 loss=16.193\n",
+      "[2024-03-14 12:18:09,047 62058:140704275689088][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [6/8] batch={'x': tensor([7, 2]), 'y': tensor([14,  4])} weight=0.138 loss=8.381\n",
+      "[2024-03-14 12:18:09,049 62058:140704275689088][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [7/8] batch={'x': tensor([ 8, 10]), 'y': tensor([16, 20])} weight=0.160 loss=16.560\n",
+      "[2024-03-14 12:18:09,050 62058:140704275689088][log.py:93 todd.IterBasedTrainer.optimize_callback after_run_iter] INFO: Iter [8/8] batch={'x': tensor([9, 5]), 'y': tensor([18, 10])} weight=0.205 loss=12.565\n"
      ]
     }
    ],
@@ -1021,15 +1253,15 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:27:10,810 35174:140704541179520][base.py:57 todd.IterBasedTrainer.lr_schedule_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:10,812 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [1/8] batch={'x': tensor([6, 3]), 'y': tensor([12,  6])} weight=0.000 loss=9.000 lr=['1.667e-03']\n",
-      "[2024-02-23 18:27:10,814 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [2/8] batch={'x': tensor([2, 9]), 'y': tensor([ 4, 18])} weight=0.008 loss=10.959 lr=['2.333e-03']\n",
-      "[2024-02-23 18:27:10,815 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [3/8] batch={'x': tensor([7, 5]), 'y': tensor([14, 10])} weight=0.020 loss=11.878 lr=['3.000e-03']\n",
-      "[2024-02-23 18:27:10,817 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [4/8] batch={'x': tensor([8, 4]), 'y': tensor([16,  8])} weight=0.038 loss=11.770 lr=['3.667e-03']\n",
-      "[2024-02-23 18:27:10,819 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [5/8] batch={'x': tensor([ 1, 10]), 'y': tensor([ 2, 20])} weight=0.060 loss=10.668 lr=['4.333e-03']\n",
-      "[2024-02-23 18:27:10,820 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [6/8] batch={'x': tensor([10,  6]), 'y': tensor([20, 12])} weight=0.084 loss=15.327 lr=['5.000e-03']\n",
-      "[2024-02-23 18:27:10,822 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [7/8] batch={'x': tensor([4, 7]), 'y': tensor([ 8, 14])} weight=0.124 loss=10.317 lr=['5.000e-03']\n",
-      "[2024-02-23 18:27:10,823 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [8/8] batch={'x': tensor([1, 9]), 'y': tensor([ 2, 18])} weight=0.152 loss=9.242 lr=['5.000e-03']\n"
+      "\u001b[2m[2024-03-14 12:18:09,063 62058:140704275689088][base.py:56 todd.IterBasedTrainer.lr_schedule_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:09,066 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [1/8] batch={'x': tensor([6, 2]), 'y': tensor([12,  4])} weight=0.000 loss=8.000 lr=['1.667e-03']\n",
+      "[2024-03-14 12:18:09,068 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [2/8] batch={'x': tensor([ 9, 10]), 'y': tensor([18, 20])} weight=0.007 loss=18.937 lr=['2.333e-03']\n",
+      "[2024-03-14 12:18:09,070 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [3/8] batch={'x': tensor([3, 8]), 'y': tensor([ 6, 16])} weight=0.029 loss=10.841 lr=['3.000e-03']\n",
+      "[2024-03-14 12:18:09,073 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [4/8] batch={'x': tensor([5, 7]), 'y': tensor([10, 14])} weight=0.045 loss=11.728 lr=['3.667e-03']\n",
+      "[2024-03-14 12:18:09,074 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [5/8] batch={'x': tensor([1, 4]), 'y': tensor([2, 8])} weight=0.067 loss=4.832 lr=['4.333e-03']\n",
+      "[2024-03-14 12:18:09,077 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [6/8] batch={'x': tensor([1, 9]), 'y': tensor([ 2, 18])} weight=0.078 loss=9.609 lr=['5.000e-03']\n",
+      "[2024-03-14 12:18:09,079 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [7/8] batch={'x': tensor([3, 2]), 'y': tensor([6, 4])} weight=0.103 loss=4.742 lr=['5.000e-03']\n",
+      "[2024-03-14 12:18:09,080 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [8/8] batch={'x': tensor([7, 8]), 'y': tensor([14, 16])} weight=0.116 loss=14.132 lr=['5.000e-03']\n"
      ]
     }
    ],
@@ -1069,22 +1301,22 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:27:10,836 35174:140704541179520][base.py:57 todd.EpochBasedTrainer.lr_schedule_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:10,836 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.lr_schedule_callback before_run_epoch] INFO: Epoch [1/5]\n",
-      "[2024-02-23 18:27:10,839 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [1/10] batch={'x': tensor([2, 1]), 'y': tensor([4, 2])} weight=0.000 loss=3.000 lr=['1.667e-03']\n",
-      "[2024-02-23 18:27:10,841 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [2/10] batch={'x': tensor([4, 3]), 'y': tensor([8, 6])} weight=0.002 loss=6.991 lr=['1.667e-03']\n",
-      "[2024-02-23 18:27:10,842 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.lr_schedule_callback before_run_epoch] INFO: Epoch [2/5]\n",
-      "[2024-02-23 18:27:10,844 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [3/10] batch={'x': tensor([3, 2]), 'y': tensor([6, 4])} weight=0.008 loss=4.979 lr=['2.778e-03']\n",
-      "[2024-02-23 18:27:10,846 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [4/10] batch={'x': tensor([1, 4]), 'y': tensor([2, 8])} weight=0.015 loss=4.962 lr=['2.778e-03']\n",
-      "[2024-02-23 18:27:10,847 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.lr_schedule_callback before_run_epoch] INFO: Epoch [3/5]\n",
-      "[2024-02-23 18:27:10,849 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [5/10] batch={'x': tensor([4, 1]), 'y': tensor([8, 2])} weight=0.022 loss=4.944 lr=['3.889e-03']\n",
-      "[2024-02-23 18:27:10,851 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [6/10] batch={'x': tensor([2, 3]), 'y': tensor([4, 6])} weight=0.032 loss=4.920 lr=['3.889e-03']\n",
-      "[2024-02-23 18:27:10,852 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.lr_schedule_callback before_run_epoch] INFO: Epoch [4/5]\n",
-      "[2024-02-23 18:27:10,854 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [7/10] batch={'x': tensor([2, 1]), 'y': tensor([4, 2])} weight=0.042 loss=2.938 lr=['5.000e-03']\n",
-      "[2024-02-23 18:27:10,856 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [8/10] batch={'x': tensor([4, 3]), 'y': tensor([8, 6])} weight=0.049 loss=6.828 lr=['5.000e-03']\n",
-      "[2024-02-23 18:27:10,857 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.lr_schedule_callback before_run_epoch] INFO: Epoch [5/5]\n",
-      "[2024-02-23 18:27:10,859 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [9/10] batch={'x': tensor([2, 1]), 'y': tensor([4, 2])} weight=0.067 loss=2.900 lr=['5.000e-03']\n",
-      "[2024-02-23 18:27:10,861 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [10/10] batch={'x': tensor([3, 4]), 'y': tensor([6, 8])} weight=0.074 loss=6.740 lr=['5.000e-03']\n"
+      "\u001b[2m[2024-03-14 12:18:09,092 62058:140704275689088][base.py:56 todd.EpochBasedTrainer.lr_schedule_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:09,094 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.lr_schedule_callback before_run_epoch] INFO: Epoch [1/5]\n",
+      "[2024-03-14 12:18:09,096 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [1/10] batch={'x': tensor([3, 2]), 'y': tensor([6, 4])} weight=0.000 loss=5.000 lr=['1.667e-03']\n",
+      "[2024-03-14 12:18:09,098 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [2/10] batch={'x': tensor([4, 1]), 'y': tensor([8, 2])} weight=0.004 loss=4.990 lr=['1.667e-03']\n",
+      "[2024-03-14 12:18:09,099 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.lr_schedule_callback before_run_epoch] INFO: Epoch [2/5]\n",
+      "[2024-03-14 12:18:09,101 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [3/10] batch={'x': tensor([4, 1]), 'y': tensor([8, 2])} weight=0.008 loss=4.979 lr=['2.778e-03']\n",
+      "[2024-03-14 12:18:09,102 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [4/10] batch={'x': tensor([3, 2]), 'y': tensor([6, 4])} weight=0.015 loss=4.962 lr=['2.778e-03']\n",
+      "[2024-03-14 12:18:09,104 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.lr_schedule_callback before_run_epoch] INFO: Epoch [3/5]\n",
+      "[2024-03-14 12:18:09,106 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [5/10] batch={'x': tensor([1, 2]), 'y': tensor([2, 4])} weight=0.022 loss=2.967 lr=['3.889e-03']\n",
+      "[2024-03-14 12:18:09,108 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [6/10] batch={'x': tensor([3, 4]), 'y': tensor([6, 8])} weight=0.028 loss=6.902 lr=['3.889e-03']\n",
+      "[2024-03-14 12:18:09,109 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.lr_schedule_callback before_run_epoch] INFO: Epoch [4/5]\n",
+      "[2024-03-14 12:18:09,112 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [7/10] batch={'x': tensor([1, 2]), 'y': tensor([2, 4])} weight=0.042 loss=2.938 lr=['5.000e-03']\n",
+      "[2024-03-14 12:18:09,113 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [8/10] batch={'x': tensor([4, 3]), 'y': tensor([8, 6])} weight=0.049 loss=6.828 lr=['5.000e-03']\n",
+      "[2024-03-14 12:18:09,114 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.lr_schedule_callback before_run_epoch] INFO: Epoch [5/5]\n",
+      "[2024-03-14 12:18:09,117 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [9/10] batch={'x': tensor([2, 4]), 'y': tensor([4, 8])} weight=0.067 loss=5.800 lr=['5.000e-03']\n",
+      "[2024-03-14 12:18:09,119 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.lr_schedule_callback after_run_iter] INFO: Iter [10/10] batch={'x': tensor([1, 3]), 'y': tensor([2, 6])} weight=0.082 loss=3.837 lr=['5.000e-03']\n"
      ]
     }
    ],
@@ -1132,16 +1364,16 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2024-02-23 18:27:10,874 35174:140704541179520][lr.py:93 todd.IterBasedTrainer.lr_scale_callback _scale_lr] INFO: base_batch_size=1 batch_size=2 lr_scaler=2.000\n",
-      "\u001b[2m[2024-02-23 18:27:10,875 35174:140704541179520][base.py:57 todd.IterBasedTrainer.lr_scale_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:10,877 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [1/8] batch={'x': tensor([3, 7]), 'y': tensor([ 6, 14])} weight=0.000 loss=10.000\n",
-      "[2024-02-23 18:27:10,879 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [2/8] batch={'x': tensor([4, 9]), 'y': tensor([ 8, 18])} weight=0.050 loss=12.675\n",
-      "[2024-02-23 18:27:10,881 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [3/8] batch={'x': tensor([1, 6]), 'y': tensor([ 2, 12])} weight=0.115 loss=6.598\n",
-      "[2024-02-23 18:27:10,882 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [4/8] batch={'x': tensor([ 2, 10]), 'y': tensor([ 4, 20])} weight=0.150 loss=11.100\n",
-      "[2024-02-23 18:27:10,884 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [5/8] batch={'x': tensor([8, 5]), 'y': tensor([16, 10])} weight=0.210 loss=11.635\n",
-      "[2024-02-23 18:27:10,885 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [6/8] batch={'x': tensor([7, 1]), 'y': tensor([14,  2])} weight=0.275 loss=6.900\n",
-      "[2024-02-23 18:27:10,887 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [7/8] batch={'x': tensor([8, 3]), 'y': tensor([16,  6])} weight=0.315 loss=9.267\n",
-      "[2024-02-23 18:27:10,888 35174:140704541179520][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [8/8] batch={'x': tensor([6, 9]), 'y': tensor([12, 18])} weight=0.370 loss=12.225\n"
+      "[2024-03-14 12:18:09,131 62058:140704275689088][lr.py:93 todd.IterBasedTrainer.lr_scale_callback _scale_lr] INFO: base_batch_size=1 batch_size=2 lr_scaler=2.000\n",
+      "\u001b[2m[2024-03-14 12:18:09,132 62058:140704275689088][base.py:56 todd.IterBasedTrainer.lr_scale_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:09,134 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [1/8] batch={'x': tensor([3, 4]), 'y': tensor([6, 8])} weight=0.000 loss=7.000\n",
+      "[2024-03-14 12:18:09,136 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [2/8] batch={'x': tensor([6, 7]), 'y': tensor([12, 14])} weight=0.035 loss=12.773\n",
+      "[2024-03-14 12:18:09,138 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [3/8] batch={'x': tensor([10,  2]), 'y': tensor([20,  4])} weight=0.100 loss=11.400\n",
+      "[2024-03-14 12:18:09,140 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [4/8] batch={'x': tensor([8, 9]), 'y': tensor([16, 18])} weight=0.160 loss=15.640\n",
+      "[2024-03-14 12:18:09,142 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [5/8] batch={'x': tensor([5, 1]), 'y': tensor([10,  2])} weight=0.245 loss=5.265\n",
+      "[2024-03-14 12:18:09,145 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [6/8] batch={'x': tensor([6, 8]), 'y': tensor([12, 16])} weight=0.275 loss=12.075\n",
+      "[2024-03-14 12:18:09,146 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [7/8] batch={'x': tensor([10,  3]), 'y': tensor([20,  6])} weight=0.345 loss=10.757\n",
+      "[2024-03-14 12:18:09,148 62058:140704275689088][log.py:93 todd.IterBasedTrainer.lr_scale_callback after_run_iter] INFO: Iter [8/8] batch={'x': tensor([2, 4]), 'y': tensor([4, 8])} weight=0.410 loss=4.770\n"
      ]
     }
    ],
@@ -1188,23 +1420,29 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:27:10,910 35174:140704541179520][base.py:57 todd.IterBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:10,912 35174:140704541179520][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [1/8] batch={'x': tensor([4, 1]), 'y': tensor([8, 2])} weight=0.000 loss=5.000\n",
-      "[2024-02-23 18:27:10,913 35174:140704541179520][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_1\n",
-      "[2024-02-23 18:27:10,917 35174:140704541179520][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [2/8] batch={'x': tensor([6, 7]), 'y': tensor([12, 14])} weight=0.012 loss=12.919\n",
-      "[2024-02-23 18:27:10,918 35174:140704541179520][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_2\n",
-      "[2024-02-23 18:27:10,922 35174:140704541179520][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [3/8] batch={'x': tensor([ 9, 10]), 'y': tensor([18, 20])} weight=0.045 loss=18.572\n",
-      "[2024-02-23 18:27:10,923 35174:140704541179520][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_3\n",
-      "[2024-02-23 18:27:10,928 35174:140704541179520][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [4/8] batch={'x': tensor([8, 5]), 'y': tensor([16, 10])} weight=0.093 loss=12.399\n",
-      "[2024-02-23 18:27:10,930 35174:140704541179520][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_4\n",
-      "[2024-02-23 18:27:10,936 35174:140704541179520][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [5/8] batch={'x': tensor([3, 2]), 'y': tensor([6, 4])} weight=0.125 loss=4.688\n",
-      "[2024-02-23 18:27:10,937 35174:140704541179520][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_5\n",
-      "[2024-02-23 18:27:10,941 35174:140704541179520][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [6/8] batch={'x': tensor([6, 3]), 'y': tensor([12,  6])} weight=0.138 loss=8.381\n",
-      "[2024-02-23 18:27:10,942 35174:140704541179520][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_6\n",
-      "[2024-02-23 18:27:10,946 35174:140704541179520][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [7/8] batch={'x': tensor([4, 7]), 'y': tensor([ 8, 14])} weight=0.160 loss=10.120\n",
-      "[2024-02-23 18:27:10,947 35174:140704541179520][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_7\n",
-      "[2024-02-23 18:27:10,951 35174:140704541179520][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [8/8] batch={'x': tensor([ 2, 10]), 'y': tensor([ 4, 20])} weight=0.188 loss=10.875\n",
-      "[2024-02-23 18:27:10,952 35174:140704541179520][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_8\n"
+      "\u001b[2m[2024-03-14 12:18:09,174 62058:140704275689088][base.py:56 todd.IterBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:09,177 62058:140704275689088][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [1/8] batch={'x': tensor([6, 5]), 'y': tensor([12, 10])} weight=0.000 loss=11.000\n",
+      "[2024-03-14 12:18:09,195 62058:140704275689088][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_1\n",
+      "[2024-03-14 12:18:09,224 62058:140704275689088][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [2/8] batch={'x': tensor([2, 8]), 'y': tensor([ 4, 16])} weight=0.027 loss=9.863\n",
+      "[2024-03-14 12:18:09,233 62058:140704275689088][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_2\n",
+      "[2024-03-14 12:18:09,241 62058:140704275689088][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [3/8] batch={'x': tensor([ 3, 10]), 'y': tensor([ 6, 20])} weight=0.052 loss=12.659\n",
+      "[2024-03-14 12:18:09,243 62058:140704275689088][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_3\n",
+      "[2024-03-14 12:18:09,251 62058:140704275689088][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [4/8] batch={'x': tensor([1, 4]), 'y': tensor([2, 8])} weight=0.085 loss=4.787\n",
+      "[2024-03-14 12:18:09,258 62058:140704275689088][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_4\n",
+      "[2024-03-14 12:18:09,262 62058:140704275689088][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [5/8] batch={'x': tensor([9, 7]), 'y': tensor([18, 14])} weight=0.097 loss=15.220\n",
+      "[2024-03-14 12:18:09,264 62058:140704275689088][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_5\n",
+      "[2024-03-14 12:18:09,268 62058:140704275689088][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [6/8] batch={'x': tensor([9, 7]), 'y': tensor([18, 14])} weight=0.137 loss=14.900\n",
+      "[2024-03-14 12:18:09,269 62058:140704275689088][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_6\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[2024-03-14 12:18:09,274 62058:140704275689088][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [7/8] batch={'x': tensor([1, 3]), 'y': tensor([2, 6])} weight=0.177 loss=3.645\n",
+      "[2024-03-14 12:18:09,276 62058:140704275689088][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_7\n",
+      "[2024-03-14 12:18:09,281 62058:140704275689088][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [8/8] batch={'x': tensor([5, 8]), 'y': tensor([10, 16])} weight=0.187 loss=11.781\n",
+      "[2024-03-14 12:18:09,282 62058:140704275689088][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_8\n"
      ]
     },
     {
@@ -1212,7 +1450,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp\u001b[0m\n",
+      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460\u001b[0m\n",
       "└── \u001b[1;36mcheckpoint_callback\u001b[0m\n",
       "    └── \u001b[1;36mcheckpoints\u001b[0m\n",
       "        ├── \u001b[1;36miter_1\u001b[0m\n",
@@ -1273,15 +1511,15 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2024-02-23 18:27:11,413 35174:140704541179520][checkpoint.py:54 todd.IterBasedTrainer.checkpoint_callback init] INFO: Loading from /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_5\n",
-      "[2024-02-23 18:27:11,420 35174:140704541179520][base.py:65 todd.IterBasedTrainer.checkpoint_callback load_model_state_dict] INFO: <All keys matched successfully>\n",
-      "\u001b[2m[2024-02-23 18:27:11,421 35174:140704541179520][base.py:57 todd.IterBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:11,424 35174:140704541179520][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [6/8] batch={'x': tensor([3, 7]), 'y': tensor([ 6, 14])} weight=0.138 loss=9.312\n",
-      "[2024-02-23 18:27:11,425 35174:140704541179520][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_6\n",
-      "[2024-02-23 18:27:11,429 35174:140704541179520][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [7/8] batch={'x': tensor([2, 6]), 'y': tensor([ 4, 12])} weight=0.162 loss=7.350\n",
-      "[2024-02-23 18:27:11,430 35174:140704541179520][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_7\n",
-      "[2024-02-23 18:27:11,434 35174:140704541179520][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [8/8] batch={'x': tensor([8, 4]), 'y': tensor([16,  8])} weight=0.182 loss=10.905\n",
-      "[2024-02-23 18:27:11,435 35174:140704541179520][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp5g2sg2lp/checkpoint_callback/checkpoints/iter_8\n"
+      "[2024-03-14 12:18:09,715 62058:140704275689088][checkpoint.py:54 todd.IterBasedTrainer.checkpoint_callback init] INFO: Loading from /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_5\n",
+      "[2024-03-14 12:18:09,718 62058:140704275689088][base.py:65 todd.IterBasedTrainer.checkpoint_callback load_model_state_dict] INFO: <All keys matched successfully>\n",
+      "\u001b[2m[2024-03-14 12:18:09,719 62058:140704275689088][base.py:56 todd.IterBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:09,722 62058:140704275689088][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [6/8] batch={'x': tensor([10,  1]), 'y': tensor([20,  2])} weight=0.137 loss=10.244\n",
+      "[2024-03-14 12:18:09,723 62058:140704275689088][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_6\n",
+      "[2024-03-14 12:18:09,727 62058:140704275689088][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [7/8] batch={'x': tensor([4, 2]), 'y': tensor([8, 4])} weight=0.165 loss=5.505\n",
+      "[2024-03-14 12:18:09,728 62058:140704275689088][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_7\n",
+      "[2024-03-14 12:18:09,731 62058:140704275689088][log.py:93 todd.IterBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [8/8] batch={'x': tensor([6, 9]), 'y': tensor([12, 18])} weight=0.180 loss=13.650\n",
+      "[2024-03-14 12:18:09,732 62058:140704275689088][checkpoint.py:80 todd.IterBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpkhq8r460/checkpoint_callback/checkpoints/iter_8\n"
      ]
     },
     {
@@ -1364,32 +1602,32 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:27:11,475 35174:140704541179520][base.py:57 todd.EpochBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:11,476 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [1/3]\n",
-      "[2024-02-23 18:27:11,478 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [1/15] batch={'x': tensor([10,  1]), 'y': tensor([20,  2])} weight=0.000 loss=11.000\n",
-      "[2024-02-23 18:27:11,480 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [2/15] batch={'x': tensor([5, 3]), 'y': tensor([10,  6])} weight=0.027 loss=7.890\n",
-      "[2024-02-23 18:27:11,481 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_2\n",
-      "[2024-02-23 18:27:11,484 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [3/15] batch={'x': tensor([7, 4]), 'y': tensor([14,  8])} weight=0.047 loss=10.739\n",
-      "[2024-02-23 18:27:11,486 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [4/15] batch={'x': tensor([9, 2]), 'y': tensor([18,  4])} weight=0.075 loss=10.588\n",
-      "[2024-02-23 18:27:11,487 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_4\n",
-      "[2024-02-23 18:27:11,491 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [5/15] batch={'x': tensor([6, 8]), 'y': tensor([12, 16])} weight=0.103 loss=13.283\n",
-      "[2024-02-23 18:27:11,492 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [2/3]\n",
-      "[2024-02-23 18:27:11,494 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [6/15] batch={'x': tensor([7, 2]), 'y': tensor([14,  4])} weight=0.138 loss=8.381\n",
-      "[2024-02-23 18:27:11,495 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_6\n",
-      "[2024-02-23 18:27:11,499 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [7/15] batch={'x': tensor([ 3, 10]), 'y': tensor([ 6, 20])} weight=0.160 loss=11.960\n",
-      "[2024-02-23 18:27:11,500 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [8/15] batch={'x': tensor([4, 6]), 'y': tensor([ 8, 12])} weight=0.192 loss=9.038\n",
-      "[2024-02-23 18:27:11,502 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_8\n",
-      "[2024-02-23 18:27:11,506 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [9/15] batch={'x': tensor([8, 5]), 'y': tensor([16, 10])} weight=0.218 loss=11.586\n",
-      "[2024-02-23 18:27:11,508 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [10/15] batch={'x': tensor([9, 1]), 'y': tensor([18,  2])} weight=0.250 loss=8.750\n",
-      "[2024-02-23 18:27:11,509 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_10\n",
-      "[2024-02-23 18:27:11,512 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [3/3]\n",
-      "[2024-02-23 18:27:11,514 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [11/15] batch={'x': tensor([1, 8]), 'y': tensor([ 2, 16])} weight=0.275 loss=7.763\n",
-      "[2024-02-23 18:27:11,516 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [12/15] batch={'x': tensor([2, 6]), 'y': tensor([ 4, 12])} weight=0.298 loss=6.810\n",
-      "[2024-02-23 18:27:11,516 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_12\n",
-      "[2024-02-23 18:27:11,521 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [13/15] batch={'x': tensor([9, 3]), 'y': tensor([18,  6])} weight=0.318 loss=10.095\n",
-      "[2024-02-23 18:27:11,522 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [14/15] batch={'x': tensor([ 4, 10]), 'y': tensor([ 8, 20])} weight=0.348 loss=11.567\n",
-      "[2024-02-23 18:27:11,523 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_14\n",
-      "[2024-02-23 18:27:11,527 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [15/15] batch={'x': tensor([5, 7]), 'y': tensor([10, 14])} weight=0.383 loss=9.705\n"
+      "\u001b[2m[2024-03-14 12:18:09,769 62058:140704275689088][base.py:56 todd.EpochBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:09,769 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [1/3]\n",
+      "[2024-03-14 12:18:09,772 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [1/15] batch={'x': tensor([5, 3]), 'y': tensor([10,  6])} weight=0.000 loss=8.000\n",
+      "[2024-03-14 12:18:09,774 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [2/15] batch={'x': tensor([2, 6]), 'y': tensor([ 4, 12])} weight=0.020 loss=7.920\n",
+      "[2024-03-14 12:18:09,775 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_2\n",
+      "[2024-03-14 12:18:09,779 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [3/15] batch={'x': tensor([4, 7]), 'y': tensor([ 8, 14])} weight=0.040 loss=10.780\n",
+      "[2024-03-14 12:18:09,781 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [4/15] batch={'x': tensor([10,  1]), 'y': tensor([20,  2])} weight=0.067 loss=10.629\n",
+      "[2024-03-14 12:18:09,782 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_4\n",
+      "[2024-03-14 12:18:09,786 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [5/15] batch={'x': tensor([9, 8]), 'y': tensor([18, 16])} weight=0.095 loss=16.193\n",
+      "[2024-03-14 12:18:09,787 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [2/3]\n",
+      "[2024-03-14 12:18:09,789 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [6/15] batch={'x': tensor([5, 7]), 'y': tensor([10, 14])} weight=0.138 loss=11.175\n",
+      "[2024-03-14 12:18:09,790 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_6\n",
+      "[2024-03-14 12:18:09,794 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [7/15] batch={'x': tensor([1, 6]), 'y': tensor([ 2, 12])} weight=0.168 loss=6.414\n",
+      "[2024-03-14 12:18:09,796 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [8/15] batch={'x': tensor([9, 3]), 'y': tensor([18,  6])} weight=0.185 loss=10.890\n",
+      "[2024-03-14 12:18:09,797 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_8\n",
+      "[2024-03-14 12:18:09,823 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [9/15] batch={'x': tensor([8, 2]), 'y': tensor([16,  4])} weight=0.215 loss=8.925\n",
+      "[2024-03-14 12:18:09,825 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [10/15] batch={'x': tensor([10,  4]), 'y': tensor([20,  8])} weight=0.240 loss=12.320\n",
+      "[2024-03-14 12:18:09,833 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_10\n",
+      "[2024-03-14 12:18:09,842 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [3/3]\n",
+      "[2024-03-14 12:18:09,844 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [11/15] batch={'x': tensor([5, 9]), 'y': tensor([10, 18])} weight=0.275 loss=12.075\n",
+      "[2024-03-14 12:18:09,846 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [12/15] batch={'x': tensor([7, 2]), 'y': tensor([14,  4])} weight=0.310 loss=7.605\n",
+      "[2024-03-14 12:18:09,857 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_12\n",
+      "[2024-03-14 12:18:09,861 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [13/15] batch={'x': tensor([6, 3]), 'y': tensor([12,  6])} weight=0.333 loss=7.504\n",
+      "[2024-03-14 12:18:09,863 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [14/15] batch={'x': tensor([ 1, 10]), 'y': tensor([ 2, 20])} weight=0.355 loss=9.048\n",
+      "[2024-03-14 12:18:09,864 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_14\n",
+      "[2024-03-14 12:18:09,868 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [15/15] batch={'x': tensor([8, 4]), 'y': tensor([16,  8])} weight=0.383 loss=9.705\n"
      ]
     },
     {
@@ -1397,7 +1635,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj\u001b[0m\n",
+      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9\u001b[0m\n",
       "└── \u001b[1;36mcheckpoint_callback\u001b[0m\n",
       "    └── \u001b[1;36mcheckpoints\u001b[0m\n",
       "        ├── \u001b[1;36miter_10\u001b[0m\n",
@@ -1452,21 +1690,21 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2024-02-23 18:27:11,956 35174:140704541179520][checkpoint.py:54 todd.EpochBasedTrainer.checkpoint_callback init] INFO: Loading from /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_8\n",
-      "[2024-02-23 18:27:11,959 35174:140704541179520][base.py:65 todd.EpochBasedTrainer.checkpoint_callback load_model_state_dict] INFO: <All keys matched successfully>\n",
-      "\u001b[2m[2024-02-23 18:27:11,960 35174:140704541179520][base.py:57 todd.EpochBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:11,961 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [2/3]\n",
-      "[2024-02-23 18:27:11,964 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [9/15] batch={'x': tensor([6, 4]), 'y': tensor([12,  8])} weight=0.218 loss=8.913\n",
-      "[2024-02-23 18:27:11,966 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [10/15] batch={'x': tensor([ 1, 10]), 'y': tensor([ 2, 20])} weight=0.243 loss=9.666\n",
-      "[2024-02-23 18:27:11,967 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_10\n",
-      "[2024-02-23 18:27:11,970 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [3/3]\n",
-      "[2024-02-23 18:27:11,972 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [11/15] batch={'x': tensor([10,  7]), 'y': tensor([20, 14])} weight=0.270 loss=14.705\n",
-      "[2024-02-23 18:27:11,973 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [12/15] batch={'x': tensor([6, 8]), 'y': tensor([12, 16])} weight=0.312 loss=11.812\n",
-      "[2024-02-23 18:27:11,974 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_12\n",
-      "[2024-02-23 18:27:11,978 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [13/15] batch={'x': tensor([1, 9]), 'y': tensor([ 2, 18])} weight=0.347 loss=8.262\n",
-      "[2024-02-23 18:27:11,980 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [14/15] batch={'x': tensor([4, 2]), 'y': tensor([8, 4])} weight=0.373 loss=4.883\n",
-      "[2024-02-23 18:27:11,982 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_14\n",
-      "[2024-02-23 18:27:11,987 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [15/15] batch={'x': tensor([5, 3]), 'y': tensor([10,  6])} weight=0.387 loss=6.450\n"
+      "[2024-03-14 12:18:10,289 62058:140704275689088][checkpoint.py:54 todd.EpochBasedTrainer.checkpoint_callback init] INFO: Loading from /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_8\n",
+      "[2024-03-14 12:18:10,293 62058:140704275689088][base.py:65 todd.EpochBasedTrainer.checkpoint_callback load_model_state_dict] INFO: <All keys matched successfully>\n",
+      "\u001b[2m[2024-03-14 12:18:10,294 62058:140704275689088][base.py:56 todd.EpochBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:10,295 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [2/3]\n",
+      "[2024-03-14 12:18:10,298 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [9/15] batch={'x': tensor([3, 8]), 'y': tensor([ 6, 16])} weight=0.215 loss=9.818\n",
+      "[2024-03-14 12:18:10,300 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [10/15] batch={'x': tensor([5, 4]), 'y': tensor([10,  8])} weight=0.243 loss=7.909\n",
+      "[2024-03-14 12:18:10,300 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_10\n",
+      "[2024-03-14 12:18:10,303 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [3/3]\n",
+      "[2024-03-14 12:18:10,305 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [11/15] batch={'x': tensor([7, 4]), 'y': tensor([14,  8])} weight=0.265 loss=9.542\n",
+      "[2024-03-14 12:18:10,307 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [12/15] batch={'x': tensor([10,  2]), 'y': tensor([20,  4])} weight=0.293 loss=10.245\n",
+      "[2024-03-14 12:18:10,307 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_12\n",
+      "[2024-03-14 12:18:10,311 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [13/15] batch={'x': tensor([6, 3]), 'y': tensor([12,  6])} weight=0.323 loss=7.549\n",
+      "[2024-03-14 12:18:10,312 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [14/15] batch={'x': tensor([5, 9]), 'y': tensor([10, 18])} weight=0.345 loss=11.585\n",
+      "[2024-03-14 12:18:10,313 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_14\n",
+      "[2024-03-14 12:18:10,316 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [15/15] batch={'x': tensor([1, 8]), 'y': tensor([ 2, 16])} weight=0.380 loss=7.290\n"
      ]
     },
     {
@@ -1482,17 +1720,17 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2024-02-23 18:27:12,442 35174:140704541179520][checkpoint.py:54 todd.EpochBasedTrainer.checkpoint_callback init] INFO: Loading from /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_10\n",
-      "[2024-02-23 18:27:12,445 35174:140704541179520][base.py:65 todd.EpochBasedTrainer.checkpoint_callback load_model_state_dict] INFO: <All keys matched successfully>\n",
-      "\u001b[2m[2024-02-23 18:27:12,446 35174:140704541179520][base.py:57 todd.EpochBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:12,447 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [3/3]\n",
-      "[2024-02-23 18:27:12,450 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [11/15] batch={'x': tensor([6, 2]), 'y': tensor([12,  4])} weight=0.270 loss=6.920\n",
-      "[2024-02-23 18:27:12,452 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [12/15] batch={'x': tensor([5, 7]), 'y': tensor([10, 14])} weight=0.290 loss=10.260\n",
-      "[2024-02-23 18:27:12,453 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_12\n",
-      "[2024-02-23 18:27:12,463 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [13/15] batch={'x': tensor([10,  4]), 'y': tensor([20,  8])} weight=0.320 loss=11.760\n",
-      "[2024-02-23 18:27:12,465 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [14/15] batch={'x': tensor([3, 8]), 'y': tensor([ 6, 16])} weight=0.355 loss=9.047\n",
-      "[2024-02-23 18:27:12,466 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp46q6bkgj/checkpoint_callback/checkpoints/iter_14\n",
-      "[2024-02-23 18:27:12,470 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [15/15] batch={'x': tensor([1, 9]), 'y': tensor([ 2, 18])} weight=0.383 loss=8.087\n"
+      "[2024-03-14 12:18:10,730 62058:140704275689088][checkpoint.py:54 todd.EpochBasedTrainer.checkpoint_callback init] INFO: Loading from /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_10\n",
+      "[2024-03-14 12:18:10,734 62058:140704275689088][base.py:65 todd.EpochBasedTrainer.checkpoint_callback load_model_state_dict] INFO: <All keys matched successfully>\n",
+      "\u001b[2m[2024-03-14 12:18:10,734 62058:140704275689088][base.py:56 todd.EpochBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:10,736 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [3/3]\n",
+      "[2024-03-14 12:18:10,740 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [11/15] batch={'x': tensor([7, 6]), 'y': tensor([14, 12])} weight=0.265 loss=11.278\n",
+      "[2024-03-14 12:18:10,742 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [12/15] batch={'x': tensor([1, 4]), 'y': tensor([2, 8])} weight=0.298 loss=4.256\n",
+      "[2024-03-14 12:18:10,743 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_12\n",
+      "[2024-03-14 12:18:10,748 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [13/15] batch={'x': tensor([8, 5]), 'y': tensor([16, 10])} weight=0.310 loss=10.985\n",
+      "[2024-03-14 12:18:10,750 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [14/15] batch={'x': tensor([ 2, 10]), 'y': tensor([ 4, 20])} weight=0.343 loss=9.945\n",
+      "[2024-03-14 12:18:10,751 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpn0pz4ax9/checkpoint_callback/checkpoints/iter_14\n",
+      "[2024-03-14 12:18:10,756 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [15/15] batch={'x': tensor([9, 3]), 'y': tensor([18,  6])} weight=0.373 loss=9.765\n"
      ]
     }
    ],
@@ -1553,28 +1791,28 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:27:12,498 35174:140704541179520][base.py:57 todd.EpochBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:12,499 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [1/3]\n",
-      "[2024-02-23 18:27:12,501 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [1/15] batch={'x': tensor([3, 1]), 'y': tensor([6, 2])} weight=0.000 loss=4.000\n",
-      "[2024-02-23 18:27:12,503 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [2/15] batch={'x': tensor([8, 5]), 'y': tensor([16, 10])} weight=0.010 loss=12.935\n",
-      "[2024-02-23 18:27:12,505 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [3/15] batch={'x': tensor([2, 7]), 'y': tensor([ 4, 14])} weight=0.042 loss=8.809\n",
-      "[2024-02-23 18:27:12,507 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [4/15] batch={'x': tensor([ 6, 10]), 'y': tensor([12, 20])} weight=0.065 loss=15.480\n",
-      "[2024-02-23 18:27:12,509 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [5/15] batch={'x': tensor([9, 4]), 'y': tensor([18,  8])} weight=0.105 loss=12.318\n",
-      "[2024-02-23 18:27:12,510 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp8jm3x0vu/checkpoint_callback/checkpoints/epoch_1\n",
-      "[2024-02-23 18:27:12,513 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [2/3]\n",
-      "[2024-02-23 18:27:12,515 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [6/15] batch={'x': tensor([1, 5]), 'y': tensor([ 2, 10])} weight=0.137 loss=5.588\n",
-      "[2024-02-23 18:27:12,517 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [7/15] batch={'x': tensor([10,  2]), 'y': tensor([20,  4])} weight=0.152 loss=11.085\n",
-      "[2024-02-23 18:27:12,519 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [8/15] batch={'x': tensor([4, 8]), 'y': tensor([ 8, 16])} weight=0.182 loss=10.905\n",
-      "[2024-02-23 18:27:12,521 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [9/15] batch={'x': tensor([3, 7]), 'y': tensor([ 6, 14])} weight=0.212 loss=8.938\n",
-      "[2024-02-23 18:27:12,523 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [10/15] batch={'x': tensor([6, 9]), 'y': tensor([12, 18])} weight=0.237 loss=13.219\n",
-      "[2024-02-23 18:27:12,525 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp8jm3x0vu/checkpoint_callback/checkpoints/epoch_2\n",
-      "[2024-02-23 18:27:12,528 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [3/3]\n",
-      "[2024-02-23 18:27:12,530 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [11/15] batch={'x': tensor([ 1, 10]), 'y': tensor([ 2, 20])} weight=0.275 loss=9.488\n",
-      "[2024-02-23 18:27:12,532 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [12/15] batch={'x': tensor([4, 6]), 'y': tensor([ 8, 12])} weight=0.302 loss=8.488\n",
-      "[2024-02-23 18:27:12,534 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [13/15] batch={'x': tensor([8, 9]), 'y': tensor([16, 18])} weight=0.327 loss=14.216\n",
-      "[2024-02-23 18:27:12,535 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [14/15] batch={'x': tensor([7, 5]), 'y': tensor([14, 10])} weight=0.370 loss=9.780\n",
-      "[2024-02-23 18:27:12,537 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [15/15] batch={'x': tensor([3, 2]), 'y': tensor([6, 4])} weight=0.400 loss=4.000\n",
-      "[2024-02-23 18:27:12,538 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp8jm3x0vu/checkpoint_callback/checkpoints/epoch_3\n"
+      "\u001b[2m[2024-03-14 12:18:10,784 62058:140704275689088][base.py:56 todd.EpochBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:10,785 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [1/3]\n",
+      "[2024-03-14 12:18:10,789 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [1/15] batch={'x': tensor([4, 5]), 'y': tensor([ 8, 10])} weight=0.000 loss=9.000\n",
+      "[2024-03-14 12:18:10,793 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [2/15] batch={'x': tensor([3, 6]), 'y': tensor([ 6, 12])} weight=0.022 loss=8.899\n",
+      "[2024-03-14 12:18:10,797 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [3/15] batch={'x': tensor([ 9, 10]), 'y': tensor([18, 20])} weight=0.045 loss=18.572\n",
+      "[2024-03-14 12:18:10,799 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [4/15] batch={'x': tensor([8, 2]), 'y': tensor([16,  4])} weight=0.093 loss=9.538\n",
+      "[2024-03-14 12:18:10,802 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [5/15] batch={'x': tensor([1, 7]), 'y': tensor([ 2, 14])} weight=0.117 loss=7.530\n",
+      "[2024-03-14 12:18:10,803 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmplwjo14oz/checkpoint_callback/checkpoints/epoch_1\n",
+      "[2024-03-14 12:18:10,808 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [2/3]\n",
+      "[2024-03-14 12:18:10,812 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [6/15] batch={'x': tensor([4, 1]), 'y': tensor([8, 2])} weight=0.138 loss=4.656\n",
+      "[2024-03-14 12:18:10,816 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [7/15] batch={'x': tensor([7, 5]), 'y': tensor([14, 10])} weight=0.150 loss=11.100\n",
+      "[2024-03-14 12:18:10,820 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [8/15] batch={'x': tensor([3, 9]), 'y': tensor([ 6, 18])} weight=0.180 loss=10.920\n",
+      "[2024-03-14 12:18:10,822 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [9/15] batch={'x': tensor([6, 2]), 'y': tensor([12,  4])} weight=0.210 loss=7.160\n",
+      "[2024-03-14 12:18:10,824 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [10/15] batch={'x': tensor([10,  8]), 'y': tensor([20, 16])} weight=0.230 loss=15.930\n",
+      "[2024-03-14 12:18:10,826 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmplwjo14oz/checkpoint_callback/checkpoints/epoch_2\n",
+      "[2024-03-14 12:18:10,829 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [3/3]\n",
+      "[2024-03-14 12:18:10,833 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [11/15] batch={'x': tensor([2, 9]), 'y': tensor([ 4, 18])} weight=0.275 loss=9.488\n",
+      "[2024-03-14 12:18:10,835 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [12/15] batch={'x': tensor([6, 5]), 'y': tensor([12, 10])} weight=0.303 loss=9.336\n",
+      "[2024-03-14 12:18:10,837 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [13/15] batch={'x': tensor([3, 8]), 'y': tensor([ 6, 16])} weight=0.330 loss=9.185\n",
+      "[2024-03-14 12:18:10,839 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [14/15] batch={'x': tensor([ 4, 10]), 'y': tensor([ 8, 20])} weight=0.358 loss=11.497\n",
+      "[2024-03-14 12:18:10,841 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [15/15] batch={'x': tensor([7, 1]), 'y': tensor([14,  2])} weight=0.393 loss=6.430\n",
+      "[2024-03-14 12:18:10,843 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmplwjo14oz/checkpoint_callback/checkpoints/epoch_3\n"
      ]
     },
     {
@@ -1582,7 +1820,7 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp8jm3x0vu\u001b[0m\n",
+      "\u001b[1;36m/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmplwjo14oz\u001b[0m\n",
       "└── \u001b[1;36mcheckpoint_callback\u001b[0m\n",
       "    └── \u001b[1;36mcheckpoints\u001b[0m\n",
       "        ├── \u001b[1;36mepoch_1\u001b[0m\n",
@@ -1613,16 +1851,16 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "[2024-02-23 18:27:12,968 35174:140704541179520][checkpoint.py:54 todd.EpochBasedTrainer.checkpoint_callback init] INFO: Loading from /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp8jm3x0vu/checkpoint_callback/checkpoints/epoch_2\n",
-      "[2024-02-23 18:27:12,974 35174:140704541179520][base.py:65 todd.EpochBasedTrainer.checkpoint_callback load_model_state_dict] INFO: <All keys matched successfully>\n",
-      "\u001b[2m[2024-02-23 18:27:12,976 35174:140704541179520][base.py:57 todd.EpochBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:12,978 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [3/3]\n",
-      "[2024-02-23 18:27:12,982 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [11/15] batch={'x': tensor([1, 8]), 'y': tensor([ 2, 16])} weight=0.275 loss=7.763\n",
-      "[2024-02-23 18:27:12,984 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [12/15] batch={'x': tensor([2, 7]), 'y': tensor([ 4, 14])} weight=0.297 loss=7.661\n",
-      "[2024-02-23 18:27:12,986 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [13/15] batch={'x': tensor([9, 3]), 'y': tensor([18,  6])} weight=0.320 loss=10.080\n",
-      "[2024-02-23 18:27:12,989 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [14/15] batch={'x': tensor([5, 6]), 'y': tensor([10, 12])} weight=0.350 loss=9.075\n",
-      "[2024-02-23 18:27:12,991 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [15/15] batch={'x': tensor([10,  4]), 'y': tensor([20,  8])} weight=0.377 loss=11.358\n",
-      "[2024-02-23 18:27:12,992 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp8jm3x0vu/checkpoint_callback/checkpoints/epoch_3\n"
+      "[2024-03-14 12:18:11,264 62058:140704275689088][checkpoint.py:54 todd.EpochBasedTrainer.checkpoint_callback init] INFO: Loading from /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmplwjo14oz/checkpoint_callback/checkpoints/epoch_2\n",
+      "[2024-03-14 12:18:11,267 62058:140704275689088][base.py:65 todd.EpochBasedTrainer.checkpoint_callback load_model_state_dict] INFO: <All keys matched successfully>\n",
+      "\u001b[2m[2024-03-14 12:18:11,268 62058:140704275689088][base.py:56 todd.EpochBasedTrainer.checkpoint_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:11,269 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.checkpoint_callback before_run_epoch] INFO: Epoch [3/3]\n",
+      "[2024-03-14 12:18:11,279 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [11/15] batch={'x': tensor([6, 2]), 'y': tensor([12,  4])} weight=0.275 loss=6.900\n",
+      "[2024-03-14 12:18:11,291 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [12/15] batch={'x': tensor([3, 5]), 'y': tensor([ 6, 10])} weight=0.295 loss=6.820\n",
+      "[2024-03-14 12:18:11,305 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [13/15] batch={'x': tensor([1, 4]), 'y': tensor([2, 8])} weight=0.315 loss=4.212\n",
+      "[2024-03-14 12:18:11,312 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [14/15] batch={'x': tensor([7, 9]), 'y': tensor([14, 18])} weight=0.328 loss=13.380\n",
+      "[2024-03-14 12:18:11,315 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.checkpoint_callback after_run_iter] INFO: Iter [15/15] batch={'x': tensor([10,  8]), 'y': tensor([20, 16])} weight=0.368 loss=14.693\n",
+      "[2024-03-14 12:18:11,317 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.checkpoint_callback _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmplwjo14oz/checkpoint_callback/checkpoints/epoch_3\n"
      ]
     }
    ],
@@ -1700,15 +1938,15 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:27:13,032 35174:140704541179520][base.py:57 todd.FaultyValidator.monitor_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "\u001b[1;31m[2024-02-23 18:27:13,035 35174:140704541179520][monitor.py:26 todd.FaultyValidator.monitor_callback __exit__] ERROR: Unable to run iter_=1\n",
+      "\u001b[2m[2024-03-14 12:18:11,358 62058:140704275689088][base.py:56 todd.FaultyValidator.monitor_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "\u001b[1;31m[2024-03-14 12:18:11,359 62058:140704275689088][monitor.py:26 todd.FaultyValidator.monitor_callback __exit__] ERROR: Unable to run iter_=1\n",
       "batch={'x': tensor([1]), 'y': tensor([2])}\n",
-      "memo={'dataloader': <torch.utils.data.dataloader.DataLoader object at 0x14c1dc050>}\n",
+      "memo={'dataloader': <torch.utils.data.dataloader.DataLoader object at 0x152023210>}\n",
       "Traceback (most recent call last):\n",
-      "  File \"/Users/bytedance/.local/share/virtualenvs/todd-ARrcnwyq/lib/python3.11/site-packages/todd/runners/base.py\", line 255, in _run\n",
+      "  File \"/Users/bytedance/.local/share/virtualenvs/todd-ARrcnwyq/lib/python3.11/site-packages/todd/runners/base.py\", line 246, in _run\n",
       "    memo = self._run_iter(batch, memo)\n",
       "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-      "  File \"/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/ipykernel_35174/1715875531.py\", line 5, in _run_iter\n",
+      "  File \"/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/ipykernel_62058/1715875531.py\", line 5, in _run_iter\n",
       "    raise CustomError(\"faulty runner\")\n",
       "CustomError: faulty runner\u001b[m\n"
      ]
@@ -1718,15 +1956,15 @@
      "output_type": "stream",
      "text": [
       "\n",
-      "[2024-02-23 18:27:13,032 35174:140704541179520][base.py:57 todd.FaultyValidator.monitor_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\n",
-      "[2024-02-23 18:27:13,035 35174:140704541179520][monitor.py:26 todd.FaultyValidator.monitor_callback __exit__] ERROR: Unable to run iter_=1\n",
+      "[2024-03-14 12:18:11,358 62058:140704275689088][base.py:56 todd.FaultyValidator.monitor_callback __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\n",
+      "[2024-03-14 12:18:11,359 62058:140704275689088][monitor.py:26 todd.FaultyValidator.monitor_callback __exit__] ERROR: Unable to run iter_=1\n",
       "batch={'x': tensor([1]), 'y': tensor([2])}\n",
-      "memo={'dataloader': <torch.utils.data.dataloader.DataLoader object at 0x14c1dc050>}\n",
+      "memo={'dataloader': <torch.utils.data.dataloader.DataLoader object at 0x152023210>}\n",
       "Traceback (most recent call last):\n",
-      "  File \"/Users/bytedance/.local/share/virtualenvs/todd-ARrcnwyq/lib/python3.11/site-packages/todd/runners/base.py\", line 255, in _run\n",
+      "  File \"/Users/bytedance/.local/share/virtualenvs/todd-ARrcnwyq/lib/python3.11/site-packages/todd/runners/base.py\", line 246, in _run\n",
       "    memo = self._run_iter(batch, memo)\n",
       "           ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
-      "  File \"/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/ipykernel_35174/1715875531.py\", line 5, in _run_iter\n",
+      "  File \"/var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/ipykernel_62058/1715875531.py\", line 5, in _run_iter\n",
       "    raise CustomError(\"faulty runner\")\n",
       "CustomError: faulty runner\n"
      ]
@@ -1780,28 +2018,28 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:27:13,341 35174:140704541179520][base.py:57 todd.EpochBasedTrainer.strategy_load_model_from __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:13,342 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [1/3]\n",
-      "[2024-02-23 18:27:13,346 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [1/15] batch={'x': tensor([1, 3]), 'y': tensor([2, 6])} weight=0.000 loss=4.000\n",
-      "[2024-02-23 18:27:13,347 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [2/15] batch={'x': tensor([6, 2]), 'y': tensor([12,  4])} weight=0.010 loss=7.960\n",
-      "[2024-02-23 18:27:13,349 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [3/15] batch={'x': tensor([10,  8]), 'y': tensor([20, 16])} weight=0.030 loss=17.730\n",
-      "[2024-02-23 18:27:13,351 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [4/15] batch={'x': tensor([5, 7]), 'y': tensor([10, 14])} weight=0.075 loss=11.550\n",
-      "[2024-02-23 18:27:13,367 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [5/15] batch={'x': tensor([4, 9]), 'y': tensor([ 8, 18])} weight=0.105 loss=12.318\n",
-      "[2024-02-23 18:27:13,385 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp91d3imrz/strategy_load_model_from/checkpoints/epoch_1\n",
-      "[2024-02-23 18:27:13,414 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [2/3]\n",
-      "[2024-02-23 18:27:13,419 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [6/15] batch={'x': tensor([5, 2]), 'y': tensor([10,  4])} weight=0.137 loss=6.519\n",
-      "[2024-02-23 18:27:13,420 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [7/15] batch={'x': tensor([6, 8]), 'y': tensor([12, 16])} weight=0.155 loss=12.915\n",
-      "[2024-02-23 18:27:13,422 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [8/15] batch={'x': tensor([9, 3]), 'y': tensor([18,  6])} weight=0.190 loss=10.860\n",
-      "[2024-02-23 18:27:13,424 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [9/15] batch={'x': tensor([7, 1]), 'y': tensor([14,  2])} weight=0.220 loss=7.120\n",
-      "[2024-02-23 18:27:13,426 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [10/15] batch={'x': tensor([10,  4]), 'y': tensor([20,  8])} weight=0.240 loss=12.320\n",
-      "[2024-02-23 18:27:13,427 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp91d3imrz/strategy_load_model_from/checkpoints/epoch_2\n",
-      "[2024-02-23 18:27:13,430 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [3/3]\n",
-      "[2024-02-23 18:27:13,432 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [11/15] batch={'x': tensor([ 3, 10]), 'y': tensor([ 6, 20])} weight=0.275 loss=11.212\n",
-      "[2024-02-23 18:27:13,434 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [12/15] batch={'x': tensor([7, 4]), 'y': tensor([14,  8])} weight=0.307 loss=9.309\n",
-      "[2024-02-23 18:27:13,436 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [13/15] batch={'x': tensor([2, 5]), 'y': tensor([ 4, 10])} weight=0.335 loss=5.827\n",
-      "[2024-02-23 18:27:13,438 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [14/15] batch={'x': tensor([9, 6]), 'y': tensor([18, 12])} weight=0.352 loss=12.356\n",
-      "[2024-02-23 18:27:13,440 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [15/15] batch={'x': tensor([8, 1]), 'y': tensor([16,  2])} weight=0.390 loss=7.245\n",
-      "[2024-02-23 18:27:13,441 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp91d3imrz/strategy_load_model_from/checkpoints/epoch_3\n"
+      "\u001b[2m[2024-03-14 12:18:11,674 62058:140704275689088][base.py:56 todd.EpochBasedTrainer.strategy_load_model_from __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:11,675 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [1/3]\n",
+      "[2024-03-14 12:18:11,679 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [1/15] batch={'x': tensor([2, 5]), 'y': tensor([ 4, 10])} weight=0.000 loss=7.000\n",
+      "[2024-03-14 12:18:11,682 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [2/15] batch={'x': tensor([ 7, 10]), 'y': tensor([14, 20])} weight=0.018 loss=16.851\n",
+      "[2024-03-14 12:18:11,684 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [3/15] batch={'x': tensor([1, 3]), 'y': tensor([2, 6])} weight=0.060 loss=3.880\n",
+      "[2024-03-14 12:18:11,687 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [4/15] batch={'x': tensor([8, 4]), 'y': tensor([16,  8])} weight=0.070 loss=11.580\n",
+      "[2024-03-14 12:18:11,689 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [5/15] batch={'x': tensor([6, 9]), 'y': tensor([12, 18])} weight=0.100 loss=14.250\n",
+      "[2024-03-14 12:18:11,691 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpj2udcidm/strategy_load_model_from/checkpoints/epoch_1\n",
+      "[2024-03-14 12:18:11,694 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [2/3]\n",
+      "[2024-03-14 12:18:11,697 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [6/15] batch={'x': tensor([9, 4]), 'y': tensor([18,  8])} weight=0.138 loss=12.106\n",
+      "[2024-03-14 12:18:11,700 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [7/15] batch={'x': tensor([ 5, 10]), 'y': tensor([10, 20])} weight=0.170 loss=13.725\n",
+      "[2024-03-14 12:18:11,702 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [8/15] batch={'x': tensor([3, 2]), 'y': tensor([6, 4])} weight=0.207 loss=4.481\n",
+      "[2024-03-14 12:18:11,704 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [9/15] batch={'x': tensor([8, 6]), 'y': tensor([16, 12])} weight=0.220 loss=12.460\n",
+      "[2024-03-14 12:18:11,706 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [10/15] batch={'x': tensor([7, 1]), 'y': tensor([14,  2])} weight=0.255 loss=6.980\n",
+      "[2024-03-14 12:18:11,708 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpj2udcidm/strategy_load_model_from/checkpoints/epoch_2\n",
+      "[2024-03-14 12:18:11,711 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [3/3]\n",
+      "[2024-03-14 12:18:11,713 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [11/15] batch={'x': tensor([6, 1]), 'y': tensor([12,  2])} weight=0.275 loss=6.038\n",
+      "[2024-03-14 12:18:11,714 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [12/15] batch={'x': tensor([7, 9]), 'y': tensor([14, 18])} weight=0.293 loss=13.660\n",
+      "[2024-03-14 12:18:11,716 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [13/15] batch={'x': tensor([8, 5]), 'y': tensor([16, 10])} weight=0.333 loss=10.839\n",
+      "[2024-03-14 12:18:11,718 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [14/15] batch={'x': tensor([ 4, 10]), 'y': tensor([ 8, 20])} weight=0.365 loss=11.445\n",
+      "[2024-03-14 12:18:11,720 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [15/15] batch={'x': tensor([2, 3]), 'y': tensor([4, 6])} weight=0.400 loss=4.000\n",
+      "[2024-03-14 12:18:11,721 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpj2udcidm/strategy_load_model_from/checkpoints/epoch_3\n"
      ]
     },
     {
@@ -1817,30 +2055,30 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[2m[2024-02-23 18:27:13,865 35174:140704541179520][base.py:57 todd.EpochBasedTrainer.strategy_load_model_from __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
-      "[2024-02-23 18:27:13,866 35174:140704541179520][base.py:80 todd.EpochBasedTrainer.strategy_load_model_from load_model_from] INFO: Loading model from /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp91d3imrz/strategy_load_model_from/checkpoints/epoch_2/model.pth\n",
-      "[2024-02-23 18:27:13,868 35174:140704541179520][base.py:65 todd.EpochBasedTrainer.strategy_load_model_from load_model_state_dict] INFO: <All keys matched successfully>\n",
-      "[2024-02-23 18:27:13,869 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [1/3]\n",
-      "[2024-02-23 18:27:13,873 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [1/15] batch={'x': tensor([7, 2]), 'y': tensor([14,  4])} weight=0.275 loss=7.762\n",
-      "[2024-02-23 18:27:13,875 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [2/15] batch={'x': tensor([ 6, 10]), 'y': tensor([12, 20])} weight=0.297 loss=13.620\n",
-      "[2024-02-23 18:27:13,877 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [3/15] batch={'x': tensor([1, 4]), 'y': tensor([2, 8])} weight=0.337 loss=4.156\n",
-      "[2024-02-23 18:27:13,881 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [4/15] batch={'x': tensor([8, 9]), 'y': tensor([16, 18])} weight=0.350 loss=14.025\n",
-      "[2024-02-23 18:27:13,883 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [5/15] batch={'x': tensor([5, 3]), 'y': tensor([10,  6])} weight=0.392 loss=6.430\n",
-      "[2024-02-23 18:27:13,884 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp91d3imrz/strategy_load_model_from/checkpoints/epoch_1\n",
-      "[2024-02-23 18:27:13,888 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [2/3]\n",
-      "[2024-02-23 18:27:13,890 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [6/15] batch={'x': tensor([10,  1]), 'y': tensor([20,  2])} weight=0.412 loss=8.731\n",
-      "[2024-02-23 18:27:13,891 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [7/15] batch={'x': tensor([5, 4]), 'y': tensor([10,  8])} weight=0.440 loss=7.020\n",
-      "[2024-02-23 18:27:13,894 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [8/15] batch={'x': tensor([3, 9]), 'y': tensor([ 6, 18])} weight=0.462 loss=9.225\n",
-      "[2024-02-23 18:27:13,896 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [9/15] batch={'x': tensor([6, 7]), 'y': tensor([12, 14])} weight=0.492 loss=9.799\n",
-      "[2024-02-23 18:27:13,898 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [10/15] batch={'x': tensor([2, 8]), 'y': tensor([ 4, 16])} weight=0.525 loss=7.375\n",
-      "[2024-02-23 18:27:13,899 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp91d3imrz/strategy_load_model_from/checkpoints/epoch_2\n",
-      "[2024-02-23 18:27:13,902 35174:140704541179520][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [3/3]\n",
-      "[2024-02-23 18:27:13,905 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [11/15] batch={'x': tensor([8, 5]), 'y': tensor([16, 10])} weight=0.550 loss=9.425\n",
-      "[2024-02-23 18:27:13,907 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [12/15] batch={'x': tensor([9, 3]), 'y': tensor([18,  6])} weight=0.582 loss=8.505\n",
-      "[2024-02-23 18:27:13,910 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [13/15] batch={'x': tensor([10,  4]), 'y': tensor([20,  8])} weight=0.612 loss=9.712\n",
-      "[2024-02-23 18:27:13,912 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [14/15] batch={'x': tensor([1, 6]), 'y': tensor([ 2, 12])} weight=0.647 loss=4.734\n",
-      "[2024-02-23 18:27:13,914 35174:140704541179520][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [15/15] batch={'x': tensor([2, 7]), 'y': tensor([ 4, 14])} weight=0.665 loss=6.008\n",
-      "[2024-02-23 18:27:13,915 35174:140704541179520][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmp91d3imrz/strategy_load_model_from/checkpoints/epoch_3\n"
+      "\u001b[2m[2024-03-14 12:18:12,135 62058:140704275689088][base.py:56 todd.EpochBasedTrainer.strategy_load_model_from __init__] DEBUG: Rank 0 initialized by bytedance@C02G870SMD6R\u001b[m\n",
+      "[2024-03-14 12:18:12,135 62058:140704275689088][base.py:80 todd.EpochBasedTrainer.strategy_load_model_from load_model_from] INFO: Loading model from /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpj2udcidm/strategy_load_model_from/checkpoints/epoch_2/model.pth\n",
+      "[2024-03-14 12:18:12,138 62058:140704275689088][base.py:65 todd.EpochBasedTrainer.strategy_load_model_from load_model_state_dict] INFO: <All keys matched successfully>\n",
+      "[2024-03-14 12:18:12,139 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [1/3]\n",
+      "[2024-03-14 12:18:12,142 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [1/15] batch={'x': tensor([10,  1]), 'y': tensor([20,  2])} weight=0.275 loss=9.488\n",
+      "[2024-03-14 12:18:12,143 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [2/15] batch={'x': tensor([5, 3]), 'y': tensor([10,  6])} weight=0.303 loss=6.790\n",
+      "[2024-03-14 12:18:12,145 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [3/15] batch={'x': tensor([9, 6]), 'y': tensor([18, 12])} weight=0.323 loss=12.581\n",
+      "[2024-03-14 12:18:12,146 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [4/15] batch={'x': tensor([2, 4]), 'y': tensor([4, 8])} weight=0.360 loss=4.920\n",
+      "[2024-03-14 12:18:12,148 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [5/15] batch={'x': tensor([8, 7]), 'y': tensor([16, 14])} weight=0.375 loss=12.188\n",
+      "[2024-03-14 12:18:12,149 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpj2udcidm/strategy_load_model_from/checkpoints/epoch_1\n",
+      "[2024-03-14 12:18:12,153 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [2/3]\n",
+      "[2024-03-14 12:18:12,154 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [6/15] batch={'x': tensor([8, 1]), 'y': tensor([16,  2])} weight=0.412 loss=7.144\n",
+      "[2024-03-14 12:18:12,156 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [7/15] batch={'x': tensor([9, 3]), 'y': tensor([18,  6])} weight=0.435 loss=9.390\n",
+      "[2024-03-14 12:18:12,157 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [8/15] batch={'x': tensor([10,  4]), 'y': tensor([20,  8])} weight=0.465 loss=10.745\n",
+      "[2024-03-14 12:18:12,159 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [9/15] batch={'x': tensor([2, 6]), 'y': tensor([ 4, 12])} weight=0.500 loss=6.000\n",
+      "[2024-03-14 12:18:12,161 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [10/15] batch={'x': tensor([5, 7]), 'y': tensor([10, 14])} weight=0.520 loss=8.880\n",
+      "[2024-03-14 12:18:12,162 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpj2udcidm/strategy_load_model_from/checkpoints/epoch_2\n",
+      "[2024-03-14 12:18:12,164 62058:140704275689088][log.py:99 todd.EpochBasedTrainer.strategy_load_model_from before_run_epoch] INFO: Epoch [3/3]\n",
+      "[2024-03-14 12:18:12,166 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [11/15] batch={'x': tensor([ 6, 10]), 'y': tensor([12, 20])} weight=0.550 loss=11.600\n",
+      "[2024-03-14 12:18:12,167 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [12/15] batch={'x': tensor([7, 1]), 'y': tensor([14,  2])} weight=0.590 loss=5.640\n",
+      "[2024-03-14 12:18:12,169 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [13/15] batch={'x': tensor([9, 3]), 'y': tensor([18,  6])} weight=0.610 loss=8.340\n",
+      "[2024-03-14 12:18:12,170 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [14/15] batch={'x': tensor([4, 2]), 'y': tensor([8, 4])} weight=0.640 loss=4.080\n",
+      "[2024-03-14 12:18:12,172 62058:140704275689088][log.py:93 todd.EpochBasedTrainer.strategy_load_model_from after_run_iter] INFO: Iter [15/15] batch={'x': tensor([5, 8]), 'y': tensor([10, 16])} weight=0.655 loss=8.743\n",
+      "[2024-03-14 12:18:12,173 62058:140704275689088][checkpoint.py:80 todd.EpochBasedTrainer.strategy_load_model_from _save] INFO: Saving state dict to /var/folders/v_/1kkfntxs5z74_rwvy1f3_mp80000gn/T/tmpj2udcidm/strategy_load_model_from/checkpoints/epoch_3\n"
      ]
     }
    ],