diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml
index df4a109841..84c8ac4b74 100644
--- a/.github/workflows/build_wheel.yml
+++ b/.github/workflows/build_wheel.yml
@@ -136,6 +136,41 @@ jobs:
           tags: ${{ steps.meta.outputs.tags }}
           labels: ${{ steps.meta.outputs.labels }}
 
+  build_pypi_index:  # Builds a static package index with dumb-pypi and uploads it as a Pages artifact
+    needs: [build_wheels, build_sdist]  # runs only after both build jobs have finished successfully
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/download-artifact@v3
+        with:
+          name: artifact  # presumably the artifact uploaded by build_wheels/build_sdist; confirm against those jobs
+          path: dist/packages  # same directory the index step enumerates with `ls dist/packages`
+      - uses: actions/setup-python@v4
+        name: Install Python
+        with:
+          python-version: '3.11'  # quoted so YAML keeps the version a string
+      - run: pip install dumb-pypi  # index generator invoked by the next step
+      - run: |
+          ls dist/packages > package_list.txt
+          dumb-pypi --output-dir dist --packages-url ../../packages --package-list package_list.txt --title "DeePMD-kit Developed Packages"
+      - name: Upload Pages artifact
+        uses: actions/upload-pages-artifact@v2
+        with:
+          path: dist  # contains the generated index (--output-dir dist) and the downloaded files under dist/packages
+  deploy_pypi_index:  # Publishes the Pages artifact produced by build_pypi_index to GitHub Pages
+    needs: build_pypi_index  # the artifact to deploy is uploaded by that job
+    permissions:  # NOTE(review): these two scopes are what actions/deploy-pages documents as required; confirm if the action is upgraded
+      pages: write
+      id-token: write
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}  # output of the step with id `deployment` below
+    runs-on: ubuntu-latest
+    if: github.event_name == 'push' && github.ref == 'refs/heads/devel' && github.repository_owner == 'deepmodeling'  # skip PRs, other branches and forks
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment  # referenced by environment.url above
+        uses: actions/deploy-pages@v2
+
   pass:
     name: Pass testing build wheels
     needs: [build_wheels, build_sdist]
diff --git a/.github/workflows/test_cc.yml b/.github/workflows/test_cc.yml
index fa37009730..a98afa7a94 100644
--- a/.github/workflows/test_cc.yml
+++ b/.github/workflows/test_cc.yml
@@ -30,7 +30,7 @@ jobs:
     # TODO: remove ase version when ase has new release
     - run: |
         python -m pip install -U pip
-        python -m pip install -e .[cpu,test,lmp] "ase @ https://github.com/rosswhitfield/ase/archive/edd03571aff6944b77b4a4b055239f3c3e4eeb66.zip"
+        python -m pip install -e .[cpu,test,lmp] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
       env:
         DP_BUILD_TESTING: 1
     - run: pytest --cov=deepmd source/lmp/tests
diff --git a/.github/workflows/test_cuda.yml b/.github/workflows/test_cuda.yml
index 7b95e6d37b..ca72a32277 100644
--- a/.github/workflows/test_cuda.yml
+++ b/.github/workflows/test_cuda.yml
@@ -36,7 +36,7 @@ jobs:
     - name: Set PyPI mirror for Aliyun cloud machine
       run: python -m pip config --user set global.index-url https://mirrors.aliyun.com/pypi/simple/
     - run: python -m pip install -U "pip>=21.3.1,!=23.0.0"
-    - run: python -m pip install -v -e .[gpu,test,lmp,cu11] "ase @ https://github.com/rosswhitfield/ase/archive/edd03571aff6944b77b4a4b055239f3c3e4eeb66.zip"
+    - run: python -m pip install -v -e .[gpu,test,lmp,cu11] "ase @ https://gitlab.com/ase/ase/-/archive/8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f/ase-8c5aa5fd6448c5cfb517a014dccf2b214a9dfa8f.tar.gz"
       env:
         DP_BUILD_TESTING: 1
         DP_VARIANT: cuda
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 50a47499f1..e168af2c8d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,13 +30,13 @@ repos:
       exclude: ^source/3rdparty
 -   repo: https://github.com/astral-sh/ruff-pre-commit
     # Ruff version.
-    rev: v0.0.292
+    rev: v0.1.1
     hooks:
     - id: ruff
       args: ["--fix"]
       exclude: ^source/3rdparty
 -   repo: https://github.com/psf/black-pre-commit-mirror
-    rev: 23.9.1
+    rev: 23.10.0
     hooks:
     -   id: black-jupyter
         exclude: ^source/3rdparty
@@ -54,7 +54,7 @@ repos:
     -   id: blacken-docs
 # C++
 -   repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v16.0.6
+    rev: v17.0.3
     hooks:
     -   id: clang-format
         exclude: ^source/3rdparty|source/lib/src/gpu/cudart/.+\.inc
diff --git a/deepmd/descriptor/se_a.py b/deepmd/descriptor/se_a.py
index 8f0051cd4e..2de0b63245 100644
--- a/deepmd/descriptor/se_a.py
+++ b/deepmd/descriptor/se_a.py
@@ -469,13 +469,6 @@ def enable_compression(
                 "empty embedding-net are not supported in model compression!"
             )
 
-        for ii in range(len(self.filter_neuron) - 1):
-            if self.filter_neuron[ii] * 2 != self.filter_neuron[ii + 1]:
-                raise NotImplementedError(
-                    "Model Compression error: descriptor neuron [%s] is not supported by model compression! "
-                    "The size of the next layer of the neural network must be twice the size of the previous layer."
-                    % ",".join([str(item) for item in self.filter_neuron])
-                )
         if self.stripped_type_embedding:
             ret_two_side = get_pattern_nodes_from_graph_def(
                 graph_def, f"filter_type_all{suffix}/.+_two_side_ebd"
diff --git a/deepmd/descriptor/se_atten.py b/deepmd/descriptor/se_atten.py
index 8f3be40596..8e4c3c3ef6 100644
--- a/deepmd/descriptor/se_atten.py
+++ b/deepmd/descriptor/se_atten.py
@@ -387,14 +387,6 @@ def enable_compression(
                 "empty embedding-net are not supported in model compression!"
             )
 
-        for ii in range(len(self.filter_neuron) - 1):
-            if self.filter_neuron[ii] * 2 != self.filter_neuron[ii + 1]:
-                raise NotImplementedError(
-                    "Model Compression error: descriptor neuron [%s] is not supported by model compression! "
-                    "The size of the next layer of the neural network must be twice the size of the previous layer."
-                    % ",".join([str(item) for item in self.filter_neuron])
-                )
-
         if self.attn_layer != 0:
             raise RuntimeError("can not compress model when attention layer is not 0.")
 
diff --git a/deepmd/descriptor/se_r.py b/deepmd/descriptor/se_r.py
index fbc54a651f..ae926c339f 100644
--- a/deepmd/descriptor/se_r.py
+++ b/deepmd/descriptor/se_r.py
@@ -334,14 +334,6 @@ def enable_compression(
             not self.filter_resnet_dt
         ), "Model compression error: descriptor resnet_dt must be false!"
 
-        for ii in range(len(self.filter_neuron) - 1):
-            if self.filter_neuron[ii] * 2 != self.filter_neuron[ii + 1]:
-                raise NotImplementedError(
-                    "Model Compression error: descriptor neuron [%s] is not supported by model compression! "
-                    "The size of the next layer of the neural network must be twice the size of the previous layer."
-                    % ",".join([str(item) for item in self.filter_neuron])
-                )
-
         self.compress = True
         self.table = DPTabulate(
             self,
diff --git a/deepmd/descriptor/se_t.py b/deepmd/descriptor/se_t.py
index 671dbd4e15..d0c9fcbc2e 100644
--- a/deepmd/descriptor/se_t.py
+++ b/deepmd/descriptor/se_t.py
@@ -349,14 +349,6 @@ def enable_compression(
             not self.filter_resnet_dt
         ), "Model compression error: descriptor resnet_dt must be false!"
 
-        for ii in range(len(self.filter_neuron) - 1):
-            if self.filter_neuron[ii] * 2 != self.filter_neuron[ii + 1]:
-                raise NotImplementedError(
-                    "Model Compression error: descriptor neuron [%s] is not supported by model compression! "
-                    "The size of the next layer of the neural network must be twice the size of the previous layer."
-                    % ",".join([str(item) for item in self.filter_neuron])
-                )
-
         self.compress = True
         self.table = DPTabulate(
             self,
diff --git a/deepmd/entrypoints/convert.py b/deepmd/entrypoints/convert.py
index 73906fe074..bea047ba72 100644
--- a/deepmd/entrypoints/convert.py
+++ b/deepmd/entrypoints/convert.py
@@ -5,6 +5,7 @@
     convert_12_to_21,
     convert_13_to_21,
     convert_20_to_21,
+    convert_pb_to_pbtxt,
     convert_pbtxt_to_pb,
     convert_to_21,
 )
@@ -17,20 +18,26 @@ def convert(
     output_model: str,
     **kwargs,
 ):
-    if FROM == "auto":
-        convert_to_21(input_model, output_model)
-    elif FROM == "0.12":
-        convert_012_to_21(input_model, output_model)
-    elif FROM == "1.0":
-        convert_10_to_21(input_model, output_model)
-    elif FROM in ["1.1", "1.2"]:
-        # no difference between 1.1 and 1.2
-        convert_12_to_21(input_model, output_model)
-    elif FROM == "1.3":
-        convert_13_to_21(input_model, output_model)
-    elif FROM == "2.0":
-        convert_20_to_21(input_model, output_model)
-    elif FROM == "pbtxt":
-        convert_pbtxt_to_pb(input_model, output_model)
+    if output_model[-6:] == ".pbtxt":
+        if input_model[-6:] != ".pbtxt":
+            convert_pb_to_pbtxt(input_model, output_model)
+        else:
+            raise RuntimeError("input model is already pbtxt")
     else:
-        raise RuntimeError("unsupported model version " + FROM)
+        if FROM == "auto":
+            convert_to_21(input_model, output_model)
+        elif FROM == "0.12":
+            convert_012_to_21(input_model, output_model)
+        elif FROM == "1.0":
+            convert_10_to_21(input_model, output_model)
+        elif FROM in ["1.1", "1.2"]:
+            # no difference between 1.1 and 1.2
+            convert_12_to_21(input_model, output_model)
+        elif FROM == "1.3":
+            convert_13_to_21(input_model, output_model)
+        elif FROM == "2.0":
+            convert_20_to_21(input_model, output_model)
+        elif FROM == "pbtxt":
+            convert_pbtxt_to_pb(input_model, output_model)
+        else:
+            raise RuntimeError("unsupported model version " + FROM)
diff --git a/deepmd/entrypoints/freeze.py b/deepmd/entrypoints/freeze.py
index 11e0d55645..22f3cb80b4 100755
--- a/deepmd/entrypoints/freeze.py
+++ b/deepmd/entrypoints/freeze.py
@@ -511,9 +511,13 @@ def freeze(
     # We import the meta graph and retrieve a Saver
     try:
         # In case paralle training
-        import horovod.tensorflow as _  # noqa: F401
+        import horovod.tensorflow as HVD
     except ImportError:
         pass
+    else:
+        HVD.init()
+        if HVD.rank() > 0:
+            return
     saver = tf.train.import_meta_graph(
         f"{input_checkpoint}.meta", clear_devices=clear_devices
     )
diff --git a/deepmd/entrypoints/train.py b/deepmd/entrypoints/train.py
index bd7a2ac7ec..9469b7df90 100755
--- a/deepmd/entrypoints/train.py
+++ b/deepmd/entrypoints/train.py
@@ -406,9 +406,7 @@ def get_nbor_stat(jdata, rcut, one_type: bool = False):
             tmp_data.get_batch()
             assert (
                 tmp_data.get_type_map()
-            ), "In multi-task mode, 'type_map.raw' must be defined in data systems {}! ".format(
-                systems
-            )
+            ), f"In multi-task mode, 'type_map.raw' must be defined in data systems {systems}! "
             if train_data is None:
                 train_data = tmp_data
             else:
diff --git a/deepmd/env.py b/deepmd/env.py
index 075e37446f..9b7f86f0d5 100644
--- a/deepmd/env.py
+++ b/deepmd/env.py
@@ -89,6 +89,7 @@ def dlopen_library(module: str, filename: str):
     "global_cvt_2_tf_float",
     "global_cvt_2_ener_float",
     "MODEL_VERSION",
+    "SHARED_LIB_DIR",
     "SHARED_LIB_MODULE",
     "default_tf_session_config",
     "reset_default_tf_session_config",
diff --git a/deepmd/fit/dos.py b/deepmd/fit/dos.py
index 9a7cb734e5..bbf7d39a09 100644
--- a/deepmd/fit/dos.py
+++ b/deepmd/fit/dos.py
@@ -168,7 +168,7 @@ def get_numb_fparam(self) -> int:
 
     def get_numb_aparam(self) -> int:
         """Get the number of atomic parameters."""
-        return self.numb_fparam
+        return self.numb_aparam
 
     def get_numb_dos(self) -> int:
         """Get the number of gridpoints in energy space."""
diff --git a/deepmd/fit/ener.py b/deepmd/fit/ener.py
index 61cf0ce40c..e74d4a7e6d 100644
--- a/deepmd/fit/ener.py
+++ b/deepmd/fit/ener.py
@@ -228,7 +228,7 @@ def get_numb_fparam(self) -> int:
 
     def get_numb_aparam(self) -> int:
         """Get the number of atomic parameters."""
-        return self.numb_fparam
+        return self.numb_aparam
 
     def compute_output_stats(self, all_stat: dict, mixed_type: bool = False) -> None:
         """Compute the ouput statistics.
diff --git a/deepmd/infer/deep_pot.py b/deepmd/infer/deep_pot.py
index 031c5de1bc..fc9a6a76ed 100644
--- a/deepmd/infer/deep_pot.py
+++ b/deepmd/infer/deep_pot.py
@@ -307,7 +307,10 @@ def _get_natoms_and_nframes(
             natoms = len(atom_types[0])
         else:
             natoms = len(atom_types)
-        coords = np.reshape(np.array(coords), [-1, natoms * 3])
+        if natoms == 0:
+            assert coords.size == 0
+        else:
+            coords = np.reshape(np.array(coords), [-1, natoms * 3])
         nframes = coords.shape[0]
         return natoms, nframes
 
@@ -415,7 +418,7 @@ def _prepare_feed_dict(
             atom_types = np.array(atom_types, dtype=int).reshape([-1, natoms])
         else:
             atom_types = np.array(atom_types, dtype=int).reshape([-1])
-        coords = np.reshape(np.array(coords), [-1, natoms * 3])
+        coords = np.reshape(np.array(coords), [nframes, natoms * 3])
         if cells is None:
             pbc = False
             # make cells to work around the requirement of pbc
diff --git a/deepmd/lmp.py b/deepmd/lmp.py
index fe08cc0a3b..5238cd9935 100644
--- a/deepmd/lmp.py
+++ b/deepmd/lmp.py
@@ -18,6 +18,7 @@
 )
 
 from deepmd.env import (
+    SHARED_LIB_DIR,
     TF_VERSION,
     tf,
 )
@@ -74,7 +75,7 @@ def get_library_path(module: str, filename: str) -> List[str]:
     raise RuntimeError("Unsupported platform")
 
 tf_dir = tf.sysconfig.get_lib()
-op_dir = str((Path(__file__).parent / "lib").absolute())
+op_dir = str(SHARED_LIB_DIR)
 
 
 cuda_library_paths = []
diff --git a/deepmd/model/dos.py b/deepmd/model/dos.py
index 2327d67c58..697fad9a9e 100644
--- a/deepmd/model/dos.py
+++ b/deepmd/model/dos.py
@@ -88,7 +88,7 @@ def get_numb_fparam(self) -> int:
 
     def get_numb_aparam(self) -> int:
         """Get the number of atomic parameters."""
-        return self.numb_fparam
+        return self.numb_aparam
 
     def data_stat(self, data):
         all_stat = make_stat_input(data, self.data_stat_nbatch, merge_sys=False)
diff --git a/deepmd/model/ener.py b/deepmd/model/ener.py
index e2eb8744dc..1976c1ad51 100644
--- a/deepmd/model/ener.py
+++ b/deepmd/model/ener.py
@@ -120,7 +120,7 @@ def get_numb_fparam(self) -> int:
 
     def get_numb_aparam(self) -> int:
         """Get the number of atomic parameters."""
-        return self.numb_fparam
+        return self.numb_aparam
 
     def data_stat(self, data):
         all_stat = make_stat_input(data, self.data_stat_nbatch, merge_sys=False)
diff --git a/deepmd/train/trainer.py b/deepmd/train/trainer.py
index 1f7b78045b..bbcb305404 100644
--- a/deepmd/train/trainer.py
+++ b/deepmd/train/trainer.py
@@ -368,12 +368,12 @@ def _build_network(self, data, suffix=""):
                 self.place_holders[kk] = tf.placeholder(
                     GLOBAL_TF_FLOAT_PRECISION, [None], "t_" + kk
                 )
-            self._get_place_horders(data_requirement)
+            self._get_place_holders(data_requirement)
         else:
             if not self.multi_task_mode:
-                self._get_place_horders(data.get_data_dict())
+                self._get_place_holders(data.get_data_dict())
             else:
-                self._get_place_horders(data[next(iter(data.keys()))].get_data_dict())
+                self._get_place_holders(data[next(iter(data.keys()))].get_data_dict())
 
         self.place_holders["type"] = tf.placeholder(tf.int32, [None], name="t_type")
         self.place_holders["natoms_vec"] = tf.placeholder(
@@ -1035,7 +1035,7 @@ def save_compressed(self):
         if self.is_compress:
             self.saver.save(self.sess, os.path.join(os.getcwd(), self.save_ckpt))
 
-    def _get_place_horders(self, data_dict):
+    def _get_place_holders(self, data_dict):
         for kk in data_dict.keys():
             if kk == "type":
                 continue
diff --git a/deepmd/utils/argcheck.py b/deepmd/utils/argcheck.py
index ae446ef348..7104eb1de4 100644
--- a/deepmd/utils/argcheck.py
+++ b/deepmd/utils/argcheck.py
@@ -56,7 +56,7 @@ def type_embedding_args():
     doc_trainable = "If the parameters in the embedding net are trainable"
 
     return [
-        Argument("neuron", list, optional=True, default=[8], doc=doc_neuron),
+        Argument("neuron", List[int], optional=True, default=[8], doc=doc_neuron),
         Argument(
             "activation_function",
             str,
@@ -77,9 +77,9 @@ def spin_args():
     doc_virtual_len = "The distance between virtual atom representing spin and its corresponding real atom for each atom type with spin"
 
     return [
-        Argument("use_spin", list, doc=doc_use_spin),
-        Argument("spin_norm", list, doc=doc_spin_norm),
-        Argument("virtual_len", list, doc=doc_virtual_len),
+        Argument("use_spin", List[bool], doc=doc_use_spin),
+        Argument("spin_norm", List[float], doc=doc_spin_norm),
+        Argument("virtual_len", List[float], doc=doc_virtual_len),
     ]
 
 
@@ -159,10 +159,10 @@ def descrpt_local_frame_args():
 - axis_rule[i*6+5]: index of the axis atom defining the second axis. Note that the neighbors with the same class and type are sorted according to their relative distance."
 
     return [
-        Argument("sel_a", list, optional=False, doc=doc_sel_a),
-        Argument("sel_r", list, optional=False, doc=doc_sel_r),
+        Argument("sel_a", List[int], optional=False, doc=doc_sel_a),
+        Argument("sel_r", List[int], optional=False, doc=doc_sel_r),
         Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
-        Argument("axis_rule", list, optional=False, doc=doc_axis_rule),
+        Argument("axis_rule", List[int], optional=False, doc=doc_axis_rule),
     ]
 
 
@@ -185,10 +185,12 @@ def descrpt_se_a_args():
     doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
 
     return [
-        Argument("sel", [list, str], optional=True, default="auto", doc=doc_sel),
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
         Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
         Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument("neuron", list, optional=True, default=[10, 20, 40], doc=doc_neuron),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
         Argument(
             "axis_neuron",
             int,
@@ -212,7 +214,11 @@ def descrpt_se_a_args():
         Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
         Argument("seed", [int, None], optional=True, doc=doc_seed),
         Argument(
-            "exclude_types", list, optional=True, default=[], doc=doc_exclude_types
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
         ),
         Argument(
             "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
@@ -236,10 +242,12 @@ def descrpt_se_t_args():
     doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
 
     return [
-        Argument("sel", [list, str], optional=True, default="auto", doc=doc_sel),
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
         Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
         Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument("neuron", list, optional=True, default=[10, 20, 40], doc=doc_neuron),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
         Argument(
             "activation_function",
             str,
@@ -289,10 +297,12 @@ def descrpt_se_r_args():
     doc_set_davg_zero = "Set the normalization average to zero. This option should be set when `atom_ener` in the energy fitting is used"
 
     return [
-        Argument("sel", [list, str], optional=True, default="auto", doc=doc_sel),
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
         Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
         Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument("neuron", list, optional=True, default=[10, 20, 40], doc=doc_neuron),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
         Argument(
             "activation_function",
             str,
@@ -308,7 +318,11 @@ def descrpt_se_r_args():
         Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
         Argument("seed", [int, None], optional=True, doc=doc_seed),
         Argument(
-            "exclude_types", list, optional=True, default=[], doc=doc_exclude_types
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
         ),
         Argument(
             "set_davg_zero", bool, optional=True, default=False, doc=doc_set_davg_zero
@@ -356,10 +370,14 @@ def descrpt_se_atten_common_args():
     doc_attn_mask = "Whether to do mask on the diagonal in the attention matrix"
 
     return [
-        Argument("sel", [int, list, str], optional=True, default="auto", doc=doc_sel),
+        Argument(
+            "sel", [int, List[int], str], optional=True, default="auto", doc=doc_sel
+        ),
         Argument("rcut", float, optional=True, default=6.0, doc=doc_rcut),
         Argument("rcut_smth", float, optional=True, default=0.5, doc=doc_rcut_smth),
-        Argument("neuron", list, optional=True, default=[10, 20, 40], doc=doc_neuron),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
         Argument(
             "axis_neuron",
             int,
@@ -383,7 +401,11 @@ def descrpt_se_atten_common_args():
         Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
         Argument("seed", [int, None], optional=True, doc=doc_seed),
         Argument(
-            "exclude_types", list, optional=True, default=[], doc=doc_exclude_types
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
         ),
         Argument("attn", int, optional=True, default=128, doc=doc_attn),
         Argument("attn_layer", int, optional=True, default=2, doc=doc_attn_layer),
@@ -454,8 +476,10 @@ def descrpt_se_a_mask_args():
     doc_seed = "Random seed for parameter initialization"
 
     return [
-        Argument("sel", [list, str], optional=True, default="auto", doc=doc_sel),
-        Argument("neuron", list, optional=True, default=[10, 20, 40], doc=doc_neuron),
+        Argument("sel", [List[int], str], optional=True, default="auto", doc=doc_sel),
+        Argument(
+            "neuron", List[int], optional=True, default=[10, 20, 40], doc=doc_neuron
+        ),
         Argument(
             "axis_neuron",
             int,
@@ -476,7 +500,11 @@ def descrpt_se_a_mask_args():
             "type_one_side", bool, optional=True, default=False, doc=doc_type_one_side
         ),
         Argument(
-            "exclude_types", list, optional=True, default=[], doc=doc_exclude_types
+            "exclude_types",
+            List[List[int]],
+            optional=True,
+            default=[],
+            doc=doc_exclude_types,
         ),
         Argument("precision", str, optional=True, default="default", doc=doc_precision),
         Argument("trainable", bool, optional=True, default=True, doc=doc_trainable),
@@ -525,7 +553,7 @@ def fitting_ener():
     doc_resnet_dt = 'Whether to use a "Timestep" in the skip connection'
     doc_trainable = "Whether the parameters in the fitting net are trainable. This option can be\n\n\
 - bool: True if all parameters of the fitting net are trainable, False otherwise.\n\n\
-- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of tihs list should be equal to len(`neuron`)+1."
+- list of bool: Specifies if each layer is trainable. Since the fitting net is composed by hidden layers followed by a output layer, the length of this list should be equal to len(`neuron`)+1."
     doc_rcond = "The condition number used to determine the inital energy shift for each type of atoms. See `rcond` in :py:meth:`numpy.linalg.lstsq` for more details."
     doc_seed = "Random seed for parameter initialization of the fitting net"
     doc_atom_ener = "Specify the atomic energy in vacuum for each type"
@@ -547,7 +575,7 @@ def fitting_ener():
         Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
         Argument(
             "neuron",
-            list,
+            List[int],
             optional=True,
             default=[120, 120, 120],
             alias=["n_neuron"],
@@ -563,14 +591,24 @@ def fitting_ener():
         Argument("precision", str, optional=True, default="default", doc=doc_precision),
         Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
         Argument(
-            "trainable", [list, bool], optional=True, default=True, doc=doc_trainable
+            "trainable",
+            [List[bool], bool],
+            optional=True,
+            default=True,
+            doc=doc_trainable,
         ),
         Argument(
             "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond
         ),
         Argument("seed", [int, None], optional=True, doc=doc_seed),
-        Argument("atom_ener", list, optional=True, default=[], doc=doc_atom_ener),
-        Argument("layer_name", list, optional=True, doc=doc_layer_name),
+        Argument(
+            "atom_ener",
+            List[Optional[float]],
+            optional=True,
+            default=[],
+            doc=doc_atom_ener,
+        ),
+        Argument("layer_name", List[str], optional=True, doc=doc_layer_name),
         Argument(
             "use_aparam_as_mask",
             bool,
@@ -602,7 +640,7 @@ def fitting_dos():
         Argument("numb_fparam", int, optional=True, default=0, doc=doc_numb_fparam),
         Argument("numb_aparam", int, optional=True, default=0, doc=doc_numb_aparam),
         Argument(
-            "neuron", list, optional=True, default=[120, 120, 120], doc=doc_neuron
+            "neuron", List[int], optional=True, default=[120, 120, 120], doc=doc_neuron
         ),
         Argument(
             "activation_function",
@@ -614,7 +652,11 @@ def fitting_dos():
         Argument("precision", str, optional=True, default="float64", doc=doc_precision),
         Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
         Argument(
-            "trainable", [list, bool], optional=True, default=True, doc=doc_trainable
+            "trainable",
+            [List[bool], bool],
+            optional=True,
+            default=True,
+            doc=doc_trainable,
         ),
         Argument(
             "rcond", [float, type(None)], optional=True, default=None, doc=doc_rcond
@@ -642,7 +684,7 @@ def fitting_polar():
     return [
         Argument(
             "neuron",
-            list,
+            List[int],
             optional=True,
             default=[120, 120, 120],
             alias=["n_neuron"],
@@ -658,12 +700,14 @@ def fitting_polar():
         Argument("resnet_dt", bool, optional=True, default=True, doc=doc_resnet_dt),
         Argument("precision", str, optional=True, default="default", doc=doc_precision),
         Argument("fit_diag", bool, optional=True, default=True, doc=doc_fit_diag),
-        Argument("scale", [list, float], optional=True, default=1.0, doc=doc_scale),
+        Argument(
+            "scale", [List[float], float], optional=True, default=1.0, doc=doc_scale
+        ),
         # Argument("diag_shift", [list,float], optional = True, default = 0.0, doc = doc_diag_shift),
         Argument("shift_diag", bool, optional=True, default=True, doc=doc_shift_diag),
         Argument(
             "sel_type",
-            [list, int, None],
+            [List[int], int, None],
             optional=True,
             alias=["pol_type"],
             doc=doc_sel_type,
@@ -687,7 +731,7 @@ def fitting_dipole():
     return [
         Argument(
             "neuron",
-            list,
+            List[int],
             optional=True,
             default=[120, 120, 120],
             alias=["n_neuron"],
@@ -704,7 +748,7 @@ def fitting_dipole():
         Argument("precision", str, optional=True, default="default", doc=doc_precision),
         Argument(
             "sel_type",
-            [list, int, None],
+            [List[int], int, None],
             optional=True,
             alias=["dipole_type"],
             doc=doc_sel_type,
@@ -740,8 +784,10 @@ def modifier_dipole_charge():
 
     return [
         Argument("model_name", str, optional=False, doc=doc_model_name),
-        Argument("model_charge_map", list, optional=False, doc=doc_model_charge_map),
-        Argument("sys_charge_map", list, optional=False, doc=doc_sys_charge_map),
+        Argument(
+            "model_charge_map", List[float], optional=False, doc=doc_model_charge_map
+        ),
+        Argument("sys_charge_map", List[float], optional=False, doc=doc_sys_charge_map),
         Argument("ewald_beta", float, optional=True, default=0.4, doc=doc_ewald_beta),
         Argument("ewald_h", float, optional=True, default=1.0, doc=doc_ewald_h),
     ]
@@ -770,7 +816,7 @@ def model_compression():
 
     return [
         Argument("model_file", str, optional=False, doc=doc_model_file),
-        Argument("table_config", list, optional=False, doc=doc_table_config),
+        Argument("table_config", List[float], optional=False, doc=doc_table_config),
         Argument("min_nbor_dist", float, optional=False, doc=doc_min_nbor_dist),
     ]
 
@@ -814,7 +860,7 @@ def model_args(exclude_hybrid=False):
         "model",
         dict,
         [
-            Argument("type_map", list, optional=True, doc=doc_type_map),
+            Argument("type_map", List[str], optional=True, doc=doc_type_map),
             Argument(
                 "data_stat_nbatch",
                 int,
@@ -1456,11 +1502,13 @@ def training_data_args():  # ! added by Ziyao: new specification style for data
     )
 
     args = [
-        Argument("systems", [list, str], optional=False, default=".", doc=doc_systems),
+        Argument(
+            "systems", [List[str], str], optional=False, default=".", doc=doc_systems
+        ),
         Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix),
         Argument(
             "batch_size",
-            [list, int, str],
+            [List[int], int, str],
             optional=True,
             default="auto",
             doc=doc_batch_size,
@@ -1477,7 +1525,7 @@ def training_data_args():  # ! added by Ziyao: new specification style for data
         ),
         Argument(
             "sys_probs",
-            list,
+            List[float],
             optional=True,
             default=None,
             doc=doc_sys_probs,
@@ -1521,11 +1569,13 @@ def validation_data_args():  # ! added by Ziyao: new specification style for dat
     doc_numb_btch = "An integer that specifies the number of batches to be sampled for each validation period."
 
     args = [
-        Argument("systems", [list, str], optional=False, default=".", doc=doc_systems),
+        Argument(
+            "systems", [List[str], str], optional=False, default=".", doc=doc_systems
+        ),
         Argument("set_prefix", str, optional=True, default="set", doc=doc_set_prefix),
         Argument(
             "batch_size",
-            [list, int, str],
+            [List[int], int, str],
             optional=True,
             default="auto",
             doc=doc_batch_size,
@@ -1542,7 +1592,7 @@ def validation_data_args():  # ! added by Ziyao: new specification style for dat
         ),
         Argument(
             "sys_probs",
-            list,
+            List[float],
             optional=True,
             default=None,
             doc=doc_sys_probs,
diff --git a/deepmd/utils/batch_size.py b/deepmd/utils/batch_size.py
index 41d9fd4f64..2b3117d849 100644
--- a/deepmd/utils/batch_size.py
+++ b/deepmd/utils/batch_size.py
@@ -100,10 +100,12 @@ def execute(
         OutOfMemoryError
             OOM when batch size is 1
         """
+        if natoms > 0:
+            batch_nframes = self.current_batch_size // natoms
+        else:
+            batch_nframes = self.current_batch_size
         try:
-            n_batch, result = callable(
-                max(self.current_batch_size // natoms, 1), start_index
-            )
+            n_batch, result = callable(max(batch_nframes, 1), start_index)
         except OutOfMemoryError as e:
             # TODO: it's very slow to catch OOM error; I don't know what TF is doing here
             # but luckily we only need to catch once
diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py
index 0071da755c..69a6cbe112 100644
--- a/deepmd/utils/data_system.py
+++ b/deepmd/utils/data_system.py
@@ -353,28 +353,15 @@ def set_sys_probs(self, sys_probs=None, auto_prob_style: str = "prob_sys_size"):
             elif auto_prob_style == "prob_sys_size":
                 probs = self.prob_nbatches
             elif auto_prob_style[:14] == "prob_sys_size;":
-                probs = self._prob_sys_size_ext(auto_prob_style)
+                probs = prob_sys_size_ext(
+                    auto_prob_style, self.get_nsystems(), self.nbatches
+                )
             else:
                 raise RuntimeError("Unknown auto prob style: " + auto_prob_style)
         else:
-            probs = self._process_sys_probs(sys_probs)
+            probs = process_sys_probs(sys_probs, self.nbatches)
         self.sys_probs = probs
 
-    def _get_sys_probs(self, sys_probs, auto_prob_style):  # depreciated
-        if sys_probs is None:
-            if auto_prob_style == "prob_uniform":
-                prob_v = 1.0 / float(self.nsystems)
-                prob = [prob_v for ii in range(self.nsystems)]
-            elif auto_prob_style == "prob_sys_size":
-                prob = self.prob_nbatches
-            elif auto_prob_style[:14] == "prob_sys_size;":
-                prob = self._prob_sys_size_ext(auto_prob_style)
-            else:
-                raise RuntimeError("unknown style " + auto_prob_style)
-        else:
-            prob = self._process_sys_probs(sys_probs)
-        return prob
-
     def get_batch(self, sys_idx: Optional[int] = None) -> dict:
         # batch generation style altered by Ziyao Li:
         # one should specify the "sys_prob" and "auto_prob_style" params
@@ -623,42 +610,44 @@ def _check_type_map_consistency(self, type_map_list):
                     ret = ii
         return ret
 
-    def _process_sys_probs(self, sys_probs):
-        sys_probs = np.array(sys_probs)
-        type_filter = sys_probs >= 0
-        assigned_sum_prob = np.sum(type_filter * sys_probs)
-        # 1e-8 is to handle floating point error; See #1917
-        assert (
-            assigned_sum_prob <= 1.0 + 1e-8
-        ), "the sum of assigned probability should be less than 1"
-        rest_sum_prob = 1.0 - assigned_sum_prob
-        if not np.isclose(rest_sum_prob, 0):
-            rest_nbatch = (1 - type_filter) * self.nbatches
-            rest_prob = rest_sum_prob * rest_nbatch / np.sum(rest_nbatch)
-            ret_prob = rest_prob + type_filter * sys_probs
-        else:
-            ret_prob = sys_probs
-        assert np.isclose(np.sum(ret_prob), 1), "sum of probs should be 1"
-        return ret_prob
-
-    def _prob_sys_size_ext(self, keywords):
-        block_str = keywords.split(";")[1:]
-        block_stt = []
-        block_end = []
-        block_weights = []
-        for ii in block_str:
-            stt = int(ii.split(":")[0])
-            end = int(ii.split(":")[1])
-            weight = float(ii.split(":")[2])
-            assert weight >= 0, "the weight of a block should be no less than 0"
-            block_stt.append(stt)
-            block_end.append(end)
-            block_weights.append(weight)
-        nblocks = len(block_str)
-        block_probs = np.array(block_weights) / np.sum(block_weights)
-        sys_probs = np.zeros([self.get_nsystems()])
-        for ii in range(nblocks):
-            nbatch_block = self.nbatches[block_stt[ii] : block_end[ii]]
-            tmp_prob = [float(i) for i in nbatch_block] / np.sum(nbatch_block)
-            sys_probs[block_stt[ii] : block_end[ii]] = tmp_prob * block_probs[ii]
-        return sys_probs
+
+def process_sys_probs(sys_probs, nbatch):
+    sys_probs = np.array(sys_probs)
+    type_filter = sys_probs >= 0
+    assigned_sum_prob = np.sum(type_filter * sys_probs)
+    # 1e-8 is to handle floating point error; See #1917
+    assert (
+        assigned_sum_prob <= 1.0 + 1e-8
+    ), "the sum of assigned probability should be less than 1"
+    rest_sum_prob = 1.0 - assigned_sum_prob
+    if not np.isclose(rest_sum_prob, 0):
+        rest_nbatch = (1 - type_filter) * nbatch
+        rest_prob = rest_sum_prob * rest_nbatch / np.sum(rest_nbatch)
+        ret_prob = rest_prob + type_filter * sys_probs
+    else:
+        ret_prob = sys_probs
+    assert np.isclose(np.sum(ret_prob), 1), "sum of probs should be 1"
+    return ret_prob
+
+
+def prob_sys_size_ext(keywords, nsystems, nbatch):
+    block_str = keywords.split(";")[1:]
+    block_stt = []
+    block_end = []
+    block_weights = []
+    for ii in block_str:
+        stt = int(ii.split(":")[0])
+        end = int(ii.split(":")[1])
+        weight = float(ii.split(":")[2])
+        assert weight >= 0, "the weight of a block should be no less than 0"
+        block_stt.append(stt)
+        block_end.append(end)
+        block_weights.append(weight)
+    nblocks = len(block_str)
+    block_probs = np.array(block_weights) / np.sum(block_weights)
+    sys_probs = np.zeros([nsystems])
+    for ii in range(nblocks):
+        nbatch_block = nbatch[block_stt[ii] : block_end[ii]]
+        tmp_prob = [float(i) for i in nbatch_block] / np.sum(nbatch_block)
+        sys_probs[block_stt[ii] : block_end[ii]] = tmp_prob * block_probs[ii]
+    return sys_probs
diff --git a/deepmd/utils/tabulate.py b/deepmd/utils/tabulate.py
index 427887089a..d0a167f1dc 100644
--- a/deepmd/utils/tabulate.py
+++ b/deepmd/utils/tabulate.py
@@ -571,13 +571,13 @@ def _make_data(self, xx, idx):
                                 + xx
                             )
                             dy = op_module.unaggregated_dy_dx_s(
-                                yy,
+                                yy - xx,
                                 self.matrix["layer_" + str(layer + 1)][idx],
                                 xbar,
                                 tf.constant(self.functype),
                             ) + tf.ones([1, 1], yy.dtype)
                             dy2 = op_module.unaggregated_dy2_dx_s(
-                                yy,
+                                yy - xx,
                                 dy,
                                 self.matrix["layer_" + str(layer + 1)][idx],
                                 xbar,
@@ -626,26 +626,72 @@ def _make_data(self, xx, idx):
                             tf.matmul(yy, self.matrix["layer_" + str(layer + 1)][idx])
                             + self.bias["layer_" + str(layer + 1)][idx]
                         )
-                        tt, zz = self._layer_1(
-                            yy,
-                            self.matrix["layer_" + str(layer + 1)][idx],
-                            self.bias["layer_" + str(layer + 1)][idx],
-                        )
-                        dz = op_module.unaggregated_dy_dx(
-                            zz - tt,
-                            self.matrix["layer_" + str(layer + 1)][idx],
-                            dy,
-                            ybar,
-                            tf.constant(self.functype),
-                        )
-                        dy2 = op_module.unaggregated_dy2_dx(
-                            zz - tt,
-                            self.matrix["layer_" + str(layer + 1)][idx],
-                            dy,
-                            dy2,
-                            ybar,
-                            tf.constant(self.functype),
-                        )
+                        if self.neuron[layer] == self.neuron[layer - 1]:
+                            zz = (
+                                self._layer_0(
+                                    yy,
+                                    self.matrix["layer_" + str(layer + 1)][idx],
+                                    self.bias["layer_" + str(layer + 1)][idx],
+                                )
+                                + yy
+                            )
+                            dz = op_module.unaggregated_dy_dx(
+                                zz - yy,
+                                self.matrix["layer_" + str(layer + 1)][idx],
+                                dy,
+                                ybar,
+                                tf.constant(self.functype),
+                            )
+                            dy2 = op_module.unaggregated_dy2_dx(
+                                zz - yy,
+                                self.matrix["layer_" + str(layer + 1)][idx],
+                                dy,
+                                dy2,
+                                ybar,
+                                tf.constant(self.functype),
+                            )
+                        elif self.neuron[layer] == 2 * self.neuron[layer - 1]:
+                            tt, zz = self._layer_1(
+                                yy,
+                                self.matrix["layer_" + str(layer + 1)][idx],
+                                self.bias["layer_" + str(layer + 1)][idx],
+                            )
+                            dz = op_module.unaggregated_dy_dx(
+                                zz - tt,
+                                self.matrix["layer_" + str(layer + 1)][idx],
+                                dy,
+                                ybar,
+                                tf.constant(self.functype),
+                            )
+                            dy2 = op_module.unaggregated_dy2_dx(
+                                zz - tt,
+                                self.matrix["layer_" + str(layer + 1)][idx],
+                                dy,
+                                dy2,
+                                ybar,
+                                tf.constant(self.functype),
+                            )
+                        else:
+                            zz = self._layer_0(
+                                yy,
+                                self.matrix["layer_" + str(layer + 1)][idx],
+                                self.bias["layer_" + str(layer + 1)][idx],
+                            )
+                            dz = op_module.unaggregated_dy_dx(
+                                zz,
+                                self.matrix["layer_" + str(layer + 1)][idx],
+                                dy,
+                                ybar,
+                                tf.constant(self.functype),
+                            )
+                            dy2 = op_module.unaggregated_dy2_dx(
+                                zz,
+                                self.matrix["layer_" + str(layer + 1)][idx],
+                                dy,
+                                dy2,
+                                ybar,
+                                tf.constant(self.functype),
+                            )
                         dy = dz
                         yy = zz
 
diff --git a/deepmd_cli/main.py b/deepmd_cli/main.py
index 5a0670d8dc..bffc1c6911 100644
--- a/deepmd_cli/main.py
+++ b/deepmd_cli/main.py
@@ -491,7 +491,7 @@ def main_parser() -> argparse.ArgumentParser:
         "--output-model",
         default="convert_out.pb",
         type=str,
-        help="the output model",
+        help="the output model\nIf OUTPUT_MODEL ends with '.pbtxt', the provided model will be converted to pbtxt format, without version conversion.",
     )
 
     # neighbor_stat
diff --git a/doc/install/easy-install-dev.md b/doc/install/easy-install-dev.md
index 855c2f1839..dd943c37af 100644
--- a/doc/install/easy-install-dev.md
+++ b/doc/install/easy-install-dev.md
@@ -17,7 +17,7 @@ docker pull ghcr.io/deepmodeling/deepmd-kit:devel
 Below is an one-line shell command to download the [artifact](https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip) containing wheels and install it with `pip`:
 
 ```sh
-bash -c 'wget -O /tmp/z.$$ https://nightly.link/deepmodeling/deepmd-kit/workflows/build_wheel/devel/artifact.zip && unzip /tmp/z.$$ -d /tmp/dist.$$ && pip install -U --pre deepmd-kit[gpu,cu11,lmp] --find-links /tmp/dist.$$ && rm -r /tmp/z.$$ /tmp/dist.$$'
+pip install -U --pre "deepmd-kit[gpu,cu11,lmp]" --extra-index-url https://deepmodeling.github.io/deepmd-kit/simple
 ```
 
 `cu11` and `lmp` are optional, which is the same as the stable version.
diff --git a/doc/third-party/lammps-command.md b/doc/third-party/lammps-command.md
index a9d849bc7c..cdfa4b87d6 100644
--- a/doc/third-party/lammps-command.md
+++ b/doc/third-party/lammps-command.md
@@ -38,7 +38,7 @@ pair_style deepmd models ... keyword value ...
 - models = frozen model(s) to compute the interaction.
 If multiple models are provided, then only the first model serves to provide energy and force prediction for each timestep of molecular dynamics,
 and the model deviation will be computed among all models every `out_freq` timesteps.
-- keyword = *out_file* or *out_freq* or *fparam* or *fparam_from_compute* or *atomic* or *relative* or *relative_v* or *aparam* or *ttm*
+- keyword = *out_file* or *out_freq* or *fparam* or *fparam_from_compute* or *aparam_from_compute* or *atomic* or *relative* or *relative_v* or *aparam* or *ttm*
 <pre>
     <i>out_file</i> value = filename
         filename = The file name for the model deviation output. Default is model_devi.out
@@ -48,6 +48,8 @@ and the model deviation will be computed among all models every `out_freq` times
         parameters = one or more frame parameters required for model evaluation.
     <i>fparam_from_compute</i> value = id
         id = compute id used to update the frame parameter.
+    <i>aparam_from_compute</i> value = id
+        id = compute id used to update the atom parameter.
     <i>atomic</i> = no value is required.
         If this keyword is set, the force model deviation of each atom will be output.
     <i>relative</i> value = level
@@ -69,6 +71,9 @@ pair_coeff * * O H
 
 pair_style deepmd cp.pb fparam_from_compute TEMP
 compute    TEMP all temp
+
+pair_style deepmd ener.pb aparam_from_compute 1
+compute    1 all ke/atom
 ```
 
 ### Description
@@ -89,6 +94,7 @@ $$E_{v_i}=\frac{\left|D_{v_i}\right|}{\left|v_i\right|+l}$$
 
 If the keyword `fparam` is set, the given frame parameter(s) will be fed to the model.
 If the keyword `fparam_from_compute` is set, the global parameter(s) from compute command (e.g., temperature from [compute temp command](https://docs.lammps.org/compute_temp.html)) will be fed to the model as the frame parameter(s).
+If the keyword `aparam_from_compute` is set, the atomic parameter(s) from a compute command (e.g., per-atom translational kinetic energy from the [compute ke/atom command](https://docs.lammps.org/compute_ke_atom.html)) will be fed to the model as the atom parameter(s).
 If the keyword `aparam` is set, the given atomic parameter(s) will be fed to the model, where each atom is assumed to have the same atomic parameter(s).
 If the keyword `ttm` is set, electronic temperatures from [fix ttm command](https://docs.lammps.org/fix_ttm.html) will be fed to the model as the atomic parameters.
 
diff --git a/examples/infer_water/convert_model.c b/examples/infer_water/convert_model.c
new file mode 100644
index 0000000000..3fc2d74b65
--- /dev/null
+++ b/examples/infer_water/convert_model.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: LGPL-3.0-or-later
+#include "deepmd/c_api.h"
+
+int main() {
+  DP_ConvertPbtxtToPb("../../source/tests/infer/deeppot.pbtxt", "graph.pb");
+  return 0;
+}
diff --git a/pyproject.toml b/pyproject.toml
index 8c5267567b..35a11d2163 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,7 +38,7 @@ dependencies = [
     'numpy',
     'scipy',
     'pyyaml',
-    'dargs >= 0.3.5',
+    'dargs >= 0.4.1',
     'python-hostlist >= 1.21',
     'typing_extensions; python_version < "3.8"',
     'importlib_metadata>=1.4; python_version < "3.8"',
diff --git a/source/api_c/include/deepmd.hpp b/source/api_c/include/deepmd.hpp
index 71ff5b3dcc..90c1c1c918 100644
--- a/source/api_c/include/deepmd.hpp
+++ b/source/api_c/include/deepmd.hpp
@@ -618,6 +618,7 @@ class DeepPot {
    * nframes x natoms x dim_aparam.
    * natoms x dim_aparam. Then all frames are assumed to be provided with the
    *same aparam.
+   * @warning Natoms should not be zero when computing multiple frames.
    **/
   template <typename VALUETYPE, typename ENERGYVTYPE>
   void compute(
@@ -630,7 +631,7 @@ class DeepPot {
       const std::vector<VALUETYPE> &fparam = std::vector<VALUETYPE>(),
       const std::vector<VALUETYPE> &aparam = std::vector<VALUETYPE>()) {
     unsigned int natoms = atype.size();
-    unsigned int nframes = coord.size() / natoms / 3;
+    unsigned int nframes = natoms > 0 ? coord.size() / natoms / 3 : 1;
     assert(nframes * natoms * 3 == coord.size());
     if (!box.empty()) {
       assert(box.size() == nframes * 9);
@@ -676,6 +677,7 @@ class DeepPot {
    * nframes x natoms x dim_aparam.
    * natoms x dim_aparam. Then all frames are assumed to be provided with the
    *same aparam.
+   * @warning Natoms should not be zero when computing multiple frames.
    **/
   template <typename VALUETYPE, typename ENERGYVTYPE>
   void compute(
@@ -690,7 +692,7 @@ class DeepPot {
       const std::vector<VALUETYPE> &fparam = std::vector<VALUETYPE>(),
       const std::vector<VALUETYPE> &aparam = std::vector<VALUETYPE>()) {
     unsigned int natoms = atype.size();
-    unsigned int nframes = coord.size() / natoms / 3;
+    unsigned int nframes = natoms > 0 ? coord.size() / natoms / 3 : 1;
     assert(nframes * natoms * 3 == coord.size());
     if (!box.empty()) {
       assert(box.size() == nframes * 9);
@@ -743,6 +745,7 @@ class DeepPot {
    * nframes x natoms x dim_aparam.
    * natoms x dim_aparam. Then all frames are assumed to be provided with the
    *same aparam.
+   * @warning Natoms should not be zero when computing multiple frames.
    **/
   template <typename VALUETYPE, typename ENERGYVTYPE>
   void compute(
@@ -758,7 +761,7 @@ class DeepPot {
       const std::vector<VALUETYPE> &fparam = std::vector<VALUETYPE>(),
       const std::vector<VALUETYPE> &aparam = std::vector<VALUETYPE>()) {
     unsigned int natoms = atype.size();
-    unsigned int nframes = coord.size() / natoms / 3;
+    unsigned int nframes = natoms > 0 ? coord.size() / natoms / 3 : 1;
     assert(nframes * natoms * 3 == coord.size());
     if (!box.empty()) {
       assert(box.size() == nframes * 9);
@@ -810,6 +813,7 @@ class DeepPot {
    * nframes x natoms x dim_aparam.
    * natoms x dim_aparam. Then all frames are assumed to be provided with the
    *same aparam.
+   * @warning Natoms should not be zero when computing multiple frames.
    **/
   template <typename VALUETYPE, typename ENERGYVTYPE>
   void compute(
@@ -827,7 +831,7 @@ class DeepPot {
       const std::vector<VALUETYPE> &fparam = std::vector<VALUETYPE>(),
       const std::vector<VALUETYPE> &aparam = std::vector<VALUETYPE>()) {
     unsigned int natoms = atype.size();
-    unsigned int nframes = coord.size() / natoms / 3;
+    unsigned int nframes = natoms > 0 ? coord.size() / natoms / 3 : 1;
     assert(nframes * natoms * 3 == coord.size());
     if (!box.empty()) {
       assert(box.size() == nframes * 9);
diff --git a/source/api_c/tests/test_deeppot_a_hpp.cc b/source/api_c/tests/test_deeppot_a_hpp.cc
index a423e0acc6..814122d538 100644
--- a/source/api_c/tests/test_deeppot_a_hpp.cc
+++ b/source/api_c/tests/test_deeppot_a_hpp.cc
@@ -478,6 +478,23 @@ TYPED_TEST(TestInferDeepPotAHPP, cpu_lmp_nlist_type_sel) {
   }
 }
 
+TYPED_TEST(TestInferDeepPotAHPP, cpu_build_nlist_empty_input) {
+  using VALUETYPE = TypeParam;
+  std::vector<VALUETYPE> coord;
+  std::vector<int> atype;
+  std::vector<VALUETYPE>& box = this->box;
+  unsigned int natoms = 0;
+  deepmd::hpp::DeepPot& dp = this->dp;
+  double ener;
+  std::vector<VALUETYPE> force, virial;
+
+  dp.compute(ener, force, virial, coord, atype, box);
+  // the call only needs to complete without raising an error
+  EXPECT_EQ(force.size(), natoms * 3);
+  EXPECT_EQ(virial.size(), 9);
+  EXPECT_LT(fabs(ener), EPSILON);
+}
+
 TYPED_TEST(TestInferDeepPotAHPP, print_summary) {
   deepmd::hpp::DeepPot& dp = this->dp;
   dp.print_summary("");
diff --git a/source/api_c/tests/test_deeppot_model_devi_fparam_aparam.cc b/source/api_c/tests/test_deeppot_model_devi_fparam_aparam.cc
index 9f808d8315..39bc1140d0 100644
--- a/source/api_c/tests/test_deeppot_model_devi_fparam_aparam.cc
+++ b/source/api_c/tests/test_deeppot_model_devi_fparam_aparam.cc
@@ -263,7 +263,7 @@ TYPED_TEST(TestInferDeepPotModeDeviFparamAparam, cpu_lmp_list_std) {
       for (int kk = 0; kk < nmodel; ++kk) {
         avg_f[dd] += fmd[kk][ii * 3 + dd];
       }
-      avg_f[dd] /= (nmodel)*1.0;
+      avg_f[dd] /= (nmodel) * 1.0;
     }
     VALUETYPE std = 0.;
     for (int kk = 0; kk < nmodel; ++kk) {
diff --git a/source/api_c/tests/test_deeppot_model_devi_hpp.cc b/source/api_c/tests/test_deeppot_model_devi_hpp.cc
index 30091c7a5b..75b0fab880 100644
--- a/source/api_c/tests/test_deeppot_model_devi_hpp.cc
+++ b/source/api_c/tests/test_deeppot_model_devi_hpp.cc
@@ -335,7 +335,7 @@ TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_std) {
       for (int kk = 0; kk < nmodel; ++kk) {
         avg_f[dd] += fmd[kk][ii * 3 + dd];
       }
-      avg_f[dd] /= (nmodel)*1.0;
+      avg_f[dd] /= (nmodel) * 1.0;
     }
     VALUETYPE std = 0.;
     for (int kk = 0; kk < nmodel; ++kk) {
diff --git a/source/api_cc/src/DeepPot.cc b/source/api_cc/src/DeepPot.cc
index 785ed00cb8..23a0a7e663 100644
--- a/source/api_cc/src/DeepPot.cc
+++ b/source/api_cc/src/DeepPot.cc
@@ -574,7 +574,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                       const std::vector<VALUETYPE>& fparam_,
                       const std::vector<VALUETYPE>& aparam_) {
   int nall = datype_.size();
-  int nframes = dcoord_.size() / nall / 3;
+  // nframes is ambiguous when nall == 0; fall back to a single frame
+  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
   int nloc = nall;
   atommap = deepmd::AtomMap(datype_.begin(), datype_.begin() + nloc);
   assert(nloc == atommap.get_type().size());
@@ -658,7 +659,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                       const std::vector<VALUETYPE>& fparam_,
                       const std::vector<VALUETYPE>& aparam__) {
   int nall = datype_.size();
-  int nframes = dcoord_.size() / nall / 3;
+  // nframes is ambiguous when nall == 0; fall back to a single frame
+  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
   std::vector<VALUETYPE> fparam;
   std::vector<VALUETYPE> aparam_;
   validate_fparam_aparam(nframes, (aparam_nall ? nall : (nall - nghost)),
@@ -753,7 +755,8 @@ void DeepPot::compute_inner(ENERGYVTYPE& dener,
                             const std::vector<VALUETYPE>& fparam,
                             const std::vector<VALUETYPE>& aparam) {
   int nall = datype_.size();
-  int nframes = dcoord_.size() / nall / 3;
+  // nframes is ambiguous when nall == 0; fall back to a single frame
+  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
   int nloc = nall - nghost;
 
   std::vector<std::pair<std::string, Tensor>> input_tensors;
@@ -841,7 +844,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                       const std::vector<VALUETYPE>& dbox,
                       const std::vector<VALUETYPE>& fparam_,
                       const std::vector<VALUETYPE>& aparam_) {
-  int nframes = dcoord_.size() / 3 / datype_.size();
+  // nframes is ambiguous when datype_ is empty; fall back to a single frame
+  int nframes = datype_.size() > 0 ? (dcoord_.size() / 3 / datype_.size()) : 1;
   atommap = deepmd::AtomMap(datype_.begin(), datype_.end());
   int nloc = datype_.size();
   std::vector<VALUETYPE> fparam;
@@ -930,7 +934,8 @@ void DeepPot::compute(ENERGYVTYPE& dener,
                       const std::vector<VALUETYPE>& fparam_,
                       const std::vector<VALUETYPE>& aparam__) {
   int nall = datype_.size();
-  int nframes = dcoord_.size() / 3 / nall;
+  // nframes is ambiguous when nall == 0; fall back to a single frame
+  int nframes = nall > 0 ? (dcoord_.size() / nall / 3) : 1;
   int nloc = nall - nghost;
   std::vector<VALUETYPE> fparam;
   std::vector<VALUETYPE> aparam_;
diff --git a/source/api_cc/src/common.cc b/source/api_cc/src/common.cc
index fad7e374ab..5994e9446f 100644
--- a/source/api_cc/src/common.cc
+++ b/source/api_cc/src/common.cc
@@ -380,7 +380,8 @@ int deepmd::session_input_tensors(
     const deepmd::AtomMap& atommap,
     const std::string scope,
     const bool aparam_nall) {
-  int nframes = dcoord_.size() / 3 / datype_.size();
+  // nframes is ambiguous when datype_ is empty; fall back to a single frame
+  int nframes = datype_.size() > 0 ? (dcoord_.size() / 3 / datype_.size()) : 1;
   int nall = datype_.size();
   int nloc = nall;
   assert(nall * 3 * nframes == dcoord_.size());
@@ -445,10 +446,13 @@ int deepmd::session_input_tensors(
   std::vector<VALUETYPE> dcoord(dcoord_);
   atommap.forward<VALUETYPE>(dcoord.begin(), dcoord_.begin(), 3, nframes, nall);
   std::vector<VALUETYPE> aparam_(aparam__);
-  atommap.forward<VALUETYPE>(
-      aparam_.begin(), aparam__.begin(),
-      aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
-      (aparam_nall ? nall : nloc));
+  if ((aparam_nall ? nall : nloc) > 0) {
+    atommap.forward<VALUETYPE>(
+        aparam_.begin(), aparam__.begin(),
+        aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
+        (aparam_nall ? nall : nloc));
+  }
+  // if the atom count is 0, aparam__ should be empty too: nothing to forward
 
   for (int ii = 0; ii < nframes; ++ii) {
     for (int jj = 0; jj < nall * 3; ++jj) {
@@ -520,7 +524,8 @@ int deepmd::session_input_tensors(
     const int ago,
     const std::string scope,
     const bool aparam_nall) {
-  int nframes = dcoord_.size() / 3 / datype_.size();
+  // nframes is ambiguous when datype_ is empty; fall back to a single frame
+  int nframes = datype_.size() > 0 ? (dcoord_.size() / 3 / datype_.size()) : 1;
   int nall = datype_.size();
   int nloc = nall - nghost;
   assert(nall * 3 * nframes == dcoord_.size());
@@ -581,10 +586,13 @@ int deepmd::session_input_tensors(
   std::vector<VALUETYPE> dcoord(dcoord_);
   atommap.forward<VALUETYPE>(dcoord.begin(), dcoord_.begin(), 3, nframes, nall);
   std::vector<VALUETYPE> aparam_(aparam__);
-  atommap.forward<VALUETYPE>(
-      aparam_.begin(), aparam__.begin(),
-      aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
-      (aparam_nall ? nall : nloc));
+  if ((aparam_nall ? nall : nloc) > 0) {
+    atommap.forward<VALUETYPE>(
+        aparam_.begin(), aparam__.begin(),
+        aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
+        (aparam_nall ? nall : nloc));
+  }
+  // if the atom count is 0, aparam__ should be empty too: nothing to forward
 
   for (int ii = 0; ii < nframes; ++ii) {
     for (int jj = 0; jj < nall * 3; ++jj) {
@@ -717,10 +725,13 @@ int deepmd::session_input_tensors_mixed_type(
   std::vector<VALUETYPE> dcoord(dcoord_);
   atommap.forward<VALUETYPE>(dcoord.begin(), dcoord_.begin(), 3, nframes, nall);
   std::vector<VALUETYPE> aparam_(aparam__);
-  atommap.forward<VALUETYPE>(
-      aparam_.begin(), aparam__.begin(),
-      aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
-      (aparam_nall ? nall : nloc));
+  if ((aparam_nall ? nall : nloc) > 0) {
+    atommap.forward<VALUETYPE>(
+        aparam_.begin(), aparam__.begin(),
+        aparam__.size() / nframes / (aparam_nall ? nall : nloc), nframes,
+        (aparam_nall ? nall : nloc));
+  }
+  // if the atom count is 0, aparam__ should be empty too: nothing to forward
 
   for (int ii = 0; ii < nframes; ++ii) {
     for (int jj = 0; jj < nall * 3; ++jj) {
diff --git a/source/api_cc/tests/test_deeppot_model_devi.cc b/source/api_cc/tests/test_deeppot_model_devi.cc
index e28408319f..3ceba6a94e 100644
--- a/source/api_cc/tests/test_deeppot_model_devi.cc
+++ b/source/api_cc/tests/test_deeppot_model_devi.cc
@@ -330,7 +330,7 @@ TYPED_TEST(TestInferDeepPotModeDevi, cpu_lmp_list_std) {
       for (int kk = 0; kk < nmodel; ++kk) {
         avg_f[dd] += fmd[kk][ii * 3 + dd];
       }
-      avg_f[dd] /= (nmodel)*1.0;
+      avg_f[dd] /= (nmodel) * 1.0;
     }
     VALUETYPE std = 0.;
     for (int kk = 0; kk < nmodel; ++kk) {
diff --git a/source/api_cc/tests/test_deeppot_model_devi_fparam_aparam.cc b/source/api_cc/tests/test_deeppot_model_devi_fparam_aparam.cc
index 58060fc397..4647b30139 100644
--- a/source/api_cc/tests/test_deeppot_model_devi_fparam_aparam.cc
+++ b/source/api_cc/tests/test_deeppot_model_devi_fparam_aparam.cc
@@ -292,7 +292,7 @@ TYPED_TEST(TestInferDeepPotModeDeviFparamAparam, cpu_lmp_list_std) {
       for (int kk = 0; kk < nmodel; ++kk) {
         avg_f[dd] += fmd[kk][ii * 3 + dd];
       }
-      avg_f[dd] /= (nmodel)*1.0;
+      avg_f[dd] /= (nmodel) * 1.0;
     }
     VALUETYPE std = 0.;
     for (int kk = 0; kk < nmodel; ++kk) {
diff --git a/source/install/docker_test_package_c.sh b/source/install/docker_test_package_c.sh
index ada1799953..ed3b265821 100755
--- a/source/install/docker_test_package_c.sh
+++ b/source/install/docker_test_package_c.sh
@@ -5,11 +5,11 @@ SCRIPT_PATH=$(dirname $(realpath -s $0))
 
 # assume libdeepmd_c.tar.gz has been created
 
-wget "https://drive.google.com/uc?export=download&id=1xldLhzm4uSkq6iPohSycNWAsWqKAenKX" -O ${SCRIPT_PATH}/../../examples/infer_water/"graph.pb"
-
 docker run --rm -v ${SCRIPT_PATH}/../..:/root/deepmd-kit -w /root/deepmd-kit \
 	gcc:4.9 \
 	/bin/sh -c "tar vxzf libdeepmd_c.tar.gz \
             && cd examples/infer_water \
+            && gcc convert_model.c -std=c99 -L ../../libdeepmd_c/lib -I ../../libdeepmd_c/include -Wl,--no-as-needed -ldeepmd_c -Wl,-rpath=../../libdeepmd_c/lib -o convert_model \
             && gcc infer_water.c -std=c99 -L ../../libdeepmd_c/lib -I ../../libdeepmd_c/include -Wl,--no-as-needed -ldeepmd_c -Wl,-rpath=../../libdeepmd_c/lib -o infer_water \
+            && ./convert_model \
             && ./infer_water"
diff --git a/source/lib/src/gpu/tabulate.cu b/source/lib/src/gpu/tabulate.cu
index a22742ae19..71ea17ced5 100644
--- a/source/lib/src/gpu/tabulate.cu
+++ b/source/lib/src/gpu/tabulate.cu
@@ -200,7 +200,9 @@ __global__ void tabulate_fusion_se_a_fifth_order_polynomial(
   FPTYPE var[6];
   for (int ii = 0; ii < nnei; ii++) {
     FPTYPE xx = em_x[block_idx * nnei + ii];
-    if (xx == ago && is_sorted) {
+    if (xx == ago && em[block_idx * nnei * 4 + ii * 4 + 1] == 0. &&
+        em[block_idx * nnei * 4 + ii * 4 + 2] == 0. &&
+        em[block_idx * nnei * 4 + ii * 4 + 3] == 0. && is_sorted) {
       unloop = true;
       breakpoint = ii;
     }
@@ -286,7 +288,9 @@ __global__ void tabulate_fusion_se_a_grad_fifth_order_polynomial(
   FPTYPE ago = GpuShuffleSync(0xffffffff, em_x[block_idx * nnei + nnei - 1], 0);
   for (int ii = warp_idx; ii < nnei; ii += KTILE) {
     FPTYPE xx = em_x[block_idx * nnei + ii];
-    if (ago == xx && is_sorted) {
+    if (ago == xx && em[block_idx * nnei * 4 + ii * 4 + 1] == 0. &&
+        em[block_idx * nnei * 4 + ii * 4 + 2] == 0. &&
+        em[block_idx * nnei * 4 + ii * 4 + 3] == 0. && is_sorted) {
       unloop = true;
       breakpoint = ii;
     }
@@ -393,7 +397,9 @@ __global__ void tabulate_fusion_se_a_grad_grad_fifth_order_polynomial(
   for (int ii = 0; ii < nnei; ii++) {
     FPTYPE xx = em_x[block_idx * nnei + ii];
     FPTYPE dz_xx = dz_dy_dem_x[block_idx * nnei + ii];
-    if (xx == ago && is_sorted) {
+    if (xx == ago && em[block_idx * nnei * 4 + ii * 4 + 1] == 0. &&
+        em[block_idx * nnei * 4 + ii * 4 + 2] == 0. &&
+        em[block_idx * nnei * 4 + ii * 4 + 3] == 0. && is_sorted) {
       unloop = true;
       breakpoint = ii;
     }
diff --git a/source/lib/src/tabulate.cc b/source/lib/src/tabulate.cc
index 3e2a1bec62..9352980351 100644
--- a/source/lib/src/tabulate.cc
+++ b/source/lib/src/tabulate.cc
@@ -108,7 +108,7 @@ void deepmd::tabulate_fusion_se_a_cpu(FPTYPE* out,
       ll[2] = em[ii * nnei * 4 + jj * 4 + 2];
       ll[3] = em[ii * nnei * 4 + jj * 4 + 3];
       FPTYPE xx = em_x[ii * nnei + jj];
-      if (ago == xx && is_sorted) {
+      if (ago == xx && ll[1] == 0. && ll[2] == 0. && ll[3] == 0. && is_sorted) {
         unloop = true;
       }
       int table_idx = 0;
@@ -195,7 +195,7 @@ void deepmd::tabulate_fusion_se_a_grad_cpu(FPTYPE* dy_dem_x,
       ll[2] = em[ii * nnei * 4 + jj * 4 + 2];
       ll[3] = em[ii * nnei * 4 + jj * 4 + 3];
       FPTYPE xx = em_x[ii * nnei + jj];
-      if (ago == xx && is_sorted) {
+      if (ago == xx && ll[1] == 0. && ll[2] == 0. && ll[3] == 0. && is_sorted) {
         unloop = true;
       }
       int table_idx = 0;
@@ -298,7 +298,7 @@ void deepmd::tabulate_fusion_se_a_grad_grad_cpu(FPTYPE* dz_dy,
       hh[3] = dz_dy_dem[ii * nnei * 4 + jj * 4 + 3];
       FPTYPE xx = em_x[ii * nnei + jj];
       FPTYPE dz_xx = dz_dy_dem_x[ii * nnei + jj];
-      if (ago == xx && is_sorted) {
+      if (ago == xx && ll[1] == 0. && ll[2] == 0. && ll[3] == 0. && is_sorted) {
         unloop = true;
       }
       int table_idx = 0;
diff --git a/source/lmp/pair_deepmd.cpp b/source/lmp/pair_deepmd.cpp
index 3fa592bf58..432077de5b 100644
--- a/source/lmp/pair_deepmd.cpp
+++ b/source/lmp/pair_deepmd.cpp
@@ -213,26 +213,78 @@ static void make_uniform_aparam(vector<double> &daparam,
 }
 
+// Fill fparam (resized to dim_fparam) from the compute named by
+// compute_fparam_id: its `scalar` if dim_fparam == 1, else its `vector`.
 void PairDeepMD::make_fparam_from_compute(vector<double> &fparam) {
-  assert(do_compute);
+  assert(do_compute_fparam);
 
-  int icompute = modify->find_compute(compute_id);
+  int icompute = modify->find_compute(compute_fparam_id);
+  // find_compute() returns -1 for an unknown id; fail with a clear message
+  // rather than indexing modify->compute out of bounds.
+  if (icompute < 0) {
+    error->all(FLERR, ("compute id for fparam is not found: " +
+                       compute_fparam_id).c_str());
+  }
   Compute *compute = modify->compute[icompute];
 
   assert(compute);
   fparam.resize(dim_fparam);
 
   if (dim_fparam == 1) {
-    compute->compute_scalar();
+    // skip re-evaluation when invoked_flag already records a scalar call
+    if (!(compute->invoked_flag & Compute::INVOKED_SCALAR)) {
+      compute->compute_scalar();
+      compute->invoked_flag |= Compute::INVOKED_SCALAR;
+    }
     fparam[0] = compute->scalar;
   } else if (dim_fparam > 1) {
-    compute->compute_vector();
+    if (!(compute->invoked_flag & Compute::INVOKED_VECTOR)) {
+      compute->compute_vector();
+      compute->invoked_flag |= Compute::INVOKED_VECTOR;
+    }
     double *cvector = compute->vector;
-    for (int jj = 0; jj < dim_aparam; ++jj) {
+    for (int jj = 0; jj < dim_fparam; ++jj) {
       fparam[jj] = cvector[jj];
     }
   }
 }
 
+// Fill aparam with dim_aparam values per local atom from the compute named by
+// compute_aparam_id: its `vector_atom` if dim_aparam == 1, else its
+// `array_atom`. Layout is atom-major: aparam[ii * dim_aparam + jj].
+void PairDeepMD::make_aparam_from_compute(vector<double> &aparam) {
+  assert(do_compute_aparam);
+
+  int icompute = modify->find_compute(compute_aparam_id);
+  // find_compute() returns -1 for an unknown id; fail with a clear message
+  // rather than indexing modify->compute out of bounds.
+  if (icompute < 0) {
+    error->all(FLERR, ("compute id for aparam is not found: " +
+                       compute_aparam_id).c_str());
+  }
+  Compute *compute = modify->compute[icompute];
+
+  assert(compute);
+  int nlocal = atom->nlocal;
+  aparam.resize(dim_aparam * nlocal);
+
+  // skip re-evaluation when invoked_flag already records a per-atom call
+  if (!(compute->invoked_flag & Compute::INVOKED_PERATOM)) {
+    compute->compute_peratom();
+    compute->invoked_flag |= Compute::INVOKED_PERATOM;
+  }
+  if (dim_aparam == 1) {
+    double *cvector = compute->vector_atom;
+    aparam.assign(cvector, cvector + nlocal);
+  } else if (dim_aparam > 1) {
+    double **carray = compute->array_atom;
+    for (int ii = 0; ii < nlocal; ++ii) {
+      for (int jj = 0; jj < dim_aparam; ++jj) {
+        aparam[ii * dim_aparam + jj] = carray[ii][jj];
+      }
+    }
+  }
+}
+
 #ifdef USE_TTM
 void PairDeepMD::make_ttm_fparam(vector<double> &fparam) {
   assert(do_ttm);
@@ -379,7 +412,8 @@ PairDeepMD::PairDeepMD(LAMMPS *lmp)
   eps_v = 0.;
   scale = NULL;
   do_ttm = false;
-  do_compute = false;
+  do_compute_fparam = false;
+  do_compute_aparam = false;
   single_model = false;
   multi_models_mod_devi = false;
   multi_models_no_mod_devi = false;
@@ -492,8 +526,10 @@ void PairDeepMD::compute(int eflag, int vflag) {
     }
   }
 
-  // uniform aparam
-  if (aparam.size() > 0) {
+  if (do_compute_aparam) {
+    make_aparam_from_compute(daparam);
+  } else if (aparam.size() > 0) {
+    // uniform aparam
     make_uniform_aparam(daparam, aparam, nlocal);
   } else if (do_ttm) {
 #ifdef USE_TTM
@@ -505,7 +541,7 @@ void PairDeepMD::compute(int eflag, int vflag) {
 #endif
   }
 
-  if (do_compute) {
+  if (do_compute_fparam) {
     make_fparam_from_compute(fparam);
   }
 
@@ -884,6 +920,7 @@ static bool is_key(const string &input) {
   keys.push_back("fparam");
   keys.push_back("aparam");
   keys.push_back("fparam_from_compute");
+  keys.push_back("aparam_from_compute");
   keys.push_back("ttm");
   keys.push_back("atomic");
   keys.push_back("relative");
@@ -1019,15 +1056,24 @@ void PairDeepMD::settings(int narg, char **arg) {
         if (iarg + 1 + ii >= narg || is_key(arg[iarg + 1 + ii])) {
           error->all(FLERR,
                      "invalid fparam_from_compute key: should be "
-                     "fparam_from_compute compute_id(str)");
+                     "fparam_from_compute compute_fparam_id(str)");
         }
       }
-      do_compute = true;
-      compute_id = arg[iarg + 1];
+      do_compute_fparam = true;
+      compute_fparam_id = arg[iarg + 1];
       iarg += 1 + 1;
-    }
-
-    else if (string(arg[iarg]) == string("atomic")) {
+    } else if (string(arg[iarg]) == string("aparam_from_compute")) {
+      for (int ii = 0; ii < 1; ++ii) {
+        if (iarg + 1 + ii >= narg || is_key(arg[iarg + 1 + ii])) {
+          error->all(FLERR,
+                     "invalid aparam_from_compute key: should be "
+                     "aparam_from_compute compute_aparam_id(str)");
+        }
+      }
+      do_compute_aparam = true;
+      compute_aparam_id = arg[iarg + 1];
+      iarg += 1 + 1;
+    } else if (string(arg[iarg]) == string("atomic")) {
       out_each = 1;
       iarg += 1;
     } else if (string(arg[iarg]) == string("relative")) {
@@ -1056,10 +1102,12 @@ void PairDeepMD::settings(int narg, char **arg) {
   if (out_freq < 0) {
     error->all(FLERR, "Illegal out_freq, should be >= 0");
   }
-  if (do_ttm && aparam.size() > 0) {
-    error->all(FLERR, "aparam and ttm should NOT be set simultaneously");
+  if ((int)do_ttm + (int)do_compute_aparam + (int)(aparam.size() > 0) > 1) {
+    error->all(FLERR,
+               "aparam, aparam_from_compute, and ttm should NOT be set "
+               "simultaneously");
   }
-  if (do_compute && fparam.size() > 0) {
+  if (do_compute_fparam && fparam.size() > 0) {
     error->all(
         FLERR,
         "fparam and fparam_from_compute should NOT be set simultaneously");
@@ -1104,9 +1152,13 @@ void PairDeepMD::settings(int narg, char **arg) {
       }
       cout << endl;
     }
-    if (do_compute) {
-      cout << pre << "using compute id:      ";
-      cout << compute_id << "  " << endl;
+    if (do_compute_fparam) {
+      cout << pre << "using compute id (fparam):      ";
+      cout << compute_fparam_id << "  " << endl;
+    }
+    if (do_compute_aparam) {
+      cout << pre << "using compute id (aparam):      ";
+      cout << compute_aparam_id << "  " << endl;
     }
     if (aparam.size() > 0) {
       cout << pre << "using aparam(s):    ";
diff --git a/source/lmp/pair_deepmd.h b/source/lmp/pair_deepmd.h
index bde7745d36..0f704ab45c 100644
--- a/source/lmp/pair_deepmd.h
+++ b/source/lmp/pair_deepmd.h
@@ -121,8 +121,11 @@ class PairDeepMD : public Pair {
   double eps_v;
 
   void make_fparam_from_compute(std::vector<double> &fparam);
-  bool do_compute;
-  std::string compute_id;
+  bool do_compute_fparam;
+  std::string compute_fparam_id;
+  void make_aparam_from_compute(std::vector<double> &aparam);
+  bool do_compute_aparam;
+  std::string compute_aparam_id;
 
   void make_ttm_fparam(std::vector<double> &fparam);
 
diff --git a/source/op/map_aparam.cc b/source/op/map_aparam.cc
index 0dc8a3743e..d0ff08032d 100644
--- a/source/op/map_aparam.cc
+++ b/source/op/map_aparam.cc
@@ -49,8 +49,8 @@ class MapAparamOp : public OpKernel {
     int nframes = aparam_tensor.shape().dim_size(0);
     int nloc = natoms(0);
     int nall = natoms(1);
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
-    int numb_aparam = aparam_tensor.shape().dim_size(1) / nall;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
+    int numb_aparam = nall > 0 ? aparam_tensor.shape().dim_size(1) / nall : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == nlist_tensor.shape().dim_size(0)),
diff --git a/source/op/pairwise.cc b/source/op/pairwise.cc
index d60bc3bccc..8ed140a14a 100644
--- a/source/op/pairwise.cc
+++ b/source/op/pairwise.cc
@@ -85,7 +85,7 @@ class PairwiseIdxOp : public OpKernel {
       nloc_qmmm.push_back(nloc_qmmm_ii);
       nghost_qm.push_back(nghost_qm_ii);
       nghost_qmmm.push_back(nghost_qmmm_ii);
-      nframes_qmmm.push_back(backward_qmmm_map.size() / nall);
+      nframes_qmmm.push_back(nall > 0 ? backward_qmmm_map.size() / nall : 0);
     }
     int max_nloc_qm = 1, max_nloc_qmmm = 1, max_nghost_qm = 0,
         max_nghost_qmmm = 0;
diff --git a/source/op/prod_force.cc b/source/op/prod_force.cc
index a2631b8c1d..d8ced591b9 100644
--- a/source/op/prod_force.cc
+++ b/source/op/prod_force.cc
@@ -58,8 +58,8 @@ class ProdForceOp : public OpKernel {
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
     int nall = natoms(1);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
diff --git a/source/op/prod_force_grad.cc b/source/op/prod_force_grad.cc
index fed8616534..2d14022279 100644
--- a/source/op/prod_force_grad.cc
+++ b/source/op/prod_force_grad.cc
@@ -65,8 +65,8 @@ class ProdForceGradOp : public OpKernel {
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
diff --git a/source/op/prod_force_grad_multi_device.cc b/source/op/prod_force_grad_multi_device.cc
index ffcd8f0b8b..bbcef6bd91 100644
--- a/source/op/prod_force_grad_multi_device.cc
+++ b/source/op/prod_force_grad_multi_device.cc
@@ -70,8 +70,8 @@ class ProdForceSeAGradOp : public OpKernel {
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
@@ -180,8 +180,8 @@ class ProdForceSeRGradOp : public OpKernel {
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
diff --git a/source/op/prod_force_multi_device.cc b/source/op/prod_force_multi_device.cc
index 935b5b9f2f..3eaf005f9a 100644
--- a/source/op/prod_force_multi_device.cc
+++ b/source/op/prod_force_multi_device.cc
@@ -89,8 +89,8 @@ class ProdForceSeAOp : public OpKernel {
     int nloc = natoms[0];
     int nall = natoms[1];
     int nframes = net_deriv_tensor.shape().dim_size(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
     // check the sizes
     OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
                 errors::InvalidArgument("number of samples should match"));
@@ -187,8 +187,8 @@ class ProdForceSeROp : public OpKernel {
     int nloc = natoms[0];
     int nall = natoms[1];
     int nframes = net_deriv_tensor.shape().dim_size(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
     // check the sizes
     OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
                 errors::InvalidArgument("number of samples should match"));
diff --git a/source/op/prod_force_se_a_grad.cc b/source/op/prod_force_se_a_grad.cc
index 1c859158e0..21dd4fe00a 100644
--- a/source/op/prod_force_se_a_grad.cc
+++ b/source/op/prod_force_se_a_grad.cc
@@ -63,8 +63,8 @@ class ProdForceSeAGradOp : public OpKernel {
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
diff --git a/source/op/prod_force_se_a_mask.cc b/source/op/prod_force_se_a_mask.cc
index c2270f4234..32fcf54a79 100644
--- a/source/op/prod_force_se_a_mask.cc
+++ b/source/op/prod_force_se_a_mask.cc
@@ -49,7 +49,7 @@ class ProdForceSeAMaskOp : public OpKernel {
     int nloc = total_atom_num;
     int nall = total_atom_num;
     int ndescrpt = nall * 4;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
diff --git a/source/op/prod_force_se_a_mask_grad.cc b/source/op/prod_force_se_a_mask_grad.cc
index 54eafecc19..6f841b1c7d 100644
--- a/source/op/prod_force_se_a_mask_grad.cc
+++ b/source/op/prod_force_se_a_mask_grad.cc
@@ -55,7 +55,7 @@ class ProdForceSeAMaskGradOp : public OpKernel {
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = total_atom_num;
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
     int nnei = total_atom_num;
 
     // check the sizes
diff --git a/source/op/prod_force_se_r_grad.cc b/source/op/prod_force_se_r_grad.cc
index fbaf667675..f0b4b18323 100644
--- a/source/op/prod_force_se_r_grad.cc
+++ b/source/op/prod_force_se_r_grad.cc
@@ -57,8 +57,8 @@ class ProdForceSeRGradOp : public OpKernel {
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
diff --git a/source/op/prod_virial.cc b/source/op/prod_virial.cc
index dfac86b8cf..2719c6c670 100644
--- a/source/op/prod_virial.cc
+++ b/source/op/prod_virial.cc
@@ -63,8 +63,8 @@ class ProdVirialOp : public OpKernel {
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
     int nall = natoms(1);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
diff --git a/source/op/prod_virial_grad.cc b/source/op/prod_virial_grad.cc
index 2c179a2793..b06e273453 100644
--- a/source/op/prod_virial_grad.cc
+++ b/source/op/prod_virial_grad.cc
@@ -70,8 +70,8 @@ class ProdVirialGradOp : public OpKernel {
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
diff --git a/source/op/prod_virial_grad_multi_device.cc b/source/op/prod_virial_grad_multi_device.cc
index d3e7025e6e..215c26f184 100644
--- a/source/op/prod_virial_grad_multi_device.cc
+++ b/source/op/prod_virial_grad_multi_device.cc
@@ -76,8 +76,8 @@ class ProdVirialSeAGradOp : public OpKernel {
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
@@ -206,8 +206,8 @@ class ProdVirialSeRGradOp : public OpKernel {
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
diff --git a/source/op/prod_virial_multi_device.cc b/source/op/prod_virial_multi_device.cc
index 445770e85a..23b312b797 100644
--- a/source/op/prod_virial_multi_device.cc
+++ b/source/op/prod_virial_multi_device.cc
@@ -70,9 +70,9 @@ class ProdVirialSeAOp : public OpKernel {
     const int* natoms = natoms_tensor.flat<int>().data();
     int nloc = natoms[0];
     int nall = natoms[1];
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
     int nframes = net_deriv_tensor.shape().dim_size(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
     // check the sizes
     OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
                 errors::InvalidArgument("number of samples should match"));
@@ -169,9 +169,9 @@ class ProdVirialSeROp : public OpKernel {
     const int* natoms = natoms_tensor.flat<int>().data();
     int nloc = natoms[0];
     int nall = natoms[1];
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
     int nframes = net_deriv_tensor.shape().dim_size(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
     // check the sizes
     OP_REQUIRES(context, (nframes == in_deriv_tensor.shape().dim_size(0)),
                 errors::InvalidArgument("number of samples should match"));
diff --git a/source/op/prod_virial_se_a_grad.cc b/source/op/prod_virial_se_a_grad.cc
index c914bfddaf..a22401d654 100644
--- a/source/op/prod_virial_se_a_grad.cc
+++ b/source/op/prod_virial_se_a_grad.cc
@@ -68,8 +68,8 @@ class ProdVirialSeAGradOp : public OpKernel {
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
diff --git a/source/op/prod_virial_se_r_grad.cc b/source/op/prod_virial_se_r_grad.cc
index 90e6588b99..b874c828df 100644
--- a/source/op/prod_virial_se_r_grad.cc
+++ b/source/op/prod_virial_se_r_grad.cc
@@ -62,8 +62,8 @@ class ProdVirialSeRGradOp : public OpKernel {
 
     int nframes = net_deriv_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int ndescrpt = net_deriv_tensor.shape().dim_size(1) / nloc;
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int ndescrpt = nloc > 0 ? net_deriv_tensor.shape().dim_size(1) / nloc : 0;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
diff --git a/source/op/soft_min_force.cc b/source/op/soft_min_force.cc
index 7db6936025..a2970f4c3a 100644
--- a/source/op/soft_min_force.cc
+++ b/source/op/soft_min_force.cc
@@ -54,7 +54,7 @@ class SoftMinForceOp : public OpKernel {
     int nframes = du_tensor.shape().dim_size(0);
     int nloc = natoms(0);
     int nall = natoms(1);
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == sw_deriv_tensor.shape().dim_size(0)),
diff --git a/source/op/soft_min_force_grad.cc b/source/op/soft_min_force_grad.cc
index be53d74e26..752ad4f93d 100644
--- a/source/op/soft_min_force_grad.cc
+++ b/source/op/soft_min_force_grad.cc
@@ -62,7 +62,7 @@ class SoftMinForceGradOp : public OpKernel {
 
     int nframes = du_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
diff --git a/source/op/soft_min_virial.cc b/source/op/soft_min_virial.cc
index a37940f651..91a94e01c3 100644
--- a/source/op/soft_min_virial.cc
+++ b/source/op/soft_min_virial.cc
@@ -60,7 +60,7 @@ class SoftMinVirialOp : public OpKernel {
     int nframes = du_tensor.shape().dim_size(0);
     int nloc = natoms(0);
     int nall = natoms(1);
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == sw_deriv_tensor.shape().dim_size(0)),
diff --git a/source/op/soft_min_virial_grad.cc b/source/op/soft_min_virial_grad.cc
index ef8f026f23..bc9cb96a63 100644
--- a/source/op/soft_min_virial_grad.cc
+++ b/source/op/soft_min_virial_grad.cc
@@ -67,7 +67,7 @@ class SoftMinVirialGradOp : public OpKernel {
 
     int nframes = du_tensor.shape().dim_size(0);
     int nloc = natoms(0);
-    int nnei = nlist_tensor.shape().dim_size(1) / nloc;
+    int nnei = nloc > 0 ? nlist_tensor.shape().dim_size(1) / nloc : 0;
 
     // check the sizes
     OP_REQUIRES(context, (nframes == grad_shape.dim_size(0)),
diff --git a/source/op/unaggregated_grad.cc b/source/op/unaggregated_grad.cc
index 9a61a3bac9..cf645f6c21 100644
--- a/source/op/unaggregated_grad.cc
+++ b/source/op/unaggregated_grad.cc
@@ -168,7 +168,9 @@ struct UnaggregatedDyDxFunctor {
           accumulator += w[jj * width + ii] * dy_dx[kk * size + jj];
         }
         dz_drou *= accumulator;
-        dz_drou += dy_dx[kk * size + ii % size];
+        if (width == 2 * size || width == size) {
+          dz_drou += dy_dx[kk * size + ii % size];
+        }
         dz_dx[kk * width + ii] = dz_drou;
       }
     }
@@ -256,7 +258,9 @@ struct UnaggregatedDy2DxFunctor {
         dz_drou +=
             grad_grad(ybar[kk * width + ii], z[kk * width + ii], functype) *
             accumulator * accumulator;
-        dz_drou += dy2_dx[kk * size + ii % size];
+        if (width == 2 * size || width == size) {
+          dz_drou += dy2_dx[kk * size + ii % size];
+        }
         dz2_dx[kk * width + ii] = dz_drou;
       }
     }
diff --git a/source/tests/model_compression/input.json b/source/tests/model_compression/input.json
index be64bfb2cf..f750e564d2 100644
--- a/source/tests/model_compression/input.json
+++ b/source/tests/model_compression/input.json
@@ -13,10 +13,12 @@
       ],
       "rcut_smth": 0.50,
       "rcut": 6.00,
+      "_comment": "N2=2N1, N2=N1, and otherwise can be tested",
       "neuron": [
         4,
         8,
-        16
+        17,
+        17
       ],
       "resnet_dt": false,
       "axis_neuron": 16,
diff --git a/source/tests/test_deepmd_data_sys.py b/source/tests/test_deepmd_data_sys.py
index 54b75cbed2..abfa7d7e48 100644
--- a/source/tests/test_deepmd_data_sys.py
+++ b/source/tests/test_deepmd_data_sys.py
@@ -13,6 +13,7 @@
 )
 from deepmd.utils.data_system import (
     DeepmdDataSystem,
+    prob_sys_size_ext,
 )
 
 if GLOBAL_NP_FLOAT_PRECISION == np.float32:
@@ -310,7 +311,9 @@ def test_prob_sys_size_1(self):
         batch_size = 1
         test_size = 1
         ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
-        prob = ds._prob_sys_size_ext("prob_sys_size; 0:2:2; 2:4:8")
+        prob = prob_sys_size_ext(
+            "prob_sys_size; 0:2:2; 2:4:8", ds.get_nsystems(), ds.get_nbatches()
+        )
         self.assertAlmostEqual(np.sum(prob), 1)
         self.assertAlmostEqual(np.sum(prob[0:2]), 0.2)
         self.assertAlmostEqual(np.sum(prob[2:4]), 0.8)
@@ -332,7 +335,9 @@ def test_prob_sys_size_2(self):
         batch_size = 1
         test_size = 1
         ds = DeepmdDataSystem(self.sys_name, batch_size, test_size, 2.0)
-        prob = ds._prob_sys_size_ext("prob_sys_size; 1:2:0.4; 2:4:1.6")
+        prob = prob_sys_size_ext(
+            "prob_sys_size; 1:2:0.4; 2:4:1.6", ds.get_nsystems(), ds.get_nbatches()
+        )
         self.assertAlmostEqual(np.sum(prob), 1)
         self.assertAlmostEqual(np.sum(prob[1:2]), 0.2)
         self.assertAlmostEqual(np.sum(prob[2:4]), 0.8)
diff --git a/source/tests/test_deeppot_a.py b/source/tests/test_deeppot_a.py
index 006b391e49..1c6cdc4afc 100644
--- a/source/tests/test_deeppot_a.py
+++ b/source/tests/test_deeppot_a.py
@@ -13,6 +13,7 @@
 from deepmd.env import (
     GLOBAL_NP_FLOAT_PRECISION,
     MODEL_VERSION,
+    tf,
 )
 from deepmd.infer import (
     DeepPot,
@@ -302,6 +303,23 @@ def test_2frame_atm(self):
         expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
+    # TODO: fix the segfault on GPUs for zero-atom input, then remove this skip
+    @unittest.skipIf(tf.test.is_gpu_available(), reason="Segfault in GPUs")
+    def test_zero_input(self):
+        nframes = 1
+        ee, ff, vv = self.dp.eval(
+            np.zeros([nframes, 0, 3]), self.box, np.zeros([0]), atomic=False
+        )
+        # with zero atoms the outputs must still have the per-frame shapes
+        natoms = 0
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        # every returned force, energy, and virial entry must be zero
+        np.testing.assert_almost_equal(ff.ravel(), 0, default_places)
+        np.testing.assert_almost_equal(ee.ravel(), 0, default_places)
+        np.testing.assert_almost_equal(vv.ravel(), 0, default_places)
+
 
 class TestDeepPotANoPBC(unittest.TestCase):
     @classmethod
@@ -727,7 +745,7 @@ def setUp(self):
 
     def test_convert_012(self):
         old_model = "deeppot.pb"
-        new_model = "deeppot.pbtxt"
+        new_model = "deeppot-new.pb"
         convert_pbtxt_to_pb(str(tests_path / "infer" / "sea_012.pbtxt"), old_model)
         run_dp(f"dp convert-from 0.12 -i {old_model} -o {new_model}")
         dp = DeepPot(new_model)
@@ -737,7 +755,7 @@ def test_convert_012(self):
 
     def test_convert(self):
         old_model = "deeppot.pb"
-        new_model = "deeppot.pbtxt"
+        new_model = "deeppot-new.pb"
         convert_pbtxt_to_pb(str(tests_path / "infer" / "sea_012.pbtxt"), old_model)
         run_dp(f"dp convert-from -i {old_model} -o {new_model}")
         dp = DeepPot(new_model)
diff --git a/source/tests/test_deeppot_r.py b/source/tests/test_deeppot_r.py
index c1746efa01..44c6e3c167 100644
--- a/source/tests/test_deeppot_r.py
+++ b/source/tests/test_deeppot_r.py
@@ -9,6 +9,7 @@
 
 from deepmd.env import (
     GLOBAL_NP_FLOAT_PRECISION,
+    tf,
 )
 from deepmd.infer import (
     DeepPot,
@@ -430,6 +431,23 @@ def test_2frame_atm(self):
         expected_sv = np.sum(expected_v.reshape([nframes, -1, 9]), axis=1)
         np.testing.assert_almost_equal(vv.ravel(), expected_sv.ravel(), default_places)
 
+    # TODO: fix the segfault on GPUs for zero-atom input, then remove this skip
+    @unittest.skipIf(tf.test.is_gpu_available(), reason="Segfault in GPUs")
+    def test_zero_input(self):
+        nframes = 1
+        ee, ff, vv = self.dp.eval(
+            np.zeros([nframes, 0, 3]), self.box, np.zeros([0]), atomic=False
+        )
+        # with zero atoms the outputs must still have the per-frame shapes
+        natoms = 0
+        self.assertEqual(ee.shape, (nframes, 1))
+        self.assertEqual(ff.shape, (nframes, natoms, 3))
+        self.assertEqual(vv.shape, (nframes, 9))
+        # every returned force, energy, and virial entry must be zero
+        np.testing.assert_almost_equal(ff.ravel(), 0, default_places)
+        np.testing.assert_almost_equal(ee.ravel(), 0, default_places)
+        np.testing.assert_almost_equal(vv.ravel(), 0, default_places)
+
 
 class TestDeepPotRLargeBoxNoPBC(unittest.TestCase):
     @classmethod