From 5d428bd25e4a9780bb3a3802eacf23728883a0f1 Mon Sep 17 00:00:00 2001
From: Han Wang
Date: Thu, 12 Oct 2023 00:13:34 +0800
Subject: [PATCH] fix outdated docs. add explanation for configurations and
 convergence methods

---
 docs/index.rst                                |  2 +-
 docs/input.md                                 | 16 +++-
 docs/submit_args.rst                          |  8 +++
 dpgen2/conf/alloy_conf.py                     | 17 ++++-
 dpgen2/conf/file_conf.py                      |  4 ++
 dpgen2/entrypoint/args.py                     | 70 +++++++++++++------
 .../report/report_adaptive_lower.py           | 21 ++++++
 .../report/report_trust_levels_max.py         | 18 +++++
 .../report/report_trust_levels_random.py      | 16 +++++
 examples/almg/input.json                      |  6 +-
 10 files changed, 142 insertions(+), 36 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index 81b8869a..4e549ed1 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -16,7 +16,7 @@ DPGEN2 is the 2nd generation of the Deep Potential GENerator.
    cli
    input.md
    submit_args
-   dpgen2_configs
+.. dpgen2_configs
 
 
 .. _developer-guide:
diff --git a/docs/input.md b/docs/input.md
index 4e1bfdc7..148d293b 100644
--- a/docs/input.md
+++ b/docs/input.md
@@ -51,9 +51,7 @@ This section defines how a model is trained.
     "type" : "dp",
     "numb_models" : 4,
     "config" : {},
-    "template_script" : {
-        "_comment" : "omitted content of tempalte script"
-    },
+    "template_script" : "/path/to/the/template/input.json",
     "_comment" : "all"
 }
 ```
@@ -71,11 +69,15 @@ This section defines how the configuration space is explored.
     "config" : {
         "command": "lmp -var restart 0"
     },
+    "convergence": {
+        "type" : "fixed-levels",
+        "conv_accuracy" : 0.9,
+        "level_f_lo": 0.05,
+        "level_f_hi": 0.50,
+        "_comment" : "all"
+    },
     "max_numb_iter" : 5,
-    "conv_accuracy" : 0.9,
     "fatal_at_max" : false,
-    "f_trust_lo": 0.05,
-    "f_trust_hi": 0.50,
     "configurations": [
         {
             "lattice" : ["fcc", 4.57],
@@ -124,7 +126,7 @@ The `"configurations"` provides the initial configurations (coordinates of atoms
 - `list[str]`: The strings provides the path to the configuration files.
 - `dict`: Automatic alloy configuration generator. See [the detailed doc](alloy_configs) of the allowed keys.
 
-The `"stages"` defines the exploration stages. It is of type `list[list[dict]]`. The outer `list` enumerate the exploration stages, the inner list enumerate the task groups of the stage. Each `dict` defines a stage. See [the full documentation of the target group](task_group_configs) for writting task groups.
+The `"stages"` defines the exploration stages. It is of type `list[list[dict]]`. The outer `list` enumerates the exploration stages, the inner `list` enumerates the task groups of the stage. Each `dict` defines a task group. See [the full documentation of the task group](task_group_configs) for writing task groups.
 
 `"n_sample"` tells the number of confgiruations randomly sampled from the set picked by `"conf_idx"` from `configurations` for each exploration task. All configurations has the equal possibility to be sampled. The default value of `"n_sample"` is `null`, in this case all picked configurations are sampled. In the example, we have 3 samples for stage 0 task group 0 and 2 thermodynamic states (NVT, T=50 and 100K), then the task group has 3x2=6 NVT DPMD tasks.
diff --git a/docs/submit_args.rst b/docs/submit_args.rst
index 5cbc5503..214ec244 100644
--- a/docs/submit_args.rst
+++ b/docs/submit_args.rst
@@ -5,3 +5,11 @@ Arguments of the submit script
 .. dargs::
    :module: dpgen2.entrypoint.args
    :func: submit_args
+
+
+Task group definition
+---------------------
+.. _task_group_configs:
+.. dargs::
+   :module: dpgen2.exploration.task
+   :func: task_group_args
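The `"n_sample"` rule documented above can be restated in code. The following is a minimal sketch, assuming a hypothetical helper `sample_confs` and sampling with replacement; it is not dpgen2's actual implementation:

```python
import random

def sample_confs(configurations, conf_idx, n_sample=None):
    # keep only the configuration entries selected by "conf_idx"
    picked = [configurations[i] for i in conf_idx]
    if n_sample is None:
        # default: all picked configurations are used
        return picked
    # every picked configuration has the same probability of being sampled
    return random.choices(picked, k=n_sample)

# Stage 0, task group 0 of the example: n_sample=3 combined with
# 2 thermodynamic states (NVT, T=50 and 100 K) gives 3 x 2 = 6 DPMD tasks.
```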
diff --git a/dpgen2/conf/alloy_conf.py b/dpgen2/conf/alloy_conf.py
index 32fa203f..0c56e35c 100644
--- a/dpgen2/conf/alloy_conf.py
+++ b/dpgen2/conf/alloy_conf.py
@@ -95,12 +95,25 @@ def generate(
             ms.append(ss)
         return ms
 
+    @staticmethod
+    def doc() -> str:
+        from dpgen2.entrypoint.args import (
+            make_link,
+        )
+
+        return f"Generate alloys with {make_link('a certain lattice or a user-provided structure', 'explore[lmp]/configurations[alloy]/lattice')}, the elements randomly occupying the lattice according to the {make_link('user-provided probabilities', 'explore[lmp]/configurations[alloy]/concentration')}."
+
     @staticmethod
     def args() -> List[Argument]:
+        from dpgen2.entrypoint.args import (
+            make_link,
+        )
+
+        link_to_type_map = make_link("type_map", "inputs/type_map")
         doc_numb_confs = "The number of configurations to generate"
-        doc_lattice = 'The lattice. Should be a list providing [ "lattice_type", lattice_const ], or a list providing [ "/path/to/dpdata/system", "fmt" ]. The two styles are distinguished by the type of the second element.'
+        doc_lattice = 'The lattice. Should be a list providing [ "lattice_type", lattice_const ], or a list providing [ "/path/to/dpdata/system", "fmt" ]. The two styles are distinguished by the type of the second element. Currently "lattice_type" can be "bcc", "fcc", "hcp", "sc" or "diamond".'
         doc_replicate = "The number of replicates in each direction"
-        doc_concentration = "The concentration of each element. If None all elements have the same concentration"
+        doc_concentration = f"The concentration of each element. `List[List[float]]`, `List[float]` or `None`. If `List[float]`, the concentrations of the elements. The length of the list should be the same as the length of {link_to_type_map}. If `List[List[float]]`, a list of concentrations (`List[float]`) is randomly picked from the list. If `None`, the elements are assumed to be of equal concentration."
         doc_cell_pert_frac = "The faction of cell perturbation"
         doc_atom_pert_dist = "The distance of atomic position perturbation"
diff --git a/dpgen2/conf/file_conf.py b/dpgen2/conf/file_conf.py
index a9614000..e8ae2443 100644
--- a/dpgen2/conf/file_conf.py
+++ b/dpgen2/conf/file_conf.py
@@ -81,6 +81,10 @@ def generate_mixed(
         ms.from_deepmd_npy_mixed(self.files[0], fmt="deepmd/npy/mixed", labeled=False)  # type: ignore
         return ms
 
+    @staticmethod
+    def doc() -> str:
+        return "Generate configurations from user-provided file(s). The file(s) are assumed to be loadable by `dpdata`."
+
     @staticmethod
     def args() -> List[Argument]:
         doc_files = "The paths to the configuration files. widecards are supported."
diff --git a/dpgen2/entrypoint/args.py b/dpgen2/entrypoint/args.py
index fdffa9d7..5590176e 100644
--- a/dpgen2/entrypoint/args.py
+++ b/dpgen2/entrypoint/args.py
@@ -31,6 +31,16 @@
 )
 
 
+def make_link(content, ref_key):
+    try:
+        raw_anchor = dargs.RAW_ANCHOR
+    except AttributeError:
+        raw_anchor = dargs.dargs.RAW_ANCHOR
+    return (
+        f"`{content} <{ref_key}_>`_" if not raw_anchor else f"`{content} <#{ref_key}>`_"
+    )
+
+
 def dp_dist_train_args():
     doc_config = "Configuration of training"
     doc_template_script = "File names of the template training script. It can be a `List[str]`, the length of which is the same as `numb_models`. Each template script in the list is used to train a model. Can be a `str`, the models share the same template training script. "
@@ -97,10 +107,17 @@ def variant_train():
 
 
 def variant_conv():
-    doc = "the type of the convergence check"
+    doc = "the type of the candidate selection and convergence check method."
     var_list = []
     for kk in conv_styles.keys():
-        var_list.append(Argument(kk, dict, conv_styles[kk].args()))
+        var_list.append(
+            Argument(
+                kk,
+                dict,
+                conv_styles[kk].args(),
+                doc=conv_styles[kk].doc(),
+            )
+        )
     return Variant(
         "type",
         var_list,
@@ -109,10 +126,17 @@ def variant_conv():
 
 
 def variant_conf():
-    doc = "the type of the configuration generator"
+    doc = "the type of the initial configuration generator."
     var_list = []
     for kk in conf_styles.keys():
-        var_list.append(Argument(kk, dict, conf_styles[kk].args()))
+        var_list.append(
+            Argument(
+                kk,
+                dict,
+                conf_styles[kk].args(),
+                doc=conf_styles[kk].doc(),
+            )
+        )
     return Variant(
         "type",
         var_list,
@@ -130,7 +154,12 @@ def lmp_args():
     doc_convergence = "The method of convergence check."
     doc_configuration_prefix = "The path prefix of lmp initial configurations"
     doc_configuration = "A list of initial configurations."
-    doc_stages = "A list of exploration stages."
+    doc_stages = (
+        "The definition of exploration stages of type `List[List[ExplorationTaskGroup]]`. "
+        "The outer list provides the enumeration of the exploration stages. "
+        "Each stage is defined by a list of exploration task groups. "
+        "The definition of each task group is described in :ref:`Task group definition`. "
+    )
 
     return [
         Argument(
@@ -158,13 +187,6 @@ def lmp_args():
             optional=False,
             doc=doc_convergence,
         ),
-        Argument(
-            "configuration_prefix",
-            str,
-            optional=True,
-            default=None,
-            doc=doc_configuration_prefix,
-        ),
         Argument(
             "configurations",
             list,
@@ -180,11 +202,12 @@ def lmp_args():
 
 
 def variant_explore():
-    doc = "the type of the exploration"
+    doc = "The type of the exploration"
+    doc_lmp = "The exploration by LAMMPS simulations"
     return Variant(
         "type",
         [
-            Argument("lmp", dict, lmp_args()),
+            Argument("lmp", dict, lmp_args(), doc=doc_lmp),
         ],
         doc=doc,
     )
@@ -234,15 +257,16 @@ def input_args():
     doc_type_map = 'The type map. e.g. ["Al", "Mg"]. Al and Mg will have type 0 and 1, respectively.'
     doc_mass_map = "The mass map. e.g. [27., 24.]. Al and Mg will be set with mass 27. and 24. amu, respectively."
     doc_mixed_type = "Use `deepmd/npy/mixed` format for storing training data."
-    doc_do_finetune = """Finetune the pretrained model before the first iteration. If it is set to True, then an additional step, finetune-step,
-    which is based on a branch of "PrepRunDPTrain," will be added before the dpgen_step. In the
-    finetune-step, the internal flag finetune_mode is set to "finetune," which means SuperOP "PrepRunDPTrain"
-    is now used as the "Finetune." In this step, we finetune the pretrained model in the train step and modify
-    the template after training. After that, in the normal dpgen-step, the flag do_finetune is set as "train-init,"
-    which means we use --init-frz-model to train based on models from the previous iteration. The "do_finetune" flag
-    is set to False by default, while the internal flag finetune_mode is set to "no," which means anything related
-    to finetuning will not be done.
-    """
+    doc_do_finetune = (
+        "Finetune the pretrained model before the first iteration. If it is set to True, then an additional step, finetune-step, "
+        'which is based on a branch of "PrepRunDPTrain," will be added before the dpgen_step. In the '
+        'finetune-step, the internal flag finetune_mode is set to "finetune," which means SuperOP "PrepRunDPTrain" '
+        'is now used as the "Finetune." In this step, we finetune the pretrained model in the train step and modify '
+        'the template after training. After that, in the normal dpgen-step, the flag do_finetune is set as "train-init," '
+        'which means we use `--init-frz-model` to train based on models from the previous iteration. The "do_finetune" flag '
+        'is set to False by default, while the internal flag finetune_mode is set to "no," which means anything related '
+        "to finetuning will not be done."
+    )
     doc_do_finetune = textwrap.dedent(doc_do_finetune)
     doc_init_data_prefix = "The prefix of initial data systems"
     doc_init_sys = "The inital data systems"
diff --git a/dpgen2/exploration/report/report_adaptive_lower.py b/dpgen2/exploration/report/report_adaptive_lower.py
index 3f78c390..49dd9dd2 100644
--- a/dpgen2/exploration/report/report_adaptive_lower.py
+++ b/dpgen2/exploration/report/report_adaptive_lower.py
@@ -128,6 +128,27 @@ def __init__(
         self.fmt_flt = "%.4f"
         self.header_str = "#" + self.fmt_str % print_tuple
 
+    @staticmethod
+    def doc() -> str:
+        def make_class_doc_link(key):
+            from dpgen2.entrypoint.args import (
+                make_link,
+            )
+
+            return make_link(key, f"explore[lmp]/convergence[adaptive-lower]/{key}")
+
+        numb_candi_f_link = make_class_doc_link("numb_candi_f")
+        rate_candi_f_link = make_class_doc_link("rate_candi_f")
+        numb_candi_v_link = make_class_doc_link("numb_candi_v")
+        rate_candi_v_link = make_class_doc_link("rate_candi_v")
+        numb_candi_s = f"{numb_candi_f_link} or {numb_candi_v_link}"
+        rate_candi_s = f"{rate_candi_f_link} or {rate_candi_v_link}"
+        level_f_hi_link = make_class_doc_link("level_f_hi")
+        level_v_hi_link = make_class_doc_link("level_v_hi")
+        conv_tolerance_link = make_class_doc_link("conv_tolerance")
+        n_checked_steps_link = make_class_doc_link("n_checked_steps")
+        return f"The method of adaptively adjusting the lower trust levels. In each iteration, a number (set by {numb_candi_s}) or a ratio (set by {rate_candi_s}) of configurations with model deviations lower than the higher trust level ({level_f_hi_link}, {level_v_hi_link}) are treated as candidates. The lowest model deviation among the candidates is taken as the lower trust level. If the lower trust level does not change significantly (controlled by {conv_tolerance_link}) within {n_checked_steps_link} steps, the stage is treated as converged."
+
     @staticmethod
     def args() -> List[Argument]:
         doc_level_f_hi = "The higher trust level of force model deviation"
diff --git a/dpgen2/exploration/report/report_trust_levels_max.py b/dpgen2/exploration/report/report_trust_levels_max.py
index 84c97bb7..636572e2 100644
--- a/dpgen2/exploration/report/report_trust_levels_max.py
+++ b/dpgen2/exploration/report/report_trust_levels_max.py
@@ -89,3 +89,21 @@ def _get_candidates(
         else:
             ret = self.traj_cand_picked
         return ret
+
+    @staticmethod
+    def doc() -> str:
+        def make_class_doc_link(key):
+            from dpgen2.entrypoint.args import (
+                make_link,
+            )
+
+            return make_link(
+                key, f"explore[lmp]/convergence[fixed-levels-max-select]/{key}"
+            )
+
+        level_f_hi_link = make_class_doc_link("level_f_hi")
+        level_v_hi_link = make_class_doc_link("level_v_hi")
+        level_f_lo_link = make_class_doc_link("level_f_lo")
+        level_v_lo_link = make_class_doc_link("level_v_lo")
+        conv_accuracy_link = make_class_doc_link("conv_accuracy")
+        return f"The configurations with force model deviation between {level_f_lo_link} and {level_f_hi_link}, or virial model deviation between {level_v_lo_link} and {level_v_hi_link}, are treated as candidates (the virial model deviation check is optional). The candidates with the maximal model deviations are sent for FP calculations. If the ratio of accurate configurations (model deviation below {level_f_lo_link} and {level_v_lo_link}) is higher than {conv_accuracy_link}, the stage is treated as converged."
diff --git a/dpgen2/exploration/report/report_trust_levels_random.py b/dpgen2/exploration/report/report_trust_levels_random.py
index f668c258..fb69c46c 100644
--- a/dpgen2/exploration/report/report_trust_levels_random.py
+++ b/dpgen2/exploration/report/report_trust_levels_random.py
@@ -83,3 +83,19 @@ def _get_candidates(
         else:
             ret = self.traj_cand_picked
         return ret
+
+    @staticmethod
+    def doc() -> str:
+        def make_class_doc_link(key):
+            from dpgen2.entrypoint.args import (
+                make_link,
+            )
+
+            return make_link(key, f"explore[lmp]/convergence[fixed-levels]/{key}")
+
+        level_f_hi_link = make_class_doc_link("level_f_hi")
+        level_v_hi_link = make_class_doc_link("level_v_hi")
+        level_f_lo_link = make_class_doc_link("level_f_lo")
+        level_v_lo_link = make_class_doc_link("level_v_lo")
+        conv_accuracy_link = make_class_doc_link("conv_accuracy")
+        return f"The configurations with force model deviation between {level_f_lo_link} and {level_f_hi_link}, or virial model deviation between {level_v_lo_link} and {level_v_hi_link}, are treated as candidates (the virial model deviation check is optional). Configurations are randomly sampled from the candidates for FP calculations. If the ratio of accurate configurations (model deviation below {level_f_lo_link} and {level_v_lo_link}) is higher than {conv_accuracy_link}, the stage is treated as converged."
diff --git a/examples/almg/input.json b/examples/almg/input.json
index 338ca92d..6f883e1d 100644
--- a/examples/almg/input.json
+++ b/examples/almg/input.json
@@ -127,9 +127,10 @@
     "train":{
         "type" : "dp",
         "numb_models" : 4,
-        "config" : {},
-        "template_script" : {
+        "config" : {
+            "init_model_policy" : "no"
         },
+        "template_script" : "dp_template.json",
         "_comment" : "all"
     },
@@ -148,7 +149,6 @@
         "max_numb_iter" : 5,
         "fatal_at_max" : false,
         "output_nopbc": false,
-        "configuration_prefix": null,
         "configurations": [
             {
                 "type": "alloy",