Skip to content

Commit

Permalink
support extra FP outputs; add post_command for Gaussian (#256)
Browse files Browse the repository at this point in the history
<!-- This is an auto-generated comment: release notes by coderabbit.ai
-->
## Summary by CodeRabbit


- **New Features**
  - Introduced a new optional `extra_output_files` argument (a list of
    file names, wildcards supported) in the FP configuration, enhancing
    flexibility in file handling.
  - Added the capability to execute an optional `post_command` after the
    main task in the Gaussian workflow; a failing post-command is
    reported as a transient error.
  - Enhanced the output structures of the FP operators and the
    prep-run-fp step with an `extra_outputs` artifact to accommodate
    additional output data.

- **Bug Fixes**
  - Improved the handling of extra output files in various methods to
    ensure they are properly returned and logged.

- **Documentation**
  - Updated documentation to clarify the purpose and usage of new
    arguments related to extra output files and post-commands.

<!-- end of auto-generated comment: release notes by coderabbit.ai -->

---------

Signed-off-by: zjgemi <liuxin_zijian@163.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
zjgemi and pre-commit-ci[bot] authored Aug 27, 2024
1 parent 899a76f commit 643e889
Show file tree
Hide file tree
Showing 13 changed files with 83 additions and 5 deletions.
8 changes: 8 additions & 0 deletions dpgen2/entrypoint/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,6 +459,7 @@ def fp_args(inputs, run):
doc_inputs_config = "Configuration for preparing vasp inputs"
doc_run_config = "Configuration for running vasp tasks"
doc_task_max = "Maximum number of vasp tasks for each iteration"
doc_extra_output_files = "Extra output file names, support wildcards"

return [
Argument(
Expand All @@ -476,6 +477,13 @@ def fp_args(inputs, run):
doc=doc_run_config,
),
Argument("task_max", int, optional=True, default=10, doc=doc_task_max),
Argument(
"extra_output_files",
list,
optional=True,
default=[],
doc=doc_extra_output_files,
),
]


Expand Down
1 change: 1 addition & 0 deletions dpgen2/entrypoint/submit.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ def workflow_concurrent_learning(

fp_config["inputs"] = fp_inputs
fp_config["run"] = config["fp"]["run_config"]
fp_config["extra_output_files"] = config["fp"]["extra_output_files"]
if fp_style == "deepmd":
assert (
"teacher_model_path" in fp_config["run"]
Expand Down
6 changes: 6 additions & 0 deletions dpgen2/fp/abacus.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ def get_output_sign(cls):
{
"log": Artifact(Path),
"labeled_data": Artifact(Path),
"extra_outputs": Artifact(List[Path]),
}
)

Expand Down Expand Up @@ -202,10 +203,15 @@ def execute(
out_name = fp_default_out_data_name
sys.to("deepmd/npy", workdir / out_name)

extra_outputs = []
for fname in ip["config"]["extra_output_files"]:
extra_outputs += list(workdir.glob(fname))

return OPIO(
{
"log": workdir / "log",
"labeled_data": workdir / out_name,
"extra_outputs": extra_outputs,
}
)

Expand Down
6 changes: 6 additions & 0 deletions dpgen2/fp/cp2k.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def get_output_sign(cls):
{
"log": Artifact(Path),
"labeled_data": Artifact(Path),
"extra_outputs": Artifact(List[Path]),
}
)

Expand Down Expand Up @@ -170,10 +171,15 @@ def execute(
out_name = fp_default_out_data_name
sys.to("deepmd/npy", workdir / out_name)

extra_outputs = []
for fname in ip["config"]["extra_output_files"]:
extra_outputs += list(workdir.glob(fname))

return OPIO(
{
"log": workdir / "output.log",
"labeled_data": workdir / out_name,
"extra_outputs": extra_outputs,
}
)

Expand Down
23 changes: 23 additions & 0 deletions dpgen2/fp/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import (
Any,
List,
Optional,
Tuple,
)

Expand Down Expand Up @@ -134,6 +135,7 @@ def run_task(
self,
command: str,
out: str,
post_command: Optional[str] = None,
) -> Tuple[str, str]:
r"""Defines how one FP task runs
Expand Down Expand Up @@ -170,6 +172,23 @@ def run_task(
)
)
raise TransientError("gaussian failed")
if post_command is not None:
ret, out, err = run_command(post_command, shell=True)
if ret != 0:
logging.error(
"".join(
(
"gaussian postprocessing failed\n",
"out msg: ",
out,
"\n",
"err msg: ",
err,
"\n",
)
)
)
raise TransientError("gaussian postprocessing failed")
# convert the output to deepmd/npy format
sys = dpdata.LabeledSystem(gaussian_output_name, fmt="gaussian/log")
sys.to("deepmd/npy", out_name)
Expand All @@ -187,6 +206,7 @@ def args() -> List[dargs.Argument]:

doc_gaussian_cmd = "The command of Gaussian"
doc_gaussian_out = "The output dir name of labeled data. In `deepmd/npy` format provided by `dpdata`."
doc_post_command = "The command after Gaussian"
return [
Argument(
"command", str, optional=True, default="g16", doc=doc_gaussian_cmd
Expand All @@ -198,4 +218,7 @@ def args() -> List[dargs.Argument]:
default=fp_default_out_data_name,
doc=doc_gaussian_out,
),
Argument(
"post_command", str, optional=True, default=None, doc=doc_post_command
),
]
6 changes: 6 additions & 0 deletions dpgen2/fp/run_fp.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ def get_output_sign(cls):
{
"log": Artifact(Path),
"labeled_data": Artifact(Path),
"extra_outputs": Artifact(List[Path]),
}
)

Expand Down Expand Up @@ -196,9 +197,14 @@ def execute(
Path(iname).symlink_to(ii)
out_name, log_name = self.run_task(**config)

extra_outputs = []
for fname in ip["config"]["extra_output_files"]:
extra_outputs += list(work_dir.glob(fname))

return OPIO(
{
"log": work_dir / log_name,
"labeled_data": work_dir / out_name,
"extra_outputs": extra_outputs,
}
)
6 changes: 5 additions & 1 deletion dpgen2/superop/prep_run_fp.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def __init__(
self._output_artifacts = {
"logs": OutputArtifact(),
"labeled_data": OutputArtifact(),
"extra_outputs": OutputArtifact(),
}

super().__init__(
Expand Down Expand Up @@ -170,7 +171,7 @@ def _prep_run_fp(
"int('{{item}}')",
input_parameter=["task_name"],
input_artifact=["task_path"],
output_artifact=["log", "labeled_data"],
output_artifact=["log", "labeled_data", "extra_outputs"],
**template_slice_config,
),
python_packages=upload_python_packages,
Expand Down Expand Up @@ -200,5 +201,8 @@ def _prep_run_fp(
prep_run_steps.outputs.artifacts["labeled_data"]._from = run_fp.outputs.artifacts[
"labeled_data"
]
prep_run_steps.outputs.artifacts["extra_outputs"]._from = run_fp.outputs.artifacts[
"extra_outputs"
]

return prep_run_steps
3 changes: 2 additions & 1 deletion dpgen2/utils/download_dpgen2_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ def add_output(
"prep-run-fp": DownloadDefinition()
.add_input("confs")
.add_output("logs")
.add_output("labeled_data"),
.add_output("labeled_data")
.add_output("extra_outputs"),
"collect-data": DownloadDefinition().add_output("iter_data"),
}

Expand Down
1 change: 1 addition & 0 deletions tests/fp/test_abacus.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def test_abacus(self):
"command": "cp -r %s OUT.ABACUS && cat %s"
% (data_path / "OUT.ABACUS", data_path / "log"),
},
"extra_output_files": [],
}
confs = [data_path / "sys-2"]
type_map = ["Na"]
Expand Down
1 change: 1 addition & 0 deletions tests/fp/test_cp2k.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def test_cp2k(self):
"command": "cp -r %s output.log && cat %s"
% (data_path / "output.log", data_path / "output.log"),
},
"extra_output_files": [],
}
confs = [data_path / "sys-3"]
type_map = ["Na"]
Expand Down
9 changes: 6 additions & 3 deletions tests/fp/test_run_vasp.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,8 @@ def new_init(obj, foo):
"command": "myvasp",
"log": "foo.log",
"out": "data",
}
},
"extra_output_files": [],
},
"task_name": self.task_name,
"task_path": self.task_path,
Expand Down Expand Up @@ -128,7 +129,8 @@ def new_init(obj, foo):
"config": {
"run": {
"command": "myvasp",
}
},
"extra_output_files": [],
},
"task_name": self.task_name,
"task_path": self.task_path,
Expand Down Expand Up @@ -163,7 +165,8 @@ def test_error(self, mocked_run):
"config": {
"run": {
"command": "myvasp",
}
},
"extra_output_files": [],
},
"task_name": self.task_name,
"task_path": self.task_path,
Expand Down
3 changes: 3 additions & 0 deletions tests/mocked_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,7 @@ def execute(
{
"log": work_dir / log,
"labeled_data": work_dir / labeled_data,
"extra_outputs": [],
}
)

Expand Down Expand Up @@ -578,6 +579,7 @@ def execute(
{
"log": work_dir / log,
"labeled_data": work_dir / labeled_data,
"extra_outputs": [],
}
)

Expand Down Expand Up @@ -633,6 +635,7 @@ def execute(
{
"log": work_dir / log,
"labeled_data": work_dir / labeled_data,
"extra_outputs": [],
}
)

Expand Down
15 changes: 15 additions & 0 deletions tests/utils/test_dl_dpgen2_arti.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,11 @@ def test_fp_download(self, mocked_dl):
path=Path("iter-000001/prep-run-fp/outputs"),
skip_exists=True,
),
mock.call(
"arti-extra_outputs",
path=Path("iter-000001/prep-run-fp/outputs"),
skip_exists=True,
),
]
self.assertEqual(len(mocked_dl.call_args_list), len(expected))
for ii, jj in zip(mocked_dl.call_args_list, expected):
Expand Down Expand Up @@ -174,6 +179,11 @@ def test_fp_download_chkpnt(self, mocked_dl):
path=Path("iter-000001/prep-run-fp/outputs"),
skip_exists=True,
),
mock.call(
"arti-extra_outputs",
path=Path("iter-000001/prep-run-fp/outputs"),
skip_exists=True,
),
]
self.assertEqual(len(mocked_dl.call_args_list), len(expected))
for ii, jj in zip(mocked_dl.call_args_list, expected):
Expand All @@ -200,6 +210,11 @@ def test_fp_download_chkpnt(self, mocked_dl):
path=Path("iter-000001/prep-run-fp/outputs"),
skip_exists=True,
),
mock.call(
"arti-extra_outputs",
path=Path("iter-000001/prep-run-fp/outputs"),
skip_exists=True,
),
]
self.assertEqual(len(mocked_dl.call_args_list), len(expected))
for ii, jj in zip(mocked_dl.call_args_list, expected):
Expand Down

0 comments on commit 643e889

Please sign in to comment.