From 643e88988e3bf487d348c039bee74b792cf8496a Mon Sep 17 00:00:00 2001 From: Xinzijian Liu Date: Tue, 27 Aug 2024 11:24:42 +0800 Subject: [PATCH] support extra FP outputs; add post_command for Gaussian (#256) ## Summary by CodeRabbit - **New Features** - Introduced a new optional argument for additional output files in multiple functions, enhancing flexibility in file handling. - Added the capability to execute a post-command after the main task in the Gaussian workflow. - Enhanced output structures across various components to accommodate additional output data. - **Bug Fixes** - Improved the handling of extra output files in various methods to ensure they are properly returned and logged. - **Documentation** - Updated documentation to clarify the purpose and usage of new arguments related to extra output files and post-commands. --------- Signed-off-by: zjgemi Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- dpgen2/entrypoint/args.py | 8 ++++++++ dpgen2/entrypoint/submit.py | 1 + dpgen2/fp/abacus.py | 6 ++++++ dpgen2/fp/cp2k.py | 6 ++++++ dpgen2/fp/gaussian.py | 23 +++++++++++++++++++++++ dpgen2/fp/run_fp.py | 6 ++++++ dpgen2/superop/prep_run_fp.py | 6 +++++- dpgen2/utils/download_dpgen2_artifacts.py | 3 ++- tests/fp/test_abacus.py | 1 + tests/fp/test_cp2k.py | 1 + tests/fp/test_run_vasp.py | 9 ++++++--- tests/mocked_ops.py | 3 +++ tests/utils/test_dl_dpgen2_arti.py | 15 +++++++++++++++ 13 files changed, 83 insertions(+), 5 deletions(-) diff --git a/dpgen2/entrypoint/args.py b/dpgen2/entrypoint/args.py index 45504127..3445e42e 100644 --- a/dpgen2/entrypoint/args.py +++ b/dpgen2/entrypoint/args.py @@ -459,6 +459,7 @@ def fp_args(inputs, run): doc_inputs_config = "Configuration for preparing vasp inputs" doc_run_config = "Configuration for running vasp tasks" doc_task_max = "Maximum number of vasp tasks for each iteration" + doc_extra_output_files = "Extra output file names, support wildcards" return [ Argument( @@ -476,6 +477,13 @@ def fp_args(inputs, run): doc=doc_run_config, ), Argument("task_max", int, optional=True, default=10, doc=doc_task_max), + Argument( + "extra_output_files", + list, + optional=True, + default=[], + doc=doc_extra_output_files, + ), ] diff --git a/dpgen2/entrypoint/submit.py b/dpgen2/entrypoint/submit.py index 35242dfe..3f1b2825 100644 --- a/dpgen2/entrypoint/submit.py +++ b/dpgen2/entrypoint/submit.py @@ -564,6 +564,7 @@ def workflow_concurrent_learning( fp_config["inputs"] = fp_inputs fp_config["run"] = config["fp"]["run_config"] + fp_config["extra_output_files"] = config["fp"]["extra_output_files"] if fp_style == "deepmd": assert ( "teacher_model_path" in fp_config["run"] diff --git a/dpgen2/fp/abacus.py b/dpgen2/fp/abacus.py index ae5594dd..28769b01 100644 --- a/dpgen2/fp/abacus.py +++ b/dpgen2/fp/abacus.py @@ -166,6 +166,7 @@ def get_output_sign(cls): { "log": Artifact(Path), "labeled_data": Artifact(Path), + "extra_outputs": Artifact(List[Path]), } ) @@ -202,10 +203,15 @@ def execute( out_name = fp_default_out_data_name sys.to("deepmd/npy", workdir / out_name) + extra_outputs = [] + for fname in ip["config"]["extra_output_files"]: + extra_outputs += list(workdir.glob(fname)) + return OPIO( { "log": workdir / "log", "labeled_data": workdir / out_name, + "extra_outputs": extra_outputs, } ) diff --git a/dpgen2/fp/cp2k.py b/dpgen2/fp/cp2k.py index 276e5ddd..67ed5d55 100644 --- a/dpgen2/fp/cp2k.py +++ b/dpgen2/fp/cp2k.py @@ -126,6 +126,7 @@ def get_output_sign(cls): { "log": Artifact(Path), "labeled_data": Artifact(Path), + "extra_outputs": Artifact(List[Path]), } ) @@ -170,10 +171,15 @@ def execute( out_name = fp_default_out_data_name sys.to("deepmd/npy", workdir / out_name) + extra_outputs = [] + for fname in ip["config"]["extra_output_files"]: + extra_outputs += list(workdir.glob(fname)) + return OPIO( { "log": workdir / "output.log", "labeled_data": workdir / out_name, + "extra_outputs": extra_outputs, } ) diff --git a/dpgen2/fp/gaussian.py b/dpgen2/fp/gaussian.py index 99a96579..b6aba200 100644 --- a/dpgen2/fp/gaussian.py +++ b/dpgen2/fp/gaussian.py @@ -3,6 +3,7 @@ from typing import ( Any, List, + Optional, Tuple, ) @@ -134,6 +135,7 @@ def run_task( self, command: str, out: str, + post_command: Optional[str] = None, ) -> Tuple[str, str]: r"""Defines how one FP task runs @@ -170,6 +172,23 @@ def run_task( ) ) raise TransientError("gaussian failed") + if post_command is not None: + ret, out, err = run_command(post_command, shell=True) + if ret != 0: + logging.error( + "".join( + ( + "gaussian postprocessing failed\n", + "out msg: ", + out, + "\n", + "err msg: ", + err, + "\n", + ) + ) + ) + raise TransientError("gaussian postprocessing failed") # convert the output to deepmd/npy format sys = dpdata.LabeledSystem(gaussian_output_name, fmt="gaussian/log") sys.to("deepmd/npy", out_name) @@ -187,6 +206,7 @@ def args() -> List[dargs.Argument]: doc_gaussian_cmd = "The command of Gaussian" doc_gaussian_out = "The output dir name of labeled data. In `deepmd/npy` format provided by `dpdata`." + doc_post_command = "The command after Gaussian" return [ Argument( "command", str, optional=True, default="g16", doc=doc_gaussian_cmd @@ -198,4 +218,7 @@ def args() -> List[dargs.Argument]: default=fp_default_out_data_name, doc=doc_gaussian_out, ), + Argument( + "post_command", str, optional=True, default=None, doc=doc_post_command + ), ] diff --git a/dpgen2/fp/run_fp.py b/dpgen2/fp/run_fp.py index 258ff9fb..64e0c3a8 100644 --- a/dpgen2/fp/run_fp.py +++ b/dpgen2/fp/run_fp.py @@ -58,6 +58,7 @@ def get_output_sign(cls): { "log": Artifact(Path), "labeled_data": Artifact(Path), + "extra_outputs": Artifact(List[Path]), } ) @@ -196,9 +197,14 @@ def execute( Path(iname).symlink_to(ii) out_name, log_name = self.run_task(**config) + extra_outputs = [] + for fname in ip["config"]["extra_output_files"]: + extra_outputs += list(work_dir.glob(fname)) + return OPIO( { "log": work_dir / log_name, "labeled_data": work_dir / out_name, + "extra_outputs": extra_outputs, } ) diff --git a/dpgen2/superop/prep_run_fp.py b/dpgen2/superop/prep_run_fp.py index 6203e755..ba659c6d 100644 --- a/dpgen2/superop/prep_run_fp.py +++ b/dpgen2/superop/prep_run_fp.py @@ -70,6 +70,7 @@ def __init__( self._output_artifacts = { "logs": OutputArtifact(), "labeled_data": OutputArtifact(), + "extra_outputs": OutputArtifact(), } super().__init__( @@ -170,7 +171,7 @@ def _prep_run_fp( "int('{{item}}')", input_parameter=["task_name"], input_artifact=["task_path"], - output_artifact=["log", "labeled_data"], + output_artifact=["log", "labeled_data", "extra_outputs"], **template_slice_config, ), python_packages=upload_python_packages, @@ -200,5 +201,8 @@ def _prep_run_fp( prep_run_steps.outputs.artifacts["labeled_data"]._from = run_fp.outputs.artifacts[ "labeled_data" ] + prep_run_steps.outputs.artifacts["extra_outputs"]._from = run_fp.outputs.artifacts[ + "extra_outputs" + ] return prep_run_steps diff --git a/dpgen2/utils/download_dpgen2_artifacts.py b/dpgen2/utils/download_dpgen2_artifacts.py index 02dd7965..b5f69153 100644 --- a/dpgen2/utils/download_dpgen2_artifacts.py +++ b/dpgen2/utils/download_dpgen2_artifacts.py @@ -67,7 +67,8 @@ def add_output( "prep-run-fp": DownloadDefinition() .add_input("confs") .add_output("logs") - .add_output("labeled_data"), + .add_output("labeled_data") + .add_output("extra_outputs"), "collect-data": DownloadDefinition().add_output("iter_data"), } diff --git a/tests/fp/test_abacus.py b/tests/fp/test_abacus.py index a555b898..32d6c979 100644 --- a/tests/fp/test_abacus.py +++ b/tests/fp/test_abacus.py @@ -43,6 +43,7 @@ def test_abacus(self): "command": "cp -r %s OUT.ABACUS && cat %s" % (data_path / "OUT.ABACUS", data_path / "log"), }, + "extra_output_files": [], } confs = [data_path / "sys-2"] type_map = ["Na"] diff --git a/tests/fp/test_cp2k.py b/tests/fp/test_cp2k.py index 3e26a3d1..22c87b28 100644 --- a/tests/fp/test_cp2k.py +++ b/tests/fp/test_cp2k.py @@ -48,6 +48,7 @@ def test_cp2k(self): "command": "cp -r %s output.log && cat %s" % (data_path / "output.log", data_path / "output.log"), }, + "extra_output_files": [], } confs = [data_path / "sys-3"] type_map = ["Na"] diff --git a/tests/fp/test_run_vasp.py b/tests/fp/test_run_vasp.py index 48283f1c..7202d6ce 100644 --- a/tests/fp/test_run_vasp.py +++ b/tests/fp/test_run_vasp.py @@ -81,7 +81,8 @@ def new_init(obj, foo): "command": "myvasp", "log": "foo.log", "out": "data", - } + }, + "extra_output_files": [], }, "task_name": self.task_name, "task_path": self.task_path, @@ -128,7 +129,8 @@ def new_init(obj, foo): "config": { "run": { "command": "myvasp", - } + }, + "extra_output_files": [], }, "task_name": self.task_name, "task_path": self.task_path, @@ -163,7 +165,8 @@ def test_error(self, mocked_run): "config": { "run": { "command": "myvasp", - } + }, + "extra_output_files": [], }, "task_name": self.task_name, "task_path": self.task_path, diff --git a/tests/mocked_ops.py b/tests/mocked_ops.py index db8ca085..fbd2308c 100644 --- a/tests/mocked_ops.py +++ b/tests/mocked_ops.py @@ -521,6 +521,7 @@ def execute( { "log": work_dir / log, "labeled_data": work_dir / labeled_data, + "extra_outputs": [], } ) @@ -578,6 +579,7 @@ def execute( { "log": work_dir / log, "labeled_data": work_dir / labeled_data, + "extra_outputs": [], } ) @@ -633,6 +635,7 @@ def execute( { "log": work_dir / log, "labeled_data": work_dir / labeled_data, + "extra_outputs": [], } ) diff --git a/tests/utils/test_dl_dpgen2_arti.py b/tests/utils/test_dl_dpgen2_arti.py index cf767e4e..c1166678 100644 --- a/tests/utils/test_dl_dpgen2_arti.py +++ b/tests/utils/test_dl_dpgen2_arti.py @@ -144,6 +144,11 @@ def test_fp_download(self, mocked_dl): path=Path("iter-000001/prep-run-fp/outputs"), skip_exists=True, ), + mock.call( + "arti-extra_outputs", + path=Path("iter-000001/prep-run-fp/outputs"), + skip_exists=True, + ), ] self.assertEqual(len(mocked_dl.call_args_list), len(expected)) for ii, jj in zip(mocked_dl.call_args_list, expected): @@ -174,6 +179,11 @@ def test_fp_download_chkpnt(self, mocked_dl): path=Path("iter-000001/prep-run-fp/outputs"), skip_exists=True, ), + mock.call( + "arti-extra_outputs", + path=Path("iter-000001/prep-run-fp/outputs"), + skip_exists=True, + ), ] self.assertEqual(len(mocked_dl.call_args_list), len(expected)) for ii, jj in zip(mocked_dl.call_args_list, expected): @@ -200,6 +210,11 @@ def test_fp_download_chkpnt(self, mocked_dl): path=Path("iter-000001/prep-run-fp/outputs"), skip_exists=True, ), + mock.call( + "arti-extra_outputs", + path=Path("iter-000001/prep-run-fp/outputs"), + skip_exists=True, + ), ] self.assertEqual(len(mocked_dl.call_args_list), len(expected)) for ii, jj in zip(mocked_dl.call_args_list, expected):