def add_container_images(self, crate, action, activity):
    """\
    Attach the container images used by ``activity`` to ``action``.

    Gathers the ``cwlprov:image`` attribute values of every agent record
    associated with ``activity``, adds one ``ContainerImage`` entity per
    distinct image string to ``crate`` and appends each entity to the
    ``containerImage`` property of ``action``.
    """
    images = set()
    for assoc in activity.association():
        for agent in activity.provenance.prov_doc.get_record(assoc.agent_id):
            images |= agent.get_attribute("cwlprov:image")
    # Sets have no stable iteration order; sort so that converting the same
    # run twice emits the image entities in the same sequence.
    for im in sorted(images):
        parsed = parse_img(im)
        # parse_img may hand back an HTTP(S) image location as a plain
        # string rather than a dict; calling .update() on it would raise
        # AttributeError, so record the location under "url" instead.
        if isinstance(parsed, dict):
            properties = dict(parsed)
        else:
            properties = {"url": parsed}
        properties.update({
            "@type": "ContainerImage",
            "additionalType": {"@id": DOCKER_IMG_TYPE}
        })
        roc_img = crate.add(ContextEntity(crate, properties=properties))
        action.append_to("containerImage", roc_img, compact=True)
def parse_img_name(img_name):
    """\
    Split an image name (without tag or digest) into ``(registry, name)``.

    The first path component is taken as the registry when it looks like a
    host (contains ``.`` or ``:``, or is ``localhost``), mirroring how
    Docker tells registries apart from Docker Hub namespaces. A name with
    exactly three components also treats the first one as the registry,
    which keeps the previous behavior. Anything else is assumed to live on
    ``docker.io``.
    """
    first, slash, remainder = img_name.partition("/")
    if slash:
        looks_like_host = "." in first or ":" in first or first == "localhost"
        if looks_like_host or remainder.count("/") == 1:
            return first, remainder
    return "docker.io", img_name


def parse_img(img_str):
    """\
    Parse image string following the docker pull syntax NAME[:TAG|@DIGEST].

    CWL's DockerRequirement also accepts HTTP URLs for docker load: those
    are returned as ``{"url": img_str}`` so that the result is always a
    dict.

    Returns a dict with ``registry`` and ``name``, plus ``tag`` and/or
    ``sha256`` when present in ``img_str``.

    Raises ``ValueError`` if the digest part is not ``sha256:<hex>``.
    """
    if img_str.startswith(("http://", "https://")):
        return {"url": img_str}
    ref, at, digest_spec = img_str.rpartition("@")
    digest = None
    if at:
        algo, colon, digest = digest_spec.partition(":")
        if not colon:
            raise ValueError(f"malformed digest in image string: {img_str!r}")
        # Explicit check instead of assert, which is stripped under -O.
        if algo != "sha256":
            raise ValueError(
                f"unsupported digest algorithm {algo!r} in image string: {img_str!r}"
            )
    else:
        # rpartition puts the whole string in the last slot when there is
        # no "@" separator.
        ref = img_str
    tag = None
    colon_pos = ref.rfind(":")
    # A colon only introduces a tag when it follows the last "/"; an
    # earlier one is the port of the registry (e.g. "localhost:5000/img").
    if colon_pos > ref.rfind("/"):
        ref, tag = ref[:colon_pos], ref[colon_pos + 1:]
    registry, name = parse_img_name(ref)
    parsed = {"registry": registry, "name": name}
    if tag is not None:
        parsed["tag"] = tag
    if digest is not None:
        parsed["sha256"] = digest
    return parsed
def test_parse_img():
    """Check parse_img on bare, tagged, namespaced, registry-qualified and digest-pinned image strings."""
    digest = "7b8d65a924f596eb65306214f559253c468336bcae09fd575429774563460caf"
    cases = [
        ("python", {"registry": "docker.io", "name": "python"}),
        ("python:3.12", {"registry": "docker.io", "name": "python", "tag": "3.12"}),
        ("josiah/python:3.11", {"registry": "docker.io", "name": "josiah/python", "tag": "3.11"}),
        ("quay.io/josiah/python:3.11", {"registry": "quay.io", "name": "josiah/python", "tag": "3.11"}),
        ("python@sha256:" + digest, {"registry": "docker.io", "name": "python", "sha256": digest}),
    ]
    for img_str, expected in cases:
        assert parse_img(img_str) == expected