From fc92b4863ac4720d5d0bf2e4036591f200fa2ff0 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 22 Nov 2024 08:33:19 -0500 Subject: [PATCH] support plugins for alternative FsAccess (#233) Arvados-DCO-1.1-Signed-off-by: Peter Amstutz --- README.rst | 54 ++++++++++++++++++++++++++++++++++++++++++ cwltest/compare.py | 30 +++++++++++++---------- cwltest/main.py | 9 ++++++- cwltest/stdfsaccess.py | 48 +++++++++++++++++++++++++++++++++++++ cwltest/utils.py | 38 +++++++++++++++++++++++++++++ 5 files changed, 166 insertions(+), 13 deletions(-) create mode 100644 cwltest/stdfsaccess.py diff --git a/README.rst b/README.rst index d1cc50e..f5f646a 100644 --- a/README.rst +++ b/README.rst @@ -91,3 +91,57 @@ Once you upload JSON file to a server, you make a badge by using a link like htt Here is an example of markdown to add a badge:: ![test result](https://flat.badgen.net/https/path/to/generated/json?icon=commonwl) + +************************* +Custom file access module +************************* + +If your CWL implementation does not write output files to a local file +system location but instead to some other remote storage system, you +can provide an alternate implementation of the *StdFsAccess* object +that is able to access your storage system. + +Step 1: + +Implement your own class with the same public interface of the +*StdFsAccess* object in *cwltest/stdfsaccess.py* (as of this writing, +the methods are *open*, *size*, *isfile* and *isdir*). These methods +should expect to be called with URIs from the *location* field of the +outputs of test cases. + +Define a function that, when called, returns a new instance of your object. + +Step 2: + +Create a Python package containing your class (or add it to an +existing one). 
+ +In the package metadata, add an entry point that declares the module +(in this example, *my_cwl_runner.fsaccess*) containing the function +(in this example, *get_fsaccess*) that *cwltest* will invoke to get an +object implementing the *StdFsAccess* interface. + +In *setup.py* this looks like: + +.. code:: python + + setup( + ... + entry_points={"cwltest.fsaccess": ["fsaccess=my_cwl_runner.fsaccess:get_fsaccess"]}, + ... + ) + +In *pyproject.toml* it looks like: + +.. code:: + + [project.entry-points.'cwltest.fsaccess'] + fsaccess = 'my_cwl_runner.fsaccess:get_fsaccess' + + +Step 3: + +Install your package in the same Python environment as the +installation of *cwltest*. When invoked, *cwltest* will query Python +package metadata for a package with the *cwltest.fsaccess* entry point +and call it to get back a custom filesystem access object. diff --git a/cwltest/compare.py b/cwltest/compare.py index 18452ed..0e3008b 100644 --- a/cwltest/compare.py +++ b/cwltest/compare.py @@ -2,9 +2,10 @@ import hashlib import json -import os.path -import urllib.parse from typing import Any, Callable, Dict, Optional, Set +import cwltest.stdfsaccess + +fs_access = cwltest.stdfsaccess.StdFsAccess("") class CompareFail(Exception): @@ -130,13 +131,14 @@ def _compare_location( actual_comp = "path" else: actual_comp = "location" - path = urllib.parse.urlparse(actual[actual_comp]).path + if actual.get("class") == "Directory": actual[actual_comp] = actual[actual_comp].rstrip("/") - exist_fun: Callable[[str], bool] = os.path.isdir + exist_fun: Callable[[str], bool] = fs_access.isdir else: - exist_fun = os.path.isfile - if not exist_fun(path) and not skip_details: + exist_fun = fs_access.isfile + + if not exist_fun(actual[actual_comp]) and not skip_details: raise CompareFail.format( expected, actual, @@ -160,15 +162,17 @@ def _compare_location( def _compare_checksum(expected: Dict[str, Any], actual: Dict[str, Any]) -> None: if "path" in actual: - path = 
urllib.parse.urlparse(actual["path"]).path + path = actual["path"] else: - path = urllib.parse.urlparse(actual["location"]).path + path = actual["location"] checksum = hashlib.sha1() # nosec - with open(path, "rb") as f: + + with fs_access.open(path, "rb") as f: contents = f.read(1024 * 1024) while contents != b"": checksum.update(contents) contents = f.read(1024 * 1024) + actual_checksum_on_disk = f"sha1${checksum.hexdigest()}" if "checksum" in actual: actual_checksum_declared = actual["checksum"] @@ -193,10 +197,12 @@ def _compare_checksum(expected: Dict[str, Any], actual: Dict[str, Any]) -> None: def _compare_size(expected: Dict[str, Any], actual: Dict[str, Any]) -> None: if "path" in actual: - path = urllib.parse.urlparse(actual["path"]).path + path = actual["path"] else: - path = urllib.parse.urlparse(actual["location"]).path - actual_size_on_disk = os.path.getsize(path) + path = actual["location"] + + actual_size_on_disk = fs_access.size(path) + if "size" in actual: actual_size_declared = actual["size"] if actual_size_on_disk != actual_size_declared: diff --git a/cwltest/main.py b/cwltest/main.py index a731531..d8a7218 100644 --- a/cwltest/main.py +++ b/cwltest/main.py @@ -13,7 +13,12 @@ import schema_salad.ref_resolver import schema_salad.schema from cwltest.argparser import arg_parser -from cwltest.utils import CWLTestConfig, CWLTestReport, TestResult +from cwltest.utils import ( + CWLTestConfig, + CWLTestReport, + TestResult, + load_optional_fsaccess_plugin, +) from schema_salad.exceptions import ValidationException from cwltest import logger, utils @@ -116,6 +121,8 @@ def main() -> int: suite_name, _ = os.path.splitext(os.path.basename(args.test)) report: Optional[junit_xml.TestSuite] = junit_xml.TestSuite(suite_name, []) + load_optional_fsaccess_plugin() + ntotal: Dict[str, int] = Counter() npassed: Dict[str, List[CWLTestReport]] = defaultdict(list) diff --git a/cwltest/stdfsaccess.py b/cwltest/stdfsaccess.py new file mode 100644 index 0000000..b59375e 
--- /dev/null +++ b/cwltest/stdfsaccess.py @@ -0,0 +1,48 @@ +"""Abstracted IO access.""" + +import os +import urllib +from typing import IO, Any + +from schema_salad.ref_resolver import uri_file_path + + +def abspath(src: str, basedir: str) -> str: + """Determine local filesystem absolute path given a basedir, handling both plain paths and URIs.""" + if src.startswith("file://"): + abpath = uri_file_path(src) + elif urllib.parse.urlsplit(src).scheme in ["http", "https"]: + return src + else: + if basedir.startswith("file://"): + abpath = src if os.path.isabs(src) else basedir + "/" + src + else: + abpath = src if os.path.isabs(src) else os.path.join(basedir, src) + return abpath + + +class StdFsAccess: + """Local filesystem implementation.""" + + def __init__(self, basedir: str) -> None: + """Perform operations with respect to a base directory.""" + self.basedir = basedir + + def _abs(self, p: str) -> str: + return abspath(p, self.basedir) + + def open(self, fn: str, mode: str) -> IO[Any]: + """Open a file from a file: URI.""" + return open(self._abs(fn), mode) + + def size(self, fn: str) -> int: + """Get the size of the file resource pointed to by a URI.""" + return os.stat(self._abs(fn)).st_size + + def isfile(self, fn: str) -> bool: + """Determine if a resource pointed to by a URI represents a file.""" + return os.path.isfile(self._abs(fn)) + + def isdir(self, fn: str) -> bool: + """Determine if a resource pointed to by a URI represents a directory.""" + return os.path.isdir(self._abs(fn)) diff --git a/cwltest/utils.py b/cwltest/utils.py index b9cd745..a638042 100644 --- a/cwltest/utils.py +++ b/cwltest/utils.py @@ -27,6 +27,8 @@ import schema_salad.avro import schema_salad.ref_resolver import schema_salad.schema +import cwltest.compare +import cwltest.stdfsaccess from cwltest.compare import CompareFail, compare from rdflib import Graph from ruamel.yaml.scalarstring import ScalarString @@ -37,6 +39,10 @@ else: from importlib_resources import as_file, files +# 
available since Python 3.8 (minimum version supported as of this +# writing) so we don't need to fuss with backports +from importlib.metadata import entry_points, EntryPoint + from cwltest import REQUIRED, UNSUPPORTED_FEATURE, logger, templock __all__ = ["files", "as_file"] @@ -659,3 +665,35 @@ def absuri(path: str) -> str: if "://" in path: return path return "file://" + os.path.abspath(path) + + +def load_optional_fsaccess_plugin() -> None: + """ + Load optional fsaccess plugin. + + Looks for a package with a cwltest.fsaccess entry point and, if found, + uses it to get a filesystem access object that will be used for + checking test output. + """ + fsaccess_eps: List[EntryPoint] + + try: + # The interface to importlib.metadata.entry_points() changed + # several times between Python 3.8 and 3.13; the code below + # actually works fine on all of them but there's no single + # mypy annotation that works across all of them. Explicitly cast + # it to a consistent type to make mypy shut up. + fsaccess_eps = cast(List[EntryPoint], entry_points()["cwltest.fsaccess"]) # type: ignore [redundant-cast, unused-ignore] + except KeyError: + return + + if len(fsaccess_eps) == 0: + return + + if len(fsaccess_eps) > 1: + logger.warn( + "More than one cwltest.fsaccess entry point found, selected %s", + fsaccess_eps[0], + ) + + cwltest.compare.fs_access = fsaccess_eps[0].load()()