Skip to content

Commit

Permalink
support plugins for alternative FsAccess (#233)
Browse files Browse the repository at this point in the history
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>
  • Loading branch information
tetron authored Nov 22, 2024
1 parent feeb0da commit fc92b48
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 13 deletions.
54 changes: 54 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,57 @@ Once you upload JSON file to a server, you make a badge by using a link like htt
Here is an example of markdown to add a badge::

![test result](https://flat.badgen.net/https/path/to/generated/json?icon=commonwl)

*************************
Custom file access module
*************************

If your CWL implementation does not write output files to a local file
system location but instead to some other remote storage system, you
can provide an alternate implementation of the *StdFsAccess* object
that is able to access your storage system.

Step 1:

Implement your own class with the same public interface as the
*StdFsAccess* object in *cwltest/stdfsaccess.py* (as of this writing,
the methods are *open*, *size*, *isfile* and *isdir*). These methods
should expect to be called with URIs from the *location* field of the
outputs of test cases.

Define a function that, when called, returns a new instance of your object.

Step 2:

Create a Python package containing your class (or add it to an
existing one).

In the package metadata, add an entry point that declares the module
(in this example, *my_cwl_runner.fsaccess*) containing the function
(in this example, *get_fsaccess*) that *cwltest* will invoke to get an
object implementing the *StdFsAccess* interface.

In *setup.py* this looks like:

.. code:: python

    setup(
        ...
        entry_points={"cwltest.fsaccess": ["fsaccess=my_cwl_runner.fsaccess:get_fsaccess"]},
        ...
    )

In *pyproject.toml* it looks like:

.. code::

    [project.entry-points.'cwltest.fsaccess']
    fsaccess = 'my_cwl_runner.fsaccess:get_fsaccess'

Step 3:

Install your package in the same Python environment as the
installation of *cwltest*. When invoked, *cwltest* will query Python
package metadata for a package with the *cwltest.fsaccess* entry point
and call it to get back a custom filesystem access object.
30 changes: 18 additions & 12 deletions cwltest/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@

import hashlib
import json
import os.path
import urllib.parse
from typing import Any, Callable, Dict, Optional, Set
import cwltest.stdfsaccess

fs_access = cwltest.stdfsaccess.StdFsAccess("")


class CompareFail(Exception):
Expand Down Expand Up @@ -130,13 +131,14 @@ def _compare_location(
actual_comp = "path"
else:
actual_comp = "location"
path = urllib.parse.urlparse(actual[actual_comp]).path

if actual.get("class") == "Directory":
actual[actual_comp] = actual[actual_comp].rstrip("/")
exist_fun: Callable[[str], bool] = os.path.isdir
exist_fun: Callable[[str], bool] = fs_access.isdir
else:
exist_fun = os.path.isfile
if not exist_fun(path) and not skip_details:
exist_fun = fs_access.isfile

if not exist_fun(actual[actual_comp]) and not skip_details:
raise CompareFail.format(
expected,
actual,
Expand All @@ -160,15 +162,17 @@ def _compare_location(

def _compare_checksum(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
if "path" in actual:
path = urllib.parse.urlparse(actual["path"]).path
path = actual["path"]
else:
path = urllib.parse.urlparse(actual["location"]).path
path = actual["location"]
checksum = hashlib.sha1() # nosec
with open(path, "rb") as f:

with fs_access.open(path, "rb") as f:
contents = f.read(1024 * 1024)
while contents != b"":
checksum.update(contents)
contents = f.read(1024 * 1024)

actual_checksum_on_disk = f"sha1${checksum.hexdigest()}"
if "checksum" in actual:
actual_checksum_declared = actual["checksum"]
Expand All @@ -193,10 +197,12 @@ def _compare_checksum(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:

def _compare_size(expected: Dict[str, Any], actual: Dict[str, Any]) -> None:
if "path" in actual:
path = urllib.parse.urlparse(actual["path"]).path
path = actual["path"]
else:
path = urllib.parse.urlparse(actual["location"]).path
actual_size_on_disk = os.path.getsize(path)
path = actual["location"]

actual_size_on_disk = fs_access.size(path)

if "size" in actual:
actual_size_declared = actual["size"]
if actual_size_on_disk != actual_size_declared:
Expand Down
9 changes: 8 additions & 1 deletion cwltest/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@
import schema_salad.ref_resolver
import schema_salad.schema
from cwltest.argparser import arg_parser
from cwltest.utils import CWLTestConfig, CWLTestReport, TestResult
from cwltest.utils import (
CWLTestConfig,
CWLTestReport,
TestResult,
load_optional_fsaccess_plugin,
)
from schema_salad.exceptions import ValidationException

from cwltest import logger, utils
Expand Down Expand Up @@ -116,6 +121,8 @@ def main() -> int:
suite_name, _ = os.path.splitext(os.path.basename(args.test))
report: Optional[junit_xml.TestSuite] = junit_xml.TestSuite(suite_name, [])

load_optional_fsaccess_plugin()

ntotal: Dict[str, int] = Counter()
npassed: Dict[str, List[CWLTestReport]] = defaultdict(list)

Expand Down
48 changes: 48 additions & 0 deletions cwltest/stdfsaccess.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Abstracted IO access."""

import os
import urllib
from typing import IO, Any

from schema_salad.ref_resolver import uri_file_path


def abspath(src: str, basedir: str) -> str:
    """
    Resolve *src* against *basedir*, handling both plain paths and URIs.

    The cases are checked in this order:

    * ``file://`` URIs are converted with ``uri_file_path``.
    * ``http`` and ``https`` URIs are returned unchanged.
    * Absolute paths are returned unchanged.
    * Relative paths are appended to *basedir*: with a literal ``/`` when
      *basedir* is itself a ``file://`` URI (the result is then still a
      URI), otherwise with ``os.path.join``.
    """
    # A bare ``import urllib`` does not guarantee that the ``parse``
    # submodule has been loaded, so import it explicitly where it is used.
    import urllib.parse

    if src.startswith("file://"):
        return uri_file_path(src)
    if urllib.parse.urlsplit(src).scheme in ("http", "https"):
        return src
    if os.path.isabs(src):
        return src
    if basedir.startswith("file://"):
        return basedir + "/" + src
    return os.path.join(basedir, src)


class StdFsAccess:
    """File access object backed by the local filesystem."""

    def __init__(self, basedir: str) -> None:
        """Remember the base directory that names are resolved against."""
        self.basedir = basedir

    def _abs(self, p: str) -> str:
        """Resolve *p* with ``abspath`` relative to the stored base directory."""
        resolved = abspath(p, self.basedir)
        return resolved

    def open(self, fn: str, mode: str) -> IO[Any]:
        """Open the resource named by *fn* in *mode* and return the file object."""
        target = self._abs(fn)
        return open(target, mode)

    def size(self, fn: str) -> int:
        """Return the size in bytes of the resource named by *fn*."""
        target = self._abs(fn)
        return os.path.getsize(target)

    def isfile(self, fn: str) -> bool:
        """Report whether the resource named by *fn* is a regular file."""
        target = self._abs(fn)
        return os.path.isfile(target)

    def isdir(self, fn: str) -> bool:
        """Report whether the resource named by *fn* is a directory."""
        target = self._abs(fn)
        return os.path.isdir(target)
38 changes: 38 additions & 0 deletions cwltest/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import schema_salad.avro
import schema_salad.ref_resolver
import schema_salad.schema
import cwltest.compare
import cwltest.stdfsaccess
from cwltest.compare import CompareFail, compare
from rdflib import Graph
from ruamel.yaml.scalarstring import ScalarString
Expand All @@ -37,6 +39,10 @@
else:
from importlib_resources import as_file, files

# available since Python 3.8 (the minimum version supported as of this
# writing) so we don't need to fuss with backports
from importlib.metadata import entry_points, EntryPoint

from cwltest import REQUIRED, UNSUPPORTED_FEATURE, logger, templock

__all__ = ["files", "as_file"]
Expand Down Expand Up @@ -659,3 +665,35 @@ def absuri(path: str) -> str:
if "://" in path:
return path
return "file://" + os.path.abspath(path)


def load_optional_fsaccess_plugin() -> None:
    """
    Load the optional fsaccess plugin, if one is installed.

    Looks up the ``cwltest.fsaccess`` entry point group in the installed
    package metadata.  When at least one entry point is registered, the
    first one is loaded, called with no arguments, and its return value
    replaces ``cwltest.compare.fs_access``, the filesystem access object
    used for checking test output.  When none is registered this
    function does nothing.
    """
    fsaccess_eps: List[EntryPoint]

    # The return type of importlib.metadata.entry_points() changed
    # between Python releases: a plain dict keyed by group (3.8/3.9), a
    # dict-like object that also offers select() (3.10/3.11), and an
    # EntryPoints object (3.12+) whose [] operator looks up an entry
    # point *name* rather than a group.  Indexing by group therefore
    # silently finds nothing on 3.12+, so prefer select() when present.
    discovered = entry_points()
    select = getattr(discovered, "select", None)
    if select is not None:
        fsaccess_eps = list(select(group="cwltest.fsaccess"))
    else:
        fsaccess_eps = list(discovered.get("cwltest.fsaccess", []))  # type: ignore [attr-defined, unused-ignore]

    if not fsaccess_eps:
        return

    if len(fsaccess_eps) > 1:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning(
            "More than one cwltest.fsaccess entry point found, selected %s",
            fsaccess_eps[0],
        )

    # The entry point resolves to a factory function; call it to obtain
    # the filesystem access object.
    cwltest.compare.fs_access = fsaccess_eps[0].load()()

0 comments on commit fc92b48

Please sign in to comment.