Merge pull request #19132 from jmchilton/posix_prefer_links
Allow a posix file source to prefer linking.
guerler authored Nov 14, 2024
2 parents ff9470e + 272e9ff commit e97018f
Showing 7 changed files with 158 additions and 28 deletions.
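In short: the PR threads a new `prefer_links` option from posix file source configuration through the fetch machinery, so datasets resolved from such a source are linked in place instead of copied into Galaxy-managed storage. A minimal sketch of the configuration surface, mirroring the YAML the integration setup below generates (the root path and id are illustrative):

```python
# A minimal sketch, assuming the file_sources_conf.yml format used by the
# integration tests below; the root path and id are illustrative.
FILE_SOURCES_CONF = """
- type: posix
  id: linking_source
  label: Posix
  doc: Files from a local path, linked rather than copied
  root: /data/shared
  writable: true
  prefer_links: true
"""

with open("file_sources_conf_posix.yml", "w") as f:
    f.write(FILE_SOURCES_CONF)
```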
8 changes: 8 additions & 0 deletions lib/galaxy/config/__init__.py
@@ -115,6 +115,14 @@
"level": "INFO",
"qualname": "watchdog.observers.inotify_buffer",
},
"py.warnings": {
"level": "ERROR",
"qualname": "py.warnings",
},
"celery.utils.functional": {
"level": "INFO",
"qualname": "celery.utils.functional",
},
},
"filters": {
"stack": {
7 changes: 7 additions & 0 deletions lib/galaxy/files/sources/__init__.py
@@ -277,6 +277,10 @@ def to_dict(self, for_serialization=False, user_context: "OptionalUserContext" =
context doesn't need to be present after the plugin is re-hydrated.
"""

@abc.abstractmethod
def prefer_links(self) -> bool:
"""Prefer linking to files."""


class SupportsBrowsing(metaclass=abc.ABCMeta):
"""An interface indicating that this filesource is browsable.
@@ -351,6 +355,9 @@ def user_has_access(self, user_context: "OptionalUserContext") -> bool:
or (self._user_has_required_roles(user_context) and self._user_has_required_groups(user_context))
)

def prefer_links(self) -> bool:
return False

@property
def user_context_required(self) -> bool:
return self.requires_roles is not None or self.requires_groups is not None
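The interface contract above is small: `FilesSource` gains an abstract `prefer_links()`, and `BaseFilesSource` answers `False` so existing plugins keep copy semantics. A hedged sketch of a plugin opting in (the class is hypothetical; only the `prefer_links()` override is part of the PR's interface):

```python
from galaxy.files.sources import BaseFilesSource

# Hypothetical plugin sketch: the required listing/realization methods are
# omitted; only the prefer_links() override is the interface this PR adds.
class LinkingFilesSource(BaseFilesSource):
    def prefer_links(self) -> bool:
        # Request link semantics for datasets resolved from this source.
        return True
```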
7 changes: 7 additions & 0 deletions lib/galaxy/files/sources/posix.py
@@ -26,13 +26,15 @@
DEFAULT_ENFORCE_SYMLINK_SECURITY = True
DEFAULT_DELETE_ON_REALIZE = False
DEFAULT_ALLOW_SUBDIR_CREATION = True
DEFAULT_PREFER_LINKS = False


class PosixFilesSourceProperties(FilesSourceProperties, total=False):
root: str
enforce_symlink_security: bool
delete_on_realize: bool
allow_subdir_creation: bool
prefer_links: bool


class PosixFilesSource(BaseFilesSource):
@@ -53,6 +55,10 @@ def __init__(self, **kwd: Unpack[PosixFilesSourceProperties]):
self.enforce_symlink_security = props.get("enforce_symlink_security", DEFAULT_ENFORCE_SYMLINK_SECURITY)
self.delete_on_realize = props.get("delete_on_realize", DEFAULT_DELETE_ON_REALIZE)
self.allow_subdir_creation = props.get("allow_subdir_creation", DEFAULT_ALLOW_SUBDIR_CREATION)
self._prefer_links = props.get("prefer_links", DEFAULT_PREFER_LINKS)

def prefer_links(self) -> bool:
return self._prefer_links

def _list(
self,
@@ -182,6 +188,7 @@ def _serialization_props(self, user_context: OptionalUserContext = None) -> Posi
"enforce_symlink_security": self.enforce_symlink_security,
"delete_on_realize": self.delete_on_realize,
"allow_subdir_creation": self.allow_subdir_creation,
"prefer_links": self._prefer_links,
}

@property
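A construction and serialization sketch for the posix source (hedged: it assumes the common plugin options such as `id`, `label`, and `writable` are accepted as keyword properties, as the `Unpack`'ed `FilesSourceProperties` TypedDict suggests; the values are illustrative):

```python
from galaxy.files.sources.posix import PosixFilesSource

# Hedged sketch; id/label/root values are illustrative and the common plugin
# keyword options are assumed from the FilesSourceProperties TypedDict.
source = PosixFilesSource(
    id="linking_source",
    label="Posix",
    root="/data/shared",
    writable=True,
    prefer_links=True,
)
assert source.prefer_links()
# prefer_links round-trips through _serialization_props, so a re-hydrated
# plugin keeps its link semantics.
assert source._serialization_props()["prefer_links"] is True
```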
9 changes: 7 additions & 2 deletions lib/galaxy/files/uris.py
@@ -49,8 +49,7 @@ def stream_url_to_file(
target_path: Optional[str] = None,
file_source_opts: Optional[FilesSourceOptions] = None,
) -> str:
-if file_sources is None:
-    file_sources = ConfiguredFileSources.from_dict(None, load_stock_plugins=True)
+file_sources = ensure_file_sources(file_sources)
file_source, rel_path = file_sources.get_file_source_path(url)
if file_source:
if not target_path:
@@ -62,6 +61,12 @@
raise NoMatchingFileSource(f"Could not find a matching handler for: {url}")


def ensure_file_sources(file_sources: Optional["ConfiguredFileSources"]) -> "ConfiguredFileSources":
if file_sources is None:
file_sources = ConfiguredFileSources.from_dict(None, load_stock_plugins=True)
return file_sources


def stream_to_file(stream, suffix="", prefix="", dir=None, text=False, **kwd):
"""Writes a stream to a temporary file, returns the temporary file's name"""
fd, temp_name = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir, text=text)
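`ensure_file_sources` simply centralizes the None-to-stock-plugins fallback so callers such as `data_fetch` (next file) can resolve URIs without repeating it. A usage sketch (the URL is illustrative, and it assumes the stock plugins include an HTTP handler, as `stream_url_to_file`'s use of them implies):

```python
from galaxy.files.uris import ensure_file_sources

# None falls back to ConfiguredFileSources.from_dict(None, load_stock_plugins=True),
# so the lookup below works even when no explicit configuration is supplied.
file_sources = ensure_file_sources(None)
file_source, rel_path = file_sources.get_file_source_path("https://example.org/data.tsv")
```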
77 changes: 57 additions & 20 deletions lib/galaxy/tools/data_fetch.py
@@ -23,6 +23,7 @@
UploadProblemException,
)
from galaxy.files.uris import (
ensure_file_sources,
stream_to_file,
stream_url_to_file,
)
@@ -97,13 +98,13 @@ def expand_elements_from(target_or_item):
decompressed_directory = _decompress_target(upload_config, target_or_item)
items = _directory_to_items(decompressed_directory)
elif elements_from == "bagit":
-_, elements_from_path = _has_src_to_path(upload_config, target_or_item, is_dataset=False)
+_, elements_from_path, _ = _has_src_to_path(upload_config, target_or_item, is_dataset=False)
items = _bagit_to_items(elements_from_path)
elif elements_from == "bagit_archive":
decompressed_directory = _decompress_target(upload_config, target_or_item)
items = _bagit_to_items(decompressed_directory)
elif elements_from == "directory":
-_, elements_from_path = _has_src_to_path(upload_config, target_or_item, is_dataset=False)
+_, elements_from_path, _ = _has_src_to_path(upload_config, target_or_item, is_dataset=False)
items = _directory_to_items(elements_from_path)
else:
raise Exception(f"Unknown elements from type encountered [{elements_from}]")
@@ -205,7 +206,7 @@ def _resolve_item(item):
pass
key = keys[composite_item_idx]
writable_file = writable_files[key]
-_, src_target = _has_src_to_path(upload_config, composite_item)
+_, src_target, _ = _has_src_to_path(upload_config, composite_item)
# do the writing
sniff.handle_composite_file(
datatype,
@@ -238,10 +239,23 @@ def _resolve_item_with_primary(item):
converted_path = None

deferred = upload_config.get_option(item, "deferred")

link_data_only = upload_config.link_data_only
link_data_only_explicit = upload_config.link_data_only_explicit
if "link_data_only" in item:
# Allow overriding this on a per file basis.
link_data_only, link_data_only_explicit = _link_data_only(item)

name: str
path: Optional[str]
default_in_place = False
if not deferred:
-name, path = _has_src_to_path(upload_config, item, is_dataset=True)
+name, path, is_link = _has_src_to_path(
+    upload_config, item, is_dataset=True, link_data_only_explicitly_set=link_data_only_explicit
+)
if is_link:
link_data_only = True
default_in_place = True
else:
name, path = _has_src_to_name(item) or "Deferred Dataset", None
sources = []
@@ -266,10 +280,6 @@ def _resolve_item_with_primary(item):
item["error_message"] = error_message

dbkey = item.get("dbkey", "?")
-link_data_only = upload_config.link_data_only
-if "link_data_only" in item:
-    # Allow overriding this on a per file basis.
-    link_data_only = _link_data_only(item)

ext = "data"
staged_extra_files = None
@@ -281,7 +291,7 @@

effective_state = "ok"
if not deferred and not error_message:
-in_place = item.get("in_place", False)
+in_place = item.get("in_place", default_in_place)
purge_source = item.get("purge_source", True)

registry = upload_config.registry
@@ -339,7 +349,7 @@ def walk_extra_files(items, prefix=""):
item_prefix = os.path.join(prefix, name)
walk_extra_files(item.get("elements"), prefix=item_prefix)
else:
-src_name, src_path = _has_src_to_path(upload_config, item)
+src_name, src_path, _ = _has_src_to_path(upload_config, item)
if prefix:
rel_path = os.path.join(prefix, src_name)
else:
@@ -425,7 +435,7 @@ def _bagit_to_items(directory):


def _decompress_target(upload_config: "UploadConfig", target: Dict[str, Any]):
-elements_from_name, elements_from_path = _has_src_to_path(upload_config, target, is_dataset=False)
+elements_from_name, elements_from_path, _ = _has_src_to_path(upload_config, target, is_dataset=False)
# by default Galaxy will check for a directory with a single file and interpret that
# as the new root for expansion, this is a good user experience for uploading single
# files in an archive but not great from an API perspective. Allow disabling by setting
@@ -483,13 +493,33 @@ def _has_src_to_name(item) -> Optional[str]:
return name


-def _has_src_to_path(upload_config: "UploadConfig", item: Dict[str, Any], is_dataset: bool = False) -> Tuple[str, str]:
+def _has_src_to_path(
+    upload_config: "UploadConfig",
+    item: Dict[str, Any],
+    is_dataset: bool = False,
+    link_data_only: bool = False,
+    link_data_only_explicitly_set: bool = False,
+) -> Tuple[str, str, bool]:
assert "src" in item, item
src = item.get("src")
name = item.get("name")
is_link = False
if src == "url":
url = item.get("url")
file_sources = ensure_file_sources(upload_config.file_sources)
assert url, "url cannot be empty"
if not link_data_only_explicitly_set:
file_source, rel_path = file_sources.get_file_source_path(url)
prefer_links = file_source.prefer_links()
if prefer_links:
if rel_path.startswith("/"):
rel_path = rel_path[1:]
path = os.path.abspath(os.path.join(file_source.root, rel_path))
if name is None:
name = url.split("/")[-1]
is_link = True
return name, path, is_link

try:
path = stream_url_to_file(url, file_sources=upload_config.file_sources, dir=upload_config.working_directory)
except Exception as e:
@@ -513,7 +543,7 @@ def _has_src_to_path(upload_config: "UploadConfig", item: Dict[str, Any], is_dat
path = item["path"]
if name is None:
name = os.path.basename(path)
-return name, path
+return name, path, is_link


def _handle_hash_validation(hash_function: HashFunctionNameEnum, hash_value: str, path: str):
@@ -564,7 +594,7 @@ def __init__(
self.space_to_tab = request.get("space_to_tab", False)
self.auto_decompress = request.get("auto_decompress", False)
self.deferred = request.get("deferred", False)
-self.link_data_only = _link_data_only(request)
+self.link_data_only, self.link_data_only_explicit = _link_data_only(request)
self.file_sources_dict = file_sources_dict
self._file_sources = None

@@ -616,12 +646,19 @@ def ensure_in_working_directory(self, path, purge_source, in_place):
return new_path


-def _link_data_only(has_config_dict):
-    link_data_only = has_config_dict.get("link_data_only", False)
-    if not isinstance(link_data_only, bool):
-        # Allow the older string values of 'copy_files' and 'link_to_files'
-        link_data_only = link_data_only == "copy_files"
-    return link_data_only
+def _link_data_only(has_config_dict) -> Tuple[bool, bool]:
+    if "link_data_only" in has_config_dict:
+        link_data_only_raw = has_config_dict["link_data_only"]
+        if not isinstance(link_data_only_raw, bool):
+            # Allow the older string values of 'copy_files' and 'link_to_files'
+            link_data_only = link_data_only_raw == "copy_files"
+        else:
+            link_data_only = link_data_only_raw
+        link_data_only_explicit = True
+    else:
+        link_data_only = False
+        link_data_only_explicit = False
+    return link_data_only, link_data_only_explicit


def _for_each_src(f, obj):
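Putting the `data_fetch` changes together: a source-level `prefer_links` only applies when the request is silent about linking, while any explicit `link_data_only` (per item or request-wide) wins. A hedged sketch of a fetch target exercising both paths (the URLs and source id are illustrative):

```python
# Element 1: no explicit link_data_only, so a prefer_links source resolves it to
# an absolute path under the source root and the dataset is linked in place
# (is_link=True also flips the in_place default to True for the upload step).
# Element 2: an explicit link_data_only wins over the source preference and the
# file is streamed into the working directory as a copy.
target = {
    "destination": {"type": "hdas"},
    "elements": [
        {"src": "url", "url": "gxfiles://linking_source/a"},
        {"src": "url", "url": "gxfiles://linking_source/b", "link_data_only": False},
    ],
}
```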
20 changes: 18 additions & 2 deletions lib/galaxy_test/driver/integration_setup.py
@@ -15,7 +15,9 @@
REQUIRED_GROUP_EXPRESSION = f"{GROUP_A} or '{GROUP_B}'"


-def get_posix_file_source_config(root_dir: str, roles: str, groups: str, include_test_data_dir: bool) -> str:
+def get_posix_file_source_config(
+    root_dir: str, roles: str, groups: str, include_test_data_dir: bool, prefer_links: bool = False
+) -> str:
rval = f"""
- type: posix
id: posix_test
@@ -26,6 +28,17 @@ def get_posix_file_source_config(root_dir: str, roles: str, groups: str, include
requires_roles: {roles}
requires_groups: {groups}
"""
if prefer_links:
rval += f"""
- type: posix
id: linking_source
label: Posix
doc: Files from local path to links
root: {root_dir}
writable: true
prefer_links: true
"""

if include_test_data_dir:
rval += """
- type: posix
@@ -44,9 +57,10 @@ def create_file_source_config_file_on(
include_test_data_dir,
required_role_expression,
required_group_expression,
prefer_links: bool = False,
):
file_contents = get_posix_file_source_config(
-root_dir, required_role_expression, required_group_expression, include_test_data_dir
+root_dir, required_role_expression, required_group_expression, include_test_data_dir, prefer_links=prefer_links
)
file_path = os.path.join(temp_dir, "file_sources_conf_posix.yml")
with open(file_path, "w") as f:
@@ -67,6 +81,7 @@ def handle_galaxy_config_kwds(
# Require role for access but do not require groups by default on every test to simplify them
required_role_expression=REQUIRED_ROLE_EXPRESSION,
required_group_expression="",
prefer_links: bool = False,
):
temp_dir = os.path.realpath(mkdtemp())
clazz_ = clazz_ or cls
@@ -79,6 +94,7 @@
clazz_.include_test_data_dir,
required_role_expression,
required_group_expression,
prefer_links=prefer_links,
)
config["file_sources_config_file"] = file_sources_config_file

58 changes: 54 additions & 4 deletions test/integration/test_remote_files_posix.py
@@ -1,8 +1,8 @@
# Before running this test, start nginx+webdav in Docker using following command:
# docker run -v `pwd`/test/integration/webdav/data:/media -e WEBDAV_USERNAME=alice -e WEBDAV_PASSWORD=secret1234 -p 7083:7083 jmchilton/webdavdev
# Apache Docker host (shown next) doesn't work because displayname not set in response.
# docker run -v `pwd`/test/integration/webdav:/var/lib/dav -e AUTH_TYPE=Basic -e USERNAME=alice -e PASSWORD=secret1234 -e LOCATION=/ -p 7083:80 bytemark/webdav
import os

from sqlalchemy import select

from galaxy.model import Dataset
from galaxy_test.base import api_asserts
from galaxy_test.base.populators import DatasetPopulator
from galaxy_test.driver import integration_util
@@ -108,3 +108,53 @@ def _assert_list_response_matches_fixtures(self, list_response):

def _assert_access_forbidden_response(self, response):
api_asserts.assert_status_code_is(response, 403)


class TestPreferLinksPosixFileSourceIntegration(PosixFileSourceSetup, integration_util.IntegrationTestCase):
dataset_populator: DatasetPopulator
framework_tool_and_types = True

@classmethod
def handle_galaxy_config_kwds(cls, config):
PosixFileSourceSetup.handle_galaxy_config_kwds(
config,
cls,
prefer_links=True,
)

def setUp(self):
super().setUp()
self._write_file_fixtures()
self.dataset_populator = DatasetPopulator(self.galaxy_interactor)

def test_links_by_default(self):
with self.dataset_populator.test_history() as history_id:
element = dict(src="url", url="gxfiles://linking_source/a")
target = {
"destination": {"type": "hdas"},
"elements": [element],
}
targets = [target]
payload = {
"history_id": history_id,
"targets": targets,
}
new_dataset = self.dataset_populator.fetch(payload, assert_ok=True).json()["outputs"][0]
content = self.dataset_populator.get_history_dataset_content(history_id, dataset=new_dataset)
assert content == "a\n", content
stmt = select(Dataset).order_by(Dataset.create_time.desc()).limit(1)
dataset = self._app.model.session.execute(stmt).unique().scalar_one()
assert dataset.external_filename.endswith("/root/a")
assert os.path.exists(dataset.external_filename)
assert open(dataset.external_filename).read() == "a\n"
payload = self.dataset_populator.run_tool(
tool_id="cat",
inputs={
"input1": {"src": "hda", "id": new_dataset["id"]},
},
history_id=history_id,
)
derived_dataset = payload["outputs"][0]
self.dataset_populator.wait_for_history(history_id, assert_ok=True)
derived_content = self.dataset_populator.get_history_dataset_content(history_id, dataset=derived_dataset)
assert derived_content.strip() == "a"
