Skip to content

Commit

Permalink
Merge pull request #19109 from nsoranzo/path_upload_check_hashes
Browse files (browse the repository at this point in the history)
Test hash validation also for upload by path
  • Loading branch information
nsoranzo authored Nov 5, 2024
2 parents 9834ecd + 1f223d0 commit 242c533
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 20 deletions.
39 changes: 22 additions & 17 deletions lib/galaxy/tools/data_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from galaxy.util.compression_utils import CompressedFile
from galaxy.util.hash_util import (
HASH_NAMES,
HashFunctionNameEnum,
verify_hash,
)

Expand Down Expand Up @@ -85,7 +86,7 @@ def _request_to_galaxy_json(upload_config: "UploadConfig", request):
return {"__unnamed_outputs": fetched_targets}


def _fetch_target(upload_config: "UploadConfig", target):
def _fetch_target(upload_config: "UploadConfig", target: Dict[str, Any]):
destination = target.get("destination", None)
assert destination, "No destination defined."

Expand Down Expand Up @@ -254,14 +255,15 @@ def _resolve_item_with_primary(item):
hash_value = item.get(hash_function)
if hash_value:
hashes.append({"hash_function": hash_function, "hash_value": hash_value})
for hash_dict in hashes:
hash_function = hash_dict.get("hash_function")
hash_value = hash_dict.get("hash_value")
try:
_handle_hash_validation(upload_config, hash_function, hash_value, path)
except Exception as e:
error_message = str(e)
item["error_message"] = error_message
if path:
for hash_dict in hashes:
hash_function = hash_dict.get("hash_function")
hash_value = hash_dict.get("hash_value")
try:
_handle_hash_validation(upload_config, hash_function, hash_value, path)
except Exception as e:
error_message = str(e)
item["error_message"] = error_message

dbkey = item.get("dbkey", "?")
link_data_only = upload_config.link_data_only
Expand Down Expand Up @@ -422,7 +424,7 @@ def _bagit_to_items(directory):
return items


def _decompress_target(upload_config: "UploadConfig", target):
def _decompress_target(upload_config: "UploadConfig", target: Dict[str, Any]):
elements_from_name, elements_from_path = _has_src_to_path(upload_config, target, is_dataset=False)
# by default Galaxy will check for a directory with a single file and interpret that
# as the new root for expansion, this is a good user experience for uploading single
Expand Down Expand Up @@ -481,12 +483,13 @@ def _has_src_to_name(item) -> Optional[str]:
return name


def _has_src_to_path(upload_config, item, is_dataset=False) -> Tuple[str, str]:
def _has_src_to_path(upload_config: "UploadConfig", item: Dict[str, Any], is_dataset: bool = False) -> Tuple[str, str]:
assert "src" in item, item
src = item.get("src")
name = item.get("name")
if src == "url":
url = item.get("url")
assert url, "url cannot be empty"
try:
path = stream_url_to_file(url, file_sources=upload_config.file_sources, dir=upload_config.working_directory)
except Exception as e:
Expand All @@ -513,7 +516,9 @@ def _has_src_to_path(upload_config, item, is_dataset=False) -> Tuple[str, str]:
return name, path


def _handle_hash_validation(upload_config, hash_function, hash_value, path):
def _handle_hash_validation(
upload_config: "UploadConfig", hash_function: HashFunctionNameEnum, hash_value: str, path: str
):
if upload_config.validate_hashes:
verify_hash(path, hash_func_name=hash_function, hash_value=hash_value, what="upload")

Expand Down Expand Up @@ -548,11 +553,11 @@ def get_file_sources(working_directory, file_sources_as_dict=None):
class UploadConfig:
def __init__(
self,
request,
registry,
working_directory,
allow_failed_collections,
file_sources_dict=None,
request: Dict[str, Any],
registry: Registry,
working_directory: str,
allow_failed_collections: bool,
file_sources_dict: Optional[Dict] = None,
):
self.registry = registry
self.working_directory = working_directory
Expand Down
36 changes: 33 additions & 3 deletions test/unit/app/tools/test_data_fetch.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from contextlib import contextmanager
from shutil import rmtree
from tempfile import mkdtemp
from typing import Optional

import pytest

from galaxy.tools.data_fetch import main
from galaxy.util.unittest_utils import skip_if_github_down
Expand All @@ -13,7 +16,17 @@
URI_FOR_1_2_3 = f"base64://{B64_FOR_1_2_3}"


def test_simple_path_get():
@pytest.mark.parametrize(
"hash_value, error_message",
[
("471ddd37fc297fba09b893b88739ece9", None),
(
"thisisbad",
"Failed to validate upload with [MD5] - expected [thisisbad] got [471ddd37fc297fba09b893b88739ece9]",
),
],
)
def test_simple_path_get(hash_value: str, error_message: Optional[str]):
with _execute_context() as execute_context:
job_directory = execute_context.job_directory
example_path = os.path.join(job_directory, "example_file")
Expand All @@ -25,13 +38,30 @@ def test_simple_path_get():
"destination": {
"type": "hdas",
},
"elements": [{"src": "path", "path": example_path}],
"elements": [
{
"src": "path",
"path": example_path,
"hashes": [
{
"hash_function": "MD5",
"hash_value": hash_value,
}
],
}
],
}
]
],
"validate_hashes": True,
}
execute_context.execute_request(request)
output = _unnamed_output(execute_context)
assert output
hda_result = output["elements"][0]
if error_message is not None:
assert hda_result["error_message"] == error_message
else:
assert "error_message" not in hda_result


@skip_if_github_down
Expand Down

0 comments on commit 242c533

Please sign in to comment.