From f454d004c4c035a06cd8de4c7478ac9b996eeb28 Mon Sep 17 00:00:00 2001
From: Jochen Klar
Date: Wed, 28 Feb 2024 16:54:24 +0100
Subject: [PATCH] Filter links to only include links to the TARGET_PATH

---
 isimip_publisher/commands.py            | 12 ++++++++----
 isimip_publisher/main.py                |  4 ++--
 isimip_publisher/tests/test_commands.py |  3 ++-
 isimip_publisher/utils/files.py         |  9 +++++++++
 4 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/isimip_publisher/commands.py b/isimip_publisher/commands.py
index 4815476..739700d 100644
--- a/isimip_publisher/commands.py
+++ b/isimip_publisher/commands.py
@@ -133,7 +133,8 @@ def write_public_jsons():

 def write_link_jsons():
     public_links = files.list_links(settings.PUBLIC_PATH, settings.PATH)
-    datasets = patterns.match_datasets(settings.PATTERN, settings.PUBLIC_PATH, public_links,
+    filtered_links = files.filter_links(settings.PUBLIC_PATH, settings.TARGET_PATH, settings.PATH, public_links)
+    datasets = patterns.match_datasets(settings.PATTERN, settings.PUBLIC_PATH, filtered_links,
                                        include=settings.INCLUDE, exclude=settings.EXCLUDE)
     validation.validate_datasets(settings.SCHEMA, settings.PATH, datasets)

@@ -172,7 +173,8 @@ def insert_datasets():
 def link_links():
     remote_links = files.list_links(settings.REMOTE_PATH, settings.PATH, remote_dest=settings.REMOTE_DEST,
                                     suffix=settings.PATTERN['suffix'])
-    datasets = patterns.match_datasets(settings.PATTERN, settings.REMOTE_PATH, remote_links,
+    filtered_links = files.filter_links(settings.PUBLIC_PATH, settings.TARGET_PATH, settings.PATH, remote_links)
+    datasets = patterns.match_datasets(settings.PATTERN, settings.REMOTE_PATH, filtered_links,
                                        include=settings.INCLUDE, exclude=settings.EXCLUDE)
     validation.validate_datasets(settings.SCHEMA, settings.PATH, datasets)

@@ -184,7 +186,8 @@ def link_links():
 def link_files():
     remote_files = files.list_files(settings.REMOTE_PATH, settings.PATH, remote_dest=settings.REMOTE_DEST,
                                     suffix=settings.PATTERN['suffix'])
-    datasets = patterns.match_datasets(settings.PATTERN, settings.REMOTE_PATH, remote_files,
+    filtered_links = files.filter_links(settings.PUBLIC_PATH, settings.TARGET_PATH, settings.PATH, remote_files)
+    datasets = patterns.match_datasets(settings.PATTERN, settings.REMOTE_PATH, filtered_links,
                                        include=settings.INCLUDE, exclude=settings.EXCLUDE)
     validation.validate_datasets(settings.SCHEMA, settings.PATH, datasets)

@@ -196,7 +199,8 @@ def link_files():
 def link_datasets():
     # collect and validate the links
     public_links = files.list_links(settings.PUBLIC_PATH, settings.PATH)
-    datasets = patterns.match_datasets(settings.PATTERN, settings.PUBLIC_PATH, public_links,
+    filtered_links = files.filter_links(settings.PUBLIC_PATH, settings.TARGET_PATH, settings.PATH, public_links)
+    datasets = patterns.match_datasets(settings.PATTERN, settings.PUBLIC_PATH, filtered_links,
                                        include=settings.INCLUDE, exclude=settings.EXCLUDE)
     validation.validate_datasets(settings.SCHEMA, settings.PATH, datasets)

diff --git a/isimip_publisher/main.py b/isimip_publisher/main.py
index 683d382..49ae194 100644
--- a/isimip_publisher/main.py
+++ b/isimip_publisher/main.py
@@ -94,7 +94,7 @@ def get_parser(add_path=False, add_subparsers=False):
     # add a subparser for each subcommand
     for func in [list_remote, list_remote_links, list_local, list_public, list_public_links,
                  match_remote, match_remote_links, match_local, match_public, match_public_links,
-                 fetch_files, write_local_jsons, write_public_jsons, write_link_jsons,
+                 fetch_files, write_local_jsons, write_public_jsons,
                  insert_datasets, update_datasets, publish_datasets, archive_datasets, check, clean, update_search,
                  update_tree, run]:
         subparser = subparsers.add_parser(func.__name__)
@@ -117,7 +117,7 @@ def get_parser(add_path=False, add_subparsers=False):
         subparser.set_defaults(func=func)
         subparser.add_argument('doi', help='DOI to process')

-    for func in [link_links, link_files, link_datasets, link]:
+    for func in [link_links, link_files, link_datasets, link, write_link_jsons]:
         subparser = subparsers.add_parser(func.__name__)
         subparser.set_defaults(func=func)
         subparser.add_argument('target_path', help='path of the files to process')
diff --git a/isimip_publisher/tests/test_commands.py b/isimip_publisher/tests/test_commands.py
index 39b0785..af5b659 100644
--- a/isimip_publisher/tests/test_commands.py
+++ b/isimip_publisher/tests/test_commands.py
@@ -247,7 +247,8 @@ def test_write_public_jsons(setup, public_files, script_runner):


 def test_write_link_jsons(setup, public_links, script_runner):
-    response = script_runner.run(['isimip-publisher', 'write_link_jsons', 'round/product/sector2/model'])
+    response = script_runner.run(['isimip-publisher', 'write_link_jsons',
+                                  'round/product/sector/model', 'round/product/sector2/model'])
     assert response.success, response.stderr
     assert not response.stdout
     assert response.stderr.strip().startswith('write_link_jsons')
diff --git a/isimip_publisher/utils/files.py b/isimip_publisher/utils/files.py
index 28f179a..4f34bbe 100644
--- a/isimip_publisher/utils/files.py
+++ b/isimip_publisher/utils/files.py
@@ -52,6 +52,15 @@ def list_links(base_path, path, remote_dest=None, suffix=None):
     return list_files(base_path, path, remote_dest=remote_dest, suffix=suffix, find_type='l')


+def filter_links(public_path, target_path, path, links):
+    filtered_links = []
+    for link_path in links:
+        target_abspath = public_path / target_path / Path(link_path).relative_to(path)
+        if target_abspath.exists() and not target_abspath.is_symlink():
+            filtered_links.append(link_path)
+    return filtered_links
+
+
 def copy_files(remote_dest, remote_path, local_path, path, datasets):
     # check if path is a file
     if Path(path).suffix:
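
Note (not part of the patch): the sketch below illustrates how the new filter_links() helper is expected to behave. Its body is copied verbatim from the hunk above; the temporary directory, the file names, and the two path strings (which mirror the fixture paths used in test_commands.py) are invented for illustration only.

# Minimal, hypothetical sketch of filter_links(): keep only those links whose
# corresponding target below PUBLIC_PATH / TARGET_PATH exists and is a regular
# file rather than another symlink.
import tempfile
from pathlib import Path


def filter_links(public_path, target_path, path, links):
    # copied from the patch
    filtered_links = []
    for link_path in links:
        target_abspath = public_path / target_path / Path(link_path).relative_to(path)
        if target_abspath.exists() and not target_abspath.is_symlink():
            filtered_links.append(link_path)
    return filtered_links


with tempfile.TemporaryDirectory() as tmp:
    public_path = Path(tmp)                       # stands in for settings.PUBLIC_PATH
    target_path = 'round/product/sector/model'    # stands in for settings.TARGET_PATH
    path = 'round/product/sector2/model'          # stands in for settings.PATH (where the links live)

    # a real file under the target path -> the link pointing at it is kept
    kept_target = public_path / target_path / 'file_a.nc'
    kept_target.parent.mkdir(parents=True)
    kept_target.touch()

    # file_b.nc has no counterpart under the target path -> its link is dropped
    links = [f'{path}/file_a.nc', f'{path}/file_b.nc']

    print(filter_links(public_path, target_path, path, links))
    # ['round/product/sector2/model/file_a.nc']

Links whose resolved target under PUBLIC_PATH / TARGET_PATH is missing, or is itself a symlink, are filtered out, which is what restricts write_link_jsons and the link_* commands to links that actually point into the given TARGET_PATH.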