From 00c775c0e940fb9283ce01a5a86558ecc692ca9e Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Tue, 17 Sep 2024 16:54:10 +1000 Subject: [PATCH 01/16] chore: add --local-maven-repo as input option --- scripts/release_scripts/run_macaron.sh | 30 ++++++++++++ src/macaron/__main__.py | 31 +++++++++++++ src/macaron/config/global_config.py | 3 ++ .../test.yaml | 46 +++++++++++++++++++ 4 files changed, 110 insertions(+) create mode 100644 tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml diff --git a/scripts/release_scripts/run_macaron.sh b/scripts/release_scripts/run_macaron.sh index 5201cedb5..e90532b63 100755 --- a/scripts/release_scripts/run_macaron.sh +++ b/scripts/release_scripts/run_macaron.sh @@ -328,6 +328,10 @@ if [[ $command == "analyze" ]]; then python_venv_path="$2" shift ;; + --local-maven-repo) + local_maven_repo="$2" + shift + ;; *) rest_command+=("$1") ;; @@ -455,6 +459,32 @@ if [[ -n "${python_venv_path:-}" ]]; then mount_dir_ro "--python-venv" "$python_venv_path" "$python_venv_in_container" fi +# Mount the local Maven repo into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly. +if [[ -n "${local_maven_repo:-}" ]]; then + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") + + mount_dir_ro "--local-maven-repo" "$local_maven_repo" "$local_maven_repo_in_container" +else + # If the user doesn't provide local maven repo, we mount $HOME/.m2 into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly ONLY IF $HOME/.m2 directory exists. + # If $HOME/.m2 doesn't exist, we create and mount an empty directory ${output}/analyze_local_maven_repo_readonly into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly instead. 
+ # This is because we don't want Macaron running within + # the container to use `$HOME/.m2` within the container as it is being used + # by the cyclonedx plugins for dependency resolution. + if [[ -d "$HOME/.m2" ]]; then + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") + + mount_dir_ro "--local-maven-repo" "$HOME/.m2" "$local_maven_repo_in_container" + else + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + output_local_maven_repo="${output}/analyze_local_maven_repo_readonly" + mkdir -p "$output_local_maven_repo" + + mount_dir_ro "--local-maven-repo" "$output_local_maven_repo" "$local_maven_repo_in_container" + fi +fi + # MACARON entrypoint - verify-policy command argvs # This is for macaron verify-policy command. # Determine the database path to be mounted into ${MACARON_WORKSPACE}/database/macaron.db diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index 3e541d633..71a0a1a4e 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -74,6 +74,29 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None sys.exit(os.EX_OSFILE) global_config.load_python_venv(analyzer_single_args.python_venv) + # Set Python virtual environment path. + if analyzer_single_args.local_maven_repo is None: + # Load the default user local .m2 directory. + # Exit on error if $HOME is not set or empty. + home_dir = os.getenv("HOME") + if not home_dir: + logger.critical("Environment variable HOME is not set.") + sys.exit(os.EX_USAGE) + + local_maven_repo = os.path.join(home_dir, ".m2") + if not os.path.isdir(local_maven_repo): + logger.debug("The default local Maven repo at %s does not exist. 
Ignore ...") + global_config.local_maven_repo = None + + global_config.local_maven_repo = local_maven_repo + else: + user_provided_local_maven_repo = analyzer_single_args.local_maven_repo + if not os.path.exists(user_provided_local_maven_repo) or not os.path.isdir(user_provided_local_maven_repo): + logger.error("The user provided local Maven repo at %s is not valid.", user_provided_local_maven_repo) + sys.exit(os.EX_USAGE) + + global_config.local_maven_repo = user_provided_local_maven_repo + analyzer = Analyzer(global_config.output_path, global_config.build_log_path) # Initiate reporters. @@ -453,6 +476,14 @@ def main(argv: list[str] | None = None) -> None: ), ) + single_analyze_parser.add_argument( + "--local-maven-repo", + required=False, + help=( + "The path to the local .m2 directory. If this option is not used, Macaron will use the default location at $HOME/.m2" + ), + ) + # Dump the default values. sub_parser.add_parser(name="dump-defaults", description="Dumps the defaults.ini file to the output directory.") diff --git a/src/macaron/config/global_config.py b/src/macaron/config/global_config.py index bfee23d32..d6d113a3a 100644 --- a/src/macaron/config/global_config.py +++ b/src/macaron/config/global_config.py @@ -46,6 +46,9 @@ class GlobalConfig: #: The path to Python virtual environment. python_venv_path: str = "" + #: The path to the local .m2 Maven repository. This attribute is None if there is no available .m2 directory. + local_maven_repo: str | None = None + def load( self, macaron_path: str, diff --git a/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml b/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml new file mode 100644 index 000000000..7de49fcac --- /dev/null +++ b/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml @@ -0,0 +1,46 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. 
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. + +description: | + Test invalid usecases for --local-maven-repo. + +tags: +- macaron-docker-image +- macaron-python-package + +steps: +- name: HOME environment variable is not set and --local-maven-repo is not used. + kind: analyze + env: + HOME: + options: + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@1.0?type=jar + expect_fail: true +- name: Providing a directory that doesn't exist to --local-maven-repo. + kind: analyze + options: + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@1.0?type=jar + - --local-maven-repo + - invalid_dir + expect_fail: true +- name: Create a test file. + kind: shell + options: + cmd: touch test.txt +- name: Providing a file path to --local-maven-repo. + kind: analyze + options: + command_args: + - -purl + - pkg:maven/io.github.behnazh-w.demo/example-maven-app@1.0?type=jar + - --local-maven-repo + - ./test.txt + expect_fail: true +- name: Clean up the test file. 
+ kind: shell + options: + cmd: rm test.txt From 5b1e99ed01bb901b62c0623e945365ceee8adf0c Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Thu, 19 Sep 2024 10:31:19 +1000 Subject: [PATCH 02/16] chore: improve run_macaron.sh on local maven repo mounting behavior and improve the unit tests for run_macaron.sh --- scripts/release_scripts/run_macaron.sh | 46 +++-- .../test_run_macaron_sh.py | 165 ++++++++++++++---- 2 files changed, 158 insertions(+), 53 deletions(-) diff --git a/scripts/release_scripts/run_macaron.sh b/scripts/release_scripts/run_macaron.sh index e90532b63..457ccd220 100755 --- a/scripts/release_scripts/run_macaron.sh +++ b/scripts/release_scripts/run_macaron.sh @@ -339,7 +339,7 @@ if [[ $command == "analyze" ]]; then shift done elif [[ $command == "verify-policy" ]]; then - while [[ $# -gt 0 ]]; do + while [[ $# -gt 0 ]]; do case $1 in -d|--database) arg_database="$2" @@ -355,6 +355,15 @@ elif [[ $command == "verify-policy" ]]; then esac shift done +elif [[ $command == "dump-defaults" ]]; then + while [[ $# -gt 0 ]]; do + case $1 in + *) + rest_command+=("$1") + ;; + esac + shift + done fi # MACARON entrypoint - Main argvs @@ -466,22 +475,27 @@ if [[ -n "${local_maven_repo:-}" ]]; then mount_dir_ro "--local-maven-repo" "$local_maven_repo" "$local_maven_repo_in_container" else - # If the user doesn't provide local maven repo, we mount $HOME/.m2 into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly ONLY IF $HOME/.m2 directory exists. - # If $HOME/.m2 doesn't exist, we create and mount an empty directory ${output}/analyze_local_maven_repo_readonly into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly instead. - # This is because we don't want Macaron running within - # the container to use `$HOME/.m2` within the container as it is being used - # by the cyclonedx plugins for dependency resolution. 
- if [[ -d "$HOME/.m2" ]]; then - local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" - argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") - - mount_dir_ro "--local-maven-repo" "$HOME/.m2" "$local_maven_repo_in_container" - else - local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" - output_local_maven_repo="${output}/analyze_local_maven_repo_readonly" - mkdir -p "$output_local_maven_repo" + # Perform default local maven repo when the user doesn't provide --local-maven-repo and `analyze` command is used. + if [[ "$command" == "analyze" ]]; then + # We mount $HOME/.m2 into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly ONLY IF $HOME/.m2 directory exists. + if [[ -d "$HOME/.m2" ]]; then + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") + + mount_dir_ro "--local-maven-repo" "$HOME/.m2" "$local_maven_repo_in_container" + # If $HOME/.m2 doesn't exist, we create and mount an empty directory ${output}/analyze_local_maven_repo_readonly + # into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly. + # This is because we don't want Macaron running within + # the container to use `$HOME/.m2` within the container as it is being used + # by the cyclonedx plugins for dependency resolution. 
+ else + local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" + output_local_maven_repo="${output}/analyze_local_maven_repo_readonly" + mkdir -p "$output_local_maven_repo" + argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") - mount_dir_ro "--local-maven-repo" "$output_local_maven_repo" "$local_maven_repo_in_container" + mount_dir_ro "--local-maven-repo" "$output_local_maven_repo" "$local_maven_repo_in_container" + fi fi fi diff --git a/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py b/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py index 985ecaeed..6eaf9a1be 100755 --- a/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py +++ b/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py @@ -6,13 +6,54 @@ import os import subprocess # nosec B404 import sys +import tempfile from collections import namedtuple +TestCase = namedtuple("TestCase", ["name", "script_args", "expected_macaron_args"]) -def test_macaron_command() -> int: - """Test if the ``macaron`` command in the container receives the correct arguments.""" - TestCase = namedtuple("TestCase", ["name", "script_args", "expected_macaron_args"]) +def run_test_case( + test_case: TestCase, + env: dict[str, str], +) -> int: + """Run a test case in an environment with variables defined by `env` and return the exit code.""" + exit_code = 0 + + name, script_args, expected_macaron_args = test_case + print(f"test_macaron_command[{name}]:", end=" ") + + result = subprocess.run( + [ # nosec B603 + "./output/run_macaron.sh", + *script_args, + ], + capture_output=True, + env=env, + check=False, + ) + if result.returncode != 0: + exit_code = 1 + print(f"FAILED with exit code {exit_code}") + print("stderr:") + print(result.stderr.decode("utf-8")) + return exit_code + + resulting_macaron_args = list(result.stderr.decode("utf-8").split()) + + if 
resulting_macaron_args != expected_macaron_args: + print("FAILED") + print(" script args : %s", str(script_args)) + print(" expected macaron args : %s", str(expected_macaron_args)) + print(" resulting macaron args: %s", str(resulting_macaron_args)) + exit_code = 1 + else: + print("PASSED") + + return exit_code + + +def test_macaron_command_help() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" test_cases = [ TestCase( name="'-h' as main argument", @@ -20,9 +61,9 @@ def test_macaron_command() -> int: expected_macaron_args=["-h"], ), TestCase( - name="'-h' as action argument for 'analyze'", - script_args=["analyze", "-h"], - expected_macaron_args=["analyze", "-h"], + name="'-h' as action argument for 'dump-defaults'", + script_args=["dump-defaults", "-h"], + expected_macaron_args=["dump-defaults", "-h"], ), TestCase( name="'-h' as action argument for 'verify-policy'", @@ -31,47 +72,97 @@ def test_macaron_command() -> int: ), ] + env = dict(os.environ) + env["MCN_DEBUG_ARGS"] = "1" + + for case in test_cases: + exit_code = run_test_case(case, env) + + return exit_code + + +def test_macaron_command_no_home_m2() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" + test_cases = [ + TestCase( + name="no --local-maven-repo and host $HOME/.m2 is not available", + script_args=["analyze"], + expected_macaron_args=["analyze", "--local-maven-repo", "/home/macaron/analyze_local_maven_repo_readonly"], + ), + ] + + env = dict(os.environ) + env["MCN_DEBUG_ARGS"] = "1" + # We mimick the behavior of $HOME/.m2 not available by making $HOME pointing to a directory that doesn't exist. 
+ env["HOME"] = "./non_exist_dir" + exit_code = 0 + for case in test_cases: + exit_code = run_test_case(case, env) + + return exit_code + + +def test_macaron_command_home_m2_available() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" + test_cases = [ + TestCase( + name="no --local-maven-repo and host $HOME/.m2 is available", + script_args=["analyze"], + expected_macaron_args=["analyze", "--local-maven-repo", "/home/macaron/analyze_local_maven_repo_readonly"], + ), + ] + env = dict(os.environ) env["MCN_DEBUG_ARGS"] = "1" + exit_code = 0 + + with tempfile.TemporaryDirectory() as temp_dir: + # We create a temp dir with a .m2 directory and point $HOME to it. + # This .m2 directory contains an empty `repository` directory. + os.mkdir(os.path.join(temp_dir, ".m2")) + os.mkdir(os.path.join(temp_dir, ".m2/repository")) + env["HOME"] = temp_dir + + for case in test_cases: + exit_code = run_test_case(case, env) + + return exit_code + + +def test_macaron_provide_local_maven_repo() -> int: + """Test if the ``macaron`` command in the container receives the correct arguments.""" + with tempfile.TemporaryDirectory() as temp_dir: + test_cases = [ + TestCase( + name="with --local-maven-repo pointing to an existing directory", + script_args=["analyze", "--local-maven-repo", f"{temp_dir}"], + expected_macaron_args=[ + "analyze", + "--local-maven-repo", + "/home/macaron/analyze_local_maven_repo_readonly", + ], + ), + ] + + env = dict(os.environ) + env["MCN_DEBUG_ARGS"] = "1" + exit_code = 0 - for test_case in test_cases: - name, script_args, expected_macaron_args = test_case - print(f"test_macaron_command[{name}]:", end=" ") - - result = subprocess.run( - [ # nosec B603 - "./output/run_macaron.sh", - *script_args, - ], - capture_output=True, - env=env, - check=False, - ) - if result.returncode != 0: - exit_code = 1 - print(f"FAILED with exit code {exit_code}") - print("stderr:") - print(result.stderr.decode("utf-8")) - continue - - 
resulting_macaron_args = list(result.stderr.decode("utf-8").split()) - - if resulting_macaron_args != expected_macaron_args: - print("FAILED") - print(" script args : %s", str(script_args)) - print(" expected macaron args : %s", str(expected_macaron_args)) - print(" resulting macaron args: %s", str(resulting_macaron_args)) - exit_code = 1 - else: - print("PASSED") + for case in test_cases: + exit_code = run_test_case(case, env) return exit_code def main() -> int: """Run all tests.""" - return test_macaron_command() + return ( + test_macaron_command_help() + | test_macaron_command_no_home_m2() + | test_macaron_command_home_m2_available() + | test_macaron_provide_local_maven_repo() + ) if __name__ == "__main__": From 5ea77ee26ade31eb4022fc349aca9f587e2dbca3 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Fri, 20 Sep 2024 14:05:32 +1000 Subject: [PATCH 03/16] chore: add maven local artifact finding --- src/macaron/artifact/local_artifact.py | 80 +++++++++++++ src/macaron/artifact/maven.py | 38 ++++++ src/macaron/slsa_analyzer/analyze_context.py | 4 + src/macaron/slsa_analyzer/analyzer.py | 12 ++ .../package_registry/jfrog_maven_registry.py | 47 +------- tests/artifact/test_local_artifact.py | 108 ++++++++++++++++++ tests/artifact/test_maven.py | 77 ++++++++++++- .../test_jfrog_maven_registry.py | 79 +------------ 8 files changed, 324 insertions(+), 121 deletions(-) create mode 100644 src/macaron/artifact/local_artifact.py create mode 100644 tests/artifact/test_local_artifact.py diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py new file mode 100644 index 000000000..eb862e477 --- /dev/null +++ b/src/macaron/artifact/local_artifact.py @@ -0,0 +1,80 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""This module declares types and utilities for handling local artifacts.""" + +import os +from collections.abc import Mapping + +from packageurl import PackageURL + +from macaron.artifact.maven import construct_maven_repository_path +from macaron.config.global_config import global_config + + +def get_local_artifact_repo_mapper() -> Mapping[str, str]: + """Get A.""" + local_artifact_mapper: dict[str, str] = {} + + if global_config.local_maven_repo: + local_artifact_mapper["maven"] = global_config.local_maven_repo + + if global_config.python_venv_path: + local_artifact_mapper["pypi"] = global_config.python_venv_path + + return local_artifact_mapper + + +def construct_local_artifact_path_from_purl( + build_purl_type: str, + component_purl: PackageURL, + local_artifact_repo_mapper: Mapping[str, str], +) -> str | None: + """Get B.""" + local_artifact_repo = local_artifact_repo_mapper.get(build_purl_type) + if local_artifact_repo is None: + return None + + artifact_path = None + match build_purl_type: + case "maven": + group = component_purl.namespace + artifact = component_purl.name + version = component_purl.version + + if group is None or version is None: + return None + + artifact_path = os.path.join( + local_artifact_repo, + "repository", + construct_maven_repository_path(group, artifact, version), + ) + case "pypi": + # TODO: implement this. 
+ pass + case _: + return None + + return artifact_path + + +def get_local_artifact_paths( + purl: PackageURL, + build_tool_purl_types: list[str], + local_artifact_repo_mapper: Mapping[str, str], +) -> dict[str, str]: + """Get C.""" + result = {} + + for build_purl_type in build_tool_purl_types: + local_artfiact_path = construct_local_artifact_path_from_purl( + build_purl_type=build_purl_type, + component_purl=purl, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + + if local_artfiact_path and os.path.isdir(local_artfiact_path): + result[build_purl_type] = local_artfiact_path + + return result diff --git a/src/macaron/artifact/maven.py b/src/macaron/artifact/maven.py index 6130aed83..dd97431f7 100644 --- a/src/macaron/artifact/maven.py +++ b/src/macaron/artifact/maven.py @@ -158,3 +158,41 @@ def is_valid_maven_group_id(group_id: str) -> bool: # Should match strings like org.example.foo, org.example-2.foo.bar_1. pattern = r"^[a-zA-Z][a-zA-Z0-9-]*\.([a-zA-Z][a-zA-Z0-9-]*\.)*[a-zA-Z][a-zA-Z0-9-]*[a-zA-Z0-9]$" return re.match(pattern, group_id) is not None + + +def construct_maven_repository_path( + group_id: str, + artifact_id: str | None = None, + version: str | None = None, + asset_name: str | None = None, +) -> str: + """Construct a path to a folder or file on the registry, assuming Maven repository layout. + + For more details regarding Maven repository layout, see the following: + - https://maven.apache.org/repository/layout.html + - https://maven.apache.org/guides/mini/guide-naming-conventions.html + + Parameters + ---------- + group_id : str + The group id of a Maven package. + artifact_id : str + The artifact id of a Maven package. + version : str + The version of a Maven package. + asset_name : str + The asset name. + + Returns + ------- + str + The path to a folder or file on the registry. 
+ """ + path = group_id.replace(".", "/") + if artifact_id: + path = "/".join([path, artifact_id]) + if version: + path = "/".join([path, version]) + if asset_name: + path = "/".join([path, asset_name]) + return path diff --git a/src/macaron/slsa_analyzer/analyze_context.py b/src/macaron/slsa_analyzer/analyze_context.py index f6c8fd22a..5d57e8b2b 100644 --- a/src/macaron/slsa_analyzer/analyze_context.py +++ b/src/macaron/slsa_analyzer/analyze_context.py @@ -55,6 +55,9 @@ class ChecksOutputs(TypedDict): """The commit digest extracted from provenance, if applicable.""" provenance_verified: bool """True if the provenance exists and has been verified against a signed companion provenance.""" + local_artifact_paths: dict[str, str] + # TODO this doc string for this variable need more informatino, to be revise later. + """The mapping between build tool types and the directory that contains the corresponding artifacts.""" class AnalyzeContext: @@ -110,6 +113,7 @@ def __init__( provenance_repo_url=None, provenance_commit_digest=None, provenance_verified=False, + local_artifact_paths={}, ) @property diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 8190f87fd..2639d00ae 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -16,6 +16,7 @@ from sqlalchemy.orm import Session from macaron import __version__ +from macaron.artifact.local_artifact import get_local_artifact_paths, get_local_artifact_repo_mapper from macaron.config.defaults import defaults from macaron.config.global_config import global_config from macaron.config.target_config import Configuration @@ -473,6 +474,17 @@ def run_single( analyze_ctx.dynamic_data["provenance_repo_url"] = provenance_repo_url analyze_ctx.dynamic_data["provenance_commit_digest"] = provenance_commit_digest + discovered_build_toosl = ( + analyze_ctx.dynamic_data["build_spec"]["tools"] + analyze_ctx.dynamic_data["build_spec"]["purl_tools"] + ) + 
build_tools_purl_types = [build_tool.purl_type for build_tool in discovered_build_toosl] + analyze_ctx.dynamic_data["local_artifact_paths"] = get_local_artifact_paths( + # The PURL is definitely valid here. + PackageURL.from_string(analyze_ctx.component.purl), + build_tools_purl_types, + local_artifact_repo_mapper=get_local_artifact_repo_mapper(), + ) + analyze_ctx.check_results = registry.scan(analyze_ctx) return Record( diff --git a/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py b/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py index 65987d1e2..bfdd9aa4b 100644 --- a/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py @@ -13,6 +13,7 @@ import requests +from macaron.artifact.maven import construct_maven_repository_path from macaron.config.defaults import defaults from macaron.errors import ConfigurationError from macaron.json_tools import JsonType @@ -197,44 +198,6 @@ def is_detected(self, build_tool: BaseBuildTool) -> bool: compatible_build_tool_classes = [Maven, Gradle] return any(isinstance(build_tool, build_tool_class) for build_tool_class in compatible_build_tool_classes) - def construct_maven_repository_path( - self, - group_id: str, - artifact_id: str | None = None, - version: str | None = None, - asset_name: str | None = None, - ) -> str: - """Construct a path to a folder or file on the registry, assuming Maven repository layout. - - For more details regarding Maven repository layout, see the following: - - https://maven.apache.org/repository/layout.html - - https://maven.apache.org/guides/mini/guide-naming-conventions.html - - Parameters - ---------- - group_id : str - The group id of a Maven package. - artifact_id : str - The artifact id of a Maven package. - version : str - The version of a Maven package. - asset_name : str - The asset name. - - Returns - ------- - str - The path to a folder or file on the registry. 
- """ - path = group_id.replace(".", "/") - if artifact_id: - path = "/".join([path, artifact_id]) - if version: - path = "/".join([path, version]) - if asset_name: - path = "/".join([path, asset_name]) - return path - def fetch_artifact_ids(self, group_id: str) -> list[str]: """Get all artifact ids under a group id. @@ -251,7 +214,7 @@ def fetch_artifact_ids(self, group_id: str) -> list[str]: The artifacts ids under the group. """ folder_info_url = self.construct_folder_info_url( - folder_path=self.construct_maven_repository_path(group_id), + folder_path=construct_maven_repository_path(group_id), ) try: @@ -440,7 +403,7 @@ def fetch_asset_names( list[str] The list of asset names. """ - folder_path = self.construct_maven_repository_path( + folder_path = construct_maven_repository_path( group_id=group_id, artifact_id=artifact_id, version=version, @@ -615,7 +578,7 @@ def fetch_asset_metadata( JFrogMavenAssetMetadata | None The asset's metadata, or ``None`` if the metadata cannot be retrieved. """ - file_path = self.construct_maven_repository_path( + file_path = construct_maven_repository_path( group_id=group_id, artifact_id=artifact_id, version=version, @@ -798,7 +761,7 @@ def construct_asset_url( str The URL to the asset, which can be use for downloading the asset. """ - group_path = self.construct_maven_repository_path(group_id) + group_path = construct_maven_repository_path(group_id) return urlunsplit( SplitResult( scheme="https", diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py new file mode 100644 index 000000000..3e44bdcb6 --- /dev/null +++ b/tests/artifact/test_local_artifact.py @@ -0,0 +1,108 @@ +# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. +# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
+ +"""Test the local artifact utilities.""" + +import tempfile +from collections.abc import Mapping + +import pytest +from packageurl import PackageURL + +from macaron.artifact.local_artifact import construct_local_artifact_path_from_purl, get_local_artifact_paths + + +@pytest.mark.parametrize( + ("build_purl_type", "purl_str", "local_artifact_repo_mapper", "expectation"), + [ + pytest.param( + "maven", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + {"maven": "/home/foo/.m2"}, + "/home/foo/.m2/repository/com/google/guava/guava/33.2.1-jre", + id="A maven type PURL with available local maven repo", + ), + pytest.param( + "maven", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + {}, + None, + id="A maven type PURL without an available local maven repo", + ), + pytest.param( + "maven", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + {"pypi": "/home/foo/.venv"}, + None, + id="A maven type PURL without an available local maven repo but there is a Python venv", + ), + pytest.param( + "maven", + "pkg:maven/com.google.guava/guava", + {"maven": "/home/foo/.m2"}, + None, + id="A maven type PURL with missing version and an available local maven repo", + ), + pytest.param( + "maven", + "pkg:maven/guava", + {"maven": "/home/foo/.m2"}, + None, + id="A maven type PURL with missing groupd Id and an available local maven repo", + ), + pytest.param( + "maven", + "pkg:github/oracle/macaron", + {"maven": "/home/foo/.m2"}, + None, + id="A git type PURL and an available local maven repo", + ), + ], +) +def test_construct_local_artifact_path_from_purl( + build_purl_type: str, + purl_str: str, + local_artifact_repo_mapper: Mapping[str, str], + expectation: str, +) -> None: + """Test constructing a local artifact path from a given purl.""" + component_purl = PackageURL.from_string(purl_str) + assert ( + construct_local_artifact_path_from_purl( + build_purl_type=build_purl_type, + component_purl=component_purl, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + == 
expectation + ) + + +@pytest.mark.parametrize( + ("purl_str", "build_tool_purl_types"), + [ + pytest.param( + "pkg:maven/com.google.guava/guava@33.2.1-jre", + ["maven", "pypi"], + id="A maven type PURL where multiple build tool types are discovered", + ), + ], +) +def test_get_local_artifact_paths_non_existing( + purl_str: str, + build_tool_purl_types: list[str], +) -> None: + """Test getting local artifact paths of non existing artifacts. + + The local artifact repos are available. + """ + purl = PackageURL.from_string(purl_str) + with tempfile.TemporaryDirectory() as temp_dir: + local_artifact_repo_mapper = { + "maven": temp_dir, + "pypi": temp_dir, + } + assert not get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) diff --git a/tests/artifact/test_maven.py b/tests/artifact/test_maven.py index 31e95ba53..6014c20ad 100644 --- a/tests/artifact/test_maven.py +++ b/tests/artifact/test_maven.py @@ -6,7 +6,7 @@ import pytest from packageurl import PackageURL -from macaron.artifact.maven import MavenSubjectPURLMatcher +from macaron.artifact.maven import MavenSubjectPURLMatcher, construct_maven_repository_path from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, validate_intoto_payload @@ -86,3 +86,78 @@ def test_to_maven_artifact_subject( ) == provenance_payload.statement["subject"][subject_index] ) + + +@pytest.mark.parametrize( + ("args", "expected_path"), + [ + pytest.param( + { + "group_id": "io.micronaut", + }, + "io/micronaut", + id="Only group_id 1", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", + }, + "com/fasterxml/jackson/core", + id="Only group_id 2", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", + "artifact_id": "jackson-annotations", + }, + "com/fasterxml/jackson/core/jackson-annotations", + id="group_id and artifact_id", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", + 
"artifact_id": "jackson-annotations", + "version": "2.9.9", + }, + "com/fasterxml/jackson/core/jackson-annotations/2.9.9", + id="group_id and artifact_id and version", + ), + pytest.param( + { + "group_id": "com.fasterxml.jackson.core", + "artifact_id": "jackson-annotations", + "version": "2.9.9", + "asset_name": "jackson-annotations-2.9.9.jar", + }, + "com/fasterxml/jackson/core/jackson-annotations/2.9.9/jackson-annotations-2.9.9.jar", + id="group_id and artifact_id and version and asset_name,", + ), + ], +) +def test_construct_maven_repository_path( + args: dict, + expected_path: str, +) -> None: + """Test the ``construct_maven_repository_path`` method.""" + assert construct_maven_repository_path(**args) == expected_path + + +@pytest.mark.parametrize( + ("group_id", "expected_group_path"), + [ + ( + "io.micronaut", + "io/micronaut", + ), + ( + "com.fasterxml.jackson.core", + "com/fasterxml/jackson/core", + ), + ], +) +def test_to_group_folder_path( + group_id: str, + expected_group_path: str, +) -> None: + """Test the ``to_gorup_folder_path`` method.""" + assert construct_maven_repository_path(group_id) == expected_group_path diff --git a/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py b/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py index eca170319..ebb960366 100644 --- a/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py +++ b/tests/slsa_analyzer/package_registry/test_jfrog_maven_registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. 
"""Tests for the ``JFrogMavenRegistry`` class.""" @@ -137,83 +137,6 @@ def test_is_detected( assert jfrog_maven.is_detected(build_tool) is False -@pytest.mark.parametrize( - ("args", "expected_path"), - [ - pytest.param( - { - "group_id": "io.micronaut", - }, - "io/micronaut", - id="Only group_id 1", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - }, - "com/fasterxml/jackson/core", - id="Only group_id 2", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - "artifact_id": "jackson-annotations", - }, - "com/fasterxml/jackson/core/jackson-annotations", - id="group_id and artifact_id", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - "artifact_id": "jackson-annotations", - "version": "2.9.9", - }, - "com/fasterxml/jackson/core/jackson-annotations/2.9.9", - id="group_id and artifact_id and version", - ), - pytest.param( - { - "group_id": "com.fasterxml.jackson.core", - "artifact_id": "jackson-annotations", - "version": "2.9.9", - "asset_name": "jackson-annotations-2.9.9.jar", - }, - "com/fasterxml/jackson/core/jackson-annotations/2.9.9/jackson-annotations-2.9.9.jar", - id="group_id and artifact_id and version and asset_name,", - ), - ], -) -def test_construct_maven_repository_path( - jfrog_maven: JFrogMavenRegistry, - args: dict, - expected_path: str, -) -> None: - """Test the ``construct_maven_repository_path`` method.""" - assert jfrog_maven.construct_maven_repository_path(**args) == expected_path - - -@pytest.mark.parametrize( - ("group_id", "expected_group_path"), - [ - ( - "io.micronaut", - "io/micronaut", - ), - ( - "com.fasterxml.jackson.core", - "com/fasterxml/jackson/core", - ), - ], -) -def test_to_group_folder_path( - jfrog_maven: JFrogMavenRegistry, - group_id: str, - expected_group_path: str, -) -> None: - """Test the ``to_gorup_folder_path`` method.""" - assert jfrog_maven.construct_maven_repository_path(group_id) == expected_group_path - - @pytest.mark.parametrize( ("folder_path", 
"expected_url"), [ From d7a2712e95eb758757fdfaa1efdc7d4b7a061114 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Mon, 23 Sep 2024 12:00:27 +1000 Subject: [PATCH 04/16] chore: allow multiple artifact paths for a purl type --- src/macaron/artifact/local_artifact.py | 42 ++++++++++++++------ src/macaron/slsa_analyzer/analyze_context.py | 5 +-- tests/artifact/test_local_artifact.py | 25 +++++++----- 3 files changed, 46 insertions(+), 26 deletions(-) diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py index eb862e477..67b70c318 100644 --- a/src/macaron/artifact/local_artifact.py +++ b/src/macaron/artifact/local_artifact.py @@ -25,17 +25,17 @@ def get_local_artifact_repo_mapper() -> Mapping[str, str]: return local_artifact_mapper -def construct_local_artifact_path_from_purl( +def construct_local_artifact_paths_from_purl( build_purl_type: str, component_purl: PackageURL, local_artifact_repo_mapper: Mapping[str, str], -) -> str | None: +) -> list[str] | None: """Get B.""" local_artifact_repo = local_artifact_repo_mapper.get(build_purl_type) if local_artifact_repo is None: return None - artifact_path = None + artifact_path = [] match build_purl_type: case "maven": group = component_purl.namespace @@ -45,10 +45,12 @@ def construct_local_artifact_path_from_purl( if group is None or version is None: return None - artifact_path = os.path.join( - local_artifact_repo, - "repository", - construct_maven_repository_path(group, artifact, version), + artifact_path.append( + os.path.join( + local_artifact_repo, + "repository", + construct_maven_repository_path(group, artifact, version), + ) ) case "pypi": # TODO: implement this. 
@@ -59,22 +61,36 @@ def construct_local_artifact_path_from_purl( return artifact_path +# key: purl type +# value: list of paths +# If a key doesn't exist -> cannot construct the artifact paths for that purl type +# (no local artifact repo found or not enough information from PURL string or simply +# the PURL string is not applicable for that purl type). +# If a value is an empty list -> Can construct the local artifact paths but no paths exist in the local artifact repository. def get_local_artifact_paths( purl: PackageURL, build_tool_purl_types: list[str], local_artifact_repo_mapper: Mapping[str, str], -) -> dict[str, str]: +) -> dict[str, list[str]]: """Get C.""" - result = {} + local_artifact_paths_purl_mapping = {} for build_purl_type in build_tool_purl_types: - local_artfiact_path = construct_local_artifact_path_from_purl( + local_artfiact_paths = construct_local_artifact_paths_from_purl( build_purl_type=build_purl_type, component_purl=purl, local_artifact_repo_mapper=local_artifact_repo_mapper, ) - if local_artfiact_path and os.path.isdir(local_artfiact_path): - result[build_purl_type] = local_artfiact_path + if not local_artfiact_paths: + continue - return result + resolved_local_artifact_paths = [] + + for local_artifact_path in local_artfiact_paths: + if os.path.isdir(local_artifact_path): + resolved_local_artifact_paths.append(local_artifact_path) + + local_artifact_paths_purl_mapping[build_purl_type] = resolved_local_artifact_paths + + return local_artifact_paths_purl_mapping diff --git a/src/macaron/slsa_analyzer/analyze_context.py b/src/macaron/slsa_analyzer/analyze_context.py index 5d57e8b2b..f0a27569a 100644 --- a/src/macaron/slsa_analyzer/analyze_context.py +++ b/src/macaron/slsa_analyzer/analyze_context.py @@ -55,9 +55,8 @@ class ChecksOutputs(TypedDict): """The commit digest extracted from provenance, if applicable.""" provenance_verified: bool """True if the provenance exists and has been verified against a signed companion provenance.""" - 
local_artifact_paths: dict[str, str] - # TODO this doc string for this variable need more informatino, to be revise later. - """The mapping between build tool types and the directory that contains the corresponding artifacts.""" + local_artifact_paths: dict[str, list[str]] + """The mapping between purl types and the local artifact absolute paths.""" class AnalyzeContext: diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py index 3e44bdcb6..50157ac5e 100644 --- a/tests/artifact/test_local_artifact.py +++ b/tests/artifact/test_local_artifact.py @@ -9,7 +9,7 @@ import pytest from packageurl import PackageURL -from macaron.artifact.local_artifact import construct_local_artifact_path_from_purl, get_local_artifact_paths +from macaron.artifact.local_artifact import construct_local_artifact_paths_from_purl, get_local_artifact_paths @pytest.mark.parametrize( @@ -19,7 +19,7 @@ "maven", "pkg:maven/com.google.guava/guava@33.2.1-jre", {"maven": "/home/foo/.m2"}, - "/home/foo/.m2/repository/com/google/guava/guava/33.2.1-jre", + ["/home/foo/.m2/repository/com/google/guava/guava/33.2.1-jre"], id="A maven type PURL with available local maven repo", ), pytest.param( @@ -63,12 +63,12 @@ def test_construct_local_artifact_path_from_purl( build_purl_type: str, purl_str: str, local_artifact_repo_mapper: Mapping[str, str], - expectation: str, + expectation: list[str], ) -> None: """Test constructing a local artifact path from a given purl.""" component_purl = PackageURL.from_string(purl_str) assert ( - construct_local_artifact_path_from_purl( + construct_local_artifact_paths_from_purl( build_purl_type=build_purl_type, component_purl=component_purl, local_artifact_repo_mapper=local_artifact_repo_mapper, @@ -78,18 +78,20 @@ def test_construct_local_artifact_path_from_purl( @pytest.mark.parametrize( - ("purl_str", "build_tool_purl_types"), + ("purl_str", "build_tool_purl_types", "expectation"), [ pytest.param( 
"pkg:maven/com.google.guava/guava@33.2.1-jre", ["maven", "pypi"], - id="A maven type PURL where multiple build tool types are discovered", + {"maven": []}, + id="A maven type PURL where multiple build tool types are discovered. But no artifact path is available.", ), ], ) def test_get_local_artifact_paths_non_existing( purl_str: str, build_tool_purl_types: list[str], + expectation: dict[str, list[str]], ) -> None: """Test getting local artifact paths of non existing artifacts. @@ -101,8 +103,11 @@ def test_get_local_artifact_paths_non_existing( "maven": temp_dir, "pypi": temp_dir, } - assert not get_local_artifact_paths( - purl=purl, - build_tool_purl_types=build_tool_purl_types, - local_artifact_repo_mapper=local_artifact_repo_mapper, + assert ( + get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + == expectation ) From 9be38a6e8a2963f0ad764704902cd855bcf1f60e Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Tue, 24 Sep 2024 14:50:13 +1000 Subject: [PATCH 05/16] chore: refactor the local artifact finding logic and add pypi support --- src/macaron/artifact/local_artifact.py | 213 +++++++++++++++----- src/macaron/slsa_analyzer/analyzer.py | 36 +++- tests/artifact/test_local_artifact.py | 263 +++++++++++++++++++------ 3 files changed, 392 insertions(+), 120 deletions(-) diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py index 67b70c318..3d23a7e7b 100644 --- a/src/macaron/artifact/local_artifact.py +++ b/src/macaron/artifact/local_artifact.py @@ -3,70 +3,177 @@ """This module declares types and utilities for handling local artifacts.""" +import fnmatch +import glob import os from collections.abc import Mapping from packageurl import PackageURL from macaron.artifact.maven import construct_maven_repository_path -from macaron.config.global_config import global_config -def get_local_artifact_repo_mapper() -> Mapping[str, str]: - 
"""Get A.""" - local_artifact_mapper: dict[str, str] = {} +def construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None: + """Return a list of glob pattern(s) to be search in a maven layout local repo for artifact directories. - if global_config.local_maven_repo: - local_artifact_mapper["maven"] = global_config.local_maven_repo + Parameters + ---------- + maven_purl : PackageURL + A maven type PackageURL instance (e.g. `PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0)`) - if global_config.python_venv_path: - local_artifact_mapper["pypi"] = global_config.python_venv_path + Returns + ------- + list[str] | None + A list of glob patterns or None if an error happened. + """ + if not maven_purl.type == "maven": + return None - return local_artifact_mapper + group = maven_purl.namespace + artifact = maven_purl.name + version = maven_purl.version + if group is None or version is None: + return None -def construct_local_artifact_paths_from_purl( - build_purl_type: str, - component_purl: PackageURL, - local_artifact_repo_mapper: Mapping[str, str], + return [construct_maven_repository_path(group, artifact, version)] + + +def construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl: PackageURL) -> list[str] | None: + """Return a list of glob pattern(s) to be search in a Python virtual environment for artifact directories. + + Parameters + ---------- + maven_purl : PackageURL + A maven type PackageURL instance (e.g. `PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0)`) + + Returns + ------- + list[str] | None + A list of glob patterns or None if an error happened. + """ + if not pypi_purl.type == "pypi": + return None + + name = pypi_purl.name + version = pypi_purl.version + + if version is None: + return None + + # These patterns are from the content of a wheel file, which are extracted into the site-packages + # directory. 
References: + # https://packaging.python.org/en/latest/specifications/binary-distribution-format/#file-contents + glob_patterns = [] + glob_patterns.append(name) + glob_patterns.append(f"{name}-{version}.dist-info") + glob_patterns.append(f"{name}-{version}.data") + + return glob_patterns + + +def find_artifact_paths_from_local_maven_repo( + local_maven_repo: str, + glob_patterns: list[str], +) -> list[str] | None: + """Return a list of existed directories within `local_maven_repo`. + + Each directory path has the form ``local_maven_repo``/. + + None means error. + """ + if not os.path.isdir(local_maven_repo): + return None + + artifact_paths = [] + for pattern in glob_patterns: + found_paths = glob.glob( + root_dir=local_maven_repo, + pathname=pattern, + ) + + for found_path in found_paths: + full_path = os.path.join(local_maven_repo, found_path) + if os.path.isdir(full_path): + artifact_paths.append(full_path) + + return artifact_paths + + +# Assume that local_python_venv exists. +# In here we need to do it case-insensitively +# We also assume that packages are just one level down from venv_path +# The return element are relative paths from venv. 
+def find_artifact_paths_from_python_venv( + venv_path: str, + glob_patterns: list[str], ) -> list[str] | None: - """Get B.""" - local_artifact_repo = local_artifact_repo_mapper.get(build_purl_type) - if local_artifact_repo is None: + """TBD.""" + if not os.path.isdir(venv_path): + return None + + artifact_paths = [] + + try: + venv_path_entries = os.listdir(venv_path) + except (NotADirectoryError, PermissionError, FileNotFoundError): return None - artifact_path = [] - match build_purl_type: - case "maven": - group = component_purl.namespace - artifact = component_purl.name - version = component_purl.version - - if group is None or version is None: - return None - - artifact_path.append( - os.path.join( - local_artifact_repo, - "repository", - construct_maven_repository_path(group, artifact, version), - ) - ) - case "pypi": - # TODO: implement this. - pass - case _: + all_package_dirs: list[str] = [] + for entry in venv_path_entries: + entry_path = os.path.join(venv_path, entry) + if os.path.isdir(entry_path): + all_package_dirs.append(entry) + + for package_dir in all_package_dirs: + for pattern in glob_patterns: + if fnmatch.fnmatch(package_dir.lower(), pattern.lower()): + full_path = os.path.join(venv_path, package_dir) + artifact_paths.append(full_path) + + return artifact_paths + + +def _get_local_artifact_path_for_build_tool_purl_type( + purl: PackageURL, + build_tool_purl_type: str, + local_artifact_repo: str, +) -> list[str] | None: + """TBD.""" + if build_tool_purl_type == "maven": + maven_artifact_patterns = construct_local_artifact_paths_glob_pattern_maven_purl(purl) + if not maven_artifact_patterns: return None - return artifact_path + artifact_paths = find_artifact_paths_from_local_maven_repo( + local_maven_repo=local_artifact_repo, + glob_patterns=maven_artifact_patterns, + ) + + if artifact_paths: + return artifact_paths + + if build_tool_purl_type == "pypi": + pypi_artifact_patterns = construct_local_artifact_paths_glob_pattern_pypi_purl(purl) + if 
not pypi_artifact_patterns: + return None + + artifact_paths = find_artifact_paths_from_python_venv( + venv_path=local_artifact_repo, + glob_patterns=pypi_artifact_patterns, + ) + + if artifact_paths: + return artifact_paths + + return None # key: purl type # value: list of paths # If a key doesn't exist -> cannot construct the artifact paths for that purl type -# (no local artifact repo found or not enough information from PURL string or simply -# the PURL string is not applicable for that purl type). -# If a value is an empty list -> Can construct the local artifact paths but no paths exist in the local artifact repository. +# (no local artifact repo found or not enough information from PURL type is not supported) OR no valid artifact paths found. +# We assume that the paths in local_artifact_repo_mapper all exists/ def get_local_artifact_paths( purl: PackageURL, build_tool_purl_types: list[str], @@ -75,22 +182,20 @@ def get_local_artifact_paths( """Get C.""" local_artifact_paths_purl_mapping = {} - for build_purl_type in build_tool_purl_types: - local_artfiact_paths = construct_local_artifact_paths_from_purl( - build_purl_type=build_purl_type, - component_purl=purl, - local_artifact_repo_mapper=local_artifact_repo_mapper, - ) - - if not local_artfiact_paths: + for build_tool_purl_type in build_tool_purl_types: + local_artifact_repo = local_artifact_repo_mapper.get(build_tool_purl_type) + if not local_artifact_repo: continue - resolved_local_artifact_paths = [] + artifact_paths = _get_local_artifact_path_for_build_tool_purl_type( + purl=purl, + build_tool_purl_type=build_tool_purl_type, + local_artifact_repo=local_artifact_repo, + ) - for local_artifact_path in local_artfiact_paths: - if os.path.isdir(local_artifact_path): - resolved_local_artifact_paths.append(local_artifact_path) + if not artifact_paths: + continue - local_artifact_paths_purl_mapping[build_purl_type] = resolved_local_artifact_paths + local_artifact_paths_purl_mapping[build_tool_purl_type] = 
artifact_paths return local_artifact_paths_purl_mapping diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 2639d00ae..913339548 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -2,10 +2,13 @@ # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """This module handles the cloning and analyzing a Git repo.""" + +import glob import logging import os import re import sys +from collections.abc import Mapping from datetime import datetime, timezone from pathlib import Path from typing import Any, NamedTuple @@ -16,7 +19,7 @@ from sqlalchemy.orm import Session from macaron import __version__ -from macaron.artifact.local_artifact import get_local_artifact_paths, get_local_artifact_repo_mapper +from macaron.artifact.local_artifact import get_local_artifact_paths from macaron.config.defaults import defaults from macaron.config.global_config import global_config from macaron.config.target_config import Configuration @@ -474,15 +477,14 @@ def run_single( analyze_ctx.dynamic_data["provenance_repo_url"] = provenance_repo_url analyze_ctx.dynamic_data["provenance_commit_digest"] = provenance_commit_digest - discovered_build_toosl = ( + discovered_build_tools = ( analyze_ctx.dynamic_data["build_spec"]["tools"] + analyze_ctx.dynamic_data["build_spec"]["purl_tools"] ) - build_tools_purl_types = [build_tool.purl_type for build_tool in discovered_build_toosl] + build_tools_purl_types = [build_tool.purl_type for build_tool in discovered_build_tools] analyze_ctx.dynamic_data["local_artifact_paths"] = get_local_artifact_paths( - # The PURL is definitely valid here. 
PackageURL.from_string(analyze_ctx.component.purl), build_tools_purl_types, - local_artifact_repo_mapper=get_local_artifact_repo_mapper(), + local_artifact_repo_mapper=self._get_local_artifact_repo_mapper(), ) analyze_ctx.check_results = registry.scan(analyze_ctx) @@ -985,6 +987,30 @@ def _verify_repository_link(self, parsed_purl: PackageURL, analyze_ctx: AnalyzeC ) analyze_ctx.dynamic_data["repo_verification"].append(verification_result) + @staticmethod + def _get_local_artifact_repo_mapper() -> Mapping[str, str]: + """Return the mapping between purl type and its local artifact repo path if that path exists.""" + local_artifact_mapper: dict[str, str] = {} + + if global_config.local_maven_repo: + m2_repository_dir = os.path.join(global_config.local_maven_repo, "repository") + if os.path.isdir(m2_repository_dir): + local_artifact_mapper["maven"] = m2_repository_dir + + if global_config.python_venv_path: + site_packages_dir_pattern = os.path.join( + global_config.python_venv_path, + "lib", + "python3.*", + "site-packages", + ) + site_packages_dirs = glob.glob(site_packages_dir_pattern) + + if len(site_packages_dirs) == 1: + local_artifact_mapper["pypi"] = site_packages_dirs.pop() + + return local_artifact_mapper + class DuplicateCmpError(DuplicateError): """This class is used for duplicated software component errors.""" diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py index 50157ac5e..87e571f5a 100644 --- a/tests/artifact/test_local_artifact.py +++ b/tests/artifact/test_local_artifact.py @@ -3,74 +3,138 @@ """Test the local artifact utilities.""" -import tempfile -from collections.abc import Mapping +import os +from pathlib import Path import pytest from packageurl import PackageURL -from macaron.artifact.local_artifact import construct_local_artifact_paths_from_purl, get_local_artifact_paths +from macaron.artifact.local_artifact import ( + construct_local_artifact_paths_glob_pattern_maven_purl, + 
construct_local_artifact_paths_glob_pattern_pypi_purl, + find_artifact_paths_from_python_venv, + get_local_artifact_paths, +) @pytest.mark.parametrize( - ("build_purl_type", "purl_str", "local_artifact_repo_mapper", "expectation"), + ("purl_str", "expectation"), [ pytest.param( - "maven", "pkg:maven/com.google.guava/guava@33.2.1-jre", - {"maven": "/home/foo/.m2"}, - ["/home/foo/.m2/repository/com/google/guava/guava/33.2.1-jre"], - id="A maven type PURL with available local maven repo", + ["com/google/guava/guava/33.2.1-jre"], + id="A Maven PURL with group, artifact and version", ), pytest.param( - "maven", - "pkg:maven/com.google.guava/guava@33.2.1-jre", - {}, - None, - id="A maven type PURL without an available local maven repo", + "pkg:maven/com.google.guava/guava@33.2.1-jre?type=jar", + ["com/google/guava/guava/33.2.1-jre"], + id="A Maven PURL with group artifact, version and type qualifier", ), + ], +) +def test_construct_local_artifact_paths_glob_pattern_maven_purl( + purl_str: str, + expectation: list[str], +) -> None: + """Test constructing a local artifact patterns from a given maven purl.""" + maven_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl=maven_purl) + assert result is not None + assert sorted(result) == sorted(expectation) + + +@pytest.mark.parametrize( + ("purl_str"), + [ + pytest.param("pkg:pypi/django@5.0.6", id="The purl type is not supported."), + pytest.param("pkg:maven/guava@33.2.1-jre", id="Missing group id in the PURL"), + pytest.param("pkg:maven/guava", id="Missing version"), + ], +) +def test_construct_local_artifact_paths_glob_pattern_maven_purl_error(purl_str: str) -> None: + """Test constructing a local artifact patterns from a given maven purl with error.""" + maven_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl=maven_purl) + assert result is None + + +@pytest.mark.parametrize( + ("purl_str", 
"expectation"), + [ pytest.param( - "maven", - "pkg:maven/com.google.guava/guava@33.2.1-jre", - {"pypi": "/home/foo/.venv"}, - None, - id="A maven type PURL without an available local maven repo but there is a Python venv", + "pkg:pypi/django@5.0.6", + ["django", "django-5.0.6.dist-info", "django-5.0.6.data"], + id="A valid pypi PURL with version", + ) + ], +) +def test_construct_local_artifact_paths_glob_pattern_pypi_purl( + purl_str: str, + expectation: list[str], +) -> None: + """Test constructing a local artifact patterns from a given pypi purl.""" + pypi_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl=pypi_purl) + assert result is not None + assert sorted(result) == sorted(expectation) + + +@pytest.mark.parametrize( + ("purl_str"), + [ + pytest.param( + "pkg:pypi/django", + id="A pypi PURL without version", ), pytest.param( - "maven", - "pkg:maven/com.google.guava/guava", - {"maven": "/home/foo/.m2"}, - None, - id="A maven type PURL with missing version and an available local maven repo", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + id="The purl type is not supported.", ), + ], +) +def test_construct_local_artifact_paths_glob_pattern_pypi_purl_error(purl_str: str) -> None: + """Test constructing a local artifact patterns from a given pypi purl with error.""" + pypi_purl = PackageURL.from_string(purl_str) + result = construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl=pypi_purl) + assert result is None + + +def test_find_artifact_paths_from_invalid_python_venv() -> None: + """Test find_artifact_paths_from_python_venv method with invalid venv path""" + assert not find_artifact_paths_from_python_venv("./does-not-exist", ["django", "django-5.0.6.dist-info"]) + + +@pytest.mark.parametrize( + ("purl_str", "build_tool_purl_types", "local_artifact_repo_mapper", "expectation"), + [ pytest.param( - "maven", - "pkg:maven/guava", - {"maven": "/home/foo/.m2"}, - None, - id="A maven type 
PURL with missing groupd Id and an available local maven repo", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + ["maven", "pypi"], + {}, + {}, + id="A maven type PURL where multiple build tool types are discovered. But no local repository is available.", ), pytest.param( - "maven", - "pkg:github/oracle/macaron", - {"maven": "/home/foo/.m2"}, - None, - id="A git type PURL and an available local maven repo", + "pkg:maven/com.google.guava/guava@33.2.1-jre", + [], + {}, + {}, + id="A maven type PURL where no build tool types are discovered and no local repository is available.", ), ], ) -def test_construct_local_artifact_path_from_purl( - build_purl_type: str, +def test_get_local_artifact_paths_empty( purl_str: str, - local_artifact_repo_mapper: Mapping[str, str], - expectation: list[str], + build_tool_purl_types: list[str], + local_artifact_repo_mapper: dict[str, str], + expectation: dict[str, list[str]], ) -> None: - """Test constructing a local artifact path from a given purl.""" - component_purl = PackageURL.from_string(purl_str) + """Test getting local artifact paths where the result is empty.""" + purl = PackageURL.from_string(purl_str) assert ( - construct_local_artifact_paths_from_purl( - build_purl_type=build_purl_type, - component_purl=component_purl, + get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, local_artifact_repo_mapper=local_artifact_repo_mapper, ) == expectation @@ -83,31 +147,108 @@ def test_construct_local_artifact_path_from_purl( pytest.param( "pkg:maven/com.google.guava/guava@33.2.1-jre", ["maven", "pypi"], - {"maven": []}, - id="A maven type PURL where multiple build tool types are discovered. 
But no artifact path is available.", + {}, + id="A maven type PURL where multiple build tool types are discovered", + ), + pytest.param( + "pkg:maven/com.google.guava/guava@33.2.1-jre", + [], + {}, + id="A maven type PURL where no build tool is discovered", + ), + pytest.param( + "pkg:pypi/django@5.0.3", + [], + {}, + id="A maven type PURL where no build tool is discovered", ), ], ) -def test_get_local_artifact_paths_non_existing( +def test_get_local_artifact_paths_not_available( purl_str: str, build_tool_purl_types: list[str], expectation: dict[str, list[str]], + tmp_path: Path, ) -> None: - """Test getting local artifact paths of non existing artifacts. - - The local artifact repos are available. - """ + """Test getting local artifact paths where the artifact paths are not available.""" purl = PackageURL.from_string(purl_str) - with tempfile.TemporaryDirectory() as temp_dir: - local_artifact_repo_mapper = { - "maven": temp_dir, - "pypi": temp_dir, - } - assert ( - get_local_artifact_paths( - purl=purl, - build_tool_purl_types=build_tool_purl_types, - local_artifact_repo_mapper=local_artifact_repo_mapper, - ) - == expectation + local_artifact_repo_mapper = { + "maven": str(tmp_path), + "pypi": str(tmp_path), + } + + assert ( + get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, ) + == expectation + ) + + +def test_get_local_artifact_paths_succeeded_maven(tmp_path: Path) -> None: + """Test getting local artifact paths succeeded with maven purl.""" + purl = PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0") + build_tool_purl_types = ["maven", "pypi"] + + tmp_path_str = str(tmp_path) + + local_artifact_repo_mapper = { + "maven": f"{tmp_path_str}/.m2/repository", + "pypi": f"{tmp_path_str}/.venv/lib/python3.11/site-packages", + } + maven_artifact_path = f"{local_artifact_repo_mapper['maven']}/com/oracle/macaron/macaron/0.13.0" + 
os.makedirs(local_artifact_repo_mapper["maven"]) + os.makedirs(local_artifact_repo_mapper["pypi"]) + os.makedirs(maven_artifact_path) + + expectation = { + "maven": [maven_artifact_path], + } + + result = get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + + assert result == expectation + + +def test_get_local_artifact_paths_succeeded_pypi(tmp_path: Path) -> None: + """Test getting local artifact paths succeeded with pypi purl.""" + purl = PackageURL.from_string("pkg:pypi/macaron@0.13.0") + build_tool_purl_types = ["maven", "pypi"] + + tmp_path_str = str(tmp_path) + + local_artifact_repo_mapper = { + "maven": f"{tmp_path_str}/.m2/repository", + "pypi": f"{tmp_path_str}/.venv/lib/python3.11/site-packages", + } + pypi_artifact_paths = [ + f"{local_artifact_repo_mapper['pypi']}/macaron", + f"{local_artifact_repo_mapper['pypi']}/macaron-0.13.0.dist-info", + f"{local_artifact_repo_mapper['pypi']}/Macaron-0.13.0.dist-info", + ] + + os.makedirs(local_artifact_repo_mapper["maven"]) + os.makedirs(local_artifact_repo_mapper["pypi"]) + + for artifact_path in pypi_artifact_paths: + os.makedirs(artifact_path) + + expectation = { + "pypi": sorted(pypi_artifact_paths), + } + + result = get_local_artifact_paths( + purl=purl, + build_tool_purl_types=build_tool_purl_types, + local_artifact_repo_mapper=local_artifact_repo_mapper, + ) + for value in result.values(): + value.sort() + + assert result == expectation From bce54d6228685ddf4973362ccbc22ee2ea572f8e Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 4 Dec 2024 13:46:04 +1000 Subject: [PATCH 06/16] chore: improve doc strings --- scripts/release_scripts/run_macaron.sh | 26 ++-- src/macaron/__main__.py | 2 +- src/macaron/artifact/local_artifact.py | 118 ++++++++++++++---- .../test_run_macaron_sh.py | 14 +-- 4 files changed, 122 insertions(+), 38 deletions(-) diff --git a/scripts/release_scripts/run_macaron.sh 
b/scripts/release_scripts/run_macaron.sh index 457ccd220..54ca13fd1 100755 --- a/scripts/release_scripts/run_macaron.sh +++ b/scripts/release_scripts/run_macaron.sh @@ -468,26 +468,36 @@ if [[ -n "${python_venv_path:-}" ]]; then mount_dir_ro "--python-venv" "$python_venv_path" "$python_venv_in_container" fi -# Mount the local Maven repo into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly. +# Mount the local Maven repo from the +# host file system into the container's +# ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly. if [[ -n "${local_maven_repo:-}" ]]; then local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") mount_dir_ro "--local-maven-repo" "$local_maven_repo" "$local_maven_repo_in_container" else - # Perform default local maven repo when the user doesn't provide --local-maven-repo and `analyze` command is used. + # Mounting default local maven repo only + # when the user doesn't provide --local-maven-repo AND `analyze` command is used. if [[ "$command" == "analyze" ]]; then - # We mount $HOME/.m2 into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly ONLY IF $HOME/.m2 directory exists. + # We mount the host's $HOME/.m2 into the container's + # ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly ONLY IF $HOME/.m2 directory exists. if [[ -d "$HOME/.m2" ]]; then local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" argv_command+=("--local-maven-repo" "${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly") mount_dir_ro "--local-maven-repo" "$HOME/.m2" "$local_maven_repo_in_container" - # If $HOME/.m2 doesn't exist, we create and mount an empty directory ${output}/analyze_local_maven_repo_readonly - # into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly. 
- # This is because we don't want Macaron running within - # the container to use `$HOME/.m2` within the container as it is being used - # by the cyclonedx plugins for dependency resolution. + # If the host's $HOME/.m2 doesn't exist, we create and mount an empty directory ${output}/analyze_local_maven_repo_readonly + # into ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly. And then provide + # ${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly into the --local-maven-repo + # flag. + # This is because: + # - By default if --local-maven-repo is not used, Macaron uses $HOME/.m2 of the current + # environment as the local maven repo. + # - If --local-maven-repo is not set when Macaron is running in the Docker container, it will try to + # use $HOME/.m2 WITHIN the container. This is not desirable as this $HOME/.m2 is being used + # by the cyclonedx plugins for dependency resolution, which requires read write. We treat the local + # maven repo as a read only directory, hence they cannot share. else local_maven_repo_in_container="${MACARON_WORKSPACE}/analyze_local_maven_repo_readonly" output_local_maven_repo="${output}/analyze_local_maven_repo_readonly" diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index 71a0a1a4e..f4c6cf7e6 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -74,7 +74,7 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None sys.exit(os.EX_OSFILE) global_config.load_python_venv(analyzer_single_args.python_venv) - # Set Python virtual environment path. + # Set local maven repo path. if analyzer_single_args.local_maven_repo is None: # Load the default user local .m2 directory. # Exit on error if $HOME is not set or empty. 
diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py index 3d23a7e7b..a8704c46b 100644 --- a/src/macaron/artifact/local_artifact.py +++ b/src/macaron/artifact/local_artifact.py @@ -14,17 +14,26 @@ def construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None: - """Return a list of glob pattern(s) to be search in a maven layout local repo for artifact directories. + """Return a list of glob pattern(s) representing maven artifacts in a local maven repository. + + The glob pattern(s) can be used to search in `<...>/.m2/repository` directory. Parameters ---------- maven_purl : PackageURL - A maven type PackageURL instance (e.g. `PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0)`) + A maven type PackageURL instance. Returns ------- list[str] | None A list of glob patterns or None if an error happened. + + Examples + -------- + >>> from packageurl import PackageURL + >>> purl = PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0") + >>> construct_local_artifact_paths_glob_pattern_maven_purl(purl) + ['com/oracle/macaron/macaron/0.13.0'] """ if not maven_purl.type == "maven": return None @@ -40,17 +49,27 @@ def construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl: PackageUR def construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl: PackageURL) -> list[str] | None: - """Return a list of glob pattern(s) to be search in a Python virtual environment for artifact directories. + """Return a list of glob pattern(s) representing python artifacts in a virtual environment. + + The glob pattern(s) can be used to search in `<...>//lib/python3.x/site-packages` + directory. Parameters ---------- maven_purl : PackageURL - A maven type PackageURL instance (e.g. `PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0)`) + A pypi type PackageURL instance. Returns ------- list[str] | None A list of glob patterns or None if an error happened. 
+ + Examples + -------- + >>> from packageurl import PackageURL + >>> purl = PackageURL.from_string("pkg:pypi/django@1.11.1") + >>> construct_local_artifact_paths_glob_pattern_pypi_purl(purl) + ['django', 'django-1.11.1.dist-info', 'django-1.11.1.data'] """ if not pypi_purl.type == "pypi": return None @@ -76,11 +95,21 @@ def find_artifact_paths_from_local_maven_repo( local_maven_repo: str, glob_patterns: list[str], ) -> list[str] | None: - """Return a list of existed directories within `local_maven_repo`. + """Find maven artifacts within a local maven repository directory. - Each directory path has the form ``local_maven_repo``/. + ``local_maven_repo`` should be in format `<...>/.m2/repository`. - None means error. + Parameters + ---------- + local_maven_repo: str + The path to the directories to find artifacts. + glob_patterns: list[str] + The list of glob patterns that matches to artifact file names. + + Returns + ------- + list[str] | None + The list of path to found artifacts in the form of ``local_maven_repo``/ """ if not os.path.isdir(local_maven_repo): return None @@ -100,15 +129,27 @@ def find_artifact_paths_from_local_maven_repo( return artifact_paths -# Assume that local_python_venv exists. -# In here we need to do it case-insensitively -# We also assume that packages are just one level down from venv_path -# The return element are relative paths from venv. def find_artifact_paths_from_python_venv( venv_path: str, glob_patterns: list[str], ) -> list[str] | None: - """TBD.""" + """Find python artifacts within a python virtual environment directory. + + For packages in the virtual environment, we will treat their name case-insensitively. + https://packaging.python.org/en/latest/specifications/name-normalization/ + + Parameters + ---------- + local_maven_repo: str + The path to the directories to find artifacts. + glob_patterns: list[str] + The list of glob patterns that matches to artifact file names. 
+ + Returns + ------- + list[str] | None + The list of path to found artifacts in the form of ``local_maven_repo``/ + """ if not os.path.isdir(venv_path): return None @@ -139,7 +180,7 @@ def _get_local_artifact_path_for_build_tool_purl_type( build_tool_purl_type: str, local_artifact_repo: str, ) -> list[str] | None: - """TBD.""" + """Find local artifacts within ``local_artifact_repo`` depending on the purl type.""" if build_tool_purl_type == "maven": maven_artifact_patterns = construct_local_artifact_paths_glob_pattern_maven_purl(purl) if not maven_artifact_patterns: @@ -169,24 +210,57 @@ def _get_local_artifact_path_for_build_tool_purl_type( return None -# key: purl type -# value: list of paths -# If a key doesn't exist -> cannot construct the artifact paths for that purl type -# (no local artifact repo found or not enough information from PURL type is not supported) OR no valid artifact paths found. -# We assume that the paths in local_artifact_repo_mapper all exists/ def get_local_artifact_paths( purl: PackageURL, build_tool_purl_types: list[str], local_artifact_repo_mapper: Mapping[str, str], ) -> dict[str, list[str]]: - """Get C.""" - local_artifact_paths_purl_mapping = {} + """Return the path to local artifacts for a PackageURL. + + We look for local artifacts of this PURL in all local repos corresponding to each purl + type in ``build_tool_purl_types`` (e.g a pypi build tool type will map to the python virtual + environment, if available). + + This function returns a dictionary with: + - keys: The purl type + - values: The list of aritfact paths corresponding to a purl type + + If a key doesn't exist, we cannot construct the artifact paths for that purl type. This can + happen because of: + - no local artifact repo found or given from user OR + - not enough information from PURL type OR + - build PURL type is not supported OR + - no valid artifact paths found + + We assume that all paths in ``local_artifact_repo_mapper`` exist. 
+ + Parameters + ---------- + purl : PackageURL + The purl we want to find local artifacts + build_tool_purl_types : list[str] + The list of build tool purl type to look for local artifacts. + local_artifact_repo_mapper: Mapping[str, str] + The mapping between each build purl type and the local artifact repo directory. + + Returns + ------- + dict[str, list[str]] + A mapping between build purl type and the paths to local artifacts if found. + """ + result = {} for build_tool_purl_type in build_tool_purl_types: local_artifact_repo = local_artifact_repo_mapper.get(build_tool_purl_type) if not local_artifact_repo: continue + # ``local_artifact_repo`` here correspond to ``build_tool_purl_type`` already + # However, because for each build tool purl type, we have different ways of: + # - Generating glob patterns + # - Applying the glob patterns + # I still put ``local_artifact_repo`` in _get_local_artifact_path_for_build_tool_purl_type + # to further handle those tasks. artifact_paths = _get_local_artifact_path_for_build_tool_purl_type( purl=purl, build_tool_purl_type=build_tool_purl_type, @@ -196,6 +270,6 @@ def get_local_artifact_paths( if not artifact_paths: continue - local_artifact_paths_purl_mapping[build_tool_purl_type] = artifact_paths + result[build_tool_purl_type] = artifact_paths - return local_artifact_paths_purl_mapping + return result diff --git a/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py b/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py index 6eaf9a1be..d72be69d1 100755 --- a/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py +++ b/tests/integration/cases/run_macaron_sh_script_unit_test/test_run_macaron_sh.py @@ -81,7 +81,7 @@ def test_macaron_command_help() -> int: return exit_code -def test_macaron_command_no_home_m2() -> int: +def test_macaron_command_no_home_m2_on_host() -> int: """Test if the ``macaron`` command in the container receives the correct 
arguments.""" test_cases = [ TestCase( @@ -103,11 +103,11 @@ def test_macaron_command_no_home_m2() -> int: return exit_code -def test_macaron_command_home_m2_available() -> int: +def test_macaron_command_host_home_m2_available() -> int: """Test if the ``macaron`` command in the container receives the correct arguments.""" test_cases = [ TestCase( - name="no --local-maven-repo and host $HOME/.m2 is available", + name="no --local-maven-repo provided by the user and host $HOME/.m2 is available", script_args=["analyze"], expected_macaron_args=["analyze", "--local-maven-repo", "/home/macaron/analyze_local_maven_repo_readonly"], ), @@ -130,7 +130,7 @@ def test_macaron_command_home_m2_available() -> int: return exit_code -def test_macaron_provide_local_maven_repo() -> int: +def test_macaron_user_provide_valid_local_maven_repo() -> int: """Test if the ``macaron`` command in the container receives the correct arguments.""" with tempfile.TemporaryDirectory() as temp_dir: test_cases = [ @@ -159,9 +159,9 @@ def main() -> int: """Run all tests.""" return ( test_macaron_command_help() - | test_macaron_command_no_home_m2() - | test_macaron_command_home_m2_available() - | test_macaron_provide_local_maven_repo() + | test_macaron_command_no_home_m2_on_host() + | test_macaron_command_host_home_m2_available() + | test_macaron_user_provide_valid_local_maven_repo() ) From e64c7c98699a5fa301e821328284eb666b3150bb Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 4 Dec 2024 22:44:39 +1000 Subject: [PATCH 07/16] chore: update command_analyze rst --- docs/source/pages/cli_usage/command_analyze.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/source/pages/cli_usage/command_analyze.rst b/docs/source/pages/cli_usage/command_analyze.rst index 2bb7f3f6a..0d43d791d 100644 --- a/docs/source/pages/cli_usage/command_analyze.rst +++ b/docs/source/pages/cli_usage/command_analyze.rst @@ -24,6 +24,7 @@ Usage [-d DIGEST] [-pe PROVENANCE_EXPECTATION] [--skip-deps] [--deps-depth 
DEPS_DEPTH] [-g TEMPLATE_PATH] [--python-venv PYTHON_VENV] + [--local-maven-repo LOCAL_MAVEN_REPO] ------- Options @@ -79,6 +80,10 @@ Options The path to the Python virtual environment of the target software component. +.. option:: --local-maven-repo LOCAL_MAVEN_REPO + + The path to the local .m2 directory. If this option is not used, Macaron will use the default location at $HOME/.m2 + ----------- Environment ----------- From 072879ca69278b41dc71a842312d1fa67adaf466 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Tue, 10 Dec 2024 10:31:54 +1000 Subject: [PATCH 08/16] chore: fix typo --- src/macaron/artifact/local_artifact.py | 2 +- tests/artifact/test_local_artifact.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py index a8704c46b..8e11e9aae 100644 --- a/src/macaron/artifact/local_artifact.py +++ b/src/macaron/artifact/local_artifact.py @@ -56,7 +56,7 @@ def construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl: PackageURL) Parameters ---------- - maven_purl : PackageURL + pypi_purl : PackageURL A pypi type PackageURL instance. 
Returns diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py index 87e571f5a..d521e4e9c 100644 --- a/tests/artifact/test_local_artifact.py +++ b/tests/artifact/test_local_artifact.py @@ -160,7 +160,7 @@ def test_get_local_artifact_paths_empty( "pkg:pypi/django@5.0.3", [], {}, - id="A maven type PURL where no build tool is discovered", + id="A pypi type PURL where no build tool is discovered", ), ], ) From 33fed9aca676cb74b699d9357c14767da3813c04 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Tue, 10 Dec 2024 10:42:46 +1000 Subject: [PATCH 09/16] chore: remove redundant os.path.exists --- src/macaron/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/macaron/__main__.py b/src/macaron/__main__.py index f4c6cf7e6..a3d1dcbc5 100644 --- a/src/macaron/__main__.py +++ b/src/macaron/__main__.py @@ -91,7 +91,7 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None global_config.local_maven_repo = local_maven_repo else: user_provided_local_maven_repo = analyzer_single_args.local_maven_repo - if not os.path.exists(user_provided_local_maven_repo) or not os.path.isdir(user_provided_local_maven_repo): + if not os.path.isdir(user_provided_local_maven_repo): logger.error("The user provided local Maven repo at %s is not valid.", user_provided_local_maven_repo) sys.exit(os.EX_USAGE) From 62836927376a81b33fa24d16bae9a05a948373a1 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Tue, 10 Dec 2024 10:44:06 +1000 Subject: [PATCH 10/16] chore: use != instead of not == --- src/macaron/artifact/local_artifact.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py index 8e11e9aae..35562ae24 100644 --- a/src/macaron/artifact/local_artifact.py +++ b/src/macaron/artifact/local_artifact.py @@ -35,7 +35,7 @@ def construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl: PackageUR >>> 
construct_local_artifact_paths_glob_pattern_maven_purl(purl) ['com/oracle/macaron/macaron/0.13.0'] """ - if not maven_purl.type == "maven": + if maven_purl.type != "maven": return None group = maven_purl.namespace @@ -71,7 +71,7 @@ def construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl: PackageURL) >>> construct_local_artifact_paths_glob_pattern_pypi_purl(purl) ['django', 'django-1.11.1.dist-info', 'django-1.11.1.data'] """ - if not pypi_purl.type == "pypi": + if pypi_purl.type != "pypi": return None name = pypi_purl.name From c686684689fd067a93e0726bbba8edf512fe6a53 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 11 Dec 2024 12:28:07 +1000 Subject: [PATCH 11/16] refactor: use exceptions for errors in get_local_artifact_paths and remove the behavior of checking one PURL against multiple local artifact repos --- src/macaron/artifact/local_artifact.py | 166 ++++++++----------- src/macaron/errors.py | 4 + src/macaron/slsa_analyzer/analyze_context.py | 6 +- src/macaron/slsa_analyzer/analyzer.py | 19 ++- tests/artifact/test_local_artifact.py | 128 ++++++-------- 5 files changed, 135 insertions(+), 188 deletions(-) diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py index 35562ae24..7cc8b8257 100644 --- a/src/macaron/artifact/local_artifact.py +++ b/src/macaron/artifact/local_artifact.py @@ -6,11 +6,11 @@ import fnmatch import glob import os -from collections.abc import Mapping from packageurl import PackageURL from macaron.artifact.maven import construct_maven_repository_path +from macaron.errors import LocalArtifactFinderError def construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None: @@ -94,7 +94,7 @@ def construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl: PackageURL) def find_artifact_paths_from_local_maven_repo( local_maven_repo: str, glob_patterns: list[str], -) -> list[str] | None: +) -> list[str]: """Find maven artifacts within a local maven 
repository directory. ``local_maven_repo`` should be in format `<...>/.m2/repository`. @@ -102,17 +102,23 @@ def find_artifact_paths_from_local_maven_repo( Parameters ---------- local_maven_repo: str - The path to the directories to find artifacts. + The path to the directory to find artifacts. glob_patterns: list[str] The list of glob patterns that matches to artifact file names. Returns ------- - list[str] | None - The list of path to found artifacts in the form of ``local_maven_repo``/ + list[str] + The list of path to found artifacts in the form of ``local_maven_repo``/. + If no artifact is found, this list will be empty. + + Raises + ------ + LocalArtifactFinderError + If ``local_maven_repo`` doesn't exist. """ if not os.path.isdir(local_maven_repo): - return None + raise LocalArtifactFinderError(f"{local_maven_repo} doesn't exist.") artifact_paths = [] for pattern in glob_patterns: @@ -130,146 +136,110 @@ def find_artifact_paths_from_local_maven_repo( def find_artifact_paths_from_python_venv( - venv_path: str, + venv_site_package_path: str, glob_patterns: list[str], -) -> list[str] | None: +) -> list[str]: """Find python artifacts within a python virtual environment directory. For packages in the virtual environment, we will treat their name case-insensitively. https://packaging.python.org/en/latest/specifications/name-normalization/ + ``venv_site_package_path`` should be in format `<...>/lib/python3.*/site-packages/`. + Parameters ---------- - local_maven_repo: str - The path to the directories to find artifacts. + venv_path: str + The path to the local directory to find artifacts. glob_patterns: list[str] The list of glob patterns that matches to artifact file names. Returns ------- - list[str] | None - The list of path to found artifacts in the form of ``local_maven_repo``/ + list[str] + The list of path to found artifacts in the form of ``venv_site_package_path``/ + If no artifact is found, this list will be empty. 
+ + Raises + ------ + LocalArtifactFinderError + If ``venv_site_package_path`` doesn't exist or if we cannot view the sub-directory of it. """ - if not os.path.isdir(venv_path): - return None + if not os.path.isdir(venv_site_package_path): + raise LocalArtifactFinderError(f"{venv_site_package_path} doesn't exist.") artifact_paths = [] try: - venv_path_entries = os.listdir(venv_path) - except (NotADirectoryError, PermissionError, FileNotFoundError): - return None + venv_path_entries = os.listdir(venv_site_package_path) + except (NotADirectoryError, PermissionError, FileNotFoundError) as error: + error_msg = f"Cannot view the sub-directory of venv {venv_site_package_path}" + raise LocalArtifactFinderError(error_msg) from error all_package_dirs: list[str] = [] for entry in venv_path_entries: - entry_path = os.path.join(venv_path, entry) + entry_path = os.path.join(venv_site_package_path, entry) if os.path.isdir(entry_path): all_package_dirs.append(entry) for package_dir in all_package_dirs: for pattern in glob_patterns: if fnmatch.fnmatch(package_dir.lower(), pattern.lower()): - full_path = os.path.join(venv_path, package_dir) + full_path = os.path.join(venv_site_package_path, package_dir) artifact_paths.append(full_path) return artifact_paths -def _get_local_artifact_path_for_build_tool_purl_type( - purl: PackageURL, - build_tool_purl_type: str, - local_artifact_repo: str, -) -> list[str] | None: - """Find local artifacts within ``local_artifact_repo`` depending on the purl type.""" - if build_tool_purl_type == "maven": - maven_artifact_patterns = construct_local_artifact_paths_glob_pattern_maven_purl(purl) - if not maven_artifact_patterns: - return None - - artifact_paths = find_artifact_paths_from_local_maven_repo( - local_maven_repo=local_artifact_repo, - glob_patterns=maven_artifact_patterns, - ) - - if artifact_paths: - return artifact_paths - - if build_tool_purl_type == "pypi": - pypi_artifact_patterns = 
construct_local_artifact_paths_glob_pattern_pypi_purl(purl) - if not pypi_artifact_patterns: - return None - - artifact_paths = find_artifact_paths_from_python_venv( - venv_path=local_artifact_repo, - glob_patterns=pypi_artifact_patterns, - ) - - if artifact_paths: - return artifact_paths - - return None - - def get_local_artifact_paths( purl: PackageURL, - build_tool_purl_types: list[str], - local_artifact_repo_mapper: Mapping[str, str], -) -> dict[str, list[str]]: + local_artifact_repo_path: str, +) -> list[str]: """Return the path to local artifacts for a PackageURL. - We look for local artifacts of this PURL in all local repos corresponding to each purl - type in ``build_tool_purl_types`` (e.g a pypi build tool type will map to the python virtual - environment, if available). - - This function returns a dictionary with: - - keys: The purl type - - values: The list of aritfact paths corresponding to a purl type + We look for local artifacts of this PURL in ``local_artifact_repo_path``. - If a key doesn't exist, we cannot construct the artifact paths for that purl type. This can - happen because of: - - no local artifact repo found or given from user OR - - not enough information from PURL type OR - - build PURL type is not supported OR - - no valid artifact paths found + This function returns a list of paths (as strings), each has the format + ``local_artifact_repo_path``/path/to/artifact`` - We assume that all paths in ``local_artifact_repo_mapper`` exist. + We assume that ``local_artifact_repo_path`` exists. Parameters ---------- purl : PackageURL The purl we want to find local artifacts - build_tool_purl_types : list[str] - The list of build tool purl type to look for local artifacts. - local_artifact_repo_mapper: Mapping[str, str] - The mapping between each build purl type and the local artifact repo directory. + local_artifact_repo_path : str + The local artifact repo directory. 
Returns ------- - dict[str, list[str]] - A mapping between build purl type and the paths to local artifacts if found. + list[str] + The list contains the found artifact paths. It will be empty if no artifact can be found. + + Raises + ------ + LocalArtifactFinderError + If an error happens when looking for local artifacts. """ - result = {} - - for build_tool_purl_type in build_tool_purl_types: - local_artifact_repo = local_artifact_repo_mapper.get(build_tool_purl_type) - if not local_artifact_repo: - continue - - # ``local_artifact_repo`` here correspond to ``build_tool_purl_type`` already - # However, because for each build tool purl type, we have different ways of: - # - Generating glob patterns - # - Applying the glob patterns - # I still put ``local_artifact_repo`` in _get_local_artifact_path_for_build_tool_purl_type - # to further handle those tasks. - artifact_paths = _get_local_artifact_path_for_build_tool_purl_type( - purl=purl, - build_tool_purl_type=build_tool_purl_type, - local_artifact_repo=local_artifact_repo, + purl_type = purl.type + + if purl_type == "maven": + maven_artifact_patterns = construct_local_artifact_paths_glob_pattern_maven_purl(purl) + if not maven_artifact_patterns: + raise LocalArtifactFinderError(f"Cannot generate maven artifact patterns for {purl}") + + return find_artifact_paths_from_local_maven_repo( + local_maven_repo=local_artifact_repo_path, + glob_patterns=maven_artifact_patterns, ) - if not artifact_paths: - continue + if purl_type == "pypi": + pypi_artifact_patterns = construct_local_artifact_paths_glob_pattern_pypi_purl(purl) + if not pypi_artifact_patterns: + raise LocalArtifactFinderError(f"Cannot generate Python package patterns for {purl}") - result[build_tool_purl_type] = artifact_paths + return find_artifact_paths_from_python_venv( + venv_site_package_path=local_artifact_repo_path, + glob_patterns=pypi_artifact_patterns, + ) - return result + raise LocalArtifactFinderError(f"Unsupported PURL type {purl_type}") diff 
--git a/src/macaron/errors.py b/src/macaron/errors.py index b4e8b813f..5ae90295c 100644 --- a/src/macaron/errors.py +++ b/src/macaron/errors.py @@ -90,3 +90,7 @@ class DependencyAnalyzerError(MacaronError): class HeuristicAnalyzerValueError(MacaronError): """Error class for BaseHeuristicAnalyzer errors when parsing data.""" + + +class LocalArtifactFinderError(MacaronError): + """Happens when there is an error looking for local artifacts.""" diff --git a/src/macaron/slsa_analyzer/analyze_context.py b/src/macaron/slsa_analyzer/analyze_context.py index f0a27569a..31da3d54c 100644 --- a/src/macaron/slsa_analyzer/analyze_context.py +++ b/src/macaron/slsa_analyzer/analyze_context.py @@ -55,8 +55,8 @@ class ChecksOutputs(TypedDict): """The commit digest extracted from provenance, if applicable.""" provenance_verified: bool """True if the provenance exists and has been verified against a signed companion provenance.""" - local_artifact_paths: dict[str, list[str]] - """The mapping between purl types and the local artifact absolute paths.""" + local_artifact_paths: list[str] + """The local artifact absolute paths.""" class AnalyzeContext: @@ -112,7 +112,7 @@ def __init__( provenance_repo_url=None, provenance_commit_digest=None, provenance_verified=False, - local_artifact_paths={}, + local_artifact_paths=[], ) @property diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 913339548..bafe13e42 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -115,6 +115,8 @@ def __init__(self, output_path: str, build_log_path: str) -> None: # Create database tables: all checks have been registered so all tables should be mapped now self.db_man.create_tables() + self.local_artifact_repo_mapper = Analyzer._get_local_artifact_repo_mapper() + def run( self, user_config: dict, @@ -477,15 +479,14 @@ def run_single( analyze_ctx.dynamic_data["provenance_repo_url"] = provenance_repo_url 
analyze_ctx.dynamic_data["provenance_commit_digest"] = provenance_commit_digest - discovered_build_tools = ( - analyze_ctx.dynamic_data["build_spec"]["tools"] + analyze_ctx.dynamic_data["build_spec"]["purl_tools"] - ) - build_tools_purl_types = [build_tool.purl_type for build_tool in discovered_build_tools] - analyze_ctx.dynamic_data["local_artifact_paths"] = get_local_artifact_paths( - PackageURL.from_string(analyze_ctx.component.purl), - build_tools_purl_types, - local_artifact_repo_mapper=self._get_local_artifact_repo_mapper(), - ) + if parsed_purl and parsed_purl.type in self.local_artifact_repo_mapper: + local_artifact_repo_path = self.local_artifact_repo_mapper[parsed_purl.type] + analyze_ctx.dynamic_data["local_artifact_paths"].extend( + get_local_artifact_paths( + purl=parsed_purl, + local_artifact_repo_path=local_artifact_repo_path, + ) + ) analyze_ctx.check_results = registry.scan(analyze_ctx) diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py index d521e4e9c..b2130f85a 100644 --- a/tests/artifact/test_local_artifact.py +++ b/tests/artifact/test_local_artifact.py @@ -15,6 +15,7 @@ find_artifact_paths_from_python_venv, get_local_artifact_paths, ) +from macaron.errors import LocalArtifactFinderError @pytest.mark.parametrize( @@ -101,154 +102,125 @@ def test_construct_local_artifact_paths_glob_pattern_pypi_purl_error(purl_str: s def test_find_artifact_paths_from_invalid_python_venv() -> None: """Test find_artifact_paths_from_python_venv method with invalid venv path""" - assert not find_artifact_paths_from_python_venv("./does-not-exist", ["django", "django-5.0.6.dist-info"]) + with pytest.raises(LocalArtifactFinderError): + find_artifact_paths_from_python_venv("./does-not-exist", ["django", "django-5.0.6.dist-info"]) @pytest.mark.parametrize( - ("purl_str", "build_tool_purl_types", "local_artifact_repo_mapper", "expectation"), + ("purl_str", "expectation"), [ pytest.param( "pkg:maven/com.google.guava/guava@33.2.1-jre", 
- ["maven", "pypi"], - {}, - {}, - id="A maven type PURL where multiple build tool types are discovered. But no local repository is available.", + [], + id="A maven type PURL", ), pytest.param( - "pkg:maven/com.google.guava/guava@33.2.1-jre", + "pkg:pypi/django@5.0.3", [], - {}, - {}, - id="A maven type PURL where no build tool types are discovered and no local repository is available.", + id="A pypi type PURL", ), ], ) -def test_get_local_artifact_paths_empty( +def test_get_local_artifact_paths_not_available( purl_str: str, - build_tool_purl_types: list[str], - local_artifact_repo_mapper: dict[str, str], - expectation: dict[str, list[str]], + expectation: list[str], + tmp_path: Path, ) -> None: - """Test getting local artifact paths where the result is empty.""" + """Test getting local artifact paths where we cannot find local artifacts for the PURL.""" purl = PackageURL.from_string(purl_str) + assert ( get_local_artifact_paths( purl=purl, - build_tool_purl_types=build_tool_purl_types, - local_artifact_repo_mapper=local_artifact_repo_mapper, + local_artifact_repo_path=str(tmp_path), ) == expectation ) @pytest.mark.parametrize( - ("purl_str", "build_tool_purl_types", "expectation"), + ("purl_str"), [ pytest.param( - "pkg:maven/com.google.guava/guava@33.2.1-jre", - ["maven", "pypi"], - {}, - id="A maven type PURL where multiple build tool types are discovered", + "pkg:maven/com.google.guava/guava", + id="A maven type PURL with no version", ), pytest.param( - "pkg:maven/com.google.guava/guava@33.2.1-jre", - [], - {}, - id="A maven type PURL where no build tool is discovered", + "pkg:maven/guava@33.2.1-jre", + id="A maven type PURL with no group", ), pytest.param( - "pkg:pypi/django@5.0.3", - [], - {}, - id="A pypi type PURL where no build tool is discovered", + "pkg:maven/guava", + id="A maven type PURL with no group and no version", + ), + pytest.param( + "pkg:pypi/django", + id="A pypi type PURL without version", + ), + pytest.param( + "pkg:github/oracle/macaron", 
+ id="A github type PURL (unsupported)", ), ], ) -def test_get_local_artifact_paths_not_available( +def test_get_local_artifact_paths_invalid_purl( purl_str: str, - build_tool_purl_types: list[str], - expectation: dict[str, list[str]], tmp_path: Path, ) -> None: - """Test getting local artifact paths where the artifact paths are not available.""" + """Test getting local artifact paths where the input PURL is invalid.""" purl = PackageURL.from_string(purl_str) - local_artifact_repo_mapper = { - "maven": str(tmp_path), - "pypi": str(tmp_path), - } - assert ( + with pytest.raises(LocalArtifactFinderError): get_local_artifact_paths( purl=purl, - build_tool_purl_types=build_tool_purl_types, - local_artifact_repo_mapper=local_artifact_repo_mapper, + local_artifact_repo_path=str(tmp_path), ) - == expectation - ) def test_get_local_artifact_paths_succeeded_maven(tmp_path: Path) -> None: """Test getting local artifact paths succeeded with maven purl.""" purl = PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0") - build_tool_purl_types = ["maven", "pypi"] tmp_path_str = str(tmp_path) - local_artifact_repo_mapper = { - "maven": f"{tmp_path_str}/.m2/repository", - "pypi": f"{tmp_path_str}/.venv/lib/python3.11/site-packages", - } - maven_artifact_path = f"{local_artifact_repo_mapper['maven']}/com/oracle/macaron/macaron/0.13.0" - os.makedirs(local_artifact_repo_mapper["maven"]) - os.makedirs(local_artifact_repo_mapper["pypi"]) - os.makedirs(maven_artifact_path) - - expectation = { - "maven": [maven_artifact_path], - } + maven_local_repo_path = f"{tmp_path_str}/.m2/repository" + target_artifact_path = f"{maven_local_repo_path}/com/oracle/macaron/macaron/0.13.0" + os.makedirs(maven_local_repo_path) + os.makedirs(target_artifact_path) result = get_local_artifact_paths( purl=purl, - build_tool_purl_types=build_tool_purl_types, - local_artifact_repo_mapper=local_artifact_repo_mapper, + local_artifact_repo_path=maven_local_repo_path, ) - assert result == expectation 
+ assert result == [target_artifact_path] def test_get_local_artifact_paths_succeeded_pypi(tmp_path: Path) -> None: """Test getting local artifact paths succeeded with pypi purl.""" purl = PackageURL.from_string("pkg:pypi/macaron@0.13.0") - build_tool_purl_types = ["maven", "pypi"] tmp_path_str = str(tmp_path) - local_artifact_repo_mapper = { - "maven": f"{tmp_path_str}/.m2/repository", - "pypi": f"{tmp_path_str}/.venv/lib/python3.11/site-packages", - } + python_venv_path = f"{tmp_path_str}/.venv/lib/python3.11/site-packages" + + # We are also testing if the patterns match case-insensitively. pypi_artifact_paths = [ - f"{local_artifact_repo_mapper['pypi']}/macaron", - f"{local_artifact_repo_mapper['pypi']}/macaron-0.13.0.dist-info", - f"{local_artifact_repo_mapper['pypi']}/Macaron-0.13.0.dist-info", + f"{python_venv_path}/macaron", + f"{python_venv_path}/macaron-0.13.0.dist-info", + f"{python_venv_path}/Macaron-0.13.0.dist-info", + f"{python_venv_path}/macaron-0.13.0.data", + f"{python_venv_path}/Macaron-0.13.0.data", ] - os.makedirs(local_artifact_repo_mapper["maven"]) - os.makedirs(local_artifact_repo_mapper["pypi"]) + os.makedirs(python_venv_path) for artifact_path in pypi_artifact_paths: os.makedirs(artifact_path) - expectation = { - "pypi": sorted(pypi_artifact_paths), - } - result = get_local_artifact_paths( purl=purl, - build_tool_purl_types=build_tool_purl_types, - local_artifact_repo_mapper=local_artifact_repo_mapper, + local_artifact_repo_path=python_venv_path, ) - for value in result.values(): - value.sort() - assert result == expectation + assert sorted(result) == sorted(pypi_artifact_paths) From 48460d209951dfa80e19beb6c2736f24e162d451 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 11 Dec 2024 12:37:26 +1000 Subject: [PATCH 12/16] chore: improve function names and documentation to highlight the return values of this feature --- src/macaron/artifact/local_artifact.py | 22 +++++++++++++--------- tests/artifact/test_local_artifact.py | 12 
++++++------ 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py index 7cc8b8257..094eb9c2c 100644 --- a/src/macaron/artifact/local_artifact.py +++ b/src/macaron/artifact/local_artifact.py @@ -13,8 +13,8 @@ from macaron.errors import LocalArtifactFinderError -def construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None: - """Return a list of glob pattern(s) representing maven artifacts in a local maven repository. +def construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl: PackageURL) -> list[str] | None: + """Return a list of glob pattern(s) representing the directory that contains the local maven artifacts for ``maven_purl``. The glob pattern(s) can be used to search in `<...>/.m2/repository` directory. @@ -48,8 +48,8 @@ def construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl: PackageUR return [construct_maven_repository_path(group, artifact, version)] -def construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl: PackageURL) -> list[str] | None: - """Return a list of glob pattern(s) representing python artifacts in a virtual environment. +def construct_local_artifact_dirs_glob_pattern_pypi_purl(pypi_purl: PackageURL) -> list[str] | None: + """Return a list of glob pattern(s) representing directories that contains the artifacts in a Python virtual environment. The glob pattern(s) can be used to search in `<...>//lib/python3.x/site-packages` directory. @@ -194,12 +194,16 @@ def get_local_artifact_paths( purl: PackageURL, local_artifact_repo_path: str, ) -> list[str]: - """Return the path to local artifacts for a PackageURL. + """Return the paths to directories that store local artifacts for a PackageURL. - We look for local artifacts of this PURL in ``local_artifact_repo_path``. + We look for local artifacts of ``purl`` in ``local_artifact_repo_path``. 
This function returns a list of paths (as strings), each has the format - ``local_artifact_repo_path``/path/to/artifact`` + ``local_artifact_repo_path``/path/to/artifact_dir`` + + This will mean that no path to an artifact is returned. Therefore, it's the responsibility + of this function caller to inspect the artifact directory to obtain the required + artifact. We assume that ``local_artifact_repo_path`` exists. @@ -223,7 +227,7 @@ def get_local_artifact_paths( purl_type = purl.type if purl_type == "maven": - maven_artifact_patterns = construct_local_artifact_paths_glob_pattern_maven_purl(purl) + maven_artifact_patterns = construct_local_artifact_dirs_glob_pattern_maven_purl(purl) if not maven_artifact_patterns: raise LocalArtifactFinderError(f"Cannot generate maven artifact patterns for {purl}") @@ -233,7 +237,7 @@ def get_local_artifact_paths( ) if purl_type == "pypi": - pypi_artifact_patterns = construct_local_artifact_paths_glob_pattern_pypi_purl(purl) + pypi_artifact_patterns = construct_local_artifact_dirs_glob_pattern_pypi_purl(purl) if not pypi_artifact_patterns: raise LocalArtifactFinderError(f"Cannot generate Python package patterns for {purl}") diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py index b2130f85a..71fb85282 100644 --- a/tests/artifact/test_local_artifact.py +++ b/tests/artifact/test_local_artifact.py @@ -10,8 +10,8 @@ from packageurl import PackageURL from macaron.artifact.local_artifact import ( - construct_local_artifact_paths_glob_pattern_maven_purl, - construct_local_artifact_paths_glob_pattern_pypi_purl, + construct_local_artifact_dirs_glob_pattern_maven_purl, + construct_local_artifact_dirs_glob_pattern_pypi_purl, find_artifact_paths_from_python_venv, get_local_artifact_paths, ) @@ -39,7 +39,7 @@ def test_construct_local_artifact_paths_glob_pattern_maven_purl( ) -> None: """Test constructing a local artifact patterns from a given maven purl.""" maven_purl = PackageURL.from_string(purl_str) - 
result = construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl=maven_purl) + result = construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl=maven_purl) assert result is not None assert sorted(result) == sorted(expectation) @@ -55,7 +55,7 @@ def test_construct_local_artifact_paths_glob_pattern_maven_purl( def test_construct_local_artifact_paths_glob_pattern_maven_purl_error(purl_str: str) -> None: """Test constructing a local artifact patterns from a given maven purl with error.""" maven_purl = PackageURL.from_string(purl_str) - result = construct_local_artifact_paths_glob_pattern_maven_purl(maven_purl=maven_purl) + result = construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl=maven_purl) assert result is None @@ -75,7 +75,7 @@ def test_construct_local_artifact_paths_glob_pattern_pypi_purl( ) -> None: """Test constructing a local artifact patterns from a given pypi purl.""" pypi_purl = PackageURL.from_string(purl_str) - result = construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl=pypi_purl) + result = construct_local_artifact_dirs_glob_pattern_pypi_purl(pypi_purl=pypi_purl) assert result is not None assert sorted(result) == sorted(expectation) @@ -96,7 +96,7 @@ def test_construct_local_artifact_paths_glob_pattern_pypi_purl( def test_construct_local_artifact_paths_glob_pattern_pypi_purl_error(purl_str: str) -> None: """Test constructing a local artifact patterns from a given pypi purl with error.""" pypi_purl = PackageURL.from_string(purl_str) - result = construct_local_artifact_paths_glob_pattern_pypi_purl(pypi_purl=pypi_purl) + result = construct_local_artifact_dirs_glob_pattern_pypi_purl(pypi_purl=pypi_purl) assert result is None From a8578102a99c562eb6dac9f43395e87cb50794a8 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 11 Dec 2024 12:40:07 +1000 Subject: [PATCH 13/16] chore: improve description of the docker_local_maven_repo_input_errors integration test case --- 
.../cases/docker_local_maven_repo_input_errors/test.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml b/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml index 7de49fcac..1fcd585c3 100644 --- a/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml +++ b/tests/integration/cases/docker_local_maven_repo_input_errors/test.yaml @@ -27,7 +27,7 @@ steps: - --local-maven-repo - invalid_dir expect_fail: true -- name: Create a test file. +- name: Create a test file to mimic user input. Therefore, this test file will be outside of the output dir. kind: shell options: cmd: touch test.txt @@ -40,7 +40,7 @@ steps: - --local-maven-repo - ./test.txt expect_fail: true -- name: Clean up the test file. +- name: Clean up the test file because it's not automatically cleaned up by the test script as it's outside of the output directory. kind: shell options: cmd: rm test.txt From 5936102286d60fa9186d7fe794d9c11706de0ff9 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 11 Dec 2024 14:21:13 +1000 Subject: [PATCH 14/16] chore: update more doc strings and function name to highlight the return values --- src/macaron/artifact/local_artifact.py | 32 +++++++++++++------------- src/macaron/slsa_analyzer/analyzer.py | 4 ++-- tests/artifact/test_local_artifact.py | 14 +++++------ 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/macaron/artifact/local_artifact.py b/src/macaron/artifact/local_artifact.py index 094eb9c2c..ed37c335a 100644 --- a/src/macaron/artifact/local_artifact.py +++ b/src/macaron/artifact/local_artifact.py @@ -32,7 +32,7 @@ def construct_local_artifact_dirs_glob_pattern_maven_purl(maven_purl: PackageURL -------- >>> from packageurl import PackageURL >>> purl = PackageURL.from_string("pkg:maven/com.oracle.macaron/macaron@0.13.0") - >>> construct_local_artifact_paths_glob_pattern_maven_purl(purl) + >>>
construct_local_artifact_dirs_glob_pattern_maven_purl(purl) ['com/oracle/macaron/macaron/0.13.0'] """ if maven_purl.type != "maven": @@ -68,7 +68,7 @@ def construct_local_artifact_dirs_glob_pattern_pypi_purl(pypi_purl: PackageURL) -------- >>> from packageurl import PackageURL >>> purl = PackageURL.from_string("pkg:pypi/django@1.11.1") - >>> construct_local_artifact_paths_glob_pattern_pypi_purl(purl) + >>> construct_local_artifact_dirs_glob_pattern_pypi_purl(purl) ['django', 'django-1.11.1.dist-info', 'django-1.11.1.data'] """ if pypi_purl.type != "pypi": @@ -91,11 +91,11 @@ def construct_local_artifact_dirs_glob_pattern_pypi_purl(pypi_purl: PackageURL) return glob_patterns -def find_artifact_paths_from_local_maven_repo( +def find_artifact_dirs_from_local_maven_repo( local_maven_repo: str, glob_patterns: list[str], ) -> list[str]: - """Find maven artifacts within a local maven repository directory. + """Find directories that contain maven artifacts within a maven local repository. ``local_maven_repo`` should be in format `<...>/.m2/repository`. @@ -104,13 +104,13 @@ def find_artifact_paths_from_local_maven_repo( local_maven_repo: str The path to the directory to find artifacts. glob_patterns: list[str] - The list of glob patterns that matches to artifact file names. + The list of glob patterns that match artifact directory names. Returns ------- list[str] - The list of path to found artifacts in the form of ``local_maven_repo``/. - If no artifact is found, this list will be empty. + The list of paths to artifact directories in the form of ``local_maven_repo``/path/to/artifact_dir + If no artifact directory is found, this list will be empty.
Raises ------ @@ -135,11 +135,11 @@ def find_artifact_paths_from_local_maven_repo( return artifact_paths -def find_artifact_paths_from_python_venv( +def find_artifact_dirs_from_python_venv( venv_site_package_path: str, glob_patterns: list[str], ) -> list[str]: - """Find python artifacts within a python virtual environment directory. + """Find directories within a python virtual environment. For packages in the virtual environment, we will treat their name case-insensitively. https://packaging.python.org/en/latest/specifications/name-normalization/ @@ -151,13 +151,13 @@ def find_artifact_paths_from_python_venv( venv_path: str The path to the local directory to find artifacts. glob_patterns: list[str] - The list of glob patterns that matches to artifact file names. + The list of glob patterns that matches to artifact directory names. Returns ------- list[str] - The list of path to found artifacts in the form of ``venv_site_package_path``/ - If no artifact is found, this list will be empty. + The list of paths to artifact directories in the form of ``venv_site_package_path``/path/to/artifact_dir + If no artifact directory is found, this list will be empty. Raises ------ @@ -190,7 +190,7 @@ def find_artifact_paths_from_python_venv( return artifact_paths -def get_local_artifact_paths( +def get_local_artifact_dirs( purl: PackageURL, local_artifact_repo_path: str, ) -> list[str]: @@ -217,7 +217,7 @@ def get_local_artifact_paths( Returns ------- list[str] - The list contains the found artifact paths. It will be empty if no artifact can be found. + The list contains the artifact directory paths. It will be empty if no artifact can be found. 
Raises ------ @@ -231,7 +231,7 @@ def get_local_artifact_paths( if not maven_artifact_patterns: raise LocalArtifactFinderError(f"Cannot generate maven artifact patterns for {purl}") - return find_artifact_paths_from_local_maven_repo( + return find_artifact_dirs_from_local_maven_repo( local_maven_repo=local_artifact_repo_path, glob_patterns=maven_artifact_patterns, ) @@ -241,7 +241,7 @@ def get_local_artifact_paths( if not pypi_artifact_patterns: raise LocalArtifactFinderError(f"Cannot generate Python package patterns for {purl}") - return find_artifact_paths_from_python_venv( + return find_artifact_dirs_from_python_venv( venv_site_package_path=local_artifact_repo_path, glob_patterns=pypi_artifact_patterns, ) diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index bafe13e42..a37a3d722 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -19,7 +19,7 @@ from sqlalchemy.orm import Session from macaron import __version__ -from macaron.artifact.local_artifact import get_local_artifact_paths +from macaron.artifact.local_artifact import get_local_artifact_dirs from macaron.config.defaults import defaults from macaron.config.global_config import global_config from macaron.config.target_config import Configuration @@ -482,7 +482,7 @@ def run_single( if parsed_purl and parsed_purl.type in self.local_artifact_repo_mapper: local_artifact_repo_path = self.local_artifact_repo_mapper[parsed_purl.type] analyze_ctx.dynamic_data["local_artifact_paths"].extend( - get_local_artifact_paths( + get_local_artifact_dirs( purl=parsed_purl, local_artifact_repo_path=local_artifact_repo_path, ) diff --git a/tests/artifact/test_local_artifact.py b/tests/artifact/test_local_artifact.py index 71fb85282..0f0bbd270 100644 --- a/tests/artifact/test_local_artifact.py +++ b/tests/artifact/test_local_artifact.py @@ -12,8 +12,8 @@ from macaron.artifact.local_artifact import ( 
construct_local_artifact_dirs_glob_pattern_maven_purl, construct_local_artifact_dirs_glob_pattern_pypi_purl, - find_artifact_paths_from_python_venv, - get_local_artifact_paths, + find_artifact_dirs_from_python_venv, + get_local_artifact_dirs, ) from macaron.errors import LocalArtifactFinderError @@ -103,7 +103,7 @@ def test_construct_local_artifact_paths_glob_pattern_pypi_purl_error(purl_str: s def test_find_artifact_paths_from_invalid_python_venv() -> None: """Test find_artifact_paths_from_python_venv method with invalid venv path""" with pytest.raises(LocalArtifactFinderError): - find_artifact_paths_from_python_venv("./does-not-exist", ["django", "django-5.0.6.dist-info"]) + find_artifact_dirs_from_python_venv("./does-not-exist", ["django", "django-5.0.6.dist-info"]) @pytest.mark.parametrize( @@ -130,7 +130,7 @@ def test_get_local_artifact_paths_not_available( purl = PackageURL.from_string(purl_str) assert ( - get_local_artifact_paths( + get_local_artifact_dirs( purl=purl, local_artifact_repo_path=str(tmp_path), ) @@ -171,7 +171,7 @@ def test_get_local_artifact_paths_invalid_purl( purl = PackageURL.from_string(purl_str) with pytest.raises(LocalArtifactFinderError): - get_local_artifact_paths( + get_local_artifact_dirs( purl=purl, local_artifact_repo_path=str(tmp_path), ) @@ -188,7 +188,7 @@ def test_get_local_artifact_paths_succeeded_maven(tmp_path: Path) -> None: os.makedirs(maven_local_repo_path) os.makedirs(target_artifact_path) - result = get_local_artifact_paths( + result = get_local_artifact_dirs( purl=purl, local_artifact_repo_path=maven_local_repo_path, ) @@ -218,7 +218,7 @@ def test_get_local_artifact_paths_succeeded_pypi(tmp_path: Path) -> None: for artifact_path in pypi_artifact_paths: os.makedirs(artifact_path) - result = get_local_artifact_paths( + result = get_local_artifact_dirs( purl=purl, local_artifact_repo_path=python_venv_path, ) From 6265fe09c4086e8f6ccf5227969f3f160da2b251 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 11 Dec 2024 
15:14:09 +1000 Subject: [PATCH 15/16] chore: add exception catching in analyzer.py --- src/macaron/slsa_analyzer/analyzer.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index a37a3d722..c6edfd6d6 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -30,6 +30,7 @@ DuplicateError, InvalidAnalysisTargetError, InvalidPURLError, + LocalArtifactFinderError, ProvenanceError, PURLNotFoundError, ) @@ -481,12 +482,14 @@ def run_single( if parsed_purl and parsed_purl.type in self.local_artifact_repo_mapper: local_artifact_repo_path = self.local_artifact_repo_mapper[parsed_purl.type] - analyze_ctx.dynamic_data["local_artifact_paths"].extend( - get_local_artifact_dirs( + try: + local_artifact_dirs = get_local_artifact_dirs( purl=parsed_purl, local_artifact_repo_path=local_artifact_repo_path, ) - ) + analyze_ctx.dynamic_data["local_artifact_paths"].extend(local_artifact_dirs) + except LocalArtifactFinderError as error: + logger.debug(error) analyze_ctx.check_results = registry.scan(analyze_ctx) From 9a1da36da52309e4553e0b5d79a12ee03bcfce10 Mon Sep 17 00:00:00 2001 From: Trong Nhan Mai Date: Wed, 11 Dec 2024 17:12:57 +1000 Subject: [PATCH 16/16] chore: add a log message when the input python venv has multiple python3.* directories --- src/macaron/slsa_analyzer/analyzer.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index c6edfd6d6..a5fd67f22 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -1012,6 +1012,11 @@ def _get_local_artifact_repo_mapper() -> Mapping[str, str]: if len(site_packages_dirs) == 1: local_artifact_mapper["pypi"] = site_packages_dirs.pop() + else: + logger.info( + "There are multiple python3.* directories in the input Python venv. 
" + + "This venv will NOT be used for local artifact findings." + ) return local_artifact_mapper