From da182aaa537367c85625f37160da570d9952c975 Mon Sep 17 00:00:00 2001 From: Ben Selwyn-Smith Date: Tue, 4 Jun 2024 14:39:52 +1000 Subject: [PATCH] chore: validate input repo, commit, provenance to ensure they match (#739) Signed-off-by: Ben Selwyn-Smith --- scripts/dev_scripts/integration_tests.sh | 14 +- src/macaron/json_tools.py | 41 +++-- src/macaron/repo_finder/commit_finder.py | 2 +- .../repo_finder/provenance_extractor.py | 155 ++++++++++++++++-- .../repo_finder/repo_finder_deps_dev.py | 2 +- src/macaron/slsa_analyzer/analyzer.py | 94 ++++++++--- .../package_registry/jfrog_maven_registry.py | 4 +- .../expectations/cue/cue_validator.py | 4 +- .../provenance/intoto/__init__.py | 2 +- .../provenance/intoto/v01/__init__.py | 2 +- .../provenance/intoto/v1/__init__.py | 2 +- .../slsa_analyzer/provenance/loader.py | 3 +- src/macaron/util.py | 16 -- src/macaron/vsa/vsa.py | 2 +- ...verifier_explicitly_provided_cue_PASS.json | 90 +++++----- .../repo_finder/test_provenance_extractor.py | 26 ++- .../provenance/intoto/v01/test_validate.py | 4 +- .../provenance/intoto/v1/test_validate.py | 2 +- tests/slsa_analyzer/test_analyze_context.py | 2 +- tests/vsa/test_compare_vsa.py | 2 +- 20 files changed, 329 insertions(+), 140 deletions(-) diff --git a/scripts/dev_scripts/integration_tests.sh b/scripts/dev_scripts/integration_tests.sh index 839803ecd..8ad4eaf83 100755 --- a/scripts/dev_scripts/integration_tests.sh +++ b/scripts/dev_scripts/integration_tests.sh @@ -707,7 +707,7 @@ JSON_RESULT=$WORKSPACE/output/reports/github_com/slsa-framework/slsa-verifier/sl EXPECTATION_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/expectations/cue/resources/valid_expectations/slsa_verifier_PASS.cue DEFAULTS_FILE=$WORKSPACE/tests/e2e/defaults/slsa_verifier.ini PROVENANCE_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl -$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -b main -d fc50b662fcfeeeb0e97243554b47d9b20b14efac --skip-deps || log_fail +$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -d 6fb4f7e2dd9c2f5d4f55fa88f6796278a7bba6d6 --skip-deps || log_fail check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail @@ -719,7 +719,7 @@ JSON_RESULT=$WORKSPACE/output/reports/github_com/slsa-framework/slsa-verifier/sl EXPECTATION_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/expectations/cue/resources/valid_expectations/slsa_verifier_PASS.cue DEFAULTS_FILE=$WORKSPACE/tests/e2e/defaults/allow_url_link_github.ini PROVENANCE_FILE=$WORKSPACE/tests/slsa_analyzer/provenance/resources/valid_provenances/slsa-verifier-linux-amd64.intoto.jsonl -$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -b main -d fc50b662fcfeeeb0e97243554b47d9b20b14efac --skip-deps || log_fail +$RUN_MACARON -dp $DEFAULTS_FILE analyze -pe $EXPECTATION_FILE -pf $PROVENANCE_FILE -rp https://github.com/slsa-framework/slsa-verifier -d 6fb4f7e2dd9c2f5d4f55fa88f6796278a7bba6d6 --skip-deps || log_fail check_or_update_expected_output $COMPARE_JSON_OUT $JSON_RESULT $JSON_EXPECTED || log_fail @@ -762,7 +762,7 @@ check_or_update_expected_output $COMPARE_POLICIES $POLICY_RESULT $POLICY_EXPECTE echo -e "\n----------------------------------------------------------------------------------" echo "behnazh-w/example-maven-app as a local and remote repository" -echo "Test the Witness and GitHub provenances as an input, Cue expectation validation, Policy CLI and VSA generation." +echo "Test the Witness and GitHub provenances as an input, Cue expectation validation, Policy CLI and VSA generation, User input vs. provenance." echo -e "----------------------------------------------------------------------------------\n" RUN_POLICY="macaron verify-policy" POLICY_FILE=$WORKSPACE/tests/policy_engine/resources/policies/example-maven-project/policy.dl @@ -794,6 +794,14 @@ $RUN_POLICY -f $POLICY_FILE -d "$WORKSPACE/output/macaron.db" || log_fail check_or_update_expected_output "$COMPARE_POLICIES" "$POLICY_RESULT" "$POLICY_EXPECTED" || log_fail check_or_update_expected_output "$COMPARE_VSA" "$VSA_RESULT" "$VSA_PAYLOAD_EXPECTED" || log_fail +# Validate user input of repo and commit vs provenance. +$RUN_MACARON analyze -pf $GITHUB_PROVENANCE_FILE -rp https://github.com/behnazh-w/example-maven-app -d 2deca75ed5dd365eaf1558a82347b1f11306135f --skip-deps || log_fail + +# Validate user input of repo and commit (via purl) vs provenance. +$RUN_MACARON analyze -pf $GITHUB_PROVENANCE_FILE -purl pkg:github/behnazh-w/example-maven-app@2deca75 --skip-deps || log_fail + +# Validate user input of repo and commit (via purl with tag) vs provenance. +$RUN_MACARON analyze -pf $GITHUB_PROVENANCE_FILE -purl pkg:github/behnazh-w/example-maven-app@1.0 --skip-deps || log_fail # Testing the Repo Finder's remote calls. # This requires the 'packageurl' Python module diff --git a/src/macaron/json_tools.py b/src/macaron/json_tools.py index c38ebe15f..4b4aef98c 100644 --- a/src/macaron/json_tools.py +++ b/src/macaron/json_tools.py @@ -3,26 +3,26 @@ """This module provides utility functions for JSON data.""" import logging +from collections.abc import Sequence from typing import TypeVar -from macaron.util import JsonType - +JsonType = int | float | str | None | bool | list["JsonType"] | dict[str, "JsonType"] T = TypeVar("T", bound=JsonType) logger: logging.Logger = logging.getLogger(__name__) -def json_extract(entry: JsonType, keys: list[str], type_: type[T]) -> T | None: +def json_extract(entry: dict | list, keys: Sequence[str | int], type_: type[T]) -> T | None: """Return the value found by following the list of depth-sequential keys inside the passed JSON dictionary. The value must be of the passed type. Parameters ---------- - entry: JsonType + entry: dict | list An entry point into a JSON structure. - keys: list[str] - The list of depth-sequential keys within the JSON. + keys: Sequence[str | int] + The sequence of depth-sequential keys within the JSON. Can be dict keys or list indices. type: type[T] The type to check the value against and return it as. @@ -31,19 +31,28 @@ def json_extract(entry: JsonType, keys: list[str], type_: type[T]) -> T | None: T | None: The found value as the type of the type parameter. """ - target = entry - - for index, key in enumerate(keys): - if not isinstance(target, dict): - logger.debug("Expect the value .%s to be a dict.", ".".join(keys[:index])) + target: JsonType = entry + for key in keys: + if isinstance(target, dict) and isinstance(key, str): + if key not in target: + logger.debug("JSON key '%s' not found in dict target.", key) + return None + elif isinstance(target, list) and isinstance(key, int): + if key < 0 or key >= len(target): + logger.debug("JSON list index '%s' is outside of list bounds %s.", key, len(target)) + return None + else: + logger.debug("Cannot index '%s' (type: %s) in target (type: %s).", key, type(key), type(target)) return None - if key not in target: - logger.debug("JSON key '%s' not found in .%s", key, ".".join(keys[:index])) - return None - target = target[key] + + # If statement required for mypy to not complain. The else case can never happen because of the above if block. + if isinstance(target, dict) and isinstance(key, str): + target = target[key] + elif isinstance(target, list) and isinstance(key, int): + target = target[key] if isinstance(target, type_): return target - logger.debug("Expect the value .%s to be of type %s", ".".join(keys), type_) + logger.debug("Found value of incorrect type: %s instead of %s.", type(target), type(type_)) return None diff --git a/src/macaron/repo_finder/commit_finder.py b/src/macaron/repo_finder/commit_finder.py index b4610a36f..e6d4f2e66 100644 --- a/src/macaron/repo_finder/commit_finder.py +++ b/src/macaron/repo_finder/commit_finder.py @@ -199,7 +199,7 @@ def extract_commit_from_version(git_obj: Git, version: str) -> str | None: if 7 <= len(version) <= 40 and re.match(hex_only_pattern, version): try: commit = git_obj.get_commit(version) - except BadName as error: + except (BadName, ValueError) as error: logger.debug("Failed to retrieve commit: %s", error) if not commit: diff --git a/src/macaron/repo_finder/provenance_extractor.py b/src/macaron/repo_finder/provenance_extractor.py index b66bbe14d..c327154ab 100644 --- a/src/macaron/repo_finder/provenance_extractor.py +++ b/src/macaron/repo_finder/provenance_extractor.py @@ -3,11 +3,20 @@ """This module contains methods for extracting repository and commit metadata from provenance files.""" import logging +import urllib.parse + +from packageurl import PackageURL +from pydriller import Git from macaron.errors import ProvenanceError -from macaron.json_tools import json_extract +from macaron.json_tools import JsonType, json_extract +from macaron.repo_finder.commit_finder import ( + AbstractPurlType, + determine_abstract_purl_type, + extract_commit_from_version, +) +from macaron.repo_finder.repo_finder import to_domain_from_known_purl_types from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, InTotoV1Payload, InTotoV01Payload -from macaron.util import JsonType logger: logging.Logger = logging.getLogger(__name__) @@ -67,16 +76,8 @@ def _extract_from_slsa_v01(payload: InTotoV01Payload) -> tuple[str | None, str | if not list_index: return None, None - material_list = json_extract(predicate, ["materials"], list) - if not material_list: - return None, None - - if list_index >= len(material_list): - logger.debug("Material list index outside of material list bounds.") - return None, None - - material = material_list[list_index] - if not material or not isinstance(material, dict): + material = json_extract(predicate, ["materials", list_index], dict) + if not material: logger.debug("Indexed material list entry is invalid.") return None, None @@ -232,3 +233,133 @@ def _clean_spdx(uri: str) -> str: """ url, _, _ = uri.lstrip("git+").rpartition("@") return url + + +def check_if_input_repo_commit_provenance_conflict( + repo_path_input: str | None, + digest_input: str | None, + provenance_repo_url: str | None, + provenance_commit_digest: str | None, +) -> bool: + """Test if the input repo and commit match the contents of the provenance. + + Parameters + ---------- + repo_path_input: str | None + The repo URL from input. + digest_input: str | None + The digest from input. + provenance_repo_url: str | None + The repo URL from provenance. + provenance_commit_digest: str | None + The commit digest from provenance. + + Returns + ------- + bool + True if there is a conflict between the inputs, False otherwise, or if the comparison cannot be performed. + """ + # Check the provenance repo against the input repo. + if repo_path_input and provenance_repo_url and repo_path_input != provenance_repo_url: + logger.debug( + "The repository URL from input does not match what exists in the provenance. " + "Input Repo: %s, Provenance Repo: %s.", + repo_path_input, + provenance_repo_url, + ) + return True + + # Check the provenance commit against the input commit. + if digest_input and provenance_commit_digest and digest_input != provenance_commit_digest: + logger.debug( + "The commit digest from input does not match what exists in the provenance. " + "Input Commit: %s, Provenance Commit: %s.", + digest_input, + provenance_commit_digest, + ) + return True + + return False + + +def check_if_input_purl_provenance_conflict( + git_obj: Git, + repo_path_input: bool, + digest_input: bool, + provenance_repo_url: str | None, + provenance_commit_digest: str | None, + purl: PackageURL, +) -> bool: + """Test if the input repository type PURL's repo and commit match the contents of the provenance. + + Parameters + ---------- + git_obj: Git + The Git object. + repo_path_input: bool + True if there is a repo as input. + digest_input: str + True if there is a commit as input. + provenance_repo_url: str | None + The repo url from provenance. + provenance_commit_digest: str | None + The commit digest from provenance. + purl: PackageURL + The input repository PURL. + + Returns + ------- + bool + True if there is a conflict between the inputs, False otherwise, or if the comparison cannot be performed. + """ + if determine_abstract_purl_type(purl) != AbstractPurlType.REPOSITORY: + return False + + # Check the PURL repo against the provenance. + if not repo_path_input and provenance_repo_url: + if not check_if_repository_purl_and_url_match(provenance_repo_url, purl): + logger.debug( + "The repo url passed via purl input does not match what exists in the provenance. " + "Purl: %s, Provenance: %s.", + purl, + provenance_repo_url, + ) + return True + + # Check the PURL commit against the provenance. + if not digest_input and provenance_commit_digest and purl.version: + purl_commit = extract_commit_from_version(git_obj, purl.version) + if purl_commit and purl_commit != provenance_commit_digest: + logger.debug( + "The commit digest passed via purl input does not match what exists in the " + "provenance. Purl Commit: %s, Provenance Commit: %s.", + purl_commit, + provenance_commit_digest, + ) + return True + + return False + + +def check_if_repository_purl_and_url_match(url: str, repo_purl: PackageURL) -> bool: + """Compare a repository PURL and URL for equality. + + Parameters + ---------- + url: str + The URL. + repo_purl: PackageURL + A PURL that is of the repository abstract type. E.g. GitHub. + + Returns + ------- + bool + True if the two inputs match in terms of URL netloc/domain and path. + """ + expanded_purl_type = to_domain_from_known_purl_types(repo_purl.type) + parsed_url = urllib.parse.urlparse(url) + purl_path = repo_purl.name + if repo_purl.namespace: + purl_path = f"{repo_purl.namespace}/{purl_path}" + # Note that the urllib method includes the "/" before path while the PURL method does not. + return f"{parsed_url.hostname}{parsed_url.path}".lower() == f"{expanded_purl_type or repo_purl.type}/{purl_path}" diff --git a/src/macaron/repo_finder/repo_finder_deps_dev.py b/src/macaron/repo_finder/repo_finder_deps_dev.py index c7de6107f..7f2266051 100644 --- a/src/macaron/repo_finder/repo_finder_deps_dev.py +++ b/src/macaron/repo_finder/repo_finder_deps_dev.py @@ -9,7 +9,7 @@ from packageurl import PackageURL -from macaron.repo_finder.provenance_extractor import json_extract +from macaron.json_tools import json_extract from macaron.repo_finder.repo_finder_base import BaseRepoFinder from macaron.repo_finder.repo_validator import find_valid_repository_url from macaron.util import send_get_http_raw diff --git a/src/macaron/slsa_analyzer/analyzer.py b/src/macaron/slsa_analyzer/analyzer.py index 8af5cb848..dd5d2cf5a 100644 --- a/src/macaron/slsa_analyzer/analyzer.py +++ b/src/macaron/slsa_analyzer/analyzer.py @@ -35,7 +35,11 @@ from macaron.output_reporter.results import Record, Report, SCMStatus from macaron.repo_finder import repo_finder from macaron.repo_finder.commit_finder import find_commit -from macaron.repo_finder.provenance_extractor import extract_repo_and_commit_from_provenance +from macaron.repo_finder.provenance_extractor import ( + check_if_input_purl_provenance_conflict, + check_if_input_repo_commit_provenance_conflict, + extract_repo_and_commit_from_provenance, +) from macaron.repo_finder.provenance_finder import ProvenanceFinder from macaron.slsa_analyzer import git_url from macaron.slsa_analyzer.analyze_context import AnalyzeContext @@ -322,6 +326,8 @@ def run_single( provenance_payload = ProvenanceFinder().find_provenance(parsed_purl) # Try to extract the repository URL and commit digest from the Provenance, if it exists. + repo_path_input: str | None = config.get_value("path") + digest_input: str | None = config.get_value("digest") provenance_repo_url = provenance_commit_digest = None if provenance_payload: try: @@ -331,6 +337,17 @@ def run_single( except ProvenanceError as error: logger.debug("Failed to extract repo or commit from provenance: %s", error) + # Try to validate the input repo and/or commit against provenance contents. + if (provenance_repo_url or provenance_commit_digest) and check_if_input_repo_commit_provenance_conflict( + repo_path_input, digest_input, provenance_repo_url, provenance_commit_digest + ): + return Record( + record_id=repo_id, + description="Input mismatch between repo/commit and provenance.", + pre_config=config, + status=SCMStatus.ANALYSIS_FAILED, + ) + # Create the analysis target. available_domains = [git_service.hostname for git_service in GIT_SERVICES if git_service.hostname] try: @@ -345,11 +362,40 @@ def run_single( status=SCMStatus.ANALYSIS_FAILED, ) + # Prepare the repo. + git_obj = None + if analysis_target.repo_path: + git_obj = self._prepare_repo( + os.path.join(self.output_path, self.GIT_REPOS_DIR), + analysis_target.repo_path, + analysis_target.branch, + analysis_target.digest, + analysis_target.parsed_purl, + ) + + # Check if only one of the repo or digest came from direct input. + if git_obj and (provenance_repo_url or provenance_commit_digest) and parsed_purl: + if check_if_input_purl_provenance_conflict( + git_obj, + bool(repo_path_input), + bool(digest_input), + provenance_repo_url, + provenance_commit_digest, + parsed_purl, + ): + return Record( + record_id=repo_id, + description="Input mismatch between repo/commit (purl) and provenance.", + pre_config=config, + status=SCMStatus.ANALYSIS_FAILED, + ) + # Create the component. try: component = self.add_component( analysis, analysis_target, + git_obj, existing_records, provenance_payload, ) @@ -507,6 +553,7 @@ def add_component( self, analysis: Analysis, analysis_target: AnalysisTarget, + git_obj: Git | None, existing_records: dict[str, Record] | None = None, provenance_payload: InTotoPayload | None = None, ) -> Component: @@ -521,6 +568,8 @@ def add_component( The current analysis instance. analysis_target: AnalysisTarget The target of this analysis. + git_obj: Git | None + The pydriller.Git object of the repository. existing_records : dict[str, Record] | None The mapping of existing records that the analysis has run successfully. provenance_payload: InTotoVPayload | None @@ -539,32 +588,23 @@ def add_component( The component is already analyzed in the same session. """ # Note: the component created in this function will be added to the database. - repository = None - if analysis_target.repo_path: - git_obj = self._prepare_repo( - os.path.join(self.output_path, self.GIT_REPOS_DIR), - analysis_target.repo_path, - analysis_target.branch, - analysis_target.digest, - analysis_target.parsed_purl, - ) - if git_obj: - # TODO: use both the repo URL and the commit hash to check. - if ( - existing_records - and (existing_record := existing_records.get(git_url.get_remote_origin_of_local_repo(git_obj))) - is not None - ): - raise DuplicateCmpError( - f"{analysis_target.repo_path} is already analyzed.", context=existing_record.context - ) + if git_obj: + # TODO: use both the repo URL and the commit hash to check. + if ( + existing_records + and (existing_record := existing_records.get(git_url.get_remote_origin_of_local_repo(git_obj))) + is not None + ): + raise DuplicateCmpError( + f"{analysis_target.repo_path} is already analyzed.", context=existing_record.context + ) - repository = self.add_repository(analysis_target.branch, git_obj) - else: - # We cannot prepare the repository even though we have successfully resolved the repository path for the - # software component. If this happens, we don't raise error and treat the software component as if it - # does not have any ``Repository`` attached to it. - repository = None + repository = self.add_repository(analysis_target.branch, git_obj) + else: + # We cannot prepare the repository even though we have successfully resolved the repository path for the + # software component. If this happens, we don't raise error and treat the software component as if it + # does not have any ``Repository`` attached to it. + repository = None if not analysis_target.parsed_purl: # If the PURL is not available. This will only mean that the user don't provide PURL but only provide the @@ -923,7 +963,7 @@ def _resolve_local_path(start_dir: str, local_path: str) -> str: The resolved path in canonical form or an empty string if errors. """ # Resolve the path by joining dir and path. - # Because strict mode is enabled, if a path doesn’t exist or a symlink loop + # Because strict mode is enabled, if a path doesn't exist or a symlink loop # is encountered, OSError is raised. # ValueError is raised if we use both relative and absolute paths in os.path.commonpath. try: diff --git a/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py b/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py index ce52a6595..1c78d4409 100644 --- a/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py +++ b/src/macaron/slsa_analyzer/package_registry/jfrog_maven_registry.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Assets on a package registry.""" @@ -14,11 +14,11 @@ from macaron.config.defaults import defaults from macaron.errors import ConfigurationError +from macaron.json_tools import JsonType from macaron.slsa_analyzer.build_tool.base_build_tool import BaseBuildTool from macaron.slsa_analyzer.build_tool.gradle import Gradle from macaron.slsa_analyzer.build_tool.maven import Maven from macaron.slsa_analyzer.package_registry.package_registry import PackageRegistry -from macaron.util import JsonType logger: logging.Logger = logging.getLogger(__name__) diff --git a/src/macaron/slsa_analyzer/provenance/expectations/cue/cue_validator.py b/src/macaron/slsa_analyzer/provenance/expectations/cue/cue_validator.py index 6feaeab15..70e203af8 100644 --- a/src/macaron/slsa_analyzer/provenance/expectations/cue/cue_validator.py +++ b/src/macaron/slsa_analyzer/provenance/expectations/cue/cue_validator.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """The cue module invokes the CUE schema validator.""" @@ -10,7 +10,7 @@ from macaron import MACARON_PATH from macaron.errors import CUEExpectationError, CUERuntimeError -from macaron.util import JsonType +from macaron.json_tools import JsonType # Load the CUE shared library. cue = ctypes.CDLL(os.path.join(MACARON_PATH, "bin", "cuevalidate.so")) diff --git a/src/macaron/slsa_analyzer/provenance/intoto/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/__init__.py index c82a590fc..03b3f16f4 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/__init__.py @@ -10,11 +10,11 @@ from packageurl import PackageURL +from macaron.json_tools import JsonType from macaron.slsa_analyzer.provenance.intoto import v01, v1 from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError from macaron.slsa_analyzer.provenance.intoto.v01 import InTotoV01Subject from macaron.slsa_analyzer.provenance.intoto.v1 import InTotoV1ResourceDescriptor -from macaron.util import JsonType # Type of an in-toto statement. # This is currently either a v0.1 statement or v1 statement. diff --git a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py index 95fc3b304..94f8b6f78 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/v01/__init__.py @@ -7,8 +7,8 @@ from typing import TypedDict, TypeGuard +from macaron.json_tools import JsonType from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError -from macaron.util import JsonType class InTotoV01Statement(TypedDict): diff --git a/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py b/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py index 3ffe08bd6..a428c712b 100644 --- a/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py +++ b/src/macaron/slsa_analyzer/provenance/intoto/v1/__init__.py @@ -8,8 +8,8 @@ from collections.abc import Callable from typing import TypedDict, TypeGuard +from macaron.json_tools import JsonType from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError -from macaron.util import JsonType class InTotoV1Statement(TypedDict): diff --git a/src/macaron/slsa_analyzer/provenance/loader.py b/src/macaron/slsa_analyzer/provenance/loader.py index cdde8245f..65dfee1bb 100644 --- a/src/macaron/slsa_analyzer/provenance/loader.py +++ b/src/macaron/slsa_analyzer/provenance/loader.py @@ -12,9 +12,10 @@ from urllib.parse import urlparse from macaron.config.defaults import defaults +from macaron.json_tools import JsonType from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, validate_intoto_payload from macaron.slsa_analyzer.provenance.intoto.errors import LoadIntotoAttestationError, ValidateInTotoPayloadError -from macaron.util import JsonType, send_get_http_raw +from macaron.util import send_get_http_raw logger: logging.Logger = logging.getLogger(__name__) diff --git a/src/macaron/util.py b/src/macaron/util.py index 25f876af7..90876b9c0 100644 --- a/src/macaron/util.py +++ b/src/macaron/util.py @@ -260,19 +260,3 @@ def copy_file_bulk(file_list: list, src_path: str, target_path: str) -> bool: return False return True - - -JsonType = int | float | str | None | bool | list["JsonType"] | dict[str, "JsonType"] - - -def get_if_exists(doc: JsonType, path: list[str | int]) -> JsonType | None: - """Get a json dict value if it exists.""" - while len(path) > 0: - this = path.pop(0) - if isinstance(this, str) and isinstance(doc, dict) and this in doc: - doc = doc[this] - elif isinstance(this, int) and isinstance(doc, list) and 0 <= this < len(doc): - doc = doc[this] - else: - return None - return doc diff --git a/src/macaron/vsa/vsa.py b/src/macaron/vsa/vsa.py index f06a948f9..43b9ca156 100644 --- a/src/macaron/vsa/vsa.py +++ b/src/macaron/vsa/vsa.py @@ -20,7 +20,7 @@ from macaron.database.database_manager import get_db_manager from macaron.database.table_definitions import ProvenanceSubject -from macaron.util import JsonType +from macaron.json_tools import JsonType logger: logging.Logger = logging.getLogger(__name__) diff --git a/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json b/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json index fd29d3f4c..ef30c99f9 100644 --- a/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json +++ b/tests/e2e/expected_results/slsa-verifier/slsa-verifier_explicitly_provided_cue_PASS.json @@ -1,50 +1,50 @@ { "metadata": { - "timestamps": "2024-05-07 15:16:38", + "timestamps": "2024-05-14 13:23:14", "has_passing_check": true, "run_checks": [ - "mcn_build_script_1", - "mcn_build_service_1", - "mcn_provenance_derived_commit_1", - "mcn_trusted_builder_level_three_1", + "mcn_provenance_witness_level_one_1", "mcn_provenance_derived_repo_1", "mcn_build_as_code_1", "mcn_provenance_available_1", - "mcn_infer_artifact_pipeline_1", - "mcn_provenance_expectation_1", "mcn_version_control_system_1", - "mcn_provenance_witness_level_one_1" + "mcn_provenance_expectation_1", + "mcn_trusted_builder_level_three_1", + "mcn_infer_artifact_pipeline_1", + "mcn_provenance_derived_commit_1", + "mcn_build_script_1", + "mcn_build_service_1" ], "check_tree": { "mcn_provenance_available_1": { - "mcn_provenance_expectation_1": {}, + "mcn_provenance_level_three_1": {}, "mcn_provenance_witness_level_one_1": {}, - "mcn_provenance_level_three_1": {} + "mcn_provenance_expectation_1": {} }, "mcn_provenance_derived_commit_1": {}, "mcn_version_control_system_1": { - "mcn_build_script_1": {}, "mcn_trusted_builder_level_three_1": { "mcn_build_as_code_1": { - "mcn_build_service_1": {}, - "mcn_infer_artifact_pipeline_1": {} + "mcn_infer_artifact_pipeline_1": {}, + "mcn_build_service_1": {} } - } + }, + "mcn_build_script_1": {} }, "mcn_provenance_derived_repo_1": {} } }, "target": { "info": { - "full_name": "pkg:github.com/slsa-framework/slsa-verifier@fc50b662fcfeeeb0e97243554b47d9b20b14efac", + "full_name": "pkg:github.com/slsa-framework/slsa-verifier@6fb4f7e2dd9c2f5d4f55fa88f6796278a7bba6d6", "local_cloned_path": "git_repos/github_com/slsa-framework/slsa-verifier", "remote_path": "https://github.com/slsa-framework/slsa-verifier", - "branch": "main", - "commit_hash": "fc50b662fcfeeeb0e97243554b47d9b20b14efac", - "commit_date": "2022-10-04T01:00:02+00:00" + "branch": null, + "commit_hash": "6fb4f7e2dd9c2f5d4f55fa88f6796278a7bba6d6", + "commit_date": "2022-08-25T11:37:20-05:00" }, "provenances": { - "is_inferred": true, + "is_inferred": false, "content": { "github_actions": [ { @@ -53,24 +53,23 @@ "predicateType": "https://slsa.dev/provenance/v0.2", "predicate": { "builder": { - "id": "https://github.com/slsa-framework/slsa-verifier/blob/fc50b662fcfeeeb0e97243554b47d9b20b14efac/.github/workflows/release.yml" + "id": "" }, - "buildType": "Custom github_actions", + "buildType": "", "invocation": { "configSource": { - "uri": "https://github.com/slsa-framework/slsa-verifier@refs/heads/main", + "uri": "", "digest": { - "sha1": "fc50b662fcfeeeb0e97243554b47d9b20b14efac" + "sha1": "" }, - "entryPoint": "https://github.com/slsa-framework/slsa-verifier/blob/fc50b662fcfeeeb0e97243554b47d9b20b14efac/.github/workflows/release.yml" + "entryPoint": "" }, "parameters": {}, "environment": {} }, "buildConfig": { - "jobID": "", - "stepID": "", - "stepName": "" + "jobID": "", + "stepID": "" }, "metadata": { "buildInvocationId": "", @@ -91,15 +90,14 @@ ] } } - ], - "npm Registry": [] + ] } }, "checks": { "summary": { "DISABLED": 0, - "FAILED": 3, - "PASSED": 8, + "FAILED": 2, + "PASSED": 9, "SKIPPED": 0, "UNKNOWN": 0 }, @@ -125,9 +123,9 @@ "build_tool_name: go", "ci_service_name: github_actions", "language: BuildLanguage.GO", - "build_tool_command: [\"go\", \"build\", \"-mod=vendor\", \"-o\", \"service\", \"./cli/experimental/service/\"]", + "build_tool_command: [\"go\", \"build\", \"-mod=vendor\"]", { - "build_trigger": "https://github.com/slsa-framework/slsa-verifier/blob/fc50b662fcfeeeb0e97243554b47d9b20b14efac/.github/workflows/pre-submit.cli.yml" + "build_trigger": "https://github.com/slsa-framework/slsa-verifier/blob/6fb4f7e2dd9c2f5d4f55fa88f6796278a7bba6d6/.github/workflows/pre-submit.yml" } ], "result_type": "PASSED" @@ -157,6 +155,17 @@ ], "result_type": "PASSED" }, + { + "check_id": "mcn_provenance_derived_commit_1", + "check_description": "Check whether the commit came from provenance.", + "slsa_requirements": [ + "Security - SLSA Level 4" + ], + "justification": [ + "commit_info: The commit digest was found from provenance." + ], + "result_type": "PASSED" + }, { "check_id": "mcn_provenance_derived_repo_1", "check_description": "Check whether the repo came from provenance.", @@ -164,7 +173,7 @@ "Security - SLSA Level 4" ], "justification": [ - "repository_url: The repository URL was found from provenance." + "repository_info: The repository URL was found from provenance." ], "result_type": "PASSED" }, @@ -192,7 +201,7 @@ "build_tool_name: slsa-framework/slsa-github-generator/.github/workflows/builder_go_slsa3.yml@v1.2.0", "ci_service_name: github_actions", { - "build_trigger": "https://github.com/slsa-framework/slsa-verifier/blob/fc50b662fcfeeeb0e97243554b47d9b20b14efac/.github/workflows/release.yml" + "build_trigger": "https://github.com/slsa-framework/slsa-verifier/blob/6fb4f7e2dd9c2f5d4f55fa88f6796278a7bba6d6/.github/workflows/release.yml" } ], "result_type": "PASSED" @@ -221,17 +230,6 @@ ], "result_type": "FAILED" }, - { - "check_id": "mcn_provenance_derived_commit_1", - "check_description": "Check whether the commit came from provenance.", - "slsa_requirements": [ - "Security - SLSA Level 4" - ], - "justification": [ - "commit_digest: The analysis commit did not match the provenance commit." - ], - "result_type": "FAILED" - }, { "check_id": "mcn_provenance_witness_level_one_1", "check_description": "Check whether the target has a level-1 witness provenance.", diff --git a/tests/repo_finder/test_provenance_extractor.py b/tests/repo_finder/test_provenance_extractor.py index ff0914686..704aaaa8f 100644 --- a/tests/repo_finder/test_provenance_extractor.py +++ b/tests/repo_finder/test_provenance_extractor.py @@ -5,12 +5,15 @@ import json import pytest +from packageurl import PackageURL from macaron.errors import ProvenanceError -from macaron.json_tools import json_extract -from macaron.repo_finder.provenance_extractor import extract_repo_and_commit_from_provenance +from macaron.json_tools import JsonType, json_extract +from macaron.repo_finder.provenance_extractor import ( + check_if_repository_purl_and_url_match, + extract_repo_and_commit_from_provenance, +) from macaron.slsa_analyzer.provenance.intoto import validate_intoto_payload -from macaron.util import JsonType @pytest.fixture(name="slsa_v1_gcb_1_provenance") @@ -496,6 +499,21 @@ def test_invalid_type_payloads(type_: str, predicate_type: str) -> None: _test_extract_repo_and_commit_from_provenance(payload) +@pytest.mark.parametrize( + ("url", "purl_string", "expected"), + [ + ("https://github.com:9000/oracle/macaron", "pkg:github/oracle/macaron", True), + ("http://user:pass@github.com/oracle/macaron", "pkg:github.com/oracle/macaron", True), + ("https://bitbucket.org:9000/example/test", "pkg:bitbucket/example/test", True), + ("http://bitbucket.org/example;key1=1?key2=2#key3=3", "pkg:bitbucket.org/example", True), + ], +) +def test_compare_purl_and_url(url: str, purl_string: str, expected: bool) -> None: + """Test comparison of repository type PURLs against matching URLs.""" + purl = PackageURL.from_string(purl_string) + assert expected == check_if_repository_purl_and_url_match(url, purl) + + def _test_extract_repo_and_commit_from_provenance( payload: dict[str, JsonType], expected_repo: str | None = None, expected_commit: str | None = None ) -> None: @@ -506,7 +524,7 @@ def _test_extract_repo_and_commit_from_provenance( assert expected_commit == commit -def _json_modify(entry: JsonType, keys: list[str], new_value: JsonType) -> None: +def _json_modify(entry: dict | list, keys: list[str], new_value: JsonType) -> None: """Modify the value found by following the list of depth-sequential keys inside the passed JSON dictionary. The found value will be overwritten by the `new_value` parameter. diff --git a/tests/slsa_analyzer/provenance/intoto/v01/test_validate.py b/tests/slsa_analyzer/provenance/intoto/v01/test_validate.py index 2e438a484..99d8f4032 100644 --- a/tests/slsa_analyzer/provenance/intoto/v01/test_validate.py +++ b/tests/slsa_analyzer/provenance/intoto/v01/test_validate.py @@ -1,13 +1,13 @@ -# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved. # Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. """Tests for validation of in-toto attestation version 0.1.""" import pytest +from macaron.json_tools import JsonType from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError from macaron.slsa_analyzer.provenance.intoto.v01 import validate_intoto_statement, validate_intoto_subject -from macaron.util import JsonType @pytest.mark.parametrize( diff --git a/tests/slsa_analyzer/provenance/intoto/v1/test_validate.py b/tests/slsa_analyzer/provenance/intoto/v1/test_validate.py index ca03668a5..44ea4d0a3 100644 --- a/tests/slsa_analyzer/provenance/intoto/v1/test_validate.py +++ b/tests/slsa_analyzer/provenance/intoto/v1/test_validate.py @@ -5,9 +5,9 @@ import pytest +from macaron.json_tools import JsonType from macaron.slsa_analyzer.provenance.intoto.errors import ValidateInTotoPayloadError from macaron.slsa_analyzer.provenance.intoto.v1 import validate_intoto_statement -from macaron.util import JsonType @pytest.mark.parametrize( diff --git a/tests/slsa_analyzer/test_analyze_context.py b/tests/slsa_analyzer/test_analyze_context.py index 110d6aea5..dd33bda50 100644 --- a/tests/slsa_analyzer/test_analyze_context.py +++ b/tests/slsa_analyzer/test_analyze_context.py @@ -9,13 +9,13 @@ from unittest.mock import MagicMock from macaron.code_analyzer.call_graph import BaseNode, CallGraph +from macaron.json_tools import JsonType from macaron.slsa_analyzer.asset import VirtualReleaseAsset from macaron.slsa_analyzer.ci_service.github_actions.github_actions_ci import GitHubActions from macaron.slsa_analyzer.provenance.intoto import validate_intoto_payload from macaron.slsa_analyzer.provenance.slsa import SLSAProvenanceData from macaron.slsa_analyzer.slsa_req import ReqName, SLSAReqStatus from macaron.slsa_analyzer.specs.ci_spec import CIInfo -from macaron.util import JsonType from tests.conftest import MockAnalyzeContext diff --git a/tests/vsa/test_compare_vsa.py b/tests/vsa/test_compare_vsa.py index a49db8d3f..7fe9b7281 100644 --- a/tests/vsa/test_compare_vsa.py +++ b/tests/vsa/test_compare_vsa.py @@ -5,7 +5,7 @@ import pytest -from macaron.util import JsonType +from macaron.json_tools import JsonType from tests.vsa.compare_vsa import compare_json, skip_compare