diff --git a/src/macaron/slsa_analyzer/git_url.py b/src/macaron/slsa_analyzer/git_url.py
index a516186ac..603188c5f 100644
--- a/src/macaron/slsa_analyzer/git_url.py
+++ b/src/macaron/slsa_analyzer/git_url.py
@@ -124,10 +124,6 @@ def check_out_repo_target(
 
     This function assumes that a remote "origin" exist and checkout from that remote ONLY.
 
-    If ``offline_mode`` is False, this function will fetch new changes from origin remote. The fetching operation
-    will prune and update all references (e.g. tags, branches) to make sure that the local repository is up-to-date
-    with the repository specified by origin remote.
-
     If ``offline_mode`` is True and neither ``branch_name`` nor commit are provided, this function will not do anything
     and the HEAD commit will be analyzed. If there are uncommitted local changes, the HEAD commit will
     appear in the report but the repo with local changes will be analyzed. We leave it up to the user to decide
@@ -166,36 +162,13 @@ def check_out_repo_target(
     bool
         True if succeed else False.
     """
-    if not offline_mode:
-        # Fetch from remote origin by running ``git fetch origin --force --tags --prune --prune-tags`` inside the target
-        # repository.
-        # The flags `--force --tags --prune --prune-tags` are used to make sure we analyze the most up-to-date version
-        # of the repo.
-        #    - Any modified tags in the remote repository is updated locally.
-        #    - Prune deleted branches and tags in the remote from the local repository.
-        # References:
-        #    https://git-scm.com/docs/git-fetch
-        #    https://github.com/oracle/macaron/issues/547
+    if not offline_mode and not branch_name and not digest:
         try:
-            git_obj.repo.git.fetch(
-                "origin",
-                "--force",
-                "--tags",
-                "--prune",
-                "--prune-tags",
-            )
+            git_obj.repo.git.checkout("--force", "origin/HEAD")
         except GitCommandError:
-            logger.error("Unable to fetch from the origin remote of the repository.")
+            logger.debug("Cannot checkout the default branch at origin/HEAD")
             return False
 
-    # By default check out the commit at origin/HEAD only when offline_mode is False.
-    if not branch_name and not digest:
-        try:
-            git_obj.repo.git.checkout("--force", "origin/HEAD")
-        except GitCommandError:
-            logger.debug("Cannot checkout the default branch at origin/HEAD")
-            return False
-
     # The following checkout operations will be done whether offline_mode is False or not.
     if branch_name and not digest:
         try:
@@ -300,9 +273,9 @@ def clone_remote_repo(clone_dir: str, url: str) -> Repo | None:
     """Clone the remote repository and return the `git.Repo` object for that repository.
 
     If there is an existing non-empty ``clone_dir``, Macaron assumes the repository has
-    been cloned already and cancels the clone.
-    This could happen when multiple runs of Macaron use the same ``, leading
-    to Macaron potentially trying to clone a repository multiple times.
+    been cloned already and will attempt to fetch the latest changes. The fetching operation
+    will prune and update all references (e.g. tags, branches) to make sure that the local
+    repository is up-to-date with the repository specified by origin remote.
 
     We use treeless partial clone to reduce clone time, by retrieving trees and blobs lazily.
     For more details, see the following:
@@ -337,11 +310,41 @@ def clone_remote_repo(clone_dir: str, url: str) -> Repo | None:
             os.rmdir(clone_dir)
             logger.debug("The clone dir %s is empty. It has been deleted for cloning the repo.", clone_dir)
         except OSError:
-            logger.debug(
-                "The clone dir %s is not empty. Cloning will not be proceeded.",
-                clone_dir,
-            )
-            return None
+            # Update the existing repository by running ``git fetch`` inside the existing directory.
+            # The flags `--force --tags --prune --prune-tags` are used to make sure we analyze the most up-to-date
+            # version of the repo.
+            #    - Any modified tags in the remote repository are updated locally.
+            #    - Deleted branches and tags in the remote repository are pruned from the local copy.
+            # References:
+            #    https://git-scm.com/docs/git-fetch
+            #    https://github.com/oracle/macaron/issues/547
+            try:
+                git_env_patch = {
+                    # Setting the GIT_TERMINAL_PROMPT environment variable to ``0`` stops
+                    # ``git fetch`` from prompting for login credentials.
+                    "GIT_TERMINAL_PROMPT": "0",
+                }
+                fetch_result = subprocess.run(  # nosec B603
+                    args=["git", "fetch", "origin", "--force", "--tags", "--prune", "--prune-tags"],
+                    capture_output=True,
+                    cwd=clone_dir,
+                    # If `check=True` and return status code is not zero, subprocess.CalledProcessError is
+                    # raised, which we don't want. We check the return status code of the subprocess
+                    # right after the call instead.
+                    check=False,
+                    env=get_patched_env(git_env_patch),
+                )
+            except OSError:
+                logger.debug("The clone dir %s is not empty. An attempt to update it failed.", clone_dir)
+                return None
+
+            # NOTE(review): the captured output is intentionally not logged; confirm whether it can
+            # contain credentials before exposing it in log messages.
+            if fetch_result.returncode != 0:
+                logger.debug("The clone dir %s is not empty. An attempt to update it failed.", clone_dir)
+                return None
+
+            return Repo(path=clone_dir)
 
     # Ensure that the parent directory where the repo is cloned into exists.
     parent_dir = Path(clone_dir).parent
diff --git a/tests/integration/cases/update_local_repositories/modify_clone.sh b/tests/integration/cases/update_local_repositories/modify_clone.sh
new file mode 100755
index 000000000..a2d32ebb5
--- /dev/null
+++ b/tests/integration/cases/update_local_repositories/modify_clone.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+cd output/git_repos/github_com/avaje/avaje-prisms || exit 1
+git tag --delete avaje-prisms-1.1
diff --git a/tests/integration/cases/update_local_repositories/policy.dl b/tests/integration/cases/update_local_repositories/policy.dl
new file mode 100644
index 000000000..b51b37ca8
--- /dev/null
+++ b/tests/integration/cases/update_local_repositories/policy.dl
@@ -0,0 +1,11 @@
+/* Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved. */
+/* Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/. */
+
+#include "prelude.dl"
+
+Policy("test_policy", component_id, "") :-
+    check_passed(component_id, "mcn_version_control_system_1"),
+    is_repo_url(component_id, "https://github.com/avaje/avaje-prisms").
+
+apply_policy_to("test_policy", component_id) :-
+    is_component(component_id, "pkg:maven/io.avaje/avaje-prisms@1.1").
diff --git a/tests/integration/cases/update_local_repositories/test.yaml b/tests/integration/cases/update_local_repositories/test.yaml
new file mode 100644
index 000000000..242d23c76
--- /dev/null
+++ b/tests/integration/cases/update_local_repositories/test.yaml
@@ -0,0 +1,31 @@
+# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
+# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.
+
+description: |
+  Ensuring previously cloned repositories are updated when newer changes are available at their remote origins.
+
+tags:
+- macaron-python-package
+- macaron-docker-image
+
+steps:
+- name: Analyze a repository
+  kind: analyze
+  options:
+    command_args:
+    - -purl
+    - pkg:maven/io.avaje/avaje-prisms@1.1
+- name: Delete the chosen tag from the repository
+  kind: shell
+  options:
+    cmd: ./modify_clone.sh
+- name: Analyze the repository again
+  kind: analyze
+  options:
+    command_args:
+    - -purl
+    - pkg:maven/io.avaje/avaje-prisms@1.1
+- name: Run macaron verify-policy to verify version control check which will only pass if the tag is found
+  kind: verify
+  options:
+    policy: policy.dl