Skip to content

Commit

Permalink
chore: update git fetch command and remove git pull command (#548)
Browse files Browse the repository at this point in the history
Signed-off-by: Trong Nhan Mai <trong.nhan.mai@oracle.com>
  • Loading branch information
tromai authored Dec 22, 2023
1 parent 7dcf304 commit 8056f9f
Show file tree
Hide file tree
Showing 17 changed files with 853 additions and 203 deletions.
3 changes: 3 additions & 0 deletions docs/source/pages/using.rst
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,9 @@ Analyzing a locally cloned repository
Therefore, any uncommitted changes in the repository need to be backed up to prevent loss (these include unstaged changes, staged changes and untracked files).
However, Macaron will not modify the history of the repository.

.. note::
We assume that the ``origin`` remote exists in the cloned repository and check out the relevant commits from ``origin`` only.

If you have a local repository that you want to analyze, Macaron also supports running the analysis against a local repository.

Assume that the dir tree at the local repository has the following components:
Expand Down
1 change: 0 additions & 1 deletion scripts/dev_scripts/integration_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -330,7 +330,6 @@ echo -e "\n---------------------------------------------------------------------
echo "apache/maven: Check: Check the e2e status code of running with invalid branch or digest defined in the yaml configuration."
echo -e "----------------------------------------------------------------------------------\n"
declare -a INVALID_BRANCH_DIGEST=(
"maven_digest_no_branch.yaml"
"maven_invalid_branch.yaml"
"maven_invalid_digest.yaml"
)
Expand Down
4 changes: 2 additions & 2 deletions src/macaron/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,11 @@ def analyze_slsa_levels_single(analyzer_single_args: argparse.Namespace) -> None
branch = analyzer_single_args.branch
digest = analyzer_single_args.digest

if repo_path and purl and not (branch and digest):
if repo_path and purl and not digest:
# To provide the purl together with the repository path, the user must specify the commit
# digest.
logger.error(
"Please provide the branch and commit digest for the repo at %s that matches to the PURL string %s.",
"Please provide the commit digest for the repo at %s that matches to the PURL string %s.",
repo_path,
purl,
)
Expand Down
7 changes: 0 additions & 7 deletions src/macaron/slsa_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,13 +649,6 @@ def _prepare_repo(
The pydriller.Git object of the repository or None if error.
"""
# TODO: separate the logic for handling remote and local repos instead of putting them into this method.
# Cannot specify a commit hash without specifying the branch.
if not branch_name and digest:
logger.error(
"Cannot specify a commit hash without specifying the branch for repo at %s.",
repo_path,
)
return None

logger.info(
"Preparing the repository for the analysis (path=%s, branch=%s, digest=%s)",
Expand Down
228 changes: 161 additions & 67 deletions src/macaron/slsa_analyzer/git_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,123 @@
logger: logging.Logger = logging.getLogger(__name__)


def check_out_repo_target(git_obj: Git, branch_name: str = "", digest: str = "", offline_mode: bool = False) -> bool:
def parse_git_branch_output(content: str) -> list[str]:
    """Return the list of branch names from a string that has a format similar to the output of ``git branch --list``.

    Every non-blank line of ``content`` yields one element. The markers that ``git branch`` prints in
    front of a branch are removed: ``*`` (the branch HEAD is currently on) and a leading ``+``
    (a branch that is checked out in a linked worktree).

    Parameters
    ----------
    content : str
        The raw output as string from the ``git branch`` command.

    Returns
    -------
    list[str]
        The list of strings where each string is a branch element from the raw output.
        Blank lines are dropped, so an empty ``content`` gives an empty list.

    Examples
    --------
    >>> from pprint import pprint
    >>> content = '''
    ... * (HEAD detached at 7fc81f8)
    ...   master
    ...   remotes/origin/HEAD -> origin/master
    ...   remotes/origin/master
    ...   remotes/origin/v2.dev
    ...   remotes/origin/v3.dev
    ... '''
    >>> pprint(parse_git_branch_output(content))
    ['(HEAD detached at 7fc81f8)',
     'master',
     'remotes/origin/HEAD -> origin/master',
     'remotes/origin/master',
     'remotes/origin/v2.dev',
     'remotes/origin/v3.dev']
    """
    branches = []
    for line in content.splitlines():
        # The ``*`` symbol appears next to the branch HEAD is currently on.
        # A branch in git cannot have ``*`` in its name, so it can be removed without tampering with the
        # actual name.
        # https://git-scm.com/docs/git-check-ref-format
        branch = line.replace("*", "").strip()

        # ``git branch`` marks branches checked out in linked worktrees with a leading ``+ ``.
        # Unlike ``*``, a ``+`` is allowed inside a branch name, so only the "+ " prefix is removed.
        # A branch name cannot contain a space, so this prefix can never be part of a real name.
        branch = branch.removeprefix("+ ").strip()

        # Ignore elements that contain only whitespace. The raw output of git branch can have an extra
        # new line at the end, which would otherwise be picked up as an empty element.
        if branch:
            branches.append(branch)

    return branches


def get_branches_containing_commit(git_obj: Git, commit: str, remote: str = "origin") -> list[str]:
    """Get the branches from a remote that contain a specific commit.

    The lookup calls ``git_obj.repo.git.branch`` with the arguments
    ``--remotes --list <remote>/* --contains <commit>`` and parses the raw output with
    ``parse_git_branch_output``. The returned branch names will be in the form of
    <remote>/<branch_name>.

    Parameters
    ----------
    git_obj : Git
        The pydriller.Git wrapper object of the target repository.
    commit : str
        The hash of the commit whose containing branches are looked up.
    remote : str, optional
        The name of the remote to check the branches, by default "origin".

    Returns
    -------
    list[str]
        The list of branches that contain the commit. An empty list is returned if the git command
        raises ``GitCommandError``.
    """
    # Restrict the listing to remote-tracking branches of ``remote`` that contain ``commit``.
    branch_args = ("--remotes", "--list", f"{remote}/*", "--contains", commit)

    try:
        listing: str = git_obj.repo.git.branch(*branch_args)
    except GitCommandError:
        # A failed lookup is reported as "no branch found" rather than propagated to the caller.
        logger.debug("Error while looking up branches that contain commit %s.", commit)
        return []

    return parse_git_branch_output(listing)


def check_out_repo_target(
git_obj: Git,
branch_name: str = "",
digest: str = "",
offline_mode: bool = False,
) -> bool:
"""Checkout the branch and commit specified by the user.
If no branch name is provided, this method will checkout the default branch
of the repository and analyze the latest commit from remote. Note that checking out the branch
is always performed before checking out the specific ``digest`` (if provided).
This function assumes that a remote "origin" exists and checks out from that remote ONLY.
If ``digest`` is not provided, this method always pulls (fast-forward only) and checks out the latest commit.
If ``offline_mode`` is False, this function will fetch new changes from origin remote. The fetching operation
will prune and update all references (e.g. tags, branches) to make sure that the local repository is up-to-date
with the repository specified by origin remote.
If ``digest`` is provided, this method will checkout that specific commit. If ``digest``
cannot be found in the current branch, this method will pull (fast-forward only) from remote.
If ``branch_name`` and a commit are not provided, this function will checkout the latest commit of the
default branch (i.e. origin/HEAD).
This method supports repositories which are cloned from existing remote repositories.
Other scenarios are not covered (e.g. a newly initiated repository).
If ``branch_name`` is provided and a commit is not provided, this function will checkout that branch from origin
remote (i.e. origin/<branch_name>).
If ``branch_name`` is not provided and a commit is provided, this function will checkout the commit directly.
If both ``branch_name`` and a commit are provided, this function will checkout the commit directly only if that
commit exists in the branch origin/<branch_name>. If not, this function will return False.
If ``offline_mode`` is set, this method will not pull/fetch from remote while checking out the branch or commit.
For all scenarios:
- If the checkout fails (e.g. a branch or a commit doesn't exist), this function will return
False.
- This function will perform a force checkout
https://git-scm.com/docs/git-checkout#Documentation/git-checkout.txt---force
This function supports repositories which are cloned from existing remote repositories.
Other scenarios are not covered (e.g. a newly initiated repository).
Parameters
----------
Expand All @@ -58,74 +159,67 @@ def check_out_repo_target(git_obj: Git, branch_name: str = "", digest: str = "",
bool
True if succeed else False.
"""
# Resolve the branch name to check out.
res_branch = ""
if branch_name:
res_branch = branch_name
else:
res_branch = get_default_branch(git_obj)
if not res_branch:
logger.error("Cannot determine the default branch for this repository.")
logger.info("Consider providing the specific branch to be analyzed or fully cloning the repo instead.")
if not offline_mode:
# Fetch from remote origin by running ``git fetch origin --force --tags --prune --prune-tags`` inside the target
# repository.
# The flags `--force --tags --prune --prune-tags` are used to make sure we analyze the most up-to-date version
# of the repo.
# - Any modified tags in the remote repository is updated locally.
# - Prune deleted branches and tags in the remote from the local repository.
# References:
# https://git-scm.com/docs/git-fetch
# https://github.com/oracle/macaron/issues/547
try:
git_obj.repo.git.fetch(
"origin",
"--force",
"--tags",
"--prune",
"--prune-tags",
)
except GitCommandError:
logger.error("Unable to fetch from the origin remote of the repository.")
return False

if not offline_mode:
# Fetch from remote by running ``git fetch`` inside the target repository.
# We don't specify any remote name (e.g. origin) because we want git to resolve the default fetching
# target by itself.
# For example, the user runs Macaron on a local repository where the remote is set to have name "foo_origin"
# instead.
# References: https://git-scm.com/docs/git-fetch
if not branch_name and not digest:
try:
git_obj.repo.git.fetch()
except GitCommandError as error:
logger.error("Unable to fetch from the remote repository. Error: %s", error)
git_obj.repo.git.checkout("--force", "origin/HEAD")
except GitCommandError:
logger.debug("Cannot checkout the default branch at origin/HEAD")
return False

try:
# Switch to the target branch by running ``git checkout <branch_name>`` in the target repository.
# We need to use force checkout to prevent issues similar to https://github.com/oracle/macaron/issues/530.
git_obj.repo.git.checkout("--force", res_branch)
except GitCommandError as error:
logger.error("Cannot checkout branch %s. Error: %s", res_branch, error)
return False
if branch_name and not digest:
try:
git_obj.repo.git.checkout("--force", f"origin/{branch_name}")
except GitCommandError:
logger.debug("Cannot checkout branch %s from origin remote.", branch_name)
return False

logger.info("Successfully checkout branch %s.", res_branch)
if not branch_name and digest:
try:
git_obj.repo.git.checkout("--force", f"{digest}")
except GitCommandError:
logger.debug("Cannot checkout commit %s.", digest)
return False

if not offline_mode:
# We only pull the latest changes if one of these scenarios happens:
# - no digest is provided: we need to pull and analyze the latest commit.
# - a commit digest is provided but it does not exist locally: we need to
# pull the latest changes to check if that commit is available.
# We want to check if the commit already exists locally first because we want to avoid pulling unnecessarily
# if it does.
# We do this by checking if the commit we want to analyze is an ancestor of the commit being referenced by HEAD
# (which point to the tip of the branch).
# If the commit we want to analyze is same as HEAD, that commit is still considered as the ancestor of HEAD.
# The ``is_ancestor`` method runs ``git merge-base`` behind the scenes.
# For more information on computing the ancestor status of two commits: https://git-scm.com/docs/git-merge-base.
if not digest or not git_obj.repo.is_ancestor(digest, "HEAD"):
logger.info("Pulling the latest changes of branch %s fast-forward only.", res_branch)
if branch_name and digest:
branches = get_branches_containing_commit(
git_obj=git_obj,
commit=digest,
remote="origin",
)

if f"origin/{branch_name}" in branches:
try:
# Pull the latest changes on the current branch fast-forward only.
git_obj.repo.git.pull("--ff-only")
except GitCommandError as error:
logger.error(error)
git_obj.repo.git.checkout("--force", f"{digest}")
except GitCommandError:
logger.debug("Cannot checkout commit %s.", digest)
return False

if digest:
# Checkout the specific commit that the user want by running ``git checkout <commit>`` in the target repository.
# We need to use force checkout to prevent issues similar to https://github.com/oracle/macaron/issues/530.
try:
git_obj.repo.git.checkout("--force", digest)
except GitCommandError as error:
logger.error(
"Commit %s cannot be checked out. Error: %s",
digest,
error,
)
else:
logger.error("Commit %s is not in branch %s.", digest, branch_name)
return False

# Further validation to make sure the git checkout operations happen as expected.
final_head_commit: Commit = git_obj.repo.head.commit
if not final_head_commit:
logger.critical("Cannot get the head commit after checking out.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,18 @@
target:
id: micronaut-test
# https://github.com/micronaut-projects/micronaut-test/commit/7679d10b4073a3b842b6c56877c35fa8cd10acff
branch: master
digest: 7679d10b4073a3b842b6c56877c35fa8cd10acff
path: https://github.com/micronaut-projects/micronaut-test

dependencies:
- id: slf4j
# For version 1.7.36
# https://github.com/qos-ch/slf4j/commit/e9ee55cca93c2bf26f14482a9bdf961c750d2a56
branch: v_1.7.36
# For version 1.7.36
# https://github.com/qos-ch/slf4j/commit/e9ee55cca93c2bf26f14482a9bdf961c750d2a56
digest: e9ee55cca93c2bf26f14482a9bdf961c750d2a56
path: https://github.com/qos-ch/slf4j.git

- id: caffeine
# For version 2.9.3
# https://github.com/ben-manes/caffeine/commit/05a040c2478341bab8a58a02b3dc1fe14d626d72
branch: v2.9.3
# For version 2.9.3
# https://github.com/ben-manes/caffeine/commit/05a040c2478341bab8a58a02b3dc1fe14d626d72
digest: 05a040c2478341bab8a58a02b3dc1fe14d626d72
path: https://github.com/ben-manes/caffeine.git
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

target:
id: micronaut-test
# https://github.com/micronaut-projects/micronaut-test/commit/7679d10b4073a3b842b6c56877c35fa8cd10acff
branch: master
digest: 7679d10b4073a3b842b6c56877c35fa8cd10acff
path: https://github.com/micronaut-projects/micronaut-test
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"id": "slf4j",
"purl": "",
"path": "https://github.com/qos-ch/slf4j.git",
"branch": "v_1.7.36",
"branch": "",
"digest": "e9ee55cca93c2bf26f14482a9bdf961c750d2a56",
"note": "",
"available": "AVAILABLE"
Expand All @@ -12,7 +12,7 @@
"id": "caffeine",
"purl": "",
"path": "https://github.com/ben-manes/caffeine.git",
"branch": "v2.9.3",
"branch": "",
"digest": "05a040c2478341bab8a58a02b3dc1fe14d626d72",
"note": "",
"available": "AVAILABLE"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"id": "slf4j",
"purl": "",
"path": "https://github.com/qos-ch/slf4j.git",
"branch": "v_1.7.36",
"branch": "",
"digest": "e9ee55cca93c2bf26f14482a9bdf961c750d2a56",
"note": "",
"available": "AVAILABLE"
Expand All @@ -12,7 +12,7 @@
"id": "caffeine",
"purl": "",
"path": "https://github.com/ben-manes/caffeine.git",
"branch": "v2.9.3",
"branch": "",
"digest": "05a040c2478341bab8a58a02b3dc1fe14d626d72",
"note": "",
"available": "AVAILABLE"
Expand Down
8 changes: 0 additions & 8 deletions tests/e2e/configurations/maven_digest_no_branch.yaml

This file was deleted.

Loading

0 comments on commit 8056f9f

Please sign in to comment.