Skip to content

Commit

Permalink
refactor: source code repo heuristic replacing unreachable project links
Browse files Browse the repository at this point in the history
  • Loading branch information
art1f1c3R committed Feb 12, 2025
1 parent 58b1a8b commit 57aba12
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 144 deletions.
4 changes: 2 additions & 2 deletions src/macaron/malware_analyzer/pypi_heuristics/heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ class Heuristics(str, Enum):
#: Indicates that the package does not contain any project links (such as documentation or Git repository pages).
EMPTY_PROJECT_LINK = "empty_project_link"

#: Indicates that the package contains project links, but all of them are unreachable.
UNREACHABLE_PROJECT_LINKS = "unreachable_project_links"
#: Indicates that the source code repository for the package was not found.
SOURCE_CODE_REPO = "source_code_repo"

#: Indicates that the package contains only one release.
ONE_RELEASE = "one_release"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""The heuristic analyzer to check the project links."""
"""The heuristic analyzer to check if a source code repo was found."""

import logging

import requests

from macaron.json_tools import JsonType
from macaron.malware_analyzer.pypi_heuristics.base_analyzer import BaseHeuristicAnalyzer
from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult, Heuristics
Expand All @@ -15,17 +13,17 @@
logger: logging.Logger = logging.getLogger(__name__)


class UnreachableProjectLinksAnalyzer(BaseHeuristicAnalyzer):
class SourceCodeRepoAnalyzer(BaseHeuristicAnalyzer):
"""
Analyze the accessibility of the project links.
Analyze the accessibility of the source code repository.
If >= 1 project links are reachable, the analyzer consider the package as benign.
Passes if a repository was found and validated by the repo finder, otherwise fails.
"""

def __init__(self) -> None:
super().__init__(
name="unreachable_project_links_analyzer",
heuristic=Heuristics.UNREACHABLE_PROJECT_LINKS,
name="source_code_repo_analyzer",
heuristic=Heuristics.SOURCE_CODE_REPO,
depends_on=[(Heuristics.EMPTY_PROJECT_LINK, HeuristicResult.PASS)],
)

Expand All @@ -42,18 +40,7 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
tuple[HeuristicResult, dict[str, JsonType]]:
The result and related information collected during the analysis.
"""
project_links: dict | None = pypi_package_json.get_project_links()

if project_links is None:
return HeuristicResult.SKIP, {}

for link in project_links.values():
try:
response = requests.head(link, timeout=3)
if response.status_code < 400:
return HeuristicResult.PASS, {}
except requests.exceptions.RequestException as error:
logger.debug(error)
continue

return HeuristicResult.FAIL, {}
# If a source code repo exists, it will already have been validated
if not pypi_package_json.component.repository:
return HeuristicResult.FAIL, {}
return HeuristicResult.PASS, {}
26 changes: 13 additions & 13 deletions src/macaron/slsa_analyzer/checks/detect_malicious_metadata_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
from macaron.malware_analyzer.pypi_heuristics.metadata.empty_project_link import EmptyProjectLinkAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.high_release_frequency import HighReleaseFrequencyAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.one_release import OneReleaseAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.source_code_repo import SourceCodeRepoAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.unchanged_release import UnchangedReleaseAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.unreachable_project_links import UnreachableProjectLinksAnalyzer
from macaron.malware_analyzer.pypi_heuristics.metadata.wheel_absence import WheelAbsenceAnalyzer
from macaron.malware_analyzer.pypi_heuristics.pypi_sourcecode_analyzer import PyPISourcecodeAnalyzer
from macaron.malware_analyzer.pypi_heuristics.sourcecode.suspicious_setup import SuspiciousSetupAnalyzer
Expand Down Expand Up @@ -69,7 +69,7 @@ class MaliciousMetadataFacts(CheckFacts):
# When implementing a new analyzer, append its class to this list
ANALYZERS: list = [
EmptyProjectLinkAnalyzer,
UnreachableProjectLinksAnalyzer,
SourceCodeRepoAnalyzer,
OneReleaseAnalyzer,
HighReleaseFrequencyAnalyzer,
UnchangedReleaseAnalyzer,
Expand Down Expand Up @@ -97,7 +97,7 @@ class MaliciousMetadataFacts(CheckFacts):
] = {
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.SKIP, # Source Code Repo
HeuristicResult.FAIL, # One Release
HeuristicResult.SKIP, # High Release Frequency
HeuristicResult.SKIP, # Unchanged Release
Expand All @@ -112,7 +112,7 @@ class MaliciousMetadataFacts(CheckFacts):
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.SKIP, # Source Code Repo
HeuristicResult.FAIL, # One Release
HeuristicResult.SKIP, # High Release Frequency
HeuristicResult.SKIP, # Unchanged Release
Expand All @@ -127,7 +127,7 @@ class MaliciousMetadataFacts(CheckFacts):
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.SKIP, # Source Code Repo
HeuristicResult.PASS, # One Release
HeuristicResult.FAIL, # High Release Frequency
HeuristicResult.FAIL, # Unchanged Release
Expand All @@ -141,7 +141,7 @@ class MaliciousMetadataFacts(CheckFacts):
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.SKIP, # Source Code Repo
HeuristicResult.PASS, # One Release
HeuristicResult.FAIL, # High Release Frequency
HeuristicResult.PASS, # Unchanged Release
Expand All @@ -155,7 +155,7 @@ class MaliciousMetadataFacts(CheckFacts):
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.SKIP, # Source Code Repo
HeuristicResult.PASS, # One Release
HeuristicResult.FAIL, # High Release Frequency
HeuristicResult.FAIL, # Unchanged Release
Expand All @@ -169,7 +169,7 @@ class MaliciousMetadataFacts(CheckFacts):
): Confidence.MEDIUM,
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.SKIP, # Source Code Repo
HeuristicResult.PASS, # One Release
HeuristicResult.FAIL, # High Release Frequency
HeuristicResult.FAIL, # Unchanged Release
Expand All @@ -183,21 +183,21 @@ class MaliciousMetadataFacts(CheckFacts):
): Confidence.MEDIUM,
(
HeuristicResult.PASS, # Empty Project
HeuristicResult.FAIL, # Unreachable Project Links
HeuristicResult.FAIL, # Source Code Repo
HeuristicResult.PASS, # One Release
HeuristicResult.FAIL, # High Release Frequency
HeuristicResult.PASS, # Unchanged Release
HeuristicResult.FAIL, # Closer Release Join Date
HeuristicResult.FAIL, # Suspicious Setup
HeuristicResult.FAIL, # Wheel Absence
HeuristicResult.SKIP, # Anomalous Version
# All project links are unreachable, frequent releases of multiple versions,
# No source code repo, frequent releases of multiple versions,
# and the maintainer released it shortly after account registration.
# The setup.py file contains suspicious imports and .whl file isn't present.
): Confidence.HIGH,
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.SKIP, # Source Code Repo
HeuristicResult.FAIL, # One Release
HeuristicResult.SKIP, # High Release Frequency
HeuristicResult.SKIP, # Unchanged Release
Expand All @@ -212,7 +212,7 @@ class MaliciousMetadataFacts(CheckFacts):
): Confidence.MEDIUM,
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.SKIP, # Source Code Repo
HeuristicResult.FAIL, # One Release
HeuristicResult.SKIP, # High Release Frequency
HeuristicResult.SKIP, # Unchanged Release
Expand All @@ -227,7 +227,7 @@ class MaliciousMetadataFacts(CheckFacts):
): Confidence.MEDIUM,
(
HeuristicResult.FAIL, # Empty Project
HeuristicResult.SKIP, # Unreachable Project Links
HeuristicResult.SKIP, # Source Code Repo
HeuristicResult.FAIL, # One Release
HeuristicResult.SKIP, # High Release Frequency
HeuristicResult.SKIP, # Unchanged Release
Expand Down
32 changes: 32 additions & 0 deletions tests/malware_analyzer/pypi/test_source_code_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) 2024 - 2025, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""Tests for the source code repository heuristic used to detect malicious PyPI metadata."""

from unittest.mock import MagicMock

import pytest

from macaron.malware_analyzer.pypi_heuristics.heuristics import HeuristicResult
from macaron.malware_analyzer.pypi_heuristics.metadata.source_code_repo import SourceCodeRepoAnalyzer


@pytest.mark.parametrize(
    ("repository", "expected_result"),
    [
        pytest.param(None, HeuristicResult.FAIL, id="test_no_repo"),
        pytest.param(MagicMock(), HeuristicResult.PASS, id="test_valid_repo"),
    ],
)
def test_repo_existence(
    pypi_package_json: MagicMock, repository: MagicMock | None, expected_result: HeuristicResult
) -> None:
    """Check the analyzer verdict against the presence of a component repository.

    Parameters
    ----------
    pypi_package_json : MagicMock
        Mocked PyPI package JSON asset handed to the analyzer.
    repository : MagicMock | None
        Value assigned to ``component.repository``; ``None`` stands for "no repository found".
    expected_result : HeuristicResult
        The heuristic result the analyzer is expected to report for that repository value.
    """
    # Arrange: set the repository on the mocked asset's component.
    pypi_package_json.component.repository = repository

    # Act: only the verdict matters here, the detail payload is ignored.
    verdict, _detail = SourceCodeRepoAnalyzer().analyze(pypi_package_json)

    assert verdict == expected_result
105 changes: 0 additions & 105 deletions tests/malware_analyzer/pypi/test_unreachable_project_links_analyzer.py

This file was deleted.

0 comments on commit 57aba12

Please sign in to comment.