Skip to content

Commit

Permalink
feat: introduce confidence scores for check facts
Browse files Browse the repository at this point in the history
Signed-off-by: behnazh-w <behnaz.hassanshahi@oracle.com>
  • Loading branch information
behnazh-w committed Jan 31, 2024
1 parent 064ce8f commit 7f92fe2
Show file tree
Hide file tree
Showing 28 changed files with 531 additions and 434 deletions.
104 changes: 45 additions & 59 deletions src/macaron/database/table_definitions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""
Expand All @@ -10,7 +10,6 @@
For table associated with a check see the check module.
"""
import hashlib
import logging
import os
import string
Expand All @@ -19,14 +18,23 @@
from typing import Any, Self

from packageurl import PackageURL
from sqlalchemy import Boolean, Column, Enum, ForeignKey, Integer, String, Table, UniqueConstraint
from sqlalchemy import (
Boolean,
CheckConstraint,
Column,
Enum,
Float,
ForeignKey,
Integer,
String,
Table,
UniqueConstraint,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship

from macaron.database.database_manager import ORMBase
from macaron.database.rfc3339_datetime import RFC3339DateTime
from macaron.errors import CUEExpectationError, CUERuntimeError, InvalidPURLError
from macaron.slsa_analyzer.provenance.expectations.cue import cue_validator
from macaron.slsa_analyzer.provenance.expectations.expectation import Expectation
from macaron.errors import InvalidPURLError
from macaron.slsa_analyzer.slsa_req import ReqName

logger: logging.Logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -415,6 +423,16 @@ class CheckFacts(ORMBase):
#: The primary key.
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) # noqa: A003

#: The confidence score to estimate the accuracy of the check fact. This value should be in [0.0, 1.0] with
#: a lower value depicting a lower confidence. Because some analyses used in checks may use
#: heuristics, the results can be inaccurate in certain cases.
#: We use the confidence score to enable the check designer to assign a confidence estimate.
#: This confidence is stored in the database to be used by the policy. This confidence score is
#: also used to decide which evidence should be shown to the user in the HTML/JSON report.
confidence: Mapped[float] = mapped_column(
Float, CheckConstraint("confidence>=0.0 AND confidence<=1.0"), nullable=False
)

#: The foreign key to the software component.
component_id: Mapped[int] = mapped_column(Integer, ForeignKey("_component.id"), nullable=False)

Expand All @@ -430,68 +448,36 @@ class CheckFacts(ORMBase):
#: A many-to-one relationship with check results.
checkresult: Mapped["MappedCheckResult"] = relationship(back_populates="checkfacts")

#: The polymorphic inheritance configuration.
__mapper_args__ = {
"polymorphic_identity": "CheckFacts",
"polymorphic_on": "check_type",
}

def __lt__(self, other: Self) -> bool:
"""Compare two check facts using their confidence values.
class CUEExpectation(Expectation, CheckFacts):
"""ORM Class for an expectation."""
This comparison function is intended to be used by a heapq, which is a Min-Heap data structure.
The root element in a heapq is the minimum element in the queue and each `confidence` value is in [0, 1].
Therefore, we need to reverse the comparison function to make sure the fact with the highest confidence is stored
in the root element. This implementation compares `1 - confidence` so that it returns True if the confidence of
`self` is greater than the confidence of `other`.
# TODO: provenance content check should store the expectation, its evaluation result,
# and which PROVENANCE it was applied to rather than only linking to the repository.
.. code-block:: pycon
__tablename__ = "_expectation"
>>> fact_a = CheckFacts()
>>> fact_b = CheckFacts()
>>> fact_a.confidence = 0.2
>>> fact_b.confidence = 0.7
>>> fact_b < fact_a
True
#: The primary key, which is also a foreign key to the base check table.
id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003
Returns
-------
bool
"""
return (1 - self.confidence) < (1 - other.confidence)

#: The polymorphic inheritance configuration.
__mapper_args__ = {
"polymorphic_identity": "_expectation",
"polymorphic_identity": "CheckFacts",
"polymorphic_on": "check_type",
}

@classmethod
def make_expectation(cls, expectation_path: str) -> Self | None:
"""Construct a CUE expectation from a CUE file.
Note: we require the CUE expectation file to have a "target" field.
Parameters
----------
expectation_path: str
The path to the expectation file.
Returns
-------
Self
The instantiated expectation object.
"""
logger.info("Generating an expectation from file %s", expectation_path)
expectation: CUEExpectation = CUEExpectation(
description="CUE expectation",
path=expectation_path,
target="",
expectation_type="CUE",
)

try:
with open(expectation_path, encoding="utf-8") as expectation_file:
expectation.text = expectation_file.read()
expectation.sha = str(hashlib.sha256(expectation.text.encode("utf-8")).hexdigest())
expectation.target = cue_validator.get_target(expectation.text)
expectation._validator = ( # pylint: disable=protected-access
lambda provenance: cue_validator.validate_expectation(expectation.text, provenance)
)
except (OSError, CUERuntimeError, CUEExpectationError) as error:
logger.error("CUE expectation error: %s", error)
return None

# TODO remove type ignore once mypy adds support for Self.
return expectation # type: ignore


class Provenance(ORMBase):
"""ORM class for a provenance document."""
Expand Down
6 changes: 4 additions & 2 deletions src/macaron/policy_engine/souffle_code_generator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""Generate souffle datalog for policy prelude."""

import logging
import os

from sqlalchemy import Column, MetaData, Table
from sqlalchemy import Column, Float, MetaData, Table
from sqlalchemy.sql.sqltypes import Boolean, Integer, String, Text

logger: logging.Logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -81,6 +81,8 @@ def column_to_souffle_type(column: Column) -> str:
souffle_type = "symbol"
elif isinstance(sql_type, Integer):
souffle_type = "number"
elif isinstance(sql_type, Float):
souffle_type = "number"
elif isinstance(sql_type, Text):
souffle_type = "symbol"
elif isinstance(sql_type, Boolean):
Expand Down
8 changes: 4 additions & 4 deletions src/macaron/slsa_analyzer/analyze_context.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the Analyze Context class.
Expand Down Expand Up @@ -38,8 +38,6 @@ class ChecksOutputs(TypedDict):
"""The CI services information for this repository."""
is_inferred_prov: bool
"""True if we cannot find the provenance and Macaron needs to infer the provenance."""
# We need to use typing.Protocol for multiple inheritance, however, the Expectation
# class uses inlined functions, which is not supported by Protocol.
expectation: Expectation | None
"""The expectation to verify the provenance for this repository."""
package_registries: list[PackageRegistryInfo]
Expand Down Expand Up @@ -109,7 +107,9 @@ def provenances(self) -> dict[str, list[InTotoV01Statement | InTotoV1Statement]]
# By default, initialize every key with an empty list.
result: dict[str, list[InTotoV01Statement | InTotoV1Statement]] = defaultdict(list)
for ci_info in ci_services:
result[ci_info["service"].name].extend(payload.statement for payload in ci_info["provenances"])
result[ci_info["service"].name].extend(
prov_asset.payload.statement for prov_asset in ci_info["provenances"]
)
package_registry_entries = self.dynamic_data["package_registries"]
for package_registry_entry in package_registry_entries:
result[package_registry_entry.package_registry.name].extend(
Expand Down
9 changes: 8 additions & 1 deletion src/macaron/slsa_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from macaron.repo_finder.commit_finder import find_commit
from macaron.slsa_analyzer import git_url
from macaron.slsa_analyzer.analyze_context import AnalyzeContext
from macaron.slsa_analyzer.asset import VirtualReleaseAsset
from macaron.slsa_analyzer.build_tool import BUILD_TOOLS

# To load all checks into the registry
Expand All @@ -40,6 +41,7 @@
from macaron.slsa_analyzer.package_registry import PACKAGE_REGISTRIES
from macaron.slsa_analyzer.provenance.expectations.expectation_registry import ExpectationRegistry
from macaron.slsa_analyzer.provenance.intoto import InTotoV01Payload
from macaron.slsa_analyzer.provenance.slsa import SLSAProvenanceData
from macaron.slsa_analyzer.registry import registry
from macaron.slsa_analyzer.specs.ci_spec import CIInfo
from macaron.slsa_analyzer.specs.inferred_provenance import Provenance
Expand Down Expand Up @@ -857,7 +859,12 @@ def perform_checks(self, analyze_ctx: AnalyzeContext) -> dict[str, CheckResult]:
callgraph=callgraph,
provenance_assets=[],
latest_release={},
provenances=[InTotoV01Payload(statement=Provenance().payload)],
provenances=[
SLSAProvenanceData(
payload=InTotoV01Payload(statement=Provenance().payload),
asset=VirtualReleaseAsset(name="No_ASSET", url="NO_URL", size_in_bytes=0),
)
],
)
)

Expand Down
31 changes: 29 additions & 2 deletions src/macaron/slsa_analyzer/asset/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# Copyright (c) 2023 - 2023, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2023 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module defines classes and interfaces related to assets.
Assets are files published from some build.
"""

from typing import Protocol
from typing import NamedTuple, Protocol


class AssetLocator(Protocol):
Expand Down Expand Up @@ -38,3 +38,30 @@ def download(self, dest: str) -> bool:
bool
``True`` if the asset is downloaded successfully; ``False`` if not.
"""


class VirtualReleaseAsset(NamedTuple):
    """A placeholder asset used when an asset doesn't actually exist.

    The ``download`` method never fetches anything and always reports failure.
    """

    #: The asset name.
    name: str
    #: The URL to the asset.
    url: str
    #: The size of the asset, in bytes.
    size_in_bytes: int

    def download(self, dest: str) -> bool:  # pylint: disable=unused-argument
        """Report that the asset cannot be downloaded.

        A virtual asset does not actually exist, so nothing is written to ``dest``.

        Parameters
        ----------
        dest : str
            The local destination where the asset would be downloaded to.
            It is not used by this implementation.

        Returns
        -------
        bool
            Always ``False``.
        """
        return False
16 changes: 7 additions & 9 deletions src/macaron/slsa_analyzer/checks/base_check.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2022 - 2023, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2022 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module contains the BaseCheck class to be inherited by other concrete Checks."""
Expand Down Expand Up @@ -99,9 +99,7 @@ def run(self, target: AnalyzeContext, skipped_info: SkippedInfo | None = None) -
check_result_data: CheckResultData

if skipped_info:
check_result_data = CheckResultData(
justification=[skipped_info["suppress_comment"]], result_tables=[], result_type=self.result_on_skip
)
check_result_data = CheckResultData(result_tables=[], result_type=self.result_on_skip)
logger.info(
"Check %s is skipped on target %s, comment: %s",
self.check_info.check_id,
Expand All @@ -115,14 +113,14 @@ def run(self, target: AnalyzeContext, skipped_info: SkippedInfo | None = None) -
self.check_info.check_id,
check_result_data.result_type.value,
target.component.purl,
check_result_data.justification,
check_result_data.justification_report,
)

# This justification string will be stored in the feedback column of `SLSARequirement` table.
# TODO: Storing the justification as feedback in the `SLSARequirement` table seems redundant and might need
# refactoring.
justification_str = ""
for ele in check_result_data.justification:
if isinstance(ele, dict):
for key, val in ele.items():
justification_str += f"{key}: {val}. "
for _, ele in check_result_data.justification_report:
justification_str += f"{str(ele)}. "

target.bulk_update_req_status(
Expand Down
Loading

0 comments on commit 7f92fe2

Please sign in to comment.