Skip to content

Commit

Permalink
Merge pull request #573 from lenskit/fix/fallback-bias
Browse files Browse the repository at this point in the history
Improve pipeline logging and builder API
  • Loading branch information
mdekstrand authored Dec 25, 2024
2 parents ce74335 + 6b1c07f commit 6dbdb1a
Show file tree
Hide file tree
Showing 7 changed files with 49 additions and 7 deletions.
2 changes: 1 addition & 1 deletion lenskit/lenskit/basic/bias.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def __init__(

@property
def is_trained(self) -> bool:
return hasattr(self, "bias_")
return hasattr(self, "model_")

def train(self, data: Dataset):
"""
Expand Down
4 changes: 4 additions & 0 deletions lenskit/lenskit/logging/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Logging, progress, and resource records.
"""

import os
from typing import Any

import structlog
Expand All @@ -21,6 +22,7 @@
]

get_logger = structlog.stdlib.get_logger
_trace_debug = os.environ.get("LK_TRACE", "no").lower() == "debug"


def trace(logger: structlog.stdlib.BoundLogger, *args: Any, **kwargs: Any):
Expand All @@ -32,3 +34,5 @@ def trace(logger: structlog.stdlib.BoundLogger, *args: Any, **kwargs: Any):
meth = getattr(logger, "trace", None)
if meth is not None:
meth(*args, **kwargs)
elif _trace_debug:
logger.debug(*args, **kwargs)
5 changes: 3 additions & 2 deletions lenskit/lenskit/pipeline/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
# pyright: strict
from __future__ import annotations

import logging
import typing
import warnings
from types import FunctionType, UnionType
from uuid import NAMESPACE_URL, uuid4, uuid5

import structlog
from typing_extensions import Any, Literal, Self, TypeAlias, TypeVar, cast, overload

from lenskit.data import Dataset
Expand Down Expand Up @@ -51,7 +51,7 @@
"topn_pipeline",
]

_log = logging.getLogger(__name__)
_log = structlog.stdlib.get_logger(__name__)

# common type var for quick use
T = TypeVar("T")
Expand Down Expand Up @@ -707,6 +707,7 @@ def run_all(self, *nodes: str | Node[Any], **kwargs: object) -> PipelineState:

runner = PipelineRunner(self, kwargs)
node_list = [self.node(n) for n in nodes]
_log.debug("running pipeline", name=self.name, nodes=[n.name for n in node_list])
if not node_list:
node_list = self.nodes

Expand Down
2 changes: 1 addition & 1 deletion lenskit/lenskit/pipeline/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def candidate_selector(self, sel: Component):
self._selector = sel

def predicts_ratings(
self, transform: Component | None = None, *, fallback: Component | None = None
self, *, transform: Component | None = None, fallback: Component | None = None
):
"""
Specify that this pipeline will predict ratings, optionally providing a
Expand Down
5 changes: 5 additions & 0 deletions lenskit/lenskit/pipeline/components.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from __future__ import annotations

import inspect
import json
from abc import abstractmethod
from importlib import import_module
from types import FunctionType
Expand Down Expand Up @@ -214,6 +215,10 @@ def __call__(self, **kwargs: Any) -> COut:
"""
...

def __repr__(self) -> str:
params = json.dumps(self.get_config(), indent=2)
return f"<{self.__class__.__name__} {params}>"


def instantiate_component(
comp: str | type | FunctionType, config: dict[str, Any] | None
Expand Down
7 changes: 4 additions & 3 deletions lenskit/lenskit/pipeline/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ def run(self, node: Node[Any], *, required: bool = True) -> Any:
elif status == "failed": # pragma: nocover
raise RuntimeError(f"{node} previously failed")

trace(self.log, "processing node %s", node)
trace(self.log, "processing node", node=node.name)
self.status[node.name] = "in-progress"
try:
self._run_node(node, required)
Expand Down Expand Up @@ -96,6 +96,7 @@ def _inject_input(self, name: str, types: set[type] | None, required: bool) -> N
if val is not None and types and not is_compatible_data(val, *types):
raise TypeError(f"invalid data for input {name} (expected {types}, got {type(val)})")

trace(self.log, "injecting input", name=name, value=val)
self.state[name] = val

def _run_component(
Expand All @@ -107,7 +108,7 @@ def _run_component(
required: bool,
) -> None:
in_data = {}
log = self.log.bind(component=name)
log = self.log.bind(node=name)
trace(log, "processing inputs")
for iname, itype in inputs.items():
# look up the input wiring for this parameter input
Expand Down Expand Up @@ -158,7 +159,7 @@ def _run_component(

in_data[iname] = ival

trace(log, "running component")
trace(log, "running component", component=comp)
self.state[name] = comp(**in_data)


Expand Down
31 changes: 31 additions & 0 deletions lenskit/tests/basic/test_composite.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
# Licensed under the MIT license, see LICENSE.md for details.
# SPDX-License-Identifier: MIT

import logging
import pickle
from typing import Any

Expand All @@ -20,9 +21,13 @@
from lenskit.data import Dataset
from lenskit.data.items import ItemList
from lenskit.data.types import ID
from lenskit.operations import predict, score
from lenskit.pipeline import Pipeline
from lenskit.pipeline.common import RecPipelineBuilder
from lenskit.util.test import ml_ds, ml_ratings # noqa: F401

_log = logging.getLogger(__name__)


def test_fallback_fill_missing(ml_ds: Dataset):
pipe = Pipeline()
Expand Down Expand Up @@ -51,3 +56,29 @@ def test_fallback_fill_missing(ml_ds: Dataset):

assert scores[:2] == approx(known(2, ItemList(item_ids=items[:2])).scores())
assert scores[2:] == approx(bias(2, ItemList(item_ids=items[2:])).scores())


def test_fallback_double_bias(rng: np.random.Generator, ml_ds: Dataset):
builder = RecPipelineBuilder()
builder.scorer(BiasScorer(damping=50))
builder.predicts_ratings(fallback=BiasScorer(damping=0))
pipe = builder.build("double-bias")

_log.info("pipeline configuration: %s", pipe.get_config().model_dump_json(indent=2))

pipe.train(ml_ds)

for user in rng.choice(ml_ds.users.ids(), 100):
items = rng.choice(ml_ds.items.ids(), 500)
scores = score(pipe, user, items)
scores = scores.scores()
assert scores is not None
assert not np.any(np.isnan(scores))

preds = predict(pipe, user, items)

preds = preds.scores()
assert preds is not None
assert not np.any(np.isnan(preds))

assert scores == approx(preds)

0 comments on commit 6dbdb1a

Please sign in to comment.