diff --git a/lenskit/lenskit/basic/popularity.py b/lenskit/lenskit/basic/popularity.py index 421d0a31d..e7f7adb58 100644 --- a/lenskit/lenskit/basic/popularity.py +++ b/lenskit/lenskit/basic/popularity.py @@ -111,22 +111,19 @@ def train(self, data: Dataset, options: TrainingOptions = TrainingOptions()): return _log.info("counting time-bounded item popularity") - log = data.interaction_table(format="pandas") + log = data.interaction_table(format="pandas", original_ids=True) item_scores = None - if "timestamps" not in log.columns: + if "timestamp" not in log.columns: _log.warning("no timestamps in interaction log; falling back to PopScorer") super().train(data, options) return else: - counts = np.zeros(data.item_count, dtype=np.int32) start_timestamp = self.config.cutoff.timestamp() - item_nums = log["item_num"][log["timestamp"] > start_timestamp] - np.add.at(counts, item_nums, 1) + item_ids = log["item_id"][log["timestamp"] > start_timestamp] + counts = item_ids.value_counts().reindex(data.items.index, fill_value=0) - item_scores = super()._train_internal( - pd.Series(counts, index=data.items.index), - ) + item_scores = super()._train_internal(counts) self.items_ = data.items.copy() self.item_scores_ = np.require(item_scores.reindex(self.items_.ids()).values, np.float32) diff --git a/lenskit/tests/basic/test_time_bounded_popular.py b/lenskit/tests/basic/test_time_bounded_popular.py index ee5577de3..35841bbaf 100644 --- a/lenskit/tests/basic/test_time_bounded_popular.py +++ b/lenskit/tests/basic/test_time_bounded_popular.py @@ -19,8 +19,8 @@ two_days_ago = ts - timedelta(days=2) simple_df = pd.DataFrame( { - "item": [1, 2, 2, 3], - "user": [10, 12, 10, 13], + "item_id": [1, 2, 2, 3], + "user_id": [10, 12, 10, 13], "rating": [4.0, 3.0, 5.0, 2.0], "timestamp": [i.timestamp() for i in [ts, one_day_ago, one_day_ago, one_day_ago]], }