return masked anomaly scores

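Summary: Add a `mask_scores` parameter to `fit_predict`. When it is True, the returned scores are a mask of the test statistic (anomaly score) applied to the critical value (threshold) instead of the raw test statistic. Differential Revision: D68624041 fbshipit-source-id: 88e3a42686f01214830328408ec5666b7bcf7015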
facebookresearch · Jan 28, 2025 · 08b5d52 · 08b5d52
1 parent 1aad463
commit 08b5d52
Show file tree

Hide file tree

Showing 2 changed files with 49 additions and 5 deletions.
diff --git a/kats/detectors/interval_detector.py b/kats/detectors/interval_detector.py
@@ -567,6 +567,7 @@ def fit_predict(
         interval_padding: int = 30,
         interval_units: str = "m",
         r_tol: float = 0.1,
+        mask_scores: bool = False,
         **kwargs: Any,
     ) -> AnomalyResponse:
         """Fit and predict on a Interval based AB test on time series data.
@@ -594,10 +595,11 @@ def fit_predict(
             r_tol: Relative tolerance used for automatic assignment to duration property.
                 If duration is `None`, then a value is automatically assigned such that
                 alpha is corrected to be no greater than alpha * (1 + r_tol).
+            mask_scores: If True the returned scores will be masked by applying them to the critical value
 
         Returns:
             The results of the Interval based AB test. Including:
-                - scores: Raw test statistic.
+                - scores: Raw test statistic, or mask of test statistic applied to critical value.
                 - predicted_ts: Boolean array of predictions that are formed from contiguous intervals.
                 - stat_sig: Statistical significance of `scores`.
                 - upper: Upper limit in the (1 - alpha) confidence interval.
@@ -657,10 +659,13 @@ def fit_predict(
         _stat_sig: pd.Series = self.test_result.stat_sig
         _upper: pd.Series = self.test_result.upper
         _lower: pd.Series = self.test_result.lower
+        scores = (
+            pd.Series(self._get_test_decision(ABIntervalType.REJECT))
+            if mask_scores
+            else self.test_result.test_statistic
+        )
         return AnomalyResponse(
-            scores=TimeSeriesData(
-                time=_data.time, value=self.test_result.test_statistic
-            ),
+            scores=TimeSeriesData(time=_data.time, value=scores),
             confidence_band=ConfidenceBand(
                 upper=TimeSeriesData(
                     time=_data.time,

diff --git a/kats/tests/detectors/test_interval_detector.py b/kats/tests/detectors/test_interval_detector.py
@@ -7,7 +7,7 @@
 
 from datetime import datetime, timedelta
 from operator import attrgetter
-from typing import List, Tuple, Type, Union
+from typing import cast, List, Tuple, Type, Union
 from unittest import TestCase
 
 import numpy as np
@@ -16,6 +16,7 @@
 
 from kats.consts import TimeSeriesData
 from kats.detectors.interval_detector import (
+    ABInterval,
     ar_1,
     IntervalDetectorModel,
     OneSampleProportionIntervalDetectorModel,
@@ -401,6 +402,44 @@ def test_e2e(self, test_statistic: TestStatistic) -> None:
         assert _predicted_ds.value.iloc[40:45].all()
         assert np.isclose(_stat_sig_ts.value.iloc[40:45].values, 0.0).all()
 
+    def test_mask_scores(self) -> None:
+        """
+        Check that runs of consecutive True masked scores lasting at least
+        `duration` points reproduce the detector's anomaly intervals.
+        """
+        min_run = 5
+        model = TwoSampleProportionIntervalDetectorModel(
+            serialized_model=_SERIALIZED
+        )
+        model.duration = min_run
+
+        frame = self.df.copy()
+        # One point short of `min_run`: must not be reported as an anomaly.
+        frame.value_b.iloc[10 : 10 + min_run - 1] = 1.0
+        # Exactly `min_run` points: must be reported as an anomaly.
+        frame.value_b.iloc[40 : 40 + min_run] = 1.0
+
+        response = model.fit_predict(TimeSeriesData(frame), mask_scores=True)
+
+        expected = [
+            (found.start_idx, found.end_idx) for found in model.anomaly_intervals
+        ]
+
+        # Rebuild intervals from the mask; a run is only closed by a False score.
+        rebuilt = []
+        run_start = -1
+        prior = False
+        for pos, flag in enumerate(response.scores.value):
+            run_ended = flag is False and prior is True
+            run_began = flag is True and prior is False
+            if run_ended and pos - run_start >= min_run:
+                # The run spans run_start..pos - 1 inclusive.
+                rebuilt.append((run_start, pos - 1))
+            elif run_began:
+                run_start = pos
+            prior = flag
+        assert expected == rebuilt
+
     def test_duration(self) -> None:
         """E2E test of the duration parameter."""
         df = self.df.copy()