Skip to content

Commit

Permalink
return masked anomaly scores
Browse files Browse the repository at this point in the history
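Summary: Add a `mask_scores` parameter to `fit_predict`. When it is set, the returned scores are a mask of the test statistic (anomaly score) applied to the critical value (threshold) instead of the raw test statistic.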

Differential Revision: D68624041

fbshipit-source-id: 88e3a42686f01214830328408ec5666b7bcf7015
  • Loading branch information
Rikin Shah authored and facebook-github-bot committed Jan 28, 2025
1 parent 1aad463 commit 08b5d52
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 5 deletions.
13 changes: 9 additions & 4 deletions kats/detectors/interval_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,7 @@ def fit_predict(
interval_padding: int = 30,
interval_units: str = "m",
r_tol: float = 0.1,
mask_scores: bool = False,
**kwargs: Any,
) -> AnomalyResponse:
"""Fit and predict on a Interval based AB test on time series data.
Expand Down Expand Up @@ -594,10 +595,11 @@ def fit_predict(
r_tol: Relative tolerance used for automatic assignment to duration property.
If duration is `None`, then a value is automatically assigned such that
alpha is corrected to be no greater than alpha * (1 + r_tol).
mask_scores: If True the returned scores will be masked by applying them to the critical value
Returns:
The results of the Interval based AB test. Including:
- scores: Raw test statistic.
- scores: Raw test statistic, or mask of test statistic applied to critical value.
- predicted_ts: Boolean array of predictions that are formed from contiguous intervals.
- stat_sig: Statistical significance of `scores`.
- upper: Upper limit in the (1 - alpha) confidence interval.
Expand Down Expand Up @@ -657,10 +659,13 @@ def fit_predict(
_stat_sig: pd.Series = self.test_result.stat_sig
_upper: pd.Series = self.test_result.upper
_lower: pd.Series = self.test_result.lower
scores = (
pd.Series(self._get_test_decision(ABIntervalType.REJECT))
if mask_scores
else self.test_result.test_statistic
)
return AnomalyResponse(
scores=TimeSeriesData(
time=_data.time, value=self.test_result.test_statistic
),
scores=TimeSeriesData(time=_data.time, value=scores),
confidence_band=ConfidenceBand(
upper=TimeSeriesData(
time=_data.time,
Expand Down
41 changes: 40 additions & 1 deletion kats/tests/detectors/test_interval_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

from datetime import datetime, timedelta
from operator import attrgetter
from typing import List, Tuple, Type, Union
from typing import cast, List, Tuple, Type, Union
from unittest import TestCase

import numpy as np
Expand All @@ -16,6 +16,7 @@

from kats.consts import TimeSeriesData
from kats.detectors.interval_detector import (
ABInterval,
ar_1,
IntervalDetectorModel,
OneSampleProportionIntervalDetectorModel,
Expand Down Expand Up @@ -401,6 +402,44 @@ def test_e2e(self, test_statistic: TestStatistic) -> None:
assert _predicted_ds.value.iloc[40:45].all()
assert np.isclose(_stat_sig_ts.value.iloc[40:45].values, 0.0).all()

def test_mask_scores(self) -> None:
    """Check that masked scores reproduce the detector's anomaly intervals.

    With ``mask_scores=True`` the scores returned by ``fit_predict`` are a
    per-sample mask. Every run of consecutive positive mask values that is
    at least ``duration`` samples long is expected to match an entry in
    ``detector.anomaly_intervals`` as a ``(start_idx, end_idx)`` pair.
    """
    detector = TwoSampleProportionIntervalDetectorModel(
        serialized_model=_SERIALIZED
    )
    duration = 5
    detector.duration = duration

    df = self.df.copy()
    # Assign through a single positional indexer on the frame itself.
    # Chained assignment (`df.value_b.iloc[...] = ...`) writes to a temporary
    # under pandas Copy-on-Write and would leave `df` unchanged.
    value_b_col = df.columns.get_loc("value_b")
    # Not considered an anomaly: the run is one sample short of `duration`.
    df.iloc[10 : 10 + duration - 1, value_b_col] = 1.0
    # Considered an anomaly: the run is exactly `duration` samples long.
    df.iloc[40 : 40 + duration, value_b_col] = 1.0

    anomaly_response = detector.fit_predict(TimeSeriesData(df), mask_scores=True)

    anomaly_intervals = [
        (interval.start_idx, interval.end_idx)
        for interval in detector.anomaly_intervals
    ]

    # Rebuild the intervals from the mask alone: remember where each positive
    # run starts and emit it when the run ends, if it lasted long enough.
    # NOTE(review): a run still open at the last sample is never emitted;
    # this assumes the fixture extends past the second run -- confirm.
    reproduced_intervals = []
    previous_score = False
    start_index = -1
    for i, raw_score in enumerate(anomaly_response.scores.value):
        # Normalise to a plain bool so numpy booleans behave the same as
        # Python booleans (identity checks against True/False would not).
        score = bool(raw_score)
        if score and not previous_score:
            start_index = i
        elif not score and previous_score and i - start_index >= duration:
            reproduced_intervals.append((start_index, i - 1))
        previous_score = score
    assert anomaly_intervals == reproduced_intervals

def test_duration(self) -> None:
"""E2E test of the duration parameter."""
df = self.df.copy()
Expand Down

0 comments on commit 08b5d52

Please sign in to comment.