Fix bug in cusum prediction

Summary: We encountered an issue with the CUSUM prediction when only one chunk of prediction was generated, because we were attempting to extract frequency information from it. The CUSUM method reorganizes data into chunks and then reconstructs it based on frequency, but if we only have one chunk, there is no frequency information to extract. In reality, we can obtain frequency information from the data, but we lose it after reconstruction. Reconstruction is necessary to split the data into chunks for prediction purposes. To address this issue, we provided a fix that generates a separate data point using the same logic. The problem has a blast radius of 637 requests per day (as seen in DoD and detector gadget). Here's an example request and response: https://fburl.com/thrift_fiddle/8tnn9q1a The issue we're facing is that we're not receiving the proper message there. The issue only occurs for the last chunk when backtesting or creating a model, so it doesn't completely block users. However, it does create noise for us and slightly affects our metrics. To address this issue, I obtained the frequency from historical data for this specific case, and it is now working correctly. I kept the old logic for all other cases to reduce the blast radius of the diff (to only failed cases) in case something goes wrong. See this example request and response: https://fburl.com/thrift_fiddle/8tnn9q1a (the problem is that we are not getting a proper message there). IMPORTANT: we will still have a problem if the provided prediction data is empty. We need to invest more time to identify the correct behaviour there. Reviewed By: islijepcevic Differential Revision: D63729249 fbshipit-source-id: d7bb4300342919b201639363fcdd31c7bf9b1cc3
facebookresearch · Oct 17, 2024 · 16df5c3 · 16df5c3
1 parent 40aab6d
commit 16df5c3
Show file tree

Hide file tree

Showing 2 changed files with 119 additions and 7 deletions.
diff --git a/kats/detectors/cusum_model.py b/kats/detectors/cusum_model.py
@@ -852,6 +852,7 @@ def fit_predict(
                 predict_results.score,
                 predict_results.absolute_change,
                 historical_data.value.name,
+                freq_historical=frequency,
             )
             score_tsd.extend(
                 score_tsd_vec,
@@ -984,16 +985,28 @@ def _reorganize_back(
         scores: TimeSeriesData,
         magnitude_ts: TimeSeriesData,
         name: str,
+        freq_historical: Optional[pd.Timedelta] = None,
     ) -> Tuple[TimeSeriesData, TimeSeriesData]:
         anom_scores_val_array = np.asarray(scores.value)
         anom_mag_val_array = np.asarray(magnitude_ts.value)
-        freq = scores.time[1] - scores.time[0]
-        time_need = pd.date_range(
-            start=scores.time.iloc[0],
-            end=None,
-            periods=anom_scores_val_array.shape[0] * anom_scores_val_array.shape[1],
-            freq=freq,
-        )
+
+        if len(scores.time) == 0:
+            # empty time range
+            time_need = pd.date_range(start=0, end=0, periods=0)
+        else:
+            freq = freq_historical
+            if len(scores.time) > 1:
+                freq = scores.time[1] - scores.time[0]
+            elif freq == None:
+                assert ValueError(
+                    "CUSUM prediction error, get not enough data to infer frequency"
+                )
+            time_need = pd.date_range(
+                start=scores.time.iloc[0],
+                end=None,
+                periods=anom_scores_val_array.shape[0] * anom_scores_val_array.shape[1],
+                freq=freq,
+            )
 
         anom_scores_val_1d = pd.Series(
             anom_scores_val_array.T.reshape([-1]),

diff --git a/kats/tests/detectors/test_cusum_model.py b/kats/tests/detectors/test_cusum_model.py
@@ -1158,6 +1158,105 @@ def test_vectorized_true_results_irregular_granularity(self) -> None:
             (d.vectorized_trans_flag, d1.vectorized_trans_flag), (False, False)
         )
 
+    def test_vectorized_small_prediction_data_cases(self) -> None:
+        """
+        Test the case where the prediction data forms only one chunk of the scan window;
+        we still have to return some results.
+
+        Calculation to get one chunk. Basically, the scan window must be 2 times larger than the frequency so that the step window equals the frequency, which leads to a one-chunk prediction:
+        step_window = scan_window/2 = 172800/2 = 86400
+        n_hist_win_pts = historical_window/freq = 604800/86400 = 7
+        multi_ts_len = (historical_window+step_window)/freq = (604800+86400)/86400 = 8
+        n_step_win_pts =  multi_ts_len - n_hist_win_pts = 1
+        so we are predicting for 1 point by the end of this calculation.
+        """
+
+        ts = {
+            "1725353999": 167,
+            "1725440399": 77,
+            "1725526799": 144,
+            "1725613199": 123,
+            "1725699599": 142,
+            "1725785999": 132,
+            "1725872399": 287,
+            "1725958799": 213,
+            "1726045199": 91,
+            "1726131599": 312,
+            "1726217999": 196,
+            "1726304399": 80,
+            "1726390799": 217,
+            "1726477199": 210,
+            "1726563599": 297,
+            "1726649999": 120,
+            "1726736399": 294,
+            "1726822799": 93,
+            "1726909199": 304,
+            "1726995599": 355,
+            "1727081999": 83,
+            "1727168399": 151,
+            "1727254799": 137,
+            "1727341199": 289,
+            "1727427599": 80,
+            "1727513999": 79,
+            "1727600399": 191,
+        }
+        scanWindow = 172800
+        historyWindow = 604800
+        hist_data = TimeSeriesData()
+        data = TimeSeriesData(
+            time=pd.to_datetime(list(ts.keys()), unit="s"),
+            value=pd.Series(list(ts.values())),
+        )
+
+        d = CUSUMDetectorModel(
+            scan_window=scanWindow,
+            historical_window=historyWindow,
+            remove_seasonality=True,
+            score_func=CusumScoreFunction.z_score,
+            vectorized=False,
+        )
+
+        anom = d.fit_predict(data=data, historical_data=hist_data)
+        d1 = CUSUMDetectorModel(
+            scan_window=scanWindow,
+            historical_window=historyWindow,
+            remove_seasonality=True,
+            score_func=CusumScoreFunction.z_score,
+            vectorized=True,
+        )
+
+        anom1 = d1.fit_predict(data=data, historical_data=hist_data)
+
+        self.assertEqual(
+            (d.vectorized_trans_flag, d1.vectorized_trans_flag), (False, True)
+        )
+        # pyre-fixme[16]: `bool` has no attribute `sum`.
+        self.assertEqual((anom1.scores.time == anom.scores.time).sum(0), len(ts))
+        self.assertEqual(np.round(anom1.scores.value - anom.scores.value, 5).sum(0), 0)
+        self.assertEqual(
+            np.round(
+                anom1.anomaly_magnitude_ts.value - anom.anomaly_magnitude_ts.value, 5
+            ).sum(0),
+            0,
+        )
+        # We still have a problem with empty data; the disabled test below demonstrates it.
+
+        # d2 = CUSUMDetectorModel(
+        #     scan_window=3600 * 24 * 8,
+        #     historical_window=3600 * 24 * 10,
+        #     remove_seasonality=True,
+        #     score_func=CusumScoreFunction.z_score,
+        #     vectorized=True,
+        # )
+
+        # anom2 = d2.fit_predict(
+        #     data=TimeSeriesData(
+        #         time=pd.DatetimeIndex([]), value=pd.Series([], name=self.ts.value.name)
+        #     ),
+        #     historical_data=self.ts,
+        # )
+        # self.assertTrue(len(anom2.scores) == 0)
+
     def test_vectorized_true_seasonality_true_results(self) -> None:
         d = CUSUMDetectorModel(
             scan_window=3600 * 24 * 8,