
Commit

fixing
isaacmg committed Sep 22, 2024
1 parent 9493c84 commit 68c456b
Showing 14 changed files with 71 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .flake8
@@ -1,4 +1,4 @@
[flake8]
max_line_length=122
ignore=E305,W504,E126,E401,E721,E722
ignore=E305,W504,E126,E401,E721,F722
max-complexity=19
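
Note: the ignore list swaps E722 (bare except) for F722, pyflakes' "syntax error in forward annotation" code. This is presumably to keep flake8 quiet about the string-based shape annotations (jaxtyping's Float[torch.Tensor, "..."]) used in modules such as crossvivit.py and data_embedding.py below. A minimal sketch of the kind of annotation that trips F722, with hypothetical names and assuming jaxtyping is installed:

# Hypothetical example: pyflakes cannot parse the quoted shape string as a
# forward reference and reports F722, hence the project-wide ignore above.
import torch
from jaxtyping import Float


def double(x: Float[torch.Tensor, "batch time features"]) -> Float[torch.Tensor, "batch time features"]:
    # The annotation only documents the expected shape; the operation is elementwise.
    return x * 2.0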
16 changes: 12 additions & 4 deletions .idea/workspace.xml

Some generated files are not rendered by default.

6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -5,3 +5,9 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-yaml

- repo: https://github.com/hhatto/autopep8
rev: v2.0.4
hooks:
- id: autopep8
args: [--in-place, --aggressive, --aggressive, --max-line-length=122]
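
Note: the new hook runs autopep8 in place with two aggressive passes and the same 122-character limit as .flake8; the blank-line insertions and long-line wraps in the files below are consistent with that formatter. A rough sketch of the equivalent programmatic call, assuming autopep8 is installed (the sample source is hypothetical):

import autopep8

# A class docstring immediately followed by a method is a blank-line violation (E301).
source = (
    "class NLinear:\n"
    '    """Normalization-Linear"""\n'
    "    def __init__(self):\n"
    "        pass\n"
)
fixed = autopep8.fix_code(source, options={"aggressive": 2, "max_line_length": 122})
print(fixed)  # a blank line should be inserted between the docstring and __init__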
4 changes: 4 additions & 0 deletions flood_forecast/basic/d_n_linear.py
@@ -6,6 +6,7 @@ class NLinear(nn.Module):
"""
Normalization-Linear
"""

def __init__(self, forecast_history: int, forecast_length: int, enc_in=128, individual=False, n_targs=1):
super(NLinear, self).__init__()
self.seq_len = forecast_history
@@ -43,6 +44,7 @@ class MovingAvg(nn.Module):
"""
Moving average block to highlight the trend of time series
"""

def __init__(self, kernel_size, stride):
super(MovingAvg, self).__init__()
self.kernel_size = kernel_size
@@ -62,6 +64,7 @@ class SeriesDecomp(nn.Module):
"""
Series decomposition block
"""

def __init__(self, kernel_size):
super(SeriesDecomp, self).__init__()
self.moving_avg = MovingAvg(kernel_size, stride=1)
@@ -76,6 +79,7 @@ class DLinear(nn.Module):
"""
Decomposition-Linear
"""

def __init__(self, forecast_history: int, forecast_length: int, individual, enc_in: int, n_targs=1):
"""Code from
1 change: 1 addition & 0 deletions flood_forecast/custom/custom_opt.py
@@ -317,6 +317,7 @@ class NegativeLogLikelihood(torch.nn.Module):
target -> True y
output -> predicted distribution
"""

def __init__(self):
super().__init__()

1 change: 0 additions & 1 deletion flood_forecast/da_rnn/model.py
@@ -37,7 +37,6 @@ def __init__(
gru_lstm=True,
probabilistic=False,
final_act=None):

"""For model benchmark information see link on side https://rb.gy/koozff
:param n_time_series: Number of time series present in input
4 changes: 2 additions & 2 deletions flood_forecast/deployment/inference.py
@@ -14,8 +14,8 @@


class InferenceMode(object):
def __init__(self, forecast_steps: int, num_prediction_samples: int, model_params, csv_path: Union[str, pd.DataFrame], weight_path,
wandb_proj: str = None, torch_script=False):
def __init__(self, forecast_steps: int, num_prediction_samples: int, model_params,
csv_path: Union[str, pd.DataFrame], weight_path, wandb_proj: str = None, torch_script=False):
"""Class to handle inference for models,
:param forecast_steps: Number of time-steps to forecast (doesn't have to be hours)
6 changes: 3 additions & 3 deletions flood_forecast/evaluator.py
@@ -284,12 +284,12 @@ def infer_on_torch_model(
df_prediction_arr_1 = []

for i in range(0, len(vals[0])):
df_train_and_test, end_tensor, history_length, forecast_start_idx, csv_test_loader, df_prediction = handle_later_ev(model, vals[0][i][1], vals[1][i], model.params, csv_series_id_loader, multi_params, vals[0][i][2], vals[0][i][0], datetime_start=datetime_start) # noqa
df_train_and_test, end_tensor, history_length, forecast_start_idx, csv_test_loader, df_prediction = handle_later_ev(model, vals[0][i][1], vals[1][i], model.params, csv_series_id_loader, multi_params, vals[0][i][2], vals[0][i][0], datetime_start=datetime_start) # noqa
df_train_and_test_arr.append(df_train_and_test)
end_tensor_arr.append(end_tensor)
forecast_start_idx_arr.append(forecast_start_idx)
df_prediction_arr_1.append(df_prediction)
return df_train_and_test_arr, end_tensor_arr, history_length, forecast_start_idx_arr, csv_test_loader, df_prediction_arr_1 # noqa
return df_train_and_test_arr, end_tensor_arr, history_length, forecast_start_idx_arr, csv_test_loader, df_prediction_arr_1 # noqa
else:
csv_test_loader = CSVTestLoader(
test_csv_path,
@@ -419,7 +419,7 @@ def handle_evaluation_series_loader(csv_series_id_loader: SeriesIDTestLoader, mo
multi_params=1
)
end_tenor_arr.append(end_tensor)
return data, end_tenor_arr, model.params["dataset_params"]["forecast_history"], forecast_start_idx, csv_series_id_loader, [] # noqa
return data, end_tenor_arr, model.params["dataset_params"]["forecast_history"], forecast_start_idx, csv_series_id_loader, [] # noqa


def handle_ci_multi(prediction_samples: torch.Tensor, csv_test_loader: CSVTestLoader, multi_params: int,
1 change: 0 additions & 1 deletion flood_forecast/multi_models/crossvivit.py
@@ -171,7 +171,6 @@ def forward(
:param src: Source sequence. By this point the shape of the code will be
:type src: Float[torch.Tensor, "batch_t_steps variable_sequence_length model_dim"]
:param src_pos_emb: Positional embedding of source sequence's tokens of shape [batch_t_steps, variable_sequence_length, model_dim/2]
"""

attention_scores = {}
11 changes: 6 additions & 5 deletions flood_forecast/preprocessing/pytorch_loaders.py
@@ -234,7 +234,7 @@ def __getitem__(self, idx: int) -> Tuple[Dict, Dict]:
idx2 = va[self.series_id_col].iloc[0]
va_returned = va[va.columns.difference([self.series_id_col], sort=False)]
t = torch.Tensor(va_returned.iloc[idx: self.forecast_history + idx].values)[:, 1:]
targ = torch.Tensor(va_returned.iloc[targ_start_idx: targ_start_idx + self.forecast_length].to_numpy())[:, 1:] # noqa
targ = torch.Tensor(va_returned.iloc[targ_start_idx: targ_start_idx + self.forecast_length].to_numpy())[:, 1:] # noqa
src_list[self.unique_dict[idx2]] = t
targ_list[self.unique_dict[idx2]] = targ
return src_list, targ_list
@@ -355,6 +355,7 @@ def __len__(self) -> int:
len(self.df.index) - self.forecast_history - self.forecast_total - 1
)


class AEDataloader(CSVDataLoader):
def __init__(
self,
@@ -433,7 +434,7 @@ def __init__(self, params: Dict, n_classes: int = 2):
:param params: The standard dictionary for a dataloader (see CSVDataLoader)
:type params: Dict
:param n_classes: The number of classes in the problem
""" # noqa
""" # noqa
self.n_classes = n_classes
params["forecast_history"] = params["sequence_length"]
params["no_scale"] = True
@@ -455,7 +456,7 @@ def __getitem__(self, idx: int):
targ_labs = torch.zeros(self.n_classes)
casted_shit = int(targ.data.tolist())
if casted_shit > self.n_classes:
raise ValueError("The class " + str(casted_shit) + " is greater than the number of classes " + str(self.n_classes)) # noqa
raise ValueError("The class " + str(casted_shit) + " is greater than the number of classes " + str(self.n_classes)) # noqa
targ_labs[casted_shit] = 1
return src.float(), targ_labs.float().unsqueeze(0)

@@ -624,7 +625,7 @@ def get_item_classification(self, idx: int):
targ_labs = torch.zeros(self.n_classes)
casted_shit = int(targ.data.tolist())
if casted_shit > self.n_classes - 1: # -1 because counting starts at zero
raise ValueError("The class " + str(casted_shit) + " is greater than the number of classes " + str(self.n_classes)) # noqa
raise ValueError("The class " + str(casted_shit) + " is greater than the number of classes " + str(self.n_classes)) # noqa
targ_labs[casted_shit] = 1
return src.float(), targ_labs.float().unsqueeze(0)

@@ -669,7 +670,7 @@ def __init__(self, series_id_col: str, main_params: dict, return_method: str, fo
super().__init__(series_id_col, main_params, return_method, return_all)
print("forecast_total is: " + str(forecast_total))
self.forecast_total = forecast_total
self.csv_test_loaders = [CSVTestLoader(loader_1, forecast_total, **main_params) for loader_1 in self.df_orig_list] # noqa
self.csv_test_loaders = [CSVTestLoader(loader_1, forecast_total, **main_params) for loader_1 in self.df_orig_list] # noqa

def get_from_start_date_all(self, forecast_start: datetime, series_id: int = None):
res = []
1 change: 0 additions & 1 deletion flood_forecast/pytorch_training.py
@@ -93,7 +93,6 @@ def train_transformer_style(
forward_params: Dict = {},
model_filepath: str = "model_save",
class2=False) -> None:

"""Function to train any PyTorchForecast model
:param model: A properly wrapped PyTorchForecast model
38 changes: 28 additions & 10 deletions flood_forecast/trainer.py
@@ -13,6 +13,7 @@
plot_df_test_with_confidence_interval,
plot_df_test_with_probabilistic_confidence_interval)


def handle_model_evaluation1(test_acc, params: Dict) -> None:
"""Utility function to help handle model evaluation. Primarily used at the moment for forecasting models.
@@ -28,7 +29,7 @@ def handle_model_evaluation1(test_acc, params: Dict) -> None:
forecast_start_idx = test_acc[2]
df_prediction_samples = test_acc[3]
mae = (df_train_and_test.loc[forecast_start_idx:, "preds"] -
df_train_and_test.loc[forecast_start_idx:, params["dataset_params"]["target_col"][0]]).abs()
df_train_and_test.loc[forecast_start_idx:, params["dataset_params"]["target_col"][0]]).abs()
inverse_mae = 1 / mae
i = 0
for df in df_prediction_samples:
@@ -71,6 +72,7 @@ def handle_model_evaluation1(test_acc, params: Dict) -> None:
name=relevant_col))
wandb.log({"test_plot_all": test_plot_all})


def handle_core_eval(trained_model, params: Dict, model_type: str):
"""_summary_
@@ -89,8 +91,8 @@ def handle_core_eval(trained_model, params: Dict, model_type: str):
params["inference_params"],
{})
if params["dataset_params"]["class"] == "SeriesIDLoader":
data = test_acc[1]
for i in range(len(data)):
data = test_acc[1]
for i in range(len(data)):
tuple_for_eval = (test_acc[0][i], test_acc[1][i], test_acc[2][i], test_acc[3][i])
handle_model_evaluation1(tuple_for_eval, params)
else:
@@ -146,12 +148,26 @@ def train_function(model_type: str, params: Dict) -> PyTorchForecast:
trained_model.params["inference_params"]["dataset_params"] = trained_model.params["dataset_params"].copy()
del trained_model.params["inference_params"]["dataset_params"]["class"]
# noqa: F501
trained_model.params["inference_params"]["dataset_params"]["interpolate_param"] = trained_model.params["inference_params"]["dataset_params"].pop("interpolate")
trained_model.params["inference_params"]["dataset_params"]["scaling"] = trained_model.params["inference_params"]["dataset_params"].pop("scaler")
trained_model.params["inference_params"]["dataset_params"]["interpolate_param"] = trained_model.params["inference_params"]["dataset_params"].pop(
"interpolate")
trained_model.params["inference_params"]["dataset_params"]["scaling"] = trained_model.params["inference_params"]["dataset_params"].pop(
"scaler")
if "feature_param" in trained_model.params["dataset_params"]:
trained_model.params["inference_params"]["dataset_params"]["feature_params"] = trained_model.params["inference_params"]["dataset_params"].pop("feature_param")
delete_params = ["num_workers", "pin_memory", "train_start", "train_end", "valid_start", "valid_end", "test_start", "test_end",
"training_path", "validation_path", "test_path", "batch_size"]
trained_model.params["inference_params"]["dataset_params"]["feature_params"] = trained_model.params["inference_params"]["dataset_params"].pop(
"feature_param")
delete_params = [
"num_workers",
"pin_memory",
"train_start",
"train_end",
"valid_start",
"valid_end",
"test_start",
"test_end",
"training_path",
"validation_path",
"test_path",
"batch_size"]
for param in delete_params:
if param in trained_model.params["inference_params"]["dataset_params"]:
del trained_model.params["inference_params"]["dataset_params"][param]
@@ -168,21 +184,23 @@
dataset_params)["scaling"]
params["inference_params"]["dataset_params"].pop('scaler_params', None)
# TODO Move to other func
if params["dataset_params"]["class"] != "GeneralClassificationLoader" and params["dataset_params"]["class"] !="VariableSequenceLength":
if params["dataset_params"]["class"] != "GeneralClassificationLoader" and params["dataset_params"]["class"] != "VariableSequenceLength":
handle_core_eval(trained_model, params, model_type)

else:
raise Exception("Please supply valid model type for forecasting or classification")
return trained_model


def correct_stupid_sklearn_error(training_conf: Dict) -> Dict:
"""Sklearn for whatever reason decided to only allow scaler params in the form of tuples
this was stupid so now we have to convert JSON list to tuple.
:param scaling_params: A list of the scaling params
:type training_conf: Dict
"""
training_conf["dataset_params"]["scaler_params"]["feature_range"] = tuple(training_conf["dataset_params"]["scaler_params"]["feature_range"])
training_conf["dataset_params"]["scaler_params"]["feature_range"] = tuple(
training_conf["dataset_params"]["scaler_params"]["feature_range"])
if "dataset_params" in training_conf["inference_params"]:
del training_conf["inference_params"]["dataset_params"]
print("Fixed dumbass sklearn errors morons should've never changed it")
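
Note: the wrapped line in correct_stupid_sklearn_error casts the JSON-provided feature_range list to a tuple before it reaches the scaler; recent scikit-learn releases validate MinMaxScaler's feature_range as a tuple, so a plain list from a JSON config is rejected when fit() runs. A minimal sketch under that assumption, with a hypothetical params dict:

import numpy as np
from sklearn.preprocessing import MinMaxScaler

# feature_range arrives as a list because JSON has no tuple type.
scaler_params = {"feature_range": [0, 1]}
scaler_params["feature_range"] = tuple(scaler_params["feature_range"])

scaler = MinMaxScaler(**scaler_params)
scaled = scaler.fit_transform(np.arange(10, dtype=float).reshape(-1, 1))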
8 changes: 4 additions & 4 deletions flood_forecast/transformer_xl/attn.py
@@ -80,8 +80,8 @@ def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
kv = keys.transpose(-2, -1) @ (values * normalizer_col_refine[:, :, :, None])
x = (
(
((queries @ kv) * normalizer_row[:, :, :, None])
* normalizer_row_refine[:, :, :, None]
((queries @ kv) * normalizer_row[:, :, :, None]) *
normalizer_row_refine[:, :, :, None]
)
.transpose(1, 2)
.contiguous()
@@ -167,8 +167,8 @@ def flash_attention_forward(self, Q, K, V, mask=None):

mi_new = torch.maximum(m_block_ij, mi)
li_new = (
torch.exp(mi - mi_new) * li
+ torch.exp(m_block_ij - mi_new) * l_block_ij
torch.exp(mi - mi_new) * li +
torch.exp(m_block_ij - mi_new) * l_block_ij
)

O_BLOCKS[i] = (li / li_new) * torch.exp(mi - mi_new) * Oi + (
6 changes: 4 additions & 2 deletions flood_forecast/transformer_xl/data_embedding.py
@@ -244,7 +244,8 @@ def __init__(self, channels: int):
inv_freq = 1.0 / (10000 ** (torch.arange(0, self.channels, 2).float() / self.channels))
self.register_buffer("inv_freq", inv_freq)

def forward(self, coords: Float[torch.Tensor, "batch_size x y channels"]) -> Float[torch.Tensor, "batch_size height width channels"]:
def forward(self, coords: Float[torch.Tensor, "batch_size x y channels"]
) -> Float[torch.Tensor, "batch_size height width channels"]:
"""
Forward pass of the PositionalEncoding2D module.
@@ -285,9 +286,10 @@ def forward(self, coords: Float[torch.Tensor, "batch_size x y channels"]) -> Flo
emb = torch.zeros((batch_size, height, width, self.channels * 2),
device=coords.device).type(coords.type())
emb[:, :, :, :self.channels] = emb_x
emb[:, :, :, self.channels:2*self.channels] = emb_y
emb[:, :, :, self.channels:2 * self.channels] = emb_y
return emb


class NeRF_embedding(nn.Module):
def __init__(self, n_layers: int = 5):
super().__init__()
