From ff2030d8c24ab39821188b26b9d72a52595f37d3 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Fri, 6 Dec 2024 15:46:20 -0800 Subject: [PATCH] Add initial prototype for group average bounds code --- xcdat/temporal.py | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/xcdat/temporal.py b/xcdat/temporal.py index 0818c1d5..d69d5544 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -876,7 +876,7 @@ def _averager( if self._mode == "average": dv_avg = self._average(ds, data_var) elif self._mode in ["group_average", "climatology", "departures"]: - dv_avg = self._group_average(ds, data_var) + dv_avg, time_bnds = self._group_average(ds, data_var) # The original time dimension is dropped from the dataset because # it becomes obsolete after the data variable is averaged. When the @@ -885,8 +885,10 @@ def _averager( ds = ds.drop_dims(self.dim) ds[dv_avg.name] = dv_avg - if self._mode == "group_average": - ds = ds.bounds.add_missing_bounds(axes="T") + if self._mode in ["group_average", "climatology", "departures"]: + ds[time_bnds.name] = time_bnds + # FIXME: This is not working when time bounds are datetime and + # time is cftime. ds = center_times(ds) if keep_weights: @@ -1479,7 +1481,9 @@ def _average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: return dv - def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: + def _group_average( + self, ds: xr.Dataset, data_var: str + ) -> Tuple[xr.DataArray, xr.DataArray]: """Averages a data variable by time group. Parameters @@ -1491,7 +1495,7 @@ def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: Returns ------- - xr.DataArray + Tuple[xr.DataArray, xr.DataArray] The data variable averaged by time group. """ dv = _get_data_var(ds, data_var) @@ -1500,9 +1504,9 @@ def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: # values. 
self._labeled_time = self._label_time_coords(dv[self.dim]) dv = dv.assign_coords({self.dim: self._labeled_time}) + time_bounds = ds.bounds.get_bounds("T", var_key=data_var) if self._weighted: - time_bounds = ds.bounds.get_bounds("T", var_key=data_var) self._weights = self._get_weights(time_bounds) # Weight the data variable. @@ -1526,6 +1530,25 @@ def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: else: dv = self._group_data(dv).mean() + """I think we'll need to collect the bounds for each group (e.g., group_bounds_array = [("2000-01-01 00:00", "2000-01-02 00:00"), ("2000-01-02 00:00", "2000-01-03 00:00"), ..., ("2000-01-31 00:00", "2000-02-01 00:00")]) and then take the min of the lower bound and the max of the upper bound (i.e., group_bnd = [np.min(group_bounds_array[:, 0]), np.max(group_bounds_array[:, 1])]). + """ + # Create time bounds for each group + time_bounds_grouped = self._group_data(time_bounds) + group_bounds = [] + + for _, group_data in time_bounds_grouped: + group_times = group_data.values + group_bnds = (np.min(group_times[:, 0]), np.max(group_times[:, 1])) + group_bounds.append(group_bnds) + + # Convert group bounds to DataArray + da_bnds = xr.DataArray( + data=np.array(group_bounds), + dims=[self.dim, "bnds"], + coords={self.dim: dv[self.dim].values}, + name=f"{self.dim}_bnds", + ) + # After grouping and aggregating, the grouped time dimension's # attributes are removed. Xarray's `keep_attrs=True` option only keeps # attributes for data variables and not their coordinates, so the @@ -1535,7 +1558,7 @@ def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: dv = self._add_operation_attrs(dv) - return dv + return dv, da_bnds def _get_weights(self, time_bounds: xr.DataArray) -> xr.DataArray: """Calculates weights for a data variable using time bounds.