Skip to content

Commit

Permalink
fix: incorrect explained variance in CCA (#147)
Browse files Browse the repository at this point in the history
  • Loading branch information
nicrie authored Jan 28, 2024
1 parent 4cf84cf commit f825c68
Showing 1 changed file with 20 additions and 20 deletions.
40 changes: 20 additions & 20 deletions xeofs/models/cca.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,26 @@
The original code is licensed under the MIT License.
Copyright (c) 2020-2023 James Chapman
Copyright (c) 2020 onward James Chapman
"""

from abc import abstractmethod
from datetime import datetime
from typing import Sequence, List, Hashable
from typing_extensions import Self
from typing import Hashable, List, Sequence

import dask.array as da
import numpy as np
import xarray as xr
from scipy.linalg import eigh
from sklearn.base import BaseEstimator
from sklearn.utils.validation import FLOAT_DTYPES
from typing_extensions import Self

from xeofs.models import EOF

from .._version import __version__
from ..preprocessing.preprocessor import Preprocessor
from ..utils.data_types import DataObject, DataArray, DataList
from ..utils.data_types import DataArray, DataList, DataObject


def _check_parameter_number(parameter_name: str, parameter, n_views: int):
Expand Down Expand Up @@ -171,6 +172,7 @@ def _apply_pca(self, views: DataList):
view_transformed = []

for i, view in enumerate(views):
# NOTE: coslat weighting already happens in Preprocessor class
pca = EOF(n_modes=n_pca_modes[i], compute=self.compute)
pca.fit(view, dim=self.sample_name)
if self.compute:
Expand All @@ -196,20 +198,18 @@ def _apply_pca(self, views: DataList):
cum_exp_var_ratio.isel(mode=-1).item()
)
)
n_modes_keep = cum_exp_var_ratio.where(
cum_exp_var_ratio <= self.variance_fraction, drop=True
).size
if n_modes_keep == 0:
n_modes_keep += 1

# TODO: it's more convenient to work with the common scaling of sklearn; provide an additional parameter
# provide this parameter to transform method as well
scores = pca.scores().isel(mode=slice(0, n_modes_keep))
svals = pca.singular_values().isel(mode=slice(0, n_modes_keep))
scores = (
(scores * svals)
.rename({"mode": self.feature_name})
.transpose(self.sample_name, self.feature_name)
n_modes_keep = (
cum_exp_var_ratio.where(
cum_exp_var_ratio <= self.variance_fraction, drop=True
).size
+ 1
)
# Take at least 2 modes
n_modes_keep = max(n_modes_keep, 2)

scores = pca.scores(normalized=False).isel(mode=slice(0, n_modes_keep))
scores = scores.rename({"mode": self.feature_name}).transpose(
self.sample_name, self.feature_name
)
view_transformed.append(scores)
return view_transformed
Expand Down Expand Up @@ -328,7 +328,7 @@ def _fit_algorithm(self, views: List[DataArray]) -> Self:
# Transform the views using the loadings
transformed_views = [
xr.dot(view, loading, dims=self.feature_name)
for view, loading in zip(views, self.data["loadings"])
for view, loading in zip(self.data["input_data"], self.data["loadings"])
]
# Calculate the variance of each latent dimension in the transformed views
self.data["explained_variance"] = [
Expand All @@ -337,7 +337,7 @@ def _fit_algorithm(self, views: List[DataArray]) -> Self:

# Explained variance ratio
self.data["total_variance"] = [
view.var(self.sample_name).sum() for view in views
view.var(self.sample_name, ddof=1).sum() for view in views
]

# Calculate the explained variance ratio for each latent dimension for each view
Expand Down

0 comments on commit f825c68

Please sign in to comment.