diff --git a/docs/src/references/api/python/utils/clebsch-gordan.rst b/docs/src/references/api/python/utils/clebsch-gordan.rst new file mode 100644 index 000000000..7d1258348 --- /dev/null +++ b/docs/src/references/api/python/utils/clebsch-gordan.rst @@ -0,0 +1,5 @@ +Clebsch-Gordan products +======================= + +.. autoclass:: rascaline.utils.DensityCorrelations + :members: diff --git a/docs/src/references/api/python/utils/index.rst b/docs/src/references/api/python/utils/index.rst index 3d88c4da9..d713fdcb9 100644 --- a/docs/src/references/api/python/utils/index.rst +++ b/docs/src/references/api/python/utils/index.rst @@ -11,3 +11,4 @@ Utility functions and classes that extend the core usage of rascaline. radial-basis power-spectrum splines + clebsch-gordan diff --git a/docs/src/references/api/torch/utils/clebsch-gordan.rst b/docs/src/references/api/torch/utils/clebsch-gordan.rst new file mode 100644 index 000000000..a8cb5299a --- /dev/null +++ b/docs/src/references/api/torch/utils/clebsch-gordan.rst @@ -0,0 +1,5 @@ +Clebsch-Gordan products +======================= + +.. autoclass:: rascaline.torch.utils.DensityCorrelations + :members: diff --git a/docs/src/references/api/torch/utils/index.rst b/docs/src/references/api/torch/utils/index.rst index 1351283df..a2cf425eb 100644 --- a/docs/src/references/api/torch/utils/index.rst +++ b/docs/src/references/api/torch/utils/index.rst @@ -8,3 +8,4 @@ Utility functions and classes that extend the core usage of rascaline-torch :maxdepth: 1 power-spectrum + clebsch-gordan diff --git a/python/rascaline-torch/rascaline/torch/__init__.py b/python/rascaline-torch/rascaline/torch/__init__.py index 226cde60b..0d63d3242 100644 --- a/python/rascaline-torch/rascaline/torch/__init__.py +++ b/python/rascaline-torch/rascaline/torch/__init__.py @@ -9,12 +9,12 @@ _load_library() -from . import utils # noqa -from .calculator_base import CalculatorModule, register_autograd # noqa +from . import utils # noqa: E402, F401 +from .calculator_base import CalculatorModule, register_autograd # noqa: E402, F401 # don't forget to also update `rascaline/__init__.py` and # `rascaline/torch/calculators.py` when modifying this file -from .calculators import ( # noqa +from .calculators import ( # noqa: E402, F401 AtomicComposition, LodeSphericalExpansion, NeighborList, @@ -24,7 +24,7 @@ SphericalExpansion, SphericalExpansionByPair, ) -from .system import systems_to_torch # noqa +from .system import systems_to_torch # noqa: E402, F401 __all__ = [ diff --git a/python/rascaline-torch/rascaline/torch/utils.py b/python/rascaline-torch/rascaline/torch/utils.py new file mode 100644 index 000000000..1625581b4 --- /dev/null +++ b/python/rascaline-torch/rascaline/torch/utils.py @@ -0,0 +1,88 @@ +import importlib +import os +import sys +from typing import Any + +import torch +from metatensor.torch import Labels, LabelsEntry, TensorBlock, TensorMap + +import rascaline.utils + +from .calculator_base import CalculatorModule +from .system import System + + +_HERE = os.path.dirname(__file__) + + +# For details what is happening here take a look an `rascaline.torch.calculators`. + +# create the `_backend` module as an empty module +spec = importlib.util.spec_from_loader( + "rascaline.torch.utils._backend", + loader=None, +) +module = importlib.util.module_from_spec(spec) +# This module only exposes a handful of things, defined here. Any changes here MUST also +# be made to the `metatensor/operations/_classes.py` file, which is used in non +# TorchScript mode. 
+module.__dict__["Labels"] = Labels +module.__dict__["TensorBlock"] = TensorBlock +module.__dict__["TensorMap"] = TensorMap +module.__dict__["LabelsEntry"] = LabelsEntry +module.__dict__["torch_jit_is_scripting"] = torch.jit.is_scripting +module.__dict__["torch_jit_annotate"] = torch.jit.annotate +module.__dict__["torch_jit_export"] = torch.jit.export +module.__dict__["TorchTensor"] = torch.Tensor +module.__dict__["TorchModule"] = torch.nn.Module +module.__dict__["TorchScriptClass"] = torch.ScriptClass +module.__dict__["Array"] = torch.Tensor +module.__dict__["CalculatorBase"] = CalculatorModule +module.__dict__["IntoSystem"] = System + + +def is_labels(obj: Any): + return isinstance(obj, Labels) + + +if os.environ.get("RASCALINE_IMPORT_FOR_SPHINX") is None: + is_labels = torch.jit.script(is_labels) + +module.__dict__["is_labels"] = is_labels + + +def check_isinstance(obj, ty): + if isinstance(ty, torch.ScriptClass): + # This branch is taken when `ty` is a custom class (TensorMap, …). since `ty` is + # an instance of `torch.ScriptClass` and not a class itself, there is no way to + # check if obj is an "instance" of this class, so we always return True and hope + # for the best. Most errors should be caught by the TorchScript compiler anyway. + return True + else: + assert isinstance(ty, type) + return isinstance(obj, ty) + + +# register the module in sys.modules, so future import find it directly +sys.modules[spec.name] = module + +# create a module named `rascaline.torch.utils` using code from +# `rascaline.utils` +spec = importlib.util.spec_from_file_location( + "rascaline.torch.utils", rascaline.utils.__file__ +) + +module = importlib.util.module_from_spec(spec) + + +cmake_prefix_path = os.path.realpath(os.path.join(_HERE, "..", "lib", "cmake")) +""" +Path containing the CMake configuration files for the underlying C library +""" + +module.__dict__["cmake_prefix_path"] = cmake_prefix_path + +# override `rascaline.torch.utils` (the module associated with the current file) +# with the newly created module +sys.modules[spec.name] = module +spec.loader.exec_module(module) diff --git a/python/rascaline-torch/rascaline/torch/utils/__init__.py b/python/rascaline-torch/rascaline/torch/utils/__init__.py deleted file mode 100644 index 3a73c2d20..000000000 --- a/python/rascaline-torch/rascaline/torch/utils/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -import os - -from .power_spectrum import PowerSpectrum - - -_HERE = os.path.dirname(__file__) - -cmake_prefix_path = os.path.realpath(os.path.join(_HERE, "..", "lib", "cmake")) -""" -Path containing the CMake configuration files for the underlying C library -""" - -__all__ = ["PowerSpectrum"] diff --git a/python/rascaline-torch/rascaline/torch/utils/power_spectrum.py b/python/rascaline-torch/rascaline/torch/utils/power_spectrum.py deleted file mode 100644 index 7bf9b9321..000000000 --- a/python/rascaline-torch/rascaline/torch/utils/power_spectrum.py +++ /dev/null @@ -1,99 +0,0 @@ -import importlib -import sys -from typing import List, Optional, Union - -import torch -from metatensor.torch import Labels, TensorBlock, TensorMap - -import rascaline.utils.power_spectrum - -from ..calculator_base import CalculatorModule as CalculatorBase -from ..system import System as IntoSystem - - -# For details what is happening here take a look an `rascaline.torch.calculators`. 
- -# Step 1: create te `_classes` module as an empty module -spec = importlib.util.spec_from_loader( - "rascaline.torch.utils.power_spectrum._classes", - loader=None, -) -module = importlib.util.module_from_spec(spec) -# This module only exposes a handful of things, defined here. Any changes here MUST also -# be made to the `metatensor/operations/_classes.py` file, which is used in non -# TorchScript mode. -module.__dict__["Labels"] = Labels -module.__dict__["TensorBlock"] = TensorBlock -module.__dict__["TensorMap"] = TensorMap -module.__dict__["CalculatorBase"] = CalculatorBase -module.__dict__["IntoSystem"] = IntoSystem - -# register the module in sys.modules, so future import find it directly -sys.modules[spec.name] = module - - -# Step 2: create a module named `rascaline.torch.utils.power_spectrum` using code from -# `rascaline.utils.power_spectrum` -spec = importlib.util.spec_from_file_location( - "rascaline.torch.utils.power_spectrum", - rascaline.utils.power_spectrum.__file__, -) - -module = importlib.util.module_from_spec(spec) -sys.modules[spec.name] = module -spec.loader.exec_module(module) - - -# Store the original class to avoid recursion problems -PowerSpectrumBase = module.PowerSpectrum - - -class PowerSpectrum(torch.nn.Module, PowerSpectrumBase): - """ - Torch version of the general power spectrum of one or of two calculators. - - The class provides :py:meth:`PowerSpectrum.forward` and the integration with - :py:class:`torch.nn.Module`. For more details see - :py:class:`rascaline.utils.PowerSpectrum`. - - :param calculator_1: first calculator - :param calculator_1: second calculator - :param species: List of `species_neighbor` to fill all blocks with. This option - might be useful when joining along the ``sample`` direction after computation. - If :py:obj:`None` blocks are filled with `species_neighbor` from all blocks. - :raises ValueError: If other calculators than - :py:class:`rascaline.SphericalExpansion` or - :py:class:`rascaline.LodeSphericalExpansion` are used. 
- :raises ValueError: If ``'max_angular'`` of both calculators is different - """ - - def __init__( - self, - calculator_1: CalculatorBase, - calculator_2: Optional[CalculatorBase] = None, - species: Optional[List[int]] = None, - ): - torch.nn.Module.__init__(self) - PowerSpectrumBase.__init__( - self, - calculator_1=calculator_1, - calculator_2=calculator_2, - species=species, - ) - - def forward( - self, - systems: Union[IntoSystem, List[IntoSystem]], - gradients: Optional[List[str]] = None, - use_native_system: bool = True, - ) -> TensorMap: - """forward just calls :py:meth:`PowerSpectrum.compute`""" - - return self.compute( - systems=systems, - gradients=gradients, - use_native_system=use_native_system, - ) - - -module.__dict__["PowerSpectrum"] = PowerSpectrum diff --git a/python/rascaline-torch/tests/utils/correlate_density.py b/python/rascaline-torch/tests/utils/correlate_density.py new file mode 100644 index 000000000..9db042bd7 --- /dev/null +++ b/python/rascaline-torch/tests/utils/correlate_density.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +import io +import os +from typing import Any, List + +import ase.io +import metatensor.torch +import pytest +import torch +from metatensor.torch import Labels, TensorBlock, TensorMap # noqa + +import rascaline.torch +from rascaline.torch.utils.clebsch_gordan.correlate_density import DensityCorrelations + + +DATA_ROOT = os.path.join(os.path.dirname(__file__), "data") + + +@torch.jit.script +def is_tensor_map(obj: Any): + return isinstance(obj, TensorMap) + + +is_tensor_map = torch.jit.script(is_tensor_map) + +SPHERICAL_EXPANSION_HYPERS = { + "cutoff": 2.5, + "max_radial": 3, + "max_angular": 3, + "atomic_gaussian_width": 0.2, + "radial_basis": {"Gto": {}}, + "cutoff_function": {"ShiftedCosine": {"width": 0.5}}, + "center_atom_weight": 1.0, +} + +SELECTED_KEYS = Labels( + names=["spherical_harmonics_l"], values=torch.tensor([1, 3]).reshape(-1, 1) +) + + +def h2o_isolated(): + return [ + ase.Atoms( + symbols=["O", "H", "H"], + positions=[ + [2.56633400, 2.50000000, 2.50370100], + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + ) + ] + + +def spherical_expansion(frames: List[ase.Atoms]): + """Returns a rascaline SphericalExpansion""" + calculator = rascaline.torch.SphericalExpansion(**SPHERICAL_EXPANSION_HYPERS) + return calculator.compute(rascaline.torch.systems_to_torch(frames)) + + +# copy of def test_correlate_density_angular_selection( +@pytest.mark.parametrize("selected_keys", [None, SELECTED_KEYS]) +@pytest.mark.parametrize("skip_redundant", [True, False]) +def test_torch_script_correlate_density_angular_selection( + selected_keys: Labels, + skip_redundant: bool, +): + """ + Tests that the correct angular channels are output based on the specified + ``selected_keys``. 
+ """ + frames = h2o_isolated() + nu_1 = spherical_expansion(frames) + correlation_order = 2 + corr_calculator = DensityCorrelations( + max_angular=SPHERICAL_EXPANSION_HYPERS["max_angular"] * correlation_order, + correlation_order=correlation_order, + angular_cutoff=None, + selected_keys=selected_keys, + skip_redundant=skip_redundant, + ) + + ref_nu_2 = corr_calculator.compute(nu_1) + scripted_corr_calculator = torch.jit.script(corr_calculator) + + # Test compute + scripted_nu_2 = scripted_corr_calculator.compute(nu_1) + assert metatensor.torch.equal_metadata(scripted_nu_2, ref_nu_2) + assert metatensor.torch.allclose(scripted_nu_2, ref_nu_2) + + # Test compute_metadata + scripted_nu_2 = scripted_corr_calculator.compute_metadata(nu_1) + assert metatensor.torch.equal_metadata(scripted_nu_2, ref_nu_2) + + +def test_jit_save_load(): + corr_calculator = DensityCorrelations( + max_angular=2, + correlation_order=2, + angular_cutoff=1, + ) + scripted_correlate_density = torch.jit.script(corr_calculator) + with io.BytesIO() as buffer: + torch.jit.save(scripted_correlate_density, buffer) + buffer.seek(0) + torch.jit.load(buffer) + + +def test_save_load(): + """Tests for saving and loading with cg_backend="python-dense", + which makes the DensityCorrelations object non-scriptable due to + a non-contiguous CG cache.""" + corr_calculator = DensityCorrelations( + max_angular=2, + correlation_order=2, + angular_cutoff=1, + cg_backend="python-dense", + ) + with io.BytesIO() as buffer: + torch.save(corr_calculator, buffer) + buffer.seek(0) + torch.load(buffer) diff --git a/python/rascaline/rascaline/utils/__init__.py b/python/rascaline/rascaline/utils/__init__.py index e3e8adc1b..b6bfe14ff 100644 --- a/python/rascaline/rascaline/utils/__init__.py +++ b/python/rascaline/rascaline/utils/__init__.py @@ -1,6 +1,6 @@ import os -from .clebsch_gordan import correlate_density, correlate_density_metadata # noqa +from .clebsch_gordan import DensityCorrelations # noqa from .power_spectrum import PowerSpectrum # noqa from .splines import ( # noqa AtomicDensityBase, diff --git a/python/rascaline/rascaline/utils/_backend.py b/python/rascaline/rascaline/utils/_backend.py new file mode 100644 index 000000000..8e614ed74 --- /dev/null +++ b/python/rascaline/rascaline/utils/_backend.py @@ -0,0 +1,61 @@ +from typing import Any, Union + +import numpy as np +from metatensor import Labels, LabelsEntry, TensorBlock, TensorMap + +from ..calculator_base import CalculatorBase +from ..systems import IntoSystem + + +def torch_jit_is_scripting(): + return False + + +def torch_jit_annotate(annotation, obj): + return obj + + +def torch_jit_export(func): + return func + + +def is_labels(obj: Any): + return isinstance(obj, Labels) + + +try: + from torch import ScriptClass as TorchScriptClass + from torch import Tensor as TorchTensor + from torch.nn import Module as TorchModule +except ImportError: + + class TorchTensor: + pass + + class TorchModule: + + def __call__(self, *arg, **kwargs): + return self.forward(*arg, **kwargs) + + class TorchScriptClass: + pass + + +Array = Union[np.ndarray, TorchTensor] + +__all__ = [ + "Array", + "CalculatorBase", + "IntoSystem", + "Labels", + "TensorBlock", + "TensorMap", + "TorchTensor", + "TorchModule", + "TorchScriptClass", + "LabelsEntry", + "torch_jit_is_scripting", + "torch_jit_annotate", + "torch_jit_export", + "is_labels", +] diff --git a/python/rascaline/rascaline/utils/_dispatch.py b/python/rascaline/rascaline/utils/_dispatch.py new file mode 100644 index 000000000..f1254b4fa --- /dev/null +++ 
b/python/rascaline/rascaline/utils/_dispatch.py @@ -0,0 +1,450 @@ +"""Helper functions to dispatch methods between numpy and torch. + +The functions are similar to those in metatensor-operations. Missing functions may +already exist there. Functions are ordered alphabetically. +""" + +import itertools +from typing import List, Optional, Union + +import numpy as np + + +try: + import torch + from torch import Tensor as TorchTensor +except ImportError: + + class TorchTensor: + pass + + +UNKNOWN_ARRAY_TYPE = ( + "unknown array type, only numpy arrays and torch tensors are supported" +) + + +def _check_all_torch_tensor(arrays: List[TorchTensor]): + for array in arrays: + if not isinstance(array, TorchTensor): + raise TypeError( + f"expected argument to be a torch.Tensor, but got {type(array)}" + ) + + +def _check_all_np_ndarray(arrays): + for array in arrays: + if not isinstance(array, np.ndarray): + raise TypeError( + f"expected argument to be a np.ndarray, but got {type(array)}" + ) + + +def concatenate(arrays: List[TorchTensor], axis: int): + """ + Concatenate a group of arrays along a given axis. + + This function has the same behavior as ``numpy.concatenate(arrays, axis)`` + and ``torch.concatenate(arrays, axis)``. + + Passing `axis` as ``0`` is equivalent to :py:func:`numpy.vstack`, ``1`` to + :py:func:`numpy.hstack`, and ``2`` to :py:func:`numpy.dstack`, though any + axis index > 0 is valid. + """ + if isinstance(arrays[0], TorchTensor): + _check_all_torch_tensor(arrays) + return torch.concatenate(arrays, axis) + elif isinstance(arrays[0], np.ndarray): + _check_all_np_ndarray(arrays) + return np.concatenate(arrays, axis) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): + """ + Create an uninitialized array, with the given ``shape``, and similar dtype, + device and other options as ``array``. + + If ``shape`` is :py:obj:`None`, the array shape is used instead. + ``requires_grad`` is only used for torch tensors, and set the corresponding + value on the returned array. + + This is the equivalent to ``np.empty_like(array, shape=shape)``. + """ + if isinstance(array, TorchTensor): + if shape is None: + shape = array.size() + return torch.empty( + shape, + dtype=array.dtype, + layout=array.layout, + device=array.device, + ).requires_grad_(requires_grad) + elif isinstance(array, np.ndarray): + return np.empty_like(array, shape=shape, subok=False) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def list_to_array(array, data: List[List[int]]): + """Create an object from data with the same type as ``array``.""" + if isinstance(array, TorchTensor): + return torch.tensor(data) + elif isinstance(array, np.ndarray): + return np.array(data) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def matmul(a, b): + """Matrix product of two arrays.""" + if isinstance(a, TorchTensor): + _check_all_torch_tensor([b]) + return torch.matmul(a, b) + elif isinstance(a, np.ndarray): + _check_all_np_ndarray([b]) + return np.matmul(a, b) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def to_index_array(array): + """Returns an array that is suitable for indexing a dimension of + a different array. + After a few checks (int, 1D), this operation will convert the dtype to + torch.long (which is, in some torch versions, the only acceptable type + of index tensor). Numpy arrays are left unchanged. 
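+
+    As a minimal sketch of the intended behavior (the values here are
+    illustrative, not from the original code):
+
+    >>> import numpy as np
+    >>> to_index_array(np.array([0, 2, 1]))
+    array([0, 2, 1])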
+ """ + if len(array.shape) != 1: + raise ValueError("Index arrays must be 1D") + + if isinstance(array, TorchTensor): + if torch.is_floating_point(array): + raise ValueError("Index arrays must be integers") + return array.to(torch.long) + elif isinstance(array, np.ndarray): + if not np.issubdtype(array.dtype, np.integer): + raise ValueError("Index arrays must be integers") + return array + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def unique(array, axis: Optional[int] = None): + """Find the unique elements of an array.""" + if isinstance(array, TorchTensor): + return torch.unique(array, dim=axis) + elif isinstance(array, np.ndarray): + return np.unique(array, axis=axis) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): + """ + Create an array filled with zeros, with the given ``shape``, and similar + dtype, device and other options as ``array``. + + If ``shape`` is :py:obj:`None`, the array shape is used instead. + ``requires_grad`` is only used for torch tensors, and set the corresponding + value on the returned array. + + This is the equivalent to ``np.zeros_like(array, shape=shape)``. + """ + if isinstance(array, TorchTensor): + if shape is None: + shape = array.size() + + return torch.zeros( + shape, + dtype=array.dtype, + layout=array.layout, + device=array.device, + ).requires_grad_(requires_grad) + elif isinstance(array, np.ndarray): + if shape is None: + shape = array.shape + return np.zeros_like(array, shape=shape, subok=False) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def where(array): + """Return the indices where `array` is True. + + This function has the same behavior as ``np.where(array)``. + """ + if isinstance(array, TorchTensor): + return torch.where(array) + elif isinstance(array, np.ndarray): + return np.where(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def abs(array): + """ + Returns the absolute value of the elements in the array. + + It is equivalent of np.abs(array) and torch.abs(tensor) + """ + if isinstance(array, TorchTensor): + return torch.abs(array) + elif isinstance(array, np.ndarray): + return np.abs(array).astype(array.dtype) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def argsort(array): + """ + Returns the sorted arguments of the elements in the array. + + It is equivalent of np.argsort(array) and torch.argsort(tensor) + """ + if isinstance(array, TorchTensor): + return torch.argsort(array) + elif isinstance(array, np.ndarray): + return np.argsort(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def contiguous(array): + """ + Returns a contiguous array. + + It is equivalent of np.ascontiguousarray(array) and tensor.contiguous(). In + the case of numpy, C order is used for consistency with torch. As such, only + C-contiguity is checked. 
+ """ + if isinstance(array, TorchTensor): + if array.is_contiguous(): + return array + return array.contiguous() + elif isinstance(array, np.ndarray): + if array.flags["C_CONTIGUOUS"]: + return array + return np.ascontiguousarray(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def to_int_list(array) -> List[int]: + if isinstance(array, TorchTensor): + # we need to do it this way because of + # https://github.com/pytorch/pytorch/issues/76295 + return array.to(dtype=torch.int64).tolist() + elif isinstance(array, np.ndarray): + return array.tolist() + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def int_range_like(min_val: int, max_val: int, like): + """ + Returns an array of integers from min to max, non-inclusive, based on the type of + `like` + + It is equivalent of np.arange(start, end) and torch.arange(start, end) for the given + array dtype and device. + """ + if isinstance(like, TorchTensor): + return torch.arange(min_val, max_val, dtype=torch.int64, device=like.device) + elif isinstance(like, np.ndarray): + return np.arange(min_val, max_val).astype(np.int64) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def int_array_like(int_list: Union[List[int], List[List[int]]], like): + """ + Converts the input list of int to a numpy array or torch tensor + based on the type of `like`. + """ + if isinstance(like, TorchTensor): + if torch.jit.isinstance(int_list, List[int]): + return torch.tensor(int_list, dtype=torch.int64, device=like.device) + else: + return torch.tensor(int_list, dtype=torch.int64, device=like.device) + elif isinstance(like, np.ndarray): + return np.array(int_list).astype(np.int64) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def double_array_like(int_list: List[float], like): + """ + Converts the input list of float to a numpy array or torch tensor + based on the array type of `like`. + """ + if isinstance(like, TorchTensor): + return torch.tensor(int_list, dtype=torch.float64, device=like.device) + elif isinstance(like, np.ndarray): + return np.array(int_list).astype(np.float64) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def bool_array_like(bool_list: List[bool], like): + """ + Converts the input list of bool to a numpy array or torch tensor + based on the type of `like`. + """ + if isinstance(like, TorchTensor): + return torch.tensor(bool_list, dtype=torch.bool, device=like.device) + elif isinstance(like, np.ndarray): + return np.array(bool_list).astype(bool) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def cartesian_prod(array1, array2): + """ + Imitates like itertools.product(array1, array2) + """ + if isinstance(array1, TorchTensor) and isinstance(array2, TorchTensor): + return torch.cartesian_prod(array1, array2) + elif isinstance(array1, np.ndarray) and isinstance(array2, np.ndarray): + # using itertools should be fastest way according to + # https://stackoverflow.com/a/28684982 + return np.array(list(itertools.product(array1, array2))) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def all(array, axis: Optional[int] = None): + """Test whether all array elements along a given axis evaluate to True. + + This function has the same behavior as + ``np.all(array,axis=axis)``. + """ + if isinstance(array, bool): + array = np.array(array) + if isinstance(array, list): + array = np.array(array) + + if isinstance(array, TorchTensor): + # torch.all has two implementation, and picks one depending if more than one + # parameter is given. 
The second one does not support setting dim to `None`
+        if axis is None:
+            return torch.all(input=array)
+        else:
+            return torch.all(input=array, dim=axis)
+    elif isinstance(array, np.ndarray):
+        return np.all(a=array, axis=axis)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def max(array):
+    """
+    Takes the maximum value of the array.
+
+    This function has the same behavior as
+    ``np.max(array)`` or ``torch.max(array)``.
+    """
+    if isinstance(array, TorchTensor):
+        return torch.max(array)
+    elif isinstance(array, np.ndarray):
+        return np.max(array)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def any(array):
+    """Test whether any array elements along a given axis evaluate to True.
+
+    This function has the same behavior as
+    ``np.any(array)``.
+    """
+    if isinstance(array, bool):
+        array = np.array(array)
+    if isinstance(array, list):
+        array = np.array(array)
+    if isinstance(array, TorchTensor):
+        return torch.any(array)
+    elif isinstance(array, np.ndarray):
+        return np.any(array)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def allclose(
+    a: TorchTensor,
+    b: TorchTensor,
+    rtol: float,
+    atol: float,
+    equal_nan: bool = False,
+):
+    """Compare two arrays using ``allclose``.
+
+    This function has the same behavior as
+    ``np.allclose(array1, array2, rtol, atol, equal_nan)``.
+    """
+    if isinstance(a, TorchTensor):
+        _check_all_torch_tensor([b])
+        return torch.allclose(
+            input=a, other=b, rtol=rtol, atol=atol, equal_nan=equal_nan
+        )
+    elif isinstance(a, np.ndarray):
+        _check_all_np_ndarray([b])
+        return np.allclose(a=a, b=b, rtol=rtol, atol=atol, equal_nan=equal_nan)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def swapaxes(array, axis0: int, axis1: int):
+    """Swaps axes of an array."""
+    if isinstance(array, TorchTensor):
+        return torch.swapaxes(array, axis0, axis1)
+    elif isinstance(array, np.ndarray):
+        return np.swapaxes(array, axis0, axis1)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def conjugate(array):
+    """
+    Conjugates the array.
+
+    This function has the same behavior as
+    ``np.conjugate(array)`` or ``torch.conj(array)``.
+    """
+    if isinstance(array, TorchTensor):
+        return torch.conj(array)
+    elif isinstance(array, np.ndarray):
+        return np.conjugate(array)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def real(array):
+    """
+    Takes the real part of the array.
+
+    This function has the same behavior as
+    ``np.real(array)`` or ``torch.real(array)``.
+    """
+    if isinstance(array, TorchTensor):
+        return torch.real(array)
+    elif isinstance(array, np.ndarray):
+        return np.real(array)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def imag(array):
+    """
+    Takes the imaginary part of the array.
+
+    This function has the same behavior as
+    ``np.imag(array)`` or ``torch.imag(array)``.
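+
+    A small sketch (numpy backend, illustrative values):
+
+    >>> imag(np.array([1.0 + 2.0j, 3.0 + 0.0j]))
+    array([2., 0.])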
+ """ + if isinstance(array, TorchTensor): + return torch.imag(array) + elif isinstance(array, np.ndarray): + return np.imag(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py index fca7f33a2..0760f2d36 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py @@ -1,7 +1 @@ -from .correlate_density import correlate_density, correlate_density_metadata # noqa - - -__all__ = [ - "correlate_density", - "correlate_density_metadata", -] +from .correlate_density import DensityCorrelations # noqa: F401 diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py index 4a6118bd3..1ab14bace 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py @@ -3,12 +3,14 @@ Gordan coefficients for use in CG combinations. """ -from typing import Union +import math +from typing import Dict, List import numpy as np import wigners -from . import _dispatch +from .. import _dispatch +from .._backend import Array, Labels, TensorBlock, TensorMap try: @@ -18,55 +20,109 @@ except ImportError: HAS_MOPS = False + try: + import torch from torch import Tensor as TorchTensor + + torch_dtype = torch.dtype + torch_device = torch.device + + HAS_TORCH = True except ImportError: + HAS_TORCH = False class TorchTensor: pass + class torch_dtype: + pass + + class torch_device: + pass + UNKNOWN_ARRAY_TYPE = ( "unknown array type, only numpy arrays and torch tensors are supported" ) -# ================================= -# ===== ClebschGordanReal class -# ================================= +def calculate_cg_coefficients( + lambda_max: int, + sparse: bool = True, + use_torch: bool = False, +) -> TensorMap: + """ + Calculates the Clebsch-Gordan coefficients for all possible combination of l1 and + l2, up to ``lambda_max``. Returns them in :py:class:`TensorMap` format. + The output data structure of the output :py:class:`TensorMap` depends on whether the + backend used to perform CG tensor products uses sparse or dense operations. -class ClebschGordanReal: - """ - Class for computing Clebsch-Gordan coefficients for real spherical - harmonics. + Dense: + - samples: `_`, i.e. a dummy sample. + - components: `[(m1,), (m2,), (mu,)]`, i.e. on separate components axes, + where `m1` and `m2` are the m component values for the two arrays + being combined and `mu` is the m component value for the resulting + array. + - properties: `cg_coefficient` - Stores the coefficients in a dictionary in the `self.coeffs` attribute, - which is built at initialization. There are 3 current use cases for the - format of these coefficients. By default, sparse accumulation of products is - performed, whether or not Mops is installed. + Sparse: + - samples: `(m1, m2, mu)`, where `m1` and `m2` are the m component + values for the two arrays being combined and `mu` is the m component + value for the resulting array. + - components: `[]`, i.e. no components axis. + - properties: `cg_coefficient` - Case 1: standard sparse format. - Each dictionary entry is a dictionary with entries for each (m1, m2, mu) - combination. + :param lambda_max: maximum angular momentum value to compute CG coefficients for. + :param sparse: whether to store the CG coefficients in sparse format. 
+ :param use_torch: whether torch tensor or numpy arrays should be used for the cg + coeffs + + :returns: :py:class:`TensorMap` of the Clebsch-Gordan coefficients. + """ + # Build some 'like' arrays for the backend dispatch + if use_torch: + complex_like = torch.empty(0, dtype=torch.complex128) + double_like = torch.empty(0, dtype=torch.double) + if isinstance(Labels, torch.ScriptClass): + labels_values_like = torch.empty(0, dtype=torch.double) + else: + labels_values_like = np.empty(0, dtype=np.double) + else: + complex_like = np.empty(0, dtype=np.complex128) + double_like = np.empty(0, dtype=np.double) + labels_values_like = np.empty(0, dtype=np.double) + + # Calculate the CG coefficients, stored as a dict of dense arrays. This is the + # starting point for the conversion to a TensorMap of different formats depending on + # the backend options. + cg_coeff_dict = _build_dense_cg_coeff_dict( + lambda_max, complex_like, double_like, labels_values_like + ) + + # Build the CG cache depending on whether the CG backend is sparse or dense. The + # dispatching of the arrays backends are accounted for by `double_like` and + # `labels_values_like`. + if sparse: + return _cg_coeff_dict_to_tensormap_sparse( + cg_coeff_dict, double_like, labels_values_like + ) + + return _cg_coeff_dict_to_tensormap_dense( + cg_coeff_dict, double_like, labels_values_like + ) - { - (l1, l2, lambda): { - (m1, m2, mu) : cg_{m1, m2, mu}^{l1, l2, lambda} - for m1 in range(-l1, l1 + 1), - for m2 in range(-l2, l2 + 1), - }, - ... - for l1 in range(0, l1_list) - for l2 in range(0, l2_list) - for lambda in range(0, range(|l1 - l2|, ..., |l1 + l2|)) - } - Case 2: standard dense format. +def _build_dense_cg_coeff_dict( + lambda_max: int, complex_like: Array, double_like: Array, labels_values_like: Array +) -> Dict[int, Array]: + """ + Calculates the CG coefficients and stores them as dense arrays in a dictionary. - Each dictionary entry is a dense array with shape (2 * l1 + 1, 2 * l2 + 1, 2 - * lambda + 1). + Each dictionary entry is a dense array with shape (2 * l1 + 1, 2 * l2 + 1, 2 * + lambda + 1). { (l1, l2, lambda): @@ -85,173 +141,164 @@ class ClebschGordanReal: for lambda in range(0, range(|l1 - l2|, ..., |l1 + l2|)) } - Case 3: MOPS sparse format. + where `cg_{m1, m2, mu}^{l1, l2, lambda}` is the Clebsch-Gordan coefficient that + describes the combination of the `m1` irreducible component of the `l1` angular + channel and the `m2` irreducible component of the `l2` angular channel into the + irreducible tensor of order `lambda`. In all cases, these correspond to the non-zero + CG coefficients, i.e. those in the range |-l, ..., +l| for each angular order l in + {l1, l2, lambda}. + + :param lambda_max: maximum angular momentum value to compute CG coefficients for. + :param complex_like: an empty array of dtype complex, used for dispatching + operations + :param double_like: an empty array of dtype double, used for dispatching operations + :param labels_values_like: an empty array of dtype int32, used for dispatching + operations + + :returns: dictionary of dense CG coefficients. + """ + # real-to-complex and complex-to-real transformations as matrices + r2c: Dict[int, Array] = {} + c2r: Dict[int, Array] = {} + coeff_dict = {} - Each dictionary entry contains a tuple with four 1D arrays, corresponding to - the CG coeffs and m1, m2, mu indices respectively. All of these arrays are - sorted according to the mu index. This format is used for Sparse - Accumulation of Products (SAP) as implemented in MOPS. 
See - https://github.com/lab-cosmo/mops . + for o3_lambda in range(0, lambda_max + 1): + c2r[o3_lambda] = _complex2real(o3_lambda, like=complex_like) + r2c[o3_lambda] = _real2complex(o3_lambda, like=complex_like) - { - (l1, l2, lambda): - ( - [ - cg_{m1, m2, mu}^{l1, l2, lambda} - ... - for m1 in range(-l1, l1 + 1), - for m2 in range(-l2, l2 + 1), - for mu in range(-lambda, lambda + 1) - ], - [ - m1 for m1 in range(-l1, l1 + 1), - ], - [ - m2 for m2 in range(-l2, l2 + 1), - ], - [ - mu for mu in range(-lambda, lambda + 1), - ], - ) + for l1 in range(lambda_max + 1): + for l2 in range(lambda_max + 1): + for o3_lambda in range( + max(l1, l2) - min(l1, l2), min(lambda_max, (l1 + l2)) + 1 + ): + complex_cg = _complex_clebsch_gordan_matrix( + l1, l2, o3_lambda, complex_like + ) + real_cg = (r2c[l1].T @ complex_cg.reshape(2 * l1 + 1, -1)).reshape( + complex_cg.shape + ) - } + real_cg = real_cg.swapaxes(0, 1) + real_cg = (r2c[l2].T @ real_cg.reshape(2 * l2 + 1, -1)).reshape( + real_cg.shape + ) + real_cg = real_cg.swapaxes(0, 1) - where `cg_{m1, m2, mu}^{l1, l2, lambda}` is the Clebsch-Gordan coefficient - that describes the combination of the `m1` irreducible component of the `l1` - angular channel and the `m2` irreducible component of the `l2` angular - channel into the irreducible tensor of order `lambda`. In all cases, these - correspond to the non-zero CG coefficients, i.e. those in the range |-l, - ..., +l| for each angular order l in {l1, l2, lambda}. + real_cg = real_cg @ c2r[o3_lambda].T - :param lambda_max: maximum lambda value to compute CG coefficients for. - :param sparse: whether to store the CG coefficients in sparse format. - :param use_mops: whether to store the CG coefficients in MOPS sparse format. - This is recommended as the default for sparse accumulation, but can only - be used if Mops is installed. + if (l1 + l2 + o3_lambda) % 2 == 0: + cg_l1l2lam_dense = _dispatch.real(real_cg) + else: + cg_l1l2lam_dense = _dispatch.imag(real_cg) + + coeff_dict[(l1, l2, o3_lambda)] = cg_l1l2lam_dense + + return coeff_dict + + +def _cg_coeff_dict_to_tensormap_dense( + coeff_dict: Dict, double_like: Array, labels_values_like: Array +) -> TensorMap: + """ + Converts the dictionary of dense CG coefficients coefficients to + :py:class:`TensorMap` format, specifically for performing CG tensor products with + the "python-dense" backend. 
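+
+    As a sketch of the resulting block structure (the key values below are
+    illustrative): the dense block for ``l1=1, l2=1, lambda=0`` has one dummy
+    sample, three component axes of sizes ``(3, 3, 1)``, and one property:
+
+    >>> cg = calculate_cg_coefficients(lambda_max=1, sparse=False)
+    >>> cg.block({"l1": 1, "l2": 1, "lambda": 0}).values.shape
+    (1, 3, 3, 1, 1)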
""" + keys = Labels( + ["l1", "l2", "lambda"], + _dispatch.int_array_like(list(coeff_dict.keys()), labels_values_like), + ) + blocks = [] + + for l1l2lam_values in coeff_dict.values(): + # extending shape by samples and properties + block_value_shape = (1,) + l1l2lam_values.shape + (1,) + blocks.append( + TensorBlock( + values=_dispatch.contiguous(l1l2lam_values.reshape(block_value_shape)), + samples=Labels.range("_", 1), + components=[ + Labels( + ["m1"], + _dispatch.int_range_like( + 0, l1l2lam_values.shape[0], labels_values_like + ).reshape(-1, 1), + ), + Labels( + ["m2"], + _dispatch.int_range_like( + 0, l1l2lam_values.shape[1], labels_values_like + ).reshape(-1, 1), + ), + Labels( + ["mu"], + _dispatch.int_range_like( + 0, l1l2lam_values.shape[2], labels_values_like + ).reshape(-1, 1), + ), + ], + properties=Labels.range("cg_coefficient", 1), + ) + ) - def __init__(self, lambda_max: int, sparse: bool = True, use_mops: bool = HAS_MOPS): - self._lambda_max = lambda_max - self._sparse = sparse - - if sparse: - if not HAS_MOPS: - # TODO: provide a warning once Mops is fully ready - # import warnings - # warnings.warn( - # "It is recommended to use MOPS for sparse accumulation. " - # " This can be installed with ``pip install" - # " git+https://github.com/lab-cosmo/mops`." - # " Falling back to numpy for now." - # ) - self._use_mops = False - else: - self._use_mops = True + return TensorMap(keys, blocks) - else: - # TODO: provide a warning once Mops is fully ready - # if HAS_MOPS: - # import warnings - # warnings.warn( - # "Mops is installed, but not being used" - # " as dense operations chosen." - # ) - self._use_mops = False - - self._coeffs = ClebschGordanReal.build_coeff_dict( - self._lambda_max, - self._sparse, - self._use_mops, + +def _cg_coeff_dict_to_tensormap_sparse( + coeff_dict: Dict, double_like: Array, labels_values_like: Array +) -> TensorMap: + """ + Converts the dictionary of dense CG coefficients coefficients to + :py:class:`TensorMap` format, specifically for performing CG tensor products with + the "python-sparse" backend. 
+ """ + dict_keys = list(coeff_dict.keys()) + keys = Labels( + ["l1", "l2", "lambda"], + _dispatch.int_array_like(list(dict_keys), labels_values_like), + ) + blocks = [] + + # For each (l1, l2, lambda) combination, build a TensorBlock of non-zero CG coeffs + for l1, l2, o3_lambda in dict_keys: + cg_l1l2lam_dense = coeff_dict[(l1, l2, o3_lambda)] + + # Find the dense indices of the non-zero CG coeffs + nonzeros_cg_coeffs_idx = _dispatch.where( + _dispatch.abs(cg_l1l2lam_dense) > 1e-15 + ) + + # Create a sparse dictionary indexed by of the non-zero CG coeffs + cg_l1l2lam_sparse = {} + for i in range(len(nonzeros_cg_coeffs_idx[0])): + m1 = nonzeros_cg_coeffs_idx[0][i] + m2 = nonzeros_cg_coeffs_idx[1][i] + mu = nonzeros_cg_coeffs_idx[2][i] + cg_l1l2lam_sparse[(m1, m2, mu)] = cg_l1l2lam_dense[m1, m2, mu] + + l1l2lam_sample_values = [] + for m1m2mu_key in cg_l1l2lam_sparse.keys(): + l1l2lam_sample_values.append(m1m2mu_key) + # extending shape by samples and properties + values = _dispatch.double_array_like( + [*cg_l1l2lam_sparse.values()], double_like + ).reshape(-1, 1) + l1l2lam_sample_values = _dispatch.int_array_like( + l1l2lam_sample_values, labels_values_like + ) + # we have to move put the m1 m2 m3 inside a block so we can access it easier + # inside cg combine function, + blocks.append( + TensorBlock( + values=_dispatch.contiguous(values), + samples=Labels(["m1", "m2", "mu"], l1l2lam_sample_values), + components=[], + properties=Labels.range("cg_coefficient", 1), + ) ) - @property - def lambda_max(self): - return self._lambda_max - - @property - def sparse(self): - return self._sparse - - @property - def use_mops(self): - return self._use_mops - - @property - def coeffs(self): - return self._coeffs - - @staticmethod - def build_coeff_dict(lambda_max: int, sparse: bool, use_mops: bool): - """ - Builds a dictionary of Clebsch-Gordan coefficients for all possible - combination of l1 and l2, up to lambda_max. - """ - # real-to-complex and complex-to-real transformations as matrices - r2c = {} - c2r = {} - coeff_dict = {} - for lambda_ in range(0, lambda_max + 1): - c2r[lambda_] = _complex2real(lambda_) - r2c[lambda_] = _real2complex(lambda_) - - for l1 in range(lambda_max + 1): - for l2 in range(lambda_max + 1): - for lambda_ in range( - max(l1, l2) - min(l1, l2), min(lambda_max, (l1 + l2)) + 1 - ): - complex_cg = _complex_clebsch_gordan_matrix(l1, l2, lambda_) - - real_cg = (r2c[l1].T @ complex_cg.reshape(2 * l1 + 1, -1)).reshape( - complex_cg.shape - ) - - real_cg = real_cg.swapaxes(0, 1) - real_cg = (r2c[l2].T @ real_cg.reshape(2 * l2 + 1, -1)).reshape( - real_cg.shape - ) - real_cg = real_cg.swapaxes(0, 1) - - real_cg = real_cg @ c2r[lambda_].T - - if (l1 + l2 + lambda_) % 2 == 0: - cg_l1l2lam = np.real(real_cg) - else: - cg_l1l2lam = np.imag(real_cg) - - if sparse: - # Find the m1, m2, mu idxs of the nonzero CG coeffs - nonzeros_cg_coeffs_idx = np.where(np.abs(cg_l1l2lam) > 1e-15) - if use_mops: - # Store CG coeffs in a specific format for use in - # MOPS. Here we need the m1, m2, mu, and CG coeffs - # to be stored as separate 1D arrays. 
- m1_arr, m2_arr, mu_arr, C_arr = [], [], [], [] - for m1, m2, mu in zip(*nonzeros_cg_coeffs_idx): - m1_arr.append(m1) - m2_arr.append(m2) - mu_arr.append(mu) - C_arr.append(cg_l1l2lam[m1, m2, mu]) - - # Reorder the arrays based on sorted mu values - mu_idxs = np.argsort(mu_arr) - m1_arr = np.array(m1_arr)[mu_idxs] - m2_arr = np.array(m2_arr)[mu_idxs] - mu_arr = np.array(mu_arr)[mu_idxs] - C_arr = np.array(C_arr)[mu_idxs] - cg_l1l2lam = (C_arr, m1_arr, m2_arr, mu_arr) - else: - # Otherwise fall back to torch/numpy and store as - # sparse dicts. - cg_l1l2lam = { - (m1, m2, mu): cg_l1l2lam[m1, m2, mu] - for m1, m2, mu in zip(*nonzeros_cg_coeffs_idx) - } - - # Store - coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam - - return coeff_dict + return TensorMap(keys, blocks) # ============================ @@ -259,74 +306,80 @@ def build_coeff_dict(lambda_max: int, sparse: bool, use_mops: bool): # ============================ -def _real2complex(lambda_: int) -> np.ndarray: +def _real2complex(o3_lambda: int, like: Array) -> Array: """ - Computes a matrix that can be used to convert from real to complex-valued - spherical harmonics(coefficients) of order ``lambda_``. + Computes a matrix that can be used to convert from real to complex-valued spherical + harmonics(coefficients) of order ``o3_lambda``. + + This is meant to be applied to the left: + ``real2complex @ [-o3_lambda, ..., +o3_lambda]``. - This is meant to be applied to the left: ``real2complex @ [-lambda_, ..., - +lambda_]``. + See https://en.wikipedia.org/wiki/Spherical_harmonics#Real_form for details on the + convention for how these tranformations are defined. - See https://en.wikipedia.org/wiki/Spherical_harmonics#Real_form for details - on the convention for how these tranformations are defined. + Operations are dispatched to the corresponding array type given by ``like`` """ - result = np.zeros((2 * lambda_ + 1, 2 * lambda_ + 1), dtype=np.complex128) - inv_sqrt_2 = 1.0 / np.sqrt(2) - i_sqrt_2 = 1j / np.sqrt(2) - for m in range(-lambda_, lambda_ + 1): + result = _dispatch.zeros_like(like, shape=(2 * o3_lambda + 1, 2 * o3_lambda + 1)) + inv_sqrt_2 = 1.0 / math.sqrt(2.0) + i_sqrt_2 = 1.0j / complex(math.sqrt(2.0)) + + for m in range(-o3_lambda, o3_lambda + 1): if m < 0: # Positve part - result[lambda_ + m, lambda_ + m] = +i_sqrt_2 + result[o3_lambda + m, o3_lambda + m] = i_sqrt_2 # Negative part - result[lambda_ - m, lambda_ + m] = -i_sqrt_2 * ((-1) ** m) + result[o3_lambda - m, o3_lambda + m] = -i_sqrt_2 * ((-1) ** m) if m == 0: - result[lambda_, lambda_] = +1.0 + result[o3_lambda, o3_lambda] = 1.0 if m > 0: # Negative part - result[lambda_ - m, lambda_ + m] = +inv_sqrt_2 + result[o3_lambda - m, o3_lambda + m] = inv_sqrt_2 # Positive part - result[lambda_ + m, lambda_ + m] = +inv_sqrt_2 * ((-1) ** m) + result[o3_lambda + m, o3_lambda + m] = inv_sqrt_2 * ((-1) ** m) return result -def _complex2real(lambda_: int) -> np.ndarray: +def _complex2real(o3_lambda: int, like) -> Array: """ Converts from complex to real spherical harmonics. This is just given by the conjugate tranpose of the real->complex transformation matrices. 
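+
+    As a quick sanity check (a sketch, assuming a complex-valued numpy ``like``
+    array), composing the two transformations recovers the identity:
+
+    >>> import numpy as np
+    >>> like = np.empty(0, dtype=np.complex128)
+    >>> np.allclose(_complex2real(1, like) @ _real2complex(1, like), np.eye(3))
+    True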
+ + Operations are dispatched to the corresponding array type given by ``like`` """ - return np.conjugate(_real2complex(lambda_)).T + return _dispatch.conjugate(_real2complex(o3_lambda, like)).T -def _complex_clebsch_gordan_matrix(l1, l2, lambda_): +def _complex_clebsch_gordan_matrix(l1: int, l2: int, o3_lambda: int, like: Array): r"""clebsch-gordan matrix Computes the Clebsch-Gordan (CG) matrix for transforming complex-valued spherical harmonics. The CG matrix is computed as a 3D array of elements - < l1 m1 l2 m2 | lambda_ mu > + < l1 m1 l2 m2 | o3_lambda mu > where the first axis loops over m1, the second loops over m2, and the third one loops over mu. The matrix is real. For example, using the relation: - | l1 l2 lambda_ mu > = + | l1 l2 o3_lambda mu > = \sum_{m1, m2} - | l1 m1 > | l2 m2 > + | l1 m1 > | l2 m2 > (https://en.wikipedia.org/wiki/Clebsch–Gordan_coefficients, section "Formal definition of Clebsch-Gordan coefficients", eq 2) - one can obtain the spherical harmonics lambda_ from two sets of + one can obtain the spherical harmonics o3_lambda from two sets of spherical harmonics with l1 and l2 (up to a normalization factor). E.g.: Args: l1: l number for the first set of spherical harmonics l2: l number for the second set of spherical harmonics - lambda_: l number For the third set of spherical harmonics + o3_lambda: l number For the third set of spherical harmonics + like: Operations are dispatched to the corresponding this arguments array type Returns: cg: CG matrix for transforming complex-valued spherical harmonics >>> from scipy.special import sph_harm >>> import numpy as np >>> import wigners - >>> C_112 = _complex_clebsch_gordan_matrix(1, 1, 2) + >>> C_112 = _complex_clebsch_gordan_matrix(1, 1, 2, np.empty(0)) >>> comp_sph_1 = np.array([sph_harm(m, 1, 0.2, 0.2) for m in range(-1, 1 + 1)]) >>> comp_sph_2 = np.array([sph_harm(m, 1, 0.2, 0.2) for m in range(-1, 1 + 1)]) >>> # obtain the (unnormalized) spherical harmonics @@ -339,10 +392,10 @@ def _complex_clebsch_gordan_matrix(l1, l2, lambda_): >>> np.allclose(ratio[0], ratio) True """ - if np.abs(l1 - l2) > lambda_ or np.abs(l1 + l2) < lambda_: - return np.zeros((2 * l1 + 1, 2 * l2 + 1, 2 * lambda_ + 1), dtype=np.double) + if abs(l1 - l2) > o3_lambda or abs(l1 + l2) < o3_lambda: + return _dispatch.zeros_like(like, (2 * l1 + 1, 2 * l2 + 1, 2 * o3_lambda + 1)) else: - return wigners.clebsch_gordan_array(l1, l2, lambda_) + return wigners.clebsch_gordan_array(l1, l2, o3_lambda) # ================================================= @@ -351,202 +404,271 @@ def _complex_clebsch_gordan_matrix(l1, l2, lambda_): def combine_arrays( - arr_1: Union[np.ndarray, TorchTensor], - arr_2: Union[np.ndarray, TorchTensor], - lambda_: int, - cg_cache, - return_empty_array: bool = False, -) -> Union[np.ndarray, TorchTensor]: + array_1: Array, + array_2: Array, + o3_lambda: int, + cg_coeffs: TensorMap, + cg_backend: str, +) -> Array: """ - Couples arrays `arr_1` and `arr_2` corresponding to the irreducible - spherical components of 2 angular channels l1 and l2 using the appropriate - Clebsch-Gordan coefficients. As l1 and l2 can be combined to form multiple - lambda channels, this function returns the coupling to a single specified - channel `lambda`. The angular channels l1 and l2 are inferred from the size - of the components axis (axis 1) of the input arrays. - - `arr_1` has shape (n_i, 2 * l1 + 1, n_p) and `arr_2` has shape (n_i, 2 * l2 - + 1, n_q). n_i is the number of samples, n_p and n_q are the number of - properties in each array. 
The number of samples in each array must be the
-    same.
-
-    The ouput array has shape (n_i, 2 * lambda + 1, n_p * n_q), where lambda is
-    the input parameter `lambda_`.
-
-    The Clebsch-Gordan coefficients are cached in `cg_cache`. Currently, these
-    must be produced by the ClebschGordanReal class in this module. These
-    coefficients can be stored in either sparse dictionaries or dense arrays.
-
-    The combination operation is dispatched such that numpy arrays or torch
-    tensors are automatically handled.
-
-    `return_empty_array` can be used to return an empty array of the correct
-    shape, without performing the CG combination step. This can be useful for
-    probing the outputs of CG iterations in terms of metadata without the
-    computational cost of performing the CG combinations - i.e. using the
-    function :py:func:`combine_single_center_to_body_order_metadata_only`.
-
-    :param arr_1: array with the m values for l1 with shape [n_samples, 2 * l1 +
-        1, n_q_properties]
-    :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 +
-        1, n_p_properties]
-    :param lambda_: int value of the resulting coupled channel
-    :param cg_cache: either a sparse dictionary with keys (m1, m2, mu) and array
-        values being sparse blocks of shape , or a dense array
-        of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + 1)].
-
-    :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties]
+    Couples arrays `array_1` and `array_2` corresponding to the irreducible spherical
+    components of 2 angular channels l1 and l2 using the appropriate Clebsch-Gordan
+    coefficients. As l1 and l2 can be combined to form multiple lambda channels, this
+    function returns the coupling to a single specified channel `lambda`. The angular
+    channels l1 and l2 are inferred from the size of the components axis (axis 1) of the
+    input arrays.
+
+    `array_1` has shape (n_i, 2 * l1 + 1, n_p) and `array_2` has shape (n_i, 2 * l2 + 1,
+    n_q). n_i is the number of samples, n_p and n_q are the number of properties in each
+    array. The number of samples in each array must be the same.
+
+    The output array has shape (n_i, 2 * o3_lambda + 1, n_p * n_q), where `o3_lambda` is
+    the input parameter.
+
+    The Clebsch-Gordan coefficients are cached in `cg_coeffs`. Currently, these must be
+    produced by :py:func:`calculate_cg_coefficients` in this module. These coefficients
+    can be stored in either sparse or dense :py:class:`TensorMap` format.
+
+    The combination operation is dispatched such that numpy arrays or torch tensors are
+    automatically handled.
+
+    Passing ``cg_backend="metadata"`` returns an empty array of the correct shape,
+    without performing the CG combination step. This can be useful for probing the
+    outputs of CG iterations in terms of metadata without the computational cost of
+    performing the CG combinations.
+
+    :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 + 1,
+        n_q_properties]
+    :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 + 1,
+        n_p_properties]
+    :param o3_lambda: int value of the resulting coupled channel
+    :param cg_coeffs: a :py:class:`TensorMap` containing CG coefficients in a format for
+        either sparse or dense CG tensor products, as returned by
+        :py:func:`calculate_cg_coefficients`. See the function docstring for details on
+        the data structure. Only used if ``cg_backend`` is not ``"metadata"``.
+    :param cg_backend: specifies the combine backend with sparse CG coefficients. It can
+        have the values "python-dense", "python-sparse", "mops" and "metadata". If
+        "python-dense" or "python-sparse" is chosen, a dense or sparse combination
+        (respectively) of the arrays is performed using either numpy or torch, depending
+        on the backend. If "mops" is chosen, a sparse combination of the arrays is
+        performed if the external package MOPS is installed. If "metadata" is chosen, no
+        combination is performed, and an empty array of the correct shape is returned.
+
+
+    :returns: array of shape [n_samples, (2*o3_lambda+1), q_properties * p_properties]
    """
    # If just precomputing metadata, return an empty array
-    if return_empty_array:
-        return sparse_combine(arr_1, arr_2, lambda_, cg_cache, return_empty_array=True)
+    if cg_backend == "metadata":
+        return empty_combine(array_1, array_2, o3_lambda)
+
+    if cg_backend == "python-sparse" or cg_backend == "mops":
+        return sparse_combine(array_1, array_2, o3_lambda, cg_coeffs, cg_backend)
+    elif cg_backend == "python-dense":
+        return dense_combine(array_1, array_2, o3_lambda, cg_coeffs)
+    else:
+        raise ValueError(
+            f"Wrong cg_backend, got '{cg_backend}', but only 'python-dense',"
+            " 'python-sparse', 'mops' and 'metadata' are supported."
+        )
-
-    # Otherwise, perform the CG combination
-    # Spare CG cache
-    if cg_cache.sparse:
-        return sparse_combine(arr_1, arr_2, lambda_, cg_cache, return_empty_array=False)
-    # Dense CG cache
-    return dense_combine(arr_1, arr_2, lambda_, cg_cache)
+
+def empty_combine(
+    array_1: Array,
+    array_2: Array,
+    o3_lambda: int,
+) -> Array:
+    """
+    Returns an empty array of the correct shape, imitating the output array shape
+    produced by a CG combination of ``array_1`` and ``array_2``.
+    """
+    # Samples dimensions must be the same
+    assert array_1.shape[0] == array_2.shape[0]
+
+    # Define other useful dimensions
+    n_i = array_1.shape[0]  # number of samples
+    n_p = array_1.shape[2]  # number of properties in array_1
+    n_q = array_2.shape[2]  # number of properties in array_2
+
+    return _dispatch.empty_like(array_1, (n_i, 2 * o3_lambda + 1, n_p * n_q))

def sparse_combine(
-    arr_1: Union[np.ndarray, TorchTensor],
-    arr_2: Union[np.ndarray, TorchTensor],
-    lambda_: int,
-    cg_cache,
-    return_empty_array: bool = False,
-) -> Union[np.ndarray, TorchTensor]:
+    array_1: Array,
+    array_2: Array,
+    o3_lambda: int,
+    cg_coeffs: TensorMap,
+    cg_backend: str,
+) -> Array:
    """
-    Performs a Clebsch-Gordan combination step on 2 arrays using sparse
-    operations. The angular channel of each block is inferred from the size of
-    its component axis, and the blocks are combined to the desired output
-    angular channel `lambda_` using the appropriate Clebsch-Gordan coefficients.
-
-    :param arr_1: array with the m values for l1 with shape [n_samples, 2 * l1 +
-        1, n_q_properties]
-    :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 +
-        1, n_p_properties]
-    :param lambda_: int value of the resulting coupled channel
-    :param cg_cache: sparse dictionary with keys (m1, m2, mu) and array values
-        being sparse blocks of shape
-
-    :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties]
+    Performs a Clebsch-Gordan combination step on 2 arrays using sparse operations. The
+    angular channel of each block is inferred from the size of its component axis, and
+    the blocks are combined to the desired output angular channel `o3_lambda` using the
+    appropriate Clebsch-Gordan coefficients.
+
+    :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 + 1,
+        n_q_properties]
+    :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 + 1,
+        n_p_properties]
+    :param o3_lambda: int value of the resulting coupled channel
+    :param cg_coeffs: a :py:class:`TensorMap` containing CG coefficients in a format for
+        either sparse or dense CG tensor products, as returned by
+        :py:func:`calculate_cg_coefficients`. See the function docstring for details on
+        the data structure. Only used if ``cg_backend`` is not ``"metadata"``.
+    :param cg_backend: specifies the combine backend with sparse CG coefficients. It can
+        have the values "python-dense", "python-sparse", "mops" and "metadata". If
+        "python-dense" or "python-sparse" is chosen, a dense or sparse combination
+        (respectively) of the arrays is performed using either numpy or torch, depending
+        on the backend. If "mops" is chosen, a sparse combination of the arrays is
+        performed if the external package MOPS is installed. If "metadata" is chosen, no
+        combination is performed, and an empty array of the correct shape is returned.
+
+    :returns: array of shape [n_samples, (2*o3_lambda+1), q_properties * p_properties]
    """
    # Samples dimensions must be the same
-    assert arr_1.shape[0] == arr_2.shape[0]
+    assert array_1.shape[0] == array_2.shape[0]

    # Infer l1 and l2 from the length of axis 1 of each tensor
-    l1 = (arr_1.shape[1] - 1) // 2
-    l2 = (arr_2.shape[1] - 1) // 2
+    l1 = (array_1.shape[1] - 1) // 2
+    l2 = (array_2.shape[1] - 1) // 2

    # Define other useful dimensions
-    n_i = arr_1.shape[0]  # number of samples
-    n_p = arr_1.shape[2]  # number of properties in arr_1
-    n_q = arr_2.shape[2]  # number of properties in arr_2
+    n_i = array_1.shape[0]  # number of samples
+    n_p = array_1.shape[2]  # number of properties in array_1
+    n_q = array_2.shape[2]  # number of properties in array_2
+
+    # The isinstance checks and the cg_backend checks make the logic a bit redundant,
+    # but the isinstance check is required for TorchScript. The logic can be made more
+    # straightforward once MOPS supports TorchScript.
+    if isinstance(array_1, TorchTensor) or cg_backend == "python-sparse":
+        # Initialise output array
+        array_out = _dispatch.zeros_like(array_1, (n_i, 2 * o3_lambda + 1, n_p * n_q))

-    if return_empty_array:  # used when only computing metadata
-        return _dispatch.zeros_like((n_i, 2 * lambda_ + 1, n_p * n_q), like=arr_1)
+        # Get the corresponding Clebsch-Gordan coefficients
+        # Fill in each mu component of the output array in turn
+        cg_l1l2lam = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": o3_lambda})
+        for i in range(len(cg_l1l2lam.samples)):
+            m1m2mu_key = cg_l1l2lam.samples.entry(i)
+            m1 = m1m2mu_key[0]
+            m2 = m1m2mu_key[1]
+            mu = m1m2mu_key[2]
+            # Broadcast arrays, multiply together and with CG coeff
+            array_out[:, mu, :] += (
+                array_1[:, m1, :, None]
+                * array_2[:, m2, None, :]
+                * cg_l1l2lam.values[i, 0]
+            ).reshape(n_i, n_p * n_q)

-    if isinstance(arr_1, np.ndarray) and HAS_MOPS:
-        # Reshape
-        arr_1 = np.repeat(arr_1[:, :, :, None], n_q, axis=3).reshape(
+        return array_out
+    elif isinstance(array_1, np.ndarray) and cg_backend == "mops":
+        # MOPS sparse accumulation requires some reshaping of the input arrays. See
+        # https://github.com/lab-cosmo/mops . Currently only supported for a numpy array
+        # backend.
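+        #
+        # As an illustrative walk-through (sizes assumed, not from the original
+        # code): with n_i=2, l1=l2=1, n_p=3, n_q=4, array_1 goes
+        # (2, 3, 3) -> repeat -> (2, 3, 12) -> swapaxes/reshape -> (24, 3), and
+        # array_2 goes (2, 3, 4) -> (2, 3, 12) -> (24, 3), so that each row pairs
+        # one (sample, p, q) combination with its m components before the sparse
+        # accumulation over (m1, m2) -> mu.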
+ array_1 = np.repeat(array_1[:, :, :, None], n_q, axis=3).reshape( n_i, 2 * l1 + 1, n_p * n_q ) - arr_2 = np.repeat(arr_2[:, :, None, :], n_p, axis=2).reshape( + array_2 = np.repeat(array_2[:, :, None, :], n_p, axis=2).reshape( n_i, 2 * l2 + 1, n_p * n_q ) - arr_1 = _dispatch.swapaxes(arr_1, 1, 2).reshape(n_i * n_p * n_q, 2 * l1 + 1) - arr_2 = _dispatch.swapaxes(arr_2, 1, 2).reshape(n_i * n_p * n_q, 2 * l2 + 1) + array_1 = _dispatch.swapaxes(array_1, 1, 2).reshape(n_i * n_p * n_q, 2 * l1 + 1) + array_2 = _dispatch.swapaxes(array_2, 1, 2).reshape(n_i * n_p * n_q, 2 * l2 + 1) - # Do SAP - arr_out = sap( - arr_1, - arr_2, - *cg_cache._coeffs[(l1, l2, lambda_)], - output_size=2 * lambda_ + 1, - ) - assert arr_out.shape == (n_i * n_p * n_q, 2 * lambda_ + 1) - - # Reshape back - arr_out = arr_out.reshape(n_i, n_p * n_q, 2 * lambda_ + 1) - arr_out = _dispatch.swapaxes(arr_out, 1, 2) - - return arr_out + # We also need to pass SAP the CG coefficients and m1, m2, and mu indices as 1D + # arrays. Extract these from the corresponding TensorBlock in the TensorMap CG + # cache. + block = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": o3_lambda}) + samples = block.samples - if isinstance(arr_1, np.ndarray) or isinstance(arr_1, TorchTensor): - # Initialise output array - arr_out = _dispatch.zeros_like((n_i, 2 * lambda_ + 1, n_p * n_q), like=arr_1) + m1_arr: List[int] = [] + m2_arr: List[int] = [] + mu_arr: List[int] = [] + C_arr: List[float] = [] + for sample_i, (m1, m2, mu) in enumerate(samples): - # Get the corresponding Clebsch-Gordan coefficients - cg_coeffs = cg_cache.coeffs[(l1, l2, lambda_)] - - # Fill in each mu component of the output array in turn - for m1, m2, mu in cg_coeffs.keys(): - # Broadcast arrays, multiply together and with CG coeff - arr_out[:, mu, :] += ( - arr_1[:, m1, :, None] * arr_2[:, m2, None, :] * cg_coeffs[(m1, m2, mu)] - ).reshape(n_i, n_p * n_q) + m1_arr.append(int(m1)) + m2_arr.append(int(m2)) + mu_arr.append(int(mu)) + C_arr.append(float(block.values[sample_i, 0])) - return arr_out + # Do SAP + array_out = sap( + A=array_1, + B=array_2, + C=C_arr, + indices_A=m1_arr, + indices_B=m2_arr, + indices_output=mu_arr, + output_size=2 * o3_lambda + 1, + ) + assert array_out.shape == (n_i * n_p * n_q, 2 * o3_lambda + 1) + # Reshape back + array_out = array_out.reshape(n_i, n_p * n_q, 2 * o3_lambda + 1) + array_out = _dispatch.swapaxes(array_out, 1, 2) + + return array_out + elif cg_backend not in ["python-sparse", "mops"]: + raise ValueError( + f"sparse cg backend '{cg_backend}' is not known. " + "Only values 'python-sparse' and 'mops' are valid." + ) else: raise TypeError(UNKNOWN_ARRAY_TYPE) def dense_combine( - arr_1: Union[np.ndarray, TorchTensor], - arr_2: Union[np.ndarray, TorchTensor], - lambda_: int, - cg_cache, -) -> Union[np.ndarray, TorchTensor]: + array_1: Array, + array_2: Array, + o3_lambda: int, + cg_coeffs: TensorMap, +) -> Array: """ - Performs a Clebsch-Gordan combination step on 2 arrays using a dense - operation. The angular channel of each block is inferred from the size of - its component axis, and the blocks are combined to the desired output - angular channel `lambda_` using the appropriate Clebsch-Gordan coefficients. 
- - :param arr_1: array with the m values for l1 with shape [n_samples, 2 * l1 + - 1, n_q_properties] - :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 + - 1, n_p_properties] - :param lambda_: int value of the resulting coupled channel - :param cg_cache: dense array of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + - 1)] - - :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties] + Performs a Clebsch-Gordan combination step on 2 arrays using a dense operation. The + angular channel of each block is inferred from the size of its component axis, and + the blocks are combined to the desired output angular channel `o3_lambda` using the + appropriate Clebsch-Gordan coefficients. + + :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 + 1, + n_q_properties] + :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 + 1, + n_p_properties] + :param o3_lambda: int value of the resulting coupled channel + :param cg_coeffs: a :py:class:`TensorMap` containing CG coefficients in a format for + either sparse or dense CG tensor products, as returned by + :py:func:`calculate_cg_coefficients`. See the function docstring for details on + the data structure. + + :returns: array of shape [n_samples, (2 * o3_lambda + 1), q_properties * + p_properties] """ - if isinstance(arr_1, np.ndarray) or isinstance(arr_1, TorchTensor): - # Infer l1 and l2 from the len of the length of axis 1 of each tensor - l1 = (arr_1.shape[1] - 1) // 2 - l2 = (arr_2.shape[1] - 1) // 2 - cg_coeffs = cg_cache.coeffs[(l1, l2, lambda_)] - - # (samples None None l1_mu q) * (samples l2_mu p None None) - # -> (samples l2_mu p l1_mu q) we broadcast it in this way - # so we only need to do one swapaxes in the next step - arr_out = arr_1[:, None, None, :, :] * arr_2[:, :, :, None, None] - - # (samples l2_mu p l1_mu q) -> (samples q p l1_mu l2_mu) - arr_out = _dispatch.swapaxes(arr_out, 1, 4) - - # samples (q p l1_mu l2_mu) -> (samples (q p) (l1_mu l2_mu)) - arr_out = arr_out.reshape( - -1, - arr_1.shape[2] * arr_2.shape[2], - arr_1.shape[1] * arr_2.shape[1], - ) + # Infer l1 and l2 from the length of axis 1 of each tensor + l1 = (array_1.shape[1] - 1) // 2 + l2 = (array_2.shape[1] - 1) // 2 - # (l1_mu l2_mu lam_mu) -> ((l1_mu l2_mu) lam_mu) - cg_coeffs = cg_coeffs.reshape(-1, 2 * lambda_ + 1) + cg_l1l2lam = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": o3_lambda}).values - # (samples (q p) (l1_mu l2_mu)) @ ((l1_mu l2_mu) lam_mu) - # -> samples (q p) lam_mu - arr_out = arr_out @ cg_coeffs + # (samples None None l1_mu q) * (samples l2_mu p None None) + # -> (samples l2_mu p l1_mu q) we broadcast it in this way + # so we only need to do one swapaxes in the next step + array_out = array_1[:, None, None, :, :] * array_2[:, :, :, None, None] - # (samples (q p) lam_mu) -> (samples lam_mu (q p)) - return _dispatch.swapaxes(arr_out, 1, 2) + # (samples l2_mu p l1_mu q) -> (samples q p l1_mu l2_mu) + array_out = _dispatch.swapaxes(array_out, 1, 4) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) + # samples (q p l1_mu l2_mu) -> (samples (q p) (l1_mu l2_mu)) + array_out = array_out.reshape( + -1, + array_1.shape[2] * array_2.shape[2], + array_1.shape[1] * array_2.shape[1], + ) + + # (l1_mu l2_mu lam_mu) -> ((l1_mu l2_mu) lam_mu) + cg_l1l2lam = cg_l1l2lam.reshape(-1, 2 * o3_lambda + 1) + + # (samples (q p) (l1_mu l2_mu)) @ ((l1_mu l2_mu) lam_mu) + # -> samples (q p) lam_mu + array_out = array_out @
cg_l1l2lam + + # (samples (q p) lam_mu) -> (samples lam_mu (q p)) + return _dispatch.swapaxes(array_out, 1, 2) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py index 3bb575cc0..86eb3e5ba 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py @@ -4,12 +4,19 @@ metatensor :py:class:`TensorMap` objects. """ -import itertools from typing import List, Optional, Tuple, Union -from metatensor import Labels, TensorBlock, TensorMap - -from . import _cg_cache, _dispatch +from .. import _dispatch +from .._backend import ( + Array, + Labels, + LabelsEntry, + TensorBlock, + TensorMap, + is_labels, + torch_jit_annotate, +) +from . import _cg_cache # ================================================================== @@ -34,32 +41,51 @@ def _standardize_keys(tensor: TensorMap) -> TensorMap: if "species_neighbor" in tensor.keys.names: tensor = tensor.keys_to_properties(keys_to_move="species_neighbor") keys = tensor.keys.insert( - name="order_nu", - values=_dispatch.int_array_like([1], like=tensor.keys.values), index=0, + name="order_nu", + values=_dispatch.int_array_like( + len(tensor.keys.values) * [1], like=tensor.keys.values + ), ) keys = keys.insert( - name="inversion_sigma", - values=_dispatch.int_array_like([1], like=tensor.keys.values), index=1, + name="inversion_sigma", + values=_dispatch.int_array_like( + len(tensor.keys.values) * [1], like=tensor.keys.values + ), ) return TensorMap(keys=keys, blocks=[b.copy() for b in tensor.blocks()]) def _parse_selected_keys( n_iterations: int, + array_like: Array, angular_cutoff: Optional[int] = None, selected_keys: Optional[Union[Labels, List[Labels]]] = None, - like=None, -) -> List[Union[None, Labels]]: +) -> List[Labels]: """ Parses the `selected_keys` argument passed to public functions. Checks the values and returns a :py:class:`list` of :py:class:`Labels` objects, one for - each iteration of CG combination. - - `like` is required if a new :py:class:`Labels` object is to be created by - :py:mod:`_dispatch`. 
+ each iteration of CG combination. The ``array_like`` argument determines the + array backend of the :py:class:`Labels` created. """ + # Check the selected_keys + if ( + (selected_keys is not None) + and (not isinstance(selected_keys, list)) + and (not is_labels(selected_keys)) + ): + raise TypeError("`selected_keys` must be `None`, `Labels` or List[`Labels`]") + + if isinstance(selected_keys, list): + if not all( + [ + is_labels(selected_keys[i]) or (selected_keys[i] is None) + for i in range(len(selected_keys)) + ] + ): + raise TypeError("entries of `selected_keys` must be `Labels` or `None`") + # Check angular_cutoff arg if angular_cutoff is not None: if not isinstance(angular_cutoff, int): @@ -67,95 +93,107 @@ if angular_cutoff < 1: raise ValueError("`angular_cutoff` must be >= 1") + # we use a new variable for selected_keys so TorchScript can infer the correct type + selected_keys_: List[Union[None, Labels]] = [] + if selected_keys is None: if angular_cutoff is None: # no selections at all - selected_keys = [None] * n_iterations + selected_keys_ = [None] * n_iterations else: # Create a key selection with all angular channels <= the specified # angular cutoff - selected_keys = [ - Labels( - names=["spherical_harmonics_l"], - values=_dispatch.int_range_like( - 0, angular_cutoff, like=like - ).reshape(-1, 1), - ) - ] * n_iterations + label = Labels( + names=["spherical_harmonics_l"], + values=_dispatch.int_array_like( + list(range(0, angular_cutoff)), like=array_like + ).reshape(-1, 1), + ) + selected_keys_ = [label] * n_iterations - if isinstance(selected_keys, Labels): + if is_labels(selected_keys): # Create a list, but only apply a key selection at the final iteration - selected_keys = [None] * (n_iterations - 1) + [selected_keys] + selected_keys_ = [None] * (n_iterations - 1) + [selected_keys] + elif isinstance(selected_keys, list): + selected_keys_ = selected_keys - # Check the selected_keys - if not isinstance(selected_keys, List): - raise TypeError( - "`selected_keys` must be a `Labels` or List[Union[None, `Labels`]]" - ) - if not len(selected_keys) == n_iterations: + if not len(selected_keys_) == n_iterations: raise ValueError( - "`selected_keys` must be a List[Union[None, Labels]] of length" + "`selected_keys` must be a List[Labels] of length" " `correlation_order` - 1" ) - if not _dispatch.all( - [isinstance(val, (Labels, type(None))) for val in selected_keys] - ): - raise TypeError("`selected_keys` must be a Labels or List[Union[None, Labels]]") # Now iterate over each of the Labels (or None) in the list and check - for slct in selected_keys: - if slct is None: + for selected in selected_keys_: + if selected is None: + continue - assert isinstance(slct, Labels) - if not _dispatch.all( + if not (is_labels(selected)): + raise ValueError("entries of `selected_keys` must be `Labels`") + + if not all( [ name in ["spherical_harmonics_l", "inversion_sigma"] - for name in slct.names + for name in selected.names ] ): raise ValueError( "specified key names in `selected_keys` must be either" " 'spherical_harmonics_l' or 'inversion_sigma'" ) - if "spherical_harmonics_l" in slct.names: + if "spherical_harmonics_l" in selected.names: if angular_cutoff is not None: - if not _dispatch.all( - slct.column("spherical_harmonics_l") <= angular_cutoff - ): + below_cutoff: Array = ( + selected.column("spherical_harmonics_l") <= angular_cutoff + ) + if not _dispatch.all(below_cutoff): raise ValueError( "specified angular channels in `selected_keys` must be <= the" " specified `angular_cutoff`" ) -
if not _dispatch.all( - [angular_l >= 0 for angular_l in slct.column("spherical_harmonics_l")] - ): + above_zero = _dispatch.bool_array_like( + [ + bool(angular_l >= 0) + for angular_l in selected.column("spherical_harmonics_l") + ], + like=array_like, + ) + if not _dispatch.all(above_zero): raise ValueError( "specified angular channels in `selected_keys` must be >= 0" ) - if "inversion_sigma" in slct.names: + if "inversion_sigma" in selected.names: if not _dispatch.all( - [parity_s in [-1, +1] for parity_s in slct.column("inversion_sigma")] + _dispatch.bool_array_like( + [ + bool(parity_s in [-1, 1]) + for parity_s in selected.column("inversion_sigma") + ], + array_like, + ) ): raise ValueError( "specified parities in `selected_keys` must be -1 or +1" ) - return selected_keys + return selected_keys_ def _parse_bool_iteration_filters( n_iterations: int, - skip_redundant: Optional[Union[bool, List[bool]]] = False, + skip_redundant: Union[bool, List[bool]] = False, output_selection: Optional[Union[bool, List[bool]]] = None, -) -> List[List[bool]]: +) -> Tuple[List[bool], List[bool]]: """ Parses the `skip_redundant` and `output_selection` arguments passed to public functions. """ if isinstance(skip_redundant, bool): - skip_redundant = [skip_redundant] * n_iterations - if not _dispatch.all([isinstance(val, bool) for val in skip_redundant]): + skip_redundant_ = [skip_redundant] * n_iterations + else: + skip_redundant_ = skip_redundant + + if not all([isinstance(val, bool) for val in skip_redundant_]): raise TypeError("`skip_redundant` must be a `bool` or `list` of `bool`") - if not len(skip_redundant) == n_iterations: + if not len(skip_redundant_) == n_iterations: raise ValueError( "`skip_redundant` must be a bool or `list` of `bool` of length" " `correlation_order` - 1" @@ -165,7 +203,7 @@ else: if isinstance(output_selection, bool): output_selection = [output_selection] * n_iterations - if not isinstance(output_selection, List): + if not isinstance(output_selection, list): raise TypeError("`output_selection` must be passed as `list` of `bool`") if not len(output_selection) == n_iterations: @@ -173,21 +211,21 @@ "`output_selection` must be a ``list`` of ``bool`` of length" " corresponding to the number of CG iterations" ) - if not _dispatch.all([isinstance(v, bool) for v in output_selection]): + if not all([isinstance(v, bool) for v in output_selection]): raise TypeError("`output_selection` must be passed as a `list` of `bool`") - if not _dispatch.all([isinstance(v, bool) for v in output_selection]): - raise TypeError("`output_selection` must be passed as a `list` of `bool`") - return skip_redundant, output_selection + return skip_redundant_, output_selection def _precompute_keys( keys_1: Labels, keys_2: Labels, n_iterations: int, - selected_keys: List[Union[None, Labels]], + selected_keys: List[Union[Labels, None]], skip_redundant: List[bool], -) -> List[Tuple[Labels, List[List[int]]]]: +) -> List[Tuple[List[LabelsEntry], List[LabelsEntry], Labels]]: """ Computes all the keys metadata needed to perform `n_iterations` of CG combination steps. @@ -201,7 +239,7 @@ If `skip_redundant` is True, then keys that represent redundant CG operations are not included in the output keys at each step.
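+
+    A rough sketch of how the return value is consumed (hypothetical usage,
+    mirroring the loop in ``DensityCorrelations``; ``density`` is assumed to be
+    a TensorMap already passed through :py:func:`_standardize_keys`):
+
+    >>> key_metadata = _precompute_keys(
+    ...     density.keys, density.keys, n_iterations=1,
+    ...     selected_keys=[None], skip_redundant=[False],
+    ... )
+    >>> keys_1_entries, keys_2_entries, keys_out = key_metadata[0]
+    >>> assert len(keys_1_entries) == len(keys_2_entries) == len(keys_out)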
""" - keys_metadata = [] + keys_metadata: List[Tuple[List[LabelsEntry], List[LabelsEntry], Labels]] = [] keys_out = keys_1 for iteration in range(n_iterations): # Get the keys metadata for the combination of the 2 tensors @@ -209,12 +247,14 @@ def _precompute_keys( keys_1=keys_out, keys_2=keys_2, ) - if selected_keys[iteration] is not None: + # For TorchScript to determine the type correctly so we can subscript it + selected_keys_i = selected_keys[iteration] + if selected_keys_i is not None: keys_1_entries, keys_2_entries, keys_out = _apply_key_selection( keys_1_entries, keys_2_entries, keys_out, - selected_keys=selected_keys[iteration], + selected_keys=selected_keys_i, ) if skip_redundant[iteration]: @@ -237,7 +277,8 @@ def _precompute_keys( def _precompute_keys_full_product( keys_1: Labels, keys_2: Labels -) -> Tuple[List, List, Labels]: +) -> Tuple[List[LabelsEntry], List[LabelsEntry], Labels]: + # Due to TorchScript we cannot use List[LabelsEntry] """ Given the keys of 2 TensorMaps, returns the keys that would be present after a full CG product of these TensorMaps. @@ -272,11 +313,11 @@ def _precompute_keys_full_product( `keys_2` must follow the key name convention: ["order_nu", "inversion_sigma", "spherical_harmonics_l", "species_center"] - Returned is Tuple[List, List, Labels]. The first two lists correspond to the - LabelsEntry objects of the keys being combined. The third element is a - Labels object corresponding to the keys of the output TensorMap. Each entry - in this Labels object corresponds to the keys is formed by combination of - the pair of blocks indexed by correspoding key pairs in the first two lists. + The first two lists of the returned value correspond to the LabelsEntry objects of + the keys being combined. The third element is a Labels object corresponding to the + keys of the output TensorMap. Each entry in this Labels object corresponds to the + keys is formed by combination of the pair of blocks indexed by correspoding key + pairs in the first two lists. """ # Get the correlation order of the first TensorMap. unique_nu = _dispatch.unique(keys_1.column("order_nu")) @@ -285,7 +326,7 @@ def _precompute_keys_full_product( "keys_1 must correspond to a tensor of a single correlation order." f" Found {len(unique_nu)} body orders: {unique_nu}" ) - nu1 = unique_nu[0] + nu1 = int(unique_nu[0]) # Define new correlation order of output TensorMap nu = nu1 + 1 @@ -295,23 +336,25 @@ def _precompute_keys_full_product( # If nu1 = 1, the key names don't yet have any "lx" columns if nu1 == 1: - l_list_names = [] + l_list_names: List[str] = [] new_l_list_names = ["l1", "l2"] else: l_list_names = [f"l{angular_l}" for angular_l in range(1, nu1 + 1)] new_l_list_names = l_list_names + [f"l{nu}"] # Check key names - assert _dispatch.all( - keys_1.names - == ["order_nu", "inversion_sigma", "spherical_harmonics_l", "species_center"] - + l_list_names - + [f"k{k}" for k in range(2, nu1)] - ) - assert _dispatch.all( - keys_2.names - == ["order_nu", "inversion_sigma", "spherical_harmonics_l", "species_center"] - ) + assert keys_1.names == [ + "order_nu", + "inversion_sigma", + "spherical_harmonics_l", + "species_center", + ] + l_list_names + [f"k{k}" for k in range(2, nu1)] + assert keys_2.names == [ + "order_nu", + "inversion_sigma", + "spherical_harmonics_l", + "species_center", + ] # Define key names of output Labels (i.e. 
for combined TensorMap) new_names = ( @@ -320,43 +363,64 @@ - new_key_values = [] - keys_1_entries = [] - keys_2_entries = [] - for key_1, key_2 in itertools.product(keys_1, keys_2): - # Unpack relevant key values - sig1, lam1, a = key_1.values[1:4] - sig2, lam2, a2 = key_2.values[1:4] - - # Only combine blocks of the same chemical species - if a != a2: - continue - - # First calculate the possible non-zero angular channels that can be - # formed from combination of blocks of order `lam1` and `lam2`. This - # corresponds to values in the inclusive range { |lam1 - lam2|, ..., - # |lam1 + lam2| } - nonzero_lams = _dispatch.int_range_like( - abs(lam1 - lam2), abs(lam1 + lam2) + 1, like=key_1.values - ) - - # Now iterate over the non-zero angular channels and apply the custom - # selections - for lambda_ in nonzero_lams: - # Calculate new sigma - sig = sig1 * sig2 * (-1) ** (lam1 + lam2 + lambda_) - - # Extract the l and k lists from keys_1 - l_list = key_1.values[4 : 4 + nu1].tolist() - k_list = key_1.values[4 + nu1 :].tolist() - # Build the new keys values. l{nu} is `lam2`` (i.e. - # "spherical_harmonics_l" of the key from `keys_2`. k{nu-1} is - # `lam1` (i.e. "spherical_harmonics_l" of the key from `keys_1`). - new_vals = [nu, sig, lambda_, a] + l_list + [lam2] + k_list + [lam1] - new_key_values.append(new_vals) - keys_1_entries.append(key_1) - keys_2_entries.append(key_2) + new_key_values: List[List[int]] = [] + keys_1_entries: List[LabelsEntry] = [] + keys_2_entries: List[LabelsEntry] = [] + for i in range(len(keys_1)): + for j in range(len(keys_2)): + key_1 = keys_1.entry(i) + key_2 = keys_2.entry(j) + # Unpack relevant key values + sig1 = int(keys_1.values[i, 1]) + lam1 = int(keys_1.values[i, 2]) + a = int(keys_1.values[i, 3]) + sig2 = int(keys_2.values[j, 1]) + lam2 = int(keys_2.values[j, 2]) + a2 = int(keys_2.values[j, 3]) + + # Only combine blocks of the same chemical species + if a != a2: + continue + + # First calculate the possible non-zero angular channels that can be + # formed from combination of blocks of order `lam1` and `lam2`. This + # corresponds to values in the inclusive range { |lam1 - lam2|, ..., + # |lam1 + lam2| } + min_lam: int = abs(lam1 - lam2) + max_lam: int = abs(lam1 + lam2) + 1 + nonzero_lams = list(range(min_lam, max_lam)) + + # Now iterate over the non-zero angular channels and apply the custom + # selections + for lambda_ in nonzero_lams: + # Calculate new sigma + sig = int(sig1 * sig2 * (-1) ** (lam1 + lam2 + lambda_)) + + # Extract the l and k lists from keys_1 + l_list: List[int] = _dispatch.to_int_list(keys_1.values[i, 4 : 4 + nu1]) + k_list: List[int] = _dispatch.to_int_list(keys_1.values[i, 4 + nu1 :]) + + # Build the new keys values. l{nu} is `lam2` (i.e. + # "spherical_harmonics_l" of the key from `keys_2`). k{nu-1} is + # `lam1` (i.e. "spherical_harmonics_l" of the key from `keys_1`).
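+                # For example (hypothetical values): for nu1 = 1, combining a
+                # key with (sig1, lam1) = (1, 1) and one with (sig2, lam2) =
+                # (1, 2) at lambda_ = 2 gives sig = 1 * 1 * (-1) ** (1 + 2 + 2)
+                # = -1, l_list = k_list = [], and therefore
+                # new_vals = [2, -1, 2, a] + [] + [2] + [] + [1].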
+ new_vals: List[int] = ( + torch_jit_annotate(List[int], [nu, sig, lambda_, a]) + + l_list + + [lam2] + + k_list + + [lam1] + ) + new_key_values.append(new_vals) + keys_1_entries.append(key_1) + keys_2_entries.append(key_2) # Define new keys as the full product of keys_1 and keys_2 keys_out = Labels( @@ -368,8 +432,11 @@ def _apply_key_selection( - keys_1_entries: List, keys_2_entries: List, keys_out: Labels, selected_keys: Labels -) -> Tuple[List, List, Labels]: + keys_1_entries: List[LabelsEntry], + keys_2_entries: List[LabelsEntry], + keys_out: Labels, + selected_keys: Labels, +) -> Tuple[List[LabelsEntry], List[LabelsEntry], Labels]: """ Applies a selection according to `selected_keys` to the keys of an output TensorMap `keys_out` produced by combination of blocks indexed by keys @@ -381,35 +448,45 @@ If a selection in `selected_keys` is not valid based on the keys in `keys_out`, an error is raised. """ - # Extract the relevant columns from `selected_keys` that the selection will - # be performed on - keys_out_vals = [[k[name] for name in selected_keys.names] for k in keys_out] + # Extract from `keys_out` the relevant columns, named in `selected_keys`, that + # the selection will be performed on + col_idx = _dispatch.int_array_like( + [keys_out.names.index(name) for name in selected_keys.names], keys_out.values + ) + keys_out_vals = keys_out.values[:, col_idx] # First check that all of the selected keys exist in the output keys - for slct in selected_keys.values: - if not _dispatch.any([_dispatch.all(slct == k) for k in keys_out_vals]): + for selected in selected_keys.values: + if not any( + [bool(all(selected == keys_out_vals[i])) for i in range(len(keys_out_vals))] + ): raise ValueError( - f"selected key {selected_keys.names} = {slct} not found" + f"selected key {selected_keys.names} = {selected} not found" " in the output keys. Check the `selected_keys` argument." ) # Build a mask of the selected keys - mask = [ - _dispatch.any([_dispatch.all(i == j) for j in selected_keys.values]) - for i in keys_out_vals - ] + mask = _dispatch.bool_array_like( + [any([bool(all(i == j)) for j in selected_keys.values]) for i in keys_out_vals], + like=selected_keys.values, + ) + mask_indices = _dispatch.int_array_like( + list(range(len(keys_1_entries))), like=selected_keys.values + )[mask] # Apply the mask to key entries and keys and return - keys_1_entries = [k for k, isin in zip(keys_1_entries, mask) if isin] - keys_2_entries = [k for k, isin in zip(keys_2_entries, mask) if isin] + keys_1_entries = [keys_1_entries[i] for i in mask_indices] + keys_2_entries = [keys_2_entries[i] for i in mask_indices] keys_out = Labels(names=keys_out.names, values=keys_out.values[mask]) return keys_1_entries, keys_2_entries, keys_out def _remove_redundant_keys( - keys_1_entries: List, keys_2_entries: List, keys_out: Labels -) -> Tuple[List, List, Labels]: + keys_1_entries: List[LabelsEntry], + keys_2_entries: List[LabelsEntry], + keys_out: Labels, +) -> Tuple[List[LabelsEntry], List[LabelsEntry], Labels]: """ For a Labels object `keys_out` that corresponds to the keys of a TensorMap formed by combination of the blocks described by the entries in the lists @@ -428,11 +505,12 @@ nu = nu1 + 1 # Identify keys of redundant blocks and remove them - key_idxs_to_keep = [] - for key_idx, key in enumerate(keys_out): - # Get the important key values.
This is all of the keys, excpet the k - # list - key_vals_slice = key.values[: 4 + (nu + 1)].tolist() + key_idxs_to_keep: List[int] = [] + for key_idx in range(len(keys_out)): + key = keys_out.entry(key_idx) + # Get the important key values. This is all of the keys, except the k + # list. + key_vals_slice: List[int] = _dispatch.to_int_list(key.values[: 4 + (nu + 1)]) first_part, l_list = key_vals_slice[:4], key_vals_slice[4:] # Sort the l list @@ -441,18 +519,19 @@ # Compare the sliced key with the one recreated when the l list is # sorted. If they are identical, this is the key of the block that we # want to compute a CG combination for. - key_slice_tuple = tuple(first_part + l_list) - key_slice_sorted_tuple = tuple(first_part + l_list_sorted) - if _dispatch.all(key_slice_tuple == key_slice_sorted_tuple): + key_slice_tuple = _dispatch.int_array_like(first_part + l_list, like=key.values) + key_slice_sorted_tuple = _dispatch.int_array_like( + first_part + l_list_sorted, like=key.values + ) + if all(key_slice_tuple == key_slice_sorted_tuple): key_idxs_to_keep.append(key_idx) # Build a reduced Labels object for the combined keys, with redundancies removed keys_out_red = Labels( names=keys_out.names, - values=_dispatch.int_array_like( - [keys_out[idx].values for idx in key_idxs_to_keep], - like=keys_1_entries[0].values, - ), + values=keys_out.values[ + _dispatch.int_array_like(key_idxs_to_keep, like=keys_out.values) + ], ) # Store the list of reduced entries that combine to form the reduced output keys @@ -471,23 +550,21 @@ def _combine_blocks_same_samples( block_1: TensorBlock, block_2: TensorBlock, lambda_: int, - cg_cache, - compute_metadata_only: bool = False, + cg_coeffs: TensorMap, + cg_backend: str, ) -> TensorBlock: """ For a given pair of TensorBlocks and desired angular channel, combines the values arrays and returns a new TensorBlock. + + If ``cg_backend`` is ``"metadata"``, a tensor block with an empty values array + is returned, containing only the metadata. """ # Do the CG combination - single center so no shape pre-processing required - if compute_metadata_only: - combined_values = _cg_cache.combine_arrays( - block_1.values, block_2.values, lambda_, cg_cache, return_empty_array=True - ) - else: - combined_values = _cg_cache.combine_arrays( - block_1.values, block_2.values, lambda_, cg_cache, return_empty_array=False - ) + combined_values = _cg_cache.combine_arrays( + block_1.values, block_2.values, lambda_, cg_coeffs, cg_backend + ) # Infer the new nu value: block 1's properties are nu pairs of # "species_neighbor_x" and "nx".
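A minimal sketch of the redundancy rule applied above (hypothetical key values, in plain Python outside of the dispatch machinery): a key whose l-list is not already sorted is skipped, since a permutation of the same l values indexes an equivalent CG combination.

# hypothetical illustration of the filter used in `_remove_redundant_keys`
first_part = [2, 1, 1, 6]  # order_nu, inversion_sigma, lambda, species_center
for l_list in ([1, 2], [2, 1]):
    keep = first_part + l_list == first_part + sorted(l_list)
    print(l_list, "kept" if keep else "skipped")
# [1, 2] kept
# [2, 1] skipped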
@@ -496,7 +573,25 @@ # Define the new property names for "nx" and "species_neighbor_x" n_names = [f"n{i}" for i in range(1, combined_nu + 1)] neighbor_names = [f"species_neighbor_{i}" for i in range(1, combined_nu + 1)] - prop_names = [item for i in zip(neighbor_names, n_names) for item in i] + prop_names_zip = [ + [neighbor_names[i], n_names[i]] for i in range(len(neighbor_names)) + ] + prop_names: List[str] = [] + for i in range(len(prop_names_zip)): + prop_names.extend(prop_names_zip[i]) + + # create cross product list of indices in a torch-scriptable way of + # [[i, j] for i in range(len(block_2.properties.values)) for j in + # range(len(block_1.properties.values))] + # [0, 1, 2], [0, 1] -> [[0, 1], [0, 2], [1, 0], [1, 1], [2, 0], [2, 1]] + block_1_block_2_product_idx = _dispatch.cartesian_prod( + _dispatch.int_range_like( + 0, len(block_2.properties.values), like=block_2.values + ), + _dispatch.int_range_like( + 0, len(block_1.properties.values), like=block_1.values + ), + ) # Create a TensorBlock combined_block = TensorBlock( @@ -506,7 +601,7 @@ Labels( names=["spherical_harmonics_m"], values=_dispatch.int_range_like( - min_val=-lambda_, max_val=lambda_ + 1, like=block_1.values + min_val=-lambda_, max_val=lambda_ + 1, like=block_1.samples.values ).reshape(-1, 1), ), ], @@ -514,11 +609,11 @@ names=prop_names, values=_dispatch.int_array_like( [ - _dispatch.concatenate((b2, b1)) - for b2 in block_2.properties.values - for b1 in block_1.properties.values + _dispatch.to_int_list(block_2.properties.values[indices[0]]) + + _dispatch.to_int_list(block_1.properties.values[indices[1]]) + for indices in block_1_block_2_product_idx ], - like=block_1.values, + block_1.samples.values, ), ), ) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py b/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py deleted file mode 100644 index 839ea7f3e..000000000 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py +++ /dev/null @@ -1,134 +0,0 @@ -""" -Module containing dispatch functions for numpy/torch CG combination operations. -""" - -from typing import List, Optional - -import numpy as np - - -try: - import torch - from torch import Tensor as TorchTensor -except ImportError: - - class TorchTensor: - pass - - -UNKNOWN_ARRAY_TYPE = ( - "unknown array type, only numpy arrays and torch tensors are supported" -) - - -def unique(array, axis: Optional[int] = None): - """Find the unique elements of an array.""" - if isinstance(array, TorchTensor): - return torch.unique(array, dim=axis) - elif isinstance(array, np.ndarray): - return np.unique(array, axis=axis) - - -def int_range_like(min_val, max_val, like): - """Returns an array of integers from min to max, non-inclusive, based on the - type of `like`""" - if isinstance(like, TorchTensor): - return torch.arange(min_val, max_val, dtype=torch.int64, device=like.device) - elif isinstance(like, np.ndarray): - return np.arange(min_val, max_val).astype(np.int64) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def int_array_like(int_list: List[int], like): - """ - Converts the input list of int to a numpy array or torch tensor - based on the type of `like`.
- """ - if isinstance(like, TorchTensor): - return torch.tensor(int_list, dtype=torch.int64, device=like.device) - elif isinstance(like, np.ndarray): - return np.array(int_list).astype(np.int64) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def concatenate(arrays, axis: Optional[int] = 0): - """Concatenate arrays along an axis.""" - if isinstance(arrays[0], TorchTensor): - return torch.cat(arrays, dim=axis) - elif isinstance(arrays[0], np.ndarray): - return np.concatenate(arrays, axis=axis) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def all(array, axis: Optional[int] = None): - """Test whether all array elements along a given axis evaluate to True. - - This function has the same behavior as - ``np.all(array,axis=axis)``. - """ - if isinstance(array, bool): - array = np.array(array) - if isinstance(array, list): - array = np.array(array) - - if isinstance(array, TorchTensor): - # torch.all has two implementation, and picks one depending if more than one - # parameter is given. The second one does not supports setting dim to `None` - if axis is None: - return torch.all(input=array) - else: - return torch.all(input=array, dim=axis) - elif isinstance(array, np.ndarray): - return np.all(a=array, axis=axis) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def any(array): - """Test whether any array elements along a given axis evaluate to True. - - This function has the same behavior as - ``np.any(array)``. - """ - if isinstance(array, bool): - array = np.array(array) - if isinstance(array, list): - array = np.array(array) - if isinstance(array, TorchTensor): - return torch.any(array) - elif isinstance(array, np.ndarray): - return np.any(array) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def zeros_like(shape, like): - """Return an array of zeros with the same shape and type as a given array. - - This function has the same behavior as - ``np.zeros_like(array)``. - """ - if isinstance(like, TorchTensor): - return torch.zeros( - shape, - requires_grad=like.requires_grad, - dtype=like.dtype, - device=like.device, - ) - elif isinstance(like, np.ndarray): - return np.zeros(shape, dtype=like.dtype) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def swapaxes(array, axis0: int, axis1: int): - """Swaps axes of an array.""" - if isinstance(array, TorchTensor): - return torch.swapaxes(array, axis0, axis1) - elif isinstance(array, np.ndarray): - return np.swapaxes(array, axis0, axis1) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index bf4c0c42f..a2181186b 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -6,255 +6,384 @@ from typing import List, Optional, Union -from metatensor import Labels, TensorMap +import numpy as np -from . import _cg_cache, _clebsch_gordan, _dispatch +from .. import _dispatch +from .._backend import ( + Labels, + LabelsEntry, + TensorBlock, + TensorMap, + TorchModule, + TorchScriptClass, + torch_jit_export, + torch_jit_is_scripting, +) +from . 
import _cg_cache, _clebsch_gordan +try: + import mops # noqa F401 + + HAS_MOPS = True +except ImportError: + HAS_MOPS = False + +try: + import torch + + HAS_TORCH = True +except ImportError: + HAS_TORCH = False + # ====================================================================== # ===== Public API functions # ====================================================================== -def correlate_density( - density: TensorMap, - correlation_order: int, - angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Labels]]] = None, - skip_redundant: Optional[Union[bool, List[bool]]] = False, - output_selection: Optional[Union[bool, List[bool]]] = None, -) -> Union[TensorMap, List[TensorMap]]: +class DensityCorrelations(TorchModule): """ - Takes iterative Clebsch-Gordan (CG) tensor products of a density descriptor - with itself up to the desired correlation order. Returns - :py:class:`TensorMap`(s) corresponding to the density correlations output - from the specified iteration(s). - - A density descriptor necessarily is body order 2 (i.e. correlation order 1), - but can be single- or multi-center. The output is a :py:class:`list` of - density correlations for each iteration specified in `output_selection`, up - to the target order passed in `correlation_order`. By default only the last - correlation (i.e. the correlation of order ``correlation_order``) is - returned. + Takes iterative Clebsch-Gordan (CG) tensor products of a density descriptor with + itself up to the desired correlation order. Returns :py:class:`TensorMap` + corresponding to the density correlations output from the specified iteration(s). + + A density descriptor necessarily is body order 2 (i.e. correlation order 1), but can + be single- or multi-center. The output is a :py:class:`list` of density correlations + for each iteration specified in `output_selection`, up to the target order passed in + `correlation_order`. By default only the last correlation (i.e. the correlation of + order ``correlation_order``) is returned. This function is an iterative special case of the more general - :py:func:`correlate_tensors`. As a density is being correlated with itself, - some redundant CG tensor products can be skipped with the `skip_redundant` - keyword. + :py:func:`correlate_tensors`. As a density is being correlated with itself, some + redundant CG tensor products can be skipped with the `skip_redundant` keyword. Selections on the angular and parity channels at each iteration can also be controlled with arguments `angular_cutoff`, `angular_selection` and `parity_selection`. - :param density: A density descriptor of body order 2 (correlation order 1), - in :py:class:`TensorMap` format. This may be, for example, a rascaline - :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`. - Alternatively, this could be multi-center descriptor, such as a pair - density. - :param correlation_order: The desired correlation order of the output - descriptor. Must be >= 1. - :param angular_cutoff: The maximum angular channel to compute at any given - CG iteration, applied globally to all iterations until the target - correlation order is reached. - :param selected_keys: :py:class:`Labels` or `List[:py:class:`Labels`]` - specifying the angular and/or parity channels to output at each - iteration. All :py:class:`Labels` objects passed here must only contain - key names "spherical_harmonics_l" and "inversion_sigma". 
If a single - :py:class:`Labels` object is passed, this is applied to the final - iteration only. If a :py:class:`list` of :py:class:`Labels` objects is - passed, each is applied to its corresponding iteration. If None is - passed, all angular and parity channels are output at each iteration, - with the global `angular_cutoff` applied if specified. - :param skip_redundant: Whether to skip redundant CG combinations. Defaults - to False, which means all combinations are performed. If a - :py:class:`list` of :py:class:`bool` is passed, this is applied to each - iteration. If a single :py:class:`bool` is passed, this is applied to - all iterations. - :param output_selection: A :py:class:`list` of :py:class:`bool` specifying - whether to output a :py:class:`TensorMap` for each iteration. If a - single :py:class:`bool` is passed as True, outputs from all iterations - will be returned. If a :py:class:`list` of :py:class:`bool` is passed, - this controls the output at each corresponding iteration. If None is - passed, only the final iteration is output. - - :return: A :py:class:`list` of :py:class:`TensorMap` corresponding to the - density correlations output from the specified iterations. If the output - from a single iteration is requested, a :py:class:`TensorMap` is - returned instead. - """ - return _correlate_density( - density, - correlation_order, - angular_cutoff, - selected_keys, - skip_redundant, - output_selection, - compute_metadata_only=False, - sparse=True, # sparse CG cache by default - ) - - -def correlate_density_metadata( - density: TensorMap, - correlation_order: int, - angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Labels]]] = None, - skip_redundant: Optional[Union[bool, List[bool]]] = False, - output_selection: Optional[Union[bool, List[bool]]] = None, -) -> Union[TensorMap, List[TensorMap]]: - """ - Returns the metadata-only :py:class:`TensorMap`(s) that would be output by - the function :py:func:`correlate_density` under the same settings, without - perfoming the actual Clebsch-Gordan tensor products. See this function for - full documentation. - """ + :param max_angular: The maximum angular order for which CG coefficients should be + computed and stored. This must be large enough to cover the maximum angular + order reached in the CG iterations on a density input to the :py:meth:`compute` + method. + :param correlation_order: The desired correlation order of the output descriptor. + Must be >= 1. + :param angular_cutoff: The maximum angular channel to compute at any given CG + iteration, applied globally to all iterations until the target correlation order + is reached. + :param selected_keys: :py:class:`Labels` or `List[:py:class:`Labels`]` specifying + the angular and/or parity channels to output at each iteration. All + :py:class:`Labels` objects passed here must only contain key names + "spherical_harmonics_l" and "inversion_sigma". If a single :py:class:`Labels` + object is passed, this is applied to the final iteration only. If a + :py:class:`list` of :py:class:`Labels` objects is passed, each is applied to its + corresponding iteration. If None is passed, all angular and parity channels are + output at each iteration, with the global `angular_cutoff` applied if specified. + :param skip_redundant: Whether to skip redundant CG combinations. Defaults to False, + which means all combinations are performed. If a :py:class:`list` of + :py:class:`bool` is passed, this is applied to each iteration. 
If a single + :py:class:`bool` is passed, this is applied to all iterations. + :param output_selection: A :py:class:`list` of :py:class:`bool` specifying whether + to output a :py:class:`TensorMap` for each iteration. If a single + :py:class:`bool` is passed as True, outputs from all iterations will be + returned. If a :py:class:`list` of :py:class:`bool` is passed, this controls the + output at each corresponding iteration. If None is passed, only the final + iteration is output. + :param arrays_backend: Determines the array backend, either "numpy" or "torch". + :param cg_backend: Determines the backend for the CG combination. It can be either + "python-sparse", "python-dense" or "mops". If the CG combination is performed + with the sparse coefficients, then for each (l1, l2, lambda) block the (m1, m2, + mu) coefficients are stored in a sparse format, only storing the nonzero + coefficients. If the parameter is None, the optimal choice is determined + based on the available packages and ``arrays_backend``. - return _correlate_density( - density, - correlation_order, - angular_cutoff, - selected_keys, - skip_redundant, - output_selection, - compute_metadata_only=True, - ) - - -# ==================================================================== -# ===== Private functions that do the work on the TensorMap level -# ==================================================================== - - -def _correlate_density( - density: TensorMap, - correlation_order: int, - angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Labels]]] = None, - skip_redundant: Optional[Union[bool, List[bool]]] = False, - output_selection: Optional[Union[bool, List[bool]]] = None, - compute_metadata_only: bool = False, - sparse: bool = True, -) -> Union[TensorMap, List[TensorMap]]: - """ - Performs the density correlations for public functions - :py:func:`correlate_density` and :py:func:`correlate_density_metadata`. + - "python-dense": Uses the python implementation performing the combinations + with the dense CG coefficients. + - "python-sparse": Uses the python implementation performing the + combinations with the sparse CG coefficients. + - "mops": Uses the ``mops`` package to optimize the sparse combinations. At + the moment it is only available with ``arrays_backend="numpy"``. + + :return: A :py:class:`list` of :py:class:`TensorMap` corresponding to the density + correlations output from the specified iterations. If the output from a single + iteration is requested, a :py:class:`TensorMap` is returned instead. """ - # Check inputs - if correlation_order <= 1: - raise ValueError("`correlation_order` must be > 1") - # TODO: implement combinations of gradients too - if _dispatch.any([len(list(block.gradients())) > 0 for block in density]): - raise NotImplementedError( - "Clebsch Gordan combinations with gradients not yet implemented." - " Use metatensor.remove_gradients to remove gradients from the input."
- ) - # Check metadata - if not ( - _dispatch.all(density.keys.names == ["spherical_harmonics_l", "species_center"]) - or _dispatch.all( - density.keys.names - == ["spherical_harmonics_l", "species_center", "species_neighbor"] - ) + + _selected_keys: List[Union[Labels, None]] + + def __init__( + self, + max_angular: int, + correlation_order: int, + angular_cutoff: Optional[int] = None, + selected_keys: Optional[Union[Labels, List[Labels]]] = None, + skip_redundant: Optional[Union[bool, List[bool]]] = False, + output_selection: Optional[Union[bool, List[bool]]] = None, + arrays_backend: Optional[str] = None, + cg_backend: Optional[str] = None, ): - raise ValueError( - "input `density` must have key names" - ' ["spherical_harmonics_l", "species_center"] or' - ' ["spherical_harmonics_l", "species_center", "species_neighbor"]' + super().__init__() + if arrays_backend is None: + if torch_jit_is_scripting(): + arrays_backend = "torch" + else: + if isinstance(Labels, TorchScriptClass): + arrays_backend = "torch" + else: + arrays_backend = "numpy" + elif arrays_backend == "numpy": + if torch_jit_is_scripting(): + raise ValueError( + "Module is torch scripted but 'numpy' was given as `arrays_backend`" + ) + arrays_backend = "numpy" + elif arrays_backend == "torch": + arrays_backend = "torch" + else: + raise ValueError( + f"Unknown `arrays_backend` {arrays_backend}. " + "Only 'numpy' and 'torch' are supported." + ) + + # Choosing the optimal cg combine backend + if cg_backend is None: + if arrays_backend == "torch": + self._cg_backend = "python-dense" + elif arrays_backend == "numpy" and HAS_MOPS: + self._cg_backend = "mops" + else: + self._cg_backend = "python-sparse" + elif cg_backend == "python-dense": + self._cg_backend = "python-dense" + elif cg_backend == "python-sparse": + self._cg_backend = "python-sparse" + elif cg_backend == "mops": + if arrays_backend == "torch": + raise NotImplementedError( + "'torch' was determined or given as `arrays_backend` " + "and 'mops' was given as `cg_backend`, " + "but mops does not support the torch backend yet" + ) + else: + assert arrays_backend == "numpy" + if not HAS_MOPS: + raise ImportError( + "mops is not installed, but 'mops' was given as `cg_backend`" + ) + self._cg_backend = "mops" + + else: + raise ValueError( + f"Unknown `cg_backend` {cg_backend}. " + "Only 'python-dense', 'python-sparse' and 'mops' are supported." + ) + + if max_angular < 0: + raise ValueError( + f"Given negative `max_angular={max_angular}`. " + "`max_angular` must be >= 0."
+ ) + self._max_angular = max_angular + self._cg_coefficients = _cg_cache.calculate_cg_coefficients( + lambda_max=self._max_angular, + sparse=(self._cg_backend in ["python-sparse", "mops"]), + use_torch=(arrays_backend == "torch"), ) - if not _dispatch.all(density.component_names == ["spherical_harmonics_m"]): - raise ValueError( - "input `density` must have a single component" - " axis with name `spherical_harmonics_m`" + + # Check inputs + if correlation_order <= 1: + raise ValueError("`correlation_order` must be > 1") + self._correlation_order = correlation_order + + n_iterations = correlation_order - 1 # num iterations + # Parse the selected keys + self._angular_cutoff = angular_cutoff + + if arrays_backend == "torch": + array_like = torch.empty(0) + elif arrays_backend == "numpy": + array_like = np.empty(0) + + self._selected_keys: List[Union[Labels, None]] = ( + _clebsch_gordan._parse_selected_keys( + n_iterations=n_iterations, + array_like=array_like, + angular_cutoff=self._angular_cutoff, + selected_keys=selected_keys, + ) ) - n_iterations = correlation_order - 1 # num iterations - density = _clebsch_gordan._standardize_keys(density) # standardize metadata - density_correlation = density # create a copy to combine with itself - - # Parse the selected keys - selected_keys = _clebsch_gordan._parse_selected_keys( - n_iterations=n_iterations, - angular_cutoff=angular_cutoff, - selected_keys=selected_keys, - like=density.keys.values, - ) - # Parse the bool flags that control skipping of redundant CG combinations - # and TensorMap output from each iteration - skip_redundant, output_selection = _clebsch_gordan._parse_bool_iteration_filters( - n_iterations, - skip_redundant=skip_redundant, - output_selection=output_selection, - ) - - # Pre-compute the keys needed to perform each CG iteration - key_metadata = _clebsch_gordan._precompute_keys( - density.keys, - density.keys, - n_iterations=n_iterations, - selected_keys=selected_keys, - skip_redundant=skip_redundant, - ) - # Compute CG coefficient cache - if compute_metadata_only: - cg_cache = None - else: - angular_max = max( - _dispatch.concatenate( - [density.keys.column("spherical_harmonics_l")] - + [mdata[2].column("spherical_harmonics_l") for mdata in key_metadata] + # Parse the bool flags that control skipping of redundant CG combinations + # and TensorMap output from each iteration + self._skip_redundant, self._output_selection = ( + _clebsch_gordan._parse_bool_iteration_filters( + n_iterations, + skip_redundant=skip_redundant, + output_selection=output_selection, ) ) - # TODO: keys have been precomputed, so perhaps we don't need to - # compute all CG coefficients up to angular_max here. - # TODO: use sparse cache by default until we understand under which - # circumstances (and if) dense is faster. - cg_cache = _cg_cache.ClebschGordanReal(angular_max, sparse=sparse) - - # Perform iterative CG tensor products - density_correlations = [] - for iteration in range(n_iterations): - # Define the correlation order of the current iteration - correlation_order_it = iteration + 2 - - # Combine block pairs - blocks_out = [] - for key_1, key_2, key_out in zip(*key_metadata[iteration]): - block_out = _clebsch_gordan._combine_blocks_same_samples( - density_correlation[key_1], - density[key_2], - key_out["spherical_harmonics_l"], - cg_cache, - compute_metadata_only=compute_metadata_only, + + def forward(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]: + """ + Calls the :py:meth:`DensityCorrelations.compute` function. 
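+
+        A minimal sketch (hypothetical values):
+
+        >>> calculator = DensityCorrelations(max_angular=8, correlation_order=2)
+        >>> result = calculator(density)  # equivalent to calculator.compute(density)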
+ + This is intended for :py:class:`torch.nn.Module` compatibility, and should be + ignored in pure Python mode. + """ + return self.compute(density) + + def compute(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]: + """ + Computes the density correlations by taking iterative Clebsch-Gordan (CG) tensor + products of the input `density` descriptor with itself. + + :param density: A density descriptor of body order 2 (correlation order 1), in + :py:class:`TensorMap` format. This may be, for example, a rascaline + :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`. + Alternatively, this could be multi-center descriptor, such as a pair + density. + """ + return self._correlate_density( + density, + compute_metadata=False, + ) + + @torch_jit_export + def compute_metadata( + self, + density: TensorMap, + ) -> Union[TensorMap, List[TensorMap]]: + """ + Returns the metadata-only :py:class:`TensorMap` that would be output by the + function :py:meth:`compute` for the same calculator under the same settings, + without performing the actual Clebsch-Gordan tensor products. + + :param density: A density descriptor of body order 2 (correlation order 1), in + :py:class:`TensorMap` format. This may be, for example, a rascaline + :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`. + Alternatively, this could be multi-center descriptor, such as a pair + density. + """ + return self._correlate_density( + density, + compute_metadata=True, + ) + + # ==================================================================== + # ===== Private functions that do the work on the TensorMap level + # ==================================================================== + def _correlate_density( + self, density: TensorMap, compute_metadata: bool + ) -> Union[TensorMap, List[TensorMap]]: + + # Check metadata + if not ( + density.keys.names == ["spherical_harmonics_l", "species_center"] + or density.keys.names + == ["spherical_harmonics_l", "species_center", "species_neighbor"] + ): + raise ValueError( + "input `density` must have key names" + ' ["spherical_harmonics_l", "species_center"] or' + ' ["spherical_harmonics_l", "species_center", "species_neighbor"]' ) - blocks_out.append(block_out) - keys_out = key_metadata[iteration][2] - density_correlation = TensorMap(keys=keys_out, blocks=blocks_out) - - # If this tensor is to be included in the output, move the [l1, l2, ...] - # keys to properties and store - if output_selection[iteration]: - density_correlations.append( - density_correlation.keys_to_properties( - [f"l{i}" for i in range(1, correlation_order_it + 1)] - + [f"k{i}" for i in range(2, correlation_order_it)] - ) + if not density.component_names == ["spherical_harmonics_m"]: + raise ValueError( + "input `density` must have a single component" + " axis with name `spherical_harmonics_m`" ) + n_iterations = self._correlation_order - 1 # num iterations + density = _clebsch_gordan._standardize_keys(density) # standardize metadata + density_correlation = density # create a copy to combine with itself - # Drop redundant key names. TODO: these should be part of the global - # matadata associated with the TensorMap. Awaiting this functionality in - # metatensor. 
- for i, tensor in enumerate(density_correlations): - keys = tensor.keys - if len(_dispatch.unique(tensor.keys.column("order_nu"))) == 1: - keys = keys.remove(name="order_nu") - if len(_dispatch.unique(tensor.keys.column("inversion_sigma"))) == 1: - keys = keys.remove(name="inversion_sigma") - density_correlations[i] = TensorMap( keys=keys, blocks=[b.copy() for b in tensor.blocks()] ) + # TODO: implement combinations of gradients too + # a plain bool list comprehension is used here to keep this TorchScript + # compatible + contains_gradients = any( + [len(list(block.gradients())) > 0 for _, block in density.items()] + ) + if contains_gradients: + raise NotImplementedError( + "Clebsch Gordan combinations with gradients not yet implemented." + " Use metatensor.remove_gradients to remove gradients from the input." + ) + # Pre-compute the keys needed to perform each CG iteration + key_metadata = _clebsch_gordan._precompute_keys( + density.keys, + density.keys, + n_iterations=n_iterations, + selected_keys=self._selected_keys, + skip_redundant=self._skip_redundant, + ) + max_angular = max( + _dispatch.max(density.keys.column("spherical_harmonics_l")), + max( + [ + int(_dispatch.max(mdata[2].column("spherical_harmonics_l"))) + for mdata in key_metadata + ] + ), + ) + if self._max_angular < max_angular: + raise ValueError( + f"The density you provided requires max_angular={max_angular} " + f"but on initialization max_angular={self._max_angular} was given" + ) + + # Perform iterative CG tensor products + density_correlations: List[TensorMap] = [] + if compute_metadata: + cg_backend = "metadata" + else: + cg_backend = self._cg_backend + + for iteration in range(n_iterations): + # Define the correlation order of the current iteration + correlation_order_it = iteration + 2 + + # Combine block pairs + blocks_out: List[TensorBlock] = [] + key_metadata_i = key_metadata[iteration] + for j in range(len(key_metadata_i[0])): + key_1: LabelsEntry = key_metadata_i[0][j] + key_2: LabelsEntry = key_metadata_i[1][j] + lambda_out: int = int( + key_metadata_i[2].column("spherical_harmonics_l")[j] + ) + block_out = _clebsch_gordan._combine_blocks_same_samples( + density_correlation.block(key_1), + density.block(key_2), + lambda_out, + self._cg_coefficients, + cg_backend, + ) + blocks_out.append(block_out) + keys_out = key_metadata[iteration][2] + density_correlation = TensorMap(keys=keys_out, blocks=blocks_out) + + # If this tensor is to be included in the output, move the [l1, l2, ...] + # keys to properties and store + if self._output_selection[iteration]: + density_correlations.append( + density_correlation.keys_to_properties( + [f"l{i}" for i in range(1, correlation_order_it + 1)] + + [f"k{i}" for i in range(2, correlation_order_it)] + ) + ) + + # Drop redundant key names. TODO: these should be part of the global + # metadata associated with the TensorMap. Awaiting this functionality in + # metatensor.
+        for i, tensor in enumerate(density_correlations):
+            keys = tensor.keys
+            if len(_dispatch.unique(tensor.keys.column("order_nu"))) == 1:
+                keys = keys.remove(name="order_nu")
+            if len(_dispatch.unique(tensor.keys.column("inversion_sigma"))) == 1:
+                keys = keys.remove(name="inversion_sigma")
+            density_correlations[i] = TensorMap(
+                keys=keys, blocks=[b.copy() for b in tensor.blocks()]
+            )
 
-    # Return a single TensorMap in the simple case
-    if len(density_correlations) == 1:
-        return density_correlations[0]
+        # Return a single TensorMap in the simple case
+        if len(density_correlations) == 1:
+            return density_correlations[0]
 
-    # Otherwise return a list of TensorMaps
-    return density_correlations
+        # Otherwise return a list of TensorMaps
+        return density_correlations
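For orientation, the `DensityCorrelations` class above replaces the old `correlate_density` / `correlate_density_metadata` free functions (see the test changes further down). A minimal usage sketch, not part of the patch; the hyper-parameter values are illustrative, and the geometry is the one used by the test helpers:

    import ase
    import rascaline
    from rascaline.utils import DensityCorrelations

    # a single isolated water molecule (same geometry as the test helpers)
    frames = [
        ase.Atoms(
            symbols=["O", "H", "H"],
            positions=[
                [2.566334, 2.5, 2.503701],
                [1.973617, 1.730673, 2.470634],
                [1.973617, 3.269327, 2.470634],
            ],
        )
    ]

    # illustrative hyper-parameters for a small spherical expansion
    hypers = {
        "cutoff": 3.0,
        "max_radial": 4,
        "max_angular": 2,
        "atomic_gaussian_width": 0.3,
        "center_atom_weight": 1.0,
        "radial_basis": {"Gto": {}},
        "cutoff_function": {"ShiftedCosine": {"width": 0.5}},
    }
    density = rascaline.SphericalExpansion(**hypers).compute(frames)

    # max_angular must cover every intermediate angular channel: without any
    # key selection this means hypers["max_angular"] * correlation_order
    calculator = DensityCorrelations(max_angular=4, correlation_order=2)
    nu_2 = calculator.compute(density)                 # actual CG products
    nu_2_meta = calculator.compute_metadata(density)   # metadata only, no products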
- """ - if isinstance(arrays[0], TorchTensor): - _check_all_torch_tensor(arrays) - return torch.concatenate(arrays, axis) - elif isinstance(arrays[0], np.ndarray): - _check_all_np_ndarray(arrays) - return np.concatenate(arrays, axis) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): - """ - Create an uninitialized array, with the given ``shape``, and similar dtype, - device and other options as ``array``. - - If ``shape`` is :py:obj:`None`, the array shape is used instead. - ``requires_grad`` is only used for torch tensors, and set the corresponding - value on the returned array. - - This is the equivalent to ``np.empty_like(array, shape=shape)``. - """ - if isinstance(array, TorchTensor): - if shape is None: - shape = array.size() - return torch.empty( - shape, - dtype=array.dtype, - layout=array.layout, - device=array.device, - ).requires_grad_(requires_grad) - elif isinstance(array, np.ndarray): - return np.empty_like(array, shape=shape, subok=False) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def list_to_array(array, data: List[List[int]]): - """Create an object from data with the same type as ``array``.""" - if isinstance(array, TorchTensor): - return torch.tensor(data) - elif isinstance(array, np.ndarray): - return np.array(data) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def matmul(a, b): - """Matrix product of two arrays.""" - if isinstance(a, TorchTensor): - _check_all_torch_tensor([b]) - return torch.matmul(a, b) - elif isinstance(a, np.ndarray): - _check_all_np_ndarray([b]) - return np.matmul(a, b) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def to_index_array(array): - """Returns an array that is suitable for indexing a dimension of - a different array. - After a few checks (int, 1D), this operation will convert the dtype to - torch.long (which is, in some torch versions, the only acceptable type - of index tensor). Numpy arrays are left unchanged. - """ - if len(array.shape) != 1: - raise ValueError("Index arrays must be 1D") - - if isinstance(array, TorchTensor): - if torch.is_floating_point(array): - raise ValueError("Index arrays must be integers") - return array.to(torch.long) - elif isinstance(array, np.ndarray): - if not np.issubdtype(array.dtype, np.integer): - raise ValueError("Index arrays must be integers") - return array - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def unique(array, axis: Optional[int] = None): - """Find the unique elements of an array.""" - if isinstance(array, TorchTensor): - return torch.unique(array, dim=axis) - elif isinstance(array, np.ndarray): - return np.unique(array, axis=axis) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): - """ - Create an array filled with zeros, with the given ``shape``, and similar - dtype, device and other options as ``array``. - - If ``shape`` is :py:obj:`None`, the array shape is used instead. - ``requires_grad`` is only used for torch tensors, and set the corresponding - value on the returned array. - - This is the equivalent to ``np.zeros_like(array, shape=shape)``. 
- """ - if isinstance(array, TorchTensor): - if shape is None: - shape = array.size() - - return torch.zeros( - shape, - dtype=array.dtype, - layout=array.layout, - device=array.device, - ).requires_grad_(requires_grad) - elif isinstance(array, np.ndarray): - return np.zeros_like(array, shape=shape, subok=False) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) diff --git a/python/rascaline/rascaline/utils/power_spectrum/calculator.py b/python/rascaline/rascaline/utils/power_spectrum/calculator.py index d96a65e55..f64bbb7c1 100644 --- a/python/rascaline/rascaline/utils/power_spectrum/calculator.py +++ b/python/rascaline/rascaline/utils/power_spectrum/calculator.py @@ -2,11 +2,18 @@ from math import sqrt from typing import List, Optional, Union -from . import _dispatch -from ._classes import CalculatorBase, IntoSystem, Labels, TensorBlock, TensorMap - - -class PowerSpectrum: +from .. import _dispatch +from .._backend import ( + CalculatorBase, + IntoSystem, + Labels, + TensorBlock, + TensorMap, + TorchModule, +) + + +class PowerSpectrum(TorchModule): r"""General power spectrum of one or of two calculators. If ``calculator_2`` is provided, the invariants :math:`p_{nl}` are generated by @@ -124,6 +131,7 @@ def __init__( calculator_2: Optional[CalculatorBase] = None, species: Optional[List[int]] = None, ): + super().__init__() self.calculator_1 = calculator_1 self.calculator_2 = calculator_2 self.species = species @@ -310,6 +318,25 @@ def compute( return TensorMap(new_keys, new_blocks).keys_to_properties("l") + def forward( + self, + systems: Union[IntoSystem, List[IntoSystem]], + gradients: Optional[List[str]] = None, + use_native_system: bool = True, + ) -> TensorMap: + """ + Calls the :py:meth:`PowerSpectrum.compute` function. + + This is intended for :py:class:`torch.nn.Module` compatibility, and should be + ignored in pure Python mode. 
+ """ + + return self.compute( + systems=systems, + gradients=gradients, + use_native_system=use_native_system, + ) + def _positions_gradients( new_block: TensorBlock, block_1: TensorBlock, block_2: TensorBlock, factor: float diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py index 7822d159e..59da6ba02 100644 --- a/python/rascaline/tests/utils/correlate_density.py +++ b/python/rascaline/tests/utils/correlate_density.py @@ -8,14 +8,10 @@ from metatensor import Labels, TensorBlock, TensorMap import rascaline -from rascaline.utils import PowerSpectrum -from rascaline.utils.clebsch_gordan._cg_cache import ClebschGordanReal +from rascaline.utils import PowerSpectrum, _dispatch +from rascaline.utils.clebsch_gordan._cg_cache import calculate_cg_coefficients from rascaline.utils.clebsch_gordan._clebsch_gordan import _standardize_keys -from rascaline.utils.clebsch_gordan.correlate_density import ( - _correlate_density, - correlate_density, - correlate_density_metadata, -) +from rascaline.utils.clebsch_gordan.correlate_density import DensityCorrelations # Try to import some modules @@ -35,11 +31,29 @@ HAS_SYMPY = True except ImportError: HAS_SYMPY = False +try: + from mops import sparse_accumulation_of_products as sap # noqa F401 + + HAS_MOPS = True +except ImportError: + HAS_MOPS = False if HAS_SYMPY: from .rotations import WignerDReal, transform_frame_o3, transform_frame_so3 +try: + import torch + + HAS_TORCH = True +except ImportError: + HAS_TORCH = False + +if HAS_TORCH: + ARRAYS_BACKEND = ["numpy", "torch"] +else: + ARRAYS_BACKEND = ["numpy"] + DATA_ROOT = os.path.join(os.path.dirname(__file__), "data") SPHEX_HYPERS = { @@ -63,32 +77,48 @@ } -# ============ Pytest fixtures ============ - - -@pytest.fixture() -def cg_cache_sparse(): - return ClebschGordanReal(lambda_max=5, sparse=True) - - -@pytest.fixture() -def cg_cache_dense(): - return ClebschGordanReal(lambda_max=5, sparse=False) - - # ============ Helper functions ============ def h2_isolated(): - return ase.io.read(os.path.join(DATA_ROOT, "h2_isolated.xyz"), ":") + return [ + ase.Atoms( + symbols=["H", "H"], + positions=[ + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + ) + ] def h2o_isolated(): - return ase.io.read(os.path.join(DATA_ROOT, "h2o_isolated.xyz"), ":") + return [ + ase.Atoms( + symbols=["O", "H", "H"], + positions=[ + [2.56633400, 2.50000000, 2.50370100], + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + ) + ] def h2o_periodic(): - return ase.io.read(os.path.join(DATA_ROOT, "h2o_periodic.xyz"), ":") + + return [ + ase.Atoms( + symbols=["O", "H", "H"], + positions=[ + [2.56633400, 2.50000000, 2.50370100], + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + cell=[5, 5, 5], + pbc=[True, True, True], + ) + ] def wigner_d_matrices(lmax: int): @@ -169,19 +199,14 @@ def test_so3_equivariance(): nu_1 = spherical_expansion(frames) nu_1_so3 = spherical_expansion(frames_so3) - - nu_3 = correlate_density( - density=nu_1, - correlation_order=nu_target, - angular_cutoff=angular_cutoff, - selected_keys=selected_keys, - ) - nu_3_so3 = correlate_density( - density=nu_1_so3, + corr_calculator = DensityCorrelations( + max_angular=3, correlation_order=nu_target, angular_cutoff=angular_cutoff, selected_keys=selected_keys, ) + nu_3 = corr_calculator.compute(nu_1) + nu_3_so3 = corr_calculator.compute(nu_1_so3) nu_3_transf = wig.transform_tensormap_so3(nu_3) assert 
diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py
index 7822d159e..59da6ba02 100644
--- a/python/rascaline/tests/utils/correlate_density.py
+++ b/python/rascaline/tests/utils/correlate_density.py
@@ -8,14 +8,10 @@
 from metatensor import Labels, TensorBlock, TensorMap
 
 import rascaline
-from rascaline.utils import PowerSpectrum
-from rascaline.utils.clebsch_gordan._cg_cache import ClebschGordanReal
+from rascaline.utils import PowerSpectrum, _dispatch
+from rascaline.utils.clebsch_gordan._cg_cache import calculate_cg_coefficients
 from rascaline.utils.clebsch_gordan._clebsch_gordan import _standardize_keys
-from rascaline.utils.clebsch_gordan.correlate_density import (
-    _correlate_density,
-    correlate_density,
-    correlate_density_metadata,
-)
+from rascaline.utils.clebsch_gordan.correlate_density import DensityCorrelations
 
 
 # Try to import some modules
@@ -35,11 +31,29 @@
     HAS_SYMPY = True
 except ImportError:
     HAS_SYMPY = False
+try:
+    from mops import sparse_accumulation_of_products as sap  # noqa: F401
+
+    HAS_MOPS = True
+except ImportError:
+    HAS_MOPS = False
 
 if HAS_SYMPY:
     from .rotations import WignerDReal, transform_frame_o3, transform_frame_so3
 
+try:
+    import torch
+
+    HAS_TORCH = True
+except ImportError:
+    HAS_TORCH = False
+
+if HAS_TORCH:
+    ARRAYS_BACKEND = ["numpy", "torch"]
+else:
+    ARRAYS_BACKEND = ["numpy"]
+
 DATA_ROOT = os.path.join(os.path.dirname(__file__), "data")
 
 SPHEX_HYPERS = {
@@ -63,32 +77,48 @@
 }
 
 
-# ============ Pytest fixtures ============
-
-
-@pytest.fixture()
-def cg_cache_sparse():
-    return ClebschGordanReal(lambda_max=5, sparse=True)
-
-
-@pytest.fixture()
-def cg_cache_dense():
-    return ClebschGordanReal(lambda_max=5, sparse=False)
-
-
 # ============ Helper functions ============
 
 
 def h2_isolated():
-    return ase.io.read(os.path.join(DATA_ROOT, "h2_isolated.xyz"), ":")
+    return [
+        ase.Atoms(
+            symbols=["H", "H"],
+            positions=[
+                [1.97361700, 1.73067300, 2.47063400],
+                [1.97361700, 3.26932700, 2.47063400],
+            ],
+        )
+    ]
 
 
 def h2o_isolated():
-    return ase.io.read(os.path.join(DATA_ROOT, "h2o_isolated.xyz"), ":")
+    return [
+        ase.Atoms(
+            symbols=["O", "H", "H"],
+            positions=[
+                [2.56633400, 2.50000000, 2.50370100],
+                [1.97361700, 1.73067300, 2.47063400],
+                [1.97361700, 3.26932700, 2.47063400],
+            ],
+        )
+    ]
 
 
 def h2o_periodic():
-    return ase.io.read(os.path.join(DATA_ROOT, "h2o_periodic.xyz"), ":")
+
+    return [
+        ase.Atoms(
+            symbols=["O", "H", "H"],
+            positions=[
+                [2.56633400, 2.50000000, 2.50370100],
+                [1.97361700, 1.73067300, 2.47063400],
+                [1.97361700, 3.26932700, 2.47063400],
+            ],
+            cell=[5, 5, 5],
+            pbc=[True, True, True],
+        )
+    ]
 
 
 def wigner_d_matrices(lmax: int):
@@ -169,19 +199,14 @@ def test_so3_equivariance():
 
     nu_1 = spherical_expansion(frames)
     nu_1_so3 = spherical_expansion(frames_so3)
-
-    nu_3 = correlate_density(
-        density=nu_1,
-        correlation_order=nu_target,
-        angular_cutoff=angular_cutoff,
-        selected_keys=selected_keys,
-    )
-    nu_3_so3 = correlate_density(
-        density=nu_1_so3,
+    corr_calculator = DensityCorrelations(
+        max_angular=3,
+        correlation_order=nu_target,
         angular_cutoff=angular_cutoff,
         selected_keys=selected_keys,
     )
+    nu_3 = corr_calculator.compute(nu_1)
+    nu_3_so3 = corr_calculator.compute(nu_1_so3)
 
     nu_3_transf = wig.transform_tensormap_so3(nu_3)
     assert metatensor.allclose(nu_3_transf, nu_3_so3)
@@ -203,18 +228,14 @@ def test_o3_equivariance():
 
     nu_1 = spherical_expansion(frames)
     nu_1_o3 = spherical_expansion(frames_o3)
 
-    nu_3 = correlate_density(
-        density=nu_1,
-        correlation_order=nu_target,
-        angular_cutoff=angular_cutoff,
-        selected_keys=selected_keys,
-    )
-    nu_3_o3 = correlate_density(
-        density=nu_1_o3,
+    corr_calculator = DensityCorrelations(
+        max_angular=angular_cutoff,
+        correlation_order=nu_target,
         angular_cutoff=angular_cutoff,
         selected_keys=selected_keys,
     )
+    nu_3 = corr_calculator.compute(nu_1)
+    nu_3_o3 = corr_calculator.compute(nu_1_o3)
 
     nu_3_transf = wig.transform_tensormap_o3(nu_3)
     assert metatensor.allclose(nu_3_transf, nu_3_o3)
@@ -238,13 +259,14 @@ def test_lambda_soap_vs_powerspectrum():
 
     # Build a lambda-SOAP
     density = spherical_expansion(frames)
-    lsoap = correlate_density(
-        density=density,
+    corr_calculator = DensityCorrelations(
+        max_angular=SPHEX_HYPERS["max_angular"],
         correlation_order=2,
         selected_keys=Labels(
             names=["spherical_harmonics_l"], values=np.array([0]).reshape(-1, 1)
         ),
     )
+    lsoap = corr_calculator.compute(density)
 
     keys = lsoap.keys.remove(name="spherical_harmonics_l")
     lsoap = TensorMap(keys=keys, blocks=[b.copy() for b in lsoap.blocks()])
@@ -288,21 +310,23 @@ def test_correlate_density_norm(correlation_order):
     nu1 = spherical_expansion_small(frames)
 
     # Build higher body order tensor without sorting the l lists
-    nux = correlate_density(
-        nu1,
+    corr_calculator = DensityCorrelations(
+        max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order,
         correlation_order=correlation_order,
         angular_cutoff=None,
         selected_keys=None,
         skip_redundant=False,
     )
-    # Build higher body order tensor *with* sorting the l lists
-    nux_sorted_l = correlate_density(
-        nu1,
+    corr_calculator_skip_redundant = DensityCorrelations(
+        max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order,
         correlation_order=correlation_order,
         angular_cutoff=None,
         selected_keys=None,
         skip_redundant=True,
     )
+    nux = corr_calculator.compute(nu1)
+    # Build higher body order tensor *with* sorting the l lists
+    nux_sorted_l = corr_calculator_skip_redundant.compute(nu1)
 
     # Standardize the features by passing through the CG combination code but with
     # no iterations (i.e. body order 1 -> 1)
@@ -349,7 +373,8 @@
 
 
 @pytest.mark.parametrize("l1, l2", [(1, 2), (2, 3), (0, 5)])
-def test_clebsch_gordan_orthogonality(cg_cache_dense, l1, l2):
+@pytest.mark.parametrize("arrays_backend", ARRAYS_BACKEND)
+def test_clebsch_gordan_orthogonality(l1, l2, arrays_backend):
     """
     Test orthogonality relationships of cached dense CG coefficients.
 
     See
     https://en.wikipedia.org/wiki/Clebsch%E2%80%93Gordan_coefficients#Orthogonality_relations
     for details.
""" + cg_coeffs = calculate_cg_coefficients( + lambda_max=5, sparse=False, use_torch=arrays_backend == "torch" + ) + lam_min = abs(l1 - l2) lam_max = l1 + l2 + if arrays_backend == "torch": + int64_like = torch.empty(0, dtype=torch.int64) + float64_like = torch.empty(0, dtype=torch.float64) + bool_like = torch.empty(0, dtype=torch.bool) + elif arrays_backend == "numpy": + int64_like = np.empty(0, dtype=np.int64) + float64_like = np.empty(0, dtype=np.float64) + bool_like = np.empty(0, dtype=np.bool_) + else: + raise ValueError(f"Not supported arrays backend {arrays_backend}.") # We test lam dimension # \sum_{-m1 \leq l1 \leq m1, -m2 \leq l2 \leq m2} # <λμ|l1m1,l2m2> = δ_μμ' for lam in range(lam_min, lam_max): - cg_mat = cg_cache_dense.coeffs[(l1, l2, lam)].reshape(-1, 2 * lam + 1) + cg_mat = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lam}).values.reshape( + -1, 2 * lam + 1 + ) dot_product = cg_mat.T @ cg_mat - diag_mask = np.zeros(dot_product.shape, dtype=np.bool_) - diag_mask[np.diag_indices(len(dot_product))] = True - assert np.allclose( - dot_product[~diag_mask], np.zeros(dot_product.shape)[~diag_mask] + diag_mask = _dispatch.zeros_like(bool_like, dot_product.shape) + diag_indices = ( + _dispatch.int_range_like(0, len(dot_product), int64_like), + _dispatch.int_range_like(0, len(dot_product), int64_like), + ) + diag_mask[diag_indices] = True + assert _dispatch.allclose( + dot_product[~diag_mask], + _dispatch.zeros_like(float64_like, dot_product.shape)[~diag_mask], + rtol=1e-05, + atol=1e-08, + ) + assert _dispatch.allclose( + dot_product[diag_mask], dot_product[diag_mask][0:1], rtol=1e-05, atol=1e-08 ) - assert np.allclose(dot_product[diag_mask], dot_product[diag_mask][0]) # We test l1 l2 dimension # \sum_{|l1-l2| \leq λ \leq l1+l2} \sum_{-μ \leq λ \leq μ} # <λμ|l1m1,l2m2> = δ_m1m1' δ_m2m2' l1l2_dim = (2 * l1 + 1) * (2 * l2 + 1) - dot_product = np.zeros((l1l2_dim, l1l2_dim)) + dot_product = _dispatch.zeros_like(float64_like, (l1l2_dim, l1l2_dim)) for lam in range(lam_min, lam_max + 1): - cg_mat = cg_cache_dense.coeffs[(l1, l2, lam)].reshape(-1, 2 * lam + 1) + cg_mat = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lam}).values.reshape( + -1, 2 * lam + 1 + ) dot_product += cg_mat @ cg_mat.T - diag_mask = np.zeros(dot_product.shape, dtype=np.bool_) - diag_mask[np.diag_indices(len(dot_product))] = True + diag_mask = _dispatch.zeros_like(bool_like, dot_product.shape) + diag_indices = ( + _dispatch.int_range_like(0, len(dot_product), int64_like), + _dispatch.int_range_like(0, len(dot_product), int64_like), + ) + diag_mask[diag_indices] = True - assert np.allclose(dot_product[~diag_mask], np.zeros(dot_product.shape)[~diag_mask]) - assert np.allclose(dot_product[diag_mask], dot_product[diag_mask][0]) + assert _dispatch.allclose( + dot_product[~diag_mask], + _dispatch.zeros_like(float64_like, dot_product.shape)[~diag_mask], + rtol=1e-05, + atol=1e-08, + ) + assert _dispatch.allclose( + dot_product[diag_mask], dot_product[diag_mask][0:1], rtol=1e-05, atol=1e-08 + ) @pytest.mark.skipif( @@ -393,28 +456,57 @@ def test_clebsch_gordan_orthogonality(cg_cache_dense, l1, l2): ) def test_correlate_density_dense_sparse_agree(): """ - Tests for agreement between nu=3 tensors built using both sparse and dense + Tests for agreement between nu=2 tensors built using both sparse and dense CG coefficient caches. 
""" frames = h2o_periodic() density = spherical_expansion_small(frames) + correlation_order = 2 + corr_calculator_sparse = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, + correlation_order=correlation_order, + cg_backend="python-sparse", + ) + corr_calculator_dense = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, + correlation_order=correlation_order, + cg_backend="python-dense", + ) # NOTE: testing the private function here so we can control the use of # sparse v dense CG cache - n_body_sparse = _correlate_density( - density, - correlation_order=2, - compute_metadata_only=False, - sparse=True, + n_body_sparse = corr_calculator_sparse.compute(density) + n_body_dense = corr_calculator_dense.compute(density) + + assert metatensor.allclose(n_body_sparse, n_body_dense, atol=1e-8, rtol=1e-8) + + +@pytest.mark.skipif(not HAS_MOPS, reason="mops is not installed") +def test_correlate_density_mops_python_sparse_agree(): + """ + Tests for agreement between nu=2 tensors built using both "python-sparse" + and "mops" CG backend. + """ + frames = h2o_periodic() + density = spherical_expansion_small(frames) + + correlation_order = 2 + corr_calculator_python = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, + correlation_order=correlation_order, + cg_backend="python-sparse", ) - n_body_dense = _correlate_density( - density, - correlation_order=2, - compute_metadata_only=False, - sparse=False, + corr_calculator_mops = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, + correlation_order=correlation_order, + cg_backend="mops", ) + # NOTE: testing the private function here so we can control the use of + # sparse v dense CG cache + n_body_python = corr_calculator_python.compute(density) + n_body_mops = corr_calculator_mops.compute(density) - assert metatensor.allclose(n_body_sparse, n_body_dense, atol=1e-8, rtol=1e-8) + assert metatensor.allclose(n_body_python, n_body_mops, atol=1e-8, rtol=1e-8) # ============ Test metadata ============ @@ -429,26 +521,24 @@ def test_correlate_density_metadata_agree(): :py:func:`correlate_density_metadata` agree. """ frames = h2o_isolated() - correlation_order = 3 skip_redundant = True - for nu1 in [spherical_expansion_small(frames), spherical_expansion(frames)]: - # Build higher body order tensor with CG computation - nux = correlate_density( - nu1, - correlation_order=correlation_order, + + for max_angular, nu1 in [ + (2, spherical_expansion_small(frames)), + (3, spherical_expansion(frames)), + ]: + corr_calculator = DensityCorrelations( + max_angular=max_angular, + correlation_order=3, angular_cutoff=3, selected_keys=None, skip_redundant=skip_redundant, ) + # Build higher body order tensor with CG computation + nux = corr_calculator.compute(nu1) # Build higher body order tensor without CG computation - i.e. 
 
 
 # ============ Test metadata ============
 
 
@@ -429,26 +521,24 @@ def test_correlate_density_metadata_agree():
     :py:func:`correlate_density_metadata` agree.
     """
     frames = h2o_isolated()
-    correlation_order = 3
     skip_redundant = True
-    for nu1 in [spherical_expansion_small(frames), spherical_expansion(frames)]:
-        # Build higher body order tensor with CG computation
-        nux = correlate_density(
-            nu1,
-            correlation_order=correlation_order,
+
+    for max_angular, nu1 in [
+        (2, spherical_expansion_small(frames)),
+        (3, spherical_expansion(frames)),
+    ]:
+        corr_calculator = DensityCorrelations(
+            max_angular=max_angular,
+            correlation_order=3,
             angular_cutoff=3,
             selected_keys=None,
             skip_redundant=skip_redundant,
         )
+        # Build higher body order tensor with CG computation
+        nux = corr_calculator.compute(nu1)
         # Build higher body order tensor without CG computation - i.e. metadata
         # only
-        nux_metadata_only = correlate_density_metadata(
-            nu1,
-            correlation_order=correlation_order,
-            angular_cutoff=3,
-            selected_keys=None,
-            skip_redundant=skip_redundant,
-        )
+        nux_metadata_only = corr_calculator.compute_metadata(nu1)
 
         assert metatensor.equal_metadata(nux, nux_metadata_only)
 
 
@@ -460,9 +550,11 @@
     ],
 )
 @pytest.mark.parametrize("skip_redundant", [True, False])
+@pytest.mark.parametrize("arrays_backend", ARRAYS_BACKEND + [None])
 def test_correlate_density_angular_selection(
     selected_keys: Labels,
     skip_redundant: bool,
+    arrays_backend: str,
 ):
     """
     Tests that the correct angular channels are output based on the specified
 
     frames = h2o_isolated()
     nu_1 = spherical_expansion(frames)
 
-    nu_2 = correlate_density(
-        density=nu_1,
-        correlation_order=2,
+    correlation_order = 2
+    corr_calculator = DensityCorrelations(
+        max_angular=SPHEX_HYPERS["max_angular"] * correlation_order,
+        correlation_order=correlation_order,
         angular_cutoff=None,
         selected_keys=selected_keys,
         skip_redundant=skip_redundant,
+        arrays_backend=arrays_backend,
     )
+    if arrays_backend is not None:
+        nu_1 = nu_1.to(arrays=arrays_backend)
+    nu_2 = corr_calculator.compute(nu_1)
 
     if selected_keys is None:
         assert np.all(
diff --git a/python/rascaline/tests/utils/data/h2_isolated.xyz b/python/rascaline/tests/utils/data/h2_isolated.xyz
deleted file mode 100644
index ec5f59680..000000000
--- a/python/rascaline/tests/utils/data/h2_isolated.xyz
+++ /dev/null
@@ -1,4 +0,0 @@
-2
-pbc="F F F"
-H 1.97361700 1.73067300 2.47063400
-H 1.97361700 3.26932700 2.47063400
diff --git a/python/rascaline/tests/utils/data/h2o_isolated.xyz b/python/rascaline/tests/utils/data/h2o_isolated.xyz
deleted file mode 100644
index fc876d2ba..000000000
--- a/python/rascaline/tests/utils/data/h2o_isolated.xyz
+++ /dev/null
@@ -1,5 +0,0 @@
-3
-pbc="F F F"
-O 2.56633400 2.50000000 2.50370100
-H 1.97361700 1.73067300 2.47063400
-H 1.97361700 3.26932700 2.47063400
diff --git a/python/rascaline/tests/utils/data/h2o_periodic.xyz b/python/rascaline/tests/utils/data/h2o_periodic.xyz
deleted file mode 100644
index 3374566e6..000000000
--- a/python/rascaline/tests/utils/data/h2o_periodic.xyz
+++ /dev/null
@@ -1,5 +0,0 @@
-3
-Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" pbc="T T T"
-O 2.56633400 2.50000000 2.50370100
-H 1.97361700 1.73067300 2.47063400
-H 1.97361700 3.26932700 2.47063400
diff --git a/tox.ini b/tox.ini
index 6d59f3066..4265a7232 100644
--- a/tox.ini
+++ b/tox.ini
@@ -62,7 +62,7 @@ commands =
 # note: platform_system can be "Linux","Darwin", or "Windows".
 description =
     Run Python unit tests with all dependencies installed (ase, pyscf,
-    and chemfiles are optional dependencies)
+    chemfiles, and torch are optional dependencies)
 deps =
     {[testenv]metatensor-core-requirement}
     ase
@@ -72,6 +72,7 @@ deps =
     pytest-cov
     scipy
    sympy
+    torch
     pyscf;platform_system!="Windows"
     wigners
 # TODO: add mops once it becomes stable enough (and potentially supports windows)
@@ -79,7 +80,6 @@ deps =
 commands =
     pytest {[testenv]test_options} {posargs}
 
-
 [testenv:min-deps]
 description = Run Python unit tests with the minimal dependencies installed
 deps =
@@ -97,6 +97,7 @@ deps =
     {[testenv]packaging_deps}
     {[testenv]metatensor-torch-requirement}
+    metatensor-operations
     pytest
     pytest-cov
     numpy