diff --git a/docs/src/references/api/python/utils/clebsch-gordan.rst b/docs/src/references/api/python/utils/clebsch-gordan.rst new file mode 100644 index 000000000..7d1258348 --- /dev/null +++ b/docs/src/references/api/python/utils/clebsch-gordan.rst @@ -0,0 +1,5 @@ +Clebsch-Gordan products +======================= + +.. autoclass:: rascaline.utils.DensityCorrelations + :members: diff --git a/docs/src/references/api/python/utils/index.rst b/docs/src/references/api/python/utils/index.rst index 3d88c4da9..d713fdcb9 100644 --- a/docs/src/references/api/python/utils/index.rst +++ b/docs/src/references/api/python/utils/index.rst @@ -11,3 +11,4 @@ Utility functions and classes that extend the core usage of rascaline. radial-basis power-spectrum splines + clebsch-gordan diff --git a/docs/src/references/api/torch/utils/clebsch-gordan.rst b/docs/src/references/api/torch/utils/clebsch-gordan.rst new file mode 100644 index 000000000..a8cb5299a --- /dev/null +++ b/docs/src/references/api/torch/utils/clebsch-gordan.rst @@ -0,0 +1,5 @@ +Clebsch-Gordan products +======================= + +.. autoclass:: rascaline.torch.utils.DensityCorrelations + :members: diff --git a/docs/src/references/api/torch/utils/index.rst b/docs/src/references/api/torch/utils/index.rst index 1351283df..a2cf425eb 100644 --- a/docs/src/references/api/torch/utils/index.rst +++ b/docs/src/references/api/torch/utils/index.rst @@ -8,3 +8,4 @@ Utility functions and classes that extend the core usage of rascaline-torch :maxdepth: 1 power-spectrum + clebsch-gordan diff --git a/python/rascaline-torch/rascaline/torch/__init__.py b/python/rascaline-torch/rascaline/torch/__init__.py index 226cde60b..0d63d3242 100644 --- a/python/rascaline-torch/rascaline/torch/__init__.py +++ b/python/rascaline-torch/rascaline/torch/__init__.py @@ -9,12 +9,12 @@ _load_library() -from . import utils # noqa -from .calculator_base import CalculatorModule, register_autograd # noqa +from . import utils # noqa: E402, F401 +from .calculator_base import CalculatorModule, register_autograd # noqa: E402, F401 # don't forget to also update `rascaline/__init__.py` and # `rascaline/torch/calculators.py` when modifying this file -from .calculators import ( # noqa +from .calculators import ( # noqa: E402, F401 AtomicComposition, LodeSphericalExpansion, NeighborList, @@ -24,7 +24,7 @@ SphericalExpansion, SphericalExpansionByPair, ) -from .system import systems_to_torch # noqa +from .system import systems_to_torch # noqa: E402, F401 __all__ = [ diff --git a/python/rascaline-torch/rascaline/torch/utils.py b/python/rascaline-torch/rascaline/torch/utils.py new file mode 100644 index 000000000..1625581b4 --- /dev/null +++ b/python/rascaline-torch/rascaline/torch/utils.py @@ -0,0 +1,88 @@ +import importlib +import os +import sys +from typing import Any + +import torch +from metatensor.torch import Labels, LabelsEntry, TensorBlock, TensorMap + +import rascaline.utils + +from .calculator_base import CalculatorModule +from .system import System + + +_HERE = os.path.dirname(__file__) + + +# For details what is happening here take a look an `rascaline.torch.calculators`. + +# create the `_backend` module as an empty module +spec = importlib.util.spec_from_loader( + "rascaline.torch.utils._backend", + loader=None, +) +module = importlib.util.module_from_spec(spec) +# This module only exposes a handful of things, defined here. Any changes here MUST also +# be made to the `metatensor/operations/_classes.py` file, which is used in non +# TorchScript mode. 
+module.__dict__["Labels"] = Labels +module.__dict__["TensorBlock"] = TensorBlock +module.__dict__["TensorMap"] = TensorMap +module.__dict__["LabelsEntry"] = LabelsEntry +module.__dict__["torch_jit_is_scripting"] = torch.jit.is_scripting +module.__dict__["torch_jit_annotate"] = torch.jit.annotate +module.__dict__["torch_jit_export"] = torch.jit.export +module.__dict__["TorchTensor"] = torch.Tensor +module.__dict__["TorchModule"] = torch.nn.Module +module.__dict__["TorchScriptClass"] = torch.ScriptClass +module.__dict__["Array"] = torch.Tensor +module.__dict__["CalculatorBase"] = CalculatorModule +module.__dict__["IntoSystem"] = System + + +def is_labels(obj: Any): + return isinstance(obj, Labels) + + +if os.environ.get("RASCALINE_IMPORT_FOR_SPHINX") is None: + is_labels = torch.jit.script(is_labels) + +module.__dict__["is_labels"] = is_labels + + +def check_isinstance(obj, ty): + if isinstance(ty, torch.ScriptClass): + # This branch is taken when `ty` is a custom class (TensorMap, …). since `ty` is + # an instance of `torch.ScriptClass` and not a class itself, there is no way to + # check if obj is an "instance" of this class, so we always return True and hope + # for the best. Most errors should be caught by the TorchScript compiler anyway. + return True + else: + assert isinstance(ty, type) + return isinstance(obj, ty) + + +# register the module in sys.modules, so future import find it directly +sys.modules[spec.name] = module + +# create a module named `rascaline.torch.utils` using code from +# `rascaline.utils` +spec = importlib.util.spec_from_file_location( + "rascaline.torch.utils", rascaline.utils.__file__ +) + +module = importlib.util.module_from_spec(spec) + + +cmake_prefix_path = os.path.realpath(os.path.join(_HERE, "..", "lib", "cmake")) +""" +Path containing the CMake configuration files for the underlying C library +""" + +module.__dict__["cmake_prefix_path"] = cmake_prefix_path + +# override `rascaline.torch.utils` (the module associated with the current file) +# with the newly created module +sys.modules[spec.name] = module +spec.loader.exec_module(module) diff --git a/python/rascaline-torch/rascaline/torch/utils/__init__.py b/python/rascaline-torch/rascaline/torch/utils/__init__.py deleted file mode 100644 index 3a73c2d20..000000000 --- a/python/rascaline-torch/rascaline/torch/utils/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -import os - -from .power_spectrum import PowerSpectrum - - -_HERE = os.path.dirname(__file__) - -cmake_prefix_path = os.path.realpath(os.path.join(_HERE, "..", "lib", "cmake")) -""" -Path containing the CMake configuration files for the underlying C library -""" - -__all__ = ["PowerSpectrum"] diff --git a/python/rascaline-torch/rascaline/torch/utils/power_spectrum.py b/python/rascaline-torch/rascaline/torch/utils/power_spectrum.py deleted file mode 100644 index 7bf9b9321..000000000 --- a/python/rascaline-torch/rascaline/torch/utils/power_spectrum.py +++ /dev/null @@ -1,99 +0,0 @@ -import importlib -import sys -from typing import List, Optional, Union - -import torch -from metatensor.torch import Labels, TensorBlock, TensorMap - -import rascaline.utils.power_spectrum - -from ..calculator_base import CalculatorModule as CalculatorBase -from ..system import System as IntoSystem - - -# For details what is happening here take a look an `rascaline.torch.calculators`. 
- -# Step 1: create te `_classes` module as an empty module -spec = importlib.util.spec_from_loader( - "rascaline.torch.utils.power_spectrum._classes", - loader=None, -) -module = importlib.util.module_from_spec(spec) -# This module only exposes a handful of things, defined here. Any changes here MUST also -# be made to the `metatensor/operations/_classes.py` file, which is used in non -# TorchScript mode. -module.__dict__["Labels"] = Labels -module.__dict__["TensorBlock"] = TensorBlock -module.__dict__["TensorMap"] = TensorMap -module.__dict__["CalculatorBase"] = CalculatorBase -module.__dict__["IntoSystem"] = IntoSystem - -# register the module in sys.modules, so future import find it directly -sys.modules[spec.name] = module - - -# Step 2: create a module named `rascaline.torch.utils.power_spectrum` using code from -# `rascaline.utils.power_spectrum` -spec = importlib.util.spec_from_file_location( - "rascaline.torch.utils.power_spectrum", - rascaline.utils.power_spectrum.__file__, -) - -module = importlib.util.module_from_spec(spec) -sys.modules[spec.name] = module -spec.loader.exec_module(module) - - -# Store the original class to avoid recursion problems -PowerSpectrumBase = module.PowerSpectrum - - -class PowerSpectrum(torch.nn.Module, PowerSpectrumBase): - """ - Torch version of the general power spectrum of one or of two calculators. - - The class provides :py:meth:`PowerSpectrum.forward` and the integration with - :py:class:`torch.nn.Module`. For more details see - :py:class:`rascaline.utils.PowerSpectrum`. - - :param calculator_1: first calculator - :param calculator_1: second calculator - :param species: List of `species_neighbor` to fill all blocks with. This option - might be useful when joining along the ``sample`` direction after computation. - If :py:obj:`None` blocks are filled with `species_neighbor` from all blocks. - :raises ValueError: If other calculators than - :py:class:`rascaline.SphericalExpansion` or - :py:class:`rascaline.LodeSphericalExpansion` are used. 
- :raises ValueError: If ``'max_angular'`` of both calculators is different - """ - - def __init__( - self, - calculator_1: CalculatorBase, - calculator_2: Optional[CalculatorBase] = None, - species: Optional[List[int]] = None, - ): - torch.nn.Module.__init__(self) - PowerSpectrumBase.__init__( - self, - calculator_1=calculator_1, - calculator_2=calculator_2, - species=species, - ) - - def forward( - self, - systems: Union[IntoSystem, List[IntoSystem]], - gradients: Optional[List[str]] = None, - use_native_system: bool = True, - ) -> TensorMap: - """forward just calls :py:meth:`PowerSpectrum.compute`""" - - return self.compute( - systems=systems, - gradients=gradients, - use_native_system=use_native_system, - ) - - -module.__dict__["PowerSpectrum"] = PowerSpectrum diff --git a/python/rascaline-torch/tests/utils/correlate_density.py b/python/rascaline-torch/tests/utils/correlate_density.py new file mode 100644 index 000000000..9db042bd7 --- /dev/null +++ b/python/rascaline-torch/tests/utils/correlate_density.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +import io +import os +from typing import Any, List + +import ase.io +import metatensor.torch +import pytest +import torch +from metatensor.torch import Labels, TensorBlock, TensorMap # noqa + +import rascaline.torch +from rascaline.torch.utils.clebsch_gordan.correlate_density import DensityCorrelations + + +DATA_ROOT = os.path.join(os.path.dirname(__file__), "data") + + +@torch.jit.script +def is_tensor_map(obj: Any): + return isinstance(obj, TensorMap) + + +is_tensor_map = torch.jit.script(is_tensor_map) + +SPHERICAL_EXPANSION_HYPERS = { + "cutoff": 2.5, + "max_radial": 3, + "max_angular": 3, + "atomic_gaussian_width": 0.2, + "radial_basis": {"Gto": {}}, + "cutoff_function": {"ShiftedCosine": {"width": 0.5}}, + "center_atom_weight": 1.0, +} + +SELECTED_KEYS = Labels( + names=["spherical_harmonics_l"], values=torch.tensor([1, 3]).reshape(-1, 1) +) + + +def h2o_isolated(): + return [ + ase.Atoms( + symbols=["O", "H", "H"], + positions=[ + [2.56633400, 2.50000000, 2.50370100], + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + ) + ] + + +def spherical_expansion(frames: List[ase.Atoms]): + """Returns a rascaline SphericalExpansion""" + calculator = rascaline.torch.SphericalExpansion(**SPHERICAL_EXPANSION_HYPERS) + return calculator.compute(rascaline.torch.systems_to_torch(frames)) + + +# copy of def test_correlate_density_angular_selection( +@pytest.mark.parametrize("selected_keys", [None, SELECTED_KEYS]) +@pytest.mark.parametrize("skip_redundant", [True, False]) +def test_torch_script_correlate_density_angular_selection( + selected_keys: Labels, + skip_redundant: bool, +): + """ + Tests that the correct angular channels are output based on the specified + ``selected_keys``. 
+ """ + frames = h2o_isolated() + nu_1 = spherical_expansion(frames) + correlation_order = 2 + corr_calculator = DensityCorrelations( + max_angular=SPHERICAL_EXPANSION_HYPERS["max_angular"] * correlation_order, + correlation_order=correlation_order, + angular_cutoff=None, + selected_keys=selected_keys, + skip_redundant=skip_redundant, + ) + + ref_nu_2 = corr_calculator.compute(nu_1) + scripted_corr_calculator = torch.jit.script(corr_calculator) + + # Test compute + scripted_nu_2 = scripted_corr_calculator.compute(nu_1) + assert metatensor.torch.equal_metadata(scripted_nu_2, ref_nu_2) + assert metatensor.torch.allclose(scripted_nu_2, ref_nu_2) + + # Test compute_metadata + scripted_nu_2 = scripted_corr_calculator.compute_metadata(nu_1) + assert metatensor.torch.equal_metadata(scripted_nu_2, ref_nu_2) + + +def test_jit_save_load(): + corr_calculator = DensityCorrelations( + max_angular=2, + correlation_order=2, + angular_cutoff=1, + ) + scripted_correlate_density = torch.jit.script(corr_calculator) + with io.BytesIO() as buffer: + torch.jit.save(scripted_correlate_density, buffer) + buffer.seek(0) + torch.jit.load(buffer) + + +def test_save_load(): + """Tests for saving and loading with cg_backend="python-dense", + which makes the DensityCorrelations object non-scriptable due to + a non-contiguous CG cache.""" + corr_calculator = DensityCorrelations( + max_angular=2, + correlation_order=2, + angular_cutoff=1, + cg_backend="python-dense", + ) + with io.BytesIO() as buffer: + torch.save(corr_calculator, buffer) + buffer.seek(0) + torch.load(buffer) diff --git a/python/rascaline/rascaline/utils/__init__.py b/python/rascaline/rascaline/utils/__init__.py index e3e8adc1b..b6bfe14ff 100644 --- a/python/rascaline/rascaline/utils/__init__.py +++ b/python/rascaline/rascaline/utils/__init__.py @@ -1,6 +1,6 @@ import os -from .clebsch_gordan import correlate_density, correlate_density_metadata # noqa +from .clebsch_gordan import DensityCorrelations # noqa from .power_spectrum import PowerSpectrum # noqa from .splines import ( # noqa AtomicDensityBase, diff --git a/python/rascaline/rascaline/utils/_backend.py b/python/rascaline/rascaline/utils/_backend.py new file mode 100644 index 000000000..8e614ed74 --- /dev/null +++ b/python/rascaline/rascaline/utils/_backend.py @@ -0,0 +1,61 @@ +from typing import Any, Union + +import numpy as np +from metatensor import Labels, LabelsEntry, TensorBlock, TensorMap + +from ..calculator_base import CalculatorBase +from ..systems import IntoSystem + + +def torch_jit_is_scripting(): + return False + + +def torch_jit_annotate(annotation, obj): + return obj + + +def torch_jit_export(func): + return func + + +def is_labels(obj: Any): + return isinstance(obj, Labels) + + +try: + from torch import ScriptClass as TorchScriptClass + from torch import Tensor as TorchTensor + from torch.nn import Module as TorchModule +except ImportError: + + class TorchTensor: + pass + + class TorchModule: + + def __call__(self, *arg, **kwargs): + return self.forward(*arg, **kwargs) + + class TorchScriptClass: + pass + + +Array = Union[np.ndarray, TorchTensor] + +__all__ = [ + "Array", + "CalculatorBase", + "IntoSystem", + "Labels", + "TensorBlock", + "TensorMap", + "TorchTensor", + "TorchModule", + "TorchScriptClass", + "LabelsEntry", + "torch_jit_is_scripting", + "torch_jit_annotate", + "torch_jit_export", + "is_labels", +] diff --git a/python/rascaline/rascaline/utils/_dispatch.py b/python/rascaline/rascaline/utils/_dispatch.py new file mode 100644 index 000000000..f1254b4fa --- /dev/null +++ 
b/python/rascaline/rascaline/utils/_dispatch.py @@ -0,0 +1,450 @@ +"""Helper functions to dispatch methods between numpy and torch. + +The functions are similar to those in metatensor-operations. Missing functions may +already exist there. Functions are ordered alphabetically. +""" + +import itertools +from typing import List, Optional, Union + +import numpy as np + + +try: + import torch + from torch import Tensor as TorchTensor +except ImportError: + + class TorchTensor: + pass + + +UNKNOWN_ARRAY_TYPE = ( + "unknown array type, only numpy arrays and torch tensors are supported" +) + + +def _check_all_torch_tensor(arrays: List[TorchTensor]): + for array in arrays: + if not isinstance(array, TorchTensor): + raise TypeError( + f"expected argument to be a torch.Tensor, but got {type(array)}" + ) + + +def _check_all_np_ndarray(arrays): + for array in arrays: + if not isinstance(array, np.ndarray): + raise TypeError( + f"expected argument to be a np.ndarray, but got {type(array)}" + ) + + +def concatenate(arrays: List[TorchTensor], axis: int): + """ + Concatenate a group of arrays along a given axis. + + This function has the same behavior as ``numpy.concatenate(arrays, axis)`` + and ``torch.concatenate(arrays, axis)``. + + Passing `axis` as ``0`` is equivalent to :py:func:`numpy.vstack`, ``1`` to + :py:func:`numpy.hstack`, and ``2`` to :py:func:`numpy.dstack`, though any + axis index > 0 is valid. + """ + if isinstance(arrays[0], TorchTensor): + _check_all_torch_tensor(arrays) + return torch.concatenate(arrays, axis) + elif isinstance(arrays[0], np.ndarray): + _check_all_np_ndarray(arrays) + return np.concatenate(arrays, axis) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): + """ + Create an uninitialized array, with the given ``shape``, and similar dtype, + device and other options as ``array``. + + If ``shape`` is :py:obj:`None`, the array shape is used instead. + ``requires_grad`` is only used for torch tensors, and set the corresponding + value on the returned array. + + This is the equivalent to ``np.empty_like(array, shape=shape)``. + """ + if isinstance(array, TorchTensor): + if shape is None: + shape = array.size() + return torch.empty( + shape, + dtype=array.dtype, + layout=array.layout, + device=array.device, + ).requires_grad_(requires_grad) + elif isinstance(array, np.ndarray): + return np.empty_like(array, shape=shape, subok=False) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def list_to_array(array, data: List[List[int]]): + """Create an object from data with the same type as ``array``.""" + if isinstance(array, TorchTensor): + return torch.tensor(data) + elif isinstance(array, np.ndarray): + return np.array(data) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def matmul(a, b): + """Matrix product of two arrays.""" + if isinstance(a, TorchTensor): + _check_all_torch_tensor([b]) + return torch.matmul(a, b) + elif isinstance(a, np.ndarray): + _check_all_np_ndarray([b]) + return np.matmul(a, b) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def to_index_array(array): + """Returns an array that is suitable for indexing a dimension of + a different array. + After a few checks (int, 1D), this operation will convert the dtype to + torch.long (which is, in some torch versions, the only acceptable type + of index tensor). Numpy arrays are left unchanged. 
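+
+    As a minimal sketch of the intended behavior (the values here are
+    illustrative, not from the original code):
+
+    >>> import numpy as np
+    >>> to_index_array(np.array([0, 2, 1]))
+    array([0, 2, 1])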
+ """ + if len(array.shape) != 1: + raise ValueError("Index arrays must be 1D") + + if isinstance(array, TorchTensor): + if torch.is_floating_point(array): + raise ValueError("Index arrays must be integers") + return array.to(torch.long) + elif isinstance(array, np.ndarray): + if not np.issubdtype(array.dtype, np.integer): + raise ValueError("Index arrays must be integers") + return array + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def unique(array, axis: Optional[int] = None): + """Find the unique elements of an array.""" + if isinstance(array, TorchTensor): + return torch.unique(array, dim=axis) + elif isinstance(array, np.ndarray): + return np.unique(array, axis=axis) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): + """ + Create an array filled with zeros, with the given ``shape``, and similar + dtype, device and other options as ``array``. + + If ``shape`` is :py:obj:`None`, the array shape is used instead. + ``requires_grad`` is only used for torch tensors, and set the corresponding + value on the returned array. + + This is the equivalent to ``np.zeros_like(array, shape=shape)``. + """ + if isinstance(array, TorchTensor): + if shape is None: + shape = array.size() + + return torch.zeros( + shape, + dtype=array.dtype, + layout=array.layout, + device=array.device, + ).requires_grad_(requires_grad) + elif isinstance(array, np.ndarray): + if shape is None: + shape = array.shape + return np.zeros_like(array, shape=shape, subok=False) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def where(array): + """Return the indices where `array` is True. + + This function has the same behavior as ``np.where(array)``. + """ + if isinstance(array, TorchTensor): + return torch.where(array) + elif isinstance(array, np.ndarray): + return np.where(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def abs(array): + """ + Returns the absolute value of the elements in the array. + + It is equivalent of np.abs(array) and torch.abs(tensor) + """ + if isinstance(array, TorchTensor): + return torch.abs(array) + elif isinstance(array, np.ndarray): + return np.abs(array).astype(array.dtype) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def argsort(array): + """ + Returns the sorted arguments of the elements in the array. + + It is equivalent of np.argsort(array) and torch.argsort(tensor) + """ + if isinstance(array, TorchTensor): + return torch.argsort(array) + elif isinstance(array, np.ndarray): + return np.argsort(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def contiguous(array): + """ + Returns a contiguous array. + + It is equivalent of np.ascontiguousarray(array) and tensor.contiguous(). In + the case of numpy, C order is used for consistency with torch. As such, only + C-contiguity is checked. 
+ """ + if isinstance(array, TorchTensor): + if array.is_contiguous(): + return array + return array.contiguous() + elif isinstance(array, np.ndarray): + if array.flags["C_CONTIGUOUS"]: + return array + return np.ascontiguousarray(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def to_int_list(array) -> List[int]: + if isinstance(array, TorchTensor): + # we need to do it this way because of + # https://github.com/pytorch/pytorch/issues/76295 + return array.to(dtype=torch.int64).tolist() + elif isinstance(array, np.ndarray): + return array.tolist() + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def int_range_like(min_val: int, max_val: int, like): + """ + Returns an array of integers from min to max, non-inclusive, based on the type of + `like` + + It is equivalent of np.arange(start, end) and torch.arange(start, end) for the given + array dtype and device. + """ + if isinstance(like, TorchTensor): + return torch.arange(min_val, max_val, dtype=torch.int64, device=like.device) + elif isinstance(like, np.ndarray): + return np.arange(min_val, max_val).astype(np.int64) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def int_array_like(int_list: Union[List[int], List[List[int]]], like): + """ + Converts the input list of int to a numpy array or torch tensor + based on the type of `like`. + """ + if isinstance(like, TorchTensor): + if torch.jit.isinstance(int_list, List[int]): + return torch.tensor(int_list, dtype=torch.int64, device=like.device) + else: + return torch.tensor(int_list, dtype=torch.int64, device=like.device) + elif isinstance(like, np.ndarray): + return np.array(int_list).astype(np.int64) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def double_array_like(int_list: List[float], like): + """ + Converts the input list of float to a numpy array or torch tensor + based on the array type of `like`. + """ + if isinstance(like, TorchTensor): + return torch.tensor(int_list, dtype=torch.float64, device=like.device) + elif isinstance(like, np.ndarray): + return np.array(int_list).astype(np.float64) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def bool_array_like(bool_list: List[bool], like): + """ + Converts the input list of bool to a numpy array or torch tensor + based on the type of `like`. + """ + if isinstance(like, TorchTensor): + return torch.tensor(bool_list, dtype=torch.bool, device=like.device) + elif isinstance(like, np.ndarray): + return np.array(bool_list).astype(bool) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def cartesian_prod(array1, array2): + """ + Imitates like itertools.product(array1, array2) + """ + if isinstance(array1, TorchTensor) and isinstance(array2, TorchTensor): + return torch.cartesian_prod(array1, array2) + elif isinstance(array1, np.ndarray) and isinstance(array2, np.ndarray): + # using itertools should be fastest way according to + # https://stackoverflow.com/a/28684982 + return np.array(list(itertools.product(array1, array2))) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def all(array, axis: Optional[int] = None): + """Test whether all array elements along a given axis evaluate to True. + + This function has the same behavior as + ``np.all(array,axis=axis)``. + """ + if isinstance(array, bool): + array = np.array(array) + if isinstance(array, list): + array = np.array(array) + + if isinstance(array, TorchTensor): + # torch.all has two implementation, and picks one depending if more than one + # parameter is given. 
The second one does not support setting dim to `None`
+        if axis is None:
+            return torch.all(input=array)
+        else:
+            return torch.all(input=array, dim=axis)
+    elif isinstance(array, np.ndarray):
+        return np.all(a=array, axis=axis)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def max(array):
+    """
+    Takes the maximum value of the array.
+
+    This function has the same behavior as
+    ``np.max(array)`` or ``torch.max(array)``.
+    """
+    if isinstance(array, TorchTensor):
+        return torch.max(array)
+    elif isinstance(array, np.ndarray):
+        return np.max(array)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def any(array):
+    """Test whether any array elements along a given axis evaluate to True.
+
+    This function has the same behavior as
+    ``np.any(array)``.
+    """
+    if isinstance(array, bool):
+        array = np.array(array)
+    if isinstance(array, list):
+        array = np.array(array)
+    if isinstance(array, TorchTensor):
+        return torch.any(array)
+    elif isinstance(array, np.ndarray):
+        return np.any(array)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def allclose(
+    a: TorchTensor,
+    b: TorchTensor,
+    rtol: float,
+    atol: float,
+    equal_nan: bool = False,
+):
+    """Compare two arrays using ``allclose``.
+
+    This function has the same behavior as
+    ``np.allclose(array1, array2, rtol, atol, equal_nan)``.
+    """
+    if isinstance(a, TorchTensor):
+        _check_all_torch_tensor([b])
+        return torch.allclose(
+            input=a, other=b, rtol=rtol, atol=atol, equal_nan=equal_nan
+        )
+    elif isinstance(a, np.ndarray):
+        _check_all_np_ndarray([b])
+        return np.allclose(a=a, b=b, rtol=rtol, atol=atol, equal_nan=equal_nan)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def swapaxes(array, axis0: int, axis1: int):
+    """Swaps axes of an array."""
+    if isinstance(array, TorchTensor):
+        return torch.swapaxes(array, axis0, axis1)
+    elif isinstance(array, np.ndarray):
+        return np.swapaxes(array, axis0, axis1)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def conjugate(array):
+    """
+    Conjugates the array.
+
+    This function has the same behavior as
+    ``np.conjugate(array)`` or ``torch.conj(array)``.
+    """
+    if isinstance(array, TorchTensor):
+        return torch.conj(array)
+    elif isinstance(array, np.ndarray):
+        return np.conjugate(array)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def real(array):
+    """
+    Takes the real part of the array.
+
+    This function has the same behavior as
+    ``np.real(array)`` or ``torch.real(array)``.
+    """
+    if isinstance(array, TorchTensor):
+        return torch.real(array)
+    elif isinstance(array, np.ndarray):
+        return np.real(array)
+    else:
+        raise TypeError(UNKNOWN_ARRAY_TYPE)
+
+
+def imag(array):
+    """
+    Takes the imaginary part of the array.
+
+    This function has the same behavior as
+    ``np.imag(array)`` or ``torch.imag(array)``.
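+
+    A small sketch (numpy backend, illustrative values):
+
+    >>> imag(np.array([1.0 + 2.0j, 3.0 + 0.0j]))
+    array([2., 0.])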
+ """ + if isinstance(array, TorchTensor): + return torch.imag(array) + elif isinstance(array, np.ndarray): + return np.imag(array) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py index fca7f33a2..0760f2d36 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py @@ -1,7 +1 @@ -from .correlate_density import correlate_density, correlate_density_metadata # noqa - - -__all__ = [ - "correlate_density", - "correlate_density_metadata", -] +from .correlate_density import DensityCorrelations # noqa: F401 diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py index 4a6118bd3..1ab14bace 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cg_cache.py @@ -3,12 +3,14 @@ Gordan coefficients for use in CG combinations. """ -from typing import Union +import math +from typing import Dict, List import numpy as np import wigners -from . import _dispatch +from .. import _dispatch +from .._backend import Array, Labels, TensorBlock, TensorMap try: @@ -18,55 +20,109 @@ except ImportError: HAS_MOPS = False + try: + import torch from torch import Tensor as TorchTensor + + torch_dtype = torch.dtype + torch_device = torch.device + + HAS_TORCH = True except ImportError: + HAS_TORCH = False class TorchTensor: pass + class torch_dtype: + pass + + class torch_device: + pass + UNKNOWN_ARRAY_TYPE = ( "unknown array type, only numpy arrays and torch tensors are supported" ) -# ================================= -# ===== ClebschGordanReal class -# ================================= +def calculate_cg_coefficients( + lambda_max: int, + sparse: bool = True, + use_torch: bool = False, +) -> TensorMap: + """ + Calculates the Clebsch-Gordan coefficients for all possible combination of l1 and + l2, up to ``lambda_max``. Returns them in :py:class:`TensorMap` format. + The output data structure of the output :py:class:`TensorMap` depends on whether the + backend used to perform CG tensor products uses sparse or dense operations. -class ClebschGordanReal: - """ - Class for computing Clebsch-Gordan coefficients for real spherical - harmonics. + Dense: + - samples: `_`, i.e. a dummy sample. + - components: `[(m1,), (m2,), (mu,)]`, i.e. on separate components axes, + where `m1` and `m2` are the m component values for the two arrays + being combined and `mu` is the m component value for the resulting + array. + - properties: `cg_coefficient` - Stores the coefficients in a dictionary in the `self.coeffs` attribute, - which is built at initialization. There are 3 current use cases for the - format of these coefficients. By default, sparse accumulation of products is - performed, whether or not Mops is installed. + Sparse: + - samples: `(m1, m2, mu)`, where `m1` and `m2` are the m component + values for the two arrays being combined and `mu` is the m component + value for the resulting array. + - components: `[]`, i.e. no components axis. + - properties: `cg_coefficient` - Case 1: standard sparse format. - Each dictionary entry is a dictionary with entries for each (m1, m2, mu) - combination. + :param lambda_max: maximum angular momentum value to compute CG coefficients for. + :param sparse: whether to store the CG coefficients in sparse format. 
+ :param use_torch: whether torch tensor or numpy arrays should be used for the cg + coeffs + + :returns: :py:class:`TensorMap` of the Clebsch-Gordan coefficients. + """ + # Build some 'like' arrays for the backend dispatch + if use_torch: + complex_like = torch.empty(0, dtype=torch.complex128) + double_like = torch.empty(0, dtype=torch.double) + if isinstance(Labels, torch.ScriptClass): + labels_values_like = torch.empty(0, dtype=torch.double) + else: + labels_values_like = np.empty(0, dtype=np.double) + else: + complex_like = np.empty(0, dtype=np.complex128) + double_like = np.empty(0, dtype=np.double) + labels_values_like = np.empty(0, dtype=np.double) + + # Calculate the CG coefficients, stored as a dict of dense arrays. This is the + # starting point for the conversion to a TensorMap of different formats depending on + # the backend options. + cg_coeff_dict = _build_dense_cg_coeff_dict( + lambda_max, complex_like, double_like, labels_values_like + ) + + # Build the CG cache depending on whether the CG backend is sparse or dense. The + # dispatching of the arrays backends are accounted for by `double_like` and + # `labels_values_like`. + if sparse: + return _cg_coeff_dict_to_tensormap_sparse( + cg_coeff_dict, double_like, labels_values_like + ) + + return _cg_coeff_dict_to_tensormap_dense( + cg_coeff_dict, double_like, labels_values_like + ) - { - (l1, l2, lambda): { - (m1, m2, mu) : cg_{m1, m2, mu}^{l1, l2, lambda} - for m1 in range(-l1, l1 + 1), - for m2 in range(-l2, l2 + 1), - }, - ... - for l1 in range(0, l1_list) - for l2 in range(0, l2_list) - for lambda in range(0, range(|l1 - l2|, ..., |l1 + l2|)) - } - Case 2: standard dense format. +def _build_dense_cg_coeff_dict( + lambda_max: int, complex_like: Array, double_like: Array, labels_values_like: Array +) -> Dict[int, Array]: + """ + Calculates the CG coefficients and stores them as dense arrays in a dictionary. - Each dictionary entry is a dense array with shape (2 * l1 + 1, 2 * l2 + 1, 2 - * lambda + 1). + Each dictionary entry is a dense array with shape (2 * l1 + 1, 2 * l2 + 1, 2 * + lambda + 1). { (l1, l2, lambda): @@ -85,173 +141,164 @@ class ClebschGordanReal: for lambda in range(0, range(|l1 - l2|, ..., |l1 + l2|)) } - Case 3: MOPS sparse format. + where `cg_{m1, m2, mu}^{l1, l2, lambda}` is the Clebsch-Gordan coefficient that + describes the combination of the `m1` irreducible component of the `l1` angular + channel and the `m2` irreducible component of the `l2` angular channel into the + irreducible tensor of order `lambda`. In all cases, these correspond to the non-zero + CG coefficients, i.e. those in the range |-l, ..., +l| for each angular order l in + {l1, l2, lambda}. + + :param lambda_max: maximum angular momentum value to compute CG coefficients for. + :param complex_like: an empty array of dtype complex, used for dispatching + operations + :param double_like: an empty array of dtype double, used for dispatching operations + :param labels_values_like: an empty array of dtype int32, used for dispatching + operations + + :returns: dictionary of dense CG coefficients. + """ + # real-to-complex and complex-to-real transformations as matrices + r2c: Dict[int, Array] = {} + c2r: Dict[int, Array] = {} + coeff_dict = {} - Each dictionary entry contains a tuple with four 1D arrays, corresponding to - the CG coeffs and m1, m2, mu indices respectively. All of these arrays are - sorted according to the mu index. This format is used for Sparse - Accumulation of Products (SAP) as implemented in MOPS. 
See - https://github.com/lab-cosmo/mops . + for o3_lambda in range(0, lambda_max + 1): + c2r[o3_lambda] = _complex2real(o3_lambda, like=complex_like) + r2c[o3_lambda] = _real2complex(o3_lambda, like=complex_like) - { - (l1, l2, lambda): - ( - [ - cg_{m1, m2, mu}^{l1, l2, lambda} - ... - for m1 in range(-l1, l1 + 1), - for m2 in range(-l2, l2 + 1), - for mu in range(-lambda, lambda + 1) - ], - [ - m1 for m1 in range(-l1, l1 + 1), - ], - [ - m2 for m2 in range(-l2, l2 + 1), - ], - [ - mu for mu in range(-lambda, lambda + 1), - ], - ) + for l1 in range(lambda_max + 1): + for l2 in range(lambda_max + 1): + for o3_lambda in range( + max(l1, l2) - min(l1, l2), min(lambda_max, (l1 + l2)) + 1 + ): + complex_cg = _complex_clebsch_gordan_matrix( + l1, l2, o3_lambda, complex_like + ) + real_cg = (r2c[l1].T @ complex_cg.reshape(2 * l1 + 1, -1)).reshape( + complex_cg.shape + ) - } + real_cg = real_cg.swapaxes(0, 1) + real_cg = (r2c[l2].T @ real_cg.reshape(2 * l2 + 1, -1)).reshape( + real_cg.shape + ) + real_cg = real_cg.swapaxes(0, 1) - where `cg_{m1, m2, mu}^{l1, l2, lambda}` is the Clebsch-Gordan coefficient - that describes the combination of the `m1` irreducible component of the `l1` - angular channel and the `m2` irreducible component of the `l2` angular - channel into the irreducible tensor of order `lambda`. In all cases, these - correspond to the non-zero CG coefficients, i.e. those in the range |-l, - ..., +l| for each angular order l in {l1, l2, lambda}. + real_cg = real_cg @ c2r[o3_lambda].T - :param lambda_max: maximum lambda value to compute CG coefficients for. - :param sparse: whether to store the CG coefficients in sparse format. - :param use_mops: whether to store the CG coefficients in MOPS sparse format. - This is recommended as the default for sparse accumulation, but can only - be used if Mops is installed. + if (l1 + l2 + o3_lambda) % 2 == 0: + cg_l1l2lam_dense = _dispatch.real(real_cg) + else: + cg_l1l2lam_dense = _dispatch.imag(real_cg) + + coeff_dict[(l1, l2, o3_lambda)] = cg_l1l2lam_dense + + return coeff_dict + + +def _cg_coeff_dict_to_tensormap_dense( + coeff_dict: Dict, double_like: Array, labels_values_like: Array +) -> TensorMap: + """ + Converts the dictionary of dense CG coefficients coefficients to + :py:class:`TensorMap` format, specifically for performing CG tensor products with + the "python-dense" backend. 
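+
+    As a sketch of the resulting block structure (the key values below are
+    illustrative): the dense block for ``l1=1, l2=1, lambda=0`` has one dummy
+    sample, three component axes of sizes ``(3, 3, 1)``, and one property:
+
+    >>> cg = calculate_cg_coefficients(lambda_max=1, sparse=False)
+    >>> cg.block({"l1": 1, "l2": 1, "lambda": 0}).values.shape
+    (1, 3, 3, 1, 1)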
""" + keys = Labels( + ["l1", "l2", "lambda"], + _dispatch.int_array_like(list(coeff_dict.keys()), labels_values_like), + ) + blocks = [] + + for l1l2lam_values in coeff_dict.values(): + # extending shape by samples and properties + block_value_shape = (1,) + l1l2lam_values.shape + (1,) + blocks.append( + TensorBlock( + values=_dispatch.contiguous(l1l2lam_values.reshape(block_value_shape)), + samples=Labels.range("_", 1), + components=[ + Labels( + ["m1"], + _dispatch.int_range_like( + 0, l1l2lam_values.shape[0], labels_values_like + ).reshape(-1, 1), + ), + Labels( + ["m2"], + _dispatch.int_range_like( + 0, l1l2lam_values.shape[1], labels_values_like + ).reshape(-1, 1), + ), + Labels( + ["mu"], + _dispatch.int_range_like( + 0, l1l2lam_values.shape[2], labels_values_like + ).reshape(-1, 1), + ), + ], + properties=Labels.range("cg_coefficient", 1), + ) + ) - def __init__(self, lambda_max: int, sparse: bool = True, use_mops: bool = HAS_MOPS): - self._lambda_max = lambda_max - self._sparse = sparse - - if sparse: - if not HAS_MOPS: - # TODO: provide a warning once Mops is fully ready - # import warnings - # warnings.warn( - # "It is recommended to use MOPS for sparse accumulation. " - # " This can be installed with ``pip install" - # " git+https://github.com/lab-cosmo/mops`." - # " Falling back to numpy for now." - # ) - self._use_mops = False - else: - self._use_mops = True + return TensorMap(keys, blocks) - else: - # TODO: provide a warning once Mops is fully ready - # if HAS_MOPS: - # import warnings - # warnings.warn( - # "Mops is installed, but not being used" - # " as dense operations chosen." - # ) - self._use_mops = False - - self._coeffs = ClebschGordanReal.build_coeff_dict( - self._lambda_max, - self._sparse, - self._use_mops, + +def _cg_coeff_dict_to_tensormap_sparse( + coeff_dict: Dict, double_like: Array, labels_values_like: Array +) -> TensorMap: + """ + Converts the dictionary of dense CG coefficients coefficients to + :py:class:`TensorMap` format, specifically for performing CG tensor products with + the "python-sparse" backend. 
+ """ + dict_keys = list(coeff_dict.keys()) + keys = Labels( + ["l1", "l2", "lambda"], + _dispatch.int_array_like(list(dict_keys), labels_values_like), + ) + blocks = [] + + # For each (l1, l2, lambda) combination, build a TensorBlock of non-zero CG coeffs + for l1, l2, o3_lambda in dict_keys: + cg_l1l2lam_dense = coeff_dict[(l1, l2, o3_lambda)] + + # Find the dense indices of the non-zero CG coeffs + nonzeros_cg_coeffs_idx = _dispatch.where( + _dispatch.abs(cg_l1l2lam_dense) > 1e-15 + ) + + # Create a sparse dictionary indexed by of the non-zero CG coeffs + cg_l1l2lam_sparse = {} + for i in range(len(nonzeros_cg_coeffs_idx[0])): + m1 = nonzeros_cg_coeffs_idx[0][i] + m2 = nonzeros_cg_coeffs_idx[1][i] + mu = nonzeros_cg_coeffs_idx[2][i] + cg_l1l2lam_sparse[(m1, m2, mu)] = cg_l1l2lam_dense[m1, m2, mu] + + l1l2lam_sample_values = [] + for m1m2mu_key in cg_l1l2lam_sparse.keys(): + l1l2lam_sample_values.append(m1m2mu_key) + # extending shape by samples and properties + values = _dispatch.double_array_like( + [*cg_l1l2lam_sparse.values()], double_like + ).reshape(-1, 1) + l1l2lam_sample_values = _dispatch.int_array_like( + l1l2lam_sample_values, labels_values_like + ) + # we have to move put the m1 m2 m3 inside a block so we can access it easier + # inside cg combine function, + blocks.append( + TensorBlock( + values=_dispatch.contiguous(values), + samples=Labels(["m1", "m2", "mu"], l1l2lam_sample_values), + components=[], + properties=Labels.range("cg_coefficient", 1), + ) ) - @property - def lambda_max(self): - return self._lambda_max - - @property - def sparse(self): - return self._sparse - - @property - def use_mops(self): - return self._use_mops - - @property - def coeffs(self): - return self._coeffs - - @staticmethod - def build_coeff_dict(lambda_max: int, sparse: bool, use_mops: bool): - """ - Builds a dictionary of Clebsch-Gordan coefficients for all possible - combination of l1 and l2, up to lambda_max. - """ - # real-to-complex and complex-to-real transformations as matrices - r2c = {} - c2r = {} - coeff_dict = {} - for lambda_ in range(0, lambda_max + 1): - c2r[lambda_] = _complex2real(lambda_) - r2c[lambda_] = _real2complex(lambda_) - - for l1 in range(lambda_max + 1): - for l2 in range(lambda_max + 1): - for lambda_ in range( - max(l1, l2) - min(l1, l2), min(lambda_max, (l1 + l2)) + 1 - ): - complex_cg = _complex_clebsch_gordan_matrix(l1, l2, lambda_) - - real_cg = (r2c[l1].T @ complex_cg.reshape(2 * l1 + 1, -1)).reshape( - complex_cg.shape - ) - - real_cg = real_cg.swapaxes(0, 1) - real_cg = (r2c[l2].T @ real_cg.reshape(2 * l2 + 1, -1)).reshape( - real_cg.shape - ) - real_cg = real_cg.swapaxes(0, 1) - - real_cg = real_cg @ c2r[lambda_].T - - if (l1 + l2 + lambda_) % 2 == 0: - cg_l1l2lam = np.real(real_cg) - else: - cg_l1l2lam = np.imag(real_cg) - - if sparse: - # Find the m1, m2, mu idxs of the nonzero CG coeffs - nonzeros_cg_coeffs_idx = np.where(np.abs(cg_l1l2lam) > 1e-15) - if use_mops: - # Store CG coeffs in a specific format for use in - # MOPS. Here we need the m1, m2, mu, and CG coeffs - # to be stored as separate 1D arrays. 
- m1_arr, m2_arr, mu_arr, C_arr = [], [], [], [] - for m1, m2, mu in zip(*nonzeros_cg_coeffs_idx): - m1_arr.append(m1) - m2_arr.append(m2) - mu_arr.append(mu) - C_arr.append(cg_l1l2lam[m1, m2, mu]) - - # Reorder the arrays based on sorted mu values - mu_idxs = np.argsort(mu_arr) - m1_arr = np.array(m1_arr)[mu_idxs] - m2_arr = np.array(m2_arr)[mu_idxs] - mu_arr = np.array(mu_arr)[mu_idxs] - C_arr = np.array(C_arr)[mu_idxs] - cg_l1l2lam = (C_arr, m1_arr, m2_arr, mu_arr) - else: - # Otherwise fall back to torch/numpy and store as - # sparse dicts. - cg_l1l2lam = { - (m1, m2, mu): cg_l1l2lam[m1, m2, mu] - for m1, m2, mu in zip(*nonzeros_cg_coeffs_idx) - } - - # Store - coeff_dict[(l1, l2, lambda_)] = cg_l1l2lam - - return coeff_dict + return TensorMap(keys, blocks) # ============================ @@ -259,74 +306,80 @@ def build_coeff_dict(lambda_max: int, sparse: bool, use_mops: bool): # ============================ -def _real2complex(lambda_: int) -> np.ndarray: +def _real2complex(o3_lambda: int, like: Array) -> Array: """ - Computes a matrix that can be used to convert from real to complex-valued - spherical harmonics(coefficients) of order ``lambda_``. + Computes a matrix that can be used to convert from real to complex-valued spherical + harmonics(coefficients) of order ``o3_lambda``. + + This is meant to be applied to the left: + ``real2complex @ [-o3_lambda, ..., +o3_lambda]``. - This is meant to be applied to the left: ``real2complex @ [-lambda_, ..., - +lambda_]``. + See https://en.wikipedia.org/wiki/Spherical_harmonics#Real_form for details on the + convention for how these tranformations are defined. - See https://en.wikipedia.org/wiki/Spherical_harmonics#Real_form for details - on the convention for how these tranformations are defined. + Operations are dispatched to the corresponding array type given by ``like`` """ - result = np.zeros((2 * lambda_ + 1, 2 * lambda_ + 1), dtype=np.complex128) - inv_sqrt_2 = 1.0 / np.sqrt(2) - i_sqrt_2 = 1j / np.sqrt(2) - for m in range(-lambda_, lambda_ + 1): + result = _dispatch.zeros_like(like, shape=(2 * o3_lambda + 1, 2 * o3_lambda + 1)) + inv_sqrt_2 = 1.0 / math.sqrt(2.0) + i_sqrt_2 = 1.0j / complex(math.sqrt(2.0)) + + for m in range(-o3_lambda, o3_lambda + 1): if m < 0: # Positve part - result[lambda_ + m, lambda_ + m] = +i_sqrt_2 + result[o3_lambda + m, o3_lambda + m] = i_sqrt_2 # Negative part - result[lambda_ - m, lambda_ + m] = -i_sqrt_2 * ((-1) ** m) + result[o3_lambda - m, o3_lambda + m] = -i_sqrt_2 * ((-1) ** m) if m == 0: - result[lambda_, lambda_] = +1.0 + result[o3_lambda, o3_lambda] = 1.0 if m > 0: # Negative part - result[lambda_ - m, lambda_ + m] = +inv_sqrt_2 + result[o3_lambda - m, o3_lambda + m] = inv_sqrt_2 # Positive part - result[lambda_ + m, lambda_ + m] = +inv_sqrt_2 * ((-1) ** m) + result[o3_lambda + m, o3_lambda + m] = inv_sqrt_2 * ((-1) ** m) return result -def _complex2real(lambda_: int) -> np.ndarray: +def _complex2real(o3_lambda: int, like) -> Array: """ Converts from complex to real spherical harmonics. This is just given by the conjugate tranpose of the real->complex transformation matrices. 
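+
+    As a quick sanity check (a sketch, assuming a complex-valued numpy ``like``
+    array), composing the two transformations recovers the identity:
+
+    >>> import numpy as np
+    >>> like = np.empty(0, dtype=np.complex128)
+    >>> np.allclose(_complex2real(1, like) @ _real2complex(1, like), np.eye(3))
+    True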
+ + Operations are dispatched to the corresponding array type given by ``like`` """ - return np.conjugate(_real2complex(lambda_)).T + return _dispatch.conjugate(_real2complex(o3_lambda, like)).T -def _complex_clebsch_gordan_matrix(l1, l2, lambda_): +def _complex_clebsch_gordan_matrix(l1: int, l2: int, o3_lambda: int, like: Array): r"""clebsch-gordan matrix Computes the Clebsch-Gordan (CG) matrix for transforming complex-valued spherical harmonics. The CG matrix is computed as a 3D array of elements - < l1 m1 l2 m2 | lambda_ mu > + < l1 m1 l2 m2 | o3_lambda mu > where the first axis loops over m1, the second loops over m2, and the third one loops over mu. The matrix is real. For example, using the relation: - | l1 l2 lambda_ mu > = + | l1 l2 o3_lambda mu > = \sum_{m1, m2} - | l1 m1 > | l2 m2 > + | l1 m1 > | l2 m2 > (https://en.wikipedia.org/wiki/Clebsch–Gordan_coefficients, section "Formal definition of Clebsch-Gordan coefficients", eq 2) - one can obtain the spherical harmonics lambda_ from two sets of + one can obtain the spherical harmonics o3_lambda from two sets of spherical harmonics with l1 and l2 (up to a normalization factor). E.g.: Args: l1: l number for the first set of spherical harmonics l2: l number for the second set of spherical harmonics - lambda_: l number For the third set of spherical harmonics + o3_lambda: l number For the third set of spherical harmonics + like: Operations are dispatched to the corresponding this arguments array type Returns: cg: CG matrix for transforming complex-valued spherical harmonics >>> from scipy.special import sph_harm >>> import numpy as np >>> import wigners - >>> C_112 = _complex_clebsch_gordan_matrix(1, 1, 2) + >>> C_112 = _complex_clebsch_gordan_matrix(1, 1, 2, np.empty(0)) >>> comp_sph_1 = np.array([sph_harm(m, 1, 0.2, 0.2) for m in range(-1, 1 + 1)]) >>> comp_sph_2 = np.array([sph_harm(m, 1, 0.2, 0.2) for m in range(-1, 1 + 1)]) >>> # obtain the (unnormalized) spherical harmonics @@ -339,10 +392,10 @@ def _complex_clebsch_gordan_matrix(l1, l2, lambda_): >>> np.allclose(ratio[0], ratio) True """ - if np.abs(l1 - l2) > lambda_ or np.abs(l1 + l2) < lambda_: - return np.zeros((2 * l1 + 1, 2 * l2 + 1, 2 * lambda_ + 1), dtype=np.double) + if abs(l1 - l2) > o3_lambda or abs(l1 + l2) < o3_lambda: + return _dispatch.zeros_like(like, (2 * l1 + 1, 2 * l2 + 1, 2 * o3_lambda + 1)) else: - return wigners.clebsch_gordan_array(l1, l2, lambda_) + return wigners.clebsch_gordan_array(l1, l2, o3_lambda) # ================================================= @@ -351,202 +404,271 @@ def _complex_clebsch_gordan_matrix(l1, l2, lambda_): def combine_arrays( - arr_1: Union[np.ndarray, TorchTensor], - arr_2: Union[np.ndarray, TorchTensor], - lambda_: int, - cg_cache, - return_empty_array: bool = False, -) -> Union[np.ndarray, TorchTensor]: + array_1: Array, + array_2: Array, + o3_lambda: int, + cg_coeffs: TensorMap, + cg_backend: str, +) -> Array: """ - Couples arrays `arr_1` and `arr_2` corresponding to the irreducible - spherical components of 2 angular channels l1 and l2 using the appropriate - Clebsch-Gordan coefficients. As l1 and l2 can be combined to form multiple - lambda channels, this function returns the coupling to a single specified - channel `lambda`. The angular channels l1 and l2 are inferred from the size - of the components axis (axis 1) of the input arrays. - - `arr_1` has shape (n_i, 2 * l1 + 1, n_p) and `arr_2` has shape (n_i, 2 * l2 - + 1, n_q). n_i is the number of samples, n_p and n_q are the number of - properties in each array. 
The number of samples in each array must be the
-    same.
-
-    The ouput array has shape (n_i, 2 * lambda + 1, n_p * n_q), where lambda is
-    the input parameter `lambda_`.
-
-    The Clebsch-Gordan coefficients are cached in `cg_cache`. Currently, these
-    must be produced by the ClebschGordanReal class in this module. These
-    coefficients can be stored in either sparse dictionaries or dense arrays.
-
-    The combination operation is dispatched such that numpy arrays or torch
-    tensors are automatically handled.
-
-    `return_empty_array` can be used to return an empty array of the correct
-    shape, without performing the CG combination step. This can be useful for
-    probing the outputs of CG iterations in terms of metadata without the
-    computational cost of performing the CG combinations - i.e. using the
-    function :py:func:`combine_single_center_to_body_order_metadata_only`.
-
-    :param arr_1: array with the m values for l1 with shape [n_samples, 2 * l1 +
-        1, n_q_properties]
-    :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 +
-        1, n_p_properties]
-    :param lambda_: int value of the resulting coupled channel
-    :param cg_cache: either a sparse dictionary with keys (m1, m2, mu) and array
-        values being sparse blocks of shape , or a dense array
-        of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + 1)].
-
-    :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties]
+    Couples arrays `array_1` and `array_2` corresponding to the irreducible spherical
+    components of 2 angular channels l1 and l2 using the appropriate Clebsch-Gordan
+    coefficients. As l1 and l2 can be combined to form multiple lambda channels, this
+    function returns the coupling to a single specified channel `lambda`. The angular
+    channels l1 and l2 are inferred from the size of the components axis (axis 1) of the
+    input arrays.
+
+    `array_1` has shape (n_i, 2 * l1 + 1, n_p) and `array_2` has shape (n_i, 2 * l2 + 1,
+    n_q). n_i is the number of samples, n_p and n_q are the number of properties in each
+    array. The number of samples in each array must be the same.
+
+    The output array has shape (n_i, 2 * o3_lambda + 1, n_p * n_q), where `o3_lambda` is
+    the input parameter.
+
+    The Clebsch-Gordan coefficients are cached in `cg_coeffs`. Currently, these must be
+    produced by :py:func:`calculate_cg_coefficients` in this module. These coefficients
+    can be stored in either sparse or dense :py:class:`TensorMap` format.
+
+    The combination operation is dispatched such that numpy arrays or torch tensors are
+    automatically handled.
+
+    Passing ``cg_backend="metadata"`` returns an empty array of the correct shape,
+    without performing the CG combination step. This can be useful for probing the
+    outputs of CG iterations in terms of metadata without the computational cost of
+    performing the CG combinations.
+
+    :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 + 1,
+        n_q_properties]
+    :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 + 1,
+        n_p_properties]
+    :param o3_lambda: int value of the resulting coupled channel
+    :param cg_coeffs: a :py:class:`TensorMap` containing CG coefficients in a format for
+        either sparse or dense CG tensor products, as returned by
+        :py:func:`calculate_cg_coefficients`. See the function docstring for details on
+        the data structure. Only used if ``cg_backend`` is not ``"metadata"``.
+    :param cg_backend: specifies the combine backend with sparse CG coefficients. It can
+        have the values "python-dense", "python-sparse", "mops" and "metadata". If
+        "python-dense" or "python-sparse" is chosen, a dense or sparse combination
+        (respectively) of the arrays is performed using either numpy or torch, depending
+        on the backend. If "mops" is chosen, a sparse combination of the arrays is
+        performed if the external package MOPS is installed. If "metadata" is chosen, no
+        combination is performed, and an empty array of the correct shape is returned.
+
+
+    :returns: array of shape [n_samples, (2*o3_lambda+1), q_properties * p_properties]
    """
    # If just precomputing metadata, return an empty array
-    if return_empty_array:
-        return sparse_combine(arr_1, arr_2, lambda_, cg_cache, return_empty_array=True)
+    if cg_backend == "metadata":
+        return empty_combine(array_1, array_2, o3_lambda)
+
+    if cg_backend == "python-sparse" or cg_backend == "mops":
+        return sparse_combine(array_1, array_2, o3_lambda, cg_coeffs, cg_backend)
+    elif cg_backend == "python-dense":
+        return dense_combine(array_1, array_2, o3_lambda, cg_coeffs)
+    else:
+        raise ValueError(
+            f"Wrong cg_backend, got '{cg_backend}', but only 'python-dense',"
+            " 'python-sparse', 'mops' and 'metadata' are supported."
+        )
-
-    # Otherwise, perform the CG combination
-    # Spare CG cache
-    if cg_cache.sparse:
-        return sparse_combine(arr_1, arr_2, lambda_, cg_cache, return_empty_array=False)
-    # Dense CG cache
-    return dense_combine(arr_1, arr_2, lambda_, cg_cache)
+
+def empty_combine(
+    array_1: Array,
+    array_2: Array,
+    o3_lambda: int,
+) -> Array:
+    """
+    Returns an empty array of the correct shape, imitating the output array shape
+    produced by a CG combination of ``array_1`` and ``array_2``.
+    """
+    # Samples dimensions must be the same
+    assert array_1.shape[0] == array_2.shape[0]
+
+    # Define other useful dimensions
+    n_i = array_1.shape[0]  # number of samples
+    n_p = array_1.shape[2]  # number of properties in array_1
+    n_q = array_2.shape[2]  # number of properties in array_2
+
+    return _dispatch.empty_like(array_1, (n_i, 2 * o3_lambda + 1, n_p * n_q))

def sparse_combine(
-    arr_1: Union[np.ndarray, TorchTensor],
-    arr_2: Union[np.ndarray, TorchTensor],
-    lambda_: int,
-    cg_cache,
-    return_empty_array: bool = False,
-) -> Union[np.ndarray, TorchTensor]:
+    array_1: Array,
+    array_2: Array,
+    o3_lambda: int,
+    cg_coeffs: TensorMap,
+    cg_backend: str,
+) -> Array:
    """
-    Performs a Clebsch-Gordan combination step on 2 arrays using sparse
-    operations. The angular channel of each block is inferred from the size of
-    its component axis, and the blocks are combined to the desired output
-    angular channel `lambda_` using the appropriate Clebsch-Gordan coefficients.
-
-    :param arr_1: array with the m values for l1 with shape [n_samples, 2 * l1 +
-        1, n_q_properties]
-    :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 +
-        1, n_p_properties]
-    :param lambda_: int value of the resulting coupled channel
-    :param cg_cache: sparse dictionary with keys (m1, m2, mu) and array values
-        being sparse blocks of shape
-
-    :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties]
+    Performs a Clebsch-Gordan combination step on 2 arrays using sparse operations. The
+    angular channel of each block is inferred from the size of its component axis, and
+    the blocks are combined to the desired output angular channel `o3_lambda` using the
+    appropriate Clebsch-Gordan coefficients.
+
+    :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 + 1,
+        n_q_properties]
+    :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 + 1,
+        n_p_properties]
+    :param o3_lambda: int value of the resulting coupled channel
+    :param cg_coeffs: a :py:class:`TensorMap` containing CG coefficients in a format for
+        either sparse or dense CG tensor products, as returned by
+        :py:func:`calculate_cg_coefficients`. See the function docstring for details on
+        the data structure. Only used if ``cg_backend`` is not ``"metadata"``.
+    :param cg_backend: specifies the combine backend with sparse CG coefficients. It can
+        have the values "python-dense", "python-sparse", "mops" and "metadata". If
+        "python-dense" or "python-sparse" is chosen, a dense or sparse combination
+        (respectively) of the arrays is performed using either numpy or torch, depending
+        on the backend. If "mops" is chosen, a sparse combination of the arrays is
+        performed if the external package MOPS is installed. If "metadata" is chosen, no
+        combination is performed, and an empty array of the correct shape is returned.
+
+    :returns: array of shape [n_samples, (2*o3_lambda+1), q_properties * p_properties]
    """
    # Samples dimensions must be the same
-    assert arr_1.shape[0] == arr_2.shape[0]
+    assert array_1.shape[0] == array_2.shape[0]

    # Infer l1 and l2 from the length of axis 1 of each tensor
-    l1 = (arr_1.shape[1] - 1) // 2
-    l2 = (arr_2.shape[1] - 1) // 2
+    l1 = (array_1.shape[1] - 1) // 2
+    l2 = (array_2.shape[1] - 1) // 2

    # Define other useful dimensions
-    n_i = arr_1.shape[0]  # number of samples
-    n_p = arr_1.shape[2]  # number of properties in arr_1
-    n_q = arr_2.shape[2]  # number of properties in arr_2
+    n_i = array_1.shape[0]  # number of samples
+    n_p = array_1.shape[2]  # number of properties in array_1
+    n_q = array_2.shape[2]  # number of properties in array_2
+
+    # The isinstance checks and the cg_backend checks make the logic a bit redundant,
+    # but the isinstance check is required for TorchScript. The logic can be made more
+    # straightforward once MOPS supports TorchScript.
+    if isinstance(array_1, TorchTensor) or cg_backend == "python-sparse":
+        # Initialise output array
+        array_out = _dispatch.zeros_like(array_1, (n_i, 2 * o3_lambda + 1, n_p * n_q))

-    if return_empty_array:  # used when only computing metadata
-        return _dispatch.zeros_like((n_i, 2 * lambda_ + 1, n_p * n_q), like=arr_1)
+        # Get the corresponding Clebsch-Gordan coefficients
+        # Fill in each mu component of the output array in turn
+        cg_l1l2lam = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": o3_lambda})
+        for i in range(len(cg_l1l2lam.samples)):
+            m1m2mu_key = cg_l1l2lam.samples.entry(i)
+            m1 = m1m2mu_key[0]
+            m2 = m1m2mu_key[1]
+            mu = m1m2mu_key[2]
+            # Broadcast arrays, multiply together and with CG coeff
+            array_out[:, mu, :] += (
+                array_1[:, m1, :, None]
+                * array_2[:, m2, None, :]
+                * cg_l1l2lam.values[i, 0]
+            ).reshape(n_i, n_p * n_q)

-    if isinstance(arr_1, np.ndarray) and HAS_MOPS:
-        # Reshape
-        arr_1 = np.repeat(arr_1[:, :, :, None], n_q, axis=3).reshape(
+        return array_out
+    elif isinstance(array_1, np.ndarray) and cg_backend == "mops":
+        # MOPS sparse accumulation requires some reshaping of the input arrays. See
+        # https://github.com/lab-cosmo/mops . Currently only supported for a numpy array
+        # backend.
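+        #
+        # As an illustrative walk-through (sizes assumed, not from the original
+        # code): with n_i=2, l1=l2=1, n_p=3, n_q=4, array_1 goes
+        # (2, 3, 3) -> repeat -> (2, 3, 12) -> swapaxes/reshape -> (24, 3), and
+        # array_2 goes (2, 3, 4) -> (2, 3, 12) -> (24, 3), so that each row pairs
+        # one (sample, p, q) combination with its m components before the sparse
+        # accumulation over (m1, m2) -> mu.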
+ array_1 = np.repeat(array_1[:, :, :, None], n_q, axis=3).reshape( n_i, 2 * l1 + 1, n_p * n_q ) - arr_2 = np.repeat(arr_2[:, :, None, :], n_p, axis=2).reshape( + array_2 = np.repeat(array_2[:, :, None, :], n_p, axis=2).reshape( n_i, 2 * l2 + 1, n_p * n_q ) - arr_1 = _dispatch.swapaxes(arr_1, 1, 2).reshape(n_i * n_p * n_q, 2 * l1 + 1) - arr_2 = _dispatch.swapaxes(arr_2, 1, 2).reshape(n_i * n_p * n_q, 2 * l2 + 1) + array_1 = _dispatch.swapaxes(array_1, 1, 2).reshape(n_i * n_p * n_q, 2 * l1 + 1) + array_2 = _dispatch.swapaxes(array_2, 1, 2).reshape(n_i * n_p * n_q, 2 * l2 + 1) - # Do SAP - arr_out = sap( - arr_1, - arr_2, - *cg_cache._coeffs[(l1, l2, lambda_)], - output_size=2 * lambda_ + 1, - ) - assert arr_out.shape == (n_i * n_p * n_q, 2 * lambda_ + 1) - - # Reshape back - arr_out = arr_out.reshape(n_i, n_p * n_q, 2 * lambda_ + 1) - arr_out = _dispatch.swapaxes(arr_out, 1, 2) - - return arr_out + # We also need to pass SAP the CG coefficients and m1, m2, and mu indices as 1D + # arrays. Extract these from the corresponding TensorBlock in the TensorMap CG + # cache. + block = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": o3_lambda}) + samples = block.samples - if isinstance(arr_1, np.ndarray) or isinstance(arr_1, TorchTensor): - # Initialise output array - arr_out = _dispatch.zeros_like((n_i, 2 * lambda_ + 1, n_p * n_q), like=arr_1) + m1_arr: List[int] = [] + m2_arr: List[int] = [] + mu_arr: List[int] = [] + C_arr: List[float] = [] + for sample_i, (m1, m2, mu) in enumerate(samples): - # Get the corresponding Clebsch-Gordan coefficients - cg_coeffs = cg_cache.coeffs[(l1, l2, lambda_)] - - # Fill in each mu component of the output array in turn - for m1, m2, mu in cg_coeffs.keys(): - # Broadcast arrays, multiply together and with CG coeff - arr_out[:, mu, :] += ( - arr_1[:, m1, :, None] * arr_2[:, m2, None, :] * cg_coeffs[(m1, m2, mu)] - ).reshape(n_i, n_p * n_q) + m1_arr.append(int(m1)) + m2_arr.append(int(m2)) + mu_arr.append(int(mu)) + C_arr.append(float(block.values[sample_i, 0])) - return arr_out + # Do SAP + array_out = sap( + A=array_1, + B=array_2, + C=C_arr, + indices_A=m1_arr, + indices_B=m2_arr, + indices_output=mu_arr, + output_size=2 * o3_lambda + 1, + ) + assert array_out.shape == (n_i * n_p * n_q, 2 * o3_lambda + 1) + # Reshape back + array_out = array_out.reshape(n_i, n_p * n_q, 2 * o3_lambda + 1) + array_out = _dispatch.swapaxes(array_out, 1, 2) + + return array_out + elif cg_backend not in ["python-sparse", "mops"]: + raise ValueError( + f"sparse cg backend '{cg_backend}' is not known. " + "Only values 'python-sparse' and 'mops' are valid." + ) else: raise TypeError(UNKNOWN_ARRAY_TYPE) def dense_combine( - arr_1: Union[np.ndarray, TorchTensor], - arr_2: Union[np.ndarray, TorchTensor], - lambda_: int, - cg_cache, -) -> Union[np.ndarray, TorchTensor]: + array_1: Array, + array_2: Array, + o3_lambda: int, + cg_coeffs: TensorMap, +) -> Array: """ - Performs a Clebsch-Gordan combination step on 2 arrays using a dense - operation. The angular channel of each block is inferred from the size of - its component axis, and the blocks are combined to the desired output - angular channel `lambda_` using the appropriate Clebsch-Gordan coefficients. 
- - :param arr_1: array with the m values for l1 with shape [n_samples, 2 * l1 + - 1, n_q_properties] - :param arr_2: array with the m values for l2 with shape [n_samples, 2 * l2 + - 1, n_p_properties] - :param lambda_: int value of the resulting coupled channel - :param cg_cache: dense array of shape [(2 * l1 +1) * (2 * l2 +1), (2 * lambda_ + - 1)] - - :returns: array of shape [n_samples, (2*lambda_+1), q_properties * p_properties] + Performs a Clebsch-Gordan combination step on 2 arrays using a dense operation. The + angular channel of each block is inferred from the size of its component axis, and + the blocks are combined to the desired output angular channel `o3_lambda` using the + appropriate Clebsch-Gordan coefficients. + + :param array_1: array with the m values for l1 with shape [n_samples, 2 * l1 + 1, + n_q_properties] + :param array_2: array with the m values for l2 with shape [n_samples, 2 * l2 + 1, + n_p_properties] + :param o3_lambda: int value of the resulting coupled channel + :param cg_coeffs: a :py:class:`TensorMap` containing CG coefficients in a format for + either sparse or dense CG tensor products, as returned by + :py:func:`calculate_cg_coefficients`. See the function docstring for details on + the data structure. + + :returns: array of shape [n_samples, (2 * o3_lambda + 1), q_properties * + p_properties] """ - if isinstance(arr_1, np.ndarray) or isinstance(arr_1, TorchTensor): - # Infer l1 and l2 from the len of the length of axis 1 of each tensor - l1 = (arr_1.shape[1] - 1) // 2 - l2 = (arr_2.shape[1] - 1) // 2 - cg_coeffs = cg_cache.coeffs[(l1, l2, lambda_)] - - # (samples None None l1_mu q) * (samples l2_mu p None None) - # -> (samples l2_mu p l1_mu q) we broadcast it in this way - # so we only need to do one swapaxes in the next step - arr_out = arr_1[:, None, None, :, :] * arr_2[:, :, :, None, None] - - # (samples l2_mu p l1_mu q) -> (samples q p l1_mu l2_mu) - arr_out = _dispatch.swapaxes(arr_out, 1, 4) - - # samples (q p l1_mu l2_mu) -> (samples (q p) (l1_mu l2_mu)) - arr_out = arr_out.reshape( - -1, - arr_1.shape[2] * arr_2.shape[2], - arr_1.shape[1] * arr_2.shape[1], - ) + # Infer l1 and l2 from the length of axis 1 of each tensor + l1 = (array_1.shape[1] - 1) // 2 + l2 = (array_2.shape[1] - 1) // 2 - # (l1_mu l2_mu lam_mu) -> ((l1_mu l2_mu) lam_mu) - cg_coeffs = cg_coeffs.reshape(-1, 2 * lambda_ + 1) + cg_l1l2lam = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": o3_lambda}).values - # (samples (q p) (l1_mu l2_mu)) @ ((l1_mu l2_mu) lam_mu) - # -> samples (q p) lam_mu - arr_out = arr_out @ cg_coeffs + # (samples None None l1_mu q) * (samples l2_mu p None None) + # -> (samples l2_mu p l1_mu q) we broadcast it in this way + # so we only need to do one swapaxes in the next step + array_out = array_1[:, None, None, :, :] * array_2[:, :, :, None, None] - # (samples (q p) lam_mu) -> (samples lam_mu (q p)) - return _dispatch.swapaxes(arr_out, 1, 2) + # (samples l2_mu p l1_mu q) -> (samples q p l1_mu l2_mu) + array_out = _dispatch.swapaxes(array_out, 1, 4) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) + # samples (q p l1_mu l2_mu) -> (samples (q p) (l1_mu l2_mu)) + array_out = array_out.reshape( + -1, + array_1.shape[2] * array_2.shape[2], + array_1.shape[1] * array_2.shape[1], + ) + + # (l1_mu l2_mu lam_mu) -> ((l1_mu l2_mu) lam_mu) + cg_l1l2lam = cg_l1l2lam.reshape(-1, 2 * o3_lambda + 1) + + # (samples (q p) (l1_mu l2_mu)) @ ((l1_mu l2_mu) lam_mu) + # -> samples (q p) lam_mu + array_out = array_out @
cg_l1l2lam + + # (samples (q p) lam_mu) -> (samples lam_mu (q p)) + return _dispatch.swapaxes(array_out, 1, 2) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py index 3bb575cc0..86eb3e5ba 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_clebsch_gordan.py @@ -4,12 +4,19 @@ metatensor :py:class:`TensorMap` objects. """ -import itertools from typing import List, Optional, Tuple, Union -from metatensor import Labels, TensorBlock, TensorMap - -from . import _cg_cache, _dispatch +from .. import _dispatch +from .._backend import ( + Array, + Labels, + LabelsEntry, + TensorBlock, + TensorMap, + is_labels, + torch_jit_annotate, +) +from . import _cg_cache # ================================================================== @@ -34,32 +41,51 @@ def _standardize_keys(tensor: TensorMap) -> TensorMap: if "species_neighbor" in tensor.keys.names: tensor = tensor.keys_to_properties(keys_to_move="species_neighbor") keys = tensor.keys.insert( - name="order_nu", - values=_dispatch.int_array_like([1], like=tensor.keys.values), index=0, + name="order_nu", + values=_dispatch.int_array_like( + len(tensor.keys.values) * [1], like=tensor.keys.values + ), ) keys = keys.insert( - name="inversion_sigma", - values=_dispatch.int_array_like([1], like=tensor.keys.values), index=1, + name="inversion_sigma", + values=_dispatch.int_array_like( + len(tensor.keys.values) * [1], like=tensor.keys.values + ), ) return TensorMap(keys=keys, blocks=[b.copy() for b in tensor.blocks()]) def _parse_selected_keys( n_iterations: int, + array_like: Array, angular_cutoff: Optional[int] = None, selected_keys: Optional[Union[Labels, List[Labels]]] = None, - like=None, -) -> List[Union[None, Labels]]: +) -> List[Labels]: """ Parses the `selected_keys` argument passed to public functions. Checks the values and returns a :py:class:`list` of :py:class:`Labels` objects, one for - each iteration of CG combination. - - `like` is required if a new :py:class:`Labels` object is to be created by - :py:mod:`_dispatch`. 
+ each iteration of CG combination. The ``array_like`` argument determines the + array backend of the :py:class:`Labels` created. """ + # Check the selected_keys + if ( + (selected_keys is not None) + and (not isinstance(selected_keys, list)) + and (not is_labels(selected_keys)) + ): + raise TypeError("`selected_keys` must be `None`, `Labels` or List[`Labels`]") + + if isinstance(selected_keys, list): + if not all( + [ + is_labels(selected_keys[i]) or (selected_keys[i] is None) + for i in range(len(selected_keys)) + ] + ): + raise TypeError("entries of `selected_keys` must be `Labels` or `None`") + # Check angular_cutoff arg if angular_cutoff is not None: if not isinstance(angular_cutoff, int): @@ -67,95 +93,107 @@ if angular_cutoff < 1: raise ValueError("`angular_cutoff` must be >= 1") + # we use a new variable for selected_keys so TorchScript can infer the correct type + selected_keys_: List[Union[None, Labels]] = [] + if selected_keys is None: if angular_cutoff is None: # no selections at all - selected_keys = [None] * n_iterations + selected_keys_ = [None] * n_iterations else: # Create a key selection with all angular channels <= the specified # angular cutoff - selected_keys = [ - Labels( - names=["spherical_harmonics_l"], - values=_dispatch.int_range_like( - 0, angular_cutoff, like=like - ).reshape(-1, 1), - ) - ] * n_iterations + label = Labels( + names=["spherical_harmonics_l"], + values=_dispatch.int_array_like( + list(range(0, angular_cutoff)), like=array_like + ).reshape(-1, 1), + ) + selected_keys_ = [label] * n_iterations - if isinstance(selected_keys, Labels): + if is_labels(selected_keys): # Create a list, but only apply a key selection at the final iteration - selected_keys = [None] * (n_iterations - 1) + [selected_keys] + selected_keys_ = [None] * (n_iterations - 1) + [selected_keys] + elif isinstance(selected_keys, list): + selected_keys_ = selected_keys - # Check the selected_keys - if not isinstance(selected_keys, List): - raise TypeError( - "`selected_keys` must be a `Labels` or List[Union[None, `Labels`]]" - ) - if not len(selected_keys) == n_iterations: + if not len(selected_keys_) == n_iterations: raise ValueError( - "`selected_keys` must be a List[Union[None, Labels]] of length" + "`selected_keys` must be a List[Labels] of length" " `correlation_order` - 1" ) - if not _dispatch.all( - [isinstance(val, (Labels, type(None))) for val in selected_keys] - ): - raise TypeError("`selected_keys` must be a Labels or List[Union[None, Labels]]") # Now iterate over each of the Labels (or None) in the list and check - for slct in selected_keys: - if slct is None: + for selected in selected_keys_: + if selected is None: + continue - assert isinstance(slct, Labels) - if not _dispatch.all( + if not (is_labels(selected)): + raise ValueError("entries of `selected_keys` must be `Labels`") + + if not all( [ name in ["spherical_harmonics_l", "inversion_sigma"] - for name in slct.names + for name in selected.names ] ): raise ValueError( "specified key names in `selected_keys` must be either" " 'spherical_harmonics_l' or 'inversion_sigma'" ) - if "spherical_harmonics_l" in slct.names: + if "spherical_harmonics_l" in selected.names: if angular_cutoff is not None: - if not _dispatch.all( - slct.column("spherical_harmonics_l") <= angular_cutoff - ): + below_cutoff: Array = ( + selected.column("spherical_harmonics_l") <= angular_cutoff + ) + if not _dispatch.all(below_cutoff): raise ValueError( "specified angular channels in `selected_keys` must be <= the" " specified `angular_cutoff`" ) -
if not _dispatch.all( - [angular_l >= 0 for angular_l in slct.column("spherical_harmonics_l")] - ): + above_zero = _dispatch.bool_array_like( + [ + bool(angular_l >= 0) + for angular_l in selected.column("spherical_harmonics_l") + ], + like=array_like, + ) + if not _dispatch.all(above_zero): raise ValueError( "specified angular channels in `selected_keys` must be >= 0" ) - if "inversion_sigma" in slct.names: + if "inversion_sigma" in selected.names: if not _dispatch.all( - [parity_s in [-1, +1] for parity_s in slct.column("inversion_sigma")] + _dispatch.bool_array_like( + [ + bool(parity_s in [-1, 1]) + for parity_s in selected.column("inversion_sigma") + ], + array_like, + ) ): raise ValueError( "specified parities in `selected_keys` must be -1 or +1" ) - return selected_keys + return selected_keys_ def _parse_bool_iteration_filters( n_iterations: int, - skip_redundant: Optional[Union[bool, List[bool]]] = False, + skip_redundant: Union[bool, List[bool]] = False, output_selection: Optional[Union[bool, List[bool]]] = None, -) -> List[List[bool]]: +) -> Tuple[List[bool], List[bool]]: """ Parses the `skip_redundant` and `output_selection` arguments passed to public functions. """ if isinstance(skip_redundant, bool): - skip_redundant = [skip_redundant] * n_iterations - if not _dispatch.all([isinstance(val, bool) for val in skip_redundant]): + skip_redundant_ = [skip_redundant] * n_iterations + else: + skip_redundant_ = skip_redundant + + if not all([isinstance(val, bool) for val in skip_redundant_]): raise TypeError("`skip_redundant` must be a `bool` or `list` of `bool`") - if not len(skip_redundant) == n_iterations: + if not len(skip_redundant_) == n_iterations: raise ValueError( "`skip_redundant` must be a bool or `list` of `bool` of length" " `correlation_order` - 1" @@ -165,7 +203,7 @@ else: if isinstance(output_selection, bool): output_selection = [output_selection] * n_iterations - if not isinstance(output_selection, List): + if not isinstance(output_selection, list): raise TypeError("`output_selection` must be passed as `list` of `bool`") if not len(output_selection) == n_iterations: @@ -173,21 +211,21 @@ "`output_selection` must be a ``list`` of ``bool`` of length" " corresponding to the number of CG iterations" ) - if not _dispatch.all([isinstance(v, bool) for v in output_selection]): + if not all([isinstance(v, bool) for v in output_selection]): raise TypeError("`output_selection` must be passed as a `list` of `bool`") - if not _dispatch.all([isinstance(v, bool) for v in output_selection]): - raise TypeError("`output_selection` must be passed as a `list` of `bool`") - return skip_redundant, output_selection + return skip_redundant_, output_selection def _precompute_keys( keys_1: Labels, keys_2: Labels, n_iterations: int, - selected_keys: List[Union[None, Labels]], + selected_keys: List[Union[Labels, None]], skip_redundant: List[bool], -) -> List[Tuple[Labels, List[List[int]]]]: +) -> List[Tuple[List[LabelsEntry], List[LabelsEntry], Labels]]: """ Computes all the keys metadata needed to perform `n_iterations` of CG combination steps. @@ -201,7 +239,7 @@ If `skip_redundant` is True, then keys that represent redundant CG operations are not included in the output keys at each step.
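+
+    A rough sketch of how the return value is consumed (hypothetical usage,
+    mirroring the loop in ``DensityCorrelations``; ``density`` is assumed to be
+    a TensorMap already passed through :py:func:`_standardize_keys`):
+
+    >>> key_metadata = _precompute_keys(
+    ...     density.keys, density.keys, n_iterations=1,
+    ...     selected_keys=[None], skip_redundant=[False],
+    ... )
+    >>> keys_1_entries, keys_2_entries, keys_out = key_metadata[0]
+    >>> assert len(keys_1_entries) == len(keys_2_entries) == len(keys_out)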
""" - keys_metadata = [] + keys_metadata: List[Tuple[List[LabelsEntry], List[LabelsEntry], Labels]] = [] keys_out = keys_1 for iteration in range(n_iterations): # Get the keys metadata for the combination of the 2 tensors @@ -209,12 +247,14 @@ def _precompute_keys( keys_1=keys_out, keys_2=keys_2, ) - if selected_keys[iteration] is not None: + # For TorchScript to determine the type correctly so we can subscript it + selected_keys_i = selected_keys[iteration] + if selected_keys_i is not None: keys_1_entries, keys_2_entries, keys_out = _apply_key_selection( keys_1_entries, keys_2_entries, keys_out, - selected_keys=selected_keys[iteration], + selected_keys=selected_keys_i, ) if skip_redundant[iteration]: @@ -237,7 +277,8 @@ def _precompute_keys( def _precompute_keys_full_product( keys_1: Labels, keys_2: Labels -) -> Tuple[List, List, Labels]: +) -> Tuple[List[LabelsEntry], List[LabelsEntry], Labels]: + # Due to TorchScript we cannot use List[LabelsEntry] """ Given the keys of 2 TensorMaps, returns the keys that would be present after a full CG product of these TensorMaps. @@ -272,11 +313,11 @@ def _precompute_keys_full_product( `keys_2` must follow the key name convention: ["order_nu", "inversion_sigma", "spherical_harmonics_l", "species_center"] - Returned is Tuple[List, List, Labels]. The first two lists correspond to the - LabelsEntry objects of the keys being combined. The third element is a - Labels object corresponding to the keys of the output TensorMap. Each entry - in this Labels object corresponds to the keys is formed by combination of - the pair of blocks indexed by correspoding key pairs in the first two lists. + The first two lists of the returned value correspond to the LabelsEntry objects of + the keys being combined. The third element is a Labels object corresponding to the + keys of the output TensorMap. Each entry in this Labels object corresponds to the + keys is formed by combination of the pair of blocks indexed by correspoding key + pairs in the first two lists. """ # Get the correlation order of the first TensorMap. unique_nu = _dispatch.unique(keys_1.column("order_nu")) @@ -285,7 +326,7 @@ def _precompute_keys_full_product( "keys_1 must correspond to a tensor of a single correlation order." f" Found {len(unique_nu)} body orders: {unique_nu}" ) - nu1 = unique_nu[0] + nu1 = int(unique_nu[0]) # Define new correlation order of output TensorMap nu = nu1 + 1 @@ -295,23 +336,25 @@ def _precompute_keys_full_product( # If nu1 = 1, the key names don't yet have any "lx" columns if nu1 == 1: - l_list_names = [] + l_list_names: List[str] = [] new_l_list_names = ["l1", "l2"] else: l_list_names = [f"l{angular_l}" for angular_l in range(1, nu1 + 1)] new_l_list_names = l_list_names + [f"l{nu}"] # Check key names - assert _dispatch.all( - keys_1.names - == ["order_nu", "inversion_sigma", "spherical_harmonics_l", "species_center"] - + l_list_names - + [f"k{k}" for k in range(2, nu1)] - ) - assert _dispatch.all( - keys_2.names - == ["order_nu", "inversion_sigma", "spherical_harmonics_l", "species_center"] - ) + assert keys_1.names == [ + "order_nu", + "inversion_sigma", + "spherical_harmonics_l", + "species_center", + ] + l_list_names + [f"k{k}" for k in range(2, nu1)] + assert keys_2.names == [ + "order_nu", + "inversion_sigma", + "spherical_harmonics_l", + "species_center", + ] # Define key names of output Labels (i.e. 
for combined TensorMap) new_names = ( @@ -320,43 +363,64 @@ - new_key_values = [] - keys_1_entries = [] - keys_2_entries = [] - for key_1, key_2 in itertools.product(keys_1, keys_2): - # Unpack relevant key values - sig1, lam1, a = key_1.values[1:4] - sig2, lam2, a2 = key_2.values[1:4] - - # Only combine blocks of the same chemical species - if a != a2: - continue - - # First calculate the possible non-zero angular channels that can be - # formed from combination of blocks of order `lam1` and `lam2`. This - # corresponds to values in the inclusive range { |lam1 - lam2|, ..., - # |lam1 + lam2| } - nonzero_lams = _dispatch.int_range_like( - abs(lam1 - lam2), abs(lam1 + lam2) + 1, like=key_1.values - ) - - # Now iterate over the non-zero angular channels and apply the custom - # selections - for lambda_ in nonzero_lams: - # Calculate new sigma - sig = sig1 * sig2 * (-1) ** (lam1 + lam2 + lambda_) - - # Extract the l and k lists from keys_1 - l_list = key_1.values[4 : 4 + nu1].tolist() - k_list = key_1.values[4 + nu1 :].tolist() - # Build the new keys values. l{nu} is `lam2`` (i.e. - # "spherical_harmonics_l" of the key from `keys_2`. k{nu-1} is - # `lam1` (i.e. "spherical_harmonics_l" of the key from `keys_1`). - new_vals = [nu, sig, lambda_, a] + l_list + [lam2] + k_list + [lam1] - new_key_values.append(new_vals) - keys_1_entries.append(key_1) - keys_2_entries.append(key_2) + new_key_values: List[List[int]] = [] + keys_1_entries: List[LabelsEntry] = [] + keys_2_entries: List[LabelsEntry] = [] + for i in range(len(keys_1)): + for j in range(len(keys_2)): + key_1 = keys_1.entry(i) + key_2 = keys_2.entry(j) + # Unpack relevant key values + sig1 = int(keys_1.values[i, 1]) + lam1 = int(keys_1.values[i, 2]) + a = int(keys_1.values[i, 3]) + sig2 = int(keys_2.values[j, 1]) + lam2 = int(keys_2.values[j, 2]) + a2 = int(keys_2.values[j, 3]) + + # Only combine blocks of the same chemical species + if a != a2: + continue + + # First calculate the possible non-zero angular channels that can be + # formed from combination of blocks of order `lam1` and `lam2`. This + # corresponds to values in the inclusive range { |lam1 - lam2|, ..., + # |lam1 + lam2| } + min_lam: int = abs(lam1 - lam2) + max_lam: int = abs(lam1 + lam2) + 1 + nonzero_lams = list(range(min_lam, max_lam)) + + # Now iterate over the non-zero angular channels and apply the custom + # selections + for lambda_ in nonzero_lams: + # Calculate new sigma + sig = int(sig1 * sig2 * (-1) ** (lam1 + lam2 + lambda_)) + + # Extract the l and k lists from keys_1 + l_list: List[int] = _dispatch.to_int_list(keys_1.values[i, 4 : 4 + nu1]) + k_list: List[int] = _dispatch.to_int_list(keys_1.values[i, 4 + nu1 :]) + + # Build the new keys values. l{nu} is `lam2` (i.e. + # "spherical_harmonics_l" of the key from `keys_2`). k{nu-1} is + # `lam1` (i.e. "spherical_harmonics_l" of the key from `keys_1`).
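+                # For example (hypothetical values): for nu1 = 1, combining a
+                # key with (sig1, lam1) = (1, 1) and one with (sig2, lam2) =
+                # (1, 2) at lambda_ = 2 gives sig = 1 * 1 * (-1) ** (1 + 2 + 2)
+                # = -1, l_list = k_list = [], and therefore
+                # new_vals = [2, -1, 2, a] + [] + [2] + [] + [1].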
+ new_vals: List[int] = ( + torch_jit_annotate(List[int], [nu, sig, lambda_, a]) + + l_list + + [lam2] + + k_list + + [lam1] + ) + new_key_values.append(new_vals) + keys_1_entries.append(key_1) + keys_2_entries.append(key_2) # Define new keys as the full product of keys_1 and keys_2 keys_out = Labels( @@ -368,8 +432,11 @@ def _apply_key_selection( - keys_1_entries: List, keys_2_entries: List, keys_out: Labels, selected_keys: Labels -) -> Tuple[List, List, Labels]: + keys_1_entries: List[LabelsEntry], + keys_2_entries: List[LabelsEntry], + keys_out: Labels, + selected_keys: Labels, +) -> Tuple[List[LabelsEntry], List[LabelsEntry], Labels]: """ Applies a selection according to `selected_keys` to the keys of an output TensorMap `keys_out` produced by combination of blocks indexed by keys @@ -381,35 +448,45 @@ If a selection in `selected_keys` is not valid based on the keys in `keys_out`, an error is raised. """ - # Extract the relevant columns from `selected_keys` that the selection will - # be performed on - keys_out_vals = [[k[name] for name in selected_keys.names] for k in keys_out] + # Extract from `keys_out` the relevant columns, named in `selected_keys`, that + # the selection will be performed on + col_idx = _dispatch.int_array_like( + [keys_out.names.index(name) for name in selected_keys.names], keys_out.values + ) + keys_out_vals = keys_out.values[:, col_idx] # First check that all of the selected keys exist in the output keys - for slct in selected_keys.values: - if not _dispatch.any([_dispatch.all(slct == k) for k in keys_out_vals]): + for selected in selected_keys.values: + if not any( + [bool(all(selected == keys_out_vals[i])) for i in range(len(keys_out_vals))] + ): raise ValueError( - f"selected key {selected_keys.names} = {slct} not found" + f"selected key {selected_keys.names} = {selected} not found" " in the output keys. Check the `selected_keys` argument." ) # Build a mask of the selected keys - mask = [ - _dispatch.any([_dispatch.all(i == j) for j in selected_keys.values]) - for i in keys_out_vals - ] + mask = _dispatch.bool_array_like( + [any([bool(all(i == j)) for j in selected_keys.values]) for i in keys_out_vals], + like=selected_keys.values, + ) + mask_indices = _dispatch.int_array_like( + list(range(len(keys_1_entries))), like=selected_keys.values + )[mask] # Apply the mask to key entries and keys and return - keys_1_entries = [k for k, isin in zip(keys_1_entries, mask) if isin] - keys_2_entries = [k for k, isin in zip(keys_2_entries, mask) if isin] + keys_1_entries = [keys_1_entries[i] for i in mask_indices] + keys_2_entries = [keys_2_entries[i] for i in mask_indices] keys_out = Labels(names=keys_out.names, values=keys_out.values[mask]) return keys_1_entries, keys_2_entries, keys_out def _remove_redundant_keys( - keys_1_entries: List, keys_2_entries: List, keys_out: Labels -) -> Tuple[List, List, Labels]: + keys_1_entries: List[LabelsEntry], + keys_2_entries: List[LabelsEntry], + keys_out: Labels, +) -> Tuple[List[LabelsEntry], List[LabelsEntry], Labels]: """ For a Labels object `keys_out` that corresponds to the keys of a TensorMap formed by combination of the blocks described by the entries in the lists @@ -428,11 +505,12 @@ nu = nu1 + 1 # Identify keys of redundant blocks and remove them - key_idxs_to_keep = [] - for key_idx, key in enumerate(keys_out): - # Get the important key values.
This is all of the keys, excpet the k - # list - key_vals_slice = key.values[: 4 + (nu + 1)].tolist() + key_idxs_to_keep: List[int] = [] + for key_idx in range(len(keys_out)): + key = keys_out.entry(key_idx) + # Get the important key values. This is all of the keys, except the k + # list. + key_vals_slice: List[int] = _dispatch.to_int_list(key.values[: 4 + (nu + 1)]) first_part, l_list = key_vals_slice[:4], key_vals_slice[4:] # Sort the l list @@ -441,18 +519,19 @@ # Compare the sliced key with the one recreated when the l list is # sorted. If they are identical, this is the key of the block that we # want to compute a CG combination for. - key_slice_tuple = tuple(first_part + l_list) - key_slice_sorted_tuple = tuple(first_part + l_list_sorted) - if _dispatch.all(key_slice_tuple == key_slice_sorted_tuple): + key_slice_tuple = _dispatch.int_array_like(first_part + l_list, like=key.values) + key_slice_sorted_tuple = _dispatch.int_array_like( + first_part + l_list_sorted, like=key.values + ) + if all(key_slice_tuple == key_slice_sorted_tuple): key_idxs_to_keep.append(key_idx) # Build a reduced Labels object for the combined keys, with redundancies removed keys_out_red = Labels( names=keys_out.names, - values=_dispatch.int_array_like( - [keys_out[idx].values for idx in key_idxs_to_keep], - like=keys_1_entries[0].values, - ), + values=keys_out.values[ + _dispatch.int_array_like(key_idxs_to_keep, like=keys_out.values) + ], ) # Store the list of reduced entries that combine to form the reduced output keys @@ -471,23 +550,21 @@ def _combine_blocks_same_samples( block_1: TensorBlock, block_2: TensorBlock, lambda_: int, - cg_cache, - compute_metadata_only: bool = False, + cg_coeffs: TensorMap, + cg_backend: str, ) -> TensorBlock: """ For a given pair of TensorBlocks and desired angular channel, combines the values arrays and returns a new TensorBlock. + + If ``cg_backend`` is ``"metadata"``, a tensor block with an empty values array + is returned, containing only the metadata. """ # Do the CG combination - single center so no shape pre-processing required - if compute_metadata_only: - combined_values = _cg_cache.combine_arrays( - block_1.values, block_2.values, lambda_, cg_cache, return_empty_array=True - ) - else: - combined_values = _cg_cache.combine_arrays( - block_1.values, block_2.values, lambda_, cg_cache, return_empty_array=False - ) + combined_values = _cg_cache.combine_arrays( + block_1.values, block_2.values, lambda_, cg_coeffs, cg_backend + ) # Infer the new nu value: block 1's properties are nu pairs of # "species_neighbor_x" and "nx".
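A minimal sketch of the redundancy rule applied above (hypothetical key values, in plain Python outside of the dispatch machinery): a key whose l-list is not already sorted is skipped, since a permutation of the same l values indexes an equivalent CG combination.

# hypothetical illustration of the filter used in `_remove_redundant_keys`
first_part = [2, 1, 1, 6]  # order_nu, inversion_sigma, lambda, species_center
for l_list in ([1, 2], [2, 1]):
    keep = first_part + l_list == first_part + sorted(l_list)
    print(l_list, "kept" if keep else "skipped")
# [1, 2] kept
# [2, 1] skipped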
@@ -496,7 +573,25 @@ # Define the new property names for "nx" and "species_neighbor_x" n_names = [f"n{i}" for i in range(1, combined_nu + 1)] neighbor_names = [f"species_neighbor_{i}" for i in range(1, combined_nu + 1)] - prop_names = [item for i in zip(neighbor_names, n_names) for item in i] + prop_names_zip = [ + [neighbor_names[i], n_names[i]] for i in range(len(neighbor_names)) + ] + prop_names: List[str] = [] + for i in range(len(prop_names_zip)): + prop_names.extend(prop_names_zip[i]) + + # create cross product list of indices in a torch-scriptable way of + # [[i, j] for i in range(len(block_2.properties.values)) for j in + # range(len(block_1.properties.values))] + # [0, 1, 2], [0, 1] -> [[0, 1], [0, 2], [1, 0], [1, 1], [2, 0], [2, 1]] + block_1_block_2_product_idx = _dispatch.cartesian_prod( + _dispatch.int_range_like( + 0, len(block_2.properties.values), like=block_2.values + ), + _dispatch.int_range_like( + 0, len(block_1.properties.values), like=block_1.values + ), + ) # Create a TensorBlock combined_block = TensorBlock( @@ -506,7 +601,7 @@ Labels( names=["spherical_harmonics_m"], values=_dispatch.int_range_like( - min_val=-lambda_, max_val=lambda_ + 1, like=block_1.values + min_val=-lambda_, max_val=lambda_ + 1, like=block_1.samples.values ).reshape(-1, 1), ), ], @@ -514,11 +609,11 @@ names=prop_names, values=_dispatch.int_array_like( [ - _dispatch.concatenate((b2, b1)) - for b2 in block_2.properties.values - for b1 in block_1.properties.values + _dispatch.to_int_list(block_2.properties.values[indices[0]]) + + _dispatch.to_int_list(block_1.properties.values[indices[1]]) + for indices in block_1_block_2_product_idx ], - like=block_1.values, + block_1.samples.values, ), ), ) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py b/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py deleted file mode 100644 index 839ea7f3e..000000000 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_dispatch.py +++ /dev/null @@ -1,134 +0,0 @@ -""" -Module containing dispatch functions for numpy/torch CG combination operations. -""" - -from typing import List, Optional - -import numpy as np - - -try: - import torch - from torch import Tensor as TorchTensor -except ImportError: - - class TorchTensor: - pass - - -UNKNOWN_ARRAY_TYPE = ( - "unknown array type, only numpy arrays and torch tensors are supported" -) - - -def unique(array, axis: Optional[int] = None): - """Find the unique elements of an array.""" - if isinstance(array, TorchTensor): - return torch.unique(array, dim=axis) - elif isinstance(array, np.ndarray): - return np.unique(array, axis=axis) - - -def int_range_like(min_val, max_val, like): - """Returns an array of integers from min to max, non-inclusive, based on the - type of `like`""" - if isinstance(like, TorchTensor): - return torch.arange(min_val, max_val, dtype=torch.int64, device=like.device) - elif isinstance(like, np.ndarray): - return np.arange(min_val, max_val).astype(np.int64) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def int_array_like(int_list: List[int], like): - """ - Converts the input list of int to a numpy array or torch tensor - based on the type of `like`.
- """ - if isinstance(like, TorchTensor): - return torch.tensor(int_list, dtype=torch.int64, device=like.device) - elif isinstance(like, np.ndarray): - return np.array(int_list).astype(np.int64) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def concatenate(arrays, axis: Optional[int] = 0): - """Concatenate arrays along an axis.""" - if isinstance(arrays[0], TorchTensor): - return torch.cat(arrays, dim=axis) - elif isinstance(arrays[0], np.ndarray): - return np.concatenate(arrays, axis=axis) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def all(array, axis: Optional[int] = None): - """Test whether all array elements along a given axis evaluate to True. - - This function has the same behavior as - ``np.all(array,axis=axis)``. - """ - if isinstance(array, bool): - array = np.array(array) - if isinstance(array, list): - array = np.array(array) - - if isinstance(array, TorchTensor): - # torch.all has two implementation, and picks one depending if more than one - # parameter is given. The second one does not supports setting dim to `None` - if axis is None: - return torch.all(input=array) - else: - return torch.all(input=array, dim=axis) - elif isinstance(array, np.ndarray): - return np.all(a=array, axis=axis) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def any(array): - """Test whether any array elements along a given axis evaluate to True. - - This function has the same behavior as - ``np.any(array)``. - """ - if isinstance(array, bool): - array = np.array(array) - if isinstance(array, list): - array = np.array(array) - if isinstance(array, TorchTensor): - return torch.any(array) - elif isinstance(array, np.ndarray): - return np.any(array) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def zeros_like(shape, like): - """Return an array of zeros with the same shape and type as a given array. - - This function has the same behavior as - ``np.zeros_like(array)``. - """ - if isinstance(like, TorchTensor): - return torch.zeros( - shape, - requires_grad=like.requires_grad, - dtype=like.dtype, - device=like.device, - ) - elif isinstance(like, np.ndarray): - return np.zeros(shape, dtype=like.dtype) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def swapaxes(array, axis0: int, axis1: int): - """Swaps axes of an array.""" - if isinstance(array, TorchTensor): - return torch.swapaxes(array, axis0, axis1) - elif isinstance(array, np.ndarray): - return np.swapaxes(array, axis0, axis1) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py index bf4c0c42f..a2181186b 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/correlate_density.py @@ -6,255 +6,384 @@ from typing import List, Optional, Union -from metatensor import Labels, TensorMap +import numpy as np -from . import _cg_cache, _clebsch_gordan, _dispatch +from .. import _dispatch +from .._backend import ( + Labels, + LabelsEntry, + TensorBlock, + TensorMap, + TorchModule, + TorchScriptClass, + torch_jit_export, + torch_jit_is_scripting, +) +from . 
import _cg_cache, _clebsch_gordan +try: + import mops # noqa F401 + + HAS_MOPS = True +except ImportError: + HAS_MOPS = False + +try: + import torch + + HAS_TORCH = True +except ImportError: + HAS_TORCH = False + # ====================================================================== # ===== Public API functions # ====================================================================== -def correlate_density( - density: TensorMap, - correlation_order: int, - angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Labels]]] = None, - skip_redundant: Optional[Union[bool, List[bool]]] = False, - output_selection: Optional[Union[bool, List[bool]]] = None, -) -> Union[TensorMap, List[TensorMap]]: +class DensityCorrelations(TorchModule): """ - Takes iterative Clebsch-Gordan (CG) tensor products of a density descriptor - with itself up to the desired correlation order. Returns - :py:class:`TensorMap`(s) corresponding to the density correlations output - from the specified iteration(s). - - A density descriptor necessarily is body order 2 (i.e. correlation order 1), - but can be single- or multi-center. The output is a :py:class:`list` of - density correlations for each iteration specified in `output_selection`, up - to the target order passed in `correlation_order`. By default only the last - correlation (i.e. the correlation of order ``correlation_order``) is - returned. + Takes iterative Clebsch-Gordan (CG) tensor products of a density descriptor with + itself up to the desired correlation order. Returns :py:class:`TensorMap` + corresponding to the density correlations output from the specified iteration(s). + + A density descriptor necessarily is body order 2 (i.e. correlation order 1), but can + be single- or multi-center. The output is a :py:class:`list` of density correlations + for each iteration specified in `output_selection`, up to the target order passed in + `correlation_order`. By default only the last correlation (i.e. the correlation of + order ``correlation_order``) is returned. This function is an iterative special case of the more general - :py:func:`correlate_tensors`. As a density is being correlated with itself, - some redundant CG tensor products can be skipped with the `skip_redundant` - keyword. + :py:func:`correlate_tensors`. As a density is being correlated with itself, some + redundant CG tensor products can be skipped with the `skip_redundant` keyword. Selections on the angular and parity channels at each iteration can also be controlled with arguments `angular_cutoff`, `angular_selection` and `parity_selection`. - :param density: A density descriptor of body order 2 (correlation order 1), - in :py:class:`TensorMap` format. This may be, for example, a rascaline - :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`. - Alternatively, this could be multi-center descriptor, such as a pair - density. - :param correlation_order: The desired correlation order of the output - descriptor. Must be >= 1. - :param angular_cutoff: The maximum angular channel to compute at any given - CG iteration, applied globally to all iterations until the target - correlation order is reached. - :param selected_keys: :py:class:`Labels` or `List[:py:class:`Labels`]` - specifying the angular and/or parity channels to output at each - iteration. All :py:class:`Labels` objects passed here must only contain - key names "spherical_harmonics_l" and "inversion_sigma". 
If a single - :py:class:`Labels` object is passed, this is applied to the final - iteration only. If a :py:class:`list` of :py:class:`Labels` objects is - passed, each is applied to its corresponding iteration. If None is - passed, all angular and parity channels are output at each iteration, - with the global `angular_cutoff` applied if specified. - :param skip_redundant: Whether to skip redundant CG combinations. Defaults - to False, which means all combinations are performed. If a - :py:class:`list` of :py:class:`bool` is passed, this is applied to each - iteration. If a single :py:class:`bool` is passed, this is applied to - all iterations. - :param output_selection: A :py:class:`list` of :py:class:`bool` specifying - whether to output a :py:class:`TensorMap` for each iteration. If a - single :py:class:`bool` is passed as True, outputs from all iterations - will be returned. If a :py:class:`list` of :py:class:`bool` is passed, - this controls the output at each corresponding iteration. If None is - passed, only the final iteration is output. - - :return: A :py:class:`list` of :py:class:`TensorMap` corresponding to the - density correlations output from the specified iterations. If the output - from a single iteration is requested, a :py:class:`TensorMap` is - returned instead. - """ - return _correlate_density( - density, - correlation_order, - angular_cutoff, - selected_keys, - skip_redundant, - output_selection, - compute_metadata_only=False, - sparse=True, # sparse CG cache by default - ) - - -def correlate_density_metadata( - density: TensorMap, - correlation_order: int, - angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Labels]]] = None, - skip_redundant: Optional[Union[bool, List[bool]]] = False, - output_selection: Optional[Union[bool, List[bool]]] = None, -) -> Union[TensorMap, List[TensorMap]]: - """ - Returns the metadata-only :py:class:`TensorMap`(s) that would be output by - the function :py:func:`correlate_density` under the same settings, without - perfoming the actual Clebsch-Gordan tensor products. See this function for - full documentation. - """ + :param max_angular: The maximum angular order for which CG coefficients should be + computed and stored. This must be large enough to cover the maximum angular + order reached in the CG iterations on a density input to the :py:meth:`compute` + method. + :param correlation_order: The desired correlation order of the output descriptor. + Must be >= 1. + :param angular_cutoff: The maximum angular channel to compute at any given CG + iteration, applied globally to all iterations until the target correlation order + is reached. + :param selected_keys: :py:class:`Labels` or `List[:py:class:`Labels`]` specifying + the angular and/or parity channels to output at each iteration. All + :py:class:`Labels` objects passed here must only contain key names + "spherical_harmonics_l" and "inversion_sigma". If a single :py:class:`Labels` + object is passed, this is applied to the final iteration only. If a + :py:class:`list` of :py:class:`Labels` objects is passed, each is applied to its + corresponding iteration. If None is passed, all angular and parity channels are + output at each iteration, with the global `angular_cutoff` applied if specified. + :param skip_redundant: Whether to skip redundant CG combinations. Defaults to False, + which means all combinations are performed. If a :py:class:`list` of + :py:class:`bool` is passed, this is applied to each iteration. 
If a single + :py:class:`bool` is passed, this is applied to all iterations. + :param output_selection: A :py:class:`list` of :py:class:`bool` specifying whether + to output a :py:class:`TensorMap` for each iteration. If a single + :py:class:`bool` is passed as True, outputs from all iterations will be + returned. If a :py:class:`list` of :py:class:`bool` is passed, this controls the + output at each corresponding iteration. If None is passed, only the final + iteration is output. + :param arrays_backend: Determines the array backend, either "numpy" or "torch". + :param cg_backend: Determines the backend for the CG combination. It can be either + "python-sparse", "python-dense" or "mops". If the CG combination is performed + with the sparse coefficients, then for each (l1, l2, lambda) block the (m1, m2, + mu) coefficients are stored in a sparse format, only storing the nonzero + coefficients. If the parameter is None, the optimal choice is determined + based on the available packages and ``arrays_backend``. - return _correlate_density( - density, - correlation_order, - angular_cutoff, - selected_keys, - skip_redundant, - output_selection, - compute_metadata_only=True, - ) - - -# ==================================================================== -# ===== Private functions that do the work on the TensorMap level -# ==================================================================== - - -def _correlate_density( - density: TensorMap, - correlation_order: int, - angular_cutoff: Optional[int] = None, - selected_keys: Optional[Union[Labels, List[Labels]]] = None, - skip_redundant: Optional[Union[bool, List[bool]]] = False, - output_selection: Optional[Union[bool, List[bool]]] = None, - compute_metadata_only: bool = False, - sparse: bool = True, -) -> Union[TensorMap, List[TensorMap]]: - """ - Performs the density correlations for public functions - :py:func:`correlate_density` and :py:func:`correlate_density_metadata`. + - "python-dense": Uses the python implementation performing the combinations + with the dense CG coefficients. + - "python-sparse": Uses the python implementation performing the + combinations with the sparse CG coefficients. + - "mops": Uses the ``mops`` package to optimize the sparse combinations. At + the moment it is only available with ``arrays_backend="numpy"``. + + :return: A :py:class:`list` of :py:class:`TensorMap` corresponding to the density + correlations output from the specified iterations. If the output from a single + iteration is requested, a :py:class:`TensorMap` is returned instead. """ - # Check inputs - if correlation_order <= 1: - raise ValueError("`correlation_order` must be > 1") - # TODO: implement combinations of gradients too - if _dispatch.any([len(list(block.gradients())) > 0 for block in density]): - raise NotImplementedError( - "Clebsch Gordan combinations with gradients not yet implemented." - " Use metatensor.remove_gradients to remove gradients from the input."
- ) - # Check metadata - if not ( - _dispatch.all(density.keys.names == ["spherical_harmonics_l", "species_center"]) - or _dispatch.all( - density.keys.names - == ["spherical_harmonics_l", "species_center", "species_neighbor"] - ) + + _selected_keys: List[Union[Labels, None]] + + def __init__( + self, + max_angular: int, + correlation_order: int, + angular_cutoff: Optional[int] = None, + selected_keys: Optional[Union[Labels, List[Labels]]] = None, + skip_redundant: Optional[Union[bool, List[bool]]] = False, + output_selection: Optional[Union[bool, List[bool]]] = None, + arrays_backend: Optional[str] = None, + cg_backend: Optional[str] = None, ): - raise ValueError( - "input `density` must have key names" - ' ["spherical_harmonics_l", "species_center"] or' - ' ["spherical_harmonics_l", "species_center", "species_neighbor"]' + super().__init__() + if arrays_backend is None: + if torch_jit_is_scripting(): + arrays_backend = "torch" + else: + if isinstance(Labels, TorchScriptClass): + arrays_backend = "torch" + else: + arrays_backend = "numpy" + elif arrays_backend == "numpy": + if torch_jit_is_scripting(): + raise ValueError( + "Module is torch scripted but 'numpy' was given as `arrays_backend`" + ) + arrays_backend = "numpy" + elif arrays_backend == "torch": + arrays_backend = "torch" + else: + raise ValueError( + f"Unknown `arrays_backend` {arrays_backend}. " + "Only 'numpy' and 'torch' are supported." + ) + + # Choosing the optimal cg combine backend + if cg_backend is None: + if arrays_backend == "torch": + self._cg_backend = "python-dense" + elif arrays_backend == "numpy" and HAS_MOPS: + self._cg_backend = "mops" + else: + self._cg_backend = "python-sparse" + elif cg_backend == "python-dense": + self._cg_backend = "python-dense" + elif cg_backend == "python-sparse": + self._cg_backend = "python-sparse" + elif cg_backend == "mops": + if arrays_backend == "torch": + raise NotImplementedError( + "'torch' was determined or given as `arrays_backend` " + "and 'mops' was given as `cg_backend`, " + "but mops does not support the torch backend yet" + ) + else: + assert arrays_backend == "numpy" + if not HAS_MOPS: + raise ImportError( + "mops is not installed, but 'mops' was given as `cg_backend`" + ) + self._cg_backend = "mops" + + else: + raise ValueError( + f"Unknown `cg_backend` {cg_backend}. " + "Only 'python-dense', 'python-sparse' and 'mops' are supported." + ) + + if max_angular < 0: + raise ValueError( + f"Given negative `max_angular={max_angular}`. " + "`max_angular` must be >= 0."
+ ) + self._max_angular = max_angular + self._cg_coefficients = _cg_cache.calculate_cg_coefficients( + lambda_max=self._max_angular, + sparse=(self._cg_backend in ["python-sparse", "mops"]), + use_torch=(arrays_backend == "torch"), ) - if not _dispatch.all(density.component_names == ["spherical_harmonics_m"]): - raise ValueError( - "input `density` must have a single component" - " axis with name `spherical_harmonics_m`" + + # Check inputs + if correlation_order <= 1: + raise ValueError("`correlation_order` must be > 1") + self._correlation_order = correlation_order + + n_iterations = correlation_order - 1 # num iterations + # Parse the selected keys + self._angular_cutoff = angular_cutoff + + if arrays_backend == "torch": + array_like = torch.empty(0) + elif arrays_backend == "numpy": + array_like = np.empty(0) + + self._selected_keys: List[Union[Labels, None]] = ( + _clebsch_gordan._parse_selected_keys( + n_iterations=n_iterations, + array_like=array_like, + angular_cutoff=self._angular_cutoff, + selected_keys=selected_keys, + ) ) - n_iterations = correlation_order - 1 # num iterations - density = _clebsch_gordan._standardize_keys(density) # standardize metadata - density_correlation = density # create a copy to combine with itself - - # Parse the selected keys - selected_keys = _clebsch_gordan._parse_selected_keys( - n_iterations=n_iterations, - angular_cutoff=angular_cutoff, - selected_keys=selected_keys, - like=density.keys.values, - ) - # Parse the bool flags that control skipping of redundant CG combinations - # and TensorMap output from each iteration - skip_redundant, output_selection = _clebsch_gordan._parse_bool_iteration_filters( - n_iterations, - skip_redundant=skip_redundant, - output_selection=output_selection, - ) - - # Pre-compute the keys needed to perform each CG iteration - key_metadata = _clebsch_gordan._precompute_keys( - density.keys, - density.keys, - n_iterations=n_iterations, - selected_keys=selected_keys, - skip_redundant=skip_redundant, - ) - # Compute CG coefficient cache - if compute_metadata_only: - cg_cache = None - else: - angular_max = max( - _dispatch.concatenate( - [density.keys.column("spherical_harmonics_l")] - + [mdata[2].column("spherical_harmonics_l") for mdata in key_metadata] + # Parse the bool flags that control skipping of redundant CG combinations + # and TensorMap output from each iteration + self._skip_redundant, self._output_selection = ( + _clebsch_gordan._parse_bool_iteration_filters( + n_iterations, + skip_redundant=skip_redundant, + output_selection=output_selection, ) ) - # TODO: keys have been precomputed, so perhaps we don't need to - # compute all CG coefficients up to angular_max here. - # TODO: use sparse cache by default until we understand under which - # circumstances (and if) dense is faster. - cg_cache = _cg_cache.ClebschGordanReal(angular_max, sparse=sparse) - - # Perform iterative CG tensor products - density_correlations = [] - for iteration in range(n_iterations): - # Define the correlation order of the current iteration - correlation_order_it = iteration + 2 - - # Combine block pairs - blocks_out = [] - for key_1, key_2, key_out in zip(*key_metadata[iteration]): - block_out = _clebsch_gordan._combine_blocks_same_samples( - density_correlation[key_1], - density[key_2], - key_out["spherical_harmonics_l"], - cg_cache, - compute_metadata_only=compute_metadata_only, + + def forward(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]: + """ + Calls the :py:meth:`DensityCorrelations.compute` function. 
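+
+        A minimal sketch (hypothetical values):
+
+        >>> calculator = DensityCorrelations(max_angular=8, correlation_order=2)
+        >>> result = calculator(density)  # equivalent to calculator.compute(density)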
+ + This is intended for :py:class:`torch.nn.Module` compatibility, and should be + ignored in pure Python mode. + """ + return self.compute(density) + + def compute(self, density: TensorMap) -> Union[TensorMap, List[TensorMap]]: + """ + Computes the density correlations by taking iterative Clebsch-Gordan (CG) tensor + products of the input `density` descriptor with itself. + + :param density: A density descriptor of body order 2 (correlation order 1), in + :py:class:`TensorMap` format. This may be, for example, a rascaline + :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`. + Alternatively, this could be multi-center descriptor, such as a pair + density. + """ + return self._correlate_density( + density, + compute_metadata=False, + ) + + @torch_jit_export + def compute_metadata( + self, + density: TensorMap, + ) -> Union[TensorMap, List[TensorMap]]: + """ + Returns the metadata-only :py:class:`TensorMap` that would be output by the + function :py:meth:`compute` for the same calculator under the same settings, + without performing the actual Clebsch-Gordan tensor products. + + :param density: A density descriptor of body order 2 (correlation order 1), in + :py:class:`TensorMap` format. This may be, for example, a rascaline + :py:class:`SphericalExpansion` or :py:class:`LodeSphericalExpansion`. + Alternatively, this could be multi-center descriptor, such as a pair + density. + """ + return self._correlate_density( + density, + compute_metadata=True, + ) + + # ==================================================================== + # ===== Private functions that do the work on the TensorMap level + # ==================================================================== + def _correlate_density( + self, density: TensorMap, compute_metadata: bool + ) -> Union[TensorMap, List[TensorMap]]: + + # Check metadata + if not ( + density.keys.names == ["spherical_harmonics_l", "species_center"] + or density.keys.names + == ["spherical_harmonics_l", "species_center", "species_neighbor"] + ): + raise ValueError( + "input `density` must have key names" + ' ["spherical_harmonics_l", "species_center"] or' + ' ["spherical_harmonics_l", "species_center", "species_neighbor"]' ) - blocks_out.append(block_out) - keys_out = key_metadata[iteration][2] - density_correlation = TensorMap(keys=keys_out, blocks=blocks_out) - - # If this tensor is to be included in the output, move the [l1, l2, ...] - # keys to properties and store - if output_selection[iteration]: - density_correlations.append( - density_correlation.keys_to_properties( - [f"l{i}" for i in range(1, correlation_order_it + 1)] - + [f"k{i}" for i in range(2, correlation_order_it)] - ) + if not density.component_names == ["spherical_harmonics_m"]: + raise ValueError( + "input `density` must have a single component" + " axis with name `spherical_harmonics_m`" ) + n_iterations = self._correlation_order - 1 # num iterations + density = _clebsch_gordan._standardize_keys(density) # standardize metadata + density_correlation = density # create a copy to combine with itself - # Drop redundant key names. TODO: these should be part of the global - # matadata associated with the TensorMap. Awaiting this functionality in - # metatensor. 
- for i, tensor in enumerate(density_correlations): - keys = tensor.keys - if len(_dispatch.unique(tensor.keys.column("order_nu"))) == 1: - keys = keys.remove(name="order_nu") - if len(_dispatch.unique(tensor.keys.column("inversion_sigma"))) == 1: - keys = keys.remove(name="inversion_sigma") - density_correlations[i] = TensorMap( keys=keys, blocks=[b.copy() for b in tensor.blocks()] ) + # TODO: implement combinations of gradients too + # a plain bool list comprehension is used here to keep this TorchScript + # compatible + contains_gradients = any( + [len(list(block.gradients())) > 0 for _, block in density.items()] + ) + if contains_gradients: + raise NotImplementedError( + "Clebsch Gordan combinations with gradients not yet implemented." + " Use metatensor.remove_gradients to remove gradients from the input." + ) + # Pre-compute the keys needed to perform each CG iteration + key_metadata = _clebsch_gordan._precompute_keys( + density.keys, + density.keys, + n_iterations=n_iterations, + selected_keys=self._selected_keys, + skip_redundant=self._skip_redundant, + ) + max_angular = max( + _dispatch.max(density.keys.column("spherical_harmonics_l")), + max( + [ + int(_dispatch.max(mdata[2].column("spherical_harmonics_l"))) + for mdata in key_metadata + ] + ), + ) + if self._max_angular < max_angular: + raise ValueError( + f"The density you provided requires max_angular={max_angular} " + f"but on initialization max_angular={self._max_angular} was given" + ) + + # Perform iterative CG tensor products + density_correlations: List[TensorMap] = [] + if compute_metadata: + cg_backend = "metadata" + else: + cg_backend = self._cg_backend + + for iteration in range(n_iterations): + # Define the correlation order of the current iteration + correlation_order_it = iteration + 2 + + # Combine block pairs + blocks_out: List[TensorBlock] = [] + key_metadata_i = key_metadata[iteration] + for j in range(len(key_metadata_i[0])): + key_1: LabelsEntry = key_metadata_i[0][j] + key_2: LabelsEntry = key_metadata_i[1][j] + lambda_out: int = int( + key_metadata_i[2].column("spherical_harmonics_l")[j] + ) + block_out = _clebsch_gordan._combine_blocks_same_samples( + density_correlation.block(key_1), + density.block(key_2), + lambda_out, + self._cg_coefficients, + cg_backend, + ) + blocks_out.append(block_out) + keys_out = key_metadata[iteration][2] + density_correlation = TensorMap(keys=keys_out, blocks=blocks_out) + + # If this tensor is to be included in the output, move the [l1, l2, ...] + # keys to properties and store + if self._output_selection[iteration]: + density_correlations.append( + density_correlation.keys_to_properties( + [f"l{i}" for i in range(1, correlation_order_it + 1)] + + [f"k{i}" for i in range(2, correlation_order_it)] + ) + ) + + # Drop redundant key names. TODO: these should be part of the global + # metadata associated with the TensorMap. Awaiting this functionality in + # metatensor.
+        for i, tensor in enumerate(density_correlations):
+            keys = tensor.keys
+            if len(_dispatch.unique(tensor.keys.column("order_nu"))) == 1:
+                keys = keys.remove(name="order_nu")
+            if len(_dispatch.unique(tensor.keys.column("inversion_sigma"))) == 1:
+                keys = keys.remove(name="inversion_sigma")
+            density_correlations[i] = TensorMap(
+                keys=keys, blocks=[b.copy() for b in tensor.blocks()]
+            )
 
-    # Return a single TensorMap in the simple case
-    if len(density_correlations) == 1:
-        return density_correlations[0]
+        # Return a single TensorMap in the simple case
+        if len(density_correlations) == 1:
+            return density_correlations[0]
 
-    # Otherwise return a list of TensorMaps
-    return density_correlations
+        # Otherwise return a list of TensorMaps
+        return density_correlations
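For orientation, the `DensityCorrelations` class above replaces the old `correlate_density` / `correlate_density_metadata` free functions (see the test changes further down). A minimal usage sketch, not part of the patch; the hyper-parameter values are illustrative, and the geometry is the one used by the test helpers:

    import ase
    import rascaline
    from rascaline.utils import DensityCorrelations

    # a single isolated water molecule (same geometry as the test helpers)
    frames = [
        ase.Atoms(
            symbols=["O", "H", "H"],
            positions=[
                [2.566334, 2.5, 2.503701],
                [1.973617, 1.730673, 2.470634],
                [1.973617, 3.269327, 2.470634],
            ],
        )
    ]

    # illustrative hyper-parameters for a small spherical expansion
    hypers = {
        "cutoff": 3.0,
        "max_radial": 4,
        "max_angular": 2,
        "atomic_gaussian_width": 0.3,
        "center_atom_weight": 1.0,
        "radial_basis": {"Gto": {}},
        "cutoff_function": {"ShiftedCosine": {"width": 0.5}},
    }
    density = rascaline.SphericalExpansion(**hypers).compute(frames)

    # max_angular must cover every intermediate angular channel: without any
    # key selection this means hypers["max_angular"] * correlation_order
    calculator = DensityCorrelations(max_angular=4, correlation_order=2)
    nu_2 = calculator.compute(density)                 # actual CG products
    nu_2_meta = calculator.compute_metadata(density)   # metadata only, no products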
- """ - if isinstance(arrays[0], TorchTensor): - _check_all_torch_tensor(arrays) - return torch.concatenate(arrays, axis) - elif isinstance(arrays[0], np.ndarray): - _check_all_np_ndarray(arrays) - return np.concatenate(arrays, axis) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def empty_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): - """ - Create an uninitialized array, with the given ``shape``, and similar dtype, - device and other options as ``array``. - - If ``shape`` is :py:obj:`None`, the array shape is used instead. - ``requires_grad`` is only used for torch tensors, and set the corresponding - value on the returned array. - - This is the equivalent to ``np.empty_like(array, shape=shape)``. - """ - if isinstance(array, TorchTensor): - if shape is None: - shape = array.size() - return torch.empty( - shape, - dtype=array.dtype, - layout=array.layout, - device=array.device, - ).requires_grad_(requires_grad) - elif isinstance(array, np.ndarray): - return np.empty_like(array, shape=shape, subok=False) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def list_to_array(array, data: List[List[int]]): - """Create an object from data with the same type as ``array``.""" - if isinstance(array, TorchTensor): - return torch.tensor(data) - elif isinstance(array, np.ndarray): - return np.array(data) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def matmul(a, b): - """Matrix product of two arrays.""" - if isinstance(a, TorchTensor): - _check_all_torch_tensor([b]) - return torch.matmul(a, b) - elif isinstance(a, np.ndarray): - _check_all_np_ndarray([b]) - return np.matmul(a, b) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def to_index_array(array): - """Returns an array that is suitable for indexing a dimension of - a different array. - After a few checks (int, 1D), this operation will convert the dtype to - torch.long (which is, in some torch versions, the only acceptable type - of index tensor). Numpy arrays are left unchanged. - """ - if len(array.shape) != 1: - raise ValueError("Index arrays must be 1D") - - if isinstance(array, TorchTensor): - if torch.is_floating_point(array): - raise ValueError("Index arrays must be integers") - return array.to(torch.long) - elif isinstance(array, np.ndarray): - if not np.issubdtype(array.dtype, np.integer): - raise ValueError("Index arrays must be integers") - return array - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def unique(array, axis: Optional[int] = None): - """Find the unique elements of an array.""" - if isinstance(array, TorchTensor): - return torch.unique(array, dim=axis) - elif isinstance(array, np.ndarray): - return np.unique(array, axis=axis) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) - - -def zeros_like(array, shape: Optional[List[int]] = None, requires_grad: bool = False): - """ - Create an array filled with zeros, with the given ``shape``, and similar - dtype, device and other options as ``array``. - - If ``shape`` is :py:obj:`None`, the array shape is used instead. - ``requires_grad`` is only used for torch tensors, and set the corresponding - value on the returned array. - - This is the equivalent to ``np.zeros_like(array, shape=shape)``. 
- """ - if isinstance(array, TorchTensor): - if shape is None: - shape = array.size() - - return torch.zeros( - shape, - dtype=array.dtype, - layout=array.layout, - device=array.device, - ).requires_grad_(requires_grad) - elif isinstance(array, np.ndarray): - return np.zeros_like(array, shape=shape, subok=False) - else: - raise TypeError(UNKNOWN_ARRAY_TYPE) diff --git a/python/rascaline/rascaline/utils/power_spectrum/calculator.py b/python/rascaline/rascaline/utils/power_spectrum/calculator.py index d96a65e55..f64bbb7c1 100644 --- a/python/rascaline/rascaline/utils/power_spectrum/calculator.py +++ b/python/rascaline/rascaline/utils/power_spectrum/calculator.py @@ -2,11 +2,18 @@ from math import sqrt from typing import List, Optional, Union -from . import _dispatch -from ._classes import CalculatorBase, IntoSystem, Labels, TensorBlock, TensorMap - - -class PowerSpectrum: +from .. import _dispatch +from .._backend import ( + CalculatorBase, + IntoSystem, + Labels, + TensorBlock, + TensorMap, + TorchModule, +) + + +class PowerSpectrum(TorchModule): r"""General power spectrum of one or of two calculators. If ``calculator_2`` is provided, the invariants :math:`p_{nl}` are generated by @@ -124,6 +131,7 @@ def __init__( calculator_2: Optional[CalculatorBase] = None, species: Optional[List[int]] = None, ): + super().__init__() self.calculator_1 = calculator_1 self.calculator_2 = calculator_2 self.species = species @@ -310,6 +318,25 @@ def compute( return TensorMap(new_keys, new_blocks).keys_to_properties("l") + def forward( + self, + systems: Union[IntoSystem, List[IntoSystem]], + gradients: Optional[List[str]] = None, + use_native_system: bool = True, + ) -> TensorMap: + """ + Calls the :py:meth:`PowerSpectrum.compute` function. + + This is intended for :py:class:`torch.nn.Module` compatibility, and should be + ignored in pure Python mode. 
+ """ + + return self.compute( + systems=systems, + gradients=gradients, + use_native_system=use_native_system, + ) + def _positions_gradients( new_block: TensorBlock, block_1: TensorBlock, block_2: TensorBlock, factor: float diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py index 7822d159e..59da6ba02 100644 --- a/python/rascaline/tests/utils/correlate_density.py +++ b/python/rascaline/tests/utils/correlate_density.py @@ -8,14 +8,10 @@ from metatensor import Labels, TensorBlock, TensorMap import rascaline -from rascaline.utils import PowerSpectrum -from rascaline.utils.clebsch_gordan._cg_cache import ClebschGordanReal +from rascaline.utils import PowerSpectrum, _dispatch +from rascaline.utils.clebsch_gordan._cg_cache import calculate_cg_coefficients from rascaline.utils.clebsch_gordan._clebsch_gordan import _standardize_keys -from rascaline.utils.clebsch_gordan.correlate_density import ( - _correlate_density, - correlate_density, - correlate_density_metadata, -) +from rascaline.utils.clebsch_gordan.correlate_density import DensityCorrelations # Try to import some modules @@ -35,11 +31,29 @@ HAS_SYMPY = True except ImportError: HAS_SYMPY = False +try: + from mops import sparse_accumulation_of_products as sap # noqa F401 + + HAS_MOPS = True +except ImportError: + HAS_MOPS = False if HAS_SYMPY: from .rotations import WignerDReal, transform_frame_o3, transform_frame_so3 +try: + import torch + + HAS_TORCH = True +except ImportError: + HAS_TORCH = False + +if HAS_TORCH: + ARRAYS_BACKEND = ["numpy", "torch"] +else: + ARRAYS_BACKEND = ["numpy"] + DATA_ROOT = os.path.join(os.path.dirname(__file__), "data") SPHEX_HYPERS = { @@ -63,32 +77,48 @@ } -# ============ Pytest fixtures ============ - - -@pytest.fixture() -def cg_cache_sparse(): - return ClebschGordanReal(lambda_max=5, sparse=True) - - -@pytest.fixture() -def cg_cache_dense(): - return ClebschGordanReal(lambda_max=5, sparse=False) - - # ============ Helper functions ============ def h2_isolated(): - return ase.io.read(os.path.join(DATA_ROOT, "h2_isolated.xyz"), ":") + return [ + ase.Atoms( + symbols=["H", "H"], + positions=[ + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + ) + ] def h2o_isolated(): - return ase.io.read(os.path.join(DATA_ROOT, "h2o_isolated.xyz"), ":") + return [ + ase.Atoms( + symbols=["O", "H", "H"], + positions=[ + [2.56633400, 2.50000000, 2.50370100], + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + ) + ] def h2o_periodic(): - return ase.io.read(os.path.join(DATA_ROOT, "h2o_periodic.xyz"), ":") + + return [ + ase.Atoms( + symbols=["O", "H", "H"], + positions=[ + [2.56633400, 2.50000000, 2.50370100], + [1.97361700, 1.73067300, 2.47063400], + [1.97361700, 3.26932700, 2.47063400], + ], + cell=[5, 5, 5], + pbc=[True, True, True], + ) + ] def wigner_d_matrices(lmax: int): @@ -169,19 +199,14 @@ def test_so3_equivariance(): nu_1 = spherical_expansion(frames) nu_1_so3 = spherical_expansion(frames_so3) - - nu_3 = correlate_density( - density=nu_1, - correlation_order=nu_target, - angular_cutoff=angular_cutoff, - selected_keys=selected_keys, - ) - nu_3_so3 = correlate_density( - density=nu_1_so3, + corr_calculator = DensityCorrelations( + max_angular=3, correlation_order=nu_target, angular_cutoff=angular_cutoff, selected_keys=selected_keys, ) + nu_3 = corr_calculator.compute(nu_1) + nu_3_so3 = corr_calculator.compute(nu_1_so3) nu_3_transf = wig.transform_tensormap_so3(nu_3) assert 
diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py
index 7822d159e..59da6ba02 100644
--- a/python/rascaline/tests/utils/correlate_density.py
+++ b/python/rascaline/tests/utils/correlate_density.py
@@ -8,14 +8,10 @@
 from metatensor import Labels, TensorBlock, TensorMap
 
 import rascaline
-from rascaline.utils import PowerSpectrum
-from rascaline.utils.clebsch_gordan._cg_cache import ClebschGordanReal
+from rascaline.utils import PowerSpectrum, _dispatch
+from rascaline.utils.clebsch_gordan._cg_cache import calculate_cg_coefficients
 from rascaline.utils.clebsch_gordan._clebsch_gordan import _standardize_keys
-from rascaline.utils.clebsch_gordan.correlate_density import (
-    _correlate_density,
-    correlate_density,
-    correlate_density_metadata,
-)
+from rascaline.utils.clebsch_gordan.correlate_density import DensityCorrelations
 
 
 # Try to import some modules
@@ -35,11 +31,29 @@
     HAS_SYMPY = True
 except ImportError:
     HAS_SYMPY = False
+try:
+    from mops import sparse_accumulation_of_products as sap  # noqa: F401
+
+    HAS_MOPS = True
+except ImportError:
+    HAS_MOPS = False
 
 if HAS_SYMPY:
     from .rotations import WignerDReal, transform_frame_o3, transform_frame_so3
 
+try:
+    import torch
+
+    HAS_TORCH = True
+except ImportError:
+    HAS_TORCH = False
+
+if HAS_TORCH:
+    ARRAYS_BACKEND = ["numpy", "torch"]
+else:
+    ARRAYS_BACKEND = ["numpy"]
+
 DATA_ROOT = os.path.join(os.path.dirname(__file__), "data")
 
 SPHEX_HYPERS = {
@@ -63,32 +77,48 @@
 }
 
 
-# ============ Pytest fixtures ============
-
-
-@pytest.fixture()
-def cg_cache_sparse():
-    return ClebschGordanReal(lambda_max=5, sparse=True)
-
-
-@pytest.fixture()
-def cg_cache_dense():
-    return ClebschGordanReal(lambda_max=5, sparse=False)
-
-
 # ============ Helper functions ============
 
 
 def h2_isolated():
-    return ase.io.read(os.path.join(DATA_ROOT, "h2_isolated.xyz"), ":")
+    return [
+        ase.Atoms(
+            symbols=["H", "H"],
+            positions=[
+                [1.97361700, 1.73067300, 2.47063400],
+                [1.97361700, 3.26932700, 2.47063400],
+            ],
+        )
+    ]
 
 
 def h2o_isolated():
-    return ase.io.read(os.path.join(DATA_ROOT, "h2o_isolated.xyz"), ":")
+    return [
+        ase.Atoms(
+            symbols=["O", "H", "H"],
+            positions=[
+                [2.56633400, 2.50000000, 2.50370100],
+                [1.97361700, 1.73067300, 2.47063400],
+                [1.97361700, 3.26932700, 2.47063400],
+            ],
+        )
+    ]
 
 
 def h2o_periodic():
-    return ase.io.read(os.path.join(DATA_ROOT, "h2o_periodic.xyz"), ":")
+
+    return [
+        ase.Atoms(
+            symbols=["O", "H", "H"],
+            positions=[
+                [2.56633400, 2.50000000, 2.50370100],
+                [1.97361700, 1.73067300, 2.47063400],
+                [1.97361700, 3.26932700, 2.47063400],
+            ],
+            cell=[5, 5, 5],
+            pbc=[True, True, True],
+        )
+    ]
 
 
 def wigner_d_matrices(lmax: int):
@@ -169,19 +199,14 @@ def test_so3_equivariance():
 
     nu_1 = spherical_expansion(frames)
     nu_1_so3 = spherical_expansion(frames_so3)
-
-    nu_3 = correlate_density(
-        density=nu_1,
-        correlation_order=nu_target,
-        angular_cutoff=angular_cutoff,
-        selected_keys=selected_keys,
-    )
-    nu_3_so3 = correlate_density(
-        density=nu_1_so3,
+    corr_calculator = DensityCorrelations(
+        max_angular=3,
+        correlation_order=nu_target,
         angular_cutoff=angular_cutoff,
         selected_keys=selected_keys,
     )
+    nu_3 = corr_calculator.compute(nu_1)
+    nu_3_so3 = corr_calculator.compute(nu_1_so3)
 
     nu_3_transf = wig.transform_tensormap_so3(nu_3)
     assert metatensor.allclose(nu_3_transf, nu_3_so3)
@@ -203,18 +228,14 @@ def test_o3_equivariance():
 
     nu_1 = spherical_expansion(frames)
     nu_1_o3 = spherical_expansion(frames_o3)
 
-    nu_3 = correlate_density(
-        density=nu_1,
-        correlation_order=nu_target,
-        angular_cutoff=angular_cutoff,
-        selected_keys=selected_keys,
-    )
-    nu_3_o3 = correlate_density(
-        density=nu_1_o3,
+    corr_calculator = DensityCorrelations(
+        max_angular=angular_cutoff,
+        correlation_order=nu_target,
         angular_cutoff=angular_cutoff,
         selected_keys=selected_keys,
     )
+    nu_3 = corr_calculator.compute(nu_1)
+    nu_3_o3 = corr_calculator.compute(nu_1_o3)
 
     nu_3_transf = wig.transform_tensormap_o3(nu_3)
     assert metatensor.allclose(nu_3_transf, nu_3_o3)
@@ -238,13 +259,14 @@ def test_lambda_soap_vs_powerspectrum():
 
     # Build a lambda-SOAP
     density = spherical_expansion(frames)
-    lsoap = correlate_density(
-        density=density,
+    corr_calculator = DensityCorrelations(
+        max_angular=SPHEX_HYPERS["max_angular"],
         correlation_order=2,
         selected_keys=Labels(
             names=["spherical_harmonics_l"], values=np.array([0]).reshape(-1, 1)
         ),
     )
+    lsoap = corr_calculator.compute(density)
 
     keys = lsoap.keys.remove(name="spherical_harmonics_l")
     lsoap = TensorMap(keys=keys, blocks=[b.copy() for b in lsoap.blocks()])
@@ -288,21 +310,23 @@ def test_correlate_density_norm(correlation_order):
     nu1 = spherical_expansion_small(frames)
 
     # Build higher body order tensor without sorting the l lists
-    nux = correlate_density(
-        nu1,
+    corr_calculator = DensityCorrelations(
+        max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order,
         correlation_order=correlation_order,
         angular_cutoff=None,
         selected_keys=None,
         skip_redundant=False,
     )
-    # Build higher body order tensor *with* sorting the l lists
-    nux_sorted_l = correlate_density(
-        nu1,
+    corr_calculator_skip_redundant = DensityCorrelations(
+        max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order,
         correlation_order=correlation_order,
         angular_cutoff=None,
         selected_keys=None,
         skip_redundant=True,
     )
+    nux = corr_calculator.compute(nu1)
+    # Build higher body order tensor *with* sorting the l lists
+    nux_sorted_l = corr_calculator_skip_redundant.compute(nu1)
 
     # Standardize the features by passing through the CG combination code but with
     # no iterations (i.e. body order 1 -> 1)
@@ -349,7 +373,8 @@
 
 
 @pytest.mark.parametrize("l1, l2", [(1, 2), (2, 3), (0, 5)])
-def test_clebsch_gordan_orthogonality(cg_cache_dense, l1, l2):
+@pytest.mark.parametrize("arrays_backend", ARRAYS_BACKEND)
+def test_clebsch_gordan_orthogonality(l1, l2, arrays_backend):
     """
     Test orthogonality relationships of cached dense CG coefficients.
 
     See
     https://en.wikipedia.org/wiki/Clebsch%E2%80%93Gordan_coefficients#Orthogonality_relations
     for details.
""" + cg_coeffs = calculate_cg_coefficients( + lambda_max=5, sparse=False, use_torch=arrays_backend == "torch" + ) + lam_min = abs(l1 - l2) lam_max = l1 + l2 + if arrays_backend == "torch": + int64_like = torch.empty(0, dtype=torch.int64) + float64_like = torch.empty(0, dtype=torch.float64) + bool_like = torch.empty(0, dtype=torch.bool) + elif arrays_backend == "numpy": + int64_like = np.empty(0, dtype=np.int64) + float64_like = np.empty(0, dtype=np.float64) + bool_like = np.empty(0, dtype=np.bool_) + else: + raise ValueError(f"Not supported arrays backend {arrays_backend}.") # We test lam dimension # \sum_{-m1 \leq l1 \leq m1, -m2 \leq l2 \leq m2} # <λμ|l1m1,l2m2> = δ_μμ' for lam in range(lam_min, lam_max): - cg_mat = cg_cache_dense.coeffs[(l1, l2, lam)].reshape(-1, 2 * lam + 1) + cg_mat = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lam}).values.reshape( + -1, 2 * lam + 1 + ) dot_product = cg_mat.T @ cg_mat - diag_mask = np.zeros(dot_product.shape, dtype=np.bool_) - diag_mask[np.diag_indices(len(dot_product))] = True - assert np.allclose( - dot_product[~diag_mask], np.zeros(dot_product.shape)[~diag_mask] + diag_mask = _dispatch.zeros_like(bool_like, dot_product.shape) + diag_indices = ( + _dispatch.int_range_like(0, len(dot_product), int64_like), + _dispatch.int_range_like(0, len(dot_product), int64_like), + ) + diag_mask[diag_indices] = True + assert _dispatch.allclose( + dot_product[~diag_mask], + _dispatch.zeros_like(float64_like, dot_product.shape)[~diag_mask], + rtol=1e-05, + atol=1e-08, + ) + assert _dispatch.allclose( + dot_product[diag_mask], dot_product[diag_mask][0:1], rtol=1e-05, atol=1e-08 ) - assert np.allclose(dot_product[diag_mask], dot_product[diag_mask][0]) # We test l1 l2 dimension # \sum_{|l1-l2| \leq λ \leq l1+l2} \sum_{-μ \leq λ \leq μ} # <λμ|l1m1,l2m2> = δ_m1m1' δ_m2m2' l1l2_dim = (2 * l1 + 1) * (2 * l2 + 1) - dot_product = np.zeros((l1l2_dim, l1l2_dim)) + dot_product = _dispatch.zeros_like(float64_like, (l1l2_dim, l1l2_dim)) for lam in range(lam_min, lam_max + 1): - cg_mat = cg_cache_dense.coeffs[(l1, l2, lam)].reshape(-1, 2 * lam + 1) + cg_mat = cg_coeffs.block({"l1": l1, "l2": l2, "lambda": lam}).values.reshape( + -1, 2 * lam + 1 + ) dot_product += cg_mat @ cg_mat.T - diag_mask = np.zeros(dot_product.shape, dtype=np.bool_) - diag_mask[np.diag_indices(len(dot_product))] = True + diag_mask = _dispatch.zeros_like(bool_like, dot_product.shape) + diag_indices = ( + _dispatch.int_range_like(0, len(dot_product), int64_like), + _dispatch.int_range_like(0, len(dot_product), int64_like), + ) + diag_mask[diag_indices] = True - assert np.allclose(dot_product[~diag_mask], np.zeros(dot_product.shape)[~diag_mask]) - assert np.allclose(dot_product[diag_mask], dot_product[diag_mask][0]) + assert _dispatch.allclose( + dot_product[~diag_mask], + _dispatch.zeros_like(float64_like, dot_product.shape)[~diag_mask], + rtol=1e-05, + atol=1e-08, + ) + assert _dispatch.allclose( + dot_product[diag_mask], dot_product[diag_mask][0:1], rtol=1e-05, atol=1e-08 + ) @pytest.mark.skipif( @@ -393,28 +456,57 @@ def test_clebsch_gordan_orthogonality(cg_cache_dense, l1, l2): ) def test_correlate_density_dense_sparse_agree(): """ - Tests for agreement between nu=3 tensors built using both sparse and dense + Tests for agreement between nu=2 tensors built using both sparse and dense CG coefficient caches. 
""" frames = h2o_periodic() density = spherical_expansion_small(frames) + correlation_order = 2 + corr_calculator_sparse = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, + correlation_order=correlation_order, + cg_backend="python-sparse", + ) + corr_calculator_dense = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, + correlation_order=correlation_order, + cg_backend="python-dense", + ) # NOTE: testing the private function here so we can control the use of # sparse v dense CG cache - n_body_sparse = _correlate_density( - density, - correlation_order=2, - compute_metadata_only=False, - sparse=True, + n_body_sparse = corr_calculator_sparse.compute(density) + n_body_dense = corr_calculator_dense.compute(density) + + assert metatensor.allclose(n_body_sparse, n_body_dense, atol=1e-8, rtol=1e-8) + + +@pytest.mark.skipif(not HAS_MOPS, reason="mops is not installed") +def test_correlate_density_mops_python_sparse_agree(): + """ + Tests for agreement between nu=2 tensors built using both "python-sparse" + and "mops" CG backend. + """ + frames = h2o_periodic() + density = spherical_expansion_small(frames) + + correlation_order = 2 + corr_calculator_python = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, + correlation_order=correlation_order, + cg_backend="python-sparse", ) - n_body_dense = _correlate_density( - density, - correlation_order=2, - compute_metadata_only=False, - sparse=False, + corr_calculator_mops = DensityCorrelations( + max_angular=SPHEX_HYPERS_SMALL["max_angular"] * correlation_order, + correlation_order=correlation_order, + cg_backend="mops", ) + # NOTE: testing the private function here so we can control the use of + # sparse v dense CG cache + n_body_python = corr_calculator_python.compute(density) + n_body_mops = corr_calculator_mops.compute(density) - assert metatensor.allclose(n_body_sparse, n_body_dense, atol=1e-8, rtol=1e-8) + assert metatensor.allclose(n_body_python, n_body_mops, atol=1e-8, rtol=1e-8) # ============ Test metadata ============ @@ -429,26 +521,24 @@ def test_correlate_density_metadata_agree(): :py:func:`correlate_density_metadata` agree. """ frames = h2o_isolated() - correlation_order = 3 skip_redundant = True - for nu1 in [spherical_expansion_small(frames), spherical_expansion(frames)]: - # Build higher body order tensor with CG computation - nux = correlate_density( - nu1, - correlation_order=correlation_order, + + for max_angular, nu1 in [ + (2, spherical_expansion_small(frames)), + (3, spherical_expansion(frames)), + ]: + corr_calculator = DensityCorrelations( + max_angular=max_angular, + correlation_order=3, angular_cutoff=3, selected_keys=None, skip_redundant=skip_redundant, ) + # Build higher body order tensor with CG computation + nux = corr_calculator.compute(nu1) # Build higher body order tensor without CG computation - i.e. 
 
 
 # ============ Test metadata ============
 
 
@@ -429,26 +521,24 @@ def test_correlate_density_metadata_agree():
     :py:func:`correlate_density_metadata` agree.
     """
     frames = h2o_isolated()
-    correlation_order = 3
     skip_redundant = True
-    for nu1 in [spherical_expansion_small(frames), spherical_expansion(frames)]:
-        # Build higher body order tensor with CG computation
-        nux = correlate_density(
-            nu1,
-            correlation_order=correlation_order,
+
+    for max_angular, nu1 in [
+        (2, spherical_expansion_small(frames)),
+        (3, spherical_expansion(frames)),
+    ]:
+        corr_calculator = DensityCorrelations(
+            max_angular=max_angular,
+            correlation_order=3,
             angular_cutoff=3,
             selected_keys=None,
             skip_redundant=skip_redundant,
         )
+        # Build higher body order tensor with CG computation
+        nux = corr_calculator.compute(nu1)
         # Build higher body order tensor without CG computation - i.e. metadata
         # only
-        nux_metadata_only = correlate_density_metadata(
-            nu1,
-            correlation_order=correlation_order,
-            angular_cutoff=3,
-            selected_keys=None,
-            skip_redundant=skip_redundant,
-        )
+        nux_metadata_only = corr_calculator.compute_metadata(nu1)
 
         assert metatensor.equal_metadata(nux, nux_metadata_only)
 
 
@@ -460,9 +550,11 @@
     ],
 )
 @pytest.mark.parametrize("skip_redundant", [True, False])
+@pytest.mark.parametrize("arrays_backend", ARRAYS_BACKEND + [None])
 def test_correlate_density_angular_selection(
     selected_keys: Labels,
     skip_redundant: bool,
+    arrays_backend: str,
 ):
     """
     Tests that the correct angular channels are output based on the specified
 
     frames = h2o_isolated()
     nu_1 = spherical_expansion(frames)
 
-    nu_2 = correlate_density(
-        density=nu_1,
-        correlation_order=2,
+    correlation_order = 2
+    corr_calculator = DensityCorrelations(
+        max_angular=SPHEX_HYPERS["max_angular"] * correlation_order,
+        correlation_order=correlation_order,
         angular_cutoff=None,
         selected_keys=selected_keys,
         skip_redundant=skip_redundant,
+        arrays_backend=arrays_backend,
     )
+    if arrays_backend is not None:
+        nu_1 = nu_1.to(arrays=arrays_backend)
+    nu_2 = corr_calculator.compute(nu_1)
 
     if selected_keys is None:
         assert np.all(
diff --git a/python/rascaline/tests/utils/data/h2_isolated.xyz b/python/rascaline/tests/utils/data/h2_isolated.xyz
deleted file mode 100644
index ec5f59680..000000000
--- a/python/rascaline/tests/utils/data/h2_isolated.xyz
+++ /dev/null
@@ -1,4 +0,0 @@
-2
-pbc="F F F"
-H 1.97361700 1.73067300 2.47063400
-H 1.97361700 3.26932700 2.47063400
diff --git a/python/rascaline/tests/utils/data/h2o_isolated.xyz b/python/rascaline/tests/utils/data/h2o_isolated.xyz
deleted file mode 100644
index fc876d2ba..000000000
--- a/python/rascaline/tests/utils/data/h2o_isolated.xyz
+++ /dev/null
@@ -1,5 +0,0 @@
-3
-pbc="F F F"
-O 2.56633400 2.50000000 2.50370100
-H 1.97361700 1.73067300 2.47063400
-H 1.97361700 3.26932700 2.47063400
diff --git a/python/rascaline/tests/utils/data/h2o_periodic.xyz b/python/rascaline/tests/utils/data/h2o_periodic.xyz
deleted file mode 100644
index 3374566e6..000000000
--- a/python/rascaline/tests/utils/data/h2o_periodic.xyz
+++ /dev/null
@@ -1,5 +0,0 @@
-3
-Lattice="5.0 0.0 0.0 0.0 5.0 0.0 0.0 0.0 5.0" pbc="T T T"
-O 2.56633400 2.50000000 2.50370100
-H 1.97361700 1.73067300 2.47063400
-H 1.97361700 3.26932700 2.47063400
diff --git a/tox.ini b/tox.ini
index 6d59f3066..4265a7232 100644
--- a/tox.ini
+++ b/tox.ini
@@ -62,7 +62,7 @@ commands =
 # note: platform_system can be "Linux","Darwin", or "Windows".
 description =
     Run Python unit tests with all dependencies installed (ase, pyscf,
-    and chemfiles are optional dependencies)
+    chemfiles, and torch are optional dependencies)
 deps =
     {[testenv]metatensor-core-requirement}
     ase
@@ -72,6 +72,7 @@ deps =
     pytest-cov
     scipy
    sympy
+    torch
     pyscf;platform_system!="Windows"
     wigners
 # TODO: add mops once it becomes stable enough (and potentially supports windows)
@@ -79,7 +80,6 @@ deps =
 commands =
     pytest {[testenv]test_options} {posargs}
 
-
 [testenv:min-deps]
 description = Run Python unit tests with the minimal dependencies installed
 deps =
@@ -97,6 +97,7 @@ deps =
     {[testenv]packaging_deps}
     {[testenv]metatensor-torch-requirement}
+    metatensor-operations
     pytest
     pytest-cov
     numpy