Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[maint] Merge main to feature branch #2544

Merged
merged 5 commits into from
Jan 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/config/gitlab_commits.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
nvidia-mgpu-repo: cuda-quantum/cuquantum-mgpu.git
nvidia-mgpu-commit: 806e7fe5c459f52296ae0d3bd8bc57c3ea806152
nvidia-mgpu-commit: fdea89e034c7ac6b6b3d0e239d8924bcd2c08f96
2 changes: 1 addition & 1 deletion docker/build/devdeps.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -170,5 +170,5 @@ RUN apt-get update && apt-get install -y --no-install-recommends python3 python3
&& python3 -m pip install --no-cache-dir \
ipython==8.15.0 pandoc==2.3 sphinx==5.3.0 sphinx_rtd_theme==1.2.0 sphinx-reredirects==0.1.2 \
sphinx-copybutton==0.5.2 sphinx_inline_tabs==2023.4.21 enum-tools[sphinx] breathe==4.34.0 \
nbsphinx==0.9.2 sphinx_gallery==0.13.0 myst-parser==1.0.0 ipykernel==6.29.4 notebook==7.1.3 \
nbsphinx==0.9.2 sphinx_gallery==0.13.0 myst-parser==1.0.0 ipykernel==6.29.4 notebook==7.3.2 \
ipywidgets==8.1.5
2 changes: 1 addition & 1 deletion docker/release/cudaq.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
&& ln -s /bin/python3 /bin/python
RUN apt-get update && apt-get install -y --no-install-recommends gcc g++ python3-dev \
# Ref: https://github.com/qutip/qutip/issues/2412
&& python3 -m pip install --no-cache-dir notebook==7.1.3 "qutip<5" matplotlib \
&& python3 -m pip install --no-cache-dir notebook==7.3.2 "qutip<5" matplotlib \
&& apt-get remove -y gcc g++ python3-dev \
&& apt-get autoremove -y && apt-get clean && rm -rf /var/lib/apt/lists/*

Expand Down
2 changes: 1 addition & 1 deletion docker/release/cudaq.ext.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ RUN cuda_version_suffix=$(echo ${CUDA_VERSION} | tr . -) && \
apt-get install -y --no-install-recommends curl jq
RUN if [ -x "$(command -v pip)" ]; then \
apt-get install -y --no-install-recommends gcc libpython3-dev \
&& pip install --no-cache-dir jupyterlab; \
&& pip install --no-cache-dir jupyterlab==4.3.4; \
if [ -n "$MPI_ROOT" ]; then \
pip install --no-cache-dir mpi4py~=3.1; \
fi; \
Expand Down
2 changes: 2 additions & 0 deletions docs/sphinx/api/languages/python_api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,8 @@ Data Types
.. autoclass:: cudaq.operator.cudm_state.CuDensityMatState
:members:

.. autoclass:: cudaq.operator.helpers.InitialState

.. autofunction:: cudaq.operator.cudm_state.to_cupy_array

.. autoclass:: cudaq::SampleResult
Expand Down
71 changes: 71 additions & 0 deletions docs/sphinx/examples/python/dynamics/mgmn/initial_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import cudaq
from cudaq import operators, spin, Schedule, RungeKuttaIntegrator

import numpy as np
import matplotlib.pyplot as plt
import os

# On a system with multiple GPUs, `mpiexec` can be used as follows:
# `mpiexec -np <N> python3 multi_gpu.py `
cudaq.mpi.initialize()

# Set the target to our dynamics simulator
cudaq.set_target("dynamics")

# Large number of spins
N = 20
dimensions = {}
for i in range(N):
dimensions[i] = 2

# Observable is the average magnetization operator
avg_magnetization_op = operators.zero()
for i in range(N):
avg_magnetization_op += (spin.z(i) / N)

# Arbitrary coupling constant
g = 1.0
# Construct the Hamiltonian
H = operators.zero()
for i in range(N):
H += 2 * np.pi * spin.x(i)
H += 2 * np.pi * spin.y(i)
for i in range(N - 1):
H += 2 * np.pi * g * spin.x(i) * spin.x(i + 1)
H += 2 * np.pi * g * spin.y(i) * spin.z(i + 1)

steps = np.linspace(0.0, 1, 200)
schedule = Schedule(steps, ["time"])

# Initial state (expressed as an enum)
psi0 = cudaq.operator.InitialState.ZERO
# This can also be used to initialize a uniformly-distributed wave-function instead.
# `psi0 = cudaq.operator.InitialState.UNIFORM`

# Run the simulation
evolution_result = cudaq.evolve(H,
dimensions,
schedule,
psi0,
observables=[avg_magnetization_op],
collapse_operators=[],
store_intermediate_results=True,
integrator=RungeKuttaIntegrator())

exp_val = [
exp_vals[0].expectation()
for exp_vals in evolution_result.expectation_values()
]

if cudaq.mpi.rank() == 0:
# Plot the results
fig = plt.figure(figsize=(12, 6))
plt.plot(steps, exp_val)
plt.ylabel("Average Magnetization")
plt.xlabel("Time")
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)
fig.savefig("spin_model.png", dpi=fig.dpi)

cudaq.mpi.finalize()
94 changes: 94 additions & 0 deletions docs/sphinx/examples/python/dynamics/mgmn/multi_gpu.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import cudaq
from cudaq import operators, spin, Schedule, RungeKuttaIntegrator

import numpy as np
import cupy as cp
import matplotlib.pyplot as plt
import os

# On a system with multiple GPUs, `mpiexec` can be used as follows:
# `mpiexec -np <N> python3 multi_gpu.py `
cudaq.mpi.initialize()

# Set the target to our dynamics simulator
cudaq.set_target("dynamics")

# In this example, we solve the Quantum Heisenberg model (https://en.wikipedia.org/wiki/Quantum_Heisenberg_model),
# which exhibits the so-called quantum quench effect.
# e.g., see `Quantum quenches in the anisotropic spin-1/2 Heisenberg chain: different approaches to many-body dynamics far from equilibrium`
# (New J. Phys. 12 055017)
# Large number of spins
N = 21
dimensions = {}
for i in range(N):
dimensions[i] = 2

# Initial state: alternating spin up and down
spin_state = ''
for i in range(N):
spin_state += str(int(i % 2))

# Observable is the staggered magnetization operator
staggered_magnetization_op = operators.zero()
for i in range(N):
if i % 2 == 0:
staggered_magnetization_op += spin.z(i)
else:
staggered_magnetization_op -= spin.z(i)

staggered_magnetization_op /= N

observe_results = []
for g in [0.25, 4.0]:
# Heisenberg model spin coupling strength
Jx = 1.0
Jy = 1.0
Jz = g

# Construct the Hamiltonian
H = operators.zero()

for i in range(N - 1):
H += Jx * spin.x(i) * spin.x(i + 1)
H += Jy * spin.y(i) * spin.y(i + 1)
H += Jz * spin.z(i) * spin.z(i + 1)

steps = np.linspace(0.0, 1, 100)
schedule = Schedule(steps, ["time"])

# Prepare the initial state vector
psi0_ = cp.zeros(2**N, dtype=cp.complex128)
psi0_[int(spin_state, 2)] = 1.0
psi0 = cudaq.State.from_data(psi0_)

# Run the simulation
evolution_result = cudaq.evolve(H,
dimensions,
schedule,
psi0,
observables=[staggered_magnetization_op],
collapse_operators=[],
store_intermediate_results=True,
integrator=RungeKuttaIntegrator())

exp_val = [
exp_vals[0].expectation()
for exp_vals in evolution_result.expectation_values()
]

observe_results.append((g, exp_val))

if cudaq.mpi.rank() == 0:
# Plot the results
fig = plt.figure(figsize=(12, 6))
for g, exp_val in observe_results:
plt.plot(steps, exp_val, label=f'$ g = {g}$')
plt.legend(fontsize=16)
plt.ylabel("Staggered Magnetization")
plt.xlabel("Time")
abspath = os.path.abspath(__file__)
dname = os.path.dirname(abspath)
os.chdir(dname)
fig.savefig("heisenberg_model.png", dpi=fig.dpi)

cudaq.mpi.finalize()
41 changes: 41 additions & 0 deletions docs/sphinx/snippets/python/using/backends/dynamics.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,3 +172,44 @@ def compute_value(param_name, step_idx):
time_dependence = parameter_values(numpy.linspace(0, 1, 100))
cudaq.evolve(system_operator, system_dimensions, time_dependence, initial_state)
#[End Schedule2]

import cudaq
from cudaq import operators, spin, Schedule, RungeKuttaIntegrator

N = 4
dimensions = {}
for i in range(N):
dimensions[i] = 2
g = 1.0
H = operators.zero()
for i in range(N):
H += 2 * np.pi * spin.x(i)
H += 2 * np.pi * spin.y(i)
for i in range(N - 1):
H += 2 * np.pi * g * spin.x(i) * spin.x(i + 1)
H += 2 * np.pi * g * spin.y(i) * spin.z(i + 1)

steps = np.linspace(0.0, 1, 200)
schedule = Schedule(steps, ["time"])

#[Begin MPI]
cudaq.mpi.initialize()

# Set the target to our dynamics simulator
cudaq.set_target("dynamics")

# Initial state (expressed as an enum)
psi0 = cudaq.operator.InitialState.ZERO

# Run the simulation
evolution_result = cudaq.evolve(H,
dimensions,
schedule,
psi0,
observables=[],
collapse_operators=[],
store_intermediate_results=True,
integrator=RungeKuttaIntegrator())

cudaq.mpi.finalize()
#[End MPI]
45 changes: 44 additions & 1 deletion docs/sphinx/using/backends/dynamics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ For example, we can plot the Pauli expectation value for the above simulation as
In particular, for each time step, `evolve` captures an array of expectation values, one for each
observable. Hence, we convert them into sequences for plotting purposes.

Examples that illustrate how to use the ``dynamics`` target are available
in the `CUDA-Q repository <https://github.com/NVIDIA/cuda-quantum/tree/main/docs/sphinx/examples/python/dynamics>`__.

Operator
+++++++++++
Expand Down Expand Up @@ -272,4 +274,45 @@ backend target.
If the output is a '`None`' string, it indicates that your Torch installation does not support CUDA.
In this case, you need to install a CUDA-enabled Torch package via other mechanisms, e.g., building Torch from source or
using their Docker images.


Multi-GPU Multi-Node Execution
+++++++++++++++++++++++++++++++

.. _cudensitymat_mgmn:

CUDA-Q ``dynamics`` target supports parallel execution on multiple GPUs.
To enable parallel execution, the application must initialize MPI as follows.


.. tab:: Python

.. literalinclude:: ../../snippets/python/using/backends/dynamics.py
:language: python
:start-after: [Begin MPI]
:end-before: [End MPI]

.. code:: bash

mpiexec -np <N> python3 program.py

where ``N`` is the number of processes.


By initializing the MPI execution environment (via `cudaq.mpi.initialize()`) in the application code and
invoking it via an MPI launcher, we have activated the multi-node multi-GPU feature of the ``dynamics`` target.
Specifically, it will detect the number of processes (GPUs) and distribute the computation across all available GPUs.


.. note::
The number of MPI processes must be a power of 2, one GPU per process.

.. note::
Not all integrators are capable of handling distributed state. Errors will be raised if parallel execution is activated
but the selected integrator does not support distributed state.

.. warning::
As of cuQuantum version 24.11, there are a couple of `known limitations <https://docs.nvidia.com/cuda/cuquantum/24.11.0/cudensitymat/index.html>`__ for parallel execution:

- Computing the expectation value of a mixed quantum state is not supported. Thus, `collapse_operators` are not supported if expectation calculation is required.

- Some combinations of quantum states and quantum many-body operators are not supported. Errors will be raised in those cases.
13 changes: 13 additions & 0 deletions docs/sphinx/using/backends/simulators.rst
Original file line number Diff line number Diff line change
Expand Up @@ -482,13 +482,26 @@ Specific aspects of the simulation can be configured by setting the following of
* **`OMP_PLACES=cores`**: Set this environment variable to improve CPU parallelization.
* **`OMP_NUM_THREADS=X`**: To enable CPU parallelization, set X to `NUMBER_OF_CORES_PER_NODE/NUMBER_OF_GPUS_PER_NODE`.
* **`CUDAQ_TENSORNET_CONTROLLED_RANK=X`**: Specify the number of controlled qubits whereby the full tensor body of the controlled gate is expanded. If the number of controlled qubits is greater than this value, the gate is applied as a controlled tensor operator to the tensor network state. Default value is 1.
* **`CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=X`**: Set this environment variable to `TRUE` (`ON`) or `FALSE` (`OFF`) to enable or disable contraction path reuse when computing expectation values. Default is `OFF`.

.. note::

This backend requires an NVIDIA GPU and CUDA runtime libraries.
If you do not have these dependencies installed, you may encounter an error stating `Invalid simulator requested`.
See the section :ref:`dependencies-and-compatibility` for more information about how to install dependencies.

.. note::

When using contraction path reuse (`CUDAQ_TENSORNET_OBSERVE_CONTRACT_PATH_REUSE=TRUE`), :code:`tensornet` backends perform a single contraction path optimization with an opaque spin operator term. This path is then used to contract all the actual terms in the spin operator, hence saving the path finding time.

As we use an opaque spin operator term as a placeholder for contraction path optimization, the resulting contraction path is not as optimal as if the actual spin operator is used.
For instance, if the spin operator is sparse (only acting on a few qubits), the contraction can be significantly simplified.

.. note::

:code:`tensornet` backends only return the overall expectation value for a :class:`cudaq.SpinOperator` when using the `cudaq::observe` method.
Term-by-term expectation values will not be available in the resulting `ObserveResult` object.
If needed, these values can be computed by calling `cudaq::observe` on individual terms instead.

Matrix product state
+++++++++++++++++++++++++++++++++++
Expand Down
2 changes: 1 addition & 1 deletion python/cudaq/operator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@
from .definitions import operators, spin
from .evolution import evolve, evolve_async
from .expressions import Operator, OperatorSum, ProductOperator, ElementaryOperator, ScalarOperator, RydbergHamiltonian
from .helpers import NumericType
from .helpers import NumericType, InitialState
from .schedule import Schedule
20 changes: 17 additions & 3 deletions python/cudaq/operator/cudm_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from ..mlir._mlir_libs._quakeDialects import cudaq_runtime
from .cudm_helpers import cudm, CudmStateType
from .cudm_state import CuDensityMatState, as_cudm_state
from .helpers import InitialState, InitialStateArgT
from .integrator import BaseIntegrator
from .integrators.builtin_integrators import RungeKuttaIntegrator, cuDensityMatTimeStepper
import cupy
Expand All @@ -28,7 +29,7 @@ def evolve_dynamics(
hamiltonian: Operator,
dimensions: Mapping[int, int],
schedule: Schedule,
initial_state: cudaq_runtime.State,
initial_state: InitialStateArgT,
collapse_operators: Sequence[Operator] = [],
observables: Sequence[Operator] = [],
store_intermediate_results=False,
Expand All @@ -43,8 +44,21 @@ def evolve_dynamics(
schedule.reset()
hilbert_space_dims = tuple(dimensions[d] for d in range(len(dimensions)))

with ScopeTimer("evolve.as_cudm_state") as timer:
initial_state = as_cudm_state(initial_state)
# Check that the integrator can support distributed state if this is a distributed simulation.
if cudaq_runtime.mpi.is_initialized() and cudaq_runtime.mpi.num_ranks(
) > 1 and integrator is not None and not integrator.support_distributed_state(
):
raise ValueError(
f"Integrator {type(integrator).__name__} does not support distributed state."
)

if isinstance(initial_state, InitialState):
has_collapse_operators = len(collapse_operators) > 0
initial_state = CuDensityMatState.create_initial_state(
initial_state, hilbert_space_dims, has_collapse_operators)
else:
with ScopeTimer("evolve.as_cudm_state") as timer:
initial_state = as_cudm_state(initial_state)

if not isinstance(initial_state, CuDensityMatState):
raise ValueError("Unknown type")
Expand Down
Loading
Loading