From e9a1f67fc8863b09c5265ce2a1a9f8521cc57522 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Fri, 20 Dec 2024 16:55:37 +1300
Subject: [PATCH 01/24] single_particle_density for OccupationNumberFS

---
 src/strategies_and_params/poststepstrategy.jl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/strategies_and_params/poststepstrategy.jl b/src/strategies_and_params/poststepstrategy.jl
index 1bdd33240..101a856b9 100644
--- a/src/strategies_and_params/poststepstrategy.jl
+++ b/src/strategies_and_params/poststepstrategy.jl
@@ -279,8 +279,11 @@ function single_particle_density(dvec; component=0)
     ) do (k, v)
         MultiScalar(v^2 .* single_particle_density(k; component))
     end
-
-    return result.tuple ./ sum(result.tuple) .* N
+    if ismissing(N)
+        return result.tuple ./ dot(dvec,dvec)
+    else
+        return result.tuple ./ sum(result.tuple) .* N
+    end
 end
 
 function single_particle_density(add::SingleComponentFockAddress; component=0)

From 9f3f1541b9c9e6502532691da267ed704fe2a770 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Mon, 6 Jan 2025 11:27:35 +1300
Subject: [PATCH 02/24] Add AllOverlaps for multiple spectral states

---
 src/StatsTools/reweighting.jl                |  5 +++-
 src/projector_monte_carlo_problem.jl         |  4 ----
 src/strategies_and_params/replicastrategy.jl | 24 ++++++++++++++++----
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/src/StatsTools/reweighting.jl b/src/StatsTools/reweighting.jl
index 72eb05c07..a74305405 100644
--- a/src/StatsTools/reweighting.jl
+++ b/src/StatsTools/reweighting.jl
@@ -558,7 +558,10 @@ function rayleigh_replica_estimator(
     kwargs...
 )
     df = DataFrame(sim)
-    num_reps = length(filter(startswith("norm"), names(df)))
+    num_spectral = length(Set(
+        [name[1:findlast('_',name)] for name in names(df) if startswith(name,"norm")]
+    ))
+    num_reps = div(length(filter(startswith("norm"), names(df))),num_spectral)
     time_step = determine_constant_time_step(df)
     T = eltype(df[!, Symbol(shift_name, "_1")])
     shift_v = Vector{T}[]
diff --git a/src/projector_monte_carlo_problem.jl b/src/projector_monte_carlo_problem.jl
index c88de80ce..c8ed7e852 100644
--- a/src/projector_monte_carlo_problem.jl
+++ b/src/projector_monte_carlo_problem.jl
@@ -186,10 +186,6 @@ function ProjectorMonteCarloProblem(
 
     n_spectral = num_spectral_states(spectral_strategy) # spectral_strategy may override n_spectral
 
-    if replica_strategy isa AllOverlaps && n_spectral > 1
-        throw(ArgumentError("AllOverlaps is not implemented for more than one spectral state."))
-    end
-
     if random_seed == true
         random_seed = rand(RandomDevice(),UInt64)
     elseif random_seed == false
diff --git a/src/strategies_and_params/replicastrategy.jl b/src/strategies_and_params/replicastrategy.jl
index bb854a7b2..11dfda907 100644
--- a/src/strategies_and_params/replicastrategy.jl
+++ b/src/strategies_and_params/replicastrategy.jl
@@ -142,10 +142,26 @@ end
 
 function replica_stats(rs::AllOverlaps{N,<:Any,<:Any,B}, spectral_states::NTuple{N}) where {N,B}
     # Not using broadcasting because it wasn't inferred properly.
-    # For now implement this assuming only a single spectral state; generalise later
-    vecs = ntuple(i -> only(spectral_states[i]).v, Val(N))
-    wms = ntuple(i -> only(spectral_states[i]).wm, Val(N))
-    return all_overlaps(rs.operators, vecs, wms, Val(B))
+    if num_spectral_states(spectral_states[1]) > 1
+        names = String[]
+        values = Float64[]
+        for j in 1:num_spectral_states(spectral_states[1])
+            vecs = ntuple(i -> spectral_states[i][j].v, Val(N))
+            wms = ntuple(i -> spectral_states[i][j].wm, Val(N))
+            names_j, values_j = all_overlaps(rs.operators, vecs, wms, Val(B))
+            append!(
+                names,
+                [name[1:findlast('_',name)-1]*"_s$j"*
+                name[findlast('_',name):end] for name in names_j]
+            )
+            append!(values, values_j)
+        end
+        return names, values
+    else
+        vecs = ntuple(i -> only(spectral_states[i]).v, Val(N))
+        wms = ntuple(i -> only(spectral_states[i]).wm, Val(N))
+        return all_overlaps(rs.operators, vecs, wms, Val(B))
+    end
 end
 
 """

From 5fafde0b3ea62cd2df255da83ab682af6626fcaf Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Mon, 6 Jan 2025 11:38:44 +1300
Subject: [PATCH 03/24] Add documentation

---
 src/strategies_and_params/replicastrategy.jl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/strategies_and_params/replicastrategy.jl b/src/strategies_and_params/replicastrategy.jl
index 11dfda907..e0a7dd7ef 100644
--- a/src/strategies_and_params/replicastrategy.jl
+++ b/src/strategies_and_params/replicastrategy.jl
@@ -68,6 +68,9 @@ Run `n_replicas` replicas and report overlaps between all pairs of replica vecto
 Column names in the report are of the form `c{i}_dot_c{j}` for vector-vector overlaps, and
 `c{i}_Op{k}_c{j}` for operator overlaps.
 
+For multiple spectral states, the column names are of the form `c{i}_dot_s{l}_c{j}` for 
+vector-vector overlaps, and `c{i}_Op{k}_s{l}_c{j}` for operator overlaps.
+
 See [`ProjectorMonteCarloProblem`](@ref), [`ReplicaStrategy`](@ref) and
 [`AbstractOperator`](@ref Interfaces.AbstractOperator) (for an interface for implementing
 operators).

From 032ff6d3ea814e20c34f110910fcc9a96dff017c Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Mon, 6 Jan 2025 12:07:12 +1300
Subject: [PATCH 04/24] Resolve conflict

---
 src/strategies_and_params/poststepstrategy.jl | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/src/strategies_and_params/poststepstrategy.jl b/src/strategies_and_params/poststepstrategy.jl
index 101a856b9..a9e9dc744 100644
--- a/src/strategies_and_params/poststepstrategy.jl
+++ b/src/strategies_and_params/poststepstrategy.jl
@@ -271,7 +271,6 @@ function single_particle_density(dvec; component=0)
     K = keytype(dvec)
     V = float(valtype(dvec))
     M = num_modes(K)
-    N = num_particles(K)
 
     result = mapreduce(
         +, pairs(dvec);
@@ -279,11 +278,7 @@ function single_particle_density(dvec; component=0)
     ) do (k, v)
         MultiScalar(v^2 .* single_particle_density(k; component))
     end
-    if ismissing(N)
-        return result.tuple ./ dot(dvec,dvec)
-    else
-        return result.tuple ./ sum(result.tuple) .* N
-    end
+    return result.tuple ./ sum(abs2, dvec)
 end
 
 function single_particle_density(add::SingleComponentFockAddress; component=0)

From 00cf8ffe84b64dc0cc0276c5bca4513a07390f65 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Mon, 6 Jan 2025 12:08:59 +1300
Subject: [PATCH 05/24] Resolve conflict

---
 src/strategies_and_params/poststepstrategy.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/strategies_and_params/poststepstrategy.jl b/src/strategies_and_params/poststepstrategy.jl
index a9e9dc744..07ba515e6 100644
--- a/src/strategies_and_params/poststepstrategy.jl
+++ b/src/strategies_and_params/poststepstrategy.jl
@@ -278,6 +278,7 @@ function single_particle_density(dvec; component=0)
     ) do (k, v)
         MultiScalar(v^2 .* single_particle_density(k; component))
     end
+    
     return result.tuple ./ sum(abs2, dvec)
 end
 

From 9bef2317f0064c56253ff999f898a5d64b305a89 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Mon, 6 Jan 2025 11:27:35 +1300
Subject: [PATCH 06/24] Add AllOverlaps for multiple spectral states

---
 src/StatsTools/reweighting.jl                |  5 +++-
 src/projector_monte_carlo_problem.jl         |  4 ----
 src/strategies_and_params/replicastrategy.jl | 24 ++++++++++++++++----
 3 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/src/StatsTools/reweighting.jl b/src/StatsTools/reweighting.jl
index 72eb05c07..a74305405 100644
--- a/src/StatsTools/reweighting.jl
+++ b/src/StatsTools/reweighting.jl
@@ -558,7 +558,10 @@ function rayleigh_replica_estimator(
     kwargs...
 )
     df = DataFrame(sim)
-    num_reps = length(filter(startswith("norm"), names(df)))
+    num_spectral = length(Set(
+        [name[1:findlast('_',name)] for name in names(df) if startswith(name,"norm")]
+    ))
+    num_reps = div(length(filter(startswith("norm"), names(df))),num_spectral)
     time_step = determine_constant_time_step(df)
     T = eltype(df[!, Symbol(shift_name, "_1")])
     shift_v = Vector{T}[]
diff --git a/src/projector_monte_carlo_problem.jl b/src/projector_monte_carlo_problem.jl
index 4128fb0b5..317049b75 100644
--- a/src/projector_monte_carlo_problem.jl
+++ b/src/projector_monte_carlo_problem.jl
@@ -206,10 +206,6 @@ function ProjectorMonteCarloProblem(
 
     n_spectral = num_spectral_states(spectral_strategy) # spectral_strategy may override n_spectral
 
-    if replica_strategy isa AllOverlaps && n_spectral > 1
-        throw(ArgumentError("AllOverlaps is not implemented for more than one spectral state."))
-    end
-
     if random_seed == true
         random_seed = rand(RandomDevice(),UInt64)
     elseif random_seed == false
diff --git a/src/strategies_and_params/replicastrategy.jl b/src/strategies_and_params/replicastrategy.jl
index bb854a7b2..11dfda907 100644
--- a/src/strategies_and_params/replicastrategy.jl
+++ b/src/strategies_and_params/replicastrategy.jl
@@ -142,10 +142,26 @@ end
 
 function replica_stats(rs::AllOverlaps{N,<:Any,<:Any,B}, spectral_states::NTuple{N}) where {N,B}
     # Not using broadcasting because it wasn't inferred properly.
-    # For now implement this assuming only a single spectral state; generalise later
-    vecs = ntuple(i -> only(spectral_states[i]).v, Val(N))
-    wms = ntuple(i -> only(spectral_states[i]).wm, Val(N))
-    return all_overlaps(rs.operators, vecs, wms, Val(B))
+    if num_spectral_states(spectral_states[1]) > 1
+        names = String[]
+        values = Float64[]
+        for j in 1:num_spectral_states(spectral_states[1])
+            vecs = ntuple(i -> spectral_states[i][j].v, Val(N))
+            wms = ntuple(i -> spectral_states[i][j].wm, Val(N))
+            names_j, values_j = all_overlaps(rs.operators, vecs, wms, Val(B))
+            append!(
+                names,
+                [name[1:findlast('_',name)-1]*"_s$j"*
+                name[findlast('_',name):end] for name in names_j]
+            )
+            append!(values, values_j)
+        end
+        return names, values
+    else
+        vecs = ntuple(i -> only(spectral_states[i]).v, Val(N))
+        wms = ntuple(i -> only(spectral_states[i]).wm, Val(N))
+        return all_overlaps(rs.operators, vecs, wms, Val(B))
+    end
 end
 
 """

From 18749974138937973c9bacd500000c3e11ddd5e9 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Mon, 6 Jan 2025 11:38:44 +1300
Subject: [PATCH 07/24] Add documentation

---
 src/strategies_and_params/replicastrategy.jl | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/strategies_and_params/replicastrategy.jl b/src/strategies_and_params/replicastrategy.jl
index 11dfda907..e0a7dd7ef 100644
--- a/src/strategies_and_params/replicastrategy.jl
+++ b/src/strategies_and_params/replicastrategy.jl
@@ -68,6 +68,9 @@ Run `n_replicas` replicas and report overlaps between all pairs of replica vecto
 Column names in the report are of the form `c{i}_dot_c{j}` for vector-vector overlaps, and
 `c{i}_Op{k}_c{j}` for operator overlaps.
 
+For multiple spectral states, the column names are of the form `c{i}_dot_s{l}_c{j}` for 
+vector-vector overlaps, and `c{i}_Op{k}_s{l}_c{j}` for operator overlaps.
+
 See [`ProjectorMonteCarloProblem`](@ref), [`ReplicaStrategy`](@ref) and
 [`AbstractOperator`](@ref Interfaces.AbstractOperator) (for an interface for implementing
 operators).

From 96e74065761d3fa0cb27e7f7a1ae192537a4c8b9 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Wed, 15 Jan 2025 13:33:10 +1300
Subject: [PATCH 08/24] Change labels for replicas and spectral states

---
 src/pmc_simulation.jl | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/pmc_simulation.jl b/src/pmc_simulation.jl
index 4a545b9a3..ba89163a7 100644
--- a/src/pmc_simulation.jl
+++ b/src/pmc_simulation.jl
@@ -124,19 +124,19 @@ function PMCSimulation(problem::ProjectorMonteCarloProblem; copy_vectors=true)
     # set up the spectral_states
     wm = working_memory(vectors[1, 1])
     spectral_states = ntuple(n_replicas) do i
-        replica_id = if n_replicas == 1
+        replica_id = if n_replicas == 1 && n_spectral == 1
             ""
         else
-            "_$(i)"
+            "_r$(i)"
         end
         SpectralState(
             ntuple(n_spectral) do j
                 v = vectors[i, j]
                 sp = shift_parameters[i, j]
-                spectral_id = if n_spectral == 1
+                spectral_id = if n_replicas == 1 && n_spectral == 1
                     ""
                 else
-                    "_s$(j)" # j is the spectral state index, i is the replica index
+                    "s$(j)" # j is the spectral state index, i is the replica index
                 end
                 SingleState(
                     hamiltonian, algorithm, v, zerovector(v),

From 4bac2e104822b01ee0a42e8e1cc4328d7b4e8274 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Wed, 15 Jan 2025 13:34:25 +1300
Subject: [PATCH 09/24] Fix typo

---
 src/strategies_and_params/poststepstrategy.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/strategies_and_params/poststepstrategy.jl b/src/strategies_and_params/poststepstrategy.jl
index 22045bbad..447a538a3 100644
--- a/src/strategies_and_params/poststepstrategy.jl
+++ b/src/strategies_and_params/poststepstrategy.jl
@@ -33,7 +33,7 @@ See also [`PostStepStrategy`](@ref), [`ReportingStrategy`](@ref).
 """
 post_step_action
 
-# When startegies are a Tuple, apply all of them.
+# When strategies are a Tuple, apply all of them.
 function post_step_action(::Tuple{}, _, _)
     return ()
 end

From 33512a11140b1f245f75b4039dbbb824221abf92 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Wed, 15 Jan 2025 13:36:20 +1300
Subject: [PATCH 10/24] Add AllOverlaps for mixed spectral states

---
 src/strategies_and_params/replicastrategy.jl | 120 ++++++++++---------
 1 file changed, 65 insertions(+), 55 deletions(-)

diff --git a/src/strategies_and_params/replicastrategy.jl b/src/strategies_and_params/replicastrategy.jl
index e0a7dd7ef..15d61d79e 100644
--- a/src/strategies_and_params/replicastrategy.jl
+++ b/src/strategies_and_params/replicastrategy.jl
@@ -58,18 +58,22 @@ check_transform(::NoStats, _) = nothing
 
 # TODO: add custom names
 """
-    AllOverlaps(n_replicas=2; operator=nothing, transform=nothing, vecnorm=true)
+    AllOverlaps(n_replicas=2; operator=nothing, transform=nothing, vecnorm=true, mixed_spectral_overlaps=false)
         <: ReplicaStrategy{n_replicas}
 
 Run `n_replicas` replicas and report overlaps between all pairs of replica vectors. If
-`operator` is not `nothing`, the overlap `dot(c1, operator, c2)` is reported as well. If
+`operator` is not `nothing`, the overlap `dot(r1, operator, r2)` is reported as well. If
 `operator` is a tuple of operators, the overlaps are computed for all operators.
 
-Column names in the report are of the form `c{i}_dot_c{j}` for vector-vector overlaps, and
-`c{i}_Op{k}_c{j}` for operator overlaps.
+Column names in the report are of the form `r{i}s{k}_dot_r{j}s{k}` for vector-vector
+overlaps, and `r{i}s{k}_Op{m}_r{j}s{k}` for operator overlaps, where `i` and `j` label the
+replicas, `k` labels the spectral state, and `m` labels the operators.
 
-For multiple spectral states, the column names are of the form `c{i}_dot_s{l}_c{j}` for 
-vector-vector overlaps, and `c{i}_Op{k}_s{l}_c{j}` for operator overlaps.
+The `r{i}s{k}_dot_r{j}s{k}` overlap can be omitted with the flag `vecnorm=false`.
+
+By default, overlaps of different spectral states are omitted. To include overlaps of
+different spectral states `r{i}s{k}_dot_r{j}s{l}` and `r{i}s{k}_Op{m}_r{j}s{l}`, use the
+flag `mixed_spectral_overlaps=true`. 
 
 See [`ProjectorMonteCarloProblem`](@ref), [`ReplicaStrategy`](@ref) and
 [`AbstractOperator`](@ref Interfaces.AbstractOperator) (for an interface for implementing
@@ -103,22 +107,26 @@ where
 is the (right) eigenvector of ``\\hat{G}`` and ``| \\psi \\rangle`` is an eigenvector of
 ``\\hat{H}``.
 
-For a K-tuple of input operators ``(\\hat{A}_1, ..., \\hat{A}_K)``, overlaps of
+For an m-tuple of input operators ``(\\hat{A}_1, ..., \\hat{A}_m)``, overlaps of
 ``\\langle \\phi | f^{-1} \\hat{A} f^{-1} | \\phi \\rangle`` are reported as
-`c{i}_Op{k}_c{j}`. The correct vector-vector overlap ``\\langle \\phi | f^{-2} | \\phi
-\\rangle`` is reported *last* as `c{i}_Op{K+1}_c{j}`. This is in addition to the *bare*
-vector-vector overlap ``\\langle \\phi | f^{-2} | \\phi \\rangle`` that is reported as
-`c{i}_dot_c{j}`.
-
-In either case the `c{i}_dot_c{j}` overlap can be omitted with the flag `vecnorm=false`.
+`r{i}s{k}_Op{m}_r{j}s{k}`. The correct vector-vector overlap ``\\langle \\phi | f^{-2} | \\phi
+\\rangle`` is reported *last* as `r{i}s{k}_Op{m+1}_r{j}s{k}`. This is in addition to the *bare*
+vector-vector overlap ``\\langle \\phi | \\phi \\rangle`` that is reported as 
+`r{i}s{k}_dot_r{j}s{k}`.
 """
-struct AllOverlaps{N,M,O,B} <: ReplicaStrategy{N}
+struct AllOverlaps{N,M,O,B,S} <: ReplicaStrategy{N}
     operators::O
 end
 
 const TupleOrVector = Union{Tuple, Vector}
 
-function AllOverlaps(n_replicas=2; operator=nothing, transform=nothing, vecnorm=true)
+function AllOverlaps(
+    n_replicas=2;
+    operator=nothing,
+    transform=nothing,
+    vecnorm=true,
+    mixed_spectral_overlaps=false
+)
     n_replicas isa Integer || throw(ArgumentError("n_replicas must be an integer"))
     if isnothing(operator)
         operators = ()
@@ -140,64 +148,66 @@ function AllOverlaps(n_replicas=2; operator=nothing, transform=nothing, vecnorm=
     if !vecnorm && length(ops) == 0
         return NoStats(n_replicas)
     end
-    return AllOverlaps{n_replicas,length(ops),typeof(ops),vecnorm}(ops)
+    return AllOverlaps{n_replicas,length(ops),typeof(ops),vecnorm,mixed_spectral_overlaps}(ops)
 end
 
-function replica_stats(rs::AllOverlaps{N,<:Any,<:Any,B}, spectral_states::NTuple{N}) where {N,B}
-    # Not using broadcasting because it wasn't inferred properly.
-    if num_spectral_states(spectral_states[1]) > 1
-        names = String[]
-        values = Float64[]
-        for j in 1:num_spectral_states(spectral_states[1])
-            vecs = ntuple(i -> spectral_states[i][j].v, Val(N))
-            wms = ntuple(i -> spectral_states[i][j].wm, Val(N))
-            names_j, values_j = all_overlaps(rs.operators, vecs, wms, Val(B))
-            append!(
-                names,
-                [name[1:findlast('_',name)-1]*"_s$j"*
-                name[findlast('_',name):end] for name in names_j]
-            )
-            append!(values, values_j)
-        end
-        return names, values
-    else
-        vecs = ntuple(i -> only(spectral_states[i]).v, Val(N))
-        wms = ntuple(i -> only(spectral_states[i]).wm, Val(N))
-        return all_overlaps(rs.operators, vecs, wms, Val(B))
-    end
+function replica_stats(rs::AllOverlaps{N,<:Any,<:Any,B,S}, spectral_states::NTuple{N}) where {N,B,S}
+    n_spectral = num_spectral_states(spectral_states[1])
+    vecs = SMatrix{N,n_spectral}(
+        spectral_states[i][j].v for i in 1:N, j in 1:n_spectral
+    )
+    wms = SMatrix{N,n_spectral}(
+        spectral_states[i][j].wm for i in 1:N, j in 1:n_spectral
+    )
+    return all_overlaps(rs.operators, vecs, wms, Val(B), Val(S))
 end
 
 """
-    all_overlaps(operators, vectors, working_memories, vecnorm=true)
+    all_overlaps(operators, vectors, working_memories, vecnorm=true, mixed_spectral_overlaps=false)
 
 Get all overlaps between vectors and operators.  The flag `vecnorm` can disable the
-vector-vector overlap `c{i}_dot_c{j}`.
+vector-vector overlap `r{i}s{k}_dot_r{j}s{k}`.
 """
 function all_overlaps(
-    operators::TupleOrVector, vecs::NTuple{N,AbstractDVec}, wms, ::Val{B}
-) where {N,B}
+    operators::TupleOrVector, vecs::SMatrix{N,M,<:AbstractDVec}, wms, ::Val{B}, ::Val{S}
+) where {N,M,B,S}
     T = promote_type((valtype(v) for v in vecs)..., eltype.(operators)...)
     names = String[]
     values = T[]
-    for i in 1:N, j in i+1:N
-        if B
-            push!(names, "c$(i)_dot_c$(j)")
-            push!(values, dot(vecs[i], vecs[j]))
-        end
+    for i in 1:N, k in 1:M
         if all(isdiag, operators)
-            v = vecs[i]
+            v = vecs[i,k]
         else
-            v = DictVectors.copy_to_local!(wms[i], vecs[i])
+            v = DictVectors.copy_to_local!(wms[i,k], vecs[i,k])
+        end
+
+        if S
+            for j in 1:N, l in k+1:M
+                if B
+                    push!(names, "r$(i)s$(k)_dot_r$(j)s$(l)")
+                    push!(values, dot(vecs[i,k], vecs[j,l]))
+                end
+                for (m, op) in enumerate(operators)
+                    push!(names, "r$(i)s$(k)_Op$(m)_r$(j)s$(l)")
+                    # Using dot_from_right here because dot will try to copy_to_local! if
+                    # called directly.
+                    push!(values, dot_from_right(v, op, vecs[j,l]))
+                end
+            end
         end
-        for (k, op) in enumerate(operators)
-            push!(names, "c$(i)_Op$(k)_c$(j)")
-            # Using dot_from_right here because dot will try to copy_to_local! if
-            # called directly.
-            push!(values, dot_from_right(v, op, vecs[j]))
+        for j in i+1:N
+            if B
+                push!(names, "r$(i)s$(k)_dot_r$(j)s$(k)")
+                push!(values, dot(vecs[i,k], vecs[j,k]))
+            end
+            for (m, op) in enumerate(operators)
+                push!(names, "r$(i)s$(k)_Op$(m)_r$(j)s$(k)")
+                push!(values, dot_from_right(v, op, vecs[j,k]))
+            end
         end
     end
 
-    num_reports = (N * (N - 1) ÷ 2) * (B + length(operators))
+    num_reports = M * (N * (N - 1) ÷ 2) * (B + length(operators)) + S * N^2 * (M * (M - 1) ÷ 2) * (B + length(operators))
     return SVector{num_reports,String}(names).data, SVector{num_reports,T}(values).data
 end
 

From 0c0ae1f5c37b94973fcc49c2bfb923753b9580a3 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Wed, 15 Jan 2025 13:38:53 +1300
Subject: [PATCH 11/24] Update StatsTools functions with new naming scheme

---
 src/StatsTools/fidelity.jl                    | 11 +++--
 src/StatsTools/reweighting.jl                 | 48 ++++++++++---------
 .../variational_energy_estimator.jl           | 11 +++--
 3 files changed, 38 insertions(+), 32 deletions(-)

diff --git a/src/StatsTools/fidelity.jl b/src/StatsTools/fidelity.jl
index 02425cf8e..0712c9bac 100644
--- a/src/StatsTools/fidelity.jl
+++ b/src/StatsTools/fidelity.jl
@@ -4,7 +4,7 @@
 
 Compute the fidelity of the average coefficient vector and the projector defined in
 `p_field` from the [`PMCSimulation`](@ref Main.Rimu.PMCSimulation) or `DataFrame` returned
-by solve, using replicas `_1` and `_2`. Calls [`ratio_of_means`](@ref) to perform a
+by solve, using replicas `_r1s1` and `_r2s1`. Calls [`ratio_of_means`](@ref) to perform a
 blocking analysis on a ratio of the means of separate time series and returns a
 [`RatioBlockingResult`](@ref). The first `skip` steps in the time series are skipped.
 
@@ -22,14 +22,15 @@ where `v` is the projector specified by `p_field`, which is assumed to be normal
 unity with the two-norm (i.e. `v⋅v == 1`), and ``\\mathbf{c}_1`` and ``\\mathbf{c}_2``
 are two replica coefficient vectors.
 """
-function replica_fidelity(sim; p_field = :hproj, skip = 0,  args...)
+function replica_fidelity(sim; p_field = :hproj, skip = 0,  spectral_state = 1, args...)
     df = DataFrame(sim)
-    p_field_1 = Symbol(p_field, :_1)
-    p_field_2 = Symbol(p_field, :_2)
+    p_field_1 = Symbol(p_field, :_r1s, spectral_state)
+    p_field_2 = Symbol(p_field, :_r2s, spectral_state)
     fid_num = conj(getproperty(df, p_field_1)) .* getproperty(df, p_field_2)
     fid_num = fid_num[skip+1:end]
     # denominator
-    fid_den = df.c1_dot_c2[skip+1:end]
+    #fid_den = df.r1s1_dot_r2s1[skip+1:end]
+    fid_den = Vector(df[!, Symbol("r1s", spectral_state, "_dot_r2s", spectral_state)])[skip+1:end]
 
     return ratio_of_means(fid_num, fid_den; args...)
 end
diff --git a/src/StatsTools/reweighting.jl b/src/StatsTools/reweighting.jl
index a74305405..d099f641e 100644
--- a/src/StatsTools/reweighting.jl
+++ b/src/StatsTools/reweighting.jl
@@ -13,12 +13,14 @@ function determine_constant_time_step(df)
             return parse(Float64, metadata(df)["time_step"])
         elseif hasproperty(df, "time_step")
             return df.time_step[end]
-        elseif hasproperty(df, "time_step_1")
+        elseif hasproperty(df, "time_step_r1s1")
             return df.time_step_1[end]
         elseif hasproperty(df, "dτ") # backwards compatibility
             return df.dτ[end]
         elseif hasproperty(df, "dτ_1")
             return df.dτ_1[end]
+        elseif hasproperty(df, "time_step_1")
+            return df.time_step_1[end]
         else
             throw(ArgumentError("Time step not found in `df`"))
         end
@@ -473,6 +475,7 @@ end
         shift_name="shift",
         op_name="Op1",
         vec_name="dot",
+        spectral_state=1,
         h=0,
         skip=0,
         Anorm=1,
@@ -503,7 +506,8 @@ The second method computes the Rayleigh quotient directly from a `DataFrame` or
 columns, see [`AllOverlaps`](@ref Main.AllOverlaps) for default formatting. The operator
 overlap data can be scaled by a prefactor `Anorm`. A specific reweighting depth can be set
 with keyword argument `h`. The default is `h = 0` which calculates the Rayleigh quotient
-without reweighting.
+without reweighting. To compute the Rayleigh quotient for the `n`th spectral state, set
+`spectral_state = n`.
 
 The reweighting is an extension of the mixed estimator using the reweighting technique
 described in [Umrigar *et al.* (1993)](http://dx.doi.org/10.1063/1.465195).
@@ -552,29 +556,27 @@ function rayleigh_replica_estimator(
     shift_name="shift",
     op_name="Op1",
     vec_name="dot",
+    spectral_state=1,
     h=0,
     skip=0,
     Anorm=1,
     kwargs...
 )
     df = DataFrame(sim)
-    num_spectral = length(Set(
-        [name[1:findlast('_',name)] for name in names(df) if startswith(name,"norm")]
-    ))
-    num_reps = div(length(filter(startswith("norm"), names(df))),num_spectral)
+    num_reps = parse(Int, metadata(df, "num_replicas"))
     time_step = determine_constant_time_step(df)
-    T = eltype(df[!, Symbol(shift_name, "_1")])
+    T = eltype(df[!, Symbol(shift_name, "_r1s1")])
     shift_v = Vector{T}[]
     for a in 1:num_reps
-        push!(shift_v, Vector(df[!, Symbol(shift_name, "_", a)]))
+        push!(shift_v, Vector(df[!, Symbol(shift_name, "_r", a, "s", spectral_state)]))
     end
-    T = eltype(df[!, Symbol("c1_", vec_name, "_c2")])
+    T = eltype(df[!, Symbol("r1s1_", vec_name, "_r2s1")])
     vec_ol_v = Vector{T}[]
-    T = eltype(df[!, Symbol("c1_", op_name, "_c2")])
+    T = eltype(df[!, Symbol("r1s1_", op_name, "_r2s1")])
     op_ol_v = Vector{T}[]
     for a in 1:num_reps, b in a+1:num_reps
-        push!(op_ol_v, Vector(df[!, Symbol("c", a, "_", op_name, "_c" ,b)] .* Anorm))
-        push!(vec_ol_v, Vector(df[!, Symbol("c", a, "_", vec_name, "_c" ,b)]))
+        push!(op_ol_v, Vector(df[!, Symbol("r", a, "s", spectral_state, "_", op_name, "_r" ,b, "s", spectral_state)] .* Anorm))
+        push!(vec_ol_v, Vector(df[!, Symbol("r", a, "s", spectral_state, "_", vec_name, "_r" ,b, "s", spectral_state)]))
     end
 
     return rayleigh_replica_estimator(op_ol_v, vec_ol_v, shift_v, h, time_step; skip, kwargs...)
@@ -599,9 +601,10 @@ Returns a `NamedTuple` with the fields
 * `h_values = 100`: minimum number of reweighting depths
 * `skip = 0`: initial time steps to exclude from averaging
 * `threading = Threads.nthreads() > 1`: if `false` a progress meter is displayed
-* `shift_name = "shift"`: shift data corresponding to column in `df` with names `<shift>_1`, ...
-* `op_name = "Op1"`: name of operator overlap corresponding to column in `df` with names `c1_<op_ol>_c2`, ...
-* `vec_name = "dot"`: name of vector-vector overlap corresponding to column in `df` with names `c1_<vec_ol>_c2`, ...
+* `shift_name = "shift"`: shift data corresponding to column in `df` with names `<shift>_r1s1`, ...
+* `op_name = "Op1"`: name of operator overlap corresponding to column in `df` with names `r1s1_<op_ol>_r2s1`, ...
+* `vec_name = "dot"`: name of vector-vector overlap corresponding to column in `df` with names `r1s1_<vec_ol>_r2s1`, ...
+* `spectral_state = 1`: which spectral state to use
 * `Anorm = 1`: a scalar prefactor to scale the operator overlap data
 * `warn = true`: whether to log warning messages when blocking fails or denominators are small
 
@@ -626,21 +629,22 @@ function rayleigh_replica_estimator_analysis(
     shift_name="shift",
     op_name="Op1",
     vec_name="dot",
+    spectral_state=1,
     Anorm=1,
     warn=true,
     kwargs...
 )
     df = DataFrame(sim)
-    num_reps = length(filter(startswith("norm"), names(df)))
+    num_reps = parse(Int, metadata(df, "num_replicas"))
     time_step = determine_constant_time_step(df)
     # estimate the correlation time by blocking the shift data
-    T = eltype(df[!, Symbol(shift_name, "_1")])
+    T = eltype(df[!, Symbol(shift_name, "_r1s1")])
     shift_v = Vector{T}[]
     E_r = T[]
     correlation_estimate = Int[]
     df_se = DataFrame()
     for a in 1:num_reps
-        push!(shift_v, Vector(df[!, Symbol(shift_name, "_", a)]))
+        push!(shift_v, Vector(df[!, Symbol(shift_name, "_r", a, "s", spectral_state)]))
         se = blocking_analysis(shift_v[a]; skip)
         push!(E_r, se.mean)
         push!(correlation_estimate, 2^(se.k - 1))
@@ -649,13 +653,13 @@ function rayleigh_replica_estimator_analysis(
     if isnothing(h_range)
         h_range = determine_h_range(df, skip, minimum(correlation_estimate), h_values)
     end
-    T = eltype(df[!, Symbol("c1_", vec_name, "_c2")])
+    T = eltype(df[!, Symbol("r1s1_", vec_name, "_r2s1")])
     vec_ol_v = Vector{T}[]
-    T = eltype(df[!, Symbol("c1_", op_name, "_c2")])
+    T = eltype(df[!, Symbol("r1s1_", op_name, "_r2s1")])
     op_ol_v = Vector{T}[]
     for a in 1:num_reps, b in a+1:num_reps
-        push!(op_ol_v, Vector(df[!, Symbol("c", a, "_", op_name, "_c" ,b)] .* Anorm))
-        push!(vec_ol_v, Vector(df[!, Symbol("c", a, "_", vec_name, "_c" ,b)]))
+        push!(op_ol_v, Vector(df[!, Symbol("r", a, "s", spectral_state, "_", op_name, "_r" ,b, "s", spectral_state)] .* Anorm))
+        push!(vec_ol_v, Vector(df[!, Symbol("r", a, "s", spectral_state, "_", vec_name, "_r" ,b, "s", spectral_state)]))
     end
 
     df_rre = if threading
diff --git a/src/StatsTools/variational_energy_estimator.jl b/src/StatsTools/variational_energy_estimator.jl
index f3ad745a2..ec30f8800 100644
--- a/src/StatsTools/variational_energy_estimator.jl
+++ b/src/StatsTools/variational_energy_estimator.jl
@@ -8,6 +8,7 @@ Compute the variational energy estimator from the replica time series of the `sh
 coefficient vector `overlaps` by blocking analysis.
 The keyword argument `max_replicas` can be used to constrain the number of replicas
 processed to be smaller than all available in `df`.
+The keyword argument `spectral_state` determines which spectral state 
 Other keyword arguments are passed on to [`ratio_of_means()`](@ref).
 Returns a [`RatioBlockingResult`](@ref).
 
@@ -50,9 +51,9 @@ function variational_energy_estimator(shifts, overlaps; kwargs...)
     return ratio_of_means(numerator, denominator; kwargs...)
 end
 
-function variational_energy_estimator(sim; max_replicas=:all, kwargs...)
+function variational_energy_estimator(sim; max_replicas=:all, spectral_state=1, kwargs...)
     df = DataFrame(sim)
-    num_replicas = length(filter(startswith("norm_"), names(df))) # number of replicas
+    num_replicas = parse(Int, metadata(df, "num_replicas"))
     if iszero(num_replicas)
         throw(ArgumentError(
             "No replicas found. Use keyword \
@@ -61,7 +62,7 @@ function variational_energy_estimator(sim; max_replicas=:all, kwargs...)
     end
     @assert num_replicas ≥ 2 "At least two replicas are needed, found $num_replicas"
 
-    num_overlaps = length(filter(startswith(r"c._dot"), names(df)))
+    num_overlaps = length(filter(startswith(Regex("r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)")), names(df)))
     @assert num_overlaps == binomial(num_replicas, 2) "Unexpected number of overlaps."
 
     # process at most `max_replicas` but at least 2 replicas
@@ -69,12 +70,12 @@ function variational_energy_estimator(sim; max_replicas=:all, kwargs...)
         num_replicas = max(2, min(max_replicas, num_replicas))
     end
 
-    shiftnames = [Symbol("shift_$i") for i in 1:num_replicas]
+    shiftnames = [Symbol("shift_r$(i)s$(spectral_state)") for i in 1:num_replicas]
     shifts = map(name -> getproperty(df, name), shiftnames)
     @assert length(shifts) == num_replicas
 
     overlap_names = [
-        Symbol("c$(i)_dot_c$(j)") for i in 1:num_replicas for j in i+1:num_replicas
+        Symbol("r$(i)s$(spectral_state)_dot_r$(j)s$(spectral_state)") for i in 1:num_replicas for j in i+1:num_replicas
     ]
     overlaps = map(name -> getproperty(df, name), overlap_names)
     @assert length(overlaps) ≤ num_overlaps

From 9712ee9df92f3a47d8bf95ac93dc20c27f64e94e Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Wed, 15 Jan 2025 14:03:15 +1300
Subject: [PATCH 12/24] Update tests and docs with new naming scheme

---
 docs/src/projectormontecarlo.md |  2 +-
 scripts/G2-example.jl           |  9 +++++----
 test/StatsTools.jl              |  2 +-
 test/excited_states_tests.jl    | 25 ++++++++++++++++---------
 test/lomc.jl                    | 26 +++++++++++++-------------
 test/mpi_runtests.jl            |  8 ++++----
 6 files changed, 40 insertions(+), 32 deletions(-)

diff --git a/docs/src/projectormontecarlo.md b/docs/src/projectormontecarlo.md
index 64ab9da7a..0b4116264 100644
--- a/docs/src/projectormontecarlo.md
+++ b/docs/src/projectormontecarlo.md
@@ -39,7 +39,7 @@ replica (`n_replicas = 1`) and single spectral state, the fields `:shift`, `:nor
 be present as well as others depending on the `style` argument and the `post_step_strategy`.
 
 If multiple replicas or spectral states are requested, then the relevant field names in the 
-`DataFrame` will have a suffix identifying the respective replica simulation, e.g. the `shift`s will be reported as `shift_1`, `shift_2`, ... 
+`DataFrame` will have a suffix identifying the respective replica simulation, e.g. the `shift`s will be reported as `shift_r1s1`, `shift_r2s1`, ... 
 
 Many tools for analysing the time series data obtained from a 
 [`ProjectorMonteCarloProblem`](@ref) are contained in the [Module `StatsTools`](@ref).
diff --git a/scripts/G2-example.jl b/scripts/G2-example.jl
index 5861d193e..2d99446e3 100644
--- a/scripts/G2-example.jl
+++ b/scripts/G2-example.jl
@@ -70,13 +70,14 @@ problem = ProjectorMonteCarloProblem(H;
 result = solve(problem)
 df = DataFrame(result);
 
-# The output `DataFrame` has FCIQMC statistics for each replica (e.g. shift, norm),
+# The output `DataFrame` has FCIQMC statistics for each replica (e.g. shift, norm)
+# and for each spectral state (here only the ground state `s1` is calculated),
 println(filter(startswith("shift_"), names(df)))
 
-# as well as vector-vector overlaps (e.g. `c1_dot_c2`),
+# as well as vector-vector overlaps (e.g. `r1s1_dot_r2s1`),
 println(filter(contains("dot"), names(df)))
 
-# and operator overlaps (e.g. `c1_Op1_c2`) between the replicas.
+# and operator overlaps (e.g. `r1s1_Op1_r2s1`) between the replicas.
 println(filter(contains("Op"), names(df)))
 
 # The vector-vector and operator overlaps go into calculating the Rayleigh quotient
@@ -107,7 +108,7 @@ end
 # energy.
 println("Shift energy from $n_replicas replicas:")
 for i in 1:n_replicas
-    se = shift_estimator(df; shift="shift_$i", skip=steps_equilibrate)
+    se = shift_estimator(df; shift="shift_r$(i)s1", skip=steps_equilibrate)
     println("   Replica $i: $(se.mean) ± $(se.err)")
 end
 
diff --git a/test/StatsTools.jl b/test/StatsTools.jl
index 9478b6821..0020ce30a 100644
--- a/test/StatsTools.jl
+++ b/test/StatsTools.jl
@@ -344,7 +344,7 @@ using Rimu.StatsTools: replica_fidelity
     ve = variational_energy_estimator(rr; skip=steps_equi, max_replicas=5)
     # the `max_replicas` option has no effect in this case
     @test kkresults[1][1] < pmedian(ve)
-    @test pmedian(ve) < shift_estimator(rr; shift = :shift_1, skip=steps_equi).mean
+    @test pmedian(ve) < shift_estimator(rr; shift = :shift_r1s1, skip=steps_equi).mean
     @test_throws ArgumentError variational_energy_estimator(DataFrame()) # empty df
     vs = [rand(5) for _ in 1:4]
     @test_throws ArgumentError variational_energy_estimator(vs,vs) # wrong length arrays
diff --git a/test/excited_states_tests.jl b/test/excited_states_tests.jl
index 5d2922cc1..a62963ad3 100644
--- a/test/excited_states_tests.jl
+++ b/test/excited_states_tests.jl
@@ -13,9 +13,9 @@ using Test
     p = ProjectorMonteCarloProblem(ham; spectral_strategy, last_step, style)
     s = solve(p)
     df = DataFrame(s)
-    energy1 = shift_estimator(df, shift="shift_s1", skip=1000)
-    energy2 = shift_estimator(df, shift="shift_s2", skip=1000)
-    energy3 = shift_estimator(df, shift="shift_s3", skip=1000)
+    energy1 = shift_estimator(df, shift="shift_r1s1", skip=1000)
+    energy2 = shift_estimator(df, shift="shift_r1s2", skip=1000)
+    energy3 = shift_estimator(df, shift="shift_r1s3", skip=1000)
 
     @test energy1.mean ≈ vals[1]
     @test energy2.mean ≈ vals[2]
@@ -25,12 +25,12 @@ using Test
     p = ProjectorMonteCarloProblem(ham; spectral_strategy, last_step, style, n_replicas)
     s = solve(p)
     df = DataFrame(s)
-    energy1 = shift_estimator(df, shift="shift_1_s1", skip=1000)
-    energy2 = shift_estimator(df, shift="shift_1_s2", skip=1000)
-    energy3 = shift_estimator(df, shift="shift_1_s3", skip=1000)
-    energy4 = shift_estimator(df, shift="shift_2_s1", skip=1000)
-    energy5 = shift_estimator(df, shift="shift_2_s2", skip=1000)
-    energy6 = shift_estimator(df, shift="shift_2_s3", skip=1000)
+    energy1 = shift_estimator(df, shift="shift_r1s1", skip=1000)
+    energy2 = shift_estimator(df, shift="shift_r1s2", skip=1000)
+    energy3 = shift_estimator(df, shift="shift_r1s3", skip=1000)
+    energy4 = shift_estimator(df, shift="shift_r2s1", skip=1000)
+    energy5 = shift_estimator(df, shift="shift_r2s2", skip=1000)
+    energy6 = shift_estimator(df, shift="shift_r2s3", skip=1000)
 
     @test energy1.mean ≈ vals[1]
     @test energy2.mean ≈ vals[2]
@@ -38,4 +38,11 @@ using Test
     @test energy4.mean ≈ vals[1]
     @test energy5.mean ≈ vals[2]
     @test energy6.mean ≈ vals[3]
+
+    replica_strategy = AllOverlaps(n_replicas;mixed_spectral_overlaps=true)
+    p = ProjectorMonteCarloProblem(ham; spectral_strategy, last_step, style, replica_strategy)
+    s = solve(p)
+    df = DataFrame(s)
+    num_overlaps = length(filter(startswith(r"r[0-9]+s[0-9]+_dot"), names(df)))
+    @test num_overlaps == 15
 end
diff --git a/test/lomc.jl b/test/lomc.jl
index 9cfd74dc1..644933c99 100644
--- a/test/lomc.jl
+++ b/test/lomc.jl
@@ -105,20 +105,20 @@ Random.seed!(1234)
             @test state.replica_strategy == NoStats(1)
             @test length(state.spectral_states) == 1
             @test "shift" ∈ names(df)
-            @test "shift_1" ∉ names(df)
+            @test "shift_r1s1" ∉ names(df)
 
             df, state = lomc!(H, dv; replica_strategy=NoStats(3))
             @test state.replica_strategy == NoStats(3)
             @test length(state.spectral_states) == 3
-            @test df.shift_1 ≠ df.shift_2 && df.shift_2 ≠ df.shift_3
-            @test "shift_4" ∉ names(df)
+            @test df.shift_r1s1 ≠ df.shift_r2s1 && df.shift_r2s1 ≠ df.shift_r3s1
+            @test "shift_r4s1" ∉ names(df)
 
             @test isnothing(Rimu.check_transform(NoStats(), H))
         end
 
-        # column names are of the form c{i}_dot_c{j} and c{i}_Op{k}_c{j}.
+        # column names are of the form r{i}s{k}_dot_r{j}s{k} and r{i}s{k}_Op{m}_r{j}s{k}.
         function num_stats(df)
-            return length(filter(x -> match(r"^c[0-9]", x) ≠ nothing, names(df)))
+            return length(filter(x -> match(r"^r[0-9]", x) ≠ nothing, names(df)))
         end
         @testset "AllOverlaps" begin
             for dv in (
@@ -183,8 +183,8 @@ Random.seed!(1234)
             G = MatrixHamiltonian(rand(5, 5))
             O = MatrixHamiltonian(rand(ComplexF64, 5, 5))
             df, _ = lomc!(G, v, replica_strategy=AllOverlaps(2; operator=O))
-            @test df.c1_dot_c2 isa Vector{ComplexF64}
-            @test df.c1_Op1_c2 isa Vector{ComplexF64}
+            @test df.r1s1_dot_r2s1 isa Vector{ComplexF64}
+            @test df.r1s1_Op1_r2s1 isa Vector{ComplexF64}
         end
     end
 
@@ -280,12 +280,12 @@ Random.seed!(1234)
         @test df.len[end] > 10
 
         df, state = @suppress_err lomc!(H, copy(dv); maxlength=10, dτ=1e-4, replica_strategy=NoStats(6))
-        @test all(df.len_1[1:end-1] .≤ 10)
-        @test all(df.len_2[1:end-1] .≤ 10)
-        @test all(df.len_3[1:end-1] .≤ 10)
-        @test all(df.len_4[1:end-1] .≤ 10)
-        @test all(df.len_5[1:end-1] .≤ 10)
-        @test all(df.len_6[1:end-1] .≤ 10)
+        @test all(df.len_r1s1[1:end-1] .≤ 10)
+        @test all(df.len_r2s1[1:end-1] .≤ 10)
+        @test all(df.len_r3s1[1:end-1] .≤ 10)
+        @test all(df.len_r4s1[1:end-1] .≤ 10)
+        @test all(df.len_r5s1[1:end-1] .≤ 10)
+        @test all(df.len_r6s1[1:end-1] .≤ 10)
 
         state.max_length[] += 1000
         df_cont = lomc!(state).df
diff --git a/test/mpi_runtests.jl b/test/mpi_runtests.jl
index 82261916f..035d5a1ff 100644
--- a/test/mpi_runtests.jl
+++ b/test/mpi_runtests.jl
@@ -219,8 +219,8 @@ end
                 df = DataFrame(solve(prob))
 
                 density_sum = sum(1:M) do i
-                    top = df[!, Symbol("c1_Op", i, "_c2")]
-                    bot = df.c1_dot_c2
+                    top = df[!, Symbol("r1s1_Op", i, "_r2s1")]
+                    bot = df.r1s1_dot_r2s1
                     pmean(ratio_of_means(top, bot; skip=5000))
                 end
                 @test density_sum ≈ N rtol=1e-3
@@ -234,8 +234,8 @@ end
                 df = DataFrame(solve(prob))
 
                 g2s = map(1:M) do i
-                    top = df[!, Symbol("c1_Op", i, "_c2")]
-                    bot = df.c1_dot_c2
+                    top = df[!, Symbol("r1s1_Op", i, "_r2s1")]
+                    bot = df.r1s1_dot_r2s1
                     pmean(ratio_of_means(top, bot; skip=5000))
                 end
                 for i in 1:cld(M, 2)

From 9d8671b73bad7d25ded2314af9fd2d054925493b Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Wed, 15 Jan 2025 14:47:41 +1300
Subject: [PATCH 13/24] Fix ArgumentError in variational_energy_estimator

---
 src/StatsTools/variational_energy_estimator.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/StatsTools/variational_energy_estimator.jl b/src/StatsTools/variational_energy_estimator.jl
index ec30f8800..90533dd40 100644
--- a/src/StatsTools/variational_energy_estimator.jl
+++ b/src/StatsTools/variational_energy_estimator.jl
@@ -54,7 +54,7 @@ end
 function variational_energy_estimator(sim; max_replicas=:all, spectral_state=1, kwargs...)
     df = DataFrame(sim)
     num_replicas = parse(Int, metadata(df, "num_replicas"))
-    if iszero(num_replicas)
+    if num_replicas == 1
         throw(ArgumentError(
             "No replicas found. Use keyword \
             `replica_strategy=AllOverlaps(n)` with n≥2 in `ProjectorMonteCarloProblem` to set up replicas!"

From 938c5c148b3614482ae408ebd91a7d12afa4f0eb Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Mon, 20 Jan 2025 15:47:37 +1300
Subject: [PATCH 14/24] Fix typo

---
 src/StatsTools/reweighting.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/StatsTools/reweighting.jl b/src/StatsTools/reweighting.jl
index d099f641e..7da81ede6 100644
--- a/src/StatsTools/reweighting.jl
+++ b/src/StatsTools/reweighting.jl
@@ -14,7 +14,7 @@ function determine_constant_time_step(df)
         elseif hasproperty(df, "time_step")
             return df.time_step[end]
         elseif hasproperty(df, "time_step_r1s1")
-            return df.time_step_1[end]
+            return df.time_step_r1s1[end]
         elseif hasproperty(df, "dτ") # backwards compatibility
             return df.dτ[end]
         elseif hasproperty(df, "dτ_1")

From b432ecaf49d4878b40df0a8879f068f09f6a1d0e Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Mon, 20 Jan 2025 15:51:37 +1300
Subject: [PATCH 15/24] Add tests

---
 test/excited_states_tests.jl | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/test/excited_states_tests.jl b/test/excited_states_tests.jl
index a62963ad3..24b45c2b1 100644
--- a/test/excited_states_tests.jl
+++ b/test/excited_states_tests.jl
@@ -5,14 +5,16 @@ using Test
 @testset "excited state energies" begin
     ham = HubbardReal1D(BoseFS(1,1,1,1,1))
     pr = ExactDiagonalizationProblem(ham)
-    vals = solve(pr).values
+    result = solve(pr)
+    vals = result.values
+    vecs = result.vectors
+    g2s = [rayleigh_quotient(G2RealCorrelator(0),vecs[i]) for i in 1:3]
 
     spectral_strategy = GramSchmidt(3)
     last_step=2000
     style = IsDeterministic()
     p = ProjectorMonteCarloProblem(ham; spectral_strategy, last_step, style)
-    s = solve(p)
-    df = DataFrame(s)
+    df = DataFrame(solve(p))
     energy1 = shift_estimator(df, shift="shift_r1s1", skip=1000)
     energy2 = shift_estimator(df, shift="shift_r1s2", skip=1000)
     energy3 = shift_estimator(df, shift="shift_r1s3", skip=1000)
@@ -23,8 +25,7 @@ using Test
 
     n_replicas = 2
     p = ProjectorMonteCarloProblem(ham; spectral_strategy, last_step, style, n_replicas)
-    s = solve(p)
-    df = DataFrame(s)
+    df = DataFrame(solve(p))
     energy1 = shift_estimator(df, shift="shift_r1s1", skip=1000)
     energy2 = shift_estimator(df, shift="shift_r1s2", skip=1000)
     energy3 = shift_estimator(df, shift="shift_r1s3", skip=1000)
@@ -39,10 +40,13 @@ using Test
     @test energy5.mean ≈ vals[2]
     @test energy6.mean ≈ vals[3]
 
-    replica_strategy = AllOverlaps(n_replicas;mixed_spectral_overlaps=true)
+    replica_strategy = AllOverlaps(n_replicas; operator=G2RealCorrelator(0), mixed_spectral_overlaps=true)
     p = ProjectorMonteCarloProblem(ham; spectral_strategy, last_step, style, replica_strategy)
-    s = solve(p)
-    df = DataFrame(s)
+    df = DataFrame(solve(p))
+    for state in 1:3
+        r = rayleigh_replica_estimator(df; spectral_state=state)
+        @test r.f ≈ g2s[state] atol=0.001
+    end
     num_overlaps = length(filter(startswith(r"r[0-9]+s[0-9]+_dot"), names(df)))
     @test num_overlaps == 15
 end

From d65106d1a7cb2f94b03b476754af981d1dc23741 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Mon, 20 Jan 2025 15:57:51 +1300
Subject: [PATCH 16/24] Fix test

---
 test/excited_states_tests.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/excited_states_tests.jl b/test/excited_states_tests.jl
index 24b45c2b1..c54110fbc 100644
--- a/test/excited_states_tests.jl
+++ b/test/excited_states_tests.jl
@@ -45,7 +45,7 @@ using Test
     df = DataFrame(solve(p))
     for state in 1:3
         r = rayleigh_replica_estimator(df; spectral_state=state)
-        @test r.f ≈ g2s[state] atol=0.001
+        @test r.f ≈ g2s[state] atol=0.01
     end
     num_overlaps = length(filter(startswith(r"r[0-9]+s[0-9]+_dot"), names(df)))
     @test num_overlaps == 15

From 2d9bff2cc7ae66d8a685526ea171dc68b8009ed6 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Tue, 21 Jan 2025 11:41:11 +1300
Subject: [PATCH 17/24] Fix formula in G2 example

---
 scripts/G2-example.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/G2-example.jl b/scripts/G2-example.jl
index 2d99446e3..6bd933d53 100644
--- a/scripts/G2-example.jl
+++ b/scripts/G2-example.jl
@@ -83,7 +83,7 @@ println(filter(contains("Op"), names(df)))
 # The vector-vector and operator overlaps go into calculating the Rayleigh quotient
 # for an observable
 # ```math
-#     \langle \hat{G}^{(2)}(d) \rangle = \frac{\sum_{a<b} \mathbf{c}_a^\dagger \cdot \hat{G}^{(2)}(d) \cdot \mathbf{c}_b}{\sum_{a<b} \mathbf{c}_a^\dagger \cdot \mathbf{c}_b }
+#     \langle \hat{G}^{(2)}(d) \rangle = \frac{\sum_{a<b} \mathbf{c}_a^\dagger \hat{G}^{(2)}(d) \mathbf{c}_b}{\sum_{a<b} \mathbf{c}_a^\dagger \mathbf{c}_b }
 # ```
 # The sum over all replica pairs (a,b), especially in the denominator, helps to avoid
 # errors from poor sampling if the number of walkers is too low.

From c6bd84bc5d81e3880c2d4c01373860dccceb14ab Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Tue, 21 Jan 2025 14:45:26 +1300
Subject: [PATCH 18/24] Update documentation

---
 src/StatsTools/fidelity.jl                     | 9 +++++----
 src/StatsTools/variational_energy_estimator.jl | 4 ++--
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/StatsTools/fidelity.jl b/src/StatsTools/fidelity.jl
index 0712c9bac..a397d0f93 100644
--- a/src/StatsTools/fidelity.jl
+++ b/src/StatsTools/fidelity.jl
@@ -1,12 +1,13 @@
 """
-    replica_fidelity(df::DataFrame; p_field = :hproj, skip = 0)
+    replica_fidelity(df::DataFrame; p_field = :hproj, spectral_state = 1, skip = 0)
     replica_fidelity(sim::PMCSimulation; kwargs...)
 
 Compute the fidelity of the average coefficient vector and the projector defined in
 `p_field` from the [`PMCSimulation`](@ref Main.Rimu.PMCSimulation) or `DataFrame` returned
-by solve, using replicas `_r1s1` and `_r2s1`. Calls [`ratio_of_means`](@ref) to perform a
-blocking analysis on a ratio of the means of separate time series and returns a
-[`RatioBlockingResult`](@ref). The first `skip` steps in the time series are skipped.
+by solve, using replicas `_r1s{i}` and `_r2s{i}`, where `i` is the `spectral_state`. Calls
+[`ratio_of_means`](@ref) to perform a blocking analysis on a ratio of the means of separate
+time series and returns a [`RatioBlockingResult`](@ref). The first `skip` steps in the time
+series are skipped.
 
 The fidelity of states `|ψ⟩` and `|ϕ⟩` is defined as
 ```math
diff --git a/src/StatsTools/variational_energy_estimator.jl b/src/StatsTools/variational_energy_estimator.jl
index 90533dd40..2f4da9b27 100644
--- a/src/StatsTools/variational_energy_estimator.jl
+++ b/src/StatsTools/variational_energy_estimator.jl
@@ -1,6 +1,6 @@
 """
     variational_energy_estimator(shifts, overlaps; kwargs...)
-    variational_energy_estimator(df::DataFrame; max_replicas=:all, kwargs...)
+    variational_energy_estimator(df::DataFrame; max_replicas=:all, spectral_state=1, kwargs...)
     variational_energy_estimator(sim::PMCSimulation; kwargs...)
     -> r::RatioBlockingResult
 
@@ -8,7 +8,7 @@ Compute the variational energy estimator from the replica time series of the `sh
 coefficient vector `overlaps` by blocking analysis.
 The keyword argument `max_replicas` can be used to constrain the number of replicas
 processed to be smaller than all available in `df`.
-The keyword argument `spectral_state` determines which spectral state 
+The keyword argument `spectral_state` determines which spectral state's energy is computed.
 Other keyword arguments are passed on to [`ratio_of_means()`](@ref).
 Returns a [`RatioBlockingResult`](@ref).
 

From 5811336f84b511f85d6354e0b3a818904ec4b33c Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Wed, 22 Jan 2025 15:04:41 +1300
Subject: [PATCH 19/24] Update src/strategies_and_params/replicastrategy.jl

Co-authored-by: mtsch <matijacufar@gmail.com>
---
 src/strategies_and_params/replicastrategy.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/strategies_and_params/replicastrategy.jl b/src/strategies_and_params/replicastrategy.jl
index 15d61d79e..cea319721 100644
--- a/src/strategies_and_params/replicastrategy.jl
+++ b/src/strategies_and_params/replicastrategy.jl
@@ -107,7 +107,7 @@ where
 is the (right) eigenvector of ``\\hat{G}`` and ``| \\psi \\rangle`` is an eigenvector of
 ``\\hat{H}``.
 
-For an m-tuple of input operators ``(\\hat{A}_1, ..., \\hat{A}_m)``, overlaps of
+For an ``m``-tuple of input operators ``(\\hat{A}_1, ..., \\hat{A}_m)``, overlaps of
 ``\\langle \\phi | f^{-1} \\hat{A} f^{-1} | \\phi \\rangle`` are reported as
 `r{i}s{k}_Op{m}_r{j}s{k}`. The correct vector-vector overlap ``\\langle \\phi | f^{-2} | \\phi
 \\rangle`` is reported *last* as `r{i}s{k}_Op{m+1}_r{j}s{k}`. This is in addition to the *bare*

From 0fa0aa39877b15fe1efb764b6d90c07265f4f77f Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Fri, 14 Feb 2025 15:57:25 +1300
Subject: [PATCH 20/24] Add functions for retrieving the number of replicas,
 spectral states, and overlaps from DataFrames

---
 src/Interfaces/Interfaces.jl                  | 10 ++++
 src/Interfaces/dataframes.jl                  | 46 +++++++++++++++++++
 src/Rimu.jl                                   |  4 +-
 src/StatsTools/StatsTools.jl                  |  1 +
 src/StatsTools/reweighting.jl                 |  8 ++--
 .../variational_energy_estimator.jl           | 26 +++++++----
 src/pmc_simulation.jl                         |  1 +
 src/qmc_states.jl                             |  3 ++
 test/Interfaces.jl                            |  7 +++
 test/excited_states_tests.jl                  | 12 +++--
 10 files changed, 98 insertions(+), 20 deletions(-)
 create mode 100644 src/Interfaces/dataframes.jl

diff --git a/src/Interfaces/Interfaces.jl b/src/Interfaces/Interfaces.jl
index cea09d223..aa0f38510 100644
--- a/src/Interfaces/Interfaces.jl
+++ b/src/Interfaces/Interfaces.jl
@@ -35,11 +35,18 @@ Follow the links for the definitions of the interfaces!
 * [`apply_column!`](@ref)
 * [`apply_operator!`](@ref)
 * [`step_stats`](@ref)
+
+## Functions for retrieving information from DataFrames:
+
+* [`num_replicas`](@ref)
+* [`num_spectral_states`](@ref)
+* [`num_overlaps`](@ref)
 """
 module Interfaces
 
 using LinearAlgebra: LinearAlgebra, diag
 using VectorInterface: VectorInterface, add, add!, zerovector!, scalartype
+using DataFrames: DataFrame, metadata
 
 import OrderedCollections: freeze
 
@@ -54,9 +61,12 @@ export
     random_offdiagonal, starting_address, allows_address_type,
     LOStructure, IsDiagonal, IsHermitian, AdjointKnown, AdjointUnknown, has_adjoint,
     AbstractOperator, AbstractObservable
+export
+    num_replicas, num_spectral_states, num_overlaps
 
 include("stochasticstyles.jl")
 include("dictvectors.jl")
 include("hamiltonians.jl")
+include("dataframes.jl")
 
 end
diff --git a/src/Interfaces/dataframes.jl b/src/Interfaces/dataframes.jl
new file mode 100644
index 000000000..9d893c5df
--- /dev/null
+++ b/src/Interfaces/dataframes.jl
@@ -0,0 +1,46 @@
+"""
+    num_replicas(df::DataFrame)
+
+Return the number of replicas used in the simulation that produced `df`.
+"""
+function num_replicas(df::DataFrame)
+    if haskey(metadata(df), "num_replicas")
+        return parse(Int, metadata(df, "num_replicas"))
+    else
+        num = length(filter(startswith("norm"), names(df)))
+        num > 0 || throw(ArgumentError("No replicas found in DataFrame"))
+        return num
+    end
+end
+
+"""
+    num_spectral_states(df::DataFrame)
+
+Return the number of spectral states used in the simulation that produced `df`.
+"""
+function num_spectral_states(df::DataFrame)
+    if haskey(metadata(df), "num_spectral_states")
+        return parse(Int, metadata(df, "num_spectral_states"))
+    else
+        if length(filter(startswith("norm"), names(df))) == 0
+            throw(ArgumentError("No spectral states found in DataFrame"))
+        end
+        return 1
+    end
+end
+
+"""
+    num_overlaps(df::DataFrame)
+
+Return the number of overlaps between replicas.
+"""
+function num_overlaps(df::DataFrame)
+    if haskey(metadata(df), "num_overlaps")
+        return parse(Int, metadata(df, "num_overlaps"))
+    else
+        if length(filter(startswith("norm"), names(df))) == 0
+            throw(ArgumentError("No replicas found in DataFrame"))
+        end
+        return length(filter(startswith(r"c[0-9]+_dot"), names(df)))
+    end
+end
diff --git a/src/Rimu.jl b/src/Rimu.jl
index 7a190185c..ba9bed471 100644
--- a/src/Rimu.jl
+++ b/src/Rimu.jl
@@ -46,7 +46,7 @@ include("mpi_helpers.jl")
 
 include("Interfaces/Interfaces.jl")
 @reexport using .Interfaces
-using .Interfaces: dot_from_right
+import .Interfaces: dot_from_right, num_replicas, num_overlaps, num_spectral_states
 include("BitStringAddresses/BitStringAddresses.jl")
 @reexport using .BitStringAddresses
 include("Hamiltonians/Hamiltonians.jl")
@@ -78,7 +78,7 @@ export TimeStepStrategy, ConstantTimeStep, OvershootControl
 export localpart, walkernumber
 export smart_logger, default_logger
 export ProjectorMonteCarloProblem, SimulationPlan, state_vectors
-export FCIQMC, num_replicas, num_spectral_states, GramSchmidt
+export FCIQMC, num_replicas, num_spectral_states, num_overlaps, GramSchmidt
 
 function __init__()
     # Turn on smart logging once at runtime. Turn off with `default_logger()`.
diff --git a/src/StatsTools/StatsTools.jl b/src/StatsTools/StatsTools.jl
index 5152d8dc6..f46a086fa 100644
--- a/src/StatsTools/StatsTools.jl
+++ b/src/StatsTools/StatsTools.jl
@@ -34,6 +34,7 @@ using SpecialFunctions: SpecialFunctions, erf
 using Statistics: Statistics
 using StrFormat: StrFormat, @f_str
 using StrLiterals: StrLiterals
+import ..Interfaces: num_replicas, num_spectral_states, num_overlaps
 
 import ProgressLogging, Folds
 import MacroTools
diff --git a/src/StatsTools/reweighting.jl b/src/StatsTools/reweighting.jl
index c081ff7b8..e15eb5cb8 100644
--- a/src/StatsTools/reweighting.jl
+++ b/src/StatsTools/reweighting.jl
@@ -207,7 +207,7 @@ Compute the [`growth_estimator`](@ref) on a `DataFrame` `df` or
 depths.
 
 Returns a `NamedTuple` with the fields
-* `df_ge`: `DataFrame` with reweighting depth and `growth_estiamator` data. See example
+* `df_ge`: `DataFrame` with reweighting depth and `growth_estimator` data. See example
   below.
 * `correlation_estimate`: estimated correlation time from blocking analysis
 * `se, se_l, se_u`: [`shift_estimator`](@ref) and error
@@ -252,7 +252,6 @@ function growth_estimator_analysis(
     df = DataFrame(sim)
     shift_v = Vector(getproperty(df, Symbol(shift_name))) # casting to `Vector` to make SIMD loops efficient
     norm_v = Vector(getproperty(df, Symbol(norm_name)))
-    num_reps = length(filter(startswith("norm"), names(df)))
     time_step = isnothing(time_step) ? determine_constant_time_step(df) : time_step
     se = blocking_analysis(shift_v; skip)
     E_r = se.mean
@@ -423,7 +422,6 @@ function mixed_estimator_analysis(
     shift_v = Vector(getproperty(df, Symbol(shift_name))) # casting to `Vector` to make SIMD loops efficient
     hproj_v = Vector(getproperty(df, Symbol(hproj_name)))
     vproj_v = Vector(getproperty(df, Symbol(vproj_name)))
-    num_reps = length(filter(startswith("norm"), names(df)))
 
     time_step = isnothing(time_step) ? determine_constant_time_step(df) : time_step
     se = blocking_analysis(shift_v; skip)
@@ -568,7 +566,7 @@ function rayleigh_replica_estimator(
     kwargs...
 )
     df = DataFrame(sim)
-    num_reps = parse(Int, metadata(df, "num_replicas"))
+    num_reps = num_replicas(df)
     time_step = isnothing(time_step) ? determine_constant_time_step(df) : time_step
     T = eltype(df[!, Symbol(shift_name, "_r1s1")])
     shift_v = Vector{T}[]
@@ -641,7 +639,7 @@ function rayleigh_replica_estimator_analysis(
     kwargs...
 )
     df = DataFrame(sim)
-    num_reps = parse(Int, metadata(df, "num_replicas"))
+    num_reps = num_replicas(df)
     time_step = isnothing(time_step) ? determine_constant_time_step(df) : time_step
     # estimate the correlation time by blocking the shift data
     T = eltype(df[!, Symbol(shift_name, "_r1s1")])
diff --git a/src/StatsTools/variational_energy_estimator.jl b/src/StatsTools/variational_energy_estimator.jl
index 2f4da9b27..48a100cf4 100644
--- a/src/StatsTools/variational_energy_estimator.jl
+++ b/src/StatsTools/variational_energy_estimator.jl
@@ -53,32 +53,38 @@ end
 
 function variational_energy_estimator(sim; max_replicas=:all, spectral_state=1, kwargs...)
     df = DataFrame(sim)
-    num_replicas = parse(Int, metadata(df, "num_replicas"))
-    if num_replicas == 1
+    num_reps = num_replicas(df)
+    if num_reps == 1
         throw(ArgumentError(
             "No replicas found. Use keyword \
             `replica_strategy=AllOverlaps(n)` with n≥2 in `ProjectorMonteCarloProblem` to set up replicas!"
         ))
     end
-    @assert num_replicas ≥ 2 "At least two replicas are needed, found $num_replicas"
+    @assert num_reps ≥ 2 "At least two replicas are needed, found $num_replicas"
 
-    num_overlaps = length(filter(startswith(Regex("r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)")), names(df)))
-    @assert num_overlaps == binomial(num_replicas, 2) "Unexpected number of overlaps."
+    num_olaps = num_overlaps(df)
+    if num_olaps == 0
+        throw(ArgumentError(
+            "No overlaps found. Use keyword \
+            `replica_strategy=AllOverlaps(n)` with n≥2 in `ProjectorMonteCarloProblem` to set up replicas!"
+        ))
+    end
+    @assert num_olaps == binomial(num_reps, 2) "Unexpected number of overlaps."
 
     # process at most `max_replicas` but at least 2 replicas
     if max_replicas isa Integer
-        num_replicas = max(2, min(max_replicas, num_replicas))
+        num_reps = max(2, min(max_replicas, num_reps))
     end
 
-    shiftnames = [Symbol("shift_r$(i)s$(spectral_state)") for i in 1:num_replicas]
+    shiftnames = [Symbol("shift_r$(i)s$(spectral_state)") for i in 1:num_reps]
     shifts = map(name -> getproperty(df, name), shiftnames)
-    @assert length(shifts) == num_replicas
+    @assert length(shifts) == num_reps
 
     overlap_names = [
-        Symbol("r$(i)s$(spectral_state)_dot_r$(j)s$(spectral_state)") for i in 1:num_replicas for j in i+1:num_replicas
+        Symbol("r$(i)s$(spectral_state)_dot_r$(j)s$(spectral_state)") for i in 1:num_reps for j in i+1:num_reps
     ]
     overlaps = map(name -> getproperty(df, name), overlap_names)
-    @assert length(overlaps) ≤ num_overlaps
+    @assert length(overlaps) ≤ num_olaps
 
     return variational_energy_estimator(shifts, overlaps; kwargs...)
 end
diff --git a/src/pmc_simulation.jl b/src/pmc_simulation.jl
index ba89163a7..9eb0e7d37 100644
--- a/src/pmc_simulation.jl
+++ b/src/pmc_simulation.jl
@@ -187,6 +187,7 @@ end
 
 num_spectral_states(sm::PMCSimulation) = num_spectral_states(sm.state)
 num_replicas(sm::PMCSimulation) = num_replicas(sm.state)
+num_overlaps(sm::PMCSimulation) = num_overlaps(sm.state)
 
 function report_simulation_status_metadata!(report::Report, sm::PMCSimulation)
     @unpack modified, aborted, success, message, elapsed_time = sm
diff --git a/src/qmc_states.jl b/src/qmc_states.jl
index 9e6fe17bf..6e6714ebd 100644
--- a/src/qmc_states.jl
+++ b/src/qmc_states.jl
@@ -120,6 +120,8 @@ end
 
 num_replicas(::ReplicaState{N}) where {N} = N
 num_spectral_states(::ReplicaState{<:Any, S}) where {S} = S
+num_overlaps(::ReplicaState{<:Any,<:Any,<:Any,<:Any,<:NoStats}) = 0
+num_overlaps(::ReplicaState{N,<:Any,<:Any,<:Any,<:AllOverlaps{N,<:Any,<:Any,B}}) where {N,B} = B*N*(N-1)÷2
 
 Base.show(io::IO, r::ReplicaState) = show(io, MIME("text/plain"), r)
 function Base.show(io::IO, ::MIME"text/plain", st::ReplicaState)
@@ -187,6 +189,7 @@ function report_default_metadata!(report::Report, state::ReplicaState)
     report_metadata!(report, "laststep", state.simulation_plan.last_step)
     report_metadata!(report, "num_replicas", num_replicas(state))
     report_metadata!(report, "num_spectral_states", num_spectral_states(state))
+    report_metadata!(report, "num_overlaps", num_overlaps(state))
     report_metadata!(report, "hamiltonian", s_state.hamiltonian)
     report_metadata!(report, "reporting_strategy", state.reporting_strategy)
     report_metadata!(report, "shift_strategy", algorithm.shift_strategy)
diff --git a/test/Interfaces.jl b/test/Interfaces.jl
index 18fa74f50..faf57ff5d 100644
--- a/test/Interfaces.jl
+++ b/test/Interfaces.jl
@@ -1,6 +1,7 @@
 using LinearAlgebra
 using Rimu
 using Test
+using DataFrames
 
 @testset "Interface basics" begin
     @test eltype(StyleUnknown{String}()) == String
@@ -36,6 +37,12 @@ using Test
     @test_throws ArgumentError Interfaces.dot_from_right(1, 2, 3)
 end
 
+@testset "DataFrame interfaces" begin
+    @test_throws ArgumentError num_replicas(DataFrame())
+    @test_throws ArgumentError num_spectral_states(DataFrame())
+    @test_throws ArgumentError num_overlaps(DataFrame())
+end
+
 # using lomc! with a matrix was removed in Rimu.jl v0.12.0
 @testset "lomc! with matrix" begin
     ham = [1 1 2 3 2;
diff --git a/test/excited_states_tests.jl b/test/excited_states_tests.jl
index c54110fbc..dca8c87f6 100644
--- a/test/excited_states_tests.jl
+++ b/test/excited_states_tests.jl
@@ -2,7 +2,7 @@ using Rimu
 using Test
 
 
-@testset "excited state energies" begin
+@testset "excited states" begin
     ham = HubbardReal1D(BoseFS(1,1,1,1,1))
     pr = ExactDiagonalizationProblem(ham)
     result = solve(pr)
@@ -22,6 +22,7 @@ using Test
     @test energy1.mean ≈ vals[1]
     @test energy2.mean ≈ vals[2]
     @test energy3.mean ≈ vals[3]
+    @test num_spectral_states(df) == 3
 
     n_replicas = 2
     p = ProjectorMonteCarloProblem(ham; spectral_strategy, last_step, style, n_replicas)
@@ -40,13 +41,18 @@ using Test
     @test energy5.mean ≈ vals[2]
     @test energy6.mean ≈ vals[3]
 
+    @test num_overlaps(df) == 0
+    @test_throws ArgumentError variational_energy_estimator(df)
+
     replica_strategy = AllOverlaps(n_replicas; operator=G2RealCorrelator(0), mixed_spectral_overlaps=true)
     p = ProjectorMonteCarloProblem(ham; spectral_strategy, last_step, style, replica_strategy)
     df = DataFrame(solve(p))
+    @test num_replicas(df) == 2
+    @test num_overlaps(df) == 1
     for state in 1:3
         r = rayleigh_replica_estimator(df; spectral_state=state)
         @test r.f ≈ g2s[state] atol=0.01
     end
-    num_overlaps = length(filter(startswith(r"r[0-9]+s[0-9]+_dot"), names(df)))
-    @test num_overlaps == 15
+    num_olaps = length(filter(startswith(r"r[0-9]+s[0-9]+_dot"), names(df)))
+    @test num_olaps == 15
 end

From 8859f037d808241f6181384cd851c267fe93079e Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Wed, 12 Mar 2025 12:22:41 +1300
Subject: [PATCH 21/24] Fix docs

---
 docs/make.jl                                  |  2 +-
 src/Interfaces/dataframes.jl                  | 15 ---------------
 src/qmc_states.jl                             |  6 ++++++
 src/strategies_and_params/replicastrategy.jl  |  2 +-
 src/strategies_and_params/spectralstrategy.jl |  2 +-
 5 files changed, 9 insertions(+), 18 deletions(-)

diff --git a/docs/make.jl b/docs/make.jl
index f06912a93..52de7d533 100644
--- a/docs/make.jl
+++ b/docs/make.jl
@@ -75,7 +75,7 @@ makedocs(;
     authors="Joachim Brand <j.brand@massey.ac.nz>",
     checkdocs=:exports,
     doctest=false, # Doctests are done while testing.
-    # warnonly = true, # should be diabled for a release
+    # warnonly = true, # should be disabled for a release
 )
 
 deploydocs(
diff --git a/src/Interfaces/dataframes.jl b/src/Interfaces/dataframes.jl
index 9d893c5df..b3d9b8084 100644
--- a/src/Interfaces/dataframes.jl
+++ b/src/Interfaces/dataframes.jl
@@ -1,8 +1,3 @@
-"""
-    num_replicas(df::DataFrame)
-
-Return the number of replicas used in the simulation that produced `df`.
-"""
 function num_replicas(df::DataFrame)
     if haskey(metadata(df), "num_replicas")
         return parse(Int, metadata(df, "num_replicas"))
@@ -13,11 +8,6 @@ function num_replicas(df::DataFrame)
     end
 end
 
-"""
-    num_spectral_states(df::DataFrame)
-
-Return the number of spectral states used in the simulation that produced `df`.
-"""
 function num_spectral_states(df::DataFrame)
     if haskey(metadata(df), "num_spectral_states")
         return parse(Int, metadata(df, "num_spectral_states"))
@@ -29,11 +19,6 @@ function num_spectral_states(df::DataFrame)
     end
 end
 
-"""
-    num_overlaps(df::DataFrame)
-
-Return the number of overlaps between replicas.
-"""
 function num_overlaps(df::DataFrame)
     if haskey(metadata(df), "num_overlaps")
         return parse(Int, metadata(df, "num_overlaps"))
diff --git a/src/qmc_states.jl b/src/qmc_states.jl
index 6e6714ebd..e40b89bdd 100644
--- a/src/qmc_states.jl
+++ b/src/qmc_states.jl
@@ -120,6 +120,12 @@ end
 
 num_replicas(::ReplicaState{N}) where {N} = N
 num_spectral_states(::ReplicaState{<:Any, S}) where {S} = S
+
+"""
+    num_overlaps(state_or_DataFrame)
+
+Return the number of overlaps between replicas.
+"""
 num_overlaps(::ReplicaState{<:Any,<:Any,<:Any,<:Any,<:NoStats}) = 0
 num_overlaps(::ReplicaState{N,<:Any,<:Any,<:Any,<:AllOverlaps{N,<:Any,<:Any,B}}) where {N,B} = B*N*(N-1)÷2
 
diff --git a/src/strategies_and_params/replicastrategy.jl b/src/strategies_and_params/replicastrategy.jl
index cea319721..1568311fa 100644
--- a/src/strategies_and_params/replicastrategy.jl
+++ b/src/strategies_and_params/replicastrategy.jl
@@ -22,7 +22,7 @@ function:
 abstract type ReplicaStrategy{N} end
 
 """
-    num_replicas(state_or_strategy)
+    num_replicas(state_or_strategy_or_DataFrame)
 
 Return the number of replicas used in the simulation.
 """
diff --git a/src/strategies_and_params/spectralstrategy.jl b/src/strategies_and_params/spectralstrategy.jl
index 7fc8b2f00..84767744c 100644
--- a/src/strategies_and_params/spectralstrategy.jl
+++ b/src/strategies_and_params/spectralstrategy.jl
@@ -12,7 +12,7 @@ of spectral states used in the simulation.
 abstract type SpectralStrategy{S} end
 
 """
-    num_spectral_states(state_or_strategy)
+    num_spectral_states(state_or_strategy_or_DataFrame)
 
 Return the number of spectral states used in the simulation.
 """

From 58e6e6e1cfd3508c8babf4b3ae58cacca654f33f Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Fri, 14 Mar 2025 13:16:57 +1300
Subject: [PATCH 22/24] Clarify docs, add test

---
 src/qmc_states.jl                            | 3 ++-
 src/strategies_and_params/replicastrategy.jl | 3 ++-
 test/excited_states_tests.jl                 | 4 +++-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/qmc_states.jl b/src/qmc_states.jl
index e40b89bdd..ba3d1d408 100644
--- a/src/qmc_states.jl
+++ b/src/qmc_states.jl
@@ -124,7 +124,8 @@ num_spectral_states(::ReplicaState{<:Any, S}) where {S} = S
 """
     num_overlaps(state_or_DataFrame)
 
-Return the number of overlaps between replicas.
+Return the number of overlaps between replicas. Only counts overlaps between replicas of
+the same spectral state, even if `AllOverlaps` is used with `mixed_spectral_overlaps=true`.
 """
 num_overlaps(::ReplicaState{<:Any,<:Any,<:Any,<:Any,<:NoStats}) = 0
 num_overlaps(::ReplicaState{N,<:Any,<:Any,<:Any,<:AllOverlaps{N,<:Any,<:Any,B}}) where {N,B} = B*N*(N-1)÷2
diff --git a/src/strategies_and_params/replicastrategy.jl b/src/strategies_and_params/replicastrategy.jl
index 1568311fa..966e2cc07 100644
--- a/src/strategies_and_params/replicastrategy.jl
+++ b/src/strategies_and_params/replicastrategy.jl
@@ -24,7 +24,8 @@ abstract type ReplicaStrategy{N} end
 """
     num_replicas(state_or_strategy_or_DataFrame)
 
-Return the number of replicas used in the simulation.
+Return the number of replicas used in the simulation. With multiple spectral states, only
+reports the number of replicas per spectral state.
 """
 num_replicas(::ReplicaStrategy{N}) where {N} = N
 
diff --git a/test/excited_states_tests.jl b/test/excited_states_tests.jl
index dca8c87f6..e9c3f94ac 100644
--- a/test/excited_states_tests.jl
+++ b/test/excited_states_tests.jl
@@ -46,8 +46,10 @@ using Test
 
     replica_strategy = AllOverlaps(n_replicas; operator=G2RealCorrelator(0), mixed_spectral_overlaps=true)
     p = ProjectorMonteCarloProblem(ham; spectral_strategy, last_step, style, replica_strategy)
-    df = DataFrame(solve(p))
+    sim = solve(p)
+    df = DataFrame(sim)
     @test num_replicas(df) == 2
+    @test num_overlaps(sim) == 1
     @test num_overlaps(df) == 1
     for state in 1:3
         r = rayleigh_replica_estimator(df; spectral_state=state)

From 191653371fed0dca2ec6dc609ec4bc81f69d846a Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Mon, 24 Mar 2025 12:44:28 +1300
Subject: [PATCH 23/24] Add method for num_overlaps of
 ProjectorMonteCarloProblem

---
 src/projector_monte_carlo_problem.jl  | 7 +++++++
 test/projector_monte_carlo_problem.jl | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/src/projector_monte_carlo_problem.jl b/src/projector_monte_carlo_problem.jl
index 317049b75..32d9b1e9a 100644
--- a/src/projector_monte_carlo_problem.jl
+++ b/src/projector_monte_carlo_problem.jl
@@ -287,3 +287,10 @@ end
 
 num_replicas(::ProjectorMonteCarloProblem{N}) where N = N
 num_spectral_states(::ProjectorMonteCarloProblem{<:Any,S}) where {S} = S
+function num_overlaps(p::ProjectorMonteCarloProblem{N}) where {N}
+    if p.replica_strategy isa AllOverlaps{N,<:Any,<:Any,true}
+        return N*(N-1)÷2
+    else
+        return 0
+    end
+end
diff --git a/test/projector_monte_carlo_problem.jl b/test/projector_monte_carlo_problem.jl
index 7ef4d9465..831582051 100644
--- a/test/projector_monte_carlo_problem.jl
+++ b/test/projector_monte_carlo_problem.jl
@@ -104,6 +104,7 @@ end
             "1-element Rimu.SpectralState"
         )
         @test startswith(sprint(show, state_vectors(sm)), "2×1 Rimu.StateVectors")
+        @test num_overlaps(sm) == num_overlaps(p) == num_overlaps(sm.state)
     end
 
     @testset "Default DVec" begin
@@ -187,6 +188,7 @@ using Rimu: num_replicas, num_spectral_states
     @test size(DataFrame(sm))[1] == sm.state.step[]
     @test num_replicas(sm) == num_replicas(p) == num_replicas(sm.state)
     @test num_spectral_states(sm) == num_spectral_states(p) == num_spectral_states(sm.state)
+    @test num_overlaps(sm) == num_overlaps(p) == num_overlaps(sm.state)
     @test size(state_vectors(sm)) == (num_replicas(sm), num_spectral_states(sm))
     @test size(sm.state) == (num_replicas(sm), num_spectral_states(sm))
     @test sm.state[1, 1] === sm.state.spectral_states[1][1]

From b421f29247569af15df86ab15d6df3b6b85b6747 Mon Sep 17 00:00:00 2001
From: jamie-tay <jamiet96@gmail.com>
Date: Mon, 24 Mar 2025 12:47:20 +1300
Subject: [PATCH 24/24] Apply suggestions from code review

Co-authored-by: Joachim Brand <joachim.brand@gmail.com>
---
 src/qmc_states.jl                             | 9 +++++++--
 src/strategies_and_params/replicastrategy.jl  | 2 ++
 src/strategies_and_params/spectralstrategy.jl | 2 ++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/qmc_states.jl b/src/qmc_states.jl
index ba3d1d408..41cccb606 100644
--- a/src/qmc_states.jl
+++ b/src/qmc_states.jl
@@ -124,8 +124,13 @@ num_spectral_states(::ReplicaState{<:Any, S}) where {S} = S
 """
     num_overlaps(state_or_DataFrame)
 
-Return the number of overlaps between replicas. Only counts overlaps between replicas of
-the same spectral state, even if `AllOverlaps` is used with `mixed_spectral_overlaps=true`.
+Return the number of overlaps between replicas that are being reported. Only counts 
+overlaps between replicas of the same spectral state, even if `AllOverlaps` is used with 
+`mixed_spectral_overlaps=true`.
+
+The return value is `0` if no overlaps are being reported, and `N*(N-1)÷2` otherwise, where `N` is the value returned by [`num_replicas`](@ref).
+
+See also [`ProjectorMonteCarloProblem`](@ref), [`AllOverlaps`](@ref), [`num_spectral_states`](@ref).
 """
 num_overlaps(::ReplicaState{<:Any,<:Any,<:Any,<:Any,<:NoStats}) = 0
 num_overlaps(::ReplicaState{N,<:Any,<:Any,<:Any,<:AllOverlaps{N,<:Any,<:Any,B}}) where {N,B} = B*N*(N-1)÷2
diff --git a/src/strategies_and_params/replicastrategy.jl b/src/strategies_and_params/replicastrategy.jl
index 966e2cc07..1bd791f1c 100644
--- a/src/strategies_and_params/replicastrategy.jl
+++ b/src/strategies_and_params/replicastrategy.jl
@@ -26,6 +26,8 @@ abstract type ReplicaStrategy{N} end
 
 Return the number of replicas used in the simulation. With multiple spectral states, only
 reports the number of replicas per spectral state.
+
+See also [`ProjectorMonteCarloProblem`](@ref), [`AllOverlaps`](@ref), [`num_spectral_states`](@ref).
 """
 num_replicas(::ReplicaStrategy{N}) where {N} = N
 
diff --git a/src/strategies_and_params/spectralstrategy.jl b/src/strategies_and_params/spectralstrategy.jl
index 84767744c..ed6a1c3cb 100644
--- a/src/strategies_and_params/spectralstrategy.jl
+++ b/src/strategies_and_params/spectralstrategy.jl
@@ -15,6 +15,8 @@ abstract type SpectralStrategy{S} end
     num_spectral_states(state_or_strategy_or_DataFrame)
 
 Return the number of spectral states used in the simulation.
+
+See also [`ProjectorMonteCarloProblem`](@ref), [`GramSchmidt`](@ref), [`num_replicas`](@ref).
 """
 num_spectral_states(::SpectralStrategy{S}) where {S} = S