RimuQMC · jamie-tay · Dec 20, 2024 · Jan 5, 2025 · Jan 5, 2025 · Jan 5, 2025
diff --git a/docs/src/projectormontecarlo.md b/docs/src/projectormontecarlo.md
@@ -39,7 +39,7 @@ replica (`n_replicas = 1`) and single spectral state, the fields `:shift`, `:nor
 be present as well as others depending on the `style` argument and the `post_step_strategy`.
 
 If multiple replicas or spectral states are requested, then the relevant field names in the 
-`DataFrame` will have a suffix identifying the respective replica simulation, e.g. the `shift`s will be reported as `shift_1`, `shift_2`, ... 
+`DataFrame` will have a suffix identifying the respective replica simulation, e.g. the `shift`s will be reported as `shift_r1s1`, `shift_r2s1`, ... 
 
 Many tools for analysing the time series data obtained from a 
 [`ProjectorMonteCarloProblem`](@ref) are contained in the [Module `StatsTools`](@ref).
diff --git a/scripts/G2-example.jl b/scripts/G2-example.jl
@@ -70,19 +70,20 @@ problem = ProjectorMonteCarloProblem(H;
 result = solve(problem)
 df = DataFrame(result);
 
-# The output `DataFrame` has FCIQMC statistics for each replica (e.g. shift, norm),
+# The output `DataFrame` has FCIQMC statistics for each replica (e.g. shift, norm)
+# and for each spectral state (here only the ground state `s1` is calculated),
 println(filter(startswith("shift_"), names(df)))
 
-# as well as vector-vector overlaps (e.g. `c1_dot_c2`),
+# as well as vector-vector overlaps (e.g. `r1s1_dot_r2s1`),
 println(filter(contains("dot"), names(df)))
 
-# and operator overlaps (e.g. `c1_Op1_c2`) between the replicas.
+# and operator overlaps (e.g. `r1s1_Op1_r2s1`) between the replicas.
 println(filter(contains("Op"), names(df)))
 
 # The vector-vector and operator overlaps go into calculating the Rayleigh quotient
 # for an observable
 # ```math
-#     \langle \hat{G}^{(2)}(d) \rangle = \frac{\sum_{a<b} \mathbf{c}_a^\dagger \cdot \hat{G}^{(2)}(d) \cdot \mathbf{c}_b}{\sum_{a<b} \mathbf{c}_a^\dagger \cdot \mathbf{c}_b }
+#     \langle \hat{G}^{(2)}(d) \rangle = \frac{\sum_{a<b} \mathbf{c}_a^\dagger \hat{G}^{(2)}(d) \mathbf{c}_b}{\sum_{a<b} \mathbf{c}_a^\dagger \mathbf{c}_b }
 # ```
 # The sum over all replica pairs (a,b), especially in the denominator, helps to avoid
 # errors from poor sampling if the number of walkers is too low.
@@ -107,7 +108,7 @@ end
 # energy.
 println("Shift energy from $n_replicas replicas:")
 for i in 1:n_replicas
-    se = shift_estimator(df; shift="shift_$i", skip=steps_equilibrate)
+    se = shift_estimator(df; shift="shift_r$(i)s1", skip=steps_equilibrate)
     println("   Replica $i: $(se.mean) ± $(se.err)")
 end
 

diff --git a/src/StatsTools/fidelity.jl b/src/StatsTools/fidelity.jl
@@ -1,12 +1,13 @@
 """
-    replica_fidelity(df::DataFrame; p_field = :hproj, skip = 0)
+    replica_fidelity(df::DataFrame; p_field = :hproj, spectral_state = 1, skip = 0)
     replica_fidelity(sim::PMCSimulation; kwargs...)
 
 Compute the fidelity of the average coefficient vector and the projector defined in
 `p_field` from the [`PMCSimulation`](@ref Main.Rimu.PMCSimulation) or `DataFrame` returned
-by solve, using replicas `_1` and `_2`. Calls [`ratio_of_means`](@ref) to perform a
-blocking analysis on a ratio of the means of separate time series and returns a
-[`RatioBlockingResult`](@ref). The first `skip` steps in the time series are skipped.
+by solve, using replicas `_r1s{i}` and `_r2s{i}`, where `i` is the `spectral_state`. Calls
+[`ratio_of_means`](@ref) to perform a blocking analysis on a ratio of the means of separate
+time series and returns a [`RatioBlockingResult`](@ref). The first `skip` steps in the time
+series are skipped.
 
 The fidelity of states `|ψ⟩` and `|ϕ⟩` is defined as
 ```math
@@ -22,14 +23,15 @@ where `v` is the projector specified by `p_field`, which is assumed to be normal
 unity with the two-norm (i.e. `v⋅v == 1`), and ``\\mathbf{c}_1`` and ``\\mathbf{c}_2``
 are two replica coefficient vectors.
 """
-function replica_fidelity(sim; p_field = :hproj, skip = 0,  args...)
+function replica_fidelity(sim; p_field = :hproj, skip = 0,  spectral_state = 1, args...)
     df = DataFrame(sim)
-    p_field_1 = Symbol(p_field, :_1)
-    p_field_2 = Symbol(p_field, :_2)
+    p_field_1 = Symbol(p_field, :_r1s, spectral_state)
+    p_field_2 = Symbol(p_field, :_r2s, spectral_state)
     fid_num = conj(getproperty(df, p_field_1)) .* getproperty(df, p_field_2)
     fid_num = fid_num[skip+1:end]
     # denominator
-    fid_den = df.c1_dot_c2[skip+1:end]
+    #fid_den = df.r1s1_dot_r2s1[skip+1:end]
+    fid_den = Vector(df[!, Symbol("r1s", spectral_state, "_dot_r2s", spectral_state)])[skip+1:end]
 
     return ratio_of_means(fid_num, fid_den; args...)
 end
diff --git a/src/StatsTools/reweighting.jl b/src/StatsTools/reweighting.jl
@@ -13,12 +13,14 @@ function determine_constant_time_step(df)
             return parse(Float64, metadata(df)["time_step"])
         elseif hasproperty(df, "time_step")
             return df.time_step[end]
-        elseif hasproperty(df, "time_step_1")
-            return df.time_step_1[end]
+        elseif hasproperty(df, "time_step_r1s1")
+            return df.time_step_r1s1[end]
         elseif hasproperty(df, "dτ") # backwards compatibility
             return df.dτ[end]
         elseif hasproperty(df, "dτ_1")
             return df.dτ_1[end]
+        elseif hasproperty(df, "time_step_1")
+            return df.time_step_1[end]
         else
             throw(ArgumentError("key `\"time_step\"` not found in `df` metadata"))
         end
@@ -477,6 +479,7 @@ end
         shift_name="shift",
         op_name="Op1",
         vec_name="dot",
+        spectral_state=1,
         h=0,
         skip=0,
         Anorm=1,
@@ -507,7 +510,8 @@ The second method computes the Rayleigh quotient directly from a `DataFrame` or
 columns, see [`AllOverlaps`](@ref Main.AllOverlaps) for default formatting. The operator
 overlap data can be scaled by a prefactor `Anorm`. A specific reweighting depth can be set
 with keyword argument `h`. The default is `h = 0` which calculates the Rayleigh quotient
-without reweighting.
+without reweighting. To compute the Rayleigh quotient for the `n`th spectral state, set
+`spectral_state = n`.
 
 The reweighting is an extension of the mixed estimator using the reweighting technique
 described in [Umrigar *et al.* (1993)](http://dx.doi.org/10.1063/1.465195).
@@ -556,27 +560,28 @@ function rayleigh_replica_estimator(
     shift_name="shift",
     op_name="Op1",
     vec_name="dot",
+    spectral_state=1,
     h=0,
     skip=0,
     Anorm=1,
     time_step=nothing,
     kwargs...
 )
     df = DataFrame(sim)
-    num_reps = length(filter(startswith("norm"), names(df)))
+    num_reps = parse(Int, metadata(df, "num_replicas"))
     time_step = isnothing(time_step) ? determine_constant_time_step(df) : time_step
-    T = eltype(df[!, Symbol(shift_name, "_1")])
+    T = eltype(df[!, Symbol(shift_name, "_r1s1")])
     shift_v = Vector{T}[]
     for a in 1:num_reps
-        push!(shift_v, Vector(df[!, Symbol(shift_name, "_", a)]))
+        push!(shift_v, Vector(df[!, Symbol(shift_name, "_r", a, "s", spectral_state)]))
     end
-    T = eltype(df[!, Symbol("c1_", vec_name, "_c2")])
+    T = eltype(df[!, Symbol("r1s1_", vec_name, "_r2s1")])
     vec_ol_v = Vector{T}[]
-    T = eltype(df[!, Symbol("c1_", op_name, "_c2")])
+    T = eltype(df[!, Symbol("r1s1_", op_name, "_r2s1")])
     op_ol_v = Vector{T}[]
     for a in 1:num_reps, b in a+1:num_reps
-        push!(op_ol_v, Vector(df[!, Symbol("c", a, "_", op_name, "_c" ,b)] .* Anorm))
-        push!(vec_ol_v, Vector(df[!, Symbol("c", a, "_", vec_name, "_c" ,b)]))
+        push!(op_ol_v, Vector(df[!, Symbol("r", a, "s", spectral_state, "_", op_name, "_r" ,b, "s", spectral_state)] .* Anorm))
+        push!(vec_ol_v, Vector(df[!, Symbol("r", a, "s", spectral_state, "_", vec_name, "_r" ,b, "s", spectral_state)]))
     end
 
     return rayleigh_replica_estimator(op_ol_v, vec_ol_v, shift_v, h, time_step; skip, kwargs...)
@@ -601,9 +606,10 @@ Returns a `NamedTuple` with the fields
 * `h_values = 100`: minimum number of reweighting depths
 * `skip = 0`: initial time steps to exclude from averaging
 * `threading = Threads.nthreads() > 1`: if `false` a progress meter is displayed
-* `shift_name = "shift"`: shift data corresponding to column in `df` with names `<shift>_1`, ...
-* `op_name = "Op1"`: name of operator overlap corresponding to column in `df` with names `c1_<op_ol>_c2`, ...
-* `vec_name = "dot"`: name of vector-vector overlap corresponding to column in `df` with names `c1_<vec_ol>_c2`, ...
+* `shift_name = "shift"`: shift data corresponding to column in `df` with names `<shift>_r1s1`, ...
+* `op_name = "Op1"`: name of operator overlap corresponding to column in `df` with names `r1s1_<op_ol>_r2s1`, ...
+* `vec_name = "dot"`: name of vector-vector overlap corresponding to column in `df` with names `r1s1_<vec_ol>_r2s1`, ...
+* `spectral_state = 1`: which spectral state to use
 * `Anorm = 1`: a scalar prefactor to scale the operator overlap data
 * `warn = true`: whether to log warning messages when blocking fails or denominators are small
 
@@ -628,22 +634,23 @@ function rayleigh_replica_estimator_analysis(
     shift_name="shift",
     op_name="Op1",
     vec_name="dot",
+    spectral_state=1,
     Anorm=1,
     warn=true,
     time_step=nothing,
     kwargs...
 )
     df = DataFrame(sim)
-    num_reps = length(filter(startswith("norm"), names(df)))
+    num_reps = parse(Int, metadata(df, "num_replicas"))
     time_step = isnothing(time_step) ? determine_constant_time_step(df) : time_step
     # estimate the correlation time by blocking the shift data
-    T = eltype(df[!, Symbol(shift_name, "_1")])
+    T = eltype(df[!, Symbol(shift_name, "_r1s1")])
     shift_v = Vector{T}[]
     E_r = T[]
     correlation_estimate = Int[]
     df_se = DataFrame()
     for a in 1:num_reps
-        push!(shift_v, Vector(df[!, Symbol(shift_name, "_", a)]))
+        push!(shift_v, Vector(df[!, Symbol(shift_name, "_r", a, "s", spectral_state)]))
         se = blocking_analysis(shift_v[a]; skip)
         push!(E_r, se.mean)
         push!(correlation_estimate, 2^(se.k - 1))
@@ -652,13 +659,13 @@ function rayleigh_replica_estimator_analysis(
     if isnothing(h_range)
         h_range = determine_h_range(df, skip, minimum(correlation_estimate), h_values)
     end
-    T = eltype(df[!, Symbol("c1_", vec_name, "_c2")])
+    T = eltype(df[!, Symbol("r1s1_", vec_name, "_r2s1")])
     vec_ol_v = Vector{T}[]
-    T = eltype(df[!, Symbol("c1_", op_name, "_c2")])
+    T = eltype(df[!, Symbol("r1s1_", op_name, "_r2s1")])
     op_ol_v = Vector{T}[]
     for a in 1:num_reps, b in a+1:num_reps
-        push!(op_ol_v, Vector(df[!, Symbol("c", a, "_", op_name, "_c" ,b)] .* Anorm))
-        push!(vec_ol_v, Vector(df[!, Symbol("c", a, "_", vec_name, "_c" ,b)]))
+        push!(op_ol_v, Vector(df[!, Symbol("r", a, "s", spectral_state, "_", op_name, "_r" ,b, "s", spectral_state)] .* Anorm))
+        push!(vec_ol_v, Vector(df[!, Symbol("r", a, "s", spectral_state, "_", vec_name, "_r" ,b, "s", spectral_state)]))
     end
 
     df_rre = if threading

diff --git a/src/StatsTools/variational_energy_estimator.jl b/src/StatsTools/variational_energy_estimator.jl
@@ -1,13 +1,14 @@
 """
     variational_energy_estimator(shifts, overlaps; kwargs...)
-    variational_energy_estimator(df::DataFrame; max_replicas=:all, kwargs...)
+    variational_energy_estimator(df::DataFrame; max_replicas=:all, spectral_state=1, kwargs...)
     variational_energy_estimator(sim::PMCSimulation; kwargs...)
     -> r::RatioBlockingResult
 
 Compute the variational energy estimator from the replica time series of the `shifts` and
 coefficient vector `overlaps` by blocking analysis.
 The keyword argument `max_replicas` can be used to constrain the number of replicas
 processed to be smaller than all available in `df`.
+The keyword argument `spectral_state` determines which spectral state's energy is computed.
 Other keyword arguments are passed on to [`ratio_of_means()`](@ref).
 Returns a [`RatioBlockingResult`](@ref).
 
@@ -50,31 +51,31 @@ function variational_energy_estimator(shifts, overlaps; kwargs...)
     return ratio_of_means(numerator, denominator; kwargs...)
 end
 
-function variational_energy_estimator(sim; max_replicas=:all, kwargs...)
+function variational_energy_estimator(sim; max_replicas=:all, spectral_state=1, kwargs...)
     df = DataFrame(sim)
-    num_replicas = length(filter(startswith("norm_"), names(df))) # number of replicas
-    if iszero(num_replicas)
+    num_replicas = parse(Int, metadata(df, "num_replicas"))
+    if num_replicas == 1
         throw(ArgumentError(
             "No replicas found. Use keyword \
             `replica_strategy=AllOverlaps(n)` with n≥2 in `ProjectorMonteCarloProblem` to set up replicas!"
         ))
     end
     @assert num_replicas ≥ 2 "At least two replicas are needed, found $num_replicas"
 
-    num_overlaps = length(filter(startswith(r"c._dot"), names(df)))
+    num_overlaps = length(filter(startswith(Regex("r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)")), names(df)))
-    num_overlaps = length(filter(startswith(Regex("r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)")), names(df)))
+    num_overlaps = length(filter(startswith("r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)"), names(df)))
-    num_overlaps = length(filter(startswith(Regex("r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)")), names(df)))
+    num_overlaps = length(filter(startswith(r"r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)"), names(df)))
-    num_overlaps = length(filter(startswith(Regex("r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)")), names(df)))
+    num_overlaps = length(filter(startswith("r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)"), names(df)))
-    num_overlaps = length(filter(startswith(Regex("r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)")), names(df)))
+    num_overlaps = length(filter(startswith(r"r[0-9]+s$(spectral_state)_dot_r[0-9]+s$(spectral_state)"), names(df)))
     @assert num_overlaps == binomial(num_replicas, 2) "Unexpected number of overlaps."
 
     # process at most `max_replicas` but at least 2 replicas
     if max_replicas isa Integer
         num_replicas = max(2, min(max_replicas, num_replicas))
     end
 
-    shiftnames = [Symbol("shift_$i") for i in 1:num_replicas]
+    shiftnames = [Symbol("shift_r$(i)s$(spectral_state)") for i in 1:num_replicas]
     shifts = map(name -> getproperty(df, name), shiftnames)
     @assert length(shifts) == num_replicas
 
     overlap_names = [
-        Symbol("c$(i)_dot_c$(j)") for i in 1:num_replicas for j in i+1:num_replicas
+        Symbol("r$(i)s$(spectral_state)_dot_r$(j)s$(spectral_state)") for i in 1:num_replicas for j in i+1:num_replicas
     ]
     overlaps = map(name -> getproperty(df, name), overlap_names)
     @assert length(overlaps) ≤ num_overlaps

diff --git a/src/pmc_simulation.jl b/src/pmc_simulation.jl
@@ -124,19 +124,19 @@ function PMCSimulation(problem::ProjectorMonteCarloProblem; copy_vectors=true)
     # set up the spectral_states
     wm = working_memory(vectors[1, 1])
     spectral_states = ntuple(n_replicas) do i
-        replica_id = if n_replicas == 1
+        replica_id = if n_replicas == 1 && n_spectral == 1
             ""
         else
-            "_$(i)"
+            "_r$(i)"
         end
         SpectralState(
             ntuple(n_spectral) do j
                 v = vectors[i, j]
                 sp = shift_parameters[i, j]
-                spectral_id = if n_spectral == 1
+                spectral_id = if n_replicas == 1 && n_spectral == 1
                     ""
                 else
-                    "_s$(j)" # j is the spectral state index, i is the replica index
+                    "s$(j)" # j is the spectral state index, i is the replica index
                 end
                 SingleState(
                     hamiltonian, algorithm, v, zerovector(v),

diff --git a/src/projector_monte_carlo_problem.jl b/src/projector_monte_carlo_problem.jl
@@ -206,10 +206,6 @@ function ProjectorMonteCarloProblem(
 
     n_spectral = num_spectral_states(spectral_strategy) # spectral_strategy may override n_spectral
 
-    if replica_strategy isa AllOverlaps && n_spectral > 1
-        throw(ArgumentError("AllOverlaps is not implemented for more than one spectral state."))
-    end
-
     if random_seed == true
         random_seed = rand(RandomDevice(),UInt64)
     elseif random_seed == false

diff --git a/src/strategies_and_params/poststepstrategy.jl b/src/strategies_and_params/poststepstrategy.jl
@@ -33,7 +33,7 @@ See also [`PostStepStrategy`](@ref), [`ReportingStrategy`](@ref).
 """
 post_step_action
 
-# When startegies are a Tuple, apply all of them.
+# When strategies are a Tuple, apply all of them.
 function post_step_action(::Tuple{}, _, _)
     return ()
 end
@@ -275,7 +275,6 @@ function single_particle_density(dvec; component=0)
     result = sum(pairs(dvec); init=MultiScalar(ntuple(_ -> zero(V), Val(M)))) do (k, v)
         MultiScalar(abs2(v) .* single_particle_density(k; component))
     end
-
     return result.tuple ./ sum(abs2, dvec)
 end