diff --git a/Project.toml b/Project.toml index 5a010e9..a1bab56 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NaNStatistics" uuid = "b946abbf-3ea7-4610-9019-9858bfdeaf2d" authors = ["C. Brenhin Keller"] -version = "0.6.36" +version = "0.6.37" [deps] PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" diff --git a/src/Histograms.jl b/src/Histograms.jl index 6d3e7e1..09ccdca 100644 --- a/src/Histograms.jl +++ b/src/Histograms.jl @@ -35,6 +35,83 @@ function histcounts(x, xedges::AbstractRange; T=Int64) end histcounts(x, xmin::Number, xmax::Number, nbins::Integer; T=Int64) = histcounts(x, range(xmin, xmax, length=nbins+1); T=T) +""" +```julia +histmean(counts, bincenters) +``` +Estimate the mean of the data represented by a histogram, +specified as `counts` in equally spaced bins centered at `bincenters`. + +## Examples +```julia +julia> binedges = -10:0.01:10; + +julia> counts = histcounts(randn(10000), binedges); + +julia> bincenters = (binedges[1:end-1] + binedges[2:end])/2 +-9.995:0.01:9.995 + +julia> histmean(counts, bincenters) +0.0039890000000003135 +``` +""" +function histmean(counts, bincenters) + N = ∅ₙ = zero(eltype(counts)) + Σ = ∅ = zero(Base.promote_op(*, eltype(counts), eltype(bincenters))) + @inbounds @simd ivdep for i in eachindex(counts, bincenters) + pᵢ = counts[i] * bincenters[i] + Σ += ifelse(isnan(pᵢ), ∅, pᵢ) + N += ifelse(isnan(pᵢ), ∅ₙ, counts[i]) + end + return Σ / N +end +export histmean + +""" +```julia +histstd(counts, bincenters; corrected::Bool=true) +``` +Estimate the standard deviation of the data represented by a histogram, +specified as `counts` in equally spaced bins centered at `bincenters`. + +If `counts` have been normalized, or represent an analytical estimate of a PDF +rather than a histogram representing counts of a dataset, Bessel's correction +to the standard deviation should likely not be performed - i.e., set the +`corrected` keyword argument to `false`. + +## Examples +```julia +julia> binedges = -10:0.01:10; + +julia> counts = histcounts(randn(10000), binedges); + +julia> bincenters = (binedges[1:end-1] + binedges[2:end])/2 +-9.995:0.01:9.995 +t +julia> histstd(counts, bincenters) +0.9991854064196424 +``` +""" +function histstd(counts, bincenters; corrected::Bool=true) + N = ∅ₙ = zero(eltype(counts)) + Σ = ∅ = zero(Base.promote_op(*, eltype(counts), eltype(bincenters))) + @inbounds @simd ivdep for i in eachindex(counts, bincenters) + pᵢ = counts[i] * bincenters[i] + Σ += ifelse(isnan(pᵢ), ∅, pᵢ) + N += ifelse(isnan(pᵢ), ∅ₙ, counts[i]) + end + μ = Σ/N + Σ = ∅ + @inbounds @simd ivdep for i in eachindex(counts, bincenters) + dᵢ = bincenters[i] - μ + pᵢ = counts[i] * dᵢ * dᵢ + Σ += ifelse(isnan(pᵢ), ∅, pᵢ) + end + return Σ / max(N-corrected, ∅ₙ) +end +export histstd + + """ ```julia diff --git a/test/Project.toml b/test/Project.toml index 94bd924..6905ca2 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,4 +1,5 @@ [deps] +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" diff --git a/test/runtests.jl b/test/runtests.jl index 54ed4eb..b976de6 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,5 +1,5 @@ using NaNStatistics -using Test, StatsBase, Statistics, LinearAlgebra +using Test, StatsBase, Statistics, LinearAlgebra, Distributions @testset "ArrayStats" begin include("testArrayStats.jl") end @testset "Covariance and Correlation" begin include("testCovCor.jl") end diff --git a/test/testHistograms.jl b/test/testHistograms.jl index c175b08..53ed0be 100644 --- a/test/testHistograms.jl +++ b/test/testHistograms.jl @@ -34,5 +34,17 @@ w = "size(N,2) < nxbins; any x bins beyond size(N,2) will not be filled" @test (@test_logs (:warn, w) histcounts!(zeros(10,5), x, y, xedges, yedges)) == I(10)[:,1:5] +## --- Statistics on histograms + + a = randn(10000) + binedges = -10:0.1:10 + bincenters = (binedges[1:end-1] + binedges[2:end])/2 + h = histcounts(a, binedges) + @test histmean(h, bincenters) ≈ nanmean(a) atol = 0.1 + @test histstd(h, bincenters) ≈ nanstd(a) atol = 0.1 + + n = pdf.(Normal(0,1), bincenters) + @test histmean(n, bincenters) ≈ 0 atol = 1e-6 + @test histstd(n, bincenters, corrected=false) ≈ 1 atol = 1e-6 ## --- End of File