diff --git a/Project.toml b/Project.toml index 5fb14d5..b4f47b9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "NaNStatistics" uuid = "b946abbf-3ea7-4610-9019-9858bfdeaf2d" authors = ["C. Brenhin Keller"] -version = "0.6.38" +version = "0.6.39" [deps] PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" diff --git a/src/ArrayStats/ArrayStats.jl b/src/ArrayStats/ArrayStats.jl index dc7d832..7ed9559 100644 --- a/src/ArrayStats/ArrayStats.jl +++ b/src/ArrayStats/ArrayStats.jl @@ -156,6 +156,17 @@ ## --- Summary statistics of arrays with NaNs + uinit(T::Type) = uinit(float(T)) + uinit(T::Type{<:Integer}) = typemax(T) + uinit(::Type{Float64}) = NaN + uinit(::Type{Float32}) = NaN32 + uinit(::Type{Float16}) = NaN16 + linit(T::Type) = linit(float(T)) + linit(T::Type{<:Integer}) = typemin(T) + linit(::Type{Float64}) = NaN + linit(::Type{Float32}) = NaN32 + linit(::Type{Float16}) = NaN16 + """ ```julia nanminimum(A; dims) @@ -171,8 +182,8 @@ __nanminimum(A, ::Colon, ::Colon) = _nanminimum(A, :) __nanminimum(A, region, ::Colon) = _nanminimum(A, region) __nanminimum(A, ::Colon, region) = reducedims(_nanminimum(A, region), region) - _nanminimum(A, region) = reduce(nanmin, A, dims=region, init=float(eltype(A))(NaN)) - _nanminimum(A, ::Colon) = reduce(nanmin, A, init=float(eltype(A))(NaN)) + _nanminimum(A, region) = reduce(nanmin, A, dims=region, init=uinit(eltype(A))) + _nanminimum(A, ::Colon) = reduce(nanmin, A, init=uinit(eltype(A))) export nanminimum @@ -191,8 +202,8 @@ __nanmaximum(A, ::Colon, ::Colon) = _nanmaximum(A, :) __nanmaximum(A, region, ::Colon) = _nanmaximum(A, region) __nanmaximum(A, ::Colon, region) = reducedims(_nanmaximum(A, region), region) - _nanmaximum(A, region) = reduce(nanmax, A, dims=region, init=float(eltype(A))(NaN)) - _nanmaximum(A, ::Colon) = reduce(nanmax, A, init=float(eltype(A))(NaN)) + _nanmaximum(A, region) = reduce(nanmax, A, dims=region, init=linit(eltype(A))) + _nanmaximum(A, ::Colon) = reduce(nanmax, A, init=linit(eltype(A))) export nanmaximum """ diff --git a/src/ArrayStats/nanlogsumexp.jl b/src/ArrayStats/nanlogsumexp.jl new file mode 100644 index 0000000..022c694 --- /dev/null +++ b/src/ArrayStats/nanlogsumexp.jl @@ -0,0 +1,85 @@ + +""" +```julia +nanlogsumexp(A) +``` +Return the logarithm of the sum of `exp.(A)` -- i.e., `log(sum(exp.(A)))`, but ignoring `NaN`s and avoiding numerical over/underflow. +As `nancumsum`, but operating on logarithms; as `nanlogsumexp`, but returning a array of cumulative sums, rather than a single value. +## Examples +```julia +``` +""" +nanlogsumexp(A) = _nanlogsumexp(A, nanmaximum(A)) +export nanlogsumexp + +function _nanlogsumexp(A, c) + Σ = ∅ = zero(Base.promote_op(exp, eltype(A))) + @inbounds @simd ivdep for i ∈ eachindex(A) + Aᵢ = exp(A[i] - c) + Σ += ifelse(isnan(Aᵢ), ∅, Aᵢ) + end + return log(Σ) + c +end + + +""" +```julia +nanlogcumsumexp(A) +``` +Return the logarithm of the cumulative sum of `exp.(A)` -- i.e., `log.(cumsum.(exp.(A)))`, but ignoring `NaN`s and avoiding numerical over/underflow. +As `nancumsum`, but operating on logarithms; as `nanlogsumexp`, but returning a array of cumulative sums, rather than a single value. +## Examples +```julia +``` +""" +nanlogcumsumexp(A; reverse=false) = _nanlogcumsumexp(A, static(reverse)) +export nanlogcumsumexp + +function _nanlogcumsumexp(A, ::False) + Tᵣ = Base.promote_op(exp, eltype(A)) + Σ = ∅ = zero(Tᵣ) + lΣ = fill!(similar(A, Tᵣ), ∅) + c = linit(eltype(A)) + i₀ = firstindex(A) + @inbounds for i ∈ eachindex(A) + isnan(c) && (c = A[i]) + if A[i] > c + c = A[i] + Σ = ∅ + @simd ivdep for j ∈ i₀:i + Aᵢ = exp(A[j] - c) + Σ += ifelse(isnan(Aᵢ), ∅, Aᵢ) + end + lΣ[i] = log(Σ) + c + else + Aᵢ = exp(A[i] - c) + Σ += ifelse(isnan(Aᵢ), ∅, Aᵢ) + lΣ[i] = log(Σ) + c + end + end + return lΣ +end +function _nanlogcumsumexp(A, ::True) + Tᵣ = Base.promote_op(exp, eltype(A)) + Σ = ∅ = zero(Tᵣ) + lΣ = fill!(similar(A, Tᵣ), ∅) + c = linit(eltype(A)) + iₗ = lastindex(A) + @inbounds for i ∈ reverse(eachindex(A)) + isnan(c) && (c = A[i]) + if A[i] > c + c = A[i] + Σ = ∅ + @simd ivdep for j ∈ i:iₗ + Aᵢ = exp(A[j] - c) + Σ += ifelse(isnan(Aᵢ), ∅, Aᵢ) + end + lΣ[i] = log(Σ) + c + else + Aᵢ = exp(A[i] - c) + Σ += ifelse(isnan(Aᵢ), ∅, Aᵢ) + lΣ[i] = log(Σ) + c + end + end + return lΣ +end \ No newline at end of file diff --git a/src/NaNStatistics.jl b/src/NaNStatistics.jl index 4c15cf0..b1bff95 100644 --- a/src/NaNStatistics.jl +++ b/src/NaNStatistics.jl @@ -10,6 +10,7 @@ module NaNStatistics include("ArrayStats/nanmean.jl") include("ArrayStats/nansum.jl") include("ArrayStats/nancumsum.jl") + include("ArrayStats/nanlogsumexp.jl") include("ArrayStats/nanvar.jl") include("ArrayStats/nanstd.jl") include("ArrayStats/nansem.jl") diff --git a/test/testArrayStats.jl b/test/testArrayStats.jl index 4b543fe..cb64f69 100644 --- a/test/testArrayStats.jl +++ b/test/testArrayStats.jl @@ -30,6 +30,9 @@ A = [1:10.0..., NaN] @test nansum(A) === 55.0 @test nancumsum(A) == [1.0, 3.0, 6.0, 10.0, 15.0, 21.0, 28.0, 36.0, 45.0, 55.0, 55.0] + @test nanlogsumexp(A) ≈ 10.45862974442671 + @test nanlogcumsumexp(A) ≈ [1.0, 2.313261687518223, 3.4076059644443806, 4.440189698561196, 5.451914395937593, 6.456193316018123, 7.457762847404243, 8.458339626479004, 9.458551727967379, 10.45862974442671, 10.45862974442671] + @test isequal(nanlogcumsumexp(A, reverse=true), [10.45862974442671, 10.458551727967379, 10.458339626479004, 10.457762847404243, 10.456193316018123, 10.451914395937594, 10.440189698561195, 10.40760596444438, 10.313261687518223, 10.0, NaN]) @test nanmean(A) === 5.5 @test nanmean(A, ones(11)) === 5.5 # weighted @test nanrange(A) === 9.0 @@ -49,17 +52,42 @@ @test nanmin(1.,2.) === 1.0 @test nanmax(1.,2.) === 2.0 +## --- Non-monotonic arrays + + A = [1:5.0..., NaN, 5:-1:1...] + @test nancumsum(A) ≈ [1.0, 3.0, 6.0, 10.0, 15.0, 15.0, 20.0, 24.0, 27.0, 29.0, 30.0] + @test nanlogcumsumexp(A) ≈ [1.0, 2.313261687518223, 3.4076059644443806, 4.440189698561196, 5.451914395937593, 5.451914395937593, 5.944418386887494, 6.078136371527456, 6.123152745316754, 6.139216411276987, 6.145061576497539] + @test nanlogcumsumexp(A, reverse=true) ≈ [6.145061576497539, 6.139216411276987, 6.123152745316754, 6.078136371527456, 5.944418386887494, 5.451914395937593, 5.451914395937593, 4.440189698561196, 3.4076059644443806, 2.313261687518223, 1.0] + + A = [1:10.0..., NaN, 10:-1:1...] + @test nansum(A) === 110.0 + @test nanlogsumexp(A) ≈ 11.151776924986656 + @test nanrange(A) === 9.0 + @test nanminimum(A) === 1.0 + @test nanmaximum(A) === 10.0 + @test nanextrema(A) === (1.0, 10.0) + @test nanvar(A) ≈ 8.68421052631579 + @test nanstd(A) ≈ 2.946898458772509 + @test nansem(A) ≈ 2.946898458772509/sqrt(20) + @test nanmad(A) ≈ 2.5 + @test nanaad(A) ≈ 2.5 + @test nanmedian(A) ≈ 5.5 + @test nanpctile(A,50) ≈ 5.5 + ## --- Arrays containing only NaNs should yield NaN (or 0 for sums) A = fill(NaN,10) @test nansum(A) == 0 @test nancumsum(A) == zeros(10) + @test isnan(nanlogsumexp(A)) + @test all(isnan, nanlogcumsumexp(A)) + @test all(isnan, nanlogcumsumexp(A, reverse=true)) @test isnan(nanmean(A)) @test isnan(nanmean(A, ones(10))) # weighted @test isnan(nanrange(A)) @test isnan(nanminimum(A)) @test isnan(nanmaximum(A)) - @test all(isnan.(nanextrema(A))) + @test all(isnan, nanextrema(A)) @test isnan(nanvar(A)) @test isnan(nanvar(A, mean=NaN)) @test isnan(nanstd(A)) @@ -78,6 +106,9 @@ A = Float64[] @test nansum(A) == 0 @test nancumsum(A) == Float64[] + @test isnan(nanlogsumexp(A)) + @test nanlogcumsumexp(A) == Float64[] + @test nanlogcumsumexp(A, reverse=true) == Float64[] @test isnan(nanmean(A)) @test isnan(nanmean(A, copy(A))) # weighted @test isnan(nanvar(A)) @@ -99,6 +130,9 @@ A = collect(1:10) @test nansum(A) === 55 @test nancumsum(A) == [1, 3, 6, 10, 15, 21, 28, 36, 45, 55] + @test nanlogsumexp(A) ≈ 10.45862974442671 + @test nanlogcumsumexp(A) ≈ [1.0, 2.313261687518223, 3.4076059644443806, 4.440189698561196, 5.451914395937593, 6.456193316018123, 7.457762847404243, 8.458339626479004, 9.458551727967379, 10.45862974442671] + @test nanlogcumsumexp(A, reverse=true) ≈ [10.45862974442671, 10.458551727967379, 10.458339626479004, 10.457762847404243, 10.456193316018123, 10.451914395937594, 10.440189698561195, 10.40760596444438, 10.313261687518223, 10.0] @test nanmean(A) === 5.5 @test nanmean(A, ones(10)) === 5.5 # weighted @test nanrange(A) === 9 @@ -124,6 +158,9 @@ A = 1:10 @test nansum(A) === 55 @test nancumsum(A) == [1, 3, 6, 10, 15, 21, 28, 36, 45, 55] + @test nanlogsumexp(A) ≈ 10.45862974442671 + @test nanlogcumsumexp(A) ≈ [1.0, 2.313261687518223, 3.4076059644443806, 4.440189698561196, 5.451914395937593, 6.456193316018123, 7.457762847404243, 8.458339626479004, 9.458551727967379, 10.45862974442671] + @test nanlogcumsumexp(A, reverse=true) ≈ [10.45862974442671, 10.458551727967379, 10.458339626479004, 10.457762847404243, 10.456193316018123, 10.451914395937594, 10.440189698561195, 10.40760596444438, 10.313261687518223, 10.0] @test nanmean(A) === 5.5 @test nanmean(A, ones(10)) === 5.5 # weighted @test nanrange(A) === 9 @@ -145,6 +182,9 @@ A = 1:10. @test nansum(A) === 55.0 @test nancumsum(A) == [1.0, 3.0, 6.0, 10.0, 15.0, 21.0, 28.0, 36.0, 45.0, 55.0] + @test nanlogsumexp(A) ≈ 10.45862974442671 + @test nanlogcumsumexp(A) ≈ [1.0, 2.313261687518223, 3.4076059644443806, 4.440189698561196, 5.451914395937593, 6.456193316018123, 7.457762847404243, 8.458339626479004, 9.458551727967379, 10.45862974442671] + @test nanlogcumsumexp(A, reverse=true) ≈ [10.45862974442671, 10.458551727967379, 10.458339626479004, 10.457762847404243, 10.456193316018123, 10.451914395937594, 10.440189698561195, 10.40760596444438, 10.313261687518223, 10.0] @test nanmean(A) === 5.5 @test nanmean(A, ones(10)) === 5.5 # weighted @test nanrange(A) === 9.0