From 5c5666ff0b431e2ceea937ed83626886f14a0ea1 Mon Sep 17 00:00:00 2001 From: Milan Date: Tue, 13 Apr 2021 16:44:03 +0100 Subject: [PATCH 1/2] with set_zero_insignificant --- Project.toml | 2 ++ src/BitInformation.jl | 1 + src/binomial.jl | 22 ++++++++++++++++++++++ src/bit_information.jl | 26 +++++++++++++++++++++----- src/mutual_information.jl | 13 ++++++++++++- test/runtests.jl | 20 ++++++++++++++++++++ 6 files changed, 78 insertions(+), 6 deletions(-) create mode 100644 src/binomial.jl diff --git a/Project.toml b/Project.toml index beb823f..f633d78 100644 --- a/Project.toml +++ b/Project.toml @@ -4,10 +4,12 @@ authors = ["Milan and contributors"] version = "0.1.0" [deps] +Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] StatsBase = "0.32, 0.33" +Distributions = "0.24" julia = "1" [extras] diff --git a/src/BitInformation.jl b/src/BitInformation.jl index 7a84903..7185d9d 100644 --- a/src/BitInformation.jl +++ b/src/BitInformation.jl @@ -20,5 +20,6 @@ module BitInformation include("bit_information.jl") include("mutual_information.jl") include("bitpattern_entropy.jl") + include("binomial.jl") end diff --git a/src/binomial.jl b/src/binomial.jl new file mode 100644 index 0000000..ca22194 --- /dev/null +++ b/src/binomial.jl @@ -0,0 +1,22 @@ +using Distributions: quantile, Normal + +""" +``` +binom_confidence(n::int,c::Real) +``` +Returns the probability `p₁` of successes in the binomial distribution (p=1/2) of +`n` trials with confidence `c`. + +# Example +At c=0.95, i.e. 95% confidence, n=1000 tosses of +a coin will yield not more than +``` +julia> p₁ = binom_confidence(1000,0.95) +0.5309897516152281 +``` +about 53.1% heads (or tails).""" +function binom_confidence(n::Int,c::Real) + return 0.5 + quantile(Normal(),1-(1-c)/2)/(2*sqrt(n)) +end + + diff --git a/src/bit_information.jl b/src/bit_information.jl index b0b6837..e471cc1 100644 --- a/src/bit_information.jl +++ b/src/bit_information.jl @@ -114,7 +114,11 @@ function bitinformation(A::AbstractArray,dims::Symbol) end end -function bitinformation(A::AbstractArray;dims::Int=1) +function bitinformation(A::AbstractArray; + dims::Int=1, + set_zero_insignificant::Bool=true, + confidence::Real=0.99) + N = prod(size(A))-1 # elements in array n1 = bitcount(A)-bitcount([A[end]]) # occurences of bit = 1 @@ -128,10 +132,15 @@ function bitinformation(A::AbstractArray;dims::Int=1) end npair = bitpaircount(A) - return bitinformation(n1,npair,N) + return bitinformation(n1,npair,N;set_zero_insignificant,confidence) end -function bitinformation(n1::Array{Int,1},npair::Array{Int,2},N::Int) +function bitinformation(n1::Array{Int,1}, + npair::Array{Int,2}, + N::Int; + set_zero_insignificant::Bool=true, + confidence::Real=0.99) + nbits = length(n1) @assert size(npair) == (4,nbits) "npair array must be of size 4xnbits" @assert maximum(n1) <= N "N cannot be smaller than maximum(n1)" @@ -157,8 +166,15 @@ function bitinformation(n1::Array{Int,1},npair::Array{Int,2},N::Int) H0 = [entropy([p00,p01],2) for (p00,p01) in zip(pcond[1,:],pcond[2,:])] H1 = [entropy([p10,p11],2) for (p10,p11) in zip(pcond[3,:],pcond[4,:])] - # Information content, take abs as rounding errors can yield negative I - I = @. abs(H - q0*H0 - q1*H1) + # Information content + I = @. H - q0*H0 - q1*H1 + + # remove information that is insignificantly different from a random 50/50 + if set_zero_insignificant + p = binom_confidence(N,confidence) # get chance p for 1 (or 0) from binom distr + I₀ = 1 - entropy([p,1-p],2) # free entropy of random [bit] + I[I .<= I₀] .= 0 # set insignficant to zero + end return I end diff --git a/src/mutual_information.jl b/src/mutual_information.jl index 54f0036..98fabe7 100644 --- a/src/mutual_information.jl +++ b/src/mutual_information.jl @@ -26,7 +26,10 @@ end """Mutual bitwise information of the elements in input arrays A,B. A and B have to be of same size and eltype.""" function bitinformation(A::AbstractArray{T}, - B::AbstractArray{T}) where {T<:Union{Integer,AbstractFloat}} + B::AbstractArray{T}; + set_zero_insignificant::Bool=true, + confidence::Real=0.99 + ) where {T<:Union{Integer,AbstractFloat}} @assert size(A) == size(B) nelements = length(A) @@ -42,6 +45,14 @@ function bitinformation(A::AbstractArray{T}, # P is the join probabilities mass function P = [C[i,:,:]/nelements for i in 1:nbits] M = [mutual_information(p) for p in P] + + # remove information that is insignificantly different from a random 50/50 + if set_zero_insignificant + p = binom_confidence(nelements,confidence) # get chance p for 1 (or 0) from binom distr + M₀ = 1 - entropy([p,1-p],2) # free entropy of random 50/50 at trial size + M[M .<= M₀] .= 0 # set zero insignificant + end + return M end diff --git a/test/runtests.jl b/test/runtests.jl index 8c0ecce..434ef4a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -236,4 +236,24 @@ end end end end +end + +@testset "Information of random set to zero" begin + + for T in (UInt32,UInt64,Float32,Float64) + for N in [100,1000,10_000] + A = rand(T,N) + # increase confidence here from the default 0.99 to avoid test failures + # from false positives + b = bitinformation(A,confidence=0.999) + for ib in b + @test 0 == ib + end + + m = bitinformation(A[1:end-1],A[2:end],confidence=0.999) + for im in m + @test 0 == im + end + end + end end \ No newline at end of file From c2855610a663f108f738a82bc0b6860d65cb97a9 Mon Sep 17 00:00:00 2001 From: Milan Date: Tue, 13 Apr 2021 16:57:46 +0100 Subject: [PATCH 2/2] with random seed --- Project.toml | 3 ++- test/runtests.jl | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index f633d78..4381489 100644 --- a/Project.toml +++ b/Project.toml @@ -5,11 +5,12 @@ version = "0.1.0" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -StatsBase = "0.32, 0.33" Distributions = "0.24" +StatsBase = "0.32, 0.33" julia = "1" [extras] diff --git a/test/runtests.jl b/test/runtests.jl index 434ef4a..8fe9599 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ using BitInformation using Test import StatsBase.entropy +import Random @testset "Bitpattern entropy" begin for N in [100,1000,10000,100000] @@ -240,6 +241,8 @@ end @testset "Information of random set to zero" begin + Random.seed!(123) + for T in (UInt32,UInt64,Float32,Float64) for N in [100,1000,10_000] A = rand(T,N)