From 7a6ec691bc31a2cb2dc3ff4f64cd3dff332ae85f Mon Sep 17 00:00:00 2001 From: Milan Date: Mon, 28 Feb 2022 19:41:34 +0000 Subject: [PATCH 01/20] bitstring :split redundant code removed + tests --- src/BitInformation.jl | 2 +- src/bitstring.jl | 29 ++++------------------------- test/bitstring.jl | 30 ++++++++++++++++++++++++++++++ test/runtests.jl | 1 + 4 files changed, 36 insertions(+), 26 deletions(-) create mode 100644 test/bitstring.jl diff --git a/src/BitInformation.jl b/src/BitInformation.jl index d5c6040..aec474a 100644 --- a/src/BitInformation.jl +++ b/src/BitInformation.jl @@ -11,7 +11,7 @@ module BitInformation bitcount, bitcount_entropy, bitpaircount, bit_condprobability, bit_condentropy - import StatsBase.entropy + import StatsBase: entropy include("which_uint.jl") include("bitstring.jl") diff --git a/src/bitstring.jl b/src/bitstring.jl index 7a0e994..3e4ef80 100644 --- a/src/bitstring.jl +++ b/src/bitstring.jl @@ -1,32 +1,11 @@ """Bitstring function with a split-mode that splits the bitstring into sign, exponent and significant bits.""" -function Base.bitstring(x::Float16,mode::Symbol) +function Base.bitstring(x::T,mode::Symbol) where {T<:AbstractFloat} if mode == :split bstr = bitstring(x) - return "$(bstr[1]) $(bstr[2:6]) $(bstr[7:end])" + n = Base.exponent_bits(T) + return "$(bstr[1]) $(bstr[2:n+1]) $(bstr[n+2:end])" else bitstring(x) end -end - -"""Bitstring function with a split-mode that splits the bitstring into -sign, exponent and significant bits.""" -function Base.bitstring(x::Float32,mode::Symbol) - if mode == :split - bstr = bitstring(x) - return "$(bstr[1]) $(bstr[2:9]) $(bstr[10:end])" - else - bitstring(x) - end -end - -"""Bitstring function with a split-mode that splits the bitstring into -sign, exponent and significant bits.""" -function Base.bitstring(x::Float64,mode::Symbol) - if mode == :split - bstr = bitstring(x) - return "$(bstr[1]) $(bstr[2:12]) $(bstr[13:end])" - else - bitstring(x) - end -end +end \ No newline at end of file diff --git a/test/bitstring.jl b/test/bitstring.jl new file mode 100644 index 0000000..6de4426 --- /dev/null +++ b/test/bitstring.jl @@ -0,0 +1,30 @@ +@testset "Bitstring with split" begin + for T in (Float16,Float32,Float64) + for _ in 1:30 + r = randn() + bs_split = bitstring(r,:split) + bs = bitstring(r) + @test bs == prod(split(bs_split," ")) + end + end + + # some individual values too + @test bitstring(Float16( 0),:split) == "0 00000 0000000000" + @test bitstring(Float16( 1),:split) == "0 01111 0000000000" + @test bitstring(Float16(-1),:split) == "1 01111 0000000000" + @test bitstring(Float16( 2),:split) == "0 10000 0000000000" + + @test bitstring( 0f0,:split) == "0 00000000 00000000000000000000000" + @test bitstring( 1f0,:split) == "0 01111111 00000000000000000000000" + @test bitstring(-1f0,:split) == "1 01111111 00000000000000000000000" + @test bitstring( 2f0,:split) == "0 10000000 00000000000000000000000" + + @test bitstring( 0.0,:split) == + "0 00000000000 0000000000000000000000000000000000000000000000000000" + @test bitstring( 1.0,:split) == + "0 01111111111 0000000000000000000000000000000000000000000000000000" + @test bitstring(-1.0,:split) == + "1 01111111111 0000000000000000000000000000000000000000000000000000" + @test bitstring( 2.0,:split) == + "0 10000000000 0000000000000000000000000000000000000000000000000000" +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 4928c05..652ec0d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,6 +3,7 @@ using Test import 
StatsBase.entropy import Random +include("bitstring.jl") include("round_nearest.jl") include("shave_set_groom.jl") include("xor_transpose.jl") From f371e21ad0beee627a6ad53d36e1e44d0ed6a5a3 Mon Sep 17 00:00:00 2001 From: Milan Date: Mon, 28 Feb 2022 19:43:36 +0000 Subject: [PATCH 02/20] bitpattern entropy overhauled, uinttype for (u)ints --- src/bitpattern_entropy.jl | 47 ++++++++++++--------------------------- src/which_uint.jl | 14 +++++++++++- 2 files changed, 27 insertions(+), 34 deletions(-) diff --git a/src/bitpattern_entropy.jl b/src/bitpattern_entropy.jl index 0ab2818..7fea03e 100644 --- a/src/bitpattern_entropy.jl +++ b/src/bitpattern_entropy.jl @@ -1,52 +1,33 @@ -"""Calculate the bitpattern entropy in base `base` for all elements in array A.""" -function bitpattern_entropy(A::AbstractArray,base::Real=2) - nbits = sizeof(eltype(A))*8 - T = whichUInt(nbits) - return bitpattern_entropy(T,A,base) -end - -"""Calculate the bitpattern entropy in base `base` for all elements in array A. -In place version that will sort the array.""" -function bitpattern_entropy!(A::AbstractArray,base::Real=2) - nbits = sizeof(eltype(A))*8 - T = whichUInt(nbits) - return bitpattern_entropy!(T,A,base) -end - """Calculate the bitpattern entropy for an array A by reinterpreting the elements as UInts and sorting them to avoid creating a histogram.""" -function bitpattern_entropy(::Type{T},A::AbstractArray,base::Real) where {T<:Unsigned} - return bitpattern_entropy!(T,copy(A),base) +function bitpattern_entropy(A::AbstractArray{T},base::Real=2) where T + return bitpattern_entropy!(copy(A),base) # copy of array to avoid in-place changes to A end """Calculate the bitpattern entropy for an array A by reinterpreting the elements -as UInts and sorting them to avoid creating a histogram. -In-place version of bitpattern_entropy.""" -function bitpattern_entropy!(::Type{T},A::AbstractArray,base::Real) where {T<:Unsigned} +as UInts and sorting them to avoid creating a histogram. In-place version of bitpattern_entropy.""" +function bitpattern_entropy!(A::AbstractArray{T},base::Real=2) where T - # reinterpret to UInt then sort to avoid allocating a histogram - # in-place ver - sort!(reinterpret(T,vec(A))) # TODO maybe think of an inplace version? + # reinterpret to UInt then sort in-place for minimal memory allocation + sort!(reinterpret(Base.uinttype(T),vec(A))) n = length(A) E = 0.0 # entropy - m = 1 # counter + m = 1 # counter, start with 1 as only bitwise-same elements are counted for i in 1:n-1 @inbounds if A[i] == A[i+1] # check whether next entry belongs to the same bin in histogram - m += 1 + m += 1 # increase counter else - p = m/n - E -= p*log(p) - m = 1 + p = m/n # probability/frequency of ith bit pattern + E -= p*log(p) # entropy contribution + m = 1 # start with 1, A[i+1] is already 1st element of next bin end end - p = m/n # complete for last bin - E -= p*log(p) - - # convert to given base, 2 i.e. [bit] by default - E /= log(base) + p = m/n # complete loop for last bin + E -= p*log(p) # entropy contribution of last bin + E /= log(base) # convert to given base, 2 i.e. 
[bit] by default return E end \ No newline at end of file diff --git a/src/which_uint.jl b/src/which_uint.jl index 40d9b81..55b94c6 100644 --- a/src/which_uint.jl +++ b/src/which_uint.jl @@ -3,7 +3,19 @@ function whichUInt(n::Integer) n == 16 && return UInt16 n == 32 && return UInt32 n == 64 && return UInt64 - throw(error("Only n=8,16,32,64 supported.")) + throw(error("Only n=8,16,32,64 bits supported.")) end whichUInt(::Type{T}) where T = whichUInt(sizeof(T)*8) + +# define the uints for various formats +Base.uinttype(::Type{UInt8}) = UInt8 +Base.uinttype(::Type{UInt16}) = UInt16 +Base.uinttype(::Type{UInt32}) = UInt32 +Base.uinttype(::Type{UInt64}) = UInt64 + +Base.uinttype(::Type{Int8}) = UInt8 +Base.uinttype(::Type{Int16}) = UInt16 +Base.uinttype(::Type{Int32}) = UInt32 +Base.uinttype(::Type{Int64}) = UInt64 + From 97fd531a6c365f1ff427b575d513017d104c132a Mon Sep 17 00:00:00 2001 From: Milan Date: Tue, 1 Mar 2022 13:42:18 +0000 Subject: [PATCH 03/20] +biased_exponent with tests --- src/BitInformation.jl | 4 +-- src/signed_exponent.jl | 52 ++++++++++++++++++++++++----- src/{which_uint.jl => uinttypes.jl} | 22 ++++++------ test/runtests.jl | 1 + test/signed_exponent.jl | 17 ++++++++++ 5 files changed, 75 insertions(+), 21 deletions(-) rename src/{which_uint.jl => uinttypes.jl} (77%) create mode 100644 test/signed_exponent.jl diff --git a/src/BitInformation.jl b/src/BitInformation.jl index aec474a..d9c4ad2 100644 --- a/src/BitInformation.jl +++ b/src/BitInformation.jl @@ -5,7 +5,7 @@ module BitInformation export bittranspose, bitbacktranspose, xor_delta, unxor_delta, xor_delta!, unxor_delta!, - signed_exponent + signed_exponent, biased_exponent export bitinformation, mutual_information, redundancy, bitpattern_entropy, bitcount, bitcount_entropy, bitpaircount, bit_condprobability, @@ -13,7 +13,7 @@ module BitInformation import StatsBase: entropy - include("which_uint.jl") + include("uinttypes.jl") include("bitstring.jl") include("bittranspose.jl") include("round_nearest.jl") diff --git a/src/signed_exponent.jl b/src/signed_exponent.jl index 7a1f7bd..0a42d9a 100644 --- a/src/signed_exponent.jl +++ b/src/signed_exponent.jl @@ -1,29 +1,51 @@ """In-place version of `signed_exponent(::Array)`.""" -function signed_exponent!(A::Array{T}) where {T<:Union{Float16,Float32,Float64}} +function signed_exponent!(A::AbstractArray{T}) where {T<:Base.IEEEFloat} # sign&fraction mask sfmask = Base.sign_mask(T) | Base.significand_mask(T) - emask = Base.exponent_mask(T) + emask = Base.exponent_mask(T) + esignmask = Base.sign_mask(T) >> 1 # exponent sign mask (1st exp bit) sbits = Base.significand_bits(T) bias = Base.exponent_bias(T) - ebits = Base.exponent_bits(T)-1 for i in eachindex(A) ui = reinterpret(Unsigned,A[i]) - sf = ui & sfmask # sign & fraction bits - e = ((ui & emask) >> sbits) - bias # de-biased exponent - eabs = e == -bias ? 0 : abs(e) # for iszero(A[i]) e == -bias, set to 0 - esign = (e < 0 ? 1 : 0) << ebits # determine sign of exponent - esigned = ((esign | eabs) % typeof(ui)) << sbits # concatentate exponent + sf = ui & sfmask # sign & fraction bits + e = (((ui & emask) >> sbits) % Int) - bias # de-biased exponent + eabs = abs(e) % typeof(ui) # magnitude of exponent + esign = e < 0 ? esignmask : zero(esignmask) # determine sign of exponent + esigned = esign | (eabs << sbits) # concatentate exponent A[i] = reinterpret(T,sf | esigned) # concatenate everything back together end end +"""In-place version of `biased_exponent(::Array)`. 
Inverse of `signed_exponent!".""" +function biased_exponent!(A::AbstractArray{T}) where {T<:Base.IEEEFloat} + + # sign&fraction mask + sfmask = Base.sign_mask(T) | Base.significand_mask(T) + esignmask = Base.sign_mask(T) >> 1 + eabsmask = Base.exponent_mask(T) & ~esignmask + + sbits = Base.significand_bits(T) + bias = Base.uinttype(T)(Base.exponent_bias(T)) + + for i in eachindex(A) + ui = reinterpret(Unsigned,A[i]) + sf = ui & sfmask # sign & fraction bits + eabs = ((ui & eabsmask) >> sbits) # isolate sign-magnitude exponent + esign = (ui & esignmask) == esignmask ? true : false + ebiased = bias + (esign ? -eabs : eabs) # concatenate mag&sign and add bias + ebiased <<= sbits # shit exponent in position + A[i] = reinterpret(T,sf | ebiased) # concatenate everything back together + end +end + """ ```julia -B = signed_exponent(A::Array{T}) where {T<:Union{Float16,Float32,Float64}} +B = signed_exponent(A::AbstractArray{T}) where {T<:Base.IEEEFloat} ``` Converts the exponent bits of Float16,Float32 or Float64-arrays from its conventional biased-form into a sign&magnitude representation. @@ -45,4 +67,16 @@ function signed_exponent(A::Array{T}) where {T<:Union{Float16,Float32,Float64}} B = copy(A) signed_exponent!(B) return B +end + +""" +```julia +B = biased_exponent(A::AbstractArray{T}) where {T<:Base.IEEEFloat} +``` +Convert the signed exponents from `signed_exponent` back into the +standard biased exponents of IEEE floats.""" +function biased_exponent(A::Array{T}) where {T<:Union{Float16,Float32,Float64}} + B = copy(A) + biased_exponent!(B) + return B end \ No newline at end of file diff --git a/src/which_uint.jl b/src/uinttypes.jl similarity index 77% rename from src/which_uint.jl rename to src/uinttypes.jl index 55b94c6..7153a60 100644 --- a/src/which_uint.jl +++ b/src/uinttypes.jl @@ -1,13 +1,3 @@ -function whichUInt(n::Integer) - n == 8 && return UInt8 - n == 16 && return UInt16 - n == 32 && return UInt32 - n == 64 && return UInt64 - throw(error("Only n=8,16,32,64 bits supported.")) -end - -whichUInt(::Type{T}) where T = whichUInt(sizeof(T)*8) - # define the uints for various formats Base.uinttype(::Type{UInt8}) = UInt8 Base.uinttype(::Type{UInt16}) = UInt16 @@ -19,3 +9,15 @@ Base.uinttype(::Type{Int16}) = UInt16 Base.uinttype(::Type{Int32}) = UInt32 Base.uinttype(::Type{Int64}) = UInt64 +# uints for other types are identified by they byte size +Base.uinttype(::Type{T}) where T = Base.uinttype(sizeof(T)*8) + +function Base.uinttype(n::Integer) + n == 8 && return UInt8 + n == 16 && return UInt16 + n == 32 && return UInt32 + n == 64 && return UInt64 + throw(error("Only n=8,16,32,64 bits supported.")) +end + + diff --git a/test/runtests.jl b/test/runtests.jl index 652ec0d..b1d829b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -6,5 +6,6 @@ import Random include("bitstring.jl") include("round_nearest.jl") include("shave_set_groom.jl") +include("signed_exponent.jl") include("xor_transpose.jl") include("information.jl") \ No newline at end of file diff --git a/test/signed_exponent.jl b/test/signed_exponent.jl new file mode 100644 index 0000000..9e6984c --- /dev/null +++ b/test/signed_exponent.jl @@ -0,0 +1,17 @@ +@testset "Signed/biased exponent idempotence" begin + for T in (Float16,Float32,Float64) + for _ in 1:100 + A = randn(T,1) + @test A == biased_exponent(signed_exponent(A)) + + A = rand(T,1) + @test A == biased_exponent(signed_exponent(A)) + + A = 10*randn(T,1) + @test A == biased_exponent(signed_exponent(A)) + end + + @test [zero(T)] == 
biased_exponent(signed_exponent([zero(T)])) + @test_broken isnan(biased_exponent(signed_exponent([T(NaN)]))[1]) + end +end \ No newline at end of file From 85f2510c0c17ed383a4e21d1dad1fcd7b15151f0 Mon Sep 17 00:00:00 2001 From: Milan Date: Tue, 1 Mar 2022 18:14:06 +0000 Subject: [PATCH 04/20] fully reversible biased/signed exponent --- src/signed_exponent.jl | 17 ++++++++++++----- test/signed_exponent.jl | 16 +++++++--------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/src/signed_exponent.jl b/src/signed_exponent.jl index 0a42d9a..d9d9245 100644 --- a/src/signed_exponent.jl +++ b/src/signed_exponent.jl @@ -26,8 +26,9 @@ function biased_exponent!(A::AbstractArray{T}) where {T<:Base.IEEEFloat} # sign&fraction mask sfmask = Base.sign_mask(T) | Base.significand_mask(T) + emask = Base.exponent_mask(T) esignmask = Base.sign_mask(T) >> 1 - eabsmask = Base.exponent_mask(T) & ~esignmask + eabsmask = emask & ~esignmask sbits = Base.significand_bits(T) bias = Base.uinttype(T)(Base.exponent_bias(T)) @@ -39,6 +40,10 @@ function biased_exponent!(A::AbstractArray{T}) where {T<:Base.IEEEFloat} esign = (ui & esignmask) == esignmask ? true : false ebiased = bias + (esign ? -eabs : eabs) # concatenate mag&sign and add bias ebiased <<= sbits # shit exponent in position + + # 10000..., i.e. negative zero in the sign-magintude exponent is mapped + # back to NaN/Inf to make signed_exponent fully reversible + ebiased = esign & (eabs == 0) ? emask : ebiased A[i] = reinterpret(T,sf | ebiased) # concatenate everything back together end end @@ -48,7 +53,7 @@ end B = signed_exponent(A::AbstractArray{T}) where {T<:Base.IEEEFloat} ``` Converts the exponent bits of Float16,Float32 or Float64-arrays from its -conventional biased-form into a sign&magnitude representation. +IEEE standard biased-form into a sign-magnitude representation. # Example @@ -60,9 +65,11 @@ julia> bitstring.(signed_exponent([10f0]),:split)[1] "0 00000011 01000000000000000000000" ``` -In the former the exponent 3 is interpret from 0b10000010=130 via subtraction of -the exponent bias of Float32 = 127. In the latter the exponent is inferred from -sign bit (0) and a magnitude represetation 2^1 + 2^1 = 3.""" +In the IEEE standard floats the exponent 3 is interpret from 0b10000010=130 +via subtraction of the exponent bias of Float32 = 127. In sign-magnitude +representation the exponent is inferred from the first exponent (0) as sign bit +and a magnitude 2^1 + 2^1 = 3. 
NaN/Inf exponent bits are mapped to negative zero +in sign-magnitude representation which is exactly reversed with `biased_exponent`.""" function signed_exponent(A::Array{T}) where {T<:Union{Float16,Float32,Float64}} B = copy(A) signed_exponent!(B) diff --git a/test/signed_exponent.jl b/test/signed_exponent.jl index 9e6984c..c4b5a71 100644 --- a/test/signed_exponent.jl +++ b/test/signed_exponent.jl @@ -1,17 +1,15 @@ @testset "Signed/biased exponent idempotence" begin for T in (Float16,Float32,Float64) for _ in 1:100 - A = randn(T,1) - @test A == biased_exponent(signed_exponent(A)) - - A = rand(T,1) - @test A == biased_exponent(signed_exponent(A)) - - A = 10*randn(T,1) - @test A == biased_exponent(signed_exponent(A)) + A = [reinterpret(T,rand(Base.uinttype(T)))] + # call functions with array but evaluated only the element of it + # with === to allow to nan equality too + @test A[1] === biased_exponent(signed_exponent(A))[1] end + # special cases 0, NaN, Inf @test [zero(T)] == biased_exponent(signed_exponent([zero(T)])) - @test_broken isnan(biased_exponent(signed_exponent([T(NaN)]))[1]) + @test isnan(biased_exponent(signed_exponent([T(NaN)]))[1]) + @test isinf(biased_exponent(signed_exponent([T(Inf)]))[1]) end end \ No newline at end of file From b791a7643dd44b40856508bc97cb05d2aee5e7c9 Mon Sep 17 00:00:00 2001 From: Milan Date: Wed, 2 Mar 2022 11:11:00 +0000 Subject: [PATCH 05/20] bittranspose with Base.uinttype --- src/bittranspose.jl | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/bittranspose.jl b/src/bittranspose.jl index 16f841c..22eaa36 100644 --- a/src/bittranspose.jl +++ b/src/bittranspose.jl @@ -2,9 +2,9 @@ const DEFAULT_BLOCKSIZE=2^14 """Transpose the bits (aka bit shuffle) of an array to place sign bits, etc. next to each other in memory. Back transpose via bitbacktranspose(). Inplace version that requires a -preallocated output array At, which must contain 0s everywhere.""" -function bittranspose!( ::Type{UIntT}, # UInt equiv to T - At::AbstractArray{T}, # transposed array +preallocated output array At, which must contain 0 bits everywhere.""" +function bittranspose!( ::Type{UIntT}, # UInt corresponding to T + At::AbstractArray{T}, # transposed array to be filled A::AbstractArray{T}; # original array blocksize::Int=DEFAULT_BLOCKSIZE) where {UIntT<:Unsigned,T} @@ -17,10 +17,9 @@ function bittranspose!( ::Type{UIntT}, # UInt equiv to T # At .= zero(T) # make sure output array is zero nblocks = (N-1) ÷ blocksize + 1 # number of blocks - for nb in 1:nblocks + @inbounds for nb in 1:nblocks lastindex = min(nb*blocksize,N) - Ablock = view(A,(nb-1)*blocksize+1:lastindex) Atblock = view(At,(nb-1)*blocksize+1:lastindex) @@ -44,8 +43,8 @@ function bittranspose!( ::Type{UIntT}, # UInt equiv to T # the first nbits elements go into same b in B, then next b idx = (i ÷ nbits) + 1 - @inbounds atui = reinterpret(UIntT,Atblock[idx]) - @inbounds Atblock[idx] = reinterpret(T,atui | bit) + atui = reinterpret(UIntT,Atblock[idx]) + Atblock[idx] = reinterpret(T,atui | bit) i += 1 end end @@ -57,8 +56,8 @@ end """Transpose the bits (aka bit shuffle) of an array to place sign bits, etc. next to each other in memory. Back transpose via bitbacktranspose().""" function bittranspose(A::AbstractArray{T};kwargs...) 
where T - UIntT = whichUInt(T) - At = fill(reinterpret(T,zero(UIntT)),size(A)) + UIntT = Base.uinttype(T) # get corresponding UInt to T + At = fill(reinterpret(T,zero(UIntT)),size(A)) # preallocate with zeros (essential) bittranspose!(UIntT,At,A;kwargs...) return At end @@ -78,7 +77,7 @@ function bitbacktranspose!( ::Type{UIntT}, # UInt equiv to T # A .= zero(T) # make sure output array is zero nblocks = (N-1) ÷ blocksize + 1 # number of blocks - for nb in 1:nblocks + @inbounds for nb in 1:nblocks lastindex = min(nb*blocksize,N) @@ -104,8 +103,8 @@ function bitbacktranspose!( ::Type{UIntT}, # UInt equiv to T # the first nbits elements go into same a in A, then next a idx = (i % nelements) + 1 - @inbounds aui = reinterpret(UIntT,Ablock[idx]) - @inbounds Ablock[idx] = reinterpret(T,aui | bit) + aui = reinterpret(UIntT,Ablock[idx]) + Ablock[idx] = reinterpret(T,aui | bit) i += 1 end end @@ -116,8 +115,8 @@ end """Backtranspose the bits of array A that were previously transposed with bittranspse().""" function bitbacktranspose(At::AbstractArray{T};kwargs...) where T - UIntT = whichUInt(T) - A = fill(reinterpret(T,zero(UIntT)),size(At)) + UIntT = Base.uinttype(T) # get corresponding UInt to T + A = fill(reinterpret(T,zero(UIntT)),size(At)) # preallocate with zeros (essential) bitbacktranspose!(UIntT,A,At;kwargs...) return A end From dc4c816ae37f9d160813579f3fb8f4477eb1f53a Mon Sep 17 00:00:00 2001 From: Milan Date: Wed, 2 Mar 2022 19:30:43 +0000 Subject: [PATCH 06/20] binomial prob capped at 1 --- src/binomial.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/binomial.jl b/src/binomial.jl index e455e72..f106156 100644 --- a/src/binomial.jl +++ b/src/binomial.jl @@ -16,7 +16,8 @@ julia> p₁ = BitInformation.binom_confidence(1000,0.95) ``` about 53.1% heads (or tails).""" function binom_confidence(n::Int,c::Real) - return 0.5 + quantile(Normal(),1-(1-c)/2)/(2*sqrt(n)) + p = 0.5 + quantile(Normal(),1-(1-c)/2)/(2*sqrt(n)) + return min(1.0,p) end """ From 70009c55e4850fc398d15a6fe92ee3b54dcbe491 Mon Sep 17 00:00:00 2001 From: Milan Date: Wed, 2 Mar 2022 19:31:23 +0000 Subject: [PATCH 07/20] rename to uint_types.jl --- src/{uinttypes.jl => uint_types.jl} | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) rename src/{uinttypes.jl => uint_types.jl} (99%) diff --git a/src/uinttypes.jl b/src/uint_types.jl similarity index 99% rename from src/uinttypes.jl rename to src/uint_types.jl index 7153a60..f1c474c 100644 --- a/src/uinttypes.jl +++ b/src/uint_types.jl @@ -18,6 +18,4 @@ function Base.uinttype(n::Integer) n == 32 && return UInt32 n == 64 && return UInt64 throw(error("Only n=8,16,32,64 bits supported.")) -end - - +end \ No newline at end of file From a054f75c8752251d7f3f314c2ef9635e55fe8e4e Mon Sep 17 00:00:00 2001 From: Milan Date: Wed, 2 Mar 2022 19:31:42 +0000 Subject: [PATCH 08/20] bitpattern and bitcount tests --- test/bitcount.jl | 34 ++++++++++++++++++++++++++++++++++ test/bitpattern_entropy.jl | 14 ++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 test/bitcount.jl create mode 100644 test/bitpattern_entropy.jl diff --git a/test/bitcount.jl b/test/bitcount.jl new file mode 100644 index 0000000..9ce9a69 --- /dev/null +++ b/test/bitcount.jl @@ -0,0 +1,34 @@ +@testset "Bitcount" begin + @test bitcount(UInt8[1,2,4,8,16,32,64,128]) == ones(8) + @test bitcount(collect(0x0000:0xffff)) == 2^15*ones(16) + + N = 100_000 + c = bitcount(rand(N)) + @test c[1] == 0 # sign always 0 + @test c[2] == 0 # first expbit always 0, i.e. 
U(0,1) < 1 + @test c[3] == N # second expont always 1 + + @test all(isapprox.(c[15:50],N/2,rtol=1e-1)) +end + +@testset "Bitcountentropy" begin + + # test the PRNG on uniformity + N = 100_000 + H = bitcount_entropy(rand(UInt8,N)) + @test all(isapprox.(H,ones(8),rtol=5e-4)) + + H = bitcount_entropy(rand(UInt16,N)) + @test all(isapprox.(H,ones(16),rtol=5e-4)) + + H = bitcount_entropy(rand(UInt32,N)) + @test all(isapprox.(H,ones(32),rtol=5e-4)) + + H = bitcount_entropy(rand(UInt64,N)) + @test all(isapprox.(H,ones(64),rtol=5e-4)) + + # also for random floats + H = bitcount_entropy(rand(N)) + @test H[1:5] == zeros(5) # first bits never change + @test all(isapprox.(H[16:55],ones(40),rtol=1e-4)) +end \ No newline at end of file diff --git a/test/bitpattern_entropy.jl b/test/bitpattern_entropy.jl new file mode 100644 index 0000000..8d29694 --- /dev/null +++ b/test/bitpattern_entropy.jl @@ -0,0 +1,14 @@ +@testset "Bit pattern entropy" begin + for N in [100,1000,10000,100000] + # every bit pattern is only hit once, hence entropy = log2(N) + @test isapprox(log2(N),bitpattern_entropy(rand(Float32,N)),atol=1e-1) + @test isapprox(log2(N),bitpattern_entropy(rand(Float64,N)),atol=1e-1) + end + + N = 1000_000 # more bit pattern than there are in 8 or 16-bit + @test isapprox(16.0,bitpattern_entropy(rand(UInt16,N)),atol=1e-1) + @test isapprox(16.0,bitpattern_entropy(rand(Int16,N)),atol=1e-1) + + @test isapprox(8.0,bitpattern_entropy(rand(UInt8,N)),atol=1e-1) + @test isapprox(8.0,bitpattern_entropy(rand(Int8,N)),atol=1e-1) +end \ No newline at end of file From 31b3f779d703db47674ba4edb60483c4c11bd808 Mon Sep 17 00:00:00 2001 From: Milan Date: Wed, 2 Mar 2022 19:32:48 +0000 Subject: [PATCH 09/20] bitarray extended +bitmap func for visualisation --- src/bitarray.jl | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 src/bitarray.jl diff --git a/src/bitarray.jl b/src/bitarray.jl new file mode 100644 index 0000000..8ea73f0 --- /dev/null +++ b/src/bitarray.jl @@ -0,0 +1,34 @@ +function Base.BitArray(A::AbstractArray{T}) where T + nbits = 8*sizeof(T) # number of bits in T + UIntN = Base.uinttype(T) # UInt type corresponding to T + + # allocate BitArray with additional dimension for the bits + B = BitArray(undef,nbits,size(A)...) + indices = CartesianIndices(A) + + for i in eachindex(A) # every element in A + a = reinterpret(UIntN,A[i]) # as UInt + mask = UIntN(1) # mask to isolate jth bit + for j in 0:nbits-1 # loop from less to more significant bits + bit = (a & mask) >> j # isolate bit (=true/false) + B[nbits-j,indices[i]] = bit # set BitArray entry to isolated bit in A + mask <<= 1 # shift mask towards more significant bit + end + end + return B +end + +function bitmap(A::AbstractMatrix;dim::Int=1) + @assert dim in (1,2) "dim has to be 1 or 2, $dim provided." + + #TODO bitordering for dim=2 is currently off + n,m = size(A) + B = BitArray(A) + nbits,_ = size(B) + Br = reshape(B,nbits*n,:) + Br = dim == 1 ? 
Br : Br' + + return Br +end + + From 8f3d41cf770e827cb4f9a68841cf94e597f56b79 Mon Sep 17 00:00:00 2001 From: Milan Date: Wed, 2 Mar 2022 19:34:02 +0000 Subject: [PATCH 10/20] bitcount separated, bitinformation via mutual --- src/BitInformation.jl | 5 +- src/bit_count.jl | 120 ++++++++++++++++++ src/bit_information.jl | 197 ---------------------------- src/mutual_information.jl | 261 ++++++++++++++++++++------------------ test/information.jl | 50 -------- 5 files changed, 264 insertions(+), 369 deletions(-) create mode 100644 src/bit_count.jl delete mode 100644 src/bit_information.jl diff --git a/src/BitInformation.jl b/src/BitInformation.jl index d9c4ad2..580e8c7 100644 --- a/src/BitInformation.jl +++ b/src/BitInformation.jl @@ -13,14 +13,15 @@ module BitInformation import StatsBase: entropy - include("uinttypes.jl") + include("uint_types.jl") include("bitstring.jl") + include("bitarray.jl") include("bittranspose.jl") include("round_nearest.jl") include("shave_set_groom.jl") include("xor_delta.jl") include("signed_exponent.jl") - include("bit_information.jl") + include("bit_count.jl") include("mutual_information.jl") include("bitpattern_entropy.jl") include("binomial.jl") diff --git a/src/bit_count.jl b/src/bit_count.jl new file mode 100644 index 0000000..2761783 --- /dev/null +++ b/src/bit_count.jl @@ -0,0 +1,120 @@ +import StatsBase: entropy + +"""Count the occurences of the 1-bit in bit position i across all elements of A.""" +function bitcount(A::AbstractArray{T},i::Int) where {T<:Union{Integer,AbstractFloat}} + nbits = sizeof(T)*8 # number of bits in T + @boundscheck i <= nbits || throw(error("Can't count bit $b for $N-bit type $T.")) + + UIntT = Base.uinttype(T) # UInt corresponding to T + c = 0 # counter + shift = nbits-i # shift bit i in least significant position + mask = one(UIntT) << shift # mask to isolate the bit in position i + @inbounds for a in A + ui = reinterpret(UIntT,a) # mask everything but i and shift + c += (ui & mask) >> shift # to have either 0x0 or 0x1 to increase counter + end + return c +end + +"""Count the occurences of the 1-bit in every bit position b across all elements of A.""" +function bitcount(A::AbstractArray{T}) where {T<:Union{Integer,AbstractFloat}} + nbits = 8*sizeof(T) # determine the size [bit] of elements in A + C = zeros(Int,nbits) + @inbounds for b in 1:nbits # loop over bit positions and for each + C[b] = bitcount(A,b) # count through all elements in A + end + return C +end + +"""Entropy [bit] for bitcount functions. 
Maximised to 1bit for random uniformly +distributed bits in A.""" +function bitcount_entropy( A::AbstractArray{T}, # input array + base::Real=2 # entropy with base + ) where {T<:Union{Integer,AbstractFloat}} + + nbits = 8*sizeof(T) # number of bits in T + nelements = length(A) + + C = bitcount(A) # count 1 bits for each bit position + H = zeros(nbits) # allocate entropy for each bit position + + @inbounds for i in 1:nbits # loop over bit positions + p = C[i]/nelements # probability of bit 1 + H[i] = entropy([p,1-p],base) # entropy based on p + end + + return H +end + +"""Update counter array C of size nbits x 2 x 2 for every 00|01|10|11-bitpairing in a,b.""" +function bitpair_count!(C::Array{Int,3},a::T,b::T) where {T<:Integer} + nbits = 8*sizeof(T) + mask = one(T) # start with least significant bit + @inbounds for i in 0:nbits-1 # loop from least to most significant bit + j = 1+((a & mask) >>> i) # isolate that bit in a,b + k = 1+((b & mask) >>> i) # and move to 0x0 or 0x1s + C[nbits-i,j,k] += 1 # to be used as index j,k to increase counter C + mask <<= 1 # shift mask to get the next significant bit + end +end + +"""Returns counter array C of size nbits x 2 x 2 for every 00|01|10|11-bitpairing in elements of A,B.""" +function bitpair_count( A::AbstractArray{T}, + B::AbstractArray{T} + ) where {T<:Union{Integer,AbstractFloat}} + + @assert size(A) == size(B) "Size of A=$(size(A)) does not match size of B=$(size(B))" + + nbits = 8*sizeof(T) # number of bits in eltype(A),eltype(B) + C = zeros(Int,nbits,2,2) # array of bitpair counters + + # reinterpret arrays A,B as UInt (no mem allocation required) + Auint = reinterpret(Base.uinttype(T),A) + Buint = reinterpret(Base.uinttype(T),B) + + for (a,b) in zip(Auint,Buint) # run through all elements in A,B pairwise + bitpair_count!(C,a,b) # count the bits and update counter array C + end + + return C +end + + +# """Update counter array C of size nbits x 2 x 2 for every 00|01|10|11-bitpairing in a,b.""" +# function bitpair_count!(C::Array{Int,3}, # counter array nbits x 2 x 2 +# A::AbstractArray{T}, # input array A +# B::AbstractArray{T}, # input array B +# i::Int # bit position +# ) where {T<:Integer} + +# nbits = 8*sizeof(T) # number of bits in T +# shift = nbits-i # shift bit i in least significant position +# mask = one(T) << shift # mask to isolate the bit in position i + +# @inbounds for (a,b) in zip(A,B) # loop over all elements in A,B pairwise +# j = 1+((a & mask) >>> shift) # isolate bit i in a,b +# k = 1+((b & mask) >>> shift) # and move to 0x0 or 0x1 +# C[i,j,k] += 1 # to be used as index j,k to increase counter C +# end +# end + +# """Returns counter array C of size nbits x 2 x 2 for every 00|01|10|11-bitpairing in elements of A,B.""" +# function bitpair_count( A::AbstractArray{T}, +# B::AbstractArray{T} +# ) where {T<:Union{Integer,AbstractFloat}} + +# @assert size(A) == size(B) "Size of A=$(size(A)) does not match size of B=$(size(B))" + +# nbits = 8*sizeof(T) # number of bits in eltype(A),eltype(B) +# C = zeros(Int,nbits,2,2) # array of bitpair counters + +# # reinterpret arrays A,B as UInt (no mem allocation required) +# Auint = reinterpret(Base.uinttype(T),A) +# Buint = reinterpret(Base.uinttype(T),B) + +# for i in 1:nbits +# bitpair_count!(C,Auint,Buint,i) # count the bits and update counter array C +# end + +# return C +# end \ No newline at end of file diff --git a/src/bit_information.jl b/src/bit_information.jl deleted file mode 100644 index c34fc83..0000000 --- a/src/bit_information.jl +++ /dev/null @@ -1,197 +0,0 @@ -"""Count 
the occurences of the 1-bit in bit position b across all elements of A.""" -function bitcount(A::AbstractArray{T},b::Int) where {T<:Union{Integer,AbstractFloat}} - N = sizeof(T)*8 # number of bits in T - @boundscheck b <= N || throw(error("Can't count bit $b for $N-bit type $T.")) - UIntT = whichUInt(T) - n = 0 # counter - shift = N-b # shift desired bit b - mask = one(UIntT) << shift # leftshift always pushes 0s - for a in A # mask everything but b and shift - n += (reinterpret(UIntT,a) & mask) >>> shift # to have either 0x00 or 0x01 - end - return n -end - -"""Count the occurences of the 1-bit in every bit position b across all elements of A.""" -function bitcount(A::AbstractArray{T}) where {T<:Union{Integer,AbstractFloat}} - N = 8*sizeof(T) # determine the size [bit] of elements in A - n = fill(0,N) # preallocate counters - for b in 1:N # loop over bit positions and count each - n[b] = bitcount(A,b) - end - return n -end - -"""Entropy [bit] for bitcount functions. Maximised to 1bit for random uniformly -distributed bits in A.""" -function bitcount_entropy(A::AbstractArray) - N = prod(size(A)) - ps = bitcount(A) / N - e = [abs(entropy([p,1-p],2)) for p in ps] - return e -end - -"""Count pairs of bits across elements of A for bit position b therein. -Returns a 4-element array with counts for 00,01,10,11.""" -function bitpaircount(A::AbstractArray{T},b::Int) where {T<:Union{Integer,AbstractFloat}} - N = sizeof(T)*8 # number of bits in T - @boundscheck b <= N || throw(error("Can't count bit $b for $N-bit type $T.")) - UIntT = whichUInt(T) - - n = [0,0,0,0] # counter for 00,01,10,11 - shift = N-b-1 # shift to the 2nd last position either 0x00,0x10 - shift2 = N-b # shift to last position 0x0 or 0x1 - mask = one(UIntT) << shift2 # mask everything except b - - # a1 is bit from previous entry in A, a2 is the current - # a1 is shifted to sit in the 2nd last position - # a2 sits in the last position - a1 = (reinterpret(UIntT,A[1]) & mask) >>> shift - @inbounds for i in 2:length(A) - a2 = (reinterpret(UIntT,A[i]) & mask) >>> shift2 - n[(a1 | a2)+1] += 1 - a1 = a2 << 1 - end - return n -end - -"""Count pairs of bits across elements of A for every bit position therein. -Returns a 4xn-array with counts for 00,01,10,11 in rows and every bit position -in columns.""" -function bitpaircount(A::AbstractArray{T}) where {T<:Union{Integer,AbstractFloat}} - N = 8*sizeof(T) # number of bits in T - n = fill(0,4,N) # store the 4 possible pair counts for every bit position - for b in 1:N - n[:,b] = bitpaircount(A,b) - end - return n -end - -"""Calculates the conditional probabilities of pairs 00,01,10,11 in the bit sequences -of array A. Returns a 4xn array with conditional probabilities p(nextbit=0|previousbit=0), -p(1|0),p(0|1),p(1|1) in rows and every bit position in columns. 
Returns NaN when -all bits are either 0,1 (in which case the conditional probability is not defined).""" -function bit_condprobability(A::Array{T}) where {T<:Union{Integer,AbstractFloat}} - N = prod(size(A[2:end])) # elements in array (A[1] is ) - n1 = bitcount(A[1:end-1]) - n0 = N.-n1 - npair = bitpaircount(A) - pcond = similar(npair,Float64) - pcond[1,:] = npair[1,:] ./ n0 - pcond[2,:] = npair[2,:] ./ n0 - pcond[3,:] = npair[3,:] ./ n1 - pcond[4,:] = npair[4,:] ./ n1 - return pcond -end - -"""Calculates the conditional entropy for 00,01,10,11 for every bit position across -all elements of A.""" -function bit_condentropy( A::Array{T}, - set_zero_insignificant::Bool=true, - confidence::Real=0.99) where {T<:Union{Integer,AbstractFloat}} - - pcond = bit_condprobability(A) - - # set NaN (occurs when occurences n=0) 0*-Inf = 0 here. - pcond[isnan.(pcond)] .= 0 - - # conditional entropy H given bit = 0 (H0) and bit = 1 (H1) - H0 = [entropy([p00,p01],2) for (p00,p01) in zip(pcond[1,:],pcond[2,:])] - H1 = [entropy([p10,p11],2) for (p10,p11) in zip(pcond[3,:],pcond[4,:])] - - # set insignficant to zero - if set_zero_insignificant - # get chance p for 1 (or 0) from binom distr - p = binom_confidence(length(A),confidence) - I₀ = 1 - entropy([p,1-p],2) # free entropy of random [bit] - H0[H0 .<= I₀] .= 0 # set insignficant to zero - H1[H1 .<= I₀] .= 0 - end - - return H0,H1 -end - -function bitinformation(A::AbstractArray,dims::Symbol) - if dims == :all_dimensions - N = prod(size(A))-1 # elements in array - n1 = bitcount(A)-bitcount([A[end]]) # occurences of bit = 1 - bi = fill(0.0,length(n1)) - permu = collect(1:ndims(A)) # permutation - perm0 = vcat(permu[2:end],permu[1]) # used to permute permu - for idim in 1:ndims(A) - Aperm = PermutedDimsArray(A,permu) - permute!(permu,perm0) - npair = bitpaircount(Aperm) - bi .+= bitinformation(n1,npair,N) - end - # arithmetic mean of information across the dimensions - return bi ./ ndims(A) - else - return bitinformation(A) - end -end - -function bitinformation(A::AbstractArray; - dims::Int=1, - set_zero_insignificant::Bool=true, - confidence::Real=0.99) - - N = prod(size(A))-1 # elements in array - n1 = bitcount(A)-bitcount([A[end]]) # occurences of bit = 1 - - if dims > 1 - permu = collect(1:ndims(A)) # permutation - perm0 = vcat(permu[2:end],permu[1]) # used to permute permu - for _ in 1:dims-1 - permute!(permu,perm0) - end - A = PermutedDimsArray(A,permu) - end - - npair = bitpaircount(A) - return bitinformation(n1,npair,N;set_zero_insignificant,confidence) -end - -function bitinformation(n1::Array{Int,1}, - npair::Array{Int,2}, - N::Int; - set_zero_insignificant::Bool=true, - confidence::Real=0.99) - - nbits = length(n1) - @assert size(npair) == (4,nbits) "npair array must be of size 4xnbits" - @assert maximum(n1) <= N "N cannot be smaller than maximum(n1)" - - n0 = N.-n1 # occurences of bit = 0 - q0 = n0/N # respective probabilities - q1 = n1/N - - pcond = similar(npair,Float64) # preallocate conditional probability - - pcond[1,:] = npair[1,:] ./ n0 # p(0|0) = n(00)/n(0) - pcond[2,:] = npair[2,:] ./ n0 # p(1|0) = n(01)/n(0) - pcond[3,:] = npair[3,:] ./ n1 # p(0|1) = n(10)/n(1) - pcond[4,:] = npair[4,:] ./ n1 # p(1|1) = n(11)/n(1) - - # set NaN (occurs when occurences n=0) 0*-Inf = 0 here. 
- pcond[isnan.(pcond)] .= 0 - - # unconditional entropy - H = [entropy([q0i,q1i],2) for (q0i,q1i) in zip(q0,q1)] - - # conditional entropy given bit = 0, bit = 1 - H0 = [entropy([p00,p01],2) for (p00,p01) in zip(pcond[1,:],pcond[2,:])] - H1 = [entropy([p10,p11],2) for (p10,p11) in zip(pcond[3,:],pcond[4,:])] - - # Information content - I = @. H - q0*H0 - q1*H1 - - # remove information that is insignificantly different from a random 50/50 - if set_zero_insignificant - p = binom_confidence(N,confidence) # get chance p for 1 (or 0) from binom distr - I₀ = 1 - entropy([p,1-p],2) # free entropy of random [bit] - I[I .<= I₀] .= 0 # set insignficant to zero - end - - return I -end diff --git a/src/mutual_information.jl b/src/mutual_information.jl index 98fabe7..c17cede 100644 --- a/src/mutual_information.jl +++ b/src/mutual_information.jl @@ -1,137 +1,85 @@ - """Mutual information from the joint probability mass function p -of two variables X,Y. p is an NxM array which elements represent +of two variables X,Y. p is an nx x ny array which elements represent the probabilities of observing x~X and y~Y.""" function mutual_information(p::AbstractArray{T,2},base::Real=2) where T - @assert sum(p) ≈ one(T) "Entries in p have to sum to 1" + @assert sum(p) ≈ one(T) "Entries in p have to sum to 1" @assert all(p .>= zero(T)) "Entries in p have to be >= 1" - nx,ny = size(p) - - mpx = sum(p,dims=2) # marginal probabilities of x - mpy = sum(p,dims=1) + nx,ny = size(p) # events X are 1st dim, Y is 2nd + py = sum(p,dims=1) # marginal probabilities of y + px = sum(p,dims=2) # marginal probabilities of x - M = zero(T) - for j in 1:ny + M = zero(T) # mutual information M + for j in 1:ny # loop over all entries in p for i in 1:nx + # add entropy only for non-zero entries in p if p[i,j] > zero(T) - M += p[i,j]*log(p[i,j]/mpx[i]/mpy[j]) + M += p[i,j]*log(p[i,j]/px[i]/py[j]) end end end - M /= log(base) + M /= log(base) # convert to given base end """Mutual bitwise information of the elements in input arrays A,B. 
A and B have to be of same size and eltype.""" -function bitinformation(A::AbstractArray{T}, - B::AbstractArray{T}; - set_zero_insignificant::Bool=true, - confidence::Real=0.99 - ) where {T<:Union{Integer,AbstractFloat}} +function mutual_information(A::AbstractArray{T}, + B::AbstractArray{T}; + set_zero_insignificant::Bool=true, + confidence::Real=0.99 + ) where {T<:Union{Integer,AbstractFloat}} - @assert size(A) == size(B) - nelements = length(A) - nbits = 8*sizeof(T) + nelements = length(A) # number of elements in each A and B + nbits = 8*sizeof(T) # number of bits in eltype(A),eltype(B) - # array of counters - C = zeros(Int,nbits,2,2) + C = bitpair_count(A,B) # nbits x 2 x 2 array of bitpair counters + M = zeros(nbits) # allocate mutual information array + P = zeros(2,2) # allocate joint probability mass function - for (a,b) in zip(A,B) # run through all elements in A,B pairwise - bitcount!(C,a,b) # count the bits and update counter array C + @inbounds for i in 1:nbits # mutual information for every bit position + for j in 1:2, k in 1:2 # joint prob mass from counter C + P[j,k] = C[i,j,k]/nelements + end + M[i] = mutual_information(P) end - # P is the join probabilities mass function - P = [C[i,:,:]/nelements for i in 1:nbits] - M = [mutual_information(p) for p in P] - # remove information that is insignificantly different from a random 50/50 if set_zero_insignificant - p = binom_confidence(nelements,confidence) # get chance p for 1 (or 0) from binom distr - M₀ = 1 - entropy([p,1-p],2) # free entropy of random 50/50 at trial size - M[M .<= M₀] .= 0 # set zero insignificant - end - - return M -end - -"""Mutual bitwise information of the elements in input arrays A,B. -A and B have to be of same size and eltype.""" -function bitinformation(A::AbstractArray{T}, - B::AbstractArray{T}, - n::Int) where {T<:Union{Integer,AbstractFloat}} - - @assert size(A) == size(B) - nelements = length(A) - nbits = 8*sizeof(T) - - # array of counters, first dim is from last bit to first - C = [zeros(Int,2^min(i,n+1),2) for i in nbits:-1:1] - - for (a,b) in zip(A,B) # run through all elements in A,B pairwise - bitcount!(C,a,b,n) # count the bits and update counter array C + Hf = binom_free_entropy(nelements,confidence) # free entropy of random 50/50 at trial size + # get chance p for 1 (or 0) from binom distr + @inbounds for i in eachindex(M) + M[i] = M[i] <= Hf ? 
0 : M[i] # set zero what's insignificant + end end - # P is the join probabilities mass function - P = [C[i]/nelements for i in 1:nbits] - M = [mutual_information(p) for p in P] - - # filter out rounding errors - M[isapprox.(M,0,atol=10eps(Float64))] .= 0 return M end -"""Update counter array of size nbits x 2 x 2 for every -00|01|10|11-bitpairing in a,b.""" -function bitcount!(C::Array{Int,3},a::T,b::T) where {T<:AbstractFloat} - uia = reinterpret(Base.uinttype(T),a) - uib = reinterpret(Base.uinttype(T),b) - bitcount!(C,uia,uib) -end - -"""Update counter array of size nbits x 2 x 2 for every -00|01|10|11-bitpairing in a,b.""" -function bitcount!(C::Vector{Matrix{Int}},a::T,b::T,n::Int) where {T<:AbstractFloat} - uia = reinterpret(Base.uinttype(T),a) - uib = reinterpret(Base.uinttype(T),b) - bitcount!(C,uia,uib,n) -end - -"""Update counter array of size nbits x 2 x 2 for every -00|01|10|11-bitpairing in a,b.""" -function bitcount!(C::Array{Int,3},a::T,b::T) where {T<:Integer} - nbits = 8*sizeof(T) - mask = one(T) << (nbits-1) - for i in 1:nbits - j = 1+((a & mask) >>> (nbits-i)) - k = 1+((b & mask) >>> (nbits-i)) - C[i,j,k] += 1 - mask >>>= 1 - end -end - -"""Update counter array of size nbits x 2 x 2 for every -00|01|10|11-bitpairing in a,b.""" -function bitcount!(C::Vector{Matrix{Int}},a::T,b::T,n::Int) where {T<:Integer} - - nbits = 8*sizeof(T) - - for i = nbits:-1:1 - @boundscheck size(C[end-i+1],1) == 2^min(i,n+1) || throw(BoundsError()) +"""Bitwise real information content of array `A` calculated from the bitwise mutual information +in adjacent entries in A along dimension `dim`.""" +function bitinformation(A::AbstractArray{T}; + dim::Int=1, + kwargs...) where {T<:Union{Integer,AbstractFloat}} + + # Permute A to take adjacent entry in dimension dim + if dim > 1 + permu = collect(1:ndims(A)) # permutation + perm0 = vcat(permu[2:end],permu[1]) # used to permute permu + for _ in 1:dim-1 # create permutation array + permute!(permu,perm0) + end + A = PermutedDimsArray(A,permu) # permute A, such that desired dim is 1st dim end - maskb = one(T) << (nbits-1) - maska = reinterpret(T,signed(maskb) >> n) + # create a two views on A for pairs of adjacent entries + n = size(A)[1] # n elements in dim - for i in 1:nbits - j = 1+((a & maska) >>> max(nbits-n-i,0)) - k = 1+((b & maskb) >>> (nbits-i)) - C[i][j,k] += 1 + # as A is now permuted always select 1st dimension + A1view = selectdim(A,1,1:n-1) # no adjacent entries in A array bounds + A2view = selectdim(A,1,2:n) # for same indices A2 is the adjacent entry to A1 - maska >>>= 1 # shift through nbits - maskb >>>= 1 - end + return mutual_information(A1view,A2view;kwargs...) end """Calculate the bitwise redundancy of two arrays A,B. Redundancy @@ -139,28 +87,101 @@ is a normalised measure of the mutual information 1 for always identical/opposite bits, 0 for no mutual information.""" function redundancy(A::AbstractArray{T}, B::AbstractArray{T}) where {T<:Union{Integer,AbstractFloat}} - mutinf = bitinformation(A,B) # mutual information + + M = mutual_information(A,B) # mutual information HA = bitcount_entropy(A) # entropy of A HB = bitcount_entropy(B) # entropy of B - R = 2mutinf./(HA+HB) # redundancy (symmetric) + + R = ones(size(M)...) 
# allocate redundancy R + + for i in eachindex(M) # loop over bit positions + HAB = HA[i]+HB[i] # sum of entropies of A,B + if HAB > 0 # entropy = 0 means A,B are fully redundant + R[i] = 2*M[i]/HAB + end + end - R[iszero.(HA+HB)] .= 0.0 # HA+HB = 0 creates NaN return R end -"""Calculate the bitwise redundancy of two arrays A,B. -Multi-bit predictor version which includes n lesser significant bit -as additional predictors in A for the mutual information to account -for round-to-nearest-induced carry bits. Redundancy is normalised -by the entropy of A. To be used for A being the reference array -and B an approximation of it.""" -function redundancy(A::AbstractArray{T}, - B::AbstractArray{T}, - n::Int) where {T<:Union{Integer,AbstractFloat}} - mutinf = bitinformation(A,B,n) # mutual information - HA = bitcount_entropy(A) # entropy of A - R = mutinf./HA # redundancy (asymmetric) +# """Mutual bitwise information of the elements in input arrays A,B. +# A and B have to be of same size and eltype.""" +# function bitinformation(A::AbstractArray{T}, +# B::AbstractArray{T}, +# n::Int) where {T<:Union{Integer,AbstractFloat}} + +# @assert size(A) == size(B) +# nelements = length(A) +# nbits = 8*sizeof(T) - R[iszero.(HA)] .= 0.0 # HA = 0 creates NaN - return R -end \ No newline at end of file +# # array of counters, first dim is from last bit to first +# C = [zeros(Int,2^min(i,n+1),2) for i in nbits:-1:1] + +# for (a,b) in zip(A,B) # run through all elements in A,B pairwise +# bitcount!(C,a,b,n) # count the bits and update counter array C +# end + +# # P is the join probabilities mass function +# P = [C[i]/nelements for i in 1:nbits] +# M = [mutual_information(p) for p in P] + +# # filter out rounding errors +# M[isapprox.(M,0,atol=10eps(Float64))] .= 0 +# return M +# end + +# """Update counter array of size nbits x 2 x 2 for every +# 00|01|10|11-bitpairing in a,b.""" +# function bitcount!(C::Array{Int,3},a::T,b::T) where {T<:AbstractFloat} +# uia = reinterpret(Base.uinttype(T),a) +# uib = reinterpret(Base.uinttype(T),b) +# bitcount!(C,uia,uib) +# end + +# """Update counter array of size nbits x 2 x 2 for every +# 00|01|10|11-bitpairing in a,b.""" +# function bitcount!(C::Vector{Matrix{Int}},a::T,b::T,n::Int) where {T<:AbstractFloat} +# uia = reinterpret(Base.uinttype(T),a) +# uib = reinterpret(Base.uinttype(T),b) +# bitcount!(C,uia,uib,n) +# end + +# """Update counter array of size nbits x 2 x 2 for every +# 00|01|10|11-bitpairing in a,b.""" +# function bitcount!(C::Vector{Matrix{Int}},a::T,b::T,n::Int) where {T<:Integer} + +# nbits = 8*sizeof(T) + +# for i = nbits:-1:1 +# @boundscheck size(C[end-i+1],1) == 2^min(i,n+1) || throw(BoundsError()) +# end + +# maskb = one(T) << (nbits-1) +# maska = reinterpret(T,signed(maskb) >> n) + +# for i in 1:nbits +# j = 1+((a & maska) >>> max(nbits-n-i,0)) +# k = 1+((b & maskb) >>> (nbits-i)) +# C[i][j,k] += 1 + +# maska >>>= 1 # shift through nbits +# maskb >>>= 1 +# end +# end + +# """Calculate the bitwise redundancy of two arrays A,B. +# Multi-bit predictor version which includes n lesser significant bit +# as additional predictors in A for the mutual information to account +# for round-to-nearest-induced carry bits. Redundancy is normalised +# by the entropy of A. 
To be used for A being the reference array +# and B an approximation of it.""" +# function redundancy(A::AbstractArray{T}, +# B::AbstractArray{T}, +# n::Int) where {T<:Union{Integer,AbstractFloat}} +# mutinf = bitinformation(A,B,n) # mutual information +# HA = bitcount_entropy(A) # entropy of A +# R = mutinf./HA # redundancy (asymmetric) + +# R[iszero.(HA)] .= 0.0 # HA = 0 creates NaN +# return R +# end \ No newline at end of file diff --git a/test/information.jl b/test/information.jl index d3177bd..c657d43 100644 --- a/test/information.jl +++ b/test/information.jl @@ -1,53 +1,3 @@ -@testset "Bitpattern entropy" begin - for N in [100,1000,10000,100000] - # every bitpattern is only hit once, hence entropy = log2(N) - @test isapprox(log2(N),bitpattern_entropy(rand(Float32,N)),atol=1e-1) - @test isapprox(log2(N),bitpattern_entropy(rand(Float64,N)),atol=1e-1) - end - - N = 1000_000 # more bitpattern than there are in 8 or 16-bit - @test isapprox(16.0,bitpattern_entropy(rand(UInt16,N)),atol=1e-1) - @test isapprox(16.0,bitpattern_entropy(rand(Int16,N)),atol=1e-1) - - @test isapprox(8.0,bitpattern_entropy(rand(UInt8,N)),atol=1e-1) - @test isapprox(8.0,bitpattern_entropy(rand(Int8,N)),atol=1e-1) -end - -@testset "Bitcount" begin - @test bitcount(UInt8[1,2,4,8,16,32,64,128]) == ones(8) - @test bitcount(collect(0x0000:0xffff)) == 2^15*ones(16) - - N = 100_000 - c = bitcount(rand(N)) - @test c[1] == 0 # sign always 0 - @test c[2] == 0 # first expbit always 0, i.e. U(0,1) < 1 - @test c[3] == N # second expont always 1 - - @test all(isapprox.(c[15:50],N/2,rtol=1e-1)) -end - -@testset "Bitcountentropy" begin - - # test the PRNG on uniformity - N = 100_000 - H = bitcount_entropy(rand(UInt8,N)) - @test all(isapprox.(H,ones(8),rtol=5e-4)) - - H = bitcount_entropy(rand(UInt16,N)) - @test all(isapprox.(H,ones(16),rtol=5e-4)) - - H = bitcount_entropy(rand(UInt32,N)) - @test all(isapprox.(H,ones(32),rtol=5e-4)) - - H = bitcount_entropy(rand(UInt64,N)) - @test all(isapprox.(H,ones(64),rtol=5e-4)) - - # also for random floats - H = bitcount_entropy(rand(N)) - @test H[1:5] == zeros(5) # first bits never change - @test all(isapprox.(H[16:55],ones(40),rtol=1e-4)) -end - @testset "Bitinformation random" begin N = 100_000 A = rand(UInt64,N) From 7565eb5bf8df950a23297a89945e39a0398aa7fe Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 3 Mar 2022 14:15:42 +0000 Subject: [PATCH 11/20] bit pair count tested --- src/BitInformation.jl | 2 +- src/binomial.jl | 4 +--- src/bit_count.jl | 23 +++++++++++++---------- src/mutual_information.jl | 2 +- src/shave_set_groom.jl | 4 ++-- src/uint_types.jl | 12 ++++++------ test/bitcount.jl | 32 ++++++++++++++++++++++++++++++++ 7 files changed, 56 insertions(+), 23 deletions(-) diff --git a/src/BitInformation.jl b/src/BitInformation.jl index 580e8c7..62fb306 100644 --- a/src/BitInformation.jl +++ b/src/BitInformation.jl @@ -12,6 +12,7 @@ module BitInformation bit_condentropy import StatsBase: entropy + import Distributions: quantile, Normal include("uint_types.jl") include("bitstring.jl") @@ -25,5 +26,4 @@ module BitInformation include("mutual_information.jl") include("bitpattern_entropy.jl") include("binomial.jl") - end diff --git a/src/binomial.jl b/src/binomial.jl index f106156..3c6f18f 100644 --- a/src/binomial.jl +++ b/src/binomial.jl @@ -1,5 +1,3 @@ -import Distributions: quantile, Normal - """ ```julia p₁ = binom_confidence(n::Int,c::Real) @@ -17,7 +15,7 @@ julia> p₁ = BitInformation.binom_confidence(1000,0.95) about 53.1% heads (or tails).""" function 
binom_confidence(n::Int,c::Real) p = 0.5 + quantile(Normal(),1-(1-c)/2)/(2*sqrt(n)) - return min(1.0,p) + return min(1.0,p) # cap probability at 1 (only important for n small) end """ diff --git a/src/bit_count.jl b/src/bit_count.jl index 2761783..d944bbb 100644 --- a/src/bit_count.jl +++ b/src/bit_count.jl @@ -1,17 +1,13 @@ -import StatsBase: entropy - """Count the occurences of the 1-bit in bit position i across all elements of A.""" -function bitcount(A::AbstractArray{T},i::Int) where {T<:Union{Integer,AbstractFloat}} +function bitcount(A::AbstractArray{T},i::Int) where {T<:Unsigned} nbits = sizeof(T)*8 # number of bits in T @boundscheck i <= nbits || throw(error("Can't count bit $b for $N-bit type $T.")) - UIntT = Base.uinttype(T) # UInt corresponding to T c = 0 # counter shift = nbits-i # shift bit i in least significant position - mask = one(UIntT) << shift # mask to isolate the bit in position i + mask = one(T) << shift # mask to isolate the bit in position i @inbounds for a in A - ui = reinterpret(UIntT,a) # mask everything but i and shift - c += (ui & mask) >> shift # to have either 0x0 or 0x1 to increase counter + c += (a & mask) >> shift # to have either 0x0 or 0x1 to increase counter end return c end @@ -20,8 +16,13 @@ end function bitcount(A::AbstractArray{T}) where {T<:Union{Integer,AbstractFloat}} nbits = 8*sizeof(T) # determine the size [bit] of elements in A C = zeros(Int,nbits) - @inbounds for b in 1:nbits # loop over bit positions and for each - C[b] = bitcount(A,b) # count through all elements in A + Auint = reinterpret(Base.uinttype(T),A) + + # loop over bit positions and for each count through all elements in A + # outer loop: bit position, inner loop: all elements in a + # note this is faster than swapping inner & outer loop + @inbounds for i in 1:nbits + C[i] = bitcount(Auint,i) end return C end @@ -72,7 +73,9 @@ function bitpair_count( A::AbstractArray{T}, Auint = reinterpret(Base.uinttype(T),A) Buint = reinterpret(Base.uinttype(T),B) - for (a,b) in zip(Auint,Buint) # run through all elements in A,B pairwise + # loop over all elements in A,B pairwise, inner loop (within bitpair_count!): bit positions + # note this is faster than swapping inner & outer loop + for (a,b) in zip(Auint,Buint) bitpair_count!(C,a,b) # count the bits and update counter array C end diff --git a/src/mutual_information.jl b/src/mutual_information.jl index c17cede..a9afc08 100644 --- a/src/mutual_information.jl +++ b/src/mutual_information.jl @@ -97,7 +97,7 @@ function redundancy(A::AbstractArray{T}, for i in eachindex(M) # loop over bit positions HAB = HA[i]+HB[i] # sum of entropies of A,B if HAB > 0 # entropy = 0 means A,B are fully redundant - R[i] = 2*M[i]/HAB + R[i] = 2*M[i]/HAB # symmetric redundancy end end diff --git a/src/shave_set_groom.jl b/src/shave_set_groom.jl index f23513e..93dc56d 100644 --- a/src/shave_set_groom.jl +++ b/src/shave_set_groom.jl @@ -132,5 +132,5 @@ for func in (:shave,:halfshave,:set_one,:groom) end end -# """Number of significant bits `nsb` given the number of significant digits `nsd`.""" -# nsb(nsd::Integer) = Integer(ceil(log(10)/log(2)*nsd)) \ No newline at end of file +"""Number of significant bits `nsb` given the number of significant digits `nsd`.""" +nsb(nsd::Integer) = Integer(ceil(log(10)/log(2)*nsd)) \ No newline at end of file diff --git a/src/uint_types.jl b/src/uint_types.jl index f1c474c..5a3b193 100644 --- a/src/uint_types.jl +++ b/src/uint_types.jl @@ -9,13 +9,13 @@ Base.uinttype(::Type{Int16}) = UInt16 Base.uinttype(::Type{Int32}) = UInt32 
Base.uinttype(::Type{Int64}) = UInt64 -# uints for other types are identified by they byte size +# uints for other types are identified by their byte size Base.uinttype(::Type{T}) where T = Base.uinttype(sizeof(T)*8) -function Base.uinttype(n::Integer) - n == 8 && return UInt8 - n == 16 && return UInt16 - n == 32 && return UInt32 - n == 64 && return UInt64 +function Base.uinttype(nbits::Integer) + nbits == 8 && return UInt8 + nbits == 16 && return UInt16 + nbits == 32 && return UInt32 + nbits == 64 && return UInt64 throw(error("Only n=8,16,32,64 bits supported.")) end \ No newline at end of file diff --git a/test/bitcount.jl b/test/bitcount.jl index 9ce9a69..08bfb7c 100644 --- a/test/bitcount.jl +++ b/test/bitcount.jl @@ -31,4 +31,36 @@ end H = bitcount_entropy(rand(N)) @test H[1:5] == zeros(5) # first bits never change @test all(isapprox.(H[16:55],ones(40),rtol=1e-4)) +end + +@testset "Bit pair count" begin + for T in (Float16,Float32,Float64) + N = 10_000 + A = rand(T,N) + C1 = bitpair_count(A,A) # count bitpairs with 2 equiv arrays + C2 = bitcount(A) # compare to bits counted in that array + + nbits = 8*sizeof(T) + for i in 1:nbits + @test C1[i,1,2] == 0 # no 01 pair for bitpair_count(A,A) + @test C1[i,2,1] == 0 # no 10 + @test C1[i,1,1] + C1[i,2,2] == N # 00 + 11 are all cases = N + @test C1[i,2,2] == C2[i] # 11 is the same as bitcount(A) + end + + Auint = reinterpret(Base.uinttype(T),A) + Buint = .~Auint # flip all bits in A + + C3 = bitpair_count(Auint,Auint) + @test C1 == C3 # same when called with uints + + C4 = bitpair_count(Auint,Buint) + for i in 1:nbits + @test C4[i,1,2] + C4[i,2,1] == N # 01, 10 are all cases = N + @test C1[i,1,1] == C4[i,1,2] # 00 before is now 01 + @test C1[i,2,2] == C4[i,2,1] # 11 before is now 10 + @test C4[i,1,1] == 0 # no 00 + @test C4[i,2,2] == 0 # no 11 + end + end end \ No newline at end of file From 48eaa5ae887a29d668031ccfbb83cf330171604e Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 3 Mar 2022 14:16:20 +0000 Subject: [PATCH 12/20] tests for (mutual) inf / redudancy updated --- test/information.jl | 184 +++++++++++++++++++++++----------------- test/round_nearest.jl | 13 --- test/shave_set_groom.jl | 2 - 3 files changed, 105 insertions(+), 94 deletions(-) diff --git a/test/information.jl b/test/information.jl index c657d43..822cfca 100644 --- a/test/information.jl +++ b/test/information.jl @@ -1,37 +1,48 @@ @testset "Bitinformation random" begin - N = 100_000 - A = rand(UInt64,N) - @test all(bitinformation(A) .< 1e-3) - - A = rand(Float32,N) - bi = bitinformation(A) - @test all(bi[1:4] .== 0.0) # the first bits are always 0011 - sort!(A) - @test sum(bitinformation(A)) > 12 -end + N = 10_000 + + for T in (UInt8,UInt16,UInt32,UInt64) + A = rand(T,N) + @test all(bitinformation(A,set_zero_insignificant=false) .< 1e-3) + end -@testset "Bitinformation in chunks" begin - N = 1000 - Nhalf = N ÷ 2 - A = rand(UInt32,N) - n11,npair1,N1 = bitcount(A[1:Nhalf-1]),bitpaircount(A[1:Nhalf]),Nhalf - n12,npair2,N2 = bitcount(A[Nhalf:end-1]),bitpaircount(A[Nhalf:end]),Nhalf-1 - @test bitinformation(n11+n12,npair1+npair2,N1+N2) == bitinformation(A) + for T in (Float16,Float32,Float64) + A = rand(T,N) + + # increase confidence to filter out more information for reliable tests... + # i.e. 
lower the risk of false positives + bi = bitinformation(A,confidence=0.9999) + + # no bit should contain information, insignificant + # information should be filtered out + @test all(bi .== 0.0) + + sort!(A) # introduce some information via sorting + @test sum(bitinformation(A)) > 9 # some bits of information (guessed) + end end @testset "Bitinformation dimensions" begin - A = rand(Float32,30,40,50) - @test bitinformation(A) == bitinformation(A,dims=1) - - # check that the :all_dimensions flag is - bi1 = bitinformation(A,dims=1) - bi2 = bitinformation(A,dims=2) - bi3 = bitinformation(A,dims=3) - @test bitinformation(A,:all_dimensions) == ((bi1 .+ bi2 .+ bi3)/3) - - # check that there is indeed more information in the sorted dimensions - sort!(A,dims=2) - @test sum(bitinformation(A,dims=1)) < sum(bitinformation(A,dims=2)) + + for T in (Float16,Float32,Float64) + A = rand(T,30,40,50) + @test bitinformation(A) == bitinformation(A,dim=1) + + bi1 = bitinformation(A,dim=1) + bi2 = bitinformation(A,dim=2) + bi3 = bitinformation(A,dim=3) + + nbits = 8*sizeof(T) + for i in 1:nbits + @test bi1[i] ≈ bi2[i] atol=1e-3 + @test bi2[i] ≈ bi3[i] atol=1e-3 + @test bi1[i] ≈ bi3[i] atol=1e-3 + end + + # check that there is indeed more information in the sorted dimensions + sort!(A,dims=2) + @test sum(bitinformation(A,dim=1)) < sum(bitinformation(A,dim=2)) + end end @testset "Mutual information" begin @@ -50,8 +61,8 @@ end @test 1.0 == mutual_information([0.0 0.5;0.5 0.0]) # two independent arrays - for T in [UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64] - mutinf = bitinformation(rand(T,N),rand(T,N)) + for T in (UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64) + mutinf = mutual_information(rand(T,N),rand(T,N)) for m in mutinf @test isapprox(0,m,atol=1e-3) end @@ -62,85 +73,100 @@ end # but so in practice slightly lower for rand(UInt), # or clearly lower for low entropy bits in Float16/32/64 # but identical to the bitcount_entropy (up to rounding errors) - for T in [UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64] + for T in (UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64) R = rand(T,N) - mutinf = bitinformation(R,R) + mutinf = mutual_information(R,R) @test mutinf ≈ bitcount_entropy(R) end end @testset "Redundancy" begin - N = 100_000 + N = 10_000 # No redundancy in independent arrays - for T in [UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64] + for T in (UInt8,UInt16,UInt32,UInt64) redun = redundancy(rand(T,N),rand(T,N)) for r in redun @test isapprox(0,r,atol=2e-3) end end + # no redundancy in the mantissa bits of rand + for T in (Float16,Float32,Float64) + redun = redundancy(rand(T,N),rand(T,N)) + for r in redun[end-Base.significand_bits(T):end] + @test isapprox(0,r,atol=2e-3) + end + end + # Full redundancy in identical arrays - for T in [UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64] + for T in (UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64) A = rand(T,N) H = bitcount_entropy(A) R = redundancy(A,A) - for (r,h) in zip(R,H) - if iszero(h) - @test iszero(r) - else - @test isapprox(1,r,atol=2e-3) - end + for r in R + @test r ≈ 1 atol=1e-3 end end -end -@testset "Mutual information with round to nearest" begin - N = 100_000 + # Some artificially introduced redundancy PART I + for T in (UInt8,UInt16,UInt32,UInt64) + + A = rand(T,N) + B = copy(A) # B is A on every second entry + B[1:2:end] .= rand(T,N÷2) # otherwise independent - # compare shaving to round to nearest - # for round to nearest take m more bits into account for the - # joint probability - m = 8 + # joint prob mass is 
therefore [0.375 0.125; 0.125 0.375] + # at 50% bits are identical, at 50% they are independent + # = 25% same, 25% opposite + p = mutual_information([0.375 0.125;0.125 0.375]) - for T in [Float32,Float64] - R = rand(T,N) - for keepbit in [5,10,15] - mutinf_shave = bitinformation(R,shave(R,keepbit)) - mutinf_round = bitinformation(R,round(R,keepbit),m) - for (s,r) in zip(mutinf_shave,mutinf_round) - @test isapprox(s,r,atol=1e-2) - end + R = redundancy(A,B) + for r in R + @test r ≈ p rtol=1e-1 end end - # shaving shouldn't change - for T in [Float32,Float64] - R = rand(T,N) - for keepbit in [5,10,15] - mutinf_shave = bitinformation(R,shave(R,keepbit)) - mutinf_round = bitinformation(R,shave(R,keepbit),m) - for (s,r) in zip(mutinf_shave,mutinf_round) - @test isapprox(s,r,atol=1e-2) - end + # Some artificially introduced redundancy PART II + for T in (UInt8,UInt16,UInt32,UInt64) + + A = rand(T,N) + B = copy(A) # B is A on every fourth entry + B[1:4:end] .= rand(T,N÷4) # otherwise independent + + # joint prob mass is therefore [0.4375 0.0625; 0.0625 0.4375] + # at 75% bits are identical, at 25% they are independent + # = 12.5% same, 12.5% opposite + p = mutual_information([0.4375 0.0625;0.0625 0.4375]) + + R = redundancy(A,B) + for r in R + @test r ≈ p rtol=1e-1 end end end -@testset "Information of random set to zero" begin - for T in (UInt32,UInt64,Float32,Float64) - for N in [10_000,100_000] - A = rand(T,N) - # increase confidence here from the default 0.99 to avoid test failures - # from false positives - b = bitinformation(A,confidence=0.9999) - for ib in b - @test 0 == ib - end +@testset "Mutual information with shave/round" begin + N = 10_000 - m = bitinformation(A[1:end-1],A[2:end],confidence=0.9999) - for im in m - @test 0 == im + for T in (Float16,Float32,Float64) + R = randn(T,N) + for keepbit in [5,10,15] + + Rshaved = shave(R,keepbit) + mutinf_shave = mutual_information(R,Rshaved) + H_R = bitcount_entropy(R) + H_Rs = bitcount_entropy(Rshaved) + + for (i,(s,hr,hrs)) in enumerate(zip(mutinf_shave,H_R,H_Rs)) + if i <= (1+Base.exponent_bits(T)+keepbit) + @test isapprox(s,hr,atol=1e-2) + @test isapprox(s,hrs,atol=1e-2) + else + @test s == 0 + @test hr > 0 + @test hrs == 0 + end end end end diff --git a/test/round_nearest.jl b/test/round_nearest.jl index 926a192..ac5f189 100644 --- a/test/round_nearest.jl +++ b/test/round_nearest.jl @@ -1,5 +1,3 @@ -using Test - @testset "iseven isodd" begin # check sign bits @test iseven(1f0,-8) @@ -114,17 +112,6 @@ end x = reinterpret(Float32,x) @test 1f0 == round(x,k) end - - # for T in [Float16,Float32,Float64] - # for k in 1:20 - # x = randn(T) - # xr1 = round(x,k+1) - # xr = round(xr1,k) - - # if iseven(xr,k) - - # end - # end end @testset "Round to nearest?" begin diff --git a/test/shave_set_groom.jl b/test/shave_set_groom.jl index a0dc3d5..586e180 100644 --- a/test/shave_set_groom.jl +++ b/test/shave_set_groom.jl @@ -1,5 +1,3 @@ -using Test - @testset "Zero shaves to zero" begin for T in [Float16,Float32,Float64] for k in -5:50 From 9587c5ff1e091c39931b887a1a473d38303e667a Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 3 Mar 2022 14:24:43 +0000 Subject: [PATCH 13/20] whichUInt removed --- src/xor_delta.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/xor_delta.jl b/src/xor_delta.jl index 3fc9ebd..6084cc6 100644 --- a/src/xor_delta.jl +++ b/src/xor_delta.jl @@ -3,10 +3,10 @@ first element is left unchanged. E.g. 
 [0b0011,0b0010] -> [0b0011,0b0001] """
 function xor_delta!(A::Array{T,1}) where {T<:Unsigned}
     a = A[1]
-    @inbounds for i in 2:length(A)
+    @inbounds for i in 2:length(A)      # skip first element
         b = A[i]
-        A[i] = a ⊻ b
-        a = b
+        A[i] = a ⊻ b    # XOR with prev element
+        a = b           # make next (un-XORed) element prev for next iteration
     end
 end

@@ -14,9 +14,9 @@ end
 E.g. [0b0011,0b0001] -> [0b0011,0b0010] """
 function unxor_delta!(A::Array{T,1}) where {T<:Unsigned}
     a = A[1]
-    @inbounds for i in 2:length(A)
+    @inbounds for i in 2:length(A)      # skip first element
         b = A[i]
-        a = a ⊻ b
+        a = a ⊻ b       # un-XOR and store un-XORed a for next iteration
         A[i] = a
     end
 end
@@ -58,8 +58,8 @@ end
 """Bitwise XOR delta. Elements include A are XORed with the previous one.
 The first element is left unchanged. E.g. [0b0011,0b0010] -> [0b0011,0b0001]. """
-xor_delta(A::Array{T,1}) where {T<:AbstractFloat} = xor_delta(whichUInt(T),A)
+xor_delta(A::Array{T,1}) where {T<:AbstractFloat} = xor_delta(Base.uinttype(T),A)

 """Undo bitwise XOR delta. Elements include A are XORed again to reverse xor_delta.
 E.g. [0b0011,0b0001] -> [0b0011,0b0010] """
-unxor_delta(A::Array{T,1}) where {T<:AbstractFloat} = unxor_delta(whichUInt(T),A)
+unxor_delta(A::Array{T,1}) where {T<:AbstractFloat} = unxor_delta(Base.uinttype(T),A)

From da052e1193306c63c4672a2e5eebb0cd647d1bac Mon Sep 17 00:00:00 2001
From: Milan
Date: Thu, 3 Mar 2022 14:29:59 +0000
Subject: [PATCH 14/20] README updated

---
 README.md | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index 3a7fa61..5fec3d8 100644
--- a/README.md
+++ b/README.md
@@ -3,16 +3,15 @@
 [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://milankl.github.io/BitInformation.jl/dev)
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4774191.svg)](https://doi.org/10.5281/zenodo.4774191)

-BitInformation.jl is a package for the analysis of bitwise information in Julia Arrays.
-Based on counting the occurrences of bits in floats (Float16/32/64 or generally any bittype)
-across various dimensions of an array, this package provides functions to calculate quantities
-like the bitwise real information content, the mutual information, the redundancy or preserved
-information between arrays.
-
-BitInformation.jl also implements various rounding modes (round,shave,set_one, etc.)
-efficiently with bitwise operations. `round(x,i)` implements IEEE's round to nearest tie to even
-for any float retaining `i` mantissa bits. Furthermore, transormations like XOR-delta, bittranspose,
-or signed_exponent are implemented.
+BitInformation.jl is a package for bitwise information analysis and manipulation in Julia arrays.
+Based on counting the occurrences of bits in floats (or generally any bits type) across various dimensions,
+this package calculates quantities like the bitwise real information content, the mutual information, the
+redundancy or preserved information between arrays.
+
+For bitwise manipulation, BitInformation.jl also implements various rounding modes (IEEE round,shave,set_one, etc.)
+efficiently with bitwise operations for any number of bits. E.g. `round(x,i)` implements IEEE's round to nearest
+tie-to-even for any float retaining `i` mantissa bits. Furthermore, transformations like XOR-delta, bittranspose
+(aka bit shuffle), or signed/biased exponents are implemented.
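+
+For example (an illustrative sketch; see the documentation for the full API of these functions):
+
+```julia
+A = rand(Float32,1000)
+Ar = round(A,7)         # round to nearest, keeping 7 mantissa bits
+As = shave(A,7)         # or set the discarded mantissa bits to zero instead
+At = bittranspose(A)    # reorder the bits across the array (bit shuffle)
+bitinformation(A)       # bitwise real information content of A
+```
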
If you'd like to propose changes, or contribute in any form raise an issue, create a [pull request](https://github.com/milankl/BitInformation.jl/pulls) From cdffcff97a7e395a52e1ec7478e94627cb2d4a2a Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 3 Mar 2022 14:49:05 +0000 Subject: [PATCH 15/20] test tolerance increased --- test/shave_set_groom.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/shave_set_groom.jl b/test/shave_set_groom.jl index 586e180..bfaa9db 100644 --- a/test/shave_set_groom.jl +++ b/test/shave_set_groom.jl @@ -55,9 +55,9 @@ end end end -@testset "Approx equal for keepbits=5,10,25" begin +@testset "Approx equal for keepbits=8,15,35" begin for (T,k) in zip([Float16,Float32,Float64], - [6,11,26]) + [8,15,35]) A = rand(T,200,300) Ar = shave(A,k) @test A ≈ Ar From 312d21c20fad062df1fc6fcf4efcd4472fc162f9 Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 3 Mar 2022 17:03:08 +0000 Subject: [PATCH 16/20] docs updated --- .github/workflows/Documenter.yml | 2 +- Project.toml | 2 +- docs/Project.toml | 7 +- docs/make.jl | 2 +- docs/src/bitinformation.md | 226 +++++++++++-------------------- docs/src/functions.md | 65 ++++++++- docs/src/index.md | 8 +- docs/src/rounding.md | 12 ++ docs/src/transformations.md | 33 ++++- 9 files changed, 182 insertions(+), 175 deletions(-) diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index 22f3365..f5b7a2f 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -14,7 +14,7 @@ jobs: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest with: - version: '1.6' + version: '1.7' - name: Install dependencies run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - name: Build and deploy diff --git a/Project.toml b/Project.toml index bd1666c..f28c683 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "BitInformation" uuid = "de688a37-743e-4ac2-a6f0-bd62414d1aa7" authors = ["Milan and contributors"] -version = "0.3.0" +version = "0.4.0" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" diff --git a/docs/Project.toml b/docs/Project.toml index dd8d0ec..a9d839f 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,10 +1,5 @@ [deps] -Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -Documenter = "0.26" -Distributions = "0.24, 0.25" -StatsBase = "0.32, 0.33" \ No newline at end of file +Documenter = "0.26, 0.27" \ No newline at end of file diff --git a/docs/make.jl b/docs/make.jl index 1d867b0..4b332e4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -2,7 +2,7 @@ using Documenter, BitInformation makedocs( format = Documenter.HTML( - prettyurls = get(ENV, "CI", nothing) == "true"), + prettyurls = get(ENV, "CI", nothing) == "true"), sitename="BitInformation.jl", authors="M Klöwer", modules=[BitInformation], diff --git a/docs/src/bitinformation.md b/docs/src/bitinformation.md index f77d910..c7d9e4c 100644 --- a/docs/src/bitinformation.md +++ b/docs/src/bitinformation.md @@ -1,34 +1,36 @@ # Bitwise information content analysis -## Bitpattern entropy +## Bit pattern entropy -An $n$-bit number format has bitpatterns available to encode a real number. -For most data arrays, not all bitpatterns are used at uniform probability. 
-The bitpattern entropy is the +An $n$-bit number format has $2^n$ bit patterns available to encode a real number. +For most data arrays, not all bit pattern occur at equal probability. +The bit pattern entropy is the [Shannon information entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)) -$H$, in units of bits, calculated from the probability $p_i$ of each bitpattern +$H$, in units of bits, calculated from the probability $p_i$ of each bit pattern ```math H = -\sum_{i=1}^{2^n}p_i \log_2(p_i) ``` -The bitpattern entropy is $H \leq n$ and maximised to $n$ bits for a uniform distribution. -The free entropy is the difference $n-H$. +The bitpattern entropy is $H \leq n$ and maximised to $n$ bits for a uniform distribution, +i.e. all bit pattern occur equally frequent. The free entropy $H_f$ is the difference $n-H$. -In BitInformation.jl, the bitpattern entropy is calculated via `bitpattern_entropy(::Array)` +In BitInformation.jl, the bitpattern entropy is calculated via `bitpattern_entropy(::AbstractArray)` ```julia -julia> A = rand(Float32,100000000); +julia> A = rand(Float32,100000000) .+ 1; julia> bitpattern_entropy(A) 22.938590744784577 ``` Here, the entropy is about 23 bit, meaning that `9` bits are effectively unused. -This is because `rand` samples in `[1,2)`, so that the sign and exponent bits are -always `0 01111111` followed by some random significant bits. +This is because all elements of A are in `[1,2)`, so that the sign and exponent bits are +always `0 01111111` followed by 23 random significant bits, each contributing about 1 bit. -The function `bitpattern_entropy` is based on sorting the array `A`. While this -avoids the allocation of a bitpattern histogram (which would make the function -unsuitable for anything larger than 32 bits) it has to allocate a sorted version of `A`. +!!! note "Implementation details" + The function `bitpattern_entropy` is based on sorting the array `A`. While this + avoids the allocation of a bitpattern histogram (which would make the function + unsuitable for anything larger than 32 bits) it has to allocate a sorted version of `A`. + If you don't mind the sorting in-place use `bitpattern_entropy!(::AbstractArray)`. ## Bit count entropy @@ -39,11 +41,12 @@ i.e. a sequence of bits of length $l$, the form H(b) = -p_0 \log_2(p_0) - p_1\log_2(p_1) ``` -with $p_0,p_1$ being the probability of a bit $b_k$ in $b$ being 0 or 1. The entropy is maximised to 1 bit for equal probabilities $p_0 = p_1 = \tfrac{1}{2}$ in $b$. The function `bitcount(A::Array)` +with $p_0,p_1$ being the probability of a bit $b_k$ in $b$ being 0 or 1. The entropy is maximised to +1 bit for equal probabilities $p_0 = p_1 = \tfrac{1}{2}$ in $b$. The function `bitcount(A::Array)` counts all occurences of the 1-bit in every bit-position in every element of `A`. E.g. ```julia -julia> bitstring.(A) # 5-elemenet Vector{UInt8} +julia> bitstring.(A) # 5-element Vector{UInt8} 5-element Array{String,1}: "10001111" "00010111" @@ -63,8 +66,8 @@ julia> bitcount(A) 3 ``` -The first bit of elements (here: `UInt8`) in `A` are 4 times `1` and so 1 times -`0`, etc. In contrast, elements drawn from a uniform distribution U(0,1) +The first bit of elements (here: `UInt8`) in `A` is 4x `1` and hence 1x `0`. +In contrast, elements drawn from a uniform distribution U(0,1) ```julia julia> A = rand(Float32,100000); @@ -89,7 +92,7 @@ Once the bits in an array are counted, the respective probabilities $p_0,p_1$ ca and the entropy derived. 
The function `bitcount_entropy(A::Array)` does that ```julia julia> A = rand(UInt8,100000); # entirely random bits -julia> Elefridge.bitcountentropy(A) +julia> bitcount_entropy(A) 8-element Array{Float64,1}: # entropy is for every bit position ≈ 1 0.9999998727542938 0.9999952725717266 @@ -104,14 +107,14 @@ This converges to 1 for larger arrays. ## Bit pair count -The `bitpaircount(A::Array)` function returns a `4xn` (with `n` -being the number of bits in every element of `A`) array, the counts the occurrences -of `00`,`01`,`10`,`11` for all bit-positions in `a in A` across all elements `a` in `A`. -For a length `N` of array `A` (one or multi-dimensional) the maximum occurrence -is `N-1`. E.g. +The `bitpair_count(A::AbstractArray,B::AbstractArray)` function returns a `nx2x2` array, +with `n` being the number of bits elements of `A,B`. The array contains counts the +occurrences of bit pairs between A,B, i.e. `00`,`01`,`10`,`11` for all bit positions. +E.g. ```julia -julia> A = rand(UInt8,5); +julia> A = rand(UInt8,5) +julia> B = rand(UInt8,5) julia> bitstring.(A) 5-element Array{String,1}: "01000010" @@ -120,82 +123,15 @@ julia> bitstring.(A) "01111111" "00010100" -julia> bitpaircount(A) -4×8 Array{Int64,2}: - 2 0 0 0 2 0 0 2 # occurences of `00` in the n-bits of UInt8 - 1 0 2 1 1 1 0 1 # occurences of `01` - 1 1 2 0 1 0 1 1 # occurences of `10` - 0 3 0 3 0 3 3 0 # occurences of `11` +julia> C = bitpair_count(A,B) +julia> C[1,:,:] # bitpairs of A,B in 1st bit position +2×2 Matrix{Int64}: + 2 1 + 1 1 ``` -The first bit of elements in `A` is as a sequence `01000`. Consequently, -`00` occurs 2x, `01` and `10` once each, and `11` does not occur. -Multi-dimensional arrays are unravelled into a vector, following Julia's -memory layout (column-major). - -## Bit conditional entropy - -Based on `bitpaircount` we can calculate the conditional -entropy of the state of one bit given the state of the previous bit (previous -meaning in the same bit position but in the previous element in the array `A`). -In the previous example we obtain -```julia -julia> bit_condprobability(A) -4×8 Array{Float64,2}: - 0.666667 NaN 0.0 0.0 0.666667 0.0 NaN 0.666667 - 0.333333 NaN 1.0 1.0 0.333333 1.0 NaN 0.333333 - 1.0 0.25 1.0 0.0 1.0 0.0 0.25 1.0 - 0.0 0.75 0.0 1.0 0.0 1.0 0.75 0.0 -``` -Given the previous bit being `0` there is a 2/3 chance that th next bit is a `0` -too, and a 1/3 change that the next bit is a `1`, i.e. -$p_{00} = p(\text{next}=0|\text{previous}=0) = 2/3$, and -$p_{10} = p(1|0) = \tfrac{1}{3}$, such that $p(0|0)+p(1|0)=1$ always (which are -the marginal probabilities from below), if not `NaN`, -and similarly for $p(0|1)$ and $p(1|1)$. - -The conditional entropies $H_0,H_1$ are conditioned on the state of the -previous bit $b_{j-1}$ being 0 or 1 - -```math -\begin{aligned} -H_0 &= -p_{00}\log_2(p_{00}) - p_{01}\log_2(p_[01]) \\ -H_1 &= -p_{10}\log_2(p_{10}) - p_{11}\log_2(p_[11]) \\ -\end{aligned} -``` - -The conditional entropy is maximised to 1 bit for bitstreams where the probability -of a bit being 0 or 1 does not depend on the state of the previous bit, which is -here defined as _false information_. 
- -```julia -julia> r = rand(Float32,100_000) -julia> H₀,H₁ = bit_condentropy(r) -julia> H₀ -32-element Vector{Float64}: - 0.0 - 0.0 - 0.0 - 0.0 - 0.0 - 0.07559467763419292 - 0.5214998930042997 - 0.9684130809383832 - ⋮ - 0.9997866754890564 - 0.999747731180627 - 0.999438123786493 - 0.9968145441905049 - 0.9878425610244357 - 0.9528602299665989 - 0.8124289058679582 - 0.0 -``` - -Sign and the first exponent bits have 0 conditional entropy, which increases to 1 bit for the fully -random significant bits. The last significant bits have lower conditional entropy due to shortcomings -in the pseudo random generator in `rand`, see a discussion -[here](https://sunoru.github.io/RandomNumbers.jl/dev/man/basics/#Conversion-to-Float). +Here, among the 5 element pairs in `A,B` there are 2 which both have a `0` in the first bit +position. One element pair is `01`, one `10` and one `11`. ## Mutual information @@ -217,12 +153,12 @@ $r,s$ is then M(r,s) = \sum_{r=0}^1 \sum_{s=0}^1 p_{rs} \log_2 \left( \frac{p_{rs}}{p_{r=r}p_{s=s}}\right) ``` -The function `bitinformation(::Array{T,N},::Array{T,N})` calculates $M$ as +The function `mutual_information(::AbstractArray,::AbstractArray)` calculates $M$ as ```julia julia> r = rand(Float32,100_000) # [0,1) random float32s julia> s = shave(r,15) # remove information in sbit 16-23 by setting to 0 -julia> bitinformation(r,s) +julia> mutual_information(r,s) 32-element Vector{Float64}: 0.0 ⋮ @@ -239,6 +175,9 @@ julia> bitinformation(r,s) 0.0 0.0 # sbit 23 ``` +The mutual information approaches 1 bit for unchaged bits between `r,s`, but bits that have +been shaved off do not contain any information, hence the mutual information also drops +as there is no entropy to start with. ## Real bitwise information @@ -253,14 +192,14 @@ bits also as I = H - q_0H_0 - q_1H_1 ``` -which is the real information content $I$. This definition is similar to Jeffress et al. (2017) [1], +which is the real information content $I$. This definition is similar to Jeffress et al. (2017) [^1], but avoids an additional assumption of an uncertainty measure. This defines the real information -as the entropy minus the false information. For bitstreams with either $p_0 = 1$ or $p_1 = 1$, +as the entropy minus the false information. For bitstreams with either $p_0 = 1$ or $p_1 = 1$, i.e. all bits are either 0 or 1, the entropies are zero $H = H_0 = H_1 = 0$ and we may refer to the bits in the bitstream as being unused. In the case where $H > p_0H_0 + p_1H_1$, the preceding bit is a predictor for the succeeding bit which means that the bitstream contains real information ($I > 0$). -The computation of $I$ is implemented in `bitinformation(::Array)` +The computation of $I$ is implemented in `bitinformation(::AbstractArray)` ```julia julia> A = rand(UInt8,1000000) # fully random bits @@ -314,14 +253,11 @@ in the last significant bit (flips randomly). The information is maximised to of such a bit one can expect the next (or previous) bit to be the same due to the correlation. -[1] Jeffress, S., Düben, P. & Palmer, T. _Bitwise efficiency in chaotic models_. -*Proc. R. Soc. Math. Phys. Eng. Sci.* 473, 20170144 (2017). - ## Multi-dimensional real information The real information content $I_m$ for an $m$-dimensional array $A$ is the sum of the real information along the dimensions. Let $b_j$ be a bitstream obtained by unravelling -a given bitposition in along its $j$-th dimension. Although the unconditional entropy $H$ +a given bitposition in along its $j$-th dimension. 
Although the unconditional entropy $H$ is unchanged along the $m$-dimensions, the conditional entropies $H_0,H_1$ change as the preceding and succeeding bit is found in another dimension, e.g. $b_2$ is obtained by re-ordering $b_1$. Normalization by $\tfrac{1}{m}$ is applied to $I_m$ such that the @@ -331,49 +267,34 @@ maximum information is 1 bit in $I_m^*$ I_m^* = H - \frac{p_0}{m}\sum_{j=1}^mH_0(b_j) - \frac{p_1}{m}\sum_{j=1}^mH_1(b_j) ``` -This is implemented in BitInformation.jl as `bitinformation(::Array{T,N},:all_dimensions)`, e.g. +This is implemented in BitInformation.jl as `bitinformation(::AbstractArray,dim::int)`, e.g. ```julia julia> A = rand(Float32,100,200,300) # a 3D array -julia> sort!(A,dims=1) # sort to create some auto-corelation -julia> bitinformation(A,:all_dimensions) +julia> sort!(A,dims=2) # sort to create some auto-corelation +julia> bitinformation(A,dim=2) # information along 2nd dimension 32-element Vector{Float64}: + 0.9284529526801016 + 0.12117292341467487 + 0.12117292341467487 + 0.12117292341467487 + 0.12097216822084497 + 0.11085252207817117 + 0.31618374026691876 + 0.5342647349810241 + 0.44490628938885407 + 0.2420859244356707 + 0.08943278228929732 + 0.01870979798293037 + 0.00221913365831955 0.0 0.0 - 0.0 - 0.0 - 6.447635701154324e-7 - 0.014292670110681693 - 0.31991625275425073 - 0.5440816091704278 - 0.36657938793446365 - 0.2533186226597825 - 0.13051374121057438 ⋮ 0.0 0.0 - 8.687738656254496e-7 - 6.251449893598012e-6 - 5.0038146715651134e-5 - 0.0003054976017733783 - 0.0015377166906772044 - 0.006581160530812665 - 0.022911924843179426 - 0.06155167545633838 - 0.0 ``` - -which is equivalent to - -```julia -julia> bitinformation(A,:all_dimensions) ≈ 1/3*(bitinformation(A,dims=1)+ - bitinformation(A,dims=2)+ - bitinformation(A,dims=3)) -true -``` - -The keyword `dims` will permute the dimensions in `A` to calcualte the information -in the specified dimensions. By default `dims=1`, which uses the ordering of the bits +The keyword `dim` will permute the dimensions in `A` to calcualte the information +in the specified dimensions. By default `dim=1`, which uses the ordering of the bits as they are layed out in memory. ## Redundancy @@ -386,8 +307,8 @@ R(r,s) = \frac{2M(r,s)}{H(r) + H(s)} `R` is the redundancy of information of $r$ in $s$ (and vice versa). $R = 1$ for identical bitstreams $r = s$, but $R = 0$ for statistically independent bitstreams. -BitInformation.jl implements the redundancy calculation via `redundancy(::Array{T,N},::Array{T,N})` -where the inputs have to be of same size and element type `T`. For example, shaving off some of +BitInformation.jl implements the redundancy calculation via `redundancy(::AbstractArray,::AbstractArray)` +where the inputs have to be of same size and element type. 
For example, shaving off some of the last significant bits will set the redundancy for those to 0, but redundancy is 1 for all bitstreams which are identical @@ -396,11 +317,11 @@ julia> r = rand(Float32,100_000) # random data julia> s = shave(r,7) # keep only sign, exp and sbits 1-7 julia> redundancy(r,s) 32-element Vector{Float64}: - 0.0 # 0 redundancy as entropy = 0 - 0.0 - 0.0 - 0.0 - 0.9999999999993566 # redundancy 1 as bitstreams are identical + 1.0 # redundancy 1 as bitstreams are identical + 1.0 + 1.0 + 1.0 + 0.9999999999993566 # rounding errors can yield ≈1 0.9999999999999962 1.0 1.0000000000000002 @@ -418,7 +339,7 @@ julia> redundancy(r,s) ## Preserved information The preserved information $P(r,s)$ between two bitstreams $r,s$ where $s$ approximates $r$ -is the redundancy-weighted real information $I$ +is the information-weighted redundancy $I$ ```math P(r,s) = R(r,s)I(r) @@ -486,4 +407,9 @@ set the real information $I = 0$. The calculation of $H_f$ is implemented as ```julia julia> BitInformation.binom_free_entropy(10_000_000,0.99) 4.786066739592698e-7 -``` \ No newline at end of file +``` + +## References + +[^1]: Jeffress, S., Düben, P. & Palmer, T. _Bitwise efficiency in chaotic models_. *Proc. R. Soc. Math. Phys. Eng. Sci.* 473, 20170144 (2017). + diff --git a/docs/src/functions.md b/docs/src/functions.md index 515b35e..8972cd8 100644 --- a/docs/src/functions.md +++ b/docs/src/functions.md @@ -1,5 +1,23 @@ # Index of functions in BitInformation.jl +### Information + +```@docs +BitInformation.bitinformation(::AbstractArray) +BitInformation.mutual_information(::AbstractArray,::AbstractArray) +BitInformation.mutual_information(::Matrix) +BitInformation.redundancy(::AbstractArray,::AbstractArray) +``` + +### Bit counting and entropy + +```@docs +BitInformation.bitpattern_entropy(::AbstractArray) +BitInformation.bitcount(::AbstractArray) +BitInformation.bitcount_entropy(::AbstractArray) +BitInformation.bitpair_count(::AbstractArray,::AbstractArray) +``` + ### Significance of information ```@docs @@ -10,10 +28,43 @@ BitInformation.binom_free_entropy(::Int,::Real) ### Transformations ```@docs -bittranspose(::AbstractArray) -bitbacktranspose(::AbstractArray) -xor_delta(::Array{AbstractFloat,1}) -unxor_delta(::Array{AbstractFloat,1}) -signed_exponent(::Array{Float32}) -signed_exponent!(::Array{Float32}) -``` \ No newline at end of file +BitInformation.bittranspose(::AbstractArray) +BitInformation.bitbacktranspose(::AbstractArray) +BitInformation.xor_delta(::AbstractArray) +BitInformation.unxor_delta(::AbstractArray) +BitInformation.signed_exponent(::AbstractArray) +BitInformation.signed_exponent!(::AbstractArray) +BitInformation.biased_exponent(::AbstractArray) +BitInformation.biased_exponent!(::AbstractArray) +``` + +### Rounding + +```@docs +Base.round(::Base.IEEEFloat,::Integer) +BitInformation.round! +BitInformation.get_shift +BitInformation.get_ulp_half +BitInformation.get_keep_mask +BitInformation.get_bit_mask +Base.iseven(::Base.IEEEFloat,::Integer) +Base.isodd(::Base.IEEEFloat,::Integer) +``` + +### Shaving, halfshaving, setting and bit grooming + +```@docs +BitInformation.shave +BitInformation.halfshave +BitInformation.set_one +BitInformation.groom! 
+BitInformation.nsb +``` + +### Printing and BitArray conversion + +```@docs +Base.bitstring(::Base.IEEEFloat,::Symbol) +Base.BitArray(::AbstractArray) +``` + diff --git a/docs/src/index.md b/docs/src/index.md index 13baeba..6d7186b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,7 +2,8 @@ ## Overview -[BitInformation.jl](https://github.com/milankl/BitInformation.jl) is a library for the analysis of bitwise information in n-dimensional Julia arrays. +[BitInformation.jl](https://github.com/milankl/BitInformation.jl) is a library for the analysis of bitwise information and +bitwise manipulation in n-dimensional Julia arrays. ## Installation @@ -14,8 +15,9 @@ where `]` opens the package manager. The latest version is automatically install ## Developers -BitInformation.jl is currently developed by [Milan Klöwer](https://github.com/milankl), any contributions are always welcome. +BitInformation.jl is currently developed by [Milan Klöwer](https://github.com/milankl). Any contributions are always welcome. ## Funding -This project is funded by the [Copernicus Programme](https://www.copernicus.eu/en/copernicus-services/atmosphere) through the [ECMWF summer of weather code 2020 and 2021](https://esowc.ecmwf.int/). \ No newline at end of file +This project is funded by the [Copernicus Programme](https://www.copernicus.eu/en/copernicus-services/atmosphere) +through the [ECMWF summer of weather code 2020 and 2021](https://esowc.ecmwf.int/). \ No newline at end of file diff --git a/docs/src/rounding.md b/docs/src/rounding.md index 0ccfd8d..d00fb52 100644 --- a/docs/src/rounding.md +++ b/docs/src/rounding.md @@ -6,6 +6,18 @@ BitInformation.jl implements them efficiently with bitwise operations, in-place or by creating a copy of the original array. +!!! tip "Bitstring split into sign, exponent and mantissa bits" + BitInformation.jl extends `Base.bitstring` with a split option to better visualise sign, exponent + and mantissa bits for floats. + ```julia + julia> bitstring(1.1f0) + "00111111100011001100110011001101" + + julia> bitstring(1.1f0,:split) + "0 01111111 00011001100110011001101" + ``` + + ## Round to nearest With binary round to nearest a full-precision number is replaced by the nearest representable float diff --git a/docs/src/transformations.md b/docs/src/transformations.md index 0d6326c..36c1c1a 100644 --- a/docs/src/transformations.md +++ b/docs/src/transformations.md @@ -6,14 +6,18 @@ for lossless compression algorithms. !!! warning "Interpretation of transformed floats" BitInformation.jl will not store the information that a transformation was applied to a value. This means - that Julia will not know about this and interpret a value incorrectly. You will have to explicitly execute - the backtransform + that Julia will not know about this and interpret/print a value incorrectly. You will have to explicitly execute + the backtransform (and know which one!) to undo the transformation ```julia julia> A = [0f0,1f0] # 0 and 1 julia> At = bittranspose(A) # are transposed into 1f-35 and 0 2-element Vector{Float32}: - 1.0026967f-35 - 0.0 + 1.0026967f-35 + 0.0 + julia> bitbacktranspose(At) # reverse transpose + 2-element Vector{Float32}: + 0.0 + 1.0 ``` ## Bit transpose (aka shuffle) @@ -125,11 +129,10 @@ julia> bitstring.(Ax) ## Signed exponent -Floating-point numbers have a biased exponent. There are +IEEE Floating-point numbers have a biased exponent. 
There are
 [other ways to encode the exponent](https://en.wikipedia.org/wiki/Signed_number_representations#Comparison_table)
 and BitInformation.jl implements `signed_exponent` which transforms the exponent bits of a float
 into a representation where also the exponent has a sign bit (which is the first exponent bit)
-
 ```julia
 julia> a = [0.5f0,1.5f0] # smaller than 1 (exp sign -1), larger than 1 (exp sign +1)
 julia> bitstring.(a,:split)
 2-element Array{String,1}:
  "0 01111110 00000000000000000000000"
  "0 01111111 10000000000000000000000"

 julia> bitstring.(signed_exponent(a),:split)
 2-element Array{String,1}:
  "0 10000001 00000000000000000000000" # signed exponent: sign=1, magnitude=1, i.e. 2^-1
  "0 00000000 10000000000000000000000" # signed exponent: sign=0, magnitude=0, i.e. 2^0
 ```
+The transformation `signed_exponent` can be undone with `biased_exponent`
+```julia
+julia> A = [0.5f0,1.5f0]
+2-element Vector{Float32}:
+ 0.5
+ 1.5
+julia> signed_exponent(A)
+2-element Vector{Float32}:
+ 4.0
+ 5.877472f-39
+
+julia> biased_exponent(signed_exponent(A))
+2-element Vector{Float32}:
+ 0.5
+ 1.5
+```
+Both `signed_exponent` and `biased_exponent` also exist as in-place functions
+`signed_exponent!` and `biased_exponent!`.
\ No newline at end of file

From 13c4e28aad7e0f4c33b870f667b20b1f32c18f54 Mon Sep 17 00:00:00 2001
From: Milan
Date: Thu, 3 Mar 2022 17:08:48 +0000
Subject: [PATCH 17/20] Error tolerances increased

---
 test/information.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/information.jl b/test/information.jl
index 822cfca..750b278 100644
--- a/test/information.jl
+++ b/test/information.jl
@@ -123,7 +123,7 @@ end

         R = redundancy(A,B)
         for r in R
-            @test r ≈ p rtol=1e-1
+            @test r ≈ p rtol=2e-1
         end
     end

@@ -141,7 +141,7 @@ end

         R = redundancy(A,B)
         for r in R
-            @test r ≈ p rtol=1e-1
+            @test r ≈ p rtol=2e-1
         end
     end
 end

From 1dbf85c8933f9de835580d7d57d487ee853e73ee Mon Sep 17 00:00:00 2001
From: Milan
Date: Thu, 3 Mar 2022 17:17:08 +0000
Subject: [PATCH 18/20] function signatures for docs more specific

---
 docs/src/functions.md | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/docs/src/functions.md b/docs/src/functions.md
index 8972cd8..829b1e0 100644
--- a/docs/src/functions.md
+++ b/docs/src/functions.md
@@ -3,19 +3,19 @@
 ### Information

 ```@docs
-BitInformation.bitinformation(::AbstractArray)
-BitInformation.mutual_information(::AbstractArray,::AbstractArray)
-BitInformation.mutual_information(::Matrix)
-BitInformation.redundancy(::AbstractArray,::AbstractArray)
+BitInformation.bitinformation(::Array)
+BitInformation.mutual_information(::Array,::Array)
+BitInformation.mutual_information(::Matrix)
+BitInformation.redundancy(::Array,::Array)
 ```

 ### Bit counting and entropy

 ```@docs
-BitInformation.bitpattern_entropy(::AbstractArray)
-BitInformation.bitcount(::AbstractArray)
-BitInformation.bitcount_entropy(::AbstractArray)
-BitInformation.bitpair_count(::AbstractArray,::AbstractArray)
+BitInformation.bitpattern_entropy(::Array)
+BitInformation.bitcount(::Array)
+BitInformation.bitcount_entropy(::Array)
+BitInformation.bitpair_count(::Array,::Array)
 ```

 ### Significance of information
@@ -28,14 +28,14 @@ BitInformation.binom_free_entropy(::Int,::Real)
 ### Transformations

 ```@docs
-BitInformation.bittranspose(::AbstractArray)
-BitInformation.bitbacktranspose(::AbstractArray)
-BitInformation.xor_delta(::AbstractArray)
-BitInformation.unxor_delta(::AbstractArray)
-BitInformation.signed_exponent(::AbstractArray)
-BitInformation.signed_exponent!(::AbstractArray)
-BitInformation.biased_exponent(::AbstractArray)
-BitInformation.biased_exponent!(::AbstractArray)
+BitInformation.bittranspose(::Array) +BitInformation.bitbacktranspose(::Array) +BitInformation.xor_delta(::Array) +BitInformation.unxor_delta(::Array) +BitInformation.signed_exponent(::Array) +BitInformation.signed_exponent!(::Array) +BitInformation.biased_exponent(::Array) +BitInformation.biased_exponent!(::Array) ``` ### Rounding @@ -65,6 +65,6 @@ BitInformation.nsb ```@docs Base.bitstring(::Base.IEEEFloat,::Symbol) -Base.BitArray(::AbstractArray) +Base.BitArray(::Array) ``` From ca96b9476040fbd2f4fc0311197a9f1adc786d54 Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 3 Mar 2022 17:25:16 +0000 Subject: [PATCH 19/20] general function signatures for docs --- docs/src/functions.md | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/docs/src/functions.md b/docs/src/functions.md index 829b1e0..3d8694d 100644 --- a/docs/src/functions.md +++ b/docs/src/functions.md @@ -3,39 +3,39 @@ ### Information ```@docs -BitInformation.bitinformation(::Array) -BitInformation.mutual_information(::Array,::Array) -BitInformation.mutual_information(::Matrix) -BitInformation.redundancy(::Array,::Array) +BitInformation.bitinformation +BitInformation.mutual_information +BitInformation.mutual_information +BitInformation.redundancy ``` ### Bit counting and entropy ```@docs -BitInformation.bitpattern_entropy(::Array) -BitInformation.bitcount(::Array) -BitInformation.bitcount_entropy(::Array) -BitInformation.bitpair_count(::Array,::Array) +BitInformation.bitpattern_entropy +BitInformation.bitcount +BitInformation.bitcount_entropy +BitInformation.bitpair_count ``` ### Significance of information ```@docs -BitInformation.binom_confidence(::Int,::Real) -BitInformation.binom_free_entropy(::Int,::Real) +BitInformation.binom_confidence +BitInformation.binom_free_entropy ``` ### Transformations ```@docs -BitInformation.bittranspose(::Array) -BitInformation.bitbacktranspose(::Array) -BitInformation.xor_delta(::Array) -BitInformation.unxor_delta(::Array) -BitInformation.signed_exponent(::Array) -BitInformation.signed_exponent!(::Array) -BitInformation.biased_exponent(::Array) -BitInformation.biased_exponent!(::Array) +BitInformation.bittranspose +BitInformation.bitbacktranspose +BitInformation.xor_delta +BitInformation.unxor_delta +BitInformation.signed_exponent +BitInformation.signed_exponent! +BitInformation.biased_exponent +BitInformation.biased_exponent! ``` ### Rounding @@ -65,6 +65,6 @@ BitInformation.nsb ```@docs Base.bitstring(::Base.IEEEFloat,::Symbol) -Base.BitArray(::Array) +Base.BitArray(::Matrix) ``` From d056c2cecf77de0b793bea620eb16f8ec89b09aa Mon Sep 17 00:00:00 2001 From: Milan Date: Thu, 3 Mar 2022 17:37:12 +0000 Subject: [PATCH 20/20] more doc strings --- docs/src/functions.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/src/functions.md b/docs/src/functions.md index 3d8694d..848eaf3 100644 --- a/docs/src/functions.md +++ b/docs/src/functions.md @@ -5,7 +5,6 @@ ```@docs BitInformation.bitinformation BitInformation.mutual_information -BitInformation.mutual_information BitInformation.redundancy ``` @@ -13,6 +12,7 @@ BitInformation.redundancy ```@docs BitInformation.bitpattern_entropy +BitInformation.bitpattern_entropy! BitInformation.bitcount BitInformation.bitcount_entropy BitInformation.bitpair_count @@ -31,7 +31,9 @@ BitInformation.binom_free_entropy BitInformation.bittranspose BitInformation.bitbacktranspose BitInformation.xor_delta +BitInformation.xor_delta! BitInformation.unxor_delta +BitInformation.unxor_delta! 
BitInformation.signed_exponent BitInformation.signed_exponent! BitInformation.biased_exponent @@ -55,8 +57,11 @@ Base.isodd(::Base.IEEEFloat,::Integer) ```@docs BitInformation.shave +BitInformation.shave! BitInformation.halfshave +BitInformation.halfshave! BitInformation.set_one +BitInformation.set_one! BitInformation.groom! BitInformation.nsb ```
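+
+For example, the shaving functions come in a copying and an in-place variant
+(an illustrative sketch, assuming a `Float32` array `A`):
+
+```julia
+A = rand(Float32,100)
+B = shave(A,7)      # returns a shaved copy, A is unchanged
+shave!(A,7)         # shaves A in place
+A == B              # true
+```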