diff --git a/.github/workflows/Documenter.yml b/.github/workflows/Documenter.yml index 22f3365..f5b7a2f 100644 --- a/.github/workflows/Documenter.yml +++ b/.github/workflows/Documenter.yml @@ -14,7 +14,7 @@ jobs: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest with: - version: '1.6' + version: '1.7' - name: Install dependencies run: julia --project=docs/ -e 'using Pkg; Pkg.develop(PackageSpec(path=pwd())); Pkg.instantiate()' - name: Build and deploy diff --git a/Project.toml b/Project.toml index bd1666c..f28c683 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "BitInformation" uuid = "de688a37-743e-4ac2-a6f0-bd62414d1aa7" authors = ["Milan and contributors"] -version = "0.3.0" +version = "0.4.0" [deps] Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" diff --git a/README.md b/README.md index 3a7fa61..5fec3d8 100644 --- a/README.md +++ b/README.md @@ -3,16 +3,15 @@ [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://milankl.github.io/BitInformation.jl/dev) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4774191.svg)](https://doi.org/10.5281/zenodo.4774191) -BitInformation.jl is a package for the analysis of bitwise information in Julia Arrays. -Based on counting the occurrences of bits in floats (Float16/32/64 or generally any bittype) -across various dimensions of an array, this package provides functions to calculate quantities -like the bitwise real information content, the mutual information, the redundancy or preserved -information between arrays. - -BitInformation.jl also implements various rounding modes (round,shave,set_one, etc.) -efficiently with bitwise operations. `round(x,i)` implements IEEE's round to nearest tie to even -for any float retaining `i` mantissa bits. Furthermore, transormations like XOR-delta, bittranspose, -or signed_exponent are implemented. +BitInformation.jl is a package for bitwise information analysis and manipulation in Julia arrays. 
+Based on counting the occurrences of bits in floats (or generally any bits type) across various dimensions, +this package calculates quantities like the bitwise real information content, the mutual information, the +redundancy or preserved information between arrays. + +For bitwise manipulation, BitInformation.jl also implements various rounding modes (IEEE round,shave,set_one, etc.) +efficiently with bitwise operations for any number of bits. E.g. `round(x,i)` implements IEEE's round to nearest +tie-to-even for any float retaining `i` mantissa bits. Furthermore, transormations like XOR-delta, bittranspose +(aka bit shuffle), or signed/biased exponents are implemented. If you'd like to propose changes, or contribute in any form raise an issue, create a [pull request](https://github.com/milankl/BitInformation.jl/pulls) diff --git a/docs/Project.toml b/docs/Project.toml index dd8d0ec..a9d839f 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,10 +1,5 @@ [deps] -Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" -Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" [compat] -Documenter = "0.26" -Distributions = "0.24, 0.25" -StatsBase = "0.32, 0.33" \ No newline at end of file +Documenter = "0.26, 0.27" \ No newline at end of file diff --git a/docs/make.jl b/docs/make.jl index 1d867b0..4b332e4 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -2,7 +2,7 @@ using Documenter, BitInformation makedocs( format = Documenter.HTML( - prettyurls = get(ENV, "CI", nothing) == "true"), + prettyurls = get(ENV, "CI", nothing) == "true"), sitename="BitInformation.jl", authors="M Klöwer", modules=[BitInformation], diff --git a/docs/src/bitinformation.md b/docs/src/bitinformation.md index f77d910..c7d9e4c 100644 --- a/docs/src/bitinformation.md +++ b/docs/src/bitinformation.md @@ -1,34 +1,36 @@ # Bitwise information content analysis -## Bitpattern entropy +## Bit pattern 
entropy -An $n$-bit number format has bitpatterns available to encode a real number. -For most data arrays, not all bitpatterns are used at uniform probability. -The bitpattern entropy is the +An $n$-bit number format has $2^n$ bit patterns available to encode a real number. +For most data arrays, not all bit pattern occur at equal probability. +The bit pattern entropy is the [Shannon information entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)) -$H$, in units of bits, calculated from the probability $p_i$ of each bitpattern +$H$, in units of bits, calculated from the probability $p_i$ of each bit pattern ```math H = -\sum_{i=1}^{2^n}p_i \log_2(p_i) ``` -The bitpattern entropy is $H \leq n$ and maximised to $n$ bits for a uniform distribution. -The free entropy is the difference $n-H$. +The bitpattern entropy is $H \leq n$ and maximised to $n$ bits for a uniform distribution, +i.e. all bit pattern occur equally frequent. The free entropy $H_f$ is the difference $n-H$. -In BitInformation.jl, the bitpattern entropy is calculated via `bitpattern_entropy(::Array)` +In BitInformation.jl, the bitpattern entropy is calculated via `bitpattern_entropy(::AbstractArray)` ```julia -julia> A = rand(Float32,100000000); +julia> A = rand(Float32,100000000) .+ 1; julia> bitpattern_entropy(A) 22.938590744784577 ``` Here, the entropy is about 23 bit, meaning that `9` bits are effectively unused. -This is because `rand` samples in `[1,2)`, so that the sign and exponent bits are -always `0 01111111` followed by some random significant bits. +This is because all elements of A are in `[1,2)`, so that the sign and exponent bits are +always `0 01111111` followed by 23 random significant bits, each contributing about 1 bit. -The function `bitpattern_entropy` is based on sorting the array `A`. While this -avoids the allocation of a bitpattern histogram (which would make the function -unsuitable for anything larger than 32 bits) it has to allocate a sorted version of `A`. 
+!!! note "Implementation details" + The function `bitpattern_entropy` is based on sorting the array `A`. While this + avoids the allocation of a bitpattern histogram (which would make the function + unsuitable for anything larger than 32 bits) it has to allocate a sorted version of `A`. + If you don't mind the sorting in-place use `bitpattern_entropy!(::AbstractArray)`. ## Bit count entropy @@ -39,11 +41,12 @@ i.e. a sequence of bits of length $l$, the form H(b) = -p_0 \log_2(p_0) - p_1\log_2(p_1) ``` -with $p_0,p_1$ being the probability of a bit $b_k$ in $b$ being 0 or 1. The entropy is maximised to 1 bit for equal probabilities $p_0 = p_1 = \tfrac{1}{2}$ in $b$. The function `bitcount(A::Array)` +with $p_0,p_1$ being the probability of a bit $b_k$ in $b$ being 0 or 1. The entropy is maximised to +1 bit for equal probabilities $p_0 = p_1 = \tfrac{1}{2}$ in $b$. The function `bitcount(A::Array)` counts all occurences of the 1-bit in every bit-position in every element of `A`. E.g. ```julia -julia> bitstring.(A) # 5-elemenet Vector{UInt8} +julia> bitstring.(A) # 5-element Vector{UInt8} 5-element Array{String,1}: "10001111" "00010111" @@ -63,8 +66,8 @@ julia> bitcount(A) 3 ``` -The first bit of elements (here: `UInt8`) in `A` are 4 times `1` and so 1 times -`0`, etc. In contrast, elements drawn from a uniform distribution U(0,1) +The first bit of elements (here: `UInt8`) in `A` is 4x `1` and hence 1x `0`. +In contrast, elements drawn from a uniform distribution U(0,1) ```julia julia> A = rand(Float32,100000); @@ -89,7 +92,7 @@ Once the bits in an array are counted, the respective probabilities $p_0,p_1$ ca and the entropy derived. 
The function `bitcount_entropy(A::Array)` does that ```julia julia> A = rand(UInt8,100000); # entirely random bits -julia> Elefridge.bitcountentropy(A) +julia> bitcount_entropy(A) 8-element Array{Float64,1}: # entropy is for every bit position ≈ 1 0.9999998727542938 0.9999952725717266 @@ -104,14 +107,14 @@ This converges to 1 for larger arrays. ## Bit pair count -The `bitpaircount(A::Array)` function returns a `4xn` (with `n` -being the number of bits in every element of `A`) array, the counts the occurrences -of `00`,`01`,`10`,`11` for all bit-positions in `a in A` across all elements `a` in `A`. -For a length `N` of array `A` (one or multi-dimensional) the maximum occurrence -is `N-1`. E.g. +The `bitpair_count(A::AbstractArray,B::AbstractArray)` function returns a `nx2x2` array, +with `n` being the number of bits elements of `A,B`. The array contains counts the +occurrences of bit pairs between A,B, i.e. `00`,`01`,`10`,`11` for all bit positions. +E.g. ```julia -julia> A = rand(UInt8,5); +julia> A = rand(UInt8,5) +julia> B = rand(UInt8,5) julia> bitstring.(A) 5-element Array{String,1}: "01000010" @@ -120,82 +123,15 @@ julia> bitstring.(A) "01111111" "00010100" -julia> bitpaircount(A) -4×8 Array{Int64,2}: - 2 0 0 0 2 0 0 2 # occurences of `00` in the n-bits of UInt8 - 1 0 2 1 1 1 0 1 # occurences of `01` - 1 1 2 0 1 0 1 1 # occurences of `10` - 0 3 0 3 0 3 3 0 # occurences of `11` +julia> C = bitpair_count(A,B) +julia> C[1,:,:] # bitpairs of A,B in 1st bit position +2×2 Matrix{Int64}: + 2 1 + 1 1 ``` -The first bit of elements in `A` is as a sequence `01000`. Consequently, -`00` occurs 2x, `01` and `10` once each, and `11` does not occur. -Multi-dimensional arrays are unravelled into a vector, following Julia's -memory layout (column-major). 
- -## Bit conditional entropy - -Based on `bitpaircount` we can calculate the conditional -entropy of the state of one bit given the state of the previous bit (previous -meaning in the same bit position but in the previous element in the array `A`). -In the previous example we obtain -```julia -julia> bit_condprobability(A) -4×8 Array{Float64,2}: - 0.666667 NaN 0.0 0.0 0.666667 0.0 NaN 0.666667 - 0.333333 NaN 1.0 1.0 0.333333 1.0 NaN 0.333333 - 1.0 0.25 1.0 0.0 1.0 0.0 0.25 1.0 - 0.0 0.75 0.0 1.0 0.0 1.0 0.75 0.0 -``` -Given the previous bit being `0` there is a 2/3 chance that th next bit is a `0` -too, and a 1/3 change that the next bit is a `1`, i.e. -$p_{00} = p(\text{next}=0|\text{previous}=0) = 2/3$, and -$p_{10} = p(1|0) = \tfrac{1}{3}$, such that $p(0|0)+p(1|0)=1$ always (which are -the marginal probabilities from below), if not `NaN`, -and similarly for $p(0|1)$ and $p(1|1)$. - -The conditional entropies $H_0,H_1$ are conditioned on the state of the -previous bit $b_{j-1}$ being 0 or 1 - -```math -\begin{aligned} -H_0 &= -p_{00}\log_2(p_{00}) - p_{01}\log_2(p_[01]) \\ -H_1 &= -p_{10}\log_2(p_{10}) - p_{11}\log_2(p_[11]) \\ -\end{aligned} -``` - -The conditional entropy is maximised to 1 bit for bitstreams where the probability -of a bit being 0 or 1 does not depend on the state of the previous bit, which is -here defined as _false information_. - -```julia -julia> r = rand(Float32,100_000) -julia> H₀,H₁ = bit_condentropy(r) -julia> H₀ -32-element Vector{Float64}: - 0.0 - 0.0 - 0.0 - 0.0 - 0.0 - 0.07559467763419292 - 0.5214998930042997 - 0.9684130809383832 - ⋮ - 0.9997866754890564 - 0.999747731180627 - 0.999438123786493 - 0.9968145441905049 - 0.9878425610244357 - 0.9528602299665989 - 0.8124289058679582 - 0.0 -``` - -Sign and the first exponent bits have 0 conditional entropy, which increases to 1 bit for the fully -random significant bits. 
The last significant bits have lower conditional entropy due to shortcomings -in the pseudo random generator in `rand`, see a discussion -[here](https://sunoru.github.io/RandomNumbers.jl/dev/man/basics/#Conversion-to-Float). +Here, among the 5 element pairs in `A,B` there are 2 which both have a `0` in the first bit +position. One element pair is `01`, one `10` and one `11`. ## Mutual information @@ -217,12 +153,12 @@ $r,s$ is then M(r,s) = \sum_{r=0}^1 \sum_{s=0}^1 p_{rs} \log_2 \left( \frac{p_{rs}}{p_{r=r}p_{s=s}}\right) ``` -The function `bitinformation(::Array{T,N},::Array{T,N})` calculates $M$ as +The function `mutual_information(::AbstractArray,::AbstractArray)` calculates $M$ as ```julia julia> r = rand(Float32,100_000) # [0,1) random float32s julia> s = shave(r,15) # remove information in sbit 16-23 by setting to 0 -julia> bitinformation(r,s) +julia> mutual_information(r,s) 32-element Vector{Float64}: 0.0 ⋮ @@ -239,6 +175,9 @@ julia> bitinformation(r,s) 0.0 0.0 # sbit 23 ``` +The mutual information approaches 1 bit for unchaged bits between `r,s`, but bits that have +been shaved off do not contain any information, hence the mutual information also drops +as there is no entropy to start with. ## Real bitwise information @@ -253,14 +192,14 @@ bits also as I = H - q_0H_0 - q_1H_1 ``` -which is the real information content $I$. This definition is similar to Jeffress et al. (2017) [1], +which is the real information content $I$. This definition is similar to Jeffress et al. (2017) [^1], but avoids an additional assumption of an uncertainty measure. This defines the real information -as the entropy minus the false information. For bitstreams with either $p_0 = 1$ or $p_1 = 1$, +as the entropy minus the false information. For bitstreams with either $p_0 = 1$ or $p_1 = 1$, i.e. all bits are either 0 or 1, the entropies are zero $H = H_0 = H_1 = 0$ and we may refer to the bits in the bitstream as being unused. 
In the case where $H > p_0H_0 + p_1H_1$, the preceding bit is a predictor for the succeeding bit which means that the bitstream contains real information ($I > 0$). -The computation of $I$ is implemented in `bitinformation(::Array)` +The computation of $I$ is implemented in `bitinformation(::AbstractArray)` ```julia julia> A = rand(UInt8,1000000) # fully random bits @@ -314,14 +253,11 @@ in the last significant bit (flips randomly). The information is maximised to of such a bit one can expect the next (or previous) bit to be the same due to the correlation. -[1] Jeffress, S., Düben, P. & Palmer, T. _Bitwise efficiency in chaotic models_. -*Proc. R. Soc. Math. Phys. Eng. Sci.* 473, 20170144 (2017). - ## Multi-dimensional real information The real information content $I_m$ for an $m$-dimensional array $A$ is the sum of the real information along the dimensions. Let $b_j$ be a bitstream obtained by unravelling -a given bitposition in along its $j$-th dimension. Although the unconditional entropy $H$ +a given bitposition in along its $j$-th dimension. Although the unconditional entropy $H$ is unchanged along the $m$-dimensions, the conditional entropies $H_0,H_1$ change as the preceding and succeeding bit is found in another dimension, e.g. $b_2$ is obtained by re-ordering $b_1$. Normalization by $\tfrac{1}{m}$ is applied to $I_m$ such that the @@ -331,49 +267,34 @@ maximum information is 1 bit in $I_m^*$ I_m^* = H - \frac{p_0}{m}\sum_{j=1}^mH_0(b_j) - \frac{p_1}{m}\sum_{j=1}^mH_1(b_j) ``` -This is implemented in BitInformation.jl as `bitinformation(::Array{T,N},:all_dimensions)`, e.g. +This is implemented in BitInformation.jl as `bitinformation(::AbstractArray,dim::int)`, e.g. 
```julia julia> A = rand(Float32,100,200,300) # a 3D array -julia> sort!(A,dims=1) # sort to create some auto-corelation -julia> bitinformation(A,:all_dimensions) +julia> sort!(A,dims=2) # sort to create some auto-corelation +julia> bitinformation(A,dim=2) # information along 2nd dimension 32-element Vector{Float64}: + 0.9284529526801016 + 0.12117292341467487 + 0.12117292341467487 + 0.12117292341467487 + 0.12097216822084497 + 0.11085252207817117 + 0.31618374026691876 + 0.5342647349810241 + 0.44490628938885407 + 0.2420859244356707 + 0.08943278228929732 + 0.01870979798293037 + 0.00221913365831955 0.0 0.0 - 0.0 - 0.0 - 6.447635701154324e-7 - 0.014292670110681693 - 0.31991625275425073 - 0.5440816091704278 - 0.36657938793446365 - 0.2533186226597825 - 0.13051374121057438 ⋮ 0.0 0.0 - 8.687738656254496e-7 - 6.251449893598012e-6 - 5.0038146715651134e-5 - 0.0003054976017733783 - 0.0015377166906772044 - 0.006581160530812665 - 0.022911924843179426 - 0.06155167545633838 - 0.0 ``` - -which is equivalent to - -```julia -julia> bitinformation(A,:all_dimensions) ≈ 1/3*(bitinformation(A,dims=1)+ - bitinformation(A,dims=2)+ - bitinformation(A,dims=3)) -true -``` - -The keyword `dims` will permute the dimensions in `A` to calcualte the information -in the specified dimensions. By default `dims=1`, which uses the ordering of the bits +The keyword `dim` will permute the dimensions in `A` to calcualte the information +in the specified dimensions. By default `dim=1`, which uses the ordering of the bits as they are layed out in memory. ## Redundancy @@ -386,8 +307,8 @@ R(r,s) = \frac{2M(r,s)}{H(r) + H(s)} `R` is the redundancy of information of $r$ in $s$ (and vice versa). $R = 1$ for identical bitstreams $r = s$, but $R = 0$ for statistically independent bitstreams. -BitInformation.jl implements the redundancy calculation via `redundancy(::Array{T,N},::Array{T,N})` -where the inputs have to be of same size and element type `T`. 
For example, shaving off some of +BitInformation.jl implements the redundancy calculation via `redundancy(::AbstractArray,::AbstractArray)` +where the inputs have to be of same size and element type. For example, shaving off some of the last significant bits will set the redundancy for those to 0, but redundancy is 1 for all bitstreams which are identical @@ -396,11 +317,11 @@ julia> r = rand(Float32,100_000) # random data julia> s = shave(r,7) # keep only sign, exp and sbits 1-7 julia> redundancy(r,s) 32-element Vector{Float64}: - 0.0 # 0 redundancy as entropy = 0 - 0.0 - 0.0 - 0.0 - 0.9999999999993566 # redundancy 1 as bitstreams are identical + 1.0 # redundancy 1 as bitstreams are identical + 1.0 + 1.0 + 1.0 + 0.9999999999993566 # rounding errors can yield ≈1 0.9999999999999962 1.0 1.0000000000000002 @@ -418,7 +339,7 @@ julia> redundancy(r,s) ## Preserved information The preserved information $P(r,s)$ between two bitstreams $r,s$ where $s$ approximates $r$ -is the redundancy-weighted real information $I$ +is the information-weighted redundancy $I$ ```math P(r,s) = R(r,s)I(r) @@ -486,4 +407,9 @@ set the real information $I = 0$. The calculation of $H_f$ is implemented as ```julia julia> BitInformation.binom_free_entropy(10_000_000,0.99) 4.786066739592698e-7 -``` \ No newline at end of file +``` + +## References + +[^1]: Jeffress, S., Düben, P. & Palmer, T. _Bitwise efficiency in chaotic models_. *Proc. R. Soc. Math. Phys. Eng. Sci.* 473, 20170144 (2017). + diff --git a/docs/src/functions.md b/docs/src/functions.md index 515b35e..848eaf3 100644 --- a/docs/src/functions.md +++ b/docs/src/functions.md @@ -1,19 +1,75 @@ # Index of functions in BitInformation.jl +### Information + +```@docs +BitInformation.bitinformation +BitInformation.mutual_information +BitInformation.redundancy +``` + +### Bit counting and entropy + +```@docs +BitInformation.bitpattern_entropy +BitInformation.bitpattern_entropy! 
+BitInformation.bitcount +BitInformation.bitcount_entropy +BitInformation.bitpair_count +``` + ### Significance of information ```@docs -BitInformation.binom_confidence(::Int,::Real) -BitInformation.binom_free_entropy(::Int,::Real) +BitInformation.binom_confidence +BitInformation.binom_free_entropy ``` ### Transformations ```@docs -bittranspose(::AbstractArray) -bitbacktranspose(::AbstractArray) -xor_delta(::Array{AbstractFloat,1}) -unxor_delta(::Array{AbstractFloat,1}) -signed_exponent(::Array{Float32}) -signed_exponent!(::Array{Float32}) -``` \ No newline at end of file +BitInformation.bittranspose +BitInformation.bitbacktranspose +BitInformation.xor_delta +BitInformation.xor_delta! +BitInformation.unxor_delta +BitInformation.unxor_delta! +BitInformation.signed_exponent +BitInformation.signed_exponent! +BitInformation.biased_exponent +BitInformation.biased_exponent! +``` + +### Rounding + +```@docs +Base.round(::Base.IEEEFloat,::Integer) +BitInformation.round! +BitInformation.get_shift +BitInformation.get_ulp_half +BitInformation.get_keep_mask +BitInformation.get_bit_mask +Base.iseven(::Base.IEEEFloat,::Integer) +Base.isodd(::Base.IEEEFloat,::Integer) +``` + +### Shaving, halfshaving, setting and bit grooming + +```@docs +BitInformation.shave +BitInformation.shave! +BitInformation.halfshave +BitInformation.halfshave! +BitInformation.set_one +BitInformation.set_one! +BitInformation.groom! +BitInformation.nsb +``` + +### Printing and BitArray conversion + +```@docs +Base.bitstring(::Base.IEEEFloat,::Symbol) +Base.BitArray(::Matrix) +``` + diff --git a/docs/src/index.md b/docs/src/index.md index 13baeba..6d7186b 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -2,7 +2,8 @@ ## Overview -[BitInformation.jl](https://github.com/milankl/BitInformation.jl) is a library for the analysis of bitwise information in n-dimensional Julia arrays. 
+[BitInformation.jl](https://github.com/milankl/BitInformation.jl) is a library for the analysis of bitwise information and +bitwise manipulation in n-dimensional Julia arrays. ## Installation @@ -14,8 +15,9 @@ where `]` opens the package manager. The latest version is automatically install ## Developers -BitInformation.jl is currently developed by [Milan Klöwer](https://github.com/milankl), any contributions are always welcome. +BitInformation.jl is currently developed by [Milan Klöwer](https://github.com/milankl). Any contributions are always welcome. ## Funding -This project is funded by the [Copernicus Programme](https://www.copernicus.eu/en/copernicus-services/atmosphere) through the [ECMWF summer of weather code 2020 and 2021](https://esowc.ecmwf.int/). \ No newline at end of file +This project is funded by the [Copernicus Programme](https://www.copernicus.eu/en/copernicus-services/atmosphere) +through the [ECMWF summer of weather code 2020 and 2021](https://esowc.ecmwf.int/). \ No newline at end of file diff --git a/docs/src/rounding.md b/docs/src/rounding.md index 0ccfd8d..d00fb52 100644 --- a/docs/src/rounding.md +++ b/docs/src/rounding.md @@ -6,6 +6,18 @@ BitInformation.jl implements them efficiently with bitwise operations, in-place or by creating a copy of the original array. +!!! tip "Bitstring split into sign, exponent and mantissa bits" + BitInformation.jl extends `Base.bitstring` with a split option to better visualise sign, exponent + and mantissa bits for floats. 
+ ```julia + julia> bitstring(1.1f0) + "00111111100011001100110011001101" + + julia> bitstring(1.1f0,:split) + "0 01111111 00011001100110011001101" + ``` + + ## Round to nearest With binary round to nearest a full-precision number is replaced by the nearest representable float diff --git a/docs/src/transformations.md b/docs/src/transformations.md index 0d6326c..36c1c1a 100644 --- a/docs/src/transformations.md +++ b/docs/src/transformations.md @@ -6,14 +6,18 @@ for lossless compression algorithms. !!! warning "Interpretation of transformed floats" BitInformation.jl will not store the information that a transformation was applied to a value. This means - that Julia will not know about this and interpret a value incorrectly. You will have to explicitly execute - the backtransform + that Julia will not know about this and interpret/print a value incorrectly. You will have to explicitly execute + the backtransform (and know which one!) to undo the transformation ```julia julia> A = [0f0,1f0] # 0 and 1 julia> At = bittranspose(A) # are transposed into 1f-35 and 0 2-element Vector{Float32}: - 1.0026967f-35 - 0.0 + 1.0026967f-35 + 0.0 + julia> bitbacktranspose(At) # reverse transpose + 2-element Vector{Float32}: + 0.0 + 1.0 ``` ## Bit transpose (aka shuffle) @@ -125,11 +129,10 @@ julia> bitstring.(Ax) ## Signed exponent -Floating-point numbers have a biased exponent. There are +IEEE Floating-point numbers have a biased exponent. 
There are [other ways to encode the exponent](https://en.wikipedia.org/wiki/Signed_number_representations#Comparison_table) and BitInformation.jl implements `signed_exponent` which transforms the exponent bits of a float into a representation where also the exponent has a sign bit (which is the first exponent bit) - ```julia julia> a = [0.5f0,1.5f0] # smaller than 1 (exp sign -1), larger than 1 (exp sign +1) julia> bitstring.(a,:split) @@ -142,4 +145,22 @@ julia> bitstring.(signed_exponent(a),:split) "0 10000001 00000000000000000000000" # signed exponent: sign=1, magnitude=1, i.e. 2^-1 "0 00000000 10000000000000000000000" # signed exponent: sign=0, magnitude=0, i.e. 2^0 ``` +The transformation `signed_exponent` can be undone with `biased_exponent` +```julia +julia> A = [0.5f0,1.5f0] +2-element Vector{Float32}: + 0.5 + 1.5 +julia> signed_exponent(A) +2-element Vector{Float32}: + 4.0 + 5.877472f-39 + +julia> biased_exponent(signed_exponent(A)) +2-element Vector{Float32}: + 0.5 + 1.5 +``` +Both `signed_exponent` and `biased_exponent` also exist as in-place functions +`signed_exponent!` and `biased_exponent!`. 
\ No newline at end of file diff --git a/src/BitInformation.jl b/src/BitInformation.jl index d5c6040..62fb306 100644 --- a/src/BitInformation.jl +++ b/src/BitInformation.jl @@ -5,24 +5,25 @@ module BitInformation export bittranspose, bitbacktranspose, xor_delta, unxor_delta, xor_delta!, unxor_delta!, - signed_exponent + signed_exponent, biased_exponent export bitinformation, mutual_information, redundancy, bitpattern_entropy, bitcount, bitcount_entropy, bitpaircount, bit_condprobability, bit_condentropy - import StatsBase.entropy + import StatsBase: entropy + import Distributions: quantile, Normal - include("which_uint.jl") + include("uint_types.jl") include("bitstring.jl") + include("bitarray.jl") include("bittranspose.jl") include("round_nearest.jl") include("shave_set_groom.jl") include("xor_delta.jl") include("signed_exponent.jl") - include("bit_information.jl") + include("bit_count.jl") include("mutual_information.jl") include("bitpattern_entropy.jl") include("binomial.jl") - end diff --git a/src/binomial.jl b/src/binomial.jl index e455e72..3c6f18f 100644 --- a/src/binomial.jl +++ b/src/binomial.jl @@ -1,5 +1,3 @@ -import Distributions: quantile, Normal - """ ```julia p₁ = binom_confidence(n::Int,c::Real) ``` @@ -16,7 +14,8 @@ julia> p₁ = BitInformation.binom_confidence(1000,0.95) ``` about 53.1% heads (or tails).""" function binom_confidence(n::Int,c::Real) - return 0.5 + quantile(Normal(),1-(1-c)/2)/(2*sqrt(n)) + p = 0.5 + quantile(Normal(),1-(1-c)/2)/(2*sqrt(n)) + return min(1.0,p) # cap probability at 1 (only important for n small) end """ diff --git a/src/bit_count.jl b/src/bit_count.jl new file mode 100644 index 0000000..d944bbb --- /dev/null +++ b/src/bit_count.jl @@ -0,0 +1,123 @@ +"""Count the occurrences of the 1-bit in bit position i across all elements of A.""" +function bitcount(A::AbstractArray{T},i::Int) where {T<:Unsigned} + nbits = sizeof(T)*8 # number of bits in T + @boundscheck i <= nbits || throw(error("Can't count bit $i for $nbits-bit type $T.")) 
+ + c = 0 # counter + shift = nbits-i # shift bit i in least significant position + mask = one(T) << shift # mask to isolate the bit in position i + @inbounds for a in A + c += (a & mask) >> shift # to have either 0x0 or 0x1 to increase counter + end + return c +end + +"""Count the occurences of the 1-bit in every bit position b across all elements of A.""" +function bitcount(A::AbstractArray{T}) where {T<:Union{Integer,AbstractFloat}} + nbits = 8*sizeof(T) # determine the size [bit] of elements in A + C = zeros(Int,nbits) + Auint = reinterpret(Base.uinttype(T),A) + + # loop over bit positions and for each count through all elements in A + # outer loop: bit position, inner loop: all elements in a + # note this is faster than swapping inner & outer loop + @inbounds for i in 1:nbits + C[i] = bitcount(Auint,i) + end + return C +end + +"""Entropy [bit] for bitcount functions. Maximised to 1bit for random uniformly +distributed bits in A.""" +function bitcount_entropy( A::AbstractArray{T}, # input array + base::Real=2 # entropy with base + ) where {T<:Union{Integer,AbstractFloat}} + + nbits = 8*sizeof(T) # number of bits in T + nelements = length(A) + + C = bitcount(A) # count 1 bits for each bit position + H = zeros(nbits) # allocate entropy for each bit position + + @inbounds for i in 1:nbits # loop over bit positions + p = C[i]/nelements # probability of bit 1 + H[i] = entropy([p,1-p],base) # entropy based on p + end + + return H +end + +"""Update counter array C of size nbits x 2 x 2 for every 00|01|10|11-bitpairing in a,b.""" +function bitpair_count!(C::Array{Int,3},a::T,b::T) where {T<:Integer} + nbits = 8*sizeof(T) + mask = one(T) # start with least significant bit + @inbounds for i in 0:nbits-1 # loop from least to most significant bit + j = 1+((a & mask) >>> i) # isolate that bit in a,b + k = 1+((b & mask) >>> i) # and move to 0x0 or 0x1s + C[nbits-i,j,k] += 1 # to be used as index j,k to increase counter C + mask <<= 1 # shift mask to get the next significant 
bit + end +end + +"""Returns counter array C of size nbits x 2 x 2 for every 00|01|10|11-bitpairing in elements of A,B.""" +function bitpair_count( A::AbstractArray{T}, + B::AbstractArray{T} + ) where {T<:Union{Integer,AbstractFloat}} + + @assert size(A) == size(B) "Size of A=$(size(A)) does not match size of B=$(size(B))" + + nbits = 8*sizeof(T) # number of bits in eltype(A),eltype(B) + C = zeros(Int,nbits,2,2) # array of bitpair counters + + # reinterpret arrays A,B as UInt (no mem allocation required) + Auint = reinterpret(Base.uinttype(T),A) + Buint = reinterpret(Base.uinttype(T),B) + + # loop over all elements in A,B pairwise, inner loop (within bitpair_count!): bit positions + # note this is faster than swapping inner & outer loop + for (a,b) in zip(Auint,Buint) + bitpair_count!(C,a,b) # count the bits and update counter array C + end + + return C +end + + +# """Update counter array C of size nbits x 2 x 2 for every 00|01|10|11-bitpairing in a,b.""" +# function bitpair_count!(C::Array{Int,3}, # counter array nbits x 2 x 2 +# A::AbstractArray{T}, # input array A +# B::AbstractArray{T}, # input array B +# i::Int # bit position +# ) where {T<:Integer} + +# nbits = 8*sizeof(T) # number of bits in T +# shift = nbits-i # shift bit i in least significant position +# mask = one(T) << shift # mask to isolate the bit in position i + +# @inbounds for (a,b) in zip(A,B) # loop over all elements in A,B pairwise +# j = 1+((a & mask) >>> shift) # isolate bit i in a,b +# k = 1+((b & mask) >>> shift) # and move to 0x0 or 0x1 +# C[i,j,k] += 1 # to be used as index j,k to increase counter C +# end +# end + +# """Returns counter array C of size nbits x 2 x 2 for every 00|01|10|11-bitpairing in elements of A,B.""" +# function bitpair_count( A::AbstractArray{T}, +# B::AbstractArray{T} +# ) where {T<:Union{Integer,AbstractFloat}} + +# @assert size(A) == size(B) "Size of A=$(size(A)) does not match size of B=$(size(B))" + +# nbits = 8*sizeof(T) # number of bits in eltype(A),eltype(B) 
+# C = zeros(Int,nbits,2,2) # array of bitpair counters + +# # reinterpret arrays A,B as UInt (no mem allocation required) +# Auint = reinterpret(Base.uinttype(T),A) +# Buint = reinterpret(Base.uinttype(T),B) + +# for i in 1:nbits +# bitpair_count!(C,Auint,Buint,i) # count the bits and update counter array C +# end + +# return C +# end \ No newline at end of file diff --git a/src/bit_information.jl b/src/bit_information.jl deleted file mode 100644 index c34fc83..0000000 --- a/src/bit_information.jl +++ /dev/null @@ -1,197 +0,0 @@ -"""Count the occurences of the 1-bit in bit position b across all elements of A.""" -function bitcount(A::AbstractArray{T},b::Int) where {T<:Union{Integer,AbstractFloat}} - N = sizeof(T)*8 # number of bits in T - @boundscheck b <= N || throw(error("Can't count bit $b for $N-bit type $T.")) - UIntT = whichUInt(T) - n = 0 # counter - shift = N-b # shift desired bit b - mask = one(UIntT) << shift # leftshift always pushes 0s - for a in A # mask everything but b and shift - n += (reinterpret(UIntT,a) & mask) >>> shift # to have either 0x00 or 0x01 - end - return n -end - -"""Count the occurences of the 1-bit in every bit position b across all elements of A.""" -function bitcount(A::AbstractArray{T}) where {T<:Union{Integer,AbstractFloat}} - N = 8*sizeof(T) # determine the size [bit] of elements in A - n = fill(0,N) # preallocate counters - for b in 1:N # loop over bit positions and count each - n[b] = bitcount(A,b) - end - return n -end - -"""Entropy [bit] for bitcount functions. Maximised to 1bit for random uniformly -distributed bits in A.""" -function bitcount_entropy(A::AbstractArray) - N = prod(size(A)) - ps = bitcount(A) / N - e = [abs(entropy([p,1-p],2)) for p in ps] - return e -end - -"""Count pairs of bits across elements of A for bit position b therein. 
-Returns a 4-element array with counts for 00,01,10,11.""" -function bitpaircount(A::AbstractArray{T},b::Int) where {T<:Union{Integer,AbstractFloat}} - N = sizeof(T)*8 # number of bits in T - @boundscheck b <= N || throw(error("Can't count bit $b for $N-bit type $T.")) - UIntT = whichUInt(T) - - n = [0,0,0,0] # counter for 00,01,10,11 - shift = N-b-1 # shift to the 2nd last position either 0x00,0x10 - shift2 = N-b # shift to last position 0x0 or 0x1 - mask = one(UIntT) << shift2 # mask everything except b - - # a1 is bit from previous entry in A, a2 is the current - # a1 is shifted to sit in the 2nd last position - # a2 sits in the last position - a1 = (reinterpret(UIntT,A[1]) & mask) >>> shift - @inbounds for i in 2:length(A) - a2 = (reinterpret(UIntT,A[i]) & mask) >>> shift2 - n[(a1 | a2)+1] += 1 - a1 = a2 << 1 - end - return n -end - -"""Count pairs of bits across elements of A for every bit position therein. -Returns a 4xn-array with counts for 00,01,10,11 in rows and every bit position -in columns.""" -function bitpaircount(A::AbstractArray{T}) where {T<:Union{Integer,AbstractFloat}} - N = 8*sizeof(T) # number of bits in T - n = fill(0,4,N) # store the 4 possible pair counts for every bit position - for b in 1:N - n[:,b] = bitpaircount(A,b) - end - return n -end - -"""Calculates the conditional probabilities of pairs 00,01,10,11 in the bit sequences -of array A. Returns a 4xn array with conditional probabilities p(nextbit=0|previousbit=0), -p(1|0),p(0|1),p(1|1) in rows and every bit position in columns. 
Returns NaN when -all bits are either 0,1 (in which case the conditional probability is not defined).""" -function bit_condprobability(A::Array{T}) where {T<:Union{Integer,AbstractFloat}} - N = prod(size(A[2:end])) # elements in array (A[1] is ) - n1 = bitcount(A[1:end-1]) - n0 = N.-n1 - npair = bitpaircount(A) - pcond = similar(npair,Float64) - pcond[1,:] = npair[1,:] ./ n0 - pcond[2,:] = npair[2,:] ./ n0 - pcond[3,:] = npair[3,:] ./ n1 - pcond[4,:] = npair[4,:] ./ n1 - return pcond -end - -"""Calculates the conditional entropy for 00,01,10,11 for every bit position across -all elements of A.""" -function bit_condentropy( A::Array{T}, - set_zero_insignificant::Bool=true, - confidence::Real=0.99) where {T<:Union{Integer,AbstractFloat}} - - pcond = bit_condprobability(A) - - # set NaN (occurs when occurences n=0) 0*-Inf = 0 here. - pcond[isnan.(pcond)] .= 0 - - # conditional entropy H given bit = 0 (H0) and bit = 1 (H1) - H0 = [entropy([p00,p01],2) for (p00,p01) in zip(pcond[1,:],pcond[2,:])] - H1 = [entropy([p10,p11],2) for (p10,p11) in zip(pcond[3,:],pcond[4,:])] - - # set insignficant to zero - if set_zero_insignificant - # get chance p for 1 (or 0) from binom distr - p = binom_confidence(length(A),confidence) - I₀ = 1 - entropy([p,1-p],2) # free entropy of random [bit] - H0[H0 .<= I₀] .= 0 # set insignficant to zero - H1[H1 .<= I₀] .= 0 - end - - return H0,H1 -end - -function bitinformation(A::AbstractArray,dims::Symbol) - if dims == :all_dimensions - N = prod(size(A))-1 # elements in array - n1 = bitcount(A)-bitcount([A[end]]) # occurences of bit = 1 - bi = fill(0.0,length(n1)) - permu = collect(1:ndims(A)) # permutation - perm0 = vcat(permu[2:end],permu[1]) # used to permute permu - for idim in 1:ndims(A) - Aperm = PermutedDimsArray(A,permu) - permute!(permu,perm0) - npair = bitpaircount(Aperm) - bi .+= bitinformation(n1,npair,N) - end - # arithmetic mean of information across the dimensions - return bi ./ ndims(A) - else - return bitinformation(A) - end -end 
- -function bitinformation(A::AbstractArray; - dims::Int=1, - set_zero_insignificant::Bool=true, - confidence::Real=0.99) - - N = prod(size(A))-1 # elements in array - n1 = bitcount(A)-bitcount([A[end]]) # occurences of bit = 1 - - if dims > 1 - permu = collect(1:ndims(A)) # permutation - perm0 = vcat(permu[2:end],permu[1]) # used to permute permu - for _ in 1:dims-1 - permute!(permu,perm0) - end - A = PermutedDimsArray(A,permu) - end - - npair = bitpaircount(A) - return bitinformation(n1,npair,N;set_zero_insignificant,confidence) -end - -function bitinformation(n1::Array{Int,1}, - npair::Array{Int,2}, - N::Int; - set_zero_insignificant::Bool=true, - confidence::Real=0.99) - - nbits = length(n1) - @assert size(npair) == (4,nbits) "npair array must be of size 4xnbits" - @assert maximum(n1) <= N "N cannot be smaller than maximum(n1)" - - n0 = N.-n1 # occurences of bit = 0 - q0 = n0/N # respective probabilities - q1 = n1/N - - pcond = similar(npair,Float64) # preallocate conditional probability - - pcond[1,:] = npair[1,:] ./ n0 # p(0|0) = n(00)/n(0) - pcond[2,:] = npair[2,:] ./ n0 # p(1|0) = n(01)/n(0) - pcond[3,:] = npair[3,:] ./ n1 # p(0|1) = n(10)/n(1) - pcond[4,:] = npair[4,:] ./ n1 # p(1|1) = n(11)/n(1) - - # set NaN (occurs when occurences n=0) 0*-Inf = 0 here. - pcond[isnan.(pcond)] .= 0 - - # unconditional entropy - H = [entropy([q0i,q1i],2) for (q0i,q1i) in zip(q0,q1)] - - # conditional entropy given bit = 0, bit = 1 - H0 = [entropy([p00,p01],2) for (p00,p01) in zip(pcond[1,:],pcond[2,:])] - H1 = [entropy([p10,p11],2) for (p10,p11) in zip(pcond[3,:],pcond[4,:])] - - # Information content - I = @. 
H - q0*H0 - q1*H1 - - # remove information that is insignificantly different from a random 50/50 - if set_zero_insignificant - p = binom_confidence(N,confidence) # get chance p for 1 (or 0) from binom distr - I₀ = 1 - entropy([p,1-p],2) # free entropy of random [bit] - I[I .<= I₀] .= 0 # set insignficant to zero - end - - return I -end diff --git a/src/bitarray.jl b/src/bitarray.jl new file mode 100644 index 0000000..8ea73f0 --- /dev/null +++ b/src/bitarray.jl @@ -0,0 +1,34 @@ +function Base.BitArray(A::AbstractArray{T}) where T + nbits = 8*sizeof(T) # number of bits in T + UIntN = Base.uinttype(T) # UInt type corresponding to T + + # allocate BitArray with additional dimension for the bits + B = BitArray(undef,nbits,size(A)...) + indices = CartesianIndices(A) + + for i in eachindex(A) # every element in A + a = reinterpret(UIntN,A[i]) # as UInt + mask = UIntN(1) # mask to isolate jth bit + for j in 0:nbits-1 # loop from less to more significant bits + bit = (a & mask) >> j # isolate bit (=true/false) + B[nbits-j,indices[i]] = bit # set BitArray entry to isolated bit in A + mask <<= 1 # shift mask towards more significant bit + end + end + return B +end + +function bitmap(A::AbstractMatrix;dim::Int=1) + @assert dim in (1,2) "dim has to be 1 or 2, $dim provided." + + #TODO bitordering for dim=2 is currently off + n,m = size(A) + B = BitArray(A) + nbits,_ = size(B) + Br = reshape(B,nbits*n,:) + Br = dim == 1 ? Br : Br' + + return Br +end + + diff --git a/src/bitpattern_entropy.jl b/src/bitpattern_entropy.jl index 0ab2818..7fea03e 100644 --- a/src/bitpattern_entropy.jl +++ b/src/bitpattern_entropy.jl @@ -1,52 +1,33 @@ -"""Calculate the bitpattern entropy in base `base` for all elements in array A.""" -function bitpattern_entropy(A::AbstractArray,base::Real=2) - nbits = sizeof(eltype(A))*8 - T = whichUInt(nbits) - return bitpattern_entropy(T,A,base) -end - -"""Calculate the bitpattern entropy in base `base` for all elements in array A. 
-In place version that will sort the array.""" -function bitpattern_entropy!(A::AbstractArray,base::Real=2) - nbits = sizeof(eltype(A))*8 - T = whichUInt(nbits) - return bitpattern_entropy!(T,A,base) -end - """Calculate the bitpattern entropy for an array A by reinterpreting the elements as UInts and sorting them to avoid creating a histogram.""" -function bitpattern_entropy(::Type{T},A::AbstractArray,base::Real) where {T<:Unsigned} - return bitpattern_entropy!(T,copy(A),base) +function bitpattern_entropy(A::AbstractArray{T},base::Real=2) where T + return bitpattern_entropy!(copy(A),base) # copy of array to avoid in-place changes to A end """Calculate the bitpattern entropy for an array A by reinterpreting the elements -as UInts and sorting them to avoid creating a histogram. -In-place version of bitpattern_entropy.""" -function bitpattern_entropy!(::Type{T},A::AbstractArray,base::Real) where {T<:Unsigned} +as UInts and sorting them to avoid creating a histogram. In-place version of bitpattern_entropy.""" +function bitpattern_entropy!(A::AbstractArray{T},base::Real=2) where T - # reinterpret to UInt then sort to avoid allocating a histogram - # in-place ver - sort!(reinterpret(T,vec(A))) # TODO maybe think of an inplace version? + # reinterpret to UInt then sort in-place for minimal memory allocation + sort!(reinterpret(Base.uinttype(T),vec(A))) n = length(A) E = 0.0 # entropy - m = 1 # counter + m = 1 # counter, start with 1 as only bitwise-same elements are counted for i in 1:n-1 @inbounds if A[i] == A[i+1] # check whether next entry belongs to the same bin in histogram - m += 1 + m += 1 # increase counter else - p = m/n - E -= p*log(p) - m = 1 + p = m/n # probability/frequency of ith bit pattern + E -= p*log(p) # entropy contribution + m = 1 # start with 1, A[i+1] is already 1st element of next bin end end - p = m/n # complete for last bin - E -= p*log(p) - - # convert to given base, 2 i.e. 
[bit] by default - E /= log(base) + p = m/n # complete loop for last bin + E -= p*log(p) # entropy contribution of last bin + E /= log(base) # convert to given base, 2 i.e. [bit] by default return E end \ No newline at end of file diff --git a/src/bitstring.jl b/src/bitstring.jl index 7a0e994..3e4ef80 100644 --- a/src/bitstring.jl +++ b/src/bitstring.jl @@ -1,32 +1,11 @@ """Bitstring function with a split-mode that splits the bitstring into sign, exponent and significant bits.""" -function Base.bitstring(x::Float16,mode::Symbol) +function Base.bitstring(x::T,mode::Symbol) where {T<:AbstractFloat} if mode == :split bstr = bitstring(x) - return "$(bstr[1]) $(bstr[2:6]) $(bstr[7:end])" + n = Base.exponent_bits(T) + return "$(bstr[1]) $(bstr[2:n+1]) $(bstr[n+2:end])" else bitstring(x) end -end - -"""Bitstring function with a split-mode that splits the bitstring into -sign, exponent and significant bits.""" -function Base.bitstring(x::Float32,mode::Symbol) - if mode == :split - bstr = bitstring(x) - return "$(bstr[1]) $(bstr[2:9]) $(bstr[10:end])" - else - bitstring(x) - end -end - -"""Bitstring function with a split-mode that splits the bitstring into -sign, exponent and significant bits.""" -function Base.bitstring(x::Float64,mode::Symbol) - if mode == :split - bstr = bitstring(x) - return "$(bstr[1]) $(bstr[2:12]) $(bstr[13:end])" - else - bitstring(x) - end -end +end \ No newline at end of file diff --git a/src/bittranspose.jl b/src/bittranspose.jl index 16f841c..22eaa36 100644 --- a/src/bittranspose.jl +++ b/src/bittranspose.jl @@ -2,9 +2,9 @@ const DEFAULT_BLOCKSIZE=2^14 """Transpose the bits (aka bit shuffle) of an array to place sign bits, etc. next to each other in memory. Back transpose via bitbacktranspose(). 
Inplace version that requires a -preallocated output array At, which must contain 0s everywhere.""" -function bittranspose!( ::Type{UIntT}, # UInt equiv to T - At::AbstractArray{T}, # transposed array +preallocated output array At, which must contain 0 bits everywhere.""" +function bittranspose!( ::Type{UIntT}, # UInt corresponding to T + At::AbstractArray{T}, # transposed array to be filled A::AbstractArray{T}; # original array blocksize::Int=DEFAULT_BLOCKSIZE) where {UIntT<:Unsigned,T} @@ -17,10 +17,9 @@ function bittranspose!( ::Type{UIntT}, # UInt equiv to T # At .= zero(T) # make sure output array is zero nblocks = (N-1) ÷ blocksize + 1 # number of blocks - for nb in 1:nblocks + @inbounds for nb in 1:nblocks lastindex = min(nb*blocksize,N) - Ablock = view(A,(nb-1)*blocksize+1:lastindex) Atblock = view(At,(nb-1)*blocksize+1:lastindex) @@ -44,8 +43,8 @@ function bittranspose!( ::Type{UIntT}, # UInt equiv to T # the first nbits elements go into same b in B, then next b idx = (i ÷ nbits) + 1 - @inbounds atui = reinterpret(UIntT,Atblock[idx]) - @inbounds Atblock[idx] = reinterpret(T,atui | bit) + atui = reinterpret(UIntT,Atblock[idx]) + Atblock[idx] = reinterpret(T,atui | bit) i += 1 end end @@ -57,8 +56,8 @@ end """Transpose the bits (aka bit shuffle) of an array to place sign bits, etc. next to each other in memory. Back transpose via bitbacktranspose().""" function bittranspose(A::AbstractArray{T};kwargs...) where T - UIntT = whichUInt(T) - At = fill(reinterpret(T,zero(UIntT)),size(A)) + UIntT = Base.uinttype(T) # get corresponding UInt to T + At = fill(reinterpret(T,zero(UIntT)),size(A)) # preallocate with zeros (essential) bittranspose!(UIntT,At,A;kwargs...) 
return At end @@ -78,7 +77,7 @@ function bitbacktranspose!( ::Type{UIntT}, # UInt equiv to T # A .= zero(T) # make sure output array is zero nblocks = (N-1) ÷ blocksize + 1 # number of blocks - for nb in 1:nblocks + @inbounds for nb in 1:nblocks lastindex = min(nb*blocksize,N) @@ -104,8 +103,8 @@ function bitbacktranspose!( ::Type{UIntT}, # UInt equiv to T # the first nbits elements go into same a in A, then next a idx = (i % nelements) + 1 - @inbounds aui = reinterpret(UIntT,Ablock[idx]) - @inbounds Ablock[idx] = reinterpret(T,aui | bit) + aui = reinterpret(UIntT,Ablock[idx]) + Ablock[idx] = reinterpret(T,aui | bit) i += 1 end end @@ -116,8 +115,8 @@ end """Backtranspose the bits of array A that were previously transposed with bittranspse().""" function bitbacktranspose(At::AbstractArray{T};kwargs...) where T - UIntT = whichUInt(T) - A = fill(reinterpret(T,zero(UIntT)),size(At)) + UIntT = Base.uinttype(T) # get corresponding UInt to T + A = fill(reinterpret(T,zero(UIntT)),size(At)) # preallocate with zeros (essential) bitbacktranspose!(UIntT,A,At;kwargs...) return A end diff --git a/src/mutual_information.jl b/src/mutual_information.jl index 98fabe7..a9afc08 100644 --- a/src/mutual_information.jl +++ b/src/mutual_information.jl @@ -1,137 +1,85 @@ - """Mutual information from the joint probability mass function p -of two variables X,Y. p is an NxM array which elements represent +of two variables X,Y. 
p is an nx x ny array which elements represent the probabilities of observing x~X and y~Y.""" function mutual_information(p::AbstractArray{T,2},base::Real=2) where T - @assert sum(p) ≈ one(T) "Entries in p have to sum to 1" + @assert sum(p) ≈ one(T) "Entries in p have to sum to 1" @assert all(p .>= zero(T)) "Entries in p have to be >= 1" - nx,ny = size(p) - - mpx = sum(p,dims=2) # marginal probabilities of x - mpy = sum(p,dims=1) + nx,ny = size(p) # events X are 1st dim, Y is 2nd + py = sum(p,dims=1) # marginal probabilities of y + px = sum(p,dims=2) # marginal probabilities of x - M = zero(T) - for j in 1:ny + M = zero(T) # mutual information M + for j in 1:ny # loop over all entries in p for i in 1:nx + # add entropy only for non-zero entries in p if p[i,j] > zero(T) - M += p[i,j]*log(p[i,j]/mpx[i]/mpy[j]) + M += p[i,j]*log(p[i,j]/px[i]/py[j]) end end end - M /= log(base) + M /= log(base) # convert to given base end """Mutual bitwise information of the elements in input arrays A,B. A and B have to be of same size and eltype.""" -function bitinformation(A::AbstractArray{T}, - B::AbstractArray{T}; - set_zero_insignificant::Bool=true, - confidence::Real=0.99 - ) where {T<:Union{Integer,AbstractFloat}} +function mutual_information(A::AbstractArray{T}, + B::AbstractArray{T}; + set_zero_insignificant::Bool=true, + confidence::Real=0.99 + ) where {T<:Union{Integer,AbstractFloat}} - @assert size(A) == size(B) - nelements = length(A) - nbits = 8*sizeof(T) + nelements = length(A) # number of elements in each A and B + nbits = 8*sizeof(T) # number of bits in eltype(A),eltype(B) - # array of counters - C = zeros(Int,nbits,2,2) + C = bitpair_count(A,B) # nbits x 2 x 2 array of bitpair counters + M = zeros(nbits) # allocate mutual information array + P = zeros(2,2) # allocate joint probability mass function - for (a,b) in zip(A,B) # run through all elements in A,B pairwise - bitcount!(C,a,b) # count the bits and update counter array C + @inbounds for i in 1:nbits # mutual 
information for every bit position + for j in 1:2, k in 1:2 # joint prob mass from counter C + P[j,k] = C[i,j,k]/nelements + end + M[i] = mutual_information(P) end - # P is the join probabilities mass function - P = [C[i,:,:]/nelements for i in 1:nbits] - M = [mutual_information(p) for p in P] - # remove information that is insignificantly different from a random 50/50 if set_zero_insignificant - p = binom_confidence(nelements,confidence) # get chance p for 1 (or 0) from binom distr - M₀ = 1 - entropy([p,1-p],2) # free entropy of random 50/50 at trial size - M[M .<= M₀] .= 0 # set zero insignificant - end - - return M -end - -"""Mutual bitwise information of the elements in input arrays A,B. -A and B have to be of same size and eltype.""" -function bitinformation(A::AbstractArray{T}, - B::AbstractArray{T}, - n::Int) where {T<:Union{Integer,AbstractFloat}} - - @assert size(A) == size(B) - nelements = length(A) - nbits = 8*sizeof(T) - - # array of counters, first dim is from last bit to first - C = [zeros(Int,2^min(i,n+1),2) for i in nbits:-1:1] - - for (a,b) in zip(A,B) # run through all elements in A,B pairwise - bitcount!(C,a,b,n) # count the bits and update counter array C + Hf = binom_free_entropy(nelements,confidence) # free entropy of random 50/50 at trial size + # get chance p for 1 (or 0) from binom distr + @inbounds for i in eachindex(M) + M[i] = M[i] <= Hf ? 
0 : M[i] # set zero what's insignificant + end end - # P is the join probabilities mass function - P = [C[i]/nelements for i in 1:nbits] - M = [mutual_information(p) for p in P] - - # filter out rounding errors - M[isapprox.(M,0,atol=10eps(Float64))] .= 0 return M end -"""Update counter array of size nbits x 2 x 2 for every -00|01|10|11-bitpairing in a,b.""" -function bitcount!(C::Array{Int,3},a::T,b::T) where {T<:AbstractFloat} - uia = reinterpret(Base.uinttype(T),a) - uib = reinterpret(Base.uinttype(T),b) - bitcount!(C,uia,uib) -end - -"""Update counter array of size nbits x 2 x 2 for every -00|01|10|11-bitpairing in a,b.""" -function bitcount!(C::Vector{Matrix{Int}},a::T,b::T,n::Int) where {T<:AbstractFloat} - uia = reinterpret(Base.uinttype(T),a) - uib = reinterpret(Base.uinttype(T),b) - bitcount!(C,uia,uib,n) -end - -"""Update counter array of size nbits x 2 x 2 for every -00|01|10|11-bitpairing in a,b.""" -function bitcount!(C::Array{Int,3},a::T,b::T) where {T<:Integer} - nbits = 8*sizeof(T) - mask = one(T) << (nbits-1) - for i in 1:nbits - j = 1+((a & mask) >>> (nbits-i)) - k = 1+((b & mask) >>> (nbits-i)) - C[i,j,k] += 1 - mask >>>= 1 - end -end - -"""Update counter array of size nbits x 2 x 2 for every -00|01|10|11-bitpairing in a,b.""" -function bitcount!(C::Vector{Matrix{Int}},a::T,b::T,n::Int) where {T<:Integer} - - nbits = 8*sizeof(T) - - for i = nbits:-1:1 - @boundscheck size(C[end-i+1],1) == 2^min(i,n+1) || throw(BoundsError()) +"""Bitwise real information content of array `A` calculated from the bitwise mutual information +in adjacent entries in A along dimension `dim`.""" +function bitinformation(A::AbstractArray{T}; + dim::Int=1, + kwargs...) 
where {T<:Union{Integer,AbstractFloat}} + + # Permute A to take adjacent entry in dimension dim + if dim > 1 + permu = collect(1:ndims(A)) # permutation + perm0 = vcat(permu[2:end],permu[1]) # used to permute permu + for _ in 1:dim-1 # create permutation array + permute!(permu,perm0) + end + A = PermutedDimsArray(A,permu) # permute A, such that desired dim is 1st dim end - maskb = one(T) << (nbits-1) - maska = reinterpret(T,signed(maskb) >> n) + # create a two views on A for pairs of adjacent entries + n = size(A)[1] # n elements in dim - for i in 1:nbits - j = 1+((a & maska) >>> max(nbits-n-i,0)) - k = 1+((b & maskb) >>> (nbits-i)) - C[i][j,k] += 1 + # as A is now permuted always select 1st dimension + A1view = selectdim(A,1,1:n-1) # no adjacent entries in A array bounds + A2view = selectdim(A,1,2:n) # for same indices A2 is the adjacent entry to A1 - maska >>>= 1 # shift through nbits - maskb >>>= 1 - end + return mutual_information(A1view,A2view;kwargs...) end """Calculate the bitwise redundancy of two arrays A,B. Redundancy @@ -139,28 +87,101 @@ is a normalised measure of the mutual information 1 for always identical/opposite bits, 0 for no mutual information.""" function redundancy(A::AbstractArray{T}, B::AbstractArray{T}) where {T<:Union{Integer,AbstractFloat}} - mutinf = bitinformation(A,B) # mutual information + + M = mutual_information(A,B) # mutual information HA = bitcount_entropy(A) # entropy of A HB = bitcount_entropy(B) # entropy of B - R = 2mutinf./(HA+HB) # redundancy (symmetric) + + R = ones(size(M)...) # allocate redundancy R + + for i in eachindex(M) # loop over bit positions + HAB = HA[i]+HB[i] # sum of entropies of A,B + if HAB > 0 # entropy = 0 means A,B are fully redundant + R[i] = 2*M[i]/HAB # symmetric redundancy + end + end - R[iszero.(HA+HB)] .= 0.0 # HA+HB = 0 creates NaN return R end -"""Calculate the bitwise redundancy of two arrays A,B. 
-Multi-bit predictor version which includes n lesser significant bit -as additional predictors in A for the mutual information to account -for round-to-nearest-induced carry bits. Redundancy is normalised -by the entropy of A. To be used for A being the reference array -and B an approximation of it.""" -function redundancy(A::AbstractArray{T}, - B::AbstractArray{T}, - n::Int) where {T<:Union{Integer,AbstractFloat}} - mutinf = bitinformation(A,B,n) # mutual information - HA = bitcount_entropy(A) # entropy of A - R = mutinf./HA # redundancy (asymmetric) +# """Mutual bitwise information of the elements in input arrays A,B. +# A and B have to be of same size and eltype.""" +# function bitinformation(A::AbstractArray{T}, +# B::AbstractArray{T}, +# n::Int) where {T<:Union{Integer,AbstractFloat}} + +# @assert size(A) == size(B) +# nelements = length(A) +# nbits = 8*sizeof(T) - R[iszero.(HA)] .= 0.0 # HA = 0 creates NaN - return R -end \ No newline at end of file +# # array of counters, first dim is from last bit to first +# C = [zeros(Int,2^min(i,n+1),2) for i in nbits:-1:1] + +# for (a,b) in zip(A,B) # run through all elements in A,B pairwise +# bitcount!(C,a,b,n) # count the bits and update counter array C +# end + +# # P is the join probabilities mass function +# P = [C[i]/nelements for i in 1:nbits] +# M = [mutual_information(p) for p in P] + +# # filter out rounding errors +# M[isapprox.(M,0,atol=10eps(Float64))] .= 0 +# return M +# end + +# """Update counter array of size nbits x 2 x 2 for every +# 00|01|10|11-bitpairing in a,b.""" +# function bitcount!(C::Array{Int,3},a::T,b::T) where {T<:AbstractFloat} +# uia = reinterpret(Base.uinttype(T),a) +# uib = reinterpret(Base.uinttype(T),b) +# bitcount!(C,uia,uib) +# end + +# """Update counter array of size nbits x 2 x 2 for every +# 00|01|10|11-bitpairing in a,b.""" +# function bitcount!(C::Vector{Matrix{Int}},a::T,b::T,n::Int) where {T<:AbstractFloat} +# uia = reinterpret(Base.uinttype(T),a) +# uib = 
reinterpret(Base.uinttype(T),b) +# bitcount!(C,uia,uib,n) +# end + +# """Update counter array of size nbits x 2 x 2 for every +# 00|01|10|11-bitpairing in a,b.""" +# function bitcount!(C::Vector{Matrix{Int}},a::T,b::T,n::Int) where {T<:Integer} + +# nbits = 8*sizeof(T) + +# for i = nbits:-1:1 +# @boundscheck size(C[end-i+1],1) == 2^min(i,n+1) || throw(BoundsError()) +# end + +# maskb = one(T) << (nbits-1) +# maska = reinterpret(T,signed(maskb) >> n) + +# for i in 1:nbits +# j = 1+((a & maska) >>> max(nbits-n-i,0)) +# k = 1+((b & maskb) >>> (nbits-i)) +# C[i][j,k] += 1 + +# maska >>>= 1 # shift through nbits +# maskb >>>= 1 +# end +# end + +# """Calculate the bitwise redundancy of two arrays A,B. +# Multi-bit predictor version which includes n lesser significant bit +# as additional predictors in A for the mutual information to account +# for round-to-nearest-induced carry bits. Redundancy is normalised +# by the entropy of A. To be used for A being the reference array +# and B an approximation of it.""" +# function redundancy(A::AbstractArray{T}, +# B::AbstractArray{T}, +# n::Int) where {T<:Union{Integer,AbstractFloat}} +# mutinf = bitinformation(A,B,n) # mutual information +# HA = bitcount_entropy(A) # entropy of A +# R = mutinf./HA # redundancy (asymmetric) + +# R[iszero.(HA)] .= 0.0 # HA = 0 creates NaN +# return R +# end \ No newline at end of file diff --git a/src/shave_set_groom.jl b/src/shave_set_groom.jl index f23513e..93dc56d 100644 --- a/src/shave_set_groom.jl +++ b/src/shave_set_groom.jl @@ -132,5 +132,5 @@ for func in (:shave,:halfshave,:set_one,:groom) end end -# """Number of significant bits `nsb` given the number of significant digits `nsd`.""" -# nsb(nsd::Integer) = Integer(ceil(log(10)/log(2)*nsd)) \ No newline at end of file +"""Number of significant bits `nsb` given the number of significant digits `nsd`.""" +nsb(nsd::Integer) = Integer(ceil(log(10)/log(2)*nsd)) \ No newline at end of file diff --git a/src/signed_exponent.jl 
b/src/signed_exponent.jl index 7a1f7bd..d9d9245 100644 --- a/src/signed_exponent.jl +++ b/src/signed_exponent.jl @@ -1,32 +1,59 @@ """In-place version of `signed_exponent(::Array)`.""" -function signed_exponent!(A::Array{T}) where {T<:Union{Float16,Float32,Float64}} +function signed_exponent!(A::AbstractArray{T}) where {T<:Base.IEEEFloat} # sign&fraction mask sfmask = Base.sign_mask(T) | Base.significand_mask(T) - emask = Base.exponent_mask(T) + emask = Base.exponent_mask(T) + esignmask = Base.sign_mask(T) >> 1 # exponent sign mask (1st exp bit) sbits = Base.significand_bits(T) bias = Base.exponent_bias(T) - ebits = Base.exponent_bits(T)-1 for i in eachindex(A) ui = reinterpret(Unsigned,A[i]) - sf = ui & sfmask # sign & fraction bits - e = ((ui & emask) >> sbits) - bias # de-biased exponent - eabs = e == -bias ? 0 : abs(e) # for iszero(A[i]) e == -bias, set to 0 - esign = (e < 0 ? 1 : 0) << ebits # determine sign of exponent - esigned = ((esign | eabs) % typeof(ui)) << sbits # concatentate exponent + sf = ui & sfmask # sign & fraction bits + e = (((ui & emask) >> sbits) % Int) - bias # de-biased exponent + eabs = abs(e) % typeof(ui) # magnitude of exponent + esign = e < 0 ? esignmask : zero(esignmask) # determine sign of exponent + esigned = esign | (eabs << sbits) # concatentate exponent A[i] = reinterpret(T,sf | esigned) # concatenate everything back together end end +"""In-place version of `biased_exponent(::Array)`. 
Inverse of `signed_exponent!".""" +function biased_exponent!(A::AbstractArray{T}) where {T<:Base.IEEEFloat} + + # sign&fraction mask + sfmask = Base.sign_mask(T) | Base.significand_mask(T) + emask = Base.exponent_mask(T) + esignmask = Base.sign_mask(T) >> 1 + eabsmask = emask & ~esignmask + + sbits = Base.significand_bits(T) + bias = Base.uinttype(T)(Base.exponent_bias(T)) + + for i in eachindex(A) + ui = reinterpret(Unsigned,A[i]) + sf = ui & sfmask # sign & fraction bits + eabs = ((ui & eabsmask) >> sbits) # isolate sign-magnitude exponent + esign = (ui & esignmask) == esignmask ? true : false + ebiased = bias + (esign ? -eabs : eabs) # concatenate mag&sign and add bias + ebiased <<= sbits # shit exponent in position + + # 10000..., i.e. negative zero in the sign-magintude exponent is mapped + # back to NaN/Inf to make signed_exponent fully reversible + ebiased = esign & (eabs == 0) ? emask : ebiased + A[i] = reinterpret(T,sf | ebiased) # concatenate everything back together + end +end + """ ```julia -B = signed_exponent(A::Array{T}) where {T<:Union{Float16,Float32,Float64}} +B = signed_exponent(A::AbstractArray{T}) where {T<:Base.IEEEFloat} ``` Converts the exponent bits of Float16,Float32 or Float64-arrays from its -conventional biased-form into a sign&magnitude representation. +IEEE standard biased-form into a sign-magnitude representation. # Example @@ -38,11 +65,25 @@ julia> bitstring.(signed_exponent([10f0]),:split)[1] "0 00000011 01000000000000000000000" ``` -In the former the exponent 3 is interpret from 0b10000010=130 via subtraction of -the exponent bias of Float32 = 127. In the latter the exponent is inferred from -sign bit (0) and a magnitude represetation 2^1 + 2^1 = 3.""" +In the IEEE standard floats the exponent 3 is interpret from 0b10000010=130 +via subtraction of the exponent bias of Float32 = 127. In sign-magnitude +representation the exponent is inferred from the first exponent (0) as sign bit +and a magnitude 2^1 + 2^1 = 3. 
NaN/Inf exponent bits are mapped to negative zero +in sign-magnitude representation which is exactly reversed with `biased_exponent`.""" function signed_exponent(A::Array{T}) where {T<:Union{Float16,Float32,Float64}} B = copy(A) signed_exponent!(B) return B +end + +""" +```julia +B = biased_exponent(A::AbstractArray{T}) where {T<:Base.IEEEFloat} +``` +Convert the signed exponents from `signed_exponent` back into the +standard biased exponents of IEEE floats.""" +function biased_exponent(A::Array{T}) where {T<:Union{Float16,Float32,Float64}} + B = copy(A) + biased_exponent!(B) + return B end \ No newline at end of file diff --git a/src/uint_types.jl b/src/uint_types.jl new file mode 100644 index 0000000..5a3b193 --- /dev/null +++ b/src/uint_types.jl @@ -0,0 +1,21 @@ +# define the uints for various formats +Base.uinttype(::Type{UInt8}) = UInt8 +Base.uinttype(::Type{UInt16}) = UInt16 +Base.uinttype(::Type{UInt32}) = UInt32 +Base.uinttype(::Type{UInt64}) = UInt64 + +Base.uinttype(::Type{Int8}) = UInt8 +Base.uinttype(::Type{Int16}) = UInt16 +Base.uinttype(::Type{Int32}) = UInt32 +Base.uinttype(::Type{Int64}) = UInt64 + +# uints for other types are identified by their byte size +Base.uinttype(::Type{T}) where T = Base.uinttype(sizeof(T)*8) + +function Base.uinttype(nbits::Integer) + nbits == 8 && return UInt8 + nbits == 16 && return UInt16 + nbits == 32 && return UInt32 + nbits == 64 && return UInt64 + throw(error("Only n=8,16,32,64 bits supported.")) +end \ No newline at end of file diff --git a/src/which_uint.jl b/src/which_uint.jl deleted file mode 100644 index 40d9b81..0000000 --- a/src/which_uint.jl +++ /dev/null @@ -1,9 +0,0 @@ -function whichUInt(n::Integer) - n == 8 && return UInt8 - n == 16 && return UInt16 - n == 32 && return UInt32 - n == 64 && return UInt64 - throw(error("Only n=8,16,32,64 supported.")) -end - -whichUInt(::Type{T}) where T = whichUInt(sizeof(T)*8) diff --git a/src/xor_delta.jl b/src/xor_delta.jl index 3fc9ebd..6084cc6 100644 --- 
a/src/xor_delta.jl +++ b/src/xor_delta.jl @@ -3,10 +3,10 @@ first element is left unchanged. E.g. [0b0011,0b0010] -> [0b0011,0b0001] """ function xor_delta!(A::Array{T,1}) where {T<:Unsigned} a = A[1] - @inbounds for i in 2:length(A) + @inbounds for i in 2:length(A) # skip first element b = A[i] - A[i] = a ⊻ b - a = b + A[i] = a ⊻ b # XOR with prev element + a = b # make next (un-XORed) element prev for next iteration end end @@ -14,9 +14,9 @@ end E.g. [0b0011,0b0001] -> [0b0011,0b0010] """ function unxor_delta!(A::Array{T,1}) where {T<:Unsigned} a = A[1] - @inbounds for i in 2:length(A) + @inbounds for i in 2:length(A) # skip first element b = A[i] - a = a ⊻ b + a = a ⊻ b # un-XOR and store un-XORed a for next iteration A[i] = a end end @@ -58,8 +58,8 @@ end """Bitwise XOR delta. Elements include A are XORed with the previous one. The first element is left unchanged. E.g. [0b0011,0b0010] -> [0b0011,0b0001]. """ -xor_delta(A::Array{T,1}) where {T<:AbstractFloat} = xor_delta(whichUInt(T),A) +xor_delta(A::Array{T,1}) where {T<:AbstractFloat} = xor_delta(Base.uinttype(T),A) """Undo bitwise XOR delta. Elements include A are XORed again to reverse xor_delta. E.g. [0b0011,0b0001] -> [0b0011,0b0010] """ -unxor_delta(A::Array{T,1}) where {T<:AbstractFloat} = unxor_delta(whichUInt(T),A) +unxor_delta(A::Array{T,1}) where {T<:AbstractFloat} = unxor_delta(Base.uinttype(T),A) diff --git a/test/bitcount.jl b/test/bitcount.jl new file mode 100644 index 0000000..08bfb7c --- /dev/null +++ b/test/bitcount.jl @@ -0,0 +1,66 @@ +@testset "Bitcount" begin + @test bitcount(UInt8[1,2,4,8,16,32,64,128]) == ones(8) + @test bitcount(collect(0x0000:0xffff)) == 2^15*ones(16) + + N = 100_000 + c = bitcount(rand(N)) + @test c[1] == 0 # sign always 0 + @test c[2] == 0 # first expbit always 0, i.e. 
U(0,1) < 1 + @test c[3] == N # second expont always 1 + + @test all(isapprox.(c[15:50],N/2,rtol=1e-1)) +end + +@testset "Bitcountentropy" begin + + # test the PRNG on uniformity + N = 100_000 + H = bitcount_entropy(rand(UInt8,N)) + @test all(isapprox.(H,ones(8),rtol=5e-4)) + + H = bitcount_entropy(rand(UInt16,N)) + @test all(isapprox.(H,ones(16),rtol=5e-4)) + + H = bitcount_entropy(rand(UInt32,N)) + @test all(isapprox.(H,ones(32),rtol=5e-4)) + + H = bitcount_entropy(rand(UInt64,N)) + @test all(isapprox.(H,ones(64),rtol=5e-4)) + + # also for random floats + H = bitcount_entropy(rand(N)) + @test H[1:5] == zeros(5) # first bits never change + @test all(isapprox.(H[16:55],ones(40),rtol=1e-4)) +end + +@testset "Bit pair count" begin + for T in (Float16,Float32,Float64) + N = 10_000 + A = rand(T,N) + C1 = bitpair_count(A,A) # count bitpairs with 2 equiv arrays + C2 = bitcount(A) # compare to bits counted in that array + + nbits = 8*sizeof(T) + for i in 1:nbits + @test C1[i,1,2] == 0 # no 01 pair for bitpair_count(A,A) + @test C1[i,2,1] == 0 # no 10 + @test C1[i,1,1] + C1[i,2,2] == N # 00 + 11 are all cases = N + @test C1[i,2,2] == C2[i] # 11 is the same as bitcount(A) + end + + Auint = reinterpret(Base.uinttype(T),A) + Buint = .~Auint # flip all bits in A + + C3 = bitpair_count(Auint,Auint) + @test C1 == C3 # same when called with uints + + C4 = bitpair_count(Auint,Buint) + for i in 1:nbits + @test C4[i,1,2] + C4[i,2,1] == N # 01, 10 are all cases = N + @test C1[i,1,1] == C4[i,1,2] # 00 before is now 01 + @test C1[i,2,2] == C4[i,2,1] # 11 before is now 10 + @test C4[i,1,1] == 0 # no 00 + @test C4[i,2,2] == 0 # no 11 + end + end +end \ No newline at end of file diff --git a/test/bitpattern_entropy.jl b/test/bitpattern_entropy.jl new file mode 100644 index 0000000..8d29694 --- /dev/null +++ b/test/bitpattern_entropy.jl @@ -0,0 +1,14 @@ +@testset "Bit pattern entropy" begin + for N in [100,1000,10000,100000] + # every bit pattern is only hit once, hence entropy = log2(N) + 
@test isapprox(log2(N),bitpattern_entropy(rand(Float32,N)),atol=1e-1) + @test isapprox(log2(N),bitpattern_entropy(rand(Float64,N)),atol=1e-1) + end + + N = 1000_000 # more bit pattern than there are in 8 or 16-bit + @test isapprox(16.0,bitpattern_entropy(rand(UInt16,N)),atol=1e-1) + @test isapprox(16.0,bitpattern_entropy(rand(Int16,N)),atol=1e-1) + + @test isapprox(8.0,bitpattern_entropy(rand(UInt8,N)),atol=1e-1) + @test isapprox(8.0,bitpattern_entropy(rand(Int8,N)),atol=1e-1) +end \ No newline at end of file diff --git a/test/bitstring.jl b/test/bitstring.jl new file mode 100644 index 0000000..6de4426 --- /dev/null +++ b/test/bitstring.jl @@ -0,0 +1,30 @@ +@testset "Bitstring with split" begin + for T in (Float16,Float32,Float64) + for _ in 1:30 + r = randn() + bs_split = bitstring(r,:split) + bs = bitstring(r) + @test bs == prod(split(bs_split," ")) + end + end + + # some individual values too + @test bitstring(Float16( 0),:split) == "0 00000 0000000000" + @test bitstring(Float16( 1),:split) == "0 01111 0000000000" + @test bitstring(Float16(-1),:split) == "1 01111 0000000000" + @test bitstring(Float16( 2),:split) == "0 10000 0000000000" + + @test bitstring( 0f0,:split) == "0 00000000 00000000000000000000000" + @test bitstring( 1f0,:split) == "0 01111111 00000000000000000000000" + @test bitstring(-1f0,:split) == "1 01111111 00000000000000000000000" + @test bitstring( 2f0,:split) == "0 10000000 00000000000000000000000" + + @test bitstring( 0.0,:split) == + "0 00000000000 0000000000000000000000000000000000000000000000000000" + @test bitstring( 1.0,:split) == + "0 01111111111 0000000000000000000000000000000000000000000000000000" + @test bitstring(-1.0,:split) == + "1 01111111111 0000000000000000000000000000000000000000000000000000" + @test bitstring( 2.0,:split) == + "0 10000000000 0000000000000000000000000000000000000000000000000000" +end \ No newline at end of file diff --git a/test/information.jl b/test/information.jl index d3177bd..750b278 100644 --- 
a/test/information.jl +++ b/test/information.jl @@ -1,87 +1,48 @@ -@testset "Bitpattern entropy" begin - for N in [100,1000,10000,100000] - # every bitpattern is only hit once, hence entropy = log2(N) - @test isapprox(log2(N),bitpattern_entropy(rand(Float32,N)),atol=1e-1) - @test isapprox(log2(N),bitpattern_entropy(rand(Float64,N)),atol=1e-1) - end - - N = 1000_000 # more bitpattern than there are in 8 or 16-bit - @test isapprox(16.0,bitpattern_entropy(rand(UInt16,N)),atol=1e-1) - @test isapprox(16.0,bitpattern_entropy(rand(Int16,N)),atol=1e-1) - - @test isapprox(8.0,bitpattern_entropy(rand(UInt8,N)),atol=1e-1) - @test isapprox(8.0,bitpattern_entropy(rand(Int8,N)),atol=1e-1) -end - -@testset "Bitcount" begin - @test bitcount(UInt8[1,2,4,8,16,32,64,128]) == ones(8) - @test bitcount(collect(0x0000:0xffff)) == 2^15*ones(16) - - N = 100_000 - c = bitcount(rand(N)) - @test c[1] == 0 # sign always 0 - @test c[2] == 0 # first expbit always 0, i.e. U(0,1) < 1 - @test c[3] == N # second expont always 1 - - @test all(isapprox.(c[15:50],N/2,rtol=1e-1)) -end - -@testset "Bitcountentropy" begin +@testset "Bitinformation random" begin + N = 10_000 - # test the PRNG on uniformity - N = 100_000 - H = bitcount_entropy(rand(UInt8,N)) - @test all(isapprox.(H,ones(8),rtol=5e-4)) + for T in (UInt8,UInt16,UInt32,UInt64) + A = rand(T,N) + @test all(bitinformation(A,set_zero_insignificant=false) .< 1e-3) + end - H = bitcount_entropy(rand(UInt16,N)) - @test all(isapprox.(H,ones(16),rtol=5e-4)) + for T in (Float16,Float32,Float64) + A = rand(T,N) - H = bitcount_entropy(rand(UInt32,N)) - @test all(isapprox.(H,ones(32),rtol=5e-4)) + # increase confidence to filter out more information for reliable tests... + # i.e. 
lower the risk of false positives + bi = bitinformation(A,confidence=0.9999) - H = bitcount_entropy(rand(UInt64,N)) - @test all(isapprox.(H,ones(64),rtol=5e-4)) + # no bit should contain information, insignificant + # information should be filtered out + @test all(bi .== 0.0) - # also for random floats - H = bitcount_entropy(rand(N)) - @test H[1:5] == zeros(5) # first bits never change - @test all(isapprox.(H[16:55],ones(40),rtol=1e-4)) + sort!(A) # introduce some information via sorting + @test sum(bitinformation(A)) > 9 # some bits of information (guessed) + end end -@testset "Bitinformation random" begin - N = 100_000 - A = rand(UInt64,N) - @test all(bitinformation(A) .< 1e-3) - - A = rand(Float32,N) - bi = bitinformation(A) - @test all(bi[1:4] .== 0.0) # the first bits are always 0011 - sort!(A) - @test sum(bitinformation(A)) > 12 -end +@testset "Bitinformation dimensions" begin -@testset "Bitinformation in chunks" begin - N = 1000 - Nhalf = N ÷ 2 - A = rand(UInt32,N) - n11,npair1,N1 = bitcount(A[1:Nhalf-1]),bitpaircount(A[1:Nhalf]),Nhalf - n12,npair2,N2 = bitcount(A[Nhalf:end-1]),bitpaircount(A[Nhalf:end]),Nhalf-1 - @test bitinformation(n11+n12,npair1+npair2,N1+N2) == bitinformation(A) -end + for T in (Float16,Float32,Float64) + A = rand(T,30,40,50) + @test bitinformation(A) == bitinformation(A,dim=1) + + bi1 = bitinformation(A,dim=1) + bi2 = bitinformation(A,dim=2) + bi3 = bitinformation(A,dim=3) + + nbits = 8*sizeof(T) + for i in 1:nbits + @test bi1[i] ≈ bi2[i] atol=1e-3 + @test bi2[i] ≈ bi3[i] atol=1e-3 + @test bi1[i] ≈ bi3[i] atol=1e-3 + end -@testset "Bitinformation dimensions" begin - A = rand(Float32,30,40,50) - @test bitinformation(A) == bitinformation(A,dims=1) - - # check that the :all_dimensions flag is - bi1 = bitinformation(A,dims=1) - bi2 = bitinformation(A,dims=2) - bi3 = bitinformation(A,dims=3) - @test bitinformation(A,:all_dimensions) == ((bi1 .+ bi2 .+ bi3)/3) - - # check that there is indeed more information in the sorted dimensions - 
sort!(A,dims=2) - @test sum(bitinformation(A,dims=1)) < sum(bitinformation(A,dims=2)) + # check that there is indeed more information in the sorted dimensions + sort!(A,dims=2) + @test sum(bitinformation(A,dim=1)) < sum(bitinformation(A,dim=2)) + end end @testset "Mutual information" begin @@ -100,8 +61,8 @@ end @test 1.0 == mutual_information([0.0 0.5;0.5 0.0]) # two independent arrays - for T in [UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64] - mutinf = bitinformation(rand(T,N),rand(T,N)) + for T in (UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64) + mutinf = mutual_information(rand(T,N),rand(T,N)) for m in mutinf @test isapprox(0,m,atol=1e-3) end @@ -112,85 +73,100 @@ end # but so in practice slightly lower for rand(UInt), # or clearly lower for low entropy bits in Float16/32/64 # but identical to the bitcount_entropy (up to rounding errors) - for T in [UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64] + for T in (UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64) R = rand(T,N) - mutinf = bitinformation(R,R) + mutinf = mutual_information(R,R) @test mutinf ≈ bitcount_entropy(R) end end @testset "Redundancy" begin - N = 100_000 + N = 10_000 # No redundancy in independent arrays - for T in [UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64] + for T in (UInt8,UInt16,UInt32,UInt64) redun = redundancy(rand(T,N),rand(T,N)) for r in redun @test isapprox(0,r,atol=2e-3) end end + # no redundancy in the mantissa bits of rand + for T in (Float16,Float32,Float64) + redun = redundancy(rand(T,N),rand(T,N)) + for r in redun[end-Base.significand_bits(T):end] + @test isapprox(0,r,atol=2e-3) + end + end + # Full redundancy in identical arrays - for T in [UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64] + for T in (UInt8,UInt16,UInt32,UInt64,Float16,Float32,Float64) A = rand(T,N) H = bitcount_entropy(A) R = redundancy(A,A) - for (r,h) in zip(R,H) - if iszero(h) - @test iszero(r) - else - @test isapprox(1,r,atol=2e-3) - end + for r in R + @test r ≈ 1 atol=1e-3 end end 
-end -@testset "Mutual information with round to nearest" begin - N = 100_000 + # Some artificially introduced redundancy PART I + for T in (UInt8,UInt16,UInt32,UInt64) + + A = rand(T,N) + B = copy(A) # B is A on every second entry + B[1:2:end] .= rand(T,N÷2) # otherwise independent - # compare shaving to round to nearest - # for round to nearest take m more bits into account for the - # joint probability - m = 8 + # joint prob mass is therefore [0.375 0.125; 0.125 0.375] + # at 50% bits are identical, at 50% they are independent + # = 25% same, 25% opposite + p = mutual_information([0.375 0.125;0.125 0.375]) - for T in [Float32,Float64] - R = rand(T,N) - for keepbit in [5,10,15] - mutinf_shave = bitinformation(R,shave(R,keepbit)) - mutinf_round = bitinformation(R,round(R,keepbit),m) - for (s,r) in zip(mutinf_shave,mutinf_round) - @test isapprox(s,r,atol=1e-2) - end + R = redundancy(A,B) + for r in R + @test r ≈ p rtol=2e-1 end end - # shaving shouldn't change - for T in [Float32,Float64] - R = rand(T,N) - for keepbit in [5,10,15] - mutinf_shave = bitinformation(R,shave(R,keepbit)) - mutinf_round = bitinformation(R,shave(R,keepbit),m) - for (s,r) in zip(mutinf_shave,mutinf_round) - @test isapprox(s,r,atol=1e-2) - end + # Some artificially introduced redundancy PART II + for T in (UInt8,UInt16,UInt32,UInt64) + + A = rand(T,N) + B = copy(A) # B is A on every fourth entry + B[1:4:end] .= rand(T,N÷4) # otherwise independent + + # joint prob mass is therefore [0.4375 0.0625; 0.0625 0.4375] + # at 75% bits are identical, at 25% they are independent + # = 12.5% same, 12.5% opposite + p = mutual_information([0.4375 0.0625;0.0625 0.4375]) + + R = redundancy(A,B) + for r in R + @test r ≈ p rtol=2e-1 end end end -@testset "Information of random set to zero" begin - for T in (UInt32,UInt64,Float32,Float64) - for N in [10_000,100_000] - A = rand(T,N) - # increase confidence here from the default 0.99 to avoid test failures - # from false positives - b = 
bitinformation(A,confidence=0.9999) - for ib in b - @test 0 == ib - end +@testset "Mutual information with shave/round" begin + N = 10_000 - m = bitinformation(A[1:end-1],A[2:end],confidence=0.9999) - for im in m - @test 0 == im + for T in (Float16,Float32,Float64) + R = randn(T,N) + for keepbit in [5,10,15] + + Rshaved = shave(R,keepbit) + mutinf_shave = mutual_information(R,Rshaved) + H_R = bitcount_entropy(R) + H_Rs = bitcount_entropy(Rshaved) + + for (i,(s,hr,hrs)) in enumerate(zip(mutinf_shave,H_R,H_Rs)) + if i <= (1+Base.exponent_bits(T)+keepbit) + @test isapprox(s,hr,atol=1e-2) + @test isapprox(s,hrs,atol=1e-2) + else + @test s == 0 + @test hr > 0 + @test hrs == 0 + end end end end diff --git a/test/round_nearest.jl b/test/round_nearest.jl index 926a192..ac5f189 100644 --- a/test/round_nearest.jl +++ b/test/round_nearest.jl @@ -1,5 +1,3 @@ -using Test - @testset "iseven isodd" begin # check sign bits @test iseven(1f0,-8) @@ -114,17 +112,6 @@ end x = reinterpret(Float32,x) @test 1f0 == round(x,k) end - - # for T in [Float16,Float32,Float64] - # for k in 1:20 - # x = randn(T) - # xr1 = round(x,k+1) - # xr = round(xr1,k) - - # if iseven(xr,k) - - # end - # end end @testset "Round to nearest?" 
begin diff --git a/test/runtests.jl b/test/runtests.jl index 4928c05..b1d829b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -3,7 +3,9 @@ using Test import StatsBase.entropy import Random +include("bitstring.jl") include("round_nearest.jl") include("shave_set_groom.jl") +include("signed_exponent.jl") include("xor_transpose.jl") include("information.jl") \ No newline at end of file diff --git a/test/shave_set_groom.jl b/test/shave_set_groom.jl index a0dc3d5..bfaa9db 100644 --- a/test/shave_set_groom.jl +++ b/test/shave_set_groom.jl @@ -1,5 +1,3 @@ -using Test - @testset "Zero shaves to zero" begin for T in [Float16,Float32,Float64] for k in -5:50 @@ -57,9 +55,9 @@ end end end -@testset "Approx equal for keepbits=5,10,25" begin +@testset "Approx equal for keepbits=8,15,35" begin for (T,k) in zip([Float16,Float32,Float64], - [6,11,26]) + [8,15,35]) A = rand(T,200,300) Ar = shave(A,k) @test A ≈ Ar diff --git a/test/signed_exponent.jl b/test/signed_exponent.jl new file mode 100644 index 0000000..c4b5a71 --- /dev/null +++ b/test/signed_exponent.jl @@ -0,0 +1,15 @@ +@testset "Signed/biased exponent idempotence" begin + for T in (Float16,Float32,Float64) + for _ in 1:100 + A = [reinterpret(T,rand(Base.uinttype(T)))] + # call functions with array but evaluated only the element of it + # with === to allow to nan equality too + @test A[1] === biased_exponent(signed_exponent(A))[1] + end + + # special cases 0, NaN, Inf + @test [zero(T)] == biased_exponent(signed_exponent([zero(T)])) + @test isnan(biased_exponent(signed_exponent([T(NaN)]))[1]) + @test isinf(biased_exponent(signed_exponent([T(Inf)]))[1]) + end +end \ No newline at end of file