Skip to content

Commit

Permalink
feat: Add support for histogram (#25)
Browse files Browse the repository at this point in the history
* chore: Adds histogram support

based on crate histogram

* chore: Hide histogram behind feature histogram

* test: adds unit test for testing float to int conversion

* test: adds test to compare two histogram based on mean and std

* chore: histogram is not default feature

* ci: Adds `--all-features` to build and test step

* ci: default branch changed to main

* chore: skip serializing Histogram

* doc: Updates changelog
  • Loading branch information
dilawar authored Jan 8, 2024
1 parent cd2758f commit e23269a
Show file tree
Hide file tree
Showing 5 changed files with 130 additions and 6 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ name: Rust

on:
push:
branches: [ "master" ]
branches: [ "main" ]
tags:
- 'v*'
pull_request:
branches: [ "master" ]
branches: [ "main" ]

env:
CARGO_TERM_COLOR: always
Expand All @@ -18,10 +18,10 @@ jobs:
- uses: actions/checkout@v3
- name: Build
run: |
cargo clippy --no-deps
cargo build --all-targets --verbose
cargo clippy --no-deps --all-features
cargo build --all-targets --all-features --verbose
- name: Run tests
run: cargo test --verbose
run: cargo test --verbose --all-features
- name: Check semver
uses: obi1kenobi/cargo-semver-checks-action@v2

Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]

### Added

- Adds support for computing histograms (feature 'histogram')

## [0.6.0] - 2023-12-2023
### Fixed
- Fixed #10. Custom implement for `Default`.
Expand Down
11 changes: 10 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@
name = "simple_accumulator"
version = "0.6.0"
edition = "2021"
authors = ["Sid <siddharth.naithani@subcom.tech>", "Purnata G <purnata.g@subcom.tech>", "Dilawar Singh <dilawar@subcom.tech>"]
authors = [
"Sid <siddharth.naithani@subcom.tech>",
"Purnata <purnata.g@subcom.tech>",
"Dilawar <dilawar@subcom.tech>",
]
description = "A simple accumulator for incremental statistical computations"
repository = "https://github.com/SubconsciousCompute/SimpleAccumulator"
license = "MIT"
Expand All @@ -15,17 +19,22 @@ documentation = "https://docs.rs/simple_accumulator"
num-traits = "0.2.17"
rand = "0.8.5"
serde = { version = "1", features = ["derive"], optional = true }
tracing = "0.1.40"
watermill = "0.1.1"
histogram = { version = "0.8.3", optional = true }


[dev-dependencies]
ordered-float = "4.2.0"
plotly = { version = "0.8.4" }
float_eq = { version = "1", features = ["derive"] }
watermill = "0.1.1"
tracing-test = "0.2.4"

[features]
default = []
serde = ["dep:serde"]
histogram = ["dep:histogram"]

[profile.release]
lto = true
Expand Down
71 changes: 71 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
use std::collections::VecDeque;
use std::ops::{AddAssign, SubAssign};

#[cfg(feature = "histogram")]
use histogram::Histogram;

use num_traits::{cast::FromPrimitive, float::Float};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -68,6 +71,11 @@ pub struct SimpleAccumulator<

/// Can only `push` if used, for `pop` and `remove` we return `None`
fixed_capacity: bool,

/// Histogram
#[serde(skip)]
#[cfg(feature = "histogram")]
histogram: Option<Histogram>,
}

impl<F: Float + FromPrimitive + AddAssign + SubAssign + std::default::Default>
Expand All @@ -94,6 +102,36 @@ impl<F: Float + FromPrimitive + AddAssign + SubAssign + std::default::Default>
k
}

/// Initialize (or re-initialize) the histogram attached to this accumulator.
///
/// The two arguments form the histogram configuration, which determines the bucketing
/// strategy and therefore the relative error and memory utilization of the histogram.
///
/// `grouping_power` - controls the number of buckets that are used to span consecutive powers
/// of two. Lower values result in less memory usage since fewer buckets will be created.
/// However, this will result in larger relative error as each bucket represents a wider range
/// of values.
///
/// `max_value_power` - controls the largest value which can be stored in the histogram.
/// 2^(max_value_power) - 1 is the inclusive upper bound for the representable range of values.
///
/// If a histogram is already present it is replaced by a fresh one. If the histogram cannot
/// be constructed, a warning is logged and the accumulator is left without a histogram.
///
/// # Panics
///
/// Panics if `grouping_power >= max_value_power`.
///
/// Reference: <https://docs.rs/histogram/latest/histogram/struct.Config.html>
#[cfg(feature = "histogram")]
pub fn init_histogram(&mut self, grouping_power: u8, max_value_power: u8) {
    assert!(
        grouping_power < max_value_power,
        "max_value_power must be > grouping_power"
    );
    if self.histogram.is_some() {
        tracing::info!("Histogram is already initialized. Reinitializing...");
    }
    // Construction failure is not fatal: log it and fall back to "no histogram".
    self.histogram = match histogram::Histogram::new(grouping_power, max_value_power) {
        Ok(hist) => Some(hist),
        Err(e) => {
            tracing::warn!("Failed to initialize histogram: {e}");
            None
        }
    };
}

/// Get the length of underlying container storing data.
#[inline]
pub fn len(&self) -> usize {
Expand Down Expand Up @@ -180,6 +218,20 @@ impl<F: Float + FromPrimitive + AddAssign + SubAssign + std::default::Default>
}
}
self.data.push_back(y);

if let Some(histogram) = self.histogram.as_mut() {
if let Some(v) = y.to_u64() {
if let Err(e) = histogram.increment(v) {
debug_assert!(false, "Failed to increment the histogram: {e}");
}
}
}
}

/// Return a reference to the inner histogram.
///
/// Returns `None` when the accumulator currently holds no histogram.
#[cfg(feature = "histogram")]
pub fn histogram(&self) -> Option<&histogram::Histogram> {
self.histogram.as_ref()
}

/// Function similar to `append` in `Vec`, rewrites in FIFO order if `fixed_capacity` is 'true'.
Expand Down Expand Up @@ -232,3 +284,22 @@ impl<F: Float + FromPrimitive + AddAssign + SubAssign + std::default::Default>
self.sum.get()
}
}

#[cfg(test)]
mod tests {
    use num_traits::ToPrimitive;

    /// Checks that `ToPrimitive::to_u64` on a non-negative float agrees with taking the
    /// floor and casting with `as u64`, for both `f32` and `f64` inputs.
    #[test]
    fn test_f32_to_u64() {
        let a = 40.9f32;
        let ai = a.to_u64().unwrap();
        assert_eq!(a.floor() as u64, ai, "floor of {a} is not equal to {ai}");

        // Random samples in [0, 100): always non-negative and well inside the u64 range.
        for _ in 0..10000 {
            let a = rand::random::<f64>() * 100.0;
            let ai = a.to_u64().unwrap();
            assert_eq!(a.floor() as u64, ai, "floor of {a} is not equal to {ai}");
        }
    }
}
40 changes: 40 additions & 0 deletions tests/test_histogram.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
//! Runs the following tests.
//!
//! 1. Check if histogram computed is 'correct'.
#![cfg(feature = "histogram")]

use simple_accumulator::SimpleAccumulator;
use std::ops::Shr;
use tracing_test::traced_test;

/// Pushes uniformly distributed `u8` samples into an accumulator with a histogram enabled and
/// checks that the per-bucket counts look uniform: the mean count is close to the expected
/// value and the spread of the counts (standard deviation) stays small.
#[traced_test]
#[test]
fn test_hist_correctness() {
    /// Number of random samples pushed into the accumulator.
    const N: i64 = 100_000;
    /// `u8` can take `u8::MAX + 1` (= 256) distinct values.
    const N_VALUES: i64 = u8::MAX as i64 + 1;

    // create an accumulator with fixed capacity.
    let mut acc = SimpleAccumulator::new(&[0.0], Some(10));

    acc.init_histogram(7, 8 /* largest storable value is 2^8 - 1 */);
    for _ in 0..N {
        acc.push(f64::from(rand::random::<u8>()));
    }

    // With uniform samples every distinct value is expected to be hit N / 256 times.
    let expected_mean = N / N_VALUES;
    let hist = acc.histogram().unwrap().as_slice();
    let n_buckets = hist.len() as i64;
    let mean = hist.iter().map(|&count| count as i64).sum::<i64>() / n_buckets;
    println!("computed hist = {hist:?}");
    println!("expected mean = {expected_mean}, mean={mean}");

    let var = hist
        .iter()
        .map(|&count| (count as i64 - mean).pow(2))
        .sum::<i64>()
        / n_buckets;
    let std = (var as f64).sqrt();
    println!("variance={var} std={std}");

    assert!(
        (mean - expected_mean).abs() < 5,
        "{mean} is far away from expected mean {expected_mean}"
    );
    assert!(std < 24.0, "Standard deviation is too high {std}");
}

0 comments on commit e23269a

Please sign in to comment.