-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement Chi Square engine and add different types of contingency tables.
- Loading branch information
1 parent
4e3b550
commit b9a5431
Showing
9 changed files
with
316 additions
and
81 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
use crate::{ | ||
metrics::ResponseStatusCode, | ||
stats::{EmpiricalTable, ExpectationTable, Group, Observation}, | ||
}; | ||
|
||
use super::DecisionEngine; | ||
|
||
/// The [ChiSquareEngine] uses the Chi Square statistical | ||
/// significance test to determine whether the canary should be promoted or not. | ||
#[derive(Default)] | ||
pub struct ChiSquareEngine { | ||
control_data: ExpectationTable<ResponseStatusCode>, | ||
experimental_data: EmpiricalTable<ResponseStatusCode>, | ||
} | ||
|
||
impl DecisionEngine<ResponseStatusCode> for ChiSquareEngine { | ||
// TODO: From writing this method, it's apparent there should be a Vec implementation | ||
// that adds Vec::len() to the total and concats the vectors together, because | ||
// otherwise we're wasting a ton of cycles just incrementing counters. | ||
fn add_observation(&mut self, observation: Observation<ResponseStatusCode>) { | ||
match observation.group { | ||
Group::Control => { | ||
// • Increment the number of observations for this category. | ||
self.control_data.increment(observation.outcome); | ||
} | ||
Group::Experimental => { | ||
// • Increment the number of observations in the canary contingency table. | ||
self.experimental_data.increment(observation.outcome); | ||
// • Then, let the control contingency table know that there was | ||
// another experimental observation. | ||
self.control_data.increment_experimental_total(); | ||
} | ||
} | ||
} | ||
|
||
fn compute(&mut self) -> Option<super::Action> { | ||
todo!() | ||
} | ||
} | ||
|
||
impl ChiSquareEngine { | ||
pub fn new() -> Self { | ||
Self::default() | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
/// A ContingencyTable expresses the frequency with which a group was observed.
/// Usually, it tracks the number of observations in each group, but when the
/// number is already known (i.e. it's fixed, like a fair die or coin), it can
/// expose just the frequencies for each group.
pub trait ContingencyTable<Group> {
    /// Returns the number of observations in the provided group.
    fn group_count(&self, cat: &Group) -> u64;

    /// Returns the set of groups that serve as columns of the contingency table.
    fn groups(&self) -> Box<dyn Iterator<Item = Group>>;

    /// Returns the total number of observations made.
    ///
    /// The default implementation sums [`ContingencyTable::group_count`] over
    /// every group yielded by [`ContingencyTable::groups`].
    fn total_count(&self) -> u64 {
        self.groups().map(|group| self.group_count(&group)).sum()
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
use super::ContingencyTable; | ||
use crate::stats::EnumerableCategory; | ||
use std::{collections::HashMap, hash::Hash}; | ||
|
||
/// An [EmpiricalTable] is used to track observed data. It keeps | ||
/// a talley of each observed category. When queried, it uses | ||
/// the empirical values to emit an observation count. | ||
/// This is in contrast to a ExpectationTable, which also keeps a | ||
/// talley of observations made, but uses the count of observations | ||
/// from an EmpiricalTable to determine the expected ratios. | ||
/// | ||
/// The category must be hashable | ||
/// because a hashmap is used internally to store the frequencies. | ||
/// If you'd like us to add a B-Tree based alternative, please open an issue. | ||
pub struct EmpiricalTable<C> | ||
where | ||
C: EnumerableCategory + Hash + Eq, | ||
{ | ||
counts: HashMap<C, u64>, | ||
} | ||
|
||
impl<C> Default for EmpiricalTable<C> | ||
where | ||
C: EnumerableCategory + Hash + Eq, | ||
{ | ||
fn default() -> Self { | ||
Self::new() | ||
} | ||
} | ||
|
||
impl<C> EmpiricalTable<C> | ||
where | ||
C: EnumerableCategory + Hash + Eq, | ||
{ | ||
/// Construct a new, empty contingency table. All frequencies are | ||
/// initialized to zero. | ||
pub fn new() -> Self { | ||
let mut counts = HashMap::new(); | ||
for group in C::groups() { | ||
counts.entry(group).or_insert(0); | ||
} | ||
|
||
Self { counts } | ||
} | ||
|
||
pub fn increment(&mut self, cat: C) { | ||
self.counts.entry(cat).and_modify(|c| *c += 1); | ||
} | ||
|
||
/// Sets the expected count of the category to the value provided. | ||
pub fn set_group_count(&mut self, cat: C, count: u64) { | ||
self.counts.insert(cat, count); | ||
} | ||
|
||
/// Returns the number of observations that were classified as | ||
/// having this group/category. | ||
pub fn group_count(&self, cat: &C) -> u64 { | ||
self.counts[cat] | ||
} | ||
} | ||
|
||
impl<C> ContingencyTable<C> for EmpiricalTable<C> | ||
where | ||
C: EnumerableCategory + Hash + Eq, | ||
{ | ||
fn group_count(&self, cat: &C) -> u64 { | ||
// delegate to the method on the base class. | ||
Self::group_count(self, cat) | ||
} | ||
|
||
fn groups(&self) -> Box<dyn Iterator<Item = C>> { | ||
// Delegate to the fixed list provided by the EnumerableCategory. | ||
C::groups() | ||
} | ||
} |
Oops, something went wrong.