Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
experiment with builder pattern
Browse files Browse the repository at this point in the history
maxbachmann committed Nov 28, 2023
1 parent 2b8ec1c commit f9a824c
Showing 4 changed files with 483 additions and 212 deletions.
9 changes: 2 additions & 7 deletions rapidfuzz-benches/benches/bench_osa.rs
Original file line number Diff line number Diff line change
@@ -33,12 +33,7 @@ fn benchmark(c: &mut Criterion) {

group.bench_with_input(BenchmarkId::new("rapidfuzz", i), &(&s1, &s2), |b, val| {
b.iter(|| {
black_box(distance::osa::distance(
val.0.chars(),
val.1.chars(),
None,
None,
));
black_box(distance::osa::distance().compare(val.0.chars(), val.1.chars()));
})
});

@@ -48,7 +43,7 @@ fn benchmark(c: &mut Criterion) {
&(&cached, &s2),
|b, val| {
b.iter(|| {
black_box(cached.distance(val.1.chars(), None, None));
black_box(cached.distance().compare(val.1.chars()));
})
},
);
112 changes: 112 additions & 0 deletions src/details/distance.rs
Original file line number Diff line number Diff line change
@@ -4,6 +4,34 @@ use crate::HashableChar;
pub trait MetricUsize {
fn maximum(&self, len1: usize, len2: usize) -> usize;

// todo rename if we convert everything to use the builder pattern
/// Convenience wrapper around `_distance` for callers that only have the
/// sequences themselves and not their lengths.
///
/// Both inputs are turned into iterators, each length is obtained by running
/// `Iterator::count` over a clone of that iterator, and everything is then
/// forwarded to `_distance`. `score_cutoff` and `score_hint` are passed
/// through untouched, and the return value is whatever `_distance` yields.
fn distance_<Iter1, Iter2>(
    &self,
    s1: Iter1,
    s2: Iter2,
    score_cutoff: Option<usize>,
    score_hint: Option<usize>,
) -> Option<usize>
where
    Iter1: IntoIterator,
    Iter1::IntoIter: DoubleEndedIterator + Clone,
    Iter2: IntoIterator,
    Iter2::IntoIter: DoubleEndedIterator + Clone,
    Iter1::Item: PartialEq<Iter2::Item> + HashableChar + Copy,
    Iter2::Item: PartialEq<Iter1::Item> + HashableChar + Copy,
{
    let seq1 = s1.into_iter();
    let seq2 = s2.into_iter();

    // `count` consumes its receiver, so measure a clone and keep the
    // original iterator available for the actual computation.
    let len1 = seq1.clone().count();
    let len2 = seq2.clone().count();

    self._distance(seq1, len1, seq2, len2, score_cutoff, score_hint)
}

fn _distance<Iter1, Iter2>(
&self,
s1: Iter1,
@@ -35,6 +63,34 @@ pub trait MetricUsize {
Some(dist)
}

// todo rename if we convert everything to use the builder pattern
/// Convenience wrapper around `_similarity` for callers that only have the
/// sequences themselves and not their lengths.
///
/// Both inputs are turned into iterators, each length is obtained by running
/// `Iterator::count` over a clone of that iterator, and everything is then
/// forwarded to `_similarity`. `score_cutoff` and `score_hint` are passed
/// through untouched, and the return value is whatever `_similarity` yields.
fn similarity_<Iter1, Iter2>(
    &self,
    s1: Iter1,
    s2: Iter2,
    score_cutoff: Option<usize>,
    score_hint: Option<usize>,
) -> Option<usize>
where
    Iter1: IntoIterator,
    Iter1::IntoIter: DoubleEndedIterator + Clone,
    Iter2: IntoIterator,
    Iter2::IntoIter: DoubleEndedIterator + Clone,
    Iter1::Item: PartialEq<Iter2::Item> + HashableChar + Copy,
    Iter2::Item: PartialEq<Iter1::Item> + HashableChar + Copy,
{
    let seq1 = s1.into_iter();
    let seq2 = s2.into_iter();

    // `count` consumes its receiver, so measure a clone and keep the
    // original iterator available for the actual computation.
    let len1 = seq1.clone().count();
    let len2 = seq2.clone().count();

    self._similarity(seq1, len1, seq2, len2, score_cutoff, score_hint)
}

fn _similarity<Iter1, Iter2>(
&self,
s1: Iter1,
@@ -73,6 +129,34 @@ pub trait MetricUsize {
Some(sim)
}

// todo rename if we convert everything to use the builder pattern
/// Convenience wrapper around `_normalized_distance` for callers that only
/// have the sequences themselves and not their lengths.
///
/// Both inputs are turned into iterators, each length is obtained by running
/// `Iterator::count` over a clone of that iterator, and everything is then
/// forwarded to `_normalized_distance`. The floating-point `score_cutoff`
/// and `score_hint` are passed through untouched, and the return value is
/// whatever `_normalized_distance` yields.
fn normalized_distance_<Iter1, Iter2>(
    &self,
    s1: Iter1,
    s2: Iter2,
    score_cutoff: Option<f64>,
    score_hint: Option<f64>,
) -> Option<f64>
where
    Iter1: IntoIterator,
    Iter1::IntoIter: DoubleEndedIterator + Clone,
    Iter2: IntoIterator,
    Iter2::IntoIter: DoubleEndedIterator + Clone,
    Iter1::Item: PartialEq<Iter2::Item> + HashableChar + Copy,
    Iter2::Item: PartialEq<Iter1::Item> + HashableChar + Copy,
{
    let seq1 = s1.into_iter();
    let seq2 = s2.into_iter();

    // `count` consumes its receiver, so measure a clone and keep the
    // original iterator available for the actual computation.
    let len1 = seq1.clone().count();
    let len2 = seq2.clone().count();

    self._normalized_distance(seq1, len1, seq2, len2, score_cutoff, score_hint)
}

fn _normalized_distance<Iter1, Iter2>(
&self,
s1: Iter1,
@@ -121,6 +205,34 @@ pub trait MetricUsize {
Some(norm_dist)
}

// todo rename if we convert everything to use the builder pattern
/// Convenience wrapper around `_normalized_similarity` for callers that only
/// have the sequences themselves and not their lengths.
///
/// Both inputs are turned into iterators, each length is obtained by running
/// `Iterator::count` over a clone of that iterator, and everything is then
/// forwarded to `_normalized_similarity`. The floating-point `score_cutoff`
/// and `score_hint` are passed through untouched, and the return value is
/// whatever `_normalized_similarity` yields.
fn normalized_similarity_<Iter1, Iter2>(
    &self,
    s1: Iter1,
    s2: Iter2,
    score_cutoff: Option<f64>,
    score_hint: Option<f64>,
) -> Option<f64>
where
    Iter1: IntoIterator,
    Iter1::IntoIter: DoubleEndedIterator + Clone,
    Iter2: IntoIterator,
    Iter2::IntoIter: DoubleEndedIterator + Clone,
    Iter1::Item: PartialEq<Iter2::Item> + HashableChar + Copy,
    Iter2::Item: PartialEq<Iter1::Item> + HashableChar + Copy,
{
    let seq1 = s1.into_iter();
    let seq2 = s2.into_iter();

    // `count` consumes its receiver, so measure a clone and keep the
    // original iterator available for the actual computation.
    let len1 = seq1.clone().count();
    let len2 = seq2.clone().count();

    self._normalized_similarity(seq1, len1, seq2, len2, score_cutoff, score_hint)
}

fn _normalized_similarity<Iter1, Iter2>(
&self,
s1: Iter1,
2 changes: 1 addition & 1 deletion src/distance/damerau_levenshtein.rs
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@
//! use rapidfuzz::distance::osa;
//!
//! assert_eq!(Some(2), damerau_levenshtein::distance("CA".chars(), "ABC".chars(), None, None));
//! assert_eq!(Some(3), osa::distance("CA".chars(), "ABC".chars(), None, None));
//! assert_eq!(3, osa::distance().compare("CA".chars(), "ABC".chars()));
//! ```
//!
//! The handling of transpositions in the OSA distance is simpler, which makes it computationally less intensive.
Loading

0 comments on commit f9a824c

Please sign in to comment.