Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
experiment with builder pattern
Browse files Browse the repository at this point in the history
maxbachmann committed Nov 28, 2023

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 2b8ec1c commit f9a824c
Showing 4 changed files with 483 additions and 212 deletions.
9 changes: 2 additions & 7 deletions rapidfuzz-benches/benches/bench_osa.rs
Original file line number Diff line number Diff line change
@@ -33,12 +33,7 @@ fn benchmark(c: &mut Criterion) {

group.bench_with_input(BenchmarkId::new("rapidfuzz", i), &(&s1, &s2), |b, val| {
b.iter(|| {
black_box(distance::osa::distance(
val.0.chars(),
val.1.chars(),
None,
None,
));
black_box(distance::osa::distance().compare(val.0.chars(), val.1.chars()));
})
});

@@ -48,7 +43,7 @@ fn benchmark(c: &mut Criterion) {
&(&cached, &s2),
|b, val| {
b.iter(|| {
black_box(cached.distance(val.1.chars(), None, None));
black_box(cached.distance().compare(val.1.chars()));
})
},
);
112 changes: 112 additions & 0 deletions src/details/distance.rs
Original file line number Diff line number Diff line change
@@ -4,6 +4,34 @@ use crate::HashableChar;
pub trait MetricUsize {
/// Yields a `usize` bound derived from the two sequence lengths `len1` and `len2`.
///
/// NOTE(review): presumably the largest value the metric can take for inputs of
/// these lengths — the implementation is not visible here; confirm against implementors.
fn maximum(&self, len1: usize, len2: usize) -> usize;

// todo rename if we convert everything to use the builder pattern
/// Front end for `_distance` that accepts anything convertible into an iterator.
///
/// Both inputs are turned into iterators, their element counts are measured,
/// and everything is forwarded to `_distance` together with `score_cutoff`
/// and `score_hint`. The result of `_distance` is returned unchanged.
fn distance_<Iter1, Iter2>(
    &self,
    s1: Iter1,
    s2: Iter2,
    score_cutoff: Option<usize>,
    score_hint: Option<usize>,
) -> Option<usize>
where
    Iter1: IntoIterator,
    Iter1::IntoIter: DoubleEndedIterator + Clone,
    Iter2: IntoIterator,
    Iter2::IntoIter: DoubleEndedIterator + Clone,
    Iter1::Item: PartialEq<Iter2::Item> + HashableChar + Copy,
    Iter2::Item: PartialEq<Iter1::Item> + HashableChar + Copy,
{
    let first = s1.into_iter();
    let second = s2.into_iter();

    // Counting consumes an iterator, so measure clones and keep the
    // originals intact for the actual computation.
    let len1 = first.clone().count();
    let len2 = second.clone().count();

    self._distance(first, len1, second, len2, score_cutoff, score_hint)
}

fn _distance<Iter1, Iter2>(
&self,
s1: Iter1,
@@ -35,6 +63,34 @@ pub trait MetricUsize {
Some(dist)
}

// todo rename if we convert everything to use the builder pattern
/// Front end for `_similarity` that accepts anything convertible into an iterator.
///
/// Both inputs are turned into iterators, their element counts are measured,
/// and everything is forwarded to `_similarity` together with `score_cutoff`
/// and `score_hint`. The result of `_similarity` is returned unchanged.
fn similarity_<Iter1, Iter2>(
    &self,
    s1: Iter1,
    s2: Iter2,
    score_cutoff: Option<usize>,
    score_hint: Option<usize>,
) -> Option<usize>
where
    Iter1: IntoIterator,
    Iter1::IntoIter: DoubleEndedIterator + Clone,
    Iter2: IntoIterator,
    Iter2::IntoIter: DoubleEndedIterator + Clone,
    Iter1::Item: PartialEq<Iter2::Item> + HashableChar + Copy,
    Iter2::Item: PartialEq<Iter1::Item> + HashableChar + Copy,
{
    let first = s1.into_iter();
    let second = s2.into_iter();

    // Counting consumes an iterator, so measure clones and keep the
    // originals intact for the actual computation.
    let len1 = first.clone().count();
    let len2 = second.clone().count();

    self._similarity(first, len1, second, len2, score_cutoff, score_hint)
}

fn _similarity<Iter1, Iter2>(
&self,
s1: Iter1,
@@ -73,6 +129,34 @@ pub trait MetricUsize {
Some(sim)
}

// todo rename if we convert everything to use the builder pattern
/// Front end for `_normalized_distance` that accepts anything convertible
/// into an iterator.
///
/// Both inputs are turned into iterators, their element counts are measured,
/// and everything is forwarded to `_normalized_distance` together with the
/// floating-point `score_cutoff` and `score_hint`. The result of
/// `_normalized_distance` is returned unchanged.
fn normalized_distance_<Iter1, Iter2>(
    &self,
    s1: Iter1,
    s2: Iter2,
    score_cutoff: Option<f64>,
    score_hint: Option<f64>,
) -> Option<f64>
where
    Iter1: IntoIterator,
    Iter1::IntoIter: DoubleEndedIterator + Clone,
    Iter2: IntoIterator,
    Iter2::IntoIter: DoubleEndedIterator + Clone,
    Iter1::Item: PartialEq<Iter2::Item> + HashableChar + Copy,
    Iter2::Item: PartialEq<Iter1::Item> + HashableChar + Copy,
{
    let first = s1.into_iter();
    let second = s2.into_iter();

    // Counting consumes an iterator, so measure clones and keep the
    // originals intact for the actual computation.
    let len1 = first.clone().count();
    let len2 = second.clone().count();

    self._normalized_distance(first, len1, second, len2, score_cutoff, score_hint)
}

fn _normalized_distance<Iter1, Iter2>(
&self,
s1: Iter1,
@@ -121,6 +205,34 @@ pub trait MetricUsize {
Some(norm_dist)
}

// todo rename if we convert everything to use the builder pattern
/// Front end for `_normalized_similarity` that accepts anything convertible
/// into an iterator.
///
/// Both inputs are turned into iterators, their element counts are measured,
/// and everything is forwarded to `_normalized_similarity` together with the
/// floating-point `score_cutoff` and `score_hint`. The result of
/// `_normalized_similarity` is returned unchanged.
fn normalized_similarity_<Iter1, Iter2>(
    &self,
    s1: Iter1,
    s2: Iter2,
    score_cutoff: Option<f64>,
    score_hint: Option<f64>,
) -> Option<f64>
where
    Iter1: IntoIterator,
    Iter1::IntoIter: DoubleEndedIterator + Clone,
    Iter2: IntoIterator,
    Iter2::IntoIter: DoubleEndedIterator + Clone,
    Iter1::Item: PartialEq<Iter2::Item> + HashableChar + Copy,
    Iter2::Item: PartialEq<Iter1::Item> + HashableChar + Copy,
{
    let first = s1.into_iter();
    let second = s2.into_iter();

    // Counting consumes an iterator, so measure clones and keep the
    // originals intact for the actual computation.
    let len1 = first.clone().count();
    let len2 = second.clone().count();

    self._normalized_similarity(first, len1, second, len2, score_cutoff, score_hint)
}

fn _normalized_similarity<Iter1, Iter2>(
&self,
s1: Iter1,
2 changes: 1 addition & 1 deletion src/distance/damerau_levenshtein.rs
Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@
//! use rapidfuzz::distance::osa;
//!
//! assert_eq!(Some(2), damerau_levenshtein::distance("CA".chars(), "ABC".chars(), None, None));
//! assert_eq!(Some(3), osa::distance("CA".chars(), "ABC".chars(), None, None));
//! assert_eq!(3, osa::distance().compare("CA".chars(), "ABC".chars()));
//! ```
//!
//! The handling of transpositions in the OSA distance is simpler, which makes it computationally less intensive.
Loading

0 comments on commit f9a824c

Please sign in to comment.