Skip to content

Commit

Permalink
update prefix crate
Browse files: browse the repository at this point in the history
  • Loading branch information
bastiscode committed Feb 7, 2024
1 parent c0a81d7 commit 5c26ef6
Show file tree
Hide file tree
Showing 6 changed files with 397 additions and 442 deletions.
127 changes: 26 additions & 101 deletions text-utils-prefix/benches/benchmark.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,9 +5,11 @@ use criterion::{criterion_group, criterion_main, Criterion};
use rand::seq::SliceRandom;
use rand::SeedableRng;
use rand_chacha::ChaCha8Rng;
use text_utils_prefix::vec::PrefixVecContinuations;
use text_utils_prefix::{optimized_prefix_order, ContinuationSearch, PrefixSearch};
use text_utils_prefix::{AdaptiveRadixTrie, AdaptiveRadixTrie};
use text_utils_prefix::vec::ContinuationsVec;
use text_utils_prefix::{
utils::optimized_prefix_order, ContinuationSearch, ContinuationTrie, PrefixSearch,
};
use text_utils_prefix::{AdaptiveRadixTrie, PatriciaTrie};

use art_tree::{Art, ByteString};
use patricia_tree::PatriciaMap;
Expand All @@ -32,7 +34,6 @@ fn bench_prefix(c: &mut Criterion) {
.collect();
let prefix = "Albert".as_bytes();
let prefixes: Vec<_> = (0..64).map(|_| prefix.to_vec()).collect();
let (permutation, skips) = optimized_prefix_order(&continuations);

group.bench_with_input("optimized_prefix_order", &continuations, |b, input| {
b.iter(|| optimized_prefix_order(input));
Expand Down Expand Up @@ -63,7 +64,7 @@ fn bench_prefix(c: &mut Criterion) {
});

// benchmark patricia trie
let mut trie: AdaptiveRadixTrie<_> = words.iter().zip(0..words.len()).collect();
let mut trie: PatriciaTrie<_> = words.iter().zip(0..words.len()).collect();
group.bench_with_input("patricia_trie_insert", word, |b, input| {
b.iter(|| trie.insert(input, 1));
});
Expand All @@ -73,60 +74,22 @@ fn bench_prefix(c: &mut Criterion) {
group.bench_with_input("patricia_trie_contains", word, |b, input| {
b.iter(|| trie.contains_prefix(&input[..input.len().saturating_sub(3)]));
});
group.bench_with_input(
"patricia_trie_continuations",
&(prefix, &continuations),
|b, input| {
let (word, continuations) = input;
b.iter(|| trie.contains_continuations(word, continuations));
},
);
group.bench_with_input(
"patricia_trie_continuations_optimized",
&(prefix, &continuations),
|b, input| {
let (word, continuations) = input;
b.iter(|| {
trie.contains_continuations_optimized(word, continuations, &permutation, &skips)
});
},
);
let trie: ContinuationTrie<_> = ContinuationTrie::new(trie, &continuations);
group.bench_with_input("patricia_trie_continuations", prefix, |b, input| {
b.iter(|| trie.contains_continuations(input));
});
group.bench_with_input(
"patricia_trie_continuations_batch",
&(&prefixes, &continuations),
|b, input| {
let (words, continuations) = input;
b.iter(|| trie.batch_contains_continuations(words, &continuations));
},
);
group.bench_with_input(
"patricia_trie_continuations_batch_optimized",
&(&prefixes, &continuations),
&prefixes,
|b, input| {
let (words, continuations) = input;
b.iter(|| {
trie.batch_contains_continuations_optimized(
words,
continuations,
&permutation,
&skips,
)
});
b.iter(|| trie.batch_contains_continuations(input));
},
);
group.bench_with_input(
"patricia_trie_continuations_batch_optimized_parallel",
&(&prefixes, &continuations),
"patricia_trie_continuations_batch_parallel",
&prefixes,
|b, input| {
let (words, continuations) = input;
b.iter(|| {
trie.batch_contains_continuations_optimized_parallel(
words,
continuations,
&permutation,
&skips,
)
});
b.iter(|| trie.batch_contains_continuations_parallel(input));
},
);

Expand All @@ -141,66 +104,28 @@ fn bench_prefix(c: &mut Criterion) {
group.bench_with_input("adaptive_radix_trie_contains", word, |b, input| {
b.iter(|| trie.contains_prefix(&input[..input.len().saturating_sub(3)]));
});
group.bench_with_input(
"adaptive_radix_trie_continuations",
&(prefix, &continuations),
|b, input| {
let (word, continuations) = input;
b.iter(|| trie.contains_continuations(&word, &continuations));
},
);
group.bench_with_input(
"adaptive_radix_trie_continuations_optimized",
&(prefix, &continuations),
|b, input| {
let (word, continuations) = input;
b.iter(|| {
trie.contains_continuations_optimized(&word, &continuations, &permutation, &skips)
});
},
);

let trie: ContinuationTrie<_> = ContinuationTrie::new(trie, &continuations);
group.bench_with_input("adaptive_radix_trie_continuations", prefix, |b, input| {
b.iter(|| trie.contains_continuations(input));
});
group.bench_with_input(
"adaptive_radix_trie_continuations_batch",
&(&prefixes, &continuations),
|b, input| {
let (words, continuations) = input;
b.iter(|| trie.batch_contains_continuations(words, continuations));
},
);
group.bench_with_input(
"adaptive_radix_trie_continuations_batch_optimized",
&(&prefixes, &continuations),
&prefixes,
|b, input| {
let (words, continuations) = input;
b.iter(|| {
trie.batch_contains_continuations_optimized(
words,
continuations,
&permutation,
&skips,
)
});
b.iter(|| trie.batch_contains_continuations(input));
},
);
group.bench_with_input(
"adaptive_radix_trie_continuations_batch_optimized_parallel",
&(&prefixes, &continuations),
"adaptive_radix_trie_continuations_batch_parallel",
&prefixes,
|b, input| {
let (words, continuations) = input;
b.iter(|| {
trie.batch_contains_continuations_optimized_parallel(
words,
continuations,
&permutation,
&skips,
)
});
b.iter(|| trie.batch_contains_continuations_parallel(input));
},
);

// benchmark prefix vec continuations
let vec =
PrefixVecContinuations::new(words.iter().zip(0..words.len()).collect(), &continuations);
let vec = ContinuationsVec::new(words.iter().zip(0..words.len()).collect(), &continuations);
group.bench_with_input("prefix_vec_continuations", word, |b, input| {
b.iter(|| vec.contains_continuations(input));
});
Expand Down
53 changes: 10 additions & 43 deletions text-utils-prefix/src/art.rs
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::{
iter::{empty, once},
};

use crate::{ContinuationSearch, PrefixSearch};
use crate::{ContinuationsTrie, PrefixSearch};

type Index<const N: usize> = Box<[u8; N]>;
type Children<V, const N: usize> = Box<[Option<Box<Node<V>>>; N]>;
Expand Down Expand Up @@ -316,7 +316,7 @@ impl<V> Node<V> {
fn contains_prefix_iter(
&self,
mut key: impl Iterator<Item = u8>,
offset: usize,
mut offset: usize,
) -> Option<(&Self, usize)> {
let mut node = self;
loop {
Expand All @@ -326,6 +326,8 @@ impl<V> Node<V> {
Matching::FullPrefix(k) => k,
Matching::Partial(..) => break,
};
// reset offset after first node
offset = 0;

let Some(child) = node.find_child(k) else {
break;
Expand Down Expand Up @@ -609,7 +611,8 @@ impl<V> PrefixSearch for AdaptiveRadixTrie<V> {
let Node {
inner: NodeType::Leaf(value),
..
} = node.remove_child(k) else {
} = node.remove_child(k)
else {
unreachable!("should not happen");
};
return Some(value);
Expand Down Expand Up @@ -638,10 +641,7 @@ impl<V> PrefixSearch for AdaptiveRadixTrie<V> {
root.contains_prefix_iter(key, 0).is_some()
}

fn path<'a>(&'a self, prefix: &[u8]) -> Vec<(usize, &'a Self::Value)>
where
Self::Value: 'a,
{
fn path(&self, prefix: &[u8]) -> Vec<(usize, &Self::Value)> {
let Some(root) = &self.root else {
return vec![];
};
Expand Down Expand Up @@ -682,9 +682,7 @@ impl<V> PrefixSearch for AdaptiveRadixTrie<V> {
}
path
}
}

impl<V> ContinuationSearch for AdaptiveRadixTrie<V> {
fn continuations(&self, prefix: &[u8]) -> Box<dyn Iterator<Item = (Vec<u8>, &V)> + '_> {
let Some(root) = &self.root else {
return Box::new(empty());
Expand Down Expand Up @@ -713,41 +711,10 @@ impl<V> ContinuationSearch for AdaptiveRadixTrie<V> {

node.leaves_recursive(prefix)
}
}

fn contains_continuation(&self, prefix: &[u8], continuation: &[u8]) -> bool {
let Some(root) = &self.root else {
return false;
};

let key = prefix.iter().chain(continuation.iter()).copied();
root.contains_prefix_iter(key, 0).is_some()
}

fn contains_continuations(&self, prefix: &[u8], continuations: &[Vec<u8>]) -> Vec<usize> {
let Some(root) = &self.root else {
return vec![];
};

let key = prefix.iter().copied();
let Some((node, n)) = root.contains_prefix_iter(key, 0) else {
return vec![];
};

continuations
.iter()
.enumerate()
.filter_map(|(i, c)| {
let key = c.iter().copied();
if node.contains_prefix_iter(key, n).is_some() {
Some(i)
} else {
None
}
})
.collect()
}

fn contains_continuations_optimized(
impl<V> ContinuationsTrie for AdaptiveRadixTrie<V> {
fn contains_continuations(
&self,
prefix: &[u8],
continuations: &[Vec<u8>],
Expand Down
Loading

0 comments on commit 5c26ef6

Please sign in to comment.