diff --git a/Cargo.lock b/Cargo.lock index 8ee26daa..f664af21 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1933,6 +1933,7 @@ dependencies = [ "metrics 0.21.1", "metrics-macros", "moor-values", + "num-traits", "okaywal", "rand", "serde", diff --git a/crates/db/Cargo.toml b/crates/db/Cargo.toml index 1dc848d3..efdc3166 100644 --- a/crates/db/Cargo.toml +++ b/crates/db/Cargo.toml @@ -53,6 +53,7 @@ io-uring.workspace = true hi_sparse_bitset.workspace = true tokio-eventfd.workspace = true fast-counter.workspace = true +num-traits.workspace = true # For testing & benching common bits serde_json.workspace = true diff --git a/crates/db/benches/tb_single_thread.rs b/crates/db/benches/tb_single_thread.rs index 11890475..f306707d 100644 --- a/crates/db/benches/tb_single_thread.rs +++ b/crates/db/benches/tb_single_thread.rs @@ -19,12 +19,12 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use moor_db::testing::jepsen::{History, Type, Value}; use moor_db::tuplebox::{RelationInfo, TupleBox}; use moor_values::util::slice_ref::SliceRef; -use sized_chunks::SparseChunk; use std::sync::Arc; use std::time::{Duration, Instant}; // This is a struct that tells Criterion.rs to use the "futures" crate's current-thread executor use moor_db::tuplebox::{RelationId, Transaction}; +use moor_values::util::{BitArray, Bitset64}; use tokio::runtime::Runtime; /// Build a test database with a bunch of relations @@ -66,19 +66,19 @@ fn load_history() -> Vec { async fn list_append_workload( db: Arc, events: &Vec, - processes: &mut SparseChunk, 64>, + processes: &mut BitArray, 64, Bitset64<1>>, ) { for e in events { match e.r#type { Type::invoke => { // Start a transaction. let tx = Arc::new(db.clone().start_tx()); - let existing = processes.insert(e.process as usize, tx.clone()); assert!( - existing.is_none(), - "T{} already exists uncommitted", + !processes.check(e.process as usize), + "T{} already exists committed", e.process ); + processes.set(e.process as usize, tx.clone()); // Execute the actions for ev in &e.value { match ev { @@ -106,11 +106,11 @@ async fn list_append_workload( } } Type::ok => { - let tx = processes.remove(e.process as usize).unwrap(); + let tx = processes.erase(e.process as usize).unwrap(); tx.commit().await.unwrap(); } Type::fail => { - let tx = processes.remove(e.process as usize).unwrap(); + let tx = processes.erase(e.process as usize).unwrap(); tx.rollback().await.unwrap(); } } @@ -123,7 +123,7 @@ async fn do_insert_workload(iters: u64, events: &Vec) -> Duration { let db = test_db().await; // Where to track the transactions running. - let mut processes = SparseChunk::new(); + let mut processes = BitArray::new(); let start = Instant::now(); list_append_workload(db, events, &mut processes).await; diff --git a/crates/db/src/tuplebox/coldstorage.rs b/crates/db/src/tuplebox/coldstorage.rs index e90473f1..86f3d885 100644 --- a/crates/db/src/tuplebox/coldstorage.rs +++ b/crates/db/src/tuplebox/coldstorage.rs @@ -242,14 +242,14 @@ impl ColdStorage { // The pages that are modified will be need be read-locked while they are copied. let mut dirty_pages = HashSet::new(); for r in ws.relations.iter() { - for t in r.tuples() { + for t in r.1.tuples() { match t { TxTuple::Insert(_) | TxTuple::Update(_) | TxTuple::Tombstone { .. } => { let TupleId { page: page_id, slot: _slot_id, } = t.tuple_id(); - dirty_pages.insert((page_id, r.id)); + dirty_pages.insert((page_id, r.1.id)); } TxTuple::Value(_) => { // Untouched value (view), noop, should already exist in backing store. 
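The benchmark changes above swap sized_chunks::SparseChunk for the BitArray type introduced later in this patch. As a quick orientation, here is a minimal sketch (not part of the change) of that check/set/erase pattern; String stands in for the Arc<Transaction> values the real workload stores, and the helper function name is illustrative only.

use moor_values::util::{BitArray, Bitset64};

// Illustrative helper: 64 slots backed by a single 64-bit bitset word, mirroring the
// BitArray<_, 64, Bitset64<1>> shape the benchmark uses for in-flight transactions.
fn track_processes() {
    let mut processes: BitArray<String, 64, Bitset64<1>> = BitArray::new();

    // `check` + `set` replace SparseChunk::insert's "returns the previous occupant"
    // contract: assert the slot is free first, then write into it.
    assert!(!processes.check(3));
    processes.set(3, "tx-3".to_string());
    assert!(processes.check(3));

    // `erase` replaces SparseChunk::remove and hands back the owned value.
    let tx = processes.erase(3).expect("slot 3 was occupied");
    assert_eq!(tx, "tx-3");
    assert!(processes.is_empty());
}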
diff --git a/crates/db/src/tuplebox/tb.rs b/crates/db/src/tuplebox/tb.rs index 7727da51..43dd4692 100644 --- a/crates/db/src/tuplebox/tb.rs +++ b/crates/db/src/tuplebox/tb.rs @@ -187,8 +187,8 @@ impl TupleBox { ) -> Result { let mut commitset = CommitSet::new(commit_ts); - for (relation_id, local_relation) in tx_working_set.relations.iter().enumerate() { - let relation_id = RelationId(relation_id); + for (_, local_relation) in tx_working_set.relations.iter() { + let relation_id = local_relation.id; // scan through the local working set, and for each tuple, check to see if it's safe to // commit. If it is, then we'll add it to the commit set. // note we're not actually committing yet, just producing a candidate commit set @@ -267,7 +267,7 @@ impl TupleBox { // We have to hold a lock during the duration of this. If we fail, we will loop back // and retry. let mut canonical = self.canonical.write().await; - for relation in commit_set.iter() { + for (_, relation) in commit_set.iter() { // Did the relation get committed to by someone else in the interim? If so, return // back to the transaction letting it know that, and it can decide if it wants to // retry. @@ -279,7 +279,7 @@ impl TupleBox { // Everything passed, so we can commit the changes by swapping in the new canonical // before releasing the lock. let commit_ts = commit_set.ts; - for relation in commit_set.into_iter() { + for (_, relation) in commit_set.into_iter() { let idx = relation.id.0; canonical[idx] = relation; // And update the timestamp on the canonical relation. diff --git a/crates/db/src/tuplebox/tuples/slotbox.rs b/crates/db/src/tuplebox/tuples/slotbox.rs index 45752ac3..945273aa 100644 --- a/crates/db/src/tuplebox/tuples/slotbox.rs +++ b/crates/db/src/tuplebox/tuples/slotbox.rs @@ -35,7 +35,7 @@ use std::pin::Pin; use std::sync::atomic::Ordering::SeqCst; use std::sync::{Arc, Mutex}; -use sized_chunks::SparseChunk; +use moor_values::util::{BitArray, Bitset64}; use thiserror::Error; use tracing::error; @@ -188,8 +188,8 @@ impl SlotBox { } pub fn num_pages(&self) -> usize { - let inner = self.inner.lock().unwrap(); - inner.available_page_space.len() + let mut inner = self.inner.lock().unwrap(); + inner.available_page_space.size() } pub fn used_pages(&self) -> Vec { @@ -197,7 +197,7 @@ impl SlotBox { allocator .available_page_space .iter() - .map(|ps| ps.pages()) + .map(|(_, ps)| ps.pages()) .flatten() .collect() } @@ -209,7 +209,7 @@ struct Inner { // so we can maybe get rid of the locks in the buffer pool... pool: BufferPool, /// The set of used pages, indexed by relation, in sorted order of the free space available in them. - available_page_space: SparseChunk, + available_page_space: BitArray>, /// The "swizzelable" references to tuples, indexed by tuple id. /// There has to be a stable-memory address for each of these, as they are referenced by /// pointers in the TupleRefs themselves. 
@@ -221,7 +221,7 @@ struct Inner { impl Inner { fn new(pool: BufferPool) -> Self { Self { - available_page_space: SparseChunk::new(), + available_page_space: BitArray::new(), pool, swizrefs: HashMap::new(), } @@ -297,7 +297,7 @@ impl Inner { let bid = Bid(pid as u64); let Some(available_page_space) = self.available_page_space.get_mut(relation_id.0) else { self.available_page_space - .insert(relation_id.0, PageSpace::new(free_space, bid)); + .set(relation_id.0, PageSpace::new(free_space, bid)); return; }; @@ -352,7 +352,7 @@ impl Inner { Ok((bid.0 as PageId, available_page_space.len() - 1)) } None => { - self.available_page_space.insert( + self.available_page_space.set( relation_id.0, PageSpace::new(slot_page_empty_size(actual_size), bid), ); @@ -394,14 +394,14 @@ impl Inner { offset: usize, page_remaining_bytes: usize, ) { - let available_page_space = &mut self.available_page_space[relation_id.0]; + let available_page_space = self.available_page_space.get_mut(relation_id.0).unwrap(); available_page_space.finish(offset, page_remaining_bytes); } fn report_free(&mut self, pid: PageId, new_size: usize, is_empty: bool) { // Seek the page in the available_page_space vectors, and add the bytes back to its free space. // We don't know the relation id here, so we have to linear scan all of them. - for available_page_space in self.available_page_space.iter_mut() { + for (_, available_page_space) in self.available_page_space.iter_mut() { if available_page_space.update_page(pid, new_size, is_empty) { if is_empty { self.pool diff --git a/crates/db/src/tuplebox/tuples/slotted_page.rs b/crates/db/src/tuplebox/tuples/slotted_page.rs index 90572516..db007260 100644 --- a/crates/db/src/tuplebox/tuples/slotted_page.rs +++ b/crates/db/src/tuplebox/tuples/slotted_page.rs @@ -608,6 +608,7 @@ impl<'a> PageWriteGuard<'a> { sp.get_slot_mut(slot_id) } + #[inline] pub fn allocate( &mut self, size: usize, @@ -620,6 +621,7 @@ impl<'a> PageWriteGuard<'a> { }; sp.allocate(size, initial_value) } + pub fn remove_slot(&mut self, slot_id: SlotId) -> Result<(usize, usize, bool), SlotBoxError> { let sp = SlottedPage { base_address: self.base_address, diff --git a/crates/db/src/tuplebox/tx/transaction.rs b/crates/db/src/tuplebox/tx/transaction.rs index d589c6fd..44ff5f03 100644 --- a/crates/db/src/tuplebox/tx/transaction.rs +++ b/crates/db/src/tuplebox/tx/transaction.rs @@ -15,7 +15,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::Arc; -use sized_chunks::SparseChunk; +use moor_values::util::{BitArray, Bitset64}; use thiserror::Error; use tokio::sync::RwLock; @@ -307,26 +307,26 @@ impl Transaction { /// working set. pub struct CommitSet { pub(crate) ts: u64, - relations: SparseChunk, + relations: BitArray>, } impl CommitSet { pub(crate) fn new(ts: u64) -> Self { Self { ts, - relations: SparseChunk::new(), + relations: BitArray::new(), } } /// Returns an iterator over the modified relations in the commit set. - pub(crate) fn iter(&self) -> impl Iterator { + pub(crate) fn iter(&self) -> impl Iterator { return self.relations.iter(); } /// Returns an iterator over the modified relations in the commit set, moving and consuming the /// commit set in the process. - pub(crate) fn into_iter(self) -> impl IntoIterator { - self.relations.into_iter() + pub(crate) fn into_iter(self) -> impl IntoIterator { + self.relations.take_all().into_iter() } /// Fork the given base relation into the commit set, if it's not already there. 
@@ -337,7 +337,7 @@ impl CommitSet { ) -> &mut BaseRelation { if self.relations.get(relation_id.0).is_none() { let r = canonical.clone(); - self.relations.insert(relation_id.0, r); + self.relations.set(relation_id.0, r); } self.relations.get_mut(relation_id.0).unwrap() } diff --git a/crates/db/src/tuplebox/tx/working_set.rs b/crates/db/src/tuplebox/tx/working_set.rs index e15cf4a0..a2344ada 100644 --- a/crates/db/src/tuplebox/tx/working_set.rs +++ b/crates/db/src/tuplebox/tx/working_set.rs @@ -12,6 +12,7 @@ // this program. If not, see . // +use moor_values::util::{BitArray, Bitset64}; use std::collections::{HashMap, HashSet}; use std::sync::Arc; @@ -29,36 +30,51 @@ use crate::tuplebox::RelationId; // TODO: see comments on BaseRelation, changes there will reqiure changes here. pub struct WorkingSet { pub(crate) ts: u64, + pub(crate) schema: Vec, pub(crate) slotbox: Arc, - pub(crate) relations: Vec, + pub(crate) relations: BitArray>, } impl WorkingSet { pub(crate) fn new(slotbox: Arc, schema: &[RelationInfo], ts: u64) -> Self { - let mut relations = Vec::new(); - for (i, r) in schema.iter().enumerate() { - relations.push(TxBaseRelation { - id: RelationId(i), - tuples: Vec::new(), - domain_index: HashMap::new(), - codomain_index: if r.secondary_indexed { - Some(HashMap::new()) - } else { - None - }, - }); - } + let relations = BitArray::new(); Self { ts, slotbox, + schema: schema.to_vec(), relations, } } pub(crate) fn clear(&mut self) { for rel in self.relations.iter_mut() { - rel.clear(); + // let Some(rel) = rel else { continue }; + rel.1.clear(); + } + } + + fn get_relation_mut<'a>( + relation_id: RelationId, + schema: &[RelationInfo], + relations: &'a mut BitArray>, + ) -> &'a mut TxBaseRelation { + if relations.check(relation_id.0) { + return relations.get_mut(relation_id.0).unwrap(); } + let r = &schema[relation_id.0]; + let new_relation = TxBaseRelation { + id: relation_id, + tuples: Vec::new(), + domain_index: HashMap::new(), + codomain_index: if r.secondary_indexed { + Some(HashMap::new()) + } else { + None + }, + }; + + relations.set(relation_id.0, new_relation); + relations.get_mut(relation_id.0).unwrap() } pub(crate) async fn seek_by_domain( @@ -67,7 +83,7 @@ impl WorkingSet { relation_id: RelationId, domain: SliceRef, ) -> Result<(SliceRef, SliceRef), TupleError> { - let relation = &mut self.relations[relation_id.0]; + let relation = Self::get_relation_mut(relation_id, &self.schema, &mut self.relations); // Check local first. if let Some(tuple_idx) = relation.domain_index.get(&domain) { @@ -114,7 +130,7 @@ impl WorkingSet { // TODO: There is likely a way to optimize this so we're not doing this when not necessary. // but we'll need a round of really good coherence tests before we can do that. let tuples_for_codomain = { - let relation = &self.relations[relation_id.0]; + let relation = Self::get_relation_mut(relation_id, &self.schema, &mut self.relations); // If there's no secondary index, we panic. You should not have tried this. 
if relation.codomain_index.is_none() { @@ -132,7 +148,7 @@ impl WorkingSet { let _ = self.seek_by_domain(&db, relation_id, tuple.domain()).await; } - let relation = &mut self.relations[relation_id.0]; + let relation = Self::get_relation_mut(relation_id, &self.schema, &mut self.relations); let codomain_index = relation.codomain_index.as_ref().expect("No codomain index"); let tuple_indexes = codomain_index .get(&codomain) @@ -158,7 +174,7 @@ impl WorkingSet { domain: SliceRef, codomain: SliceRef, ) -> Result<(), TupleError> { - let relation = &mut self.relations[relation_id.0]; + let relation = Self::get_relation_mut(relation_id, &self.schema, &mut self.relations); // If we already have a local version, that's a dupe, so return an error for that. if relation.domain_index.get(&domain).is_some() { @@ -190,7 +206,7 @@ impl WorkingSet { } pub(crate) async fn predicate_scan bool>( - &self, + &mut self, db: &Arc, relation_id: RelationId, f: F, @@ -208,8 +224,7 @@ impl WorkingSet { // Now pull in the local working set. // Apply any changes to the tuples we've already collected, and add in any inserts, and // remove any tombstones. - let relation = &self.relations[relation_id.0]; - + let relation = Self::get_relation_mut(relation_id, &self.schema, &mut self.relations); for t in &relation.tuples { if t.ts() > self.ts { continue; @@ -242,7 +257,7 @@ impl WorkingSet { domain: SliceRef, codomain: SliceRef, ) -> Result<(), TupleError> { - let relation = &mut self.relations[relation_id.0]; + let relation = Self::get_relation_mut(relation_id, &self.schema, &mut self.relations); // If we have an existing copy, we will update it, but keep its existing derivation // timestamp and operation type. @@ -313,7 +328,7 @@ impl WorkingSet { domain: SliceRef, codomain: SliceRef, ) -> Result<(), TupleError> { - let relation = &mut self.relations[relation_id.0]; + let relation = Self::get_relation_mut(relation_id, &self.schema, &mut self.relations); // If we have an existing copy, we will update it, but keep its existing derivation // timestamp. @@ -412,7 +427,7 @@ impl WorkingSet { relation_id: RelationId, domain: SliceRef, ) -> Result<(), TupleError> { - let relation = &mut self.relations[relation_id.0]; + let relation = Self::get_relation_mut(relation_id, &self.schema, &mut self.relations); // Delete is basically an update but where we stick a Tombstone. if let Some(tuple_index) = relation.domain_index.get_mut(&domain).cloned() { diff --git a/crates/values/src/util/bitarray.rs b/crates/values/src/util/bitarray.rs new file mode 100644 index 00000000..1942a5de --- /dev/null +++ b/crates/values/src/util/bitarray.rs @@ -0,0 +1,257 @@ +use crate::util::BitsetTrait; +use std::mem::MaybeUninit; +use std::ops::Index; + +// BITSET_WIDTH must be RANGE_WIDTH / 16 +// Once generic_const_exprs is stabilized, we can use that to calculate this from a RANGE_WIDTH. +// Until then, don't mess up. 
+pub struct BitArray +where + BitsetType: BitsetTrait + std::default::Default, +{ + pub(crate) bitset: BitsetType, + storage: Box<[MaybeUninit; RANGE_WIDTH]>, +} + +impl BitArray +where + BitsetType: BitsetTrait + std::default::Default, +{ + pub fn new() -> Self { + Self { + bitset: Default::default(), + storage: Box::new(unsafe { MaybeUninit::uninit().assume_init() }), + } + } + + pub fn push(&mut self, x: X) -> Option { + let pos = self.bitset.first_empty()?; + assert!(pos < RANGE_WIDTH); + self.bitset.set(pos); + unsafe { + self.storage[pos].as_mut_ptr().write(x); + } + Some(pos) + } + + pub fn pop(&mut self) -> Option { + let pos = self.bitset.last()?; + self.bitset.unset(pos); + let old = std::mem::replace(&mut self.storage[pos], MaybeUninit::uninit()); + Some(unsafe { old.assume_init() }) + } + + pub fn last(&self) -> Option<&X> { + self.bitset + .last() + .map(|pos| unsafe { self.storage[pos].assume_init_ref() }) + } + + #[inline] + pub fn last_used_pos(&self) -> Option { + self.bitset.last() + } + + #[inline] + pub fn first_used(&self) -> Option { + self.bitset.first_set() + } + + #[inline] + pub fn first_empty(&mut self) -> Option { + // Storage size of the bitset can be larger than the range width. + // For example: we have a RANGE_WIDTH of 48 and a bitset of 64x1 or 32x2. + // So we need to check that the first empty bit is within the range width, or people could + // get the idea they could append beyond our permitted range. + let Some(first_empty) = self.bitset.first_empty() else { + return None; + }; + if first_empty > RANGE_WIDTH { + return None; + } + Some(first_empty) + } + + #[inline] + pub fn check(&self, pos: usize) -> bool { + self.bitset.check(pos) + } + + #[inline] + pub fn get(&self, pos: usize) -> Option<&X> { + assert!(pos < RANGE_WIDTH); + if self.bitset.check(pos) { + Some(unsafe { self.storage[pos].assume_init_ref() }) + } else { + None + } + } + + #[inline] + pub fn get_mut(&mut self, pos: usize) -> Option<&mut X> { + assert!(pos < RANGE_WIDTH); + if self.bitset.check(pos) { + Some(unsafe { self.storage[pos].assume_init_mut() }) + } else { + None + } + } + + #[inline] + pub fn set(&mut self, pos: usize, x: X) { + assert!(pos < RANGE_WIDTH); + unsafe { + self.storage[pos].as_mut_ptr().write(x); + }; + self.bitset.set(pos); + } + + #[inline] + pub fn update(&mut self, pos: usize, x: X) -> Option { + let old = self.take_internal(pos); + unsafe { + self.storage[pos].as_mut_ptr().write(x); + }; + self.bitset.set(pos); + old + } + + #[inline] + pub fn erase(&mut self, pos: usize) -> Option { + let old = self.take_internal(pos)?; + self.bitset.unset(pos); + Some(old) + } + + // Erase without updating index, used by update and erase + #[inline] + fn take_internal(&mut self, pos: usize) -> Option { + assert!(pos < RANGE_WIDTH); + if self.bitset.check(pos) { + let old = std::mem::replace(&mut self.storage[pos], MaybeUninit::uninit()); + Some(unsafe { old.assume_init() }) + } else { + None + } + } + + pub fn clear(&mut self) { + for i in 0..RANGE_WIDTH { + if self.bitset.check(i) { + unsafe { self.storage[i].assume_init_drop() } + } + } + self.bitset.clear(); + } + + pub fn is_empty(&self) -> bool { + self.bitset.is_empty() + } + + pub fn size(&mut self) -> usize { + self.bitset.size() + } + + pub fn iter_keys(&self) -> impl DoubleEndedIterator + '_ { + self.storage.iter().enumerate().filter_map(|x| { + if !self.bitset.check(x.0) { + None + } else { + Some(x.0) + } + }) + } + + pub fn take_all(mut self) -> Vec<(usize, X)> { + let mut vec = Vec::new(); + for i in 
0..RANGE_WIDTH { + if self.bitset.check(i) { + let old = std::mem::replace(&mut self.storage[i], MaybeUninit::uninit()); + vec.push((i, unsafe { old.assume_init() })); + } + } + self.bitset.clear(); + vec + } + pub fn iter(&self) -> impl DoubleEndedIterator { + self.storage.iter().enumerate().filter_map(|x| { + if !self.bitset.check(x.0) { + None + } else { + Some((x.0, unsafe { x.1.assume_init_ref() })) + } + }) + } + + pub fn iter_mut(&mut self) -> impl DoubleEndedIterator { + self.storage.iter_mut().enumerate().filter_map(|x| { + if !self.bitset.check(x.0) { + None + } else { + Some((x.0, unsafe { x.1.assume_init_mut() })) + } + }) + } +} + +impl Default for BitArray +where + BitsetType: BitsetTrait + std::default::Default, +{ + fn default() -> Self { + Self::new() + } +} + +impl Index for BitArray +where + BitsetType: BitsetTrait + std::default::Default, +{ + type Output = X; + + fn index(&self, index: usize) -> &Self::Output { + self.get(index).unwrap() + } +} + +impl Drop for BitArray +where + BitsetType: BitsetTrait + std::default::Default, +{ + fn drop(&mut self) { + for i in 0..RANGE_WIDTH { + if self.bitset.check(i) { + unsafe { self.storage[i].assume_init_drop() } + } + } + self.bitset.clear(); + } +} + +#[cfg(test)] +mod test { + use crate::util::{BitArray, Bitset16}; + + #[test] + fn u8_vector() { + let mut vec: BitArray> = BitArray::new(); + assert_eq!(vec.first_empty(), Some(0)); + assert_eq!(vec.last_used_pos(), None); + assert_eq!(vec.push(123).unwrap(), 0); + assert_eq!(vec.first_empty(), Some(1)); + assert_eq!(vec.last_used_pos(), Some(0)); + assert_eq!(vec.get(0), Some(&123)); + assert_eq!(vec.push(124).unwrap(), 1); + assert_eq!(vec.push(55).unwrap(), 2); + assert_eq!(vec.push(126).unwrap(), 3); + assert_eq!(vec.pop(), Some(126)); + assert_eq!(vec.first_empty(), Some(3)); + vec.erase(0); + assert_eq!(vec.first_empty(), Some(0)); + assert_eq!(vec.last_used_pos(), Some(2)); + assert_eq!(vec.size(), 2); + vec.set(0, 126); + assert_eq!(vec.get(0), Some(&126)); + assert_eq!(vec.update(0, 123), Some(126)); + } +} diff --git a/crates/values/src/util/bitset.rs b/crates/values/src/util/bitset.rs new file mode 100644 index 00000000..e7d9b732 --- /dev/null +++ b/crates/values/src/util/bitset.rs @@ -0,0 +1,293 @@ +use std::cmp::min; +use std::ops::Index; + +use num_traits::PrimInt; + +pub trait BitsetTrait: Default { + // Total size of the bitset in bits. + const BITSET_WIDTH: usize; + // Total size of the bitset in bytes. + const STORAGE_WIDTH_BYTES: usize; + // Bit shift factor -- e.g. 3 for 8, 4 for 16, etc. + const BIT_SHIFT: usize; + // Bit width of each storage unit. + const STORAGE_BIT_WIDTH: usize; + // Total size of storage in its internal storage width (e.g. u16, u32, etc.) 
+ const STORAGE_WIDTH: usize; + + fn first_empty(&self) -> Option; + fn first_set(&self) -> Option; + fn set(&mut self, pos: usize); + fn unset(&mut self, pos: usize); + fn check(&self, pos: usize) -> bool; + fn clear(&mut self); + fn last(&self) -> Option; + fn is_empty(&self) -> bool; + fn size(&self) -> usize; + fn bit_width(&self) -> usize; + fn capacity(&self) -> usize; + fn storage_width(&self) -> usize; + fn as_bitmask(&self) -> u128; +} + +pub struct Bitset +where + StorageType: PrimInt, +{ + bitset: [StorageType; STORAGE_WIDTH], +} + +impl Bitset +where + StorageType: PrimInt, +{ + pub fn new() -> Self { + Self { + bitset: [StorageType::min_value(); STORAGE_WIDTH], + } + } + + pub fn iter(&self) -> impl Iterator + '_ { + self.bitset.iter().enumerate().flat_map(|(i, b)| { + (0..Self::STORAGE_BIT_WIDTH).filter_map(move |j| { + let b: u64 = b.to_u64().unwrap(); + if (b) & (1 << j) != 0 { + Some((i << Self::BIT_SHIFT) + j) + } else { + None + } + }) + }) + } +} + +impl BitsetTrait for Bitset +where + StorageType: PrimInt, +{ + const BITSET_WIDTH: usize = Self::STORAGE_BIT_WIDTH * STORAGE_WIDTH; + const STORAGE_WIDTH_BYTES: usize = Self::BITSET_WIDTH / 8; + const BIT_SHIFT: usize = Self::STORAGE_BIT_WIDTH.trailing_zeros() as usize; + const STORAGE_BIT_WIDTH: usize = std::mem::size_of::() * 8; + const STORAGE_WIDTH: usize = STORAGE_WIDTH; + + fn first_empty(&self) -> Option { + for (i, b) in self.bitset.iter().enumerate() { + if b.is_zero() { + return Some(i << Self::BIT_SHIFT); + } + if *b != StorageType::max_value() { + return Some((i << Self::BIT_SHIFT) + b.trailing_ones() as usize); + } + } + None + } + + fn first_set(&self) -> Option { + for (i, b) in self.bitset.iter().enumerate() { + if !b.is_zero() { + return Some((i << Self::BIT_SHIFT) + b.trailing_zeros() as usize); + } + } + None + } + + #[inline] + fn set(&mut self, pos: usize) { + assert!(pos < Self::BITSET_WIDTH); + let v = self.bitset[pos >> Self::BIT_SHIFT]; + let shift: StorageType = StorageType::one() << (pos % Self::STORAGE_BIT_WIDTH); + let v = v.bitor(shift); + self.bitset[pos >> Self::BIT_SHIFT] = v; + } + + #[inline] + fn unset(&mut self, pos: usize) { + assert!(pos < Self::BITSET_WIDTH); + let v = self.bitset[pos >> Self::BIT_SHIFT]; + let shift = StorageType::one() << (pos % Self::STORAGE_BIT_WIDTH); + let v = v & shift.not(); + self.bitset[pos >> Self::BIT_SHIFT] = v; + } + + #[inline] + fn check(&self, pos: usize) -> bool { + assert!(pos < Self::BITSET_WIDTH); + let shift: StorageType = StorageType::one() << (pos % Self::STORAGE_BIT_WIDTH); + !(self.bitset[pos >> Self::BIT_SHIFT] & shift).is_zero() + } + + #[inline] + fn clear(&mut self) { + self.bitset.fill(StorageType::zero()); + } + + fn last(&self) -> Option { + for (i, b) in self.bitset.iter().enumerate() { + if !b.is_zero() { + return Some( + (i << Self::BIT_SHIFT) + (Self::STORAGE_BIT_WIDTH - 1) + - b.leading_zeros() as usize, + ); + } + } + None + } + + fn is_empty(&self) -> bool { + self.bitset.iter().all(|x| x.is_zero()) + } + + fn size(&self) -> usize { + self.bitset.iter().map(|x| x.count_ones() as usize).sum() + } + + fn bit_width(&self) -> usize { + Self::STORAGE_BIT_WIDTH + } + + fn capacity(&self) -> usize { + Self::BITSET_WIDTH + } + + fn storage_width(&self) -> usize { + Self::STORAGE_WIDTH + } + + fn as_bitmask(&self) -> u128 { + assert!(Self::STORAGE_BIT_WIDTH <= 128); + let mut mask = 0u128; + // copy bit-level representation, unsafe ptr copy + unsafe { + std::ptr::copy_nonoverlapping( + self.bitset.as_ptr() as *const u8, + &mut mask as 
*mut u128 as *mut u8, + min(16, Self::STORAGE_WIDTH_BYTES), + ); + } + mask + } +} + +impl Default for Bitset +where + StorageType: PrimInt, +{ + fn default() -> Self { + Self::new() + } +} + +impl Index for Bitset +where + StorageType: PrimInt, +{ + type Output = bool; + + #[inline] + fn index(&self, pos: usize) -> &Self::Output { + if self.check(pos) { + &true + } else { + &false + } + } +} + +pub type Bitset64 = Bitset; +pub type Bitset32 = Bitset; +pub type Bitset16 = Bitset; +pub type Bitset8 = Bitset; + +#[cfg(test)] +mod tests { + use crate::util::BitsetTrait; + + #[test] + fn test_first_free_8s() { + let mut bs = super::Bitset8::<4>::new(); + bs.set(1); + bs.set(3); + assert_eq!(bs.first_empty(), Some(0)); + bs.set(0); + assert_eq!(bs.first_empty(), Some(2)); + + // Now fill it up and verify none. + for i in 0..bs.capacity() { + bs.set(i); + } + assert_eq!(bs.first_empty(), None); + } + + #[test] + fn test_first_free_8_2() { + let mut bs = super::Bitset8::<2>::new(); + bs.set(1); + bs.set(3); + assert_eq!(bs.first_empty(), Some(0)); + bs.set(0); + assert_eq!(bs.first_empty(), Some(2)); + + // Now fill it up and verify none. + for i in 0..bs.capacity() { + bs.set(i); + } + assert_eq!(bs.first_empty(), None); + } + + #[test] + fn test_first_free_32s() { + let mut bs = super::Bitset32::<1>::new(); + bs.set(1); + bs.set(3); + assert_eq!(bs.first_empty(), Some(0)); + bs.set(0); + assert_eq!(bs.first_empty(), Some(2)); + + for i in 0..bs.capacity() { + bs.set(i); + } + assert_eq!(bs.first_empty(), None); + } + + #[test] + fn test_iter_16s() { + let mut bs = super::Bitset16::<4>::new(); + bs.set(0); + bs.set(1); + bs.set(2); + bs.set(4); + bs.set(8); + bs.set(16); + let v: Vec = bs.iter().collect(); + assert_eq!(v, vec![0, 1, 2, 4, 8, 16]); + } + + #[test] + fn test_first_free_64s() { + let mut bs = super::Bitset64::<4>::new(); + bs.set(1); + bs.set(3); + assert_eq!(bs.first_empty(), Some(0)); + bs.set(0); + assert_eq!(bs.first_empty(), Some(2)); + } + + #[test] + fn test_iter_64s() { + let mut bs = super::Bitset64::<4>::new(); + bs.set(0); + bs.set(1); + bs.set(2); + bs.set(4); + bs.set(8); + bs.set(16); + bs.set(32); + bs.set(47); + bs.set(48); + bs.set(49); + bs.set(127); + let v: Vec = bs.iter().collect(); + assert_eq!(v, vec![0, 1, 2, 4, 8, 16, 32, 47, 48, 49, 127]); + } +} diff --git a/crates/values/src/util/mod.rs b/crates/values/src/util/mod.rs index 116995cd..0b288c60 100644 --- a/crates/values/src/util/mod.rs +++ b/crates/values/src/util/mod.rs @@ -12,9 +12,14 @@ // this program. If not, see . // +mod bitarray; pub mod bitenum; +mod bitset; pub mod slice_ref; +pub use bitarray::*; +pub use bitset::*; + /// Check `names` for matches with wildcard prefixes. /// e.g. "dname*c" will match for any of 'dname', 'dnamec' #[must_use] diff --git a/crates/values/src/util/slice_ref.rs b/crates/values/src/util/slice_ref.rs index 81a3269f..6257633f 100644 --- a/crates/values/src/util/slice_ref.rs +++ b/crates/values/src/util/slice_ref.rs @@ -22,9 +22,7 @@ use yoke::Yoke; /// from, and a range within that storage. /// In this way it's possible to safely and conveniently pass around the 'slices' of things without /// worrying about lifetimes and borrowing. -/// This is used here for the pieces of the rope, which can all be slices out of common buffer -/// storage, and we can avoid making copies of the data when doing things like splitting nodes -/// or appending to the rope etc. +/// Used generally for passing around zero copy tuples, pieces of tuples, values, strings, etc. 
 #[derive(Clone)]
 pub struct SliceRef(Yoke<&'static [u8], Arc>);
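For the CommitSet and SlotBox changes above, the behavioural detail to keep in mind is that BitArray's iterators yield (slot, value) pairs — the slot doubling as the RelationId index — and that take_all() consumes the array into owned pairs, which is what CommitSet::into_iter() now builds on. A small sketch under those assumptions, with u32 values purely for brevity:

use moor_values::util::{BitArray, Bitset64};

fn drain_example() {
    let mut set: BitArray<u32, 64, Bitset64<1>> = BitArray::new();
    set.set(0, 100);
    set.set(5, 105);

    // iter() borrows, yielding (position, &value) pairs in ascending slot order.
    let borrowed: Vec<(usize, &u32)> = set.iter().collect();
    assert_eq!(borrowed, vec![(0, &100), (5, &105)]);

    // take_all() consumes the array and returns owned (position, value) pairs.
    let owned: Vec<(usize, u32)> = set.take_all();
    assert_eq!(owned, vec![(0, 100), (5, 105)]);
}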