-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 8de9b47
Showing
11 changed files
with
431 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
/target |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[package] | ||
name = "granges2" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | ||
|
||
[dependencies] | ||
coitrees = { version = "0.4.0", features = ["nosimd"] } | ||
genomap = "0.1.5" | ||
indexmap = "2.2.3" | ||
rand = "0.8.5" | ||
thiserror = "1.0.57" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
use thiserror::Error; | ||
|
||
use crate::Position; | ||
|
||
#[derive(Debug, Error)] | ||
pub enum GRangesError { | ||
#[error("Range invalid: start ({0}) must be greater than end ({1})")] | ||
InvalidGenomicRange(Position, Position), | ||
|
||
#[error("Range [{0}, {1}] is invalid for sequence of length {2}")] | ||
InvalidGenomicRangeForSequence(Position, Position, Position), | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
use genomap::GenomeMap; | ||
|
||
use crate::{traits::RangeContainer, ranges::{vec::{VecRanges, VecRangesIndexed, VecRangesEmpty}, RangeIndexed, RangeEmpty}, Position}; | ||
|
||
|
||
/// Genomic ranges grouped in a [`GenomeMap`], with an optional data container.
///
/// `C` is the range container type stored in the map and `T` is the type of
/// the data container.
pub struct GRanges<C, T> {
    /// Range containers, looked up by sequence name.
    ranges: GenomeMap<C>,
    /// Data container; `None` when no data has been stored.
    data: Option<T>,
}
|
||
|
||
impl<C, T> GRanges<C, T> | ||
where C: RangeContainer { | ||
|
||
/// Get the total number of ranges. | ||
pub fn len(&self) -> usize { | ||
self.ranges.values().map(|ranges| ranges.len()).sum() | ||
} | ||
|
||
/// Return whether the [`GRanges`] object is empty (contains no ranges). | ||
pub fn is_empty(&self) -> bool { | ||
self.len() == 0 | ||
} | ||
} | ||
|
||
impl<U> GRanges<VecRangesIndexed, Vec<U>> { | ||
|
||
/// Create a new [`GRanges`] object, with vector storage for ranges and data. | ||
/// | ||
/// This combination of range and data containers is used when loading data into | ||
/// a new [`GRanges`] object, and the size cannot be known beforehand. Rust's | ||
/// [`Vec`] will dynamically grow to accommodate new ranges; use [`GRanges.shrink()`] | ||
/// call the [`Vec`]'s shrink to size methods on the range and data containers | ||
/// after data loading to shrink to the minimal necessary size (this can reduce | ||
/// memory usage). | ||
pub fn new_vec() -> Self { | ||
let ranges = GenomeMap::new(); | ||
Self { | ||
ranges, | ||
data: None, | ||
} | ||
} | ||
|
||
|
||
pub fn push_range_with_data(&mut self, seqname: &str, start: Position, end: Position, data: U) { | ||
// push data to the vec data container, getting the index | ||
let index: usize = { | ||
let data_container = self.data.get_or_insert_with(Vec::new); | ||
data_container.push(data); | ||
data_container.len() - 1 // new data index | ||
}; | ||
// push an indexed range | ||
let range = RangeIndexed::new(start, end, index); | ||
self.ranges.entry_or_default(seqname).ranges.push(range); | ||
} | ||
} | ||
|
||
impl GRanges<VecRangesEmpty, ()> { | ||
|
||
/// Create a new [`GRanges`] object, with vector storage for ranges and no data container. | ||
pub fn new_vec_empty() -> Self { | ||
let ranges = GenomeMap::new(); | ||
Self { | ||
ranges, | ||
data: None, | ||
} | ||
} | ||
|
||
/// Push an empty range (no data) to the [`VecRangesEmpty`] range container. | ||
pub fn push_range_only(&mut self, seqname: &str, start: Position, end: Position) { | ||
// push an unindexed (empty) range | ||
let range = RangeEmpty::new(start, end); | ||
self.ranges.entry_or_default(seqname).ranges.push(range); | ||
} | ||
} | ||
|
||
|
||
|
||
#[cfg(test)]
mod tests {
    use crate::{prelude::*, test_utilities::random_vecranges};

    /// Pushing one range with data into a fresh vector-backed `GRanges`
    /// yields a length of one.
    #[test]
    fn test_new_vec() {
        let mut gr = GRanges::new_vec();
        gr.push_range_with_data("chr1", 0, 10, 1.1);
        assert_eq!(gr.len(), 1);
    }

    /// `random_vecranges(100)` is expected to return a container of 100 ranges.
    #[test]
    fn test_random_vecranges() {
        let vr = random_vecranges(100);
        assert_eq!(vr.len(), 100)
    }

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
|
||
pub mod traits; | ||
pub mod ranges; | ||
pub mod granges; | ||
pub mod error; | ||
pub mod test_utilities; | ||
|
||
/// Coordinate type used for range start/end positions and sequence lengths.
pub type Position = u32;
|
||
/// Convenience re-exports of [`GRanges`](crate::granges::GRanges) and
/// [`GRangesError`](crate::error::GRangesError).
pub mod prelude {
    pub use crate::granges::GRanges;
    pub use crate::error::GRangesError;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
use coitrees::{Interval, BasicCOITree, IntervalTree, IntervalNode, GenericInterval}; | ||
|
||
use crate::{Position, traits::RangeContainer, error::GRangesError}; | ||
|
||
use super::{vec::VecRanges, RangeIndexed, validate_range}; | ||
|
||
/// A [`coitrees::Interval`] carrying a `usize` as metadata
/// (presumably an index into a data container; confirm at use sites).
type COITreeIntervalIndexed = Interval<usize>;
|
||
impl GenericInterval<usize> for RangeIndexed { | ||
fn first(&self) -> i32 { | ||
self.start().try_into().unwrap() | ||
} | ||
fn last(&self) -> i32 { | ||
self.end().try_into().unwrap() | ||
} | ||
fn metadata(&self) -> &usize { | ||
self.index() | ||
} | ||
} | ||
|
||
/// A [`coitrees::BasicCOITree`] interval tree for a single sequence's ranges.
///
/// This is generic over the interval type, to handle the case where one
/// may want to do overlap operations on ranges without associated data in
/// a data container (e.g. ranges that just define megabase windows).
pub struct COITreeRangeContainer<R: Clone> {
    /// The interval tree holding the ranges.
    ranges: BasicCOITree<R, usize>,
    /// The sequence length, used to validate new ranges.
    length: Position,
}
|
||
impl<R: Clone> COITreeRangeContainer<R> { | ||
pub fn validate_range(&self, start: Position, end: Position) -> Result<(), GRangesError> { | ||
let range = start..end; | ||
validate_range(&range, self.length) | ||
} | ||
|
||
pub fn query<F>(&self, start: Position, end: Position, visit: F) | ||
where F: FnMut(&IntervalNode<R, usize>) { | ||
// Note the terminology change to match coitrees (and uses i32s) | ||
let first = start.try_into().expect("could not covert"); | ||
let end: i32 = end.try_into().expect("could not covert"); | ||
// internally coitrees uses 0-indexed, right-inclusive "last" | ||
self.ranges.query(first, end - 1, visit) | ||
} | ||
|
||
/// Return the number of ranges in this [`COITreeRangeContainer`] container. | ||
pub fn len(&self) -> usize { | ||
self.ranges.len() | ||
} | ||
|
||
/// Return whether the [`COITreeRangeContainer`] object is empty (contains no ranges). | ||
pub fn is_empty(&self) -> bool { | ||
self.len() == 0 | ||
} | ||
|
||
} | ||
|
||
impl<R: Clone + GenericInterval<R>> From<VecRanges<R>> for COITreeRangeContainer<R> { | ||
fn from(value: VecRanges<R>) -> Self { | ||
let ranges = BasicCOITree::new(&value.ranges); | ||
let length = value.length; | ||
Self { | ||
ranges, | ||
length | ||
} | ||
} | ||
} | ||
|
||
/// [`RangeContainer`] implementation backed by the interval tree.
impl<R: Clone> RangeContainer for COITreeRangeContainer<R> {
    /// Return the number of ranges stored in the tree.
    fn len(&self) -> usize {
        self.ranges.len()
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
use std::ops::Range; | ||
|
||
use crate::{Position, error::GRangesError}; | ||
|
||
pub mod coitrees; | ||
pub mod vec; | ||
|
||
|
||
#[derive(Clone, Default)] | ||
pub struct RangeEmpty { | ||
range: Range<Position>, | ||
} | ||
|
||
impl RangeEmpty { | ||
/// Create a new 0-indexed right-exclusive range. | ||
pub fn new(start: Position, end: Position) -> Self { | ||
let start = start.try_into().unwrap(); | ||
let end = end.try_into().unwrap(); | ||
Self { | ||
range: start..end, | ||
} | ||
} | ||
|
||
pub fn start(&self) -> Position { | ||
self.range.start | ||
} | ||
|
||
pub fn end(&self) -> Position { | ||
self.range.end | ||
} | ||
} | ||
|
||
#[derive(Clone, Debug, Default)] | ||
pub struct RangeIndexed { | ||
range: Range<Position>, | ||
index: usize, | ||
} | ||
|
||
impl RangeIndexed { | ||
/// Create a new 0-indexed right-exclusive range. | ||
pub fn new(start: Position, end: Position, index: usize) -> Self { | ||
let start = start.try_into().unwrap(); | ||
let end = end.try_into().unwrap(); | ||
Self { | ||
range: start..end, | ||
index | ||
} | ||
} | ||
|
||
pub fn start(&self) -> Position { | ||
self.range.start | ||
} | ||
|
||
pub fn end(&self) -> Position { | ||
self.range.end | ||
} | ||
|
||
// Note: this returning a reference is required to | ||
// implement coitrees's GenericInterval trait. | ||
pub fn index(&self) -> &usize { | ||
&self.index | ||
} | ||
} | ||
|
||
/// Validates whether a given range is valid for accessing a sequence of a given `length`. | ||
/// | ||
/// # Arguments | ||
/// | ||
/// * `range` - The range to validate. | ||
/// * `length` - The length of the sequence. | ||
/// | ||
/// # Returns | ||
/// | ||
/// * `bool` - `true` if the range is valid for the sequence; otherwise, `false`. | ||
pub fn validate_range(range: &std::ops::Range<Position>, length: Position) -> | ||
Result<(), GRangesError> { | ||
let start = range.start; | ||
let end = range.start; | ||
dbg!(&start); | ||
dbg!(&end); | ||
if start > end { | ||
GRangesError::InvalidGenomicRange(start, end); | ||
} | ||
|
||
if end >= length { | ||
GRangesError::InvalidGenomicRangeForSequence(start, end, length); | ||
} | ||
Ok(()) | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use crate::prelude::*;
    use super::validate_range;

    /// A range whose start lies after its end must be rejected, and the error
    /// must carry the offending `(start, end)` pair unchanged.
    #[test]
    fn test_invalid_range_start_end() {
        let range = 10..1;
        let result = validate_range(&range, 10);
        assert!(matches!(result, Err(GRangesError::InvalidGenomicRange(10, 1))));
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
use crate::{traits::RangeContainer, Position, error::GRangesError}; | ||
|
||
use super::{RangeIndexed, validate_range, RangeEmpty}; | ||
/// A [`VecRanges`] container holding [`RangeIndexed`] ranges.
pub type VecRangesIndexed = VecRanges<RangeIndexed>;
/// A [`VecRanges`] container holding [`RangeEmpty`] ranges.
pub type VecRangesEmpty = VecRanges<RangeEmpty>;
|
||
/// A vector-backed container of ranges, together with a sequence length.
#[derive(Clone, Default)]
pub struct VecRanges<R: Clone> {
    /// The stored ranges, in the order they were pushed.
    pub (crate) ranges: Vec<R>,
    /// The sequence length, passed to `validate_range` when checking ranges.
    pub length: Position,
}
|
||
impl<R: Clone> VecRanges<R> { | ||
pub fn validate_range(&self, start: Position, end: Position) -> Result<(), GRangesError> { | ||
let range = start..end; | ||
validate_range(&range, self.length) | ||
} | ||
|
||
/// Create a new empty [`VecRanges`] container. | ||
pub fn new(length: Position) -> Self { | ||
Self { | ||
ranges: Vec::new(), | ||
length, | ||
} | ||
} | ||
|
||
/// Add a new range to the [`VecRanges`] container. | ||
pub fn push_range(&mut self, range: R) { | ||
self.ranges.push(range) | ||
} | ||
|
||
/// Return the number of ranges in this [`VecRanges`] container. | ||
pub fn len(&self) -> usize { | ||
self.ranges.len() | ||
} | ||
|
||
/// Return whether the [`VecRanges`] object is empty (contains no ranges). | ||
pub fn is_empty(&self) -> bool { | ||
self.len() == 0 | ||
} | ||
} | ||
|
||
/// [`RangeContainer`] implementation backed by the inner vector.
impl<R: Clone> RangeContainer for VecRanges<R> {
    /// Return the number of ranges stored in the vector.
    fn len(&self) -> usize {
        self.ranges.len()
    }
}
Oops, something went wrong.