From 69e0a3e69eecd874104870e48e2121175900e52a Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Thu, 8 Feb 2024 15:50:17 -0800 Subject: [PATCH 1/7] feat: added an ambiguous input format which reads in all 3+ columns into a tab-delim string --- src/commands/get_fasta.rs | 1 + src/commands/merge.rs | 2 +- src/commands/random.rs | 1 + src/commands/sample.rs | 4 ++ src/dispatch.rs | 12 ++++ src/io/read/bed_reader.rs | 21 ++++++- src/io/read/meta_interval.rs | 107 ++++++++++++++++++++++++++++++++ src/io/read/mod.rs | 2 + src/io/write/iter.rs | 38 +++++++++++- src/io/write/utils.rs | 2 + src/types/formats/in_formats.rs | 18 ++++-- src/types/mod.rs | 6 +- src/types/translater.rs | 26 +++++++- 13 files changed, 229 insertions(+), 11 deletions(-) create mode 100644 src/io/read/meta_interval.rs diff --git a/src/commands/get_fasta.rs b/src/commands/get_fasta.rs index ae26375..a32043f 100644 --- a/src/commands/get_fasta.rs +++ b/src/commands/get_fasta.rs @@ -153,5 +153,6 @@ pub fn get_fasta(args: GetFastaArgs) -> Result<()> { InputFormat::Bed4 => get_fasta_bed4(&mut csv_reader, &mut byterecord, fasta, writer), InputFormat::Bed6 => get_fasta_bed6(&mut csv_reader, &mut byterecord, fasta, writer), InputFormat::Bed12 => get_fasta_bed12(&mut csv_reader, &mut byterecord, fasta, writer), + _ => anyhow::bail!("Unable to process ambiguous input format"), } } diff --git a/src/commands/merge.rs b/src/commands/merge.rs index 52d781b..5737f4d 100644 --- a/src/commands/merge.rs +++ b/src/commands/merge.rs @@ -52,7 +52,7 @@ fn merge_streamed_by_format(bed_reader: BedReader, writer: W) -> Resul let input_format = bed_reader.input_format(); let mut csv_reader = build_reader(bed_reader.reader()); match input_format { - InputFormat::Bed3 => { + InputFormat::Bed3 | InputFormat::Ambiguous => { let record_iter: Box> = iter_unnamed(&mut csv_reader); merge_streamed(record_iter, writer) } diff --git a/src/commands/random.rs b/src/commands/random.rs index bc2c190..c467181 100644 --- a/src/commands/random.rs +++ b/src/commands/random.rs @@ -189,5 +189,6 @@ pub fn random(args: RandomArgs) -> Result<()> { InputFormat::Bed4 => random_bed4(args, writer), InputFormat::Bed6 => random_bed6(args, writer), InputFormat::Bed12 => random_bed12(args, writer), + _ => anyhow::bail!("Unable to process ambiguous input format"), } } diff --git a/src/commands/sample.rs b/src/commands/sample.rs index 6aadaa0..3ca0166 100644 --- a/src/commands/sample.rs +++ b/src/commands/sample.rs @@ -71,5 +71,9 @@ pub fn sample(args: SampleArgs) -> Result<()> { let (mut set, translater) = reader.bed12_set()?; sample_from_set(&mut set, translater.as_ref(), args.params, writer) } + InputFormat::Ambiguous => { + let (mut set, translater) = reader.meta_interval_set()?; + sample_from_set(&mut set, translater.as_ref(), args.params, writer) + } } } diff --git a/src/dispatch.rs b/src/dispatch.rs index 7378be4..307fb38 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -20,6 +20,10 @@ macro_rules! dispatch_single { let (set, translater) = $reader.bed12_set()?; $func(set, translater, $params, $writer) } + InputFormat::Ambiguous => { + let (set, translater) = $reader.meta_interval_set()?; + $func(set, translater, $params, $writer) + } } }; } @@ -55,6 +59,10 @@ macro_rules! dispatch_to_lhs { let set_a = $reader_a.bed12_set_with($translater.as_mut())?; $crate::dispatch_to_rhs!(set_a, $reader_b, $translater, $writer, $params, $func) } + InputFormat::Ambiguous => { + let set_a = $reader_a.meta_interval_set_with($translater.as_mut())?; + $crate::dispatch_to_rhs!(set_a, $reader_b, $translater, $writer, $params, $func) + } } }; } @@ -81,6 +89,10 @@ macro_rules! dispatch_to_rhs { let set_b = $reader_b.bed12_set_with($translater.as_mut())?; $func($set_a, set_b, $translater.as_ref(), $params, $writer) } + InputFormat::Ambiguous => { + let set_b = $reader_b.meta_interval_set_with($translater.as_mut())?; + $func($set_a, set_b, $translater.as_ref(), $params, $writer) + } } }; } diff --git a/src/io/read/bed_reader.rs b/src/io/read/bed_reader.rs index 10e7e81..ec7f9b3 100644 --- a/src/io/read/bed_reader.rs +++ b/src/io/read/bed_reader.rs @@ -1,8 +1,11 @@ use super::{ read_bed12_set, read_bed12_set_with, read_bed3_set, read_bed3_set_with, read_bed4_set, - read_bed4_set_with, read_bed6_set, read_bed6_set_with, + read_bed4_set_with, read_bed6_set, read_bed6_set_with, read_meta_interval_set, + read_meta_interval_set_with, +}; +use crate::types::{ + Bed12Set, Bed3Set, Bed4Set, Bed6Set, FieldFormat, InputFormat, MetaIntervalSet, Translater, }; -use crate::types::{Bed12Set, Bed3Set, Bed4Set, Bed6Set, FieldFormat, InputFormat, Translater}; use anyhow::Result; use flate2::read::MultiGzDecoder; use gzp::BgzfSyncReader; @@ -123,6 +126,12 @@ impl BedReader { read_bed12_set(self.reader(), is_named) } + /// Returns a MetaIntervalSet from the reader with an Option + pub fn meta_interval_set(self) -> Result<(MetaIntervalSet, Option)> { + let is_named = self.is_named(); + read_meta_interval_set(self.reader(), is_named) + } + /// Returns a Bed3Set from the reader pub fn bed3_set_with(self, translater: Option<&mut Translater>) -> Result { read_bed3_set_with(self.reader(), translater) @@ -142,4 +151,12 @@ impl BedReader { pub fn bed12_set_with(self, translater: Option<&mut Translater>) -> Result { read_bed12_set_with(self.reader(), translater) } + + /// Returns a MetaIntervalSet from the reader + pub fn meta_interval_set_with( + self, + translater: Option<&mut Translater>, + ) -> Result { + read_meta_interval_set_with(self.reader(), translater) + } } diff --git a/src/io/read/meta_interval.rs b/src/io/read/meta_interval.rs new file mode 100644 index 0000000..d2f1bd7 --- /dev/null +++ b/src/io/read/meta_interval.rs @@ -0,0 +1,107 @@ +use super::build_reader; +use crate::types::{MetaIntervalSet, NumericMetaInterval, Translater}; +use anyhow::{bail, Result}; +use csv::ByteRecord; +use std::{io::Read, str::from_utf8}; + +pub fn read_meta_interval_set( + reader: R, + named: bool, +) -> Result<(MetaIntervalSet, Option)> { + if named { + let (set, translater) = read_meta_interval_set_named(reader)?; + Ok((set, Some(translater))) + } else { + let set = read_meta_interval_set_unnamed(reader)?; + Ok((set, None)) + } +} + +pub fn read_meta_interval_set_with( + reader: R, + translater: Option<&mut Translater>, +) -> Result { + if let Some(translater) = translater { + convert_meta_interval_set(reader, translater) + } else { + read_meta_interval_set_unnamed(reader) + } +} + +fn read_meta_interval_set_unnamed(reader: R) -> Result { + let mut reader = build_reader(reader); + let set = reader + .deserialize() + .map(|record| { + let record: NumericMetaInterval= match record { + Ok(record) => record, + Err(e) => { + bail!("Could not build bed record:\n\nIf your BED has non-integer chromosome names try rerunning with the `-N` flag:\n\nERROR: {}", e) + } + }; + Ok(record) + }) + .collect::>()?; + Ok(set) +} + +/// Reads a single file into a GenomicIntervalSet and a Translater +fn read_meta_interval_set_named(reader: R) -> Result<(MetaIntervalSet, Translater)> { + let mut translater = Translater::new(); + let set = convert_meta_interval_set(reader, &mut translater)?; + Ok((set, translater)) +} + +/// Convert a CSV reader into a GenomicIntervalSet +/// +/// It uses an externally initialized name map and index map to keep track of +/// chromosome names and indices. This is useful for reading multiple files +/// and keeping track of the same chromosome names and indices. +fn convert_meta_interval_set( + reader: R, + translater: &mut Translater, +) -> Result { + let mut reader = build_reader(reader); + let mut raw_record = ByteRecord::new(); + let mut set = MetaIntervalSet::empty(); + let mut buffer = String::new(); + while reader.read_byte_record(&mut raw_record)? { + // Iterate over the fields of the record + let mut record_iter = raw_record.iter(); + + // Parse the chromosome + let chr = record_iter.next().map(from_utf8).unwrap()?; + + // Parse the start and end + let start = record_iter + .next() + .map(from_utf8) + .unwrap()? + .parse::()?; + let end = record_iter + .next() + .map(from_utf8) + .unwrap()? + .parse::()?; + + // Parse the metadata into a single long string + buffer.clear(); + let first_meta = record_iter.next().unwrap(); + buffer.push_str(from_utf8(first_meta)?); + for field in record_iter { + buffer.push('\t'); + buffer.push_str(from_utf8(field)?); + } + + // Add the chromosome and metadata to the translater + translater.add_name(chr); + translater.add_name(&buffer); + let chr_int = translater.get_idx(chr).unwrap(); + let name_int = translater.get_idx(&buffer).unwrap(); + + // Create the interval and add it to the set + let interval = NumericMetaInterval::new(chr_int, start, end, name_int); + set.insert(interval); + } + Ok(set) +} diff --git a/src/io/read/mod.rs b/src/io/read/mod.rs index 65c1f52..a18fd3b 100644 --- a/src/io/read/mod.rs +++ b/src/io/read/mod.rs @@ -4,6 +4,7 @@ pub mod bed4; pub mod bed6; pub mod bed_reader; pub mod iter; +pub mod meta_interval; pub mod utils; pub use bed12::{read_bed12_set, read_bed12_set_with}; pub use bed3::{read_bed3_set, read_bed3_set_with}; @@ -11,4 +12,5 @@ pub use bed4::{read_bed4_set, read_bed4_set_with}; pub use bed6::{read_bed6_set, read_bed6_set_with}; pub use bed_reader::BedReader; pub use iter::iter_unnamed; +pub use meta_interval::{read_meta_interval_set, read_meta_interval_set_with}; pub use utils::build_reader; diff --git a/src/io/write/iter.rs b/src/io/write/iter.rs index 587174f..1216b6f 100644 --- a/src/io/write/iter.rs +++ b/src/io/write/iter.rs @@ -1,5 +1,7 @@ use super::build_writer; -use crate::types::{NumericBed12, NumericBed3, NumericBed4, NumericBed6, Translate}; +use crate::types::{ + NumericBed12, NumericBed3, NumericBed4, NumericBed6, NumericMetaInterval, Translate, +}; use anyhow::Result; use bedrs::Coordinates; use serde::Serialize; @@ -294,3 +296,37 @@ impl<'a> WriteNamedIter<&'a NumericBed12> for WriteNamedIterImpl { Ok(()) } } +impl WriteNamedIter for WriteNamedIterImpl { + fn write_named_iter, Tr: Translate>( + writer: W, + iterator: It, + translater: &Tr, + ) -> Result<()> { + let mut wtr = build_writer(writer); + for interval in iterator { + let chr = translater.get_name(*interval.chr()).unwrap(); + let name = translater.get_name(*interval.meta()).unwrap(); + let named_interval = (chr, interval.start(), interval.end(), name); + wtr.serialize(named_interval)?; + } + wtr.flush()?; + Ok(()) + } +} +impl<'a> WriteNamedIter<&'a NumericMetaInterval> for WriteNamedIterImpl { + fn write_named_iter, Tr: Translate>( + writer: W, + iterator: It, + translater: &Tr, + ) -> Result<()> { + let mut wtr = build_writer(writer); + for interval in iterator { + let chr = translater.get_name(*interval.chr()).unwrap(); + let name = translater.get_name(*interval.meta()).unwrap(); + let named_interval = (chr, interval.start(), interval.end(), name); + wtr.serialize(named_interval)?; + } + wtr.flush()?; + Ok(()) + } +} diff --git a/src/io/write/utils.rs b/src/io/write/utils.rs index ad876fd..bcb2f93 100644 --- a/src/io/write/utils.rs +++ b/src/io/write/utils.rs @@ -3,6 +3,7 @@ use crate::types::{ }; use anyhow::Result; use bedrs::{traits::IntervalBounds, Coordinates}; +use csv::QuoteStyle; use serde::Serialize; use std::io::Write; @@ -10,6 +11,7 @@ pub fn build_writer(writer: W) -> csv::Writer { csv::WriterBuilder::new() .delimiter(b'\t') .has_headers(false) + .quote_style(QuoteStyle::Never) .from_writer(writer) } diff --git a/src/types/formats/in_formats.rs b/src/types/formats/in_formats.rs index a87dcc2..5e480a1 100644 --- a/src/types/formats/in_formats.rs +++ b/src/types/formats/in_formats.rs @@ -14,6 +14,7 @@ pub enum InputFormat { Bed4, Bed6, Bed12, + Ambiguous, } impl InputFormat { pub fn predict(bufreader: &BufReader) -> Result { @@ -28,14 +29,12 @@ impl InputFormat { }; let num_fields = first.split(|b| *b == b'\t').count(); match num_fields { + 1..=2 => bail!("Too few fields in line: {}", from_utf8(first)?), 3 => Ok(InputFormat::Bed3), 4 => Ok(InputFormat::Bed4), 6 => Ok(InputFormat::Bed6), 12 => Ok(InputFormat::Bed12), - _ => bail!( - "Cannot predict input format from line: {}", - std::str::from_utf8(first)? - ), + _ => Ok(InputFormat::Ambiguous), } } } @@ -79,6 +78,17 @@ impl FieldFormat { Ok(FieldFormat::IntegerBased) } } + InputFormat::Ambiguous => { + let all_int = fields + .iter() + .filter_map(|f| from_utf8(f).ok()) + .all(|f| f.parse::().is_ok()); + if all_int { + Ok(FieldFormat::IntegerBased) + } else { + Ok(FieldFormat::StringBased) + } + } } } } diff --git a/src/types/mod.rs b/src/types/mod.rs index 4fc68d8..0735d8b 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -2,7 +2,7 @@ mod depth; mod formats; mod pairs; mod translater; -use bedrs::{Bed12, Bed3, Bed4, Bed6, IntervalContainer}; +use bedrs::{Bed12, Bed3, Bed4, Bed6, IntervalContainer, MetaInterval}; pub use depth::IntervalDepth; pub use formats::{FieldFormat, Genome, InputFormat}; pub use pairs::IntervalPair; @@ -26,3 +26,7 @@ pub type NumericBed12 = Bed12 = Bed12<&'a str, usize, &'a str, f64, usize, usize, &'a str, &'a str, &'a str>; pub type Bed12Set = IntervalContainer; + +pub type NumericMetaInterval = MetaInterval; +pub type NamedMetaInterval<'a> = MetaInterval<&'a str, usize, &'a str>; +pub type MetaIntervalSet = IntervalContainer; diff --git a/src/types/translater.rs b/src/types/translater.rs index d5dd9dd..3294f2b 100644 --- a/src/types/translater.rs +++ b/src/types/translater.rs @@ -1,6 +1,6 @@ use super::{ - NamedBed12, NamedBed3, NamedBed4, NamedBed6, NumericBed12, NumericBed3, NumericBed4, - NumericBed6, + NamedBed12, NamedBed3, NamedBed4, NamedBed6, NamedMetaInterval, NumericBed12, NumericBed3, + NumericBed4, NumericBed6, NumericMetaInterval, }; use bedrs::{traits::IntervalBounds, Coordinates, IntervalContainer}; use dashmap::DashMap; @@ -187,6 +187,21 @@ impl Reorder for NumericBed12 { retranslate } } +impl Reorder for NumericMetaInterval { + fn reorder_translater( + set: &mut IntervalContainer, + translater: Translater, + ) -> Retranslater { + let retranslate = translater.lex_sort(); + set.apply_mut(|iv| { + let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); + let new_name = retranslate.get_rank(*iv.meta()).unwrap(); + iv.update_chr(&new_chr); + iv.update_meta(&new_name); + }); + retranslate + } +} pub struct Renamer; pub trait Rename<'a, Ia, Ib> @@ -246,3 +261,10 @@ impl<'a> Rename<'a, NumericBed12, NamedBed12<'a>> for Renamer { ) } } +impl<'a> Rename<'a, NumericMetaInterval, NamedMetaInterval<'a>> for Renamer { + fn rename_with(iv: &NumericMetaInterval, translater: &'a Translater) -> NamedMetaInterval<'a> { + let chr = translater.get_name(*iv.chr()).unwrap(); + let meta = translater.get_name(*iv.meta()).unwrap(); + NamedMetaInterval::new(chr, iv.start(), iv.end(), meta) + } +} From c1c1a77f3502fe7eca8b47d7eb361341c7c25606 Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Thu, 8 Feb 2024 16:09:37 -0800 Subject: [PATCH 2/7] refactor: put translater into separate module and split sub structs and traits into files --- src/types/depth.rs | 5 +- src/types/mod.rs | 4 +- src/types/pairs.rs | 5 +- src/types/translate/mod.rs | 14 ++ src/types/translate/rename.rs | 72 ++++++ src/types/translate/reorder.rs | 92 ++++++++ src/types/translate/retranslater.rs | 26 +++ src/types/translate/stream_translater.rs | 30 +++ src/types/translate/translater.rs | 54 +++++ src/types/translater.rs | 270 ----------------------- 10 files changed, 292 insertions(+), 280 deletions(-) create mode 100644 src/types/translate/mod.rs create mode 100644 src/types/translate/rename.rs create mode 100644 src/types/translate/reorder.rs create mode 100644 src/types/translate/retranslater.rs create mode 100644 src/types/translate/stream_translater.rs create mode 100644 src/types/translate/translater.rs delete mode 100644 src/types/translater.rs diff --git a/src/types/depth.rs b/src/types/depth.rs index e8b75d7..cc0fda8 100644 --- a/src/types/depth.rs +++ b/src/types/depth.rs @@ -1,7 +1,4 @@ -use super::{ - translater::{Rename, Renamer}, - Translater, -}; +use super::{Rename, Renamer, Translater}; use bedrs::traits::IntervalBounds; pub struct IntervalDepth<'a, I, N> diff --git a/src/types/mod.rs b/src/types/mod.rs index 0735d8b..1c721e0 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -1,12 +1,12 @@ mod depth; mod formats; mod pairs; -mod translater; +mod translate; use bedrs::{Bed12, Bed3, Bed4, Bed6, IntervalContainer, MetaInterval}; pub use depth::IntervalDepth; pub use formats::{FieldFormat, Genome, InputFormat}; pub use pairs::IntervalPair; -pub use translater::{ +pub use translate::{ Rename, Renamer, Reorder, Retranslater, StreamTranslater, Translate, Translater, }; diff --git a/src/types/pairs.rs b/src/types/pairs.rs index ce12cdb..57d866f 100644 --- a/src/types/pairs.rs +++ b/src/types/pairs.rs @@ -1,7 +1,4 @@ -use super::{ - translater::{Rename, Renamer}, - Translater, -}; +use super::{Rename, Renamer, Translater}; use bedrs::traits::IntervalBounds; pub struct IntervalPair<'a, Ia, Ib, Na, Nb> diff --git a/src/types/translate/mod.rs b/src/types/translate/mod.rs new file mode 100644 index 0000000..2350a04 --- /dev/null +++ b/src/types/translate/mod.rs @@ -0,0 +1,14 @@ +mod rename; +mod reorder; +mod retranslater; +mod stream_translater; +mod translater; +pub use rename::{Rename, Renamer}; +pub use reorder::Reorder; +pub use retranslater::Retranslater; +pub use stream_translater::StreamTranslater; +pub use translater::Translater; + +pub trait Translate { + fn get_name(&self, idx: usize) -> Option<&str>; +} diff --git a/src/types/translate/rename.rs b/src/types/translate/rename.rs new file mode 100644 index 0000000..47c56a8 --- /dev/null +++ b/src/types/translate/rename.rs @@ -0,0 +1,72 @@ +use super::{Translate, Translater}; +use crate::types::{ + NamedBed12, NamedBed3, NamedBed4, NamedBed6, NamedMetaInterval, NumericBed12, NumericBed3, + NumericBed4, NumericBed6, NumericMetaInterval, +}; +use bedrs::{traits::IntervalBounds, Coordinates}; + +pub struct Renamer; +pub trait Rename<'a, Ia, Ib> +where + Ia: IntervalBounds, + Ib: IntervalBounds<&'a str, usize>, +{ + fn rename_with(iv: &Ia, translater: &'a Translater) -> Ib; +} +impl<'a> Rename<'a, NumericBed3, NamedBed3<'a>> for Renamer { + fn rename_with(iv: &NumericBed3, translater: &'a Translater) -> NamedBed3<'a> { + let chr = translater.get_name(*iv.chr()).unwrap(); + NamedBed3::new(chr, iv.start(), iv.end()) + } +} +impl<'a> Rename<'a, NumericBed4, NamedBed4<'a>> for Renamer { + fn rename_with(iv: &NumericBed4, translater: &'a Translater) -> NamedBed4<'a> { + let chr = translater.get_name(*iv.chr()).unwrap(); + let name = translater.get_name(*iv.name()).unwrap(); + NamedBed4::new(chr, iv.start(), iv.end(), name) + } +} +impl<'a> Rename<'a, NumericBed6, NamedBed6<'a>> for Renamer { + fn rename_with(iv: &NumericBed6, translater: &'a Translater) -> NamedBed6<'a> { + let chr = translater.get_name(*iv.chr()).unwrap(); + let name = translater.get_name(*iv.name()).unwrap(); + NamedBed6::new( + chr, + iv.start(), + iv.end(), + name, + *iv.score(), + iv.strand().unwrap_or_default(), + ) + } +} +impl<'a> Rename<'a, NumericBed12, NamedBed12<'a>> for Renamer { + fn rename_with(iv: &NumericBed12, translater: &'a Translater) -> NamedBed12<'a> { + let chr = translater.get_name(*iv.chr()).unwrap(); + let name = translater.get_name(*iv.name()).unwrap(); + let item_rgb = translater.get_name(*iv.item_rgb()).unwrap(); + let block_sizes = translater.get_name(*iv.block_sizes()).unwrap(); + let block_starts = translater.get_name(*iv.block_starts()).unwrap(); + NamedBed12::new( + chr, + iv.start(), + iv.end(), + name, + *iv.score(), + iv.strand().unwrap_or_default(), + iv.thick_start(), + iv.thick_end(), + item_rgb, + iv.block_count(), + block_sizes, + block_starts, + ) + } +} +impl<'a> Rename<'a, NumericMetaInterval, NamedMetaInterval<'a>> for Renamer { + fn rename_with(iv: &NumericMetaInterval, translater: &'a Translater) -> NamedMetaInterval<'a> { + let chr = translater.get_name(*iv.chr()).unwrap(); + let meta = translater.get_name(*iv.meta()).unwrap(); + NamedMetaInterval::new(chr, iv.start(), iv.end(), meta) + } +} diff --git a/src/types/translate/reorder.rs b/src/types/translate/reorder.rs new file mode 100644 index 0000000..ffb5067 --- /dev/null +++ b/src/types/translate/reorder.rs @@ -0,0 +1,92 @@ +use super::{Retranslater, Translater}; +use crate::types::{NumericBed12, NumericBed3, NumericBed4, NumericBed6, NumericMetaInterval}; +use bedrs::{traits::IntervalBounds, Coordinates, IntervalContainer}; + +pub trait Reorder +where + C: IntervalBounds, +{ + fn reorder_translater( + set: &mut IntervalContainer, + translater: Translater, + ) -> Retranslater; +} +impl Reorder for NumericBed3 { + fn reorder_translater( + set: &mut IntervalContainer, + translater: Translater, + ) -> Retranslater { + let retranslate = translater.lex_sort(); + set.apply_mut(|iv| { + let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); + iv.update_chr(&new_chr); + }); + retranslate + } +} +impl Reorder for NumericBed4 { + fn reorder_translater( + set: &mut IntervalContainer, + translater: Translater, + ) -> Retranslater { + let retranslate = translater.lex_sort(); + set.apply_mut(|iv| { + let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); + let new_name = retranslate.get_rank(*iv.name()).unwrap(); + iv.update_chr(&new_chr); + iv.update_name(&new_name); + }); + retranslate + } +} +impl Reorder for NumericBed6 { + fn reorder_translater( + set: &mut IntervalContainer, + translater: Translater, + ) -> Retranslater { + let retranslate = translater.lex_sort(); + set.apply_mut(|iv| { + let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); + let new_name = retranslate.get_rank(*iv.name()).unwrap(); + iv.update_chr(&new_chr); + iv.update_name(&new_name); + }); + retranslate + } +} +impl Reorder for NumericBed12 { + fn reorder_translater( + set: &mut IntervalContainer, + translater: Translater, + ) -> Retranslater { + let retranslate = translater.lex_sort(); + set.apply_mut(|iv| { + let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); + let new_name = retranslate.get_rank(*iv.name()).unwrap(); + let new_item_rgb = retranslate.get_rank(*iv.item_rgb()).unwrap(); + let new_block_sizes = retranslate.get_rank(*iv.block_sizes()).unwrap(); + let new_block_starts = retranslate.get_rank(*iv.block_starts()).unwrap(); + iv.update_chr(&new_chr); + iv.update_name(&new_name); + iv.update_item_rgb(&new_item_rgb); + iv.update_block_sizes(&new_block_sizes); + iv.update_block_starts(&new_block_starts); + }); + retranslate + } +} +impl Reorder for NumericMetaInterval { + fn reorder_translater( + set: &mut IntervalContainer, + translater: Translater, + ) -> Retranslater { + let retranslate = translater.lex_sort(); + set.apply_mut(|iv| { + let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); + let new_name = retranslate.get_rank(*iv.meta()).unwrap(); + iv.update_chr(&new_chr); + iv.update_meta(&new_name); + }); + retranslate + } +} diff --git a/src/types/translate/retranslater.rs b/src/types/translate/retranslater.rs new file mode 100644 index 0000000..2f2543a --- /dev/null +++ b/src/types/translate/retranslater.rs @@ -0,0 +1,26 @@ +use hashbrown::HashMap; + +use super::Translate; + +#[derive(Debug)] +pub struct Retranslater { + idx_to_rank: HashMap, + rank_to_name: HashMap, +} +impl Retranslater { + pub fn new(idx_to_rank: HashMap, rank_to_name: HashMap) -> Self { + Self { + idx_to_rank, + rank_to_name, + } + } + + pub fn get_rank(&self, idx: usize) -> Option { + self.idx_to_rank.get(&idx).copied() + } +} +impl Translate for Retranslater { + fn get_name(&self, rank: usize) -> Option<&str> { + self.rank_to_name.get(&rank).map(|s| s.as_str()) + } +} diff --git a/src/types/translate/stream_translater.rs b/src/types/translate/stream_translater.rs new file mode 100644 index 0000000..1ca9ae9 --- /dev/null +++ b/src/types/translate/stream_translater.rs @@ -0,0 +1,30 @@ +use dashmap::DashMap; + +pub struct StreamTranslater { + name_to_idx: DashMap, + idx_to_name: DashMap, +} +impl StreamTranslater { + pub fn new() -> Self { + Self { + name_to_idx: DashMap::new(), + idx_to_name: DashMap::new(), + } + } + pub fn has_name(&self, name: &str) -> bool { + self.name_to_idx.contains_key(name) + } + pub fn add_name(&self, name: &str) { + if !self.has_name(name) { + let idx = self.name_to_idx.len(); + self.name_to_idx.insert(name.to_string(), idx); + self.idx_to_name.insert(idx, name.to_string()); + } + } + pub fn get_name_to_idx(&self) -> &DashMap { + &self.name_to_idx + } + pub fn get_idx_to_name(&self) -> &DashMap { + &self.idx_to_name + } +} diff --git a/src/types/translate/translater.rs b/src/types/translate/translater.rs new file mode 100644 index 0000000..626d600 --- /dev/null +++ b/src/types/translate/translater.rs @@ -0,0 +1,54 @@ +use super::{Retranslater, Translate}; +use hashbrown::HashMap; +use human_sort::compare; + +pub struct Translater { + name_to_idx: HashMap, + idx_to_name: HashMap, +} +impl Translater { + pub fn new() -> Self { + Self { + name_to_idx: HashMap::new(), + idx_to_name: HashMap::new(), + } + } + pub fn has_name(&self, name: &str) -> bool { + self.name_to_idx.contains_key(name) + } + pub fn add_name(&mut self, name: &str) { + if !self.has_name(name) { + let idx = self.name_to_idx.len(); + self.name_to_idx.insert(name.to_string(), idx); + self.idx_to_name.insert(idx, name.to_string()); + } + } + #[allow(dead_code)] + pub fn get_idx(&self, name: &str) -> Option { + self.name_to_idx.get(name).copied() + } + #[allow(dead_code)] + pub fn get_name_to_idx(&self) -> &HashMap { + &self.name_to_idx + } + pub fn lex_sort(self) -> Retranslater { + let mut idx_to_rank = HashMap::with_capacity(self.idx_to_name.len()); + let mut rank_to_name = HashMap::with_capacity(self.idx_to_name.len()); + let mut ordering = self + .idx_to_name + .iter() + .map(|(idx, name)| (name, idx)) + .collect::>(); + ordering.sort_by(|a, b| compare(a.0, b.0)); + for (order, (name, idx)) in ordering.into_iter().enumerate() { + rank_to_name.insert(order, name.to_string()); + idx_to_rank.insert(*idx, order); + } + Retranslater::new(idx_to_rank, rank_to_name) + } +} +impl Translate for Translater { + fn get_name(&self, idx: usize) -> Option<&str> { + self.idx_to_name.get(&idx).map(|s| s.as_str()) + } +} diff --git a/src/types/translater.rs b/src/types/translater.rs deleted file mode 100644 index 3294f2b..0000000 --- a/src/types/translater.rs +++ /dev/null @@ -1,270 +0,0 @@ -use super::{ - NamedBed12, NamedBed3, NamedBed4, NamedBed6, NamedMetaInterval, NumericBed12, NumericBed3, - NumericBed4, NumericBed6, NumericMetaInterval, -}; -use bedrs::{traits::IntervalBounds, Coordinates, IntervalContainer}; -use dashmap::DashMap; -use hashbrown::HashMap; -use human_sort::compare; - -pub trait Translate { - fn get_name(&self, idx: usize) -> Option<&str>; -} - -pub struct Translater { - name_to_idx: HashMap, - idx_to_name: HashMap, -} -impl Translater { - pub fn new() -> Self { - Self { - name_to_idx: HashMap::new(), - idx_to_name: HashMap::new(), - } - } - pub fn has_name(&self, name: &str) -> bool { - self.name_to_idx.contains_key(name) - } - pub fn add_name(&mut self, name: &str) { - if !self.has_name(name) { - let idx = self.name_to_idx.len(); - self.name_to_idx.insert(name.to_string(), idx); - self.idx_to_name.insert(idx, name.to_string()); - } - } - #[allow(dead_code)] - pub fn get_idx(&self, name: &str) -> Option { - self.name_to_idx.get(name).copied() - } - #[allow(dead_code)] - pub fn get_name_to_idx(&self) -> &HashMap { - &self.name_to_idx - } - pub fn lex_sort(self) -> Retranslater { - let mut idx_to_rank = HashMap::with_capacity(self.idx_to_name.len()); - let mut rank_to_name = HashMap::with_capacity(self.idx_to_name.len()); - let mut ordering = self - .idx_to_name - .iter() - .map(|(idx, name)| (name, idx)) - .collect::>(); - ordering.sort_by(|a, b| compare(a.0, b.0)); - for (order, (name, idx)) in ordering.into_iter().enumerate() { - rank_to_name.insert(order, name.to_string()); - idx_to_rank.insert(*idx, order); - } - Retranslater::new(idx_to_rank, rank_to_name) - } -} -impl Translate for Translater { - fn get_name(&self, idx: usize) -> Option<&str> { - self.idx_to_name.get(&idx).map(|s| s.as_str()) - } -} - -#[derive(Debug)] -pub struct Retranslater { - idx_to_rank: HashMap, - rank_to_name: HashMap, -} -impl Retranslater { - pub fn new(idx_to_rank: HashMap, rank_to_name: HashMap) -> Self { - Self { - idx_to_rank, - rank_to_name, - } - } - - pub fn get_rank(&self, idx: usize) -> Option { - self.idx_to_rank.get(&idx).copied() - } -} -impl Translate for Retranslater { - fn get_name(&self, rank: usize) -> Option<&str> { - self.rank_to_name.get(&rank).map(|s| s.as_str()) - } -} - -pub struct StreamTranslater { - name_to_idx: DashMap, - idx_to_name: DashMap, -} -impl StreamTranslater { - pub fn new() -> Self { - Self { - name_to_idx: DashMap::new(), - idx_to_name: DashMap::new(), - } - } - pub fn has_name(&self, name: &str) -> bool { - self.name_to_idx.contains_key(name) - } - pub fn add_name(&self, name: &str) { - if !self.has_name(name) { - let idx = self.name_to_idx.len(); - self.name_to_idx.insert(name.to_string(), idx); - self.idx_to_name.insert(idx, name.to_string()); - } - } - pub fn get_name_to_idx(&self) -> &DashMap { - &self.name_to_idx - } - pub fn get_idx_to_name(&self) -> &DashMap { - &self.idx_to_name - } -} - -pub trait Reorder -where - C: IntervalBounds, -{ - fn reorder_translater( - set: &mut IntervalContainer, - translater: Translater, - ) -> Retranslater; -} -impl Reorder for NumericBed3 { - fn reorder_translater( - set: &mut IntervalContainer, - translater: Translater, - ) -> Retranslater { - let retranslate = translater.lex_sort(); - set.apply_mut(|iv| { - let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); - iv.update_chr(&new_chr); - }); - retranslate - } -} -impl Reorder for NumericBed4 { - fn reorder_translater( - set: &mut IntervalContainer, - translater: Translater, - ) -> Retranslater { - let retranslate = translater.lex_sort(); - set.apply_mut(|iv| { - let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); - let new_name = retranslate.get_rank(*iv.name()).unwrap(); - iv.update_chr(&new_chr); - iv.update_name(&new_name); - }); - retranslate - } -} -impl Reorder for NumericBed6 { - fn reorder_translater( - set: &mut IntervalContainer, - translater: Translater, - ) -> Retranslater { - let retranslate = translater.lex_sort(); - set.apply_mut(|iv| { - let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); - let new_name = retranslate.get_rank(*iv.name()).unwrap(); - iv.update_chr(&new_chr); - iv.update_name(&new_name); - }); - retranslate - } -} -impl Reorder for NumericBed12 { - fn reorder_translater( - set: &mut IntervalContainer, - translater: Translater, - ) -> Retranslater { - let retranslate = translater.lex_sort(); - set.apply_mut(|iv| { - let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); - let new_name = retranslate.get_rank(*iv.name()).unwrap(); - let new_item_rgb = retranslate.get_rank(*iv.item_rgb()).unwrap(); - let new_block_sizes = retranslate.get_rank(*iv.block_sizes()).unwrap(); - let new_block_starts = retranslate.get_rank(*iv.block_starts()).unwrap(); - iv.update_chr(&new_chr); - iv.update_name(&new_name); - iv.update_item_rgb(&new_item_rgb); - iv.update_block_sizes(&new_block_sizes); - iv.update_block_starts(&new_block_starts); - }); - retranslate - } -} -impl Reorder for NumericMetaInterval { - fn reorder_translater( - set: &mut IntervalContainer, - translater: Translater, - ) -> Retranslater { - let retranslate = translater.lex_sort(); - set.apply_mut(|iv| { - let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); - let new_name = retranslate.get_rank(*iv.meta()).unwrap(); - iv.update_chr(&new_chr); - iv.update_meta(&new_name); - }); - retranslate - } -} - -pub struct Renamer; -pub trait Rename<'a, Ia, Ib> -where - Ia: IntervalBounds, - Ib: IntervalBounds<&'a str, usize>, -{ - fn rename_with(iv: &Ia, translater: &'a Translater) -> Ib; -} -impl<'a> Rename<'a, NumericBed3, NamedBed3<'a>> for Renamer { - fn rename_with(iv: &NumericBed3, translater: &'a Translater) -> NamedBed3<'a> { - let chr = translater.get_name(*iv.chr()).unwrap(); - NamedBed3::new(chr, iv.start(), iv.end()) - } -} -impl<'a> Rename<'a, NumericBed4, NamedBed4<'a>> for Renamer { - fn rename_with(iv: &NumericBed4, translater: &'a Translater) -> NamedBed4<'a> { - let chr = translater.get_name(*iv.chr()).unwrap(); - let name = translater.get_name(*iv.name()).unwrap(); - NamedBed4::new(chr, iv.start(), iv.end(), name) - } -} -impl<'a> Rename<'a, NumericBed6, NamedBed6<'a>> for Renamer { - fn rename_with(iv: &NumericBed6, translater: &'a Translater) -> NamedBed6<'a> { - let chr = translater.get_name(*iv.chr()).unwrap(); - let name = translater.get_name(*iv.name()).unwrap(); - NamedBed6::new( - chr, - iv.start(), - iv.end(), - name, - *iv.score(), - iv.strand().unwrap_or_default(), - ) - } -} -impl<'a> Rename<'a, NumericBed12, NamedBed12<'a>> for Renamer { - fn rename_with(iv: &NumericBed12, translater: &'a Translater) -> NamedBed12<'a> { - let chr = translater.get_name(*iv.chr()).unwrap(); - let name = translater.get_name(*iv.name()).unwrap(); - let item_rgb = translater.get_name(*iv.item_rgb()).unwrap(); - let block_sizes = translater.get_name(*iv.block_sizes()).unwrap(); - let block_starts = translater.get_name(*iv.block_starts()).unwrap(); - NamedBed12::new( - chr, - iv.start(), - iv.end(), - name, - *iv.score(), - iv.strand().unwrap_or_default(), - iv.thick_start(), - iv.thick_end(), - item_rgb, - iv.block_count(), - block_sizes, - block_starts, - ) - } -} -impl<'a> Rename<'a, NumericMetaInterval, NamedMetaInterval<'a>> for Renamer { - fn rename_with(iv: &NumericMetaInterval, translater: &'a Translater) -> NamedMetaInterval<'a> { - let chr = translater.get_name(*iv.chr()).unwrap(); - let meta = translater.get_name(*iv.meta()).unwrap(); - NamedMetaInterval::new(chr, iv.start(), iv.end(), meta) - } -} From 62320a241097d30f84e4e5beec54ec8117e8d5c9 Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Thu, 8 Feb 2024 16:27:23 -0800 Subject: [PATCH 3/7] feat: added a split translater which keeps an internal translator for the chr and metadata separately --- src/io/write/iter.rs | 54 ++++++++++++------------- src/types/formats/genome.rs | 6 +-- src/types/mod.rs | 3 +- src/types/translate/mod.rs | 11 ++++- src/types/translate/rename.rs | 24 +++++------ src/types/translate/retranslater.rs | 5 ++- src/types/translate/split_translater.rs | 53 ++++++++++++++++++++++++ src/types/translate/translater.rs | 5 ++- 8 files changed, 115 insertions(+), 46 deletions(-) create mode 100644 src/types/translate/split_translater.rs diff --git a/src/io/write/iter.rs b/src/io/write/iter.rs index 1216b6f..66979a8 100644 --- a/src/io/write/iter.rs +++ b/src/io/write/iter.rs @@ -105,7 +105,7 @@ where ) -> Result<()> { let mut wtr = build_writer(writer); for interval in iterator { - let chr = translater.get_name(*interval.chr()).unwrap(); + let chr = translater.get_chr_name(*interval.chr()).unwrap(); let named_interval = (chr, interval.start(), interval.end()); wtr.serialize(named_interval)?; } @@ -122,7 +122,7 @@ impl WriteNamedIter for WriteNamedIterImpl { ) -> Result<()> { let mut wtr = build_writer(writer); for interval in iterator { - let chr = translater.get_name(*interval.chr()).unwrap(); + let chr = translater.get_chr_name(*interval.chr()).unwrap(); let named_interval = (chr, interval.start(), interval.end()); wtr.serialize(named_interval)?; } @@ -138,7 +138,7 @@ impl<'a> WriteNamedIter<&'a NumericBed3> for WriteNamedIterImpl { ) -> Result<()> { let mut wtr = build_writer(writer); for interval in iterator { - let chr = translater.get_name(*interval.chr()).unwrap(); + let chr = translater.get_chr_name(*interval.chr()).unwrap(); let named_interval = (chr, interval.start(), interval.end()); wtr.serialize(named_interval)?; } @@ -154,8 +154,8 @@ impl WriteNamedIter for WriteNamedIterImpl { ) -> Result<()> { let mut wtr = build_writer(writer); for interval in iterator { - let chr = translater.get_name(*interval.chr()).unwrap(); - let name = translater.get_name(*interval.name()).unwrap(); + let chr = translater.get_chr_name(*interval.chr()).unwrap(); + let name = translater.get_meta_name(*interval.name()).unwrap(); let named_interval = (chr, interval.start(), interval.end(), name); wtr.serialize(named_interval)?; } @@ -171,8 +171,8 @@ impl<'a> WriteNamedIter<&'a NumericBed4> for WriteNamedIterImpl { ) -> Result<()> { let mut wtr = build_writer(writer); for interval in iterator { - let chr = translater.get_name(*interval.chr()).unwrap(); - let name = translater.get_name(*interval.name()).unwrap(); + let chr = translater.get_chr_name(*interval.chr()).unwrap(); + let name = translater.get_meta_name(*interval.name()).unwrap(); let named_interval = (chr, interval.start(), interval.end(), name); wtr.serialize(named_interval)?; } @@ -188,8 +188,8 @@ impl WriteNamedIter for WriteNamedIterImpl { ) -> Result<()> { let mut wtr = build_writer(writer); for interval in iterator { - let chr = translater.get_name(*interval.chr()).unwrap(); - let name = translater.get_name(*interval.name()).unwrap(); + let chr = translater.get_chr_name(*interval.chr()).unwrap(); + let name = translater.get_meta_name(*interval.name()).unwrap(); let named_interval = ( chr, interval.start(), @@ -212,8 +212,8 @@ impl<'a> WriteNamedIter<&'a NumericBed6> for WriteNamedIterImpl { ) -> Result<()> { let mut wtr = build_writer(writer); for interval in iterator { - let chr = translater.get_name(*interval.chr()).unwrap(); - let name = translater.get_name(*interval.name()).unwrap(); + let chr = translater.get_chr_name(*interval.chr()).unwrap(); + let name = translater.get_meta_name(*interval.name()).unwrap(); let named_interval = ( chr, interval.start(), @@ -236,12 +236,12 @@ impl WriteNamedIter for WriteNamedIterImpl { ) -> Result<()> { let mut wtr = build_writer(writer); for interval in iterator { - let chr = translater.get_name(*interval.chr()).unwrap(); - let name = translater.get_name(*interval.name()).unwrap(); - let item_rgb = translater.get_name(*interval.item_rgb()).unwrap(); - let block_count = translater.get_name(interval.block_count()).unwrap(); - let block_sizes = translater.get_name(*interval.block_sizes()).unwrap(); - let block_starts = translater.get_name(*interval.block_starts()).unwrap(); + let chr = translater.get_chr_name(*interval.chr()).unwrap(); + let name = translater.get_meta_name(*interval.name()).unwrap(); + let item_rgb = translater.get_meta_name(*interval.item_rgb()).unwrap(); + let block_count = translater.get_meta_name(interval.block_count()).unwrap(); + let block_sizes = translater.get_meta_name(*interval.block_sizes()).unwrap(); + let block_starts = translater.get_meta_name(*interval.block_starts()).unwrap(); let named_interval = ( chr, interval.start(), @@ -270,12 +270,12 @@ impl<'a> WriteNamedIter<&'a NumericBed12> for WriteNamedIterImpl { ) -> Result<()> { let mut wtr = build_writer(writer); for interval in iterator { - let chr = translater.get_name(*interval.chr()).unwrap(); - let name = translater.get_name(*interval.name()).unwrap(); - let item_rgb = translater.get_name(*interval.item_rgb()).unwrap(); - let block_count = translater.get_name(interval.block_count()).unwrap(); - let block_sizes = translater.get_name(*interval.block_sizes()).unwrap(); - let block_starts = translater.get_name(*interval.block_starts()).unwrap(); + let chr = translater.get_chr_name(*interval.chr()).unwrap(); + let name = translater.get_meta_name(*interval.name()).unwrap(); + let item_rgb = translater.get_meta_name(*interval.item_rgb()).unwrap(); + let block_count = translater.get_meta_name(interval.block_count()).unwrap(); + let block_sizes = translater.get_meta_name(*interval.block_sizes()).unwrap(); + let block_starts = translater.get_meta_name(*interval.block_starts()).unwrap(); let named_interval = ( chr, interval.start(), @@ -304,8 +304,8 @@ impl WriteNamedIter for WriteNamedIterImpl { ) -> Result<()> { let mut wtr = build_writer(writer); for interval in iterator { - let chr = translater.get_name(*interval.chr()).unwrap(); - let name = translater.get_name(*interval.meta()).unwrap(); + let chr = translater.get_chr_name(*interval.chr()).unwrap(); + let name = translater.get_meta_name(*interval.meta()).unwrap(); let named_interval = (chr, interval.start(), interval.end(), name); wtr.serialize(named_interval)?; } @@ -321,8 +321,8 @@ impl<'a> WriteNamedIter<&'a NumericMetaInterval> for WriteNamedIterImpl { ) -> Result<()> { let mut wtr = build_writer(writer); for interval in iterator { - let chr = translater.get_name(*interval.chr()).unwrap(); - let name = translater.get_name(*interval.meta()).unwrap(); + let chr = translater.get_chr_name(*interval.chr()).unwrap(); + let name = translater.get_meta_name(*interval.meta()).unwrap(); let named_interval = (chr, interval.start(), interval.end(), name); wtr.serialize(named_interval)?; } diff --git a/src/types/formats/genome.rs b/src/types/formats/genome.rs index 422fbc7..37b0d74 100644 --- a/src/types/formats/genome.rs +++ b/src/types/formats/genome.rs @@ -170,9 +170,9 @@ mod testing { assert_eq!(genome.chr_size_unchecked(2), 3000); assert!(genome.translater().is_some()); let translater = genome.translater().unwrap(); - assert_eq!(translater.get_name(0).unwrap(), "chr1"); - assert_eq!(translater.get_name(1).unwrap(), "chr2"); - assert_eq!(translater.get_name(2).unwrap(), "chr3"); + assert_eq!(translater.get_chr_name(0).unwrap(), "chr1"); + assert_eq!(translater.get_chr_name(1).unwrap(), "chr2"); + assert_eq!(translater.get_chr_name(2).unwrap(), "chr3"); } #[test] diff --git a/src/types/mod.rs b/src/types/mod.rs index 1c721e0..5b98015 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -7,7 +7,8 @@ pub use depth::IntervalDepth; pub use formats::{FieldFormat, Genome, InputFormat}; pub use pairs::IntervalPair; pub use translate::{ - Rename, Renamer, Reorder, Retranslater, StreamTranslater, Translate, Translater, + Rename, Renamer, Reorder, Retranslater, SplitTranslater, StreamTranslater, Translate, + Translater, }; pub type NumericBed3 = Bed3; diff --git a/src/types/translate/mod.rs b/src/types/translate/mod.rs index 2350a04..eb9c8ce 100644 --- a/src/types/translate/mod.rs +++ b/src/types/translate/mod.rs @@ -1,14 +1,23 @@ mod rename; mod reorder; mod retranslater; +mod split_translater; mod stream_translater; mod translater; pub use rename::{Rename, Renamer}; pub use reorder::Reorder; pub use retranslater::Retranslater; +pub use split_translater::SplitTranslater; pub use stream_translater::StreamTranslater; pub use translater::Translater; pub trait Translate { - fn get_name(&self, idx: usize) -> Option<&str>; + fn get_chr_name(&self, idx: usize) -> Option<&str>; + fn get_meta_name(&self, idx: usize) -> Option<&str>; +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +pub enum TranslateGroup { + Chr, + Meta, } diff --git a/src/types/translate/rename.rs b/src/types/translate/rename.rs index 47c56a8..a79d58d 100644 --- a/src/types/translate/rename.rs +++ b/src/types/translate/rename.rs @@ -15,21 +15,21 @@ where } impl<'a> Rename<'a, NumericBed3, NamedBed3<'a>> for Renamer { fn rename_with(iv: &NumericBed3, translater: &'a Translater) -> NamedBed3<'a> { - let chr = translater.get_name(*iv.chr()).unwrap(); + let chr = translater.get_chr_name(*iv.chr()).unwrap(); NamedBed3::new(chr, iv.start(), iv.end()) } } impl<'a> Rename<'a, NumericBed4, NamedBed4<'a>> for Renamer { fn rename_with(iv: &NumericBed4, translater: &'a Translater) -> NamedBed4<'a> { - let chr = translater.get_name(*iv.chr()).unwrap(); - let name = translater.get_name(*iv.name()).unwrap(); + let chr = translater.get_chr_name(*iv.chr()).unwrap(); + let name = translater.get_meta_name(*iv.name()).unwrap(); NamedBed4::new(chr, iv.start(), iv.end(), name) } } impl<'a> Rename<'a, NumericBed6, NamedBed6<'a>> for Renamer { fn rename_with(iv: &NumericBed6, translater: &'a Translater) -> NamedBed6<'a> { - let chr = translater.get_name(*iv.chr()).unwrap(); - let name = translater.get_name(*iv.name()).unwrap(); + let chr = translater.get_chr_name(*iv.chr()).unwrap(); + let name = translater.get_meta_name(*iv.name()).unwrap(); NamedBed6::new( chr, iv.start(), @@ -42,11 +42,11 @@ impl<'a> Rename<'a, NumericBed6, NamedBed6<'a>> for Renamer { } impl<'a> Rename<'a, NumericBed12, NamedBed12<'a>> for Renamer { fn rename_with(iv: &NumericBed12, translater: &'a Translater) -> NamedBed12<'a> { - let chr = translater.get_name(*iv.chr()).unwrap(); - let name = translater.get_name(*iv.name()).unwrap(); - let item_rgb = translater.get_name(*iv.item_rgb()).unwrap(); - let block_sizes = translater.get_name(*iv.block_sizes()).unwrap(); - let block_starts = translater.get_name(*iv.block_starts()).unwrap(); + let chr = translater.get_chr_name(*iv.chr()).unwrap(); + let name = translater.get_meta_name(*iv.name()).unwrap(); + let item_rgb = translater.get_meta_name(*iv.item_rgb()).unwrap(); + let block_sizes = translater.get_meta_name(*iv.block_sizes()).unwrap(); + let block_starts = translater.get_meta_name(*iv.block_starts()).unwrap(); NamedBed12::new( chr, iv.start(), @@ -65,8 +65,8 @@ impl<'a> Rename<'a, NumericBed12, NamedBed12<'a>> for Renamer { } impl<'a> Rename<'a, NumericMetaInterval, NamedMetaInterval<'a>> for Renamer { fn rename_with(iv: &NumericMetaInterval, translater: &'a Translater) -> NamedMetaInterval<'a> { - let chr = translater.get_name(*iv.chr()).unwrap(); - let meta = translater.get_name(*iv.meta()).unwrap(); + let chr = translater.get_chr_name(*iv.chr()).unwrap(); + let meta = translater.get_chr_name(*iv.meta()).unwrap(); NamedMetaInterval::new(chr, iv.start(), iv.end(), meta) } } diff --git a/src/types/translate/retranslater.rs b/src/types/translate/retranslater.rs index 2f2543a..ab1d2bd 100644 --- a/src/types/translate/retranslater.rs +++ b/src/types/translate/retranslater.rs @@ -20,7 +20,10 @@ impl Retranslater { } } impl Translate for Retranslater { - fn get_name(&self, rank: usize) -> Option<&str> { + fn get_chr_name(&self, rank: usize) -> Option<&str> { self.rank_to_name.get(&rank).map(|s| s.as_str()) } + fn get_meta_name(&self, rank: usize) -> Option<&str> { + self.get_chr_name(rank) + } } diff --git a/src/types/translate/split_translater.rs b/src/types/translate/split_translater.rs new file mode 100644 index 0000000..e8942b7 --- /dev/null +++ b/src/types/translate/split_translater.rs @@ -0,0 +1,53 @@ +use super::{Retranslater, Translate, TranslateGroup, Translater}; +use hashbrown::HashMap; + +pub struct SplitTranslater { + chr_tl: Translater, + meta_tl: Translater, +} +impl SplitTranslater { + pub fn new() -> Self { + Self { + chr_tl: Translater::new(), + meta_tl: Translater::new(), + } + } + pub fn has_name(&self, name: &str, group: TranslateGroup) -> bool { + match group { + TranslateGroup::Chr => self.chr_tl.has_name(name), + TranslateGroup::Meta => self.meta_tl.has_name(name), + } + } + pub fn add_name(&mut self, name: &str, group: TranslateGroup) { + match group { + TranslateGroup::Chr => self.chr_tl.add_name(name), + TranslateGroup::Meta => self.meta_tl.add_name(name), + } + } + pub fn get_idx(&self, name: &str, group: TranslateGroup) -> Option { + match group { + TranslateGroup::Chr => self.chr_tl.get_idx(name), + TranslateGroup::Meta => self.meta_tl.get_idx(name), + } + } + pub fn get_name_to_idx(&self, group: TranslateGroup) -> &HashMap { + match group { + TranslateGroup::Chr => self.chr_tl.get_name_to_idx(), + TranslateGroup::Meta => self.meta_tl.get_name_to_idx(), + } + } + pub fn lex_sort(self, group: TranslateGroup) -> Retranslater { + match group { + TranslateGroup::Chr => self.chr_tl.lex_sort(), + TranslateGroup::Meta => self.meta_tl.lex_sort(), + } + } +} +impl Translate for SplitTranslater { + fn get_chr_name(&self, idx: usize) -> Option<&str> { + self.chr_tl.get_chr_name(idx) + } + fn get_meta_name(&self, idx: usize) -> Option<&str> { + self.meta_tl.get_meta_name(idx) + } +} diff --git a/src/types/translate/translater.rs b/src/types/translate/translater.rs index 626d600..75f99e8 100644 --- a/src/types/translate/translater.rs +++ b/src/types/translate/translater.rs @@ -48,7 +48,10 @@ impl Translater { } } impl Translate for Translater { - fn get_name(&self, idx: usize) -> Option<&str> { + fn get_chr_name(&self, idx: usize) -> Option<&str> { self.idx_to_name.get(&idx).map(|s| s.as_str()) } + fn get_meta_name(&self, idx: usize) -> Option<&str> { + self.get_chr_name(idx) + } } From 02626b86e7ac7a9861b4f20edca42a6c71a4f319 Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:21:01 -0800 Subject: [PATCH 4/7] feat: added a split translater which contains two internal translaters. one for the chr translation and one for the meta translation. During sorting, only the chr translater is sorted which heavily reduces the amount of keys to reorder --- src/commands/closest.rs | 4 +- src/commands/coverage.rs | 4 +- src/commands/extend.rs | 8 ++-- src/commands/flank.rs | 8 ++-- src/commands/intersect/run.rs | 6 +-- src/commands/merge.rs | 6 +-- src/commands/sample.rs | 4 +- src/commands/shift.rs | 12 ++++-- src/commands/sort.rs | 17 ++++---- src/commands/subtract.rs | 4 +- src/commands/window.rs | 4 +- src/dispatch.rs | 33 +++++++++++++++- src/io/read/bed12.rs | 47 +++++++++++++++-------- src/io/read/bed3.rs | 23 ++++++----- src/io/read/bed4.rs | 29 ++++++++------ src/io/read/bed6.rs | 29 ++++++++------ src/io/read/bed_reader.rs | 32 +++++++-------- src/io/read/meta_interval.rs | 22 +++++------ src/io/write/utils.rs | 6 +-- src/types/depth.rs | 8 ++-- src/types/formats/in_formats.rs | 13 ++++++- src/types/mod.rs | 4 +- src/types/pairs.rs | 8 ++-- src/types/translate/mod.rs | 2 + src/types/translate/rename.rs | 17 ++++---- src/types/translate/reorder.rs | 14 ------- src/types/translate/retranslater.rs | 7 ++-- src/types/translate/split_retranslater.rs | 19 +++++++++ src/types/translate/split_translater.rs | 22 +++-------- 29 files changed, 244 insertions(+), 168 deletions(-) create mode 100644 src/types/translate/split_retranslater.rs diff --git a/src/commands/closest.rs b/src/commands/closest.rs index 3f88ab2..1947cc2 100644 --- a/src/commands/closest.rs +++ b/src/commands/closest.rs @@ -2,7 +2,7 @@ use crate::{ cli::{ClosestArgs, ClosestParams}, dispatch_pair, io::write_pairs_iter_with, - types::{InputFormat, IntervalPair, Rename, Renamer, Translater}, + types::{InputFormat, IntervalPair, Rename, Renamer, SplitTranslater}, utils::sort_pairs, }; use anyhow::Result; @@ -38,7 +38,7 @@ impl From for ClosestType { fn run_closest<'a, Ia, Ib, Na, Nb, W>( mut a_set: IntervalContainer, mut b_set: IntervalContainer, - translater: Option<&'a Translater>, + translater: Option<&'a SplitTranslater>, params: ClosestParams, output: W, ) -> Result<()> diff --git a/src/commands/coverage.rs b/src/commands/coverage.rs index bdad70f..2bcfa33 100644 --- a/src/commands/coverage.rs +++ b/src/commands/coverage.rs @@ -8,14 +8,14 @@ use crate::{ cli::{CoverageArgs, CoverageParams}, dispatch_pair, io::write_depth_iter_with, - types::{InputFormat, IntervalDepth, Rename, Renamer, Translater}, + types::{InputFormat, IntervalDepth, Rename, Renamer, SplitTranslater}, utils::sort_pairs, }; fn run_coverage<'a, Ia, Ib, Na, W>( mut set_a: IntervalContainer, mut set_b: IntervalContainer, - translater: Option<&'a Translater>, + translater: Option<&'a SplitTranslater>, params: CoverageParams, writer: W, ) -> Result<()> diff --git a/src/commands/extend.rs b/src/commands/extend.rs index 9bb42c4..86948a2 100644 --- a/src/commands/extend.rs +++ b/src/commands/extend.rs @@ -2,7 +2,7 @@ use crate::{ cli::{ExtendArgs, Growth}, dispatch_single, io::{write_records_iter_with, WriteNamedIter, WriteNamedIterImpl}, - types::{Genome, InputFormat, Translater}, + types::{Genome, InputFormat, SplitTranslater, TranslateGroup}, }; use anyhow::Result; use bedrs::{traits::IntervalBounds, IntervalContainer}; @@ -28,7 +28,7 @@ where fn extend_set( set: IntervalContainer, - translater: Option, + translater: Option<&SplitTranslater>, growth: Growth, output: W, ) -> Result<()> @@ -38,13 +38,13 @@ where WriteNamedIterImpl: WriteNamedIter, { growth.warn_args(); - let genome = growth.get_genome(translater.as_ref())?; + let genome = growth.get_genome(translater.map(|x| x.get_translater(TranslateGroup::Chr)))?; let extend_iter = set.into_iter().map(|mut iv| { let (left, right) = growth.get_values(&iv); extend_interval(&mut iv, left, right, genome.as_ref()); iv }); - write_records_iter_with(extend_iter, output, translater.as_ref()) + write_records_iter_with(extend_iter, output, translater) } pub fn extend(args: ExtendArgs) -> Result<()> { diff --git a/src/commands/flank.rs b/src/commands/flank.rs index 0edbf5a..660bc1d 100644 --- a/src/commands/flank.rs +++ b/src/commands/flank.rs @@ -2,7 +2,7 @@ use crate::{ cli::{FlankArgs, Growth}, dispatch_single, io::{write_records_iter_with, WriteNamedIter, WriteNamedIterImpl}, - types::{Genome, InputFormat, Translater}, + types::{Genome, InputFormat, SplitTranslater, TranslateGroup}, }; use anyhow::Result; use bedrs::{traits::IntervalBounds, IntervalContainer}; @@ -80,7 +80,7 @@ where /// Flank the intervals in the set fn flank_set( set: IntervalContainer, - translater: Option, + translater: Option<&SplitTranslater>, growth: Growth, output: W, ) -> Result<()> @@ -90,12 +90,12 @@ where WriteNamedIterImpl: WriteNamedIter, { growth.warn_args(); - let genome = growth.get_genome(translater.as_ref())?; + let genome = growth.get_genome(translater.map(|x| x.get_translater(TranslateGroup::Chr)))?; let flank_iter = set.iter().flat_map(|iv| { let (left, right) = growth.get_values(iv); flank_interval(*iv, left, right, genome.as_ref()) }); - write_records_iter_with(flank_iter, output, translater.as_ref()) + write_records_iter_with(flank_iter, output, translater) } pub fn flank(args: FlankArgs) -> Result<()> { diff --git a/src/commands/intersect/run.rs b/src/commands/intersect/run.rs index 650cec0..835df50 100644 --- a/src/commands/intersect/run.rs +++ b/src/commands/intersect/run.rs @@ -6,7 +6,7 @@ use crate::{ build_reader, write_named_records_iter_dashmap, write_records_iter_with, NamedIter, UnnamedIter, WriteNamedIter, WriteNamedIterImpl, }, - types::{InputFormat, NumericBed3, StreamTranslater, Translater}, + types::{InputFormat, NumericBed3, SplitTranslater, StreamTranslater}, }; use anyhow::Result; use bedrs::{traits::IntervalBounds, IntersectIter, IntervalContainer, MergeIter}; @@ -16,7 +16,7 @@ use std::io::Write; pub fn intersect_sets( set_a: IntervalContainer, set_b: IntervalContainer, - translater: Option<&Translater>, + translater: Option<&SplitTranslater>, params: IntersectParams, writer: W, ) -> Result<()> @@ -93,7 +93,7 @@ fn intersect_stream(args: IntersectArgs) -> Result<()> { let merged_target_iter = MergeIter::new(target_iter); let intersect_iter = IntersectIter::new_with_method(merged_query_iter, merged_target_iter, method); - write_records_iter_with(intersect_iter, writer, None::<&Translater>)?; + write_records_iter_with(intersect_iter, writer, None::<&SplitTranslater>)?; } Ok(()) } diff --git a/src/commands/merge.rs b/src/commands/merge.rs index 5737f4d..4af9e40 100644 --- a/src/commands/merge.rs +++ b/src/commands/merge.rs @@ -5,7 +5,7 @@ use crate::{ build_reader, iter_unnamed, write_3col_iter_with, write_records_iter, BedReader, WriteNamedIter, WriteNamedIterImpl, }, - types::{InputFormat, NumericBed12, NumericBed3, NumericBed4, NumericBed6, Translater}, + types::{InputFormat, NumericBed12, NumericBed3, NumericBed4, NumericBed6, SplitTranslater}, }; use anyhow::Result; use bedrs::{traits::IntervalBounds, IntervalContainer, MergeIter}; @@ -14,7 +14,7 @@ use std::io::Write; fn merge_in_memory( mut set: IntervalContainer, - translater: Option, + translater: Option<&SplitTranslater>, params: MergeParams, writer: W, ) -> Result<()> @@ -29,7 +29,7 @@ where set.set_sorted(); } let merged = set.merge()?; - write_3col_iter_with(merged.into_iter(), writer, translater.as_ref())?; + write_3col_iter_with(merged.into_iter(), writer, translater)?; Ok(()) } diff --git a/src/commands/sample.rs b/src/commands/sample.rs index 3ca0166..182311a 100644 --- a/src/commands/sample.rs +++ b/src/commands/sample.rs @@ -1,7 +1,7 @@ use crate::{ cli::{SampleArgs, SampleParams}, io::{write_records_iter_with, WriteNamedIter, WriteNamedIterImpl}, - types::{InputFormat, Translater}, + types::{InputFormat, SplitTranslater}, }; use anyhow::{bail, Result}; use bedrs::{traits::IntervalBounds, IntervalContainer}; @@ -10,7 +10,7 @@ use std::io::Write; fn sample_from_set( set: &mut IntervalContainer, - translater: Option<&Translater>, + translater: Option<&SplitTranslater>, params: SampleParams, writer: W, ) -> Result<()> diff --git a/src/commands/shift.rs b/src/commands/shift.rs index 9d6fffb..be7e3e6 100644 --- a/src/commands/shift.rs +++ b/src/commands/shift.rs @@ -2,7 +2,7 @@ use crate::{ cli::{ShiftArgs, ShiftParams}, dispatch_single, io::{write_records_iter_with, WriteNamedIter, WriteNamedIterImpl}, - types::{Genome, InputFormat, Translater}, + types::{Genome, InputFormat, SplitTranslater, TranslateGroup}, }; use anyhow::Result; use bedrs::{traits::IntervalBounds, IntervalContainer}; @@ -55,7 +55,7 @@ where fn shift_set( set: IntervalContainer, - translater: Option, + translater: Option<&SplitTranslater>, params: ShiftParams, output: W, ) -> Result<()> @@ -65,11 +65,15 @@ where WriteNamedIterImpl: WriteNamedIter, { params.warn_args(); - let genome = Genome::from_opt_path_immutable_with(params.genome, translater.as_ref(), false)?; + let genome = Genome::from_opt_path_immutable_with( + params.genome, + translater.map(|x| x.get_translater(TranslateGroup::Chr)), + false, + )?; let shift_iter = set .into_iter() .map(|iv| shift_interval(iv, params.amount, params.percent, genome.as_ref())); - write_records_iter_with(shift_iter, output, translater.as_ref()) + write_records_iter_with(shift_iter, output, translater) } pub fn shift(args: ShiftArgs) -> Result<()> { diff --git a/src/commands/sort.rs b/src/commands/sort.rs index 40984dd..90745db 100644 --- a/src/commands/sort.rs +++ b/src/commands/sort.rs @@ -1,8 +1,8 @@ use crate::{ cli::{SortArgs, SortParams}, - dispatch_single, + dispatch_single_owned_tl, io::{write_records_iter_with, WriteNamedIter, WriteNamedIterImpl}, - types::{InputFormat, Reorder, Retranslater, Translater}, + types::{InputFormat, Reorder, SplitRetranslater, SplitTranslater}, }; use anyhow::Result; use bedrs::{traits::IntervalBounds, IntervalContainer}; @@ -11,15 +11,16 @@ use std::io::Write; fn sort_set( set: &mut IntervalContainer, - translater: Option, + translater: Option, parallel: bool, -) -> Option +) -> Option where I: IntervalBounds + Reorder, { let translater = if let Some(translater) = translater { - let retranslater = I::reorder_translater(set, translater); - Some(retranslater) + let (chr_tl, meta_tl) = translater.disband(); + let retranslater = I::reorder_translater(set, chr_tl); + Some(SplitRetranslater::new(retranslater, meta_tl)) } else { None }; @@ -33,7 +34,7 @@ where fn sort_and_write( mut set: IntervalContainer, - translater: Option, + translater: Option, params: SortParams, writer: W, ) -> Result<()> @@ -50,5 +51,5 @@ where pub fn sort(args: SortArgs) -> Result<()> { let reader = args.input.get_reader()?; let writer = args.output.get_writer()?; - dispatch_single!(reader, writer, args.params, sort_and_write) + dispatch_single_owned_tl!(reader, writer, args.params, sort_and_write) } diff --git a/src/commands/subtract.rs b/src/commands/subtract.rs index c031265..2a42bad 100644 --- a/src/commands/subtract.rs +++ b/src/commands/subtract.rs @@ -2,7 +2,7 @@ use crate::{ cli::{SubtractArgs, SubtractParams}, dispatch_pair, io::{write_records_iter_with, WriteNamedIter, WriteNamedIterImpl}, - types::{InputFormat, Translater}, + types::{InputFormat, SplitTranslater}, utils::sort_pairs, }; use anyhow::Result; @@ -64,7 +64,7 @@ where fn run_subtract( mut aset: IntervalContainer, mut bset: IntervalContainer, - translater: Option<&Translater>, + translater: Option<&SplitTranslater>, params: SubtractParams, writer: W, ) -> Result<()> diff --git a/src/commands/window.rs b/src/commands/window.rs index 90e701c..d87b58c 100644 --- a/src/commands/window.rs +++ b/src/commands/window.rs @@ -7,14 +7,14 @@ use crate::{ cli::{WindowArgs, WindowParams}, dispatch_pair, io::{write_pairs_iter_with, write_records_iter_with, WriteNamedIter, WriteNamedIterImpl}, - types::{InputFormat, IntervalPair, Rename, Renamer, Translater}, + types::{InputFormat, IntervalPair, Rename, Renamer, SplitTranslater}, utils::sort_pairs, }; fn windowed_set_overlaps<'a, Ia, Ib, Na, Nb, W>( mut set_a: IntervalContainer, mut set_b: IntervalContainer, - translater: Option<&'a Translater>, + translater: Option<&'a SplitTranslater>, params: WindowParams, output: W, ) -> Result<()> diff --git a/src/dispatch.rs b/src/dispatch.rs index 307fb38..4a77f49 100644 --- a/src/dispatch.rs +++ b/src/dispatch.rs @@ -2,6 +2,35 @@ /// a writer. #[macro_export] macro_rules! dispatch_single { + ($reader:expr, $writer:expr, $params:expr, $func:expr) => { + match $reader.input_format() { + InputFormat::Bed3 => { + let (set, translater) = $reader.bed3_set()?; + $func(set, translater.as_ref(), $params, $writer) + } + InputFormat::Bed4 => { + let (set, translater) = $reader.bed4_set()?; + $func(set, translater.as_ref(), $params, $writer) + } + InputFormat::Bed6 => { + let (set, translater) = $reader.bed6_set()?; + $func(set, translater.as_ref(), $params, $writer) + } + InputFormat::Bed12 => { + let (set, translater) = $reader.bed12_set()?; + $func(set, translater.as_ref(), $params, $writer) + } + InputFormat::Ambiguous => { + let (set, translater) = $reader.meta_interval_set()?; + $func(set, translater.as_ref(), $params, $writer) + } + } + }; +} +/// This is a macro to match the input format and dispatch to some function with some parameters and +/// a writer. +#[macro_export] +macro_rules! dispatch_single_owned_tl { ($reader:expr, $writer:expr, $params:expr, $func:expr) => { match $reader.input_format() { InputFormat::Bed3 => { @@ -33,7 +62,9 @@ macro_rules! dispatch_single { #[macro_export] macro_rules! dispatch_pair { ($reader_a:expr, $reader_b:expr, $writer:expr, $params:expr, $func:expr) => {{ - let mut translater = $reader_a.is_named().then_some(Translater::new()); + let mut translater = $reader_a + .is_named() + .then_some($crate::types::SplitTranslater::new()); $crate::dispatch_to_lhs!($reader_a, $reader_b, translater, $writer, $params, $func) }}; } diff --git a/src/io/read/bed12.rs b/src/io/read/bed12.rs index c1799eb..11d9e0e 100644 --- a/src/io/read/bed12.rs +++ b/src/io/read/bed12.rs @@ -1,11 +1,14 @@ use super::build_reader; -use crate::types::{Bed12Set, NamedBed12, NumericBed12, Translater}; +use crate::types::{Bed12Set, NamedBed12, NumericBed12, SplitTranslater, TranslateGroup}; use anyhow::{bail, Result}; use bedrs::{Coordinates, IntervalContainer}; use csv::ByteRecord; use std::io::Read; -pub fn read_bed12_set(reader: R, named: bool) -> Result<(Bed12Set, Option)> { +pub fn read_bed12_set( + reader: R, + named: bool, +) -> Result<(Bed12Set, Option)> { if named { let (set, translater) = read_bed12_set_named(reader)?; Ok((set, Some(translater))) @@ -17,7 +20,7 @@ pub fn read_bed12_set(reader: R, named: bool) -> Result<(Bed12Set, Opti pub fn read_bed12_set_with( reader: R, - translater: Option<&mut Translater>, + translater: Option<&mut SplitTranslater>, ) -> Result { if let Some(translater) = translater { convert_bed12_set(reader, translater) @@ -43,9 +46,9 @@ fn read_bed12_set_unnamed(reader: R) -> Result { Ok(set) } -/// Reads a single file into a GenomicIntervalSet and a Translater -fn read_bed12_set_named(reader: R) -> Result<(Bed12Set, Translater)> { - let mut translater = Translater::new(); +/// Reads a single file into a GenomicIntervalSet and a SplitTranslater +fn read_bed12_set_named(reader: R) -> Result<(Bed12Set, SplitTranslater)> { + let mut translater = SplitTranslater::new(); let set = convert_bed12_set(reader, &mut translater)?; Ok((set, translater)) } @@ -55,23 +58,33 @@ fn read_bed12_set_named(reader: R) -> Result<(Bed12Set, Translater)> { /// It uses an externally initialized name map and index map to keep track of /// chromosome names and indices. This is useful for reading multiple files /// and keeping track of the same chromosome names and indices. -fn convert_bed12_set(reader: R, translater: &mut Translater) -> Result { +fn convert_bed12_set(reader: R, translater: &mut SplitTranslater) -> Result { let mut reader = build_reader(reader); let mut raw_record = ByteRecord::new(); let mut set = Bed12Set::empty(); while reader.read_byte_record(&mut raw_record)? { let record: NamedBed12 = raw_record.deserialize(None)?; - translater.add_name(record.chr()); - translater.add_name(record.name()); - translater.add_name(record.item_rgb()); - translater.add_name(record.block_sizes()); - translater.add_name(record.block_starts()); + translater.add_name(record.chr(), TranslateGroup::Chr); + translater.add_name(record.name(), TranslateGroup::Meta); + translater.add_name(record.item_rgb(), TranslateGroup::Meta); + translater.add_name(record.block_sizes(), TranslateGroup::Meta); + translater.add_name(record.block_starts(), TranslateGroup::Meta); - let chr_int = translater.get_idx(record.chr()).unwrap(); - let name_int = translater.get_idx(record.name()).unwrap(); - let item_rgb_int = translater.get_idx(record.item_rgb()).unwrap(); - let block_sizes_int = translater.get_idx(record.block_sizes()).unwrap(); - let block_starts_int = translater.get_idx(record.block_starts()).unwrap(); + let chr_int = translater + .get_idx(record.chr(), TranslateGroup::Chr) + .unwrap(); + let name_int = translater + .get_idx(record.name(), TranslateGroup::Meta) + .unwrap(); + let item_rgb_int = translater + .get_idx(record.item_rgb(), TranslateGroup::Meta) + .unwrap(); + let block_sizes_int = translater + .get_idx(record.block_sizes(), TranslateGroup::Meta) + .unwrap(); + let block_starts_int = translater + .get_idx(record.block_starts(), TranslateGroup::Meta) + .unwrap(); let interval = NumericBed12::new( chr_int, record.start(), diff --git a/src/io/read/bed3.rs b/src/io/read/bed3.rs index da1b26f..8bb2035 100644 --- a/src/io/read/bed3.rs +++ b/src/io/read/bed3.rs @@ -1,14 +1,17 @@ use super::build_reader; use crate::{ io::NamedInterval, - types::{Bed3Set, NumericBed3, Translater}, + types::{Bed3Set, NumericBed3, SplitTranslater, TranslateGroup}, }; use anyhow::{bail, Result}; use bedrs::IntervalContainer; use csv::ByteRecord; use std::io::Read; -pub fn read_bed3_set(reader: R, named: bool) -> Result<(Bed3Set, Option)> { +pub fn read_bed3_set( + reader: R, + named: bool, +) -> Result<(Bed3Set, Option)> { if named { let (set, idx_map) = read_bed3_set_named(reader)?; Ok((set, Some(idx_map))) @@ -20,7 +23,7 @@ pub fn read_bed3_set(reader: R, named: bool) -> Result<(Bed3Set, Option pub fn read_bed3_set_with( reader: R, - translater: Option<&mut Translater>, + translater: Option<&mut SplitTranslater>, ) -> Result { if let Some(translater) = translater { convert_bed3_set(reader, translater) @@ -46,9 +49,9 @@ fn read_bed3_set_unnamed(reader: R) -> Result { Ok(set) } -/// Reads a single file into a GenomicIntervalSet and a Translater -fn read_bed3_set_named(reader: R) -> Result<(Bed3Set, Translater)> { - let mut translater = Translater::new(); +/// Reads a single file into a GenomicIntervalSet and a SplitTranslater +fn read_bed3_set_named(reader: R) -> Result<(Bed3Set, SplitTranslater)> { + let mut translater = SplitTranslater::new(); let set = convert_bed3_set(reader, &mut translater)?; Ok((set, translater)) } @@ -58,14 +61,16 @@ fn read_bed3_set_named(reader: R) -> Result<(Bed3Set, Translater)> { /// It uses an externally initialized name map and index map to keep track of /// chromosome names and indices. This is useful for reading multiple files /// and keeping track of the same chromosome names and indices. -fn convert_bed3_set(reader: R, translater: &mut Translater) -> Result { +fn convert_bed3_set(reader: R, translater: &mut SplitTranslater) -> Result { let mut reader = build_reader(reader); let mut raw_record = ByteRecord::new(); let mut set = IntervalContainer::empty(); while reader.read_byte_record(&mut raw_record)? { let record: NamedInterval = raw_record.deserialize(None)?; - translater.add_name(record.name); - let chr_int = translater.get_idx(record.name).unwrap(); + translater.add_name(record.name, TranslateGroup::Chr); + let chr_int = translater + .get_idx(record.name, TranslateGroup::Chr) + .unwrap(); let interval = NumericBed3::new(chr_int, record.start, record.end); set.insert(interval); } diff --git a/src/io/read/bed4.rs b/src/io/read/bed4.rs index 9bcc98c..319f3e1 100644 --- a/src/io/read/bed4.rs +++ b/src/io/read/bed4.rs @@ -1,11 +1,14 @@ use super::build_reader; -use crate::types::{Bed4Set, NamedBed4, NumericBed4, Translater}; +use crate::types::{Bed4Set, NamedBed4, NumericBed4, SplitTranslater, TranslateGroup}; use anyhow::{bail, Result}; use bedrs::Coordinates; use csv::ByteRecord; use std::io::Read; -pub fn read_bed4_set(reader: R, named: bool) -> Result<(Bed4Set, Option)> { +pub fn read_bed4_set( + reader: R, + named: bool, +) -> Result<(Bed4Set, Option)> { if named { let (set, translater) = read_bed4_set_named(reader)?; Ok((set, Some(translater))) @@ -17,7 +20,7 @@ pub fn read_bed4_set(reader: R, named: bool) -> Result<(Bed4Set, Option pub fn read_bed4_set_with( reader: R, - translater: Option<&mut Translater>, + translater: Option<&mut SplitTranslater>, ) -> Result { if let Some(translater) = translater { convert_bed4_set(reader, translater) @@ -43,9 +46,9 @@ fn read_bed4_set_unnamed(reader: R) -> Result { Ok(set) } -/// Reads a single file into a GenomicIntervalSet and a Translater -fn read_bed4_set_named(reader: R) -> Result<(Bed4Set, Translater)> { - let mut translater = Translater::new(); +/// Reads a single file into a GenomicIntervalSet and a SplitTranslater +fn read_bed4_set_named(reader: R) -> Result<(Bed4Set, SplitTranslater)> { + let mut translater = SplitTranslater::new(); let set = convert_bed4_set(reader, &mut translater)?; Ok((set, translater)) } @@ -55,16 +58,20 @@ fn read_bed4_set_named(reader: R) -> Result<(Bed4Set, Translater)> { /// It uses an externally initialized name map and index map to keep track of /// chromosome names and indices. This is useful for reading multiple files /// and keeping track of the same chromosome names and indices. -fn convert_bed4_set(reader: R, translater: &mut Translater) -> Result { +fn convert_bed4_set(reader: R, translater: &mut SplitTranslater) -> Result { let mut reader = build_reader(reader); let mut raw_record = ByteRecord::new(); let mut set = Bed4Set::empty(); while reader.read_byte_record(&mut raw_record)? { let record: NamedBed4 = raw_record.deserialize(None)?; - translater.add_name(record.chr()); - translater.add_name(record.name()); - let chr_int = translater.get_idx(record.chr()).unwrap(); - let name_int = translater.get_idx(record.name()).unwrap(); + translater.add_name(record.chr(), TranslateGroup::Chr); + translater.add_name(record.name(), TranslateGroup::Meta); + let chr_int = translater + .get_idx(record.chr(), TranslateGroup::Chr) + .unwrap(); + let name_int = translater + .get_idx(record.name(), TranslateGroup::Meta) + .unwrap(); let interval = NumericBed4::new(chr_int, record.start(), record.end(), name_int); set.insert(interval); } diff --git a/src/io/read/bed6.rs b/src/io/read/bed6.rs index 04d6a2f..caebae9 100644 --- a/src/io/read/bed6.rs +++ b/src/io/read/bed6.rs @@ -1,11 +1,14 @@ use super::build_reader; -use crate::types::{Bed6Set, NamedBed6, NumericBed6, Translater}; +use crate::types::{Bed6Set, NamedBed6, NumericBed6, SplitTranslater, TranslateGroup}; use anyhow::{bail, Result}; use bedrs::Coordinates; use csv::ByteRecord; use std::io::Read; -pub fn read_bed6_set(reader: R, named: bool) -> Result<(Bed6Set, Option)> { +pub fn read_bed6_set( + reader: R, + named: bool, +) -> Result<(Bed6Set, Option)> { if named { let (set, translater) = read_bed6_set_named(reader)?; Ok((set, Some(translater))) @@ -17,7 +20,7 @@ pub fn read_bed6_set(reader: R, named: bool) -> Result<(Bed6Set, Option pub fn read_bed6_set_with( reader: R, - translater: Option<&mut Translater>, + translater: Option<&mut SplitTranslater>, ) -> Result { if let Some(translater) = translater { convert_bed6_set(reader, translater) @@ -43,9 +46,9 @@ fn read_bed6_set_unnamed(reader: R) -> Result { Ok(set) } -/// Reads a single file into a GenomicIntervalSet and a Translater -fn read_bed6_set_named(reader: R) -> Result<(Bed6Set, Translater)> { - let mut translater = Translater::new(); +/// Reads a single file into a GenomicIntervalSet and a SplitTranslater +fn read_bed6_set_named(reader: R) -> Result<(Bed6Set, SplitTranslater)> { + let mut translater = SplitTranslater::new(); let set = convert_bed6_set(reader, &mut translater)?; Ok((set, translater)) } @@ -55,16 +58,20 @@ fn read_bed6_set_named(reader: R) -> Result<(Bed6Set, Translater)> { /// It uses an externally initialized name map and index map to keep track of /// chromosome names and indices. This is useful for reading multiple files /// and keeping track of the same chromosome names and indices. -fn convert_bed6_set(reader: R, translater: &mut Translater) -> Result { +fn convert_bed6_set(reader: R, translater: &mut SplitTranslater) -> Result { let mut reader = build_reader(reader); let mut raw_record = ByteRecord::new(); let mut set = Bed6Set::empty(); while reader.read_byte_record(&mut raw_record)? { let record: NamedBed6 = raw_record.deserialize(None)?; - translater.add_name(record.chr()); - translater.add_name(record.name()); - let chr_int = translater.get_idx(record.chr()).unwrap(); - let name_int = translater.get_idx(record.name()).unwrap(); + translater.add_name(record.chr(), TranslateGroup::Chr); + translater.add_name(record.name(), TranslateGroup::Meta); + let chr_int = translater + .get_idx(record.chr(), TranslateGroup::Chr) + .unwrap(); + let name_int = translater + .get_idx(record.name(), TranslateGroup::Meta) + .unwrap(); let interval = NumericBed6::new( chr_int, record.start(), diff --git a/src/io/read/bed_reader.rs b/src/io/read/bed_reader.rs index ec7f9b3..08dd570 100644 --- a/src/io/read/bed_reader.rs +++ b/src/io/read/bed_reader.rs @@ -4,7 +4,7 @@ use super::{ read_meta_interval_set_with, }; use crate::types::{ - Bed12Set, Bed3Set, Bed4Set, Bed6Set, FieldFormat, InputFormat, MetaIntervalSet, Translater, + Bed12Set, Bed3Set, Bed4Set, Bed6Set, FieldFormat, InputFormat, MetaIntervalSet, SplitTranslater, }; use anyhow::Result; use flate2::read::MultiGzDecoder; @@ -102,60 +102,60 @@ impl BedReader { )) } - /// Returns a Bed3Set from the reader with an Option - pub fn bed3_set(self) -> Result<(Bed3Set, Option)> { + /// Returns a Bed3Set from the reader with an Option + pub fn bed3_set(self) -> Result<(Bed3Set, Option)> { let is_named = self.is_named(); read_bed3_set(self.reader(), is_named) } - /// Returns a Bed4Set from the reader with an Option - pub fn bed4_set(self) -> Result<(Bed4Set, Option)> { + /// Returns a Bed4Set from the reader with an Option + pub fn bed4_set(self) -> Result<(Bed4Set, Option)> { let is_named = self.is_named(); read_bed4_set(self.reader(), is_named) } - /// Returns a Bed6Set from the reader with an Option - pub fn bed6_set(self) -> Result<(Bed6Set, Option)> { + /// Returns a Bed6Set from the reader with an Option + pub fn bed6_set(self) -> Result<(Bed6Set, Option)> { let is_named = self.is_named(); read_bed6_set(self.reader(), is_named) } - /// Returns a Bed6Set from the reader with an Option - pub fn bed12_set(self) -> Result<(Bed12Set, Option)> { + /// Returns a Bed6Set from the reader with an Option + pub fn bed12_set(self) -> Result<(Bed12Set, Option)> { let is_named = self.is_named(); read_bed12_set(self.reader(), is_named) } - /// Returns a MetaIntervalSet from the reader with an Option - pub fn meta_interval_set(self) -> Result<(MetaIntervalSet, Option)> { + /// Returns a MetaIntervalSet from the reader with an Option + pub fn meta_interval_set(self) -> Result<(MetaIntervalSet, Option)> { let is_named = self.is_named(); read_meta_interval_set(self.reader(), is_named) } /// Returns a Bed3Set from the reader - pub fn bed3_set_with(self, translater: Option<&mut Translater>) -> Result { + pub fn bed3_set_with(self, translater: Option<&mut SplitTranslater>) -> Result { read_bed3_set_with(self.reader(), translater) } /// Returns a Bed4Set from the reader - pub fn bed4_set_with(self, translater: Option<&mut Translater>) -> Result { + pub fn bed4_set_with(self, translater: Option<&mut SplitTranslater>) -> Result { read_bed4_set_with(self.reader(), translater) } /// Returns a Bed6Set from the reader - pub fn bed6_set_with(self, translater: Option<&mut Translater>) -> Result { + pub fn bed6_set_with(self, translater: Option<&mut SplitTranslater>) -> Result { read_bed6_set_with(self.reader(), translater) } /// Returns a Bed6Set from the reader - pub fn bed12_set_with(self, translater: Option<&mut Translater>) -> Result { + pub fn bed12_set_with(self, translater: Option<&mut SplitTranslater>) -> Result { read_bed12_set_with(self.reader(), translater) } /// Returns a MetaIntervalSet from the reader pub fn meta_interval_set_with( self, - translater: Option<&mut Translater>, + translater: Option<&mut SplitTranslater>, ) -> Result { read_meta_interval_set_with(self.reader(), translater) } diff --git a/src/io/read/meta_interval.rs b/src/io/read/meta_interval.rs index d2f1bd7..81abbce 100644 --- a/src/io/read/meta_interval.rs +++ b/src/io/read/meta_interval.rs @@ -1,5 +1,5 @@ use super::build_reader; -use crate::types::{MetaIntervalSet, NumericMetaInterval, Translater}; +use crate::types::{MetaIntervalSet, NumericMetaInterval, SplitTranslater, TranslateGroup}; use anyhow::{bail, Result}; use csv::ByteRecord; use std::{io::Read, str::from_utf8}; @@ -7,7 +7,7 @@ use std::{io::Read, str::from_utf8}; pub fn read_meta_interval_set( reader: R, named: bool, -) -> Result<(MetaIntervalSet, Option)> { +) -> Result<(MetaIntervalSet, Option)> { if named { let (set, translater) = read_meta_interval_set_named(reader)?; Ok((set, Some(translater))) @@ -19,7 +19,7 @@ pub fn read_meta_interval_set( pub fn read_meta_interval_set_with( reader: R, - translater: Option<&mut Translater>, + translater: Option<&mut SplitTranslater>, ) -> Result { if let Some(translater) = translater { convert_meta_interval_set(reader, translater) @@ -45,9 +45,9 @@ fn read_meta_interval_set_unnamed(reader: R) -> Result Ok(set) } -/// Reads a single file into a GenomicIntervalSet and a Translater -fn read_meta_interval_set_named(reader: R) -> Result<(MetaIntervalSet, Translater)> { - let mut translater = Translater::new(); +/// Reads a single file into a GenomicIntervalSet and a SplitTranslater +fn read_meta_interval_set_named(reader: R) -> Result<(MetaIntervalSet, SplitTranslater)> { + let mut translater = SplitTranslater::new(); let set = convert_meta_interval_set(reader, &mut translater)?; Ok((set, translater)) } @@ -59,7 +59,7 @@ fn read_meta_interval_set_named(reader: R) -> Result<(MetaIntervalSet, /// and keeping track of the same chromosome names and indices. fn convert_meta_interval_set( reader: R, - translater: &mut Translater, + translater: &mut SplitTranslater, ) -> Result { let mut reader = build_reader(reader); let mut raw_record = ByteRecord::new(); @@ -94,10 +94,10 @@ fn convert_meta_interval_set( } // Add the chromosome and metadata to the translater - translater.add_name(chr); - translater.add_name(&buffer); - let chr_int = translater.get_idx(chr).unwrap(); - let name_int = translater.get_idx(&buffer).unwrap(); + translater.add_name(chr, TranslateGroup::Chr); + translater.add_name(&buffer, TranslateGroup::Meta); + let chr_int = translater.get_idx(chr, TranslateGroup::Chr).unwrap(); + let name_int = translater.get_idx(&buffer, TranslateGroup::Meta).unwrap(); // Create the interval and add it to the set let interval = NumericMetaInterval::new(chr_int, start, end, name_int); diff --git a/src/io/write/utils.rs b/src/io/write/utils.rs index bcb2f93..b85b4dd 100644 --- a/src/io/write/utils.rs +++ b/src/io/write/utils.rs @@ -1,5 +1,5 @@ use crate::types::{ - IntervalDepth, IntervalPair, NumericBed3, Rename, Renamer, StreamTranslater, Translater, + IntervalDepth, IntervalPair, NumericBed3, Rename, Renamer, SplitTranslater, StreamTranslater, }; use anyhow::Result; use bedrs::{traits::IntervalBounds, Coordinates}; @@ -32,7 +32,7 @@ where pub fn write_depth_iter_with<'a, W, I, N, It>( records: It, writer: W, - translater: Option<&Translater>, + translater: Option<&SplitTranslater>, ) -> Result<()> where I: IntervalBounds + Serialize, @@ -83,7 +83,7 @@ where pub fn write_pairs_iter_with<'a, W, Ia, Ib, Na, Nb, It>( records: It, writer: W, - translater: Option<&Translater>, + translater: Option<&SplitTranslater>, ) -> Result<()> where Ia: IntervalBounds + Serialize, diff --git a/src/types/depth.rs b/src/types/depth.rs index cc0fda8..bacc1a7 100644 --- a/src/types/depth.rs +++ b/src/types/depth.rs @@ -1,4 +1,4 @@ -use super::{Rename, Renamer, Translater}; +use super::{Rename, Renamer, SplitTranslater}; use bedrs::traits::IntervalBounds; pub struct IntervalDepth<'a, I, N> @@ -8,7 +8,7 @@ where { pub iv: I, pub n_overlaps: usize, - pub translater: Option<&'a Translater>, + pub translater: Option<&'a SplitTranslater>, phantom: std::marker::PhantomData, } impl<'a, I, N> IntervalDepth<'a, I, N> @@ -17,7 +17,7 @@ where N: IntervalBounds<&'a str, usize>, Renamer: Rename<'a, I, N>, { - pub fn new(iv: I, n_overlaps: usize, translater: Option<&'a Translater>) -> Self { + pub fn new(iv: I, n_overlaps: usize, translater: Option<&'a SplitTranslater>) -> Self { Self { iv, n_overlaps, @@ -33,7 +33,7 @@ where let n = Renamer::rename_with(&self.iv, translater); (n, self.n_overlaps) } else { - panic!("Translater was not provided but get_named_tuple was called - there is a bug somewhere!") + panic!("SplitTranslater was not provided but get_named_tuple was called - there is a bug somewhere!") } } } diff --git a/src/types/formats/in_formats.rs b/src/types/formats/in_formats.rs index 5e480a1..62f0b53 100644 --- a/src/types/formats/in_formats.rs +++ b/src/types/formats/in_formats.rs @@ -127,6 +127,15 @@ mod testing { assert_eq!(input_format, InputFormat::Bed3); } + #[test] + fn input_format_bed4() { + let line = b"chr1\t1\t2\tname"; + let mut buffer = BufReader::new(line.as_slice()); + buffer.fill_buf().unwrap(); + let input_format = InputFormat::predict(&buffer).unwrap(); + assert_eq!(input_format, InputFormat::Bed4); + } + #[test] fn input_format_bed6() { let line = b"chr1\t1\t2\tname\t0\t+"; @@ -141,8 +150,8 @@ mod testing { let line = b"chr1\t1\t2\tname\t0\t+\textra"; let mut buffer = BufReader::new(line.as_slice()); buffer.fill_buf().unwrap(); - let input_format = InputFormat::predict(&buffer); - assert!(input_format.is_err()); + let input_format = InputFormat::predict(&buffer).unwrap(); + assert_eq!(input_format, InputFormat::Ambiguous); } #[test] diff --git a/src/types/mod.rs b/src/types/mod.rs index 5b98015..8814562 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -7,8 +7,8 @@ pub use depth::IntervalDepth; pub use formats::{FieldFormat, Genome, InputFormat}; pub use pairs::IntervalPair; pub use translate::{ - Rename, Renamer, Reorder, Retranslater, SplitTranslater, StreamTranslater, Translate, - Translater, + Rename, Renamer, Reorder, SplitRetranslater, SplitTranslater, StreamTranslater, Translate, + TranslateGroup, Translater, }; pub type NumericBed3 = Bed3; diff --git a/src/types/pairs.rs b/src/types/pairs.rs index 57d866f..2312286 100644 --- a/src/types/pairs.rs +++ b/src/types/pairs.rs @@ -1,4 +1,4 @@ -use super::{Rename, Renamer, Translater}; +use super::{Rename, Renamer, SplitTranslater}; use bedrs::traits::IntervalBounds; pub struct IntervalPair<'a, Ia, Ib, Na, Nb> @@ -11,7 +11,7 @@ where { pub iv_a: Ia, pub iv_b: Ib, - pub translater: Option<&'a Translater>, + pub translater: Option<&'a SplitTranslater>, phantom_a: std::marker::PhantomData, phantom_b: std::marker::PhantomData, } @@ -23,7 +23,7 @@ where Nb: IntervalBounds<&'a str, usize>, Renamer: Rename<'a, Ia, Na> + Rename<'a, Ib, Nb>, { - pub fn new(iv_a: Ia, iv_b: Ib, translater: Option<&'a Translater>) -> Self { + pub fn new(iv_a: Ia, iv_b: Ib, translater: Option<&'a SplitTranslater>) -> Self { Self { iv_a, iv_b, @@ -41,7 +41,7 @@ where let named_b = Renamer::rename_with(&self.iv_b, translater); (named_a, named_b) } else { - panic!("Translater was not provided but get_named_tuple was called - there is a bug somewhere!") + panic!("SplitTranslater was not provided but get_named_tuple was called - there is a bug somewhere!") } } } diff --git a/src/types/translate/mod.rs b/src/types/translate/mod.rs index eb9c8ce..eff0fdb 100644 --- a/src/types/translate/mod.rs +++ b/src/types/translate/mod.rs @@ -1,12 +1,14 @@ mod rename; mod reorder; mod retranslater; +mod split_retranslater; mod split_translater; mod stream_translater; mod translater; pub use rename::{Rename, Renamer}; pub use reorder::Reorder; pub use retranslater::Retranslater; +pub use split_retranslater::SplitRetranslater; pub use split_translater::SplitTranslater; pub use stream_translater::StreamTranslater; pub use translater::Translater; diff --git a/src/types/translate/rename.rs b/src/types/translate/rename.rs index a79d58d..bc91b00 100644 --- a/src/types/translate/rename.rs +++ b/src/types/translate/rename.rs @@ -1,4 +1,4 @@ -use super::{Translate, Translater}; +use super::{SplitTranslater, Translate}; use crate::types::{ NamedBed12, NamedBed3, NamedBed4, NamedBed6, NamedMetaInterval, NumericBed12, NumericBed3, NumericBed4, NumericBed6, NumericMetaInterval, @@ -11,23 +11,23 @@ where Ia: IntervalBounds, Ib: IntervalBounds<&'a str, usize>, { - fn rename_with(iv: &Ia, translater: &'a Translater) -> Ib; + fn rename_with(iv: &Ia, translater: &'a SplitTranslater) -> Ib; } impl<'a> Rename<'a, NumericBed3, NamedBed3<'a>> for Renamer { - fn rename_with(iv: &NumericBed3, translater: &'a Translater) -> NamedBed3<'a> { + fn rename_with(iv: &NumericBed3, translater: &'a SplitTranslater) -> NamedBed3<'a> { let chr = translater.get_chr_name(*iv.chr()).unwrap(); NamedBed3::new(chr, iv.start(), iv.end()) } } impl<'a> Rename<'a, NumericBed4, NamedBed4<'a>> for Renamer { - fn rename_with(iv: &NumericBed4, translater: &'a Translater) -> NamedBed4<'a> { + fn rename_with(iv: &NumericBed4, translater: &'a SplitTranslater) -> NamedBed4<'a> { let chr = translater.get_chr_name(*iv.chr()).unwrap(); let name = translater.get_meta_name(*iv.name()).unwrap(); NamedBed4::new(chr, iv.start(), iv.end(), name) } } impl<'a> Rename<'a, NumericBed6, NamedBed6<'a>> for Renamer { - fn rename_with(iv: &NumericBed6, translater: &'a Translater) -> NamedBed6<'a> { + fn rename_with(iv: &NumericBed6, translater: &'a SplitTranslater) -> NamedBed6<'a> { let chr = translater.get_chr_name(*iv.chr()).unwrap(); let name = translater.get_meta_name(*iv.name()).unwrap(); NamedBed6::new( @@ -41,7 +41,7 @@ impl<'a> Rename<'a, NumericBed6, NamedBed6<'a>> for Renamer { } } impl<'a> Rename<'a, NumericBed12, NamedBed12<'a>> for Renamer { - fn rename_with(iv: &NumericBed12, translater: &'a Translater) -> NamedBed12<'a> { + fn rename_with(iv: &NumericBed12, translater: &'a SplitTranslater) -> NamedBed12<'a> { let chr = translater.get_chr_name(*iv.chr()).unwrap(); let name = translater.get_meta_name(*iv.name()).unwrap(); let item_rgb = translater.get_meta_name(*iv.item_rgb()).unwrap(); @@ -64,7 +64,10 @@ impl<'a> Rename<'a, NumericBed12, NamedBed12<'a>> for Renamer { } } impl<'a> Rename<'a, NumericMetaInterval, NamedMetaInterval<'a>> for Renamer { - fn rename_with(iv: &NumericMetaInterval, translater: &'a Translater) -> NamedMetaInterval<'a> { + fn rename_with( + iv: &NumericMetaInterval, + translater: &'a SplitTranslater, + ) -> NamedMetaInterval<'a> { let chr = translater.get_chr_name(*iv.chr()).unwrap(); let meta = translater.get_chr_name(*iv.meta()).unwrap(); NamedMetaInterval::new(chr, iv.start(), iv.end(), meta) diff --git a/src/types/translate/reorder.rs b/src/types/translate/reorder.rs index ffb5067..c85252d 100644 --- a/src/types/translate/reorder.rs +++ b/src/types/translate/reorder.rs @@ -32,9 +32,7 @@ impl Reorder for NumericBed4 { let retranslate = translater.lex_sort(); set.apply_mut(|iv| { let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); - let new_name = retranslate.get_rank(*iv.name()).unwrap(); iv.update_chr(&new_chr); - iv.update_name(&new_name); }); retranslate } @@ -47,9 +45,7 @@ impl Reorder for NumericBed6 { let retranslate = translater.lex_sort(); set.apply_mut(|iv| { let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); - let new_name = retranslate.get_rank(*iv.name()).unwrap(); iv.update_chr(&new_chr); - iv.update_name(&new_name); }); retranslate } @@ -62,15 +58,7 @@ impl Reorder for NumericBed12 { let retranslate = translater.lex_sort(); set.apply_mut(|iv| { let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); - let new_name = retranslate.get_rank(*iv.name()).unwrap(); - let new_item_rgb = retranslate.get_rank(*iv.item_rgb()).unwrap(); - let new_block_sizes = retranslate.get_rank(*iv.block_sizes()).unwrap(); - let new_block_starts = retranslate.get_rank(*iv.block_starts()).unwrap(); iv.update_chr(&new_chr); - iv.update_name(&new_name); - iv.update_item_rgb(&new_item_rgb); - iv.update_block_sizes(&new_block_sizes); - iv.update_block_starts(&new_block_starts); }); retranslate } @@ -83,9 +71,7 @@ impl Reorder for NumericMetaInterval { let retranslate = translater.lex_sort(); set.apply_mut(|iv| { let new_chr = retranslate.get_rank(*iv.chr()).unwrap(); - let new_name = retranslate.get_rank(*iv.meta()).unwrap(); iv.update_chr(&new_chr); - iv.update_meta(&new_name); }); retranslate } diff --git a/src/types/translate/retranslater.rs b/src/types/translate/retranslater.rs index ab1d2bd..907bcf9 100644 --- a/src/types/translate/retranslater.rs +++ b/src/types/translate/retranslater.rs @@ -1,11 +1,10 @@ -use hashbrown::HashMap; - use super::Translate; +use hashbrown::HashMap; #[derive(Debug)] pub struct Retranslater { - idx_to_rank: HashMap, - rank_to_name: HashMap, + pub idx_to_rank: HashMap, + pub rank_to_name: HashMap, } impl Retranslater { pub fn new(idx_to_rank: HashMap, rank_to_name: HashMap) -> Self { diff --git a/src/types/translate/split_retranslater.rs b/src/types/translate/split_retranslater.rs new file mode 100644 index 0000000..a88d6c3 --- /dev/null +++ b/src/types/translate/split_retranslater.rs @@ -0,0 +1,19 @@ +use super::{Retranslater, Translate, Translater}; + +pub struct SplitRetranslater { + chr_tl: Retranslater, + meta_tl: Translater, +} +impl SplitRetranslater { + pub fn new(chr_tl: Retranslater, meta_tl: Translater) -> Self { + Self { chr_tl, meta_tl } + } +} +impl Translate for SplitRetranslater { + fn get_chr_name(&self, idx: usize) -> Option<&str> { + self.chr_tl.get_chr_name(idx) + } + fn get_meta_name(&self, idx: usize) -> Option<&str> { + self.meta_tl.get_meta_name(idx) + } +} diff --git a/src/types/translate/split_translater.rs b/src/types/translate/split_translater.rs index e8942b7..30870c3 100644 --- a/src/types/translate/split_translater.rs +++ b/src/types/translate/split_translater.rs @@ -1,5 +1,4 @@ -use super::{Retranslater, Translate, TranslateGroup, Translater}; -use hashbrown::HashMap; +use super::{Translate, TranslateGroup, Translater}; pub struct SplitTranslater { chr_tl: Translater, @@ -12,12 +11,6 @@ impl SplitTranslater { meta_tl: Translater::new(), } } - pub fn has_name(&self, name: &str, group: TranslateGroup) -> bool { - match group { - TranslateGroup::Chr => self.chr_tl.has_name(name), - TranslateGroup::Meta => self.meta_tl.has_name(name), - } - } pub fn add_name(&mut self, name: &str, group: TranslateGroup) { match group { TranslateGroup::Chr => self.chr_tl.add_name(name), @@ -30,17 +23,14 @@ impl SplitTranslater { TranslateGroup::Meta => self.meta_tl.get_idx(name), } } - pub fn get_name_to_idx(&self, group: TranslateGroup) -> &HashMap { + pub fn get_translater(&self, group: TranslateGroup) -> &Translater { match group { - TranslateGroup::Chr => self.chr_tl.get_name_to_idx(), - TranslateGroup::Meta => self.meta_tl.get_name_to_idx(), + TranslateGroup::Chr => &self.chr_tl, + TranslateGroup::Meta => &self.meta_tl, } } - pub fn lex_sort(self, group: TranslateGroup) -> Retranslater { - match group { - TranslateGroup::Chr => self.chr_tl.lex_sort(), - TranslateGroup::Meta => self.meta_tl.lex_sort(), - } + pub fn disband(self) -> (Translater, Translater) { + (self.chr_tl, self.meta_tl) } } impl Translate for SplitTranslater { From a22790bcd9fb7dd401e4ebdafd6eadec011e70cf Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:33:26 -0800 Subject: [PATCH 5/7] fix: bug where intersect was skipping sorting file pairs --- src/cli/intersect.rs | 4 ++++ src/commands/intersect/run.rs | 6 ++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/cli/intersect.rs b/src/cli/intersect.rs index af805a3..503c4f2 100644 --- a/src/cli/intersect.rs +++ b/src/cli/intersect.rs @@ -26,6 +26,10 @@ pub struct IntersectParams { /// (only works if both files are sorted) #[clap(short = 'S', long, conflicts_with_all = &["with_query", "with_target", "unique", "inverse"])] pub stream: bool, + + /// Assert the inputs are pre-sorted + #[clap(short, long)] + pub sorted: bool, } #[derive(Parser, Debug)] diff --git a/src/commands/intersect/run.rs b/src/commands/intersect/run.rs index 835df50..025502c 100644 --- a/src/commands/intersect/run.rs +++ b/src/commands/intersect/run.rs @@ -7,6 +7,7 @@ use crate::{ UnnamedIter, WriteNamedIter, WriteNamedIterImpl, }, types::{InputFormat, NumericBed3, SplitTranslater, StreamTranslater}, + utils::sort_pairs, }; use anyhow::Result; use bedrs::{traits::IntervalBounds, IntersectIter, IntervalContainer, MergeIter}; @@ -14,8 +15,8 @@ use serde::Serialize; use std::io::Write; pub fn intersect_sets( - set_a: IntervalContainer, - set_b: IntervalContainer, + mut set_a: IntervalContainer, + mut set_b: IntervalContainer, translater: Option<&SplitTranslater>, params: IntersectParams, writer: W, @@ -28,6 +29,7 @@ where { let query_method = params.overlap_predicates.into(); let output_method = params.output_predicates.try_into()?; + sort_pairs(&mut set_a, &mut set_b, params.sorted); match output_method { // Output the target intervals OutputMethod::Target => { From aa2bf03eb10fd8c91e0c90201d3dc6faf42614d6 Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Thu, 8 Feb 2024 17:54:24 -0800 Subject: [PATCH 6/7] fix: force meta intervals to always be named because their metadata must always be interpreted as a string --- src/io/read/bed_reader.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/io/read/bed_reader.rs b/src/io/read/bed_reader.rs index 08dd570..ca2f1d5 100644 --- a/src/io/read/bed_reader.rs +++ b/src/io/read/bed_reader.rs @@ -128,8 +128,7 @@ impl BedReader { /// Returns a MetaIntervalSet from the reader with an Option pub fn meta_interval_set(self) -> Result<(MetaIntervalSet, Option)> { - let is_named = self.is_named(); - read_meta_interval_set(self.reader(), is_named) + read_meta_interval_set(self.reader(), true) // meta intervals are always named } /// Returns a Bed3Set from the reader From 39a036faded2afde49395e0fe20e64b7a78aabb8 Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Fri, 9 Feb 2024 10:26:08 -0800 Subject: [PATCH 7/7] fix: update subtract tests to follow inheritance rules of scores. remove score types from generic --- src/commands/flank.rs | 34 +++++++++++++++++--- src/commands/random.rs | 6 ++-- src/commands/shift.rs | 17 ++++++++-- src/io/iter.rs | 2 +- src/io/read/bed12.rs | 2 +- src/io/read/bed6.rs | 2 +- src/types/mod.rs | 9 +++--- src/types/translate/rename.rs | 4 +-- tests/subtract.rs | 60 +++++++++++++++++++---------------- 9 files changed, 89 insertions(+), 47 deletions(-) diff --git a/src/commands/flank.rs b/src/commands/flank.rs index 660bc1d..cd57742 100644 --- a/src/commands/flank.rs +++ b/src/commands/flank.rs @@ -135,7 +135,7 @@ mod testing { #[test] fn test_flank_left_bed6() { - let iv = Bed6::new(1, 100, 400, 1, 2, Strand::default()); + let iv = Bed6::new(1, 100, 400, 1, 2.into(), Strand::default()); let left = left_flank(iv, 50).unwrap(); assert_eq!(left.start(), 50); assert_eq!(left.end(), 100); @@ -146,7 +146,20 @@ mod testing { #[test] fn test_flank_left_bed12() { - let iv = Bed12::new(1, 100, 400, 1, 2, Strand::default(), 3, 4, 5, 6, 7, 8); + let iv = Bed12::new( + 1, + 100, + 400, + 1, + 2.into(), + Strand::default(), + 3, + 4, + 5, + 6, + 7, + 8, + ); let left = left_flank(iv, 50).unwrap(); assert_eq!(left.start(), 50); assert_eq!(left.end(), 100); @@ -187,7 +200,7 @@ mod testing { #[test] fn test_flank_right_bed6() { - let iv = Bed6::new(1, 100, 400, 1, 2, Strand::default()); + let iv = Bed6::new(1, 100, 400, 1, 2.into(), Strand::default()); let right = right_flank(iv, 50, None).unwrap(); assert_eq!(right.start(), 400); assert_eq!(right.end(), 450); @@ -198,7 +211,20 @@ mod testing { #[test] fn test_flank_right_bed12() { - let iv = Bed12::new(1, 100, 400, 1, 2, Strand::default(), 3, 4, 5, 6, 7, 8); + let iv = Bed12::new( + 1, + 100, + 400, + 1, + 2.into(), + Strand::default(), + 3, + 4, + 5, + 6, + 7, + 8, + ); let right = right_flank(iv, 50, None).unwrap(); assert_eq!(right.start(), 400); assert_eq!(right.end(), 450); diff --git a/src/commands/random.rs b/src/commands/random.rs index c467181..731bbd0 100644 --- a/src/commands/random.rs +++ b/src/commands/random.rs @@ -4,7 +4,7 @@ use crate::{ types::{Genome, InputFormat, NumericBed12, NumericBed3, NumericBed4, NumericBed6, Translater}, }; use anyhow::Result; -use bedrs::Strand; +use bedrs::{Score, Strand}; use rand::Rng; use std::io::Write; @@ -123,7 +123,7 @@ pub fn random_bed6(args: RandomArgs, writer: W) -> Result<()> { (c, x, y, s) }) // build the interval - .map(|(c, x, y, s)| NumericBed6::new(c, x, y, 0, 0.0, s)); + .map(|(c, x, y, s)| NumericBed6::new(c, x, y, 0, Score::Empty, s)); write_records_iter_with(interval_gen, writer, genome_sizes.translater())?; @@ -175,7 +175,7 @@ pub fn random_bed12(args: RandomArgs, writer: W) -> Result<()> { (c, x, y, t, u, s) }) // build the interval - .map(|(c, x, y, t, u, s)| NumericBed12::new(c, x, y, 0, 0.0, s, t, u, 0, 0, 0, 0)); + .map(|(c, x, y, t, u, s)| NumericBed12::new(c, x, y, 0, Score::Empty, s, t, u, 0, 0, 0, 0)); write_records_iter_with(interval_gen, writer, genome_sizes.translater())?; diff --git a/src/commands/shift.rs b/src/commands/shift.rs index be7e3e6..174636f 100644 --- a/src/commands/shift.rs +++ b/src/commands/shift.rs @@ -162,7 +162,7 @@ mod testing { #[test] fn test_shift_bed6() { - let iv = Bed6::new(1, 100, 200, 1, 2, Strand::default()); + let iv = Bed6::new(1, 100, 200, 1, 2.into(), Strand::default()); let si = shift_interval(iv, 50.0, false, None); assert_eq!(si.start(), 150); assert_eq!(si.end(), 250); @@ -173,7 +173,20 @@ mod testing { #[test] fn test_shift_bed12() { - let iv = Bed12::new(1, 100, 400, 1, 2, Strand::default(), 3, 4, 5, 6, 7, 8); + let iv = Bed12::new( + 1, + 100, + 400, + 1, + 2.into(), + Strand::default(), + 3, + 4, + 5, + 6, + 7, + 8, + ); let si = shift_interval(iv, 50.0, false, None); assert_eq!(si.start(), 150); assert_eq!(si.end(), 450); diff --git a/src/io/iter.rs b/src/io/iter.rs index 3264e5c..961a251 100644 --- a/src/io/iter.rs +++ b/src/io/iter.rs @@ -77,7 +77,7 @@ impl<'a, 'b, R: Read> Iterator for NamedIter<'a, 'b, R, NumericBed6> { record.start(), record.end(), *name_idx, - *record.score(), + record.score(), record.strand().unwrap_or_default(), ); Some(iv) diff --git a/src/io/read/bed12.rs b/src/io/read/bed12.rs index 11d9e0e..748883b 100644 --- a/src/io/read/bed12.rs +++ b/src/io/read/bed12.rs @@ -90,7 +90,7 @@ fn convert_bed12_set(reader: R, translater: &mut SplitTranslater) -> Re record.start(), record.end(), name_int, - *record.score(), + record.score(), record.strand().unwrap_or_default(), record.thick_start(), record.thick_end(), diff --git a/src/io/read/bed6.rs b/src/io/read/bed6.rs index caebae9..5f30ffc 100644 --- a/src/io/read/bed6.rs +++ b/src/io/read/bed6.rs @@ -77,7 +77,7 @@ fn convert_bed6_set(reader: R, translater: &mut SplitTranslater) -> Res record.start(), record.end(), name_int, - *record.score(), + record.score(), record.strand().unwrap_or_default(), ); set.insert(interval); diff --git a/src/types/mod.rs b/src/types/mod.rs index 8814562..f51d62d 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -19,13 +19,12 @@ pub type NumericBed4 = Bed4; pub type NamedBed4<'a> = Bed4<&'a str, usize, &'a str>; pub type Bed4Set = IntervalContainer; -pub type NumericBed6 = Bed6; -pub type NamedBed6<'a> = Bed6<&'a str, usize, &'a str, f64>; +pub type NumericBed6 = Bed6; +pub type NamedBed6<'a> = Bed6<&'a str, usize, &'a str>; pub type Bed6Set = IntervalContainer; -pub type NumericBed12 = Bed12; -pub type NamedBed12<'a> = - Bed12<&'a str, usize, &'a str, f64, usize, usize, &'a str, &'a str, &'a str>; +pub type NumericBed12 = Bed12; +pub type NamedBed12<'a> = Bed12<&'a str, usize, &'a str, usize, usize, &'a str, &'a str, &'a str>; pub type Bed12Set = IntervalContainer; pub type NumericMetaInterval = MetaInterval; diff --git a/src/types/translate/rename.rs b/src/types/translate/rename.rs index bc91b00..ec0fc1c 100644 --- a/src/types/translate/rename.rs +++ b/src/types/translate/rename.rs @@ -35,7 +35,7 @@ impl<'a> Rename<'a, NumericBed6, NamedBed6<'a>> for Renamer { iv.start(), iv.end(), name, - *iv.score(), + iv.score(), iv.strand().unwrap_or_default(), ) } @@ -52,7 +52,7 @@ impl<'a> Rename<'a, NumericBed12, NamedBed12<'a>> for Renamer { iv.start(), iv.end(), name, - *iv.score(), + iv.score(), iv.strand().unwrap_or_default(), iv.thick_start(), iv.thick_end(), diff --git a/tests/subtract.rs b/tests/subtract.rs index 0499120..40cc7a1 100644 --- a/tests/subtract.rs +++ b/tests/subtract.rs @@ -23,7 +23,7 @@ mod testing { .iter() .map(|(chr, start, end, name, score, strand)| { format!( - "{}\t{}\t{}\t{}\t{:.1}\t{}\n", + "{}\t{}\t{}\t{}\t{:.3}\t{}\n", chr, start, end, name, score, strand ) }) @@ -52,7 +52,7 @@ mod testing { block_starts, )| { format!( - "{}\t{}\t{}\t{}\t{:.1}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n", + "{}\t{}\t{}\t{}\t{:.3}\t{}\t{}\t{}\t{}\t{}\t{}\t{}\n", chr, start, end, @@ -114,12 +114,12 @@ mod testing { .output()?; let expected = vec![ - (1, 100, 120, 0, 0.0, '+'), - (1, 125, 150, 0, 0.0, '+'), - (1, 160, 300, 0, 0.0, '+'), - (1, 400, 460, 0, 0.0, '+'), - (1, 470, 475, 0, 0.0, '+'), - (1, 500, 550, 0, 0.0, '+'), + (1, 100, 120, 0, '.', '+'), + (1, 125, 150, 0, '.', '+'), + (1, 160, 300, 0, '.', '+'), + (1, 400, 460, 0, '.', '+'), + (1, 470, 475, 0, '.', '+'), + (1, 500, 550, 0, '.', '+'), ]; let expected_str = build_expected_str_bed6(&expected); assert_eq!(output.stdout, expected_str.as_bytes()); @@ -141,12 +141,12 @@ mod testing { .output()?; let expected = vec![ - (1, 100, 120, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), - (1, 125, 150, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), - (1, 160, 300, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), - (1, 400, 460, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), - (1, 470, 475, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), - (1, 500, 550, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), + (1, 100, 120, 0, ".", '+', 0, 0, 0, 0, 0, 0), + (1, 125, 150, 0, ".", '+', 0, 0, 0, 0, 0, 0), + (1, 160, 300, 0, ".", '+', 0, 0, 0, 0, 0, 0), + (1, 400, 460, 0, ".", '+', 0, 0, 0, 0, 0, 0), + (1, 470, 475, 0, ".", '+', 0, 0, 0, 0, 0, 0), + (1, 500, 550, 0, ".", '+', 0, 0, 0, 0, 0, 0), ]; let expected_str = build_expected_str_bed12(&expected); assert_eq!(output.stdout, expected_str.as_bytes()); @@ -198,13 +198,13 @@ mod testing { .output()?; let expected = vec![ - (1, 100, 120, 0, 0.0, '+'), - (1, 125, 150, 0, 0.0, '+'), - (1, 160, 200, 0, 0.0, '+'), - (1, 200, 300, 0, 0.0, '+'), - (1, 400, 460, 0, 0.0, '+'), - (1, 470, 475, 0, 0.0, '+'), - (1, 500, 550, 0, 0.0, '+'), + (1, 100, 120, 0, ".", '+'), + (1, 125, 150, 0, ".", '+'), + (1, 160, 200, 0, ".", '+'), + (1, 200, 300, 0, "0.0", '+'), + (1, 400, 460, 0, ".", '+'), + (1, 470, 475, 0, ".", '+'), + (1, 500, 550, 0, "0.0", '+'), ]; let expected_str = build_expected_str_bed6(&expected); assert_eq!(output.stdout, expected_str.as_bytes()); @@ -227,15 +227,19 @@ mod testing { .output()?; let expected = vec![ - (1, 100, 120, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), - (1, 125, 150, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), - (1, 160, 200, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), - (1, 200, 300, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), - (1, 400, 460, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), - (1, 470, 475, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), - (1, 500, 550, 0, 0.0, '+', 0, 0, 0, 0, 0, 0), + (1, 100, 120, 0, ".", '+', 0, 0, 0, 0, 0, 0), + (1, 125, 150, 0, ".", '+', 0, 0, 0, 0, 0, 0), + (1, 160, 200, 0, ".", '+', 0, 0, 0, 0, 0, 0), + (1, 200, 300, 0, "0.0", '+', 0, 0, 0, 0, 0, 0), + (1, 400, 460, 0, ".", '+', 0, 0, 0, 0, 0, 0), + (1, 470, 475, 0, ".", '+', 0, 0, 0, 0, 0, 0), + (1, 500, 550, 0, "0.0", '+', 0, 0, 0, 0, 0, 0), ]; let expected_str = build_expected_str_bed12(&expected); + + println!("{}", std::str::from_utf8(&output.stdout).unwrap()); + println!("{}", expected_str); + assert_eq!(output.stdout, expected_str.as_bytes()); Ok(()) }