Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

85 add a catch-all meta interval for formats that can't be automatically determined #86

4 changes: 4 additions & 0 deletions src/cli/intersect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,10 @@ pub struct IntersectParams {
/// (only works if both files are sorted)
#[clap(short = 'S', long, conflicts_with_all = &["with_query", "with_target", "unique", "inverse"])]
pub stream: bool,

/// Assert the inputs are pre-sorted
#[clap(short, long)]
pub sorted: bool,
}

#[derive(Parser, Debug)]
Expand Down
4 changes: 2 additions & 2 deletions src/commands/closest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::{
cli::{ClosestArgs, ClosestParams},
dispatch_pair,
io::write_pairs_iter_with,
types::{InputFormat, IntervalPair, Rename, Renamer, Translater},
types::{InputFormat, IntervalPair, Rename, Renamer, SplitTranslater},
utils::sort_pairs,
};
use anyhow::Result;
Expand Down Expand Up @@ -38,7 +38,7 @@ impl From<ClosestParams> for ClosestType {
fn run_closest<'a, Ia, Ib, Na, Nb, W>(
mut a_set: IntervalContainer<Ia, usize, usize>,
mut b_set: IntervalContainer<Ib, usize, usize>,
translater: Option<&'a Translater>,
translater: Option<&'a SplitTranslater>,
params: ClosestParams,
output: W,
) -> Result<()>
Expand Down
4 changes: 2 additions & 2 deletions src/commands/coverage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@ use crate::{
cli::{CoverageArgs, CoverageParams},
dispatch_pair,
io::write_depth_iter_with,
types::{InputFormat, IntervalDepth, Rename, Renamer, Translater},
types::{InputFormat, IntervalDepth, Rename, Renamer, SplitTranslater},
utils::sort_pairs,
};

fn run_coverage<'a, Ia, Ib, Na, W>(
mut set_a: IntervalContainer<Ia, usize, usize>,
mut set_b: IntervalContainer<Ib, usize, usize>,
translater: Option<&'a Translater>,
translater: Option<&'a SplitTranslater>,
params: CoverageParams,
writer: W,
) -> Result<()>
Expand Down
8 changes: 4 additions & 4 deletions src/commands/extend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::{
cli::{ExtendArgs, Growth},
dispatch_single,
io::{write_records_iter_with, WriteNamedIter, WriteNamedIterImpl},
types::{Genome, InputFormat, Translater},
types::{Genome, InputFormat, SplitTranslater, TranslateGroup},
};
use anyhow::Result;
use bedrs::{traits::IntervalBounds, IntervalContainer};
Expand All @@ -28,7 +28,7 @@ where

fn extend_set<I, W>(
set: IntervalContainer<I, usize, usize>,
translater: Option<Translater>,
translater: Option<&SplitTranslater>,
growth: Growth,
output: W,
) -> Result<()>
Expand All @@ -38,13 +38,13 @@ where
WriteNamedIterImpl: WriteNamedIter<I>,
{
growth.warn_args();
let genome = growth.get_genome(translater.as_ref())?;
let genome = growth.get_genome(translater.map(|x| x.get_translater(TranslateGroup::Chr)))?;
let extend_iter = set.into_iter().map(|mut iv| {
let (left, right) = growth.get_values(&iv);
extend_interval(&mut iv, left, right, genome.as_ref());
iv
});
write_records_iter_with(extend_iter, output, translater.as_ref())
write_records_iter_with(extend_iter, output, translater)
}

pub fn extend(args: ExtendArgs) -> Result<()> {
Expand Down
42 changes: 34 additions & 8 deletions src/commands/flank.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::{
cli::{FlankArgs, Growth},
dispatch_single,
io::{write_records_iter_with, WriteNamedIter, WriteNamedIterImpl},
types::{Genome, InputFormat, Translater},
types::{Genome, InputFormat, SplitTranslater, TranslateGroup},
};
use anyhow::Result;
use bedrs::{traits::IntervalBounds, IntervalContainer};
Expand Down Expand Up @@ -80,7 +80,7 @@ where
/// Flank the intervals in the set
fn flank_set<I, W>(
set: IntervalContainer<I, usize, usize>,
translater: Option<Translater>,
translater: Option<&SplitTranslater>,
growth: Growth,
output: W,
) -> Result<()>
Expand All @@ -90,12 +90,12 @@ where
WriteNamedIterImpl: WriteNamedIter<I>,
{
growth.warn_args();
let genome = growth.get_genome(translater.as_ref())?;
let genome = growth.get_genome(translater.map(|x| x.get_translater(TranslateGroup::Chr)))?;
let flank_iter = set.iter().flat_map(|iv| {
let (left, right) = growth.get_values(iv);
flank_interval(*iv, left, right, genome.as_ref())
});
write_records_iter_with(flank_iter, output, translater.as_ref())
write_records_iter_with(flank_iter, output, translater)
}

pub fn flank(args: FlankArgs) -> Result<()> {
Expand Down Expand Up @@ -135,7 +135,7 @@ mod testing {

#[test]
fn test_flank_left_bed6() {
let iv = Bed6::new(1, 100, 400, 1, 2, Strand::default());
let iv = Bed6::new(1, 100, 400, 1, 2.into(), Strand::default());
let left = left_flank(iv, 50).unwrap();
assert_eq!(left.start(), 50);
assert_eq!(left.end(), 100);
Expand All @@ -146,7 +146,20 @@ mod testing {

#[test]
fn test_flank_left_bed12() {
let iv = Bed12::new(1, 100, 400, 1, 2, Strand::default(), 3, 4, 5, 6, 7, 8);
let iv = Bed12::new(
1,
100,
400,
1,
2.into(),
Strand::default(),
3,
4,
5,
6,
7,
8,
);
let left = left_flank(iv, 50).unwrap();
assert_eq!(left.start(), 50);
assert_eq!(left.end(), 100);
Expand Down Expand Up @@ -187,7 +200,7 @@ mod testing {

#[test]
fn test_flank_right_bed6() {
let iv = Bed6::new(1, 100, 400, 1, 2, Strand::default());
let iv = Bed6::new(1, 100, 400, 1, 2.into(), Strand::default());
let right = right_flank(iv, 50, None).unwrap();
assert_eq!(right.start(), 400);
assert_eq!(right.end(), 450);
Expand All @@ -198,7 +211,20 @@ mod testing {

#[test]
fn test_flank_right_bed12() {
let iv = Bed12::new(1, 100, 400, 1, 2, Strand::default(), 3, 4, 5, 6, 7, 8);
let iv = Bed12::new(
1,
100,
400,
1,
2.into(),
Strand::default(),
3,
4,
5,
6,
7,
8,
);
let right = right_flank(iv, 50, None).unwrap();
assert_eq!(right.start(), 400);
assert_eq!(right.end(), 450);
Expand Down
1 change: 1 addition & 0 deletions src/commands/get_fasta.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,5 +153,6 @@ pub fn get_fasta(args: GetFastaArgs) -> Result<()> {
InputFormat::Bed4 => get_fasta_bed4(&mut csv_reader, &mut byterecord, fasta, writer),
InputFormat::Bed6 => get_fasta_bed6(&mut csv_reader, &mut byterecord, fasta, writer),
InputFormat::Bed12 => get_fasta_bed12(&mut csv_reader, &mut byterecord, fasta, writer),
_ => anyhow::bail!("Unable to process ambiguous input format"),
}
}
12 changes: 7 additions & 5 deletions src/commands/intersect/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,18 @@ use crate::{
build_reader, write_named_records_iter_dashmap, write_records_iter_with, NamedIter,
UnnamedIter, WriteNamedIter, WriteNamedIterImpl,
},
types::{InputFormat, NumericBed3, StreamTranslater, Translater},
types::{InputFormat, NumericBed3, SplitTranslater, StreamTranslater},
utils::sort_pairs,
};
use anyhow::Result;
use bedrs::{traits::IntervalBounds, IntersectIter, IntervalContainer, MergeIter};
use serde::Serialize;
use std::io::Write;

pub fn intersect_sets<Ia, Ib, W>(
set_a: IntervalContainer<Ia, usize, usize>,
set_b: IntervalContainer<Ib, usize, usize>,
translater: Option<&Translater>,
mut set_a: IntervalContainer<Ia, usize, usize>,
mut set_b: IntervalContainer<Ib, usize, usize>,
translater: Option<&SplitTranslater>,
params: IntersectParams,
writer: W,
) -> Result<()>
Expand All @@ -28,6 +29,7 @@ where
{
let query_method = params.overlap_predicates.into();
let output_method = params.output_predicates.try_into()?;
sort_pairs(&mut set_a, &mut set_b, params.sorted);
match output_method {
// Output the target intervals
OutputMethod::Target => {
Expand Down Expand Up @@ -93,7 +95,7 @@ fn intersect_stream(args: IntersectArgs) -> Result<()> {
let merged_target_iter = MergeIter::new(target_iter);
let intersect_iter =
IntersectIter::new_with_method(merged_query_iter, merged_target_iter, method);
write_records_iter_with(intersect_iter, writer, None::<&Translater>)?;
write_records_iter_with(intersect_iter, writer, None::<&SplitTranslater>)?;
}
Ok(())
}
8 changes: 4 additions & 4 deletions src/commands/merge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::{
build_reader, iter_unnamed, write_3col_iter_with, write_records_iter, BedReader,
WriteNamedIter, WriteNamedIterImpl,
},
types::{InputFormat, NumericBed12, NumericBed3, NumericBed4, NumericBed6, Translater},
types::{InputFormat, NumericBed12, NumericBed3, NumericBed4, NumericBed6, SplitTranslater},
};
use anyhow::Result;
use bedrs::{traits::IntervalBounds, IntervalContainer, MergeIter};
Expand All @@ -14,7 +14,7 @@ use std::io::Write;

fn merge_in_memory<I, W>(
mut set: IntervalContainer<I, usize, usize>,
translater: Option<Translater>,
translater: Option<&SplitTranslater>,
params: MergeParams,
writer: W,
) -> Result<()>
Expand All @@ -29,7 +29,7 @@ where
set.set_sorted();
}
let merged = set.merge()?;
write_3col_iter_with(merged.into_iter(), writer, translater.as_ref())?;
write_3col_iter_with(merged.into_iter(), writer, translater)?;
Ok(())
}

Expand All @@ -52,7 +52,7 @@ fn merge_streamed_by_format<W: Write>(bed_reader: BedReader, writer: W) -> Resul
let input_format = bed_reader.input_format();
let mut csv_reader = build_reader(bed_reader.reader());
match input_format {
InputFormat::Bed3 => {
InputFormat::Bed3 | InputFormat::Ambiguous => {
let record_iter: Box<dyn Iterator<Item = NumericBed3>> = iter_unnamed(&mut csv_reader);
merge_streamed(record_iter, writer)
}
Expand Down
7 changes: 4 additions & 3 deletions src/commands/random.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::{
types::{Genome, InputFormat, NumericBed12, NumericBed3, NumericBed4, NumericBed6, Translater},
};
use anyhow::Result;
use bedrs::Strand;
use bedrs::{Score, Strand};
use rand::Rng;
use std::io::Write;

Expand Down Expand Up @@ -123,7 +123,7 @@ pub fn random_bed6<W: Write>(args: RandomArgs, writer: W) -> Result<()> {
(c, x, y, s)
})
// build the interval
.map(|(c, x, y, s)| NumericBed6::new(c, x, y, 0, 0.0, s));
.map(|(c, x, y, s)| NumericBed6::new(c, x, y, 0, Score::Empty, s));

write_records_iter_with(interval_gen, writer, genome_sizes.translater())?;

Expand Down Expand Up @@ -175,7 +175,7 @@ pub fn random_bed12<W: Write>(args: RandomArgs, writer: W) -> Result<()> {
(c, x, y, t, u, s)
})
// build the interval
.map(|(c, x, y, t, u, s)| NumericBed12::new(c, x, y, 0, 0.0, s, t, u, 0, 0, 0, 0));
.map(|(c, x, y, t, u, s)| NumericBed12::new(c, x, y, 0, Score::Empty, s, t, u, 0, 0, 0, 0));

write_records_iter_with(interval_gen, writer, genome_sizes.translater())?;

Expand All @@ -189,5 +189,6 @@ pub fn random(args: RandomArgs) -> Result<()> {
InputFormat::Bed4 => random_bed4(args, writer),
InputFormat::Bed6 => random_bed6(args, writer),
InputFormat::Bed12 => random_bed12(args, writer),
_ => anyhow::bail!("Unable to process ambiguous input format"),
}
}
8 changes: 6 additions & 2 deletions src/commands/sample.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
cli::{SampleArgs, SampleParams},
io::{write_records_iter_with, WriteNamedIter, WriteNamedIterImpl},
types::{InputFormat, Translater},
types::{InputFormat, SplitTranslater},
};
use anyhow::{bail, Result};
use bedrs::{traits::IntervalBounds, IntervalContainer};
Expand All @@ -10,7 +10,7 @@ use std::io::Write;

fn sample_from_set<I, W>(
set: &mut IntervalContainer<I, usize, usize>,
translater: Option<&Translater>,
translater: Option<&SplitTranslater>,
params: SampleParams,
writer: W,
) -> Result<()>
Expand Down Expand Up @@ -71,5 +71,9 @@ pub fn sample(args: SampleArgs) -> Result<()> {
let (mut set, translater) = reader.bed12_set()?;
sample_from_set(&mut set, translater.as_ref(), args.params, writer)
}
InputFormat::Ambiguous => {
let (mut set, translater) = reader.meta_interval_set()?;
sample_from_set(&mut set, translater.as_ref(), args.params, writer)
}
}
}
29 changes: 23 additions & 6 deletions src/commands/shift.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use crate::{
cli::{ShiftArgs, ShiftParams},
dispatch_single,
io::{write_records_iter_with, WriteNamedIter, WriteNamedIterImpl},
types::{Genome, InputFormat, Translater},
types::{Genome, InputFormat, SplitTranslater, TranslateGroup},
};
use anyhow::Result;
use bedrs::{traits::IntervalBounds, IntervalContainer};
Expand Down Expand Up @@ -55,7 +55,7 @@ where

fn shift_set<I, W>(
set: IntervalContainer<I, usize, usize>,
translater: Option<Translater>,
translater: Option<&SplitTranslater>,
params: ShiftParams,
output: W,
) -> Result<()>
Expand All @@ -65,11 +65,15 @@ where
WriteNamedIterImpl: WriteNamedIter<I>,
{
params.warn_args();
let genome = Genome::from_opt_path_immutable_with(params.genome, translater.as_ref(), false)?;
let genome = Genome::from_opt_path_immutable_with(
params.genome,
translater.map(|x| x.get_translater(TranslateGroup::Chr)),
false,
)?;
let shift_iter = set
.into_iter()
.map(|iv| shift_interval(iv, params.amount, params.percent, genome.as_ref()));
write_records_iter_with(shift_iter, output, translater.as_ref())
write_records_iter_with(shift_iter, output, translater)
}

pub fn shift(args: ShiftArgs) -> Result<()> {
Expand Down Expand Up @@ -158,7 +162,7 @@ mod testing {

#[test]
fn test_shift_bed6() {
let iv = Bed6::new(1, 100, 200, 1, 2, Strand::default());
let iv = Bed6::new(1, 100, 200, 1, 2.into(), Strand::default());
let si = shift_interval(iv, 50.0, false, None);
assert_eq!(si.start(), 150);
assert_eq!(si.end(), 250);
Expand All @@ -169,7 +173,20 @@ mod testing {

#[test]
fn test_shift_bed12() {
let iv = Bed12::new(1, 100, 400, 1, 2, Strand::default(), 3, 4, 5, 6, 7, 8);
let iv = Bed12::new(
1,
100,
400,
1,
2.into(),
Strand::default(),
3,
4,
5,
6,
7,
8,
);
let si = shift_interval(iv, 50.0, false, None);
assert_eq!(si.start(), 150);
assert_eq!(si.end(), 450);
Expand Down
Loading
Loading