From 776dc95385c8d8b9d0c1f7beff837b37025a81d5 Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Mon, 5 Feb 2024 10:38:06 -0800 Subject: [PATCH 1/6] style: replace write methods with correct writeln and write_all --- src/commands/get_fasta.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/commands/get_fasta.rs b/src/commands/get_fasta.rs index 2b7693f..cbf7f84 100644 --- a/src/commands/get_fasta.rs +++ b/src/commands/get_fasta.rs @@ -24,17 +24,17 @@ fn get_fasta_bed3( let record: NamedBed3 = byterecord.deserialize(None)?; match fasta.query_buffer(record.chr(), record.start(), record.end()) { Ok(buffer) => { - write!( + writeln!( output, - ">{}:{}-{}\n", + ">{}:{}-{}", record.chr(), record.start(), record.end() )?; for subseq in buffer.split_str("\n") { - output.write(subseq)?; + output.write_all(subseq)?; } - output.write(b"\n")?; + output.write_all(b"\n")?; } Err(_) => continue, } @@ -52,9 +52,9 @@ fn get_fasta_bed6( let record: NamedBed6 = byterecord.deserialize(None)?; match fasta.query_buffer(record.chr(), record.start(), record.end()) { Ok(buffer) => { - write!( + writeln!( output, - ">{}:{}-{}::{}::{}::{}\n", + ">{}:{}-{}::{}::{}::{}", record.chr(), record.start(), record.end(), @@ -63,9 +63,9 @@ fn get_fasta_bed6( record.strand().unwrap_or_default(), )?; for subseq in buffer.split_str("\n") { - output.write(subseq)?; + output.write_all(subseq)?; } - output.write(b"\n")?; + output.write_all(b"\n")?; } Err(_) => continue, } @@ -83,9 +83,9 @@ fn get_fasta_bed12( let record: NamedBed12 = byterecord.deserialize(None)?; match fasta.query_buffer(record.chr(), record.start(), record.end()) { Ok(buffer) => { - write!( + writeln!( output, - ">{}:{}-{}::{}::{}::{}::{}::{}::{}::{}::{}::{}\n", + ">{}:{}-{}::{}::{}::{}::{}::{}::{}::{}::{}::{}", record.chr(), record.start(), record.end(), @@ -100,9 +100,9 @@ fn get_fasta_bed12( record.block_starts(), )?; for subseq in buffer.split_str("\n") { - output.write(subseq)?; + output.write_all(subseq)?; } - output.write(b"\n")?; + output.write_all(b"\n")?; } Err(_) => continue, } From 14bf97f13bbe9f4535c7e216e50c65632ae08cf9 Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Mon, 5 Feb 2024 11:53:05 -0800 Subject: [PATCH 2/6] feat: added window overlaps as subcommand --- src/cli.rs | 33 ++++++ src/commands/mod.rs | 2 + src/commands/window.rs | 230 +++++++++++++++++++++++++++++++++++++++++ src/main.rs | 21 +++- 4 files changed, 285 insertions(+), 1 deletion(-) create mode 100644 src/commands/window.rs diff --git a/src/cli.rs b/src/cli.rs index 910645d..685fc93 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -456,4 +456,37 @@ pub enum Command { #[clap(short, long)] unmerged: bool, }, + + /// Finds all the overlapping intervals in Set B after adding a window around all + /// intervals in Set A + Window { + /// Input BED file to subtract from (default=stdin) + #[clap(short, long)] + a: Option, + + /// Secondary BED file to subtract with + #[clap(short, long)] + b: String, + + /// Output BED file to write to (default=stdout) + #[clap(short, long)] + output: Option, + + /// windows intervals on both sides by the same provided amount + #[clap(short = 'w', long, required_unless_present_any(["left", "right"]), conflicts_with_all(&["left", "right"]))] + both: Option, + + /// windows intervals on the left side by the provided amount + #[clap(short, long, required_unless_present_any(["both", "right"]))] + left: Option, + + /// windows intervals on the right side by the provided amount + #[clap(short, long, required_unless_present_any(["both", "left"]))] + right: Option, + + /// Only report the intervals in the query that do not overlap with the target + /// (i.e. the inverse of the intersection) + #[clap(short = 'v', long, conflicts_with_all = &["with_query", "with_target", "unique"])] + inverse: bool, + }, } diff --git a/src/commands/mod.rs b/src/commands/mod.rs index d8d2b4d..a4e49a4 100644 --- a/src/commands/mod.rs +++ b/src/commands/mod.rs @@ -11,6 +11,7 @@ mod sample; mod shift; mod sort; mod subtract; +mod window; pub use closest::closest; pub use complement::complement; pub use extend::extend; @@ -24,3 +25,4 @@ pub use sample::sample; pub use shift::shift; pub use sort::sort; pub use subtract::subtract; +pub use window::window; diff --git a/src/commands/window.rs b/src/commands/window.rs new file mode 100644 index 0000000..84d5a53 --- /dev/null +++ b/src/commands/window.rs @@ -0,0 +1,230 @@ +use std::io::Write; + +use anyhow::{bail, Result}; +use bedrs::{traits::IntervalBounds, IntervalContainer}; +use serde::Serialize; + +use crate::{ + io::{ + match_output, write_pairs_iter_with, write_records_iter_with, BedReader, WriteNamedIter, + WriteNamedIterImpl, + }, + types::{InputFormat, IntervalPair, Rename, Renamer, Translater}, + utils::sort_pairs, +}; + +fn windowed_set_overlaps<'a, Ia, Ib, Na, Nb, W>( + set_a: &'a mut IntervalContainer, + set_b: &'a mut IntervalContainer, + translater: Option<&'a Translater>, + left: usize, + right: usize, + inverse: bool, + mut output: W, +) -> Result<()> +where + Ia: IntervalBounds + Serialize + Copy, + Ib: IntervalBounds + Serialize + Copy, + Na: IntervalBounds<&'a str, usize> + Serialize, + Nb: IntervalBounds<&'a str, usize> + Serialize, + W: Write, + WriteNamedIterImpl: WriteNamedIter + WriteNamedIter, + Renamer: Rename<'a, Ia, Na> + Rename<'a, Ib, Nb>, +{ + sort_pairs(set_a, set_b, false); + if inverse { + let iv_iter = set_a + .iter() + .map(|iv| { + let mut w_iv = *iv; + w_iv.extend_left(&left); + w_iv.extend_right(&right, None); + (iv, w_iv) + }) + .filter(|(_iv, w_iv)| { + let overlaps = set_b.find_iter(w_iv).count(); + overlaps == 0 + }) + .map(|(iv, _w_iv)| *iv); + write_records_iter_with(iv_iter, output, translater)?; + } else { + let windows_iter = set_a.iter().map(|iv| { + let mut w_iv = *iv; + w_iv.extend_left(&left); + w_iv.extend_right(&right, None); + (iv, w_iv) + }); + for (iv, w_iv) in windows_iter { + let overlaps = set_b.find_iter(&w_iv); + let pairs = overlaps.map(|ov| IntervalPair::new(*iv, *ov, translater)); + write_pairs_iter_with(pairs, &mut output, translater)?; + } + } + Ok(()) +} + +fn dispatch_window( + reader_a: BedReader, + reader_b: BedReader, + both: Option, + left: Option, + right: Option, + inverse: bool, + output: W, +) -> Result<()> { + if reader_a.is_named() != reader_b.is_named() { + bail!("Input files must both be named or both be unnamed"); + } + let mut translater = if reader_a.is_named() { + Some(Translater::new()) + } else { + None + }; + let (left, right) = if let Some(b) = both { + (b, b) + } else { + (left.unwrap_or(0), right.unwrap_or(0)) + }; + match reader_a.input_format() { + InputFormat::Bed3 => { + let mut set_a = reader_a.bed3_set_with(translater.as_mut())?; + match reader_b.input_format() { + InputFormat::Bed3 => { + let mut set_b = reader_b.bed3_set_with(translater.as_mut())?; + windowed_set_overlaps( + &mut set_a, + &mut set_b, + translater.as_ref(), + left, + right, + inverse, + output, + ) + } + InputFormat::Bed6 => { + let mut set_b = reader_b.bed6_set_with(translater.as_mut())?; + windowed_set_overlaps( + &mut set_a, + &mut set_b, + translater.as_ref(), + left, + right, + inverse, + output, + ) + } + InputFormat::Bed12 => { + let mut set_b = reader_b.bed12_set_with(translater.as_mut())?; + windowed_set_overlaps( + &mut set_a, + &mut set_b, + translater.as_ref(), + left, + right, + inverse, + output, + ) + } + } + } + InputFormat::Bed6 => { + let mut set_a = reader_a.bed6_set_with(translater.as_mut())?; + match reader_b.input_format() { + InputFormat::Bed3 => { + let mut set_b = reader_b.bed3_set_with(translater.as_mut())?; + windowed_set_overlaps( + &mut set_a, + &mut set_b, + translater.as_ref(), + left, + right, + inverse, + output, + ) + } + InputFormat::Bed6 => { + let mut set_b = reader_b.bed6_set_with(translater.as_mut())?; + windowed_set_overlaps( + &mut set_a, + &mut set_b, + translater.as_ref(), + left, + right, + inverse, + output, + ) + } + InputFormat::Bed12 => { + let mut set_b = reader_b.bed12_set_with(translater.as_mut())?; + windowed_set_overlaps( + &mut set_a, + &mut set_b, + translater.as_ref(), + left, + right, + inverse, + output, + ) + } + } + } + InputFormat::Bed12 => { + let mut set_a = reader_a.bed12_set_with(translater.as_mut())?; + match reader_b.input_format() { + InputFormat::Bed3 => { + let mut set_b = reader_b.bed3_set_with(translater.as_mut())?; + windowed_set_overlaps( + &mut set_a, + &mut set_b, + translater.as_ref(), + left, + right, + inverse, + output, + ) + } + InputFormat::Bed6 => { + let mut set_b = reader_b.bed6_set_with(translater.as_mut())?; + windowed_set_overlaps( + &mut set_a, + &mut set_b, + translater.as_ref(), + left, + right, + inverse, + output, + ) + } + InputFormat::Bed12 => { + let mut set_b = reader_b.bed12_set_with(translater.as_mut())?; + windowed_set_overlaps( + &mut set_a, + &mut set_b, + translater.as_ref(), + left, + right, + inverse, + output, + ) + } + } + } + } +} + +pub fn window( + path_a: Option, + path_b: String, + output: Option, + both: Option, + left: Option, + right: Option, + inverse: bool, + compression_threads: usize, + compression_level: u32, +) -> Result<()> { + let bed_a = BedReader::from_path(path_a, None, None)?; + let bed_b = BedReader::from_path(Some(path_b), None, None)?; + let output_handle = match_output(output, compression_threads, compression_level)?; + dispatch_window(bed_a, bed_b, both, left, right, inverse, output_handle) +} diff --git a/src/main.rs b/src/main.rs index 9ef071b..c5c4e88 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,7 +9,7 @@ use clap::Parser; use cli::{Cli, Command}; use commands::{ closest, complement, extend, flank, get_fasta, intersect, merge, name_map, random, sample, - shift, sort, subtract, + shift, sort, subtract, window, }; fn main() -> Result<()> { @@ -254,6 +254,25 @@ fn main() -> Result<()> { cli.compression_level, )?; } + Command::Window { + a, + b, + output, + both, + left, + right, + inverse, + } => window( + a, + b, + output, + both, + left, + right, + inverse, + cli.compression_threads, + cli.compression_level, + )?, } Ok(()) } From 341b9d5cf9323da9138476a7cef52d7e29171a9a Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Mon, 5 Feb 2024 12:25:57 -0800 Subject: [PATCH 3/6] refactor: used owned find iter to avoid constant rebuffering of output --- Cargo.toml | 15 ++++++++++----- src/commands/window.rs | 14 +++++++------- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index be6b051..2fa66e7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,15 +6,18 @@ description = "A tool for set theoretic operations of genomic intervals" license = "MIT" homepage = "https://noamteyssier.github.io/gia" repository = "https://github.com/noamteyssier/gia" -categories = [ "science", "command-line-utilities" ] -keywords = [ "genomics", "bioinformatics", "bed", "set", "interval" ] +categories = ["science", "command-line-utilities"] +keywords = ["genomics", "bioinformatics", "bed", "set", "interval"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] anyhow = "1.0.72" # bedrs = { version = "0.1.11", features = ["serde", "rayon"] } -bedrs = { git = "https://github.com/noamteyssier/bedrs", branch="bedrs-2.0", features = ["serde", "rayon"] } +bedrs = { git = "https://github.com/noamteyssier/bedrs", branch = "bedrs-2.0", features = [ + "serde", + "rayon", +] } bstr = "1.6.0" clap = { version = "4.3.19", features = ["derive"] } csv = "1.2.2" @@ -26,8 +29,10 @@ memmap2 = "0.7.1" rand = "0.8.5" rand_chacha = "0.3.1" serde = { version = "1.0.183", features = ["derive"] } -niffler = {version = "2.5.0", default-features = false, features = ["gz"]} -gzp = { version="0.11.3", features=["deflate_rust"], default-features = false } +niffler = { version = "2.5.0", default-features = false, features = ["gz"] } +gzp = { version = "0.11.3", features = [ + "deflate_rust", +], default-features = false } rayon = "1.8.0" flate2 = "1.0.28" diff --git a/src/commands/window.rs b/src/commands/window.rs index 84d5a53..dda9b44 100644 --- a/src/commands/window.rs +++ b/src/commands/window.rs @@ -20,7 +20,7 @@ fn windowed_set_overlaps<'a, Ia, Ib, Na, Nb, W>( left: usize, right: usize, inverse: bool, - mut output: W, + output: W, ) -> Result<()> where Ia: IntervalBounds + Serialize + Copy, @@ -42,7 +42,7 @@ where (iv, w_iv) }) .filter(|(_iv, w_iv)| { - let overlaps = set_b.find_iter(w_iv).count(); + let overlaps = set_b.find_iter_sorted_unchecked(w_iv).count(); overlaps == 0 }) .map(|(iv, _w_iv)| *iv); @@ -54,11 +54,11 @@ where w_iv.extend_right(&right, None); (iv, w_iv) }); - for (iv, w_iv) in windows_iter { - let overlaps = set_b.find_iter(&w_iv); - let pairs = overlaps.map(|ov| IntervalPair::new(*iv, *ov, translater)); - write_pairs_iter_with(pairs, &mut output, translater)?; - } + let pairs_iter = windows_iter.flat_map(|(iv, w_iv)| { + let overlaps = set_b.find_iter_sorted_owned_unchecked(w_iv); + overlaps.map(|ov| IntervalPair::new(*iv, *ov, translater)) + }); + write_pairs_iter_with(pairs_iter, output, translater)?; } Ok(()) } From 9b503318696ca645f94b97c692cbd04a1ad21bed Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Mon, 5 Feb 2024 13:51:05 -0800 Subject: [PATCH 4/6] fix: update cli to remove argument bounds on inverse for windows --- src/cli.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cli.rs b/src/cli.rs index 685fc93..a18a407 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -486,7 +486,7 @@ pub enum Command { /// Only report the intervals in the query that do not overlap with the target /// (i.e. the inverse of the intersection) - #[clap(short = 'v', long, conflicts_with_all = &["with_query", "with_target", "unique"])] + #[clap(short = 'v', long)] inverse: bool, }, } From 4dd78ab5d4d471ff294c8296f3c5404c7d64fa6f Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Mon, 5 Feb 2024 13:51:18 -0800 Subject: [PATCH 5/6] tests: added testing for windows subcommand --- tests/datasets/windows/windows_a.bed | 10 ++ tests/datasets/windows/windows_a.bed12 | 10 ++ tests/datasets/windows/windows_a.bed6 | 10 ++ tests/datasets/windows/windows_b.bed | 10 ++ tests/datasets/windows/windows_b.bed12 | 10 ++ tests/datasets/windows/windows_b.bed6 | 10 ++ tests/windows.rs | 212 +++++++++++++++++++++++++ 7 files changed, 272 insertions(+) create mode 100644 tests/datasets/windows/windows_a.bed create mode 100644 tests/datasets/windows/windows_a.bed12 create mode 100644 tests/datasets/windows/windows_a.bed6 create mode 100644 tests/datasets/windows/windows_b.bed create mode 100644 tests/datasets/windows/windows_b.bed12 create mode 100644 tests/datasets/windows/windows_b.bed6 create mode 100644 tests/windows.rs diff --git a/tests/datasets/windows/windows_a.bed b/tests/datasets/windows/windows_a.bed new file mode 100644 index 0000000..abc19c9 --- /dev/null +++ b/tests/datasets/windows/windows_a.bed @@ -0,0 +1,10 @@ +1 72 222 +1 257 407 +1 268 418 +1 467 617 +1 819 969 +2 174 324 +2 587 737 +3 395 545 +3 554 704 +3 653 803 diff --git a/tests/datasets/windows/windows_a.bed12 b/tests/datasets/windows/windows_a.bed12 new file mode 100644 index 0000000..97b9f53 --- /dev/null +++ b/tests/datasets/windows/windows_a.bed12 @@ -0,0 +1,10 @@ +1 72 222 0 0.0 + 0 0 0 0 0 0 +1 257 407 0 0.0 + 0 0 0 0 0 0 +1 268 418 0 0.0 + 0 0 0 0 0 0 +1 467 617 0 0.0 + 0 0 0 0 0 0 +1 819 969 0 0.0 + 0 0 0 0 0 0 +2 174 324 0 0.0 + 0 0 0 0 0 0 +2 587 737 0 0.0 + 0 0 0 0 0 0 +3 395 545 0 0.0 + 0 0 0 0 0 0 +3 554 704 0 0.0 + 0 0 0 0 0 0 +3 653 803 0 0.0 + 0 0 0 0 0 0 diff --git a/tests/datasets/windows/windows_a.bed6 b/tests/datasets/windows/windows_a.bed6 new file mode 100644 index 0000000..802646e --- /dev/null +++ b/tests/datasets/windows/windows_a.bed6 @@ -0,0 +1,10 @@ +1 72 222 0 0.0 + +1 257 407 0 0.0 + +1 268 418 0 0.0 + +1 467 617 0 0.0 + +1 819 969 0 0.0 + +2 174 324 0 0.0 + +2 587 737 0 0.0 + +3 395 545 0 0.0 + +3 554 704 0 0.0 + +3 653 803 0 0.0 + diff --git a/tests/datasets/windows/windows_b.bed b/tests/datasets/windows/windows_b.bed new file mode 100644 index 0000000..7c20624 --- /dev/null +++ b/tests/datasets/windows/windows_b.bed @@ -0,0 +1,10 @@ +1 55 205 +1 69 219 +1 93 243 +1 156 306 +1 603 753 +1 837 987 +2 39 189 +2 71 221 +2 672 822 +3 138 288 diff --git a/tests/datasets/windows/windows_b.bed12 b/tests/datasets/windows/windows_b.bed12 new file mode 100644 index 0000000..515e704 --- /dev/null +++ b/tests/datasets/windows/windows_b.bed12 @@ -0,0 +1,10 @@ +1 55 205 0 0.0 + 0 0 0 0 0 0 +1 69 219 0 0.0 + 0 0 0 0 0 0 +1 93 243 0 0.0 + 0 0 0 0 0 0 +1 156 306 0 0.0 + 0 0 0 0 0 0 +1 603 753 0 0.0 + 0 0 0 0 0 0 +1 837 987 0 0.0 + 0 0 0 0 0 0 +2 39 189 0 0.0 + 0 0 0 0 0 0 +2 71 221 0 0.0 + 0 0 0 0 0 0 +2 672 822 0 0.0 + 0 0 0 0 0 0 +3 138 288 0 0.0 + 0 0 0 0 0 0 diff --git a/tests/datasets/windows/windows_b.bed6 b/tests/datasets/windows/windows_b.bed6 new file mode 100644 index 0000000..c8611d5 --- /dev/null +++ b/tests/datasets/windows/windows_b.bed6 @@ -0,0 +1,10 @@ +1 55 205 0 0.0 + +1 69 219 0 0.0 + +1 93 243 0 0.0 + +1 156 306 0 0.0 + +1 603 753 0 0.0 + +1 837 987 0 0.0 + +2 39 189 0 0.0 + +2 71 221 0 0.0 + +2 672 822 0 0.0 + +3 138 288 0 0.0 + diff --git a/tests/windows.rs b/tests/windows.rs new file mode 100644 index 0000000..abace72 --- /dev/null +++ b/tests/windows.rs @@ -0,0 +1,212 @@ +#[cfg(test)] +mod testing { + use anyhow::Result; + use assert_cmd::prelude::*; + use std::process::Command; + + #[test] + fn test_windows() -> Result<()> { + let a = "tests/datasets/windows/windows_a.bed"; + let b = "tests/datasets/windows/windows_b.bed"; + + let mut cmd = Command::cargo_bin("gia")?; + let output = cmd + .arg("window") + .arg("-a") + .arg(a) + .arg("-b") + .arg(b) + .arg("-w") + .arg("100") + .output()?; + let num_intervals = output.stdout.split(|&c| c == b'\n').count() - 1; + assert_eq!(num_intervals, 18); + + let num_fields = output + .stdout + .split(|&c| c == b'\n') + .next() + .unwrap() + .split(|&c| c == b'\t') + .count(); + assert_eq!(num_fields, 6); + Ok(()) + } + + #[test] + fn test_windows_inv_bed3() -> Result<()> { + let a = "tests/datasets/windows/windows_a.bed"; + let b = "tests/datasets/windows/windows_b.bed"; + + let mut cmd = Command::cargo_bin("gia")?; + let output = cmd + .arg("window") + .arg("-a") + .arg(a) + .arg("-b") + .arg(b) + .arg("-w") + .arg("100") + .arg("-v") + .output()?; + let num_intervals = output.stdout.split(|&c| c == b'\n').count() - 1; + assert_eq!(num_intervals, 3); + + let num_fields = output + .stdout + .split(|&c| c == b'\n') + .next() + .unwrap() + .split(|&c| c == b'\t') + .count(); + assert_eq!(num_fields, 3); + Ok(()) + } + + #[test] + fn test_windows_inv_bed6() -> Result<()> { + let a = "tests/datasets/windows/windows_a.bed6"; + let b = "tests/datasets/windows/windows_b.bed"; + + let mut cmd = Command::cargo_bin("gia")?; + let output = cmd + .arg("window") + .arg("-a") + .arg(a) + .arg("-b") + .arg(b) + .arg("-w") + .arg("100") + .arg("-v") + .output()?; + let num_intervals = output.stdout.split(|&c| c == b'\n').count() - 1; + assert_eq!(num_intervals, 3); + + let num_fields = output + .stdout + .split(|&c| c == b'\n') + .next() + .unwrap() + .split(|&c| c == b'\t') + .count(); + assert_eq!(num_fields, 6); + Ok(()) + } + + #[test] + fn test_windows_inv_bed12() -> Result<()> { + let a = "tests/datasets/windows/windows_a.bed12"; + let b = "tests/datasets/windows/windows_b.bed"; + + let mut cmd = Command::cargo_bin("gia")?; + let output = cmd + .arg("window") + .arg("-a") + .arg(a) + .arg("-b") + .arg(b) + .arg("-w") + .arg("100") + .arg("-v") + .output()?; + let num_intervals = output.stdout.split(|&c| c == b'\n').count() - 1; + assert_eq!(num_intervals, 3); + + let num_fields = output + .stdout + .split(|&c| c == b'\n') + .next() + .unwrap() + .split(|&c| c == b'\t') + .count(); + assert_eq!(num_fields, 12); + Ok(()) + } + + #[test] + fn test_windows_bed3_bed3() -> Result<()> { + let a = "tests/datasets/windows/windows_a.bed"; + let b = "tests/datasets/windows/windows_b.bed"; + + let mut cmd = Command::cargo_bin("gia")?; + let output = cmd + .arg("window") + .arg("-a") + .arg(a) + .arg("-b") + .arg(b) + .arg("-w") + .arg("0") + .output()?; + let num_intervals = output.stdout.split(|&c| c == b'\n').count() - 1; + assert_eq!(num_intervals, 11); + + let num_fields = output + .stdout + .split(|&c| c == b'\n') + .next() + .unwrap() + .split(|&c| c == b'\t') + .count(); + assert_eq!(num_fields, 6); + Ok(()) + } + + #[test] + fn test_windows_bed3_bed6() -> Result<()> { + let a = "tests/datasets/windows/windows_a.bed"; + let b = "tests/datasets/windows/windows_b.bed6"; + + let mut cmd = Command::cargo_bin("gia")?; + let output = cmd + .arg("window") + .arg("-a") + .arg(a) + .arg("-b") + .arg(b) + .arg("-w") + .arg("0") + .output()?; + let num_intervals = output.stdout.split(|&c| c == b'\n').count() - 1; + assert_eq!(num_intervals, 11); + + let num_fields = output + .stdout + .split(|&c| c == b'\n') + .next() + .unwrap() + .split(|&c| c == b'\t') + .count(); + assert_eq!(num_fields, 9); + Ok(()) + } + + #[test] + fn test_windows_bed3_bed12() -> Result<()> { + let a = "tests/datasets/windows/windows_a.bed"; + let b = "tests/datasets/windows/windows_b.bed12"; + + let mut cmd = Command::cargo_bin("gia")?; + let output = cmd + .arg("window") + .arg("-a") + .arg(a) + .arg("-b") + .arg(b) + .arg("-w") + .arg("0") + .output()?; + let num_intervals = output.stdout.split(|&c| c == b'\n').count() - 1; + assert_eq!(num_intervals, 11); + + let num_fields = output + .stdout + .split(|&c| c == b'\n') + .next() + .unwrap() + .split(|&c| c == b'\t') + .count(); + assert_eq!(num_fields, 15); + Ok(()) + } +} From 7a4f105a0d46f33afe0b386ed279d2a27f281b67 Mon Sep 17 00:00:00 2001 From: noam teyssier <22600644+noamteyssier@users.noreply.github.com> Date: Mon, 5 Feb 2024 13:51:39 -0800 Subject: [PATCH 6/6] chore: update patch version --- Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index 2fa66e7..8e28b0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gia" -version = "0.2.4" +version = "0.2.5" edition = "2021" description = "A tool for set theoretic operations of genomic intervals" license = "MIT"