Skip to content

Commit

Permalink
Merge pull request #96 from noamteyssier/95-implement-cluster-subcommand
Browse files Browse the repository at this point in the history
95 implement cluster subcommand
  • Loading branch information
noamteyssier authored Apr 4, 2024
2 parents 25d5033 + a4020c8 commit 29b05dd
Show file tree
Hide file tree
Showing 11 changed files with 151 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "gia"
version = "0.2.13"
version = "0.2.14"
edition = "2021"
description = "A tool for set theoretic operations of genomic intervals"
license = "MIT"
Expand Down
21 changes: 21 additions & 0 deletions src/cli/cluster.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
use super::{Output, SingleInput};
use clap::Parser;

// Command-line arguments for the `cluster` subcommand (wrapped by `Command::Cluster`).
//
// Every field is `#[clap(flatten)]`-ed, so the options of the nested structs are
// presented as if they were declared directly on this struct.
//
// NOTE: plain `//` comments are used here on purpose; clap's derive macros read
// `///` doc comments as help text, so adding them could change the CLI output.
#[derive(Parser, Debug)]
pub struct ClusterArgs {
// Input selection; `cluster` calls `get_reader()` on it.
#[clap(flatten)]
pub input: SingleInput,

// Output selection; `cluster` calls `get_writer()` on it.
#[clap(flatten)]
pub output: Output,

// Options specific to clustering (see `ClusterParams`).
#[clap(flatten)]
pub params: ClusterParams,
}

// Options that tune the `cluster` subcommand; flattened into `ClusterArgs`.
//
// NOTE: the `///` line on the field below is clap help text (user-visible), not
// just documentation, so it is intentionally left untouched.
#[derive(Parser, Debug)]
pub struct ClusterParams {
/// Assume input is sorted (default=false)
// When false, `cluster_in_memory` sorts the interval set before clustering;
// when true it only marks the set as sorted and skips the sort.
// NOTE(review): `short, long` presumably expose this as `-s` / `--sorted` — confirm.
#[clap(short, long)]
pub sorted: bool,
}
9 changes: 6 additions & 3 deletions src/cli/commands.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::{
ClosestArgs, ComplementArgs, CoverageArgs, ExtendArgs, FlankArgs, GetFastaArgs, IntersectArgs,
MergeArgs, RandomArgs, SampleArgs, SegmentArgs, ShiftArgs, SortArgs, SpacingArgs, SubtractArgs,
UnionBedGraphArgs, WindowArgs,
ClosestArgs, ClusterArgs, ComplementArgs, CoverageArgs, ExtendArgs, FlankArgs, GetFastaArgs,
IntersectArgs, MergeArgs, RandomArgs, SampleArgs, SegmentArgs, ShiftArgs, SortArgs,
SpacingArgs, SubtractArgs, UnionBedGraphArgs, WindowArgs,
};
use clap::Subcommand;

Expand All @@ -10,6 +10,9 @@ pub enum Command {
/// Finds the closest interval in a secondary BED file for all intervals in a primary BED file
Closest(ClosestArgs),

/// Annotates the intervals of a BED file with their Cluster ID
Cluster(ClusterArgs),

/// Generates the complement of a BED file
///
/// This reports the regions that are not covered by the input
Expand Down
3 changes: 3 additions & 0 deletions src/cli/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod closest;
mod cluster;
mod commands;
mod complement;
mod coverage;
Expand All @@ -20,7 +21,9 @@ mod spacing;
mod subtract;
mod unionbedg;
mod window;

pub use closest::{ClosestArgs, ClosestParams};
pub use cluster::{ClusterArgs, ClusterParams};
pub use commands::Command;
pub use complement::ComplementArgs;
pub use coverage::{CoverageArgs, CoverageParams};
Expand Down
39 changes: 39 additions & 0 deletions src/commands/cluster.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
use crate::{
cli::{ClusterArgs, ClusterParams},
dispatch_single,
io::{write_depth_iter_with, WriteNamedIter, WriteNamedIterImpl},
types::{InputFormat, IntervalDepth, Rename, Renamer, SplitTranslater},
};
use anyhow::Result;
use bedrs::{traits::IntervalBounds, types::ClusterIter, IntervalContainer};
use serde::Serialize;
use std::io::Write;

/// Annotates each interval of `set` with a cluster ID and writes the records out.
///
/// The set is sorted first unless `params.sorted` is set, in which case it is
/// only flagged as sorted. The intervals are then fed through `ClusterIter`,
/// and every `(interval, cluster_id)` pair it yields is wrapped in an
/// `IntervalDepth` (the cluster ID is passed where `IntervalDepth::new` takes
/// its second argument) before being handed to `write_depth_iter_with`.
///
/// # Errors
/// Returns whatever error `write_depth_iter_with` reports.
fn cluster_in_memory<'a, I, N, W>(
    mut set: IntervalContainer<I, usize, usize>,
    translater: Option<&'a SplitTranslater>,
    params: ClusterParams,
    writer: W,
) -> Result<()>
where
    W: Write,
    I: IntervalBounds<usize, usize> + Copy + Serialize,
    N: IntervalBounds<&'a str, usize> + Serialize,
    WriteNamedIterImpl: WriteNamedIter<I>,
    Renamer: Rename<'a, I, N>,
{
    // Trust the caller's `--sorted` flag; otherwise establish the ordering here.
    if params.sorted {
        set.set_sorted();
    } else {
        set.sort();
    }

    let clusters = ClusterIter::new(set.into_iter());
    let annotated = clusters
        .map(|(interval, cluster_id)| IntervalDepth::new(interval, cluster_id, translater));

    write_depth_iter_with(annotated, writer, translater)
}

/// Entry point for the `cluster` subcommand.
///
/// Obtains a reader from `args.input` and a writer from `args.output`, then
/// passes both, together with `args.params`, to `dispatch_single!` with
/// `cluster_in_memory` as the worker function.
///
/// # Errors
/// Propagates failures from `get_reader`, `get_writer`, and the dispatched call.
pub fn cluster(args: ClusterArgs) -> Result<()> {
let reader = args.input.get_reader()?;
let writer = args.output.get_writer()?;
// NOTE(review): `dispatch_single!` is defined elsewhere; presumably it picks the
// concrete interval type for the input format before calling the worker — confirm.
dispatch_single!(reader, writer, args.params, cluster_in_memory)
}
2 changes: 2 additions & 0 deletions src/commands/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
mod closest;
mod cluster;
mod complement;
mod coverage;
mod extend;
Expand All @@ -17,6 +18,7 @@ mod unionbedg;
mod window;

pub use closest::closest;
pub use cluster::cluster;
pub use complement::complement;
pub use coverage::coverage;
pub use extend::extend;
Expand Down
5 changes: 3 additions & 2 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,15 @@ use anyhow::Result;
use clap::Parser;
use cli::{Cli, Command};
use commands::{
closest, complement, coverage, extend, flank, get_fasta, intersect, merge, random, sample,
segment, shift, sort, spacing, subtract, unionbedgraph, window,
closest, cluster, complement, coverage, extend, flank, get_fasta, intersect, merge, random,
sample, segment, shift, sort, spacing, subtract, unionbedgraph, window,
};

fn main() -> Result<()> {
let cli = Cli::parse();
match cli.command {
Command::Closest(args) => closest(args)?,
Command::Cluster(args) => cluster(args)?,
Command::Complement(args) => complement(args)?,
Command::Coverage(args) => coverage(args)?,
Command::Extend(args) => extend(args)?,
Expand Down
61 changes: 61 additions & 0 deletions tests/cluster.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#[cfg(test)]
mod testing {
use anyhow::Result;
use assert_cmd::prelude::*;
use std::{fmt::Display, process::Command};

/// Renders expected records as the text the CLI is expected to print.
///
/// Each tuple becomes one line of four tab-separated values terminated by a
/// newline; an empty slice yields an empty string.
fn build_expected_str<T: Display>(expected: &[(T, u32, u32, u32)]) -> String {
    let mut rendered = String::new();
    for (chrom, lo, hi, cluster_id) in expected {
        let line = format!("{}\t{}\t{}\t{}\n", chrom, lo, hi, cluster_id);
        rendered.push_str(&line);
    }
    rendered
}

/// Counts the tab-separated fields on the first line of `output`.
///
/// The first line is everything before the first `\n` (or the whole buffer if
/// there is none). A line without tabs — including an empty one — counts as a
/// single field.
fn calculate_n_fields(output: &[u8]) -> usize {
    let line_end = output
        .iter()
        .position(|&byte| byte == b'\n')
        .unwrap_or(output.len());
    let first_line = &output[..line_end];
    // N separators delimit N + 1 fields.
    let n_tabs = first_line.iter().filter(|&&byte| byte == b'\t').count();
    n_tabs + 1
}

/// Runs `gia cluster` on the BED3 fixture and compares stdout, byte for byte,
/// with the expected records (each input interval followed by its cluster ID).
#[test]
fn test_cluster_bed3() -> Result<()> {
    let expected_str = build_expected_str(&[
        (1, 83, 233, 0),
        (1, 142, 292, 0),
        (1, 349, 499, 1),
        (1, 437, 587, 1),
        (1, 704, 854, 2),
    ]);

    let output = Command::cargo_bin("gia")?
        .args(["cluster", "-i", "tests/datasets/cluster/test.bed3"])
        .output()?;
    let observed_str = String::from_utf8(output.stdout)?;

    assert_eq!(observed_str, expected_str);
    Ok(())
}

/// Runs `gia cluster` on the BED6 fixture and checks that the first output
/// line has 7 tab-separated fields (the six input columns plus one more).
#[test]
fn test_cluster_bed6() -> Result<()> {
    let stdout = Command::cargo_bin("gia")?
        .args(["cluster", "-i", "tests/datasets/cluster/test.bed6"])
        .output()?
        .stdout;
    assert_eq!(calculate_n_fields(&stdout), 7);
    Ok(())
}

/// Runs `gia cluster` on the BED12 fixture and checks that the first output
/// line has 13 tab-separated fields (the twelve input columns plus one more).
#[test]
fn test_cluster_bed12() -> Result<()> {
    let stdout = Command::cargo_bin("gia")?
        .args(["cluster", "-i", "tests/datasets/cluster/test.bed12"])
        .output()?
        .stdout;
    assert_eq!(calculate_n_fields(&stdout), 13);
    Ok(())
}
}
5 changes: 5 additions & 0 deletions tests/datasets/cluster/test.bed12
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1 83 233 0 . + 150 158 0 0 0 0
1 142 292 0 . + 151 174 0 0 0 0
1 349 499 0 . - 401 441 0 0 0 0
1 437 587 0 . - 462 526 0 0 0 0
1 704 854 0 . + 760 838 0 0 0 0
5 changes: 5 additions & 0 deletions tests/datasets/cluster/test.bed3
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1 83 233
1 142 292
1 349 499
1 437 587
1 704 854
5 changes: 5 additions & 0 deletions tests/datasets/cluster/test.bed6
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
1 83 233 0 . +
1 142 292 0 . +
1 349 499 0 . -
1 437 587 0 . -
1 704 854 0 . +

0 comments on commit 29b05dd

Please sign in to comment.