Skip to content

Commit

Permalink
Merge pull request #103 from noamteyssier/31-stranded-methods
Browse files Browse the repository at this point in the history
31 stranded methods
  • Loading branch information
noamteyssier authored Apr 11, 2024
2 parents 2816fa8 + 0452016 commit 9db4274
Show file tree
Hide file tree
Showing 22 changed files with 315 additions and 118 deletions.
9 changes: 6 additions & 3 deletions src/cli/closest.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use super::{outputs::Output, overlap_predicates::WrapStrandedness, DualInput};
use clap::Parser;

use super::{outputs::Output, DualInput};

/// Finds the closest interval in a secondary BED file for all intervals in a primary BED file
#[derive(Parser, Debug)]
#[clap(next_help_heading = "Global options")]
Expand All @@ -27,7 +26,11 @@ pub struct ClosestParams {
#[clap(short = 'd', long, conflicts_with = "upstream")]
pub downstream: bool,

/// Strand-specificity of closest intervals
#[clap(short, long, default_value = "i")]
pub strandedness: WrapStrandedness,

/// Specify that the input files are already presorted
#[clap(short, long)]
#[clap(short = 'S', long)]
pub sorted: bool,
}
2 changes: 1 addition & 1 deletion src/cli/coverage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub struct CoverageArgs {
pub struct CoverageParams {
/// Assert that the intervals are presorted in BOTH files (unexpected behavior if they are
/// not)
#[clap(short, long)]
#[clap(short = 'S', long)]
pub sorted: bool,

#[clap(flatten)]
Expand Down
16 changes: 14 additions & 2 deletions src/cli/growth.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::types::{Genome, Translater};
use anyhow::Result;
use bedrs::traits::IntervalBounds;
use bedrs::{traits::IntervalBounds, Strand};
use clap::Parser;

#[derive(Parser, Debug, Clone)]
Expand All @@ -25,6 +25,13 @@ pub struct Growth {
/// Genome file to validate growth against
#[clap(short, long)]
pub genome: Option<String>,

/// Follow strand specificity when applying growth
///
/// i.e. if the strand is negative, apply growth to the right side of the interval
/// when the left side is requested (and vice versa) [default = false]
#[clap(short, long)]
pub stranded: bool,
}
impl Growth {
#[allow(clippy::option_map_unit_fn)]
Expand Down Expand Up @@ -52,14 +59,19 @@ impl Growth {
where
I: IntervalBounds<usize, usize>,
{
if let Some(val) = self.both {
let (left, right) = if let Some(val) = self.both {
self.calculate_percentage(iv, val, val)
} else {
self.calculate_percentage(
iv,
self.left.unwrap_or_default(),
self.right.unwrap_or_default(),
)
};
if self.stranded && iv.strand() == Some(Strand::Reverse) {
(right, left)
} else {
(left, right)
}
}

Expand Down
4 changes: 2 additions & 2 deletions src/cli/intersect.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ pub struct IntersectArgs {
pub struct IntersectParams {
/// Stream the input files instead of loading them into memory
/// (only works if both files are sorted)
#[clap(short = 'S', long, conflicts_with_all = &["with_query", "with_target", "unique", "inverse"])]
#[clap(long, conflicts_with_all = &["with_query", "with_target", "unique", "inverse"])]
pub stream: bool,

/// Assert the inputs are pre-sorted
#[clap(short, long)]
#[clap(short = 'S', long)]
pub sorted: bool,

#[clap(flatten)]
Expand Down
35 changes: 33 additions & 2 deletions src/cli/merge.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use super::{Output, SingleInput};
use clap::Parser;
use bedrs::Strand;
use clap::{Parser, ValueEnum};

#[derive(Parser, Debug)]
pub struct MergeArgs {
Expand All @@ -16,6 +17,20 @@ pub struct MergeArgs {
#[derive(Parser, Debug)]
#[clap(next_help_heading = "Parameters")]
pub struct MergeParams {
/// Only merge intervals that share strandedness (will ignore intervals that have unknown
/// strand)
#[clap(short = 'r', long, conflicts_with("specific"))]
pub stranded: bool,

/// Only merge intervals that belong to a specific strand (will ignore all intervals that do
/// not share the specified strand)
#[clap(short = 'R', long, conflicts_with("stranded"))]
pub specific: Option<StrandEnum>,

/// Demote all merged intervals into BED3 format if they are not already in that format
#[clap(short, long)]
pub demote: bool,

/// Assume input is sorted (default=false)
#[clap(short, long)]
pub sorted: bool,
Expand All @@ -26,6 +41,22 @@ pub struct MergeParams {
/// and will result in undefined behavior if it is not.
///
/// Currently does not support non-integer chromosome names.
#[clap(short = 'S', long)]
#[clap(short = 'S', long, conflicts_with_all(&["stranded", "specific"]))]
pub stream: bool,
}

#[derive(Debug, Clone, Parser, ValueEnum)]
pub enum StrandEnum {
#[clap(name = "+")]
Plus,
#[clap(name = "-")]
Minus,
}
impl From<StrandEnum> for Strand {
fn from(value: StrandEnum) -> Self {
match value {
StrandEnum::Plus => Strand::Forward,
StrandEnum::Minus => Strand::Reverse,
}
}
}
60 changes: 50 additions & 10 deletions src/cli/overlap_predicates.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use bedrs::{traits::ValueBounds, types::QueryMethod};
use clap::Parser;
use bedrs::{
traits::ValueBounds,
types::{Query, QueryMethod, StrandMethod},
};
use clap::{Parser, ValueEnum};

#[derive(Parser, Debug, Clone, Copy)]
#[clap(next_help_heading = "Overlap Predicates")]
Expand All @@ -25,18 +28,30 @@ pub struct OverlapPredicates {
/// Requires that either fraction provided with `-f` or `-F` is met
#[clap(short, long, requires_all=&["fraction_query", "fraction_target"], conflicts_with = "reciprocal")]
pub either: bool,
}

impl<T: ValueBounds> From<OverlapPredicates> for QueryMethod<T> {
fn from(args: OverlapPredicates) -> Self {
let fraction_target = if args.reciprocal {
args.fraction_query
/// Strand-specificity to use when comparing intervals
///
/// i: Ignore strand (default)
///
/// m: Match strand (+/+ or -/- only)
///
/// o: Opposite strand (+/- or -/+ only)
#[clap(short = 's', long, default_value = "i")]
pub strandedness: WrapStrandedness,
}
impl OverlapPredicates {
fn get_strand_method(&self) -> StrandMethod {
self.strandedness.into()
}
fn get_overlap_method<T: ValueBounds>(&self) -> QueryMethod<T> {
let fraction_target = if self.reciprocal {
self.fraction_query
} else {
args.fraction_target
self.fraction_target
};
match (args.fraction_query, fraction_target) {
match (self.fraction_query, fraction_target) {
(Some(fraction_query), Some(fraction_target)) => {
if args.either {
if self.either {
QueryMethod::CompareReciprocalFractionOr(fraction_query, fraction_target)
} else {
QueryMethod::CompareReciprocalFractionAnd(fraction_query, fraction_target)
Expand All @@ -48,3 +63,28 @@ impl<T: ValueBounds> From<OverlapPredicates> for QueryMethod<T> {
}
}
}

#[derive(Parser, Debug, Clone, Copy, ValueEnum)]
pub enum WrapStrandedness {
#[clap(name = "i")]
Ignore,
#[clap(name = "m")]
Match,
#[clap(name = "o")]
Opposite,
}
impl From<WrapStrandedness> for StrandMethod {
fn from(wrap: WrapStrandedness) -> Self {
match wrap {
WrapStrandedness::Ignore => StrandMethod::Ignore,
WrapStrandedness::Match => StrandMethod::MatchStrand,
WrapStrandedness::Opposite => StrandMethod::OppositeStrand,
}
}
}

impl<T: ValueBounds> From<OverlapPredicates> for Query<T> {
fn from(args: OverlapPredicates) -> Self {
Query::new(args.get_overlap_method(), args.get_strand_method())
}
}
33 changes: 14 additions & 19 deletions src/commands/bam/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@ use crate::{
cli::bam::{FilterArgs, FilterParams},
dispatch_single_with_htslib,
io::{WriteNamedIter, WriteNamedIterImpl},
types::{InputFormat, NumericBed3, SplitTranslater},
types::{InputFormat, SplitTranslater},
};

use super::utils::{parse_chr_name, parse_endpoints};
use super::utils::{parse_chr_name, parse_endpoints, parse_strand};
use anyhow::Result;
use bedrs::{traits::IntervalBounds, types::QueryMethod, IntervalContainer};
use bedrs::{traits::IntervalBounds, types::Query, IntervalContainer, StrandedBed3};
use rust_htslib::bam::{HeaderView, Read, Reader as BamReader, Record, Writer as BamWriter};
use serde::Serialize;

fn temp_bed3(
fn temp_sbed3(
record: &Record,
header: &HeaderView,
translater: &SplitTranslater,
) -> Result<Option<NumericBed3>> {
) -> Result<Option<StrandedBed3<usize>>> {
let chr_bytes = parse_chr_name(record, header)?;
let chr_name = std::str::from_utf8(chr_bytes)?;
let chr_idx = if let Some(idx) = translater.get_chr_idx(chr_name) {
Expand All @@ -24,26 +24,24 @@ fn temp_bed3(
return Ok(None);
};
let (start, end) = parse_endpoints(record)?;
Ok(Some(NumericBed3::new(chr_idx, start, end)))
let strand = parse_strand(record);
Ok(Some(StrandedBed3::new(chr_idx, start, end, strand)))
}

fn run_inverted_overlap<I>(
record: &Record,
header: &HeaderView,
set: &IntervalContainer<I, usize, usize>,
translater: &SplitTranslater,
query_method: QueryMethod<usize>,
query_method: Query<usize>,
wtr: &mut BamWriter,
) -> Result<()>
where
I: IntervalBounds<usize, usize> + Copy + Serialize,
WriteNamedIterImpl: WriteNamedIter<I>,
{
if let Some(bed) = temp_bed3(record, header, translater)? {
let no_overlaps = set
.find_iter_sorted_method_unchecked(&bed, query_method)?
.next()
.is_none();
if let Some(bed) = temp_sbed3(record, header, translater)? {
let no_overlaps = set.query_iter(&bed, query_method)?.next().is_none();
if no_overlaps {
wtr.write(record)?;
}
Expand All @@ -58,18 +56,15 @@ fn run_overlap<I>(
header: &HeaderView,
set: &IntervalContainer<I, usize, usize>,
translater: &SplitTranslater,
query_method: QueryMethod<usize>,
query_method: Query<usize>,
wtr: &mut BamWriter,
) -> Result<()>
where
I: IntervalBounds<usize, usize> + Copy + Serialize,
WriteNamedIterImpl: WriteNamedIter<I>,
{
if let Some(bed) = temp_bed3(record, header, translater)? {
let any_overlaps = set
.find_iter_sorted_method_unchecked(&bed, query_method)?
.next()
.is_some();
if let Some(bed) = temp_sbed3(record, header, translater)? {
let any_overlaps = set.query_iter(&bed, query_method)?.next().is_some();
if any_overlaps {
wtr.write(record)?;
}
Expand Down Expand Up @@ -98,7 +93,7 @@ where
let translater = translater.unwrap();

// Initialize the overlap query method
let query_method: QueryMethod<usize> = params.overlap_predicates.into();
let query_method = params.overlap_predicates.into();

// Initialize an empty record to avoid repeated allocations
let mut record = Record::new();
Expand Down
9 changes: 9 additions & 0 deletions src/commands/bam/utils.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use anyhow::{bail, Result};
use bedrs::Strand;
use rust_htslib::bam::{HeaderView, Record};

pub fn parse_chr_name<'a>(record: &Record, header: &'a HeaderView) -> Result<&'a [u8]> {
Expand Down Expand Up @@ -42,3 +43,11 @@ pub fn get_strand(record: &Record) -> char {
'+'
}
}

pub fn parse_strand(record: &Record) -> Strand {
match get_strand(record) {
'+' => Strand::Forward,
'-' => Strand::Reverse,
_ => Strand::Unknown,
}
}
18 changes: 6 additions & 12 deletions src/commands/bcf/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{

use super::utils::{parse_chr_name, parse_endpoints};
use anyhow::Result;
use bedrs::{traits::IntervalBounds, types::QueryMethod, IntervalContainer};
use bedrs::{traits::IntervalBounds, types::Query, IntervalContainer};
use rust_htslib::bcf::{
header::HeaderView, Read as VcfRead, Reader as VcfReader, Record, Writer as VcfWriter,
};
Expand All @@ -34,18 +34,15 @@ fn run_inverted_overlap<I>(
header: &HeaderView,
set: &IntervalContainer<I, usize, usize>,
translater: &SplitTranslater,
query_method: QueryMethod<usize>,
query_method: Query<usize>,
wtr: &mut VcfWriter,
) -> Result<()>
where
I: IntervalBounds<usize, usize> + Copy + Serialize,
WriteNamedIterImpl: WriteNamedIter<I>,
{
if let Some(bed) = temp_bed3(record, header, translater)? {
let no_overlaps = set
.find_iter_sorted_method_unchecked(&bed, query_method)?
.next()
.is_none();
let no_overlaps = set.query_iter(&bed, query_method)?.next().is_none();
if no_overlaps {
wtr.write(record)?;
}
Expand All @@ -60,18 +57,15 @@ fn run_overlap<I>(
header: &HeaderView,
set: &IntervalContainer<I, usize, usize>,
translater: &SplitTranslater,
query_method: QueryMethod<usize>,
query_method: Query<usize>,
wtr: &mut VcfWriter,
) -> Result<()>
where
I: IntervalBounds<usize, usize> + Copy + Serialize,
WriteNamedIterImpl: WriteNamedIter<I>,
{
if let Some(bed) = temp_bed3(record, header, translater)? {
let any_overlaps = set
.find_iter_sorted_method_unchecked(&bed, query_method)?
.next()
.is_some();
let any_overlaps = set.query_iter(&bed, query_method)?.next().is_some();
if any_overlaps {
wtr.write(record)?;
}
Expand Down Expand Up @@ -100,7 +94,7 @@ where
let translater = translater.unwrap();

// Initialize the overlap query method
let query_method: QueryMethod<usize> = params.overlap_predicates.into();
let query_method = params.overlap_predicates.into();

// Initialize an empty VCF record to avoid repeated allocations
let mut record = vcf.empty_record();
Expand Down
Loading

0 comments on commit 9db4274

Please sign in to comment.