Skip to content

Commit

Permalink
Merge pull request #102 from noamteyssier/100-add-vcf-centric-commands
Browse files Browse the repository at this point in the history
100 add vcf centric commands
  • Loading branch information
noamteyssier authored Apr 8, 2024
2 parents c9db82f + e612747 commit 2816fa8
Show file tree
Hide file tree
Showing 27 changed files with 13,954 additions and 48 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "gia"
version = "0.2.16"
version = "0.2.17"
edition = "2021"
description = "A tool for set theoretic operations of genomic intervals"
license = "MIT"
Expand Down
2 changes: 1 addition & 1 deletion src/cli/bam/commands.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::{ConvertArgs, FilterArgs};
use clap::Parser;

#[derive(Parser, Debug)]
#[derive(Parser, Debug, Clone)]
pub enum BamCommand {
/// Convert BAM to different formats
Convert(ConvertArgs),
Expand Down
8 changes: 4 additions & 4 deletions src/cli/bam/convert.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::cli::SingleInputBam;
use clap::{Parser, ValueEnum};

#[derive(Parser, Debug)]
#[derive(Parser, Debug, Clone)]
pub struct ConvertArgs {
#[clap(flatten)]
pub input: SingleInputBam,
Expand All @@ -10,7 +10,7 @@ pub struct ConvertArgs {
pub params: ConvertParams,
}

#[derive(Parser, Debug)]
#[derive(Parser, Debug, Clone)]
#[clap(next_help_heading = "Parameters")]
pub struct ConvertParams {
#[clap(short, long, default_value = "bed")]
Expand All @@ -23,15 +23,15 @@ pub struct ConvertParams {
pub fastq: FastqConversionParams,
}

#[derive(Parser, Debug)]
#[derive(Parser, Debug, Clone)]
#[clap(next_help_heading = "BED Conversion Options")]
pub struct BedConversionParams {
#[clap(short = 'C', long)]
/// Include CIGAR string in BED output
pub cigar: bool,
}

#[derive(Parser, Debug)]
#[derive(Parser, Debug, Clone)]
#[clap(next_help_heading = "FASTQ Conversion Options")]
pub struct FastqConversionParams {}

Expand Down
10 changes: 5 additions & 5 deletions src/cli/bam/filter.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
use crate::cli::{BamOutput, MixedInput, OverlapPredicates};
use crate::cli::{BamOutput, MixedInputBam, OverlapPredicates};

use clap::Parser;

#[derive(Parser, Debug)]
#[derive(Parser, Debug, Clone)]
pub struct FilterArgs {
#[clap(flatten)]
pub inputs: MixedInput,
pub inputs: MixedInputBam,

#[clap(flatten)]
pub params: FilterParams,
Expand All @@ -14,7 +14,7 @@ pub struct FilterArgs {
pub output: BamOutput,
}

#[derive(Parser, Debug)]
#[derive(Parser, Debug, Clone)]
#[clap(next_help_heading = "Parameters")]
pub struct FilterParams {
#[clap(flatten)]
Expand All @@ -24,7 +24,7 @@ pub struct FilterParams {
pub output_predicates: OutputPredicates,
}

#[derive(Parser, Debug)]
#[derive(Parser, Debug, Clone)]
#[clap(next_help_heading = "Output Predicates")]
pub struct OutputPredicates {
/// Only return the records from a that DON'T overlap with b
Expand Down
8 changes: 8 additions & 0 deletions src/cli/bcf/commands.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
use super::FilterArgs;
use clap::Parser;

// Subcommands available under the `bcf` command group. The top-level
// `Command` enum exposes this enum through its `Bcf` variant.
#[derive(Parser, Debug, Clone)]
pub enum BcfCommand {
// Carries the complete argument set (`FilterArgs`) of the filter subcommand.
/// Filter BCF records based on overlap criteria to other regions
Filter(FilterArgs),
}
33 changes: 33 additions & 0 deletions src/cli/bcf/filter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
use crate::cli::{outputs::VcfOutput, MixedInputVcf, OverlapPredicates};

use clap::Parser;

// Command-line arguments of the BCF `filter` subcommand. Every field is a
// flattened clap argument group, so the options of each group are accepted
// directly on the subcommand.
#[derive(Parser, Debug, Clone)]
pub struct FilterArgs {
// Input group: the BCF/VCF input and the BED input.
#[clap(flatten)]
pub inputs: MixedInputVcf,

// Parameter group: overlap predicates plus output predicates (see `FilterParams`).
#[clap(flatten)]
pub params: FilterParams,

// Output group: destination path, output format, and writer thread count.
#[clap(flatten)]
pub output: VcfOutput,
}

// Parameters of the BCF filter; its options are listed under the
// "Parameters" heading in the generated help.
#[derive(Parser, Debug, Clone)]
#[clap(next_help_heading = "Parameters")]
pub struct FilterParams {
// Overlap criteria. `OverlapPredicates` is imported from `crate::cli` and is
// not defined in this file.
#[clap(flatten)]
pub overlap_predicates: OverlapPredicates,

// Output-side flags (see `OutputPredicates` below).
#[clap(flatten)]
pub output_predicates: OutputPredicates,
}

// Output-side switches of the BCF filter; listed under the
// "Output Predicates" heading in the generated help.
#[derive(Parser, Debug, Clone)]
#[clap(next_help_heading = "Output Predicates")]
pub struct OutputPredicates {
// Boolean flag exposed as `-v` / `--invert`.
/// Only return the records from a that DON'T overlap with b
#[clap(short = 'v', long)]
pub invert: bool,
}
5 changes: 5 additions & 0 deletions src/cli/bcf/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
mod commands;
mod filter;

pub use commands::BcfCommand;
pub use filter::{FilterArgs, FilterParams};
10 changes: 7 additions & 3 deletions src/cli/commands.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::{
bam::BamCommand, ClosestArgs, ClusterArgs, ComplementArgs, CoverageArgs, ExtendArgs, FlankArgs,
GetFastaArgs, IntersectArgs, MergeArgs, RandomArgs, SampleArgs, SegmentArgs, ShiftArgs,
SortArgs, SpacingArgs, SubtractArgs, UnionBedGraphArgs, WindowArgs,
bam::BamCommand, bcf::BcfCommand, ClosestArgs, ClusterArgs, ComplementArgs, CoverageArgs,
ExtendArgs, FlankArgs, GetFastaArgs, IntersectArgs, MergeArgs, RandomArgs, SampleArgs,
SegmentArgs, ShiftArgs, SortArgs, SpacingArgs, SubtractArgs, UnionBedGraphArgs, WindowArgs,
};
use clap::Subcommand;

Expand All @@ -11,6 +11,10 @@ pub enum Command {
#[clap(subcommand)]
Bam(BamCommand),

/// BCF-centric commands
#[clap(subcommand)]
Bcf(BcfCommand),

/// Finds the closest interval in a secondary BED file for all intervals in a primary BED file
Closest(ClosestArgs),

Expand Down
32 changes: 29 additions & 3 deletions src/cli/inputs.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use crate::{
io::BedReader,
io::{match_bam_input, match_bcf_input, BedReader},
types::{FieldFormat, InputFormat},
};
use anyhow::{bail, Result};
use clap::Parser;
use rust_htslib::{bam::Reader as BamReader, bcf::Reader as BcfReader};

#[derive(Parser, Debug, Clone)]
#[clap(next_help_heading = "Single Input Options")]
Expand Down Expand Up @@ -36,19 +37,44 @@ pub struct SingleInputBam {

#[derive(Parser, Debug, Clone)]
#[clap(next_help_heading = "Mixed BAM/Bed Dual Input")]
pub struct MixedInput {
pub struct MixedInputBam {
/// Input BAM file to process (default=stdin)
#[clap(short = 'a', long)]
pub bam: Option<String>,
/// Input BED file to process
#[clap(short = 'b', long)]
pub bed: String,
}
impl MixedInput {
impl MixedInputBam {
pub fn get_reader_bed(&self) -> Result<BedReader> {
// The bed format must always be read as string-based when working with BAM files
BedReader::from_path(Some(self.bed.clone()), None, Some(FieldFormat::StringBased))
}

pub fn get_reader_bam(&self) -> Result<BamReader> {
match_bam_input(self.bam.clone())
}
}

// Paired inputs for the BCF-centric commands: a BCF/VCF input (`-a`) and a
// BED input (`-b`).
//
// The help heading names BCF rather than BAM so that `--help` describes the
// options this group actually exposes (the previous text was carried over
// from the BAM input group).
#[derive(Parser, Debug, Clone)]
#[clap(next_help_heading = "Mixed BCF/Bed Dual Input")]
pub struct MixedInputVcf {
    /// Input BCF/VCF file to process (default=stdin)
    #[clap(short = 'a', long)]
    pub bcf: Option<String>,
    /// Input BED file to process
    #[clap(short = 'b', long)]
    pub bed: String,
}
impl MixedInputVcf {
    /// Opens a reader over the BED input named by `self.bed`.
    ///
    /// The second argument of `BedReader::from_path` is left as `None`, and the
    /// field format is pinned to [`FieldFormat::StringBased`]: BED input paired
    /// with BCF files must always be read as string-based.
    pub fn get_reader_bed(&self) -> Result<BedReader> {
        let bed_path = Some(self.bed.clone());
        let field_format = Some(FieldFormat::StringBased);
        BedReader::from_path(bed_path, None, field_format)
    }

    /// Opens a reader over the BCF/VCF input by handing an owned copy of the
    /// optional path to `match_bcf_input`.
    ///
    /// The CLI help states that a missing path means stdin; that fallback is
    /// presumably implemented inside `match_bcf_input`, which is not visible here.
    pub fn get_reader_bcf(&self) -> Result<BcfReader> {
        let bcf_path = self.bcf.clone();
        match_bcf_input(bcf_path)
    }
}

#[derive(Parser, Debug, Clone)]
Expand Down
5 changes: 4 additions & 1 deletion src/cli/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub mod bam;
pub mod bcf;
mod closest;
mod cluster;
mod commands;
Expand Down Expand Up @@ -32,7 +33,9 @@ pub use extend::ExtendArgs;
pub use flank::FlankArgs;
pub use get_fasta::GetFastaArgs;
pub use growth::Growth;
pub use inputs::{DualInput, MixedInput, MultiInput, SingleInput, SingleInputBam};
pub use inputs::{
DualInput, MixedInputBam, MixedInputVcf, MultiInput, SingleInput, SingleInputBam,
};
pub use intersect::{IntersectArgs, IntersectParams, OutputMethod};
pub use merge::{MergeArgs, MergeParams};
pub use outputs::{BamOutput, Output};
Expand Down
82 changes: 72 additions & 10 deletions src/cli/outputs.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
use crate::io::{match_bam_output, match_output};
use crate::io::{match_bam_output, match_bcf_output, match_output};
use anyhow::Result;
use clap::{Parser, ValueEnum};
use rust_htslib::bam::{Format, HeaderView, Writer as BamWriter};
use rust_htslib::{
bam::{Format as SamFormat, HeaderView as BamHeaderView, Writer as BamWriter},
bcf::{header::HeaderView as VcfHeaderView, Format as VcfFormat, Writer as VcfWriter},
};
use std::io::Write;

#[derive(Parser, Debug, Clone)]
Expand Down Expand Up @@ -38,14 +41,14 @@ pub struct BamOutput {

/// Output Format to write to (default=BAM)
#[clap(short = 'O', long, default_value = "bam")]
pub format: WrapHtsFormat,
pub format: WrapSamFormat,

/// Threads to use when writing BAM files
#[clap(short = 't', long, default_value = "1")]
pub threads: usize,
}
impl BamOutput {
pub fn get_writer(&self, header: &HeaderView) -> Result<BamWriter> {
pub fn get_writer(&self, header: &BamHeaderView) -> Result<BamWriter> {
match_bam_output(
self.output.clone(),
header,
Expand All @@ -55,18 +58,77 @@ impl BamOutput {
}
}

// Output options for the BCF-centric commands: destination path, output
// format, and writer thread count.
//
// The help heading names BCF/VCF rather than BAM so that `--help` groups
// these options under the format they actually control (the previous text
// was carried over from the BAM output group).
#[derive(Parser, Debug, Clone)]
#[clap(next_help_heading = "BCF/VCF Output Options")]
pub struct VcfOutput {
    /// Output BCF file to write to (default=stdout)
    #[clap(short, long)]
    pub output: Option<String>,

    /// Output Format to write to
    ///
    /// v/z: VCF (uncompressed/compressed)
    ///
    /// u/b: BCF (uncompressed/compressed)
    #[clap(short = 'O', long, default_value = "b")]
    pub format: WrapVcfFormat,

    /// Threads to use when writing BCF/VCF files
    #[clap(short = 't', long, default_value = "1")]
    pub threads: usize,
}
impl VcfOutput {
    /// Builds a BCF/VCF writer for the configured destination, using `header`
    /// as the header of the output.
    ///
    /// All work is delegated to `match_bcf_output`; any error it reports is
    /// returned unchanged.
    pub fn get_writer(&self, header: &VcfHeaderView) -> Result<VcfWriter> {
        let destination = self.output.clone();
        let threads = self.threads;
        // `self.format` is converted twice: each `.into()` resolves to whatever type
        // `match_bcf_output` expects in that position (`WrapVcfFormat` converts both to
        // the htslib format and to a `bool` compression flag).
        // NOTE(review): the `bool` conversion yields `true` for the compressed variants,
        // while rust-htslib's `bcf::Writer` constructors take an `uncompressed` flag —
        // confirm that `match_bcf_output` accounts for the polarity.
        match_bcf_output(destination, header, self.format.into(), self.format.into(), threads)
    }
}

#[derive(Parser, Debug, Clone, ValueEnum, Copy)]
pub enum WrapHtsFormat {
pub enum WrapSamFormat {
Bam,
Sam,
Cram,
}
impl From<WrapHtsFormat> for Format {
fn from(format: WrapHtsFormat) -> Self {
impl From<WrapSamFormat> for SamFormat {
fn from(format: WrapSamFormat) -> Self {
match format {
WrapSamFormat::Bam => SamFormat::Bam,
WrapSamFormat::Sam => SamFormat::Sam,
WrapSamFormat::Cram => SamFormat::Cram,
}
}
}

// CLI-facing choice of output container and compression. Each variant is
// selected on the command line by the single-letter name given in its
// `#[clap(name = ...)]` attribute.
#[derive(Parser, Debug, Clone, ValueEnum, Copy)]
pub enum WrapVcfFormat {
// `z`: VCF, compressed.
#[clap(name = "z")]
VcfCompressed,
// `v`: VCF, uncompressed.
#[clap(name = "v")]
VcfUncompressed,
// `b`: BCF, compressed.
#[clap(name = "b")]
BcfCompressed,
// `u`: BCF, uncompressed.
#[clap(name = "u")]
BcfUncompressed,
}
impl From<WrapVcfFormat> for VcfFormat {
fn from(format: WrapVcfFormat) -> Self {
match format {
WrapVcfFormat::VcfCompressed | WrapVcfFormat::VcfUncompressed => VcfFormat::Vcf,
WrapVcfFormat::BcfCompressed | WrapVcfFormat::BcfUncompressed => VcfFormat::Bcf,
}
}
}
impl From<WrapVcfFormat> for bool {
fn from(format: WrapVcfFormat) -> Self {
match format {
WrapHtsFormat::Bam => Format::Bam,
WrapHtsFormat::Sam => Format::Sam,
WrapHtsFormat::Cram => Format::Cram,
WrapVcfFormat::VcfCompressed | WrapVcfFormat::BcfCompressed => true,
WrapVcfFormat::VcfUncompressed | WrapVcfFormat::BcfUncompressed => false,
}
}
}
16 changes: 11 additions & 5 deletions src/commands/bam/filter.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
cli::bam::{FilterArgs, FilterParams},
dispatch_single_with_bam,
io::{match_bam_input, WriteNamedIter, WriteNamedIterImpl},
dispatch_single_with_htslib,
io::{WriteNamedIter, WriteNamedIterImpl},
types::{InputFormat, NumericBed3, SplitTranslater},
};

Expand Down Expand Up @@ -119,7 +119,13 @@ where

pub fn filter(args: FilterArgs) -> Result<()> {
let bed_reader = args.inputs.get_reader_bed()?;
let mut bam = match_bam_input(args.inputs.bam)?;
let mut writer = args.output.get_writer(bam.header())?;
dispatch_single_with_bam!(&mut bam, bed_reader, &mut writer, args.params, run_filter)
let mut bam_reader = args.inputs.get_reader_bam()?;
let mut writer = args.output.get_writer(bam_reader.header())?;
dispatch_single_with_htslib!(
&mut bam_reader,
bed_reader,
&mut writer,
args.params,
run_filter
)
}
Loading

0 comments on commit 2816fa8

Please sign in to comment.