Skip to content

Commit

Permalink
refactor: update all bam IO with htslib for increased performance
Browse files Browse the repository at this point in the history
  • Loading branch information
noamteyssier committed Apr 5, 2024
1 parent ebc26a5 commit 53bab87
Show file tree
Hide file tree
Showing 9 changed files with 127 additions and 289 deletions.
2 changes: 0 additions & 2 deletions src/cli/bam/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
mod commands;
mod convert;
mod filter;
mod wrap;

pub use commands::BamCommand;
pub use convert::{BamConversionType, ConvertArgs, ConvertParams};
pub use filter::{FilterArgs, FilterParams};
pub use wrap::WrapCigar;
98 changes: 0 additions & 98 deletions src/cli/bam/wrap.rs

This file was deleted.

36 changes: 33 additions & 3 deletions src/cli/outputs.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use crate::io::{match_bam_output, match_output};
use anyhow::Result;
use clap::Parser;
use clap::{Parser, ValueEnum};
use rust_htslib::bam::{Format, HeaderView, Writer as BamWriter};
use std::io::Write;

#[derive(Parser, Debug, Clone)]
Expand Down Expand Up @@ -34,9 +35,38 @@ pub struct BamOutput {
/// Output BAM file to write to (default=stdout)
#[clap(short, long)]
pub output: Option<String>,

/// Output Format to write to (default=BAM)
#[clap(short = 'O', long, default_value = "bam")]
pub format: WrapHtsFormat,

/// Threads to use when writing BAM files
#[clap(short = 't', long, default_value = "1")]
pub threads: usize,
}
impl BamOutput {
pub fn get_writer(&self) -> Result<Box<dyn Write>> {
match_bam_output(self.output.clone())
pub fn get_writer(&self, header: &HeaderView) -> Result<BamWriter> {
match_bam_output(
self.output.clone(),
header,
self.format.into(),
self.threads,
)
}
}

// Output format selectable on the command line; used as the type of
// `BamOutput::format`, whose clap default value is "bam".
// A `From<WrapHtsFormat> for Format` impl converts each variant into the
// corresponding `rust_htslib::bam::Format` variant.
// NOTE(review): plain `//` comments are used deliberately — clap's derive
// macros pick up `///` doc comments as help text, which would change the
// generated CLI help output.
#[derive(Parser, Debug, Clone, ValueEnum, Copy)]
pub enum WrapHtsFormat {
// Maps to `Format::Bam`.
Bam,
// Maps to `Format::Sam`.
Sam,
// Maps to `Format::Cram`.
Cram,
}
impl From<WrapHtsFormat> for Format {
    /// Translates the CLI-facing format choice into the writer `Format`.
    ///
    /// The mapping is one-to-one: every `WrapHtsFormat` variant becomes the
    /// `Format` variant of the same name.
    fn from(choice: WrapHtsFormat) -> Self {
        match choice {
            WrapHtsFormat::Bam => Self::Bam,
            WrapHtsFormat::Sam => Self::Sam,
            WrapHtsFormat::Cram => Self::Cram,
        }
    }
}
30 changes: 13 additions & 17 deletions src/commands/bam/convert/bed.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,17 @@
use crate::cli::bam::{ConvertParams, WrapCigar};
use crate::cli::bam::ConvertParams;
use crate::commands::bam::utils::{
get_strand, parse_chr_name, parse_endpoints, parse_mapping_quality, parse_query_name,
};
use crate::io::build_writer;

use anyhow::Result;
use noodles::bam::io::Reader;
use noodles::bam::Record as BamRecord;
use noodles::sam::Header;
use std::io::{stdout, BufWriter, Read, Write};
use rust_htslib::bam::{HeaderView, Read, Reader as BamReader, Record};
use std::io::{stdout, Write};
use std::str::from_utf8;

fn format_print_record<W: Write>(
record: &BamRecord,
header: &Header,
record: &Record,
header: &HeaderView,
params: &ConvertParams,
wtr: &mut csv::Writer<W>,
) -> Result<()> {
Expand All @@ -22,9 +20,9 @@ fn format_print_record<W: Write>(
let qname = parse_query_name(record)?;
let mapq = parse_mapping_quality(record);
let strand = get_strand(record);

//
if params.bed.cigar {
let cigar: WrapCigar = record.cigar().into();
let cigar = record.cigar();
let tuple = (
from_utf8(chr_name)?,
start,
Expand All @@ -49,14 +47,12 @@ fn format_print_record<W: Write>(
Ok(())
}

pub fn convert_bed<R: Read>(
mut bam: Reader<R>,
header: Header,
params: ConvertParams,
) -> Result<()> {
let mut wtr = build_writer(BufWriter::new(stdout()));
for record in bam.records() {
let record = record?;
pub fn convert_bed(mut bam: BamReader, params: ConvertParams) -> Result<()> {
let header = bam.header().clone();
let mut wtr = build_writer(stdout());
let mut record = Record::new();
while let Some(result) = bam.read(&mut record) {
result?;
format_print_record(&record, &header, &params, &mut wtr)?;
}
wtr.flush()?;
Expand Down
19 changes: 5 additions & 14 deletions src/commands/bam/convert/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,18 @@ use crate::cli::bam::{BamConversionType, ConvertArgs, ConvertParams};
use crate::io::match_bam_input;

use anyhow::{bail, Result};
use noodles::bam::io::reader::Builder;
use noodles::bam::io::Reader;
use noodles::sam::Header;
use std::io::Read;
use rust_htslib::bam::Reader as BamReader;

fn dispatch_conversion<R: Read>(
bam: Reader<R>,
header: Header,
params: ConvertParams,
) -> Result<()> {
fn dispatch_conversion(bam: BamReader, params: ConvertParams) -> Result<()> {
match params.conv {
BamConversionType::Bed => convert_bed(bam, header, params),
BamConversionType::Bed => convert_bed(bam, params),
_ => bail!(
"FASTQ conversion is not implemented yet - but checkout samtools fastq for a solution"
),
}
}

pub fn convert(args: ConvertArgs) -> Result<()> {
let in_handle = match_bam_input(args.input.input)?;
let mut bam = Builder.build_from_reader(in_handle);
let header = bam.read_header()?;
dispatch_conversion(bam, header, args.params)
let bam = match_bam_input(args.input.input)?;
dispatch_conversion(bam, args.params)
}
Loading

0 comments on commit 53bab87

Please sign in to comment.