Skip to content

Commit

Permalink
feat: Add fas2one and fqs2one commands for batch file conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
cauliyang committed Jan 21, 2025
1 parent a927fa5 commit eb54b5d
Show file tree
Hide file tree
Showing 10 changed files with 85 additions and 48 deletions.
6 changes: 5 additions & 1 deletion crates/deepbiop-cli/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@ pub use extractfq::*;
pub mod extractfa;
pub use extractfa::*;

use anyhow::Result;
pub mod fqs2one;
pub use fqs2one::*;
pub mod fas2one;
pub use fas2one::*;

use anyhow::Result;
// Set up threads only once, using the common_opts from the top-level Cli struct
pub fn set_up_threads(threads: Option<usize>) -> Result<()> {
log::info!("Threads number: {:?}", threads.unwrap());
Expand Down
5 changes: 2 additions & 3 deletions crates/deepbiop-cli/src/cli/extractfq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,12 @@ use bstr::BString;
use clap::Parser;
use deepbiop_fq as fq;

use super::set_up_threads;
use log::info;
use std::io::BufRead;
use std::io::BufReader;
use std::path::{Path, PathBuf};

use super::set_up_threads;
use log::info;

#[derive(Debug, Parser)]
pub struct ExtractFq {
/// path to the bam file
Expand Down
29 changes: 29 additions & 0 deletions crates/deepbiop-cli/src/cli/fas2one.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use anyhow::Result;
use clap::Parser;
use deepbiop_fa as fa;

use std::path::PathBuf;

use super::set_up_threads;

// Command-line arguments for the `fas2one` subcommand, which merges several
// FASTA files into a single bgzip-compressed FASTA file.
//
// Plain `//` comments are used at struct level on purpose: with clap's derive
// API, `///` doc comments become user-visible help text.
#[derive(Debug, Parser)]
pub struct FasToOne {
    /// paths to the fa files to merge (one or more)
    // `required = true`: a `Vec` positional is optional by default, which would
    // let the command run with nothing to merge.
    #[arg(value_name = "fas", action = clap::ArgAction::Append, required = true)]
    fas: Vec<PathBuf>,

    /// output bgzip compressed file
    #[arg(long, value_name = "output")]
    output: PathBuf,

    /// threads number
    // The default value means this is always `Some(_)` after parsing.
    #[arg(short, long, default_value = "2")]
    threads: Option<usize>,
}

impl FasToOne {
    /// Executes the `fas2one` command.
    ///
    /// Configures the thread count through `set_up_threads`, then hands the
    /// input paths and the output path to
    /// `fa::io::convert_multiple_fas_to_one_bgzip_fa` with its `parallel`
    /// argument set to `true`.
    ///
    /// # Errors
    ///
    /// Returns the error from thread setup or from the conversion, whichever
    /// fails first.
    pub fn run(&self) -> Result<()> {
        let Self {
            fas,
            output,
            threads,
        } = self;

        set_up_threads(*threads)?;

        // The CLI always requests the parallel code path.
        let parallel = true;
        fa::io::convert_multiple_fas_to_one_bgzip_fa(fas, output, parallel)?;
        Ok(())
    }
}
2 changes: 1 addition & 1 deletion crates/deepbiop-cli/src/cli/fq2parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ pub struct FqToParquet {
chunk_size: usize,

/// result path
#[arg(long, value_name = "result")]
#[arg(long, value_name = "output")]
output: Option<PathBuf>,

/// threads number
Expand Down
29 changes: 29 additions & 0 deletions crates/deepbiop-cli/src/cli/fqs2one.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use anyhow::Result;
use clap::Parser;
use deepbiop_fq as fq;

use std::path::PathBuf;

use super::set_up_threads;

// Command-line arguments for the `fqs2one` subcommand, which merges several
// FASTQ files into a single bgzip-compressed FASTQ file.
//
// Plain `//` comments are used at struct level on purpose: with clap's derive
// API, `///` doc comments become user-visible help text.
#[derive(Debug, Parser)]
pub struct FqsToOne {
    /// paths to the fq files to merge (one or more)
    // `required = true`: a `Vec` positional is optional by default, which would
    // let the command run with nothing to merge.
    #[arg(value_name = "fqs", action = clap::ArgAction::Append, required = true)]
    fqs: Vec<PathBuf>,

    /// output bgzip compressed file
    #[arg(long, value_name = "output")]
    output: PathBuf,

    /// threads number
    // The default value means this is always `Some(_)` after parsing.
    #[arg(short, long, default_value = "2")]
    threads: Option<usize>,
}

impl FqsToOne {
    /// Executes the `fqs2one` command.
    ///
    /// Configures the thread count through `set_up_threads`, then hands the
    /// input paths and the output path to
    /// `fq::io::convert_multiple_fqs_to_one_bgzip_fq`, passing `true` as the
    /// third argument.
    ///
    /// # Errors
    ///
    /// Returns the error from thread setup or from the conversion, whichever
    /// fails first.
    pub fn run(&self) -> Result<()> {
        let Self {
            fqs,
            output,
            threads,
        } = self;

        set_up_threads(*threads)?;

        // NOTE(review): the third argument is presumably the `parallel` flag,
        // as in the fasta counterpart — confirm against `deepbiop_fq::io`.
        let parallel = true;
        fq::io::convert_multiple_fqs_to_one_bgzip_fq(fqs, output, parallel)?;
        Ok(())
    }
}
16 changes: 16 additions & 0 deletions crates/deepbiop-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,12 @@ pub enum Commands {

/// Extract fasta reads from a fasta file.
ExtractFa(cli::ExtractFa),

/// Multiple Fastqs to one Fastq conversion.
FqsToOne(cli::FqsToOne),

/// Multiple Fastas to one Fasta conversion.
FasToOne(cli::FasToOne),
}

impl Display for Commands {
Expand All @@ -64,6 +70,8 @@ impl Display for Commands {
Commands::ExtractFq(_) => write!(f, "extractfq"),
Commands::ExtractFa(_) => write!(f, "extractfa"),
Commands::FqToParquet(_) => write!(f, "fq2parquet"),
Commands::FqsToOne(_) => write!(f, "fqs2one"),
Commands::FasToOne(_) => write!(f, "fas2one"),
}
}
}
Expand Down Expand Up @@ -132,6 +140,14 @@ fn main() -> Result<()> {
extractfa.run().unwrap();
}

Some(Commands::FqsToOne(fqs2one)) => {
fqs2one.run().unwrap();
}

Some(Commands::FasToOne(fas2one)) => {
fas2one.run().unwrap();
}

None => {
println!("No command provided!");
}
Expand Down
22 changes: 1 addition & 21 deletions crates/deepbiop-fa/src/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,27 +116,7 @@ pub fn write_bzip_fa_parallel_for_noodle_record(
Ok(())
}

/// Reads the records of every FASTA file in `paths` and writes them all to a
/// single file at `result_path` via `write_bzip_fa_parallel_for_noodle_record`.
///
/// When `parallel` is `true` the inputs are read through `par_iter`;
/// otherwise they are read one after another with a plain iterator.
///
/// # Errors
///
/// Returns the error produced by `write_bzip_fa_parallel_for_noodle_record`.
///
/// # Panics
///
/// Panics if `read_noodle_records` fails for any input path, because its
/// result is unwrapped.
// NOTE(review): the `unwrap` turns a read failure into a panic even though this
// function returns `Result`; consider propagating the error instead.
pub fn convert_multiple_fas_to_one_zip_fa<P: AsRef<Path>>(
    paths: &[PathBuf],
    result_path: P,
    parallel: bool,
) -> Result<()> {
    let merged: Vec<FastaRecord> = if parallel {
        paths
            .par_iter()
            .flat_map(|fa_path| read_noodle_records(fa_path).unwrap())
            .collect()
    } else {
        paths
            .iter()
            .flat_map(|fa_path| read_noodle_records(fa_path).unwrap())
            .collect()
    };

    let destination = result_path.as_ref().to_path_buf();
    write_bzip_fa_parallel_for_noodle_record(&merged, destination, None)?;
    Ok(())
}

pub fn convert_multiple_zip_fas_to_one_zip_fa<P: AsRef<Path>>(
pub fn convert_multiple_fas_to_one_bgzip_fa<P: AsRef<Path>>(
paths: &[PathBuf],
result_path: P,
parallel: bool,
Expand Down
2 changes: 1 addition & 1 deletion crates/deepbiop-fa/src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ fn convert_multiple_fas_to_one_fa(
let is_zip = paths[0].extension().unwrap() == "gz";

if is_zip {
io::convert_multiple_fas_to_one_zip_fa(&paths, result_path, parallel)?;
io::convert_multiple_fas_to_one_bzip_fa(&paths, result_path, parallel)?;

Check failure on line 179 in crates/deepbiop-fa/src/python.rs

View workflow job for this annotation

GitHub Actions / MSRV

cannot find function `convert_multiple_fas_to_one_bzip_fa` in module `io`

Check failure on line 179 in crates/deepbiop-fa/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (beta)

cannot find function `convert_multiple_fas_to_one_bzip_fa` in module `io`

Check failure on line 179 in crates/deepbiop-fa/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (macos)

cannot find function `convert_multiple_fas_to_one_bzip_fa` in module `io`

Check failure on line 179 in crates/deepbiop-fa/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (stable)

cannot find function `convert_multiple_fas_to_one_bzip_fa` in module `io`

Check failure on line 179 in crates/deepbiop-fa/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (windows)

cannot find function `convert_multiple_fas_to_one_bzip_fa` in module `io`
} else {
io::convert_multiple_zip_fas_to_one_zip_fa(&paths, result_path, parallel)?;

Check failure on line 181 in crates/deepbiop-fa/src/python.rs

View workflow job for this annotation

GitHub Actions / MSRV

cannot find function `convert_multiple_zip_fas_to_one_zip_fa` in module `io`

Check failure on line 181 in crates/deepbiop-fa/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (beta)

cannot find function `convert_multiple_zip_fas_to_one_zip_fa` in module `io`

Check failure on line 181 in crates/deepbiop-fa/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (macos)

cannot find function `convert_multiple_zip_fas_to_one_zip_fa` in module `io`

Check failure on line 181 in crates/deepbiop-fa/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (stable)

cannot find function `convert_multiple_zip_fas_to_one_zip_fa` in module `io`

Check failure on line 181 in crates/deepbiop-fa/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (windows)

cannot find function `convert_multiple_zip_fas_to_one_zip_fa` in module `io`
}
Expand Down
20 changes: 0 additions & 20 deletions crates/deepbiop-fq/src/io.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,26 +100,6 @@ pub fn write_bgzip_fq_parallel_for_noodle_record(
Ok(())
}

/// Reads the records of every FASTQ file in `paths` and writes them all to a
/// single file at `result_path` via `write_bgzip_fq_parallel_for_noodle_record`.
///
/// When `parallel` is `true` the inputs are read through `par_iter`;
/// otherwise they are read one after another with a plain iterator.
///
/// # Errors
///
/// Returns the error produced by `write_bgzip_fq_parallel_for_noodle_record`.
///
/// # Panics
///
/// Panics if `read_noodle_records` fails for any input path, because its
/// result is unwrapped.
// NOTE(review): the `unwrap` turns a read failure into a panic even though this
// function returns `Result`; consider propagating the error instead.
pub fn convert_multiple_bgzip_fqs_to_one_bgzip_fq<P: AsRef<Path>>(
    paths: &[PathBuf],
    result_path: P,
    parallel: bool,
) -> Result<()> {
    let merged: Vec<FastqRecord> = if parallel {
        paths
            .par_iter()
            .flat_map(|fq_path| read_noodle_records(fq_path).unwrap())
            .collect()
    } else {
        paths
            .iter()
            .flat_map(|fq_path| read_noodle_records(fq_path).unwrap())
            .collect()
    };

    let destination = result_path.as_ref().to_path_buf();
    write_bgzip_fq_parallel_for_noodle_record(&merged, destination, None)?;
    Ok(())
}

pub fn convert_multiple_fqs_to_one_bgzip_fq<P: AsRef<Path>>(
paths: &[PathBuf],
result_path: P,
Expand Down
2 changes: 1 addition & 1 deletion crates/deepbiop-fq/src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ fn convert_multiple_fqs_to_one_fq(
if is_zip {
io::convert_multiple_fqs_to_one_bgzip_fq(&paths, result_path, parallel)?;
} else {
io::convert_multiple_bgzip_fqs_to_one_bgzip_fq(&paths, result_path, parallel)?;
io::convert_multiple_fqs_to_one_bgzip_fq(&paths, result_path, parallel)?;
}

Ok(())
Expand Down

0 comments on commit eb54b5d

Please sign in to comment.