diff --git a/Cargo.toml b/Cargo.toml
index db94ad7..233ff3e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -12,7 +12,7 @@ repository = "https://github.com/cauliyang/DeepBioP"
 license = "Apache-2.0"
 
 [workspace.dependencies]
-pyo3 = { version = "0.21.2", features = [
+pyo3 = { version = "0.23.3", features = [
   "abi3-py39",
   "extension-module",
   "anyhow",
@@ -23,7 +23,7 @@ anyhow = "1.0"
 walkdir = { version = "2.5" }
 rayon = { version = "1.10" }
 log = "0.4"
-pyo3-log = "0.11"
+pyo3-log = "0.12.1"
 noodles = { version = "0.87.0", features = [
   "bgzf",
   "core",
@@ -38,22 +38,22 @@ bio = "2.0"
 needletail = "0.6"
 ahash = "0.8.11"
 
-numpy = "0.21"
-ndarray = { version = "0.15", features = ["serde", "rayon"] }
+numpy = "0.23"
+ndarray = { version = "0.16", features = ["serde", "rayon"] }
 num-traits = { version = "0.2" }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 rand = "0.8"
 rand_distr = "0.4"
 bitvec = "1.0"
-itertools = "0.13.0"
+itertools = "0.14.0"
 derive_builder = "0.20"
 lexical = "7.0"
 bstr = "1.11.3"
 lazy_static = "1.5.0"
 tempfile = "3.15"
-parquet = "52.2.0"
-arrow = "52.2"
+parquet = "54.0"
+arrow = "54.0"
 candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.8.1" }
 colored = "2.2"
 textwrap = "0.16"
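Note on the dependency bumps above: most of the mechanical churn in this diff follows from pyo3 0.21 -> 0.23. The gil-ref constructors drop their `_bound` suffix (`PyModule::new_bound` becomes `PyModule::new`, `PyBytes::new_bound` becomes `PyBytes::new`, rust-numpy's `into_pyarray_bound` becomes `into_pyarray`), and a trailing `Option<T>` argument keeps an implicit `None` default only if an explicit `#[pyo3(signature = ...)]` declares it. A minimal sketch of the signature rule (the `count_reads` function is illustrative, not from this repo):

```rust
use pyo3::prelude::*;

// pyo3 0.23: without the `signature` attribute, `threads` would become a
// required positional argument on the Python side; declaring `threads=None`
// restores the optional behaviour that was implicit in pyo3 0.21.
#[pyfunction]
#[pyo3(signature = (path, threads=None))]
fn count_reads(path: String, threads: Option<usize>) -> String {
    format!("{path}: using {} thread(s)", threads.unwrap_or(1))
}
```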
diff --git a/crates/deepbiop-bam/src/python.rs b/crates/deepbiop-bam/src/python.rs
index 2af894d..e21ec9b 100644
--- a/crates/deepbiop-bam/src/python.rs
+++ b/crates/deepbiop-bam/src/python.rs
@@ -13,6 +13,7 @@ use pyo3_stub_gen::derive::*;
 /// Calculate the number of chimeric reads in a BAM file.
 #[gen_stub_pyfunction(module = "deepbiop.bam")]
 #[pyfunction]
+#[pyo3(signature = (bam, threads=None))]
 fn count_chimeric_reads_for_path(bam: PathBuf, threads: Option<usize>) -> Result<usize> {
     chimeric::count_chimeric_reads_for_path(bam, threads)
 }
@@ -20,6 +21,7 @@
 /// Calculate the number of chimeric reads in multiple BAM files.
 #[gen_stub_pyfunction(module = "deepbiop.bam")]
 #[pyfunction]
+#[pyo3(signature = (bams, threads=None))]
 fn count_chimeric_reads_for_paths(
     bams: Vec<PathBuf>,
     threads: Option<usize>,
 ) ->
@@ -38,7 +40,7 @@ fn left_right_soft_clip(cigar_string: &str) -> Result<(usize, usize)> {
 // register bam sub module
 pub fn register_bam_module(parent_module: &Bound<'_, PyModule>) -> PyResult<()> {
     let sub_module_name = "bam";
-    let child_module = PyModule::new_bound(parent_module.py(), sub_module_name)?;
+    let child_module = PyModule::new(parent_module.py(), sub_module_name)?;
 
     child_module.add_function(wrap_pyfunction!(left_right_soft_clip, &child_module)?)?;
     child_module.add_function(wrap_pyfunction!(
diff --git a/crates/deepbiop-cli/Cargo.toml b/crates/deepbiop-cli/Cargo.toml
index 91b5a94..7c1b645 100644
--- a/crates/deepbiop-cli/Cargo.toml
+++ b/crates/deepbiop-cli/Cargo.toml
@@ -15,6 +15,7 @@ noodles = { workspace = true }
 deepbiop-fq = { workspace = true }
 deepbiop-bam = { workspace = true }
 deepbiop-utils = { workspace = true }
+deepbiop-fa = { workspace = true }
 
 rayon = { workspace = true }
 log = { workspace = true }
@@ -23,7 +24,7 @@ ahash = { workspace = true }
 
 clap = { version = "4.5", features = ["derive"] }
 clap_complete = "4.5"
-clap-verbosity-flag = "2.2"
+clap-verbosity-flag = "3.0"
 ctrlc = "3.4"
 human-panic = "2.0"
 env_logger = "0.11.6"
diff --git a/crates/deepbiop-cli/src/cli.rs b/crates/deepbiop-cli/src/cli.rs
index d11413d..e3938f4 100644
--- a/crates/deepbiop-cli/src/cli.rs
+++ b/crates/deepbiop-cli/src/cli.rs
@@ -6,6 +6,8 @@ pub mod fa2fq;
 pub use fa2fq::*;
 pub mod chimeric_count;
 pub use chimeric_count::*;
+pub mod fa2parquet;
+pub use fa2parquet::*;
 
 use anyhow::Result;
diff --git a/crates/deepbiop-cli/src/cli/fa2parquet.rs b/crates/deepbiop-cli/src/cli/fa2parquet.rs
new file mode 100644
index 0000000..e2898f2
--- /dev/null
+++ b/crates/deepbiop-cli/src/cli/fa2parquet.rs
@@ -0,0 +1,65 @@
+use anyhow::Result;
+use clap::Parser;
+use log::warn;
+
+use std::path::PathBuf;
+
+use super::set_up_threads;
+use deepbiop_fa as fa;
+use fa::encode::Encoder;
+
+use deepbiop_utils as utils;
+
+#[derive(Debug, Parser)]
+pub struct FaToParquet {
+    /// path to the fasta file
+    #[arg(value_name = "fa")]
+    fa: PathBuf,
+
+    /// whether to convert the fasta file to parquet in chunks
+    #[arg(long)]
+    chunk: bool,
+
+    /// chunk size
+    #[arg(long, default_value = "1000000")]
+    chunk_size: usize,
+
+    /// result path
+    #[arg(long, value_name = "result")]
+    output: Option<PathBuf>,
+
+    /// number of threads
+    #[arg(short, long, default_value = "2")]
+    threads: Option<usize>,
+}
+
+impl FaToParquet {
+    pub fn run(&self) -> Result<()> {
+        set_up_threads(self.threads)?;
+        let option = fa::encode::FaEncoderOptionBuilder::default()
+            .bases(fa::encode::BASES.to_vec())
+            .build()?;
+        let mut fa_encoder = fa::encode::ParquetEncoderBuilder::default()
+            .option(option)
+            .build()?;
+
+        if self.chunk {
+            fa_encoder.encode_chunk(&self.fa, self.chunk_size, false)?;
+            return Ok(());
+        }
+
+        let (record_batch, schema) = fa_encoder.encode(&self.fa)?;
+        // the result file is the fasta path with a .parquet extension
+        let parquet_path = if let Some(path) = &self.output {
+            if path.with_extension("parquet").exists() {
+                warn!("{} already exists, overwriting", path.with_extension("parquet").display());
+            }
+            path.with_extension("parquet")
+        } else {
+            self.fa.with_extension("parquet")
+        };
+        utils::io::write_parquet(parquet_path, record_batch, schema)?;
+
+        Ok(())
+    }
+}
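For context, the non-chunked branch of `FaToParquet::run` boils down to a handful of library calls. The sketch below uses only APIs visible in this diff; the `fa_to_parquet` helper name is mine:

```rust
use std::path::Path;

use anyhow::Result;
use deepbiop_fa as fa;
use deepbiop_utils as utils;
use fa::encode::Encoder;

// Encode one FASTA file into a Parquet file next to it, mirroring the
// non-chunked branch of `FaToParquet::run` above.
fn fa_to_parquet(fa_path: &Path) -> Result<()> {
    let option = fa::encode::FaEncoderOptionBuilder::default()
        .bases(fa::encode::BASES.to_vec())
        .build()?;
    let mut encoder = fa::encode::ParquetEncoderBuilder::default()
        .option(option)
        .build()?;
    let (record_batch, schema) = encoder.encode(fa_path)?;
    utils::io::write_parquet(fa_path.with_extension("parquet"), record_batch, schema)?;
    Ok(())
}
```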
diff --git a/crates/deepbiop-cli/src/main.rs b/crates/deepbiop-cli/src/main.rs
index bdab89a..5b3e60d 100644
--- a/crates/deepbiop-cli/src/main.rs
+++ b/crates/deepbiop-cli/src/main.rs
@@ -39,6 +39,9 @@ pub enum Commands {
 
     /// Fastq to fasta conversion.
     FaToFq(cli::FaToFq),
+
+    /// Fasta to parquet conversion.
+    FaToParquet(cli::FaToParquet),
 }
 
 impl Display for Commands {
@@ -48,6 +51,7 @@
             Commands::BamToFq(_) => write!(f, "bam2fq"),
             Commands::FqToFa(_) => write!(f, "fq2fa"),
             Commands::FaToFq(_) => write!(f, "fa2fq"),
+            Commands::FaToParquet(_) => write!(f, "fa2parquet"),
         }
     }
 }
@@ -100,6 +104,10 @@ fn main() -> Result<()> {
             fa2fq.run().unwrap();
         }
 
+        Some(Commands::FaToParquet(fa2parquet)) => {
+            fa2parquet.run().unwrap();
+        }
+
         None => {
             println!("No command provided!");
         }
diff --git a/crates/deepbiop-fa/src/encode/option.rs b/crates/deepbiop-fa/src/encode/option.rs
index eb90b94..e41b52c 100644
--- a/crates/deepbiop-fa/src/encode/option.rs
+++ b/crates/deepbiop-fa/src/encode/option.rs
@@ -6,34 +6,24 @@ use pyo3::prelude::*;
 use pyo3_stub_gen::derive::*;
 
 pub const BASES: &[u8] = b"ATCGN";
-pub const QUAL_OFFSET: u8 = 33;
 
 #[gen_stub_pyclass]
 #[pyclass(module = "deepbiop.fa")]
 #[derive(Debug, Builder, Default, Clone, Serialize, Deserialize)]
 pub struct FaEncoderOption {
-    #[pyo3(get, set)]
-    #[builder(default = "QUAL_OFFSET")]
-    pub qual_offset: u8,
-
     #[pyo3(get, set)]
     #[builder(default = "BASES.to_vec()")]
     pub bases: Vec<u8>,
-
-    #[pyo3(get, set)]
-    #[builder(default = "2")]
-    pub threads: usize,
 }
 
 #[gen_stub_pymethods]
 #[pymethods]
 impl FaEncoderOption {
     #[new]
-    fn py_new(qual_offset: u8, bases: String, threads: Option<usize>) -> Self {
+    #[pyo3(signature = (bases))]
+    fn py_new(bases: String) -> Self {
         FaEncoderOptionBuilder::default()
-            .qual_offset(qual_offset)
             .bases(bases.as_bytes().to_vec())
-            .threads(threads.unwrap_or(2))
             .build()
             .expect("Failed to build FqEncoderOption from Python arguments.")
     }
@@ -41,10 +31,6 @@ impl FaEncoderOption {
 
 impl Display for FaEncoderOption {
     fn fmt(&self, f: &mut Formatter) -> fmt::Result {
-        write!(
-            f,
-            "FaEncoderOption {{ qual_offset: {}, bases: {:?}}}",
-            self.qual_offset, self.bases
-        )
+        write!(f, "FaEncoderOption {{ bases: {:?} }}", self.bases)
     }
 }
diff --git a/crates/deepbiop-fa/src/encode/parquet.rs b/crates/deepbiop-fa/src/encode/parquet.rs
index eb2a9d6..5cd61c0 100644
--- a/crates/deepbiop-fa/src/encode/parquet.rs
+++ b/crates/deepbiop-fa/src/encode/parquet.rs
@@ -178,11 +178,11 @@ mod tests {
     #[test]
     fn test_encode_fq_for_parquet() {
         let option = FaEncoderOptionBuilder::default().build().unwrap();
-
         let mut encoder = ParquetEncoderBuilder::default()
             .option(option)
             .build()
             .unwrap();
+
         let (record_batch, scheme) = encoder.encode("tests/data/test.fa").unwrap();
         write_parquet("test.parquet", record_batch, scheme).unwrap();
         // remove test.parquet
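The `FaEncoderOption` shrink above reflects that FASTA records carry no quality line, so a quality offset (and a per-option thread count) never belonged here; the base alphabet is the only field left. A small sketch of the remaining surface, assuming only the builder and `Display` impl shown in this hunk (the custom alphabet is illustrative):

```rust
use deepbiop_fa::encode::FaEncoderOptionBuilder;

// Build an option with a custom alphabet; `bases` is now the only knob.
// The derive_builder error type converts into `anyhow::Error`, just as
// the `.build()?` calls in `FaToParquet::run` rely on.
fn custom_option() -> anyhow::Result<()> {
    let option = FaEncoderOptionBuilder::default()
        .bases(b"ACGTN".to_vec())
        .build()?;
    // Prints the raw bytes, e.g. `FaEncoderOption { bases: [65, 67, 71, 84, 78] }`
    println!("{option}");
    Ok(())
}
```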
diff --git a/crates/deepbiop-fa/src/python.rs b/crates/deepbiop-fa/src/python.rs
index 5647680..09c4b75 100644
--- a/crates/deepbiop-fa/src/python.rs
+++ b/crates/deepbiop-fa/src/python.rs
@@ -79,6 +79,7 @@ impl PyRecordData {
 
 #[gen_stub_pyfunction(module = "deepbiop.fa")]
 #[pyfunction]
+#[pyo3(signature = (records_data, file_path=None))]
 fn write_fa(records_data: Vec<PyRecordData>, file_path: Option<PathBuf>) -> Result<()> {
     let records: Vec<fasta::Record> = records_data
         .into_par_iter()
@@ -108,11 +109,9 @@ fn encode_fa_path_to_parquet_chunk(
     chunk_size: usize,
     parallel: bool,
     bases: String,
-    qual_offset: usize,
 ) -> Result<()> {
     let option = encode::FaEncoderOptionBuilder::default()
         .bases(bases.as_bytes().to_vec())
-        .qual_offset(qual_offset as u8)
         .build()?;
 
     let mut fa_encoder = encode::ParquetEncoderBuilder::default()
@@ -124,15 +123,14 @@ fn encode_fa_path_to_parquet_chunk(
 
 #[gen_stub_pyfunction(module = "deepbiop.fa")]
 #[pyfunction]
+#[pyo3(signature = (fa_path, bases, result_path=None))]
 fn encode_fa_path_to_parquet(
     fa_path: PathBuf,
     bases: String,
-    qual_offset: usize,
     result_path: Option<PathBuf>,
 ) -> Result<()> {
     let option = encode::FaEncoderOptionBuilder::default()
         .bases(bases.as_bytes().to_vec())
-        .qual_offset(qual_offset as u8)
         .build()?;
 
     let mut fa_encoder = encode::ParquetEncoderBuilder::default()
@@ -155,13 +153,9 @@ fn encode_fa_path_to_parquet(
 
 #[gen_stub_pyfunction(module = "deepbiop.fa")]
 #[pyfunction]
-fn encode_fa_paths_to_parquet(
-    fa_path: Vec<PathBuf>,
-    bases: String,
-    qual_offset: usize,
-) -> Result<()> {
+fn encode_fa_paths_to_parquet(fa_path: Vec<PathBuf>, bases: String) -> Result<()> {
     fa_path.iter().for_each(|path| {
-        encode_fa_path_to_parquet(path.clone(), bases.clone(), qual_offset, None).unwrap();
+        encode_fa_path_to_parquet(path.clone(), bases.clone(), None).unwrap();
     });
     Ok(())
 }
@@ -191,7 +185,7 @@ fn convert_multiple_fas_to_one_fa(
 // register fq sub_module
 pub fn register_fa_module(parent_module: &Bound<'_, PyModule>) -> PyResult<()> {
     let sub_module_name = "fa";
-    let child_module = PyModule::new_bound(parent_module.py(), sub_module_name)?;
+    let child_module = PyModule::new(parent_module.py(), sub_module_name)?;
 
     child_module.add_class::()?;
     child_module.add_class::()?;
diff --git a/crates/deepbiop-fa/tests/data/test.parquet b/crates/deepbiop-fa/tests/data/test.parquet
new file mode 100644
index 0000000..83cb373
Binary files /dev/null and b/crates/deepbiop-fa/tests/data/test.parquet differ
diff --git a/crates/deepbiop-fq/src/encode/option.rs b/crates/deepbiop-fq/src/encode/option.rs
index 5e09547..f77d16e 100644
--- a/crates/deepbiop-fq/src/encode/option.rs
+++ b/crates/deepbiop-fq/src/encode/option.rs
@@ -35,6 +35,7 @@ pub struct FqEncoderOption {
 #[pymethods]
 impl FqEncoderOption {
     #[new]
+    #[pyo3(signature = (kmer_size, qual_offset, bases, vectorized_target, threads=None))]
     fn py_new(
         kmer_size: u8,
         qual_offset: u8,
         bases: String,
diff --git a/crates/deepbiop-fq/src/predicts.rs b/crates/deepbiop-fq/src/predicts.rs
index 0734ed5..fff6fc1 100644
--- a/crates/deepbiop-fq/src/predicts.rs
+++ b/crates/deepbiop-fq/src/predicts.rs
@@ -66,6 +66,7 @@ pub struct Predict {
 #[pymethods]
 impl Predict {
     #[new]
+    #[pyo3(signature = (prediction, seq, id, is_truncated, qual=None))]
     pub fn new(
         prediction: Vec,
         seq: String,
@@ -148,6 +149,7 @@ impl Predict {
     }
 
     /// Show the information of the prediction
+    #[pyo3(signature = (smooth_interval, text_width=None))]
     pub fn show_info(
         &self,
         smooth_interval: Vec<(usize, usize)>,
@@ -175,15 +177,16 @@
         })?;
 
         // Convert JSON string to Python bytes
-        Ok(PyBytes::new_bound(py, serialized.as_bytes()).into())
+        Ok(PyBytes::new(py, serialized.as_bytes()).into())
     }
 
     fn __setstate__(&mut self, py: Python, state: PyObject) -> PyResult<()> {
-        // Expect a bytes object for state
-        let state_bytes: &PyBytes = state.extract(py)?;
+        // Convert PyObject to PyBytes
+        let state_bytes = state.downcast_bound::<PyBytes>(py)?;
 
-        // Deserialize the JSON string into the current instance
-        *self = serde_json::from_slice(state_bytes.as_bytes()).map_err(|e| {
+        // Get the bytes and deserialize
+        let bytes = state_bytes.as_bytes();
+        *self = serde_json::from_slice(bytes).map_err(|e| {
             PyErr::new::(format!(
                 "Failed to deserialize: {}",
                 e
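The `__getstate__`/`__setstate__` rewrite above is the same 0.23 migration seen elsewhere in this PR: extracting a gil-ref `&PyBytes` is replaced by a downcast to the `Bound` smart pointer. A standalone sketch of the new pattern (the `state_to_bytes` helper is mine):

```rust
use pyo3::prelude::*;
use pyo3::types::PyBytes;

// pyo3 0.23: downcast the stored object to `Bound<'_, PyBytes>` and borrow
// its bytes, instead of the removed `state.extract::<&PyBytes>(py)` form.
fn state_to_bytes(py: Python<'_>, state: PyObject) -> PyResult<Vec<u8>> {
    let state_bytes = state.downcast_bound::<PyBytes>(py)?;
    Ok(state_bytes.as_bytes().to_vec())
}
```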
diff --git a/crates/deepbiop-fq/src/python.rs b/crates/deepbiop-fq/src/python.rs
index 6431eb1..953b5da 100644
--- a/crates/deepbiop-fq/src/python.rs
+++ b/crates/deepbiop-fq/src/python.rs
@@ -1,3 +1,4 @@
+use numpy::IntoPyArray;
 use std::path::PathBuf;
 
 use crate::{
@@ -13,7 +14,7 @@ use anyhow::Result;
 use log::warn;
 use needletail::Sequence;
 use noodles::fasta;
-use numpy::{IntoPyArray, PyArray2, PyArray3};
+use numpy::{PyArray2, PyArray3};
 use pyo3::prelude::*;
 use rayon::prelude::*;
 
@@ -23,6 +24,7 @@ use pyo3_stub_gen::derive::*;
 #[pymethods]
 impl encode::TensorEncoder {
     #[new]
+    #[pyo3(signature = (option, tensor_max_width=None, tensor_max_seq_len=None))]
     fn py_new(
         option: encode::FqEncoderOption,
         tensor_max_width: Option<usize>,
         tensor_max_seq_len: Option<usize>,
@@ -117,6 +119,7 @@ impl PyRecordData {
 
 #[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
+#[pyo3(signature = (records_data, file_path=None))]
 fn write_fq(records_data: Vec<PyRecordData>, file_path: Option<PathBuf>) -> Result<()> {
     let records: Vec<fastq::Record> = records_data
         .into_par_iter()
@@ -196,6 +199,7 @@ fn normalize_seq(seq: String, iupac: bool) -> String {
 #[allow(clippy::too_many_arguments, clippy::type_complexity)]
 #[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
+#[pyo3(signature = (fq_paths, k, bases, qual_offset, vectorized_target, parallel_for_files, max_width=None, max_seq_len=None))]
 fn encode_fq_paths_to_tensor(
     py: Python,
     fq_paths: Vec<PathBuf>,
@@ -234,9 +238,9 @@
         .collect();
 
     Ok((
-        input.into_pyarray_bound(py),
-        target.into_pyarray_bound(py),
-        qual.into_pyarray_bound(py),
+        input.into_pyarray(py),
+        target.into_pyarray(py),
+        qual.into_pyarray(py),
         kmer2id,
     ))
 }
@@ -244,6 +248,7 @@
 #[allow(clippy::too_many_arguments, clippy::type_complexity)]
 #[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
+#[pyo3(signature = (fq_path, k, bases, qual_offset, vectorized_target, max_width=None, max_seq_len=None))]
 fn encode_fq_path_to_tensor(
     py: Python,
     fq_path: PathBuf,
@@ -281,15 +286,16 @@
         .collect();
 
     Ok((
-        input.into_pyarray_bound(py),
-        target.into_pyarray_bound(py),
-        qual.into_pyarray_bound(py),
+        input.into_pyarray(py),
+        target.into_pyarray(py),
+        qual.into_pyarray(py),
         kmer2id,
     ))
 }
 
 #[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
+#[pyo3(signature = (fq_path, k, bases, qual_offset, vectorized_target, result_path=None))]
 fn encode_fq_path_to_json(
     fq_path: PathBuf,
     k: usize,
@@ -350,6 +356,7 @@ fn encode_fq_path_to_parquet_chunk(
 
 #[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
+#[pyo3(signature = (fq_path, bases, qual_offset, vectorized_target, result_path=None))]
 fn encode_fq_path_to_parquet(
     fq_path: PathBuf,
     bases: String,
@@ -471,6 +478,7 @@ pub fn load_predicts_from_batch_pt(
 
 #[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
+#[pyo3(signature = (pt_path, ignore_label, id_table, max_predicts=None))]
 pub fn load_predicts_from_batch_pts(
     pt_path: PathBuf,
     ignore_label: i64,
@@ -495,7 +503,7 @@ pub fn fastq_to_fasta(fastq_path: PathBuf, fasta_path: PathBuf) -> Result<()> {
 // register fq sub_module
 pub fn register_fq_module(parent_module: &Bound<'_, PyModule>) -> PyResult<()> {
     let sub_module_name = "fq";
-    let child_module = PyModule::new_bound(parent_module.py(), sub_module_name)?;
+    let child_module = PyModule::new(parent_module.py(), sub_module_name)?;
 
     child_module.add_class::()?;
     child_module.add_class::()?;
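rust-numpy 0.23 tracks the same pyo3 API shift: `into_pyarray_bound` is gone and `into_pyarray` now hands back the array as a `Bound` value directly, which is what the three-way replacements above do. A minimal sketch (the `zeros_matrix` helper is illustrative):

```rust
use ndarray::Array2;
use numpy::{IntoPyArray, PyArray2};
use pyo3::prelude::*;

// numpy 0.23: `into_pyarray` consumes the ndarray and returns a
// `Bound<'py, PyArray2<_>>`, replacing the old `into_pyarray_bound`.
fn zeros_matrix(py: Python<'_>) -> Bound<'_, PyArray2<i32>> {
    Array2::<i32>::zeros((2, 3)).into_pyarray(py)
}
```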
"deepbiop.utils")] #[pyfunction] +#[pyo3(signature = (sequence, targets, text_width=None))] pub fn highlight_targets( sequence: &str, targets: Vec<(usize, usize)>, diff --git a/crates/deepbiop-utils/src/python.rs b/crates/deepbiop-utils/src/python.rs index d7b4659..7a38e44 100644 --- a/crates/deepbiop-utils/src/python.rs +++ b/crates/deepbiop-utils/src/python.rs @@ -116,7 +116,7 @@ fn reverse_complement(seq: String) -> String { // register utils module pub fn register_utils_module(parent_module: &Bound<'_, PyModule>) -> PyResult<()> { let sub_module_name = "utils"; - let child_module = PyModule::new_bound(parent_module.py(), sub_module_name)?; + let child_module = PyModule::new(parent_module.py(), sub_module_name)?; child_module.add_class::()?; child_module.add_class::()?; diff --git a/py-deepbiop/Makefile b/py-deepbiop/Makefile index bc44ef2..534fdaf 100644 --- a/py-deepbiop/Makefile +++ b/py-deepbiop/Makefile @@ -1,5 +1,8 @@ .PHONY: clean build +# add build target to the default target +all: build + clean: rm -rf build dist diff --git a/py-deepbiop/deepbiop/__init__.pyi b/py-deepbiop/deepbiop/__init__.pyi index b66a686..dd2dbda 100644 --- a/py-deepbiop/deepbiop/__init__.pyi +++ b/py-deepbiop/deepbiop/__init__.pyi @@ -3,6 +3,4 @@ from deepbiop import bam, fq, utils -def add(a:int,b:int) -> int: - ... - +def add(a: int, b: int) -> int: ... diff --git a/py-deepbiop/deepbiop/bam.pyi b/py-deepbiop/deepbiop/bam.pyi index 7f5a8c8..3b312de 100644 --- a/py-deepbiop/deepbiop/bam.pyi +++ b/py-deepbiop/deepbiop/bam.pyi @@ -5,10 +5,10 @@ import os import pathlib import typing -def count_chimeric_reads_for_path(bam:str | os.PathLike | pathlib.Path,threads:int | None) -> int: +def count_chimeric_reads_for_path(bam,threads = ...) -> int: r"""Calculate the number of chimeric reads in a BAM file.""" -def count_chimeric_reads_for_paths(bams:typing.Sequence[str | os.PathLike | pathlib.Path],threads:int | None) -> dict[str, int]: +def count_chimeric_reads_for_paths(bams,threads = ...) -> dict[str, int]: r"""Calculate the number of chimeric reads in multiple BAM files.""" def left_right_soft_clip(cigar_string:str) -> tuple[int, int]: diff --git a/py-deepbiop/deepbiop/fa.pyi b/py-deepbiop/deepbiop/fa.pyi index f1c411e..73e725d 100644 --- a/py-deepbiop/deepbiop/fa.pyi +++ b/py-deepbiop/deepbiop/fa.pyi @@ -6,10 +6,8 @@ import pathlib import typing class FaEncoderOption: - qual_offset: int bases: list[int] - threads: int - def __new__(cls,qual_offset:int, bases:str, threads:int | None): ... + def __new__(cls,bases): ... class ParquetEncoder: def __new__(cls,option:FaEncoderOption): ... @@ -28,16 +26,16 @@ class RecordData: def convert_multiple_fas_to_one_fa(paths:typing.Sequence[str | os.PathLike | pathlib.Path],result_path:str | os.PathLike | pathlib.Path,parallel:bool) -> None: ... -def encode_fa_path_to_parquet(fa_path:str | os.PathLike | pathlib.Path,bases:str,qual_offset:int,result_path:str | os.PathLike | pathlib.Path | None) -> None: +def encode_fa_path_to_parquet(fa_path,bases,result_path = ...) -> None: ... -def encode_fa_path_to_parquet_chunk(fa_path:str | os.PathLike | pathlib.Path,chunk_size:int,parallel:bool,bases:str,qual_offset:int) -> None: +def encode_fa_path_to_parquet_chunk(fa_path:str | os.PathLike | pathlib.Path,chunk_size:int,parallel:bool,bases:str) -> None: ... 
 
-def encode_fa_paths_to_parquet(fa_path:typing.Sequence[str | os.PathLike | pathlib.Path],bases:str,qual_offset:int) -> None:
+def encode_fa_paths_to_parquet(fa_path:typing.Sequence[str | os.PathLike | pathlib.Path],bases:str) -> None:
     ...
 
-def write_fa(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path | None) -> None:
+def write_fa(records_data,file_path = ...) -> None:
     ...
 
 def write_fa_parallel(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path,threads:int) -> None:
diff --git a/py-deepbiop/deepbiop/fq.pyi b/py-deepbiop/deepbiop/fq.pyi
index 1e680ef..dc8db1a 100644
--- a/py-deepbiop/deepbiop/fq.pyi
+++ b/py-deepbiop/deepbiop/fq.pyi
@@ -14,7 +14,7 @@ class FqEncoderOption:
     bases: list[int]
     vectorized_target: bool
     threads: int
-    def __new__(cls,kmer_size:int, qual_offset:int, bases:str, vectorized_target:bool, threads:int | None): ...
+    def __new__(cls,kmer_size,qual_offset,bases,vectorized_target,threads = ...): ...
 
 class JsonEncoder:
     def __new__(cls,option:FqEncoderOption): ...
@@ -30,7 +30,7 @@ class Predict:
     id: str
     is_truncated: bool
     qual: str | None
-    def __new__(cls,prediction:typing.Sequence[int], seq:str, id:str, is_truncated:bool, qual:str | None): ...
+    def __new__(cls,prediction,seq,id,is_truncated,qual = ...): ...
 
     def __repr__(self) -> str: ...
 
@@ -52,7 +52,7 @@ class Predict:
     def qual_array(self) -> list[int]:
         r"""Get the quality score array."""
 
-    def show_info(self, smooth_interval:typing.Sequence[tuple[int, int]], text_width:int | None) -> str:
+    def show_info(self, smooth_interval,text_width = ...) -> str:
         r"""Show the information of the prediction."""
 
     def __getstate__(self) -> typing.Any:
@@ -82,27 +82,27 @@ class TensorEncoder:
     tensor_max_seq_len: int
     kmer2id_table: dict[list[int], int]
     id2kmer_table: dict[int, list[int]]
-    def __new__(cls,option:FqEncoderOption, tensor_max_width:int | None, tensor_max_seq_len:int | None): ...
+    def __new__(cls,option,tensor_max_width = ...,tensor_max_seq_len = ...): ...
 
 def convert_multiple_fqs_to_one_fq(paths:typing.Sequence[str | os.PathLike | pathlib.Path],result_path:str | os.PathLike | pathlib.Path,parallel:bool) -> None:
     ...
 
-def encode_fq_path_to_json(fq_path:str | os.PathLike | pathlib.Path,k:int,bases:str,qual_offset:int,vectorized_target:bool,result_path:str | os.PathLike | pathlib.Path | None) -> None:
+def encode_fq_path_to_json(fq_path,k,bases,qual_offset,vectorized_target,result_path = ...) -> None:
     ...
 
-def encode_fq_path_to_parquet(fq_path:str | os.PathLike | pathlib.Path,bases:str,qual_offset:int,vectorized_target:bool,result_path:str | os.PathLike | pathlib.Path | None) -> None:
+def encode_fq_path_to_parquet(fq_path,bases,qual_offset,vectorized_target,result_path = ...) -> None:
     ...
 
 def encode_fq_path_to_parquet_chunk(fq_path:str | os.PathLike | pathlib.Path,chunk_size:int,parallel:bool,bases:str,qual_offset:int,vectorized_target:bool) -> None:
     ...
 
-def encode_fq_path_to_tensor(fq_path:str | os.PathLike | pathlib.Path,k:int,bases:str,qual_offset:int,vectorized_target:bool,max_width:int | None,max_seq_len:int | None) -> tuple[numpy.typing.NDArray[numpy.int32], numpy.typing.NDArray[numpy.int32], numpy.typing.NDArray[numpy.int32], dict[str, int]]:
+def encode_fq_path_to_tensor(fq_path,k,bases,qual_offset,vectorized_target,max_width = ...,max_seq_len = ...) -> tuple[numpy.typing.NDArray[numpy.int32], numpy.typing.NDArray[numpy.int32], numpy.typing.NDArray[numpy.int32], dict[str, int]]:
     ...
 
 def encode_fq_paths_to_parquet(fq_path:typing.Sequence[str | os.PathLike | pathlib.Path],bases:str,qual_offset:int,vectorized_target:bool) -> None:
     ...
 
-def encode_fq_paths_to_tensor(fq_paths:typing.Sequence[str | os.PathLike | pathlib.Path],k:int,bases:str,qual_offset:int,vectorized_target:bool,parallel_for_files:bool,max_width:int | None,max_seq_len:int | None) -> tuple[numpy.typing.NDArray[numpy.int32], numpy.typing.NDArray[numpy.int32], numpy.typing.NDArray[numpy.int32], dict[str, int]]:
+def encode_fq_paths_to_tensor(fq_paths,k,bases,qual_offset,vectorized_target,parallel_for_files,max_width = ...,max_seq_len = ...) -> tuple[numpy.typing.NDArray[numpy.int32], numpy.typing.NDArray[numpy.int32], numpy.typing.NDArray[numpy.int32], dict[str, int]]:
     ...
 
 def encode_qual(qual:str,qual_offset:int) -> list[int]:
@@ -126,7 +126,7 @@ def kmers_to_seq(kmers:typing.Sequence[str]) -> str:
     ...
 
 def load_predicts_from_batch_pt(pt_path:str | os.PathLike | pathlib.Path,ignore_label:int,id_table:typing.Mapping[int, str]) -> dict[str, Predict]:
     ...
 
-def load_predicts_from_batch_pts(pt_path:str | os.PathLike | pathlib.Path,ignore_label:int,id_table:typing.Mapping[int, str],max_predicts:int | None) -> dict[str, Predict]:
+def load_predicts_from_batch_pts(pt_path,ignore_label,id_table,max_predicts = ...) -> dict[str, Predict]:
     ...
 
 def normalize_seq(seq:str,iupac:bool) -> str:
@@ -152,7 +152,7 @@ def seq_to_kmers(seq:str,k:int,overlap:bool) -> list[str]:
 
 def test_predicts(predicts:typing.Sequence[Predict]) -> None:
     ...
 
-def write_fq(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path | None) -> None:
+def write_fq(records_data,file_path = ...) -> None:
     ...
 
 def write_fq_parallel(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path,threads:int) -> None:
diff --git a/py-deepbiop/deepbiop/utils.pyi b/py-deepbiop/deepbiop/utils.pyi
index ef297b6..96a7427 100644
--- a/py-deepbiop/deepbiop/utils.pyi
+++ b/py-deepbiop/deepbiop/utils.pyi
@@ -43,7 +43,7 @@ class PslAlignment:
 
 def generate_unmaped_intervals(input:typing.Sequence[tuple[int, int]],total_length:int) -> list[tuple[int, int]]:
     ...
 
-def highlight_targets(sequence:str,targets:typing.Sequence[tuple[int, int]],text_width:int | None) -> str:
+def highlight_targets(sequence,targets,text_width = ...) -> str:
     ...
 
 def majority_voting(labels:typing.Sequence[int],window_size:int) -> list[int]:
diff --git a/py-deepbiop/src/python_module.rs b/py-deepbiop/src/python_module.rs
index 3198190..9101a19 100644
--- a/py-deepbiop/src/python_module.rs
+++ b/py-deepbiop/src/python_module.rs
@@ -5,7 +5,7 @@ use pyo3::prelude::*;
 
 // register default sub_module
 pub fn register_default_module(parent_module: &Bound<'_, PyModule>) -> PyResult<()> {
     let sub_module_name = "default";
-    let child_module = PyModule::new_bound(parent_module.py(), sub_module_name)?;
+    let child_module = PyModule::new(parent_module.py(), sub_module_name)?;
 
     child_module.add("QUAL_OFFSET", deepbiop_fq::default::QUAL_OFFSET)?;
     child_module.add(