From 69e21d9737c7ac7476aca003d14335ccb9efffb1 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Mon, 19 Aug 2024 23:05:23 -0500 Subject: [PATCH 1/2] feat: Update Python function signatures and add normalization function --- Cargo.toml | 2 +- crates/deepbiop-fq/src/python.rs | 32 +++++ py-deepbiop/deepbiop/__init__.pyi | 6 +- py-deepbiop/deepbiop/bam.pyi | 9 +- py-deepbiop/deepbiop/fq.pyi | 197 +++++++++++++++++++++--------- py-deepbiop/deepbiop/utils.pyi | 47 ++++--- 6 files changed, 213 insertions(+), 80 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index b3e89c1..bed6008 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ pyo3 = { version = "0.21.0", features = [ "extension-module", "anyhow", ] } -pyo3-stub-gen = "0.5" +pyo3-stub-gen = "0.5.1" thiserror = "1.0" anyhow = "1.0" walkdir = { version = "2.4" } diff --git a/crates/deepbiop-fq/src/python.rs b/crates/deepbiop-fq/src/python.rs index 3f1cb96..f4f5095 100644 --- a/crates/deepbiop-fq/src/python.rs +++ b/crates/deepbiop-fq/src/python.rs @@ -31,6 +31,7 @@ impl encode::TensorEncoder { } } +#[gen_stub_pymethods] #[pymethods] impl encode::JsonEncoder { #[new] @@ -39,6 +40,7 @@ impl encode::JsonEncoder { } } +#[gen_stub_pymethods] #[pymethods] impl encode::ParquetEncoder { #[new] @@ -112,6 +114,7 @@ impl PyRecordData { } } +#[gen_stub_pyfunction(module = "deepbiop.fq")] #[pyfunction] fn write_fq(records_data: Vec, file_path: Option) -> Result<()> { let records: Vec = records_data @@ -121,6 +124,7 @@ fn write_fq(records_data: Vec, file_path: Option) -> Resu io::write_fq(&records, file_path) } +#[gen_stub_pyfunction(module = "deepbiop.fq")] #[pyfunction] fn write_fq_parallel( records_data: Vec, @@ -168,6 +172,28 @@ fn generate_kmers(base: String, k: usize) -> Vec { .collect() } +/// Normalize a DNA sequence by converting any non-standard nucleotides to standard ones. +/// +/// This function takes a DNA sequence as a `String` and a boolean flag `iupac` indicating whether to normalize using IUPAC ambiguity codes. +/// It returns a normalized DNA sequence as a `String`. +/// +/// # Arguments +/// +/// * `seq` - A DNA sequence as a `String`. +/// * `iupac` - A boolean flag indicating whether to normalize using IUPAC ambiguity codes. +/// +/// # Returns +/// +/// A normalized DNA sequence as a `String`. +/// +/// # Examples +/// +/// ``` +/// use deepbiop_fq as fq; +/// let seq = "acGTN".to_string(); +/// let normalized_seq = fq::normalize_seq(seq, false); +/// assert_eq!(normalized_seq, "ACGTN"); +/// ``` #[gen_stub_pyfunction(module = "deepbiop.fq")] #[pyfunction] fn normalize_seq(seq: String, iupac: bool) -> String { @@ -267,6 +293,7 @@ fn encode_fq_path_to_tensor( )) } +#[gen_stub_pyfunction(module = "deepbiop.fq")] #[pyfunction] fn encode_fq_path_to_json( fq_path: PathBuf, @@ -302,6 +329,7 @@ fn encode_fq_path_to_json( Ok(()) } +#[gen_stub_pyfunction(module = "deepbiop.fq")] #[pyfunction] fn encode_fq_path_to_parquet_chunk( fq_path: PathBuf, @@ -325,6 +353,7 @@ fn encode_fq_path_to_parquet_chunk( Ok(()) } +#[gen_stub_pyfunction(module = "deepbiop.fq")] #[pyfunction] fn encode_fq_path_to_parquet( fq_path: PathBuf, @@ -358,6 +387,7 @@ fn encode_fq_path_to_parquet( Ok(()) } +#[gen_stub_pyfunction(module = "deepbiop.fq")] #[pyfunction] fn encode_fq_paths_to_parquet( fq_path: Vec, @@ -387,6 +417,7 @@ fn get_label_region(labels: Vec) -> Vec<(usize, usize)> { .collect() } +#[gen_stub_pyfunction(module = "deepbiop.fq")] #[pyfunction] fn convert_multiple_fqs_to_one_fq( paths: Vec, @@ -452,6 +483,7 @@ pub fn load_predicts_from_batch_pts( predicts::load_predicts_from_batch_pts(pt_path, ignore_label, &id_table, max_predicts) } +#[gen_stub_pyfunction(module = "deepbiop.fq")] #[pyfunction] pub fn fastq_to_fasta(fastq_path: PathBuf, fasta_path: PathBuf) -> Result<()> { let fa_records = io::fastq_to_fasta(&fastq_path)?; diff --git a/py-deepbiop/deepbiop/__init__.pyi b/py-deepbiop/deepbiop/__init__.pyi index e47b099..37cbdba 100644 --- a/py-deepbiop/deepbiop/__init__.pyi +++ b/py-deepbiop/deepbiop/__init__.pyi @@ -1,6 +1,8 @@ # This file is automatically generated by pyo3_stub_gen # ruff: noqa: E501, F401 -from deepbiop import utils +from . import utils + +def add(a:int,b:int) -> int: + ... -def add(a: int, b: int) -> int: ... diff --git a/py-deepbiop/deepbiop/bam.pyi b/py-deepbiop/deepbiop/bam.pyi index e45f449..a8455fb 100644 --- a/py-deepbiop/deepbiop/bam.pyi +++ b/py-deepbiop/deepbiop/bam.pyi @@ -1,5 +1,10 @@ # This file is automatically generated by pyo3_stub_gen # ruff: noqa: E501, F401 -def left_right_soft_clip(cigar_string: str) -> tuple[int, int]: - r"""Calculate left and right soft clips from a cigar string.""" + +def left_right_soft_clip(cigar_string:str) -> tuple[int, int]: + r""" + Calculate left and right soft clips from a cigar string. + """ + ... + diff --git a/py-deepbiop/deepbiop/fq.pyi b/py-deepbiop/deepbiop/fq.pyi index 0abf63c..6a061e2 100644 --- a/py-deepbiop/deepbiop/fq.pyi +++ b/py-deepbiop/deepbiop/fq.pyi @@ -1,6 +1,8 @@ # This file is automatically generated by pyo3_stub_gen # ruff: noqa: E501, F401 +import os +import pathlib import typing class FqEncoderOption: @@ -9,81 +11,162 @@ class FqEncoderOption: bases: list[int] vectorized_target: bool threads: int - def __new__( - cls, - kmer_size: int, - qual_offset: int, - bases: str, - vectorized_target: bool, - threads: int | None, - ): ... - -class JsonEncoder: ... -class ParquetEncoder: ... + def __new__(cls,kmer_size:int, qual_offset:int, bases:str, vectorized_target:bool, threads:typing.Optional[int]): ... -class Predict: - r"""A struct to store the prediction result.""" +class JsonEncoder: + def __new__(cls,option:FqEncoderOption): ... + ... + +class ParquetEncoder: + def __new__(cls,option:FqEncoderOption): ... + ... +class Predict: + r""" + A struct to store the prediction result + """ prediction: list[int] seq: str id: str is_truncated: bool - qual: str | None - def __new__( - cls, - prediction: typing.Sequence[int], - seq: str, - id: str, - is_truncated: bool, - qual: str | None, - ): ... - def __repr__(self) -> str: ... + qual: typing.Optional[str] + def __new__(cls,prediction:typing.Sequence[int], seq:str, id:str, is_truncated:bool, qual:typing.Optional[str]): ... + def __repr__(self) -> str: + ... + def prediction_region(self) -> list[tuple[int, int]]: - r"""Get the prediction region.""" + r""" + Get the prediction region + """ + ... - def smooth_prediction(self, window_size: int) -> list[tuple[int, int]]: - r"""Get the smooth prediction region.""" + def smooth_prediction(self, window_size:int) -> list[tuple[int, int]]: + r""" + Get the smooth prediction region + """ + ... - def smooth_label(self, window_size: int) -> list[int]: - r"""Get the smooth label.""" + def smooth_label(self, window_size:int) -> list[int]: + r""" + Get the smooth label + """ + ... - def smooth_and_select_intervals( - self, - smooth_window_size: int, - min_interval_size: int, - append_interval_number: int, - ) -> list[tuple[int, int]]: - r"""Smooth and select intervals.""" + def smooth_and_select_intervals(self, smooth_window_size:int, min_interval_size:int, append_interval_number:int) -> list[tuple[int, int]]: + r""" + Smooth and select intervals + """ + ... def seq_len(self) -> int: - r"""Get the sequence length.""" + r""" + Get the sequence length + """ + ... def qual_array(self) -> list[int]: - r"""Get the quality score array.""" + r""" + Get the quality score array + """ + ... - def show_info( - self, smooth_interval: typing.Sequence[tuple[int, int]], text_width: int | None - ) -> str: - r"""Show the information of the prediction.""" + def show_info(self, smooth_interval:typing.Sequence[tuple[int, int]], text_width:typing.Optional[int]) -> str: + r""" + Show the information of the prediction + """ + ... + + def __getstate__(self) -> typing.Any: + ... + + def __setstate__(self, state:typing.Any) -> None: + ... - def __getstate__(self) -> typing.Any: ... - def __setstate__(self, state: typing.Any) -> None: ... class RecordData: id: str seq: str qual: str - def __new__(cls, id: str, seq: str, qual: str): ... - def set_id(self, id: str) -> None: ... - def set_seq(self, seq: str) -> None: ... - def set_qual(self, qual: str) -> None: ... - -def encode_qual(qual: str, qual_offset: int) -> list[int]: - r"""Convert ASCII quality to Phred score for Phred+33 encoding.""" - -def generate_kmers(base: str, k: int) -> list[str]: ... -def get_label_region(labels: typing.Sequence[int]) -> list[tuple[int, int]]: ... -def kmers_to_seq(kmers: typing.Sequence[str]) -> str: ... -def normalize_seq(seq: str, iupac: bool) -> str: ... -def seq_to_kmers(seq: str, k: int, overlap: bool) -> list[str]: ... -def test_predicts(predicts: typing.Sequence[Predict]) -> None: ... + def __new__(cls,id:str, seq:str, qual:str): ... + def set_id(self, id:str) -> None: + ... + + def set_seq(self, seq:str) -> None: + ... + + def set_qual(self, qual:str) -> None: + ... + + +def convert_multiple_fqs_to_one_fq(paths:typing.Sequence[str | os.PathLike | pathlib.Path],result_path:str | os.PathLike | pathlib.Path,parallel:bool) -> None: + ... + +def encode_fq_path_to_json(fq_path:str | os.PathLike | pathlib.Path,k:int,bases:str,qual_offset:int,vectorized_target:bool,result_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None: + ... + +def encode_fq_path_to_parquet(fq_path:str | os.PathLike | pathlib.Path,bases:str,qual_offset:int,vectorized_target:bool,result_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None: + ... + +def encode_fq_path_to_parquet_chunk(fq_path:str | os.PathLike | pathlib.Path,chunk_size:int,parallel:bool,bases:str,qual_offset:int,vectorized_target:bool) -> None: + ... + +def encode_fq_paths_to_parquet(fq_path:typing.Sequence[str | os.PathLike | pathlib.Path],bases:str,qual_offset:int,vectorized_target:bool) -> None: + ... + +def encode_qual(qual:str,qual_offset:int) -> list[int]: + r""" + Convert ASCII quality to Phred score for Phred+33 encoding + """ + ... + +def fastq_to_fasta(fastq_path:str | os.PathLike | pathlib.Path,fasta_path:str | os.PathLike | pathlib.Path) -> None: + ... + +def generate_kmers(base:str,k:int) -> list[str]: + ... + +def get_label_region(labels:typing.Sequence[int]) -> list[tuple[int, int]]: + ... + +def kmers_to_seq(kmers:typing.Sequence[str]) -> str: + ... + +def normalize_seq(seq:str,iupac:bool) -> str: + r""" + Normalize a DNA sequence by converting any non-standard nucleotides to standard ones. + + This function takes a DNA sequence as a `String` and a boolean flag `iupac` indicating whether to normalize using IUPAC ambiguity codes. + It returns a normalized DNA sequence as a `String`. + + # Arguments + + * `seq` - A DNA sequence as a `String`. + * `iupac` - A boolean flag indicating whether to normalize using IUPAC ambiguity codes. + + # Returns + + A normalized DNA sequence as a `String`. + + # Examples + + ``` + use deepbiop_fq as fq; + let seq = "acGTN".to_string(); + let normalized_seq = fq::normalize_seq(seq, false); + assert_eq!(normalized_seq, "ACGTN"); + ``` + """ + ... + +def seq_to_kmers(seq:str,k:int,overlap:bool) -> list[str]: + ... + +def test_predicts(predicts:typing.Sequence[Predict]) -> None: + ... + +def write_fq(records_data:typing.Sequence[RecordData],file_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None: + ... + +def write_fq_parallel(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path,threads:int) -> None: + ... + diff --git a/py-deepbiop/deepbiop/utils.pyi b/py-deepbiop/deepbiop/utils.pyi index 6ce9f22..d062947 100644 --- a/py-deepbiop/deepbiop/utils.pyi +++ b/py-deepbiop/deepbiop/utils.pyi @@ -8,14 +8,19 @@ class GenomicInterval: A segment is a genomic interval defined by a chromosome, a start position and an end position. The start position is inclusive and the end position is exclusive. """ - start: int end: int chr: str - def __new__(cls, chr: str, start: int, end: int): ... - def set_chr(self, chr: str) -> None: ... - def overlap(self, other: GenomicInterval) -> bool: ... - def __repr__(self) -> str: ... + def __new__(cls,chr:str, start:int, end:int): ... + def set_chr(self, chr:str) -> None: + ... + + def overlap(self, other:GenomicInterval) -> bool: + ... + + def __repr__(self) -> str: + ... + class PslAlignment: qname: str @@ -28,16 +33,22 @@ class PslAlignment: tstart: int tend: int identity: float - def __repr__(self) -> str: ... - -def generate_unmaped_intervals( - input: typing.Sequence[tuple[int, int]], total_length: int -) -> list[tuple[int, int]]: ... -def highlight_targets( - sequence: str, targets: typing.Sequence[tuple[int, int]], text_width: int | None -) -> str: ... -def majority_voting(labels: typing.Sequence[int], window_size: int) -> list[int]: ... -def remove_intervals_and_keep_left( - seq: str, intervals: typing.Sequence[tuple[int, int]] -) -> tuple[list[str], list[tuple[int, int]]]: ... -def reverse_complement(seq: str) -> str: ... + def __repr__(self) -> str: + ... + + +def generate_unmaped_intervals(input:typing.Sequence[tuple[int, int]],total_length:int) -> list[tuple[int, int]]: + ... + +def highlight_targets(sequence:str,targets:typing.Sequence[tuple[int, int]],text_width:typing.Optional[int]) -> str: + ... + +def majority_voting(labels:typing.Sequence[int],window_size:int) -> list[int]: + ... + +def remove_intervals_and_keep_left(seq:str,intervals:typing.Sequence[tuple[int, int]]) -> tuple[list[str], list[tuple[int, int]]]: + ... + +def reverse_complement(seq:str) -> str: + ... + From 3398e0083796f8e3d4502a5eea7dde9295406637 Mon Sep 17 00:00:00 2001 From: Yangyang Li Date: Mon, 19 Aug 2024 23:09:48 -0500 Subject: [PATCH 2/2] refactor: Update function definitions and imports --- py-deepbiop/deepbiop/__init__.pyi | 6 +- py-deepbiop/deepbiop/bam.pyi | 9 +- py-deepbiop/deepbiop/fq.pyi | 232 +++++++++++++++--------------- py-deepbiop/deepbiop/utils.pyi | 47 +++--- py-deepbiop/docs/conf.py | 4 - 5 files changed, 135 insertions(+), 163 deletions(-) diff --git a/py-deepbiop/deepbiop/__init__.pyi b/py-deepbiop/deepbiop/__init__.pyi index 37cbdba..e47b099 100644 --- a/py-deepbiop/deepbiop/__init__.pyi +++ b/py-deepbiop/deepbiop/__init__.pyi @@ -1,8 +1,6 @@ # This file is automatically generated by pyo3_stub_gen # ruff: noqa: E501, F401 -from . import utils - -def add(a:int,b:int) -> int: - ... +from deepbiop import utils +def add(a: int, b: int) -> int: ... diff --git a/py-deepbiop/deepbiop/bam.pyi b/py-deepbiop/deepbiop/bam.pyi index a8455fb..e45f449 100644 --- a/py-deepbiop/deepbiop/bam.pyi +++ b/py-deepbiop/deepbiop/bam.pyi @@ -1,10 +1,5 @@ # This file is automatically generated by pyo3_stub_gen # ruff: noqa: E501, F401 - -def left_right_soft_clip(cigar_string:str) -> tuple[int, int]: - r""" - Calculate left and right soft clips from a cigar string. - """ - ... - +def left_right_soft_clip(cigar_string: str) -> tuple[int, int]: + r"""Calculate left and right soft clips from a cigar string.""" diff --git a/py-deepbiop/deepbiop/fq.pyi b/py-deepbiop/deepbiop/fq.pyi index 6a061e2..fd39870 100644 --- a/py-deepbiop/deepbiop/fq.pyi +++ b/py-deepbiop/deepbiop/fq.pyi @@ -11,144 +11,140 @@ class FqEncoderOption: bases: list[int] vectorized_target: bool threads: int - def __new__(cls,kmer_size:int, qual_offset:int, bases:str, vectorized_target:bool, threads:typing.Optional[int]): ... + def __new__( + cls, + kmer_size: int, + qual_offset: int, + bases: str, + vectorized_target: bool, + threads: int | None, + ): ... class JsonEncoder: - def __new__(cls,option:FqEncoderOption): ... - ... + def __new__(cls, option: FqEncoderOption): ... class ParquetEncoder: - def __new__(cls,option:FqEncoderOption): ... - ... + def __new__(cls, option: FqEncoderOption): ... class Predict: - r""" - A struct to store the prediction result - """ + r"""A struct to store the prediction result.""" + prediction: list[int] seq: str id: str is_truncated: bool - qual: typing.Optional[str] - def __new__(cls,prediction:typing.Sequence[int], seq:str, id:str, is_truncated:bool, qual:typing.Optional[str]): ... - def __repr__(self) -> str: - ... - + qual: str | None + def __new__( + cls, + prediction: typing.Sequence[int], + seq: str, + id: str, + is_truncated: bool, + qual: str | None, + ): ... + def __repr__(self) -> str: ... def prediction_region(self) -> list[tuple[int, int]]: - r""" - Get the prediction region - """ - ... - - def smooth_prediction(self, window_size:int) -> list[tuple[int, int]]: - r""" - Get the smooth prediction region - """ - ... - - def smooth_label(self, window_size:int) -> list[int]: - r""" - Get the smooth label - """ - ... - - def smooth_and_select_intervals(self, smooth_window_size:int, min_interval_size:int, append_interval_number:int) -> list[tuple[int, int]]: - r""" - Smooth and select intervals - """ - ... + r"""Get the prediction region.""" - def seq_len(self) -> int: - r""" - Get the sequence length - """ - ... + def smooth_prediction(self, window_size: int) -> list[tuple[int, int]]: + r"""Get the smooth prediction region.""" - def qual_array(self) -> list[int]: - r""" - Get the quality score array - """ - ... + def smooth_label(self, window_size: int) -> list[int]: + r"""Get the smooth label.""" + + def smooth_and_select_intervals( + self, + smooth_window_size: int, + min_interval_size: int, + append_interval_number: int, + ) -> list[tuple[int, int]]: + r"""Smooth and select intervals.""" - def show_info(self, smooth_interval:typing.Sequence[tuple[int, int]], text_width:typing.Optional[int]) -> str: - r""" - Show the information of the prediction - """ - ... + def seq_len(self) -> int: + r"""Get the sequence length.""" - def __getstate__(self) -> typing.Any: - ... + def qual_array(self) -> list[int]: + r"""Get the quality score array.""" - def __setstate__(self, state:typing.Any) -> None: - ... + def show_info( + self, smooth_interval: typing.Sequence[tuple[int, int]], text_width: int | None + ) -> str: + r"""Show the information of the prediction.""" + def __getstate__(self) -> typing.Any: ... + def __setstate__(self, state: typing.Any) -> None: ... class RecordData: id: str seq: str qual: str - def __new__(cls,id:str, seq:str, qual:str): ... - def set_id(self, id:str) -> None: - ... - - def set_seq(self, seq:str) -> None: - ... - - def set_qual(self, qual:str) -> None: - ... - - -def convert_multiple_fqs_to_one_fq(paths:typing.Sequence[str | os.PathLike | pathlib.Path],result_path:str | os.PathLike | pathlib.Path,parallel:bool) -> None: - ... - -def encode_fq_path_to_json(fq_path:str | os.PathLike | pathlib.Path,k:int,bases:str,qual_offset:int,vectorized_target:bool,result_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None: - ... - -def encode_fq_path_to_parquet(fq_path:str | os.PathLike | pathlib.Path,bases:str,qual_offset:int,vectorized_target:bool,result_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None: - ... - -def encode_fq_path_to_parquet_chunk(fq_path:str | os.PathLike | pathlib.Path,chunk_size:int,parallel:bool,bases:str,qual_offset:int,vectorized_target:bool) -> None: - ... - -def encode_fq_paths_to_parquet(fq_path:typing.Sequence[str | os.PathLike | pathlib.Path],bases:str,qual_offset:int,vectorized_target:bool) -> None: - ... - -def encode_qual(qual:str,qual_offset:int) -> list[int]: - r""" - Convert ASCII quality to Phred score for Phred+33 encoding - """ - ... - -def fastq_to_fasta(fastq_path:str | os.PathLike | pathlib.Path,fasta_path:str | os.PathLike | pathlib.Path) -> None: - ... - -def generate_kmers(base:str,k:int) -> list[str]: - ... - -def get_label_region(labels:typing.Sequence[int]) -> list[tuple[int, int]]: - ... - -def kmers_to_seq(kmers:typing.Sequence[str]) -> str: - ... - -def normalize_seq(seq:str,iupac:bool) -> str: + def __new__(cls, id: str, seq: str, qual: str): ... + def set_id(self, id: str) -> None: ... + def set_seq(self, seq: str) -> None: ... + def set_qual(self, qual: str) -> None: ... + +def convert_multiple_fqs_to_one_fq( + paths: typing.Sequence[str | os.PathLike | pathlib.Path], + result_path: str | os.PathLike | pathlib.Path, + parallel: bool, +) -> None: ... +def encode_fq_path_to_json( + fq_path: str | os.PathLike | pathlib.Path, + k: int, + bases: str, + qual_offset: int, + vectorized_target: bool, + result_path: str | os.PathLike | pathlib.Path | None, +) -> None: ... +def encode_fq_path_to_parquet( + fq_path: str | os.PathLike | pathlib.Path, + bases: str, + qual_offset: int, + vectorized_target: bool, + result_path: str | os.PathLike | pathlib.Path | None, +) -> None: ... +def encode_fq_path_to_parquet_chunk( + fq_path: str | os.PathLike | pathlib.Path, + chunk_size: int, + parallel: bool, + bases: str, + qual_offset: int, + vectorized_target: bool, +) -> None: ... +def encode_fq_paths_to_parquet( + fq_path: typing.Sequence[str | os.PathLike | pathlib.Path], + bases: str, + qual_offset: int, + vectorized_target: bool, +) -> None: ... +def encode_qual(qual: str, qual_offset: int) -> list[int]: + r"""Convert ASCII quality to Phred score for Phred+33 encoding.""" + +def fastq_to_fasta( + fastq_path: str | os.PathLike | pathlib.Path, + fasta_path: str | os.PathLike | pathlib.Path, +) -> None: ... +def generate_kmers(base: str, k: int) -> list[str]: ... +def get_label_region(labels: typing.Sequence[int]) -> list[tuple[int, int]]: ... +def kmers_to_seq(kmers: typing.Sequence[str]) -> str: ... +def normalize_seq(seq: str, iupac: bool) -> str: r""" Normalize a DNA sequence by converting any non-standard nucleotides to standard ones. - + This function takes a DNA sequence as a `String` and a boolean flag `iupac` indicating whether to normalize using IUPAC ambiguity codes. It returns a normalized DNA sequence as a `String`. - + # Arguments - + * `seq` - A DNA sequence as a `String`. * `iupac` - A boolean flag indicating whether to normalize using IUPAC ambiguity codes. - + # Returns - + A normalized DNA sequence as a `String`. - + # Examples - + ``` use deepbiop_fq as fq; let seq = "acGTN".to_string(); @@ -156,17 +152,15 @@ def normalize_seq(seq:str,iupac:bool) -> str: assert_eq!(normalized_seq, "ACGTN"); ``` """ - ... - -def seq_to_kmers(seq:str,k:int,overlap:bool) -> list[str]: - ... - -def test_predicts(predicts:typing.Sequence[Predict]) -> None: - ... - -def write_fq(records_data:typing.Sequence[RecordData],file_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None: - ... - -def write_fq_parallel(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path,threads:int) -> None: - ... +def seq_to_kmers(seq: str, k: int, overlap: bool) -> list[str]: ... +def test_predicts(predicts: typing.Sequence[Predict]) -> None: ... +def write_fq( + records_data: typing.Sequence[RecordData], + file_path: str | os.PathLike | pathlib.Path | None, +) -> None: ... +def write_fq_parallel( + records_data: typing.Sequence[RecordData], + file_path: str | os.PathLike | pathlib.Path, + threads: int, +) -> None: ... diff --git a/py-deepbiop/deepbiop/utils.pyi b/py-deepbiop/deepbiop/utils.pyi index d062947..6ce9f22 100644 --- a/py-deepbiop/deepbiop/utils.pyi +++ b/py-deepbiop/deepbiop/utils.pyi @@ -8,19 +8,14 @@ class GenomicInterval: A segment is a genomic interval defined by a chromosome, a start position and an end position. The start position is inclusive and the end position is exclusive. """ + start: int end: int chr: str - def __new__(cls,chr:str, start:int, end:int): ... - def set_chr(self, chr:str) -> None: - ... - - def overlap(self, other:GenomicInterval) -> bool: - ... - - def __repr__(self) -> str: - ... - + def __new__(cls, chr: str, start: int, end: int): ... + def set_chr(self, chr: str) -> None: ... + def overlap(self, other: GenomicInterval) -> bool: ... + def __repr__(self) -> str: ... class PslAlignment: qname: str @@ -33,22 +28,16 @@ class PslAlignment: tstart: int tend: int identity: float - def __repr__(self) -> str: - ... - - -def generate_unmaped_intervals(input:typing.Sequence[tuple[int, int]],total_length:int) -> list[tuple[int, int]]: - ... - -def highlight_targets(sequence:str,targets:typing.Sequence[tuple[int, int]],text_width:typing.Optional[int]) -> str: - ... - -def majority_voting(labels:typing.Sequence[int],window_size:int) -> list[int]: - ... - -def remove_intervals_and_keep_left(seq:str,intervals:typing.Sequence[tuple[int, int]]) -> tuple[list[str], list[tuple[int, int]]]: - ... - -def reverse_complement(seq:str) -> str: - ... - + def __repr__(self) -> str: ... + +def generate_unmaped_intervals( + input: typing.Sequence[tuple[int, int]], total_length: int +) -> list[tuple[int, int]]: ... +def highlight_targets( + sequence: str, targets: typing.Sequence[tuple[int, int]], text_width: int | None +) -> str: ... +def majority_voting(labels: typing.Sequence[int], window_size: int) -> list[int]: ... +def remove_intervals_and_keep_left( + seq: str, intervals: typing.Sequence[tuple[int, int]] +) -> tuple[list[str], list[tuple[int, int]]]: ... +def reverse_complement(seq: str) -> str: ... diff --git a/py-deepbiop/docs/conf.py b/py-deepbiop/docs/conf.py index 38cfc07..340aa6b 100644 --- a/py-deepbiop/docs/conf.py +++ b/py-deepbiop/docs/conf.py @@ -1,10 +1,6 @@ """Sphinx configuration.""" -import sys from datetime import datetime -from pathlib import Path - -# sys.path.insert(0, (Path().resolve() / "../deepbiop").as_posix()) project = "deepbiop" author = "Yangyang Li"