From 69e21d9737c7ac7476aca003d14335ccb9efffb1 Mon Sep 17 00:00:00 2001
From: Yangyang Li <yangyang.li@northwestern.edu>
Date: Mon, 19 Aug 2024 23:05:23 -0500
Subject: [PATCH 1/2] feat: Update Python function signatures and document
 normalization function

---
 Cargo.toml                        |   2 +-
 crates/deepbiop-fq/src/python.rs  |  32 +++++
 py-deepbiop/deepbiop/__init__.pyi |   6 +-
 py-deepbiop/deepbiop/bam.pyi      |   9 +-
 py-deepbiop/deepbiop/fq.pyi       | 197 +++++++++++++++++++++---------
 py-deepbiop/deepbiop/utils.pyi    |  47 ++++---
 6 files changed, 213 insertions(+), 80 deletions(-)
diff --git a/Cargo.toml b/Cargo.toml
index b3e89c1..bed6008 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,7 +17,7 @@ pyo3 = { version = "0.21.0", features = [
     "extension-module",
     "anyhow",
 ] }
-pyo3-stub-gen = "0.5"
+pyo3-stub-gen = "0.5.1"
 thiserror = "1.0"
 anyhow = "1.0"
 walkdir = { version = "2.4" }
diff --git a/crates/deepbiop-fq/src/python.rs b/crates/deepbiop-fq/src/python.rs
index 3f1cb96..f4f5095 100644
--- a/crates/deepbiop-fq/src/python.rs
+++ b/crates/deepbiop-fq/src/python.rs
@@ -31,6 +31,7 @@ impl encode::TensorEncoder {
     }
 }
 
+#[gen_stub_pymethods]
 #[pymethods]
 impl encode::JsonEncoder {
     #[new]
@@ -39,6 +40,7 @@ impl encode::JsonEncoder {
     }
 }
 
+#[gen_stub_pymethods]
 #[pymethods]
 impl encode::ParquetEncoder {
     #[new]
@@ -112,6 +114,7 @@ impl PyRecordData {
     }
 }
 
+#[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
 fn write_fq(records_data: Vec<PyRecordData>, file_path: Option<PathBuf>) -> Result<()> {
     let records: Vec<encode::RecordData> = records_data
@@ -121,6 +124,7 @@ fn write_fq(records_data: Vec<PyRecordData>, file_path: Option<PathBuf>) -> Resu
     io::write_fq(&records, file_path)
 }
 
+#[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
 fn write_fq_parallel(
     records_data: Vec<PyRecordData>,
@@ -168,6 +172,28 @@ fn generate_kmers(base: String, k: usize) -> Vec<String> {
         .collect()
 }
 
+/// Normalize a DNA sequence by converting any non-standard nucleotides to standard ones.
+///
+/// This function takes a DNA sequence as a `String` and a boolean flag `iupac` indicating whether to normalize using IUPAC ambiguity codes.
+/// It returns a normalized DNA sequence as a `String`.
+///
+/// # Arguments
+///
+/// * `seq` - A DNA sequence as a `String`.
+/// * `iupac` - A boolean flag indicating whether to normalize using IUPAC ambiguity codes.
+///
+/// # Returns
+///
+/// A normalized DNA sequence as a `String`.
+///
+/// # Examples
+///
+/// ```
+/// use deepbiop_fq as fq;
+/// let seq = "acGTN".to_string();
+/// let normalized_seq = fq::normalize_seq(seq, false);
+/// assert_eq!(normalized_seq, "ACGTN");
+/// ```
 #[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
 fn normalize_seq(seq: String, iupac: bool) -> String {
@@ -267,6 +293,7 @@ fn encode_fq_path_to_tensor(
     ))
 }
 
+#[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
 fn encode_fq_path_to_json(
     fq_path: PathBuf,
@@ -302,6 +329,7 @@ fn encode_fq_path_to_json(
     Ok(())
 }
 
+#[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
 fn encode_fq_path_to_parquet_chunk(
     fq_path: PathBuf,
@@ -325,6 +353,7 @@ fn encode_fq_path_to_parquet_chunk(
     Ok(())
 }
 
+#[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
 fn encode_fq_path_to_parquet(
     fq_path: PathBuf,
@@ -358,6 +387,7 @@ fn encode_fq_path_to_parquet(
     Ok(())
 }
 
+#[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
 fn encode_fq_paths_to_parquet(
     fq_path: Vec<PathBuf>,
@@ -387,6 +417,7 @@ fn get_label_region(labels: Vec<i8>) -> Vec<(usize, usize)> {
         .collect()
 }
 
+#[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
 fn convert_multiple_fqs_to_one_fq(
     paths: Vec<PathBuf>,
@@ -452,6 +483,7 @@ pub fn load_predicts_from_batch_pts(
     predicts::load_predicts_from_batch_pts(pt_path, ignore_label, &id_table, max_predicts)
 }
 
+#[gen_stub_pyfunction(module = "deepbiop.fq")]
 #[pyfunction]
 pub fn fastq_to_fasta(fastq_path: PathBuf, fasta_path: PathBuf) -> Result<()> {
     let fa_records = io::fastq_to_fasta(&fastq_path)?;
diff --git a/py-deepbiop/deepbiop/__init__.pyi b/py-deepbiop/deepbiop/__init__.pyi
index e47b099..37cbdba 100644
--- a/py-deepbiop/deepbiop/__init__.pyi
+++ b/py-deepbiop/deepbiop/__init__.pyi
@@ -1,6 +1,8 @@
 # This file is automatically generated by pyo3_stub_gen
 # ruff: noqa: E501, F401
 
-from deepbiop import utils
+from . import utils
+
+def add(a:int,b:int) -> int:
+    ...
 
-def add(a: int, b: int) -> int: ...
diff --git a/py-deepbiop/deepbiop/bam.pyi b/py-deepbiop/deepbiop/bam.pyi
index e45f449..a8455fb 100644
--- a/py-deepbiop/deepbiop/bam.pyi
+++ b/py-deepbiop/deepbiop/bam.pyi
@@ -1,5 +1,10 @@
 # This file is automatically generated by pyo3_stub_gen
 # ruff: noqa: E501, F401
 
-def left_right_soft_clip(cigar_string: str) -> tuple[int, int]:
-    r"""Calculate left and right soft clips from a cigar string."""
+
+def left_right_soft_clip(cigar_string:str) -> tuple[int, int]:
+    r"""
+    Calculate left and right soft clips from a cigar string.
+    """
+    ...
+
diff --git a/py-deepbiop/deepbiop/fq.pyi b/py-deepbiop/deepbiop/fq.pyi
index 0abf63c..6a061e2 100644
--- a/py-deepbiop/deepbiop/fq.pyi
+++ b/py-deepbiop/deepbiop/fq.pyi
@@ -1,6 +1,8 @@
 # This file is automatically generated by pyo3_stub_gen
 # ruff: noqa: E501, F401
 
+import os
+import pathlib
 import typing
 
 class FqEncoderOption:
@@ -9,81 +11,162 @@ class FqEncoderOption:
     bases: list[int]
     vectorized_target: bool
     threads: int
-    def __new__(
-        cls,
-        kmer_size: int,
-        qual_offset: int,
-        bases: str,
-        vectorized_target: bool,
-        threads: int | None,
-    ): ...
-
-class JsonEncoder: ...
-class ParquetEncoder: ...
+    def __new__(cls,kmer_size:int, qual_offset:int, bases:str, vectorized_target:bool, threads:typing.Optional[int]): ...
 
-class Predict:
-    r"""A struct to store the prediction result."""
+class JsonEncoder:
+    def __new__(cls,option:FqEncoderOption): ...
+    ...
+
+class ParquetEncoder:
+    def __new__(cls,option:FqEncoderOption): ...
+    ...
 
+class Predict:
+    r"""
+    A struct to store the prediction result
+    """
     prediction: list[int]
     seq: str
     id: str
     is_truncated: bool
-    qual: str | None
-    def __new__(
-        cls,
-        prediction: typing.Sequence[int],
-        seq: str,
-        id: str,
-        is_truncated: bool,
-        qual: str | None,
-    ): ...
-    def __repr__(self) -> str: ...
+    qual: typing.Optional[str]
+    def __new__(cls,prediction:typing.Sequence[int], seq:str, id:str, is_truncated:bool, qual:typing.Optional[str]): ...
+    def __repr__(self) -> str:
+        ...
+
     def prediction_region(self) -> list[tuple[int, int]]:
-        r"""Get the prediction region."""
+        r"""
+        Get the prediction region
+        """
+        ...
 
-    def smooth_prediction(self, window_size: int) -> list[tuple[int, int]]:
-        r"""Get the smooth prediction region."""
+    def smooth_prediction(self, window_size:int) -> list[tuple[int, int]]:
+        r"""
+        Get the smooth prediction region
+        """
+        ...
 
-    def smooth_label(self, window_size: int) -> list[int]:
-        r"""Get the smooth label."""
+    def smooth_label(self, window_size:int) -> list[int]:
+        r"""
+        Get the smooth label
+        """
+        ...
 
-    def smooth_and_select_intervals(
-        self,
-        smooth_window_size: int,
-        min_interval_size: int,
-        append_interval_number: int,
-    ) -> list[tuple[int, int]]:
-        r"""Smooth and select intervals."""
+    def smooth_and_select_intervals(self, smooth_window_size:int, min_interval_size:int, append_interval_number:int) -> list[tuple[int, int]]:
+        r"""
+        Smooth and select intervals
+        """
+        ...
 
     def seq_len(self) -> int:
-        r"""Get the sequence length."""
+        r"""
+        Get the sequence length
+        """
+        ...
 
     def qual_array(self) -> list[int]:
-        r"""Get the quality score array."""
+        r"""
+        Get the quality score array
+        """
+        ...
 
-    def show_info(
-        self, smooth_interval: typing.Sequence[tuple[int, int]], text_width: int | None
-    ) -> str:
-        r"""Show the information of the prediction."""
+    def show_info(self, smooth_interval:typing.Sequence[tuple[int, int]], text_width:typing.Optional[int]) -> str:
+        r"""
+        Show the information of the prediction
+        """
+        ...
+
+    def __getstate__(self) -> typing.Any:
+        ...
+
+    def __setstate__(self, state:typing.Any) -> None:
+        ...
 
-    def __getstate__(self) -> typing.Any: ...
-    def __setstate__(self, state: typing.Any) -> None: ...
 
 class RecordData:
     id: str
     seq: str
     qual: str
-    def __new__(cls, id: str, seq: str, qual: str): ...
-    def set_id(self, id: str) -> None: ...
-    def set_seq(self, seq: str) -> None: ...
-    def set_qual(self, qual: str) -> None: ...
-
-def encode_qual(qual: str, qual_offset: int) -> list[int]:
-    r"""Convert ASCII quality to Phred score for Phred+33 encoding."""
-
-def generate_kmers(base: str, k: int) -> list[str]: ...
-def get_label_region(labels: typing.Sequence[int]) -> list[tuple[int, int]]: ...
-def kmers_to_seq(kmers: typing.Sequence[str]) -> str: ...
-def normalize_seq(seq: str, iupac: bool) -> str: ...
-def seq_to_kmers(seq: str, k: int, overlap: bool) -> list[str]: ...
-def test_predicts(predicts: typing.Sequence[Predict]) -> None: ...
+    def __new__(cls,id:str, seq:str, qual:str): ...
+    def set_id(self, id:str) -> None:
+        ...
+
+    def set_seq(self, seq:str) -> None:
+        ...
+
+    def set_qual(self, qual:str) -> None:
+        ...
+
+
+def convert_multiple_fqs_to_one_fq(paths:typing.Sequence[str | os.PathLike | pathlib.Path],result_path:str | os.PathLike | pathlib.Path,parallel:bool) -> None:
+    ...
+
+def encode_fq_path_to_json(fq_path:str | os.PathLike | pathlib.Path,k:int,bases:str,qual_offset:int,vectorized_target:bool,result_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None:
+    ...
+
+def encode_fq_path_to_parquet(fq_path:str | os.PathLike | pathlib.Path,bases:str,qual_offset:int,vectorized_target:bool,result_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None:
+    ...
+
+def encode_fq_path_to_parquet_chunk(fq_path:str | os.PathLike | pathlib.Path,chunk_size:int,parallel:bool,bases:str,qual_offset:int,vectorized_target:bool) -> None:
+    ...
+
+def encode_fq_paths_to_parquet(fq_path:typing.Sequence[str | os.PathLike | pathlib.Path],bases:str,qual_offset:int,vectorized_target:bool) -> None:
+    ...
+
+def encode_qual(qual:str,qual_offset:int) -> list[int]:
+    r"""
+    Convert ASCII quality to Phred score for Phred+33 encoding
+    """
+    ...
+
+def fastq_to_fasta(fastq_path:str | os.PathLike | pathlib.Path,fasta_path:str | os.PathLike | pathlib.Path) -> None:
+    ...
+
+def generate_kmers(base:str,k:int) -> list[str]:
+    ...
+
+def get_label_region(labels:typing.Sequence[int]) -> list[tuple[int, int]]:
+    ...
+
+def kmers_to_seq(kmers:typing.Sequence[str]) -> str:
+    ...
+
+def normalize_seq(seq:str,iupac:bool) -> str:
+    r"""
+    Normalize a DNA sequence by converting any non-standard nucleotides to standard ones.
+    
+    This function takes a DNA sequence as a `String` and a boolean flag `iupac` indicating whether to normalize using IUPAC ambiguity codes.
+    It returns a normalized DNA sequence as a `String`.
+    
+    # Arguments
+    
+    * `seq` - A DNA sequence as a `String`.
+    * `iupac` - A boolean flag indicating whether to normalize using IUPAC ambiguity codes.
+    
+    # Returns
+    
+    A normalized DNA sequence as a `String`.
+    
+    # Examples
+    
+    ```
+    use deepbiop_fq as fq;
+    let seq = "acGTN".to_string();
+    let normalized_seq = fq::normalize_seq(seq, false);
+    assert_eq!(normalized_seq, "ACGTN");
+    ```
+    """
+    ...
+
+def seq_to_kmers(seq:str,k:int,overlap:bool) -> list[str]:
+    ...
+
+def test_predicts(predicts:typing.Sequence[Predict]) -> None:
+    ...
+
+def write_fq(records_data:typing.Sequence[RecordData],file_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None:
+    ...
+
+def write_fq_parallel(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path,threads:int) -> None:
+    ...
+
diff --git a/py-deepbiop/deepbiop/utils.pyi b/py-deepbiop/deepbiop/utils.pyi
index 6ce9f22..d062947 100644
--- a/py-deepbiop/deepbiop/utils.pyi
+++ b/py-deepbiop/deepbiop/utils.pyi
@@ -8,14 +8,19 @@ class GenomicInterval:
     A segment is a genomic interval defined by a chromosome, a start position and an end position.
     The start position is inclusive and the end position is exclusive.
     """
-
     start: int
     end: int
     chr: str
-    def __new__(cls, chr: str, start: int, end: int): ...
-    def set_chr(self, chr: str) -> None: ...
-    def overlap(self, other: GenomicInterval) -> bool: ...
-    def __repr__(self) -> str: ...
+    def __new__(cls,chr:str, start:int, end:int): ...
+    def set_chr(self, chr:str) -> None:
+        ...
+
+    def overlap(self, other:GenomicInterval) -> bool:
+        ...
+
+    def __repr__(self) -> str:
+        ...
+
 
 class PslAlignment:
     qname: str
@@ -28,16 +33,22 @@ class PslAlignment:
     tstart: int
     tend: int
     identity: float
-    def __repr__(self) -> str: ...
-
-def generate_unmaped_intervals(
-    input: typing.Sequence[tuple[int, int]], total_length: int
-) -> list[tuple[int, int]]: ...
-def highlight_targets(
-    sequence: str, targets: typing.Sequence[tuple[int, int]], text_width: int | None
-) -> str: ...
-def majority_voting(labels: typing.Sequence[int], window_size: int) -> list[int]: ...
-def remove_intervals_and_keep_left(
-    seq: str, intervals: typing.Sequence[tuple[int, int]]
-) -> tuple[list[str], list[tuple[int, int]]]: ...
-def reverse_complement(seq: str) -> str: ...
+    def __repr__(self) -> str:
+        ...
+
+
+def generate_unmaped_intervals(input:typing.Sequence[tuple[int, int]],total_length:int) -> list[tuple[int, int]]:
+    ...
+
+def highlight_targets(sequence:str,targets:typing.Sequence[tuple[int, int]],text_width:typing.Optional[int]) -> str:
+    ...
+
+def majority_voting(labels:typing.Sequence[int],window_size:int) -> list[int]:
+    ...
+
+def remove_intervals_and_keep_left(seq:str,intervals:typing.Sequence[tuple[int, int]]) -> tuple[list[str], list[tuple[int, int]]]:
+    ...
+
+def reverse_complement(seq:str) -> str:
+    ...
+

From 3398e0083796f8e3d4502a5eea7dde9295406637 Mon Sep 17 00:00:00 2001
From: Yangyang Li <yangyang.li@northwestern.edu>
Date: Mon, 19 Aug 2024 23:09:48 -0500
Subject: [PATCH 2/2] refactor: Update function definitions and imports

---
 py-deepbiop/deepbiop/__init__.pyi |   6 +-
 py-deepbiop/deepbiop/bam.pyi      |   9 +-
 py-deepbiop/deepbiop/fq.pyi       | 232 +++++++++++++++---------------
 py-deepbiop/deepbiop/utils.pyi    |  47 +++---
 py-deepbiop/docs/conf.py          |   4 -
 5 files changed, 135 insertions(+), 163 deletions(-)

diff --git a/py-deepbiop/deepbiop/__init__.pyi b/py-deepbiop/deepbiop/__init__.pyi
index 37cbdba..e47b099 100644
--- a/py-deepbiop/deepbiop/__init__.pyi
+++ b/py-deepbiop/deepbiop/__init__.pyi
@@ -1,8 +1,6 @@
 # This file is automatically generated by pyo3_stub_gen
 # ruff: noqa: E501, F401
 
-from . import utils
-
-def add(a:int,b:int) -> int:
-    ...
+from deepbiop import utils
 
+def add(a: int, b: int) -> int: ...
diff --git a/py-deepbiop/deepbiop/bam.pyi b/py-deepbiop/deepbiop/bam.pyi
index a8455fb..e45f449 100644
--- a/py-deepbiop/deepbiop/bam.pyi
+++ b/py-deepbiop/deepbiop/bam.pyi
@@ -1,10 +1,5 @@
 # This file is automatically generated by pyo3_stub_gen
 # ruff: noqa: E501, F401
 
-
-def left_right_soft_clip(cigar_string:str) -> tuple[int, int]:
-    r"""
-    Calculate left and right soft clips from a cigar string.
-    """
-    ...
-
+def left_right_soft_clip(cigar_string: str) -> tuple[int, int]:
+    r"""Calculate left and right soft clips from a cigar string."""
diff --git a/py-deepbiop/deepbiop/fq.pyi b/py-deepbiop/deepbiop/fq.pyi
index 6a061e2..fd39870 100644
--- a/py-deepbiop/deepbiop/fq.pyi
+++ b/py-deepbiop/deepbiop/fq.pyi
@@ -11,144 +11,140 @@ class FqEncoderOption:
     bases: list[int]
     vectorized_target: bool
     threads: int
-    def __new__(cls,kmer_size:int, qual_offset:int, bases:str, vectorized_target:bool, threads:typing.Optional[int]): ...
+    def __new__(
+        cls,
+        kmer_size: int,
+        qual_offset: int,
+        bases: str,
+        vectorized_target: bool,
+        threads: int | None,
+    ): ...
 
 class JsonEncoder:
-    def __new__(cls,option:FqEncoderOption): ...
-    ...
+    def __new__(cls, option: FqEncoderOption): ...
 
 class ParquetEncoder:
-    def __new__(cls,option:FqEncoderOption): ...
-    ...
+    def __new__(cls, option: FqEncoderOption): ...
 
 class Predict:
-    r"""
-    A struct to store the prediction result
-    """
+    r"""A struct to store the prediction result."""
+
     prediction: list[int]
     seq: str
     id: str
     is_truncated: bool
-    qual: typing.Optional[str]
-    def __new__(cls,prediction:typing.Sequence[int], seq:str, id:str, is_truncated:bool, qual:typing.Optional[str]): ...
-    def __repr__(self) -> str:
-        ...
-
+    qual: str | None
+    def __new__(
+        cls,
+        prediction: typing.Sequence[int],
+        seq: str,
+        id: str,
+        is_truncated: bool,
+        qual: str | None,
+    ): ...
+    def __repr__(self) -> str: ...
     def prediction_region(self) -> list[tuple[int, int]]:
-        r"""
-        Get the prediction region
-        """
-        ...
-
-    def smooth_prediction(self, window_size:int) -> list[tuple[int, int]]:
-        r"""
-        Get the smooth prediction region
-        """
-        ...
-
-    def smooth_label(self, window_size:int) -> list[int]:
-        r"""
-        Get the smooth label
-        """
-        ...
-
-    def smooth_and_select_intervals(self, smooth_window_size:int, min_interval_size:int, append_interval_number:int) -> list[tuple[int, int]]:
-        r"""
-        Smooth and select intervals
-        """
-        ...
+        r"""Get the prediction region."""
 
-    def seq_len(self) -> int:
-        r"""
-        Get the sequence length
-        """
-        ...
+    def smooth_prediction(self, window_size: int) -> list[tuple[int, int]]:
+        r"""Get the smooth prediction region."""
 
-    def qual_array(self) -> list[int]:
-        r"""
-        Get the quality score array
-        """
-        ...
+    def smooth_label(self, window_size: int) -> list[int]:
+        r"""Get the smooth label."""
+
+    def smooth_and_select_intervals(
+        self,
+        smooth_window_size: int,
+        min_interval_size: int,
+        append_interval_number: int,
+    ) -> list[tuple[int, int]]:
+        r"""Smooth and select intervals."""
 
-    def show_info(self, smooth_interval:typing.Sequence[tuple[int, int]], text_width:typing.Optional[int]) -> str:
-        r"""
-        Show the information of the prediction
-        """
-        ...
+    def seq_len(self) -> int:
+        r"""Get the sequence length."""
 
-    def __getstate__(self) -> typing.Any:
-        ...
+    def qual_array(self) -> list[int]:
+        r"""Get the quality score array."""
 
-    def __setstate__(self, state:typing.Any) -> None:
-        ...
+    def show_info(
+        self, smooth_interval: typing.Sequence[tuple[int, int]], text_width: int | None
+    ) -> str:
+        r"""Show the information of the prediction."""
 
+    def __getstate__(self) -> typing.Any: ...
+    def __setstate__(self, state: typing.Any) -> None: ...
 
 class RecordData:
     id: str
     seq: str
     qual: str
-    def __new__(cls,id:str, seq:str, qual:str): ...
-    def set_id(self, id:str) -> None:
-        ...
-
-    def set_seq(self, seq:str) -> None:
-        ...
-
-    def set_qual(self, qual:str) -> None:
-        ...
-
-
-def convert_multiple_fqs_to_one_fq(paths:typing.Sequence[str | os.PathLike | pathlib.Path],result_path:str | os.PathLike | pathlib.Path,parallel:bool) -> None:
-    ...
-
-def encode_fq_path_to_json(fq_path:str | os.PathLike | pathlib.Path,k:int,bases:str,qual_offset:int,vectorized_target:bool,result_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None:
-    ...
-
-def encode_fq_path_to_parquet(fq_path:str | os.PathLike | pathlib.Path,bases:str,qual_offset:int,vectorized_target:bool,result_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None:
-    ...
-
-def encode_fq_path_to_parquet_chunk(fq_path:str | os.PathLike | pathlib.Path,chunk_size:int,parallel:bool,bases:str,qual_offset:int,vectorized_target:bool) -> None:
-    ...
-
-def encode_fq_paths_to_parquet(fq_path:typing.Sequence[str | os.PathLike | pathlib.Path],bases:str,qual_offset:int,vectorized_target:bool) -> None:
-    ...
-
-def encode_qual(qual:str,qual_offset:int) -> list[int]:
-    r"""
-    Convert ASCII quality to Phred score for Phred+33 encoding
-    """
-    ...
-
-def fastq_to_fasta(fastq_path:str | os.PathLike | pathlib.Path,fasta_path:str | os.PathLike | pathlib.Path) -> None:
-    ...
-
-def generate_kmers(base:str,k:int) -> list[str]:
-    ...
-
-def get_label_region(labels:typing.Sequence[int]) -> list[tuple[int, int]]:
-    ...
-
-def kmers_to_seq(kmers:typing.Sequence[str]) -> str:
-    ...
-
-def normalize_seq(seq:str,iupac:bool) -> str:
+    def __new__(cls, id: str, seq: str, qual: str): ...
+    def set_id(self, id: str) -> None: ...
+    def set_seq(self, seq: str) -> None: ...
+    def set_qual(self, qual: str) -> None: ...
+
+def convert_multiple_fqs_to_one_fq(
+    paths: typing.Sequence[str | os.PathLike | pathlib.Path],
+    result_path: str | os.PathLike | pathlib.Path,
+    parallel: bool,
+) -> None: ...
+def encode_fq_path_to_json(
+    fq_path: str | os.PathLike | pathlib.Path,
+    k: int,
+    bases: str,
+    qual_offset: int,
+    vectorized_target: bool,
+    result_path: str | os.PathLike | pathlib.Path | None,
+) -> None: ...
+def encode_fq_path_to_parquet(
+    fq_path: str | os.PathLike | pathlib.Path,
+    bases: str,
+    qual_offset: int,
+    vectorized_target: bool,
+    result_path: str | os.PathLike | pathlib.Path | None,
+) -> None: ...
+def encode_fq_path_to_parquet_chunk(
+    fq_path: str | os.PathLike | pathlib.Path,
+    chunk_size: int,
+    parallel: bool,
+    bases: str,
+    qual_offset: int,
+    vectorized_target: bool,
+) -> None: ...
+def encode_fq_paths_to_parquet(
+    fq_path: typing.Sequence[str | os.PathLike | pathlib.Path],
+    bases: str,
+    qual_offset: int,
+    vectorized_target: bool,
+) -> None: ...
+def encode_qual(qual: str, qual_offset: int) -> list[int]:
+    r"""Convert ASCII quality to Phred score for Phred+33 encoding."""
+
+def fastq_to_fasta(
+    fastq_path: str | os.PathLike | pathlib.Path,
+    fasta_path: str | os.PathLike | pathlib.Path,
+) -> None: ...
+def generate_kmers(base: str, k: int) -> list[str]: ...
+def get_label_region(labels: typing.Sequence[int]) -> list[tuple[int, int]]: ...
+def kmers_to_seq(kmers: typing.Sequence[str]) -> str: ...
+def normalize_seq(seq: str, iupac: bool) -> str:
     r"""
     Normalize a DNA sequence by converting any non-standard nucleotides to standard ones.
-    
+
     This function takes a DNA sequence as a `String` and a boolean flag `iupac` indicating whether to normalize using IUPAC ambiguity codes.
     It returns a normalized DNA sequence as a `String`.
-    
+
     # Arguments
-    
+
     * `seq` - A DNA sequence as a `String`.
     * `iupac` - A boolean flag indicating whether to normalize using IUPAC ambiguity codes.
-    
+
     # Returns
-    
+
     A normalized DNA sequence as a `String`.
-    
+
     # Examples
-    
+
     ```
     use deepbiop_fq as fq;
     let seq = "acGTN".to_string();
@@ -156,17 +152,15 @@ def normalize_seq(seq:str,iupac:bool) -> str:
     assert_eq!(normalized_seq, "ACGTN");
     ```
     """
-    ...
-
-def seq_to_kmers(seq:str,k:int,overlap:bool) -> list[str]:
-    ...
-
-def test_predicts(predicts:typing.Sequence[Predict]) -> None:
-    ...
-
-def write_fq(records_data:typing.Sequence[RecordData],file_path:typing.Optional[str | os.PathLike | pathlib.Path]) -> None:
-    ...
-
-def write_fq_parallel(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path,threads:int) -> None:
-    ...
 
+def seq_to_kmers(seq: str, k: int, overlap: bool) -> list[str]: ...
+def test_predicts(predicts: typing.Sequence[Predict]) -> None: ...
+def write_fq(
+    records_data: typing.Sequence[RecordData],
+    file_path: str | os.PathLike | pathlib.Path | None,
+) -> None: ...
+def write_fq_parallel(
+    records_data: typing.Sequence[RecordData],
+    file_path: str | os.PathLike | pathlib.Path,
+    threads: int,
+) -> None: ...
diff --git a/py-deepbiop/deepbiop/utils.pyi b/py-deepbiop/deepbiop/utils.pyi
index d062947..6ce9f22 100644
--- a/py-deepbiop/deepbiop/utils.pyi
+++ b/py-deepbiop/deepbiop/utils.pyi
@@ -8,19 +8,14 @@ class GenomicInterval:
     A segment is a genomic interval defined by a chromosome, a start position and an end position.
     The start position is inclusive and the end position is exclusive.
     """
+
     start: int
     end: int
     chr: str
-    def __new__(cls,chr:str, start:int, end:int): ...
-    def set_chr(self, chr:str) -> None:
-        ...
-
-    def overlap(self, other:GenomicInterval) -> bool:
-        ...
-
-    def __repr__(self) -> str:
-        ...
-
+    def __new__(cls, chr: str, start: int, end: int): ...
+    def set_chr(self, chr: str) -> None: ...
+    def overlap(self, other: GenomicInterval) -> bool: ...
+    def __repr__(self) -> str: ...
 
 class PslAlignment:
     qname: str
@@ -33,22 +28,16 @@ class PslAlignment:
     tstart: int
     tend: int
     identity: float
-    def __repr__(self) -> str:
-        ...
-
-
-def generate_unmaped_intervals(input:typing.Sequence[tuple[int, int]],total_length:int) -> list[tuple[int, int]]:
-    ...
-
-def highlight_targets(sequence:str,targets:typing.Sequence[tuple[int, int]],text_width:typing.Optional[int]) -> str:
-    ...
-
-def majority_voting(labels:typing.Sequence[int],window_size:int) -> list[int]:
-    ...
-
-def remove_intervals_and_keep_left(seq:str,intervals:typing.Sequence[tuple[int, int]]) -> tuple[list[str], list[tuple[int, int]]]:
-    ...
-
-def reverse_complement(seq:str) -> str:
-    ...
-
+    def __repr__(self) -> str: ...
+
+def generate_unmaped_intervals(
+    input: typing.Sequence[tuple[int, int]], total_length: int
+) -> list[tuple[int, int]]: ...
+def highlight_targets(
+    sequence: str, targets: typing.Sequence[tuple[int, int]], text_width: int | None
+) -> str: ...
+def majority_voting(labels: typing.Sequence[int], window_size: int) -> list[int]: ...
+def remove_intervals_and_keep_left(
+    seq: str, intervals: typing.Sequence[tuple[int, int]]
+) -> tuple[list[str], list[tuple[int, int]]]: ...
+def reverse_complement(seq: str) -> str: ...
diff --git a/py-deepbiop/docs/conf.py b/py-deepbiop/docs/conf.py
index 38cfc07..340aa6b 100644
--- a/py-deepbiop/docs/conf.py
+++ b/py-deepbiop/docs/conf.py
@@ -1,10 +1,6 @@
 """Sphinx configuration."""
 
-import sys
 from datetime import datetime
-from pathlib import Path
-
-# sys.path.insert(0, (Path().resolve() / "../deepbiop").as_posix())
 
 project = "deepbiop"
 author = "Yangyang Li"