
Commit

refactor: Clean up code formatting and improve function signatures for consistency
cauliyang committed Jan 16, 2025
1 parent 0ea69aa commit ee57553
Showing 7 changed files with 152 additions and 134 deletions.
2 changes: 1 addition & 1 deletion py-deepbiop/Makefile
@@ -8,6 +8,6 @@ clean:
 
 build: clean
 	uv sync
-	uv tool maturin develop -r
+	uv tool run maturin develop -r
 	cargo run --bin stub_gen
 	ruff check --fix --unsafe-fixes
7 changes: 3 additions & 4 deletions py-deepbiop/deepbiop/bam.pyi
@@ -5,12 +5,11 @@ import os
 import pathlib
 import typing
 
-def count_chimeric_reads_for_path(bam,threads = ...) -> int:
+def count_chimeric_reads_for_path(bam, threads=...) -> int:
     r"""Calculate the number of chimeric reads in a BAM file."""
 
-def count_chimeric_reads_for_paths(bams,threads = ...) -> dict[str, int]:
+def count_chimeric_reads_for_paths(bams, threads=...) -> dict[str, int]:
     r"""Calculate the number of chimeric reads in multiple BAM files."""
 
-def left_right_soft_clip(cigar_string:str) -> tuple[int, int]:
+def left_right_soft_clip(cigar_string: str) -> tuple[int, int]:
     r"""Calculate left and right soft clips from a cigar string."""
-
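For orientation, a minimal usage sketch of the reformatted `bam.pyi` stubs above. The BAM path, CIGAR string, and printed values are illustrative assumptions, not part of this commit; only the signatures come from the stubs.

```python
# Hypothetical usage of the bam stubs; "sample.bam" is a placeholder file.
from deepbiop import bam

# Count chimeric reads in one BAM file; the thread count falls back to the
# stub's elided default when omitted.
n_chimeric = bam.count_chimeric_reads_for_path("sample.bam")

# Soft-clip lengths from a CIGAR string: 5 bases clipped on each end here.
left, right = bam.left_right_soft_clip("5S90M5S")
print(n_chimeric, left, right)
```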
13 changes: 6 additions & 7 deletions py-deepbiop/deepbiop/core.pyi
@@ -3,7 +3,7 @@
 
 import typing
 
-def generate_kmers(base:str,k:int) -> list[str]:
+def generate_kmers(base: str, k: int) -> list[str]:
     r"""
     Generate all possible k-mers from a set of base characters.
@@ -20,7 +20,7 @@ def generate_kmers(base:str,k:int) -> list[str]:
     A vector containing all possible k-mer combinations as strings
     """
 
-def generate_kmers_table(base:str,k:int) -> dict[list[int], int]:
+def generate_kmers_table(base: str, k: int) -> dict[list[int], int]:
     r"""
     Generate a lookup table mapping k-mers to unique IDs.
@@ -37,7 +37,7 @@ def generate_kmers_table(base:str,k:int) -> dict[list[int], int]:
     A HashMap mapping k-mer byte sequences to integer IDs
     """
 
-def kmers_to_seq(kmers:typing.Sequence[str]) -> str:
+def kmers_to_seq(kmers: typing.Sequence[str]) -> str:
     r"""
     Convert k-mers back into a DNA sequence.
@@ -53,7 +53,7 @@ def kmers_to_seq(kmers:typing.Sequence[str]) -> str:
     The reconstructed DNA sequence as a `String`, wrapped in a `Result`
     """
 
-def normalize_seq(seq:str,iupac:bool) -> str:
+def normalize_seq(seq: str, iupac: bool) -> str:
     r"""
     Normalize a DNA sequence by converting any non-standard nucleotides to standard ones.
@@ -70,7 +70,7 @@ def normalize_seq(seq:str,iupac:bool) -> str:
     A normalized DNA sequence as a `String`.
     """
 
-def reverse_complement(seq:str) -> str:
+def reverse_complement(seq: str) -> str:
     r"""
     Generate the reverse complement of a DNA sequence.
@@ -97,7 +97,7 @@ def reverse_complement(seq:str) -> str:
     ```
     """
 
-def seq_to_kmers(seq:str,k:int,overlap:bool) -> list[str]:
+def seq_to_kmers(seq: str, k: int, overlap: bool) -> list[str]:
     r"""
     Convert a DNA sequence into k-mers.
@@ -114,4 +114,3 @@ def seq_to_kmers(seq:str,k:int,overlap:bool) -> list[str]:
     A vector of k-mers as `String`s
     """
-
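A short sketch of the `core.pyi` helpers above; the outputs noted in the comments assume the standard k-mer and reverse-complement semantics the docstrings describe, and the toy sequences are invented.

```python
# Illustrative use of the core stubs; sequences are toy examples.
from deepbiop import core

kmers = core.seq_to_kmers("ACGTAC", 3, True)  # overlapping 3-mers: ACG, CGT, GTA, TAC
seq = core.kmers_to_seq(kmers)                # reconstructs "ACGTAC"
rc = core.reverse_complement("ACGT")          # "ACGT" is its own reverse complement
table = core.generate_kmers_table("ACGT", 2)  # 4**2 == 16 dinucleotide IDs
print(kmers, seq, rc, len(table))
```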
63 changes: 32 additions & 31 deletions py-deepbiop/deepbiop/fa.pyi
@@ -26,7 +26,7 @@ class EncoderOption:
     """
 
     bases: list[int]
-    def __new__(cls,bases): ...
+    def __new__(cls, bases): ...
 
 class ParquetEncoder:
     r"""
@@ -49,37 +49,38 @@
     ```
     """
 
-    def __new__(cls,option:EncoderOption): ...
+    def __new__(cls, option: EncoderOption): ...
 
 class RecordData:
     id: str
     seq: str
-    def __new__(cls,id:str, seq:str): ...
-    def set_id(self, id:str) -> None:
-        ...
-
-    def set_seq(self, seq:str) -> None:
-        ...
-
-
-def convert_multiple_fas_to_one_fa(paths:typing.Sequence[str | os.PathLike | pathlib.Path],result_path:str | os.PathLike | pathlib.Path,parallel:bool) -> None:
-    ...
-
-def encode_fa_path_to_parquet(fa_path,bases,result_path = ...) -> None:
-    ...
-
-def encode_fa_path_to_parquet_chunk(fa_path:str | os.PathLike | pathlib.Path,chunk_size:int,parallel:bool,bases:str) -> None:
-    ...
-
-def encode_fa_paths_to_parquet(fa_path:typing.Sequence[str | os.PathLike | pathlib.Path],bases:str) -> None:
-    ...
-
-def select_record_from_fa(selected_reads:typing.Sequence[str],fq:str | os.PathLike | pathlib.Path,output:str | os.PathLike | pathlib.Path) -> None:
-    ...
-
-def write_fa(records_data,file_path = ...) -> None:
-    ...
-
-def write_fa_parallel(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path,threads:int) -> None:
-    ...
-
+    def __new__(cls, id: str, seq: str): ...
+    def set_id(self, id: str) -> None: ...
+    def set_seq(self, seq: str) -> None: ...
+
+def convert_multiple_fas_to_one_fa(
+    paths: typing.Sequence[str | os.PathLike | pathlib.Path],
+    result_path: str | os.PathLike | pathlib.Path,
+    parallel: bool,
+) -> None: ...
+def encode_fa_path_to_parquet(fa_path, bases, result_path=...) -> None: ...
+def encode_fa_path_to_parquet_chunk(
+    fa_path: str | os.PathLike | pathlib.Path,
+    chunk_size: int,
+    parallel: bool,
+    bases: str,
+) -> None: ...
+def encode_fa_paths_to_parquet(
+    fa_path: typing.Sequence[str | os.PathLike | pathlib.Path], bases: str
+) -> None: ...
+def select_record_from_fa(
+    selected_reads: typing.Sequence[str],
+    fq: str | os.PathLike | pathlib.Path,
+    output: str | os.PathLike | pathlib.Path,
+) -> None: ...
+def write_fa(records_data, file_path=...) -> None: ...
+def write_fa_parallel(
+    records_data: typing.Sequence[RecordData],
+    file_path: str | os.PathLike | pathlib.Path,
+    threads: int,
+) -> None: ...
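A minimal sketch of the `fa.pyi` surface after reformatting; the record contents and the output path are assumptions for illustration.

```python
# Hypothetical FASTA round trip using the fa stubs; paths are placeholders.
from deepbiop import fa

rec = fa.RecordData("read1", "ACGTACGT")
rec.set_id("read1_trimmed")    # setter from the RecordData stub above
fa.write_fa([rec], "out.fa")   # file_path has an elided default in the stub
```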
128 changes: 64 additions & 64 deletions py-deepbiop/deepbiop/fq.pyi
@@ -9,10 +9,10 @@ class EncoderOption:
     qual_offset: int
     bases: list[int]
     threads: int
-    def __new__(cls,qual_offset,bases,threads = ...): ...
+    def __new__(cls, qual_offset, bases, threads=...): ...
 
 class ParquetEncoder:
-    def __new__(cls,option:EncoderOption): ...
+    def __new__(cls, option: EncoderOption): ...
 
 class Predict:
     r"""A struct to store the prediction result."""
@@ -22,20 +22,23 @@ class Predict:
     id: str
     is_truncated: bool
     qual: str | None
-    def __new__(cls,prediction,seq,id,is_truncated,qual = ...): ...
-    def __repr__(self) -> str:
-        ...
-
+    def __new__(cls, prediction, seq, id, is_truncated, qual=...): ...
+    def __repr__(self) -> str: ...
     def prediction_region(self) -> list[tuple[int, int]]:
         r"""Get the prediction region."""
 
-    def smooth_prediction(self, window_size:int) -> list[tuple[int, int]]:
+    def smooth_prediction(self, window_size: int) -> list[tuple[int, int]]:
         r"""Get the smooth prediction region."""
 
-    def smooth_label(self, window_size:int) -> list[int]:
+    def smooth_label(self, window_size: int) -> list[int]:
         r"""Get the smooth label."""
 
-    def smooth_and_select_intervals(self, smooth_window_size:int, min_interval_size:int, append_interval_number:int) -> list[tuple[int, int]]:
+    def smooth_and_select_intervals(
+        self,
+        smooth_window_size: int,
+        min_interval_size: int,
+        append_interval_number: int,
+    ) -> list[tuple[int, int]]:
         r"""Smooth and select intervals."""
 
     def seq_len(self) -> int:
@@ -44,67 +47,64 @@
     def qual_array(self) -> list[int]:
         r"""Get the quality score array."""
 
-    def show_info(self, smooth_interval,text_width = ...) -> str:
+    def show_info(self, smooth_interval, text_width=...) -> str:
         r"""Show the information of the prediction."""
 
-    def __getstate__(self) -> typing.Any:
-        ...
-
-    def __setstate__(self, state:typing.Any) -> None:
-        ...
-
+    def __getstate__(self) -> typing.Any: ...
+    def __setstate__(self, state: typing.Any) -> None: ...

 class RecordData:
     id: str
     seq: str
     qual: str
-    def __new__(cls,id:str, seq:str, qual:str): ...
-    def set_id(self, id:str) -> None:
-        ...
-
-    def set_seq(self, seq:str) -> None:
-        ...
-
-    def set_qual(self, qual:str) -> None:
-        ...
-
-
-def convert_multiple_fqs_to_one_fq(paths:typing.Sequence[str | os.PathLike | pathlib.Path],result_path:str | os.PathLike | pathlib.Path,parallel:bool) -> None:
-    ...
-
-def encode_fq_path_to_parquet(fq_path,bases,qual_offset,result_path = ...) -> None:
-    ...
-
-def encode_fq_path_to_parquet_chunk(fq_path:str | os.PathLike | pathlib.Path,chunk_size:int,parallel:bool,bases:str,qual_offset:int) -> None:
-    ...
-
-def encode_fq_paths_to_parquet(fq_path:typing.Sequence[str | os.PathLike | pathlib.Path],bases:str,qual_offset:int) -> None:
-    ...
-
-def encode_qual(qual:str,qual_offset:int) -> list[int]:
+    def __new__(cls, id: str, seq: str, qual: str): ...
+    def set_id(self, id: str) -> None: ...
+    def set_seq(self, seq: str) -> None: ...
+    def set_qual(self, qual: str) -> None: ...
+
+def convert_multiple_fqs_to_one_fq(
+    paths: typing.Sequence[str | os.PathLike | pathlib.Path],
+    result_path: str | os.PathLike | pathlib.Path,
+    parallel: bool,
+) -> None: ...
+def encode_fq_path_to_parquet(fq_path, bases, qual_offset, result_path=...) -> None: ...
+def encode_fq_path_to_parquet_chunk(
+    fq_path: str | os.PathLike | pathlib.Path,
+    chunk_size: int,
+    parallel: bool,
+    bases: str,
+    qual_offset: int,
+) -> None: ...
+def encode_fq_paths_to_parquet(
+    fq_path: typing.Sequence[str | os.PathLike | pathlib.Path],
+    bases: str,
+    qual_offset: int,
+) -> None: ...
+def encode_qual(qual: str, qual_offset: int) -> list[int]:
r"""Convert ASCII quality to Phred score for Phred+33 encoding."""

def fastq_to_fasta(fastq_path:str | os.PathLike | pathlib.Path,fasta_path:str | os.PathLike | pathlib.Path) -> None:
...

def get_label_region(labels:typing.Sequence[int]) -> list[tuple[int, int]]:
...

def load_predicts_from_batch_pt(pt_path:str | os.PathLike | pathlib.Path,ignore_label:int,id_table:typing.Mapping[int, str]) -> dict[str, Predict]:
...

def load_predicts_from_batch_pts(pt_path,ignore_label,id_table,max_predicts = ...) -> dict[str, Predict]:
...

def select_record_from_fq(selected_reads:typing.Sequence[str],fq:str | os.PathLike | pathlib.Path,output:str | os.PathLike | pathlib.Path) -> None:
...

def test_predicts(predicts:typing.Sequence[Predict]) -> None:
...

def write_fq(records_data,file_path = ...) -> None:
...

def write_fq_parallel(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path,threads:int) -> None:
...

def fastq_to_fasta(
fastq_path: str | os.PathLike | pathlib.Path,
fasta_path: str | os.PathLike | pathlib.Path,
) -> None: ...
def get_label_region(labels: typing.Sequence[int]) -> list[tuple[int, int]]: ...
def load_predicts_from_batch_pt(
pt_path: str | os.PathLike | pathlib.Path,
ignore_label: int,
id_table: typing.Mapping[int, str],
) -> dict[str, Predict]: ...
def load_predicts_from_batch_pts(
pt_path, ignore_label, id_table, max_predicts=...
) -> dict[str, Predict]: ...
def select_record_from_fq(
selected_reads: typing.Sequence[str],
fq: str | os.PathLike | pathlib.Path,
output: str | os.PathLike | pathlib.Path,
) -> None: ...
def test_predicts(predicts: typing.Sequence[Predict]) -> None: ...
def write_fq(records_data, file_path=...) -> None: ...
def write_fq_parallel(
records_data: typing.Sequence[RecordData],
file_path: str | os.PathLike | pathlib.Path,
threads: int,
) -> None: ...
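A matching sketch for `fq.pyi`; the quality string, offset, and file name are illustrative placeholders, while the Phred+33 arithmetic follows the `encode_qual` docstring above.

```python
# Hypothetical FASTQ usage; 'I' is ASCII 73, so Phred+33 gives quality 40.
from deepbiop import fq

scores = fq.encode_qual("IIII", 33)           # [40, 40, 40, 40]
rec = fq.RecordData("read1", "ACGT", "IIII")  # id, sequence, quality string
fq.write_fq([rec], "out.fq")                  # file_path default elided as `...`
```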
