
Commit

refactor: Clean up code formatting and improve function signatures for consistency
cauliyang committed Jan 16, 2025
1 parent 0ea69aa commit ee57553
Showing 7 changed files with 152 additions and 134 deletions.
2 changes: 1 addition & 1 deletion py-deepbiop/Makefile
@@ -8,6 +8,6 @@ clean:
 
 build: clean
 	uv sync
-	uv tool maturin develop -r
+	uv tool run maturin develop -r
 	cargo run --bin stub_gen
 	ruff check --fix --unsafe-fixes
7 changes: 3 additions & 4 deletions py-deepbiop/deepbiop/bam.pyi
@@ -5,12 +5,11 @@ import os
 import pathlib
 import typing
 
-def count_chimeric_reads_for_path(bam,threads = ...) -> int:
+def count_chimeric_reads_for_path(bam, threads=...) -> int:
     r"""Calculate the number of chimeric reads in a BAM file."""
 
-def count_chimeric_reads_for_paths(bams,threads = ...) -> dict[str, int]:
+def count_chimeric_reads_for_paths(bams, threads=...) -> dict[str, int]:
     r"""Calculate the number of chimeric reads in multiple BAM files."""
 
-def left_right_soft_clip(cigar_string:str) -> tuple[int, int]:
+def left_right_soft_clip(cigar_string: str) -> tuple[int, int]:
     r"""Calculate left and right soft clips from a cigar string."""
-
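For orientation, a minimal usage sketch of the reformatted `bam.pyi` stubs above. The BAM path, CIGAR string, and printed values are illustrative assumptions, not part of this commit; only the signatures come from the stubs.

```python
# Hypothetical usage of the bam stubs; "sample.bam" is a placeholder file.
from deepbiop import bam

# Count chimeric reads in one BAM file; the thread count falls back to the
# stub's elided default when omitted.
n_chimeric = bam.count_chimeric_reads_for_path("sample.bam")

# Soft-clip lengths from a CIGAR string: 5 bases clipped on each end here.
left, right = bam.left_right_soft_clip("5S90M5S")
print(n_chimeric, left, right)
```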
13 changes: 6 additions & 7 deletions py-deepbiop/deepbiop/core.pyi
@@ -3,7 +3,7 @@
 
 import typing
 
-def generate_kmers(base:str,k:int) -> list[str]:
+def generate_kmers(base: str, k: int) -> list[str]:
     r"""
     Generate all possible k-mers from a set of base characters.
@@ -20,7 +20,7 @@ def generate_kmers(base:str,k:int) -> list[str]:
     A vector containing all possible k-mer combinations as strings
     """
 
-def generate_kmers_table(base:str,k:int) -> dict[list[int], int]:
+def generate_kmers_table(base: str, k: int) -> dict[list[int], int]:
     r"""
     Generate a lookup table mapping k-mers to unique IDs.
@@ -37,7 +37,7 @@ def generate_kmers_table(base:str,k:int) -> dict[list[int], int]:
     A HashMap mapping k-mer byte sequences to integer IDs
     """
 
-def kmers_to_seq(kmers:typing.Sequence[str]) -> str:
+def kmers_to_seq(kmers: typing.Sequence[str]) -> str:
     r"""
     Convert k-mers back into a DNA sequence.
@@ -53,7 +53,7 @@ def kmers_to_seq(kmers:typing.Sequence[str]) -> str:
     The reconstructed DNA sequence as a `String`, wrapped in a `Result`
     """
 
-def normalize_seq(seq:str,iupac:bool) -> str:
+def normalize_seq(seq: str, iupac: bool) -> str:
     r"""
     Normalize a DNA sequence by converting any non-standard nucleotides to standard ones.
@@ -70,7 +70,7 @@ def normalize_seq(seq:str,iupac:bool) -> str:
     A normalized DNA sequence as a `String`.
     """
 
-def reverse_complement(seq:str) -> str:
+def reverse_complement(seq: str) -> str:
     r"""
     Generate the reverse complement of a DNA sequence.
@@ -97,7 +97,7 @@ def reverse_complement(seq:str) -> str:
     ```
     """
 
-def seq_to_kmers(seq:str,k:int,overlap:bool) -> list[str]:
+def seq_to_kmers(seq: str, k: int, overlap: bool) -> list[str]:
     r"""
     Convert a DNA sequence into k-mers.
@@ -114,4 +114,3 @@ def seq_to_kmers(seq:str,k:int,overlap:bool) -> list[str]:
     A vector of k-mers as `String`s
     """
-
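A short sketch of the `core.pyi` helpers above; the outputs noted in the comments assume the standard k-mer and reverse-complement semantics the docstrings describe, and the toy sequences are invented.

```python
# Illustrative use of the core stubs; sequences are toy examples.
from deepbiop import core

kmers = core.seq_to_kmers("ACGTAC", 3, True)  # overlapping 3-mers: ACG, CGT, GTA, TAC
seq = core.kmers_to_seq(kmers)                # reconstructs "ACGTAC"
rc = core.reverse_complement("ACGT")          # "ACGT" is its own reverse complement
table = core.generate_kmers_table("ACGT", 2)  # 4**2 == 16 dinucleotide IDs
print(kmers, seq, rc, len(table))
```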
63 changes: 32 additions & 31 deletions py-deepbiop/deepbiop/fa.pyi
@@ -26,7 +26,7 @@ class EncoderOption:
     """
 
     bases: list[int]
-    def __new__(cls,bases): ...
+    def __new__(cls, bases): ...
 
 class ParquetEncoder:
     r"""
@@ -49,37 +49,38 @@
     ```
     """
 
-    def __new__(cls,option:EncoderOption): ...
+    def __new__(cls, option: EncoderOption): ...
 
 class RecordData:
     id: str
     seq: str
-    def __new__(cls,id:str, seq:str): ...
-    def set_id(self, id:str) -> None:
-        ...
-
-    def set_seq(self, seq:str) -> None:
-        ...
-
-
-def convert_multiple_fas_to_one_fa(paths:typing.Sequence[str | os.PathLike | pathlib.Path],result_path:str | os.PathLike | pathlib.Path,parallel:bool) -> None:
-    ...
-
-def encode_fa_path_to_parquet(fa_path,bases,result_path = ...) -> None:
-    ...
-
-def encode_fa_path_to_parquet_chunk(fa_path:str | os.PathLike | pathlib.Path,chunk_size:int,parallel:bool,bases:str) -> None:
-    ...
-
-def encode_fa_paths_to_parquet(fa_path:typing.Sequence[str | os.PathLike | pathlib.Path],bases:str) -> None:
-    ...
-
-def select_record_from_fa(selected_reads:typing.Sequence[str],fq:str | os.PathLike | pathlib.Path,output:str | os.PathLike | pathlib.Path) -> None:
-    ...
-
-def write_fa(records_data,file_path = ...) -> None:
-    ...
-
-def write_fa_parallel(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path,threads:int) -> None:
-    ...
-
+    def __new__(cls, id: str, seq: str): ...
+    def set_id(self, id: str) -> None: ...
+    def set_seq(self, seq: str) -> None: ...
+
+def convert_multiple_fas_to_one_fa(
+    paths: typing.Sequence[str | os.PathLike | pathlib.Path],
+    result_path: str | os.PathLike | pathlib.Path,
+    parallel: bool,
+) -> None: ...
+def encode_fa_path_to_parquet(fa_path, bases, result_path=...) -> None: ...
+def encode_fa_path_to_parquet_chunk(
+    fa_path: str | os.PathLike | pathlib.Path,
+    chunk_size: int,
+    parallel: bool,
+    bases: str,
+) -> None: ...
+def encode_fa_paths_to_parquet(
+    fa_path: typing.Sequence[str | os.PathLike | pathlib.Path], bases: str
+) -> None: ...
+def select_record_from_fa(
+    selected_reads: typing.Sequence[str],
+    fq: str | os.PathLike | pathlib.Path,
+    output: str | os.PathLike | pathlib.Path,
+) -> None: ...
+def write_fa(records_data, file_path=...) -> None: ...
+def write_fa_parallel(
+    records_data: typing.Sequence[RecordData],
+    file_path: str | os.PathLike | pathlib.Path,
+    threads: int,
+) -> None: ...
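A minimal sketch of the `fa.pyi` surface after reformatting; the record contents and the output path are assumptions for illustration.

```python
# Hypothetical FASTA round trip using the fa stubs; paths are placeholders.
from deepbiop import fa

rec = fa.RecordData("read1", "ACGTACGT")
rec.set_id("read1_trimmed")    # setter from the RecordData stub above
fa.write_fa([rec], "out.fa")   # file_path has an elided default in the stub
```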
128 changes: 64 additions & 64 deletions py-deepbiop/deepbiop/fq.pyi
@@ -9,10 +9,10 @@ class EncoderOption:
     qual_offset: int
     bases: list[int]
     threads: int
-    def __new__(cls,qual_offset,bases,threads = ...): ...
+    def __new__(cls, qual_offset, bases, threads=...): ...
 
 class ParquetEncoder:
-    def __new__(cls,option:EncoderOption): ...
+    def __new__(cls, option: EncoderOption): ...
 
 class Predict:
     r"""A struct to store the prediction result."""
@@ -22,20 +22,23 @@ class Predict:
     id: str
     is_truncated: bool
     qual: str | None
-    def __new__(cls,prediction,seq,id,is_truncated,qual = ...): ...
-    def __repr__(self) -> str:
-        ...
-
+    def __new__(cls, prediction, seq, id, is_truncated, qual=...): ...
+    def __repr__(self) -> str: ...
     def prediction_region(self) -> list[tuple[int, int]]:
         r"""Get the prediction region."""
 
-    def smooth_prediction(self, window_size:int) -> list[tuple[int, int]]:
+    def smooth_prediction(self, window_size: int) -> list[tuple[int, int]]:
         r"""Get the smooth prediction region."""
 
-    def smooth_label(self, window_size:int) -> list[int]:
+    def smooth_label(self, window_size: int) -> list[int]:
         r"""Get the smooth label."""
 
-    def smooth_and_select_intervals(self, smooth_window_size:int, min_interval_size:int, append_interval_number:int) -> list[tuple[int, int]]:
+    def smooth_and_select_intervals(
+        self,
+        smooth_window_size: int,
+        min_interval_size: int,
+        append_interval_number: int,
+    ) -> list[tuple[int, int]]:
         r"""Smooth and select intervals."""
 
     def seq_len(self) -> int:
@@ -44,67 +47,64 @@
     def qual_array(self) -> list[int]:
         r"""Get the quality score array."""
 
-    def show_info(self, smooth_interval,text_width = ...) -> str:
+    def show_info(self, smooth_interval, text_width=...) -> str:
         r"""Show the information of the prediction."""
 
-    def __getstate__(self) -> typing.Any:
-        ...
-
-    def __setstate__(self, state:typing.Any) -> None:
-        ...
-
+    def __getstate__(self) -> typing.Any: ...
+    def __setstate__(self, state: typing.Any) -> None: ...

 class RecordData:
     id: str
     seq: str
     qual: str
-    def __new__(cls,id:str, seq:str, qual:str): ...
-    def set_id(self, id:str) -> None:
-        ...
-
-    def set_seq(self, seq:str) -> None:
-        ...
-
-    def set_qual(self, qual:str) -> None:
-        ...
-
-
-def convert_multiple_fqs_to_one_fq(paths:typing.Sequence[str | os.PathLike | pathlib.Path],result_path:str | os.PathLike | pathlib.Path,parallel:bool) -> None:
-    ...
-
-def encode_fq_path_to_parquet(fq_path,bases,qual_offset,result_path = ...) -> None:
-    ...
-
-def encode_fq_path_to_parquet_chunk(fq_path:str | os.PathLike | pathlib.Path,chunk_size:int,parallel:bool,bases:str,qual_offset:int) -> None:
-    ...
-
-def encode_fq_paths_to_parquet(fq_path:typing.Sequence[str | os.PathLike | pathlib.Path],bases:str,qual_offset:int) -> None:
-    ...
-
-def encode_qual(qual:str,qual_offset:int) -> list[int]:
+    def __new__(cls, id: str, seq: str, qual: str): ...
+    def set_id(self, id: str) -> None: ...
+    def set_seq(self, seq: str) -> None: ...
+    def set_qual(self, qual: str) -> None: ...
+
+def convert_multiple_fqs_to_one_fq(
+    paths: typing.Sequence[str | os.PathLike | pathlib.Path],
+    result_path: str | os.PathLike | pathlib.Path,
+    parallel: bool,
+) -> None: ...
+def encode_fq_path_to_parquet(fq_path, bases, qual_offset, result_path=...) -> None: ...
+def encode_fq_path_to_parquet_chunk(
+    fq_path: str | os.PathLike | pathlib.Path,
+    chunk_size: int,
+    parallel: bool,
+    bases: str,
+    qual_offset: int,
+) -> None: ...
+def encode_fq_paths_to_parquet(
+    fq_path: typing.Sequence[str | os.PathLike | pathlib.Path],
+    bases: str,
+    qual_offset: int,
+) -> None: ...
+def encode_qual(qual: str, qual_offset: int) -> list[int]:
r"""Convert ASCII quality to Phred score for Phred+33 encoding."""

def fastq_to_fasta(fastq_path:str | os.PathLike | pathlib.Path,fasta_path:str | os.PathLike | pathlib.Path) -> None:
...

def get_label_region(labels:typing.Sequence[int]) -> list[tuple[int, int]]:
...

def load_predicts_from_batch_pt(pt_path:str | os.PathLike | pathlib.Path,ignore_label:int,id_table:typing.Mapping[int, str]) -> dict[str, Predict]:
...

def load_predicts_from_batch_pts(pt_path,ignore_label,id_table,max_predicts = ...) -> dict[str, Predict]:
...

def select_record_from_fq(selected_reads:typing.Sequence[str],fq:str | os.PathLike | pathlib.Path,output:str | os.PathLike | pathlib.Path) -> None:
...

def test_predicts(predicts:typing.Sequence[Predict]) -> None:
...

def write_fq(records_data,file_path = ...) -> None:
...

def write_fq_parallel(records_data:typing.Sequence[RecordData],file_path:str | os.PathLike | pathlib.Path,threads:int) -> None:
...

def fastq_to_fasta(
fastq_path: str | os.PathLike | pathlib.Path,
fasta_path: str | os.PathLike | pathlib.Path,
) -> None: ...
def get_label_region(labels: typing.Sequence[int]) -> list[tuple[int, int]]: ...
def load_predicts_from_batch_pt(
pt_path: str | os.PathLike | pathlib.Path,
ignore_label: int,
id_table: typing.Mapping[int, str],
) -> dict[str, Predict]: ...
def load_predicts_from_batch_pts(
pt_path, ignore_label, id_table, max_predicts=...
) -> dict[str, Predict]: ...
def select_record_from_fq(
selected_reads: typing.Sequence[str],
fq: str | os.PathLike | pathlib.Path,
output: str | os.PathLike | pathlib.Path,
) -> None: ...
def test_predicts(predicts: typing.Sequence[Predict]) -> None: ...
def write_fq(records_data, file_path=...) -> None: ...
def write_fq_parallel(
records_data: typing.Sequence[RecordData],
file_path: str | os.PathLike | pathlib.Path,
threads: int,
) -> None: ...
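A matching sketch for `fq.pyi`; the quality string, offset, and file name are illustrative placeholders, while the Phred+33 arithmetic follows the `encode_qual` docstring above.

```python
# Hypothetical FASTQ usage; 'I' is ASCII 73, so Phred+33 gives quality 40.
from deepbiop import fq

scores = fq.encode_qual("IIII", 33)           # [40, 40, 40, 40]
rec = fq.RecordData("read1", "ACGT", "IIII")  # id, sequence, quality string
fq.write_fq([rec], "out.fq")                  # file_path default elided as `...`
```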
