Skip to content

Commit

Permalink
Merge pull request #20 from cauliyang/dev
Browse files Browse the repository at this point in the history
feat: Update Python function signatures and add normalization function
  • Loading branch information
cauliyang authored Aug 20, 2024
2 parents 2bfbec9 + 3398e00 commit d8a4ede
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 8 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pyo3 = { version = "0.21.0", features = [
"extension-module",
"anyhow",
] }
pyo3-stub-gen = "0.5"
pyo3-stub-gen = "0.5.1"
thiserror = "1.0"
anyhow = "1.0"
walkdir = { version = "2.4" }
Expand Down
32 changes: 32 additions & 0 deletions crates/deepbiop-fq/src/python.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ impl encode::TensorEncoder {
}
}

#[gen_stub_pymethods]
#[pymethods]
impl encode::JsonEncoder {
#[new]
Expand All @@ -39,6 +40,7 @@ impl encode::JsonEncoder {
}
}

#[gen_stub_pymethods]
#[pymethods]
impl encode::ParquetEncoder {
#[new]
Expand Down Expand Up @@ -112,6 +114,7 @@ impl PyRecordData {
}
}

#[gen_stub_pyfunction(module = "deepbiop.fq")]
#[pyfunction]
fn write_fq(records_data: Vec<PyRecordData>, file_path: Option<PathBuf>) -> Result<()> {
let records: Vec<encode::RecordData> = records_data
Expand All @@ -121,6 +124,7 @@ fn write_fq(records_data: Vec<PyRecordData>, file_path: Option<PathBuf>) -> Resu
io::write_fq(&records, file_path)
}

#[gen_stub_pyfunction(module = "deepbiop.fq")]
#[pyfunction]
fn write_fq_parallel(
records_data: Vec<PyRecordData>,
Expand Down Expand Up @@ -168,6 +172,28 @@ fn generate_kmers(base: String, k: usize) -> Vec<String> {
.collect()
}

/// Normalize a DNA sequence by converting any non-standard nucleotides to standard ones.
///
/// This function takes a DNA sequence as a `String` and a boolean flag `iupac` indicating whether to normalize using IUPAC ambiguity codes.
/// It returns a normalized DNA sequence as a `String`.
///
/// # Arguments
///
/// * `seq` - A DNA sequence as a `String`.
/// * `iupac` - A boolean flag indicating whether to normalize using IUPAC ambiguity codes.
///
/// # Returns
///
/// A normalized DNA sequence as a `String`.
///
/// # Examples
///
/// ```ignore
/// use deepbiop_fq as fq;
/// let seq = "acGTN".to_string();
/// let normalized_seq = fq::normalize_seq(seq, false);
/// assert_eq!(normalized_seq, "ACGTN");

Check failure on line 195 in crates/deepbiop-fq/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (stable)

cannot find function `normalize_seq` in crate `fq`

Check failure on line 195 in crates/deepbiop-fq/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (beta)

cannot find function `normalize_seq` in crate `fq`

Check failure on line 195 in crates/deepbiop-fq/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (macos)

cannot find function `normalize_seq` in crate `fq`

Check failure on line 195 in crates/deepbiop-fq/src/python.rs

View workflow job for this annotation

GitHub Actions / Testing (windows)

cannot find function `normalize_seq` in crate `fq`
/// ```
#[gen_stub_pyfunction(module = "deepbiop.fq")]
#[pyfunction]
fn normalize_seq(seq: String, iupac: bool) -> String {
Expand Down Expand Up @@ -267,6 +293,7 @@ fn encode_fq_path_to_tensor(
))
}

#[gen_stub_pyfunction(module = "deepbiop.fq")]
#[pyfunction]
fn encode_fq_path_to_json(
fq_path: PathBuf,
Expand Down Expand Up @@ -302,6 +329,7 @@ fn encode_fq_path_to_json(
Ok(())
}

#[gen_stub_pyfunction(module = "deepbiop.fq")]
#[pyfunction]
fn encode_fq_path_to_parquet_chunk(
fq_path: PathBuf,
Expand All @@ -325,6 +353,7 @@ fn encode_fq_path_to_parquet_chunk(
Ok(())
}

#[gen_stub_pyfunction(module = "deepbiop.fq")]
#[pyfunction]
fn encode_fq_path_to_parquet(
fq_path: PathBuf,
Expand Down Expand Up @@ -358,6 +387,7 @@ fn encode_fq_path_to_parquet(
Ok(())
}

#[gen_stub_pyfunction(module = "deepbiop.fq")]
#[pyfunction]
fn encode_fq_paths_to_parquet(
fq_path: Vec<PathBuf>,
Expand Down Expand Up @@ -387,6 +417,7 @@ fn get_label_region(labels: Vec<i8>) -> Vec<(usize, usize)> {
.collect()
}

#[gen_stub_pyfunction(module = "deepbiop.fq")]
#[pyfunction]
fn convert_multiple_fqs_to_one_fq(
paths: Vec<PathBuf>,
Expand Down Expand Up @@ -452,6 +483,7 @@ pub fn load_predicts_from_batch_pts(
predicts::load_predicts_from_batch_pts(pt_path, ignore_label, &id_table, max_predicts)
}

#[gen_stub_pyfunction(module = "deepbiop.fq")]
#[pyfunction]
pub fn fastq_to_fasta(fastq_path: PathBuf, fasta_path: PathBuf) -> Result<()> {
let fa_records = io::fastq_to_fasta(&fastq_path)?;
Expand Down
83 changes: 80 additions & 3 deletions py-deepbiop/deepbiop/fq.pyi
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# This file is automatically generated by pyo3_stub_gen
# ruff: noqa: E501, F401

import os
import pathlib
import typing

class FqEncoderOption:
Expand All @@ -18,8 +20,11 @@ class FqEncoderOption:
threads: int | None,
): ...

class JsonEncoder: ...
class ParquetEncoder: ...
class JsonEncoder:
def __new__(cls, option: FqEncoderOption): ...

class ParquetEncoder:
def __new__(cls, option: FqEncoderOption): ...

class Predict:
r"""A struct to store the prediction result."""
Expand Down Expand Up @@ -78,12 +83,84 @@ class RecordData:
def set_seq(self, seq: str) -> None: ...
def set_qual(self, qual: str) -> None: ...

def convert_multiple_fqs_to_one_fq(
paths: typing.Sequence[str | os.PathLike | pathlib.Path],
result_path: str | os.PathLike | pathlib.Path,
parallel: bool,
) -> None: ...
def encode_fq_path_to_json(
fq_path: str | os.PathLike | pathlib.Path,
k: int,
bases: str,
qual_offset: int,
vectorized_target: bool,
result_path: str | os.PathLike | pathlib.Path | None,
) -> None: ...
def encode_fq_path_to_parquet(
fq_path: str | os.PathLike | pathlib.Path,
bases: str,
qual_offset: int,
vectorized_target: bool,
result_path: str | os.PathLike | pathlib.Path | None,
) -> None: ...
def encode_fq_path_to_parquet_chunk(
fq_path: str | os.PathLike | pathlib.Path,
chunk_size: int,
parallel: bool,
bases: str,
qual_offset: int,
vectorized_target: bool,
) -> None: ...
def encode_fq_paths_to_parquet(
fq_path: typing.Sequence[str | os.PathLike | pathlib.Path],
bases: str,
qual_offset: int,
vectorized_target: bool,
) -> None: ...
def encode_qual(qual: str, qual_offset: int) -> list[int]:
r"""Convert ASCII quality to Phred score for Phred+33 encoding."""

def fastq_to_fasta(
fastq_path: str | os.PathLike | pathlib.Path,
fasta_path: str | os.PathLike | pathlib.Path,
) -> None: ...
def generate_kmers(base: str, k: int) -> list[str]: ...
def get_label_region(labels: typing.Sequence[int]) -> list[tuple[int, int]]: ...
def kmers_to_seq(kmers: typing.Sequence[str]) -> str: ...
def normalize_seq(seq: str, iupac: bool) -> str: ...
def normalize_seq(seq: str, iupac: bool) -> str:
r"""
Normalize a DNA sequence by converting any non-standard nucleotides to standard ones.
This function takes a DNA sequence as a `String` and a boolean flag `iupac` indicating whether to normalize using IUPAC ambiguity codes.
It returns a normalized DNA sequence as a `String`.
# Arguments
* `seq` - A DNA sequence as a `String`.
* `iupac` - A boolean flag indicating whether to normalize using IUPAC ambiguity codes.
# Returns
A normalized DNA sequence as a `String`.
# Examples
```
use deepbiop_fq as fq;
let seq = "acGTN".to_string();
let normalized_seq = fq::normalize_seq(seq, false);
assert_eq!(normalized_seq, "ACGTN");
```
"""

def seq_to_kmers(seq: str, k: int, overlap: bool) -> list[str]: ...
def test_predicts(predicts: typing.Sequence[Predict]) -> None: ...
def write_fq(
records_data: typing.Sequence[RecordData],
file_path: str | os.PathLike | pathlib.Path | None,
) -> None: ...
def write_fq_parallel(
records_data: typing.Sequence[RecordData],
file_path: str | os.PathLike | pathlib.Path,
threads: int,
) -> None: ...
4 changes: 0 additions & 4 deletions py-deepbiop/docs/conf.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
"""Sphinx configuration."""

import sys
from datetime import datetime
from pathlib import Path

# sys.path.insert(0, (Path().resolve() / "../deepbiop").as_posix())

project = "deepbiop"
author = "Yangyang Li"
Expand Down

0 comments on commit d8a4ede

Please sign in to comment.