Skip to content

Commit

Permalink
Merge pull request #132 from epifluidlab/fix-unaccepted-types
Browse files Browse the repository at this point in the history
Fixed wps issue
  • Loading branch information
ravibandaru-lab authored Dec 20, 2024
2 parents 03ea6d3 + 30f4412 commit 90df4a4
Show file tree
Hide file tree
Showing 12 changed files with 213 additions and 131 deletions.
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ The format is based on
and this project adheres to
[Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.10.2] - 2024-12-19

### Fixed
- changed default args for `wps` that would lead to errors. Now `wps` defaults to
L-WPS fragment lengths (120-180 nt).

### Changed
- made `finaletoolkit.utils.typing` public. This is a module containing some
useful type aliases.
- minor formatting and typing changes
- renamed `fraction_low` and `fraction_high` to `min_length` and `max_length`
for `.utils.frag_array` and `.frag.wps`. `wps` retains the deprecated arg names
but issues a warning.

## [0.10.1] - 2024-12-19

### Fixed
Expand Down
10 changes: 6 additions & 4 deletions src/finaletoolkit/cli/main_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,16 +383,18 @@ def main_cli_parser():
cli_wps.add_argument(
'-min',
'--min-length',
default=0,
default=120,
type=int,
help='Minimum length for a fragment to be included.'
help='Minimum length for a fragment to be included. Default is 120,'
' corresponding to L-WPS.'
)
cli_wps.add_argument(
'-max',
'--max-length',
default=None,
default=180,
type=int,
help='Maximum length for a fragment to be included.'
help='Maximum length for a fragment to be included. Default is 180,'
' corresponding to L-WPS.'
)
cli_wps.add_argument(
'-lo',
Expand Down
6 changes: 3 additions & 3 deletions src/finaletoolkit/frag/_cleavage_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
frag_array, chrom_sizes_to_list, _reduce_overlaps_in_file,
_convert_to_list, _merge_all_intervals, chrom_sizes_to_dict
)
from finaletoolkit.utils._typing import FragFile
from finaletoolkit.utils.typing import FragFile


def cleavage_profile(
Expand Down Expand Up @@ -130,8 +130,8 @@ def cleavage_profile(
quality_threshold=quality_threshold,
start=adj_start,
stop=adj_stop,
fraction_low=min_length,
fraction_high=max_length,
min_length=min_length,
max_length=max_length,
intersect_policy="any"
)

Expand Down
41 changes: 27 additions & 14 deletions src/finaletoolkit/frag/_multi_wps.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations
import gzip
from os import PathLike
import time
from multiprocessing.pool import Pool
from typing import Union
Expand All @@ -12,6 +13,7 @@

from finaletoolkit.frag._wps import wps
from finaletoolkit.utils.utils import chrom_sizes_to_list
from ..utils.typing import FragFile, ChromSizes, Intervals


def _wps_star(args):
Expand All @@ -20,19 +22,19 @@ def _wps_star(args):


def multi_wps(
input_file: Union[pysam.AlignmentFile, str],
site_bed: str,
chrom_sizes: str=None,
output_file: Union[str, None]=None,
input_file: FragFile,
site_bed: Intervals,
chrom_sizes: ChromSizes | None = None,
output_file: str | None = None,
window_size: int=120,
interval_size: int=5000,
min_length: int=120,
max_length: int=180,
quality_threshold: int=30,
workers: int=1,
verbose: Union[bool, int]=0,
fraction_low: int=None,
fraction_high: int=None,
fraction_low: int | None = None,
fraction_high: int | None = None,
):
"""
Function that aggregates WPS over sites in BED file according to the
Expand All @@ -43,10 +45,12 @@ def multi_wps(
input_file : str or pysam.AlignmentFile
BAM, SAM, or tabix file containing paired-end fragment reads or its
path. `AlignmentFile` must be opened in read mode.
site_bed: str
site_bed: str or pathlike
BED file containing sites to perform WPS on. The intervals
in this BED file should be sorted, first by `contig` then
`start`.
`start`. The intervals over which WPS is calculated are determined by
finding the midpoint of each of these sites and creating a window of
`window_size` length centered on that midpoint.
chrom_sizes: str or pathlike, optional
Tab separated file containing names and sizes of chromosomes in
`input_file`. Required if `input_file` is tabix-indexed.
Expand Down Expand Up @@ -125,15 +129,19 @@ def multi_wps(
'fraction_high and max_length cannot both be specified')

# get chrom sizes from input_file or chrom_sizes
if (input_file.endswith('.sam')
or input_file.endswith('.bam')
or input_file.endswith('.cram')):
with pysam.AlignmentFile(input_file, 'r') as bam:
if (isinstance(input_file, pysam.AlignmentFile)):
references = input_file.references
lengths = input_file.lengths
header = list(zip(references, lengths))
elif (
isinstance(input_file, (str, PathLike))
and (str(input_file).endswith('.sam')
or str(input_file).endswith('.bam')
or str(input_file).endswith('.cram'))):
with pysam.AlignmentFile(str(input_file), 'r') as bam:
references = bam.references
lengths = bam.lengths
header = list(zip(references, lengths))
elif (isinstance(input_file, pysam.AlignmentFile)):
pass
else:
if chrom_sizes is None:
raise ValueError(
Expand Down Expand Up @@ -193,6 +201,10 @@ def multi_wps(
finally:
if site_bed != '-':
bed.close()

chrom_sizes_dict = dict(header)

chrom_sizes_intervals = [chrom_sizes_dict[contig] for contig in contigs]

count = len(contigs)

Expand All @@ -204,6 +216,7 @@ def multi_wps(
contigs,
starts,
stops,
chrom_sizes_intervals,
count*[None],
count*[window_size],
count*[min_length],
Expand Down
92 changes: 64 additions & 28 deletions src/finaletoolkit/frag/_wps.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@
import time
from typing import Union
from sys import stdout, stderr
import warnings

import pysam
import numpy as np
from numba import jit

from finaletoolkit.utils import frag_array
from ..utils.typing import ChromSizes

@jit(nopython=True)
def _single_wps(contig: str,
def _single_nt_wps(chrom: str,
window_start: int,
window_stop: int,
window_position: int,
Expand All @@ -31,19 +33,22 @@ def _single_wps(contig: str,
num_end_in = np.sum(is_end_in)

# calculate wps and return
return (contig, window_position, num_spanning - num_end_in)
return (chrom, window_position, num_spanning - num_end_in)


def wps(input_file: Union[str, pysam.AlignmentFile],
contig: str,
start: Union[int, str],
stop: Union[int, str],
output_file: str=None,
window_size: int=120,
fraction_low: int=120,
fraction_high: int=180,
quality_threshold: int=30,
verbose: Union[bool, int]=0
chrom: str,
start: int,
stop: int,
chrom_size: int,
output_file: str | None = None,
window_size: int = 120,
min_length: int = 120,
max_length: int = 180,
quality_threshold: int = 30,
verbose: bool | int = 0,
fraction_low: int | None = None,
fraction_high: int | None = None,
) -> np.ndarray:
"""
Return (raw) Windowed Protection Scores as specified in Snyder et al
Expand All @@ -54,22 +59,28 @@ def wps(input_file: Union[str, pysam.AlignmentFile],
input_file : str or pysam.AlignmentFile
BAM, SAM or tabix file containing paired-end fragment reads or its
path. `AlignmentFile` must be opened in read mode.
contig : str
chrom : str
start : int
stop : int
chrom_size : int
Size of chrom
output_file : string, optional
window_size : int, optional
Size of window to calculate WPS. Default is k = 120, equivalent
to L-WPS.
fraction_low : int, optional
min_length : int, optional
Specifies lowest fragment length included in calculation.
Default is 120, equivalent to long fraction.
fraction_high : int, optional
Default is 120, equivalent to long WPS.
max_length : int, optional
Specifies highest fragment length included in calculation.
Default is 180, equivalent to long fraction.
Default is 180, equivalent to long WPS.
quality_threshold : int, optional
workers : int, optional
verbose : bool, optional
fraction_low : int, optional
Deprecated alias for `min_length`
fraction_high : int, optional
Deprecated alias for `max_length`
Returns
-------
Expand All @@ -80,25 +91,50 @@ def wps(input_file: Union[str, pysam.AlignmentFile],
if (verbose):
start_time = time.time()
stderr.write("[finaletoolkit-wps] Reading fragments\n")
stderr.write(f'Region: {contig}:{start}-{stop}\n')
stderr.write(f'Region: {chrom}:{start}-{stop}\n')

# Pass aliases and check for conflicts
if fraction_low is not None and min_length is None:
min_length = fraction_low
warnings.warn("fraction_low is deprecated. Use min_length instead.",
category=DeprecationWarning,
stacklevel=2)
elif fraction_low is not None and min_length is not None:
warnings.warn("fraction_low is deprecated. Use min_length instead.",
category=DeprecationWarning,
stacklevel=2)
raise ValueError(
'fraction_low and min_length cannot both be specified')

if fraction_high is not None and max_length is None:
max_length = fraction_high
warnings.warn("fraction_high is deprecated. Use max_length instead.",
category=DeprecationWarning,
stacklevel=2)
elif fraction_high is not None and max_length is not None:
warnings.warn("fraction_high is deprecated. Use max_length instead.",
category=DeprecationWarning,
stacklevel=2)
raise ValueError(
'fraction_high and max_length cannot both be specified')

# set start and stop to ints
start = int(start)
stop = int(stop)

# set minimum and maximum values for fragments. These extend farther
# than needed
minimum = max(round(start - fraction_high), 0)
maximum = round(stop + fraction_high)
minimum = max(round(start - max_length), 0)
maximum = min(round(stop + max_length), chrom_size)

# read fragments from file
frag_ends = frag_array(input_file,
contig,
chrom,
quality_threshold,
start=minimum,
stop=maximum,
fraction_low=fraction_low,
fraction_high=fraction_high,
min_length=min_length,
max_length=max_length,
verbose=(verbose>=2))

if (verbose):
Expand All @@ -115,7 +151,7 @@ def wps(input_file: Union[str, pysam.AlignmentFile],
]
)
scores['start'] = np.arange(start, stop, dtype=int)
scores['contig'] = contig
scores['contig'] = chrom
else:

window_centers = np.arange(start, stop, dtype=np.int64)
Expand All @@ -134,8 +170,8 @@ def wps(input_file: Union[str, pysam.AlignmentFile],
]
)
for i in range(stop-start):
scores[i] = _single_wps(
contig,
scores[i] = _single_nt_wps(
chrom,
window_starts[i],
window_stops[i],
window_centers[i],
Expand All @@ -153,7 +189,7 @@ def wps(input_file: Union[str, pysam.AlignmentFile],
with gzip.open(output_file, 'wt') as out:
# declaration line
out.write(
f'fixedStep\tchrom={contig}\tstart={start}\t'
f'fixedStep\tchrom={chrom}\tstart={start}\t'
f'step={1}\tspan={stop-start}\n'
)
for score in scores['wps']:
Expand All @@ -163,15 +199,15 @@ def wps(input_file: Union[str, pysam.AlignmentFile],
with open(output_file, 'wt') as out:
# declaration line
out.write(
f'fixedStep\tchrom={contig}\tstart={start}\tstep='
f'fixedStep\tchrom={chrom}\tstart={start}\tstep='
f'{1}\tspan={stop-start}\n'
)
for score in scores['wps']:
out.write(f'{score}\n')

elif output_file == '-': #stdout
stdout.write(
f'fixedStep\tchrom={contig}\tstart={start}\tstep='
f'fixedStep\tchrom={chrom}\tstart={start}\tstep='
f'{1}\tspan={stop-start}\n'
)
for score in scores['wps']:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
"""
# _typing
Some useful type aliases
"""
from __future__ import annotations
Expand All @@ -10,4 +9,5 @@

# files accepted by frag_generator
FragFile = Union[str, PathLike, AlignmentFile, TabixFile]
ChromSizes = Union[str, PathLike]
Intervals = Union[str, PathLike]
Loading

0 comments on commit 90df4a4

Please sign in to comment.