Skip to content

Commit

Permalink
Refine docstrings and comments for LocalAncestryObject and GlobalAncestryObject
Browse files Browse the repository at this point in the history
  • Loading branch information
miriambt committed Nov 15, 2024
1 parent c24bd46 commit 224fdd8
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 96 deletions.
124 changes: 69 additions & 55 deletions snputils/ancestry/genobj/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,25 @@ class LocalAncestryObject(AncestryObject):
"""
def __init__(
self,
haplotypes: List,
haplotypes: List[str],
lai: np.ndarray,
samples: Optional[List] = None,
ancestry_map: Optional[Dict] = None,
samples: Optional[List[str]] = None,
ancestry_map: Optional[Dict[str, str]] = None,
window_sizes: Optional[np.ndarray] = None,
centimorgan_pos: Optional[np.ndarray] = None,
chromosomes: Optional[np.ndarray] = None,
physical_pos: Optional[np.ndarray] = None
) -> None:
"""
Args:
haplotypes (list):
A list of unique haplotype identifiers with a length of `n_haplotypes`.
haplotypes (list of str of length n_haplotypes):
A list of unique haplotype identifiers.
lai (array of shape (n_windows, n_haplotypes)):
A 2D array containing local ancestry inference values, where each row represents a
genomic window, and each column corresponds to a haplotype phase for each sample.
samples (list, optional):
A list of unique sample identifiers with a length of `n_samples`.
ancestry_map (dict, optional):
samples (list of str of length n_samples, optional):
A list of unique sample identifiers.
ancestry_map (dict of str to str, optional):
A dictionary mapping ancestry codes to region names.
window_sizes (array of shape (n_windows,), optional):
An array specifying the number of SNPs in each genomic window.
Expand Down Expand Up @@ -83,32 +83,33 @@ def __setitem__(self, key, value):
setattr(self, key, value)
except AttributeError:
raise KeyError(f'Invalid key: {key}')

@property
def haplotypes(self) -> List:
def haplotypes(self) -> List[str]:
"""
Retrieve `haplotypes`.
Returns:
List: A list of unique haplotype identifiers with a length of `n_samples*2`.
**list of str of length n_haplotypes:** A list of unique haplotype identifiers.
"""
return self.__haplotypes

@haplotypes.setter
def haplotypes(self, x):
"""
Update `haplotypes`.
"""
self.__haplotypes = x

@property
def lai(self) -> np.ndarray:
"""
Retrieve `lai`.
Returns:
numpy.ndarray: A 2D array containing local ancestry inference values, where each row represents a
genomic window, and each column corresponds to a haplotype phase for each sample.
**array of shape (n_windows, n_haplotypes):**
A 2D array containing local ancestry inference values, where each row represents a
genomic window, and each column corresponds to a haplotype phase for each sample.
"""
return self.__lai

Expand All @@ -120,12 +121,12 @@ def lai(self, x):
self.__lai = x

@property
def samples(self) -> Optional[List]:
def samples(self) -> Optional[List[str]]:
"""
Retrieve `samples`.
Returns:
List: A list of unique sample identifiers with a length of `n_samples`.
**list of str:** A list of unique sample identifiers.
"""
return self.__samples

Expand All @@ -137,12 +138,12 @@ def samples(self, x):
self.__samples = x

@property
def ancestry_map(self) -> Optional[Dict]:
def ancestry_map(self) -> Optional[Dict[str, str]]:
"""
Retrieve `ancestry_map`.
Returns:
Dict: A dictionary mapping ancestry codes to region names.
**dict of str to str:** A dictionary mapping ancestry codes to region names.
"""
return self.__ancestry_map

Expand All @@ -159,7 +160,8 @@ def window_sizes(self) -> Optional[np.ndarray]:
Retrieve `window_sizes`.
Returns:
numpy.ndarray: An array specifying the number of SNPs in each genomic window.
**array of shape (n_windows,):**
An array specifying the number of SNPs in each genomic window.
"""
return self.__window_sizes

Expand All @@ -176,7 +178,8 @@ def centimorgan_pos(self) -> Optional[np.ndarray]:
Retrieve `centimorgan_pos`.
Returns:
numpy.ndarray: A 2D array containing the start and end centimorgan positions for each window.
**array of shape (n_windows, 2):**
A 2D array containing the start and end centimorgan positions for each window.
"""
return self.__centimorgan_pos

Expand All @@ -193,7 +196,8 @@ def chromosomes(self) -> Optional[np.ndarray]:
Retrieve `chromosomes`.
Returns:
numpy.ndarray: An array with chromosome numbers corresponding to each genomic window.
**array of shape (n_windows,):**
An array with chromosome numbers corresponding to each genomic window.
"""
return self.__chromosomes

Expand All @@ -210,7 +214,8 @@ def physical_pos(self) -> Optional[np.ndarray]:
Retrieve `physical_pos`.
Returns:
numpy.ndarray: A 2D array containing the start and end physical positions for each window.
**array of shape (n_windows, 2):**
A 2D array containing the start and end physical positions for each window.
"""
return self.__physical_pos

Expand All @@ -227,8 +232,8 @@ def n_samples(self) -> int:
Retrieve `n_samples`.
Returns:
int: The total number of samples. If `samples` is available, returns its length;
otherwise, calculates based on the number of `haplotypes` or `lai` array dimensions.
**int:**
The total number of samples.
"""
if self.__samples is not None:
return len(self.__samples)
Expand All @@ -245,7 +250,7 @@ def n_ancestries(self) -> int:
Retrieve `n_ancestries`.
Returns:
int: The total number of unique ancestries.
**int:** The total number of unique ancestries.
"""
return len(np.unique(self.__lai))

Expand All @@ -255,7 +260,7 @@ def n_haplotypes(self) -> int:
Retrieve `n_haplotypes`.
Returns:
int: The total number of haplotypes.
**int:** The total number of haplotypes.
"""
if self.__haplotypes is not None:
return len(self.__haplotypes)
Expand All @@ -268,27 +273,28 @@ def n_windows(self) -> int:
Retrieve `n_windows`.
Returns:
int: The total number of genomic windows.
**int:** The total number of genomic windows.
"""
return self.__lai.shape[0]

def copy(self) -> 'LocalAncestryObject':
"""
Create and return a copy of the current `LocalAncestryObject` instance.
Create and return a copy of `self`.
Returns:
LocalAncestryObject:
**LocalAncestryObject:**
A shallow copy of the current object (created with `copy.copy`).
"""
return copy.copy(self)

def keys(self) -> List:
def keys(self) -> List[str]:
"""
Retrieve a list of public attribute names for this `LocalAncestryObject` instance.
Retrieve a list of public attribute names for `self`.
Returns:
List: A list of attribute names, with internal name-mangling removed,
for easier reference to public attributes in the instance.
**list of str:**
A list of attribute names, with internal name-mangling removed,
for easier reference to public attributes in the instance.
"""
return [attr.replace('_LocalAncestryObject__', '').replace('_AncestryObject__', '') for attr in vars(self)]

Expand All @@ -299,16 +305,18 @@ def filter_windows(
inplace: bool = False
) -> Optional['LocalAncestryObject']:
"""
Filter genomic windows in the `LocalAncestryObject` based on their indexes.
Filter genomic windows based on specified indexes.
This method allows inclusion or exclusion of specific genomic windows from the
`LocalAncestryObject` by specifying their indexes. Negative indexes are supported
and follow NumPy's indexing conventions. It updates the `lai`, `chromosomes`,
`centimorgan_pos`, and `physical_pos` attributes accordingly.
This method updates the `lai` attribute to include or exclude the specified genomic windows.
Attributes such as `chromosomes`, `centimorgan_pos` and `physical_pos` will also be updated
accordingly if they are not None. The order of genomic windows is preserved.
Negative indexes are supported and follow
[NumPy's indexing conventions](https://numpy.org/doc/stable/user/basics.indexing.html).
Args:
indexes (int or array-like of int):
Indexes of the windows to include or exclude. Can be a single integer or a
Index(es) of the windows to include or exclude. Can be a single integer or a
sequence of integers. Negative indexes are supported.
include (bool, default=True):
If True, includes only the specified windows. If False, excludes the specified
Expand All @@ -318,9 +326,9 @@ def filter_windows(
the windows filtered. Default is False.
Returns:
Optional[LocalAncestryObject]: Returns a new `LocalAncestryObject` with the specified
windows filtered if `inplace=False`. If `inplace=True`, modifies the object in place and
returns None.
**Optional[LocalAncestryObject]:**
A new `LocalAncestryObject` with the specified windows filtered if `inplace=False`.
If `inplace=True`, modifies `self` in place and returns None.
"""
# Convert indexes to a NumPy array
indexes = np.atleast_1d(indexes)
Expand Down Expand Up @@ -371,25 +379,30 @@ def filter_windows(

def filter_samples(
self,
samples: Union[str, Sequence[str], np.ndarray, None] = None,
indexes: Union[int, Sequence[int], np.ndarray, None] = None,
samples: Optional[Union[str, Sequence[str], np.ndarray, None]] = None,
indexes: Optional[Union[int, Sequence[int], np.ndarray, None]] = None,
include: bool = True,
inplace: bool = False
) -> Optional['LocalAncestryObject']:
"""
Filter samples in the `LocalAncestryObject` based on sample names or indexes.
Filter samples based on specified names or indexes.
This method updates the `lai`, `haplotypes`, and `samples` attributes to include or exclude the specified
samples. Each sample is associated with two haplotypes, which are included or excluded together.
The order of the samples is preserved.
This method allows inclusion or exclusion of specific samples by their names,
indexes, or both. When both samples and indexes are provided, the union of
the specified samples is used. Negative indexes are supported and follow NumPy's indexing
conventions. It updates the `lai`, `samples`, and `haplotypes` attributes accordingly.
If both samples and indexes are provided, any sample matching either a name in samples or an index in
indexes will be included or excluded.
Negative indexes are supported and follow
[NumPy's indexing conventions](https://numpy.org/doc/stable/user/basics.indexing.html).
Args:
samples (str or array-like of str, optional):
Names of the samples to include or exclude. Can be a single sample name or a
Name(s) of the samples to include or exclude. Can be a single sample name or a
sequence of sample names. Default is None.
indexes (int or array-like of int, optional):
Indexes of the samples to include or exclude. Can be a single index or a sequence
Index(es) of the samples to include or exclude. Can be a single index or a sequence
of indexes. Negative indexes are supported. Default is None.
include (bool, default=True):
If True, includes only the specified samples. If False, excludes the specified
Expand All @@ -399,8 +412,9 @@ def filter_samples(
samples filtered. Default is False.
Returns:
Optional[LocalAncestryObject]: A new LocalAncestryObject with the specified samples
filtered if `inplace=False`. If inplace=True, modifies `self` in place and returns None.
**Optional[LocalAncestryObject]:**
A new `LocalAncestryObject` with the specified samples filtered if `inplace=False`.
If `inplace=True`, modifies `self` in place and returns None.
"""
if samples is None and indexes is None:
raise UserWarning("At least one of 'samples' or 'indexes' must be provided.")
Expand Down Expand Up @@ -491,7 +505,7 @@ def _sanity_check(self) -> None:

def save(self, file: Union[str, pathlib.Path]) -> None:
"""
Save the data stored in the `LocalAncestryObject` instance to a `.msp` file.
Save the data stored in `self` to a `.msp` file.
Args:
file (str or pathlib.Path):
Expand Down
Loading

0 comments on commit 224fdd8

Please sign in to comment.