Merge branch 'main' into can_compile
ori-kron-wis authored Jan 16, 2025
2 parents 24394b6 + 958c253 commit 3473bee
Showing 21 changed files with 90 additions and 62 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -41,7 +41,7 @@ repos:
)$
- repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.4
+    rev: v0.9.1
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
4 changes: 3 additions & 1 deletion CHANGELOG.md
@@ -15,7 +15,7 @@ to [Semantic Versioning]. Full commit history is available in the

#### Fixed

-- Fixed bug in distributed `scvi.dataloaders._concat_dataloader` {pr}`3053`.
+- Fixed bug in distributed {class}`scvi.dataloaders.ConcatDataLoader` {pr}`3053`.

#### Changed

@@ -37,6 +37,8 @@ to [Semantic Versioning]. Full commit history is available in the

#### Fixed

+- Fixed batch_size pop to get in {class}`scvi.dataloaders.DataSplitter` {pr}`3128`.
+
#### Changed

- Updated the CI workflow with internet, private and optional tests {pr}`3082`.
8 changes: 3 additions & 5 deletions docs/developer/maintenance.md
@@ -183,8 +183,7 @@ example).

#### Update Docker images

-Finally, build new Docker images with the `stable` and semantic versioning tags using the
-[release image workflow].
+Finally, build new Docker images with the correct branch tag using the [Docker image build].

## Continuous integration

@@ -289,10 +288,9 @@ We use the `BREAKING CHANGE` footer to indicate that a commit introduces a break
[Semantic Versioning]: https://semver.org/
[release checklist]: https://github.com/scverse/scvi-tools/blob/main/.github/ISSUE_TEMPLATE/release_checklist.md
[tutorials]: https://github.com/scverse/scvi-tutorials
-[Docker image build]: https://github.com/YosefLab/scvi-tools-docker/actions/workflows/linux_cuda_manual.yaml
-[run the tutorials]: https://github.com/scverse/scvi-tutorials/actions/workflows/run_linux_cuda_branch.yml
+[Docker image build]: https://github.com/scverse/scvi-tools/actions/workflows/build_image_latest.yaml
+[run the tutorials]: https://github.com/scverse/scvi-tutorials/actions/workflows/run_notebook_individual.yaml
[tutorial checklist]: https://github.com/scverse/scvi-tutorials/blob/main/.github/ISSUE_TEMPLATE/release_checklist.md
-[release image workflow]: https://github.com/YosefLab/scvi-tools-docker/actions/workflows/linux_cuda_release.yaml
[release workflow]: https://github.com/scverse/scvi-tools/actions/workflows/release.yml
[PyPI]: https://pypi.org/project/scvi-tools/
[feedstock repository]: https://github.com/conda-forge/scvi-tools-feedstock
2 changes: 1 addition & 1 deletion docs/extensions/typed_returns.py
@@ -15,7 +15,7 @@
def _process_return(lines: Iterable[str]) -> Generator[str, None, None]:
for line in lines:
if m := re.fullmatch(r"(?P<param>\w+)\s+:\s+(?P<type>[\w.]+)", line):
-            yield f'-{m["param"]} (:class:`~{m["type"]}`)'
+            yield f"-{m['param']} (:class:`~{m['type']}`)"
else:
yield line

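Presumably a product of the ruff bump above (the 0.9 formatter normalizes quote style inside f-strings): the two spellings produce the same string, so only the formatting changes. A quick self-contained check:

```python
# Both spellings yield identical output; ruff >= 0.9 prefers double quotes
# outside and single quotes inside the replacement fields.
m = {"param": "adata", "type": "anndata.AnnData"}
old = f'-{m["param"]} (:class:`~{m["type"]}`)'
new = f"-{m['param']} (:class:`~{m['type']}`)"
assert old == new  # -adata (:class:`~anndata.AnnData`)
```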
2 changes: 1 addition & 1 deletion docs/installation.md
@@ -80,7 +80,7 @@ pip install -U scvi-tools[dev]
## Docker

If you plan on running scvi-tools in a containerized environment, we provide various Docker
-[images](https://hub.docker.com/repository/docker/scverse/scvi-tools/general) hosted on Docker Hub.
+[images](https://github.com/scverse/scvi-tools/pkgs/container/scvi-tools) hosted on GHCR.

## R

2 changes: 1 addition & 1 deletion docs/tutorials/notebooks
7 changes: 3 additions & 4 deletions src/scvi/data/_built_in_data/_dataset_10x.py
@@ -8,6 +8,7 @@

from scvi import settings
from scvi.data._download import _download
+from scvi.utils import dependencies

logger = logging.getLogger(__name__)

@@ -74,6 +75,7 @@
}


+@dependencies("scanpy")
def _load_dataset_10x(
dataset_name: str = None,
filename: str = None,
@@ -83,10 +85,7 @@ def _load_dataset_10x(
remove_extracted_data: bool = False,
**scanpy_read_10x_kwargs,
):
-    try:
-        import scanpy
-    except ImportError as err:
-        raise ImportError("Please install scanpy -- `pip install scanpy`") from err
+    import scanpy

# form data url and filename unless manual override
if dataset_name is not None:
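The `try`/`except ImportError` dance is replaced by the new `@dependencies` decorator plus a bare body-level import, so the optional dependency is checked at call time rather than at import time. The decorator's implementation is not part of this diff; a minimal sketch of how such a decorator could work (an assumption, not scvi-tools' actual code):

```python
import importlib.util
from functools import wraps


def dependencies(*modules: str):
    """Raise an informative ImportError at call time if any of the named
    optional dependencies is not installed (hypothetical sketch)."""

    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            missing = [m for m in modules if importlib.util.find_spec(m) is None]
            if missing:
                raise ImportError(
                    f"Please install {', '.join(missing)} -- `pip install {' '.join(missing)}`"
                )
            return fn(*args, **kwargs)

        return wrapper

    return decorator
```

The same pattern is applied to the `pooch`-gated loaders in `_loom.py` and `_smfish.py` below.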
15 changes: 14 additions & 1 deletion src/scvi/data/_built_in_data/_loom.py
@@ -1,19 +1,23 @@
import logging
import os

-import pooch
from anndata import AnnData, read_h5ad

+from scvi.utils import dependencies

logger = logging.getLogger(__name__)


+@dependencies("pooch")
def _load_retina(save_path: str = "data/") -> AnnData:
"""Loads retina dataset.
The dataset of bipolar cells contains after their original pipeline for filtering 27,499 cells
and 13,166 genes coming from two batches. We use the cluster annotation from 15 cell-types from
the author. We also extract their normalized data with Combat and use it for benchmarking.
"""
+    import pooch
+
save_path = os.path.abspath(save_path)
adata = read_h5ad(
pooch.retrieve(
@@ -27,11 +31,14 @@ def _load_retina(save_path: str = "data/") -> AnnData:
return adata


+@dependencies("pooch")
def _load_prefrontalcortex_starmap(save_path: str = "data/") -> AnnData:
"""Loads a starMAP dataset from the mouse pre-frontal cortex :cite:p:`Wang18`.
Contains 3,704 cells and 166 genes.
"""
+    import pooch
+
save_path = os.path.abspath(save_path)
adata = read_h5ad(
pooch.retrieve(
@@ -45,7 +52,10 @@ def _load_prefrontalcortex_starmap(save_path: str = "data/") -> AnnData:
return adata


+@dependencies("pooch")
def _load_frontalcortex_dropseq(save_path: str = "data/") -> AnnData:
+    import pooch
+
save_path = os.path.abspath(save_path)
adata = read_h5ad(
pooch.retrieve(
@@ -62,6 +72,7 @@ def _load_frontalcortex_dropseq(save_path: str = "data/") -> AnnData:
return adata


+@dependencies("pooch")
def _load_annotation_simulation(name: str, save_path: str = "data/") -> AnnData:
"""Simulated datasets for scANVI tutorials.
@@ -75,6 +86,8 @@ def _load_annotation_simulation(name: str, save_path: str = "data/") -> AnnData:
save_path
Location for saving the dataset.
"""
+    import pooch
+
if name == "1":
fileid = "51086192"
known_hash = "5d604adce93b3034885646605c2e9a72f5ccf8163caffb2930485f93a9fcb3a3"
6 changes: 5 additions & 1 deletion src/scvi/data/_built_in_data/_smfish.py
@@ -3,7 +3,8 @@

import anndata
import pandas as pd
-import pooch

+from scvi.utils import dependencies

logger = logging.getLogger(__name__)

@@ -41,7 +42,10 @@
}


+@dependencies("pooch")
def _load_smfish(save_path: str = "data/", use_high_level_cluster=True) -> anndata.AnnData:
+    import pooch
+
save_path = os.path.abspath(save_path)
adata = anndata.read_h5ad(
pooch.retrieve(
31 changes: 20 additions & 11 deletions src/scvi/data/_download.py
@@ -11,16 +11,19 @@

def _download(url: str | None, save_path: str, filename: str):
"""Writes data from url to file."""
-    if os.path.exists(os.path.join(save_path, filename)):
-        logger.info(f"File {os.path.join(save_path, filename)} already downloaded")
+    download_link = os.path.join(save_path, filename)
+    if os.path.exists(download_link):
+        logger.info(f"File {download_link} already downloaded")
return
elif url is None:
logger.info(f"No backup URL provided for missing file {os.path.join(save_path, filename)}")
logger.info(f"No backup URL provided for missing file {download_link}")
return
req = urllib.request.Request(url, headers={"User-Agent": "Magic Browser"})
try:
r = urllib.request.urlopen(req)
-        if r.getheader("Content-Length") is None:
+        if (r.getheader("Content-Length") is None) and (
+            r.getheader("Content-Type") != "text/tab-separated-values"
+        ):
raise FileNotFoundError(
f"Found file with no content at {url}. "
"This is possibly a directory rather than a file path."
@@ -29,7 +32,7 @@ def _download(url: str | None, save_path: str, filename: str):
if exc.code == "404":
raise FileNotFoundError(f"Could not find file at {url}") from exc
raise exc
logger.info(f"Downloading file at {os.path.join(save_path, filename)}")
logger.info(f"Downloading file at {download_link}")

def read_iter(file, block_size=1000):
"""Iterates through file.
@@ -48,9 +51,15 @@ def read_iter(file, block_size=1000):
os.makedirs(save_path)
block_size = 1000

-    filesize = int(r.getheader("Content-Length"))
-    filesize = np.rint(filesize / block_size)
-    with open(os.path.join(save_path, filename), "wb") as f:
-        iterator = read_iter(r, block_size=block_size)
-        for data in track(iterator, style="tqdm", total=filesize, description="Downloading..."):
-            f.write(data)
+    if r.getheader("Content-Length") is not None:
+        filesize = int(r.getheader("Content-Length"))
+        filesize = np.rint(filesize / block_size)
+        with open(download_link, "wb") as f:
+            iterator = read_iter(r, block_size=block_size)
+            for data in track(
+                iterator, style="tqdm", total=filesize, description="Downloading..."
+            ):
+                f.write(data)
+    else:
+        urllib.request.urlretrieve(url, download_link)
+        print(f"File downloaded successfully and saved as {download_link}")
2 changes: 1 addition & 1 deletion src/scvi/data/_utils.py
@@ -80,7 +80,7 @@ def scipy_to_torch_sparse(x: sp_sparse.csr_matrix | sp_sparse.csc_matrix) -> Ten
)
else:
raise TypeError(
"`x` must be of type `scipy.sparse.csr_matrix` or " "`scipy.sparse.csc_matrix`."
"`x` must be of type `scipy.sparse.csr_matrix` or `scipy.sparse.csc_matrix`."
)


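The removed line relied on implicit concatenation of adjacent string literals, a common leftover from automated line wrapping; the two spellings are identical at runtime:

```python
wrapped = "`x` must be of type `scipy.sparse.csr_matrix` or " "`scipy.sparse.csc_matrix`."
single = "`x` must be of type `scipy.sparse.csr_matrix` or `scipy.sparse.csc_matrix`."
assert wrapped == single  # adjacent literals are concatenated at compile time
```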
21 changes: 12 additions & 9 deletions src/scvi/data/fields/_dataframe_field.py
@@ -2,6 +2,7 @@
from typing import Literal

import numpy as np
+import pandas as pd
import rich
from anndata import AnnData
from pandas.api.types import CategoricalDtype
@@ -211,15 +212,17 @@ def transfer_field(
mapping = state_registry[self.CATEGORICAL_MAPPING_KEY].copy()

# extend mapping for new categories
-        for c in np.unique(self._get_original_column(adata_target)):
-            if c not in mapping:
-                if extend_categories:
-                    mapping = np.concatenate([mapping, [c]])
-                else:
-                    raise ValueError(
-                        f"Category {c} not found in source registry. "
-                        f"Cannot transfer setup without `extend_categories = True`."
-                    )
+        missing_categories = (
+            pd.Index(np.unique(self._get_original_column(adata_target)))
+            .difference(pd.Index(mapping))
+            .to_numpy()
+        )
+        if missing_categories.any() and not extend_categories:
+            raise ValueError(
+                f"Category {missing_categories[0]} not found in source registry. "
+                f"Cannot transfer setup without `extend_categories = True`."
+            )
+        mapping = np.concatenate([mapping, missing_categories])
cat_dtype = CategoricalDtype(categories=mapping, ordered=True)
new_mapping = _make_column_categorical(
getattr(adata_target, self.attr_name),
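The per-category Python loop becomes a single vectorized set difference: raise once if any category is missing, then extend the mapping in one concatenation. A standalone illustration with made-up categories:

```python
import numpy as np
import pandas as pd

mapping = np.array(["B", "NK", "T"])        # categories in the source registry
target = np.array(["T", "B", "Mono", "T"])  # categories observed in adata_target

missing = pd.Index(np.unique(target)).difference(pd.Index(mapping)).to_numpy()
print(missing)  # ['Mono']

extend_categories = True
if len(missing) and not extend_categories:
    raise ValueError(f"Category {missing[0]} not found in source registry.")
mapping = np.concatenate([mapping, missing])  # ['B' 'NK' 'T' 'Mono']
```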
14 changes: 7 additions & 7 deletions src/scvi/dataloaders/_data_splitting.py
@@ -170,7 +170,7 @@ def validate_data_split_with_external_indexing(
warnings.warn(
f"Last batch will have a small size of {num_of_cells} "
f"samples. Consider changing settings.batch_size or batch_size in model.train "
f"from currently {settings.batch_size} to avoid errors during model training "
f"from currently {batch_size} to avoid errors during model training "
f"or change the given external indices accordingly.",
UserWarning,
stacklevel=settings.warnings_stacklevel,
@@ -251,15 +251,15 @@ def __init__(
self.n_train, self.n_val = validate_data_split_with_external_indexing(
self.adata_manager.adata.n_obs,
self.external_indexing,
-                self.data_loader_kwargs.pop("batch_size", settings.batch_size),
+                self.data_loader_kwargs.get("batch_size", settings.batch_size),
self.drop_last,
)
else:
self.n_train, self.n_val = validate_data_split(
self.adata_manager.adata.n_obs,
self.train_size,
self.validation_size,
-                self.data_loader_kwargs.pop("batch_size", settings.batch_size),
+                self.data_loader_kwargs.get("batch_size", settings.batch_size),
self.drop_last,
self.train_size_is_none,
)
@@ -434,15 +434,15 @@ def setup(self, stage: str | None = None):
n_labeled_train, n_labeled_val = validate_data_split_with_external_indexing(
n_labeled_idx,
[labeled_idx_train, labeled_idx_val, labeled_idx_test],
-                self.data_loader_kwargs.pop("batch_size", settings.batch_size),
+                self.data_loader_kwargs.get("batch_size", settings.batch_size),
self.drop_last,
)
else:
n_labeled_train, n_labeled_val = validate_data_split(
n_labeled_idx,
self.train_size,
self.validation_size,
-                self.data_loader_kwargs.pop("batch_size", settings.batch_size),
+                self.data_loader_kwargs.get("batch_size", settings.batch_size),
self.drop_last,
self.train_size_is_none,
)
@@ -475,15 +475,15 @@ def setup(self, stage: str | None = None):
n_unlabeled_train, n_unlabeled_val = validate_data_split_with_external_indexing(
n_unlabeled_idx,
[unlabeled_idx_train, unlabeled_idx_val, unlabeled_idx_test],
-                self.data_loader_kwargs.pop("batch_size", settings.batch_size),
+                self.data_loader_kwargs.get("batch_size", settings.batch_size),
self.drop_last,
)
else:
n_unlabeled_train, n_unlabeled_val = validate_data_split(
n_unlabeled_idx,
self.train_size,
self.validation_size,
-                self.data_loader_kwargs.pop("batch_size", settings.batch_size),
+                self.data_loader_kwargs.get("batch_size", settings.batch_size),
self.drop_last,
self.train_size_is_none,
)
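The repeated `pop` → `get` swap is the substance of the `batch_size` CHANGELOG entry above: `data_loader_kwargs` is consulted again later when the actual dataloaders are built, and `pop` removes the key on first use, so a user-supplied `batch_size` would presumably be lost after validation. A minimal repro of the difference:

```python
data_loader_kwargs = {"batch_size": 256}
default_batch_size = 128  # stand-in for settings.batch_size

# pop() consumes the key: the first lookup succeeds...
assert data_loader_kwargs.pop("batch_size", default_batch_size) == 256
# ...but the value is gone for any later consumer of the kwargs
assert "batch_size" not in data_loader_kwargs

# get() reads without mutating, so the kwargs stay intact
data_loader_kwargs = {"batch_size": 256}
assert data_loader_kwargs.get("batch_size", default_batch_size) == 256
assert data_loader_kwargs["batch_size"] == 256
```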
6 changes: 3 additions & 3 deletions src/scvi/external/cellassign/_model.py
@@ -84,9 +84,9 @@ def __init__(
except KeyError as err:
raise KeyError("Anndata and cell type markers do not contain the same genes.") from err

-        assert (
-            not cell_type_markers.index.has_duplicates
-        ), "There are duplicates in cell type markers (rows in cell_type_markers)"
+        assert not cell_type_markers.index.has_duplicates, (
+            "There are duplicates in cell type markers (rows in cell_type_markers)"
+        )

super().__init__(adata)

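This rewrite, and the identical ones in `contrastivevi` and `multivi` below, match the assert formatting of the ruff 0.9 formatter (inferred from the `rev: v0.9.1` bump at the top of this diff): the parentheses now wrap the message instead of the condition. Schematically:

```python
items = []

# old layout: the condition is parenthesized and split across lines
assert (
    len(items) == 0
), "items must start empty"

# new layout: the condition stays inline, the message is parenthesized
assert len(items) == 0, (
    "items must start empty"
)
```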
6 changes: 3 additions & 3 deletions src/scvi/external/contrastivevi/_model.py
@@ -745,9 +745,9 @@ def differential_expression(
if target_idx is not None:
target_idx = np.array(target_idx)
if target_idx.dtype is np.dtype("bool"):
-                assert (
-                    len(target_idx) == adata.n_obs
-                ), "target_idx mask must be the same length as adata!"
+                assert len(target_idx) == adata.n_obs, (
+                    "target_idx mask must be the same length as adata!"
+                )
target_idx = np.arange(adata.n_obs)[target_idx]
model_fn = partial(
self.get_specific_normalized_expression,
6 changes: 3 additions & 3 deletions src/scvi/model/_multivi.py
@@ -169,9 +169,9 @@ def __init__(
super().__init__(adata)

if n_genes is None or n_regions is None:
-            assert isinstance(
-                adata, MuData
-            ), "n_genes and n_regions must be provided if using AnnData"
+            assert isinstance(adata, MuData), (
+                "n_genes and n_regions must be provided if using AnnData"
+            )
n_genes = self.summary_stats.get("n_vars", 0)
n_regions = self.summary_stats.get("n_atac", 0)

3 changes: 1 addition & 2 deletions src/scvi/model/base/_archesmixin.py
@@ -134,8 +134,7 @@ def load_query_data(
version_split = adata_manager.registry[_constants._SCVI_VERSION_KEY].split(".")
if int(version_split[1]) < 8 and int(version_split[0]) == 0:
warnings.warn(
"Query integration should be performed using models trained with "
"version >= 0.8",
"Query integration should be performed using models trained with version >= 0.8",
UserWarning,
stacklevel=settings.warnings_stacklevel,
)