From 6323c1c98625fcfa96de86cc486e470b0ddcaad3 Mon Sep 17 00:00:00 2001 From: Phil Schaf Date: Mon, 21 Aug 2023 18:05:35 +0200 Subject: [PATCH 1/6] add failing test --- tests/test_py2rpy.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/test_py2rpy.py b/tests/test_py2rpy.py index c03109b..68d0b74 100644 --- a/tests/test_py2rpy.py +++ b/tests/test_py2rpy.py @@ -102,3 +102,15 @@ def test_df_error() -> None: adata.obsm['stuff'] = DataFrame(dict(a=[1, 2], b=list('ab'), c=[1.0, 2.0]), index=adata.obs_names) with pytest.raises(ValueError, match=r"DataFrame contains non-numeric columns \['b'\]"): anndata2ri.converter.py2rpy(adata) + + +def test_localconverter_scipy() -> None: + from numpy.random import default_rng + from rpy2.robjects import globalenv + from scipy.sparse import csr_matrix + + rng = default_rng(1337) + mat = csr_matrix(rng.poisson(1, size=(100, 2000))) + + with localconverter(anndata2ri.converter): + globalenv['mat'] = mat From 9af3e7c1ba28fa5aa719fdf3af69840a0b640c12 Mon Sep 17 00:00:00 2001 From: Philipp A Date: Tue, 22 Aug 2023 13:02:51 +0200 Subject: [PATCH 2/6] improve tests --- tests/test_py2rpy.py | 38 +++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 15 deletions(-) diff --git a/tests/test_py2rpy.py b/tests/test_py2rpy.py index 68d0b74..e5284d6 100644 --- a/tests/test_py2rpy.py +++ b/tests/test_py2rpy.py @@ -10,6 +10,7 @@ from pandas import DataFrame from rpy2.robjects import baseenv, globalenv from rpy2.robjects.conversion import localconverter +from scipy import sparse import anndata2ri from anndata2ri._rpy2_ext import importr @@ -31,6 +32,25 @@ def mk_ad_simple() -> AnnData: ) +@pytest.mark.parametrize('dtype', [np.float32, np.float64, np.int32, np.int64]) +@pytest.mark.parametrize('mat_type', [np.asarray, sparse.csr_matrix]) +def test_py2rpy_simple( + py2r: Py2R, + dtype: np.dtype, + mat_type: Callable[[np.ndarray], np.ndarray | sparse.spmatrix], +) -> None: + data = mk_ad_simple() + if data.X is not None: + data.X = mat_type(data.X, dtype=dtype) + ex = py2r(anndata2ri, data) + assert tuple(baseenv['dim'](ex)[::-1]) == data.shape + + +def krumsiek() -> AnnData: + with pytest.warns(UserWarning, match=r'Observation names are not unique'): + return sc.datasets.krumsiek11() + + def check_empty(_: Sexp) -> None: pass @@ -45,18 +65,18 @@ def check_pca(ex: Sexp) -> None: datasets = [ pytest.param(check_empty, (0, 0), AnnData, id='empty'), pytest.param(check_pca, (2, 3), mk_ad_simple, id='simple'), - pytest.param(check_empty, (640, 11), sc.datasets.krumsiek11, id='krumsiek'), + pytest.param(check_empty, (640, 11), krumsiek, id='krumsiek'), ] @pytest.mark.parametrize(('check', 'shape', 'dataset'), datasets) -def test_py2rpy( +def test_py2rpy_datasets( py2r: Py2R, check: Callable[[Sexp], None], shape: tuple[int, ...], dataset: Callable[[], AnnData], ) -> None: - if dataset is sc.datasets.krumsiek11: + if dataset is krumsiek: with pytest.warns(UserWarning, match=r'Duplicated obs_names'): ex = py2r(anndata2ri, dataset()) else: @@ -102,15 +122,3 @@ def test_df_error() -> None: adata.obsm['stuff'] = DataFrame(dict(a=[1, 2], b=list('ab'), c=[1.0, 2.0]), index=adata.obs_names) with pytest.raises(ValueError, match=r"DataFrame contains non-numeric columns \['b'\]"): anndata2ri.converter.py2rpy(adata) - - -def test_localconverter_scipy() -> None: - from numpy.random import default_rng - from rpy2.robjects import globalenv - from scipy.sparse import csr_matrix - - rng = default_rng(1337) - mat = csr_matrix(rng.poisson(1, size=(100, 2000))) - - with localconverter(anndata2ri.converter): - globalenv['mat'] = mat From 1a0e295fc39c67c0f8d5b277a021fd2a5f8fb79d Mon Sep 17 00:00:00 2001 From: Philipp A Date: Tue, 22 Aug 2023 13:05:47 +0200 Subject: [PATCH 3/6] rename tests --- tests/test_py2rpy.py | 8 ++++---- tests/test_rpy2py.py | 6 +++--- tests/test_scipy_py2rpy.py | 2 +- tests/test_scipy_rpy2py.py | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/test_py2rpy.py b/tests/test_py2rpy.py index e5284d6..07b982e 100644 --- a/tests/test_py2rpy.py +++ b/tests/test_py2rpy.py @@ -34,10 +34,10 @@ def mk_ad_simple() -> AnnData: @pytest.mark.parametrize('dtype', [np.float32, np.float64, np.int32, np.int64]) @pytest.mark.parametrize('mat_type', [np.asarray, sparse.csr_matrix]) -def test_py2rpy_simple( +def test_simple( py2r: Py2R, dtype: np.dtype, - mat_type: Callable[[np.ndarray], np.ndarray | sparse.spmatrix], + mat_type: Callable[[np.ndarray, np.dtype], np.ndarray | sparse.spmatrix], ) -> None: data = mk_ad_simple() if data.X is not None: @@ -70,7 +70,7 @@ def check_pca(ex: Sexp) -> None: @pytest.mark.parametrize(('check', 'shape', 'dataset'), datasets) -def test_py2rpy_datasets( +def test_datasets( py2r: Py2R, check: Callable[[Sexp], None], shape: tuple[int, ...], @@ -85,7 +85,7 @@ def test_py2rpy_datasets( check(ex) -def test_py2rpy2_numpy_pbmc68k() -> None: +def test_numpy_pbmc68k() -> None: """Not tested above as the pbmc68k dataset has some weird metadata.""" from scanpy.datasets import pbmc68k_reduced diff --git a/tests/test_rpy2py.py b/tests/test_rpy2py.py index f809266..0c975d0 100644 --- a/tests/test_rpy2py.py +++ b/tests/test_rpy2py.py @@ -69,7 +69,7 @@ def check_example(adata: AnnData) -> None: @pytest.mark.parametrize(('check', 'shape', 'dataset'), expression_sets) -def test_convert_manual( +def test_manual( r2py: R2Py, check: Callable[[AnnData], None], shape: tuple[int, ...], @@ -81,7 +81,7 @@ def test_convert_manual( check(ad) -def test_convert_empty_df_with_rows(r2py: R2Py) -> None: +def test_empty_df_with_rows(r2py: R2Py) -> None: df = r('S4Vectors::DataFrame(a=1:10)[, -1]') assert df.slots['nrows'][0] == 10 @@ -89,7 +89,7 @@ def test_convert_empty_df_with_rows(r2py: R2Py) -> None: assert isinstance(df_py, pd.DataFrame) -def test_convert_factor(r2py: R2Py) -> None: +def test_factor(r2py: R2Py) -> None: code = """ SingleCellExperiment::SingleCellExperiment( assays = list(counts = matrix(rpois(6*4, 5), ncol=4)), diff --git a/tests/test_scipy_py2rpy.py b/tests/test_scipy_py2rpy.py index 03bf145..f76ed99 100644 --- a/tests/test_scipy_py2rpy.py +++ b/tests/test_scipy_py2rpy.py @@ -27,7 +27,7 @@ @pytest.mark.parametrize('typ', ['l', 'd']) @pytest.mark.parametrize(('shape', 'dataset', 'cls'), mats) -def test_py2rpy( +def test_mats( py2r: Py2R, typ: Literal['l', 'd'], shape: tuple[int, ...], diff --git a/tests/test_scipy_rpy2py.py b/tests/test_scipy_rpy2py.py index be846b2..c8ed647 100644 --- a/tests/test_scipy_rpy2py.py +++ b/tests/test_scipy_rpy2py.py @@ -63,7 +63,7 @@ @pytest.mark.parametrize(('shape', 'cls', 'dtype', 'arr', 'dataset'), mats) -def test_py2rpy( +def test_mats( r2py: R2Py, shape: tuple[int, int], cls: type[sparse.spmatrix], From d79beeaae4b58a5f3f2c92e63dd338b4345a372b Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 29 Aug 2024 09:48:40 +0200 Subject: [PATCH 4/6] simplify --- .github/dependabot.yml | 2 +- .github/workflows/run_tests.yml | 11 +++-------- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index bb9c784..abd2e58 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -1,6 +1,6 @@ version: 2 updates: - - package-ecosystem: "github-actions" + - package-ecosystem: github-actions directory: / schedule: interval: weekly diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index b6f281d..cecd186 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -2,11 +2,8 @@ name: Unit Tests on: push: - branches: - - main + branches: [main] pull_request: - branches: - - "*" concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -47,14 +44,12 @@ jobs: packages: pandoc gfortran libblas-dev liblapack-dev libedit-dev llvm-dev libcurl4-openssl-dev ffmpeg libhdf5-dev version: 1.0 - - name: Set up Python - uses: actions/setup-python@v5 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.config.python }} cache: pip - - name: Set up R - id: setup-r + - id: setup-r uses: r-lib/actions/setup-r@v2 with: r-version: ${{ matrix.config.r }} From 7e751fa800a228c03fa8ec9883b037249005697c Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 29 Aug 2024 10:35:49 +0200 Subject: [PATCH 5/6] Fix it --- src/anndata2ri/_r2py.py | 5 +--- src/anndata2ri/_rpy2_ext.py | 9 ++++--- src/anndata2ri/scipy2ri/_py2r.py | 42 ++++++++++++++++---------------- tests/test_py2rpy.py | 8 +++--- 4 files changed, 31 insertions(+), 33 deletions(-) diff --git a/src/anndata2ri/_r2py.py b/src/anndata2ri/_r2py.py index 7de52e5..26d9314 100644 --- a/src/anndata2ri/_r2py.py +++ b/src/anndata2ri/_r2py.py @@ -11,7 +11,7 @@ from . import _conv_name from ._conv import converter, mat_rpy2py -from ._rpy2_ext import importr +from ._rpy2_ext import R_INT_BYTES, importr from .scipy2ri import supported_r_matrix_classes from .scipy2ri._r2py import rmat_to_spmat @@ -23,9 +23,6 @@ from scipy.sparse import spmatrix -R_INT_BYTES = 4 - - @converter.rpy2py.register(SexpS4) def rpy2py_s4(obj: SexpS4) -> pd.DataFrame | AnnData | None: """Convert known S4 class instance to Python object. diff --git a/src/anndata2ri/_rpy2_ext.py b/src/anndata2ri/_rpy2_ext.py index 9dfcae1..bccf65f 100644 --- a/src/anndata2ri/_rpy2_ext.py +++ b/src/anndata2ri/_rpy2_ext.py @@ -1,16 +1,19 @@ from __future__ import annotations -from functools import lru_cache +from functools import cache from rpy2.robjects import Environment, packages -@lru_cache +R_INT_BYTES = 4 + + +@cache def importr(name: str) -> packages.Package: return packages.importr(name) -@lru_cache +@cache def data(package: str, name: str | None = None) -> packages.PackageData | Environment: if name is None: return packages.data(importr(package)) diff --git a/src/anndata2ri/scipy2ri/_py2r.py b/src/anndata2ri/scipy2ri/_py2r.py index c456305..788462e 100644 --- a/src/anndata2ri/scipy2ri/_py2r.py +++ b/src/anndata2ri/scipy2ri/_py2r.py @@ -1,16 +1,16 @@ from __future__ import annotations -from functools import lru_cache, wraps +from functools import cache, wraps from importlib.resources import files from typing import TYPE_CHECKING import numpy as np from rpy2.robjects import default_converter, numpy2ri from rpy2.robjects.conversion import localconverter -from rpy2.robjects.packages import Package, SignatureTranslatedAnonymousPackage +from rpy2.robjects.packages import InstalledSTPackage, SignatureTranslatedAnonymousPackage from scipy import sparse -from anndata2ri._rpy2_ext import importr +from anndata2ri._rpy2_ext import R_INT_BYTES, importr from ._conv import converter @@ -21,21 +21,26 @@ from rpy2.rinterface import Sexp -matrix: SignatureTranslatedAnonymousPackage | None = None -base: Package | None = None +@cache +def baseenv() -> InstalledSTPackage: + return importr('base') -@lru_cache -def get_r_code() -> str: - return files('anndata2ri').joinpath('scipy2ri', '_py2r_helpers.r').read_text() +@cache +def matrixenv() -> SignatureTranslatedAnonymousPackage: + importr('Matrix') # make class available + r_code = files('anndata2ri').joinpath('scipy2ri', '_py2r_helpers.r').read_text() + return SignatureTranslatedAnonymousPackage(r_code, 'matrix') def get_type_conv(dtype: np.dtype) -> Callable[[np.ndarray], Sexp]: - global base # noqa: PLW0603 - if base is None: - base = importr('base') + base = baseenv() if np.issubdtype(dtype, np.floating): return base.as_double + if np.issubdtype(dtype, np.integer): + if dtype.itemsize <= R_INT_BYTES: + return base.as_integer + return base.as_numeric # maybe uses R_xlen_t? if np.issubdtype(dtype, np.bool_): return base.as_logical msg = f'Unknown dtype {dtype!r} cannot be converted to ?gRMatrix.' @@ -47,12 +52,7 @@ def py2r_context(f: Callable[[sparse.spmatrix], Sexp]) -> Callable[[sparse.spmat @wraps(f) def wrapper(obj: sparse.spmatrix) -> Sexp: - global matrix # noqa: PLW0603 - if matrix is None: - importr('Matrix') # make class available - r_code = get_r_code() - matrix = SignatureTranslatedAnonymousPackage(r_code, 'matrix') - + matrixenv() # make Matrix class available return f(obj) return wrapper @@ -64,7 +64,7 @@ def csc_to_rmat(csc: sparse.csc_matrix) -> Sexp: csc.sort_indices() conv_data = get_type_conv(csc.dtype) with localconverter(default_converter + numpy2ri.converter): - return matrix.from_csc(i=csc.indices, p=csc.indptr, x=csc.data, dims=list(csc.shape), conv_data=conv_data) + return matrixenv().from_csc(i=csc.indices, p=csc.indptr, x=csc.data, dims=list(csc.shape), conv_data=conv_data) @converter.py2rpy.register(sparse.csr_matrix) @@ -73,7 +73,7 @@ def csr_to_rmat(csr: sparse.csr_matrix) -> Sexp: csr.sort_indices() conv_data = get_type_conv(csr.dtype) with localconverter(default_converter + numpy2ri.converter): - return matrix.from_csr( + return matrixenv().from_csr( j=csr.indices, p=csr.indptr, x=csr.data, @@ -87,7 +87,7 @@ def csr_to_rmat(csr: sparse.csr_matrix) -> Sexp: def coo_to_rmat(coo: sparse.coo_matrix) -> Sexp: conv_data = get_type_conv(coo.dtype) with localconverter(default_converter + numpy2ri.converter): - return matrix.from_coo( + return matrixenv().from_coo( i=coo.row, j=coo.col, x=coo.data, @@ -107,7 +107,7 @@ def dia_to_rmat(dia: sparse.dia_matrix) -> Sexp: ) raise ValueError(msg) with localconverter(default_converter + numpy2ri.converter): - return matrix.from_dia( + return matrixenv().from_dia( n=dia.shape[0], x=dia.data, conv_data=conv_data, diff --git a/tests/test_py2rpy.py b/tests/test_py2rpy.py index 4b78b0e..d6657f0 100644 --- a/tests/test_py2rpy.py +++ b/tests/test_py2rpy.py @@ -47,11 +47,9 @@ def test_simple( def krumsiek() -> AnnData: - with ( - pytest.warns(UserWarning, match=r'Duplicated obs_names'), - pytest.warns(UserWarning, match=r'Observation names are not unique'), - ): - return sc.datasets.krumsiek11() + adata = sc.datasets.krumsiek11() + adata.obs_names_make_unique() + return adata def check_empty(_: Sexp) -> None: From a550491157addaf70873565d2fd13e620828f181 Mon Sep 17 00:00:00 2001 From: "Philipp A." Date: Thu, 29 Aug 2024 10:49:25 +0200 Subject: [PATCH 6/6] Fix --- src/anndata2ri/scipy2ri/_py2r.py | 39 ++++++++++++-------------------- tests/test_py2rpy.py | 4 +++- 2 files changed, 18 insertions(+), 25 deletions(-) diff --git a/src/anndata2ri/scipy2ri/_py2r.py b/src/anndata2ri/scipy2ri/_py2r.py index 788462e..a8cd5bb 100644 --- a/src/anndata2ri/scipy2ri/_py2r.py +++ b/src/anndata2ri/scipy2ri/_py2r.py @@ -1,6 +1,6 @@ from __future__ import annotations -from functools import cache, wraps +from functools import cache from importlib.resources import files from typing import TYPE_CHECKING @@ -23,14 +23,16 @@ @cache def baseenv() -> InstalledSTPackage: - return importr('base') + with localconverter(default_converter): + return importr('base') @cache def matrixenv() -> SignatureTranslatedAnonymousPackage: - importr('Matrix') # make class available - r_code = files('anndata2ri').joinpath('scipy2ri', '_py2r_helpers.r').read_text() - return SignatureTranslatedAnonymousPackage(r_code, 'matrix') + with localconverter(default_converter): + importr('Matrix') # make class available + r_code = files('anndata2ri').joinpath('scipy2ri', '_py2r_helpers.r').read_text() + return SignatureTranslatedAnonymousPackage(r_code, 'matrix') def get_type_conv(dtype: np.dtype) -> Callable[[np.ndarray], Sexp]: @@ -47,33 +49,22 @@ def get_type_conv(dtype: np.dtype) -> Callable[[np.ndarray], Sexp]: raise ValueError(msg) -def py2r_context(f: Callable[[sparse.spmatrix], Sexp]) -> Callable[[sparse.spmatrix], Sexp]: - """R globalenv context with some helper functions.""" - - @wraps(f) - def wrapper(obj: sparse.spmatrix) -> Sexp: - matrixenv() # make Matrix class available - return f(obj) - - return wrapper - - @converter.py2rpy.register(sparse.csc_matrix) -@py2r_context def csc_to_rmat(csc: sparse.csc_matrix) -> Sexp: + matrix = matrixenv() csc.sort_indices() conv_data = get_type_conv(csc.dtype) with localconverter(default_converter + numpy2ri.converter): - return matrixenv().from_csc(i=csc.indices, p=csc.indptr, x=csc.data, dims=list(csc.shape), conv_data=conv_data) + return matrix.from_csc(i=csc.indices, p=csc.indptr, x=csc.data, dims=list(csc.shape), conv_data=conv_data) @converter.py2rpy.register(sparse.csr_matrix) -@py2r_context def csr_to_rmat(csr: sparse.csr_matrix) -> Sexp: + matrix = matrixenv() csr.sort_indices() conv_data = get_type_conv(csr.dtype) with localconverter(default_converter + numpy2ri.converter): - return matrixenv().from_csr( + return matrix.from_csr( j=csr.indices, p=csr.indptr, x=csr.data, @@ -83,11 +74,11 @@ def csr_to_rmat(csr: sparse.csr_matrix) -> Sexp: @converter.py2rpy.register(sparse.coo_matrix) -@py2r_context def coo_to_rmat(coo: sparse.coo_matrix) -> Sexp: + matrix = matrixenv() conv_data = get_type_conv(coo.dtype) with localconverter(default_converter + numpy2ri.converter): - return matrixenv().from_coo( + return matrix.from_coo( i=coo.row, j=coo.col, x=coo.data, @@ -97,8 +88,8 @@ def coo_to_rmat(coo: sparse.coo_matrix) -> Sexp: @converter.py2rpy.register(sparse.dia_matrix) -@py2r_context def dia_to_rmat(dia: sparse.dia_matrix) -> Sexp: + matrix = matrixenv() conv_data = get_type_conv(dia.dtype) if len(dia.offsets) > 1: msg = ( @@ -107,7 +98,7 @@ def dia_to_rmat(dia: sparse.dia_matrix) -> Sexp: ) raise ValueError(msg) with localconverter(default_converter + numpy2ri.converter): - return matrixenv().from_dia( + return matrix.from_dia( n=dia.shape[0], x=dia.data, conv_data=conv_data, diff --git a/tests/test_py2rpy.py b/tests/test_py2rpy.py index d6657f0..74805e5 100644 --- a/tests/test_py2rpy.py +++ b/tests/test_py2rpy.py @@ -1,7 +1,7 @@ from __future__ import annotations from typing import TYPE_CHECKING -from warnings import catch_warnings, simplefilter +from warnings import catch_warnings, filterwarnings, simplefilter import numpy as np import pytest @@ -81,6 +81,8 @@ def test_datasets( # TODO(flying-sheep): Adapt to rpy2 changes instead # https://github.com/theislab/anndata2ri/issues/109 with pytest.warns(DeprecationWarning, match=r'rpy2\.robjects\.conversion is deprecated'): + filterwarnings('ignore', r'Duplicated obs_names', UserWarning) + filterwarnings('ignore', r'Observation names are not unique', UserWarning) ex = py2r(anndata2ri, dataset()) else: ex = py2r(anndata2ri, dataset())