nf-core · lea-lenhardtackovic · Nov 8, 2024 · Nov 8, 2024 · Nov 8, 2024 · Feb 6, 2025
diff --git a/modules/nf-core/celda/decontx/environment.yml b/modules/nf-core/celda/decontx/environment.yml
@@ -0,0 +1,8 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::anndata2ri=1.3.1  # imported by templates/decontx.py for py2rpy / rpy2py conversion
+  - bioconda::bioconductor-celda=1.18.1  # R package loaded via importr("celda"); provides decontX
+  - conda-forge::anndata=0.10.7  # reads and writes the .h5ad files
+  - conda-forge::numpy=1.26.4  # not imported directly by the template; presumably pinned for compatibility — TODO confirm
diff --git a/modules/nf-core/celda/decontx/main.nf b/modules/nf-core/celda/decontx/main.nf
@@ -0,0 +1,40 @@
+process CELDA_DECONTX {
+    tag "$meta.id"
+    label 'process_medium'
+    // Runs templates/decontx.py, which calls decontX from the R package celda on the input h5ad.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/anndata2ri_bioconductor-celda_anndata_numpy:2aed5fa978c663d9':
+        'community.wave.seqera.io/library/anndata2ri_bioconductor-celda_anndata_numpy:63af229ac9152259' }"
+    // The template only uses raw as the decontX background when that path exists.
+    input:
+    tuple val(meta), path(h5ad), path(raw)
+
+    output:
+    tuple val(meta), path("*.h5ad"), emit: h5ad
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+    // prefix and batch_col are interpolated into the template; both can be overridden via task.ext.
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    batch_col = task.ext.batch_col ?: "batch"
+    template 'decontx.py'
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // batch_col is not defined here: the stub never reads it, it only touches the output file.
+    """
+    touch ${prefix}.h5ad
+
+    cat <<-END_VERSIONS > versions.yml
+    ${task.process}:
+        python: \$(python3 -c "import platform; print(platform.python_version())")
+        anndata: \$(python3 -c "import anndata as ad; print(ad.__version__)")
+        anndata2ri: \$(python3 -c "import anndata2ri; print(anndata2ri.__version__)")
+        rpy2: \$(python3 -c "import rpy2; print(rpy2.__version__)")
+        celda: \$(python3 -c "import anndata2ri; import rpy2; import rpy2.robjects as ro; celda = ro.packages.importr('celda'); print(celda.__version__)")
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/celda/decontx/meta.yml b/modules/nf-core/celda/decontx/meta.yml
@@ -0,0 +1,51 @@
+name: celda_decontx
+description: DecontX is a novel Bayesian method to computationally estimate and remove RNA contamination in individual cells without empty droplet information.
+keywords:
+  - single-cell
+  - decontamination
+  - transcriptomics
+tools:
+  - "decontx":  # run here through the bioconductor-celda package (see environment.yml)
+      description: "Decontamination of single cell genomics data"
+      homepage: "https://bioconductor.org/packages/release/bioc/html/decontX.html"
+      documentation: "https://bioconductor.org/packages/release/bioc/manuals/decontX/man/decontX.pdf"
+      tool_dev_url: "https://git.bioconductor.org/packages/decontX"
+      doi: "10.18129/B9.bioc.decontX"
+      licence: ["MIT"]
+
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. `[ id:'sample1' ]`
+    - h5ad:
+        type: file
+        description: Filtered H5AD file
+        pattern: "*.h5ad"
+    - raw:
+        type: file
+        description: Unfiltered H5AD file, used as background when provided (optional)
+        pattern: "*.h5ad"
+
+output:
+  - h5ad:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. `[ id:'sample1' ]`
+      - "*.h5ad":
+          type: file
+          description: H5AD file with the decontaminated counts stored in the `ambient` layer
+          pattern: "*.h5ad"
+  - versions:
+      - "versions.yml":
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+
+authors:
+  - "@lea-lenhardtackovic"
+maintainers:
+  - "@lea-lenhardtackovic"
diff --git a/modules/nf-core/celda/decontx/templates/decontx.py b/modules/nf-core/celda/decontx/templates/decontx.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+
+import os
+import platform
+
+import anndata as ad
+import anndata2ri
+import rpy2
+import rpy2.robjects as ro
+
+celda = ro.packages.importr("celda")  # load the R package celda through rpy2
+
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Render a (possibly nested) dictionary as YAML-style text.
+
+    Args:
+        data (dict): Mapping to render; dict values are rendered recursively.
+        indent (int): Nesting depth; each level adds two spaces.
+
+    Returns:
+        str: One "key: value" line per leaf, nested keys indented.
+    """
+    pad = "  " * indent
+    chunks = []
+    for name, item in data.items():
+        if not isinstance(item, dict):
+            chunks.append(f"{pad}{name}: {item}\\n")
+            continue
+        chunks.append(f"{pad}{name}:\\n" + format_yaml_like(item, indent + 1))
+    return "".join(chunks)
+
+
+adata = ad.read_h5ad("${h5ad}")
+# Expose the matrix as a "counts" layer when the input does not carry one.
+if "counts" not in adata.layers:
+    adata.layers["counts"] = adata.X
+
+sce = anndata2ri.py2rpy(adata)
+# Optional arguments forwarded to decontX; filled in only when usable.
+kwargs = {}
+# Pass batch labels only if the column exists (avoids a KeyError) and varies.
+if "${batch_col}" in adata.obs.columns and len(adata.obs["${batch_col}"].unique()) > 1:
+    kwargs["batch"] = adata.obs["${batch_col}"].tolist()
+# The raw (unfiltered) file is used as background only if its path exists.
+raw_path = "${raw}"
+if os.path.exists(raw_path):
+    raw = ad.read_h5ad(raw_path)
+    if "counts" not in raw.layers:
+        raw.layers["counts"] = raw.X
+    kwargs["background"] = anndata2ri.py2rpy(raw)
+
+corrected = celda.decontX(sce, **kwargs)
+counts = celda.decontXcounts(corrected)
+# Transpose the returned matrix before storing it as the "ambient" layer.
+adata.layers["ambient"] = anndata2ri.rpy2py(counts).T
+del adata.layers["counts"]  # NOTE(review): also drops a pre-existing "counts" layer
+
+adata.write_h5ad("${prefix}.h5ad")
+
+# Record the versions of the interpreter and of every library used above.
+
+tool_versions = {
+    "python": platform.python_version(),
+    "anndata": ad.__version__,
+    "anndata2ri": anndata2ri.__version__,
+    "rpy2": rpy2.__version__,
+    "celda": celda.__version__,
+}
+# Nextflow substitutes the process name, which becomes the top-level key.
+versions_text = format_yaml_like({"${task.process}": tool_versions})
+
+with open("versions.yml", "w") as handle:
+    handle.write(versions_text)
diff --git a/modules/nf-core/celda/decontx/tests/main.nf.test b/modules/nf-core/celda/decontx/tests/main.nf.test
@@ -0,0 +1,62 @@
+nextflow_process {
+    // nf-test suite for the CELDA_DECONTX process defined in ../main.nf.
+    name "Test Process CELDA_DECONTX"
+    script "../main.nf"
+    config "./nextflow.config" // sets ext.batch_col = 'sample' for this process
+    process "CELDA_DECONTX"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "celda"
+    tag "celda/decontx"
+
+    // Real run: filtered matrix plus raw matrix as inputs.
+    test("h5ad input") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file('https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_custom_emptydrops_filter_matrix.h5ad', checkIfExists: true),
+                    file('https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix.h5ad', checkIfExists: true)
+                ]
+                """
+            }
+        }
+        // The run must succeed and its outputs must match the stored snapshot.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+    // Stub run: same inputs, executed with the -stub option.
+    test("h5ad input - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file('https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_custom_emptydrops_filter_matrix.h5ad', checkIfExists: true),
+                    file('https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix.h5ad', checkIfExists: true)
+                ]
+                """
+            }
+        }
+        // Same assertions as the real run, checked against the stub snapshot entry.
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/celda/decontx/tests/main.nf.test.snap b/modules/nf-core/celda/decontx/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+    "h5ad input": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.h5ad:md5,68306f1a7eda214b37439f929460ca42"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,e2a939b3524a1d5e764a5022728c80c7"
+                ],
+                "h5ad": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.h5ad:md5,68306f1a7eda214b37439f929460ca42"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,e2a939b3524a1d5e764a5022728c80c7"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.1",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-11-08T09:49:42.850348211"
+    },
+    "h5ad input - stub": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,f56c2de7d188e634774f11eaee298530"
+                ],
+                "h5ad": [
+                    [
+                        {
+                            "id": "test"
+                        },
+                        "test.h5ad:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,f56c2de7d188e634774f11eaee298530"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.1",
+            "nextflow": "24.10.0"
+        },
+        "timestamp": "2024-11-08T09:50:17.72543765"
+    }
+}
diff --git a/modules/nf-core/celda/decontx/tests/nextflow.config b/modules/nf-core/celda/decontx/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+    withName: CELDA_DECONTX {
+        ext.batch_col = 'sample' // obs column the template reads batch labels from
+    }
+}