Skip to content

Commit

Permalink
Merge pull request #129 from CCBR/deseq-docker
Browse files Browse the repository at this point in the history
Use docker container for rules that use R
  • Loading branch information
kelly-sovacool authored May 29, 2024
2 parents bb5c9ec + 220d9ea commit 4e2b72c
Show file tree
Hide file tree
Showing 19 changed files with 124 additions and 201 deletions.
5 changes: 0 additions & 5 deletions .github/workflows/docker-auto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@ on:
- main
paths:
- "docker/**"
pull_request:
branches:
- main
paths:
- "docker/**"

jobs:
generate-matrix:
Expand Down
13 changes: 8 additions & 5 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
## CARLISLE development version

- Bug fixes (#127, @epehrsson)
- Removes single-sample group check for DESeq.
- Increases memory for DESeq.
- Ensures control replicate number is an integer.
- Fixes FDR cutoff misassigned to log2FC cutoff.
- Fixes `no_dedup` variable names in library normalization scripts.
- Removes single-sample group check for DESeq.
- Increases memory for DESeq.
- Ensures control replicate number is an integer.
- Fixes FDR cutoff misassigned to log2FC cutoff.
- Fixes `no_dedup` variable names in library normalization scripts.
- Containerize rules that require R (`deseq`, `go_enrichment`, and `spikein_assessment`) to fix installation issues with common R library path. (#129, @kelly-sovacool)
The `Rlib_dir` and `Rpkg_config` config options have been removed as they are no longer needed.

## CARLISLE v2.5.0
- Refactors R packages to a common source location (#118, @slsevilla)
Expand Down
11 changes: 6 additions & 5 deletions carlisle
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ tools_specific_yaml="tools_biowulf.yaml"
# these are copied into the WORKDIR
ESSENTIAL_FILES="config/config.yaml config/samples.tsv config/contrasts.tsv config/fqscreen_config.conf config/multiqc_config.yaml config/rpackages.csv"
ESSENTIAL_FOLDERS="workflow/scripts annotation"
# set extra singularity bindings
EXTRA_SINGULARITY_BINDS="-B /data/CCBR_Pipeliner/,/lscratch"

# ## setting PIPELINE_HOME
PIPELINE_HOME=$(readlink -f $(dirname "$0"))
Expand Down Expand Up @@ -144,7 +142,10 @@ function check_essential_files() {
function set_singularity_binds(){
# this function tries to find which folders to bind
# biowulf specific
echo "$PIPELINE_HOME" > ${WORKDIR}/tmp1
# set extra singularity bindings
EXTRA_SINGULARITY_BINDS="/lscratch"

echo "$PIPELINE_HOME" >> ${WORKDIR}/tmp1
echo "$WORKDIR" >> ${WORKDIR}/tmp1
grep -o '\/.*' <(cat ${WORKDIR}/config/config.yaml ${WORKDIR}/config/samples.tsv)|tr '\t' '\n'|grep -v ' \|\/\/'|sort|uniq >> ${WORKDIR}/tmp1
grep gpfs ${WORKDIR}/tmp1|awk -F'/' -v OFS='/' '{print $1,$2,$3,$4,$5}' |sort|uniq > ${WORKDIR}/tmp2
Expand All @@ -153,7 +154,8 @@ function set_singularity_binds(){
binds=$(cat ${WORKDIR}/tmp2 ${WORKDIR}/tmp3 ${WORKDIR}/tmp4|sort|uniq |tr '\n' ',')
rm -f ${WORKDIR}/tmp?
binds=$(echo $binds|awk '{print substr($1,1,length($1)-1)}')
SINGULARITY_BINDS="-B $EXTRA_SINGULARITY_BINDS,$binds"

SINGULARITY_BINDS=" -B $EXTRA_SINGULARITY_BINDS,$binds "
}

function rescript(){
Expand All @@ -168,7 +170,6 @@ function runcheck(){
check_essential_files
module load $PYTHON_VERSION
module load $SNAKEMAKE_VERSION
# SINGULARITY_BINDS="$EXTRA_SINGULARITY_BINDS -B ${PIPELINE_HOME}:${PIPELINE_HOME} -B ${WORKDIR}:${WORKDIR}"
}

function controlcheck(){
Expand Down
13 changes: 10 additions & 3 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@
# The working dir... output will be in the results subfolder of the workdir
workdir: "WORKDIR"

# scripts directory
# by default, use the scripts copied to the working directory.
# alternatively, use the scripts from the pipeline source.
scriptsdir: "WORKDIR/scripts"
#scriptsdir: "PIPELINE_HOME/workflow/scripts"

# tab delimited samples file .. see samplefile for format details
samplemanifest: "WORKDIR/config/samples.tsv"

Expand Down Expand Up @@ -150,7 +156,8 @@ spikein_reference:
adapters: "PIPELINE_HOME/resources/other/adapters.fa"

#####################################################################################
# R Packages
# CONTAINERS
#####################################################################################
Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_carlisle/"
Rpkg_config: "WORKDIR/config/rpackages.csv"
containers:
base: "docker://nciccbr/ccbr_ubuntu_base_20.04:v6"
carlisle_r: "docker://nciccbr/carlisle_r:v1"
33 changes: 0 additions & 33 deletions config/rpackages.csv

This file was deleted.

30 changes: 30 additions & 0 deletions docker/carlisle_r/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Image providing the R environment used by the CARLISLE rules that run R.
# Built on top of the shared CCBR Ubuntu base image.
FROM nciccbr/ccbr_ubuntu_base_20.04:v6

# build time variables
# Each ARG is re-exported as ENV so the value remains visible inside the
# running container, not only during the build.
ARG BUILD_DATE="000000"
ENV BUILD_DATE=${BUILD_DATE}
ARG BUILD_TAG="000000"
ENV BUILD_TAG=${BUILD_TAG}
ARG REPONAME="000000"
ENV REPONAME=${REPONAME}

# install conda packages
# packages.txt lists one conda package spec per line.
COPY packages.txt /data2/
RUN mamba install \
    --no-channel-priority \
    -c bioconda -c conda-forge -c r \
    --file /data2/packages.txt
ENV PATH="/opt2/conda/bin:$PATH"
ENV R_LIBS_USER=/opt2/conda/lib/R/library/
# install ELBOW manually, fails with mamba
# NOTE(review): --no-check-certificate disables TLS verification for this
# download; consider pinning a checksum or using a mirror with a valid cert.
# install.packages() only *warns* when a source install fails, so R would still
# exit 0; loading the package afterwards makes the build fail if ELBOW is
# missing. The tarball is removed in the same layer to keep the image small.
RUN wget --no-check-certificate https://bioconductor.riken.jp/packages/3.4/bioc/src/contrib/ELBOW_1.10.0.tar.gz && \
    R -e 'install.packages("ELBOW_1.10.0.tar.gz", repos = NULL, type="source", INSTALL_opts = "--no-lock")' && \
    R -e 'library(ELBOW)' && \
    rm -f ELBOW_1.10.0.tar.gz

# Save Dockerfile in the docker
COPY Dockerfile /opt2/Dockerfile_${REPONAME}.${BUILD_TAG}
RUN chmod a+r /opt2/Dockerfile_${REPONAME}.${BUILD_TAG}

# cleanup
WORKDIR /data2
RUN apt-get clean && apt-get purge \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
4 changes: 4 additions & 0 deletions docker/carlisle_r/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
dockerhub_namespace: nciccbr
image_name: carlisle_r
version: v1
container: "$(dockerhub_namespace)/$(image_name):$(version)"
28 changes: 28 additions & 0 deletions docker/carlisle_r/packages.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
bioconductor-bsgenome.hsapiens.ncbi.t2t.chm13v2.0
bioconductor-chipenrich
bioconductor-chipseeker
bioconductor-deseq2
bioconductor-edger
bioconductor-enhancedvolcano
bioconductor-genomicfeatures
bioconductor-htsfilter
bioconductor-org.Hs.eg.db
bioconductor-org.Mm.eg.db
bioconductor-rtracklayer
bioconductor-txdb.hsapiens.ucsc.hg19.knowngene
bioconductor-TxDb.Hsapiens.UCSC.hg38.knownGene
bioconductor-TxDb.Mmusculus.UCSC.mm10.knownGene
r-argparse
r-DT
r-ggfortify
r-ggvenn
r-htmltools
r-latticeextra
r-pander
r-pdp
r-plotly
r-rcolorbrewer
r-reshape2
r-tidyverse
r-xfun>=0.43
r-yaml
7 changes: 1 addition & 6 deletions workflow/rules/annotations.smk
Original file line number Diff line number Diff line change
Expand Up @@ -286,18 +286,15 @@ if config["run_contrasts"] == "Y":
rscript_wrapper=join(SCRIPTSDIR,"_go_enrichment_wrapper.R"),
rmd=join(SCRIPTSDIR,"_go_enrichment.Rmd"),
carlisle_functions=join(SCRIPTSDIR,"_carlisle_functions.R"),
Rlib_dir=config["Rlib_dir"],
Rpkg_config=config["Rpkg_config"],
rscript_diff=join(SCRIPTSDIR,"_diff_markdown_wrapper.R"),
rscript_functions=join(SCRIPTSDIR,"_carlisle_functions.R"),
output_dir = join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment"),
species = config["genome"],
geneset_id = GENESET_ID,
dedup_status = "{dupstatus}"
envmodules:
TOOLS["R"],
output:
html=join(RESULTSDIR,"peaks","{qthresholds}","{peak_caller}","annotation","go_enrichment","{contrast_list}.{dupstatus}.go_enrichment.html"),
container: config['containers']['carlisle_r']
shell:
"""
set -exo pipefail
Expand All @@ -310,8 +307,6 @@ if config["run_contrasts"] == "Y":
Rscript {params.rscript_wrapper} \\
--rmd {params.rmd} \\
--carlisle_functions {params.carlisle_functions} \\
--Rlib_dir {params.Rlib_dir} \\
--Rpkg_config {params.Rpkg_config} \\
--output_dir {params.output_dir} \\
--report {output.html} \\
--peak_list "$clean_sample_list" \\
Expand Down
9 changes: 1 addition & 8 deletions workflow/rules/diff.smk
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,6 @@ rule DESeq:
params:
rmd=join(SCRIPTSDIR,"_diff_markdown.Rmd"),
carlisle_functions=join(SCRIPTSDIR,"_carlisle_functions.R"),
Rlib_dir=config["Rlib_dir"],
Rpkg_config=config["Rpkg_config"],
rscript_diff=join(SCRIPTSDIR,"_diff_markdown_wrapper.R"),
rscript_venn=join(SCRIPTSDIR,"_plot_results_venn.R"),
contrast_list="{contrast_list}",
Expand All @@ -169,8 +167,7 @@ rule DESeq:
spiked = NORM_METHOD,
species = config["genome"],
gtf=config["reference"][config["genome"]]["gtf"]
envmodules:
TOOLS["R"]
container: config['containers']['carlisle_r']
shell:
"""
set -exo pipefail
Expand All @@ -197,8 +194,6 @@ rule DESeq:
Rscript {params.rscript_diff} \\
--rmd {params.rmd} \\
--carlisle_functions {params.carlisle_functions} \\
--Rlib_dir {params.Rlib_dir} \\
--Rpkg_config {params.Rpkg_config} \\
--countsmatrix {input.cm_auc} \\
--sampleinfo {input.si} \\
--dupstatus {params.dupstatus} \\
Expand Down Expand Up @@ -231,8 +226,6 @@ rule DESeq:
Rscript {params.rscript_diff} \\
--rmd {params.rmd} \\
--carlisle_functions {params.carlisle_functions} \\
--Rlib_dir {params.Rlib_dir} \\
--Rpkg_config {params.Rpkg_config} \\
--countsmatrix {input.cm_frag} \\
--sampleinfo {input.si} \\
--dupstatus {params.dupstatus} \\
Expand Down
11 changes: 3 additions & 8 deletions workflow/rules/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -92,18 +92,15 @@ rule spikein_assessment:
"""
input:
bams = expand(rules.align.output.bamidxstats,replicate=REPLICATES,dupstatus=DUPSTATUS),
output:
html=join(RESULTSDIR,'qc',"spikein_qc_report.html"),
params:
rscript_wrapper=join(SCRIPTSDIR,"_generate_spikein_wrapper.R"),
rmd=join(SCRIPTSDIR,"_generate_spikein_plots.Rmd"),
carlisle_functions=join(SCRIPTSDIR,"_carlisle_functions.R"),
Rlib_dir=config["Rlib_dir"],
Rpkg_config=config["Rpkg_config"],
rscript_diff=join(SCRIPTSDIR,"_diff_markdown_wrapper.R"),
spikein=config["spikein_genome"],
envmodules:
TOOLS["R"],
output:
html=join(RESULTSDIR,'qc',"spikein_qc_report.html"),
container: config['containers']['carlisle_r']
shell:
"""
if [[ {params.spikein} == "ecoli" ]]; then species_name="NC_000913.3"; else species_name=""; fi
Expand All @@ -116,8 +113,6 @@ rule spikein_assessment:
Rscript {params.rscript_wrapper} \\
--rmd {params.rmd} \\
--carlisle_functions {params.carlisle_functions} \\
--Rlib_dir {params.Rlib_dir} \\
--Rpkg_config {params.Rpkg_config} \\
--report {output.html} \\
--bam_list "$clean_sample_list" \\
--spikein_control $species_name
Expand Down
49 changes: 30 additions & 19 deletions workflow/scripts/_carlisle_functions.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,36 @@
########################################################################
# LIBRARY
########################################################################
CARLISLE_HANDLE_PACKAGES<-function(pkg_df){
for (rowid in rownames(pkg_df)){
pkg=pkg_df[rowid,"package"]
source=pkg_df[rowid,"source"]
version=pkg_df[rowid,"version"]
gh_name=pkg_df[rowid,"gh_name"]

need_install <- pkg[!(pkg %in% installed.packages()[,"Package"])]
if (length(need_install)!=0){
print(paste0("Installing: ", pkg))
if (source=="bc") BiocManager::install(pkg,ask=FALSE,update=FALSE)
if (source=="cr") install.packages(pkg,version=version,repos = "http://cran.us.r-project.org",
local = FALSE,ask=FALSE,update=FALSE,dependencies=TRUE)
if (source=="gh") remotes::install_github(gh_name,version=version,local = FALSE,update=FALSE)
}

print(paste0("Loading: ",pkg))
invisible(lapply(pkg, library, character.only = TRUE))
}
library(tidyverse)
#' Attach the R packages used by the CARLISLE R scripts and reports.
#'
#' Packages are attached with `library()` in the order given. Nothing is
#' installed here: every package is expected to be available already (for
#' example, inside the pipeline's R container). All missing packages are
#' reported together in a single error instead of stopping at the first one.
#'
#' @param pkgs Optional character vector of package names to attach. When
#'   `NULL` (the default) the full CARLISLE package set is used. Surrounding
#'   whitespace is trimmed and empty or duplicate entries are dropped.
#' @return Invisibly, the list of values returned by each `library()` call.
load_packages <- function(pkgs = NULL) {
  if (is.null(pkgs)) {
    # Kept as an explicit vector rather than a newline-delimited string so
    # that indentation or line-ending changes cannot corrupt package names.
    pkgs <- c(
      "BSgenome.Hsapiens.NCBI.T2T.CHM13v2.0",
      "chipenrich",
      "ChIPseeker",
      "DESeq2",
      "edgeR",
      "ELBOW",
      "EnhancedVolcano",
      "GenomicFeatures",
      "ggfortify",
      "ggrepel",
      "htmltools",
      "HTSFilter",
      "latticeExtra",
      "org.Hs.eg.db",
      "org.Mm.eg.db",
      "pander",
      "pdp",
      "plotly",
      "RColorBrewer",
      "reshape2",
      "rtracklayer",
      "tidyverse",
      "TxDb.Hsapiens.UCSC.hg19.knownGene",
      "TxDb.Hsapiens.UCSC.hg38.knownGene",
      "TxDb.Mmusculus.UCSC.mm10.knownGene",
      "yaml"
    )
  }

  # Normalise the names: strip stray whitespace, drop blanks and duplicates.
  package_vctr <- unique(trimws(as.character(pkgs)))
  package_vctr <- package_vctr[nzchar(package_vctr)]

  # Check availability up front so the error names every missing package.
  is_installed <- vapply(
    package_vctr,
    function(pkg) length(find.package(pkg, quiet = TRUE)) > 0,
    logical(1)
  )
  missing_pkgs <- package_vctr[!is_installed]
  if (length(missing_pkgs) > 0) {
    stop(
      "Required R package(s) not installed: ",
      paste(missing_pkgs, collapse = ", "),
      call. = FALSE
    )
  }

  invisible(lapply(package_vctr, library, character.only = TRUE))
}

########################################################################
Expand Down
27 changes: 1 addition & 26 deletions workflow/scripts/_diff_markdown.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@ output:
html_document:
params:
carlisle_functions: "/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/workflow/scripts/_carlisle_functions.R"
Rlib_dir: "/data/CCBR_Pipeliner/db/PipeDB/Rlibrary_4.3_carlisle/"
Rpkg_config: "/data/CCBR_Pipeliner/Pipelines/CARLISLE/latest/conf/rpackages.csv"
rawcountsmatrix: "~/../../../Volumes/ccbr1155/CS030666/analysis/results/peaks/contrasts/siNC_H4K20me3_vs_siSmyd3_H4K20me3__dedup__narrowGo_peaks.bed/siNC_H4K20me3_vs_siSmyd3_H4K20me3__dedup__narrowGo_peaks.bed_countsmatrix.txt"
coldata: "~/../../../Volumes/ccbr1155/CS030666/analysis/results/peaks/contrasts/siNC_H4K20me3_vs_siSmyd3_H4K20me3__dedup__narrowGo_peaks.bed/siNC_H4K20me3_vs_siSmyd3_H4K20me3__dedup__narrowGo_peaks.bed_sampleinfo.txt"
dupstatus: "dedup" # dedup or no_dedup
Expand All @@ -32,30 +30,7 @@ knitr::opts_chunk$set(echo = TRUE)
# source functions file
source(params$carlisle_functions)
# set library dir, load this and remove any other dirs to avoid confusion
# between personally created pkgs and the pipeline package
## saving old path "/Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/library"
Rlib_dir=params$Rlib_dir
Rpkg_config=params$Rpkg_config
print(paste0("Using the lib.loc location: ",Rlib_dir))
assign(".lib.loc", Rlib_dir, envir = environment(.libPaths))
.libPaths()
# read in package info
pkg_df=read.csv(Rpkg_config)
pkg_df=subset(pkg_df,diff=="Y")
pkg_df
# for each package check installation, if present then load library
CARLISLE_HANDLE_PACKAGES(pkg_df)
##handle ELBOW separately - it requires an older version of bioconductor / R
if("ELBOW" %in% new.packages){
install.packages(paste0(Rlib_dir,"ELBOW_1.10.0.tar.gz"),
repos = NULL, type="source", INSTALL_opts = '--no-lock')
new.packages=new.packages[names(new.packages) != "ELBOW"]
}
load_packages()
```

## Loading SampleInfo and Counts
Expand Down
Loading

0 comments on commit 4e2b72c

Please sign in to comment.