Skip to content

Commit

Permalink
refactor: use profiles for slurm config
Browse files Browse the repository at this point in the history
will allow different batch schedulers, e.g. Altair Grid Engine (#134)
  • Loading branch information
kelly-sovacool committed Dec 16, 2024
1 parent af2a834 commit f207842
Show file tree
Hide file tree
Showing 10 changed files with 116 additions and 162 deletions.
59 changes: 31 additions & 28 deletions charlie
Original file line number Diff line number Diff line change
Expand Up @@ -52,31 +52,34 @@ GIT_COMMIT_TAG=$(get_git_commitid_tag $PIPELINE_HOME)

PYTHONVERSION="3"
SNAKEMAKEVERSION="7"
CLUSTER_SBATCH_CMD="sbatch --parsable --cpus-per-task {cluster.threads} -p {cluster.partition} -t {cluster.time} --mem {cluster.mem} --job-name {cluster.name} --output {cluster.output} --error {cluster.error}"
PARTITION='norm'
CONDA_ACTIVATE=''
PATH_PREPEND=''
MODULE_LOAD=''
PLATFORM=$(get_platform)
PARTITION='norm'
EXTRA_SINGULARITY_BINDS=""
TEMP_DIR=""
REFS_DIR=""
CLUSTER_PROFILE="config/unknown"
if [ "$PLATFORM" == "biowulf" ]; then
EXTRA_SINGULARITY_BINDS="/lscratch"
CLUSTER_SBATCH_CMD="$CLUSTER_SBATCH_CMD --gres {cluster.gres}"
CLUSTER_PROFILE="config/slurm-biowulf"
PARTITION="ccr,$PARTITION"
EXTRA_SINGULARITY_BINDS="/lscratch"
CONDA_ACTIVATE='. "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && conda activate py311'
MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity; $CONDA_ACTIVATE"
TEMP_DIR='/lscratch/$SLURM_JOB_ID/'
REFS_DIR="/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs/"
elif [ "$PLATFORM" == "fnlcr" ]; then
CLUSTER_PROFILE="config/slurm-fnlcr"
EXTRA_SINGULARITY_BINDS="/scratch/local"
# activate conda env
CONDA_ACTIVATE=". '/mnt/projects/CCBR-Pipelines/resources/miniconda3/etc/profile.d/conda.sh' && conda activate py311"
# make sure spooker is in the path
PATH_PREPEND='export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH"'
MODULE_LOAD="module load singularity; $PATH_PREPEND; $CONDA_ACTIVATE"
TEMP_DIR="/scratch/local/"
REFS_DIR="/mnt/projects/CCBR-Pipelines/db/charlie/fastas_gtfs/"
else
EXTRA_SINGULARITY_BINDS=""
echo """WARNING: detected platform is $PLATFORM. Please edit the following files for compatibility with your computing environment:
config.yaml
cluster.json
submit_script.sbatch
echo """WARNING: detected platform is $PLATFORM. Please edit the files in config/unknown/ & config.yaml for compatibility with your computing environment
"""
fi

Expand Down Expand Up @@ -213,22 +216,26 @@ function init() {
if [ -d $WORKDIR ];then err "Folder $WORKDIR already exists!"; fi
mkdir -p $WORKDIR

# copy config and samples files
# copy config resources
cp -r ${PIPELINE_HOME}/config $WORKDIR/

# copy config template and samples files
if [ ! -f $CONFIGFILE ];then
sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \
-e "s/WORKDIR/${WORKDIR//\//\\/}/g" \
-e "s/HOST/${HOST}/g" \
-e "s/ADDITIVES/${ADDITIVES}/g" \
-e "s/VIRUSES/${VIRUSES}/g" \
${PIPELINE_HOME}/config/$PLATFORM/config.yaml |\
cat - ${PIPELINE_HOME}/config/containers.yaml > $CONFIGFILE
-e "s/TEMP_DIR/${TEMP_DIR//\//\\/}/g" \
-e "s/REFS_DIR/${REFS_DIR//\//\\/}/g" \
-e "s|CLUSTER_PROFILE|${CLUSTER_PROFILE}|g" \
${PIPELINE_HOME}/config/config.yaml \
> $CONFIGFILE
fi
if [ ! -f $WORKDIR/nclscan.config ];then
sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" -e "s/WORKDIR/${WORKDIR//\//\\/}/g" ${PIPELINE_HOME}/resources/NCLscan.config.template > $WORKDIR/nclscan.config
fi
if [ ! -f $CLUSTERFILE ];then
cp ${PIPELINE_HOME}/config/$PLATFORM/cluster.json $CLUSTERFILE
fi

if [ ! -f $WORKDIR/samples.tsv ];then
cp $MANIFEST $WORKDIR/samples.tsv
fi
Expand All @@ -247,7 +254,7 @@ echo "Done Initializing $WORKDIR. You can now edit $WORKDIR/config.yaml and $WOR

function check_essential_files() {
if [ ! -d $WORKDIR ];then err "Folder $WORKDIR does not exist!"; fi
for f in config.yaml samples.tsv nclscan.config cluster.json; do
for f in config.yaml samples.tsv nclscan.config; do
if [ ! -f $WORKDIR/$f ]; then err "Error: '${f}' file not found in workdir ... initialize first!";fi
done
}
Expand Down Expand Up @@ -299,8 +306,11 @@ function reconfig(){
-e "s/HOST/${HOST}/g" \
-e "s/ADDITIVES/${ADDITIVES}/g" \
-e "s/VIRUSES/${VIRUSES}/g" \
${PIPELINE_HOME}/config/$PLATFORM/config.yaml |\
cat - ${PIPELINE_HOME}/config/containers.yaml > $CONFIGFILE
-e "s/TEMP_DIR/${TEMP_DIR//\//\\/}/g" \
-e "s/REFS_DIR/${REFS_DIR//\//\\/}/g" \
-e "s|CLUSTER_PROFILE|${CLUSTER_PROFILE}|g" \
${PIPELINE_HOME}/config/config.yaml \
> $CONFIGFILE
echo "$WORKDIR/config.yaml has been updated!"
}

Expand Down Expand Up @@ -523,9 +533,7 @@ snakemake -s $SNAKEFILE \
--printshellcmds \
--latency-wait 300 \
--configfile $CONFIGFILE \
--cluster-config $CLUSTERFILE \
--cluster "$CLUSTER_SBATCH_CMD" \
--cluster-status $CLUSTERSTATUSCMD \
--profile $CLUSTER_PROFILE \
-j 500 \
--rerun-incomplete \
--rerun-triggers $trigger \
Expand Down Expand Up @@ -554,8 +562,6 @@ EOF

else # dry-run and unlock

echo $CLUSTER_SBATCH_CMD

snakemake $1 -s $SNAKEFILE \
--directory $WORKDIR \
--use-envmodules \
Expand All @@ -564,8 +570,7 @@ EOF
--printshellcmds \
--latency-wait 300 \
--configfile $CONFIGFILE \
--cluster-config $CLUSTERFILE \
--cluster "$CLUSTER_SBATCH_CMD" \
--profile $CLUSTER_PROFILE \
-j 500 \
--rerun-incomplete \
--rerun-triggers $trigger \
Expand Down Expand Up @@ -656,8 +661,6 @@ function main(){

# required files
CONFIGFILE="${WORKDIR}/config.yaml"
CLUSTERFILE="${WORKDIR}/cluster.json"
CLUSTERSTATUSCMD="${PIPELINE_HOME}/resources/cluster_status.sh"

# change group to Ziegelbauer_lab before doing anything
if [ "$CHANGEGRP" == "1" ]; then change_grp "$allargs"; fi
Expand Down
23 changes: 20 additions & 3 deletions config/biowulf/config.yaml → config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
workdir: "WORKDIR"

# temporary directory for intermediate files that are not saved
tempdir: "/lscratch/$SLURM_JOB_ID"
tempdir: "TEMP_DIR"

# tab delimited samples file ... should have the following 3 columns
# sampleName path_to_R1_fastq path_to_R2_fastq
Expand Down Expand Up @@ -90,7 +90,7 @@ resourcesdir: "PIPELINE_HOME/resources"

# default cluster
# cluster: "PIPELINE_HOME/resources/cluster.json"
cluster: "WORKDIR/cluster.json"
cluster: "WORKDIR/CLUSTER_PROFILE/cluster.json"

adapters: "PIPELINE_HOME/resources/TruSeq_and_nextera_adapters.consolidated.fa"
circexplorer_bsj_circRNA_min_reads: 3 # in addition to "known" and "low-conf" circRNAs identified by circexplorer, we also include those found in back_spliced.bed file but not classified as known/low-conf only if the number of reads supporting the BSJ call is greater than this number
Expand All @@ -107,8 +107,25 @@ high_confidence_core_callers_plus_n: 1

ciri_perl_script: "/opt2/CIRI_v2.0.6/CIRI2.pl" # path in docker container
# change this path to a directory containing fasta and GTF files for all host and virus genomes
fastas_gtfs_dir: "/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs"
fastas_gtfs_dir: "REFS_DIR"

annotation_lookups:
hg38: "PIPELINE_HOME/resources/hg38_2_hg19_lookup.txt"
mm39: "PIPELINE_HOME/resources/mm39_circBase_annotation_lookup.txt"

containers:
base: "docker://nciccbr/ccbr_ubuntu_base_20.04:v7"
bowtie1: "docker://nciccbr/charlie_bowtie1:v0.1.1"
circexplorer: "docker://nciccbr/ccbr_circexplorer:v1.0"
circRNA_finder: "docker://nciccbr/charlie_circrna_finder:v1.0.1"
ciri: "docker://nciccbr/charlie_ciri2:v1.0.1"
clear: "docker://nciccbr/ccbr_clear:v2.0.1"
cutadapt: "docker://nciccbr/charlie_cutadapt_fqfilter:v1.0.1"
dcc: "docker://nciccbr/charlie_dcc:v0.2.1"
fastqc: "docker://nciccbr/ccrgb_qctools:v4.0"
mapsplice: "docker://cgrlab/mapsplice2:latest"
multiqc: "docker://nciccbr/ccbr_multiqc_1.15:v1"
picard: "docker://nciccbr/ccbr_picard_2.27.5:v1"
R: "docker://nciccbr/ccbr_r_4.3.0:v1"
star: "docker://nciccbr/ccbr_star_2.7.6a:latest"
star_ucsc_cufflinks: "docker://nciccbr/charlie_star_ucsc_cufflinks:v0.4.1"
16 changes: 0 additions & 16 deletions config/containers.yaml

This file was deleted.

114 changes: 0 additions & 114 deletions config/fnlcr/config.yaml

This file was deleted.

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ then
echo running
else
echo failed
fi
fi
23 changes: 23 additions & 0 deletions config/slurm-biowulf/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Snakemake cluster profile for NIH Biowulf (Slurm).
# Selected by the launcher via `snakemake --profile config/slurm-biowulf`;
# the relative paths below are resolved against this profile directory.

# sbatch submission template; {cluster.*} placeholders are filled from the
# per-rule entries in cluster.json. --parsable makes sbatch print a bare job
# ID, which cluster_status.sh relies on. --gres (e.g. lscratch) is
# Biowulf-specific and absent from the fnlcr profile.
cluster: sbatch
  --parsable
  --cpus-per-task {cluster.threads}
  -p {cluster.partition}
  -t {cluster.time}
  --mem {cluster.mem}
  --job-name {cluster.name}
  --output {cluster.output}
  --error {cluster.error}
  --gres {cluster.gres}
# Per-rule resource definitions (threads, partition, time, mem, gres, ...).
cluster-config: "cluster.json"
# Maps a job ID to success/running/failed for Snakemake's status polling.
cluster-status: "cluster_status.sh"
jobs: 499
immediate-submit: false
verbose: true
notemp: true
latency-wait: 300
printshellcmds: true
use-singularity: true
rerun-incomplete: true
rerun-triggers: mtime
retries: 2
keep-going: true
File renamed without changes.
19 changes: 19 additions & 0 deletions config/slurm-fnlcr/cluster_status.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Check the status of a Slurm job for Snakemake's --cluster-status hook.
# Usage: cluster_status.sh <jobid>
# Prints exactly one word on stdout: success | running | failed
jobid="$1"

# When sbatch is run without --parsable, Snakemake captures the literal
# "Submitted batch job N" line and passes "Submitted" here — catch that
# misconfiguration explicitly instead of querying sacct with garbage.
if [[ "$jobid" == Submitted ]]; then
  echo "smk-simple-slurm: Invalid job ID: $jobid" >&2
  echo "smk-simple-slurm: Did you remember to add the flag --parsable to your sbatch call?" >&2
  exit 1
fi

# sacct lists one line per job step; the first line is the job's top-level
# state (e.g. COMPLETED, RUNNING, FAILED, CANCELLED by ...).
output=$(sacct -j "$jobid" --format State --noheader | head -n 1 | awk '{print $1}')

if [[ $output =~ ^(COMPLETED).* ]]; then
  echo success
elif [[ $output =~ ^(RUNNING|PENDING|COMPLETING|CONFIGURING|SUSPENDED).* ]]; then
  echo running
else
  # Any other state (FAILED, CANCELLED, TIMEOUT, OUT_OF_MEMORY, NODE_FAIL,
  # or an empty sacct result) is treated as a failure.
  echo failed
fi
22 changes: 22 additions & 0 deletions config/slurm-fnlcr/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Snakemake cluster profile for FNLCR (Slurm).
# Selected by the launcher via `snakemake --profile config/slurm-fnlcr`;
# the relative paths below are resolved against this profile directory.

# sbatch submission template; {cluster.*} placeholders are filled from the
# per-rule entries in cluster.json. --parsable makes sbatch print a bare job
# ID, which cluster_status.sh relies on. No --gres here: that flag is only
# used on Biowulf.
cluster: sbatch
  --parsable
  --cpus-per-task {cluster.threads}
  -p {cluster.partition}
  -t {cluster.time}
  --mem {cluster.mem}
  --job-name {cluster.name}
  --output {cluster.output}
  --error {cluster.error}
# Per-rule resource definitions (threads, partition, time, mem, ...).
cluster-config: "cluster.json"
# Maps a job ID to success/running/failed for Snakemake's status polling.
cluster-status: "cluster_status.sh"
jobs: 499
immediate-submit: false
verbose: true
notemp: true
latency-wait: 300
printshellcmds: true
use-singularity: true
rerun-incomplete: true
rerun-triggers: mtime
retries: 2
keep-going: true

0 comments on commit f207842

Please sign in to comment.