Skip to content

Commit

Permalink
refactor: use profiles for slurm config
Browse files Browse the repository at this point in the history
will allow different batch schedulers, e.g. Altair Grid Engine (#134)
  • Loading branch information
kelly-sovacool committed Dec 16, 2024
1 parent af2a834 commit f207842
Show file tree
Hide file tree
Showing 10 changed files with 116 additions and 162 deletions.
59 changes: 31 additions & 28 deletions charlie
Original file line number Diff line number Diff line change
Expand Up @@ -52,31 +52,34 @@ GIT_COMMIT_TAG=$(get_git_commitid_tag $PIPELINE_HOME)

PYTHONVERSION="3"
SNAKEMAKEVERSION="7"
CLUSTER_SBATCH_CMD="sbatch --parsable --cpus-per-task {cluster.threads} -p {cluster.partition} -t {cluster.time} --mem {cluster.mem} --job-name {cluster.name} --output {cluster.output} --error {cluster.error}"
PARTITION='norm'
CONDA_ACTIVATE=''
PATH_PREPEND=''
MODULE_LOAD=''
PLATFORM=$(get_platform)
PARTITION='norm'
EXTRA_SINGULARITY_BINDS=""
TEMP_DIR=""
REFS_DIR=""
CLUSTER_PROFILE="config/unknown"
if [ "$PLATFORM" == "biowulf" ]; then
EXTRA_SINGULARITY_BINDS="/lscratch"
CLUSTER_SBATCH_CMD="$CLUSTER_SBATCH_CMD --gres {cluster.gres}"
CLUSTER_PROFILE="config/slurm-biowulf"
PARTITION="ccr,$PARTITION"
EXTRA_SINGULARITY_BINDS="/lscratch"
CONDA_ACTIVATE='. "/data/CCBR_Pipeliner/db/PipeDB/Conda/etc/profile.d/conda.sh" && conda activate py311'
MODULE_LOAD="module load python/$PYTHONVERSION snakemake/$SNAKEMAKEVERSION singularity; $CONDA_ACTIVATE"
TEMP_DIR='/lscratch/$SLURM_JOB_ID/'
REFS_DIR="/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs/"
elif [ "$PLATFORM" == "fnlcr" ]; then
CLUSTER_PROFILE="config/slurm-fnlcr"
EXTRA_SINGULARITY_BINDS="/scratch/local"
# activate conda env
CONDA_ACTIVATE=". '/mnt/projects/CCBR-Pipelines/resources/miniconda3/etc/profile.d/conda.sh' && conda activate py311"
# make sure spooker is in the path
PATH_PREPEND='export PATH="/mnt/projects/CCBR-Pipelines/bin:$PATH"'
MODULE_LOAD="module load singularity; $PATH_PREPEND; $CONDA_ACTIVATE"
TEMP_DIR="/scratch/local/"
REFS_DIR="/mnt/projects/CCBR-Pipelines/db/charlie/fastas_gtfs/"
else
EXTRA_SINGULARITY_BINDS=""
echo """WARNING: detected platform is $PLATFORM. Please edit the following files for compatibility with your computing environment:
config.yaml
cluster.json
submit_script.sbatch
echo """WARNING: detected platform is $PLATFORM. Please edit the files in config/unknown/ & config.yaml for compatibility with your computing environment
"""
fi

Expand Down Expand Up @@ -213,22 +216,26 @@ function init() {
if [ -d $WORKDIR ];then err "Folder $WORKDIR already exists!"; fi
mkdir -p $WORKDIR

# copy config and samples files
# copy config resources
cp -r ${PIPELINE_HOME}/config $WORKDIR/

# copy config template and samples files
if [ ! -f $CONFIGFILE ];then
sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" \
-e "s/WORKDIR/${WORKDIR//\//\\/}/g" \
-e "s/HOST/${HOST}/g" \
-e "s/ADDITIVES/${ADDITIVES}/g" \
-e "s/VIRUSES/${VIRUSES}/g" \
${PIPELINE_HOME}/config/$PLATFORM/config.yaml |\
cat - ${PIPELINE_HOME}/config/containers.yaml > $CONFIGFILE
-e "s/TEMP_DIR/${TEMP_DIR//\//\\/}/g" \
-e "s/REFS_DIR/${REFS_DIR//\//\\/}/g" \
-e "s|CLUSTER_PROFILE|${CLUSTER_PROFILE}|g" \
${PIPELINE_HOME}/config/config.yaml \
> $CONFIGFILE
fi
if [ ! -f $WORKDIR/nclscan.config ];then
sed -e "s/PIPELINE_HOME/${PIPELINE_HOME//\//\\/}/g" -e "s/WORKDIR/${WORKDIR//\//\\/}/g" ${PIPELINE_HOME}/resources/NCLscan.config.template > $WORKDIR/nclscan.config
fi
if [ ! -f $CLUSTERFILE ];then
cp ${PIPELINE_HOME}/config/$PLATFORM/cluster.json $CLUSTERFILE
fi

if [ ! -f $WORKDIR/samples.tsv ];then
cp $MANIFEST $WORKDIR/samples.tsv
fi
Expand All @@ -247,7 +254,7 @@ echo "Done Initializing $WORKDIR. You can now edit $WORKDIR/config.yaml and $WOR

function check_essential_files() {
if [ ! -d $WORKDIR ];then err "Folder $WORKDIR does not exist!"; fi
for f in config.yaml samples.tsv nclscan.config cluster.json; do
for f in config.yaml samples.tsv nclscan.config; do
if [ ! -f $WORKDIR/$f ]; then err "Error: '${f}' file not found in workdir ... initialize first!";fi
done
}
Expand Down Expand Up @@ -299,8 +306,11 @@ function reconfig(){
-e "s/HOST/${HOST}/g" \
-e "s/ADDITIVES/${ADDITIVES}/g" \
-e "s/VIRUSES/${VIRUSES}/g" \
${PIPELINE_HOME}/config/$PLATFORM/config.yaml |\
cat - ${PIPELINE_HOME}/config/containers.yaml > $CONFIGFILE
-e "s/TEMP_DIR/${TEMP_DIR//\//\\/}/g" \
-e "s/REFS_DIR/${REFS_DIR//\//\\/}/g" \
-e "s|CLUSTER_PROFILE|${CLUSTER_PROFILE}|g" \
${PIPELINE_HOME}/config/config.yaml \
> $CONFIGFILE
echo "$WORKDIR/config.yaml has been updated!"
}

Expand Down Expand Up @@ -523,9 +533,7 @@ snakemake -s $SNAKEFILE \
--printshellcmds \
--latency-wait 300 \
--configfile $CONFIGFILE \
--cluster-config $CLUSTERFILE \
--cluster "$CLUSTER_SBATCH_CMD" \
--cluster-status $CLUSTERSTATUSCMD \
--profile $CLUSTER_PROFILE \
-j 500 \
--rerun-incomplete \
--rerun-triggers $trigger \
Expand Down Expand Up @@ -554,8 +562,6 @@ EOF

else # dry-run and unlock

echo $CLUSTER_SBATCH_CMD

snakemake $1 -s $SNAKEFILE \
--directory $WORKDIR \
--use-envmodules \
Expand All @@ -564,8 +570,7 @@ EOF
--printshellcmds \
--latency-wait 300 \
--configfile $CONFIGFILE \
--cluster-config $CLUSTERFILE \
--cluster "$CLUSTER_SBATCH_CMD" \
--profile $CLUSTER_PROFILE \
-j 500 \
--rerun-incomplete \
--rerun-triggers $trigger \
Expand Down Expand Up @@ -656,8 +661,6 @@ function main(){

# required files
CONFIGFILE="${WORKDIR}/config.yaml"
CLUSTERFILE="${WORKDIR}/cluster.json"
CLUSTERSTATUSCMD="${PIPELINE_HOME}/resources/cluster_status.sh"

# change group to Ziegelbauer_lab before doing anything
if [ "$CHANGEGRP" == "1" ]; then change_grp "$allargs"; fi
Expand Down
23 changes: 20 additions & 3 deletions config/biowulf/config.yaml → config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
workdir: "WORKDIR"

# temporary directory for intermediate files that are not saved
tempdir: "/lscratch/$SLURM_JOB_ID"
tempdir: "TEMP_DIR"

# tab delimited samples file ... should have the following 3 columns
# sampleName path_to_R1_fastq path_to_R2_fastq
Expand Down Expand Up @@ -90,7 +90,7 @@ resourcesdir: "PIPELINE_HOME/resources"

# default cluster
# cluster: "PIPELINE_HOME/resources/cluster.json"
cluster: "WORKDIR/cluster.json"
cluster: "WORKDIR/CLUSTER_PROFILE/cluster.json"

adapters: "PIPELINE_HOME/resources/TruSeq_and_nextera_adapters.consolidated.fa"
circexplorer_bsj_circRNA_min_reads: 3 # in addition to "known" and "low-conf" circRNAs identified by circexplorer, we also include those found in back_spliced.bed file but not classified as known/low-conf only if the number of reads supporting the BSJ call is greater than this number
Expand All @@ -107,8 +107,25 @@ high_confidence_core_callers_plus_n: 1

ciri_perl_script: "/opt2/CIRI_v2.0.6/CIRI2.pl" # path in docker container
# change this path to a directory containing fasta and GTF files for all host and virus genomes
fastas_gtfs_dir: "/gpfs/gsfs10/users/CCBR_Pipeliner/db/PipeDB/charlie/fastas_gtfs"
fastas_gtfs_dir: "REFS_DIR"

annotation_lookups:
hg38: "PIPELINE_HOME/resources/hg38_2_hg19_lookup.txt"
mm39: "PIPELINE_HOME/resources/mm39_circBase_annotation_lookup.txt"

containers:
base: "docker://nciccbr/ccbr_ubuntu_base_20.04:v7"
bowtie1: "docker://nciccbr/charlie_bowtie1:v0.1.1"
circexplorer: "docker://nciccbr/ccbr_circexplorer:v1.0"
circRNA_finder: "docker://nciccbr/charlie_circrna_finder:v1.0.1"
ciri: "docker://nciccbr/charlie_ciri2:v1.0.1"
clear: "docker://nciccbr/ccbr_clear:v2.0.1"
cutadapt: "docker://nciccbr/charlie_cutadapt_fqfilter:v1.0.1"
dcc: "docker://nciccbr/charlie_dcc:v0.2.1"
fastqc: "docker://nciccbr/ccrgb_qctools:v4.0"
mapsplice: "docker://cgrlab/mapsplice2:latest"
multiqc: "docker://nciccbr/ccbr_multiqc_1.15:v1"
picard: "docker://nciccbr/ccbr_picard_2.27.5:v1"
R: "docker://nciccbr/ccbr_r_4.3.0:v1"
star: "docker://nciccbr/ccbr_star_2.7.6a:latest"
star_ucsc_cufflinks: "docker://nciccbr/charlie_star_ucsc_cufflinks:v0.4.1"
16 changes: 0 additions & 16 deletions config/containers.yaml

This file was deleted.

114 changes: 0 additions & 114 deletions config/fnlcr/config.yaml

This file was deleted.

File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ then
echo running
else
echo failed
fi
fi
23 changes: 23 additions & 0 deletions config/slurm-biowulf/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Snakemake cluster profile for NIH Biowulf (Slurm).
# Selected by the launcher via `snakemake --profile config/slurm-biowulf`;
# the relative paths below are resolved against this profile directory.

# sbatch submission template; {cluster.*} placeholders are filled from the
# per-rule entries in cluster.json. --parsable makes sbatch print a bare job
# ID, which cluster_status.sh relies on. --gres (e.g. lscratch) is
# Biowulf-specific and absent from the fnlcr profile.
cluster: sbatch
  --parsable
  --cpus-per-task {cluster.threads}
  -p {cluster.partition}
  -t {cluster.time}
  --mem {cluster.mem}
  --job-name {cluster.name}
  --output {cluster.output}
  --error {cluster.error}
  --gres {cluster.gres}
# Per-rule resource definitions (threads, partition, time, mem, gres, ...).
cluster-config: "cluster.json"
# Maps a job ID to success/running/failed for Snakemake's status polling.
cluster-status: "cluster_status.sh"
jobs: 499
immediate-submit: false
verbose: true
notemp: true
latency-wait: 300
printshellcmds: true
use-singularity: true
rerun-incomplete: true
rerun-triggers: mtime
retries: 2
keep-going: true
File renamed without changes.
19 changes: 19 additions & 0 deletions config/slurm-fnlcr/cluster_status.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Check the status of a Slurm job for Snakemake's --cluster-status hook.
# Usage: cluster_status.sh <jobid>
# Prints exactly one word on stdout: success | running | failed
jobid="$1"

# When sbatch is run without --parsable, Snakemake captures the literal
# "Submitted batch job N" line and passes "Submitted" here — catch that
# misconfiguration explicitly instead of querying sacct with garbage.
if [[ "$jobid" == Submitted ]]; then
  echo "smk-simple-slurm: Invalid job ID: $jobid" >&2
  echo "smk-simple-slurm: Did you remember to add the flag --parsable to your sbatch call?" >&2
  exit 1
fi

# sacct lists one line per job step; the first line is the job's top-level
# state (e.g. COMPLETED, RUNNING, FAILED, CANCELLED by ...).
output=$(sacct -j "$jobid" --format State --noheader | head -n 1 | awk '{print $1}')

if [[ $output =~ ^(COMPLETED).* ]]; then
  echo success
elif [[ $output =~ ^(RUNNING|PENDING|COMPLETING|CONFIGURING|SUSPENDED).* ]]; then
  echo running
else
  # Any other state (FAILED, CANCELLED, TIMEOUT, OUT_OF_MEMORY, NODE_FAIL,
  # or an empty sacct result) is treated as a failure.
  echo failed
fi
22 changes: 22 additions & 0 deletions config/slurm-fnlcr/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Snakemake cluster profile for FNLCR (Slurm).
# Selected by the launcher via `snakemake --profile config/slurm-fnlcr`;
# the relative paths below are resolved against this profile directory.

# sbatch submission template; {cluster.*} placeholders are filled from the
# per-rule entries in cluster.json. --parsable makes sbatch print a bare job
# ID, which cluster_status.sh relies on. No --gres here: that flag is only
# used on Biowulf.
cluster: sbatch
  --parsable
  --cpus-per-task {cluster.threads}
  -p {cluster.partition}
  -t {cluster.time}
  --mem {cluster.mem}
  --job-name {cluster.name}
  --output {cluster.output}
  --error {cluster.error}
# Per-rule resource definitions (threads, partition, time, mem, ...).
cluster-config: "cluster.json"
# Maps a job ID to success/running/failed for Snakemake's status polling.
cluster-status: "cluster_status.sh"
jobs: 499
immediate-submit: false
verbose: true
notemp: true
latency-wait: 300
printshellcmds: true
use-singularity: true
rerun-incomplete: true
rerun-triggers: mtime
retries: 2
keep-going: true

0 comments on commit f207842

Please sign in to comment.