# example.panfish.config

# 
# Example panfish.config file
#
#
# This configuration file has two parts, one part consists of
# global parameters (this.cluster and cluster.list) and the second
# part consists of cluster specific parameters.  The cluster
# specific parameters are prefixed with the cluster's shadow queue
# name
#
# The format of the parameters is:
#
#   key = value
#


#
#   Global parameter that defines the shadow queue for this cluster.
#   The value set here should match the name of the shadow queue set up
#   on the local Open Grid Engine installation, e.g. foo_shadow.q
#
this.cluster=foo_shadow.q


#
#   Global parameter that contains a comma delimited list of shadow queues
#   that correspond to the clusters jobs are allowed to be submitted to.
#   The local cluster needs to be specified in this list as well.
#
cluster.list=foo_shadow.q,gordon_shadow.q,stampede_shadow.q,comet_shadow.q

#######################################################################
#
# local cluster configuration.  In this example the local cluster has a
# shadow queue named foo_shadow.q so all parameters for the cluster are
# prefixed with "foo_shadow.q."
#
#######################################################################


#
# Host of remote cluster to submit jobs on and to copy data to/from.
# This should be of the form (user)@(host)
# ex:  bob@gordon.sdsc.edu
# The value is left empty in this example for the local cluster.
#
foo_shadow.q.host=

#
# Batch processing system used by the cluster.  SGE, PBS, and SLURM are
# currently supported.
# NOTE:  Only SGE is supported for the local cluster
#
foo_shadow.q.engine=SGE

#
# Any jobs on this cluster will have the PANFISH_BASEDIR environment
# variable set to this path.  On the local cluster it's usually
# left empty, but on remote clusters it needs to be set
#
foo_shadow.q.basedir=

#
# Directory where the database of jobs is stored
#
foo_shadow.q.database.dir=/tmp/panfish/jobs

#
# Directory containing job template files for the various clusters.
# Each template file has the same name as the shadow queue
#
# See templates/ folder in Panfish source tree for examples
#
foo_shadow.q.job.template.dir=/tmp/panfish/templates

#
# Full path to the job submission command (qsub in this example)
#
foo_shadow.q.submit=/opt/gridengine/bin/linux-x64/qsub

#
# Full path to the job status command (qstat in this example)
#
foo_shadow.q.stat=/opt/gridengine/bin/linux-x64/qstat

#
# Bin directory containing panfish scripts/binaries
#
foo_shadow.q.bin.dir=/tmp/panfish/bin

#
# Maximum number of jobs allowed to run on this cluster
#
foo_shadow.q.max.num.running.jobs=1

#
# Number of seconds to sleep between submissions of jobs
#
foo_shadow.q.submit.sleep=1

#
# Scratch or temporary directory for jobs on this cluster.  This
# path is accessible via the PANFISH_SCRATCH environment variable.
# The value can execute a command if backticks are employed.
#
# Example:
#
# `/bin/ls /scratch/$USER/[0-9]* -d`
#
foo_shadow.q.scratch=/tmp

#
# Sets the number of "same" serial jobs that can be batched on one node.
# Usually set to the number of cores on the node
#
foo_shadow.q.jobs.per.node=1

#
# Number of seconds to wait before sending out a batch job with
# an insufficient number of jobs batched together
#
foo_shadow.q.job.batcher.override.timeout=10

#
# Number of seconds the panfishline shadow job should sleep before
# querying the database to see if the real job has changed state
#
foo_shadow.q.line.sleep.time=180

#
# Directory to write the standard out/error stream for the shadow job.
# This output needs to go somewhere and is not relevant to the user so
# we have it written to a special side directory.  The output is merged
# into a single file to reduce disk IO.  Setting this to /dev/null
# disables writing of any output, which should be the default setting
# unless low level debugging is needed.
#
foo_shadow.q.line.stdout.path=/dev/null

#
# Level of logging verbosity for panfishline
# 0 = outputs only error, warning, and fatal messages.
# 1 = adds info messages.
# 2 = adds debug messages.
#
foo_shadow.q.line.log.verbosity=1

#
# Number of retries the panfishland command should make when attempting
# a retrieval of data.
#
foo_shadow.q.land.max.retries=10

#
# Number of seconds the panfishland command should wait between transfer
# retries.
#
foo_shadow.q.land.wait=100

#
# Sets the rsync IO timeout in seconds. (--timeout)
#
foo_shadow.q.land.rsync.timeout=180

#
# Sets the rsync connection timeout in seconds. (--contimeout)
#
foo_shadow.q.land.rsync.contimeout=100

#
# Level of logging verbosity for panfish
# 0 = outputs only error, warning, and fatal messages.
# 1 = adds info messages.
# 2 = adds debug messages.
#
foo_shadow.q.panfish.log.verbosity=1

#
# Level of logging verbosity for panfishsubmit
# 0 = outputs only error, warning, and fatal messages.
# 1 = adds info messages.
# 2 = adds debug messages.
#
foo_shadow.q.panfishsubmit.log.verbosity=1

#
# Number of times to retry an IO operation such as ssh or copy
#
foo_shadow.q.io.retry.count=2

#
# Seconds to wait before a retry of an IO operation
#
foo_shadow.q.io.retry.sleep=5

#
# Seconds to set for timeout of an IO operation
#
foo_shadow.q.io.timeout=30

#
# Seconds to set for connection timeout of an IO operation
#
foo_shadow.q.io.connect.timeout=30

#
# Account, if any, that should be set when submitting a job.
# This value is used to replace the @PANFISH_ACCOUNT@ token
# that can optionally be set in the template file
#
foo_shadow.q.job.account=

#
# Walltime to set for the job.  Format is HH:MM:SS, e.g. 12:00:00 means
# 12 hours
#
foo_shadow.q.job.walltime=12:00:00


#######################################################################
#
# Default configuration for Gordon XSEDE cluster
#
# This config will create a panfish directory under the /home/$USER
# directory on gordon which is used to store the job files that are
# submitted to the Gordon cluster.  The actual data for the job is
# set to /oasis/scratch/$USER/temp_project via the .basedir parameter
#
# Be sure to replace <YOUR USERNAME> on the remote host line below
# and to replace <YOUR ACCOUNT> with your account which can be obtained
# by running show_accounts and looking at the value under the "project"
# column
#
#######################################################################
gordon_shadow.q.host=<YOUR USERNAME>@gordon.sdsc.edu
gordon_shadow.q.engine=PBS
gordon_shadow.q.basedir=/oasis/scratch/$USER/temp_project
gordon_shadow.q.submit=/opt/torque/bin/qsub
gordon_shadow.q.stat=/opt/torque/bin/qstat
gordon_shadow.q.bin.dir=/home/$USER/panfish/bin
gordon_shadow.q.database.dir=/home/$USER/panfish/jobs
gordon_shadow.q.max.num.running.jobs=20
gordon_shadow.q.submit.sleep=5
# The scratch value is wrapped in backticks so it is executed as a command
gordon_shadow.q.scratch=`/bin/ls /scratch/$USER/[0-9]* -d`
gordon_shadow.q.jobs.per.node=16
gordon_shadow.q.job.batcher.override.timeout=1800
gordon_shadow.q.line.sleep.time=60
gordon_shadow.q.land.max.retries=10
gordon_shadow.q.land.wait=100
gordon_shadow.q.land.rsync.timeout=180
gordon_shadow.q.land.rsync.contimeout=100
gordon_shadow.q.panfish.log.verbosity=1
gordon_shadow.q.panfishsubmit.log.verbosity=1
# NOTE(review): panfish.sleep is not documented in this file; presumably a
# sleep interval in seconds -- confirm against the Panfish documentation
gordon_shadow.q.panfish.sleep=60
gordon_shadow.q.io.retry.count=2
gordon_shadow.q.io.retry.sleep=5
gordon_shadow.q.io.timeout=30
gordon_shadow.q.io.connect.timeout=30
gordon_shadow.q.job.account=<YOUR ACCOUNT>
gordon_shadow.q.job.walltime=12:00:00


#######################################################################
#
# Default configuration for Stampede XSEDE cluster
#
# Be sure to replace values in <> below, including the user name
# passed to squeue in the .stat parameter
#
#######################################################################
stampede_shadow.q.host=<YOUR USERNAME>@stampede.tacc.xsede.org
stampede_shadow.q.engine=SLURM
stampede_shadow.q.basedir=<YOUR $WORK DIR>/panfish
stampede_shadow.q.database.dir=<YOUR $HOME DIR>/panfish/jobs
stampede_shadow.q.submit=/usr/bin/sbatch
stampede_shadow.q.stat=/usr/bin/squeue -u <YOUR USERNAME>
stampede_shadow.q.bin.dir=<YOUR $HOME DIR>/panfish/bin
stampede_shadow.q.max.num.running.jobs=50
stampede_shadow.q.submit.sleep=1
stampede_shadow.q.scratch=/tmp
stampede_shadow.q.jobs.per.node=16
stampede_shadow.q.job.batcher.override.timeout=1800
stampede_shadow.q.panfish.log.verbosity=2
stampede_shadow.q.panfishsubmit.log.verbosity=1
stampede_shadow.q.panfish.sleep=60
stampede_shadow.q.io.retry.count=2
stampede_shadow.q.io.retry.sleep=5
stampede_shadow.q.io.timeout=30
stampede_shadow.q.io.connect.timeout=30
stampede_shadow.q.job.account=<YOUR ACCOUNT>
stampede_shadow.q.job.walltime=12:00:00


#######################################################################
#
# Default configuration for Rocce NBCR cluster
#
# Be sure to replace values in <> below
#
# The config below assumes space has been given to the account under
# the /data/<YOUR USERNAME> directory
#
# NOTE: rocce_shadow.q is not included in the example cluster.list
# value in this file; add it to cluster.list to allow jobs to be
# submitted to this cluster.
#
#######################################################################
rocce_shadow.q.host=<YOUR USERNAME>@rocce.ucsd.edu
rocce_shadow.q.engine=SGE
rocce_shadow.q.basedir=/data/<YOUR USERNAME>/panfish/shadow
rocce_shadow.q.database.dir=/data/<YOUR USERNAME>/panfish/jobs
rocce_shadow.q.submit=/opt/gridengine/bin/lx26-amd64/qsub
rocce_shadow.q.stat=/opt/gridengine/bin/lx26-amd64/qstat
rocce_shadow.q.bin.dir=/data/<YOUR USERNAME>/panfish/bin
rocce_shadow.q.max.num.running.jobs=50
rocce_shadow.q.submit.sleep=1
rocce_shadow.q.scratch=/state/partition1
rocce_shadow.q.jobs.per.node=1
rocce_shadow.q.job.batcher.override.timeout=300
rocce_shadow.q.panfish.log.verbosity=2
rocce_shadow.q.panfishsubmit.log.verbosity=1
rocce_shadow.q.panfish.sleep=60
rocce_shadow.q.io.retry.count=2
rocce_shadow.q.io.retry.sleep=5
rocce_shadow.q.io.timeout=30
rocce_shadow.q.io.connect.timeout=30
rocce_shadow.q.job.account=
rocce_shadow.q.job.walltime=12:00:00

#######################################################################
#
# Default configuration for Comet cluster
#
# Be sure to replace values in <> below.  Both <YOUR ACCOUNT> and
# <YOUR USERNAME> appear in the .basedir path.
#
#######################################################################
comet_shadow.q.host=<YOUR USERNAME>@comet.sdsc.edu
comet_shadow.q.engine=SLURM
comet_shadow.q.basedir=/oasis/projects/nsf/<YOUR ACCOUNT>/<YOUR USERNAME>
comet_shadow.q.database.dir=/home/<YOUR USERNAME>/comet/panfish/jobs
comet_shadow.q.submit=/usr/bin/sbatch
comet_shadow.q.stat=/usr/bin/squeue -u <YOUR USERNAME>
comet_shadow.q.bin.dir=/home/<YOUR USERNAME>/comet/panfish/bin
comet_shadow.q.max.num.running.jobs=50
comet_shadow.q.submit.sleep=1
comet_shadow.q.scratch=`/bin/ls /scratch/$USER/[0-9]* -d`
comet_shadow.q.jobs.per.node=24
comet_shadow.q.job.batcher.override.timeout=60
comet_shadow.q.panfish.log.verbosity=2
comet_shadow.q.panfishsubmit.log.verbosity=1
comet_shadow.q.panfish.sleep=60
comet_shadow.q.io.retry.count=2
comet_shadow.q.io.retry.sleep=5
comet_shadow.q.io.timeout=30
comet_shadow.q.io.connect.timeout=30
comet_shadow.q.job.account=<YOUR ACCOUNT>
comet_shadow.q.job.walltime=12:00:00