-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexample.panfish.config
357 lines (312 loc) · 10.4 KB
/
example.panfish.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
#
# Example panfish.config file
#
#
# This configuration file has two parts: the first part consists of
# global parameters (this.cluster and cluster.list) and the second
# part consists of cluster specific parameters. The cluster
# specific parameters are prefixed with the cluster's shadow queue
# name.
#
# The format of the parameters is:
#
# key = value
#
#
# Global parameter that defines the shadow queue for this cluster.
# The value set here should match the name of the shadow queue set up
# on the local Open Grid Engine installation, e.g. foo_shadow.q
#
this.cluster=foo_shadow.q
#
# Global parameter that contains a comma delimited list of the shadow
# queues corresponding to the clusters that jobs are allowed to be
# submitted to. The local cluster needs to be specified in this list
# as well.
#
cluster.list=foo_shadow.q,gordon_shadow.q,stampede_shadow.q,comet_shadow.q
#######################################################################
#
# Local cluster configuration. In this example the local cluster has a
# shadow queue named foo_shadow.q so all parameters for the cluster are
# prefixed with "foo_shadow.q."
#
#######################################################################
#
# Host of remote cluster to submit jobs on and to copy data to/from.
# This should be of the form (user)@(host)
# ex: bob@gordon.sdsc.edu
# Left empty in this example for the local cluster.
foo_shadow.q.host=
#
# Batch processing system used by cluster. SGE, PBS, and SLURM are
# currently supported.
# NOTE: Only SGE is supported for the local cluster
#
foo_shadow.q.engine=SGE
#
# Any jobs on this cluster will have PANFISH_BASEDIR environment
# variable set to this path. On the local cluster it's usually
# left empty, but on remote clusters it needs to be set.
# The key must carry the full "foo_shadow.q." prefix like every other
# parameter of this cluster.
#
foo_shadow.q.basedir=
#
# Directory where database of jobs is stored
#
foo_shadow.q.database.dir=/tmp/panfish/jobs
#
# Contains job template files for the various clusters.
# Each template file has the same name as the shadow queue.
#
# See templates/ folder in Panfish source tree for examples
#
foo_shadow.q.job.template.dir=/tmp/panfish/templates
#
# Full path to qsub
#
foo_shadow.q.submit=/opt/gridengine/bin/linux-x64/qsub
#
# Full path to qstat
#
foo_shadow.q.stat=/opt/gridengine/bin/linux-x64/qstat
#
# Bin directory containing panfish scripts/binaries
#
foo_shadow.q.bin.dir=/tmp/panfish/bin
#
# Maximum number of jobs allowed to run on this cluster
#
foo_shadow.q.max.num.running.jobs=1
#
# Number of seconds to sleep between submissions of jobs
#
foo_shadow.q.submit.sleep=1
#
# Scratch or temporary directory for jobs on this cluster. This
# path is accessible via the PANFISH_SCRATCH environment variable.
# The value can execute a command if backticks are employed.
#
# Example:
#
# `/bin/ls /scratch/$USER/[0-9]* -d`
#
#
foo_shadow.q.scratch=/tmp
#
# Sets the number of "same" serial jobs that can be batched on one node.
# Usually set to the number of cores on the node.
#
foo_shadow.q.jobs.per.node=1
#
# Number of seconds to wait before sending out a batch job with
# an insufficient number of jobs batched together
#
foo_shadow.q.job.batcher.override.timeout=10
#
# Number of seconds the panfishline shadow job should sleep before
# querying the database to see if the real job has changed state
#
foo_shadow.q.line.sleep.time=180
#
#
# Directory to write the standard out/error stream for the shadow job.
# This output needs to go somewhere and is not relevant to the user so
# we have it written to a special side directory. The output is merged
# into a single file to reduce disk IO. Setting to /dev/null will
# disable writing of any output which should be the default setting
# unless low level debugging is needed.
#
foo_shadow.q.line.stdout.path=/dev/null
#
# Level of logging verbosity for panfishline
# 0 = outputs only error, warning, and fatal messages.
# 1 = adds info messages.
# 2 = adds debug messages.
#
foo_shadow.q.line.log.verbosity=1
#
# Number of retries panfishland command should make when attempting a
# retrieval of data.
#
foo_shadow.q.land.max.retries=10
#
# Number of seconds panfishland command should wait between transfer
# retries.
#
foo_shadow.q.land.wait=100
#
# Sets the rsync IO timeout in seconds. (--timeout)
#
foo_shadow.q.land.rsync.timeout=180
#
# Sets the rsync connection timeout in seconds. (--contimeout)
#
foo_shadow.q.land.rsync.contimeout=100
#
# Level of verbosity for panfish
# 0 = outputs only error, warning, and fatal messages.
# 1 = adds info messages.
# 2 = adds debug messages.
#
foo_shadow.q.panfish.log.verbosity=1
#
# Level of logging verbosity for panfishsubmit
# 0 = outputs only error, warning, and fatal messages.
# 1 = adds info messages.
# 2 = adds debug messages.
#
foo_shadow.q.panfishsubmit.log.verbosity=1
#
# Number of times to retry an IO operation such as ssh or copy
#
foo_shadow.q.io.retry.count=2
#
# Seconds to wait before a retry of an IO operation
#
foo_shadow.q.io.retry.sleep=5
#
# Seconds to set for timeout of IO operation
#
foo_shadow.q.io.timeout=30
#
# Seconds to set for connection timeout of IO operation
#
foo_shadow.q.io.connect.timeout=30
#
# Account, if any, that should be set when submitting a job.
# This value is used to replace the @PANFISH_ACCOUNT@ token
# that can optionally be set in the template file.
#
foo_shadow.q.job.account=
#
# Walltime to set for job. Format is: HH:MM:SS, e.g. 12:00:00 means
# 12 hours
#
foo_shadow.q.job.walltime=12:00:00
#######################################################################
#
# Default configuration for Gordon XSEDE cluster
#
# This config will create a panfish directory under the /home/$USER
# directory on Gordon which is used to store the job files that are
# submitted to the Gordon cluster. The actual data for the job is
# set to /oasis/scratch/$USER/temp_project via the .basedir parameter.
#
#
# Be sure to replace <YOUR USERNAME> on the remote host line below
# and to replace <YOUR ACCOUNT> with your account which can be obtained
# by running show_accounts and looking at the value under the "project"
# column
#
#######################################################################
# Remote login in (user)@(host) form and the batch system of the cluster
gordon_shadow.q.host=<YOUR USERNAME>@gordon.sdsc.edu
gordon_shadow.q.engine=PBS
# Jobs on this cluster get PANFISH_BASEDIR set to this path
gordon_shadow.q.basedir=/oasis/scratch/$USER/temp_project
# Full paths to qsub and qstat
gordon_shadow.q.submit=/opt/torque/bin/qsub
gordon_shadow.q.stat=/opt/torque/bin/qstat
# Panfish scripts/binaries directory and job database directory
gordon_shadow.q.bin.dir=/home/$USER/panfish/bin
gordon_shadow.q.database.dir=/home/$USER/panfish/jobs
# Maximum number of running jobs; seconds to sleep between submissions
gordon_shadow.q.max.num.running.jobs=20
gordon_shadow.q.submit.sleep=5
# Scratch directory (PANFISH_SCRATCH); the backticks make this value a
# command to execute rather than a literal path
gordon_shadow.q.scratch=`/bin/ls /scratch/$USER/[0-9]* -d`
# Jobs batched per node; seconds to wait before sending out a batch
# with an insufficient number of jobs
gordon_shadow.q.jobs.per.node=16
gordon_shadow.q.job.batcher.override.timeout=1800
# Seconds the panfishline shadow job sleeps between database queries
gordon_shadow.q.line.sleep.time=60
# panfishland: retry count, seconds between retries, and the rsync
# --timeout / --contimeout values in seconds
gordon_shadow.q.land.max.retries=10
gordon_shadow.q.land.wait=100
gordon_shadow.q.land.rsync.timeout=180
gordon_shadow.q.land.rsync.contimeout=100
# Log verbosity: 0 = errors/warnings/fatal only, 1 = adds info,
# 2 = adds debug
gordon_shadow.q.panfish.log.verbosity=1
gordon_shadow.q.panfishsubmit.log.verbosity=1
# NOTE(review): panfish.sleep is not described in the local cluster
# section; presumably a sleep interval in seconds -- confirm
gordon_shadow.q.panfish.sleep=60
# IO operations (ssh, copy): retry count, seconds between retries,
# operation timeout and connection timeout in seconds
gordon_shadow.q.io.retry.count=2
gordon_shadow.q.io.retry.sleep=5
gordon_shadow.q.io.timeout=30
gordon_shadow.q.io.connect.timeout=30
# Account that replaces the @PANFISH_ACCOUNT@ token in the job template
gordon_shadow.q.job.account=<YOUR ACCOUNT>
# Walltime for the job in HH:MM:SS
gordon_shadow.q.job.walltime=12:00:00
#######################################################################
#
# Default configuration for Stampede XSEDE cluster
#
# Be sure to replace values in <> below
#
#######################################################################
# Remote login in (user)@(host) form and the batch system of the cluster
stampede_shadow.q.host=<YOUR USERNAME>@stampede.tacc.xsede.org
stampede_shadow.q.engine=SLURM
# Jobs on this cluster get PANFISH_BASEDIR set to this path
stampede_shadow.q.basedir=<YOUR $WORK DIR>/panfish
# Directory where the database of jobs is stored
stampede_shadow.q.database.dir=<YOUR $HOME DIR>/panfish/jobs
# Job submission command
stampede_shadow.q.submit=/usr/bin/sbatch
# Job status command; -u takes the remote account's username, so
# replace <YOUR USERNAME> with the same user given on the host line
stampede_shadow.q.stat=/usr/bin/squeue -u <YOUR USERNAME>
# Bin directory containing panfish scripts/binaries
stampede_shadow.q.bin.dir=<YOUR $HOME DIR>/panfish/bin
# Maximum number of running jobs; seconds to sleep between submissions
stampede_shadow.q.max.num.running.jobs=50
stampede_shadow.q.submit.sleep=1
# Scratch directory, exposed to jobs as PANFISH_SCRATCH
stampede_shadow.q.scratch=/tmp
# Jobs batched per node; seconds to wait before sending out a batch
# with an insufficient number of jobs
stampede_shadow.q.jobs.per.node=16
stampede_shadow.q.job.batcher.override.timeout=1800
# Log verbosity: 0 = errors/warnings/fatal only, 1 = adds info,
# 2 = adds debug
stampede_shadow.q.panfish.log.verbosity=2
stampede_shadow.q.panfishsubmit.log.verbosity=1
# NOTE(review): panfish.sleep is not described in the local cluster
# section; presumably a sleep interval in seconds -- confirm
stampede_shadow.q.panfish.sleep=60
# IO operations (ssh, copy): retry count, seconds between retries,
# operation timeout and connection timeout in seconds
stampede_shadow.q.io.retry.count=2
stampede_shadow.q.io.retry.sleep=5
stampede_shadow.q.io.timeout=30
stampede_shadow.q.io.connect.timeout=30
# Account that replaces the @PANFISH_ACCOUNT@ token in the job template
stampede_shadow.q.job.account=<YOUR ACCOUNT>
# Walltime for the job in HH:MM:SS
stampede_shadow.q.job.walltime=12:00:00
#######################################################################
#
# Default configuration for Rocce NBCR cluster
#
# Be sure to replace values in <> below
#
# The config below assumes space has been given to the account under
# /data/$USER directory
#
# NOTE(review): rocce_shadow.q does not appear in the cluster.list
# value at the top of this file; presumably it has to be added there
# before jobs can be submitted to this cluster -- confirm
#
#######################################################################
# Remote login in (user)@(host) form and the batch system of the cluster
rocce_shadow.q.host=<YOUR USERNAME>@rocce.ucsd.edu
rocce_shadow.q.engine=SGE
# Jobs on this cluster get PANFISH_BASEDIR set to this path
rocce_shadow.q.basedir=/data/<YOUR USERNAME>/panfish/shadow
# Directory where the database of jobs is stored
rocce_shadow.q.database.dir=/data/<YOUR USERNAME>/panfish/jobs
# Full paths to qsub and qstat
rocce_shadow.q.submit=/opt/gridengine/bin/lx26-amd64/qsub
rocce_shadow.q.stat=/opt/gridengine/bin/lx26-amd64/qstat
# Bin directory containing panfish scripts/binaries
rocce_shadow.q.bin.dir=/data/<YOUR USERNAME>/panfish/bin
# Maximum number of running jobs; seconds to sleep between submissions
rocce_shadow.q.max.num.running.jobs=50
rocce_shadow.q.submit.sleep=1
# Scratch directory, exposed to jobs as PANFISH_SCRATCH
rocce_shadow.q.scratch=/state/partition1
# Jobs batched per node; seconds to wait before sending out a batch
# with an insufficient number of jobs
rocce_shadow.q.jobs.per.node=1
rocce_shadow.q.job.batcher.override.timeout=300
# Log verbosity: 0 = errors/warnings/fatal only, 1 = adds info,
# 2 = adds debug
rocce_shadow.q.panfish.log.verbosity=2
rocce_shadow.q.panfishsubmit.log.verbosity=1
# NOTE(review): panfish.sleep is not described in the local cluster
# section; presumably a sleep interval in seconds -- confirm
rocce_shadow.q.panfish.sleep=60
# IO operations (ssh, copy): retry count, seconds between retries,
# operation timeout and connection timeout in seconds
rocce_shadow.q.io.retry.count=2
rocce_shadow.q.io.retry.sleep=5
rocce_shadow.q.io.timeout=30
rocce_shadow.q.io.connect.timeout=30
# Account that replaces the @PANFISH_ACCOUNT@ token in the job template;
# left empty in this example
rocce_shadow.q.job.account=
# Walltime for the job in HH:MM:SS
rocce_shadow.q.job.walltime=12:00:00
#######################################################################
#
# Default configuration for Comet cluster
#
# Be sure to replace values in <> below
#
#######################################################################
# Remote login in (user)@(host) form and the batch system of the cluster
comet_shadow.q.host=<YOUR USERNAME>@comet.sdsc.edu
comet_shadow.q.engine=SLURM
# Jobs on this cluster get PANFISH_BASEDIR set to this path. The last
# path component is a per-user directory, so it uses the same
# <YOUR USERNAME> placeholder as the other paths in this section.
comet_shadow.q.basedir=/oasis/projects/nsf/<YOUR ACCOUNT>/<YOUR USERNAME>
# Directory where the database of jobs is stored
comet_shadow.q.database.dir=/home/<YOUR USERNAME>/comet/panfish/jobs
# Job submission command and job status command
comet_shadow.q.submit=/usr/bin/sbatch
comet_shadow.q.stat=/usr/bin/squeue -u <YOUR USERNAME>
# Bin directory containing panfish scripts/binaries
comet_shadow.q.bin.dir=/home/<YOUR USERNAME>/comet/panfish/bin
# Maximum number of running jobs; seconds to sleep between submissions
comet_shadow.q.max.num.running.jobs=50
comet_shadow.q.submit.sleep=1
# Scratch directory (PANFISH_SCRATCH); the backticks make this value a
# command to execute rather than a literal path
comet_shadow.q.scratch=`/bin/ls /scratch/$USER/[0-9]* -d`
# Jobs batched per node; seconds to wait before sending out a batch
# with an insufficient number of jobs
comet_shadow.q.jobs.per.node=24
comet_shadow.q.job.batcher.override.timeout=60
# Log verbosity: 0 = errors/warnings/fatal only, 1 = adds info,
# 2 = adds debug
comet_shadow.q.panfish.log.verbosity=2
comet_shadow.q.panfishsubmit.log.verbosity=1
# NOTE(review): panfish.sleep is not described in the local cluster
# section; presumably a sleep interval in seconds -- confirm
comet_shadow.q.panfish.sleep=60
# IO operations (ssh, copy): retry count, seconds between retries,
# operation timeout and connection timeout in seconds
comet_shadow.q.io.retry.count=2
comet_shadow.q.io.retry.sleep=5
comet_shadow.q.io.timeout=30
comet_shadow.q.io.connect.timeout=30
# Account that replaces the @PANFISH_ACCOUNT@ token in the job template
comet_shadow.q.job.account=<YOUR ACCOUNT>
# Walltime for the job in HH:MM:SS
comet_shadow.q.job.walltime=12:00:00