-
Notifications
You must be signed in to change notification settings - Fork 1
/
saas_submitit.py
50 lines (42 loc) · 1.62 KB
/
saas_submitit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# %% imports
# NOTE: `pip install pyro-ppl` to use FULLYBAYESIAN (SAASBO)
from submitit import AutoExecutor
import cloudpickle as pickle
from utils.matbench import matbench_fold, collect_results, task, savepath, dummy
# Echo the `dummy` value imported from utils.matbench before any jobs are submitted.
# NOTE(review): presumably a quick-test switch — confirm against utils.matbench.
print(f"dummy: {dummy}")
# %% submission
# Submit one Slurm array task per matbench fold. submitit replaces "%j" in the
# folder name with the job id, so every job logs into its own directory.
log_folder = "log_ax/%j"
walltime = 3 * 24 * 60  # minutes (4320 min == 3 days)

# Alternative partition/account pairs that can be swapped in:
# partition, account = "notchpeak-gpu-guest", "owner-gpu-guest"
# partition, account = "notchpeak-guest", "owner-guest"
partition, account = "notchpeak-gpu", "notchpeak-gpu"

# Resources requested for each fold job.
fold_job_params = {
    "timeout_min": walltime,
    "slurm_partition": partition,
    "slurm_gpus_per_task": 1,
    "slurm_mem_per_gpu": 6000,
    "slurm_cpus_per_gpu": 4,
    "slurm_additional_parameters": {"account": account},
}
executor = AutoExecutor(folder=log_folder)
executor.update_parameters(**fold_job_params)

# map_array submits every fold together as one sbatch job array.
jobs = executor.map_array(matbench_fold, task.folds)
job_ids = [fold_job.job_id for fold_job in jobs]

# Slurm dependency lists are colon-separated, e.g. "3937257_0:3937257_1:...".
# https://www.hpc2n.umu.se/documentation/batchsystem/job-dependencies
job_ids_str = ":".join(job_ids)

# Persist the submitted job handles to disk.
with open("jobs.pkl", "wb") as jobs_file:
    pickle.dump(jobs, jobs_file)
# Collector: a single short job that runs `collect_results` once the fold jobs
# submitted above are done.
collect_folder = "log_matbench/%j"
walltime = 10  # minutes

collector_slurm_extras = {
    "account": account,
    # `afterok` holds this job until every listed fold job has finished successfully.
    "dependency": f"afterok:{job_ids_str}",
}
collector = AutoExecutor(folder=collect_folder)
collector.update_parameters(
    timeout_min=walltime,
    slurm_partition=partition,
    slurm_additional_parameters=collector_slurm_extras,
)
collector_job = collector.submit(collect_results)  # one plain sbatch job, not an array

# Tell the user which jobs are being waited on and where the output will land.
status_message = f"Waiting for submission jobs ({job_ids_str}) to complete before running collector job ({collector_job.job_id}). Use the matbench output file that will be saved to {savepath} after all jobs have run."
print(status_message)