diff --git a/SETUP_FOR_INSTRUCTORS.md b/SETUP_FOR_INSTRUCTORS.md index 73490f9f..d523a368 100644 --- a/SETUP_FOR_INSTRUCTORS.md +++ b/SETUP_FOR_INSTRUCTORS.md @@ -10,7 +10,7 @@ Get files. ``` wget https://raw.githubusercontent.com/nesi/hpc-intro/gh-pages-nesi/_includes/example_scripts/example-job.sh -wget https://raw.githubusercontent.com/nesi/hpc-intro/gh-pages-nesi/_includes/example_scripts/array_sum2.r -O {{ site.example.script }} +wget https://raw.githubusercontent.com/nesi/hpc-intro/gh-pages-nesi/_includes/example_scripts/sum_matrix.r -O {{ site.example.script }} wget https://raw.githubusercontent.com/nesi/hpc-intro/gh-pages-nesi/_includes/example_scripts/whothis.sh wget wget diff --git a/_config.yml b/_config.yml index beb2fb87..77d549d5 100644 --- a/_config.yml +++ b/_config.yml @@ -67,7 +67,7 @@ sched: example: lang: "R" shell: "Rscript " - script: "array_sum.r" + script: "sum_matrix.r" module: "R/4.3.1-gimkl-2022a" # For 'python' @@ -89,6 +89,7 @@ episode_order: - 09-scaling + #------------------------------------------------------------ # Values for this lesson #------------------------------------------------------------ diff --git a/_episodes/064-parallel.md b/_episodes/064-parallel.md index a1cc3ebf..0396a25b 100644 --- a/_episodes/064-parallel.md +++ b/_episodes/064-parallel.md @@ -53,7 +53,7 @@ This means that all CPUs must be on the same node, most Mahuika nodes have 72 CP Shared memory parallelism is what is used in our example script `{{ site.example.script }}`. Number of threads to use is specified by the Slurm option `--cpus-per-task`. - + ### Distributed-Memory (MPI) @@ -114,7 +114,7 @@ Number of tasks to use is specified by the Slurm option `--ntasks`, because the Tasks cannot share cores, this means in most circumstances leaving `--cpus-per-task` unspecified will get you `2`. -> ## Distributed Memory Example + Using a combination of Shared and Distributed memory is called _Hybrid Parallel_. 
- + ### GPGPU's @@ -209,7 +209,7 @@ GPUs can be requested using `--gpus-per-node=:` Depending on the GPU type, we *may* also need to specify a partition using `--partition`. -> ## GPU Job Example + ### Job Array @@ -282,7 +282,7 @@ A job array can be specified using `--array` If you are writing your own code, then this is something you will probably have to specify yourself. -> ## Job Array Example + + +## Summary + +| Name | Other Names | Slurm Option | Pros/cons | +| - | - | - | - | +| Shared Memory Parallelism | Multithreading, Multiprocessing | `--cpus-per-task` | | +| Distributed Memory Parallelism | MPI, OpenMPI | `--ntasks` and add `srun` before command | | +| Hybrid | | `--ntasks` and `--cpus-per-task` and add `srun` before command | | +| Job Array | | `--array` | | +| General Purpose GPU | | `--gpus-per-node` | | + +> ## Running a Parallel Job. +> +> Pick one of the methods of parallelism mentioned above, and modify your `example.sl` script to use this method. +> +> +> +> > ## Solution +> > +> > What does the printout say at the start of your job about the number and location of nodes? +> > {: .output} +> {: .solution} +{: .challenge} ## How to Utilise Multiple CPUs diff --git a/_episodes/095-writing-good-code.md b/_episodes/095-writing-good-code.md index e7e8d1ae..089b1635 100644 --- a/_episodes/095-writing-good-code.md +++ b/_episodes/095-writing-good-code.md @@ -210,7 +210,7 @@ set.seed(seed) Now your script should look something like this; ``` -{% include example_scripts/array_sum2.r %} +{% include example_scripts/sum_matrix.r %} ``` {: .language-r} diff --git a/_includes/example_scripts/array_sum.r b/_includes/example_scripts/array_sum.r deleted file mode 100644 index 2c28a6ee..00000000 --- a/_includes/example_scripts/array_sum.r +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env {{ site.example.shell }} - -library(doParallel) - -num_cpus <- 2 # Use this value to request num cpus. 
-size_array <- 20000 - -registerDoParallel(num_cpus) - -sprintf("Using %i cpus to sum [ %e x %e ] matrix.",num_cpus,size_array,size_array) - -results <- foreach(z=0:size_array) %dopar% { - percent_complete= z*100/size_array - if (percent_complete%%1==0){ - cat(sprintf(" %i%% done...\r", percent_complete)) - } - sum(rnorm(size_array)) -} -sprintf("Sums to %f", Reduce("+",results)) diff --git a/_includes/example_scripts/array_sum2.r b/_includes/example_scripts/array_sum2.r deleted file mode 100644 index 932408c5..00000000 --- a/_includes/example_scripts/array_sum2.r +++ /dev/null @@ -1,28 +0,0 @@ -#!/usr/bin/env {{ site.example.shell }} - -library(doParallel) - -num_cpus <- strtoi(Sys.getenv('SLURM_CPUS_PER_TASK', unset = "1")) -size_x <-60000 # This on makes memorier -size_y <-20000 # This one to make longer - -# Time = (size_x/n) * size_y + c -# Mem = (size_x * n) * c1 + size_y * c2 - -print_progress <- interactive() # Whether to print progress or not. - -seed <- strtoi(Sys.getenv('SLURM_ARRAY_TASK_ID', unset = "0")) -set.seed(seed) - -registerDoParallel(num_cpus) - -sprintf("Using %i cpus to sum [ %e x %e ] matrix.",num_cpus,size_x,size_y) - -results <- foreach(z=0:size_x) %dopar% { - p_complete= z*100/size_x - if (print_progress && percent_complete%%1==0){ - cat(sprintf(" %i%% done...\r", percent_complete)) - } - sum(rnorm(size_y)) -} -sprintf("Seed '%s' sums to %f", seed, Reduce("+",results)) diff --git a/_includes/example_scripts/example-job.sh b/_includes/example_scripts/example-job.sh deleted file mode 100644 index c93597b2..00000000 --- a/_includes/example_scripts/example-job.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash -e - -module purge -module load {{ site.example.module }} -{{ site.example.shell }} {{ site.example.script }} -echo "Done!" 
\ No newline at end of file diff --git a/_includes/example_scripts/example-job.sl.1 b/_includes/example_scripts/example-job.sl.1 deleted file mode 100644 index b4b25c3b..00000000 --- a/_includes/example_scripts/example-job.sl.1 +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -e - -#SBATCH --job-name my_job -#SBATCH --account nesi99991 -#SBATCH --mem 300M -#SBATCH --time 00:15:00 - -module purge -module load {{ site.example.module }} -{{ site.example.shell }} {{ site.example.script }} -echo "Done!" diff --git a/_includes/example_scripts/example_dmp.sl b/_includes/example_scripts/example_dmp.sl new file mode 100644 index 00000000..5ff50d9f --- /dev/null +++ b/_includes/example_scripts/example_dmp.sl @@ -0,0 +1,11 @@ +#!/bin/bash -e + +#SBATCH --job-name dmp_job +#SBATCH --output %x.out +#SBATCH --mem-per-cpu 500 +#SBATCH --ntasks 4 + +module purge +module load R/4.3.1-gimkl-2022a +srun Rscript sum_matrix.r +echo "Done!" \ No newline at end of file diff --git a/_includes/example_scripts/example_hybrid.sl b/_includes/example_scripts/example_hybrid.sl index 86d6a879..65d996fb 100644 --- a/_includes/example_scripts/example_hybrid.sl +++ b/_includes/example_scripts/example_hybrid.sl @@ -7,4 +7,7 @@ #SBATCH --ntasks 2 #SBATCH --cpus-per-task 4 -srun echo "I am task #${SLURM_PROCID} running on node '$(hostname)' with $(nproc) CPUs" +module purge +module load R/4.3.1-gimkl-2022a +srun Rscript sum_matrix.r +echo "Done!" \ No newline at end of file diff --git a/_includes/example_scripts/example_job.sh b/_includes/example_scripts/example_job.sh new file mode 100644 index 00000000..3bc68e9d --- /dev/null +++ b/_includes/example_scripts/example_job.sh @@ -0,0 +1,6 @@ +#!/bin/bash -e + +module purge +module load R/4.3.1-gimkl-2022a +Rscript sum_matrix.r +echo "Done!" 
\ No newline at end of file diff --git a/_includes/example_scripts/example_job.sl.1 b/_includes/example_scripts/example_job.sl.1 new file mode 100644 index 00000000..b426e336 --- /dev/null +++ b/_includes/example_scripts/example_job.sl.1 @@ -0,0 +1,11 @@ +#!/bin/bash -e + +#SBATCH --job-name example_job +#SBATCH --account nesi99991 +#SBATCH --mem 300M +#SBATCH --time 00:15:00 + +module purge +module load R/4.3.1-gimkl-2022a +Rscript sum_matrix.r +echo "Done!" \ No newline at end of file diff --git a/_includes/example_scripts/example-job.sl.2 b/_includes/example_scripts/example_job.sl.2 similarity index 70% rename from _includes/example_scripts/example-job.sl.2 rename to _includes/example_scripts/example_job.sl.2 index 7c4280b8..9f46903a 100644 --- a/_includes/example_scripts/example-job.sl.2 +++ b/_includes/example_scripts/example_job.sl.2 @@ -1,6 +1,6 @@ #!/bin/bash -e -#SBATCH --job-name my_job +#SBATCH --job-name example_job #SBATCH --account nesi99991 #SBATCH --mem 300M #SBATCH --time 00:15:00 @@ -8,5 +8,5 @@ module purge module load R/4.3.1-gimkl-2022a -{{ site.example.shell }} {{ site.example.script }} +Rscript sum_matrix.r echo "Done!" diff --git a/_includes/example_scripts/example-job.sl.3 b/_includes/example_scripts/example_job.sl.3 similarity index 70% rename from _includes/example_scripts/example-job.sl.3 rename to _includes/example_scripts/example_job.sl.3 index 04a3121a..240e1f31 100644 --- a/_includes/example_scripts/example-job.sl.3 +++ b/_includes/example_scripts/example_job.sl.3 @@ -1,6 +1,6 @@ #!/bin/bash -e -#SBATCH --job-name my_job +#SBATCH --job-name example_job #SBATCH --account nesi99991 #SBATCH --mem 600M #SBATCH --time 00:10:00 @@ -8,5 +8,5 @@ module purge module load R/4.3.1-gimkl-2022a -{{ site.example.shell }} {{ site.example.script }} +Rscript sum_matrix.r echo "Done!" 
\ No newline at end of file diff --git a/_includes/example_scripts/example_jobarray.sl b/_includes/example_scripts/example_jobarray.sl index 6c4cde8b..d84e21f9 100644 --- a/_includes/example_scripts/example_jobarray.sl +++ b/_includes/example_scripts/example_jobarray.sl @@ -6,4 +6,7 @@ #SBATCH --mem-per-cpu 500 #SBATCH --array 0-3 -srun echo "I am task #${SLURM_PROCID} running on node '$(hostname)' with $(nproc) CPUs" +module purge +module load R/4.3.1-gimkl-2022a +Rscript sum_matrix.r +echo "Done!" \ No newline at end of file diff --git a/_includes/example_scripts/example_mpi.sl b/_includes/example_scripts/example_mpi.sl deleted file mode 100644 index 327c559a..00000000 --- a/_includes/example_scripts/example_mpi.sl +++ /dev/null @@ -1,8 +0,0 @@ -#!/bin/bash -e - -#SBATCH --job-name mpi_job -#SBATCH --output %x.out -#SBATCH --mem-per-cpu 500 -#SBATCH --ntasks 4 - -srun bash whothis.sh diff --git a/_includes/example_scripts/example_smp.sl b/_includes/example_scripts/example_smp.sl index 5a1f9b87..3bb1008c 100644 --- a/_includes/example_scripts/example_smp.sl +++ b/_includes/example_scripts/example_smp.sl @@ -1,9 +1,12 @@ #!/bin/bash -e -#SBATCH --job-name smp_job +#SBATCH --job-name smp #SBATCH --account nesi99991 #SBATCH --output %x.out #SBATCH --mem-per-cpu 500 #SBATCH --cpus-per-task 8 -echo "I am task #${SLURM_PROCID} running on node '$(hostname)' with $(nproc) CPUs" +module purge +module load R/4.3.1-gimkl-2022a +Rscript sum_matrix.r +echo "Done!" \ No newline at end of file diff --git a/_includes/example_scripts/shared-mem-job.sl b/_includes/example_scripts/shared-mem-job.sl deleted file mode 100644 index 9d144429..00000000 --- a/_includes/example_scripts/shared-mem-job.sl +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -e - -#SBATCH --job-name my_job -#SBATCH --mem 300M -#SBATCH --time 00:15:00 -#SBATCH --cpus-per-task 4 - -module load R/4.3.1-gimkl-2022a -{{ site.example.shell }} {{ site.example.script }} -echo "Done!" 
diff --git a/_includes/example_scripts/array_sum.py b/_includes/example_scripts/sum_matrix.py similarity index 100% rename from _includes/example_scripts/array_sum.py rename to _includes/example_scripts/sum_matrix.py diff --git a/_includes/example_scripts/sum_matrix.r b/_includes/example_scripts/sum_matrix.r new file mode 100644 index 00000000..00d53124 --- /dev/null +++ b/_includes/example_scripts/sum_matrix.r @@ -0,0 +1,56 @@ +#!/usr/bin/env Rscript + + +# Function for shared memory execution +doTask <- function(size_x, size_y, seed, print_progress){ + suppressPackageStartupMessages(library(doParallel)) + + message(sprintf("Summing [ %e x %e ] matrix, seed = '%i'",size_x,size_y, seed)) + message(sprintf("Running on '%s' with %i CPU(s).", Sys.info()["nodename"], num_cpus)) + + set.seed(seed) + + registerDoParallel((num_cpus/2)) + + results_all <- foreach(z=0:size_x) %dopar% { + percent_complete= z*100/size_x + if (print_progress && percent_complete%%1==0){ + message(sprintf(" %i%% done...\r", percent_complete)) + } + sum(rnorm(size_y)) + } + Reduce("+",results_all) +} + +# Read task geometry and seed from the Slurm environment + +ntasks <- strtoi(Sys.getenv('SLURM_NTASKS', unset = "1")) +seed <- strtoi(Sys.getenv('SLURM_ARRAY_TASK_ID', unset = "0")) +num_cpus <- as.integer(strtoi(Sys.getenv('SLURM_CPUS_PER_TASK', unset = "1"))) + +size_x <-60000 # This one makes it use more memory +size_y <-40000 # This one makes it take longer + +# Time = (size_x/n) * size_y + c +# Mem = (size_x * n) * c1 + size_y * c2 + +print_progress <- TRUE +# print_progress <- interactive() # Whether to print progress or not. 
 + +# If more than 1 task, use doMPI +if (ntasks > 1){ + suppressPackageStartupMessages(library(doSNOW)) + cl <- makeMPIcluster(outfile="") + registerDoSNOW(cl) + + results_all <- foreach(z=1:ntasks) %dopar% { + doTask(size_x, ceiling(size_y/ntasks), z+seed, print_progress) + } + + results = Reduce("+",results_all) + stopCluster(cl) + message(sprintf("Sums to %f", results)) +}else{ + results = doTask(size_x, size_y, seed, print_progress) + message(sprintf("Sums to %f", results)) +}