diff --git a/checks/apps/vasp/src/makefile.include.gh200 b/checks/apps/vasp/src/makefile.include.gh200
new file mode 100644
index 000000000..b7ee4416a
--- /dev/null
+++ b/checks/apps/vasp/src/makefile.include.gh200
@@ -0,0 +1,122 @@
+# Default precompiler options
+CPP_OPTIONS = -DHOST=\"LinuxNV\" \
+              -DMPI -DMPI_INPLACE -DMPI_BLOCK=8000 -Duse_collective \
+              -DscaLAPACK \
+              -DCACHE_SIZE=4000 \
+              -Davoidalloc \
+              -Dvasp6 \
+              -Duse_bse_te \
+              -Dtbdyn \
+              -Dqd_emulate \
+              -Dfock_dblbuf \
+              -D_OPENMP \
+              -D_OPENACC \
+              -DUSENCCL -DUSENCCLP2P
+
+CPP = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX)
+
+CUDA_VERSION = $(shell nvcc -V | grep -E -o -m 1 "[0-9][0-9]\.[0-9]," | rev | cut -c 2- | rev)
+
+CC  = mpicc -acc -gpu=cc90,cuda${CUDA_VERSION} -mp
+FC  = mpif90 -acc -gpu=cc90,cuda${CUDA_VERSION} -mp
+FCL = mpif90 -acc -gpu=cc90,cuda${CUDA_VERSION} -mp -c++libs
+
+FREE = -Mfree
+
+FFLAGS = -Mbackslash -Mlarge_arrays
+
+OFLAG = -fast
+
+DEBUG = -Mfree -O0 -traceback
+
+OBJECTS = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o
+
+LLIBS = -cudalib=cublas,cusolver,cufft,nccl -cuda
+
+# Redefine the standard list of O1 and O2 objects
+SOURCE_O1 := pade_fit.o minimax_dependence.o
+SOURCE_O2 := pead.o
+
+# For what used to be vasp.5.lib
+CPP_LIB = $(CPP)
+FC_LIB = $(FC)
+CC_LIB = $(CC)
+CFLAGS_LIB = -O -w
+FFLAGS_LIB = -O1 -Mfixed
+FREE_LIB = $(FREE)
+
+OBJECTS_LIB = linpack_double.o
+
+# For the parser library
+CXX_PARS = nvc++ --no_warnings
+
+##
+## Customize as of this point! Of course you may change the preceding
+## part of this file as well if you like, but it should rarely be
+## necessary ...
+##
+# When compiling on the target machine itself, keep "-tp host"; change
+# it to the relevant target only when cross-compiling for another
+# architecture.
+#
+# NOTE: Using "-tp neoverse-v2" causes some tests to fail. On the GH200
+# architecture, "-tp host" is recommended.
+VASP_TARGET_CPU ?= -tp host
+FFLAGS += $(VASP_TARGET_CPU)
+
+# Specify your NV HPC-SDK installation (mandatory)
+#... first try to set it automatically
+NVROOT = $(shell which nvfortran | awk -F /compilers/bin/nvfortran '{ print $$1 }')
+
+# If the above fails, then NVROOT needs to be set manually
+#NVHPC ?= /opt/nvidia/hpc_sdk
+#NVVERSION = 21.11
+#NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION)
+
+## Improves performance when using NV HPC-SDK >=21.11 and CUDA >11.2
+#OFLAG_IN = -fast -Mwarperf
+#SOURCE_IN := nonlr.o
+
+# Software emulation of quadruple precision (mandatory)
+QD ?= $(NVROOT)/compilers/extras/qd
+LLIBS += -L$(QD)/lib -lqdmod -lqd -Wl,-rpath,$(QD)/lib
+INCS += -I$(QD)/include/qd
+
+# BLAS (mandatory)
+BLAS = -lnvpl_blas_lp64_gomp -lnvpl_blas_core
+
+# LAPACK (mandatory)
+LAPACK = -lnvpl_lapack_lp64_gomp -lnvpl_lapack_core
+
+# scaLAPACK (mandatory)
+SCALAPACK = -lscalapack
+
+LLIBS += $(SCALAPACK) $(LAPACK) $(BLAS) \
+         -Wl,-rpath,/user-environment/env/develop/lib \
+         -Wl,-rpath,/user-environment/env/develop/lib64 \
+         -Wl,--disable-new-dtags
+
+# FFTW (mandatory)
+FFTW_ROOT ?= /user-environment/env/develop
+LLIBS += -L$(FFTW_ROOT)/lib -lfftw3 -lfftw3_omp
+INCS += -I$(FFTW_ROOT)/include
+
+# Use cusolvermp (optional)
+# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8)
+#CPP_OPTIONS += -DCUSOLVERMP -DCUBLASMP
+#LLIBS += -cudalib=cusolvermp,cublasmp -lnvhpcwrapcal
+
+# HDF5 support (optional but strongly recommended)
+CPP_OPTIONS += -DVASP_HDF5
+HDF5_ROOT ?= /user-environment/env/develop
+LLIBS += -L$(HDF5_ROOT)/lib -lhdf5_fortran
+INCS += -I$(HDF5_ROOT)/include
+
+# For the VASP-2-Wannier90 interface (optional)
+CPP_OPTIONS += -DVASP2WANNIER90
+WANNIER90_ROOT ?= /user-environment/env/develop
+LLIBS += -L$(WANNIER90_ROOT)/lib -lwannier
+
+# For the fftlib library (recommended)
+#CPP_OPTIONS += -Dsysv
+#FCL += fftlib.o
+#CXX_FFTLIB = nvc++ -mp --no_warnings -std=c++11 -DFFTLIB_THREADSAFE
+#INCS_FFTLIB = -I./include -I$(FFTW_ROOT)/include
+#LIBS += fftlib
+#LLIBS += -ldl
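Note on the CUDA_VERSION detection above: the shell pipeline pulls the first "NN.N," token out of `nvcc -V` and strips the trailing comma, so a line like "Cuda compilation tools, release 12.3, V12.3.107" yields "12.3". A rough Python equivalent, shown only to document the intent (the helper name is ours, and it assumes `nvcc` is on PATH):

import re
import subprocess


def detect_cuda_version() -> str:
    # Illustrative helper, not part of the patch: mimic the makefile
    # pipeline by taking the first 'NN.N,' token from `nvcc -V` output
    # and dropping the trailing comma ('12.3,' -> '12.3').
    out = subprocess.run(['nvcc', '-V'], capture_output=True,
                         text=True).stdout
    match = re.search(r'[0-9][0-9]\.[0-9],', out)
    return match.group(0)[:-1] if match else ''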
diff --git a/checks/apps/vasp/vasp_check.py b/checks/apps/vasp/vasp_check.py
index a2ec0bb14..81263ae5c 100644
--- a/checks/apps/vasp/vasp_check.py
+++ b/checks/apps/vasp/vasp_check.py
@@ -1,124 +1,181 @@
-# Copyright 2016-2022 Swiss National Supercomputing Centre (CSCS/ETH Zurich)
+# Copyright 2016-2024 Swiss National Supercomputing Centre (CSCS/ETH Zurich)
 # ReFrame Project Developers. See the top-level LICENSE file for details.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 
+import os
+import shutil
+
 import reframe as rfm
 import reframe.utility.sanity as sn
+import reframe.utility.udeps as udeps
+
+import uenv
+
+vasp_references = {
+    'CeO2': {'gh200': {
+        1: {'elapsed_time': (71, None, 0.10, 's')},
+        2: {'elapsed_time': (90, None, 0.10, 's')}
+    }},
+}
+
+
+slurm_config = {
+    'CeO2': {
+        'gh200': {
+            'ntasks-per-node': 4,
+            'cpus-per-task': 16,
+            'walltime': '0d0h5m0s',
+        }
+    },
+}
 
 
 @rfm.simple_test
-class VASPCheck(rfm.RunOnlyRegressionTest):
-    modules = ['VASP']
+class VaspCheck(rfm.RunOnlyRegressionTest):
     executable = 'vasp_std'
-    extra_resources = {
-        'switches': {
-            'num_switches': 1
-        }
-    }
-    keep_files = ['OUTCAR']
-    strict_check = False
-    use_multithreading = False
-    tags = {'maintenance', 'production'}
-    maintainers = ['LM']
-
-    num_nodes = parameter([6, 16], loggable=True)
-    allref = {
-        6: {
-            'sm_60': {
-                'dom:gpu': {'elapsed_time': (66.811, None, 0.10, 's')},
-                'daint:gpu': {'elapsed_time': (67.407, None, 0.10, 's')},
-            },
-            'broadwell': {
-                'dom:mc': {'elapsed_time': (57.745, None, 0.10, 's')},
-                'daint:mc': {'elapsed_time': (65.62, None, 0.10, 's')},
-            },
-            'zen2': {
-                'eiger:mc': {'elapsed_time': (112.347, None, 0.10, 's')},
-                'pilatus:mc': {'elapsed_time': (89.083, None, 0.10, 's')},
-            },
-        },
-        16: {
-            'sm_60': {
-                'daint:gpu': {'elapsed_time': (61.393, None, 0.10, 's')},
-            },
-            'broadwell': {
-                'daint:mc': {'elapsed_time': (45.404, None, 0.10, 's')},
-            },
-            'zen2': {
-                'eiger:mc': {'elapsed_time': (69.459, None, 0.10, 's')},
-                'pilatus:mc': {'elapsed_time': (100.0, None, 0.10, 's')}
+    maintainers = ['SSA']
+    valid_systems = ['*']
+
+    valid_prog_environs = ['+vasp']
+    test_name = 'CeO2'
+    force_reference = -.85026214E+03
+    num_nodes = parameter([1, 2], loggable=True)
+    tags = {'uenv', 'production'}
+
+    @run_before('run')
+    def prepare_run(self):
+        self.uarch = uenv.uarch(self.current_partition)
+        config = slurm_config[self.test_name][self.uarch]
+        # sbatch options
+        self.job.options = [
+            f'--nodes={self.num_nodes}',
+        ]
+        self.num_tasks_per_node = config['ntasks-per-node']
+        self.num_tasks = self.num_nodes * self.num_tasks_per_node
+        self.num_cpus_per_task = config['cpus-per-task']
+        self.num_tasks_per_socket = 1
+        self.num_tasks_per_core = 1
+        self.time_limit = config['walltime']
+
+        # srun options
+        self.job.launcher.options = [
+            '--cpu-bind=cores',
+            # For multi-node runs, VASP's GPU selection does not work
+            # properly; CUDA_VISIBLE_DEVICES must be restricted to a
+            # single GPU per task.
+            '--gpus-per-task=1'
+        ]
+
+        # environment variables
+        self.env_vars['OMP_NUM_THREADS'] = self.num_cpus_per_task
+
+        if self.uarch == 'gh200':
+            self.env_vars['MPICH_GPU_SUPPORT_ENABLED'] = '1'
+            self.env_vars['NCCL_IGNORE_CPU_AFFINITY'] = '1'
+
+        # set reference
+        if self.uarch is not None and \
+           self.uarch in vasp_references[self.test_name]:
+            self.reference = {
+                self.current_partition.fullname:
+                vasp_references[self.test_name][self.uarch][self.num_nodes]
             }
-        }
-    }
 
+    @sanity_function
+    def assert_reference(self):
+        force = sn.extractsingle(r'1 F=\s+(?P<result>\S+)',
+                                 self.stdout, 'result', float)
+        return sn.assert_reference(force, self.force_reference, -1e-5, 1e-5)
+
+    # INFO: The name of this function must match the key used in the
+    # reference dict!
     @performance_function('s')
     def elapsed_time(self):
         return sn.extractsingle(r'Elapsed time \(sec\):'
                                 r'\s+(?P
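A note on the ReFrame reference tuples used throughout: a tuple such as (71, None, 0.10, 's') encodes a nominal value, a lower and an upper fractional tolerance (None means unbounded), and a unit, so the one-node CeO2 run may take up to 10% longer than 71 s before the check fails. A plain-Python sketch of that bound check for a positive nominal value (the helper is illustrative, not part of ReFrame's API):

def within_reference(value, ref=(71, None, 0.10, 's')):
    # Interpret a ReFrame-style reference tuple
    # (nominal, lower_frac, upper_frac, unit) for a positive nominal;
    # None disables the corresponding bound.
    nominal, lower, upper, _unit = ref
    lo = nominal * (1 + lower) if lower is not None else float('-inf')
    hi = nominal * (1 + upper) if upper is not None else float('inf')
    return lo <= value <= hi


# With the one-node CeO2 reference: a 75 s run passes, an 80 s run fails.
assert within_reference(75.0) and not within_reference(80.0)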