diff --git a/checks/apps/vasp/src/makefile.include.gh200 b/checks/apps/vasp/src/makefile.include.gh200
new file mode 100644
index 000000000..b7ee4416a
--- /dev/null
+++ b/checks/apps/vasp/src/makefile.include.gh200
@@ -0,0 +1,122 @@
+# Default precompiler options
+CPP_OPTIONS = -DHOST=\"LinuxNV\" \
+              -DMPI -DMPI_INPLACE -DMPI_BLOCK=8000 -Duse_collective \
+              -DscaLAPACK \
+              -DCACHE_SIZE=4000 \
+              -Davoidalloc \
+              -Dvasp6 \
+              -Duse_bse_te \
+              -Dtbdyn \
+              -Dqd_emulate \
+              -Dfock_dblbuf \
+              -D_OPENMP \
+              -D_OPENACC \
+              -DUSENCCL -DUSENCCLP2P
+
+CPP = nvfortran -Mpreprocess -Mfree -Mextend -E $(CPP_OPTIONS) $*$(FUFFIX) > $*$(SUFFIX)
+
+CUDA_VERSION = $(shell nvcc -V | grep -E -o -m 1 "[0-9][0-9]\.[0-9]," | rev | cut -c 2- | rev)
+
+CC  = mpicc -acc -gpu=cc90,cuda${CUDA_VERSION} -mp
+FC  = mpif90 -acc -gpu=cc90,cuda${CUDA_VERSION} -mp
+FCL = mpif90 -acc -gpu=cc90,cuda${CUDA_VERSION} -mp -c++libs
+
+FREE = -Mfree
+
+FFLAGS = -Mbackslash -Mlarge_arrays
+
+OFLAG = -fast
+
+DEBUG = -Mfree -O0 -traceback
+
+OBJECTS = fftmpiw.o fftmpi_map.o fftw3d.o fft3dlib.o
+
+LLIBS = -cudalib=cublas,cusolver,cufft,nccl -cuda
+
+# Redefine the standard list of O1 and O2 objects
+SOURCE_O1 := pade_fit.o minimax_dependence.o
+SOURCE_O2 := pead.o
+
+# For what used to be vasp.5.lib
+CPP_LIB = $(CPP)
+FC_LIB = $(FC)
+CC_LIB = $(CC)
+CFLAGS_LIB = -O -w
+FFLAGS_LIB = -O1 -Mfixed
+FREE_LIB = $(FREE)
+
+OBJECTS_LIB = linpack_double.o
+
+# For the parser library
+CXX_PARS = nvc++ --no_warnings
+
+##
+## Customize as of this point! Of course you may change the preceding
+## part of this file as well if you like, but it should rarely be
+## necessary ...
+##
+# When compiling on the target machine itself, keep "-tp host"; change
+# it to the relevant target only when cross-compiling for another
+# architecture.
+#
+# NOTE: Using "-tp neoverse-v2" causes some tests to fail. On the GH200
+# architecture, "-tp host" is recommended.
+VASP_TARGET_CPU ?= -tp host
+FFLAGS += $(VASP_TARGET_CPU)
+
+# Specify your NV HPC-SDK installation (mandatory)
+#... first try to set it automatically
+NVROOT = $(shell which nvfortran | awk -F /compilers/bin/nvfortran '{ print $$1 }')
+
+# If the above fails, then NVROOT needs to be set manually
+#NVHPC ?= /opt/nvidia/hpc_sdk
+#NVVERSION = 21.11
+#NVROOT = $(NVHPC)/Linux_x86_64/$(NVVERSION)
+
+## Improves performance when using NV HPC-SDK >=21.11 and CUDA >11.2
+#OFLAG_IN = -fast -Mwarperf
+#SOURCE_IN := nonlr.o
+
+# Software emulation of quadruple precision (mandatory)
+QD ?= $(NVROOT)/compilers/extras/qd
+LLIBS += -L$(QD)/lib -lqdmod -lqd -Wl,-rpath,$(QD)/lib
+INCS += -I$(QD)/include/qd
+
+# BLAS (mandatory)
+BLAS = -lnvpl_blas_lp64_gomp -lnvpl_blas_core
+
+# LAPACK (mandatory)
+LAPACK = -lnvpl_lapack_lp64_gomp -lnvpl_lapack_core
+
+# scaLAPACK (mandatory)
+SCALAPACK = -lscalapack
+
+LLIBS += $(SCALAPACK) $(LAPACK) $(BLAS) \
+         -Wl,-rpath,/user-environment/env/develop/lib \
+         -Wl,-rpath,/user-environment/env/develop/lib64 \
+         -Wl,--disable-new-dtags
+
+# FFTW (mandatory)
+FFTW_ROOT ?= /user-environment/env/develop
+LLIBS += -L$(FFTW_ROOT)/lib -lfftw3 -lfftw3_omp
+INCS += -I$(FFTW_ROOT)/include
+
+# Use cusolvermp (optional)
+# supported as of NVHPC-SDK 24.1 (and needs CUDA-11.8)
+#CPP_OPTIONS += -DCUSOLVERMP -DCUBLASMP
+#LLIBS += -cudalib=cusolvermp,cublasmp -lnvhpcwrapcal
+
+# HDF5 support (optional but strongly recommended)
+CPP_OPTIONS += -DVASP_HDF5
+HDF5_ROOT ?= /user-environment/env/develop
+LLIBS += -L$(HDF5_ROOT)/lib -lhdf5_fortran
+INCS += -I$(HDF5_ROOT)/include
+
+# For the VASP-2-Wannier90 interface (optional)
+CPP_OPTIONS += -DVASP2WANNIER90
+WANNIER90_ROOT ?= /user-environment/env/develop
+LLIBS += -L$(WANNIER90_ROOT)/lib -lwannier
+
+# For the fftlib library (recommended)
+#CPP_OPTIONS += -Dsysv
+#FCL += fftlib.o
+#CXX_FFTLIB = nvc++ -mp --no_warnings -std=c++11 -DFFTLIB_THREADSAFE
+#INCS_FFTLIB = -I./include -I$(FFTW_ROOT)/include
+#LIBS += fftlib
+#LLIBS += -ldl
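Note on the CUDA_VERSION detection above: the shell pipeline pulls the first "NN.N," token out of `nvcc -V` and strips the trailing comma, so a line like "Cuda compilation tools, release 12.3, V12.3.107" yields "12.3". A rough Python equivalent, shown only to document the intent (the helper name is ours, and it assumes `nvcc` is on PATH):

import re
import subprocess


def detect_cuda_version() -> str:
    # Illustrative helper, not part of the patch: mimic the makefile
    # pipeline by taking the first 'NN.N,' token from `nvcc -V` output
    # and dropping the trailing comma ('12.3,' -> '12.3').
    out = subprocess.run(['nvcc', '-V'], capture_output=True,
                         text=True).stdout
    match = re.search(r'[0-9][0-9]\.[0-9],', out)
    return match.group(0)[:-1] if match else ''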
diff --git a/checks/apps/vasp/vasp_check.py b/checks/apps/vasp/vasp_check.py
index a2ec0bb14..81263ae5c 100644
--- a/checks/apps/vasp/vasp_check.py
+++ b/checks/apps/vasp/vasp_check.py
@@ -1,124 +1,181 @@
-# Copyright 2016-2022 Swiss National Supercomputing Centre (CSCS/ETH Zurich)
+# Copyright 2016-2024 Swiss National Supercomputing Centre (CSCS/ETH Zurich)
 # ReFrame Project Developers. See the top-level LICENSE file for details.
 #
 # SPDX-License-Identifier: BSD-3-Clause
 
+import os
+import shutil
+
 import reframe as rfm
 import reframe.utility.sanity as sn
+import reframe.utility.udeps as udeps
+
+import uenv
+
+vasp_references = {
+    'CeO2': {'gh200': {
+        1: {'elapsed_time': (71, None, 0.10, 's')},
+        2: {'elapsed_time': (90, None, 0.10, 's')}
+    }},
+}
+
+
+slurm_config = {
+    'CeO2': {
+        'gh200': {
+            'ntasks-per-node': 4,
+            'cpus-per-task': 16,
+            'walltime': '0d0h5m0s',
+        }
+    },
+}
 
 
 @rfm.simple_test
-class VASPCheck(rfm.RunOnlyRegressionTest):
-    modules = ['VASP']
+class VaspCheck(rfm.RunOnlyRegressionTest):
     executable = 'vasp_std'
-    extra_resources = {
-        'switches': {
-            'num_switches': 1
-        }
-    }
-    keep_files = ['OUTCAR']
-    strict_check = False
-    use_multithreading = False
-    tags = {'maintenance', 'production'}
-    maintainers = ['LM']
-
-    num_nodes = parameter([6, 16], loggable=True)
-    allref = {
-        6: {
-            'sm_60': {
-                'dom:gpu': {'elapsed_time': (66.811, None, 0.10, 's')},
-                'daint:gpu': {'elapsed_time': (67.407, None, 0.10, 's')},
-            },
-            'broadwell': {
-                'dom:mc': {'elapsed_time': (57.745, None, 0.10, 's')},
-                'daint:mc': {'elapsed_time': (65.62, None, 0.10, 's')},
-            },
-            'zen2': {
-                'eiger:mc': {'elapsed_time': (112.347, None, 0.10, 's')},
-                'pilatus:mc': {'elapsed_time': (89.083, None, 0.10, 's')},
-            },
-        },
-        16: {
-            'sm_60': {
-                'daint:gpu': {'elapsed_time': (61.393, None, 0.10, 's')},
-            },
-            'broadwell': {
-                'daint:mc': {'elapsed_time': (45.404, None, 0.10, 's')},
-            },
-            'zen2': {
-                'eiger:mc': {'elapsed_time': (69.459, None, 0.10, 's')},
-                'pilatus:mc': {'elapsed_time': (100.0, None, 0.10, 's')}
+    maintainers = ['SSA']
+    valid_systems = ['*']
+
+    valid_prog_environs = ['+vasp']
+    test_name = 'CeO2'
+    force_reference = -.85026214E+03
+    num_nodes = parameter([1, 2], loggable=True)
+    tags = {'uenv', 'production'}
+
+    @run_before('run')
+    def prepare_run(self):
+        self.uarch = uenv.uarch(self.current_partition)
+        config = slurm_config[self.test_name][self.uarch]
+        # sbatch options
+        self.job.options = [
+            f'--nodes={self.num_nodes}',
+        ]
+        self.num_tasks_per_node = config['ntasks-per-node']
+        self.num_tasks = self.num_nodes * self.num_tasks_per_node
+        self.num_cpus_per_task = config['cpus-per-task']
+        self.num_tasks_per_socket = 1
+        self.num_tasks_per_core = 1
+        self.time_limit = config['walltime']
+
+        # srun options
+        self.job.launcher.options = [
+            '--cpu-bind=cores',
+            # For multi-node runs, VASP's GPU selection does not work
+            # properly; CUDA_VISIBLE_DEVICES must be restricted to a
+            # single GPU per task.
+            '--gpus-per-task=1'
+        ]
+
+        # environment variables
+        self.env_vars['OMP_NUM_THREADS'] = self.num_cpus_per_task
+
+        if self.uarch == 'gh200':
+            self.env_vars['MPICH_GPU_SUPPORT_ENABLED'] = '1'
+            self.env_vars['NCCL_IGNORE_CPU_AFFINITY'] = '1'
+
+        # set reference
+        if self.uarch is not None and \
+           self.uarch in vasp_references[self.test_name]:
+            self.reference = {
+                self.current_partition.fullname:
+                vasp_references[self.test_name][self.uarch][self.num_nodes]
             }
-        }
-    }
 
+    @sanity_function
+    def assert_reference(self):
+        force = sn.extractsingle(r'1 F=\s+(?P<result>\S+)',
+                                 self.stdout, 'result', float)
+        return sn.assert_reference(force, self.force_reference, -1e-5, 1e-5)
+
+    # INFO: The name of this function must match the key used in the
+    # reference dict!
     @performance_function('s')
     def elapsed_time(self):
         return sn.extractsingle(r'Elapsed time \(sec\):'
                                 r'\s+(?P
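A note on the ReFrame reference tuples used throughout: a tuple such as (71, None, 0.10, 's') encodes a nominal value, a lower and an upper fractional tolerance (None means unbounded), and a unit, so the one-node CeO2 run may take up to 10% longer than 71 s before the check fails. A plain-Python sketch of that bound check for a positive nominal value (the helper is illustrative, not part of ReFrame's API):

def within_reference(value, ref=(71, None, 0.10, 's')):
    # Interpret a ReFrame-style reference tuple
    # (nominal, lower_frac, upper_frac, unit) for a positive nominal;
    # None disables the corresponding bound.
    nominal, lower, upper, _unit = ref
    lo = nominal * (1 + lower) if lower is not None else float('-inf')
    hi = nominal * (1 + upper) if upper is not None else float('inf')
    return lo <= value <= hi


# With the one-node CeO2 reference: a 75 s run passes, an 80 s run fails.
assert within_reference(75.0) and not within_reference(80.0)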