Skip to content

Commit

Permalink
Added H100 and P100 benchmark scripts (#18)
Browse files Browse the repository at this point in the history
Added benchmark scripts for P100 (based on the configuration of A100) and H100 GPUs
  • Loading branch information
iomaganaris authored Apr 8, 2024
1 parent 8575e02 commit 33658cd
Show file tree
Hide file tree
Showing 3 changed files with 357 additions and 0 deletions.
4 changes: 4 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,14 @@ def pybind11_extension(m):
'console_scripts': [
'sbench=stencil_benchmarks.scripts.sbench:main',
'sbench-analyze=stencil_benchmarks.scripts.sbench_analyze:main',
'sbench-h100-collection=stencil_benchmarks.scripts'
'.sbench_h100_collection:main',
'sbench-a100-collection=stencil_benchmarks.scripts'
'.sbench_a100_collection:main',
'sbench-v100-collection=stencil_benchmarks.scripts'
'.sbench_v100_collection:main',
'sbench-p100-collection=stencil_benchmarks.scripts'
'.sbench_p100_collection:main',
'sbench-mi50-collection=stencil_benchmarks.scripts'
'.sbench_mi50_collection:main',
'sbench-mi100-collection=stencil_benchmarks.scripts'
Expand Down
176 changes: 176 additions & 0 deletions stencil_benchmarks/scripts/sbench_h100_collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
# Stencil Benchmarks
#
# Copyright (c) 2017-2021, ETH Zurich and MeteoSwiss
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# SPDX-License-Identifier: BSD-3-Clause

import click

from stencil_benchmarks.benchmarks_collection.stencils.cuda_hip import (
basic, horizontal_diffusion as hdiff, vertical_advection as vadv)
from stencil_benchmarks.tools.multirun import (Configuration,
run_scaling_benchmark,
truncate_block_size_to_domain,
default_kwargs)


@click.group()
def main():
    # Root click group for this script. The benchmark commands in this file
    # register themselves on it through ``@main.command()``; the group
    # callback itself has nothing to do.
    # (Comments are used instead of a docstring because click would display
    # a docstring as the group's --help text.)
    pass


# Baseline benchmark options shared by every command in this script.
# ``default_kwargs`` hands back a callable; the commands invoke it as
# ``common_kwargs(option, **extra)`` to obtain the keyword arguments that are
# forwarded to each ``Configuration``.
common_kwargs = default_kwargs(**{
    'backend': 'cuda',
    'compiler': 'nvcc',
    # sm_90: the CUDA architecture this (H100) collection compiles for.
    'gpu_architecture': 'sm_90',
    'verify': False,
    'dry_runs': 1,
    'alignment': 128,
    'dtype': 'float32',
})


@main.command()
@click.argument('output', type=click.Path())
@click.option('--executions', '-e', type=int, default=101)
@click.option('--option', '-o', multiple=True)
def basic_bandwidth(output, executions, option):
    # Benchmarks the basic stencils (stream/copy/empty, one-sided and
    # symmetric averages along each axis, and an i-j Laplacian) with
    # ``run_scaling_benchmark`` and writes the resulting table to ``output``
    # via ``to_csv``.
    #
    # output:     path the table is written to.
    # executions: forwarded unchanged to ``run_scaling_benchmark``.
    # option:     values of the repeatable ``-o`` flag, forwarded to
    #             ``common_kwargs``.
    #
    # (Comments are used instead of a docstring because click would display
    # a docstring as the command's --help text.)

    # Axis index -> letter used in the configuration names.
    axis_letters = 'ijk'

    # Settings for all stencils that use the 3D loop.
    kwargs_3d = common_kwargs(
        option,
        loop='3D',
        block_size=(128, 2, 1),
        halo=(1, 1, 1),
    )

    # The 'stream' copy uses a 1D loop, a 1D block and no halo instead.
    kwargs_1d = kwargs_3d.copy()
    kwargs_1d.update(loop='1D', block_size=(1024, 1, 1), halo=(0, 0, 0))

    configurations = [
        Configuration(basic.Copy, name='stream', **kwargs_1d),
        Configuration(basic.Empty, name='empty', **kwargs_3d),
        Configuration(basic.Copy, name='copy', **kwargs_3d),
    ]
    # One-sided averages along i, j, k (names 'avg-i', 'avg-j', 'avg-k').
    configurations.extend(
        Configuration(basic.OnesidedAverage,
                      name='avg-' + letter,
                      axis=axis,
                      **kwargs_3d)
        for axis, letter in enumerate(axis_letters))
    # Symmetric averages along i, j, k (names 'sym-avg-i', ...).
    configurations.extend(
        Configuration(basic.SymmetricAverage,
                      name='sym-avg-' + letter,
                      axis=axis,
                      **kwargs_3d)
        for axis, letter in enumerate(axis_letters))
    # Laplacian along x and y only.
    configurations.append(
        Configuration(basic.Laplacian,
                      name='lap-ij',
                      along_x=True,
                      along_y=True,
                      along_z=False,
                      **kwargs_3d))

    run_scaling_benchmark(configurations, executions).to_csv(output)


@main.command()
@click.argument('output', type=click.Path())
@click.option('--executions', '-e', type=int, default=101)
@click.option('--option', '-o', multiple=True)
def horizontal_diffusion_bandwidth(output, executions, option):
    # Benchmarks the horizontal-diffusion stencil variants with
    # ``run_scaling_benchmark`` and writes the resulting table to ``output``
    # via ``to_csv``.
    #
    # output:     path the table is written to.
    # executions: forwarded unchanged to ``run_scaling_benchmark``.
    # option:     values of the repeatable ``-o`` flag, forwarded to
    #             ``common_kwargs``.
    #
    # (Comments are used instead of a docstring because click would display
    # a docstring as the command's --help text.)
    shared = common_kwargs(option)

    # (stencil class, block size, extra keyword arguments), in run order.
    variants = [
        (hdiff.Classic, (32, 12, 1), {}),
        (hdiff.OnTheFly, (32, 16, 1), {'loop': '3D'}),
        (hdiff.OnTheFlyIncache, (32, 8, 1), {}),
        (hdiff.JScanSharedMem, (256, 32, 1), {}),
        (hdiff.JScanOtfIncache, (128, 4, 1), {}),
        (hdiff.JScanOtf, (128, 4, 1), {}),
        (hdiff.JScanShuffleIncache, (28, 8, 2), {}),
        (hdiff.JScanShuffle, (28, 8, 2), {}),
        (hdiff.JScanShuffleSystolic, (28, 4, 3), {}),
    ]
    configurations = [
        Configuration(stencil, block_size=block_size, **extras, **shared)
        for stencil, block_size, extras in variants
    ]

    def truncate_unless_first_dim_is_28(**run_kwargs):
        # Block sizes whose first entry is 28 (the JScanShuffle* variants
        # above) are passed through untouched; every other configuration
        # goes through ``truncate_block_size_to_domain``.
        if run_kwargs['block_size'][0] == 28:
            return run_kwargs
        return truncate_block_size_to_domain(**run_kwargs)

    results = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_unless_first_dim_is_28)
    results.to_csv(output)


@main.command()
@click.argument('output', type=click.Path())
@click.option('--executions', '-e', type=int, default=101)
@click.option('--option', '-o', multiple=True)
def vertical_advection_bandwidth(output, executions, option):
    # Benchmarks the vertical-advection stencil variants with
    # ``run_scaling_benchmark`` (block sizes preprocessed by
    # ``truncate_block_size_to_domain``) and writes the resulting table to
    # ``output`` via ``to_csv``.
    #
    # output:     path the table is written to.
    # executions: forwarded unchanged to ``run_scaling_benchmark``.
    # option:     values of the repeatable ``-o`` flag, forwarded to
    #             ``common_kwargs``.
    #
    # (Comments are used instead of a docstring because click would display
    # a docstring as the command's --help text.)
    shared = common_kwargs(option)

    # (stencil class, block size, unroll factor), in run order.
    variants = [
        (vadv.Classic, (128, 1), 8),
        (vadv.LocalMem, (128, 1), 28),
        (vadv.SharedMem, (64, 1), 0),
        (vadv.LocalMemMerged, (128, 1), 2),
    ]
    configurations = [
        Configuration(stencil,
                      block_size=block_size,
                      unroll_factor=unroll_factor,
                      **shared)
        for stencil, block_size, unroll_factor in variants
    ]

    results = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain)
    results.to_csv(output)


# Invoke the click group when this file is executed directly as a script.
if __name__ == '__main__':
    main()
177 changes: 177 additions & 0 deletions stencil_benchmarks/scripts/sbench_p100_collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
# Stencil Benchmarks
#
# Copyright (c) 2017-2021, ETH Zurich and MeteoSwiss
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
# SPDX-License-Identifier: BSD-3-Clause

import click

from stencil_benchmarks.benchmarks_collection.stencils.cuda_hip import (
basic, horizontal_diffusion as hdiff, vertical_advection as vadv)
from stencil_benchmarks.tools.multirun import (Configuration,
run_scaling_benchmark,
truncate_block_size_to_domain,
default_kwargs)


@click.group()
def main():
    # Root click group for this script. The benchmark commands in this file
    # register themselves on it through ``@main.command()``; the group
    # callback itself has nothing to do.
    # (Comments are used instead of a docstring because click would display
    # a docstring as the group's --help text.)
    pass


# Baseline benchmark options shared by every command in this script.
# ``default_kwargs`` hands back a callable; the commands invoke it as
# ``common_kwargs(option, **extra)`` to obtain the keyword arguments that are
# forwarded to each ``Configuration``.
common_kwargs = default_kwargs(**{
    'backend': 'cuda',
    'compiler': 'nvcc',
    # sm_60: the CUDA architecture this (P100) collection compiles for.
    'gpu_architecture': 'sm_60',
    'verify': False,
    'dry_runs': 1,
    'alignment': 128,
    'dtype': 'float32',
    # NOTE(review): sbench_h100_collection.py does not set print_code; this
    # looks like it may be a debugging leftover -- confirm it is intended.
    'print_code': True,
})


@main.command()
@click.argument('output', type=click.Path())
@click.option('--executions', '-e', type=int, default=101)
@click.option('--option', '-o', multiple=True)
def basic_bandwidth(output, executions, option):
    # Benchmarks the basic stencils (stream/copy/empty, one-sided and
    # symmetric averages along each axis, and an i-j Laplacian) with
    # ``run_scaling_benchmark`` and writes the resulting table to ``output``
    # via ``to_csv``.
    #
    # output:     path the table is written to.
    # executions: forwarded unchanged to ``run_scaling_benchmark``.
    # option:     values of the repeatable ``-o`` flag, forwarded to
    #             ``common_kwargs``.
    #
    # (Comments are used instead of a docstring because click would display
    # a docstring as the command's --help text.)

    # Axis index -> letter used in the configuration names.
    axis_letters = 'ijk'

    # Settings for all stencils that use the 3D loop.
    kwargs_3d = common_kwargs(
        option,
        loop='3D',
        block_size=(128, 2, 1),
        halo=(1, 1, 1),
    )

    # The 'stream' copy uses a 1D loop, a 1D block and no halo instead.
    kwargs_1d = kwargs_3d.copy()
    kwargs_1d.update(loop='1D', block_size=(1024, 1, 1), halo=(0, 0, 0))

    configurations = [
        Configuration(basic.Copy, name='stream', **kwargs_1d),
        Configuration(basic.Empty, name='empty', **kwargs_3d),
        Configuration(basic.Copy, name='copy', **kwargs_3d),
    ]
    # One-sided averages along i, j, k (names 'avg-i', 'avg-j', 'avg-k').
    configurations.extend(
        Configuration(basic.OnesidedAverage,
                      name='avg-' + letter,
                      axis=axis,
                      **kwargs_3d)
        for axis, letter in enumerate(axis_letters))
    # Symmetric averages along i, j, k (names 'sym-avg-i', ...).
    configurations.extend(
        Configuration(basic.SymmetricAverage,
                      name='sym-avg-' + letter,
                      axis=axis,
                      **kwargs_3d)
        for axis, letter in enumerate(axis_letters))
    # Laplacian along x and y only.
    configurations.append(
        Configuration(basic.Laplacian,
                      name='lap-ij',
                      along_x=True,
                      along_y=True,
                      along_z=False,
                      **kwargs_3d))

    run_scaling_benchmark(configurations, executions).to_csv(output)


@main.command()
@click.argument('output', type=click.Path())
@click.option('--executions', '-e', type=int, default=101)
@click.option('--option', '-o', multiple=True)
def horizontal_diffusion_bandwidth(output, executions, option):
    # Benchmarks the horizontal-diffusion stencil variants with
    # ``run_scaling_benchmark`` and writes the resulting table to ``output``
    # via ``to_csv``.
    #
    # output:     path the table is written to.
    # executions: forwarded unchanged to ``run_scaling_benchmark``.
    # option:     values of the repeatable ``-o`` flag, forwarded to
    #             ``common_kwargs``.
    #
    # (Comments are used instead of a docstring because click would display
    # a docstring as the command's --help text.)
    shared = common_kwargs(option)

    # (stencil class, block size, extra keyword arguments), in run order.
    variants = [
        (hdiff.Classic, (32, 16, 1), {}),
        (hdiff.OnTheFly, (32, 16, 1), {'loop': '3D'}),
        (hdiff.OnTheFlyIncache, (32, 8, 1), {}),
        (hdiff.JScanSharedMem, (256, 32, 1), {}),
        (hdiff.JScanOtfIncache, (128, 4, 1), {}),
        (hdiff.JScanOtf, (128, 4, 1), {}),
        (hdiff.JScanShuffleIncache, (28, 8, 2), {}),
        (hdiff.JScanShuffle, (28, 8, 2), {}),
        (hdiff.JScanShuffleSystolic, (28, 4, 3), {}),
    ]
    configurations = [
        Configuration(stencil, block_size=block_size, **extras, **shared)
        for stencil, block_size, extras in variants
    ]

    def truncate_unless_first_dim_is_28(**run_kwargs):
        # Block sizes whose first entry is 28 (the JScanShuffle* variants
        # above) are passed through untouched; every other configuration
        # goes through ``truncate_block_size_to_domain``.
        if run_kwargs['block_size'][0] == 28:
            return run_kwargs
        return truncate_block_size_to_domain(**run_kwargs)

    results = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_unless_first_dim_is_28)
    results.to_csv(output)


@main.command()
@click.argument('output', type=click.Path())
@click.option('--executions', '-e', type=int, default=101)
@click.option('--option', '-o', multiple=True)
def vertical_advection_bandwidth(output, executions, option):
    # Benchmarks the vertical-advection stencil variants with
    # ``run_scaling_benchmark`` (block sizes preprocessed by
    # ``truncate_block_size_to_domain``) and writes the resulting table to
    # ``output`` via ``to_csv``.
    #
    # output:     path the table is written to.
    # executions: forwarded unchanged to ``run_scaling_benchmark``.
    # option:     values of the repeatable ``-o`` flag, forwarded to
    #             ``common_kwargs``.
    #
    # (Comments are used instead of a docstring because click would display
    # a docstring as the command's --help text.)
    shared = common_kwargs(option)

    # (stencil class, block size, unroll factor), in run order.
    variants = [
        (vadv.Classic, (512, 1), 8),
        (vadv.LocalMem, (128, 1), 28),
        (vadv.SharedMem, (64, 1), 0),
        (vadv.LocalMemMerged, (512, 1), 2),
    ]
    configurations = [
        Configuration(stencil,
                      block_size=block_size,
                      unroll_factor=unroll_factor,
                      **shared)
        for stencil, block_size, unroll_factor in variants
    ]

    results = run_scaling_benchmark(
        configurations,
        executions,
        preprocess_args=truncate_block_size_to_domain)
    results.to_csv(output)


# Invoke the click group when this file is executed directly as a script.
if __name__ == '__main__':
    main()

0 comments on commit 33658cd

Please sign in to comment.