Skip to content

Commit

Permalink
add method to query HW size
Browse files Browse the repository at this point in the history
Signed-off-by: Jeff Hammond <jehammond@nvidia.com>
  • Loading branch information
jeffhammond committed May 3, 2022
1 parent 1c73340 commit 8bd9160
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 0 deletions.
1 change: 1 addition & 0 deletions frame/include/blis.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ extern "C" {

#include "bli_thread.h"
#include "bli_pthread.h"
#include "bli_affinity.h"


// -- Constant definitions --
Expand Down
105 changes: 105 additions & 0 deletions frame/thread/bli_affinity.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2022 NVIDIA
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

// We need a way to detect the kind of oversubscription that arises when
// hierarchical parallelism is used and the affinity mask within which
// BLIS runs does not contain enough hardware threads to support the
// requested number of software threads.
//
// this is motivated by, or related to:
// https://github.com/flame/blis/issues/588
// https://github.com/flame/blis/pull/607
// https://github.com/flame/blis/issues/604
// https://github.com/flame/blis/issues/603

#include "bli_affinity.h"

#ifndef BLIS_ENABLE_AFFINITY

// Define the symbol for platforms such as Windows and macOS that do not support the Linux affinity API.

dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope)
{
    // Without an affinity API there is no mask to inspect, so report the
    // largest value this function can return. Callers read that value as
    // "the affinity mask does not constrain the current scope".
    const dim_t unconstrained_hw_size = 1024;

    // The scope makes no difference on this code path.
    ( void )scope;

    return unconstrained_hw_size;
}

#else

// this macro has to come before any other headers
#define _GNU_SOURCE

#include <sched.h>
#include <unistd.h>

// scope is either the calling process or the calling thread:
// 0 = calling process
// 1 = calling thread

// Return the number of hardware threads (logical CPUs) present in the
// affinity mask of the requested scope.
//
//   scope == 0 : query the mask using the calling process's pid.
//   otherwise  : query the mask of the calling thread.
//
// Prints a message and aborts if sched_getaffinity() fails.
//
// NOTE(review): _GNU_SOURCE is defined after "bli_affinity.h" has already
// been included; confirm that no system header is pulled in before the
// define (or that the build passes -D_GNU_SOURCE), otherwise the CPU_*
// macros may not be declared.
dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope)
{
    // sched_getaffinity() treats a pid of 0 as "the calling thread".
    const pid_t pid = ( scope == 0 ) ? getpid() : 0;

    cpu_set_t mask;
    CPU_ZERO( &mask );

    // A fixed-size cpu_set_t holds CPU_SETSIZE (currently 1024) bits. If the
    // kernel's CPU mask is larger than that, this call fails and a
    // dynamically allocated set (CPU_ALLOC) would be required instead.
    // See https://man7.org/linux/man-pages/man2/sched_getaffinity.2.html
    if ( sched_getaffinity( pid, sizeof( mask ), &mask ) != 0 )
    {
        bli_print_msg( "sched_getaffinity failed",
                       __FILE__, __LINE__ );
        bli_abort();
    }

    // Count every CPU in the set. The bound must be the number of *bits* in
    // the set (CPU_SETSIZE), not sizeof(cpu_set_t), which is a size in bytes
    // and would ignore all CPUs with an index >= 128. CPU_COUNT does this
    // correctly.
    return ( dim_t )CPU_COUNT( &mask );
}

#endif // BLIS_ENABLE_AFFINITY
44 changes: 44 additions & 0 deletions frame/thread/bli_affinity.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2022 NVIDIA
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name(s) of the copyright holder(s) nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef BLIS_AFFINITY_H
#define BLIS_AFFINITY_H

#include "blis.h"

// Scope whose CPU affinity mask is examined by bli_affinity_get_hw_size():
//   process (0) - the calling process
//   thread  (1) - the calling thread
typedef enum { process = 0, thread = 1 } bli_affinity_scope_t;

// Returns the number of hardware threads available within the given scope.
// When BLIS_ENABLE_AFFINITY is defined, this is the count of CPUs found set
// in the mask returned by sched_getaffinity(); the function prints a message
// and aborts if that call fails. When BLIS_ENABLE_AFFINITY is not defined,
// the function always returns 1024, meaning the affinity mask places no
// constraint on the scope.
dim_t bli_affinity_get_hw_size(bli_affinity_scope_t scope);

#endif // BLIS_AFFINITY_H
26 changes: 26 additions & 0 deletions frame/thread/bli_l3_decor_openmp.c
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,12 @@ void bli_l3_thread_decorator_thread_check
)
{
dim_t n_threads_real = omp_get_num_threads();
dim_t n_threads_hwmask;
if ( omp_in_parallel() ) {
n_threads_hwmask = bli_affinity_get_hw_size(thread);
} else {
n_threads_hwmask = bli_affinity_get_hw_size(process);
}

// Check if the number of OpenMP threads created within this parallel
// region is different from the number of threads that were requested
Expand Down Expand Up @@ -241,6 +247,26 @@ void bli_l3_thread_decorator_thread_check

// Synchronize all threads and continue.
_Pragma( "omp barrier" )

return;
}

// Check if the number of hardware threads available to BLIS in the
// calling context (according to the affinity mask) is smaller than
// either the number of threads that were requested or the number of
// OpenMP threads actually created within this parallel region.
if ( n_threads_hwmask < n_threads || n_threads_hwmask < n_threads_real)
{
bli_print_msg( "The affinity mask on this process does not have "
"enough HW threads for your requested SW threads.",
__FILE__, __LINE__ );

bli_thrcomm_init( n_threads_hwmask, gl_comm );
bli_rntm_set_num_threads_only( n_threads_hwmask, rntm );
#warning HELP ME HERE
bli_rntm_set_ways_only( 1, 1, 1, 1, 1, rntm );

// Synchronize all threads and continue.
_Pragma( "omp barrier" )
}
}

Expand Down

0 comments on commit 8bd9160

Please sign in to comment.