diff --git a/config/m4/cuda.m4 b/config/m4/cuda.m4 index 4163f898e19..7f477259676 100644 --- a/config/m4/cuda.m4 +++ b/config/m4/cuda.m4 @@ -66,10 +66,10 @@ AS_IF([test "x$cuda_checked" != "xyes"], [AC_MSG_ERROR([libnvidia-ml not found. Install appropriate nvidia-driver package])]) cuda_happy="no"])]) - # Check for nvmlDeviceGetGpuFabricInfoV - AC_CHECK_DECLS([nvmlDeviceGetGpuFabricInfoV], + # Check for nvmlDeviceGetGpuFabricInfo + AC_CHECK_DECLS([nvmlDeviceGetGpuFabricInfo], [AC_DEFINE([HAVE_NVML_FABRIC_INFO], 1, [Enable NVML GPU fabric info support])], - [AC_MSG_NOTICE([nvmlDeviceGetGpuFabricInfoV function not found in libnvidia-ml. MNNVL support will be disabled.])], + [AC_MSG_NOTICE([nvmlDeviceGetGpuFabricInfo function not found in libnvidia-ml. MNNVL support will be disabled.])], [[#include ]]) diff --git a/src/uct/cuda/cuda_ipc/cuda_ipc_md.c b/src/uct/cuda/cuda_ipc/cuda_ipc_md.c index fccd071a8f8..abc74fac39d 100644 --- a/src/uct/cuda/cuda_ipc/cuda_ipc_md.c +++ b/src/uct/cuda/cuda_ipc/cuda_ipc_md.c @@ -416,7 +416,7 @@ uct_cuda_ipc_md_check_fabric_info(uct_cuda_ipc_md_t *md, static int mnnvl_supported = 0; #else static int mnnvl_supported = -1; - nvmlGpuFabricInfoV_t fabric_info; + nvmlGpuFabricInfo_t fabric_info; nvmlDevice_t device; ucs_status_t status; char buf[64]; @@ -436,19 +436,17 @@ uct_cuda_ipc_md_check_fabric_info(uct_cuda_ipc_md_t *md, goto out_not_supported; } - fabric_info.version = nvmlGpuFabricInfo_v2; - status = UCT_NVML_FUNC_LOG_ERR( - nvmlDeviceGetGpuFabricInfoV(device, &fabric_info)); + status = UCT_NVML_FUNC_LOG_ERR( + nvmlDeviceGetGpuFabricInfo(device, &fabric_info)); if (status != UCS_OK) { goto out_not_supported; } - ucs_debug("fabric_info: healthmask=%u state=%u status=%u clique=%u uuid=%s", - fabric_info.healthMask, fabric_info.state, fabric_info.status, - fabric_info.cliqueId, - ucs_str_dump_hex( - fabric_info.clusterUuid, NVML_GPU_FABRIC_UUID_LEN, buf, - sizeof(buf), SIZE_MAX)); + ucs_debug("fabric_info: state=%u status=%u uuid=%s", fabric_info.state, + fabric_info.status, + ucs_str_dump_hex(fabric_info.clusterUuid, + NVML_GPU_FABRIC_UUID_LEN, buf, sizeof(buf), + SIZE_MAX)); if ((fabric_info.state == NVML_GPU_FABRIC_STATE_COMPLETED) && (fabric_info.status == NVML_SUCCESS)) {