From 75d5c4b88d7086d1fb0b26bc87dae3309ab88486 Mon Sep 17 00:00:00 2001
From: Hui Zhou <hzhou321@anl.gov>
Date: Fri, 22 Nov 2024 17:28:10 -0600
Subject: [PATCH] ch4/ofi: sparsely poll global progress in
 MPIDI_OFI_retry_progress

In most cases, we only need to poll per-vci OFI progress to resolve the
EAGAIN issue. Only poll global progress sparsely -- in this commit, only
when the low 8 bits of the retry counter are zero (every 256th value).
---
 src/mpid/ch4/netmod/ofi/ofi_impl.h | 10 +++-------
 src/mpid/ch4/netmod/ofi/util.c     | 15 +++++++++++++--
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/mpid/ch4/netmod/ofi/ofi_impl.h b/src/mpid/ch4/netmod/ofi/ofi_impl.h
index df45c3c32c1..4c7608befca 100644
--- a/src/mpid/ch4/netmod/ofi/ofi_impl.h
+++ b/src/mpid/ch4/netmod/ofi/ofi_impl.h
@@ -94,9 +94,7 @@ int MPIDI_OFI_handle_cq_error(int vci, int nic, ssize_t ret);
          * for recursive locking in more than one lock (currently limited
          * to one due to scalar TLS counter), this lock yielding
          * operation can be avoided since we are inside a finite loop. */ \
-        MPIDI_OFI_THREAD_CS_EXIT_VCI_OPTIONAL(vci_);			  \
-        mpi_errno = MPIDI_OFI_retry_progress();                      \
-        MPIDI_OFI_THREAD_CS_ENTER_VCI_OPTIONAL(vci_);			     \
+        mpi_errno = MPIDI_OFI_retry_progress(vci_, _retry); \
         MPIR_ERR_CHECK(mpi_errno);                               \
     } while (1);                                            \
     } while (0)
@@ -113,9 +111,7 @@ int MPIDI_OFI_handle_cq_error(int vci, int nic, ssize_t ret);
                 _retry--; \
                 MPIR_ERR_CHKANDJUMP(_retry == 0, mpi_errno, MPIX_ERR_EAGAIN, "**eagain"); \
             } \
-            MPIDI_OFI_THREAD_CS_EXIT_VCI_OPTIONAL(vci_); \
-            mpi_errno = MPIDI_OFI_retry_progress(); \
-            MPIDI_OFI_THREAD_CS_ENTER_VCI_OPTIONAL(vci_); \
+            mpi_errno = MPIDI_OFI_retry_progress(vci_, _retry); \
         } \
     } while (0)
 
@@ -295,7 +291,7 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_mr_bind(struct fi_info *prov, struct fid_
 #define MPIDI_OFI_LOCAL_MR_KEY 0
 #define MPIDI_OFI_COLL_MR_KEY 1
 #define MPIDI_OFI_INVALID_MR_KEY 0xFFFFFFFFFFFFFFFFULL
-int MPIDI_OFI_retry_progress(void);
+int MPIDI_OFI_retry_progress(int vci, int retry);
 int MPIDI_OFI_recv_huge_event(int vci, struct fi_cq_tagged_entry *wc, MPIR_Request * rreq);
 int MPIDI_OFI_recv_huge_control(int vci, MPIR_Context_id_t comm_id, int rank, int tag,
                                 MPIDI_OFI_huge_remote_info_t * info);
diff --git a/src/mpid/ch4/netmod/ofi/util.c b/src/mpid/ch4/netmod/ofi/util.c
index cb075e25afc..7e11ca837e8 100644
--- a/src/mpid/ch4/netmod/ofi/util.c
+++ b/src/mpid/ch4/netmod/ofi/util.c
@@ -7,12 +7,23 @@
 #include "ofi_impl.h"
 #include "ofi_events.h"
 
-int MPIDI_OFI_retry_progress(void)
+int MPIDI_OFI_retry_progress(int vci, int retry)
 {
     /* We do not call progress on hooks form netmod level
      * because it is not reentrant safe.
      */
-    return MPID_Progress_test(NULL);
+    int mpi_errno;
+    /* Poll global progress sparingly: only when the low 8 bits of retry are
+     * zero, bracketed by the optional VCI critical-section exit/enter macros.
+     * Otherwise poll only this vci, assuming that clears most busy cases. */
+    if ((retry & 0xff) == 0) {
+        MPIDI_OFI_THREAD_CS_EXIT_VCI_OPTIONAL(vci);
+        mpi_errno = MPID_Progress_test(NULL);
+        MPIDI_OFI_THREAD_CS_ENTER_VCI_OPTIONAL(vci);
+    } else {
+        mpi_errno = MPIDI_OFI_progress_uninlined(vci);
+    }
+    return mpi_errno;
 }
 
 typedef struct MPIDI_OFI_mr_key_allocator_t {