Skip to content

Commit

Permalink
ch4/ofi: Remove redundant error info
Browse files Browse the repository at this point in the history
The MPIR error checking macros already add __LINE__ and __func__
information when an error is reported. __SHORT_FILE__ is not necessary
since we have the function name, which any decent editor can find
automatically.
  • Loading branch information
raffenet committed Jan 8, 2025
1 parent 96f033f commit 70bbdf7
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 56 deletions.
6 changes: 3 additions & 3 deletions maint/extracterrmsgs
Original file line number Diff line number Diff line change
Expand Up @@ -680,12 +680,12 @@ sub ProcessFile
# add longnames since we omit errnames.txt for these
$longnames{"**ofid_$name"} = "OFI call $name failed";
$longnamesDefined{"**ofid_$name"} = "$filename:$linecount";
$longnames{"**ofid_$name %s %d %s %s"} = "OFI call $name failed (%s:%d:%s:%s)";
$longnamesDefined{"**ofid_$name %s %d %s %s"} = "$filename:$linecount";
$longnames{"**ofid_$name %s"} = "OFI call $name failed (%s)";
$longnamesDefined{"**ofid_$name %s"} = "$filename:$linecount";
}

$generic_msgs{"**ofid_$name"}++;
$specific_msgs{"**ofid_$name %s %d %s %s"}++;
$specific_msgs{"**ofid_$name %s"}++;

next;
}
Expand Down
10 changes: 4 additions & 6 deletions src/mpid/ch4/netmod/ofi/coll/ofi_bcast_tree_rma.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_Bcast_intra_triggered_rma(void *buffer, i
if (0 != strcmp(MPIDI_OFI_global.prov_use[0]->fabric_attr->prov_name, "cxi")) {
do {
ret = fi_cntr_wait(rcv_cntr, 1, 1);
MPIR_ERR_CHKANDJUMP4(ret < 0 && ret != -FI_ETIMEDOUT,
MPIR_ERR_CHKANDJUMP1(ret < 0 && ret != -FI_ETIMEDOUT,
mpi_errno, MPI_ERR_RMA_RANGE,
"**ofid_cntr_wait", "**ofid_cntr_wait %s %d %s %s",
__SHORT_FILE__, __LINE__, __func__, fi_strerror(-ret));
"**ofid_cntr_wait", "**ofid_cntr_wait %s", fi_strerror(-ret));
MPID_Progress_test(NULL);
} while (ret == -FI_ETIMEDOUT);
} else {
Expand All @@ -90,10 +89,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_Bcast_intra_triggered_rma(void *buffer, i
if (0 != strcmp(MPIDI_OFI_global.prov_use[0]->fabric_attr->prov_name, "cxi")) {
do {
ret = fi_cntr_wait(snd_cntr, num_children, 1);
MPIR_ERR_CHKANDJUMP4(ret < 0 && ret != -FI_ETIMEDOUT,
MPIR_ERR_CHKANDJUMP1(ret < 0 && ret != -FI_ETIMEDOUT,
mpi_errno, MPI_ERR_RMA_RANGE,
"**ofid_cntr_wait", "**ofid_cntr_wait %s %d %s %s",
__SHORT_FILE__, __LINE__, __func__, fi_strerror(-ret));
"**ofid_cntr_wait", "**ofid_cntr_wait %s", fi_strerror(-ret));
MPID_Progress_test(NULL);
} while (ret == -FI_ETIMEDOUT);
} else {
Expand Down
10 changes: 4 additions & 6 deletions src/mpid/ch4/netmod/ofi/coll/ofi_bcast_tree_tagged.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_Bcast_intra_triggered_tagged(void *buffer
if (0 != strcmp(MPIDI_OFI_global.prov_use[0]->fabric_attr->prov_name, "cxi")) {
do {
ret = fi_cntr_wait(rcv_cntr, 1, 1);
MPIR_ERR_CHKANDJUMP4(ret < 0 && ret != -FI_ETIMEDOUT,
MPIR_ERR_CHKANDJUMP1(ret < 0 && ret != -FI_ETIMEDOUT,
mpi_errno, MPI_ERR_RMA_RANGE,
"**ofid_cntr_wait", "**ofid_cntr_wait %s %d %s %s",
__SHORT_FILE__, __LINE__, __func__, fi_strerror(-ret));
"**ofid_cntr_wait", "**ofid_cntr_wait %s", fi_strerror(-ret));
MPID_Progress_test(NULL);
} while (ret == -FI_ETIMEDOUT);
} else {
Expand All @@ -89,10 +88,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_Bcast_intra_triggered_tagged(void *buffer
if (0 != strcmp(MPIDI_OFI_global.prov_use[0]->fabric_attr->prov_name, "cxi")) {
do {
ret = fi_cntr_wait(snd_cntr, num_children, 1);
MPIR_ERR_CHKANDJUMP4(ret < 0 && ret != -FI_ETIMEDOUT,
MPIR_ERR_CHKANDJUMP1(ret < 0 && ret != -FI_ETIMEDOUT,
mpi_errno, MPI_ERR_RMA_RANGE,
"**ofid_cntr_wait", "**ofid_cntr_wait %s %d %s %s",
__SHORT_FILE__, __LINE__, __func__, fi_strerror(-ret));
"**ofid_cntr_wait", "**ofid_cntr_wait %s", fi_strerror(-ret));
MPID_Progress_test(NULL);
} while (ret == -FI_ETIMEDOUT);
} else {
Expand Down
12 changes: 6 additions & 6 deletions src/mpid/ch4/netmod/ofi/errnames.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
# Most of the libfabric call error names are generated from MPIDI_OFI_CALL macros,
# but some of them are used explicitly via MPIR_ERR_CHKANDJUMP1 and need to be listed here.
**ofid_cancel:OFI cancel failed
**ofid_cancel %s %d %s %s:OFI cancel failed (%s:%d:%s:%s)
**ofid_cancel %s:OFI cancel failed (%s)
**ofid_cntr_open:OFI Counter open failed
**ofid_cntr_open %s %d %s %s:OFI OFI Counter open failed (%s:%d:%s:%s)
**ofid_cntr_open %s:OFI Counter open failed (%s)
**ofid_cntr_wait:OFI Counter wait failed
**ofid_cntr_wait %s %d %s %s:OFI OFI Counter wait failed (%s:%d:%s:%s)
**ofid_cntr_wait %s:OFI Counter wait failed (%s)
**ofid_enable_trigger:OFI triggered ops enable failed
**ofid_enable_trigger %s %d %s %s:OFI triggered ops enable failed (%s:%d:%s:%s)
**ofid_enable_trigger %s:OFI triggered ops enable failed (%s)
**ofid_issue_trigger:OFI triggered ops issue failed
**ofid_issue_trigger %s %d %s %s:OFI triggered ops issue failed (%s:%d:%s:%s)
**ofid_issue_trigger %s:OFI triggered ops issue failed (%s)
**ofid_poll:OFI poll failed
**ofid_poll %s %d %s %s:OFI poll failed (%s:%d:%s:%s)
**ofid_poll %s:OFI poll failed (%s)
15 changes: 6 additions & 9 deletions src/mpid/ch4/netmod/ofi/ofi_events.c
Original file line number Diff line number Diff line change
Expand Up @@ -737,9 +737,8 @@ int MPIDI_OFI_handle_cq_error(int vci, int nic, ssize_t ret)
break;

default:
MPIR_ERR_SETFATALANDJUMP4(mpi_errno, MPI_ERR_OTHER, "**ofid_poll",
"**ofid_poll %s %d %s %s", __SHORT_FILE__,
__LINE__, __func__, fi_strerror(e.err));
MPIR_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**ofid_poll",
"**ofid_poll %s", fi_strerror(e.err));
}

break;
Expand Down Expand Up @@ -782,17 +781,15 @@ int MPIDI_OFI_handle_cq_error(int vci, int nic, ssize_t ret)
break;

default:
MPIR_ERR_SETFATALANDJUMP4(mpi_errno, MPI_ERR_OTHER, "**ofid_poll",
"**ofid_poll %s %d %s %s", __SHORT_FILE__,
__LINE__, __func__, fi_strerror(e.err));
MPIR_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**ofid_poll",
"**ofid_poll %s", fi_strerror(e.err));
}

break;

default:
MPIR_ERR_SETFATALANDJUMP4(mpi_errno, MPI_ERR_OTHER, "**ofid_poll",
"**ofid_poll %s %d %s %s", __SHORT_FILE__, __LINE__,
__func__, fi_strerror(errno));
MPIR_ERR_SETFATALANDJUMP1(mpi_errno, MPI_ERR_OTHER, "**ofid_poll",
"**ofid_poll %s", fi_strerror(errno));
}

fn_exit:
Expand Down
22 changes: 5 additions & 17 deletions src/mpid/ch4/netmod/ofi/ofi_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,15 @@ int MPIDI_OFI_handle_cq_error(int vci, int nic, ssize_t ret);
#define MPIDI_OFI_PROGRESS_WHILE(cond, vci) \
while (cond) MPIDI_OFI_PROGRESS(vci)

#define MPIDI_OFI_ERR MPIR_ERR_CHKANDJUMP4
#define MPIDI_OFI_ERR MPIR_ERR_CHKANDJUMP1
#define MPIDI_OFI_CALL(FUNC,STR) \
do { \
ssize_t _ret = FUNC; \
MPIDI_OFI_ERR(_ret<0, \
mpi_errno, \
MPI_ERR_OTHER, \
"**ofid_"#STR, \
"**ofid_"#STR" %s %d %s %s", \
__SHORT_FILE__, \
__LINE__, \
__func__, \
"**ofid_"#STR" %s", \
fi_strerror(-_ret)); \
} while (0)

Expand All @@ -81,10 +78,7 @@ int MPIDI_OFI_handle_cq_error(int vci, int nic, ssize_t ret);
mpi_errno, \
MPI_ERR_OTHER, \
"**ofid_"#STR, \
"**ofid_"#STR" %s %d %s %s", \
__SHORT_FILE__, \
__LINE__, \
__func__, \
"**ofid_"#STR" %s", \
fi_strerror(-_ret)); \
if (_retry > 0) { \
_retry--; \
Expand Down Expand Up @@ -129,10 +123,7 @@ int MPIDI_OFI_handle_cq_error(int vci, int nic, ssize_t ret);
mpi_errno, \
MPI_ERR_OTHER, \
"**ofid_"#STR, \
"**ofid_"#STR" %s %d %s %s", \
__SHORT_FILE__, \
__LINE__, \
__func__, \
"**ofid_"#STR" %s", \
fi_strerror(-_ret)); \
mpi_errno = MPIDI_OFI_progress_do_queue(vci_); \
if (mpi_errno != MPI_SUCCESS) \
Expand Down Expand Up @@ -176,10 +167,7 @@ int MPIDI_OFI_handle_cq_error(int vci, int nic, ssize_t ret);
mpi_errno, \
MPI_ERR_OTHER, \
"**ofid_"#STR, \
"**ofid_"#STR" %s %d %s %s", \
__SHORT_FILE__, \
__LINE__, \
__func__, \
"**ofid_"#STR" %s", \
fi_strerror(-_ret)); \
} while (0)

Expand Down
10 changes: 4 additions & 6 deletions src/mpid/ch4/netmod/ofi/ofi_spawn.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,8 @@ int MPIDI_OFI_dynamic_send(uint64_t remote_gpid, int tag, const void *buf, int s
int rc;
rc = fi_cancel((fid_t) MPIDI_OFI_global.ctx[ctx_idx].tx, (void *) &req.context);
if (rc && rc != -FI_ENOENT) {
MPIR_ERR_CHKANDJUMP4(rc < 0, mpi_errno, MPI_ERR_OTHER, "**ofid_cancel",
"**ofid_cancel %s %d %s %s", __SHORT_FILE__, __LINE__, __func__,
fi_strerror(-rc));
MPIR_ERR_CHKANDJUMP1(rc < 0, mpi_errno, MPI_ERR_OTHER, "**ofid_cancel",
"**ofid_cancel %s", fi_strerror(-rc));

}
while (!req.done) {
Expand Down Expand Up @@ -112,9 +111,8 @@ int MPIDI_OFI_dynamic_recv(int tag, void *buf, int size, int timeout)
int rc;
rc = fi_cancel((fid_t) MPIDI_OFI_global.ctx[ctx_idx].rx, (void *) &req.context);
if (rc && rc != -FI_ENOENT) {
MPIR_ERR_CHKANDJUMP4(rc < 0, mpi_errno, MPI_ERR_OTHER, "**ofid_cancel",
"**ofid_cancel %s %d %s %s", __SHORT_FILE__, __LINE__, __func__,
fi_strerror(-rc));
MPIR_ERR_CHKANDJUMP1(rc < 0, mpi_errno, MPI_ERR_OTHER, "**ofid_cancel",
"**ofid_cancel %s", fi_strerror(-rc));

}
while (!req.done) {
Expand Down
5 changes: 2 additions & 3 deletions src/mpid/ch4/netmod/ofi/ofi_win.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,9 @@ MPL_STATIC_INLINE_PREFIX int MPIDI_OFI_win_do_progress(MPIR_Win * win, int vci)

if (itercount == 1000 && MPIDI_OFI_COUNTER_WAIT_OBJECTS) {
ret = fi_cntr_wait(MPIDI_OFI_WIN(win).cmpl_cntr, tcount, 0);
MPIR_ERR_CHKANDJUMP4(ret < 0 && ret != -FI_ETIMEDOUT,
MPIR_ERR_CHKANDJUMP1(ret < 0 && ret != -FI_ETIMEDOUT,
mpi_errno, MPI_ERR_RMA_RANGE,
"**ofid_cntr_wait", "**ofid_cntr_wait %s %d %s %s",
__SHORT_FILE__, __LINE__, __func__, fi_strerror(-ret));
"**ofid_cntr_wait", "**ofid_cntr_wait %s", fi_strerror(-ret));
itercount = 0;
DEBUG_PROGRESS_CHECK;
}
Expand Down

0 comments on commit 70bbdf7

Please sign in to comment.