Skip to content

Commit

Permalink
prov/efa: Test and fix issues in Sai's PR
Browse files Browse the repository at this point in the history
  • Loading branch information
shijin-aws committed Feb 5, 2025
1 parent fb66376 commit a50bd49
Show file tree
Hide file tree
Showing 11 changed files with 68 additions and 25 deletions.
2 changes: 1 addition & 1 deletion fabtests/benchmarks/rma_bw.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ int main(int argc, char **argv)

hints->caps = FI_MSG | FI_RMA;
hints->domain_attr->resource_mgmt = FI_RM_ENABLED;
hints->mode = FI_CONTEXT | FI_CONTEXT2;
hints->mode = FI_CONTEXT | FI_CONTEXT2 | FI_RX_CQ_DATA;
hints->domain_attr->threading = FI_THREAD_DOMAIN;
hints->addr_format = opts.address_format;

Expand Down
3 changes: 1 addition & 2 deletions fabtests/common/shared.c
Original file line number Diff line number Diff line change
Expand Up @@ -3706,18 +3706,17 @@ int ft_parse_api_opts(int op, char *optarg, struct fi_info *hints,
{
switch (op) {
case 'o':
hints->mode |= FI_RX_CQ_DATA;
if (!strcasecmp(optarg, "read")) {
hints->caps |= FI_READ | FI_REMOTE_READ;
opts->rma_op = FT_RMA_READ;
} else if (!strcasecmp(optarg, "writedata")) {
hints->caps |= FI_WRITE | FI_REMOTE_WRITE;
hints->mode |= FI_RX_CQ_DATA;
hints->domain_attr->cq_data_size = 4;
opts->rma_op = FT_RMA_WRITEDATA;
opts->cqdata_op = FT_CQDATA_WRITEDATA;
cq_attr.format = FI_CQ_FORMAT_DATA;
} else if (!strcasecmp(optarg, "senddata")) {
hints->mode |= FI_RX_CQ_DATA;
hints->domain_attr->cq_data_size = 4;
opts->cqdata_op = FT_CQDATA_SENDDATA;
cq_attr.format = FI_CQ_FORMAT_DATA;
Expand Down
9 changes: 3 additions & 6 deletions prov/efa/src/efa_av.c
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ int efa_av_insert_one(struct efa_av *av, struct efa_ep_addr *addr,
goto out;
}

EFA_INFO(FI_LOG_AV, "Inserting address GID[%s] QP[%u] QKEY[%u] to AV ....\n",
EFA_WARN(FI_LOG_AV, "Inserting address GID[%s] QP[%u] QKEY[%u] to AV ....\n",
raw_gid_str, addr->qpn, addr->qkey);

/*
Expand Down Expand Up @@ -904,9 +904,8 @@ int efa_av_open(struct fid_domain *domain_fid, struct fi_av_attr *attr,
if (ret)
goto err;

if (EFA_INFO_TYPE_IS_RDM(efa_domain->info)) {
av->ep_type = FI_EP_RDM;

av->ep_type = efa_domain->info->ep_attr->type;
if (av->ep_type == FI_EP_RDM) {
av_attr = *attr;
if (efa_domain->fabric && efa_domain->fabric->shm_fabric) {
/*
Expand All @@ -928,8 +927,6 @@ int efa_av_open(struct fid_domain *domain_fid, struct fi_av_attr *attr,
if (ret)
goto err_close_util_av;
}
} else {
av->ep_type = FI_EP_DGRAM;
}

EFA_INFO(FI_LOG_AV, "fi_av_attr:%" PRId64 "\n",
Expand Down
8 changes: 6 additions & 2 deletions prov/efa/src/efa_base_ep.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,10 +167,12 @@ static int efa_base_ep_modify_qp_rst2rts(struct efa_base_ep *base_ep,
return err;

if (base_ep->util_ep.type != FI_EP_DGRAM &&
efa_domain_support_rnr_retry_modify(base_ep->domain))
efa_domain_support_rnr_retry_modify(base_ep->domain)) {
EFA_WARN(FI_LOG_EP_CTRL, "enable qp with rnr retry\n");
return efa_base_ep_modify_qp_state(
base_ep, qp, IBV_QPS_RTS,
IBV_QP_STATE | IBV_QP_SQ_PSN | IBV_QP_RNR_RETRY);
}

return efa_base_ep_modify_qp_state(base_ep, qp, IBV_QPS_RTS,
IBV_QP_STATE | IBV_QP_SQ_PSN);
Expand Down Expand Up @@ -268,7 +270,7 @@ int efa_base_ep_enable_qp(struct efa_base_ep *base_ep, struct efa_qp *qp)

qp->qp_num = qp->ibv_qp->qp_num;
base_ep->domain->qp_table[qp->qp_num & base_ep->domain->qp_table_sz_m1] = qp;
EFA_INFO(FI_LOG_EP_CTRL, "QP enabled! qp_n: %d qkey: %d\n", qp->qp_num, qp->qkey);
EFA_WARN(FI_LOG_EP_CTRL, "QP enabled! qp_n: %d qkey: %d\n", qp->qp_num, qp->qkey);

return err;
}
Expand Down Expand Up @@ -574,6 +576,8 @@ void efa_base_ep_construct_ibv_qp_init_attr_ex(struct efa_base_ep *ep,
attr_ex->cap.max_send_sge = info->tx_attr->iov_limit;
attr_ex->cap.max_recv_wr = info->rx_attr->size;
attr_ex->cap.max_recv_sge = info->rx_attr->iov_limit;
EFA_WARN(FI_LOG_EP_CTRL, "max_send_wr: %zu, max_send_sge: %zu, max_recv_wr: %zu, max_recv_sge: %zu\n",
info->tx_attr->size, info->tx_attr->iov_limit, info->rx_attr->size, info->rx_attr->iov_limit);
attr_ex->cap.max_inline_data = ep->domain->device->efa_attr.inline_buf_size;
attr_ex->pd = ep->domain->ibv_pd;
attr_ex->qp_context = ep;
Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/efa_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ static int efa_domain_init_device_and_pd(struct efa_domain *efa_domain,
if (i == g_device_cnt)
return -FI_ENODEV;

EFA_INFO(FI_LOG_DOMAIN, "Domain %s selected device %s\n", domain_name, device_name);
EFA_WARN(FI_LOG_DOMAIN, "Domain %s selected device %s\n", domain_name, device_name);
efa_domain->ibv_pd = efa_domain->device->ibv_pd;
return 0;
}
Expand Down
2 changes: 1 addition & 1 deletion prov/efa/src/efa_mr.c
Original file line number Diff line number Diff line change
Expand Up @@ -831,7 +831,7 @@ static int efa_mr_reg_impl(struct efa_mr *efa_mr, uint64_t flags, const void *at
}
efa_mr->domain->ibv_mr_reg_ct++;
efa_mr->domain->ibv_mr_reg_sz += efa_mr->ibv_mr->length;
EFA_INFO(FI_LOG_MR, "Registered memory of size %zu for ibv pd %p, total mr reg size %zu, mr reg count %zu\n",
EFA_WARN(FI_LOG_MR, "Registered memory of size %zu for ibv pd %p, total mr reg size %zu, mr reg count %zu\n",
efa_mr->ibv_mr->length, efa_mr->domain->ibv_pd, efa_mr->domain->ibv_mr_reg_sz, efa_mr->domain->ibv_mr_reg_ct);
efa_mr->mr_fid.key = efa_mr->ibv_mr->rkey;
}
Expand Down
4 changes: 2 additions & 2 deletions prov/efa/src/efa_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ static inline ssize_t efa_post_recv(struct efa_base_ep *base_ep, const struct fi

efa_tracepoint(recv_begin_msg_context, (size_t) msg->context, (size_t) msg->addr);

EFA_DBG(FI_LOG_EP_DATA,
EFA_WARN(FI_LOG_EP_DATA,
"total len: %zu, addr: %lu, context: %lx, flags: %lx\n",
ofi_total_iov_len(msg->msg_iov, msg->iov_count),
msg->addr, (size_t) msg->context, flags);
Expand Down Expand Up @@ -208,7 +208,7 @@ static inline ssize_t efa_post_send(struct efa_base_ep *base_ep, const struct fi

efa_tracepoint(send_begin_msg_context, (size_t) msg->context, (size_t) msg->addr);

EFA_DBG(FI_LOG_EP_DATA,
EFA_WARN(FI_LOG_EP_DATA,
"total len: %zu, addr: %lu, context: %lx, flags: %lx\n",
ofi_total_iov_len(msg->msg_iov, msg->iov_count),
msg->addr, (size_t) msg->context, flags);
Expand Down
7 changes: 4 additions & 3 deletions prov/efa/src/efa_prov.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,10 @@ static int efa_util_prov_initialize()
* So we populate the efa-direct info objects first
*/
for (i = 0; i < g_device_cnt; ++i) {
prov_info_direct = fi_dupinfo(g_device_list[i].direct_info);
if (!prov_info_direct) {
EFA_WARN(FI_LOG_DOMAIN, "Failed to allocate prov_info for EFA direct\n");
err = efa_prov_info_alloc_for_direct(&prov_info_direct, &g_device_list[i]);
if (err) {
EFA_WARN(FI_LOG_DOMAIN, "Failed to allocate prov_info for direct. error: %d\n",
err);
continue;
}

Expand Down
50 changes: 44 additions & 6 deletions prov/efa/src/efa_prov_info.c
Original file line number Diff line number Diff line change
Expand Up @@ -177,14 +177,9 @@ void efa_prov_info_set_ep_attr(struct fi_info *prov_info,
prov_info->ep_attr->max_msg_size = device->ibv_port_attr.max_msg_sz;

switch (info_type) {
case EFA_INFO_RDM:
prov_info->ep_attr->type = FI_EP_RDM;
/* max_msg_size for RDM path is set in efa_prov_info_alloc_for_rdm */
break;
case EFA_INFO_RDM: /* fall through */
case EFA_INFO_DIRECT:
prov_info->ep_attr->type = FI_EP_RDM;
/* max_msg_size in ep_attr should be the maximum of send/recv and RMA sizes */
prov_info->ep_attr->max_msg_size = MAX(device->ibv_port_attr.max_msg_sz, device->max_rdma_size);
break;
case EFA_INFO_DGRAM:
prov_info->ep_attr->type = FI_EP_DGRAM;
Expand Down Expand Up @@ -719,6 +714,49 @@ int efa_prov_info_alloc_for_rdm(struct fi_info **prov_info_rdm_ptr,
}


/**
 * @brief Allocate and populate the fi_info object for a device's EFA direct path.
 *
 * Duplicates device->direct_info and then sets the domain, endpoint, tx and
 * rx attributes that are specific to the direct path. When the device
 * supports both RDMA read and RDMA write, the RMA capabilities are added and
 * ep_attr->max_msg_size is raised to cover the RDMA size as well.
 *
 * @param[out] prov_info_direct_ptr	set to the newly allocated fi_info on
 *					success; left untouched on failure
 * @param[in]  device			device whose direct_info is used as the
 *					template; device->direct_info must not
 *					be NULL
 * @return 0 on success, -FI_ENOMEM if the fi_info could not be duplicated
 */
int efa_prov_info_alloc_for_direct(struct fi_info **prov_info_direct_ptr,
				   struct efa_device *device)
{
	struct fi_info *prov_info_direct;

	assert(device->direct_info);

	prov_info_direct = fi_dupinfo(device->direct_info);
	if (!prov_info_direct)
		return -FI_ENOMEM;

	/* EFA direct endpoint ensures thread safety with a pthread lock */
	prov_info_direct->domain_attr->threading = FI_THREAD_SAFE;
	/*
	 * EFA direct endpoint handles Receiver Not Ready (RNR) events by
	 * doing the rnr retry at the firmware level
	 */
	prov_info_direct->domain_attr->resource_mgmt = FI_RM_ENABLED;

	prov_info_direct->ep_attr->protocol = FI_PROTO_EFA;
	prov_info_direct->tx_attr->op_flags = FI_INJECT | FI_COMPLETION |
					      FI_TRANSMIT_COMPLETE |
					      FI_DELIVERY_COMPLETE;
	prov_info_direct->rx_attr->op_flags = FI_COMPLETION;

	if (efa_device_support_rdma_read() &&
	    efa_device_support_rdma_write()) {
		EFA_WARN(FI_LOG_CORE, "Adding RMA caps to efa direct info\n");
		prov_info_direct->caps |= OFI_TX_RMA_CAPS | OFI_RX_RMA_CAPS;
		/*
		 * Initiator-side RMA caps belong to the tx attributes and
		 * target-side RMA caps belong to the rx attributes.
		 */
		prov_info_direct->tx_attr->caps |= OFI_TX_RMA_CAPS;
		prov_info_direct->rx_attr->caps |= OFI_RX_RMA_CAPS;
		/* max_msg_size in ep_attr should be the maximum of send/recv and RMA sizes */
		prov_info_direct->ep_attr->max_msg_size =
			MAX(device->ibv_port_attr.max_msg_sz,
			    device->max_rdma_size);
		/*
		 * Without unsolicited write recv support, FI_RX_CQ_DATA is
		 * added to the required mode bits of both the info and its
		 * rx attributes.
		 */
		if (!efa_device_support_unsolicited_write_recv()) {
			prov_info_direct->mode |= FI_RX_CQ_DATA;
			prov_info_direct->rx_attr->mode |= FI_RX_CQ_DATA;
		}
	}

	/* Publish the object only once it is fully populated */
	*prov_info_direct_ptr = prov_info_direct;

	return 0;
}


static int efa_node_matches_addr(struct efa_ep_addr *addr, const char *node)
{
struct efa_ep_addr eaddr;
Expand Down
4 changes: 4 additions & 0 deletions prov/efa/src/efa_prov_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ int efa_prov_info_alloc(struct fi_info **prov_info,
int efa_prov_info_alloc_for_rdm(struct fi_info **prov_info_rdm,
struct efa_device *device);


/**
 * Allocate the fi_info object for a device's EFA direct path.
 *
 * The new object is a duplicate of device->direct_info (which must not be
 * NULL) with the direct-path domain, endpoint, tx and rx attributes filled in.
 *
 * @param[out] prov_info_direct	receives the newly allocated fi_info on success
 * @param[in]  device		device whose direct_info is duplicated
 * @return 0 on success, -FI_ENOMEM if the duplication fails
 */
int efa_prov_info_alloc_for_direct(struct fi_info **prov_info_direct,
struct efa_device *device);

int efa_prov_info_compare_src_addr(const char *node, uint64_t flags, const struct fi_info *hints,
const struct fi_info *fi);

Expand Down
2 changes: 1 addition & 1 deletion prov/util/src/util_attr.c
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ int ofi_check_domain_attr(const struct fi_provider *prov, uint32_t api_version,

if (fi_resource_mgmt_level(user_attr->resource_mgmt) <
fi_resource_mgmt_level(prov_attr->resource_mgmt)) {
FI_INFO(prov, FI_LOG_CORE, "Invalid resource mgmt model\n");
FI_INFO(prov, FI_LOG_CORE, "Invalid resource mgmt model: user %d, prov: %d\n", fi_resource_mgmt_level(user_attr->resource_mgmt), fi_resource_mgmt_level(prov_attr->resource_mgmt));
return -FI_ENODATA;
}

Expand Down

0 comments on commit a50bd49

Please sign in to comment.