From 7693cde10da71ee5ec89d7173368c769d54a981c Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Thu, 17 Sep 2020 10:09:34 +0300 Subject: [PATCH] UCP/RNDV: Init callbacks to NULL, add rndv-cancel debug --- src/ucp/core/ucp_request.inl | 3 +++ src/ucp/tag/rndv.c | 22 ++++++++++++---------- test/apps/iodemo/ucx_wrapper.cc | 1 + 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/ucp/core/ucp_request.inl b/src/ucp/core/ucp_request.inl index 3fa6aeab480..4240af89b46 100644 --- a/src/ucp/core/ucp_request.inl +++ b/src/ucp/core/ucp_request.inl @@ -84,6 +84,9 @@ ucp_request_put(ucp_request_t *req) { ucs_trace_req("put request %p", req); UCS_PROFILE_REQUEST_FREE(req); + req->send.cb = NULL; + req->recv.tag.cb = NULL; + req->recv.stream.cb = NULL; ucs_mpool_put_inline(req); } diff --git a/src/ucp/tag/rndv.c b/src/ucp/tag/rndv.c index 8548d08c5f6..025b525278d 100644 --- a/src/ucp/tag/rndv.c +++ b/src/ucp/tag/rndv.c @@ -38,7 +38,8 @@ static int ucp_rndv_is_recv_pipeline_needed(ucp_request_t *rndv_req, return 1; } -static void ucp_rndv_complete_send(ucp_request_t *sreq, ucs_status_t status) +static void ucp_rndv_complete_send(ucp_request_t *sreq, ucs_status_t status, + const char *debug_status) { ucp_worker_h worker; khiter_t iter; @@ -62,7 +63,7 @@ static void ucp_rndv_complete_send(ucp_request_t *sreq, ucs_status_t status) sreq, sreq->send.rndv_req_id, worker); } - ucp_send_request_update_data(sreq, "rndv_done"); + ucp_send_request_update_data(sreq, debug_status); ucp_request_complete_send(sreq, status); } @@ -233,7 +234,7 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_proto_progress_rndv_rts, (self), return UCS_ERR_NO_RESOURCE; } else { ucs_assert(UCS_STATUS_IS_ERR(status)); - ucp_rndv_complete_send(sreq, status); + ucp_rndv_complete_send(sreq, status, "rts_cancel"); return UCS_OK; } } @@ -267,7 +268,8 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_proto_progress_rndv_cancel, (self), } else if (packed_len == UCS_ERR_NO_RESOURCE) { return UCS_ERR_NO_RESOURCE; } else { - ucp_rndv_complete_send(sreq, (ucs_status_t)packed_len); + ucp_rndv_complete_send(sreq, (ucs_status_t)packed_len, + "progress_rndv_cancel"); return UCS_OK; } } @@ -401,7 +403,7 @@ ucs_status_t ucp_tag_send_start_rndv(ucp_request_t *sreq) void ucp_tag_rndv_cancel(ucp_request_t *sreq) { if (!(sreq->send.ep->flags & UCP_EP_FLAG_REMOTE_CONNECTED)) { - ucp_rndv_complete_send(sreq, UCS_ERR_CANCELED); + ucp_rndv_complete_send(sreq, UCS_ERR_CANCELED, "rndv_cancel"); } else { sreq->send.uct.func = ucp_proto_progress_rndv_cancel; if (sreq->flags & UCP_REQUEST_FLAG_RNDV_RTS_SENT) { @@ -417,7 +419,8 @@ void ucp_ep_complete_rndv_reqs(ucp_ep_h ep) ucs_list_for_each_safe(sreq, tmp, &worker->rndv_reqs_list, send.list) { if (sreq->send.ep == ep) { - ucp_rndv_complete_send(sreq, UCS_ERR_CANCELED); + ucp_rndv_complete_send(sreq, UCS_ERR_CANCELED, + "ep_closed_rndv_cancel"); } } } @@ -1363,7 +1366,7 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_ats_handler, if (sreq->flags & UCP_REQUEST_FLAG_OFFLOADED) { ucp_tag_offload_cancel_rndv(sreq); } - ucp_rndv_complete_send(sreq, rep_hdr->status); + ucp_rndv_complete_send(sreq, rep_hdr->status, "ats_recv"); return UCS_OK; } @@ -1404,7 +1407,7 @@ UCS_PROFILE_FUNC(ucs_status_t, ucp_rndv_progress_am_bcopy, (self), ucp_rndv_pack_data, 1); } if (status == UCS_OK) { - ucp_rndv_complete_send(sreq, UCS_OK); + ucp_rndv_complete_send(sreq, UCS_OK, "rndv_am_bcopy_done"); } else if (status == UCP_STATUS_PENDING_SWITCH) { status = UCS_OK; } @@ -1477,8 +1480,7 @@ static void ucp_rndv_am_zcopy_send_req_complete(ucp_request_t *req, ucs_status_t status) { ucs_assert(req->send.state.uct_comp.count == 0); - ucp_request_send_buffer_dereg(req); - ucp_request_complete_send(req, status); + ucp_rndv_complete_send(req, status, "rndv_zcopy_complete"); } static void ucp_rndv_am_zcopy_completion(uct_completion_t *self, diff --git a/test/apps/iodemo/ucx_wrapper.cc b/test/apps/iodemo/ucx_wrapper.cc index 8136f4b0af0..cec24f56b9c 100644 --- a/test/apps/iodemo/ucx_wrapper.cc +++ b/test/apps/iodemo/ucx_wrapper.cc @@ -702,6 +702,7 @@ bool UcxConnection::process_request(const char *what, ucx_request *r = reinterpret_cast(ptr_status); if (r->completed) { // already completed by callback + assert(ucp_request_is_completed(r)); status = r->status; (*callback)(status); UcxContext::request_release(r);