From a75b17665d835e92166eeaea51e4109d0f9e8ab3 Mon Sep 17 00:00:00 2001 From: Brian Barrett Date: Thu, 24 Aug 2023 17:54:52 +0000 Subject: [PATCH] aws: Properly initialize optlen for fi_getopt() call The optlen argument for fi_getopt() should be initialized to the length of the optval datatype. The EFA provider does not currently enforce this check, but will do so in the future (it is a bug in the provider that it does not currently check the length before writing). Signed-off-by: Brian Barrett (cherry picked from commit 69d632033877f688463bc6ab3aa33c5ecb42480e) --- src/platform-aws.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/platform-aws.c b/src/platform-aws.c index ffde8643a..ade1306e5 100644 --- a/src/platform-aws.c +++ b/src/platform-aws.c @@ -172,10 +172,10 @@ static ncclResult_t validate_rdma_write(struct fid_ep *ep) int ret = ncclSuccess; #if HAVE_DECL_FI_OPT_EFA_EMULATED_WRITE bool optval; - size_t optlen = 0; + size_t optlen = sizeof(optval); ret = fi_getopt(&ep->fid, FI_OPT_ENDPOINT, FI_OPT_EFA_EMULATED_WRITE, &optval, &optlen); - if(ret != 0 || optlen != sizeof(bool)) { + if(ret != 0 || optlen != sizeof(optval)) { NCCL_OFI_WARN("Couldn't get FI_OPT_EFA_EMULATED_WRITE. optlen: %lu, RC: %d, ERROR: %s", optlen, ret, fi_strerror(-ret)); ret = ncclSystemError;