From ecbdf0ddb7c71992feea31ad5f98468becb5edd0 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Wed, 31 Aug 2022 08:34:18 +0000 Subject: [PATCH] Fix result inplace opt when a node has multi-output --- .../engine/pass/codegen/cuda_codegen_pass.cpp | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/nnfusion/engine/pass/codegen/cuda_codegen_pass.cpp b/src/nnfusion/engine/pass/codegen/cuda_codegen_pass.cpp index a80139f61..b16aa950f 100644 --- a/src/nnfusion/engine/pass/codegen/cuda_codegen_pass.cpp +++ b/src/nnfusion/engine/pass/codegen/cuda_codegen_pass.cpp @@ -325,14 +325,27 @@ bool CudaCodegenPass::collect_funcs(std::shared_ptr ctx, // todo: this hack is to eliminate d2d copy caused by extern result memory if (FLAGS_fextern_result_memory && gnode) { + size_t non_control_edge = 0; + std::shared_ptr out_edge; for (size_t i = 0; i < gnode->get_out_edges().size(); i++) { - auto out_tensor = - kernel->m_context->outputs[gnode->get_out_edges()[i]->get_src_output()]; - if (gnode->get_out_edges()[i]->get_dst()->get_op_ptr()->is_output() && + if (!gnode->get_out_edges()[i]->is_control_edge()) + { + non_control_edge++; + out_edge = gnode->get_out_edges()[i]; + if (non_control_edge > 1) + break; + } + } + + // inplace the result tensor into kernel only if there is exactly one non-control out edge + if (non_control_edge == 1) + { + auto out_tensor = kernel->m_context->outputs[out_edge->get_src_output()]; + if (out_edge->get_dst()->get_op_ptr()->is_output() && !is_ref_tensor(ins, out_tensor)) { - std::shared_ptr output = gnode->get_out_edges()[i]->get_dst(); + std::shared_ptr output = out_edge->get_dst(); std::string in_name = output->get_input_tensor(0).get_name(); std::string out_name = output->get_output_tensor(0).get_name(); int pos = call_str.find(", " + in_name);