From 0777e297299c4e7a5071dc2ee97b87adcd22840e Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Tue, 23 Nov 2021 15:52:08 +0800 Subject: [PATCH] Fix AntaresCpuKernelEmitter and add ir_based_fusion in GENERIC_CPU backend (#351) * Update AntaresCpuKernelEmitter for Antares v0.2.x * Add IRBasedFusion for GENERIC_CPU backend Co-authored-by: Lingxiao Ma --- src/nnfusion/core/kernels/cpu/cpu_kernel_emitter.cpp | 7 +++---- src/nnfusion/engine/device/cpu.cpp | 5 +++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/nnfusion/core/kernels/cpu/cpu_kernel_emitter.cpp b/src/nnfusion/core/kernels/cpu/cpu_kernel_emitter.cpp index ce659396c..145c22121 100644 --- a/src/nnfusion/core/kernels/cpu/cpu_kernel_emitter.cpp +++ b/src/nnfusion/core/kernels/cpu/cpu_kernel_emitter.cpp @@ -151,20 +151,19 @@ LanguageUnit_p cpu::AntaresCpuKernelEmitter::emit_function_body() auto& lu = *_lu; // extract kernel code - const char* s_func_pattern = "// [thread_compute]\n"; + const char* s_func_pattern = "// [thread_extent] "; const char* e_func_pattern = "\n}\n"; const char* s_rank_pattern = "__rank__ = "; const char* e_rank_pattern = "\n"; std::string::size_type s_func_pos = antares_code.find(s_func_pattern); std::string::size_type e_func_pos = antares_code.rfind(e_func_pattern); - if (s_func_pos != std::string::npos || e_func_pos != std::string::npos) + if (s_func_pos == std::string::npos || e_func_pos == std::string::npos) return nullptr; NNFUSION_CHECK(s_func_pos != std::string::npos && e_func_pos != std::string::npos); - std::string func_body = antares_code.substr(s_func_pos + strlen(s_func_pattern), - e_func_pos - s_func_pos - strlen(s_func_pattern)); + std::string func_body = antares_code.substr(s_func_pos, e_func_pos - s_func_pos); std::string::size_type s_rank_pos = func_body.find(s_rank_pattern); std::string::size_type e_rank_pos = func_body.find(e_rank_pattern); std::string rank_str = func_body.substr(s_rank_pos + strlen(s_rank_pattern), diff --git a/src/nnfusion/engine/device/cpu.cpp b/src/nnfusion/engine/device/cpu.cpp index f1c7a7447..5fa241bf1 100644 --- a/src/nnfusion/engine/device/cpu.cpp +++ b/src/nnfusion/engine/device/cpu.cpp @@ -4,6 +4,7 @@ #include "cpu.hpp" #include "reversed_dfs_visitor.hpp" +#include "nnfusion/engine/pass/extract_graph_signature.hpp" #include "nnfusion/engine/pass/graph/assign_async_info_pass.hpp" #include "nnfusion/engine/pass/graph/assign_layout_pass.hpp" #include "nnfusion/engine/pass/graph/autodiff_pass.hpp" @@ -14,6 +15,7 @@ #include "nnfusion/engine/pass/graph/gemm_fusion_pass.hpp" #include "nnfusion/engine/pass/graph/gnode_device_dispatcher.hpp" #include "nnfusion/engine/pass/graph/gradient_weight_mapping_pass.hpp" +#include "nnfusion/engine/pass/graph/ir_based_fusion_pass.hpp" #include "nnfusion/engine/pass/graph/kernel_fusion_pass.hpp" #include "nnfusion/engine/pass/graph/kernel_profiling_pass.hpp" #include "nnfusion/engine/pass/graph/kernel_selection.hpp" @@ -23,8 +25,6 @@ #include "nnfusion/engine/pass/graph/pattern_substitution.hpp" #include "nnfusion/engine/pass/graph/runtime_const_folding_pass.hpp" #include "nnfusion/engine/pass/graph/vector_dot_transpose_pass.hpp" - -#include "nnfusion/engine/pass/extract_graph_signature.hpp" #include "nnfusion/engine/pass/tensor/inplace_tensor_analysis.hpp" #include "nnfusion/engine/pass/tensor/liveness_analysis.hpp" #include "nnfusion/engine/pass/tensor/tensor_device_dispatcher.hpp" @@ -51,6 +51,7 @@ CpuEngine::CpuEngine() g_passes->push_back(make_shared()); g_passes->push_back(make_shared()); + g_passes->push_back(make_shared()); g_passes->push_back(make_shared()); // Kernel selection