Skip to content

Commit

Permalink
revert broadcast execution and fix batchnorm folding
Browse files: browse the repository at this point in the history
  • Loading branch information
jlxue committed Apr 15, 2022
1 parent 50e2c59 commit f72b206
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 14 deletions.
3 changes: 2 additions & 1 deletion src/nnfusion/engine/pass/codegen/cuda_codegen_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1133,6 +1133,7 @@ void CudaCodegenPass::create_main_file(std::shared_ptr<InterpreterContext> ctx,
lu_main << get_d2hcopy(tu)->get_code();
lu_main << get_sync()->get_code();
}
/*
for (size_t i = 0; i < tu->out.size(); i++)
{
auto& tensor = *tu->out[i];
Expand All @@ -1143,7 +1144,7 @@ void CudaCodegenPass::create_main_file(std::shared_ptr<InterpreterContext> ctx,
<< "\nprintf(\" .. (size = " << tensor.get_tensor_layout()->get_size()
<< ", ends with %e);\\n\", (float)" << tensor.get_name() << "_host["
<< tensor.get_tensor_layout()->get_size() - 1 << "]);\n";
}
}*/
lu_main.block_end();

lu_main << "\n//GPU time measurement\n";
Expand Down
26 changes: 13 additions & 13 deletions src/nnfusion/engine/pass/graph/batchnorm_inference_folding_pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -622,10 +622,10 @@ class BatchNormInferenceOptimizer
auto new_broadcast_gnode = m_graph->add_node_and_edge(
std::make_shared<op::Broadcast>(conv_output_shape, broadcast_axes),
{new_conv_bias_gnode});
shared_ptr<KernelContext> ke_ctx(new KernelContext(new_broadcast_gnode));
KernelEmitter::Pointer any_op_ke = std::make_shared<nnfusion::kernels::cuda::AnyOP>(ke_ctx);
any_op_ke->get_or_emit_source();
(*new_broadcast_gnode)["Kernel_Selection_Result"] = std::make_pair(NNFusion_DeviceType::CUDA_GPU, any_op_ke);
// shared_ptr<KernelContext> ke_ctx(new KernelContext(new_broadcast_gnode));
// KernelEmitter::Pointer any_op_ke = std::make_shared<nnfusion::kernels::cuda::AnyOP>(ke_ctx);
// any_op_ke->get_or_emit_source();
// (*new_broadcast_gnode)["Kernel_Selection_Result"] = std::make_pair(NNFusion_DeviceType::CUDA_GPU, any_op_ke);

m_nodes.resize(m_graph->get_max_node_id());
m_nodes[new_broadcast_gnode->get_id()] = std::make_shared<TaggedNode>();
Expand Down Expand Up @@ -796,10 +796,10 @@ class BatchNormInferenceOptimizer
auto new_broadcast_gnode = m_graph->add_node_and_edge(
std::make_shared<op::Broadcast>(conv_output_shape, broadcast_axes),
{bn_node->get_in_edge(1)->get_src()});
shared_ptr<KernelContext> ke_ctx(new KernelContext(new_broadcast_gnode));
KernelEmitter::Pointer any_op_ke = std::make_shared<nnfusion::kernels::cuda::AnyOP>(ke_ctx);
any_op_ke->get_or_emit_source();
(*new_broadcast_gnode)["Kernel_Selection_Result"] = std::make_pair(NNFusion_DeviceType::CUDA_GPU, any_op_ke);;
// shared_ptr<KernelContext> ke_ctx(new KernelContext(new_broadcast_gnode));
// KernelEmitter::Pointer any_op_ke = std::make_shared<nnfusion::kernels::cuda::AnyOP>(ke_ctx);
// any_op_ke->get_or_emit_source();
// (*new_broadcast_gnode)["Kernel_Selection_Result"] = std::make_pair(NNFusion_DeviceType::CUDA_GPU, any_op_ke);;
m_nodes.resize(m_graph->get_max_node_id());
m_nodes[new_broadcast_gnode->get_id()] = std::make_shared<TaggedNode>();
m_nodes[new_broadcast_gnode->get_id()]->node = new_broadcast_gnode;
Expand Down Expand Up @@ -903,11 +903,11 @@ bool BatchNormInferenceFoldingPass::run_on_graph(std::shared_ptr<nnfusion::graph
BatchNormInferenceOptimizer optimizer(graph, pattern);
optimizer.MatchAndFolding();
}
if (FLAGS_fconst_folding_backend != "")
{
auto const_folding_optimizer = RuntimeConstantFoldingPass();
const_folding_optimizer.run_on_graph(graph);
}
// if (FLAGS_fconst_folding_backend != "")
// {
// auto const_folding_optimizer = RuntimeConstantFoldingPass();
// const_folding_optimizer.run_on_graph(graph);
// }
NNFUSION_LOG(INFO) << "batchnorm inference folding Pass ends for Graph: "
<< graph->get_name();
}
Expand Down

0 comments on commit f72b206

Please sign in to comment.