From 2912c2a38db1572c9eaf1a1eae86f610bba17610 Mon Sep 17 00:00:00 2001 From: fishbell Date: Tue, 17 Dec 2024 16:59:40 +0800 Subject: [PATCH 1/6] initial trial Signed-off-by: fishbell --- .../include/intel_gpu/graph/program.hpp | 1 + .../include/intel_gpu/primitives/reorder.hpp | 7 +- .../graph/graph_optimizer/compile_graph.cpp | 6 +- .../graph_optimizer/cpu_ref_runtime_model.bin | 0 .../graph_optimizer/cpu_ref_runtime_model.xml | 8905 +++++++++++++++++ .../graph/graph_optimizer/kernel.errors.txt | 10 + .../reorder_reshape_permute_opt.cpp | 203 + .../src/graph/impls/ocl/kernel.errors.txt | 10 + .../src/graph/impls/onednn/kernel.errors.txt | 10 + .../src/graph/impls/onednn/reorder_onednn.cpp | 6 +- .../src/graph/include/pass_manager.h | 16 + .../intel_gpu/src/graph/kernel.errors.txt | 10 + src/plugins/intel_gpu/src/graph/program.cpp | 4 +- src/plugins/intel_gpu/src/graph/reorder.cpp | 35 +- .../unit/passes/reorder_reshape_permute.cpp | 55 + 15 files changed, 9271 insertions(+), 7 deletions(-) create mode 100644 src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.bin create mode 100644 src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.xml create mode 100644 src/plugins/intel_gpu/src/graph/graph_optimizer/kernel.errors.txt create mode 100644 src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_reshape_permute_opt.cpp create mode 100644 src/plugins/intel_gpu/src/graph/impls/ocl/kernel.errors.txt create mode 100644 src/plugins/intel_gpu/src/graph/impls/onednn/kernel.errors.txt create mode 100644 src/plugins/intel_gpu/src/graph/kernel.errors.txt create mode 100644 src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index 379d7b3b64a222..5d035ed9f9ae24 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -47,6 +47,7 @@ struct program { friend class post_optimize_weights; // to be removed when possible friend class prepare_primitive_fusing_through; // to be removed when possible friend class reorder_transfer; // to be removed when possible + friend class reorder_reshape_transpose_fuse; // to be removed when possible friend class fuse_constant_transposes; // to be removed when possible friend class program_wrapper; // this class is intended to extend the interface of program for // the usage within tests_core_internal project only diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp index ee53cbd8027087..79256f500d20c2 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp @@ -223,12 +223,17 @@ struct reorder : public primitive_base { memory_type input_mem_type = memory_type::buffer; /// @brief Parameters required for reorder weights. std::shared_ptr weights_reorder_params = {}; - + /// @brief Parameters required for source transpose. + std::vector src_permutation; inline bool has_surface_input() const { return input.size() == 1 && input_mem_type == memory_type::surface; } + void set_src_permutation(const std::vector & src_perm) { + this->src_permutation = src_perm; + } + /// @brief Convert truncation Mode bool truncate = false; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp index fb4d6bfa590312..585dfb505aa344 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp @@ -44,6 +44,8 @@ void compile_graph::run(program& p) { std::string fail_reason = ""; try { if (selected_impl_manager) { + if (node->id() == "reorder:/detect/Reshape_14_reorder") + std::cout << "break" << std::endl; node->selected_impl = selected_impl_manager->create(*node, *params); } } catch (std::exception& e) { @@ -62,8 +64,8 @@ void compile_graph::run(program& p) { }); } } - - task_executor->run_and_wait(tasks); + for (auto& iter : tasks) + task_executor->run_and_wait({iter}); tasks.clear(); if (exception) { diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.bin b/src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.bin new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.xml b/src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.xml new file mode 100644 index 00000000000000..6c824cb465dde4 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.xml @@ -0,0 +1,8905 @@ + + + + + + + + 1 + 1088 + 1920 + 3 + + + + + + + + 1 + 1088 + 1920 + 3 + + + + + 1 + 1088 + 1920 + 3 + + + + + + + + 1 + 1088 + 1920 + 3 + + + + + 1 + 3 + 1088 + 1920 + + + + + + + + 16 + 3 + 3 + 3 + + + + + + + + 1 + 16 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 16 + 1 + 1 + + + 1 + + + + + 16 + + + + + + + + 1 + 3 + 1088 + 1920 + + + 16 + 3 + 3 + 3 + + + 16 + + + + + 1 + 16 + 544 + 960 + + + + + + + + 32 + 16 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 16 + 544 + 960 + + + 32 + 16 + 3 + 3 + + + 32 + + + + + 1 + 32 + 272 + 480 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 272 + 480 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 272 + 480 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 272 + 480 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 272 + 480 + + + + + + + + 64 + 32 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 32 + 272 + 480 + + + 64 + 32 + 3 + 3 + + + 64 + + + + + 1 + 64 + 136 + 240 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 136 + 240 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 136 + 240 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 136 + 240 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 136 + 240 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 136 + 240 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 136 + 240 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 136 + 240 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 136 + 240 + + + + + + + + 128 + 64 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 64 + 136 + 240 + + + 128 + 64 + 3 + 3 + + + 128 + + + + + 1 + 128 + 68 + 120 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 68 + 120 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 68 + 120 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 68 + 120 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 68 + 120 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 68 + 120 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 68 + 120 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 68 + 120 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 68 + 120 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 68 + 120 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 68 + 120 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 68 + 120 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 68 + 120 + + + + + + + + 256 + 128 + 3 + 3 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 256 + 1 + 1 + + + 1 + + + + + 256 + + + + + + + + 1 + 128 + 68 + 120 + + + 256 + 128 + 3 + 3 + + + 256 + + + + + 1 + 256 + 34 + 60 + + + + + + + + 256 + 256 + 3 + 3 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 256 + 1 + 1 + + + 1 + + + + + 256 + + + + + + + + 1 + 256 + 34 + 60 + + + 256 + 256 + 3 + 3 + + + 256 + + + + + 1 + 256 + 34 + 60 + + + + + + + + 256 + 256 + 3 + 3 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 256 + 1 + 1 + + + 1 + + + + + 256 + + + + + + + + 1 + 256 + 34 + 60 + + + 256 + 256 + 3 + 3 + + + 256 + + + + + 1 + 256 + 34 + 60 + + + + + + + + 128 + 256 + 1 + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 256 + 34 + 60 + + + 128 + 256 + 1 + 1 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 128 + 256 + 1 + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 256 + 34 + 60 + + + 128 + 256 + 1 + 1 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 34 + 60 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 128 + 128 + 1 + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 34 + 60 + + + 128 + 128 + 1 + 1 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 1 + 128 + 34 + 60 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 1 + 128 + 34 + 60 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 1 + 128 + 34 + 60 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 1 + 128 + 34 + 60 + + + 1 + 128 + 34 + 60 + + + 1 + 128 + 34 + 60 + + + 1 + 128 + 34 + 60 + + + + + 1 + 512 + 34 + 60 + + + + + + + + 128 + 512 + 1 + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 512 + 34 + 60 + + + 128 + 512 + 1 + 1 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 34 + 60 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 1 + 128 + 34 + 60 + + + 1 + 128 + 34 + 60 + + + + + 1 + 256 + 34 + 60 + + + + + + + + 256 + 256 + 1 + 1 + + + + + + + + 1 + 256 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 256 + 1 + 1 + + + 1 + + + + + 256 + + + + + + + + 1 + 256 + 34 + 60 + + + 256 + 256 + 1 + 1 + + + 256 + + + + + 1 + 256 + 34 + 60 + + + + + + + + 64 + 256 + 1 + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 256 + 34 + 60 + + + 64 + 256 + 1 + 1 + + + 64 + + + + + 1 + 64 + 34 + 60 + + + + + + + + 4 + + + + + + + + 4 + + + + + + + + 1 + 64 + 34 + 60 + + + 4 + + + 4 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 128 + 1 + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 128 + 68 + 120 + + + 64 + 128 + 1 + 1 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 1 + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 136 + 240 + + + 64 + 64 + 1 + 1 + + + 64 + + + + + 1 + 64 + 136 + 240 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 136 + 240 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 1 + 64 + 68 + 120 + + + 1 + 64 + 68 + 120 + + + 1 + 64 + 68 + 120 + + + + + 1 + 192 + 68 + 120 + + + + + + + + 64 + 192 + 1 + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 192 + 68 + 120 + + + 64 + 192 + 1 + 1 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 32 + 64 + 1 + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 64 + 68 + 120 + + + 32 + 64 + 1 + 1 + + + 32 + + + + + 1 + 32 + 68 + 120 + + + + + + + + 4 + + + + + + + + 1 + 32 + 68 + 120 + + + 4 + + + 4 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 32 + 64 + 1 + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 64 + 136 + 240 + + + 32 + 64 + 1 + 1 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 32 + 32 + 1 + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 272 + 480 + + + 32 + 32 + 1 + 1 + + + 32 + + + + + 1 + 32 + 272 + 480 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 272 + 480 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 1 + 32 + 136 + 240 + + + 1 + 32 + 136 + 240 + + + 1 + 32 + 136 + 240 + + + + + 1 + 96 + 136 + 240 + + + + + + + + 32 + 96 + 1 + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 96 + 136 + 240 + + + 32 + 96 + 1 + 1 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 136 + 240 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 136 + 240 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 136 + 240 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 136 + 240 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 32 + 32 + 1 + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 136 + 240 + + + 32 + 32 + 1 + 1 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 136 + 240 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 4 + 32 + 1 + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + 1 + + + + + 4 + + + + + + + + 1 + 32 + 136 + 240 + + + 4 + 32 + 1 + 1 + + + 4 + + + + + 1 + 4 + 136 + 240 + + + + + + + + 1 + 4 + 136 + 240 + + + + + 1 + 4 + 136 + 240 + + + + + + + + 3 + + + + + + + + 1 + 4 + 136 + 240 + + + 3 + + + + + 1 + 4 + 32640 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 136 + 240 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 68 + 120 + + + + + + + + 1 + 32 + 68 + 120 + + + 1 + 32 + 68 + 120 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 1 + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 1 + 1 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 4 + 64 + 1 + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + 1 + + + + + 4 + + + + + + + + 1 + 64 + 68 + 120 + + + 4 + 64 + 1 + 1 + + + 4 + + + + + 1 + 4 + 68 + 120 + + + + + + + + 1 + 4 + 68 + 120 + + + + + 1 + 4 + 68 + 120 + + + + + + + + 3 + + + + + + + + 1 + 4 + 68 + 120 + + + 3 + + + + + 1 + 4 + 8160 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 34 + 60 + + + + + + + + 1 + 64 + 34 + 60 + + + 1 + 64 + 34 + 60 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 34 + 60 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 34 + 60 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 34 + 60 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 34 + 60 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 128 + 128 + 1 + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 34 + 60 + + + 128 + 128 + 1 + 1 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 34 + 60 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 4 + 128 + 1 + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + 1 + + + + + 4 + + + + + + + + 1 + 128 + 34 + 60 + + + 4 + 128 + 1 + 1 + + + 4 + + + + + 1 + 4 + 34 + 60 + + + + + + + + 1 + 4 + 34 + 60 + + + + + 1 + 4 + 34 + 60 + + + + + + + + 3 + + + + + + + + 1 + 4 + 34 + 60 + + + 3 + + + + + 1 + 4 + 2040 + + + + + + + + 1 + 4 + 32640 + + + 1 + 4 + 8160 + + + 1 + 4 + 2040 + + + + + 1 + 4 + 42840 + + + + + + + + 3 + + + + + + + + 1 + 4 + 42840 + + + 3 + + + + + 1 + 42840 + 4 + + + + + + + + 1 + + + + + + + + 2 + + + + + + + + 1 + 42840 + 4 + + + 1 + + + 2 + + + + + 1 + 42840 + 2 + + + 1 + 42840 + 2 + + + + + + + + 1 + 42840 + 2 + + + + + + + + 1 + 42840 + 2 + + + + + + + + 1 + 42840 + 2 + + + 1 + 42840 + 2 + + + 1 + 42840 + 2 + + + 1 + 42840 + 2 + + + + + 1 + 42840 + 2 + + + 1 + 42840 + 2 + + + + + + + + 1 + 42840 + 2 + + + 1 + 42840 + 2 + + + + + 1 + 42840 + 4 + + + + + + + + 1 + 42840 + 1 + + + + + + + + 1 + 42840 + 4 + + + 1 + 42840 + 1 + + + + + 1 + 42840 + 4 + + + + + + + + 1 + 42840 + 1 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 136 + 240 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 13 + 32 + 1 + 1 + + + + + + + + 1 + 13 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 13 + 1 + 1 + + + 1 + + + + + 13 + + + + + + + + 1 + 32 + 136 + 240 + + + 13 + 32 + 1 + 1 + + + 13 + + + + + 1 + 13 + 136 + 240 + + + + + + + + 1 + 13 + 136 + 240 + + + + + 1 + 13 + 136 + 240 + + + + + + + + 3 + + + + + + + + 1 + 13 + 136 + 240 + + + 3 + + + + + 1 + 13 + 32640 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 13 + 64 + 1 + 1 + + + + + + + + 1 + 13 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 13 + 1 + 1 + + + 1 + + + + + 13 + + + + + + + + 1 + 64 + 68 + 120 + + + 13 + 64 + 1 + 1 + + + 13 + + + + + 1 + 13 + 68 + 120 + + + + + + + + 1 + 13 + 68 + 120 + + + + + 1 + 13 + 68 + 120 + + + + + + + + 3 + + + + + + + + 1 + 13 + 68 + 120 + + + 3 + + + + + 1 + 13 + 8160 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 34 + 60 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 13 + 128 + 1 + 1 + + + + + + + + 1 + 13 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 13 + 1 + 1 + + + 1 + + + + + 13 + + + + + + + + 1 + 128 + 34 + 60 + + + 13 + 128 + 1 + 1 + + + 13 + + + + + 1 + 13 + 34 + 60 + + + + + + + + 1 + 13 + 34 + 60 + + + + + 1 + 13 + 34 + 60 + + + + + + + + 3 + + + + + + + + 1 + 13 + 34 + 60 + + + 3 + + + + + 1 + 13 + 2040 + + + + + + + + 1 + 13 + 32640 + + + 1 + 13 + 8160 + + + 1 + 13 + 2040 + + + + + 1 + 13 + 42840 + + + + + + + + 3 + + + + + + + + 1 + 13 + 42840 + + + 3 + + + + + 1 + 42840 + 13 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 136 + 240 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 3 + 32 + 1 + 1 + + + + + + + + 1 + 3 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 3 + 1 + 1 + + + 1 + + + + + 3 + + + + + + + + 1 + 32 + 136 + 240 + + + 3 + 32 + 1 + 1 + + + 3 + + + + + 1 + 3 + 136 + 240 + + + + + + + + 1 + 3 + 136 + 240 + + + + + 1 + 3 + 136 + 240 + + + + + + + + 1 + 3 + 136 + 240 + + + + + 1 + 32640 + 3 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 3 + 64 + 1 + 1 + + + + + + + + 1 + 3 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 3 + 1 + 1 + + + 1 + + + + + 3 + + + + + + + + 1 + 64 + 68 + 120 + + + 3 + 64 + 1 + 1 + + + 3 + + + + + 1 + 3 + 68 + 120 + + + + + + + + 1 + 3 + 68 + 120 + + + + + 1 + 3 + 68 + 120 + + + + + + + + 1 + 3 + 68 + 120 + + + + + 1 + 8160 + 3 + + + + + + + + 128 + 128 + 3 + 3 + + + + + + + + 1 + 128 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 128 + 1 + 1 + + + 1 + + + + + 128 + + + + + + + + 1 + 128 + 34 + 60 + + + 128 + 128 + 3 + 3 + + + 128 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 3 + 128 + 1 + 1 + + + + + + + + 1 + 3 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 3 + 1 + 1 + + + 1 + + + + + 3 + + + + + + + + 1 + 128 + 34 + 60 + + + 3 + 128 + 1 + 1 + + + 3 + + + + + 1 + 3 + 34 + 60 + + + + + + + + 1 + 3 + 34 + 60 + + + + + 1 + 3 + 34 + 60 + + + + + + + + 1 + 3 + 34 + 60 + + + + + 1 + 2040 + 3 + + + + + + + + 1 + 32640 + 3 + + + 1 + 8160 + 3 + + + 1 + 2040 + 3 + + + + + 1 + 42840 + 3 + + + + + + + + 3 + + + + + + + + 3 + + + + + + + + 3 + + + + + + + + 1 + 42840 + 4 + + + 3 + + + 3 + + + 3 + + + + + 1 + 42840 + 2 + + + + + + + + 1 + 42840 + 1 + + + + + + + + 1 + 42840 + 2 + + + 1 + 42840 + 1 + + + + + 1 + 42840 + 2 + + + + + + + + 1 + 42840 + 2 + + + 1 + 42840 + 2 + + + + + 1 + 42840 + 4 + + + + + + + + 32 + 32 + 1 + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 136 + 240 + + + 32 + 32 + 1 + 1 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 32 + 32 + 3 + 3 + + + + + + + + 1 + 32 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 32 + 1 + 1 + + + 1 + + + + + 32 + + + + + + + + 1 + 32 + 136 + 240 + + + 32 + 32 + 3 + 3 + + + 32 + + + + + 1 + 32 + 136 + 240 + + + + + + + + 4 + 32 + 1 + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + 1 + + + + + 4 + + + + + + + + 1 + 32 + 136 + 240 + + + 4 + 32 + 1 + 1 + + + 4 + + + + + 1 + 4 + 136 + 240 + + + + + + + + 1 + 4 + 136 + 240 + + + + + 1 + 32640 + 4 + + + + + + + + 1 + 32640 + 4 + + + + + 1 + 32640 + 4 + + + + + + + + 64 + 64 + 1 + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 1 + 1 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 64 + 64 + 3 + 3 + + + + + + + + 1 + 64 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 64 + 1 + 1 + + + 1 + + + + + 64 + + + + + + + + 1 + 64 + 68 + 120 + + + 64 + 64 + 3 + 3 + + + 64 + + + + + 1 + 64 + 68 + 120 + + + + + + + + 4 + 64 + 1 + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + 1 + + + + + 4 + + + + + + + + 1 + 64 + 68 + 120 + + + 4 + 64 + 1 + 1 + + + 4 + + + + + 1 + 4 + 68 + 120 + + + + + + + + 1 + 4 + 68 + 120 + + + + + 1 + 8160 + 4 + + + + + + + + 1 + 8160 + 4 + + + + + 1 + 8160 + 4 + + + + + + + + 1 + 128 + 34 + 60 + + + + + + + + 1 + 128 + 34 + 60 + + + + + 1 + 128 + 34 + 60 + + + + + + + + 4 + 128 + 1 + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 4 + 1 + 1 + + + 1 + + + + + 4 + + + + + + + + 1 + 128 + 34 + 60 + + + 4 + 128 + 1 + 1 + + + 4 + + + + + 1 + 4 + 34 + 60 + + + + + + + + 1 + 4 + 34 + 60 + + + + + 1 + 2040 + 4 + + + + + + + + 1 + 2040 + 4 + + + + + 1 + 2040 + 4 + + + + + + + + 1 + 32640 + 4 + + + 1 + 8160 + 4 + + + 1 + 2040 + 4 + + + + + 1 + 42840 + 4 + + + + + + + + 1 + 42840 + 1 + + + + + + + + 1 + 42840 + 4 + + + 1 + 42840 + 4 + + + 1 + 42840 + 1 + + + + + 1 + 42840 + 4 + + + + + + + + 12 + 32 + 1 + 1 + + + + + + + + 1 + 12 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 12 + 1 + 1 + + + 1 + + + + + 12 + + + + + + + + 1 + 32 + 136 + 240 + + + 12 + 32 + 1 + 1 + + + 12 + + + + + 1 + 12 + 136 + 240 + + + + + + + + 1 + 12 + 136 + 240 + + + + + 1 + 32640 + 12 + + + + + + + + 12 + 64 + 1 + 1 + + + + + + + + 1 + 12 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 12 + 1 + 1 + + + 1 + + + + + 12 + + + + + + + + 1 + 64 + 68 + 120 + + + 12 + 64 + 1 + 1 + + + 12 + + + + + 1 + 12 + 68 + 120 + + + + + + + + 1 + 12 + 68 + 120 + + + + + 1 + 8160 + 12 + + + + + + + + 12 + 128 + 1 + 1 + + + + + + + + 1 + 12 + 1 + 1 + + + + + + + + 1 + + + + + + + + 1 + 12 + 1 + 1 + + + 1 + + + + + 12 + + + + + + + + 1 + 128 + 34 + 60 + + + 12 + 128 + 1 + 1 + + + 12 + + + + + 1 + 12 + 34 + 60 + + + + + + + + 1 + 12 + 34 + 60 + + + + + 1 + 2040 + 12 + + + + + + + + 1 + 32640 + 12 + + + 1 + 8160 + 12 + + + 1 + 2040 + 12 + + + + + 1 + 42840 + 12 + + + + + + + + 1 + 42840 + 4 + + + 1 + 42840 + 1 + + + 1 + 42840 + 13 + + + 1 + 42840 + 3 + + + 1 + 42840 + 4 + + + 1 + 42840 + 12 + + + + + 1 + 42840 + 37 + + + + + + + + 1 + + + + + + + + 5 + + + + + + + + 1 + 42840 + 37 + + + 1 + + + 5 + + + + + 1 + 42840 + 4 + + + 1 + 42840 + 1 + + + 1 + 42840 + 13 + + + 1 + 42840 + 7 + + + 1 + 42840 + 12 + + + + + + + + 1 + + + + + + + + 1 + 42840 + 12 + + + 1 + + + + + 1 + 42840 + 1 + + + 1 + 42840 + 1 + + + + + + + + 1 + 42840 + 1 + + + + + + + + 1 + + + + + + + + 1 + 42840 + 13 + + + 1 + + + + + 1 + 42840 + 1 + + + 1 + 42840 + 1 + + + + + + + + 1 + 42840 + 1 + + + + + + + + 1 + 42840 + 1 + + + + + + + + 1 + + + + + + + + 1 + 42840 + 13 + + + 1 + + + + + 1 + 42840 + 1 + + + + + + + + 1 + 42840 + 1 + + + + + 1 + 42840 + 1 + + + + + + + + 1 + + + + + + + + 1 + 42840 + 12 + + + 1 + + + + + 1 + 42840 + 1 + + + + + + + + 1 + 42840 + 1 + + + + + 1 + 42840 + 1 + + + + + + + + 1 + 42840 + 4 + + + 1 + 42840 + 1 + + + 1 + 42840 + 1 + + + 1 + 42840 + 7 + + + 1 + 42840 + 1 + + + 1 + 42840 + 1 + + + + + 1 + 42840 + 15 + + + + + + + + 1 + 42840 + 15 + + + + + 1 + 42840 + 15 + + + + + + + + 1 + 42840 + 15 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/kernel.errors.txt b/src/plugins/intel_gpu/src/graph/graph_optimizer/kernel.errors.txt new file mode 100644 index 00000000000000..9541c79acd3a3a --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/kernel.errors.txt @@ -0,0 +1,10 @@ +Instruction / Operand / Region Errors: + +/-------------------------------------------!!!KERNEL HEADER ERRORS FOUND!!!-------------------------------------------\ +Error in CISA routine with name: igc_check + Error Message: Input V38 = [256, 260) intersects with V37 = [256, 260) +\----------------------------------------------------------------------------------------------------------------------/ + + + + diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_reshape_permute_opt.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_reshape_permute_opt.cpp new file mode 100644 index 00000000000000..2f79a73ef298b3 --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_reshape_permute_opt.cpp @@ -0,0 +1,203 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "pass_manager.h" +#include "permute_inst.h" +#include "program_helpers.h" +#include "reorder_inst.h" +#include "reshape_inst.h" + +using namespace cldnn; +bool reorder_reshape_transpose_fuse::check_order(const std::vector& transpose_order, + const std::vector& layout_order, + const std::vector& reorder_inorder, + const std::vector& reorder_outorder) { + if (transpose_order.size() != layout_order.size() || layout_order.size() != reorder_inorder.size() || + reorder_inorder.size() != reorder_outorder.size()) { + return false; + } + auto rev_layout_order = std::vector(layout_order.size()); + for (size_t i = 0; i < rev_layout_order.size(); i++) { + rev_layout_order[layout_order[i]] = i; + } + + auto new_transpose_order = std::vector(transpose_order.size()); + for (size_t i = 0; i < new_transpose_order.size(); i++) { + new_transpose_order[i] = layout_order[transpose_order[rev_layout_order[i]]]; + } + + auto reorder_order = std::vector(reorder_outorder.size()); + for (size_t i = 0; i < reorder_order.size(); i++) { + for (size_t j = 0; j < reorder_order.size(); j++) { + if (reorder_outorder[i] == reorder_inorder[j]) { + reorder_order[i] = j; + continue; + } + } + } + + auto summary_order = std::vector(transpose_order.size()); + for (size_t i = 0; i < summary_order.size(); i++) { + summary_order[i] = reorder_order[new_transpose_order[i]]; + } + + for (size_t i = 0; i < summary_order.size(); i++) { + if (summary_order[i] != i) { + return false; + } + } + return true; +} + +void reorder_reshape_transpose_fuse::run(program& p) { + bool update_processing_order = false; + // temp code to validate reorder + reshape + permute opt + // other patterns to consider: permute + (reshape) + reorder? + auto is_suitable_reorder = [](cldnn::program_node* node) { + return node->get_users().size() == 1 && node->is_dynamic() == false; + }; + auto is_suitable_reshape = [](cldnn::program_node* node) { + if (node->get_users().size() != 1 || node->is_dynamic()) + return false; + const auto& in_shape = node->get_input_layout(0).get_dims(); + const auto& out_shape = node->get_output_layout().get_dims(); + return in_shape.size() == out_shape.size(); + }; + auto is_suitable_transpose = [](cldnn::program_node* node) { + return node->get_users().size() == 1 && node->is_dynamic() == false; + }; + auto update_order = [](std::vector original_order, cldnn::program_node* reshape) { + if (!reshape) + return original_order; + // Example. For this sequence: + // [1,3,4,6] -> Reshape[1,3,24,1]-> [1,24,3,1] + // org order as (0,2,1) + // first reshape to [1,3,24] ->transpose(0,2,1) -> [1,24,3] + // updated order must be (0,2,3,1): + // dim with index=2 is split into 2 parts: 2 and 3 + const auto& reshape_in_shape = reshape->get_input_layout().get_dims(); + const auto& reshape_out_dim = reshape->get_output_layout().get_dims(); + auto reshape_out_shape = reshape_out_dim; + auto transformed_order = original_order; + ov::Shape new_shape(transformed_order.size()); + if (original_order.size() < reshape_out_dim.size() && reshape_out_dim.size() == 4) { + // if order dims is less than reshape dims, means reshape shape has been converted to upper dims + // merge spatial dims + reshape_out_shape.resize(original_order.size()); + for (size_t i = 0; i < reshape_out_dim.size(); ++i) { + if (i < 2) { + reshape_out_shape[i] = reshape_out_dim[i]; + } else { + reshape_out_shape[2] *= reshape_out_dim[i]; + } + } + const size_t merge_dim_idx = [&]() { + for (size_t i = 0; i < reshape_in_shape.size(); ++i) { + if (reshape_in_shape[i] != reshape_out_shape[i]) + return i; + } + OPENVINO_THROW("merged_dim_idx can not be found"); + }(); + auto insertIt = transformed_order.end(); + for (auto it = transformed_order.begin(); it != transformed_order.end(); ++it) { + auto& elem = *it; + if (elem > merge_dim_idx) { + elem++; + } else if (elem == merge_dim_idx) { + insertIt = it + 1; + } + } + transformed_order.insert(insertIt, merge_dim_idx + 1); + } else { + auto reorder_orders = [](std::vector& order, std::vector place_order) { + // for all elements to put in place + for (size_t i = 0; i < order.size() - 1; ++i) { + while (i != place_order[i]) { + // swap it with the element at its final place + auto alt = place_order[i]; + std::swap(order[i], order[alt]); + std::swap(place_order[i], place_order[alt]); + } + } + }; + reorder_orders(transformed_order, std::vector({0, 1, 3, 2})); + } + return transformed_order; + }; + + auto itr = p.get_processing_order().begin(); + while (itr != p.get_processing_order().end()) { + auto& node = *itr++; + if (!node->is_type()) + continue; + auto& transpose_node = node->as(); + if (transpose_node.id() == "transpose:/detect/Transpose") + std::cout << "break" << std::endl; + if (!is_suitable_transpose(&transpose_node)) + continue; + auto& child_node = transpose_node; + auto parent_node = child_node.get_dependency_with_port(0).first; + cldnn::program_node* inter_node; + if (parent_node->is_type()) { + inter_node = parent_node; + if (!is_suitable_reshape(inter_node)) { + continue; + } + parent_node = inter_node->get_dependency_with_port(0).first; + } else { + continue; // to be matched further + } + + if (!is_suitable_reorder(parent_node)) { + continue; + } + auto& reshape_node = inter_node->as(); // TODO: check null reshape node + + auto transpose_order = update_order(transpose_node.get_permute_order(), &reshape_node); + auto next_node = transpose_node.get_users().front(); + auto next_layout = next_node->get_input_layout(); + auto order_after_transpose = next_node->get_output_layout().get_dims_order(); + auto reorder_in_dims_order = parent_node->get_input_layout().get_dims_order(); + auto reorder_out_dims_order = parent_node->get_output_layout().get_dims_order(); + + if (check_order(transpose_order, order_after_transpose, reorder_in_dims_order, reorder_out_dims_order)) { + std::cout << "debug: " << transpose_node.id() << std::endl; + // qualified for merge + // making new reorder + const auto& prev_node = parent_node->get_dependency_with_port(0).first; + auto new_reorder = std::make_shared(parent_node->id() + reshape_node.id() + transpose_node.id(), + prev_node->id(), + parent_node->get_output_layout()); + std::vector permute_order(transpose_order.size()); + std::copy_n(transpose_order.begin(), transpose_order.size(), permute_order.begin()); + new_reorder->set_src_permutation(permute_order); + auto& new_reorder_node = p.get_or_create(new_reorder); + p.remove_all_connections(transpose_node); + p.remove_all_connections(reshape_node); + p.remove_all_connections(*parent_node); + p.remove_if_dangling(transpose_node); + p.remove_if_dangling(reshape_node); + p.remove_if_dangling(*parent_node); + p.add_connection(*prev_node, *next_node); + p.add_intermediate(new_reorder_node, *next_node, *prev_node); + new_reorder_node.recalc_output_layouts(false); + new_reorder_node.can_be_optimized(true); + update_processing_order = true; + + // if shapes don't match, another reshape must be inserted to perform shape alignment with next node + if (next_layout.get_dims() != new_reorder_node.get_output_layout().get_dims()) { + auto new_reshape = std::make_shared(parent_node->id() + reshape_node.id() + transpose_node.id() + "fake_reshape", + next_node->id(), + next_layout.get_tensor()); + auto& new_reshape_node = p.get_or_create(new_reshape); + p.add_intermediate(new_reshape_node, *next_node, new_reorder_node); + } + } + } + if (update_processing_order) { + p.get_processing_order().calc_processing_order(p); + } +} diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel.errors.txt b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel.errors.txt new file mode 100644 index 00000000000000..9541c79acd3a3a --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel.errors.txt @@ -0,0 +1,10 @@ +Instruction / Operand / Region Errors: + +/-------------------------------------------!!!KERNEL HEADER ERRORS FOUND!!!-------------------------------------------\ +Error in CISA routine with name: igc_check + Error Message: Input V38 = [256, 260) intersects with V37 = [256, 260) +\----------------------------------------------------------------------------------------------------------------------/ + + + + diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/kernel.errors.txt b/src/plugins/intel_gpu/src/graph/impls/onednn/kernel.errors.txt new file mode 100644 index 00000000000000..9541c79acd3a3a --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/kernel.errors.txt @@ -0,0 +1,10 @@ +Instruction / Operand / Region Errors: + +/-------------------------------------------!!!KERNEL HEADER ERRORS FOUND!!!-------------------------------------------\ +Error in CISA routine with name: igc_check + Error Message: Input V38 = [256, 260) intersects with V37 = [256, 260) +\----------------------------------------------------------------------------------------------------------------------/ + + + + diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp index 7e24cebd6b9ee9..12738741f9c7b6 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp @@ -54,7 +54,11 @@ struct reorder_onednn : typed_primitive_onednn_impl()->src_permutation; + if (permute_order.size()) + input_md = test_md.permute_axes({0, 3, 1, 2}); auto output_md = onednn::layout_to_memory_desc(output_layout); OPENVINO_ASSERT(input_md.get_format_kind() != dnnl::memory::format_kind::any, diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index 490076a37f788e..e007ae8d6855b6 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -363,6 +363,22 @@ class reorder_transfer : public base_pass { reorder_transfer() : base_pass("reorder_transfer") {} private: + bool check_order(const std::vector& transpose_order, + const std::vector& layout_order, + const std::vector& reorder_inorder, + const std::vector& reorder_outorder); + void run(program& p) override; +}; + +class reorder_reshape_transpose_fuse : public base_pass { +public: + reorder_reshape_transpose_fuse() : base_pass("reorder_reshape_transpose") {} + +private: + bool check_order(const std::vector& transpose_order, + const std::vector& layout_order, + const std::vector& reorder_inorder, + const std::vector& reorder_outorder); void run(program& p) override; }; diff --git a/src/plugins/intel_gpu/src/graph/kernel.errors.txt b/src/plugins/intel_gpu/src/graph/kernel.errors.txt new file mode 100644 index 00000000000000..9541c79acd3a3a --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/kernel.errors.txt @@ -0,0 +1,10 @@ +Instruction / Operand / Region Errors: + +/-------------------------------------------!!!KERNEL HEADER ERRORS FOUND!!!-------------------------------------------\ +Error in CISA routine with name: igc_check + Error Message: Input V38 = [256, 260) intersects with V37 = [256, 260) +\----------------------------------------------------------------------------------------------------------------------/ + + + + diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index bdffb9c4980722..a03b59a52a4773 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -596,7 +596,9 @@ void program::pre_optimize_graph(bool is_internal) { if (optimize_data) { apply_opt_pass(); } - + // temp place to fuse reorder+transpose + apply_opt_pass(); + apply_opt_pass(); // check if there exists some layout incompatibilities and add an reorder node if required apply_opt_pass(); diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 93698432e73be0..5a2e83eeb8631f 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -5,6 +5,7 @@ #include "primitive_type_base.h" #include "intel_gpu/runtime/error_handler.hpp" #include "json_object.h" +#include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/primitives/convolution.hpp" #include "intel_gpu/primitives/eltwise.hpp" #ifdef ENABLE_ONEDNN_FOR_GPU @@ -169,7 +170,22 @@ layout reorder_inst::calc_output_layout(reorder_node const& node, kernel_impl_pa // TODO Shouldn't transform be called every time ifmt != ofmt? return layout(odt, ofmt, input_layout.get_tensor().transform(ofmt, 1), op); } else { - return layout(odt, ofmt, input_layout.get_tensor(), op); + // debug code, to fuse reorder with src permute + auto org_ps = input_layout.get_partial_shape(); + auto output_shape = ov::PartialShape(); + int64_t input_static_rank = org_ps.rank().get_length(); + auto permute_order = desc->src_permutation; + if (permute_order.empty()) { + for (int64_t i = 0; i <= input_static_rank - 1; ++i) { + permute_order.emplace_back(i); // for compliance first + } + } + + for (int64_t i = 0; i < input_static_rank; ++i) { + output_shape.push_back(org_ps[permute_order[i]]); + } + return { layout(odt, ofmt, ov::intel_gpu::tensor_from_dims(output_shape.to_shape()), desc->output_paddings[0]) }; + //return layout(odt, ofmt, input_layout.get_tensor(), op); } } @@ -190,7 +206,22 @@ std::vector reorder_inst::calc_output_layouts(reorder_node const& /*node #endif // ENABLE_ONEDNN_FOR_GPU return { desc->weights_reorder_params->get_output_layout() }; } else { - return { layout(input_layout.get(), desc->output_data_types[0].value(), ofmt, desc->output_paddings[0]) }; + // debug code, to fuse reorder with src permute + auto org_ps = input_layout.get(); + ShapeType output_shape; + int64_t input_static_rank = org_ps.rank().get_length(); + auto permute_order = desc->src_permutation; + if (permute_order.empty()) { + for (int64_t i = 0; i <= input_static_rank - 1; ++i) { + permute_order.emplace_back(i); // for compliance first + } + } + + for (int64_t i = 0; i < input_static_rank; ++i) { + output_shape.push_back(org_ps[permute_order[i]]); + } + return { layout(output_shape, desc->output_data_types[0].value(), ofmt, desc->output_paddings[0]) }; + //return { layout(input_layout.get(), desc->output_data_types[0].value(), ofmt, desc->output_paddings[0]) }; } } diff --git a/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp b/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp new file mode 100644 index 00000000000000..6b1d4027289898 --- /dev/null +++ b/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp @@ -0,0 +1,55 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "test_utils.h" +#include "program_wrapper.h" +#include "convolution_inst.h" +#include "permute_inst.h" +#include "reshape_inst.h" + +using namespace cldnn; +using namespace ::tests; + +TEST(merge_reorder_reshape_permute, optimize_yolo) { + auto& engine = get_test_engine(); + auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; + auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); + auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); + set_values(input, { 2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, + 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, + 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, + 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, + 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, + 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, + 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, + 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); + + set_values(weight, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + topology topology; + topology.add(input_layout("input", in_layout)); + topology.add(data("weight", weight)); + topology.add(convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + topology.add(reorder("reorder_inter", input_info("convolution"), format::bfyx, data_types::f16)); + topology.add(reshape("reshape", input_info("reorder_inter"), false, {1, 3, 24, 1}, ov::PartialShape{1, 3, 24, 1})); + topology.add(permute("permute_inter", input_info("reshape"), {0, 2, 1, 3})); + topology.add(softmax("softmax", input_info("permute_inter"), 1)); + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + auto prog = program::build_program(engine, topology, config); + + network net(prog); + + net.set_input_data("input", input); + auto output = net.execute(); + + auto out_mem = output.at("softmax").get_memory(); + mem_lock lock(out_mem, get_test_stream()); + + for (size_t i = 0; i < out_mem->count(); i++) { + float actual = lock[i]; + std::cout << actual << ", "; + } + std::cout << std::endl; +} From 59a00b066690ec070b47364f77b44bec20a0861a Mon Sep 17 00:00:00 2001 From: fishbell Date: Mon, 23 Dec 2024 18:31:07 +0800 Subject: [PATCH 2/6] sink reshape for further optimization Signed-off-by: fishbell --- .../include/intel_gpu/graph/program.hpp | 2 +- .../graph_optimizer/cpu_ref_runtime_model.bin | 0 .../graph_optimizer/cpu_ref_runtime_model.xml | 8905 ----------------- .../graph/graph_optimizer/kernel.errors.txt | 10 - .../reorder_reshape_permute_opt.cpp | 203 - .../graph_optimizer/reshape_transfer.cpp | 155 + .../src/graph/impls/ocl/kernel.errors.txt | 10 - .../src/graph/impls/onednn/kernel.errors.txt | 10 - .../src/graph/include/pass_manager.h | 12 +- .../intel_gpu/src/graph/kernel.errors.txt | 10 - src/plugins/intel_gpu/src/graph/program.cpp | 4 +- .../unit/passes/reorder_reshape_permute.cpp | 261 +- 12 files changed, 412 insertions(+), 9170 deletions(-) delete mode 100644 src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.bin delete mode 100644 src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.xml delete mode 100644 src/plugins/intel_gpu/src/graph/graph_optimizer/kernel.errors.txt delete mode 100644 src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_reshape_permute_opt.cpp create mode 100644 src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp delete mode 100644 src/plugins/intel_gpu/src/graph/impls/ocl/kernel.errors.txt delete mode 100644 src/plugins/intel_gpu/src/graph/impls/onednn/kernel.errors.txt delete mode 100644 src/plugins/intel_gpu/src/graph/kernel.errors.txt diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp index 5d035ed9f9ae24..4c51ee2bacc04e 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/program.hpp @@ -47,7 +47,7 @@ struct program { friend class post_optimize_weights; // to be removed when possible friend class prepare_primitive_fusing_through; // to be removed when possible friend class reorder_transfer; // to be removed when possible - friend class reorder_reshape_transpose_fuse; // to be removed when possible + friend class reshape_transfer; // to be removed when possible friend class fuse_constant_transposes; // to be removed when possible friend class program_wrapper; // this class is intended to extend the interface of program for // the usage within tests_core_internal project only diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.bin b/src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.bin deleted file mode 100644 index e69de29bb2d1d6..00000000000000 diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.xml b/src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.xml deleted file mode 100644 index 6c824cb465dde4..00000000000000 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/cpu_ref_runtime_model.xml +++ /dev/null @@ -1,8905 +0,0 @@ - - - - - - - - 1 - 1088 - 1920 - 3 - - - - - - - - 1 - 1088 - 1920 - 3 - - - - - 1 - 1088 - 1920 - 3 - - - - - - - - 1 - 1088 - 1920 - 3 - - - - - 1 - 3 - 1088 - 1920 - - - - - - - - 16 - 3 - 3 - 3 - - - - - - - - 1 - 16 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 16 - 1 - 1 - - - 1 - - - - - 16 - - - - - - - - 1 - 3 - 1088 - 1920 - - - 16 - 3 - 3 - 3 - - - 16 - - - - - 1 - 16 - 544 - 960 - - - - - - - - 32 - 16 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 16 - 544 - 960 - - - 32 - 16 - 3 - 3 - - - 32 - - - - - 1 - 32 - 272 - 480 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 272 - 480 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 272 - 480 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 272 - 480 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 272 - 480 - - - - - - - - 64 - 32 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 32 - 272 - 480 - - - 64 - 32 - 3 - 3 - - - 64 - - - - - 1 - 64 - 136 - 240 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 136 - 240 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 136 - 240 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 136 - 240 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 136 - 240 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 136 - 240 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 136 - 240 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 136 - 240 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 136 - 240 - - - - - - - - 128 - 64 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 64 - 136 - 240 - - - 128 - 64 - 3 - 3 - - - 128 - - - - - 1 - 128 - 68 - 120 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 68 - 120 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 68 - 120 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 68 - 120 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 68 - 120 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 68 - 120 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 68 - 120 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 68 - 120 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 68 - 120 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 68 - 120 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 68 - 120 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 68 - 120 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 68 - 120 - - - - - - - - 256 - 128 - 3 - 3 - - - - - - - - 1 - 256 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 256 - 1 - 1 - - - 1 - - - - - 256 - - - - - - - - 1 - 128 - 68 - 120 - - - 256 - 128 - 3 - 3 - - - 256 - - - - - 1 - 256 - 34 - 60 - - - - - - - - 256 - 256 - 3 - 3 - - - - - - - - 1 - 256 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 256 - 1 - 1 - - - 1 - - - - - 256 - - - - - - - - 1 - 256 - 34 - 60 - - - 256 - 256 - 3 - 3 - - - 256 - - - - - 1 - 256 - 34 - 60 - - - - - - - - 256 - 256 - 3 - 3 - - - - - - - - 1 - 256 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 256 - 1 - 1 - - - 1 - - - - - 256 - - - - - - - - 1 - 256 - 34 - 60 - - - 256 - 256 - 3 - 3 - - - 256 - - - - - 1 - 256 - 34 - 60 - - - - - - - - 128 - 256 - 1 - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 256 - 34 - 60 - - - 128 - 256 - 1 - 1 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 128 - 256 - 1 - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 256 - 34 - 60 - - - 128 - 256 - 1 - 1 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 34 - 60 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 128 - 128 - 1 - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 34 - 60 - - - 128 - 128 - 1 - 1 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 1 - 128 - 34 - 60 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 1 - 128 - 34 - 60 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 1 - 128 - 34 - 60 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 1 - 128 - 34 - 60 - - - 1 - 128 - 34 - 60 - - - 1 - 128 - 34 - 60 - - - 1 - 128 - 34 - 60 - - - - - 1 - 512 - 34 - 60 - - - - - - - - 128 - 512 - 1 - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 512 - 34 - 60 - - - 128 - 512 - 1 - 1 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 34 - 60 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 1 - 128 - 34 - 60 - - - 1 - 128 - 34 - 60 - - - - - 1 - 256 - 34 - 60 - - - - - - - - 256 - 256 - 1 - 1 - - - - - - - - 1 - 256 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 256 - 1 - 1 - - - 1 - - - - - 256 - - - - - - - - 1 - 256 - 34 - 60 - - - 256 - 256 - 1 - 1 - - - 256 - - - - - 1 - 256 - 34 - 60 - - - - - - - - 64 - 256 - 1 - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 256 - 34 - 60 - - - 64 - 256 - 1 - 1 - - - 64 - - - - - 1 - 64 - 34 - 60 - - - - - - - - 4 - - - - - - - - 4 - - - - - - - - 1 - 64 - 34 - 60 - - - 4 - - - 4 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 128 - 1 - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 128 - 68 - 120 - - - 64 - 128 - 1 - 1 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 1 - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 136 - 240 - - - 64 - 64 - 1 - 1 - - - 64 - - - - - 1 - 64 - 136 - 240 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 136 - 240 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 1 - 64 - 68 - 120 - - - 1 - 64 - 68 - 120 - - - 1 - 64 - 68 - 120 - - - - - 1 - 192 - 68 - 120 - - - - - - - - 64 - 192 - 1 - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 192 - 68 - 120 - - - 64 - 192 - 1 - 1 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 32 - 64 - 1 - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 64 - 68 - 120 - - - 32 - 64 - 1 - 1 - - - 32 - - - - - 1 - 32 - 68 - 120 - - - - - - - - 4 - - - - - - - - 1 - 32 - 68 - 120 - - - 4 - - - 4 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 32 - 64 - 1 - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 64 - 136 - 240 - - - 32 - 64 - 1 - 1 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 32 - 32 - 1 - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 272 - 480 - - - 32 - 32 - 1 - 1 - - - 32 - - - - - 1 - 32 - 272 - 480 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 272 - 480 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 1 - 32 - 136 - 240 - - - 1 - 32 - 136 - 240 - - - 1 - 32 - 136 - 240 - - - - - 1 - 96 - 136 - 240 - - - - - - - - 32 - 96 - 1 - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 96 - 136 - 240 - - - 32 - 96 - 1 - 1 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 136 - 240 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 136 - 240 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 136 - 240 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 136 - 240 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 32 - 32 - 1 - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 136 - 240 - - - 32 - 32 - 1 - 1 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 136 - 240 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 4 - 32 - 1 - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - 1 - - - - - 4 - - - - - - - - 1 - 32 - 136 - 240 - - - 4 - 32 - 1 - 1 - - - 4 - - - - - 1 - 4 - 136 - 240 - - - - - - - - 1 - 4 - 136 - 240 - - - - - 1 - 4 - 136 - 240 - - - - - - - - 3 - - - - - - - - 1 - 4 - 136 - 240 - - - 3 - - - - - 1 - 4 - 32640 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 136 - 240 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 68 - 120 - - - - - - - - 1 - 32 - 68 - 120 - - - 1 - 32 - 68 - 120 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 1 - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 1 - 1 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 4 - 64 - 1 - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - 1 - - - - - 4 - - - - - - - - 1 - 64 - 68 - 120 - - - 4 - 64 - 1 - 1 - - - 4 - - - - - 1 - 4 - 68 - 120 - - - - - - - - 1 - 4 - 68 - 120 - - - - - 1 - 4 - 68 - 120 - - - - - - - - 3 - - - - - - - - 1 - 4 - 68 - 120 - - - 3 - - - - - 1 - 4 - 8160 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 34 - 60 - - - - - - - - 1 - 64 - 34 - 60 - - - 1 - 64 - 34 - 60 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 34 - 60 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 34 - 60 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 34 - 60 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 34 - 60 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 128 - 128 - 1 - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 34 - 60 - - - 128 - 128 - 1 - 1 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 34 - 60 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 4 - 128 - 1 - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - 1 - - - - - 4 - - - - - - - - 1 - 128 - 34 - 60 - - - 4 - 128 - 1 - 1 - - - 4 - - - - - 1 - 4 - 34 - 60 - - - - - - - - 1 - 4 - 34 - 60 - - - - - 1 - 4 - 34 - 60 - - - - - - - - 3 - - - - - - - - 1 - 4 - 34 - 60 - - - 3 - - - - - 1 - 4 - 2040 - - - - - - - - 1 - 4 - 32640 - - - 1 - 4 - 8160 - - - 1 - 4 - 2040 - - - - - 1 - 4 - 42840 - - - - - - - - 3 - - - - - - - - 1 - 4 - 42840 - - - 3 - - - - - 1 - 42840 - 4 - - - - - - - - 1 - - - - - - - - 2 - - - - - - - - 1 - 42840 - 4 - - - 1 - - - 2 - - - - - 1 - 42840 - 2 - - - 1 - 42840 - 2 - - - - - - - - 1 - 42840 - 2 - - - - - - - - 1 - 42840 - 2 - - - - - - - - 1 - 42840 - 2 - - - 1 - 42840 - 2 - - - 1 - 42840 - 2 - - - 1 - 42840 - 2 - - - - - 1 - 42840 - 2 - - - 1 - 42840 - 2 - - - - - - - - 1 - 42840 - 2 - - - 1 - 42840 - 2 - - - - - 1 - 42840 - 4 - - - - - - - - 1 - 42840 - 1 - - - - - - - - 1 - 42840 - 4 - - - 1 - 42840 - 1 - - - - - 1 - 42840 - 4 - - - - - - - - 1 - 42840 - 1 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 136 - 240 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 13 - 32 - 1 - 1 - - - - - - - - 1 - 13 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 13 - 1 - 1 - - - 1 - - - - - 13 - - - - - - - - 1 - 32 - 136 - 240 - - - 13 - 32 - 1 - 1 - - - 13 - - - - - 1 - 13 - 136 - 240 - - - - - - - - 1 - 13 - 136 - 240 - - - - - 1 - 13 - 136 - 240 - - - - - - - - 3 - - - - - - - - 1 - 13 - 136 - 240 - - - 3 - - - - - 1 - 13 - 32640 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 13 - 64 - 1 - 1 - - - - - - - - 1 - 13 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 13 - 1 - 1 - - - 1 - - - - - 13 - - - - - - - - 1 - 64 - 68 - 120 - - - 13 - 64 - 1 - 1 - - - 13 - - - - - 1 - 13 - 68 - 120 - - - - - - - - 1 - 13 - 68 - 120 - - - - - 1 - 13 - 68 - 120 - - - - - - - - 3 - - - - - - - - 1 - 13 - 68 - 120 - - - 3 - - - - - 1 - 13 - 8160 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 34 - 60 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 13 - 128 - 1 - 1 - - - - - - - - 1 - 13 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 13 - 1 - 1 - - - 1 - - - - - 13 - - - - - - - - 1 - 128 - 34 - 60 - - - 13 - 128 - 1 - 1 - - - 13 - - - - - 1 - 13 - 34 - 60 - - - - - - - - 1 - 13 - 34 - 60 - - - - - 1 - 13 - 34 - 60 - - - - - - - - 3 - - - - - - - - 1 - 13 - 34 - 60 - - - 3 - - - - - 1 - 13 - 2040 - - - - - - - - 1 - 13 - 32640 - - - 1 - 13 - 8160 - - - 1 - 13 - 2040 - - - - - 1 - 13 - 42840 - - - - - - - - 3 - - - - - - - - 1 - 13 - 42840 - - - 3 - - - - - 1 - 42840 - 13 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 136 - 240 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 3 - 32 - 1 - 1 - - - - - - - - 1 - 3 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 3 - 1 - 1 - - - 1 - - - - - 3 - - - - - - - - 1 - 32 - 136 - 240 - - - 3 - 32 - 1 - 1 - - - 3 - - - - - 1 - 3 - 136 - 240 - - - - - - - - 1 - 3 - 136 - 240 - - - - - 1 - 3 - 136 - 240 - - - - - - - - 1 - 3 - 136 - 240 - - - - - 1 - 32640 - 3 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 3 - 64 - 1 - 1 - - - - - - - - 1 - 3 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 3 - 1 - 1 - - - 1 - - - - - 3 - - - - - - - - 1 - 64 - 68 - 120 - - - 3 - 64 - 1 - 1 - - - 3 - - - - - 1 - 3 - 68 - 120 - - - - - - - - 1 - 3 - 68 - 120 - - - - - 1 - 3 - 68 - 120 - - - - - - - - 1 - 3 - 68 - 120 - - - - - 1 - 8160 - 3 - - - - - - - - 128 - 128 - 3 - 3 - - - - - - - - 1 - 128 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 128 - 1 - 1 - - - 1 - - - - - 128 - - - - - - - - 1 - 128 - 34 - 60 - - - 128 - 128 - 3 - 3 - - - 128 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 3 - 128 - 1 - 1 - - - - - - - - 1 - 3 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 3 - 1 - 1 - - - 1 - - - - - 3 - - - - - - - - 1 - 128 - 34 - 60 - - - 3 - 128 - 1 - 1 - - - 3 - - - - - 1 - 3 - 34 - 60 - - - - - - - - 1 - 3 - 34 - 60 - - - - - 1 - 3 - 34 - 60 - - - - - - - - 1 - 3 - 34 - 60 - - - - - 1 - 2040 - 3 - - - - - - - - 1 - 32640 - 3 - - - 1 - 8160 - 3 - - - 1 - 2040 - 3 - - - - - 1 - 42840 - 3 - - - - - - - - 3 - - - - - - - - 3 - - - - - - - - 3 - - - - - - - - 1 - 42840 - 4 - - - 3 - - - 3 - - - 3 - - - - - 1 - 42840 - 2 - - - - - - - - 1 - 42840 - 1 - - - - - - - - 1 - 42840 - 2 - - - 1 - 42840 - 1 - - - - - 1 - 42840 - 2 - - - - - - - - 1 - 42840 - 2 - - - 1 - 42840 - 2 - - - - - 1 - 42840 - 4 - - - - - - - - 32 - 32 - 1 - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 136 - 240 - - - 32 - 32 - 1 - 1 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 32 - 32 - 3 - 3 - - - - - - - - 1 - 32 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 32 - 1 - 1 - - - 1 - - - - - 32 - - - - - - - - 1 - 32 - 136 - 240 - - - 32 - 32 - 3 - 3 - - - 32 - - - - - 1 - 32 - 136 - 240 - - - - - - - - 4 - 32 - 1 - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - 1 - - - - - 4 - - - - - - - - 1 - 32 - 136 - 240 - - - 4 - 32 - 1 - 1 - - - 4 - - - - - 1 - 4 - 136 - 240 - - - - - - - - 1 - 4 - 136 - 240 - - - - - 1 - 32640 - 4 - - - - - - - - 1 - 32640 - 4 - - - - - 1 - 32640 - 4 - - - - - - - - 64 - 64 - 1 - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 1 - 1 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 64 - 64 - 3 - 3 - - - - - - - - 1 - 64 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 64 - 1 - 1 - - - 1 - - - - - 64 - - - - - - - - 1 - 64 - 68 - 120 - - - 64 - 64 - 3 - 3 - - - 64 - - - - - 1 - 64 - 68 - 120 - - - - - - - - 4 - 64 - 1 - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - 1 - - - - - 4 - - - - - - - - 1 - 64 - 68 - 120 - - - 4 - 64 - 1 - 1 - - - 4 - - - - - 1 - 4 - 68 - 120 - - - - - - - - 1 - 4 - 68 - 120 - - - - - 1 - 8160 - 4 - - - - - - - - 1 - 8160 - 4 - - - - - 1 - 8160 - 4 - - - - - - - - 1 - 128 - 34 - 60 - - - - - - - - 1 - 128 - 34 - 60 - - - - - 1 - 128 - 34 - 60 - - - - - - - - 4 - 128 - 1 - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 4 - 1 - 1 - - - 1 - - - - - 4 - - - - - - - - 1 - 128 - 34 - 60 - - - 4 - 128 - 1 - 1 - - - 4 - - - - - 1 - 4 - 34 - 60 - - - - - - - - 1 - 4 - 34 - 60 - - - - - 1 - 2040 - 4 - - - - - - - - 1 - 2040 - 4 - - - - - 1 - 2040 - 4 - - - - - - - - 1 - 32640 - 4 - - - 1 - 8160 - 4 - - - 1 - 2040 - 4 - - - - - 1 - 42840 - 4 - - - - - - - - 1 - 42840 - 1 - - - - - - - - 1 - 42840 - 4 - - - 1 - 42840 - 4 - - - 1 - 42840 - 1 - - - - - 1 - 42840 - 4 - - - - - - - - 12 - 32 - 1 - 1 - - - - - - - - 1 - 12 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 12 - 1 - 1 - - - 1 - - - - - 12 - - - - - - - - 1 - 32 - 136 - 240 - - - 12 - 32 - 1 - 1 - - - 12 - - - - - 1 - 12 - 136 - 240 - - - - - - - - 1 - 12 - 136 - 240 - - - - - 1 - 32640 - 12 - - - - - - - - 12 - 64 - 1 - 1 - - - - - - - - 1 - 12 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 12 - 1 - 1 - - - 1 - - - - - 12 - - - - - - - - 1 - 64 - 68 - 120 - - - 12 - 64 - 1 - 1 - - - 12 - - - - - 1 - 12 - 68 - 120 - - - - - - - - 1 - 12 - 68 - 120 - - - - - 1 - 8160 - 12 - - - - - - - - 12 - 128 - 1 - 1 - - - - - - - - 1 - 12 - 1 - 1 - - - - - - - - 1 - - - - - - - - 1 - 12 - 1 - 1 - - - 1 - - - - - 12 - - - - - - - - 1 - 128 - 34 - 60 - - - 12 - 128 - 1 - 1 - - - 12 - - - - - 1 - 12 - 34 - 60 - - - - - - - - 1 - 12 - 34 - 60 - - - - - 1 - 2040 - 12 - - - - - - - - 1 - 32640 - 12 - - - 1 - 8160 - 12 - - - 1 - 2040 - 12 - - - - - 1 - 42840 - 12 - - - - - - - - 1 - 42840 - 4 - - - 1 - 42840 - 1 - - - 1 - 42840 - 13 - - - 1 - 42840 - 3 - - - 1 - 42840 - 4 - - - 1 - 42840 - 12 - - - - - 1 - 42840 - 37 - - - - - - - - 1 - - - - - - - - 5 - - - - - - - - 1 - 42840 - 37 - - - 1 - - - 5 - - - - - 1 - 42840 - 4 - - - 1 - 42840 - 1 - - - 1 - 42840 - 13 - - - 1 - 42840 - 7 - - - 1 - 42840 - 12 - - - - - - - - 1 - - - - - - - - 1 - 42840 - 12 - - - 1 - - - - - 1 - 42840 - 1 - - - 1 - 42840 - 1 - - - - - - - - 1 - 42840 - 1 - - - - - - - - 1 - - - - - - - - 1 - 42840 - 13 - - - 1 - - - - - 1 - 42840 - 1 - - - 1 - 42840 - 1 - - - - - - - - 1 - 42840 - 1 - - - - - - - - 1 - 42840 - 1 - - - - - - - - 1 - - - - - - - - 1 - 42840 - 13 - - - 1 - - - - - 1 - 42840 - 1 - - - - - - - - 1 - 42840 - 1 - - - - - 1 - 42840 - 1 - - - - - - - - 1 - - - - - - - - 1 - 42840 - 12 - - - 1 - - - - - 1 - 42840 - 1 - - - - - - - - 1 - 42840 - 1 - - - - - 1 - 42840 - 1 - - - - - - - - 1 - 42840 - 4 - - - 1 - 42840 - 1 - - - 1 - 42840 - 1 - - - 1 - 42840 - 7 - - - 1 - 42840 - 1 - - - 1 - 42840 - 1 - - - - - 1 - 42840 - 15 - - - - - - - - 1 - 42840 - 15 - - - - - 1 - 42840 - 15 - - - - - - - - 1 - 42840 - 15 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/kernel.errors.txt b/src/plugins/intel_gpu/src/graph/graph_optimizer/kernel.errors.txt deleted file mode 100644 index 9541c79acd3a3a..00000000000000 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/kernel.errors.txt +++ /dev/null @@ -1,10 +0,0 @@ -Instruction / Operand / Region Errors: - -/-------------------------------------------!!!KERNEL HEADER ERRORS FOUND!!!-------------------------------------------\ -Error in CISA routine with name: igc_check - Error Message: Input V38 = [256, 260) intersects with V37 = [256, 260) -\----------------------------------------------------------------------------------------------------------------------/ - - - - diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_reshape_permute_opt.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_reshape_permute_opt.cpp deleted file mode 100644 index 2f79a73ef298b3..00000000000000 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_reshape_permute_opt.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright (C) 2023 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include - -#include "pass_manager.h" -#include "permute_inst.h" -#include "program_helpers.h" -#include "reorder_inst.h" -#include "reshape_inst.h" - -using namespace cldnn; -bool reorder_reshape_transpose_fuse::check_order(const std::vector& transpose_order, - const std::vector& layout_order, - const std::vector& reorder_inorder, - const std::vector& reorder_outorder) { - if (transpose_order.size() != layout_order.size() || layout_order.size() != reorder_inorder.size() || - reorder_inorder.size() != reorder_outorder.size()) { - return false; - } - auto rev_layout_order = std::vector(layout_order.size()); - for (size_t i = 0; i < rev_layout_order.size(); i++) { - rev_layout_order[layout_order[i]] = i; - } - - auto new_transpose_order = std::vector(transpose_order.size()); - for (size_t i = 0; i < new_transpose_order.size(); i++) { - new_transpose_order[i] = layout_order[transpose_order[rev_layout_order[i]]]; - } - - auto reorder_order = std::vector(reorder_outorder.size()); - for (size_t i = 0; i < reorder_order.size(); i++) { - for (size_t j = 0; j < reorder_order.size(); j++) { - if (reorder_outorder[i] == reorder_inorder[j]) { - reorder_order[i] = j; - continue; - } - } - } - - auto summary_order = std::vector(transpose_order.size()); - for (size_t i = 0; i < summary_order.size(); i++) { - summary_order[i] = reorder_order[new_transpose_order[i]]; - } - - for (size_t i = 0; i < summary_order.size(); i++) { - if (summary_order[i] != i) { - return false; - } - } - return true; -} - -void reorder_reshape_transpose_fuse::run(program& p) { - bool update_processing_order = false; - // temp code to validate reorder + reshape + permute opt - // other patterns to consider: permute + (reshape) + reorder? - auto is_suitable_reorder = [](cldnn::program_node* node) { - return node->get_users().size() == 1 && node->is_dynamic() == false; - }; - auto is_suitable_reshape = [](cldnn::program_node* node) { - if (node->get_users().size() != 1 || node->is_dynamic()) - return false; - const auto& in_shape = node->get_input_layout(0).get_dims(); - const auto& out_shape = node->get_output_layout().get_dims(); - return in_shape.size() == out_shape.size(); - }; - auto is_suitable_transpose = [](cldnn::program_node* node) { - return node->get_users().size() == 1 && node->is_dynamic() == false; - }; - auto update_order = [](std::vector original_order, cldnn::program_node* reshape) { - if (!reshape) - return original_order; - // Example. For this sequence: - // [1,3,4,6] -> Reshape[1,3,24,1]-> [1,24,3,1] - // org order as (0,2,1) - // first reshape to [1,3,24] ->transpose(0,2,1) -> [1,24,3] - // updated order must be (0,2,3,1): - // dim with index=2 is split into 2 parts: 2 and 3 - const auto& reshape_in_shape = reshape->get_input_layout().get_dims(); - const auto& reshape_out_dim = reshape->get_output_layout().get_dims(); - auto reshape_out_shape = reshape_out_dim; - auto transformed_order = original_order; - ov::Shape new_shape(transformed_order.size()); - if (original_order.size() < reshape_out_dim.size() && reshape_out_dim.size() == 4) { - // if order dims is less than reshape dims, means reshape shape has been converted to upper dims - // merge spatial dims - reshape_out_shape.resize(original_order.size()); - for (size_t i = 0; i < reshape_out_dim.size(); ++i) { - if (i < 2) { - reshape_out_shape[i] = reshape_out_dim[i]; - } else { - reshape_out_shape[2] *= reshape_out_dim[i]; - } - } - const size_t merge_dim_idx = [&]() { - for (size_t i = 0; i < reshape_in_shape.size(); ++i) { - if (reshape_in_shape[i] != reshape_out_shape[i]) - return i; - } - OPENVINO_THROW("merged_dim_idx can not be found"); - }(); - auto insertIt = transformed_order.end(); - for (auto it = transformed_order.begin(); it != transformed_order.end(); ++it) { - auto& elem = *it; - if (elem > merge_dim_idx) { - elem++; - } else if (elem == merge_dim_idx) { - insertIt = it + 1; - } - } - transformed_order.insert(insertIt, merge_dim_idx + 1); - } else { - auto reorder_orders = [](std::vector& order, std::vector place_order) { - // for all elements to put in place - for (size_t i = 0; i < order.size() - 1; ++i) { - while (i != place_order[i]) { - // swap it with the element at its final place - auto alt = place_order[i]; - std::swap(order[i], order[alt]); - std::swap(place_order[i], place_order[alt]); - } - } - }; - reorder_orders(transformed_order, std::vector({0, 1, 3, 2})); - } - return transformed_order; - }; - - auto itr = p.get_processing_order().begin(); - while (itr != p.get_processing_order().end()) { - auto& node = *itr++; - if (!node->is_type()) - continue; - auto& transpose_node = node->as(); - if (transpose_node.id() == "transpose:/detect/Transpose") - std::cout << "break" << std::endl; - if (!is_suitable_transpose(&transpose_node)) - continue; - auto& child_node = transpose_node; - auto parent_node = child_node.get_dependency_with_port(0).first; - cldnn::program_node* inter_node; - if (parent_node->is_type()) { - inter_node = parent_node; - if (!is_suitable_reshape(inter_node)) { - continue; - } - parent_node = inter_node->get_dependency_with_port(0).first; - } else { - continue; // to be matched further - } - - if (!is_suitable_reorder(parent_node)) { - continue; - } - auto& reshape_node = inter_node->as(); // TODO: check null reshape node - - auto transpose_order = update_order(transpose_node.get_permute_order(), &reshape_node); - auto next_node = transpose_node.get_users().front(); - auto next_layout = next_node->get_input_layout(); - auto order_after_transpose = next_node->get_output_layout().get_dims_order(); - auto reorder_in_dims_order = parent_node->get_input_layout().get_dims_order(); - auto reorder_out_dims_order = parent_node->get_output_layout().get_dims_order(); - - if (check_order(transpose_order, order_after_transpose, reorder_in_dims_order, reorder_out_dims_order)) { - std::cout << "debug: " << transpose_node.id() << std::endl; - // qualified for merge - // making new reorder - const auto& prev_node = parent_node->get_dependency_with_port(0).first; - auto new_reorder = std::make_shared(parent_node->id() + reshape_node.id() + transpose_node.id(), - prev_node->id(), - parent_node->get_output_layout()); - std::vector permute_order(transpose_order.size()); - std::copy_n(transpose_order.begin(), transpose_order.size(), permute_order.begin()); - new_reorder->set_src_permutation(permute_order); - auto& new_reorder_node = p.get_or_create(new_reorder); - p.remove_all_connections(transpose_node); - p.remove_all_connections(reshape_node); - p.remove_all_connections(*parent_node); - p.remove_if_dangling(transpose_node); - p.remove_if_dangling(reshape_node); - p.remove_if_dangling(*parent_node); - p.add_connection(*prev_node, *next_node); - p.add_intermediate(new_reorder_node, *next_node, *prev_node); - new_reorder_node.recalc_output_layouts(false); - new_reorder_node.can_be_optimized(true); - update_processing_order = true; - - // if shapes don't match, another reshape must be inserted to perform shape alignment with next node - if (next_layout.get_dims() != new_reorder_node.get_output_layout().get_dims()) { - auto new_reshape = std::make_shared(parent_node->id() + reshape_node.id() + transpose_node.id() + "fake_reshape", - next_node->id(), - next_layout.get_tensor()); - auto& new_reshape_node = p.get_or_create(new_reshape); - p.add_intermediate(new_reshape_node, *next_node, new_reorder_node); - } - } - } - if (update_processing_order) { - p.get_processing_order().calc_processing_order(p); - } -} diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp new file mode 100644 index 00000000000000..23b6a2eae092fb --- /dev/null +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp @@ -0,0 +1,155 @@ +// Copyright (C) 2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "pass_manager.h" +#include "permute_inst.h" +#include "program_helpers.h" +#include "reorder_inst.h" +#include "reshape_inst.h" + +using namespace cldnn; + +void reshape_transfer::run(program& p) { + // (reorder) + reshape + transpose + // sink reshape for further possible optimization + auto is_suitable_permute = [](cldnn::program_node* node) { + return node->get_users().size() == 1 && node->is_dynamic() == false; + }; + + auto is_suitable_reshape = [](cldnn::program_node* node) -> bool { + if (node->get_users().size() != 1 || node->is_dynamic()) + return false; + auto& input_lay = node->get_input_layout(0); + auto& output_lay = node->get_output_layout(); + if (input_lay.compatible(output_lay)) + return true; + return false; + }; + std::function is_suitable_reorder; + + is_suitable_reorder = [&is_suitable_reorder](const cldnn::program_node* node) -> bool { + if (node->get_users().size() != 1 || node->is_dynamic()) + return false; + for (size_t idx = 0; idx < node->get_dependencies().size(); idx++) { + auto& input = node->get_dependency(idx); + if (!input.is_in_data_flow() || input.is_constant()) + continue; + if (input.is_type()) { + return true; + } else if (input.is_type() && input.get_dependency(1).is_constant()) { + return is_suitable_reorder(&input); + } else if (input.is_type()) { + return is_suitable_reorder(&input); + } + return false; + } + return true; + }; + + auto update_order = [](std::vector original_order, cldnn::program_node* reshape) { + if (!reshape) + return original_order; + // Example. For this sequence, there is Reshape node which merges 2 consecutive dims into one + // order must be updated like permute is done before reshape + // [1,3,4,6] -> Reshape[1,3,24,1]-> permute(0,2,1) -> [1,24,3,1] + // updated order must be (0,2,3,1): + // dim with index=2 is split into 2 parts: 2 and 3 + const auto& reshape_in_shape = reshape->get_input_layout().get_dims(); + const auto& reshape_out_dim = reshape->get_output_layout().get_dims(); + auto reshape_out_shape = reshape_out_dim; + auto transformed_order = original_order; + ov::Shape new_shape(transformed_order.size()); + if (original_order.size() < reshape_out_dim.size() && reshape_out_dim.size() == 4) { + // if order dims is less than reshape dims, means reshape shape has been converted to upper dims some time + // before merge spatial dims + reshape_out_shape.resize(original_order.size()); + for (size_t i = 0; i < reshape_out_dim.size(); ++i) { + if (i < 2) { + reshape_out_shape[i] = reshape_out_dim[i]; + } else { + reshape_out_shape[2] *= reshape_out_dim[i]; + } + } + const size_t merge_dim_idx = [&]() { + for (size_t i = 0; i < reshape_in_shape.size(); ++i) { + if (reshape_in_shape[i] != reshape_out_shape[i]) + return i; + } + OPENVINO_THROW("merged_dim_idx can not be found"); + }(); + auto insertIt = transformed_order.end(); + for (auto it = transformed_order.begin(); it != transformed_order.end(); ++it) { + auto& elem = *it; + if (elem > merge_dim_idx) { + elem++; + } else if (elem == merge_dim_idx) { + insertIt = it + 1; + } + } + transformed_order.insert(insertIt, merge_dim_idx + 1); + } else { + auto reorder_orders = [](std::vector& order, std::vector place_order) { + // for all elements to put in place + for (size_t i = 0; i < order.size() - 1; ++i) { + while (i != place_order[i]) { + // swap it with the element at its final place + auto alt = place_order[i]; + std::swap(order[i], order[alt]); + std::swap(place_order[i], place_order[alt]); + } + } + }; + reorder_orders(transformed_order, std::vector({0, 1, 3, 2})); + } + return transformed_order; + }; + + auto itr = p.get_processing_order().begin(); + while (itr != p.get_processing_order().end()) { + auto& node = *itr++; + if (!node->is_type()) + continue; + auto& transpose_node = node->as(); + if (!is_suitable_permute(&transpose_node)) + continue; + auto& child_node = transpose_node; + auto parent_node = child_node.get_dependency_with_port(0).first; + cldnn::program_node* inter_node; + if (parent_node->is_type()) { + inter_node = parent_node; + if (!is_suitable_reshape(inter_node)) { + continue; + } + parent_node = inter_node->get_dependency_with_port(0).first; + } else { + continue; + } + + if (!is_suitable_reorder(parent_node)) { + continue; + } + reshape_node* reshape_node = nullptr; + if (inter_node && inter_node->is_type()) + reshape_node = &(inter_node->as()); + + auto transpose_order = update_order(transpose_node.get_permute_order(), reshape_node); + auto next_node = transpose_node.get_users().front(); + auto new_reshape_tensor = transpose_node.get_output_layout().get_tensor(); + p.move_node(*reshape_node, *node, *next_node); + // replace the permute node and reshape node + auto new_permute = + std::make_shared(transpose_node.id() + "_reordered", parent_node->id(), transpose_order); + auto& new_permute_node = p.get_or_create(new_permute); + auto new_reshape = + std::make_shared(reshape_node->id() + "_sinked", new_permute_node.id(), new_reshape_tensor); + auto& new_reshape_node = p.get_or_create(new_reshape); + + p.replace(transpose_node, new_permute_node); + p.replace(*reshape_node, new_reshape_node); + new_permute_node.recalc_output_layout(false); + new_reshape_node.recalc_output_layout(false); + } +} diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel.errors.txt b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel.errors.txt deleted file mode 100644 index 9541c79acd3a3a..00000000000000 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel.errors.txt +++ /dev/null @@ -1,10 +0,0 @@ -Instruction / Operand / Region Errors: - -/-------------------------------------------!!!KERNEL HEADER ERRORS FOUND!!!-------------------------------------------\ -Error in CISA routine with name: igc_check - Error Message: Input V38 = [256, 260) intersects with V37 = [256, 260) -\----------------------------------------------------------------------------------------------------------------------/ - - - - diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/kernel.errors.txt b/src/plugins/intel_gpu/src/graph/impls/onednn/kernel.errors.txt deleted file mode 100644 index 9541c79acd3a3a..00000000000000 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/kernel.errors.txt +++ /dev/null @@ -1,10 +0,0 @@ -Instruction / Operand / Region Errors: - -/-------------------------------------------!!!KERNEL HEADER ERRORS FOUND!!!-------------------------------------------\ -Error in CISA routine with name: igc_check - Error Message: Input V38 = [256, 260) intersects with V37 = [256, 260) -\----------------------------------------------------------------------------------------------------------------------/ - - - - diff --git a/src/plugins/intel_gpu/src/graph/include/pass_manager.h b/src/plugins/intel_gpu/src/graph/include/pass_manager.h index e007ae8d6855b6..60832e05856fed 100644 --- a/src/plugins/intel_gpu/src/graph/include/pass_manager.h +++ b/src/plugins/intel_gpu/src/graph/include/pass_manager.h @@ -363,22 +363,14 @@ class reorder_transfer : public base_pass { reorder_transfer() : base_pass("reorder_transfer") {} private: - bool check_order(const std::vector& transpose_order, - const std::vector& layout_order, - const std::vector& reorder_inorder, - const std::vector& reorder_outorder); void run(program& p) override; }; -class reorder_reshape_transpose_fuse : public base_pass { +class reshape_transfer : public base_pass { public: - reorder_reshape_transpose_fuse() : base_pass("reorder_reshape_transpose") {} + reshape_transfer() : base_pass("reshape_transfer") {} private: - bool check_order(const std::vector& transpose_order, - const std::vector& layout_order, - const std::vector& reorder_inorder, - const std::vector& reorder_outorder); void run(program& p) override; }; diff --git a/src/plugins/intel_gpu/src/graph/kernel.errors.txt b/src/plugins/intel_gpu/src/graph/kernel.errors.txt deleted file mode 100644 index 9541c79acd3a3a..00000000000000 --- a/src/plugins/intel_gpu/src/graph/kernel.errors.txt +++ /dev/null @@ -1,10 +0,0 @@ -Instruction / Operand / Region Errors: - -/-------------------------------------------!!!KERNEL HEADER ERRORS FOUND!!!-------------------------------------------\ -Error in CISA routine with name: igc_check - Error Message: Input V38 = [256, 260) intersects with V37 = [256, 260) -\----------------------------------------------------------------------------------------------------------------------/ - - - - diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index a03b59a52a4773..6531c3e22627b2 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -569,6 +569,7 @@ void program::pre_optimize_graph(bool is_internal) { apply_opt_pass(); + apply_opt_pass(); #ifdef GPU_DEBUG_CONFIG GPU_DEBUG_IF(!debug_config->disable_primitive_fusing) { #else @@ -596,8 +597,7 @@ void program::pre_optimize_graph(bool is_internal) { if (optimize_data) { apply_opt_pass(); } - // temp place to fuse reorder+transpose - apply_opt_pass(); + apply_opt_pass(); // check if there exists some layout incompatibilities and add an reorder node if required apply_opt_pass(); diff --git a/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp b/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp index 6b1d4027289898..50260af60cc3c7 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp @@ -2,20 +2,80 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "test_utils.h" -#include "program_wrapper.h" #include "convolution_inst.h" +#include "intel_gpu/graph/program.hpp" #include "permute_inst.h" +#include "program_wrapper.h" #include "reshape_inst.h" +#include "test_utils.h" using namespace cldnn; using namespace ::tests; -TEST(merge_reorder_reshape_permute, optimize_yolo) { +TEST(merge_reorder_reshape_permute, no_reshape) { + auto& engine = get_test_engine(); + auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; + auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); + auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); + + set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, + 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, + 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, + 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); + + set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + topology topology; + topology.add(input_layout("input", in_layout)); + topology.add(data("weight", weight)); + topology.add( + convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + topology.add(reorder("reorder_inter", input_info("convolution"), format::bfyx, data_types::f16)); + topology.add(permute("permute_inter", input_info("reorder_inter"), {0, 2, 3, 1})); + topology.add(softmax("softmax", input_info("permute_inter"), 1)); + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + auto prog = program::build_program(engine, topology, config); + + network net(prog); + + net.set_input_data("input", input); + auto output = net.execute(); + + ExecutionConfig ref_config = get_test_default_config(engine); + ref_config.set_property(ov::intel_gpu::optimize_data(false)); + cldnn::network ref_network(engine, topology, ref_config); + // reorder node is removed in primitive fusing + // later permute is optimized after convolution in selected preferred formats, e.g conv + permute + auto optimzed_nodes = net.get_program()->get_optimized(); + auto it = + std::find_if(std::begin(optimzed_nodes), std::end(optimzed_nodes), [&](cldnn::program::optimized_info& oi) { + return oi.first == "reorder_inter"; + }); + ASSERT_NE(it, optimzed_nodes.end()); + auto permute_inst = net.get_primitive("permute_inter"); + ASSERT_TRUE(permute_inst->can_be_optimized()); + auto out_mem = output.at("softmax").get_memory(); + mem_lock lock(out_mem, get_test_stream()); + + ref_network.set_input_data("input", input); + auto ref_output = ref_network.execute(); + + auto ref_out_mem = ref_output.at("softmax").get_memory(); + mem_lock lock_ref(ref_out_mem, get_test_stream()); + for (size_t i = 0; i < out_mem->count(); i++) { + float actual = lock[i]; + ASSERT_EQ(actual, lock_ref[i]); + } +} + +// output in byxf layout, check further.... +/*TEST(merge_reorder_reshape_permute, no_reorder) { auto& engine = get_test_engine(); auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); + set_values(input, { 2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, @@ -29,10 +89,49 @@ TEST(merge_reorder_reshape_permute, optimize_yolo) { topology topology; topology.add(input_layout("input", in_layout)); topology.add(data("weight", weight)); - topology.add(convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); - topology.add(reorder("reorder_inter", input_info("convolution"), format::bfyx, data_types::f16)); - topology.add(reshape("reshape", input_info("reorder_inter"), false, {1, 3, 24, 1}, ov::PartialShape{1, 3, 24, 1})); - topology.add(permute("permute_inter", input_info("reshape"), {0, 2, 1, 3})); + topology.add(convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, +false)); topology.add(reshape("reshape_inter", input_info("convolution"), false, {1, 3, 24}, ov::PartialShape{1, 3, +24})); topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1})); topology.add(softmax("softmax", +input_info("permute_inter"), 1)); ExecutionConfig config_test = get_test_default_config(engine); + ov::intel_gpu::ImplementationDesc softmax_impl_test = { format::bfyx, "", impl_types::ocl }; + config_test.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "softmax_inter", +softmax_impl_test } })); ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + auto prog = program::build_program(engine, topology, config); + + network net(prog); + + net.set_input_data("input", input); + auto output = net.execute(); + auto permute_inst = net.get_primitive("permute_inter"); + //ASSERT_TRUE(permute_inst->can_be_optimized()); + auto out_mem = output.at("softmax").get_memory(); + mem_lock lock(out_mem, get_test_stream()); + + for (size_t i = 0; i < out_mem->count(); i++) { + float actual = lock[i]; + ASSERT_EQ(actual, ref_output[i]); + } +}*/ + +TEST(merge_reorder_reshape_permute, no_reorder_no_reshape) { + auto& engine = get_test_engine(); + auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; + auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); + auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); + set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, + 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, + 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, + 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); + + set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + topology topology; + topology.add(input_layout("input", in_layout)); + topology.add(data("weight", weight)); + topology.add( + convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + topology.add(permute("permute_inter", input_info("convolution"), {0, 2, 3, 1})); topology.add(softmax("softmax", input_info("permute_inter"), 1)); ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); @@ -44,12 +143,156 @@ TEST(merge_reorder_reshape_permute, optimize_yolo) { net.set_input_data("input", input); auto output = net.execute(); + ExecutionConfig ref_config = get_test_default_config(engine); + ref_config.set_property(ov::intel_gpu::optimize_data(false)); + cldnn::network ref_network(engine, topology, ref_config); + // select preferred formats, conv + permute + auto permute_inst = net.get_primitive("permute_inter"); + ASSERT_TRUE(permute_inst->can_be_optimized()); + auto out_mem = output.at("softmax").get_memory(); + mem_lock lock(out_mem, get_test_stream()); + + ref_network.set_input_data("input", input); + auto ref_output = ref_network.execute(); + + auto ref_out_mem = ref_output.at("softmax").get_memory(); + mem_lock lock_ref(ref_out_mem, get_test_stream()); + for (size_t i = 0; i < out_mem->count(); i++) { + float actual = lock[i]; + ASSERT_EQ(actual, lock_ref[i]); + } +} + +TEST(merge_reorder_reshape_permute, cutomized_net_yolov6_alike) { + auto& engine = get_test_engine(); + auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; + auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); + auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); + set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, + 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, + 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, + 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); + + set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + topology topology; + topology.add(input_layout("input", in_layout)); + topology.add(data("weight", weight)); + topology.add( + convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + topology.add(reorder("reorder_inter", input_info("convolution"), format::bfyx, data_types::f16)); + topology.add( + reshape("reshape_inter", input_info("reorder_inter"), false, {1, 3, 24, 1}, ov::PartialShape{1, 3, 24, 1})); + topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1})); + topology.add(softmax("softmax", input_info("permute_inter"), 1)); + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(false)); + config.set_property(ov::intel_gpu::optimize_data(true)); + auto prog = program::build_program(engine, topology, config); + network net(prog); + + ExecutionConfig ref_config = get_test_default_config(engine); + ref_config.set_property(ov::intel_gpu::optimize_data(false)); + cldnn::network ref_network(engine, topology, ref_config); + + net.set_input_data("input", input); + auto output = net.execute(); + auto optimzed_nodes = net.get_program()->get_optimized(); + auto it = + std::find_if(std::begin(optimzed_nodes), std::end(optimzed_nodes), [&](cldnn::program::optimized_info& oi) { + return oi.first == "reorder_inter"; + }); + ASSERT_NE(it, optimzed_nodes.end()); + auto permute_inst = net.get_primitive("permute_inter"); + ASSERT_TRUE(permute_inst->can_be_optimized()); + auto reshape_inst = net.get_primitive("reshape_inter"); + ASSERT_TRUE(reshape_inst->can_be_optimized()); + + auto& processing_order = prog->get_processing_order(); + + auto reshape_node = std::find(processing_order.begin(), processing_order.end(), &prog->get_node("reshape_inter")); + size_t reshape_dist = std::distance(processing_order.begin(), reshape_node); + + auto permute_node = std::find(processing_order.begin(), processing_order.end(), &prog->get_node("permute_inter")); + size_t permute_dist = std::distance(processing_order.begin(), permute_node); + ASSERT_TRUE(reshape_dist > permute_dist); + auto out_mem = output.at("softmax").get_memory(); + mem_lock lock(out_mem, get_test_stream()); + + ref_network.set_input_data("input", input); + auto ref_output = ref_network.execute(); + + auto ref_out_mem = ref_output.at("softmax").get_memory(); + mem_lock lock_ref(ref_out_mem, get_test_stream()); + for (size_t i = 0; i < out_mem->count(); i++) { + float actual = lock[i]; + std::cout << actual << ", " << std::endl; + ASSERT_EQ(actual, lock_ref[i]); + } +} + +TEST(merge_reorder_reshape_permute, not_sinking_reshape) { + auto& engine = get_test_engine(); + auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; + auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); + auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); + set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, + 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, + 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, + 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); + + set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + topology topology; + topology.add(input_layout("input", in_layout)); + topology.add(data("weight", weight)); + topology.add( + convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + topology.add(reorder("reorder_inter", input_info("convolution"), format::bfyx, data_types::f16)); + topology.add( + reshape("reshape_inter", input_info("reorder_inter"), false, {1, 3, 2, 1}, ov::PartialShape{1, 3, 2, 1})); + topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1})); + topology.add(softmax("softmax", input_info("permute_inter"), 1)); + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(false)); + config.set_property(ov::intel_gpu::optimize_data(true)); + auto prog = program::build_program(engine, topology, config); + network net(prog); + + ExecutionConfig ref_config = get_test_default_config(engine); + ref_config.set_property(ov::intel_gpu::optimize_data(false)); + cldnn::network ref_network(engine, topology, ref_config); + + net.set_input_data("input", input); + auto output = net.execute(); + auto optimzed_nodes = net.get_program()->get_optimized(); + auto it = + std::find_if(std::begin(optimzed_nodes), std::end(optimzed_nodes), [&](cldnn::program::optimized_info& oi) { + return oi.first == "reorder_inter"; + }); + ASSERT_NE(it, optimzed_nodes.end()); + auto permute_inst = net.get_primitive("permute_inter"); + ASSERT_FALSE(permute_inst->can_be_optimized()); + auto reshape_inst = net.get_primitive("reshape_inter"); + ASSERT_FALSE(reshape_inst->can_be_optimized()); + + auto& processing_order = prog->get_processing_order(); + + auto reshape_node = std::find(processing_order.begin(), processing_order.end(), &prog->get_node("reshape_inter")); + size_t reshape_dist = std::distance(processing_order.begin(), reshape_node); + + auto permute_node = std::find(processing_order.begin(), processing_order.end(), &prog->get_node("permute_inter")); + size_t permute_dist = std::distance(processing_order.begin(), permute_node); + ASSERT_TRUE(reshape_dist < permute_dist); auto out_mem = output.at("softmax").get_memory(); mem_lock lock(out_mem, get_test_stream()); + ref_network.set_input_data("input", input); + auto ref_output = ref_network.execute(); + + auto ref_out_mem = ref_output.at("softmax").get_memory(); + mem_lock lock_ref(ref_out_mem, get_test_stream()); for (size_t i = 0; i < out_mem->count(); i++) { float actual = lock[i]; - std::cout << actual << ", "; + std::cout << actual << ", " << std::endl; + ASSERT_EQ(actual, lock_ref[i]); } - std::cout << std::endl; } From a8272478a09c745a0efac9cbe13877600a57abb4 Mon Sep 17 00:00:00 2001 From: fishbell Date: Mon, 23 Dec 2024 18:36:20 +0800 Subject: [PATCH 3/6] clean up code Signed-off-by: fishbell --- .../include/intel_gpu/primitives/reorder.hpp | 7 +--- .../graph/graph_optimizer/compile_graph.cpp | 6 ++-- .../prepare_primitive_fusing.cpp | 1 - .../src/graph/impls/onednn/reorder_onednn.cpp | 6 +--- src/plugins/intel_gpu/src/graph/program.cpp | 1 - src/plugins/intel_gpu/src/graph/reorder.cpp | 34 ++----------------- 6 files changed, 6 insertions(+), 49 deletions(-) diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp index 79256f500d20c2..ee53cbd8027087 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp @@ -223,17 +223,12 @@ struct reorder : public primitive_base { memory_type input_mem_type = memory_type::buffer; /// @brief Parameters required for reorder weights. std::shared_ptr weights_reorder_params = {}; - /// @brief Parameters required for source transpose. - std::vector src_permutation; + inline bool has_surface_input() const { return input.size() == 1 && input_mem_type == memory_type::surface; } - void set_src_permutation(const std::vector & src_perm) { - this->src_permutation = src_perm; - } - /// @brief Convert truncation Mode bool truncate = false; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp index 585dfb505aa344..fb4d6bfa590312 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/compile_graph.cpp @@ -44,8 +44,6 @@ void compile_graph::run(program& p) { std::string fail_reason = ""; try { if (selected_impl_manager) { - if (node->id() == "reorder:/detect/Reshape_14_reorder") - std::cout << "break" << std::endl; node->selected_impl = selected_impl_manager->create(*node, *params); } } catch (std::exception& e) { @@ -64,8 +62,8 @@ void compile_graph::run(program& p) { }); } } - for (auto& iter : tasks) - task_executor->run_and_wait({iter}); + + task_executor->run_and_wait(tasks); tasks.clear(); if (exception) { diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 93f0905b3a1ef7..96bb1a65da7279 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -137,7 +137,6 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { program_helpers::do_for_types(*node, [&p](reorder_node& node) { auto& input = node.input(); - // Restrictions: // - inputs cannot be padded // - primitives input cannot be output diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp index 12738741f9c7b6..7e24cebd6b9ee9 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp @@ -54,11 +54,7 @@ struct reorder_onednn : typed_primitive_onednn_impl()->src_permutation; - if (permute_order.size()) - input_md = test_md.permute_axes({0, 3, 1, 2}); + auto input_md = onednn::layout_to_memory_desc(input_layout); auto output_md = onednn::layout_to_memory_desc(output_layout); OPENVINO_ASSERT(input_md.get_format_kind() != dnnl::memory::format_kind::any, diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp index 6531c3e22627b2..d57df3f7d33c53 100644 --- a/src/plugins/intel_gpu/src/graph/program.cpp +++ b/src/plugins/intel_gpu/src/graph/program.cpp @@ -598,7 +598,6 @@ void program::pre_optimize_graph(bool is_internal) { apply_opt_pass(); } - apply_opt_pass(); // check if there exists some layout incompatibilities and add an reorder node if required apply_opt_pass(); diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 5a2e83eeb8631f..49a0d7cd8ccef0 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -170,22 +170,7 @@ layout reorder_inst::calc_output_layout(reorder_node const& node, kernel_impl_pa // TODO Shouldn't transform be called every time ifmt != ofmt? return layout(odt, ofmt, input_layout.get_tensor().transform(ofmt, 1), op); } else { - // debug code, to fuse reorder with src permute - auto org_ps = input_layout.get_partial_shape(); - auto output_shape = ov::PartialShape(); - int64_t input_static_rank = org_ps.rank().get_length(); - auto permute_order = desc->src_permutation; - if (permute_order.empty()) { - for (int64_t i = 0; i <= input_static_rank - 1; ++i) { - permute_order.emplace_back(i); // for compliance first - } - } - - for (int64_t i = 0; i < input_static_rank; ++i) { - output_shape.push_back(org_ps[permute_order[i]]); - } - return { layout(odt, ofmt, ov::intel_gpu::tensor_from_dims(output_shape.to_shape()), desc->output_paddings[0]) }; - //return layout(odt, ofmt, input_layout.get_tensor(), op); + return layout(odt, ofmt, input_layout.get_tensor(), op); } } @@ -206,22 +191,7 @@ std::vector reorder_inst::calc_output_layouts(reorder_node const& /*node #endif // ENABLE_ONEDNN_FOR_GPU return { desc->weights_reorder_params->get_output_layout() }; } else { - // debug code, to fuse reorder with src permute - auto org_ps = input_layout.get(); - ShapeType output_shape; - int64_t input_static_rank = org_ps.rank().get_length(); - auto permute_order = desc->src_permutation; - if (permute_order.empty()) { - for (int64_t i = 0; i <= input_static_rank - 1; ++i) { - permute_order.emplace_back(i); // for compliance first - } - } - - for (int64_t i = 0; i < input_static_rank; ++i) { - output_shape.push_back(org_ps[permute_order[i]]); - } - return { layout(output_shape, desc->output_data_types[0].value(), ofmt, desc->output_paddings[0]) }; - //return { layout(input_layout.get(), desc->output_data_types[0].value(), ofmt, desc->output_paddings[0]) }; + return { layout(input_layout.get(), desc->output_data_types[0].value(), ofmt, desc->output_paddings[0]) }; } } From 58aafd59b60e0973b968fffe60f9cb19b84256ef Mon Sep 17 00:00:00 2001 From: fishbell Date: Tue, 24 Dec 2024 15:20:08 +0800 Subject: [PATCH 4/6] optimize code Signed-off-by: fishbell --- .../prepare_primitive_fusing.cpp | 1 + .../graph_optimizer/reshape_transfer.cpp | 94 ++++++++----------- src/plugins/intel_gpu/src/graph/reorder.cpp | 1 - .../unit/passes/reorder_reshape_permute.cpp | 91 ++++++++++-------- 4 files changed, 95 insertions(+), 92 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp index 96bb1a65da7279..93f0905b3a1ef7 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_primitive_fusing.cpp @@ -137,6 +137,7 @@ void prepare_primitive_fusing::fuse_reorders(program &p) { program_helpers::do_for_types(*node, [&p](reorder_node& node) { auto& input = node.input(); + // Restrictions: // - inputs cannot be padded // - primitives input cannot be output diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp index 23b6a2eae092fb..17391299f2651f 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp @@ -16,7 +16,8 @@ void reshape_transfer::run(program& p) { // (reorder) + reshape + transpose // sink reshape for further possible optimization auto is_suitable_permute = [](cldnn::program_node* node) { - return node->get_users().size() == 1 && node->is_dynamic() == false; + return node->get_users().size() == 1 && node->is_dynamic() == false && + node->get_output_layout().get_rank() == 4; }; auto is_suitable_reshape = [](cldnn::program_node* node) -> bool { @@ -28,8 +29,8 @@ void reshape_transfer::run(program& p) { return true; return false; }; - std::function is_suitable_reorder; + std::function is_suitable_reorder; is_suitable_reorder = [&is_suitable_reorder](const cldnn::program_node* node) -> bool { if (node->get_users().size() != 1 || node->is_dynamic()) return false; @@ -58,51 +59,32 @@ void reshape_transfer::run(program& p) { // updated order must be (0,2,3,1): // dim with index=2 is split into 2 parts: 2 and 3 const auto& reshape_in_shape = reshape->get_input_layout().get_dims(); - const auto& reshape_out_dim = reshape->get_output_layout().get_dims(); - auto reshape_out_shape = reshape_out_dim; + const auto& reshape_out_shape = reshape->get_output_layout().get_dims(); auto transformed_order = original_order; ov::Shape new_shape(transformed_order.size()); - if (original_order.size() < reshape_out_dim.size() && reshape_out_dim.size() == 4) { - // if order dims is less than reshape dims, means reshape shape has been converted to upper dims some time - // before merge spatial dims - reshape_out_shape.resize(original_order.size()); - for (size_t i = 0; i < reshape_out_dim.size(); ++i) { - if (i < 2) { - reshape_out_shape[i] = reshape_out_dim[i]; - } else { - reshape_out_shape[2] *= reshape_out_dim[i]; - } + const uint16_t merge_dim_idx = [&]() { + for (size_t i = 0; i < reshape_in_shape.size(); ++i) { + if (reshape_in_shape[i] != reshape_out_shape[i]) + return i; } - const size_t merge_dim_idx = [&]() { - for (size_t i = 0; i < reshape_in_shape.size(); ++i) { - if (reshape_in_shape[i] != reshape_out_shape[i]) - return i; - } - OPENVINO_THROW("merged_dim_idx can not be found"); - }(); - auto insertIt = transformed_order.end(); - for (auto it = transformed_order.begin(); it != transformed_order.end(); ++it) { - auto& elem = *it; - if (elem > merge_dim_idx) { - elem++; - } else if (elem == merge_dim_idx) { - insertIt = it + 1; - } + OPENVINO_THROW("same input/output for reshape node"); + }(); + auto insertIt = transformed_order.end(); + for (auto it = transformed_order.begin(); it != transformed_order.end(); ++it) { + auto& elem = *it; + if (elem > merge_dim_idx) { + elem++; + } else if (elem == merge_dim_idx) { + insertIt = it + 1; } - transformed_order.insert(insertIt, merge_dim_idx + 1); - } else { - auto reorder_orders = [](std::vector& order, std::vector place_order) { - // for all elements to put in place - for (size_t i = 0; i < order.size() - 1; ++i) { - while (i != place_order[i]) { - // swap it with the element at its final place - auto alt = place_order[i]; - std::swap(order[i], order[alt]); - std::swap(place_order[i], place_order[alt]); - } - } - }; - reorder_orders(transformed_order, std::vector({0, 1, 3, 2})); + } + transformed_order.insert(insertIt, merge_dim_idx + 1); + // remove invalid orders + if (transformed_order.size() > reshape_out_shape.size()) { + transformed_order.erase( + std::remove_if(transformed_order.begin(), transformed_order.end(), [&](uint16_t& order) { + return order >= reshape_out_shape.size(); + })); } return transformed_order; }; @@ -136,20 +118,24 @@ void reshape_transfer::run(program& p) { reshape_node = &(inter_node->as()); auto transpose_order = update_order(transpose_node.get_permute_order(), reshape_node); - auto next_node = transpose_node.get_users().front(); - auto new_reshape_tensor = transpose_node.get_output_layout().get_tensor(); - p.move_node(*reshape_node, *node, *next_node); - // replace the permute node and reshape node auto new_permute = std::make_shared(transpose_node.id() + "_reordered", parent_node->id(), transpose_order); auto& new_permute_node = p.get_or_create(new_permute); - auto new_reshape = - std::make_shared(reshape_node->id() + "_sinked", new_permute_node.id(), new_reshape_tensor); - auto& new_reshape_node = p.get_or_create(new_reshape); + if (new_permute_node.as().is_rotating_except_batch()) { + auto next_node = transpose_node.get_users().front(); + auto new_reshape_tensor = transpose_node.get_output_layout().get_tensor(); + p.move_node(*reshape_node, *node, *next_node); + // replace the permute node and reshape node + auto new_reshape = + std::make_shared(reshape_node->id() + "_sinked", new_permute_node.id(), new_reshape_tensor); + auto& new_reshape_node = p.get_or_create(new_reshape); - p.replace(transpose_node, new_permute_node); - p.replace(*reshape_node, new_reshape_node); - new_permute_node.recalc_output_layout(false); - new_reshape_node.recalc_output_layout(false); + p.replace(transpose_node, new_permute_node); + p.replace(*reshape_node, new_reshape_node); + new_permute_node.recalc_output_layout(false); + new_reshape_node.recalc_output_layout(false); + } else { + p.remove_if_dangling(new_permute_node); + } } } diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index 49a0d7cd8ccef0..93698432e73be0 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -5,7 +5,6 @@ #include "primitive_type_base.h" #include "intel_gpu/runtime/error_handler.hpp" #include "json_object.h" -#include "intel_gpu/plugin/common_utils.hpp" #include "intel_gpu/primitives/convolution.hpp" #include "intel_gpu/primitives/eltwise.hpp" #ifdef ENABLE_ONEDNN_FOR_GPU diff --git a/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp b/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp index 50260af60cc3c7..edd0b9205f90dc 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp @@ -12,7 +12,7 @@ using namespace cldnn; using namespace ::tests; -TEST(merge_reorder_reshape_permute, no_reshape) { +TEST(opt_reorder_reshape_permute, no_reshape) { auto& engine = get_test_engine(); auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); @@ -60,7 +60,6 @@ TEST(merge_reorder_reshape_permute, no_reshape) { ref_network.set_input_data("input", input); auto ref_output = ref_network.execute(); - auto ref_out_mem = ref_output.at("softmax").get_memory(); mem_lock lock_ref(ref_out_mem, get_test_stream()); for (size_t i = 0; i < out_mem->count(); i++) { @@ -69,33 +68,25 @@ TEST(merge_reorder_reshape_permute, no_reshape) { } } -// output in byxf layout, check further.... -/*TEST(merge_reorder_reshape_permute, no_reorder) { +TEST(opt_reorder_reshape_permute, no_reorder_no_reshape) { auto& engine = get_test_engine(); auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); + set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, + 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, + 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, + 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); - set_values(input, { 2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, - 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, - 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, - 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, - 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, - 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, - 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, - 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); - - set_values(weight, { 1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); topology topology; topology.add(input_layout("input", in_layout)); topology.add(data("weight", weight)); - topology.add(convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, -false)); topology.add(reshape("reshape_inter", input_info("convolution"), false, {1, 3, 24}, ov::PartialShape{1, 3, -24})); topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1})); topology.add(softmax("softmax", -input_info("permute_inter"), 1)); ExecutionConfig config_test = get_test_default_config(engine); - ov::intel_gpu::ImplementationDesc softmax_impl_test = { format::bfyx, "", impl_types::ocl }; - config_test.set_property(ov::intel_gpu::force_implementations(ov::intel_gpu::ImplForcingMap{ { "softmax_inter", -softmax_impl_test } })); ExecutionConfig config = get_test_default_config(engine); + topology.add( + convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + topology.add(permute("permute_inter", input_info("convolution"), {0, 2, 3, 1})); + topology.add(softmax("softmax", input_info("permute_inter"), 1)); + ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config); @@ -104,18 +95,27 @@ softmax_impl_test } })); ExecutionConfig config = get_test_default_config(engine net.set_input_data("input", input); auto output = net.execute(); + + ExecutionConfig ref_config = get_test_default_config(engine); + ref_config.set_property(ov::intel_gpu::optimize_data(false)); + cldnn::network ref_network(engine, topology, ref_config); + // select preferred formats, conv + permute auto permute_inst = net.get_primitive("permute_inter"); - //ASSERT_TRUE(permute_inst->can_be_optimized()); + ASSERT_TRUE(permute_inst->can_be_optimized()); auto out_mem = output.at("softmax").get_memory(); mem_lock lock(out_mem, get_test_stream()); + ref_network.set_input_data("input", input); + auto ref_output = ref_network.execute(); + auto ref_out_mem = ref_output.at("softmax").get_memory(); + mem_lock lock_ref(ref_out_mem, get_test_stream()); for (size_t i = 0; i < out_mem->count(); i++) { float actual = lock[i]; - ASSERT_EQ(actual, ref_output[i]); + ASSERT_EQ(actual, lock_ref[i]); } -}*/ +} -TEST(merge_reorder_reshape_permute, no_reorder_no_reshape) { +TEST(opt_reorder_reshape_permute, cutomized_net_yolov6_alike) { auto& engine = get_test_engine(); auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); @@ -131,24 +131,42 @@ TEST(merge_reorder_reshape_permute, no_reorder_no_reshape) { topology.add(data("weight", weight)); topology.add( convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); - topology.add(permute("permute_inter", input_info("convolution"), {0, 2, 3, 1})); + topology.add(reorder("reorder_inter", input_info("convolution"), format::bfyx, data_types::f16)); + topology.add( + reshape("reshape_inter", input_info("reorder_inter"), false, {1, 3, 24, 1}, ov::PartialShape{1, 3, 24, 1})); + topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1})); topology.add(softmax("softmax", input_info("permute_inter"), 1)); ExecutionConfig config = get_test_default_config(engine); - config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::allow_new_shape_infer(false)); config.set_property(ov::intel_gpu::optimize_data(true)); auto prog = program::build_program(engine, topology, config); - network net(prog); - net.set_input_data("input", input); - auto output = net.execute(); - ExecutionConfig ref_config = get_test_default_config(engine); ref_config.set_property(ov::intel_gpu::optimize_data(false)); cldnn::network ref_network(engine, topology, ref_config); - // select preferred formats, conv + permute + + net.set_input_data("input", input); + auto output = net.execute(); + auto optimzed_nodes = net.get_program()->get_optimized(); + auto it = + std::find_if(std::begin(optimzed_nodes), std::end(optimzed_nodes), [&](cldnn::program::optimized_info& oi) { + return oi.first == "reorder_inter"; + }); + ASSERT_NE(it, optimzed_nodes.end()); auto permute_inst = net.get_primitive("permute_inter"); ASSERT_TRUE(permute_inst->can_be_optimized()); + auto reshape_inst = net.get_primitive("reshape_inter"); + ASSERT_TRUE(reshape_inst->can_be_optimized()); + + auto& processing_order = prog->get_processing_order(); + + auto reshape_node = std::find(processing_order.begin(), processing_order.end(), &prog->get_node("reshape_inter")); + size_t reshape_dist = std::distance(processing_order.begin(), reshape_node); + + auto permute_node = std::find(processing_order.begin(), processing_order.end(), &prog->get_node("permute_inter")); + size_t permute_dist = std::distance(processing_order.begin(), permute_node); + ASSERT_TRUE(reshape_dist > permute_dist); auto out_mem = output.at("softmax").get_memory(); mem_lock lock(out_mem, get_test_stream()); @@ -163,7 +181,7 @@ TEST(merge_reorder_reshape_permute, no_reorder_no_reshape) { } } -TEST(merge_reorder_reshape_permute, cutomized_net_yolov6_alike) { +TEST(opt_reorder_reshape_permute, cutomized_net_yolov6_alike_4d) { auto& engine = get_test_engine(); auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); @@ -182,7 +200,7 @@ TEST(merge_reorder_reshape_permute, cutomized_net_yolov6_alike) { topology.add(reorder("reorder_inter", input_info("convolution"), format::bfyx, data_types::f16)); topology.add( reshape("reshape_inter", input_info("reorder_inter"), false, {1, 3, 24, 1}, ov::PartialShape{1, 3, 24, 1})); - topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1})); + topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1, 3})); topology.add(softmax("softmax", input_info("permute_inter"), 1)); ExecutionConfig config = get_test_default_config(engine); config.set_property(ov::intel_gpu::allow_new_shape_infer(false)); @@ -225,12 +243,11 @@ TEST(merge_reorder_reshape_permute, cutomized_net_yolov6_alike) { mem_lock lock_ref(ref_out_mem, get_test_stream()); for (size_t i = 0; i < out_mem->count(); i++) { float actual = lock[i]; - std::cout << actual << ", " << std::endl; ASSERT_EQ(actual, lock_ref[i]); } } -TEST(merge_reorder_reshape_permute, not_sinking_reshape) { +TEST(opt_reorder_reshape_permute, not_sinking_reshape) { auto& engine = get_test_engine(); auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); @@ -248,7 +265,7 @@ TEST(merge_reorder_reshape_permute, not_sinking_reshape) { convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); topology.add(reorder("reorder_inter", input_info("convolution"), format::bfyx, data_types::f16)); topology.add( - reshape("reshape_inter", input_info("reorder_inter"), false, {1, 3, 2, 1}, ov::PartialShape{1, 3, 2, 1})); + reshape("reshape_inter", input_info("reorder_inter"), false, {1, 18, 4, 1}, ov::PartialShape{1, 18, 4, 1})); topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1})); topology.add(softmax("softmax", input_info("permute_inter"), 1)); ExecutionConfig config = get_test_default_config(engine); From 293a707991d5d45504db2beda035096c4171881f Mon Sep 17 00:00:00 2001 From: fishbell Date: Tue, 24 Dec 2024 17:04:03 +0800 Subject: [PATCH 5/6] relaxed pattern match to allow reorder not in place Signed-off-by: fishbell --- .../graph_optimizer/reshape_transfer.cpp | 36 ++++++------ .../unit/passes/reorder_reshape_permute.cpp | 56 +++++++++++++++++++ 2 files changed, 76 insertions(+), 16 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp index 17391299f2651f..90e0e351097a4f 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp @@ -30,24 +30,28 @@ void reshape_transfer::run(program& p) { return false; }; - std::function is_suitable_reorder; - is_suitable_reorder = [&is_suitable_reorder](const cldnn::program_node* node) -> bool { + std::function is_suitable_parent; + is_suitable_parent = [&is_suitable_parent](const cldnn::program_node* node) -> bool { if (node->get_users().size() != 1 || node->is_dynamic()) return false; - for (size_t idx = 0; idx < node->get_dependencies().size(); idx++) { - auto& input = node->get_dependency(idx); - if (!input.is_in_data_flow() || input.is_constant()) - continue; - if (input.is_type()) { - return true; - } else if (input.is_type() && input.get_dependency(1).is_constant()) { - return is_suitable_reorder(&input); - } else if (input.is_type()) { - return is_suitable_reorder(&input); + if (node->is_type()) + return true; + if (node->is_type()) { + for (size_t idx = 0; idx < node->get_dependencies().size(); idx++) { + auto& input = node->get_dependency(idx); + if (!input.is_in_data_flow() || input.is_constant()) + continue; + if (input.is_type()) { + return true; + } else if (input.is_type() && input.get_dependency(1).is_constant()) { + return is_suitable_parent(&input); + } else if (input.is_type()) { + return is_suitable_parent(&input); + } + return false; } - return false; } - return true; + return false; }; auto update_order = [](std::vector original_order, cldnn::program_node* reshape) { @@ -63,7 +67,7 @@ void reshape_transfer::run(program& p) { auto transformed_order = original_order; ov::Shape new_shape(transformed_order.size()); const uint16_t merge_dim_idx = [&]() { - for (size_t i = 0; i < reshape_in_shape.size(); ++i) { + for (uint16_t i = 0; i < reshape_in_shape.size(); ++i) { if (reshape_in_shape[i] != reshape_out_shape[i]) return i; } @@ -110,7 +114,7 @@ void reshape_transfer::run(program& p) { continue; } - if (!is_suitable_reorder(parent_node)) { + if (!is_suitable_parent(parent_node)) { continue; } reshape_node* reshape_node = nullptr; diff --git a/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp b/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp index edd0b9205f90dc..7189b3e29ee978 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp @@ -68,6 +68,62 @@ TEST(opt_reorder_reshape_permute, no_reshape) { } } +TEST(opt_reorder_reshape_permute, no_reorder) { + auto& engine = get_test_engine(); + auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; + auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); + auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); + set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, + 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, + 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, + 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); + + set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + topology topology; + topology.add(input_layout("input", in_layout)); + topology.add(data("weight", weight)); + topology.add( + convolution("convolution", input_info("input"), "weight", "", 1, {1, 1}, {1, 1}, {0, 0}, {0, 0}, false)); + topology.add( + reshape("reshape_inter", input_info("convolution"), false, {1, 3, 24, 1}, ov::PartialShape{1, 3, 24, 1})); + topology.add(permute("permute_inter", input_info("reshape_inter"), {0, 2, 1})); + topology.add(softmax("softmax", input_info("permute_inter"), 1)); + ExecutionConfig config = get_test_default_config(engine); + config.set_property(ov::intel_gpu::allow_new_shape_infer(true)); + config.set_property(ov::intel_gpu::optimize_data(true)); + auto prog = program::build_program(engine, topology, config); + + network net(prog); + + net.set_input_data("input", input); + auto output = net.execute(); + + ExecutionConfig ref_config = get_test_default_config(engine); + ref_config.set_property(ov::intel_gpu::optimize_data(false)); + cldnn::network ref_network(engine, topology, ref_config); + auto& processing_order = prog->get_processing_order(); + auto reshape_node = std::find(processing_order.begin(), processing_order.end(), &prog->get_node("reshape_inter")); + size_t reshape_dist = std::distance(processing_order.begin(), reshape_node); + + auto permute_node = std::find(processing_order.begin(), processing_order.end(), &prog->get_node("permute_inter")); + size_t permute_dist = std::distance(processing_order.begin(), permute_node); + ASSERT_TRUE(reshape_dist > permute_dist); + // select preferred formats, conv + permute + auto permute_inst = net.get_primitive("permute_inter"); + ASSERT_TRUE(permute_inst->can_be_optimized()); + auto out_mem = output.at("softmax").get_memory(); + mem_lock lock(out_mem, get_test_stream()); + + ref_network.set_input_data("input", input); + auto ref_output = ref_network.execute(); + auto ref_out_mem = ref_output.at("softmax").get_memory(); + mem_lock lock_ref(ref_out_mem, get_test_stream()); + for (size_t i = 0; i < out_mem->count(); i++) { + float actual = lock[i]; + ASSERT_EQ(actual, lock_ref[i]); + } +} + TEST(opt_reorder_reshape_permute, no_reorder_no_reshape) { auto& engine = get_test_engine(); auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; From 98e39cc1ab0940617d4222ace54d7ac9804947e7 Mon Sep 17 00:00:00 2001 From: fishbell Date: Wed, 25 Dec 2024 09:21:37 +0800 Subject: [PATCH 6/6] use random values in test Signed-off-by: fishbell --- .../graph_optimizer/reshape_transfer.cpp | 26 ++-- .../unit/passes/reorder_reshape_permute.cpp | 127 +++++++++--------- 2 files changed, 75 insertions(+), 78 deletions(-) diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp index 90e0e351097a4f..90faed7160310a 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reshape_transfer.cpp @@ -34,22 +34,18 @@ void reshape_transfer::run(program& p) { is_suitable_parent = [&is_suitable_parent](const cldnn::program_node* node) -> bool { if (node->get_users().size() != 1 || node->is_dynamic()) return false; - if (node->is_type()) - return true; - if (node->is_type()) { - for (size_t idx = 0; idx < node->get_dependencies().size(); idx++) { - auto& input = node->get_dependency(idx); - if (!input.is_in_data_flow() || input.is_constant()) - continue; - if (input.is_type()) { - return true; - } else if (input.is_type() && input.get_dependency(1).is_constant()) { - return is_suitable_parent(&input); - } else if (input.is_type()) { - return is_suitable_parent(&input); - } - return false; + for (size_t idx = 0; idx < node->get_dependencies().size(); idx++) { + auto& input = node->get_dependency(idx); + if (!input.is_in_data_flow() || input.is_constant()) + continue; + if (node->is_type() || input.is_type()) { + return true; + } else if (input.is_type() && input.get_dependency(1).is_constant()) { + return is_suitable_parent(&input); + } else if (input.is_type()) { + return is_suitable_parent(&input); } + return false; } return false; }; diff --git a/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp b/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp index 7189b3e29ee978..261d41c93ed37b 100644 --- a/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp +++ b/src/plugins/intel_gpu/tests/unit/passes/reorder_reshape_permute.cpp @@ -17,13 +17,9 @@ TEST(opt_reorder_reshape_permute, no_reshape) { auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); + tests::set_random_values(input); + tests::set_random_values(weight); - set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, - 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, - 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, - 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); - - set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); topology topology; topology.add(input_layout("input", in_layout)); topology.add(data("weight", weight)); @@ -54,7 +50,9 @@ TEST(opt_reorder_reshape_permute, no_reshape) { }); ASSERT_NE(it, optimzed_nodes.end()); auto permute_inst = net.get_primitive("permute_inter"); - ASSERT_TRUE(permute_inst->can_be_optimized()); + if (net.get_primitive("convolution")->get_impl()->is_onednn()) { + ASSERT_TRUE(permute_inst->can_be_optimized()); + } auto out_mem = output.at("softmax").get_memory(); mem_lock lock(out_mem, get_test_stream()); @@ -62,9 +60,13 @@ TEST(opt_reorder_reshape_permute, no_reshape) { auto ref_output = ref_network.execute(); auto ref_out_mem = ref_output.at("softmax").get_memory(); mem_lock lock_ref(ref_out_mem, get_test_stream()); + auto tolerance = default_tolerance(ref_out_mem->get_layout().data_type); for (size_t i = 0; i < out_mem->count(); i++) { - float actual = lock[i]; - ASSERT_EQ(actual, lock_ref[i]); + ASSERT_NEAR(lock[i],lock_ref[i], tolerance) + << "\ntolerance = " << tolerance + << "\ni = " << i + << "\nref[i] = " << lock_ref[i] + << "\nopt[i] = " << lock[i]; } } @@ -73,12 +75,8 @@ TEST(opt_reorder_reshape_permute, no_reorder) { auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); - set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, - 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, - 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, - 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); - - set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + tests::set_random_values(input); + tests::set_random_values(weight); topology topology; topology.add(input_layout("input", in_layout)); topology.add(data("weight", weight)); @@ -110,7 +108,9 @@ TEST(opt_reorder_reshape_permute, no_reorder) { ASSERT_TRUE(reshape_dist > permute_dist); // select preferred formats, conv + permute auto permute_inst = net.get_primitive("permute_inter"); - ASSERT_TRUE(permute_inst->can_be_optimized()); + if (net.get_primitive("convolution")->get_impl()->is_onednn()) { + ASSERT_TRUE(permute_inst->can_be_optimized()); + } auto out_mem = output.at("softmax").get_memory(); mem_lock lock(out_mem, get_test_stream()); @@ -118,9 +118,13 @@ TEST(opt_reorder_reshape_permute, no_reorder) { auto ref_output = ref_network.execute(); auto ref_out_mem = ref_output.at("softmax").get_memory(); mem_lock lock_ref(ref_out_mem, get_test_stream()); + auto tolerance = default_tolerance(ref_out_mem->get_layout().data_type); for (size_t i = 0; i < out_mem->count(); i++) { - float actual = lock[i]; - ASSERT_EQ(actual, lock_ref[i]); + ASSERT_NEAR(lock[i],lock_ref[i], tolerance) + << "\ntolerance = " << tolerance + << "\ni = " << i + << "\nref[i] = " << lock_ref[i] + << "\nopt[i] = " << lock[i]; } } @@ -129,12 +133,8 @@ TEST(opt_reorder_reshape_permute, no_reorder_no_reshape) { auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); - set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, - 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, - 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, - 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); - - set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + tests::set_random_values(input); + tests::set_random_values(weight); topology topology; topology.add(input_layout("input", in_layout)); topology.add(data("weight", weight)); @@ -157,7 +157,9 @@ TEST(opt_reorder_reshape_permute, no_reorder_no_reshape) { cldnn::network ref_network(engine, topology, ref_config); // select preferred formats, conv + permute auto permute_inst = net.get_primitive("permute_inter"); - ASSERT_TRUE(permute_inst->can_be_optimized()); + if (net.get_primitive("convolution")->get_impl()->is_onednn()) { + ASSERT_TRUE(permute_inst->can_be_optimized()); + } auto out_mem = output.at("softmax").get_memory(); mem_lock lock(out_mem, get_test_stream()); @@ -165,9 +167,13 @@ TEST(opt_reorder_reshape_permute, no_reorder_no_reshape) { auto ref_output = ref_network.execute(); auto ref_out_mem = ref_output.at("softmax").get_memory(); mem_lock lock_ref(ref_out_mem, get_test_stream()); + auto tolerance = default_tolerance(ref_out_mem->get_layout().data_type); for (size_t i = 0; i < out_mem->count(); i++) { - float actual = lock[i]; - ASSERT_EQ(actual, lock_ref[i]); + ASSERT_NEAR(lock[i],lock_ref[i], tolerance) + << "\ntolerance = " << tolerance + << "\ni = " << i + << "\nref[i] = " << lock_ref[i] + << "\nopt[i] = " << lock[i]; } } @@ -176,12 +182,8 @@ TEST(opt_reorder_reshape_permute, cutomized_net_yolov6_alike) { auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); - set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, - 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, - 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, - 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); - - set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + tests::set_random_values(input); + tests::set_random_values(weight); topology topology; topology.add(input_layout("input", in_layout)); topology.add(data("weight", weight)); @@ -211,7 +213,9 @@ TEST(opt_reorder_reshape_permute, cutomized_net_yolov6_alike) { }); ASSERT_NE(it, optimzed_nodes.end()); auto permute_inst = net.get_primitive("permute_inter"); - ASSERT_TRUE(permute_inst->can_be_optimized()); + if (net.get_primitive("convolution")->get_impl()->is_onednn()) { + ASSERT_TRUE(permute_inst->can_be_optimized()); + } auto reshape_inst = net.get_primitive("reshape_inter"); ASSERT_TRUE(reshape_inst->can_be_optimized()); @@ -231,9 +235,13 @@ TEST(opt_reorder_reshape_permute, cutomized_net_yolov6_alike) { auto ref_out_mem = ref_output.at("softmax").get_memory(); mem_lock lock_ref(ref_out_mem, get_test_stream()); + auto tolerance = default_tolerance(ref_out_mem->get_layout().data_type); for (size_t i = 0; i < out_mem->count(); i++) { - float actual = lock[i]; - ASSERT_EQ(actual, lock_ref[i]); + ASSERT_NEAR(lock[i],lock_ref[i], tolerance) + << "\ntolerance = " << tolerance + << "\ni = " << i + << "\nref[i] = " << lock_ref[i] + << "\nopt[i] = " << lock[i]; } } @@ -242,12 +250,8 @@ TEST(opt_reorder_reshape_permute, cutomized_net_yolov6_alike_4d) { auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); - set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, - 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, - 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, - 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); - - set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + tests::set_random_values(input); + tests::set_random_values(weight); topology topology; topology.add(input_layout("input", in_layout)); topology.add(data("weight", weight)); @@ -277,7 +281,9 @@ TEST(opt_reorder_reshape_permute, cutomized_net_yolov6_alike_4d) { }); ASSERT_NE(it, optimzed_nodes.end()); auto permute_inst = net.get_primitive("permute_inter"); - ASSERT_TRUE(permute_inst->can_be_optimized()); + if (net.get_primitive("convolution")->get_impl()->is_onednn()) { + ASSERT_TRUE(permute_inst->can_be_optimized()); + } auto reshape_inst = net.get_primitive("reshape_inter"); ASSERT_TRUE(reshape_inst->can_be_optimized()); @@ -297,9 +303,13 @@ TEST(opt_reorder_reshape_permute, cutomized_net_yolov6_alike_4d) { auto ref_out_mem = ref_output.at("softmax").get_memory(); mem_lock lock_ref(ref_out_mem, get_test_stream()); + auto tolerance = default_tolerance(ref_out_mem->get_layout().data_type); for (size_t i = 0; i < out_mem->count(); i++) { - float actual = lock[i]; - ASSERT_EQ(actual, lock_ref[i]); + ASSERT_NEAR(lock[i],lock_ref[i], tolerance) + << "\ntolerance = " << tolerance + << "\ni = " << i + << "\nref[i] = " << lock_ref[i] + << "\nopt[i] = " << lock[i]; } } @@ -308,12 +318,8 @@ TEST(opt_reorder_reshape_permute, not_sinking_reshape) { auto in_layout = layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}; auto input = engine.allocate_memory(layout{ov::PartialShape({1, 2, 4, 6}), data_types::f16, format::bfyx}); auto weight = engine.allocate_memory(layout{ov::PartialShape({3, 2, 1, 1}), data_types::f16, format::bfyx}); - set_values(input, {2.0f, 3.0f, 4.0f, 4.0f, 3.0f, 2.0f, 1.f, 2.f, 3.f, 1.f, 2.f, 4.f, - 5.f, 1.f, 1.f, 2.f, 1.f, 2.f, 2.0f, 3.0f, 1.0f, 4.0f, 1.0f, 4.0f, - 3.0f, 2.0f, 0.0f, 1.0f, 0.0f, 2.0f, 2.f, 4.f, 1.f, 1.f, 2.f, 1.f, - 1.f, 2.f, 0.f, 2.f, 5.f, 2.f, 4.0f, 3.0f, 1.0f, 0.0f, 3.0f, 2.0f}); - - set_values(weight, {1.f, 1.f, 1.f, 1.f, 1.f, 1.f}); + tests::set_random_values(input); + tests::set_random_values(weight); topology topology; topology.add(input_layout("input", in_layout)); topology.add(data("weight", weight)); @@ -336,16 +342,8 @@ TEST(opt_reorder_reshape_permute, not_sinking_reshape) { net.set_input_data("input", input); auto output = net.execute(); - auto optimzed_nodes = net.get_program()->get_optimized(); - auto it = - std::find_if(std::begin(optimzed_nodes), std::end(optimzed_nodes), [&](cldnn::program::optimized_info& oi) { - return oi.first == "reorder_inter"; - }); - ASSERT_NE(it, optimzed_nodes.end()); auto permute_inst = net.get_primitive("permute_inter"); ASSERT_FALSE(permute_inst->can_be_optimized()); - auto reshape_inst = net.get_primitive("reshape_inter"); - ASSERT_FALSE(reshape_inst->can_be_optimized()); auto& processing_order = prog->get_processing_order(); @@ -363,9 +361,12 @@ TEST(opt_reorder_reshape_permute, not_sinking_reshape) { auto ref_out_mem = ref_output.at("softmax").get_memory(); mem_lock lock_ref(ref_out_mem, get_test_stream()); + auto tolerance = default_tolerance(ref_out_mem->get_layout().data_type); for (size_t i = 0; i < out_mem->count(); i++) { - float actual = lock[i]; - std::cout << actual << ", " << std::endl; - ASSERT_EQ(actual, lock_ref[i]); + ASSERT_NEAR(lock[i],lock_ref[i], tolerance) + << "\ntolerance = " << tolerance + << "\ni = " << i + << "\nref[i] = " << lock_ref[i] + << "\nopt[i] = " << lock[i]; } -} +} \ No newline at end of file