Skip to content

Commit

Permalink
[GPU] Add optimization for FC for beam search and beams number < 8 (openvinotoolkit#22335)
Browse files Browse the repository at this point in the history

* [GPU] Add optimization for FC for beam search and beams number < 8

* Re-enable async compilation for batch_size==1
  • Loading branch information
sshlyapn authored Jan 26, 2024
1 parent 747db55 commit 20abada
Show file tree
Hide file tree
Showing 6 changed files with 565 additions and 380 deletions.
19 changes: 18 additions & 1 deletion src/plugins/intel_gpu/src/graph/primitive_inst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -685,7 +685,24 @@ bool primitive_inst::use_async_compilation() {
return false;
}

return (_node->is_type<convolution>() || _node->is_type<fully_connected>() || _node->is_type<gemm>() ||
bool compile_fc_impls = _node->is_type<fully_connected>();
if (compile_fc_impls) {
const auto& fc_node = _node->as<fully_connected>();
if (fc_node.get_primitive()->compressed_weights) {
auto weights_dt = fc_node.weights().get_output_layout().data_type;
auto input_shape = _impl_params->get_input_layout().get_shape();
auto batch_size = std::accumulate(input_shape.begin(),
input_shape.end() - 1,
size_t{1},
std::multiplies<size_t>());

// Disable async compilation for all int4 FC, except in the case of batch_size == 1
if (one_of(weights_dt, {data_types::i4, data_types::u4}) && batch_size != 1)
compile_fc_impls = false;
}
}

return (_node->is_type<convolution>() || compile_fc_impls || _node->is_type<gemm>() ||
(_node->is_type<softmax>() && _node->get_selected_impl() &&
_node->get_selected_impl()->get_kernel_name().find("softmax_gpu_ref") != std::string::npos));
}
Expand Down
Loading

0 comments on commit 20abada

Please sign in to comment.