Skip to content

Commit

Permalink
NPUW: Complete SLICE_OUT fix
Browse files: browse the repository at this point in the history
  • Loading branch information
dmatveev committed Jan 2, 2025
1 parent 0c98405 commit 37f99f3
Showing 1 changed file with 1 addition and 23 deletions.
24 changes: 1 addition & 23 deletions src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -118,6 +118,7 @@ void pre_load_transform(const std::shared_ptr<ov::Model>& model, const ov::AnyMa
rewr.add_matcher<ov::npuw::patterns::opt::SliceLastMatmulMultiply>();
rewr.run_on_model(model);
}
model->validate_nodes_and_infer_types();
}
} // anonymous namespace

Expand Down Expand Up @@ -195,29 +196,6 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
}
}

// FIXME: Find a better place to call this transformation
ov::pass::ConvertPrecision(ov::element::bf16, ov::element::f16).run_on_model(model);

if (m_cfg.get<::intel_npu::NPUW_FOLD>() && m_cfg.get<::intel_npu::NPUW_FUNCALL_FOR_ALL>()) {
// If there's folding enabled AND non-repeating graphs are forced to be
// functions, do extra lifting for gather (if any)
ov::pass::GraphRewrite rewr;
rewr.add_matcher<ov::npuw::patterns::opt::DQLiftGatherAsymCW>();
rewr.add_matcher<ov::npuw::patterns::opt::DQLiftGatherSymCW>();
rewr.add_matcher<ov::npuw::patterns::opt::DQLiftGatherSymGQ>();
rewr.run_on_model(model);
}

if (m_cfg.get<::intel_npu::NPUW_SLICE_OUT>()) {
// Add Slice before last MatMul for the prefill model
ov::pass::GraphRewrite rewr;
rewr.add_matcher<ov::npuw::patterns::opt::SliceLastMatmul>();
rewr.add_matcher<ov::npuw::patterns::opt::SliceLastMatmulAdd>();
rewr.add_matcher<ov::npuw::patterns::opt::SliceLastMatmulTranspose>();
rewr.add_matcher<ov::npuw::patterns::opt::SliceLastMatmulMultiply>();
rewr.run_on_model(model);
}

auto partitioning = getPartitioning(model, m_cfg);
m_total_stat.gflops = partitioning.total_gflops;
m_total_stat.ops = partitioning.total_ops;
Expand Down

0 comments on commit 37f99f3

Please sign in to comment.