diff --git a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp index b2ee5e8f3dcdbb..de9588d01f352b 100644 --- a/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp +++ b/src/plugins/intel_npu/src/plugin/npuw/compiled_model.cpp @@ -118,6 +118,7 @@ void pre_load_transform(const std::shared_ptr& model, const ov::AnyMa rewr.add_matcher(); rewr.run_on_model(model); } + model->validate_nodes_and_infer_types(); } } // anonymous namespace @@ -195,29 +196,6 @@ ov::npuw::CompiledModel::CompiledModel(const std::shared_ptr& model, } } - // FIXME: Find a better place to call this transformation - ov::pass::ConvertPrecision(ov::element::bf16, ov::element::f16).run_on_model(model); - - if (m_cfg.get<::intel_npu::NPUW_FOLD>() && m_cfg.get<::intel_npu::NPUW_FUNCALL_FOR_ALL>()) { - // If there's folding enabled AND non-repeating graphs are forced to be - // functions, do extra lifting for gather (if any) - ov::pass::GraphRewrite rewr; - rewr.add_matcher(); - rewr.add_matcher(); - rewr.add_matcher(); - rewr.run_on_model(model); - } - - if (m_cfg.get<::intel_npu::NPUW_SLICE_OUT>()) { - // Add Slice before last MatMul for the prefill model - ov::pass::GraphRewrite rewr; - rewr.add_matcher(); - rewr.add_matcher(); - rewr.add_matcher(); - rewr.add_matcher(); - rewr.run_on_model(model); - } - auto partitioning = getPartitioning(model, m_cfg); m_total_stat.gflops = partitioning.total_gflops; m_total_stat.ops = partitioning.total_ops;