diff --git a/.github/workflows/job_python_unit_tests.yml b/.github/workflows/job_python_unit_tests.yml index a10c0991fbdf0b..de0a65069d2232 100644 --- a/.github/workflows/job_python_unit_tests.yml +++ b/.github/workflows/job_python_unit_tests.yml @@ -231,7 +231,7 @@ jobs: - name: PyTorch torch.export Layer Tests if: ${{ fromJSON(inputs.affected-components).PyTorch_FE.test && runner.arch != 'ARM64' }} # Ticket: 126287 run: | - python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -m precommit_torch_export --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml + python3 -m pytest ${LAYER_TESTS_INSTALL_DIR}/pytorch_tests -n logical -m precommit_torch_export --junitxml=${INSTALL_TEST_DIR}/TEST-pytorch.xml env: TEST_DEVICE: CPU TEST_PRECISION: FP32 diff --git a/samples/js/node/README.md b/samples/js/node/README.md index e334f8ee9184a4..f0b18b15dd4352 100644 --- a/samples/js/node/README.md +++ b/samples/js/node/README.md @@ -1,13 +1,16 @@ -# OpenVINO™ JavaScript API examples of usage +# OpenVINO™ Node.js Bindings Examples of Usage -## Installation of openvino-node package -From *openvino/src/bindings/js/node* run `npm i` to download OpenVINO™ runtime, install requirements, build bindings and compile TypeScript code to JavaScript +## Install -On the *.nix systems run `source openvino/src/bindings/js/node/scripts/setupvars.sh` to add path to OpenVINO™ runtime libraries in `LD_LIBRARY_PATH` variable +To run samples, install dependencies first. In current directory run: +```bash +npm install +``` Note: Perform these steps also before running notebooks. ## Samples + - hello_classification - hello_reshape_ssd - classification_sample_async @@ -17,10 +20,13 @@ Note: Perform these steps also before running notebooks. 
Use [Node.js Notebooks (REPL)](https://marketplace.visualstudio.com/items?itemName=donjayamanne.typescript-notebook) VSCode extension to run these notebook samples -Make sure that `LD_LIBRARY_PATH` variable contains path to OpenVINO runtime folder - - ./notebooks - - 001-hello-world.nnb - - 003-hello-segmentation.nnb - - 004-hello-detection.nnb - - 213-question-answering.nnb + - 001-hello-world.nnb + - 003-hello-segmentation.nnb + - 004-hello-detection.nnb + - 213-question-answering.nnb + +## See Also + +* [OpenVINO™ JavaScript API Developer Documentation](../../../src/bindings/js/docs/README.md#openvino-node-package-developer-documentation) +* [OpenVINO™ README](../../../README.md) diff --git a/samples/js/node/classification_sample_async/README.md b/samples/js/node/classification_sample_async/README.md index 209eeaa5828ab4..0b19e908587505 100644 --- a/samples/js/node/classification_sample_async/README.md +++ b/samples/js/node/classification_sample_async/README.md @@ -1,8 +1,10 @@ -# Image Classification Async NodeJS Sample +# Image Classification Async Node.js Sample Models with only 1 input and output are supported. Run: -`node classification_sample_async.js -m *path_to_model_file* -i *path_to_img1* -i *path_to_img2* -d AUTO` +```bash +node classification_sample_async.js -m *path_to_model_file* -i *path_to_img1* -i *path_to_img2* -d AUTO +``` -Other details see in /samples/python/classification_sample_async/README.md +For other details, see [../../../python/classification_sample_async/README.md](../../../python/classification_sample_async/README.md) diff --git a/samples/js/node/hello_classification/README.md b/samples/js/node/hello_classification/README.md index f300d848e87e01..2de983af58334b 100644 --- a/samples/js/node/hello_classification/README.md +++ b/samples/js/node/hello_classification/README.md @@ -1,8 +1,10 @@ -# Hello Classification NodeJS Sample +# Hello Classification Node.js Sample Models with only 1 input and output are supported. 
Run: -`node hello_classification.js *path_to_model_file* *path_to_img* AUTO` +```bash +node hello_classification.js *path_to_model_file* *path_to_img* AUTO +``` -Other details see in /samples/python/hello_classification/README.md +For other details, see [../../../python/hello_classification/README.md](../../../python/hello_classification/README.md) diff --git a/samples/js/node/hello_reshape_ssd/README.md b/samples/js/node/hello_reshape_ssd/README.md index 41bef928bf434d..21d8be8ec4b50d 100644 --- a/samples/js/node/hello_reshape_ssd/README.md +++ b/samples/js/node/hello_reshape_ssd/README.md @@ -1,8 +1,11 @@ -# Hello Reshape SSD NodeJS Sample +# Hello Reshape SSD Node.js Sample Models with only 1 input and output are supported. Run: -`node hello_reshape_ssd.js *path_to_model_file* *path_to_img* AUTO` +```bash +node hello_reshape_ssd.js *path_to_model_file* *path_to_img* AUTO +``` + +For other details, see [../../../python/hello_reshape_ssd/README.md](../../../python/hello_reshape_ssd/README.md) -Other details see in /samples/python/hello_reshape_ssd/README.md diff --git a/src/bindings/README.md b/src/bindings/README.md index 51868fd1a3e130..f8436986383469 100644 --- a/src/bindings/README.md +++ b/src/bindings/README.md @@ -5,7 +5,7 @@ OpenVINO provides bindings for several languages: * [c](./c) * [python](./python) * [javascript](./js) - * [nodejs](./js/nodejs) + * [node.js](./js/node) ## See also * [OpenVINO™ README](../../README.md) diff --git a/src/bindings/js/README.md b/src/bindings/js/README.md index 2e754aa32d9f85..d2f24d9273bb81 100644 --- a/src/bindings/js/README.md +++ b/src/bindings/js/README.md @@ -1,3 +1,3 @@ # OpenVINO™ JavaScript API -- `./node` - openvino-node NPM package with Node.js bindings +- [./node](./node) - **openvino-node** npm package with OpenVINO Node.js bindings diff --git a/src/bindings/js/docs/README.md b/src/bindings/js/docs/README.md index edff31df19e812..ed2e5f5775ae69 100644 --- a/src/bindings/js/docs/README.md +++ 
b/src/bindings/js/docs/README.md @@ -1,4 +1,83 @@ -# Javascript bindings +# OpenVINO™ JavaScript Bindings + +## Folders - `./docs` - documentation -- `./node` - openvino-node NPM package with Node.js bindings +- `./node` - openvino-node npm package + +## openvino-node Package Developer Documentation + +### Components + +- [include](../node/include/) - header files for current API. +- [lib](../node/lib/) - TypeScript sources for current API. +- [src](../node/src/) - C++ sources for current API. +- [tests](../node/tests/) - tests directory for current API. + +### Build + +- Make sure that all submodules are updated: + ```bash + git submodule update --init --recursive + ``` +- Create the *build* directory: + ```bash + mkdir build && cd build + ``` +- Configure building of the binaries: + ```bash + cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_FASTER_BUILD=ON \ + -DCPACK_GENERATOR=NPM \ + -DENABLE_SYSTEM_TBB=OFF -UTBB* \ + -DENABLE_TESTS=OFF \ + -DENABLE_SAMPLES=OFF \ + -DENABLE_WHEEL=OFF \ + -DENABLE_PYTHON=OFF \ + -DENABLE_INTEL_GPU=OFF \ + -DCMAKE_INSTALL_PREFIX=../src/bindings/js/node/bin \ + .. + ``` +- Build the bindings: + ```bash + cmake --build . --config Release --verbose -j4 + ``` +- Install binaries for the *openvino-node* package: + ```bash + cmake --install . 
+ ``` +- Navigate to the *npm* package folder: + ```bash + cd ../src/bindings/js/node + ``` +- Now you can install the dependency packages and transpile the TypeScript code to JavaScript: + ```bash + npm install + ``` +- Run tests to make sure that **openvino-node** has been built successfully: + ```bash + npm run test + ``` + +## Usage + +- Add the **openvino-node** package to your project by specifying it in **package.json**: + ```json + "openvino-node": "file:*path-to-current-directory*" + ``` +- Make sure to require it: + ```js + const { addon: ov } = require('openvino-node'); + ``` + +## Samples + +[OpenVINO™ Node.js Bindings Examples of Usage](../../../../samples/js/node/README.md) + +## See Also + +* [OpenVINO™ README](../../../../README.md) +* [OpenVINO™ Core Components](../../../README.md) +* [OpenVINO™ JavaScript API](../README.md) +* [OpenVINO™ Node.js Bindings](../node/README.md) diff --git a/src/bindings/js/node/.npmignore b/src/bindings/js/node/.npmignore index a961be5e3aa24a..e4fe3743e7de30 100644 --- a/src/bindings/js/node/.npmignore +++ b/src/bindings/js/node/.npmignore @@ -3,6 +3,7 @@ include lib src tests +thirdparty .eslintrc.js CMakeLists.txt diff --git a/src/bindings/js/node/README.md b/src/bindings/js/node/README.md index c9da79d593a0be..426bda134779f6 100644 --- a/src/bindings/js/node/README.md +++ b/src/bindings/js/node/README.md @@ -1,50 +1,35 @@ -# OpenVINO Node.js API - -## Components - -- [include](./include/) - header files for current API. -- [lib](./lib/) - TypeScript sources for current API. -- [src](./src/) - C++ sources for current API. -- [tests](./tests/) - tests directory for current API. 
- -## Build - -- Make sure that all submodules are updated `git submodule update --init --recursive` -- Create build dir `mkdir build && cd build` -- Configure binaries building: - ```bash - cmake \ - -DCMAKE_BUILD_TYPE=Release \ - -DENABLE_FASTER_BUILD=ON \ - -DCPACK_GENERATOR=NPM \ - -DENABLE_SYSTEM_TBB=OFF -UTBB* \ - -DENABLE_TESTS=OFF \ - -DENABLE_SAMPLES=OFF \ - -DENABLE_WHEEL=OFF \ - -DENABLE_PYTHON=OFF \ - -DENABLE_INTEL_GPU=OFF \ - -DCMAKE_INSTALL_PREFIX=../src/bindings/js/node/bin \ - .. - ``` -- Build bindings: - `cmake --build . --config Release --verbose -j4` -- Install binaries for openvino-node package: - `cmake --install .` -- Go to npm package folder `cd ../src/bindings/js/node` -- Now you can install dependencies packages and transpile ts to js code. Run `npm install` -- Run tests `npm run test` to make sure that **openvino-node** built successfully +# OpenVINO™ Node.js Bindings + +Use OpenVINO JavaScript API for your Node.js application. ## Usage -- Add `openvino-node` package in your project, specify in **package.json**: `"openvino-node": "file:*path-to-current-directory*"` -- Require by: `const ov = require('openvino-node');` +Install the **openvino-node** package: +```bash +npm install openvino-node +``` + +Use the **openvino-node** package: +```js +const { addon: ov } = require('openvino-node'); +``` + +## Build From Sources + +For more details, refer to the [OpenVINO™ JavaScript API Developer Documentation](https://github.com/openvinotoolkit/openvino/blob/master/src/bindings/js/docs/README.md#openvino-node-package-developer-documentation) + +## Documentation & Samples + +- [OpenVINO™ Node.js API](https://docs.openvino.ai/2024/api/nodejs_api/nodejs_api.html) +- [OpenVINO™ Node.js Bindings Examples of Usage](https://github.com/openvinotoolkit/openvino/blob/master/samples/js/node/README.md) -## Samples +## See Also -[Samples & notebooks of OpenVINO Node.js API](../../../../samples/js/node/README.md) +* [OpenVINO™ 
README](https://github.com/openvinotoolkit/openvino/blob/master/README.md) +* [OpenVINO™ Core Components](https://github.com/openvinotoolkit/openvino/blob/master/src/README.md) +* [OpenVINO™ Python API](https://github.com/openvinotoolkit/openvino/blob/master/src/bindings/python/README.md) +* [OpenVINO™ Other Bindings](https://github.com/openvinotoolkit/openvino/blob/master/src/bindings/README.md) -## See also +[License](https://github.com/openvinotoolkit/openvino/blob/master/LICENSE) -* [OpenVINO™ README](../../../../README.md) -* [OpenVINO™ Core Components](../../../README.md) -* [OpenVINO™ JavaScript API](../README.md) +Copyright © 2018-2024 Intel Corporation diff --git a/src/bindings/js/node/package-lock.json b/src/bindings/js/node/package-lock.json index 00eb7e71c7c09f..a8304774ffa7a9 100644 --- a/src/bindings/js/node/package-lock.json +++ b/src/bindings/js/node/package-lock.json @@ -1,12 +1,12 @@ { "name": "openvino-node", - "version": "2024.0.0-14428.dev20240212", + "version": "2024.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "openvino-node", - "version": "2024.0.0-14428.dev20240212", + "version": "2024.0.0", "hasInstallScript": true, "license": "Apache-2.0", "os": [ diff --git a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py index cb6b6beb0e2ede..7a9c8a881fc9e0 100644 --- a/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py +++ b/src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py @@ -7,7 +7,7 @@ from openvino.frontend.pytorch.py_pytorch_frontend import _FrontEndPytorchDecoder as Decoder from openvino.frontend.pytorch.py_pytorch_frontend import _Type as DecoderType from openvino.runtime import op, PartialShape, Type as OVType, OVAny, Shape -from openvino.frontend.pytorch.utils import maybe_convert_max_int, make_constant, fetch_attr, pt_to_ov_type_map +from openvino.frontend.pytorch.utils import maybe_convert_max_int, 
make_constant, fetch_attr, pt_to_ov_type_map, torch_tensor_to_ov_const import torch @@ -21,11 +21,11 @@ def __init__(self, pt_module, fx_gm, nodes=None, mark_node_callback=None, input_ self.m_decoders = [] self.pt_module = pt_module self.fx_gm = fx_gm - self.input_types = input_types + self.input_types = [OVAny(pt_to_ov_type_map[str(t)]) + for t in input_types] self.input_shapes = input_shapes self._input_signature = [] - self._output_names = [] if issubclass(type(pt_module), torch.fx.graph_module.GraphModule): @@ -39,25 +39,15 @@ def __init__(self, pt_module, fx_gm, nodes=None, mark_node_callback=None, input_ self._input_signature.append(self._nodes[i].name) elif self._nodes[i].op == 'output': # Instead of putting output index, refer to its target - args = self._nodes[i].args - if isinstance(args[0], tuple): - args = args[0] - if isinstance(args[0], dict): - for name, output in args[0].items(): - self._outputs.append(self._nodes.index(output)) - self._output_names.append(name) - else: - for output in args: - self._outputs.append(self._nodes.index(output)) + uargs = self.unpack_containers(self._nodes[i].args) + self._outputs = [(arg[0], self._nodes.index(arg[1])) for arg in uargs if arg[1] is not None] elif issubclass(type(pt_module), torch.fx.Node): self._nodes = nodes # passed from outer context # FIXME: Quadratic complexity nodes*nodes considering the outer loop over all nodes - for i in range(len(self._nodes)): - if self._nodes[i] == pt_module: - self._outputs = [i] + self._outputs = [("", self._nodes.index(pt_module))] # None in inputs mean the input is inlined or None (also considered inlined) self._inputs = [self._nodes.index( @@ -65,15 +55,20 @@ def __init__(self, pt_module, fx_gm, nodes=None, mark_node_callback=None, input_ # FIXME: Find a better way to pass nested tuples to OV frontend. This is a temporary solution to flatten arguments. 
new_inputs = [] + self.input_types = [] for i in range(len(pt_module.args)): - if isinstance(pt_module.args[i], list) and any([isinstance(a, torch.fx.Node) for a in pt_module.args[i]]): + if isinstance(pt_module.args[i], (list, tuple)) and any([isinstance(a, torch.fx.Node) for a in pt_module.args[i]]): for arg in pt_module.args[i]: if arg in self._nodes: new_inputs.append(self._nodes.index(arg)) else: new_inputs.append((arg,)) + self.input_types.append(OVAny(DecoderType.List( + TorchFXPythonDecoder.get_type_for_value(arg)))) else: new_inputs.append(self._inputs[i]) + self.input_types.append( + TorchFXPythonDecoder.get_type_for_value(self._inputs[i])) self._inputs = new_inputs def inputs(self): @@ -83,6 +78,24 @@ def inputs(self): def is_input_inlined(self, index): return isinstance(self._inputs[index], tuple) + @staticmethod + def unpack_containers(arg): + if isinstance(arg, (tuple, list)): + res = [] + for e in arg: + res.extend(TorchFXPythonDecoder.unpack_containers(e)) + return res + elif isinstance(arg, dict): + res = [] + for k, e in arg.items(): + unpacked = TorchFXPythonDecoder.unpack_containers(e) + if len(unpacked) == 1: + unpacked[0] = (k, unpacked[0][1]) + res.extend(unpacked) + return res + else: + return [("", arg)] + @staticmethod def arg_to_constant(arg): if isinstance(arg, list): @@ -91,7 +104,7 @@ def arg_to_constant(arg): arg[0]).__name__], Shape([len(arg)]), arg) else: # TODO: which type should we use if list is empty? Need a signaling value here - return make_constant(int, Shape([0]), []) + return make_constant(OVType.i32, Shape([0]), []) elif isinstance(arg, bool): return make_constant(OVType.boolean, Shape([]), [arg]) elif isinstance(arg, int): @@ -103,10 +116,10 @@ def arg_to_constant(arg): []), [arg]) # TODO: f32? why not f64? 
return None - def inlined_input(self, index): assert index < len(self._inputs), "Requested input doesn't exist" - assert isinstance(self._inputs[index], tuple), "Requested input which is not inlined" + assert isinstance( + self._inputs[index], tuple), "Requested input which is not inlined" assert self._inputs[index][0] is not None, "Requested None inlined input" constant = None arg = self._inputs[index][0] @@ -144,13 +157,13 @@ def get_input_strides(self, index: int) -> list: def get_input_type(self, index): if index < len(self.input_types): - return OVAny(pt_to_ov_type_map[str(self.input_types[index])]) + return self.input_types[index] input = self._raw_input(index) return self.get_type_for_value(input) def get_output_debug_name(self, index): - if self._output_names is not None and index < len(self._output_names): - return self._output_names[index] + if self._outputs is not None and index < len(self._outputs) and self._outputs[index][0]: + return self._outputs[index][0] name = getattr(self.pt_module, "name", "output") return name + ":" + str(index) @@ -168,7 +181,8 @@ def get_shape_for_value(self, value): return PartialShape(len(value.meta['tensor_meta'].shape) * [-1]) return PartialShape.dynamic() - def get_type_for_value(self, value): + @staticmethod + def get_type_for_value(value): if issubclass(type(value), torch.fx.Node): if ('tensor_meta' in value.meta.keys()): if value.meta['tensor_meta'] and isinstance(value.meta['tensor_meta'], torch.Tensor): @@ -259,10 +273,10 @@ def get_schema(self): return self.pt_module.schema() def outputs(self): - return self._outputs + return [o[1] for o in self._outputs] def _raw_outputs(self): - return [self._nodes[x] for x in self._outputs] + return [self._nodes[x[1]] for x in self._outputs] def _raw_output(self, index): return self._raw_outputs()[index] @@ -293,7 +307,7 @@ def as_constant(self): if self.pt_module.op == 'get_attr': # Extract Constant from FX module field ret = fetch_attr(self.fx_gm, self.pt_module.target) - 
ov_const = op.Constant(ret.numpy(force=True), shared_memory=True) + ov_const = torch_tensor_to_ov_const(ret, shared_memory=True) return ov_const.outputs() if not self.get_op_type() == 'prim::Constant': diff --git a/src/bindings/python/src/pyopenvino/core/core.cpp b/src/bindings/python/src/pyopenvino/core/core.cpp index 78ddebb8ed6009..4e9ec2206914b6 100644 --- a/src/bindings/python/src/pyopenvino/core/core.cpp +++ b/src/bindings/python/src/pyopenvino/core/core.cpp @@ -244,7 +244,21 @@ void regclass_Core(py::module m) { }, py::arg("model"), py::arg("context"), - py::arg("properties")); + py::arg("properties"), + R"( + Creates a compiled model from a source model within a specified remote context. + + GIL is released while running this function. + + :param model: Model acquired from read_model function. + :type model: openvino.Model + :param context: RemoteContext instance. + :type context: openvino.RemoteContext + :param properties: dict of pairs: (property name, property value) relevant only for this load operation. + :type properties: dict + :return: A compiled model. + :rtype: openvino.CompiledModel + )"); cls.def( "create_context", @@ -253,14 +267,33 @@ void regclass_Core(py::module m) { return RemoteContextWrapper(self.create_context(device_name, _properties)); }, py::arg("device_name"), - py::arg("properties")); + py::arg("properties"), + R"( + Creates a new remote shared context object on the specified accelerator device + using specified plugin-specific low-level device API parameters. + + :param device_name: Name of a device to create a new shared context on. + :type device_name: str + :param properties: dict of device-specific shared context remote properties. + :type properties: dict + :return: Remote context instance. 
+ :rtype: openvino.RemoteContext + )"); cls.def( "get_default_context", [](ov::Core& self, const std::string& device_name) { return RemoteContextWrapper(self.get_default_context(device_name)); }, - py::arg("device_name")); + py::arg("device_name"), + R"( + Gets default (plugin-supplied) shared context object for the specified accelerator device. + + :param device_name: Name of a device to get a default shared context from. + :type device_name: str + :return: Remote context instance. + :rtype: openvino.RemoteContext + )"); cls.def("get_versions", &ov::Core::get_versions, diff --git a/src/bindings/python/src/pyopenvino/core/remote_context.cpp b/src/bindings/python/src/pyopenvino/core/remote_context.cpp index ca8648ce77074f..858593bbc3a265 100644 --- a/src/bindings/python/src/pyopenvino/core/remote_context.cpp +++ b/src/bindings/python/src/pyopenvino/core/remote_context.cpp @@ -51,6 +51,7 @@ void regclass_RemoteContext(py::module m) { const ov::Shape& shape, const std::map& properties) { auto _properties = Common::utils::properties_to_any_map(properties); + py::gil_scoped_release release; return RemoteTensorWrapper(self.context.create_tensor(type, shape, _properties)); }, py::arg("type"), @@ -61,6 +62,8 @@ void regclass_RemoteContext(py::module m) { using the specified tensor description and low-level device-specific parameters. Returns the object that implements the RemoteTensor interface. + GIL is released while running this function. + :param type: Defines the element type of the tensor. :type type: openvino.Type :param shape: Defines the shape of the tensor. 
@@ -76,6 +79,7 @@ void regclass_RemoteContext(py::module m) { [](RemoteContextWrapper& self, const ov::element::Type& type, const ov::Shape& shape) { return self.context.create_host_tensor(type, shape); }, + py::call_guard(), py::arg("type"), py::arg("shape"), R"( @@ -84,6 +88,8 @@ void regclass_RemoteContext(py::module m) { (if corresponding extension is available), which could be more efficient than regular host memory. + GIL is released while running this function. + :param type: Defines the element type of the tensor. :type type: openvino.Type :param shape: Defines the shape of the tensor. @@ -124,13 +130,17 @@ void regclass_VAContext(py::module m) { cls.def( "create_tensor_nv12", [](VAContextWrapper& self, const size_t height, const size_t width, const uint32_t nv12_surface) { - ov::AnyMap tensor_params = { - {ov::intel_gpu::shared_mem_type.name(), ov::intel_gpu::SharedMemType::VA_SURFACE}, - {ov::intel_gpu::dev_object_handle.name(), nv12_surface}, - {ov::intel_gpu::va_plane.name(), uint32_t(0)}}; - auto y_tensor = self.context.create_tensor(ov::element::u8, {1, height, width, 1}, tensor_params); - tensor_params[ov::intel_gpu::va_plane.name()] = uint32_t(1); - auto uv_tensor = self.context.create_tensor(ov::element::u8, {1, height / 2, width / 2, 2}, tensor_params); + ov::RemoteTensor y_tensor, uv_tensor; + { + py::gil_scoped_release release; + ov::AnyMap tensor_params = { + {ov::intel_gpu::shared_mem_type.name(), ov::intel_gpu::SharedMemType::VA_SURFACE}, + {ov::intel_gpu::dev_object_handle.name(), nv12_surface}, + {ov::intel_gpu::va_plane.name(), uint32_t(0)}}; + y_tensor = self.context.create_tensor(ov::element::u8, {1, height, width, 1}, tensor_params); + tensor_params[ov::intel_gpu::va_plane.name()] = uint32_t(1); + uv_tensor = self.context.create_tensor(ov::element::u8, {1, height / 2, width / 2, 2}, tensor_params); + } return py::make_tuple(VASurfaceTensorWrapper(y_tensor), VASurfaceTensorWrapper(uv_tensor)); }, py::arg("height"), @@ -140,6 +150,8 @@ 
void regclass_VAContext(py::module m) { This function is used to obtain a NV12 tensor from NV12 VA decoder output. The result contains two remote tensors for Y and UV planes of the surface. + GIL is released while running this function. + :param height: A height of Y plane. :type height: int :param width: A width of Y plane @@ -162,6 +174,7 @@ void regclass_VAContext(py::module m) { {ov::intel_gpu::va_plane.name(), plane}}; return VASurfaceTensorWrapper(self.context.create_tensor(type, shape, params)); }, + py::call_guard(), py::arg("type"), py::arg("shape"), py::arg("surface"), @@ -169,6 +182,8 @@ void regclass_VAContext(py::module m) { R"( Create remote tensor from VA surface handle. + GIL is released while running this function. + :param type: Defines the element type of the tensor. :type type: openvino.Type :param shape: Defines the shape of the tensor. diff --git a/src/frontends/pytorch/src/op/arange.cpp b/src/frontends/pytorch/src/op/arange.cpp index f7b8b409db4ef7..d90ebb18908a9c 100644 --- a/src/frontends/pytorch/src/op/arange.cpp +++ b/src/frontends/pytorch/src/op/arange.cpp @@ -85,72 +85,32 @@ OutputVector translate_arange(const NodeContext& context) { OutputVector translate_arange_fx(const NodeContext& context) { auto zero = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); auto one = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1})); - int dtype_port = -1; auto dtype = element::f32; - bool dtype_applied = false; auto num_inputs = context.get_input_size(); ov::Output end; - ov::Output out_tensor; ov::Output start = zero; ov::Output step = one; if (num_inputs == 1) { - // aten::arange(Scalar end, tensor out) + // arange = torch.ops.aten.arange.default(_local_scalar_dense, dtype = torch.int8, device = device(type='cpu'), + // pin_memory = False); end = context.get_input(0); - out_tensor = end; // context.input_is_none(1) ? 
end : context.get_input(1); } else if (num_inputs == 2) { - // aten::arange(Scalar end, tensor out) start = context.get_input(0); end = context.get_input(1); - out_tensor = end; // context.input_is_none(1) ? end : context.get_input(1); } else if (num_inputs == 3) { - // aten::arange(Scalar start, Scalar end, Scalar step, Tensor out) start = context.get_input(0); end = context.get_input(1); step = context.get_input(2); - out_tensor = end; // context.input_is_none(3) ? end : context.get_input(3); - } else if (num_inputs == 5) { - // aten::arange(Scalar end, ScalarType dtype, Layout, Device, bool pin_memory) - end = context.get_input(0); - out_tensor = end; - dtype_port = 1; - } else if (num_inputs == 6) { - // aten::arange(Scalar start, Scalar end, ScalarType dtype, Layout, Device, bool pin_memory) - start = context.get_input(0); - end = context.get_input(1); - out_tensor = end; - dtype_port = 2; - dtype_applied = true; - } else if (num_inputs == 7) { - // aten::arange(Scalar start, Scalar end, Scalar step, ScalarType dtype, Layout, Device, bool pin_memory) - start = context.get_input(0); - end = context.get_input(1); - step = context.get_input(2); - out_tensor = end; - dtype_port = 3; - dtype_applied = true; } else { PYTORCH_OP_CONVERSION_CHECK(false, "Not expected number of inputs for ", context.get_op_type()); } - if (dtype_port >= 0 && !context.input_is_none(dtype_port)) { - if (std::dynamic_pointer_cast( - context.get_input_from_visible_context(dtype_port).get_node_shared_ptr())) { - dtype = convert_dtype(context.const_input(dtype_port)); - dtype_applied = true; - } else if (const auto& fw_node = - cast_fw_node(context.get_input(dtype_port).get_node_shared_ptr(), "prim::dtype")) { - out_tensor = fw_node->input_value(0); - dtype_applied = false; - } else { - PYTORCH_OP_CONVERSION_CHECK(false, "Couldn't get dtype input"); - } + if (context.has_attribute("dtype")) { + dtype = context.get_attribute("dtype"); } - auto r_end = context.mark_node(std::make_shared(end, 
dtype)); - auto r_start = context.mark_node(std::make_shared(start, dtype)); - auto r_step = context.mark_node(std::make_shared(step, dtype)); - auto range = context.mark_node(std::make_shared(r_start, r_end, r_step, dtype)); - if (!dtype_applied) { - range = context.mark_node(std::make_shared(range, out_tensor)); + auto range = context.mark_node(std::make_shared(start, end, step, dtype)); + if (!context.has_attribute("dtype")) { + range = context.mark_node(std::make_shared(range, context.get_input(0))); } return {range}; }; diff --git a/src/frontends/pytorch/src/op/argmax_argmin.cpp b/src/frontends/pytorch/src/op/argmax_argmin.cpp index e858dcf87d619a..d2689f2c2302c5 100644 --- a/src/frontends/pytorch/src/op/argmax_argmin.cpp +++ b/src/frontends/pytorch/src/op/argmax_argmin.cpp @@ -21,7 +21,7 @@ using namespace ov::op; namespace { OutputVector create_argmax_argmin_op(const NodeContext& context, TopKMode mode) { - num_inputs_check(context, 2, 3); + num_inputs_check(context, 1, 3); auto input = context.get_input(0); bool keep_dims = false; auto k = context.mark_node(v0::Constant::create(element::i32, Shape{}, {1})); diff --git a/src/frontends/pytorch/src/op/avg_poolnd.cpp b/src/frontends/pytorch/src/op/avg_poolnd.cpp index 4a90db23a67c1e..57389b7b42e872 100644 --- a/src/frontends/pytorch/src/op/avg_poolnd.cpp +++ b/src/frontends/pytorch/src/op/avg_poolnd.cpp @@ -19,7 +19,7 @@ namespace op { using namespace ov::op; OutputVector translate_avg_poolnd(const NodeContext& context) { - num_inputs_check(context, 3, 7); + num_inputs_check(context, 2, 7); auto input = context.get_input(0); auto kernel = context.const_input(1); Strides strides; diff --git a/src/frontends/pytorch/src/op/cat.cpp b/src/frontends/pytorch/src/op/cat.cpp index c1a571dc8d7681..3baec6fea4db05 100644 --- a/src/frontends/pytorch/src/op/cat.cpp +++ b/src/frontends/pytorch/src/op/cat.cpp @@ -74,12 +74,18 @@ OutputVector translate_cat(const NodeContext& context) { OutputVector translate_cat_fx(const 
NodeContext& context) { // This translator is only needed to get axis as constant from external scope - num_inputs_check(context, 2, context.get_input_size()); + num_inputs_check(context, 1, context.get_input_size()); std::deque> list_elems; for (size_t i = 0; i < context.get_input_size() - 1; i++) { list_elems.push_back(context.get_input(static_cast(i))); } - auto axis = context.const_input(context.get_input_size() - 1); + int64_t axis = 0; + if (!context.get_input_type(context.get_input_size() - 1).is()) { + // axis can be not present and that means that last input will have List type + axis = context.const_input(context.get_input_size() - 1); + } else { + list_elems.push_back(context.get_input(static_cast(context.get_input_size() - 1))); + } return translate_cat_common(context, list_elems, axis, true); }; diff --git a/src/frontends/pytorch/src/op/cumsum.cpp b/src/frontends/pytorch/src/op/cumsum.cpp index c396521a9e402b..664625975bf1f1 100644 --- a/src/frontends/pytorch/src/op/cumsum.cpp +++ b/src/frontends/pytorch/src/op/cumsum.cpp @@ -3,6 +3,7 @@ // #include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/cum_sum.hpp" #include "utils.hpp" @@ -28,6 +29,19 @@ OutputVector translate_cumsum(const NodeContext& context) { return {result}; }; +OutputVector translate_cumsum_fx(const NodeContext& context) { + // cumsum = torch.ops.aten.cumsum.default(arg0_1, 0, dtype = torch.float64) + num_inputs_check(context, 2, 2); + auto x = context.get_input(0); + auto dim = context.get_input(1); + if (context.has_attribute("dtype")) { + auto dtype = context.get_attribute("dtype"); + x = context.mark_node(std::make_shared(x, dtype)); + } + auto result = context.mark_node(std::make_shared(x, dim)); + return {result}; +}; + } // namespace op } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/op/mean.cpp b/src/frontends/pytorch/src/op/mean.cpp index ef88c2922ac39d..6182b46afe04a5 100644 --- 
a/src/frontends/pytorch/src/op/mean.cpp +++ b/src/frontends/pytorch/src/op/mean.cpp @@ -3,6 +3,7 @@ // #include "openvino/frontend/pytorch/node_context.hpp" +#include "openvino/op/convert.hpp" #include "openvino/op/reduce_mean.hpp" #include "utils.hpp" @@ -11,6 +12,8 @@ namespace frontend { namespace pytorch { namespace op { +using namespace ov::op; + OutputVector translate_mean(const NodeContext& context) { num_inputs_check(context, 2, 5); auto x = context.get_input(0); @@ -35,7 +38,7 @@ OutputVector translate_mean(const NodeContext& context) { x = apply_dtype(context, 3, x); } } - auto mean = context.mark_node(std::make_shared(x, axes, keep_dims)); + auto mean = context.mark_node(std::make_shared(x, axes, keep_dims)); if (num_inputs == 5 && !context.input_is_none(4)) { context.mutate_input(4, mean); } @@ -43,12 +46,18 @@ OutputVector translate_mean(const NodeContext& context) { }; OutputVector translate_mean_fx(const NodeContext& context) { - num_inputs_check(context, 2, 5); + num_inputs_check(context, 1, 5); auto x = context.get_input(0); auto num_inputs = context.get_input_size(); bool keep_dims = false; + if (context.has_attribute("dtype")) { + auto dtype = context.get_attribute("dtype"); + x = context.mark_node(std::make_shared(x, dtype)); + } Output axes; - if (num_inputs == 2) { + if (num_inputs == 1) { + axes = get_node_axes_range(context, x); + } else if (num_inputs == 2) { axes = context.get_input(1); } else { axes = context.get_input(1); @@ -59,7 +68,7 @@ OutputVector translate_mean_fx(const NodeContext& context) { x = apply_dtype(context, 3, x); } } - auto mean = context.mark_node(std::make_shared(x, axes, keep_dims)); + auto mean = context.mark_node(std::make_shared(x, axes, keep_dims)); if (num_inputs == 5 && !context.input_is_none(4)) { context.mutate_input(4, mean); } diff --git a/src/frontends/pytorch/src/op/min_max.cpp b/src/frontends/pytorch/src/op/min_max.cpp index 3523209e983ebd..b1e1cbd8db9368 100644 --- 
a/src/frontends/pytorch/src/op/min_max.cpp +++ b/src/frontends/pytorch/src/op/min_max.cpp @@ -110,6 +110,33 @@ OutputVector translate_min(const NodeContext& context) { return {values, indices}; }; +OutputVector translate_min_dim(const NodeContext& context) { + // torch.min.dim(x, dim, keepdim) + num_inputs_check(context, 2, 3); + auto x = context.get_input(0); + auto axes_node = context.get_input(1); + auto axis_const = context.const_input(1); + + bool keepdims = false; + if (!context.input_is_none(2)) { + keepdims = context.const_input(2); + } + + auto values = context.mark_node(std::make_shared(x, axes_node, keepdims)); + auto k = context.mark_node(std::make_shared(element::i32, Shape{}, 1)); + auto topk = std::make_shared(x, k, axis_const, v3::TopK::Mode::MIN, v3::TopK::SortType::NONE); + auto indices = context.mark_node(std::make_shared(topk->output(1), element::i64)); + if (!keepdims) { + indices = std::make_shared(indices, axes_node); + } + return {values, indices}; +}; + +OutputVector translate_min_dim_fx(const NodeContext& context) { + ov::OutputVector out_vec = translate_min_dim(context); + return {context.mark_node(make_list_construct(out_vec))}; +}; + OutputVector translate_maximum(const NodeContext& context) { // aten::maximum(Tensor self, Tensor other) -> Tensor @@ -146,10 +173,13 @@ OutputVector translate_amin(const NodeContext& context) { // aten::amin(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor // aten::amin.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!) 
- num_inputs_check(context, 3, 4); + num_inputs_check(context, 2, 4); auto x = context.get_input(0); auto dims = context.get_input(1); - auto keep_dims = context.const_input(2); + bool keep_dims = false; + if (!context.input_is_none(2)) { + keep_dims = context.const_input(2); + } auto res = context.mark_node(std::make_shared(x, dims, keep_dims)); if (!context.input_is_none(3)) { context.mutate_input(3, res); @@ -161,10 +191,13 @@ OutputVector translate_amax(const NodeContext& context) { // aten::amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor // aten::amax.out(Tensor self, int[1] dim=[], bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!) - num_inputs_check(context, 3, 4); + num_inputs_check(context, 2, 4); auto x = context.get_input(0); auto dims = context.get_input(1); - auto keep_dims = context.const_input(2); + bool keep_dims = false; + if (!context.input_is_none(2)) { + keep_dims = context.const_input(2); + } auto res = context.mark_node(std::make_shared(x, dims, keep_dims)); if (!context.input_is_none(3)) { context.mutate_input(3, res); diff --git a/src/frontends/pytorch/src/op/reciprocal.cpp b/src/frontends/pytorch/src/op/reciprocal.cpp index 38b12fee06cb18..94d14dd5fb7d1b 100644 --- a/src/frontends/pytorch/src/op/reciprocal.cpp +++ b/src/frontends/pytorch/src/op/reciprocal.cpp @@ -18,9 +18,9 @@ using namespace ov::op; OutputVector translate_reciprocal(const NodeContext& context) { num_inputs_check(context, 1, 1); auto x = context.get_input(0); - auto const_neg_1 = context.mark_node(v0::Constant::create(element::i32, Shape{}, {-1})); - auto cast = context.mark_node(std::make_shared(const_neg_1, x)); - auto power = context.mark_node(std::make_shared(x, cast)); + auto const_neg_1 = context.mark_node(v0::Constant::create(element::f32, Shape{}, {-1}))->output(0); + align_eltwise_input_types(context, x, const_neg_1, true); + auto power = context.mark_node(std::make_shared(x, const_neg_1)); return {context.mark_node(power)}; }; diff --git 
a/src/frontends/pytorch/src/op/roll.cpp b/src/frontends/pytorch/src/op/roll.cpp index 9f358368fbce8c..290301308c5835 100644 --- a/src/frontends/pytorch/src/op/roll.cpp +++ b/src/frontends/pytorch/src/op/roll.cpp @@ -18,14 +18,18 @@ namespace op { using namespace ov::op; OutputVector translate_roll(const NodeContext& context) { - num_inputs_check(context, 3, 3); + num_inputs_check(context, 2, 3); const auto data = context.get_input(0); const auto shifts = context.get_input(1); - const auto axes = context.get_input(2); - const auto shifts_pshape = shifts.get_partial_shape(); - const auto axes_pshape = axes.get_partial_shape(); - const auto match_dims = axes_pshape.compatible(shifts_pshape); - if (!match_dims) { + Output axes; + bool on_flattened = context.input_is_none(2); + if (!on_flattened) { + axes = context.get_input(2); + const auto shifts_pshape = shifts.get_partial_shape(); + const auto axes_pshape = axes.get_partial_shape(); + on_flattened = !axes_pshape.compatible(shifts_pshape); + } + if (on_flattened) { const auto const_minus_1 = v0::Constant::create(element::i32, Shape{1}, {-1}); const auto axis_0 = v0::Constant::create(element::i32, Shape{1}, {0}); const auto flat = std::make_shared(data, const_minus_1, false); diff --git a/src/frontends/pytorch/src/op/rsub.cpp b/src/frontends/pytorch/src/op/rsub.cpp index 30c9c25698229d..d9de114a22502e 100644 --- a/src/frontends/pytorch/src/op/rsub.cpp +++ b/src/frontends/pytorch/src/op/rsub.cpp @@ -15,20 +15,43 @@ namespace op { using namespace ov::op; +namespace { +OutputVector translate_rsub_common(const NodeContext& context, + Output self, + Output other, + const Output& alpha) { + align_eltwise_input_types(context, self, other); + if (alpha.get_node()) { + // reverse aten::sub other - self * alpha + auto alpha_casted = context.mark_node(std::make_shared(alpha, self)); + self = context.mark_node(std::make_shared(self, alpha_casted)); + } + return {context.mark_node(std::make_shared(other, self))}; +} +} // 
namespace + OutputVector translate_rsub(const NodeContext& context) { num_inputs_check(context, 2, 3); auto self = context.get_input(0); auto other = context.get_input(1); + Output alpha; if (!context.input_is_none(2)) { - auto alpha = context.get_input(2); - align_eltwise_input_types(context, self, other); - // reverse aten::sub other - self * alpha - auto alpha_casted = context.mark_node(std::make_shared(alpha, self)); - auto alpha_mul = context.mark_node(std::make_shared(self, alpha_casted)); - return {context.mark_node(std::make_shared(other, alpha_mul))}; + alpha = context.get_input(2); } - align_eltwise_input_types(context, self, other); - return {context.mark_node(std::make_shared(other, self))}; + return translate_rsub_common(context, self, other, alpha); +}; + +OutputVector translate_rsub_fx(const NodeContext& context) { + num_inputs_check(context, 2, 3); + auto self = context.get_input(0); + auto other = context.get_input(1); + Output alpha; + if (context.has_attribute("alpha")) { + alpha = context.get_input("alpha"); + } else if (!context.input_is_none(2)) { + alpha = context.get_input(2); + } + return translate_rsub_common(context, self, other, alpha); }; } // namespace op diff --git a/src/frontends/pytorch/src/op/scatter.cpp b/src/frontends/pytorch/src/op/scatter.cpp index afbf8c2208d3a2..8e950562e419b9 100644 --- a/src/frontends/pytorch/src/op/scatter.cpp +++ b/src/frontends/pytorch/src/op/scatter.cpp @@ -90,7 +90,7 @@ OutputVector translate_scatter(const NodeContext& context) { auto reduction = v12::ScatterElementsUpdate::Reduction::NONE; auto input_num = context.get_input_size(); - // 5 argument can be reduction represened as string or out represented as Tensor + // 5 argument can be reduction represented as string or out represented as Tensor if (input_num > 4 && !context.input_is_none(4) && context.get_input_type(4).is()) { auto reduce_mode = context.const_input(4); reduction = get_reduction_mode(reduce_mode); diff --git 
a/src/frontends/pytorch/src/op/select.cpp b/src/frontends/pytorch/src/op/select.cpp index 7cd898fdf223b5..5c22e7b6e20439 100644 --- a/src/frontends/pytorch/src/op/select.cpp +++ b/src/frontends/pytorch/src/op/select.cpp @@ -2,11 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "openvino/op/select.hpp" - #include "openvino/frontend/pytorch/node_context.hpp" #include "openvino/op/gather.hpp" -#include "openvino/op/squeeze.hpp" #include "utils.hpp" namespace ov { diff --git a/src/frontends/pytorch/src/op/select_scatter.cpp b/src/frontends/pytorch/src/op/select_scatter.cpp new file mode 100644 index 00000000000000..d2c71abd6cb5f8 --- /dev/null +++ b/src/frontends/pytorch/src/op/select_scatter.cpp @@ -0,0 +1,30 @@ +// Copyright (C) 2018-2023 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include + +#include "helper_ops/gather_assign.hpp" +#include "openvino/frontend/pytorch/node_context.hpp" +#include "utils.hpp" + +namespace ov { +namespace frontend { +namespace pytorch { +namespace op { + +using namespace ov::op; + +OutputVector translate_select_scatter_fx(const NodeContext& context) { + num_inputs_check(context, 4, 4); + auto data = context.get_input(0); + auto updates = context.get_input(1); + auto dim = context.get_input(2); + auto index = context.get_input(3); + return {context.mark_node(std::make_shared(data, updates, index, dim))}; +}; + +} // namespace op +} // namespace pytorch +} // namespace frontend +} // namespace ov \ No newline at end of file diff --git a/src/frontends/pytorch/src/op/split.cpp b/src/frontends/pytorch/src/op/split.cpp index acc12ca3d33696..e983c3031d0f91 100644 --- a/src/frontends/pytorch/src/op/split.cpp +++ b/src/frontends/pytorch/src/op/split.cpp @@ -54,12 +54,17 @@ OutputVector translate_unbind_int_fx(const NodeContext& context) { } OutputVector translate_split_with_sizes_fx(const NodeContext& context) { - num_inputs_check(context, 3, 3); + num_inputs_check(context, 2, 3); auto data = 
context.get_input(0); auto split_lengths = context.get_input(1); - auto dim = context.get_input(2); + Output dim; + if (context.input_is_none(2)) { + dim = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); + } else { + dim = context.get_input(2); + } - auto split = std::make_shared(data, dim, split_lengths); + auto split = context.mark_node(std::make_shared(data, dim, split_lengths)); return {context.mark_node(make_list_construct(split->outputs()))}; } diff --git a/src/frontends/pytorch/src/op/sum.cpp b/src/frontends/pytorch/src/op/sum.cpp index d3cf73dabecf97..be664cb8104cc4 100644 --- a/src/frontends/pytorch/src/op/sum.cpp +++ b/src/frontends/pytorch/src/op/sum.cpp @@ -69,6 +69,33 @@ OutputVector translate_sum(const NodeContext& context) { return {sum}; }; +OutputVector translate_sum_fx(const NodeContext& context) { + num_inputs_check(context, 1, 3); + bool keep_dims = false; + auto data = context.get_input(0); + auto data_dtype = simplified_type_interpret(context.get_input_type(0)); + if (context.has_attribute("dtype")) { + auto dtype = context.get_attribute("dtype"); + data = context.mark_node(std::make_shared(data, dtype)); + } else if ((data.get_element_type() == element::boolean || data.get_element_type() == element::u8) || + (data_dtype.is() && (data_dtype.as() == element::boolean || + data_dtype.as() == element::u8))) { + // PyTorch sum converts bool and uint8 to i64 for preventing overflow + data = context.mark_node(std::make_shared(data, element::i64)); + } + Output axes; + if (context.input_is_none(1)) { + axes = get_axes_range(context, 0); + } else { + axes = context.get_input(static_cast(1)); + } + if (!context.input_is_none(2)) { + keep_dims = context.const_input(2); + } + + return {context.mark_node(std::make_shared(data, axes, keep_dims))}; +}; + } // namespace op } // namespace pytorch } // namespace frontend diff --git a/src/frontends/pytorch/src/op/var_mean.cpp b/src/frontends/pytorch/src/op/var_mean.cpp index 
5d4de546a901e5..c4937a1c6d888f 100644 --- a/src/frontends/pytorch/src/op/var_mean.cpp +++ b/src/frontends/pytorch/src/op/var_mean.cpp @@ -22,77 +22,122 @@ namespace op { using namespace ov::op; +namespace { +OutputVector translate_var_mean_common(const NodeContext& context, + const Output& data, + const Output& axes, + int32_t correction, + bool keepdims) { + auto num_elements = numel(context, data); + std::shared_ptr mean, t_mean; + auto _axes = axes; + if (!_axes.get_node()) { + // aten::var_mean(input, unbiased) + _axes = context.mark_node(get_axes_range(context, 0)); + mean = context.mark_node(std::make_shared(data, _axes, keepdims)); + t_mean = mean; + } else { + mean = context.mark_node(std::make_shared(data, _axes, keepdims)); + t_mean = context.mark_node(std::make_shared(data, _axes, true)); + auto reduced_dims = context.mark_node(std::make_shared(data, element::i32)); + auto zero = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); + reduced_dims = context.mark_node(std::make_shared(reduced_dims, _axes, zero)); + num_elements = context.mark_node(std::make_shared(reduced_dims, zero, false)); + } + auto sub_v = context.mark_node(std::make_shared(data, t_mean)); + auto sqr_sub = context.mark_node(std::make_shared(sub_v, sub_v)); + auto var = context.mark_node(std::make_shared(sqr_sub, _axes, keepdims)); + // if unbiased=true Bessel’s correction will be used + // Correct bias in calculating variance, by dividing it over (N - 1) instead on N + if (correction) { + PYTORCH_OP_CONVERSION_CHECK(correction == 1, "Unexpected value of correction."); + num_elements = context.mark_node(std::make_shared(num_elements, data)); + auto one = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1})); + one = context.mark_node(std::make_shared(one, data)); + auto mul = context.mark_node(std::make_shared(var, num_elements)); + auto n_minus_one = context.mark_node(std::make_shared(num_elements, one)); + var = context.mark_node(std::make_shared(mul, 
n_minus_one)); + } + return {var, mean}; +}; +} // namespace + OutputVector translate_var_mean(const NodeContext& context) { num_inputs_check(context, 1, 4); auto data = context.get_input(0); bool unbiased = true; - bool keepdims = false; - auto num_elements = numel(context, data); - std::shared_ptr mean, t_mean; + bool keepdim = false; ov::Output axes; if (context.inputs().size() == 2) { // aten::var_mean(input, unbiased) axes = context.mark_node(get_axes_range(context, 0)); unbiased = context.const_input(1); - mean = context.mark_node(std::make_shared(data, axes, keepdims)); - t_mean = mean; } else { // aten::var_mean(input, dim, unbiased:bool=None, keepdim:bool=None) if (!context.input_is_none(2)) { unbiased = context.const_input(2); } if (!context.input_is_none(3)) { - keepdims = context.const_input(3); + keepdim = context.const_input(3); } - if (context.input_is_none(1)) { - axes = context.mark_node(get_axes_range(context, 0)); - mean = context.mark_node(std::make_shared(data, axes, keepdims)); - t_mean = mean; - } else { + if (!context.input_is_none(1)) { axes = context.get_input(1); - mean = context.mark_node(std::make_shared(data, axes, keepdims)); - t_mean = context.mark_node(std::make_shared(data, axes, true)); - auto reduced_dims = context.mark_node(std::make_shared(data, element::i32)); - auto zero = context.mark_node(v0::Constant::create(element::i32, Shape{}, {0})); - reduced_dims = context.mark_node(std::make_shared(reduced_dims, axes, zero)); - num_elements = context.mark_node(std::make_shared(reduced_dims, zero, false)); } } - auto sub_v = context.mark_node(std::make_shared(data, t_mean)); - auto sqr_sub = context.mark_node(std::make_shared(sub_v, sub_v)); - auto var = context.mark_node(std::make_shared(sqr_sub, axes, keepdims)); - // if unbiased=true Bessel’s correction will be used - // Correct bias in calculating variance, by dividing it over (N - 1) instead on N - if (unbiased) { - num_elements = context.mark_node(std::make_shared(num_elements, 
data)); - auto one = context.mark_node(v0::Constant::create(element::f32, Shape{}, {1})); - one = context.mark_node(std::make_shared(one, data)); - auto mul = context.mark_node(std::make_shared(var, num_elements)); - auto n_minus_one = context.mark_node(std::make_shared(num_elements, one)); - var = context.mark_node(std::make_shared(mul, n_minus_one)); - } - return {var, mean}; + auto res = translate_var_mean_common(context, data, axes, static_cast(unbiased), keepdim); + return res; }; OutputVector translate_var_mean_fx(const NodeContext& context) { - num_inputs_check(context, 2, 2); + num_inputs_check(context, 1, 2); auto data = context.get_input(0); - auto num_elements = numel(context, data); - std::shared_ptr mean; ov::Output axes; - axes = context.get_input(1); - mean = context.mark_node(std::make_shared(data, axes, true)); - - auto sub_v = context.mark_node(std::make_shared(data, mean)); - auto sqr_sub = context.mark_node(std::make_shared(sub_v, sub_v)); - auto var = context.mark_node(std::make_shared(sqr_sub, axes, true)); + if (!context.input_is_none(1)) { + axes = context.get_input(1); + } + int32_t correction = 0; + if (context.has_attribute("correction")) { + auto correction_node = context.get_attribute>("correction"); + auto const_node = as_type_ptr(correction_node.get_node_shared_ptr()); + PYTORCH_OP_CONVERSION_CHECK(const_node, "correction must be const."); + correction = const_node->cast_vector()[0]; + } + bool keepdim = false; + if (context.has_attribute("keepdim")) { + auto keepdim_node = context.get_attribute>("keepdim"); + auto const_node = as_type_ptr(keepdim_node.get_node_shared_ptr()); + PYTORCH_OP_CONVERSION_CHECK(const_node, "keepdim must be const."); + keepdim = const_node->cast_vector()[0]; + } + auto res = translate_var_mean_common(context, data, axes, correction, keepdim); + return {context.mark_node(make_list_construct(res))}; +}; - ov::OutputVector out_vec; +OutputVector translate_var_fx(const NodeContext& context) { + 
num_inputs_check(context, 1, 2); + auto data = context.get_input(0); + ov::Output axes; - out_vec.push_back(var); - out_vec.push_back(mean); - return {context.mark_node(make_list_construct(out_vec))}; + if (!context.input_is_none(1)) { + axes = context.get_input(1); + } + int32_t correction = 0; + if (context.has_attribute("correction")) { + auto correction_node = context.get_attribute>("correction"); + auto const_node = as_type_ptr(correction_node.get_node_shared_ptr()); + PYTORCH_OP_CONVERSION_CHECK(const_node, "correction must be const."); + correction = const_node->cast_vector()[0]; + } + bool keepdim = false; + if (context.has_attribute("keepdim")) { + auto keepdim_node = context.get_attribute>("keepdim"); + auto const_node = as_type_ptr(keepdim_node.get_node_shared_ptr()); + PYTORCH_OP_CONVERSION_CHECK(const_node, "keepdim must be const."); + keepdim = const_node->cast_vector()[0]; + } + auto res = translate_var_mean_common(context, data, axes, correction, keepdim); + return {res[0]}; }; OutputVector translate_var(const NodeContext& context) { diff --git a/src/frontends/pytorch/src/op_table.cpp b/src/frontends/pytorch/src/op_table.cpp index 23240270b10a70..26280066c90777 100644 --- a/src/frontends/pytorch/src/op_table.cpp +++ b/src/frontends/pytorch/src/op_table.cpp @@ -128,7 +128,6 @@ OP_CONVERTER(translate_lstm); OP_CONVERTER(translate_masked_fill); OP_CONVERTER(translate_masked_scatter); OP_CONVERTER(translate_max); -OP_CONVERTER(translate_max_dim); OP_CONVERTER(translate_maximum); OP_CONVERTER(translate_max_poolnd); OP_CONVERTER(translate_mean); @@ -250,6 +249,7 @@ OP_CONVERTER(translate_batch_norm_legit_no_training_fx); OP_CONVERTER(translate_batch_norm_legit_no_stats_fx); OP_CONVERTER(translate_cat_fx); OP_CONVERTER(translate_constant_pad_nd_fx); +OP_CONVERTER(translate_cumsum_fx); OP_CONVERTER(translate_chunk_fx); OP_CONVERTER(translate_div_fx); OP_CONVERTER(translate_expand_fx); @@ -265,16 +265,21 @@ OP_CONVERTER(translate_log_softmax_fx); 
OP_CONVERTER(translate_max_dim_fx); OP_CONVERTER(translate_max_poolnd_fx); OP_CONVERTER(translate_mean_fx); +OP_CONVERTER(translate_min_dim_fx); +OP_CONVERTER(translate_rsub_fx); OP_CONVERTER(translate_scalar_tensor_fx); OP_CONVERTER(translate_scaled_dot_product_attention_fx); +OP_CONVERTER(translate_select_scatter_fx); OP_CONVERTER(translate_slice_fx); OP_CONVERTER(translate_slice_scatter_fx); OP_CONVERTER(translate_softmax_fx); OP_CONVERTER(translate_split_with_sizes_fx); OP_CONVERTER(translate_stack_fx); OP_CONVERTER(translate_sub_fx); +OP_CONVERTER(translate_sum_fx); OP_CONVERTER(translate_to_fx); OP_CONVERTER(translate_transpose_fx); +OP_CONVERTER(translate_var_fx); OP_CONVERTER(translate_var_mean_fx); OP_CONVERTER(translate_unbind_int_fx); @@ -301,6 +306,7 @@ const std::map get_supported_ops_ts() { {"aten::_upsample_bilinear2d_aa", op::translate_upsample_bilinear2d_aa}, {"aten::_weight_norm", op::translate_weight_norm}, {"aten::abs", op::translate_1to1_match_1_inputs}, + {"aten::abs_", op::inplace_op>}, {"aten::acos", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten::acos_", op::inplace_op>}, {"aten::acosh", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, @@ -543,6 +549,7 @@ const std::map get_supported_ops_ts() { {"aten::randn_like", op::translate_randn_like}, // aten::real - Supported in limited set of patterns {"aten::reciprocal", op::translate_reciprocal}, + {"aten::reciprocal_", op::inplace_op}, // aten::reflection_pad2d - Supported in limited set of patterns {"aten::relu", op::translate_1to1_match_1_inputs}, {"aten::relu_", op::inplace_op>}, @@ -674,20 +681,30 @@ const std::map get_supported_ops_ts() { const std::map get_supported_ops_fx() { return { - {"aten.abs.default", op::translate_1to1_match_1_inputs}, + {"", op::translate_getitem}, // TODO: Check if there is any other way to handle this {"aten._adaptive_avg_pool1d.default", op::translate_adaptive_avg_pool1d}, {"aten._adaptive_avg_pool2d.default", 
op::translate_adaptive_avg_pool2d}, {"aten._adaptive_avg_pool3d.default", op::translate_adaptive_avg_pool3d}, - {"aten.adaptive_max_pool1d.default", op::translate_adaptive_max_pool1d_fx}, - {"aten.adaptive_max_pool2d.default", op::translate_adaptive_max_pool2d_fx}, - {"aten.adaptive_max_pool3d.default", op::translate_adaptive_max_pool3d_fx}, + {"aten._convolution.default", op::translate_convolution}, {"aten._fake_quantize_per_tensor_affine_cachemask_tensor_qparams.default", op::translate_fake_quantize_per_tensor_affine_fx}, {"aten._local_scalar_dense.default", op::skip_node}, {"aten._log_softmax.default", op::translate_log_softmax_fx}, + {"aten._native_batch_norm_legit.default", op::translate_batch_norm_legit_fx}, + {"aten._native_batch_norm_legit.no_stats", op::translate_batch_norm_legit_no_stats_fx}, + {"aten._native_batch_norm_legit_functional.default", op::translate_batch_norm_legit_fx}, + {"aten._native_batch_norm_legit_no_training.default", op::translate_batch_norm_legit_no_training_fx}, + {"aten._scaled_dot_product_flash_attention.default", op::translate_scaled_dot_product_attention_fx}, + {"aten._scaled_dot_product_flash_attention_for_cpu.default", op::translate_scaled_dot_product_attention_fx}, {"aten._softmax.default", op::translate_softmax_fx}, {"aten._to_copy.default", op::translate_to_fx}, {"aten._unsafe_view.default", op::translate_reshape}, + {"aten.abs.default", op::translate_1to1_match_1_inputs}, + {"aten.acos.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.acosh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.adaptive_max_pool1d.default", op::translate_adaptive_max_pool1d_fx}, + {"aten.adaptive_max_pool2d.default", op::translate_adaptive_max_pool2d_fx}, + {"aten.adaptive_max_pool3d.default", op::translate_adaptive_max_pool3d_fx}, {"aten.add.Scalar", op::translate_add}, {"aten.add.Tensor", op::translate_add}, {"aten.add_.Tensor", op::translate_add}, @@ -695,119 +712,157 @@ const 
std::map get_supported_ops_fx() { {"aten.addmm.default", op::translate_addmm_fx}, {"aten.alias.default", op::skip_node}, {"aten.amax.default", op::translate_amax}, + {"aten.amin.default", op::translate_amin}, + {"aten.arange.default", op::translate_arange_fx}, {"aten.arange.start", op::translate_arange_fx}, {"aten.arange.start_step", op::translate_arange_fx}, - {"aten.arange.default", op::translate_arange_fx}, {"aten.argmax.default", op::translate_argmax}, + {"aten.argmin.default", op::translate_argmin}, {"aten.as_strided.default", op::translate_as_strided}, + {"aten.asin.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.asinh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.atan.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.atanh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten.avg_pool2d.default", op::translate_avg_poolnd}, {"aten.avg_pool3d.default", op::translate_avg_poolnd}, {"aten.baddbmm.default", op::translate_addmm_fx}, {"aten.bitwise_and.Tensor", op::translate_bitwise_and}, + {"aten.bitwise_not.default", op::translate_bitwise_not}, + {"aten.bitwise_or.Tensor", op::translate_bitwise_or}, + {"aten.bitwise_xor.Tensor", op::translate_bitwise_xor}, {"aten.bmm.default", op::translate_1to1_match_2_inputs_align_types}, {"aten.cat.default", op::translate_cat_fx}, {"aten.ceil.default", op::translate_1to1_match_1_inputs}, {"aten.clamp.default", op::translate_clamp}, - {"aten.clamp_min.default", op::translate_1to1_match_2_inputs}, - {"aten.constant_pad_nd.default", op::translate_constant_pad_nd_fx}, + {"aten.clamp_max.default", op::translate_1to1_match_2_inputs_align_types}, + {"aten.clamp_max.Tensor", op::translate_1to1_match_2_inputs_align_types}, + {"aten.clamp_min.default", op::translate_1to1_match_2_inputs_align_types}, + {"aten.clamp_min.Tensor", op::translate_1to1_match_2_inputs_align_types}, {"aten.clone.default", op::skip_node}, // 
ignore clone operators that are inserted by PyTorch autograd + {"aten.constant_pad_nd.default", op::translate_constant_pad_nd_fx}, {"aten.convolution.default", op::translate_convolution}, - {"aten._convolution.default", op::translate_convolution}, {"aten.copy.default", op::skip_node}, {"aten.copy_.default", op::translate_copy_}, - {"aten.cos.default", op::translate_1to1_match_1_inputs}, - {"aten.cumsum.default", op::translate_cumsum}, + {"aten.cos.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.cosh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.cumsum.default", op::translate_cumsum_fx}, {"aten.detach.default", op::skip_node}, {"aten.div.Scalar", op::translate_div_fx}, {"aten.div.Tensor", op::translate_div_fx}, {"aten.div.Tensor_mode", op::translate_div_fx}, {"aten.embedding.default", op::translate_embedding}, {"aten.empty.memory_format", op::translate_empty}, - {"aten.erf.default", op::translate_erf}, {"aten.eq.Scalar", op::translate_1to1_match_2_inputs_align_types}, {"aten.eq.Tensor", op::translate_1to1_match_2_inputs_align_types}, - {"aten.exp.default", op::translate_1to1_match_1_inputs}, + {"aten.erf.default", op::translate_erf}, + {"aten.exp.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten.expand.default", op::translate_expand_fx}, {"aten.fake_quantize_per_channel_affine_cachemask.default", op::translate_fake_quantize_per_channel_affine_fx}, {"aten.fill.Scalar", op::translate_fill}, + {"aten.fill.Tensor", op::translate_fill}, + {"aten.flip.default", op::translate_flip}, {"aten.floor.default", op::translate_1to1_match_1_inputs}, {"aten.floor_divide.default", op::translate_floor_divide}, + {"aten.fmod.Scalar", op::translate_fmod}, + {"aten.fmod.Tensor", op::translate_fmod}, {"aten.full.default", op::translate_full_fx}, {"aten.full.names", op::translate_full_fx}, {"aten.full_like.default", op::translate_full_like}, {"aten.gather.default", op::translate_gather}, + 
{"aten.ge.Scalar", op::translate_1to1_match_2_inputs_align_types}, + {"aten.ge.Tensor", op::translate_1to1_match_2_inputs_align_types}, {"aten.gelu.default", op::translate_gelu_fx}, {"aten.glu.default", op::translate_glu}, {"aten.gt.Scalar", op::translate_1to1_match_2_inputs_align_types}, + {"aten.gt.Tensor", op::translate_1to1_match_2_inputs_align_types}, {"aten.hardsigmoid.default", op::translate_1to1_match_1_inputs}, {"aten.hardswish.default", op::translate_1to1_match_1_inputs}, {"aten.hardswish_.default", op::inplace_op>}, {"aten.hardtanh.default", op::translate_hardtanh}, {"aten.hardtanh_.default", op::inplace_op}, {"aten.index.Tensor", op::translate_index_fx}, + {"aten.index_select.default", op::translate_index_select}, + {"aten.le.Scalar", op::translate_1to1_match_2_inputs_align_types}, + {"aten.le.Tensor", op::translate_1to1_match_2_inputs_align_types}, {"aten.leaky_relu.default", op::translate_leaky_relu_fx}, {"aten.leaky_relu_.default", op::inplace_op}, {"aten.lift_fresh_copy.default", op::skip_node}, {"aten.linalg_vector_norm.default", op::translate_linalg_vector_norm}, {"aten.log.default", op::translate_log}, {"aten.log_sigmoid_forward.default", op::translate_log_sigmoid}, + {"aten.log10.default", op::translate_log10}, + {"aten.log1p.default", op::translate_log1p}, + {"aten.log2.default", op::translate_log2}, {"aten.logsumexp.default", op::translate_logsumexp}, {"aten.lt.Scalar", op::translate_1to1_match_2_inputs_align_types}, {"aten.lt.Tensor", op::translate_1to1_match_2_inputs_align_types}, - {"aten.masked_fill_.Scalar", op::inplace_op}, {"aten.masked_fill.Tensor", op::translate_masked_fill}, + {"aten.masked_fill_.Scalar", op::inplace_op}, + {"aten.max.default", op::translate_max}, {"aten.max.dim", op::translate_max_dim_fx}, {"aten.max_pool2d_with_indices.default", op::translate_max_poolnd_fx}, {"aten.max_pool3d_with_indices.default", op::translate_max_poolnd_fx}, + {"aten.maximum.default", op::translate_maximum}, + {"aten.mean.default", 
op::translate_mean_fx}, {"aten.mean.dim", op::translate_mean_fx}, + {"aten.min.default", op::translate_min}, + {"aten.min.dim", op::translate_min_dim_fx}, + {"aten.minimum.default", op::translate_minimum}, {"aten.mm.default", op::translate_1to1_match_2_inputs}, - {"aten.mul.Tensor", op::translate_1to1_match_2_inputs_align_types}, - {"aten.mul.Scalar", op::translate_1to1_match_2_inputs_align_types}, + {"aten.mul.Scalar", op::translate_mul}, + {"aten.mul.Tensor", op::translate_mul}, {"aten.native_batch_norm.default", op::translate_batch_norm_legit_fx}, - {"aten._native_batch_norm_legit.default", op::translate_batch_norm_legit_fx}, - {"aten._native_batch_norm_legit.no_stats", op::translate_batch_norm_legit_no_stats_fx}, - {"aten._native_batch_norm_legit_no_training.default", op::translate_batch_norm_legit_no_training_fx}, - {"aten._native_batch_norm_legit_functional.default", op::translate_batch_norm_legit_fx}, {"aten.native_dropout.default", op::skip_node}, {"aten.native_group_norm.default", op::translate_group_norm_fx}, {"aten.native_layer_norm.default", op::translate_layer_norm_fx}, {"aten.ne.Scalar", op::translate_1to1_match_2_inputs_align_types}, + {"aten.ne.Tensor", op::translate_1to1_match_2_inputs_align_types}, {"aten.neg.default", op::translate_neg}, {"aten.new_full.default", op::translate_new_full}, {"aten.new_ones.default", op::translate_new_ones}, {"aten.permute.default", op::translate_1to1_match_2_inputs}, + {"aten.pow.Scalar", op::translate_pow}, {"aten.pow.Tensor_Scalar", op::translate_pow}, + {"aten.pow.Tensor_Tensor", op::translate_pow}, + {"aten.reciprocal.default", op::translate_reciprocal}, {"aten.relu.default", op::translate_1to1_match_1_inputs}, {"aten.relu_.default", op::inplace_op>}, {"aten.repeat.default", op::translate_1to1_match_2_inputs}, - {"aten.rsqrt.default", op::translate_rsqrt}, - {"aten.rsub.Scalar", op::translate_rsub}, {"aten.roll.default", op::translate_roll}, - {"aten._scaled_dot_product_flash_attention.default", 
op::translate_scaled_dot_product_attention_fx}, - {"aten._scaled_dot_product_flash_attention_for_cpu.default", op::translate_scaled_dot_product_attention_fx}, + {"aten.rsqrt.default", op::translate_rsqrt}, + {"aten.rsub.Scalar", op::translate_rsub_fx}, + {"aten.rsub.Tensor", op::translate_rsub_fx}, {"aten.scalar_tensor.default", op::translate_scalar_tensor_fx}, + {"aten.scatter.value", op::translate_scatter}, {"aten.select.int", op::translate_select}, - {"aten.sigmoid.default", op::translate_1to1_match_1_inputs}, + {"aten.select_scatter.default", op::translate_select_scatter_fx}, + {"aten.sigmoid.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.sign.default", op::translate_sign}, {"aten.silu.default", op::translate_1to1_match_1_inputs}, {"aten.silu_.default", op::inplace_op>}, - {"aten.sin.default", op::translate_1to1_match_1_inputs}, + {"aten.sin.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.sinh.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten.slice.Tensor", op::translate_slice_fx}, {"aten.slice_scatter.default", op::translate_slice_scatter_fx}, {"aten.split.Tensor", op::translate_chunk_fx}, {"aten.split_with_sizes.default", op::translate_split_with_sizes_fx}, + {"aten.sqrt.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten.squeeze.dim", op::translate_squeeze}, {"aten.squeeze.dims", op::translate_squeeze}, {"aten.stack.default", op::translate_stack_fx}, {"aten.sub.default", op::translate_sub_fx}, {"aten.sub.Tensor", op::translate_sub_fx}, - {"aten.sum.dim_IntList", op::translate_sum}, + {"aten.sum.default", op::translate_sum_fx}, + {"aten.sum.dim_IntList", op::translate_sum_fx}, {"aten.t.default", op::translate_t}, - {"aten.tanh.default", op::translate_1to1_match_1_inputs}, - {"aten.unfold.default", op::translate_unfold}, + {"aten.tan.default", op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, + {"aten.tanh.default", 
op::translate_1to1_match_1_inputs_with_fp32_type_alignment}, {"aten.transpose.int", op::translate_transpose}, {"aten.unbind.int", op::translate_unbind_int_fx}, + {"aten.unfold.default", op::translate_unfold}, {"aten.unsqueeze.default", op::translate_1to1_match_2_inputs}, {"aten.upsample_nearest2d.default", op::translate_upsample_nearest2d}, + {"aten.var.correction", op::translate_var_fx}, {"aten.var_mean.correction", op::translate_var_mean_fx}, {"aten.view.default", op::translate_reshape}, {"aten.where.self", op::translate_where}, @@ -827,8 +882,6 @@ const std::map get_supported_ops_fx() { {"torchvision::deform_conv2d", op::translate_deform_conv}, {"torchvision::nms", op::translate_nms}, {"torchvision::roi_align", op::translate_roi_align}, - {"", op::translate_getitem}, // TODO: Check if there is any other way to handle this - }; }; diff --git a/src/frontends/pytorch/src/translate_session.cpp b/src/frontends/pytorch/src/translate_session.cpp index 5aff8bd1f05755..43b1ec92952af4 100644 --- a/src/frontends/pytorch/src/translate_session.cpp +++ b/src/frontends/pytorch/src/translate_session.cpp @@ -323,10 +323,24 @@ void TranslateSession::encode_tensor_name(Output output, } } +namespace { +bool is_number(const std::string& s) { + std::string::const_iterator it = s.begin(); + while (it != s.end() && std::isdigit(*it)) + ++it; + return !s.empty() && it == s.end(); +} +} // namespace + size_t TranslateSession::decode_tensor_name(const Output& output) { // any_name should always return numerical value even if there is a word value exist in names - const auto& name = output.get_any_name(); + auto name = output.get_any_name(); + auto pos = name.find("_"); + if (pos != std::string::npos) { + name = name.substr(0, pos); + } // numbers after "_" will be ignored by stoll function + FRONT_END_GENERAL_CHECK(is_number(name), "Tensor name is not a number: ", name); return static_cast(std::stoll(name)); } diff --git 
a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/extension/conversion.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/extension/conversion.hpp index d17cc5083e756d..6e4c11c20266e7 100644 --- a/src/frontends/tensorflow/include/openvino/frontend/tensorflow/extension/conversion.hpp +++ b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/extension/conversion.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2023 Intel Corporation +// Copyright (C) 2018-2024 Intel Corporation // SPDX-License-Identifier: Apache-2.0 // @@ -19,7 +19,7 @@ class TENSORFLOW_API ConversionExtension : public ConversionExtensionBase { ConversionExtension() = delete; - ConversionExtension(const std::string& op_type, const ov::frontend::CreatorFunction& converter) + ConversionExtension(const std::string& op_type, const ov::frontend::tensorflow::CreatorFunctionIndexed& converter) : ConversionExtensionBase(op_type), m_converter(converter) {} diff --git a/src/frontends/tensorflow/src/hash_table.hpp b/src/frontends/tensorflow/include/openvino/frontend/tensorflow/hash_table.hpp similarity index 100% rename from src/frontends/tensorflow/src/hash_table.hpp rename to src/frontends/tensorflow/include/openvino/frontend/tensorflow/hash_table.hpp diff --git a/src/frontends/tensorflow/src/op/hash_table.cpp b/src/frontends/tensorflow/src/op/hash_table.cpp index 6d5d023ad627e0..762c88d7add01b 100644 --- a/src/frontends/tensorflow/src/op/hash_table.cpp +++ b/src/frontends/tensorflow/src/op/hash_table.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "hash_table.hpp" +#include "openvino/frontend/tensorflow/hash_table.hpp" #include "common_op_table.hpp" #include "openvino/frontend/tensorflow/node_context.hpp" diff --git a/src/frontends/tensorflow/src/op/lookup_table_find.cpp b/src/frontends/tensorflow/src/op/lookup_table_find.cpp index b9c9647469062e..5ab9c8a4ae11c1 100644 --- a/src/frontends/tensorflow/src/op/lookup_table_find.cpp +++ 
b/src/frontends/tensorflow/src/op/lookup_table_find.cpp @@ -3,7 +3,7 @@ // #include "common_op_table.hpp" -#include "hash_table.hpp" +#include "openvino/frontend/tensorflow/hash_table.hpp" #include "openvino/frontend/tensorflow/node_context.hpp" #include "openvino/frontend/tensorflow/variable.hpp" #include "openvino/op/concat.hpp" diff --git a/src/frontends/tensorflow/src/op/lookup_table_import.cpp b/src/frontends/tensorflow/src/op/lookup_table_import.cpp index 421fe81be18f5f..3b6c09a3a8a1eb 100644 --- a/src/frontends/tensorflow/src/op/lookup_table_import.cpp +++ b/src/frontends/tensorflow/src/op/lookup_table_import.cpp @@ -3,7 +3,7 @@ // #include "common_op_table.hpp" -#include "hash_table.hpp" +#include "openvino/frontend/tensorflow/hash_table.hpp" #include "openvino/frontend/tensorflow/node_context.hpp" #include "openvino/frontend/tensorflow/variable.hpp" diff --git a/src/inference/src/dev/make_tensor.cpp b/src/inference/src/dev/make_tensor.cpp index e34497749ad98e..f28c90ccf4856a 100644 --- a/src/inference/src/dev/make_tensor.cpp +++ b/src/inference/src/dev/make_tensor.cpp @@ -5,6 +5,7 @@ #include "openvino/runtime/make_tensor.hpp" #include +#include #include "openvino/runtime/iremote_tensor.hpp" #include "openvino/runtime/properties.hpp" diff --git a/src/plugins/intel_cpu/src/plugin.cpp b/src/plugins/intel_cpu/src/plugin.cpp index ad48ac4b9f4e98..baafe373f75913 100644 --- a/src/plugins/intel_cpu/src/plugin.cpp +++ b/src/plugins/intel_cpu/src/plugin.cpp @@ -18,16 +18,16 @@ #include "weights_cache.hpp" #if defined(__linux__) -# include -# include -# include +# include +# include +# include #endif #include "cpu/x64/cpu_isa_traits.hpp" #if defined(OV_CPU_WITH_ACL) -#include "nodes/executors/acl/acl_ie_scheduler.hpp" -#include "arm_compute/runtime/CPP/CPPScheduler.h" +# include "arm_compute/runtime/CPP/CPPScheduler.h" +# include "nodes/executors/acl/acl_ie_scheduler.hpp" #endif using namespace ov::threading; @@ -46,31 +46,31 @@ static std::string 
getDeviceFullName() { // TODO: extract actual device name brand_string = "ARM CPU"; #elif defined(OPENVINO_ARCH_X86) || defined(OPENVINO_ARCH_X86_64) - const unsigned int addr_list[3] = { 0x80000002, 0x80000003, 0x80000004 }; + const unsigned int addr_list[3] = {0x80000002, 0x80000003, 0x80000004}; unsigned int regs[4]; for (auto addr : addr_list) { regs[0] = addr; -#ifdef _WIN32 +# ifdef _WIN32 __cpuid(reinterpret_cast(regs), regs[0]); -#else +# else __cpuid(regs[0], regs[0], regs[1], regs[2], regs[3]); -#endif +# endif char* ch = reinterpret_cast(®s[0]); for (size_t j = 0; j < sizeof(regs); j++) if (ch[j] != '\0') brand_string += ch[j]; } #else -# error "Unkown CPU architecture. Please, add support to openvino/core/visibility.hpp" +# error "Unkown CPU architecture. Please, add support to openvino/core/visibility.hpp" #endif return brand_string; } #if defined(__linux__) -#ifndef AT_MINSIGSTKSZ -# define AT_MINSIGSTKSZ 51 -#endif +# ifndef AT_MINSIGSTKSZ +# define AT_MINSIGSTKSZ 51 +# endif class SigAltStackSetup { stack_t new_stack{0}; @@ -83,8 +83,7 @@ class SigAltStackSetup { auto minsigstksz = getauxval(AT_MINSIGSTKSZ); auto new_size = minsigstksz + SIGSTKSZ; - void * altstack = mmap(NULL, new_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); + void* altstack = mmap(NULL, new_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0); if (altstack == MAP_FAILED) { return; } @@ -121,27 +120,27 @@ class CPUSpecialSetup { public: CPUSpecialSetup() = default; }; -#else // __linux__ +#else // __linux__ class CPUSpecialSetup { public: CPUSpecialSetup() = default; }; -#endif // __linux__ +#endif // __linux__ #if defined(OV_CPU_WITH_ACL) -std::mutex Engine::SchedulerGuard::mutex; -std::weak_ptr Engine::SchedulerGuard::ptr; +std::mutex Plugin::SchedulerGuard::mutex; +std::weak_ptr Plugin::SchedulerGuard::ptr; -Engine::SchedulerGuard::SchedulerGuard() { -#if OV_THREAD == OV_THREAD_SEQ 
+Plugin::SchedulerGuard::SchedulerGuard() { +# if OV_THREAD == OV_THREAD_SEQ // To save state for ACL cores in single-thread mode arm_compute::Scheduler::set(arm_compute::Scheduler::Type::ST); -#else +# else arm_compute::Scheduler::set(std::make_shared()); -#endif +# endif } -std::shared_ptr Engine::SchedulerGuard::instance() { +std::shared_ptr Plugin::SchedulerGuard::instance() { std::lock_guard lock{SchedulerGuard::mutex}; auto scheduler_guard_ptr = SchedulerGuard::ptr.lock(); if (scheduler_guard_ptr == nullptr) { @@ -150,7 +149,7 @@ std::shared_ptr Engine::SchedulerGuard::instance() { return scheduler_guard_ptr; } -Engine::SchedulerGuard::~SchedulerGuard() { +Plugin::SchedulerGuard::~SchedulerGuard() { // To save the state of scheduler after ACLScheduler has been executed // TODO: find out the cause of the state std::lock_guard lock{this->dest_mutex}; @@ -159,9 +158,7 @@ Engine::SchedulerGuard::~SchedulerGuard() { } #endif -Engine::Engine() : - deviceFullName(getDeviceFullName()), - specialSetup(new CPUSpecialSetup) { +Plugin::Plugin() : deviceFullName(getDeviceFullName()), specialSetup(new CPUSpecialSetup) { set_device_name("CPU"); // Initialize Xbyak::util::Cpu object on Pcore for hybrid cores machine get_executor_manager()->execute_task_by_streams_executor(IStreamsExecutor::Config::PreferredCoreType::BIG, [] { @@ -174,7 +171,7 @@ Engine::Engine() : m_compiled_model_runtime_properties["OV_VERSION"] = std::string(ov_version.buildNumber); } -Engine::~Engine() { +Plugin::~Plugin() { executor_manager()->clear("CPU"); executor_manager()->clear("CPUStreamsExecutor"); executor_manager()->clear("CPUCallbackExecutor"); @@ -184,7 +181,7 @@ static bool streamsSet(const ov::AnyMap& config) { return config.count(ov::num_streams.name()); } -void Engine::get_performance_streams(Config& config, const std::shared_ptr& model) const{ +void Plugin::get_performance_streams(Config& config, const std::shared_ptr& model) const { const int latency_streams = 
get_default_latency_streams(config.latencyThreadingMode); int streams_set = config.streams; int streams; @@ -203,7 +200,7 @@ void Engine::get_performance_streams(Config& config, const std::shared_ptr& model, bool imported) const { +void Plugin::calculate_streams(Config& conf, const std::shared_ptr& model, bool imported) const { const auto model_prefer_name = std::string("MODEL_PREFER_THREADS"); if (imported && model->has_rt_info("intel_cpu_hints_config")) { // load model_prefer_threads from cache @@ -231,7 +228,7 @@ void Engine::calculate_streams(Config& conf, const std::shared_ptr& m static bool shouldEnableLPT(const ov::AnyMap& modelConfig, const Config& engineConfig) { const auto& enableLPT = modelConfig.find(ov::intel_cpu::lp_transforms_mode.name()); - if (enableLPT == modelConfig.end()) // model config has higher priority + if (enableLPT == modelConfig.end()) // model config has higher priority return engineConfig.lpTransformsMode == Config::LPTransformsMode::On; try { @@ -253,8 +250,9 @@ static ov::element::Type getInferencePrecision(const ov::AnyMap& modelConfig, static Config::ModelType getModelType(const std::shared_ptr& model) { return op::util::has_op_with_type(model) || - op::util::has_op_with_type(model) ? - Config::ModelType::CNN : Config::ModelType::Unknown; + op::util::has_op_with_type(model) + ? Config::ModelType::CNN + : Config::ModelType::Unknown; } static Config::SnippetsMode getSnippetsMode(const ov::AnyMap& modelConfig, const Config& engineConfig) { @@ -273,13 +271,13 @@ static Config::SnippetsMode getSnippetsMode(const ov::AnyMap& modelConfig, const OPENVINO_THROW("Wrong value for property key SNIPPETS_MODE. 
Expected values: ENABLE/DISABLE/IGNORE_CALLBACK"); } -std::shared_ptr -Engine::compile_model(const std::shared_ptr& model, const ov::AnyMap& orig_config) const{ - OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Engine::compile_model"); +std::shared_ptr Plugin::compile_model(const std::shared_ptr& model, + const ov::AnyMap& orig_config) const { + OV_ITT_SCOPED_TASK(itt::domains::intel_cpu, "Plugin::compile_model"); CREATE_DEBUG_TIMER(debugLoadTimer); // verification of supported input - for (const auto &ii : model->inputs()) { + for (const auto& ii : model->inputs()) { auto input_precision = ii.get_element_type(); static const std::set supported_precisions = {ov::element::Type_t::u8, ov::element::Type_t::i8, @@ -361,14 +359,15 @@ Engine::compile_model(const std::shared_ptr& model, const ov::A return std::make_shared(cloned_model, shared_from_this(), conf, false); } -void Engine::set_property(const ov::AnyMap &config) { - // @todo after Legacy configuration is dropped, use some wrapper class to keep both the property and "ifSetExplicitly" flag +void Plugin::set_property(const ov::AnyMap& config) { + // @todo after Legacy configuration is dropped, use some wrapper class to keep both the property and + // "ifSetExplicitly" flag streamsExplicitlySetForEngine = streamsSet(config); engConfig.readProperties(config); } -ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options) const { +ov::Any Plugin::get_property(const std::string& name, const ov::AnyMap& options) const { if (name == ov::optimal_number_of_infer_requests) { const auto streams = engConfig.streamExecutorConfig.get_streams(); return decltype(ov::optimal_number_of_infer_requests)::value_type( @@ -442,14 +441,15 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options) } else if (name == ov::internal::exclusive_async_requests.name()) { return engConfig.exclusiveAsyncRequests; } else if (name == ov::hint::dynamic_quantization_group_size) { - return 
decltype(ov::hint::dynamic_quantization_group_size)::value_type(engConfig.fcDynamicQuantizationGroupSize); + return decltype(ov::hint::dynamic_quantization_group_size)::value_type( + engConfig.fcDynamicQuantizationGroupSize); } else if (name == ov::hint::kv_cache_precision) { return decltype(ov::hint::kv_cache_precision)::value_type(engConfig.kvCachePrecision); } return get_ro_property(name, options); } -ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& options) const { +ov::Any Plugin::get_ro_property(const std::string& name, const ov::AnyMap& options) const { auto RO_property = [](const std::string& propertyName) { return ov::PropertyName(propertyName, ov::PropertyMutability::RO); }; @@ -458,34 +458,36 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio }; if (name == ov::supported_properties) { - std::vector roProperties {RO_property(ov::supported_properties.name()), - RO_property(ov::available_devices.name()), - RO_property(ov::range_for_async_infer_requests.name()), - RO_property(ov::range_for_streams.name()), - RO_property(ov::execution_devices.name()), - RO_property(ov::device::full_name.name()), - RO_property(ov::device::capabilities.name()), - RO_property(ov::device::type.name()), - RO_property(ov::device::architecture.name()), + std::vector roProperties{ + RO_property(ov::supported_properties.name()), + RO_property(ov::available_devices.name()), + RO_property(ov::range_for_async_infer_requests.name()), + RO_property(ov::range_for_streams.name()), + RO_property(ov::execution_devices.name()), + RO_property(ov::device::full_name.name()), + RO_property(ov::device::capabilities.name()), + RO_property(ov::device::type.name()), + RO_property(ov::device::architecture.name()), }; // the whole config is RW before model is loaded. 
- std::vector rwProperties {RW_property(ov::num_streams.name()), - RW_property(ov::affinity.name()), - RW_property(ov::inference_num_threads.name()), - RW_property(ov::enable_profiling.name()), - RW_property(ov::hint::inference_precision.name()), - RW_property(ov::hint::performance_mode.name()), - RW_property(ov::hint::execution_mode.name()), - RW_property(ov::hint::num_requests.name()), - RW_property(ov::hint::enable_cpu_pinning.name()), - RW_property(ov::hint::scheduling_core_type.name()), - RW_property(ov::hint::enable_hyper_threading.name()), - RW_property(ov::device::id.name()), - RW_property(ov::intel_cpu::denormals_optimization.name()), - RW_property(ov::log::level.name()), - RW_property(ov::intel_cpu::sparse_weights_decompression_rate.name()), - RW_property(ov::hint::dynamic_quantization_group_size.name()), - RW_property(ov::hint::kv_cache_precision.name()), + std::vector rwProperties{ + RW_property(ov::num_streams.name()), + RW_property(ov::affinity.name()), + RW_property(ov::inference_num_threads.name()), + RW_property(ov::enable_profiling.name()), + RW_property(ov::hint::inference_precision.name()), + RW_property(ov::hint::performance_mode.name()), + RW_property(ov::hint::execution_mode.name()), + RW_property(ov::hint::num_requests.name()), + RW_property(ov::hint::enable_cpu_pinning.name()), + RW_property(ov::hint::scheduling_core_type.name()), + RW_property(ov::hint::enable_hyper_threading.name()), + RW_property(ov::device::id.name()), + RW_property(ov::intel_cpu::denormals_optimization.name()), + RW_property(ov::log::level.name()), + RW_property(ov::intel_cpu::sparse_weights_decompression_rate.name()), + RW_property(ov::hint::dynamic_quantization_group_size.name()), + RW_property(ov::hint::kv_cache_precision.name()), }; std::vector supportedProperties; @@ -499,11 +501,12 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio ov::PropertyName{ov::internal::caching_properties.name(), ov::PropertyMutability::RO}, 
ov::PropertyName{ov::internal::exclusive_async_requests.name(), ov::PropertyMutability::RW}, ov::PropertyName{ov::internal::compiled_model_runtime_properties.name(), ov::PropertyMutability::RO}, - ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), ov::PropertyMutability::RO}}; + ov::PropertyName{ov::internal::compiled_model_runtime_properties_supported.name(), + ov::PropertyMutability::RO}}; } else if (name == ov::device::full_name) { return decltype(ov::device::full_name)::value_type(deviceFullName); } else if (name == ov::available_devices) { - const std::vector availableDevices = { "" }; + const std::vector availableDevices = {""}; return decltype(ov::available_devices)::value_type(availableDevices); } else if (name == ov::device::capabilities) { std::vector capabilities; @@ -524,12 +527,14 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio const std::tuple range = std::make_tuple(1, parallel_get_max_threads()); return decltype(ov::range_for_streams)::value_type(range); } else if (name == ov::internal::caching_properties) { - std::vector cachingProperties = { ov::device::full_name }; + std::vector cachingProperties = {ov::device::full_name}; return decltype(ov::internal::caching_properties)::value_type(std::move(cachingProperties)); } else if (name == ov::intel_cpu::denormals_optimization) { - return decltype(ov::intel_cpu::denormals_optimization)::value_type(engConfig.denormalsOptMode == Config::DenormalsOptMode::DO_On); + return decltype(ov::intel_cpu::denormals_optimization)::value_type(engConfig.denormalsOptMode == + Config::DenormalsOptMode::DO_On); } else if (name == ov::intel_cpu::sparse_weights_decompression_rate) { - return decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type(engConfig.fcSparseWeiDecompressionRate); + return decltype(ov::intel_cpu::sparse_weights_decompression_rate)::value_type( + engConfig.fcSparseWeiDecompressionRate); } else if (name == 
ov::execution_devices) { return decltype(ov::execution_devices)::value_type{get_device_name()}; } else if (name == ov::device::type) { @@ -546,14 +551,14 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio #elif defined(OPENVINO_ARCH_RISCV64) return decltype(ov::device::architecture)::value_type{"riscv"}; #else -#error "Undefined system processor" +# error "Undefined system processor" #endif } OPENVINO_THROW("Cannot get unsupported property: ", name); } -ov::SupportedOpsMap Engine::query_model(const std::shared_ptr& model, const ov::AnyMap& config) const { +ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& model, const ov::AnyMap& config) const { WeightsSharing::Ptr fake_w_cache; if (model == nullptr) { @@ -570,17 +575,12 @@ ov::SupportedOpsMap Engine::query_model(const std::shared_ptr& || Config::LPTransformsMode::On == engConfig.lpTransformsMode /* or already enabled */; const Config::SnippetsMode snippetsMode = getSnippetsMode(config, conf); - auto context = - std::make_shared(conf, fake_w_cache, false); + auto context = std::make_shared(conf, fake_w_cache, false); auto supported = ov::get_supported_nodes( model, [&](std::shared_ptr& model) { - Transformations transformation(model, - enableLPT, - conf.inferencePrecision, - snippetsMode, - engConfig); + Transformations transformation(model, enableLPT, conf.inferencePrecision, snippetsMode, engConfig); transformation.UpToLpt(); transformation.PostLpt(); transformation.Snippets(); @@ -604,14 +604,12 @@ ov::SupportedOpsMap Engine::query_model(const std::shared_ptr& return res; } -std::shared_ptr Engine::import_model(std::istream& networkModel, - const ov::AnyMap& config) const{ +std::shared_ptr Plugin::import_model(std::istream& networkModel, const ov::AnyMap& config) const { OV_ITT_SCOPE(FIRST_INFERENCE, itt::domains::intel_cpu_LT, "import_model"); - ModelDeserializer deserializer(networkModel, - [this](const std::string& model, const ov::Tensor& weights) { - return 
get_core()->read_model(model, weights, true); - }); + ModelDeserializer deserializer(networkModel, [this](const std::string& model, const ov::Tensor& weights) { + return get_core()->read_model(model, weights, true); + }); std::shared_ptr model; deserializer >> model; @@ -634,8 +632,8 @@ std::shared_ptr Engine::import_model(std::istream& networkMo auto compiled_model = std::make_shared(model, shared_from_this(), conf, loaded_from_cache); return compiled_model; } -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov using namespace ov::intel_cpu; @@ -646,7 +644,7 @@ static const ov::Version version = {CI_BUILD_NUMBER, "openvino_intel_cpu_plugin" #elif defined(OPENVINO_ARCH_RISCV64) static const ov::Version version = {CI_BUILD_NUMBER, "openvino_riscv_cpu_plugin"}; #else -#error "Undefined system processor" +# error "Undefined system processor" #endif -OV_DEFINE_PLUGIN_CREATE_FUNCTION(Engine, version) +OV_DEFINE_PLUGIN_CREATE_FUNCTION(Plugin, version) diff --git a/src/plugins/intel_cpu/src/plugin.h b/src/plugins/intel_cpu/src/plugin.h index 59e2bd6a197020..39cc4bed1a4e78 100644 --- a/src/plugins/intel_cpu/src/plugin.h +++ b/src/plugins/intel_cpu/src/plugin.h @@ -10,10 +10,10 @@ namespace ov { namespace intel_cpu { -class Engine : public ov::IPlugin { +class Plugin : public ov::IPlugin { public: - Engine(); - ~Engine(); + Plugin(); + ~Plugin(); std::shared_ptr compile_model(const std::shared_ptr& model, const ov::AnyMap& properties) const override; @@ -73,5 +73,5 @@ class Engine : public ov::IPlugin { #endif }; -} // namespace intel_cpu -} // namespace ov +} // namespace intel_cpu +} // namespace ov diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 21ff7b9045fd3d..8d5e432d7b87de 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ 
b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -200,6 +200,62 @@ std::vector disabledTestPatterns() { R"(smoke_LPT/ConvolutionTransformation.CompareWithRefImpl/f32_\[.*,3,16,16\]_CPU_f32_rank=4D_fq_on_data=\{level=256_shape=\[1\]_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ .*18.7 \}_output_high\{ 18.8 \}_precision=\}_fq_on_weights=\{_255_\[6,1,1,1\]_\{ .*1.52806e.*39, .*0.2, .*0.3, .*0.3, .*0.2, .*0.1 \}_\{ 1.52806e.*39, 0.2, 0.3, 0.3, 0.2, 0.1 \}\})", // Issue: 132494 R"(.*smoke_Inverse.*bf16.*)", + // Issue: CVS-133173 + R"(.*smoke_ScaledAttn_CPU/ScaledAttnLayerCPUTest.CompareWithRefs/netPRC=bf16.*has_scale=0.*)", + R"(.*smoke_LPT_4D/ConvolutionBackpropDataTransformation.CompareWithRefImpl/f32_\[1,8,16,16\]_CPU_f32_\[16,16\]_level=256_shape=\[.*\]_input_low=\{ 0 \}_input_high=\{ 25.5 \}_output_low=\{ 0 \}_output_high\{ 25.5 \}_precision=__255_\[.*\]_\{ -12.7 \}_\{ 12.7 \}_\{\}.*)", + R"(.*smoke_LPT_4D/ConvolutionBackpropDataTransformation.CompareWithRefImpl/f32_\[1,8,16,16\]_CPU_f32_\[16,16\]_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ -12.7 \}_output_high\{ 12.8 \}_precision=.*)", + R"(.*smoke_LPT_3D/ConvolutionBackpropDataTransformation.CompareWithRefImpl/f32_\[1,8,16\]_CPU_f32_\[16\]_.*_input_high=\{ 25.5 \}_.*_precision=__255_\[1,1,1\]_\{ -12.7 \}_\{ 12.7 \}_\{\}.*)", + R"(.*smoke_LPT/ConvolutionQDqTransformation.CompareWithRefImpl/f32_\[(1,3,4,4|4,3,4,4)\]_CPU_f32_level=256_shape=\[1,1,1,1\]_input_low=\{ -12.8 \}_input_high=\{ 12.7 \}_output_low=\{ 0 \}_output_high=\{ 255 \}_precision=f32__u8___f32__.*_f32_\[\]_1_1_undefined__\{, 15\}_f32_\[\]__255_\[1,1,1,1\]_\{ -128 \}_\{ 127 \}__i8___f32__\{ -128 \}_.*_1_1_i8_.*)", + R"(.*smoke_LPT/ConvolutionQDqTransformation.CompareWithRefImpl/f32_\[(1,3,4,4|4,3,4,4)\]_CPU_f32_level=256_shape=\[1,1,1,1\]_input_low=\{ -12.8 \}_input_high=\{ 12.7 \}_output_low=\{ 0 \}_output_high=\{ 255 \}_precision=f32__u8___f32_\{\}__\{ 0.1 
\}_f32_\[\]_1_1_undefined__\{, 15\}_f32_\[\]__255_\[1,1,1,1\]_\{ -128 \}_\{ 127 \}__i8_.*)", + R"(.*smoke_LPT/MultiplyTransformation.CompareWithRefImpl/f32_\[1,3,16,16\]_CPU_f32_undefined__on_branch1_0_2.55_0_2.55_on_branch2_-1.28_1.27_-1.28_1.27_1.*)", + R"(.*smoke_LPT/MultiplyTransformation.CompareWithRefImpl/f32_\[1,3,16,16\]_CPU_f32_broadcast1_undefined__on_branch1_-1.28_1.27_-1.28_1.27_on_branch2_0_2.55_0_2.55_0.*)", + R"(.*smoke_LPT/MultiplyTransformation.CompareWithRefImpl/f32_\[1,3,16,16\]_CPU_f32_broadcast2_undefined__on_branch1_0_2.55_0_2.55_on_branch2_-1.27_1.28_-1.27_1.28_0.*)", + R"(.*smoke_LPT/ConvolutionTransformation.CompareWithRefImpl/f32_\[(1|4),3,16,16\]_CPU_f32_rank=4D_fq_on_data=\{level=256_shape=\[1\]_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ -18.7 \}_output_high\{ 18.8 \}_precision=\}_fq_on_weights=\{_255_\[1\]_\{ -18.7 \}_\{ 18.7 \}\}.*)", + R"(.*smoke_LPT/ConvolutionTransformation.CompareWithRefImpl/f32_\[(1|4),3,16,16\]_CPU_f32_rank=4D_fq_on_data=\{level=256_shape=\[1\]_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ -18.7 \}_output_high\{ 18.8 \}_precision=\}_fq_on_weights=\{_255_\[6,1,1,1\].*)", + R"(.*smoke_LPT/RecurrentCellTransformation.CompareWithRefImpl/f32_\[1,2,16\]_CPU_f32FQ_X_level=256_.*_FQ_W_level=255.*)", + R"(.*smoke_LPT/SubtractTransformation.CompareWithRefImpl/f16_\[1,3,16,16\]_CPU_f32.*)", + R"(.*smoke_LPT/FakeQuantizeTransformation.CompareWithRefImpl/f32_\[1,32,72,48\]_CPU_f32_0_level=65536_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 65.535 \}_output_low=\{ 0 \}_output_high=\{ 65.535 \}_precision=.*)", + R"(.*smoke_LPT/FakeQuantizeTransformation.CompareWithRefImpl/f32_\[1,32,72,48\]_CPU_f32_0_level=65536_shape=\[\]_input_low=\{ -32.768 \}_input_high=\{ 32.767 \}_output_low=\{ -32.768 \}_output_high=\{ 32.767 \}_precision=.*)", + R"(.*smoke_LPT/MoveFakeQuantizeTransformation.CompareWithRefImpl/f32_\[(1|4),1,16,16\]_CPU_f32SPLIT:0_OP:_FQ:level=256_shape=\[\]_input_low=\{ (0|-1.28) \}_input_high=\{ 
(2.55|1.27) \}_output_low=\{ (0|-1.28) \}_output_high=\{ (2.55|255|1.27) \}_precision=_DQ:.*)", + R"(.*smoke_LPT/MoveFakeQuantizeTransformation.CompareWithRefImpl/f32_\[(1|4),1,16,16\]_CPU_f32SPLIT:0_OP:relu_FQ:level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high=\{ 255 \}_precision=_DQ:__f32_\{\}__\{ 0.01 \}_undefined_\[\]_0_1_undefined.*)", + R"(.*smoke_LPT/MoveFakeQuantizeTransformation.CompareWithRefImpl/f32_\[(1|4),1,16,16\]_CPU_f32SPLIT:0_OP:relu_FQ:level=256_shape=\[1,6,1,1\]_input_low=\{ 0, 0, 0, 0, 0, 0 \}_input_high=\{ 2.55, 1.275, 0.85, 0.6375, 0.51, 0.425 \}_output_low=\{ -128, -128, -128, -128, -128, -128 \}_output_high=\{ 127, 127, 127, 127, 127, 127 \}_precision=_DQ:\{\}.*)", + R"(.*smoke_LPT/MoveFakeQuantizeTransformation.CompareWithRefImpl/f32_\[(1|4),1,16,16\]_CPU_f32SPLIT:(0|1)_OP:_FQ:level=256_shape=\[1,6,1,1\]_input_low=\{ 0, 0, 0, 0, 0, 0 \}_input_high=\{ 2.55, 1.275, 0.85, 0.6375, 0.51, 0.425 \}_output_low=\{ 0, 0, 0, 0, 0, 0 \}_output_high=\{ 255, 127.5, 85, 63.75, 51, 42.5 \}_precision=_DQ:__f32_.*)", + R"(.*smoke_LPT/EliminateFakeQuantizeTransformation.CompareWithRefImpl/CPU_f32_level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 127.5 \}_output_low=\{ 0 \}_output_high\{ 127.5 \}_precision=f32_level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ (127.5|121.429) \}_output_low=\{ 0 \}_output_high\{ (127.5|121.429) \}_precision=f32.*)", + R"(.*smoke_LPT/MatMulWithOptimizedConstantFq.CompareWithRefImpl/f32_\[1,16\]_\[(10|16),(10|16)\]_CPU_level=256_shape=\[1\]_input_low=\{ 0 \}_input_high=\{ 25.5 \}_output_low=\{ 0 \}_output_high\{ 25.5 \}_precision=_level=255_shape=\[1\]_input_low=\{ -12.7 \}_input_high=\{ 12.7 \}_output_low=\{ -12.7 \}_output_high\{ 12.7 \}_precision=.*)", + R"(.*smoke_LPT/FuseDequantizeToFakeQuantizeTransformation.CompareWithRefImpl/CPU_f32_0_undefined_\[\]_f32__\{\}_\{\}__\{ (0.01|0.01, 0.1, 1) \}_.*)", + 
R"(.*smoke_LPT/GroupConvolutionTransformation.CompareWithRefImpl/f32_\[1,6,24,24\]_CPU_f32_4D_\[1,6,24,24\]_\[1,24,18,18\]_3_-1_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 25.5 \}_output_low=\{ 0 \}_output_high\{ 25.5 \}_precision=_wo_reshape__255_\[3,8,1,1,1\]_\{ -127 \}_\{ 127 \}.*)", + R"(.*smoke_LPT/GroupConvolutionTransformation.CompareWithRefImpl/f32_\[1,6,24(,24)*\]_CPU_f32_(3D|4D)_\[1,6,24(,24)*\]_\[1,24,18(,18)*\]_3_-1_level=256_shape=\[1,1,1.*\]_input_low=\{ 0 \}_input_high=\{ 25.5 \}_output_low=\{ 0 \}_output_high\{ 25.5 \}_precision=_wo_reshape__255_\[3,8,1,1(,1)*\]_\{ -127, -12.7, -1.27,.*)", + R"(.*smoke_LPT/GroupConvolutionTransformation.CompareWithRefImpl/f32_\[1,6,1,24,24\]_CPU_f32_5D_\[1,6,1,24,24\]_\[1,24,1,18,18\]_3_-1_level=256_shape=\[1,1,1,1,1\]_input_low=\{ -12.8 \}_input_high=\{ 12.7 \}_output_low=\{ -12.8 \}_output_high\{ 12.7 \}_precision=_reshape_on_weights__255_\[1,1,1,1,1\]_\{ -127 \}_\{ 127 \}.*)", + R"(.*smoke_LPT/GroupConvolutionTransformation.CompareWithRefImpl/f32_\[1,24,8,12,12\]_CPU_f32_5D_\[1,24,8,12,12\]_\[1,24,1,1,1\]_3_-1_level=256_shape=\[1,1,1,1,1\]_input_low=\{ -12.8 \}_input_high=\{ 12.7 \}_output_low=\{ -12.8 \}_output_high\{ 12.7 \}_precision=_reshape_on_weights__255_\[1,1,1,1,1\]_\{ -127 \}_\{ 127 \}.*)", + R"(.*smoke_LPT/GroupConvolutionQDqTransformation.CompareWithRefImpl/f32_\[1,6,24,24\]_CPU_f32_level=256_shape=\[1,1,1,1\]_input_low=\{ -12.8 \}_input_high=\{ 12.7 \}_output_low=\{ 0 \}_output_high=\{ 255 \}_precision=f32__u8___f32_.*_undefinedoutput_original_f32_multiplyAfter=(false|true).*)", + R"(.*smoke_LPT/GroupConvolutionQDqTransformation.CompareWithRefImpl/f32_\[1,6,24,24\]_CPU_f32_level=256_.*_precision=f32__u8___f32_\{\}__\{ 0.1 \}.*_f32_\[6,2,5,5\]__255_\[1,1,1,1\]_\{ -128 \}_\{ 127 \}__i8.*undefinedoutput_original_u8_multiplyAfter=(false|true).*)", + R"(.*smoke_LPT/MatMulWithConstantTransformation.CompareWithRefImpl/\[(2,3,4|1,1,3,4)\]_f32_CPU_.*_shape=\[1,1,1\]_input_low=\{ 0 
\}_input_high=\{ 255 \}_output_low=\{ 0, 0, 0 \}_output_high=\{ 255, 25.5, 255 \}_precision=_level=256_shape=\[1\]_input_low=\{ -128 \}_.*)", + R"(.*smoke_LPT/ReduceSumTransformation.CompareWithRefImpl/f32_\[1,3,10,10\]_CPU_f32_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ 0 \}_output_high\{ 127 \}_precision=_keepDims__reduce_axis_2_3_.*)", + R"(.*smoke_TestsDFT_1d/DFTLayerTest.Inference.*TS=.*10.4.20.32.2.*Precision=bf16_Axes=\((0|2|3|_2)\).*)", + R"(.*smoke_TestsDFT_1d/DFTLayerTest.Inference.*TS=.*1.120.128.1.2.*Precision=bf16_Axes=\((1|2|_2)\).*)", + R"(.*smoke_TestsDFT_1d/DFTLayerTest.Inference.*TS.*2.5.7.8.2.*Precision=bf16_Axes=\((2|_2)\)_signal_size=\(40\)_Inverse=1.*)", + R"(.*smoke_TestsDFT_3d/DFTLayerTest.Inference/.*TS=.*10.4.20.32.2.*_Precision=bf16.*)", + R"(.*smoke_TestsDFT_3d/DFTLayerTest.Inference/.*TS=.*2.5.7.8.2.*_Precision=bf16.*)", + R"(.*smoke_TestsDFT_3d/DFTLayerTest.Inference/.*TS=.*1.120.128.1.2.*_Precision=bf16.*_signal_size=\(\).*)", + R"(.*smoke_TestsDFT_3d/DFTLayerTest.Inference/.*TS=.*1.120.128.1.2.*_Precision=bf16_Axes=\((0.1.2|1.2.3|2.3.1|0.2.3)\)_signal_size=\(7.11.32\)_Inverse=1.*)", + R"(.*smoke_TestsDFT_3d/DFTLayerTest.Inference/.*TS=.*1.120.128.1.2.*_Precision=bf16_Axes=\((1.2.3|2.3.1|0.2.3)\)_signal_size=\(7.11.32\)_Inverse=0.*)", + R"(.*smoke_TestsDFT_3d/DFTLayerTest.Inference/.*TS=.*1.120.128.1.2.*_Precision=bf16_Axes=\((_3._1._2|2.3.1)\)_signal_size=\(4.8.16\).*)", + R"(.*smoke_TestsDFT_4d/DFTLayerTest.Inference/.*10.4.20.32.2.*Precision=bf16_Axes=\(0.1.2.3\)_signal_size=\(5.2.5.2\).*)", + R"(.*smoke_TestsDFT_4d/DFTLayerTest.Inference/.*10.4.20.32.2.*Precision=bf16_Axes=\(0.1.2.3\)_signal_size=\(5.2.5.2\).*)", + R"(.*smoke_TestsDFT_4d/DFTLayerTest.Inference/.*2.5.7.8.2.*Precision=bf16.*)", + R"(.*smoke_TestsDFT_4d/DFTLayerTest.Inference/.*1.120.128.1.2.*Precision=bf16.*signal_size=\(\).*)", + R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*10.4.20.32.2.*_Precision=bf16.*)", + 
R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*2.5.7.8.2.*_Precision=bf16_Axes=\((_1._2|1.3|2.3|2.1)\).*)", + R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*2.5.7.8.2.*Precision=bf16_Axes=\(0.1\)_signal_size=\(\)_Inverse=1.*)", + R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*2.5.7.8.2.*Precision=bf16_Axes=\((0.1|2.0)\)_signal_size=\(16.8\).*)", + R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*2.5.7.8.2.*Precision=bf16_Axes=\(2.0\)_signal_size=\(\).*)", + R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*2.5.7.8.2.*Precision=bf16_Axes=\(2.0\)_signal_size=\(5.7\)_Inverse=0.*)", + R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*2.5.7.8.2.*Precision=bf16_Axes=\(2.0\)_signal_size=\(4.10\)_Inverse=1.*)", + R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*1.120.128.1.2.*_Precision=bf16_.*_signal_size=\(\).*)", + R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*1.120.128.1.2.*Precision=bf16_Axes=\((0.1|_1._2)\)_signal_size=\((4.10|5.7)\)_Inverse=1.*)", + R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*1.120.128.1.2.*Precision=bf16_Axes=\(2.1\)_signal_size=\((4.10|5.7)\).*)", + R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*1.120.128.1.2.*Precision=bf16_Axes=\((2.3|2.0|1.3)\)_signal_size=\(16.8\).*)", + R"(.*smoke_TestsDFT_2d/DFTLayerTest.Inference/.*TS.*1.120.128.1.2.*Precision=bf16_Axes=\((2.3|2.0|1.3)\)_signal_size=\(16.8\).*)", #if defined(OPENVINO_ARCH_ARM) // Issue: 126177 R"(.*smoke_CompareWithRefs_4D_Bitwise.*/EltwiseLayerCPUTest.*_eltwise_op_type=Bitwise.*_model_type=i32_.*)" @@ -246,7 +302,6 @@ std::vector disabledTestPatterns() { # if defined(OV_CPU_ARM_ENABLE_FP16) // Issue: 123019 - retVector.emplace_back(R"(smoke_CompareWithRefs_Mvn.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_staticShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); retVector.emplace_back(R"(smoke_dynamicShapes4D.*INFERENCE_PRECISION_HINT=f16.*)"); // Issue: 124309 @@ -288,7 +343,17 @@ std::vector disabledTestPatterns() { 
retVector.emplace_back(R"(smoke_LPT.*)"); retVector.emplace_back(R"(smoke_Snippets.*)"); #endif - +#if defined(_WIN32) + retVector.emplace_back(R"(.*smoke_QuantizedConvolutionBatchNormTransposeOnWeights/QuantizedConvolutionBatchNorm.CompareWithRefs/conv_type=convolution_quantize_type=fake_quantize_intervals_type=per_(tensor|channel)_transpose_on_weights=true_device=CPU.*)"); + retVector.emplace_back(R"(.*smoke_LPT/ConvolutionTransformation.CompareWithRefImpl/f32_\[(1|4),3,16,16\]_CPU_f32_rank=4D_fq_on_data=\{level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ -12.7 \}_output_high\{ 12.8 \}_precision=\}_fq_on_weights=\{_255_\[1,1,1,1\]_\{ -12.7 \}_\{ 12.7 \}\}.*)"); + retVector.emplace_back(R"(.*smoke_LPT/FuseDequantizeToFakeQuantizeTransformation.CompareWithRefImpl/CPU_f32_0_undefined_\[\]_f32__\{\}_\{\}__\{ 0.01, 0.1, 1 \}_f32_\[1,3\]_1_1_.*)"); + retVector.emplace_back(R"(.*smoke_QuantizedConvolutionBatchNorm/QuantizedConvolutionBatchNorm.CompareWithRefs/conv_type=convolution_quantize_.*)"); + retVector.emplace_back(R"(.*smoke_QuantizedConvolutionBatchNorm/QuantizedConvolutionBatchNorm.CompareWithRefs/conv_type=convolution_backprop_quantize_type=(quantize_dequantize_intervals|compressed_weights_intervals).*)"); + retVector.emplace_back(R"(.*smoke_FQLayerDQBias_4D_static/FQLayerDQBias.smoke_CompareWithRefs/IS=\(\[\]\)_TS=\(\(1.3.64.64\)_\)_layer_type=MatMul.*)"); + retVector.emplace_back(R"(.*smoke_FQLayerDQBias_4D_dynamic/FQLayerDQBias.smoke_CompareWithRefs/IS=\(\[\?.3.\?.\?\]\)_TS=\(\(1.3.64.64\)_\)_layer_type=MatMul.*)"); + retVector.emplace_back(R"(.*smoke_LPT/MatMulTransformation.CompareWithRefImpl/f32_CPU_\[(1|8|1,1,1),4,12,2\]_level=256_shape=\[\]_input_low=\{ (0|-12.8) \}_input_high=\{ (25.5|12.7) \}_output_low=\{ (0|-12.8) \}_output_high\{ (25.5|12.7) \}_.*)"); + retVector.emplace_back(R"(.*smoke_LPT/MatMulTransformation.CompareWithRefImpl/f32_CPU_\[(1|8|1,1,1),4,12,2\]_level=256_shape=\[\]_input_low=\{ (0|-12.8) 
\}_input_high=\{ (25.5|12.7) \}_output_low=\{ (0|-12.8) \}_output_high\{ (25.5|12.7) \}_.*)"); +#endif if (!ov::with_cpu_x86_avx512_core()) { // on platforms which do not support bfloat16, we are disabling bf16 tests since there are no bf16 primitives, // tests are useless on such platforms diff --git a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/layout_serializer.hpp b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/layout_serializer.hpp index 96479b7826c750..6597db41eaabfc 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/layout_serializer.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/graph/serialization/layout_serializer.hpp @@ -38,12 +38,84 @@ class Serializer +class Serializer, BufferType>::value>::type> { +public: + static void save(BufferType& buffer, const cldnn::format_traits& traits) { + buffer << traits.str; + buffer << traits.batch_num; + buffer << traits.feature_num; + buffer << traits.spatial_num; + buffer << traits.group_num; + buffer << traits._order; + buffer << traits.order; + buffer << traits.internal_order; + buffer << traits.block_sizes.size(); + for (auto& block_size : traits.block_sizes) { + buffer << block_size.first; + buffer << block_size.second; + } + } +}; + +template +class Serializer, BufferType>::value>::type> { +public: + static void load(BufferType& buffer, cldnn::format_traits& traits) { + buffer >> traits.str; + buffer >> traits.batch_num; + buffer >> traits.feature_num; + buffer >> traits.spatial_num; + buffer >> traits.group_num; + buffer >> traits._order; + buffer >> traits.order; + buffer >> traits.internal_order; + + size_t num_block_size; + buffer >> num_block_size; + size_t blk_size; + int axis_idx; + for (size_t i = 0; i < num_block_size; i++) { + buffer >> blk_size; + buffer >> axis_idx; + traits.block_sizes.push_back(std::make_pair(blk_size, axis_idx)); + } + } +}; + +template +class Serializer, BufferType>::value>::type> { +public: + static void save(BufferType& 
buffer, const cldnn::format& format) { + cldnn::format::type fmt_type = format; + buffer << make_data(&fmt_type, sizeof(cldnn::format::type)); + if (fmt_type == cldnn::format::custom) + buffer << format.traits(); + } +}; + +template +class Serializer, BufferType>::value>::type> { +public: + static void load(BufferType& buffer, cldnn::format& format) { + cldnn::format::type fmt_type; + buffer >> make_data(&fmt_type, sizeof(cldnn::format::type)); + if (fmt_type == cldnn::format::custom) { + cldnn::format_traits traits; + buffer >> traits; + format = cldnn::format(traits); + } else { + format = cldnn::format(fmt_type); + } + } +}; + template class Serializer, BufferType>::value>::type> { public: static void save(BufferType& buffer, const cldnn::layout& _layout) { buffer << make_data(&_layout.data_type, sizeof(cldnn::data_types)); - buffer << make_data(&_layout.format, sizeof(cldnn::format)); + buffer << _layout.format; buffer << _layout.data_padding; buffer << _layout.get_partial_shape(); } @@ -54,7 +126,7 @@ class Serializer> make_data(&_layout.data_type, sizeof(cldnn::data_types)); - buffer >> make_data(&_layout.format, sizeof(cldnn::format)); + buffer >> _layout.format; buffer >> _layout.data_padding; ov::PartialShape partial_shape; diff --git a/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp b/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp index 006ae48def7ec1..36da5d01e48759 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/primitives/reorder.hpp @@ -65,6 +65,7 @@ struct WeightsReorderParams { ib >> _transposed; ib >> _grouped; } + virtual ~WeightsReorderParams() = default; protected: layout _in_layout; @@ -267,7 +268,7 @@ struct reorder : public primitive_base { void save(BinaryOutputBuffer& ob) const override { primitive_base::save(ob); - ob << make_data(&output_format, sizeof(format)); + ob << output_format; ob << mean; ob << subtract_per_feature; ob << 
make_data(&mean_mode, sizeof(reorder_mean_mode)); @@ -283,7 +284,7 @@ struct reorder : public primitive_base { void load(BinaryInputBuffer& ib) override { primitive_base::load(ib); - ib >> make_data(&output_format, sizeof(format)); + ib >> output_format; ib >> mean; ib >> subtract_per_feature; ib >> make_data(&mean_mode, sizeof(reorder_mean_mode)); diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp index 947cdf06553417..9edce100529751 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/format.hpp @@ -13,6 +13,7 @@ #include #include #include +#include "intel_gpu/runtime/optionals.hpp" namespace cldnn { /// @addtogroup cpp_api C++ API @@ -305,29 +306,33 @@ struct format { g_os_y_is_x_osv8_isv4, format_num, ///< number of format types + custom, ///< means that this format is created based on custom format traits and may have no corresponding label any = -1 }; /// @brief Get format traits for particular @p format::type static const format_traits& traits(type fmt); + + /// @brief Get traits for current format + const format_traits& traits() const; /// @brief Returns number of batch dimensions for a @p format. - static size_t batch_num(type fmt) { return traits(fmt).batch_num; } + static size_t batch_num(const format& fmt) { return fmt.traits().batch_num; } /// @brief Returns number of feature dimensions for a @p format. - static size_t feature_num(type fmt) { return traits(fmt).feature_num; } + static size_t feature_num(const format& fmt) { return fmt.traits().feature_num; } /// @brief Returns number of spatial dimensions for a @p format. - static size_t spatial_num(type fmt) { return traits(fmt).spatial_num; } + static size_t spatial_num(const format& fmt) { return fmt.traits().spatial_num; } /// @brief Returns number of group dimensions for a @p format. 
- static size_t group_num(type fmt) { return traits(fmt).group_num; } + static size_t group_num(const format& fmt) { return fmt.traits().group_num; } /// @brief Returns an order of dimensions for a @ format. - static const std::string& order(type fmt) { return traits(fmt).order; } + static const std::string& order(const format& fmt) { return fmt.traits().order; } /// @brief Returns an internal orders of dimensions for a @p format. - static const std::string& internal_order(type fmt) { return traits(fmt).internal_order; } + static const std::string& internal_order(const format& fmt) { return fmt.traits().internal_order; } /// @brief Returns block sizes for @p format. - static const std::vector>& block_sizes(type fmt) { return traits(fmt).block_sizes; } + static const std::vector>& block_sizes(const format& fmt) { return fmt.traits().block_sizes; } /// @brief Returns number of dimensions contained within a @p format - static size_t dimension(type fmt) { return order(fmt).size(); } + static size_t dimension(const format& fmt) { return order(fmt).size(); } /// @brief Checks if @p format is a winograd format - static bool is_winograd(type fmt) { + static bool is_winograd(const format& fmt) { return (fmt == winograd_2x3_s1_data || fmt == winograd_2x3_s1_weights || fmt == winograd_2x3_s1_fused_weights || @@ -335,7 +340,7 @@ struct format { fmt == image_2d_weights_winograd_6x3_s1_fbxyb || fmt == image_2d_weights_winograd_6x3_s1_xfbyb); } /// @brief Checks if @p format is of image2d type - static bool is_image_2d(type fmt) { + static bool is_image_2d(const format& fmt) { return (fmt == image_2d_weights_c4_fyx_b || fmt == image_2d_weights_c1_b_fyx || fmt == image_2d_weights_winograd_6x3_s1_fbxyb || @@ -344,8 +349,10 @@ struct format { fmt == image_2d_rgba); } /// @brief Checks if @p format is weights format - static bool is_weights_format(type fmt) { - const auto internal_order = traits(fmt).internal_order; + static bool is_weights_format(const format& fmt) { + if (fmt == 
format::custom) + return true; + const auto internal_order = fmt.traits().internal_order; const auto weights_chars = { "o", "i" }; for (const auto& c : weights_chars) { if (internal_order.find_first_of(c) != std::string::npos) { @@ -355,7 +362,7 @@ struct format { return false; } /// @brief Checks if @p format is simple data format - static bool is_simple_data_format(type fmt) { + static bool is_simple_data_format(const format& fmt) { return (fmt == yxfb || fmt == byxf || fmt == byfx || fmt == bxfy || fmt == bfyx || fmt == fyxb || @@ -365,7 +372,7 @@ struct format { } static format get_default_format(size_t rank, bool is_weights = false, bool is_grouped = false); - static bool is_default_format(type fmt); + static bool is_default_format(const format& fmt); static format adjust_to_rank(format fmt, size_t new_rank); @@ -380,46 +387,46 @@ struct format { bool is_nv12 = false); /// @brief Checks if @p format is of grouped type - static bool is_grouped(type fmt) { return group_num(fmt) != 0; } + static bool is_grouped(const format& fmt) { return group_num(fmt) != 0; } /// @brief Checks if @p format is of image type - static bool is_image(type fmt) { return (is_image_2d(fmt)); } + static bool is_image(const format& fmt) { return (is_image_2d(fmt)); } /// @brief Checks if @p format is blocked format - static bool is_blocked(type fmt) { return !(block_sizes(fmt).empty()); } + static bool is_blocked(const format& fmt) { return !(block_sizes(fmt).empty()); } /// @brief Checks if @p format is blocked format which has single inner block - static bool is_single_blocked(type fmt) { return (block_sizes(fmt).size() == 1); } + static bool is_single_blocked(const format& fmt) { return (block_sizes(fmt).size() == 1); } /// @brief Checks if @p format is blocked format which has multiple inner blocks - static bool is_multi_blocked(type fmt) { return (block_sizes(fmt).size() > 1); } + static bool is_multi_blocked(const format& fmt) { return (block_sizes(fmt).size() > 1); } /// @brief 
Checks if @p format is nv12 format - static bool is_nv12(type fmt) { return (fmt == nv12); } + static bool is_nv12(const format& fmt) { return (fmt == nv12); } /// @brief Returns number of batch dimensions. - size_t batch_num() const { return traits(value).batch_num; } + size_t batch_num() const { return traits().batch_num; } /// @brief Returns number of feature dimensions. - size_t feature_num() const { return traits(value).feature_num; } + size_t feature_num() const { return traits().feature_num; } /// @brief Returns number of spatial dimensions. - size_t spatial_num() const { return traits(value).spatial_num; } + size_t spatial_num() const { return traits().spatial_num; } /// @brief Returns number of group dimensions. - size_t group_num() const { return traits(value).group_num; } + size_t group_num() const { return traits().group_num; } /// @brief Returns an order of dimensions. - const std::vector& dims_order() const { return traits(value)._order; } + const std::vector& dims_order() const { return traits()._order; } /// @brief Returns an order of dimensions in form of string. - const std::string& order() const { return traits(value).order; } + const std::string& order() const { return traits().order; } /// @brief Returns an internal orders of dimensions form of string. - const std::string& internal_order() const { return traits(value).internal_order; } + const std::string& internal_order() const { return traits().internal_order; } /// @brief Returns block sizes as vector of pairs of dimension and block size for that dimension. 
- const std::vector>& block_sizes() const { return traits(value).block_sizes; } + const std::vector>& block_sizes() const { return traits().block_sizes; } /// @brief Returns number of dimensions contained within this format - size_t dimension() const { return order(value).size(); } + size_t dimension() const { return traits()._order.size(); } /// @brief Checks if @p format is a winograd format - bool is_winograd() const { return is_winograd(value); } + bool is_winograd() const { return is_winograd(*this); } /// @brief Checks if @p format is of image 2d type - bool is_image_2d() const { return is_image_2d(value); } + bool is_image_2d() const { return is_image_2d(*this); } /// @brief Checks if @p format is of image type - bool is_image() const { return is_image(value); } + bool is_image() const { return is_image(*this); } /// @brief Checks if @p format is blocked format - bool is_blocked() { return is_blocked(value); } + bool is_blocked() { return is_blocked(*this); } /// @brief Checks if @p format is a nv12 format - bool is_nv12() const { return is_nv12(value); } + bool is_nv12() const { return is_nv12(*this); } /// @brief Transforms dimension from internal order to external order size_t internal_to_external(size_t idx) const { @@ -430,8 +437,15 @@ struct format { } type value; + + optional_value custom_traits = {}; + /// @brief Implicit conversion from format::type. - constexpr format(type t) : value(t) {} + format(type t) : value(t) {} + + /// @brief custom format from format_traits. + explicit format(const format_traits& traits) : value(format::custom), custom_traits(traits) {} + /// @brief Implicit conversion to format::type. 
constexpr operator type() const { return value; } diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp index f3e2af08a33f0a..5fe18422d0c01d 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/layout.hpp @@ -428,6 +428,13 @@ struct layout { auto v = pshape[idx].is_dynamic() ? -1 : pshape[idx].get_length(); seed = hash_combine(seed, v); } + + if (format == format::custom) { + for (auto& bs : format.traits().block_sizes) { + seed = hash_combine(seed, bs.first); + seed = hash_combine(seed, bs.second); + } + } return seed; } @@ -440,6 +447,9 @@ inline ::std::ostream& operator<<(::std::ostream& os, const layout& p) { return os << p.to_string(); } +using optional_data_type = optional_value; +using optional_layout = optional_value; + /// @} /// @} } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/optionals.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/optionals.hpp index 0b8723e75cef90..3057abf45cd15d 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/optionals.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/optionals.hpp @@ -4,7 +4,6 @@ #pragma once -#include "layout.hpp" #include "openvino/core/except.hpp" #include @@ -72,7 +71,4 @@ class optional_value { std::unique_ptr storage = nullptr; }; -using optional_data_type = optional_value; -using optional_layout = optional_value; - } // namespace cldnn diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor.hpp index 36189a26d85e66..53bd70c8248c5c 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/runtime/tensor.hpp @@ -476,7 +476,7 @@ struct tensor { auto new_order = new_fmt.internal_order(); std::vector old_sizes = sizes(); std::vector new_sizes(old_sizes.size(), 
default_size); - const auto& new_traits = format::traits(new_fmt); + const auto& new_traits = new_fmt.traits(); static const std::map flatten_mapping = { { 'v', 'u'}, { 'u', 'w'}, diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp index f8d3ed08139817..1f4bfa0ebd8777 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/post_optimize_weights.cpp @@ -52,8 +52,9 @@ void post_optimize_weights::optimize_weights(T& node, program& p) { // in main program and internal program for constant propagation auto set_implementation = [&p, &impl](program_node& weights_reorder_node) { if (!weights_reorder_node.is_constant()) { - auto factory = WeightsReordersFactory::get(impl_types::ocl, shape_types::static_shape); auto reorder_kernel_params = impl->get_weights_reorder_kernel_params(); + auto impl_type = (reorder_kernel_params->get_output_layout(0).format == format::custom) ? 
impl_types::onednn : impl_types::ocl; + auto factory = WeightsReordersFactory::get(impl_type, shape_types::static_shape); reorder_kernel_params->prog = &p; auto reorder_impl = factory(*reorder_kernel_params); diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp index 64ed3013efe97a..549102bf732620 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/prepare_buffer_fusing.cpp @@ -415,7 +415,7 @@ static bool can_crop_be_optimized_along_batch(const crop_node& node) { const auto& out_padding = crop_layout.data_padding; // Check format's order is 'bxxx' and only batch size is different - if (format::is_simple_data_format(format) && format::traits(format)._order[0] == 0 && + if (format::is_simple_data_format(format) && format.dims_order()[0] == 0 && std::equal(input_shape.begin()+1, input_shape.end(), crop_shape.begin()+1) && !out_padding && !in_padding) { return true; diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp index a7eab1034559f3..0cea8dd96050fb 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/propagate_constants.cpp @@ -5,10 +5,15 @@ #include "pass_manager.h" #include "program_node.h" #include "intel_gpu/runtime/engine.hpp" +#include "intel_gpu/runtime/debug_configuration.hpp" #include "intel_gpu/graph/program.hpp" #include "intel_gpu/graph/network.hpp" #include "data_inst.h" #include "intel_gpu/runtime/itt.hpp" +#ifdef ENABLE_ONEDNN_FOR_GPU +#include "reorder_inst.h" +#include "graph/impls/onednn/utils.hpp" +#endif // ENABLE_ONEDNN_FOR_GPU #include #include #include @@ -151,6 +156,29 @@ void propagate_constants::add_constant(program& prog, program_node& node) { // if a 
non-tirivial constant has a trivial input, add this input as an input for our network add_deps_to_tpl(prog, node.get_dependencies()); + +#ifdef ENABLE_ONEDNN_FOR_GPU + // Add reorder to transpose when the impl type of reorder is onednn and the weights for deconvolution should be transposed. + bool is_reorder_weights = node.is_type() && node.as().get_primitive()->weights_reorder_params; + if (is_reorder_weights) { + const auto& weights_params = node.as().get_primitive()->weights_reorder_params; + auto onednn_weights_params = std::dynamic_pointer_cast(weights_params); + if (onednn_weights_params != nullptr && onednn_weights_params->should_be_transposed()) { + auto& prev = node.get_dependency(0); + cldnn::primitive_id rotate_reorder_id = prev.id() + "_rotate_reorder"; + auto grouped = weights_params->get_grouped(); + auto layout = weights_params->get_input_layout().convert_to_weights_layout(grouped); + auto rotate_weights_params = std::make_shared(layout, layout, true, grouped); + auto rotate_prim = std::make_shared(rotate_reorder_id, prev.id(), rotate_weights_params); + auto& rotate_node = prog.get_or_create(rotate_prim); + prog.add_intermediate(rotate_node, node, 0); + prog.get_or_create(rotate_prim).recalc_output_layouts(false); + nodes.insert(prog.get_node_ptr(rotate_node.id())); + GPU_DEBUG_LOG << "Added " << rotate_reorder_id << " for transposing weights before " + << node.id() << std::endl; + } + } +#endif // ENABLE_ONEDNN_FOR_GPU } void propagate_constants::add_deps_to_tpl(program& prog, const std::vector>& deps) { diff --git a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp index 20b229ad9c6bc9..7cba6f2dcdfa57 100644 --- a/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp +++ b/src/plugins/intel_gpu/src/graph/graph_optimizer/reorder_inputs.cpp @@ -562,7 +562,8 @@ void insert_reorders_in_dir(program& p, const std::mapid() << " --> " << next->id() << " ## " 
<< fmt_to_str(in_layout.format) << " --> " << fmt_to_str(out_layout.format) << std::endl; - if (in_layout.format == format::any || out_layout.format == format::any) + if (in_layout.format == format::any || out_layout.format == format::any || + in_layout.format == format::custom || out_layout.format == format::custom) continue; auto reorder_pair = rf.get_reorder(predecessor->id(), @@ -612,7 +613,8 @@ void insert_reorders_in_dir(program& p, const std::map

id() << " --> " << next.first->id() << " ## " << fmt_to_str(in_layout.format) << " --> " << fmt_to_str(out_layout.format) << std::endl; - if (in_layout.format == format::any || out_layout.format == format::any) + if (in_layout.format == format::any || out_layout.format == format::any || + in_layout.format == format::custom || out_layout.format == format::custom) continue; auto reorder_pair = rf.get_reorder(predecessor->id(), diff --git a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp index 51eb74bffac51d..aeef57d944b172 100644 --- a/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/ocl/kernel_selector_helper.cpp @@ -1080,7 +1080,7 @@ kernel_selector::data_tensor convert_data_tensor(const layout& l, const tensor v // legacy get_tensor().sizes() impl return dims in external order, so we need to transpose dims ov::PartialShape vals_ordered; - auto axis_order = format::traits(l.format)._order; + auto axis_order = l.format.dims_order(); for (size_t i = 0; i < axis_order.size(); i++) { if (axis_order[i] >= vals_original.size()) vals_ordered.push_back(ov::Dimension(1)); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp index aa11884b2445bc..65c416e69d2a85 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/convolution_onednn.cpp @@ -4,6 +4,7 @@ #include "convolution_inst.h" #include "eltwise_inst.h" +#include "intel_gpu/runtime/format.hpp" #include "quantize_inst.h" #include "primitive_onednn_base.h" #include "implementation_map.hpp" @@ -142,14 +143,27 @@ struct convolution_onednn : typed_primitive_onednn_impl { static std::shared_ptr get_weights_reorder(const kernel_impl_params& impl_params, const dnnl::primitive_desc& pd, bool rotate) { 
auto cldnn_prim = impl_params.typed_desc(); - auto input_weights_layout = impl_params.get_input_layout(1); - auto grouped_weights = format::is_grouped(input_weights_layout.format) || cldnn_prim->grouped_weights_shape; - format out_fmt = onednn::find_format(pd.weights_desc(0), grouped_weights); + auto source_weights_layout = impl_params.get_input_layout(1); + auto grouped_weights = format::is_grouped(source_weights_layout.format) || cldnn_prim->grouped_weights_shape; + auto target_weights_desc = pd.weights_desc(0); - auto output_weights_layout = input_weights_layout; - output_weights_layout.format = out_fmt; + auto shape_consistent = onednn::keep_weights_reorder_shape_consistent(source_weights_layout, target_weights_desc); + OPENVINO_ASSERT(shape_consistent, "[GPU] Input shape and output shape of weight reorder should be same."); - return std::make_shared(input_weights_layout, output_weights_layout, rotate, grouped_weights); + auto source_weights_desc = onednn::layout_to_memory_desc(source_weights_layout); + + const bool weights_format = true; + auto traits = convert_memory_desc_to_traits(target_weights_desc, weights_format, grouped_weights); + + auto target_weights_layout = source_weights_layout; + target_weights_layout.format = format(traits); + + return std::make_shared(source_weights_layout, + target_weights_layout, + source_weights_desc, + target_weights_desc, + rotate, + grouped_weights); } public: diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp index 1702738598b104..51a96bc860907b 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/deconvolution_onednn.cpp @@ -4,6 +4,7 @@ #include "deconvolution_inst.h" #include "eltwise_inst.h" +#include "impls/onednn/utils.hpp" #include "quantize_inst.h" #include "primitive_onednn_base.h" #include "implementation_map.hpp" @@ -56,14 +57,27 
@@ struct deconvolution_onednn : typed_primitive_onednn_impl { static std::shared_ptr get_weights_reorder(const kernel_impl_params& impl_params, const dnnl::primitive_desc& pd) { auto cldnn_prim = impl_params.typed_desc(); - auto input_weights_layout = impl_params.get_input_layout(1); - auto grouped_weights = format::is_grouped(input_weights_layout.format) || cldnn_prim->grouped_weights_shape; - format out_fmt = onednn::find_format(pd.weights_desc(0), grouped_weights); + auto source_weights_layout = impl_params.get_input_layout(1); + auto grouped_weights = format::is_grouped(source_weights_layout.format) || cldnn_prim->grouped_weights_shape; + auto target_weights_desc = pd.weights_desc(0); - auto output_weights_layout = input_weights_layout; - output_weights_layout.format = out_fmt; + auto shape_consistent = onednn::keep_weights_reorder_shape_consistent(source_weights_layout, target_weights_desc); + OPENVINO_ASSERT(shape_consistent, "[GPU] Input shape and output shape of weight reorder should be same."); - return std::make_shared(input_weights_layout, output_weights_layout, false, grouped_weights); + auto source_weights_desc = onednn::layout_to_memory_desc(source_weights_layout); + + const bool weights_format = true; + auto traits = convert_memory_desc_to_traits(target_weights_desc, weights_format, cldnn_prim->grouped_weights_shape); + + auto target_weights_layout = source_weights_layout; + target_weights_layout.format = format(traits); + + return std::make_shared(source_weights_layout, + target_weights_layout, + source_weights_desc, + target_weights_desc, + false, + grouped_weights); } public: diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp index 8ee6979dec598f..995016be48611e 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/fully_connected_onednn.cpp @@ -55,25 +55,40 
@@ struct fully_connected_onednn : typed_primitive_onednn_impl { static std::shared_ptr get_weights_reorder(const kernel_impl_params& impl_params, const dnnl::primitive_desc& pd) { auto input_layout = impl_params.get_input_layout(0); - auto weights_layout = impl_params.get_input_layout(1); + auto source_weights_layout = impl_params.get_input_layout(1); auto cldnn_prim = impl_params.typed_desc(); auto input_pshape = input_layout.get_partial_shape(); - auto weights_pshape = weights_layout.get_partial_shape(); + auto weights_pshape = source_weights_layout.get_partial_shape(); + int64_t feature = input_pshape[std::min(cldnn_prim->input_size, static_cast(4)) - 1].get_length(); if (cldnn_prim->input_size == 3) { feature = std::max({input_layout.spatial(0), input_layout.spatial(1), input_layout.spatial(2)}); } + auto target_weights_layout = source_weights_layout; if (weights_pshape.size() != 2) { - weights_layout.set_partial_shape(reshape_to_2d(weights_pshape, feature)); + target_weights_layout.set_partial_shape(reshape_to_2d(weights_pshape, feature)); } - format out_fmt = onednn::find_format(pd.weights_desc(0)); + auto target_weights_desc = pd.weights_desc(0); + + auto shape_consistent = onednn::keep_weights_reorder_shape_consistent(source_weights_layout, target_weights_desc); + OPENVINO_ASSERT(shape_consistent, "[GPU] Input shape and output shape of weight reorder should be same."); + + auto source_weights_desc = onednn::layout_to_memory_desc(source_weights_layout); + + const bool weights_format = true; + const bool grouped = false; + + auto traits = convert_memory_desc_to_traits(target_weights_desc, weights_format, grouped); - auto output_weights_layout = weights_layout; - output_weights_layout.format = out_fmt; + target_weights_layout.format = format(traits); - return std::make_shared(weights_layout, output_weights_layout, false); + return std::make_shared(source_weights_layout, + target_weights_layout, + source_weights_desc, + target_weights_desc, + false); } static 
std::shared_ptr get_fully_connected_primitive_descriptor(const kernel_impl_params& impl_params, diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h index 87f129b8b98495..b374686feac5db 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/primitive_onednn_base.h @@ -482,6 +482,33 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { return args; } + virtual std::unordered_map get_arguments(typed_primitive_inst& instance, kernel_arguments_data& mem_args) const { + std::unordered_map args; + auto& engine = instance.get_network().get_engine(); + auto dnnl_engine = engine.get_onednn_engine(); + + OPENVINO_ASSERT(mem_args.inputs.size() == 1); + OPENVINO_ASSERT(mem_args.outputs.size() == 1); + OPENVINO_ASSERT(_scratchpad_md.get_size() == 0); + OPENVINO_ASSERT(instance.get_fused_primitives_onednn().empty()); + + { + auto input = mem_args.inputs[0]; + layout l = input->get_layout(); + auto offset = onednn::get_offset(std::move(l), _pd.dnnl::primitive_desc_base::src_desc(0)); + args.insert({DNNL_ARG_SRC, input->get_onednn_memory(_pd.dnnl::primitive_desc_base::src_desc(0), offset)}); + } + + { + auto output = mem_args.outputs[0]; + layout l = output->get_layout(); + auto offset = onednn::get_offset(std::move(l), _pd.dnnl::primitive_desc_base::dst_desc(0)); + args.insert({DNNL_ARG_DST, output->get_onednn_memory(_pd.dnnl::primitive_desc_base::dst_desc(0), offset)}); + } + + return args; + } + void init_kernels(const kernels_cache&, const kernel_impl_params&) override { } void set_arguments_impl(typed_primitive_inst& instance) override { @@ -491,6 +518,14 @@ struct typed_primitive_onednn_impl : public typed_primitive_impl { _args[net_id] = get_arguments(instance); } + void set_arguments_impl(typed_primitive_inst& instance, kernel_arguments_data& args) override { + if 
(instance.can_be_optimized()) { + return; + } + + _args[instance.get_network().get_id()] = get_arguments(instance, args); + } + event::ptr execute_impl(const std::vector& /* events */, typed_primitive_inst& instance) override { auto& network = instance.get_network(); diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp index 97fdb612e89272..01f7aaee48c688 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/reorder_onednn.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // +#include "impls/onednn/utils.hpp" #include "reorder_inst.h" #include "primitive_onednn_base.h" #include "implementation_map.hpp" @@ -57,6 +58,11 @@ struct reorder_onednn : typed_primitive_onednn_impl( engine.get_onednn_engine(), input_md, @@ -103,14 +109,41 @@ struct reorder_onednn : typed_primitive_onednn_impl create(const reorder_node& arg, const kernel_impl_params& impl_params) { - auto& engine = impl_params.prog->get_engine(); - auto& config = impl_params.prog->get_config(); - auto attr = arg.get_onednn_primitive_attributes(); - auto prim_desc = get_reorder_primitive_descriptor(impl_params, *attr); + bool is_reorder_weights = format::is_weights_format(impl_params.get_input_layout().format) || + format::is_weights_format(impl_params.get_output_layout().format); + if (is_reorder_weights) { + return create_reorder_weights(impl_params); + } else { + auto& engine = impl_params.prog->get_engine(); + auto& config = impl_params.prog->get_config(); + auto attr = arg.get_onednn_primitive_attributes(); + auto prim_desc = get_reorder_primitive_descriptor(impl_params, *attr); + return cldnn::make_unique(engine, config, attr, *prim_desc); + } + } + + static std::unique_ptr create_reorder_weights(const kernel_impl_params& impl_param) { + auto& engine = impl_param.prog->get_engine(); + const auto& prim = impl_param.typed_desc(); + 
const auto& weights_params = prim->weights_reorder_params; + + auto onednn_weights_params = std::dynamic_pointer_cast(weights_params); - std::shared_ptr dummy = nullptr; + OPENVINO_ASSERT(impl_param.get_input_layout().bytes_count() == weights_params->get_input_layout().bytes_count(), + "[GPU] Input layout doesn't match required reorder weights layout"); + + auto input_md = onednn_weights_params ? onednn_weights_params->_in_desc : onednn::layout_to_memory_desc(weights_params->get_input_layout()); + auto output_md = onednn_weights_params ? onednn_weights_params->_out_desc : onednn::layout_to_memory_desc(weights_params->get_output_layout()); + + auto attr = std::make_shared(); + auto reorder_prim = std::make_shared( + engine.get_onednn_engine(), + input_md, + engine.get_onednn_engine(), + output_md, + *attr); - return cldnn::make_unique(engine, config, attr, *prim_desc); + return cldnn::make_unique(engine, impl_param.prog->get_config(), attr, *reorder_prim); } }; @@ -118,6 +151,7 @@ namespace detail { attach_reorder_onednn::attach_reorder_onednn() { implementation_map::add(impl_types::onednn, reorder_onednn::create, {}); + WeightsReordersFactory::add(cldnn::impl_types::onednn, shape_types::static_shape, reorder_onednn::create_reorder_weights); } } // namespace detail diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp index d72aba88cb1177..5b2de65473a7aa 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.cpp @@ -106,13 +106,46 @@ dnnl::memory::data_type convert_data_type(cldnn::data_types dt) { } std::vector> format_map = { - { cldnn::format::bfyx, dnnl::memory::format_tag::nchw }, + /// weights format for onednnn + { cldnn::format::oiyx, dnnl::memory::format_tag::oihw }, + { cldnn::format::ioyx, dnnl::memory::format_tag::iohw }, + { cldnn::format::yxio, dnnl::memory::format_tag::hwio }, + { cldnn::format::oizyx, 
dnnl::memory::format_tag::oidhw }, + { cldnn::format::iozyx, dnnl::memory::format_tag::iodhw }, + { cldnn::format::iyxo, dnnl::memory::format_tag::ihwo }, + { cldnn::format::oyxi, dnnl::memory::format_tag::ohwi }, + { cldnn::format::oyix, dnnl::memory::format_tag::acbd }, + { cldnn::format::oxiy, dnnl::memory::format_tag::adbc }, + { cldnn::format::goiyx, dnnl::memory::format_tag::goihw }, + { cldnn::format::gioyx, dnnl::memory::format_tag::giohw }, + { cldnn::format::gyxio, dnnl::memory::format_tag::ghwio }, + { cldnn::format::giozyx, dnnl::memory::format_tag::giodhw }, + { cldnn::format::goizyx, dnnl::memory::format_tag::goidhw }, + + { cldnn::format::os_iyx_osv16, dnnl::memory::format_tag::Oihw16o }, + { cldnn::format::gs_oiyx_gsv8, dnnl::memory::format_tag::Goihw8g }, + { cldnn::format::gs_oiyx_gsv16, dnnl::memory::format_tag::Goihw16g }, + { cldnn::format::gs_oiyx_gsv32, dnnl::memory::format_tag::Goihw32g }, + { cldnn::format::gs_oizyx_gsv16, dnnl::memory::format_tag::Goidhw16g }, + { cldnn::format::gs_oizyx_gsv32, dnnl::memory::format_tag::Goidhw32g }, + { cldnn::format::g_os_iyx_osv16, dnnl::memory::format_tag::gOihw16o }, + + { cldnn::format::os_is_yx_osv16_isv16, dnnl::memory::format_tag::OIhw16o16i }, + { cldnn::format::os_is_yx_isv16_osv16, dnnl::memory::format_tag::OIhw16i16o }, + { cldnn::format::os_is_zyx_isv16_osv16, dnnl::memory::format_tag::OIdhw16i16o }, + { cldnn::format::is_os_zyx_isv16_osv16, dnnl::memory::format_tag::IOdhw16i16o }, + + { cldnn::format::g_os_is_zyx_isv16_osv16, dnnl::memory::format_tag::gIOdhw16i16o }, + + { cldnn::format::bfyx, dnnl::memory::format_tag::nchw }, + { cldnn::format::byxf, dnnl::memory::format_tag::nhwc }, + { cldnn::format::byfx, dnnl::memory::format_tag::acbd }, + { cldnn::format::bxfy, dnnl::memory::format_tag::adbc }, + { cldnn::format::fyxb, dnnl::memory::format_tag::bcda }, + { cldnn::format::fbyx, dnnl::memory::format_tag::bacd }, { cldnn::format::bfzyx, dnnl::memory::format_tag::ncdhw }, - { 
cldnn::format::byxf, dnnl::memory::format_tag::nhwc }, - { cldnn::format::byfx, dnnl::memory::format_tag::acbd }, - { cldnn::format::bxfy, dnnl::memory::format_tag::adbc }, - { cldnn::format::fyxb, dnnl::memory::format_tag::bcda }, { cldnn::format::bzyxf, dnnl::memory::format_tag::ndhwc }, + { cldnn::format::bfwzyx, dnnl::memory::format_tag::abcdef }, { cldnn::format::b_fs_yx_fsv2, dnnl::memory::format_tag::undef }, { cldnn::format::b_fs_yx_fsv4, dnnl::memory::format_tag::aBcd4b }, { cldnn::format::b_fs_yx_fsv8, dnnl::memory::format_tag::aBcd8b }, @@ -428,5 +461,107 @@ bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val) { template bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val); template bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val); + +static std::string get_external_order(const std::vector& order, bool is_weights, bool is_grouped) { + cldnn::format default_fmt = format::get_default_format(order.size(), is_weights, is_grouped); + const auto& default_order = default_fmt.order(); + + std::string external_order(order.size(), '?'); + + for (size_t i = 0; i < order.size(); i++) { + external_order[i] = default_order[order[i]]; + } + + return external_order; +} + +cldnn::format_traits convert_memory_desc_to_traits(const dnnl::memory::desc& desc, bool is_weights, bool is_grouped) { + OPENVINO_ASSERT(desc.get_format_kind() == dnnl::memory::format_kind::blocked, "[GPU] Only blocked memory desc type is supported"); + auto ndims = desc.get_ndims(); + auto inner_nblks = desc.get_inner_nblks(); + auto inner_blks = desc.get_inner_blks(); + auto inner_idxs = desc.get_inner_idxs(); + auto strides = desc.get_strides(); + + std::vector> stride_order; + for (size_t i = 0; i < strides.size(); i++) { + stride_order.emplace_back(strides[i], i); + } + + // sort by strides in descending order + std::sort(stride_order.begin(), stride_order.end(), [](const std::pair& first, const std::pair& second) { + return first.first > second.first; + }); + + 
std::vector order; + for (const auto& p : stride_order) { + order.push_back(p.second); + } + + std::vector> block_sizes(inner_nblks); + for (int i = 0; i < inner_nblks; i++) { + block_sizes[i] = std::make_pair(inner_idxs[i], inner_blks[i]); + } + + // all fmts has at least batch and feature dim for now + const int batch_num = 1; + const int feature_num = 1; + const int group_num = is_grouped ? 1 : 0; + const int spatial_size = std::max(ndims - batch_num - feature_num - group_num, 0); + + std::string internal_order = is_weights ? + (is_grouped ? "oixyz???g" : "oixyz") : + "bfxyzwuv"; + + const size_t max_spatial = 2 + (is_weights ? 3 : 6); + const size_t last_spatial_offset = 2 + spatial_size; + for (size_t i = last_spatial_offset; i < max_spatial; i++) { + internal_order[i] = '?'; + } + std::string outer_order = get_external_order(order, is_weights, is_grouped); + + format_traits traits; + traits.batch_num = batch_num; + traits.feature_num = feature_num; + traits.spatial_num = spatial_size; + traits.group_num = group_num; + traits._order = order; + traits.order = outer_order; + traits.internal_order = internal_order; + traits.block_sizes = block_sizes; + traits.str = "custom"; + + return traits; +} + +bool keep_weights_reorder_shape_consistent(cldnn::layout& layout, const dnnl::memory::desc& desc) { + if (layout.is_dynamic()) + return false; + + auto shape = layout.get_shape(); + auto dims = desc.get_dims(); + std::vector target_dims; + std::vector filtered_target_dims; + std::transform(shape.begin(), shape.end(), std::back_inserter(target_dims), + [](size_t v) { return static_cast(v); }); + std::copy_if(target_dims.begin(), target_dims.end(), std::back_inserter(filtered_target_dims), + [](ov::Dimension::value_type i) { return i != 1; }); + + std::vector desc_dims; + std::vector filtered_desc_dims; + std::transform(dims.cbegin(), dims.cend(), std::back_inserter(desc_dims), + [](dnnl::memory::dim v) { return static_cast(v); }); + std::copy_if(desc_dims.begin(), 
desc_dims.end(), std::back_inserter(filtered_desc_dims), + [](ov::Dimension::value_type i) { return i != 1; }); + + // Check whether they have same values and orders. + if (filtered_target_dims == filtered_desc_dims) { + layout.set_partial_shape(desc_dims); + return true; + } else { + return false; + } +} + } // namespace onednn } // namespace cldnn diff --git a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.hpp b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.hpp index e8f099b633011c..0d767bc1619e80 100644 --- a/src/plugins/intel_gpu/src/graph/impls/onednn/utils.hpp +++ b/src/plugins/intel_gpu/src/graph/impls/onednn/utils.hpp @@ -10,6 +10,8 @@ #include #include #include +#include "intel_gpu/primitives/reorder.hpp" +#include "intel_gpu/runtime/format.hpp" #include namespace cldnn { @@ -36,12 +38,27 @@ std::vector> get_candidate_orders(dnnl::memory::desc desc); cldnn::format find_format(dnnl::memory::desc desc, bool is_grouped = false); cldnn::format find_data_format(dnnl::memory::desc desc); dnnl::memory::format_tag get_format_by_desc(dnnl::memory::desc desc); - +cldnn::format_traits convert_memory_desc_to_traits(const dnnl::memory::desc& desc, bool is_weights = false, bool is_grouped = false); int64_t get_offset(cldnn::layout&& l, dnnl::memory::desc&& desc); +bool keep_weights_reorder_shape_consistent(cldnn::layout& layout, const dnnl::memory::desc& desc); // Check if data node is per-tensor template bool is_per_tensor(cldnn::data_node& node, int32_t& zp_val); +struct WeightsReorderParamsOneDNN : public cldnn::WeightsReorderParams { + WeightsReorderParamsOneDNN(const layout& in_layout, + const layout& out_layout, + const dnnl::memory::desc& in_desc, + const dnnl::memory::desc& out_desc, + bool transposed, bool grouped = false) + : WeightsReorderParams(in_layout, out_layout, transposed, grouped) + , _in_desc(in_desc) + , _out_desc(out_desc) {} + + dnnl::memory::desc _in_desc; + dnnl::memory::desc _out_desc; +}; + } // namespace onednn } // namespace cldnn 
diff --git a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h index 3b2e3c9223babf..d64314a5609f41 100644 --- a/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h +++ b/src/plugins/intel_gpu/src/graph/include/layout_optimizer.h @@ -60,8 +60,14 @@ class reorder_factory { bool needs_split_reorder; friend bool operator==(cache_key const& lhs, cache_key const& rhs) { - return lhs.data_source == rhs.data_source && lhs.expected_layout == rhs.expected_layout && - lhs.needs_split_reorder == rhs.needs_split_reorder; + bool ret = lhs.data_source == rhs.data_source && lhs.expected_layout == rhs.expected_layout && + lhs.needs_split_reorder == rhs.needs_split_reorder; + + if (ret && lhs.expected_layout.format == cldnn::format::custom) { + ret &= (lhs.expected_layout.format.traits().block_sizes == + rhs.expected_layout.format.traits().block_sizes); + } + return ret; } friend bool operator!=(cache_key const& lhs, cache_key const& rhs) { return !(lhs == rhs); } @@ -71,6 +77,8 @@ class reorder_factory { return (lhs.data_source < rhs.data_source); else if (lhs.expected_layout != rhs.expected_layout) return (lhs.expected_layout < rhs.expected_layout); + else if (lhs.expected_layout.format == cldnn::format::custom) + return lhs.expected_layout.format.traits().block_sizes < rhs.expected_layout.format.traits().block_sizes; return lhs.needs_split_reorder < rhs.needs_split_reorder; } }; diff --git a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h index 20b8dced0b9369..57a74215c4ad66 100644 --- a/src/plugins/intel_gpu/src/graph/include/reshape_inst.h +++ b/src/plugins/intel_gpu/src/graph/include/reshape_inst.h @@ -41,7 +41,7 @@ struct typed_program_node : public typed_program_node_base { } // Expected a padded input of only batch axis with 'bxxx' format - if (format::traits(input_layout.format)._order[0] != 0 || + if 
(input_layout.format.dims_order()[0] != 0 || input_pad.lower_size().feature[0] != 0) return false; diff --git a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp index 52a5affdded581..12a0a54c66648d 100644 --- a/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp +++ b/src/plugins/intel_gpu/src/graph/layout_optimizer.cpp @@ -1377,7 +1377,7 @@ bool layout_optimizer::are_layouts_suitable_for_onednn(program_node& node) { bool no_batch_padding = true; auto out_fmt = node.get_output_layout().format; if (format::is_multi_blocked(input_layout.format) || format::is_multi_blocked(out_fmt) || - format::traits(input_layout.format)._order[0] != 0 || format::traits(out_fmt)._order[0] != 0) { + input_layout.format.dims_order()[0] != 0 || out_fmt.dims_order()[0] != 0) { for (size_t i = 0; i < in_padding.lower_size().batch.size(); ++i) { no_batch_padding &= (in_padding.lower_size().batch[i] == 0); } @@ -1567,6 +1567,10 @@ impl_types layout_optimizer::get_preferred_impl_type(program_node& node, format auto input_fmt = input_layout.format; auto output_fmt = output_layout.format; + if (output_fmt == format::custom) { + return impl_types::onednn; + } + preferred_impl = impl_types::onednn; if (std::find(onednn_optimized_fmt.begin(), onednn_optimized_fmt.end(), input_fmt) == onednn_optimized_fmt.end() || diff --git a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp index c4975da37d8145..d5f93c70070e46 100644 --- a/src/plugins/intel_gpu/src/graph/primitive_inst.cpp +++ b/src/plugins/intel_gpu/src/graph/primitive_inst.cpp @@ -1696,12 +1696,15 @@ event::ptr primitive_inst::update_weights() { GPU_DEBUG_TRACE_DETAIL << id() << ": reorder weights from " << original_layout.to_short_string() << " to " << expected_layout.to_short_string() << std::endl; - auto factory = WeightsReordersFactory::get(impl_types::ocl, shape_types::static_shape); + auto impl_type = 
(reorder_kernel_params->get_output_layout(0).format == format::custom) ? impl_types::onednn : impl_types::ocl; + auto factory = WeightsReordersFactory::get(impl_type, shape_types::static_shape); auto reorder_impl = factory(*reorder_kernel_params); - auto& kernels_cache = get_network().get_program()->get_kernels_cache(); - auto kernels = kernels_cache.compile(*reorder_kernel_params, reorder_impl->get_kernels_source()); - OPENVINO_ASSERT(kernels.size() == 1, "[GPU] Expected number of compiled kernels is 1, but got ", kernels.size()); - reorder_impl->set_kernels(kernels); + if (impl_type == impl_types::ocl) { + auto& kernels_cache = get_network().get_program()->get_kernels_cache(); + auto kernels = kernels_cache.compile(*reorder_kernel_params, reorder_impl->get_kernels_source()); + OPENVINO_ASSERT(kernels.size() == 1, "[GPU] Expected number of compiled kernels is 1, but got ", kernels.size()); + reorder_impl->set_kernels(kernels); + } reorder_inst->set_impl(reorder_impl->clone()); diff --git a/src/plugins/intel_gpu/src/graph/reorder.cpp b/src/plugins/intel_gpu/src/graph/reorder.cpp index acde9789a854ae..2207e986f2f0d5 100644 --- a/src/plugins/intel_gpu/src/graph/reorder.cpp +++ b/src/plugins/intel_gpu/src/graph/reorder.cpp @@ -7,7 +7,9 @@ #include "json_object.h" #include "intel_gpu/primitives/convolution.hpp" #include "intel_gpu/primitives/eltwise.hpp" - +#ifdef ENABLE_ONEDNN_FOR_GPU +#include "graph/impls/onednn/utils.hpp" +#endif // ENABLE_ONEDNN_FOR_GPU #include #include @@ -180,6 +182,12 @@ std::vector reorder_inst::calc_output_layouts(reorder_node const& /*node auto ofmt = desc->output_format == format::any ? 
ifmt : desc->output_format; if (desc->weights_reorder_params) { +#ifdef ENABLE_ONEDNN_FOR_GPU + auto onednn_weights_params = std::dynamic_pointer_cast(desc->weights_reorder_params); + if (onednn_weights_params && input_layout.format != onednn::find_data_format(onednn_weights_params->_in_desc)) { + onednn_weights_params->_in_desc = onednn::layout_to_memory_desc(input_layout); + } +#endif // ENABLE_ONEDNN_FOR_GPU return { desc->weights_reorder_params->get_output_layout() }; } else { return { layout(input_layout.get(), desc->output_data_types[0].value(), ofmt, desc->output_paddings[0]) }; diff --git a/src/plugins/intel_gpu/src/runtime/format.cpp b/src/plugins/intel_gpu/src/runtime/format.cpp index f7f1eb94b2501c..efb8bca2872eb6 100644 --- a/src/plugins/intel_gpu/src/runtime/format.cpp +++ b/src/plugins/intel_gpu/src/runtime/format.cpp @@ -245,9 +245,20 @@ const format_traits& format::traits(type fmt) { return format_traits_map.at(fmt); } +const format_traits& format::traits() const { + if (value == format::custom) { + OPENVINO_ASSERT(custom_traits.has_value(), "[GPU] Custom format is created w/o traits"); + return *custom_traits; + } + + return format::traits(value); +} + std::string format::to_string() const { if (value == any) { return "any"; + } else if (value == custom) { + return "custom"; } return traits(value).str; } @@ -282,7 +293,7 @@ format format::get_default_format(size_t rank, bool is_weights, bool is_grouped) return default_fmt; } -bool format::is_default_format(type fmt) { +bool format::is_default_format(const format& fmt) { return fmt == get_default_format(dimension(fmt)); } diff --git a/src/plugins/intel_gpu/src/runtime/layout.cpp b/src/plugins/intel_gpu/src/runtime/layout.cpp index 5534e9a1248285..70e760bca41557 100644 --- a/src/plugins/intel_gpu/src/runtime/layout.cpp +++ b/src/plugins/intel_gpu/src/runtime/layout.cpp @@ -171,7 +171,7 @@ std::vector layout::get_ordered_dims() const { } std::vector layout::get_dims_order() const { - return 
format::traits(format)._order; + return format.dims_order(); } std::string layout::to_string() const { @@ -456,7 +456,7 @@ bool layout::compatible(const layout& other) const { // TODO: Relax restrictions below if (blocks1 != blocks2 || - (!blocks1.empty() && format::traits(l1.format)._order != format::traits(l2.format)._order)) + (!blocks1.empty() && l1.format.dims_order() != l2.format.dims_order())) return false; if (check_format(format::b_fs_yx_fsv2) || @@ -506,7 +506,11 @@ bool layout::compatible(const layout& other) const { bool layout::identical(const layout& other) const { if (is_dynamic() || other.is_dynamic()) return false; - return *this == other; + bool ret = (*this == other); + if (ret && this->format == cldnn::format::custom) { + ret &= (this->format.traits().block_sizes == other.format.traits().block_sizes); + } + return ret; } ov::PartialShape layout::transform(const ov::PartialShape& pshape, cldnn::format old_fmt, cldnn::format new_fmt) { @@ -526,7 +530,7 @@ ov::PartialShape layout::transform(const ov::PartialShape& pshape, cldnn::format auto val_order = default_fmt.internal_order(); auto new_order = new_fmt.internal_order(); - const auto& new_traits = format::traits(new_fmt); + const auto& new_traits = new_fmt.traits(); std::vector new_sizes(old_sizes.size(), {default_size}); @@ -583,8 +587,8 @@ static inline bool check_redundant_1d_along_feature(layout const& l1, layout con // No padding, double blocked format and different data_type if (!l1.data_padding && !l2.data_padding && !format::is_multi_blocked(l1.format) && !format::is_multi_blocked(l2.format) && l2.data_type == l1.data_type && l2.count() == l1.count()) { - auto l1_inner_blk = format::is_single_blocked(l1.format) ? format::traits(l1.format).block_sizes.at(0).second : 1; - auto l2_inner_blk = format::is_single_blocked(l2.format) ? format::traits(l2.format).block_sizes.at(0).second : 1; + auto l1_inner_blk = format::is_single_blocked(l1.format) ? 
l1.format.traits().block_sizes.at(0).second : 1; + auto l2_inner_blk = format::is_single_blocked(l2.format) ? l2.format.traits().block_sizes.at(0).second : 1; auto max_inner_blk = std::max(l1_inner_blk, l2_inner_blk); if (static_cast(l2.feature()) == l1.count() && l2.feature() == l1.feature() && (l2.feature() % max_inner_blk == 0)) { diff --git a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp index 583e252e93c756..6fcbe2713b8fa4 100644 --- a/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp +++ b/src/plugins/intel_gpu/tests/functional/shared_tests_instances/skip_tests_config.cpp @@ -83,6 +83,54 @@ std::vector disabledTestPatterns() { R"(smoke_MemoryTestV3.*)", // Issue: 129991 R"(.*StridedSliceLayerTest.*TS=.*2.2.4.1*.*)", + // Issue: CVS-133173 + R"(.*smoke_GatherCompressedWeights_basic/GatherWeightsDecompression.Inference/data_shape=\[15,32\]_indices_shape=\[\?.\?\]_\[2.3\].*output_precision=f32.*)", R"(.*smoke_CTCLoss_Set2/CTCLossLayerTest.Inference/IS=\(\[\]\)_TS=\{\(3.6.8\)\}_LL=\(6.5.6\)_A=\(4.1.2.3.4.5\)\(5.4.3.0.1.0\)\(2.1.3.1.3.0\)_AL=\(3.3.5\)_BI=7_PCR=1_CMR=1_U=0_PF=f32_PI=i64.*)", + R"(.*smoke_LPT/BatchToSpaceTransformation.CompareWithRefImpl/f16_GPU_\[4,3,50,86\]_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 2.55 \}_precision=.*)", + R"(.*smoke_LPT/BatchToSpaceTransformation.CompareWithRefImpl/(f32|f16)_GPU_\[4,3,50,86\]_level=256_shape=\[1,3,1,1\]_input_low=\{ 0, 0, 0 \}_input_high=\{ 255, 127.5, 85 \}_output_low=\{ 0, 0, 0 \}_output_high\{ 255, 127.5, 85 \}_precision=.*)", + R"(.*smoke_LPT/ConcatTransformation.CompareWithRefImpl/f16_\[1,3,16,16\]_GPU_f32level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 2.55 \}_precision=\{\}level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 
\}_output_high\{ 2.55 \}_precision=\{\}.*)", + R"(.*smoke_LPT/ConcatWithChildAndOutputTransformation.CompareWithRefImpl/f16_\[1,6,10,10\]_GPU_f32level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 2.55 \}_precision=level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 1.275 \}_precision=.*)", + R"(.*smoke_LPT/ConcatWithDifferentChildrenTransformation.CompareWithRefImpl/f16_\[1,3,10,10\]_GPU_f32_axis_(1|2)_level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 2.55 \}_precision=level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 1.275 \}_precision=.*)", + R"(.*smoke_LPT/ConcatWithNeighborsGraphTransformation.CompareWithRefImpl/f16_\[1,3,16,16\]_GPU_f32.*)", + R"(.*smoke_LPT/ConcatWithIntermediateTransformation.CompareWithRefImpl/f16_\[1,3,16,16\]_GPU_f32.*)", + R"(.*smoke_LPT/ConcatWithSplitTransformation.CompareWithRefImpl/f16_\[1,6,10,10\]_GPU_f32level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 2.55 \}_precision=_level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 1.275 \}_precision=.*)", + R"(.*smoke_LPT_4D/ConvolutionBackpropDataTransformation.CompareWithRefImpl/f32_\[1,32,16,16\]_.*_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ 0 \}_output_high\{ 25.5 \}_precision=__0_\[\]_\{ \}_\{ \}___f32_\{\}__\{ 4 \}_f32_\[\]_1_1_undefined.*)", + R"(.*smoke_LPT_4D/ConvolutionBackpropDataTransformation.CompareWithRefImpl/f16_\[1,(8|32),16,16\]_.*_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ 0 \}_output_high\{ 25.5 \}_precision=__255_\[1,1,1,1\]_\{ 0 \}_\{ 25.4 \}_\{\}.*)", + R"(.*smoke_LPT_4D/ConvolutionBackpropDataTransformation.CompareWithRefImpl/f16_\[1,(8|32),16,16\]_.*_input_low.*0.*input_high=.*255.*_output_low=.*0.*_output_high.*25.5.*_precision=__0_\[\]_\{ \}_\{ \}___f32_\{\}__\{ 4 
\}_f32_\[\]_1_1_undefined.*)", + R"(.*smoke_LPT_3D/ConvolutionBackpropDataTransformation.CompareWithRefImpl/(f32|f16)_\[1,32,16,16\]_GPU_f32_\[16\]_level=256_shape=\[1,1,1\]_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ 0 \}_output_high\{ 25.5 \}_precision=__0_\[\]_\{ \}_\{ \}___f32_\{\}__\{ 4 \}_f32_\[\]_1_1_undefined.*)", + R"(.*smoke_LPT/FakeQuantizeAndMaxPoolTransformation.CompareWithRefImpl/f16_\[1,32,72,48\]_GPU_f32.*)", + R"(.*smoke_LPT/FakeQuantizeAndAvgPoolTransformation.CompareWithRefImpl/f16_\[1,32,72,48\]_GPU_f32.*)", + R"(.*smoke_LPT/FuseConvertTransformation.CompareWithRefImpl/f32_\[1,4,16,16\]_GPU_f32_.*)", + R"(.*smoke_LPT/FuseFakeQuantizeAndScaleShiftTransformation.CompareWithRefImpl/f16_GPU_level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 2.55 \}_precision=.*)", + R"(.*smoke_LPT/MVNTransformation.CompareWithRefImpl/f16_\[1,4,16,16\]_GPU_f32_AxisSet.*)", + R"(.*smoke_LPT/NormalizeL2Transformation.CompareWithRefImpl/f16_\[1,4,16,16\]_.*)", + R"(.*smoke_LPT/PadTransformation.CompareWithRefImpl/f16_\[1,3,16,16\]_GPU_f32_level=256_shape=\[1,1,1,1\]_.*_(constant|reflect|symmetric|edge)_.*)", + R"(.*smoke_LPT/OutputLayersConcat.CompareWithRefImpl/f32_\[1,3,16,16\]_GPU_f32.*)", + R"(.*smoke_LPT/ReduceMeanTransformation.CompareWithRefImpl/f16_\[1,3,10,10\]_GPU_f32_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ 0 \}_output_high\{ 127 \}_precision=\{\}\{\}_keepDims__reduce_axis_1_.*)", + R"(.*smoke_LPT/ReduceMeanTransformation.CompareWithRefImpl/f16_\[1,3,10,10\]_GPU_f32_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ 0 \}_output_high\{ 127 \}_precision=\{\}\{\}_reduce_axis_1_.*)", + R"(.*smoke_LPT/ReduceSumTransformation.CompareWithRefImpl/(f32|f16)_\[1,3,10,10\]_GPU_f32_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 255 \}_output_low=\{ 0 \}_output_high\{ 127 \}_precision=_keepDims__reduce_axis_2_3_.*)", + 
R"(.*smoke_LPT/ReduceSumTransformation.CompareWithRefImpl/f16_\[1,3,10,10\]_GPU_f32_level=256_shape=\[1,1,1,1\]_input_low=\{ 2 \}_input_high=\{ 10 \}_output_low=\{ 2 \}_output_high\{ 10 \}_precision=_reduce_axis_2_3_.*)", + R"(.*smoke_LPT/ReluTransformation.CompareWithRefImpl/f16_GPU_level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 25.5 \}_output_low=\{ 0 \}_output_high\{ 25.5 \}_precision=.*)", + R"(.*smoke_LPT/ReluTransformation.CompareWithRefImpl/f16_GPU_level=256_shape=\[\]_input_low=\{ 12.75 \}_input_high=\{ 25.5 \}_output_low=\{ 12.75 \}_output_high\{ 25.5 \}_precision=.*)", + R"(.*smoke_LPT/SpaceToBatchTransformation.CompareWithRefImpl/(f32|f16)_GPU_\[1,3,100,171\]_level=256_shape=\[1,3,1,1\]_input_low=\{ 0, 0, 0 \}_input_high=\{ 255, 127.5, 85 \}_output_low=\{ 0, 0, 0 \}_output_high\{ 255, 127.5, 85 \}_precision=.*)", + R"(.*smoke_LPT/SpaceToBatchTransformation.CompareWithRefImpl/f16_GPU_\[1,3,100,171\]_level=256_shape=\[1,1,1,1\]_input_low=\{ 0 \}_input_high=\{ 2.55 \}_output_low=\{ 0 \}_output_high\{ 2.55 \}_precision=.*)", + R"(.*smoke_LPT/SplitTransformation.CompareWithRefImpl/f16_\[1,3,16,16\]_GPU_f32_level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 25.5 \}_output_low=\{ 0 \}_output_high\{ 12.75 \}_precision=_axis=2_n_splits=2.*)", + R"(.*smoke_LPT/StridedSliceTransformation.CompareWithRefImpl/f16_\[1,3,24,24\]_GPU_f32_.*_precision=_\{ 0, 0, 0, 0 \}_\{ 1, 0, 1, 1 \}_\{ 1, 2, 1, 1 \}_\{ 1, 0, 1, 1 \}_\{ 1, 1, 1, 1 \}.*)", + R"(.*smoke_LPT/StridedSliceTransformation.CompareWithRefImpl/f16_\[1,3,24,24\]_GPU_f32_.*_precision=_\{ 0, 0, 0, 0 \}_\{ 1, 1, 0, 1 \}_\{ 1, 3, 20, 24 \}_\{ 1, 1, 0, 1 \}_\{ 1, 1, 1, 1 \}.*)", + R"(.*smoke_LPT/StridedSliceTransformation.CompareWithRefImpl/f16_\[1,3,24,24\]_GPU_f32_level=256_shape=\[1,3,1,1\]_.*_precision=_\{ 0, 0 \}_\{ 1, 0 \}_\{ 1, 2 \}_\{ 1, 0 \}_\{ 1, 1 \}.*)", + R"(.*smoke_LPT/SubtractTransformation.CompareWithRefImpl/f16_\[1,3,16,16\]_GPU_f32.*)", + 
R"(.*smoke_LPT/TransposeAfterMatMulTransformation.CompareWithRefImpl/f16.*(T|t)ransposeChannelDim.*)", + R"(.*smoke_LPT/VariadicSplitTransformation.CompareWithRefImpl/f16_\[1,3,16,16\]_GPU_f32_level=256_shape=\[\]_input_low=\{ 0 \}_input_high=\{ 25.5 \}_output_low=\{ 0 \}_output_high\{ 12.75 \}_precision=_axis=2_splitLengths=\{ 9, 7 \}.*)", + R"(.*smoke_ConvolutionBackpropData2D_ExplicitPadding/ConvolutionBackpropDataLayerTest.Inference/IS=\(\[\]\)_TS=.*1.16.10.10.*_OS=\(\)_K\(1.1\)_S\(1.3\).*)", + R"(.*smoke_ConvolutionBackpropData2D_ExplicitPadding/ConvolutionBackpropDataLayerTest.Inference/IS=\(\[\]\)_TS=.*1.32.10.10.*_OS=\(\)_K\(1.1\)_S\(1.3\).*)", + R"(.*smoke_ConvolutionBackpropData2D_ExplicitPadding/ConvolutionBackpropDataLayerTest.Inference/IS=\(\[\]\)_TS=.*1.3.30.30.*_OS=\(\)_K\(1.1\)_S\(1.3\).*O=16.*)", + R"(.*smoke_ConvolutionBackpropData2D_AutoPadValid/ConvolutionBackpropDataLayerTest.Inference/IS=\(\[\]\)_TS=\{\((1\.32\.10\.10|1\.16\.10\.10)\)\}_OS=\(\)_K\(1.1\)_S\(1.3\)_PB\(0.0\)_PE\(0.0\)_D=\(1.1\)_OP=\(\)_O=(1|5|16)_AP=valid_netPRC=f16.*)", + R"(.*smoke_ConvolutionBackpropData2D_AutoPadValid/ConvolutionBackpropDataLayerTest.Inference/IS=\(\[\]\)_TS=.*1.3.30.30.*_OS=\(\)_K\(1.1\)_S\(1.3\)_PB\(0.0\)_PE\(0.0\)_D=\(1.1\)_OP=\(\)_O=16_AP=valid_netPRC=f16.*)", + R"(.*smoke_ConvolutionBackpropData2D_ExplicitPadding_OutputPaddingDefined/ConvolutionBackpropDataLayerTest.Inference/IS=\(\[\]\)_TS=\{\((1.32.10.10|1.16.10.10|1.3.30.30)\)\}_OS=\(\)_K\(1.1\)_S\(3.3\)_PB\(0.0\)_PE\(0.0\)_D=\(1.1\)_OP=\((1.1|2.2)\)_O=(1|5|16)_AP=valid_netPRC=f16.*)", + R"(.*smoke_ConvolutionBackpropData2D_AutoPadding_OutputPaddingDefined/ConvolutionBackpropDataLayerTest.Inference/IS=\(\[\]\)_TS=\{\((1.32.10.10|1.16.10.10|1.3.30.30)\)\}_OS=\(\)_K\(1.1\)_S\(3.3\)_PB\(0.0\)_PE\((0.0|1.1)\)_D=\(1.1\)_OP=\((1.1|2.2)\)_O=(1|5|16).*)", + 
R"(.*smoke_GridSample/GridSampleLayerTest.Inference/DS=\((5.2.3.5|5.3.4.6)\)_GS=\((5.7.3.2|5.2.8.2)\)_align_corners=(0|1)_Mode=(bilinear|bicubic)_padding_mode=zeros_model_type=f16_grid_type=f32.*)", + R"(.*smoke_MatMul_BothTranspose/MatMulLayerTest.Inference/IS=\(\[\]_\[\]\)_TS=\{\(5\)_\(5\)\}_transpose_a=1_transpose_b=1_secondary_input_type=(CONSTANT|PARAMETER)_modelType=(f16|f32).*)", + R"(.*smoke_dynamic_conv_reshape_fullyconnected/ConvReshapeFullyConnectedDynamicGPUTestDynamic.Inference/IS=\[\?\.64\.1\.\?\.\?\]_\[1\.64\.1\.1\.1\]_model_type=f16.*)", + R"(.*smoke_empty_tensor/EmptyTensorDynamicGPUTest.Inference/IS=\[\?\]_\[30\]_\[40\]_\[50\]_\[10\]_\[7\]_\[\?.\?\]_\[1.0\]_\[1.8\]_\[1.0\]_\[1.3\]_\[1.20\]_NetType=i32.*)", }; } diff --git a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/gather_weights_decompression.cpp b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/gather_weights_decompression.cpp index 77d1ffdf9e98ae..bac9cdc753e17f 100644 --- a/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/gather_weights_decompression.cpp +++ b/src/plugins/intel_gpu/tests/functional/subgraph_tests/dynamic/gather_weights_decompression.cpp @@ -273,6 +273,7 @@ class GatherWeightsDecompression : public testing::WithParamInterface { }; + +TEST_P(custom_layout_test, different_hash) { + auto p = GetParam(); + auto left = cldnn::layout(p.shape, cldnn::data_types::f16, cldnn::format(p.left)); + auto right = cldnn::layout(p.shape, cldnn::data_types::f16, cldnn::format(p.right)); + ASSERT_TRUE(left.hash() != right.hash()); +} + +TEST_P(custom_layout_test, same_hash) { + auto p = GetParam(); + auto left = cldnn::layout(p.shape, cldnn::data_types::f16, cldnn::format(p.left)); + auto right = cldnn::layout(p.shape, cldnn::data_types::f16, cldnn::format(p.left)); + ASSERT_TRUE(left.hash() == right.hash()); + + left = cldnn::layout(p.shape, cldnn::data_types::f16, cldnn::format(p.right)); + right = cldnn::layout(p.shape, cldnn::data_types::f16, 
cldnn::format(p.right)); + ASSERT_TRUE(left.hash() == right.hash()); +} + +INSTANTIATE_TEST_SUITE_P(smoke, custom_layout_test, + testing::ValuesIn(std::vector{ + { + {16, 16, 8, 8}, + format_traits{ + "custom", 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{1, 16}, {0, 16}} + }, + format_traits{ + "custom", 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 2}, {1, 8}, {0, 8}, {1, 2}} + } + }, + { + {32, 32, 8, 8}, + format_traits{ + "custom", 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{1, 4}, {0, 8}, {1, 8}, {0, 4}} + }, + format_traits{ + "custom", 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 2}, {1, 8}, {0, 8}, {1, 2}} + } + }, + })); diff --git a/src/plugins/intel_gpu/tests/unit/module_tests/weights_reorder_factory_test.cpp b/src/plugins/intel_gpu/tests/unit/module_tests/weights_reorder_factory_test.cpp index 3f8da1fc6a7c5e..ccd0185e4df727 100644 --- a/src/plugins/intel_gpu/tests/unit/module_tests/weights_reorder_factory_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/module_tests/weights_reorder_factory_test.cpp @@ -24,9 +24,11 @@ TEST(weights_factory, impl_types) { program::init_primitives(); ASSERT_NO_THROW(WeightsReordersFactory::get(impl_types::ocl, shape_types::static_shape)); ASSERT_NO_THROW(WeightsReordersFactory::get(impl_types::any, shape_types::static_shape)); +#ifdef ENABLE_ONEDNN_FOR_GPU + ASSERT_NO_THROW(WeightsReordersFactory::get(impl_types::onednn, shape_types::static_shape)); +#endif // ENABLE_ONEDNN_FOR_GPU ASSERT_ANY_THROW(WeightsReordersFactory::get(impl_types::cpu, shape_types::static_shape)); - ASSERT_ANY_THROW(WeightsReordersFactory::get(impl_types::onednn, shape_types::static_shape)); } TEST(weights_factory, shape_types) { diff --git a/src/plugins/intel_gpu/tests/unit/onednn/utils_test.cpp b/src/plugins/intel_gpu/tests/unit/onednn/utils_test.cpp index a0311467d4bada..fbf3eb432cde8e 100644 --- a/src/plugins/intel_gpu/tests/unit/onednn/utils_test.cpp +++ b/src/plugins/intel_gpu/tests/unit/onednn/utils_test.cpp @@ -3,8 +3,10 @@ // #include 
+#include #include +#include "intel_gpu/runtime/utils.hpp" #include "test_utils.h" #include "intel_gpu/runtime/format.hpp" @@ -165,3 +167,97 @@ INSTANTIATE_TEST_SUITE_P(smoke, weight_format_test_match_dnnl, {{32, 32, 8, 8}, dnnl::memory::data_type::u8, dnnl::memory::format_tag::BAcd4b8a8b4a, cldnn::format::is_os_yx_isa4_osa8_isv8_osv4}, }), weight_format_test_match_dnnl::PrintToString); + +struct memory_desc_to_fmt_conversion_params { + dnnl::memory::dims dims; + dnnl::memory::data_type data_type; + dnnl::memory::format_tag dnnl_format; + bool grouped; + cldnn::format_traits expected; +}; + +class memory_desc_to_fmt_conversion_test : public testing::TestWithParam { +public: + static std::string PrintToString(testing::TestParamInfo param_info) { + auto dnnl_format = param_info.param.dnnl_format; + std::stringstream s; + ov::Shape shape(param_info.param.dims.begin(), param_info.param.dims.end()); + s << shape; + return std::string(dnnl_fmt_tag2str((dnnl_format_tag_t)dnnl_format)) + "_" + s.str(); + } +}; + +TEST_P(memory_desc_to_fmt_conversion_test, test_match_data_format) { + auto param = GetParam(); + + dnnl::memory::desc test_desc(param.dims, param.data_type, param.dnnl_format); + auto result = onednn::convert_memory_desc_to_traits(test_desc, true, param.grouped); + ASSERT_EQ(result.str, param.expected.str); + ASSERT_EQ(result.batch_num, param.expected.batch_num); + ASSERT_EQ(result.feature_num, param.expected.feature_num); + ASSERT_EQ(result.spatial_num, param.expected.spatial_num); + ASSERT_EQ(result.group_num, param.expected.group_num); + ASSERT_EQ(result._order, param.expected._order); + ASSERT_EQ(result.order, param.expected.order); + ASSERT_EQ(result.internal_order, param.expected.internal_order); + ASSERT_EQ(result.block_sizes, param.expected.block_sizes); +} + +INSTANTIATE_TEST_SUITE_P(smoke, memory_desc_to_fmt_conversion_test, + testing::ValuesIn(std::vector{ + { + {1, 3, 8, 8}, dnnl::memory::data_type::f16, dnnl::memory::format_tag::abcd, false, + 
format_traits{ + "custom", 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {} + } + }, + { + {16, 16, 8, 8}, dnnl::memory::data_type::u8, dnnl::memory::format_tag::ABcd16b16a, false, + format_traits{ + "custom", 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{1, 16}, {0, 16}} + } + }, + { + {8, 4, 16, 16, 16}, dnnl::memory::data_type::f16, dnnl::memory::format_tag::ABcde8a4b, false, + format_traits{ + "custom", 1, 1, 3, 0, {0, 1, 2, 3, 4}, "oizyx", "oixyz", {{0, 8}, {1, 4}} + } + }, + { + {16, 16, 8, 8}, dnnl::memory::data_type::f16, dnnl::memory::format_tag::ABcd2a8b8a2b, false, + format_traits{ + "custom", 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{0, 2}, {1, 8}, {0, 8}, {1, 2}} + } + }, + { + {64, 64, 8, 8}, dnnl::memory::data_type::u8, dnnl::memory::format_tag::BAcd4b8a8b4a, false, + format_traits{ + "custom", 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "oixy?", {{1, 4}, {0, 8}, {1, 8}, {0, 4}} + } + }, + { + {32, 32, 8, 8}, dnnl::memory::data_type::u8, dnnl::memory::format_tag::BAcd4b8a8b4a, false, // same format as above but different sizes + format_traits{ + "custom", 1, 1, 2, 0, {0, 1, 2, 3}, "oiyx", "oixy?", {{1, 4}, {0, 8}, {1, 8}, {0, 4}} // Order is different due to A=B=1 (strides are equal) + } + }, + { + {65, 32, 8, 8}, dnnl::memory::data_type::u8, dnnl::memory::format_tag::BAcd4b8a8b4a, false, // same format as above but different sizes + format_traits{ + "custom", 1, 1, 2, 0, {1, 0, 2, 3}, "ioyx", "oixy?", {{1, 4}, {0, 8}, {1, 8}, {0, 4}} + } + }, + { + {10, 20, 30, 40, 50}, dnnl::memory::data_type::f32, dnnl::memory::format_tag::AcdeB24a2b, true, + format_traits{ + "custom", 1, 1, 2, 1, {0, 2, 3, 4, 1}, "giyxo", "oixy????g", {{0, 24}, {1, 2}} + } + }, + { + {10, 60, 70, 40, 50}, dnnl::memory::data_type::f32, dnnl::memory::format_tag::aCBde4c8b8c4b, true, + format_traits{ + "custom", 1, 1, 2, 1, {0, 2, 1, 3, 4}, "gioyx", "oixy????g", {{2, 4}, {1, 8}, {2, 8}, {1, 4}} + } + }, + }), + memory_desc_to_fmt_conversion_test::PrintToString); diff --git 
a/src/plugins/template/tests/functional/skip_tests_config.cpp b/src/plugins/template/tests/functional/skip_tests_config.cpp index 099b5e9ec1ae50..eac640ebfe40f3 100644 --- a/src/plugins/template/tests/functional/skip_tests_config.cpp +++ b/src/plugins/template/tests/functional/skip_tests_config.cpp @@ -123,8 +123,10 @@ std::vector disabledTestPatterns() { R"(.*ReferenceGroupNormalization.*_f64*)", // Precision not high enough to get exact result for the complex test cases // (both tiny values and very high values necessary) - R"(.*ReferenceInverse.*bf16.*[4,4].*)"}; - + R"(.*ReferenceInverse.*bf16.*[4,4].*)", + R"(.*smoke_CompareWithRefs_static/EltwiseLayerTest.Inference/IS=\(\[\]_\)_TS=.*(4.4.200|1.10.200|10.200|2.200|1.10.100|4.4.16).*_eltwise_op_type=Mod_secondary_input_type=PARAMETER_opType=VECTOR_model_type=f32_InType=undefined_OutType=undefined.*)", + R"(.*smoke_CompareWithRefs_static/EltwiseLayerTest.Inference/IS=.*_TS=\(\(2.17.5.1\)_\(1.17.1.4\)_\)_eltwise_op_type=Mod_secondary_input_type=PARAMETER_opType=VECTOR_model_type=f16_InType=undefined_OutType=undefined_.*)", + R"(.*smoke_CompareWithRefs_static/EltwiseLayerTest.Inference/IS=.*_TS=.*(2.200|10.200|1.10.100|4.4.16|1.2.4|1.4.4|1.4.4.1).*eltwise_op_type=Mod_secondary_input_type=PARAMETER_opType=VECTOR_model_type=f16_InType=undefined_OutType=undefined.*)"}; #ifdef _WIN32 // CVS-63989 retVector.emplace_back(R"(.*ReferenceSigmoidLayerTest.*u64.*)"); diff --git a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp index 832bb6b618f5f7..f4467eb7af390f 100644 --- a/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp +++ b/src/tests/test_utils/common_test_utils/src/ov_tensor_utils.cpp @@ -383,33 +383,14 @@ void compare(const ov::Tensor& expected, if (abs_threshold == std::numeric_limits::max() && rel_threshold == std::numeric_limits::max()) { if (sizeof(ExpectedT) == 1 || sizeof(ActualT) == 1) { abs_threshold = 1.; - 
rel_threshold = 1.; - if (expected.get_element_type() == ov::element::Type_t::boolean) { - abs_threshold = 0.; - rel_threshold = 0.; - } } else { std::vector abs_values(shape_size_cnt); for (size_t i = 0; i < shape_size_cnt; i++) { abs_values[i] = std::fabs(static_cast(expected_data[i])); } auto abs_median = calculate_median(abs_values); - auto elem_type = expected.get_element_type(); - abs_threshold = abs_median * 0.05 < 1e-5 ? 1e-5 : 0.05 * abs_median; - if (elem_type == ov::element::Type_t::boolean) { - abs_threshold = 0.; - } else if (elem_type.is_integral_number()) { - abs_threshold = 1.0; - } else if (elem_type == ov::element::Type_t::f32 || elem_type == ov::element::Type_t::f64) { - abs_threshold = abs_median * 0.05 < 1e-5 ? 1e-5 : 0.05 * abs_median; - } else if (elem_type == ov::element::Type_t::bf16 || elem_type == ov::element::Type_t::f16) { - abs_threshold = abs_median * 0.05 < 1e-3 ? 1e-3 : 0.05 * abs_median; - } - - rel_threshold = abs_threshold; - if (std::is_integral::value) { abs_threshold = std::ceil(abs_threshold); } @@ -451,15 +432,14 @@ void compare(const ov::Tensor& expected, } double abs = std::fabs(expected_value - actual_value); - double rel = - expected_value && actual_value && !std::isinf(expected_value) ? (abs / std::fabs(expected_value)) : 0; + double rel = expected_value ? 
(abs / std::fabs(expected_value)) : abs; abs_error.update(abs, i); rel_error.update(rel, i); } abs_error.mean /= shape_size_cnt; rel_error.mean /= shape_size_cnt; - if (!(less_or_equal(abs_error.max, abs_threshold) || less_or_equal(rel_error.mean, rel_threshold))) { + if (!(less_or_equal(abs_error.max, abs_threshold) && less_or_equal(rel_error.max, rel_threshold))) { std::ostringstream out_stream; out_stream << "abs_max < abs_threshold && rel_max < rel_threshold" << "\n\t abs_max: " << abs_error.max << "\n\t\t coordinate " << abs_error.max_coordinate diff --git a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py index 438eeb8a355ac7..deac7afaf6be5c 100644 --- a/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py +++ b/tests/layer_tests/pytorch_tests/pytorch_layer_test_class.py @@ -16,7 +16,10 @@ import torch from packaging import version import openvino.torch +import pytest +def skip_if_export(param, reason="Unsupported on torch.export"): + return pytest.param(param, marks=pytest.mark.skipif(PytorchLayerTest.use_torch_export(), reason=reason)) class PytorchLayerTest: _type_map = { @@ -41,6 +44,21 @@ def _check_kind_exist(graph, kind): return True return False + @staticmethod + def use_torch_compile_backend(): + torch_compile_env = os.getenv("PYTORCH_TRACING_MODE") + if torch_compile_env is not None: + if (torch_compile_env == "TORCHFX" or torch_compile_env == "TORCHSCRIPT"): + return True + return False + + @staticmethod + def use_torch_export(): + torch_compile_env = os.getenv("PYTORCH_TRACING_MODE") + if torch_compile_env is not None: + return torch_compile_env == "EXPORT" + return False + def _test(self, model, ref_net, kind, ie_device, precision, ir_version, infer_timeout=60, dynamic_shapes=True, **kwargs): """ @@ -69,25 +87,12 @@ def numpy_to_torch_recursively(x): else: custom_eps = 1e-4 - def use_torch_compile_backend(): - torch_compile_env = os.getenv("PYTORCH_TRACING_MODE") - if 
torch_compile_env is not None: - if (torch_compile_env == "TORCHFX" or torch_compile_env == "TORCHSCRIPT"): - return True - return False - - def use_torch_export(): - torch_compile_env = os.getenv("PYTORCH_TRACING_MODE") - if torch_compile_env is not None: - return torch_compile_env == "EXPORT" - return False - ov_inputs = flattenize_inputs(inputs) - if use_torch_compile_backend(): + if self.use_torch_compile_backend(): self.torch_compile_backend_test(model, torch_inputs, custom_eps) else: - if use_torch_export(): + if self.use_torch_export(): from openvino import convert_model from torch.export import export from torch.fx.experimental.proxy_tensor import make_fx @@ -108,8 +113,10 @@ def use_torch_export(): input_types.append(input_data.type()) input_shapes.append(input_data.size()) - decoder = TorchFXPythonDecoder(gm, gm, input_shapes=input_shapes, input_types=input_types) - converted_model = convert_model(decoder, example_input=torch_inputs) + decoder = TorchFXPythonDecoder( + gm, gm, input_shapes=input_shapes, input_types=input_types) + converted_model = convert_model( + decoder, example_input=torch_inputs) self._resolve_input_shape_dtype( converted_model, ov_inputs, dynamic_shapes) smodel = model @@ -268,7 +275,8 @@ def torch_compile_backend_test(self, model, inputs, custom_eps): torch._dynamo.reset() with torch.no_grad(): model.eval() - ov_model = torch.compile(model, backend="openvino", options={"testing" : 1}) + ov_model = torch.compile( + model, backend="openvino", options={"testing": 1}) ov_res = ov_model(*inputs) if not isinstance(fw_res, (tuple)): diff --git a/tests/layer_tests/pytorch_tests/test_add.py b/tests/layer_tests/pytorch_tests/test_add.py index f283194df3ae1e..f168ffe0193583 100644 --- a/tests/layer_tests/pytorch_tests/test_add.py +++ b/tests/layer_tests/pytorch_tests/test_add.py @@ -5,7 +5,7 @@ import pytest import torch -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, 
skip_if_export @pytest.mark.parametrize('alpha', (-0.5, 0, 0.5, 1, 2)) @@ -39,9 +39,10 @@ def forward2(self, lhs, rhs): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend - @pytest.mark.parametrize("op_type", ["add", "add_"]) + @pytest.mark.parametrize("op_type", ["add", skip_if_export("add_")]) def test_add(self, ie_device, precision, ir_version, alpha, input_shape_rhs, op_type): self.input_rhs = np.random.randn(*input_shape_rhs).astype(np.float32) self._test(*self.create_model(alpha, op_type), ie_device, precision, ir_version, use_convert_model=True) @@ -110,6 +111,7 @@ def forward3(self, lhs, rhs): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend def test_add_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, rhs_type, rhs_shape): @@ -134,6 +136,7 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.precommit_ts_backend @pytest.mark.precommit_fx_backend def test_add(self, ie_device, precision, ir_version): @@ -169,5 +172,6 @@ def forward(self, x1, x2): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_add(self, lhs_type, rhs_type, ie_device, precision, ir_version): self._test(*self.create_model(lhs_type, rhs_type), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_aliases.py b/tests/layer_tests/pytorch_tests/test_aliases.py index 78f323b4a2d670..eec915a57eea0f 100644 --- a/tests/layer_tests/pytorch_tests/test_aliases.py +++ b/tests/layer_tests/pytorch_tests/test_aliases.py @@ -9,15 +9,17 @@ class aten_alias(torch.nn.Module): def forward(self, x): - x[:, 1, :, :] = 4. - return x + y = x.clone() + y[:, 1, :, :] = 4. 
+ return y class aten_loop_alias(torch.nn.Module): def forward(self, x): + y = x.clone() for i in range(2): - x[:, i, :, :] = 4. - return x + y[:, i, :, :] = 4. + return y class TestAliases(PytorchLayerTest): @@ -27,6 +29,7 @@ def _prepare_input(self): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_alias(self, ie_device, precision, ir_version): self._test(aten_alias(), None, ["aten::slice", "aten::select", @@ -35,6 +38,7 @@ def test_alias(self, ie_device, precision, ir_version): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_loop_alias(self, ie_device, precision, ir_version): self._test(aten_loop_alias(), None, ["aten::slice", "aten::select", diff --git a/tests/layer_tests/pytorch_tests/test_arange.py b/tests/layer_tests/pytorch_tests/test_arange.py index 425d6f6658baa3..9374b140fe893b 100644 --- a/tests/layer_tests/pytorch_tests/test_arange.py +++ b/tests/layer_tests/pytorch_tests/test_arange.py @@ -3,7 +3,7 @@ import pytest -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export class TestArange(PytorchLayerTest): @@ -108,9 +108,10 @@ def forward(self, x, y, z, d): return model_class, ref_net, "aten::arange" @pytest.mark.nightly + @pytest.mark.precommit_torch_export @pytest.mark.parametrize("dtype", [None, "float32", "float64", "int32", "int64", "int8", "uin8"]) @pytest.mark.parametrize("end", [1, 2, 3]) - @pytest.mark.parametrize("use_out", [True, False]) + @pytest.mark.parametrize("use_out", [skip_if_export(True), False]) def test_arange_end_only(self, dtype, end, use_out, ie_device, precision, ir_version): self._test(*self.create_model(dtype, 1, use_out), ie_device, precision, ir_version, kwargs_to_prepare_input={"end": end}) @@ -131,6 +132,7 @@ def test_arange_start_end_step(self, dtype, end, start, step, ie_device, precisi kwargs_to_prepare_input={"end": end, "start": start, "step": step, "dtype": 
dtype}) @pytest.mark.nightly + @pytest.mark.precommit_torch_export @pytest.mark.parametrize("dtype", ["float32", "float64", "int32", "int64", "int8", "uint8"]) @pytest.mark.parametrize("end", [1, 2, 3]) def test_arange_end_only_with_prim_dtype(self, dtype, end, ie_device, precision, ir_version): diff --git a/tests/layer_tests/pytorch_tests/test_argmax_argmin.py b/tests/layer_tests/pytorch_tests/test_argmax_argmin.py index 80f035667565c3..3b7ba0486a4d1e 100644 --- a/tests/layer_tests/pytorch_tests/test_argmax_argmin.py +++ b/tests/layer_tests/pytorch_tests/test_argmax_argmin.py @@ -73,6 +73,7 @@ def forward(self, x): @pytest.mark.parametrize("dtype", ["float32", "int32", "int64"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ('arm', 'armv7l', 'aarch64', 'arm64', 'ARM64'), diff --git a/tests/layer_tests/pytorch_tests/test_bitwise_ops.py b/tests/layer_tests/pytorch_tests/test_bitwise_ops.py index b5c8b456fabd83..5d2b040b33bdc9 100644 --- a/tests/layer_tests/pytorch_tests/test_bitwise_ops.py +++ b/tests/layer_tests/pytorch_tests/test_bitwise_ops.py @@ -4,7 +4,7 @@ import numpy as np import pytest import torch -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export class TestBitwiseOp(PytorchLayerTest): @@ -54,6 +54,7 @@ def forward_not_out(self, tensor_a, out): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.parametrize("op_type", ["and", "or", "not", "xor"]) @pytest.mark.parametrize("lhs_dtype", ["bool", "int32", "uint8", "int64"]) @pytest.mark.parametrize("rhs_dtype", ["bool", "int32", "uint8", "int64"]) @@ -65,7 +66,7 @@ def forward_not_out(self, tensor_a, out): ([], [2, 3]), ], ) - @pytest.mark.parametrize("out", [False, True]) + @pytest.mark.parametrize("out", [False, skip_if_export(True)]) def 
test_bitwise_mixed_dtypes( self, op_type, out, lhs_dtype, rhs_dtype, lhs_shape, rhs_shape, ie_device, precision, ir_version ): @@ -105,6 +106,7 @@ def forward(self, lhs, rhs): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.parametrize("lhs_dtype", ["bool", "int32"]) @pytest.mark.parametrize("rhs_dtype", ["bool", "int32"]) @pytest.mark.parametrize( diff --git a/tests/layer_tests/pytorch_tests/test_clamp.py b/tests/layer_tests/pytorch_tests/test_clamp.py index 3a4ce5c6c8de37..c98489034d2cae 100644 --- a/tests/layer_tests/pytorch_tests/test_clamp.py +++ b/tests/layer_tests/pytorch_tests/test_clamp.py @@ -3,7 +3,7 @@ import pytest -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export class TestClamp(PytorchLayerTest): @@ -43,9 +43,11 @@ def forward_clip_(self, x): @pytest.mark.parametrize("minimum,maximum", [(0., 1.), (-0.5, 1.5), (None, 10.), (None, -10.), (10., None), (-10., None), (100, 200), (1.0, 0.0)]) - @pytest.mark.parametrize("as_tensors", [True, False]) - @pytest.mark.parametrize("op_type", ["clamp", "clamp_"]) + @pytest.mark.parametrize("as_tensors", [skip_if_export(True), False]) + @pytest.mark.parametrize("op_type", ["clamp", skip_if_export("clamp_")]) @pytest.mark.nightly + @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_clamp(self, minimum, maximum, as_tensors, op_type, ie_device, precision, ir_version): self._test(*self.create_model(minimum, maximum, as_tensors, op_type), ie_device, precision, ir_version) @@ -103,6 +105,7 @@ def forward(self, x): @pytest.mark.parametrize("as_tensor", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_clamp(self, maximum, as_tensor, ie_device, precision, ir_version): self._test(*self.create_model(maximum, as_tensor), ie_device, precision, ir_version, use_convert_model=True, trace_model=True) diff --git 
a/tests/layer_tests/pytorch_tests/test_comparision.py b/tests/layer_tests/pytorch_tests/test_comparision.py index a114afb1f712c8..969079d8e88cf7 100644 --- a/tests/layer_tests/pytorch_tests/test_comparision.py +++ b/tests/layer_tests/pytorch_tests/test_comparision.py @@ -54,6 +54,7 @@ def forward(self, x, y): @pytest.mark.parametrize("op", ["eq", "ne", "lt", "gt", "le", "ge"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_comp(self, op, ie_device, precision, ir_version): self._test(*self.create_model(op), ie_device, precision, ir_version, use_convert_model=True) @@ -125,6 +126,7 @@ def forward3(self, lhs, rhs): @pytest.mark.parametrize("op", ["eq", "ne", "lt", "gt", "le", "ge"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_eq_mixed_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, rhs_type, rhs_shape, op): self.lhs_type = lhs_type self.lhs_shape = lhs_shape diff --git a/tests/layer_tests/pytorch_tests/test_cumsum.py b/tests/layer_tests/pytorch_tests/test_cumsum.py index 771eb02768bdf0..31a215afea0aed 100644 --- a/tests/layer_tests/pytorch_tests/test_cumsum.py +++ b/tests/layer_tests/pytorch_tests/test_cumsum.py @@ -71,7 +71,10 @@ def forward_out_prim_dtype(self, x, y): @pytest.mark.parametrize("out,dtype_from_input", [(False, False), (True, False), (True, True)]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_cumsum(self, axis, dtype, out, dtype_from_input, ie_device, precision, ir_version): + if out and PytorchLayerTest.use_torch_export(): + pytest.skip(reason="export fails for out") self._test(*self.create_model(axis, dtype, out, dtype_from_input), ie_device, precision, ir_version, kwargs_to_prepare_input={"out": out, "out_dtype": dtype}) diff --git a/tests/layer_tests/pytorch_tests/test_erf.py 
b/tests/layer_tests/pytorch_tests/test_erf.py index cced0b35d793dc..8bd9424bb3e209 100644 --- a/tests/layer_tests/pytorch_tests/test_erf.py +++ b/tests/layer_tests/pytorch_tests/test_erf.py @@ -3,7 +3,7 @@ import pytest -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export class TestErf(PytorchLayerTest): @@ -48,11 +48,14 @@ def forward_inplace(self, x): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.precommit_fx_backend @pytest.mark.parametrize("mode,input_dtype", [ ("", "float32"), ("", "float64"), ("", "int32"), ("out", "float32"), ("out", "float64"), ("inplace", "float32"), ("inplace", "float64")]) def test_erf(self, mode, input_dtype, ie_device, precision, ir_version): + if PytorchLayerTest.use_torch_export() and mode in ["out", "inplace"]: + pytest.skip(reason="export fails for inplace or out") self._test(*self.create_model(mode, input_dtype), ie_device, precision, ir_version, kwargs_to_prepare_input={"input_dtype": input_dtype, "out": mode == "out"} ) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_flip.py b/tests/layer_tests/pytorch_tests/test_flip.py index e0ede56a3d35f7..df390eb7caf001 100644 --- a/tests/layer_tests/pytorch_tests/test_flip.py +++ b/tests/layer_tests/pytorch_tests/test_flip.py @@ -3,7 +3,7 @@ import pytest -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export class TestFlip(PytorchLayerTest): @@ -36,8 +36,9 @@ def forward_out(self, x, y): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.parametrize("axis", [[0], [1], [-1], [1, 2], [2, 3], [1, 2, 3]]) - @pytest.mark.parametrize("out", [True, False]) + @pytest.mark.parametrize("out", [skip_if_export(True), False]) @pytest.mark.parametrize("dtype", ["float32", "float64", "int32", "int64", "uint8"]) def test_flip(self, axis, out, 
dtype, ie_device, precision, ir_version): self._test(*self.create_model(axis, out), ie_device, precision, ir_version, kwargs_to_prepare_input={"out": out, "dtype": dtype}) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_fmod.py b/tests/layer_tests/pytorch_tests/test_fmod.py index 67a4e74043b316..dc0729de41f4aa 100644 --- a/tests/layer_tests/pytorch_tests/test_fmod.py +++ b/tests/layer_tests/pytorch_tests/test_fmod.py @@ -66,6 +66,7 @@ def forward3(self, lhs, rhs): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_fmod_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, rhs_type, rhs_shape): self.lhs_type = lhs_type self.lhs_shape = lhs_shape diff --git a/tests/layer_tests/pytorch_tests/test_gather.py b/tests/layer_tests/pytorch_tests/test_gather.py index 49312cf1456ec7..e65026d97cea58 100644 --- a/tests/layer_tests/pytorch_tests/test_gather.py +++ b/tests/layer_tests/pytorch_tests/test_gather.py @@ -3,7 +3,7 @@ import pytest -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export class TestGelu(PytorchLayerTest): @@ -39,10 +39,11 @@ def forward_out(self, x, index, out): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.parametrize("m", [2, 10, 100]) @pytest.mark.parametrize("n", [2, 10, 100]) @pytest.mark.parametrize("axis", [0, 1]) - @pytest.mark.parametrize("out", [True, False]) + @pytest.mark.parametrize("out", [skip_if_export(True), False]) def test_gather(self, m, n, axis, out, ie_device, precision, ir_version): self._test(*self.create_model(axis, out), ie_device, precision, ir_version, kwargs_to_prepare_input={ "m": m, "n": n, "max_val": m if axis == 0 else n, "out": out diff --git a/tests/layer_tests/pytorch_tests/test_hadsigmoid.py b/tests/layer_tests/pytorch_tests/test_hadsigmoid.py index 11c3051bef13ea..0c32b2a7a53f41 100644 --- 
a/tests/layer_tests/pytorch_tests/test_hadsigmoid.py +++ b/tests/layer_tests/pytorch_tests/test_hadsigmoid.py @@ -3,7 +3,7 @@ import pytest -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export class TestHardSigmoid(PytorchLayerTest): @@ -31,6 +31,6 @@ def forward(self, x): @pytest.mark.precommit @pytest.mark.parametrize("shape", [[1, 10], [1, 2, 3], [1, 2, 3, 4], [1, 2, 3, 4, 5]]) @pytest.mark.parametrize("dtype", ["float32", "float64"]) - @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("inplace", [skip_if_export(True), False]) def test_hardsigmoid(self, shape, dtype, inplace, ie_device, precision, ir_version): self._test(*self.create_model(inplace), ie_device, precision, ir_version, kwargs_to_prepare_input={"shape": shape, "dtype": dtype}) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_leaky_relu.py b/tests/layer_tests/pytorch_tests/test_leaky_relu.py index 3fb109d5854005..2ef80dd388ae89 100644 --- a/tests/layer_tests/pytorch_tests/test_leaky_relu.py +++ b/tests/layer_tests/pytorch_tests/test_leaky_relu.py @@ -3,7 +3,7 @@ import pytest -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export class TestLeakyRelu(PytorchLayerTest): @@ -29,7 +29,7 @@ def forward(self, x): return aten_leaky_relu(alpha, inplace), ref_net, "aten::leaky_relu" if not inplace else "aten::leaky_relu_" @pytest.mark.parametrize("alpha", [0.01, 1.01, -0.01]) - @pytest.mark.parametrize("inplace", [True, False]) + @pytest.mark.parametrize("inplace", [skip_if_export(True), False]) @pytest.mark.nightly @pytest.mark.precommit def test_leaky_relu(self, alpha, inplace, ie_device, precision, ir_version): diff --git a/tests/layer_tests/pytorch_tests/test_log.py b/tests/layer_tests/pytorch_tests/test_log.py index 264ba734bc9cc5..8d595e82e82166 100644 --- a/tests/layer_tests/pytorch_tests/test_log.py +++ 
b/tests/layer_tests/pytorch_tests/test_log.py @@ -54,5 +54,7 @@ def forward(self, x): ["log1p", "int32"], ["log1p_", "float32"]]) def test_log(self, op, input_dtype, ie_device, precision, ir_version): + if PytorchLayerTest.use_torch_export() and op[-1] == "_": + pytest.skip(reason="export fails for inplace") self._test(*self.create_model(op), ie_device, precision, ir_version, kwargs_to_prepare_input={"dtype": input_dtype}) \ No newline at end of file diff --git a/tests/layer_tests/pytorch_tests/test_log_softmax.py b/tests/layer_tests/pytorch_tests/test_log_softmax.py index 7e61ab7d1603e7..21626d85108b1a 100644 --- a/tests/layer_tests/pytorch_tests/test_log_softmax.py +++ b/tests/layer_tests/pytorch_tests/test_log_softmax.py @@ -39,6 +39,7 @@ def _prepare_input(self): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.precommit_fx_backend def test_log_softmax(self, input_dtype, convert_dtype, dim, ie_device, precision, ir_version): self.input_dtype = input_dtype diff --git a/tests/layer_tests/pytorch_tests/test_mean.py b/tests/layer_tests/pytorch_tests/test_mean.py index c6258da4b8b445..af381fa19bb7d3 100644 --- a/tests/layer_tests/pytorch_tests/test_mean.py +++ b/tests/layer_tests/pytorch_tests/test_mean.py @@ -79,6 +79,9 @@ def forward_out(self, x, out): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_sum(self, axes, keep_dim, dtype, out, ie_device, precision, ir_version): + if PytorchLayerTest.use_torch_export() and out: + pytest.skip(reason="export fails for out") self._test(*self.create_model(axes, keep_dim, dtype, out), ie_device, precision, ir_version, kwargs_to_prepare_input={"out": out, "axis": axes, "dtype": dtype, "keep_dim": keep_dim}) diff --git a/tests/layer_tests/pytorch_tests/test_min_max.py b/tests/layer_tests/pytorch_tests/test_min_max.py index 1610194d0a796d..8008d725db3d63 100644 --- a/tests/layer_tests/pytorch_tests/test_min_max.py +++ 
b/tests/layer_tests/pytorch_tests/test_min_max.py @@ -3,7 +3,7 @@ import pytest -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export class TestMinMax(PytorchLayerTest): @@ -75,6 +75,7 @@ def forward(self, x, y): @pytest.mark.parametrize("op_type", ['min', 'max']) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_reduce_min_max(self, axes, keep_dims, op_type, ie_device, precision, ir_version): self._test(*self.create_model(op_type, axes, keep_dims, single_input=True), ie_device, precision, ir_version) @@ -84,6 +85,7 @@ def test_reduce_min_max(self, axes, keep_dims, op_type, ie_device, precision, ir @pytest.mark.parametrize("first_input_dtype", ["float32", "int32", "float64", "int64", "uint8"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_min_max(self, op_type, first_input_dtype, second_input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(op_type, None, None, single_input=False, dtypes=(first_input_dtype, second_input_dtype)), ie_device, precision, ir_version, kwargs_to_prepare_input= @@ -147,7 +149,7 @@ def forward(self, x: float, y: float): ]) @pytest.mark.nightly @pytest.mark.precommit - def test_min_max(self, case, kwargs_to_prepare_input, ie_device, precision, ir_version): + def test_max(self, case, kwargs_to_prepare_input, ie_device, precision, ir_version): self._test(*self.create_model(case), ie_device, precision, ir_version, kwargs_to_prepare_input=kwargs_to_prepare_input, use_mo_convert=False) @@ -263,6 +265,7 @@ def forward(self, x, y): @pytest.mark.parametrize("first_input_dtype", ["float32", "int32", "int64", "float64"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_minimum_maximum( self, op_type, first_input_dtype, second_input_dtype, ie_device, precision, ir_version ): @@ -330,8 +333,10 @@ def forward(self, x): 
@pytest.mark.parametrize("op_type", ["amin", "amax"]) @pytest.mark.parametrize("axis", [0, -1, 1, [1, 2], [-1, -2], [2, 0, -1], [0, 1, 2, 3]]) @pytest.mark.parametrize("keep_dims", [True, False]) - @pytest.mark.parametrize("out", [True, False]) + @pytest.mark.parametrize("out", [skip_if_export(True), False]) @pytest.mark.parametrize("input_dtype", ['float32', 'int32', 'int64', 'float64']) + @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.precommit_fx_backend def test_amin_amax(self, op_type, input_dtype, axis, keep_dims, out, ie_device, precision, ir_version): self._test(*self.create_model(op_type, axis, keep_dims, out), diff --git a/tests/layer_tests/pytorch_tests/test_mul.py b/tests/layer_tests/pytorch_tests/test_mul.py index fd5602510e8a51..8152c8cb9c0af7 100644 --- a/tests/layer_tests/pytorch_tests/test_mul.py +++ b/tests/layer_tests/pytorch_tests/test_mul.py @@ -140,5 +140,6 @@ def forward(self, input_tensor, other_tensor): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_mul_bool(self, lhs_type, rhs_type, ie_device, precision, ir_version): self._test(*self.create_model(lhs_type, rhs_type), ie_device, precision, ir_version, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_one_hot.py b/tests/layer_tests/pytorch_tests/test_one_hot.py index 87e8f5e2dbf020..de8716f8460782 100644 --- a/tests/layer_tests/pytorch_tests/test_one_hot.py +++ b/tests/layer_tests/pytorch_tests/test_one_hot.py @@ -27,7 +27,7 @@ def forward(self, x): @pytest.mark.parametrize(("num_classes"), [-1, 3, 1000,]) @pytest.mark.nightly - @pytest.mark.precommit + #@pytest.mark.precommit def test_one_hot(self, num_classes, ie_device, precision, ir_version): self._test(*self.create_model(num_classes), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_pooling.py b/tests/layer_tests/pytorch_tests/test_pooling.py index ac069567d6933b..f0ea018a552856 100644 --- 
a/tests/layer_tests/pytorch_tests/test_pooling.py +++ b/tests/layer_tests/pytorch_tests/test_pooling.py @@ -135,6 +135,7 @@ def forward(self, x): @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_avg_pool1d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): @@ -155,6 +156,7 @@ def test_avg_pool1d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_avg_pool2d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): @@ -166,6 +168,7 @@ def test_avg_pool2d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("count_include_pad", [True, False]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_avg_pool3d(self, params, ceil_mode, count_include_pad, ie_device, precision, ir_version): @@ -178,6 +181,7 @@ def test_avg_pool3d(self, params, ceil_mode, count_include_pad, ie_device, preci @pytest.mark.parametrize("dilation", [1, 2]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') def test_max_pool1d(self, params, ceil_mode, dilation, ie_device, precision, ir_version): diff --git a/tests/layer_tests/pytorch_tests/test_pow.py b/tests/layer_tests/pytorch_tests/test_pow.py index 
2284e0561044e4..b3424dfc3be695 100644 --- a/tests/layer_tests/pytorch_tests/test_pow.py +++ b/tests/layer_tests/pytorch_tests/test_pow.py @@ -46,7 +46,10 @@ def forward_inplace(self, input_data, exponent): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_pow(self, inplace, ie_device, precision, ir_version, test_input): + if inplace and PytorchLayerTest.use_torch_export(): + pytest.skip(reason="export fails for inplace") self.test_input = test_input self._test(*self.create_model(inplace), ie_device, precision, ir_version, use_convert_model=True) @@ -105,6 +108,7 @@ def forward3(self, lhs, rhs): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_pow_mixed_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, rhs_type, rhs_shape): self.lhs_type = lhs_type self.lhs_shape = lhs_shape diff --git a/tests/layer_tests/pytorch_tests/test_repeat.py b/tests/layer_tests/pytorch_tests/test_repeat.py index 884a51e2a24f6f..bc7949eb091c30 100644 --- a/tests/layer_tests/pytorch_tests/test_repeat.py +++ b/tests/layer_tests/pytorch_tests/test_repeat.py @@ -29,6 +29,7 @@ def forward(self, x): @pytest.mark.parametrize("repeats", [(4, 3), (1, 1), (1, 2, 3), (1, 2, 2, 3)]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_repeat(self, repeats, ie_device, precision, ir_version): self._test(*self.create_model(repeats), ie_device, precision, ir_version) @@ -54,6 +55,7 @@ def forward(self, x, y): @pytest.mark.parametrize("repeats", [(4, 3), (1, 1), (1, 3, 3), (1, 2, 2, 3)]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_repeat(self, repeats, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, ir_version, kwargs_to_prepare_input={"repeats_shape": repeats}) @@ -76,5 +78,6 @@ def forward(self, x): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export 
def test_repeat_t5(self, ie_device, precision, ir_version): self._test(*self.create_model(), ie_device, precision, ir_version, trace_model=True, use_convert_model=True) diff --git a/tests/layer_tests/pytorch_tests/test_roll.py b/tests/layer_tests/pytorch_tests/test_roll.py index a405fa839af2b3..7c4c6f2831717a 100644 --- a/tests/layer_tests/pytorch_tests/test_roll.py +++ b/tests/layer_tests/pytorch_tests/test_roll.py @@ -37,5 +37,6 @@ def forward(self, x): ]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_roll(self, shifts, dim, ie_device, precision, ir_version): self._test(*self.create_model(shifts, dim), ie_device, precision, ir_version) diff --git a/tests/layer_tests/pytorch_tests/test_rsub.py b/tests/layer_tests/pytorch_tests/test_rsub.py index 68e36f8abd2167..08a9372582b07d 100644 --- a/tests/layer_tests/pytorch_tests/test_rsub.py +++ b/tests/layer_tests/pytorch_tests/test_rsub.py @@ -40,7 +40,7 @@ def forward(self, x, y:int, alpha: float): ]) @pytest.mark.nightly @pytest.mark.precommit - def test_rsub(self, ie_device, precision, ir_version, input_data): + def test_rsub1(self, ie_device, precision, ir_version, input_data): self.input_data = [] for input in input_data: if type(input) is list: @@ -55,7 +55,7 @@ def test_rsub(self, ie_device, precision, ir_version, input_data): ]) @pytest.mark.nightly @pytest.mark.precommit - def test_rsub(self, ie_device, precision, ir_version, input_data): + def test_rsub2(self, ie_device, precision, ir_version, input_data): self.input_data = [] for input in input_data: if type(input) is list: @@ -103,6 +103,7 @@ def forward2(self, lhs, rhs:int): @pytest.mark.parametrize(("lhs_shape"), [[2, 3], [3], [2, 3, 4]]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_rsub_types(self, ie_device, precision, ir_version, lhs_type, lhs_shape, rhs_type): self.lhs_type = lhs_type self.lhs_shape = lhs_shape diff --git 
a/tests/layer_tests/pytorch_tests/test_scatter.py b/tests/layer_tests/pytorch_tests/test_scatter.py index fd69a8b309d57d..620d6d5c0ed0c3 100644 --- a/tests/layer_tests/pytorch_tests/test_scatter.py +++ b/tests/layer_tests/pytorch_tests/test_scatter.py @@ -267,7 +267,7 @@ def forward(self, x: torch.Tensor): @pytest.mark.parametrize("src", [torch.arange(1, 26).reshape(5, 5)]) @pytest.mark.parametrize("dtype", ["int32", "int64", "float32", "float64"]) @pytest.mark.parametrize("inplace", [True, False]) - def test_scatter_reduce(self, dim, index, src, dtype, inplace, ie_device, precision, ir_version): + def test_scatter_add(self, dim, index, src, dtype, inplace, ie_device, precision, ir_version): if isinstance(src, torch.Tensor): src = src.to(getattr(torch, dtype)) if index is None: diff --git a/tests/layer_tests/pytorch_tests/test_squeeze.py b/tests/layer_tests/pytorch_tests/test_squeeze.py index 4a90a6946f6990..28111fc53c0d74 100644 --- a/tests/layer_tests/pytorch_tests/test_squeeze.py +++ b/tests/layer_tests/pytorch_tests/test_squeeze.py @@ -32,15 +32,17 @@ def forward(self, x): @pytest.mark.parametrize("dim,dynamic_shapes", [(-2, True), (0, True), (None, False)]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.precommit_fx_backend def test_squeeze(self, dim, dynamic_shapes, ie_device, precision, ir_version): + if PytorchLayerTest.use_torch_export() and dim is None: + pytest.xfail(reason="export fails if dim is not provided") self._test(*self.create_model(dim), ie_device, precision, ir_version, dynamic_shapes=dynamic_shapes) @pytest.mark.xfail(reason='OpenVINO squeeze does not support dimension is not equal to 1.') @pytest.mark.parametrize("dim", [-1, 2]) @pytest.mark.nightly @pytest.mark.precommit - @pytest.mark.precommit_fx_backend def test_squeeze_non_1(self, dim, ie_device, precision, ir_version): # Dynamic shapes are introducing dynamic rank, with is not suppoerted by Squeeze operation. 
self._test(*self.create_model(dim), ie_device, precision, ir_version, dynamic_shapes=False) diff --git a/tests/layer_tests/pytorch_tests/test_sum.py b/tests/layer_tests/pytorch_tests/test_sum.py index f045ea466c3936..79d379bfef8aba 100644 --- a/tests/layer_tests/pytorch_tests/test_sum.py +++ b/tests/layer_tests/pytorch_tests/test_sum.py @@ -3,7 +3,7 @@ import pytest -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export class TestSum(PytorchLayerTest): @@ -92,10 +92,11 @@ def forward_out(self, x, out): @pytest.mark.parametrize("axes,keep_dims", [(None, None), (None, False), (-1, None), (1, None), ((2, 3), False), ((3, 2), True)]) @pytest.mark.parametrize("dtype", [None, "float32", "int64"]) - @pytest.mark.parametrize("out", [True, False]) + @pytest.mark.parametrize("out", [skip_if_export(True), False]) @pytest.mark.parametrize("input_dtype", ["float32", "uint8", "bool", "int64"]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_sum(self, axes, keep_dims, out, dtype, input_dtype, ie_device, precision, ir_version): self._test(*self.create_model(axes, keep_dims, out, dtype, input_dtype), ie_device, precision, ir_version, diff --git a/tests/layer_tests/pytorch_tests/test_unary_ops.py b/tests/layer_tests/pytorch_tests/test_unary_ops.py index 04346bdef48ef5..f495e7ba3d272f 100644 --- a/tests/layer_tests/pytorch_tests/test_unary_ops.py +++ b/tests/layer_tests/pytorch_tests/test_unary_ops.py @@ -5,9 +5,11 @@ import torch import torch.nn.functional as F -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export OPS = { + "aten::abs": torch.abs, + "aten::abs_": torch.abs_, "aten::rsqrt": torch.rsqrt, "aten::sqrt": torch.sqrt, "aten::exp": torch.exp, @@ -20,6 +22,8 @@ "aten::floor_": torch.floor_, "aten::sigmoid": torch.sigmoid, "aten::sigmoid_": torch.sigmoid_, + "aten::reciprocal": 
torch.reciprocal, + "aten::reciprocal_": torch.reciprocal_, "aten::cos": torch.cos, "aten::cos_": torch.cos_, "aten::sin": torch.sin, @@ -66,19 +70,22 @@ def _prepare_input(self): @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export @pytest.mark.parametrize("dtype", [torch.float32, torch.float64, torch.int8, torch.uint8, torch.int32, torch.int64]) @pytest.mark.parametrize("op_type", [ + "aten::abs", "aten::rsqrt", "aten::sqrt", "aten::exp", "aten::relu", - "aten::relu_", + skip_if_export("aten::relu_"), "aten::ceil", - "aten::ceil_", + skip_if_export("aten::ceil_"), "aten::floor", - "aten::floor_", + skip_if_export("aten::floor_"), "aten::sigmoid", + "aten::reciprocal", # trigonometry "aten::cos", "aten::sin", @@ -104,8 +111,10 @@ def test_unary_op(self, op_type, dtype, ie_device, precision, ir_version): @pytest.mark.parametrize("op_type", [ # some pytorch inplace ops do not support int + "aten::abs_", "aten::exp_", "aten::sigmoid_", + "aten::reciprocal_", # trigonometry "aten::cos_", "aten::sin_", diff --git a/tests/layer_tests/pytorch_tests/test_unsqueeze.py b/tests/layer_tests/pytorch_tests/test_unsqueeze.py index fc936f2530642f..f978ecc6438f00 100644 --- a/tests/layer_tests/pytorch_tests/test_unsqueeze.py +++ b/tests/layer_tests/pytorch_tests/test_unsqueeze.py @@ -3,7 +3,7 @@ import pytest -from pytorch_layer_test_class import PytorchLayerTest +from pytorch_layer_test_class import PytorchLayerTest, skip_if_export class TestUnsqueeze(PytorchLayerTest): @@ -36,9 +36,10 @@ def forward(self, x): return model_class(dim), ref_net, op - @pytest.mark.parametrize("inplace", [False, True]) + @pytest.mark.parametrize("inplace", [False, skip_if_export(True)]) @pytest.mark.parametrize("dim", [0, 1, -1]) @pytest.mark.nightly @pytest.mark.precommit + @pytest.mark.precommit_torch_export def test_unsqueeze(self, inplace, dim, ie_device, precision, ir_version): self._test(*self.create_model(inplace, dim), ie_device, precision, ir_version) diff --git 
a/tests/layer_tests/pytorch_tests/test_var_mean.py b/tests/layer_tests/pytorch_tests/test_var_mean.py index bd8a5a10617eb4..fddf7457749096 100644 --- a/tests/layer_tests/pytorch_tests/test_var_mean.py +++ b/tests/layer_tests/pytorch_tests/test_var_mean.py @@ -8,7 +8,7 @@ from pytorch_layer_test_class import PytorchLayerTest -class TestVar(PytorchLayerTest): +class TestVarMean(PytorchLayerTest): def _prepare_input(self): import numpy as np return (np.random.randn(1, 3, 224, 224).astype(np.float32),) @@ -56,7 +56,7 @@ def forward(self, x): @pytest.mark.parametrize("op_type", ["var", "var_mean", "std", "std_mean"]) @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_var2args(self, unbiased, op_type, ie_device, precision, ir_version): + def test_op2args(self, unbiased, op_type, ie_device, precision, ir_version): self._test(*self.create_model(unbiased, op_type=op_type), ie_device, precision, ir_version) @pytest.mark.nightly @@ -67,5 +67,5 @@ def test_var2args(self, unbiased, op_type, ie_device, precision, ir_version): @pytest.mark.parametrize("op_type", ["var", "var_mean", "std", "std_mean"]) @pytest.mark.xfail(condition=platform.system() == 'Darwin' and platform.machine() == 'arm64', reason='Ticket - 122715') - def test_var(self, unbiased, dim, keepdim, op_type, ie_device, precision, ir_version): + def test_op(self, unbiased, dim, keepdim, op_type, ie_device, precision, ir_version): self._test(*self.create_model(unbiased, dim, keepdim, two_args_case=False, op_type=op_type), ie_device, precision, ir_version) \ No newline at end of file diff --git a/tests/model_hub_tests/pytorch/test_hf_transformers.py b/tests/model_hub_tests/pytorch/test_hf_transformers.py index 4fa17540a8ce61..954cd3bda0bdb7 100644 --- a/tests/model_hub_tests/pytorch/test_hf_transformers.py +++ b/tests/model_hub_tests/pytorch/test_hf_transformers.py @@ -563,4 +563,5 @@ def test_convert_model_precommit(self, name, type, 
ie_device): process_pytest_marks(os.path.join(os.path.dirname(__file__), "hf_transformers_models"))) @pytest.mark.nightly def test_convert_model_all_models(self, name, ie_device): + self.mode = "export" self.run(model_name=name, model_link=None, ie_device=ie_device) diff --git a/tests/model_hub_tests/pytorch/torch_utils.py b/tests/model_hub_tests/pytorch/torch_utils.py index 92bc08ee73e4cf..afb348a9341202 100644 --- a/tests/model_hub_tests/pytorch/torch_utils.py +++ b/tests/model_hub_tests/pytorch/torch_utils.py @@ -72,28 +72,54 @@ def prepare_inputs(self, inputs_info): def convert_model_impl(self, model_obj): if hasattr(self, "mode") and self.mode == "export": - from torch.fx.experimental.proxy_tensor import make_fx + from torch.fx.experimental.proxy_tensor import make_fx, get_isolated_graphmodule from torch.export import export from packaging import version from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder + import inspect + from openvino.frontend.pytorch.utils import prepare_example_inputs_and_model - graph = export(model_obj, self.example) + input_shapes = [] + input_types = [] + if isinstance(self.example, dict): + graph = export(model_obj, tuple(), self.example) + for input_data in self.example.values(): + input_types.append(input_data.type()) + input_shapes.append(input_data.size()) + else: + graph = export(model_obj, self.example) + for input_data in self.example: + input_types.append(input_data.type()) + input_shapes.append(input_data.size()) if version.parse(torch.__version__) >= version.parse("2.2"): graph = graph.run_decompositions() - try: + if isinstance(self.example, dict): + try: + gm = get_isolated_graphmodule(graph, tuple(), self.example) + except: + gm = get_isolated_graphmodule(graph, tuple(), self.example, tracing_mode='symbolic') + else: + try: gm = make_fx(graph)(*self.example) - except: + except: gm = make_fx(graph, tracing_mode='symbolic')(*self.example) - input_shapes = [] - input_types = [] - for input_data in 
self.example: - input_types.append(input_data.type()) - input_shapes.append(input_data.size()) + print(gm.code) decoder = TorchFXPythonDecoder(gm, gm, input_shapes=input_shapes, input_types=input_types) - ov_model = convert_model(decoder, example_input=self.example) + print(list(gm.graph.nodes)[-1].args) + if isinstance(self.example, dict): + decoder._input_signature = list(self.example.keys()) + ov_model = convert_model(decoder, example_input=self.example) + if isinstance(self.example, dict): + pt_res = model_obj(**self.example) + else: + pt_res = model_obj(*self.example) + if isinstance(pt_res, dict): + for i, k in enumerate(pt_res.keys()): + ov_model.outputs[i].get_tensor().set_names({k}) + ov_model.validate_nodes_and_infer_types() else: ov_model = convert_model(model_obj, example_input=self.example,