Commit

(FIX) Changed test case B values to correct zp/scale values & corrected nuances in op implementation
virajwad committed Dec 20, 2024
1 parent 571700c commit 2c1ddb4
Showing 2 changed files with 14 additions and 15 deletions.
@@ -6,7 +6,7 @@
 #include "exceptions.hpp"
 #include "openvino/frontend/exception.hpp"
 #include "openvino/op/subtract.hpp"
-#include "openvino/op/divide.hpp"
+#include "openvino/op/multiply.hpp"
 #include "openvino/op/matmul.hpp"
 #include "openvino/op/add.hpp"
 #include "openvino/op/convert.hpp"
@@ -77,14 +77,14 @@ ov::OutputVector dynamic_quantize_matmul(const ov::frontend::onnx::Node& node) {
 }
 
 // At time of writing, ov::MatMul does not support int8/uint8 types. To get the correct output, we need to dequantize B.
-// Technically this does not do DynamicQuantization, but is required for correct output of the operator
-// B_dequantized = (B - b_zero_point) / b_scale
-
-// We also need to convert b_scale dtype to match B_dequantized, as the OV "Divide" op requires both argument dtypes to match
-
-ov::Output<ov::Node> B_dequantized = std::make_shared<v1::Subtract>(B, b_zero_point);
-B_dequantized = std::make_shared<v0::Convert>(B_dequantized, b_scale.get_element_type());
-B_dequantized = std::make_shared<v1::Divide>(B_dequantized, b_scale);
+// Technically this does not do DynamicQuantization, but it is required for correct output of the operator. It implements A * B_dequantized + bias.
+// According to the ONNX RT docs, they use the linear quantization scheme shown here: https://tomwildenhain-microsoft.github.io/onnxruntime/docs/performance/quantization.html
+// B_dequantized = (B - b_zero_point) * b_scale
+
+ov::Output<ov::Node> B_dequantized = std::make_shared<v0::Convert>(B, b_scale.get_element_type());
+b_zero_point = std::make_shared<v0::Convert>(b_zero_point, b_scale.get_element_type());
+B_dequantized = std::make_shared<v1::Subtract>(B_dequantized, b_zero_point);
+B_dequantized = std::make_shared<v1::Multiply>(B_dequantized, b_scale);
 
 // A, B are N-dimensional matrices. According to example ONNX models for this operator, the suboperations pass input A/B such that B's shape is already transposed.
 // E.g. https://github.com/microsoft/onnxruntime/blob/main/onnxruntime/test/testdata/transform/fusion/dynamic_quantize_matmul.onnx
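For intuition, here is a minimal standalone sketch (plain C++, not the OpenVINO API, and not code from this commit) of the linear dequantization the new subgraph builds, using the B/scale/zero-point values from the updated test case below:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // Values taken from the updated test case below.
    const std::vector<int8_t> B{-2, 29, 61, 61, 29, 125};
    const float b_scale = 0.003137f;
    const int8_t b_zero_point = -34;

    // Linear dequantization: B_dequantized = (B - b_zero_point) * b_scale
    for (int8_t b : B) {
        const float dq = (static_cast<float>(b) - static_cast<float>(b_zero_point)) * b_scale;
        std::printf("%4d -> %f\n", b, dq);
    }
    return 0;
}

Note the operation order now matches the subgraph above: convert to the scale's float type first, subtract the (converted) zero point, then multiply by the scale.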
11 changes: 5 additions & 6 deletions src/frontends/onnx/tests/onnx_import_com_microsoft.in.cpp
@@ -1363,12 +1363,11 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_dynamic_quantize_matmul) {
 
     // Fill test case here
     const std::vector<float> input_A{1.29292f, 2.47473f, 3.291903f, 4.1728945f, 5.213912f, 6.1293125f};
-    const std::vector<int8_t> input_B{1, 2, 3, 125, 126, -128};
-    const std::vector<float> b_scale{6.28947502f};
-    const std::vector<int8_t> b_zero_point{69};
+    const std::vector<int8_t> input_B{-2, 29, 61, 61, 29, 125};
+    const std::vector<float> b_scale{0.003137f};
+    const std::vector<int8_t> b_zero_point{-34};
 
-    const std::vector<float> expected{3.16867157e+02f, 3.40601959e+02f, -3.60632910e+03f, 6.47037888e+01f, 1.11719864e+02f, -6.54899121e+03f,
-                                      -7.19602890e+01f, -6.04708314e-01f, -9.75953906e+03f};
+    const std::vector<float> expected{0.8681802f, 0.7458673f, 1.6218146f, 1.5770973f, 1.4774824f, 3.0677009f, 2.3504133f, 2.2423527f, 4.611995f};
 
     // add_input needs to be called in order of model inputs (order matters)
     test_case.add_input<float>(Shape{3,2}, input_A);
@@ -1379,7 +1378,7 @@ OPENVINO_TEST(${BACKEND_NAME}, onnx_com_microsoft_dynamic_quantize_matmul) {
     test_case.add_expected_output<float>(Shape{3,3}, expected);
 
     if (std::string("${BACKEND_NAME}") == std::string("IE_GPU")) {
-        test_case.run_with_tolerance_as_fp(0.0001f);
+        test_case.run_with_tolerance_as_fp(0.003f);
     } else {
         test_case.run();
     }
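As a sanity check on the new expected values, the following float-only sketch of A * dequantize(B) (an illustration, not code from this commit) reproduces them to within a few 1e-3. The residual difference is presumably the dynamic quantization of A performed by the real operator, which this sketch ignores, and is consistent with the IE_GPU tolerance being loosened from 0.0001f to 0.003f:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // Inputs copied from the test case above; A is 3x2 (row-major), B is 2x3.
    const std::vector<float> A{1.29292f, 2.47473f, 3.291903f, 4.1728945f, 5.213912f, 6.1293125f};
    const std::vector<int8_t> B{-2, 29, 61, 61, 29, 125};
    const float b_scale = 0.003137f;
    const int8_t b_zero_point = -34;

    for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j) {
            float acc = 0.0f;
            for (int k = 0; k < 2; ++k) {
                // B_dequantized = (B - b_zero_point) * b_scale
                const float b_dq = (static_cast<float>(B[k * 3 + j]) - b_zero_point) * b_scale;
                acc += A[i * 2 + k] * b_dq;
            }
            std::printf("%f ", acc);  // agrees with `expected` to within a few 1e-3
        }
        std::printf("\n");
    }
    return 0;
}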
