Commit: applied reviews

e-ddykim committed Jan 9, 2025
1 parent e667219 commit c3d6519

Showing 9 changed files with 76 additions and 76 deletions.
@@ -253,12 +253,12 @@ class LP_TRANSFORMATIONS_API LayerTransformation : public ov::pass::MatcherPass
const std::vector<ov::element::Type> defaultPrecisions =
{ ov::element::u8, ov::element::i8 },
const bool reshapeIgnorePerTensorQuantizationCheck = false,
const bool useDefaultTransformation = true) :
const bool scalingMode = false) :
updatePrecisions(updatePrecisions),
deqPrecision(deqPrecision),
defaultPrecisions(defaultPrecisions),
reshapeIgnorePerTensorQuantizationCheck(reshapeIgnorePerTensorQuantizationCheck),
useDefaultTransformation(useDefaultTransformation) {}
scalingMode(scalingMode) {}

Params& setUpdatePrecisions(const bool updatePrecisions) {
this->updatePrecisions = updatePrecisions;
@@ -283,8 +283,8 @@ class LP_TRANSFORMATIONS_API LayerTransformation : public ov::pass::MatcherPass
std::vector<ov::element::Type> defaultPrecisions;
// to support GPU workaround to keep Reshape and MatMul in FP32
bool reshapeIgnorePerTensorQuantizationCheck;
// for MultiplyPartialTransformation to support Activations Scaling
bool useDefaultTransformation;
// to support Activations Scaling
bool scalingMode;
};

class PrecisionDetails {
@@ -356,7 +356,7 @@ class LP_TRANSFORMATIONS_API LayerTransformation : public ov::pass::MatcherPass
element::Type deqPrecision;
std::vector<ov::element::Type> defaultPrecisions;
bool reshapeIgnorePerTensorQuantizationCheck;
bool useDefaultTransformation;
bool scalingMode;

static constexpr char originalLayerPostfix[] = "_original";
TransformationContext* context;
2 changes: 1 addition & 1 deletion src/common/low_precision_transformations/src/add.cpp
@@ -214,7 +214,7 @@ bool AddTransformation::transform(TransformationContext& context, ov::pass::patt
newSubtractFullPathValues),
newMultiplyFullPathValues);

auto output_type = useDefaultTransformation ? element::f32 : add->get_output_element_type(0);
auto output_type = scalingMode ? add->get_output_element_type(0) : element::f32;
newAddOrSubtract = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Add>>(
std::vector<element::Type>{output_type, output_type}, std::vector<element::Type>{ output_type },
ov::op::TemporaryReplaceOutputType(inputs[0], output_type).get(),
@@ -45,7 +45,7 @@ LayerTransformation::LayerTransformation(const Params& params) :
deqPrecision(params.deqPrecision),
defaultPrecisions(params.defaultPrecisions),
reshapeIgnorePerTensorQuantizationCheck(params.reshapeIgnorePerTensorQuantizationCheck),
useDefaultTransformation(params.useDefaultTransformation),
scalingMode(params.scalingMode),
context(nullptr) {}

void LayerTransformation::setContext(TransformationContext* context) noexcept {
@@ -79,7 +79,7 @@ bool MultiplyPartialTransformation::transform(TransformationContext& context, ov
auto constParent = multiply->input_value(multiplyBranch.first == 0 ? 1 : 0);
auto multiplyParentParent = multiplyParent.get_node_shared_ptr()->input_value(multiplyBranch.second);
auto multiplyParentConst = multiplyParent.get_node_shared_ptr()->input_value(multiplyBranch.second == 0 ? 1 : 0);
auto input_data_type = useDefaultTransformation ? element::f32 : multiply->get_output_element_type(0);
auto input_data_type = scalingMode ? multiply->get_output_element_type(0) : element::f32;

newMultiply = std::make_shared<ov::op::TypeRelaxed<ov::opset1::Multiply>>(
std::vector<ov::element::Type>{ input_data_type, input_data_type },
@@ -134,17 +134,17 @@ bool MultiplyPartialTransformation::transform(TransformationContext& context, ov


// before: Y = (SC1 * (X1 - SH1)) * (SC2 * X2)
// if useDefaultTransformation = true
// if scalingMode == false
// after : Y = (SC1' * (X1 - SH1)) * (X2) , where :
// SC1' = SC1 * SC2
// else
// after : Y = ((X1 - SH1) * X2) * SC1' , where :
// SC1' = SC1 * SC2
auto newMultiplyValuesFullPath = fold<ov::opset1::Multiply>(multiplyValuesEmptyPath, multiplyValuesFullPath);
OutputVector inputs{ {}, {} };
inputs[emptyPathIndex] = useDefaultTransformation ? dequantizationEmptyPath.data : newMultiplyValuesFullPath;
auto input_for_fullPath = useDefaultTransformation ? newMultiplyValuesFullPath :
dequantizationEmptyPath.data.get_node_shared_ptr();
inputs[emptyPathIndex] = scalingMode ? newMultiplyValuesFullPath : dequantizationEmptyPath.data;
auto input_for_fullPath = scalingMode ? dequantizationEmptyPath.data.get_node_shared_ptr() :
newMultiplyValuesFullPath;

ov::Output<ov::Node> parent0 = dequantizationFullPath.subtract == nullptr ?
(dequantizationFullPath.convert == nullptr ? dequantizationFullPath.data : dequantizationFullPath.convert) :
@@ -17,10 +17,10 @@ class TRANSFORMATIONS_API ActivationsScaling;
namespace activations_scaling {

class TRANSFORMATIONS_API ScaleDownSingleLayer;
class TRANSFORMATIONS_API EliminateMultiplyScalar;
class TRANSFORMATIONS_API EliminateScalarMul;
class TRANSFORMATIONS_API MulConcatTransformation;
class TRANSFORMATIONS_API MulShareTransformation;
class TRANSFORMATIONS_API MulMulTransformation;
class TRANSFORMATIONS_API MoveDownScalarMul;

} // namespace activations_scaling
} // namespace pass
@@ -31,32 +31,74 @@ class TRANSFORMATIONS_API MulMulTransformation;
// For example, when this property is set to 16, activations are divided by 16.
// If ov::hint::activations_scale_factor is less than or equal to zero, it is disabled.

// Add scale_down and scale_up layers around Convolution and MatMul nodes
// Conv/MatMul
// ==>
// Multiply(scale_down by scale_factor) --> Conv/MatMul --> Multiply(scale_up by scale_factor)
class ov::pass::activations_scaling::ScaleDownSingleLayer : public ov::pass::MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("ScaleDownSingleLayer", "0");
ScaleDownSingleLayer(float scale_factor, ov::element::Type scaled_prec);
};
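
The ScaleDownSingleLayer rewrite above is value-preserving because Convolution and MatMul are linear in their activations: W·x == s · (W·(x/s)), so the scaled-down intermediate only keeps the accumulation in a safer fp16 range. A minimal plain-C++ check of that identity with arbitrary example values (not OpenVINO code, and ignoring bias or fused non-linearities, which the real pass has to handle separately):

#include <cassert>
#include <cmath>
#include <vector>

// Toy 1-D "MatMul": dot product of weights and activations.
float matmul(const std::vector<float>& w, const std::vector<float>& x) {
    float acc = 0.f;
    for (size_t i = 0; i < w.size(); ++i)
        acc += w[i] * x[i];
    return acc;
}

int main() {
    const std::vector<float> w = {0.5f, -1.25f, 2.0f};
    const std::vector<float> x = {4.0f, 8.0f, -2.0f};
    const float scale_factor = 16.0f;

    // Reference: MatMul on the original activations.
    const float reference = matmul(w, x);

    // Scale down the activations, run MatMul, then scale the output back up.
    std::vector<float> x_scaled = x;
    for (auto& v : x_scaled)
        v /= scale_factor;                                        // Multiply(scale_down)
    const float scaled = matmul(w, x_scaled) * scale_factor;      // Multiply(scale_up)

    assert(std::fabs(reference - scaled) < 1e-5f);
    return 0;
}
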

class ov::pass::activations_scaling::EliminateMultiplyScalar : public ov::pass::MatcherPass {
// Normalization and ShapeOf have the following property.
//
// Norm(input * const_a) = Norm(input)
//
// So, we can skip Multiply that is connected to Normalization and ShapeOf.
//
// input --> Multiply --> Normalization/ShapeOf
// ==>
// input --> Normalization/ShapeOf
class ov::pass::activations_scaling::EliminateScalarMul : public ov::pass::MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("EliminateMultiplyScalar", "0");
EliminateMultiplyScalar();
OPENVINO_MATCHER_PASS_RTTI("EliminateScalarMul", "0");
EliminateScalarMul();
};
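
EliminateScalarMul is safe because mean/variance style normalizations (MVN, LayerNorm) are invariant to a constant scalar factor on their input, and ShapeOf depends only on the input shape. A small self-contained check of the Norm(const_a * input) == Norm(input) property, again plain C++ with arbitrary values:

#include <cassert>
#include <cmath>
#include <vector>

// Mean/variance normalization, the core of MVN/LayerNorm.
std::vector<float> normalize(const std::vector<float>& x) {
    float mean = 0.f;
    for (float v : x) mean += v;
    mean /= x.size();

    float var = 0.f;
    for (float v : x) var += (v - mean) * (v - mean);
    var /= x.size();

    std::vector<float> out(x.size());
    for (size_t i = 0; i < x.size(); ++i)
        out[i] = (x[i] - mean) / std::sqrt(var + 1e-9f);
    return out;
}

int main() {
    const std::vector<float> x = {1.0f, -3.0f, 2.5f, 0.5f};
    const float const_a = 10.0f;  // the scalar Multiply to be eliminated

    std::vector<float> scaled = x;
    for (auto& v : scaled) v *= const_a;

    const auto n1 = normalize(x);
    const auto n2 = normalize(scaled);
    for (size_t i = 0; i < x.size(); ++i)
        assert(std::fabs(n1[i] - n2[i]) < 1e-4f);
    return 0;
}
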

// input_a const_a input_b const_b input_c const_c
// \ / \ / \ /
// Multiply_a Multiply_b Multiply_c
// \ | /
// \ | /
// ---------- Concat ------------
// ==>
// (const_a (const_b (const_c
// input_a /const_c) input_b /const_c) input_c /const_c)
// \ / \ / \ /
// Multiply_a Multiply_b Multiply_c
// \ | /
// \ | /
// ---------- Concat ------------
// | const_c
// | /
// Multiply
class ov::pass::activations_scaling::MulConcatTransformation : public ov::pass::MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("MulConcatTransformation", "0");
MulConcatTransformation();
};
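
Per element this is just the identity (input_a * const_a) == (input_a * (const_a / const_c)) * const_c, so dividing every branch constant by const_c and re-applying const_c once after the Concat leaves the result unchanged while exposing a single scalar Multiply for later passes to move or fuse. A tiny arithmetic check with made-up values:

#include <cassert>
#include <cmath>

int main() {
    const float input_a = 3.0f, input_b = -2.0f;   // one element per Concat branch
    const float const_a = 4.0f, const_b = 0.5f;    // per-branch scales
    const float const_c = const_b;                  // scale re-applied after the Concat

    // Before: each branch is multiplied by its own constant, then concatenated.
    const float before_a = input_a * const_a;
    const float before_b = input_b * const_b;

    // After: per-branch constants divided by const_c, plus one Multiply after Concat.
    const float after_a = input_a * (const_a / const_c) * const_c;
    const float after_b = input_b * (const_b / const_c) * const_c;

    assert(std::fabs(before_a - after_a) < 1e-6f);
    assert(std::fabs(before_b - after_b) < 1e-6f);
    return 0;
}
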

// input input
// / \ |
// Norm Mul ==> Mul (expect to be fused into the input layer)
// | | / \_
// op_a op_b Norm op_b
// |
// op_a
class ov::pass::activations_scaling::MulShareTransformation : public ov::pass::MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("MulShareTransformation", "0");
MulShareTransformation();
};

class ov::pass::activations_scaling::MulMulTransformation : public ov::pass::MatcherPass {
// input_b scalar input_a input_b
// \ / \ /
// input_a Mul_b ==> Mul_a' scalar
// \ / \ /
// Mul_a Mul_b' (expect to be merged with Mul_a')
class ov::pass::activations_scaling::MoveDownScalarMul : public ov::pass::MatcherPass {
public:
OPENVINO_MATCHER_PASS_RTTI("MulMulTransformation", "0");
MulMulTransformation();
OPENVINO_MATCHER_PASS_RTTI("MoveDownScalarMul", "0");
MoveDownScalarMul();
};
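
For reference, the renamed passes can be exercised directly with ov::pass::Manager, mirroring what the updated unit tests below do. This is only a sketch: the include path and the (8.0f, f16) arguments to ScaleDownSingleLayer are assumptions for illustration, and in the GPU plugin most of these passes are actually registered through the LowPrecision pipeline shown later in this commit.

#include <openvino/openvino.hpp>
#include <openvino/pass/manager.hpp>

// Header location is assumed for illustration; use the actual path of
// activations_scaling.hpp inside the repository.
#include "activations_scaling.hpp"

void run_activations_scaling(const std::shared_ptr<ov::Model>& model) {
    ov::pass::Manager manager;

    // Example arguments: scale factor 8 and f16 as the scaled precision.
    manager.register_pass<ov::pass::activations_scaling::ScaleDownSingleLayer>(8.0f, ov::element::f16);
    manager.register_pass<ov::pass::activations_scaling::EliminateScalarMul>();
    manager.register_pass<ov::pass::activations_scaling::MulConcatTransformation>();
    manager.register_pass<ov::pass::activations_scaling::MulShareTransformation>();
    manager.register_pass<ov::pass::activations_scaling::MoveDownScalarMul>();

    manager.run_passes(model);
}
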
@@ -48,10 +48,6 @@ using namespace ov::pass::activations_scaling;
using namespace ov::pass::pattern;
using ov::pass::pattern::op::Or;

// Add scale_down and scale_up layers around Convolution and MatMul nodes
// Conv/MatMul
// ==>
// Multiply(scale_down by scale_factor) --> Conv/MatMul --> Multiply(scale_up by scale_factor)
ov::pass::activations_scaling::ScaleDownSingleLayer::ScaleDownSingleLayer(float scale_factor,
ov::element::Type scaled_prec) {
MATCHER_SCOPE(ScaleDownSingleLayer);
@@ -184,17 +180,8 @@ ov::pass::activations_scaling::ScaleDownSingleLayer::ScaleDownSingleLayer(float
this->register_matcher(m, callback);
}

// Normalization has the following property.
//
// Norm(input * const_a) = Norm(input)
//
// So, we can skip Multiply that is connected to Normalization.
//
// input --> Multiply --> Normalization
// ==>
// input --> Normalization
ov::pass::activations_scaling::EliminateMultiplyScalar::EliminateMultiplyScalar() {
MATCHER_SCOPE(EliminateMultiplyScalar);
ov::pass::activations_scaling::EliminateScalarMul::EliminateScalarMul() {
MATCHER_SCOPE(EliminateScalarMul);

auto activation_m = any_input(is_non_const_node);
auto convert_m = ov::pass::pattern::optional<ov::op::v0::Convert>(activation_m);
@@ -221,27 +208,10 @@ ov::pass::activations_scaling::EliminateMultiplyScalar::EliminateMultiplyScalar(
return true;
};

auto m = std::make_shared<ov::pass::pattern::Matcher>(norm_m, "EliminateMultiplyScalar");
auto m = std::make_shared<ov::pass::pattern::Matcher>(norm_m, "EliminateScalarMul");
this->register_matcher(m, callback);
}

// input_a const_a input_b const_b input_c const_c
// \ / \ / \ /
// Multiply_a Multiply_b Multiply_c
// \ | /
// \ | /
// ---------- Concat ------------
// ==>
// (const_a (const_b (const_c
// input_a /const_c) input_b /const_c) input_c /const_c)
// \ / \ / \ /
// Multiply_a Multiply_b Multiply_c
// \ | /
// \ | /
// ---------- Concat ------------
// | const_c
// | /
// Multiply
ov::pass::activations_scaling::MulConcatTransformation::MulConcatTransformation() {
MATCHER_SCOPE(MulConcatTransformation);

@@ -328,13 +298,6 @@ ov::pass::activations_scaling::MulConcatTransformation::MulConcatTransformation(
this->register_matcher(m, callback);
}

// input input
// / \ |
// Norm Mul ==> Mul (expect to be fused into the input layer)
// | | / \_
// op_a op_b Norm op_b
// |
// op_a
ov::pass::activations_scaling::MulShareTransformation::MulShareTransformation() {
MATCHER_SCOPE(MulShareTransformation);

@@ -383,13 +346,8 @@ ov::pass::activations_scaling::MulShareTransformation::MulShareTransformation()
this->register_matcher(m, callback);
}

// input_b scalar input_a input_b
// \ / \ /
// input_a Mul_b ==> Mul_a' scalar
// \ / \ /
// Mul_a Mul_b' (expect to be merged with Mul_a')
ov::pass::activations_scaling::MulMulTransformation::MulMulTransformation() {
MATCHER_SCOPE(MulMulTransformation);
ov::pass::activations_scaling::MoveDownScalarMul::MoveDownScalarMul() {
MATCHER_SCOPE(MoveDownScalarMul);

auto activation_b_m = any_input(is_non_const_node);
auto mul_const_m = ov::pass::pattern::wrap_type<ov::op::v0::Constant>(is_scalar_node);
@@ -429,6 +387,6 @@ ov::pass::activations_scaling::MulMulTransformation::MulMulTransformation() {
return true;
};

auto m = std::make_shared<ov::pass::pattern::Matcher>(mul_a_m, "MulMulTransformation");
auto m = std::make_shared<ov::pass::pattern::Matcher>(mul_a_m, "MoveDownScalarMul");
this->register_matcher(m, callback);
}
@@ -70,7 +70,7 @@ TEST_F(TransformationTestsF, ScaleDownSingleLayerTest) {
}
}

TEST_F(TransformationTestsF, EliminateMultiplyScalarTest) {
TEST_F(TransformationTestsF, EliminateScalarMulTest) {
{
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{1, 3, 16, 16});
auto scale_const = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {10});
@@ -83,7 +83,7 @@ TEST_F(TransformationTestsF, EliminateMultiplyScalarTest) {
auto result = std::make_shared<ov::op::v0::Result>(convert);

model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input});
manager.register_pass<ov::pass::activations_scaling::EliminateMultiplyScalar>();
manager.register_pass<ov::pass::activations_scaling::EliminateScalarMul>();
}
{
auto input = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{1, 3, 16, 16});
@@ -130,7 +130,7 @@ TEST_F(TransformationTestsF, ConcatTransformationTest) {
}
}

TEST_F(TransformationTestsF, MulMulTransformationTest) {
TEST_F(TransformationTestsF, MoveDownScalarMulTest) {
{
auto input0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{6, 12, 10, 24});
auto scale_const0 = ov::op::v0::Constant::create(ov::element::f16, ov::Shape{1}, {10});
@@ -141,7 +141,7 @@ TEST_F(TransformationTestsF, MulMulTransformationTest) {
auto result = std::make_shared<ov::op::v0::Result>(convert);

model = std::make_shared<ov::Model>(ov::ResultVector{result}, ov::ParameterVector{input0, input1});
manager.register_pass<ov::pass::activations_scaling::MulMulTransformation>();
manager.register_pass<ov::pass::activations_scaling::MoveDownScalarMul>();
}
{
auto input0 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::PartialShape{6, 12, 10, 24});
4 changes: 2 additions & 2 deletions src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -960,9 +960,9 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
// Move down scalar-multiply layers as much as possible
auto params = LayerTransformation::Params(false, infer_precision, {infer_precision}, true, false);
auto lpt_pass = manager.register_pass<LowPrecision>(supportedPrecisions, perTensorQuantization, params);
lpt_pass->add_main<ov::pass::activations_scaling::EliminateMultiplyScalar>();
lpt_pass->add_main<ov::pass::activations_scaling::EliminateScalarMul>();
lpt_pass->add_main<ov::pass::activations_scaling::MulConcatTransformation>();
lpt_pass->add_main<ov::pass::activations_scaling::MulMulTransformation>();
lpt_pass->add_main<ov::pass::activations_scaling::MoveDownScalarMul>();

// Move up remaining scalar-multiply layers
manager.register_pass<ov::pass::EliminateEltwise>();
@@ -145,9 +145,9 @@ class ActivationsScaling : public testing::WithParamInterface<ActivationsScaling

auto add12 = std::make_shared<ov::op::v1::Add>(transpose1, transpose2);

auto concat0 = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{transpose0, add12}, 3);
auto concat0 = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{transpose0, add12}, 0);

auto concat1 = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{add12, transpose2}, 3);
auto concat1 = std::make_shared<ov::op::v0::Concat>(ov::NodeVector{add12, transpose2}, 0);

auto add = std::make_shared<ov::op::v1::Add>(concat0, concat1);
