diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
index 842d35362..b571287c8 100644
--- a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
+++ b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
@@ -651,6 +651,8 @@ class TTIR_ReductionOp<string mnemonic, list<Trait> traits = []> :
       return {builder.getAffineMapArrayAttr(indexingMaps),
               builder.getArrayAttr(iteratorTypes)};}
   }];
+
+  let hasVerifier = 1;
 }
 
 def TTIR_SumOp : TTIR_ReductionOp<"sum"> {
diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
index b91595947..0d1d235bb 100644
--- a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
+++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
@@ -581,6 +581,8 @@ class TTNN_ReductionOp<string mnemonic, list<Trait> traits = []> : TTNN_Op<mnemonic, traits> {
                          OptionalAttr<I32ArrayAttr>:$dim_arg);
 
     let results = (outs AnyRankedTensor:$result);
+
+    let hasVerifier = 1;
 }
 
 def TTNN_SumOp : TTNN_ReductionOp<"sum"> {
diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td
index 94d05eadc..8d20a2bcc 100644
--- a/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td
+++ b/include/ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.td
@@ -145,6 +145,7 @@ def TTNN_TTNNLayoutAttr: TTNN_Attr<"TTNNLayout", "ttnn_layout"> {
     TTNNLayoutAttr withMemoryLayout(::mlir::MLIRContext *context, TensorMemoryLayoutAttr memLayoutAttr);
     TTNNLayoutAttr withMemoryLayout(::mlir::MLIRContext *context, TensorMemoryLayout memLayout);
     TTNNLayoutAttr withShardShape(::mlir::MLIRContext *context, llvm::SmallVector<int64_t> shardShape);
+    TTNNLayoutAttr withTensorShape(::mlir::MLIRContext *context, ArrayRef<int64_t> tensorShape);
 
     bool isSystemBufferType() const { return ::mlir::tt::ttnn::isSystemBufferType(getBufferType()); }
     bool isDeviceBufferType() const { return ::mlir::tt::ttnn::isDeviceBufferType(getBufferType()); }
diff --git a/include/ttmlir/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.h b/include/ttmlir/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.h
new file mode 100644
index 000000000..741fbfc06
--- /dev/null
+++ b/include/ttmlir/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.h
@@ -0,0 +1,140 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#ifndef TTMLIR_DIALECT_TTNN_TRANSFORMS_WORKAROUNDS_DECOMPOSITION_REDUCEOPSREWRITEPATTERN_H
+#define TTMLIR_DIALECT_TTNN_TRANSFORMS_WORKAROUNDS_DECOMPOSITION_REDUCEOPSREWRITEPATTERN_H
+
+#include "ttmlir/Dialect/TTNN/IR/TTNNOps.h"
+
+#include "mlir/IR/BuiltinAttributes.h"
+#include "mlir/IR/BuiltinTypes.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Support/LogicalResult.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace mlir::tt::ttnn::workarounds::decomposition {
+
+// Extracts the values of the reduce dimensions from the dimArg attribute.
+// If dimArg is not specified, returns an empty vector.
+llvm::SmallVector<int64_t>
+getReduceDims(const std::optional<mlir::ArrayAttr> &dimArg);
+
+// Calculates the shape of the new Reduce op created in the workaround, based
+// on the input shape and the reduce dimensions.
+llvm::SmallVector<int64_t>
+calculateNewReduceShape(RankedTensorType inputType,
+                        const std::optional<mlir::ArrayAttr> &dimArg);
+
+// This workaround addresses the following Metal issue:
+// https://github.com/tenstorrent/tt-metal/issues/13361
+//
+// TODO(mrakita): Remove this workaround once these Metal issues are fixed
+// (tracked by https://github.com/tenstorrent/tt-mlir/issues/1624).
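+//
+// For illustration, on a hypothetical IR snippet (shapes and layouts chosen
+// for exposition only, mirroring the silicon tests below), the rewrite turns
+//   %0 = "ttnn.sum"(%arg0) <{dim_arg = [1 : i32], keep_dim = false}>
+//            : (tensor<128x10xf32, ...>) -> tensor<128xf32, ...>
+// into a keep_dim=true reduce followed by a reshape that restores the
+// expected rank:
+//   %0 = "ttnn.sum"(%arg0) <{dim_arg = [1 : i32], keep_dim = true}>
+//            : (tensor<128x10xf32, ...>) -> tensor<128x1xf32, ...>
+//   %1 = "ttnn.reshape"(%0) <{shape = [128 : i32]}>
+//            : (tensor<128x1xf32, ...>) -> tensor<128xf32, ...>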
+//
+template <typename ReduceOp>
+class ReduceOpsKeepDimRewritePattern : public OpRewritePattern<ReduceOp> {
+public:
+  using OpRewritePattern<ReduceOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(ReduceOp srcOp,
+                                PatternRewriter &rewriter) const override {
+    if (srcOp.getKeepDim()) {
+      return failure();
+    }
+
+    RankedTensorType inputType = srcOp.getInput().getType();
+    RankedTensorType outputType = srcOp.getResult().getType();
+
+    ReduceOp newReduceOp =
+        createReduceOpWithKeepDim(srcOp, rewriter, inputType, outputType);
+
+    // The Metal TTNN implementation of Reduce ops doesn't yet support
+    // keepDim=false. As a workaround, we convert a Reduce op into a
+    // combination of a Reduce op with keepDim=true and a Reshape op that
+    // removes the reduce dims, so that the rest of the graph is not affected.
+    // When this is not needed (for example, because type converters have
+    // already promoted the rank of the op result), we avoid adding an
+    // unnecessary Reshape op.
+    if (outputType.getShape().size() < inputType.getShape().size()) {
+      replaceOpWithReshapeOp(srcOp, newReduceOp, rewriter, outputType);
+    } else {
+      rewriter.replaceOp(srcOp, newReduceOp);
+    }
+
+    return success();
+  }
+
+private:
+  ReduceOp createReduceOpWithKeepDim(ReduceOp srcOp, PatternRewriter &rewriter,
+                                     RankedTensorType inputType,
+                                     RankedTensorType outputType) const {
+    llvm::SmallVector<int64_t> outputShapeVec =
+        calculateNewReduceShape(inputType, srcOp.getDimArg());
+
+    TTNNLayoutAttr newOutputLayoutAttr =
+        mlir::cast<TTNNLayoutAttr>(outputType.getEncoding())
+            .withTensorShape(rewriter.getContext(), outputShapeVec);
+
+    RankedTensorType newOutputType = RankedTensorType::get(
+        outputShapeVec, outputType.getElementType(), newOutputLayoutAttr);
+
+    return rewriter.create<ReduceOp>(srcOp.getLoc(), newOutputType,
+                                     srcOp.getInput(), true /*keep_dim*/,
+                                     srcOp.getDimArg().value_or(nullptr));
+  }
+
+  void replaceOpWithReshapeOp(ReduceOp srcOp, ReduceOp newReduceOp,
+                              PatternRewriter &rewriter,
+                              RankedTensorType outputType) const {
+    mlir::ArrayAttr shapeAttr = rewriter.getI32ArrayAttr(
+        llvm::SmallVector<int32_t>(outputType.getShape()));
+
+    rewriter.replaceOpWithNewOp<ttnn::ReshapeOp>(
+        srcOp, outputType, newReduceOp, shapeAttr);
+  }
+};
+
+// This workaround addresses the following Metal issue:
+// https://github.com/tenstorrent/tt-metal/issues/16118
+//
+// TODO(mrakita): Remove this workaround once these Metal issues are fixed
+// (tracked by https://github.com/tenstorrent/tt-mlir/issues/1624).
+//
+template <typename ReduceOp>
+class ReduceOpsAllDimsRewritePattern : public OpRewritePattern<ReduceOp> {
+public:
+  using OpRewritePattern<ReduceOp>::OpRewritePattern;
+
+  LogicalResult matchAndRewrite(ReduceOp srcOp,
+                                PatternRewriter &rewriter) const override {
+    if (!srcOp.getDimArg() || srcOp.getDimArg()->empty()) {
+      return failure();
+    }
+
+    llvm::SmallVector<int64_t> reduceDims = getReduceDims(srcOp.getDimArg());
+    llvm::SmallSet<int64_t, 4> uniqueReduceDims(reduceDims.begin(),
+                                                reduceDims.end());
+
+    // Check if the reduce is done over all dimensions of the input tensor.
+    if (uniqueReduceDims.size() !=
+        srcOp.getInput().getType().getShape().size()) {
+      return failure();
+    }
+
+    // When the reduce is done over all dimensions of the input, we need to
+    // unset the dimensions attribute, because Metal supports reduce over all
+    // dimensions for any tensor rank when reduce dimensions are not
+    // specified, but it doesn't support reduce for tensors with rank larger
+    // than 2 when reduce dimensions are specified.
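+    // As a hypothetical example (mirroring the silicon tests below), a
+    // "ttnn.sum" over every dimension of a tensor<128x10xf32, ...>, i.e.
+    // dim_arg = [0 : i32, 1 : i32], is recreated with dim_arg unset so that
+    // Metal performs the reduce over all dimensions regardless of rank.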
+    rewriter.replaceOpWithNewOp<ReduceOp>(srcOp, srcOp.getResult().getType(),
+                                          srcOp.getInput(), srcOp.getKeepDim(),
+                                          nullptr);
+
+    return success();
+  }
+};
+
+} // namespace mlir::tt::ttnn::workarounds::decomposition
+
+#endif // TTMLIR_DIALECT_TTNN_TRANSFORMS_WORKAROUNDS_DECOMPOSITION_REDUCEOPSREWRITEPATTERN_H
diff --git a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp
index 4eeec92dc..cdab2a4b7 100644
--- a/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp
+++ b/lib/Conversion/StableHLOToTTIR/StableHLOToTTIRPatterns.cpp
@@ -116,10 +116,9 @@ class StableHLOToTTIRReduceOpConversionPattern
     tensor::EmptyOp outputTensor = rewriter.create<tensor::EmptyOp>(
         srcOp.getLoc(), outputType.getShape(), outputType.getElementType());
 
-    mlir::ArrayAttr dimArg = rewriter.getArrayAttr(SmallVector<Attribute>(
-        1, rewriter.getI32IntegerAttr(adaptor.getDimensionsAttr().size() > 0
-                                          ? adaptor.getDimensionsAttr()[0]
-                                          : 1)));
+    // Can't reuse the original dimensions attribute because it uses i64 type.
+    mlir::ArrayAttr dimArg = rewriter.getI32ArrayAttr(
+        llvm::SmallVector<int32_t>(srcOp.getDimensions()));
 
     rewriter.replaceOpWithNewOp<DestOp>(
         srcOp, outputType, adaptor.getInputs().front(), outputTensor,
diff --git a/lib/Dialect/TTIR/IR/TTIROps.cpp b/lib/Dialect/TTIR/IR/TTIROps.cpp
index 52e68b811..83bb98baa 100644
--- a/lib/Dialect/TTIR/IR/TTIROps.cpp
+++ b/lib/Dialect/TTIR/IR/TTIROps.cpp
@@ -18,6 +18,7 @@
 #include "mlir/IR/BuiltinTypes.h"
 #include "mlir/IR/Location.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/LogicalResult.h"
 
@@ -1672,32 +1673,32 @@ static void buildGenericEltwiseUnaryRegion(::mlir::Location loc,
   opBuilder.create<mlir::tt::ttir::YieldOp>(loc, mlir::ValueRange({result}));
 }
 
-// AddOp generic region builder
+// AddOp generic region builder.
 void mlir::tt::ttir::AddOp::buildGenericRegion(::mlir::OpBuilder &opBuilder,
                                                ::mlir::Block *block) {
   buildGenericEltwiseBinaryRegion<arith::AddFOp>(getLoc(), opBuilder, block);
 }
 
-// MultiplyOp generic region builder
+// MultiplyOp generic region builder.
 void mlir::tt::ttir::MultiplyOp::buildGenericRegion(
     ::mlir::OpBuilder &opBuilder, ::mlir::Block *block) {
   buildGenericEltwiseBinaryRegion<arith::MulFOp>(getLoc(), opBuilder, block);
 }
 
-// ExpOp generic region builder
+// ExpOp generic region builder.
 void mlir::tt::ttir::ExpOp::buildGenericRegion(::mlir::OpBuilder &opBuilder,
                                                ::mlir::Block *block) {
   buildGenericEltwiseUnaryRegion<math::ExpOp>(getLoc(), opBuilder, block);
 }
 
-// DivOp generic region builder
+// DivOp generic region builder.
 void mlir::tt::ttir::DivOp::buildGenericRegion(::mlir::OpBuilder &opBuilder,
                                                ::mlir::Block *block) {
   return buildGenericEltwiseBinaryRegion<arith::DivFOp>(getLoc(), opBuilder,
                                                         block);
 }
 
-// MaximumOp generic region builder
+// MaximumOp generic region builder.
 void mlir::tt::ttir::MaximumOp::buildGenericRegion(::mlir::OpBuilder &opBuilder,
                                                    ::mlir::Block *block) {
   buildGenericEltwiseBinaryRegion<arith::MaximumFOp>(getLoc(), opBuilder,
                                                      block);
 }
 
@@ -1708,7 +1709,7 @@ void mlir::tt::ttir::MaximumOp::buildGenericRegion(::mlir::OpBuilder &opBuilder,
 // KernelOp
 //===----------------------------------------------------------------------===//
 
-// KernelOp builders
+// KernelOp builders.
 static mlir::tt::ttir::KernelOp
 buildKernelOp(::mlir::OpBuilder &opBuilder, ::mlir::Location loc,
               ::mlir::StringRef kernelName, ::mlir::StringRef kernelKind,
@@ -1717,7 +1718,7 @@ buildKernelOp(::mlir::OpBuilder &opBuilder, ::mlir::Location loc,
       loc, outputs.getTypes(), kernelName, kernelKind, inputs, outputs);
 }
 
-// Reduce op kernel builder
+// Reduce op kernel builder.
 static void createReduceOp(::mlir::OpBuilder &opBuilder, ::mlir::Block *block,
                            mlir::Location loc, ::mlir::StringRef kernelKind) {
   auto kernelOp = buildKernelOp(opBuilder, loc, "reduce", kernelKind,
@@ -1725,23 +1726,81 @@ static void createReduceOp(::mlir::OpBuilder &opBuilder, ::mlir::Block *block,
   opBuilder.create<mlir::tt::ttir::YieldOp>(loc, kernelOp->getResults());
 }
 
-// Sum op kernel builder
-void mlir::tt::ttir::SumOp::buildGenericRegion(::mlir::OpBuilder &opBuilder,
+// Common verifier for all Reduce ops.
+static mlir::LogicalResult
+verifyReduceOp(mlir::Operation *reduceOp, mlir::RankedTensorType inputType,
+               const std::optional<mlir::ArrayAttr> &reduceDims) {
+  if (!reduceDims) {
+    return mlir::success();
+  }
+
+  int64_t inputTensorRank = inputType.getRank();
+
+  llvm::SmallSet<int64_t, 4> uniqueReduceDims;
+  for (mlir::Attribute reduceDim : *reduceDims) {
+    int64_t reduceDimInt = mlir::cast<mlir::IntegerAttr>(reduceDim).getInt();
+    if (reduceDimInt < -inputTensorRank || reduceDimInt >= inputTensorRank) {
+      return reduceOp->emitOpError("Reduce dimensions are out of range");
+    }
+    uniqueReduceDims.insert(reduceDimInt);
+  }
+
+  if (uniqueReduceDims.size() != reduceDims->size()) {
+    return reduceOp->emitOpError("Reduce dimensions are not unique");
+  }
+
+  // TODO(mrakita): Add a check that computes the expected output shape from
+  // the input shape, reduceDims and keepDim, and verifies that it matches
+  // the actual output shape. Tracked by:
+  // https://github.com/tenstorrent/tt-mlir/issues/1639
+
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// MaxOp
+//===----------------------------------------------------------------------===//
+
+// MaxOp kernel builder.
+void mlir::tt::ttir::MaxOp::buildGenericRegion(::mlir::OpBuilder &opBuilder,
                                                ::mlir::Block *block) {
   // NOLINTNEXTLINE
-  createReduceOp(opBuilder, block, getLoc(), "sum");
+  createReduceOp(opBuilder, block, getLoc(), "max");
+}
+
+// MaxOp verification.
+::mlir::LogicalResult mlir::tt::ttir::MaxOp::verify() {
+  return verifyReduceOp(getOperation(), getInput().getType(), getDimArg());
 }
 
-// Mean op kernel builder
+//===----------------------------------------------------------------------===//
+// MeanOp
+//===----------------------------------------------------------------------===//
+
+// MeanOp kernel builder.
 void mlir::tt::ttir::MeanOp::buildGenericRegion(::mlir::OpBuilder &opBuilder,
                                                 ::mlir::Block *block) {
   // NOLINTNEXTLINE
   createReduceOp(opBuilder, block, getLoc(), "mean");
 }
 
-// Max op kernel builder
-void mlir::tt::ttir::MaxOp::buildGenericRegion(::mlir::OpBuilder &opBuilder,
+// MeanOp verification.
+::mlir::LogicalResult mlir::tt::ttir::MeanOp::verify() {
+  return verifyReduceOp(getOperation(), getInput().getType(), getDimArg());
+}
+
+//===----------------------------------------------------------------------===//
+// SumOp
+//===----------------------------------------------------------------------===//
+
+// SumOp kernel builder.
+void mlir::tt::ttir::SumOp::buildGenericRegion(::mlir::OpBuilder &opBuilder,
                                                ::mlir::Block *block) {
   // NOLINTNEXTLINE
-  createReduceOp(opBuilder, block, getLoc(), "max");
+  createReduceOp(opBuilder, block, getLoc(), "sum");
+}
+
+// SumOp verification.
+::mlir::LogicalResult mlir::tt::ttir::SumOp::verify() {
+  return verifyReduceOp(getOperation(), getInput().getType(), getDimArg());
 }
diff --git a/lib/Dialect/TTNN/IR/TTNNOps.cpp b/lib/Dialect/TTNN/IR/TTNNOps.cpp
index e3fc5a33c..286393858 100644
--- a/lib/Dialect/TTNN/IR/TTNNOps.cpp
+++ b/lib/Dialect/TTNN/IR/TTNNOps.cpp
@@ -1310,4 +1310,52 @@ ::mlir::LogicalResult mlir::tt::ttnn::PermuteOp::verify() {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// Reduction ops
+//===----------------------------------------------------------------------===//
+
+// Common verifier for all Reduction ops.
+static mlir::LogicalResult
+verifyReduceOp(mlir::Operation *reduceOp, mlir::RankedTensorType inputType,
+               const std::optional<mlir::ArrayAttr> &reduceDims) {
+  int64_t inputTensorRank = inputType.getRank();
+
+  // TODO(mrakita): Only the last two dimensions can be reduced, check for
+  // that too.
+  if (reduceDims && reduceDims->size() > 2 &&
+      static_cast<int64_t>(reduceDims->size()) != inputTensorRank) {
+    return reduceOp->emitOpError("Reduce on more than two dimensions is not "
+                                 "currently supported by TTNN");
+  }
+
+  return mlir::success();
+}
+
+//===----------------------------------------------------------------------===//
+// MaxOp
+//===----------------------------------------------------------------------===//
+
+// MaxOp verification.
+::mlir::LogicalResult MaxOp::verify() {
+  return verifyReduceOp(getOperation(), getInput().getType(), getDimArg());
+}
+
+//===----------------------------------------------------------------------===//
+// MeanOp
+//===----------------------------------------------------------------------===//
+
+// MeanOp verification.
+::mlir::LogicalResult MeanOp::verify() {
+  return verifyReduceOp(getOperation(), getInput().getType(), getDimArg());
+}
+
+//===----------------------------------------------------------------------===//
+// SumOp
+//===----------------------------------------------------------------------===//
+
+// SumOp verification.
+::mlir::LogicalResult SumOp::verify() {
+  return verifyReduceOp(getOperation(), getInput().getType(), getDimArg());
+}
+
 } // namespace mlir::tt::ttnn
diff --git a/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp b/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp
index c7bf769dd..d16a74822 100644
--- a/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp
+++ b/lib/Dialect/TTNN/IR/TTNNOpsAttrs.cpp
@@ -494,6 +494,24 @@ TTNNLayoutAttr::withShardShape(::mlir::MLIRContext *context,
                        getMemLayout());
 }
 
+// Construct a new TTNNLayoutAttr
+//
+// This function creates a deep copy of the current TTNNLayoutAttr and
+// applies the changes necessary to fit the new tensor shape.
+//
+// param context The MLIR context.
+// param tensorShape The new tensor shape.
+// return The new TTNNLayoutAttr with the given tensor shape.
+TTNNLayoutAttr TTNNLayoutAttr::withTensorShape(::mlir::MLIRContext *context,
+                                               ArrayRef<int64_t> tensorShape) {
+  // TODO(mrakita): This leaves the default value of the collapseIntervals
+  // parameter, which might differ from the original value used to create the
+  // layout attribute. This works for now since we always use the default
+  // value, but in the future we will need to take this into account.
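+  //
+  // For example, when the keepDim workaround shrinks a reduce output from
+  // tensor<128x10xf32> to tensor<128x1xf32>, this helper recreates the layout
+  // attribute for the new shape while reusing the element type, buffer type,
+  // grid and memory layout of the original attribute.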
+  return TTNNLayoutAttr::get(context, tensorShape, getElementType(),
+                             getBufferType(), getGrid(), getMemLayout());
+}
+
 // Construct a new TTNNLayoutAttr
 //
 // This function constructs a new TTNNLayoutAttr with the given parameters.
diff --git a/lib/Dialect/TTNN/Transforms/CMakeLists.txt b/lib/Dialect/TTNN/Transforms/CMakeLists.txt
index fd21e03d0..1aae802c6 100644
--- a/lib/Dialect/TTNN/Transforms/CMakeLists.txt
+++ b/lib/Dialect/TTNN/Transforms/CMakeLists.txt
@@ -3,7 +3,8 @@ add_mlir_dialect_library(MLIRTTNNTransforms
   Passes.cpp
   TTNNLayout.cpp
   TTNNToCpp.cpp
-  TTNNWorkarounds.cpp
+  Workarounds/Decomposition/ReduceOpsRewritePattern.cpp
+  Workarounds/TTNNWorkarounds.cpp
 
   ADDITIONAL_HEADER_DIRS
   ${PROJECT_SOURCE_DIR}/include/ttmlir
diff --git a/lib/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.cpp b/lib/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.cpp
new file mode 100644
index 000000000..99b61ef0b
--- /dev/null
+++ b/lib/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.cpp
@@ -0,0 +1,50 @@
+// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
+//
+// SPDX-License-Identifier: Apache-2.0
+
+#include "ttmlir/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.h"
+
+#include <cstdint>
+
+namespace mlir::tt::ttnn::workarounds::decomposition {
+
+llvm::SmallVector<int64_t>
+getReduceDims(const std::optional<mlir::ArrayAttr> &dimArg) {
+  llvm::SmallVector<int64_t> reduceDims;
+  if (!dimArg) {
+    return reduceDims;
+  }
+
+  for (const mlir::Attribute &reduceDim : *dimArg) {
+    reduceDims.push_back(mlir::cast<mlir::IntegerAttr>(reduceDim).getInt());
+  }
+
+  return reduceDims;
+}
+
+llvm::SmallVector<int64_t>
+calculateNewReduceShape(RankedTensorType inputType,
+                        const std::optional<mlir::ArrayAttr> &dimArg) {
+  llvm::SmallVector<int64_t> outputShapeVec(inputType.getShape());
+  llvm::SmallVector<int64_t> reduceDims = getReduceDims(dimArg);
+
+  if (reduceDims.empty()) {
+    // When reduce dimensions are not specified, we are reducing over all
+    // dimensions, so all dimensions of the output shape become 1.
+    std::fill(outputShapeVec.begin(), outputShapeVec.end(), 1);
+  } else {
+    // Dimensions can be specified as negative numbers, so to calculate the
+    // index in the output shape vector we need to add the output shape rank
+    // to them.
+    int64_t outputShapeRank = static_cast<int64_t>(outputShapeVec.size());
+    for (const int64_t reduceDim : reduceDims) {
+      int64_t outputShapeIndex =
+          reduceDim < 0 ? outputShapeRank + reduceDim : reduceDim;
+      outputShapeVec[static_cast<size_t>(outputShapeIndex)] = 1;
+    }
+  }
+
+  return outputShapeVec;
+}
+
+} // namespace mlir::tt::ttnn::workarounds::decomposition
diff --git a/lib/Dialect/TTNN/Transforms/TTNNWorkarounds.cpp b/lib/Dialect/TTNN/Transforms/Workarounds/TTNNWorkarounds.cpp
similarity index 88%
rename from lib/Dialect/TTNN/Transforms/TTNNWorkarounds.cpp
rename to lib/Dialect/TTNN/Transforms/Workarounds/TTNNWorkarounds.cpp
index 2c0c48dbc..eed6af498 100644
--- a/lib/Dialect/TTNN/Transforms/TTNNWorkarounds.cpp
+++ b/lib/Dialect/TTNN/Transforms/Workarounds/TTNNWorkarounds.cpp
@@ -8,6 +8,7 @@
 #include "ttmlir/Dialect/TTNN/IR/TTNNOps.h"
 #include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h"
 #include "ttmlir/Dialect/TTNN/IR/TTNNWorkarounds.h"
+#include "ttmlir/Dialect/TTNN/Transforms/Workarounds/Decomposition/ReduceOpsRewritePattern.h"
 #include "ttmlir/Dialect/TTNN/Types/Types.h"
 #include "ttmlir/Dialect/TTNN/Utils/TransformUtils.h"
 #include "ttmlir/Dialect/TTNN/Utils/Utils.h"
@@ -399,44 +400,55 @@ class TTNNWorkarounds : public impl::TTNNWorkaroundsBase<TTNNWorkarounds> {
 
   void runOnOperation() final {
     if (decompositionWorkaroundsEnabled) {
-      // Placeholder for workaround decomposition patterns.
       RewritePatternSet patterns(&getContext());
-      patterns.add(&getContext());
-
-      FrozenRewritePatternSet patternSet(std::move(patterns));
-      GreedyRewriteConfig config = GreedyRewriteConfig();
-      config.useTopDownTraversal = true;
-      config.maxIterations = GreedyRewriteConfig::kNoLimit;
-      if (failed(applyPatternsAndFoldGreedily(getOperation(), patternSet,
-                                              config))) {
-        signalPassFailure();
-        return;
-      }
+      patterns.add<workarounds::decomposition::ReduceOpsKeepDimRewritePattern<
+                       ttnn::SumOp>,
+                   workarounds::decomposition::ReduceOpsKeepDimRewritePattern<
+                       ttnn::MaxOp>,
+                   workarounds::decomposition::ReduceOpsKeepDimRewritePattern<
+                       ttnn::MeanOp>,
+                   workarounds::decomposition::ReduceOpsAllDimsRewritePattern<
+                       ttnn::SumOp>,
+                   workarounds::decomposition::ReduceOpsAllDimsRewritePattern<
+                       ttnn::MaxOp>,
+                   workarounds::decomposition::ReduceOpsAllDimsRewritePattern<
+                       ttnn::MeanOp>>(&getContext());
+
+      runRewritePatterns(std::move(patterns),
+                         GreedyRewriteConfig::kNoLimit /*maxIterations*/);
     }
     if (layouotWorkaroundsEnabled) {
       RewritePatternSet patterns(&getContext());
       patterns.add(&getContext());
 
-      FrozenRewritePatternSet patternSet(std::move(patterns));
-      GreedyRewriteConfig config = GreedyRewriteConfig();
-      // This configuration specifies that the rewriter should traverse the IR
-      // in a top-down order.
-      config.useTopDownTraversal = true;
-      // This configuration specifies the maximum number of iterations the
-      // rewriter will perform on the IR. The rewriter will iterate through the
-      // IR until a fixpoint is reached. All workarounds should be applied
-      // during the first iteration. If the workarounds are not applied in the
-      // first iteration, it indicates a bug in the workarounds implementation.
-      // Although the workarounds are applied in the first iteration, the
-      // rewriter must iterate through the IR once more to confirm that the
-      // fixpoint is reached. If the fixpoint is not reached in the second
-      // iteration, it indicates a bug in the workarounds implementation.
-      config.maxIterations = 2;
-      if (failed(applyPatternsAndFoldGreedily(getOperation(), patternSet,
-                                              config))) {
-        signalPassFailure();
-        return;
-      }
+      // All layout workarounds should be applied during the first iteration.
+      // If the workarounds are not applied in the first iteration, it
+      // indicates a bug in the workarounds implementation. Although the
+      // workarounds are applied in the first iteration, the rewriter must
+      // iterate through the IR once more to confirm that the fixpoint is
+      // reached. If the fixpoint is not reached in the second iteration, it
+      // indicates a bug in the workarounds implementation.
+      const int64_t maxIterations = 2;
+      runRewritePatterns(std::move(patterns), maxIterations);
+    }
+  }
+
+private:
+  // Runs rewrite patterns with the specified maximum number of iterations
+  // that the rewriter will perform on the IR. The rewriter iterates through
+  // the IR until a fixpoint is reached.
+  void runRewritePatterns(RewritePatternSet &&patterns, int64_t maxIterations) {
+    FrozenRewritePatternSet patternSet(std::move(patterns));
+    GreedyRewriteConfig config = GreedyRewriteConfig();
+    config.maxIterations = maxIterations;
+    // This configuration specifies that the rewriter should traverse the IR
+    // in a top-down order.
+    config.useTopDownTraversal = true;
+    if (failed(
+            applyPatternsAndFoldGreedily(getOperation(), patternSet, config))) {
+      signalPassFailure();
+      return;
+    }
+  }
 };
diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/reduce_add_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/reduce_add_op.mlir
index b7058b3de..66f3ce4e1 100644
--- a/test/ttmlir/Conversion/StableHLOToTTIR/reduce_add_op.mlir
+++ b/test/ttmlir/Conversion/StableHLOToTTIR/reduce_add_op.mlir
@@ -1,10 +1,113 @@
 // REQUIRES: stablehlo
 // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s
 module @jit_reduce_add attributes {} {
-  func.func public @test_reduce_add(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+  func.func public @test_reduce_add_4to3dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128x32x4xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.sum"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x32x4xf32>
+    // CHECK-SAME: -> tensor<128x32x4xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128x32x4xf32>
+    return %0 : tensor<128x32x4xf32>
+  }
+
+  func.func public @test_reduce_add_4to2dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128x32xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.sum"
+    // CHECK-SAME: dim_arg = [1 : i32, 3 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x32x4xf32>
+    // CHECK-SAME: -> tensor<128x32xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128x32xf32>
+    return %0 : tensor<128x32xf32>
+  }
+
+  func.func public @test_reduce_add_4to1dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.sum"
+    // CHECK-SAME: dim_arg = [1 : i32, 2 : i32, 3 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x32x4xf32>
+    // CHECK-SAME: -> tensor<128xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128xf32>
+    return %0 : tensor<128xf32>
+  }
+
+  func.func public @test_reduce_add_4to0dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.sum"
+    // CHECK-SAME: dim_arg = [0 : i32, 1 : i32, 2 : i32, 3 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x32x4xf32>
+    // CHECK-SAME: -> tensor<1xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_add_3to2dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128x4xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.sum"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x4xf32>
+    // CHECK-SAME: -> tensor<128x4xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128x4xf32>
+    return %0 : tensor<128x4xf32>
+  }
+
+  func.func public @test_reduce_add_3to1dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.sum"
+    // CHECK-SAME: dim_arg = [1 : i32, 2 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x4xf32>
+    // CHECK-SAME: -> tensor<128xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128xf32>
+    return %0 : tensor<128xf32>
+  }
+
+  func.func public @test_reduce_add_3to0dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.sum"
+    // CHECK-SAME: dim_arg = [0 : i32, 1 : i32, 2 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x4xf32>
+    // CHECK-SAME: -> tensor<1xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_add_2to1dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.sum"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10xf32>
+    // CHECK-SAME: -> tensor<128xf32>
     %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<128xf32>
-    // CHECK: %[[C:.*]] = tensor.empty[[C:.*]]
-    // CHECK: %[[C:.*]] = "ttir.sum"[[C:.*]]
     return %0 : tensor<128xf32>
   }
+
+  func.func public @test_reduce_add_2to0dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.sum"
+    // CHECK-SAME: dim_arg = [0 : i32, 1 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10xf32>
+    // CHECK-SAME: -> tensor<1xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_add_1to0dim(%arg0: tensor<128xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.sum"
+    // CHECK-SAME: dim_arg = [0 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128xf32>
+    // CHECK-SAME: -> tensor<1xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0] : (tensor<128xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
 }
diff --git a/test/ttmlir/Conversion/StableHLOToTTIR/reduce_maximum_op.mlir b/test/ttmlir/Conversion/StableHLOToTTIR/reduce_maximum_op.mlir
index ea03c5766..81fb59bfb 100644
--- a/test/ttmlir/Conversion/StableHLOToTTIR/reduce_maximum_op.mlir
+++ b/test/ttmlir/Conversion/StableHLOToTTIR/reduce_maximum_op.mlir
@@ -1,10 +1,113 @@
 // REQUIRES: stablehlo
 // RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | FileCheck %s
 module @jit_reduce_maximum attributes {} {
-  func.func public @test_reduce_maximum(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+  func.func public @test_reduce_maximum_4to3dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128x32x4xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.max"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x32x4xf32>
+    // CHECK-SAME: -> tensor<128x32x4xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128x32x4xf32>
+    return %0 : tensor<128x32x4xf32>
+  }
+
+  func.func public @test_reduce_maximum_4to2dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128x32xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.max"
+    // CHECK-SAME: dim_arg = [1 : i32, 3 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x32x4xf32>
+    // CHECK-SAME: -> tensor<128x32xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128x32xf32>
+    return %0 : tensor<128x32xf32>
+  }
+
+  func.func public @test_reduce_maximum_4to1dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.max"
+    // CHECK-SAME: dim_arg = [1 : i32, 2 : i32, 3 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x32x4xf32>
+    // CHECK-SAME: -> tensor<128xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<128xf32>
+    return %0 : tensor<128xf32>
+  }
+
+  func.func public @test_reduce_maximum_4to0dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.max"
+    // CHECK-SAME: dim_arg = [0 : i32, 1 : i32, 2 : i32, 3 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x32x4xf32>
+    // CHECK-SAME: -> tensor<1xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_maximum_3to2dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128x4xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.max"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x4xf32>
+    // CHECK-SAME: -> tensor<128x4xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128x4xf32>
+    return %0 : tensor<128x4xf32>
+  }
+
+  func.func public @test_reduce_maximum_3to1dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.max"
+    // CHECK-SAME: dim_arg = [1 : i32, 2 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x4xf32>
+    // CHECK-SAME: -> tensor<128xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128xf32>
+    return %0 : tensor<128xf32>
+  }
+
+  func.func public @test_reduce_maximum_3to0dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.max"
+    // CHECK-SAME: dim_arg = [0 : i32, 1 : i32, 2 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10x4xf32>
+    // CHECK-SAME: -> tensor<1xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_maximum_2to1dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.max"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10xf32>
+    // CHECK-SAME: -> tensor<128xf32>
     %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<128xf32>
-    // CHECK: %[[C:.*]] = tensor.empty[[C:.*]]
-    // CHECK: %[[C:.*]] = "ttir.max"[[C:.*]]
     return %0 : tensor<128xf32>
   }
+
+  func.func public @test_reduce_maximum_2to0dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.max"
+    // CHECK-SAME: dim_arg = [0 : i32, 1 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128x10xf32>
+    // CHECK-SAME: -> tensor<1xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_maximum_1to0dim(%arg0: tensor<128xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: tensor.empty
+    // CHECK: "ttir.max"
+    // CHECK-SAME: dim_arg = [0 : i32]
+    // CHECK-SAME: keep_dim = false
+    // CHECK-SAME: tensor<128xf32>
+    // CHECK-SAME: -> tensor<1xf32>
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0] : (tensor<128xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
 }
diff --git a/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_high.mlir b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_high.mlir
new file mode 100644
index 000000000..565745d05
--- /dev/null
+++ b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_high.mlir
@@ -0,0 +1,9 @@
+// RUN: not ttmlir-opt --split-input-file %s 2>&1 | FileCheck %s
+// Negative tests for reduce ops
+
+// CHECK: error: 'ttir.sum' op Reduce dimensions are out of range
+func.func public @test_reduce_add_invalid_dim_high(%arg0: tensor<128x10xf32>, %arg1: tensor<1xf32>) -> tensor<128xf32> {
+  %0 = tensor.empty() : tensor<128xf32>
+  %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [2 : i32], keep_dim = false}> : (tensor<128x10xf32>, tensor<128xf32>) -> tensor<128xf32>
+  return %1 : tensor<128xf32>
+}
diff --git a/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_low.mlir b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_low.mlir
new file mode 100644
index 000000000..bd4a237d4
--- /dev/null
+++ b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_invalid_dim_low.mlir
@@ -0,0 +1,9 @@
+// RUN: not ttmlir-opt --split-input-file %s 2>&1 | FileCheck %s
+// Negative tests for reduce ops
+
+// CHECK: error: 'ttir.sum' op Reduce dimensions are out of range
+func.func public @test_reduce_add_invalid_dim_low(%arg0: tensor<128x10xf32>, %arg1: tensor<1xf32>) -> tensor<128xf32> {
+  %0 = tensor.empty() : tensor<128xf32>
+  %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [-3 : i32], keep_dim = false}> : (tensor<128x10xf32>, tensor<128xf32>) -> tensor<128xf32>
+  return %1 : tensor<128xf32>
+}
diff --git a/test/ttmlir/Dialect/TTIR/reduce_ops/negative_repeating_dims.mlir b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_repeating_dims.mlir
new file mode 100644
index 000000000..13649e1e6
--- /dev/null
+++ b/test/ttmlir/Dialect/TTIR/reduce_ops/negative_repeating_dims.mlir
@@ -0,0 +1,9 @@
+// RUN: not ttmlir-opt --split-input-file %s 2>&1 | FileCheck %s
+// Negative tests for reduce ops
+
+// CHECK: error: 'ttir.sum' op Reduce dimensions are not unique
+func.func public @test_reduce_add_repeating_dims(%arg0: tensor<128x10x32x4xf32>, %arg1: tensor<1xf32>) -> tensor<128xf32> {
+  %0 = tensor.empty() : tensor<128xf32>
+  %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [1 : i32, 2 : i32, 3 : i32, 2 : i32], keep_dim = false}> : (tensor<128x10x32x4xf32>, tensor<128xf32>) -> tensor<128xf32>
+  return %1 : tensor<128xf32>
+}
diff --git a/test/ttmlir/Dialect/TTNN/reduction/max_op_negative.mlir b/test/ttmlir/Dialect/TTNN/reduction/max_op_negative.mlir
new file mode 100644
index 000000000..ac587303e
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/reduction/max_op_negative.mlir
@@ -0,0 +1,10 @@
+// RUN: not ttmlir-opt --split-input-file --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s 2>&1 | FileCheck %s
+// Negative tests for Max op.
+module {
+  func.func @forward(%arg0: tensor<128x32x10x4xbf16>) -> tensor<128x1x1x1xbf16> {
+    %0 = tensor.empty() : tensor<128x1x1x1xbf16>
+    // CHECK: error: 'ttnn.max' op Reduce on more than two dimensions is not currently supported by TTNN
+    %1 = "ttir.max"(%arg0, %0) <{dim_arg = [1: i32, 2: i32, 3: i32], keep_dim = true}> : (tensor<128x32x10x4xbf16>, tensor<128x1x1x1xbf16>) -> tensor<128x1x1x1xbf16>
+    return %1 : tensor<128x1x1x1xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/reduction/mean_op_negative.mlir b/test/ttmlir/Dialect/TTNN/reduction/mean_op_negative.mlir
new file mode 100644
index 000000000..768b220bb
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/reduction/mean_op_negative.mlir
@@ -0,0 +1,10 @@
+// RUN: not ttmlir-opt --split-input-file --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s 2>&1 | FileCheck %s
+// Negative tests for Mean op.
+module {
+  func.func @forward(%arg0: tensor<128x32x10x4xbf16>) -> tensor<128x1x1x1xbf16> {
+    %0 = tensor.empty() : tensor<128x1x1x1xbf16>
+    // CHECK: error: 'ttnn.mean' op Reduce on more than two dimensions is not currently supported by TTNN
+    %1 = "ttir.mean"(%arg0, %0) <{dim_arg = [1: i32, 2: i32, 3: i32], keep_dim = true}> : (tensor<128x32x10x4xbf16>, tensor<128x1x1x1xbf16>) -> tensor<128x1x1x1xbf16>
+    return %1 : tensor<128x1x1x1xbf16>
+  }
+}
diff --git a/test/ttmlir/Dialect/TTNN/reduction/sum_op_negative.mlir b/test/ttmlir/Dialect/TTNN/reduction/sum_op_negative.mlir
new file mode 100644
index 000000000..c0c634f05
--- /dev/null
+++ b/test/ttmlir/Dialect/TTNN/reduction/sum_op_negative.mlir
@@ -0,0 +1,10 @@
+// RUN: not ttmlir-opt --split-input-file --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s 2>&1 | FileCheck %s
+// Negative tests for Sum op.
+module {
+  func.func @forward(%arg0: tensor<128x32x10x4xbf16>) -> tensor<128x1x1x1xbf16> {
+    %0 = tensor.empty() : tensor<128x1x1x1xbf16>
+    // CHECK: error: 'ttnn.sum' op Reduce on more than two dimensions is not currently supported by TTNN
+    %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [1: i32, 2: i32, 3: i32], keep_dim = true}> : (tensor<128x32x10x4xbf16>, tensor<128x1x1x1xbf16>) -> tensor<128x1x1x1xbf16>
+    return %1 : tensor<128x1x1x1xbf16>
+  }
+}
diff --git a/test/ttmlir/Silicon/StableHLO/reduce_add_op.mlir b/test/ttmlir/Silicon/StableHLO/reduce_add_op.mlir
index 9da138bbb..89f51123e 100644
--- a/test/ttmlir/Silicon/StableHLO/reduce_add_op.mlir
+++ b/test/ttmlir/Silicon/StableHLO/reduce_add_op.mlir
@@ -1,22 +1,108 @@
 // REQUIRES: stablehlo
 // RUN: rm -rf %t.ttnn
 // RUN: rm -rf %t.mlir
-// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir
-// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
 // RUN: FileCheck --input-file=%t.mlir %s
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
 // UNSUPPORTED: true
-// error: keepdim=False is not supported
+// These tests are currently failing until a fix for this issue is uplifted
+// with a new version of Metal: https://github.com/tenstorrent/tt-metal/issues/16104
+// TODO(mrakita): Enable and edit these tests after the Metal issue is fixed.
+// Tracked by: https://github.com/tenstorrent/tt-mlir/issues/1640
 module @jit_reduce_add attributes {} {
-  func.func public @test_reduce_add(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
-    // CHECK-LABEL: func.func public @test_reduce_add
-    // CHECK: ttnn.sum
-    // CHECK-SAME: dim_arg = [1 : i32],
-    // CHECK-SAME: keep_dim = false
-    // CHECK-SAME: tensor<128x10xf32
+  func.func public @test_reduce_add_4to0dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: "ttnn.sum"
+    // CHECK-NOT: dim_arg
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10x32x4xf32,
+    // CHECK-SAME: -> tensor<1x1x1x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [1 : i32]
+    // CHECK-SAME: tensor<1x1x1x1xf32,
+    // CHECK-SAME: -> tensor<1xf32,
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_add_3to2dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128x4xf32> {
+    // CHECK: "ttnn.sum"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10x4xf32,
+    // CHECK-SAME: -> tensor<128x1x4xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [128 : i32, 4 : i32]
+    // CHECK-SAME: tensor<128x1x4xf32,
+    // CHECK-SAME: -> tensor<128x4xf32,
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128x4xf32>
+    return %0 : tensor<128x4xf32>
+  }
+
+  func.func public @test_reduce_add_3to1dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+    // CHECK: "ttnn.sum"
+    // CHECK-SAME: dim_arg = [1 : i32, 2 : i32]
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10x4xf32,
+    // CHECK-SAME: -> tensor<128x1x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [128 : i32]
+    // CHECK-SAME: tensor<128x1x1xf32,
+    // CHECK-SAME: -> tensor<128xf32,
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128xf32>
+    return %0 : tensor<128xf32>
+  }
+
+  func.func public @test_reduce_add_3to0dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: "ttnn.sum"
+    // CHECK-NOT: dim_arg
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10x4xf32,
+    // CHECK-SAME: -> tensor<1x1x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [1 : i32]
+    // CHECK-SAME: tensor<1x1x1xf32,
+    // CHECK-SAME: -> tensor<1xf32,
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_add_2to1dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+    // CHECK: "ttnn.sum"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10xf32,
+    // CHECK-SAME: -> tensor<128x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [128 : i32]
+    // CHECK-SAME: tensor<128x1xf32,
     // CHECK-SAME: -> tensor<128xf32,
     %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<128xf32>
     return %0 : tensor<128xf32>
   }
+
+  func.func public @test_reduce_add_2to0dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: "ttnn.sum"
+    // CHECK-NOT: dim_arg
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10xf32,
+    // CHECK-SAME: -> tensor<1x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [1 : i32]
+    // CHECK-SAME: tensor<1x1xf32,
+    // CHECK-SAME: -> tensor<1xf32,
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0, 1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_add_1to0dim(%arg0: tensor<128xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: "ttnn.sum"
+    // CHECK-NOT: dim_arg
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128xf32,
+    // CHECK-SAME: -> tensor<1xf32,
+    // CHECK-NOT: "ttnn.reshape"
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.add across dimensions = [0] : (tensor<128xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
 }
diff --git a/test/ttmlir/Silicon/StableHLO/reduce_maximum_op.mlir b/test/ttmlir/Silicon/StableHLO/reduce_maximum_op.mlir
index 57318948e..8ee57fd52 100644
--- a/test/ttmlir/Silicon/StableHLO/reduce_maximum_op.mlir
+++ b/test/ttmlir/Silicon/StableHLO/reduce_maximum_op.mlir
@@ -1,22 +1,108 @@
 // REQUIRES: stablehlo
 // RUN: rm -rf %t.ttnn
 // RUN: rm -rf %t.mlir
-// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline %s | \
-// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" > %t.mlir
-// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+// RUN: ttmlir-opt --stablehlo-to-ttir-pipeline --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
 // RUN: FileCheck --input-file=%t.mlir %s
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
 // UNSUPPORTED: true
-// error: keepdim=False is not supported
+// These tests are currently failing until a fix for this issue is uplifted
+// with a new version of Metal: https://github.com/tenstorrent/tt-metal/issues/16104
+// TODO(mrakita): Enable and edit these tests after the Metal issue is fixed.
+// Tracked by: https://github.com/tenstorrent/tt-mlir/issues/1640
 module @jit_reduce_maximum attributes {} {
-  func.func public @test_reduce_maximum(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
-    // CHECK-LABEL: func.func public @test_reduce_maximum
-    // CHECK: ttnn.max
-    // CHECK-SAME: dim_arg = [1 : i32],
-    // CHECK-SAME: keep_dim = false}
+  func.func public @test_reduce_maximum_4to0dim(%arg0: tensor<128x10x32x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: "ttnn.max"
+    // CHECK-NOT: dim_arg
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10x32x4xf32,
+    // CHECK-SAME: -> tensor<1x1x1x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [1 : i32]
+    // CHECK-SAME: tensor<1x1x1x1xf32,
+    // CHECK-SAME: -> tensor<1xf32,
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1, 2, 3] : (tensor<128x10x32x4xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_maximum_3to2dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128x4xf32> {
+    // CHECK: "ttnn.max"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10x4xf32,
+    // CHECK-SAME: -> tensor<128x1x4xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [128 : i32, 4 : i32]
+    // CHECK-SAME: tensor<128x1x4xf32,
+    // CHECK-SAME: -> tensor<128x4xf32,
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128x4xf32>
+    return %0 : tensor<128x4xf32>
+  }
+
+  func.func public @test_reduce_maximum_3to1dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+    // CHECK: "ttnn.max"
+    // CHECK-SAME: dim_arg = [1 : i32, 2 : i32]
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10x4xf32,
+    // CHECK-SAME: -> tensor<128x1x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [128 : i32]
+    // CHECK-SAME: tensor<128x1x1xf32,
+    // CHECK-SAME: -> tensor<128xf32,
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<128xf32>
+    return %0 : tensor<128xf32>
+  }
+
+  func.func public @test_reduce_maximum_3to0dim(%arg0: tensor<128x10x4xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: "ttnn.max"
+    // CHECK-NOT: dim_arg
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10x4xf32,
+    // CHECK-SAME: -> tensor<1x1x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [1 : i32]
+    // CHECK-SAME: tensor<1x1x1xf32,
+    // CHECK-SAME: -> tensor<1xf32,
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1, 2] : (tensor<128x10x4xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_maximum_2to1dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<128xf32> {
+    // CHECK: "ttnn.max"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = true
     // CHECK-SAME: tensor<128x10xf32,
-    // CHECK-SAME: -> tensor<128xf32
+    // CHECK-SAME: -> tensor<128x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [128 : i32]
+    // CHECK-SAME: tensor<128x1xf32,
+    // CHECK-SAME: -> tensor<128xf32,
     %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<128xf32>
     return %0 : tensor<128xf32>
   }
+
+  func.func public @test_reduce_maximum_2to0dim(%arg0: tensor<128x10xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: "ttnn.max"
+    // CHECK-NOT: dim_arg
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10xf32,
+    // CHECK-SAME: -> tensor<1x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [1 : i32]
+    // CHECK-SAME: tensor<1x1xf32,
+    // CHECK-SAME: -> tensor<1xf32,
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0, 1] : (tensor<128x10xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
+
+  func.func public @test_reduce_maximum_1to0dim(%arg0: tensor<128xf32>, %cst_0: tensor<f32>) -> tensor<f32> {
+    // CHECK: "ttnn.max"
+    // CHECK-NOT: dim_arg
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128xf32,
+    // CHECK-SAME: -> tensor<1xf32,
+    // CHECK-NOT: "ttnn.reshape"
+    %0 = stablehlo.reduce(%arg0 init: %cst_0) applies stablehlo.maximum across dimensions = [0] : (tensor<128xf32>, tensor<f32>) -> tensor<f32>
+    return %0 : tensor<f32>
+  }
 }
diff --git a/test/ttmlir/Silicon/TTNN/simple_max.mlir b/test/ttmlir/Silicon/TTNN/simple_max.mlir
new file mode 100644
index 000000000..8ec3bdc59
--- /dev/null
+++ b/test/ttmlir/Silicon/TTNN/simple_max.mlir
@@ -0,0 +1,39 @@
+// RUN: rm -rf %t.ttnn
+// RUN: rm -rf %t.mlir
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+// UNSUPPORTED: true
+// These tests are currently failing until a fix for this issue is uplifted
+// with a new version of Metal: https://github.com/tenstorrent/tt-metal/issues/16104
+// TODO(mrakita): Enable and edit these tests after the Metal issue is fixed.
+// Tracked by: https://github.com/tenstorrent/tt-mlir/issues/1640
+
+module {
+  func.func public @reduce_not_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128xf32> {
+    %0 = tensor.empty() : tensor<128xf32>
+    // CHECK: "ttnn.max"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10xf32,
+    // CHECK-SAME: -> tensor<128x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [128 : i32]
+    // CHECK-SAME: tensor<128x1xf32,
+    // CHECK-SAME: -> tensor<128xf32,
+    %1 = "ttir.max"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = false}> : (tensor<128x10xf32>, tensor<128xf32>) -> tensor<128xf32>
+    return %1 : tensor<128xf32>
+  }
+
+  func.func public @reduce_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128x1xf32> {
+    %0 = tensor.empty() : tensor<128x1xf32>
+    // CHECK: "ttnn.max"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10xf32,
+    // CHECK-SAME: -> tensor<128x1xf32,
+    // CHECK-NOT: "ttnn.reshape"
+    %1 = "ttir.max"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = true}> : (tensor<128x10xf32>, tensor<128x1xf32>) -> tensor<128x1xf32>
+    return %1 : tensor<128x1xf32>
+  }
+}
diff --git a/test/ttmlir/Silicon/TTNN/simple_mean.mlir b/test/ttmlir/Silicon/TTNN/simple_mean.mlir
index 0a3250936..476dcd9ab 100644
--- a/test/ttmlir/Silicon/TTNN/simple_mean.mlir
+++ b/test/ttmlir/Silicon/TTNN/simple_mean.mlir
@@ -1,12 +1,39 @@
+// RUN: rm -rf %t.ttnn
+// RUN: rm -rf %t.mlir
 // RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
 // RUN: FileCheck %s --input-file=%t.mlir
 // RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
 // UNSUPPORTED: true
+// These tests are currently failing until a fix for this issue is uplifted
+// with a new version of Metal: https://github.com/tenstorrent/tt-metal/issues/16104
+// TODO(mrakita): Enable and edit these tests after the Metal issue is fixed.
+// Tracked by: https://github.com/tenstorrent/tt-mlir/issues/1640
+
 module {
-  func.func @forward(%arg0: tensor<512x1024xbf16>) -> tensor<512x32xbf16> {
-    %0 = tensor.empty() : tensor<512x32xbf16>
-    // CHECK: %[[C:.*]] = "ttnn.mean"[[C:.*]]
-    %1 = "ttir.mean"(%arg0, %0) <{dim_arg = [-1: i32], keep_dim = true}> : (tensor<512x1024xbf16>, tensor<512x32xbf16>) -> tensor<512x32xbf16>
-    return %1 : tensor<512x32xbf16>
+  func.func public @reduce_not_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128xf32> {
+    %0 = tensor.empty() : tensor<128xf32>
+    // CHECK: "ttnn.mean"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10xf32,
+    // CHECK-SAME: -> tensor<128x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [128 : i32]
+    // CHECK-SAME: tensor<128x1xf32,
+    // CHECK-SAME: -> tensor<128xf32,
+    %1 = "ttir.mean"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = false}> : (tensor<128x10xf32>, tensor<128xf32>) -> tensor<128xf32>
+    return %1 : tensor<128xf32>
+  }
+
+  func.func public @reduce_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128x1xf32> {
+    %0 = tensor.empty() : tensor<128x1xf32>
+    // CHECK: "ttnn.mean"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10xf32,
+    // CHECK-SAME: -> tensor<128x1xf32,
+    // CHECK-NOT: "ttnn.reshape"
+    %1 = "ttir.mean"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = true}> : (tensor<128x10xf32>, tensor<128x1xf32>) -> tensor<128x1xf32>
+    return %1 : tensor<128x1xf32>
   }
 }
diff --git a/test/ttmlir/Silicon/TTNN/simple_sum.mlir b/test/ttmlir/Silicon/TTNN/simple_sum.mlir
new file mode 100644
index 000000000..cb1904a34
--- /dev/null
+++ b/test/ttmlir/Silicon/TTNN/simple_sum.mlir
@@ -0,0 +1,39 @@
+// RUN: rm -rf %t.ttnn
+// RUN: rm -rf %t.mlir
+// RUN: ttmlir-opt --ttir-to-ttnn-backend-pipeline="system-desc-path=%system_desc_path%" %s > %t.mlir
+// RUN: FileCheck %s --input-file=%t.mlir
+// RUN: ttmlir-translate --ttnn-to-flatbuffer %t.mlir > %t.ttnn
+// UNSUPPORTED: true
+// These tests are currently failing until a fix for this issue is uplifted
+// with a new version of Metal: https://github.com/tenstorrent/tt-metal/issues/16104
+// TODO(mrakita): Enable and edit these tests after the Metal issue is fixed.
+// Tracked by: https://github.com/tenstorrent/tt-mlir/issues/1640
+
+module {
+  func.func public @reduce_not_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128xf32> {
+    %0 = tensor.empty() : tensor<128xf32>
+    // CHECK: "ttnn.sum"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10xf32,
+    // CHECK-SAME: -> tensor<128x1xf32,
+    // CHECK: "ttnn.reshape"
+    // CHECK-SAME: shape = [128 : i32]
+    // CHECK-SAME: tensor<128x1xf32,
+    // CHECK-SAME: -> tensor<128xf32,
+    %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = false}> : (tensor<128x10xf32>, tensor<128xf32>) -> tensor<128xf32>
+    return %1 : tensor<128xf32>
+  }
+
+  func.func public @reduce_keep_dim(%arg0: tensor<128x10xf32>) -> tensor<128x1xf32> {
+    %0 = tensor.empty() : tensor<128x1xf32>
+    // CHECK: "ttnn.sum"
+    // CHECK-SAME: dim_arg = [1 : i32]
+    // CHECK-SAME: keep_dim = true
+    // CHECK-SAME: tensor<128x10xf32,
+    // CHECK-SAME: -> tensor<128x1xf32,
+    // CHECK-NOT: "ttnn.reshape"
+    %1 = "ttir.sum"(%arg0, %0) <{dim_arg = [1 : i32], keep_dim = true}> : (tensor<128x10xf32>, tensor<128x1xf32>) -> tensor<128x1xf32>
+    return %1 : tensor<128x1xf32>
+  }
+}