Add support for conv_transpose2d operation

tenstorrent · Dec 20, 2024 · 7b36217 · 7b36217
1 parent 1ce54d5
commit 7b36217
Show file tree

Hide file tree

Showing 18 changed files with 1,162 additions and 0 deletions.
diff --git a/include/ttmlir/Dialect/TTIR/IR/TTIROps.td b/include/ttmlir/Dialect/TTIR/IR/TTIROps.td
@@ -856,6 +856,58 @@ def TTIR_Conv2dOp : TTIR_DPSOp<"conv2d"> {
     let hasVerifier = 1;
 }
 
+def TTIR_ConvTranspose2dOp : TTIR_DPSOp<"conv_transpose2d"> {
+    let summary = "ConvTranspose2d operation.";
+    let description = [{
+      Applies a 2D transposed convolution operator over an input image composed of several input planes.
+
+      Inputs:
+      - `input` AnyRankedTensor: NHWC format (batch_size x height x width x channels)
+      - `weight` AnyRankedTensor: OIHW format (output_channels x input_channels x height x width)
+      - `bias` Optional<AnyRankedTensor>: (1 x 1 x 1 x output_channels)
+      - `output` AnyRankedTensor: NHWC format (batch_size x height x width x channels)
+
+      Attributes:
+      - `stride` (i32 | array<i32>): Controls the stride for the cross-correlation.
+      - `padding` (i32 | array<i32>): Controls the amount of implicit zero padding on both sides for dilation * (kernel_size - 1) - padding number of points.
+      - `output_padding` (i32 | array<i32>): Controls the additional size added to one side of the output shape.
+      - `dilation` (i32 | array<i32>): Controls the spacing between the kernel points.
+      - `groups` i32: Controls the connections between inputs and outputs. Input and output channels must both be divisible by `groups`.
+
+      Example:
+        %input = tensor.empty() : () -> tensor<1x8x8x256xbf16>
+        %weight = tensor.empty() : () -> tensor<256x256x3x3xbf16>
+        %bias = tensor.empty() : () -> tensor<1x1x1x256xbf16>
+        %output = tensor.empty() : () -> tensor<1x10x10x256xbf16>
+        %0 = "ttir.conv_transpose2d"(%input, %weight, %bias, %output)
+          <{
+            stride = array<i32: 1, 1>,
+            padding = 0: i32,
+            output_padding = 0: i32,
+            dilation = 1: i32,
+            groups = 1: i32
+          }> : (tensor<1x8x8x256xbf16>, tensor<256x256x3x3xbf16>, tensor<1x1x1x256xbf16>, tensor<1x10x10x256xbf16>) -> tensor<1x10x10x256xbf16>
+    }];
+
+    let arguments = (ins AnyRankedTensor:$input,
+                         AnyRankedTensor:$weight,
+                         Optional<AnyRankedTensor>:$bias,
+                         AnyRankedTensor:$output,
+                         AnyAttrOf<[I32Attr, DenseI32ArrayAttr]>:$stride,
+                         AnyAttrOf<[I32Attr, DenseI32ArrayAttr]>:$padding,
+                         AnyAttrOf<[I32Attr, DenseI32ArrayAttr]>:$output_padding,
+                         AnyAttrOf<[I32Attr, DenseI32ArrayAttr]>:$dilation,
+                         I32Attr:$groups);
+
+    let results = (outs AnyRankedTensor:$result);
+
+    let extraClassDeclaration = [{
+      MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); }
+    }];
+
+    let hasVerifier = 1;
+}
+
 def TTIR_ConvolutionOp : TTIR_DPSOp<"convolution"> {
   let summary = "Generalized convolution op.";
   let description = [{

diff --git a/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td b/include/ttmlir/Dialect/TTNN/IR/TTNNOps.td
@@ -834,6 +834,57 @@ def TTNN_Conv2dOp : TTNN_NamedDPSOp<"conv2d"> {
     let hasVerifier = 1;
 }
 
+def TTNN_ConvTranspose2dOp : TTNN_NamedDPSOp<"conv_transpose2d"> {
+    let summary = "ConvTranspose2d operation.";
+    let description = [{
+      Applies a 2D transposed convolution operator over an input image composed of several input planes.
+
+      Inputs:
+      - `input` AnyRankedTensor: NHWC format (batch_size x height x width x channels)
+      - `weight` AnyRankedTensor: OIHW format (output_channels x input_channels x height x width)
+      - `bias` Optional<AnyRankedTensor>: (1 x 1 x 1 x output_channels)
+      - `output` AnyRankedTensor: (1 x 1 x (batch_size * height * width) x channels)
+
+      Attributes:
+      - `in_channels` i32: The number of input channels.
+      - `out_channels` i32: The number of output channels.
+      - `batch_size` i32: The batch size.
+      - `input_height` i32: The input height.
+      - `input_width` i32: The input width.
+      - `kernel_size` array<i32>: The kernel size.
+      - `stride` array<i32>: Controls the stride for the cross-correlation.
+      - `padding` array<i32>: Controls the amount of implicit zero padding on both sides for dilation * (kernel_size - 1) - padding number of points.
+      - `output_padding` array<i32>: Controls the additional size added to one side of the output shape.
+      - `dilation` array<i32>: Controls the spacing between the kernel points.
+      - `groups` i32: Controls the connections between inputs and outputs. Input and output channels must both be divisible by `groups`.
+    }];
+
+    let arguments = (ins AnyRankedTensor:$input,
+                         AnyRankedTensor:$weight,
+                         Optional<AnyRankedTensor>:$bias,
+                         AnyRankedTensor:$output,
+                         TT_Device:$device,
+                         I32Attr:$in_channels,
+                         I32Attr:$out_channels,
+                         I32Attr:$batch_size,
+                         I32Attr:$input_height,
+                         I32Attr:$input_width,
+                         DenseI32ArrayAttr:$kernel_size,
+                         DenseI32ArrayAttr:$stride,
+                         DenseI32ArrayAttr:$padding,
+                         DenseI32ArrayAttr:$output_padding,
+                         DenseI32ArrayAttr:$dilation,
+                         I32Attr:$groups);
+
+    let results = (outs AnyRankedTensor:$result);
+
+    let extraClassDeclaration = [{
+      MutableOperandRange getDpsInitsMutable() { return getOutputMutable(); }
+    }];
+
+    let hasVerifier = 1;
+}
+
 def TTNN_MaxPool2dOp : TTNN_NamedDPSOp<"max_pool2d"> {
     let summary = "Applies a 2D max pooling over an input signal composed of several input planes.";
     let description = [{

diff --git a/include/ttmlir/Target/TTNN/program.fbs b/include/ttmlir/Target/TTNN/program.fbs
@@ -263,6 +263,25 @@ table Conv2dOp {
   groups: uint32;
 }
 
+table ConvTranspose2dOp { // Fields mirror the ttnn.conv_transpose2d operands and attributes.
+  input: tt.target.TensorRef;
+  weight: tt.target.TensorRef;
+  bias: tt.target.TensorRef; // Optional operand on the TTNN op; presumably left unset when absent - confirm.
+  out: tt.target.TensorRef; // Corresponds to the TTNN op's `output` operand.
+  device: tt.target.DeviceRef;
+  in_channels: uint32;
+  out_channels: uint32;
+  batch_size: uint32;
+  input_height: uint32;
+  input_width: uint32;
+  kernel_size: [int32]; // The TTIR-to-TTNN lowering builds this as {kernel height, kernel width}.
+  stride: [int32]; // stride/padding/output_padding/dilation are array<i32> attributes on the TTNN op.
+  padding: [int32];
+  output_padding: [int32];
+  dilation: [int32];
+  groups: uint32;
+}
+
 table MaxPool2dOp {
   in: tt.target.TensorRef;
   out: tt.target.TensorRef;
@@ -332,6 +351,7 @@ union OpType {
   SoftmaxOp,
   TransposeOp,
   Conv2dOp,
+  ConvTranspose2dOp,
   ConcatOp,
   ReshapeOp,
   SliceOp,

diff --git a/include/ttmlir/Utils.h b/include/ttmlir/Utils.h
@@ -131,6 +131,27 @@ inline bool isRankedTensor(mlir::Value v) {
   return mlir::isa<mlir::RankedTensorType>(v.getType());
 }
 
+// Parses `baseAttr` into a two-element vector, as used by spatial attributes
+// (padding, stride, dilation): an IntegerAttr is broadcast to both elements,
+// a two-element DenseI32ArrayAttr is copied. A null attribute or any other
+// kind/size yields an empty vector, which callers must treat as invalid.
+inline llvm::SmallVector<int32_t, 2>
+parseAttrToTwoElementVector(mlir::Attribute baseAttr) {
+  llvm::SmallVector<int32_t, 2> result;
+
+  // Both casts are null-tolerant; a plain dyn_cast asserts on a null attribute.
+  if (const auto intAttr =
+          mlir::dyn_cast_if_present<mlir::IntegerAttr>(baseAttr)) {
+    result.assign(2, static_cast<int32_t>(intAttr.getInt()));
+  } else if (const auto arrayAttr =
+                 mlir::dyn_cast_if_present<mlir::DenseI32ArrayAttr>(baseAttr);
+             arrayAttr && arrayAttr.size() == 2) {
+    result.append({arrayAttr[0], arrayAttr[1]});
+  }
+
+  return result;
+}
+
 } // namespace ttmlir::utils
 
 #endif
diff --git a/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp b/lib/Conversion/TTIRToTTNN/TTIRToTTNN.cpp
@@ -12,6 +12,7 @@
 #include "ttmlir/Dialect/TTNN/Types/Types.h"
 #include "ttmlir/Dialect/TTNN/Utils/TransformUtils.h"
 #include "ttmlir/Dialect/TTNN/Utils/Utils.h"
+#include "ttmlir/Utils.h"
 
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/IR/Attributes.h"
@@ -31,6 +32,8 @@
 using namespace mlir;
 using namespace mlir::tt;
 
+#include <iostream>
+
 namespace {
 
 class TensorEmptyConversionPattern
@@ -871,6 +874,77 @@ class Conv2dOpConversionPattern : public OpConversionPattern<ttir::Conv2dOp> {
   }
 };
 
+// Lowers ttir.conv_transpose2d to ttnn.conv_transpose2d.
+//
+// The TTNN op is created with a flattened (1 x 1 x N*H*W x C) output, so the
+// DPS output tensor is re-created with that shape and the op result is
+// reshaped back to the original output shape afterwards.
+// NOTE(review): the indexing below assumes rank-4 input, weight and output
+// tensors; presumably the TTIR op verifier enforces this - confirm.
+class ConvTranspose2dOpConversionPattern
+    : public OpConversionPattern<ttir::ConvTranspose2dOp> {
+public:
+  using OpConversionPattern<ttir::ConvTranspose2dOp>::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(ttir::ConvTranspose2dOp op, OpAdaptor adaptor,
+                  ConversionPatternRewriter &rewriter) const override {
+    // The DPS output is replaced in place below, which requires a defining
+    // op; a block-argument output would otherwise be a null dereference.
+    Operation *outputDefOp = adaptor.getOutput().getDefiningOp();
+    if (!outputDefOp) {
+      return rewriter.notifyMatchFailure(
+          op, "expected output operand to be produced by an operation");
+    }
+
+    auto device = ::ttnn::utils::getOrInsertDevice(rewriter, op);
+
+    auto inputTy = mlir::cast<RankedTensorType>(adaptor.getInput().getType());
+    auto kernelTy = mlir::cast<RankedTensorType>(adaptor.getWeight().getType());
+    auto outputTy = mlir::cast<RankedTensorType>(adaptor.getOutput().getType());
+
+    llvm::ArrayRef<std::int64_t> outputShape = outputTy.getShape();
+
+    // Returns the dimension `offset` positions from the end (1 == last).
+    auto getLastDim = [](const RankedTensorType &ty, int offset = 1) {
+      return ty.getShape()[ty.getRank() - offset];
+    };
+
+    // Normalizes an `i32 | array<i32>` attribute to a dense i32 array.
+    auto toPairAttr = [&rewriter](Attribute attr) {
+      return rewriter.getDenseI32ArrayAttr(
+          ttmlir::utils::parseAttrToTwoElementVector(attr));
+    };
+
+    auto inChannels = rewriter.getI32IntegerAttr(getLastDim(inputTy));
+    auto outChannels = rewriter.getI32IntegerAttr(getLastDim(outputTy));
+    auto batchSize = rewriter.getI32IntegerAttr(getLastDim(inputTy, 4));
+    auto inputHeight = rewriter.getI32IntegerAttr(getLastDim(inputTy, 3));
+    auto inputWidth = rewriter.getI32IntegerAttr(getLastDim(inputTy, 2));
+
+    auto kernelSize = rewriter.getDenseI32ArrayAttr(
+        {static_cast<int32_t>(getLastDim(kernelTy, 2)),
+         static_cast<int32_t>(getLastDim(kernelTy, 1))});
+    auto stride = toPairAttr(adaptor.getStrideAttr());
+    auto padding = toPairAttr(adaptor.getPaddingAttr());
+    auto outputPadding = toPairAttr(adaptor.getOutputPaddingAttr());
+    auto dilation = toPairAttr(adaptor.getDilationAttr());
+    auto groups = rewriter.getI32IntegerAttr(adaptor.getGroups());
+
+    // Collapse batch and spatial dims into one: (1, 1, N*H*W, C).
+    std::vector<int64_t> flattenedOutputShape = {
+        1, 1, outputShape[0] * outputShape[1] * outputShape[2],
+        outputShape[3]};
+
+    outputTy = mlir::cast<RankedTensorType>(getTypeConverter()->convertType(
+        outputTy.cloneWith(flattenedOutputShape, outputTy.getElementType())));
+
+    // Using a tensor::EmptyOp so that the rewriter for EmptyOp can handle the
+    // attribute determination
+    auto convDPSOutput = rewriter.replaceOpWithNewOp<tensor::EmptyOp>(
+        outputDefOp, flattenedOutputShape, outputTy.getElementType());
+
+    // Must set the type to the output type to maintain the layout attributes
+    convDPSOutput.getResult().setType(outputTy);
+
+    ttnn::ConvTranspose2dOp newConv = rewriter.create<ttnn::ConvTranspose2dOp>(
+        op.getLoc(), outputTy, adaptor.getInput(), adaptor.getWeight(),
+        adaptor.getBias(), convDPSOutput, device, inChannels, outChannels,
+        batchSize, inputHeight, inputWidth, kernelSize, stride, padding,
+        outputPadding, dilation, groups);
+
+    // Restore the original (unflattened) output shape for downstream users.
+    Value output =
+        ttir_to_ttnn::utils::generateReshape(newConv, outputShape, rewriter);
+
+    rewriter.replaceOp(op, output);
+    return success();
+  }
+};
+
 class MaxPool2dOpConversionPattern
     : public OpConversionPattern<ttir::MaxPool2dOp> {
 public:
@@ -1193,6 +1267,7 @@ void populateTTIRToTTNNPatterns(MLIRContext *ctx, RewritePatternSet &patterns,
            LinearOpConversionPattern,
            MatmulOpConversionPattern,
            Conv2dOpConversionPattern,
+           ConvTranspose2dOpConversionPattern,
            MaxPool2dOpConversionPattern,
            SubtractOpConversionPattern,
            MeshShardOpConversionPattern,

diff --git a/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp b/lib/Conversion/TTNNToEmitC/TTNNToEmitC.cpp
@@ -746,6 +746,7 @@ void populateTTNNToEmitCPatterns(mlir::MLIRContext *ctx,
   // Conv ops
   //
   patterns.add<DefaultOpConversionPattern<ttnn::Conv2dOp>>(typeConverter, ctx);
+  patterns.add<DefaultOpConversionPattern<ttnn::ConvTranspose2dOp>>(typeConverter, ctx);
   patterns.add<DefaultOpConversionPattern<ttnn::MaxPool2dOp>>(typeConverter,
                                                               ctx);