ttnn ops backend metal wrapper lib (#1230)
TTNNOpModelLib, initial version.
To be used for the op model interface (constraints, L1 usage, perf).
Builds with -DTTMLIR_ENABLE_OP_MODEL=ON.
mbezuljTT authored Nov 28, 2024
1 parent f8121bf commit ae987d5
Showing 10 changed files with 334 additions and 9 deletions.
13 changes: 8 additions & 5 deletions .github/workflows/build-and-test.yml
@@ -47,8 +47,9 @@ jobs:
fail-fast: false
matrix:
build: [
-{runs-on: ubuntu-latest, enable_perf: OFF, name: "run", ttrt_flags: ""},
-{runs-on: ubuntu-latest, enable_perf: ON, name: "perf", ttrt_flags: ""},
+{runs-on: ubuntu-latest, enable_perf: OFF, enable_op_model: OFF, name: "run", ttrt_flags: ""},
+{runs-on: ubuntu-latest, enable_perf: ON, enable_op_model: OFF, name: "perf", ttrt_flags: ""},
+{runs-on: ubuntu-latest, enable_perf: OFF, enable_op_model: ON, name: "op_model", ttrt_flags: ""}
]

name: Build tt-mlir
@@ -78,7 +79,7 @@ jobs:
uses: hendrikmuhs/ccache-action@v1.2
with:
create-symlink: true
-key: ${{ matrix.build.runs-on }}-run-ON-perf-${{ matrix.build.enable_perf }}-${{ env.SDK_VERSION }}
+key: ${{ matrix.build.runs-on }}-run-ON-perf-${{ matrix.build.enable_perf }}-op_model-${{ matrix.build.enable_op_model }}-${{ env.SDK_VERSION }}

# Build project

@@ -97,6 +98,7 @@ jobs:
-DTTMLIR_ENABLE_RUNTIME_TESTS=ON \
-DTT_RUNTIME_ENABLE_PERF_TRACE=${{ matrix.build.enable_perf }} \
-DTTMLIR_ENABLE_STABLEHLO=ON \
-DTTMLIR_ENABLE_OP_MODEL=${{ matrix.build.enable_op_model }} \
-S ${{ steps.strings.outputs.work-dir }}
- name: Build
@@ -147,7 +149,7 @@ jobs:
- name: Upload Test Report
uses: actions/upload-artifact@v4
with:
-name: test-reports-${{ matrix.build.runs-on }}-perf-${{ matrix.build.enable_perf }}
+name: test-reports-${{ matrix.build.runs-on }}-perf-${{ matrix.build.enable_perf }}-op_model-${{ matrix.build.enable_op_model }}
path: build/test/report.xml

- name: Show Test Report
@@ -480,7 +482,7 @@ jobs:
uses: hendrikmuhs/ccache-action@v1.2
with:
create-symlink: true
-key: ${{ matrix.build.runs-on }}-run-ON-perf-${{ matrix.build.enable_perf }}-${{ env.SDK_VERSION }}
+key: ${{ matrix.build.runs-on }}-run-ON-perf-${{ matrix.build.enable_perf }}-op_model-${{ matrix.build.enable_op_model }}-${{ env.SDK_VERSION }}

- name: Configure CMake
shell: bash
@@ -496,6 +498,7 @@ jobs:
-DTTMLIR_ENABLE_RUNTIME_TESTS=OFF \
-DTT_RUNTIME_ENABLE_PERF_TRACE=${{ matrix.build.enable_perf }} \
-DTTMLIR_ENABLE_STABLEHLO=OFF \
-DTTMLIR_ENABLE_OP_MODEL=${{ matrix.build.enable_op_model }} \
-S ${{ steps.strings.outputs.work-dir }}
- name: Build tt-explorer
6 changes: 6 additions & 0 deletions CMakeLists.txt
@@ -8,6 +8,7 @@ endif()
option(TT_RUNTIME_ENABLE_PERF_TRACE "Enable performance mode" OFF)
option(TTMLIR_ENABLE_RUNTIME "Enable runtime" OFF)
option(TTMLIR_ENABLE_STABLEHLO "Enable StableHLO support" OFF)
option(TTMLIR_ENABLE_OP_MODEL "Enable OpModel support" OFF)

if (TTMLIR_ENABLE_STABLEHLO)
add_compile_definitions(TTMLIR_ENABLE_STABLEHLO)
@@ -20,6 +21,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

set(TTMLIR_ENABLE_BINDINGS_PYTHON ON CACHE BOOL "Enable Python bindings")

if (APPLE)
set(TTMLIR_ENABLE_OP_MODEL OFF)
message(WARNING "TTNNOpModelLib is disabled on Apple platforms. Optimizer will not get true performance.")
endif()

list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR}/cmake/modules)

if (TT_RUNTIME_ENABLE_PERF_TRACE)
24 changes: 24 additions & 0 deletions include/ttmlir/OpModel/TTNN/TTNNOpModel.h
@@ -0,0 +1,24 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0

#ifndef TTMLIR_OPMODEL_TTNN_TTNNOPMODEL_H
#define TTMLIR_OPMODEL_TTNN_TTNNOPMODEL_H

#include "ttmlir/Dialect/TTNN/IR/TTNNOpsAttrs.h"

#include <tuple>

namespace mlir::tt::op_model::ttnn {

struct ReluOpInterface {
static bool isLegal(const mlir::tt::ttnn::TTNNLayoutAttr &inputLayout,
const mlir::tt::ttnn::TTNNLayoutAttr &outputLayout);

static std::tuple<size_t, size_t, size_t>
getOpL1Usage(const mlir::tt::ttnn::TTNNLayoutAttr &inputLayout,
const mlir::tt::ttnn::TTNNLayoutAttr &outputLayout);
};

} // namespace mlir::tt::op_model::ttnn
#endif // TTMLIR_OPMODEL_TTNN_TTNNOPMODEL_H
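
A minimal usage sketch for this interface, assuming the caller already holds TTNNLayoutAttr values for the op's input and output; the helper name probeRelu and the summing of the result are illustrative only, not part of this commit:

#include "ttmlir/OpModel/TTNN/TTNNOpModel.h"

// Hypothetical caller: checks a candidate layout pair for relu, then
// fetches its L1 usage numbers.
size_t probeRelu(const mlir::tt::ttnn::TTNNLayoutAttr &inputLayout,
                 const mlir::tt::ttnn::TTNNLayoutAttr &outputLayout) {
  namespace om = mlir::tt::op_model::ttnn;
  if (!om::ReluOpInterface::isLegal(inputLayout, outputLayout)) {
    return 0; // layout pair rejected by the op model
  }
  // The commit does not document the meaning of the three values, so they
  // are unpacked generically and summed only for illustration.
  auto [sizeA, sizeB, sizeC] =
      om::ReluOpInterface::getOpL1Usage(inputLayout, outputLayout);
  return sizeA + sizeB + sizeC;
}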
1 change: 1 addition & 0 deletions lib/CMakeLists.txt
@@ -1,6 +1,7 @@
include_directories(${TTMLIR_TOOLCHAIN_DIR}/src/stablehlo)
include_directories(${TTMLIR_TOOLCHAIN_DIR}/src/stablehlo-build)

add_subdirectory(OpModel)
add_subdirectory(CAPI)
add_subdirectory(Conversion)
add_subdirectory(Dialect)
2 changes: 2 additions & 0 deletions lib/Dialect/TTNN/IR/CMakeLists.txt
@@ -11,10 +11,12 @@ add_mlir_dialect_library(MLIRTTNNDialect
DEPENDS
MLIRTTNNOpsIncGen
MLIRTTOpsIncGen
TTNNOpModelLib

LINK_LIBS PUBLIC
TTMLIRTTNNUtils
MLIRSCFToEmitC
MLIRLinalgDialect
MLIRMLProgramDialect
TTNNOpModelLib
)
13 changes: 9 additions & 4 deletions lib/Dialect/TTNN/IR/TTNNOpModelInterface.cpp
@@ -5,6 +5,9 @@
#include "ttmlir/Dialect/TTNN/IR/TTNNOps.h"

#include "ttmlir/Dialect/TTNN/IR/TTNNOpModelInterface.cpp.inc"
#include "ttmlir/OpModel/TTNN/TTNNOpModel.h"

#include <cassert>
#include <tuple>

namespace mlir::tt::ttnn {
@@ -22,14 +25,16 @@ size_t ReluOp::getOpPerfCycles(const std::vector<TTNNLayoutAttr> &input_layouts,
std::tuple<size_t, size_t, size_t>
ReluOp::getOpL1Usage(const std::vector<TTNNLayoutAttr> &input_layouts,
const TTNNLayoutAttr &output_layout) {
-// TODO(mbezulj) wire to tt-metal once we have API
-return std::make_tuple(1024, 2048, 1024);
+assert(input_layouts.size() == 1);
+return op_model::ttnn::ReluOpInterface::getOpL1Usage(input_layouts[0],
+                                                     output_layout);
}

bool ReluOp::isOpLegal(const std::vector<TTNNLayoutAttr> &input_layouts,
const TTNNLayoutAttr &output_layout) {
-// TODO(mbezulj) wire to tt-metal once we have API
-return true;
+assert(input_layouts.size() == 1);
+return op_model::ttnn::ReluOpInterface::isLegal(input_layouts[0],
+                                                output_layout);
}

} // namespace mlir::tt::ttnn
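
For context, a sketch of the calling side that these forwarding methods enable; op, the layout arguments, and l1Budget are hypothetical placeholders for what an optimizer pass would supply:

#include "ttmlir/Dialect/TTNN/IR/TTNNOps.h"

// Sketch only: assumes `op` is a mlir::tt::ttnn::ReluOp and the candidate
// layouts come from an optimizer's search. The forwarding above asserts
// exactly one input layout for relu.
bool fitsInL1(mlir::tt::ttnn::ReluOp op,
              const std::vector<mlir::tt::ttnn::TTNNLayoutAttr> &inputLayouts,
              const mlir::tt::ttnn::TTNNLayoutAttr &outputLayout,
              size_t l1Budget) {
  if (!op.isOpLegal(inputLayouts, outputLayout)) {
    return false;
  }
  auto [sizeA, sizeB, sizeC] = op.getOpL1Usage(inputLayouts, outputLayout);
  // Sum the three reported figures conservatively against the budget.
  return sizeA + sizeB + sizeC <= l1Budget;
}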
1 change: 1 addition & 0 deletions lib/OpModel/CMakeLists.txt
@@ -0,0 +1 @@
add_subdirectory(TTNN)
40 changes: 40 additions & 0 deletions lib/OpModel/TTNN/CMakeLists.txt
@@ -0,0 +1,40 @@
set(LIB_NAME TTNNOpModelLib)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

set(SOURCES
TTNNOpModelLib.cpp
)
add_library(${LIB_NAME} STATIC ${SOURCES})

message(STATUS "TTMLIR_ENABLE_OP_MODEL[${TTMLIR_ENABLE_OP_MODEL}]")
if (TTMLIR_ENABLE_OP_MODEL)
# Link to tt-metal libs and include directories
target_include_directories(${LIB_NAME} PUBLIC "$<BUILD_INTERFACE:${TTMETAL_INCLUDE_DIRS}>")
target_link_libraries(${LIB_NAME} PUBLIC TTNN_LIBRARY TTMETAL_LIBRARY)
target_compile_definitions(${LIB_NAME} PUBLIC TTMLIR_ENABLE_OPMODEL)
else()
# Link the stub implementation when the op model library is disabled.
message(WARNING "TTNNOpModelLib is disabled. The optimizer will fall back to stub estimates.")
endif()

# Specify the include directories for the library
target_include_directories(${LIB_NAME}
PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}/
${PROJECT_SOURCE_DIR}/include/ttmlir/OpModel/TTNN/)


# Add TTNNOpModelLib to the export set
install(TARGETS ${LIB_NAME}
EXPORT TTNNOpModelLibTargets
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib
RUNTIME DESTINATION bin
INCLUDES DESTINATION include)

# Export the targets
export(EXPORT TTNNOpModelLibTargets
FILE "${CMAKE_CURRENT_BINARY_DIR}/TTNNOpModelLibTargets.cmake"
NAMESPACE TTNN::)
183 changes: 183 additions & 0 deletions lib/OpModel/TTNN/TTNNOpModelLib.cpp
@@ -0,0 +1,183 @@
// SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
//
// SPDX-License-Identifier: Apache-2.0

#include "TTNNOpModel.h"

#ifdef TTMLIR_ENABLE_OPMODEL
#include "TTNNOpModelLib_Impl.h"
#include "ttmlir/Dialect/TT/IR/TTOpsTypes.h"

#include <llvm/Support/Casting.h>
#include <mlir/IR/AttrTypeSubElements.h>

#include <cstddef>
#include <stdexcept>
#endif // TTMLIR_ENABLE_OPMODEL

namespace mlir::tt::op_model::ttnn {

#ifdef TTMLIR_ENABLE_OPMODEL
// aliases to common tt_metal types
using DataType = ::tt::tt_metal::DataType;
using Layout = ::tt::tt_metal::Layout;
using CoreRange = ::tt::tt_metal::CoreRange;
using CoreRangeSet = ::tt::tt_metal::CoreRangeSet;
using CoreCoord = ::tt::tt_metal::CoreCoord;
using ShardSpec = ::tt::tt_metal::ShardSpec;
using ShardOrientation = ::tt::tt_metal::ShardOrientation;
using TensorMemoryLayout = ::tt::tt_metal::TensorMemoryLayout;
using MemoryConfig = ::tt::tt_metal::MemoryConfig;

namespace detail {

DataType getDataType(const mlir::MemRefType &memref) {

auto dataType = elementTypeToDataType(memref.getElementType());

switch (dataType) {
case tt::DataType::Float32:
return DataType::FLOAT32;
case tt::DataType::BFloat16:
return DataType::BFLOAT16;
case tt::DataType::BFP_BFloat8:
return DataType::BFLOAT8_B;
case tt::DataType::BFP_BFloat4:
return DataType::BFLOAT4_B;
case tt::DataType::UInt32:
return DataType::UINT32;
case tt::DataType::UInt16:
return DataType::UINT16;
case tt::DataType::UInt8:
return DataType::UINT8;
default:
throw std::runtime_error("Invalid element type");
}
}

::ttnn::SimpleShape getTensorShape(const mlir::MemRefType &memref) {
::tt::tt_metal::SmallVector<uint32_t> small_vector_shape(
memref.getShape().begin(), memref.getShape().end());
return ::ttnn::SimpleShape(small_vector_shape);
}

const std::array<uint32_t, 2>
getShardShape(const mlir::tt::ttnn::TTNNLayoutAttr &layout) {
const auto layoutShardTile = layout.getShardShape();

if (layoutShardTile.size() != 2) {
llvm::errs() << "ERROR: layout_shard_tile.size() != 2\n";
return {0, 0};
}

std::array<uint32_t, 2> shardShape;
shardShape[0] = layoutShardTile[0];
shardShape[1] = layoutShardTile[1];
return shardShape;
}

Layout getTensorLayout(const mlir::tt::ttnn::TTNNLayoutAttr &layout) {
return layout.isTiled() ? Layout::TILE : Layout::ROW_MAJOR;
}

CoreRangeSet getCoreRangeSet(const mlir::tt::ttnn::TTNNLayoutAttr &layout) {
// TODO(mbezulj): handle more complex grid shapes
// assuming grid shape is one rect starting at (0,0)

const auto layoutGrid = layout.getGrid();

const auto layoutGridShape = layoutGrid.getShape();
if (layoutGridShape.size() != 2) {
llvm::errs() << "ERROR: layout_grid.getShape().size() == 2\n";
return {};
}

// Rectangle from (0,0) to the grid's far corner (inclusive); assumes the
// grid shape is ordered (rows, cols) and CoreCoord takes (x, y).
return CoreRangeSet(CoreRange(
CoreCoord(0, 0),
CoreCoord(layoutGridShape[1] - 1, layoutGridShape[0] - 1)));
}

std::optional<ShardSpec>
layout_get_shard_spec(const mlir::tt::ttnn::TTNNLayoutAttr &layout) {
// tt_ShardOrientation is not part of ttnn::TTNNLayoutAttr;
// defaulting to ROW_MAJOR. TODO: figure out if we need to expose this
return isShardedMemoryLayout(layout.getMemLayout())
? std::make_optional(ShardSpec(getCoreRangeSet(layout),
getShardShape(layout),
ShardOrientation::ROW_MAJOR, false))
: std::nullopt;
}

::tt::tt_metal::BufferType getBufferType(const mlir::MemRefType &memref) {
auto memorySpace =
mlir::cast<tt::MemorySpaceAttr>(memref.getMemorySpace()).getValue();

switch (memorySpace) {
case tt::MemorySpace::DeviceDRAM:
return ::tt::tt_metal::BufferType::DRAM;
case tt::MemorySpace::DeviceL1:
return ::tt::tt_metal::BufferType::L1;
default: // TODO(mbezulj): handle other memory spaces
throw std::runtime_error("Unsupported memory space");
}
}

::tt::tt_metal::TensorMemoryLayout
getTensorMemoryLayout(const mlir::tt::ttnn::TTNNLayoutAttr &layout) {
auto tensorMemoryLayout = layout.getMemLayout();

switch (tensorMemoryLayout) {
case mlir::tt::ttnn::TensorMemoryLayout::Interleaved:
return ::tt::tt_metal::TensorMemoryLayout::INTERLEAVED;
case mlir::tt::ttnn::TensorMemoryLayout::SingleBank:
return ::tt::tt_metal::TensorMemoryLayout::SINGLE_BANK;
case mlir::tt::ttnn::TensorMemoryLayout::HeightSharded:
return ::tt::tt_metal::TensorMemoryLayout::HEIGHT_SHARDED;
case mlir::tt::ttnn::TensorMemoryLayout::WidthSharded:
return ::tt::tt_metal::TensorMemoryLayout::WIDTH_SHARDED;
case mlir::tt::ttnn::TensorMemoryLayout::BlockSharded:
return ::tt::tt_metal::TensorMemoryLayout::BLOCK_SHARDED;
default:
throw std::runtime_error("Unsupported tensor memory layout");
}
}

::tt::tt_metal::MemoryConfig
getMemoryConfig(const mlir::tt::ttnn::TTNNLayoutAttr &layout) {

auto tensorMemoryLayout = getTensorMemoryLayout(layout);
auto bufferType = getBufferType(layout.getMemref());

auto shardSpec = getShardSpec(layout);
return ::tt::tt_metal::MemoryConfig(tensorMemoryLayout, bufferType,
shardSpec);
}

} // namespace detail
#endif // TTMLIR_ENABLE_OPMODEL

//===----------------------------------------------------------------------===//
// ReluOp
//===----------------------------------------------------------------------===//

bool ReluOpInterface::isLegal(
const mlir::tt::ttnn::TTNNLayoutAttr &inputLayout,
const mlir::tt::ttnn::TTNNLayoutAttr &outputLayout) {

#ifdef TTMLIR_ENABLE_OPMODEL
return true; // placeholder; to be wired into tt-metal with the next uplift
#else
return true; // stub: all layouts are reported legal when the op model is disabled
#endif // TTMLIR_ENABLE_OPMODEL
}

std::tuple<size_t, size_t, size_t> ReluOpInterface::getOpL1Usage(
const mlir::tt::ttnn::TTNNLayoutAttr &inputLayout,
const mlir::tt::ttnn::TTNNLayoutAttr &outputLayout) {
#ifdef TTMLIR_ENABLE_OPMODEL
return std::make_tuple(0, 0, 0); // placeholder; to be wired into tt-metal with the next uplift
#else
return std::make_tuple(0, 0, 0); // stub: no L1 usage data when the op model is disabled
#endif // TTMLIR_ENABLE_OPMODEL
}

} // namespace mlir::tt::op_model::ttnn