From f4cfebae3b6b81f4802f4d225b65538130fea68b Mon Sep 17 00:00:00 2001 From: Jacob DeSousa Date: Mon, 25 Nov 2024 12:45:35 -0500 Subject: [PATCH] Add NoC Mcast Ops in D2M (#1100) --- .../ttmlir/Dialect/TTKernel/IR/TTKernelOps.td | 62 +++++++++++++++++++ .../TTKernelToEmitC/TTKernelToEmitC.cpp | 6 ++ 2 files changed, 68 insertions(+) diff --git a/include/ttmlir/Dialect/TTKernel/IR/TTKernelOps.td b/include/ttmlir/Dialect/TTKernel/IR/TTKernelOps.td index 4b6da4b68..c0f6d4361 100644 --- a/include/ttmlir/Dialect/TTKernel/IR/TTKernelOps.td +++ b/include/ttmlir/Dialect/TTKernel/IR/TTKernelOps.td @@ -503,6 +503,68 @@ def TTKernel_NocAsyncWriteBarrierOp : TTKernel_Op<"noc_async_write_barrier"> { }]; } +//===----------------------------------------------------------------------===// +// TTKernel Multicast NoC operations +//===----------------------------------------------------------------------===// + +def TTKernel_GetNocMulticastAddrOp : TTKernel_Op<"get_noc_multicast_addr"> { + let summary = "GetNocMulticastAddr"; + let description = [{ + GetNocMulticastAddr + }]; + + let arguments = (ins I32:$noc_x_start, I32:$noc_y_start, I32:$noc_x_end, I32:$noc_y_end, I32:$addr, Optional:$noc); + let results = (outs TTKernel_NocAddr:$mcastNocAddr); +} + +def TTKernel_NocAsyncWriteMulticastOnePacketOp : TTKernel_Op<"noc_async_write_multicast_one_packet"> { + let summary = "NocAsyncWriteMulticastOnePacket"; + let description = [{ + NocAsyncWriteMulticastOnePacket + this issues only a single packet with size <= NOC_MAX_BURST_SIZE (ie maximum packet size) + }]; + + let arguments = (ins I32:$srcLocalL1Addr, TTKernel_NocAddr:$dstNocAddrMulticast, I32:$size, I32:$num_dests, OptionalAttr:$linked, OptionalAttr:$multicast_path_reserve, Optional:$noc); +} + +def TTKernel_NocAsyncWriteMulticastOp : TTKernel_Op<"noc_async_write_multicast"> { + let summary = "NocAsyncWriteMulticast"; + let description = [{ + Initiates an asynchronous write from a source address in L1 memory on the + Tensix core 
executing this function call to a rectangular destination grid. + The destinations are specified using a uint64_t encoding referencing an + on-chip grid of nodes located at NOC coordinate range + (x_start,y_start,x_end,y_end) and a local address created using the + *get_noc_multicast_addr* function. See also *noc_async_write_barrier*. + + The destination nodes can only be a set of Tensix cores plus an L1 memory address. + The destination nodes must form a rectangular grid. The destination L1 + memory address must be the same on all destination nodes. + + With this API, the multicast sender cannot be part of the multicast + destinations. If the multicast sender has to be in the multicast + destinations (i.e. must perform a local L1 write), the other API variant + *noc_async_write_multicast_loopback_src* can be used. + + Note: The number of destinations needs to be non-zero. Besides that, + there is no restriction on the number of destinations, i.e. the + multicast destinations can span the full chip. However, as mentioned + previously, the multicast source cannot be part of the destinations. So, the + maximum number of destinations is 119. 
+ }]; + + let arguments = (ins I32:$srcLocalL1Addr, TTKernel_NocAddr:$dstNocAddrMulticast, I32:$size, I32:$num_dests, OptionalAttr:$linked, OptionalAttr:$multicast_path_reserve, Optional:$noc); +} + +def TTKernel_NocAsyncWriteMulticastLoopbackSrcOp : TTKernel_Op<"noc_async_write_multicast_loopback_src"> { + let summary = "NocAsyncWriteMulticastLoopbackSrc"; + let description = [{ + NocAsyncWriteMulticastLoopbackSrc + }]; + + let arguments = (ins I32:$srcLocalL1Addr, TTKernel_NocAddr:$dstNocAddrMulticast, I32:$size, I32:$num_dests, OptionalAttr:$linked, OptionalAttr:$multicast_path_reserve, Optional:$noc); +} + //===----------------------------------------------------------------------===// // TTKernel Misc operations //===----------------------------------------------------------------------===// diff --git a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp index 312377eb6..b907ad7f3 100644 --- a/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp +++ b/lib/Conversion/TTKernelToEmitC/TTKernelToEmitC.cpp @@ -419,6 +419,12 @@ class ConvertTTKernelToEmitCPass TTMetalToEmitCOpaqueRewriter, TTMetalToEmitCOpaqueRewriter, TTMetalToEmitCOpaqueRewriter, + TTMetalToEmitCOpaqueRewriter, + TTMetalToEmitCOpaqueRewriter< + ttkernel::NocAsyncWriteMulticastOnePacketOp>, + TTMetalToEmitCOpaqueRewriter, + TTMetalToEmitCOpaqueRewriter< + ttkernel::NocAsyncWriteMulticastLoopbackSrcOp>, TTMetalToEmitCOpaqueRewriter, TTMetalToEmitCOpaqueRewriter, TTMetalToEmitCOpaqueRewriter,