From 13b979b66ab74333ec3b6f7d6ff27dd2448a199d Mon Sep 17 00:00:00 2001
From: panickal-xmos
Date: Sun, 15 Dec 2024 15:01:15 +0000
Subject: [PATCH 1/4] Add changes for allocating the same offset for input and
 output tensors

---
 xformer/Analysis/MemoryPlan.cpp               | 146 +++++++++++++++-
 xformer/Analysis/MemoryPlan.h                 |  22 ++-
 xformer/Transforms/Options.h                  |   2 +
 xformer/Transforms/Passes.cpp                 |   1 +
 xformer/Transforms/Passes.h                   |   2 +
 .../VerifySameAllocationTensors.cpp           | 160 ++++++++++++++++++
 xformer/XCoreOptMain.cpp                      |  25 +++
 7 files changed, 344 insertions(+), 14 deletions(-)
 create mode 100644 xformer/Transforms/VerifySameAllocationTensors.cpp

diff --git a/xformer/Analysis/MemoryPlan.cpp b/xformer/Analysis/MemoryPlan.cpp
index 9a72a8094..383cdb16a 100644
--- a/xformer/Analysis/MemoryPlan.cpp
+++ b/xformer/Analysis/MemoryPlan.cpp
@@ -129,7 +129,9 @@ int MemoryPlan::getOffset(Value v, int size,
     if ((valueInfo[allocatedVal].firstUsed > valueInfo[v].lastUsed) ||
         (valueInfo[v].firstUsed > valueInfo[allocatedVal].lastUsed)) {
-      // No overlap
+      // No overlap with this buffer; keep scanning until we find a clash.
+      // Once we find a clash, we can allocate just before it if there is
+      // enough space, since we do not overlap any buffer checked so far.
       continue;
     }
 
@@ -149,6 +151,63 @@ int MemoryPlan::getOffset(Value v, int size,
   return offset;
 }
 
+void MemoryPlan::buildInputOutputTensorMaps(
+    llvm::StringMap<Value> &inputTensorMap,
+    llvm::StringMap<Value> &outputTensorMap) {
+  auto buildMap = [&](StringRef argAttr, StringRef nameAttr,
+                      llvm::SmallVector<std::string> &attrsInOrder) {
+    llvm::StringMap<std::string> map;
+    llvm::SmallVector<std::string> argNames;
+    auto funcOp = dyn_cast<func::FuncOp>(op);
+
+    auto argAttrs = funcOp->getAttrOfType<ArrayAttr>(argAttr);
+    for (auto attr : argAttrs) {
+      auto d = attr.dyn_cast_or_null<DictionaryAttr>();
+
+      const ArrayRef<Attribute> indexPathAttrs =
+          d.get("tf_saved_model.index_path").cast<ArrayAttr>().getValue();
+      auto stringAttr = indexPathAttrs[0].dyn_cast_or_null<StringAttr>();
+      if (!stringAttr)
+        continue;
+      argNames.push_back(stringAttr.getValue().str());
+    }
+
+    llvm::SmallVector<StringRef> inputNames;
+    auto dictAttr =
+        funcOp->getAttrOfType<DictionaryAttr>("tf.entry_function");
+    if (auto str =
+            dictAttr.get(nameAttr).dyn_cast_or_null<StringAttr>()) {
+      str.getValue().split(inputNames, ',', /*MaxSplit=*/-1,
+                           /*KeepEmpty=*/false);
+    }
+
+    assert(argNames.size() == inputNames.size());
+    for (int i = 0; i < inputNames.size(); i++) {
+      map[inputNames[i].str()] = argNames[i];
+      attrsInOrder.push_back(argNames[i]);
+    }
+    return map;
+  };
+
+  llvm::StringMap<std::string> inNameToAttrMap, outNameToAttrMap;
+  llvm::SmallVector<std::string> attrsInOrder;
+
+  inNameToAttrMap = buildMap("arg_attrs", "inputs", attrsInOrder);
+  outNameToAttrMap = buildMap("res_attrs", "outputs", attrsInOrder);
+
+  for (int i = 0; i < inNameToAttrMap.size(); i++) {
+    inputTensorMap[attrsInOrder[i]] = values[i];
+  }
+
+  for (auto v : values) {
+    if (auto loc = v.getLoc()->dyn_cast_or_null<NameLoc>()) {
+      if (outNameToAttrMap.count(loc.getName())) {
+        outputTensorMap[outNameToAttrMap[loc.getName()]] = v;
+      }
+    }
+  }
+}
+
 std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
                                                  int &peakMemoryUsed,
                                                  int &peakOpId) {
@@ -245,6 +304,22 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
     }
   }
 
+  // Handle same-allocation input and output tensors
+  llvm::DenseSet<Value> inputTensorSet;
+  llvm::DenseSet<Value> outputTensorSet;
+  llvm::StringMap<Value> inputTensorMap, outputTensorMap;
+
+  if (sameAllocationInputOutputTensorOption.size() > 0) {
+    buildInputOutputTensorMaps(inputTensorMap, outputTensorMap);
+    for (int i = 0; i < sameAllocationInputOutputTensorOption.size();
+         i = i + 2) {
+      inputTensorSet.insert(
+          inputTensorMap[sameAllocationInputOutputTensorOption[i]]);
+      outputTensorSet.insert(
+          outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]]);
+    }
+  }
+
   // The comparator keeps the buffers ordered by id if their sizes are the
   // same
   auto DecreasingSizesComparator = [&](QueueItem &lhs, QueueItem &rhs) {
@@ -259,23 +334,51 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
       queue(DecreasingSizesComparator);
 
   // Insert values and their sizes into priority queue
+  // The inOutMap prevents adding values which are overlapped.
+  // In a chain of overlapped values, only the last value is allocated; the
+  // rest are patched up and added to the allocated values list later.
+  // Same-allocation input and output tensors are not inserted into the
+  // queue, as they are allocated separately.
   for (auto v : values) {
-    if (!inOutMap.count(v) && !vInfo[v].isConstant) {
+    if (!inOutMap.count(v) && !vInfo[v].isConstant &&
+        !outputTensorSet.contains(v) && !inputTensorSet.contains(v)) {
       queue.push({v, vInfo[v].size});
     }
   }
 
   ValuesOrderedByOffset allocatedValues;
-  auto v = queue.top().first;
-  queue.pop();
-  allocatedValues.insert({v, 0});
+
+  // If there are same-allocation input and output tensors, allocate them
+  // first
+  if (sameAllocationInputOutputTensorOption.size() > 0) {
+    // Allocate the first input and output tensor pair at offset zero
+    allocatedValues.insert(
+        {inputTensorMap[sameAllocationInputOutputTensorOption[0]], 0});
+    allocatedValues.insert(
+        {outputTensorMap[sameAllocationInputOutputTensorOption[1]], 0});
+
+    for (int i = 2; i < sameAllocationInputOutputTensorOption.size();
+         i = i + 2) {
+      auto inputTensor =
+          inputTensorMap[sameAllocationInputOutputTensorOption[i]];
+      int newOffset = getOffset(inputTensor, vInfo[inputTensor].size, vInfo,
+                                allocatedValues);
+      allocatedValues.insert({inputTensor, newOffset});
+      allocatedValues.insert(
+          {outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]],
+           newOffset});
+    }
+  } else {
+    // Otherwise allocate the largest tensor at offset zero
+    auto v = queue.top().first;
+    queue.pop();
+    allocatedValues.insert({v, 0});
+  }
 
   while (!queue.empty()) {
     auto v = queue.top().first;
     auto size = queue.top().second;
     queue.pop();
 
-    // check with allocatedValues list
     int newOffset = getOffset(v, size, vInfo, allocatedValues);
     allocatedValues.insert({v, newOffset});
   }
@@ -313,6 +416,37 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
     allocatedValuesOrderedByID.insert(i);
   }
 
+  // Check if buffers clash (debug aid)
+  // for (auto i : allocatedValuesOrderedByID) {
+  //   for (auto j : allocatedValuesOrderedByID) {
+  //     if (vInfo[i.first].id < vInfo[j.first].id) {
+  //       if ((vInfo[i.first].firstUsed > vInfo[j.first].firstUsed &&
+  //            vInfo[i.first].firstUsed < vInfo[j.first].lastUsed) ||
+  //           (vInfo[j.first].firstUsed > vInfo[i.first].firstUsed &&
+  //            vInfo[j.first].firstUsed < vInfo[i.first].lastUsed)) {
+  //         auto iBegin = i.second;
+  //         auto iEnd = i.second + vInfo[i.first].size;
+  //         auto jBegin = j.second;
+  //         auto jEnd = j.second + vInfo[j.first].size;
+  //         if ((iBegin > jBegin && iBegin < jEnd) ||
+  //             (jBegin > iBegin && jBegin < iEnd)) {
+  //           printf("\n\nProblem!");
+  //           std::cout << "\nValue one " << vInfo[i.first].id
+  //                     << ", size = " << vInfo[i.first].size
+  //                     << ", offset = " << i.second
+  //                     << ", first = " << vInfo[i.first].firstUsed
+  //                     << ", last = " << vInfo[i.first].lastUsed;
+  //           std::cout << "\nValue two " << vInfo[j.first].id
+  //                     << ", size = " << vInfo[j.first].size
+  //                     << ", offset = " << j.second
+  //                     << ", first = " << vInfo[j.first].firstUsed
+  //                     << ", last = " << vInfo[j.first].lastUsed;
+  //         }
+  //       }
+  //     }
+  //   }
+  // }
+
   size_t peakUsed = 0;
   size_t peakUsedValueID = 0;
   size_t maxId = 0;
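Note on the allocation strategy: getOffset above is a first-fit scan. Buffers already placed are visited in increasing offset order, buffers whose live ranges do not overlap the candidate are skipped (they may share memory), and the candidate is placed in the first gap large enough to hold it. A minimal standalone sketch of the same strategy, using simplified stand-in types rather than the xformer API:

    // Illustrative sketch only; Buffer/Placed are simplified stand-ins for
    // MemoryPlan's ValueInfo and ValuesOrderedByOffset entries.
    #include <algorithm>
    #include <vector>

    struct Buffer {
      int firstUsed, lastUsed; // live range, in operation indices
      int size;                // bytes
    };

    struct Placed {
      const Buffer *buf;
      int offset;
    };

    // `placed` must be sorted by increasing offset.
    int firstFitOffset(const Buffer &b, const std::vector<Placed> &placed) {
      int offset = 0;
      for (const Placed &p : placed) {
        // Disjoint live ranges never clash in memory; skip them.
        if (p.buf->firstUsed > b.lastUsed || b.firstUsed > p.buf->lastUsed)
          continue;
        // Does the candidate fit in the gap before this clashing buffer?
        if (offset + b.size <= p.offset)
          return offset;
        // Otherwise continue the search past this buffer.
        offset = std::max(offset, p.offset + p.buf->size);
      }
      return offset;
    }

This is also why the new comment in getOffset holds: once a clash is found, every buffer passed earlier has already been proven disjoint in time, so the gap just before the clash is safe.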
diff --git a/xformer/Analysis/MemoryPlan.h b/xformer/Analysis/MemoryPlan.h
index 49c463145..fde7f3248 100644
--- a/xformer/Analysis/MemoryPlan.h
+++ b/xformer/Analysis/MemoryPlan.h
@@ -7,12 +7,21 @@
 #include "mlir/Analysis/Liveness.h"
 #include "mlir/IR/Value.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/StringMap.h"
 
 #include <set>
 
 namespace mlir {
 namespace xcore {
 
+struct ValueInfo {
+  size_t id;
+  size_t size;
+  bool isConstant;
+  int firstUsed;
+  int lastUsed;
+};
+
 // Represents an analysis for memory planning of a given FuncOp for a model.
 // - Uses liveness analysis and a greedy algorithm to arrange buffers in
 //   memory.
 // - Tries to overlap input and output buffers based on the op
 //   characteristics.
@@ -51,6 +60,11 @@ class MemoryPlan {
   std::vector<Operation *> getOperationsSequence() { return operations; }
 
+  DenseMap<Value, ValueInfo> getValuesInfoMap() { return valueInfo; }
+
+  void buildInputOutputTensorMaps(llvm::StringMap<Value> &inputTensorMap,
+                                  llvm::StringMap<Value> &outputTensorMap);
+
   // OpSplitPlan getOpSplitPlan();
 
   void printMemoryPlan();
@@ -70,14 +84,6 @@ class MemoryPlan {
   using ValuesOrderedByOffset = std::multiset;
 
-  struct ValueInfo {
-    size_t id;
-    size_t size;
-    bool isConstant;
-    int firstUsed;
-    int lastUsed;
-  };
-
   int getOffset(Value v, int size, DenseMap<Value, ValueInfo> &valueInfo,
                 ValuesOrderedByOffset &allocatedOffsets);
 
diff --git a/xformer/Transforms/Options.h b/xformer/Transforms/Options.h
index b200309c5..046e7e32c 100644
--- a/xformer/Transforms/Options.h
+++ b/xformer/Transforms/Options.h
@@ -32,6 +32,8 @@ extern llvm::cl::opt convDebugOption;
 extern llvm::cl::opt overlapConvOption;
 extern llvm::cl::opt offlineOffsetsOption;
 extern llvm::cl::opt convChannelwiseSplitSizeOption;
+extern llvm::cl::list<std::string> sameAllocationInputOutputTensorOption;
+
 } // namespace xcore
 } // namespace mlir
 
diff --git a/xformer/Transforms/Passes.cpp b/xformer/Transforms/Passes.cpp
index 6acce3f26..11aff6de7 100644
--- a/xformer/Transforms/Passes.cpp
+++ b/xformer/Transforms/Passes.cpp
@@ -15,6 +15,7 @@ void buildXCorePreOpSplitPassPipeline(OpPassManager &pm) {
   // Run pass from LCE to convert Larq ops which are in TFL custom op format
   // to Larq dialect
   pm.addPass(mlir::TFL::CreateTranslateToLCEPass());
+  pm.addPass(createVerifySameAllocationTensorsPass());
   // Convert dynamic shapes in batch dimension to static
   pm.addPass(createRemoveDynamicShapePass());
 }
 
diff --git a/xformer/Transforms/Passes.h b/xformer/Transforms/Passes.h
index dfdc15a7e..95eb57dda 100644
--- a/xformer/Transforms/Passes.h
+++ b/xformer/Transforms/Passes.h
@@ -30,6 +30,8 @@ std::unique_ptr> createReplaceFCWithConv2DPass();
 std::unique_ptr> createOptimizeConv2DPass();
 std::unique_ptr> createOpSplitPass();
 std::unique_ptr> createApplyTFLPatternsPass();
+std::unique_ptr<OperationPass<func::FuncOp>>
+createVerifySameAllocationTensorsPass();
 std::unique_ptr> createRemoveDynamicShapePass();
 std::unique_ptr> createReplaceAddSubPass();
 std::unique_ptr> createReplaceMulPass();
 
diff --git a/xformer/Transforms/VerifySameAllocationTensors.cpp b/xformer/Transforms/VerifySameAllocationTensors.cpp
new file mode 100644
index 000000000..2ad34ea68
--- /dev/null
+++ b/xformer/Transforms/VerifySameAllocationTensors.cpp
@@ -0,0 +1,160 @@
+// Copyright 2021 XMOS LIMITED. This Software is subject to the terms of the
+// XMOS Public License: Version 1
+
+#include "Analysis/MemoryPlan.h"
+#include "IR/XCoreOps.h"
+#include "Transforms/Options.h"
+#include "Utils/Util.h"
+
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
+
+namespace mlir::xcore {
+
+namespace {
+struct VerifySameAllocationTensors
+    : public PassWrapper<VerifySameAllocationTensors,
+                         OperationPass<func::FuncOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(VerifySameAllocationTensors)
+
+  void getDependentDialects(DialectRegistry &registry) const final {
+    registry.insert<TFL::TensorFlowLiteDialect>();
+  }
+  StringRef getArgument() const final { return "xcore-preset-allocations"; }
+  StringRef getDescription() const final {
+    return "Verify tensors marked for same allocation";
+  }
+  void runOnOperation() override;
+};
+
+void VerifySameAllocationTensors::runOnOperation() {
+  auto func = getOperation();
+  auto *ctx = &getContext();
+
+  // Iterate through all tensor options:
+  // - check that the input and output tensors are present,
+  // - check that the sizes of each pair are the same, otherwise error,
+  // - if the quantization differs by 1/2^16 or 1/2^8, insert a quantize op
+  //   at the input and emit a warning.
+
+  auto &m = getAnalysis<MemoryPlan>();
+  llvm::StringMap<Value> inputTensorMap, outputTensorMap;
+  m.buildInputOutputTensorMaps(inputTensorMap, outputTensorMap);
+
+  bool failed = false;
+  // Check names of input and output tensors
+  for (int i = 0; i < sameAllocationInputOutputTensorOption.size(); i = i + 2) {
+    if (!inputTensorMap.count(sameAllocationInputOutputTensorOption[i])) {
+      func.emitError()
+          << sameAllocationInputOutputTensorOption[i]
+          << " not present in input tensors. Please check the name!";
+      failed = true;
+    }
+    if (!outputTensorMap.count(sameAllocationInputOutputTensorOption[i + 1])) {
+      func.emitError()
+          << sameAllocationInputOutputTensorOption[i + 1]
+          << " not present in output tensors. Please check the name!";
+      failed = true;
+    }
+  }
+
+  // Check sizes
+  auto vInfo = m.getValuesInfoMap();
+  for (int i = 0; i < sameAllocationInputOutputTensorOption.size(); i = i + 2) {
+    if (vInfo[inputTensorMap[sameAllocationInputOutputTensorOption[i]]].size !=
+        vInfo[outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]]]
+            .size) {
+      func.emitError() << "Size of input tensor "
+                       << sameAllocationInputOutputTensorOption[i]
+                       << " is not equal to that of output tensor "
+                       << sameAllocationInputOutputTensorOption[i + 1]
+                       << ". Please check!";
+      failed = true;
+    }
+  }
+
+  // Check quantization
+  for (int i = 0; i < sameAllocationInputOutputTensorOption.size(); i = i + 2) {
+    auto inQType = dyn_cast_or_null<mlir::quant::UniformQuantizedType>(
+        inputTensorMap[sameAllocationInputOutputTensorOption[i]]
+            .getType()
+            .cast<ShapedType>()
+            .getElementType());
+    auto outQType = dyn_cast_or_null<mlir::quant::UniformQuantizedType>(
+        outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]]
+            .getType()
+            .cast<ShapedType>()
+            .getElementType());
+    if (inQType && !outQType) {
+      func.emitError() << "Input tensor "
+                       << sameAllocationInputOutputTensorOption[i]
+                       << " is quantized, but "
+                       << sameAllocationInputOutputTensorOption[i + 1]
+                       << " is not. Please check!";
+      failed = true;
+    } else if (!inQType && outQType) {
+      func.emitError() << "Input tensor "
+                       << sameAllocationInputOutputTensorOption[i]
+                       << " is not quantized, but "
+                       << sameAllocationInputOutputTensorOption[i + 1]
+                       << " is quantized. Please check!";
Please check!"; + failed = true; + } else if (inQType && outQType) { + // Both are quantized, but check element sizes, maybe i8 and i16 + + auto inScale = inQType.getScale(); + auto inZeroPoint = inQType.getZeroPoint(); + + auto outScale = outQType.getScale(); + auto outZeroPoint = outQType.getZeroPoint(); + if (inScale != outScale || inZeroPoint != outZeroPoint) { + // change input block arg to output quantization + + // insert quantize op to convert back to original input quantization + // auto module = func->getParentOfType(); + // OpBuilder builder(module); + // auto outVal = outputTensorMap[sameAllocationInputOutputTensorOption[i + // + 1]]; auto newQType = inQType.castFromExpressedType( + // quant::QuantizedType::castToExpressedType(outVal.getType())); + // auto newQuantizeOp = builder.create( + // inVal.getLoc(), newQType, outVal, TypeAttr::get(inQType)); + + auto inVal = inputTensorMap[sameAllocationInputOutputTensorOption[i]]; + auto typeNumBits = + utils::getTypeSize( + inVal.getType().cast().getElementType()) * + 8; + double maxError = 1.0 / (2 << (typeNumBits - 1)); + if (abs(inScale - outScale) > maxError) { + func.emitError() << "Input tensor " + << sameAllocationInputOutputTensorOption[i] + << " has scale of " << inScale + << " and zeropoint of " << inZeroPoint + << ", but output tensor " + << sameAllocationInputOutputTensorOption[i + 1] + << " has scale of " << outScale + << " and zeropoint of " << outZeroPoint + << ". Please check!"; + failed = true; + } + } + } else if (!inQType && !outQType) { + // Both are not quantized, but check element sizes, maybe i8 and i16 + } + } + + if (failed) { + signalPassFailure(); + return; + } +} +} // namespace + +// Creates an instance of the VerifySameAllocationTensors pass. +std::unique_ptr> +createVerifySameAllocationTensorsPass() { + return std::make_unique(); +} + +static PassRegistration pass; + +} // namespace mlir::xcore diff --git a/xformer/XCoreOptMain.cpp b/xformer/XCoreOptMain.cpp index 63310d300..26cadeaba 100644 --- a/xformer/XCoreOptMain.cpp +++ b/xformer/XCoreOptMain.cpp @@ -36,6 +36,13 @@ namespace mlir::xcore { // and -help) will be hidden. static cl::OptionCategory XformerCategory("Xformer options"); +llvm::cl::list sameAllocationInputOutputTensorOption( + "xcore-same-allocation-input-output-tensor", + cl::desc("Allocate this input and output tensor in the same memory " + "location. This helps avoiding a memcopy from output to input in " + "case of recurrent networks. 
diff --git a/xformer/XCoreOptMain.cpp b/xformer/XCoreOptMain.cpp
index 63310d300..26cadeaba 100644
--- a/xformer/XCoreOptMain.cpp
+++ b/xformer/XCoreOptMain.cpp
@@ -36,6 +36,13 @@ namespace mlir::xcore {
 // and -help) will be hidden.
 static cl::OptionCategory XformerCategory("Xformer options");
 
+llvm::cl::list<std::string> sameAllocationInputOutputTensorOption(
+    "xcore-same-allocation-input-output-tensor",
+    cl::desc("Allocate this input and output tensor pair in the same memory "
+             "location. This helps avoid a memcpy from the output to the "
+             "input in recurrent networks. The first tensor must be the "
+             "input."),
+    cl::CommaSeparated, cl::cat(XformerCategory));
+
 cl::opt<bool> enableMemoryAnalysisOption(
     "xcore-run-memory-analysis",
     cl::desc("Run memory analysis to aid in operation splitting."),
@@ -507,6 +514,24 @@
     return failedMessage("Please specify a thread count between one and five!");
   }
 
+  llvm::DenseMap<int, int> positionCountMap;
+  for (int i = 0;
+       i < mlir::xcore::sameAllocationInputOutputTensorOption.size(); i++) {
+    int pos =
+        mlir::xcore::sameAllocationInputOutputTensorOption.getPosition(i);
+    if (positionCountMap.count(pos)) {
+      positionCountMap[pos]++;
+    } else {
+      positionCountMap[pos] = 1;
+    }
+  }
+  for (auto i : positionCountMap) {
+    if (i.second != 2) {
+      return failedMessage(
+          "Please specify exactly two tensors, an input tensor and an output "
+          "tensor, for each xcore-same-allocation-input-output-tensor "
+          "option!");
+    }
+  }
+
   if (failed(isCompatibleVersion(
           versionLibTfliteMicro, lib_tflite_micro::major_version,
           lib_tflite_micro::minor_version, lib_tflite_micro::patch_version))) {
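Usage sketch for the new option, assuming the xcore-opt driver built from this tree (the tensor names are illustrative and must match the model's actual input and output tensor names; each option instance names one input/output pair, input first):

    xcore-opt model.tflite -o model_xcore.tflite \
      --xcore-same-allocation-input-output-tensor=input_state,output_state

The positionCountMap check in main() above rejects any option instance that does not name exactly two comma-separated tensors.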
From 3ead48f111e6b5991c3b9bc65d4845d879448443 Mon Sep 17 00:00:00 2001
From: panickal-xmos
Date: Sun, 15 Dec 2024 15:15:26 +0000
Subject: [PATCH 2/4] Update submodule

---
 third_party/lib_tflite_micro | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/lib_tflite_micro b/third_party/lib_tflite_micro
index 6a387b04c..a0e68eea3 160000
--- a/third_party/lib_tflite_micro
+++ b/third_party/lib_tflite_micro
@@ -1 +1 @@
-Subproject commit 6a387b04c20602383ab9af903de5092290d70091
+Subproject commit a0e68eea3892c00afe510bd8de54ff45d9eba53b

From e50b9027a3c8a05fb25be90536d7b27bc4aeda3e Mon Sep 17 00:00:00 2001
From: panickal-xmos
Date: Sun, 15 Dec 2024 22:27:52 +0000
Subject: [PATCH 3/4] Fix handling when arg attrs are not present

---
 xformer/Analysis/MemoryPlan.cpp | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/xformer/Analysis/MemoryPlan.cpp b/xformer/Analysis/MemoryPlan.cpp
index 383cdb16a..96236b942 100644
--- a/xformer/Analysis/MemoryPlan.cpp
+++ b/xformer/Analysis/MemoryPlan.cpp
@@ -160,18 +160,6 @@ void MemoryPlan::buildInputOutputTensorMaps(
     llvm::SmallVector<std::string> argNames;
     auto funcOp = dyn_cast<func::FuncOp>(op);
 
-    auto argAttrs = funcOp->getAttrOfType<ArrayAttr>(argAttr);
-    for (auto attr : argAttrs) {
-      auto d = attr.dyn_cast_or_null<DictionaryAttr>();
-
-      const ArrayRef<Attribute> indexPathAttrs =
-          d.get("tf_saved_model.index_path").cast<ArrayAttr>().getValue();
-      auto stringAttr = indexPathAttrs[0].dyn_cast_or_null<StringAttr>();
-      if (!stringAttr)
-        continue;
-      argNames.push_back(stringAttr.getValue().str());
-    }
-
     llvm::SmallVector<StringRef> inputNames;
     auto dictAttr =
         funcOp->getAttrOfType<DictionaryAttr>("tf.entry_function");
@@ -181,6 +169,25 @@ void MemoryPlan::buildInputOutputTensorMaps(
                            /*KeepEmpty=*/false);
     }
 
+    auto argAttrs = funcOp->getAttrOfType<ArrayAttr>(argAttr);
+    if (argAttrs) {
+      for (auto attr : argAttrs) {
+        auto d = attr.dyn_cast_or_null<DictionaryAttr>();
+
+        const ArrayRef<Attribute> indexPathAttrs =
+            d.get("tf_saved_model.index_path").cast<ArrayAttr>().getValue();
+        auto stringAttr =
+            indexPathAttrs[0].dyn_cast_or_null<StringAttr>();
+        if (!stringAttr)
+          continue;
+        argNames.push_back(stringAttr.getValue().str());
+      }
+    } else {
+      for (int i = 0; i < inputNames.size(); i++) {
+        argNames.push_back(inputNames[i].str());
+      }
+    }
+
     assert(argNames.size() == inputNames.size());
     for (int i = 0; i < inputNames.size(); i++) {
       map[inputNames[i].str()] = argNames[i];
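Background for this fix: buildInputOutputTensorMaps reads names from two places on the entry function, the per-argument tf_saved_model.index_path attribute and the comma-separated name lists in the tf.entry_function dictionary attribute. A schematic of the attributes involved (shapes, names, and values are illustrative only):

    func.func @main(
        %arg0: tensor<1x32xi8> {tf_saved_model.index_path = ["input_state"]})
        -> tensor<1x32xi8>
        attributes {tf.entry_function = {inputs = "input_state",
                                         outputs = "output_state"}} {
      // ... function body ...
    }

When the index-path argument attributes are absent (models not exported through the SavedModel path), the fallback above reuses the tf.entry_function names, so the two name lists stay the same length and the assert holds.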
Please check!"; + failed = true; + } } - } - // Check quantization - for (int i = 0; i < sameAllocationInputOutputTensorOption.size(); i = i + 2) { - auto inQType = dyn_cast_or_null( - inputTensorMap[sameAllocationInputOutputTensorOption[i]] - .getType() - .cast() - .getElementType()); - auto outQType = dyn_cast_or_null( - outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]] - .getType() - .cast() - .getElementType()); - if (inQType && !outQType) { - func.emitError() << "Input tensor " - << sameAllocationInputOutputTensorOption[i] - << " is quantized, but " - << sameAllocationInputOutputTensorOption[i + 1] - << " is not. Please check!"; - failed = true; - } else if (!inQType && outQType) { - func.emitError() << "Input tensor " - << sameAllocationInputOutputTensorOption[i] - << " is not quantized, but " - << sameAllocationInputOutputTensorOption[i + 1] - << " is quantized. Please check!"; - failed = true; - } else if (inQType && outQType) { - // Both are quantized, but check element sizes, maybe i8 and i16 - - auto inScale = inQType.getScale(); - auto inZeroPoint = inQType.getZeroPoint(); - - auto outScale = outQType.getScale(); - auto outZeroPoint = outQType.getZeroPoint(); - if (inScale != outScale || inZeroPoint != outZeroPoint) { - // change input block arg to output quantization - - // insert quantize op to convert back to original input quantization - // auto module = func->getParentOfType(); - // OpBuilder builder(module); - // auto outVal = outputTensorMap[sameAllocationInputOutputTensorOption[i - // + 1]]; auto newQType = inQType.castFromExpressedType( - // quant::QuantizedType::castToExpressedType(outVal.getType())); - // auto newQuantizeOp = builder.create( - // inVal.getLoc(), newQType, outVal, TypeAttr::get(inQType)); - - auto inVal = inputTensorMap[sameAllocationInputOutputTensorOption[i]]; - auto typeNumBits = - utils::getTypeSize( - inVal.getType().cast().getElementType()) * - 8; - double maxError = 1.0 / (2 << (typeNumBits - 1)); - if (abs(inScale - outScale) > maxError) { - func.emitError() << "Input tensor " - << sameAllocationInputOutputTensorOption[i] - << " has scale of " << inScale - << " and zeropoint of " << inZeroPoint - << ", but output tensor " - << sameAllocationInputOutputTensorOption[i + 1] - << " has scale of " << outScale - << " and zeropoint of " << outZeroPoint - << ". Please check!"; - failed = true; + // Check quantization + for (int i = 0; i < sameAllocationInputOutputTensorOption.size(); + i = i + 2) { + auto inQType = dyn_cast_or_null( + inputTensorMap[sameAllocationInputOutputTensorOption[i]] + .getType() + .cast() + .getElementType()); + auto outQType = dyn_cast_or_null( + outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]] + .getType() + .cast() + .getElementType()); + if (inQType && !outQType) { + func.emitError() << "Input tensor " + << sameAllocationInputOutputTensorOption[i] + << " is quantized, but " + << sameAllocationInputOutputTensorOption[i + 1] + << " is not. Please check!"; + failed = true; + } else if (!inQType && outQType) { + func.emitError() << "Input tensor " + << sameAllocationInputOutputTensorOption[i] + << " is not quantized, but " + << sameAllocationInputOutputTensorOption[i + 1] + << " is quantized. 
Please check!"; + failed = true; + } else if (inQType && outQType) { + // Both are quantized, but check element sizes, maybe i8 and i16 + + auto inScale = inQType.getScale(); + auto inZeroPoint = inQType.getZeroPoint(); + + auto outScale = outQType.getScale(); + auto outZeroPoint = outQType.getZeroPoint(); + if (inScale != outScale || inZeroPoint != outZeroPoint) { + // change input block arg to output quantization + + // insert quantize op to convert back to original input quantization + // auto module = func->getParentOfType(); + // OpBuilder builder(module); + // auto outVal = + // outputTensorMap[sameAllocationInputOutputTensorOption[i + // + 1]]; auto newQType = inQType.castFromExpressedType( + // quant::QuantizedType::castToExpressedType(outVal.getType())); + // auto newQuantizeOp = builder.create( + // inVal.getLoc(), newQType, outVal, TypeAttr::get(inQType)); + + auto inVal = inputTensorMap[sameAllocationInputOutputTensorOption[i]]; + auto typeNumBits = + utils::getTypeSize( + inVal.getType().cast().getElementType()) * + 8; + double maxError = 1.0 / (2 << (typeNumBits - 1)); + if (abs(inScale - outScale) > maxError) { + func.emitError() + << "Input tensor " << sameAllocationInputOutputTensorOption[i] + << " has scale of " << inScale << " and zeropoint of " + << inZeroPoint << ", but output tensor " + << sameAllocationInputOutputTensorOption[i + 1] + << " has scale of " << outScale << " and zeropoint of " + << outZeroPoint << ". Please check!"; + failed = true; + } } + } else if (!inQType && !outQType) { + // Both are not quantized, but check element sizes, maybe i8 and i16 } - } else if (!inQType && !outQType) { - // Both are not quantized, but check element sizes, maybe i8 and i16 } - } - if (failed) { - signalPassFailure(); - return; + if (failed) { + signalPassFailure(); + return; + } } } } // namespace