From 13b979b66ab74333ec3b6f7d6ff27dd2448a199d Mon Sep 17 00:00:00 2001
From: panickal-xmos
Date: Sun, 15 Dec 2024 15:01:15 +0000
Subject: [PATCH 1/4] Add changes for allocating the same offset for input and
 output tensors

---
 xformer/Analysis/MemoryPlan.cpp               | 146 +++++++++++++++-
 xformer/Analysis/MemoryPlan.h                 |  22 ++-
 xformer/Transforms/Options.h                  |   2 +
 xformer/Transforms/Passes.cpp                 |   1 +
 xformer/Transforms/Passes.h                   |   2 +
 .../VerifySameAllocationTensors.cpp           | 160 ++++++++++++++++++
 xformer/XCoreOptMain.cpp                      |  25 +++
 7 files changed, 344 insertions(+), 14 deletions(-)
 create mode 100644 xformer/Transforms/VerifySameAllocationTensors.cpp

diff --git a/xformer/Analysis/MemoryPlan.cpp b/xformer/Analysis/MemoryPlan.cpp
index 9a72a8094..383cdb16a 100644
--- a/xformer/Analysis/MemoryPlan.cpp
+++ b/xformer/Analysis/MemoryPlan.cpp
@@ -129,7 +129,9 @@ int MemoryPlan::getOffset(Value v, int size,
     if ((valueInfo[allocatedVal].firstUsed > valueInfo[v].lastUsed) ||
         (valueInfo[v].firstUsed > valueInfo[allocatedVal].lastUsed)) {
-      // No overlap
+      // No overlap with this buffer; keep scanning until we find a clash.
+      // Once we find a clash, we can allocate just before it if there is
+      // enough space, since we do not overlap any buffer checked so far.
       continue;
     }
 
@@ -149,6 +151,63 @@ int MemoryPlan::getOffset(Value v, int size,
   return offset;
 }
 
+void MemoryPlan::buildInputOutputTensorMaps(
+    llvm::StringMap<Value> &inputTensorMap,
+    llvm::StringMap<Value> &outputTensorMap) {
+  auto buildMap = [&](StringRef argAttr, StringRef nameAttr,
+                      llvm::SmallVector<std::string> &attrsInOrder) {
+    llvm::StringMap<std::string> map;
+    llvm::SmallVector<std::string> argNames;
+    auto funcOp = dyn_cast<func::FuncOp>(op);
+
+    auto argAttrs = funcOp->getAttrOfType<ArrayAttr>(argAttr);
+    for (auto attr : argAttrs) {
+      auto d = attr.dyn_cast_or_null<DictionaryAttr>();
+
+      const ArrayRef<Attribute> indexPathAttrs =
+          d.get("tf_saved_model.index_path").cast<ArrayAttr>().getValue();
+      auto stringAttr = indexPathAttrs[0].dyn_cast_or_null<StringAttr>();
+      if (!stringAttr)
+        continue;
+      argNames.push_back(stringAttr.getValue().str());
+    }
+
+    llvm::SmallVector<StringRef> inputNames;
+    auto dictAttr =
+        funcOp->getAttrOfType<DictionaryAttr>("tf.entry_function");
+    if (auto str =
+            dictAttr.get(nameAttr).dyn_cast_or_null<StringAttr>()) {
+      str.getValue().split(inputNames, ',', /*MaxSplit=*/-1,
+                           /*KeepEmpty=*/false);
+    }
+
+    assert(argNames.size() == inputNames.size());
+    for (int i = 0; i < inputNames.size(); i++) {
+      map[inputNames[i].str()] = argNames[i];
+      attrsInOrder.push_back(argNames[i]);
+    }
+    return map;
+  };
+
+  llvm::StringMap<std::string> inNameToAttrMap, outNameToAttrMap;
+  llvm::SmallVector<std::string> attrsInOrder;
+
+  inNameToAttrMap = buildMap("arg_attrs", "inputs", attrsInOrder);
+  outNameToAttrMap = buildMap("res_attrs", "outputs", attrsInOrder);
+
+  for (int i = 0; i < inNameToAttrMap.size(); i++) {
+    inputTensorMap[attrsInOrder[i]] = values[i];
+  }
+
+  for (auto v : values) {
+    if (auto loc = v.getLoc()->dyn_cast_or_null<NameLoc>()) {
+      if (outNameToAttrMap.count(loc.getName())) {
+        outputTensorMap[outNameToAttrMap[loc.getName()]] = v;
+      }
+    }
+  }
+}
+
 std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
                                                  int &peakMemoryUsed,
                                                  int &peakOpId) {
@@ -245,6 +304,22 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
     }
   }
 
+  // Handle same-allocation input and output tensors
+  llvm::DenseSet<Value> inputTensorSet;
+  llvm::DenseSet<Value> outputTensorSet;
+  llvm::StringMap<Value> inputTensorMap, outputTensorMap;
+
+  if (sameAllocationInputOutputTensorOption.size() > 0) {
+    buildInputOutputTensorMaps(inputTensorMap, outputTensorMap);
+    for (int i = 0; i < sameAllocationInputOutputTensorOption.size();
+         i = i + 2) {
+      inputTensorSet.insert(
+          inputTensorMap[sameAllocationInputOutputTensorOption[i]]);
+      outputTensorSet.insert(
+          outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]]);
+    }
+  }
+
   // The comparator keeps the buffers ordered by id if their sizes are the
   // same
   auto DecreasingSizesComparator = [&](QueueItem &lhs, QueueItem &rhs) {
@@ -259,23 +334,51 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
       queue(DecreasingSizesComparator);
 
   // Insert values and their sizes into priority queue
+  // The inOutMap prevents adding values which are overlapped.
+  // In a chain of overlapped values, only the last value is allocated; the
+  // rest are patched up and added to the allocated values list later.
+  // Same-allocation input and output tensors are not inserted into the
+  // queue, as they are allocated separately.
   for (auto v : values) {
-    if (!inOutMap.count(v) && !vInfo[v].isConstant) {
+    if (!inOutMap.count(v) && !vInfo[v].isConstant &&
+        !outputTensorSet.contains(v) && !inputTensorSet.contains(v)) {
       queue.push({v, vInfo[v].size});
     }
   }
 
   ValuesOrderedByOffset allocatedValues;
-  auto v = queue.top().first;
-  queue.pop();
-  allocatedValues.insert({v, 0});
+
+  // If there are same-allocation input and output tensors, allocate them
+  // first
+  if (sameAllocationInputOutputTensorOption.size() > 0) {
+    // Allocate the first input and output tensor pair at offset zero
+    allocatedValues.insert(
+        {inputTensorMap[sameAllocationInputOutputTensorOption[0]], 0});
+    allocatedValues.insert(
+        {outputTensorMap[sameAllocationInputOutputTensorOption[1]], 0});
+
+    for (int i = 2; i < sameAllocationInputOutputTensorOption.size();
+         i = i + 2) {
+      auto inputTensor =
+          inputTensorMap[sameAllocationInputOutputTensorOption[i]];
+      int newOffset = getOffset(inputTensor, vInfo[inputTensor].size, vInfo,
+                                allocatedValues);
+      allocatedValues.insert({inputTensor, newOffset});
+      allocatedValues.insert(
+          {outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]],
+           newOffset});
+    }
+  } else {
+    // Otherwise allocate the largest tensor at offset zero
+    auto v = queue.top().first;
+    queue.pop();
+    allocatedValues.insert({v, 0});
+  }
 
   while (!queue.empty()) {
     auto v = queue.top().first;
     auto size = queue.top().second;
     queue.pop();
 
-    // check with allocatedValues list
     int newOffset = getOffset(v, size, vInfo, allocatedValues);
     allocatedValues.insert({v, newOffset});
   }
@@ -313,6 +416,37 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
     allocatedValuesOrderedByID.insert(i);
   }
 
+  // Check if buffers clash (debug aid)
+  // for (auto i : allocatedValuesOrderedByID) {
+  //   for (auto j : allocatedValuesOrderedByID) {
+  //     if (vInfo[i.first].id < vInfo[j.first].id) {
+  //       if ((vInfo[i.first].firstUsed > vInfo[j.first].firstUsed &&
+  //            vInfo[i.first].firstUsed < vInfo[j.first].lastUsed) ||
+  //           (vInfo[j.first].firstUsed > vInfo[i.first].firstUsed &&
+  //            vInfo[j.first].firstUsed < vInfo[i.first].lastUsed)) {
+  //         auto iBegin = i.second;
+  //         auto iEnd = i.second + vInfo[i.first].size;
+  //         auto jBegin = j.second;
+  //         auto jEnd = j.second + vInfo[j.first].size;
+  //         if ((iBegin > jBegin && iBegin < jEnd) ||
+  //             (jBegin > iBegin && jBegin < iEnd)) {
+  //           printf("\n\nProblem!");
+  //           std::cout << "\nValue one " << vInfo[i.first].id
+  //                     << ", size = " << vInfo[i.first].size
+  //                     << ", offset = " << i.second
+  //                     << ", first = " << vInfo[i.first].firstUsed
+  //                     << ", last = " << vInfo[i.first].lastUsed;
+  //           std::cout << "\nValue two " << vInfo[j.first].id
+  //                     << ", size = " << vInfo[j.first].size
+  //                     << ", offset = " << j.second
+  //                     << ", first = " << vInfo[j.first].firstUsed
+  //                     << ", last = " << vInfo[j.first].lastUsed;
+  //         }
+  //       }
+  //     }
+  //   }
+  // }
+
   size_t peakUsed = 0;
   size_t peakUsedValueID = 0;
   size_t maxId = 0;
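Note on the allocation strategy: getOffset above is a first-fit scan. Buffers already placed are visited in increasing offset order, buffers whose live ranges do not overlap the candidate are skipped (they may share memory), and the candidate is placed in the first gap large enough to hold it. A minimal standalone sketch of the same strategy, using simplified stand-in types rather than the xformer API:

    // Illustrative sketch only; Buffer/Placed are simplified stand-ins for
    // MemoryPlan's ValueInfo and ValuesOrderedByOffset entries.
    #include <algorithm>
    #include <vector>

    struct Buffer {
      int firstUsed, lastUsed; // live range, in operation indices
      int size;                // bytes
    };

    struct Placed {
      const Buffer *buf;
      int offset;
    };

    // `placed` must be sorted by increasing offset.
    int firstFitOffset(const Buffer &b, const std::vector<Placed> &placed) {
      int offset = 0;
      for (const Placed &p : placed) {
        // Disjoint live ranges never clash in memory; skip them.
        if (p.buf->firstUsed > b.lastUsed || b.firstUsed > p.buf->lastUsed)
          continue;
        // Does the candidate fit in the gap before this clashing buffer?
        if (offset + b.size <= p.offset)
          return offset;
        // Otherwise continue the search past this buffer.
        offset = std::max(offset, p.offset + p.buf->size);
      }
      return offset;
    }

This is also why the new comment in getOffset holds: once a clash is found, every buffer passed earlier has already been proven disjoint in time, so the gap just before the clash is safe.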
diff --git a/xformer/Analysis/MemoryPlan.h b/xformer/Analysis/MemoryPlan.h
index 49c463145..fde7f3248 100644
--- a/xformer/Analysis/MemoryPlan.h
+++ b/xformer/Analysis/MemoryPlan.h
@@ -7,12 +7,21 @@
 #include "mlir/Analysis/Liveness.h"
 #include "mlir/IR/Value.h"
 #include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/StringMap.h"
 
 #include <set>
 
 namespace mlir {
 namespace xcore {
 
+struct ValueInfo {
+  size_t id;
+  size_t size;
+  bool isConstant;
+  int firstUsed;
+  int lastUsed;
+};
+
 // Represents an analysis for memory planning of a given FuncOp for a model.
 // - Uses liveness analysis and a greedy algorithm to arrange buffers in
 //   memory.
 // - Tries to overlap input and output buffers based on the op
 //   characteristics.
@@ -51,6 +60,11 @@ class MemoryPlan {
   std::vector<Operation *> getOperationsSequence() { return operations; }
 
+  DenseMap<Value, ValueInfo> getValuesInfoMap() { return valueInfo; }
+
+  void buildInputOutputTensorMaps(llvm::StringMap<Value> &inputTensorMap,
+                                  llvm::StringMap<Value> &outputTensorMap);
+
   // OpSplitPlan getOpSplitPlan();
 
   void printMemoryPlan();
@@ -70,14 +84,6 @@ class MemoryPlan {
   using ValuesOrderedByOffset = std::multiset;
 
-  struct ValueInfo {
-    size_t id;
-    size_t size;
-    bool isConstant;
-    int firstUsed;
-    int lastUsed;
-  };
-
   int getOffset(Value v, int size, DenseMap<Value, ValueInfo> &valueInfo,
                 ValuesOrderedByOffset &allocatedOffsets);
 
diff --git a/xformer/Transforms/Options.h b/xformer/Transforms/Options.h
index b200309c5..046e7e32c 100644
--- a/xformer/Transforms/Options.h
+++ b/xformer/Transforms/Options.h
@@ -32,6 +32,8 @@ extern llvm::cl::opt convDebugOption;
 extern llvm::cl::opt overlapConvOption;
 extern llvm::cl::opt offlineOffsetsOption;
 extern llvm::cl::opt convChannelwiseSplitSizeOption;
+extern llvm::cl::list<std::string> sameAllocationInputOutputTensorOption;
+
 } // namespace xcore
 } // namespace mlir
 
diff --git a/xformer/Transforms/Passes.cpp b/xformer/Transforms/Passes.cpp
index 6acce3f26..11aff6de7 100644
--- a/xformer/Transforms/Passes.cpp
+++ b/xformer/Transforms/Passes.cpp
@@ -15,6 +15,7 @@ void buildXCorePreOpSplitPassPipeline(OpPassManager &pm) {
   // Run pass from LCE to convert Larq ops which are in TFL custom op format
   // to Larq dialect
   pm.addPass(mlir::TFL::CreateTranslateToLCEPass());
+  pm.addPass(createVerifySameAllocationTensorsPass());
   // Convert dynamic shapes in batch dimension to static
   pm.addPass(createRemoveDynamicShapePass());
 }
 
diff --git a/xformer/Transforms/Passes.h b/xformer/Transforms/Passes.h
index dfdc15a7e..95eb57dda 100644
--- a/xformer/Transforms/Passes.h
+++ b/xformer/Transforms/Passes.h
@@ -30,6 +30,8 @@ std::unique_ptr> createReplaceFCWithConv2DPass();
 std::unique_ptr> createOptimizeConv2DPass();
 std::unique_ptr> createOpSplitPass();
 std::unique_ptr> createApplyTFLPatternsPass();
+std::unique_ptr<OperationPass<func::FuncOp>>
+createVerifySameAllocationTensorsPass();
 std::unique_ptr> createRemoveDynamicShapePass();
 std::unique_ptr> createReplaceAddSubPass();
 std::unique_ptr> createReplaceMulPass();
 
diff --git a/xformer/Transforms/VerifySameAllocationTensors.cpp b/xformer/Transforms/VerifySameAllocationTensors.cpp
new file mode 100644
index 000000000..2ad34ea68
--- /dev/null
+++ b/xformer/Transforms/VerifySameAllocationTensors.cpp
@@ -0,0 +1,160 @@
+// Copyright 2021 XMOS LIMITED. This Software is subject to the terms of the
+// XMOS Public License: Version 1
+
+#include "Analysis/MemoryPlan.h"
+#include "IR/XCoreOps.h"
+#include "Transforms/Options.h"
+#include "Utils/Util.h"
+
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+#include "tensorflow/compiler/mlir/lite/ir/tfl_ops.h"
+
+namespace mlir::xcore {
+
+namespace {
+struct VerifySameAllocationTensors
+    : public PassWrapper<VerifySameAllocationTensors,
+                         OperationPass<func::FuncOp>> {
+  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(VerifySameAllocationTensors)
+
+  void getDependentDialects(DialectRegistry &registry) const final {
+    registry.insert<TFL::TensorFlowLiteDialect>();
+  }
+  StringRef getArgument() const final { return "xcore-preset-allocations"; }
+  StringRef getDescription() const final {
+    return "Verify tensors marked for same allocation";
+  }
+  void runOnOperation() override;
+};
+
+void VerifySameAllocationTensors::runOnOperation() {
+  auto func = getOperation();
+  auto *ctx = &getContext();
+
+  // Iterate through all tensor options:
+  // - check that the input and output tensors are present,
+  // - check that the sizes of each pair are the same, otherwise error,
+  // - if the quantization differs by 1/2^16 or 1/2^8, insert a quantize op
+  //   at the input and emit a warning.
+
+  auto &m = getAnalysis<MemoryPlan>();
+  llvm::StringMap<Value> inputTensorMap, outputTensorMap;
+  m.buildInputOutputTensorMaps(inputTensorMap, outputTensorMap);
+
+  bool failed = false;
+  // Check names of input and output tensors
+  for (int i = 0; i < sameAllocationInputOutputTensorOption.size(); i = i + 2) {
+    if (!inputTensorMap.count(sameAllocationInputOutputTensorOption[i])) {
+      func.emitError()
+          << sameAllocationInputOutputTensorOption[i]
+          << " not present in input tensors. Please check the name!";
+      failed = true;
+    }
+    if (!outputTensorMap.count(sameAllocationInputOutputTensorOption[i + 1])) {
+      func.emitError()
+          << sameAllocationInputOutputTensorOption[i + 1]
+          << " not present in output tensors. Please check the name!";
+      failed = true;
+    }
+  }
+
+  // Check sizes
+  auto vInfo = m.getValuesInfoMap();
+  for (int i = 0; i < sameAllocationInputOutputTensorOption.size(); i = i + 2) {
+    if (vInfo[inputTensorMap[sameAllocationInputOutputTensorOption[i]]].size !=
+        vInfo[outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]]]
+            .size) {
+      func.emitError() << "Size of input tensor "
+                       << sameAllocationInputOutputTensorOption[i]
+                       << " is not equal to that of output tensor "
+                       << sameAllocationInputOutputTensorOption[i + 1]
+                       << ". Please check!";
+      failed = true;
+    }
+  }
+
+  // Check quantization
+  for (int i = 0; i < sameAllocationInputOutputTensorOption.size(); i = i + 2) {
+    auto inQType = dyn_cast_or_null<mlir::quant::UniformQuantizedType>(
+        inputTensorMap[sameAllocationInputOutputTensorOption[i]]
+            .getType()
+            .cast<ShapedType>()
+            .getElementType());
+    auto outQType = dyn_cast_or_null<mlir::quant::UniformQuantizedType>(
+        outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]]
+            .getType()
+            .cast<ShapedType>()
+            .getElementType());
+    if (inQType && !outQType) {
+      func.emitError() << "Input tensor "
+                       << sameAllocationInputOutputTensorOption[i]
+                       << " is quantized, but "
+                       << sameAllocationInputOutputTensorOption[i + 1]
+                       << " is not. Please check!";
+      failed = true;
+    } else if (!inQType && outQType) {
+      func.emitError() << "Input tensor "
+                       << sameAllocationInputOutputTensorOption[i]
+                       << " is not quantized, but "
+                       << sameAllocationInputOutputTensorOption[i + 1]
+                       << " is quantized. Please check!";
Please check!"; + failed = true; + } else if (inQType && outQType) { + // Both are quantized, but check element sizes, maybe i8 and i16 + + auto inScale = inQType.getScale(); + auto inZeroPoint = inQType.getZeroPoint(); + + auto outScale = outQType.getScale(); + auto outZeroPoint = outQType.getZeroPoint(); + if (inScale != outScale || inZeroPoint != outZeroPoint) { + // change input block arg to output quantization + + // insert quantize op to convert back to original input quantization + // auto module = func->getParentOfType(); + // OpBuilder builder(module); + // auto outVal = outputTensorMap[sameAllocationInputOutputTensorOption[i + // + 1]]; auto newQType = inQType.castFromExpressedType( + // quant::QuantizedType::castToExpressedType(outVal.getType())); + // auto newQuantizeOp = builder.create( + // inVal.getLoc(), newQType, outVal, TypeAttr::get(inQType)); + + auto inVal = inputTensorMap[sameAllocationInputOutputTensorOption[i]]; + auto typeNumBits = + utils::getTypeSize( + inVal.getType().cast().getElementType()) * + 8; + double maxError = 1.0 / (2 << (typeNumBits - 1)); + if (abs(inScale - outScale) > maxError) { + func.emitError() << "Input tensor " + << sameAllocationInputOutputTensorOption[i] + << " has scale of " << inScale + << " and zeropoint of " << inZeroPoint + << ", but output tensor " + << sameAllocationInputOutputTensorOption[i + 1] + << " has scale of " << outScale + << " and zeropoint of " << outZeroPoint + << ". Please check!"; + failed = true; + } + } + } else if (!inQType && !outQType) { + // Both are not quantized, but check element sizes, maybe i8 and i16 + } + } + + if (failed) { + signalPassFailure(); + return; + } +} +} // namespace + +// Creates an instance of the VerifySameAllocationTensors pass. +std::unique_ptr> +createVerifySameAllocationTensorsPass() { + return std::make_unique(); +} + +static PassRegistration pass; + +} // namespace mlir::xcore diff --git a/xformer/XCoreOptMain.cpp b/xformer/XCoreOptMain.cpp index 63310d300..26cadeaba 100644 --- a/xformer/XCoreOptMain.cpp +++ b/xformer/XCoreOptMain.cpp @@ -36,6 +36,13 @@ namespace mlir::xcore { // and -help) will be hidden. static cl::OptionCategory XformerCategory("Xformer options"); +llvm::cl::list sameAllocationInputOutputTensorOption( + "xcore-same-allocation-input-output-tensor", + cl::desc("Allocate this input and output tensor in the same memory " + "location. This helps avoiding a memcopy from output to input in " + "case of recurrent networks. 
diff --git a/xformer/XCoreOptMain.cpp b/xformer/XCoreOptMain.cpp
index 63310d300..26cadeaba 100644
--- a/xformer/XCoreOptMain.cpp
+++ b/xformer/XCoreOptMain.cpp
@@ -36,6 +36,13 @@ namespace mlir::xcore {
 // and -help) will be hidden.
 static cl::OptionCategory XformerCategory("Xformer options");
 
+llvm::cl::list<std::string> sameAllocationInputOutputTensorOption(
+    "xcore-same-allocation-input-output-tensor",
+    cl::desc("Allocate this input and output tensor pair in the same memory "
+             "location. This helps avoid a memcpy from the output to the "
+             "input in recurrent networks. The first tensor must be the "
+             "input."),
+    cl::CommaSeparated, cl::cat(XformerCategory));
+
 cl::opt<bool> enableMemoryAnalysisOption(
     "xcore-run-memory-analysis",
     cl::desc("Run memory analysis to aid in operation splitting."),
@@ -507,6 +514,24 @@
     return failedMessage("Please specify a thread count between one and five!");
   }
 
+  llvm::DenseMap<int, int> positionCountMap;
+  for (int i = 0;
+       i < mlir::xcore::sameAllocationInputOutputTensorOption.size(); i++) {
+    int pos =
+        mlir::xcore::sameAllocationInputOutputTensorOption.getPosition(i);
+    if (positionCountMap.count(pos)) {
+      positionCountMap[pos]++;
+    } else {
+      positionCountMap[pos] = 1;
+    }
+  }
+  for (auto i : positionCountMap) {
+    if (i.second != 2) {
+      return failedMessage(
+          "Please specify exactly two tensors, an input tensor and an output "
+          "tensor, for each xcore-same-allocation-input-output-tensor "
+          "option!");
+    }
+  }
+
   if (failed(isCompatibleVersion(
           versionLibTfliteMicro, lib_tflite_micro::major_version,
           lib_tflite_micro::minor_version, lib_tflite_micro::patch_version))) {
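Usage sketch for the new option, assuming the xcore-opt driver built from this tree (the tensor names are illustrative and must match the model's actual input and output tensor names; each option instance names one input/output pair, input first):

    xcore-opt model.tflite -o model_xcore.tflite \
      --xcore-same-allocation-input-output-tensor=input_state,output_state

The positionCountMap check in main() above rejects any option instance that does not name exactly two comma-separated tensors.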
From 3ead48f111e6b5991c3b9bc65d4845d879448443 Mon Sep 17 00:00:00 2001
From: panickal-xmos
Date: Sun, 15 Dec 2024 15:15:26 +0000
Subject: [PATCH 2/4] Update submodule

---
 third_party/lib_tflite_micro | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/third_party/lib_tflite_micro b/third_party/lib_tflite_micro
index 6a387b04c..a0e68eea3 160000
--- a/third_party/lib_tflite_micro
+++ b/third_party/lib_tflite_micro
@@ -1 +1 @@
-Subproject commit 6a387b04c20602383ab9af903de5092290d70091
+Subproject commit a0e68eea3892c00afe510bd8de54ff45d9eba53b

From e50b9027a3c8a05fb25be90536d7b27bc4aeda3e Mon Sep 17 00:00:00 2001
From: panickal-xmos
Date: Sun, 15 Dec 2024 22:27:52 +0000
Subject: [PATCH 3/4] Fix handling when arg attrs are not present

---
 xformer/Analysis/MemoryPlan.cpp | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/xformer/Analysis/MemoryPlan.cpp b/xformer/Analysis/MemoryPlan.cpp
index 383cdb16a..96236b942 100644
--- a/xformer/Analysis/MemoryPlan.cpp
+++ b/xformer/Analysis/MemoryPlan.cpp
@@ -160,18 +160,6 @@ void MemoryPlan::buildInputOutputTensorMaps(
     llvm::SmallVector<std::string> argNames;
     auto funcOp = dyn_cast<func::FuncOp>(op);
 
-    auto argAttrs = funcOp->getAttrOfType<ArrayAttr>(argAttr);
-    for (auto attr : argAttrs) {
-      auto d = attr.dyn_cast_or_null<DictionaryAttr>();
-
-      const ArrayRef<Attribute> indexPathAttrs =
-          d.get("tf_saved_model.index_path").cast<ArrayAttr>().getValue();
-      auto stringAttr = indexPathAttrs[0].dyn_cast_or_null<StringAttr>();
-      if (!stringAttr)
-        continue;
-      argNames.push_back(stringAttr.getValue().str());
-    }
-
     llvm::SmallVector<StringRef> inputNames;
     auto dictAttr =
         funcOp->getAttrOfType<DictionaryAttr>("tf.entry_function");
@@ -181,6 +169,25 @@ void MemoryPlan::buildInputOutputTensorMaps(
                            /*KeepEmpty=*/false);
     }
 
+    auto argAttrs = funcOp->getAttrOfType<ArrayAttr>(argAttr);
+    if (argAttrs) {
+      for (auto attr : argAttrs) {
+        auto d = attr.dyn_cast_or_null<DictionaryAttr>();
+
+        const ArrayRef<Attribute> indexPathAttrs =
+            d.get("tf_saved_model.index_path").cast<ArrayAttr>().getValue();
+        auto stringAttr =
+            indexPathAttrs[0].dyn_cast_or_null<StringAttr>();
+        if (!stringAttr)
+          continue;
+        argNames.push_back(stringAttr.getValue().str());
+      }
+    } else {
+      for (int i = 0; i < inputNames.size(); i++) {
+        argNames.push_back(inputNames[i].str());
+      }
+    }
+
     assert(argNames.size() == inputNames.size());
     for (int i = 0; i < inputNames.size(); i++) {
       map[inputNames[i].str()] = argNames[i];
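Background for this fix: buildInputOutputTensorMaps reads names from two places on the entry function, the per-argument tf_saved_model.index_path attribute and the comma-separated name lists in the tf.entry_function dictionary attribute. A schematic of the attributes involved (shapes, names, and values are illustrative only):

    func.func @main(
        %arg0: tensor<1x32xi8> {tf_saved_model.index_path = ["input_state"]})
        -> tensor<1x32xi8>
        attributes {tf.entry_function = {inputs = "input_state",
                                         outputs = "output_state"}} {
      // ... function body ...
    }

When the index-path argument attributes are absent (models not exported through the SavedModel path), the fallback above reuses the tf.entry_function names, so the two name lists stay the same length and the assert holds.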
Please check!"; + failed = true; + } } - } - // Check quantization - for (int i = 0; i < sameAllocationInputOutputTensorOption.size(); i = i + 2) { - auto inQType = dyn_cast_or_null( - inputTensorMap[sameAllocationInputOutputTensorOption[i]] - .getType() - .cast() - .getElementType()); - auto outQType = dyn_cast_or_null( - outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]] - .getType() - .cast() - .getElementType()); - if (inQType && !outQType) { - func.emitError() << "Input tensor " - << sameAllocationInputOutputTensorOption[i] - << " is quantized, but " - << sameAllocationInputOutputTensorOption[i + 1] - << " is not. Please check!"; - failed = true; - } else if (!inQType && outQType) { - func.emitError() << "Input tensor " - << sameAllocationInputOutputTensorOption[i] - << " is not quantized, but " - << sameAllocationInputOutputTensorOption[i + 1] - << " is quantized. Please check!"; - failed = true; - } else if (inQType && outQType) { - // Both are quantized, but check element sizes, maybe i8 and i16 - - auto inScale = inQType.getScale(); - auto inZeroPoint = inQType.getZeroPoint(); - - auto outScale = outQType.getScale(); - auto outZeroPoint = outQType.getZeroPoint(); - if (inScale != outScale || inZeroPoint != outZeroPoint) { - // change input block arg to output quantization - - // insert quantize op to convert back to original input quantization - // auto module = func->getParentOfType(); - // OpBuilder builder(module); - // auto outVal = outputTensorMap[sameAllocationInputOutputTensorOption[i - // + 1]]; auto newQType = inQType.castFromExpressedType( - // quant::QuantizedType::castToExpressedType(outVal.getType())); - // auto newQuantizeOp = builder.create( - // inVal.getLoc(), newQType, outVal, TypeAttr::get(inQType)); - - auto inVal = inputTensorMap[sameAllocationInputOutputTensorOption[i]]; - auto typeNumBits = - utils::getTypeSize( - inVal.getType().cast().getElementType()) * - 8; - double maxError = 1.0 / (2 << (typeNumBits - 1)); - if (abs(inScale - outScale) > maxError) { - func.emitError() << "Input tensor " - << sameAllocationInputOutputTensorOption[i] - << " has scale of " << inScale - << " and zeropoint of " << inZeroPoint - << ", but output tensor " - << sameAllocationInputOutputTensorOption[i + 1] - << " has scale of " << outScale - << " and zeropoint of " << outZeroPoint - << ". Please check!"; - failed = true; + // Check quantization + for (int i = 0; i < sameAllocationInputOutputTensorOption.size(); + i = i + 2) { + auto inQType = dyn_cast_or_null( + inputTensorMap[sameAllocationInputOutputTensorOption[i]] + .getType() + .cast() + .getElementType()); + auto outQType = dyn_cast_or_null( + outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]] + .getType() + .cast() + .getElementType()); + if (inQType && !outQType) { + func.emitError() << "Input tensor " + << sameAllocationInputOutputTensorOption[i] + << " is quantized, but " + << sameAllocationInputOutputTensorOption[i + 1] + << " is not. Please check!"; + failed = true; + } else if (!inQType && outQType) { + func.emitError() << "Input tensor " + << sameAllocationInputOutputTensorOption[i] + << " is not quantized, but " + << sameAllocationInputOutputTensorOption[i + 1] + << " is quantized. 
Please check!"; + failed = true; + } else if (inQType && outQType) { + // Both are quantized, but check element sizes, maybe i8 and i16 + + auto inScale = inQType.getScale(); + auto inZeroPoint = inQType.getZeroPoint(); + + auto outScale = outQType.getScale(); + auto outZeroPoint = outQType.getZeroPoint(); + if (inScale != outScale || inZeroPoint != outZeroPoint) { + // change input block arg to output quantization + + // insert quantize op to convert back to original input quantization + // auto module = func->getParentOfType(); + // OpBuilder builder(module); + // auto outVal = + // outputTensorMap[sameAllocationInputOutputTensorOption[i + // + 1]]; auto newQType = inQType.castFromExpressedType( + // quant::QuantizedType::castToExpressedType(outVal.getType())); + // auto newQuantizeOp = builder.create( + // inVal.getLoc(), newQType, outVal, TypeAttr::get(inQType)); + + auto inVal = inputTensorMap[sameAllocationInputOutputTensorOption[i]]; + auto typeNumBits = + utils::getTypeSize( + inVal.getType().cast().getElementType()) * + 8; + double maxError = 1.0 / (2 << (typeNumBits - 1)); + if (abs(inScale - outScale) > maxError) { + func.emitError() + << "Input tensor " << sameAllocationInputOutputTensorOption[i] + << " has scale of " << inScale << " and zeropoint of " + << inZeroPoint << ", but output tensor " + << sameAllocationInputOutputTensorOption[i + 1] + << " has scale of " << outScale << " and zeropoint of " + << outZeroPoint << ". Please check!"; + failed = true; + } } + } else if (!inQType && !outQType) { + // Both are not quantized, but check element sizes, maybe i8 and i16 } - } else if (!inQType && !outQType) { - // Both are not quantized, but check element sizes, maybe i8 and i16 } - } - if (failed) { - signalPassFailure(); - return; + if (failed) { + signalPassFailure(); + return; + } } } } // namespace