Commit 373261d

Merge pull request #945 from xmos/allocate_same_tensors

Add changes for allocating same offset for input and output tensors

panickal-xmos authored Dec 16, 2024
2 parents bc0b994 + de13620
Showing 8 changed files with 358 additions and 15 deletions.
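In short: the user supplies an alternating list of input and output tensor names, and the memory planner pins each named pair to a single offset so the paired input and output tensors share the same memory. A minimal standalone sketch of that idea follows (simplified sequential packing with hypothetical names; the actual xformer implementation reuses its greedy getOffset placement, as shown in the MemoryPlan.cpp diff below):

#include <algorithm>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Assign one shared offset per (input, output) pair; in the real planner the
// remaining buffers are packed around these pinned buffers by the greedy plan.
std::map<std::string, int> assignSharedOffsets(
    const std::vector<std::pair<std::string, std::string>> &pairs,
    const std::map<std::string, int> &sizes) {
  std::map<std::string, int> offsets;
  int next = 0;
  for (const auto &p : pairs) {
    offsets[p.first] = next;  // input tensor
    offsets[p.second] = next; // output tensor shares the same allocation
    next += std::max(sizes.at(p.first), sizes.at(p.second));
  }
  return offsets;
}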
2 changes: 1 addition & 1 deletion third_party/lib_tflite_micro
Submodule lib_tflite_micro updated 0 files
153 changes: 147 additions & 6 deletions xformer/Analysis/MemoryPlan.cpp
@@ -129,7 +129,9 @@ int MemoryPlan::getOffset(Value v, int size,

if ((valueInfo[allocatedVal].firstUsed > valueInfo[v].lastUsed) ||
(valueInfo[v].firstUsed > valueInfo[allocatedVal].lastUsed)) {
// No overlap
// There is no temporal overlap with this buffer, so it cannot clash in
// memory; keep scanning until we find one that does. At the first clash we
// can place the new buffer before it if the gap is large enough, since none
// of the buffers skipped so far overlap with it.
continue;
}

@@ -149,6 +151,70 @@ int MemoryPlan::getOffset(Value v, int size,
return offset;
}

void MemoryPlan::buildInputOutputTensorMaps(
llvm::StringMap<Value> &inputTensorMap,
llvm::StringMap<Value> &outputTensorMap) {
auto buildMap = [&](StringRef argAttr, StringRef nameAttr,
llvm::SmallVector<std::string> &attrsInOrder) {
llvm::StringMap<std::string> map;
llvm::SmallVector<std::string> argNames;
auto funcOp = dyn_cast<func::FuncOp>(op);

llvm::SmallVector<llvm::StringRef, 2> inputNames;
auto dictAttr =
funcOp->getAttrOfType<mlir::DictionaryAttr>("tf.entry_function");
if (auto str =
dictAttr.get(nameAttr).dyn_cast_or_null<mlir::StringAttr>()) {
str.getValue().split(inputNames, ',', /*MaxSplit=*/-1,
/*KeepEmpty=*/false);
}

auto argAttrs = funcOp->getAttrOfType<mlir::ArrayAttr>(argAttr);
if (argAttrs) {
for (auto attr : argAttrs) {
auto d = attr.dyn_cast_or_null<mlir::DictionaryAttr>();

const ArrayRef<Attribute> indexPathAttrs =
d.get("tf_saved_model.index_path").cast<ArrayAttr>().getValue();
auto stringAttr =
indexPathAttrs[0].dyn_cast_or_null<mlir::StringAttr>();
if (!stringAttr)
continue;
argNames.push_back(stringAttr.getValue().str());
}
} else {
for (int i = 0; i < inputNames.size(); i++) {
argNames.push_back(inputNames[i].str());
}
}

assert(argNames.size() == inputNames.size());
for (int i = 0; i < inputNames.size(); i++) {
map[inputNames[i].str()] = argNames[i];
attrsInOrder.push_back(argNames[i]);
}
return map;
};

llvm::StringMap<std::string> inNameToAttrMap, outNameToAttrMap;
llvm::SmallVector<std::string> attrsInOrder;

inNameToAttrMap = buildMap("arg_attrs", "inputs", attrsInOrder);
outNameToAttrMap = buildMap("res_attrs", "outputs", attrsInOrder);

for (int i = 0; i < inNameToAttrMap.size(); i++) {
inputTensorMap[attrsInOrder[i]] = values[i];
}

for (auto v : values) {
if (auto loc = v.getLoc()->dyn_cast_or_null<NameLoc>()) {
if (outNameToAttrMap.count(loc.getName())) {
outputTensorMap[outNameToAttrMap[loc.getName()]] = v;
}
}
}
}

std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
int &peakMemoryUsed,
int &peakOpId) {
@@ -245,6 +311,22 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
}
}

// Handle input/output tensors that must share the same allocation
llvm::DenseSet<Value> inputTensorSet;
llvm::DenseSet<Value> outputTensorSet;
llvm::StringMap<Value> inputTensorMap, outputTensorMap;

if (sameAllocationInputOutputTensorOption.size() > 0) {
buildInputOutputTensorMaps(inputTensorMap, outputTensorMap);
for (int i = 0; i < sameAllocationInputOutputTensorOption.size();
i = i + 2) {
inputTensorSet.insert(
inputTensorMap[sameAllocationInputOutputTensorOption[i]]);
outputTensorSet.insert(
outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]]);
}
}

// The comparator keeps the buffers ordered by id if their sizes are the
// same
auto DecreasingSizesComparator = [&](QueueItem &lhs, QueueItem &rhs) {
@@ -259,23 +341,51 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
queue(DecreasingSizesComparator);

// Insert values and their sizes into priority queue
// The inOutMap prevents adding values that are overlapped.
// In a chain of overlapped values, only the last value is allocated; the
// rest are patched up and added to the allocated values list later.
// Same-allocation input and output tensors are not inserted into the queue
// as they are allocated separately.
for (auto v : values) {
if (!inOutMap.count(v) && !vInfo[v].isConstant) {
if (!inOutMap.count(v) && !vInfo[v].isConstant &&
!outputTensorSet.contains(v) && !inputTensorSet.contains(v)) {
queue.push({v, vInfo[v].size});
}
}

ValuesOrderedByOffset allocatedValues;
auto v = queue.top().first;
queue.pop();
allocatedValues.insert({v, 0});

// If there are input/output tensor pairs that must share an allocation,
// allocate those first
if (sameAllocationInputOutputTensorOption.size() > 0) {
// Allocate the first input/output tensor pair at offset zero
allocatedValues.insert(
{inputTensorMap[sameAllocationInputOutputTensorOption[0]], 0});
allocatedValues.insert(
{outputTensorMap[sameAllocationInputOutputTensorOption[1]], 0});

for (int i = 2; i < sameAllocationInputOutputTensorOption.size();
i = i + 2) {
auto inputTensor =
inputTensorMap[sameAllocationInputOutputTensorOption[i]];
int newOffset = getOffset(inputTensor, vInfo[inputTensor].size, vInfo,
allocatedValues);
allocatedValues.insert({inputTensor, newOffset});
allocatedValues.insert(
{outputTensorMap[sameAllocationInputOutputTensorOption[i + 1]],
newOffset});
}
} else {
// Else allocate the largest tensor at offset zero
auto v = queue.top().first;
queue.pop();
allocatedValues.insert({v, 0});
}

while (!queue.empty()) {
auto v = queue.top().first;
auto size = queue.top().second;
queue.pop();

// Find an offset for v that does not clash with the already-allocated values
int newOffset = getOffset(v, size, vInfo, allocatedValues);
allocatedValues.insert({v, newOffset});
}
@@ -313,6 +423,37 @@ std::vector<int> MemoryPlan::getAllocatedOffsets(const bool overlapOps,
allocatedValuesOrderedByID.insert(i);
}

// Check if buffers clash
// for (auto i : allocatedValuesOrderedByID) {
// for (auto j : allocatedValuesOrderedByID) {
// if (vInfo[i.first].id < vInfo[j.first].id) {
// if ((vInfo[i.first].firstUsed > vInfo[j.first].firstUsed &&
// vInfo[i.first].firstUsed < vInfo[j.first].lastUsed) ||
// (vInfo[j.first].firstUsed > vInfo[i.first].firstUsed &&
// vInfo[j.first].firstUsed < vInfo[i.first].lastUsed)) {
// auto iBegin = i.second;
// auto iEnd = i.second + vInfo[i.first].size;
// auto jBegin = j.second;
// auto jEnd = j.second + vInfo[j.first].size;
// if ((iBegin > jBegin && iBegin < jEnd) ||
// (jBegin > iBegin && jBegin < iEnd)) {
// printf("\n\nProblem!");
// std::cout << "\nValue one " << vInfo[i.first].id
// << ", size = " << vInfo[i.first].size
// << ", offset = " << i.second
// << ", first = " << vInfo[i.first].firstUsed
// << ", last = " << vInfo[i.first].lastUsed;
// std::cout << "\nValue two " << vInfo[j.first].id
// << ", size = " << vInfo[j.first].size
// << ", offset = " << j.second
// << ", first = " << vInfo[j.first].firstUsed
// << ", last = " << vInfo[j.first].lastUsed;
// }
// }
// }
// }
// }

size_t peakUsed = 0;
size_t peakUsedValueID = 0;
size_t maxId = 0;
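For context on the comment added to getOffset above: placement is first-fit over the buffers already allocated, walked in increasing offset order, and buffers whose live ranges do not overlap in time are skipped because they may occupy the same memory. A self-contained sketch of that check (illustrative only; the struct and function names are assumptions, not the MemoryPlan code):

#include <algorithm>
#include <vector>

struct Buf {
  int offset = 0;
  int size = 0;
  int firstUsed = 0; // first op index that uses the buffer
  int lastUsed = 0;  // last op index that uses the buffer
};

int firstFitOffset(const Buf &v, std::vector<Buf> allocated) {
  // Walk already-placed buffers in increasing offset order.
  std::sort(allocated.begin(), allocated.end(),
            [](const Buf &a, const Buf &b) { return a.offset < b.offset; });
  int offset = 0;
  for (const auto &a : allocated) {
    // Disjoint live ranges: the two buffers may share the same memory.
    if (a.firstUsed > v.lastUsed || v.firstUsed > a.lastUsed)
      continue;
    // Clash in time: place v in the gap before this buffer if it fits,
    // otherwise continue searching past its end.
    if (a.offset - offset >= v.size)
      return offset;
    offset = std::max(offset, a.offset + a.size);
  }
  return offset;
}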
22 changes: 14 additions & 8 deletions xformer/Analysis/MemoryPlan.h
@@ -7,12 +7,21 @@
#include "mlir/Analysis/Liveness.h"
#include "mlir/IR/Value.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/StringMap.h"

#include <set>

namespace mlir {
namespace xcore {

struct ValueInfo {
size_t id;
size_t size;
bool isConstant;
int firstUsed;
int lastUsed;
};

// Represents an analysis for memory planning of a given FuncOp for a model.
// - Uses liveness analysis and a greedy algorithm to arrange buffers in memory.
// - Tries to overlap input and output buffers based on the op characteristics.
@@ -51,6 +60,11 @@ class MemoryPlan {

std::vector<Operation *> getOperationsSequence() { return operations; }

DenseMap<Value, ValueInfo> getValuesInfoMap() { return valueInfo; }

void buildInputOutputTensorMaps(llvm::StringMap<Value> &inputTensorMap,
llvm::StringMap<Value> &outputTensorMap);

// OpSplitPlan getOpSplitPlan();

void printMemoryPlan();
@@ -70,14 +84,6 @@ class MemoryPlan {
using ValuesOrderedByOffset =
std::multiset<QueueItem, IncreasingOffsetsComparator>;

struct ValueInfo {
size_t id;
size_t size;
bool isConstant;
int firstUsed;
int lastUsed;
};

int getOffset(Value v, int size, DenseMap<Value, ValueInfo> &valueInfo,
ValuesOrderedByOffset &allocatedOffsets);

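ValueInfo moves from a private member type to the public interface, and the new getValuesInfoMap() accessor exposes per-value sizes and live ranges so that other passes can inspect the plan. A hedged sketch of a possible consumer (hypothetical helper; the include path and usage are assumptions, not code from this commit):

#include <algorithm>

#include "Analysis/MemoryPlan.h" // path assumed; adjust to the build setup

// Returns the size of the largest non-constant buffer seen by the plan.
int largestNonConstantBufferSize(mlir::xcore::MemoryPlan &plan) {
  int largest = 0;
  for (auto &entry : plan.getValuesInfoMap()) {
    const auto &info = entry.second;
    if (!info.isConstant)
      largest = std::max(largest, static_cast<int>(info.size));
  }
  return largest;
}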
2 changes: 2 additions & 0 deletions xformer/Transforms/Options.h
@@ -32,6 +32,8 @@ extern llvm::cl::opt<bool> convDebugOption;
extern llvm::cl::opt<bool> overlapConvOption;
extern llvm::cl::opt<bool> offlineOffsetsOption;
extern llvm::cl::opt<unsigned> convChannelwiseSplitSizeOption;
extern llvm::cl::list<std::string> sameAllocationInputOutputTensorOption;

} // namespace xcore
} // namespace mlir

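sameAllocationInputOutputTensorOption is a string list consumed two names at a time (input, then output) in MemoryPlan.cpp above. The actual flag registration lives outside the files shown in this diff; a typical llvm::cl::list declaration would look roughly like the sketch below, where the flag string and description are placeholders, not the real option name:

#include <string>

#include "llvm/Support/CommandLine.h"

// Placeholder flag name; the real registration is not part of this diff.
llvm::cl::list<std::string> sameAllocationInputOutputTensorOption(
    "xcore-same-allocation-input-output-tensor",
    llvm::cl::desc("Alternating input/output tensor names that must be "
                   "allocated at the same offset, e.g. in1,out1,in2,out2"),
    llvm::cl::CommaSeparated);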
1 change: 1 addition & 0 deletions xformer/Transforms/Passes.cpp
@@ -15,6 +15,7 @@ void buildXCorePreOpSplitPassPipeline(OpPassManager &pm) {
// Run pass from LCE to convert Larq ops which are in TFL custom op format to
// Larq dialect
pm.addPass(mlir::TFL::CreateTranslateToLCEPass());
pm.addPass(createVerifySameAllocationTensorsPass());
// Convert dynamic shapes in batch dimension to static
pm.addPass(createRemoveDynamicShapePass());
}
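createVerifySameAllocationTensorsPass() is inserted into the pre-op-split pipeline right after the LCE translation; its implementation is in a file not shown in this diff. For orientation, a FuncOp pass created this way typically follows the standard PassWrapper skeleton (hypothetical sketch only, not the actual pass body):

#include <memory>

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"

namespace mlir {
namespace xcore {
namespace {
struct VerifySameAllocationTensors
    : public PassWrapper<VerifySameAllocationTensors,
                         OperationPass<func::FuncOp>> {
  void runOnOperation() override {
    // e.g. verify each requested input/output tensor name exists and that the
    // paired tensors have matching sizes; call signalPassFailure() otherwise.
  }
};
} // namespace

std::unique_ptr<OperationPass<func::FuncOp>>
createVerifySameAllocationTensorsPass() {
  return std::make_unique<VerifySameAllocationTensors>();
}
} // namespace xcore
} // namespace mlir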
2 changes: 2 additions & 0 deletions xformer/Transforms/Passes.h
@@ -30,6 +30,8 @@ std::unique_ptr<OperationPass<func::FuncOp>> createReplaceFCWithConv2DPass();
std::unique_ptr<OperationPass<func::FuncOp>> createOptimizeConv2DPass();
std::unique_ptr<OperationPass<func::FuncOp>> createOpSplitPass();
std::unique_ptr<OperationPass<func::FuncOp>> createApplyTFLPatternsPass();
std::unique_ptr<OperationPass<func::FuncOp>>
createVerifySameAllocationTensorsPass();
std::unique_ptr<OperationPass<func::FuncOp>> createRemoveDynamicShapePass();
std::unique_ptr<OperationPass<func::FuncOp>> createReplaceAddSubPass();
std::unique_ptr<OperationPass<func::FuncOp>> createReplaceMulPass();
(2 remaining changed files not shown)