[tests] add pytorch.llama
Signed-off-by: Avimitin <[email protected]>
Avimitin authored and sequencer committed Aug 30, 2024
1 parent 2a30401 commit d6fe912
Showing 4 changed files with 227 additions and 0 deletions.
6 changes: 6 additions & 0 deletions nix/pkgs/buddy-mlir.nix
@@ -57,7 +57,13 @@ let
pyenv = python3.withPackages (ps: [
self
ps.torch

# mobilenet
ps.torchvision

# tinyllama
ps.transformers
ps.accelerate
]);
};
};
101 changes: 101 additions & 0 deletions tests/pytorch/tinyllama/build.nix
@@ -0,0 +1,101 @@
{ buildBuddyE2ETest, fetchgit }:
let
model = fetchgit {
url = "https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0";
rev = "fe8a4ea1ffedaf415f4da2f062534de366a451e6";
fetchLFS = true;
hash = "sha256-vp/aUHKX+NJZZMIk2CgSh2czeGD0HeQGS30p/If2pA0=";
};
in
buildBuddyE2ETest {
caseName = "tinyllama";

passthru.model = model;

env.LLAMA_MODEL_PATH = "${model}";
optPhase = ''
python ./tinyllama.py
echo "Lowering forward.mlir"
buddy-opt forward.mlir -pass-pipeline \
"builtin.module(func.func(tosa-to-linalg-named),func.func(tosa-to-linalg),\
func.func(tosa-to-tensor),func.func(tosa-to-arith))" \
| buddy-opt --arith-expand \
--eliminate-empty-tensors \
--empty-tensor-to-alloc-tensor \
--one-shot-bufferize \
--batchmatmul-optimize \
--convert-linalg-to-affine-loops \
--affine-loop-fusion \
--lower-affine \
--func-bufferize \
--arith-bufferize \
--tensor-bufferize \
--buffer-deallocation \
--finalizing-bufferize \
--convert-vector-to-scf \
--expand-strided-metadata \
--convert-vector-to-llvm \
--memref-expand \
--arith-expand \
--convert-arith-to-llvm \
--finalize-memref-to-llvm \
--convert-scf-to-cf \
--llvm-request-c-wrappers \
--convert-openmp-to-llvm \
--convert-arith-to-llvm \
--convert-math-to-llvm \
--convert-math-to-libm \
--convert-func-to-llvm \
--reconcile-unrealized-casts \
> forward-lowered.mlir
echo "Lowering subgraphs[0]"
buddy-opt subgraphs0.mlir -pass-pipeline \
"builtin.module(func.func(tosa-to-linalg-named, tosa-to-arith, tosa-to-linalg, tosa-to-tensor))" \
| buddy-opt \
--arith-expand \
--eliminate-empty-tensors \
--empty-tensor-to-alloc-tensor \
--one-shot-bufferize \
--batchmatmul-optimize \
--convert-linalg-to-affine-loops \
--affine-loop-fusion \
--lower-affine \
--func-bufferize-dynamic-offset \
--tensor-bufferize \
--arith-bufferize \
--buffer-deallocation \
--finalizing-bufferize \
--convert-vector-to-scf \
--expand-strided-metadata \
--cse \
--lower-vector-exp \
--lower-rvv=rv32 \
--convert-vector-to-llvm \
--memref-expand \
--arith-expand \
--convert-arith-to-llvm \
--finalize-memref-to-llvm \
--convert-scf-to-cf \
--llvm-request-c-wrappers \
--convert-openmp-to-llvm \
--convert-arith-to-llvm \
--convert-math-to-llvm \
--convert-math-to-libm \
--convert-func-to-llvm \
--reconcile-unrealized-casts \
> subgraphs0-lowered.mlir
echo "Compiling memrefCopy library"
$CXX -nostdlib -c ${../lib/MemrefCopy.cc} -o memrefCopy.o
llcArtifacts+=(
memrefCopy.o
)
optArtifacts+=(
"forward-lowered.mlir"
"subgraphs0-lowered.mlir"
)
'';
}
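
buildBuddyE2ETest is expected to take over from here: link tinyllama.cc (below)
against the lowered MLIR artifacts and the memrefCopy object, then run the
resulting image on the emulator. The exact wiring lives in the shared test
harness rather than in this commit.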
60 changes: 60 additions & 0 deletions tests/pytorch/tinyllama/tinyllama.cc
@@ -0,0 +1,60 @@
//===- tinyllama.cc -------------------------------------------------------===//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//===----------------------------------------------------------------------===//
//
// This file implements the test entry for the TinyLlama model.
//
//===----------------------------------------------------------------------===//

#include "memref.hpp"

constexpr size_t ParamsSize = 110581;
// constexpr size_t ParamsSize = 11058;
constexpr size_t MaxVocabSize = 32000;
constexpr size_t MaxTokenLength = 40;
constexpr size_t HiddenSize = 2048;
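
// All large buffers below live in the .vdata section, presumably so the
// target linker script can place them in memory reachable by the vector unit.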

// resultContainer[0]
__attribute((section(".vdata"))) float result0[1 + MaxTokenLength + HiddenSize];
static constexpr int32_t sizesResult0[3] = {1, MaxTokenLength, HiddenSize};

// resultContainer[1]
__attribute((
section(".vdata"))) float result1[1 + MaxTokenLength + MaxVocabSize];
static constexpr int32_t sizesResult1[3] = {1, MaxTokenLength, MaxVocabSize};

// inputContainer
__attribute((section(".vdata"))) int32_t input[1 + MaxTokenLength];
static constexpr int32_t sizesInput[2] = {1, MaxTokenLength};

// paramsContainer
__attribute((section(".vdata"))) float param[ParamsSize];
static constexpr int32_t sizesParam[1] = {ParamsSize};

extern "C" {
void _mlir_ciface_forward(MemRef<float, 3> *a, MemRef<float, 1> *b,
MemRef<int32_t, 2> *c);
}
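
// forward has two results; the generated C wrapper presumably writes them
// through consecutive descriptors, so resultContainer passes an array of two.
// The second MemRef constructor argument is assumed to be an initial fill
// value defined by memref.hpp.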

MemRef<float, 3> resultContainer[2] = {
MemRef<float, 3>(result0, 2.0, sizesResult0),
MemRef<float, 3>(result1, 3.0, sizesResult1)};
MemRef<int32_t, 2> inputContainer(input, 4, sizesInput);
MemRef<float, 1> paramsContainerf32(param, 5.0, sizesParam);

extern "C" int test() {
_mlir_ciface_forward(resultContainer, &paramsContainerf32, &inputContainer);
return 0;
}
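
For reference, --llvm-request-c-wrappers makes MLIR emit the _mlir_ciface_*
wrappers called above; they take pointers to strided-memref descriptors, and
MemRef<T, N> from memref.hpp is assumed here to be layout-compatible with
MLIR's StridedMemRefType. A minimal sketch of that descriptor:

// Mirrors StridedMemRefType from mlir/ExecutionEngine/CRunnerUtils.h.
// Upstream declares the index fields as int64_t; on the rv32 target the
// lowered index type is 32 bits wide, matching the int32_t size arrays above.
#include <cstdint>

template <typename T, int N> struct StridedMemRefType {
  T *basePtr;         // pointer as returned by the allocator
  T *data;            // aligned pointer that is actually read and written
  int64_t offset;     // element offset from data
  int64_t sizes[N];   // shape, e.g. {1, MaxTokenLength, HiddenSize}
  int64_t strides[N]; // strides in elements (row-major here)
};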
60 changes: 60 additions & 0 deletions tests/pytorch/tinyllama/tinyllama.py
@@ -0,0 +1,60 @@
# ===- tinyllama.py -------------------------------------------------------------
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ===---------------------------------------------------------------------------
#
# This is the TinyLlama model AOT importer.
#
# ===---------------------------------------------------------------------------

import os
import sys
import torch
from torch._inductor.decomposition import decompositions as inductor_decomp
from transformers import AutoModelForCausalLM, AutoTokenizer

from buddy.compiler.frontend import DynamoCompiler
from buddy.compiler.ops import tosa
from buddy.compiler.graph import GraphDriver
from buddy.compiler.graph.transform import simply_fuse

checkpoint = os.environ.get("LLAMA_MODEL_PATH")
if checkpoint is None:
    sys.exit("Error: No model path was provided. Please set $LLAMA_MODEL_PATH")
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto")
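# Disable the KV cache so tracing sees a single static-shape forward pass
# instead of mutable past-key-value state.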
model.config.use_cache = False

# Initialize Dynamo Compiler with specific configurations as an importer.
dynamo_compiler = DynamoCompiler(
primary_registry=tosa.ops_registry,
aot_autograd_decomposition=inductor_decomp,
)

# Import the model into MLIR module and parameters.
with torch.no_grad():
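    # A fixed 40-token all-ones dummy prompt; its length must match
    # MaxTokenLength on the C++ side.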
    data = torch.tensor([[1 for i in range(40)]], dtype=torch.int64)
    graphs = dynamo_compiler.importer(model, data)

assert len(graphs) == 1
graph = graphs[0]
params = dynamo_compiler.imported_params[graph]
pattern_list = [simply_fuse]
graph.fuse_ops(pattern_list)
driver = GraphDriver(graph)
driver.subgraphs[0].lower_to_top_level_ir()
with open("subgraphs0.mlir", "w") as module_file:
    print(driver.subgraphs[0]._imported_module, file=module_file)
with open("forward.mlir", "w") as module_file:
    print(driver.construct_main_graph(True), file=module_file)
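
Running the script with LLAMA_MODEL_PATH pointing at the fetched checkpoint
emits subgraphs0.mlir (the fused compute subgraph) and forward.mlir (the main
graph that drives it), the two files the optPhase above then lowers.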
