[BOUNTY] Add Phi-2 (#117)
* Qwen1.5 0.5B pybuda implementation

* remove unneeded requirement

* rename "acceleration" to "accelerate"

* Update env vars and compiler configs

* remove undefined device_map

* Remove misleading and unnecessary environment variables

* remove qwen from phi branch

* Add Phi 2

* Update requirements.txt

* Standardize Phi2 demo and add tests

* Remove old phi2 demo

* fix missing quote in pyproject.toml

* fix

* Fix test saying qwen1_5 instead of phi2
JushBJJ authored Sep 3, 2024
1 parent 6439fd0 commit 3c71b9f
Showing 3 changed files with 79 additions and 0 deletions.
68 changes: 68 additions & 0 deletions model_demos/nlp_demos/phi2/pytorch_phi2_text_generation.py
@@ -0,0 +1,68 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
# SPDX-License-Identifier: Apache-2.0

# Phi2 Demo - Text Generation

import os
import pybuda

from transformers import PhiForCausalLM, AutoTokenizer, PhiConfig
from pybuda.transformers.pipeline import pipeline as pybuda_pipeline

def run_phi2_causal_lm(batch_size=1):
    os.environ["TT_BACKEND_TIMEOUT"] = "0"

    # Set PyBuda configurations
    compiler_cfg = pybuda.config._get_global_compiler_config()
    compiler_cfg.default_df_override = pybuda.DataFormat.Float16_b
    compiler_cfg.enable_auto_fusing = True
    compiler_cfg.balancer_policy = "Ribbon"

    # Set up model configuration
    config = PhiConfig.from_pretrained("microsoft/phi-2")
    config.use_cache = False
    config.return_dict = False

    # Load model and tokenizer with config
    model = PhiForCausalLM.from_pretrained("microsoft/phi-2", config=config)
    tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
    tokenizer.pad_token, tokenizer.pad_token_id = (tokenizer.eos_token, tokenizer.eos_token_id)

    # Disable DynamicCache
    # See: https://github.com/tenstorrent/tt-buda/issues/42
    model._supports_cache_class = False

    # Example prompt, repeated once per batch entry
    prompt = ["My name is Jim Keller and"] * batch_size

    # Initialize pipeline
    text_generator = pybuda_pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
    )

    # Inference on TT device
    response = text_generator(
        prompt,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
        max_new_tokens=512,
        num_beams=1,
        do_sample=True,
        no_repeat_ngram_size=5,
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
        early_stopping=True,
    )

    # Display responses
    for batch_id in range(batch_size):
        print(f"Batch: {batch_id}")
        print(f"Response: {response[batch_id][0]['generated_text']}")
        print()


if __name__ == "__main__":
    run_phi2_causal_lm()
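
For reference, the demo can also be driven from another script instead of being run as a module; a minimal sketch, assuming model_demos is on PYTHONPATH and a Tenstorrent device is available:

from nlp_demos.phi2.pytorch_phi2_text_generation import run_phi2_causal_lm

# Generate completions for two identical prompts in a single batch
run_phi2_causal_lm(batch_size=2)
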
1 change: 1 addition & 0 deletions model_demos/pyproject.toml
@@ -87,5 +87,6 @@ markers = [
"yolov6: tests that involve yolov6",
"segformer: tests that involve SegFormer",
"monodle: tests that involve Monodle",
"phi2: tests that involve Phi2",
"yolox: tests that involve YOLOX",
]
10 changes: 10 additions & 0 deletions model_demos/tests/test_pytorch_phi2.py
@@ -0,0 +1,10 @@
# SPDX-FileCopyrightText: © 2024 Tenstorrent AI ULC
# SPDX-License-Identifier: Apache-2.0

import pytest

from nlp_demos.phi2.pytorch_phi2_text_generation import run_phi2_causal_lm

@pytest.mark.phi2
def test_phi2_causal_lm_pytorch(clear_pybuda, test_device, batch_size):
    run_phi2_causal_lm(batch_size=batch_size)
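
The phi2 marker added to pyproject.toml above lets pytest filter down to this test. A hypothetical invocation, assuming the clear_pybuda, test_device, and batch_size fixtures come from the suite's conftest.py and pytest is run from model_demos:

import pytest

# Equivalent to `pytest -m phi2 tests/test_pytorch_phi2.py` on the command line
pytest.main(["-m", "phi2", "tests/test_pytorch_phi2.py"])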
