Commit

Add qwen model v2.5 (#875)
dsudhakarTT authored Dec 23, 2024
1 parent bd90520 commit 572cadf
Showing 2 changed files with 98 additions and 0 deletions.
52 changes: 52 additions & 0 deletions forge/test/models/pytorch/text/qwen/test_qwen_coder.py
@@ -0,0 +1,52 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
import pytest
import forge
from transformers import AutoModelForCausalLM, AutoTokenizer

# Variants for testing
variants = [
    "Qwen/Qwen2.5-Coder-0.5B",
    "Qwen/Qwen2.5-Coder-1.5B",
    "Qwen/Qwen2.5-Coder-1.5B-Instruct",
    "Qwen/Qwen2.5-Coder-3B",
    "Qwen/Qwen2.5-Coder-3B-Instruct",
    "Qwen/Qwen2.5-Coder-7B",
    "Qwen/Qwen2.5-Coder-7B-Instruct",
]


@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.model_analysis
@pytest.mark.xfail(
    reason="RuntimeError: Found Unsupported operations while lowering from TTForge to TTIR in forward graph - repeat interleave"
)
@pytest.mark.nightly
def test_qwen_response(variant):
    """
    Test function for generating responses and verifying model compilation.
    """
    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(variant, device_map="cpu")
    model.config.return_dict = False
    tokenizer = AutoTokenizer.from_pretrained(variant)

    # Prepare input
    prompt = "write a quick sort algorithm."
    messages = [
        {"role": "system", "content": "You are Qwen, created by TT Cloud. You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Tokenize and prepare inputs
    model_inputs = tokenizer([text], return_tensors="pt")
    input_ids = model_inputs["input_ids"]
    attention_mask = model_inputs["attention_mask"]
    inputs = [input_ids, attention_mask]

    # Compile the model
    compiled_model = forge.compile(
        model, sample_inputs=inputs, module_name=f"pt_{variant.replace('/', '_').replace('.', '_').replace('-', '_')}"
    )
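
Note: the module_name passed to forge.compile in both tests is derived by sanitizing the Hugging Face model id. A minimal illustration (not part of the commit) of what that expression produces for one of the parametrized variants:

variant = "Qwen/Qwen2.5-Coder-0.5B"
module_name = f"pt_{variant.replace('/', '_').replace('.', '_').replace('-', '_')}"
print(module_name)  # pt_Qwen_Qwen2_5_Coder_0_5B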
46 changes: 46 additions & 0 deletions forge/test/models/pytorch/text/qwen/test_qwen_v2.py
@@ -0,0 +1,46 @@
# SPDX-FileCopyrightText: (c) 2024 Tenstorrent AI ULC
#
# SPDX-License-Identifier: Apache-2.0
import pytest
import forge
from transformers import AutoModelForCausalLM, AutoTokenizer


# Variants for testing
variants = [
    "Qwen/Qwen2.5-0.5B",
    "Qwen/Qwen2.5-0.5B-Instruct",
    "Qwen/Qwen2.5-1.5B",
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-3B",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-7B",
    "Qwen/Qwen2.5-7B-Instruct",
]


@pytest.mark.parametrize("variant", variants, ids=variants)
@pytest.mark.model_analysis
@pytest.mark.xfail(
    reason="RuntimeError: Found Unsupported operations while lowering from TTForge to TTIR in forward graph - repeat interleave"
)
@pytest.mark.nightly
def test_qwen_response(variant):
    # Load model and tokenizer
    model = AutoModelForCausalLM.from_pretrained(variant, device_map="cpu")
    model.config.return_dict = False
    tokenizer = AutoTokenizer.from_pretrained(variant)

    # Prepare input
    prompt = "Give me a short introduction to large language models."
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    # Tokenize and prepare inputs
    model_inputs = tokenizer([text], return_tensors="pt")
    input_ids = model_inputs["input_ids"]
    attention_mask = model_inputs["attention_mask"]
    inputs = [input_ids, attention_mask]

    # Compile the model
    compiled_model = forge.compile(
        model, sample_inputs=inputs, module_name=f"pt_{variant.replace('/', '_').replace('.', '_').replace('-', '_')}"
    )
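
Note: both files can be exercised directly with pytest; a sketch of a single-variant run, assuming the nightly marker is registered in the repository's pytest configuration (the node id uses the parametrize id, which is the variant string itself):

pytest "forge/test/models/pytorch/text/qwen/test_qwen_v2.py::test_qwen_response[Qwen/Qwen2.5-0.5B]" -m nightly

Because both tests are decorated with xfail, the expected outcome is XFAIL while the repeat interleave lowering from TTForge to TTIR remains unsupported.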
