name: llm_cpp
on:
  pull_request:
    paths:
      - llm/cpp/*
      - '!llm/cpp/README.md'
      - thirdparty/openvino_contrib/
      - .github/workflows/llm.yml
jobs:
  llm-cpp:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.8
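      # Fetch the TinyLlama checkpoint (including its LFS weights) by pointing actions/checkout at huggingface.co.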
      - uses: actions/checkout@v4
        with:
          repository: TinyLlama/TinyLlama-1.1B-Chat-v0.6
          ref: bf9ae1c8bf026667e6f810768de259bb4a7f4777
          path: TinyLlama-1.1B-Chat-v0.6
          lfs: true
          github-server-url: https://huggingface.co
      - name: Install OpenVINO
        run: |
          mkdir ./ov/
          curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2023.3.0-13432-a6ea22ad0e6/l_openvino_toolkit_ubuntu20_2023.3.0.dev20231129_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
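      # The model export (pip install + optimum-cli) runs in the background (&) while CMake configures and
      # builds the C++ sample; `wait` joins the export before the step finishes.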
      - name: Download, convert and build
        run: |
          source ./ov/setupvars.sh
          python -m pip install --upgrade-strategy eager transformers==4.35.2 "optimum[openvino]>=1.14" ./thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python -m pip uninstall --yes optimum-intel && python -m pip install git+https://github.com/huggingface/optimum-intel.git@5dac93d6e8d15c96fe061c653d82b7afd54954db && optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-Chat-v0.6 ./TinyLlama-1.1B-Chat-v0.6/ &
          cmake -DCMAKE_BUILD_TYPE=Release -S ./llm/cpp -B ./build/
          cmake --build ./build/ --config Release -j
          wait
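      # Run the compiled sample on several prompts and check that every group-beam-search sequence produced
      # by transformers appears in the sample's output; each matched line is removed from `predictions`, so
      # repeated sequences must show up the corresponding number of times.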
      - name: Compare
        run: |
          source ./ov/setupvars.sh
          python ./llm/cpp/convert_tokenizers.py ./TinyLlama-1.1B-Chat-v0.6/
          timeout 25s ./build/llm/cpp/llm ./TinyLlama-1.1B-Chat-v0.6/openvino_model.xml ./tokenizer.xml ./detokenizer.xml 69 > ./pred.txt
          python -c "
          import transformers
          with open('pred.txt', 'r') as file:
              predictions = file.read()
          tokenizer = transformers.LlamaTokenizer.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/')
          tokenized = tokenizer('69', return_tensors='pt')
          for beam in transformers.LlamaForCausalLM.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
              idx = predictions.find(ref)
              if -1 == idx:
                  raise RuntimeError(f'Missing "{ref=}" from predictions')
              predictions = predictions[:idx] + predictions[idx + len(ref):]
          "
          echo 69 passed
          timeout 25s ./build/llm/cpp/llm ./TinyLlama-1.1B-Chat-v0.6/openvino_model.xml ./tokenizer.xml ./detokenizer.xml Hi > ./pred.txt
          python -c "
          import transformers
          with open('pred.txt', 'r') as file:
              predictions = file.read()
          tokenizer = transformers.LlamaTokenizer.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/')
          tokenized = tokenizer('Hi', return_tensors='pt')
          for beam in transformers.LlamaForCausalLM.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
              idx = predictions.find(ref)
              if -1 == idx:
                  raise RuntimeError(f'Missing "{ref=}" from predictions')
              predictions = predictions[:idx] + predictions[idx + len(ref):]
          "
          echo Hi passed
          timeout 25s ./build/llm/cpp/llm ./TinyLlama-1.1B-Chat-v0.6/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "return 0" > ./pred.txt
          python -c "
          import transformers
          with open('pred.txt', 'r') as file:
              predictions = file.read()
          tokenizer = transformers.LlamaTokenizer.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/')
          tokenized = tokenizer('return 0', return_tensors='pt')
          for beam in transformers.LlamaForCausalLM.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
              idx = predictions.find(ref)
              if -1 == idx:
                  raise RuntimeError(f'Missing "{ref=}" from predictions')
              predictions = predictions[:idx] + predictions[idx + len(ref):]
          "
          echo return 0 passed
          ./build/llm/cpp/llm ./TinyLlama-1.1B-Chat-v0.6/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "" > ./pred.txt
          python -c "
          import transformers
          with open('pred.txt', 'r') as file:
              predictions = file.read()
          tokenizer = transformers.LlamaTokenizer.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/')
          tokenized = tokenizer('', return_tensors='pt')
          for beam in transformers.LlamaForCausalLM.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
              idx = predictions.find(ref)
              if -1 == idx:
                  raise RuntimeError(f'Missing "{ref=}" from predictions')
              predictions = predictions[:idx] + predictions[idx + len(ref):]
          "
          echo '""' passed
          ./build/llm/cpp/llm ./TinyLlama-1.1B-Chat-v0.6/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "你好! 你好嗎?" > ./pred.txt
          python -c "
          import transformers
          with open('pred.txt', 'r') as file:
              predictions = file.read()
          tokenizer = transformers.LlamaTokenizer.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/')
          tokenized = tokenizer('你好! 你好嗎?', return_tensors='pt')
          for beam in transformers.LlamaForCausalLM.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
              idx = predictions.find(ref)
              if -1 == idx:
                  raise RuntimeError(f'Missing "{ref=}" from predictions')
              predictions = predictions[:idx] + predictions[idx + len(ref):]
          "
          echo 你好! 你好嗎? passed
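  # The Windows job repeats the same download/convert/build flow; only the 69 prompt is compared there.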
  llm-cpp-windows:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.8
      - uses: actions/checkout@v4
        with:
          repository: TinyLlama/TinyLlama-1.1B-Chat-v0.6
          ref: bf9ae1c8bf026667e6f810768de259bb4a7f4777
          path: TinyLlama-1.1B-Chat-v0.6
          lfs: true
          github-server-url: https://huggingface.co
      - name: Install OpenVINO
        shell: bash
        run: |
          curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2023.3.0-13570-402d251483c/w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64.zip
          unzip ov.zip
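      # This step runs under bash, so the OpenVINO environment that setupvars.bat would provide is exported by hand.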
      - name: Download, convert and build
        shell: bash
        run: |
          export OpenVINO_DIR=$GITHUB_WORKSPACE/w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64/runtime/cmake
          export OPENVINO_LIB_PATHS=$GITHUB_WORKSPACE/w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64/runtime/3rdparty/tbb/bin\;$GITHUB_WORKSPACE/w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64/runtime/bin/intel64/Release
          export PATH=$OPENVINO_LIB_PATHS:$PATH
          export PYTHONPATH=./w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64/python:./w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64/python/python3
          python -m pip install --upgrade-strategy eager transformers==4.35.2 "optimum[openvino]>=1.14" ./thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python -m pip uninstall --yes optimum-intel && python -m pip install git+https://github.com/huggingface/optimum-intel.git@5dac93d6e8d15c96fe061c653d82b7afd54954db && optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-Chat-v0.6 ./TinyLlama-1.1B-Chat-v0.6/ &
          cmake -DCMAKE_BUILD_TYPE=Release -S ./llm/cpp -B ./build/
          cmake --build ./build/ --config Release -j
          wait
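      # The same reference check as on Linux, assembled line by line into ref.py with cmd's `echo` and then executed.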
      - name: Compare
        shell: cmd
        run: |
          call w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64\setupvars.bat
          python .\llm\cpp\convert_tokenizers.py .\TinyLlama-1.1B-Chat-v0.6\
          .\build\llm\cpp\Release\llm.exe .\TinyLlama-1.1B-Chat-v0.6\openvino_model.xml .\tokenizer.xml .\detokenizer.xml 69 > .\pred.txt
          echo import transformers > ref.py
          echo predictions = open('pred.txt', 'r').read() >> ref.py
          echo tokenizer = transformers.LlamaTokenizer.from_pretrained(r'.\TinyLlama-1.1B-Chat-v0.6') >> ref.py
          echo tokenized = tokenizer('69', return_tensors='pt') >> ref.py
          echo for beam in transformers.LlamaForCausalLM.from_pretrained(r'.\TinyLlama-1.1B-Chat-v0.6').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): >> ref.py
          echo     ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' >> ref.py
          echo     idx = predictions.find(ref) >> ref.py
          echo     if -1 == idx: >> ref.py
          echo         raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py
          echo     predictions = predictions[:idx] + predictions[idx + len(ref):] >> ref.py
          python ref.py