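# CI for the llm C++ causal LM sample: export TinyLlama-1.1B-Chat-v0.6 to OpenVINO IR,
# build the sample, and compare its beam search output against a Hugging Face
# transformers reference on Linux and Windows.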
name: llm_cpp
on:
  pull_request:
    paths:
      - llm/cpp/*
      - '!llm/cpp/README.md'
      - thirdparty/openvino_contrib/
      - .github/workflows/llm.yml
jobs:
  llm-cpp:
    runs-on: ubuntu-20.04
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.8
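      # actions/checkout with github-server-url pointed at huggingface.co clones the
      # TinyLlama model repository (LFS weights included) at a pinned revision.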
      - uses: actions/checkout@v4
        with:
          repository: TinyLlama/TinyLlama-1.1B-Chat-v0.6
          ref: bf9ae1c8bf026667e6f810768de259bb4a7f4777
          path: TinyLlama-1.1B-Chat-v0.6
          lfs: true
          github-server-url: https://huggingface.co
      - name: Install OpenVINO
        run: |
          mkdir ./ov/
          curl https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2023.3.0-13432-a6ea22ad0e6/l_openvino_toolkit_ubuntu20_2023.3.0.dev20231129_x86_64.tgz | tar --directory ./ov/ --strip-components 1 -xz
          sudo ./ov/install_dependencies/install_openvino_dependencies.sh
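      # The pip install and optimum-cli export run in the background (trailing '&')
      # while CMake configures and builds the sample; 'wait' blocks until the export
      # has finished before the step ends.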
      - name: Download, convert and build
        run: |
          source ./ov/setupvars.sh
          python -m pip install --upgrade-strategy eager transformers==4.35.2 "optimum[openvino]>=1.14" ./thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python -m pip uninstall --yes optimum-intel && python -m pip install git+https://github.com/huggingface/optimum-intel.git@5dac93d6e8d15c96fe061c653d82b7afd54954db && optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-Chat-v0.6 ./TinyLlama-1.1B-Chat-v0.6/ &
          cmake -DCMAKE_BUILD_TYPE=Release -S ./llm/cpp -B ./build/
          cmake --build ./build/ --config Release -j
          wait
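      # Each prompt is fed to the C++ sample and to transformers' group beam search;
      # every decoded reference beam must be found in pred.txt, and matched text is
      # cut out so each reference has to match a distinct prediction.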
      - name: Compare
        run: |
          source ./ov/setupvars.sh
          python ./llm/cpp/convert_tokenizers.py ./TinyLlama-1.1B-Chat-v0.6/
          timeout 25s ./build/llm/cpp/llm ./TinyLlama-1.1B-Chat-v0.6/openvino_model.xml ./tokenizer.xml ./detokenizer.xml 69 > ./pred.txt
          python -c "
          import transformers
          with open('pred.txt', 'r') as file:
              predictions = file.read()
          tokenizer = transformers.LlamaTokenizer.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/')
          tokenized = tokenizer('69', return_tensors='pt')
          for beam in transformers.LlamaForCausalLM.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
              idx = predictions.find(ref)
              if -1 == idx:
                  raise RuntimeError(f'Missing "{ref=}" from predictions')
              predictions = predictions[:idx] + predictions[idx + len(ref):]
          "
          echo 69 passed
          timeout 25s ./build/llm/cpp/llm ./TinyLlama-1.1B-Chat-v0.6/openvino_model.xml ./tokenizer.xml ./detokenizer.xml Hi > ./pred.txt
          python -c "
          import transformers
          with open('pred.txt', 'r') as file:
              predictions = file.read()
          tokenizer = transformers.LlamaTokenizer.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/')
          tokenized = tokenizer('Hi', return_tensors='pt')
          for beam in transformers.LlamaForCausalLM.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
              idx = predictions.find(ref)
              if -1 == idx:
                  raise RuntimeError(f'Missing "{ref=}" from predictions')
              predictions = predictions[:idx] + predictions[idx + len(ref):]
          "
          echo Hi passed
          timeout 25s ./build/llm/cpp/llm ./TinyLlama-1.1B-Chat-v0.6/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "return 0" > ./pred.txt
          python -c "
          import transformers
          with open('pred.txt', 'r') as file:
              predictions = file.read()
          tokenizer = transformers.LlamaTokenizer.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/')
          tokenized = tokenizer('return 0', return_tensors='pt')
          for beam in transformers.LlamaForCausalLM.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
              idx = predictions.find(ref)
              if -1 == idx:
                  raise RuntimeError(f'Missing "{ref=}" from predictions')
              predictions = predictions[:idx] + predictions[idx + len(ref):]
          "
          echo return 0 passed
          ./build/llm/cpp/llm ./TinyLlama-1.1B-Chat-v0.6/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "" > ./pred.txt
          python -c "
          import transformers
          with open('pred.txt', 'r') as file:
              predictions = file.read()
          tokenizer = transformers.LlamaTokenizer.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/')
          tokenized = tokenizer('', return_tensors='pt')
          for beam in transformers.LlamaForCausalLM.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
              idx = predictions.find(ref)
              if -1 == idx:
                  raise RuntimeError(f'Missing "{ref=}" from predictions')
              predictions = predictions[:idx] + predictions[idx + len(ref):]
          "
          echo '""' passed
          ./build/llm/cpp/llm ./TinyLlama-1.1B-Chat-v0.6/openvino_model.xml ./tokenizer.xml ./detokenizer.xml "你好! 你好嗎?" > ./pred.txt
          python -c "
          import transformers
          with open('pred.txt', 'r') as file:
              predictions = file.read()
          tokenizer = transformers.LlamaTokenizer.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/')
          tokenized = tokenizer('你好! 你好嗎?', return_tensors='pt')
          for beam in transformers.LlamaForCausalLM.from_pretrained('./TinyLlama-1.1B-Chat-v0.6/').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False):
              ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n'
              idx = predictions.find(ref)
              if -1 == idx:
                  raise RuntimeError(f'Missing "{ref=}" from predictions')
              predictions = predictions[:idx] + predictions[idx + len(ref):]
          "
          echo 你好! 你好嗎? passed
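  # The Windows job mirrors the Linux one, but installs the Windows OpenVINO nightly
  # archive and, in Compare, writes the reference checker out as ref.py with cmd echo
  # redirection instead of using python -c.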
  llm-cpp-windows:
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: actions/setup-python@v4
        with:
          python-version: 3.8
      - uses: actions/checkout@v4
        with:
          repository: TinyLlama/TinyLlama-1.1B-Chat-v0.6
          ref: bf9ae1c8bf026667e6f810768de259bb4a7f4777
          path: TinyLlama-1.1B-Chat-v0.6
          lfs: true
          github-server-url: https://huggingface.co
      - name: Install OpenVINO
        shell: bash
        run: |
          curl --output ov.zip https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2023.3.0-13570-402d251483c/w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64.zip
          unzip ov.zip
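      # This step runs under bash, so the environment that setupvars.bat would provide
      # is exported manually (OpenVINO_DIR, OPENVINO_LIB_PATHS, PATH, PYTHONPATH).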
      - name: Download, convert and build
        shell: bash
        run: |
          export OpenVINO_DIR=$GITHUB_WORKSPACE/w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64/runtime/cmake
          export OPENVINO_LIB_PATHS=$GITHUB_WORKSPACE/w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64/runtime/3rdparty/tbb/bin\;$GITHUB_WORKSPACE/w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64/runtime/bin/intel64/Release
          export PATH=$OPENVINO_LIB_PATHS:$PATH
          export PYTHONPATH=./w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64/python:./w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64/python/python3
          python -m pip install --upgrade-strategy eager transformers==4.35.2 "optimum[openvino]>=1.14" ./thirdparty/openvino_contrib/modules/custom_operations/[transformers] --extra-index-url https://download.pytorch.org/whl/cpu && python -m pip uninstall --yes optimum-intel && python -m pip install git+https://github.com/huggingface/optimum-intel.git@5dac93d6e8d15c96fe061c653d82b7afd54954db && optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-Chat-v0.6 ./TinyLlama-1.1B-Chat-v0.6/ &
          cmake -DCMAKE_BUILD_TYPE=Release -S ./llm/cpp -B ./build/
          cmake --build ./build/ --config Release -j
          wait
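      # Only the 69 prompt is verified here; ref.py is assembled line by line with
      # cmd echo redirection and then executed with python.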
      - name: Compare
        shell: cmd
        run: |
          call w_openvino_toolkit_windows_2023.3.0.dev20231211_x86_64\setupvars.bat
          python .\llm\cpp\convert_tokenizers.py .\TinyLlama-1.1B-Chat-v0.6\
          .\build\llm\cpp\Release\llm.exe .\TinyLlama-1.1B-Chat-v0.6\openvino_model.xml .\tokenizer.xml .\detokenizer.xml 69 > .\pred.txt
          echo import transformers > ref.py
          echo predictions = open('pred.txt', 'r').read() >> ref.py
          echo tokenizer = transformers.LlamaTokenizer.from_pretrained(r'.\TinyLlama-1.1B-Chat-v0.6') >> ref.py
          echo tokenized = tokenizer('69', return_tensors='pt') >> ref.py
          echo for beam in transformers.LlamaForCausalLM.from_pretrained(r'.\TinyLlama-1.1B-Chat-v0.6').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): >> ref.py
          echo     ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) + '\n' >> ref.py
          echo     idx = predictions.find(ref) >> ref.py
          echo     if -1 == idx: >> ref.py
          echo         raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py
          echo     predictions = predictions[:idx] + predictions[idx + len(ref):] >> ref.py
          python ref.py