[Image generation] Fixed SD3 accuracy issues (#1131) #9
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: causal_lm_cpp
on:
  workflow_dispatch:
  pull_request:
  merge_group:
  push:
    branches:
      - master
      - 'releases/**'
# Required by https://github.com/ossf/scorecard/blob/e23b8ad91fd6a64a0a971ca4fc0a4d1650725615/docs/checks.md#token-permissions
permissions: read-all
concurrency:
  # github.ref is not unique in post-commit
  group: ${{ github.event_name == 'push' && github.run_id || github.ref }}-causal-lm-cpp
  cancel-in-progress: true
env:
  # Nightly OpenVINO archives for each target OS; all pinned to the same build.
  l_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17253-b9a94c3f8b8/l_openvino_toolkit_ubuntu20_2024.5.0.dev20241031_x86_64.tgz
  l_u22_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17253-b9a94c3f8b8/l_openvino_toolkit_ubuntu22_2024.5.0.dev20241031_x86_64.tgz
  m_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17253-b9a94c3f8b8/m_openvino_toolkit_macos_12_6_2024.5.0.dev20241031_x86_64.tgz
  w_ov_link: https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/2024.5.0-17253-b9a94c3f8b8/w_openvino_toolkit_windows_2024.5.0.dev20241031_x86_64.zip
jobs:
cpp-multinomial-greedy_causal_lm-ubuntu: | |
runs-on: ubuntu-20.04-8-cores | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.9 | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model openlm-research/open_llama_3b_v2 open_llama_3b_v2 | |
optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T | |
wget https://huggingface.co/smangrul/tinyllama_lora_sql/resolve/main/adapter_model.safetensors?download=true -O adapter_model.safetensors | |
- run: > | |
. ./ov/setupvars.sh | |
&& timeout 25s ./build/samples/cpp/multinomial_causal_lm/multinomial_causal_lm ./open_llama_3b_v2/ a | |
env: | |
PYTHONPATH: "./build" | |
- run: > | |
. ./ov/setupvars.sh | |
&& timeout 25s ./samples/python/multinomial_causal_lm/multinomial_causal_lm.py ./open_llama_3b_v2/ b | |
env: | |
PYTHONPATH: "./build" | |
- run: > | |
. ./ov/setupvars.sh | |
&& timeout 25s ./build/samples/cpp/greedy_causal_lm/greedy_causal_lm ./open_llama_3b_v2/ "return 0" | |
| diff <(timeout 25s samples/python/greedy_causal_lm/greedy_causal_lm.py ./open_llama_3b_v2/ "return 0") - | |
env: | |
PYTHONPATH: "./build" | |
- run: > | |
. ./ov/setupvars.sh | |
&& samples/python/greedy_causal_lm/lora.py ./TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T/ adapter_model.safetensors "How to create a table with two columns, one of them has type float, another one has type int?" | |
env: | |
PYTHONPATH: "./build" | |
cpp-beam_search_causal_lm-ubuntu: | |
strategy: | |
matrix: | |
executable: | |
[ | |
./build/samples/cpp/beam_search_causal_lm/beam_search_causal_lm, | |
python ./samples/python/beam_search_causal_lm/beam_search_causal_lm.py, | |
] | |
runs-on: ubuntu-20.04 | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: '3.10' | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 | |
- name: Compare | |
env: | |
PYTHONPATH: "./build/" # C++ ignores that | |
run: | | |
source ./ov/setupvars.sh | |
timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "Why is the Sun yellow?" > ./pred.txt | |
python -c " | |
import transformers | |
with open('pred.txt', 'r') as file: | |
predictions = file.read() | |
tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') | |
tokenized = tokenizer('Why is the Sun yellow?', return_tensors='pt') | |
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): | |
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) | |
idx = predictions.find(ref) | |
if -1 == idx: | |
raise RuntimeError(f'Missing "{ref=}" from predictions') | |
predictions = predictions[:idx] + predictions[idx + len(ref):] | |
" | |
echo "Why is the Sun yellow?" passed | |
timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ 69 > ./pred.txt | |
python -c " | |
import transformers | |
with open('pred.txt', 'r') as file: | |
predictions = file.read() | |
tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') | |
tokenized = tokenizer('69', return_tensors='pt') | |
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): | |
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) | |
idx = predictions.find(ref) | |
if -1 == idx: | |
raise RuntimeError(f'Missing "{ref=}" from predictions') | |
predictions = predictions[:idx] + predictions[idx + len(ref):] | |
" | |
echo 69 passed | |
timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ Hi > ./pred.txt | |
python -c " | |
import transformers | |
with open('pred.txt', 'r') as file: | |
predictions = file.read() | |
tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') | |
tokenized = tokenizer('Hi', return_tensors='pt') | |
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): | |
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) | |
idx = predictions.find(ref) | |
if -1 == idx: | |
raise RuntimeError(f'Missing "{ref=}" from predictions') | |
predictions = predictions[:idx] + predictions[idx + len(ref):] | |
" | |
echo "Hi" passed | |
timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "return 0" > ./pred.txt | |
python -c " | |
import transformers | |
with open('pred.txt', 'r') as file: | |
predictions = file.read() | |
tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') | |
tokenized = tokenizer('return 0', return_tensors='pt') | |
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): | |
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) | |
idx = predictions.find(ref) | |
if -1 == idx: | |
raise RuntimeError(f'Missing "{ref=}" from predictions') | |
predictions = predictions[:idx] + predictions[idx + len(ref):] | |
" | |
echo "return 0" passed | |
timeout 25s ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "你好! 你好嗎?" > ./pred.txt | |
python -c " | |
import transformers | |
with open('pred.txt', 'r', errors='ignore') as file: | |
predictions = file.read() | |
tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') | |
tokenized = tokenizer('你好! 你好嗎?', return_tensors='pt') | |
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): | |
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) | |
idx = predictions.find(ref.replace('�', '')) | |
if -1 == idx: | |
raise RuntimeError(f'Missing "{ref=}" from predictions') | |
predictions = predictions[:idx] + predictions[idx + len(ref):] | |
" | |
echo "你好! 你好嗎?" passed | |
timeout 1m ${{ matrix.executable }} ./TinyLlama-1.1B-Chat-v1.0/ "Alan Turing was a" "return 0" "你好! 你好嗎?" > ./pred.txt | |
python -c " | |
import transformers | |
with open('pred.txt', 'r', errors='ignore') as file: | |
predictions = file.read() | |
tokenizer = transformers.LlamaTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0') | |
prompts = [ | |
'Alan Turing was a', | |
'return 0', | |
'你好! 你好嗎?' | |
] | |
for prompt in prompts: | |
tokenized = tokenizer(prompt, return_tensors='pt') | |
for beam in transformers.LlamaForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0').generate(**tokenized, num_beam_groups=3, num_beams=15, num_return_sequences=15, diversity_penalty=1.0, max_new_tokens=20, early_stopping=False, length_penalty=1.0, no_repeat_ngram_size=9**9, do_sample=False): | |
ref = ': ' + tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) | |
idx = predictions.find(ref.replace('�', '')) | |
if -1 == idx: | |
raise RuntimeError(f'Missing "{ref=}" from predictions') | |
predictions = predictions[:idx] + predictions[idx + len(ref):] | |
" | |
echo "Multi prompt" passed | |
cpp-greedy_causal_lm-windows: | |
runs-on: windows-latest | |
env: | |
PYTHONIOENCODING: "utf8" | |
defaults: | |
run: | |
shell: cmd | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.9 | |
- run: curl --output ov.zip ${{ env.w_ov_link }} | |
- run: unzip -d ov ov.zip | |
- run: dirs=(ov/*) && mv ov/*/* ov && rmdir "${dirs[@]}" | |
shell: bash | |
- name: Build app | |
run: | | |
call .\ov\setupvars.bat | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert model | |
run: | | |
call .\ov\setupvars.bat | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 | |
optimum-cli export openvino -m TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T | |
curl -o adapter_model.safetensors -s -L https://huggingface.co/smangrul/tinyllama_lora_sql/resolve/main/adapter_model.safetensors?download=true | |
- run: > | |
set PATH=.\build\openvino_genai\;%PATH% | |
&& call .\ov\setupvars.bat | |
&& .\build\samples\cpp\greedy_causal_lm\Release\greedy_causal_lm.exe .\TinyLlama-1.1B-Chat-v1.0\ 69 > .\cpp.txt | |
- run: | | |
echo import transformers > ref.py | |
echo predictions = open('cpp.txt', 'r').read() >> ref.py | |
echo tokenizer = transformers.AutoTokenizer.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0', trust_remote_code=True) >> ref.py | |
echo tokenized = tokenizer('69', return_tensors='pt') >> ref.py | |
echo for beam in transformers.AutoModelForCausalLM.from_pretrained('TinyLlama/TinyLlama-1.1B-Chat-v1.0', trust_remote_code=True).generate(**tokenized, max_new_tokens=100, do_sample=False): >> ref.py | |
echo ref = tokenizer.decode(beam[tokenized['input_ids'].numel():], skip_special_tokens=True) >> ref.py | |
echo idx = predictions.find(ref) >> ref.py | |
echo if -1 == idx: >> ref.py | |
echo raise RuntimeError(f'Missing "{ref=}" from predictions') >> ref.py | |
echo predictions = predictions[:idx] + predictions[idx + len(ref):] >> ref.py | |
- run: python ref.py | |
- run: > | |
set PATH=.\build\openvino_genai\;%PATH% | |
&& set "PYTHONPATH=./build/" | |
&& call .\ov\setupvars.bat | |
&& python samples\python\greedy_causal_lm\greedy_causal_lm.py .\TinyLlama-1.1B-Chat-v1.0\ 69 > .\py.txt | |
- run: fc .\cpp.txt .\py.txt | |
- run: > | |
set PATH=.\build\openvino_genai\;%PATH% | |
&& set "PYTHONPATH=./build/" | |
&& call .\ov\setupvars.bat | |
&& python samples\python\greedy_causal_lm\lora.py .\TinyLlama\TinyLlama-1.1B-intermediate-step-1431k-3T\ adapter_model.safetensors "How to create a table with two columns, one of them has type float, another one has type int?" | |
cpp-greedy_causal_lm-Qwen-7B-Chat: | |
runs-on: ubuntu-20.04-16-cores | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.11 | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat | |
- run: > | |
. ./ov/setupvars.sh | |
&& timeout 2m ./build/samples/cpp/greedy_causal_lm/greedy_causal_lm ./Qwen-7B-Chat/ 69 | diff <(timeout 2m samples/python/greedy_causal_lm/greedy_causal_lm.py ./Qwen-7B-Chat/ 69) - | |
env: | |
PYTHONPATH: "./build" | |
cpp-beam_search_causal_lm-Qwen1_5-7B-Chat: | |
runs-on: ubuntu-20.04-16-cores | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.12 | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen1.5-7B-Chat Qwen1.5-7B-Chat | |
- run: > | |
. ./ov/setupvars.sh | |
&& timeout 50s ./build/samples/cpp/beam_search_causal_lm/beam_search_causal_lm ./Qwen1.5-7B-Chat/ "你好!" | |
| diff <(timeout 50s ./samples/python/beam_search_causal_lm/beam_search_causal_lm.py ./Qwen1.5-7B-Chat/ "你好!") - | |
env: | |
PYTHONPATH: "./build" | |
cpp-beam_search_causal_lm-Phi-2: | |
runs-on: ubuntu-20.04-16-cores | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.9 | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-2 phi-2 | |
- run: > | |
. ./ov/setupvars.sh | |
&& timeout 50s ./build/samples/cpp/beam_search_causal_lm/beam_search_causal_lm ./phi-2/ 69 | |
| diff <(timeout 50s ./samples/python/beam_search_causal_lm/beam_search_causal_lm.py ./phi-2/ 69) - | |
env: | |
PYTHONPATH: "./build" | |
cpp-beam_search_causal_lm-notus-7b-v1: | |
runs-on: ubuntu-20.04-16-cores | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: '3.10' | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model argilla/notus-7b-v1 notus-7b-v1 | |
- run: > | |
. ./ov/setupvars.sh | |
&& timeout 50s ./build/samples/cpp/beam_search_causal_lm/beam_search_causal_lm ./notus-7b-v1/ 69 | |
| diff <(timeout 50s ./samples/python/beam_search_causal_lm/beam_search_causal_lm.py ./notus-7b-v1/ 69) - | |
env: | |
PYTHONPATH: "./build" | |
cpp-speculative_decoding_lm-ubuntu: | |
runs-on: ubuntu-20.04-16-cores | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.11 | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-3b dolly-v2-3b | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model databricks/dolly-v2-7b dolly-v2-7b | |
- name: run and compare | |
run: | | |
source ./ov/setupvars.sh | |
./build/samples/cpp/speculative_decoding_lm/speculative_decoding_lm ./dolly-v2-7b/ ./dolly-v2-3b/ "Alan Turing was a" > predictions_speculative.txt | |
./build/samples/cpp/greedy_causal_lm/greedy_causal_lm ./dolly-v2-7b/ "Alan Turing was a" > predictions_greedy.txt | |
python ./samples/python/speculative_decoding_lm/speculative_decoding_lm.py ./dolly-v2-7b/ ./dolly-v2-3b/ "Alan Turing was a" > predictions_py.txt | |
python -c " | |
with open('predictions_greedy.txt', 'r') as f: | |
predicted_greedy = f.readline() | |
with open('predictions_speculative.txt', 'r') as f: | |
predicted_speculative = f.readline() | |
with open('predictions_py.txt', 'r') as f: | |
predicted_py = f.readline() | |
assert predicted_greedy == predicted_speculative | |
assert predicted_greedy == predicted_py | |
assert predicted_speculative == predicted_py | |
" | |
echo "Alan Turing was a" passed | |
env: | |
PYTHONPATH: "./build/:$PYTHONPATH" | |
LD_LIBRARY_PATH: "./build/openvino_genai/:$LD_LIBRARY_PATH" | |
cpp-prompt_lookup_decoding_lm-ubuntu: | |
runs-on: ubuntu-20.04-16-cores | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.12 | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model Qwen/Qwen-7B-Chat Qwen-7B-Chat --task text-generation-with-past | |
- name: run and compare | |
run: | | |
source ./ov/setupvars.sh | |
echo 'Code:```python | |
def add(a, b): | |
return a + b | |
``` | |
Question: Can you please add 2 and 3 | |
A:' > ./prompt.txt | |
./build/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_prompt_lookup.txt | |
./build/samples/cpp/greedy_causal_lm/greedy_causal_lm ./TinyLlama-1.1B-Chat-v1.0/ "$(<prompt.txt)" > predictions_greedy.txt | |
python -c " | |
with open('predictions_greedy.txt', 'r') as f: | |
predicted_greedy = f.readline() | |
with open('predictions_prompt_lookup.txt', 'r') as f: | |
predicted_prompt_lookup = f.readline() | |
assert predicted_greedy == predicted_prompt_lookup | |
" | |
echo "Prompt lookup" passed | |
- name: run and compare (model with seq_length_axis = 1) | |
run: | | |
source ./ov/setupvars.sh | |
echo 'Code:```python | |
def add(a, b): | |
return a + b | |
``` | |
Question: Can you please add 2 and 3 | |
A:' > ./prompt.txt | |
./build/samples/cpp/prompt_lookup_decoding_lm/prompt_lookup_decoding_lm ./Qwen-7B-Chat/ "$(<prompt.txt)" > predictions_prompt_lookup.txt | |
./build/samples/cpp/greedy_causal_lm/greedy_causal_lm ./Qwen-7B-Chat/ "$(<prompt.txt)" > predictions_greedy.txt | |
python -c " | |
with open('predictions_greedy.txt', 'r') as f: | |
predicted_greedy = f.readline() | |
with open('predictions_prompt_lookup.txt', 'r') as f: | |
predicted_prompt_lookup = f.readline() | |
assert predicted_greedy == predicted_prompt_lookup | |
" | |
echo "Prompt lookup" passed | |
cpp-Phi-1_5: | |
runs-on: ubuntu-20.04-16-cores | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.9 | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model microsoft/phi-1_5 phi-1_5 | |
- name: Run Generation | |
run: | | |
source ./ov/setupvars.sh | |
timeout 50s ./build/samples/cpp/greedy_causal_lm/greedy_causal_lm ./phi-1_5/ "Alan Turing was a" > ./pred_greedy.txt | |
- name: Compare | |
run: | | |
python -c " | |
import transformers | |
with open('pred_greedy.txt', 'r') as file: | |
predictions = file.read() | |
tokenizer = transformers.AutoTokenizer.from_pretrained('microsoft/phi-1_5') | |
tokenized = tokenizer('Alan Turing was a', return_tensors='pt') | |
for output in transformers.AutoModelForCausalLM.from_pretrained('microsoft/phi-1_5').generate(**tokenized, max_length=100, do_sample=False): | |
ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True) | |
idx = predictions.find(ref) | |
if -1 == idx: | |
raise RuntimeError(f'Missing "{ref=}" from predictions') | |
predictions = predictions[:idx] + predictions[idx + len(ref):] | |
" | |
echo Phi-1_5 passed | |
- run: > | |
. ./ov/setupvars.sh | |
&& timeout 50s samples/python/greedy_causal_lm/greedy_causal_lm.py ./phi-1_5/ "Alan Turing was a" | |
| diff ./pred_greedy.txt - | |
env: | |
PYTHONPATH: "./build" | |
cpp-greedy_causal_lm-redpajama-3b-chat: | |
runs-on: ubuntu-20.04-4-cores | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: '3.10' | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model ikala/redpajama-3b-chat redpajama-3b-chat | |
- name: Run Generation | |
run: | | |
source ./ov/setupvars.sh | |
timeout 50s ./build/samples/cpp/greedy_causal_lm/greedy_causal_lm ./redpajama-3b-chat/ "Alan Turing was a" > ./pred_greedy.txt | |
- name: Compare | |
run: | | |
python -c " | |
import transformers | |
with open('pred_greedy.txt', 'r') as file: | |
predictions = file.read() | |
tokenizer = transformers.AutoTokenizer.from_pretrained('ikala/redpajama-3b-chat') | |
tokenized = tokenizer('Alan Turing was a', return_tensors='pt') | |
for output in transformers.AutoModelForCausalLM.from_pretrained('ikala/redpajama-3b-chat').generate(**tokenized, max_length=100, do_sample=False): | |
ref = tokenizer.decode(output[tokenized['input_ids'].numel():], skip_special_tokens=True) | |
idx = predictions.find(ref) | |
if -1 == idx: | |
raise RuntimeError(f'Missing "{ref}" from predictions') | |
predictions = predictions[:idx] + predictions[idx + len(ref):] | |
" | |
echo "Alan Turing was a" passed | |
- run: > | |
. ./ov/setupvars.sh | |
&& timeout 50s samples/python/greedy_causal_lm/greedy_causal_lm.py ./redpajama-3b-chat/ "Alan Turing was a" | |
| diff ./pred_greedy.txt - | |
env: | |
PYTHONPATH: "./build" | |
cpp-chat_sample-ubuntu: | |
runs-on: ubuntu-24.04 | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.11 | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 | |
- name: Compare | |
env: | |
PYTHONPATH: "./build" | |
run: | | |
source ./ov/setupvars.sh | |
printf 'What is 2 + 2?\nWhat is the previous answer?\nAdd 1 to it.\nSubtract 5 from it.\nWhy is the sun yellow?\nWhat was my first question?\n' > ./input.txt | |
timeout 30s ./build/samples/cpp/chat_sample/chat_sample ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred.txt | |
python -c " | |
from transformers import LlamaTokenizer, AutoModelForCausalLM | |
model_id = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0' | |
tokenizer = LlamaTokenizer.from_pretrained(model_id) | |
model = AutoModelForCausalLM.from_pretrained(model_id) | |
prompts = ['What is 2 + 2?', 'What is the previous answer?', 'Add 1 to it.', 'Subtract 5 from it.', 'Why is the sun yellow?', 'What was my first question?'] | |
def gen_prompt(prompt): | |
return {'role': 'user', 'content': prompt} | |
def gen_answer(answer): | |
return {'role': 'assistant', 'content': answer} | |
chat_history = [] | |
chat_prompt = '' | |
output = open('ref.txt', 'w') | |
for prompt in prompts: | |
output.write('question:\n') | |
chat_history.append(gen_prompt(prompt)) | |
chat_prompt = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True) | |
tokenized = tokenizer(chat_prompt, return_tensors='pt', add_special_tokens=False) | |
answer = model.generate(**tokenized, max_length=1000, do_sample=False) | |
answer_str = tokenizer.decode(answer[0, tokenized['input_ids'].numel():], skip_special_tokens=True) | |
chat_history.append(gen_answer(answer_str)) | |
output.write(answer_str) | |
output.write('\n----------\n') | |
output.write('question:\n') | |
output.close() | |
" | |
diff pred.txt ref.txt | |
echo "Chat sample cpp" passed | |
timeout 30s ./samples/python/chat_sample/chat_sample.py ./TinyLlama-1.1B-Chat-v1.0/ < input.txt > ./pred2.txt | |
diff pred2.txt ref.txt | |
echo "Chat sample python" passed | |
visual_language_chat_sample-ubuntu: | |
runs-on: ubuntu-22.04-16-cores | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.11 | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_u22_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release --target visual_language_chat py_openvino_genai -j | |
- name: Install dependencies | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt opencv-python | |
- name: Download and convert MiniCPM-V-2_6 model and an image | |
run: | | |
python -m pip install git+https://github.com/eaidova/optimum-intel.git@ea/minicpmv | |
python -m pip install -U "optimum<1.23" --no-dependencies | |
source ./ov/setupvars.sh | |
optimum-cli export openvino -m openbmb/MiniCPM-V-2_6 MiniCPM-V-2_6 --trust-remote-code | |
wget https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11 --output-document cat.jpg | |
mkdir images | |
- name: Generate reference - MiniCPM-V-2_6 | |
shell: python | |
run: | | |
from optimum.intel.openvino import OVModelForVisualCausalLM | |
from transformers import AutoProcessor | |
from PIL import Image | |
import cv2 | |
import numpy as np | |
import requests | |
res = 448, 448 | |
lines = np.arange(res[0] * res[1] * 3, dtype=np.uint8) % 255 | |
lines = lines.reshape([*res, 3]) | |
cv2.imwrite("images/lines.png", lines) | |
cat = Image.open(requests.get("https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11", stream=True).raw).convert('RGB') | |
shrunk_cat = cat.resize((448, 448)) # Resize is different in C++ impl, there's also sin and cos calc in double but it has smaller effect. | |
shrunk_cat.save("images/shrunk_cat.png") | |
lines = Image.open("images/lines.png").convert('RGB') | |
model_id = "openbmb/MiniCPM-V-2_6" | |
processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True) | |
prompt = processor.tokenizer.apply_chat_template([{"role": "user", "content": "(<image>./</image>)\n(<image>./</image>)\nDescribe the images?"}], tokenize=False, add_generation_prompt=True) | |
inputs = processor([prompt], [lines, shrunk_cat], return_tensors="pt") | |
model = OVModelForVisualCausalLM.from_pretrained("MiniCPM-V-2_6", device="CPU", trust_remote_code=True) | |
result = model.generate(**inputs, max_new_tokens=99) | |
decoded = processor.tokenizer.batch_decode(result[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0] | |
print(decoded) | |
with open("ref.txt", "w") as f: | |
f.write(f"question:\n{decoded}\n----------\nquestion:\n") | |
- name: Run visual_language_chat C++ sample - MiniCPM-V-2_6 | |
run: > | |
source ./ov/setupvars.sh | |
&& ./build/samples/cpp/visual_language_chat/visual_language_chat ./MiniCPM-V-2_6/ ./images/ | |
<<< $'Describe the images?' | tee cpp.txt | |
timeout-minutes: 2 | |
- run: diff cpp.txt ref.txt | |
- name: Run visual_language_chat Python sample - MiniCPM-V-2_6 | |
run: > | |
source ./ov/setupvars.sh | |
&& ./samples/python/visual_language_chat/visual_language_chat.py ./MiniCPM-V-2_6/ ./images/ | |
<<< $'Describe the images?' | tee py.txt | |
env: | |
PYTHONPATH: "./build/" | |
- run: diff py.txt ref.txt | |
- name: Run visual_language_chat C++ sample with 2 prompts - MiniCPM-V-2_6 | |
run: > | |
source ./ov/setupvars.sh | |
&& ./build/samples/cpp/visual_language_chat/visual_language_chat ./MiniCPM-V-2_6/ ./cat.jpg | |
<<< $'What is unusual on this image?\nGo on.' | tee cpp2.txt | |
timeout-minutes: 2 | |
- name: Run visual_language_chat Python sample with 2 prompts - MiniCPM-V-2_6 | |
run: > | |
source ./ov/setupvars.sh | |
&& ./samples/python/visual_language_chat/visual_language_chat.py ./MiniCPM-V-2_6/ ./cat.jpg | |
<<< $'What is unusual on this image?\nGo on.' | tee py2.txt | |
env: | |
PYTHONPATH: "./build/" | |
- run: diff cpp2.txt py2.txt | |
- name: Download and convert LLaVa 1.5 model and an image | |
run: | | |
source ./ov/setupvars.sh | |
optimum-cli export openvino --model llava-hf/llava-1.5-7b-hf ./llava_1_5_7b_ov/ | |
wget https://llava-vl.github.io/static/images/monalisa.jpg | |
- name: Run visual_language_chat C++ sample - LLaVa 1.5 | |
run: > | |
source ./ov/setupvars.sh | |
&& ./build/samples/cpp/visual_language_chat/visual_language_chat ./llava_1_5_7b_ov/ monalisa.jpg | |
<<< $'Who drew this painting?\nWhen did the painter live?' | |
timeout-minutes: 4 | |
- name: Download and convert LLaVa-Next model | |
run: | | |
source ./ov/setupvars.sh | |
optimum-cli export openvino --model llava-hf/llava-v1.6-mistral-7b-hf ./llava_v1_6_mistral_7b_ov/ | |
- name: Run visual_language_chat C++ sample - LLaVa-Next | |
run: > | |
source ./ov/setupvars.sh | |
&& ./build/samples/cpp/visual_language_chat/visual_language_chat ./llava_v1_6_mistral_7b_ov/ monalisa.jpg | |
<<< $'Who drew this painting?\nWhen did the painter live?' | |
timeout-minutes: 4 | |
- name: Download and convert InternVL2 model | |
run: | | |
source ./ov/setupvars.sh | |
optimum-cli export openvino --model OpenGVLab/InternVL2-4B ./internvl2_4b_ov/ --trust-remote-code | |
- name: Run visual_language_chat C++ sample - InternVL2 | |
run: > | |
source ./ov/setupvars.sh | |
&& ./build/samples/cpp/visual_language_chat/visual_language_chat ./internvl2_4b_ov/ monalisa.jpg | |
<<< $'Who drew this painting?\nWhen did the painter live?' | |
timeout-minutes: 4 | |
cpp-continuous-batching-ubuntu: | |
runs-on: ubuntu-20.04-8-cores | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.12 | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.l_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
sudo ./ov/install_dependencies/install_openvino_dependencies.sh | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 | |
- name: Run gtests | |
run: | | |
source ./ov/setupvars.sh | |
./build/tests/cpp/tests_continuous_batching | |
- name: Run accuracy_sample | |
run: | | |
source ./ov/setupvars.sh | |
timeout --verbose 50s ./build/tools/continuous_batching/accuracy/continuous_batching_accuracy -m ./TinyLlama-1.1B-Chat-v1.0/ -n 5 | |
- name: Run throughput_benchmark | |
run: | | |
wget -q https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json | |
source ./ov/setupvars.sh | |
timeout --verbose 200s ./build/tools/continuous_batching/benchmark/continuous_batching_benchmark -n 10 -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1 | |
timeout --verbose 200s ./build/tools/continuous_batching/benchmark/continuous_batching_benchmark -n 10 --dynamic_split_fuse --max_batch_size 256 --max_input_len 256 -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1 | |
cpp-continuous-batching-windows: | |
runs-on: windows-latest | |
env: | |
PYTHONIOENCODING: "utf8" | |
defaults: | |
run: | |
shell: cmd | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.9 | |
- name: Install OpenVINO | |
run: | | |
curl --output ov.zip ${{ env.w_ov_link }} | |
unzip -d ov ov.zip | |
dirs=(ov/*) && mv ov/*/* ov && rmdir "${dirs[@]}" | |
shell: bash | |
- name: Build app | |
run: | | |
call .\ov\setupvars.bat | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
call .\ov\setupvars.bat | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 | |
- name: Run gtests | |
run: | | |
set PATH=.\build\openvino_genai\;%PATH% | |
call .\ov\setupvars.bat | |
.\build\tests\cpp\Release\tests_continuous_batching.exe | |
- name: Run accuracy_sample | |
run: | | |
set PATH=.\build\openvino_genai\;%PATH% | |
call .\ov\setupvars.bat | |
.\build\tools\continuous_batching\accuracy\Release\continuous_batching_accuracy.exe -m .\TinyLlama-1.1B-Chat-v1.0\ -n 5 | |
- name: Run throughput_benchmark | |
run: | | |
curl -o .\ShareGPT_V3_unfiltered_cleaned_split.json -s -L "https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json" | |
set PATH=.\build\openvino_genai\;%PATH% | |
call .\ov\setupvars.bat | |
.\build\tools\continuous_batching\benchmark\Release\continuous_batching_benchmark.exe -n 2 -m .\TinyLlama-1.1B-Chat-v1.0\ --dataset .\ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1 | |
cpp-continuous-batching-macos: | |
runs-on: macos-12 | |
defaults: | |
run: | |
shell: bash | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
submodules: recursive | |
- uses: actions/setup-python@v4 | |
with: | |
python-version: 3.9 | |
- name: Install OpenVINO | |
run: | | |
mkdir ./ov/ | |
curl ${{ env.m_ov_link }} | tar --directory ./ov/ --strip-components 1 -xz | |
brew install coreutils scons | |
- name: Build app | |
run: | | |
source ./ov/setupvars.sh | |
cmake -DCMAKE_BUILD_TYPE=Release -S ./ -B ./build/ | |
cmake --build ./build/ --config Release -j | |
- name: Download and convert and model | |
run: | | |
source ./ov/setupvars.sh | |
python -m pip install --upgrade-strategy eager -r ./samples/requirements.txt | |
python -m pip install ./thirdparty/openvino_tokenizers/[transformers] --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly | |
optimum-cli export openvino --trust-remote-code --weight-format fp16 --model TinyLlama/TinyLlama-1.1B-Chat-v1.0 TinyLlama-1.1B-Chat-v1.0 | |
- name: Run gtests | |
run: | | |
source ./ov/setupvars.sh | |
./build/tests/cpp/tests_continuous_batching | |
- name: Run accuracy_sample | |
run: | | |
source ./ov/setupvars.sh | |
timeout --verbose 120s ./build/tools/continuous_batching/accuracy/continuous_batching_accuracy -m ./TinyLlama-1.1B-Chat-v1.0/ -n 5 | |
- name: Run throughput_benchmark | |
run: | | |
wget -q https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json | |
source ./ov/setupvars.sh | |
./build/tools/continuous_batching/benchmark/continuous_batching_benchmark -n 5 -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1 | |
  # Aggregate gate job: branch protection can require this single check
  # instead of listing every job above individually.
  Overall_Status:
    name: ci/gha_overall_status_causal_lm
    needs: [cpp-multinomial-greedy_causal_lm-ubuntu, cpp-beam_search_causal_lm-ubuntu, cpp-greedy_causal_lm-windows,
            cpp-greedy_causal_lm-Qwen-7B-Chat, cpp-beam_search_causal_lm-Qwen1_5-7B-Chat, cpp-beam_search_causal_lm-Phi-2,
            cpp-beam_search_causal_lm-notus-7b-v1, cpp-speculative_decoding_lm-ubuntu, cpp-prompt_lookup_decoding_lm-ubuntu,
            cpp-Phi-1_5, cpp-greedy_causal_lm-redpajama-3b-chat, cpp-chat_sample-ubuntu, cpp-continuous-batching-ubuntu,
            visual_language_chat_sample-ubuntu,
            cpp-continuous-batching-windows, cpp-continuous-batching-macos]
    # Run even when the needed jobs failed or were cancelled, so the gate
    # always reports a status.
    if: ${{ always() }}
    runs-on: ubuntu-latest
    steps:
      - name: Check status of all jobs
        # Fail the gate if any dependency failed or was cancelled.
        if: >-
          ${{
            contains(needs.*.result, 'failure') ||
            contains(needs.*.result, 'cancelled')
          }}
        run: exit 1