From 9999349c23d88d141388e61eaeb46e74e50b70ad Mon Sep 17 00:00:00 2001
From: Alexander Suvorov <alexander.suvorov@intel.com>
Date: Wed, 3 Jul 2024 16:00:34 +0200
Subject: [PATCH] CI: xfail sampling precommit test, run preemption tests, pass --cache_size 1 to throughput_benchmark

---
 .github/workflows/continuous_batching_cpp.yml       | 13 +++++++++++--
 .../continuous_batching/test_sampling.py            |  7 ++++++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/continuous_batching_cpp.yml b/.github/workflows/continuous_batching_cpp.yml
index 524dc7663..99526efe2 100644
--- a/.github/workflows/continuous_batching_cpp.yml
+++ b/.github/workflows/continuous_batching_cpp.yml
@@ -51,7 +51,7 @@ jobs:
         run: |
           wget -q https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
           source ./ov/setupvars.sh
-          timeout 150s ./build/samples/cpp/throughput_benchmark/throughput_benchmark -n 10 --dynamic_split_fuse -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json
+          timeout 150s ./build/samples/cpp/throughput_benchmark/throughput_benchmark -n 10 --dynamic_split_fuse -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1
 
   continuous_batching_python_lib_ubuntu:
     # A tokenizers' dependency fails to compile on ubuntu-20 n CenOS7 env.
@@ -75,8 +75,10 @@ jobs:
       - run: source ./ov/setupvars.sh && cmake --build ./build/ --config Release -j
       - run: source ./ov/setupvars.sh && python -m pip install ./thirdparty/openvino_tokenizers/[transformers] -r ./tests/python_tests/requirements.txt --pre --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly --upgrade-strategy eager
       - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/continuous_batching/test_sampling.py -m precommit
+      - run: source ./ov/setupvars.sh && PYTHONPATH=./build/:$PYTHONPATH python -m pytest ./tests/python_tests/continuous_batching/test_preemption.py -m precommit
       - run: source ./ov/setupvars.sh && python -m pip install .
       - run: python -m pytest ./tests/python_tests/continuous_batching/test_sampling.py -m precommit
+      - run: python -m pytest ./tests/python_tests/continuous_batching/test_preemption.py -m precommit
 
   cpp-accuracy-sample-windows:
     runs-on: windows-latest
@@ -112,6 +114,13 @@ jobs:
           set PATH=.\build\openvino_genai\;%PATH%
           call .\ov\setupvars.bat
           .\build\samples\cpp\accuracy_sample\Release\accuracy_sample.exe -m .\TinyLlama-1.1B-Chat-v1.0\ -n 5
+      - name: Run throughput_benchmark
+        if: false  # NOTE(review): step is unconditionally disabled (never runs); confirm the reason and when it can be re-enabled
+        run: |
+          curl -o .\ShareGPT_V3_unfiltered_cleaned_split.json -s -L "https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json"
+          set PATH=.\build\openvino_genai\;%PATH%
+          call .\ov\setupvars.bat
+          .\build\samples\cpp\throughput_benchmark\Release\throughput_benchmark.exe -n 2 --dynamic_split_fuse -m .\TinyLlama-1.1B-Chat-v1.0\ --dataset .\ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1
 
   cpp-accuracy-sample-macos:
     runs-on: macos-12
@@ -147,4 +156,4 @@ jobs:
         run: |
           wget -q https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
           source ./ov/setupvars.sh
-          ./build/samples/cpp/throughput_benchmark/throughput_benchmark -n 5 --dynamic_split_fuse -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json
+          ./build/samples/cpp/throughput_benchmark/throughput_benchmark -n 5 --dynamic_split_fuse -m ./TinyLlama-1.1B-Chat-v1.0/ --dataset ./ShareGPT_V3_unfiltered_cleaned_split.json --cache_size 1
diff --git a/tests/python_tests/continuous_batching/test_sampling.py b/tests/python_tests/continuous_batching/test_sampling.py
index 265c8caa6..1dab6a3f2 100644
--- a/tests/python_tests/continuous_batching/test_sampling.py
+++ b/tests/python_tests/continuous_batching/test_sampling.py
@@ -19,9 +19,13 @@
     get_multinomial_temperature_and_frequence_penalty, get_multinomial_temperature_and_presence_penalty, \
     generate_and_compare_with_hf, get_multinomial_temperature_and_repetition_penalty, get_scheduler_config
 
-
 @pytest.mark.precommit
 @pytest.mark.parametrize("model_id", get_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "precommit")))
+@pytest.mark.xfail(
+    raises=RuntimeError,  # only RuntimeError counts as the expected failure; any other exception is a regular failure
+    reason="Test fails with error: CPU: head size must be multiple of 16, current: X. CVS-145986.",
+    strict=False,  # an unexpected pass is reported as XPASS and does not fail the run
+)
 def test_sampling_precommit(tmp_path, model_id):
     run_test_pipeline(tmp_path, model_id)
 
@@ -163,6 +167,7 @@ class RandomSamplingTestStruct:
              "greedy_with_penalties",
              "multinomial_max_and_min_token"])
 def test_individual_generation_configs_random(tmp_path, test_struct: RandomSamplingTestStruct):
+
     generation_config = test_struct.generation_config
 
     prompts = test_struct.prompts