Skip to content

Commit

Permalink
add missing config features from CV
Browse files Browse the repository at this point in the history
  • Loading branch information
Wovchena committed Jul 3, 2024
1 parent ca51075 commit 3ba9a0f
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ class ContinuousBatchingPipeline::Impl {
}

GenerationHandle add_request(uint64_t request_id, std::string prompt, ov::genai::GenerationConfig sampling_params) {
sampling_params.eos_token_id = m_tokenizer->get_eos_token_id();
sampling_params.set_eos_token_id(m_tokenizer->get_eos_token_id());
sampling_params.validate();

ov::Tensor input_ids;
Expand Down
4 changes: 4 additions & 0 deletions src/cpp/include/openvino/genai/generation_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
// EOS special token
int64_t eos_token_id = -1;

/** @brief Sets eos_token_id to tokenizer_eos_token_id if eos_token_id is unset (negative).
 * Otherwise asserts that eos_token_id equals tokenizer_eos_token_id and fails on mismatch.
 */
void set_eos_token_id(size_t tokenizer_eos_token_id);
size_t get_max_new_tokens(size_t prompt_length = 0) const;
bool is_greedy_decoding() const;
bool is_beam_search() const;
Expand Down
17 changes: 17 additions & 0 deletions src/cpp/src/generation_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ GenerationConfig::GenerationConfig(const std::string& json_path) {
}
}

/** @brief Adopts the tokenizer's EOS token id when the config does not define one.
 *
 *  If eos_token_id is negative (i.e. unset), it is overwritten with
 *  tokenizer_eos_token_id. Otherwise both sides define an EOS id and they must
 *  agree; a mismatch is a configuration error reported via OPENVINO_ASSERT.
 *
 *  @param tokenizer_eos_token_id EOS token id reported by the tokenizer.
 */
void GenerationConfig::set_eos_token_id(size_t tokenizer_eos_token_id) {
    // The member is int64_t while the tokenizer reports size_t. Convert once
    // with an explicit cast so the comparison below is signed/signed instead of
    // relying on implicit signed/unsigned conversion. Real token ids are far
    // below INT64_MAX, so the cast is value-preserving in practice.
    const auto tokenizer_id = static_cast<int64_t>(tokenizer_eos_token_id);
    if (eos_token_id < 0) {
        // Unset in the generation config -- take the tokenizer's value.
        eos_token_id = tokenizer_id;
    } else {
        // Both the config and the tokenizer specify an EOS id: they must match.
        OPENVINO_ASSERT(eos_token_id == tokenizer_id,
            "EOS token ID is different in generation config (", eos_token_id, ") and tokenizer (",
            tokenizer_eos_token_id, ")");
    }
}

void GenerationConfig::update_generation_config(const ov::AnyMap& config_map) {
using ov::genai::utils::read_anymap_param;

Expand Down Expand Up @@ -98,6 +108,7 @@ void GenerationConfig::validate() const {
"or set num_beams=1 if you with to use multinomial sampling.");
OPENVINO_ASSERT(num_return_sequences > 0, "num_return_sequences must be greater than 0");
OPENVINO_ASSERT(max_new_tokens > 0, "'max_new_tokens' must be greater than 0");
OPENVINO_ASSERT(min_new_tokens <= max_new_tokens, "min_new_tokens must be less or equal max_new_tokens");

// max_new_tokens has priority over max_length
// if max_new_tokens is defined no need to check max_length
Expand All @@ -123,6 +134,12 @@ void GenerationConfig::validate() const {

OPENVINO_ASSERT(eos_token_id != -1 || max_new_tokens != SIZE_MAX || max_length != SIZE_MAX,
"Either 'eos_token_id', or 'max_new_tokens', or 'max_length' should be defined.");
if (is_beam_search()) {
OPENVINO_ASSERT(no_repeat_ngram_size > 0, "no_repeat_ngram_size must be positive");
} else {
OPENVINO_ASSERT(frequency_penalty >= -2.0f && frequency_penalty <= 2.0f, "frequence_penalty penalty must be a [-2; +2]");
OPENVINO_ASSERT(presence_penalty >= -2.0f && presence_penalty <= 2.0f, "presence_penalty penalty must be a [-2; +2]");
}
}

GenerationConfig beam_search() {
Expand Down
2 changes: 1 addition & 1 deletion src/cpp/src/llm_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ class StatefulLLMPipeline final : public LLMPipelineImplBase {

// If eos_token_id was not provided, take value
if (m_generation_config.eos_token_id == -1)
m_generation_config.eos_token_id = m_tokenizer.get_eos_token_id();
m_generation_config.set_eos_token_id(m_tokenizer.get_eos_token_id());
}

StatefulLLMPipeline(
Expand Down
2 changes: 1 addition & 1 deletion src/cpp/src/llm_pipeline_static.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ EncodedResults StaticLLMPipeline::generate(
GenerationConfig config = (generation_config.has_value()) ? *generation_config : m_generation_config;
// If eos_token_id was not provided, take value from default m_generation_config
if (config.eos_token_id == -1)
config.eos_token_id = m_generation_config.eos_token_id;
config.set_eos_token_id(m_generation_config.eos_token_id);
config.validate();

std::shared_ptr<StreamerBase> streamer_ptr;
Expand Down
3 changes: 2 additions & 1 deletion src/python/py_generate_pipeline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ OptionalGenerationConfig update_config_from_kwargs(const OptionalGenerationConfi
} else if (key == "repetition_penalty") {
res_config.repetition_penalty = py::cast<float>(item.second);
} else if (key == "eos_token_id") {
res_config.eos_token_id = py::cast<int>(item.second);
res_config.set_eos_token_id(py::cast<int>(item.second));
} else {
throw(std::invalid_argument("'" + key + "' is incorrect GenerationConfig parameter name. "
"Use help(openvino_genai.GenerationConfig) to get list of acceptable parameters."));
Expand Down Expand Up @@ -512,6 +512,7 @@ PYBIND11_MODULE(py_generate_pipeline, m) {
.def_readwrite("presence_penalty", &GenerationConfig::presence_penalty)
.def_readwrite("frequency_penalty", &GenerationConfig::frequency_penalty)
.def_readwrite("rng_seed", &GenerationConfig::rng_seed)
.def("set_eos_token_id", &GenerationConfig::set_eos_token_id)
.def("is_beam_search", &GenerationConfig::is_beam_search);

py::class_<DecodedResults>(m, "DecodedResults")
Expand Down
10 changes: 9 additions & 1 deletion tests/python_tests/continuous_batching/test_sampling.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import pytest
import shutil
import sys
from dataclasses import dataclass
from pathlib import Path
from openvino_genai.py_continuous_batching import ContinuousBatchingPipeline
Expand All @@ -23,7 +24,7 @@

@pytest.mark.precommit
@pytest.mark.parametrize("model_id", get_models_list(os.path.join(os.path.dirname(os.path.realpath(__file__)), "models", "precommit")))
@pytest.mark.xfail(reason='CPU: head size must be multiple of 16, current: 8', raises=RuntimeError, strict=True)
@pytest.mark.xfail(reason='CPU: head size must be multiple of 16, current: 8. Ticket 145986.', raises=RuntimeError, strict=True)
def test_sampling_precommit(tmp_path, model_id):
run_test_pipeline(tmp_path, model_id)

Expand Down Expand Up @@ -165,6 +166,13 @@ class RandomSamplingTestStruct:
"greedy_with_penalties",
"multinomial_max_and_min_token"])
def test_individual_generation_configs_random(tmp_path, test_struct: RandomSamplingTestStruct):
if test_struct in (
RANDOM_SAMPLING_TEST_CASES[1],
RANDOM_SAMPLING_TEST_CASES[3],
RANDOM_SAMPLING_TEST_CASES[6],
RANDOM_SAMPLING_TEST_CASES[10],
) and sys.platform.startswith("win"):
pytest.xfail("assert ref_text == ov_text fails")
generation_config = test_struct.generation_config

prompts = test_struct.prompts
Expand Down

0 comments on commit 3ba9a0f

Please sign in to comment.