Commit 631d228

fix

sbalandi committed Nov 4, 2024
1 parent 90fa8fb commit 631d228
Showing 6 changed files with 86 additions and 75 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/llm_bench-python.yml
@@ -74,6 +74,14 @@ jobs:
        run: |
          optimum-cli export openvino --model segmind/tiny-sd --trust-remote-code --weight-format fp16 ./ov_models/tiny-sd/pytorch/dldt/FP16/
          python ./tools/llm_bench/benchmark.py -m ./ov_models/tiny-sd/pytorch/dldt/FP16/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1
      - name: Test tiny-stable-diffusion on Linux with GenAI
        run: |
          optimum-cli export openvino --model dreamlike-art/dreamlike-anime-1.0 --task stable-diffusion --weight-format fp16 ov_models/dreamlike-art-dreamlike-anime-1.0/FP16
          python ./tools/llm_bench/benchmark.py -m ./ov_models/dreamlike-art-dreamlike-anime-1.0/FP16/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --genai
      - name: Test tiny-stable-diffusion on Linux with GenAI and LoRA
        run: |
          wget -O ./models/soulcard.safetensors https://civitai.com/api/download/models/72591
          python ./tools/llm_bench/benchmark.py -m ./ov_models/dreamlike-art-dreamlike-anime-1.0/FP16/ -pf ./tools/llm_bench/prompts/stable-diffusion.jsonl -d cpu -n 1 --genai --lora ./models/soulcard.safetensors --lora_alphas 0.7
      - name: WWB Tests
        run: |
          GIT_CLONE_PROTECTION_ACTIVE=false pip install -r ${{ env.WWB_PATH }}/requirements.txt
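Note: the two added steps exercise the GenAI Text2Image path with and without LoRA; each `--lora` path must be paired with a `--lora_alphas` value (see `get_lora_config` in ov_utils.py below). A rough local-reproduction sketch, assuming the model export step above has already run (paths as in the workflow; this snippet is illustrative and not part of the commit):

# Rough local reproduction of the LoRA CI step (illustrative only).
import subprocess

subprocess.run([
    'python', './tools/llm_bench/benchmark.py',
    '-m', './ov_models/dreamlike-art-dreamlike-anime-1.0/FP16/',
    '-pf', './tools/llm_bench/prompts/stable-diffusion.jsonl',
    '-d', 'cpu', '-n', '1', '--genai',
    '--lora', './models/soulcard.safetensors', '--lora_alphas', '0.7',
], check=True)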
1 change: 0 additions & 1 deletion src/python/openvino_genai/__init__.py
@@ -40,6 +40,5 @@
    AggregationMode,
    Generator,
    CppStdGenerator,
    Scheduler,
    draft_model
)
4 changes: 2 additions & 2 deletions tools/llm_bench/benchmark.py
@@ -134,8 +134,8 @@ def get_argprser():
        nargs='*',
        required=False,
        default=None,
        help="Path to LoRA adapters for using OpenVINO GenAI Text2Image optimized pipelines with LoRA for benchmarking")
    parser.add_argument('--lora_alphas', nargs='*', help='Alphas params for LoRA adapters.', required=False, default=[0.5])
        help="Path to LoRA adapters for using OpenVINO GenAI optimized pipelines with LoRA for benchmarking")
    parser.add_argument('--lora_alphas', nargs='*', help='Alphas params for LoRA adapters.', required=False, default=[])
    parser.add_argument("--use_cb", action="store_true", help="Use Continuous Batching inference mode")
    parser.add_argument("--cb_config", required=False, default=None, help="Path to file with Continuous Batching Scheduler settings or dict")
    parser.add_argument(
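Note: changing the `--lora_alphas` default from `[0.5]` to `[]` matters because the new LoRA helper requires the paths and alphas lists to have equal length; a stale one-element default would silently disable LoRA whenever two or more adapters were passed. A self-contained sketch of how the two flags pair up (hypothetical values, not part of the commit):

# Hypothetical pairing of --lora and --lora_alphas after parsing.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--lora', nargs='*', required=False, default=None)
parser.add_argument('--lora_alphas', nargs='*', required=False, default=[])
args = parser.parse_args(['--lora', './models/soulcard.safetensors', '--lora_alphas', '0.7'])
# get_lora_config (ov_utils.py) ignores LoRA unless these lists match in length.
assert len(args.lora) == len(args.lora_alphas)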
63 changes: 38 additions & 25 deletions tools/llm_bench/llm_bench_utils/ov_utils.py
@@ -120,6 +120,32 @@ def decode_ov_tokenizer(self, token_ids, *args, **kwargs):
    return hf_tokenizer


def get_lora_config(lora_paths, lora_alphas):
    import openvino_genai

    adapter_config = list()
    if not lora_paths:
        return adapter_config

    if len(lora_paths) != len(lora_alphas):
        log.warning('The number of provided LoRA paths and alphas does not match. LoRA will be ignored.')
        return adapter_config

    # Create the config once so adapters accumulate, instead of the config
    # being re-created (and previous adapters dropped) on every iteration.
    adapter_config = openvino_genai.AdapterConfig()
    for idx in range(len(lora_paths)):
        if not Path(lora_paths[idx]).exists():
            log.warning(f'LoRA path does not exist: {lora_paths[idx]}. LoRA will be ignored.')
            continue
        adapter = openvino_genai.Adapter(lora_paths[idx])
        alpha = float(lora_alphas[idx])
        adapter_config.add(adapter, alpha)

    if adapter_config:
        log.info('LoRA adapter(s) are added to config.')

    return adapter_config
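Note: a minimal usage sketch of the new helper, assuming the adapter file from the CI step exists locally (paths illustrative):

# Minimal sketch: build an AdapterConfig and hand it to a GenAI pipeline via ov_config.
ov_config = {}
adapter_config = get_lora_config(['./models/soulcard.safetensors'], [0.7])
if adapter_config:
    ov_config['adapters'] = adapter_config  # consumed by the pipeline constructors below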


def create_text_gen_model(model_path, device, **kwargs):
"""Create text generation model.
@@ -145,7 +171,7 @@ def create_text_gen_model(model_path, device, **kwargs):
    else:
        if kwargs.get("genai", False) and is_genai_available(log_msg=True):
            if model_class not in [OV_MODEL_CLASSES_MAPPING[default_model_type], OV_MODEL_CLASSES_MAPPING["mpt"], OV_MODEL_CLASSES_MAPPING["chatglm"]]:
                log.warning("OpenVINO GenAI based benchmarking is not available for {model_type}. Will be switched to default bencmarking")
                log.warning(f"OpenVINO GenAI based benchmarking is not available for {model_type}. Will be switched to default benchmarking")
            else:
                return create_genai_text_gen_model(model_path, device, ov_config, **kwargs)
    remote_code = False
@@ -200,6 +226,11 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs):
        for param, value in scheduler_params.items():
            setattr(scheduler_config, param, value)
        ov_config["scheduler_config"] = scheduler_config

    adapter_config = get_lora_config(kwargs.get("lora", None), kwargs.get("lora_alphas", []))
    if adapter_config:
        ov_config['adapters'] = adapter_config

    start = time.perf_counter()
    llm_pipe = openvino_genai.LLMPipeline(model_path, device.upper(), **ov_config)
    end = time.perf_counter()
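Note: with this change the text-generation path picks up LoRA adapters the same way as image generation. A condensed sketch of what reaches the constructor (model path illustrative; assumes openvino_genai is installed):

# Condensed sketch of the adapter plumbing into LLMPipeline (illustrative paths).
import openvino_genai

ov_config = {}
adapter_config = get_lora_config(['./models/soulcard.safetensors'], [0.7])
if adapter_config:
    ov_config['adapters'] = adapter_config
llm_pipe = openvino_genai.LLMPipeline('./ov_models/tiny-llama/FP16', 'CPU', **ov_config)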
@@ -256,46 +287,28 @@ def create_image_gen_model(model_path, device, **kwargs):
        raise RuntimeError(f'==Failure ==: model path:{model_path} does not exist')
    else:
        if kwargs.get("genai", False) and is_genai_available(log_msg=True):
            if model_class not in [OV_MODEL_CLASSES_MAPPING[default_model_type], OV_MODEL_CLASSES_MAPPING["mpt"], OV_MODEL_CLASSES_MAPPING["chatglm"]]:
                log.warning("OpenVINO GenAI based benchmarking is not available for {model_type}. Will be switched to default bencmarking")
            else:
                return create_genai_image_gen_model(model_path, device, ov_config, **kwargs)
            return create_genai_image_gen_model(model_path, device, ov_config, **kwargs)

        start = time.perf_counter()
        ov_model = model_class.from_pretrained(model_path, device=device, ov_config=ov_config)
        end = time.perf_counter()
        from_pretrained_time = end - start
        log.info(f'From pretrained time: {from_pretrained_time:.2f}s')
        return ov_model, from_pretrained_time, False, None
        return ov_model, from_pretrained_time, False


def create_genai_image_gen_model(model_path, device, ov_config, **kwargs):
    import openvino_genai
    from transformers import AutoTokenizer

    if kwargs.get("lora", None):
        if len(kwargs['lora']) != len(kwargs['lora_alphas']):
            log.warning(f'LoRA paths and alphas is not eq. LoRA will be ignored. {kwargs["lora"]} {kwargs["lora_alphas"]}')
        else:
            adapter_config = list()
            for idx in range(len(kwargs['lora'])):
                if not Path(kwargs['lora'][idx]).exists():
                    log.warning(f'LoRA path is not exists: {kwargs["lora"][idx]}. LoRA will be ignored.')
                    continue
                adapter_config = openvino_genai.AdapterConfig()
                adapter = openvino_genai.Adapter(kwargs['lora'][idx])
                alpha = float(kwargs['lora_alphas'][idx])
                adapter_config.add(adapter, alpha)
            ov_config['adapters'] = adapter_config
    adapter_config = get_lora_config(kwargs.get("lora", None), kwargs.get("lora_alphas", []))
    if adapter_config:
        ov_config['adapters'] = adapter_config

    start = time.perf_counter()
    tokenizer = None
    if (model_path / 'tokenizer').exists():
        tokenizer = AutoTokenizer.from_pretrained(model_path / 'tokenizer')
    t2i_pipe = openvino_genai.Text2ImagePipeline(model_path, device.upper(), **ov_config)
    end = time.perf_counter()
    log.info(f'Pipeline initialization time: {end - start:.2f}s')
    return t2i_pipe, end - start, True, tokenizer
    return t2i_pipe, end - start, True
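Note: an end-to-end sketch of the GenAI image path this function now provides, mirroring the new CI step (model and adapter paths illustrative; generation keyword names as used in run_image_generation_genai below):

# End-to-end sketch (assumes the model was exported and the adapter downloaded).
import openvino_genai

ov_config = {}
adapters = get_lora_config(['./models/soulcard.safetensors'], [0.7])
if adapters:
    ov_config['adapters'] = adapters
t2i_pipe = openvino_genai.Text2ImagePipeline('./ov_models/dreamlike-art-dreamlike-anime-1.0/FP16', 'CPU', **ov_config)
image_data = t2i_pipe.generate('an anime-style portrait', width=512, height=512, num_inference_steps=20).data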


def create_ldm_super_resolution_model(model_path, device, **kwargs):
2 changes: 1 addition & 1 deletion tools/llm_bench/llm_bench_utils/pt_utils.py
@@ -156,7 +156,7 @@ def create_image_gen_model(model_path, device, **kwargs):
        backend = kwargs['torch_compile_backend']
        compiled_model = run_torch_compile(pipe, backend)
        pipe = compiled_model
    return pipe, from_pretrain_time, False, None
    return pipe, from_pretrain_time, False


def create_ldm_super_resolution_model(model_path, device, **kwargs):
83 changes: 37 additions & 46 deletions tools/llm_bench/task/image_generation.py
@@ -24,31 +24,38 @@
stable_diffusion_hook = StableDiffusionHook()


def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list, proc_id, mem_consumption, tokenizer):
def collects_input_args(image_param, model_type, model_name):
    input_args = {}
    input_args["width"] = image_param.get('width', DEFAULT_IMAGE_WIDTH)
    input_args["height"] = image_param.get('height', DEFAULT_IMAGE_HEIGHT)
    input_args["num_inference_steps"] = image_param.get('steps', DEFAULT_INFERENCE_STEPS if 'lcm' not in model_name else LCM_DEFAULT_INFERENCE_STEPS)
    guidance_scale = image_param.get('guidance_scale', None)

    if guidance_scale is not None:
        input_args["guidance_scale"] = guidance_scale
    else:
        if 'turbo' in model_name:
            input_args["guidance_scale"] = 0.0

    return input_args
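Note: a small illustration of the helper's defaulting behavior (hypothetical values; the 'turbo' match is against the model name, as in the code above):

# Illustrative only: parameters absent from the prompt file fall back to defaults.
params = {'prompt': 'a red fox', 'steps': 4}
input_args = collects_input_args(params, 'stable-diffusion', 'sdxl-turbo')
# width/height fall back to DEFAULT_IMAGE_WIDTH/DEFAULT_IMAGE_HEIGHT,
# num_inference_steps is 4, and guidance_scale is 0.0 because 'turbo'
# appears in the model name.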


def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list, proc_id, mem_consumption):
    set_seed(args['seed'])
    input_text = image_param['prompt']
    image_width = image_param.get('width', DEFAULT_IMAGE_WIDTH)
    image_height = image_param.get('height', DEFAULT_IMAGE_HEIGHT)
    nsteps = image_param.get('steps', DEFAULT_INFERENCE_STEPS if 'lcm' not in args["model_name"] else LCM_DEFAULT_INFERENCE_STEPS)
    guidance_scale = image_param.get('guidance_scale', None)
    input_args = collects_input_args(image_param, args['model_type'], args['model_name'])
    log.info(
        f"[{'warm-up' if num == 0 else num}][P{image_id}] Input params: Batch_size={args['batch_size']}, "
        f'steps={nsteps}, width={image_width}, height={image_height}, guidance_scale={guidance_scale}'
        f'steps={input_args["num_inference_steps"]}, width={input_args["width"]}, height={input_args["height"]}, guidance_scale={input_args.get("guidance_scale")}'
    )

    result_md5_list = []
    max_rss_mem_consumption = ''
    max_uss_mem_consumption = ''
    max_shared_mem_consumption = ''
    if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
        mem_consumption.start_collect_memory_consumption()
    additional_args = {}
    if guidance_scale is not None:
        additional_args["guidance_scale"] = guidance_scale
    else:
        if 'lcm-sdxl' in args['model_type']:
            additional_args["guidance_scale"] = 1.0
        if 'turbo' in args['model_name']:
            additional_args["guidance_scale"] = 0.0

    input_text_list = [input_text] * args['batch_size']
    input_data = pipe.tokenizer(input_text, return_tensors='pt')
    input_data.pop('token_type_ids', None)
@@ -59,7 +66,7 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
    for bs_idx, in_text in enumerate(input_text_list):
        llm_bench_utils.output_file.output_image_input_text(in_text, args, image_id, bs_idx, proc_id)
    start = time.perf_counter()
    res = pipe(input_text_list, num_inference_steps=nsteps, height=image_height, width=image_width, **additional_args).images
    res = pipe(input_text_list, **input_args).images
    end = time.perf_counter()
    if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
        mem_consumption.end_collect_momory_consumption()
@@ -72,7 +79,7 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
    iter_data = gen_output_data.gen_iterate_data(
        iter_idx=num,
        in_size=input_token_size * args['batch_size'],
        infer_count=nsteps,
        infer_count=input_args["num_inference_steps"],
        gen_time=generation_time,
        res_md5=result_md5_list,
        max_rss_mem=max_rss_mem_consumption,
@@ -95,43 +102,27 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
    stable_diffusion_hook.clear_statistics()


def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data_list, proc_id, mem_consumption, tokenizer):
def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data_list, proc_id, mem_consumption):
    set_seed(args['seed'])
    input_text = image_param['prompt']
    image_width = image_param.get('width', DEFAULT_IMAGE_WIDTH)
    image_height = image_param.get('height', DEFAULT_IMAGE_HEIGHT)
    nsteps = image_param.get('steps', DEFAULT_INFERENCE_STEPS if 'lcm' not in args["model_name"] else LCM_DEFAULT_INFERENCE_STEPS)
    guidance_scale = image_param.get('guidance_scale', None)
    input_args = collects_input_args(image_param, args['model_type'], args['model_name'])
    log.info(
        f"[{'warm-up' if num == 0 else num}][P{image_id}] Input params: Batch_size={args['batch_size']}, "
        f'steps={nsteps}, width={image_width}, height={image_height}, guidance_scale={guidance_scale}'
        f'steps={input_args["num_inference_steps"]}, width={input_args["width"]}, height={input_args["height"]}, guidance_scale={input_args.get("guidance_scale")}'
    )
    result_md5_list = []
    max_rss_mem_consumption = ''
    max_uss_mem_consumption = ''
    max_shared_mem_consumption = ''
    if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
        mem_consumption.start_collect_memory_consumption()
    additional_args = {}
    if guidance_scale is not None:
        additional_args["guidance_scale"] = guidance_scale
    else:
        if 'lcm-sdxl' in args['model_type']:
            additional_args["guidance_scale"] = 1.0
        if 'turbo' in args['model_name']:
            additional_args["guidance_scale"] = 0.0

    input_text_list = [input_text] * args['batch_size']
    input_token_size = 0
    if tokenizer:
        input_data = tokenizer(input_text, return_tensors='pt')
        input_data.pop('token_type_ids', None)
        input_tokens = input_data['input_ids'] if 'input_ids' in input_data else input_data
        input_token_size = input_tokens[0].numel()
    if num == 0 and args["output_dir"] is not None:
        for bs_idx, in_text in enumerate(input_text_list):
            llm_bench_utils.output_file.output_image_input_text(in_text, args, image_id, bs_idx, proc_id)
    start = time.perf_counter()
    res = pipe.generate(input_text, num_inference_steps=nsteps, height=image_height, width=image_width, **additional_args).data
    res = pipe.generate(input_text, **input_args).data
    end = time.perf_counter()
    if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
        mem_consumption.end_collect_momory_consumption()
@@ -144,8 +135,7 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data
    generation_time = end - start
    iter_data = gen_output_data.gen_iterate_data(
        iter_idx=num,
        in_size=input_token_size * args['batch_size'],
        infer_count=nsteps,
        infer_count=input_args["num_inference_steps"],
        gen_time=generation_time,
        res_md5=result_md5_list,
        max_rss_mem=max_rss_mem_consumption,
@@ -169,7 +159,7 @@ def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data


def run_image_generation_benchmark(model_path, framework, device, args, num_iters, mem_consumption):
    pipe, pretrain_time, use_genai, tokenizer = FW_UTILS[framework].create_image_gen_model(model_path, device, **args)
    pipe, pretrain_time, use_genai = FW_UTILS[framework].create_image_gen_model(model_path, device, **args)

    iter_data_list = []
    input_image_list = model_utils.get_image_param_from_prompt_file(args)
@@ -192,10 +182,10 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
        raise RuntimeError('==Failure prompts is empty ==')
    log.info(f'Benchmarking iter nums(exclude warm-up): {num_iters}, prompt nums: {len(image_list)}, prompt idx: {prompt_idx_list}')

    if not use_genai:
        text_gen_fn = run_image_generation
    if use_genai:
        image_gen_fn = run_image_generation_genai
    else:
        text_gen_fn = run_image_generation_genai
        image_gen_fn = run_image_generation

    # if num_iters == 0, just output warm-up data
    proc_id = os.getpid()
@@ -205,7 +195,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
        for image_id, image_param in enumerate(image_list):
            p_idx = prompt_idx_list[image_id]
            iter_timestamp[num][p_idx]['start'] = datetime.datetime.now().isoformat()
            text_gen_fn(image_param, num, prompt_idx_list[image_id], pipe, args, iter_data_list, proc_id, mem_consumption, tokenizer)
            image_gen_fn(image_param, num, prompt_idx_list[image_id], pipe, args, iter_data_list, proc_id, mem_consumption)
            iter_timestamp[num][p_idx]['end'] = datetime.datetime.now().isoformat()
            prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
            log.info(f"{prefix}[P{p_idx}] start: {iter_timestamp[num][p_idx]['start']}, end: {iter_timestamp[num][p_idx]['end']}")
@@ -214,10 +204,11 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
        p_idx = prompt_idx_list[image_id]
        for num in range(num_iters + 1):
            iter_timestamp[num][p_idx]['start'] = datetime.datetime.now().isoformat()
            text_gen_fn(image_param, num, p_idx, pipe, args, iter_data_list, proc_id, mem_consumption, tokenizer)
            image_gen_fn(image_param, num, p_idx, pipe, args, iter_data_list, proc_id, mem_consumption)
            iter_timestamp[num][p_idx]['end'] = datetime.datetime.now().isoformat()
            prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
            log.info(f"{prefix}[P{p_idx}] start: {iter_timestamp[num][p_idx]['start']}, end: {iter_timestamp[num][p_idx]['end']}")

    metrics_print.print_average(iter_data_list, prompt_idx_list, args['batch_size'], False)
    if not use_genai:
        metrics_print.print_average(iter_data_list, prompt_idx_list, args['batch_size'], False)
    return iter_data_list, pretrain_time, iter_timestamp
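Note: in condensed form, the dispatch and reporting changes in this function (names as in this file; not runnable standalone):

# Equivalent one-liner for the new dispatch above.
image_gen_fn = run_image_generation_genai if use_genai else run_image_generation
# Average metrics stay on the non-GenAI path, presumably because the
# StableDiffusionHook statistics are only collected there.
if not use_genai:
    metrics_print.print_average(iter_data_list, prompt_idx_list, args['batch_size'], False)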
