Add Text2ImagePipeline to llm bench
sbalandi committed Nov 1, 2024
1 parent a8145bc commit a4300c6
Showing 9 changed files with 135 additions and 12 deletions.
1 change: 1 addition & 0 deletions src/python/openvino_genai/__init__.py
@@ -39,5 +39,6 @@
AggregationMode,
Generator,
CppStdGenerator,
Scheduler,
draft_model
)
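
With Scheduler re-exported from the package root, downstream scripts can import it alongside the existing generator helpers. A trivial sketch of the import this change enables:

from openvino_genai import CppStdGenerator, Scheduler, draft_model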
2 changes: 1 addition & 1 deletion src/python/py_image_generation_pipelines.cpp
@@ -275,7 +275,7 @@ void init_image_generation_pipelines(py::module_& m) {
(text2image_generate_docstring + std::string(" \n ")).c_str()
);

auto image_generation_scheduler = py::class_<ov::genai::Scheduler>(m, "Scheduler", "Scheduler for image generation pipelines.")
auto image_generation_scheduler = py::class_<ov::genai::Scheduler, std::shared_ptr<ov::genai::Scheduler>>(m, "Scheduler", "Scheduler for image generation pipelines.")
.def(py::init<>())
.def("from_config", &ov::genai::Scheduler::from_config);

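Declaring std::shared_ptr<ov::genai::Scheduler> as the pybind11 holder type makes Python share ownership of Scheduler objects with the C++ side rather than holding them by value, which matters once schedulers created by the C++ API (such as results of the from_config factory bound above) are handed back to Python. A minimal Python-side sketch using only the bindings shown here:

import openvino_genai

# Default-constructible per the py::init<>() binding; the shared_ptr holder keeps the
# object alive as long as either Python or a C++ pipeline still references it.
scheduler = openvino_genai.Scheduler()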
7 changes: 7 additions & 0 deletions tools/llm_bench/benchmark.py
@@ -129,6 +129,13 @@ def get_argprser():
parser.add_argument('-od', '--output_dir', help='Save the input text and generated text, images to files')
llm_bench_utils.model_utils.add_stateful_model_arguments(parser)
parser.add_argument("--genai", action="store_true", help="Use OpenVINO GenAI optimized pipelines for benchmarking")
parser.add_argument(
    "--lora",
    nargs='*',
    required=False,
    default=None,
    help="Paths to LoRA adapters to apply when benchmarking with the OpenVINO GenAI Text2Image pipeline")
parser.add_argument('--lora_alphas', nargs='*', help='Alpha values for the LoRA adapters.', required=False, default=[0.5])
parser.add_argument("--use_cb", action="store_true", help="Use Continuous Batching inference mode")
parser.add_argument("--cb_config", required=False, default=None, help="Path to file with Continuous Batching Scheduler settings or dict")
parser.add_argument(
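A hedged sketch of how the new flags surface after parsing (model directory and adapter file name are hypothetical). Because nargs='*' is used without type=, the alphas arrive as strings, which is why the GenAI helper in ov_utils.py later casts them with float(); note also that --lora_alphas defaults to a single value, so passing several adapters without matching alphas will trigger the length-mismatch warning there:

# python benchmark.py -m dreamlike_anime_1_0_ov/FP16 -d CPU --genai \
#     --lora soulcard.safetensors --lora_alphas 0.65
args = parser.parse_args([
    '-m', 'dreamlike_anime_1_0_ov/FP16', '-d', 'CPU', '--genai',
    '--lora', 'soulcard.safetensors', '--lora_alphas', '0.65',
])
print(args.lora)         # ['soulcard.safetensors']
print(args.lora_alphas)  # ['0.65'] -- strings, converted with float() later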
2 changes: 1 addition & 1 deletion tools/llm_bench/llm_bench_utils/config_class.py
@@ -55,7 +55,7 @@
}

USE_CASES = {
'image_gen': ['stable-diffusion-', 'ssd-', 'deepfloyd-if', 'tiny-sd', 'small-sd', 'lcm-', 'sdxl'],
'image_gen': ['stable-diffusion-', 'ssd-', 'deepfloyd-if', 'tiny-sd', 'small-sd', 'lcm-', 'sdxl', 'dreamlike'],
'text2speech': ['whisper'],
'image_cls': ['vit'],
'code_gen': ['replit', 'codegen2', 'codegen', 'codet5', "stable-code"],
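The added 'dreamlike' keyword lets model folders such as dreamlike-anime-1.0 be auto-classified as image-generation models by name. A rough sketch of the kind of substring lookup this table feeds (the helper name is hypothetical; the real matching logic lives elsewhere in llm_bench_utils):

def guess_use_case(model_name):
    # first use case whose keyword occurs in the lower-cased model name wins
    for use_case, keywords in USE_CASES.items():
        if any(keyword in model_name.lower() for keyword in keywords):
            return use_case
    return None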
2 changes: 2 additions & 0 deletions tools/llm_bench/llm_bench_utils/model_utils.py
@@ -141,6 +141,8 @@ def analyze_args(args):
model_args['subsequent'] = args.subsequent
model_args['output_dir'] = args.output_dir
model_args['genai'] = args.genai
model_args['lora'] = args.lora
model_args['lora_alphas'] = args.lora_alphas
model_args["use_cb"] = args.use_cb
model_args['devices'] = args.device
model_args['prompt_index'] = [] if args.prompt_index is not None else None
38 changes: 37 additions & 1 deletion tools/llm_bench/llm_bench_utils/ov_utils.py
@@ -255,12 +255,48 @@ def create_image_gen_model(model_path, device, **kwargs):
if not Path(model_path).exists():
raise RuntimeError(f'==Failure ==: model path:{model_path} does not exist')
else:
if kwargs.get("genai", False) and is_genai_available(log_msg=True):
if model_class not in [OV_MODEL_CLASSES_MAPPING[default_model_type], OV_MODEL_CLASSES_MAPPING["mpt"], OV_MODEL_CLASSES_MAPPING["chatglm"]]:
log.warning("OpenVINO GenAI based benchmarking is not available for {model_type}. Will be switched to default bencmarking")
else:
return create_genai_image_gen_model(model_path, device, ov_config, **kwargs)

start = time.perf_counter()
ov_model = model_class.from_pretrained(model_path, device=device, ov_config=ov_config)
end = time.perf_counter()
from_pretrained_time = end - start
log.info(f'From pretrained time: {from_pretrained_time:.2f}s')
return ov_model, from_pretrained_time
return ov_model, from_pretrained_time, False, None


def create_genai_image_gen_model(model_path, device, ov_config, **kwargs):
    import openvino_genai
    from transformers import AutoTokenizer
    import numpy as np

    if kwargs.get("lora", None):
        if len(kwargs['lora']) != len(kwargs['lora_alphas']):
            log.warning(f'The number of LoRA paths and alphas does not match, LoRA will be ignored: {kwargs["lora"]} {kwargs["lora_alphas"]}')
        else:
            adapter_config = openvino_genai.AdapterConfig()
            for idx in range(len(kwargs['lora'])):
                if not Path(kwargs['lora'][idx]).exists():
                    log.warning(f'LoRA path does not exist: {kwargs["lora"][idx]}. This adapter will be ignored.')
                    continue
                adapter = openvino_genai.Adapter(kwargs['lora'][idx])
                alpha = float(kwargs['lora_alphas'][idx])
                adapter_config.add(adapter, alpha)
            ov_config['adapters'] = adapter_config

    start = time.perf_counter()
    tokenizer = None
    if (model_path / 'tokenizer').exists():
        tokenizer = AutoTokenizer.from_pretrained(model_path / 'tokenizer')
    t2i_pipe = openvino_genai.Text2ImagePipeline(model_path, device.upper(), **ov_config)
    end = time.perf_counter()
    log.info(f'Pipeline initialization time: {end - start:.2f}s')
    return t2i_pipe, end - start, True, tokenizer


def create_ldm_super_resolution_model(model_path, device, **kwargs):
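For orientation, a minimal sketch of how the new create_genai_image_gen_model helper is meant to be driven (the model directory, adapter file, and alpha are hypothetical; the keyword arguments mirror the 'lora' and 'lora_alphas' entries that analyze_args() now forwards):

from pathlib import Path

pipe, init_time, use_genai, tokenizer = create_genai_image_gen_model(
    Path('dreamlike_anime_1_0_ov/FP16'),  # exported Text2Image model directory
    'CPU',
    {},                                    # extra plugin / pipeline properties
    lora=['soulcard.safetensors'],
    lora_alphas=[0.65],
)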
2 changes: 1 addition & 1 deletion tools/llm_bench/llm_bench_utils/pt_utils.py
@@ -156,7 +156,7 @@ def create_image_gen_model(model_path, device, **kwargs):
backend = kwargs['torch_compile_backend']
compiled_model = run_torch_compile(pipe, backend)
pipe = compiled_model
return pipe, from_pretrain_time
return pipe, from_pretrain_time, False, None


def create_ldm_super_resolution_model(model_path, device, **kwargs):
91 changes: 84 additions & 7 deletions tools/llm_bench/task/image_generation.py
@@ -24,7 +24,7 @@
stable_diffusion_hook = StableDiffusionHook()


def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list, proc_id, mem_consumption):
def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list, proc_id, mem_consumption, tokenizer, generator):
set_seed(args['seed'])
input_text = image_param['prompt']
image_width = image_param.get('width', DEFAULT_IMAGE_WIDTH)
@@ -95,13 +95,85 @@ def run_image_generation(image_param, num, image_id, pipe, args, iter_data_list,
stable_diffusion_hook.clear_statistics()


def run_image_generation_genai(image_param, num, image_id, pipe, args, iter_data_list, proc_id, mem_consumption, tokenizer, generator):
    set_seed(args['seed'])
    input_text = image_param['prompt']
    image_width = image_param.get('width', DEFAULT_IMAGE_WIDTH)
    image_height = image_param.get('height', DEFAULT_IMAGE_HEIGHT)
    nsteps = image_param.get('steps', DEFAULT_INFERENCE_STEPS if 'lcm' not in args["model_name"] else LCM_DEFAULT_INFERENCE_STEPS)
    guidance_scale = image_param.get('guidance_scale', None)
    log.info(
        f"[{'warm-up' if num == 0 else num}][P{image_id}] Input params: Batch_size={args['batch_size']}, "
        f'steps={nsteps}, width={image_width}, height={image_height}, guidance_scale={guidance_scale}'
    )
    result_md5_list = []
    max_rss_mem_consumption = ''
    max_uss_mem_consumption = ''
    max_shared_mem_consumption = ''
    if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
        mem_consumption.start_collect_memory_consumption()
    additional_args = {}
    if guidance_scale is not None:
        additional_args["guidance_scale"] = guidance_scale
    else:
        if 'lcm-sdxl' in args['model_type']:
            additional_args["guidance_scale"] = 1.0
        if 'turbo' in args['model_name']:
            additional_args["guidance_scale"] = 0.0
    input_text_list = [input_text] * args['batch_size']
    input_token_size = 0
    if tokenizer:
        input_data = tokenizer(input_text, return_tensors='pt')
        input_data.pop('token_type_ids', None)
        input_tokens = input_data['input_ids'] if 'input_ids' in input_data else input_data
        input_token_size = input_tokens[0].numel()
    if num == 0 and args["output_dir"] is not None:
        for bs_idx, in_text in enumerate(input_text_list):
            llm_bench_utils.output_file.output_image_input_text(in_text, args, image_id, bs_idx, proc_id)
    start = time.perf_counter()
    res = pipe.generate(input_text, num_inference_steps=nsteps, height=image_height, width=image_width, **additional_args, generator=generator).data
    end = time.perf_counter()
    if (args['mem_consumption'] == 1 and num == 0) or args['mem_consumption'] == 2:
        mem_consumption.end_collect_momory_consumption()
        max_rss_mem_consumption, max_shared_mem_consumption, max_uss_mem_consumption = mem_consumption.get_max_memory_consumption()
        mem_consumption.clear_max_memory_consumption()
    for bs_idx in range(args['batch_size']):
        image = Image.fromarray(res[bs_idx])
        rslt_img_fn = llm_bench_utils.output_file.output_gen_image(image, args, image_id, num, bs_idx, proc_id, '.png')
        result_md5_list.append(hashlib.md5(Image.open(rslt_img_fn).tobytes(), usedforsecurity=False).hexdigest())
    generation_time = end - start
    iter_data = gen_output_data.gen_iterate_data(
        iter_idx=num,
        in_size=input_token_size * args['batch_size'],
        infer_count=nsteps,
        gen_time=generation_time,
        res_md5=result_md5_list,
        max_rss_mem=max_rss_mem_consumption,
        max_shared_mem=max_shared_mem_consumption,
        max_uss_mem=max_uss_mem_consumption,
        prompt_idx=image_id,
    )
    iter_data_list.append(iter_data)
    metrics_print.print_metrics(
        num,
        iter_data,
        warm_up=(num == 0),
        max_rss_mem=max_rss_mem_consumption,
        max_shared_mem=max_shared_mem_consumption,
        max_uss_mem=max_uss_mem_consumption,
        stable_diffusion=None,
        prompt_idx=image_id
    )
    metrics_print.print_generated(num, warm_up=(num == 0), generated=rslt_img_fn, prompt_idx=image_id)
    stable_diffusion_hook.clear_statistics()


def run_image_generation_benchmark(model_path, framework, device, args, num_iters, mem_consumption):
if args['genai']:
log.warning("GenAI pipeline is not supported for this task. Switched on default benchmarking")
pipe, pretrain_time = FW_UTILS[framework].create_image_gen_model(model_path, device, **args)
pipe, pretrain_time, use_genai, tokenizer, generator = FW_UTILS[framework].create_image_gen_model(model_path, device, **args)

iter_data_list = []
input_image_list = model_utils.get_image_param_from_prompt_file(args)
if framework == "ov":
if framework == "ov" and not use_genai:
stable_diffusion_hook.new_text_encoder(pipe)
stable_diffusion_hook.new_unet(pipe)
stable_diffusion_hook.new_vae_decoder(pipe)
@@ -120,6 +192,11 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
raise RuntimeError('==Failure prompts is empty ==')
log.info(f'Benchmarking iter nums(exclude warm-up): {num_iters}, prompt nums: {len(image_list)}, prompt idx: {prompt_idx_list}')

if not use_genai:
text_gen_fn = run_image_generation
else:
text_gen_fn = run_image_generation_genai

# if num_iters == 0, just output warm-up data
proc_id = os.getpid()
iter_timestamp = model_utils.init_timestamp(num_iters, image_list, prompt_idx_list)
@@ -128,7 +205,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
for image_id, image_param in enumerate(image_list):
p_idx = prompt_idx_list[image_id]
iter_timestamp[num][p_idx]['start'] = datetime.datetime.now().isoformat()
run_image_generation(image_param, num, prompt_idx_list[image_id], pipe, args, iter_data_list, proc_id, mem_consumption)
text_gen_fn(image_param, num, prompt_idx_list[image_id], pipe, args, iter_data_list, proc_id, mem_consumption, tokenizer, generator)
iter_timestamp[num][p_idx]['end'] = datetime.datetime.now().isoformat()
prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
log.info(f"{prefix}[P{p_idx}] start: {iter_timestamp[num][p_idx]['start']}, end: {iter_timestamp[num][p_idx]['end']}")
@@ -137,7 +214,7 @@ def run_image_generation_benchmark(model_path, framework, device, args, num_iter
p_idx = prompt_idx_list[image_id]
for num in range(num_iters + 1):
iter_timestamp[num][p_idx]['start'] = datetime.datetime.now().isoformat()
run_image_generation(image_param, num, p_idx, pipe, args, iter_data_list, proc_id, mem_consumption)
text_gen_fn(image_param, num, p_idx, pipe, args, iter_data_list, proc_id, mem_consumption, tokenizer, generator)
iter_timestamp[num][p_idx]['end'] = datetime.datetime.now().isoformat()
prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
log.info(f"{prefix}[P{p_idx}] start: {iter_timestamp[num][p_idx]['start']}, end: {iter_timestamp[num][p_idx]['end']}")
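For reference, a stripped-down sketch of the standalone GenAI call that run_image_generation_genai wraps (model directory and prompt are hypothetical; the .data[0] access mirrors how the result tensor is converted to an image above):

import openvino_genai
from PIL import Image

pipe = openvino_genai.Text2ImagePipeline('dreamlike_anime_1_0_ov/FP16', 'CPU')
tensor = pipe.generate('a castle on a cliff at sunset', num_inference_steps=20, width=512, height=512)
Image.fromarray(tensor.data[0]).save('result.png')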
2 changes: 1 addition & 1 deletion tools/who_what_benchmark/whowhatbench/wwb.py
@@ -404,7 +404,7 @@ def genai_gen_image(model, prompt, num_inference_steps, generator=None):
width=model.resolution[0],
height=model.resolution[1],
num_inference_steps=num_inference_steps,
random_generator=generator
generator=generator
)
image = Image.fromarray(image_tensor.data[0])
return image
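This keyword rename tracks the Text2ImagePipeline API, where the random source is now passed as generator. A minimal sketch of supplying a deterministic generator, reusing the pipe from the sketch above (the seed is an arbitrary example; CppStdGenerator is the stock generator exported by openvino_genai):

image_tensor = pipe.generate(
    'a watercolor fox',
    num_inference_steps=4,
    generator=openvino_genai.CppStdGenerator(42),  # fixed seed for reproducible runs
)
image = Image.fromarray(image_tensor.data[0])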
