diff --git a/samples/cpp/text_generation/CMakeLists.txt b/samples/cpp/text_generation/CMakeLists.txt
index 4ed269d737..5824e132bd 100644
--- a/samples/cpp/text_generation/CMakeLists.txt
+++ b/samples/cpp/text_generation/CMakeLists.txt
@@ -46,7 +46,7 @@ FetchContent_Declare(cxxopts
     URL_HASH SHA256=523175f792eb0ff04f9e653c90746c12655f10cb70f1d5e6d6d9491420298a08)
 FetchContent_MakeAvailable(cxxopts)

-add_executable(benchmark_genai benchmark_genai.cpp)
+add_executable(benchmark_genai benchmark_genai.cpp read_prompt_from_file.cpp)
 target_link_libraries(benchmark_genai PRIVATE openvino::genai cxxopts::cxxopts)
 set_target_properties(benchmark_genai PROPERTIES
     # Ensure out of box LC_RPATH on macOS with SIP
diff --git a/samples/cpp/text_generation/README.md b/samples/cpp/text_generation/README.md
index ab4fc030b5..91a0fd328f 100644
--- a/samples/cpp/text_generation/README.md
+++ b/samples/cpp/text_generation/README.md
@@ -161,7 +161,8 @@ For more information how performance metrics are calculated please follow [perfo
 ```
 #### Options
 - `-m, --model`: Path to the model and tokenizers base directory.
-- `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text.
+- `-p, --prompt` (default: `''`): The prompt to generate text. If neither `-p` nor `--pf` is given, the default prompt is `"The Sky is blue because"`.
+- `--pf, --prompt_file`: Read the prompt from a file.
 - `--nw, --num_warmup` (default: `1`): Number of warmup iterations.
 - `--mt, --max_new_tokens` (default: `20`): Maximal number of new tokens.
 - `-n, --num_iter` (default: `3`): Number of iterations.
diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp
index 4a8c8d0723..078dd88969 100644
--- a/samples/cpp/text_generation/benchmark_genai.cpp
+++ b/samples/cpp/text_generation/benchmark_genai.cpp
@@ -3,13 +3,15 @@

 #include "openvino/genai/llm_pipeline.hpp"
 #include <cxxopts.hpp>
+#include "read_prompt_from_file.h"

 int main(int argc, char* argv[]) try {
     cxxopts::Options options("benchmark_vanilla_genai", "Help command");

     options.add_options()
     ("m,model", "Path to model and tokenizers base directory", cxxopts::value<std::string>())
-    ("p,prompt", "Prompt", cxxopts::value<std::string>()->default_value("The Sky is blue because"))
+    ("p,prompt", "Prompt", cxxopts::value<std::string>()->default_value(""))
+    ("pf,prompt_file", "Read prompt from file", cxxopts::value<std::string>())
     ("nw,num_warmup", "Number of warmup iterations", cxxopts::value<size_t>()->default_value(std::to_string(1)))
     ("n,num_iter", "Number of iterations", cxxopts::value<size_t>()->default_value(std::to_string(3)))
     ("mt,max_new_tokens", "Maximal number of new tokens", cxxopts::value<size_t>()->default_value(std::to_string(20)))
@@ -30,7 +32,22 @@ int main(int argc, char* argv[]) try {
         return EXIT_SUCCESS;
     }

-    std::string prompt = result["prompt"].as<std::string>();
+    std::string prompt;
+    if (result.count("prompt") && result.count("prompt_file")) {
+        std::cout << "Prompt and prompt file should not be used together!" << std::endl;
+        return EXIT_FAILURE;
+    } else {
+        if (result.count("prompt_file")) {
+            prompt = utils::read_prompt(result["prompt_file"].as<std::string>());
+        } else {
+            prompt = result["prompt"].as<std::string>().empty() ? "The Sky is blue because" : result["prompt"].as<std::string>();
+        }
+    }
+    if (prompt.empty()) {
+        std::cout << "Prompt is empty!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
     const std::string models_path = result["model"].as<std::string>();
     std::string device = result["device"].as<std::string>();
     size_t num_warmup = result["num_warmup"].as<size_t>();
@@ -39,7 +56,17 @@
     ov::genai::GenerationConfig config;
     config.max_new_tokens = result["max_new_tokens"].as<size_t>();

-    ov::genai::LLMPipeline pipe(models_path, device);
+    ov::genai::SchedulerConfig scheduler_config;
+    scheduler_config.enable_prefix_caching = false;
+    scheduler_config.max_num_batched_tokens = std::numeric_limits<std::size_t>::max();
+
+    std::cout << ov::get_openvino_version() << std::endl;
+
+    ov::genai::LLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config));
+
+    auto input_data = pipe.get_tokenizer().encode(prompt);
+    size_t prompt_token_size = input_data.input_ids.get_shape()[1];
+    std::cout << "Prompt token size:" << prompt_token_size << std::endl;

     for (size_t i = 0; i < num_warmup; i++)
         pipe.generate(prompt, config);
@@ -52,6 +79,7 @@
     }

     std::cout << std::fixed << std::setprecision(2);
+    std::cout << "Output token size:" << res.perf_metrics.get_num_generated_tokens() << std::endl;
     std::cout << "Load time: " << metrics.get_load_time() << " ms" << std::endl;
     std::cout << "Generate time: " << metrics.get_generate_duration().mean << " ± " << metrics.get_generate_duration().std << " ms" << std::endl;
     std::cout << "Tokenization time: " << metrics.get_tokenization_duration().mean << " ± " << metrics.get_tokenization_duration().std << " ms" << std::endl;
diff --git a/samples/cpp/text_generation/read_prompt_from_file.cpp b/samples/cpp/text_generation/read_prompt_from_file.cpp
new file mode 100644
index 0000000000..7559c2d1db
--- /dev/null
+++ b/samples/cpp/text_generation/read_prompt_from_file.cpp
@@ -0,0 +1,19 @@
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#include <fstream>
+#include <sstream>
+#include "read_prompt_from_file.h"
+
+std::string utils::read_prompt(const std::string& file_path) {
+    std::ifstream file(file_path);
+    if (file.is_open()) {
+        std::stringstream buffer;
+        buffer << file.rdbuf();
+        return buffer.str();
+    } else {
+        std::stringstream error_message;
+        error_message << "Error opening prompt file: '" << file_path << "'";
+        throw std::runtime_error{error_message.str()};
+    }
+}
\ No newline at end of file
diff --git a/samples/cpp/text_generation/read_prompt_from_file.h b/samples/cpp/text_generation/read_prompt_from_file.h
new file mode 100644
index 0000000000..b47cd08d92
--- /dev/null
+++ b/samples/cpp/text_generation/read_prompt_from_file.h
@@ -0,0 +1,11 @@
+
+// Copyright (C) 2023-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+
+#pragma once
+
+#include <string>
+
+namespace utils {
+std::string read_prompt(const std::string& file_path);
+}
\ No newline at end of file
diff --git a/samples/cpp/visual_language_chat/CMakeLists.txt b/samples/cpp/visual_language_chat/CMakeLists.txt
index 23545b78ce..54795351a6 100644
--- a/samples/cpp/visual_language_chat/CMakeLists.txt
+++ b/samples/cpp/visual_language_chat/CMakeLists.txt
@@ -44,8 +44,7 @@ install(TARGETS encrypted_model_vlm
         EXCLUDE_FROM_ALL)

 # create benchmark executable
-
-add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp)
+add_executable(benchmark_vlm benchmark_vlm.cpp load_image.cpp ../text_generation/read_prompt_from_file.cpp)
 target_include_directories(benchmark_vlm PRIVATE "${CMAKE_BINARY_DIR}")
 target_link_libraries(benchmark_vlm PRIVATE openvino::genai cxxopts::cxxopts)
 set_target_properties(benchmark_vlm PROPERTIES
diff --git a/samples/cpp/visual_language_chat/README.md b/samples/cpp/visual_language_chat/README.md
index 18424be9be..58065bc070 100644
--- a/samples/cpp/visual_language_chat/README.md
+++ b/samples/cpp/visual_language_chat/README.md
@@ -40,7 +40,8 @@ benchmark_vlm [OPTIONS]

 ### Options
 - `-m, --model`(default: `.`): Path to the model and tokenizers base directory.
-- `-p, --prompt` (default: `What is on the image?`): The prompt to generate text.
+- `-p, --prompt` (default: `''`): The prompt to generate text. If neither `-p` nor `--pf` is given, the default prompt is `"What is on the image?"`.
+- `--pf, --prompt_file`: Read the prompt from a file.
 - `-i, --image` (default: `image.jpg`): Path to the image.
 - `-nw, --num_warmup` (default: `1`): Number of warmup iterations.
 - `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens.
diff --git a/samples/cpp/visual_language_chat/benchmark_vlm.cpp b/samples/cpp/visual_language_chat/benchmark_vlm.cpp
index 8467738307..822f71348b 100644
--- a/samples/cpp/visual_language_chat/benchmark_vlm.cpp
+++ b/samples/cpp/visual_language_chat/benchmark_vlm.cpp
@@ -6,14 +6,15 @@

 #include "load_image.hpp"
 #include <cxxopts.hpp>
-
+#include "../text_generation/read_prompt_from_file.h"

 int main(int argc, char* argv[]) try {
     cxxopts::Options options("benchmark_vlm", "Help command");

     options.add_options()
     ("m,model", "Path to model and tokenizers base directory", cxxopts::value<std::string>()->default_value("."))
-    ("p,prompt", "Prompt", cxxopts::value<std::string>()->default_value("What is on the image?"))
+    ("p,prompt", "Prompt", cxxopts::value<std::string>()->default_value(""))
+    ("pf,prompt_file", "Read prompt from file", cxxopts::value<std::string>())
     ("i,image", "Image", cxxopts::value<std::string>()->default_value("image.jpg"))
     ("nw,num_warmup", "Number of warmup iterations", cxxopts::value<size_t>()->default_value(std::to_string(1)))
     ("n,num_iter", "Number of iterations", cxxopts::value<size_t>()->default_value(std::to_string(3)))
@@ -35,30 +36,57 @@ int main(int argc, char* argv[]) try {
         return EXIT_SUCCESS;
     }

-    std::string prompt = result["prompt"].as<std::string>();
+    std::string prompt;
+    if (result.count("prompt") && result.count("prompt_file")) {
+        std::cout << "Prompt and prompt file should not be used together!" << std::endl;
+        return EXIT_FAILURE;
+    } else {
+        if (result.count("prompt_file")) {
+            prompt = utils::read_prompt(result["prompt_file"].as<std::string>());
+        } else {
+            prompt = result["prompt"].as<std::string>().empty() ? "What is on the image?" : result["prompt"].as<std::string>();
+        }
+    }
+    if (prompt.empty()) {
+        std::cout << "Prompt is empty!" << std::endl;
+        return EXIT_FAILURE;
+    }
+
     const std::string models_path = result["model"].as<std::string>();
     const std::string image_path = result["image"].as<std::string>();
     std::string device = result["device"].as<std::string>();
     size_t num_warmup = result["num_warmup"].as<size_t>();
     size_t num_iter = result["num_iter"].as<size_t>();
-    ov::Tensor image = utils::load_image(image_path);
-
+    std::vector<ov::Tensor> images = utils::load_images(image_path);
+
     ov::genai::GenerationConfig config;
     config.max_new_tokens = result["max_new_tokens"].as<size_t>();
+    config.ignore_eos = true;
+
+    ov::genai::SchedulerConfig scheduler_config;
+    scheduler_config.enable_prefix_caching = false;
+    scheduler_config.max_num_batched_tokens = std::numeric_limits<std::size_t>::max();
+
+    std::cout << ov::get_openvino_version() << std::endl;
+
+    ov::genai::VLMPipeline pipe(models_path, device, ov::genai::scheduler_config(scheduler_config));
+
+    auto input_data = pipe.get_tokenizer().encode(prompt);
+    size_t prompt_token_size = input_data.input_ids.get_shape()[1];
+    std::cout << "Number of images:" << images.size() << ", prompt token size:" << prompt_token_size << std::endl;

-    ov::genai::VLMPipeline pipe(models_path, device);
-
     for (size_t i = 0; i < num_warmup; i++)
-        pipe.generate(prompt, ov::genai::image(image), ov::genai::generation_config(config));
+        pipe.generate(prompt, ov::genai::images(images), ov::genai::generation_config(config));

-    auto res = pipe.generate(prompt, ov::genai::image(image), ov::genai::generation_config(config));
+    auto res = pipe.generate(prompt, ov::genai::images(images), ov::genai::generation_config(config));
     auto metrics = res.perf_metrics;
     for (size_t i = 0; i < num_iter - 1; i++) {
-        res = pipe.generate(prompt, ov::genai::image(image), ov::genai::generation_config(config));
+        res = pipe.generate(prompt, ov::genai::images(images), ov::genai::generation_config(config));
         metrics = metrics + res.perf_metrics;
     }

     std::cout << std::fixed << std::setprecision(2);
+    std::cout << "Output token size:" << res.perf_metrics.get_num_generated_tokens() << std::endl;
     std::cout << "Load time: " << metrics.get_load_time() << " ms" << std::endl;
     std::cout << "Generate time: " << metrics.get_generate_duration().mean << " ± " << metrics.get_generate_duration().std << " ms" << std::endl;
     std::cout << "Tokenization time: " << metrics.get_tokenization_duration().mean << " ± " << metrics.get_tokenization_duration().std << " ms" << std::endl;
diff --git a/samples/python/text_generation/README.md b/samples/python/text_generation/README.md
index 7d334df29e..cb0ef68f70 100644
--- a/samples/python/text_generation/README.md
+++ b/samples/python/text_generation/README.md
@@ -153,7 +153,8 @@ For more information how performance metrics are calculated please follow [perfo
 ```
 #### Options
 - `-m, --model`: Path to the model and tokenizers base directory.
-- `-p, --prompt` (default: `"The Sky is blue because"`): The prompt to generate text.
+- `-p, --prompt` (default: `None`): The prompt to generate text. If neither `-p` nor `-pf` is given, the default prompt is `"The Sky is blue because"`.
+- `-pf, --prompt_file`: Read the prompt from a file.
 - `-nw, --num_warmup` (default: `1`): Number of warmup iterations.
 - `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens.
 - `-n, --num_iter` (default: `3`): Number of iterations.
diff --git a/samples/python/text_generation/benchmark_genai.py b/samples/python/text_generation/benchmark_genai.py
index d279ab95fc..c3a55a9d2e 100755
--- a/samples/python/text_generation/benchmark_genai.py
+++ b/samples/python/text_generation/benchmark_genai.py
@@ -1,13 +1,16 @@
 # Copyright (C) 2023-2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

+import sys
 import argparse
 import openvino_genai as ov_genai
+from openvino import get_version

 def main():
     parser = argparse.ArgumentParser(description="Help command")
     parser.add_argument("-m", "--model", type=str, required=True, help="Path to model and tokenizers base directory")
-    parser.add_argument("-p", "--prompt", type=str, default="The Sky is blue because", help="Prompt")
+    parser.add_argument("-p", "--prompt", type=str, default=None, help="Prompt")
+    parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file")
     parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations")
     parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations")
     parser.add_argument("-mt", "--max_new_tokens", type=int, default=20, help="Maximal number of new tokens")
@@ -15,9 +18,21 @@ def main():

     args = parser.parse_args()

+    if args.prompt is not None and args.prompt_file is not None:
+        raise RuntimeError('Prompt and prompt file should not be used together!')
+    else:
+        if args.prompt_file is not None:
+            with open(args.prompt_file, 'r', encoding='utf-8') as f:
+                prompt = [f.read()]
+        else:
+            prompt = ['The Sky is blue because'] if args.prompt is None else [args.prompt]
+    if len(prompt) == 0:
+        raise RuntimeError('Prompt is empty!')
+
+    print(f'openvino runtime version: {get_version()}')
+
     # Perf metrics is stored in DecodedResults.
     # In order to get DecodedResults instead of a string input should be a list.
-    prompt = [args.prompt]
     models_path = args.model
     device = args.device
     num_warmup = args.num_warmup
@@ -26,8 +41,16 @@ def main():

     config = ov_genai.GenerationConfig()
     config.max_new_tokens = args.max_new_tokens

-    pipe = ov_genai.LLMPipeline(models_path, device)
+    scheduler_config = ov_genai.SchedulerConfig()
+    scheduler_config.enable_prefix_caching = False
+    scheduler_config.max_num_batched_tokens = sys.maxsize
+
+    pipe = ov_genai.LLMPipeline(models_path, device, scheduler_config=scheduler_config)
+
+    input_data = pipe.get_tokenizer().encode(prompt)
+    prompt_token_size = input_data.input_ids.get_shape()[1]
+    print(f"Prompt token size: {prompt_token_size}")

     for _ in range(num_warmup):
         pipe.generate(prompt, config)
@@ -37,6 +60,7 @@ def main():
         res = pipe.generate(prompt, config)
         perf_metrics += res.perf_metrics

+    print(f"Output token size: {res.perf_metrics.get_num_generated_tokens()}")
     print(f"Load time: {perf_metrics.get_load_time():.2f} ms")
     print(f"Generate time: {perf_metrics.get_generate_duration().mean:.2f} ± {perf_metrics.get_generate_duration().std:.2f} ms")
     print(f"Tokenization time: {perf_metrics.get_tokenization_duration().mean:.2f} ± {perf_metrics.get_tokenization_duration().std:.2f} ms")
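The Python sample above (like its C++ counterpart) rejects the combination of `-p` and `-pf` with an explicit runtime check. As a design note, a minimal sketch of an alternative that lets argparse enforce the exclusivity itself; this is an illustration only, not what the patch does:

```python
# Sketch only: argparse can reject the case where both -p and -pf are
# supplied, producing a usage error instead of a RuntimeError.
import argparse

parser = argparse.ArgumentParser(description="Help command")
group = parser.add_mutually_exclusive_group()
group.add_argument("-p", "--prompt", type=str, default=None, help="Prompt")
group.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file")
args = parser.parse_args()  # exits here if both options are passed

if args.prompt_file is not None:
    with open(args.prompt_file, 'r', encoding='utf-8') as f:
        prompt = [f.read()]
else:
    prompt = ['The Sky is blue because'] if args.prompt is None else [args.prompt]
```

For valid input both approaches behave the same; the group variant only moves the conflicting-option error into argparse's usage message.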
diff --git a/samples/python/visual_language_chat/README.md b/samples/python/visual_language_chat/README.md
index 098cbac630..3457f51318 100644
--- a/samples/python/visual_language_chat/README.md
+++ b/samples/python/visual_language_chat/README.md
@@ -40,7 +40,8 @@ python benchmark_vlm.py [OPTIONS]

 ### Options
 - `-m, --model`(default: `.`): Path to the model and tokenizers base directory.
-- `-p, --prompt` (default: `What is on the image?`): The prompt to generate text.
+- `-p, --prompt` (default: `None`): The prompt to generate text. If neither `-p` nor `-pf` is given, the default prompt is `"What is on the image?"`.
+- `-pf, --prompt_file`: Read the prompt from a file.
 - `-i, --image` (default: `image.jpg`): Path to the image.
 - `-nw, --num_warmup` (default: `1`): Number of warmup iterations.
 - `-mt, --max_new_tokens` (default: `20`): Maximal number of new tokens.
diff --git a/samples/python/visual_language_chat/benchmark_vlm.py b/samples/python/visual_language_chat/benchmark_vlm.py
index cbce4197dd..9499e947bd 100755
--- a/samples/python/visual_language_chat/benchmark_vlm.py
+++ b/samples/python/visual_language_chat/benchmark_vlm.py
@@ -2,11 +2,14 @@
 # Copyright (C) 2023-2025 Intel Corporation
 # SPDX-License-Identifier: Apache-2.0

+import sys
 import argparse
 import openvino_genai as ov_genai
 from PIL import Image
 from openvino import Tensor
+from pathlib import Path
 import numpy as np
+from openvino import get_version


 def read_image(path: str) -> Tensor:
@@ -22,11 +25,18 @@ def read_image(path: str) -> Tensor:
     image_data = np.array(pic)
     return Tensor(image_data)

+def read_images(path: str) -> list[Tensor]:
+    entry = Path(path)
+    if entry.is_dir():
+        return [read_image(str(file)) for file in sorted(entry.iterdir())]
+    return [read_image(path)]
+

 def main():
     parser = argparse.ArgumentParser(description="Help command")
     parser.add_argument("-m", "--model", type=str, help="Path to model and tokenizers base directory")
-    parser.add_argument("-p", "--prompt", type=str, default="The Sky is blue because", help="Prompt")
+    parser.add_argument("-p", "--prompt", type=str, default=None, help="Prompt")
+    parser.add_argument("-pf", "--prompt_file", type=str, help="Read prompt from file")
     parser.add_argument("-i", "--image", type=str, default="image.jpg", help="Image")
     parser.add_argument("-nw", "--num_warmup", type=int, default=1, help="Number of warmup iterations")
     parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations")
@@ -35,11 +45,23 @@ def main():

     args = parser.parse_args()

+    if args.prompt is not None and args.prompt_file is not None:
+        raise RuntimeError('Prompt and prompt file should not be used together!')
+    else:
+        if args.prompt_file is not None:
+            with open(args.prompt_file, 'r', encoding='utf-8') as f:
+                prompt = f.read()
+        else:
+            prompt = 'What is on the image?' if args.prompt is None else args.prompt
+    if len(prompt) == 0:
+        raise RuntimeError('Prompt is empty!')
+
+    print(f'openvino runtime version: {get_version()}')
+
     # Perf metrics is stored in VLMDecodedResults.
     # In order to get VLMDecodedResults instead of a string input should be a list.
-    prompt = args.prompt
     models_path = args.model
-    image = read_image(args.image)
+    images = read_images(args.image)
     device = args.device
     num_warmup = args.num_warmup
     num_iter = args.num_iter
@@ -47,17 +69,26 @@ def main():
     config = ov_genai.GenerationConfig()
     config.max_new_tokens = args.max_new_tokens

-    pipe = ov_genai.VLMPipeline(models_path, device)
+    scheduler_config = ov_genai.SchedulerConfig()
+    scheduler_config.enable_prefix_caching = False
+    scheduler_config.max_num_batched_tokens = sys.maxsize
+
+    pipe = ov_genai.VLMPipeline(models_path, device, scheduler_config=scheduler_config)
+
+    input_data = pipe.get_tokenizer().encode(prompt)
+    prompt_token_size = input_data.input_ids.get_shape()[1]
+    print(f"Number of images:{len(images)}, Prompt token size: {prompt_token_size}")

     for _ in range(num_warmup):
-        pipe.generate(prompt, images=image, generation_config=config)
+        pipe.generate(prompt, images=images, generation_config=config)

-    res = pipe.generate(prompt, images=image, generation_config=config)
+    res = pipe.generate(prompt, images=images, generation_config=config)
     perf_metrics = res.perf_metrics
     for _ in range(num_iter - 1):
-        res = pipe.generate(prompt, images=image, generation_config=config)
+        res = pipe.generate(prompt, images=images, generation_config=config)
         perf_metrics += res.perf_metrics

+    print(f"Output token size: {res.perf_metrics.get_num_generated_tokens()}")
     print(f"Load time: {perf_metrics.get_load_time():.2f} ms")
     print(
         f"Generate time: {perf_metrics.get_generate_duration().mean:.2f} ± {perf_metrics.get_generate_duration().std:.2f} ms")
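`read_images` above loads every entry of a directory in sorted order, so any non-image file in that directory is handed to `read_image` as well. A possible refinement, sketched here with a hypothetical `IMAGE_SUFFIXES` filter and helper name that are not part of the patch:

```python
# Hypothetical variant of the directory handling: skip entries that do not
# look like images instead of passing every file to the image decoder.
from pathlib import Path

IMAGE_SUFFIXES = {".jpg", ".jpeg", ".png", ".bmp"}

def list_image_files(path: str) -> list[str]:
    entry = Path(path)
    if entry.is_dir():
        return [str(f) for f in sorted(entry.iterdir())
                if f.suffix.lower() in IMAGE_SUFFIXES]
    return [path]
```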
diff --git a/tools/llm_bench/task/visual_language_generation.py b/tools/llm_bench/task/visual_language_generation.py
index 6d55625d47..674f691d4a 100644
--- a/tools/llm_bench/task/visual_language_generation.py
+++ b/tools/llm_bench/task/visual_language_generation.py
@@ -12,13 +12,13 @@
 import openvino as ov
 import hashlib
 import llm_bench_utils.metrics_print as metrics_print
-import llm_bench_utils.output_csv
 from transformers import set_seed
 from transformers.image_utils import load_image
-import llm_bench_utils.output_json
 import llm_bench_utils.output_file
 import llm_bench_utils.gen_output_data as gen_output_data
 import llm_bench_utils.parse_json_data as parse_json_data
+from pathlib import Path
+

 FW_UTILS = {'pt': llm_bench_utils.pt_utils, 'ov': llm_bench_utils.ov_utils}
@@ -37,10 +37,16 @@ def run_visual_language_generation_optimum(
     prompts = []
     inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs
     for input_data in inputs:
-        if "media" in input_data:
-            images.append(load_image(input_data["media"]))
+        if input_data.get("media", None):
+            entry = Path(input_data["media"])
+            if entry.is_dir():
+                for file in sorted(entry.iterdir()):
+                    images.append(load_image(str(file)))
+            else:
+                images.append(load_image(input_data["media"]))
         prompts.append(input_data["prompt"])
-
+    prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
+    log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}')
     if args["output_dir"] is not None and num == 0:
         for bs_index, in_text in enumerate(prompts):
             llm_bench_utils.output_file.output_input_text(in_text, args, model_precision, prompt_index, bs_index, proc_id)
@@ -192,8 +198,13 @@ def run_visual_language_generation_genai(
     prompts = []
     inputs = [inputs] if not isinstance(inputs, (list, tuple)) else inputs
     for input_data in inputs:
-        if "media" in input_data:
-            images.append(load_image_genai(input_data["media"]))
+        if input_data.get("media", None):
+            entry = Path(input_data["media"])
+            if entry.is_dir():
+                for file in sorted(entry.iterdir()):
+                    images.append(load_image_genai(str(file)))
+            else:
+                images.append(load_image_genai(input_data["media"]))
         prompts.append(input_data["prompt"])
     if args["output_dir"] is not None and num == 0:
         for bs_index, in_text in enumerate(prompts):
@@ -212,7 +223,9 @@ def run_visual_language_generation_genai(
     gen_config.ignore_eos = True
     kwargs = {}
     if len(images) >= 1:
-        kwargs["images"] = images[0]
+        kwargs["images"] = images
+    prefix = '[warm-up]' if num == 0 else '[{}]'.format(num)
+    log.info(f'{prefix}[P{prompt_index}] Input image nums:{len(images)}')
     start = time.perf_counter()
     generation_result = model.generate(prompts[0], generation_config=gen_config, **kwargs)
     end = time.perf_counter()
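For llm_bench, the net effect is that a prompt entry's `media` value may now name a directory as well as a single file, and in the GenAI path all loaded images are passed to `generate` rather than only the first one. A rough illustration of the shape the modified loops consume; the `media` and `prompt` keys come from the code above, while the surrounding prompt-file layout is an assumption, not something this diff defines:

```python
# Illustration only: entries with the "media" / "prompt" keys read by the
# loops above. A directory is expanded to all of its files, sorted by name.
inputs = [
    {"media": "test_images", "prompt": "What do these images have in common?"},
    {"media": "cat.jpg", "prompt": "What is on the image?"},
]
```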