From 7de1d64b6ce7a55ce31db16d53f786127b9a44bf Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Mon, 26 May 2025 23:09:59 +0800 Subject: [PATCH 01/25] Add VCLApi and VCLCompilerImpl Use fake metadata Use driver ext to parse metadata If not find npu_driver_compiler, rollback to npu_mlir_compiler with vcl Use ENABLE_VCL_FOR_COMPILER as switch and download lib Fix support options Fix win download issue Add download for ubuntu24.04 and refactor script Install vcl compiler during cpack Fix proxy issue Fix install path Fix supported option api and use npu_vcl_compiler Retrieve error log Fix log issue Use vclAllocatedExecutableCreate2 Detect version to create executable Fix allocate API Fix style Fix cache_dir Update to use new compiler Rollback to old windows driver which does not need WinRAR Fix build issue Fix weightsless Signed-off-by: Xin Wang --- src/plugins/intel_npu/CMakeLists.txt | 5 + .../cmake/download_compiler_libs.cmake | 174 +++++++ src/plugins/intel_npu/cmake/features.cmake | 3 + .../al/include/intel_npu/config/options.hpp | 2 +- .../include/npu_driver_compiler.h | 339 ++++++++++++++ .../include/plugin_compiler_adapter.hpp | 1 + .../src/compiler_adapter/include/vcl_api.hpp | 118 +++++ .../include/ze_graph_ext_wrappers.hpp | 3 + .../src/compiler_adapter/src/graph.cpp | 1 - .../src/plugin_compiler_adapter.cpp | 111 ++++- .../src/compiler_adapter/src/vcl_api.cpp | 439 ++++++++++++++++++ .../src/ze_graph_ext_wrappers.cpp | 2 +- .../intel_npu/src/plugin/src/plugin.cpp | 2 + 13 files changed, 1180 insertions(+), 20 deletions(-) create mode 100644 src/plugins/intel_npu/cmake/download_compiler_libs.cmake create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp diff --git a/src/plugins/intel_npu/CMakeLists.txt b/src/plugins/intel_npu/CMakeLists.txt index 
8871512b85b848..470801fb39bc10 100644 --- a/src/plugins/intel_npu/CMakeLists.txt +++ b/src/plugins/intel_npu/CMakeLists.txt @@ -18,6 +18,11 @@ set(NPU_PLUGIN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) include(cmake/features.cmake) +if(ENABLE_VCL_FOR_COMPILER) + include(cmake/download_compiler_libs.cmake) + add_definitions("-DVCL_FOR_COMPILER") +endif() + set(CMAKE_CXX_STANDARD 17) if(ENABLE_NPU_DEBUG_CAPS) diff --git a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake new file mode 100644 index 00000000000000..95ced5f78bdb59 --- /dev/null +++ b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake @@ -0,0 +1,174 @@ +# Copyright (C) 2018-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# Function to download and extract files +function(download_and_extract url dest_dir zip_file extracted_dir modify_proxy) + # Check if the prebuilt VCL compiler libraries not exist + if(NOT EXISTS "${extracted_dir}") + if(modify_proxy STREQUAL "MODIFY") + # Update proxy to enable download for windows url + set(original_NO_PROXY $ENV{NO_PROXY}) + set(original_no_proxy $ENV{no_proxy}) + set(ENV{NO_PROXY} "") + set(ENV{no_proxy} "") + endif() + + # Download the prebuilt VCL compiler libraries, if failure, show error message and exit + message(STATUS "Downloading prebuilt VCL compiler libraries from ${url}") + file(DOWNLOAD "${url}" "${zip_file}" + TIMEOUT 3600 + LOG log_output + STATUS download_status + SHOW_PROGRESS) + + if(modify_proxy STREQUAL "MODIFY") + # Restore proxy + set(ENV{NO_PROXY} ${original_NO_PROXY}) + set(ENV{no_proxy} ${original_no_proxy}) + endif() + + list(GET download_status 0 download_result) + if(NOT download_result EQUAL 0) + message(FATAL_ERROR "Download failed!\nStatus: ${download_status}\nLog: ${log_output}") + else() + message(STATUS "Download completed: ${zip_file}") + endif() + + message(STATUS "Unzipping prebuilt VCL compiler libraries to ${extracted_dir}") + # Determine 
extraction method based on file extension + if("${zip_file}" MATCHES "\\.zip$") + file(ARCHIVE_EXTRACT INPUT "${zip_file}" DESTINATION "${extracted_dir}") + elseif("${zip_file}" MATCHES "\\.deb$") + execute_process(COMMAND dpkg-deb -x "${zip_file}" "${extracted_dir}") + elseif("${zip_file}" MATCHES "\\.exe$") + set(WINRAR_PATHS + "C:/Program Files/WinRAR" + "C:/Program Files (x86)/WinRAR" + ) + + set(WINRAR_FOUND FALSE) + set(WINRAR_EXECUTABLE "") + + foreach(PATH ${WINRAR_PATHS}) + if(EXISTS "${PATH}/WinRAR.exe") + set(WINRAR_FOUND TRUE) + set(WINRAR_EXECUTABLE "${PATH}/WinRAR.exe") + break() + endif() + endforeach() + + if(WINRAR_FOUND) + message(STATUS "WinRAR found at: ${WINRAR_EXECUTABLE} and extract ${zip_file} to ${extracted_dir}") + file(MAKE_DIRECTORY "${extracted_dir}") + execute_process( + COMMAND "${WINRAR_EXECUTABLE}" x -y -o+ "${zip_file}" "${extracted_dir}" + RESULT_VARIABLE result + OUTPUT_VARIABLE output + ERROR_VARIABLE error + ) + + if(result EQUAL 0) + message(STATUS "Extraction successful: ${output}") + else() + #file(REMOVE_RECURSE "${extracted_dir}") + message(STATUS "Extraction failed: ${error}") + endif() + else() + message(FATAL_ERROR "WinRAR not found. 
Please install WinRAR to proceed.") + endif() + else() + message(FATAL_ERROR "Unsupported file extension for extraction: ${zip_file}") + endif() + file(REMOVE "${zip_file}") + else() + message(STATUS "Prebuilt VCL compiler libraries already exist, skip download") + endif() +endfunction() + +if(ENABLE_VCL_FOR_COMPILER) + if(ENABLE_SYSTEM_NPU_VCL_COMPILER) + message(STATUS "Using system NPU VCL compiler libraries, skip download") + else() + message(STATUS "Downloading prebuilt NPU VCL compiler libraries") + if(WIN32) + set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_lib/win") + set(VCL_COMPILER_LIBS_URL "https://downloadmirror.intel.com/854488/npu_win_32.0.100.4023.zip") + set(VCL_COMPILER_LIBS_ZIP "${VCL_COMPILER_LIBS_DIR}/npu_win_32.0.100.4023.zip") + set(VCL_COMPILER_LIBS_DIR_UNZIPPED "${VCL_COMPILER_LIBS_DIR}/npu_win_32.0.100.4023") + + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_ZIP}" "${VCL_COMPILER_LIBS_DIR_UNZIPPED}" "MODIFY") + set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_UNZIPPED}/npu_win_32.0.100.4023/drivers/x64/") + + + configure_file( + ${VCL_COMPILER_LIB_PATH}/npu_driver_compiler.dll + ${VCL_COMPILER_LIB_PATH}/npu_vcl_compiler.dll + COPYONLY + ) + set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/npu_vcl_compiler.dll") + file(COPY "${VCL_COMPILER_LIB}" + DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_BUILD_TYPE}") + message(STATUS "Copying prebuilt VCL compiler libraries npu_vcl_compiler.dll to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for windows") + else() + # Check if the operating system is Linux and not macOS + if(UNIX AND NOT APPLE) + # Get the OS name and version + execute_process(COMMAND lsb_release -is OUTPUT_VARIABLE OS_NAME OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process(COMMAND lsb_release -rs OUTPUT_VARIABLE OS_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(OS_NAME STREQUAL "Ubuntu") + if(OS_VERSION STREQUAL "22.04") + # Ubuntu 22.04-specific settings or 
actions + set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_libs/ubuntu22.04") + set(VCL_COMPILER_LIBS_URL "https://github.com/intel/linux-npu-driver/releases/download/v1.19.0/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu22.04_amd64.deb") + set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu22.04_amd64.deb") + set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu22.04") + + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") + + set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") + configure_file( + ${VCL_COMPILER_LIB_PATH}/libnpu_driver_compiler.so + ${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so + COPYONLY + ) + set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") + file(COPY "${VCL_COMPILER_LIB}" + DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") + message(STATUS "Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 22.04") + elseif(OS_VERSION STREQUAL "24.04") + message(STATUS "This is Ubuntu 24.04") + # Ubuntu 24.04-specific settings or actions + set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_libs/ubuntu24.04") + set(VCL_COMPILER_LIBS_URL "https://github.com/intel/linux-npu-driver/releases/download/v1.19.0/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu24.04_amd64.deb") + set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu24.04_amd64.deb") + set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu24.04") + + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_DEB}" 
"${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") + + set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") + configure_file( + ${VCL_COMPILER_LIB_PATH}/libnpu_driver_compiler.so + ${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so + COPYONLY + ) + set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") + file(COPY "${VCL_COMPILER_LIB}" + DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") + message(STATUS "Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 24.04") + else() + message(STATUS "This is another version of Ubuntu: ${OS_VERSION}") + # Other Ubuntu-specific settings or actions + endif() + else() + message(STATUS "This is a different Linux distribution: ${OS_NAME}, skip downloading prebuilt VCL compiler libraries") + # Other Linux-specific settings or actions + endif() + endif() + endif() + endif() + + install(FILES ${VCL_COMPILER_LIB} + DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${NPU_INTERNAL_COMPONENT}) +endif() diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 4190b8415b87ad..1f462c0e461806 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -11,3 +11,6 @@ if(NOT ENABLE_NPU_PLUGIN_ENGINE AND ENABLE_TESTS) endif() ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF) + +ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" ON) +ov_option(ENABLE_SYSTEM_NPU_VCL_COMPILER "Use system VCL compiler libraries" OFF) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp index 3005c4ae2ac634..d9a533729eeeab 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp @@ -842,7 +842,7 @@ struct 
COMPILER_TYPE final : OptionBase +# include +#else +# include +# include +#endif + +#if defined(__cplusplus) +# pragma once +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +#define VCL_COMPILER_VERSION_MAJOR 7 +#define VCL_COMPILER_VERSION_MINOR 4 +#define VCL_PROFILING_VERSION_MAJOR 2 +#define VCL_PROFILING_VERSION_MINOR 0 + +#ifndef DEPRECATED +# define DEPRECATED // for documentation only +#endif + +/////////////////////////////////////////////////////////////////////////////// +#ifndef VCL_APICALL +# if defined(_WIN32) +/// @brief Calling convention for all API functions +# define VCL_APICALL __cdecl +# else +# define VCL_APICALL +# endif // defined(_WIN32) +#endif // VCL_APICALL + +/////////////////////////////////////////////////////////////////////////////// +#ifndef VCL_APIEXPORT +# if defined(_WIN32) +/// @brief Windows-specific dllexport storage-class attribute +# define VCL_APIEXPORT __declspec(dllexport) +# else +# define VCL_APIEXPORT +# endif // defined(_WIN32) +#endif // VCL_APIEXPORT + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Compiler handle +typedef struct __vcl_compiler_handle_t* vcl_compiler_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Executable handle +typedef struct __vcl_executable_handle_t* vcl_executable_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Profiling handle +typedef struct __vcl_profiling_handle_t* vcl_profiling_handle_t; + +/////////////////////////////////////////////////////////////////////////////// + +/// @brief QueryNetwork handle +typedef struct __vcl_query_handle_t* vcl_query_handle_t; + +/// @brief Error log handle +typedef struct __vcl_log_handle_t* vcl_log_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines type of requested data. 
+/// Must be in sync with \b _ze_graph_profiling_type_t +typedef enum __vcl_profiling_request_type_t { + VCL_PROFILING_LAYER_LEVEL = 0x1, + VCL_PROFILING_TASK_LEVEL = 0x2, + VCL_PROFILING_RAW = 0x3, + + VCL_PROFILING_FORCE_UINT32 = 0x7fffffff +} vcl_profiling_request_type_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines version info for the VPUXCompilerL0 API +typedef struct __vcl_version_info_t { + uint16_t major; + uint16_t minor; + +} vcl_version_info_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines return/error codes +typedef enum __vcl_result_t { + VCL_RESULT_SUCCESS = 0, ///< [Core] success + VCL_RESULT_ERROR_OUT_OF_MEMORY = 0x70000002, ///< [Core] insufficient memory to satisfy call + VCL_RESULT_ERROR_UNSUPPORTED_FEATURE = 0x78000003, ///< [Validation] generic error code for unsupported features + VCL_RESULT_ERROR_INVALID_ARGUMENT = 0x78000004, ///< [Validation] generic error code for invalid arguments + VCL_RESULT_ERROR_INVALID_NULL_HANDLE = 0x78000005, ///< [Validation] handle argument is not valid + VCL_RESULT_ERROR_IO = 0x78000006, ///< [Core] IO error + VCL_RESULT_ERROR_INVALID_IR = 0x78000007, ///< [Validation] the member of modelIR is not valid + VCL_RESULT_ERROR_UNKNOWN = 0x7ffffffe, ///< [Core] unknown or internal error + +} vcl_result_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines compiler properties +typedef struct __vcl_compiler_properties_t { + const char* id; + vcl_version_info_t version; + uint32_t supportedOpsets; + +} vcl_compiler_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines profiling properties +typedef struct __vcl_profiling_properties_t { + vcl_version_info_t version; ///< Profiling module version + +} vcl_profiling_properties_t; + 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines debug level for VCL +typedef enum __vcl_log_level_t { + VCL_LOG_NONE = 0, ///< Log is disabled + VCL_LOG_ERROR = 1, ///< Events which are not expected, containing probable reason + VCL_LOG_WARNING = 2, ///< Events which are unusual + VCL_LOG_INFO = 3, ///< Short messages about ongoing activity + VCL_LOG_DEBUG = 4, ///< Messages with particular data and explanations + VCL_LOG_TRACE = 5, ///< Messages with detailed information about execution + +} vcl_log_level_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines device desc to be passed during creation +/// +/// For online compilation, revision is always valid value and -1u for offline compilation. +/// 1. In offline mode the driver does not know the stepping and provides -1 (unknown) to VCL +/// 2. In VCL +/// If driver provides valid revision, the value will be default value for NPU_STEPPING +/// If driver provides -1u as value for revision, VCL will not set NPU_STEPPING +/// 3. If NPU_STEPPING is set by user with config, VCL will use user config instead of default value. +/// 4. If NPU_STEPPING is not passed to compiler, compiler will choose default stepping. 
+typedef struct __vcl_device_desc_t { + uint64_t size; /// Size of vcl_device_desc_t + uint32_t deviceID; /// The lower 16 bits equal to PCI Device ID, the upper 16 bits are zero + uint16_t revision; /// NPU Revision Identifier, -1u as invalid value + uint32_t tileCount; /// Value equals maximum number of slices +} vcl_device_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines compiler desc to be passed during creation +typedef struct __vcl_compiler_desc_t { + vcl_version_info_t version; /// The host vcl version + vcl_log_level_t debugLevel; /// Debug level for VCL +} vcl_compiler_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines executable description to be passed during executable +/// creation +/// +/// Format of modelIRData (defined in L0 adaptor): +/// 1. API version : vcl_version_info_t +/// 2. Num of data elements (now only xml + weights = 2) : uint32_t +/// 3. Size of data 1 (xml) : uint64_t +/// 4. Data 1 : $2 bytes +/// 5. Size of data 2 (weights) : uint64_t +/// 6. Data 2 : $4 bytes +typedef struct __vcl_executable_desc_t { + const uint8_t* modelIRData; + uint64_t modelIRSize; ///< Size of modelIRData + const char* options; ///< Compiler config options + uint64_t optionsSize; ///< Size of options +} vcl_executable_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines query description to be passed during query network creation +/// +/// Format of modelIRData (defined in L0 adaptor): +/// 1. API version : vcl_version_info_t +/// 2. Num of data elements (now only xml + weights = 2) : uint32_t +/// 3. Size of data 1 (xml) : uint64_t +/// 4. Data 1 : $2 bytes +/// 5. Size of data 2 (weights) : uint64_t +/// 6. 
Data 2 : $4 bytes +typedef struct __vcl_query_desc_t { + const uint8_t* modelIRData; + uint64_t modelIRSize; ///< Size of modelIRData + const char* options; ///< Compiler config options + uint64_t optionsSize; ///< Size of options +} vcl_query_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines input that is required to create profiling handler +typedef struct __vcl_profiling_input_t { + const uint8_t* blobData; ///< Pointer to the buffer with the blob + uint64_t blobSize; ///< Size of the blob in bytes + const uint8_t* profData; ///< Pointer to the raw profiling output + uint64_t profSize; ///< Size of the raw profiling output +} vcl_profiling_input_t, *p_vcl_profiling_input_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Decoded profiling output +typedef struct __vcl_profiling_output_t { + const uint8_t* data; ///< Either a pointer to raw data or pointer to the array of structures + uint64_t size; ///< Size of the buffer in bytes +} vcl_profiling_output_t, *p_vcl_profiling_output_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Return VCL API version to caller, shall never change this interface to support backward compatibility check +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetVersion(vcl_version_info_t* compilerVersion, + vcl_version_info_t* profilingVersion); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Creates a compiler object and returns the compiler handle +VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerCreate(vcl_compiler_desc_t* compilerDesc, + vcl_device_desc_t* deviceDesc, + vcl_compiler_handle_t* compiler, + vcl_log_handle_t* logHandle); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the compiler +VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerDestroy(vcl_compiler_handle_t compiler); 
+ +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieves the compiler properties, include the version and supported_opsets +VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerGetProperties(vcl_compiler_handle_t compiler, + vcl_compiler_properties_t* properties); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Create an querynetwork object and return the handle +VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetworkCreate(vcl_compiler_handle_t compiler, + vcl_query_desc_t desc, + vcl_query_handle_t* query); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieve result of query network +/// @attention Should be called twice, first time to retrieve data size, second time to get data. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetwork(vcl_query_handle_t query, uint8_t* queryResult, uint64_t* size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the queryNetwork and releases the cached query result +VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetworkDestroy(vcl_query_handle_t query); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Creates an executable object and returns the executable handle. +/// Parse modelIRData in the executable descriptor to blob and store it in the executable. 
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableCreate(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_executable_handle_t* executable); + +DEPRECATED typedef struct __vcl_allocator_t { + uint8_t* (*allocate)(uint64_t); + void (*deallocate)(uint8_t*); +} vcl_allocator_t; + +typedef struct __vcl_allocator2_t { + uint8_t* (*allocate)(struct __vcl_allocator2_t*, uint64_t); + void (*deallocate)(struct __vcl_allocator2_t*, uint8_t*); +} vcl_allocator2_t; + +DEPRECATED VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreate(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + const vcl_allocator_t* allocator, + uint8_t** blobBuffer, + uint64_t* blobSize); + +VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreate2(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_allocator2_t* allocator, + uint8_t** blobBuffer, + uint64_t* blobSize); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the executable and releases the cached blob. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableDestroy(vcl_executable_handle_t executable); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief If blobBuffer is null, the function returns the size of the blob stored in the executable. +/// Otherwise the function copies the executable cached blob to the blobBuffer provided by the caller. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableGetSerializableBlob(vcl_executable_handle_t executable, + uint8_t* blobBuffer, + uint64_t* blobSize); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Creates a buffer with decoded profiling info. +/// This is the most computationally expensive profiling API. +/// It does all memory allocations and postprocessing. 
+/// @warning Caller must keep \b p_vcl_profiling_input_t::profData buffer alive until +/// \b vclProfilingDestroy call if \b VCL_PROFILING_RAW request is expected. +/// \b vclProfilingCreate function doesn't copy profiling output buffer but will +/// return pointer to it as a response to \b VCL_PROFILING_RAW request. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingCreate(p_vcl_profiling_input_t profilingInput, + vcl_profiling_handle_t* profilingHandle, + vcl_log_handle_t* logHandle); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Provides profiling information based on request argument. +/// @warning For \b VCL_PROFILING_RAW request it returns a pointer to the buffer that was provided to +/// \b vclProfilingCreate function call. This means that original buffer with profiling output must +/// be alive till this call. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetDecodedProfilingBuffer(vcl_profiling_handle_t profilingHandle, + vcl_profiling_request_type_t requestType, + p_vcl_profiling_output_t profilingOutput); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the buffer with decoded profiling info. +/// Now caller may safely dispose raw profiling output. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingDestroy(vcl_profiling_handle_t profilingHandle); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get version of post-processing module +VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingGetProperties(vcl_profiling_handle_t profilingHandle, + vcl_profiling_properties_t* properties); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieves error message from log handler. +/// Handle is released automatically with related compiler or Profiler. 
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclLogHandleGetString(vcl_log_handle_t logHandle, size_t* logSize, char* log); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieve the list of supported compiler options +/// @attention Should be called twice, first time to retrieve data size, second time to get data. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetCompilerSupportedOptions(vcl_compiler_handle_t compiler, + char* result, + uint64_t* size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Verifies if a given config option (or option-value pair) is supported by the compiler +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetCompilerIsOptionSupported(vcl_compiler_handle_t compiler, + const char* option, + const char* value); + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif // VPUX_COMPILER_L0_H diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 0675d964565947..5bc7c236e45a10 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -11,6 +11,7 @@ #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/runtime/so_ptr.hpp" +#include "vcl_api.hpp" #include "ze_graph_ext_wrappers.hpp" namespace intel_npu { diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp new file mode 100644 index 00000000000000..6251821b04403d --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp @@ -0,0 +1,118 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "intel_npu/icompiler.hpp" +#include 
"npu_driver_compiler.h" +#include "openvino/core/except.hpp" + +namespace intel_npu { + +// clang-format off +#define vcl_symbols_list() \ + vcl_symbol_statement(vclGetVersion) \ + vcl_symbol_statement(vclCompilerCreate) \ + vcl_symbol_statement(vclCompilerDestroy) \ + vcl_symbol_statement(vclCompilerGetProperties) \ + vcl_symbol_statement(vclQueryNetworkCreate) \ + vcl_symbol_statement(vclQueryNetwork) \ + vcl_symbol_statement(vclQueryNetworkDestroy) \ + vcl_symbol_statement(vclExecutableCreate) \ + vcl_symbol_statement(vclAllocatedExecutableCreate) \ + vcl_symbol_statement(vclExecutableDestroy) \ + vcl_symbol_statement(vclExecutableGetSerializableBlob) \ + vcl_symbol_statement(vclProfilingCreate) \ + vcl_symbol_statement(vclGetDecodedProfilingBuffer) \ + vcl_symbol_statement(vclProfilingDestroy) \ + vcl_symbol_statement(vclProfilingGetProperties) \ + vcl_symbol_statement(vclLogHandleGetString) + + +//unsupported symbols with older ze_loader versions +#define vcl_weak_symbols_list() \ + vcl_symbol_statement(vclAllocatedExecutableCreate2) \ + vcl_symbol_statement(vclGetCompilerSupportedOptions) \ + vcl_symbol_statement(vclGetCompilerIsOptionSupported) +// clang-format on + +class VCLApi { +public: + VCLApi(); + VCLApi(const VCLApi& other) = delete; + VCLApi(VCLApi&& other) = delete; + void operator=(const VCLApi&) = delete; + void operator=(VCLApi&&) = delete; + + static const std::shared_ptr& getInstance(); + std::shared_ptr getLibrary() const { + return lib; + } + +#define vcl_symbol_statement(vcl_symbol) decltype(&::vcl_symbol) vcl_symbol; + vcl_symbols_list(); + vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +private: + std::shared_ptr lib; + Logger _logger; +}; + +#define vcl_symbol_statement(vcl_symbol) \ + template \ + inline typename std::invoke_result::type wrapped_##vcl_symbol(Args... 
args) { \ + const auto& ptr = VCLApi::getInstance(); \ + if (ptr->vcl_symbol == nullptr) { \ + OPENVINO_THROW("Unsupported vcl_symbol " #vcl_symbol); \ + } \ + return ptr->vcl_symbol(std::forward(args)...); \ + } +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement +#define vcl_symbol_statement(vcl_symbol) inline decltype(&::vcl_symbol) vcl_symbol = wrapped_##vcl_symbol; +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +class VCLCompilerImpl final : public intel_npu::ICompiler { +public: + VCLCompilerImpl(); + ~VCLCompilerImpl() override; + + static std::shared_ptr& getInstance() { + static std::shared_ptr compiler = std::make_shared(); + return compiler; + } + + NetworkDescription compile(const std::shared_ptr& model, const Config& config) const override; + + ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; + + NetworkMetadata parse(const std::vector& network, const Config& config) const override; + + uint32_t get_version() const override; + + std::vector process_profiling_output(const std::vector& profData, + const std::vector& network, + const intel_npu::Config& config) const final override; + + bool get_supported_options(std::vector& options) const; + + bool is_option_supported(const std::string& option) const; + +private: + std::shared_ptr _vclApi; + vcl_log_handle_t _logHandle = nullptr; + vcl_compiler_handle_t _compilerHandle = nullptr; + vcl_compiler_properties_t _compilerProperties; + vcl_version_info_t _vclVersion; + vcl_version_info_t _vclProfilingVersion; + Logger _logger; +}; + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index 505c988e41151c..f647e349a6d01a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ 
b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -75,4 +75,7 @@ class ZeGraphExtWrappers { Logger _logger; }; +// Parse the result string of query from format to unordered_set of string +std::unordered_set parseQueryResult(std::vector& data); + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp index 9494f484bb7c72..c37071f10395d3 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp @@ -226,7 +226,6 @@ void Graph::initialize(const Config& config) { _zeGraphExt->initializeGraph(_graphDesc, _commandQueueGroupOrdinal); _logger.debug("Graph initialize finish"); - // We are allowed to release the original blob because weights were loaded in NPU memory during // _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are // releasing it here to avoid unnecessary memory usage. 
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 6a636fa398bf19..2a841dd9d522be 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -23,14 +23,15 @@ #include "openvino/util/shared_object.hpp" #include "weightless_graph.hpp" -namespace { +namespace { +#ifndef VCL_FOR_COMPILER std::shared_ptr load_library(const std::string& libpath) { -#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) +# if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) return ov::util::load_shared_object(ov::util::string_to_wstring(libpath).c_str()); -#else +# else return ov::util::load_shared_object(libpath.c_str()); -#endif +# endif } std::shared_ptr get_compiler(std::shared_ptr so) { @@ -51,7 +52,7 @@ ov::SoPtr load_compiler(const std::string& libpath) { return ov::SoPtr(compiler, compilerSO); } - +#endif ov::Tensor make_tensor_from_vector(std::vector& vector) { auto tensor = ov::Tensor(ov::element::u8, ov::Shape{vector.size()}, vector.data()); auto impl = ov::get_tensor_impl(std::move(tensor)); @@ -63,6 +64,7 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { } // namespace + namespace intel_npu { PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) @@ -70,11 +72,15 @@ PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr(VCLCompilerImpl::getInstance(), VCLApi::getInstance()->getLibrary()); +#else _logger.info("PLUGIN compiler will be used."); std::string baseName = "npu_mlir_compiler"; auto libPath = ov::util::make_plugin_library_name(ov::util::get_ov_lib_path(), baseName + OV_BUILD_POSTFIX); _compiler = load_compiler(libPath); - +#endif if (_zeroInitStruct == nullptr) { return; } @@ -101,26 +107,36 @@ std::shared_ptr PluginCompilerAdapter::compile(const 
std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); + graphDesc = + _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); +#ifdef VCL_FOR_COMPILER + // For VCL, we need to get metadata from driver parser + networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); + networkMeta.name = model->get_friendly_name(); +>>>>>>> e20458aedf (Add VCLApi and VCLCompilerImpl) } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); } } - return std::make_shared( - _zeGraphExt, - _zeroInitStruct, - graphDesc, - std::move(networkDesc.metadata), - std::move(tensor), - config, - /* persistentBlob = */ true, // exporting the blob shall be available in such a scenario - _compiler); + return std::make_shared(_zeGraphExt, + _zeroInitStruct, + graphDesc, +#ifdef VCL_FOR_COMPILER + std::move(networkMeta), +#else + std::move(networkDesc.metadata), +#endif + std::move(tensor), + /* blobAllocatedByPlugin = */ false, + config, + _compiler); } std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr& model, @@ -262,21 +278,37 @@ std::shared_ptr PluginCompilerAdapter::parse( const std::optional>& model) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); - _logger.debug("parse start"); + ze_graph_handle_t graphHandle = nullptr; + NetworkMetadata networkMeta; std::vector network(mainBlob.get_byte_size()); + +#ifdef VCL_FOR_COMPILER + _logger.debug("parse metadata from driver for vcl compiler"); + if (_zeGraphExt) { + _logger.debug("parse start for vcl compiler"); + graphHandle = _zeGraphExt->getGraphHandle(*reinterpret_cast(mainBlob.data()), mainBlob.get_byte_size()); + networkMeta = _zeGraphExt->getNetworkMeta(graphHandle); + } + _logger.debug("parse end for vcl compiler"); +#else + _logger.debug("parse start"); network.assign(reinterpret_cast(mainBlob.data()), reinterpret_cast(mainBlob.data()) + 
mainBlob.get_byte_size()); auto networkMeta = _compiler->parse(network, config); network.clear(); network.shrink_to_fit(); +<<<<<<< HEAD GraphDescriptor mainGraphDesc; +======= +>>>>>>> e20458aedf (Add VCLApi and VCLCompilerImpl) if (_zeGraphExt) { mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); } _logger.debug("main schedule parse end"); +#endif // exporting the blob when we get it from cache or ov::hint::compiled_blob property // shall be available @@ -343,15 +375,60 @@ uint32_t PluginCompilerAdapter::get_version() const { } std::vector PluginCompilerAdapter::get_supported_options() const { +#ifdef VCL_FOR_COMPILER + // For VCL, we can return the supported options from compiler + VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); + if (vclCompiler == nullptr) { + _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning empty supported options."); + return {}; + } + std::vector options; + if (!vclCompiler->get_supported_options(options)) { + _logger.warning("VCLCompilerImpl get_supported_options failed. 
Returning empty supported options."); + return {}; + } + + if (options.empty()) { + _logger.warning("get_supported_options returned empty options."); + return {}; + } + + std::string compilerOptionsStr(options.data(), options.size()); + _logger.debug("VCLCompilerImpl return supported_options: %s", compilerOptionsStr.c_str()); + // vectorize string + std::istringstream suppstream(compilerOptionsStr); + std::vector compilerOpts = {}; + std::string option; + while (suppstream >> option) { + compilerOpts.push_back(option); + } + return compilerOpts; +#else // PluginCompiler has all the same options as plugin // Returing empty string to let the plugin fallback to legacy registration return {}; +#endif } bool PluginCompilerAdapter::is_option_supported(std::string optname) const { +#ifdef VCL_FOR_COMPILER + VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); + if (vclCompiler == nullptr) { + _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning false for check."); + return false; + } + if (vclCompiler->is_option_supported(optname)) { + _logger.debug("Option %s is supported by VCLCompilerImpl", optname.c_str()); + return true; + } else { + _logger.debug("Option %s is not supported by VCLCompilerImpl", optname.c_str()); + return false; + } +#else // This functions has no utility in PluginCompiler // returning false for any request to avoid the option of spaming the plugin return false; +#endif } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp new file mode 100644 index 00000000000000..60b3afb7628814 --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -0,0 +1,439 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "vcl_api.hpp" + +#include "intel_npu/profiling.hpp" +#include "ir_serializer.hpp" +#include "openvino/runtime/make_tensor.hpp" +#include 
"openvino/util/file_util.hpp" +#include "openvino/util/shared_object.hpp" + +namespace intel_npu { + +static inline std::string getLatestVCLLog(vcl_log_handle_t logHandle) { + Logger _logger("VCLAPI", Logger::global().level()); + _logger.debug("getLatestVCLLog start"); + + vcl_version_info_t compilerVersion; + vcl_version_info_t profilingVersion; + vcl_result_t ret = vclGetVersion(&compilerVersion, &profilingVersion); + + if (ret != VCL_RESULT_SUCCESS || compilerVersion.major < 3) { + _logger.warning("Failed to get VCL version: 0x%x", ret); + return "Can not get VCL log, VCL version is too old!"; + } + + // Get log size + size_t size = 0; + // Null graph handle to get error log + ret = vclLogHandleGetString(logHandle, &size, nullptr); + if (VCL_RESULT_SUCCESS != ret) { + return "Failed to get size of latest VCL log"; + } + + if (size <= 0) { + return "No error stored in VCL when error detected"; + } + + // Get log content + std::string logContent{}; + logContent.resize(size); + ret = vclLogHandleGetString(logHandle, &size, const_cast(logContent.data())); + if (VCL_RESULT_SUCCESS != ret) { + return "Size of latest error log > 0, failed to get content"; + } + _logger.debug("getLatestBuildError end"); + return logContent; +} + +#define THROW_ON_FAIL_FOR_VCL(step, ret, logHandle) \ + { \ + vcl_result_t result = ret; \ + if (result != VCL_RESULT_SUCCESS) { \ + OPENVINO_THROW("Failed to call VCL API : ", \ + step, \ + " result: 0x", \ + std::hex, \ + result, \ + " - ", \ + getLatestVCLLog(logHandle)); \ + } \ + } + +VCLApi::VCLApi() : _logger("VCLApi", ov::log::Level::DEBUG) { + const std::string baseName = "npu_vcl_compiler"; + try { + auto libpath = ov::util::make_plugin_library_name({}, baseName); + _logger.debug("Try to load npu_vcl_compiler"); + +#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) + this->lib = ov::util::load_shared_object(ov::util::string_to_wstring(libpath).c_str()); +#else + this->lib = 
ov::util::load_shared_object(libpath.c_str()); +#endif + } catch (const std::runtime_error& error) { + _logger.debug("Failed to load npu_vcl_compiler"); + OPENVINO_THROW(error.what()); + } + + try { +#define vcl_symbol_statement(vcl_symbol) \ + this->vcl_symbol = reinterpret_cast(ov::util::get_symbol(lib, #vcl_symbol)); + vcl_symbols_list(); +#undef vcl_symbol_statement + } catch (const std::runtime_error& error) { + _logger.debug("Failed to get formal symbols from npu_vcl_compiler"); + OPENVINO_THROW(error.what()); + } + +#define vcl_symbol_statement(vcl_symbol) \ + try { \ + this->vcl_symbol = reinterpret_cast(ov::util::get_symbol(lib, #vcl_symbol)); \ + } catch (const std::runtime_error&) { \ + _logger.debug("Failed to get %s from npu_vcl_compiler", #vcl_symbol); \ + this->vcl_symbol = nullptr; \ + } + vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +#define vcl_symbol_statement(vcl_symbol) vcl_symbol = this->vcl_symbol; + vcl_symbols_list(); + vcl_weak_symbols_list(); +#undef vcl_symbol_statement +} + +const std::shared_ptr& VCLApi::getInstance() { + static std::shared_ptr instance = std::make_shared(); + return instance; +} + +VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerImpl", ov::log::Level::DEBUG) { + _logger.debug("VCLCompilerImpl constructor start"); + // Initialize the VCL API + THROW_ON_FAIL_FOR_VCL("vclGetVersion", vclGetVersion(&_vclVersion, &_vclProfilingVersion), nullptr); + + _logger.info("Plugin VCL API Version: %d.%d", VCL_COMPILER_VERSION_MAJOR, VCL_COMPILER_VERSION_MINOR); + _logger.info("Plugin VCL Profiling API Version: %d.%d", VCL_PROFILING_VERSION_MAJOR, VCL_PROFILING_VERSION_MINOR); + _logger.info("Lib VCL Compiler Version: %d.%d", _vclVersion.major, _vclVersion.minor); + _logger.info("Lib VCL Profiling Version: %d.%d", _vclProfilingVersion.major, _vclProfilingVersion.minor); + _logger.info("Use Lib VCL version to create compiler"); + + vcl_compiler_desc_t compilerDesc; + compilerDesc.version = 
_vclVersion; + compilerDesc.debugLevel = static_cast<__vcl_log_level_t>(static_cast(Logger::global().level()) - 1); + vcl_device_desc_t device_desc; + device_desc.size = sizeof(vcl_device_desc_t); + device_desc.deviceID = 0x643E; // Value from intel_npu/src/backend/src/zero_device.cpp + device_desc.revision = -1; // -1 to skip the config + device_desc.tileCount = 5; // 1 as init value + + THROW_ON_FAIL_FOR_VCL("vclCompilerCreate", + vclCompilerCreate(&compilerDesc, &device_desc, &_compilerHandle, &_logHandle), + nullptr); + + THROW_ON_FAIL_FOR_VCL("vclCompilerGetProperties", + vclCompilerGetProperties(_compilerHandle, &_compilerProperties), + _logHandle); + + _logger.info("VCL Compiler created successfully"); + _logger.info("VCL Compiler Properties: ID: %s, Version: %d.%d, Supported Opsets: %u", + _compilerProperties.id, + _compilerProperties.version.major, + _compilerProperties.version.minor, + _compilerProperties.supportedOpsets); +} + +VCLCompilerImpl::~VCLCompilerImpl() { + if (_compilerHandle) { + THROW_ON_FAIL_FOR_VCL("vclCompilerDestroy", vclCompilerDestroy(_compilerHandle), _logHandle); + } + if (_logHandle) { + _logHandle = nullptr; // Log handle is released automatically with the compiler + } + _logger.info("VCL Compiler destroyed successfully"); +} + +struct vcl_allocator_vector : vcl_allocator2_t { + vcl_allocator_vector() : vcl_allocator2_t{vector_allocate, vector_deallocate} {} + + static uint8_t* vector_allocate(vcl_allocator2_t* allocator, size_t size) { + vcl_allocator_vector* vecAllocator = static_cast(allocator); + vecAllocator->m_vec.resize(size); + return vecAllocator->m_vec.data(); + } + + static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) { + vcl_allocator_vector* vecAllocator = static_cast(allocator); + vecAllocator->m_vec.clear(); + vecAllocator->m_vec.shrink_to_fit(); + } + + std::vector m_vec; +}; + +struct vcl_allocator_malloc { + static uint8_t* vcl_allocate(uint64_t size) { + return reinterpret_cast(malloc(size)); 
+ } + + static void vcl_deallocate(uint8_t* ptr) { + free(ptr); + } +}; + +NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { + _logger.debug("compile start"); + + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; + _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); + + _logger.debug("serialize IR"); + ze_graph_compiler_version_info_t compilerVersion; + compilerVersion.major = _compilerProperties.version.major; + compilerVersion.minor = _compilerProperties.version.minor; + auto serializedIR = intel_npu::driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion); + + std::string buildFlags; + const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); + + _logger.debug("create build flags"); + buildFlags += intel_npu::driver_compiler_utils::serializeIOInfo(model, useIndices); + buildFlags += " "; + buildFlags += intel_npu::driver_compiler_utils::serializeConfig(config, compilerVersion); + _logger.debug("final build flags to compiler: %s", buildFlags.c_str()); + vcl_executable_desc_t exeDesc = {serializedIR.second.get(), + serializedIR.first, + buildFlags.c_str(), + buildFlags.size()}; + _logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); + if (_vclVersion.major >= 7 && _vclVersion.minor >= 4) { + // For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2 + _logger.debug("Using vclAllocatedExecutableCreate2 for VCL 7.4+"); + vcl_allocator_vector allocator; + uint8_t* blob = nullptr; + size_t size = 0; + + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreate2", + vclAllocatedExecutableCreate2(_compilerHandle, exeDesc, &allocator, &blob, &size), + _logHandle); + if (size == 0 || blob == nullptr) { + OPENVINO_THROW("Failed to create VCL executable, size is zero or blob is null"); + } + + // Use empty metadata as VCL does not support metadata 
extraction + NetworkMetadata metadata; + + _logger.debug("compile end, blob size:%d", allocator.m_vec.size()); + return NetworkDescription(std::move(allocator.m_vec), std::move(metadata)); + } else if (_vclVersion.major >= 6 && _vclVersion.minor >= 1) { + // For older versions, we use vclAllocatedExecutableCreate + _logger.debug("Using vclAllocatedExecutableCreate for 6.1 < VCL < 7.4"); + + vcl_allocator_t allocator; + allocator.allocate = vcl_allocator_malloc::vcl_allocate; + allocator.deallocate = vcl_allocator_malloc::vcl_deallocate; + uint8_t* blob = nullptr; + size_t size = 0; + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreate", + vclAllocatedExecutableCreate(_compilerHandle, exeDesc, &allocator, &blob, &size), + _logHandle); + if (size == 0 || blob == nullptr) { + OPENVINO_THROW("Failed to create VCL executable, size is zero or blob is null"); + } + + std::vector compiledNetwork(blob, blob + size); + allocator.deallocate(blob); + + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + + _logger.debug("compile end, blob size:%d", compiledNetwork.size()); + return NetworkDescription(std::move(compiledNetwork), std::move(metadata)); + } else { + // For versions before 6.1, we use vclExecutableCreate + _logger.debug("Using vclExecutableCreate for VCL < 6.1"); + vcl_executable_handle_t exeHandle = nullptr; + THROW_ON_FAIL_FOR_VCL("vclExecutableCreate", + vclExecutableCreate(_compilerHandle, exeDesc, &exeHandle), + _logHandle); + + size_t size = 0; + THROW_ON_FAIL_FOR_VCL("vclExecutableGetSerializableBlob", + vclExecutableGetSerializableBlob(exeHandle, nullptr, &size), + _logHandle); + if (size == 0) { + OPENVINO_THROW("Failed to get VCL executable blob size, size is zero"); + } + std::vector compiledNetwork(size); + THROW_ON_FAIL_FOR_VCL("vclExecutableGetSerializableBlob", + vclExecutableGetSerializableBlob(exeHandle, compiledNetwork.data(), &size), + _logHandle); + + THROW_ON_FAIL_FOR_VCL("vclExecutableDestroy", 
vclExecutableDestroy(exeHandle), _logHandle); + + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + + _logger.debug("compile end, blob size:%d", compiledNetwork.size()); + return NetworkDescription(std::move(compiledNetwork), std::move(metadata)); + } +} + +intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector& network, const Config& config) const { + _logger.debug("parse start"); + // VCL does not support parse, return empty metadata + return intel_npu::NetworkMetadata(); +} + +std::vector VCLCompilerImpl::process_profiling_output(const std::vector& profData, + const std::vector& network, + const intel_npu::Config& config) const { + _logger.debug("process_profiling_output start"); + + vcl_profiling_handle_t profilingHandle; + vcl_profiling_input_t profilingInput = {network.data(), network.size(), profData.data(), profData.size()}; + vcl_log_handle_t logHandle; + THROW_ON_FAIL_FOR_VCL("vclProfilingCreate", + vclProfilingCreate(&profilingInput, &profilingHandle, &logHandle), + nullptr); + + vcl_profiling_properties_t profProperties; + THROW_ON_FAIL_FOR_VCL("vclProfilingGetProperties", + vclProfilingGetProperties(profilingHandle, &profProperties), + logHandle); + + _logger.info("VCL Profiling Properties: Version: %d.%d", + profProperties.version.major, + profProperties.version.minor); + + // We only use layer level info + vcl_profiling_request_type_t request = VCL_PROFILING_LAYER_LEVEL; + + vcl_profiling_output_t profOutput; + profOutput.data = NULL; + THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", + vclGetDecodedProfilingBuffer(profilingHandle, request, &profOutput), + logHandle); + if (profOutput.data == NULL) { + OPENVINO_THROW("Failed to get VCL profiling output"); + } + + std::vector layerInfo(profOutput.size / sizeof(ze_profiling_layer_info)); + if (profOutput.size > 0) { + _logger.debug("VCL profiling output size: %d", profOutput.size); + std::memcpy(layerInfo.data(), profOutput.data, 
profOutput.size); + } + + // profOutput.data = NULL; + // THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", vclGetDecodedProfilingBuffer(profilingHandle, + // VCL_PROFILING_TASK_LEVEL, &profOutput), logHandle); if (profOutput.data == NULL) { + // OPENVINO_THROW("Failed to get VCL profiling task level output"); + // } + + // profOutput.data = NULL; + // THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", vclGetDecodedProfilingBuffer(profilingHandle, + // VCL_PROFILING_RAW, &profOutput),logHandle); if (profOutput.data == NULL) { + // OPENVINO_THROW("Failed to get VCL profiling raw output"); + // } + + THROW_ON_FAIL_FOR_VCL("vclProfilingDestroy", vclProfilingDestroy(profilingHandle), logHandle); + + return intel_npu::profiling::convertLayersToIeProfilingInfo(layerInfo); // Return processed profiling info +} + +uint32_t VCLCompilerImpl::get_version() const { + return ZE_MAKE_VERSION(_compilerProperties.version.major, _compilerProperties.version.minor); +} + +ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, const Config& config) const { + _logger.debug("query start"); + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; + _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); + + _logger.debug("serialize IR"); + ze_graph_compiler_version_info_t compilerVersion; + compilerVersion.major = _compilerProperties.version.major; + compilerVersion.minor = _compilerProperties.version.minor; + auto serializedIR = intel_npu::driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion); + + std::string buildFlags; + buildFlags += intel_npu::driver_compiler_utils::serializeConfig(config, compilerVersion); + _logger.debug("queryImpl build flags : %s", buildFlags.c_str()); + + vcl_query_handle_t queryHandle; + vcl_query_desc_t queryDesc = {serializedIR.second.get(), serializedIR.first, buildFlags.c_str(), buildFlags.size()}; + THROW_ON_FAIL_FOR_VCL("vclQueryNetworkCreate", + 
vclQueryNetworkCreate(_compilerHandle, queryDesc, &queryHandle), + _logHandle); + + uint64_t size = 0; + THROW_ON_FAIL_FOR_VCL("vclQueryNetwork", vclQueryNetwork(queryHandle, nullptr, &size), _logHandle); + + std::vector supportedLayers(size); + THROW_ON_FAIL_FOR_VCL("vclQueryNetwork", + vclQueryNetwork(queryHandle, reinterpret_cast(supportedLayers.data()), &size), + _logHandle); + + THROW_ON_FAIL_FOR_VCL("vclQueryNetworkDestroy", vclQueryNetworkDestroy(queryHandle), _logHandle); + + const std::string deviceName = "NPU"; + ov::SupportedOpsMap result; + const auto parsedSupportedLayers = parseQueryResult(supportedLayers); + for (auto&& layerName : parsedSupportedLayers) { + result.emplace(layerName, deviceName); + } + _logger.info("For given model, there are %d supported layers", parsedSupportedLayers.size()); + + return result; +} + +bool VCLCompilerImpl::get_supported_options(std::vector& options) const { + _logger.debug("get_supported_options start"); + // 1. get size of compiler supported options list + size_t str_size = 0; + try { + THROW_ON_FAIL_FOR_VCL("vclGetCompilerSupportedOptions", + vclGetCompilerSupportedOptions(_compilerHandle, nullptr, &str_size), + _logHandle); + + if (str_size > 0) { + _logger.debug("obtain list"); + // 2. allocate buffer for it + options.resize(str_size); + // 3. 
populate char list + THROW_ON_FAIL_FOR_VCL("vclGetCompilerSupportedOptions", + vclGetCompilerSupportedOptions(_compilerHandle, options.data(), &str_size), + _logHandle); + + _logger.debug("Option list size %d, got option list", str_size); + return true; + } else { + _logger.debug("Option list size 0 - skipping!"); + } + } catch (const std::exception& e) { + // The API is only supported in new version, just add log here + _logger.debug("Exception in get_supported_options: %s", e.what()); + } + _logger.debug("get_supported_options end, no options found"); + return false; +} + +bool VCLCompilerImpl::is_option_supported(const std::string& option) const { + try { + const char* optname_ch = option.c_str(); + _logger.debug("is_option_supported start for option: %s", optname_ch); + THROW_ON_FAIL_FOR_VCL("vclGetCompilerIsOptionSupported", + vclGetCompilerIsOptionSupported(_compilerHandle, optname_ch, nullptr), + _logHandle); + return true; + } catch (const std::exception& e) { + // The API is only supported in new version, just add log here + _logger.debug("Exception in is_option_supported: %s", e.what()); + } + _logger.debug("option: %s is not supported", option.c_str()); + return false; +} + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index a37ebff363b00d..ccb00d971b8471 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -250,7 +250,7 @@ void ZeGraphExtWrappers::initializeGraphThroughCommandList(ze_graph_handle_t gra } // Parse the result string of query from format to unordered_set of string -static std::unordered_set parseQueryResult(std::vector& data) { +std::unordered_set parseQueryResult(std::vector& data) { std::string dataString(data.begin(), data.end()); std::unordered_set result; size_t i = 0, start = 0; 
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index b983a7b32ae2e2..0691e0f986d406 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -603,6 +603,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); +#ifndef VCL_FOR_COMPILER const auto set_cache_dir = localConfig.get(); if (!set_cache_dir.empty()) { const auto compilerType = localConfig.get(); @@ -610,6 +611,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type"); } } +#endif const auto platform = utils::getCompilationPlatform(localConfig.get(), From 3aad6d869f77334ce1fe7d452389de479171d11f Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Sun, 20 Jul 2025 10:40:30 +0800 Subject: [PATCH 02/25] Use vclAllocateExecutionCreate3 to get metadata Signed-off-by: Xin Wang remove vclAllocatedExecutableCreate3 --- .../include/npu_driver_compiler.h | 2 +- .../src/plugin_compiler_adapter.cpp | 31 ++++++++----------- .../src/compiler_adapter/src/vcl_api.cpp | 2 +- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h index a8c38506fc844c..c945a26565ebef 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h +++ b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h @@ -23,7 +23,7 @@ extern "C" { #endif #define VCL_COMPILER_VERSION_MAJOR 7 -#define VCL_COMPILER_VERSION_MINOR 4 +#define VCL_COMPILER_VERSION_MINOR 5 #define VCL_PROFILING_VERSION_MAJOR 2 #define VCL_PROFILING_VERSION_MINOR 0 diff --git 
a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 2a841dd9d522be..a21c44f10d1592 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -23,7 +23,6 @@ #include "openvino/util/shared_object.hpp" #include "weightless_graph.hpp" - namespace { #ifndef VCL_FOR_COMPILER std::shared_ptr load_library(const std::string& libpath) { @@ -64,7 +63,6 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { } // namespace - namespace intel_npu { PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) @@ -107,18 +105,20 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); + graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); #ifdef VCL_FOR_COMPILER - // For VCL, we need to get metadata from driver parser - networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); - networkMeta.name = model->get_friendly_name(); ->>>>>>> e20458aedf (Add VCLApi and VCLCompilerImpl) + if (networkMeta.inputs.empty() && networkMeta.outputs.empty()) { + // If the metadata is empty, we can try to get it from the driver parser + _logger.info("Metadata is empty, trying to get it from the driver parser"); + networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); + networkMeta.name = model->get_friendly_name(); + } +#endif } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. 
Only exports are available"); @@ -134,8 +134,8 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr PluginCompilerAdapter::parse( const std::optional>& model) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); - ze_graph_handle_t graphHandle = nullptr; NetworkMetadata networkMeta; std::vector network(mainBlob.get_byte_size()); + GraphDescriptor mainGraphDesc; #ifdef VCL_FOR_COMPILER _logger.debug("parse metadata from driver for vcl compiler"); if (_zeGraphExt) { _logger.debug("parse start for vcl compiler"); - graphHandle = _zeGraphExt->getGraphHandle(*reinterpret_cast(mainBlob.data()), mainBlob.get_byte_size()); - networkMeta = _zeGraphExt->getNetworkMeta(graphHandle); + mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); + networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc); } _logger.debug("parse end for vcl compiler"); #else @@ -298,11 +298,6 @@ std::shared_ptr PluginCompilerAdapter::parse( network.clear(); network.shrink_to_fit(); -<<<<<<< HEAD - GraphDescriptor mainGraphDesc; - -======= ->>>>>>> e20458aedf (Add VCLApi and VCLCompilerImpl) if (_zeGraphExt) { mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 60b3afb7628814..c1373974b9c88c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -210,7 +210,7 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr= 7 && _vclVersion.minor >= 4) { // For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2 - _logger.debug("Using vclAllocatedExecutableCreate2 for VCL 7.4+"); + _logger.debug("Using vclAllocatedExecutableCreate2 for 7.4 <= VCL < 7.5"); vcl_allocator_vector allocator; uint8_t* blob = nullptr; size_t size = 0; 
From cc6268681b662353c288b18e26d2921b385b71e2 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Sun, 28 Sep 2025 15:45:31 +0800 Subject: [PATCH 03/25] update vcl version check, default compilertype and vcl download link --- .../cmake/download_compiler_libs.cmake | 33 ++++--- src/plugins/intel_npu/cmake/features.cmake | 3 +- .../include/compiler_adapter_factory.hpp | 1 - .../src/compiler_adapter/src/graph.cpp | 1 + .../src/plugin_compiler_adapter.cpp | 2 +- .../src/compiler_adapter/src/vcl_api.cpp | 89 +++++++++++++++++-- .../intel_npu/src/plugin/src/plugin.cpp | 11 +++ 7 files changed, 115 insertions(+), 25 deletions(-) diff --git a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake index 95ced5f78bdb59..3455677525eecf 100644 --- a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake +++ b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake @@ -3,7 +3,7 @@ # # Function to download and extract files -function(download_and_extract url dest_dir zip_file extracted_dir modify_proxy) +function(download_and_extract url zip_file extracted_dir modify_proxy) # Check if the prebuilt VCL compiler libraries not exist if(NOT EXISTS "${extracted_dir}") if(modify_proxy STREQUAL "MODIFY") @@ -39,6 +39,12 @@ function(download_and_extract url dest_dir zip_file extracted_dir modify_proxy) # Determine extraction method based on file extension if("${zip_file}" MATCHES "\\.zip$") file(ARCHIVE_EXTRACT INPUT "${zip_file}" DESTINATION "${extracted_dir}") + elseif("${zip_file}" MATCHES "\\.tar.gz$") + if(NOT EXISTS "${extracted_dir}") + file(MAKE_DIRECTORY "${extracted_dir}") + message(STATUS "Directory ${extracted_dir} created to unzip.") + endif() + execute_process(COMMAND tar -xzf "${zip_file}" -C "${extracted_dir}") elseif("${zip_file}" MATCHES "\\.deb$") execute_process(COMMAND dpkg-deb -x "${zip_file}" "${extracted_dir}") elseif("${zip_file}" MATCHES "\\.exe$") @@ -93,13 +99,12 @@ if(ENABLE_VCL_FOR_COMPILER) 
message(STATUS "Downloading prebuilt NPU VCL compiler libraries") if(WIN32) set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_lib/win") - set(VCL_COMPILER_LIBS_URL "https://downloadmirror.intel.com/854488/npu_win_32.0.100.4023.zip") - set(VCL_COMPILER_LIBS_ZIP "${VCL_COMPILER_LIBS_DIR}/npu_win_32.0.100.4023.zip") - set(VCL_COMPILER_LIBS_DIR_UNZIPPED "${VCL_COMPILER_LIBS_DIR}/npu_win_32.0.100.4023") - - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_ZIP}" "${VCL_COMPILER_LIBS_DIR_UNZIPPED}" "MODIFY") - set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_UNZIPPED}/npu_win_32.0.100.4023/drivers/x64/") + set(VCL_COMPILER_LIBS_URL "https://github.com/openvinotoolkit/npu_compiler/releases/download/npu_ud_2025_38_rc4/w_vpux_compiler_l0_win-7_4_3-Release_dyntbb_postcommit_cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218.zip") + set(VCL_COMPILER_LIBS_ZIP "${VCL_COMPILER_LIBS_DIR}/w_vpux_compiler_l0_win-7_4_3-Release_dyntbb_postcommit_cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218.zip") + set(VCL_COMPILER_LIBS_DIR_UNZIPPED "${VCL_COMPILER_LIBS_DIR}/cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218") + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_ZIP}" "${VCL_COMPILER_LIBS_DIR_UNZIPPED}" "MODIFY") + set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_UNZIPPED}/cid/lib") configure_file( ${VCL_COMPILER_LIB_PATH}/npu_driver_compiler.dll @@ -109,7 +114,7 @@ if(ENABLE_VCL_FOR_COMPILER) set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/npu_vcl_compiler.dll") file(COPY "${VCL_COMPILER_LIB}" DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_BUILD_TYPE}") - message(STATUS "Copying prebuilt VCL compiler libraries npu_vcl_compiler.dll to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for windows") + message(STATUS "Not Copying prebuilt VCL compiler libraries npu_vcl_compiler.dll to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for windows") else() # Check if the operating system is Linux and 
not macOS if(UNIX AND NOT APPLE) @@ -125,7 +130,7 @@ if(ENABLE_VCL_FOR_COMPILER) set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu22.04_amd64.deb") set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu22.04") - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") configure_file( @@ -136,7 +141,7 @@ if(ENABLE_VCL_FOR_COMPILER) set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") file(COPY "${VCL_COMPILER_LIB}" DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") - message(STATUS "Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 22.04") + message(STATUS "Not Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 22.04") elseif(OS_VERSION STREQUAL "24.04") message(STATUS "This is Ubuntu 24.04") # Ubuntu 24.04-specific settings or actions @@ -145,7 +150,7 @@ if(ENABLE_VCL_FOR_COMPILER) set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu24.04_amd64.deb") set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu24.04") - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") configure_file( @@ 
-154,8 +159,8 @@ if(ENABLE_VCL_FOR_COMPILER) COPYONLY ) set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") - file(COPY "${VCL_COMPILER_LIB}" - DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") + # file(COPY "${VCL_COMPILER_LIB}" + # DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") message(STATUS "Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 24.04") else() message(STATUS "This is another version of Ubuntu: ${OS_VERSION}") @@ -171,4 +176,4 @@ if(ENABLE_VCL_FOR_COMPILER) install(FILES ${VCL_COMPILER_LIB} DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${NPU_INTERNAL_COMPONENT}) -endif() +endif() \ No newline at end of file diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 1f462c0e461806..5f763da69188ac 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -12,5 +12,4 @@ endif() ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF) -ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" ON) -ov_option(ENABLE_SYSTEM_NPU_VCL_COMPILER "Use system VCL compiler libraries" OFF) +ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" OFF) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp index 32e1fb384668b2..81e075adf65be3 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp @@ -27,7 +27,6 @@ class CompilerAdapterFactory final { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { OPENVINO_THROW("NPU Compiler Adapter must be used with LEVEL0 backend"); } - return std::make_unique(engineBackend->getInitStructs()); } default: diff --git 
a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp index c37071f10395d3..9494f484bb7c72 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp @@ -226,6 +226,7 @@ void Graph::initialize(const Config& config) { _zeGraphExt->initializeGraph(_graphDesc, _commandQueueGroupOrdinal); _logger.debug("Graph initialize finish"); + // We are allowed to release the original blob because weights were loaded in NPU memory during // _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are // releasing it here to avoid unnecessary memory usage. diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index a21c44f10d1592..1f2b1cfc83a380 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -134,7 +134,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr(static_cast(Logger::global().level()) - 1); vcl_device_desc_t device_desc; device_desc.size = sizeof(vcl_device_desc_t); + device_desc.deviceID = 0x643E; // Value from intel_npu/src/backend/src/zero_device.cpp device_desc.revision = -1; // -1 to skip the config device_desc.tileCount = 5; // 1 as init value @@ -183,6 +196,17 @@ struct vcl_allocator_malloc { } }; +std::string supportVclCompiler(int major, int minor) { + if (major >= 7 && minor >= 4) { + return "vclAllocatedExecutableCreate2"; + } else if (major >= 6 && minor >= 1) { + return "vclAllocatedExecutableCreate"; + } else { + return "vclExecutableCreate"; + } + return "unsupported VCL version"; +} + NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { _logger.debug("compile start"); @@ 
-193,22 +217,52 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr= 7 && _vclVersion.minor >= 4) { + + /// Check the linked vcl version whether supported in plugin + int usedMajor = 0; + bool isDowngrade = false; + if (static_cast(VCL_COMPILER_VERSION_MAJOR) < _vclVersion.major) { + usedMajor = VCL_COMPILER_VERSION_MAJOR; + isDowngrade = true; + } + int usedMinor = isDowngrade ? VCL_COMPILER_VERSION_MINOR : _vclVersion.minor; + + _logger.info("[Debug] Used VCL API Version: %d.%d", usedMajor, usedMinor); + _logger.info("[Debug] compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); + _logger.info("[Debug] embedding compiler vcl version: %d.%d", + VCL_COMPILER_VERSION_MAJOR, + VCL_COMPILER_VERSION_MINOR); + + if (usedMajor >= 7 && usedMinor >= 4) { + if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { + _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " + "%d.%d, \n but loaded VCL is %d.%d.\n" + "Will downwise to form %s to use vclAllocatedExecutableCreate2", + VCL_COMPILER_VERSION_MAJOR, + VCL_COMPILER_VERSION_MINOR, + _vclVersion.major, + _vclVersion.minor, + supportVclCompiler(usedMajor, usedMinor)); + } + // check the vcl version whether support the lastest compile api + // support the lastest vcl api // For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2 _logger.debug("Using vclAllocatedExecutableCreate2 for 7.4 <= VCL < 7.5"); vcl_allocator_vector allocator; @@ -227,7 +281,17 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr= 6 && _vclVersion.minor >= 1) { + } else if (usedMajor >= 6 && usedMinor >= 1) { + if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { + _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " + "%d.%d, \n but loaded VCL is %d.%d.\n" + "Will downwise to form %s to use vclAllocatedExecutableCreate2", + VCL_COMPILER_VERSION_MAJOR, + VCL_COMPILER_VERSION_MINOR, + 
_vclVersion.major, + _vclVersion.minor, + supportVclCompiler(usedMajor, usedMinor)); + } // For older versions, we use vclAllocatedExecutableCreate _logger.debug("Using vclAllocatedExecutableCreate for 6.1 < VCL < 7.4"); @@ -252,6 +316,16 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptrsecond.as()); } + // if there is no compiler_type provided = use base_config value + // update the compilerType by platform: + // 3720 -> DRIVER + // 4000 and later -> MLIR (default value) + auto it_platform = local_conf.find(std::string(PLATFORM::key())); + if (it_platform != local_conf.end()) { + // if platform is provided by local config = use that + if (it_platform->second.as() == ov::intel_npu::Platform::NPU3720) { + return ov::intel_npu::CompilerType::DRIVER; + } + } return base_conf.get(); } From 8b11ee8c106f25996fc569f560aea195f93f4808 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Thu, 6 Nov 2025 13:22:54 +0800 Subject: [PATCH 04/25] fix export issue and build --- src/plugins/intel_npu/cmake/features.cmake | 3 +- .../include/compiler_adapter_factory.hpp | 1 + .../src/plugin_compiler_adapter.cpp | 6 +- .../src/compiler_adapter/src/vcl_api.cpp | 55 ++++++++++++++----- .../intel_npu/src/plugin/src/plugin.cpp | 4 +- 5 files changed, 49 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 5f763da69188ac..1f462c0e461806 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -12,4 +12,5 @@ endif() ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF) -ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" OFF) +ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" ON) +ov_option(ENABLE_SYSTEM_NPU_VCL_COMPILER "Use system VCL compiler libraries" OFF) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp 
b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp index 81e075adf65be3..32e1fb384668b2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp @@ -27,6 +27,7 @@ class CompilerAdapterFactory final { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { OPENVINO_THROW("NPU Compiler Adapter must be used with LEVEL0 backend"); } + return std::make_unique(engineBackend->getInitStructs()); } default: diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 1f2b1cfc83a380..5bf62498ca81eb 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -135,7 +135,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr()) { localConfig.update({{ov::intel_npu::separate_weights_version.name(), "ONE_SHOT"}}); } @@ -294,7 +294,7 @@ std::shared_ptr PluginCompilerAdapter::parse( _logger.debug("parse start"); network.assign(reinterpret_cast(mainBlob.data()), reinterpret_cast(mainBlob.data()) + mainBlob.get_byte_size()); - auto networkMeta = _compiler->parse(network, config); + networkMeta = _compiler->parse(network, config); network.clear(); network.shrink_to_fit(); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 13dfe9bc598d28..580883f77c3914 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -4,12 +4,15 @@ #include "vcl_api.hpp" +#include "intel_npu/config/options.hpp" +#include "intel_npu/common/filtered_config.hpp" #include 
"intel_npu/profiling.hpp" -#include "ir_serializer.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" +#include "vcl_serializer.hpp" #include "ze_graph_ext_wrappers.hpp" +#include "intel_npu/npu_private_properties.hpp" namespace intel_npu { @@ -207,7 +210,8 @@ std::string supportVclCompiler(int major, int minor) { return "unsupported VCL version"; } -NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { +NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, + const Config& config) const { _logger.debug("compile start"); const auto maxOpsetVersion = _compilerProperties.supportedOpsets; @@ -217,16 +221,27 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + auto serializedIR = driver_compiler_utils::serializeIR( + model, + compilerVersion, + maxOpsetVersion, + updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) ? 
updatedConfig.get() + : true, + updatedConfig.get()); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); _logger.debug("create build flags"); - buildFlags += irSerializer.serializeIOInfo(model, useIndices); + buildFlags += driver_compiler_utils::serializeIOInfo(model, useIndices); buildFlags += " "; - buildFlags += irSerializer.serializeConfig(config, compilerVersion); + buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); _logger.debug("final build flags to compiler: %s", buildFlags.c_str()); vcl_executable_desc_t exeDesc = {serializedIR.second.get(), @@ -259,9 +274,8 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& network, const Config& config) const { +intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector& network, + const Config& config) const { _logger.debug("parse start"); // VCL does not support parse, return empty metadata return intel_npu::NetworkMetadata(); @@ -421,7 +436,8 @@ uint32_t VCLCompilerImpl::get_version() const { return ZE_MAKE_VERSION(_compilerProperties.version.major, _compilerProperties.version.minor); } -ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, const Config& config) const { +ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, + const Config& config) const { _logger.debug("query start"); const auto maxOpsetVersion = _compilerProperties.supportedOpsets; _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); @@ -430,11 +446,22 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + + auto serializedIR = driver_compiler_utils::serializeIR( + model, + compilerVersion, + maxOpsetVersion, + 
updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) ? updatedConfig.get() + : true, + updatedConfig.get()); std::string buildFlags; - buildFlags += irSerializer.serializeConfig(config, compilerVersion); + buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); _logger.debug("queryImpl build flags : %s", buildFlags.c_str()); vcl_query_handle_t queryHandle; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 9f04aa83bcd376..16e6e87ab63cf9 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -612,7 +612,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties)); OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); - auto localConfig = fork_local_config(localPropertiesMap, compiler); + auto localConfig = fork_local_config(localPropertiesMap, compiler); //FilteredConfig #ifndef VCL_FOR_COMPILER const auto set_cache_dir = localConfig.get(); @@ -733,7 +733,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< if (successfullyDebatched && localConfig.get() == ov::hint::PerformanceMode::LATENCY) { _logger.info("Override performance mode to THROUGHPUT for compilation"); - auto modifiedConfig = localConfig; // Copy only when needed + auto modifiedConfig = localConfig; // Copy only when needed, FilteredConfig std::stringstream strStream; strStream << ov::hint::PerformanceMode::THROUGHPUT; modifiedConfig.update({{ov::hint::performance_mode.name(), strStream.str()}}); From a08feb728b753edc79633332aa4f54a9922e3aaa Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Wed, 12 Nov 2025 22:46:32 +0800 Subject: [PATCH 05/25] clang-format, add platform update and compiler check --- .../cmake/download_compiler_libs.cmake | 4 +- 
.../include/compiler_adapter_factory.hpp | 7 +- .../include/npu_driver_compiler.h | 2 +- .../include/plugin_compiler_adapter.hpp | 2 +- .../src/compiler_adapter/include/vcl_api.hpp | 9 +- .../include/ze_graph_ext_wrappers.hpp | 2 +- .../src/plugin_compiler_adapter.cpp | 60 +++++++--- .../src/compiler_adapter/src/vcl_api.cpp | 106 +++++++++--------- .../intel_npu/src/plugin/src/plugin.cpp | 46 ++++++-- 9 files changed, 148 insertions(+), 90 deletions(-) diff --git a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake index 3455677525eecf..d8a664259299d7 100644 --- a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake +++ b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake @@ -8,7 +8,7 @@ function(download_and_extract url zip_file extracted_dir modify_proxy) if(NOT EXISTS "${extracted_dir}") if(modify_proxy STREQUAL "MODIFY") # Update proxy to enable download for windows url - set(original_NO_PROXY $ENV{NO_PROXY}) + set(original_NO_PROXY $ENV{NO_PROXY}) set(original_no_proxy $ENV{no_proxy}) set(ENV{NO_PROXY} "") set(ENV{no_proxy} "") @@ -24,7 +24,7 @@ function(download_and_extract url zip_file extracted_dir modify_proxy) if(modify_proxy STREQUAL "MODIFY") # Restore proxy - set(ENV{NO_PROXY} ${original_NO_PROXY}) + set(ENV{NO_PROXY} ${original_NO_PROXY}) set(ENV{no_proxy} ${original_no_proxy}) endif() diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp index 32e1fb384668b2..ada0d47fa19ff3 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp @@ -15,13 +15,14 @@ namespace intel_npu { class CompilerAdapterFactory final { public: std::unique_ptr getCompiler(const ov::SoPtr& engineBackend, - const ov::intel_npu::CompilerType type) const { + const 
ov::intel_npu::CompilerType type, + std::string deviceID = "4000") const { switch (type) { case ov::intel_npu::CompilerType::PLUGIN: { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { - return std::make_unique(nullptr); + return std::make_unique(nullptr, deviceID); } - return std::make_unique(engineBackend->getInitStructs()); + return std::make_unique(engineBackend->getInitStructs(), deviceID); } case ov::intel_npu::CompilerType::DRIVER: { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h index c945a26565ebef..a8c38506fc844c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h +++ b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h @@ -23,7 +23,7 @@ extern "C" { #endif #define VCL_COMPILER_VERSION_MAJOR 7 -#define VCL_COMPILER_VERSION_MINOR 5 +#define VCL_COMPILER_VERSION_MINOR 4 #define VCL_PROFILING_VERSION_MAJOR 2 #define VCL_PROFILING_VERSION_MINOR 0 diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 5bc7c236e45a10..f89d634c6491cf 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -18,7 +18,7 @@ namespace intel_npu { class PluginCompilerAdapter final : public ICompilerAdapter { public: - PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct); + PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, const std::string& deviceId); std::shared_ptr compile(const std::shared_ptr& model, const FilteredConfig& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp 
b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp index 6251821b04403d..54f65e8dc0260a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp @@ -79,13 +79,16 @@ vcl_symbols_list(); vcl_weak_symbols_list(); #undef vcl_symbol_statement +void setDeviceDesc(vcl_device_desc_t& device_desc, const std::string& device); +std::string supportVclCompiler(int major, int minor); + class VCLCompilerImpl final : public intel_npu::ICompiler { public: - VCLCompilerImpl(); + VCLCompilerImpl(const std::string& device); ~VCLCompilerImpl() override; - static std::shared_ptr& getInstance() { - static std::shared_ptr compiler = std::make_shared(); + static std::shared_ptr getInstance(const std::string& device) { + std::shared_ptr compiler = std::make_shared(device); return compiler; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index f647e349a6d01a..079a051e65f8a1 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -75,7 +75,7 @@ class ZeGraphExtWrappers { Logger _logger; }; -// Parse the result string of query from foramt to unordered_set of string +// Parse the result string of query from format to unordered_set of string std::unordered_set parseQueryResult(std::vector& data); } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 5bf62498ca81eb..5b378aede0e398 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -24,13 +24,13 @@ #include "weightless_graph.hpp" namespace { -#ifndef 
VCL_FOR_COMPILER + std::shared_ptr load_library(const std::string& libpath) { -# if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) +#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) return ov::util::load_shared_object(ov::util::string_to_wstring(libpath).c_str()); -# else +#else return ov::util::load_shared_object(libpath.c_str()); -# endif +#endif } std::shared_ptr get_compiler(std::shared_ptr so) { @@ -51,7 +51,7 @@ ov::SoPtr load_compiler(const std::string& libpath) { return ov::SoPtr(compiler, compilerSO); } -#endif + ov::Tensor make_tensor_from_vector(std::vector& vector) { auto tensor = ov::Tensor(ov::element::u8, ov::Shape{vector.size()}, vector.data()); auto impl = ov::get_tensor_impl(std::move(tensor)); @@ -65,20 +65,45 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { namespace intel_npu { -PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) +PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, + const std::string& deviceId) : _zeroInitStruct(zeroInitStruct), _logger("PluginCompilerAdapter", Logger::global().level()) { _logger.debug("initialize PluginCompilerAdapter start"); #ifdef VCL_FOR_COMPILER - _logger.info("VCL driver compiler will be used."); - _compiler = ov::SoPtr(VCLCompilerImpl::getInstance(), VCLApi::getInstance()->getLibrary()); + _logger.info("PLUGIN VCL compiler will be used."); + try { + auto vclCompilerPtr = VCLCompilerImpl::getInstance(deviceId); + auto vclLib = VCLApi::getInstance()->getLibrary(); + if (vclCompilerPtr && vclLib) { + _compiler = ov::SoPtr(vclCompilerPtr, vclLib); + } else { + throw std::runtime_error("VCL compiler or library is nullptr"); + } + } catch (const std::exception& vcl_exception) { + _logger.warning("VCL compiler load failed: %s. 
Trying to load MLIR compiler...", vcl_exception.what()); + std::string baseName = "npu_mlir_compiler"; + auto libPath = ov::util::make_plugin_library_name(ov::util::get_ov_lib_path(), baseName + OV_BUILD_POSTFIX); + try { + _compiler = load_compiler(libPath); + if (!_compiler) { + throw std::runtime_error("MLIR compiler load returned nullptr"); + } else { + _logger.info("MLIR compiler loaded successfully. PLUGIN compiler will be used."); + } + } catch (const std::exception& mlir_exception) { + _logger.error("MLIR compiler load failed: %s", mlir_exception.what()); + throw std::runtime_error("Both VCL and MLIR compiler load failed, aborting."); + } + } #else _logger.info("PLUGIN compiler will be used."); std::string baseName = "npu_mlir_compiler"; auto libPath = ov::util::make_plugin_library_name(ov::util::get_ov_lib_path(), baseName + OV_BUILD_POSTFIX); _compiler = load_compiler(libPath); #endif + if (_zeroInitStruct == nullptr) { return; } @@ -125,18 +150,19 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr(_zeGraphExt, - _zeroInitStruct, - graphDesc, + return std::make_shared( + _zeGraphExt, + _zeroInitStruct, + graphDesc, #ifdef VCL_FOR_COMPILER - std::move(networkMeta), + std::move(networkMeta), #else - std::move(networkDesc.metadata), + std::move(networkDesc.metadata), #endif - std::move(tensor), - config, - /* persistentBlob = */ true, // exporting the blob shall be available in such a scenario - _compiler); + std::move(tensor), + config, + /* persistentBlob = */ true, // exporting the blob shall be available in such a scenario + _compiler); } std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 580883f77c3914..35637df12efccb 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -4,15 +4,15 
@@ #include "vcl_api.hpp" -#include "intel_npu/config/options.hpp" #include "intel_npu/common/filtered_config.hpp" +#include "intel_npu/config/options.hpp" +#include "intel_npu/npu_private_properties.hpp" #include "intel_npu/profiling.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" #include "vcl_serializer.hpp" #include "ze_graph_ext_wrappers.hpp" -#include "intel_npu/npu_private_properties.hpp" namespace intel_npu { @@ -113,7 +113,24 @@ const std::shared_ptr& VCLApi::getInstance() { return instance; } -VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerImpl", ov::log::Level::DEBUG) { +void setDeviceDesc(vcl_device_desc_t& device_desc, const std::string& device) { + std::unordered_map devicesDescsMap = { + {"3720", {sizeof(vcl_device_desc_t), 0xAD1D, static_cast(-1), 2}}, + {"4000", {sizeof(vcl_device_desc_t), 0x643E, static_cast(-1), 5}}, + // For other devices, the tile configuration needs to be provided by the user. 
+ }; + + auto it = devicesDescsMap.find(device); + if (it != devicesDescsMap.end()) { + device_desc = it->second; + } else { + device_desc = devicesDescsMap["4000"]; + } +} + +VCLCompilerImpl::VCLCompilerImpl(const std::string& device) + : _logHandle(nullptr), + _logger("VCLCompilerImpl", Logger::global().level()) { _logger.debug("VCLCompilerImpl constructor start"); // Initialize the VCL API THROW_ON_FAIL_FOR_VCL("vclGetVersion", vclGetVersion(&_vclVersion, &_vclProfilingVersion), nullptr); @@ -127,7 +144,7 @@ VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerIm (VCL_COMPILER_VERSION_MAJOR == _vclVersion.major && VCL_COMPILER_VERSION_MINOR < _vclVersion.minor)) { _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL %d.%d, " "\n but loaded VCL is %d.%d.\n" - "Will downwise to use the lastest plugin vcl compiler!!!", + "Will downwise to use the latest plugin vcl compiler!!!", VCL_COMPILER_VERSION_MAJOR, VCL_COMPILER_VERSION_MINOR, _vclVersion.major, @@ -138,12 +155,9 @@ VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerIm vcl_compiler_desc_t compilerDesc; compilerDesc.version = _vclVersion; compilerDesc.debugLevel = static_cast<__vcl_log_level_t>(static_cast(Logger::global().level()) - 1); - vcl_device_desc_t device_desc; - device_desc.size = sizeof(vcl_device_desc_t); - device_desc.deviceID = 0x643E; // Value from intel_npu/src/backend/src/zero_device.cpp - device_desc.revision = -1; // -1 to skip the config - device_desc.tileCount = 5; // 1 as init value + vcl_device_desc_t device_desc; + setDeviceDesc(device_desc, device); THROW_ON_FAIL_FOR_VCL("vclCompilerCreate", vclCompilerCreate(&compilerDesc, &device_desc, &_compilerHandle, &_logHandle), @@ -210,8 +224,7 @@ std::string supportVclCompiler(int major, int minor) { return "unsupported VCL version"; } -NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, - const Config& config) const { 
+NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { _logger.debug("compile start"); const auto maxOpsetVersion = _compilerProperties.supportedOpsets; @@ -227,13 +240,14 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr() - : true, - updatedConfig.get()); + auto serializedIR = + driver_compiler_utils::serializeIR(model, + compilerVersion, + maxOpsetVersion, + updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) + ? updatedConfig.get() + : true, + updatedConfig.get()); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); @@ -251,25 +265,19 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr(VCL_COMPILER_VERSION_MAJOR) < _vclVersion.major) { - usedMajor = VCL_COMPILER_VERSION_MAJOR; - isDowngrade = true; + uint16_t usedMajor = VCL_COMPILER_VERSION_MAJOR, usedMinor = VCL_COMPILER_VERSION_MINOR; + if (static_cast(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) { + usedMinor = std::min(static_cast(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor); + } else if (static_cast(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) { + usedMajor = _vclVersion.major; + usedMinor = _vclVersion.minor; } - int usedMinor = isDowngrade ? 
VCL_COMPILER_VERSION_MINOR : _vclVersion.minor; - - _logger.info("[Debug] Used VCL API Version: %d.%d", usedMajor, usedMinor); - _logger.info("[Debug] compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); - _logger.info("[Debug] embedding compiler vcl version: %d.%d", - VCL_COMPILER_VERSION_MAJOR, - VCL_COMPILER_VERSION_MINOR); if (usedMajor >= 7 && usedMinor >= 4) { if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " "%d.%d, \n but loaded VCL is %d.%d.\n" - "Will downwise to form %s to use vclAllocatedExecutableCreate2", + "Will downgrade to form %s to use vclAllocatedExecutableCreate2", VCL_COMPILER_VERSION_MAJOR, VCL_COMPILER_VERSION_MINOR, _vclVersion.major, @@ -369,8 +377,7 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& network, - const Config& config) const { +intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector& network, const Config& config) const { _logger.debug("parse start"); // VCL does not support parse, return empty metadata return intel_npu::NetworkMetadata(); @@ -415,29 +422,17 @@ std::vector VCLCompilerImpl::process_profiling_output(const s std::memcpy(layerInfo.data(), profOutput.data, profOutput.size); } - // profOutput.data = NULL; - // THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", vclGetDecodedProfilingBuffer(profilingHandle, - // VCL_PROFILING_TASK_LEVEL, &profOutput), logHandle); if (profOutput.data == NULL) { - // OPENVINO_THROW("Failed to get VCL profiling task level output"); - // } - - // profOutput.data = NULL; - // THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", vclGetDecodedProfilingBuffer(profilingHandle, - // VCL_PROFILING_RAW, &profOutput),logHandle); if (profOutput.data == NULL) { - // OPENVINO_THROW("Failed to get VCL profiling raw output"); - // } - THROW_ON_FAIL_FOR_VCL("vclProfilingDestroy", vclProfilingDestroy(profilingHandle), logHandle); - return 
intel_npu::profiling::convertLayersToIeProfilingInfo(layerInfo); // Return processed profiling info + // Return processed profiling info + return intel_npu::profiling::convertLayersToIeProfilingInfo(layerInfo); } uint32_t VCLCompilerImpl::get_version() const { return ZE_MAKE_VERSION(_compilerProperties.version.major, _compilerProperties.version.minor); } -ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, - const Config& config) const { +ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, const Config& config) const { _logger.debug("query start"); const auto maxOpsetVersion = _compilerProperties.supportedOpsets; _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); @@ -452,13 +447,14 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr() - : true, - updatedConfig.get()); + auto serializedIR = + driver_compiler_utils::serializeIR(model, + compilerVersion, + maxOpsetVersion, + updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) + ? 
updatedConfig.get() + : true, + updatedConfig.get()); std::string buildFlags; buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 16e6e87ab63cf9..1ca5f8dba13872 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -153,6 +153,18 @@ static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& bas return COMPILER_TYPE::parse(it->second.as()); } + // if there is no compiler_type provided = use base_config value + // update the compilerType by device id: + // 3720 -> DRIVER + // 4000 and later -> MLIR (default value) + auto it_device = local_conf.find(std::string(DEVICE_ID::key())); + if (it_device != local_conf.end()) { + // if platform is provided by local config = use that + if (it_device->second.as() == ov::intel_npu::Platform::NPU3720) { + return ov::intel_npu::CompilerType::DRIVER; + } + } + // if there is no compiler_type provided = use base_config value // update the compilerType by platform: // 3720 -> DRIVER @@ -239,6 +251,20 @@ std::shared_ptr exclude_model_ptr_from_map(ov::AnyMap& properti return modelPtr; } +std::string getDeviceFromProperties(const std::map& propertiesMap) { + const std::string defaultDevice = "4000"; + auto it = propertiesMap.find(std::string(DEVICE_ID::key())); + if (it != propertiesMap.end()) { + return it->second; + } + + it = propertiesMap.find(std::string(PLATFORM::key())); + if (it != propertiesMap.end()) { + return it->second; + } + return defaultDevice; +} + } // namespace namespace intel_npu { @@ -608,11 +634,13 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< update_log_level(localPropertiesMap); // create compiler + std::string device_id = getDeviceFromProperties(localPropertiesMap); CompilerAdapterFactory compilerAdapterFactory; - auto compiler = compilerAdapterFactory.getCompiler(_backend, 
resolveCompilerType(_globalConfig, properties)); + auto compiler = + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties), device_id); OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); - auto localConfig = fork_local_config(localPropertiesMap, compiler); //FilteredConfig + auto localConfig = fork_local_config(localPropertiesMap, compiler); #ifndef VCL_FOR_COMPILER const auto set_cache_dir = localConfig.get(); @@ -733,7 +761,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< if (successfullyDebatched && localConfig.get() == ov::hint::PerformanceMode::LATENCY) { _logger.info("Override performance mode to THROUGHPUT for compilation"); - auto modifiedConfig = localConfig; // Copy only when needed, FilteredConfig + auto modifiedConfig = localConfig; // Copy only when needed std::stringstream strStream; strStream << ov::hint::PerformanceMode::THROUGHPUT; modifiedConfig.update({{ov::hint::performance_mode.name(), strStream.str()}}); @@ -908,8 +936,10 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& exclude_model_ptr_from_map(npu_plugin_properties); const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - auto compiler = - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); + std::string device_id = getDeviceFromProperties(propertiesMap); + auto compiler = compilerAdapterFactory.getCompiler(_backend, + resolveCompilerType(_globalConfig, npu_plugin_properties), + device_id); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); _logger.setLevel(localConfig.get()); const auto platform = @@ -944,8 +974,10 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - auto compiler 
= - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); + std::string device_id = getDeviceFromProperties(propertiesMap); + auto compiler = compilerAdapterFactory.getCompiler(_backend, + resolveCompilerType(_globalConfig, npu_plugin_properties), + device_id); OV_ITT_TASK_CHAIN(PLUGIN_PARSE_MODEL, itt::domains::NPUPlugin, "Plugin::parse", "fork_local_config"); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::RunTime); From cbb8de0758692773b467601cf3be4f400217a96b Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Thu, 13 Nov 2025 18:24:17 +0800 Subject: [PATCH 06/25] remove ENABLE_VCL_FOR_COMPILER and update tile, default compilertype --- src/plugins/intel_npu/CMakeLists.txt | 5 - .../cmake/download_compiler_libs.cmake | 179 ------------------ src/plugins/intel_npu/cmake/features.cmake | 3 - .../al/include/intel_npu/config/options.hpp | 2 +- .../src/plugin_compiler_adapter.cpp | 64 +++---- .../src/compiler_adapter/src/vcl_api.cpp | 2 +- .../intel_npu/src/plugin/src/plugin.cpp | 2 - 7 files changed, 31 insertions(+), 226 deletions(-) delete mode 100644 src/plugins/intel_npu/cmake/download_compiler_libs.cmake diff --git a/src/plugins/intel_npu/CMakeLists.txt b/src/plugins/intel_npu/CMakeLists.txt index 470801fb39bc10..8871512b85b848 100644 --- a/src/plugins/intel_npu/CMakeLists.txt +++ b/src/plugins/intel_npu/CMakeLists.txt @@ -18,11 +18,6 @@ set(NPU_PLUGIN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) include(cmake/features.cmake) -if(ENABLE_VCL_FOR_COMPILER) - include(cmake/download_compiler_libs.cmake) - add_definitions("-DVCL_FOR_COMPILER") -endif() - set(CMAKE_CXX_STANDARD 17) if(ENABLE_NPU_DEBUG_CAPS) diff --git a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake deleted file mode 100644 index d8a664259299d7..00000000000000 --- a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake +++ /dev/null @@ -1,179 +0,0 @@ -# 
Copyright (C) 2018-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -# Function to download and extract files -function(download_and_extract url zip_file extracted_dir modify_proxy) - # Check if the prebuilt VCL compiler libraries not exist - if(NOT EXISTS "${extracted_dir}") - if(modify_proxy STREQUAL "MODIFY") - # Update proxy to enable download for windows url - set(original_NO_PROXY $ENV{NO_PROXY}) - set(original_no_proxy $ENV{no_proxy}) - set(ENV{NO_PROXY} "") - set(ENV{no_proxy} "") - endif() - - # Download the prebuilt VCL compiler libraries, if failure, show error message and exit - message(STATUS "Downloading prebuilt VCL compiler libraries from ${url}") - file(DOWNLOAD "${url}" "${zip_file}" - TIMEOUT 3600 - LOG log_output - STATUS download_status - SHOW_PROGRESS) - - if(modify_proxy STREQUAL "MODIFY") - # Restore proxy - set(ENV{NO_PROXY} ${original_NO_PROXY}) - set(ENV{no_proxy} ${original_no_proxy}) - endif() - - list(GET download_status 0 download_result) - if(NOT download_result EQUAL 0) - message(FATAL_ERROR "Download failed!\nStatus: ${download_status}\nLog: ${log_output}") - else() - message(STATUS "Download completed: ${zip_file}") - endif() - - message(STATUS "Unzipping prebuilt VCL compiler libraries to ${extracted_dir}") - # Determine extraction method based on file extension - if("${zip_file}" MATCHES "\\.zip$") - file(ARCHIVE_EXTRACT INPUT "${zip_file}" DESTINATION "${extracted_dir}") - elseif("${zip_file}" MATCHES "\\.tar.gz$") - if(NOT EXISTS "${extracted_dir}") - file(MAKE_DIRECTORY "${extracted_dir}") - message(STATUS "Directory ${extracted_dir} created to unzip.") - endif() - execute_process(COMMAND tar -xzf "${zip_file}" -C "${extracted_dir}") - elseif("${zip_file}" MATCHES "\\.deb$") - execute_process(COMMAND dpkg-deb -x "${zip_file}" "${extracted_dir}") - elseif("${zip_file}" MATCHES "\\.exe$") - set(WINRAR_PATHS - "C:/Program Files/WinRAR" - "C:/Program Files (x86)/WinRAR" - ) - - set(WINRAR_FOUND FALSE) - 
set(WINRAR_EXECUTABLE "") - - foreach(PATH ${WINRAR_PATHS}) - if(EXISTS "${PATH}/WinRAR.exe") - set(WINRAR_FOUND TRUE) - set(WINRAR_EXECUTABLE "${PATH}/WinRAR.exe") - break() - endif() - endforeach() - - if(WINRAR_FOUND) - message(STATUS "WinRAR found at: ${WINRAR_EXECUTABLE} and extract ${zip_file} to ${extracted_dir}") - file(MAKE_DIRECTORY "${extracted_dir}") - execute_process( - COMMAND "${WINRAR_EXECUTABLE}" x -y -o+ "${zip_file}" "${extracted_dir}" - RESULT_VARIABLE result - OUTPUT_VARIABLE output - ERROR_VARIABLE error - ) - - if(result EQUAL 0) - message(STATUS "Extraction successful: ${output}") - else() - #file(REMOVE_RECURSE "${extracted_dir}") - message(STATUS "Extraction failed: ${error}") - endif() - else() - message(FATAL_ERROR "WinRAR not found. Please install WinRAR to proceed.") - endif() - else() - message(FATAL_ERROR "Unsupported file extension for extraction: ${zip_file}") - endif() - file(REMOVE "${zip_file}") - else() - message(STATUS "Prebuilt VCL compiler libraries already exist, skip download") - endif() -endfunction() - -if(ENABLE_VCL_FOR_COMPILER) - if(ENABLE_SYSTEM_NPU_VCL_COMPILER) - message(STATUS "Using system NPU VCL compiler libraries, skip download") - else() - message(STATUS "Downloading prebuilt NPU VCL compiler libraries") - if(WIN32) - set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_lib/win") - set(VCL_COMPILER_LIBS_URL "https://github.com/openvinotoolkit/npu_compiler/releases/download/npu_ud_2025_38_rc4/w_vpux_compiler_l0_win-7_4_3-Release_dyntbb_postcommit_cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218.zip") - set(VCL_COMPILER_LIBS_ZIP "${VCL_COMPILER_LIBS_DIR}/w_vpux_compiler_l0_win-7_4_3-Release_dyntbb_postcommit_cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218.zip") - set(VCL_COMPILER_LIBS_DIR_UNZIPPED "${VCL_COMPILER_LIBS_DIR}/cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218") - - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_ZIP}" 
"${VCL_COMPILER_LIBS_DIR_UNZIPPED}" "MODIFY") - set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_UNZIPPED}/cid/lib") - - configure_file( - ${VCL_COMPILER_LIB_PATH}/npu_driver_compiler.dll - ${VCL_COMPILER_LIB_PATH}/npu_vcl_compiler.dll - COPYONLY - ) - set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/npu_vcl_compiler.dll") - file(COPY "${VCL_COMPILER_LIB}" - DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_BUILD_TYPE}") - message(STATUS "Not Copying prebuilt VCL compiler libraries npu_vcl_compiler.dll to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for windows") - else() - # Check if the operating system is Linux and not macOS - if(UNIX AND NOT APPLE) - # Get the OS name and version - execute_process(COMMAND lsb_release -is OUTPUT_VARIABLE OS_NAME OUTPUT_STRIP_TRAILING_WHITESPACE) - execute_process(COMMAND lsb_release -rs OUTPUT_VARIABLE OS_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) - - if(OS_NAME STREQUAL "Ubuntu") - if(OS_VERSION STREQUAL "22.04") - # Ubuntu 22.04-specific settings or actions - set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_libs/ubuntu22.04") - set(VCL_COMPILER_LIBS_URL "https://github.com/intel/linux-npu-driver/releases/download/v1.19.0/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu22.04_amd64.deb") - set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu22.04_amd64.deb") - set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu22.04") - - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") - - set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") - configure_file( - ${VCL_COMPILER_LIB_PATH}/libnpu_driver_compiler.so - ${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so - COPYONLY - ) - set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") - file(COPY "${VCL_COMPILER_LIB}" - 
DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") - message(STATUS "Not Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 22.04") - elseif(OS_VERSION STREQUAL "24.04") - message(STATUS "This is Ubuntu 24.04") - # Ubuntu 24.04-specific settings or actions - set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_libs/ubuntu24.04") - set(VCL_COMPILER_LIBS_URL "https://github.com/intel/linux-npu-driver/releases/download/v1.19.0/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu24.04_amd64.deb") - set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu24.04_amd64.deb") - set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu24.04") - - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") - - set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") - configure_file( - ${VCL_COMPILER_LIB_PATH}/libnpu_driver_compiler.so - ${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so - COPYONLY - ) - set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") - # file(COPY "${VCL_COMPILER_LIB}" - # DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") - message(STATUS "Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 24.04") - else() - message(STATUS "This is another version of Ubuntu: ${OS_VERSION}") - # Other Ubuntu-specific settings or actions - endif() - else() - message(STATUS "This is a different Linux distribution: ${OS_NAME}, skip downloading prebuilt VCL compiler libraries") - # Other Linux-specific settings or actions - endif() - endif() - endif() - endif() - - install(FILES ${VCL_COMPILER_LIB} - DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${NPU_INTERNAL_COMPONENT}) -endif() \ No newline at end of file 
diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 1f462c0e461806..4190b8415b87ad 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -11,6 +11,3 @@ if(NOT ENABLE_NPU_PLUGIN_ENGINE AND ENABLE_TESTS) endif() ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF) - -ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" ON) -ov_option(ENABLE_SYSTEM_NPU_VCL_COMPILER "Use system VCL compiler libraries" OFF) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp index d9a533729eeeab..3005c4ae2ac634 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp @@ -842,7 +842,7 @@ struct COMPILER_TYPE final : OptionBase PluginCompilerAdapter::compile(const std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); -#ifdef VCL_FOR_COMPILER + + // if use vcl lib to compile, the metadata is empty and git the info from driver parser if (networkMeta.inputs.empty() && networkMeta.outputs.empty()) { // If the metadata is empty, we can try to get it from the driver parser _logger.info("Metadata is empty, trying to get it from the driver parser"); networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); networkMeta.name = model->get_friendly_name(); + networkDesc.metadata = networkMeta; } -#endif + } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. 
Only exports are available"); } + } else { + _logger.debug("no zeGraphExt, metadata is empty from vcl compiler"); } return std::make_shared( _zeGraphExt, _zeroInitStruct, graphDesc, -#ifdef VCL_FOR_COMPILER - std::move(networkMeta), -#else std::move(networkDesc.metadata), -#endif std::move(tensor), config, /* persistentBlob = */ true, // exporting the blob shall be available in such a scenario @@ -308,19 +301,26 @@ std::shared_ptr PluginCompilerAdapter::parse( std::vector network(mainBlob.get_byte_size()); GraphDescriptor mainGraphDesc; -#ifdef VCL_FOR_COMPILER - _logger.debug("parse metadata from driver for vcl compiler"); - if (_zeGraphExt) { - _logger.debug("parse start for vcl compiler"); - mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); - networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc); - } - _logger.debug("parse end for vcl compiler"); -#else _logger.debug("parse start"); network.assign(reinterpret_cast(mainBlob.data()), reinterpret_cast(mainBlob.data()) + mainBlob.get_byte_size()); networkMeta = _compiler->parse(network, config); + + if (_zeGraphExt) { + // if use vcl lib to compile, the metadata is empty and get the info from driver parser + if (networkMeta.inputs.empty() && networkMeta.outputs.empty()) { + // If the metadata is empty, we can try to get it from the driver parser + _logger.info("Metadata is empty, trying to get it from the driver parser"); + networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc); + if (model) { + networkMeta.name = model->get_friendly_name(); + } else { + _logger.warning("networkMeta name is empty!"); + } + } + } else { + _logger.warning("no zeGraphExt, metadata is empty from vcl compiler."); + } network.clear(); network.shrink_to_fit(); @@ -329,7 +329,6 @@ std::shared_ptr PluginCompilerAdapter::parse( } _logger.debug("main schedule parse end"); -#endif // exporting the blob when we get it from cache or ov::hint::compiled_blob property // shall be available @@ -396,10 
+395,12 @@ uint32_t PluginCompilerAdapter::get_version() const { } std::vector PluginCompilerAdapter::get_supported_options() const { -#ifdef VCL_FOR_COMPILER // For VCL, we can return the supported options from compiler VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); if (vclCompiler == nullptr) { + // If _compiler cannot cover to VCLCompilerImpl, it should use the mlir library. + // PluginCompiler has all the same options as plugin + // Returing empty string to let the plugin fallback to legacy registration _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning empty supported options."); return {}; } @@ -424,20 +425,18 @@ std::vector PluginCompilerAdapter::get_supported_options() const { compilerOpts.push_back(option); } return compilerOpts; -#else - // PluginCompiler has all the same options as plugin - // Returing empty string to let the plugin fallback to legacy registration - return {}; -#endif } bool PluginCompilerAdapter::is_option_supported(std::string optname) const { -#ifdef VCL_FOR_COMPILER VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); if (vclCompiler == nullptr) { + // If _compiler cannot cover to VCLCompilerImpl, it should use the mlir library. + // This functions has no utility in PluginCompiler + // returning false for any request to avoid the option of spaming the plugin _logger.warning("Failed to cast compiler to VCLCompilerImpl. 
Returning false for check."); return false; } + if (vclCompiler->is_option_supported(optname)) { _logger.debug("Option %s is supported by VCLCompilerImpl", optname.c_str()); return true; @@ -445,11 +444,6 @@ bool PluginCompilerAdapter::is_option_supported(std::string optname) const { _logger.debug("Option %s is not supported by VCLCompilerImpl", optname.c_str()); return false; } -#else - // This functions has no utility in PluginCompiler - // returning false for any request to avoid the option of spaming the plugin - return false; -#endif } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 35637df12efccb..76a39de08a5247 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -116,7 +116,7 @@ const std::shared_ptr& VCLApi::getInstance() { void setDeviceDesc(vcl_device_desc_t& device_desc, const std::string& device) { std::unordered_map devicesDescsMap = { {"3720", {sizeof(vcl_device_desc_t), 0xAD1D, static_cast(-1), 2}}, - {"4000", {sizeof(vcl_device_desc_t), 0x643E, static_cast(-1), 5}}, + {"4000", {sizeof(vcl_device_desc_t), 0x643E, static_cast(-1), 6}}, // For other devices, the tile configuration needs to be provided by the user. 
}; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 1ca5f8dba13872..d9cd543f24a176 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -642,7 +642,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); -#ifndef VCL_FOR_COMPILER const auto set_cache_dir = localConfig.get(); if (!set_cache_dir.empty()) { const auto compilerType = localConfig.get(); @@ -650,7 +649,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type"); } } -#endif const auto platform = utils::getCompilationPlatform(localConfig.get(), From 047aab18de6e1bc99b66f7629547a8007734da6d Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Thu, 13 Nov 2025 23:54:29 +0800 Subject: [PATCH 07/25] set device desc empty and auto parse in compile --- .../include/compiler_adapter_factory.hpp | 7 +++--- .../include/plugin_compiler_adapter.hpp | 2 +- .../src/compiler_adapter/include/vcl_api.hpp | 7 +++--- .../src/plugin_compiler_adapter.cpp | 9 ++++---- .../src/compiler_adapter/src/vcl_api.cpp | 23 +++---------------- .../intel_npu/src/plugin/src/plugin.cpp | 13 ++++------- 6 files changed, 19 insertions(+), 42 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp index ada0d47fa19ff3..32e1fb384668b2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp @@ -15,14 +15,13 @@ namespace intel_npu { class CompilerAdapterFactory final { public: std::unique_ptr 
getCompiler(const ov::SoPtr& engineBackend, - const ov::intel_npu::CompilerType type, - std::string deviceID = "4000") const { + const ov::intel_npu::CompilerType type) const { switch (type) { case ov::intel_npu::CompilerType::PLUGIN: { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { - return std::make_unique(nullptr, deviceID); + return std::make_unique(nullptr); } - return std::make_unique(engineBackend->getInitStructs(), deviceID); + return std::make_unique(engineBackend->getInitStructs()); } case ov::intel_npu::CompilerType::DRIVER: { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index f89d634c6491cf..5bc7c236e45a10 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -18,7 +18,7 @@ namespace intel_npu { class PluginCompilerAdapter final : public ICompilerAdapter { public: - PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, const std::string& deviceId); + PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct); std::shared_ptr compile(const std::shared_ptr& model, const FilteredConfig& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp index 54f65e8dc0260a..2eb451812e1f12 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp @@ -79,16 +79,15 @@ vcl_symbols_list(); vcl_weak_symbols_list(); #undef vcl_symbol_statement -void setDeviceDesc(vcl_device_desc_t& device_desc, const std::string& device); std::string supportVclCompiler(int major, int minor); class VCLCompilerImpl final : public 
intel_npu::ICompiler { public: - VCLCompilerImpl(const std::string& device); + VCLCompilerImpl(); ~VCLCompilerImpl() override; - static std::shared_ptr getInstance(const std::string& device) { - std::shared_ptr compiler = std::make_shared(device); + static std::shared_ptr getInstance() { + static std::shared_ptr compiler = std::make_shared(); return compiler; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 509f8267ecbb6d..6db88441f76070 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -65,15 +65,14 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { namespace intel_npu { -PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, - const std::string& deviceId) +PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) : _zeroInitStruct(zeroInitStruct), _logger("PluginCompilerAdapter", Logger::global().level()) { _logger.debug("initialize PluginCompilerAdapter start"); _logger.info("PLUGIN VCL compiler will be used."); try { - auto vclCompilerPtr = VCLCompilerImpl::getInstance(deviceId); + auto vclCompilerPtr = VCLCompilerImpl::getInstance(); auto vclLib = VCLApi::getInstance()->getLibrary(); if (vclCompilerPtr && vclLib) { _compiler = ov::SoPtr(vclCompilerPtr, vclLib); @@ -178,7 +177,7 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr()) { localConfig.update({{ov::intel_npu::separate_weights_version.name(), "ONE_SHOT"}}); } @@ -313,7 +312,7 @@ std::shared_ptr PluginCompilerAdapter::parse( _logger.info("Metadata is empty, trying to get it from the driver parser"); networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc); if (model) { - networkMeta.name = model->get_friendly_name(); + networkMeta.name = model.value()->get_friendly_name(); } 
else { _logger.warning("networkMeta name is empty!"); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 76a39de08a5247..56342238e57675 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -113,24 +113,7 @@ const std::shared_ptr& VCLApi::getInstance() { return instance; } -void setDeviceDesc(vcl_device_desc_t& device_desc, const std::string& device) { - std::unordered_map devicesDescsMap = { - {"3720", {sizeof(vcl_device_desc_t), 0xAD1D, static_cast(-1), 2}}, - {"4000", {sizeof(vcl_device_desc_t), 0x643E, static_cast(-1), 6}}, - // For other devices, the tile configuration needs to be provided by the user. - }; - - auto it = devicesDescsMap.find(device); - if (it != devicesDescsMap.end()) { - device_desc = it->second; - } else { - device_desc = devicesDescsMap["4000"]; - } -} - -VCLCompilerImpl::VCLCompilerImpl(const std::string& device) - : _logHandle(nullptr), - _logger("VCLCompilerImpl", Logger::global().level()) { +VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerImpl", Logger::global().level()) { _logger.debug("VCLCompilerImpl constructor start"); // Initialize the VCL API THROW_ON_FAIL_FOR_VCL("vclGetVersion", vclGetVersion(&_vclVersion, &_vclProfilingVersion), nullptr); @@ -156,8 +139,8 @@ VCLCompilerImpl::VCLCompilerImpl(const std::string& device) compilerDesc.version = _vclVersion; compilerDesc.debugLevel = static_cast<__vcl_log_level_t>(static_cast(Logger::global().level()) - 1); - vcl_device_desc_t device_desc; - setDeviceDesc(device_desc, device); + // Set device description as empty, the related info will be processed in compile phase if passed by user. 
+ vcl_device_desc_t device_desc = {}; THROW_ON_FAIL_FOR_VCL("vclCompilerCreate", vclCompilerCreate(&compilerDesc, &device_desc, &_compilerHandle, &_logHandle), diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index d9cd543f24a176..632cbc62e76bdc 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -636,8 +636,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // create compiler std::string device_id = getDeviceFromProperties(localPropertiesMap); CompilerAdapterFactory compilerAdapterFactory; - auto compiler = - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties), device_id); + auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties)); OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); @@ -935,9 +934,8 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); std::string device_id = getDeviceFromProperties(propertiesMap); - auto compiler = compilerAdapterFactory.getCompiler(_backend, - resolveCompilerType(_globalConfig, npu_plugin_properties), - device_id); + auto compiler = + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); _logger.setLevel(localConfig.get()); const auto platform = @@ -973,9 +971,8 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); std::string device_id = getDeviceFromProperties(propertiesMap); - auto compiler = 
compilerAdapterFactory.getCompiler(_backend, - resolveCompilerType(_globalConfig, npu_plugin_properties), - device_id); + auto compiler = + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); OV_ITT_TASK_CHAIN(PLUGIN_PARSE_MODEL, itt::domains::NPUPlugin, "Plugin::parse", "fork_local_config"); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::RunTime); From baedc2b2f88aa3654a1d2d89ebbdc06f5c6c4475 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 17 Nov 2025 09:45:10 +0800 Subject: [PATCH 08/25] update vcl compiler to openvino_intel_npu_compiler --- .../intel_npu/src/compiler_adapter/src/vcl_api.cpp | 12 ++++++------ src/plugins/intel_npu/src/plugin/src/plugin.cpp | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 56342238e57675..e54568c483806c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -66,11 +66,11 @@ static inline std::string getLatestVCLLog(vcl_log_handle_t logHandle) { } \ } -VCLApi::VCLApi() : _logger("VCLApi", ov::log::Level::DEBUG) { - const std::string baseName = "npu_vcl_compiler"; +VCLApi::VCLApi() : _logger("VCLApi", Logger::global().level()) { + const std::string baseName = "openvino_intel_npu_compiler"; try { auto libpath = ov::util::make_plugin_library_name({}, baseName); - _logger.debug("Try to load npu_vcl_compiler"); + _logger.debug("Try to load openvino_intel_npu_compiler"); #if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) this->lib = ov::util::load_shared_object(ov::util::string_to_wstring(libpath).c_str()); @@ -78,7 +78,7 @@ VCLApi::VCLApi() : _logger("VCLApi", ov::log::Level::DEBUG) { this->lib = ov::util::load_shared_object(libpath.c_str()); #endif } catch (const std::runtime_error& error) { - 
_logger.debug("Failed to load npu_vcl_compiler"); + _logger.debug("Failed to load openvino_intel_npu_compiler"); OPENVINO_THROW(error.what()); } @@ -88,7 +88,7 @@ VCLApi::VCLApi() : _logger("VCLApi", ov::log::Level::DEBUG) { vcl_symbols_list(); #undef vcl_symbol_statement } catch (const std::runtime_error& error) { - _logger.debug("Failed to get formal symbols from npu_vcl_compiler"); + _logger.debug("Failed to get formal symbols from openvino_intel_npu_compiler"); OPENVINO_THROW(error.what()); } @@ -96,7 +96,7 @@ VCLApi::VCLApi() : _logger("VCLApi", ov::log::Level::DEBUG) { try { \ this->vcl_symbol = reinterpret_cast(ov::util::get_symbol(lib, #vcl_symbol)); \ } catch (const std::runtime_error&) { \ - _logger.debug("Failed to get %s from npu_vcl_compiler", #vcl_symbol); \ + _logger.debug("Failed to get %s from openvino_intel_npu_compiler", #vcl_symbol); \ this->vcl_symbol = nullptr; \ } vcl_weak_symbols_list(); diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 632cbc62e76bdc..f2575f3f41b45d 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -634,7 +634,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< update_log_level(localPropertiesMap); // create compiler - std::string device_id = getDeviceFromProperties(localPropertiesMap); CompilerAdapterFactory compilerAdapterFactory; auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties)); From 8f3f570112ef7207550495a123ab809d09dc88dc Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 17 Nov 2025 11:46:47 +0800 Subject: [PATCH 09/25] fix ie mdk issue for compilerType Inconsistency issues for 3720 --- .../src/plugin_compiler_adapter.cpp | 4 +- .../intel_npu/src/plugin/src/plugin.cpp | 81 ++++++++++--------- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git 
a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 312a6947afa787..4bb069a463eb80 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -70,10 +70,11 @@ PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptrgetLibrary(); + _logger.info("PLUGIN VCL compiler is loading"); if (vclCompilerPtr && vclLib) { _compiler = ov::SoPtr(vclCompilerPtr, vclLib); } else { @@ -123,7 +124,6 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr& propertiesMap) { } } -static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, const ov::AnyMap& local_conf) { - // first look if provided config changes compiler type - auto it = local_conf.find(std::string(COMPILER_TYPE::key())); - if (it != local_conf.end()) { - // if compiler_type is provided by local config = use that - return COMPILER_TYPE::parse(it->second.as()); +std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { + const std::string defaultDevice = std::string(ov::intel_npu::Platform::NPU4000); + auto it = propertiesMap.find(std::string(DEVICE_ID::key())); + if (it != propertiesMap.end()) { + return it->second.as(); } - // if there is no compiler_type provided = use base_config value - // update the compilerType by device id: - // 3720 -> DRIVER - // 4000 and later -> MLIR (default value) - auto it_device = local_conf.find(std::string(DEVICE_ID::key())); - if (it_device != local_conf.end()) { - // if platform is provided by local config = use that - if (it_device->second.as() == ov::intel_npu::Platform::NPU3720) { - return ov::intel_npu::CompilerType::DRIVER; - } + it = propertiesMap.find(std::string(PLATFORM::key())); + if (it != propertiesMap.end()) { + return it->second.as(); } + return defaultDevice; +} - // if there is no 
compiler_type provided = use base_config value - // update the compilerType by platform: +void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& propertiesMap, Logger& log) { + // if there is no compiler_type provided, use base_config value, check and update by the device + // update the compilerType by device: // 3720 -> DRIVER - // 4000 and later -> MLIR (default value) - auto it_platform = local_conf.find(std::string(PLATFORM::key())); - if (it_platform != local_conf.end()) { + // 4000 and later -> MLIR + auto it_compiler_type = propertiesMap.find(std::string(COMPILER_TYPE::key())); + if (it_compiler_type == propertiesMap.end()) { // if platform is provided by local config = use that - if (it_platform->second.as() == ov::intel_npu::Platform::NPU3720) { - return ov::intel_npu::CompilerType::DRIVER; + const ov::AnyMap localProperties = propertiesMap; + std::string getdevice = getDeviceFromProperties(localProperties); + if (getdevice == std::string((ov::intel_npu::Platform::NPU3720))) { + if(base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { + log.warning( + "Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " + "compiler_type to 'DRIVER'. 
Maybe cause the compilerType inconsistency issues."); + } + // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user + propertiesMap[std::string(COMPILER_TYPE::key())] = COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); } } +} + +static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, const ov::AnyMap& local_conf) { + // first look if provided config changes compiler type + auto it = local_conf.find(std::string(COMPILER_TYPE::key())); + if (it != local_conf.end()) { + // if compiler_type is provided by local config = use that + return COMPILER_TYPE::parse(it->second.as()); + } + // if there is no compiler_type provided = use base_config value return base_conf.get(); } @@ -250,20 +263,6 @@ std::shared_ptr exclude_model_ptr_from_map(ov::AnyMap& properti return modelPtr; } -std::string getDeviceFromProperties(const std::map& propertiesMap) { - const std::string defaultDevice = "4000"; - auto it = propertiesMap.find(std::string(DEVICE_ID::key())); - if (it != propertiesMap.end()) { - return it->second; - } - - it = propertiesMap.find(std::string(PLATFORM::key())); - if (it != propertiesMap.end()) { - return it->second; - } - return defaultDevice; -} - } // namespace namespace intel_npu { @@ -663,6 +662,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // activate the NPUW path auto useNpuwKey = ov::intel_npu::use_npuw.name(); ov::AnyMap localProperties = properties; + if (localProperties.count(useNpuwKey)) { if (localProperties.at(useNpuwKey).as() == true) { return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties); @@ -678,6 +678,9 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< _logger.warning("Model received in config will be ignored as it was already provided by parameter."); } + // For 3720, need check and update its compiler_type + checkUpdateforspecialPlatform(_globalConfig, localProperties, _logger); + const 
std::map localPropertiesMap = any_copy(localProperties); update_log_level(localPropertiesMap); @@ -980,7 +983,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& exclude_model_ptr_from_map(npu_plugin_properties); const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - std::string device_id = getDeviceFromProperties(propertiesMap); + checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); @@ -1017,7 +1020,7 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - std::string device_id = getDeviceFromProperties(propertiesMap); + checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); From 4f1aa39d816ad8d6c934ee05b762bb596de8b55b Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 17 Nov 2025 15:59:15 +0800 Subject: [PATCH 10/25] fix clang-format --- .../src/compiler_adapter/src/plugin_compiler_adapter.cpp | 2 +- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 4bb069a463eb80..39531e33152a4c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -135,7 +135,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr( diff --git 
a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 2016029ecc0bed..f52fdbcb3928fa 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -158,7 +158,7 @@ std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { return defaultDevice; } -void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& propertiesMap, Logger& log) { +void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& propertiesMap, Logger& log) { // if there is no compiler_type provided, use base_config value, check and update by the device // update the compilerType by device: // 3720 -> DRIVER @@ -169,13 +169,14 @@ void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& const ov::AnyMap localProperties = propertiesMap; std::string getdevice = getDeviceFromProperties(localProperties); if (getdevice == std::string((ov::intel_npu::Platform::NPU3720))) { - if(base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { + if (base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { log.warning( "Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " "compiler_type to 'DRIVER'. 
Maybe cause the compilerType inconsistency issues."); } // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user - propertiesMap[std::string(COMPILER_TYPE::key())] = COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); + propertiesMap[std::string(COMPILER_TYPE::key())] = + COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); } } } @@ -662,7 +663,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // activate the NPUW path auto useNpuwKey = ov::intel_npu::use_npuw.name(); ov::AnyMap localProperties = properties; - if (localProperties.count(useNpuwKey)) { if (localProperties.at(useNpuwKey).as() == true) { return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties); From 1a60dfa947c956bd307cd3b6de82e6db5427da7a Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 17 Nov 2025 23:37:22 +0800 Subject: [PATCH 11/25] fix unit test --- .../src/plugin_compiler_adapter.cpp | 19 ++++++++----------- .../intel_npu/src/plugin/src/plugin.cpp | 12 ++++++++---- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 39531e33152a4c..4f4e8e285d3db7 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -292,16 +292,19 @@ std::shared_ptr PluginCompilerAdapter::parse( const std::optional>& model) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); - NetworkMetadata networkMeta; - std::vector network(mainBlob.get_byte_size()); - GraphDescriptor mainGraphDesc; - _logger.debug("parse start"); + std::vector network(mainBlob.get_byte_size()); network.assign(reinterpret_cast(mainBlob.data()), reinterpret_cast(mainBlob.data()) + mainBlob.get_byte_size()); - 
networkMeta = _compiler->parse(network, config); + auto networkMeta = _compiler->parse(network, config); + network.clear(); + network.shrink_to_fit(); + + GraphDescriptor mainGraphDesc; if (_zeGraphExt) { + mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); + // if use vcl lib to compile, the metadata is empty and get the info from driver parser if (networkMeta.inputs.empty() && networkMeta.outputs.empty()) { // If the metadata is empty, we can try to get it from the driver parser @@ -316,12 +319,6 @@ std::shared_ptr PluginCompilerAdapter::parse( } else { _logger.warning("no zeGraphExt, metadata is empty from vcl compiler."); } - network.clear(); - network.shrink_to_fit(); - - if (_zeGraphExt) { - mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); - } _logger.debug("main schedule parse end"); diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index f52fdbcb3928fa..77757f0e9a4c10 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -164,6 +164,7 @@ void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& // 3720 -> DRIVER // 4000 and later -> MLIR auto it_compiler_type = propertiesMap.find(std::string(COMPILER_TYPE::key())); + // if user set compilerType, will not update auto if (it_compiler_type == propertiesMap.end()) { // if platform is provided by local config = use that const ov::AnyMap localProperties = propertiesMap; @@ -663,6 +664,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // activate the NPUW path auto useNpuwKey = ov::intel_npu::use_npuw.name(); ov::AnyMap localProperties = properties; + if (localProperties.count(useNpuwKey)) { if (localProperties.at(useNpuwKey).as() == true) { return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties); @@ -871,6 +873,7 @@ ov::SoPtr 
Plugin::get_default_context(const ov::AnyMap& remo return std::make_shared(_backend); } +// duo std::shared_ptr Plugin::import_model(std::istream& stream, const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); @@ -907,9 +910,10 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c OPENVINO_THROW("Blob size is too large to be represented on a std::streamsize!"); } stream.read(tensor.data(), static_cast(blobSize)); + std::cout << "=======just to check issue========" << std ::endl; return parse(tensor, std::move(metadata), npu_plugin_properties); } catch (const std::exception& ex) { - OPENVINO_THROW("Can't import network: ", ex.what()); + OPENVINO_THROW("Can't import network: ", ex.what()); /// get issue message } catch (...) { OPENVINO_THROW("NPU import_model got unexpected exception from CompiledModel"); } @@ -981,9 +985,9 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& CompilerAdapterFactory compilerAdapterFactory; auto npu_plugin_properties = properties; exclude_model_ptr_from_map(npu_plugin_properties); + checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); @@ -1016,11 +1020,11 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, // ov::hint::model has no corresponding "Config" implementation thus we need to remove it from the // list of properties auto originalModel = exclude_model_ptr_from_map(npu_plugin_properties); - + checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = 
any_copy(npu_plugin_properties); update_log_level(propertiesMap); - checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); + auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); From 21ea3961e73aa252b643bd02274877e1fe24f4e7 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Tue, 18 Nov 2025 10:19:49 +0800 Subject: [PATCH 12/25] fix SERIALIZATION_WEIGHTS_SIZE_THRESHOLD undeclared identifiers --- .../intel_npu/src/compiler_adapter/src/vcl_api.cpp | 8 ++++---- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 12 ++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index e54568c483806c..6cb2e5b2fdacea 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -228,9 +228,9 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr() + ? updatedConfig.get() : true, - updatedConfig.get()); + updatedConfig.get()); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); @@ -435,9 +435,9 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr() + ? 
updatedConfig.get() : true, - updatedConfig.get()); + updatedConfig.get()); std::string buildFlags; buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 77757f0e9a4c10..e361946f853e38 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -159,12 +159,12 @@ std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { } void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& propertiesMap, Logger& log) { - // if there is no compiler_type provided, use base_config value, check and update by the device + // If there is no compiler_type provided, use base_config value, check and update by the device // update the compilerType by device: // 3720 -> DRIVER // 4000 and later -> MLIR auto it_compiler_type = propertiesMap.find(std::string(COMPILER_TYPE::key())); - // if user set compilerType, will not update auto + // If user set compilerType, will not update by device if (it_compiler_type == propertiesMap.end()) { // if platform is provided by local config = use that const ov::AnyMap localProperties = propertiesMap; @@ -664,7 +664,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // activate the NPUW path auto useNpuwKey = ov::intel_npu::use_npuw.name(); ov::AnyMap localProperties = properties; - if (localProperties.count(useNpuwKey)) { if (localProperties.at(useNpuwKey).as() == true) { return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties); @@ -873,7 +872,6 @@ ov::SoPtr Plugin::get_default_context(const ov::AnyMap& remo return std::make_shared(_backend); } -// duo std::shared_ptr Plugin::import_model(std::istream& stream, const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); @@ -910,10 +908,9 @@ std::shared_ptr 
Plugin::import_model(std::istream& stream, c OPENVINO_THROW("Blob size is too large to be represented on a std::streamsize!"); } stream.read(tensor.data(), static_cast(blobSize)); - std::cout << "=======just to check issue========" << std ::endl; return parse(tensor, std::move(metadata), npu_plugin_properties); } catch (const std::exception& ex) { - OPENVINO_THROW("Can't import network: ", ex.what()); /// get issue message + OPENVINO_THROW("Can't import network: ", ex.what()); } catch (...) { OPENVINO_THROW("NPU import_model got unexpected exception from CompiledModel"); } @@ -1021,13 +1018,12 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, // list of properties auto originalModel = exclude_model_ptr_from_map(npu_plugin_properties); checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); + CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); - OV_ITT_TASK_CHAIN(PLUGIN_PARSE_MODEL, itt::domains::NPUPlugin, "Plugin::parse", "fork_local_config"); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::RunTime); _logger.setLevel(localConfig.get()); From da62ff89b09c8726a980b3c4f45569c7a003ec34 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Tue, 18 Nov 2025 11:47:11 +0800 Subject: [PATCH 13/25] revert namespace for SERIALIZATION_WEIGHTS_SIZE_THRESHOLD --- .../intel_npu/src/compiler_adapter/src/vcl_api.cpp | 8 ++++---- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 6cb2e5b2fdacea..e54568c483806c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ 
b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -228,9 +228,9 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr() + ? updatedConfig.get() : true, - updatedConfig.get()); + updatedConfig.get()); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); @@ -435,9 +435,9 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr() + ? updatedConfig.get() : true, - updatedConfig.get()); + updatedConfig.get()); std::string buildFlags; buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index e361946f853e38..3c9b0ea4dffb98 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -987,6 +987,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& update_log_level(propertiesMap); auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); + auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); _logger.setLevel(localConfig.get()); const auto platform = From 800c78e00a0c0b79e3ac022f80a02bd2726e6f7c Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Tue, 18 Nov 2025 14:26:28 +0800 Subject: [PATCH 14/25] remove USE_BASE_MODEL_SERIALIZER option --- .../intel_npu/src/compiler_adapter/src/vcl_api.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index e54568c483806c..d46f221494fe30 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -228,9 +228,8 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr() - : 
true, - updatedConfig.get()); + ? config.get() + : true); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); @@ -435,9 +434,8 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr() - : true, - updatedConfig.get()); + ? config.get() + : true); std::string buildFlags; buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); From acafdc663945e821b37e68ac7076682becd79403 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Tue, 18 Nov 2025 14:46:03 +0800 Subject: [PATCH 15/25] fix 3720 platfrom compilerType issue and metadata name issue --- .../src/plugin_compiler_adapter.cpp | 7 +- .../intel_npu/src/plugin/src/plugin.cpp | 125 +++++++++++++----- 2 files changed, 97 insertions(+), 35 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 4f4e8e285d3db7..dc817bf3c76e0d 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -130,6 +130,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); + networkMeta.name = model->get_friendly_name(); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); @@ -390,7 +391,7 @@ std::vector PluginCompilerAdapter::get_supported_options() const { // For VCL, we can return the supported options from compiler VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); if (vclCompiler == nullptr) { - // If _compiler cannot cover to VCLCompilerImpl, it should use the mlir library. 
+ // If _compiler cannot be cast to VCLCompilerImpl, it should use the mlir library. // PluginCompiler has all the same options as plugin // Returing empty string to let the plugin fallback to legacy registration _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning empty supported options."); @@ -422,9 +423,9 @@ std::vector PluginCompilerAdapter::get_supported_options() const { bool PluginCompilerAdapter::is_option_supported(std::string optname) const { VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); if (vclCompiler == nullptr) { - // If _compiler cannot cover to VCLCompilerImpl, it should use the mlir library. + // If _compiler cannot be cast to VCLCompilerImpl, it should use the mlir library. // This functions has no utility in PluginCompiler - // returning false for any request to avoid the option of spaming the plugin + // returning false for any request to avoid the option of spamming the plugin _logger.warning("Failed to cast compiler to VCLCompilerImpl. 
Returning false for check."); return false; } diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index d7218e4ef4a8f4..b14c272ece5cba 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -145,7 +145,7 @@ void update_log_level(const std::map& propertiesMap) { } std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { - const std::string defaultDevice = std::string(ov::intel_npu::Platform::NPU4000); + const std::string defaultDevice = ""; auto it = propertiesMap.find(std::string(DEVICE_ID::key())); if (it != propertiesMap.end()) { return it->second.as(); @@ -153,43 +153,95 @@ std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { it = propertiesMap.find(std::string(PLATFORM::key())); if (it != propertiesMap.end()) { - return it->second.as(); + auto platformStr = it->second.as(); + if (platformStr == ov::intel_npu::Platform::AUTO_DETECT) { + return defaultDevice; + } + + platformStr = utils::getPlatformByDeviceName(platformStr); + platformStr = ov::intel_npu::Platform::standardize(platformStr); + return platformStr; } return defaultDevice; } -void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& propertiesMap, Logger& log) { - // If there is no compiler_type provided, use base_config value, check and update by the device - // update the compilerType by device: +void checkUpdateforSpecialPlatform(const FilteredConfig& base_conf, + ov::AnyMap& propertiesMap, + const std::string& deviceName, + Logger& log) { + // If there is no compiler_type provided, use base_config default value + // Default compilerType for different platform is up to device: // 3720 -> DRIVER - // 4000 and later -> MLIR - auto it_compiler_type = propertiesMap.find(std::string(COMPILER_TYPE::key())); - // If user set compilerType, will not update by device - if (it_compiler_type == propertiesMap.end()) { - // if platform is 
provided by local config = use that - const ov::AnyMap localProperties = propertiesMap; - std::string getdevice = getDeviceFromProperties(localProperties); - if (getdevice == std::string((ov::intel_npu::Platform::NPU3720))) { - if (base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { - log.warning( - "Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " - "compiler_type to 'DRIVER'. Maybe cause the compilerType inconsistency issues."); - } - // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user - propertiesMap[std::string(COMPILER_TYPE::key())] = - COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); - } + // 4000 and later -> default + + // If user set compilerType in config, will not update by device + auto it = propertiesMap.find(std::string(COMPILER_TYPE::key())); + if(it != propertiesMap.end()) { + return; + } + + std::string getDevice = getDeviceFromProperties(propertiesMap); + + if (deviceName.empty() && getDevice.empty()) { + OPENVINO_THROW("Device name is empty!"); + } + + std::string usedDevice = deviceName; + if (deviceName != getDevice) { + log.info("The device from properties '%s' is different from the actual device '%s', use device '%s' to check " + "compiler_type.", + getDevice.c_str(), + deviceName.c_str(), + deviceName.c_str()); + + usedDevice = deviceName.empty() ? getDevice : deviceName; + } + + // If the platform is not 3720, will not update by device + if (usedDevice != std::string(ov::intel_npu::Platform::NPU3720)) { + return; } + + if (base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { + log.warning( + "Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " + "compiler_type to 'DRIVER'. 
Maybe cause the compilerType inconsistency issues."); + } + + // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user + propertiesMap[std::string(COMPILER_TYPE::key())] = COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); + + return; } -static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, const ov::AnyMap& local_conf) { +static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, + const ov::AnyMap& local_conf, + const std::string& deviceName) { // first look if provided config changes compiler type auto it = local_conf.find(std::string(COMPILER_TYPE::key())); if (it != local_conf.end()) { // if compiler_type is provided by local config = use that return COMPILER_TYPE::parse(it->second.as()); } - // if there is no compiler_type provided = use base_config value + // if there is no compiler_type provided = use base_config value and update default vaule by platform if needed + // Default compilerType for different platform is up to device: + // 3720 -> DRIVER + // 4000 and later -> default + if (!deviceName.empty()) { + if (deviceName == std::string(ov::intel_npu::Platform::NPU3720)) { + return ov::intel_npu::CompilerType::DRIVER; + } + } else { + std::string getdevice = getDeviceFromProperties(local_conf); + if (getdevice == std::string(ov::intel_npu::Platform::NPU3720)) { + return ov::intel_npu::CompilerType::DRIVER; + } + if (getdevice == std::string(ov::intel_npu::Platform::AUTO_DETECT)) { + Logger::global().warning("Device is set to AUTO_DETECT, cannot decide the default compiler_type by device, " + "use the default compiler_type."); + } + } + return base_conf.get(); } @@ -679,14 +731,16 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< } // For 3720, need check and update its compiler_type - checkUpdateforspecialPlatform(_globalConfig, localProperties, _logger); - + auto deviceBeforeCompilerCreate = _backend == nullptr ? 
nullptr : _backend->getDevice(); + std::string deviceName = deviceBeforeCompilerCreate != nullptr ? deviceBeforeCompilerCreate->getName() : ""; + checkUpdateforSpecialPlatform(_globalConfig, localProperties, deviceName, _logger); const std::map localPropertiesMap = any_copy(localProperties); update_log_level(localPropertiesMap); // create compiler CompilerAdapterFactory compilerAdapterFactory; - auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties)); + auto compiler = + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, localProperties, deviceName)); OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); @@ -981,12 +1035,14 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& CompilerAdapterFactory compilerAdapterFactory; auto npu_plugin_properties = properties; exclude_model_ptr_from_map(npu_plugin_properties); - checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); + auto device = _backend == nullptr ? nullptr : _backend->getDevice(); + std::string deviceName = device != nullptr ? 
device->getName() : ""; + checkUpdateforSpecialPlatform(_globalConfig, npu_plugin_properties, deviceName, _logger); const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); auto compiler = - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); - + compilerAdapterFactory.getCompiler(_backend, + resolveCompilerType(_globalConfig, npu_plugin_properties, deviceName)); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); _logger.setLevel(localConfig.get()); const auto platform = @@ -1017,13 +1073,18 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, // ov::hint::model has no corresponding "Config" implementation thus we need to remove it from the // list of properties auto originalModel = exclude_model_ptr_from_map(npu_plugin_properties); - checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); + + auto deviceBeforeCompilerCreate = _backend == nullptr ? nullptr : _backend->getDevice(); + std::string deviceName = deviceBeforeCompilerCreate != nullptr ? 
deviceBeforeCompilerCreate->getName() : ""; + checkUpdateforSpecialPlatform(_globalConfig, npu_plugin_properties, deviceName, _logger); CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); auto compiler = - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); + compilerAdapterFactory.getCompiler(_backend, + resolveCompilerType(_globalConfig, npu_plugin_properties, deviceName)); + OV_ITT_TASK_CHAIN(PLUGIN_PARSE_MODEL, itt::domains::NPUPlugin, "Plugin::parse", "fork_local_config"); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::RunTime); _logger.setLevel(localConfig.get()); From 0c9ae289cd12971f97ab9b9ef2d83d3c72cab31f Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Fri, 21 Nov 2025 12:14:54 +0800 Subject: [PATCH 16/25] update to use vcl serializer --- .../src/compiler_adapter/include/vcl_api.hpp | 2 + .../src/compiler_adapter/src/vcl_api.cpp | 121 +++++++++++++----- .../intel_npu/src/plugin/src/plugin.cpp | 7 +- 3 files changed, 96 insertions(+), 34 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp index 2eb451812e1f12..b0f040be32cc33 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp @@ -6,6 +6,7 @@ #include +#include "intel_npu/common/filtered_config.hpp" #include "intel_npu/icompiler.hpp" #include "npu_driver_compiler.h" #include "openvino/core/except.hpp" @@ -79,6 +80,7 @@ vcl_symbols_list(); vcl_weak_symbols_list(); #undef vcl_symbol_statement +bool isUseBaseModelSerializer(const FilteredConfig& config); std::string supportVclCompiler(int major, int minor); class VCLCompilerImpl final : public intel_npu::ICompiler { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp 
b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index d46f221494fe30..1e102343a0b4c6 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -4,7 +4,6 @@ #include "vcl_api.hpp" -#include "intel_npu/common/filtered_config.hpp" #include "intel_npu/config/options.hpp" #include "intel_npu/npu_private_properties.hpp" #include "intel_npu/profiling.hpp" @@ -125,9 +124,9 @@ VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerIm _logger.info("Use Lib VCL version to create compiler"); if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major || (VCL_COMPILER_VERSION_MAJOR == _vclVersion.major && VCL_COMPILER_VERSION_MINOR < _vclVersion.minor)) { - _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL %d.%d, " + _logger.warning("inside supported VCL version is lower than loaded VCL api:\n plugin was built with VCL %d.%d, " "\n but loaded VCL is %d.%d.\n" - "Will downwise to use the latest plugin vcl compiler!!!", + "Will downgrade to use the latest plugin vcl compiler!!!", VCL_COMPILER_VERSION_MAJOR, VCL_COMPILER_VERSION_MINOR, _vclVersion.major, @@ -196,6 +195,24 @@ struct vcl_allocator_malloc { } }; +bool isUseBaseModelSerializer(const FilteredConfig& config) { + // user pass use_base_model_serializer config + if (config.isAvailable(ov::intel_npu::use_base_model_serializer.name()) && + config.has(ov::intel_npu::use_base_model_serializer.name())) { + return config.get(); + } + + // user pass model_serializer_version config + if (config.isAvailable(ov::intel_npu::model_serializer_version.name()) && + config.has(ov::intel_npu::use_base_model_serializer.name())) { + return (config.get() == + ov::intel_npu::ModelSerializerVersion::ALL_WEIGHTS_COPY); + } + + // vcl serializer method is not set by user, will default to use it. 
+ return false; +} + std::string supportVclCompiler(int major, int minor) { if (major >= 7 && minor >= 4) { return "vclAllocatedExecutableCreate2"; @@ -204,12 +221,21 @@ std::string supportVclCompiler(int major, int minor) { } else { return "vclExecutableCreate"; } - return "unsupported VCL version"; } NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { _logger.debug("compile start"); + /// Check the linked vcl version whether supported in plugin + uint16_t usedMajor = VCL_COMPILER_VERSION_MAJOR, usedMinor = VCL_COMPILER_VERSION_MINOR; + if (static_cast(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) { + usedMinor = std::min(static_cast(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor); + } else if (static_cast(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) { + usedMajor = _vclVersion.major; + usedMinor = _vclVersion.minor; + } + _logger.debug("the finally used vcl version is %d.%d", usedMajor, usedMinor); + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); @@ -223,13 +249,31 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr= 7.5 + if (usedMajor >= 7 && usedMinor >= 5) { + useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig); + } + + if (useBaseModelSerializer) { + _logger.debug("serialize IR is base method, useBaseModelSerializer is %d", useBaseModelSerializer); + } else { + _logger.debug("serialize IR is vcl method, useBaseModelSerializer is %d", useBaseModelSerializer); + + // To resolve the issue with the default configuration where no user passes the serializer config, the VCL + // serializer will be used as the default in the plugin adapter. You need to pass the serializer config; + // otherwise, you will encounter a deserialization issue within the compiler. 
+ _logger.warning("Add serializer config"); + if (updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name())) { + updatedConfig.update({{ov::intel_npu::use_base_model_serializer.name(), "NO"}}); + } else if (updatedConfig.isAvailable(ov::intel_npu::model_serializer_version.name())) { + updatedConfig.update({{ov::intel_npu::model_serializer_version.name(), "NO_WEIGHTS_COPY"}}); + } + } + auto serializedIR = - driver_compiler_utils::serializeIR(model, - compilerVersion, - maxOpsetVersion, - updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) - ? config.get() - : true); + driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); @@ -237,7 +281,7 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) { - usedMinor = std::min(static_cast(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor); - } else if (static_cast(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) { - usedMajor = _vclVersion.major; - usedMinor = _vclVersion.minor; - } - if (usedMajor >= 7 && usedMinor >= 4) { if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " @@ -268,7 +303,7 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { _logger.debug("query start"); + + /// Check the linked vcl version whether supported in plugin + uint16_t usedMajor = VCL_COMPILER_VERSION_MAJOR, usedMinor = VCL_COMPILER_VERSION_MINOR; + if (static_cast(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) { + usedMinor = std::min(static_cast(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor); + } else if (static_cast(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) { + usedMajor = _vclVersion.major; + 
usedMinor = _vclVersion.minor; + } + _logger.debug("the finally used vcl version is %d.%d", usedMajor, usedMinor); + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); @@ -428,17 +474,32 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr= 7.5 + if (usedMajor >= 7 && usedMinor >= 5) { + useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig); + } + if (useBaseModelSerializer) { + _logger.debug("serialize IR is base method, useBaseModelSerializer is %d", useBaseModelSerializer); + } else { + _logger.debug("serialize IR is vcl method, useBaseModelSerializer is %d", useBaseModelSerializer); + + // To resolve the issue with the default configuration where no user passes the serializer config, the VCL + // serializer will be used as the default in the plugin adapter. You need to pass the serializer config; + // otherwise, you will encounter a deserialization issue within the compiler. + _logger.warning("Add serializer config"); + if (updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name())) { + updatedConfig.update({{ov::intel_npu::use_base_model_serializer.name(), "NO"}}); + } else if (updatedConfig.isAvailable(ov::intel_npu::model_serializer_version.name())) { + updatedConfig.update({{ov::intel_npu::model_serializer_version.name(), "NO_WEIGHTS_COPY"}}); + } + } auto serializedIR = - driver_compiler_utils::serializeIR(model, - compilerVersion, - maxOpsetVersion, - updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) - ? 
config.get() - : true); + driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer); std::string buildFlags; - buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); + buildFlags += driver_compiler_utils::serializeConfig(updatedConfig, compilerVersion); _logger.debug("queryImpl build flags : %s", buildFlags.c_str()); vcl_query_handle_t queryHandle; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index b1ba60cdf215c1..007b63ae9f6698 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -176,7 +176,7 @@ void checkUpdateforSpecialPlatform(const FilteredConfig& base_conf, // If user set compilerType in config, will not update by device auto it = propertiesMap.find(std::string(COMPILER_TYPE::key())); - if(it != propertiesMap.end()) { + if (it != propertiesMap.end()) { return; } @@ -203,9 +203,8 @@ void checkUpdateforSpecialPlatform(const FilteredConfig& base_conf, } if (base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { - log.warning( - "Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " - "compiler_type to 'DRIVER'. Maybe cause the compilerType inconsistency issues."); + log.warning("Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " + "compiler_type to 'DRIVER'. 
Maybe cause the compilerType inconsistency issues."); } // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user From b363ff6d69b5dd3e65ae8f1194aeb68450d5f9b9 Mon Sep 17 00:00:00 2001 From: "Kang, Wenjing" Date: Fri, 21 Nov 2025 22:31:09 +0800 Subject: [PATCH 17/25] Add compileWsOneShot and compileWsIterative for VCLCompilerImpl Signed-off-by: Kang, Wenjing --- .../intel_npu/common/icompiler_adapter.hpp | 35 +++++ .../include/npu_driver_compiler.h | 6 +- .../src/compiler_adapter/include/vcl_api.hpp | 10 +- .../src/driver_compiler_adapter.cpp | 33 ----- .../src/compiler_adapter/src/graph.cpp | 4 + .../src/plugin_compiler_adapter.cpp | 134 ++++++++++-------- .../src/compiler_adapter/src/vcl_api.cpp | 94 +++++++++++- .../compiler_adapter/src/weightless_graph.cpp | 4 + 8 files changed, 220 insertions(+), 100 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp index f41ceef203ad60..1ed01edb67ac4d 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp @@ -6,6 +6,9 @@ #include "intel_npu/common/filtered_config.hpp" #include "intel_npu/common/igraph.hpp" +#include "openvino/core/model.hpp" +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" +#include "openvino/op/constant.hpp" namespace intel_npu { @@ -56,6 +59,38 @@ class ICompilerAdapter { virtual std::vector get_supported_options() const = 0; virtual bool is_option_supported(std::string optname) const = 0; + /** + * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon + * serialization. 
+ * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information + * regarding the offset of the weights within the binary file, as well as the original size and precision. This + * information is required within the "weights separation" flow, therefore this function is here to store it. + * @note Not calling this function in the weights separation flow would lead to this information being lost upon + * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent + * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be + * misinformed and lookups of weights offsets could fail. + * + * @param model Both source and target. + */ + void storeWeightlessCacheAttribute(const std::shared_ptr& model) const { + size_t constantId = 0; + for (auto&& node : model->get_ordered_ops()) { + if (ov::is_type(node)) { + ov::RTMap& runtimeInfoMap = node->get_rt_info(); + const auto& weightlessCacheAttrIt = + runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static()); + + const std::string constantIdString = std::to_string(constantId++); + if (weightlessCacheAttrIt != runtimeInfoMap.end()) { + auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as(); + model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString); + model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString); + model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString); + } + } + } + } + virtual ~ICompilerAdapter() = default; }; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h index a8c38506fc844c..e7f3d3bee21010 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h +++ 
b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h @@ -23,7 +23,7 @@ extern "C" { #endif #define VCL_COMPILER_VERSION_MAJOR 7 -#define VCL_COMPILER_VERSION_MINOR 4 +#define VCL_COMPILER_VERSION_MINOR 6 #define VCL_PROFILING_VERSION_MAJOR 2 #define VCL_PROFILING_VERSION_MINOR 0 @@ -272,6 +272,10 @@ VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreate2(vcl_compile uint8_t** blobBuffer, uint64_t* blobSize); +VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreateWSOneShot(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_allocator2_t* allocator); + /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys the executable and releases the cached blob. VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableDestroy(vcl_executable_handle_t executable); diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp index b0f040be32cc33..7163beada5322a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp @@ -37,7 +37,8 @@ namespace intel_npu { #define vcl_weak_symbols_list() \ vcl_symbol_statement(vclAllocatedExecutableCreate2) \ vcl_symbol_statement(vclGetCompilerSupportedOptions) \ - vcl_symbol_statement(vclGetCompilerIsOptionSupported) + vcl_symbol_statement(vclGetCompilerIsOptionSupported) \ + vcl_symbol_statement(vclAllocatedExecutableCreateWSOneShot) // clang-format on class VCLApi { @@ -95,6 +96,13 @@ class VCLCompilerImpl final : public intel_npu::ICompiler { NetworkDescription compile(const std::shared_ptr& model, const Config& config) const override; + std::vector> compileWsOneShot(const std::shared_ptr& model, + const Config& config) const override; + + NetworkDescription compileWsIterative(const std::shared_ptr& model, + const Config& config, + size_t callNumber) const override; + 
ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; NetworkMetadata parse(const std::vector& network, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 15d0091e73bd4f..4b11610d56b7a3 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -13,7 +13,6 @@ #include "intel_npu/utils/logger/logger.hpp" #include "mem_usage.hpp" #include "openvino/core/model.hpp" -#include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "vcl_serializer.hpp" #include "weightless_graph.hpp" @@ -26,38 +25,6 @@ bool isInitMetadata(const intel_npu::NetworkMetadata& networkMetadata) { return networkMetadata.inputs.at(0).isInitInputWeights; } -/** - * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon - * serialization. - * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information - * regarding the offset of the weights within the binary file, as well as the original size and precision. This - * information is required within the "weights separation" flow, therefore this function is here to store it. - * @note Not calling this function in the weights separation flow would lead to this information being lost upon - * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent - * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be - * misinformed and lookups of weights offsets could fail. - * - * @param model Both source and target. 
- */ -void storeWeightlessCacheAttribute(const std::shared_ptr& model) { - size_t constantId = 0; - for (auto&& node : model->get_ordered_ops()) { - if (ov::is_type(node)) { - ov::RTMap& runtimeInfoMap = node->get_rt_info(); - const auto& weightlessCacheAttrIt = - runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static()); - - const std::string constantIdString = std::to_string(constantId++); - if (weightlessCacheAttrIt != runtimeInfoMap.end()) { - auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as(); - model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString); - } - } - } -} - /** * @brief On-going migration from "use_base_model_serializer" to "model_serializer_version". So we have to check both, * depending on which one is supported by the compiler. 
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp index 9ace471ea66ad4..a6d1d6bcc68f95 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp @@ -157,6 +157,10 @@ void Graph::set_argument_value(uint32_t argi, const void* argv) const { } void Graph::initialize(const Config& config) { + if (!_zeroInitStruct) { + _logger.warning("_zeroInitStruct is nullptr!"); + return; + } _logger.debug("Graph initialize start"); if (_zeGraphExt == nullptr || _graphDesc._handle == nullptr) { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index f5928cb22f2a1b..4fd516f9b642bd 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -61,6 +61,13 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { return ov::make_tensor(impl); } +bool isInitMetadata(const intel_npu::NetworkMetadata& networkMetadata) { + if (networkMetadata.inputs.size() == 0) { + return false; + } + return networkMetadata.inputs.at(0).isInitInputWeights; +} + } // namespace namespace intel_npu { @@ -154,23 +161,12 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr> initNetworkDescriptions; - std::shared_ptr mainNetworkDescription; + // OPENVINO_ASSERT(_zeGraphExt); + storeWeightlessCacheAttribute(model); _logger.debug("compile start"); - const auto starts_with = [](const std::string& str, const std::string& prefix) { - return str.substr(0, prefix.size()) == prefix; - }; - const auto isInit = [&](std::string name) { - return starts_with(name, "init"); - }; - - const auto isMain = [&](std::string name) { - return starts_with(name, "main"); - }; - - Config localConfig = config; + FilteredConfig localConfig = 
config; if (!localConfig.has()) { localConfig.update({{ov::intel_npu::separate_weights_version.name(), "ONE_SHOT"}}); } @@ -182,37 +178,87 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr= ov::log::Level::INFO) { compile_model_mem_start = get_peak_memory_usage(); } + + std::vector initGraphDescriptors; + std::vector tensorsInits; + std::vector initNetworkMetadata; + std::vector> initNetworkDescriptions; + + ov::Tensor tensorMain; + GraphDescriptor mainGraphDesc; + NetworkMetadata mainNetworkMetadata; + std::shared_ptr mainNetworkDescription; + switch (localConfig.get()) { case ov::intel_npu::WSVersion::ONE_SHOT: { std::vector> initMainNetworkDescriptions = _compiler->compileWsOneShot(model, localConfig); -#if 0 // TODO: it is not clear whether we should change the name - OPENVINO_ASSERT(isMain(initMainNetworkDescriptions.back()->metadata.name), - "Unexpected network name for main:", - initMainNetworkDescriptions.back()->metadata.name); -#endif - mainNetworkDescription = initMainNetworkDescriptions.back(); initMainNetworkDescriptions.pop_back(); initNetworkDescriptions = std::move(initMainNetworkDescriptions); + + tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork); + if (_zeGraphExt) { + // Depending on the config, we may get an error when trying to + // get the graph handle from the compiled network + try { + mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size()); + mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc); + } catch (...) { + _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " + "allowed. 
Only exports are available"); + } + } + + initGraphDescriptors.reserve(initNetworkDescriptions.size()); + tensorsInits.reserve(initNetworkDescriptions.size()); + initNetworkMetadata.reserve(initNetworkDescriptions.size()); + for (auto& networkDesc : initNetworkDescriptions) { + ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork); + GraphDescriptor initGraphDesc; + NetworkMetadata initNetworkMeta; + if (_zeGraphExt) { + try { + initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); + initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc); + } catch (...) { + } + } + + initGraphDescriptors.push_back(initGraphDesc); + tensorsInits.push_back(std::move(tensor)); + initNetworkMetadata.push_back(std::move(initNetworkMeta)); + } } break; case ov::intel_npu::WSVersion::ITERATIVE: { + OPENVINO_ASSERT(_zeGraphExt, + "The \"iterative\" implementation of the weights separation feature requires a Level Zero " + "graph handle to compile a model."); + + // The state of the model needs to be reset every iteration const std::shared_ptr originalModel = model->clone(); std::shared_ptr targetModel = model; size_t i = 0; while (auto networkDescription = std::make_shared(_compiler->compileWsIterative(targetModel, localConfig, i++))) { - if (isInit(networkDescription->metadata.name)) { - initNetworkDescriptions.push_back(networkDescription); + ov::Tensor tensor = make_tensor_from_vector(networkDescription->compiledNetwork); + GraphDescriptor graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); + NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta(graphDesc); + + if (isInitMetadata(networkDescription->metadata)) { targetModel = originalModel->clone(); + initGraphDescriptors.push_back(graphDesc); + tensorsInits.push_back(std::move(tensor)); + initNetworkMetadata.push_back(std::move(networkMetadata)); + initNetworkDescriptions.push_back(networkDescription); continue; } - 
OPENVINO_ASSERT(isMain(networkDescription->metadata.name), - "Unexpected network name: ", - networkDescription->metadata.name); + tensorMain = std::move(tensor); + mainGraphDesc = graphDesc; + mainNetworkMetadata = std::move(networkMetadata); mainNetworkDescription = std::move(networkDescription); break; } @@ -233,44 +279,6 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptrcompiledNetwork); - GraphDescriptor mainGraphDesc; - NetworkMetadata mainNetworkMetadata; - if (_zeGraphExt) { - // Depending on the config, we may get an error when trying to - // get the graph handle from the compiled network - try { - mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size()); - mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc); - } catch (...) { - _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " - "allowed. Only exports are available"); - } - } - - std::vector initGraphDescriptors; - std::vector tensorsInits; - std::vector initNetworkMetadata; - initGraphDescriptors.reserve(initNetworkDescriptions.size()); - tensorsInits.reserve(initNetworkDescriptions.size()); - initNetworkMetadata.reserve(initNetworkDescriptions.size()); - for (auto& networkDesc : initNetworkDescriptions) { - ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork); - GraphDescriptor initGraphDesc; - NetworkMetadata initNetworkMeta; - if (_zeGraphExt) { - try { - initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); - initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc); - } catch (...) 
{ - } - } - - initGraphDescriptors.push_back(initGraphDesc); - tensorsInits.push_back(std::move(tensor)); - initNetworkMetadata.push_back(std::move(initNetworkMeta)); - } - return std::make_shared( _zeGraphExt, _zeroInitStruct, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 1e102343a0b4c6..d86e8b74dbf881 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -185,6 +185,27 @@ struct vcl_allocator_vector : vcl_allocator2_t { std::vector m_vec; }; +struct vcl_allocator_vector_2 : vcl_allocator2_t { + vcl_allocator_vector_2() : vcl_allocator2_t{vector_allocate, vector_deallocate} {} + + static uint8_t* vector_allocate(vcl_allocator2_t* allocator, size_t size) { + vcl_allocator_vector_2* vecAllocator = static_cast(allocator); + auto newVec = std::make_shared>(); + newVec->resize(size); + uint8_t* ptr = newVec->data(); + vecAllocator->m_vector.emplace_back(newVec); + return ptr; + } + + static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) { + vcl_allocator_vector_2* vecAllocator = static_cast(allocator); + vecAllocator->m_vector.clear(); + vecAllocator->m_vector.shrink_to_fit(); + } + + std::vector>> m_vector; +}; + struct vcl_allocator_malloc { static uint8_t* vcl_allocate(uint64_t size) { return reinterpret_cast(malloc(size)); @@ -276,10 +297,9 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr> VCLCompilerImpl::compileWsOneShot( + const std::shared_ptr& model, + const Config& config) const { + _logger.debug("compileWsOneShot start"); + + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; + _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); + + _logger.debug("serialize IR"); + ze_graph_compiler_version_info_t compilerVersion; + compilerVersion.major = _compilerProperties.version.major; + 
compilerVersion.minor = _compilerProperties.version.minor; + + const FilteredConfig* filteredConfig = dynamic_cast(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + bool useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig); + auto serializedIR = + driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer); + + std::string buildFlags; + + _logger.debug("create build flags"); + buildFlags += driver_compiler_utils::serializeIOInfo(model, true); + buildFlags += " "; + buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); + _logger.debug("final build flags to compiler: %s", buildFlags.c_str()); + + vcl_executable_desc_t exeDesc = {serializedIR.second.get(), + serializedIR.first, + buildFlags.c_str(), + buildFlags.size()}; + _logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); + + _logger.debug("Using vclAllocatedExecutableCreateWSOneShot"); + vcl_allocator_vector_2 allocator; + + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreateWSOneShot", + vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, &allocator), + _logHandle); + + if (allocator.m_vector.size() == 0) { + OPENVINO_THROW("Failed to create VCL executable, blobCount is zero"); + } + + std::vector> networkDescrs; + for (uint32_t i = 0; i < allocator.m_vector.size(); i++) { + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + networkDescrs.emplace_back( + std::make_shared(std::move(*allocator.m_vector[i]), std::move(metadata))); + } + return networkDescrs; +} + +NetworkDescription VCLCompilerImpl::compileWsIterative(const std::shared_ptr& model, + const Config& config, + size_t callNumber) const { + _logger.debug("compileWsIterative start"); + const FilteredConfig* filteredConfig = dynamic_cast(&config); + if (filteredConfig == nullptr) { + 
OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + updatedConfig.update({{ov::intel_npu::ws_compile_call_number.name(), std::to_string(callNumber)}}); + return compile(model, config); +} + intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector& network, const Config& config) const { _logger.debug("parse start"); // VCL does not support parse, return empty metadata diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp index 1d8549f57f24b8..ec74095e410105 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp @@ -275,6 +275,10 @@ std::pair>> WeightlessGraph::expor } void WeightlessGraph::initialize(const Config& config) { + if (!_zeroInitStruct) { + _wgLogger.warning("_zeroInitStruct is nullptr!"); + return; + } // Simplified version for init schedules const size_t numberOfInits = _initsGraphDesc.size(); _initsCommandQueueOrdinals.resize(numberOfInits); From 16d591c8fd55502a17d80fc9dfc4f74575f7750a Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Sat, 22 Nov 2025 00:04:34 +0800 Subject: [PATCH 18/25] clang-format --- .../src/compiler_adapter/src/plugin_compiler_adapter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 4fd516f9b642bd..b2478e25866866 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -188,7 +188,7 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr mainNetworkDescription; - + switch (localConfig.get()) { case ov::intel_npu::WSVersion::ONE_SHOT: { std::vector> 
initMainNetworkDescriptions = From 5ba9f3941ad71d9b759ebb5699d9f0597b70be50 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 24 Nov 2025 11:04:08 +0800 Subject: [PATCH 19/25] fix comments --- .../intel_npu/common/icompiler_adapter.hpp | 35 ----- .../{npu_driver_compiler.h => compiler.h} | 0 .../include/compiler_impl.hpp | 64 +++++++++ .../include/plugin_compiler_adapter.hpp | 1 - .../src/compiler_adapter/include/vcl_api.hpp | 130 ------------------ .../include/weightless_utils.hpp | 24 ++++ .../src/{vcl_api.cpp => compiler_impl.cpp} | 117 +++++++++++----- .../src/driver_compiler_adapter.cpp | 1 + .../src/plugin_compiler_adapter.cpp | 8 +- .../compiler_adapter/src/weightless_utils.cpp | 44 ++++++ .../intel_npu/src/plugin/src/plugin.cpp | 114 +-------------- 11 files changed, 221 insertions(+), 317 deletions(-) rename src/plugins/intel_npu/src/compiler_adapter/include/{npu_driver_compiler.h => compiler.h} (100%) create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp delete mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp rename src/plugins/intel_npu/src/compiler_adapter/src/{vcl_api.cpp => compiler_impl.cpp} (89%) create mode 100644 src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp index 1ed01edb67ac4d..f41ceef203ad60 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp @@ -6,9 +6,6 @@ #include "intel_npu/common/filtered_config.hpp" #include "intel_npu/common/igraph.hpp" -#include "openvino/core/model.hpp" -#include "openvino/core/rt_info/weightless_caching_attributes.hpp" -#include 
"openvino/op/constant.hpp" namespace intel_npu { @@ -59,38 +56,6 @@ class ICompilerAdapter { virtual std::vector get_supported_options() const = 0; virtual bool is_option_supported(std::string optname) const = 0; - /** - * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon - * serialization. - * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information - * regarding the offset of the weights within the binary file, as well as the original size and precision. This - * information is required within the "weights separation" flow, therefore this function is here to store it. - * @note Not calling this function in the weights separation flow would lead to this information being lost upon - * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent - * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be - * misinformed and lookups of weights offsets could fail. - * - * @param model Both source and target. 
- */ - void storeWeightlessCacheAttribute(const std::shared_ptr& model) const { - size_t constantId = 0; - for (auto&& node : model->get_ordered_ops()) { - if (ov::is_type(node)) { - ov::RTMap& runtimeInfoMap = node->get_rt_info(); - const auto& weightlessCacheAttrIt = - runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static()); - - const std::string constantIdString = std::to_string(constantId++); - if (weightlessCacheAttrIt != runtimeInfoMap.end()) { - auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as(); - model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString); - } - } - } - } - virtual ~ICompilerAdapter() = default; }; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h similarity index 100% rename from src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h rename to src/plugins/intel_npu/src/compiler_adapter/include/compiler.h diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp new file mode 100644 index 00000000000000..3f212f878bb795 --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "compiler.h" +#include "intel_npu/common/filtered_config.hpp" +#include "intel_npu/icompiler.hpp" +#include "openvino/core/except.hpp" + +namespace intel_npu { + +bool isUseBaseModelSerializer(const FilteredConfig& config); +std::string supportVclCompiler(int major, int minor); +class VCLApi; + +class VCLCompilerImpl final : public 
intel_npu::ICompiler { +public: + VCLCompilerImpl(); + ~VCLCompilerImpl() override; + + static std::shared_ptr getInstance() { + static std::shared_ptr compiler = std::make_shared(); + return compiler; + } + + NetworkDescription compile(const std::shared_ptr& model, const Config& config) const override; + + std::vector> compileWsOneShot(const std::shared_ptr& model, + const Config& config) const override; + + NetworkDescription compileWsIterative(const std::shared_ptr& model, + const Config& config, + size_t callNumber) const override; + + ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; + + NetworkMetadata parse(const std::vector& network, const Config& config) const override; + + uint32_t get_version() const override; + + std::vector process_profiling_output(const std::vector& profData, + const std::vector& network, + const intel_npu::Config& config) const final override; + + bool get_supported_options(std::vector& options) const; + + bool is_option_supported(const std::string& option) const; + + std::shared_ptr getLinkedLibrary() const; + +private: + vcl_log_handle_t _logHandle = nullptr; + vcl_compiler_handle_t _compilerHandle = nullptr; + vcl_compiler_properties_t _compilerProperties; + vcl_version_info_t _vclVersion; + vcl_version_info_t _vclProfilingVersion; + Logger _logger; +}; + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 5bc7c236e45a10..0675d964565947 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -11,7 +11,6 @@ #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/runtime/so_ptr.hpp" -#include "vcl_api.hpp" #include "ze_graph_ext_wrappers.hpp" namespace intel_npu { 
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp deleted file mode 100644 index 7163beada5322a..00000000000000 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (C) 2018-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include "intel_npu/common/filtered_config.hpp" -#include "intel_npu/icompiler.hpp" -#include "npu_driver_compiler.h" -#include "openvino/core/except.hpp" - -namespace intel_npu { - -// clang-format off -#define vcl_symbols_list() \ - vcl_symbol_statement(vclGetVersion) \ - vcl_symbol_statement(vclCompilerCreate) \ - vcl_symbol_statement(vclCompilerDestroy) \ - vcl_symbol_statement(vclCompilerGetProperties) \ - vcl_symbol_statement(vclQueryNetworkCreate) \ - vcl_symbol_statement(vclQueryNetwork) \ - vcl_symbol_statement(vclQueryNetworkDestroy) \ - vcl_symbol_statement(vclExecutableCreate) \ - vcl_symbol_statement(vclAllocatedExecutableCreate) \ - vcl_symbol_statement(vclExecutableDestroy) \ - vcl_symbol_statement(vclExecutableGetSerializableBlob) \ - vcl_symbol_statement(vclProfilingCreate) \ - vcl_symbol_statement(vclGetDecodedProfilingBuffer) \ - vcl_symbol_statement(vclProfilingDestroy) \ - vcl_symbol_statement(vclProfilingGetProperties) \ - vcl_symbol_statement(vclLogHandleGetString) - - -//unsupported symbols with older ze_loader versions -#define vcl_weak_symbols_list() \ - vcl_symbol_statement(vclAllocatedExecutableCreate2) \ - vcl_symbol_statement(vclGetCompilerSupportedOptions) \ - vcl_symbol_statement(vclGetCompilerIsOptionSupported) \ - vcl_symbol_statement(vclAllocatedExecutableCreateWSOneShot) -// clang-format on - -class VCLApi { -public: - VCLApi(); - VCLApi(const VCLApi& other) = delete; - VCLApi(VCLApi&& other) = delete; - void operator=(const VCLApi&) = delete; - void operator=(VCLApi&&) = delete; - - static const std::shared_ptr& 
getInstance(); - std::shared_ptr getLibrary() const { - return lib; - } - -#define vcl_symbol_statement(vcl_symbol) decltype(&::vcl_symbol) vcl_symbol; - vcl_symbols_list(); - vcl_weak_symbols_list(); -#undef vcl_symbol_statement - -private: - std::shared_ptr lib; - Logger _logger; -}; - -#define vcl_symbol_statement(vcl_symbol) \ - template \ - inline typename std::invoke_result::type wrapped_##vcl_symbol(Args... args) { \ - const auto& ptr = VCLApi::getInstance(); \ - if (ptr->vcl_symbol == nullptr) { \ - OPENVINO_THROW("Unsupported vcl_symbol " #vcl_symbol); \ - } \ - return ptr->vcl_symbol(std::forward(args)...); \ - } -vcl_symbols_list(); -vcl_weak_symbols_list(); -#undef vcl_symbol_statement -#define vcl_symbol_statement(vcl_symbol) inline decltype(&::vcl_symbol) vcl_symbol = wrapped_##vcl_symbol; -vcl_symbols_list(); -vcl_weak_symbols_list(); -#undef vcl_symbol_statement - -bool isUseBaseModelSerializer(const FilteredConfig& config); -std::string supportVclCompiler(int major, int minor); - -class VCLCompilerImpl final : public intel_npu::ICompiler { -public: - VCLCompilerImpl(); - ~VCLCompilerImpl() override; - - static std::shared_ptr getInstance() { - static std::shared_ptr compiler = std::make_shared(); - return compiler; - } - - NetworkDescription compile(const std::shared_ptr& model, const Config& config) const override; - - std::vector> compileWsOneShot(const std::shared_ptr& model, - const Config& config) const override; - - NetworkDescription compileWsIterative(const std::shared_ptr& model, - const Config& config, - size_t callNumber) const override; - - ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; - - NetworkMetadata parse(const std::vector& network, const Config& config) const override; - - uint32_t get_version() const override; - - std::vector process_profiling_output(const std::vector& profData, - const std::vector& network, - const intel_npu::Config& config) const final override; - - bool 
get_supported_options(std::vector& options) const; - - bool is_option_supported(const std::string& option) const; - -private: - std::shared_ptr _vclApi; - vcl_log_handle_t _logHandle = nullptr; - vcl_compiler_handle_t _compilerHandle = nullptr; - vcl_compiler_properties_t _compilerProperties; - vcl_version_info_t _vclVersion; - vcl_version_info_t _vclProfilingVersion; - Logger _logger; -}; - -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp new file mode 100644 index 00000000000000..d32abbd6ab4509 --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/model.hpp" + +namespace intel_npu { +/** + * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon + * serialization. + * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information + * regarding the offset of the weights within the binary file, as well as the original size and precision. This + * information is required within the "weights separation" flow, therefore this function is here to store it. + * @note Not calling this function in the weights separation flow would lead to this information being lost upon + * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent + * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be + * misinformed and lookups of weights offsets could fail. + * + * @param model Both source and target. 
+ */ +void storeWeightlessCacheAttribute(const std::shared_ptr& model); +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp similarity index 89% rename from src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp rename to src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp index d86e8b74dbf881..5396da3b89d0cc 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "vcl_api.hpp" +#include "compiler_impl.hpp" #include "intel_npu/config/options.hpp" #include "intel_npu/npu_private_properties.hpp" @@ -15,6 +15,74 @@ namespace intel_npu { +// clang-format off +#define vcl_symbols_list() \ + vcl_symbol_statement(vclGetVersion) \ + vcl_symbol_statement(vclCompilerCreate) \ + vcl_symbol_statement(vclCompilerDestroy) \ + vcl_symbol_statement(vclCompilerGetProperties) \ + vcl_symbol_statement(vclQueryNetworkCreate) \ + vcl_symbol_statement(vclQueryNetwork) \ + vcl_symbol_statement(vclQueryNetworkDestroy) \ + vcl_symbol_statement(vclExecutableCreate) \ + vcl_symbol_statement(vclAllocatedExecutableCreate) \ + vcl_symbol_statement(vclExecutableDestroy) \ + vcl_symbol_statement(vclExecutableGetSerializableBlob) \ + vcl_symbol_statement(vclProfilingCreate) \ + vcl_symbol_statement(vclGetDecodedProfilingBuffer) \ + vcl_symbol_statement(vclProfilingDestroy) \ + vcl_symbol_statement(vclProfilingGetProperties) \ + vcl_symbol_statement(vclLogHandleGetString) \ + vcl_symbol_statement(vclAllocatedExecutableCreate2) \ + vcl_symbol_statement(vclGetCompilerSupportedOptions) \ + vcl_symbol_statement(vclGetCompilerIsOptionSupported) \ + + +//unsupported symbols with older ze_loader versions +#define vcl_weak_symbols_list() \ + vcl_symbol_statement(vclAllocatedExecutableCreateWSOneShot) +// clang-format on + 
+class VCLApi { +public: + VCLApi(); + VCLApi(const VCLApi& other) = delete; + VCLApi(VCLApi&& other) = delete; + void operator=(const VCLApi&) = delete; + void operator=(VCLApi&&) = delete; + + static const std::shared_ptr& getInstance(); + std::shared_ptr getLibrary() const { + return lib; + } + +#define vcl_symbol_statement(vcl_symbol) decltype(&::vcl_symbol) vcl_symbol; + vcl_symbols_list(); + vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +private: + std::shared_ptr lib; + Logger _logger; +}; + +#define vcl_symbol_statement(vcl_symbol) \ + template \ + inline typename std::invoke_result::type wrapped_##vcl_symbol(Args... args) { \ + const auto& ptr = VCLApi::getInstance(); \ + if (ptr->vcl_symbol == nullptr) { \ + OPENVINO_THROW("Unsupported vcl_symbol " #vcl_symbol); \ + } \ + return ptr->vcl_symbol(std::forward(args)...); \ + } +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement +#define vcl_symbol_statement(vcl_symbol) inline decltype(&::vcl_symbol) vcl_symbol = wrapped_##vcl_symbol; +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement + static inline std::string getLatestVCLLog(vcl_log_handle_t logHandle) { Logger _logger("VCLAPI", Logger::global().level()); _logger.debug("getLatestVCLLog start"); @@ -114,6 +182,10 @@ const std::shared_ptr& VCLApi::getInstance() { VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerImpl", Logger::global().level()) { _logger.debug("VCLCompilerImpl constructor start"); + + // Load VCL library + (void)VCLApi::getInstance(); + // Initialize the VCL API THROW_ON_FAIL_FOR_VCL("vclGetVersion", vclGetVersion(&_vclVersion, &_vclProfilingVersion), nullptr); @@ -167,6 +239,10 @@ VCLCompilerImpl::~VCLCompilerImpl() { _logger.info("VCL Compiler destroyed successfully"); } +std::shared_ptr VCLCompilerImpl::getLinkedLibrary() const { + return VCLApi::getInstance(); +} + struct vcl_allocator_vector : vcl_allocator2_t { vcl_allocator_vector() : 
vcl_allocator2_t{vector_allocate, vector_deallocate} {} @@ -375,42 +451,9 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr compiledNetwork(size); - THROW_ON_FAIL_FOR_VCL("vclExecutableGetSerializableBlob", - vclExecutableGetSerializableBlob(exeHandle, compiledNetwork.data(), &size), - _logHandle); - - THROW_ON_FAIL_FOR_VCL("vclExecutableDestroy", vclExecutableDestroy(exeHandle), _logHandle); - - // Use empty metadata as VCL does not support metadata extraction - NetworkMetadata metadata; - - _logger.debug("compile end, blob size:%d", compiledNetwork.size()); - return NetworkDescription(std::move(compiledNetwork), std::move(metadata)); + OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later", + _vclVersion.major, + _vclVersion.minor); } } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 4b11610d56b7a3..f8d91edb8d62bb 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -15,6 +15,7 @@ #include "openvino/core/model.hpp" #include "vcl_serializer.hpp" #include "weightless_graph.hpp" +#include "weightless_utils.hpp" namespace { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index b2478e25866866..3b13ac11c50949 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -7,6 +7,7 @@ #include #include +#include "compiler_impl.hpp" #include "graph.hpp" #include "intel_npu/common/device_helpers.hpp" #include "intel_npu/common/itt.hpp" @@ -22,6 +23,7 @@ #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" #include "weightless_graph.hpp" 
+#include "weightless_utils.hpp" namespace { @@ -80,7 +82,7 @@ PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptrgetLibrary(); + auto vclLib = vclCompilerPtr->getLinkedLibrary(); _logger.info("PLUGIN VCL compiler is loading"); if (vclCompilerPtr && vclLib) { _compiler = ov::SoPtr(vclCompilerPtr, vclLib); @@ -161,7 +163,6 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr PluginCompilerAdapter::parse( mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc); _logger.debug("main schedule parse end"); - std::cout << "RUN here == for vcl adapter call===" << std::endl; if (model) { - std::cout << "RUN here == for vcl adapter call 1===" << std::endl; mainNetworkMetadata.name = model.value()->get_friendly_name(); } else { - std::cout << "RUN here == for vcl adapter call 2===" << std::endl; _logger.warning("networkMeta name is empty in parse!"); } } else { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp new file mode 100644 index 00000000000000..cac510735f68ef --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp @@ -0,0 +1,44 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "weightless_utils.hpp" + +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" +#include "openvino/op/constant.hpp" + +namespace intel_npu { +/** + * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon + * serialization. + * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information + * regarding the offset of the weights within the binary file, as well as the original size and precision. 
This + * information is required within the "weights separation" flow, therefore this function is here to store it. + * @note Not calling this function in the weights separation flow would lead to this information being lost upon + * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent + * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be + * misinformed and lookups of weights offsets could fail. + * + * @param model Both source and target. + */ +void storeWeightlessCacheAttribute(const std::shared_ptr& model) { + size_t constantId = 0; + for (auto&& node : model->get_ordered_ops()) { + if (ov::is_type(node)) { + ov::RTMap& runtimeInfoMap = node->get_rt_info(); + const auto& weightlessCacheAttrIt = + runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static()); + + const std::string constantIdString = std::to_string(constantId++); + if (weightlessCacheAttrIt != runtimeInfoMap.end()) { + auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as(); + model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString); + model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString); + model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString); + } + } + } +} +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 007b63ae9f6698..97c6a7e09ceeac 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -144,103 +144,14 @@ void update_log_level(const std::map& propertiesMap) { } } -std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { - const std::string defaultDevice = ""; - auto it = propertiesMap.find(std::string(DEVICE_ID::key())); - if (it != propertiesMap.end()) { - return 
it->second.as(); - } - - it = propertiesMap.find(std::string(PLATFORM::key())); - if (it != propertiesMap.end()) { - auto platformStr = it->second.as(); - if (platformStr == ov::intel_npu::Platform::AUTO_DETECT) { - return defaultDevice; - } - - platformStr = utils::getPlatformByDeviceName(platformStr); - platformStr = ov::intel_npu::Platform::standardize(platformStr); - return platformStr; - } - return defaultDevice; -} - -void checkUpdateforSpecialPlatform(const FilteredConfig& base_conf, - ov::AnyMap& propertiesMap, - const std::string& deviceName, - Logger& log) { - // If there is no compiler_type provided, use base_config default value - // Default compilerType for different platform is up to device: - // 3720 -> DRIVER - // 4000 and later -> default - - // If user set compilerType in config, will not update by device - auto it = propertiesMap.find(std::string(COMPILER_TYPE::key())); - if (it != propertiesMap.end()) { - return; - } - - std::string getDevice = getDeviceFromProperties(propertiesMap); - - if (deviceName.empty() && getDevice.empty()) { - OPENVINO_THROW("Device name is empty!"); - } - - std::string usedDevice = deviceName; - if (deviceName != getDevice) { - log.info("The device from properties '%s' is different from the actual device '%s', use device '%s' to check " - "compiler_type.", - getDevice.c_str(), - deviceName.c_str(), - deviceName.c_str()); - - usedDevice = deviceName.empty() ? getDevice : deviceName; - } - - // If the platform is not 3720, will not update by device - if (usedDevice != std::string(ov::intel_npu::Platform::NPU3720)) { - return; - } - - if (base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { - log.warning("Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " - "compiler_type to 'DRIVER'. 
Maybe cause the compilerType inconsistency issues."); - } - - // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user - propertiesMap[std::string(COMPILER_TYPE::key())] = COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); - - return; -} - -static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, - const ov::AnyMap& local_conf, - const std::string& deviceName) { +static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, const ov::AnyMap& local_conf) { // first look if provided config changes compiler type auto it = local_conf.find(std::string(COMPILER_TYPE::key())); if (it != local_conf.end()) { // if compiler_type is provided by local config = use that return COMPILER_TYPE::parse(it->second.as()); } - // if there is no compiler_type provided = use base_config value and update default vaule by platform if needed - // Default compilerType for different platform is up to device: - // 3720 -> DRIVER - // 4000 and later -> default - if (!deviceName.empty()) { - if (deviceName == std::string(ov::intel_npu::Platform::NPU3720)) { - return ov::intel_npu::CompilerType::DRIVER; - } - } else { - std::string getdevice = getDeviceFromProperties(local_conf); - if (getdevice == std::string(ov::intel_npu::Platform::NPU3720)) { - return ov::intel_npu::CompilerType::DRIVER; - } - if (getdevice == std::string(ov::intel_npu::Platform::AUTO_DETECT)) { - Logger::global().warning("Device is set to AUTO_DETECT, cannot decide the default compiler_type by device, " - "use the default compiler_type."); - } - } - + // if there is no compiler_type provided = use base_config value return base_conf.get(); } @@ -749,18 +660,12 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< localProperties.erase(modelSerializerVersionKey); } - // For 3720, need check and update its compiler_type, if usr not pass in config - auto deviceBeforeCompilerCreate = _backend == nullptr ? 
nullptr : _backend->getDevice(); - std::string deviceName = deviceBeforeCompilerCreate != nullptr ? deviceBeforeCompilerCreate->getName() : ""; - checkUpdateforSpecialPlatform(_globalConfig, localProperties, deviceName, _logger); - const std::map localPropertiesMap = any_copy(localProperties); update_log_level(localPropertiesMap); // create compiler CompilerAdapterFactory compilerAdapterFactory; - auto compiler = - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, localProperties, deviceName)); + auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties)); OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); @@ -1065,14 +970,10 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& CompilerAdapterFactory compilerAdapterFactory; auto npu_plugin_properties = properties; exclude_model_ptr_from_map(npu_plugin_properties); - auto device = _backend == nullptr ? nullptr : _backend->getDevice(); - std::string deviceName = device != nullptr ? 
device->getName() : ""; - checkUpdateforSpecialPlatform(_globalConfig, npu_plugin_properties, deviceName, _logger); const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); auto compiler = - compilerAdapterFactory.getCompiler(_backend, - resolveCompilerType(_globalConfig, npu_plugin_properties, deviceName)); + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); _logger.setLevel(localConfig.get()); const auto platform = @@ -1104,16 +1005,11 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, // list of properties auto originalModel = exclude_model_ptr_from_map(npu_plugin_properties); - auto deviceBeforeCompilerCreate = _backend == nullptr ? nullptr : _backend->getDevice(); - std::string deviceName = deviceBeforeCompilerCreate != nullptr ? deviceBeforeCompilerCreate->getName() : ""; - checkUpdateforSpecialPlatform(_globalConfig, npu_plugin_properties, deviceName, _logger); - CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); auto compiler = - compilerAdapterFactory.getCompiler(_backend, - resolveCompilerType(_globalConfig, npu_plugin_properties, deviceName)); + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); OV_ITT_TASK_CHAIN(PLUGIN_PARSE_MODEL, itt::domains::NPUPlugin, "Plugin::parse", "fork_local_config"); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::RunTime); From 24e293504116773810a42ece397da4d98da5aa49 Mon Sep 17 00:00:00 2001 From: "Kang, Wenjing" Date: Mon, 24 Nov 2025 16:03:49 +0800 Subject: [PATCH 20/25] Add OPENVINO_ASSERT for one shot weightless compilation to check initMainNetworkDescriptions size Signed-off-by: Kang, Wenjing --- .../src/compiler_adapter/src/plugin_compiler_adapter.cpp | 2 ++ 1 
file changed, 2 insertions(+) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 3b13ac11c50949..cc2b0fdd226bfb 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -197,6 +197,8 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr 0, + "The initMainNetworkDescriptions after getting mainNetworkDescription must not be empty!"); initNetworkDescriptions = std::move(initMainNetworkDescriptions); tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork); From a54dae0e8a3493c9455f2e80afbe68d00356677a Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 24 Nov 2025 18:33:33 +0800 Subject: [PATCH 21/25] fix comments2 --- .../intel_npu/src/compiler_adapter/include/compiler.h | 5 ++--- .../src/compiler_adapter/include/compiler_impl.hpp | 2 +- src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp | 4 ---- .../src/compiler_adapter/src/plugin_compiler_adapter.cpp | 6 +++++- .../intel_npu/src/compiler_adapter/src/weightless_utils.cpp | 2 -- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h index e7f3d3bee21010..409798e64b4a1b 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h @@ -1,6 +1,5 @@ -// -// Copyright (C) 2023 Intel Corporation. 
-// SPDX-License-Identifier: Apache 2.0 +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // #ifndef VPUX_COMPILER_L0_H diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp index 3f212f878bb795..0dbeba9afafc97 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp @@ -23,7 +23,7 @@ class VCLCompilerImpl final : public intel_npu::ICompiler { ~VCLCompilerImpl() override; static std::shared_ptr getInstance() { - static std::shared_ptr compiler = std::make_shared(); + static std::weak_ptr compiler = std::make_shared(); return compiler; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp index a6d1d6bcc68f95..9ace471ea66ad4 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp @@ -157,10 +157,6 @@ void Graph::set_argument_value(uint32_t argi, const void* argv) const { } void Graph::initialize(const Config& config) { - if (!_zeroInitStruct) { - _logger.warning("_zeroInitStruct is nullptr!"); - return; - } _logger.debug("Graph initialize start"); if (_zeGraphExt == nullptr || _graphDesc._handle == nullptr) { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index cc2b0fdd226bfb..62466ab3aae8af 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -140,12 +140,16 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); 
networkMeta.name = model->get_friendly_name(); + } catch (const std::exception& ex) { + _logger.info("Failed to use the level zero graph handle: %s. Inference requests for this model are not " + "allowed. Only exports are available", + ex.what()); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); } } else { - _logger.warning("no zeGraphExt, metadata is empty from vcl compiler"); + _logger.warning("No driver is found, zeGraphExt is nullptr, so metadata is empty. Only exports are available"); } return std::make_shared( diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp index cac510735f68ef..987ff372ea8532 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#pragma once - #include "weightless_utils.hpp" #include "openvino/core/rt_info/weightless_caching_attributes.hpp" From 6ea5978041c73290ba93aa51efbcf02f48445b1f Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Tue, 25 Nov 2025 00:18:30 +0800 Subject: [PATCH 22/25] add log for serialize IR --- .../intel_npu/src/compiler_adapter/src/compiler_impl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp index 5396da3b89d0cc..917508d8e006c6 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp @@ -615,8 +615,10 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr Date: Tue, 25 Nov 2025 00:26:06 +0800 Subject: [PATCH 23/25] update compilerType to mlir --- .../intel_npu/src/al/include/intel_npu/config/options.hpp | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp index 51403d304718bd..cadc4f6aa8d270 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp @@ -842,7 +842,7 @@ struct COMPILER_TYPE final : OptionBase Date: Tue, 25 Nov 2025 00:34:10 +0800 Subject: [PATCH 24/25] remove ov cache check in plugin --- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 97c6a7e09ceeac..578b2ba29d61e2 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -670,13 +670,13 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); - const auto set_cache_dir = localConfig.get(); - if (!set_cache_dir.empty()) { - const auto compilerType = localConfig.get(); - if (compilerType == ov::intel_npu::CompilerType::PLUGIN) { - OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type"); - } - } + // const auto set_cache_dir = localConfig.get(); + // if (!set_cache_dir.empty()) { + // const auto compilerType = localConfig.get(); + // if (compilerType == ov::intel_npu::CompilerType::PLUGIN) { + // OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type"); + // } + // } const auto platform = utils::getCompilationPlatform(localConfig.get(), From 07be1a20c7f085139888d83d04f0382fe0b76e58 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 24 Nov 2025 18:33:33 +0800 Subject: [PATCH 25/25] fix comments2 --- 
.../src/compiler_adapter/include/compiler_impl.hpp | 11 ++++++++++- .../src/compiler_adapter/src/compiler_impl.cpp | 6 ++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp index 0dbeba9afafc97..a55c12faded9ec 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "compiler.h" #include "intel_npu/common/filtered_config.hpp" @@ -23,7 +24,15 @@ class VCLCompilerImpl final : public intel_npu::ICompiler { ~VCLCompilerImpl() override; static std::shared_ptr getInstance() { - static std::weak_ptr compiler = std::make_shared(); + static std::mutex mutex; + static std::weak_ptr weak_compiler; + + std::lock_guard lock(mutex); + auto compiler = weak_compiler.lock(); + if (!compiler) { + compiler = std::make_shared(); + weak_compiler = compiler; + } return compiler; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp index 917508d8e006c6..118bdfd4188ba9 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp @@ -615,10 +615,12 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr