From 7de1d64b6ce7a55ce31db16d53f786127b9a44bf Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Mon, 26 May 2025 23:09:59 +0800 Subject: [PATCH 01/25] Add VCLApi and VCLCompilerImpl Use fake metadata Use driver ext to parse metadata If not find npu_driver_compiler, rollback to npu_mlir_compiler with vcl Use ENABLE_VCL_FOR_COMPILER as switch and download lib Fix support options Fix win download issue Add download for ubuntu24.04 and refactor script Install vcl compiler during cpack Fix proxy issue Fix install path Fix supported option api and use npu_vcl_compiler Retrieve error log Fix log issue Use vclAllocatedExecutableCreate2 Detect version to create executable Fix allocate API Fix style Fix cache_dir Update to use new compiler Rollback to old windows driver which does not need WinRAR Fix build issue Fix weightsless Signed-off-by: Xin Wang --- src/plugins/intel_npu/CMakeLists.txt | 5 + .../cmake/download_compiler_libs.cmake | 174 +++++++ src/plugins/intel_npu/cmake/features.cmake | 3 + .../al/include/intel_npu/config/options.hpp | 2 +- .../include/npu_driver_compiler.h | 339 ++++++++++++++ .../include/plugin_compiler_adapter.hpp | 1 + .../src/compiler_adapter/include/vcl_api.hpp | 118 +++++ .../include/ze_graph_ext_wrappers.hpp | 3 + .../src/compiler_adapter/src/graph.cpp | 1 - .../src/plugin_compiler_adapter.cpp | 111 ++++- .../src/compiler_adapter/src/vcl_api.cpp | 439 ++++++++++++++++++ .../src/ze_graph_ext_wrappers.cpp | 2 +- .../intel_npu/src/plugin/src/plugin.cpp | 2 + 13 files changed, 1180 insertions(+), 20 deletions(-) create mode 100644 src/plugins/intel_npu/cmake/download_compiler_libs.cmake create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp diff --git a/src/plugins/intel_npu/CMakeLists.txt b/src/plugins/intel_npu/CMakeLists.txt index 
8871512b85b848..470801fb39bc10 100644 --- a/src/plugins/intel_npu/CMakeLists.txt +++ b/src/plugins/intel_npu/CMakeLists.txt @@ -18,6 +18,11 @@ set(NPU_PLUGIN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) include(cmake/features.cmake) +if(ENABLE_VCL_FOR_COMPILER) + include(cmake/download_compiler_libs.cmake) + add_definitions("-DVCL_FOR_COMPILER") +endif() + set(CMAKE_CXX_STANDARD 17) if(ENABLE_NPU_DEBUG_CAPS) diff --git a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake new file mode 100644 index 00000000000000..95ced5f78bdb59 --- /dev/null +++ b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake @@ -0,0 +1,174 @@ +# Copyright (C) 2018-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# Function to download and extract files +function(download_and_extract url dest_dir zip_file extracted_dir modify_proxy) + # Check if the prebuilt VCL compiler libraries not exist + if(NOT EXISTS "${extracted_dir}") + if(modify_proxy STREQUAL "MODIFY") + # Update proxy to enable download for windows url + set(original_NO_PROXY $ENV{NO_PROXY}) + set(original_no_proxy $ENV{no_proxy}) + set(ENV{NO_PROXY} "") + set(ENV{no_proxy} "") + endif() + + # Download the prebuilt VCL compiler libraries, if failure, show error message and exit + message(STATUS "Downloading prebuilt VCL compiler libraries from ${url}") + file(DOWNLOAD "${url}" "${zip_file}" + TIMEOUT 3600 + LOG log_output + STATUS download_status + SHOW_PROGRESS) + + if(modify_proxy STREQUAL "MODIFY") + # Restore proxy + set(ENV{NO_PROXY} ${original_NO_PROXY}) + set(ENV{no_proxy} ${original_no_proxy}) + endif() + + list(GET download_status 0 download_result) + if(NOT download_result EQUAL 0) + message(FATAL_ERROR "Download failed!\nStatus: ${download_status}\nLog: ${log_output}") + else() + message(STATUS "Download completed: ${zip_file}") + endif() + + message(STATUS "Unzipping prebuilt VCL compiler libraries to ${extracted_dir}") + # Determine 
extraction method based on file extension + if("${zip_file}" MATCHES "\\.zip$") + file(ARCHIVE_EXTRACT INPUT "${zip_file}" DESTINATION "${extracted_dir}") + elseif("${zip_file}" MATCHES "\\.deb$") + execute_process(COMMAND dpkg-deb -x "${zip_file}" "${extracted_dir}") + elseif("${zip_file}" MATCHES "\\.exe$") + set(WINRAR_PATHS + "C:/Program Files/WinRAR" + "C:/Program Files (x86)/WinRAR" + ) + + set(WINRAR_FOUND FALSE) + set(WINRAR_EXECUTABLE "") + + foreach(PATH ${WINRAR_PATHS}) + if(EXISTS "${PATH}/WinRAR.exe") + set(WINRAR_FOUND TRUE) + set(WINRAR_EXECUTABLE "${PATH}/WinRAR.exe") + break() + endif() + endforeach() + + if(WINRAR_FOUND) + message(STATUS "WinRAR found at: ${WINRAR_EXECUTABLE} and extract ${zip_file} to ${extracted_dir}") + file(MAKE_DIRECTORY "${extracted_dir}") + execute_process( + COMMAND "${WINRAR_EXECUTABLE}" x -y -o+ "${zip_file}" "${extracted_dir}" + RESULT_VARIABLE result + OUTPUT_VARIABLE output + ERROR_VARIABLE error + ) + + if(result EQUAL 0) + message(STATUS "Extraction successful: ${output}") + else() + #file(REMOVE_RECURSE "${extracted_dir}") + message(STATUS "Extraction failed: ${error}") + endif() + else() + message(FATAL_ERROR "WinRAR not found. 
Please install WinRAR to proceed.") + endif() + else() + message(FATAL_ERROR "Unsupported file extension for extraction: ${zip_file}") + endif() + file(REMOVE "${zip_file}") + else() + message(STATUS "Prebuilt VCL compiler libraries already exist, skip download") + endif() +endfunction() + +if(ENABLE_VCL_FOR_COMPILER) + if(ENABLE_SYSTEM_NPU_VCL_COMPILER) + message(STATUS "Using system NPU VCL compiler libraries, skip download") + else() + message(STATUS "Downloading prebuilt NPU VCL compiler libraries") + if(WIN32) + set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_lib/win") + set(VCL_COMPILER_LIBS_URL "https://downloadmirror.intel.com/854488/npu_win_32.0.100.4023.zip") + set(VCL_COMPILER_LIBS_ZIP "${VCL_COMPILER_LIBS_DIR}/npu_win_32.0.100.4023.zip") + set(VCL_COMPILER_LIBS_DIR_UNZIPPED "${VCL_COMPILER_LIBS_DIR}/npu_win_32.0.100.4023") + + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_ZIP}" "${VCL_COMPILER_LIBS_DIR_UNZIPPED}" "MODIFY") + set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_UNZIPPED}/npu_win_32.0.100.4023/drivers/x64/") + + + configure_file( + ${VCL_COMPILER_LIB_PATH}/npu_driver_compiler.dll + ${VCL_COMPILER_LIB_PATH}/npu_vcl_compiler.dll + COPYONLY + ) + set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/npu_vcl_compiler.dll") + file(COPY "${VCL_COMPILER_LIB}" + DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_BUILD_TYPE}") + message(STATUS "Copying prebuilt VCL compiler libraries npu_vcl_compiler.dll to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for windows") + else() + # Check if the operating system is Linux and not macOS + if(UNIX AND NOT APPLE) + # Get the OS name and version + execute_process(COMMAND lsb_release -is OUTPUT_VARIABLE OS_NAME OUTPUT_STRIP_TRAILING_WHITESPACE) + execute_process(COMMAND lsb_release -rs OUTPUT_VARIABLE OS_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) + + if(OS_NAME STREQUAL "Ubuntu") + if(OS_VERSION STREQUAL "22.04") + # Ubuntu 22.04-specific settings or 
actions + set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_libs/ubuntu22.04") + set(VCL_COMPILER_LIBS_URL "https://github.com/intel/linux-npu-driver/releases/download/v1.19.0/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu22.04_amd64.deb") + set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu22.04_amd64.deb") + set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu22.04") + + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") + + set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") + configure_file( + ${VCL_COMPILER_LIB_PATH}/libnpu_driver_compiler.so + ${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so + COPYONLY + ) + set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") + file(COPY "${VCL_COMPILER_LIB}" + DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") + message(STATUS "Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 22.04") + elseif(OS_VERSION STREQUAL "24.04") + message(STATUS "This is Ubuntu 24.04") + # Ubuntu 24.04-specific settings or actions + set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_libs/ubuntu24.04") + set(VCL_COMPILER_LIBS_URL "https://github.com/intel/linux-npu-driver/releases/download/v1.19.0/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu24.04_amd64.deb") + set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu24.04_amd64.deb") + set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu24.04") + + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_DEB}" 
"${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") + + set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") + configure_file( + ${VCL_COMPILER_LIB_PATH}/libnpu_driver_compiler.so + ${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so + COPYONLY + ) + set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") + file(COPY "${VCL_COMPILER_LIB}" + DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") + message(STATUS "Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 24.04") + else() + message(STATUS "This is another version of Ubuntu: ${OS_VERSION}") + # Other Ubuntu-specific settings or actions + endif() + else() + message(STATUS "This is a different Linux distribution: ${OS_NAME}, skip downloading prebuilt VCL compiler libraries") + # Other Linux-specific settings or actions + endif() + endif() + endif() + endif() + + install(FILES ${VCL_COMPILER_LIB} + DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${NPU_INTERNAL_COMPONENT}) +endif() diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 4190b8415b87ad..1f462c0e461806 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -11,3 +11,6 @@ if(NOT ENABLE_NPU_PLUGIN_ENGINE AND ENABLE_TESTS) endif() ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF) + +ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" ON) +ov_option(ENABLE_SYSTEM_NPU_VCL_COMPILER "Use system VCL compiler libraries" OFF) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp index 3005c4ae2ac634..d9a533729eeeab 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp @@ -842,7 +842,7 @@ struct 
COMPILER_TYPE final : OptionBase +# include +#else +# include +# include +#endif + +#if defined(__cplusplus) +# pragma once +#endif + +#if defined(__cplusplus) +extern "C" { +#endif + +#define VCL_COMPILER_VERSION_MAJOR 7 +#define VCL_COMPILER_VERSION_MINOR 4 +#define VCL_PROFILING_VERSION_MAJOR 2 +#define VCL_PROFILING_VERSION_MINOR 0 + +#ifndef DEPRECATED +# define DEPRECATED // for documentation only +#endif + +/////////////////////////////////////////////////////////////////////////////// +#ifndef VCL_APICALL +# if defined(_WIN32) +/// @brief Calling convention for all API functions +# define VCL_APICALL __cdecl +# else +# define VCL_APICALL +# endif // defined(_WIN32) +#endif // VCL_APICALL + +/////////////////////////////////////////////////////////////////////////////// +#ifndef VCL_APIEXPORT +# if defined(_WIN32) +/// @brief Windows-specific dllexport storage-class attribute +# define VCL_APIEXPORT __declspec(dllexport) +# else +# define VCL_APIEXPORT +# endif // defined(_WIN32) +#endif // VCL_APIEXPORT + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Compiler handle +typedef struct __vcl_compiler_handle_t* vcl_compiler_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Executable handle +typedef struct __vcl_executable_handle_t* vcl_executable_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Profiling handle +typedef struct __vcl_profiling_handle_t* vcl_profiling_handle_t; + +/////////////////////////////////////////////////////////////////////////////// + +/// @brief QueryNetwork handle +typedef struct __vcl_query_handle_t* vcl_query_handle_t; + +/// @brief Error log handle +typedef struct __vcl_log_handle_t* vcl_log_handle_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines type of requested data. 
+/// Must be in sync with \b _ze_graph_profiling_type_t +typedef enum __vcl_profiling_request_type_t { + VCL_PROFILING_LAYER_LEVEL = 0x1, + VCL_PROFILING_TASK_LEVEL = 0x2, + VCL_PROFILING_RAW = 0x3, + + VCL_PROFILING_FORCE_UINT32 = 0x7fffffff +} vcl_profiling_request_type_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines version info for the VPUXCompilerL0 API +typedef struct __vcl_version_info_t { + uint16_t major; + uint16_t minor; + +} vcl_version_info_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines return/error codes +typedef enum __vcl_result_t { + VCL_RESULT_SUCCESS = 0, ///< [Core] success + VCL_RESULT_ERROR_OUT_OF_MEMORY = 0x70000002, ///< [Core] insufficient memory to satisfy call + VCL_RESULT_ERROR_UNSUPPORTED_FEATURE = 0x78000003, ///< [Validation] generic error code for unsupported features + VCL_RESULT_ERROR_INVALID_ARGUMENT = 0x78000004, ///< [Validation] generic error code for invalid arguments + VCL_RESULT_ERROR_INVALID_NULL_HANDLE = 0x78000005, ///< [Validation] handle argument is not valid + VCL_RESULT_ERROR_IO = 0x78000006, ///< [Core] IO error + VCL_RESULT_ERROR_INVALID_IR = 0x78000007, ///< [Validation] the member of modelIR is not valid + VCL_RESULT_ERROR_UNKNOWN = 0x7ffffffe, ///< [Core] unknown or internal error + +} vcl_result_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines compiler properties +typedef struct __vcl_compiler_properties_t { + const char* id; + vcl_version_info_t version; + uint32_t supportedOpsets; + +} vcl_compiler_properties_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines profiling properties +typedef struct __vcl_profiling_properties_t { + vcl_version_info_t version; ///< Profiling module version + +} vcl_profiling_properties_t; + 
+/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines debug level for VCL +typedef enum __vcl_log_level_t { + VCL_LOG_NONE = 0, ///< Log is disabled + VCL_LOG_ERROR = 1, ///< Events which are not expected, containing probable reason + VCL_LOG_WARNING = 2, ///< Events which are unusual + VCL_LOG_INFO = 3, ///< Short messages about ongoing activity + VCL_LOG_DEBUG = 4, ///< Messages with particular data and explanations + VCL_LOG_TRACE = 5, ///< Messages with detailed information about execution + +} vcl_log_level_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines device desc to be passed during creation +/// +/// For online compilation, revision is always valid value and -1u for offline compilation. +/// 1. In offline mode the driver does not know the stepping and provides -1 (unknown) to VCL +/// 2. In VCL +/// If driver provides valid revision, the value will be default value for NPU_STEPPING +/// If driver provides -1u as value for revision, VCL will not set NPU_STEPPING +/// 3. If NPU_STEPPING is set by user with config, VCL will use user config instead of default value. +/// 4. If NPU_STEPPING is not passed to compiler, compiler will choose default stepping. 
+typedef struct __vcl_device_desc_t { + uint64_t size; /// Size of vcl_device_desc_t + uint32_t deviceID; /// The lower 16 bits equal to PCI Device ID, the upper 16 bits are zero + uint16_t revision; /// NPU Revision Identifier, -1u as invalid value + uint32_t tileCount; /// Value equals maximum number of slices +} vcl_device_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines compiler desc to be passed during creation +typedef struct __vcl_compiler_desc_t { + vcl_version_info_t version; /// The host vcl version + vcl_log_level_t debugLevel; /// Debug level for VCL +} vcl_compiler_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines executable description to be passed during executable +/// creation +/// +/// Format of modelIRData (defined in L0 adaptor): +/// 1. API version : vcl_version_info_t +/// 2. Num of data elements (now only xml + weights = 2) : uint32_t +/// 3. Size of data 1 (xml) : uint64_t +/// 4. Data 1 : $2 bytes +/// 5. Size of data 2 (weights) : uint64_t +/// 6. Data 2 : $4 bytes +typedef struct __vcl_executable_desc_t { + const uint8_t* modelIRData; + uint64_t modelIRSize; ///< Size of modelIRData + const char* options; ///< Compiler config options + uint64_t optionsSize; ///< Size of options +} vcl_executable_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines query description to be passed during query network creation +/// +/// Format of modelIRData (defined in L0 adaptor): +/// 1. API version : vcl_version_info_t +/// 2. Num of data elements (now only xml + weights = 2) : uint32_t +/// 3. Size of data 1 (xml) : uint64_t +/// 4. Data 1 : $2 bytes +/// 5. Size of data 2 (weights) : uint64_t +/// 6. 
Data 2 : $4 bytes +typedef struct __vcl_query_desc_t { + const uint8_t* modelIRData; + uint64_t modelIRSize; ///< Size of modelIRData + const char* options; ///< Compiler config options + uint64_t optionsSize; ///< Size of options +} vcl_query_desc_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Defines input that is required to create profiling handler +typedef struct __vcl_profiling_input_t { + const uint8_t* blobData; ///< Pointer to the buffer with the blob + uint64_t blobSize; ///< Size of the blob in bytes + const uint8_t* profData; ///< Pointer to the raw profiling output + uint64_t profSize; ///< Size of the raw profiling output +} vcl_profiling_input_t, *p_vcl_profiling_input_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Decoded profiling output +typedef struct __vcl_profiling_output_t { + const uint8_t* data; ///< Either a pointer to raw data or pointer to the array of structures + uint64_t size; ///< Size of the buffer in bytes +} vcl_profiling_output_t, *p_vcl_profiling_output_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Return VCL API version to caller, shall never change this interface to support backward compatibility check +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetVersion(vcl_version_info_t* compilerVersion, + vcl_version_info_t* profilingVersion); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Creates a compiler object and returns the compiler handle +VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerCreate(vcl_compiler_desc_t* compilerDesc, + vcl_device_desc_t* deviceDesc, + vcl_compiler_handle_t* compiler, + vcl_log_handle_t* logHandle); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the compiler +VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerDestroy(vcl_compiler_handle_t compiler); 
+ +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieves the compiler properties, include the version and supported_opsets +VCL_APIEXPORT vcl_result_t VCL_APICALL vclCompilerGetProperties(vcl_compiler_handle_t compiler, + vcl_compiler_properties_t* properties); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Create an querynetwork object and return the handle +VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetworkCreate(vcl_compiler_handle_t compiler, + vcl_query_desc_t desc, + vcl_query_handle_t* query); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieve result of query network +/// @attention Should be called twice, first time to retrieve data size, second time to get data. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetwork(vcl_query_handle_t query, uint8_t* queryResult, uint64_t* size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the queryNetwork and releases the cached query result +VCL_APIEXPORT vcl_result_t VCL_APICALL vclQueryNetworkDestroy(vcl_query_handle_t query); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Creates an executable object and returns the executable handle. +/// Parse modelIRData in the executable descriptor to blob and store it in the executable. 
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableCreate(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_executable_handle_t* executable); + +DEPRECATED typedef struct __vcl_allocator_t { + uint8_t* (*allocate)(uint64_t); + void (*deallocate)(uint8_t*); +} vcl_allocator_t; + +typedef struct __vcl_allocator2_t { + uint8_t* (*allocate)(struct __vcl_allocator2_t*, uint64_t); + void (*deallocate)(struct __vcl_allocator2_t*, uint8_t*); +} vcl_allocator2_t; + +DEPRECATED VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreate(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + const vcl_allocator_t* allocator, + uint8_t** blobBuffer, + uint64_t* blobSize); + +VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreate2(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_allocator2_t* allocator, + uint8_t** blobBuffer, + uint64_t* blobSize); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the executable and releases the cached blob. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableDestroy(vcl_executable_handle_t executable); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief If blobBuffer is null, the function returns the size of the blob stored in the executable. +/// Otherwise the function copies the executable cached blob to the blobBuffer provided by the caller. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableGetSerializableBlob(vcl_executable_handle_t executable, + uint8_t* blobBuffer, + uint64_t* blobSize); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Creates a buffer with decoded profiling info. +/// This is the most computationally expensive profiling API. +/// It does all memory allocations and postprocessing. 
+/// @warning Caller must keep \b p_vcl_profiling_input_t::profData buffer alive until +/// \b vclProfilingDestroy call if \b VCL_PROFILING_RAW request is expected. +/// \b vclProfilingCreate function doesn't copy profiling output buffer but will +/// return pointer to it as a response to \b VCL_PROFILING_RAW request. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingCreate(p_vcl_profiling_input_t profilingInput, + vcl_profiling_handle_t* profilingHandle, + vcl_log_handle_t* logHandle); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Provides profiling information based on request argument. +/// @warning For \b VCL_PROFILING_RAW request it returns a pointer to the buffer that was provided to +/// \b vclProfilingCreate function call. This means that original buffer with profiling output must +/// be alive till this call. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetDecodedProfilingBuffer(vcl_profiling_handle_t profilingHandle, + vcl_profiling_request_type_t requestType, + p_vcl_profiling_output_t profilingOutput); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Destroys the buffer with decoded profiling info. +/// Now caller may safely dispose raw profiling output. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingDestroy(vcl_profiling_handle_t profilingHandle); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get version of post-processing module +VCL_APIEXPORT vcl_result_t VCL_APICALL vclProfilingGetProperties(vcl_profiling_handle_t profilingHandle, + vcl_profiling_properties_t* properties); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieves error message from log handler. +/// Handle is released automatically with related compiler or Profiler. 
+VCL_APIEXPORT vcl_result_t VCL_APICALL vclLogHandleGetString(vcl_log_handle_t logHandle, size_t* logSize, char* log); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Retrieve the list of supported compiler options +/// @attention Should be called twice, first time to retrieve data size, second time to get data. +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetCompilerSupportedOptions(vcl_compiler_handle_t compiler, + char* result, + uint64_t* size); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Verifies if a given config option (or option-value pair) is supported by the compiler +VCL_APIEXPORT vcl_result_t VCL_APICALL vclGetCompilerIsOptionSupported(vcl_compiler_handle_t compiler, + const char* option, + const char* value); + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif // VPUX_COMPILER_L0_H diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 0675d964565947..5bc7c236e45a10 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -11,6 +11,7 @@ #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/runtime/so_ptr.hpp" +#include "vcl_api.hpp" #include "ze_graph_ext_wrappers.hpp" namespace intel_npu { diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp new file mode 100644 index 00000000000000..6251821b04403d --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp @@ -0,0 +1,118 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "intel_npu/icompiler.hpp" +#include 
"npu_driver_compiler.h" +#include "openvino/core/except.hpp" + +namespace intel_npu { + +// clang-format off +#define vcl_symbols_list() \ + vcl_symbol_statement(vclGetVersion) \ + vcl_symbol_statement(vclCompilerCreate) \ + vcl_symbol_statement(vclCompilerDestroy) \ + vcl_symbol_statement(vclCompilerGetProperties) \ + vcl_symbol_statement(vclQueryNetworkCreate) \ + vcl_symbol_statement(vclQueryNetwork) \ + vcl_symbol_statement(vclQueryNetworkDestroy) \ + vcl_symbol_statement(vclExecutableCreate) \ + vcl_symbol_statement(vclAllocatedExecutableCreate) \ + vcl_symbol_statement(vclExecutableDestroy) \ + vcl_symbol_statement(vclExecutableGetSerializableBlob) \ + vcl_symbol_statement(vclProfilingCreate) \ + vcl_symbol_statement(vclGetDecodedProfilingBuffer) \ + vcl_symbol_statement(vclProfilingDestroy) \ + vcl_symbol_statement(vclProfilingGetProperties) \ + vcl_symbol_statement(vclLogHandleGetString) + + +//unsupported symbols with older ze_loader versions +#define vcl_weak_symbols_list() \ + vcl_symbol_statement(vclAllocatedExecutableCreate2) \ + vcl_symbol_statement(vclGetCompilerSupportedOptions) \ + vcl_symbol_statement(vclGetCompilerIsOptionSupported) +// clang-format on + +class VCLApi { +public: + VCLApi(); + VCLApi(const VCLApi& other) = delete; + VCLApi(VCLApi&& other) = delete; + void operator=(const VCLApi&) = delete; + void operator=(VCLApi&&) = delete; + + static const std::shared_ptr& getInstance(); + std::shared_ptr getLibrary() const { + return lib; + } + +#define vcl_symbol_statement(vcl_symbol) decltype(&::vcl_symbol) vcl_symbol; + vcl_symbols_list(); + vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +private: + std::shared_ptr lib; + Logger _logger; +}; + +#define vcl_symbol_statement(vcl_symbol) \ + template \ + inline typename std::invoke_result::type wrapped_##vcl_symbol(Args... 
args) { \ + const auto& ptr = VCLApi::getInstance(); \ + if (ptr->vcl_symbol == nullptr) { \ + OPENVINO_THROW("Unsupported vcl_symbol " #vcl_symbol); \ + } \ + return ptr->vcl_symbol(std::forward(args)...); \ + } +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement +#define vcl_symbol_statement(vcl_symbol) inline decltype(&::vcl_symbol) vcl_symbol = wrapped_##vcl_symbol; +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +class VCLCompilerImpl final : public intel_npu::ICompiler { +public: + VCLCompilerImpl(); + ~VCLCompilerImpl() override; + + static std::shared_ptr& getInstance() { + static std::shared_ptr compiler = std::make_shared(); + return compiler; + } + + NetworkDescription compile(const std::shared_ptr& model, const Config& config) const override; + + ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; + + NetworkMetadata parse(const std::vector& network, const Config& config) const override; + + uint32_t get_version() const override; + + std::vector process_profiling_output(const std::vector& profData, + const std::vector& network, + const intel_npu::Config& config) const final override; + + bool get_supported_options(std::vector& options) const; + + bool is_option_supported(const std::string& option) const; + +private: + std::shared_ptr _vclApi; + vcl_log_handle_t _logHandle = nullptr; + vcl_compiler_handle_t _compilerHandle = nullptr; + vcl_compiler_properties_t _compilerProperties; + vcl_version_info_t _vclVersion; + vcl_version_info_t _vclProfilingVersion; + Logger _logger; +}; + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index 505c988e41151c..f647e349a6d01a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ 
b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -75,4 +75,7 @@ class ZeGraphExtWrappers { Logger _logger; }; +// Parse the result string of query from format to unordered_set of string +std::unordered_set parseQueryResult(std::vector& data); + } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp index 9494f484bb7c72..c37071f10395d3 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp @@ -226,7 +226,6 @@ void Graph::initialize(const Config& config) { _zeGraphExt->initializeGraph(_graphDesc, _commandQueueGroupOrdinal); _logger.debug("Graph initialize finish"); - // We are allowed to release the original blob because weights were loaded in NPU memory during // _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are // releasing it here to avoid unnecessary memory usage. 
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 6a636fa398bf19..2a841dd9d522be 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -23,14 +23,15 @@ #include "openvino/util/shared_object.hpp" #include "weightless_graph.hpp" -namespace { +namespace { +#ifndef VCL_FOR_COMPILER std::shared_ptr load_library(const std::string& libpath) { -#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) +# if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) return ov::util::load_shared_object(ov::util::string_to_wstring(libpath).c_str()); -#else +# else return ov::util::load_shared_object(libpath.c_str()); -#endif +# endif } std::shared_ptr get_compiler(std::shared_ptr so) { @@ -51,7 +52,7 @@ ov::SoPtr load_compiler(const std::string& libpath) { return ov::SoPtr(compiler, compilerSO); } - +#endif ov::Tensor make_tensor_from_vector(std::vector& vector) { auto tensor = ov::Tensor(ov::element::u8, ov::Shape{vector.size()}, vector.data()); auto impl = ov::get_tensor_impl(std::move(tensor)); @@ -63,6 +64,7 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { } // namespace + namespace intel_npu { PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) @@ -70,11 +72,15 @@ PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr(VCLCompilerImpl::getInstance(), VCLApi::getInstance()->getLibrary()); +#else _logger.info("PLUGIN compiler will be used."); std::string baseName = "npu_mlir_compiler"; auto libPath = ov::util::make_plugin_library_name(ov::util::get_ov_lib_path(), baseName + OV_BUILD_POSTFIX); _compiler = load_compiler(libPath); - +#endif if (_zeroInitStruct == nullptr) { return; } @@ -101,26 +107,36 @@ std::shared_ptr PluginCompilerAdapter::compile(const 
std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); + graphDesc = + _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); +#ifdef VCL_FOR_COMPILER + // For VCL, we need to get metadata from driver parser + networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); + networkMeta.name = model->get_friendly_name(); +>>>>>>> e20458aedf (Add VCLApi and VCLCompilerImpl) } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); } } - return std::make_shared( - _zeGraphExt, - _zeroInitStruct, - graphDesc, - std::move(networkDesc.metadata), - std::move(tensor), - config, - /* persistentBlob = */ true, // exporting the blob shall be available in such a scenario - _compiler); + return std::make_shared(_zeGraphExt, + _zeroInitStruct, + graphDesc, +#ifdef VCL_FOR_COMPILER + std::move(networkMeta), +#else + std::move(networkDesc.metadata), +#endif + std::move(tensor), + /* blobAllocatedByPlugin = */ false, + config, + _compiler); } std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr& model, @@ -262,21 +278,37 @@ std::shared_ptr PluginCompilerAdapter::parse( const std::optional>& model) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); - _logger.debug("parse start"); + ze_graph_handle_t graphHandle = nullptr; + NetworkMetadata networkMeta; std::vector network(mainBlob.get_byte_size()); + +#ifdef VCL_FOR_COMPILER + _logger.debug("parse metadata from driver for vcl compiler"); + if (_zeGraphExt) { + _logger.debug("parse start for vcl compiler"); + graphHandle = _zeGraphExt->getGraphHandle(*reinterpret_cast(mainBlob.data()), mainBlob.get_byte_size()); + networkMeta = _zeGraphExt->getNetworkMeta(graphHandle); + } + _logger.debug("parse end for vcl compiler"); +#else + _logger.debug("parse start"); network.assign(reinterpret_cast(mainBlob.data()), reinterpret_cast(mainBlob.data()) + 
mainBlob.get_byte_size()); auto networkMeta = _compiler->parse(network, config); network.clear(); network.shrink_to_fit(); +<<<<<<< HEAD GraphDescriptor mainGraphDesc; +======= +>>>>>>> e20458aedf (Add VCLApi and VCLCompilerImpl) if (_zeGraphExt) { mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); } _logger.debug("main schedule parse end"); +#endif // exporting the blob when we get it from cache or ov::hint::compiled_blob property // shall be available @@ -343,15 +375,60 @@ uint32_t PluginCompilerAdapter::get_version() const { } std::vector PluginCompilerAdapter::get_supported_options() const { +#ifdef VCL_FOR_COMPILER + // For VCL, we can return the supported options from compiler + VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); + if (vclCompiler == nullptr) { + _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning empty supported options."); + return {}; + } + std::vector options; + if (!vclCompiler->get_supported_options(options)) { + _logger.warning("VCLCompilerImpl get_supported_options failed. 
Returning empty supported options."); + return {}; + } + + if (options.empty()) { + _logger.warning("get_supported_options returned empty options."); + return {}; + } + + std::string compilerOptionsStr(options.data(), options.size()); + _logger.debug("VCLCompilerImpl return supported_options: %s", compilerOptionsStr.c_str()); + // vectorize string + std::istringstream suppstream(compilerOptionsStr); + std::vector compilerOpts = {}; + std::string option; + while (suppstream >> option) { + compilerOpts.push_back(option); + } + return compilerOpts; +#else // PluginCompiler has all the same options as plugin // Returing empty string to let the plugin fallback to legacy registration return {}; +#endif } bool PluginCompilerAdapter::is_option_supported(std::string optname) const { +#ifdef VCL_FOR_COMPILER + VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); + if (vclCompiler == nullptr) { + _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning false for check."); + return false; + } + if (vclCompiler->is_option_supported(optname)) { + _logger.debug("Option %s is supported by VCLCompilerImpl", optname.c_str()); + return true; + } else { + _logger.debug("Option %s is not supported by VCLCompilerImpl", optname.c_str()); + return false; + } +#else // This functions has no utility in PluginCompiler // returning false for any request to avoid the option of spaming the plugin return false; +#endif } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp new file mode 100644 index 00000000000000..60b3afb7628814 --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -0,0 +1,439 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "vcl_api.hpp" + +#include "intel_npu/profiling.hpp" +#include "ir_serializer.hpp" +#include "openvino/runtime/make_tensor.hpp" +#include 
"openvino/util/file_util.hpp" +#include "openvino/util/shared_object.hpp" + +namespace intel_npu { + +static inline std::string getLatestVCLLog(vcl_log_handle_t logHandle) { + Logger _logger("VCLAPI", Logger::global().level()); + _logger.debug("getLatestVCLLog start"); + + vcl_version_info_t compilerVersion; + vcl_version_info_t profilingVersion; + vcl_result_t ret = vclGetVersion(&compilerVersion, &profilingVersion); + + if (ret != VCL_RESULT_SUCCESS || compilerVersion.major < 3) { + _logger.warning("Failed to get VCL version: 0x%x", ret); + return "Can not get VCL log, VCL version is too old!"; + } + + // Get log size + size_t size = 0; + // Null graph handle to get error log + ret = vclLogHandleGetString(logHandle, &size, nullptr); + if (VCL_RESULT_SUCCESS != ret) { + return "Failed to get size of latest VCL log"; + } + + if (size <= 0) { + return "No error stored in VCL when error detected"; + } + + // Get log content + std::string logContent{}; + logContent.resize(size); + ret = vclLogHandleGetString(logHandle, &size, const_cast(logContent.data())); + if (VCL_RESULT_SUCCESS != ret) { + return "Size of latest error log > 0, failed to get content"; + } + _logger.debug("getLatestBuildError end"); + return logContent; +} + +#define THROW_ON_FAIL_FOR_VCL(step, ret, logHandle) \ + { \ + vcl_result_t result = ret; \ + if (result != VCL_RESULT_SUCCESS) { \ + OPENVINO_THROW("Failed to call VCL API : ", \ + step, \ + " result: 0x", \ + std::hex, \ + result, \ + " - ", \ + getLatestVCLLog(logHandle)); \ + } \ + } + +VCLApi::VCLApi() : _logger("VCLApi", ov::log::Level::DEBUG) { + const std::string baseName = "npu_vcl_compiler"; + try { + auto libpath = ov::util::make_plugin_library_name({}, baseName); + _logger.debug("Try to load npu_vcl_compiler"); + +#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) + this->lib = ov::util::load_shared_object(ov::util::string_to_wstring(libpath).c_str()); +#else + this->lib = 
ov::util::load_shared_object(libpath.c_str()); +#endif + } catch (const std::runtime_error& error) { + _logger.debug("Failed to load npu_vcl_compiler"); + OPENVINO_THROW(error.what()); + } + + try { +#define vcl_symbol_statement(vcl_symbol) \ + this->vcl_symbol = reinterpret_cast(ov::util::get_symbol(lib, #vcl_symbol)); + vcl_symbols_list(); +#undef vcl_symbol_statement + } catch (const std::runtime_error& error) { + _logger.debug("Failed to get formal symbols from npu_vcl_compiler"); + OPENVINO_THROW(error.what()); + } + +#define vcl_symbol_statement(vcl_symbol) \ + try { \ + this->vcl_symbol = reinterpret_cast(ov::util::get_symbol(lib, #vcl_symbol)); \ + } catch (const std::runtime_error&) { \ + _logger.debug("Failed to get %s from npu_vcl_compiler", #vcl_symbol); \ + this->vcl_symbol = nullptr; \ + } + vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +#define vcl_symbol_statement(vcl_symbol) vcl_symbol = this->vcl_symbol; + vcl_symbols_list(); + vcl_weak_symbols_list(); +#undef vcl_symbol_statement +} + +const std::shared_ptr& VCLApi::getInstance() { + static std::shared_ptr instance = std::make_shared(); + return instance; +} + +VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerImpl", ov::log::Level::DEBUG) { + _logger.debug("VCLCompilerImpl constructor start"); + // Initialize the VCL API + THROW_ON_FAIL_FOR_VCL("vclGetVersion", vclGetVersion(&_vclVersion, &_vclProfilingVersion), nullptr); + + _logger.info("Plugin VCL API Version: %d.%d", VCL_COMPILER_VERSION_MAJOR, VCL_COMPILER_VERSION_MINOR); + _logger.info("Plugin VCL Profiling API Version: %d.%d", VCL_PROFILING_VERSION_MAJOR, VCL_PROFILING_VERSION_MINOR); + _logger.info("Lib VCL Compiler Version: %d.%d", _vclVersion.major, _vclVersion.minor); + _logger.info("Lib VCL Profiling Version: %d.%d", _vclProfilingVersion.major, _vclProfilingVersion.minor); + _logger.info("Use Lib VCL version to create compiler"); + + vcl_compiler_desc_t compilerDesc; + compilerDesc.version = 
_vclVersion; + compilerDesc.debugLevel = static_cast<__vcl_log_level_t>(static_cast(Logger::global().level()) - 1); + vcl_device_desc_t device_desc; + device_desc.size = sizeof(vcl_device_desc_t); + device_desc.deviceID = 0x643E; // Value from intel_npu/src/backend/src/zero_device.cpp + device_desc.revision = -1; // -1 to skip the config + device_desc.tileCount = 5; // 1 as init value + + THROW_ON_FAIL_FOR_VCL("vclCompilerCreate", + vclCompilerCreate(&compilerDesc, &device_desc, &_compilerHandle, &_logHandle), + nullptr); + + THROW_ON_FAIL_FOR_VCL("vclCompilerGetProperties", + vclCompilerGetProperties(_compilerHandle, &_compilerProperties), + _logHandle); + + _logger.info("VCL Compiler created successfully"); + _logger.info("VCL Compiler Properties: ID: %s, Version: %d.%d, Supported Opsets: %u", + _compilerProperties.id, + _compilerProperties.version.major, + _compilerProperties.version.minor, + _compilerProperties.supportedOpsets); +} + +VCLCompilerImpl::~VCLCompilerImpl() { + if (_compilerHandle) { + THROW_ON_FAIL_FOR_VCL("vclCompilerDestroy", vclCompilerDestroy(_compilerHandle), _logHandle); + } + if (_logHandle) { + _logHandle = nullptr; // Log handle is released automatically with the compiler + } + _logger.info("VCL Compiler destroyed successfully"); +} + +struct vcl_allocator_vector : vcl_allocator2_t { + vcl_allocator_vector() : vcl_allocator2_t{vector_allocate, vector_deallocate} {} + + static uint8_t* vector_allocate(vcl_allocator2_t* allocator, size_t size) { + vcl_allocator_vector* vecAllocator = static_cast(allocator); + vecAllocator->m_vec.resize(size); + return vecAllocator->m_vec.data(); + } + + static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) { + vcl_allocator_vector* vecAllocator = static_cast(allocator); + vecAllocator->m_vec.clear(); + vecAllocator->m_vec.shrink_to_fit(); + } + + std::vector m_vec; +}; + +struct vcl_allocator_malloc { + static uint8_t* vcl_allocate(uint64_t size) { + return reinterpret_cast(malloc(size)); 
+ } + + static void vcl_deallocate(uint8_t* ptr) { + free(ptr); + } +}; + +NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { + _logger.debug("compile start"); + + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; + _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); + + _logger.debug("serialize IR"); + ze_graph_compiler_version_info_t compilerVersion; + compilerVersion.major = _compilerProperties.version.major; + compilerVersion.minor = _compilerProperties.version.minor; + auto serializedIR = intel_npu::driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion); + + std::string buildFlags; + const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); + + _logger.debug("create build flags"); + buildFlags += intel_npu::driver_compiler_utils::serializeIOInfo(model, useIndices); + buildFlags += " "; + buildFlags += intel_npu::driver_compiler_utils::serializeConfig(config, compilerVersion); + _logger.debug("final build flags to compiler: %s", buildFlags.c_str()); + vcl_executable_desc_t exeDesc = {serializedIR.second.get(), + serializedIR.first, + buildFlags.c_str(), + buildFlags.size()}; + _logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); + if (_vclVersion.major >= 7 && _vclVersion.minor >= 4) { + // For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2 + _logger.debug("Using vclAllocatedExecutableCreate2 for VCL 7.4+"); + vcl_allocator_vector allocator; + uint8_t* blob = nullptr; + size_t size = 0; + + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreate2", + vclAllocatedExecutableCreate2(_compilerHandle, exeDesc, &allocator, &blob, &size), + _logHandle); + if (size == 0 || blob == nullptr) { + OPENVINO_THROW("Failed to create VCL executable, size is zero or blob is null"); + } + + // Use empty metadata as VCL does not support metadata 
extraction + NetworkMetadata metadata; + + _logger.debug("compile end, blob size:%d", allocator.m_vec.size()); + return NetworkDescription(std::move(allocator.m_vec), std::move(metadata)); + } else if (_vclVersion.major >= 6 && _vclVersion.minor >= 1) { + // For older versions, we use vclAllocatedExecutableCreate + _logger.debug("Using vclAllocatedExecutableCreate for 6.1 < VCL < 7.4"); + + vcl_allocator_t allocator; + allocator.allocate = vcl_allocator_malloc::vcl_allocate; + allocator.deallocate = vcl_allocator_malloc::vcl_deallocate; + uint8_t* blob = nullptr; + size_t size = 0; + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreate", + vclAllocatedExecutableCreate(_compilerHandle, exeDesc, &allocator, &blob, &size), + _logHandle); + if (size == 0 || blob == nullptr) { + OPENVINO_THROW("Failed to create VCL executable, size is zero or blob is null"); + } + + std::vector compiledNetwork(blob, blob + size); + allocator.deallocate(blob); + + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + + _logger.debug("compile end, blob size:%d", compiledNetwork.size()); + return NetworkDescription(std::move(compiledNetwork), std::move(metadata)); + } else { + // For versions before 6.1, we use vclExecutableCreate + _logger.debug("Using vclExecutableCreate for VCL < 6.1"); + vcl_executable_handle_t exeHandle = nullptr; + THROW_ON_FAIL_FOR_VCL("vclExecutableCreate", + vclExecutableCreate(_compilerHandle, exeDesc, &exeHandle), + _logHandle); + + size_t size = 0; + THROW_ON_FAIL_FOR_VCL("vclExecutableGetSerializableBlob", + vclExecutableGetSerializableBlob(exeHandle, nullptr, &size), + _logHandle); + if (size == 0) { + OPENVINO_THROW("Failed to get VCL executable blob size, size is zero"); + } + std::vector compiledNetwork(size); + THROW_ON_FAIL_FOR_VCL("vclExecutableGetSerializableBlob", + vclExecutableGetSerializableBlob(exeHandle, compiledNetwork.data(), &size), + _logHandle); + + THROW_ON_FAIL_FOR_VCL("vclExecutableDestroy", 
vclExecutableDestroy(exeHandle), _logHandle); + + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + + _logger.debug("compile end, blob size:%d", compiledNetwork.size()); + return NetworkDescription(std::move(compiledNetwork), std::move(metadata)); + } +} + +intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector& network, const Config& config) const { + _logger.debug("parse start"); + // VCL does not support parse, return empty metadata + return intel_npu::NetworkMetadata(); +} + +std::vector VCLCompilerImpl::process_profiling_output(const std::vector& profData, + const std::vector& network, + const intel_npu::Config& config) const { + _logger.debug("process_profiling_output start"); + + vcl_profiling_handle_t profilingHandle; + vcl_profiling_input_t profilingInput = {network.data(), network.size(), profData.data(), profData.size()}; + vcl_log_handle_t logHandle; + THROW_ON_FAIL_FOR_VCL("vclProfilingCreate", + vclProfilingCreate(&profilingInput, &profilingHandle, &logHandle), + nullptr); + + vcl_profiling_properties_t profProperties; + THROW_ON_FAIL_FOR_VCL("vclProfilingGetProperties", + vclProfilingGetProperties(profilingHandle, &profProperties), + logHandle); + + _logger.info("VCL Profiling Properties: Version: %d.%d", + profProperties.version.major, + profProperties.version.minor); + + // We only use layer level info + vcl_profiling_request_type_t request = VCL_PROFILING_LAYER_LEVEL; + + vcl_profiling_output_t profOutput; + profOutput.data = NULL; + THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", + vclGetDecodedProfilingBuffer(profilingHandle, request, &profOutput), + logHandle); + if (profOutput.data == NULL) { + OPENVINO_THROW("Failed to get VCL profiling output"); + } + + std::vector layerInfo(profOutput.size / sizeof(ze_profiling_layer_info)); + if (profOutput.size > 0) { + _logger.debug("VCL profiling output size: %d", profOutput.size); + std::memcpy(layerInfo.data(), profOutput.data, 
profOutput.size); + } + + // profOutput.data = NULL; + // THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", vclGetDecodedProfilingBuffer(profilingHandle, + // VCL_PROFILING_TASK_LEVEL, &profOutput), logHandle); if (profOutput.data == NULL) { + // OPENVINO_THROW("Failed to get VCL profiling task level output"); + // } + + // profOutput.data = NULL; + // THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", vclGetDecodedProfilingBuffer(profilingHandle, + // VCL_PROFILING_RAW, &profOutput),logHandle); if (profOutput.data == NULL) { + // OPENVINO_THROW("Failed to get VCL profiling raw output"); + // } + + THROW_ON_FAIL_FOR_VCL("vclProfilingDestroy", vclProfilingDestroy(profilingHandle), logHandle); + + return intel_npu::profiling::convertLayersToIeProfilingInfo(layerInfo); // Return processed profiling info +} + +uint32_t VCLCompilerImpl::get_version() const { + return ZE_MAKE_VERSION(_compilerProperties.version.major, _compilerProperties.version.minor); +} + +ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, const Config& config) const { + _logger.debug("query start"); + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; + _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); + + _logger.debug("serialize IR"); + ze_graph_compiler_version_info_t compilerVersion; + compilerVersion.major = _compilerProperties.version.major; + compilerVersion.minor = _compilerProperties.version.minor; + auto serializedIR = intel_npu::driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion); + + std::string buildFlags; + buildFlags += intel_npu::driver_compiler_utils::serializeConfig(config, compilerVersion); + _logger.debug("queryImpl build flags : %s", buildFlags.c_str()); + + vcl_query_handle_t queryHandle; + vcl_query_desc_t queryDesc = {serializedIR.second.get(), serializedIR.first, buildFlags.c_str(), buildFlags.size()}; + THROW_ON_FAIL_FOR_VCL("vclQueryNetworkCreate", + 
vclQueryNetworkCreate(_compilerHandle, queryDesc, &queryHandle), + _logHandle); + + uint64_t size = 0; + THROW_ON_FAIL_FOR_VCL("vclQueryNetwork", vclQueryNetwork(queryHandle, nullptr, &size), _logHandle); + + std::vector supportedLayers(size); + THROW_ON_FAIL_FOR_VCL("vclQueryNetwork", + vclQueryNetwork(queryHandle, reinterpret_cast(supportedLayers.data()), &size), + _logHandle); + + THROW_ON_FAIL_FOR_VCL("vclQueryNetworkDestroy", vclQueryNetworkDestroy(queryHandle), _logHandle); + + const std::string deviceName = "NPU"; + ov::SupportedOpsMap result; + const auto parsedSupportedLayers = parseQueryResult(supportedLayers); + for (auto&& layerName : parsedSupportedLayers) { + result.emplace(layerName, deviceName); + } + _logger.info("For given model, there are %d supported layers", parsedSupportedLayers.size()); + + return result; +} + +bool VCLCompilerImpl::get_supported_options(std::vector& options) const { + _logger.debug("get_supported_options start"); + // 1. get size of compiler supported options list + size_t str_size = 0; + try { + THROW_ON_FAIL_FOR_VCL("vclGetCompilerSupportedOptions", + vclGetCompilerSupportedOptions(_compilerHandle, nullptr, &str_size), + _logHandle); + + if (str_size > 0) { + _logger.debug("obtain list"); + // 2. allocate buffer for it + options.resize(str_size); + // 3. 
populate char list + THROW_ON_FAIL_FOR_VCL("vclGetCompilerSupportedOptions", + vclGetCompilerSupportedOptions(_compilerHandle, options.data(), &str_size), + _logHandle); + + _logger.debug("Option list size %d, got option list", str_size); + return true; + } else { + _logger.debug("Option list size 0 - skipping!"); + } + } catch (const std::exception& e) { + // The API is only supported in new version, just add log here + _logger.debug("Exception in get_supported_options: %s", e.what()); + } + _logger.debug("get_supported_options end, no options found"); + return false; +} + +bool VCLCompilerImpl::is_option_supported(const std::string& option) const { + try { + const char* optname_ch = option.c_str(); + _logger.debug("is_option_supported start for option: %s", optname_ch); + THROW_ON_FAIL_FOR_VCL("vclGetCompilerIsOptionSupported", + vclGetCompilerIsOptionSupported(_compilerHandle, optname_ch, nullptr), + _logHandle); + return true; + } catch (const std::exception& e) { + // The API is only supported in new version, just add log here + _logger.debug("Exception in is_option_supported: %s", e.what()); + } + _logger.debug("option: %s is not supported", option.c_str()); + return false; +} + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp index a37ebff363b00d..ccb00d971b8471 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/ze_graph_ext_wrappers.cpp @@ -250,7 +250,7 @@ void ZeGraphExtWrappers::initializeGraphThroughCommandList(ze_graph_handle_t gra } // Parse the result string of query from format to unordered_set of string -static std::unordered_set parseQueryResult(std::vector& data) { +std::unordered_set parseQueryResult(std::vector& data) { std::string dataString(data.begin(), data.end()); std::unordered_set result; size_t i = 0, start = 0; 
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index b983a7b32ae2e2..0691e0f986d406 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -603,6 +603,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); +#ifndef VCL_FOR_COMPILER const auto set_cache_dir = localConfig.get(); if (!set_cache_dir.empty()) { const auto compilerType = localConfig.get(); @@ -610,6 +611,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type"); } } +#endif const auto platform = utils::getCompilationPlatform(localConfig.get(), From 3aad6d869f77334ce1fe7d452389de479171d11f Mon Sep 17 00:00:00 2001 From: Xin Wang Date: Sun, 20 Jul 2025 10:40:30 +0800 Subject: [PATCH 02/25] Use vclAllocateExecutionCreate3 to get metadata Signed-off-by: Xin Wang remove vclAllocatedExecutableCreate3 --- .../include/npu_driver_compiler.h | 2 +- .../src/plugin_compiler_adapter.cpp | 31 ++++++++----------- .../src/compiler_adapter/src/vcl_api.cpp | 2 +- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h index a8c38506fc844c..c945a26565ebef 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h +++ b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h @@ -23,7 +23,7 @@ extern "C" { #endif #define VCL_COMPILER_VERSION_MAJOR 7 -#define VCL_COMPILER_VERSION_MINOR 4 +#define VCL_COMPILER_VERSION_MINOR 5 #define VCL_PROFILING_VERSION_MAJOR 2 #define VCL_PROFILING_VERSION_MINOR 0 diff --git 
a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 2a841dd9d522be..a21c44f10d1592 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -23,7 +23,6 @@ #include "openvino/util/shared_object.hpp" #include "weightless_graph.hpp" - namespace { #ifndef VCL_FOR_COMPILER std::shared_ptr load_library(const std::string& libpath) { @@ -64,7 +63,6 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { } // namespace - namespace intel_npu { PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) @@ -107,18 +105,20 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); + graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); #ifdef VCL_FOR_COMPILER - // For VCL, we need to get metadata from driver parser - networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); - networkMeta.name = model->get_friendly_name(); ->>>>>>> e20458aedf (Add VCLApi and VCLCompilerImpl) + if (networkMeta.inputs.empty() && networkMeta.outputs.empty()) { + // If the metadata is empty, we can try to get it from the driver parser + _logger.info("Metadata is empty, trying to get it from the driver parser"); + networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); + networkMeta.name = model->get_friendly_name(); + } +#endif } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. 
Only exports are available"); @@ -134,8 +134,8 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr PluginCompilerAdapter::parse( const std::optional>& model) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); - ze_graph_handle_t graphHandle = nullptr; NetworkMetadata networkMeta; std::vector network(mainBlob.get_byte_size()); + GraphDescriptor mainGraphDesc; #ifdef VCL_FOR_COMPILER _logger.debug("parse metadata from driver for vcl compiler"); if (_zeGraphExt) { _logger.debug("parse start for vcl compiler"); - graphHandle = _zeGraphExt->getGraphHandle(*reinterpret_cast(mainBlob.data()), mainBlob.get_byte_size()); - networkMeta = _zeGraphExt->getNetworkMeta(graphHandle); + mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); + networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc); } _logger.debug("parse end for vcl compiler"); #else @@ -298,11 +298,6 @@ std::shared_ptr PluginCompilerAdapter::parse( network.clear(); network.shrink_to_fit(); -<<<<<<< HEAD - GraphDescriptor mainGraphDesc; - -======= ->>>>>>> e20458aedf (Add VCLApi and VCLCompilerImpl) if (_zeGraphExt) { mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 60b3afb7628814..c1373974b9c88c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -210,7 +210,7 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr= 7 && _vclVersion.minor >= 4) { // For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2 - _logger.debug("Using vclAllocatedExecutableCreate2 for VCL 7.4+"); + _logger.debug("Using vclAllocatedExecutableCreate2 for 7.4 <= VCL < 7.5"); vcl_allocator_vector allocator; uint8_t* blob = nullptr; size_t size = 0; 
From cc6268681b662353c288b18e26d2921b385b71e2 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Sun, 28 Sep 2025 15:45:31 +0800 Subject: [PATCH 03/25] update vcl version check, default compilertype and vcl download link --- .../cmake/download_compiler_libs.cmake | 33 ++++--- src/plugins/intel_npu/cmake/features.cmake | 3 +- .../include/compiler_adapter_factory.hpp | 1 - .../src/compiler_adapter/src/graph.cpp | 1 + .../src/plugin_compiler_adapter.cpp | 2 +- .../src/compiler_adapter/src/vcl_api.cpp | 89 +++++++++++++++++-- .../intel_npu/src/plugin/src/plugin.cpp | 11 +++ 7 files changed, 115 insertions(+), 25 deletions(-) diff --git a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake index 95ced5f78bdb59..3455677525eecf 100644 --- a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake +++ b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake @@ -3,7 +3,7 @@ # # Function to download and extract files -function(download_and_extract url dest_dir zip_file extracted_dir modify_proxy) +function(download_and_extract url zip_file extracted_dir modify_proxy) # Check if the prebuilt VCL compiler libraries not exist if(NOT EXISTS "${extracted_dir}") if(modify_proxy STREQUAL "MODIFY") @@ -39,6 +39,12 @@ function(download_and_extract url dest_dir zip_file extracted_dir modify_proxy) # Determine extraction method based on file extension if("${zip_file}" MATCHES "\\.zip$") file(ARCHIVE_EXTRACT INPUT "${zip_file}" DESTINATION "${extracted_dir}") + elseif("${zip_file}" MATCHES "\\.tar.gz$") + if(NOT EXISTS "${extracted_dir}") + file(MAKE_DIRECTORY "${extracted_dir}") + message(STATUS "Directory ${extracted_dir} created to unzip.") + endif() + execute_process(COMMAND tar -xzf "${zip_file}" -C "${extracted_dir}") elseif("${zip_file}" MATCHES "\\.deb$") execute_process(COMMAND dpkg-deb -x "${zip_file}" "${extracted_dir}") elseif("${zip_file}" MATCHES "\\.exe$") @@ -93,13 +99,12 @@ if(ENABLE_VCL_FOR_COMPILER) 
message(STATUS "Downloading prebuilt NPU VCL compiler libraries") if(WIN32) set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_lib/win") - set(VCL_COMPILER_LIBS_URL "https://downloadmirror.intel.com/854488/npu_win_32.0.100.4023.zip") - set(VCL_COMPILER_LIBS_ZIP "${VCL_COMPILER_LIBS_DIR}/npu_win_32.0.100.4023.zip") - set(VCL_COMPILER_LIBS_DIR_UNZIPPED "${VCL_COMPILER_LIBS_DIR}/npu_win_32.0.100.4023") - - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_ZIP}" "${VCL_COMPILER_LIBS_DIR_UNZIPPED}" "MODIFY") - set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_UNZIPPED}/npu_win_32.0.100.4023/drivers/x64/") + set(VCL_COMPILER_LIBS_URL "https://github.com/openvinotoolkit/npu_compiler/releases/download/npu_ud_2025_38_rc4/w_vpux_compiler_l0_win-7_4_3-Release_dyntbb_postcommit_cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218.zip") + set(VCL_COMPILER_LIBS_ZIP "${VCL_COMPILER_LIBS_DIR}/w_vpux_compiler_l0_win-7_4_3-Release_dyntbb_postcommit_cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218.zip") + set(VCL_COMPILER_LIBS_DIR_UNZIPPED "${VCL_COMPILER_LIBS_DIR}/cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218") + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_ZIP}" "${VCL_COMPILER_LIBS_DIR_UNZIPPED}" "MODIFY") + set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_UNZIPPED}/cid/lib") configure_file( ${VCL_COMPILER_LIB_PATH}/npu_driver_compiler.dll @@ -109,7 +114,7 @@ if(ENABLE_VCL_FOR_COMPILER) set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/npu_vcl_compiler.dll") file(COPY "${VCL_COMPILER_LIB}" DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_BUILD_TYPE}") - message(STATUS "Copying prebuilt VCL compiler libraries npu_vcl_compiler.dll to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for windows") + message(STATUS "Not Copying prebuilt VCL compiler libraries npu_vcl_compiler.dll to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for windows") else() # Check if the operating system is Linux and 
not macOS if(UNIX AND NOT APPLE) @@ -125,7 +130,7 @@ if(ENABLE_VCL_FOR_COMPILER) set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu22.04_amd64.deb") set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu22.04") - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") configure_file( @@ -136,7 +141,7 @@ if(ENABLE_VCL_FOR_COMPILER) set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") file(COPY "${VCL_COMPILER_LIB}" DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") - message(STATUS "Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 22.04") + message(STATUS "Not Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 22.04") elseif(OS_VERSION STREQUAL "24.04") message(STATUS "This is Ubuntu 24.04") # Ubuntu 24.04-specific settings or actions @@ -145,7 +150,7 @@ if(ENABLE_VCL_FOR_COMPILER) set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu24.04_amd64.deb") set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu24.04") - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DIR}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") + download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") configure_file( @@ 
-154,8 +159,8 @@ if(ENABLE_VCL_FOR_COMPILER) COPYONLY ) set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") - file(COPY "${VCL_COMPILER_LIB}" - DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") + # file(COPY "${VCL_COMPILER_LIB}" + # DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") message(STATUS "Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 24.04") else() message(STATUS "This is another version of Ubuntu: ${OS_VERSION}") @@ -171,4 +176,4 @@ if(ENABLE_VCL_FOR_COMPILER) install(FILES ${VCL_COMPILER_LIB} DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${NPU_INTERNAL_COMPONENT}) -endif() +endif() \ No newline at end of file diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 1f462c0e461806..5f763da69188ac 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -12,5 +12,4 @@ endif() ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF) -ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" ON) -ov_option(ENABLE_SYSTEM_NPU_VCL_COMPILER "Use system VCL compiler libraries" OFF) +ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" OFF) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp index 32e1fb384668b2..81e075adf65be3 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp @@ -27,7 +27,6 @@ class CompilerAdapterFactory final { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { OPENVINO_THROW("NPU Compiler Adapter must be used with LEVEL0 backend"); } - return std::make_unique(engineBackend->getInitStructs()); } default: diff --git 
a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp index c37071f10395d3..9494f484bb7c72 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp @@ -226,6 +226,7 @@ void Graph::initialize(const Config& config) { _zeGraphExt->initializeGraph(_graphDesc, _commandQueueGroupOrdinal); _logger.debug("Graph initialize finish"); + // We are allowed to release the original blob because weights were loaded in NPU memory during // _zeGraphExt->initializeGraph(). The driver will not access the original blob from this moment on, so we are // releasing it here to avoid unnecessary memory usage. diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index a21c44f10d1592..1f2b1cfc83a380 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -134,7 +134,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr(static_cast(Logger::global().level()) - 1); vcl_device_desc_t device_desc; device_desc.size = sizeof(vcl_device_desc_t); + device_desc.deviceID = 0x643E; // Value from intel_npu/src/backend/src/zero_device.cpp device_desc.revision = -1; // -1 to skip the config device_desc.tileCount = 5; // 1 as init value @@ -183,6 +196,17 @@ struct vcl_allocator_malloc { } }; +std::string supportVclCompiler(int major, int minor) { + if (major >= 7 && minor >= 4) { + return "vclAllocatedExecutableCreate2"; + } else if (major >= 6 && minor >= 1) { + return "vclAllocatedExecutableCreate"; + } else { + return "vclExecutableCreate"; + } + return "unsupported VCL version"; +} + NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { _logger.debug("compile start"); @@ 
-193,22 +217,52 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr= 7 && _vclVersion.minor >= 4) { + + /// Check the linked vcl version whether supported in plugin + int usedMajor = 0; + bool isDowngrade = false; + if (static_cast(VCL_COMPILER_VERSION_MAJOR) < _vclVersion.major) { + usedMajor = VCL_COMPILER_VERSION_MAJOR; + isDowngrade = true; + } + int usedMinor = isDowngrade ? VCL_COMPILER_VERSION_MINOR : _vclVersion.minor; + + _logger.info("[Debug] Used VCL API Version: %d.%d", usedMajor, usedMinor); + _logger.info("[Debug] compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); + _logger.info("[Debug] embedding compiler vcl version: %d.%d", + VCL_COMPILER_VERSION_MAJOR, + VCL_COMPILER_VERSION_MINOR); + + if (usedMajor >= 7 && usedMinor >= 4) { + if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { + _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " + "%d.%d, \n but loaded VCL is %d.%d.\n" + "Will downwise to form %s to use vclAllocatedExecutableCreate2", + VCL_COMPILER_VERSION_MAJOR, + VCL_COMPILER_VERSION_MINOR, + _vclVersion.major, + _vclVersion.minor, + supportVclCompiler(usedMajor, usedMinor)); + } + // check the vcl version whether support the lastest compile api + // support the lastest vcl api // For VCL 7.4 and later, we can use vclAllocatedExecutableCreate2 _logger.debug("Using vclAllocatedExecutableCreate2 for 7.4 <= VCL < 7.5"); vcl_allocator_vector allocator; @@ -227,7 +281,17 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr= 6 && _vclVersion.minor >= 1) { + } else if (usedMajor >= 6 && usedMinor >= 1) { + if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { + _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " + "%d.%d, \n but loaded VCL is %d.%d.\n" + "Will downwise to form %s to use vclAllocatedExecutableCreate2", + VCL_COMPILER_VERSION_MAJOR, + VCL_COMPILER_VERSION_MINOR, + 
_vclVersion.major, + _vclVersion.minor, + supportVclCompiler(usedMajor, usedMinor)); + } // For older versions, we use vclAllocatedExecutableCreate _logger.debug("Using vclAllocatedExecutableCreate for 6.1 < VCL < 7.4"); @@ -252,6 +316,16 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptrsecond.as()); } + // if there is no compiler_type provided = use base_config value + // update the compilerType by platform: + // 3720 -> DRIVER + // 4000 and later -> MLIR (default value) + auto it_platform = local_conf.find(std::string(PLATFORM::key())); + if (it_platform != local_conf.end()) { + // if platform is provided by local config = use that + if (it_platform->second.as() == ov::intel_npu::Platform::NPU3720) { + return ov::intel_npu::CompilerType::DRIVER; + } + } return base_conf.get(); } From 8b11ee8c106f25996fc569f560aea195f93f4808 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Thu, 6 Nov 2025 13:22:54 +0800 Subject: [PATCH 04/25] fix export issue and build --- src/plugins/intel_npu/cmake/features.cmake | 3 +- .../include/compiler_adapter_factory.hpp | 1 + .../src/plugin_compiler_adapter.cpp | 6 +- .../src/compiler_adapter/src/vcl_api.cpp | 55 ++++++++++++++----- .../intel_npu/src/plugin/src/plugin.cpp | 4 +- 5 files changed, 49 insertions(+), 20 deletions(-) diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 5f763da69188ac..1f462c0e461806 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -12,4 +12,5 @@ endif() ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF) -ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" OFF) +ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" ON) +ov_option(ENABLE_SYSTEM_NPU_VCL_COMPILER "Use system VCL compiler libraries" OFF) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp 
b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp index 81e075adf65be3..32e1fb384668b2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp @@ -27,6 +27,7 @@ class CompilerAdapterFactory final { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { OPENVINO_THROW("NPU Compiler Adapter must be used with LEVEL0 backend"); } + return std::make_unique(engineBackend->getInitStructs()); } default: diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 1f2b1cfc83a380..5bf62498ca81eb 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -135,7 +135,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr()) { localConfig.update({{ov::intel_npu::separate_weights_version.name(), "ONE_SHOT"}}); } @@ -294,7 +294,7 @@ std::shared_ptr PluginCompilerAdapter::parse( _logger.debug("parse start"); network.assign(reinterpret_cast(mainBlob.data()), reinterpret_cast(mainBlob.data()) + mainBlob.get_byte_size()); - auto networkMeta = _compiler->parse(network, config); + networkMeta = _compiler->parse(network, config); network.clear(); network.shrink_to_fit(); diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 13dfe9bc598d28..580883f77c3914 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -4,12 +4,15 @@ #include "vcl_api.hpp" +#include "intel_npu/config/options.hpp" +#include "intel_npu/common/filtered_config.hpp" #include 
"intel_npu/profiling.hpp" -#include "ir_serializer.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" +#include "vcl_serializer.hpp" #include "ze_graph_ext_wrappers.hpp" +#include "intel_npu/npu_private_properties.hpp" namespace intel_npu { @@ -207,7 +210,8 @@ std::string supportVclCompiler(int major, int minor) { return "unsupported VCL version"; } -NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { +NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, + const Config& config) const { _logger.debug("compile start"); const auto maxOpsetVersion = _compilerProperties.supportedOpsets; @@ -217,16 +221,27 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + auto serializedIR = driver_compiler_utils::serializeIR( + model, + compilerVersion, + maxOpsetVersion, + updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) ? 
updatedConfig.get() + : true, + updatedConfig.get()); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); _logger.debug("create build flags"); - buildFlags += irSerializer.serializeIOInfo(model, useIndices); + buildFlags += driver_compiler_utils::serializeIOInfo(model, useIndices); buildFlags += " "; - buildFlags += irSerializer.serializeConfig(config, compilerVersion); + buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); _logger.debug("final build flags to compiler: %s", buildFlags.c_str()); vcl_executable_desc_t exeDesc = {serializedIR.second.get(), @@ -259,9 +274,8 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& network, const Config& config) const { +intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector& network, + const Config& config) const { _logger.debug("parse start"); // VCL does not support parse, return empty metadata return intel_npu::NetworkMetadata(); @@ -421,7 +436,8 @@ uint32_t VCLCompilerImpl::get_version() const { return ZE_MAKE_VERSION(_compilerProperties.version.major, _compilerProperties.version.minor); } -ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, const Config& config) const { +ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, + const Config& config) const { _logger.debug("query start"); const auto maxOpsetVersion = _compilerProperties.supportedOpsets; _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); @@ -430,11 +446,22 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + + auto serializedIR = driver_compiler_utils::serializeIR( + model, + compilerVersion, + maxOpsetVersion, + 
updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) ? updatedConfig.get() + : true, + updatedConfig.get()); std::string buildFlags; - buildFlags += irSerializer.serializeConfig(config, compilerVersion); + buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); _logger.debug("queryImpl build flags : %s", buildFlags.c_str()); vcl_query_handle_t queryHandle; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 9f04aa83bcd376..16e6e87ab63cf9 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -612,7 +612,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties)); OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); - auto localConfig = fork_local_config(localPropertiesMap, compiler); + auto localConfig = fork_local_config(localPropertiesMap, compiler); //FilteredConfig #ifndef VCL_FOR_COMPILER const auto set_cache_dir = localConfig.get(); @@ -733,7 +733,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< if (successfullyDebatched && localConfig.get() == ov::hint::PerformanceMode::LATENCY) { _logger.info("Override performance mode to THROUGHPUT for compilation"); - auto modifiedConfig = localConfig; // Copy only when needed + auto modifiedConfig = localConfig; // Copy only when needed, FilteredConfig std::stringstream strStream; strStream << ov::hint::PerformanceMode::THROUGHPUT; modifiedConfig.update({{ov::hint::performance_mode.name(), strStream.str()}}); From a08feb728b753edc79633332aa4f54a9922e3aaa Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Wed, 12 Nov 2025 22:46:32 +0800 Subject: [PATCH 05/25] clang-format, add platform update and compiler check --- .../cmake/download_compiler_libs.cmake | 4 +- 
.../include/compiler_adapter_factory.hpp | 7 +- .../include/npu_driver_compiler.h | 2 +- .../include/plugin_compiler_adapter.hpp | 2 +- .../src/compiler_adapter/include/vcl_api.hpp | 9 +- .../include/ze_graph_ext_wrappers.hpp | 2 +- .../src/plugin_compiler_adapter.cpp | 60 +++++++--- .../src/compiler_adapter/src/vcl_api.cpp | 106 +++++++++--------- .../intel_npu/src/plugin/src/plugin.cpp | 46 ++++++-- 9 files changed, 148 insertions(+), 90 deletions(-) diff --git a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake index 3455677525eecf..d8a664259299d7 100644 --- a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake +++ b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake @@ -8,7 +8,7 @@ function(download_and_extract url zip_file extracted_dir modify_proxy) if(NOT EXISTS "${extracted_dir}") if(modify_proxy STREQUAL "MODIFY") # Update proxy to enable download for windows url - set(original_NO_PROXY $ENV{NO_PROXY}) + set(original_NO_PROXY $ENV{NO_PROXY}) set(original_no_proxy $ENV{no_proxy}) set(ENV{NO_PROXY} "") set(ENV{no_proxy} "") @@ -24,7 +24,7 @@ function(download_and_extract url zip_file extracted_dir modify_proxy) if(modify_proxy STREQUAL "MODIFY") # Restore proxy - set(ENV{NO_PROXY} ${original_NO_PROXY}) + set(ENV{NO_PROXY} ${original_NO_PROXY}) set(ENV{no_proxy} ${original_no_proxy}) endif() diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp index 32e1fb384668b2..ada0d47fa19ff3 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp @@ -15,13 +15,14 @@ namespace intel_npu { class CompilerAdapterFactory final { public: std::unique_ptr getCompiler(const ov::SoPtr& engineBackend, - const ov::intel_npu::CompilerType type) const { + const 
ov::intel_npu::CompilerType type, + std::string deviceID = "4000") const { switch (type) { case ov::intel_npu::CompilerType::PLUGIN: { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { - return std::make_unique(nullptr); + return std::make_unique(nullptr, deviceID); } - return std::make_unique(engineBackend->getInitStructs()); + return std::make_unique(engineBackend->getInitStructs(), deviceID); } case ov::intel_npu::CompilerType::DRIVER: { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h index c945a26565ebef..a8c38506fc844c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h +++ b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h @@ -23,7 +23,7 @@ extern "C" { #endif #define VCL_COMPILER_VERSION_MAJOR 7 -#define VCL_COMPILER_VERSION_MINOR 5 +#define VCL_COMPILER_VERSION_MINOR 4 #define VCL_PROFILING_VERSION_MAJOR 2 #define VCL_PROFILING_VERSION_MINOR 0 diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 5bc7c236e45a10..f89d634c6491cf 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -18,7 +18,7 @@ namespace intel_npu { class PluginCompilerAdapter final : public ICompilerAdapter { public: - PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct); + PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, const std::string& deviceId); std::shared_ptr compile(const std::shared_ptr& model, const FilteredConfig& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp 
b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp index 6251821b04403d..54f65e8dc0260a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp @@ -79,13 +79,16 @@ vcl_symbols_list(); vcl_weak_symbols_list(); #undef vcl_symbol_statement +void setDeviceDesc(vcl_device_desc_t& device_desc, const std::string& device); +std::string supportVclCompiler(int major, int minor); + class VCLCompilerImpl final : public intel_npu::ICompiler { public: - VCLCompilerImpl(); + VCLCompilerImpl(const std::string& device); ~VCLCompilerImpl() override; - static std::shared_ptr& getInstance() { - static std::shared_ptr compiler = std::make_shared(); + static std::shared_ptr getInstance(const std::string& device) { + std::shared_ptr compiler = std::make_shared(device); return compiler; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp index f647e349a6d01a..079a051e65f8a1 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp @@ -75,7 +75,7 @@ class ZeGraphExtWrappers { Logger _logger; }; -// Parse the result string of query from foramt to unordered_set of string +// Parse the result string of query from format to unordered_set of string std::unordered_set parseQueryResult(std::vector& data); } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 5bf62498ca81eb..5b378aede0e398 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -24,13 +24,13 @@ #include "weightless_graph.hpp" namespace { -#ifndef 
VCL_FOR_COMPILER + std::shared_ptr load_library(const std::string& libpath) { -# if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) +#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) return ov::util::load_shared_object(ov::util::string_to_wstring(libpath).c_str()); -# else +#else return ov::util::load_shared_object(libpath.c_str()); -# endif +#endif } std::shared_ptr get_compiler(std::shared_ptr so) { @@ -51,7 +51,7 @@ ov::SoPtr load_compiler(const std::string& libpath) { return ov::SoPtr(compiler, compilerSO); } -#endif + ov::Tensor make_tensor_from_vector(std::vector& vector) { auto tensor = ov::Tensor(ov::element::u8, ov::Shape{vector.size()}, vector.data()); auto impl = ov::get_tensor_impl(std::move(tensor)); @@ -65,20 +65,45 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { namespace intel_npu { -PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) +PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, + const std::string& deviceId) : _zeroInitStruct(zeroInitStruct), _logger("PluginCompilerAdapter", Logger::global().level()) { _logger.debug("initialize PluginCompilerAdapter start"); #ifdef VCL_FOR_COMPILER - _logger.info("VCL driver compiler will be used."); - _compiler = ov::SoPtr(VCLCompilerImpl::getInstance(), VCLApi::getInstance()->getLibrary()); + _logger.info("PLUGIN VCL compiler will be used."); + try { + auto vclCompilerPtr = VCLCompilerImpl::getInstance(deviceId); + auto vclLib = VCLApi::getInstance()->getLibrary(); + if (vclCompilerPtr && vclLib) { + _compiler = ov::SoPtr(vclCompilerPtr, vclLib); + } else { + throw std::runtime_error("VCL compiler or library is nullptr"); + } + } catch (const std::exception& vcl_exception) { + _logger.warning("VCL compiler load failed: %s. 
Trying to load MLIR compiler...", vcl_exception.what()); + std::string baseName = "npu_mlir_compiler"; + auto libPath = ov::util::make_plugin_library_name(ov::util::get_ov_lib_path(), baseName + OV_BUILD_POSTFIX); + try { + _compiler = load_compiler(libPath); + if (!_compiler) { + throw std::runtime_error("MLIR compiler load returned nullptr"); + } else { + _logger.info("MLIR compiler loaded successfully. PLUGIN compiler will be used."); + } + } catch (const std::exception& mlir_exception) { + _logger.error("MLIR compiler load failed: %s", mlir_exception.what()); + throw std::runtime_error("Both VCL and MLIR compiler load failed, aborting."); + } + } #else _logger.info("PLUGIN compiler will be used."); std::string baseName = "npu_mlir_compiler"; auto libPath = ov::util::make_plugin_library_name(ov::util::get_ov_lib_path(), baseName + OV_BUILD_POSTFIX); _compiler = load_compiler(libPath); #endif + if (_zeroInitStruct == nullptr) { return; } @@ -125,18 +150,19 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr(_zeGraphExt, - _zeroInitStruct, - graphDesc, + return std::make_shared( + _zeGraphExt, + _zeroInitStruct, + graphDesc, #ifdef VCL_FOR_COMPILER - std::move(networkMeta), + std::move(networkMeta), #else - std::move(networkDesc.metadata), + std::move(networkDesc.metadata), #endif - std::move(tensor), - config, - /* persistentBlob = */ true, // exporting the blob shall be available in such a scenario - _compiler); + std::move(tensor), + config, + /* persistentBlob = */ true, // exporting the blob shall be available in such a scenario + _compiler); } std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr& model, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 580883f77c3914..35637df12efccb 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -4,15 +4,15 
@@ #include "vcl_api.hpp" -#include "intel_npu/config/options.hpp" #include "intel_npu/common/filtered_config.hpp" +#include "intel_npu/config/options.hpp" +#include "intel_npu/npu_private_properties.hpp" #include "intel_npu/profiling.hpp" #include "openvino/runtime/make_tensor.hpp" #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" #include "vcl_serializer.hpp" #include "ze_graph_ext_wrappers.hpp" -#include "intel_npu/npu_private_properties.hpp" namespace intel_npu { @@ -113,7 +113,24 @@ const std::shared_ptr& VCLApi::getInstance() { return instance; } -VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerImpl", ov::log::Level::DEBUG) { +void setDeviceDesc(vcl_device_desc_t& device_desc, const std::string& device) { + std::unordered_map devicesDescsMap = { + {"3720", {sizeof(vcl_device_desc_t), 0xAD1D, static_cast(-1), 2}}, + {"4000", {sizeof(vcl_device_desc_t), 0x643E, static_cast(-1), 5}}, + // For other devices, the tile configuration needs to be provided by the user. 
+ }; + + auto it = devicesDescsMap.find(device); + if (it != devicesDescsMap.end()) { + device_desc = it->second; + } else { + device_desc = devicesDescsMap["4000"]; + } +} + +VCLCompilerImpl::VCLCompilerImpl(const std::string& device) + : _logHandle(nullptr), + _logger("VCLCompilerImpl", Logger::global().level()) { _logger.debug("VCLCompilerImpl constructor start"); // Initialize the VCL API THROW_ON_FAIL_FOR_VCL("vclGetVersion", vclGetVersion(&_vclVersion, &_vclProfilingVersion), nullptr); @@ -127,7 +144,7 @@ VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerIm (VCL_COMPILER_VERSION_MAJOR == _vclVersion.major && VCL_COMPILER_VERSION_MINOR < _vclVersion.minor)) { _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL %d.%d, " "\n but loaded VCL is %d.%d.\n" - "Will downwise to use the lastest plugin vcl compiler!!!", + "Will downwise to use the latest plugin vcl compiler!!!", VCL_COMPILER_VERSION_MAJOR, VCL_COMPILER_VERSION_MINOR, _vclVersion.major, @@ -138,12 +155,9 @@ VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerIm vcl_compiler_desc_t compilerDesc; compilerDesc.version = _vclVersion; compilerDesc.debugLevel = static_cast<__vcl_log_level_t>(static_cast(Logger::global().level()) - 1); - vcl_device_desc_t device_desc; - device_desc.size = sizeof(vcl_device_desc_t); - device_desc.deviceID = 0x643E; // Value from intel_npu/src/backend/src/zero_device.cpp - device_desc.revision = -1; // -1 to skip the config - device_desc.tileCount = 5; // 1 as init value + vcl_device_desc_t device_desc; + setDeviceDesc(device_desc, device); THROW_ON_FAIL_FOR_VCL("vclCompilerCreate", vclCompilerCreate(&compilerDesc, &device_desc, &_compilerHandle, &_logHandle), @@ -210,8 +224,7 @@ std::string supportVclCompiler(int major, int minor) { return "unsupported VCL version"; } -NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, - const Config& config) const { 
+NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { _logger.debug("compile start"); const auto maxOpsetVersion = _compilerProperties.supportedOpsets; @@ -227,13 +240,14 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr() - : true, - updatedConfig.get()); + auto serializedIR = + driver_compiler_utils::serializeIR(model, + compilerVersion, + maxOpsetVersion, + updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) + ? updatedConfig.get() + : true, + updatedConfig.get()); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); @@ -251,25 +265,19 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr(VCL_COMPILER_VERSION_MAJOR) < _vclVersion.major) { - usedMajor = VCL_COMPILER_VERSION_MAJOR; - isDowngrade = true; + uint16_t usedMajor = VCL_COMPILER_VERSION_MAJOR, usedMinor = VCL_COMPILER_VERSION_MINOR; + if (static_cast(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) { + usedMinor = std::min(static_cast(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor); + } else if (static_cast(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) { + usedMajor = _vclVersion.major; + usedMinor = _vclVersion.minor; } - int usedMinor = isDowngrade ? 
VCL_COMPILER_VERSION_MINOR : _vclVersion.minor; - - _logger.info("[Debug] Used VCL API Version: %d.%d", usedMajor, usedMinor); - _logger.info("[Debug] compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); - _logger.info("[Debug] embedding compiler vcl version: %d.%d", - VCL_COMPILER_VERSION_MAJOR, - VCL_COMPILER_VERSION_MINOR); if (usedMajor >= 7 && usedMinor >= 4) { if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " "%d.%d, \n but loaded VCL is %d.%d.\n" - "Will downwise to form %s to use vclAllocatedExecutableCreate2", + "Will downgrade to form %s to use vclAllocatedExecutableCreate2", VCL_COMPILER_VERSION_MAJOR, VCL_COMPILER_VERSION_MINOR, _vclVersion.major, @@ -369,8 +377,7 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& network, - const Config& config) const { +intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector& network, const Config& config) const { _logger.debug("parse start"); // VCL does not support parse, return empty metadata return intel_npu::NetworkMetadata(); @@ -415,29 +422,17 @@ std::vector VCLCompilerImpl::process_profiling_output(const s std::memcpy(layerInfo.data(), profOutput.data, profOutput.size); } - // profOutput.data = NULL; - // THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", vclGetDecodedProfilingBuffer(profilingHandle, - // VCL_PROFILING_TASK_LEVEL, &profOutput), logHandle); if (profOutput.data == NULL) { - // OPENVINO_THROW("Failed to get VCL profiling task level output"); - // } - - // profOutput.data = NULL; - // THROW_ON_FAIL_FOR_VCL("vclGetDecodedProfilingBuffer", vclGetDecodedProfilingBuffer(profilingHandle, - // VCL_PROFILING_RAW, &profOutput),logHandle); if (profOutput.data == NULL) { - // OPENVINO_THROW("Failed to get VCL profiling raw output"); - // } - THROW_ON_FAIL_FOR_VCL("vclProfilingDestroy", vclProfilingDestroy(profilingHandle), logHandle); - return 
intel_npu::profiling::convertLayersToIeProfilingInfo(layerInfo); // Return processed profiling info + // Return processed profiling info + return intel_npu::profiling::convertLayersToIeProfilingInfo(layerInfo); } uint32_t VCLCompilerImpl::get_version() const { return ZE_MAKE_VERSION(_compilerProperties.version.major, _compilerProperties.version.minor); } -ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, - const Config& config) const { +ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr& model, const Config& config) const { _logger.debug("query start"); const auto maxOpsetVersion = _compilerProperties.supportedOpsets; _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); @@ -452,13 +447,14 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr() - : true, - updatedConfig.get()); + auto serializedIR = + driver_compiler_utils::serializeIR(model, + compilerVersion, + maxOpsetVersion, + updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) + ? 
updatedConfig.get() + : true, + updatedConfig.get()); std::string buildFlags; buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 16e6e87ab63cf9..1ca5f8dba13872 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -153,6 +153,18 @@ static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& bas return COMPILER_TYPE::parse(it->second.as()); } + // if there is no compiler_type provided = use base_config value + // update the compilerType by device id: + // 3720 -> DRIVER + // 4000 and later -> MLIR (default value) + auto it_device = local_conf.find(std::string(DEVICE_ID::key())); + if (it_device != local_conf.end()) { + // if platform is provided by local config = use that + if (it_device->second.as() == ov::intel_npu::Platform::NPU3720) { + return ov::intel_npu::CompilerType::DRIVER; + } + } + // if there is no compiler_type provided = use base_config value // update the compilerType by platform: // 3720 -> DRIVER @@ -239,6 +251,20 @@ std::shared_ptr exclude_model_ptr_from_map(ov::AnyMap& properti return modelPtr; } +std::string getDeviceFromProperties(const std::map& propertiesMap) { + const std::string defaultDevice = "4000"; + auto it = propertiesMap.find(std::string(DEVICE_ID::key())); + if (it != propertiesMap.end()) { + return it->second; + } + + it = propertiesMap.find(std::string(PLATFORM::key())); + if (it != propertiesMap.end()) { + return it->second; + } + return defaultDevice; +} + } // namespace namespace intel_npu { @@ -608,11 +634,13 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< update_log_level(localPropertiesMap); // create compiler + std::string device_id = getDeviceFromProperties(localPropertiesMap); CompilerAdapterFactory compilerAdapterFactory; - auto compiler = compilerAdapterFactory.getCompiler(_backend, 
resolveCompilerType(_globalConfig, properties)); + auto compiler = + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties), device_id); OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); - auto localConfig = fork_local_config(localPropertiesMap, compiler); //FilteredConfig + auto localConfig = fork_local_config(localPropertiesMap, compiler); #ifndef VCL_FOR_COMPILER const auto set_cache_dir = localConfig.get(); @@ -733,7 +761,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< if (successfullyDebatched && localConfig.get() == ov::hint::PerformanceMode::LATENCY) { _logger.info("Override performance mode to THROUGHPUT for compilation"); - auto modifiedConfig = localConfig; // Copy only when needed, FilteredConfig + auto modifiedConfig = localConfig; // Copy only when needed std::stringstream strStream; strStream << ov::hint::PerformanceMode::THROUGHPUT; modifiedConfig.update({{ov::hint::performance_mode.name(), strStream.str()}}); @@ -908,8 +936,10 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& exclude_model_ptr_from_map(npu_plugin_properties); const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - auto compiler = - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); + std::string device_id = getDeviceFromProperties(propertiesMap); + auto compiler = compilerAdapterFactory.getCompiler(_backend, + resolveCompilerType(_globalConfig, npu_plugin_properties), + device_id); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); _logger.setLevel(localConfig.get()); const auto platform = @@ -944,8 +974,10 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - auto compiler 
= - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); + std::string device_id = getDeviceFromProperties(propertiesMap); + auto compiler = compilerAdapterFactory.getCompiler(_backend, + resolveCompilerType(_globalConfig, npu_plugin_properties), + device_id); OV_ITT_TASK_CHAIN(PLUGIN_PARSE_MODEL, itt::domains::NPUPlugin, "Plugin::parse", "fork_local_config"); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::RunTime); From cbb8de0758692773b467601cf3be4f400217a96b Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Thu, 13 Nov 2025 18:24:17 +0800 Subject: [PATCH 06/25] remove ENABLE_VCL_FOR_COMPILER and update tile, default compilertype --- src/plugins/intel_npu/CMakeLists.txt | 5 - .../cmake/download_compiler_libs.cmake | 179 ------------------ src/plugins/intel_npu/cmake/features.cmake | 3 - .../al/include/intel_npu/config/options.hpp | 2 +- .../src/plugin_compiler_adapter.cpp | 64 +++---- .../src/compiler_adapter/src/vcl_api.cpp | 2 +- .../intel_npu/src/plugin/src/plugin.cpp | 2 - 7 files changed, 31 insertions(+), 226 deletions(-) delete mode 100644 src/plugins/intel_npu/cmake/download_compiler_libs.cmake diff --git a/src/plugins/intel_npu/CMakeLists.txt b/src/plugins/intel_npu/CMakeLists.txt index 470801fb39bc10..8871512b85b848 100644 --- a/src/plugins/intel_npu/CMakeLists.txt +++ b/src/plugins/intel_npu/CMakeLists.txt @@ -18,11 +18,6 @@ set(NPU_PLUGIN_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) include(cmake/features.cmake) -if(ENABLE_VCL_FOR_COMPILER) - include(cmake/download_compiler_libs.cmake) - add_definitions("-DVCL_FOR_COMPILER") -endif() - set(CMAKE_CXX_STANDARD 17) if(ENABLE_NPU_DEBUG_CAPS) diff --git a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake b/src/plugins/intel_npu/cmake/download_compiler_libs.cmake deleted file mode 100644 index d8a664259299d7..00000000000000 --- a/src/plugins/intel_npu/cmake/download_compiler_libs.cmake +++ /dev/null @@ -1,179 +0,0 @@ -# 
Copyright (C) 2018-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 -# - -# Function to download and extract files -function(download_and_extract url zip_file extracted_dir modify_proxy) - # Check if the prebuilt VCL compiler libraries not exist - if(NOT EXISTS "${extracted_dir}") - if(modify_proxy STREQUAL "MODIFY") - # Update proxy to enable download for windows url - set(original_NO_PROXY $ENV{NO_PROXY}) - set(original_no_proxy $ENV{no_proxy}) - set(ENV{NO_PROXY} "") - set(ENV{no_proxy} "") - endif() - - # Download the prebuilt VCL compiler libraries, if failure, show error message and exit - message(STATUS "Downloading prebuilt VCL compiler libraries from ${url}") - file(DOWNLOAD "${url}" "${zip_file}" - TIMEOUT 3600 - LOG log_output - STATUS download_status - SHOW_PROGRESS) - - if(modify_proxy STREQUAL "MODIFY") - # Restore proxy - set(ENV{NO_PROXY} ${original_NO_PROXY}) - set(ENV{no_proxy} ${original_no_proxy}) - endif() - - list(GET download_status 0 download_result) - if(NOT download_result EQUAL 0) - message(FATAL_ERROR "Download failed!\nStatus: ${download_status}\nLog: ${log_output}") - else() - message(STATUS "Download completed: ${zip_file}") - endif() - - message(STATUS "Unzipping prebuilt VCL compiler libraries to ${extracted_dir}") - # Determine extraction method based on file extension - if("${zip_file}" MATCHES "\\.zip$") - file(ARCHIVE_EXTRACT INPUT "${zip_file}" DESTINATION "${extracted_dir}") - elseif("${zip_file}" MATCHES "\\.tar.gz$") - if(NOT EXISTS "${extracted_dir}") - file(MAKE_DIRECTORY "${extracted_dir}") - message(STATUS "Directory ${extracted_dir} created to unzip.") - endif() - execute_process(COMMAND tar -xzf "${zip_file}" -C "${extracted_dir}") - elseif("${zip_file}" MATCHES "\\.deb$") - execute_process(COMMAND dpkg-deb -x "${zip_file}" "${extracted_dir}") - elseif("${zip_file}" MATCHES "\\.exe$") - set(WINRAR_PATHS - "C:/Program Files/WinRAR" - "C:/Program Files (x86)/WinRAR" - ) - - set(WINRAR_FOUND FALSE) - 
set(WINRAR_EXECUTABLE "") - - foreach(PATH ${WINRAR_PATHS}) - if(EXISTS "${PATH}/WinRAR.exe") - set(WINRAR_FOUND TRUE) - set(WINRAR_EXECUTABLE "${PATH}/WinRAR.exe") - break() - endif() - endforeach() - - if(WINRAR_FOUND) - message(STATUS "WinRAR found at: ${WINRAR_EXECUTABLE} and extract ${zip_file} to ${extracted_dir}") - file(MAKE_DIRECTORY "${extracted_dir}") - execute_process( - COMMAND "${WINRAR_EXECUTABLE}" x -y -o+ "${zip_file}" "${extracted_dir}" - RESULT_VARIABLE result - OUTPUT_VARIABLE output - ERROR_VARIABLE error - ) - - if(result EQUAL 0) - message(STATUS "Extraction successful: ${output}") - else() - #file(REMOVE_RECURSE "${extracted_dir}") - message(STATUS "Extraction failed: ${error}") - endif() - else() - message(FATAL_ERROR "WinRAR not found. Please install WinRAR to proceed.") - endif() - else() - message(FATAL_ERROR "Unsupported file extension for extraction: ${zip_file}") - endif() - file(REMOVE "${zip_file}") - else() - message(STATUS "Prebuilt VCL compiler libraries already exist, skip download") - endif() -endfunction() - -if(ENABLE_VCL_FOR_COMPILER) - if(ENABLE_SYSTEM_NPU_VCL_COMPILER) - message(STATUS "Using system NPU VCL compiler libraries, skip download") - else() - message(STATUS "Downloading prebuilt NPU VCL compiler libraries") - if(WIN32) - set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_lib/win") - set(VCL_COMPILER_LIBS_URL "https://github.com/openvinotoolkit/npu_compiler/releases/download/npu_ud_2025_38_rc4/w_vpux_compiler_l0_win-7_4_3-Release_dyntbb_postcommit_cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218.zip") - set(VCL_COMPILER_LIBS_ZIP "${VCL_COMPILER_LIBS_DIR}/w_vpux_compiler_l0_win-7_4_3-Release_dyntbb_postcommit_cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218.zip") - set(VCL_COMPILER_LIBS_DIR_UNZIPPED "${VCL_COMPILER_LIBS_DIR}/cid_a826bd92b5e02af092e4d706a762252b1845f777_251010_2218") - - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_ZIP}" 
"${VCL_COMPILER_LIBS_DIR_UNZIPPED}" "MODIFY") - set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_UNZIPPED}/cid/lib") - - configure_file( - ${VCL_COMPILER_LIB_PATH}/npu_driver_compiler.dll - ${VCL_COMPILER_LIB_PATH}/npu_vcl_compiler.dll - COPYONLY - ) - set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/npu_vcl_compiler.dll") - file(COPY "${VCL_COMPILER_LIB}" - DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/${CMAKE_BUILD_TYPE}") - message(STATUS "Not Copying prebuilt VCL compiler libraries npu_vcl_compiler.dll to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for windows") - else() - # Check if the operating system is Linux and not macOS - if(UNIX AND NOT APPLE) - # Get the OS name and version - execute_process(COMMAND lsb_release -is OUTPUT_VARIABLE OS_NAME OUTPUT_STRIP_TRAILING_WHITESPACE) - execute_process(COMMAND lsb_release -rs OUTPUT_VARIABLE OS_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) - - if(OS_NAME STREQUAL "Ubuntu") - if(OS_VERSION STREQUAL "22.04") - # Ubuntu 22.04-specific settings or actions - set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_libs/ubuntu22.04") - set(VCL_COMPILER_LIBS_URL "https://github.com/intel/linux-npu-driver/releases/download/v1.19.0/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu22.04_amd64.deb") - set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu22.04_amd64.deb") - set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu22.04") - - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") - - set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") - configure_file( - ${VCL_COMPILER_LIB_PATH}/libnpu_driver_compiler.so - ${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so - COPYONLY - ) - set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") - file(COPY "${VCL_COMPILER_LIB}" - 
DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") - message(STATUS "Not Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 22.04") - elseif(OS_VERSION STREQUAL "24.04") - message(STATUS "This is Ubuntu 24.04") - # Ubuntu 24.04-specific settings or actions - set(VCL_COMPILER_LIBS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/temp/vcl_compiler_libs/ubuntu24.04") - set(VCL_COMPILER_LIBS_URL "https://github.com/intel/linux-npu-driver/releases/download/v1.19.0/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu24.04_amd64.deb") - set(VCL_COMPILER_LIBS_DEB "${VCL_COMPILER_LIBS_DIR}/intel-driver-compiler-npu_1.19.0.20250707-16111289554_ubuntu24.04_amd64.deb") - set(VCL_COMPILER_LIBS_DIR_EXTRACTED "${VCL_COMPILER_LIBS_DIR}/prebuilt_VCL_libs_from_1.19.0.20250707-16111289554_ubuntu24.04") - - download_and_extract("${VCL_COMPILER_LIBS_URL}" "${VCL_COMPILER_LIBS_DEB}" "${VCL_COMPILER_LIBS_DIR_EXTRACTED}" "NONE") - - set(VCL_COMPILER_LIB_PATH "${VCL_COMPILER_LIBS_DIR_EXTRACTED}/usr/lib/x86_64-linux-gnu") - configure_file( - ${VCL_COMPILER_LIB_PATH}/libnpu_driver_compiler.so - ${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so - COPYONLY - ) - set(VCL_COMPILER_LIB "${VCL_COMPILER_LIB_PATH}/libnpu_vcl_compiler.so") - # file(COPY "${VCL_COMPILER_LIB}" - # DESTINATION "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") - message(STATUS "Copying prebuilt VCL compiler libraries libnpu_vcl_compiler.so to ${CMAKE_LIBRARY_OUTPUT_DIRECTORY} for Ubuntu 24.04") - else() - message(STATUS "This is another version of Ubuntu: ${OS_VERSION}") - # Other Ubuntu-specific settings or actions - endif() - else() - message(STATUS "This is a different Linux distribution: ${OS_NAME}, skip downloading prebuilt VCL compiler libraries") - # Other Linux-specific settings or actions - endif() - endif() - endif() - endif() - - install(FILES ${VCL_COMPILER_LIB} - DESTINATION ${OV_CPACK_RUNTIMEDIR} COMPONENT ${NPU_INTERNAL_COMPONENT}) -endif() \ No newline at end of file 
diff --git a/src/plugins/intel_npu/cmake/features.cmake b/src/plugins/intel_npu/cmake/features.cmake index 1f462c0e461806..4190b8415b87ad 100644 --- a/src/plugins/intel_npu/cmake/features.cmake +++ b/src/plugins/intel_npu/cmake/features.cmake @@ -11,6 +11,3 @@ if(NOT ENABLE_NPU_PLUGIN_ENGINE AND ENABLE_TESTS) endif() ov_dependent_option(ENABLE_INTEL_NPU_PROTOPIPE "Enable Intel NPU Protopipe tool" ON "ENABLE_INTEL_NPU_INTERNAL" OFF) - -ov_option(ENABLE_VCL_FOR_COMPILER "Enable VCL for NPU compiler" ON) -ov_option(ENABLE_SYSTEM_NPU_VCL_COMPILER "Use system VCL compiler libraries" OFF) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp index d9a533729eeeab..3005c4ae2ac634 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp @@ -842,7 +842,7 @@ struct COMPILER_TYPE final : OptionBase PluginCompilerAdapter::compile(const std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); -#ifdef VCL_FOR_COMPILER + + // if use vcl lib to compile, the metadata is empty and git the info from driver parser if (networkMeta.inputs.empty() && networkMeta.outputs.empty()) { // If the metadata is empty, we can try to get it from the driver parser _logger.info("Metadata is empty, trying to get it from the driver parser"); networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); networkMeta.name = model->get_friendly_name(); + networkDesc.metadata = networkMeta; } -#endif + } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. 
Only exports are available"); } + } else { + _logger.debug("no zeGraphExt, metadata is empty from vcl compiler"); } return std::make_shared( _zeGraphExt, _zeroInitStruct, graphDesc, -#ifdef VCL_FOR_COMPILER - std::move(networkMeta), -#else std::move(networkDesc.metadata), -#endif std::move(tensor), config, /* persistentBlob = */ true, // exporting the blob shall be available in such a scenario @@ -308,19 +301,26 @@ std::shared_ptr PluginCompilerAdapter::parse( std::vector network(mainBlob.get_byte_size()); GraphDescriptor mainGraphDesc; -#ifdef VCL_FOR_COMPILER - _logger.debug("parse metadata from driver for vcl compiler"); - if (_zeGraphExt) { - _logger.debug("parse start for vcl compiler"); - mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); - networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc); - } - _logger.debug("parse end for vcl compiler"); -#else _logger.debug("parse start"); network.assign(reinterpret_cast(mainBlob.data()), reinterpret_cast(mainBlob.data()) + mainBlob.get_byte_size()); networkMeta = _compiler->parse(network, config); + + if (_zeGraphExt) { + // if use vcl lib to compile, the metadata is empty and get the info from driver parser + if (networkMeta.inputs.empty() && networkMeta.outputs.empty()) { + // If the metadata is empty, we can try to get it from the driver parser + _logger.info("Metadata is empty, trying to get it from the driver parser"); + networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc); + if (model) { + networkMeta.name = model->get_friendly_name(); + } else { + _logger.warning("networkMeta name is empty!"); + } + } + } else { + _logger.warning("no zeGraphExt, metadata is empty from vcl compiler."); + } network.clear(); network.shrink_to_fit(); @@ -329,7 +329,6 @@ std::shared_ptr PluginCompilerAdapter::parse( } _logger.debug("main schedule parse end"); -#endif // exporting the blob when we get it from cache or ov::hint::compiled_blob property // shall be available @@ -396,10 
+395,12 @@ uint32_t PluginCompilerAdapter::get_version() const { } std::vector PluginCompilerAdapter::get_supported_options() const { -#ifdef VCL_FOR_COMPILER // For VCL, we can return the supported options from compiler VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); if (vclCompiler == nullptr) { + // If _compiler cannot cover to VCLCompilerImpl, it should use the mlir library. + // PluginCompiler has all the same options as plugin + // Returing empty string to let the plugin fallback to legacy registration _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning empty supported options."); return {}; } @@ -424,20 +425,18 @@ std::vector PluginCompilerAdapter::get_supported_options() const { compilerOpts.push_back(option); } return compilerOpts; -#else - // PluginCompiler has all the same options as plugin - // Returing empty string to let the plugin fallback to legacy registration - return {}; -#endif } bool PluginCompilerAdapter::is_option_supported(std::string optname) const { -#ifdef VCL_FOR_COMPILER VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); if (vclCompiler == nullptr) { + // If _compiler cannot cover to VCLCompilerImpl, it should use the mlir library. + // This functions has no utility in PluginCompiler + // returning false for any request to avoid the option of spaming the plugin _logger.warning("Failed to cast compiler to VCLCompilerImpl. 
Returning false for check."); return false; } + if (vclCompiler->is_option_supported(optname)) { _logger.debug("Option %s is supported by VCLCompilerImpl", optname.c_str()); return true; @@ -445,11 +444,6 @@ bool PluginCompilerAdapter::is_option_supported(std::string optname) const { _logger.debug("Option %s is not supported by VCLCompilerImpl", optname.c_str()); return false; } -#else - // This functions has no utility in PluginCompiler - // returning false for any request to avoid the option of spaming the plugin - return false; -#endif } } // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 35637df12efccb..76a39de08a5247 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -116,7 +116,7 @@ const std::shared_ptr& VCLApi::getInstance() { void setDeviceDesc(vcl_device_desc_t& device_desc, const std::string& device) { std::unordered_map devicesDescsMap = { {"3720", {sizeof(vcl_device_desc_t), 0xAD1D, static_cast(-1), 2}}, - {"4000", {sizeof(vcl_device_desc_t), 0x643E, static_cast(-1), 5}}, + {"4000", {sizeof(vcl_device_desc_t), 0x643E, static_cast(-1), 6}}, // For other devices, the tile configuration needs to be provided by the user. 
}; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 1ca5f8dba13872..d9cd543f24a176 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -642,7 +642,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); -#ifndef VCL_FOR_COMPILER const auto set_cache_dir = localConfig.get(); if (!set_cache_dir.empty()) { const auto compilerType = localConfig.get(); @@ -650,7 +649,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type"); } } -#endif const auto platform = utils::getCompilationPlatform(localConfig.get(), From 047aab18de6e1bc99b66f7629547a8007734da6d Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Thu, 13 Nov 2025 23:54:29 +0800 Subject: [PATCH 07/25] set device desc empty and auto parse in compile --- .../include/compiler_adapter_factory.hpp | 7 +++--- .../include/plugin_compiler_adapter.hpp | 2 +- .../src/compiler_adapter/include/vcl_api.hpp | 7 +++--- .../src/plugin_compiler_adapter.cpp | 9 ++++---- .../src/compiler_adapter/src/vcl_api.cpp | 23 +++---------------- .../intel_npu/src/plugin/src/plugin.cpp | 13 ++++------- 6 files changed, 19 insertions(+), 42 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp index ada0d47fa19ff3..32e1fb384668b2 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_adapter_factory.hpp @@ -15,14 +15,13 @@ namespace intel_npu { class CompilerAdapterFactory final { public: std::unique_ptr 
getCompiler(const ov::SoPtr& engineBackend, - const ov::intel_npu::CompilerType type, - std::string deviceID = "4000") const { + const ov::intel_npu::CompilerType type) const { switch (type) { case ov::intel_npu::CompilerType::PLUGIN: { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { - return std::make_unique(nullptr, deviceID); + return std::make_unique(nullptr); } - return std::make_unique(engineBackend->getInitStructs(), deviceID); + return std::make_unique(engineBackend->getInitStructs()); } case ov::intel_npu::CompilerType::DRIVER: { if (engineBackend == nullptr || engineBackend->getName() != "LEVEL0") { diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index f89d634c6491cf..5bc7c236e45a10 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -18,7 +18,7 @@ namespace intel_npu { class PluginCompilerAdapter final : public ICompilerAdapter { public: - PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, const std::string& deviceId); + PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct); std::shared_ptr compile(const std::shared_ptr& model, const FilteredConfig& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp index 54f65e8dc0260a..2eb451812e1f12 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp @@ -79,16 +79,15 @@ vcl_symbols_list(); vcl_weak_symbols_list(); #undef vcl_symbol_statement -void setDeviceDesc(vcl_device_desc_t& device_desc, const std::string& device); std::string supportVclCompiler(int major, int minor); class VCLCompilerImpl final : public 
intel_npu::ICompiler { public: - VCLCompilerImpl(const std::string& device); + VCLCompilerImpl(); ~VCLCompilerImpl() override; - static std::shared_ptr getInstance(const std::string& device) { - std::shared_ptr compiler = std::make_shared(device); + static std::shared_ptr getInstance() { + static std::shared_ptr compiler = std::make_shared(); return compiler; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 509f8267ecbb6d..6db88441f76070 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -65,15 +65,14 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { namespace intel_npu { -PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct, - const std::string& deviceId) +PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr& zeroInitStruct) : _zeroInitStruct(zeroInitStruct), _logger("PluginCompilerAdapter", Logger::global().level()) { _logger.debug("initialize PluginCompilerAdapter start"); _logger.info("PLUGIN VCL compiler will be used."); try { - auto vclCompilerPtr = VCLCompilerImpl::getInstance(deviceId); + auto vclCompilerPtr = VCLCompilerImpl::getInstance(); auto vclLib = VCLApi::getInstance()->getLibrary(); if (vclCompilerPtr && vclLib) { _compiler = ov::SoPtr(vclCompilerPtr, vclLib); @@ -178,7 +177,7 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr()) { localConfig.update({{ov::intel_npu::separate_weights_version.name(), "ONE_SHOT"}}); } @@ -313,7 +312,7 @@ std::shared_ptr PluginCompilerAdapter::parse( _logger.info("Metadata is empty, trying to get it from the driver parser"); networkMeta = _zeGraphExt->getNetworkMeta(mainGraphDesc); if (model) { - networkMeta.name = model->get_friendly_name(); + networkMeta.name = model.value()->get_friendly_name(); } 
else { _logger.warning("networkMeta name is empty!"); } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 76a39de08a5247..56342238e57675 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -113,24 +113,7 @@ const std::shared_ptr& VCLApi::getInstance() { return instance; } -void setDeviceDesc(vcl_device_desc_t& device_desc, const std::string& device) { - std::unordered_map devicesDescsMap = { - {"3720", {sizeof(vcl_device_desc_t), 0xAD1D, static_cast(-1), 2}}, - {"4000", {sizeof(vcl_device_desc_t), 0x643E, static_cast(-1), 6}}, - // For other devices, the tile configuration needs to be provided by the user. - }; - - auto it = devicesDescsMap.find(device); - if (it != devicesDescsMap.end()) { - device_desc = it->second; - } else { - device_desc = devicesDescsMap["4000"]; - } -} - -VCLCompilerImpl::VCLCompilerImpl(const std::string& device) - : _logHandle(nullptr), - _logger("VCLCompilerImpl", Logger::global().level()) { +VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerImpl", Logger::global().level()) { _logger.debug("VCLCompilerImpl constructor start"); // Initialize the VCL API THROW_ON_FAIL_FOR_VCL("vclGetVersion", vclGetVersion(&_vclVersion, &_vclProfilingVersion), nullptr); @@ -156,8 +139,8 @@ VCLCompilerImpl::VCLCompilerImpl(const std::string& device) compilerDesc.version = _vclVersion; compilerDesc.debugLevel = static_cast<__vcl_log_level_t>(static_cast(Logger::global().level()) - 1); - vcl_device_desc_t device_desc; - setDeviceDesc(device_desc, device); + // Set device description as empty, the related info will be processed in compile phase if passed by user. 
+ vcl_device_desc_t device_desc = {}; THROW_ON_FAIL_FOR_VCL("vclCompilerCreate", vclCompilerCreate(&compilerDesc, &device_desc, &_compilerHandle, &_logHandle), diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index d9cd543f24a176..632cbc62e76bdc 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -636,8 +636,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // create compiler std::string device_id = getDeviceFromProperties(localPropertiesMap); CompilerAdapterFactory compilerAdapterFactory; - auto compiler = - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties), device_id); + auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties)); OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); @@ -935,9 +934,8 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); std::string device_id = getDeviceFromProperties(propertiesMap); - auto compiler = compilerAdapterFactory.getCompiler(_backend, - resolveCompilerType(_globalConfig, npu_plugin_properties), - device_id); + auto compiler = + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); _logger.setLevel(localConfig.get()); const auto platform = @@ -973,9 +971,8 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); std::string device_id = getDeviceFromProperties(propertiesMap); - auto compiler = 
compilerAdapterFactory.getCompiler(_backend, - resolveCompilerType(_globalConfig, npu_plugin_properties), - device_id); + auto compiler = + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); OV_ITT_TASK_CHAIN(PLUGIN_PARSE_MODEL, itt::domains::NPUPlugin, "Plugin::parse", "fork_local_config"); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::RunTime); From baedc2b2f88aa3654a1d2d89ebbdc06f5c6c4475 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 17 Nov 2025 09:45:10 +0800 Subject: [PATCH 08/25] update vcl compiler to openvino_intel_npu_compiler --- .../intel_npu/src/compiler_adapter/src/vcl_api.cpp | 12 ++++++------ src/plugins/intel_npu/src/plugin/src/plugin.cpp | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 56342238e57675..e54568c483806c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -66,11 +66,11 @@ static inline std::string getLatestVCLLog(vcl_log_handle_t logHandle) { } \ } -VCLApi::VCLApi() : _logger("VCLApi", ov::log::Level::DEBUG) { - const std::string baseName = "npu_vcl_compiler"; +VCLApi::VCLApi() : _logger("VCLApi", Logger::global().level()) { + const std::string baseName = "openvino_intel_npu_compiler"; try { auto libpath = ov::util::make_plugin_library_name({}, baseName); - _logger.debug("Try to load npu_vcl_compiler"); + _logger.debug("Try to load openvino_intel_npu_compiler"); #if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT) && defined(_WIN32) this->lib = ov::util::load_shared_object(ov::util::string_to_wstring(libpath).c_str()); @@ -78,7 +78,7 @@ VCLApi::VCLApi() : _logger("VCLApi", ov::log::Level::DEBUG) { this->lib = ov::util::load_shared_object(libpath.c_str()); #endif } catch (const std::runtime_error& error) { - 
_logger.debug("Failed to load npu_vcl_compiler"); + _logger.debug("Failed to load openvino_intel_npu_compiler"); OPENVINO_THROW(error.what()); } @@ -88,7 +88,7 @@ VCLApi::VCLApi() : _logger("VCLApi", ov::log::Level::DEBUG) { vcl_symbols_list(); #undef vcl_symbol_statement } catch (const std::runtime_error& error) { - _logger.debug("Failed to get formal symbols from npu_vcl_compiler"); + _logger.debug("Failed to get formal symbols from openvino_intel_npu_compiler"); OPENVINO_THROW(error.what()); } @@ -96,7 +96,7 @@ VCLApi::VCLApi() : _logger("VCLApi", ov::log::Level::DEBUG) { try { \ this->vcl_symbol = reinterpret_cast(ov::util::get_symbol(lib, #vcl_symbol)); \ } catch (const std::runtime_error&) { \ - _logger.debug("Failed to get %s from npu_vcl_compiler", #vcl_symbol); \ + _logger.debug("Failed to get %s from openvino_intel_npu_compiler", #vcl_symbol); \ this->vcl_symbol = nullptr; \ } vcl_weak_symbols_list(); diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 632cbc62e76bdc..f2575f3f41b45d 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -634,7 +634,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< update_log_level(localPropertiesMap); // create compiler - std::string device_id = getDeviceFromProperties(localPropertiesMap); CompilerAdapterFactory compilerAdapterFactory; auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties)); From 8f3f570112ef7207550495a123ab809d09dc88dc Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 17 Nov 2025 11:46:47 +0800 Subject: [PATCH 09/25] fix ie mdk issue for compilerType Inconsistency issues for 3720 --- .../src/plugin_compiler_adapter.cpp | 4 +- .../intel_npu/src/plugin/src/plugin.cpp | 81 ++++++++++--------- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git 
a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 312a6947afa787..4bb069a463eb80 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -70,10 +70,11 @@ PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptrgetLibrary(); + _logger.info("PLUGIN VCL compiler is loading"); if (vclCompilerPtr && vclLib) { _compiler = ov::SoPtr(vclCompilerPtr, vclLib); } else { @@ -123,7 +124,6 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr& propertiesMap) { } } -static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, const ov::AnyMap& local_conf) { - // first look if provided config changes compiler type - auto it = local_conf.find(std::string(COMPILER_TYPE::key())); - if (it != local_conf.end()) { - // if compiler_type is provided by local config = use that - return COMPILER_TYPE::parse(it->second.as()); +std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { + const std::string defaultDevice = std::string(ov::intel_npu::Platform::NPU4000); + auto it = propertiesMap.find(std::string(DEVICE_ID::key())); + if (it != propertiesMap.end()) { + return it->second.as(); } - // if there is no compiler_type provided = use base_config value - // update the compilerType by device id: - // 3720 -> DRIVER - // 4000 and later -> MLIR (default value) - auto it_device = local_conf.find(std::string(DEVICE_ID::key())); - if (it_device != local_conf.end()) { - // if platform is provided by local config = use that - if (it_device->second.as() == ov::intel_npu::Platform::NPU3720) { - return ov::intel_npu::CompilerType::DRIVER; - } + it = propertiesMap.find(std::string(PLATFORM::key())); + if (it != propertiesMap.end()) { + return it->second.as(); } + return defaultDevice; +} - // if there is no 
compiler_type provided = use base_config value - // update the compilerType by platform: +void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& propertiesMap, Logger& log) { + // if there is no compiler_type provided, use base_config value, check and update by the device + // update the compilerType by device: // 3720 -> DRIVER - // 4000 and later -> MLIR (default value) - auto it_platform = local_conf.find(std::string(PLATFORM::key())); - if (it_platform != local_conf.end()) { + // 4000 and later -> MLIR + auto it_compiler_type = propertiesMap.find(std::string(COMPILER_TYPE::key())); + if (it_compiler_type == propertiesMap.end()) { // if platform is provided by local config = use that - if (it_platform->second.as() == ov::intel_npu::Platform::NPU3720) { - return ov::intel_npu::CompilerType::DRIVER; + const ov::AnyMap localProperties = propertiesMap; + std::string getdevice = getDeviceFromProperties(localProperties); + if (getdevice == std::string((ov::intel_npu::Platform::NPU3720))) { + if(base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { + log.warning( + "Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " + "compiler_type to 'DRIVER'. 
Maybe cause the compilerType inconsistency issues."); + } + // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user + propertiesMap[std::string(COMPILER_TYPE::key())] = COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); } } +} + +static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, const ov::AnyMap& local_conf) { + // first look if provided config changes compiler type + auto it = local_conf.find(std::string(COMPILER_TYPE::key())); + if (it != local_conf.end()) { + // if compiler_type is provided by local config = use that + return COMPILER_TYPE::parse(it->second.as()); + } + // if there is no compiler_type provided = use base_config value return base_conf.get(); } @@ -250,20 +263,6 @@ std::shared_ptr exclude_model_ptr_from_map(ov::AnyMap& properti return modelPtr; } -std::string getDeviceFromProperties(const std::map& propertiesMap) { - const std::string defaultDevice = "4000"; - auto it = propertiesMap.find(std::string(DEVICE_ID::key())); - if (it != propertiesMap.end()) { - return it->second; - } - - it = propertiesMap.find(std::string(PLATFORM::key())); - if (it != propertiesMap.end()) { - return it->second; - } - return defaultDevice; -} - } // namespace namespace intel_npu { @@ -663,6 +662,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // activate the NPUW path auto useNpuwKey = ov::intel_npu::use_npuw.name(); ov::AnyMap localProperties = properties; + if (localProperties.count(useNpuwKey)) { if (localProperties.at(useNpuwKey).as() == true) { return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties); @@ -678,6 +678,9 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< _logger.warning("Model received in config will be ignored as it was already provided by parameter."); } + // For 3720, need check and update its compiler_type + checkUpdateforspecialPlatform(_globalConfig, localProperties, _logger); + const 
std::map localPropertiesMap = any_copy(localProperties); update_log_level(localPropertiesMap); @@ -980,7 +983,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& exclude_model_ptr_from_map(npu_plugin_properties); const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - std::string device_id = getDeviceFromProperties(propertiesMap); + checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); @@ -1017,7 +1020,7 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - std::string device_id = getDeviceFromProperties(propertiesMap); + checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); From 4f1aa39d816ad8d6c934ee05b762bb596de8b55b Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 17 Nov 2025 15:59:15 +0800 Subject: [PATCH 10/25] fix clang-format --- .../src/compiler_adapter/src/plugin_compiler_adapter.cpp | 2 +- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 4bb069a463eb80..39531e33152a4c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -135,7 +135,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptr( diff --git 
a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 2016029ecc0bed..f52fdbcb3928fa 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -158,7 +158,7 @@ std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { return defaultDevice; } -void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& propertiesMap, Logger& log) { +void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& propertiesMap, Logger& log) { // if there is no compiler_type provided, use base_config value, check and update by the device // update the compilerType by device: // 3720 -> DRIVER @@ -169,13 +169,14 @@ void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& const ov::AnyMap localProperties = propertiesMap; std::string getdevice = getDeviceFromProperties(localProperties); if (getdevice == std::string((ov::intel_npu::Platform::NPU3720))) { - if(base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { + if (base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { log.warning( "Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " "compiler_type to 'DRIVER'. 
Maybe cause the compilerType inconsistency issues."); } // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user - propertiesMap[std::string(COMPILER_TYPE::key())] = COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); + propertiesMap[std::string(COMPILER_TYPE::key())] = + COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); } } } @@ -662,7 +663,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // activate the NPUW path auto useNpuwKey = ov::intel_npu::use_npuw.name(); ov::AnyMap localProperties = properties; - if (localProperties.count(useNpuwKey)) { if (localProperties.at(useNpuwKey).as() == true) { return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties); From 1a60dfa947c956bd307cd3b6de82e6db5427da7a Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 17 Nov 2025 23:37:22 +0800 Subject: [PATCH 11/25] fix unit test --- .../src/plugin_compiler_adapter.cpp | 19 ++++++++----------- .../intel_npu/src/plugin/src/plugin.cpp | 12 ++++++++---- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 39531e33152a4c..4f4e8e285d3db7 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -292,16 +292,19 @@ std::shared_ptr PluginCompilerAdapter::parse( const std::optional>& model) const { OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse"); - NetworkMetadata networkMeta; - std::vector network(mainBlob.get_byte_size()); - GraphDescriptor mainGraphDesc; - _logger.debug("parse start"); + std::vector network(mainBlob.get_byte_size()); network.assign(reinterpret_cast(mainBlob.data()), reinterpret_cast(mainBlob.data()) + mainBlob.get_byte_size()); - 
networkMeta = _compiler->parse(network, config); + auto networkMeta = _compiler->parse(network, config); + network.clear(); + network.shrink_to_fit(); + + GraphDescriptor mainGraphDesc; if (_zeGraphExt) { + mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); + // if use vcl lib to compile, the metadata is empty and get the info from driver parser if (networkMeta.inputs.empty() && networkMeta.outputs.empty()) { // If the metadata is empty, we can try to get it from the driver parser @@ -316,12 +319,6 @@ std::shared_ptr PluginCompilerAdapter::parse( } else { _logger.warning("no zeGraphExt, metadata is empty from vcl compiler."); } - network.clear(); - network.shrink_to_fit(); - - if (_zeGraphExt) { - mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); - } _logger.debug("main schedule parse end"); diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index f52fdbcb3928fa..77757f0e9a4c10 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -164,6 +164,7 @@ void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& // 3720 -> DRIVER // 4000 and later -> MLIR auto it_compiler_type = propertiesMap.find(std::string(COMPILER_TYPE::key())); + // if user set compilerType, will not update auto if (it_compiler_type == propertiesMap.end()) { // if platform is provided by local config = use that const ov::AnyMap localProperties = propertiesMap; @@ -663,6 +664,7 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // activate the NPUW path auto useNpuwKey = ov::intel_npu::use_npuw.name(); ov::AnyMap localProperties = properties; + if (localProperties.count(useNpuwKey)) { if (localProperties.at(useNpuwKey).as() == true) { return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties); @@ -871,6 +873,7 @@ ov::SoPtr 
Plugin::get_default_context(const ov::AnyMap& remo return std::make_shared(_backend); } +// duo std::shared_ptr Plugin::import_model(std::istream& stream, const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); @@ -907,9 +910,10 @@ std::shared_ptr Plugin::import_model(std::istream& stream, c OPENVINO_THROW("Blob size is too large to be represented on a std::streamsize!"); } stream.read(tensor.data(), static_cast(blobSize)); + std::cout << "=======just to check issue========" << std ::endl; return parse(tensor, std::move(metadata), npu_plugin_properties); } catch (const std::exception& ex) { - OPENVINO_THROW("Can't import network: ", ex.what()); + OPENVINO_THROW("Can't import network: ", ex.what()); /// get issue message } catch (...) { OPENVINO_THROW("NPU import_model got unexpected exception from CompiledModel"); } @@ -981,9 +985,9 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& CompilerAdapterFactory compilerAdapterFactory; auto npu_plugin_properties = properties; exclude_model_ptr_from_map(npu_plugin_properties); + checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); @@ -1016,11 +1020,11 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, // ov::hint::model has no corresponding "Config" implementation thus we need to remove it from the // list of properties auto originalModel = exclude_model_ptr_from_map(npu_plugin_properties); - + checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = 
any_copy(npu_plugin_properties); update_log_level(propertiesMap); - checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); + auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); From 21ea3961e73aa252b643bd02274877e1fe24f4e7 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Tue, 18 Nov 2025 10:19:49 +0800 Subject: [PATCH 12/25] fix SERIALIZATION_WEIGHTS_SIZE_THRESHOLD undeclared identifiers --- .../intel_npu/src/compiler_adapter/src/vcl_api.cpp | 8 ++++---- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 12 ++++-------- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index e54568c483806c..6cb2e5b2fdacea 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -228,9 +228,9 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr() + ? updatedConfig.get() : true, - updatedConfig.get()); + updatedConfig.get()); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); @@ -435,9 +435,9 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr() + ? 
updatedConfig.get() : true, - updatedConfig.get()); + updatedConfig.get()); std::string buildFlags; buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 77757f0e9a4c10..e361946f853e38 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -159,12 +159,12 @@ std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { } void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& propertiesMap, Logger& log) { - // if there is no compiler_type provided, use base_config value, check and update by the device + // If there is no compiler_type provided, use base_config value, check and update by the device // update the compilerType by device: // 3720 -> DRIVER // 4000 and later -> MLIR auto it_compiler_type = propertiesMap.find(std::string(COMPILER_TYPE::key())); - // if user set compilerType, will not update auto + // If user set compilerType, will not update by device if (it_compiler_type == propertiesMap.end()) { // if platform is provided by local config = use that const ov::AnyMap localProperties = propertiesMap; @@ -664,7 +664,6 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< // activate the NPUW path auto useNpuwKey = ov::intel_npu::use_npuw.name(); ov::AnyMap localProperties = properties; - if (localProperties.count(useNpuwKey)) { if (localProperties.at(useNpuwKey).as() == true) { return ov::npuw::ICompiledModel::create(model->clone(), shared_from_this(), localProperties); @@ -873,7 +872,6 @@ ov::SoPtr Plugin::get_default_context(const ov::AnyMap& remo return std::make_shared(_backend); } -// duo std::shared_ptr Plugin::import_model(std::istream& stream, const ov::AnyMap& properties) const { OV_ITT_SCOPED_TASK(itt::domains::NPUPlugin, "Plugin::import_model"); @@ -910,10 +908,9 @@ std::shared_ptr 
Plugin::import_model(std::istream& stream, c OPENVINO_THROW("Blob size is too large to be represented on a std::streamsize!"); } stream.read(tensor.data(), static_cast(blobSize)); - std::cout << "=======just to check issue========" << std ::endl; return parse(tensor, std::move(metadata), npu_plugin_properties); } catch (const std::exception& ex) { - OPENVINO_THROW("Can't import network: ", ex.what()); /// get issue message + OPENVINO_THROW("Can't import network: ", ex.what()); } catch (...) { OPENVINO_THROW("NPU import_model got unexpected exception from CompiledModel"); } @@ -1021,13 +1018,12 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, // list of properties auto originalModel = exclude_model_ptr_from_map(npu_plugin_properties); checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); + CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); - auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); - OV_ITT_TASK_CHAIN(PLUGIN_PARSE_MODEL, itt::domains::NPUPlugin, "Plugin::parse", "fork_local_config"); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::RunTime); _logger.setLevel(localConfig.get()); From da62ff89b09c8726a980b3c4f45569c7a003ec34 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Tue, 18 Nov 2025 11:47:11 +0800 Subject: [PATCH 13/25] revert namespace for SERIALIZATION_WEIGHTS_SIZE_THRESHOLD --- .../intel_npu/src/compiler_adapter/src/vcl_api.cpp | 8 ++++---- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 6cb2e5b2fdacea..e54568c483806c 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ 
b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -228,9 +228,9 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr() + ? updatedConfig.get() : true, - updatedConfig.get()); + updatedConfig.get()); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); @@ -435,9 +435,9 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr() + ? updatedConfig.get() : true, - updatedConfig.get()); + updatedConfig.get()); std::string buildFlags; buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index e361946f853e38..3c9b0ea4dffb98 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -987,6 +987,7 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& update_log_level(propertiesMap); auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); + auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); _logger.setLevel(localConfig.get()); const auto platform = From 800c78e00a0c0b79e3ac022f80a02bd2726e6f7c Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Tue, 18 Nov 2025 14:26:28 +0800 Subject: [PATCH 14/25] remove USE_BASE_MODEL_SERIALIZER option --- .../intel_npu/src/compiler_adapter/src/vcl_api.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index e54568c483806c..d46f221494fe30 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -228,9 +228,8 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr() - : 
true, - updatedConfig.get()); + ? config.get() + : true); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); @@ -435,9 +434,8 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr() - : true, - updatedConfig.get()); + ? config.get() + : true); std::string buildFlags; buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); From acafdc663945e821b37e68ac7076682becd79403 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Tue, 18 Nov 2025 14:46:03 +0800 Subject: [PATCH 15/25] fix 3720 platfrom compilerType issue and metadata name issue --- .../src/plugin_compiler_adapter.cpp | 7 +- .../intel_npu/src/plugin/src/plugin.cpp | 125 +++++++++++++----- 2 files changed, 97 insertions(+), 35 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 4f4e8e285d3db7..dc817bf3c76e0d 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -130,6 +130,7 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); + networkMeta.name = model->get_friendly_name(); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); @@ -390,7 +391,7 @@ std::vector PluginCompilerAdapter::get_supported_options() const { // For VCL, we can return the supported options from compiler VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); if (vclCompiler == nullptr) { - // If _compiler cannot cover to VCLCompilerImpl, it should use the mlir library. 
+ // If _compiler cannot be cast to VCLCompilerImpl, it should use the mlir library. // PluginCompiler has all the same options as plugin // Returing empty string to let the plugin fallback to legacy registration _logger.warning("Failed to cast compiler to VCLCompilerImpl. Returning empty supported options."); @@ -422,9 +423,9 @@ std::vector PluginCompilerAdapter::get_supported_options() const { bool PluginCompilerAdapter::is_option_supported(std::string optname) const { VCLCompilerImpl* vclCompiler = dynamic_cast(_compiler.operator->()); if (vclCompiler == nullptr) { - // If _compiler cannot cover to VCLCompilerImpl, it should use the mlir library. + // If _compiler cannot be cast to VCLCompilerImpl, it should use the mlir library. // This functions has no utility in PluginCompiler - // returning false for any request to avoid the option of spaming the plugin + // returning false for any request to avoid the option of spamming the plugin _logger.warning("Failed to cast compiler to VCLCompilerImpl. 
Returning false for check."); return false; } diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index d7218e4ef4a8f4..b14c272ece5cba 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -145,7 +145,7 @@ void update_log_level(const std::map& propertiesMap) { } std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { - const std::string defaultDevice = std::string(ov::intel_npu::Platform::NPU4000); + const std::string defaultDevice = ""; auto it = propertiesMap.find(std::string(DEVICE_ID::key())); if (it != propertiesMap.end()) { return it->second.as(); @@ -153,43 +153,95 @@ std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { it = propertiesMap.find(std::string(PLATFORM::key())); if (it != propertiesMap.end()) { - return it->second.as(); + auto platformStr = it->second.as(); + if (platformStr == ov::intel_npu::Platform::AUTO_DETECT) { + return defaultDevice; + } + + platformStr = utils::getPlatformByDeviceName(platformStr); + platformStr = ov::intel_npu::Platform::standardize(platformStr); + return platformStr; } return defaultDevice; } -void checkUpdateforspecialPlatform(const FilteredConfig& base_conf, ov::AnyMap& propertiesMap, Logger& log) { - // If there is no compiler_type provided, use base_config value, check and update by the device - // update the compilerType by device: +void checkUpdateforSpecialPlatform(const FilteredConfig& base_conf, + ov::AnyMap& propertiesMap, + const std::string& deviceName, + Logger& log) { + // If there is no compiler_type provided, use base_config default value + // Default compilerType for different platform is up to device: // 3720 -> DRIVER - // 4000 and later -> MLIR - auto it_compiler_type = propertiesMap.find(std::string(COMPILER_TYPE::key())); - // If user set compilerType, will not update by device - if (it_compiler_type == propertiesMap.end()) { - // if platform is 
provided by local config = use that - const ov::AnyMap localProperties = propertiesMap; - std::string getdevice = getDeviceFromProperties(localProperties); - if (getdevice == std::string((ov::intel_npu::Platform::NPU3720))) { - if (base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { - log.warning( - "Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " - "compiler_type to 'DRIVER'. Maybe cause the compilerType inconsistency issues."); - } - // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user - propertiesMap[std::string(COMPILER_TYPE::key())] = - COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); - } + // 4000 and later -> default + + // If user set compilerType in config, will not update by device + auto it = propertiesMap.find(std::string(COMPILER_TYPE::key())); + if(it != propertiesMap.end()) { + return; + } + + std::string getDevice = getDeviceFromProperties(propertiesMap); + + if (deviceName.empty() && getDevice.empty()) { + OPENVINO_THROW("Device name is empty!"); + } + + std::string usedDevice = deviceName; + if (deviceName != getDevice) { + log.info("The device from properties '%s' is different from the actual device '%s', use device '%s' to check " + "compiler_type.", + getDevice.c_str(), + deviceName.c_str(), + deviceName.c_str()); + + usedDevice = deviceName.empty() ? getDevice : deviceName; + } + + // If the platform is not 3720, will not update by device + if (usedDevice != std::string(ov::intel_npu::Platform::NPU3720)) { + return; } + + if (base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { + log.warning( + "Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " + "compiler_type to 'DRIVER'. 
Maybe cause the compilerType inconsistency issues."); + } + + // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user + propertiesMap[std::string(COMPILER_TYPE::key())] = COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); + + return; } -static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, const ov::AnyMap& local_conf) { +static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, + const ov::AnyMap& local_conf, + const std::string& deviceName) { // first look if provided config changes compiler type auto it = local_conf.find(std::string(COMPILER_TYPE::key())); if (it != local_conf.end()) { // if compiler_type is provided by local config = use that return COMPILER_TYPE::parse(it->second.as()); } - // if there is no compiler_type provided = use base_config value + // if there is no compiler_type provided = use base_config value and update default vaule by platform if needed + // Default compilerType for different platform is up to device: + // 3720 -> DRIVER + // 4000 and later -> default + if (!deviceName.empty()) { + if (deviceName == std::string(ov::intel_npu::Platform::NPU3720)) { + return ov::intel_npu::CompilerType::DRIVER; + } + } else { + std::string getdevice = getDeviceFromProperties(local_conf); + if (getdevice == std::string(ov::intel_npu::Platform::NPU3720)) { + return ov::intel_npu::CompilerType::DRIVER; + } + if (getdevice == std::string(ov::intel_npu::Platform::AUTO_DETECT)) { + Logger::global().warning("Device is set to AUTO_DETECT, cannot decide the default compiler_type by device, " + "use the default compiler_type."); + } + } + return base_conf.get(); } @@ -679,14 +731,16 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< } // For 3720, need check and update its compiler_type - checkUpdateforspecialPlatform(_globalConfig, localProperties, _logger); - + auto deviceBeforeCompilerCreate = _backend == nullptr ? 
nullptr : _backend->getDevice(); + std::string deviceName = deviceBeforeCompilerCreate != nullptr ? deviceBeforeCompilerCreate->getName() : ""; + checkUpdateforSpecialPlatform(_globalConfig, localProperties, deviceName, _logger); const std::map localPropertiesMap = any_copy(localProperties); update_log_level(localPropertiesMap); // create compiler CompilerAdapterFactory compilerAdapterFactory; - auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties)); + auto compiler = + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, localProperties, deviceName)); OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); @@ -981,12 +1035,14 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& CompilerAdapterFactory compilerAdapterFactory; auto npu_plugin_properties = properties; exclude_model_ptr_from_map(npu_plugin_properties); - checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); + auto device = _backend == nullptr ? nullptr : _backend->getDevice(); + std::string deviceName = device != nullptr ? 
device->getName() : ""; + checkUpdateforSpecialPlatform(_globalConfig, npu_plugin_properties, deviceName, _logger); const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); auto compiler = - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); - + compilerAdapterFactory.getCompiler(_backend, + resolveCompilerType(_globalConfig, npu_plugin_properties, deviceName)); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); _logger.setLevel(localConfig.get()); const auto platform = @@ -1017,13 +1073,18 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, // ov::hint::model has no corresponding "Config" implementation thus we need to remove it from the // list of properties auto originalModel = exclude_model_ptr_from_map(npu_plugin_properties); - checkUpdateforspecialPlatform(_globalConfig, npu_plugin_properties, _logger); + + auto deviceBeforeCompilerCreate = _backend == nullptr ? nullptr : _backend->getDevice(); + std::string deviceName = deviceBeforeCompilerCreate != nullptr ? 
deviceBeforeCompilerCreate->getName() : ""; + checkUpdateforSpecialPlatform(_globalConfig, npu_plugin_properties, deviceName, _logger); CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); auto compiler = - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); + compilerAdapterFactory.getCompiler(_backend, + resolveCompilerType(_globalConfig, npu_plugin_properties, deviceName)); + OV_ITT_TASK_CHAIN(PLUGIN_PARSE_MODEL, itt::domains::NPUPlugin, "Plugin::parse", "fork_local_config"); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::RunTime); _logger.setLevel(localConfig.get()); From 0c9ae289cd12971f97ab9b9ef2d83d3c72cab31f Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Fri, 21 Nov 2025 12:14:54 +0800 Subject: [PATCH 16/25] update to use vcl serializer --- .../src/compiler_adapter/include/vcl_api.hpp | 2 + .../src/compiler_adapter/src/vcl_api.cpp | 121 +++++++++++++----- .../intel_npu/src/plugin/src/plugin.cpp | 7 +- 3 files changed, 96 insertions(+), 34 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp index 2eb451812e1f12..b0f040be32cc33 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp @@ -6,6 +6,7 @@ #include +#include "intel_npu/common/filtered_config.hpp" #include "intel_npu/icompiler.hpp" #include "npu_driver_compiler.h" #include "openvino/core/except.hpp" @@ -79,6 +80,7 @@ vcl_symbols_list(); vcl_weak_symbols_list(); #undef vcl_symbol_statement +bool isUseBaseModelSerializer(const FilteredConfig& config); std::string supportVclCompiler(int major, int minor); class VCLCompilerImpl final : public intel_npu::ICompiler { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp 
b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index d46f221494fe30..1e102343a0b4c6 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -4,7 +4,6 @@ #include "vcl_api.hpp" -#include "intel_npu/common/filtered_config.hpp" #include "intel_npu/config/options.hpp" #include "intel_npu/npu_private_properties.hpp" #include "intel_npu/profiling.hpp" @@ -125,9 +124,9 @@ VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerIm _logger.info("Use Lib VCL version to create compiler"); if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major || (VCL_COMPILER_VERSION_MAJOR == _vclVersion.major && VCL_COMPILER_VERSION_MINOR < _vclVersion.minor)) { - _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL %d.%d, " + _logger.warning("inside supported VCL version is lower than loaded VCL api:\n plugin was built with VCL %d.%d, " "\n but loaded VCL is %d.%d.\n" - "Will downwise to use the latest plugin vcl compiler!!!", + "Will downgrade to use the latest plugin vcl compiler!!!", VCL_COMPILER_VERSION_MAJOR, VCL_COMPILER_VERSION_MINOR, _vclVersion.major, @@ -196,6 +195,24 @@ struct vcl_allocator_malloc { } }; +bool isUseBaseModelSerializer(const FilteredConfig& config) { + // user pass use_base_model_serializer config + if (config.isAvailable(ov::intel_npu::use_base_model_serializer.name()) && + config.has(ov::intel_npu::use_base_model_serializer.name())) { + return config.get(); + } + + // user pass model_serializer_version config + if (config.isAvailable(ov::intel_npu::model_serializer_version.name()) && + config.has(ov::intel_npu::use_base_model_serializer.name())) { + return (config.get() == + ov::intel_npu::ModelSerializerVersion::ALL_WEIGHTS_COPY); + } + + // vcl serializer method is not set by user, will default to use it. 
+ return false; +} + std::string supportVclCompiler(int major, int minor) { if (major >= 7 && minor >= 4) { return "vclAllocatedExecutableCreate2"; @@ -204,12 +221,21 @@ std::string supportVclCompiler(int major, int minor) { } else { return "vclExecutableCreate"; } - return "unsupported VCL version"; } NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { _logger.debug("compile start"); + /// Check the linked vcl version whether supported in plugin + uint16_t usedMajor = VCL_COMPILER_VERSION_MAJOR, usedMinor = VCL_COMPILER_VERSION_MINOR; + if (static_cast(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) { + usedMinor = std::min(static_cast(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor); + } else if (static_cast(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) { + usedMajor = _vclVersion.major; + usedMinor = _vclVersion.minor; + } + _logger.debug("the finally used vcl version is %d.%d", usedMajor, usedMinor); + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); @@ -223,13 +249,31 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr= 7.5 + if (usedMajor >= 7 && usedMinor >= 5) { + useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig); + } + + if (useBaseModelSerializer) { + _logger.debug("serialize IR is base method, useBaseModelSerializer is %d", useBaseModelSerializer); + } else { + _logger.debug("serialize IR is vcl method, useBaseModelSerializer is %d", useBaseModelSerializer); + + // To resolve the issue with the default configuration where no user passes the serializer config, the VCL + // serializer will be used as the default in the plugin adapter. You need to pass the serializer config; + // otherwise, you will encounter a deserialization issue within the compiler. 
+ _logger.warning("Add serializer config"); + if (updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name())) { + updatedConfig.update({{ov::intel_npu::use_base_model_serializer.name(), "NO"}}); + } else if (updatedConfig.isAvailable(ov::intel_npu::model_serializer_version.name())) { + updatedConfig.update({{ov::intel_npu::model_serializer_version.name(), "NO_WEIGHTS_COPY"}}); + } + } + auto serializedIR = - driver_compiler_utils::serializeIR(model, - compilerVersion, - maxOpsetVersion, - updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) - ? config.get() - : true); + driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer); std::string buildFlags; const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9)); @@ -237,7 +281,7 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) { - usedMinor = std::min(static_cast(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor); - } else if (static_cast(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) { - usedMajor = _vclVersion.major; - usedMinor = _vclVersion.minor; - } - if (usedMajor >= 7 && usedMinor >= 4) { if (VCL_COMPILER_VERSION_MAJOR < _vclVersion.major) { _logger.warning("inside supported VCL version is lower than used VCL api:\n plugin was built with VCL " @@ -268,7 +303,7 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr& model, const Config& config) const { _logger.debug("query start"); + + /// Check the linked vcl version whether supported in plugin + uint16_t usedMajor = VCL_COMPILER_VERSION_MAJOR, usedMinor = VCL_COMPILER_VERSION_MINOR; + if (static_cast(VCL_COMPILER_VERSION_MAJOR) == _vclVersion.major) { + usedMinor = std::min(static_cast(VCL_COMPILER_VERSION_MINOR), _vclVersion.minor); + } else if (static_cast(VCL_COMPILER_VERSION_MAJOR) > _vclVersion.major) { + usedMajor = _vclVersion.major; + 
usedMinor = _vclVersion.minor; + } + _logger.debug("the finally used vcl version is %d.%d", usedMajor, usedMinor); + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); @@ -428,17 +474,32 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr= 7.5 + if (usedMajor >= 7 && usedMinor >= 5) { + useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig); + } + if (useBaseModelSerializer) { + _logger.debug("serialize IR is base method, useBaseModelSerializer is %d", useBaseModelSerializer); + } else { + _logger.debug("serialize IR is vcl method, useBaseModelSerializer is %d", useBaseModelSerializer); + + // To resolve the issue with the default configuration where no user passes the serializer config, the VCL + // serializer will be used as the default in the plugin adapter. You need to pass the serializer config; + // otherwise, you will encounter a deserialization issue within the compiler. + _logger.warning("Add serializer config"); + if (updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name())) { + updatedConfig.update({{ov::intel_npu::use_base_model_serializer.name(), "NO"}}); + } else if (updatedConfig.isAvailable(ov::intel_npu::model_serializer_version.name())) { + updatedConfig.update({{ov::intel_npu::model_serializer_version.name(), "NO_WEIGHTS_COPY"}}); + } + } auto serializedIR = - driver_compiler_utils::serializeIR(model, - compilerVersion, - maxOpsetVersion, - updatedConfig.isAvailable(ov::intel_npu::use_base_model_serializer.name()) - ? 
config.get() - : true); + driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer); std::string buildFlags; - buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); + buildFlags += driver_compiler_utils::serializeConfig(updatedConfig, compilerVersion); _logger.debug("queryImpl build flags : %s", buildFlags.c_str()); vcl_query_handle_t queryHandle; diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index b1ba60cdf215c1..007b63ae9f6698 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -176,7 +176,7 @@ void checkUpdateforSpecialPlatform(const FilteredConfig& base_conf, // If user set compilerType in config, will not update by device auto it = propertiesMap.find(std::string(COMPILER_TYPE::key())); - if(it != propertiesMap.end()) { + if (it != propertiesMap.end()) { return; } @@ -203,9 +203,8 @@ void checkUpdateforSpecialPlatform(const FilteredConfig& base_conf, } if (base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { - log.warning( - "Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " - "compiler_type to 'DRIVER'. Maybe cause the compilerType inconsistency issues."); + log.warning("Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " + "compiler_type to 'DRIVER'. 
Maybe cause the compilerType inconsistency issues."); } // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user From b363ff6d69b5dd3e65ae8f1194aeb68450d5f9b9 Mon Sep 17 00:00:00 2001 From: "Kang, Wenjing" Date: Fri, 21 Nov 2025 22:31:09 +0800 Subject: [PATCH 17/25] Add compileWsOneShot and compileWsIterative for VCLCompilerImpl Signed-off-by: Kang, Wenjing --- .../intel_npu/common/icompiler_adapter.hpp | 35 +++++ .../include/npu_driver_compiler.h | 6 +- .../src/compiler_adapter/include/vcl_api.hpp | 10 +- .../src/driver_compiler_adapter.cpp | 33 ----- .../src/compiler_adapter/src/graph.cpp | 4 + .../src/plugin_compiler_adapter.cpp | 134 ++++++++++-------- .../src/compiler_adapter/src/vcl_api.cpp | 94 +++++++++++- .../compiler_adapter/src/weightless_graph.cpp | 4 + 8 files changed, 220 insertions(+), 100 deletions(-) diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp index f41ceef203ad60..1ed01edb67ac4d 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp @@ -6,6 +6,9 @@ #include "intel_npu/common/filtered_config.hpp" #include "intel_npu/common/igraph.hpp" +#include "openvino/core/model.hpp" +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" +#include "openvino/op/constant.hpp" namespace intel_npu { @@ -56,6 +59,38 @@ class ICompilerAdapter { virtual std::vector get_supported_options() const = 0; virtual bool is_option_supported(std::string optname) const = 0; + /** + * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon + * serialization. 
+ * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information + * regarding the offset of the weights within the binary file, as well as the original size and precision. This + * information is required within the "weights separation" flow, therefore this function is here to store it. + * @note Not calling this function in the weights separation flow would lead to this information being lost upon + * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent + * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be + * misinformed and lookups of weights offsets could fail. + * + * @param model Both source and target. + */ + void storeWeightlessCacheAttribute(const std::shared_ptr& model) const { + size_t constantId = 0; + for (auto&& node : model->get_ordered_ops()) { + if (ov::is_type(node)) { + ov::RTMap& runtimeInfoMap = node->get_rt_info(); + const auto& weightlessCacheAttrIt = + runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static()); + + const std::string constantIdString = std::to_string(constantId++); + if (weightlessCacheAttrIt != runtimeInfoMap.end()) { + auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as(); + model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString); + model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString); + model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString); + } + } + } + } + virtual ~ICompilerAdapter() = default; }; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h index a8c38506fc844c..e7f3d3bee21010 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h +++ 
b/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h @@ -23,7 +23,7 @@ extern "C" { #endif #define VCL_COMPILER_VERSION_MAJOR 7 -#define VCL_COMPILER_VERSION_MINOR 4 +#define VCL_COMPILER_VERSION_MINOR 6 #define VCL_PROFILING_VERSION_MAJOR 2 #define VCL_PROFILING_VERSION_MINOR 0 @@ -272,6 +272,10 @@ VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreate2(vcl_compile uint8_t** blobBuffer, uint64_t* blobSize); +VCL_APIEXPORT vcl_result_t VCL_APICALL vclAllocatedExecutableCreateWSOneShot(vcl_compiler_handle_t compiler, + vcl_executable_desc_t desc, + vcl_allocator2_t* allocator); + /////////////////////////////////////////////////////////////////////////////// /// @brief Destroys the executable and releases the cached blob. VCL_APIEXPORT vcl_result_t VCL_APICALL vclExecutableDestroy(vcl_executable_handle_t executable); diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp index b0f040be32cc33..7163beada5322a 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp @@ -37,7 +37,8 @@ namespace intel_npu { #define vcl_weak_symbols_list() \ vcl_symbol_statement(vclAllocatedExecutableCreate2) \ vcl_symbol_statement(vclGetCompilerSupportedOptions) \ - vcl_symbol_statement(vclGetCompilerIsOptionSupported) + vcl_symbol_statement(vclGetCompilerIsOptionSupported) \ + vcl_symbol_statement(vclAllocatedExecutableCreateWSOneShot) // clang-format on class VCLApi { @@ -95,6 +96,13 @@ class VCLCompilerImpl final : public intel_npu::ICompiler { NetworkDescription compile(const std::shared_ptr& model, const Config& config) const override; + std::vector> compileWsOneShot(const std::shared_ptr& model, + const Config& config) const override; + + NetworkDescription compileWsIterative(const std::shared_ptr& model, + const Config& config, + size_t callNumber) const override; + 
ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; NetworkMetadata parse(const std::vector& network, const Config& config) const override; diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 15d0091e73bd4f..4b11610d56b7a3 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -13,7 +13,6 @@ #include "intel_npu/utils/logger/logger.hpp" #include "mem_usage.hpp" #include "openvino/core/model.hpp" -#include "openvino/core/rt_info/weightless_caching_attributes.hpp" #include "vcl_serializer.hpp" #include "weightless_graph.hpp" @@ -26,38 +25,6 @@ bool isInitMetadata(const intel_npu::NetworkMetadata& networkMetadata) { return networkMetadata.inputs.at(0).isInitInputWeights; } -/** - * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon - * serialization. - * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information - * regarding the offset of the weights within the binary file, as well as the original size and precision. This - * information is required within the "weights separation" flow, therefore this function is here to store it. - * @note Not calling this function in the weights separation flow would lead to this information being lost upon - * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent - * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be - * misinformed and lookups of weights offsets could fail. - * - * @param model Both source and target. 
- */ -void storeWeightlessCacheAttribute(const std::shared_ptr& model) { - size_t constantId = 0; - for (auto&& node : model->get_ordered_ops()) { - if (ov::is_type(node)) { - ov::RTMap& runtimeInfoMap = node->get_rt_info(); - const auto& weightlessCacheAttrIt = - runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static()); - - const std::string constantIdString = std::to_string(constantId++); - if (weightlessCacheAttrIt != runtimeInfoMap.end()) { - auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as(); - model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString); - } - } - } -} - /** * @brief On-going migration from "use_base_model_serializer" to "model_serializer_version". So we have to check both, * depending on which one is supported by the compiler. 
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp index 9ace471ea66ad4..a6d1d6bcc68f95 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp @@ -157,6 +157,10 @@ void Graph::set_argument_value(uint32_t argi, const void* argv) const { } void Graph::initialize(const Config& config) { + if (!_zeroInitStruct) { + _logger.warning("_zeroInitStruct is nullptr!"); + return; + } _logger.debug("Graph initialize start"); if (_zeGraphExt == nullptr || _graphDesc._handle == nullptr) { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index f5928cb22f2a1b..4fd516f9b642bd 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -61,6 +61,13 @@ ov::Tensor make_tensor_from_vector(std::vector& vector) { return ov::make_tensor(impl); } +bool isInitMetadata(const intel_npu::NetworkMetadata& networkMetadata) { + if (networkMetadata.inputs.size() == 0) { + return false; + } + return networkMetadata.inputs.at(0).isInitInputWeights; +} + } // namespace namespace intel_npu { @@ -154,23 +161,12 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr> initNetworkDescriptions; - std::shared_ptr mainNetworkDescription; + // OPENVINO_ASSERT(_zeGraphExt); + storeWeightlessCacheAttribute(model); _logger.debug("compile start"); - const auto starts_with = [](const std::string& str, const std::string& prefix) { - return str.substr(0, prefix.size()) == prefix; - }; - const auto isInit = [&](std::string name) { - return starts_with(name, "init"); - }; - - const auto isMain = [&](std::string name) { - return starts_with(name, "main"); - }; - - Config localConfig = config; + FilteredConfig localConfig = 
config; if (!localConfig.has()) { localConfig.update({{ov::intel_npu::separate_weights_version.name(), "ONE_SHOT"}}); } @@ -182,37 +178,87 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr= ov::log::Level::INFO) { compile_model_mem_start = get_peak_memory_usage(); } + + std::vector initGraphDescriptors; + std::vector tensorsInits; + std::vector initNetworkMetadata; + std::vector> initNetworkDescriptions; + + ov::Tensor tensorMain; + GraphDescriptor mainGraphDesc; + NetworkMetadata mainNetworkMetadata; + std::shared_ptr mainNetworkDescription; + switch (localConfig.get()) { case ov::intel_npu::WSVersion::ONE_SHOT: { std::vector> initMainNetworkDescriptions = _compiler->compileWsOneShot(model, localConfig); -#if 0 // TODO: it is not clear whether we should change the name - OPENVINO_ASSERT(isMain(initMainNetworkDescriptions.back()->metadata.name), - "Unexpected network name for main:", - initMainNetworkDescriptions.back()->metadata.name); -#endif - mainNetworkDescription = initMainNetworkDescriptions.back(); initMainNetworkDescriptions.pop_back(); initNetworkDescriptions = std::move(initMainNetworkDescriptions); + + tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork); + if (_zeGraphExt) { + // Depending on the config, we may get an error when trying to + // get the graph handle from the compiled network + try { + mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size()); + mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc); + } catch (...) { + _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " + "allowed. 
Only exports are available"); + } + } + + initGraphDescriptors.reserve(initNetworkDescriptions.size()); + tensorsInits.reserve(initNetworkDescriptions.size()); + initNetworkMetadata.reserve(initNetworkDescriptions.size()); + for (auto& networkDesc : initNetworkDescriptions) { + ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork); + GraphDescriptor initGraphDesc; + NetworkMetadata initNetworkMeta; + if (_zeGraphExt) { + try { + initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); + initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc); + } catch (...) { + } + } + + initGraphDescriptors.push_back(initGraphDesc); + tensorsInits.push_back(std::move(tensor)); + initNetworkMetadata.push_back(std::move(initNetworkMeta)); + } } break; case ov::intel_npu::WSVersion::ITERATIVE: { + OPENVINO_ASSERT(_zeGraphExt, + "The \"iterative\" implementation of the weights separation feature requires a Level Zero " + "graph handle to compile a model."); + + // The state of the model needs to be reset every iteration const std::shared_ptr originalModel = model->clone(); std::shared_ptr targetModel = model; size_t i = 0; while (auto networkDescription = std::make_shared(_compiler->compileWsIterative(targetModel, localConfig, i++))) { - if (isInit(networkDescription->metadata.name)) { - initNetworkDescriptions.push_back(networkDescription); + ov::Tensor tensor = make_tensor_from_vector(networkDescription->compiledNetwork); + GraphDescriptor graphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); + NetworkMetadata networkMetadata = _zeGraphExt->getNetworkMeta(graphDesc); + + if (isInitMetadata(networkDescription->metadata)) { targetModel = originalModel->clone(); + initGraphDescriptors.push_back(graphDesc); + tensorsInits.push_back(std::move(tensor)); + initNetworkMetadata.push_back(std::move(networkMetadata)); + initNetworkDescriptions.push_back(networkDescription); continue; } - 
OPENVINO_ASSERT(isMain(networkDescription->metadata.name), - "Unexpected network name: ", - networkDescription->metadata.name); + tensorMain = std::move(tensor); + mainGraphDesc = graphDesc; + mainNetworkMetadata = std::move(networkMetadata); mainNetworkDescription = std::move(networkDescription); break; } @@ -233,44 +279,6 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptrcompiledNetwork); - GraphDescriptor mainGraphDesc; - NetworkMetadata mainNetworkMetadata; - if (_zeGraphExt) { - // Depending on the config, we may get an error when trying to - // get the graph handle from the compiled network - try { - mainGraphDesc = _zeGraphExt->getGraphDescriptor(tensorMain.data(), tensorMain.get_byte_size()); - mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc); - } catch (...) { - _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " - "allowed. Only exports are available"); - } - } - - std::vector initGraphDescriptors; - std::vector tensorsInits; - std::vector initNetworkMetadata; - initGraphDescriptors.reserve(initNetworkDescriptions.size()); - tensorsInits.reserve(initNetworkDescriptions.size()); - initNetworkMetadata.reserve(initNetworkDescriptions.size()); - for (auto& networkDesc : initNetworkDescriptions) { - ov::Tensor tensor = make_tensor_from_vector(networkDesc->compiledNetwork); - GraphDescriptor initGraphDesc; - NetworkMetadata initNetworkMeta; - if (_zeGraphExt) { - try { - initGraphDesc = _zeGraphExt->getGraphDescriptor(tensor.data(), tensor.get_byte_size()); - initNetworkMeta = _zeGraphExt->getNetworkMeta(initGraphDesc); - } catch (...) 
{ - } - } - - initGraphDescriptors.push_back(initGraphDesc); - tensorsInits.push_back(std::move(tensor)); - initNetworkMetadata.push_back(std::move(initNetworkMeta)); - } - return std::make_shared( _zeGraphExt, _zeroInitStruct, diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp index 1e102343a0b4c6..d86e8b74dbf881 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp @@ -185,6 +185,27 @@ struct vcl_allocator_vector : vcl_allocator2_t { std::vector m_vec; }; +struct vcl_allocator_vector_2 : vcl_allocator2_t { + vcl_allocator_vector_2() : vcl_allocator2_t{vector_allocate, vector_deallocate} {} + + static uint8_t* vector_allocate(vcl_allocator2_t* allocator, size_t size) { + vcl_allocator_vector_2* vecAllocator = static_cast(allocator); + auto newVec = std::make_shared>(); + newVec->resize(size); + uint8_t* ptr = newVec->data(); + vecAllocator->m_vector.emplace_back(newVec); + return ptr; + } + + static void vector_deallocate(vcl_allocator2_t* allocator, uint8_t* ptr) { + vcl_allocator_vector_2* vecAllocator = static_cast(allocator); + vecAllocator->m_vector.clear(); + vecAllocator->m_vector.shrink_to_fit(); + } + + std::vector>> m_vector; +}; + struct vcl_allocator_malloc { static uint8_t* vcl_allocate(uint64_t size) { return reinterpret_cast(malloc(size)); @@ -276,10 +297,9 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr> VCLCompilerImpl::compileWsOneShot( + const std::shared_ptr& model, + const Config& config) const { + _logger.debug("compileWsOneShot start"); + + const auto maxOpsetVersion = _compilerProperties.supportedOpsets; + _logger.info("getSupportedOpsetVersion Max supported version of opset in CiD: %d", maxOpsetVersion); + + _logger.debug("serialize IR"); + ze_graph_compiler_version_info_t compilerVersion; + compilerVersion.major = _compilerProperties.version.major; + 
compilerVersion.minor = _compilerProperties.version.minor; + + const FilteredConfig* filteredConfig = dynamic_cast(&config); + if (filteredConfig == nullptr) { + OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + bool useBaseModelSerializer = isUseBaseModelSerializer(updatedConfig); + auto serializedIR = + driver_compiler_utils::serializeIR(model, compilerVersion, maxOpsetVersion, useBaseModelSerializer); + + std::string buildFlags; + + _logger.debug("create build flags"); + buildFlags += driver_compiler_utils::serializeIOInfo(model, true); + buildFlags += " "; + buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion); + _logger.debug("final build flags to compiler: %s", buildFlags.c_str()); + + vcl_executable_desc_t exeDesc = {serializedIR.second.get(), + serializedIR.first, + buildFlags.c_str(), + buildFlags.size()}; + _logger.debug("compiler vcl version: %d.%d", _vclVersion.major, _vclVersion.minor); + + _logger.debug("Using vclAllocatedExecutableCreateWSOneShot"); + vcl_allocator_vector_2 allocator; + + THROW_ON_FAIL_FOR_VCL("vclAllocatedExecutableCreateWSOneShot", + vclAllocatedExecutableCreateWSOneShot(_compilerHandle, exeDesc, &allocator), + _logHandle); + + if (allocator.m_vector.size() == 0) { + OPENVINO_THROW("Failed to create VCL executable, blobCount is zero"); + } + + std::vector> networkDescrs; + for (uint32_t i = 0; i < allocator.m_vector.size(); i++) { + // Use empty metadata as VCL does not support metadata extraction + NetworkMetadata metadata; + networkDescrs.emplace_back( + std::make_shared(std::move(*allocator.m_vector[i]), std::move(metadata))); + } + return networkDescrs; +} + +NetworkDescription VCLCompilerImpl::compileWsIterative(const std::shared_ptr& model, + const Config& config, + size_t callNumber) const { + _logger.debug("compileWsIterative start"); + const FilteredConfig* filteredConfig = dynamic_cast(&config); + if (filteredConfig == nullptr) { + 
OPENVINO_THROW("config is not FilteredConfig"); + } + FilteredConfig updatedConfig = *filteredConfig; + updatedConfig.update({{ov::intel_npu::ws_compile_call_number.name(), std::to_string(callNumber)}}); + return compile(model, config); +} + intel_npu::NetworkMetadata VCLCompilerImpl::parse(const std::vector& network, const Config& config) const { _logger.debug("parse start"); // VCL does not support parse, return empty metadata diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp index 1d8549f57f24b8..ec74095e410105 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_graph.cpp @@ -275,6 +275,10 @@ std::pair>> WeightlessGraph::expor } void WeightlessGraph::initialize(const Config& config) { + if (!_zeroInitStruct) { + _wgLogger.warning("_zeroInitStruct is nullptr!"); + return; + } // Simplified version for init schedules const size_t numberOfInits = _initsGraphDesc.size(); _initsCommandQueueOrdinals.resize(numberOfInits); From 16d591c8fd55502a17d80fc9dfc4f74575f7750a Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Sat, 22 Nov 2025 00:04:34 +0800 Subject: [PATCH 18/25] clang-format --- .../src/compiler_adapter/src/plugin_compiler_adapter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 4fd516f9b642bd..b2478e25866866 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -188,7 +188,7 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr mainNetworkDescription; - + switch (localConfig.get()) { case ov::intel_npu::WSVersion::ONE_SHOT: { std::vector> 
initMainNetworkDescriptions = From 5ba9f3941ad71d9b759ebb5699d9f0597b70be50 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 24 Nov 2025 11:04:08 +0800 Subject: [PATCH 19/25] fix comments --- .../intel_npu/common/icompiler_adapter.hpp | 35 ----- .../{npu_driver_compiler.h => compiler.h} | 0 .../include/compiler_impl.hpp | 64 +++++++++ .../include/plugin_compiler_adapter.hpp | 1 - .../src/compiler_adapter/include/vcl_api.hpp | 130 ------------------ .../include/weightless_utils.hpp | 24 ++++ .../src/{vcl_api.cpp => compiler_impl.cpp} | 117 +++++++++++----- .../src/driver_compiler_adapter.cpp | 1 + .../src/plugin_compiler_adapter.cpp | 8 +- .../compiler_adapter/src/weightless_utils.cpp | 44 ++++++ .../intel_npu/src/plugin/src/plugin.cpp | 114 +-------------- 11 files changed, 221 insertions(+), 317 deletions(-) rename src/plugins/intel_npu/src/compiler_adapter/include/{npu_driver_compiler.h => compiler.h} (100%) create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp delete mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp create mode 100644 src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp rename src/plugins/intel_npu/src/compiler_adapter/src/{vcl_api.cpp => compiler_impl.cpp} (89%) create mode 100644 src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp index 1ed01edb67ac4d..f41ceef203ad60 100644 --- a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp +++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp @@ -6,9 +6,6 @@ #include "intel_npu/common/filtered_config.hpp" #include "intel_npu/common/igraph.hpp" -#include "openvino/core/model.hpp" -#include "openvino/core/rt_info/weightless_caching_attributes.hpp" -#include 
"openvino/op/constant.hpp" namespace intel_npu { @@ -59,38 +56,6 @@ class ICompilerAdapter { virtual std::vector get_supported_options() const = 0; virtual bool is_option_supported(std::string optname) const = 0; - /** - * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon - * serialization. - * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information - * regarding the offset of the weights within the binary file, as well as the original size and precision. This - * information is required within the "weights separation" flow, therefore this function is here to store it. - * @note Not calling this function in the weights separation flow would lead to this information being lost upon - * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent - * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be - * misinformed and lookups of weights offsets could fail. - * - * @param model Both source and target. 
- */ - void storeWeightlessCacheAttribute(const std::shared_ptr& model) const { - size_t constantId = 0; - for (auto&& node : model->get_ordered_ops()) { - if (ov::is_type(node)) { - ov::RTMap& runtimeInfoMap = node->get_rt_info(); - const auto& weightlessCacheAttrIt = - runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static()); - - const std::string constantIdString = std::to_string(constantId++); - if (weightlessCacheAttrIt != runtimeInfoMap.end()) { - auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as(); - model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString); - model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString); - } - } - } - } - virtual ~ICompilerAdapter() = default; }; diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h similarity index 100% rename from src/plugins/intel_npu/src/compiler_adapter/include/npu_driver_compiler.h rename to src/plugins/intel_npu/src/compiler_adapter/include/compiler.h diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp new file mode 100644 index 00000000000000..3f212f878bb795 --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "compiler.h" +#include "intel_npu/common/filtered_config.hpp" +#include "intel_npu/icompiler.hpp" +#include "openvino/core/except.hpp" + +namespace intel_npu { + +bool isUseBaseModelSerializer(const FilteredConfig& config); +std::string supportVclCompiler(int major, int minor); +class VCLApi; + +class VCLCompilerImpl final : public 
intel_npu::ICompiler { +public: + VCLCompilerImpl(); + ~VCLCompilerImpl() override; + + static std::shared_ptr getInstance() { + static std::shared_ptr compiler = std::make_shared(); + return compiler; + } + + NetworkDescription compile(const std::shared_ptr& model, const Config& config) const override; + + std::vector> compileWsOneShot(const std::shared_ptr& model, + const Config& config) const override; + + NetworkDescription compileWsIterative(const std::shared_ptr& model, + const Config& config, + size_t callNumber) const override; + + ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; + + NetworkMetadata parse(const std::vector& network, const Config& config) const override; + + uint32_t get_version() const override; + + std::vector process_profiling_output(const std::vector& profData, + const std::vector& network, + const intel_npu::Config& config) const final override; + + bool get_supported_options(std::vector& options) const; + + bool is_option_supported(const std::string& option) const; + + std::shared_ptr getLinkedLibrary() const; + +private: + vcl_log_handle_t _logHandle = nullptr; + vcl_compiler_handle_t _compilerHandle = nullptr; + vcl_compiler_properties_t _compilerProperties; + vcl_version_info_t _vclVersion; + vcl_version_info_t _vclProfilingVersion; + Logger _logger; +}; + +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp index 5bc7c236e45a10..0675d964565947 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp @@ -11,7 +11,6 @@ #include "intel_npu/utils/logger/logger.hpp" #include "intel_npu/utils/zero/zero_init.hpp" #include "openvino/runtime/so_ptr.hpp" -#include "vcl_api.hpp" #include "ze_graph_ext_wrappers.hpp" namespace intel_npu { 
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp deleted file mode 100644 index 7163beada5322a..00000000000000 --- a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_api.hpp +++ /dev/null @@ -1,130 +0,0 @@ -// Copyright (C) 2018-2025 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include "intel_npu/common/filtered_config.hpp" -#include "intel_npu/icompiler.hpp" -#include "npu_driver_compiler.h" -#include "openvino/core/except.hpp" - -namespace intel_npu { - -// clang-format off -#define vcl_symbols_list() \ - vcl_symbol_statement(vclGetVersion) \ - vcl_symbol_statement(vclCompilerCreate) \ - vcl_symbol_statement(vclCompilerDestroy) \ - vcl_symbol_statement(vclCompilerGetProperties) \ - vcl_symbol_statement(vclQueryNetworkCreate) \ - vcl_symbol_statement(vclQueryNetwork) \ - vcl_symbol_statement(vclQueryNetworkDestroy) \ - vcl_symbol_statement(vclExecutableCreate) \ - vcl_symbol_statement(vclAllocatedExecutableCreate) \ - vcl_symbol_statement(vclExecutableDestroy) \ - vcl_symbol_statement(vclExecutableGetSerializableBlob) \ - vcl_symbol_statement(vclProfilingCreate) \ - vcl_symbol_statement(vclGetDecodedProfilingBuffer) \ - vcl_symbol_statement(vclProfilingDestroy) \ - vcl_symbol_statement(vclProfilingGetProperties) \ - vcl_symbol_statement(vclLogHandleGetString) - - -//unsupported symbols with older ze_loader versions -#define vcl_weak_symbols_list() \ - vcl_symbol_statement(vclAllocatedExecutableCreate2) \ - vcl_symbol_statement(vclGetCompilerSupportedOptions) \ - vcl_symbol_statement(vclGetCompilerIsOptionSupported) \ - vcl_symbol_statement(vclAllocatedExecutableCreateWSOneShot) -// clang-format on - -class VCLApi { -public: - VCLApi(); - VCLApi(const VCLApi& other) = delete; - VCLApi(VCLApi&& other) = delete; - void operator=(const VCLApi&) = delete; - void operator=(VCLApi&&) = delete; - - static const std::shared_ptr& 
getInstance(); - std::shared_ptr getLibrary() const { - return lib; - } - -#define vcl_symbol_statement(vcl_symbol) decltype(&::vcl_symbol) vcl_symbol; - vcl_symbols_list(); - vcl_weak_symbols_list(); -#undef vcl_symbol_statement - -private: - std::shared_ptr lib; - Logger _logger; -}; - -#define vcl_symbol_statement(vcl_symbol) \ - template \ - inline typename std::invoke_result::type wrapped_##vcl_symbol(Args... args) { \ - const auto& ptr = VCLApi::getInstance(); \ - if (ptr->vcl_symbol == nullptr) { \ - OPENVINO_THROW("Unsupported vcl_symbol " #vcl_symbol); \ - } \ - return ptr->vcl_symbol(std::forward(args)...); \ - } -vcl_symbols_list(); -vcl_weak_symbols_list(); -#undef vcl_symbol_statement -#define vcl_symbol_statement(vcl_symbol) inline decltype(&::vcl_symbol) vcl_symbol = wrapped_##vcl_symbol; -vcl_symbols_list(); -vcl_weak_symbols_list(); -#undef vcl_symbol_statement - -bool isUseBaseModelSerializer(const FilteredConfig& config); -std::string supportVclCompiler(int major, int minor); - -class VCLCompilerImpl final : public intel_npu::ICompiler { -public: - VCLCompilerImpl(); - ~VCLCompilerImpl() override; - - static std::shared_ptr getInstance() { - static std::shared_ptr compiler = std::make_shared(); - return compiler; - } - - NetworkDescription compile(const std::shared_ptr& model, const Config& config) const override; - - std::vector> compileWsOneShot(const std::shared_ptr& model, - const Config& config) const override; - - NetworkDescription compileWsIterative(const std::shared_ptr& model, - const Config& config, - size_t callNumber) const override; - - ov::SupportedOpsMap query(const std::shared_ptr& model, const Config& config) const override; - - NetworkMetadata parse(const std::vector& network, const Config& config) const override; - - uint32_t get_version() const override; - - std::vector process_profiling_output(const std::vector& profData, - const std::vector& network, - const intel_npu::Config& config) const final override; - - bool 
get_supported_options(std::vector& options) const; - - bool is_option_supported(const std::string& option) const; - -private: - std::shared_ptr _vclApi; - vcl_log_handle_t _logHandle = nullptr; - vcl_compiler_handle_t _compilerHandle = nullptr; - vcl_compiler_properties_t _compilerProperties; - vcl_version_info_t _vclVersion; - vcl_version_info_t _vclProfilingVersion; - Logger _logger; -}; - -} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp new file mode 100644 index 00000000000000..d32abbd6ab4509 --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/include/weightless_utils.hpp @@ -0,0 +1,24 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "openvino/core/model.hpp" + +namespace intel_npu { +/** + * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon + * serialization. + * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information + * regarding the offset of the weights within the binary file, as well as the original size and precision. This + * information is required within the "weights separation" flow, therefore this function is here to store it. + * @note Not calling this function in the weights separation flow would lead to this information being lost upon + * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent + * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be + * misinformed and lookups of weights offsets could fail. + * + * @param model Both source and target. 
+ */ +void storeWeightlessCacheAttribute(const std::shared_ptr& model); +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp similarity index 89% rename from src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp rename to src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp index d86e8b74dbf881..5396da3b89d0cc 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/vcl_api.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 // -#include "vcl_api.hpp" +#include "compiler_impl.hpp" #include "intel_npu/config/options.hpp" #include "intel_npu/npu_private_properties.hpp" @@ -15,6 +15,74 @@ namespace intel_npu { +// clang-format off +#define vcl_symbols_list() \ + vcl_symbol_statement(vclGetVersion) \ + vcl_symbol_statement(vclCompilerCreate) \ + vcl_symbol_statement(vclCompilerDestroy) \ + vcl_symbol_statement(vclCompilerGetProperties) \ + vcl_symbol_statement(vclQueryNetworkCreate) \ + vcl_symbol_statement(vclQueryNetwork) \ + vcl_symbol_statement(vclQueryNetworkDestroy) \ + vcl_symbol_statement(vclExecutableCreate) \ + vcl_symbol_statement(vclAllocatedExecutableCreate) \ + vcl_symbol_statement(vclExecutableDestroy) \ + vcl_symbol_statement(vclExecutableGetSerializableBlob) \ + vcl_symbol_statement(vclProfilingCreate) \ + vcl_symbol_statement(vclGetDecodedProfilingBuffer) \ + vcl_symbol_statement(vclProfilingDestroy) \ + vcl_symbol_statement(vclProfilingGetProperties) \ + vcl_symbol_statement(vclLogHandleGetString) \ + vcl_symbol_statement(vclAllocatedExecutableCreate2) \ + vcl_symbol_statement(vclGetCompilerSupportedOptions) \ + vcl_symbol_statement(vclGetCompilerIsOptionSupported) \ + + +//unsupported symbols with older ze_loader versions +#define vcl_weak_symbols_list() \ + vcl_symbol_statement(vclAllocatedExecutableCreateWSOneShot) +// clang-format on + 
+class VCLApi { +public: + VCLApi(); + VCLApi(const VCLApi& other) = delete; + VCLApi(VCLApi&& other) = delete; + void operator=(const VCLApi&) = delete; + void operator=(VCLApi&&) = delete; + + static const std::shared_ptr& getInstance(); + std::shared_ptr getLibrary() const { + return lib; + } + +#define vcl_symbol_statement(vcl_symbol) decltype(&::vcl_symbol) vcl_symbol; + vcl_symbols_list(); + vcl_weak_symbols_list(); +#undef vcl_symbol_statement + +private: + std::shared_ptr lib; + Logger _logger; +}; + +#define vcl_symbol_statement(vcl_symbol) \ + template \ + inline typename std::invoke_result::type wrapped_##vcl_symbol(Args... args) { \ + const auto& ptr = VCLApi::getInstance(); \ + if (ptr->vcl_symbol == nullptr) { \ + OPENVINO_THROW("Unsupported vcl_symbol " #vcl_symbol); \ + } \ + return ptr->vcl_symbol(std::forward(args)...); \ + } +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement +#define vcl_symbol_statement(vcl_symbol) inline decltype(&::vcl_symbol) vcl_symbol = wrapped_##vcl_symbol; +vcl_symbols_list(); +vcl_weak_symbols_list(); +#undef vcl_symbol_statement + static inline std::string getLatestVCLLog(vcl_log_handle_t logHandle) { Logger _logger("VCLAPI", Logger::global().level()); _logger.debug("getLatestVCLLog start"); @@ -114,6 +182,10 @@ const std::shared_ptr& VCLApi::getInstance() { VCLCompilerImpl::VCLCompilerImpl() : _logHandle(nullptr), _logger("VCLCompilerImpl", Logger::global().level()) { _logger.debug("VCLCompilerImpl constructor start"); + + // Load VCL library + (void)VCLApi::getInstance(); + // Initialize the VCL API THROW_ON_FAIL_FOR_VCL("vclGetVersion", vclGetVersion(&_vclVersion, &_vclProfilingVersion), nullptr); @@ -167,6 +239,10 @@ VCLCompilerImpl::~VCLCompilerImpl() { _logger.info("VCL Compiler destroyed successfully"); } +std::shared_ptr VCLCompilerImpl::getLinkedLibrary() const { + return VCLApi::getInstance(); +} + struct vcl_allocator_vector : vcl_allocator2_t { vcl_allocator_vector() : 
vcl_allocator2_t{vector_allocate, vector_deallocate} {} @@ -375,42 +451,9 @@ NetworkDescription VCLCompilerImpl::compile(const std::shared_ptr compiledNetwork(size); - THROW_ON_FAIL_FOR_VCL("vclExecutableGetSerializableBlob", - vclExecutableGetSerializableBlob(exeHandle, compiledNetwork.data(), &size), - _logHandle); - - THROW_ON_FAIL_FOR_VCL("vclExecutableDestroy", vclExecutableDestroy(exeHandle), _logHandle); - - // Use empty metadata as VCL does not support metadata extraction - NetworkMetadata metadata; - - _logger.debug("compile end, blob size:%d", compiledNetwork.size()); - return NetworkDescription(std::move(compiledNetwork), std::move(metadata)); + OPENVINO_THROW("Not supported VCL version: %d.%d, please use VCL 6.1 or later", + _vclVersion.major, + _vclVersion.minor); } } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp index 4b11610d56b7a3..f8d91edb8d62bb 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp @@ -15,6 +15,7 @@ #include "openvino/core/model.hpp" #include "vcl_serializer.hpp" #include "weightless_graph.hpp" +#include "weightless_utils.hpp" namespace { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index b2478e25866866..3b13ac11c50949 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -7,6 +7,7 @@ #include #include +#include "compiler_impl.hpp" #include "graph.hpp" #include "intel_npu/common/device_helpers.hpp" #include "intel_npu/common/itt.hpp" @@ -22,6 +23,7 @@ #include "openvino/util/file_util.hpp" #include "openvino/util/shared_object.hpp" #include "weightless_graph.hpp" 
+#include "weightless_utils.hpp" namespace { @@ -80,7 +82,7 @@ PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptrgetLibrary(); + auto vclLib = vclCompilerPtr->getLinkedLibrary(); _logger.info("PLUGIN VCL compiler is loading"); if (vclCompilerPtr && vclLib) { _compiler = ov::SoPtr(vclCompilerPtr, vclLib); @@ -161,7 +163,6 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr PluginCompilerAdapter::parse( mainGraphDesc = _zeGraphExt->getGraphDescriptor(mainBlob.data(), mainBlob.get_byte_size()); mainNetworkMetadata = _zeGraphExt->getNetworkMeta(mainGraphDesc); _logger.debug("main schedule parse end"); - std::cout << "RUN here == for vcl adapter call===" << std::endl; if (model) { - std::cout << "RUN here == for vcl adapter call 1===" << std::endl; mainNetworkMetadata.name = model.value()->get_friendly_name(); } else { - std::cout << "RUN here == for vcl adapter call 2===" << std::endl; _logger.warning("networkMeta name is empty in parse!"); } } else { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp new file mode 100644 index 00000000000000..cac510735f68ef --- /dev/null +++ b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp @@ -0,0 +1,44 @@ +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include "weightless_utils.hpp" + +#include "openvino/core/rt_info/weightless_caching_attributes.hpp" +#include "openvino/op/constant.hpp" + +namespace intel_npu { +/** + * @brief Stores the information within the "WeightlessCacheAttribute" as runtime fields that persist upon + * serialization. + * @details Constant nodes (weights) may contain as medatadata the "WeightlessCacheAttribute", that is information + * regarding the offset of the weights within the binary file, as well as the original size and precision. 
This + * information is required within the "weights separation" flow, therefore this function is here to store it. + * @note Not calling this function in the weights separation flow would lead to this information being lost upon + * serialization. The "WeightlessCacheAttribute" information that is populated upon de-serialization would represent + * metadata corresponding to the serialized stream, not the original weights file. Therefore the compiler would be + * misinformed and lookups of weights offsets could fail. + * + * @param model Both source and target. + */ +void storeWeightlessCacheAttribute(const std::shared_ptr& model) { + size_t constantId = 0; + for (auto&& node : model->get_ordered_ops()) { + if (ov::is_type(node)) { + ov::RTMap& runtimeInfoMap = node->get_rt_info(); + const auto& weightlessCacheAttrIt = + runtimeInfoMap.find(ov::WeightlessCacheAttribute::get_type_info_static()); + + const std::string constantIdString = std::to_string(constantId++); + if (weightlessCacheAttrIt != runtimeInfoMap.end()) { + auto& weightlessCacheAttr = weightlessCacheAttrIt->second.as(); + model->set_rt_info(weightlessCacheAttr.bin_offset, "ws_bin_offset_" + constantIdString); + model->set_rt_info(weightlessCacheAttr.original_size, "ws_original_size_" + constantIdString); + model->set_rt_info(weightlessCacheAttr.original_dtype, "ws_original_dtype_" + constantIdString); + } + } + } +} +} // namespace intel_npu diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 007b63ae9f6698..97c6a7e09ceeac 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -144,103 +144,14 @@ void update_log_level(const std::map& propertiesMap) { } } -std::string getDeviceFromProperties(const ov::AnyMap& propertiesMap) { - const std::string defaultDevice = ""; - auto it = propertiesMap.find(std::string(DEVICE_ID::key())); - if (it != propertiesMap.end()) { - return 
it->second.as(); - } - - it = propertiesMap.find(std::string(PLATFORM::key())); - if (it != propertiesMap.end()) { - auto platformStr = it->second.as(); - if (platformStr == ov::intel_npu::Platform::AUTO_DETECT) { - return defaultDevice; - } - - platformStr = utils::getPlatformByDeviceName(platformStr); - platformStr = ov::intel_npu::Platform::standardize(platformStr); - return platformStr; - } - return defaultDevice; -} - -void checkUpdateforSpecialPlatform(const FilteredConfig& base_conf, - ov::AnyMap& propertiesMap, - const std::string& deviceName, - Logger& log) { - // If there is no compiler_type provided, use base_config default value - // Default compilerType for different platform is up to device: - // 3720 -> DRIVER - // 4000 and later -> default - - // If user set compilerType in config, will not update by device - auto it = propertiesMap.find(std::string(COMPILER_TYPE::key())); - if (it != propertiesMap.end()) { - return; - } - - std::string getDevice = getDeviceFromProperties(propertiesMap); - - if (deviceName.empty() && getDevice.empty()) { - OPENVINO_THROW("Device name is empty!"); - } - - std::string usedDevice = deviceName; - if (deviceName != getDevice) { - log.info("The device from properties '%s' is different from the actual device '%s', use device '%s' to check " - "compiler_type.", - getDevice.c_str(), - deviceName.c_str(), - deviceName.c_str()); - - usedDevice = deviceName.empty() ? getDevice : deviceName; - } - - // If the platform is not 3720, will not update by device - if (usedDevice != std::string(ov::intel_npu::Platform::NPU3720)) { - return; - } - - if (base_conf.get() != ov::intel_npu::CompilerType::DRIVER) { - log.warning("Platform '3720' is selected, but the used compiler_type is not set to 'DRIVER'. Forcely use the " - "compiler_type to 'DRIVER'. 
Maybe cause the compilerType inconsistency issues."); - } - - // To avoid compilerType inconsistency issues, only set DRIVER if compiler_type is not set by user - propertiesMap[std::string(COMPILER_TYPE::key())] = COMPILER_TYPE::toString(ov::intel_npu::CompilerType::DRIVER); - - return; -} - -static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, - const ov::AnyMap& local_conf, - const std::string& deviceName) { +static ov::intel_npu::CompilerType resolveCompilerType(const FilteredConfig& base_conf, const ov::AnyMap& local_conf) { // first look if provided config changes compiler type auto it = local_conf.find(std::string(COMPILER_TYPE::key())); if (it != local_conf.end()) { // if compiler_type is provided by local config = use that return COMPILER_TYPE::parse(it->second.as()); } - // if there is no compiler_type provided = use base_config value and update default vaule by platform if needed - // Default compilerType for different platform is up to device: - // 3720 -> DRIVER - // 4000 and later -> default - if (!deviceName.empty()) { - if (deviceName == std::string(ov::intel_npu::Platform::NPU3720)) { - return ov::intel_npu::CompilerType::DRIVER; - } - } else { - std::string getdevice = getDeviceFromProperties(local_conf); - if (getdevice == std::string(ov::intel_npu::Platform::NPU3720)) { - return ov::intel_npu::CompilerType::DRIVER; - } - if (getdevice == std::string(ov::intel_npu::Platform::AUTO_DETECT)) { - Logger::global().warning("Device is set to AUTO_DETECT, cannot decide the default compiler_type by device, " - "use the default compiler_type."); - } - } - + // if there is no compiler_type provided = use base_config value return base_conf.get(); } @@ -749,18 +660,12 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< localProperties.erase(modelSerializerVersionKey); } - // For 3720, need check and update its compiler_type, if usr not pass in config - auto deviceBeforeCompilerCreate = _backend == nullptr ? 
nullptr : _backend->getDevice(); - std::string deviceName = deviceBeforeCompilerCreate != nullptr ? deviceBeforeCompilerCreate->getName() : ""; - checkUpdateforSpecialPlatform(_globalConfig, localProperties, deviceName, _logger); - const std::map localPropertiesMap = any_copy(localProperties); update_log_level(localPropertiesMap); // create compiler CompilerAdapterFactory compilerAdapterFactory; - auto compiler = - compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, localProperties, deviceName)); + auto compiler = compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, properties)); OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); @@ -1065,14 +970,10 @@ ov::SupportedOpsMap Plugin::query_model(const std::shared_ptr& CompilerAdapterFactory compilerAdapterFactory; auto npu_plugin_properties = properties; exclude_model_ptr_from_map(npu_plugin_properties); - auto device = _backend == nullptr ? nullptr : _backend->getDevice(); - std::string deviceName = device != nullptr ? 
device->getName() : ""; - checkUpdateforSpecialPlatform(_globalConfig, npu_plugin_properties, deviceName, _logger); const std::map propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); auto compiler = - compilerAdapterFactory.getCompiler(_backend, - resolveCompilerType(_globalConfig, npu_plugin_properties, deviceName)); + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::CompileTime); _logger.setLevel(localConfig.get()); const auto platform = @@ -1104,16 +1005,11 @@ std::shared_ptr Plugin::parse(const ov::Tensor& tensorBig, // list of properties auto originalModel = exclude_model_ptr_from_map(npu_plugin_properties); - auto deviceBeforeCompilerCreate = _backend == nullptr ? nullptr : _backend->getDevice(); - std::string deviceName = deviceBeforeCompilerCreate != nullptr ? deviceBeforeCompilerCreate->getName() : ""; - checkUpdateforSpecialPlatform(_globalConfig, npu_plugin_properties, deviceName, _logger); - CompilerAdapterFactory compilerAdapterFactory; const auto propertiesMap = any_copy(npu_plugin_properties); update_log_level(propertiesMap); auto compiler = - compilerAdapterFactory.getCompiler(_backend, - resolveCompilerType(_globalConfig, npu_plugin_properties, deviceName)); + compilerAdapterFactory.getCompiler(_backend, resolveCompilerType(_globalConfig, npu_plugin_properties)); OV_ITT_TASK_CHAIN(PLUGIN_PARSE_MODEL, itt::domains::NPUPlugin, "Plugin::parse", "fork_local_config"); auto localConfig = fork_local_config(propertiesMap, compiler, OptionMode::RunTime); From 24e293504116773810a42ece397da4d98da5aa49 Mon Sep 17 00:00:00 2001 From: "Kang, Wenjing" Date: Mon, 24 Nov 2025 16:03:49 +0800 Subject: [PATCH 20/25] Add OPENVINO_ASSERT for one shot weightless compilation to check initMainNetworkDescriptions size Signed-off-by: Kang, Wenjing --- .../src/compiler_adapter/src/plugin_compiler_adapter.cpp | 2 ++ 1 
file changed, 2 insertions(+) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index 3b13ac11c50949..cc2b0fdd226bfb 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -197,6 +197,8 @@ std::shared_ptr PluginCompilerAdapter::compileWS(const std::shared_ptr 0, + "The initMainNetworkDescriptions after getting mainNetworkDescription must not be empty!"); initNetworkDescriptions = std::move(initMainNetworkDescriptions); tensorMain = make_tensor_from_vector(mainNetworkDescription->compiledNetwork); From a54dae0e8a3493c9455f2e80afbe68d00356677a Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 24 Nov 2025 18:33:33 +0800 Subject: [PATCH 21/25] fix comments2 --- .../intel_npu/src/compiler_adapter/include/compiler.h | 5 ++--- .../src/compiler_adapter/include/compiler_impl.hpp | 2 +- src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp | 4 ---- .../src/compiler_adapter/src/plugin_compiler_adapter.cpp | 6 +++++- .../intel_npu/src/compiler_adapter/src/weightless_utils.cpp | 2 -- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h b/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h index e7f3d3bee21010..409798e64b4a1b 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler.h @@ -1,6 +1,5 @@ -// -// Copyright (C) 2023 Intel Corporation. 
-// SPDX-License-Identifier: Apache 2.0 +// Copyright (C) 2018-2025 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 // #ifndef VPUX_COMPILER_L0_H diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp index 3f212f878bb795..0dbeba9afafc97 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp @@ -23,7 +23,7 @@ class VCLCompilerImpl final : public intel_npu::ICompiler { ~VCLCompilerImpl() override; static std::shared_ptr getInstance() { - static std::shared_ptr compiler = std::make_shared(); + static std::weak_ptr compiler = std::make_shared(); return compiler; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp index a6d1d6bcc68f95..9ace471ea66ad4 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/graph.cpp @@ -157,10 +157,6 @@ void Graph::set_argument_value(uint32_t argi, const void* argv) const { } void Graph::initialize(const Config& config) { - if (!_zeroInitStruct) { - _logger.warning("_zeroInitStruct is nullptr!"); - return; - } _logger.debug("Graph initialize start"); if (_zeGraphExt == nullptr || _graphDesc._handle == nullptr) { diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp index cc2b0fdd226bfb..62466ab3aae8af 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp @@ -140,12 +140,16 @@ std::shared_ptr PluginCompilerAdapter::compile(const std::shared_ptrgetGraphDescriptor(tensor.data(), tensor.get_byte_size()); networkMeta = _zeGraphExt->getNetworkMeta(graphDesc); 
networkMeta.name = model->get_friendly_name(); + } catch (const std::exception& ex) { + _logger.info("Failed to use the level zero graph handle: %s. Inference requests for this model are not " + "allowed. Only exports are available", + ex.what()); } catch (...) { _logger.info("Failed to obtain the level zero graph handle. Inference requests for this model are not " "allowed. Only exports are available"); } } else { - _logger.warning("no zeGraphExt, metadata is empty from vcl compiler"); + _logger.warning("No driver is found, zeGraphExt is nullptr, so metadata is empty. Only exports are available"); } return std::make_shared( diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp index cac510735f68ef..987ff372ea8532 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/weightless_utils.cpp @@ -2,8 +2,6 @@ // SPDX-License-Identifier: Apache-2.0 // -#pragma once - #include "weightless_utils.hpp" #include "openvino/core/rt_info/weightless_caching_attributes.hpp" From 6ea5978041c73290ba93aa51efbcf02f48445b1f Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Tue, 25 Nov 2025 00:18:30 +0800 Subject: [PATCH 22/25] add log for serialize IR --- .../intel_npu/src/compiler_adapter/src/compiler_impl.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp index 5396da3b89d0cc..917508d8e006c6 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp @@ -615,8 +615,10 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr Date: Tue, 25 Nov 2025 00:26:06 +0800 Subject: [PATCH 23/25] update compilerType to mlir --- .../intel_npu/src/al/include/intel_npu/config/options.hpp | 2 +- 
1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp index 51403d304718bd..cadc4f6aa8d270 100644 --- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp +++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp @@ -842,7 +842,7 @@ struct COMPILER_TYPE final : OptionBase Date: Tue, 25 Nov 2025 00:34:10 +0800 Subject: [PATCH 24/25] remove ov cache check in plugin --- src/plugins/intel_npu/src/plugin/src/plugin.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp index 97c6a7e09ceeac..578b2ba29d61e2 100644 --- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp +++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp @@ -670,13 +670,13 @@ std::shared_ptr Plugin::compile_model(const std::shared_ptr< OV_ITT_TASK_CHAIN(PLUGIN_COMPILE_MODEL, itt::domains::NPUPlugin, "Plugin::compile_model", "fork_local_config"); auto localConfig = fork_local_config(localPropertiesMap, compiler); - const auto set_cache_dir = localConfig.get(); - if (!set_cache_dir.empty()) { - const auto compilerType = localConfig.get(); - if (compilerType == ov::intel_npu::CompilerType::PLUGIN) { - OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type"); - } - } + // const auto set_cache_dir = localConfig.get(); + // if (!set_cache_dir.empty()) { + // const auto compilerType = localConfig.get(); + // if (compilerType == ov::intel_npu::CompilerType::PLUGIN) { + // OPENVINO_THROW("Option 'CACHE_DIR' is not supported with PLUGIN compiler type"); + // } + // } const auto platform = utils::getCompilationPlatform(localConfig.get(), From 07be1a20c7f085139888d83d04f0382fe0b76e58 Mon Sep 17 00:00:00 2001 From: "Liu, Dan1" Date: Mon, 24 Nov 2025 18:33:33 +0800 Subject: [PATCH 25/25] fix comments2 --- 
.../src/compiler_adapter/include/compiler_impl.hpp | 11 ++++++++++- .../src/compiler_adapter/src/compiler_impl.cpp | 6 ++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp index 0dbeba9afafc97..a55c12faded9ec 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp +++ b/src/plugins/intel_npu/src/compiler_adapter/include/compiler_impl.hpp @@ -5,6 +5,7 @@ #pragma once #include +#include #include "compiler.h" #include "intel_npu/common/filtered_config.hpp" @@ -23,7 +24,15 @@ class VCLCompilerImpl final : public intel_npu::ICompiler { ~VCLCompilerImpl() override; static std::shared_ptr getInstance() { - static std::weak_ptr compiler = std::make_shared(); + static std::mutex mutex; + static std::weak_ptr weak_compiler; + + std::lock_guard lock(mutex); + auto compiler = weak_compiler.lock(); + if (!compiler) { + compiler = std::make_shared(); + weak_compiler = compiler; + } return compiler; } diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp index 917508d8e006c6..118bdfd4188ba9 100644 --- a/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp +++ b/src/plugins/intel_npu/src/compiler_adapter/src/compiler_impl.cpp @@ -615,10 +615,12 @@ ov::SupportedOpsMap VCLCompilerImpl::query(const std::shared_ptr