
Report import failure error code #715

Open
wants to merge 1 commit into ovep-develop
46 changes: 24 additions & 22 deletions onnxruntime/core/providers/openvino/backend_manager.cc
@@ -20,6 +20,7 @@
#include "core/providers/openvino/ov_interface.h"
#include "core/providers/openvino/ov_versions/capability.h"
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
#include "core/providers/openvino/exceptions.h"
#include "core/providers/openvino/qdq_transformations/qdq_scales_fix.h"

namespace onnxruntime {
@@ -154,30 +155,31 @@
                                                      subgraph_context_,
                                                      shared_context_,
                                                      model_stream);
-    } catch (const OnnxRuntimeException& ex) {
-      std::string exception_str = ex.what();
-
-      if (session_context_.device_type.find("NPU") != std::string::npos &&
-          exception_str.find("intel_npu") != std::string::npos) {
-        // Handle NPU device related errors
-#ifndef NDEBUG
-        ORT_THROW(exception_str + "\nModel needs to be recompiled\n");
-#else
-        std::string error_message = "UNKNOWN NPU ERROR";
-        std::string error_code = "code 0x0";
-        std::regex error_message_pattern(R"(\bZE_\w*\b)");
-        std::regex error_code_pattern("code 0x[0-9a-fA-F]+");
-        std::smatch matches;
-        if (std::regex_search(exception_str, matches, error_message_pattern)) {
-          error_message = matches[0];
-        }
-        if (std::regex_search(exception_str, matches, error_code_pattern)) {
-          error_code = matches[0];
-        }
-        throw std::runtime_error(error_message + ", " + error_code + "\nModel needs to be recompiled\n");
-#endif
-      } else {
-        ORT_THROW(exception_str);
-      }
+    } catch (const ovep_exception& ex) {
+#ifndef OPENVINO_DISABLE_NPU_FALLBACK
+      bool eligible_for_cpu_fallback = device_type.find("NPU") != std::string::npos &&
+                                       !session_context_.so_disable_cpu_ep_fallback &&
+                                       !subgraph_context_.is_ep_ctx_graph;
+      if (eligible_for_cpu_fallback) {
+        std::string exception_str = ex.what();
+        LOGS_DEFAULT(VERBOSE) << exception_str;
+        LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU."
+                              << "Falling back to OV CPU for execution";
+        session_context_.device_type = "CPU";
+        session_context_.precision = "FP32";
+        try {
+          concrete_backend_ = BackendFactory::MakeBackend(model_proto,
+                                                          session_context_,
+                                                          subgraph_context_,
+                                                          shared_context_,
+                                                          model_stream);
+        } catch (std::string const& msg) {
+          ORT_THROW(msg);
+        }
+      } else
+#endif
+      {
+        throw ex;
+      }
    }
  }

Check notice (GitHub Actions / cpplint) on onnxruntime/core/providers/openvino/backend_manager.cc line 179: If an else has a brace on one side, it should have it on both [readability/braces] [5]
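For readers unfamiliar with the fallback path introduced above, the sketch below isolates the decision it encodes. It is a standalone toy rather than EP code: FakeSessionContext, FakeSubgraphContext and try_compile are hypothetical stand-ins for session_context_, subgraph_context_ and BackendFactory::MakeBackend.

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Hypothetical stand-ins for the session/subgraph state consulted by the new catch block.
struct FakeSessionContext {
  std::string device_type = "NPU";
  std::string precision = "FP16";
  bool so_disable_cpu_ep_fallback = false;
};

struct FakeSubgraphContext {
  bool is_ep_ctx_graph = false;  // precompiled EP-context graphs are never retried on CPU
};

// Simulates a backend build that fails on NPU and succeeds on CPU.
void try_compile(const std::string& device) {
  if (device == "NPU") {
    throw std::runtime_error("ZE_RESULT_ERROR_INVALID_NATIVE_BINARY, code 0x7800000f");
  }
}

int main() {
  FakeSessionContext session;
  FakeSubgraphContext subgraph;
  try {
    try_compile(session.device_type);
  } catch (const std::exception& ex) {
    // Same eligibility test as the new handler: NPU target, fallback not disabled
    // by session options, and not an EP-context (precompiled) graph.
    bool eligible_for_cpu_fallback = session.device_type.find("NPU") != std::string::npos &&
                                     !session.so_disable_cpu_ep_fallback &&
                                     !subgraph.is_ep_ctx_graph;
    if (!eligible_for_cpu_fallback) throw;
    std::cout << "NPU compile failed (" << ex.what() << "), retrying on CPU\n";
    session.device_type = "CPU";
    session.precision = "FP32";
    try_compile(session.device_type);  // succeeds in this sketch
  }
  std::cout << "Backend ready on " << session.device_type << "\n";
  return 0;
}
```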
82 changes: 82 additions & 0 deletions onnxruntime/core/providers/openvino/exceptions.h
@@ -0,0 +1,82 @@
// Copyright (C) Intel Corporation
// Licensed under the MIT License

#pragma once

#include <charconv>
#include <cstdint>
#include <exception>
#include <regex>
#include <string>

#include "core/common/status.h"

namespace onnxruntime {
namespace openvino_ep {

struct ovep_exception : public std::exception {
enum class type {
compile_model,
import_model,
query_prop,
read_model,
unknown,
};

ovep_exception(const std::string& message,
enum class type type) : message_{message},
type_{type},
error_code_{ze_result_code_from_string(message)},
error_name_{ze_result_name_from_string(message)} {}

const char* what() const noexcept override {
return message_.data();
}

uint32_t get_code() const { return error_code_; }

operator common::Status() const {
common::StatusCategory category_ort{common::ONNXRUNTIME};

if (type_ == type::unknown) {
return {category_ort, common::FAIL, message_};
}

// Newer drivers
if ((type_ == type::import_model) &&
(error_code_ == 0x7800000f /* ZE_RESULT_ERROR_INVALID_NATIVE_BINARY */)) {
std::string message{error_name_ + ", code 0x" + std::to_string(error_code_) + "\nModel needs to be recompiled\n"};
return {category_ort, common::INVALID_GRAPH, message};
}

std::string error_message = "Unhandled exception type: " + std::to_string(static_cast<int>(type_));
return {category_ort, common::FAIL, error_message};
}

protected:
std::string message_;
type type_{type::unknown};
uint32_t error_code_{0};
std::string error_name_;

private:
uint32_t ze_result_code_from_string(const std::string& ov_exception_string) {
uint32_t error_code{0};
std::regex error_code_pattern("code 0x([0-9a-fA-F]+)");
std::smatch matches;
if (std::regex_search(ov_exception_string, matches, error_code_pattern)) {
std::from_chars(&(*matches[1].first), &(*matches[1].second), error_code, 16);
}
return error_code;
}
std::string ze_result_name_from_string(const std::string& ov_exception_string) {
std::string error_message = "UNKNOWN NPU ERROR";
std::regex error_message_pattern(R"(\bZE_\w*\b)");
std::smatch matches;
if (std::regex_search(ov_exception_string, matches, error_message_pattern)) {
error_message = matches[0];
}
return error_message;
}
};

} // namespace openvino_ep
} // namespace onnxruntime
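A rough usage sketch of the new header, assuming it is compiled inside the onnxruntime tree so that exceptions.h and core/common/status.h resolve; the error string is only a representative Level Zero failure message, not verbatim driver output.

```cpp
#include <iostream>
#include <string>

#include "core/providers/openvino/exceptions.h"

using onnxruntime::openvino_ep::ovep_exception;

int main() {
  // Representative text of an import failure reported by the NPU plugin.
  std::string raw = "Exception from src/plugins/intel_npu: "
                    "ZE_RESULT_ERROR_INVALID_NATIVE_BINARY, code 0x7800000f";

  ovep_exception ex{raw, ovep_exception::type::import_model};

  // ze_result_code_from_string() extracts the hex code, ze_result_name_from_string() the ZE_* name.
  std::cout << std::hex << ex.get_code() << "\n";  // prints 7800000f

  // The conversion operator maps this particular import failure to INVALID_GRAPH,
  // signalling that the precompiled blob needs to be regenerated.
  onnxruntime::common::Status status = ex;
  std::cout << status.ErrorMessage() << "\n";
  return 0;
}
```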
217 changes: 111 additions & 106 deletions onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -12,6 +12,7 @@
#include "core/providers/openvino/onnx_ctx_model_helper.h"
#include "core/providers/openvino/ov_versions/capability.h"
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
#include "core/providers/openvino/exceptions.h"
#include "core/session/onnxruntime_session_options_config_keys.h"
#include "openvino/core/version.hpp"
#ifdef USE_OVEP_NPU_MEMORY
@@ -94,124 +95,128 @@ common::Status OpenVINOExecutionProvider::Compile(
auto& logger = *GetLogger();
Status status = Status::OK();

-  bool is_epctx_model = false;
-  if (!fused_nodes.empty()) {
-    // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
-    const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
-    session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
-    session_context_.onnx_opset_version =
-        graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);

-    // OVIR wrapped in epctx should be treated as source but this code does not
-    // This corner case is not in use and will be addressed in a future commit
-    is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
-  }

-  // The block below is executed during EP context model inference
-  auto& metadata = shared_context_->shared_weights.metadata;  // Metadata object in memory
-  if (session_context_.so_share_ep_contexts &&
-      is_epctx_model &&
-      metadata.empty()) {
-    fs::path context_model_file_path = session_context_.so_context_file_path;
-    if (context_model_file_path.empty()) {
-      // If ep.context_file_path is not set the input model path is used
-      context_model_file_path = session_context_.onnx_model_path_name;
-    }

-    // Metadata is always read from model location, this could be a source or epctx model
-    fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
-    fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
-    std::ifstream file(metadata_file_path, std::ios::binary);
-    ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
-    shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
-    file >> metadata;
-  }

-  struct OpenVINOEPFunctionState {
-    AllocateFunc allocate_func = nullptr;
-    DestroyFunc destroy_func = nullptr;
-    AllocatorHandle allocator_handle = nullptr;
-    BackendManager& backend_manager;
-  };

-  for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
-    const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
-    const Node& fused_node = fused_node_graph.fused_node;

-    NodeComputeInfo compute_info;

-    // During backend creation, we check if user wants to use precompiled blob onnx model or the original model
-    // For precompiled blob, directly load the model instead of compiling the model
-    // For original model, check if the user wants to export a model with pre-compiled blob

-    auto& backend_manager = backend_managers_.emplace_back(session_context_,
-                                                           *shared_context_,
-                                                           fused_node,
-                                                           graph_body_viewer,
-                                                           logger,
-                                                           ep_ctx_handle_);

-    compute_info.create_state_func =
-        [&backend_manager](ComputeContext* context, FunctionState* state) {
-          OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
-              .allocate_func = context->allocate_func,
-              .destroy_func = context->release_func,
-              .allocator_handle = context->allocator_handle,
-              .backend_manager = backend_manager};
-          *state = static_cast<FunctionState>(p);
-          return 0;
-        };

-    compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
-      auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
-      try {
-        function_state->backend_manager.Compute(context);
-      } catch (const std::exception& ex) {
-        return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
-      }
-      return Status::OK();
-    };

-    compute_info.release_state_func =
-        [](FunctionState state) {
-          if (state) {
-            OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
-            delete function_state;
-          }
-        };

-    node_compute_funcs.push_back(std::move(compute_info));

-    if (!status.IsOK()) {
-      break;
-    }
-  }

-  // The block below is executed during EP context model generation
-  if (session_context_.so_context_enable &&
-      session_context_.so_share_ep_contexts &&
-      !metadata.empty()) {
-    // For models after the first the metadata name comes from the shared context
-    fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
-    if (metadata_file_path.empty()) {
-      metadata_file_path = session_context_.so_context_file_path;
-      std::string name_append{"_metadata.bin"};
-      if (metadata_file_path.empty()) {
-        metadata_file_path = session_context_.onnx_model_path_name;
-        name_append = "_ctx" + name_append;
-      }
-      auto metadata_filename = metadata_file_path.stem().string() + name_append;
-      metadata_file_path.replace_filename(metadata_filename);
-      shared_context_->shared_weights.metadata_filepath = metadata_file_path;
-    }

-    // Metadata is generated only for shared contexts
-    // If saving metadata then save it to the provided path or use the original model path
-    // Multiple calls to Compile() will update the metadata and for the last call
-    // the resulting file will contain the aggregated content
-    std::ofstream file{metadata_file_path, std::ios::binary};
-    ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
-    file << metadata;
-  }
+  try {
+    bool is_epctx_model = false;
+    if (!fused_nodes.empty()) {
+      // Assume these properties are constant for all the model subgraphs, otherwise move to SubGraphContext
+      const auto& graph_body_viewer_0 = fused_nodes[0].filtered_graph.get();
+      session_context_.onnx_model_path_name = graph_body_viewer_0.ModelPath().string();
+      session_context_.onnx_opset_version =
+          graph_body_viewer_0.DomainToVersionMap().at(kOnnxDomain);

+      // OVIR wrapped in epctx should be treated as source but this code does not
+      // This corner case is not in use and will be addressed in a future commit
+      is_epctx_model = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(graph_body_viewer_0);
+    }

+    // The block below is executed during EP context model inference
+    auto& metadata = shared_context_->shared_weights.metadata;  // Metadata object in memory
+    if (session_context_.so_share_ep_contexts &&
+        is_epctx_model &&
+        metadata.empty()) {
+      fs::path context_model_file_path = session_context_.so_context_file_path;
+      if (context_model_file_path.empty()) {
+        // If ep.context_file_path is not set the input model path is used
+        context_model_file_path = session_context_.onnx_model_path_name;
+      }

+      // Metadata is always read from model location, this could be a source or epctx model
+      fs::path metadata_filename = context_model_file_path.stem().string() + "_metadata.bin";
+      fs::path metadata_file_path = context_model_file_path.parent_path() / metadata_filename;
+      std::ifstream file(metadata_file_path, std::ios::binary);
+      ORT_RETURN_IF_NOT(file, "Metadata file was not found: " + metadata_file_path.string());
+      shared_context_->shared_weights.metadata_filepath = std::move(metadata_file_path);
+      file >> metadata;
+    }

+    struct OpenVINOEPFunctionState {
+      AllocateFunc allocate_func = nullptr;
+      DestroyFunc destroy_func = nullptr;
+      AllocatorHandle allocator_handle = nullptr;
+      BackendManager& backend_manager;
+    };

+    for (const FusedNodeAndGraph& fused_node_graph : fused_nodes) {
+      const GraphViewer& graph_body_viewer = fused_node_graph.filtered_graph;
+      const Node& fused_node = fused_node_graph.fused_node;

+      NodeComputeInfo compute_info;

+      // During backend creation, we check if user wants to use precompiled blob onnx model or the original model
+      // For precompiled blob, directly load the model instead of compiling the model
+      // For original model, check if the user wants to export a model with pre-compiled blob

+      auto& backend_manager = backend_managers_.emplace_back(session_context_,
+                                                             *shared_context_,
+                                                             fused_node,
+                                                             graph_body_viewer,
+                                                             logger,
+                                                             ep_ctx_handle_);

+      compute_info.create_state_func =
+          [&backend_manager](ComputeContext* context, FunctionState* state) {
+            OpenVINOEPFunctionState* p = new OpenVINOEPFunctionState{
+                .allocate_func = context->allocate_func,
+                .destroy_func = context->release_func,
+                .allocator_handle = context->allocator_handle,
+                .backend_manager = backend_manager};
+            *state = static_cast<FunctionState>(p);
+            return 0;
+          };

+      compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
+        auto function_state = static_cast<OpenVINOEPFunctionState*>(state);
+        try {
+          function_state->backend_manager.Compute(context);
+        } catch (const std::exception& ex) {
+          return common::Status(common::ONNXRUNTIME, common::FAIL, ex.what());
+        }
+        return Status::OK();
+      };

+      compute_info.release_state_func =
+          [](FunctionState state) {
+            if (state) {
+              OpenVINOEPFunctionState* function_state = static_cast<OpenVINOEPFunctionState*>(state);
+              delete function_state;
+            }
+          };

+      node_compute_funcs.push_back(std::move(compute_info));

+      if (!status.IsOK()) {
+        break;
+      }
+    }

+    // The block below is executed during EP context model generation
+    if (session_context_.so_context_enable &&
+        session_context_.so_share_ep_contexts &&
+        !metadata.empty()) {
+      // For models after the first the metadata name comes from the shared context
+      fs::path metadata_file_path = shared_context_->shared_weights.metadata_filepath;
+      if (metadata_file_path.empty()) {
+        metadata_file_path = session_context_.so_context_file_path;
+        std::string name_append{"_metadata.bin"};
+        if (metadata_file_path.empty()) {
+          metadata_file_path = session_context_.onnx_model_path_name;
+          name_append = "_ctx" + name_append;
+        }
+        auto metadata_filename = metadata_file_path.stem().string() + name_append;
+        metadata_file_path.replace_filename(metadata_filename);
+        shared_context_->shared_weights.metadata_filepath = metadata_file_path;
+      }

+      // Metadata is generated only for shared contexts
+      // If saving metadata then save it to the provided path or use the original model path
+      // Multiple calls to Compile() will update the metadata and for the last call
+      // the resulting file will contain the aggregated content
+      std::ofstream file{metadata_file_path, std::ios::binary};
+      ORT_RETURN_IF_NOT(file, "Metadata file could not be written: ", metadata_file_path);
+      file << metadata;
+    }
+  } catch (const ovep_exception& ex) {
+    status = ex;
+  }

return status;
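The single-line `status = ex;` in the new catch clause works because ovep_exception converts implicitly to common::Status. A minimal standalone sketch of that exception-to-status idiom, using toy Status and my_error types rather than the ORT ones:

```cpp
#include <iostream>
#include <stdexcept>
#include <string>

// Toy stand-ins for common::Status and ovep_exception to show the conversion idiom.
struct Status {
  int code = 0;
  std::string message;
  bool ok() const { return code == 0; }
};

struct my_error : std::runtime_error {
  int code;
  my_error(int c, const std::string& m) : std::runtime_error(m), code(c) {}
  operator Status() const { return Status{code, what()}; }  // same idiom as ovep_exception
};

Status compile() {
  Status status;  // OK by default
  try {
    throw my_error{0x7800000f, "Model needs to be recompiled"};
  } catch (const my_error& ex) {
    status = ex;  // implicit conversion keeps the catch clause to a single line
  }
  return status;
}

int main() {
  Status s = compile();
  std::cout << std::hex << s.code << ": " << s.message << "\n";
  return 0;
}
```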