From 269a64c301ef185ed451731593c469794fd6019d Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Thu, 3 Jul 2025 15:14:16 +0200 Subject: [PATCH 01/35] Initial refactoring --- .gitignore | 14 +++++++++ CMakeLists.txt | 7 +++-- inc/segmentation.h | 11 ++++++++ src/main.cpp | 66 ++++++++----------------------------------- src/sam_inference.cpp | 2 +- src/segmentation.cpp | 53 ++++++++++++++++++++++++++++++++++ 6 files changed, 94 insertions(+), 59 deletions(-) create mode 100644 .gitignore create mode 100644 inc/segmentation.h create mode 100644 src/segmentation.cpp diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ddd4b43 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +build/ +images/* +onnxruntime*/ +onnxruntime/* +docker/* +CMakefile +CMakeCache.txt +CMakeFiles/* +cmake_install.cmake +Makefile +SPEED-SAM-C-TENSORRT/ +sam_inference/model/FastSAM-x.onnx +mask* +segmentation_results* diff --git a/CMakeLists.txt b/CMakeLists.txt index 8036a89..d1e926c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ include_directories(${OpenCV_INCLUDE_DIRS}) # -------------- ONNXRuntime ------------------# set(ONNXRUNTIME_VERSION 1.21.0) -set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../onnxruntime-linux-x64-gpu-1.21.1") +set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam/onnxruntime-linux-x64-gpu-1.21.1") include_directories(${ONNXRUNTIME_ROOT}/include) # -------------- Cuda ------------------# @@ -25,6 +25,7 @@ include_directories(/usr/local/cuda/include) set(PROJECT_SOURCES src/main.cpp src/sam_inference.cpp + src/segmentation.cpp src/utils.cpp ) @@ -44,10 +45,10 @@ endif () # Download https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml # and put it in the same folder of the executable file -configure_file(./model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) +configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx 
${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) # Copy yolov8n.onnx file to the same folder of the executable file -configure_file(./model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) +configure_file(../hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) # Create folder name images in the same folder of the executable file add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD diff --git a/inc/segmentation.h b/inc/segmentation.h new file mode 100644 index 0000000..9617001 --- /dev/null +++ b/inc/segmentation.h @@ -0,0 +1,11 @@ +#include +#include +#include +#include +#include +#include + +#include "sam_inference.h" + +std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer(); +void SegmentAnything(std::vector>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 7481567..3905ead 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,67 +1,23 @@ -#include -#include -#include "sam_inference.h" -#include -#include -#include +#include "segmentation.h" - - -void SegmentAnything() { - - SAM* samSegmentor = new SAM; - SEG::DL_INIT_PARAM params; - SEG::DL_INIT_PARAM params1; - - params.rectConfidenceThreshold = 0.1; - params.iouThreshold = 0.5; - params.modelPath = "SAM_encoder.onnx"; - params.imgSize = { 1024, 1024 }; - - params1 = params; - params1.modelType = SEG::SAM_SEGMENT_DECODER; - params1.modelPath = "SAM_mask_decoder.onnx"; - - - #ifdef USE_CUDA - params.cudaEnable = true; - #else - params.cudaEnable = false; - #endif - - - - //Running inference +int main() +{ + //Running inference + std::vector> samSegmentors; + SEG::DL_INIT_PARAM params_encoder; + SEG::DL_INIT_PARAM params_decoder; + std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); - 
std::filesystem::path imgs_path = current_path / "../../pipeline/build/images"; - std::vector resSam; + std::filesystem::path imgs_path = current_path / "../../hero_sam/pipeline/build/images"; for (auto& i : std::filesystem::directory_iterator(imgs_path)) { if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") { std::string img_path = i.path().string(); cv::Mat img = cv::imread(img_path); + SegmentAnything(samSegmentors, params_encoder, params_decoder, img); - SEG::DL_RESULT res; - samSegmentor->CreateSession(params); - SEG::MODEL_TYPE modelTypeRef = params.modelType; - samSegmentor->RunSession(img, resSam, modelTypeRef, res); - - - - - samSegmentor->CreateSession(params1); - modelTypeRef = params1.modelType; - samSegmentor->RunSession(img, resSam, modelTypeRef, res); - std::cout << "Press any key to exit" << std::endl; - cv::imshow("Result of Detection", img); - cv::waitKey(0); - cv::destroyAllWindows(); } } -} - -int main() -{ - SegmentAnything(); + return 0; } \ No newline at end of file diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 3820338..69671ec 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -4,7 +4,7 @@ #include #define benchmark -//#define ROI +#define ROI // #define min(a,b) (((a) < (b)) ? 
(a) : (b)) SAM::SAM() { diff --git a/src/segmentation.cpp b/src/segmentation.cpp new file mode 100644 index 0000000..4b2c022 --- /dev/null +++ b/src/segmentation.cpp @@ -0,0 +1,53 @@ +#include "segmentation.h" + +std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_PARAM> Initializer() +{ + std::vector> samSegmentors; + samSegmentors.push_back(std::make_unique()); + samSegmentors.push_back(std::make_unique()); + + std::unique_ptr samSegmentorEncoder = std::make_unique(); + std::unique_ptr samSegmentorDecoder = std::make_unique(); + SEG::DL_INIT_PARAM params_encoder; + SEG::DL_INIT_PARAM params_decoder; + + params_encoder.rectConfidenceThreshold = 0.1; + params_encoder.iouThreshold = 0.5; + params_encoder.modelPath = "SAM_encoder.onnx"; + params_encoder.imgSize = { 1024, 1024 }; + + params_decoder = params_encoder; + params_decoder.modelType = SEG::SAM_SEGMENT_DECODER; + params_decoder.modelPath = "SAM_mask_decoder.onnx"; + + + + #ifdef USE_CUDA + params_encoder.cudaEnable = true; + #else + params_encoder.cudaEnable = false; + #endif + + samSegmentorEncoder->CreateSession(params_encoder); + samSegmentorDecoder->CreateSession(params_decoder); + samSegmentors[0] = std::move(samSegmentorEncoder); + samSegmentors[1] = std::move(samSegmentorDecoder); + return {std::move(samSegmentors), params_encoder, params_decoder}; +} + +void SegmentAnything(std::vector>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) { + + std::vector resSam; + SEG::DL_RESULT res; + + SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; + samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); + + + modelTypeRef = params_decoder.modelType; + samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); + std::cout << "Press any key to exit" << std::endl; + cv::imshow("Result of Detection", img); + cv::waitKey(0); + cv::destroyAllWindows(); +} From 3f0aa166208a0ecb4d18a356e1d0582c5d09bbcc Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 
4 Jul 2025 10:02:37 +0200 Subject: [PATCH 02/35] create catkin package --- package.xml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 package.xml diff --git a/package.xml b/package.xml new file mode 100644 index 0000000..cde009a --- /dev/null +++ b/package.xml @@ -0,0 +1,29 @@ + + + + sam_onnx_ros + 0.0.0 + Segment Anything Model (SAM) segmentation + + Iason Theodorou + + ToDo + + catkin + + libopencv-dev + libopencv-dev + onnxruntime_ros + onnxruntime_ros + + catkin_lint_cmake + + doxygen + + + + + + \ No newline at end of file From 6ec4622a07e99d7526fcd40b0d7f86ec488dc78b Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 4 Jul 2025 10:03:43 +0200 Subject: [PATCH 03/35] included CI tests --- .github/workflows/main.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .github/workflows/main.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..4f3af87 --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,13 @@ +name: CI + +on: [push, pull_request] + +jobs: + tue-ci: + name: TUe CI - ${{ github.event_name }} + runs-on: ubuntu-latest + steps: + - name: TUe CI + uses: tue-robotics/tue-env/ci/main@master + with: + package: ${{ github.event.repository.name }} From 2ae0d7b04a12a04d9193cc16a1619e659c6eb304 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 4 Jul 2025 10:04:18 +0200 Subject: [PATCH 04/35] update of CMakeLists to include some initial needed components --- CMakeLists.txt | 53 +++++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d1e926c..9e7ed33 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,13 +1,17 @@ cmake_minimum_required(VERSION 3.5) set(PROJECT_NAME SAMOnnxRuntimeCPPInference) -project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX) +project(sam_onnx_ros) + +# -------------- CMake Policies ------------------# 
+#add_compile_options(-Wall -Werror=all) +#add_compile_options(-Wextra -Werror=extra) # -------------- Support C++17 for using filesystem ------------------# set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS ON) -set(CMAKE_INCLUDE_CURRENT_DIR ON) +#set(CMAKE_INCLUDE_CURRENT_DIR ON) # -------------- OpenCV ------------------# find_package(OpenCV REQUIRED) @@ -22,6 +26,33 @@ include_directories(${ONNXRUNTIME_ROOT}/include) add_definitions(-DUSE_CUDA=1) include_directories(/usr/local/cuda/include) +# find_package(catkin REQUIRED +# COMPONENTS +# onnxruntime_ros +# ) + +# ------------------------------------------------------------------------------------------------ +# CATKIN EXPORT +# ------------------------------------------------------------------------------------------------ + +# catkin_package( +# INCLUDE_DIRS include +# LIBRARIES ${PROJECT_NAME} +# CATKIN_DEPENDS +# DEPENDS OpenCV +# ) + +# ------------------------------------------------------------------------------------------------ +# BUILD +# ------------------------------------------------------------------------------------------------ + +include_directories( + include + SYSTEM + ${OpenCV_INCLUDE_DIRS} + ${catkin_INCLUDE_DIRS} +) + set(PROJECT_SOURCES src/main.cpp src/sam_inference.cpp @@ -33,21 +64,11 @@ add_executable(${PROJECT_NAME} ${PROJECT_SOURCES}) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/inc) # Link OpenCV libraries along with ONNX Runtime -target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so) - -# For Windows system, copy onnxruntime.dll to the same folder of the executable file -if (WIN32) - add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy_if_different - "${ONNXRUNTIME_ROOT}/lib/onnxruntime.dll" - $) -endif () - -# Download https://raw.githubusercontent.com/ultralytics/ultralytics/main/ultralytics/cfg/datasets/coco.yaml -# and put it in the same folder of the executable 
file -configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) +target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so) -# Copy yolov8n.onnx file to the same folder of the executable file + +# Copy sam_.onnx file to the same folder of the executable file +configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) configure_file(../hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) # Create folder name images in the same folder of the executable file From 02af27f822a67317d41db26df1cdb65a5b528c73 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 4 Jul 2025 13:36:46 +0200 Subject: [PATCH 05/35] Return the mask on SegmentAnything function (not working properly) --- inc/dl_types.h | 1 + inc/segmentation.h | 2 +- src/main.cpp | 11 +++++++++-- src/segmentation.cpp | 15 +++++++++++---- 4 files changed, 22 insertions(+), 7 deletions(-) diff --git a/inc/dl_types.h b/inc/dl_types.h index 54bd60f..72bd1fe 100644 --- a/inc/dl_types.h +++ b/inc/dl_types.h @@ -29,6 +29,7 @@ typedef struct _DL_INIT_PARAM int intraOpNumThreads = 1; //std::vector boxes; // For SAM encoder model, this will be filled with detected boxes + // Overloaded output operator for _DL_INIT_PARAM to print its contents friend std::ostream& operator<<(std::ostream& os, _DL_INIT_PARAM& param) { os << "modelPath: " << param.modelPath << "\n"; diff --git a/inc/segmentation.h b/inc/segmentation.h index 9617001..46e954e 100644 --- a/inc/segmentation.h +++ b/inc/segmentation.h @@ -8,4 +8,4 @@ #include "sam_inference.h" std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer(); -void SegmentAnything(std::vector>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); \ No newline at end of file +std::vector 
SegmentAnything(std::vector>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 3905ead..5c22108 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -15,8 +15,15 @@ int main() { std::string img_path = i.path().string(); cv::Mat img = cv::imread(img_path); - SegmentAnything(samSegmentors, params_encoder, params_decoder, img); - + std::vector masks; + masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, img); + for (int j = 0; j < masks.size(); j++) + { + std::cout << "Press any key to exit" << std::endl; + cv::imshow("Result of MASKS", masks[j]); + cv::waitKey(0); + cv::destroyAllWindows(); + } } } return 0; diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 4b2c022..2962563 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -35,7 +35,7 @@ std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_P return {std::move(samSegmentors), params_encoder, params_decoder}; } -void SegmentAnything(std::vector>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) { +std::vector SegmentAnything(std::vector>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) { std::vector resSam; SEG::DL_RESULT res; @@ -46,8 +46,15 @@ void SegmentAnything(std::vector>& samSegmentors, SEG::DL_I modelTypeRef = params_decoder.modelType; samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); - std::cout << "Press any key to exit" << std::endl; - cv::imshow("Result of Detection", img); - cv::waitKey(0); + + //cv::destroyAllWindows(); + cv::Mat finalMask = res.masks[0]; + std::cout << "Final mask size: " << finalMask.size() << std::endl; + + for (const auto& mask : res.masks) { + cv::imshow("Mask", mask); + cv::waitKey(0); + } cv::destroyAllWindows(); + return std::move(res.masks); } From 7de1753da1dd547e69067c27a00023ad1abddd8b Mon Sep 17 
00:00:00 2001 From: IasonTheodorou Date: Tue, 19 Aug 2025 21:19:57 +0200 Subject: [PATCH 06/35] Updated CMake and removed not needed parts of the code --- CMakeLists.txt | 24 +++++++++++--------- {inc => include}/dl_types.h | 0 {inc => include}/sam_inference.h | 0 {inc => include}/segmentation.h | 0 {inc => include}/utils.h | 6 ----- src/sam_inference.cpp | 39 +------------------------------- src/utils.cpp | 7 ++---- 7 files changed, 16 insertions(+), 60 deletions(-) rename {inc => include}/dl_types.h (100%) rename {inc => include}/sam_inference.h (100%) rename {inc => include}/segmentation.h (100%) rename {inc => include}/utils.h (96%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9e7ed33..1270d93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.5) set(PROJECT_NAME SAMOnnxRuntimeCPPInference) project(sam_onnx_ros) +project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX) # -------------- CMake Policies ------------------# #add_compile_options(-Wall -Werror=all) @@ -17,6 +18,7 @@ set(CMAKE_CXX_EXTENSIONS ON) find_package(OpenCV REQUIRED) include_directories(${OpenCV_INCLUDE_DIRS}) + # -------------- ONNXRuntime ------------------# set(ONNXRUNTIME_VERSION 1.21.0) set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam/onnxruntime-linux-x64-gpu-1.21.1") @@ -26,21 +28,21 @@ include_directories(${ONNXRUNTIME_ROOT}/include) add_definitions(-DUSE_CUDA=1) include_directories(/usr/local/cuda/include) -# find_package(catkin REQUIRED -# COMPONENTS -# onnxruntime_ros -# ) +find_package(catkin REQUIRED + COMPONENTS + #onnxruntime_ros +) # ------------------------------------------------------------------------------------------------ # CATKIN EXPORT # ------------------------------------------------------------------------------------------------ -# catkin_package( -# INCLUDE_DIRS include -# LIBRARIES ${PROJECT_NAME} -# CATKIN_DEPENDS -# DEPENDS OpenCV -# ) +catkin_package( + INCLUDE_DIRS include + LIBRARIES 
${PROJECT_NAME} + CATKIN_DEPENDS + DEPENDS OpenCV +) # ------------------------------------------------------------------------------------------------ # BUILD @@ -61,7 +63,7 @@ set(PROJECT_SOURCES ) add_executable(${PROJECT_NAME} ${PROJECT_SOURCES}) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/inc) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) # Link OpenCV libraries along with ONNX Runtime target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so) diff --git a/inc/dl_types.h b/include/dl_types.h similarity index 100% rename from inc/dl_types.h rename to include/dl_types.h diff --git a/inc/sam_inference.h b/include/sam_inference.h similarity index 100% rename from inc/sam_inference.h rename to include/sam_inference.h diff --git a/inc/segmentation.h b/include/segmentation.h similarity index 100% rename from inc/segmentation.h rename to include/segmentation.h diff --git a/inc/utils.h b/include/utils.h similarity index 96% rename from inc/utils.h rename to include/utils.h index 0e7a8d7..1bded56 100644 --- a/inc/utils.h +++ b/include/utils.h @@ -2,12 +2,6 @@ #define RET_OK nullptr -#ifdef _WIN32 -#include -#include -#include -#endif - #include #include #include diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 69671ec..77cef8e 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -71,26 +71,12 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { sessionOption.AppendExecutionProvider_CUDA(cudaOption); } - //OrtTensorRTProviderOptions trtOptions{}; - //trtOptions.device_id = 0; - //trtOptions.trt_fp16_enable = true; - //sessionOption.AppendExecutionProvider_TensorRT(trtOptions); - sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); -#ifdef _WIN32 - int ModelPathSize = MultiByteToWideChar(CP_UTF8, 0, 
iParams.modelPath.c_str(), static_cast(iParams.modelPath.length()), nullptr, 0); - wchar_t* wide_cstr = new wchar_t[ModelPathSize + 1]; - MultiByteToWideChar(CP_UTF8, 0, iParams.modelPath.c_str(), static_cast(iParams.modelPath.length()), wide_cstr, ModelPathSize); - wide_cstr[ModelPathSize] = L'\0'; - const wchar_t* modelPath = wide_cstr; -#else const char* modelPath = iParams.modelPath.c_str(); -#endif // _WIN32 - //session = new Ort::Session(env, modelPath, sessionOption); session = std::make_unique(env, modelPath, sessionOption); Ort::AllocatorWithDefaultOptions allocator; size_t inputNodesNum = session->GetInputCount(); @@ -111,14 +97,6 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { } options = Ort::RunOptions{ nullptr }; - //std::vector input_shape; - //std::vector output_shape; - //size_t input_tensor_size = 0; - //size_t output_tensor_size = 0; - //Get input and output tensor size - - //auto input_tensor_size = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementCount(); - //auto output_tensor_size = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementCount(); auto input_shape = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_shape = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_type = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementType(); @@ -159,13 +137,7 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR } else if (modelType == SEG::SAM_SEGMENT_DECODER) { - // For SAM decoder model, the input size is different - // Assuming the input size is 236x64x64 for the decoder - // You can adjust this based on your actual model requirements - // For example, if the input size is 1x3x236x64, you can set it as follows: - // inputNodeDims = { 1, 3, 236, 64 }; - // But here we are using 1x236x64x64 as per your original code - + // Input size or SAM decoder model is 256x64x64 for the decoder inputNodeDims = { 1, 256, 
64, 64 }; } TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); @@ -209,7 +181,6 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo(); std::vector outputNodeDims = tensor_info.GetShape(); auto output = outputTensor.front().GetTensorMutableData::type>(); - //std::vector outputNodeDims = outputTensor.front().GetTensorTypeAndShapeInfo().GetShape(); delete[] blob; int embeddingSize = outputNodeDims[1] * outputNodeDims[2] * outputNodeDims[3]; // Flattened size @@ -234,14 +205,12 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR break; } case SEG::SAM_SEGMENT_DECODER: - //case : { // Use embeddings from the last result std::vector embeddings = result.embeddings; // Create tensor for decoder std::vector decoderInputDims = { 1, 256, 64, 64 }; // Adjust based on your decoder's requirements - // Create point coordinates and labels #ifdef ROI @@ -258,8 +227,6 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR return "[SAM]: NO valid Box."; } - //cv::Rect bbox1(138, 29, 170, 301); - std::vector boundingBoxes; boundingBoxes.push_back(bbox); #endif // ROI @@ -344,10 +311,6 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR utilities.overlay(output_tensors, iImg, imgSize, result); - //std::cout << "Press any key to exit" << std::endl; - //cv::imshow("Result of INTERMEDIATE Detection", iImg); - //cv::waitKey(0); - //cv::destroyAllWindows(); } // Add the result to oResult oResult.push_back(result); diff --git a/src/utils.cpp b/src/utils.cpp index ce75a0b..153c0ac 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -172,7 +172,7 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg } } - // 1. Calculate the dimensions the image had during preprocessing + // 1. 
Calculate the dimensions the image had during preprocessing float scale; int processedWidth, processedHeight; if (iImg.cols >= iImg.rows) { @@ -184,9 +184,6 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg processedWidth = int(iImg.cols * scale); processedHeight = imgSize[1]; } - // 2. Resize mask to match the SAM input dimensions - //cv::Mat resizedMask; - //cv::resize(mask, resizedMask, cv::Size(256, 256)); // 3. Extract the portion that corresponds to the actual image (no padding) int cropWidth = std::min(256, int(256 * processedWidth / (float)imgSize[0])); @@ -207,7 +204,7 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg } // Apply the Guided Filter - // cv::Mat filteredMask; + cv::Mat filteredMask; int radius = 2; double eps = 0.01; cv::ximgproc::guidedFilter(iImg, finalMask, finalMask, radius, eps); From fbe8e00531e03fde1b86896fbe2a21963e490662 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 26 Aug 2025 20:34:24 +0200 Subject: [PATCH 07/35] Updated code format --- include/dl_types.h | 109 +++++----- include/sam_inference.h | 32 +-- include/utils.h | 68 +++--- src/main.cpp | 4 +- src/sam_inference.cpp | 447 ++++++++++++++++++++-------------------- src/segmentation.cpp | 19 +- src/utils.cpp | 207 ++++++++++--------- 7 files changed, 433 insertions(+), 453 deletions(-) diff --git a/include/dl_types.h b/include/dl_types.h index 72bd1fe..632c7c6 100644 --- a/include/dl_types.h +++ b/include/dl_types.h @@ -1,69 +1,66 @@ #pragma once namespace SEG { -enum MODEL_TYPE -{ - //FLOAT32 MODEL - SAM_SEGMENT_ENCODER = 1, - SAM_SEGMENT_DECODER = 2, - //YOLO_CLS = 3, - - //FLOAT16 MODEL - //YOLO_DETECT_V8_HALF = 4, - //YOLO_POSE_V8_HALF = 5, - //YOLO_CLS_HALF = 6 -}; - + enum MODEL_TYPE + { + // FLOAT32 MODEL + SAM_SEGMENT_ENCODER = 1, + SAM_SEGMENT_DECODER = 2, + // YOLO_CLS = 3, -typedef struct _DL_INIT_PARAM -{ - // Yolo & Common Part - std::string modelPath; - MODEL_TYPE modelType = SAM_SEGMENT_ENCODER; - std::vector 
imgSize = { 640, 640 }; - float rectConfidenceThreshold = 0.6; - float iouThreshold = 0.5; - int keyPointsNum = 2; //Note:kpt number for pose - bool cudaEnable = false; - int logSeverityLevel = 3; - int intraOpNumThreads = 1; - //std::vector boxes; // For SAM encoder model, this will be filled with detected boxes + // FLOAT16 MODEL + // YOLO_DETECT_V8_HALF = 4, + // YOLO_POSE_V8_HALF = 5, + // YOLO_CLS_HALF = 6 + }; - // Overloaded output operator for _DL_INIT_PARAM to print its contents - friend std::ostream& operator<<(std::ostream& os, _DL_INIT_PARAM& param) + typedef struct _DL_INIT_PARAM { - os << "modelPath: " << param.modelPath << "\n"; - os << "modelType: " << param.modelType << "\n"; - os << "imgSize: "; - for (const auto& size : param.imgSize) - os << size << " "; - os << "\n"; - os << "rectConfidenceThreshold: " << param.rectConfidenceThreshold << "\n"; - os << "iouThreshold: " << param.iouThreshold << "\n"; - os << "keyPointsNum: " << param.keyPointsNum << "\n"; - os << "cudaEnable: " << (param.cudaEnable ? 
"true" : "false") << "\n"; - os << "logSeverityLevel: " << param.logSeverityLevel << "\n"; - os << "intraOpNumThreads: " << param.intraOpNumThreads; - return os; - } - -} DL_INIT_PARAM; + // Yolo & Common Part + std::string modelPath; + MODEL_TYPE modelType = SAM_SEGMENT_ENCODER; + std::vector imgSize = {640, 640}; + float rectConfidenceThreshold = 0.6; + float iouThreshold = 0.5; + int keyPointsNum = 2; // Note:kpt number for pose + bool cudaEnable = false; + int logSeverityLevel = 3; + int intraOpNumThreads = 1; + // std::vector boxes; // For SAM encoder model, this will be filled with detected boxes + // Overloaded output operator for _DL_INIT_PARAM to print its contents + friend std::ostream &operator<<(std::ostream &os, _DL_INIT_PARAM ¶m) + { + os << "modelPath: " << param.modelPath << "\n"; + os << "modelType: " << param.modelType << "\n"; + os << "imgSize: "; + for (const auto &size : param.imgSize) + os << size << " "; + os << "\n"; + os << "rectConfidenceThreshold: " << param.rectConfidenceThreshold << "\n"; + os << "iouThreshold: " << param.iouThreshold << "\n"; + os << "keyPointsNum: " << param.keyPointsNum << "\n"; + os << "cudaEnable: " << (param.cudaEnable ? 
"true" : "false") << "\n"; + os << "logSeverityLevel: " << param.logSeverityLevel << "\n"; + os << "intraOpNumThreads: " << param.intraOpNumThreads; + return os; + } -typedef struct _DL_RESULT -{ + } DL_INIT_PARAM; - //Yolo Part - int classId; - float confidence; - std::vector boxes; // For SAM encoder model, this will be filled with detected boxes - std::vector keyPoints; + typedef struct _DL_RESULT + { - // Sam Part - std::vector embeddings; - // Masks for SAM decoder model output - std::vector masks; // Each cv::Mat represents a mask + // Yolo Part + int classId; + float confidence; + std::vector boxes; // For SAM encoder model, this will be filled with detected boxes + std::vector keyPoints; + // Sam Part + std::vector embeddings; + // Masks for SAM decoder model output + std::vector masks; // Each cv::Mat represents a mask -} DL_RESULT; + } DL_RESULT; } // namespace SEG \ No newline at end of file diff --git a/include/sam_inference.h b/include/sam_inference.h index 8a15c38..8910bda 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -1,12 +1,6 @@ #pragma once -#define RET_OK nullptr - -#ifdef _WIN32 -#include -#include -#include -#endif +#define RET_OK nullptr #include #include @@ -18,10 +12,6 @@ #include #endif - - - - class SAM { public: @@ -30,18 +20,15 @@ class SAM ~SAM(); public: + const char *CreateSession(SEG::DL_INIT_PARAM &iParams); - const char* CreateSession(SEG::DL_INIT_PARAM& iParams); - - const char* RunSession(const cv::Mat& iImg, std::vector& oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT& result); - - char* WarmUpSession(SEG::MODEL_TYPE modelType); - - template - char* TensorProcess(clock_t& starttime_1, const cv::Mat& iImg, N& blob, std::vector& inputNodeDims, - SEG::MODEL_TYPE modelType, std::vector& oResult, Utils& utilities, SEG::DL_RESULT& result); + const char *RunSession(const cv::Mat &iImg, std::vector &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result); + char *WarmUpSession(SEG::MODEL_TYPE modelType); + 
template + char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, + SEG::MODEL_TYPE modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result); std::vector classes{}; @@ -50,12 +37,11 @@ class SAM std::unique_ptr session; bool cudaEnable; Ort::RunOptions options; - std::vector inputNodeNames; - std::vector outputNodeNames; + std::vector inputNodeNames; + std::vector outputNodeNames; SEG::MODEL_TYPE modelType; std::vector imgSize; float rectConfidenceThreshold; float iouThreshold; - }; \ No newline at end of file diff --git a/include/utils.h b/include/utils.h index 1bded56..7ff7f9c 100644 --- a/include/utils.h +++ b/include/utils.h @@ -1,6 +1,6 @@ #pragma once -#define RET_OK nullptr +#define RET_OK nullptr #include #include @@ -14,42 +14,42 @@ class Utils { - public: - Utils(); - ~Utils(); - - void overlay(std::vector& output_tensors, const cv::Mat& iImg, std::vector iImgSize, SEG::DL_RESULT& result); - char* PreProcess(const cv::Mat& iImg, std::vector iImgSize, cv::Mat& oImg); - void ScaleBboxPoints(const cv::Mat& iImg, std::vector iImgSize, std::vector& pointCoords, std::vector& PointsCoordsScaled); - - std::vector PrepareInputTensor(Ort::Value& decoderInputTensor, std::vector& pointCoordsScaled, std::vector pointCoordsDims, - std::vector& pointLabels, std::vector pointLabelsDims, std::vector& maskInput, - std::vector maskInputDims, std::vector& hasMaskInput, std::vector hasMaskInputDims); - - // Definition: Flattened image to blob (and normalizaed) for deep learning inference. Also reorganize from HWC to CHW. - // Note: Code in header file since it is used outside of this utils src code. 
- template - char* BlobFromImage(const cv::Mat& iImg, T& iBlob) { - int channels = iImg.channels(); - int imgHeight = iImg.rows; - int imgWidth = iImg.cols; - - for (int c = 0; c < channels; c++) +public: + Utils(); + ~Utils(); + + void overlay(std::vector &output_tensors, const cv::Mat &iImg, std::vector iImgSize, SEG::DL_RESULT &result); + char *PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat &oImg); + void ScaleBboxPoints(const cv::Mat &iImg, std::vector iImgSize, std::vector &pointCoords, std::vector &PointsCoordsScaled); + + std::vector PrepareInputTensor(Ort::Value &decoderInputTensor, std::vector &pointCoordsScaled, std::vector pointCoordsDims, + std::vector &pointLabels, std::vector pointLabelsDims, std::vector &maskInput, + std::vector maskInputDims, std::vector &hasMaskInput, std::vector hasMaskInputDims); + + // Definition: Flattened image to blob (and normalizaed) for deep learning inference. Also reorganize from HWC to CHW. + // Note: Code in header file since it is used outside of this utils src code. 
+ template + char *BlobFromImage(const cv::Mat &iImg, T &iBlob) + { + int channels = iImg.channels(); + int imgHeight = iImg.rows; + int imgWidth = iImg.cols; + + for (int c = 0; c < channels; c++) + { + for (int h = 0; h < imgHeight; h++) { - for (int h = 0; h < imgHeight; h++) + for (int w = 0; w < imgWidth; w++) { - for (int w = 0; w < imgWidth; w++) - { - iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer::type( - (iImg.at(h, w)[c]) / 255.0f); - } + iBlob[c * imgWidth * imgHeight + h * imgWidth + w] = typename std::remove_pointer::type( + (iImg.at(h, w)[c]) / 255.0f); } } - return RET_OK; } - private: - float resizeScales; - float resizeScalesBbox; //letterbox scale + return RET_OK; + } - - }; +private: + float resizeScales; + float resizeScalesBbox; // letterbox scale +}; diff --git a/src/main.cpp b/src/main.cpp index 5c22108..3c8091d 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -2,14 +2,14 @@ int main() { - //Running inference + // Running inference std::vector> samSegmentors; SEG::DL_INIT_PARAM params_encoder; SEG::DL_INIT_PARAM params_decoder; std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); std::filesystem::path imgs_path = current_path / "../../hero_sam/pipeline/build/images"; - for (auto& i : std::filesystem::directory_iterator(imgs_path)) + for (auto &i : std::filesystem::directory_iterator(imgs_path)) { if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") { diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 77cef8e..8a07b6b 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -5,19 +5,20 @@ #define benchmark #define ROI -// #define min(a,b) (((a) < (b)) ? 
(a) : (b)) - -SAM::SAM() { +SAM::SAM() +{ } - -SAM::~SAM() { +SAM::~SAM() +{ // Clean up input/output node names - for (auto& name : inputNodeNames) { + for (auto &name : inputNodeNames) + { delete[] name; } - for (auto& name : outputNodeNames) { + for (auto &name : outputNodeNames) + { delete[] name; } } @@ -25,24 +26,30 @@ SAM::~SAM() { #ifdef USE_CUDA namespace Ort { - template<> - struct TypeToTensorType { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; }; + template <> + struct TypeToTensorType + { + static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; + }; } #endif - -const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { - const char* Ret = RET_OK; - if (session) { +const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) +{ + const char *Ret = RET_OK; + if (session) + { session.reset(); // Release previous session // Clear node names - for (auto& name : inputNodeNames) { + for (auto &name : inputNodeNames) + { delete[] name; } inputNodeNames.clear(); - for (auto& name : outputNodeNames) { + for (auto &name : outputNodeNames) + { delete[] name; } outputNodeNames.clear(); @@ -75,7 +82,7 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); - const char* modelPath = iParams.modelPath.c_str(); + const char *modelPath = iParams.modelPath.c_str(); session = std::make_unique(env, modelPath, sessionOption); Ort::AllocatorWithDefaultOptions allocator; @@ -83,7 +90,7 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { for (size_t i = 0; i < inputNodesNum; i++) { Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator); - char* temp_buf = new char[50]; + char *temp_buf = new char[50]; strcpy(temp_buf, input_node_name.get()); inputNodeNames.push_back(temp_buf); } @@ -91,11 +98,11 @@ const char* 
SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { for (size_t i = 0; i < OutputNodesNum; i++) { Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator); - char* temp_buf = new char[10]; + char *temp_buf = new char[10]; strcpy(temp_buf, output_node_name.get()); outputNodeNames.push_back(temp_buf); } - options = Ort::RunOptions{ nullptr }; + options = Ort::RunOptions{nullptr}; auto input_shape = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_shape = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); @@ -104,78 +111,79 @@ const char* SAM::CreateSession(SEG::DL_INIT_PARAM& iParams) { WarmUpSession(modelType); return RET_OK; } - catch (const std::exception& e) + catch (const std::exception &e) { - const char* str1 = "[SAM]:"; - const char* str2 = e.what(); + const char *str1 = "[SAM]:"; + const char *str2 = e.what(); std::string result = std::string(str1) + std::string(str2); - char* merged = new char[result.length() + 1]; + char *merged = new char[result.length() + 1]; std::strcpy(merged, result.c_str()); std::cout << merged << std::endl; delete[] merged; return "[SAM]:Create session failed."; } - } -const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT& result) { - #ifdef benchmark - clock_t starttime_1 = clock(); - #endif // benchmark - Utils utilities; - const char* Ret = RET_OK; - cv::Mat processedImg; - utilities.PreProcess(iImg, imgSize, processedImg); - if (modelType < 4) +const char *SAM::RunSession(const cv::Mat &iImg, std::vector &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result) +{ +#ifdef benchmark + clock_t starttime_1 = clock(); +#endif // benchmark + Utils utilities; + const char *Ret = RET_OK; + cv::Mat processedImg; + utilities.PreProcess(iImg, imgSize, processedImg); + if (modelType < 4) + { + float *blob = new float[processedImg.total() * 3]; + utilities.BlobFromImage(processedImg, blob); + 
std::vector inputNodeDims; + if (modelType == SEG::SAM_SEGMENT_ENCODER) { - float* blob = new float[processedImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector inputNodeDims; - if (modelType == SEG::SAM_SEGMENT_ENCODER) - { - inputNodeDims = { 1, 3, imgSize.at(0), imgSize.at(1) }; - } - else if (modelType == SEG::SAM_SEGMENT_DECODER) - { - // Input size or SAM decoder model is 256x64x64 for the decoder - inputNodeDims = { 1, 256, 64, 64 }; - } - TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); + inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; } - else + else if (modelType == SEG::SAM_SEGMENT_DECODER) { - #ifdef USE_CUDA - half* blob = new half[processedImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector inputNodeDims = { 1,3,imgSize.at(0),imgSize.at(1) }; - TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); - #endif + // Input size or SAM decoder model is 256x64x64 for the decoder + inputNodeDims = {1, 256, 64, 64}; } - - return Ret; + TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); + } + else + { +#ifdef USE_CUDA + half *blob = new half[processedImg.total() * 3]; + utilities.BlobFromImage(processedImg, blob); + std::vector inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; + TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); +#endif } - template - char* SAM::TensorProcess(clock_t& starttime_1, const cv::Mat& iImg, N& blob, std::vector& inputNodeDims, - SEG::MODEL_TYPE modelType, std::vector& oResult, Utils& utilities, SEG::DL_RESULT& result) { + return Ret; +} - switch (modelType) - { - case SEG::SAM_SEGMENT_ENCODER: +template +char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, + SEG::MODEL_TYPE modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result) +{ + 
+ switch (modelType) + { + case SEG::SAM_SEGMENT_ENCODER: // case OTHER_SAM_MODEL: { Ort::Value inputTensor = Ort::Value::CreateTensor::type>( Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(), inputNodeDims.size()); - #ifdef benchmark +#ifdef benchmark clock_t starttime_2 = clock(); - #endif // benchmark +#endif // benchmark auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), - outputNodeNames.size()); - #ifdef benchmark + outputNodeNames.size()); +#ifdef benchmark clock_t starttime_3 = clock(); - #endif // benchmark +#endif // benchmark Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo(); auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo(); @@ -184,10 +192,9 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR delete[] blob; int embeddingSize = outputNodeDims[1] * outputNodeDims[2] * outputNodeDims[3]; // Flattened size - result.embeddings.assign(output, output + embeddingSize); // Save embeddings + result.embeddings.assign(output, output + embeddingSize); // Save embeddings - - #ifdef benchmark +#ifdef benchmark clock_t starttime_4 = clock(); double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; @@ -200,149 +207,140 @@ const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oR { std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." 
<< std::endl; } - #endif // benchmark +#endif // benchmark break; } - case SEG::SAM_SEGMENT_DECODER: - { - // Use embeddings from the last result - std::vector embeddings = result.embeddings; - // Create tensor for decoder - std::vector decoderInputDims = { 1, 256, 64, 64 }; // Adjust based on your decoder's requirements - - // Create point coordinates and labels - #ifdef ROI - - // Create a window for user interaction - namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE); - - // Let the user select the bounding box - cv::Rect bbox = selectROI("Select and View Result", iImg, false, false); - - // Check if a valid bounding box was selected - if (bbox.width == 0 || bbox.height == 0) - { - std::cerr << "No valid bounding box selected." << std::endl; - return "[SAM]: NO valid Box."; - } - - std::vector boundingBoxes; - boundingBoxes.push_back(bbox); - #endif // ROI - //boundingBoxes.push_back(bbox1); - // Declare timing variables BEFORE the loop - #ifdef benchmark - clock_t starttime_2 = 0; - clock_t starttime_3 = 0; - #endif // benchmark - - #ifdef ROI - for (const auto &bbox : boundingBoxes) - #else - for (const auto &bbox : result.boxes) - #endif // ROI - { - Ort::Value decoderInputTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - embeddings.data(), // Use the embeddings from the encoder - embeddings.size(), // Total number of elements - decoderInputDims.data(), - decoderInputDims.size() - ); - // Use center of bounding box as foreground point - float centerX = bbox.x + bbox.width/2; - float centerY = bbox.y + bbox.height/2; - - // Convert bounding box to points - std::vector pointCoords = { - (float)bbox.x, (float)bbox.y, // Top-left - (float)(bbox.x + bbox.width), (float)(bbox.y + bbox.height) // Bottom-right - }; - - - std::vector pointCoordsScaled; - - std::vector pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y) - - // Labels for the points - std::vector pointLabels = {2.0f, 3.0f}; // Box 
prompt labels - std::vector pointLabelsDims = {1, 2}; - - // Create dummy mask_input and has_mask_input - std::vector maskInput(256 * 256, 0.0f); // Fill with zeros - std::vector maskInputDims = {1, 1, 256, 256}; - - - std::vector hasMaskInput = {0.0f}; // No mask provided - std::vector hasMaskInputDims = {1}; - - utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + case SEG::SAM_SEGMENT_DECODER: + { + // Use embeddings from the last result + std::vector embeddings = result.embeddings; + // Create tensor for decoder + std::vector decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements + // Create point coordinates and labels +#ifdef ROI + // Create a window for user interaction + namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE); + // Let the user select the bounding box + cv::Rect bbox = selectROI("Select and View Result", iImg, false, false); - std::vector inputTensors = utilities.PrepareInputTensor( - decoderInputTensor, - pointCoordsScaled, - pointCoordsDims, - pointLabels, - pointLabelsDims, - maskInput, - maskInputDims, - hasMaskInput, - hasMaskInputDims - ); - - #ifdef benchmark - starttime_2 = clock(); - #endif // benchmark - auto output_tensors = session->Run( - options, - inputNodeNames.data(), - inputTensors.data(), - inputTensors.size(), - outputNodeNames.data(), - outputNodeNames.size()); + // Check if a valid bounding box was selected + if (bbox.width == 0 || bbox.height == 0) + { + std::cerr << "No valid bounding box selected." 
<< std::endl; + return "[SAM]: NO valid Box."; + } - #ifdef benchmark - starttime_3 = clock(); - #endif // benchmark + std::vector boundingBoxes; + boundingBoxes.push_back(bbox); +#endif // ROI + // boundingBoxes.push_back(bbox1); + // Declare timing variables BEFORE the loop +#ifdef benchmark + clock_t starttime_2 = 0; + clock_t starttime_3 = 0; +#endif // benchmark + +#ifdef ROI + for (const auto &bbox : boundingBoxes) +#else + for (const auto &bbox : result.boxes) +#endif // ROI + { + Ort::Value decoderInputTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + embeddings.data(), // Use the embeddings from the encoder + embeddings.size(), // Total number of elements + decoderInputDims.data(), + decoderInputDims.size()); + // Use center of bounding box as foreground point + float centerX = bbox.x + bbox.width / 2; + float centerY = bbox.y + bbox.height / 2; + + // Convert bounding box to points + std::vector pointCoords = { + (float)bbox.x, (float)bbox.y, // Top-left + (float)(bbox.x + bbox.width), (float)(bbox.y + bbox.height) // Bottom-right + }; + std::vector pointCoordsScaled; + + std::vector pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y) + + // Labels for the points + std::vector pointLabels = {2.0f, 3.0f}; // Box prompt labels + std::vector pointLabelsDims = {1, 2}; + + // Create dummy mask_input and has_mask_input + std::vector maskInput(256 * 256, 0.0f); // Fill with zeros + std::vector maskInputDims = {1, 1, 256, 256}; + + std::vector hasMaskInput = {0.0f}; // No mask provided + std::vector hasMaskInputDims = {1}; + + utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + + std::vector inputTensors = utilities.PrepareInputTensor( + decoderInputTensor, + pointCoordsScaled, + pointCoordsDims, + pointLabels, + pointLabelsDims, + maskInput, + maskInputDims, + hasMaskInput, + hasMaskInputDims); + +#ifdef benchmark + starttime_2 = clock(); +#endif // benchmark + auto output_tensors 
= session->Run( + options, + inputNodeNames.data(), + inputTensors.data(), + inputTensors.size(), + outputNodeNames.data(), + outputNodeNames.size()); - utilities.overlay(output_tensors, iImg, imgSize, result); - } - // Add the result to oResult - oResult.push_back(result); +#ifdef benchmark + starttime_3 = clock(); +#endif // benchmark - delete[] blob; + utilities.overlay(output_tensors, iImg, imgSize, result); + } + // Add the result to oResult + oResult.push_back(result); - #ifdef benchmark - clock_t starttime_4 = clock(); - double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; - double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; - double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } - else - { - std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } - #endif // benchmark - break; - } + delete[] blob; - default: - std::cout << "[SAM]: " << "Not support model type." << std::endl; +#ifdef benchmark + clock_t starttime_4 = clock(); + double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; + double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; + double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) + { + std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; } - return RET_OK; - + else + { + std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." 
<< std::endl; + } +#endif // benchmark + break; } + default: + std::cout << "[SAM]: " << "Not support model type." << std::endl; + } + return RET_OK; +} -char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { +char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) +{ clock_t starttime_1 = clock(); Utils utilities; cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3); @@ -350,17 +348,18 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { utilities.PreProcess(iImg, imgSize, processedImg); if (modelType < 4) { - float* blob = new float[iImg.total() * 3]; + float *blob = new float[iImg.total() * 3]; utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = { 1, 3, imgSize.at(0), imgSize.at(1) }; + std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)}; switch (modelType) { - case SEG::SAM_SEGMENT_ENCODER: { + case SEG::SAM_SEGMENT_ENCODER: + { Ort::Value input_tensor = Ort::Value::CreateTensor( Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), SAM_input_node_dims.size()); auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), - outputNodeNames.size()); + outputNodeNames.size()); delete[] blob; clock_t starttime_4 = clock(); double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; @@ -371,36 +370,36 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { break; } - case SEG::SAM_SEGMENT_DECODER: { - std::vector inputNodeDims = { 1, 256, 64, 64 }; // BUG: That was 236 instead of 256 + case SEG::SAM_SEGMENT_DECODER: + { + std::vector inputNodeDims = {1, 256, 64, 64}; // BUG: That was 236 instead of 256 // Use embeddings from the last result - std::vector dummyEmbeddings(256 * 64 * 64, 1.0f); // Fill with zeros or any dummy values - std::vector decoderInputDims = { 1, 256, 64, 64 }; // Adjust based on your decoder's requirements - + std::vector 
dummyEmbeddings(256 * 64 * 64, 1.0f); // Fill with zeros or any dummy values + std::vector decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements // Create dummy point coordinates and labels std::vector boundingBoxes = { cv::Rect(0, 0, 100, 100), // Example bounding box with (x, y, width, height) - //cv::Rect(0, 0, 473, 359) // Another example bounding box + // cv::Rect(0, 0, 473, 359) // Another example bounding box }; - for (const auto& bbox : boundingBoxes) { + for (const auto &bbox : boundingBoxes) + { Ort::Value decoderInputTensor = Ort::Value::CreateTensor( Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), dummyEmbeddings.data(), // Use the embeddings from the encoder dummyEmbeddings.size(), // Total number of elements decoderInputDims.data(), - decoderInputDims.size() - ); + decoderInputDims.size()); // Convert bounding box to points // Use center of bounding box as foreground point - float centerX = bbox.x + bbox.width/2; - float centerY = bbox.y + bbox.height/2; + float centerX = bbox.x + bbox.width / 2; + float centerY = bbox.y + bbox.height / 2; std::vector pointCoords = { - centerX, centerY // Center point (foreground) + centerX, centerY // Center point (foreground) }; - std::vector pointCoordsDims = { 1, 1, 2 }; // 2 points, each with (x, y) + std::vector pointCoordsDims = {1, 1, 2}; // 2 points, each with (x, y) std::vector pointCoordsScaled; @@ -408,14 +407,14 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { // Labels for the points std::vector pointLabels = {1.0f}; // All points are foreground - std::vector pointLabelsDims = { 1, 1}; + std::vector pointLabelsDims = {1, 1}; // Create dummy mask_input and has_mask_input std::vector maskInput(256 * 256, 0.0f); // Fill with zeros - std::vector maskInputDims = { 1, 1, 256, 256 }; - std::vector hasMaskInput = { 0.0f }; // No mask provided - std::vector hasMaskInputDims = { 1 }; + std::vector maskInputDims = {1, 1, 256, 256}; + std::vector hasMaskInput = 
{0.0f}; // No mask provided + std::vector hasMaskInputDims = {1}; - std::vector inputTensors = utilities.PrepareInputTensor( + std::vector inputTensors = utilities.PrepareInputTensor( decoderInputTensor, pointCoordsScaled, pointCoordsDims, @@ -424,8 +423,7 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { maskInput, maskInputDims, hasMaskInput, - hasMaskInputDims - ); + hasMaskInputDims); auto output_tensors = session->Run( options, @@ -433,8 +431,8 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { inputTensors.data(), inputTensors.size(), outputNodeNames.data(), - outputNodeNames.size() - ); } + outputNodeNames.size()); + } outputNodeNames.size(); delete[] blob; @@ -447,15 +445,14 @@ char* SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { break; } - } - + } } else { #ifdef USE_CUDA - half* blob = new half[iImg.total() * 3]; + half *blob = new half[iImg.total() * 3]; utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = { 1,3,imgSize.at(0),imgSize.at(1) }; + std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)}; Ort::Value input_tensor = Ort::Value::CreateTensor(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), SAM_input_node_dims.size()); auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), outputNodeNames.size()); delete[] blob; diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 2962563..8b5338c 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -14,19 +14,17 @@ std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_P params_encoder.rectConfidenceThreshold = 0.1; params_encoder.iouThreshold = 0.5; params_encoder.modelPath = "SAM_encoder.onnx"; - params_encoder.imgSize = { 1024, 1024 }; + params_encoder.imgSize = {1024, 1024}; params_decoder = params_encoder; params_decoder.modelType = SEG::SAM_SEGMENT_DECODER; params_decoder.modelPath = 
"SAM_mask_decoder.onnx"; - - - #ifdef USE_CUDA +#ifdef USE_CUDA params_encoder.cudaEnable = true; - #else +#else params_encoder.cudaEnable = false; - #endif +#endif samSegmentorEncoder->CreateSession(params_encoder); samSegmentorDecoder->CreateSession(params_decoder); @@ -35,7 +33,8 @@ std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_P return {std::move(samSegmentors), params_encoder, params_decoder}; } -std::vector SegmentAnything(std::vector>& samSegmentors, SEG::DL_INIT_PARAM& params_encoder, SEG::DL_INIT_PARAM& params_decoder, cv::Mat& img) { +std::vector SegmentAnything(std::vector> &samSegmentors, SEG::DL_INIT_PARAM ¶ms_encoder, SEG::DL_INIT_PARAM ¶ms_decoder, cv::Mat &img) +{ std::vector resSam; SEG::DL_RESULT res; @@ -43,15 +42,15 @@ std::vector SegmentAnything(std::vector>& samSegme SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); - modelTypeRef = params_decoder.modelType; samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); - //cv::destroyAllWindows(); + // cv::destroyAllWindows(); cv::Mat finalMask = res.masks[0]; std::cout << "Final mask size: " << finalMask.size() << std::endl; - for (const auto& mask : res.masks) { + for (const auto &mask : res.masks) + { cv::imshow("Mask", mask); cv::waitKey(0); } diff --git a/src/utils.cpp b/src/utils.cpp index 153c0ac..8d76ac1 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -2,15 +2,16 @@ #include // for guided filter // Constructor -Utils::Utils(){ - +Utils::Utils() +{ } // Destructor -Utils::~Utils(){ +Utils::~Utils() +{ } -char* Utils::PreProcess(const cv::Mat& iImg, std::vector iImgSize, cv::Mat& oImg) +char *Utils::PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat &oImg) { if (iImg.channels() == 3) { @@ -22,41 +23,46 @@ char* Utils::PreProcess(const cv::Mat& iImg, std::vector iImgSize, cv::Mat& cv::cvtColor(iImg, oImg, cv::COLOR_GRAY2RGB); } - - if (iImg.cols >= iImg.rows) - { - resizeScales = iImg.cols / 
(float)iImgSize.at(0); - cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales))); - } - else - { - resizeScales = iImg.rows / (float)iImgSize.at(0); - cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); - } - cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); - oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows))); - oImg = tempImg; + if (iImg.cols >= iImg.rows) + { + resizeScales = iImg.cols / (float)iImgSize.at(0); + cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales))); + } + else + { + resizeScales = iImg.rows / (float)iImgSize.at(0); + cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); + } + cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); + oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows))); + oImg = tempImg; return RET_OK; } -void Utils::ScaleBboxPoints(const cv::Mat& iImg, std::vector imgSize, std::vector& pointCoords, std::vector& pointCoordsScaled){ +void Utils::ScaleBboxPoints(const cv::Mat &iImg, std::vector imgSize, std::vector &pointCoords, std::vector &pointCoordsScaled) +{ pointCoordsScaled.clear(); // Calculate same scale as preprocessing float scale; - if (iImg.cols >= iImg.rows) { + if (iImg.cols >= iImg.rows) + { scale = imgSize[0] / (float)iImg.cols; resizeScalesBbox = iImg.cols / (float)imgSize[0]; - } else { + } + else + { scale = imgSize[1] / (float)iImg.rows; resizeScalesBbox = iImg.rows / (float)imgSize[1]; } - // TOP-LEFT placement (matching PreProcess) - for (size_t i = 0; i < pointCoords.size(); i += 2) { - if (i + 1 < pointCoords.size()) { + // Top-Left placement (matching PreProcess) + for (size_t i = 0; i < pointCoords.size(); i += 2) + { + if (i + 1 < pointCoords.size()) + { float x = pointCoords[i]; float y = pointCoords[i + 1]; @@ -70,68 +76,59 @@ void Utils::ScaleBboxPoints(const cv::Mat& iImg, std::vector imgSize, std:: } } -std::vector 
Utils::PrepareInputTensor(Ort::Value& decoderInputTensor, std::vector& pointCoordsScaled, std::vector pointCoordsDims, std::vector& pointLabels, - std::vector pointLabelsDims, std::vector& maskInput, std::vector maskInputDims, std::vector& hasMaskInput, std::vector hasMaskInputDims){ - -Ort::Value pointCoordsTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - pointCoordsScaled.data(), - pointCoordsScaled.size(), - pointCoordsDims.data(), - pointCoordsDims.size() -); - - - -Ort::Value pointLabelsTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - pointLabels.data(), - pointLabels.size(), - pointLabelsDims.data(), - pointLabelsDims.size() -); - - - -Ort::Value maskInputTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - maskInput.data(), - maskInput.size(), - maskInputDims.data(), - maskInputDims.size() -); - - - -Ort::Value hasMaskInputTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - hasMaskInput.data(), - hasMaskInput.size(), - hasMaskInputDims.data(), - hasMaskInputDims.size() -); - -// Pass all inputs to the decoder -std::vector inputTensors; -inputTensors.push_back(std::move(decoderInputTensor)); -inputTensors.push_back(std::move(pointCoordsTensor)); -inputTensors.push_back(std::move(pointLabelsTensor)); -inputTensors.push_back(std::move(maskInputTensor)); -inputTensors.push_back(std::move(hasMaskInputTensor)); +std::vector Utils::PrepareInputTensor(Ort::Value &decoderInputTensor, std::vector &pointCoordsScaled, std::vector pointCoordsDims, std::vector &pointLabels, + std::vector pointLabelsDims, std::vector &maskInput, std::vector maskInputDims, std::vector &hasMaskInput, std::vector hasMaskInputDims) +{ -return inputTensors; + Ort::Value pointCoordsTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + 
pointCoordsScaled.data(), + pointCoordsScaled.size(), + pointCoordsDims.data(), + pointCoordsDims.size()); + + Ort::Value pointLabelsTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + pointLabels.data(), + pointLabels.size(), + pointLabelsDims.data(), + pointLabelsDims.size()); + + Ort::Value maskInputTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + maskInput.data(), + maskInput.size(), + maskInputDims.data(), + maskInputDims.size()); + + Ort::Value hasMaskInputTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + hasMaskInput.data(), + hasMaskInput.size(), + hasMaskInputDims.data(), + hasMaskInputDims.size()); + + // Pass all inputs to the decoder + std::vector inputTensors; + inputTensors.push_back(std::move(decoderInputTensor)); + inputTensors.push_back(std::move(pointCoordsTensor)); + inputTensors.push_back(std::move(pointLabelsTensor)); + inputTensors.push_back(std::move(maskInputTensor)); + inputTensors.push_back(std::move(hasMaskInputTensor)); + + return inputTensors; } -void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg, std::vector imgSize, SEG::DL_RESULT& result){ +void Utils::overlay(std::vector &output_tensors, const cv::Mat &iImg, std::vector imgSize, SEG::DL_RESULT &result) +{ // Process decoder output (masks) if (output_tensors.size() > 0) { // Get the masks from the output tensor - auto scoresTensor = std::move(output_tensors[0]); // IoU scores - auto masksTensor = std::move(output_tensors[1]); // First output should be the masks PROBABLY WRONG + auto scoresTensor = std::move(output_tensors[0]); // IoU scores + auto masksTensor = std::move(output_tensors[1]); // First output should be the masks PROBABLY WRONG auto masksInfo = masksTensor.GetTensorTypeAndShapeInfo(); auto masksShape = masksInfo.GetShape(); - if (masksShape.size() == 4) { auto masksData = 
masksTensor.GetTensorMutableData(); @@ -142,7 +139,6 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg size_t height = masksShape[2]; // Height of mask size_t width = masksShape[3]; // Width of mask - // Find the best mask (highest IoU score) float bestScore = -1; size_t bestMaskIndex = 0; @@ -152,34 +148,39 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg float score = scoresData[i]; - if (score > bestScore) { + if (score > bestScore) + { bestScore = score; bestMaskIndex = i; } } - std::cout << "Best mask index: " << bestMaskIndex << ", Score: " << bestScore << std::endl; - // Create OpenCV Mat for the mask - cv::Mat mask = cv::Mat::zeros(height, width, CV_8UC1); + // std::cout << "Best mask index: " << bestMaskIndex << ", Score: " << bestScore << std::endl; + + // Create OpenCV Mat for the mask + cv::Mat mask = cv::Mat::zeros(height, width, CV_8UC1); - // Convert float mask to binary mask - for (size_t h = 0; h < height; ++h) + // Convert float mask to binary mask + for (size_t h = 0; h < height; ++h) + { + for (size_t w = 0; w < width; ++w) { - for (size_t w = 0; w < width; ++w) - { - size_t idx = (bestMaskIndex * height * width) + (h * width) + w; - float value = masksData[idx]; - mask.at(h, w) = (value > 0.5f) ? 255 : 0; // Threshold at 0.5 - } + size_t idx = (bestMaskIndex * height * width) + (h * width) + w; + float value = masksData[idx]; + mask.at(h, w) = (value > 0.5f) ? 255 : 0; // Threshold at 0.5 } + } // 1. 
Calculate the dimensions the image had during preprocessing float scale; int processedWidth, processedHeight; - if (iImg.cols >= iImg.rows) { + if (iImg.cols >= iImg.rows) + { scale = (float)imgSize[0] / iImg.cols; processedWidth = imgSize[0]; processedHeight = int(iImg.rows * scale); - } else { + } + else + { scale = (float)imgSize[1] / iImg.rows; processedWidth = int(iImg.cols * scale); processedHeight = imgSize[1]; @@ -238,7 +239,6 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg } }*/ - // Find contours of the mask std::vector> contours; cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); @@ -251,16 +251,17 @@ void Utils::overlay(std::vector& output_tensors, const cv::Mat& iImg // Draw contours with a thick, high-contrast outline cv::drawContours(iImg, contours, -1, cv::Scalar(0, 255, 255), 2); // Yellow outline - // Save or display the result cv::imwrite("segmentation_result_" + std::to_string(bestMaskIndex) + ".jpg", iImg); cv::imwrite("mask_" + std::to_string(bestMaskIndex) + ".jpg", finalMask); - }else - { - std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl; - } - }else - { - std::cerr << "[SAM]: No masks found in the output tensor." << std::endl; - } - } \ No newline at end of file + } + else + { + std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl; + } + } + else + { + std::cerr << "[SAM]: No masks found in the output tensor." 
<< std::endl; + } +} \ No newline at end of file From a5a3c187a9efef09939a4f194b2f384d477a4623 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 29 Aug 2025 13:35:05 +0200 Subject: [PATCH 08/35] Small refactoring of the module --- CMakeLists.txt | 6 +- include/dl_types.h | 17 +- include/sam_inference.h | 14 +- include/segmentation.h | 13 +- include/utils.h | 6 +- src/main.cpp | 6 +- src/sam_inference.cpp | 760 +++++++++++++++++++--------------------- src/segmentation.cpp | 83 ++--- 8 files changed, 434 insertions(+), 471 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1270d93..8cb430a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.5) -set(PROJECT_NAME SAMOnnxRuntimeCPPInference) -project(sam_onnx_ros) +set(PROJECT_NAME sam_onnx_ros) + project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX) # -------------- CMake Policies ------------------# @@ -13,7 +13,7 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS ON) #set(CMAKE_INCLUDE_CURRENT_DIR ON) - +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # -------------- OpenCV ------------------# find_package(OpenCV REQUIRED) include_directories(${OpenCV_INCLUDE_DIRS}) diff --git a/include/dl_types.h b/include/dl_types.h index 632c7c6..5141284 100644 --- a/include/dl_types.h +++ b/include/dl_types.h @@ -1,4 +1,11 @@ -#pragma once +#ifndef DL_TYPES_H +#define DL_TYPES_H + +#include +#include +#include +#include + namespace SEG { enum MODEL_TYPE @@ -29,7 +36,7 @@ namespace SEG // std::vector boxes; // For SAM encoder model, this will be filled with detected boxes // Overloaded output operator for _DL_INIT_PARAM to print its contents - friend std::ostream &operator<<(std::ostream &os, _DL_INIT_PARAM ¶m) + friend std::ostream &operator<<(std::ostream &os, const _DL_INIT_PARAM ¶m) { os << "modelPath: " << param.modelPath << "\n"; os << "modelType: " << param.modelType << "\n"; @@ -51,9 +58,6 @@ namespace SEG typedef struct 
_DL_RESULT { - // Yolo Part - int classId; - float confidence; std::vector boxes; // For SAM encoder model, this will be filled with detected boxes std::vector keyPoints; @@ -63,4 +67,5 @@ namespace SEG std::vector masks; // Each cv::Mat represents a mask } DL_RESULT; -} // namespace SEG \ No newline at end of file +} // namespace SEG +#endif // DL_TYPES_H \ No newline at end of file diff --git a/include/sam_inference.h b/include/sam_inference.h index 8910bda..d63701c 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -1,12 +1,12 @@ -#pragma once +#ifndef SAMINFERENCE_H +#define SAMINFERENCE_H -#define RET_OK nullptr +#define RET_OK nullptr +#include #include #include #include -#include -#include "onnxruntime_cxx_api.h" #include "utils.h" #ifdef USE_CUDA #include @@ -27,7 +27,7 @@ class SAM char *WarmUpSession(SEG::MODEL_TYPE modelType); template - char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, + const char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, SEG::MODEL_TYPE modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result); std::vector classes{}; @@ -44,4 +44,6 @@ class SAM std::vector imgSize; float rectConfidenceThreshold; float iouThreshold; -}; \ No newline at end of file +}; + +#endif // SAMINFERENCE_H \ No newline at end of file diff --git a/include/segmentation.h b/include/segmentation.h index 46e954e..b341f8d 100644 --- a/include/segmentation.h +++ b/include/segmentation.h @@ -1,11 +1,10 @@ -#include -#include -#include -#include -#include +#ifndef SEGMENTATION_H +#define SEGMENTATION_H + #include #include "sam_inference.h" - std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer(); -std::vector SegmentAnything(std::vector>& samSegmentors, SEG::_DL_INIT_PARAM& params_encoder, SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); \ No newline at end of file +std::vector SegmentAnything(std::vector>& 
samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); + +#endif // SEGMENTATION_H \ No newline at end of file diff --git a/include/utils.h b/include/utils.h index 7ff7f9c..333c9e3 100644 --- a/include/utils.h +++ b/include/utils.h @@ -1,11 +1,11 @@ -#pragma once +#ifndef UTILS_H +#define UTILS_H #define RET_OK nullptr #include #include #include -#include #include "onnxruntime_cxx_api.h" #include "dl_types.h" #ifdef USE_CUDA @@ -53,3 +53,5 @@ class Utils float resizeScales; float resizeScalesBbox; // letterbox scale }; + +#endif // UTILS_H \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index 3c8091d..2b2d602 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,5 +1,8 @@ #include "segmentation.h" - +#include +#include +#include +#include int main() { // Running inference @@ -24,6 +27,7 @@ int main() cv::waitKey(0); cv::destroyAllWindows(); } + std::cout << "OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo" << std::endl; } } return 0; diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 8a07b6b..9c0463b 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -1,468 +1,426 @@ #include "sam_inference.h" #include "utils.h" #include -#include #define benchmark #define ROI -SAM::SAM() -{ -} +SAM::SAM() {} -SAM::~SAM() -{ - // Clean up input/output node names - for (auto &name : inputNodeNames) - { - delete[] name; - } - for (auto &name : outputNodeNames) - { - delete[] name; - } +SAM::~SAM() { + // Clean up input/output node names + for (auto &name : inputNodeNames) { + delete[] name; + } + for (auto &name : outputNodeNames) { + delete[] name; + } } #ifdef USE_CUDA -namespace Ort -{ - template <> - struct TypeToTensorType - { - static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; - }; -} +namespace Ort { +template <> struct TypeToTensorType { + static constexpr ONNXTensorElementDataType type = + ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16; +}; +} 
// namespace Ort #endif -const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) -{ - const char *Ret = RET_OK; - if (session) - { - session.reset(); // Release previous session - - // Clear node names - for (auto &name : inputNodeNames) - { - delete[] name; - } - inputNodeNames.clear(); - - for (auto &name : outputNodeNames) - { - delete[] name; - } - outputNodeNames.clear(); +const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { + const char *Ret = RET_OK; + if (session) { + session.reset(); // Release previous session + + // Clear node names + for (auto &name : inputNodeNames) { + delete[] name; } - std::regex pattern("[\u4e00-\u9fa5]"); - bool result = std::regex_search(iParams.modelPath, pattern); - if (result) - { - Ret = "[SAM]:Your model path is error.Change your model path without chinese characters."; - std::cout << Ret << std::endl; - return Ret; + inputNodeNames.clear(); + + for (auto &name : outputNodeNames) { + delete[] name; } - try - { - rectConfidenceThreshold = iParams.rectConfidenceThreshold; - iouThreshold = iParams.iouThreshold; - imgSize = iParams.imgSize; - modelType = iParams.modelType; - cudaEnable = iParams.cudaEnable; - env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam"); - Ort::SessionOptions sessionOption; - if (iParams.cudaEnable) - { - OrtCUDAProviderOptions cudaOption; - cudaOption.device_id = 0; - sessionOption.AppendExecutionProvider_CUDA(cudaOption); - } - - sessionOption.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL); - sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); - sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); - - const char *modelPath = iParams.modelPath.c_str(); - - session = std::make_unique(env, modelPath, sessionOption); - Ort::AllocatorWithDefaultOptions allocator; - size_t inputNodesNum = session->GetInputCount(); - for (size_t i = 0; i < inputNodesNum; i++) - { - Ort::AllocatedStringPtr input_node_name = session->GetInputNameAllocated(i, allocator); - char 
*temp_buf = new char[50]; - strcpy(temp_buf, input_node_name.get()); - inputNodeNames.push_back(temp_buf); - } - size_t OutputNodesNum = session->GetOutputCount(); - for (size_t i = 0; i < OutputNodesNum; i++) - { - Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator); - char *temp_buf = new char[10]; - strcpy(temp_buf, output_node_name.get()); - outputNodeNames.push_back(temp_buf); - } - options = Ort::RunOptions{nullptr}; - - auto input_shape = session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - auto output_shape = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - auto output_type = session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetElementType(); - - WarmUpSession(modelType); - return RET_OK; + outputNodeNames.clear(); + } + std::regex pattern("[\u4e00-\u9fa5]"); + bool result = std::regex_search(iParams.modelPath, pattern); + if (result) { + Ret = "[SAM]:Your model path is error.Change your model path without " + "chinese characters."; + std::cout << Ret << std::endl; + return Ret; + } + try { + rectConfidenceThreshold = iParams.rectConfidenceThreshold; + iouThreshold = iParams.iouThreshold; + imgSize = iParams.imgSize; + modelType = iParams.modelType; + cudaEnable = iParams.cudaEnable; + env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam"); + Ort::SessionOptions sessionOption; + if (iParams.cudaEnable) { + OrtCUDAProviderOptions cudaOption; + cudaOption.device_id = 0; + sessionOption.AppendExecutionProvider_CUDA(cudaOption); } - catch (const std::exception &e) - { - const char *str1 = "[SAM]:"; - const char *str2 = e.what(); - std::string result = std::string(str1) + std::string(str2); - char *merged = new char[result.length() + 1]; - std::strcpy(merged, result.c_str()); - std::cout << merged << std::endl; - delete[] merged; - return "[SAM]:Create session failed."; + + sessionOption.SetGraphOptimizationLevel( + GraphOptimizationLevel::ORT_ENABLE_ALL); + 
sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); + sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); + + const char *modelPath = iParams.modelPath.c_str(); + + session = std::make_unique(env, modelPath, sessionOption); + Ort::AllocatorWithDefaultOptions allocator; + size_t inputNodesNum = session->GetInputCount(); + for (size_t i = 0; i < inputNodesNum; i++) { + Ort::AllocatedStringPtr input_node_name = + session->GetInputNameAllocated(i, allocator); + char *temp_buf = new char[50]; + strcpy(temp_buf, input_node_name.get()); + inputNodeNames.push_back(temp_buf); + } + size_t OutputNodesNum = session->GetOutputCount(); + for (size_t i = 0; i < OutputNodesNum; i++) { + Ort::AllocatedStringPtr output_node_name = + session->GetOutputNameAllocated(i, allocator); + char *temp_buf = new char[10]; + strcpy(temp_buf, output_node_name.get()); + outputNodeNames.push_back(temp_buf); } + options = Ort::RunOptions{nullptr}; + + auto input_shape = + session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + auto output_shape = + session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + auto output_type = session->GetOutputTypeInfo(0) + .GetTensorTypeAndShapeInfo() + .GetElementType(); + + WarmUpSession(modelType); + return RET_OK; + } catch (const std::exception &e) { + const char *str1 = "[SAM]:"; + const char *str2 = e.what(); + std::string str_result = std::string(str1) + std::string(str2); + char *merged = new char[str_result.length() + 1]; + std::strcpy(merged, str_result.c_str()); + std::cout << merged << std::endl; + delete[] merged; + return "[SAM]:Create session failed."; + } } -const char *SAM::RunSession(const cv::Mat &iImg, std::vector &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result) -{ +const char *SAM::RunSession(const cv::Mat &iImg, + std::vector &oResult, + SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result) { #ifdef benchmark - clock_t starttime_1 = clock(); + clock_t starttime_1 = clock(); #endif // 
benchmark - Utils utilities; - const char *Ret = RET_OK; - cv::Mat processedImg; - utilities.PreProcess(iImg, imgSize, processedImg); - if (modelType < 4) - { - float *blob = new float[processedImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector inputNodeDims; - if (modelType == SEG::SAM_SEGMENT_ENCODER) - { - inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; - } - else if (modelType == SEG::SAM_SEGMENT_DECODER) - { - // Input size or SAM decoder model is 256x64x64 for the decoder - inputNodeDims = {1, 256, 64, 64}; - } - TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); - } - else - { -#ifdef USE_CUDA - half *blob = new half[processedImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; - TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); -#endif - } - - return Ret; + Utils utilities; + const char *Ret = RET_OK; + cv::Mat processedImg; + utilities.PreProcess(iImg, imgSize, processedImg); + float *blob = new float[processedImg.total() * 3]; + utilities.BlobFromImage(processedImg, blob); + std::vector inputNodeDims; + if (modelType == SEG::SAM_SEGMENT_ENCODER) { + inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; + } else if (modelType == SEG::SAM_SEGMENT_DECODER) { + // Input size or SAM decoder model is 256x64x64 for the decoder + inputNodeDims = {1, 256, 64, 64}; + } + TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, + utilities, result); + + return Ret; } template -char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, - SEG::MODEL_TYPE modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result) -{ - - switch (modelType) +const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, + N &blob, std::vector &inputNodeDims, + SEG::MODEL_TYPE modelType, + std::vector 
&oResult, + Utils &utilities, SEG::DL_RESULT &result) { + + switch (modelType) { + case SEG::SAM_SEGMENT_ENCODER: + // case OTHER_SAM_MODEL: { - case SEG::SAM_SEGMENT_ENCODER: - // case OTHER_SAM_MODEL: - { - Ort::Value inputTensor = Ort::Value::CreateTensor::type>( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), - inputNodeDims.data(), inputNodeDims.size()); + Ort::Value inputTensor = + Ort::Value::CreateTensor::type>( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(), + inputNodeDims.size()); #ifdef benchmark - clock_t starttime_2 = clock(); + clock_t starttime_2 = clock(); #endif // benchmark - auto outputTensor = session->Run(options, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), - outputNodeNames.size()); + auto outputTensor = + session->Run(options, inputNodeNames.data(), &inputTensor, 1, + outputNodeNames.data(), outputNodeNames.size()); #ifdef benchmark - clock_t starttime_3 = clock(); + clock_t starttime_3 = clock(); #endif // benchmark - Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo(); - auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo(); - std::vector outputNodeDims = tensor_info.GetShape(); - auto output = outputTensor.front().GetTensorMutableData::type>(); - delete[] blob; + Ort::TypeInfo typeInfo = outputTensor.front().GetTypeInfo(); + auto tensor_info = typeInfo.GetTensorTypeAndShapeInfo(); + std::vector outputNodeDims = tensor_info.GetShape(); + auto output = + outputTensor.front() + .GetTensorMutableData::type>(); + delete[] blob; - int embeddingSize = outputNodeDims[1] * outputNodeDims[2] * outputNodeDims[3]; // Flattened size - result.embeddings.assign(output, output + embeddingSize); // Save embeddings + int embeddingSize = outputNodeDims[1] * outputNodeDims[2] * + outputNodeDims[3]; // Flattened size + result.embeddings.assign(output, + output + embeddingSize); // Save 
embeddings #ifdef benchmark - clock_t starttime_4 = clock(); - double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; - double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; - double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } - else - { - std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } + clock_t starttime_4 = clock(); + double pre_process_time = + (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; + double process_time = + (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; + double post_process_time = + (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) { + std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " + << process_time << "ms inference, " << post_process_time + << "ms post-process." << std::endl; + } else { + std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " + << process_time << "ms inference, " << post_process_time + << "ms post-process." 
<< std::endl; + } #endif // benchmark - break; - } - case SEG::SAM_SEGMENT_DECODER: - { - // Use embeddings from the last result - std::vector embeddings = result.embeddings; - // Create tensor for decoder - std::vector decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements - - // Create point coordinates and labels + break; + } + case SEG::SAM_SEGMENT_DECODER: { + // Use embeddings from the last result + std::vector embeddings = result.embeddings; + // Create tensor for decoder + std::vector decoderInputDims = { + 1, 256, 64, 64}; // Adjust based on your decoder's requirements + + // Create point coordinates and labels #ifdef ROI - // Create a window for user interaction - namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE); + // Create a window for user interaction + namedWindow("Select and View Result", cv::WINDOW_AUTOSIZE); - // Let the user select the bounding box - cv::Rect bbox = selectROI("Select and View Result", iImg, false, false); + // Let the user select the bounding box + cv::Rect bbox = selectROI("Select and View Result", iImg, false, false); - // Check if a valid bounding box was selected - if (bbox.width == 0 || bbox.height == 0) - { - std::cerr << "No valid bounding box selected." << std::endl; - return "[SAM]: NO valid Box."; - } + // Check if a valid bounding box was selected + if (bbox.width == 0 || bbox.height == 0) { + std::cerr << "No valid bounding box selected." 
<< std::endl; + return "[SAM]: NO valid Box."; + } - std::vector boundingBoxes; - boundingBoxes.push_back(bbox); + std::vector boundingBoxes; + boundingBoxes.push_back(bbox); #endif // ROI // boundingBoxes.push_back(bbox1); // Declare timing variables BEFORE the loop #ifdef benchmark - clock_t starttime_2 = 0; - clock_t starttime_3 = 0; + clock_t starttime_2 = 0; + clock_t starttime_3 = 0; #endif // benchmark #ifdef ROI - for (const auto &bbox : boundingBoxes) + for (const auto &box : boundingBoxes) #else - for (const auto &bbox : result.boxes) + for (const auto &box : result.boxes) #endif // ROI - { - Ort::Value decoderInputTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - embeddings.data(), // Use the embeddings from the encoder - embeddings.size(), // Total number of elements - decoderInputDims.data(), - decoderInputDims.size()); - // Use center of bounding box as foreground point - float centerX = bbox.x + bbox.width / 2; - float centerY = bbox.y + bbox.height / 2; - - // Convert bounding box to points - std::vector pointCoords = { - (float)bbox.x, (float)bbox.y, // Top-left - (float)(bbox.x + bbox.width), (float)(bbox.y + bbox.height) // Bottom-right - }; - - std::vector pointCoordsScaled; - - std::vector pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y) - - // Labels for the points - std::vector pointLabels = {2.0f, 3.0f}; // Box prompt labels - std::vector pointLabelsDims = {1, 2}; - - // Create dummy mask_input and has_mask_input - std::vector maskInput(256 * 256, 0.0f); // Fill with zeros - std::vector maskInputDims = {1, 1, 256, 256}; - - std::vector hasMaskInput = {0.0f}; // No mask provided - std::vector hasMaskInputDims = {1}; - - utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); - - std::vector inputTensors = utilities.PrepareInputTensor( - decoderInputTensor, - pointCoordsScaled, - pointCoordsDims, - pointLabels, - pointLabelsDims, - maskInput, - maskInputDims, - 
hasMaskInput, - hasMaskInputDims); + { + Ort::Value decoderInputTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + embeddings.data(), // Use the embeddings from the encoder + embeddings.size(), // Total number of elements + decoderInputDims.data(), decoderInputDims.size()); + // Use center of bounding box as foreground point + float centerX = box.x + box.width / 2.0; + float centerY = box.y + box.height / 2.0; + + // Convert bounding box to points + std::vector pointCoords = { + (float)box.x, (float)box.y, // Top-left + (float)(box.x + box.width), + (float)(box.y + box.height) // Bottom-right + }; + + std::vector pointCoordsScaled; + + std::vector pointCoordsDims = {1, 2, + 2}; // 2 points, each with (x, y) + + // Labels for the points + std::vector pointLabels = {2.0f, 3.0f}; // Box prompt labels + std::vector pointLabelsDims = {1, 2}; + + // Create dummy mask_input and has_mask_input + std::vector maskInput(256 * 256, 0.0f); // Fill with zeros + std::vector maskInputDims = {1, 1, 256, 256}; + + std::vector hasMaskInput = {0.0f}; // No mask provided + std::vector hasMaskInputDims = {1}; + + utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + + std::vector inputTensors = utilities.PrepareInputTensor( + decoderInputTensor, pointCoordsScaled, pointCoordsDims, pointLabels, + pointLabelsDims, maskInput, maskInputDims, hasMaskInput, + hasMaskInputDims); #ifdef benchmark - starttime_2 = clock(); + starttime_2 = clock(); #endif // benchmark - auto output_tensors = session->Run( - options, - inputNodeNames.data(), - inputTensors.data(), - inputTensors.size(), - outputNodeNames.data(), - outputNodeNames.size()); + auto output_tensors = session->Run( + options, inputNodeNames.data(), inputTensors.data(), + inputTensors.size(), outputNodeNames.data(), outputNodeNames.size()); #ifdef benchmark - starttime_3 = clock(); + starttime_3 = clock(); #endif // benchmark - utilities.overlay(output_tensors, iImg, 
imgSize, result); - } - // Add the result to oResult - oResult.push_back(result); + utilities.overlay(output_tensors, iImg, imgSize, result); + } + // Add the result to oResult + oResult.push_back(result); - delete[] blob; + delete[] blob; #ifdef benchmark - clock_t starttime_4 = clock(); - double pre_process_time = (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; - double process_time = (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; - double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } - else - { - std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; - } -#endif // benchmark - break; + clock_t starttime_4 = clock(); + double pre_process_time = + (double)(starttime_2 - starttime_1) / CLOCKS_PER_SEC * 1000; + double process_time = + (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; + double post_process_time = + (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) { + std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " + << process_time << "ms inference, " << post_process_time + << "ms post-process." << std::endl; + } else { + std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " + << process_time << "ms inference, " << post_process_time + << "ms post-process." << std::endl; } +#endif // benchmark + break; + } - default: - std::cout << "[SAM]: " << "Not support model type." << std::endl; - } - return RET_OK; + default: + std::cout << "[SAM]: " << "Not support model type." 
<< std::endl; + } + return RET_OK; } -char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) -{ - clock_t starttime_1 = clock(); - Utils utilities; - cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3); - cv::Mat processedImg; - utilities.PreProcess(iImg, imgSize, processedImg); - if (modelType < 4) - { - float *blob = new float[iImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)}; - switch (modelType) - { - case SEG::SAM_SEGMENT_ENCODER: - { - Ort::Value input_tensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), - SAM_input_node_dims.data(), SAM_input_node_dims.size()); - auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), - outputNodeNames.size()); - delete[] blob; - clock_t starttime_4 = clock(); - double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. 
" << std::endl; - } - break; - } - - case SEG::SAM_SEGMENT_DECODER: - { - std::vector inputNodeDims = {1, 256, 64, 64}; // BUG: That was 236 instead of 256 - // Use embeddings from the last result - std::vector dummyEmbeddings(256 * 64 * 64, 1.0f); // Fill with zeros or any dummy values - std::vector decoderInputDims = {1, 256, 64, 64}; // Adjust based on your decoder's requirements - - // Create dummy point coordinates and labels - std::vector boundingBoxes = { - cv::Rect(0, 0, 100, 100), // Example bounding box with (x, y, width, height) - // cv::Rect(0, 0, 473, 359) // Another example bounding box - }; - for (const auto &bbox : boundingBoxes) - { - Ort::Value decoderInputTensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - dummyEmbeddings.data(), // Use the embeddings from the encoder - dummyEmbeddings.size(), // Total number of elements - decoderInputDims.data(), - decoderInputDims.size()); - // Convert bounding box to points - // Use center of bounding box as foreground point - float centerX = bbox.x + bbox.width / 2; - float centerY = bbox.y + bbox.height / 2; - - std::vector pointCoords = { - centerX, centerY // Center point (foreground) - }; - - std::vector pointCoordsDims = {1, 1, 2}; // 2 points, each with (x, y) - - std::vector pointCoordsScaled; - - utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); - - // Labels for the points - std::vector pointLabels = {1.0f}; // All points are foreground - std::vector pointLabelsDims = {1, 1}; - // Create dummy mask_input and has_mask_input - std::vector maskInput(256 * 256, 0.0f); // Fill with zeros - std::vector maskInputDims = {1, 1, 256, 256}; - std::vector hasMaskInput = {0.0f}; // No mask provided - std::vector hasMaskInputDims = {1}; - - std::vector inputTensors = utilities.PrepareInputTensor( - decoderInputTensor, - pointCoordsScaled, - pointCoordsDims, - pointLabels, - pointLabelsDims, - maskInput, - maskInputDims, - hasMaskInput, - 
hasMaskInputDims); - - auto output_tensors = session->Run( - options, - inputNodeNames.data(), - inputTensors.data(), - inputTensors.size(), - outputNodeNames.data(), - outputNodeNames.size()); - } - - outputNodeNames.size(); - delete[] blob; - clock_t starttime_4 = clock(); - double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl; - } - - break; - } - } +char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { + clock_t starttime_1 = clock(); + Utils utilities; + cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3); + cv::Mat processedImg; + utilities.PreProcess(iImg, imgSize, processedImg); + + float *blob = new float[iImg.total() * 3]; + utilities.BlobFromImage(processedImg, blob); + std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), + imgSize.at(1)}; + switch (modelType) { + case SEG::SAM_SEGMENT_ENCODER: { + Ort::Value input_tensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, + 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), + SAM_input_node_dims.size()); + auto output_tensors = + session->Run(options, inputNodeNames.data(), &input_tensor, 1, + outputNodeNames.data(), outputNodeNames.size()); + delete[] blob; + clock_t starttime_4 = clock(); + double post_process_time = + (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) { + std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time + << " ms. 
" << std::endl; } - else - { -#ifdef USE_CUDA - half *blob = new half[iImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), imgSize.at(1)}; - Ort::Value input_tensor = Ort::Value::CreateTensor(Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), SAM_input_node_dims.size()); - auto output_tensors = session->Run(options, inputNodeNames.data(), &input_tensor, 1, outputNodeNames.data(), outputNodeNames.size()); - delete[] blob; - clock_t starttime_4 = clock(); - double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) - { - std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl; - } -#endif + break; + } + + case SEG::SAM_SEGMENT_DECODER: { + std::vector inputNodeDims = { + 1, 256, 64, 64}; // BUG: That was 236 instead of 256 + // Use embeddings from the last result + std::vector dummyEmbeddings( + 256 * 64 * 64, 1.0f); // Fill with zeros or any dummy values + std::vector decoderInputDims = { + 1, 256, 64, 64}; // Adjust based on your decoder's requirements + + // Create dummy point coordinates and labels + std::vector boundingBoxes = { + cv::Rect(0, 0, 100, + 100), // Example bounding box with (x, y, width, height) + // cv::Rect(0, 0, 473, 359) // Another example bounding box + }; + for (const auto &bbox : boundingBoxes) { + Ort::Value decoderInputTensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + dummyEmbeddings.data(), // Use the embeddings from the encoder + dummyEmbeddings.size(), // Total number of elements + decoderInputDims.data(), decoderInputDims.size()); + // Convert bounding box to points + // Use center of bounding box as foreground point + float centerX = bbox.x + bbox.width / 2.0; + float centerY = bbox.y + bbox.height / 2.0; + + std::vector pointCoords = { + centerX, 
centerY // Center point (foreground) + }; + + std::vector pointCoordsDims = {1, 1, + 2}; // 2 points, each with (x, y) + + std::vector pointCoordsScaled; + + utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + + // Labels for the points + std::vector pointLabels = {1.0f}; // All points are foreground + std::vector pointLabelsDims = {1, 1}; + // Create dummy mask_input and has_mask_input + std::vector maskInput(256 * 256, 0.0f); // Fill with zeros + std::vector maskInputDims = {1, 1, 256, 256}; + std::vector hasMaskInput = {0.0f}; // No mask provided + std::vector hasMaskInputDims = {1}; + + std::vector inputTensors = utilities.PrepareInputTensor( + decoderInputTensor, pointCoordsScaled, pointCoordsDims, pointLabels, + pointLabelsDims, maskInput, maskInputDims, hasMaskInput, + hasMaskInputDims); + + auto output_tensors = session->Run( + options, inputNodeNames.data(), inputTensors.data(), + inputTensors.size(), outputNodeNames.data(), outputNodeNames.size()); } - return RET_OK; + + outputNodeNames.size(); + delete[] blob; + clock_t starttime_4 = clock(); + double post_process_time = + (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; + if (cudaEnable) { + std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time + << " ms. 
" << std::endl; + } + + break; + } + } + + return RET_OK; } diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 8b5338c..585dd13 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -1,59 +1,52 @@ #include "segmentation.h" -std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_PARAM> Initializer() -{ - std::vector> samSegmentors; - samSegmentors.push_back(std::make_unique()); - samSegmentors.push_back(std::make_unique()); - - std::unique_ptr samSegmentorEncoder = std::make_unique(); - std::unique_ptr samSegmentorDecoder = std::make_unique(); - SEG::DL_INIT_PARAM params_encoder; - SEG::DL_INIT_PARAM params_decoder; - - params_encoder.rectConfidenceThreshold = 0.1; - params_encoder.iouThreshold = 0.5; - params_encoder.modelPath = "SAM_encoder.onnx"; - params_encoder.imgSize = {1024, 1024}; - - params_decoder = params_encoder; - params_decoder.modelType = SEG::SAM_SEGMENT_DECODER; - params_decoder.modelPath = "SAM_mask_decoder.onnx"; +std::tuple>, SEG::DL_INIT_PARAM, + SEG::DL_INIT_PARAM> +Initializer() { + std::vector> samSegmentors; + samSegmentors.push_back(std::make_unique()); + samSegmentors.push_back(std::make_unique()); + + std::unique_ptr samSegmentorEncoder = std::make_unique(); + std::unique_ptr samSegmentorDecoder = std::make_unique(); + SEG::DL_INIT_PARAM params_encoder; + SEG::DL_INIT_PARAM params_decoder; + + params_encoder.rectConfidenceThreshold = 0.1; + params_encoder.iouThreshold = 0.5; + params_encoder.modelPath = "SAM_encoder.onnx"; + params_encoder.imgSize = {1024, 1024}; + + params_decoder = params_encoder; + params_decoder.modelType = SEG::SAM_SEGMENT_DECODER; + params_decoder.modelPath = "SAM_mask_decoder.onnx"; #ifdef USE_CUDA - params_encoder.cudaEnable = true; + params_encoder.cudaEnable = true; #else - params_encoder.cudaEnable = false; + params_encoder.cudaEnable = false; #endif - samSegmentorEncoder->CreateSession(params_encoder); - samSegmentorDecoder->CreateSession(params_decoder); - samSegmentors[0] = 
std::move(samSegmentorEncoder); - samSegmentors[1] = std::move(samSegmentorDecoder); - return {std::move(samSegmentors), params_encoder, params_decoder}; + samSegmentorEncoder->CreateSession(params_encoder); + samSegmentorDecoder->CreateSession(params_decoder); + samSegmentors[0] = std::move(samSegmentorEncoder); + samSegmentors[1] = std::move(samSegmentorDecoder); + return {std::move(samSegmentors), params_encoder, params_decoder}; } -std::vector SegmentAnything(std::vector> &samSegmentors, SEG::DL_INIT_PARAM ¶ms_encoder, SEG::DL_INIT_PARAM ¶ms_decoder, cv::Mat &img) -{ +std::vector +SegmentAnything(std::vector> &samSegmentors, + const SEG::DL_INIT_PARAM ¶ms_encoder, + const SEG::DL_INIT_PARAM ¶ms_decoder, cv::Mat &img) { - std::vector resSam; - SEG::DL_RESULT res; + std::vector resSam; + SEG::DL_RESULT res; - SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; - samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); + SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; + samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); - modelTypeRef = params_decoder.modelType; - samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); + modelTypeRef = params_decoder.modelType; + samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); - // cv::destroyAllWindows(); - cv::Mat finalMask = res.masks[0]; - std::cout << "Final mask size: " << finalMask.size() << std::endl; - - for (const auto &mask : res.masks) - { - cv::imshow("Mask", mask); - cv::waitKey(0); - } - cv::destroyAllWindows(); - return std::move(res.masks); + return std::move(res.masks); } From 19571967dba60eced049acca0e302a6db9711829 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 2 Sep 2025 18:10:33 +0200 Subject: [PATCH 09/35] Refactor post processing for better accuracy and performance. 
Also correcting preprocessing scaling issue on long images --- include/utils.h | 3 +- src/main.cpp | 9 +- src/sam_inference.cpp | 10 +- src/utils.cpp | 221 ++++++++++++++++-------------------- 4 files changed, 93 insertions(+), 150 deletions(-) diff --git a/include/utils.h b/include/utils.h index 333c9e3..e81b236 100644 --- a/include/utils.h +++ b/include/utils.h @@ -18,7 +18,6 @@ class Utils Utils(); ~Utils(); - void overlay(std::vector &output_tensors, const cv::Mat &iImg, std::vector iImgSize, SEG::DL_RESULT &result); char *PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat &oImg); void ScaleBboxPoints(const cv::Mat &iImg, std::vector iImgSize, std::vector &pointCoords, std::vector &PointsCoordsScaled); @@ -26,6 +25,8 @@ std::vector &pointLabels, std::vector pointLabelsDims, std::vector &maskInput, std::vector maskInputDims, std::vector &hasMaskInput, std::vector hasMaskInputDims); + void PostProcess(std::vector &output_tensors, const cv::Mat &iImg, std::vector iImgSize, SEG::DL_RESULT &result); + // Definition: Flattened image to blob (and normalizaed) for deep learning inference. Also reorganize from HWC to CHW. // Note: Code in header file since it is used outside of this utils src code. 
template diff --git a/src/main.cpp b/src/main.cpp index 2b2d602..9399779 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -20,14 +20,7 @@ int main() cv::Mat img = cv::imread(img_path); std::vector masks; masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, img); - for (int j = 0; j < masks.size(); j++) - { - std::cout << "Press any key to exit" << std::endl; - cv::imshow("Result of MASKS", masks[j]); - cv::waitKey(0); - cv::destroyAllWindows(); - } - std::cout << "OOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOo" << std::endl; + } } return 0; diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 9c0463b..3f6a09e 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -206,7 +206,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector decoderInputDims = { 1, 256, 64, 64}; // Adjust based on your decoder's requirements - // Create point coordinates and labels + // Create point coordinates for testing purposes #ifdef ROI // Create a window for user interaction @@ -224,8 +224,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector boundingBoxes; boundingBoxes.push_back(bbox); #endif // ROI - // boundingBoxes.push_back(bbox1); - // Declare timing variables BEFORE the loop + #ifdef benchmark clock_t starttime_2 = 0; clock_t starttime_3 = 0; @@ -255,8 +254,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector pointCoordsScaled; - std::vector pointCoordsDims = {1, 2, - 2}; // 2 points, each with (x, y) + std::vector pointCoordsDims = {1, 2, 2}; // 2 points, each with (x, y) // Labels for the points std::vector pointLabels = {2.0f, 3.0f}; // Box prompt labels @@ -287,7 +285,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, starttime_3 = clock(); #endif // benchmark - utilities.overlay(output_tensors, iImg, imgSize, result); + utilities.PostProcess(output_tensors, iImg, imgSize, result); } // Add the result 
to oResult oResult.push_back(result); diff --git a/src/utils.cpp b/src/utils.cpp index 8d76ac1..f7721db 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -30,7 +30,7 @@ char *Utils::PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat } else { - resizeScales = iImg.rows / (float)iImgSize.at(0); + resizeScales = iImg.rows / (float)iImgSize.at(1); cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); } cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); @@ -118,150 +118,101 @@ std::vector Utils::PrepareInputTensor(Ort::Value &decoderInputTensor return inputTensors; } -void Utils::overlay(std::vector &output_tensors, const cv::Mat &iImg, std::vector imgSize, SEG::DL_RESULT &result) +void Utils::PostProcess(std::vector &output_tensors, const cv::Mat &iImg, std::vector imgSize, SEG::DL_RESULT &result) { - // Process decoder output (masks) - if (output_tensors.size() > 0) + if (output_tensors.size() < 2) { - // Get the masks from the output tensor - auto scoresTensor = std::move(output_tensors[0]); // IoU scores - auto masksTensor = std::move(output_tensors[1]); // First output should be the masks PROBABLY WRONG - auto masksInfo = masksTensor.GetTensorTypeAndShapeInfo(); - auto masksShape = masksInfo.GetShape(); + std::cerr << "[SAM]: Decoder returned insufficient outputs." 
<< std::endl; + return; + } + + // Assume [scores, masks]; consider shape-based detection later + auto scoresTensor = std::move(output_tensors[0]); + auto masksTensor = std::move(output_tensors[1]); + + auto masksInfo = masksTensor.GetTensorTypeAndShapeInfo(); + auto masksShape = masksInfo.GetShape(); + + if (masksShape.size() == 4) + { + auto masksData = masksTensor.GetTensorMutableData(); + auto scoresData = scoresTensor.GetTensorMutableData(); - if (masksShape.size() == 4) + const size_t numMasks = static_cast(masksShape[1]); + const size_t height = static_cast(masksShape[2]); + const size_t width = static_cast(masksShape[3]); + + // Pick best mask by score + float bestScore = -1.0f; + size_t bestMaskIndex = 0; + for (size_t i = 0; i < numMasks; ++i) { - auto masksData = masksTensor.GetTensorMutableData(); - auto scoresData = scoresTensor.GetTensorMutableData(); - - size_t batchSize = masksShape[0]; // Usually 1 - size_t numMasks = masksShape[1]; // Number of masks (typically 1) - size_t height = masksShape[2]; // Height of mask - size_t width = masksShape[3]; // Width of mask - - // Find the best mask (highest IoU score) - float bestScore = -1; - size_t bestMaskIndex = 0; - - for (size_t i = 0; i < numMasks; ++i) - { - - float score = scoresData[i]; - - if (score > bestScore) - { - bestScore = score; - bestMaskIndex = i; - } - } - // std::cout << "Best mask index: " << bestMaskIndex << ", Score: " << bestScore << std::endl; - - // Create OpenCV Mat for the mask - cv::Mat mask = cv::Mat::zeros(height, width, CV_8UC1); - - // Convert float mask to binary mask - for (size_t h = 0; h < height; ++h) - { - for (size_t w = 0; w < width; ++w) - { - size_t idx = (bestMaskIndex * height * width) + (h * width) + w; - float value = masksData[idx]; - mask.at(h, w) = (value > 0.5f) ? 255 : 0; // Threshold at 0.5 - } - } - - // 1. 
Calculate the dimensions the image had during preprocessing - float scale; - int processedWidth, processedHeight; - if (iImg.cols >= iImg.rows) - { - scale = (float)imgSize[0] / iImg.cols; - processedWidth = imgSize[0]; - processedHeight = int(iImg.rows * scale); - } - else - { - scale = (float)imgSize[1] / iImg.rows; - processedWidth = int(iImg.cols * scale); - processedHeight = imgSize[1]; - } - - // 3. Extract the portion that corresponds to the actual image (no padding) - int cropWidth = std::min(256, int(256 * processedWidth / (float)imgSize[0])); - int cropHeight = std::min(256, int(256 * processedHeight / (float)imgSize[1])); - cv::Mat croppedMask = mask(cv::Rect(0, 0, cropWidth, cropHeight)); - - // 4. Resize directly to original image dimensions in one step - cv::Mat finalMask; - - // Use INTER_NEAREST for binary masks - preserves hard edges - cv::resize(croppedMask, finalMask, cv::Size(iImg.cols, iImg.rows), 0, 0, cv::INTER_NEAREST); - - ////////////////////// GUIDED BILATERAL FILTER ///////////////////////// - // Convert the upscaled mask to CV_8UC1 if necessary - if (finalMask.type() != CV_8UC1) - { - finalMask.convertTo(finalMask, CV_8UC1); - } - - // Apply the Guided Filter - cv::Mat filteredMask; - int radius = 2; - double eps = 0.01; - cv::ximgproc::guidedFilter(iImg, finalMask, finalMask, radius, eps); - ////////////////////// END: GUIDED BILATERAL FILTER ///////////////////////// - - ////////////////////// MORPHOLOGICAN OPERATIONS ///////////////////////// - // Morphological operations to clean up the mask - int kernelSize = std::max(5, std::min(iImg.cols, iImg.rows) / 100); // Adaptive size - cv::Mat kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(kernelSize, kernelSize)); - - // CLOSE operation: fills small holes in the mask - cv::morphologyEx(finalMask, finalMask, cv::MORPH_CLOSE, kernel); - - // OPEN operation: removes small noise - cv::morphologyEx(finalMask, finalMask, cv::MORPH_OPEN, kernel); - - ////////////////////// END: 
MORPHOLOGICAN OPERATIONS ///////////////////////// - - // Re-threshold after resizing to ensure binary mask (critical step) - - cv::threshold(finalMask, finalMask, 127, 255, cv::THRESH_BINARY); - result.masks.push_back(finalMask); - - /*// Add IoU scores if available (typically second tensor) - if (output_tensors.size() > 1) { - auto scoresTensor = std::move(output_tensors[1]); - auto scoresData = scoresTensor.GetTensorMutableData(); - if (i < scoresTensor.GetTensorTypeAndShapeInfo().GetShape()[1]) { - result.confidence = scoresData[i]; - std::cout << "Mask confidence: " << result.confidence << std::endl; - } - }*/ - - // Find contours of the mask - std::vector> contours; - cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); - - // Create a semi-transparent overlay - cv::Mat colorMask = cv::Mat::zeros(iImg.size(), CV_8UC3); - colorMask.setTo(cv::Scalar(0, 200, 0), finalMask); // Green fill - cv::addWeighted(iImg, 0.7, colorMask, 0.3, 0, iImg); - - // Draw contours with a thick, high-contrast outline - cv::drawContours(iImg, contours, -1, cv::Scalar(0, 255, 255), 2); // Yellow outline - - // Save or display the result - cv::imwrite("segmentation_result_" + std::to_string(bestMaskIndex) + ".jpg", iImg); - cv::imwrite("mask_" + std::to_string(bestMaskIndex) + ".jpg", finalMask); + const float s = scoresData ? scoresData[i] : 0.0f; + if (s > bestScore) { bestScore = s; bestMaskIndex = i; } + } + + // Compute preprocessed region (top-left anchored) + float scale; + int processedWidth, processedHeight; + if (iImg.cols >= iImg.rows) + { + scale = static_cast(imgSize[0]) / static_cast(iImg.cols); + processedWidth = imgSize[0]; + processedHeight = static_cast(iImg.rows * scale); } else { - std::cerr << "[SAM]: Unexpected mask tensor shape." 
<< std::endl; + scale = static_cast(imgSize[1]) / static_cast(iImg.rows); + processedWidth = static_cast(iImg.cols * scale); + processedHeight = imgSize[1]; } + + auto clampDim = [](int v, int lo, int hi) { return std::max(lo, std::min(v, hi)); }; + + // Wrap selected mask plane as float prob map + const size_t planeOffset = bestMaskIndex * height * width; + cv::Mat prob32f(static_cast(height), static_cast(width), CV_32F, + const_cast(masksData + planeOffset)); + + // Crop in mask space using proportional dimensions (no hardcoded 256) + const int cropW = clampDim(static_cast(std::round(static_cast(width) * processedWidth / static_cast(imgSize[0]))), 1, static_cast(width)); + const int cropH = clampDim(static_cast(std::round(static_cast(height) * processedHeight / static_cast(imgSize[1]))), 1, static_cast(height)); + cv::Mat probCropped = prob32f(cv::Rect(0, 0, cropW, cropH)); + + // Resize probabilities to original image (linear) + cv::Mat probResized; + cv::resize(probCropped, probResized, cv::Size(iImg.cols, iImg.rows), 0, 0, cv::INTER_LINEAR); + + // Threshold once to binary mask + cv::Mat finalMask; + cv::compare(probResized, 0.5f, finalMask, cv::CMP_GT); // CV_8U 0/255 + + // Morphological cleanup (light, then ensure binary) + int kernelSize = std::max(5, std::min(iImg.cols, iImg.rows) / 100); + cv::Mat kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(kernelSize, kernelSize)); + cv::morphologyEx(finalMask, finalMask, cv::MORPH_CLOSE, kernel); + cv::morphologyEx(finalMask, finalMask, cv::MORPH_OPEN, kernel); + cv::threshold(finalMask, finalMask, 127, 255, cv::THRESH_BINARY); + + // Save mask + result.masks.push_back(finalMask); + + // Overlay for display on a copy (iImg is const) + cv::Mat overlay = iImg.clone(); + std::vector> contours; + cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); + + cv::Mat colorMask = cv::Mat::zeros(overlay.size(), CV_8UC3); + colorMask.setTo(cv::Scalar(0, 200, 0), finalMask); + 
cv::addWeighted(overlay, 0.7, colorMask, 0.3, 0, overlay); + cv::drawContours(overlay, contours, -1, cv::Scalar(0, 255, 255), 2); + + cv::imshow("SAM Segmentation", overlay); + cv::waitKey(0); + cv::destroyAllWindows(); } else { - std::cerr << "[SAM]: No masks found in the output tensor." << std::endl; + std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl; } } \ No newline at end of file From e6623fc08cc32564f2752036bcd17c11d70a3550 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 2 Sep 2025 21:06:50 +0200 Subject: [PATCH 10/35] Added tests (still not working with catkin) --- CMakeLists.txt | 44 ++++++++++++++---- package.xml | 7 ++- test/sam_test.cpp | 114 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+), 11 deletions(-) create mode 100644 test/sam_test.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 8cb430a..f4087dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,8 +1,6 @@ -cmake_minimum_required(VERSION 3.5) +cmake_minimum_required(VERSION 3.0.2) -set(PROJECT_NAME sam_onnx_ros) - -project(${PROJECT_NAME} VERSION 0.0.1 LANGUAGES CXX) +project(sam_onnx_ros) # -------------- CMake Policies ------------------# #add_compile_options(-Wall -Werror=all) @@ -30,6 +28,11 @@ include_directories(/usr/local/cuda/include) find_package(catkin REQUIRED COMPONENTS + roscpp + tue_config + tue_filesystem + code_profiler + #onnxruntime_ros ) @@ -62,12 +65,22 @@ set(PROJECT_SOURCES src/utils.cpp ) -add_executable(${PROJECT_NAME} ${PROJECT_SOURCES}) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include) - -# Link OpenCV libraries along with ONNX Runtime -target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so) +# Build core library (no main.cpp here) +add_library(sam_onnx_ros_core + src/sam_inference.cpp + src/segmentation.cpp + src/utils.cpp +) +target_link_libraries(sam_onnx_ros_core + ${OpenCV_LIBS} + ${catkin_LIBRARIES} + 
${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so +) +target_include_directories(sam_onnx_ros_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) +# Main executable links the core lib +add_executable(${PROJECT_NAME} src/main.cpp) +target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core) # Copy sam_.onnx file to the same folder of the executable file configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) @@ -78,6 +91,19 @@ add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/images ) +# Enable testing and add a gtest +if (CATKIN_ENABLE_TESTING) + find_package(catkin_lint_cmake REQUIRED) + catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") + + catkin_add_gtest(sam_onnx_ros_tests test/sam_test.cpp) + if(TARGET sam_onnx_ros_tests) + target_link_libraries(sam_onnx_ros_tests sam_onnx_ros_core ${catkin_LIBRARIES}) + target_include_directories(sam_onnx_ros_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + endif() +endif() + + #If you want to debug set(CMAKE_BUILD_TYPE Debug) set(CMAKE_CXX_FLAGS_DEBUG "-g") diff --git a/package.xml b/package.xml index cde009a..250abed 100644 --- a/package.xml +++ b/package.xml @@ -12,14 +12,17 @@ ToDo catkin + roscpp + libpcl-common + libopencv-dev libopencv-dev onnxruntime_ros onnxruntime_ros - catkin_lint_cmake - +gtest +rostest doxygen diff --git a/test/sam_test.cpp b/test/sam_test.cpp new file mode 100644 index 0000000..5f1024f --- /dev/null +++ b/test/sam_test.cpp @@ -0,0 +1,114 @@ +#include "segmentation.h" +#include "sam_inference.h" +#include +#include +#include "dl_types.h" +#include "utils.h" +#include + +class SamInferenceTest : public ::testing::Test +{ +protected: + void SetUp() override + { + // Create test images with different characteristics + testImage_640x640 = cv::Mat::ones(640, 640, CV_8UC3) * 255; + testImage_800x600 = cv::Mat::ones(600, 800, CV_8UC3) * 
128; + + // Create a more realistic test image with some patterns + testImage_realistic = cv::Mat(640, 640, CV_8UC3); + cv::randu(testImage_realistic, cv::Scalar(0,0,0), cv::Scalar(255,255,255)); + + // Setup common parameters + NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows }; + + sam = std::make_unique(); + params.rectConfidenceThreshold = 0.1f; + params.iouThreshold = 0.5f; + params.imgSize = {1024, 1024}; + params.modelType = SEG::SAM_SEGMENT_ENCODER; + params.modelPath = "SAM_encoder.onnx"; // copied to build/ by CMake +#ifdef USE_CUDA + params.cudaEnable = true; +#else + params.cudaEnable = false; +#endif + } + + void TearDown() override { sam.reset(); } + + // Test data + Utils utilities; + cv::Mat testImage_640x640, testImage_800x600, testImage_realistic; + SEG::DL_INIT_PARAM params; + std::unique_ptr sam; + std::vector NonSquareImgSize; +}; + + + +TEST_F(SamInferenceTest, ObjectCreation) +{ + EXPECT_NO_THROW({ + SAM localSam; + }); +} + +TEST_F(SamInferenceTest, PreProcessSquareImage) +{ + cv::Mat processedImg; + const char* result = utilities.PreProcess(testImage_640x640, params.imgSize, processedImg); + + EXPECT_EQ(result, nullptr) << "PreProcess should succeed"; + EXPECT_EQ(processedImg.size(), cv::Size(1024, 1024)) << "Output should be letterboxed to 1024x1024"; + EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty"; +} + +TEST_F(SamInferenceTest, PreProcessRectangularImage) +{ + cv::Mat processedImg; + const char* result = utilities.PreProcess(testImage_800x600, NonSquareImgSize, processedImg); + + EXPECT_EQ(result, nullptr) << "PreProcess should succeed"; + EXPECT_EQ(processedImg.size(), cv::Size(800, 600)) << "Output should be letterboxed to 800x600"; + EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty"; +} + +TEST_F(SamInferenceTest, CreateSessionWithValidModel) +{ + if (!std::filesystem::exists("SAM_encoder.onnx")) { + GTEST_SKIP() << "Model not found in build dir"; + } + const 
char* result = sam->CreateSession(params); + EXPECT_EQ(result, nullptr) << "CreateSession should succeed with valid parameters"; +} + +TEST_F(SamInferenceTest, CreateSessionWithInvalidModel) +{ + params.modelPath = "nonexistent_model.onnx"; + const char* result = sam->CreateSession(params); + EXPECT_NE(result, nullptr) << "CreateSession should fail with invalid model path"; +} + +TEST_F(SamInferenceTest, FullInferencePipeline) +{ + if (!std::filesystem::exists("SAM_encoder.onnx") || + !std::filesystem::exists("SAM_mask_decoder.onnx")) { + GTEST_SKIP() << "Models not found in build dir"; + } + + // Use the package Initializer/SegmentAnything for the full pipeline + std::vector> samSegmentors; + SEG::DL_INIT_PARAM params_encoder, params_decoder; + std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); + + auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic); + EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector"; +} + +// Run all tests +int main(int argc, char **argv) +{ + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} \ No newline at end of file From bdaf3178a40027db14e755fae88aca2ad1bdaa28 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Thu, 4 Sep 2025 12:55:03 +0200 Subject: [PATCH 11/35] Fixed catkin workspace for both code and tests --- CMakeLists.txt | 26 +++++++++++++------------- package.xml | 8 ++------ src/main.cpp | 2 +- src/sam_inference.cpp | 2 +- src/segmentation.cpp | 4 ++-- test/sam_test.cpp | 35 +++++++++++++++-------------------- 6 files changed, 34 insertions(+), 43 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f4087dc..cece29a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -28,10 +28,10 @@ include_directories(/usr/local/cuda/include) find_package(catkin REQUIRED COMPONENTS - roscpp - tue_config - tue_filesystem - code_profiler + # roscpp + # tue_config + # tue_filesystem + # code_profiler #onnxruntime_ros ) @@ -42,7 
+42,8 @@ find_package(catkin REQUIRED catkin_package( INCLUDE_DIRS include - LIBRARIES ${PROJECT_NAME} + #LIBRARIES ${PROJECT_NAME} + LIBRARIES sam_onnx_ros_core CATKIN_DEPENDS DEPENDS OpenCV ) @@ -83,18 +84,18 @@ add_executable(${PROJECT_NAME} src/main.cpp) target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core) # Copy sam_.onnx file to the same folder of the executable file -configure_file(../hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) -configure_file(../hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) +configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) +configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) # Create folder name images in the same folder of the executable file add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/images ) -# Enable testing and add a gtest +# # Enable testing if (CATKIN_ENABLE_TESTING) - find_package(catkin_lint_cmake REQUIRED) - catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") +# find_package(catkin_lint_cmake REQUIRED) +# catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") catkin_add_gtest(sam_onnx_ros_tests test/sam_test.cpp) if(TARGET sam_onnx_ros_tests) @@ -103,7 +104,6 @@ if (CATKIN_ENABLE_TESTING) endif() endif() - #If you want to debug -set(CMAKE_BUILD_TYPE Debug) -set(CMAKE_CXX_FLAGS_DEBUG "-g") +# set(CMAKE_BUILD_TYPE Debug) +# set(CMAKE_CXX_FLAGS_DEBUG "-g") diff --git a/package.xml b/package.xml index 250abed..b00e6d6 100644 --- a/package.xml +++ b/package.xml @@ -9,20 +9,16 @@ Iason Theodorou - ToDo + BSD catkin - roscpp - libpcl-common - libopencv-dev libopencv-dev onnxruntime_ros onnxruntime_ros + 
catkin_lint_cmake -gtest -rostest doxygen diff --git a/src/main.cpp b/src/main.cpp index 9399779..454e086 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -11,7 +11,7 @@ int main() SEG::DL_INIT_PARAM params_decoder; std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); - std::filesystem::path imgs_path = current_path / "../../hero_sam/pipeline/build/images"; + std::filesystem::path imgs_path = "/home/amigo/Documents/repos/hero_sam/pipeline/build/images"; // current_path / <- you could use for (auto &i : std::filesystem::directory_iterator(imgs_path)) { if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 3f6a09e..c9bacbe 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -85,7 +85,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { for (size_t i = 0; i < OutputNodesNum; i++) { Ort::AllocatedStringPtr output_node_name = session->GetOutputNameAllocated(i, allocator); - char *temp_buf = new char[10]; + char *temp_buf = new char[50]; strcpy(temp_buf, output_node_name.get()); outputNodeNames.push_back(temp_buf); } diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 585dd13..25b8fae 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -14,12 +14,12 @@ Initializer() { params_encoder.rectConfidenceThreshold = 0.1; params_encoder.iouThreshold = 0.5; - params_encoder.modelPath = "SAM_encoder.onnx"; + params_encoder.modelPath = "/home/amigo//Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx"; params_encoder.imgSize = {1024, 1024}; params_decoder = params_encoder; params_decoder.modelType = SEG::SAM_SEGMENT_DECODER; - params_decoder.modelPath = "SAM_mask_decoder.onnx"; + params_decoder.modelPath = "/home/amigo/Documents/repos/sam_onnx_ros/build/SAM_mask_decoder.onnx"; #ifdef USE_CUDA params_encoder.cudaEnable = true; diff 
--git a/test/sam_test.cpp b/test/sam_test.cpp index 5f1024f..75a9c98 100644 --- a/test/sam_test.cpp +++ b/test/sam_test.cpp @@ -22,27 +22,25 @@ class SamInferenceTest : public ::testing::Test // Setup common parameters NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows }; - sam = std::make_unique(); - params.rectConfidenceThreshold = 0.1f; - params.iouThreshold = 0.5f; - params.imgSize = {1024, 1024}; - params.modelType = SEG::SAM_SEGMENT_ENCODER; - params.modelPath = "SAM_encoder.onnx"; // copied to build/ by CMake + // Use the package Initializer/SegmentAnything for the full pipeline + + std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); + #ifdef USE_CUDA - params.cudaEnable = true; + params_encoder.cudaEnable = true; #else - params.cudaEnable = false; + params_encoder.cudaEnable = false; #endif } - void TearDown() override { sam.reset(); } + void TearDown() override { samSegmentors[0].reset(); samSegmentors[1].reset(); } // Test data Utils utilities; cv::Mat testImage_640x640, testImage_800x600, testImage_realistic; - SEG::DL_INIT_PARAM params; - std::unique_ptr sam; std::vector NonSquareImgSize; + std::vector> samSegmentors; + SEG::DL_INIT_PARAM params_encoder, params_decoder; }; @@ -57,7 +55,7 @@ TEST_F(SamInferenceTest, ObjectCreation) TEST_F(SamInferenceTest, PreProcessSquareImage) { cv::Mat processedImg; - const char* result = utilities.PreProcess(testImage_640x640, params.imgSize, processedImg); + const char* result = utilities.PreProcess(testImage_640x640, params_encoder.imgSize, processedImg); EXPECT_EQ(result, nullptr) << "PreProcess should succeed"; EXPECT_EQ(processedImg.size(), cv::Size(1024, 1024)) << "Output should be letterboxed to 1024x1024"; @@ -79,14 +77,14 @@ TEST_F(SamInferenceTest, CreateSessionWithValidModel) if (!std::filesystem::exists("SAM_encoder.onnx")) { GTEST_SKIP() << "Model not found in build dir"; } - const char* result = sam->CreateSession(params); - EXPECT_EQ(result, nullptr) << 
"CreateSession should succeed with valid parameters"; + + EXPECT_NE(samSegmentors[0], nullptr) << "CreateSession should succeed with valid parameters"; } TEST_F(SamInferenceTest, CreateSessionWithInvalidModel) { - params.modelPath = "nonexistent_model.onnx"; - const char* result = sam->CreateSession(params); + params_encoder.modelPath = "nonexistent_model.onnx"; + const char* result = samSegmentors[0]->CreateSession(params_encoder); EXPECT_NE(result, nullptr) << "CreateSession should fail with invalid model path"; } @@ -97,10 +95,7 @@ TEST_F(SamInferenceTest, FullInferencePipeline) GTEST_SKIP() << "Models not found in build dir"; } - // Use the package Initializer/SegmentAnything for the full pipeline - std::vector> samSegmentors; - SEG::DL_INIT_PARAM params_encoder, params_decoder; - std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); + auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic); EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector"; From bdab5a14c3ef617a7381e2986d2041a05011ed88 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 5 Sep 2025 12:38:53 +0200 Subject: [PATCH 12/35] fixed functionallity for the tests to pass and added logging definition --- src/main.cpp | 3 ++- src/sam_inference.cpp | 4 +++- src/utils.cpp | 6 +++++- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/main.cpp b/src/main.cpp index 454e086..c9624c5 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -3,6 +3,7 @@ #include #include #include + int main() { // Running inference @@ -11,7 +12,7 @@ int main() SEG::DL_INIT_PARAM params_decoder; std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); - std::filesystem::path imgs_path = "/home/amigo/Documents/repos/hero_sam/pipeline/build/images"; // current_path / <- you could use + std::filesystem::path imgs_path = 
"/home/amigo/Documents/repos/hero_sam/sam_inference/build/images"; // current_path / <- you could use for (auto &i : std::filesystem::directory_iterator(imgs_path)) { if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index c9bacbe..24af832 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -3,7 +3,7 @@ #include #define benchmark -#define ROI +//#define ROI SAM::SAM() {} @@ -223,6 +223,8 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector boundingBoxes; boundingBoxes.push_back(bbox); +#else + result.boxes.push_back(cv::Rect(0, 0, iImg.cols, iImg.rows)); #endif // ROI #ifdef benchmark diff --git a/src/utils.cpp b/src/utils.cpp index f7721db..ca66b6c 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1,5 +1,6 @@ #include "utils.h" #include // for guided filter +#define LOGGING // Constructor Utils::Utils() @@ -33,7 +34,8 @@ char *Utils::PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat resizeScales = iImg.rows / (float)iImgSize.at(1); cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); } - cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); + //cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); + cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(1), iImgSize.at(0), CV_8UC3); oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows))); oImg = tempImg; @@ -198,6 +200,7 @@ void Utils::PostProcess(std::vector &output_tensors, const cv::Mat & result.masks.push_back(finalMask); // Overlay for display on a copy (iImg is const) + #ifdef LOGGING cv::Mat overlay = iImg.clone(); std::vector> contours; cv::findContours(finalMask, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_SIMPLE); @@ -210,6 +213,7 @@ void Utils::PostProcess(std::vector &output_tensors, const cv::Mat & cv::imshow("SAM Segmentation", overlay); cv::waitKey(0); 
cv::destroyAllWindows(); + #endif // LOGGING } else { From 8b7f91378c34329271d56ff845fb8e3a3a9432a6 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 5 Sep 2025 12:56:59 +0200 Subject: [PATCH 13/35] renamed private members of utils and sam_inference --- include/sam_inference.h | 22 +++---- include/utils.h | 4 +- src/sam_inference.cpp | 132 ++++++++++++++++++++-------------------- src/utils.cpp | 12 ++-- 4 files changed, 85 insertions(+), 85 deletions(-) diff --git a/include/sam_inference.h b/include/sam_inference.h index d63701c..7bff0b1 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -33,17 +33,17 @@ class SAM std::vector classes{}; private: - Ort::Env env; - std::unique_ptr session; - bool cudaEnable; - Ort::RunOptions options; - std::vector inputNodeNames; - std::vector outputNodeNames; - - SEG::MODEL_TYPE modelType; - std::vector imgSize; - float rectConfidenceThreshold; - float iouThreshold; + Ort::Env _env; + std::unique_ptr _session; + bool _cudaEnable; + Ort::RunOptions _options; + std::vector _inputNodeNames; + std::vector _outputNodeNames; + + SEG::MODEL_TYPE _modelType; + std::vector _imgSize; + float _rectConfidenceThreshold; + float _iouThreshold; }; #endif // SAMINFERENCE_H \ No newline at end of file diff --git a/include/utils.h b/include/utils.h index e81b236..6cb8819 100644 --- a/include/utils.h +++ b/include/utils.h @@ -51,8 +51,8 @@ class Utils } private: - float resizeScales; - float resizeScalesBbox; // letterbox scale + float _resizeScales; + float _resizeScalesBbox; // letterbox scale }; #endif // UTILS_H \ No newline at end of file diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 24af832..f12c56b 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -9,10 +9,10 @@ SAM::SAM() {} SAM::~SAM() { // Clean up input/output node names - for (auto &name : inputNodeNames) { + for (auto &name : _inputNodeNames) { delete[] name; } - for (auto &name : outputNodeNames) { + for (auto &name : 
_outputNodeNames) { delete[] name; } } @@ -28,19 +28,19 @@ template <> struct TypeToTensorType { const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { const char *Ret = RET_OK; - if (session) { - session.reset(); // Release previous session + if (_session) { + _session.reset(); // Release previous _session // Clear node names - for (auto &name : inputNodeNames) { + for (auto &name : _inputNodeNames) { delete[] name; } - inputNodeNames.clear(); + _inputNodeNames.clear(); - for (auto &name : outputNodeNames) { + for (auto &name : _outputNodeNames) { delete[] name; } - outputNodeNames.clear(); + _outputNodeNames.clear(); } std::regex pattern("[\u4e00-\u9fa5]"); bool result = std::regex_search(iParams.modelPath, pattern); @@ -51,55 +51,55 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { return Ret; } try { - rectConfidenceThreshold = iParams.rectConfidenceThreshold; - iouThreshold = iParams.iouThreshold; - imgSize = iParams.imgSize; - modelType = iParams.modelType; - cudaEnable = iParams.cudaEnable; - env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam"); - Ort::SessionOptions sessionOption; + _rectConfidenceThreshold = iParams.rectConfidenceThreshold; + _iouThreshold = iParams.iouThreshold; + _imgSize = iParams.imgSize; + _modelType = iParams.modelType; + _cudaEnable = iParams.cudaEnable; + _env = Ort::Env(ORT_LOGGING_LEVEL_WARNING, "Sam"); + Ort::SessionOptions _sessionOption; if (iParams.cudaEnable) { OrtCUDAProviderOptions cudaOption; cudaOption.device_id = 0; - sessionOption.AppendExecutionProvider_CUDA(cudaOption); + _sessionOption.AppendExecutionProvider_CUDA(cudaOption); } - sessionOption.SetGraphOptimizationLevel( + _sessionOption.SetGraphOptimizationLevel( GraphOptimizationLevel::ORT_ENABLE_ALL); - sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); - sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); + _sessionOption.SetIntraOpNumThreads(iParams.intraOpNumThreads); + 
_sessionOption.SetLogSeverityLevel(iParams.logSeverityLevel); const char *modelPath = iParams.modelPath.c_str(); - session = std::make_unique(env, modelPath, sessionOption); + _session = std::make_unique(_env, modelPath, _sessionOption); Ort::AllocatorWithDefaultOptions allocator; - size_t inputNodesNum = session->GetInputCount(); + size_t inputNodesNum = _session->GetInputCount(); for (size_t i = 0; i < inputNodesNum; i++) { Ort::AllocatedStringPtr input_node_name = - session->GetInputNameAllocated(i, allocator); + _session->GetInputNameAllocated(i, allocator); char *temp_buf = new char[50]; strcpy(temp_buf, input_node_name.get()); - inputNodeNames.push_back(temp_buf); + _inputNodeNames.push_back(temp_buf); } - size_t OutputNodesNum = session->GetOutputCount(); + size_t OutputNodesNum = _session->GetOutputCount(); for (size_t i = 0; i < OutputNodesNum; i++) { Ort::AllocatedStringPtr output_node_name = - session->GetOutputNameAllocated(i, allocator); + _session->GetOutputNameAllocated(i, allocator); char *temp_buf = new char[50]; strcpy(temp_buf, output_node_name.get()); - outputNodeNames.push_back(temp_buf); + _outputNodeNames.push_back(temp_buf); } - options = Ort::RunOptions{nullptr}; + _options = Ort::RunOptions{nullptr}; auto input_shape = - session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + _session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_shape = - session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - auto output_type = session->GetOutputTypeInfo(0) + _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + auto output_type = _session->GetOutputTypeInfo(0) .GetTensorTypeAndShapeInfo() .GetElementType(); - WarmUpSession(modelType); + WarmUpSession(_modelType); return RET_OK; } catch (const std::exception &e) { const char *str1 = "[SAM]:"; @@ -109,30 +109,30 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { std::strcpy(merged, str_result.c_str()); std::cout << 
merged << std::endl; delete[] merged; - return "[SAM]:Create session failed."; + return "[SAM]:Create _session failed."; } } const char *SAM::RunSession(const cv::Mat &iImg, std::vector &oResult, - SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result) { + SEG::MODEL_TYPE _modelType, SEG::DL_RESULT &result) { #ifdef benchmark clock_t starttime_1 = clock(); #endif // benchmark Utils utilities; const char *Ret = RET_OK; cv::Mat processedImg; - utilities.PreProcess(iImg, imgSize, processedImg); + utilities.PreProcess(iImg, _imgSize, processedImg); float *blob = new float[processedImg.total() * 3]; utilities.BlobFromImage(processedImg, blob); std::vector inputNodeDims; - if (modelType == SEG::SAM_SEGMENT_ENCODER) { - inputNodeDims = {1, 3, imgSize.at(0), imgSize.at(1)}; - } else if (modelType == SEG::SAM_SEGMENT_DECODER) { + if (_modelType == SEG::SAM_SEGMENT_ENCODER) { + inputNodeDims = {1, 3, _imgSize.at(0), _imgSize.at(1)}; + } else if (_modelType == SEG::SAM_SEGMENT_DECODER) { // Input size or SAM decoder model is 256x64x64 for the decoder inputNodeDims = {1, 256, 64, 64}; } - TensorProcess(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, + TensorProcess(starttime_1, iImg, blob, inputNodeDims, _modelType, oResult, utilities, result); return Ret; @@ -141,11 +141,11 @@ const char *SAM::RunSession(const cv::Mat &iImg, template const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, - SEG::MODEL_TYPE modelType, + SEG::MODEL_TYPE _modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result) { - switch (modelType) { + switch (_modelType) { case SEG::SAM_SEGMENT_ENCODER: // case OTHER_SAM_MODEL: { @@ -153,14 +153,14 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, Ort::Value inputTensor = Ort::Value::CreateTensor::type>( Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), - blob, 3 * imgSize.at(0) * imgSize.at(1), inputNodeDims.data(), + blob, 3 * 
_imgSize.at(0) * _imgSize.at(1), inputNodeDims.data(), inputNodeDims.size()); #ifdef benchmark clock_t starttime_2 = clock(); #endif // benchmark auto outputTensor = - session->Run(options, inputNodeNames.data(), &inputTensor, 1, - outputNodeNames.data(), outputNodeNames.size()); + _session->Run(_options, _inputNodeNames.data(), &inputTensor, 1, + _outputNodeNames.data(), _outputNodeNames.size()); #ifdef benchmark clock_t starttime_3 = clock(); #endif // benchmark @@ -186,7 +186,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) { + if (_cudaEnable) { std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; @@ -269,7 +269,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector hasMaskInput = {0.0f}; // No mask provided std::vector hasMaskInputDims = {1}; - utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + utilities.ScaleBboxPoints(iImg, _imgSize, pointCoords, pointCoordsScaled); std::vector inputTensors = utilities.PrepareInputTensor( decoderInputTensor, pointCoordsScaled, pointCoordsDims, pointLabels, @@ -279,15 +279,15 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, #ifdef benchmark starttime_2 = clock(); #endif // benchmark - auto output_tensors = session->Run( - options, inputNodeNames.data(), inputTensors.data(), - inputTensors.size(), outputNodeNames.data(), outputNodeNames.size()); + auto output_tensors = _session->Run( + _options, _inputNodeNames.data(), inputTensors.data(), + inputTensors.size(), _outputNodeNames.data(), _outputNodeNames.size()); #ifdef benchmark starttime_3 = clock(); #endif // benchmark - utilities.PostProcess(output_tensors, iImg, imgSize, result); + 
utilities.PostProcess(output_tensors, iImg, _imgSize, result); } // Add the result to oResult oResult.push_back(result); @@ -302,7 +302,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, (double)(starttime_3 - starttime_2) / CLOCKS_PER_SEC * 1000; double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) { + if (_cudaEnable) { std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; @@ -321,31 +321,31 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, return RET_OK; } -char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { +char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) { clock_t starttime_1 = clock(); Utils utilities; - cv::Mat iImg = cv::Mat(cv::Size(imgSize.at(0), imgSize.at(1)), CV_8UC3); + cv::Mat iImg = cv::Mat(cv::Size(_imgSize.at(0), _imgSize.at(1)), CV_8UC3); cv::Mat processedImg; - utilities.PreProcess(iImg, imgSize, processedImg); + utilities.PreProcess(iImg, _imgSize, processedImg); float *blob = new float[iImg.total() * 3]; utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = {1, 3, imgSize.at(0), - imgSize.at(1)}; - switch (modelType) { + std::vector SAM_input_node_dims = {1, 3, _imgSize.at(0), + _imgSize.at(1)}; + switch (_modelType) { case SEG::SAM_SEGMENT_ENCODER: { Ort::Value input_tensor = Ort::Value::CreateTensor( Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, - 3 * imgSize.at(0) * imgSize.at(1), SAM_input_node_dims.data(), + 3 * _imgSize.at(0) * _imgSize.at(1), SAM_input_node_dims.data(), SAM_input_node_dims.size()); auto output_tensors = - session->Run(options, inputNodeNames.data(), &input_tensor, 1, - outputNodeNames.data(), outputNodeNames.size()); + _session->Run(_options, _inputNodeNames.data(), &input_tensor, 1, + _outputNodeNames.data(), _outputNodeNames.size()); delete[] blob; 
clock_t starttime_4 = clock(); double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) { + if (_cudaEnable) { std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. " << std::endl; } @@ -387,7 +387,7 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { std::vector pointCoordsScaled; - utilities.ScaleBboxPoints(iImg, imgSize, pointCoords, pointCoordsScaled); + utilities.ScaleBboxPoints(iImg, _imgSize, pointCoords, pointCoordsScaled); // Labels for the points std::vector pointLabels = {1.0f}; // All points are foreground @@ -403,17 +403,17 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE modelType) { pointLabelsDims, maskInput, maskInputDims, hasMaskInput, hasMaskInputDims); - auto output_tensors = session->Run( - options, inputNodeNames.data(), inputTensors.data(), - inputTensors.size(), outputNodeNames.data(), outputNodeNames.size()); + auto output_tensors = _session->Run( + _options, _inputNodeNames.data(), inputTensors.data(), + inputTensors.size(), _outputNodeNames.data(), _outputNodeNames.size()); } - outputNodeNames.size(); + _outputNodeNames.size(); delete[] blob; clock_t starttime_4 = clock(); double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; - if (cudaEnable) { + if (_cudaEnable) { std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time << " ms. 
" << std::endl; } diff --git a/src/utils.cpp b/src/utils.cpp index ca66b6c..2c826ab 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -26,13 +26,13 @@ char *Utils::PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat if (iImg.cols >= iImg.rows) { - resizeScales = iImg.cols / (float)iImgSize.at(0); - cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / resizeScales))); + _resizeScales = iImg.cols / (float)iImgSize.at(0); + cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / _resizeScales))); } else { - resizeScales = iImg.rows / (float)iImgSize.at(1); - cv::resize(oImg, oImg, cv::Size(int(iImg.cols / resizeScales), iImgSize.at(1))); + _resizeScales = iImg.rows / (float)iImgSize.at(1); + cv::resize(oImg, oImg, cv::Size(int(iImg.cols / _resizeScales), iImgSize.at(1))); } //cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(1), iImgSize.at(0), CV_8UC3); @@ -52,12 +52,12 @@ void Utils::ScaleBboxPoints(const cv::Mat &iImg, std::vector imgSize, std:: if (iImg.cols >= iImg.rows) { scale = imgSize[0] / (float)iImg.cols; - resizeScalesBbox = iImg.cols / (float)imgSize[0]; + _resizeScalesBbox = iImg.cols / (float)imgSize[0]; } else { scale = imgSize[1] / (float)iImg.rows; - resizeScalesBbox = iImg.rows / (float)imgSize[1]; + _resizeScalesBbox = iImg.rows / (float)imgSize[1]; } // Top-Left placement (matching PreProcess) From e1130c6cc150f1ab35bc8eb3606c5b13a366d90b Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 5 Sep 2025 15:10:28 +0200 Subject: [PATCH 14/35] Separrated test files per category (utils or sam related for now) --- CMakeLists.txt | 18 ++++- test/sam_test.cpp | 68 +++++++---------- test/test_utils.cpp | 175 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 214 insertions(+), 47 deletions(-) create mode 100644 test/test_utils.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index cece29a..ffc7611 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt 
@@ -97,13 +97,23 @@ if (CATKIN_ENABLE_TESTING) # find_package(catkin_lint_cmake REQUIRED) # catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") - catkin_add_gtest(sam_onnx_ros_tests test/sam_test.cpp) - if(TARGET sam_onnx_ros_tests) - target_link_libraries(sam_onnx_ros_tests sam_onnx_ros_core ${catkin_LIBRARIES}) - target_include_directories(sam_onnx_ros_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + # Utils unit tests (no models needed) + catkin_add_gtest(utils_tests test/test_utils.cpp) + if(TARGET utils_tests) + target_link_libraries(utils_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES}) + target_include_directories(utils_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + endif() + + # SAM integration-ish tests (may need models) + catkin_add_gtest(sam_tests test/sam_test.cpp) + if(TARGET sam_tests) + target_link_libraries(sam_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES}) + target_include_directories(sam_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) endif() endif() + + #If you want to debug # set(CMAKE_BUILD_TYPE Debug) # set(CMAKE_CXX_FLAGS_DEBUG "-g") diff --git a/test/sam_test.cpp b/test/sam_test.cpp index 75a9c98..521b0d8 100644 --- a/test/sam_test.cpp +++ b/test/sam_test.cpp @@ -1,41 +1,46 @@ -#include "segmentation.h" -#include "sam_inference.h" #include #include -#include "dl_types.h" -#include "utils.h" #include +#include "segmentation.h" +#include "sam_inference.h" +#include "dl_types.h" + +// This file contains higher-level (integration-ish) tests. +// They cover object/session creation and a full pipeline run using synthetic images. +// These tests may require the .onnx model files to be present next to the binary or in a known dir. 
class SamInferenceTest : public ::testing::Test { protected: void SetUp() override { - // Create test images with different characteristics + // Create simple synthetic images: + // - a white 640x640 (square) + // - a gray 800x600 (non-square) testImage_640x640 = cv::Mat::ones(640, 640, CV_8UC3) * 255; testImage_800x600 = cv::Mat::ones(600, 800, CV_8UC3) * 128; - // Create a more realistic test image with some patterns + // A "random noise" image to simulate realistic content for end-to-end checks. testImage_realistic = cv::Mat(640, 640, CV_8UC3); cv::randu(testImage_realistic, cv::Scalar(0,0,0), cv::Scalar(255,255,255)); - // Setup common parameters + // Cache non-square size for preprocessing helpers. NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows }; - // Use the package Initializer/SegmentAnything for the full pipeline - + // Use package helpers to build default params and SAM objects. std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); #ifdef USE_CUDA - params_encoder.cudaEnable = true; + params_encoder.cudaEnable = true; // Enable CUDA if compiled with it #else - params_encoder.cudaEnable = false; + params_encoder.cudaEnable = false; // Otherwise run on CPU #endif } + // Clean up the SAM objects after each test. void TearDown() override { samSegmentors[0].reset(); samSegmentors[1].reset(); } - // Test data + // Test data and objects shared across tests. Utils utilities; cv::Mat testImage_640x640, testImage_800x600, testImage_realistic; std::vector NonSquareImgSize; @@ -43,8 +48,7 @@ class SamInferenceTest : public ::testing::Test SEG::DL_INIT_PARAM params_encoder, params_decoder; }; - - +// Simple smoke test: we can construct a SAM object without throwing. 
TEST_F(SamInferenceTest, ObjectCreation) { EXPECT_NO_THROW({ @@ -52,26 +56,8 @@ TEST_F(SamInferenceTest, ObjectCreation) }); } -TEST_F(SamInferenceTest, PreProcessSquareImage) -{ - cv::Mat processedImg; - const char* result = utilities.PreProcess(testImage_640x640, params_encoder.imgSize, processedImg); - - EXPECT_EQ(result, nullptr) << "PreProcess should succeed"; - EXPECT_EQ(processedImg.size(), cv::Size(1024, 1024)) << "Output should be letterboxed to 1024x1024"; - EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty"; -} - -TEST_F(SamInferenceTest, PreProcessRectangularImage) -{ - cv::Mat processedImg; - const char* result = utilities.PreProcess(testImage_800x600, NonSquareImgSize, processedImg); - - EXPECT_EQ(result, nullptr) << "PreProcess should succeed"; - EXPECT_EQ(processedImg.size(), cv::Size(800, 600)) << "Output should be letterboxed to 800x600"; - EXPECT_FALSE(processedImg.empty()) << "Processed image should not be empty"; -} - +// Confirms that with a present encoder model we can initialize a session. +// Skips if the model file is not available. TEST_F(SamInferenceTest, CreateSessionWithValidModel) { if (!std::filesystem::exists("SAM_encoder.onnx")) { @@ -81,6 +67,7 @@ TEST_F(SamInferenceTest, CreateSessionWithValidModel) EXPECT_NE(samSegmentors[0], nullptr) << "CreateSession should succeed with valid parameters"; } +// Confirms that giving an invalid model path returns an error (no crash). TEST_F(SamInferenceTest, CreateSessionWithInvalidModel) { params_encoder.modelPath = "nonexistent_model.onnx"; @@ -88,6 +75,8 @@ TEST_F(SamInferenceTest, CreateSessionWithInvalidModel) EXPECT_NE(result, nullptr) << "CreateSession should fail with invalid model path"; } +// End-to-end check: with both encoder/decoder models present, the pipeline runs +// and returns a mask vector. Skips if models are not available. 
TEST_F(SamInferenceTest, FullInferencePipeline) { if (!std::filesystem::exists("SAM_encoder.onnx") || @@ -95,15 +84,8 @@ TEST_F(SamInferenceTest, FullInferencePipeline) GTEST_SKIP() << "Models not found in build dir"; } - - auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic); - EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector"; -} -// Run all tests -int main(int argc, char **argv) -{ - testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); + // We only check that a vector is returned. (You can strengthen this to EXPECT_FALSE(masks.empty()).) + EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector"; } \ No newline at end of file diff --git a/test/test_utils.cpp b/test/test_utils.cpp new file mode 100644 index 0000000..27eaaa0 --- /dev/null +++ b/test/test_utils.cpp @@ -0,0 +1,175 @@ +#include +#include +#include "utils.h" + +// This file contains small, focused unit tests for Utils. +// We verify image preprocessing (channel conversion, aspect-preserving resize, padding) +// and coordinate scaling to match preprocessing. + +// Lightweight fixture: gives each test a fresh Utils instance. +class UtilsTest : public ::testing::Test { +protected: + Utils u; +}; + +// Checks that a grayscale (1-channel) image is converted to RGB (3-channel) +// and the output image is exactly the requested target size (letterboxed). +TEST_F(UtilsTest, GrayscaleToRGBKeepsSize) { + cv::Mat gray = cv::Mat::zeros(300, 500, CV_8UC1); + cv::Mat out; + std::vector target{1024, 1024}; + + // Call PreProcess and expect no error. + const char* err = u.PreProcess(gray, target, out); + ASSERT_EQ(err, nullptr); + + // After preprocessing, we must have 3 channels (RGB). + EXPECT_EQ(out.channels(), 3); + + // The letterboxed output must match the target canvas size. 
+ EXPECT_EQ(out.size(), cv::Size(target[0], target[1])); +} + +// Verifies three things: +// 1) Aspect ratio is preserved when resizing to the target. +// 2) The resized image is placed at the top-left (0,0). +// 3) The padding area is zero (black). +TEST_F(UtilsTest, PreprocessTopLeftPaddingAndAspect) { + const cv::Scalar fill(10, 20, 30); // Input color in BGR + cv::Mat img(720, 1280, CV_8UC3, fill); + cv::Mat out; + std::vector target{1024, 1024}; + + ASSERT_EQ(u.PreProcess(img, target, out), nullptr); + ASSERT_EQ(out.size(), cv::Size(target[0], target[1])); + ASSERT_EQ(out.channels(), 3); + + // Width drives resizing here (landscape). Width becomes 1024, height scales accordingly. + int resized_w = target[0]; + int resized_h = static_cast(img.rows / (img.cols / static_cast(target[0]))); + + // PreProcess converts BGR -> RGB, so expected color is swapped. + cv::Scalar expected_rgb(fill[2], fill[1], fill[0]); + + // The top-left region (resized content) should keep the image color. + cv::Mat roi_top = out(cv::Rect(0, 0, resized_w, resized_h)); + cv::Scalar mean_top = cv::mean(roi_top); + EXPECT_NEAR(mean_top[0], expected_rgb[0], 1.0); + EXPECT_NEAR(mean_top[1], expected_rgb[1], 1.0); + EXPECT_NEAR(mean_top[2], expected_rgb[2], 1.0); + + // The area below the resized content (padding) must be zeros. + if (resized_h < target[1]) { + cv::Mat roi_pad = out(cv::Rect(0, resized_h, target[0], target[1] - resized_h)); + cv::Mat gray; cv::cvtColor(roi_pad, gray, cv::COLOR_BGR2GRAY); + EXPECT_EQ(cv::countNonZero(gray), 0); + } +} + +// Parameterized fixture: used with TEST_P to run the same test body +// for many (input size, target size) pairs. +class UtilsPreprocessParamTest + : public ::testing::TestWithParam> { +protected: + Utils u; +}; + +// TEST_P defines a parameterized test. It runs once per parameter set. +// We assert that: +// - Output size equals the target canvas. +// - Output has 3 channels (RGB). 
+// - The padding area (bottom or right) is zero depending on which side letterboxes. +TEST_P(UtilsPreprocessParamTest, LetterboxWithinBoundsAndChannels3) { + const auto [inSize, target] = GetParam(); + cv::Mat img(inSize, CV_8UC3, cv::Scalar(1, 2, 3)); + cv::Mat out; + + ASSERT_EQ(u.PreProcess(img, {target.width, target.height}, out), nullptr); + EXPECT_EQ(out.size(), target); + EXPECT_EQ(out.channels(), 3); + + // Detect which side letterboxes and check that the padded region is zeros. + if (inSize.width >= inSize.height) { + int resized_h = static_cast(inSize.height / (inSize.width / static_cast(target.width))); + if (resized_h < target.height) { + cv::Mat roi_pad = out(cv::Rect(0, resized_h, target.width, target.height - resized_h)); + cv::Mat gray; cv::cvtColor(roi_pad, gray, cv::COLOR_BGR2GRAY); + EXPECT_EQ(cv::countNonZero(gray), 0); + } + } else { + int resized_w = static_cast(inSize.width / (inSize.height / static_cast(target.height))); + if (resized_w < target.width) { + cv::Mat roi_pad = out(cv::Rect(resized_w, 0, target.width - resized_w, target.height)); + cv::Mat gray; cv::cvtColor(roi_pad, gray, cv::COLOR_BGR2GRAY); + EXPECT_EQ(cv::countNonZero(gray), 0); + } + } +} + +// INSTANTIATE_TEST_SUITE_P provides the concrete parameter values. +// Each pair (input size, target size) creates a separate test instance. +INSTANTIATE_TEST_SUITE_P( + ManySizes, + UtilsPreprocessParamTest, + ::testing::Values( + std::make_tuple(cv::Size(640, 640), cv::Size(1024, 1024)), // square -> square + std::make_tuple(cv::Size(800, 600), cv::Size(800, 600)), // same size (no resize) + std::make_tuple(cv::Size(600, 800), cv::Size(800, 600)), // portrait -> landscape + std::make_tuple(cv::Size(1280, 720), cv::Size(1024, 1024)) // wide -> square + ) +); + +// Separate fixture for point scaling tests. +class UtilsScaleBboxPointsTest : public ::testing::Test { +protected: + Utils u; +}; + +// If the input size and target size are the same, scaling should do nothing. 
+TEST_F(UtilsScaleBboxPointsTest, IdentityWhenSameSize) { + cv::Mat img(600, 800, CV_8UC3); + std::vector target{800, 600}; + std::vector pts{100.f, 100.f, 700.f, 500.f}; + std::vector scaled; + + u.ScaleBboxPoints(img, target, pts, scaled); + ASSERT_EQ(scaled.size(), pts.size()); + EXPECT_NEAR(scaled[0], pts[0], 1e-3); + EXPECT_NEAR(scaled[1], pts[1], 1e-3); + EXPECT_NEAR(scaled[2], pts[2], 1e-3); + EXPECT_NEAR(scaled[3], pts[3], 1e-3); +} + +// When width drives the resize (landscape), both x and y are scaled by the same factor. +// We expect coordinates to be multiplied by target_width / input_width. +TEST_F(UtilsScaleBboxPointsTest, ScalesWidthDominant) { + cv::Mat img(300, 600, CV_8UC3); // h=300, w=600 (w >= h) + std::vector target{1200, 600}; // width doubles + std::vector pts{100.f, 50.f, 500.f, 250.f}; + std::vector scaled; + + u.ScaleBboxPoints(img, target, pts, scaled); + ASSERT_EQ(scaled.size(), pts.size()); + const float scale = target[0] / static_cast(img.cols); // 1200/600 = 2 + EXPECT_NEAR(scaled[0], pts[0] * scale, 1e-3); + EXPECT_NEAR(scaled[1], pts[1] * scale, 1e-3); + EXPECT_NEAR(scaled[2], pts[2] * scale, 1e-3); + EXPECT_NEAR(scaled[3], pts[3] * scale, 1e-3); +} + +// When height drives the resize (portrait), both x and y are scaled by the same factor. +// We expect coordinates to be multiplied by target_height / input_height. 
+TEST_F(UtilsScaleBboxPointsTest, ScalesHeightDominant) { + cv::Mat img(600, 300, CV_8UC3); // h=600, w=300 (h > w) + std::vector target{600, 1200}; // height doubles + std::vector pts{100.f, 50.f, 200.f, 500.f}; + std::vector scaled; + + u.ScaleBboxPoints(img, target, pts, scaled); + ASSERT_EQ(scaled.size(), pts.size()); + const float scale = target[1] / static_cast(img.rows); // 1200/600 = 2 + EXPECT_NEAR(scaled[0], pts[0] * scale, 1e-3); + EXPECT_NEAR(scaled[1], pts[1] * scale, 1e-3); + EXPECT_NEAR(scaled[2], pts[2] * scale, 1e-3); + EXPECT_NEAR(scaled[3], pts[3] * scale, 1e-3); +} \ No newline at end of file From c34410edb9d9bdda47a46783567784273e18a543 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Wed, 10 Sep 2025 14:35:19 +0200 Subject: [PATCH 15/35] Updated initializer and SegmentAnything modules to store the data to the custom result structs properly --- CMakeLists.txt | 6 +++--- include/segmentation.h | 6 ++++-- src/main.cpp | 8 +++++--- src/segmentation.cpp | 18 +++++++++--------- src/utils.cpp | 2 +- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ffc7611..1489bcf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ include_directories(${OpenCV_INCLUDE_DIRS}) # -------------- ONNXRuntime ------------------# set(ONNXRUNTIME_VERSION 1.21.0) -set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam/onnxruntime-linux-x64-gpu-1.21.1") +set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam.bak/onnxruntime-linux-x64-gpu-1.21.1") include_directories(${ONNXRUNTIME_ROOT}/include) # -------------- Cuda ------------------# @@ -84,8 +84,8 @@ add_executable(${PROJECT_NAME} src/main.cpp) target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core) # Copy sam_.onnx file to the same folder of the executable file -configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) 
-configure_file(~/Documents/repos/hero_sam/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) +configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) +configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) # Create folder name images in the same folder of the executable file add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD diff --git a/include/segmentation.h b/include/segmentation.h index b341f8d..e6a6d67 100644 --- a/include/segmentation.h +++ b/include/segmentation.h @@ -4,7 +4,9 @@ #include #include "sam_inference.h" -std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM> Initializer(); -std::vector SegmentAnything(std::vector>& samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, cv::Mat& img); +std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM, SEG::DL_RESULT, std::vector> Initializer(); +void SegmentAnything(std::vector>& samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, const cv::Mat& img, +std::vector &resSam, + SEG::DL_RESULT &res); #endif // SEGMENTATION_H \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index c9624c5..e52e9ce 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,7 +10,9 @@ int main() std::vector> samSegmentors; SEG::DL_INIT_PARAM params_encoder; SEG::DL_INIT_PARAM params_decoder; - std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); + std::vector resSam; + SEG::DL_RESULT res; + std::tie(samSegmentors, params_encoder, params_decoder, res, resSam) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); std::filesystem::path imgs_path = "/home/amigo/Documents/repos/hero_sam/sam_inference/build/images"; // current_path / <- you could use for (auto &i : 
std::filesystem::directory_iterator(imgs_path)) @@ -19,8 +21,8 @@ int main() { std::string img_path = i.path().string(); cv::Mat img = cv::imread(img_path); - std::vector masks; - masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, img); + + SegmentAnything(samSegmentors, params_encoder, params_decoder, img, resSam, res); } } diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 25b8fae..5bb9744 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -1,7 +1,7 @@ #include "segmentation.h" std::tuple>, SEG::DL_INIT_PARAM, - SEG::DL_INIT_PARAM> + SEG::DL_INIT_PARAM, SEG::DL_RESULT, std::vector> Initializer() { std::vector> samSegmentors; samSegmentors.push_back(std::make_unique()); @@ -11,7 +11,8 @@ Initializer() { std::unique_ptr samSegmentorDecoder = std::make_unique(); SEG::DL_INIT_PARAM params_encoder; SEG::DL_INIT_PARAM params_decoder; - + SEG::DL_RESULT res; + std::vector resSam; params_encoder.rectConfidenceThreshold = 0.1; params_encoder.iouThreshold = 0.5; params_encoder.modelPath = "/home/amigo//Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx"; @@ -31,16 +32,15 @@ Initializer() { samSegmentorDecoder->CreateSession(params_decoder); samSegmentors[0] = std::move(samSegmentorEncoder); samSegmentors[1] = std::move(samSegmentorDecoder); - return {std::move(samSegmentors), params_encoder, params_decoder}; + return {std::move(samSegmentors), params_encoder, params_decoder, res, resSam}; } -std::vector -SegmentAnything(std::vector> &samSegmentors, +void SegmentAnything(std::vector> &samSegmentors, const SEG::DL_INIT_PARAM ¶ms_encoder, - const SEG::DL_INIT_PARAM ¶ms_decoder, cv::Mat &img) { + const SEG::DL_INIT_PARAM ¶ms_decoder, const cv::Mat &img, std::vector &resSam, + SEG::DL_RESULT &res) { + - std::vector resSam; - SEG::DL_RESULT res; SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); @@ -48,5 +48,5 @@ SegmentAnything(std::vector> &samSegmentors, 
modelTypeRef = params_decoder.modelType; samSegmentors[1]->RunSession(img, resSam, modelTypeRef, res); - return std::move(res.masks); + // return std::move(res.masks); } diff --git a/src/utils.cpp b/src/utils.cpp index 2c826ab..28a7ded 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1,6 +1,6 @@ #include "utils.h" #include // for guided filter -#define LOGGING +//#define LOGGING // Constructor Utils::Utils() From 69f312646eb8e5de5aefc98b0fb2cd19badb0906 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Wed, 10 Sep 2025 15:51:18 +0200 Subject: [PATCH 16/35] Enabled cuda on the decoder as well --- src/segmentation.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 5bb9744..23efd59 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -24,6 +24,8 @@ Initializer() { #ifdef USE_CUDA params_encoder.cudaEnable = true; + params_decoder.cudaEnable = true; + #else params_encoder.cudaEnable = false; #endif From 23d479089af02d78f4cd39ca17653361e03210ac Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Wed, 10 Sep 2025 16:14:56 +0200 Subject: [PATCH 17/35] Fixed small bug of adding an extra (full img) bounding box --- src/sam_inference.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index f12c56b..de1b408 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -223,8 +223,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, std::vector boundingBoxes; boundingBoxes.push_back(bbox); -#else - result.boxes.push_back(cv::Rect(0, 0, iImg.cols, iImg.rows)); + #endif // ROI #ifdef benchmark From ce94fa8745b2a4ce754c15c0df3885b0bcaefdbc Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 12 Sep 2025 12:13:22 +0200 Subject: [PATCH 18/35] Aligned dimensions [high width] between onnx and opencv --- src/main.cpp | 2 +- src/sam_inference.cpp | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 5 
deletions(-) diff --git a/src/main.cpp b/src/main.cpp index e52e9ce..9744737 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -14,7 +14,7 @@ int main() SEG::DL_RESULT res; std::tie(samSegmentors, params_encoder, params_decoder, res, resSam) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); - std::filesystem::path imgs_path = "/home/amigo/Documents/repos/hero_sam/sam_inference/build/images"; // current_path / <- you could use + std::filesystem::path imgs_path = "/home/amigo/Documents/repos/hero_sam.bak/sam_inference/build/images"; // current_path / <- you could use for (auto &i : std::filesystem::directory_iterator(imgs_path)) { if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index de1b408..0ccec9a 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -93,6 +93,16 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { auto input_shape = _session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + // Optional shape check when model has fixed dims (not -1) + if (input_shape.size() >= 4 && input_shape[2] > 0 && input_shape[3] > 0) { + const int64_t expectH = _imgSize.at(1); + const int64_t expectW = _imgSize.at(0); + if (input_shape[2] != expectH || input_shape[3] != expectW) { + std::cerr << "[SAM]: Model input (H,W)=(" << input_shape[2] << "," << input_shape[3] + << ") mismatches configured imgSize (W,H)=(" << _imgSize[0] << "," << _imgSize[1] << ")." 
+ << std::endl; + } + } auto output_shape = _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_type = _session->GetOutputTypeInfo(0) @@ -127,9 +137,9 @@ const char *SAM::RunSession(const cv::Mat &iImg, utilities.BlobFromImage(processedImg, blob); std::vector inputNodeDims; if (_modelType == SEG::SAM_SEGMENT_ENCODER) { - inputNodeDims = {1, 3, _imgSize.at(0), _imgSize.at(1)}; + // NCHW: H = imgSize[1], W = imgSize[0] + inputNodeDims = {1, 3, _imgSize.at(1), _imgSize.at(0)}; } else if (_modelType == SEG::SAM_SEGMENT_DECODER) { - // Input size or SAM decoder model is 256x64x64 for the decoder inputNodeDims = {1, 256, 64, 64}; } TensorProcess(starttime_1, iImg, blob, inputNodeDims, _modelType, oResult, @@ -329,8 +339,9 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) { float *blob = new float[iImg.total() * 3]; utilities.BlobFromImage(processedImg, blob); - std::vector SAM_input_node_dims = {1, 3, _imgSize.at(0), - _imgSize.at(1)}; + + // NCHW: H = imgSize[1], W = imgSize[0] + std::vector SAM_input_node_dims = {1, 3, _imgSize.at(1), _imgSize.at(0)}; switch (_modelType) { case SEG::SAM_SEGMENT_ENCODER: { Ort::Value input_tensor = Ort::Value::CreateTensor( From ce19e96f544820fe57cf865f87b4a9d4f1bd945e Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 12 Sep 2025 12:14:29 +0200 Subject: [PATCH 19/35] corrected tests for the new segmentation way of inference (initialize and segment anything interfaces changed) and added one more test to check the image dimensions W,H --- src/sam_inference.cpp | 11 +---------- test/sam_test.cpp | 9 ++++++--- test/test_utils.cpp | 16 ++++++++++++++++ 3 files changed, 23 insertions(+), 13 deletions(-) diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 0ccec9a..3ae5677 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -93,16 +93,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { auto input_shape = 
_session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - // Optional shape check when model has fixed dims (not -1) - if (input_shape.size() >= 4 && input_shape[2] > 0 && input_shape[3] > 0) { - const int64_t expectH = _imgSize.at(1); - const int64_t expectW = _imgSize.at(0); - if (input_shape[2] != expectH || input_shape[3] != expectW) { - std::cerr << "[SAM]: Model input (H,W)=(" << input_shape[2] << "," << input_shape[3] - << ") mismatches configured imgSize (W,H)=(" << _imgSize[0] << "," << _imgSize[1] << ")." - << std::endl; - } - } + auto output_shape = _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); auto output_type = _session->GetOutputTypeInfo(0) diff --git a/test/sam_test.cpp b/test/sam_test.cpp index 521b0d8..2ae4c73 100644 --- a/test/sam_test.cpp +++ b/test/sam_test.cpp @@ -28,13 +28,14 @@ class SamInferenceTest : public ::testing::Test NonSquareImgSize = { testImage_800x600.cols, testImage_800x600.rows }; // Use package helpers to build default params and SAM objects. - std::tie(samSegmentors, params_encoder, params_decoder) = Initializer(); + std::tie(samSegmentors, params_encoder, params_decoder, res, resSam) = Initializer(); #ifdef USE_CUDA params_encoder.cudaEnable = true; // Enable CUDA if compiled with it #else params_encoder.cudaEnable = false; // Otherwise run on CPU #endif + } // Clean up the SAM objects after each test. @@ -46,6 +47,8 @@ class SamInferenceTest : public ::testing::Test std::vector NonSquareImgSize; std::vector> samSegmentors; SEG::DL_INIT_PARAM params_encoder, params_decoder; + SEG::DL_RESULT res; + std::vector resSam; }; // Simple smoke test: we can construct a SAM object without throwing. 
@@ -84,8 +87,8 @@ TEST_F(SamInferenceTest, FullInferencePipeline) GTEST_SKIP() << "Models not found in build dir"; } - auto masks = SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic); + SegmentAnything(samSegmentors, params_encoder, params_decoder, testImage_realistic, resSam, res); // We only check that a vector is returned. (You can strengthen this to EXPECT_FALSE(masks.empty()).) - EXPECT_TRUE(masks.size() >= 0) << "Masks should be a valid output vector"; + EXPECT_TRUE(res.masks.size() >= 0) << "Masks should be a valid output vector"; } \ No newline at end of file diff --git a/test/test_utils.cpp b/test/test_utils.cpp index 27eaaa0..a03b31b 100644 --- a/test/test_utils.cpp +++ b/test/test_utils.cpp @@ -66,6 +66,22 @@ TEST_F(UtilsTest, PreprocessTopLeftPaddingAndAspect) { } } +// Explicitly ensure imgSize is interpreted as [W, H] in PreProcess for non-square targets. +TEST_F(UtilsTest, PreprocessNonSquareWidthHeightOrder) { + // Input image: H=300, W=500 + cv::Mat img(300, 500, CV_8UC3, cv::Scalar(5, 6, 7)); + + // Target canvas (W,H) with non-square dims + std::vector target{640, 480}; + cv::Mat out; + + ASSERT_EQ(u.PreProcess(img, target, out), nullptr); + // cols = width, rows = height + EXPECT_EQ(out.cols, target[0]); + EXPECT_EQ(out.rows, target[1]); + EXPECT_EQ(out.size(), cv::Size(target[0], target[1])); +} + // Parameterized fixture: used with TEST_P to run the same test body // for many (input size, target size) pairs. 
class UtilsPreprocessParamTest From 24de2e587964993efaa719386119ce967939215a Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Fri, 12 Sep 2025 14:10:23 +0200 Subject: [PATCH 20/35] Removed typo / from model path --- src/segmentation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 23efd59..41176de 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -15,7 +15,7 @@ Initializer() { std::vector resSam; params_encoder.rectConfidenceThreshold = 0.1; params_encoder.iouThreshold = 0.5; - params_encoder.modelPath = "/home/amigo//Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx"; + params_encoder.modelPath = "/home/amigo/Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx"; params_encoder.imgSize = {1024, 1024}; params_decoder = params_encoder; From 53be89c2e90d33357a2f65bf9f1eaee276b3be51 Mon Sep 17 00:00:00 2001 From: Matthijs van der Burgh Date: Tue, 16 Sep 2025 10:23:52 +0200 Subject: [PATCH 21/35] Bump min required cmake version to 3.14 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1489bcf..12ca294 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.0.2) +cmake_minimum_required(VERSION 3.14) project(sam_onnx_ros) From 6ddc3e661e7038947aeecf5159c53d69eb296324 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 16 Sep 2025 20:11:04 +0200 Subject: [PATCH 22/35] EOF line added and package.xml structure update --- LICENSE | 37 ++++++++++++++++++++----------------- README.md | 11 +++-------- include/dl_types.h | 2 +- include/sam_inference.h | 2 +- include/segmentation.h | 2 +- include/utils.h | 2 +- package.xml | 7 +++++-- src/main.cpp | 2 +- src/utils.cpp | 2 +- test/sam_test.cpp | 2 +- test/test_utils.cpp | 2 +- 11 files changed, 36 insertions(+), 35 deletions(-) diff --git a/LICENSE b/LICENSE index 7121e4a..c640c63 100644 --- a/LICENSE +++ b/LICENSE @@ -1,22 
+1,25 @@ -Custom License Agreement +BSD 2-Clause License -1. License Grant You are hereby granted a non-exclusive, non-transferable license to use, reproduce, and distribute the code (hereinafter referred to as "the Software") under the following conditions: +Copyright (c) 2021, Eindhoven University of Technology - CST Robotics Group +All rights reserved. -2. Conditions of Use +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: -Non-Commercial Use: You may use the Software for personal, educational, or non-commercial purposes without any additional permissions. -Commercial Use: Any commercial use of the Software, including but not limited to selling, licensing, or using it in a commercial product, requires prior written permission from the original developer. -3. Contact Requirement +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. -If you wish to use the Software for commercial purposes, you must contact the original developer at [https://www.linkedin.com/in/hamdi-boukamcha/] to obtain a commercial license. -The terms of any commercial license will be mutually agreed upon and may involve a licensing fee. -4. Attribution +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. -Regardless of whether you are using the Software for commercial or non-commercial purposes, you must provide appropriate credit to the original developer in any distributions or products that use the Software. -5. Disclaimer of Warranty - -The Software is provided "as is," without warranty of any kind, express or implied, including but not limited to the warranties of merchantability, fitness for a particular purpose, and non-infringement. 
In no event shall the original developer be liable for any claim, damages, or other liability, whether in an action of contract, tort, or otherwise, arising from, out of, or in connection with the Software or the use or other dealings in the Software. -6. Governing Law - -This License Agreement shall be governed by and construed in accordance with the laws of France. -By using the Software, you agree to abide by the terms outlined in this License Agreement. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md index 0efca14..4692d73 100644 --- a/README.md +++ b/README.md @@ -19,8 +19,8 @@ A high-performance C++ implementation for SAM (segment anything model) using Ten Dynamic Shape Support: Efficient handling of variable input sizes using optimization profiles. CUDA Optimization: Leverage CUDA for preprocessing and efficient memory handling. 
-## 📢 Performance - ### Infernce Time +## 📢 Performance + ### Infernce Time | Component | SpeedSAM | |----------------------------|-----------| @@ -51,7 +51,7 @@ A high-performance C++ implementation for SAM (segment anything model) using Ten │ ├── main.cpp # Main entry point │ └── speedSam.cpp # Implementation of the SpeedSam class └── CMakeLists.txt # CMake configuration - + # 🚀 Installation ## Prerequisites git clone https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT.git @@ -94,8 +94,3 @@ If you use this code in your research, please cite the repository as follows: publisher = {GitHub}, howpublished = {\url{https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT//}}, } - - - - - diff --git a/include/dl_types.h b/include/dl_types.h index 5141284..033df56 100644 --- a/include/dl_types.h +++ b/include/dl_types.h @@ -68,4 +68,4 @@ namespace SEG } DL_RESULT; } // namespace SEG -#endif // DL_TYPES_H \ No newline at end of file +#endif // DL_TYPES_H diff --git a/include/sam_inference.h b/include/sam_inference.h index 7bff0b1..6b4713e 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -46,4 +46,4 @@ class SAM float _iouThreshold; }; -#endif // SAMINFERENCE_H \ No newline at end of file +#endif // SAMINFERENCE_H diff --git a/include/segmentation.h b/include/segmentation.h index e6a6d67..83102e0 100644 --- a/include/segmentation.h +++ b/include/segmentation.h @@ -9,4 +9,4 @@ void SegmentAnything(std::vector>& samSegmentors, const SEG std::vector &resSam, SEG::DL_RESULT &res); -#endif // SEGMENTATION_H \ No newline at end of file +#endif // SEGMENTATION_H diff --git a/include/utils.h b/include/utils.h index 6cb8819..a471512 100644 --- a/include/utils.h +++ b/include/utils.h @@ -55,4 +55,4 @@ class Utils float _resizeScalesBbox; // letterbox scale }; -#endif // UTILS_H \ No newline at end of file +#endif // UTILS_H diff --git a/package.xml b/package.xml index b00e6d6..7d17b8d 100644 --- a/package.xml +++ b/package.xml @@ -7,6 +7,7 @@ 0.0.0 Segment 
Anything Model (SAM) segmentation + Iason Theodorou Iason Theodorou BSD @@ -14,15 +15,17 @@ catkin libopencv-dev - libopencv-dev onnxruntime_ros + + libopencv-dev onnxruntime_ros catkin_lint_cmake + doxygen - \ No newline at end of file + diff --git a/src/main.cpp b/src/main.cpp index 9744737..cd0f9dd 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -27,4 +27,4 @@ int main() } } return 0; -} \ No newline at end of file +} diff --git a/src/utils.cpp b/src/utils.cpp index 28a7ded..643dba4 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -219,4 +219,4 @@ void Utils::PostProcess(std::vector &output_tensors, const cv::Mat & { std::cerr << "[SAM]: Unexpected mask tensor shape." << std::endl; } -} \ No newline at end of file +} diff --git a/test/sam_test.cpp b/test/sam_test.cpp index 2ae4c73..cd54d7e 100644 --- a/test/sam_test.cpp +++ b/test/sam_test.cpp @@ -91,4 +91,4 @@ TEST_F(SamInferenceTest, FullInferencePipeline) // We only check that a vector is returned. (You can strengthen this to EXPECT_FALSE(masks.empty()).) 
EXPECT_TRUE(res.masks.size() >= 0) << "Masks should be a valid output vector"; -} \ No newline at end of file +} diff --git a/test/test_utils.cpp b/test/test_utils.cpp index a03b31b..92d20ab 100644 --- a/test/test_utils.cpp +++ b/test/test_utils.cpp @@ -188,4 +188,4 @@ TEST_F(UtilsScaleBboxPointsTest, ScalesHeightDominant) { EXPECT_NEAR(scaled[1], pts[1] * scale, 1e-3); EXPECT_NEAR(scaled[2], pts[2] * scale, 1e-3); EXPECT_NEAR(scaled[3], pts[3] * scale, 1e-3); -} \ No newline at end of file +} From ee403f2568406da052e157bb6ec552713109fd3e Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 23 Sep 2025 18:51:58 +0200 Subject: [PATCH 23/35] Update read me and include better comments --- README.md | 93 +++++++++++++++---------------------------- src/sam_inference.cpp | 9 +++-- 2 files changed, 37 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index 4692d73..4b7a36b 100644 --- a/README.md +++ b/README.md @@ -1,40 +1,32 @@ -# SPEED SAM C++ TENSORRT -![SAM C++ TENSORRT](assets/speed_sam_cpp_tenosrrt.PNG) +# SAM C++ ONNX implementation - - GitHub - - - - License - +Inspired by SAM NN from meta and Tensor-RT implementation from: https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT.git ## 🌐 Overview -A high-performance C++ implementation for SAM (segment anything model) using TensorRT and CUDA, optimized for real-time image segmentation tasks. +A high-performance C++ implementation for SAM (segment anything model) using ONNX and CUDA, optimized for real-time image segmentation tasks. -## 📢 Updates - Model Conversion: Build TensorRT engines from ONNX models for accelerated inference. - Segmentation with Points and BBoxes: Easily segment images using selected points or bounding boxes. - FP16 Precision: Choose between FP16 and FP32 for speed and precision balance. - Dynamic Shape Support: Efficient handling of variable input sizes using optimization profiles. - CUDA Optimization: Leverage CUDA for preprocessing and efficient memory handling. 
## 📢 Performance + +### Warm-Up cost :fire: + NVIDIA GeForce RTX 3050 + Encoder Cuda warm-up cost 66.875 ms. + Decoder Cuda warm-up cost 53.87 ms. + ### Infernce Time -| Component | SpeedSAM | -|----------------------------|-----------| -| **Image Encoder** | | -| Parameters | 5M | -| Speed | 8ms | -| **Mask Decoder** | | -| Parameters | 3.876M | -| Speed | 4ms | -| **Whole Pipeline (Enc+Dec)** | | -| Parameters | 9.66M | -| Speed | 12ms | -### Results -![SPEED-SAM-C-TENSORRT RESULT](assets/Speed_SAM_Results.JPG) +| Component | Pre processing | Inference | Post processing | +|----------------------------|----------------| --------- | ----------------| +| **Image Encoder** | | || +| Parameters | 5M |- | -| +| Speed | 8ms | 33.322ms | 0.437ms | +| **Mask Decoder** | | || +| Parameters | 3.876M |- |- | +| Speed | 34ms | 11.176ms | 5.984| +| **Whole Pipeline (Enc+Dec)** | | | | +| Parameters | 9.66M | -| -| +| Su of Speed | 92.92ms | - |- | + ## 📂 Project Structure SPEED-SAM-CPP-TENSORRT/ @@ -53,44 +45,23 @@ A high-performance C++ implementation for SAM (segment anything model) using Ten └── CMakeLists.txt # CMake configuration # 🚀 Installation -## Prerequisites - git clone https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT.git - cd SPEED-SAM-CPP-TENSORRT - +## Compile + git clone + cd sam_onnx_ros # Create a build directory and compile mkdir build && cd build cmake .. make -j$(nproc) -Note: Update the CMakeLists.txt with the correct paths for TensorRT and OpenCV. + +Note: Update the CMakeLists.txt with the correct paths for Onnxruntime and OpenCV and Onnx Models (since for TechUnited we keep them on separate repositories). + +You can use main.cpp to run the application + +## ROS option + You can also run the code as a catkin package. 
## 📦 Dependencies CUDA: NVIDIA's parallel computing platform - TensorRT: High-performance deep learning inference + Onnx: High-performance deep learning inference OpenCV: Image processing library C++17: Required standard for compilation - -# 🔍 Code Overview -## Main Components - SpeedSam Class (speedSam.h): Manages image encoding and mask decoding. - EngineTRT Class (engineTRT.h): TensorRT engine creation and inference. - CUDA Utilities (cuda_utils.h): Macros for CUDA error handling. - Config (config.h): Defines model parameters and precision settings. -## Key Functions - EngineTRT::build: Builds the TensorRT engine from an ONNX model. - EngineTRT::infer: Runs inference on the provided input data. - SpeedSam::predict: Segments an image using input points or bounding boxes. -## 📞 Contact - -For advanced inquiries, feel free to contact me on LinkedIn: LinkedIn - -## 📜 Citation - -If you use this code in your research, please cite the repository as follows: - - @misc{boukamcha2024SpeedSam, - author = {Hamdi Boukamcha}, - title = {SPEED-SAM-C-TENSORRT}, - year = {2024}, - publisher = {GitHub}, - howpublished = {\url{https://github.com/hamdiboukamcha/SPEED-SAM-C-TENSORRT//}}, - } diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 3ae5677..8e61ec4 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -188,11 +188,11 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " + std::cout << "[SAM_encoder(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." 
<< std::endl; } else { - std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " + std::cout << "[SAM_encoder(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; } @@ -235,6 +235,7 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, #ifdef ROI for (const auto &box : boundingBoxes) #else + for (const auto &box : result.boxes) #endif // ROI { @@ -303,11 +304,11 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - std::cout << "[SAM(CUDA)]: " << pre_process_time << "ms pre-process, " + std::cout << "[SAM_decoder(CUDA)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." << std::endl; } else { - std::cout << "[SAM(CPU)]: " << pre_process_time << "ms pre-process, " + std::cout << "[SAM_decoder(CPU)]: " << pre_process_time << "ms pre-process, " << process_time << "ms inference, " << post_process_time << "ms post-process." 
<< std::endl; } From 33cde1a0677f53b81f76087f6073d199c333c240 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 23 Sep 2025 20:13:06 +0200 Subject: [PATCH 24/35] Deleted redundant code and fixed some brackets --- include/dl_types.h | 24 +++--------------------- include/sam_inference.h | 1 - src/sam_inference.cpp | 15 ++++++--------- src/segmentation.cpp | 4 ---- 4 files changed, 9 insertions(+), 35 deletions(-) diff --git a/include/dl_types.h b/include/dl_types.h index 033df56..0f5874d 100644 --- a/include/dl_types.h +++ b/include/dl_types.h @@ -10,15 +10,8 @@ namespace SEG { enum MODEL_TYPE { - // FLOAT32 MODEL SAM_SEGMENT_ENCODER = 1, SAM_SEGMENT_DECODER = 2, - // YOLO_CLS = 3, - - // FLOAT16 MODEL - // YOLO_DETECT_V8_HALF = 4, - // YOLO_POSE_V8_HALF = 5, - // YOLO_CLS_HALF = 6 }; typedef struct _DL_INIT_PARAM @@ -27,13 +20,9 @@ namespace SEG std::string modelPath; MODEL_TYPE modelType = SAM_SEGMENT_ENCODER; std::vector imgSize = {640, 640}; - float rectConfidenceThreshold = 0.6; - float iouThreshold = 0.5; - int keyPointsNum = 2; // Note:kpt number for pose bool cudaEnable = false; int logSeverityLevel = 3; int intraOpNumThreads = 1; - // std::vector boxes; // For SAM encoder model, this will be filled with detected boxes // Overloaded output operator for _DL_INIT_PARAM to print its contents friend std::ostream &operator<<(std::ostream &os, const _DL_INIT_PARAM ¶m) @@ -44,9 +33,6 @@ namespace SEG for (const auto &size : param.imgSize) os << size << " "; os << "\n"; - os << "rectConfidenceThreshold: " << param.rectConfidenceThreshold << "\n"; - os << "iouThreshold: " << param.iouThreshold << "\n"; - os << "keyPointsNum: " << param.keyPointsNum << "\n"; os << "cudaEnable: " << (param.cudaEnable ? 
"true" : "false") << "\n"; os << "logSeverityLevel: " << param.logSeverityLevel << "\n"; os << "intraOpNumThreads: " << param.intraOpNumThreads; @@ -57,14 +43,10 @@ namespace SEG typedef struct _DL_RESULT { - - std::vector boxes; // For SAM encoder model, this will be filled with detected boxes - std::vector keyPoints; - - // Sam Part + // For SAM encoder model, this will be filled with detected boxes from object detection model. + std::vector boxes; std::vector embeddings; - // Masks for SAM decoder model output - std::vector masks; // Each cv::Mat represents a mask + std::vector masks; } DL_RESULT; } // namespace SEG diff --git a/include/sam_inference.h b/include/sam_inference.h index 6b4713e..250e254 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -43,7 +43,6 @@ class SAM SEG::MODEL_TYPE _modelType; std::vector _imgSize; float _rectConfidenceThreshold; - float _iouThreshold; }; #endif // SAMINFERENCE_H diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 8e61ec4..444163f 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -51,8 +51,6 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { return Ret; } try { - _rectConfidenceThreshold = iParams.rectConfidenceThreshold; - _iouThreshold = iParams.iouThreshold; _imgSize = iParams.imgSize; _modelType = iParams.modelType; _cudaEnable = iParams.cudaEnable; @@ -116,7 +114,8 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { const char *SAM::RunSession(const cv::Mat &iImg, std::vector &oResult, - SEG::MODEL_TYPE _modelType, SEG::DL_RESULT &result) { + SEG::MODEL_TYPE _modelType, SEG::DL_RESULT &result) +{ #ifdef benchmark clock_t starttime_1 = clock(); #endif // benchmark @@ -133,19 +132,19 @@ const char *SAM::RunSession(const cv::Mat &iImg, } else if (_modelType == SEG::SAM_SEGMENT_DECODER) { inputNodeDims = {1, 256, 64, 64}; } - TensorProcess(starttime_1, iImg, blob, inputNodeDims, _modelType, oResult, + TensorProcess_(starttime_1, iImg, blob, 
inputNodeDims, _modelType, oResult, utilities, result); return Ret; } template -const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, +const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, SEG::MODEL_TYPE _modelType, std::vector &oResult, - Utils &utilities, SEG::DL_RESULT &result) { - + Utils &utilities, SEG::DL_RESULT &result) +{ switch (_modelType) { case SEG::SAM_SEGMENT_ENCODER: // case OTHER_SAM_MODEL: @@ -235,7 +234,6 @@ const char *SAM::TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, #ifdef ROI for (const auto &box : boundingBoxes) #else - for (const auto &box : result.boxes) #endif // ROI { @@ -410,7 +408,6 @@ char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) { inputTensors.size(), _outputNodeNames.data(), _outputNodeNames.size()); } - _outputNodeNames.size(); delete[] blob; clock_t starttime_4 = clock(); double post_process_time = diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 41176de..0394db3 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -13,8 +13,6 @@ Initializer() { SEG::DL_INIT_PARAM params_decoder; SEG::DL_RESULT res; std::vector resSam; - params_encoder.rectConfidenceThreshold = 0.1; - params_encoder.iouThreshold = 0.5; params_encoder.modelPath = "/home/amigo/Documents/repos/sam_onnx_ros/build/SAM_encoder.onnx"; params_encoder.imgSize = {1024, 1024}; @@ -42,8 +40,6 @@ void SegmentAnything(std::vector> &samSegmentors, const SEG::DL_INIT_PARAM ¶ms_decoder, const cv::Mat &img, std::vector &resSam, SEG::DL_RESULT &res) { - - SEG::MODEL_TYPE modelTypeRef = params_encoder.modelType; samSegmentors[0]->RunSession(img, resSam, modelTypeRef, res); From 80bd8181756aa208ff96d228f5c964766df93179 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 23 Sep 2025 20:13:49 +0200 Subject: [PATCH 25/35] made private member methods that were needed to be --- include/sam_inference.h | 9 ++++----- src/sam_inference.cpp | 12 ++++++++---- 2 files 
changed, 12 insertions(+), 9 deletions(-) diff --git a/include/sam_inference.h b/include/sam_inference.h index 250e254..c3694f0 100644 --- a/include/sam_inference.h +++ b/include/sam_inference.h @@ -24,15 +24,14 @@ class SAM const char *RunSession(const cv::Mat &iImg, std::vector &oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT &result); - char *WarmUpSession(SEG::MODEL_TYPE modelType); +private: + + char *WarmUpSession_(SEG::MODEL_TYPE modelType); template - const char *TensorProcess(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, + const char *TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, N &blob, std::vector &inputNodeDims, SEG::MODEL_TYPE modelType, std::vector &oResult, Utils &utilities, SEG::DL_RESULT &result); - std::vector classes{}; - -private: Ort::Env _env; std::unique_ptr _session; bool _cudaEnable; diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 444163f..e8622ef 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -5,9 +5,12 @@ #define benchmark //#define ROI -SAM::SAM() {} +SAM::SAM() +{ +} -SAM::~SAM() { +SAM::~SAM() +{ // Clean up input/output node names for (auto &name : _inputNodeNames) { delete[] name; @@ -98,7 +101,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { .GetTensorTypeAndShapeInfo() .GetElementType(); - WarmUpSession(_modelType); + WarmUpSession_(_modelType); return RET_OK; } catch (const std::exception &e) { const char *str1 = "[SAM]:"; @@ -320,7 +323,8 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, return RET_OK; } -char *SAM::WarmUpSession(SEG::MODEL_TYPE _modelType) { +char *SAM::WarmUpSession_(SEG::MODEL_TYPE _modelType) +{ clock_t starttime_1 = clock(); Utils utilities; cv::Mat iImg = cv::Mat(cv::Size(_imgSize.at(0), _imgSize.at(1)), CV_8UC3); From ebe053989921b2eb674b6928c928307b68506a26 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Wed, 24 Sep 2025 15:39:03 +0200 Subject: [PATCH 26/35] Fixed 
structurre of CMakeLists and package.xml and logged with console bridge --- CMakeLists.txt | 132 +++++++++++++++++++++++++----------------- package.xml | 5 ++ src/sam_inference.cpp | 46 +++++++++------ 3 files changed, 112 insertions(+), 71 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 12ca294..eaec0e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,19 +3,17 @@ cmake_minimum_required(VERSION 3.14) project(sam_onnx_ros) # -------------- CMake Policies ------------------# -#add_compile_options(-Wall -Werror=all) -#add_compile_options(-Wextra -Werror=extra) +# add_compile_options(-Wall -Werror=all) +# add_compile_options(-Wextra -Werror=extra) # -------------- Support C++17 for using filesystem ------------------# set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_CXX_EXTENSIONS ON) -#set(CMAKE_INCLUDE_CURRENT_DIR ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + # -------------- OpenCV ------------------# find_package(OpenCV REQUIRED) -include_directories(${OpenCV_INCLUDE_DIRS}) - +find_package(console_bridge REQUIRED) # -------------- ONNXRuntime ------------------# set(ONNXRUNTIME_VERSION 1.21.0) @@ -26,94 +24,124 @@ include_directories(${ONNXRUNTIME_ROOT}/include) add_definitions(-DUSE_CUDA=1) include_directories(/usr/local/cuda/include) +# -------------- Models ------------------# +# TODO: Find proper folder Copy sam_.onnx file to the same folder of the executable file +configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) +configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) + + find_package(catkin REQUIRED COMPONENTS - # roscpp - # tue_config - # tue_filesystem - # code_profiler - + rosconsole + console_bridge #onnxruntime_ros ) +find_package(console_bridge REQUIRED) + # 
------------------------------------------------------------------------------------------------ # CATKIN EXPORT # ------------------------------------------------------------------------------------------------ catkin_package( INCLUDE_DIRS include - #LIBRARIES ${PROJECT_NAME} - LIBRARIES sam_onnx_ros_core + LIBRARIES ${PROJECT_NAME}_lib CATKIN_DEPENDS - DEPENDS OpenCV + DEPENDS OpenCV console_bridge ) # ------------------------------------------------------------------------------------------------ # BUILD # ------------------------------------------------------------------------------------------------ -include_directories( - include - SYSTEM - ${OpenCV_INCLUDE_DIRS} - ${catkin_INCLUDE_DIRS} -) - -set(PROJECT_SOURCES - src/main.cpp +# Build core library +add_library(${PROJECT_NAME}_lib src/sam_inference.cpp src/segmentation.cpp src/utils.cpp ) -# Build core library (no main.cpp here) -add_library(sam_onnx_ros_core - src/sam_inference.cpp - src/segmentation.cpp - src/utils.cpp +target_include_directories(${PROJECT_NAME}_lib + PUBLIC + include + SYSTEM + ${OpenCV_INCLUDE_DIRS} + ${catkin_INCLUDE_DIRS} + ${console_bridge_INCLUDE_DIRS} + ${ONNXRUNTIME_ROOT}/include ) -target_link_libraries(sam_onnx_ros_core + + +target_link_libraries(${PROJECT_NAME}_lib ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so ) -target_include_directories(sam_onnx_ros_core PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) # Main executable links the core lib -add_executable(${PROJECT_NAME} src/main.cpp) -target_link_libraries(${PROJECT_NAME} sam_onnx_ros_core) +add_executable(${PROJECT_NAME} + src/main.cpp +) -# Copy sam_.onnx file to the same folder of the executable file -configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) -configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) 
+target_link_libraries(${PROJECT_NAME} + ${PROJECT_NAME}_lib + ${catkin_LIBRARIES} + ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so +) + +# ------------------------------------------------------------------------------------------------ +# Install Targets +# ------------------------------------------------------------------------------------------------ + +install( + DIRECTORY include/ + DESTINATION ${CATKIN_GLOBAL_INCLUDE_DESTINATION} +) -# Create folder name images in the same folder of the executable file -add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E make_directory ${CMAKE_CURRENT_BINARY_DIR}/images +install( + TARGETS + ${PROJECT_NAME}_lib + ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} + LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} + RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} ) -# # Enable testing +install( + TARGETS + ${PROJECT_NAME} + DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +) + +# ------------------------------------------------------------------------------------------------ +# Testing +# ------------------------------------------------------------------------------------------------ if (CATKIN_ENABLE_TESTING) -# find_package(catkin_lint_cmake REQUIRED) -# catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") + find_package(catkin_lint_cmake REQUIRED) + catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") - # Utils unit tests (no models needed) + # Utils unit tests (no models needed) catkin_add_gtest(utils_tests test/test_utils.cpp) if(TARGET utils_tests) - target_link_libraries(utils_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES}) - target_include_directories(utils_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + target_link_libraries( + utils_tests + ${PROJECT_NAME}_lib + ${catkin_LIBRARIES} + GTest::gtest + GTest::gtest_main + ) + #target_include_directories(utils_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) 
endif() # SAM integration-ish tests (may need models) catkin_add_gtest(sam_tests test/sam_test.cpp) if(TARGET sam_tests) - target_link_libraries(sam_tests sam_onnx_ros_core GTest::gtest_main ${catkin_LIBRARIES}) - target_include_directories(sam_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) + target_link_libraries( + sam_tests + ${PROJECT_NAME}_lib + ${catkin_LIBRARIES} + GTest::gtest + GTest::gtest_main + ) + #target_include_directories(sam_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/include) endif() endif() - - - -#If you want to debug -# set(CMAKE_BUILD_TYPE Debug) -# set(CMAKE_CXX_FLAGS_DEBUG "-g") diff --git a/package.xml b/package.xml index 7d17b8d..2228902 100644 --- a/package.xml +++ b/package.xml @@ -12,13 +12,18 @@ BSD + catkin + libconsole-bridge-dev + libopencv-dev onnxruntime_ros + rosconsole libopencv-dev onnxruntime_ros + rosconsole catkin_lint_cmake diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index e8622ef..457dcb5 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -1,6 +1,8 @@ #include "sam_inference.h" #include "utils.h" + #include +#include #define benchmark //#define ROI @@ -50,7 +52,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { if (result) { Ret = "[SAM]:Your model path is error.Change your model path without " "chinese characters."; - std::cout << Ret << std::endl; + CONSOLE_BRIDGE_logWarn("%s", Ret); return Ret; } try { @@ -109,7 +111,7 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { std::string str_result = std::string(str1) + std::string(str2); char *merged = new char[str_result.length() + 1]; std::strcpy(merged, str_result.c_str()); - std::cout << merged << std::endl; + CONSOLE_BRIDGE_logWarn("%s", merged); delete[] merged; return "[SAM]:Create _session failed."; } @@ -190,13 +192,17 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - 
std::cout << "[SAM_encoder(CUDA)]: " << pre_process_time << "ms pre-process, " - << process_time << "ms inference, " << post_process_time - << "ms post-process." << std::endl; + CONSOLE_BRIDGE_logInform("[SAM_encoder(CUDA)]: %.2fms pre-process, %.2fms inference, " + "%.2fms post-process.", + pre_process_time, process_time, + post_process_time + ); } else { - std::cout << "[SAM_encoder(CPU)]: " << pre_process_time << "ms pre-process, " - << process_time << "ms inference, " << post_process_time - << "ms post-process." << std::endl; + CONSOLE_BRIDGE_logInform("[SAM_encoder(CPU)]: %.2fms pre-process, %.2fms inference, " + "%.2fms post-process.", + pre_process_time, process_time, + post_process_time + ); } #endif // benchmark @@ -305,20 +311,24 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, double post_process_time = (double)(starttime_4 - starttime_3) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - std::cout << "[SAM_decoder(CUDA)]: " << pre_process_time << "ms pre-process, " - << process_time << "ms inference, " << post_process_time - << "ms post-process." << std::endl; + CONSOLE_BRIDGE_logInform("[SAM_decoder(CUDA)]: %.2fms pre-process, %.2fms inference, " + "%.2fms post-process.", + pre_process_time, process_time, + post_process_time + ); } else { - std::cout << "[SAM_decoder(CPU)]: " << pre_process_time << "ms pre-process, " - << process_time << "ms inference, " << post_process_time - << "ms post-process." << std::endl; + CONSOLE_BRIDGE_logInform("[SAM_decoder(CPU)]: %.2fms pre-process, %.2fms inference, " + "%.2fms post-process.", + pre_process_time, process_time, + post_process_time + ); } #endif // benchmark break; } default: - std::cout << "[SAM]: " << "Not support model type." 
<< std::endl; + CONSOLE_BRIDGE_logWarn("[SAM]: " "Not support model type."); } return RET_OK; } @@ -350,8 +360,7 @@ char *SAM::WarmUpSession_(SEG::MODEL_TYPE _modelType) double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time - << " ms. " << std::endl; + CONSOLE_BRIDGE_logInform("[SAM(CUDA)]: Cuda warm-up cost %.2f ms.", post_process_time); } break; } @@ -417,8 +426,7 @@ char *SAM::WarmUpSession_(SEG::MODEL_TYPE _modelType) double post_process_time = (double)(starttime_4 - starttime_1) / CLOCKS_PER_SEC * 1000; if (_cudaEnable) { - std::cout << "[SAM(CUDA)]: " << "Cuda warm-up cost " << post_process_time - << " ms. " << std::endl; + CONSOLE_BRIDGE_logInform("[SAM(CUDA)]: Cuda warm-up cost %.2f ms.", post_process_time); } break; From 0b63cefb61adfec2b29539b1caeda0e93f58c96f Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Mon, 29 Sep 2025 13:27:58 +0200 Subject: [PATCH 27/35] updated CMakeLists, included .hpp suffix and sam_onnx_ros include dir and configuration .hpp.in file --- CMakeLists.txt | 48 +++++++++++-------- include/sam_onnx_ros/config.hpp.in | 7 +++ .../{dl_types.h => sam_onnx_ros/dl_types.hpp} | 0 .../sam_inference.hpp} | 4 +- .../segmentation.hpp} | 3 +- include/{utils.h => sam_onnx_ros/utils.hpp} | 4 +- src/main.cpp | 5 +- src/sam_inference.cpp | 21 ++++---- src/segmentation.cpp | 2 +- src/utils.cpp | 4 +- test/sam_test.cpp | 7 +-- test/test_utils.cpp | 3 +- 12 files changed, 61 insertions(+), 47 deletions(-) create mode 100644 include/sam_onnx_ros/config.hpp.in rename include/{dl_types.h => sam_onnx_ros/dl_types.hpp} (100%) rename include/{sam_inference.h => sam_onnx_ros/sam_inference.hpp} (96%) rename include/{segmentation.h => sam_onnx_ros/segmentation.hpp} (91%) rename include/{utils.h => sam_onnx_ros/utils.hpp} (98%) diff --git a/CMakeLists.txt b/CMakeLists.txt index eaec0e5..fcda779 100644 --- a/CMakeLists.txt +++ 
b/CMakeLists.txt @@ -3,26 +3,19 @@ cmake_minimum_required(VERSION 3.14) project(sam_onnx_ros) # -------------- CMake Policies ------------------# -# add_compile_options(-Wall -Werror=all) -# add_compile_options(-Wextra -Werror=extra) +add_compile_options(-Wall -Werror=all) +add_compile_options(-Wextra -Werror=extra) # -------------- Support C++17 for using filesystem ------------------# set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -# -------------- OpenCV ------------------# -find_package(OpenCV REQUIRED) -find_package(console_bridge REQUIRED) # -------------- ONNXRuntime ------------------# set(ONNXRUNTIME_VERSION 1.21.0) set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam.bak/onnxruntime-linux-x64-gpu-1.21.1") include_directories(${ONNXRUNTIME_ROOT}/include) -# -------------- Cuda ------------------# -add_definitions(-DUSE_CUDA=1) -include_directories(/usr/local/cuda/include) # -------------- Models ------------------# # TODO: Find proper folder Copy sam_.onnx file to the same folder of the executable file @@ -37,7 +30,20 @@ find_package(catkin REQUIRED #onnxruntime_ros ) -find_package(console_bridge REQUIRED) +find_package(OpenCV REQUIRED) + +# -------------- Cuda ------------------# +add_definitions(-DUSE_CUDA=1) +include_directories(/usr/local/cuda/include) + +set(${PROJECT_NAME}_CUDA_ENABLED ${onnxruntime_ros_CUDA_ENABLED}) +if(onnxruntime_ros_CUDA_ENABLED) + find_package(CUDAToolkit REQUIRED) +endif() + +configure_file(include/${PROJECT_NAME}/config.hpp.in ${CATKIN_DEVEL_PREFIX}/${CATKIN_GLOBAL_INCLUDE_DESTINATION}/${PROJECT_NAME}/config.hpp) +# add_custom_target(generate_config_hpp +# DEPENDS ${CATKIN_DEVEL_PREFIX}/${CATKIN_GLOBAL_INCLUDE_DESTINATION}/${PROJECT_NAME}/config.hpp # ------------------------------------------------------------------------------------------------ # CATKIN EXPORT @@ -45,7 +51,7 @@ find_package(console_bridge REQUIRED) catkin_package( INCLUDE_DIRS include - 
LIBRARIES ${PROJECT_NAME}_lib + LIBRARIES ${PROJECT_NAME} CATKIN_DEPENDS DEPENDS OpenCV console_bridge ) @@ -54,14 +60,14 @@ catkin_package( # BUILD # ------------------------------------------------------------------------------------------------ -# Build core library -add_library(${PROJECT_NAME}_lib +# Build library +add_library(${PROJECT_NAME} src/sam_inference.cpp src/segmentation.cpp src/utils.cpp ) -target_include_directories(${PROJECT_NAME}_lib +target_include_directories(${PROJECT_NAME} PUBLIC include SYSTEM @@ -72,19 +78,19 @@ target_include_directories(${PROJECT_NAME}_lib ) -target_link_libraries(${PROJECT_NAME}_lib +target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so ) # Main executable links the core lib -add_executable(${PROJECT_NAME} +add_executable(test_${PROJECT_NAME} src/main.cpp ) -target_link_libraries(${PROJECT_NAME} - ${PROJECT_NAME}_lib +target_link_libraries(test_${PROJECT_NAME} + ${PROJECT_NAME} ${catkin_LIBRARIES} ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so ) @@ -100,7 +106,7 @@ install( install( TARGETS - ${PROJECT_NAME}_lib + ${PROJECT_NAME} ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} @@ -116,8 +122,8 @@ install( # Testing # ------------------------------------------------------------------------------------------------ if (CATKIN_ENABLE_TESTING) - find_package(catkin_lint_cmake REQUIRED) - catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") + #find_package(catkin_lint_cmake REQUIRED) + #catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") # Utils unit tests (no models needed) catkin_add_gtest(utils_tests test/test_utils.cpp) diff --git a/include/sam_onnx_ros/config.hpp.in b/include/sam_onnx_ros/config.hpp.in new file mode 100644 index 0000000..2794619 --- /dev/null +++ b/include/sam_onnx_ros/config.hpp.in @@ 
-0,0 +1,7 @@ +#ifndef YOLO_ONNX_SAM_CONFIG_HPP_ +#define YOLO_ONNX_SAM_CONFIG_HPP_ + +//Set which version of the Tree Interface to use +#define YOLO_ONNX_SAM_CUDA_ENABLED @onnx_sam_ros_CUDA_ENABLED@ + +#endif //#define YOLO_ONNX_SAM_CONFIG_HPP_ diff --git a/include/dl_types.h b/include/sam_onnx_ros/dl_types.hpp similarity index 100% rename from include/dl_types.h rename to include/sam_onnx_ros/dl_types.hpp diff --git a/include/sam_inference.h b/include/sam_onnx_ros/sam_inference.hpp similarity index 96% rename from include/sam_inference.h rename to include/sam_onnx_ros/sam_inference.hpp index c3694f0..d098d4a 100644 --- a/include/sam_inference.h +++ b/include/sam_onnx_ros/sam_inference.hpp @@ -4,14 +4,14 @@ #define RET_OK nullptr #include -#include #include #include -#include "utils.h" #ifdef USE_CUDA #include #endif +#include "sam_onnx_ros/utils.hpp" + class SAM { public: diff --git a/include/segmentation.h b/include/sam_onnx_ros/segmentation.hpp similarity index 91% rename from include/segmentation.h rename to include/sam_onnx_ros/segmentation.hpp index 83102e0..c7ebfd0 100644 --- a/include/segmentation.h +++ b/include/sam_onnx_ros/segmentation.hpp @@ -1,9 +1,8 @@ #ifndef SEGMENTATION_H #define SEGMENTATION_H -#include +#include "sam_onnx_ros/sam_inference.hpp" -#include "sam_inference.h" std::tuple>, SEG::_DL_INIT_PARAM, SEG::_DL_INIT_PARAM, SEG::DL_RESULT, std::vector> Initializer(); void SegmentAnything(std::vector>& samSegmentors, const SEG::_DL_INIT_PARAM& params_encoder, const SEG::_DL_INIT_PARAM& params_decoder, const cv::Mat& img, std::vector &resSam, diff --git a/include/utils.h b/include/sam_onnx_ros/utils.hpp similarity index 98% rename from include/utils.h rename to include/sam_onnx_ros/utils.hpp index a471512..e8084a1 100644 --- a/include/utils.h +++ b/include/sam_onnx_ros/utils.hpp @@ -3,15 +3,15 @@ #define RET_OK nullptr -#include #include #include #include "onnxruntime_cxx_api.h" -#include "dl_types.h" #ifdef USE_CUDA #include #endif +#include 
"sam_onnx_ros/dl_types.hpp" + class Utils { public: diff --git a/src/main.cpp b/src/main.cpp index cd0f9dd..ed09788 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,9 +1,8 @@ -#include "segmentation.h" -#include -#include #include #include +#include "sam_onnx_ros/segmentation.hpp" + int main() { // Running inference diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index 457dcb5..ccfb2b3 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -1,9 +1,9 @@ -#include "sam_inference.h" -#include "utils.h" - #include #include +#include "sam_onnx_ros/sam_inference.hpp" +#include "sam_onnx_ros/utils.hpp" + #define benchmark //#define ROI @@ -97,11 +97,11 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { auto input_shape = _session->GetInputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - auto output_shape = - _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); - auto output_type = _session->GetOutputTypeInfo(0) - .GetTensorTypeAndShapeInfo() - .GetElementType(); + // auto output_shape = + // _session->GetOutputTypeInfo(0).GetTensorTypeAndShapeInfo().GetShape(); + // auto output_type = _session->GetOutputTypeInfo(0) + // .GetTensorTypeAndShapeInfo() + // .GetElementType(); WarmUpSession_(_modelType); return RET_OK; @@ -251,9 +251,10 @@ const char *SAM::TensorProcess_(clock_t &starttime_1, const cv::Mat &iImg, embeddings.data(), // Use the embeddings from the encoder embeddings.size(), // Total number of elements decoderInputDims.data(), decoderInputDims.size()); + // Use center of bounding box as foreground point - float centerX = box.x + box.width / 2.0; - float centerY = box.y + box.height / 2.0; + // float centerX = box.x + box.width / 2.0; + // float centerY = box.y + box.height / 2.0; // Convert bounding box to points std::vector pointCoords = { diff --git a/src/segmentation.cpp b/src/segmentation.cpp index 0394db3..ad66eaa 100644 --- a/src/segmentation.cpp +++ b/src/segmentation.cpp @@ -1,4 +1,4 @@ 
-#include "segmentation.h" +#include "sam_onnx_ros/segmentation.hpp" std::tuple>, SEG::DL_INIT_PARAM, SEG::DL_INIT_PARAM, SEG::DL_RESULT, std::vector> diff --git a/src/utils.cpp b/src/utils.cpp index 643dba4..8af08a3 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1,5 +1,5 @@ -#include "utils.h" -#include // for guided filter +#include "sam_onnx_ros/utils.hpp" + //#define LOGGING // Constructor diff --git a/test/sam_test.cpp b/test/sam_test.cpp index cd54d7e..5a64126 100644 --- a/test/sam_test.cpp +++ b/test/sam_test.cpp @@ -1,9 +1,10 @@ #include #include #include -#include "segmentation.h" -#include "sam_inference.h" -#include "dl_types.h" + +#include "sam_onnx_ros/sam_inference.hpp" +#include "sam_onnx_ros/dl_types.hpp" +#include "sam_onnx_ros/segmentation.hpp" // This file contains higher-level (integration-ish) tests. // They cover object/session creation and a full pipeline run using synthetic images. diff --git a/test/test_utils.cpp b/test/test_utils.cpp index 92d20ab..bde7145 100644 --- a/test/test_utils.cpp +++ b/test/test_utils.cpp @@ -1,6 +1,7 @@ #include #include -#include "utils.h" + +#include "sam_onnx_ros/utils.hpp" // This file contains small, focused unit tests for Utils. 
// We verify image preprocessing (channel conversion, aspect-preserving resize, padding) From b7a833892fbce11153f4778fbf9fddcf2da41f7f Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 30 Sep 2025 20:16:52 +0200 Subject: [PATCH 28/35] updated CMakeLists rosconsole bridge --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fcda779..ca92d96 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,10 +26,10 @@ configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.on find_package(catkin REQUIRED COMPONENTS rosconsole - console_bridge + #onnxruntime_ros ) - +find_package(console_bridge REQUIRED) find_package(OpenCV REQUIRED) # -------------- Cuda ------------------# From da1613388f961b5c87c10a9af0aadde1df40b915 Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 21 Oct 2025 20:57:34 +0200 Subject: [PATCH 29/35] updated CMakeLists for onnxruntime ros package and also .vscode configs --- .vscode/c_cpp_properties_json | 23 ++++++++ .vscode/launch.json | 94 ++++++++++++++++++++++++++++++ .vscode/settings.json | 77 ++++++++++++++++++++++++ .vscode/tasks.json | 94 ++++++++++++++++++++++++++++++ CMakeLists.txt | 79 ++++++++++++++----------- include/sam_onnx_ros/config.hpp.in | 11 ++-- 6 files changed, 340 insertions(+), 38 deletions(-) create mode 100644 .vscode/c_cpp_properties_json create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json create mode 100644 .vscode/tasks.json diff --git a/.vscode/c_cpp_properties_json b/.vscode/c_cpp_properties_json new file mode 100644 index 0000000..7b0dd00 --- /dev/null +++ b/.vscode/c_cpp_properties_json @@ -0,0 +1,23 @@ +{ + "configurations": [ + { + "name": "Linux", + "includePath": [ + "${workspaceFolder}/include/**", + "${workspaceFolder}/../hero_sam/onnxruntime-linux-x64-gpu-1.21.1/include", + "/usr/local/cuda/include", + "/usr/include/opencv4/**", + "/opt/ros/noetic/include/**" + ], + "defines": [ + + 
], + "compilerPath": "/usr/bin/gcc", + "cStandard": "c17", + "cppStandard": "c++17", + "intelliSenseMode": "linux-gcc-x64", + "configurationProvider": "ms-vscode.cmake-tools" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..0b5ef6c --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,94 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Debug SAM Model", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/devel/lib/sam_onnx_ros/test_sam_onnx_ros", // Path to the executable + "args": [], // Add any command-line arguments for your program here + "stopAtEntry": false, + "cwd": "${workspaceFolder}/build", // Set the working directory + "environment": [], + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ], + "preLaunchTask": "build-sam-project" // Ensure the project is built before debugging + }, + { + "name": "Debug GTest (pick binary)", + "type": "cppdbg", + "request": "launch", + "program": "${input:testBinary}", // pick utils_tests or sam_tests + "args": [ + //"--gtest_filter=${input:gtestFilter}" // uncomment to filter tests + ], + "cwd": "${workspaceFolder}", // cwd doesn’t matter if SAM_MODELS_DIR is set + "environment": [ + { + "name": "SAM_MODELS_DIR", + "value": "${env:SAM_MODELS_DIR}" + } // or hardcode a path here + ], + "stopAtEntry": false, + "externalConsole": false, + "MIMode": "gdb", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + } + ] + }, + { + "name": "Run all tests (ctest)", + "type": "cppdbg", + "request": "launch", + "program": "/usr/bin/ctest", + "args": [ + "--test-dir", + "/home/amigo/ros/noetic/system/build/sam_onnx_ros", + "-R", + "(sam_tests|utils_tests)", + 
"--output-on-failure", + "-j", + "4" + ], + "cwd": "/home/amigo/ros/noetic/system/build/sam_onnx_ros", + "environment": [ + { + "name": "SAM_MODELS_DIR", + "value": "${env:SAM_MODELS_DIR}" + } + ], + "stopAtEntry": false, + "externalConsole": false, + "MIMode": "gdb" + // Note: ctest launching isn't great for step-debugging individual tests. + } + ], + "inputs": [ + { + "id": "testBinary", + "type": "pickString", + "description": "Select gtest executable", + "options": [ + "/home/amigo/ros/noetic/system/devel/.private/sam_onnx_ros/lib/sam_onnx_ros/utils_tests", + "/home/amigo/ros/noetic/system/devel/.private/sam_onnx_ros/lib/sam_onnx_ros/sam_tests" + ] + }, + { + "id": "gtestFilter", + "type": "promptString", + "description": "gtest filter (e.g. Utils* or SamInferenceTest.PreProcess*)", + "default": "*" + } + ], +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..57bd363 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,77 @@ +{ + "files.associations": { + "iostream": "cpp", + "cctype": "cpp", + "clocale": "cpp", + "cmath": "cpp", + "cstdarg": "cpp", + "cstddef": "cpp", + "cstdio": "cpp", + "cstdlib": "cpp", + "cstring": "cpp", + "ctime": "cpp", + "cwchar": "cpp", + "cwctype": "cpp", + "array": "cpp", + "atomic": "cpp", + "strstream": "cpp", + "bit": "cpp", + "*.tcc": "cpp", + "bitset": "cpp", + "chrono": "cpp", + "codecvt": "cpp", + "complex": "cpp", + "condition_variable": "cpp", + "cstdint": "cpp", + "deque": "cpp", + "list": "cpp", + "map": "cpp", + "set": "cpp", + "unordered_map": "cpp", + "unordered_set": "cpp", + "vector": "cpp", + "exception": "cpp", + "algorithm": "cpp", + "functional": "cpp", + "iterator": "cpp", + "memory": "cpp", + "memory_resource": "cpp", + "numeric": "cpp", + "optional": "cpp", + "random": "cpp", + "ratio": "cpp", + "regex": "cpp", + "string": "cpp", + "string_view": "cpp", + "system_error": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "fstream": "cpp", + 
"initializer_list": "cpp", + "iomanip": "cpp", + "iosfwd": "cpp", + "istream": "cpp", + "limits": "cpp", + "mutex": "cpp", + "new": "cpp", + "ostream": "cpp", + "shared_mutex": "cpp", + "sstream": "cpp", + "stdexcept": "cpp", + "streambuf": "cpp", + "thread": "cpp", + "cfenv": "cpp", + "cinttypes": "cpp", + "typeindex": "cpp", + "typeinfo": "cpp", + "variant": "cpp", + "compare": "cpp", + "concepts": "cpp", + "numbers": "cpp", + "semaphore": "cpp", + "stop_token": "cpp", + "*.txx": "cpp", + "filesystem": "cpp" + } +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..c610e53 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,94 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build-sam-project", + "dependsOrder": "sequence", + "dependsOn": ["configure-sam", "build-sam-binary"], + "problemMatcher": ["$gcc"], + "group": "build" + }, + { + "label": "build-pipeline-project", + "dependsOrder": "sequence", + "dependsOn": ["configure-pipeline", "build-pipeline-binary"], + "problemMatcher": ["$gcc"], + "group": "build" + }, + { + "label": "build-yolo-project", + "dependsOrder": "sequence", + "dependsOn": ["configure-yolo", "build-yolo-binary"], + "problemMatcher": ["$gcc"], + "group": "build" + }, + { + "label": "configure-sam", + "type": "shell", + "command": "cmake", + "args": [ + "-DCMAKE_BUILD_TYPE=Debug", + "-DCMAKE_POLICY_VERSION_MINIMUM=3.5", + "-S", "${workspaceFolder}/", + "-B", "${workspaceFolder}/build" + ], + "problemMatcher": ["$gcc"] + }, + { + "label": "configure-pipeline", + "type": "shell", + "command": "cmake", + "args": [ + "-DCMAKE_BUILD_TYPE=Debug", + "-S", "${workspaceFolder}/pipeline", + "-B", "${workspaceFolder}/pipeline/build" + ], + "problemMatcher": ["$gcc"] + }, + { + "label": "configure-yolo", + "type": "shell", + "command": "cmake", + "args": [ + "-DCMAKE_BUILD_TYPE=Debug", + "-S", "${workspaceFolder}/yolo_inference", + "-B", "${workspaceFolder}/yolo_inference/build" + ], 
+ "problemMatcher": ["$gcc"] + }, + { + "label": "build-sam-binary", + "type": "shell", + "command": "cmake", + "args": [ + "--build", + "${workspaceFolder}/build", + "--config", "Debug" + ], + "problemMatcher": ["$gcc"] + }, + { + "label": "build-pipeline-binary", + "type": "shell", + "command": "cmake", + "args": [ + "--build", + "${workspaceFolder}/pipeline/build", + "--config", "Debug" + ], + "problemMatcher": ["$gcc"] + }, + { + "label": "build-yolo-binary", + "type": "shell", + "command": "cmake", + "args": [ + "--build", + "${workspaceFolder}/yolo_inference/build", + "--config", "Debug" + ], + "problemMatcher": ["$gcc"] + } + + ] +} diff --git a/CMakeLists.txt b/CMakeLists.txt index ca92d96..753bb2b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.14) +cmake_minimum_required(VERSION 3.5) project(sam_onnx_ros) @@ -11,36 +11,52 @@ set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) -# -------------- ONNXRuntime ------------------# -set(ONNXRUNTIME_VERSION 1.21.0) -set(ONNXRUNTIME_ROOT "${CMAKE_CURRENT_SOURCE_DIR}/../hero_sam.bak/onnxruntime-linux-x64-gpu-1.21.1") -include_directories(${ONNXRUNTIME_ROOT}/include) - - # -------------- Models ------------------# -# TODO: Find proper folder Copy sam_.onnx file to the same folder of the executable file -configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) -configure_file(~/Documents/repos/hero_sam.bak/sam_inference/model/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) +set(SAM_MODELS_DIR "$ENV{HOME}/Documents/repos/hero_sam.bak/sam_inference/model" CACHE PATH "SAM models dir") +if (EXISTS "${SAM_MODELS_DIR}/SAM_mask_decoder.onnx") + configure_file(${SAM_MODELS_DIR}/SAM_mask_decoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_mask_decoder.onnx COPYONLY) +endif() +if (EXISTS "${SAM_MODELS_DIR}/SAM_encoder.onnx") + 
configure_file(${SAM_MODELS_DIR}/SAM_encoder.onnx ${CMAKE_CURRENT_BINARY_DIR}/SAM_encoder.onnx COPYONLY) +endif() +# ------------------------------------------------------------------------------------------------ +# CATKIN EXPORT +# ------------------------------------------------------------------------------------------------ find_package(catkin REQUIRED COMPONENTS rosconsole - - #onnxruntime_ros + onnxruntime_ros ) find_package(console_bridge REQUIRED) find_package(OpenCV REQUIRED) -# -------------- Cuda ------------------# -add_definitions(-DUSE_CUDA=1) -include_directories(/usr/local/cuda/include) - +# -------------- CUDA (optional, version-safe) ------------------# set(${PROJECT_NAME}_CUDA_ENABLED ${onnxruntime_ros_CUDA_ENABLED}) -if(onnxruntime_ros_CUDA_ENABLED) - find_package(CUDAToolkit REQUIRED) +set(${PROJECT_NAME}_CUDA_INCLUDE_DIRS "") +set(${PROJECT_NAME}_CUDA_TARGET_LINK_LIBRARIES "") + +if(${PROJECT_NAME}_CUDA_ENABLED) + if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.17") + find_package(CUDAToolkit QUIET) + if(CUDAToolkit_FOUND) + set(${PROJECT_NAME}_CUDA_INCLUDE_DIRS "${CUDAToolkit_INCLUDE_DIRS}") + list(APPEND ${PROJECT_NAME}_CUDA_TARGET_LINK_LIBRARIES CUDA::cudart) + else() + message(STATUS "CUDAToolkit not found; continuing without explicit cudart link") + endif() + else() + message(STATUS "CMake ${CMAKE_VERSION} < 3.17: skipping find_package(CUDAToolkit); trying /usr/local/cuda") + list(APPEND ${PROJECT_NAME}_CUDA_INCLUDE_DIRS /usr/local/cuda/include) + find_library(CUDART_LIB cudart HINTS /usr/local/cuda/lib64 /usr/lib/x86_64-linux-gnu) + if(CUDART_LIB) + list(APPEND ${PROJECT_NAME}_CUDA_TARGET_LINK_LIBRARIES ${CUDART_LIB}) + endif() + endif() endif() + configure_file(include/${PROJECT_NAME}/config.hpp.in ${CATKIN_DEVEL_PREFIX}/${CATKIN_GLOBAL_INCLUDE_DESTINATION}/${PROJECT_NAME}/config.hpp) # add_custom_target(generate_config_hpp # DEPENDS ${CATKIN_DEVEL_PREFIX}/${CATKIN_GLOBAL_INCLUDE_DESTINATION}/${PROJECT_NAME}/config.hpp @@ -52,7 +68,7 @@ 
configure_file(include/${PROJECT_NAME}/config.hpp.in ${CATKIN_DEVEL_PREFIX}/${CA catkin_package( INCLUDE_DIRS include LIBRARIES ${PROJECT_NAME} - CATKIN_DEPENDS + CATKIN_DEPENDS rosconsole onnxruntime_ros DEPENDS OpenCV console_bridge ) @@ -62,37 +78,34 @@ catkin_package( # Build library add_library(${PROJECT_NAME} - src/sam_inference.cpp - src/segmentation.cpp - src/utils.cpp + src/sam_inference.cpp + src/segmentation.cpp + src/utils.cpp ) target_include_directories(${PROJECT_NAME} PUBLIC include + ${CATKIN_DEVEL_PREFIX}/${CATKIN_GLOBAL_INCLUDE_DESTINATION} SYSTEM + ${${PROJECT_NAME}_CUDA_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS} ${catkin_INCLUDE_DIRS} - ${console_bridge_INCLUDE_DIRS} - ${ONNXRUNTIME_ROOT}/include ) - target_link_libraries(${PROJECT_NAME} - ${OpenCV_LIBS} - ${catkin_LIBRARIES} - ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so + ${OpenCV_LIBS} + ${catkin_LIBRARIES} + ${${PROJECT_NAME}_CUDA_TARGET_LINK_LIBRARIES} ) -# Main executable links the core lib +# Main executable links the core lib but its used for testing (the lib is used on ed sensor integration) add_executable(test_${PROJECT_NAME} src/main.cpp ) target_link_libraries(test_${PROJECT_NAME} ${PROJECT_NAME} - ${catkin_LIBRARIES} - ${ONNXRUNTIME_ROOT}/lib/libonnxruntime.so ) # ------------------------------------------------------------------------------------------------ @@ -122,8 +135,8 @@ install( # Testing # ------------------------------------------------------------------------------------------------ if (CATKIN_ENABLE_TESTING) - #find_package(catkin_lint_cmake REQUIRED) - #catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") + find_package(catkin_lint_cmake REQUIRED) + catkin_add_catkin_lint_test("-W2 --ignore HEADER_OUTSIDE_PACKAGE_INCLUDE_PATH") # Utils unit tests (no models needed) catkin_add_gtest(utils_tests test/test_utils.cpp) diff --git a/include/sam_onnx_ros/config.hpp.in b/include/sam_onnx_ros/config.hpp.in index 2794619..3601585 100644 --- 
a/include/sam_onnx_ros/config.hpp.in +++ b/include/sam_onnx_ros/config.hpp.in @@ -1,7 +1,8 @@ -#ifndef YOLO_ONNX_SAM_CONFIG_HPP_ -#define YOLO_ONNX_SAM_CONFIG_HPP_ +#ifndef SAM_ONNX_ROS_CONFIG_HPP_ +#define SAM_ONNX_ROS_CONFIG_HPP_ -//Set which version of the Tree Interface to use -#define YOLO_ONNX_SAM_CUDA_ENABLED @onnx_sam_ros_CUDA_ENABLED@ +#define SAM_ONNX_FALSE 0 +#define SAM_ONNX_TRUE 1 +#define SAM_ONNX_ROS_CUDA_ENABLED SAM_ONNX_@sam_onnx_ros_CUDA_ENABLED@ -#endif //#define YOLO_ONNX_SAM_CONFIG_HPP_ +#endif //#define SAM_ONNX_ROS_CONFIG_HPP_ From 85b8df2a575baf9ce78a03a9d17930c7862406ed Mon Sep 17 00:00:00 2001 From: Matthijs van der Burgh Date: Wed, 22 Oct 2025 07:51:58 +0200 Subject: [PATCH 30/35] fix(cmake): opencv libraries --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 753bb2b..aa94a54 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -94,7 +94,7 @@ target_include_directories(${PROJECT_NAME} ) target_link_libraries(${PROJECT_NAME} - ${OpenCV_LIBS} + ${OpenCV_LIBRARIES} ${catkin_LIBRARIES} ${${PROJECT_NAME}_CUDA_TARGET_LINK_LIBRARIES} ) From a84c4f652d7a956dc99b9cb59f8dc6fcaeedc793 Mon Sep 17 00:00:00 2001 From: Matthijs van der Burgh Date: Wed, 22 Oct 2025 08:36:36 +0200 Subject: [PATCH 31/35] Add missing linking to opencv libs for test --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index aa94a54..575b8f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -157,6 +157,7 @@ if (CATKIN_ENABLE_TESTING) target_link_libraries( sam_tests ${PROJECT_NAME}_lib + ${OpenCV_LIBRARIES} ${catkin_LIBRARIES} GTest::gtest GTest::gtest_main From 9eced3511c75b383d82edb7ad2097f4aa033e09d Mon Sep 17 00:00:00 2001 From: Matthijs van der Burgh Date: Wed, 22 Oct 2025 08:44:43 +0200 Subject: [PATCH 32/35] Correct linking of tests --- CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index 575b8f6..d812544 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,7 +87,7 @@ target_include_directories(${PROJECT_NAME} PUBLIC include ${CATKIN_DEVEL_PREFIX}/${CATKIN_GLOBAL_INCLUDE_DESTINATION} - SYSTEM + SYSTEM ${${PROJECT_NAME}_CUDA_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS} ${catkin_INCLUDE_DIRS} @@ -143,7 +143,7 @@ if (CATKIN_ENABLE_TESTING) if(TARGET utils_tests) target_link_libraries( utils_tests - ${PROJECT_NAME}_lib + ${PROJECT_NAME} ${catkin_LIBRARIES} GTest::gtest GTest::gtest_main @@ -156,7 +156,7 @@ if (CATKIN_ENABLE_TESTING) if(TARGET sam_tests) target_link_libraries( sam_tests - ${PROJECT_NAME}_lib + ${PROJECT_NAME} ${OpenCV_LIBRARIES} ${catkin_LIBRARIES} GTest::gtest From d9c168445e86f94e7b4414a4d0c70256bcf1795a Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Sat, 25 Oct 2025 13:08:21 +0200 Subject: [PATCH 33/35] Checkpoint best working version --- .vscode/launch.json | 4 +- .vscode/tasks.json | 5 ++- src/main.cpp | 2 +- src/sam_inference.cpp | 79 +++++++++++++++++++--------------------- src/utils.cpp | 85 +++++++++++++++++++++++-------------------- 5 files changed, 91 insertions(+), 84 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 0b5ef6c..3c7955e 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -5,7 +5,7 @@ "name": "Debug SAM Model", "type": "cppdbg", "request": "launch", - "program": "${workspaceFolder}/build/devel/lib/sam_onnx_ros/test_sam_onnx_ros", // Path to the executable + "program": "/home/amigo/ros/noetic/system/devel/lib/sam_onnx_ros/test11_sam_onnx_ros", // Path to the executable "args": [], // Add any command-line arguments for your program here "stopAtEntry": false, "cwd": "${workspaceFolder}/build", // Set the working directory @@ -19,7 +19,7 @@ "ignoreFailures": true } ], - "preLaunchTask": "build-sam-project" // Ensure the project is built before debugging + //"preLaunchTask": "build-sam-project" // Ensure the project is built before debugging }, { "name": "Debug 
GTest (pick binary)", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index c610e53..1f85ca1 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -52,7 +52,9 @@ "args": [ "-DCMAKE_BUILD_TYPE=Debug", "-S", "${workspaceFolder}/yolo_inference", - "-B", "${workspaceFolder}/yolo_inference/build" + "-B", "${workspaceFolder}/yolo_inference/build", + "-DCMAKE_POLICY_VERSION_MINIMUM=3.5", + ], "problemMatcher": ["$gcc"] }, @@ -74,6 +76,7 @@ "args": [ "--build", "${workspaceFolder}/pipeline/build", + "-DCMAKE_POLICY_VERSION_MINIMUM=3.5", "--config", "Debug" ], "problemMatcher": ["$gcc"] diff --git a/src/main.cpp b/src/main.cpp index ed09788..b9e3240 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -13,7 +13,7 @@ int main() SEG::DL_RESULT res; std::tie(samSegmentors, params_encoder, params_decoder, res, resSam) = Initializer(); std::filesystem::path current_path = std::filesystem::current_path(); - std::filesystem::path imgs_path = "/home/amigo/Documents/repos/hero_sam.bak/sam_inference/build/images"; // current_path / <- you could use + std::filesystem::path imgs_path = "/home/amigo/Documents/repos/yolo_onnx_ros/build/images"; // current_path / <- you could use for (auto &i : std::filesystem::directory_iterator(imgs_path)) { if (i.path().extension() == ".jpg" || i.path().extension() == ".png" || i.path().extension() == ".jpeg") diff --git a/src/sam_inference.cpp b/src/sam_inference.cpp index ccfb2b3..f2fd975 100644 --- a/src/sam_inference.cpp +++ b/src/sam_inference.cpp @@ -5,7 +5,7 @@ #include "sam_onnx_ros/utils.hpp" #define benchmark -//#define ROI +// #define ROI SAM::SAM() { @@ -117,30 +117,29 @@ const char *SAM::CreateSession(SEG::DL_INIT_PARAM &iParams) { } } -const char *SAM::RunSession(const cv::Mat &iImg, - std::vector &oResult, - SEG::MODEL_TYPE _modelType, SEG::DL_RESULT &result) -{ -#ifdef benchmark - clock_t starttime_1 = clock(); -#endif // benchmark - Utils utilities; - const char *Ret = RET_OK; - cv::Mat processedImg; - utilities.PreProcess(iImg, 
_imgSize, processedImg); - float *blob = new float[processedImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - std::vector inputNodeDims; - if (_modelType == SEG::SAM_SEGMENT_ENCODER) { - // NCHW: H = imgSize[1], W = imgSize[0] - inputNodeDims = {1, 3, _imgSize.at(1), _imgSize.at(0)}; - } else if (_modelType == SEG::SAM_SEGMENT_DECODER) { - inputNodeDims = {1, 256, 64, 64}; - } - TensorProcess_(starttime_1, iImg, blob, inputNodeDims, _modelType, oResult, - utilities, result); - - return Ret; +const char* SAM::RunSession(const cv::Mat& iImg, std::vector& oResult, SEG::MODEL_TYPE modelType, SEG::DL_RESULT& result) { + #ifdef benchmark + clock_t starttime_1 = clock(); + #endif + Utils utilities; + const char* Ret = RET_OK; + cv::Mat processedImg; + utilities.PreProcess(iImg, _imgSize, processedImg); + + if (modelType < 4) { + float* blob = new float[processedImg.total() * 3]; + utilities.BlobFromImage(processedImg, blob); + std::vector inputNodeDims; + if (modelType == SEG::SAM_SEGMENT_ENCODER) { + // NCHW with H=imgSize[1], W=imgSize[0] // FIX + inputNodeDims = { 1, 3, _imgSize.at(1), _imgSize.at(0) }; // FIX + } else if (modelType == SEG::SAM_SEGMENT_DECODER) { + inputNodeDims = { 1, 256, 64, 64 }; + } + TensorProcess_(starttime_1, iImg, blob, inputNodeDims, modelType, oResult, utilities, result); + } + // ...existing code... 
+ return Ret; } template @@ -341,21 +340,19 @@ char *SAM::WarmUpSession_(SEG::MODEL_TYPE _modelType) cv::Mat iImg = cv::Mat(cv::Size(_imgSize.at(0), _imgSize.at(1)), CV_8UC3); cv::Mat processedImg; utilities.PreProcess(iImg, _imgSize, processedImg); - - float *blob = new float[iImg.total() * 3]; - utilities.BlobFromImage(processedImg, blob); - - // NCHW: H = imgSize[1], W = imgSize[0] - std::vector SAM_input_node_dims = {1, 3, _imgSize.at(1), _imgSize.at(0)}; - switch (_modelType) { - case SEG::SAM_SEGMENT_ENCODER: { - Ort::Value input_tensor = Ort::Value::CreateTensor( - Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), blob, - 3 * _imgSize.at(0) * _imgSize.at(1), SAM_input_node_dims.data(), - SAM_input_node_dims.size()); - auto output_tensors = - _session->Run(_options, _inputNodeNames.data(), &input_tensor, 1, - _outputNodeNames.data(), _outputNodeNames.size()); + if (_modelType < 4) { + float* blob = new float[iImg.total() * 3]; + utilities.BlobFromImage(processedImg, blob); + // NCHW: H=imgSize[1], W=imgSize[0] // FIX + std::vector SAM_input_node_dims = { 1, 3, _imgSize.at(1), _imgSize.at(0) }; // FIX + switch (_modelType) { + case SEG::SAM_SEGMENT_ENCODER: { + Ort::Value input_tensor = Ort::Value::CreateTensor( + Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU), + blob, 3 * _imgSize.at(0) * _imgSize.at(1), + SAM_input_node_dims.data(), SAM_input_node_dims.size()); + auto output_tensors = _session->Run(_options, _inputNodeNames.data(), &input_tensor, 1, + _outputNodeNames.data(), _outputNodeNames.size()); delete[] blob; clock_t starttime_4 = clock(); double post_process_time = @@ -433,6 +430,6 @@ char *SAM::WarmUpSession_(SEG::MODEL_TYPE _modelType) break; } } - + } return RET_OK; } diff --git a/src/utils.cpp b/src/utils.cpp index 8af08a3..3e1f449 100644 --- a/src/utils.cpp +++ b/src/utils.cpp @@ -1,6 +1,6 @@ #include "sam_onnx_ros/utils.hpp" -//#define LOGGING +// #define LOGGING // Constructor Utils::Utils() @@ -12,29 +12,26 @@ 
Utils::~Utils() { } -char *Utils::PreProcess(const cv::Mat &iImg, std::vector iImgSize, cv::Mat &oImg) +char* Utils::PreProcess(const cv::Mat& iImg, std::vector iImgSize, cv::Mat& oImg) { - if (iImg.channels() == 3) - { + if (iImg.channels() == 3) { oImg = iImg.clone(); cv::cvtColor(oImg, oImg, cv::COLOR_BGR2RGB); - } - else - { + } else { cv::cvtColor(iImg, oImg, cv::COLOR_GRAY2RGB); } - if (iImg.cols >= iImg.rows) - { - _resizeScales = iImg.cols / (float)iImgSize.at(0); - cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), int(iImg.rows / _resizeScales))); + if (iImg.cols >= iImg.rows) { + // Width-dominant: scale by target width (iImgSize[0]) + _resizeScales = iImg.cols / static_cast(iImgSize.at(0)); + cv::resize(oImg, oImg, cv::Size(iImgSize.at(0), static_cast(iImg.rows / _resizeScales))); // fixed + } else { + // Height-dominant: scale by target height (iImgSize[1]) + _resizeScales = iImg.rows / static_cast(iImgSize.at(1)); + cv::resize(oImg, oImg, cv::Size(static_cast(iImg.cols / _resizeScales), iImgSize.at(1))); // fixed } - else - { - _resizeScales = iImg.rows / (float)iImgSize.at(1); - cv::resize(oImg, oImg, cv::Size(int(iImg.cols / _resizeScales), iImgSize.at(1))); - } - //cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(0), iImgSize.at(1), CV_8UC3); + + // Letterbox top-left into a canvas of size (H=iImgSize[1], W=iImgSize[0]) cv::Mat tempImg = cv::Mat::zeros(iImgSize.at(1), iImgSize.at(0), CV_8UC3); oImg.copyTo(tempImg(cv::Rect(0, 0, oImg.cols, oImg.rows))); oImg = tempImg; @@ -122,15 +119,29 @@ std::vector Utils::PrepareInputTensor(Ort::Value &decoderInputTensor } void Utils::PostProcess(std::vector &output_tensors, const cv::Mat &iImg, std::vector imgSize, SEG::DL_RESULT &result) { - if (output_tensors.size() < 2) - { - std::cerr << "[SAM]: Decoder returned insufficient outputs." << std::endl; + if (output_tensors.empty()) { + std::cerr << "[SAM]: Decoder returned no outputs." 
<< std::endl; + return; + } + + // Detect masks (4D) and scores (1D/2D) by shape + int masksIdx = -1, scoresIdx = -1; + for (int i = 0; i < static_cast(output_tensors.size()); ++i) { + const auto &val = output_tensors[i]; + auto shape = val.GetTensorTypeAndShapeInfo().GetShape(); + if (shape.size() == 4) masksIdx = i; + else if (shape.size() <= 2) scoresIdx = i; + } + if (masksIdx < 0) { + std::cerr << "[SAM]: No 4D mask tensor found in decoder outputs." << std::endl; return; } - // Assume [scores, masks]; consider shape-based detection later - auto scoresTensor = std::move(output_tensors[0]); - auto masksTensor = std::move(output_tensors[1]); + auto masksTensor = std::move(output_tensors[masksIdx]); + const float* scoresData = nullptr; + if (scoresIdx >= 0) { + scoresData = output_tensors[scoresIdx].GetTensorMutableData(); + } auto masksInfo = masksTensor.GetTensorTypeAndShapeInfo(); auto masksShape = masksInfo.GetShape(); @@ -138,32 +149,29 @@ void Utils::PostProcess(std::vector &output_tensors, const cv::Mat & if (masksShape.size() == 4) { auto masksData = masksTensor.GetTensorMutableData(); - auto scoresData = scoresTensor.GetTensorMutableData(); const size_t numMasks = static_cast(masksShape[1]); const size_t height = static_cast(masksShape[2]); const size_t width = static_cast(masksShape[3]); - // Pick best mask by score + // Pick best mask by score if available float bestScore = -1.0f; size_t bestMaskIndex = 0; - for (size_t i = 0; i < numMasks; ++i) - { - const float s = scoresData ? 
scoresData[i] : 0.0f; - if (s > bestScore) { bestScore = s; bestMaskIndex = i; } + if (scoresData) { + for (size_t i = 0; i < numMasks; ++i) { + const float s = scoresData[i]; + if (s > bestScore) { bestScore = s; bestMaskIndex = i; } + } } - // Compute preprocessed region (top-left anchored) + // Compute preprocessed region (top-left anchored) to undo letterbox float scale; int processedWidth, processedHeight; - if (iImg.cols >= iImg.rows) - { + if (iImg.cols >= iImg.rows) { scale = static_cast(imgSize[0]) / static_cast(iImg.cols); processedWidth = imgSize[0]; processedHeight = static_cast(iImg.rows * scale); - } - else - { + } else { scale = static_cast(imgSize[1]) / static_cast(iImg.rows); processedWidth = static_cast(iImg.cols * scale); processedHeight = imgSize[1]; @@ -176,20 +184,19 @@ void Utils::PostProcess(std::vector &output_tensors, const cv::Mat & cv::Mat prob32f(static_cast(height), static_cast(width), CV_32F, const_cast(masksData + planeOffset)); - // Crop in mask space using proportional dimensions (no hardcoded 256) + // Crop padding region in mask space const int cropW = clampDim(static_cast(std::round(static_cast(width) * processedWidth / static_cast(imgSize[0]))), 1, static_cast(width)); const int cropH = clampDim(static_cast(std::round(static_cast(height) * processedHeight / static_cast(imgSize[1]))), 1, static_cast(height)); cv::Mat probCropped = prob32f(cv::Rect(0, 0, cropW, cropH)); - // Resize probabilities to original image (linear) + // Resize to original image size and threshold cv::Mat probResized; cv::resize(probCropped, probResized, cv::Size(iImg.cols, iImg.rows), 0, 0, cv::INTER_LINEAR); - // Threshold once to binary mask cv::Mat finalMask; cv::compare(probResized, 0.5f, finalMask, cv::CMP_GT); // CV_8U 0/255 - // Morphological cleanup (light, then ensure binary) + // Optional cleanup int kernelSize = std::max(5, std::min(iImg.cols, iImg.rows) / 100); cv::Mat kernel = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(kernelSize, 
kernelSize)); cv::morphologyEx(finalMask, finalMask, cv::MORPH_CLOSE, kernel); From 515639d963464eaa0abf034a2934964933ca1e2a Mon Sep 17 00:00:00 2001 From: Matthijs van der Burgh Date: Tue, 28 Oct 2025 11:13:44 +0100 Subject: [PATCH 34/35] Various CMake fixes --- CMakeLists.txt | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d812544..374ceac 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -76,14 +76,7 @@ catkin_package( # BUILD # ------------------------------------------------------------------------------------------------ -# Build library -add_library(${PROJECT_NAME} - src/sam_inference.cpp - src/segmentation.cpp - src/utils.cpp -) - -target_include_directories(${PROJECT_NAME} +include_directories( PUBLIC include ${CATKIN_DEVEL_PREFIX}/${CATKIN_GLOBAL_INCLUDE_DESTINATION} @@ -93,6 +86,12 @@ target_include_directories(${PROJECT_NAME} ${catkin_INCLUDE_DIRS} ) +add_library(${PROJECT_NAME} + src/sam_inference.cpp + src/segmentation.cpp + src/utils.cpp +) + target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBRARIES} ${catkin_LIBRARIES} @@ -103,7 +102,6 @@ target_link_libraries(${PROJECT_NAME} add_executable(test_${PROJECT_NAME} src/main.cpp ) - target_link_libraries(test_${PROJECT_NAME} ${PROJECT_NAME} ) @@ -112,9 +110,14 @@ target_link_libraries(test_${PROJECT_NAME} # Install Targets # ------------------------------------------------------------------------------------------------ +install(FILES + ${CATKIN_DEVEL_PREFIX}/${CATKIN_PACKAGE_INCLUDE_DESTINATION}/config.hpp + DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} +) + install( - DIRECTORY include/ - DESTINATION ${CATKIN_GLOBAL_INCLUDE_DESTINATION} + DIRECTORY include/${PROJECT_NAME}/ + DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} ) install( From 3591e2138634c00a469cdbf4d7b0e809683eb83b Mon Sep 17 00:00:00 2001 From: IasonTheodorou Date: Tue, 28 Oct 2025 20:14:42 +0100 Subject: [PATCH 35/35] Removed redundant .vscode 
lines --- .vscode/launch.json | 4 +-- .vscode/tasks.json | 61 --------------------------------------------- 2 files changed, 2 insertions(+), 63 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 3c7955e..1112e0b 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -5,7 +5,7 @@ "name": "Debug SAM Model", "type": "cppdbg", "request": "launch", - "program": "/home/amigo/ros/noetic/system/devel/lib/sam_onnx_ros/test11_sam_onnx_ros", // Path to the executable + "program": "/home/amigo/ros/noetic/system/devel/lib/sam_onnx_ros/test_sam_onnx_ros", // Path to the executable "args": [], // Add any command-line arguments for your program here "stopAtEntry": false, "cwd": "${workspaceFolder}/build", // Set the working directory @@ -19,7 +19,7 @@ "ignoreFailures": true } ], - //"preLaunchTask": "build-sam-project" // Ensure the project is built before debugging + "preLaunchTask": "build-sam-project" // Ensure the project is built before debugging }, { "name": "Debug GTest (pick binary)", diff --git a/.vscode/tasks.json b/.vscode/tasks.json index 1f85ca1..2ee8426 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -8,20 +8,6 @@ "problemMatcher": ["$gcc"], "group": "build" }, - { - "label": "build-pipeline-project", - "dependsOrder": "sequence", - "dependsOn": ["configure-pipeline", "build-pipeline-binary"], - "problemMatcher": ["$gcc"], - "group": "build" - }, - { - "label": "build-yolo-project", - "dependsOrder": "sequence", - "dependsOn": ["configure-yolo", "build-yolo-binary"], - "problemMatcher": ["$gcc"], - "group": "build" - }, { "label": "configure-sam", "type": "shell", @@ -34,30 +20,6 @@ ], "problemMatcher": ["$gcc"] }, - { - "label": "configure-pipeline", - "type": "shell", - "command": "cmake", - "args": [ - "-DCMAKE_BUILD_TYPE=Debug", - "-S", "${workspaceFolder}/pipeline", - "-B", "${workspaceFolder}/pipeline/build" - ], - "problemMatcher": ["$gcc"] - }, - { - "label": "configure-yolo", - "type": "shell", - "command": "cmake", 
- "args": [ - "-DCMAKE_BUILD_TYPE=Debug", - "-S", "${workspaceFolder}/yolo_inference", - "-B", "${workspaceFolder}/yolo_inference/build", - "-DCMAKE_POLICY_VERSION_MINIMUM=3.5", - - ], - "problemMatcher": ["$gcc"] - }, { "label": "build-sam-binary", "type": "shell", @@ -69,29 +31,6 @@ ], "problemMatcher": ["$gcc"] }, - { - "label": "build-pipeline-binary", - "type": "shell", - "command": "cmake", - "args": [ - "--build", - "${workspaceFolder}/pipeline/build", - "-DCMAKE_POLICY_VERSION_MINIMUM=3.5", - "--config", "Debug" - ], - "problemMatcher": ["$gcc"] - }, - { - "label": "build-yolo-binary", - "type": "shell", - "command": "cmake", - "args": [ - "--build", - "${workspaceFolder}/yolo_inference/build", - "--config", "Debug" - ], - "problemMatcher": ["$gcc"] - } ] }