From 7c342d63ebcc3dc8047d8061cede2ea0d67289c0 Mon Sep 17 00:00:00 2001 From: Aakash Preetam Date: Wed, 30 Apr 2025 00:16:08 +0530 Subject: [PATCH 1/8] Add QcAllocator and DSP Init and Deinit Functions --- modules/fastcv/include/opencv2/fastcv.hpp | 2 + .../include/opencv2/fastcv/allocator.hpp | 64 ++++++++++ .../include/opencv2/fastcv/dsp_init.hpp | 49 ++++++++ modules/fastcv/src/allocator.cpp | 113 ++++++++++++++++++ modules/fastcv/src/dsp_init.cpp | 46 +++++++ modules/fastcv/src/precomp.hpp | 113 +++++++++++++++++- 6 files changed, 385 insertions(+), 2 deletions(-) create mode 100644 modules/fastcv/include/opencv2/fastcv/allocator.hpp create mode 100644 modules/fastcv/include/opencv2/fastcv/dsp_init.hpp create mode 100644 modules/fastcv/src/allocator.cpp create mode 100644 modules/fastcv/src/dsp_init.cpp diff --git a/modules/fastcv/include/opencv2/fastcv.hpp b/modules/fastcv/include/opencv2/fastcv.hpp index 292e83a2dc3..55cd768d219 100644 --- a/modules/fastcv/include/opencv2/fastcv.hpp +++ b/modules/fastcv/include/opencv2/fastcv.hpp @@ -30,6 +30,8 @@ #include "opencv2/fastcv/thresh.hpp" #include "opencv2/fastcv/tracking.hpp" #include "opencv2/fastcv/warp.hpp" +#include "opencv2/fastcv/allocator.hpp" +#include "opencv2/fastcv/dsp_init.hpp" /** * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions diff --git a/modules/fastcv/include/opencv2/fastcv/allocator.hpp b/modules/fastcv/include/opencv2/fastcv/allocator.hpp new file mode 100644 index 00000000000..a70666723ca --- /dev/null +++ b/modules/fastcv/include/opencv2/fastcv/allocator.hpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 +*/ + +#ifndef OPENCV_FASTCV_ALLOCATOR_HPP +#define OPENCV_FASTCV_ALLOCATOR_HPP + +#include +#include +#include + +namespace cv { +namespace fastcv { + +//! @addtogroup fastcv +//! @{ + +/** + * @brief Resource manager for FastCV allocations. 
+ * This class manages active allocations. + */ +class QcResourceManager { +public: + static QcResourceManager& getInstance(); + + void addAllocation(void* ptr); + void removeAllocation(void* ptr); + +private: + QcResourceManager() = default; + std::set activeAllocations; + std::mutex resourceMutex; +}; + +/** + * @brief Qualcomm's custom allocator. + * This allocator uses Qualcomm's memory management functions. + */ +class QcAllocator : public cv::MatAllocator { + public: + QcAllocator(); + ~QcAllocator(); + + cv::UMatData* allocate(int dims, const int* sizes, int type, void* data0, size_t* step, cv::AccessFlag flags, cv::UMatUsageFlags usageFlags) const CV_OVERRIDE; + bool allocate(cv::UMatData* u, cv::AccessFlag accessFlags, cv::UMatUsageFlags usageFlags) const CV_OVERRIDE; + void deallocate(cv::UMatData* u) const CV_OVERRIDE; +}; + +/** + * @brief Gets the default Qualcomm allocator. + * This function returns a pointer to the default Qualcomm allocator, which is optimized + * for use with DSP. + * + * @return Pointer to the default FastCV allocator. + */ +CV_EXPORTS cv::MatAllocator* getQcAllocator(); + +//! @} + +} // namespace fastcv +} // namespace cv + +#endif // OPENCV_FASTCV_ALLOCATOR_HPP diff --git a/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp b/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp new file mode 100644 index 00000000000..16fc163d1f1 --- /dev/null +++ b/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 +*/ + +#ifndef OPENCV_FASTCV_DSP_INIT_HPP +#define OPENCV_FASTCV_DSP_INIT_HPP + +#include + +namespace cv { +namespace fastcv { +namespace dsp { + +//! @addtogroup fastcv +//! @{ + +/** + * @brief Initializes the FastCV DSP environment. + * + * This function sets up the necessary environment and resources for the DSP to operate. 
+ * It must be called once at the very beginning of the use case or program to ensure that + * the DSP is properly initialized before any DSP-related operations are performed. + * + * @note This function must be called at the start of the use case or program, before any + * DSP-related operations. + * + * @return int Returns 0 on success, and a non-zero value on failure. + */ +CV_EXPORTS_W int fcvdspinit(); + +/** + * @brief Deinitializes the FastCV DSP environment. + * + * This function releases the resources and environment set up by the 'fcvdspinit' function. + * It should be called before the use case or program exits to ensure that all DSP resources + * are properly cleaned up and no memory leaks occur. + * + * @note This function must be called at the end of the use case or program, after all DSP-related + * operations are complete. + */ +CV_EXPORTS_W void fcvdspdeinit(); +//! @} + +} // dsp:: +} // fastcv:: +} // cv:: + +#endif // OPENCV_FASTCV_DSP_INIT_HPP diff --git a/modules/fastcv/src/allocator.cpp b/modules/fastcv/src/allocator.cpp new file mode 100644 index 00000000000..e89835678d8 --- /dev/null +++ b/modules/fastcv/src/allocator.cpp @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "precomp.hpp" + +namespace cv { +namespace fastcv { + +QcResourceManager& QcResourceManager::getInstance() { + static QcResourceManager instance; + return instance; +} + +void QcResourceManager::addAllocation(void* ptr) { + std::lock_guard lock(resourceMutex); + activeAllocations.insert(ptr); + CV_LOG_DEBUG(NULL, cv::format("Active Allocations: %zu", activeAllocations.size())); +} + +void QcResourceManager::removeAllocation(void* ptr) { + std::lock_guard lock(resourceMutex); + activeAllocations.erase(ptr); + CV_LOG_DEBUG(NULL, cv::format("Active Allocations: %zu", activeAllocations.size())); +} + +QcAllocator::QcAllocator() +{ +} + +QcAllocator::~QcAllocator() +{ +} + +cv::UMatData* QcAllocator::allocate(int dims, const int* sizes, int type, + void* data0, size_t* step, cv::AccessFlag flags, + cv::UMatUsageFlags usageFlags) const +{ + CV_UNUSED(flags); + CV_UNUSED(usageFlags); + + size_t total = CV_ELEM_SIZE(type); + for( int i = dims-1; i >= 0; i-- ) + { + if( step ) + { + if( data0 && step[i] != CV_AUTOSTEP ) + { + CV_Assert(total <= step[i]); + total = step[i]; + } + else + step[i] = total; + } + total *= sizes[i]; + } + uchar* data = data0 ? 
(uchar*)data0 : (uchar*)fcvHwMemAlloc(total, 16); + cv::UMatData* u = new cv::UMatData(this); + u->data = u->origdata = data; + u->size = total; + if(data0) + u->flags |= cv::UMatData::USER_ALLOCATED; + + u->userdata = new std::string("QCOM"); + + // Add to active allocations + cv::fastcv::QcResourceManager::getInstance().addAllocation(data); + + return u; +} + +bool QcAllocator::allocate(cv::UMatData* u, cv::AccessFlag accessFlags, cv::UMatUsageFlags usageFlags) const +{ + CV_UNUSED(accessFlags); + CV_UNUSED(usageFlags); + + return u != nullptr; +} + +void QcAllocator::deallocate(cv::UMatData* u) const +{ + if(!u) + return; + + CV_Assert(u->urefcount == 0); + CV_Assert(u->refcount == 0); + if( !(u->flags & cv::UMatData::USER_ALLOCATED) ) + { + fcvHwMemFree(u->origdata); + + // Remove from active allocations + cv::fastcv::QcResourceManager::getInstance().removeAllocation(u->origdata); + u->origdata = 0; + } + + if (u->userdata) + { + delete static_cast(u->userdata); + u->userdata = nullptr; + } + + delete u; +} + +cv::MatAllocator* getQcAllocator() +{ + static cv::MatAllocator* allocator = new QcAllocator; + return allocator; +} + +} +} diff --git a/modules/fastcv/src/dsp_init.cpp b/modules/fastcv/src/dsp_init.cpp new file mode 100644 index 00000000000..ee0bff8ba1d --- /dev/null +++ b/modules/fastcv/src/dsp_init.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "precomp.hpp" + +namespace cv { +namespace fastcv { +namespace dsp { +//CHANGE FASTCV Q6 INIT +int fcvdspinit() +{ + FastCvDspContext& context = FastCvDspContext::getContext(); + + if (context.isInitialized()) { + CV_LOG_INFO(NULL, "FastCV DSP already initialized, skipping initialization"); + return 0; + } + if (!context.initialize()) { + CV_LOG_ERROR(NULL, "Failed to initialize FastCV DSP"); + return -1; + } + CV_LOG_INFO(NULL, "FastCV DSP initialized successfully"); + return 0; +} + +void fcvdspdeinit() +{ + // Deinitialize the DSP environment + FastCvDspContext& context = FastCvDspContext::getContext(); + + if (!context.isInitialized()) { + CV_LOG_INFO(NULL, "FastCV DSP already deinitialized, skipping deinitialization"); + return; + } + if (!context.deinitialize()) { + CV_LOG_ERROR(NULL, "Failed to deinitialize FastCV DSP"); + } + CV_LOG_INFO(NULL, "FastCV DSP deinitialized successfully"); +} + + +} // namespace dsp +} // namespace fastcv +} // namespace cv \ No newline at end of file diff --git a/modules/fastcv/src/precomp.hpp b/modules/fastcv/src/precomp.hpp index c2929d76cc1..7a95f24fcd5 100644 --- a/modules/fastcv/src/precomp.hpp +++ b/modules/fastcv/src/precomp.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024 Qualcomm Innovation Center, Inc. All rights reserved. + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -10,11 +10,13 @@ #include #include "opencv2/core/private.hpp" #include "opencv2/core/utils/logger.hpp" - +#include #include #include +#include #include "fastcv.h" +#include "fastcvDsp.h" namespace cv { namespace fastcv { @@ -30,6 +32,7 @@ namespace fastcv { #define FCV_KernelSize_SHIFT 3 #define FCV_MAKETYPE(ksize,depth) ((ksize< fcvStatusStrings = { @@ -72,6 +75,112 @@ struct FastCvContext bool isInitialized; }; +namespace dsp { + struct FastCvDspContext; + + #define IS_FASTCV_ALLOCATED(mat) \ + ((mat.u && mat.u->userdata && \ + *static_cast(mat.u->userdata) == "QCOM") ? true : \ + (CV_Error(cv::Error::StsBadArg, cv::format("Matrix '%s' not allocated with FastCV allocator. " \ + "Please ensure that the matrix is created using " \ + "cv::fastcv::getQcAllocator().", #mat)), false)) + + #define FASTCV_CHECK_DSP_INIT() \ + if (!FastCvDspContext::getContext().isInitialized() && \ + fcvdspinit() != 0) \ + { \ + CV_Error(cv::Error::StsError, "Failed to initialize DSP"); \ + } + + struct FastCvDspContext + { + private: + mutable cv::Mutex initMutex; + std::atomic isDspInitialized{false}; + std::atomic initializationCount{0}; + std::atomic deInitializationCount{0}; + + static FastCvDspContext& getInstanceImpl() { + static FastCvDspContext context; + return context; + } + public: + static FastCvDspContext& getContext() { + return getInstanceImpl(); + } + + FastCvDspContext(const FastCvDspContext&) = delete; + FastCvDspContext& operator=(const FastCvDspContext&) = delete; + + bool initialize() { + cv::AutoLock lock(initMutex); + + if (isDspInitialized.load(std::memory_order_acquire)) { + CV_LOG_INFO(NULL, "FastCV DSP already initialized, skipping initialization"); + return true; + } + + CV_LOG_INFO(NULL, "Initializing FastCV DSP"); + + if (fcvQ6Init() == 0) { + isDspInitialized.store(true, std::memory_order_release); + initializationCount++; + CV_LOG_DEBUG(NULL, cv::format("FastCV DSP initialized (init count: %lu, deinit count: 
%lu)", + initializationCount.load(), deInitializationCount.load())); + + return true; + } + + CV_LOG_ERROR(NULL, "FastCV DSP initialization failed"); + return false; + } + + bool deinitialize() { + cv::AutoLock lock(initMutex); + + if (!isDspInitialized.load(std::memory_order_acquire)) { + CV_LOG_DEBUG(NULL, "FastCV DSP already deinitialized, skipping deinitialization"); + return true; + } + + CV_LOG_INFO(NULL, "Deinitializing FastCV DSP"); + + try { + fcvQ6DeInit(); + isDspInitialized.store(false, std::memory_order_release); + deInitializationCount++; + CV_LOG_DEBUG(NULL, cv::format("FastCV DSP deinitialized (init count: %lu, deinit count: %lu)", + initializationCount.load(), deInitializationCount.load())); + + return true; + } + catch (...) { + CV_LOG_ERROR(NULL, "Exception occurred during FastCV DSP deinitialization"); + return false; + } + } + + bool isInitialized() const { + return isDspInitialized.load(std::memory_order_acquire); + } + + uint64_t getDspInitCount() const { + return initializationCount.load(std::memory_order_acquire); + } + + uint64_t getDspDeInitCount() const { + return deInitializationCount.load(std::memory_order_acquire); + } + + const cv::Mutex& getInitMutex() const { + return initMutex; + } + + private: + FastCvDspContext() = default; +}; + +} // namespace dsp } // namespace fastcv } // namespace cv From 2bed4bcfe1070257e7fce82219133fe2071fa6e7 Mon Sep 17 00:00:00 2001 From: Aakash Preetam Date: Wed, 30 Apr 2025 00:24:05 +0530 Subject: [PATCH 2/8] Added sumOfAbsoluteDiffs DSP API with Accuracy and Perf Test --- modules/fastcv/include/opencv2/fastcv.hpp | 1 + .../fastcv/include/opencv2/fastcv/sad_dsp.hpp | 34 +++++++++++++ modules/fastcv/perf/perf_sad_dsp.cpp | 50 +++++++++++++++++++ modules/fastcv/src/sad_dsp.cpp | 46 +++++++++++++++++ modules/fastcv/test/test_sad_dsp.cpp | 42 ++++++++++++++++ 5 files changed, 173 insertions(+) create mode 100644 modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp create mode 100644 
modules/fastcv/perf/perf_sad_dsp.cpp create mode 100644 modules/fastcv/src/sad_dsp.cpp create mode 100644 modules/fastcv/test/test_sad_dsp.cpp diff --git a/modules/fastcv/include/opencv2/fastcv.hpp b/modules/fastcv/include/opencv2/fastcv.hpp index 55cd768d219..18a26e78f63 100644 --- a/modules/fastcv/include/opencv2/fastcv.hpp +++ b/modules/fastcv/include/opencv2/fastcv.hpp @@ -32,6 +32,7 @@ #include "opencv2/fastcv/warp.hpp" #include "opencv2/fastcv/allocator.hpp" #include "opencv2/fastcv/dsp_init.hpp" +#include "opencv2/fastcv/sad_dsp.hpp" /** * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions diff --git a/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp new file mode 100644 index 00000000000..125a81eb102 --- /dev/null +++ b/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 +*/ + +#ifndef OPENCV_FASTCV_SAD_HPP +#define OPENCV_FASTCV_SAD_HPP + +#include + +namespace cv { +namespace fastcv { +namespace dsp { + +/** + * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions + */ + +//! @addtogroup fastcv +//! @{ +/** + * @brief Sum of absolute differences of an image against an 8x8 template. + * @param _patch The first input image data, type CV_8UC1 + * @param _src The input image data, type CV_8UC1 + * @param _dst The output image data, type CV_16UC1 +*/ +CV_EXPORTS_W void sumOfAbsoluteDiffs(cv::InputArray _patch, cv::InputArray _src, cv::OutputArray _dst); +//! @} + +} // dsp:: +} // fastcv:: +} // cv:: + +#endif // OPENCV_FASTCV_SAD_HPP diff --git a/modules/fastcv/perf/perf_sad_dsp.cpp b/modules/fastcv/perf/perf_sad_dsp.cpp new file mode 100644 index 00000000000..ec26de2615e --- /dev/null +++ b/modules/fastcv/perf/perf_sad_dsp.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { + +typedef std::tuple SumOfAbsDiffsPerfParams; +typedef perf::TestBaseWithParam SumOfAbsDiffsPerfTest; + +PERF_TEST_P(SumOfAbsDiffsPerfTest, run, + ::testing::Values(cv::Size(640, 480), // VGA + cv::Size(1280, 720), // 720p + cv::Size(1920, 1080)) // 1080p +) +{ + // Initialize FastCV DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + auto p = GetParam(); + cv::Size srcSize = std::get<0>(p); + + RNG& rng = cv::theRNG(); + cv::Mat patch, src; + + patch.allocator = cv::fastcv::getQcAllocator(); // Use FastCV allocator for patch + src.allocator = cv::fastcv::getQcAllocator(); // Use FastCV allocator for src + + patch.create(8, 8, CV_8UC1); + src.create(srcSize, CV_8UC1); + + cvtest::randUni(rng, patch, cv::Scalar::all(0), cv::Scalar::all(255)); + cvtest::randUni(rng, src, cv::Scalar::all(0), cv::Scalar::all(255)); + + cv::Mat dst; + dst.allocator = cv::fastcv::getQcAllocator(); // Use FastCV allocator for dst + + while(next()) + { + startTimer(); + cv::fastcv::dsp::sumOfAbsoluteDiffs(patch, src, dst); + stopTimer(); + } + SANITY_CHECK_NOTHING(); +} + +} // namespace diff --git a/modules/fastcv/src/sad_dsp.cpp b/modules/fastcv/src/sad_dsp.cpp new file mode 100644 index 00000000000..a58c1383cf6 --- /dev/null +++ b/modules/fastcv/src/sad_dsp.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "precomp.hpp" + +namespace cv { +namespace fastcv { +namespace dsp { + +void sumOfAbsoluteDiffs(cv::InputArray _patch, cv::InputArray _src, cv::OutputArray _dst) +{ + cv::Mat patch = _patch.getMat(); + cv::Mat src = _src.getMat(); + + // Check if matrices are allocated by the QcAllocator + CV_Assert(IS_FASTCV_ALLOCATED(patch)); + CV_Assert(IS_FASTCV_ALLOCATED(src)); + + CV_Assert(!_src.empty() && "src is empty"); + CV_Assert(_src.type() == CV_8UC1 && "src type is not CV_8UC1"); + CV_Assert(_src.step() * _src.rows() > MIN_REMOTE_BUF_SIZE && "src buffer size is too small"); + CV_Assert(!_patch.empty() && "patch is empty"); + CV_Assert(_patch.type() == CV_8UC1 && "patch type is not CV_8UC1"); + CV_Assert(_patch.size() == cv::Size(8, 8) && "patch size is not 8x8"); + + cv::Size size = _src.size(); + _dst.create(size, CV_16UC1); + cv::Mat dst = _dst.getMat(); + + CV_Assert(((intptr_t)src.data & 0x7) == 0 && "src data is not 8-byte aligned"); + CV_Assert(((intptr_t)dst.data & 0x7) == 0 && "dst data is not 8-byte aligned"); + + // Check if dst is allocated by the QcAllocator + CV_Assert(IS_FASTCV_ALLOCATED(dst)); + + // Check DSP initialization status and initialize if needed + FASTCV_CHECK_DSP_INIT(); + + fcvSumOfAbsoluteDiffs8x8u8_v2Q((uint8_t*)patch.data, patch.step, (uint8_t*)src.data, src.cols, src.rows, src.step, (uint16_t*)dst.data, dst.step); +} + +} // dsp:: +} // fastcv:: +} // cv:: diff --git a/modules/fastcv/test/test_sad_dsp.cpp b/modules/fastcv/test/test_sad_dsp.cpp new file mode 100644 index 00000000000..a71f5469728 --- /dev/null +++ b/modules/fastcv/test/test_sad_dsp.cpp @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "test_precomp.hpp" + +using namespace cv::fastcv::dsp; + +namespace opencv_test { namespace { + +TEST(SadTest, accuracy) +{ + //Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + // Create an 8x8 template patch + cv::Mat patch; + patch.allocator = cv::fastcv::getQcAllocator(); + patch.create(8, 8, CV_8UC1); + patch.setTo(cv::Scalar(0)); + + // Create a source image + cv::Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + src.create(512, 512, CV_8UC1); + src.setTo(cv::Scalar(255)); + + cv::Mat dst; + dst.allocator = cv::fastcv::getQcAllocator(); + + cv::fastcv::dsp::sumOfAbsoluteDiffs(patch, src, dst); + + EXPECT_FALSE(dst.empty()); + + //De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); +} + +} +} From 2ea338adc934870d0bb2fc201a487dfeadb82732 Mon Sep 17 00:00:00 2001 From: Aakash Preetam Date: Wed, 30 Apr 2025 00:47:37 +0530 Subject: [PATCH 3/8] Add thresholdOtsu DSP API with Accuracy and Perf Test --- modules/fastcv/include/opencv2/fastcv.hpp | 1 + .../include/opencv2/fastcv/thresh_dsp.hpp | 39 ++++++++ modules/fastcv/perf/perf_thresh_dsp.cpp | 50 +++++++++++ modules/fastcv/src/thresh_dsp.cpp | 55 ++++++++++++ modules/fastcv/test/test_thresh_dsp.cpp | 88 +++++++++++++++++++ 5 files changed, 233 insertions(+) create mode 100644 modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp create mode 100644 modules/fastcv/perf/perf_thresh_dsp.cpp create mode 100644 modules/fastcv/src/thresh_dsp.cpp create mode 100644 modules/fastcv/test/test_thresh_dsp.cpp diff --git a/modules/fastcv/include/opencv2/fastcv.hpp b/modules/fastcv/include/opencv2/fastcv.hpp index 18a26e78f63..d975b51a43c 100644 --- a/modules/fastcv/include/opencv2/fastcv.hpp +++ b/modules/fastcv/include/opencv2/fastcv.hpp @@ -33,6 +33,7 @@ #include "opencv2/fastcv/allocator.hpp" #include "opencv2/fastcv/dsp_init.hpp" #include "opencv2/fastcv/sad_dsp.hpp" +#include 
"opencv2/fastcv/thresh_dsp.hpp" /** * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions diff --git a/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp new file mode 100644 index 00000000000..27021ad0d0d --- /dev/null +++ b/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 +*/ + +#ifndef OPENCV_FASTCV_THRESH_DSP_HPP +#define OPENCV_FASTCV_THRESH_DSP_HPP + +#include + +namespace cv { +namespace fastcv { +namespace dsp { + + //! @addtogroup fastcv + //! @{ + + /** + * @brief Binarizes a grayscale image using Otsu's method. + * Sets the pixel to max(255) if its value is greater than the threshold; + * else, sets the pixel to min(0). The threshold is chosen to minimize + * the intra-class variance (the variance within the class). + * + * @param _src Input 8-bit grayscale image. Size of buffer is srcStride*srcHeight bytes. + * @param _dst Output 8-bit binarized image. Size of buffer is dstStride*srcHeight bytes. + * @param type Threshold type that can be either 0 or 1. + * NOTE: For threshold type=0, the pixel is set as + * maxValue if its value is greater than the threshold; else, it is set as zero. + * For threshold type=1, the pixel is set as zero if its + * value is greater than the threshold; else, it is set as maxValue. + */ + CV_EXPORTS_W void thresholdOtsu(InputArray _src, OutputArray _dst, bool type); + + //! @} +} // dsp:: +} // fastcv:: +} // cv:: + +#endif // OPENCV_FASTCV_THRESH_DSP_HPP \ No newline at end of file diff --git a/modules/fastcv/perf/perf_thresh_dsp.cpp b/modules/fastcv/perf/perf_thresh_dsp.cpp new file mode 100644 index 00000000000..e5affeea7f9 --- /dev/null +++ b/modules/fastcv/perf/perf_thresh_dsp.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { + +typedef std::tuple ThresholdOtsuPerfParams; +typedef perf::TestBaseWithParam ThresholdOtsuPerfTest; + +PERF_TEST_P(ThresholdOtsuPerfTest, run, + ::testing::Combine(::testing::Values(Size(320, 240), Size(640, 480), Size(1280, 720), Size(1920, 1080)), + ::testing::Values(false, true) // type + ) +) +{ + //Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + auto p = GetParam(); + cv::Size size = std::get<0>(p); + bool type = std::get<1>(p); + + RNG& rng = cv::theRNG(); + + cv::Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + src.create(size, CV_8UC1); + + cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(256)); + + cv::Mat dst; + dst.allocator = cv::fastcv::getQcAllocator(); + + while (next()) + { + startTimer(); + cv::fastcv::dsp::thresholdOtsu(src, dst, type); + stopTimer(); + } + + //De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); + SANITY_CHECK_NOTHING(); +} + +} // namespace \ No newline at end of file diff --git a/modules/fastcv/src/thresh_dsp.cpp b/modules/fastcv/src/thresh_dsp.cpp new file mode 100644 index 00000000000..9c74e619d37 --- /dev/null +++ b/modules/fastcv/src/thresh_dsp.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "precomp.hpp" + +namespace cv { +namespace fastcv { +namespace dsp { + + void thresholdOtsu(InputArray _src, OutputArray _dst, bool type) + { + CV_Assert( + !_src.empty() && + _src.type() == CV_8UC1 && + IS_FASTCV_ALLOCATED(_src.getMat()) + ); + + CV_Assert((_src.step() * _src.rows()) > MIN_REMOTE_BUF_SIZE); + CV_Assert(_src.cols() % 8 == 0); + CV_Assert(_src.step() % 8 == 0); + + Mat src = _src.getMat(); + CV_Assert(((uintptr_t)src.data & 0x7) == 0); + + _dst.create(_src.size(), CV_8UC1); + CV_Assert(_dst.step() % 8 == 0); + CV_Assert(_dst.cols() % 8 == 0); + Mat dst = _dst.getMat(); + + // Check if dst is allocated by the QcAllocator + CV_Assert(IS_FASTCV_ALLOCATED(dst)); + CV_Assert(((uintptr_t)dst.data & 0x7) == 0); + + if (src.data == dst.data) { + CV_Assert(src.step == dst.step); + } + + // Check DSP initialization status and initialize if needed + FASTCV_CHECK_DSP_INIT(); + + fcvThreshType threshType; + + if (type) + threshType = FCV_THRESH_BINARY_INV; + else + threshType = FCV_THRESH_BINARY; + + fcvFilterThresholdOtsuu8Q(src.data, src.cols, src.rows, src.step, dst.data, dst.step, threshType); + } + +} // dsp:: +} // fastcv:: +} // cv:: \ No newline at end of file diff --git a/modules/fastcv/test/test_thresh_dsp.cpp b/modules/fastcv/test/test_thresh_dsp.cpp new file mode 100644 index 00000000000..ef784301894 --- /dev/null +++ b/modules/fastcv/test/test_thresh_dsp.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +TEST(ThresholdOtsuTest, accuracy) +{ + //Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + cv::Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + cv::imread(cvtest::findDataFile("cv/detectors_descriptors_evaluation/planar/box_in_scene.png"), src, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(src.empty()) << "Could not read the image file."; + + cv::Mat dst; + dst.allocator = cv::fastcv::getQcAllocator(); + + bool type = 0; + + cv::fastcv::dsp::thresholdOtsu(src, dst, type); + + // De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); + + EXPECT_FALSE(dst.empty()); + EXPECT_EQ(src.size(), dst.size()); + + // Compare the result against the reference cv::threshold function with Otsu's method + cv::Mat referenceDst; + cv::threshold(src, referenceDst, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU); + + double maxDifference = 10.0; + cv::Mat diff; + cv::absdiff(dst, referenceDst, diff); + double maxVal; + cv::minMaxLoc(diff, nullptr, &maxVal); + + EXPECT_LE(maxVal, maxDifference) << "The custom threshold result differs from the reference result by more than the acceptable threshold."; +} + +TEST(ThresholdOtsuTest, inPlaceAccuracy) +{ + // Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + cv::Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + cv::imread(cvtest::findDataFile("cv/detectors_descriptors_evaluation/planar/box_in_scene.png"), src, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(src.empty()) << "Could not read the image file."; + + // Use the same buffer for in-place operation + cv::Mat dst; + dst.allocator = cv::fastcv::getQcAllocator(); + src.copyTo(dst); + + bool type = false; + + // Call the thresholdOtsu function for in-place operation + cv::fastcv::dsp::thresholdOtsu(dst, dst, 
type); + + // De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); + + // Check if the output is not empty + EXPECT_FALSE(dst.empty()); + EXPECT_EQ(src.size(), dst.size()); + + // Compare the result against the reference cv::threshold function with Otsu's method + cv::Mat referenceDst; + cv::threshold(src, referenceDst, 0, 255, cv::THRESH_BINARY | cv::THRESH_OTSU); + + double maxDifference = 10.0; + cv::Mat diff; + cv::absdiff(dst, referenceDst, diff); + double maxVal; + cv::minMaxLoc(diff, nullptr, &maxVal); + + EXPECT_LE(maxVal, maxDifference) << "The in-place threshold result differs from the reference result by more than the acceptable threshold."; +} + +}} // namespaces opencv_test, :: From 51f92b0b8cb8e60eb2b0fe0361cefb6608c4c7c2 Mon Sep 17 00:00:00 2001 From: Aakash Preetam Date: Wed, 30 Apr 2025 00:50:14 +0530 Subject: [PATCH 4/8] Add FFT and IFFT DSP API with Accuracy and Perf Test --- modules/fastcv/include/opencv2/fastcv.hpp | 1 + .../fastcv/include/opencv2/fastcv/fft_dsp.hpp | 49 ++++++++++ modules/fastcv/perf/perf_fft_dsp.cpp | 81 ++++++++++++++++ modules/fastcv/src/fft_dsp.cpp | 96 +++++++++++++++++++ modules/fastcv/test/test_fft_dsp.cpp | 94 ++++++++++++++++++ 5 files changed, 321 insertions(+) create mode 100644 modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp create mode 100644 modules/fastcv/perf/perf_fft_dsp.cpp create mode 100644 modules/fastcv/src/fft_dsp.cpp create mode 100644 modules/fastcv/test/test_fft_dsp.cpp diff --git a/modules/fastcv/include/opencv2/fastcv.hpp b/modules/fastcv/include/opencv2/fastcv.hpp index d975b51a43c..586c0c1d066 100644 --- a/modules/fastcv/include/opencv2/fastcv.hpp +++ b/modules/fastcv/include/opencv2/fastcv.hpp @@ -34,6 +34,7 @@ #include "opencv2/fastcv/dsp_init.hpp" #include "opencv2/fastcv/sad_dsp.hpp" #include "opencv2/fastcv/thresh_dsp.hpp" +#include "opencv2/fastcv/fft_dsp.hpp" /** * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions diff --git 
a/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp new file mode 100644 index 00000000000..bb60111c72c --- /dev/null +++ b/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 +*/ + +#ifndef OPENCV_FASTCV_FFT_DSP_HPP +#define OPENCV_FASTCV_FFT_DSP_HPP + +#include + +namespace cv { +namespace fastcv { +namespace dsp { + +//! @addtogroup fastcv +//! @{ + +/** +* @brief Computes the 1D or 2D Fast Fourier Transform of a real-valued matrix. + For the 2D case, the width and height of the input and output matrix must be powers of 2. + For the 1D case, the height of the matrices must be 1, while the width must be a power of 2. + +* @param src Input array of type CV_8UC1. The dimensions of the matrix must be powers of 2 for the 2D case, + and in the 1D case, the height must be 1, while the width must be a power of 2. +* @param dst The computed FFT matrix of type CV_32FC2. The FFT Re and Im coefficients are stored in different channels. + Hence the dimensions of the dst are (srcWidth, srcHeight) +*/ +CV_EXPORTS_W void FFT(InputArray src, OutputArray dst); + +/** +* @brief Computes the 1D or 2D Inverse Fast Fourier Transform of a complex-valued matrix. + For the 2D case, the width and height of the input and output matrix must be powers of 2. + For the 1D case, the height of the matrices must be 1, while the width must be a power of 2. + +* @param src Input array of type CV_32FC2 containing FFT Re and Im coefficients stored in separate channels. + The dimensions of the matrix must be powers of 2 for the 2D case, and in the 1D case, the height must be 1, + while the width must be a power of 2. +* @param dst The computed IFFT matrix of type CV_8U. The matrix is real-valued and has no imaginary components. 
+ Hence the dimensions of the dst are (srcWidth , srcHeight) +*/ +CV_EXPORTS_W void IFFT(InputArray src, OutputArray dst); + +//! @} + +} // dsp:: +} // fastcv:: +} // cv:: + +#endif // OPENCV_FASTCV_FFT_DSP_HPP diff --git a/modules/fastcv/perf/perf_fft_dsp.cpp b/modules/fastcv/perf/perf_fft_dsp.cpp new file mode 100644 index 00000000000..edded0ca4c3 --- /dev/null +++ b/modules/fastcv/perf/perf_fft_dsp.cpp @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { + +typedef perf::TestBaseWithParam FFT_DSPExtPerfTest; + +PERF_TEST_P_(FFT_DSPExtPerfTest, forward) +{ + //Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + Size size = GetParam(); + + RNG& rng = cv::theRNG(); + + Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + src.create(size, CV_8UC1); + cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(256)); + + Mat dst; + dst.allocator = cv::fastcv::getQcAllocator(); + + while (next()) + { + startTimer(); + cv::fastcv::dsp::FFT(src, dst); + stopTimer(); + } + + //De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); + + SANITY_CHECK_NOTHING(); +} + +PERF_TEST_P_(FFT_DSPExtPerfTest, inverse) +{ + //Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + Size size = GetParam(); + + RNG& rng = cv::theRNG(); + + Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + src.create(size, CV_8UC1); + + cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(256)); + + Mat fwd, back; + fwd.allocator = cv::fastcv::getQcAllocator(); + back.allocator = cv::fastcv::getQcAllocator(); + + cv::fastcv::dsp::FFT(src, fwd); + + while (next()) + { + startTimer(); + cv::fastcv::dsp::IFFT(fwd, back); + stopTimer(); + } + + //De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); 
+ + SANITY_CHECK_NOTHING(); +} + +INSTANTIATE_TEST_CASE_P(FastCV_Extension, FFT_DSPExtPerfTest, + ::testing::Values(Size(256, 256), Size(512, 512))); + +} // namespace diff --git a/modules/fastcv/src/fft_dsp.cpp b/modules/fastcv/src/fft_dsp.cpp new file mode 100644 index 00000000000..f3fd07024ea --- /dev/null +++ b/modules/fastcv/src/fft_dsp.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "precomp.hpp" + +namespace cv { +namespace fastcv { +namespace dsp { + +static bool isPow2(int x) +{ + return x && (!(x & (x - 1))); +} + +void FFT(InputArray _src, OutputArray _dst) +{ + CV_Assert( + !_src.empty() && + _src.type() == CV_8UC1 && + IS_FASTCV_ALLOCATED(_src.getMat()) + ); + + CV_Assert(isPow2(_src.rows()) || _src.rows() == 1); + CV_Assert(isPow2(_src.cols())); + CV_Assert(_src.step() % 8 == 0); + CV_Assert(static_cast(_src.rows() * _src.cols()) > MIN_REMOTE_BUF_SIZE); + + Mat src = _src.getMat(); + CV_Assert(reinterpret_cast(src.data) % 8 == 0); + + _dst.create(_src.rows(), _src.cols(), CV_32FC2); + CV_Assert(_dst.step() % 8 == 0); + Mat dst = _dst.getMat(); + + // Check if dst is allocated by the QcAllocator + CV_Assert(IS_FASTCV_ALLOCATED(dst)); + CV_Assert(reinterpret_cast(dst.data) % 8 == 0); + + // Check DSP initialization status and initialize if needed + FASTCV_CHECK_DSP_INIT(); + + fcvStatus status = fcvFFTu8Q(src.data, src.cols, src.rows, src.step, + (float*)dst.data, dst.step); + + if (status != FASTCV_SUCCESS) + { + std::string s = fcvStatusStrings.count(status) ? 
fcvStatusStrings.at(status) : "unknown"; + CV_Error(cv::Error::StsInternal, "FastCV error: " + s); + } +} + +void IFFT(InputArray _src, OutputArray _dst) +{ + CV_Assert( + !_src.empty() && + _src.type() == CV_32FC2 && + IS_FASTCV_ALLOCATED(_src.getMat()) + ); + + CV_Assert(isPow2(_src.rows()) || _src.rows() == 1); + CV_Assert(isPow2(_src.cols())); + + CV_Assert(_src.step() % 8 == 0); + CV_Assert(static_cast(_src.rows() * _src.cols() * sizeof(float32_t)) > MIN_REMOTE_BUF_SIZE); + + Mat src = _src.getMat(); + + CV_Assert(reinterpret_cast(src.data) % 8 == 0); + + _dst.create(_src.rows(), _src.cols(), CV_8UC1); + + CV_Assert(_dst.step() % 8 == 0); + + Mat dst = _dst.getMat(); + // Check if dst is allocated by the QcAllocator + CV_Assert(IS_FASTCV_ALLOCATED(dst)); + CV_Assert(reinterpret_cast(dst.data) % 8 == 0); + + // Check DSP initialization status and initialize if needed + FASTCV_CHECK_DSP_INIT(); + + fcvStatus status = fcvIFFTf32Q((const float*)src.data, src.cols * 2, src.rows, src.step, + dst.data, dst.step); + + if (status != FASTCV_SUCCESS) + { + std::string s = fcvStatusStrings.count(status) ? fcvStatusStrings.at(status) : "unknown"; + CV_Error(cv::Error::StsInternal, "FastCV error: " + s); + } +} + +} // dsp:: +} // fastcv:: +} // cv:: \ No newline at end of file diff --git a/modules/fastcv/test/test_fft_dsp.cpp b/modules/fastcv/test/test_fft_dsp.cpp new file mode 100644 index 00000000000..1582a3de5ee --- /dev/null +++ b/modules/fastcv/test/test_fft_dsp.cpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +class FFT_DSPExtTest : public ::testing::TestWithParam {}; + +TEST_P(FFT_DSPExtTest, forward) +{ + //Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + Size size = GetParam(); + + RNG& rng = cv::theRNG(); + + Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + src.create(size, CV_8UC1); + + cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(256)); + + Mat srcFloat; + src.convertTo(srcFloat, CV_32F); + + Mat dst, ref; + dst.allocator = cv::fastcv::getQcAllocator(); + cv::fastcv::dsp::FFT(src, dst); + + //De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); + + cv::dft(srcFloat, ref, DFT_COMPLEX_OUTPUT); + + double normInf = cvtest::norm(dst, ref, cv::NORM_INF); + double normL2 = cvtest::norm(dst, ref, cv::NORM_L2) / dst.size().area(); + + EXPECT_LT(normInf, 19.1); // for 512x512 case + EXPECT_LT(normL2, 18.0 / 256.0 ); +} + +TEST_P(FFT_DSPExtTest, inverse) +{ + //Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + Size size = GetParam(); + + RNG& rng = cv::theRNG(); + + Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + src.create(size, CV_8UC1); + + cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(256)); + + Mat srcFloat; + src.convertTo(srcFloat, CV_32F); + + Mat fwd, back; + fwd.allocator = cv::fastcv::getQcAllocator(); + back.allocator = cv::fastcv::getQcAllocator(); + + cv::fastcv::dsp::FFT(src, fwd); + cv::fastcv::dsp::IFFT(fwd, back); + + //De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); + + Mat backFloat; + back.convertTo(backFloat, CV_32F); + + Mat fwdRef, backRef; + cv::dft(srcFloat, fwdRef, DFT_COMPLEX_OUTPUT); + cv::idft(fwdRef, backRef, DFT_REAL_OUTPUT); + + backRef *= 1./(src.size().area()); + + double normInf = cvtest::norm(backFloat, backRef, 
cv::NORM_INF); + double normL2 = cvtest::norm(backFloat, backRef, cv::NORM_L2) / src.size().area(); + + EXPECT_LT(normInf, 9.16e-05); + EXPECT_LT(normL2, 1.228e-06); +} + +INSTANTIATE_TEST_CASE_P(FastCV_Extension, FFT_DSPExtTest, ::testing::Values(Size(256, 256), Size(512, 512))); + +}} // namespaces opencv_test, :: From ffa4c845f3988a7642c8037caacc605e8dc81078 Mon Sep 17 00:00:00 2001 From: Aakash Preetam Date: Wed, 30 Apr 2025 00:53:39 +0530 Subject: [PATCH 5/8] Add canny DSP API with Accuracy and Perf Test --- modules/fastcv/include/opencv2/fastcv.hpp | 1 + .../include/opencv2/fastcv/edges_dsp.hpp | 38 +++++++++++ modules/fastcv/perf/perf_edges_dsp.cpp | 54 ++++++++++++++++ modules/fastcv/src/edges_dsp.cpp | 63 +++++++++++++++++++ modules/fastcv/test/test_edges_dsp.cpp | 37 +++++++++++ 5 files changed, 193 insertions(+) create mode 100644 modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp create mode 100644 modules/fastcv/perf/perf_edges_dsp.cpp create mode 100644 modules/fastcv/src/edges_dsp.cpp create mode 100644 modules/fastcv/test/test_edges_dsp.cpp diff --git a/modules/fastcv/include/opencv2/fastcv.hpp b/modules/fastcv/include/opencv2/fastcv.hpp index 586c0c1d066..5ba16fd73e9 100644 --- a/modules/fastcv/include/opencv2/fastcv.hpp +++ b/modules/fastcv/include/opencv2/fastcv.hpp @@ -35,6 +35,7 @@ #include "opencv2/fastcv/sad_dsp.hpp" #include "opencv2/fastcv/thresh_dsp.hpp" #include "opencv2/fastcv/fft_dsp.hpp" +#include "opencv2/fastcv/edges_dsp.hpp" /** * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions diff --git a/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp new file mode 100644 index 00000000000..10f206c1809 --- /dev/null +++ b/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#ifndef OPENCV_FASTCV_EDGES_DSP_HPP +#define OPENCV_FASTCV_EDGES_DSP_HPP + +#include "opencv2/core/mat.hpp" + +namespace cv { +namespace fastcv { +namespace dsp { + +/** +* @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions +*/ + +//! @addtogroup fastcv +//! @{ + +/** + * @brief Canny edge detector applied to an 8-bit grayscale image + * @param _src Input image with type CV_8UC1 + * @param _dst Output 8-bit image containing the edge detection results + * @param lowThreshold First threshold + * @param highThreshold Second threshold + * @param apertureSize The Sobel kernel size for calculating gradient. Supported sizes are 3, 5 and 7. + * @param L2gradient L2 Gradient or L1 Gradient +*/ +CV_EXPORTS_W void canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize = 3, bool L2gradient = false); +//! @} + +} // dsp:: +} // fastcv:: +} // cv:: + +#endif //OPENCV_FASTCV_EDGES_DSP_HPP diff --git a/modules/fastcv/perf/perf_edges_dsp.cpp b/modules/fastcv/perf/perf_edges_dsp.cpp new file mode 100644 index 00000000000..100a1649f39 --- /dev/null +++ b/modules/fastcv/perf/perf_edges_dsp.cpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { + +typedef perf::TestBaseWithParam, bool>> CannyPerfTest; + +PERF_TEST_P(CannyPerfTest, run, + ::testing::Combine(::testing::Values(perf::szVGA, perf::sz720p, perf::sz1080p), // image size + ::testing::Values(3, 5, 7), // aperture size + ::testing::Values(make_pair(0, 50), make_pair(100, 150), make_pair(50, 150)), // low and high thresholds + ::testing::Values(false, true) // L2gradient + ) +) +{ + //Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + cv::Size srcSize = get<0>(GetParam()); + int apertureSize = get<1>(GetParam()); + auto thresholds = get<2>(GetParam()); + bool L2gradient = get<3>(GetParam()); + + cv::Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + src.create(srcSize, CV_8UC1); + + cv::Mat dst; + dst.allocator = cv::fastcv::getQcAllocator(); + + cv::randu(src, 0, 256); + + int lowThreshold = thresholds.first; + int highThreshold = thresholds.second; + + while (next()) + { + startTimer(); + cv::fastcv::dsp::canny(src, dst, lowThreshold, highThreshold, apertureSize, L2gradient); + stopTimer(); + } + + //De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); + + SANITY_CHECK_NOTHING(); +} + +} //namespace \ No newline at end of file diff --git a/modules/fastcv/src/edges_dsp.cpp b/modules/fastcv/src/edges_dsp.cpp new file mode 100644 index 00000000000..4c847f88155 --- /dev/null +++ b/modules/fastcv/src/edges_dsp.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "precomp.hpp" + +namespace cv { +namespace fastcv { +namespace dsp { + +void canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize, bool L2gradient) +{ + CV_Assert( + !_src.empty() && + lowThreshold <= highThreshold && + IS_FASTCV_ALLOCATED(_src.getMat()) + ); + + int type = _src.type(); + CV_Assert(type == CV_8UC1); + CV_Assert(_src.step() % 8 == 0); + + Size size = _src.size(); + _dst.create(size, type); + Mat src = _src.getMat(); + CV_Assert(src.step >= (size_t)src.cols); + CV_Assert(reinterpret_cast(src.data) % 8 == 0); + + Mat dst = _dst.getMat(); + + // Check if dst is allocated by the QcAllocator + CV_Assert(IS_FASTCV_ALLOCATED(dst)); + CV_Assert(reinterpret_cast(dst.data) % 8 == 0); + CV_Assert(dst.step >= (size_t)src.cols); + + // Check DSP initialization status and initialize if needed + FASTCV_CHECK_DSP_INIT(); + + fcvNormType norm; + + if (L2gradient) + norm = FASTCV_NORM_L2; + else + norm = FASTCV_NORM_L1; + + int16_t* gx = (int16_t*)fcvHwMemAlloc(src.cols * src.rows * sizeof(int16_t), 16); + int16_t* gy = (int16_t*)fcvHwMemAlloc(src.cols * src.rows * sizeof(int16_t), 16); + uint32_t gstride = 2 * src.cols; + fcvStatus status = fcvFilterCannyu8Q((uint8_t*)src.data, src.cols, src.rows, src.step, apertureSize, lowThreshold, highThreshold, norm, (uint8_t*)dst.data, dst.step, gx, gy, gstride); + fcvHwMemFree(gx); + fcvHwMemFree(gy); + + if (status != FASTCV_SUCCESS) + { + std::string s = fcvStatusStrings.count(status) ? 
fcvStatusStrings.at(status) : "unknown"; + CV_Error(cv::Error::StsInternal, "FastCV error: " + s); + } +} + +} // dsp:: +} // fastcv:: +} // cv:: \ No newline at end of file diff --git a/modules/fastcv/test/test_edges_dsp.cpp b/modules/fastcv/test/test_edges_dsp.cpp new file mode 100644 index 00000000000..c93fb8d3b4c --- /dev/null +++ b/modules/fastcv/test/test_edges_dsp.cpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +TEST(DSP_CannyTest, accuracy) +{ + //Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + cv::Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + cv::imread(cvtest::findDataFile("cv/detectors_descriptors_evaluation/planar/box_in_scene.png"), src, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(src.empty()) << "Could not read the image file."; + + cv::Mat dst; + dst.allocator = cv::fastcv::getQcAllocator(); + + int lowThreshold = 0; + int highThreshold = 150; + + cv::fastcv::dsp::canny(src, dst, lowThreshold, highThreshold, 3, true); + + //De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); + + EXPECT_FALSE(dst.empty()); + EXPECT_EQ(src.size(), dst.size()); +} + +} +} From 350f7f9f8f10c3c5cee281aa3ec85bf97d8b2cbf Mon Sep 17 00:00:00 2001 From: Aakash Preetam Date: Wed, 30 Apr 2025 00:57:57 +0530 Subject: [PATCH 6/8] Add filter2D DSP API with Accuracy and Perf Test --- modules/fastcv/include/opencv2/fastcv.hpp | 1 + .../include/opencv2/fastcv/blur_dsp.hpp | 34 +++++ modules/fastcv/perf/perf_blur_dsp.cpp | 71 +++++++++ modules/fastcv/src/blur_dsp.cpp | 138 ++++++++++++++++++ modules/fastcv/test/test_blur_dsp.cpp | 73 +++++++++ 5 files changed, 317 insertions(+) create mode 100644 modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp create mode 100644 modules/fastcv/perf/perf_blur_dsp.cpp create mode 100644 
modules/fastcv/src/blur_dsp.cpp create mode 100644 modules/fastcv/test/test_blur_dsp.cpp diff --git a/modules/fastcv/include/opencv2/fastcv.hpp b/modules/fastcv/include/opencv2/fastcv.hpp index 5ba16fd73e9..6626c4c9b5a 100644 --- a/modules/fastcv/include/opencv2/fastcv.hpp +++ b/modules/fastcv/include/opencv2/fastcv.hpp @@ -36,6 +36,7 @@ #include "opencv2/fastcv/thresh_dsp.hpp" #include "opencv2/fastcv/fft_dsp.hpp" #include "opencv2/fastcv/edges_dsp.hpp" +#include "opencv2/fastcv/blur_dsp.hpp" /** * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions diff --git a/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp new file mode 100644 index 00000000000..776b1def6c1 --- /dev/null +++ b/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 +*/ + +#ifndef OPENCV_FASTCV_BLUR_DSP_HPP +#define OPENCV_FASTCV_BLUR_DSP_HPP + +#include + +namespace cv { +namespace fastcv { +namespace dsp { +/** + * @defgroup fastcv Module-wrapper for FastCV hardware accelerated functions + */ + +/** + * @brief Filter an image with non-separable kernel + * @param _src Input image with type CV_8UC1, src size should be greater than 176*144 + * @param _dst Output image with type CV_8UC1, CV_16SC1 or CV_32FC1 + * @param ddepth The depth of output image + * @param _kernel Filter kernel data + * + * @sa Filter2D + */ +CV_EXPORTS_W void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel); + +//! @} +} // dsp:: +} // fastcv:: +} // cv:: + +#endif // OPENCV_FASTCV_BLUR_DSP_HPP diff --git a/modules/fastcv/perf/perf_blur_dsp.cpp b/modules/fastcv/perf/perf_blur_dsp.cpp new file mode 100644 index 00000000000..a1fadac62fd --- /dev/null +++ b/modules/fastcv/perf/perf_blur_dsp.cpp @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "perf_precomp.hpp" + +namespace opencv_test { + +typedef perf::TestBaseWithParam> Filter2DPerfTest_DSP; + +PERF_TEST_P(Filter2DPerfTest_DSP, run, + ::testing::Combine(::testing::Values(perf::szVGA, perf::sz720p), // image size + ::testing::Values(CV_8U,CV_16S,CV_32F), // dst image depth + ::testing::Values(3, 5, 7) // kernel size + ) + ) +{ + //Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + cv::Size srcSize = get<0>(GetParam()); + int ddepth = get<1>(GetParam()); + int ksize = get<2>(GetParam()); + + cv::Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + src.create(srcSize, CV_8U); + + cv::Mat kernel; + cv::Mat dst; + kernel.allocator = cv::fastcv::getQcAllocator(); + dst.allocator = cv::fastcv::getQcAllocator(); + + switch (ddepth) + { + case CV_8U: + case CV_16S: + { + kernel.create(ksize,ksize,CV_8S); + break; + } + case CV_32F: + { + kernel.create(ksize,ksize,CV_32F); + break; + } + default: + break; + } + + cv::randu(src, 0, 256); + cv::randu(kernel, INT8_MIN, INT8_MAX); + RNG& rng = cv::theRNG(); + cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(255)); + + while (next()) + { + startTimer(); + cv::fastcv::dsp::filter2D(src, dst, ddepth, kernel); + stopTimer(); + } + + //De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); + + SANITY_CHECK_NOTHING(); +} + +} // namespace \ No newline at end of file diff --git a/modules/fastcv/src/blur_dsp.cpp b/modules/fastcv/src/blur_dsp.cpp new file mode 100644 index 00000000000..4f59d65ded5 --- /dev/null +++ b/modules/fastcv/src/blur_dsp.cpp @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2025 Qualcomm Innovation Center, Inc. All rights reserved. 
+ * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "precomp.hpp" + +namespace cv { +namespace fastcv { +namespace dsp { + +class FcvFilter2DLoop_Invoker : public ParallelLoopBody +{ + public: + + FcvFilter2DLoop_Invoker(const Mat& _src, Mat& _dst, const Mat& _kernel) : + ParallelLoopBody(), src(_src), dst(_dst), kernel(_kernel) + { + width = src.cols; + height = src.rows; + ksize = kernel.size().width; + halfKsize = ksize/2; + } + + virtual void operator()(const Range& range) const CV_OVERRIDE + { + int topLines = 0; + int rangeHeight = range.end-range.start; + int paddedHeight = rangeHeight; + + if(range.start >= halfKsize) + { + topLines += halfKsize; + paddedHeight += halfKsize; + } + + if(range.end <= height-halfKsize) + { + paddedHeight += halfKsize; + } + + Mat srcPadded, dstPadded; + srcPadded.allocator = cv::fastcv::getQcAllocator(); + dstPadded.allocator = cv::fastcv::getQcAllocator(); + + srcPadded = src(Rect(0, range.start - topLines, width, paddedHeight)); + dstPadded.create(paddedHeight, width, dst.depth()); + + CV_Assert(IS_FASTCV_ALLOCATED(srcPadded)); + CV_Assert(IS_FASTCV_ALLOCATED(dstPadded)); + + if (dst.depth() == CV_8U) + fcvFilterCorrNxNu8Q((int8_t*)kernel.data, ksize, 0, srcPadded.data, width, paddedHeight, srcPadded.step, + dstPadded.data, dstPadded.step); + else if (dst.depth() == CV_16S) + fcvFilterCorrNxNu8s16Q((int8_t*)kernel.data, ksize, 0, srcPadded.data, width, paddedHeight, srcPadded.step, + (int16_t*)dstPadded.data, dstPadded.step); + else if (dst.depth() == CV_32F) + fcvFilterCorrNxNu8f32Q((float32_t*)kernel.data, ksize, srcPadded.data, width, paddedHeight, srcPadded.step, + (float32_t*)dstPadded.data, dstPadded.step); + + // Only copy center part back to output image and ignore the padded lines + Mat temp1 = dstPadded(Rect(0, topLines, width, rangeHeight)); + Mat temp2 = dst(Rect(0, range.start, width, rangeHeight)); + temp1.copyTo(temp2); + } + + private: + const Mat& src; + Mat& dst; + const Mat& kernel; + int width; + int 
height; + int ksize; + int halfKsize; + + FcvFilter2DLoop_Invoker(const FcvFilter2DLoop_Invoker &); // = delete; + const FcvFilter2DLoop_Invoker& operator= (const FcvFilter2DLoop_Invoker &); // = delete; +}; + +void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel) +{ + CV_Assert( + !_src.empty() && + _src.type() == CV_8UC1 && + IS_FASTCV_ALLOCATED(_src.getMat()) && + IS_FASTCV_ALLOCATED(_kernel.getMat()) + ); + + Mat kernel = _kernel.getMat(); + + Size ksize = kernel.size(); + CV_Assert(ksize.width == ksize.height); + CV_Assert(ksize.width % 2 == 1); + + _dst.create(_src.size(), ddepth); + Mat src = _src.getMat(); + Mat dst = _dst.getMat(); + + // Check if dst is allocated by the QcAllocator + CV_Assert(IS_FASTCV_ALLOCATED(dst)); + + // Check DSP initialization status and initialize if needed + FASTCV_CHECK_DSP_INIT(); + + int nThreads = getNumThreads(); + int nStripes = (nThreads > 1) ? ((src.rows > 60) ? 3 * nThreads : 1) : 1; + + if (ddepth == CV_8U && ksize.width == 3) + fcvFilterCorr3x3s8_v2Q((int8_t*)kernel.data, src.data, src.cols, src.rows, src.step, dst.data, dst.step); + + switch (ddepth) + { + case CV_8U: + case CV_16S: + { + CV_Assert(CV_MAT_DEPTH(kernel.type()) == CV_8S); + parallel_for_(Range(0, src.rows), FcvFilter2DLoop_Invoker(src, dst, kernel), nStripes); + break; + } + case CV_32F: + { + CV_Assert(CV_MAT_DEPTH(kernel.type()) == CV_32F); + parallel_for_(Range(0, src.rows), FcvFilter2DLoop_Invoker(src, dst, kernel), nStripes); + break; + } + default: + { + CV_Error(cv::Error::StsBadArg, cv::format("Kernel Size:%d, Dst type:%s is not supported", ksize.width, + depthToString(ddepth))); + break; + } + } +} + +} // dsp:: +} // fastcv:: +} // cv:: \ No newline at end of file diff --git a/modules/fastcv/test/test_blur_dsp.cpp b/modules/fastcv/test/test_blur_dsp.cpp new file mode 100644 index 00000000000..58c61483090 --- /dev/null +++ b/modules/fastcv/test/test_blur_dsp.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2025 Qualcomm 
Innovation Center, Inc. All rights reserved. + * SPDX-License-Identifier: Apache-2.0 +*/ + +#include "test_precomp.hpp" + +namespace opencv_test { namespace { + +typedef testing::TestWithParam> Filter2DTest_DSP; + +TEST_P(Filter2DTest_DSP, accuracy) +{ + //Initialize DSP + int initStatus = cv::fastcv::dsp::fcvdspinit(); + ASSERT_EQ(initStatus, 0) << "Failed to initialize FastCV DSP"; + + Size srcSize = get<0>(GetParam()); + int ddepth = get<1>(GetParam()); + int ksize = get<2>(GetParam()); + + cv::Mat src; + src.allocator = cv::fastcv::getQcAllocator(); + src.create(srcSize, CV_8U); + + cv::Mat kernel; + cv::Mat dst, ref; + kernel.allocator = cv::fastcv::getQcAllocator(); + dst.allocator = cv::fastcv::getQcAllocator(); + + switch (ddepth) + { + case CV_8U: + case CV_16S: + { + kernel.create(ksize,ksize,CV_8S); + break; + } + case CV_32F: + { + kernel.create(ksize,ksize,CV_32F); + break; + } + default: + return; + } + + RNG& rng = cv::theRNG(); + cvtest::randUni(rng, src, Scalar::all(0), Scalar::all(255)); + cvtest::randUni(rng, kernel, Scalar::all(INT8_MIN), Scalar::all(INT8_MAX)); + + cv::fastcv::dsp::filter2D(src, dst, ddepth, kernel); + + //De-Initialize DSP + cv::fastcv::dsp::fcvdspdeinit(); + + cv::filter2D(src, ref, ddepth, kernel); + cv::Mat difference; + dst.convertTo(dst, CV_8U); + ref.convertTo(ref, CV_8U); + cv::absdiff(dst, ref, difference); + + int num_diff_pixels = cv::countNonZero(difference); + EXPECT_LT(num_diff_pixels, (src.rows+src.cols)*ksize); +} + +INSTANTIATE_TEST_CASE_P(FastCV_Extension, Filter2DTest_DSP, Combine( +/*image sie*/ Values(perf::szVGA, perf::sz720p, perf::sz1080p), +/*dst depth*/ Values(CV_8U,CV_16S,CV_32F), +/*kernel size*/ Values(3, 5, 7, 9, 11) +)); + +}} // namespaces opencv_test, :: \ No newline at end of file From a0c5c8c3c46618333d1d5e98aa1e0bf89a0155a1 Mon Sep 17 00:00:00 2001 From: Aakash Preetam Date: Mon, 5 May 2025 13:33:54 +0530 Subject: [PATCH 7/8] Remove parallel_for_ from filter2D dsp API, update mat allocation 
check --- .../include/opencv2/fastcv/blur_dsp.hpp | 2 +- .../include/opencv2/fastcv/dsp_init.hpp | 4 +- .../include/opencv2/fastcv/edges_dsp.hpp | 2 +- .../fastcv/include/opencv2/fastcv/fft_dsp.hpp | 4 +- .../fastcv/include/opencv2/fastcv/sad_dsp.hpp | 2 +- .../include/opencv2/fastcv/thresh_dsp.hpp | 36 ++++---- modules/fastcv/perf/perf_edges_dsp.cpp | 2 +- modules/fastcv/src/allocator.cpp | 8 -- modules/fastcv/src/blur_dsp.cpp | 90 +++---------------- modules/fastcv/src/edges_dsp.cpp | 2 +- modules/fastcv/src/precomp.hpp | 3 +- modules/fastcv/test/test_blur_dsp.cpp | 2 +- modules/fastcv/test/test_edges_dsp.cpp | 2 +- 13 files changed, 41 insertions(+), 118 deletions(-) diff --git a/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp index 776b1def6c1..eff331d02c8 100644 --- a/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp +++ b/modules/fastcv/include/opencv2/fastcv/blur_dsp.hpp @@ -24,7 +24,7 @@ namespace dsp { * * @sa Filter2D */ -CV_EXPORTS_W void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel); +CV_EXPORTS void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel); //! @} } // dsp:: diff --git a/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp b/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp index 16fc163d1f1..942b7fdaa95 100644 --- a/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp +++ b/modules/fastcv/include/opencv2/fastcv/dsp_init.hpp @@ -27,7 +27,7 @@ namespace dsp { * * @return int Returns 0 on success, and a non-zero value on failure. */ -CV_EXPORTS_W int fcvdspinit(); +CV_EXPORTS int fcvdspinit(); /** * @brief Deinitializes the FastCV DSP environment. @@ -39,7 +39,7 @@ CV_EXPORTS_W int fcvdspinit(); * @note This function must be called at the end of the use case or program, after all DSP-related * operations are complete. */ -CV_EXPORTS_W void fcvdspdeinit(); +CV_EXPORTS void fcvdspdeinit(); //! 
@} } // dsp:: diff --git a/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp index 10f206c1809..37b2aef5515 100644 --- a/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp +++ b/modules/fastcv/include/opencv2/fastcv/edges_dsp.hpp @@ -28,7 +28,7 @@ namespace dsp { * @param apertureSize The Sobel kernel size for calculating gradient. Supported sizes are 3, 5 and 7. * @param L2gradient L2 Gradient or L1 Gradient */ -CV_EXPORTS_W void canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize = 3, bool L2gradient = false); +CV_EXPORTS void Canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize = 3, bool L2gradient = false); //! @} } // dsp:: diff --git a/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp index bb60111c72c..b4e4e44ecdc 100644 --- a/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp +++ b/modules/fastcv/include/opencv2/fastcv/fft_dsp.hpp @@ -25,7 +25,7 @@ namespace dsp { * @param dst The computed FFT matrix of type CV_32FC2. The FFT Re and Im coefficients are stored in different channels. Hence the dimensions of the dst are (srcWidth, srcHeight) */ -CV_EXPORTS_W void FFT(InputArray src, OutputArray dst); +CV_EXPORTS void FFT(InputArray src, OutputArray dst); /** * @brief Computes the 1D or 2D Inverse Fast Fourier Transform of a complex valued matrix. @@ -38,7 +38,7 @@ CV_EXPORTS_W void FFT(InputArray src, OutputArray dst); * @param dst The computed IFFT matrix of type CV_8U. The matrix is real valued and has no imaginary components. Hence the dimensions of the dst are (srcWidth , srcHeight) */ -CV_EXPORTS_W void IFFT(InputArray src, OutputArray dst); +CV_EXPORTS void IFFT(InputArray src, OutputArray dst); //! 
@} diff --git a/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp index 125a81eb102..b9ae9079686 100644 --- a/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp +++ b/modules/fastcv/include/opencv2/fastcv/sad_dsp.hpp @@ -24,7 +24,7 @@ namespace dsp { * @param _src The input image data, type CV_8UC1 * @param _dst The output image data, type CV_16UC1 */ -CV_EXPORTS_W void sumOfAbsoluteDiffs(cv::InputArray _patch, cv::InputArray _src, cv::OutputArray _dst); +CV_EXPORTS void sumOfAbsoluteDiffs(cv::InputArray _patch, cv::InputArray _src, cv::OutputArray _dst); //! @} } // dsp:: diff --git a/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp b/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp index 27021ad0d0d..25824e72097 100644 --- a/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp +++ b/modules/fastcv/include/opencv2/fastcv/thresh_dsp.hpp @@ -12,26 +12,26 @@ namespace cv { namespace fastcv { namespace dsp { - //! @addtogroup fastcv - //! @{ +//! @addtogroup fastcv +//! @{ - /** - * @brief Binarizes a grayscale image using Otsu's method. - * Sets the pixel to max(255) if it's value is greater than the threshold; - * else, set the pixel to min(0). The threshold is searched that minimizes - * the intra-class variance (the variance within the class). - * - * @param _src Input 8-bit grayscale image. Size of buffer is srcStride*srcHeight bytes. - * @param _dst Output 8-bit binarized image. Size of buffer is dstStride*srcHeight bytes. - * @param type Threshold type that can be either 0 or 1. - * NOTE: For threshold type=0, the pixel is set as - * maxValue if it's value is greater than the threshold; else, it is set as zero. - * For threshold type=1, the pixel is set as zero if it's - * value is greater than the threshold; else, it is set as maxValue. - */ - CV_EXPORTS_W void thresholdOtsu(InputArray _src, OutputArray _dst, bool type); +/** + * @brief Binarizes a grayscale image using Otsu's method. 
+ * Sets the pixel to max(255) if its value is greater than the threshold; + * else, set the pixel to min(0). The threshold is searched that minimizes + * the intra-class variance (the variance within the class). + * + * @param _src Input 8-bit grayscale image. Size of buffer is srcStride*srcHeight bytes. + * @param _dst Output 8-bit binarized image. Size of buffer is dstStride*srcHeight bytes. + * @param type Threshold type that can be either 0 or 1. + * NOTE: For threshold type=0, the pixel is set as + * maxValue if its value is greater than the threshold; else, it is set as zero. + * For threshold type=1, the pixel is set as zero if its + * value is greater than the threshold; else, it is set as maxValue. + */ +CV_EXPORTS void thresholdOtsu(InputArray _src, OutputArray _dst, bool type); - //! @} +//! @} } // dsp:: } // fastcv:: } // cv:: diff --git a/modules/fastcv/perf/perf_edges_dsp.cpp b/modules/fastcv/perf/perf_edges_dsp.cpp index 100a1649f39..42e1e1d3acd 100644 --- a/modules/fastcv/perf/perf_edges_dsp.cpp +++ b/modules/fastcv/perf/perf_edges_dsp.cpp @@ -41,7 +41,7 @@ PERF_TEST_P(CannyPerfTest, run, while (next()) { startTimer(); - cv::fastcv::dsp::canny(src, dst, lowThreshold, highThreshold, apertureSize, L2gradient); + cv::fastcv::dsp::Canny(src, dst, lowThreshold, highThreshold, apertureSize, L2gradient); stopTimer(); } diff --git a/modules/fastcv/src/allocator.cpp b/modules/fastcv/src/allocator.cpp index e89835678d8..83147d2354a 100644 --- a/modules/fastcv/src/allocator.cpp +++ b/modules/fastcv/src/allocator.cpp @@ -61,8 +61,6 @@ cv::UMatData* QcAllocator::allocate(int dims, const int* sizes, int type, u->size = total; if(data0) u->flags |= cv::UMatData::USER_ALLOCATED; - - u->userdata = new std::string("QCOM"); // Add to active allocations cv::fastcv::QcResourceManager::getInstance().addAllocation(data); @@ -94,12 +92,6 @@ void QcAllocator::deallocate(cv::UMatData* u) const u->origdata = 0; } - if (u->userdata) - { - delete static_cast<std::string*>(u->userdata); 
- u->userdata = nullptr; - } - delete u; } diff --git a/modules/fastcv/src/blur_dsp.cpp b/modules/fastcv/src/blur_dsp.cpp index 4f59d65ded5..9c726241996 100644 --- a/modules/fastcv/src/blur_dsp.cpp +++ b/modules/fastcv/src/blur_dsp.cpp @@ -9,75 +9,6 @@ namespace cv { namespace fastcv { namespace dsp { -class FcvFilter2DLoop_Invoker : public ParallelLoopBody -{ - public: - - FcvFilter2DLoop_Invoker(const Mat& _src, Mat& _dst, const Mat& _kernel) : - ParallelLoopBody(), src(_src), dst(_dst), kernel(_kernel) - { - width = src.cols; - height = src.rows; - ksize = kernel.size().width; - halfKsize = ksize/2; - } - - virtual void operator()(const Range& range) const CV_OVERRIDE - { - int topLines = 0; - int rangeHeight = range.end-range.start; - int paddedHeight = rangeHeight; - - if(range.start >= halfKsize) - { - topLines += halfKsize; - paddedHeight += halfKsize; - } - - if(range.end <= height-halfKsize) - { - paddedHeight += halfKsize; - } - - Mat srcPadded, dstPadded; - srcPadded.allocator = cv::fastcv::getQcAllocator(); - dstPadded.allocator = cv::fastcv::getQcAllocator(); - - srcPadded = src(Rect(0, range.start - topLines, width, paddedHeight)); - dstPadded.create(paddedHeight, width, dst.depth()); - - CV_Assert(IS_FASTCV_ALLOCATED(srcPadded)); - CV_Assert(IS_FASTCV_ALLOCATED(dstPadded)); - - if (dst.depth() == CV_8U) - fcvFilterCorrNxNu8Q((int8_t*)kernel.data, ksize, 0, srcPadded.data, width, paddedHeight, srcPadded.step, - dstPadded.data, dstPadded.step); - else if (dst.depth() == CV_16S) - fcvFilterCorrNxNu8s16Q((int8_t*)kernel.data, ksize, 0, srcPadded.data, width, paddedHeight, srcPadded.step, - (int16_t*)dstPadded.data, dstPadded.step); - else if (dst.depth() == CV_32F) - fcvFilterCorrNxNu8f32Q((float32_t*)kernel.data, ksize, srcPadded.data, width, paddedHeight, srcPadded.step, - (float32_t*)dstPadded.data, dstPadded.step); - - // Only copy center part back to output image and ignore the padded lines - Mat temp1 = dstPadded(Rect(0, topLines, width, 
rangeHeight)); - Mat temp2 = dst(Rect(0, range.start, width, rangeHeight)); - temp1.copyTo(temp2); - } - - private: - const Mat& src; - Mat& dst; - const Mat& kernel; - int width; - int height; - int ksize; - int halfKsize; - - FcvFilter2DLoop_Invoker(const FcvFilter2DLoop_Invoker &); // = delete; - const FcvFilter2DLoop_Invoker& operator= (const FcvFilter2DLoop_Invoker &); // = delete; -}; - void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel) { CV_Assert( @@ -103,25 +34,25 @@ void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel) // Check DSP initialization status and initialize if needed FASTCV_CHECK_DSP_INIT(); - int nThreads = getNumThreads(); - int nStripes = (nThreads > 1) ? ((src.rows > 60) ? 3 * nThreads : 1) : 1; - - if (ddepth == CV_8U && ksize.width == 3) - fcvFilterCorr3x3s8_v2Q((int8_t*)kernel.data, src.data, src.cols, src.rows, src.step, dst.data, dst.step); - switch (ddepth) { case CV_8U: + { + if(ksize.width == 3) + fcvFilterCorr3x3s8_v2Q((int8_t*)kernel.data, src.data, src.cols, src.rows, src.step, dst.data, dst.step); + else + fcvFilterCorrNxNu8Q((int8_t*)kernel.data, ksize.width, 0, src.data, src.cols, src.rows, src.step, dst.data, dst.step); + + break; + } case CV_16S: { - CV_Assert(CV_MAT_DEPTH(kernel.type()) == CV_8S); - parallel_for_(Range(0, src.rows), FcvFilter2DLoop_Invoker(src, dst, kernel), nStripes); + fcvFilterCorrNxNu8s16Q((int8_t*)kernel.data, ksize.width, 0, src.data, src.cols, src.rows, src.step, (int16_t*)dst.data, dst.step); break; } case CV_32F: { - CV_Assert(CV_MAT_DEPTH(kernel.type()) == CV_32F); - parallel_for_(Range(0, src.rows), FcvFilter2DLoop_Invoker(src, dst, kernel), nStripes); + fcvFilterCorrNxNu8f32Q((float32_t*)kernel.data, ksize.width, src.data, src.cols, src.rows, src.step, (float32_t*)dst.data, dst.step); break; } default: @@ -131,6 +62,7 @@ void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel) break; } } + } } // dsp:: diff --git 
a/modules/fastcv/src/edges_dsp.cpp b/modules/fastcv/src/edges_dsp.cpp index 4c847f88155..ea121e73d04 100644 --- a/modules/fastcv/src/edges_dsp.cpp +++ b/modules/fastcv/src/edges_dsp.cpp @@ -9,7 +9,7 @@ namespace cv { namespace fastcv { namespace dsp { -void canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize, bool L2gradient) +void Canny(InputArray _src, OutputArray _dst, int lowThreshold, int highThreshold, int apertureSize, bool L2gradient) { CV_Assert( !_src.empty() && diff --git a/modules/fastcv/src/precomp.hpp b/modules/fastcv/src/precomp.hpp index 7a95f24fcd5..c5485eeff1a 100644 --- a/modules/fastcv/src/precomp.hpp +++ b/modules/fastcv/src/precomp.hpp @@ -79,8 +79,7 @@ namespace dsp { struct FastCvDspContext; #define IS_FASTCV_ALLOCATED(mat) \ - ((mat.u && mat.u->userdata && \ - *static_cast<std::string*>(mat.u->userdata) == "QCOM") ? true : \ + ((mat.allocator == cv::fastcv::getQcAllocator()) ? true : \ (CV_Error(cv::Error::StsBadArg, cv::format("Matrix '%s' not allocated with FastCV allocator. 
" \ "Please ensure that the matrix is created using " \ "cv::fastcv::getQcAllocator().", #mat)), false)) diff --git a/modules/fastcv/test/test_blur_dsp.cpp b/modules/fastcv/test/test_blur_dsp.cpp index 58c61483090..7db20aa4647 100644 --- a/modules/fastcv/test/test_blur_dsp.cpp +++ b/modules/fastcv/test/test_blur_dsp.cpp @@ -65,7 +65,7 @@ TEST_P(Filter2DTest_DSP, accuracy) } INSTANTIATE_TEST_CASE_P(FastCV_Extension, Filter2DTest_DSP, Combine( -/*image sie*/ Values(perf::szVGA, perf::sz720p, perf::sz1080p), +/*image sie*/ Values(perf::szVGA, perf::sz720p), /*dst depth*/ Values(CV_8U,CV_16S,CV_32F), /*kernel size*/ Values(3, 5, 7, 9, 11) )); diff --git a/modules/fastcv/test/test_edges_dsp.cpp b/modules/fastcv/test/test_edges_dsp.cpp index c93fb8d3b4c..2b385ffe4aa 100644 --- a/modules/fastcv/test/test_edges_dsp.cpp +++ b/modules/fastcv/test/test_edges_dsp.cpp @@ -24,7 +24,7 @@ TEST(DSP_CannyTest, accuracy) int lowThreshold = 0; int highThreshold = 150; - cv::fastcv::dsp::canny(src, dst, lowThreshold, highThreshold, 3, true); + cv::fastcv::dsp::Canny(src, dst, lowThreshold, highThreshold, 3, true); //De-Initialize DSP cv::fastcv::dsp::fcvdspdeinit(); From a0e609fbf6432a6f3dbe37e91bf5de2e9099f3a2 Mon Sep 17 00:00:00 2001 From: Aakash Preetam Date: Mon, 5 May 2025 14:21:27 +0530 Subject: [PATCH 8/8] Remove redundant break and correct typo --- modules/fastcv/src/blur_dsp.cpp | 2 -- modules/fastcv/test/test_blur_dsp.cpp | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/fastcv/src/blur_dsp.cpp b/modules/fastcv/src/blur_dsp.cpp index 9c726241996..b6147b54ba3 100644 --- a/modules/fastcv/src/blur_dsp.cpp +++ b/modules/fastcv/src/blur_dsp.cpp @@ -59,10 +59,8 @@ void filter2D(InputArray _src, OutputArray _dst, int ddepth, InputArray _kernel) { CV_Error(cv::Error::StsBadArg, cv::format("Kernel Size:%d, Dst type:%s is not supported", ksize.width, depthToString(ddepth))); - break; } } - } } // dsp:: diff --git a/modules/fastcv/test/test_blur_dsp.cpp 
b/modules/fastcv/test/test_blur_dsp.cpp index 7db20aa4647..2b453ba6ed7 100644 --- a/modules/fastcv/test/test_blur_dsp.cpp +++ b/modules/fastcv/test/test_blur_dsp.cpp @@ -65,7 +65,7 @@ TEST_P(Filter2DTest_DSP, accuracy) } INSTANTIATE_TEST_CASE_P(FastCV_Extension, Filter2DTest_DSP, Combine( -/*image sie*/ Values(perf::szVGA, perf::sz720p), +/*image size*/ Values(perf::szVGA, perf::sz720p), /*dst depth*/ Values(CV_8U,CV_16S,CV_32F), /*kernel size*/ Values(3, 5, 7, 9, 11) ));