From 33e616003a9b388a9b0d1cecb3f3198323c89538 Mon Sep 17 00:00:00 2001 From: Bartosz Hanc Date: Wed, 24 Jun 2026 13:52:16 +0200 Subject: [PATCH 01/14] feat: add keypoint detection task pipeline and native JSI NMS support --- .cspell-wordlist.txt | 3 + .../cpp/extensions/cv/box_ops.cpp | 243 ++++++++++++++++++ .../cpp/extensions/cv/box_ops.h | 7 + .../cpp/extensions/cv/install.cpp | 3 + .../react-native-executorch/src/constants.ts | 40 +++ .../src/extensions/cv/ops/boxes.ts | 167 ++++++++++++ .../src/extensions/cv/ops/index.ts | 2 + .../src/extensions/cv/ops/points.ts | 47 ++++ .../extensions/cv/tasks/keypointDetection.ts | 237 +++++++++++++++++ .../src/hooks/useKeypointDetector.ts | 49 ++++ packages/react-native-executorch/src/index.ts | 2 + .../react-native-executorch/src/models.ts | 57 ++++ 12 files changed, 857 insertions(+) create mode 100644 packages/react-native-executorch/cpp/extensions/cv/box_ops.cpp create mode 100644 packages/react-native-executorch/cpp/extensions/cv/box_ops.h create mode 100644 packages/react-native-executorch/src/extensions/cv/ops/boxes.ts create mode 100644 packages/react-native-executorch/src/extensions/cv/ops/points.ts create mode 100644 packages/react-native-executorch/src/extensions/cv/tasks/keypointDetection.ts create mode 100644 packages/react-native-executorch/src/hooks/useKeypointDetector.ts diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt index 3a5226232a..f649158ce0 100644 --- a/.cspell-wordlist.txt +++ b/.cspell-wordlist.txt @@ -234,6 +234,7 @@ Amdahl Amdahl's xyxy xywh +cxcywh subfolders podspec logcat @@ -241,3 +242,5 @@ modelname optionalsize pushd popd +yolov +YOLOV diff --git a/packages/react-native-executorch/cpp/extensions/cv/box_ops.cpp b/packages/react-native-executorch/cpp/extensions/cv/box_ops.cpp new file mode 100644 index 0000000000..89a8385e2a --- /dev/null +++ b/packages/react-native-executorch/cpp/extensions/cv/box_ops.cpp @@ -0,0 +1,243 @@ +#include "box_ops.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core/dtype.h" +#include "core/tensor.h" + +namespace rnexecutorch::extensions::cv::box_ops { +namespace jsi = facebook::jsi; +using TensorHostObject = rnexecutorch::core::tensor::TensorHostObject; + +namespace { +enum class BoxFormat { + XYXY, + XYWH, + CXCYWH +}; + +BoxFormat parseBoxFormat(const std::string &s) { + if (s == "xyxy") { + return BoxFormat::XYXY; + } else if (s == "xywh") { + return BoxFormat::XYWH; + } else if (s == "cxcywh") { + return BoxFormat::CXCYWH; + } + throw std::invalid_argument("unsupported boxFormat '" + s + "'"); +} + +enum class NmsType { + Standard, + Weighted +}; + +NmsType parseNmsType(const std::string &s) { + if (s == "standard") { + return NmsType::Standard; + } else if (s == "weighted") { + return NmsType::Weighted; + } + throw std::invalid_argument("unsupported nmsType '" + s + "'"); +} + +std::array decodeToXyxy( + float a, float b, float c, float d, + BoxFormat format) { + switch (format) { + case BoxFormat::XYXY: + return {a, b, c, d}; + case BoxFormat::XYWH: + return {a, b, a + c, b + d}; + case BoxFormat::CXCYWH: + return {a - c / 2.0f, b - d / 2.0f, a + c / 2.0f, b + d / 2.0f}; + } +} +} // namespace + +void install_nms(jsi::Runtime &rt, jsi::Object &module) { + auto name = "nms"; + auto fnBody = [](jsi::Runtime &rt, const jsi::Value &thisVal, const jsi::Value *args, size_t count) -> jsi::Value { + if (count < 3) { + throw jsi::JSError(rt, "Usage: nms(boxes, scores, options)"); + } + + if (!args[0].isObject() || !args[0].asObject(rt).isHostObject(rt) || + !args[1].isObject() || !args[1].asObject(rt).isHostObject(rt)) { + throw jsi::JSError(rt, "nms: boxes and scores must be Tensors"); + } + + if (!args[2].isObject()) { + throw jsi::JSError(rt, "nms: options must be an object"); + } + + auto boxes = args[0].asObject(rt).getHostObject(rt); + auto scores = args[1].asObject(rt).getHostObject(rt); + auto opts = args[2].asObject(rt); + + if (boxes.get() == scores.get()) { + throw jsi::JSError(rt, "nms: boxes and scores cannot be the same tensor."); + } + + if (!opts.hasProperty(rt, "iouThreshold") || + !opts.hasProperty(rt, "boxFormat") || + !opts.hasProperty(rt, "confidenceThreshold") || + !opts.hasProperty(rt, "nmsType")) { + throw jsi::JSError(rt, "nms: options must specify iouThreshold, boxFormat, confidenceThreshold, and nmsType"); + } + + float iouThreshold = static_cast(opts.getProperty(rt, "iouThreshold").asNumber()); + float confidenceThreshold = static_cast(opts.getProperty(rt, "confidenceThreshold").asNumber()); + + std::string nmsTypeStr = opts.getProperty(rt, "nmsType").asString(rt).utf8(rt); + std::string boxFormatStr = opts.getProperty(rt, "boxFormat").asString(rt).utf8(rt); + + NmsType nmsType; + BoxFormat boxFormat; + try { + nmsType = parseNmsType(nmsTypeStr); + boxFormat = parseBoxFormat(boxFormatStr); + } catch (const std::invalid_argument &e) { + throw jsi::JSError(rt, "nms: " + std::string(e.what())); + } + + std::shared_lock boxesLock(boxes->mutex_, std::try_to_lock); + std::shared_lock scoresLock(scores->mutex_, std::try_to_lock); + + if (!boxesLock.owns_lock() || !scoresLock.owns_lock()) { + throw jsi::JSError(rt, "nms: one of the tensors is currently locked"); + } + + if (!boxes->data_ || !scores->data_) { + throw jsi::JSError(rt, "nms: tensors must not be disposed"); + } + + if (scores->shape_.size() != 1) { + throw jsi::JSError(rt, "nms: scores must be a 1D tensor with shape [N]"); + } + std::int32_t numAnchors = scores->shape_[0]; + + if (boxes->shape_.size() != 2 || boxes->shape_[1] != 4) { + throw jsi::JSError(rt, "nms: boxes must be a 2D tensor with shape [N, 4]"); + } + + if (boxes->shape_[0] != numAnchors) { + throw jsi::JSError(rt, "nms: boxes and scores must have the same number of elements"); + } + + if (boxes->dtype_ != rnexecutorch::core::types::DType::float32 || scores->dtype_ != rnexecutorch::core::types::DType::float32) { + throw jsi::JSError(rt, "nms: boxes and scores must have dtype float32"); + } + + const float *boxesPtr = reinterpret_cast(boxes->data_.get()); + const float *scoresPtr = reinterpret_cast(scores->data_.get()); + + std::vector> candidates; + candidates.reserve(numAnchors); + + for (size_t idx = 0; idx < numAnchors; ++idx) { + float score = scoresPtr[idx]; + + if (score >= confidenceThreshold) { + candidates.push_back({idx, score}); + } + } + + if (candidates.empty()) { + return jsi::Array(rt, 0); + } + + std::ranges::sort(candidates, [](const auto &lhs, const auto &rhs) { return lhs.second > rhs.second; }); + + std::vector> groups; + std::vector suppressed(candidates.size(), false); + + for (size_t i = 0; i < candidates.size(); ++i) { + if (suppressed[i]) { + continue; + } + + std::int32_t idxI = candidates[i].first; + + auto [xminA, yminA, xmaxA, ymaxA] = decodeToXyxy( + boxesPtr[idxI * 4 + 0], + boxesPtr[idxI * 4 + 1], + boxesPtr[idxI * 4 + 2], + boxesPtr[idxI * 4 + 3], + boxFormat); + + float areaA = (xmaxA - xminA) * (ymaxA - yminA); + + std::vector overlapping = {idxI}; + + for (size_t j = i + 1; j < candidates.size(); ++j) { + if (suppressed[j]) { + continue; + } + + std::int32_t idxJ = candidates[j].first; + + auto [xminB, yminB, xmaxB, ymaxB] = decodeToXyxy( + boxesPtr[idxJ * 4 + 0], + boxesPtr[idxJ * 4 + 1], + boxesPtr[idxJ * 4 + 2], + boxesPtr[idxJ * 4 + 3], + boxFormat); + + float areaB = (xmaxB - xminB) * (ymaxB - yminB); + + float interYMin = std::max(yminA, yminB); + float interXMin = std::max(xminA, xminB); + float interYMax = std::min(ymaxA, ymaxB); + float interXMax = std::min(xmaxA, xmaxB); + + float interH = std::max(0.0f, interYMax - interYMin); + float interW = std::max(0.0f, interXMax - interXMin); + float intersection = interH * interW; + + float unionArea = areaA + areaB - intersection; + float iou = (unionArea > 0.0f) ? (intersection / unionArea) : 0.0f; + + if (iou > iouThreshold) { + if (nmsType == NmsType::Weighted) { + overlapping.push_back(idxJ); + } + suppressed[j] = true; + } + } + + groups.push_back(std::move(overlapping)); + } + + switch (nmsType) { + case NmsType::Standard: { + jsi::Array result = jsi::Array(rt, groups.size()); + for (size_t i = 0; i < groups.size(); ++i) { + result.setValueAtIndex(rt, i, jsi::Value(static_cast(groups[i][0]))); + } + return result; + } + case NmsType::Weighted: { + jsi::Array resultGroups = jsi::Array(rt, groups.size()); + for (size_t i = 0; i < groups.size(); ++i) { + jsi::Array singleGroup = jsi::Array(rt, groups[i].size()); + for (size_t j = 0; j < groups[i].size(); ++j) { + singleGroup.setValueAtIndex(rt, j, jsi::Value(static_cast(groups[i][j]))); + } + resultGroups.setValueAtIndex(rt, i, singleGroup); + } + return resultGroups; + } + } + }; + + module.setProperty(rt, name, jsi::Function::createFromHostFunction(rt, jsi::PropNameID::forAscii(rt, name), 3, fnBody)); +} +} // namespace rnexecutorch::extensions::cv::box_ops diff --git a/packages/react-native-executorch/cpp/extensions/cv/box_ops.h b/packages/react-native-executorch/cpp/extensions/cv/box_ops.h new file mode 100644 index 0000000000..7e001b67b7 --- /dev/null +++ b/packages/react-native-executorch/cpp/extensions/cv/box_ops.h @@ -0,0 +1,7 @@ +#pragma once + +#include + +namespace rnexecutorch::extensions::cv::box_ops { +void install_nms(facebook::jsi::Runtime &rt, facebook::jsi::Object &module); +} // namespace rnexecutorch::extensions::cv::box_ops diff --git a/packages/react-native-executorch/cpp/extensions/cv/install.cpp b/packages/react-native-executorch/cpp/extensions/cv/install.cpp index f559c68552..ae370a9046 100644 --- a/packages/react-native-executorch/cpp/extensions/cv/install.cpp +++ b/packages/react-native-executorch/cpp/extensions/cv/install.cpp @@ -1,4 +1,5 @@ #include "install.h" +#include "box_ops.h" #include "image_ops.h" namespace rnexecutorch::extensions::cv { @@ -14,6 +15,8 @@ void install(facebook::jsi::Runtime &rt, facebook::jsi::Object &module) { image_ops::install_normalize(rt, cvModule); image_ops::install_applyColormap(rt, cvModule); + box_ops::install_nms(rt, cvModule); + module.setProperty(rt, "cv", cvModule); } } // namespace rnexecutorch::extensions::cv diff --git a/packages/react-native-executorch/src/constants.ts b/packages/react-native-executorch/src/constants.ts index 55ce8d9993..a1c671d702 100644 --- a/packages/react-native-executorch/src/constants.ts +++ b/packages/react-native-executorch/src/constants.ts @@ -1055,3 +1055,43 @@ export const IMAGENET_NORM = { alpha: [1 / (255.0 * 0.229), 1 / (255.0 * 0.224), 1 / (255.0 * 0.225)], beta: [-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225], } as const; + +/** + * BlazeFace landmarks list. + * @category Constants + */ +export const BLAZEFACE_LANDMARKS = [ + 'leftEye', + 'rightEye', + 'noseTip', + 'mouthCenter', + 'leftEar', + 'rightEar', +] as const; + +/** + * COCO human pose landmarks list. + * @category Constants + */ +export const COCO_LANDMARKS = [ + 'nose', + 'leftEye', + 'rightEye', + 'leftEar', + 'rightEar', + 'leftShoulder', + 'rightShoulder', + 'leftElbow', + 'rightElbow', + 'leftWrist', + 'rightWrist', + 'leftHip', + 'rightHip', + 'leftKnee', + 'rightKnee', + 'leftAnkle', + 'rightAnkle', +] as const; + +export type BlazeFaceLandmark = (typeof BLAZEFACE_LANDMARKS)[number]; +export type CocoLandmark = (typeof COCO_LANDMARKS)[number]; diff --git a/packages/react-native-executorch/src/extensions/cv/ops/boxes.ts b/packages/react-native-executorch/src/extensions/cv/ops/boxes.ts new file mode 100644 index 0000000000..90aaeefeb1 --- /dev/null +++ b/packages/react-native-executorch/src/extensions/cv/ops/boxes.ts @@ -0,0 +1,167 @@ +import { rnexecutorchJsi } from '../../../native/bridge'; +import type { Tensor } from '../../../core/tensor'; +import type { ResizeMode } from './image'; +import { scalePoint } from './points'; + +/** + * Mapping of bounding box formats to their coordinate representations. + * @category Types + */ +export type BoxMap = { + xyxy: { xmin: number; ymin: number; xmax: number; ymax: number }; + xywh: { xmin: number; ymin: number; w: number; h: number }; + cxcywh: { cx: number; cy: number; w: number; h: number }; +}; + +/** + * The formats of bounding boxes. + * @category Types + */ +export type BoxFormat = keyof BoxMap; + +/** + * Representation of a bounding box under a specific format. + * @category Types + */ +export type BoundingBox = F extends any + ? { readonly format: F } & Readonly + : never; + +/** + * Decodes bounding box coordinates from a 4-tuple into a structured BoundingBox + * object. + * @category Utils + * @typeParam F Bounding box coordinate format. + * @param tuple A 4-tuple array containing coordinates. + * @param format The coordinate format to decode into. + * @returns The decoded BoundingBox object. + */ +export function decodeBox( + tuple: [number, number, number, number], + format: F +): BoundingBox { + 'worklet'; + const [a, b, c, d] = tuple; + switch (format) { + case 'xyxy': + return { format: 'xyxy', xmin: a, ymin: b, xmax: c, ymax: d } as BoundingBox; + case 'xywh': + return { format: 'xywh', xmin: a, ymin: b, w: c, h: d } as BoundingBox; + case 'cxcywh': + return { format: 'cxcywh', cx: a, cy: b, w: c, h: d } as BoundingBox; + } +} + +/** + * Scales bounding box coordinates based on scaling options and resize modes. + * @category Utils + * @typeParam F Bounding box coordinate format. + * @param box The original BoundingBox. + * @param opts Options defining dimensions and resize modes. + * @param opts.from The source bounds (e.g. model input dimensions). + * @param opts.to The destination bounds (e.g. original image dimensions). + * @param opts.resizeMode The mode used to resize the image ('letterbox' or + * 'stretch'). + * @returns The scaled BoundingBox object. + */ +export function scaleBox( + box: BoundingBox, + opts: { + readonly from: { readonly width: number; readonly height: number }; + readonly to: { readonly width: number; readonly height: number }; + readonly resizeMode: Exclude; + } +): BoundingBox { + 'worklet'; + const { from, to, resizeMode } = opts; + + let scaleX: number; + let scaleY: number; + switch (resizeMode) { + case 'letterbox': { + const scale = Math.min(from.width / to.width, from.height / to.height); + scaleX = scale; + scaleY = scale; + break; + } + case 'stretch': + scaleX = from.width / to.width; + scaleY = from.height / to.height; + break; + } + + switch (box.format) { + case 'xyxy': { + const pMin = scalePoint({ x: box.xmin, y: box.ymin }, opts); + const pMax = scalePoint({ x: box.xmax, y: box.ymax }, opts); + return { + format: 'xyxy', + xmin: pMin.x, + ymin: pMin.y, + xmax: pMax.x, + ymax: pMax.y, + } as BoundingBox; + } + case 'xywh': { + const pMin = scalePoint({ x: box.xmin, y: box.ymin }, opts); + return { + format: 'xywh', + xmin: pMin.x, + ymin: pMin.y, + w: box.w / scaleX, + h: box.h / scaleY, + } as BoundingBox; + } + case 'cxcywh': { + const pCenter = scalePoint({ x: box.cx, y: box.cy }, opts); + return { + format: 'cxcywh', + cx: pCenter.x, + cy: pCenter.y, + w: box.w / scaleX, + h: box.h / scaleY, + } as BoundingBox; + } + } +} + +/** + * Options for Non-Maximum Suppression (NMS). + * @category Types + */ +export type NmsOptions = { + readonly boxFormat: BoxFormat; + readonly iouThreshold: number; + readonly confidenceThreshold: number; + readonly nmsType: 'standard' | 'weighted'; +}; + +/** + * Executes Non-Maximum Suppression (NMS) on bounding boxes and confidence + * scores. + * @category Utils + * @param boxes Bounding boxes coordinate tensor. + * @param scores Bounding boxes confidence scores tensor. + * @param opts Options configure NMS thresholds and execution mode. + * @returns The resulting indices of the non-suppressed boxes: + * - For `standard` NMS: A 1D array of indices (`number[]`) representing the + * selected boxes. + * - For `weighted` NMS: A 2D array of indices (`number[][]`) representing + * groups of overlapping boxes, where the first element of each group is the + * top candidate and the group indices are used to calculate the weighted + * average of coordinates. + */ +export function nms( + boxes: Tensor, + scores: Tensor, + opts: NmsOptions & { readonly nmsType: 'standard' } +): number[]; +export function nms( + boxes: Tensor, + scores: Tensor, + opts: NmsOptions & { readonly nmsType: 'weighted' } +): number[][]; +export function nms(boxes: Tensor, scores: Tensor, opts: NmsOptions): number[] | number[][] { + 'worklet'; + return rnexecutorchJsi.cv.nms(boxes, scores, opts); +} diff --git a/packages/react-native-executorch/src/extensions/cv/ops/index.ts b/packages/react-native-executorch/src/extensions/cv/ops/index.ts index d0492fef8b..84a274101d 100644 --- a/packages/react-native-executorch/src/extensions/cv/ops/index.ts +++ b/packages/react-native-executorch/src/extensions/cv/ops/index.ts @@ -1 +1,3 @@ export * as image from './image'; +export * as boxes from './boxes'; +export * as points from './points'; diff --git a/packages/react-native-executorch/src/extensions/cv/ops/points.ts b/packages/react-native-executorch/src/extensions/cv/ops/points.ts new file mode 100644 index 0000000000..4464061e18 --- /dev/null +++ b/packages/react-native-executorch/src/extensions/cv/ops/points.ts @@ -0,0 +1,47 @@ +import type { ResizeMode } from './image'; + +/** + * Represents a 2D coordinate point with x and y values. + * @category Types + */ +export type Point = { + readonly x: number; + readonly y: number; +}; + +/** + * Helper function to scale a 2D point based on resize mode and resolution + * changes. + * @category Utils + * @param point The original coordinate point to scale. + * @param opts Options detailing the scaling factors and resize mode. + * @param opts.from The source bounds (e.g. model input dimensions). + * @param opts.to The destination bounds (e.g. original image dimensions). + * @param opts.resizeMode The mode used to resize the image ('letterbox' or + * 'stretch'). + * @returns The scaled coordinate point. + */ +export function scalePoint( + point: Point, + opts: { + readonly from: { readonly width: number; readonly height: number }; + readonly to: { readonly width: number; readonly height: number }; + readonly resizeMode: Exclude; + } +): Point { + 'worklet'; + const { from, to, resizeMode } = opts; + switch (resizeMode) { + case 'letterbox': { + const scale = Math.min(from.width / to.width, from.height / to.height); + const offsetX = (from.width - to.width * scale) / 2.0; + const offsetY = (from.height - to.height * scale) / 2.0; + return { x: (point.x - offsetX) / scale, y: (point.y - offsetY) / scale }; + } + case 'stretch': { + const scaleX = from.width / to.width; + const scaleY = from.height / to.height; + return { x: point.x / scaleX, y: point.y / scaleY }; + } + } +} diff --git a/packages/react-native-executorch/src/extensions/cv/tasks/keypointDetection.ts b/packages/react-native-executorch/src/extensions/cv/tasks/keypointDetection.ts new file mode 100644 index 0000000000..d4d7a4f052 --- /dev/null +++ b/packages/react-native-executorch/src/extensions/cv/tasks/keypointDetection.ts @@ -0,0 +1,237 @@ +import type { WorkletRuntime } from 'react-native-worklets'; + +import { tensor, type Tensor } from '../../../core/tensor'; +import { loadModel } from '../../../core/model'; +import { validateModelSchema, SymbolicTensor } from '../../../core/modelSchema'; +import { wrapAsync } from '../../../core/runtime'; + +import type { ImageBuffer } from '../image'; +import { createImagePreprocessor, type ImagePreprocessorOptions } from './preprocessing'; + +import type { ResizeMode } from '../ops/image'; +import { scalePoint, type Point } from '../ops/points'; +import { nms, type BoundingBox, type BoxFormat, decodeBox, scaleBox } from '../ops/boxes'; + +export type { BoxFormat }; + +/** + * Options for configuring a keypoint detector runner. + * @category Types + */ +export type KeypointDetectorOptions = Omit< + ImagePreprocessorOptions, + 'resizeMode' +> & { + readonly resizeMode: Exclude; + readonly boxFormat: F; + readonly landmarks: readonly L[]; + readonly defaultIouThreshold: number; + readonly defaultConfidenceThreshold: number; +}; + +/** + * Model configuration required to instantiate a keypoint detector task runner. + * @category Types + */ +export type KeypointDetectorModel = { + readonly modelPath: string; + readonly opts: KeypointDetectorOptions; +}; + +/** + * Plural landmarks mapped by their names to coordinates and detection + * confidence. + * @category Types + */ +export type Landmarks = Record; + +/** + * Result structure representing a single detected bounding box and its + * associated landmarks. + * @category Types + */ +export type KeypointDetection = { + readonly box: BoundingBox; + readonly confidence: number; + readonly landmarks: Landmarks; +}; + +/** + * Post-processes model outputs by applying Non-Maximum Suppression (NMS) and + * scaling coordinates. + * @category Utils + * @param tBoxes Bounding boxes tensor output from inference. + * @param tScores Scores tensor output from inference. + * @param tKeypoints Keypoints tensor output from inference. + * @param opts Post-processing configuration options. + * @returns Structured keypoint detection results list. + */ +function postprocess( + tBoxes: Tensor, + tScores: Tensor, + tKeypoints: Tensor, + opts: { + readonly from: { readonly width: number; readonly height: number }; + readonly to: { readonly width: number; readonly height: number }; + readonly boxFormat: F; + readonly landmarks: readonly L[]; + readonly iouThreshold: number; + readonly confidenceThreshold: number; + readonly resizeMode: Exclude; + } +): KeypointDetection[] { + 'worklet'; + + const nmsGroups = nms(tBoxes, tScores, { ...opts, nmsType: 'weighted' }); + + const boxes = tBoxes.getData(new Float32Array(tBoxes.numel)); + const scores = tScores.getData(new Float32Array(tScores.numel)); + const keypoints = tKeypoints.getData(new Float32Array(tKeypoints.numel)); + + const results: KeypointDetection[] = []; + + for (const group of nmsGroups) { + const totalScore = group.reduce((total, idx) => total + (scores[idx] ?? 0), 0); + const weightedBox = new Float32Array(4); + const weightedKpt = new Float32Array(opts.landmarks.length * 3); + + for (const idx of group) { + const score = scores[idx]!; + weightedBox.forEach((v, i) => { + weightedBox[i] = v + score * boxes[idx * 4 + i]!; + }); + weightedKpt.forEach((v, i) => { + weightedKpt[i] = v + score * keypoints[idx * opts.landmarks.length * 3 + i]!; + }); + } + + weightedBox.forEach((v, i) => { + weightedBox[i] = v / totalScore; + }); + weightedKpt.forEach((v, i) => { + weightedKpt[i] = v / totalScore; + }); + + const [a, b, c, d] = weightedBox; + const box = scaleBox(decodeBox([a!, b!, c!, d!], opts.boxFormat), opts); + const landmarks = {} as Landmarks; + + for (const [i, key] of opts.landmarks.entries()) { + const point = scalePoint({ x: weightedKpt[i * 3]!, y: weightedKpt[i * 3 + 1]! }, opts); + const confidence = weightedKpt[i * 3 + 2]!; + landmarks[key] = { ...point, confidence }; + } + + results.push({ box, confidence: totalScore / group.length, landmarks }); + } + + return results; +} + +/** + * Creates an image keypoint detector runner for executing local Keypoint/Pose + * Detection models. + * + * It validates model inputs and output shapes (bounding boxes, confidence + * scores, and landmark coordinates), pre-allocates execution tensors, setups + * preprocessing, and sets up lifecycle disposals. + * @category Typescript API + * @typeParam F The bounding box format. + * @typeParam L The landmark labels type. + * @param config Keypoint task configuration containing path and options. + * @param runtime Optional worklet runtime thread on which to run the model + * execution. + * @returns A promise resolving to an object containing keypoint detection and + * disposal bindings. + */ +export async function createKeypointDetector( + config: KeypointDetectorModel, + runtime?: WorkletRuntime +): Promise<{ + /** + * Releases all allocated native resources. + */ + dispose: () => void; + /** + * Performs asynchronous keypoint and bounding box detection on the given + * input image. + * @param input The input image buffer. + * @param options Configuration options for keypoint detection. + * @param options.confidenceThreshold Minimum confidence score for a + * detection. If omitted, uses the model default. + * @param options.iouThreshold Intersection over Union (IoU) threshold for + * NMS. If omitted, uses the model default. + * @returns A promise resolving to the list of keypoint detections. + */ + detectKeypoints: ( + input: ImageBuffer, + options?: { confidenceThreshold?: number; iouThreshold?: number } + ) => Promise[]>; + /** + * Synchronous version of {@link detectKeypoints} to be executed directly on + * the caller or worklet thread. + */ + detectKeypointsWorklet: ( + input: ImageBuffer, + options?: { confidenceThreshold?: number; iouThreshold?: number } + ) => KeypointDetection[]; +}> { + const { modelPath, opts } = config; + const { landmarks } = opts; + const model = await wrapAsync(loadModel, runtime)(modelPath); + const meta = validateModelSchema( + model, + 'forward', + [SymbolicTensor('float32', [1, 3, 'H', 'W'])], + [ + SymbolicTensor('float32', ['N', 4]), + SymbolicTensor('float32', ['N']), + SymbolicTensor('float32', ['N', landmarks.length, 3]), + ] + ); + + const inpShape = meta.inputTensorMeta[0]!.shape; + const numAnchors = meta.outputTensorMeta[0]!.shape[0]!; + + const targetH = inpShape.at(-2)!; + const targetW = inpShape.at(-1)!; + + const tensors = [ + tensor('float32', [numAnchors, 4]), + tensor('float32', [numAnchors]), + tensor('float32', [numAnchors, landmarks.length, 3]), + ] as const; + + const [tBoxes, tScores, tKeypoints] = tensors; + const preprocessor = createImagePreprocessor(opts, inpShape); + + const dispose = () => { + preprocessor.dispose(); + tensors.forEach((t) => t.dispose()); + model.dispose(); + }; + + const detectKeypointsWorklet = ( + input: ImageBuffer, + options?: { confidenceThreshold?: number; iouThreshold?: number } + ): KeypointDetection[] => { + 'worklet'; + const tInput = preprocessor.process(input); + model.execute('forward', [tInput], [tBoxes, tScores, tKeypoints]); + + const iouThreshold = options?.iouThreshold ?? opts.defaultIouThreshold; + const confidenceThreshold = options?.confidenceThreshold ?? opts.defaultConfidenceThreshold; + + return postprocess(tBoxes, tScores, tKeypoints, { + ...opts, + iouThreshold, + confidenceThreshold, + from: { width: targetW, height: targetH }, + to: { width: input.width, height: input.height }, + }); + }; + + const detectKeypoints = wrapAsync(detectKeypointsWorklet, runtime); + + return { detectKeypoints, detectKeypointsWorklet, dispose }; +} diff --git a/packages/react-native-executorch/src/hooks/useKeypointDetector.ts b/packages/react-native-executorch/src/hooks/useKeypointDetector.ts new file mode 100644 index 0000000000..6da33eb842 --- /dev/null +++ b/packages/react-native-executorch/src/hooks/useKeypointDetector.ts @@ -0,0 +1,49 @@ +import { useModel } from './useModel'; +import { useResourceDownload } from './useResourceDownload'; +import { + createKeypointDetector, + type KeypointDetectorModel, + type BoxFormat, +} from '../extensions/cv/tasks/keypointDetection'; + +/** + * React hook to load and run a keypoint detection model. + * + * This hook manages downloading (if it's a remote URL) and loading the model + * file, compiling it, tracking download progress and compilation errors, and + * cleaning up native model memory when the component unmounts or configuration + * changes. + * @category Hooks + * @typeParam F The bounding box format. + * @typeParam L The landmark labels type. + * @param config The keypoint detection model configuration. + * @param options Hook options. + * @param options.preventLoad If true, prevents downloading and compiling the + * model. + * @returns An object containing the model's loading state, error, download + * progress, and keypoint detection functions. + */ +export function useKeypointDetector( + config: KeypointDetectorModel, + options?: { preventLoad?: boolean } +) { + const { localPath, downloadProgress, downloadError } = useResourceDownload( + config.modelPath, + options?.preventLoad + ); + const { model, error } = useModel( + createKeypointDetector, + localPath ? { ...config, modelPath: localPath } : null, + [localPath] + ); + + return { + isReady: !!model, + error: downloadError || error, + downloadProgress, + localPath, + landmarks: config.opts.landmarks, + detectKeypoints: model?.detectKeypoints, + detectKeypointsWorklet: model?.detectKeypointsWorklet, + }; +} diff --git a/packages/react-native-executorch/src/index.ts b/packages/react-native-executorch/src/index.ts index c955c8a3d0..48b8517942 100644 --- a/packages/react-native-executorch/src/index.ts +++ b/packages/react-native-executorch/src/index.ts @@ -5,6 +5,7 @@ export * from './hooks/useSemanticSegmenter'; export * from './hooks/useTokenizer'; export * from './hooks/useResourceDownload'; export * from './hooks/useModel'; +export * from './hooks/useKeypointDetector'; // Constants export { models } from './models'; @@ -14,6 +15,7 @@ export * as constants from './constants'; export * from './extensions/cv/tasks/classification'; export * from './extensions/cv/tasks/styleTransfer'; export * from './extensions/cv/tasks/semanticSegmentation'; +export * from './extensions/cv/tasks/keypointDetection'; export * from './extensions/nlp/tasks/tokenization'; // Core primitives — for library builders and power users diff --git a/packages/react-native-executorch/src/models.ts b/packages/react-native-executorch/src/models.ts index ca5ee8b658..585e50fd28 100644 --- a/packages/react-native-executorch/src/models.ts +++ b/packages/react-native-executorch/src/models.ts @@ -1,12 +1,17 @@ import type { ClassifierModel } from './extensions/cv/tasks/classification'; import type { StyleTransferModel } from './extensions/cv/tasks/styleTransfer'; import type { SemanticSegmentationModel } from './extensions/cv/tasks/semanticSegmentation'; +import type { KeypointDetectorModel } from './extensions/cv/tasks/keypointDetection'; import { IMAGENET_NORM, IMAGENET1K_LABELS, PASCAL_VOC_LABELS, + BLAZEFACE_LANDMARKS, + COCO_LANDMARKS, type ImageNet1KLabel, type PascalVocLabel, + type BlazeFaceLandmark, + type CocoLandmark, } from './constants'; const BASE_URL = 'https://huggingface.co/software-mansion/react-native-executorch'; @@ -200,6 +205,46 @@ const FCN_RESNET101_XNNPACK_INT8: SemanticSegmentationModel = { opts: FCN_OPTS, }; +// ============================================================================= +// Keypoint Detection +// ============================================================================= +const BLAZEFACE_XNNPACK_FP32: KeypointDetectorModel<'xyxy', BlazeFaceLandmark> = { + modelPath: `https://huggingface.co/bhanc/scratch/resolve/main/blazeface_xnnpack_fp32.pte`, + opts: { + boxFormat: 'xyxy', + resizeMode: 'letterbox', + interpolation: 'linear', + alpha: 1 / 127.5, + beta: -1.0, + defaultIouThreshold: 0.3, + defaultConfidenceThreshold: 0.75, + landmarks: BLAZEFACE_LANDMARKS, + }, +}; + +const YOLOV8N_POSE_OPTS = { + boxFormat: 'xyxy' as const, + resizeMode: 'letterbox' as const, + interpolation: 'linear' as const, + alpha: 1 / 255.0, + beta: 0.0, + defaultIouThreshold: 0.7, + defaultConfidenceThreshold: 0.25, + landmarks: COCO_LANDMARKS, +}; +const YOLOV8N_POSE_384_XNNPACK_FP32: KeypointDetectorModel<'xyxy', CocoLandmark> = { + modelPath: `https://huggingface.co/bhanc/scratch/resolve/main/yolov8n_pose_384_xnnpack_fp32.pte`, + opts: YOLOV8N_POSE_OPTS, +}; +const YOLOV8N_POSE_512_XNNPACK_FP32: KeypointDetectorModel<'xyxy', CocoLandmark> = { + modelPath: `https://huggingface.co/bhanc/scratch/resolve/main/yolov8n_pose_512_xnnpack_fp32.pte`, + opts: YOLOV8N_POSE_OPTS, +}; +const YOLOV8N_POSE_640_XNNPACK_FP32: KeypointDetectorModel<'xyxy', CocoLandmark> = { + modelPath: `https://huggingface.co/bhanc/scratch/resolve/main/yolov8n_pose_640_xnnpack_fp32.pte`, + opts: YOLOV8N_POSE_OPTS, +}; + // ============================================================================= // Tokenizers // ============================================================================= @@ -288,6 +333,18 @@ export const models = { XNNPACK_INT8: FCN_RESNET101_XNNPACK_INT8, }, }, + keypointDetection: { + BLAZEFACE: { + ...BLAZEFACE_XNNPACK_FP32, + XNNPACK_FP32: BLAZEFACE_XNNPACK_FP32, + }, + YOLOV8N_POSE: { + ...YOLOV8N_POSE_384_XNNPACK_FP32, + SIZE_384: { XNNPACK_FP32: YOLOV8N_POSE_384_XNNPACK_FP32 }, + SIZE_512: { XNNPACK_FP32: YOLOV8N_POSE_512_XNNPACK_FP32 }, + SIZE_640: { XNNPACK_FP32: YOLOV8N_POSE_640_XNNPACK_FP32 }, + }, + }, tokenizer: { ALL_MINILM_L6_V2: ALL_MINILM_L6_V2_TOKENIZER, }, From f24e72819f6ab0cc32dd2cbbaa18fdf4eca76e18 Mon Sep 17 00:00:00 2001 From: Bartosz Hanc Date: Wed, 24 Jun 2026 14:21:18 +0200 Subject: [PATCH 02/14] style: change the order of exports --- packages/react-native-executorch/src/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/react-native-executorch/src/index.ts b/packages/react-native-executorch/src/index.ts index 48b8517942..7ec0f0c31d 100644 --- a/packages/react-native-executorch/src/index.ts +++ b/packages/react-native-executorch/src/index.ts @@ -2,10 +2,10 @@ export * from './hooks/useClassifier'; export * from './hooks/useStyleTransfer'; export * from './hooks/useSemanticSegmenter'; +export * from './hooks/useKeypointDetector'; export * from './hooks/useTokenizer'; export * from './hooks/useResourceDownload'; export * from './hooks/useModel'; -export * from './hooks/useKeypointDetector'; // Constants export { models } from './models'; From 36aa8cdf2ee7955bbc1a5bade794d81548450373 Mon Sep 17 00:00:00 2001 From: Bartosz Hanc Date: Wed, 24 Jun 2026 16:24:49 +0200 Subject: [PATCH 03/14] chore: update keypoint model options and repository links in models.ts --- .../react-native-executorch/src/models.ts | 34 +++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/packages/react-native-executorch/src/models.ts b/packages/react-native-executorch/src/models.ts index 585e50fd28..cebbdd5aac 100644 --- a/packages/react-native-executorch/src/models.ts +++ b/packages/react-native-executorch/src/models.ts @@ -233,18 +233,40 @@ const YOLOV8N_POSE_OPTS = { landmarks: COCO_LANDMARKS, }; const YOLOV8N_POSE_384_XNNPACK_FP32: KeypointDetectorModel<'xyxy', CocoLandmark> = { - modelPath: `https://huggingface.co/bhanc/scratch/resolve/main/yolov8n_pose_384_xnnpack_fp32.pte`, + modelPath: `${BASE_URL}-yolov8n-pose/${NEXT_VERSION_TAG}/xnnpack/yolov8n_pose_384_xnnpack_fp32.pte`, opts: YOLOV8N_POSE_OPTS, }; const YOLOV8N_POSE_512_XNNPACK_FP32: KeypointDetectorModel<'xyxy', CocoLandmark> = { - modelPath: `https://huggingface.co/bhanc/scratch/resolve/main/yolov8n_pose_512_xnnpack_fp32.pte`, + modelPath: `${BASE_URL}-yolov8n-pose/${NEXT_VERSION_TAG}/xnnpack/yolov8n_pose_512_xnnpack_fp32.pte`, opts: YOLOV8N_POSE_OPTS, }; const YOLOV8N_POSE_640_XNNPACK_FP32: KeypointDetectorModel<'xyxy', CocoLandmark> = { - modelPath: `https://huggingface.co/bhanc/scratch/resolve/main/yolov8n_pose_640_xnnpack_fp32.pte`, + modelPath: `${BASE_URL}-yolov8n-pose/${NEXT_VERSION_TAG}/xnnpack/yolov8n_pose_640_xnnpack_fp32.pte`, opts: YOLOV8N_POSE_OPTS, }; +const RFDETR_KEYPOINT_OPTS = { + boxFormat: 'xyxy' as const, + resizeMode: 'stretch' as const, + interpolation: 'linear' as const, + ...IMAGENET_NORM, + defaultIouThreshold: 0.55, + defaultConfidenceThreshold: 0.5, + landmarks: COCO_LANDMARKS, +}; +const RFDETR_KEYPOINT_XNNPACK_FP32: KeypointDetectorModel<'xyxy', CocoLandmark> = { + modelPath: `${BASE_URL}-rfdetr-keypoint/${VERSION_TAG}/preview/xnnpack/rfdetr_keypoint_preview_xnnpack_fp32.pte`, + opts: RFDETR_KEYPOINT_OPTS, +}; +const RFDETR_KEYPOINT_COREML_FP32: KeypointDetectorModel<'xyxy', CocoLandmark> = { + modelPath: `${BASE_URL}-rfdetr-keypoint/${VERSION_TAG}/preview/coreml/rfdetr_keypoint_preview_coreml_fp32.pte`, + opts: RFDETR_KEYPOINT_OPTS, +}; +const RFDETR_KEYPOINT_MLX_FP32: KeypointDetectorModel<'xyxy', CocoLandmark> = { + modelPath: `${BASE_URL}-rfdetr-keypoint/${VERSION_TAG}/preview/mlx/rfdetr_keypoint_preview_mlx_fp32.pte`, + opts: RFDETR_KEYPOINT_OPTS, +}; + // ============================================================================= // Tokenizers // ============================================================================= @@ -344,6 +366,12 @@ export const models = { SIZE_512: { XNNPACK_FP32: YOLOV8N_POSE_512_XNNPACK_FP32 }, SIZE_640: { XNNPACK_FP32: YOLOV8N_POSE_640_XNNPACK_FP32 }, }, + RFDETR_KEYPOINT: { + ...RFDETR_KEYPOINT_XNNPACK_FP32, + XNNPACK_FP32: RFDETR_KEYPOINT_XNNPACK_FP32, + COREML_FP32: RFDETR_KEYPOINT_COREML_FP32, + MLX_FP32: RFDETR_KEYPOINT_MLX_FP32, + }, }, tokenizer: { ALL_MINILM_L6_V2: ALL_MINILM_L6_V2_TOKENIZER, From 80cff4c25becff932fc9c4547f8fa085947227ce Mon Sep 17 00:00:00 2001 From: Bartosz Hanc Date: Wed, 24 Jun 2026 16:30:38 +0200 Subject: [PATCH 04/14] feat(computer-vision): add common BoundingBox component and use in keypoint screen --- apps/computer-vision/app/keypoint/index.tsx | 277 ++++++++++++++++++ .../components/BoundingBox.tsx | 68 +++++ 2 files changed, 345 insertions(+) create mode 100644 apps/computer-vision/app/keypoint/index.tsx create mode 100644 apps/computer-vision/components/BoundingBox.tsx diff --git a/apps/computer-vision/app/keypoint/index.tsx b/apps/computer-vision/app/keypoint/index.tsx new file mode 100644 index 0000000000..12d206aa44 --- /dev/null +++ b/apps/computer-vision/app/keypoint/index.tsx @@ -0,0 +1,277 @@ +import React, { useState } from 'react'; +import { View, Text, StyleSheet, ScrollView, Dimensions, Platform } from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; +import { commonStyles, theme } from '../../theme'; +import { useImage } from '@shopify/react-native-skia'; +import { useKeypointDetector, models } from 'react-native-executorch'; +import ScreenWrapper from '../../components/ScreenWrapper'; +import { getImage } from '../../utils'; +import { ModelPicker, type ModelOption } from '../../components/ModelPicker'; +import { ImageViewport } from '../../components/ImageViewport'; +import { ModelStatus } from '../../components/ModelStatus'; +import { LatencyIndicator } from '../../components/LatencyIndicator'; +import { Button } from '../../components/Button'; +import { BoundingBox } from '../../components/BoundingBox'; + +const MODEL_OPTIONS: ModelOption[] = [ + { + label: 'BlazeFace (XNNPACK FP32)', + value: models.keypointDetection.BLAZEFACE, + }, + { + label: 'YOLOv8n Pose (XNNPACK FP32)', + value: models.keypointDetection.YOLOV8N_POSE.SIZE_384.XNNPACK_FP32, + }, + { + label: 'RF-DETR Keypoint (XNNPACK FP32)', + value: models.keypointDetection.RFDETR_KEYPOINT.XNNPACK_FP32, + }, + { + label: 'RF-DETR Keypoint (CoreML FP32)', + value: models.keypointDetection.RFDETR_KEYPOINT.COREML_FP32, + disabled: Platform.OS !== 'ios', + }, + { + label: 'RF-DETR Keypoint (MLX FP32)', + value: models.keypointDetection.RFDETR_KEYPOINT.MLX_FP32, + disabled: Platform.OS !== 'ios', + }, +]; + +const VIEW_WIDTH = Dimensions.get('window').width - 32; +const VIEW_HEIGHT = Math.round((VIEW_WIDTH * 16) / 9); + +function KeypointContent() { + const insets = useSafeAreaInsets(); + const [selectedModel, setSelectedModel] = useState(MODEL_OPTIONS[0].value); + const [imageUri, setImageUri] = useState(null); + const [isProcessing, setIsProcessing] = useState(false); + const [results, setResults] = useState([]); + const [latency, setLatency] = useState(null); + const [error, setError] = useState(null); + + const skiaImage = useImage(imageUri, (err) => setError(err.message || String(err))); + + const { + isReady, + downloadProgress, + error: loadError, + detectKeypoints, + detectKeypointsWorklet, + } = useKeypointDetector(selectedModel); + + const handlePickImage = async (useCamera: boolean) => { + setError(null); + try { + const uri = await getImage(useCamera); + if (uri) { + setImageUri(uri); + setResults([]); + setLatency(null); + } + } catch (e: any) { + setError(e.message || String(e)); + } + }; + + const runDetection = async (sync: boolean) => { + if (!skiaImage || !detectKeypoints || !detectKeypointsWorklet) return; + if (!sync) setIsProcessing(true); + setError(null); + try { + const pixels = skiaImage.readPixels(); + if (!pixels) { + throw new Error('Failed to read pixels from image'); + } + if (!(pixels instanceof Uint8Array)) { + throw new Error('Expected Uint8Array from readPixels'); + } + const buffer = { + data: pixels, + width: skiaImage.width(), + height: skiaImage.height(), + format: 'rgba' as const, + layout: 'hwc' as const, + }; + const start = Date.now(); + const output = sync ? detectKeypointsWorklet(buffer) : await detectKeypoints(buffer); + + setLatency(Date.now() - start); + setResults(output); + } catch (e: any) { + setError(e.message || String(e)); + } finally { + if (!sync) setIsProcessing(false); + } + }; + + let scaleX = 1; + let scaleY = 1; + let offsetX = 0; + let offsetY = 0; + + if (skiaImage) { + const imgW = skiaImage.width(); + const imgH = skiaImage.height(); + const scale = Math.min(VIEW_WIDTH / imgW, VIEW_HEIGHT / imgH); + const displayedW = imgW * scale; + const displayedH = imgH * scale; + offsetX = (VIEW_WIDTH - displayedW) / 2; + offsetY = (VIEW_HEIGHT - displayedH) / 2; + scaleX = scale; + scaleY = scale; + } + + const activeError = loadError ? String(loadError) : error; + + return ( + + + Upload or capture an image to run keypoint/pose estimation on it. + + + { + setSelectedModel(model); + setResults([]); + setLatency(null); + setError(null); + }} + /> + + + + handlePickImage(false)}> + {skiaImage && results.length > 0 && ( + + {results.map((det: any, index: number) => { + const strokeColor = '#00ff00'; + const bgColor = 'rgba(0, 255, 0, 0.15)'; + const landmarkColor = '#ff00ff'; + + const left = offsetX + det.box.xmin * scaleX; + const top = offsetY + det.box.ymin * scaleY; + const width = (det.box.xmax - det.box.xmin) * scaleX; + const height = (det.box.ymax - det.box.ymin) * scaleY; + + return ( + + {/* Bounding Box */} + + + {/* Landmarks */} + {Object.entries(det.landmarks).map(([key, point]: [string, any]) => { + const x = offsetX + point.x * scaleX; + const y = offsetY + point.y * scaleY; + return ( + + + + {key}: {Math.round(point.confidence * 100)}% + + + ); + })} + + ); + })} + + )} + + + +