diff --git a/.cspell-wordlist.txt b/.cspell-wordlist.txt index 3a5226232a..f649158ce0 100644 --- a/.cspell-wordlist.txt +++ b/.cspell-wordlist.txt @@ -234,6 +234,7 @@ Amdahl Amdahl's xyxy xywh +cxcywh subfolders podspec logcat @@ -241,3 +242,5 @@ modelname optionalsize pushd popd +yolov +YOLOV diff --git a/apps/computer-vision/app/_layout.tsx b/apps/computer-vision/app/_layout.tsx index d6e2335b75..eb61c766b6 100644 --- a/apps/computer-vision/app/_layout.tsx +++ b/apps/computer-vision/app/_layout.tsx @@ -41,6 +41,13 @@ export default function Layout() { title: 'Semantic Segmentation', }} /> + router.navigate('segmentation/')}> Semantic Segmentation + router.navigate('keypoint/')}> + Keypoint Detection + router.navigate('inspect/')}> Model Inspector diff --git a/apps/computer-vision/app/keypoint/index.tsx b/apps/computer-vision/app/keypoint/index.tsx new file mode 100644 index 0000000000..336eedea5d --- /dev/null +++ b/apps/computer-vision/app/keypoint/index.tsx @@ -0,0 +1,279 @@ +import React, { useState } from 'react'; +import { View, Text, StyleSheet, ScrollView, Dimensions, Platform } from 'react-native'; +import { useSafeAreaInsets } from 'react-native-safe-area-context'; +import { commonStyles, theme } from '../../theme'; +import { useImage } from '@shopify/react-native-skia'; +import { useKeypointDetector, models, type KeypointDetection } from 'react-native-executorch'; +import ScreenWrapper from '../../components/ScreenWrapper'; +import { getImage } from '../../utils'; +import { ModelPicker, type ModelOption } from '../../components/ModelPicker'; +import { ImageViewport } from '../../components/ImageViewport'; +import { ModelStatus } from '../../components/ModelStatus'; +import { LatencyIndicator } from '../../components/LatencyIndicator'; +import { Button } from '../../components/Button'; +import { BoundingBox } from '../../components/BoundingBox'; + +const MODEL_OPTIONS: ModelOption[] = [ + { + label: 'BlazeFace (XNNPACK FP32)', + value: models.keypointDetection.BLAZEFACE, + }, + { + label: 'YOLO26 Pose (XNNPACK FP32)', + value: models.keypointDetection.YOLO26_POSE.SIZE_384.XNNPACK_FP32, + }, + { + label: 'RF-DETR Keypoint (XNNPACK FP32)', + value: models.keypointDetection.RFDETR_KEYPOINT.XNNPACK_FP32, + }, + { + label: 'RF-DETR Keypoint (CoreML FP32)', + value: models.keypointDetection.RFDETR_KEYPOINT.COREML_FP32, + disabled: Platform.OS !== 'ios', + }, + { + label: 'RF-DETR Keypoint (MLX FP32)', + value: models.keypointDetection.RFDETR_KEYPOINT.MLX_FP32, + disabled: Platform.OS !== 'ios', + }, +]; + +const VIEW_WIDTH = Dimensions.get('window').width - 32; +const VIEW_HEIGHT = Math.round((VIEW_WIDTH * 16) / 9); + +function KeypointContent() { + const insets = useSafeAreaInsets(); + const [selectedModel, setSelectedModel] = useState(MODEL_OPTIONS[0].value); + const [imageUri, setImageUri] = useState(null); + const [isProcessing, setIsProcessing] = useState(false); + const [results, setResults] = useState[]>([]); + const [latency, setLatency] = useState(null); + const [error, setError] = useState(null); + + const skiaImage = useImage(imageUri, (err) => setError(err.message || String(err))); + + const { + isReady, + downloadProgress, + error: loadError, + detectKeypoints, + detectKeypointsWorklet, + } = useKeypointDetector(selectedModel); + + const handlePickImage = async (useCamera: boolean) => { + setError(null); + try { + const uri = await getImage(useCamera); + if (uri) { + setImageUri(uri); + setResults([]); + setLatency(null); + } + } catch (e: any) { + setError(e.message || String(e)); + } + }; + + const runDetection = async (sync: boolean) => { + if (!skiaImage || !detectKeypoints || !detectKeypointsWorklet) return; + if (!sync) setIsProcessing(true); + setError(null); + try { + const pixels = skiaImage.readPixels(); + if (!pixels) { + throw new Error('Failed to read pixels from image'); + } + if (!(pixels instanceof Uint8Array)) { + throw new Error('Expected Uint8Array from readPixels'); + } + const buffer = { + data: pixels, + width: skiaImage.width(), + height: skiaImage.height(), + format: 'rgba' as const, + layout: 'hwc' as const, + }; + const start = Date.now(); + const output = ( + sync ? detectKeypointsWorklet(buffer) : await detectKeypoints(buffer) + ) as KeypointDetection<'xyxy', string>[]; + + setLatency(Date.now() - start); + setResults(output); + } catch (e: any) { + setError(e.message || String(e)); + } finally { + if (!sync) setIsProcessing(false); + } + }; + + let scaleX = 1; + let scaleY = 1; + let offsetX = 0; + let offsetY = 0; + + if (skiaImage) { + const imgW = skiaImage.width(); + const imgH = skiaImage.height(); + const scale = Math.min(VIEW_WIDTH / imgW, VIEW_HEIGHT / imgH); + const displayedW = imgW * scale; + const displayedH = imgH * scale; + offsetX = (VIEW_WIDTH - displayedW) / 2; + offsetY = (VIEW_HEIGHT - displayedH) / 2; + scaleX = scale; + scaleY = scale; + } + + const activeError = loadError ? String(loadError) : error; + + return ( + + + Upload or capture an image to run keypoint/pose estimation on it. + + + { + setSelectedModel(model); + setResults([]); + setLatency(null); + setError(null); + }} + /> + + + + handlePickImage(false)}> + {skiaImage && results.length > 0 && ( + + {results.map((det, index: number) => { + const strokeColor = '#00ff00'; + const bgColor = 'rgba(0, 255, 0, 0.15)'; + const landmarkColor = '#ff00ff'; + + const left = offsetX + det.box.xmin * scaleX; + const top = offsetY + det.box.ymin * scaleY; + const width = (det.box.xmax - det.box.xmin) * scaleX; + const height = (det.box.ymax - det.box.ymin) * scaleY; + + return ( + + {/* Bounding Box */} + + + {/* Landmarks */} + {Object.entries(det.landmarks).map(([key, point]) => { + const x = offsetX + point.x * scaleX; + const y = offsetY + point.y * scaleY; + return ( + + + + {key}: {Math.round(point.confidence * 100)}% + + + ); + })} + + ); + })} + + )} + + + +