diff --git a/benchmark/tfl/CMakeLists.txt b/benchmark/tfl/CMakeLists.txt deleted file mode 100644 index 9bedf51..0000000 --- a/benchmark/tfl/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ -cmake_minimum_required(VERSION 3.14) -project(tfl) - -set(CMAKE_CXX_STANDARD 17) - -find_package(ament_cmake REQUIRED) - -add_library(tfl STATIC src/buffer_core.cpp) -target_include_directories(tfl PUBLIC - $ - $ -) - -install(TARGETS tfl - EXPORT tflTargets - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib -) - -install(DIRECTORY include/ DESTINATION include) - -ament_export_targets(tflTargets HAS_LIBRARY_TARGET) -ament_export_include_directories(include) - -ament_package() diff --git a/benchmark/tfl/include/tfl/buffer_core.hpp b/benchmark/tfl/include/tfl/buffer_core.hpp deleted file mode 100644 index 95eb8d8..0000000 --- a/benchmark/tfl/include/tfl/buffer_core.hpp +++ /dev/null @@ -1,77 +0,0 @@ -#pragma once -#include -#include -#include -#include "tfl/frame_time_cache.hpp" - -namespace tfl { - -struct FrameSpec { - std::string name; - FrameID id; - FrameID parent_id = INVALID_FRAME; - bool is_static = false; -}; - -enum class WalkEnding { - Identity, - TargetParentOfSource, - SourceParentOfTarget, - FullPath, - Error -}; - -class BufferCore { - public: - // frames: must include all frames. id=0 is reserved (INVALID_FRAME). - // Frame IDs must be contiguous from 1..max_id. - BufferCore(const std::vector& specs, - int64_t cache_duration_ns = 10'000'000'000LL); - - // Writer (single-threaded) - void setTransform(FrameID child, TimeNs stamp, Quat rot, Vec3 trans); - - // Reader (lock-free, multi-threaded) - TransformResult lookupTransform(FrameID target, FrameID source, TimeNs time) const; - TransformResult lookupTransform(const std::string& target, - const std::string& source, TimeNs time) const; - bool canTransform(FrameID target, FrameID source, TimeNs time) const; - - FrameID resolveFrameId(const std::string& name) const; - size_t frameCount() const { return frames_.size(); } - - private: - struct TransformAccum { - Quat source_to_top_quat{0, 0, 0, 1}; - Vec3 source_to_top_vec{0, 0, 0}; - Quat target_to_top_quat{0, 0, 0, 1}; - Vec3 target_to_top_vec{0, 0, 0}; - TimeNs time = 0; - - void accumSource(const TransformData& st) { - source_to_top_vec = quatRotate(st.rotation, source_to_top_vec) + st.translation; - source_to_top_quat = quatMultiply(st.rotation, source_to_top_quat); - } - - void accumTarget(const TransformData& st) { - target_to_top_vec = quatRotate(st.rotation, target_to_top_vec) + st.translation; - target_to_top_quat = quatMultiply(st.rotation, target_to_top_quat); - } - - TransformResult finalize(WalkEnding ending) const; - }; - - WalkEnding walkToTopParent(TransformAccum& accum, - FrameID target_id, FrameID source_id, - TimeNs time) const; - - TimeNs getLatestCommonTime(FrameID target_id, FrameID source_id) const; - - std::vector frames_; - std::vector parent_ids_; - std::unordered_map name_to_id_; - std::vector id_to_name_; - int64_t cache_duration_ns_; -}; - -} // namespace tfl diff --git a/benchmark/tfl/include/tfl/frame_time_cache.hpp b/benchmark/tfl/include/tfl/frame_time_cache.hpp deleted file mode 100644 index 82f6ec0..0000000 --- a/benchmark/tfl/include/tfl/frame_time_cache.hpp +++ /dev/null @@ -1,268 +0,0 @@ -#pragma once -#include -#include -#include -#include "tfl/types.hpp" - -namespace tfl { - -// SeqLock + circular buffer per-frame cache. -// Single writer, multiple lock-free readers. -class FrameTimeCache { - public: - explicit FrameTimeCache(uint32_t capacity = 2000, bool is_static = false) - : capacity_(is_static ? 1 : capacity), - is_static_(is_static), - buf_(std::make_unique(is_static ? 1 : capacity)) {} - - FrameTimeCache(FrameTimeCache&& o) noexcept - : seq_(o.seq_.load(std::memory_order_relaxed)), - buf_(std::move(o.buf_)), - capacity_(o.capacity_), - head_(o.head_), - size_(o.size_), - is_static_(o.is_static_) {} - - FrameTimeCache& operator=(FrameTimeCache&& o) noexcept { - if (this != &o) { - seq_.store(o.seq_.load(std::memory_order_relaxed), std::memory_order_relaxed); - buf_ = std::move(o.buf_); - capacity_ = o.capacity_; - head_ = o.head_; - size_ = o.size_; - is_static_ = o.is_static_; - } - return *this; - } - - // ── Writer (single-threaded) ────────────────────────────── - - void insert(const TransformData& data, TimeNs cache_duration_ns) { - // Begin write - seq_.fetch_add(1, std::memory_order_release); // odd = writing - std::atomic_thread_fence(std::memory_order_release); - - if (is_static_) { - buf_[0] = data; - } else { - // Reject old data - if (size_ > 0) { - TimeNs latest = buf_[head_].stamp_ns; - if (data.stamp_ns < latest - cache_duration_ns) { - // Too old, skip — still need to end the SeqLock - std::atomic_thread_fence(std::memory_order_release); - seq_.fetch_add(1, std::memory_order_release); - return; - } - } - - // Advance head - if (size_ == 0) { - head_ = 0; - size_ = 1; - buf_[0] = data; - } else { - // Find insertion point: data should go in timestamp order - // Common case: new data is newest - if (data.stamp_ns >= buf_[head_].stamp_ns) { - head_ = (head_ + 1) % capacity_; - if (size_ < capacity_) size_++; - buf_[head_] = data; - } else { - // Out-of-order insert: find correct position - // For simplicity, insert at head anyway (SeqLock will protect) - // In practice, tf2 also just inserts at the right position - head_ = (head_ + 1) % capacity_; - if (size_ < capacity_) size_++; - buf_[head_] = data; - // Sort the logical window to maintain descending order - sortBuffer(); - } - } - - // Prune old entries - if (size_ > 1) { - TimeNs newest = buf_[head_].stamp_ns; - while (size_ > 1) { - uint32_t tail = physIdx(size_ - 1); - if (buf_[tail].stamp_ns >= newest - cache_duration_ns) break; - size_--; - } - } - } - - // End write - std::atomic_thread_fence(std::memory_order_release); - seq_.fetch_add(1, std::memory_order_release); // even = done - } - - // ── Reader (lock-free, multi-threaded) ──────────────────── - - // Returns: 0 = no data, 1 = exact match (in d1), 2 = interpolate (d1=older, d2=newer) - uint8_t getData(TimeNs time, TransformData& d1, TransformData& d2) const { - for (int retry = 0; retry < 64; ++retry) { - uint64_t s1 = seq_.load(std::memory_order_acquire); - if (s1 & 1) continue; // write in progress - - uint8_t result = getDataUnsafe(time, d1, d2); - - std::atomic_thread_fence(std::memory_order_acquire); - uint64_t s2 = seq_.load(std::memory_order_acquire); - if (s1 == s2) return result; - // Sequence changed, retry - } - return 0; // give up after retries - } - - // Single-result convenience - uint8_t getData(TimeNs time, TransformData& out) const { - TransformData d1, d2; - uint8_t n = getData(time, d1, d2); - if (n == 0) return 0; - if (n == 1) { - out = d1; - return 1; - } - // Interpolate - if (d1.parent_id != d2.parent_id) { - out = d1; // parent changed, no interpolation - return 1; - } - double total = static_cast(d2.stamp_ns - d1.stamp_ns); - if (total <= 0.0) { - out = d2; - return 1; - } - double ratio = static_cast(time - d1.stamp_ns) / total; - out.rotation = slerp(d1.rotation, d2.rotation, ratio); - out.translation = lerp(d1.translation, d2.translation, ratio); - out.stamp_ns = time; - out.parent_id = d1.parent_id; - return 1; - } - - TimeNs getLatestStamp() const { - for (int retry = 0; retry < 64; ++retry) { - uint64_t s1 = seq_.load(std::memory_order_acquire); - if (s1 & 1) continue; - - TimeNs result = 0; - if (is_static_) { - result = 0; // static frames have no meaningful timestamp - } else if (size_ > 0) { - result = buf_[head_].stamp_ns; - } - - std::atomic_thread_fence(std::memory_order_acquire); - uint64_t s2 = seq_.load(std::memory_order_acquire); - if (s1 == s2) return result; - } - return 0; - } - - FrameID getParent(TimeNs time) const { - TransformData d; - if (getData(time, d) > 0) return d.parent_id; - return INVALID_FRAME; - } - - bool empty() const { - uint64_t s1 = seq_.load(std::memory_order_acquire); - if (s1 & 1) return true; // writing, treat as empty - bool result = (!is_static_ && size_ == 0); - uint64_t s2 = seq_.load(std::memory_order_acquire); - if (s1 != s2) return true; - return result; - } - - bool isStatic() const { return is_static_; } - - private: - // Logical index i (0=newest) to physical buffer index - uint32_t physIdx(uint32_t i) const { - return (head_ + capacity_ - i) % capacity_; - } - - // Must be called inside SeqLock read section (no seq_ check) - uint8_t getDataUnsafe(TimeNs time, TransformData& d1, TransformData& d2) const { - if (is_static_) { - d1 = buf_[0]; - d1.stamp_ns = time; // static cache overwrites timestamp (like tf2) - return 1; - } - - if (size_ == 0) return 0; - - // time == 0 means "latest" - if (time == 0) { - d1 = buf_[head_]; - return 1; - } - - // Single element - if (size_ == 1) { - if (buf_[head_].stamp_ns == time) { - d1 = buf_[head_]; - return 1; - } - return 0; // extrapolation error - } - - TimeNs newest = buf_[head_].stamp_ns; - uint32_t oldest_idx = physIdx(size_ - 1); - TimeNs oldest = buf_[oldest_idx].stamp_ns; - - // Exact match with newest - if (time == newest) { d1 = buf_[head_]; return 1; } - // Exact match with oldest - if (time == oldest) { d1 = buf_[oldest_idx]; return 1; } - - // Out of range - if (time > newest || time < oldest) return 0; - - // Binary search: find first logical index where stamp < time - // Buffer is logically sorted descending: [newest ... oldest] - uint32_t lo = 0, hi = size_ - 1; - while (lo < hi) { - uint32_t mid = (lo + hi) / 2; - if (buf_[physIdx(mid)].stamp_ns > time) { - lo = mid + 1; - } else { - hi = mid; - } - } - // lo = first index where stamp <= time - if (buf_[physIdx(lo)].stamp_ns == time) { - d1 = buf_[physIdx(lo)]; - return 1; - } - // lo points to older, lo-1 points to newer - d1 = buf_[physIdx(lo)]; // older - d2 = buf_[physIdx(lo - 1)]; // newer - return 2; - } - - // Sort the logical buffer window (called only on out-of-order insert) - void sortBuffer() { - // Simple insertion sort on the logical window (small and rare) - for (uint32_t i = 1; i < size_; ++i) { - uint32_t pi = physIdx(i); - TransformData key = buf_[pi]; - uint32_t j = i; - while (j > 0 && buf_[physIdx(j - 1)].stamp_ns < key.stamp_ns) { - buf_[physIdx(j)] = buf_[physIdx(j - 1)]; - j--; - } - if (j != i) buf_[physIdx(j)] = key; - } - } - - alignas(64) std::atomic seq_{0}; - std::unique_ptr buf_; - uint32_t capacity_; - uint32_t head_ = 0; - uint32_t size_ = 0; - bool is_static_; -}; - -} // namespace tfl diff --git a/benchmark/tfl/include/tfl/types.hpp b/benchmark/tfl/include/tfl/types.hpp deleted file mode 100644 index 517384b..0000000 --- a/benchmark/tfl/include/tfl/types.hpp +++ /dev/null @@ -1,114 +0,0 @@ -#pragma once -#include -#include -#include - -namespace tfl { - -using TimeNs = int64_t; -using FrameID = uint32_t; - -constexpr FrameID INVALID_FRAME = 0; -constexpr uint32_t MAX_GRAPH_DEPTH = 1000; - -// ── POD math types ────────────────────────────────────────────── - -struct Vec3 { - double x = 0.0, y = 0.0, z = 0.0; -}; - -struct Quat { - double x = 0.0, y = 0.0, z = 0.0, w = 1.0; -}; - -struct TransformData { - Quat rotation; - Vec3 translation; - TimeNs stamp_ns = 0; - FrameID parent_id = INVALID_FRAME; - uint32_t _pad = 0; -}; - -static_assert(std::is_trivially_copyable_v); - -struct TransformResult { - Quat rotation; - Vec3 translation; - TimeNs stamp_ns = 0; -}; - -// ── Inline math ───────────────────────────────────────────────── - -inline double dot(Quat a, Quat b) { - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; -} - -inline Quat quatMultiply(Quat a, Quat b) { - return { - a.w * b.x + a.x * b.w + a.y * b.z - a.z * b.y, - a.w * b.y - a.x * b.z + a.y * b.w + a.z * b.x, - a.w * b.z + a.x * b.y - a.y * b.x + a.z * b.w, - a.w * b.w - a.x * b.x - a.y * b.y - a.z * b.z - }; -} - -inline Quat quatInverse(Quat q) { - // unit quaternion: conjugate == inverse - return {-q.x, -q.y, -q.z, q.w}; -} - -inline Vec3 quatRotate(Quat q, Vec3 v) { - // q * (v,0) * q^-1 — expanded for efficiency - // t = 2 * cross(q.xyz, v) - double tx = 2.0 * (q.y * v.z - q.z * v.y); - double ty = 2.0 * (q.z * v.x - q.x * v.z); - double tz = 2.0 * (q.x * v.y - q.y * v.x); - return { - v.x + q.w * tx + (q.y * tz - q.z * ty), - v.y + q.w * ty + (q.z * tx - q.x * tz), - v.z + q.w * tz + (q.x * ty - q.y * tx) - }; -} - -inline Quat slerp(Quat q1, Quat q2, double t) { - double d = dot(q1, q2); - // Take shortest path - Quat q2a = q2; - if (d < 0.0) { - d = -d; - q2a = {-q2.x, -q2.y, -q2.z, -q2.w}; - } - if (d > 1.0 - 1e-6) { - // Very close — linear interpolation - return { - q1.x + t * (q2a.x - q1.x), - q1.y + t * (q2a.y - q1.y), - q1.z + t * (q2a.z - q1.z), - q1.w + t * (q2a.w - q1.w) - }; - } - double theta = std::acos(d); - double sin_theta = std::sin(theta); - double ra = std::sin((1.0 - t) * theta) / sin_theta; - double rb = std::sin(t * theta) / sin_theta; - return { - ra * q1.x + rb * q2a.x, - ra * q1.y + rb * q2a.y, - ra * q1.z + rb * q2a.z, - ra * q1.w + rb * q2a.w - }; -} - -inline Vec3 lerp(Vec3 a, Vec3 b, double t) { - return { - a.x + (b.x - a.x) * t, - a.y + (b.y - a.y) * t, - a.z + (b.z - a.z) * t - }; -} - -inline Vec3 operator+(Vec3 a, Vec3 b) { return {a.x + b.x, a.y + b.y, a.z + b.z}; } -inline Vec3 operator-(Vec3 a, Vec3 b) { return {a.x - b.x, a.y - b.y, a.z - b.z}; } -inline Vec3 operator-(Vec3 a) { return {-a.x, -a.y, -a.z}; } - -} // namespace tfl diff --git a/benchmark/tfl/package.xml b/benchmark/tfl/package.xml deleted file mode 100644 index 513b97b..0000000 --- a/benchmark/tfl/package.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - tfl - 0.0.1 - tfl (tf light) - lock-free transform library - nishino - BSD-3-Clause - - ament_cmake - - - ament_cmake - - diff --git a/benchmark/tfl/src/buffer_core.cpp b/benchmark/tfl/src/buffer_core.cpp deleted file mode 100644 index 07b6fb7..0000000 --- a/benchmark/tfl/src/buffer_core.cpp +++ /dev/null @@ -1,274 +0,0 @@ -#include "tfl/buffer_core.hpp" -#include -#include - -namespace tfl { - -BufferCore::BufferCore(const std::vector& specs, int64_t cache_duration_ns) - : cache_duration_ns_(cache_duration_ns) { - // Find max frame ID - FrameID max_id = 0; - for (auto& s : specs) { - if (s.id > max_id) max_id = s.id; - } - - // Allocate frame vector (index 0 is unused sentinel) - frames_.reserve(max_id + 1); - for (FrameID i = 0; i <= max_id; ++i) { - frames_.emplace_back(2000, false); // default dynamic - } - parent_ids_.resize(max_id + 1, INVALID_FRAME); - id_to_name_.resize(max_id + 1); - - // Configure each frame - for (auto& s : specs) { - frames_[s.id] = FrameTimeCache(s.is_static ? 1 : 2000, s.is_static); - parent_ids_[s.id] = s.parent_id; - name_to_id_[s.name] = s.id; - id_to_name_[s.id] = s.name; - } -} - -void BufferCore::setTransform(FrameID child, TimeNs stamp, Quat rot, Vec3 trans) { - if (child == INVALID_FRAME || child >= frames_.size()) return; - - TransformData data; - data.rotation = rot; - data.translation = trans; - data.stamp_ns = stamp; - data.parent_id = parent_ids_[child]; - - frames_[child].insert(data, cache_duration_ns_); -} - -FrameID BufferCore::resolveFrameId(const std::string& name) const { - auto it = name_to_id_.find(name); - if (it == name_to_id_.end()) return INVALID_FRAME; - return it->second; -} - -// ── walkToTopParent ───────────────────────────────────────── - -WalkEnding BufferCore::walkToTopParent( - TransformAccum& accum, FrameID target_id, FrameID source_id, TimeNs time) const { - if (source_id == target_id) { - return WalkEnding::Identity; - } - - // If time == 0, find latest common time - if (time == 0) { - time = getLatestCommonTime(target_id, source_id); - if (time == 0) return WalkEnding::Error; - } - accum.time = time; - - // Walk source → root - FrameID frame = source_id; - FrameID top_parent = INVALID_FRAME; - uint32_t depth = 0; - - while (frame != INVALID_FRAME && depth < MAX_GRAPH_DEPTH) { - if (frame >= frames_.size()) return WalkEnding::Error; - - TransformData st; - if (frames_[frame].getData(time, st) == 0) { - return WalkEnding::Error; - } - - // Check if we reached target - if (frame == target_id) { - // Source is a descendant of target — but we haven't accumulated - // the current frame yet since it IS the target - return WalkEnding::TargetParentOfSource; - } - - if (depth > 0) { - // First iteration: frame == source_id, st is source→parent transform - // Don't accumulate the first call's own frame - } - accum.accumSource(st); - - top_parent = frame; - frame = st.parent_id; - depth++; - - if (frame == target_id) { - return WalkEnding::TargetParentOfSource; - } - } - if (depth >= MAX_GRAPH_DEPTH) return WalkEnding::Error; - - // Walk target → top_parent - frame = target_id; - depth = 0; - - while (frame != top_parent && frame != INVALID_FRAME && depth < MAX_GRAPH_DEPTH) { - if (frame >= frames_.size()) return WalkEnding::Error; - - TransformData st; - if (frames_[frame].getData(time, st) == 0) { - return WalkEnding::Error; - } - - if (frame == source_id) { - return WalkEnding::SourceParentOfTarget; - } - - accum.accumTarget(st); - frame = st.parent_id; - depth++; - - if (frame == source_id) { - return WalkEnding::SourceParentOfTarget; - } - } - - if (frame != top_parent) return WalkEnding::Error; // disconnected - - return WalkEnding::FullPath; -} - -// ── finalize ──────────────────────────────────────────────── - -TransformResult BufferCore::TransformAccum::finalize(WalkEnding ending) const { - TransformResult result; - result.stamp_ns = time; - - switch (ending) { - case WalkEnding::Identity: - result.rotation = {0, 0, 0, 1}; - result.translation = {0, 0, 0}; - break; - - case WalkEnding::TargetParentOfSource: - result.rotation = source_to_top_quat; - result.translation = source_to_top_vec; - break; - - case WalkEnding::SourceParentOfTarget: { - Quat inv_q = quatInverse(target_to_top_quat); - result.rotation = inv_q; - result.translation = quatRotate(inv_q, -target_to_top_vec); - break; - } - - case WalkEnding::FullPath: { - Quat inv_q = quatInverse(target_to_top_quat); - Vec3 inv_v = quatRotate(inv_q, -target_to_top_vec); - result.rotation = quatMultiply(inv_q, source_to_top_quat); - result.translation = quatRotate(inv_q, source_to_top_vec) + inv_v; - break; - } - - default: - result.rotation = {0, 0, 0, 1}; - result.translation = {0, 0, 0}; - break; - } - return result; -} - -// ── lookupTransform ───────────────────────────────────────── - -TransformResult BufferCore::lookupTransform( - FrameID target, FrameID source, TimeNs time) const { - TransformAccum accum; - WalkEnding ending = walkToTopParent(accum, target, source, time); - if (ending == WalkEnding::Error) { - return TransformResult{{0, 0, 0, 1}, {0, 0, 0}, 0}; - } - return accum.finalize(ending); -} - -TransformResult BufferCore::lookupTransform( - const std::string& target, const std::string& source, TimeNs time) const { - FrameID t = resolveFrameId(target); - FrameID s = resolveFrameId(source); - if (t == INVALID_FRAME || s == INVALID_FRAME) { - return TransformResult{{0, 0, 0, 1}, {0, 0, 0}, 0}; - } - return lookupTransform(t, s, time); -} - -bool BufferCore::canTransform(FrameID target, FrameID source, TimeNs time) const { - TransformAccum accum; - return walkToTopParent(accum, target, source, time) != WalkEnding::Error; -} - -// ── getLatestCommonTime ───────────────────────────────────── - -TimeNs BufferCore::getLatestCommonTime(FrameID target_id, FrameID source_id) const { - if (source_id == target_id) { - if (source_id < frames_.size()) { - TimeNs t = frames_[source_id].getLatestStamp(); - return t; - } - return 0; - } - - // Walk source → root, collecting (stamp, frame_id) pairs - struct StampFrame { TimeNs stamp; FrameID id; }; - StampFrame source_chain[MAX_GRAPH_DEPTH]; - uint32_t source_len = 0; - - FrameID frame = source_id; - TimeNs common_time = std::numeric_limits::max(); - - while (frame != INVALID_FRAME && source_len < MAX_GRAPH_DEPTH) { - if (frame >= frames_.size()) break; - - TimeNs stamp = frames_[frame].getLatestStamp(); - if (frames_[frame].isStatic()) { - // Static frames don't contribute to common time - } else { - if (stamp == 0) return 0; // no data - if (stamp < common_time) common_time = stamp; - } - - FrameID parent = frames_[frame].getParent(0); // get latest parent - source_chain[source_len++] = {stamp, frame}; - - if (frame == target_id) { - return common_time == std::numeric_limits::max() ? 0 : common_time; - } - frame = parent; - } - - // Walk target → root, look for intersection with source chain - frame = target_id; - uint32_t depth = 0; - - while (frame != INVALID_FRAME && depth < MAX_GRAPH_DEPTH) { - if (frame >= frames_.size()) break; - - TimeNs stamp = frames_[frame].getLatestStamp(); - if (!frames_[frame].isStatic()) { - if (stamp == 0) return 0; - if (stamp < common_time) common_time = stamp; - } - - FrameID parent = frames_[frame].getParent(0); - - // Check if this frame is in the source chain - for (uint32_t i = 0; i < source_len; ++i) { - if (source_chain[i].id == frame || parent == source_chain[i].id) { - // Found common ancestor - // Walk source chain up to this point, taking min timestamp - TimeNs result = common_time; - for (uint32_t j = 0; j <= i; ++j) { - if (!frames_[source_chain[j].id].isStatic() && source_chain[j].stamp < result) { - result = source_chain[j].stamp; - } - } - return result == std::numeric_limits::max() ? 0 : result; - } - } - - frame = parent; - depth++; - } - - return 0; // disconnected -} - -} // namespace tfl