Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1 +1 @@
Captures the current high-resolution time in ticks, suitable for measuring elapsed intervals with `get_time_usec`.
Returns a monotonic timestamp in nanoseconds since an unspecified epoch (the same epoch within a process; not comparable across processes or reboots). Use with `get_time_usec(ref)` / `get_time_nsec(ref)` for elapsed-interval math; raw subtraction `now - then` is also valid since the unit is always nanoseconds on every platform.
80 changes: 57 additions & 23 deletions src/hal/performance_time.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,36 +15,71 @@ namespace das {

#endif

// ref_time_ticks() returns CLOCK_MONOTONIC-style nanoseconds on every platform.
// Prior to this normalization, Windows returned raw QueryPerformanceCounter
// ticks (~10 MHz typical) while Linux/macOS already returned ns — so callers
// that did `ref_time_ticks() + int64(timeout_sec * 1_000_000)` with a 30 s
// timeout waited ~3 s on Windows (30e6 ticks at 10 MHz) but only 30 ms on
// POSIX (30e6 ns). The footgun is gone: raw subtraction `now - then` always
// yields nanoseconds elapsed.
//
// Prefer `get_time_usec(start)` / `get_time_nsec(start)` for elapsed-time
// comparisons — those wrap the subtraction and continue to work portably.

#ifdef _MSC_VER

#define WIN32_LEAN_AND_MEAN
#include <windows.h>

// QueryPerformanceFrequency is invariant after boot — cache once per process
// to avoid a syscall on every ref_time_ticks() call. Race-tolerant: parallel
// initialisers all compute the same value, and int64 stores are atomic on
// x64/arm64.
//
// We also precompute `qpc_ns_per_tick = 1e9 / freq` when it divides cleanly
// (the universal Win 7+ case where QPF = 10 MHz → 100 ns/tick). The fast path
// is one multiply per call, so ref_time_ticks() stays within ~1 ns of the
// bare QueryPerformanceCounter cost — critical for the function profiler,
// which brackets every call. Fallback split path handles non-divisible
// frequencies (theoretical; not observed on modern Windows).
static int64_t qpc_ns_per_tick = 0; // 0 -> use the fallback split path

static int64_t qpc_freq() {
static int64_t cached = 0;
if ( cached == 0 ) {
LARGE_INTEGER f;
QueryPerformanceFrequency(&f);
cached = f.QuadPart;
qpc_ns_per_tick = (1000000000LL % cached == 0) ? (1000000000LL / cached) : 0;
}
return cached;
}

// Returns the current QueryPerformanceCounter reading converted to
// nanoseconds, so that differences between two results are elapsed ns.
extern "C" int64_t ref_time_ticks () {
    LARGE_INTEGER t0;
    QueryPerformanceCounter(&t0);
    // qpc_freq() must run before qpc_ns_per_tick is inspected: its first call
    // is what computes the multiplier.
    const int64_t freq = qpc_freq();
    if ( qpc_ns_per_tick ) {
        // Fast path: 1e9 is an exact multiple of freq, one multiply suffices.
        return t0.QuadPart * qpc_ns_per_tick;
    }
    // Fallback: convert QPC counter to nanoseconds without overflowing int64:
    //   ns = (ticks / freq) * 1e9 + (ticks % freq) * 1e9 / freq
    // rem < freq, so rem * 1e9 stays below freq * 1e9; at a 10 MHz counter
    // that peaks near 1e16, well under 2^63.
    const int64_t whole = t0.QuadPart / freq;
    const int64_t rem   = t0.QuadPart % freq;
    return whole * 1000000000LL + (rem * 1000000000LL) / freq;
}

// Returns the microseconds elapsed since `reft`, where `reft` is an earlier
// ref_time_ticks() result. ref_time_ticks() already yields nanoseconds, so the
// difference is only divided by 1000 — no QueryPerformanceFrequency scaling.
// The 64-bit quotient is narrowed to int to match the declared return type.
extern "C" int get_time_usec ( int64_t reft ) {
    return int((ref_time_ticks() - reft) / 1000LL);
}

// Returns the nanoseconds elapsed since `reft`, where `reft` is an earlier
// ref_time_ticks() result. Both values are already in nanoseconds, so the
// answer is a plain subtraction with no frequency scaling.
extern "C" int64_t get_time_nsec ( int64_t reft ) {
    return ref_time_ticks() - reft;
}

// Converts `ref`, a difference between two ref_time_ticks() values
// (nanoseconds), to whole microseconds. Integer division truncates toward zero.
extern "C" int64_t ref_time_delta_to_usec ( int64_t ref ) {
    return ref / 1000LL;
}

#elif __linux__ || defined(_EMSCRIPTEN_VER) || defined __HAIKU__
Expand All @@ -59,37 +94,36 @@ extern "C" int64_t ref_time_ticks () {
DAS_ASSERT(false);
return -1;
}

return ts.tv_sec * NSEC_IN_SEC + ts.tv_nsec;
}

// Returns the microseconds elapsed since `reft`, where `reft` is an earlier
// ref_time_ticks() result in nanoseconds. The 64-bit quotient is narrowed to
// int to match the declared return type.
extern "C" int get_time_usec ( int64_t reft ) {
    return int((ref_time_ticks() - reft) / 1000LL);
}

// Returns the nanoseconds elapsed since `reft`, where `reft` is an earlier
// ref_time_ticks() result.
extern "C" int64_t get_time_nsec ( int64_t reft ) {
    return ref_time_ticks() - reft;
}

// Converts `ref`, a difference between two ref_time_ticks() values
// (nanoseconds), to whole microseconds. Integer division truncates toward zero.
extern "C" int64_t ref_time_delta_to_usec ( int64_t ref ) { return ref / 1000LL; }


#else // osx

#include <time.h>

extern "C" int64_t ref_time_ticks() {
extern "C" int64_t ref_time_ticks () {
return clock_gettime_nsec_np(CLOCK_MONOTONIC_RAW);
}

// Returns the microseconds elapsed since `reft`, where `reft` is an earlier
// ref_time_ticks() result in nanoseconds. The 64-bit quotient is narrowed to
// int to match the declared return type.
extern "C" int get_time_usec ( int64_t reft ) {
    return int((ref_time_ticks() - reft) / 1000LL);
}

// Returns the nanoseconds elapsed since `reft`, where `reft` is an earlier
// ref_time_ticks() result.
extern "C" int64_t get_time_nsec ( int64_t reft ) {
    return ref_time_ticks() - reft;
}

// Converts `ref`, a difference between two ref_time_ticks() values
// (nanoseconds), to whole microseconds. Integer division truncates toward zero.
extern "C" int64_t ref_time_delta_to_usec ( int64_t ref ) { return ref / 1000LL; }

#endif
Loading