Skip to content

Commit 4891683

Browse files
committed
q_shared: better magic constants for Q_rsqrt(), add Q_rsqrt_precise()
1 parent 79598bf commit 4891683

File tree

1 file changed

+114
-20
lines changed

1 file changed

+114
-20
lines changed

src/engine/qcommon/q_shared.h

+114-20
Original file line numberDiff line numberDiff line change
@@ -338,26 +338,6 @@ extern const quat_t quatIdentity;
338338

339339
#define Q_ftol(x) ((long)(x))
340340

341-
// Overall relative error bound (ignoring unknown powerpc case): 5 * 10^-6
342-
// https://en.wikipedia.org/wiki/Fast_inverse_square_root#/media/File:2nd-iter.png
343-
inline float Q_rsqrt( float number )
344-
{
345-
float x = 0.5f * number;
346-
float y;
347-
348-
// compute approximate inverse square root
349-
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
350-
// SSE rsqrt relative error bound: 3.7 * 10^-4
351-
_mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) );
352-
#else
353-
y = Util::bit_cast<float>( 0x5f3759df - ( Util::bit_cast<uint32_t>( number ) >> 1 ) );
354-
y *= ( 1.5f - ( x * y * y ) ); // initial iteration
355-
// relative error bound after the initial iteration: 1.8 * 10^-3
356-
#endif
357-
y *= ( 1.5f - ( x * y * y ) ); // second iteration for higher precision
358-
return y;
359-
}
360-
361341
inline float Q_fabs( float x )
362342
{
363343
return fabsf( x );
@@ -495,6 +475,120 @@ void SnapVector( V &&v )
495475
v[ 2 ] = roundf( v[ 2 ] );
496476
}
497477

478+
/* The original Q_rsqrt algorithm is:
479+
480+
float Q_rsqrt( float n )
481+
{
482+
uint32_t magic = 0x5f3759dful;
483+
float a = 0.5f;
484+
float b = 3.0f;
485+
union { float f; uint32_t u; } o = {n};
486+
o.u = magic - ( o.u >> 1 );
487+
return a * o.f * ( b - n * o.f * o.f );
488+
}
489+
490+
It could be written like this, this is what Quake 3 did:
491+
492+
float Q_rsqrt( float n )
493+
{
494+
uint32_t magic = 0x5f3759dful;
495+
float a = 0.5f;
496+
float b = 3.0f;
497+
float c = a * b; // 1.5f
498+
union { float f; uint32_t u; } o = {n};
499+
o.u = magic - ( o.u >> 1);
500+
float x = n * a;
501+
return o.f * ( c - ( x * o.f * o.f ) );
502+
}
503+
504+
It was written with a second iteration commented out:
505+
506+
float Q_rsqrt( float n )
507+
{
508+
uint32_t magic = 0x5f3759dful;
509+
float a = 0.5f;
510+
float b = 3.0f;
511+
float c = a * b; // 1.5f
512+
union { float f; uint32_t u; } o = {n};
513+
o.u = magic - ( o.u >> 1);
514+
float x = n * a;
515+
o.f *= c - ( x * o.f * o.f );
516+
// o.f *= c - ( x * o.f * o.f );
517+
return o.f;
518+
}
519+
520+
The relative error bound after the initial iteration was: 1.8×10⁻³
521+
The relative error bound after a second iteration was: 5×10⁻⁶
522+
523+
Chris lomont computed a better magic constant of 0x5f375a86 while
524+
keeping the other values of 0.5 and 3.0 allowing a second iteration:
525+
https://www.lomont.org/papers/2003/InvSqrt.pdf
526+
527+
Better constants were computed by Jan Kadlec but they only allow
528+
a single iteration: http://rrrola.wz.cz/inv_sqrt.html
529+
530+
float Q_rsqrt( float n )
531+
{
532+
uint32_t magic = 0x5f1ffff9ul:
533+
float a = 0.703952253f;
534+
float b = 2.38924456f;
535+
union { float f; uint32_t u; } o = {n};
536+
o.u = magic - ( o.u >> 1 );
537+
return a * o.f * ( b - n * y.f * y.f );
538+
}
539+
540+
The relative error bound is: 2.00010826×10⁻⁷ */
541+
542+
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
543+
#else
544+
inline float ReverseSqrtMagic( const float n, const uint32_t magic )
545+
{
546+
return Util::bit_cast<float>( magic - ( Util::bit_cast<uint32_t>( n ) >> 1 ) );
547+
}
548+
#endif
549+
550+
// Compute approximate inverse square root.
551+
inline float Q_rsqrt( const float n )
552+
{
553+
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
554+
float o;
555+
_mm_store_ss( &o, _mm_rsqrt_ss( _mm_load_ss( &n ) ) );
556+
#else
557+
/* Magic constants by Jan Kadlec.
558+
See: http://rrrola.wz.cz/inv_sqrt.html */
559+
static const float a = 0.703952253f;
560+
static const float b = 2.38924456f;
561+
static const uint32_t magic = 0x5f1ffff9ul;
562+
float o = ReverseqrtMagic( n, magic );
563+
o *= a * ( b - n * o * o );
564+
#endif
565+
return o;
566+
}
567+
568+
inline float Q_rsqrt_precise( const float n )
569+
{
570+
static const float a = 0.5f;
571+
static const float b = 3.0f;
572+
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
573+
float o;
574+
// SSE rsqrt relative error bound: 3.7 * 10^-4
575+
_mm_store_ss( &o, _mm_rsqrt_ss( _mm_load_ss( &n ) ) );
576+
#else
577+
/* Magic constant from Quake 3.
578+
See https://github.com/id-Software/Quake-III-Arena/blob/dbe4ddb/code/game/q_math.c#L561
579+
const uint32_t magic = 0x5f3759dful; */
580+
581+
/* Magic constant computed by Chris Lomont.
582+
See: https://www.lomont.org/papers/2003/InvSqrt.pdf */
583+
static const uint32_t magic = 0x5f375a86ul;
584+
float o = ReverseSqrtMagic( n, magic );
585+
o *= a * ( b - n * o * o );
586+
#endif
587+
// Two iterations for higher precision.
588+
o *= a * ( b - n * o * o );
589+
return o;
590+
}
591+
498592
#define VectorLerpTrem( f, s, e, r ) (( r )[ 0 ] = ( s )[ 0 ] + ( f ) * (( e )[ 0 ] - ( s )[ 0 ] ), \
499593
( r )[ 1 ] = ( s )[ 1 ] + ( f ) * (( e )[ 1 ] - ( s )[ 1 ] ), \
500594
( r )[ 2 ] = ( s )[ 2 ] + ( f ) * (( e )[ 2 ] - ( s )[ 2 ] ))

0 commit comments

Comments
 (0)