Skip to content

Commit 95fa8f8

Browse files
committed
q_shared: better magic constant for the Q_rsqrt trick
1 parent cfd4d16 commit 95fa8f8

File tree

1 file changed

+93
-23
lines changed

1 file changed

+93
-23
lines changed

src/engine/qcommon/q_shared.h

+93-23
Original file line numberDiff line numberDiff line change
@@ -338,29 +338,6 @@ extern const quat_t quatIdentity;
338338

339339
#define Q_ftol(x) ((long)(x))
340340

341-
// Overall relative error bound (ignoring unknown powerpc case): 5 * 10^-6
342-
// https://en.wikipedia.org/wiki/Fast_inverse_square_root#/media/File:2nd-iter.png
343-
inline float Q_rsqrt( float number )
344-
{
345-
// compute approximate inverse square root
346-
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
347-
float y;
348-
// SSE rsqrt relative error bound: 3.7 * 10^-4
349-
_mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) );
350-
#else
351-
float x = 0.5f * number;
352-
float y = Util::bit_cast<float>( 0x5f3759df - ( Util::bit_cast<uint32_t>( number ) >> 1 ) );
353-
// initial iteration
354-
// relative error bound after the initial iteration: 1.8 * 10^-3
355-
y *= ( 1.5f - ( x * y * y ) );
356-
#if 0
357-
// second iteration for higher precision
358-
y *= ( 1.5f - ( x * y * y ) );
359-
#endif
360-
#endif
361-
return y;
362-
}
363-
364341
inline float Q_fabs( float x )
365342
{
366343
return fabsf( x );
@@ -498,6 +475,99 @@ void SnapVector( V &&v )
498475
v[ 2 ] = roundf( v[ 2 ] );
499476
}
500477

478+
/* The original Q_rsqrt algorithm is:
479+
480+
typedef union { float f; uint32_t u; } uf_t;
481+
float Q_rsqrt( float n )
482+
{
483+
uint32_t magic = 0x5f3759dful;
484+
float a = 0.5f;
485+
float b = 3.0f;
486+
uf_t o; o.f = n;
487+
o.u = magic - ( o.u >> 1 );
488+
return a * o.f * ( b - n * o.f * o.f );
489+
}
490+
491+
It could be written like this, this is what Quake 3 did:
492+
493+
float Q_rsqrt( float n )
494+
{
495+
uint32_t magic = 0x5f3759dful;
496+
float a = 0.5f;
497+
float b = 3.0f;
498+
float c = a * b; // 1.5f
499+
uf_t o = n;
500+
o.u = magic - ( o.u >> 1);
501+
float x = n * a;
502+
return o.f * a ( c - ( x * o.f * o.f ) );
503+
}
504+
505+
It was written with a second iteration commented out:
506+
507+
float Q_rsqrt( float n )
508+
{
509+
uint32_t magic = 0x5f3759dful;
510+
float a = 0.5f;
511+
float b = 3.0f;
512+
float c = a * b; // 1.5f
513+
uf_t o; o.f = n;
514+
o.u = magic - ( o.u >> 1);
515+
float x = n * a;
516+
o.f *= a * ( c - ( x * o.f * o.f ) );
517+
// o.f *= a * ( c - ( x * o.f * o.f ) );
518+
return o.f;
519+
}
520+
521+
The relative error bound after the initial iteration was: 1.8×10⁻³
522+
The relative error bound after a second iteration was: 5×10⁻⁶
523+
524+
Better values are usable from: http://rrrola.wz.cz/inv_sqrt.html
525+
526+
float Q_rsqrt( float n )
527+
{
528+
uint32_t magic = 0x5f1ffff9ul:
529+
float a = 0.703952253f;
530+
float b = 2.38924456f;
531+
uf_t o; o.f = n;
532+
o.u = magic - ( o.u >> 1 );
533+
return a * o.f * ( b - n * y.f * y.f );
534+
}
535+
536+
The relative error bound after the initial iteration is: 2.00010826×10⁻⁷ */
537+
538+
#define Q_RSQRT_QUAKE3_CONSTANTS 0
539+
#define Q_RSQRT_DOUBLE_ITERATION 0
540+
541+
#if Q_RSQRT_QUAKE3_CONSTANTS
542+
// Constants used in Quake 3.
543+
static uint32_t qrsqrt_magic = 0x5f3759dful;
544+
static float qrsqrt_a = 0.5f;
545+
static float qrsqrt_b = 3.0f;
546+
#else
547+
// Constants computed by Řrřola.
548+
static uint32_t qrsqrt_magic = 0x5f1ffff9ul;
549+
static float qrsqrt_a = 0.703974056f;
550+
static float qrsqrt_b = 2.38919526f;
551+
#endif
552+
553+
inline float Q_rsqrt( float n )
554+
{
555+
// Compute approximate inverse square root.
556+
#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
557+
float o;
558+
// SSE rsqrt relative error bound: 3.7 * 10^-4
559+
_mm_store_ss( &o, _mm_rsqrt_ss( _mm_load_ss( &n ) ) );
560+
#else
561+
float o = Util::bit_cast<float>( qrsqrt_magic - ( Util::bit_cast<uint32_t>( n ) >> 1 ) );
562+
o *= qrsqrt_a * ( qrsqrt_b - n * o * o );
563+
#if Q_RSQRT_DOUBLE_ITERATION
564+
// Second iteration for higher precision.
565+
o *= qsqrt_a * ( qsqrt_b - n * o * o );
566+
#endif
567+
#endif
568+
return o;
569+
}
570+
501571
#define VectorLerpTrem( f, s, e, r ) (( r )[ 0 ] = ( s )[ 0 ] + ( f ) * (( e )[ 0 ] - ( s )[ 0 ] ), \
502572
( r )[ 1 ] = ( s )[ 1 ] + ( f ) * (( e )[ 1 ] - ( s )[ 1 ] ), \
503573
( r )[ 2 ] = ( s )[ 2 ] + ( f ) * (( e )[ 2 ] - ( s )[ 2 ] ))

0 commit comments

Comments
 (0)