@@ -338,26 +338,6 @@ extern const quat_t quatIdentity;
338
338
339
339
#define Q_ftol (x ) ((long )(x))
340
340
341
- // Overall relative error bound (ignoring unknown powerpc case): 5 * 10^-6
342
- // https://en.wikipedia.org/wiki/Fast_inverse_square_root#/media/File:2nd-iter.png
343
- inline float Q_rsqrt ( float number )
344
- {
345
- float x = 0 .5f * number;
346
- float y;
347
-
348
- // compute approximate inverse square root
349
- #if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
350
- // SSE rsqrt relative error bound: 3.7 * 10^-4
351
- _mm_store_ss ( &y, _mm_rsqrt_ss ( _mm_load_ss ( &number ) ) );
352
- #else
353
- y = Util::bit_cast<float >( 0x5f3759df - ( Util::bit_cast<uint32_t >( number ) >> 1 ) );
354
- y *= ( 1 .5f - ( x * y * y ) ); // initial iteration
355
- // relative error bound after the initial iteration: 1.8 * 10^-3
356
- #endif
357
- y *= ( 1 .5f - ( x * y * y ) ); // second iteration for higher precision
358
- return y;
359
- }
360
-
361
341
inline float Q_fabs ( float x )
362
342
{
363
343
return fabsf ( x );
@@ -495,6 +475,120 @@ void SnapVector( V &&v )
495
475
v[ 2 ] = roundf ( v[ 2 ] );
496
476
}
497
477
478
+ /* The original Q_rsqrt algorithm is:
479
+
480
+ float Q_rsqrt( float n )
481
+ {
482
+ uint32_t magic = 0x5f3759dful;
483
+ float a = 0.5f;
484
+ float b = 3.0f;
485
+ union { float f; uint32_t u; } o = {n};
486
+ o.u = magic - ( o.u >> 1 );
487
+ return a * o.f * ( b - n * o.f * o.f );
488
+ }
489
+
490
+ It could be written like this, this is what Quake 3 did:
491
+
492
+ float Q_rsqrt( float n )
493
+ {
494
+ uint32_t magic = 0x5f3759dful;
495
+ float a = 0.5f;
496
+ float b = 3.0f;
497
+ float c = a * b; // 1.5f
498
+ union { float f; uint32_t u; } o = {n};
499
+ o.u = magic - ( o.u >> 1);
500
+ float x = n * a;
501
+ return o.f * ( c - ( x * o.f * o.f ) );
502
+ }
503
+
504
+ It was written with a second iteration commented out:
505
+
506
+ float Q_rsqrt( float n )
507
+ {
508
+ uint32_t magic = 0x5f3759dful;
509
+ float a = 0.5f;
510
+ float b = 3.0f;
511
+ float c = a * b; // 1.5f
512
+ union { float f; uint32_t u; } o = {n};
513
+ o.u = magic - ( o.u >> 1);
514
+ float x = n * a;
515
+ o.f *= c - ( x * o.f * o.f );
516
+ // o.f *= c - ( x * o.f * o.f );
517
+ return o.f;
518
+ }
519
+
520
+ The relative error bound after the initial iteration was: 1.8×10⁻³
521
+ The relative error bound after a second iteration was: 5×10⁻⁶
522
+
523
+ Chris lomont computed a better magic constant of 0x5f375a86 while
524
+ keeping the other values of 0.5 and 3.0 allowing a second iteration:
525
+ https://www.lomont.org/papers/2003/InvSqrt.pdf
526
+
527
+ Better constants were computed by Jan Kadlec but they only allow
528
+ a single iteration: http://rrrola.wz.cz/inv_sqrt.html
529
+
530
+ float Q_rsqrt( float n )
531
+ {
532
+ uint32_t magic = 0x5f1ffff9ul:
533
+ float a = 0.703952253f;
534
+ float b = 2.38924456f;
535
+ union { float f; uint32_t u; } o = {n};
536
+ o.u = magic - ( o.u >> 1 );
537
+ return a * o.f * ( b - n * y.f * y.f );
538
+ }
539
+
540
+ The relative error bound is: 2.00010826×10⁻⁷ */
541
+
542
+ #if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
543
+ #else
544
+ inline float ReverseSqrtMagic ( const float n, const uint32_t magic )
545
+ {
546
+ return Util::bit_cast<float >( magic - ( Util::bit_cast<uint32_t >( n ) >> 1 ) );
547
+ }
548
+ #endif
549
+
550
+ // Compute approximate inverse square root.
551
+ inline float Q_rsqrt ( const float n )
552
+ {
553
+ #if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
554
+ float o;
555
+ _mm_store_ss ( &o, _mm_rsqrt_ss ( _mm_load_ss ( &n ) ) );
556
+ #else
557
+ /* Magic constants by Jan Kadlec.
558
+ See: http://rrrola.wz.cz/inv_sqrt.html */
559
+ static const float a = 0 .703952253f ;
560
+ static const float b = 2 .38924456f ;
561
+ static const uint32_t magic = 0x5f1ffff9ul ;
562
+ float o = ReverseqrtMagic ( n, magic );
563
+ o *= a * ( b - n * o * o );
564
+ #endif
565
+ return o;
566
+ }
567
+
568
+ inline float Q_rsqrt_precise ( const float n )
569
+ {
570
+ static const float a = 0 .5f ;
571
+ static const float b = 3 .0f ;
572
+ #if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
573
+ float o;
574
+ // SSE rsqrt relative error bound: 3.7 * 10^-4
575
+ _mm_store_ss ( &o, _mm_rsqrt_ss ( _mm_load_ss ( &n ) ) );
576
+ #else
577
+ /* Magic constant from Quake 3.
578
+ See https://github.com/id-Software/Quake-III-Arena/blob/dbe4ddb/code/game/q_math.c#L561
579
+ const uint32_t magic = 0x5f3759dful; */
580
+
581
+ /* Magic constant computed by Chris Lomont.
582
+ See: https://www.lomont.org/papers/2003/InvSqrt.pdf */
583
+ static const uint32_t magic = 0x5f375a86ul ;
584
+ float o = ReverseSqrtMagic ( n, magic );
585
+ o *= a * ( b - n * o * o );
586
+ #endif
587
+ // Two iterations for higher precision.
588
+ o *= a * ( b - n * o * o );
589
+ return o;
590
+ }
591
+
498
592
#define VectorLerpTrem ( f, s, e, r ) (( r )[ 0 ] = ( s )[ 0 ] + ( f ) * (( e )[ 0 ] - ( s )[ 0 ] ), \
499
593
( r )[ 1 ] = ( s )[ 1 ] + ( f ) * (( e )[ 1 ] - ( s )[ 1 ] ), \
500
594
( r )[ 2 ] = ( s )[ 2 ] + ( f ) * (( e )[ 2 ] - ( s )[ 2 ] ))
0 commit comments