@@ -338,29 +338,6 @@ extern const quat_t quatIdentity;
338
338
339
339
#define Q_ftol (x ) ((long )(x))
340
340
341
- // Overall relative error bound (ignoring unknown powerpc case): 5 * 10^-6
342
- // https://en.wikipedia.org/wiki/Fast_inverse_square_root#/media/File:2nd-iter.png
343
- inline float Q_rsqrt ( float number )
344
- {
345
- // compute approximate inverse square root
346
- #if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
347
- float y;
348
- // SSE rsqrt relative error bound: 3.7 * 10^-4
349
- _mm_store_ss ( &y, _mm_rsqrt_ss ( _mm_load_ss ( &number ) ) );
350
- #else
351
- float x = 0 .5f * number;
352
- float y = Util::bit_cast<float >( 0x5f3759df - ( Util::bit_cast<uint32_t >( number ) >> 1 ) );
353
- // initial iteration
354
- // relative error bound after the initial iteration: 1.8 * 10^-3
355
- y *= ( 1 .5f - ( x * y * y ) );
356
- #if 0
357
- // second iteration for higher precision
358
- y *= ( 1.5f - ( x * y * y ) );
359
- #endif
360
- #endif
361
- return y;
362
- }
363
-
364
341
inline float Q_fabs ( float x )
365
342
{
366
343
return fabsf ( x );
@@ -498,6 +475,99 @@ void SnapVector( V &&v )
498
475
v[ 2 ] = roundf ( v[ 2 ] );
499
476
}
500
477
478
+ /* The original Q_rsqrt algorithm is:
479
+
480
+ typedef union { float f; uint32_t u; } uf_t;
481
+ float Q_rsqrt( float n )
482
+ {
483
+ uint32_t magic = 0x5f3759dful;
484
+ float a = 0.5f;
485
+ float b = 3.0f;
486
+ uf_t o; o.f = n;
487
+ o.u = magic - ( o.u >> 1 );
488
+ return a * o.f * ( b - n * o.f * o.f );
489
+ }
490
+
491
+ It could be written like this, this is what Quake 3 did:
492
+
493
+ float Q_rsqrt( float n )
494
+ {
495
+ uint32_t magic = 0x5f3759dful;
496
+ float a = 0.5f;
497
+ float b = 3.0f;
498
+ float c = a * b; // 1.5f
499
+ uf_t o = n;
500
+ o.u = magic - ( o.u >> 1);
501
+ float x = n * a;
502
+ return o.f * a ( c - ( x * o.f * o.f ) );
503
+ }
504
+
505
+ It was written with a second iteration commented out:
506
+
507
+ float Q_rsqrt( float n )
508
+ {
509
+ uint32_t magic = 0x5f3759dful;
510
+ float a = 0.5f;
511
+ float b = 3.0f;
512
+ float c = a * b; // 1.5f
513
+ uf_t o; o.f = n;
514
+ o.u = magic - ( o.u >> 1);
515
+ float x = n * a;
516
+ o.f *= a * ( c - ( x * o.f * o.f ) );
517
+ // o.f *= a * ( c - ( x * o.f * o.f ) );
518
+ return o.f;
519
+ }
520
+
521
+ The relative error bound after the initial iteration was: 1.8×10⁻³
522
+ The relative error bound after a second iteration was: 5×10⁻⁶
523
+
524
+ Better values are usable from: http://rrrola.wz.cz/inv_sqrt.html
525
+
526
+ float Q_rsqrt( float n )
527
+ {
528
+ uint32_t magic = 0x5f1ffff9ul:
529
+ float a = 0.703952253f;
530
+ float b = 2.38924456f;
531
+ uf_t o; o.f = n;
532
+ o.u = magic - ( o.u >> 1 );
533
+ return a * o.f * ( b - n * y.f * y.f );
534
+ }
535
+
536
+ The relative error bound after the initial iteration is: 2.00010826×10⁻⁷ */
537
+
538
+ #define Q_RSQRT_QUAKE3_CONSTANTS 0
539
+ #define Q_RSQRT_DOUBLE_ITERATION 0
540
+
541
+ #if Q_RSQRT_QUAKE3_CONSTANTS
542
+ // Constants used in Quake 3.
543
+ static uint32_t qrsqrt_magic = 0x5f3759dful ;
544
+ static float qrsqrt_a = 0 .5f ;
545
+ static float qrsqrt_b = 3 .0f ;
546
+ #else
547
+ // Constants computed by Řrřola.
548
+ static uint32_t qrsqrt_magic = 0x5f1ffff9ul ;
549
+ static float qrsqrt_a = 0 .703974056f ;
550
+ static float qrsqrt_b = 2 .38919526f ;
551
+ #endif
552
+
553
+ inline float Q_rsqrt ( float n )
554
+ {
555
+ // Compute approximate inverse square root.
556
+ #if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
557
+ float o;
558
+ // SSE rsqrt relative error bound: 3.7 * 10^-4
559
+ _mm_store_ss ( &o, _mm_rsqrt_ss ( _mm_load_ss ( &n ) ) );
560
+ #else
561
+ float o = Util::bit_cast<float >( qrsqrt_magic - ( Util::bit_cast<uint32_t >( n ) >> 1 ) );
562
+ o *= qrsqrt_a * ( qrsqrt_b - n * o * o );
563
+ #if Q_RSQRT_DOUBLE_ITERATION
564
+ // Second iteration for higher precision.
565
+ o *= qsqrt_a * ( qsqrt_b - n * o * o );
566
+ #endif
567
+ #endif
568
+ return o;
569
+ }
570
+
501
571
#define VectorLerpTrem ( f, s, e, r ) (( r )[ 0 ] = ( s )[ 0 ] + ( f ) * (( e )[ 0 ] - ( s )[ 0 ] ), \
502
572
( r )[ 1 ] = ( s )[ 1 ] + ( f ) * (( e )[ 1 ] - ( s )[ 1 ] ), \
503
573
( r )[ 2 ] = ( s )[ 2 ] + ( f ) * (( e )[ 2 ] - ( s )[ 2 ] ))
0 commit comments