q_shared: better magic constants for Q_rsqrt(), add Q_rsqrt_precise()

illwieckz · illwieckz · commit 4891683c4963 · 2024-12-09T18:36:40.000+01:00
diff --git a/src/engine/qcommon/q_shared.h b/src/engine/qcommon/q_shared.h
@@ -338,26 +338,6 @@ extern const quat_t   quatIdentity;
 
 #define Q_ftol(x) ((long)(x))
 
-	// Overall relative error bound (ignoring unknown powerpc case): 5 * 10^-6
-	// https://en.wikipedia.org/wiki/Fast_inverse_square_root#/media/File:2nd-iter.png
-	inline float Q_rsqrt( float number )
-	{
-		float x = 0.5f * number;
-		float y;
-
-		// compute approximate inverse square root
-#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
-		// SSE rsqrt relative error bound: 3.7 * 10^-4
-		_mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &number ) ) );
-#else
-		y = Util::bit_cast<float>( 0x5f3759df - ( Util::bit_cast<uint32_t>( number ) >> 1 ) );
-		y *= ( 1.5f - ( x * y * y ) ); // initial iteration
-		// relative error bound after the initial iteration: 1.8 * 10^-3
-#endif
-		y *= ( 1.5f - ( x * y * y ) ); // second iteration for higher precision
-		return y;
-	}
-
 inline float Q_fabs( float x )
 {
 	return fabsf( x );
@@ -495,6 +475,120 @@ void SnapVector( V &&v )
 	v[ 2 ] = roundf( v[ 2 ] );
 }
 
+/* The original Q_rsqrt algorithm is:
+
+float Q_rsqrt( float n )
+{
+	uint32_t magic = 0x5f3759dful;
+	float a = 0.5f;
+	float b = 3.0f;
+	union { float f; uint32_t u; } o = {n};
+	o.u = magic - ( o.u >> 1 );
+	return a * o.f * ( b - n * o.f * o.f );
+}
+
+It could be written like this, which is what Quake 3 did:
+
+float Q_rsqrt( float n )
+{
+	uint32_t magic = 0x5f3759dful;
+	float a = 0.5f;
+	float b = 3.0f;
+	float c = a * b; // 1.5f
+	union { float f; uint32_t u; } o = {n};
+	o.u = magic - ( o.u >> 1);
+	float x = n * a;
+	return o.f * ( c - ( x * o.f * o.f ) );
+}
+
+It was written with a second iteration commented out:
+
+float Q_rsqrt( float n )
+{
+	uint32_t magic = 0x5f3759dful;
+	float a = 0.5f;
+	float b = 3.0f;
+	float c = a * b; // 1.5f
+	union { float f; uint32_t u; } o = {n};
+	o.u = magic - ( o.u >> 1);
+	float x = n * a;
+	o.f *= c - ( x * o.f * o.f );
+//	o.f *= c - ( x * o.f * o.f );
+	return o.f;
+}
+
+The relative error bound after the initial iteration was: 1.8×10⁻³
+The relative error bound after a second iteration was: 5×10⁻⁶
+
+Chris Lomont computed a better magic constant of 0x5f375a86 while
+keeping the other values of 0.5 and 3.0 allowing a second iteration:
+https://www.lomont.org/papers/2003/InvSqrt.pdf
+
+Better constants were computed by Jan Kadlec but they only allow
+a single iteration: http://rrrola.wz.cz/inv_sqrt.html
+
+float Q_rsqrt( float n )
+{
+	uint32_t magic = 0x5f1ffff9ul;
+	float a = 0.703952253f;
+	float b = 2.38924456f;
+	union { float f; uint32_t u; } o = {n};
+	o.u = magic - ( o.u >> 1 );
+	return a * o.f * ( b - n * o.f * o.f );
+}
+
+The relative error bound is: 2.00010826×10⁻⁷ */
+
+#if !defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
+inline float ReverseSqrtMagic( const float n, const uint32_t magic )
+{
+	const uint32_t halvedBits = Util::bit_cast<uint32_t>( n ) >> 1; // bit pattern of n, shifted right by one
+	return Util::bit_cast<float>( magic - halvedBits ); // reinterpret the difference as a float
+}
+#endif // the SSE code paths do not call this helper
+
+// Fast approximate 1/sqrt(n): one SSE rsqrt estimate, or a magic guess refined once.
+inline float Q_rsqrt( const float n )
+{
+#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
+	float o;
+	_mm_store_ss( &o, _mm_rsqrt_ss( _mm_load_ss( &n ) ) );
+#else
+	/* Magic constants by Jan Kadlec, meant for a single refinement step.
+	See: http://rrrola.wz.cz/inv_sqrt.html */
+	static const float a = 0.703952253f;
+	static const float b = 2.38924456f;
+	static const uint32_t magic = 0x5f1ffff9ul;
+	float o = ReverseSqrtMagic( n, magic ); // initial guess from the bit pattern of n
+	o *= a * ( b - n * o * o ); // single refinement step
+#endif
+	return o;
+}
+
+inline float Q_rsqrt_precise( const float n )
+{
+	// Each refinement step below is y * 0.5 * ( 3 - n * y * y ).
+	static const float half = 0.5f;
+	static const float three = 3.0f;
+#if defined(DAEMON_USE_ARCH_INTRINSICS_i686_sse)
+	// SSE rsqrt relative error bound: 3.7 * 10^-4
+	float y;
+	_mm_store_ss( &y, _mm_rsqrt_ss( _mm_load_ss( &n ) ) );
+#else
+	/* The magic constant from Quake 3 was 0x5f3759df, see
+	https://github.com/id-Software/Quake-III-Arena/blob/dbe4ddb/code/game/q_math.c#L561
+	The magic constant used here was computed by Chris Lomont, see
+	https://www.lomont.org/papers/2003/InvSqrt.pdf */
+	static const uint32_t magic = 0x5f375a86ul;
+	float y = ReverseSqrtMagic( n, magic );
+	// Extra refinement step, applied only to the magic-constant estimate.
+	y *= half * ( three - n * y * y );
+#endif
+	// Final refinement step, shared by both code paths.
+	y *= half * ( three - n * y * y );
+	return y;
+}
+
 #define VectorLerpTrem( f, s, e, r ) (( r )[ 0 ] = ( s )[ 0 ] + ( f ) * (( e )[ 0 ] - ( s )[ 0 ] ), \
                                       ( r )[ 1 ] = ( s )[ 1 ] + ( f ) * (( e )[ 1 ] - ( s )[ 1 ] ), \
                                       ( r )[ 2 ] = ( s )[ 2 ] + ( f ) * (( e )[ 2 ] - ( s )[ 2 ] ))