11#include "ruby.h"
22#include "ruby/encoding.h"
3+ #include "../vendor/ryu.h"
34
45/* shims */
56/* This is the fallback definition from Ruby 3.4 */
@@ -20,6 +21,16 @@ typedef unsigned char _Bool;
2021#endif
2122#endif
2223
/*
 * Convert 64-bit C integers into Ruby Integer objects.
 *
 * The conversion is picked at preprocessing time by comparing the size of
 * uint64_t with the sizes of `long long` and `long`; the build fails if
 * neither matches.
 *
 * The macro name must be immediately followed by `(`: with a space in between
 * the preprocessor defines an object-like macro whose replacement text starts
 * with "(x)", and every use site breaks.
 */
#if SIZEOF_UINT64_T == SIZEOF_LONG_LONG
# define INT64T2NUM(x) LL2NUM(x)
# define UINT64T2NUM(x) ULL2NUM(x)
#elif SIZEOF_UINT64_T == SIZEOF_LONG
# define INT64T2NUM(x) LONG2NUM(x)
# define UINT64T2NUM(x) ULONG2NUM(x)
#else
# error No uint64_t conversion
#endif
33+
2334#include "../simd/simd.h"
2435
2536#ifndef RB_UNLIKELY
@@ -755,26 +766,6 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
755766}
756767
757768#define MAX_FAST_INTEGER_SIZE 18
/*
 * NOTE: every line of fast_decode_integer below carries a '-' marker, i.e. this
 * change deletes the function.
 *
 * What it did: read an optional leading '-' followed by the bytes in [p, pe),
 * treating each byte as a decimal digit, accumulate them into a long long and
 * return the result through LL2NUM. The bytes are not validated here and the
 * accumulation is not overflow-checked; the old json_decode_integer only called
 * it for inputs shorter than MAX_FAST_INTEGER_SIZE (18) bytes.
 */
758- static inline VALUE fast_decode_integer (const char * p , const char * pe )
759- {
760- bool negative = false;
761- if (* p == '-' ) {
762- negative = true;
763- p ++ ;
764- }
765-
766- long long memo = 0 ;
767- while (p < pe ) {
768- memo *= 10 ;
769- memo += * p - '0' ;
770- p ++ ;
771- }
772-
773- if (negative ) {
774- memo = - memo ;
775- }
776- return LL2NUM (memo );
777- }
778769
779770static VALUE json_decode_large_integer (const char * start , long len )
780771{
@@ -788,17 +779,27 @@ static VALUE json_decode_large_integer(const char *start, long len)
788779}
789780
790781static inline VALUE
791- json_decode_integer (const char * start , const char * end )
782+ json_decode_integer (uint64_t mantissa , int mantissa_digits , bool negative , const char * start , const char * end )
792783{
793- long len = end - start ;
794- if (RB_LIKELY (len < MAX_FAST_INTEGER_SIZE )) {
795- return fast_decode_integer (start , end );
784+ if (RB_LIKELY (mantissa_digits < MAX_FAST_INTEGER_SIZE )) {
785+ if (negative ) {
786+ return INT64T2NUM (- ((int64_t )mantissa ));
787+ }
788+ return UINT64T2NUM (mantissa );
796789 }
797- return json_decode_large_integer (start , len );
790+
791+ return json_decode_large_integer (start , end - start );
798792}
799793
800794static VALUE json_decode_large_float (const char * start , long len )
801795{
796+ if (RB_LIKELY (len < 64 )) {
797+ char buffer [64 ];
798+ MEMCPY (buffer , start , char , len );
799+ buffer [len ] = '\0' ;
800+ return DBL2NUM (rb_cstr_to_dbl (buffer , 1 ));
801+ }
802+
802803 VALUE buffer_v ;
803804 char * buffer = RB_ALLOCV_N (char , buffer_v , len + 1 );
804805 MEMCPY (buffer , start , char , len );
@@ -808,21 +809,24 @@ static VALUE json_decode_large_float(const char *start, long len)
808809 return number ;
809810}
810811
811- static VALUE json_decode_float (JSON_ParserConfig * config , const char * start , const char * end )
812+ /* Ruby JSON optimized float decoder using vendored Ryu algorithm
813+ * Accepts pre-extracted mantissa and exponent from first-pass validation
814+ */
815+ static inline VALUE json_decode_float (JSON_ParserConfig * config , uint64_t mantissa , int mantissa_digits , int32_t exponent , bool negative ,
816+ const char * start , const char * end )
812817{
813- long len = end - start ;
814-
815818 if (RB_UNLIKELY (config -> decimal_class )) {
816- VALUE text = rb_str_new (start , len );
819+ VALUE text = rb_str_new (start , end - start );
817820 return rb_funcallv (config -> decimal_class , config -> decimal_method_id , 1 , & text );
818- } else if (RB_LIKELY (len < 64 )) {
819- char buffer [64 ];
820- MEMCPY (buffer , start , char , len );
821- buffer [len ] = '\0' ;
822- return DBL2NUM (rb_cstr_to_dbl (buffer , 1 ));
823- } else {
824- return json_decode_large_float (start , len );
825821 }
822+
823+ // Fall back to rb_cstr_to_dbl for potential subnormals (rare edge case)
824+ // Ryu has rounding issues with subnormals around 1e-310 (< 2.225e-308)
825+ if (RB_UNLIKELY (mantissa_digits > 17 || mantissa_digits + exponent < -307 )) {
826+ return json_decode_large_float (start , end - start );
827+ }
828+
829+ return DBL2NUM (ryu_s2d_from_parts (mantissa , mantissa_digits , exponent , negative ));
826830}
827831
828832static inline VALUE json_decode_array (JSON_ParserState * state , JSON_ParserConfig * config , long count )
@@ -1082,57 +1086,90 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10821086 case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : {
10831087 bool integer = true;
10841088
1089+ // Variables for Ryu optimization - extract digits during parsing
1090+ uint64_t mantissa = 0 ;
1091+ int mantissa_digits = 0 ;
1092+ int32_t exponent = 0 ;
1093+ bool negative = false;
1094+ int decimal_point_pos = -1 ;
1095+
10851096 // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
10861097 const char * start = state -> cursor ;
1087- state -> cursor ++ ;
10881098
1089- while ((state -> cursor < state -> end ) && (* state -> cursor >= '0' ) && (* state -> cursor <= '9' )) {
1099+ // Handle optional negative sign
1100+ if (* state -> cursor == '-' ) {
1101+ negative = true;
10901102 state -> cursor ++ ;
1103+ if (state -> cursor >= state -> end || !rb_isdigit (* state -> cursor )) {
1104+ raise_parse_error_at ("invalid number: %s" , state , start );
1105+ }
10911106 }
10921107
1093- long integer_length = state -> cursor - start ;
1108+ // Parse integer part and extract mantissa digits
1109+ while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1110+ mantissa = mantissa * 10 + (* state -> cursor - '0' );
1111+ mantissa_digits ++ ;
1112+ state -> cursor ++ ;
1113+ }
10941114
1095- if (RB_UNLIKELY (start [0 ] == '0' && integer_length > 1 )) {
1115+ if (RB_UNLIKELY (start [0 ] == '0' && mantissa_digits > 1 )) {
10961116 raise_parse_error_at ("invalid number: %s" , state , start );
1097- } else if (RB_UNLIKELY (integer_length > 2 && start [0 ] == '-' && start [1 ] == '0' )) {
1098- raise_parse_error_at ("invalid number: %s" , state , start );
1099- } else if (RB_UNLIKELY (integer_length == 1 && start [0 ] == '-' )) {
1117+ } else if (RB_UNLIKELY (mantissa_digits > 1 && negative && start [1 ] == '0' )) {
11001118 raise_parse_error_at ("invalid number: %s" , state , start );
11011119 }
11021120
1121+ // Parse fractional part
11031122 if ((state -> cursor < state -> end ) && (* state -> cursor == '.' )) {
11041123 integer = false;
1124+ decimal_point_pos = mantissa_digits ; // Remember position of decimal point
11051125 state -> cursor ++ ;
11061126
1107- if (state -> cursor == state -> end || * state -> cursor < '0' || * state -> cursor > '9' ) {
1108- raise_parse_error ("invalid number: %s" , state );
1127+ if (state -> cursor == state -> end || ! rb_isdigit ( * state -> cursor ) ) {
1128+ raise_parse_error_at ("invalid number: %s" , state , start );
11091129 }
11101130
1111- while ((state -> cursor < state -> end ) && (* state -> cursor >= '0' ) && (* state -> cursor <= '9' )) {
1131+ while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1132+ mantissa = mantissa * 10 + (* state -> cursor - '0' );
1133+ mantissa_digits ++ ;
11121134 state -> cursor ++ ;
11131135 }
11141136 }
11151137
1138+ // Parse exponent
11161139 if ((state -> cursor < state -> end ) && ((* state -> cursor == 'e' ) || (* state -> cursor == 'E' ))) {
11171140 integer = false;
11181141 state -> cursor ++ ;
1119- if ((state -> cursor < state -> end ) && ((* state -> cursor == '+' ) || (* state -> cursor == '-' ))) {
1142+
1143+ bool negative_exponent = false;
1144+ if ((state -> cursor < state -> end ) && ((* state -> cursor == '-' ) || (* state -> cursor == '+' ))) {
1145+ negative_exponent = (* state -> cursor == '-' );
11201146 state -> cursor ++ ;
11211147 }
11221148
1123- if (state -> cursor == state -> end || * state -> cursor < '0' || * state -> cursor > '9' ) {
1124- raise_parse_error ("invalid number: %s" , state );
1149+ if (state -> cursor == state -> end || ! rb_isdigit ( * state -> cursor ) ) {
1150+ raise_parse_error_at ("invalid number: %s" , state , start );
11251151 }
11261152
1127- while ((state -> cursor < state -> end ) && (* state -> cursor >= '0' ) && (* state -> cursor <= '9' )) {
1153+ while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1154+ exponent = exponent * 10 + (* state -> cursor - '0' );
11281155 state -> cursor ++ ;
11291156 }
1157+
1158+ if (negative_exponent ) {
1159+ exponent = - exponent ;
1160+ }
11301161 }
11311162
11321163 if (integer ) {
1133- return json_push_value (state , config , json_decode_integer (start , state -> cursor ));
1164+ return json_push_value (state , config , json_decode_integer (mantissa , mantissa_digits , negative , start , state -> cursor ));
11341165 }
1135- return json_push_value (state , config , json_decode_float (config , start , state -> cursor ));
1166+
1167+ // Adjust exponent based on decimal point position
1168+ if (decimal_point_pos >= 0 ) {
1169+ exponent -= (mantissa_digits - decimal_point_pos );
1170+ }
1171+
1172+ return json_push_value (state , config , json_decode_float (config , mantissa , mantissa_digits , exponent , negative , start , state -> cursor ));
11361173 }
11371174 case '"' : {
11381175 // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
0 commit comments