@@ -1022,6 +1022,92 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
10221022 return Qfalse ;
10231023}
10241024
1025+ static inline int json_parse_digits (JSON_ParserState * state , uint64_t * accumulator )
1026+ {
1027+ const char * start = state -> cursor ;
1028+ while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1029+ * accumulator = * accumulator * 10 + (* state -> cursor - '0' );
1030+ state -> cursor ++ ;
1031+ }
1032+ return (int )(state -> cursor - start );
1033+ }
1034+
1035+ static inline VALUE json_parse_number (JSON_ParserState * state , JSON_ParserConfig * config , bool negative , const char * start )
1036+ {
1037+ bool integer = true;
1038+ const char first_digit = * state -> cursor ;
1039+
1040+ // Variables for Ryu optimization - extract digits during parsing
1041+ int32_t exponent = 0 ;
1042+ int decimal_point_pos = -1 ;
1043+ uint64_t mantissa = 0 ;
1044+
1045+ // Parse integer part and extract mantissa digits
1046+ int mantissa_digits = json_parse_digits (state , & mantissa );
1047+
1048+ if (RB_UNLIKELY (first_digit == '0' && mantissa_digits > 1 || negative && mantissa_digits == 0 )) {
1049+ raise_parse_error_at ("invalid number: %s" , state , start );
1050+ }
1051+
1052+ // Parse fractional part
1053+ if ((state -> cursor < state -> end ) && (* state -> cursor == '.' )) {
1054+ integer = false;
1055+ decimal_point_pos = mantissa_digits ; // Remember position of decimal point
1056+ state -> cursor ++ ;
1057+
1058+ int fractional_digits = json_parse_digits (state , & mantissa );
1059+ mantissa_digits += fractional_digits ;
1060+
1061+ if (RB_UNLIKELY (!fractional_digits )) {
1062+ raise_parse_error_at ("invalid number: %s" , state , start );
1063+ }
1064+ }
1065+
1066+ // Parse exponent
1067+ if ((state -> cursor < state -> end ) && ((rb_tolower (* state -> cursor ) == 'e' ))) {
1068+ integer = false;
1069+ state -> cursor ++ ;
1070+
1071+ bool negative_exponent = false;
1072+ if ((state -> cursor < state -> end ) && ((* state -> cursor == '-' ) || (* state -> cursor == '+' ))) {
1073+ negative_exponent = (* state -> cursor == '-' );
1074+ state -> cursor ++ ;
1075+ }
1076+
1077+ uint64_t abs_exponent = 0 ;
1078+ int exponent_digits = json_parse_digits (state , & abs_exponent );
1079+
1080+ if (RB_UNLIKELY (!exponent_digits )) {
1081+ raise_parse_error_at ("invalid number: %s" , state , start );
1082+ }
1083+
1084+ exponent = negative_exponent ? - ((int32_t )abs_exponent ) : ((int32_t )abs_exponent );
1085+ }
1086+
1087+ if (integer ) {
1088+ return json_decode_integer (mantissa , mantissa_digits , negative , start , state -> cursor );
1089+ }
1090+
1091+ // Adjust exponent based on decimal point position
1092+ if (decimal_point_pos >= 0 ) {
1093+ exponent -= (mantissa_digits - decimal_point_pos );
1094+ }
1095+
1096+ return json_decode_float (config , mantissa , mantissa_digits , exponent , negative , start , state -> cursor );
1097+ }
1098+
1099+ static inline VALUE json_parse_positive_number (JSON_ParserState * state , JSON_ParserConfig * config )
1100+ {
1101+ return json_parse_number (state , config , false, state -> cursor );
1102+ }
1103+
1104+ static inline VALUE json_parse_negative_number (JSON_ParserState * state , JSON_ParserConfig * config )
1105+ {
1106+ const char * start = state -> cursor ;
1107+ state -> cursor ++ ;
1108+ return json_parse_number (state , config , true, start );
1109+ }
1110+
10251111static VALUE json_parse_any (JSON_ParserState * state , JSON_ParserConfig * config )
10261112{
10271113 json_eat_whitespace (state );
@@ -1072,7 +1158,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10721158
10731159 raise_parse_error ("unexpected token %s" , state );
10741160 break ;
1075- case '-' :
1161+ case '-' : {
10761162 // Note: memcmp with a small power of two compile to an integer comparison
10771163 if ((state -> end - state -> cursor >= 9 ) && (memcmp (state -> cursor + 1 , "Infinity" , 8 ) == 0 )) {
10781164 if (config -> allow_nan ) {
@@ -1082,95 +1168,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10821168 raise_parse_error ("unexpected token %s" , state );
10831169 }
10841170 }
1085- // Fallthrough
1086- case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : {
1087- bool integer = true;
1088-
1089- // Variables for Ryu optimization - extract digits during parsing
1090- uint64_t mantissa = 0 ;
1091- int mantissa_digits = 0 ;
1092- int32_t exponent = 0 ;
1093- bool negative = false;
1094- int decimal_point_pos = -1 ;
1095-
1096- // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
1097- const char * start = state -> cursor ;
1098-
1099- // Handle optional negative sign
1100- if (* state -> cursor == '-' ) {
1101- negative = true;
1102- state -> cursor ++ ;
1103- if (state -> cursor >= state -> end || !rb_isdigit (* state -> cursor )) {
1104- raise_parse_error_at ("invalid number: %s" , state , start );
1105- }
1106- }
1107-
1108- // Parse integer part and extract mantissa digits
1109- while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1110- mantissa = mantissa * 10 + (* state -> cursor - '0' );
1111- mantissa_digits ++ ;
1112- state -> cursor ++ ;
1113- }
1114-
1115- if (RB_UNLIKELY (start [0 ] == '0' && mantissa_digits > 1 )) {
1116- raise_parse_error_at ("invalid number: %s" , state , start );
1117- } else if (RB_UNLIKELY (mantissa_digits > 1 && negative && start [1 ] == '0' )) {
1118- raise_parse_error_at ("invalid number: %s" , state , start );
1119- }
1120-
1121- // Parse fractional part
1122- if ((state -> cursor < state -> end ) && (* state -> cursor == '.' )) {
1123- integer = false;
1124- decimal_point_pos = mantissa_digits ; // Remember position of decimal point
1125- state -> cursor ++ ;
1126-
1127- if (state -> cursor == state -> end || !rb_isdigit (* state -> cursor )) {
1128- raise_parse_error_at ("invalid number: %s" , state , start );
1129- }
1130-
1131- while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1132- mantissa = mantissa * 10 + (* state -> cursor - '0' );
1133- mantissa_digits ++ ;
1134- state -> cursor ++ ;
1135- }
1136- }
1137-
1138- // Parse exponent
1139- if ((state -> cursor < state -> end ) && ((* state -> cursor == 'e' ) || (* state -> cursor == 'E' ))) {
1140- integer = false;
1141- state -> cursor ++ ;
1142-
1143- bool negative_exponent = false;
1144- if ((state -> cursor < state -> end ) && ((* state -> cursor == '-' ) || (* state -> cursor == '+' ))) {
1145- negative_exponent = (* state -> cursor == '-' );
1146- state -> cursor ++ ;
1147- }
1148-
1149- if (state -> cursor == state -> end || !rb_isdigit (* state -> cursor )) {
1150- raise_parse_error_at ("invalid number: %s" , state , start );
1151- }
1152-
1153- while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1154- exponent = exponent * 10 + (* state -> cursor - '0' );
1155- state -> cursor ++ ;
1156- }
1157-
1158- if (negative_exponent ) {
1159- exponent = - exponent ;
1160- }
1161- }
1162-
1163- if (integer ) {
1164- return json_push_value (state , config , json_decode_integer (mantissa , mantissa_digits , negative , start , state -> cursor ));
1165- }
1166-
1167- // Adjust exponent based on decimal point position
1168- if (decimal_point_pos >= 0 ) {
1169- exponent -= (mantissa_digits - decimal_point_pos );
1170- }
1171-
1172- return json_push_value (state , config , json_decode_float (config , mantissa , mantissa_digits , exponent , negative , start , state -> cursor ));
1171+ return json_push_value (state , config , json_parse_negative_number (state , config ));
1172+ break ;
11731173 }
1174+ case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' :
1175+ return json_push_value (state , config , json_parse_positive_number (state , config ));
1176+ break ;
11741177 case '"' : {
11751178 // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
11761179 return json_parse_string (state , config , false);
0 commit comments