@@ -1022,6 +1022,95 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
10221022 return Qfalse ;
10231023}
10241024
1025+ static inline VALUE json_parse_number (JSON_ParserState * state , JSON_ParserConfig * config , bool negative , const char * start )
1026+ {
1027+ bool integer = true;
1028+
1029+ // Variables for Ryu optimization - extract digits during parsing
1030+ uint64_t mantissa = 0 ;
1031+ int mantissa_digits = 0 ;
1032+ int32_t exponent = 0 ;
1033+ int decimal_point_pos = -1 ;
1034+
1035+ const char first_digit = * state -> cursor ;
1036+
1037+ // Parse integer part and extract mantissa digits
1038+ while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1039+ mantissa = mantissa * 10 + (* state -> cursor - '0' );
1040+ mantissa_digits ++ ;
1041+ state -> cursor ++ ;
1042+ }
1043+
1044+ if (RB_UNLIKELY (first_digit == '0' && mantissa_digits > 1 || negative && mantissa_digits == 0 )) {
1045+ raise_parse_error_at ("invalid number: %s" , state , start );
1046+ }
1047+
1048+ // Parse fractional part
1049+ if ((state -> cursor < state -> end ) && (* state -> cursor == '.' )) {
1050+ integer = false;
1051+ decimal_point_pos = mantissa_digits ; // Remember position of decimal point
1052+ state -> cursor ++ ;
1053+
1054+ if (state -> cursor == state -> end || !rb_isdigit (* state -> cursor )) {
1055+ raise_parse_error_at ("invalid number: %s" , state , start );
1056+ }
1057+
1058+ while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1059+ mantissa = mantissa * 10 + (* state -> cursor - '0' );
1060+ mantissa_digits ++ ;
1061+ state -> cursor ++ ;
1062+ }
1063+ }
1064+
1065+ // Parse exponent
1066+ if ((state -> cursor < state -> end ) && ((* state -> cursor == 'e' ) || (* state -> cursor == 'E' ))) {
1067+ integer = false;
1068+ state -> cursor ++ ;
1069+
1070+ bool negative_exponent = false;
1071+ if ((state -> cursor < state -> end ) && ((* state -> cursor == '-' ) || (* state -> cursor == '+' ))) {
1072+ negative_exponent = (* state -> cursor == '-' );
1073+ state -> cursor ++ ;
1074+ }
1075+
1076+ if (state -> cursor == state -> end || !rb_isdigit (* state -> cursor )) {
1077+ raise_parse_error_at ("invalid number: %s" , state , start );
1078+ }
1079+
1080+ while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1081+ exponent = exponent * 10 + (* state -> cursor - '0' );
1082+ state -> cursor ++ ;
1083+ }
1084+
1085+ if (negative_exponent ) {
1086+ exponent = - exponent ;
1087+ }
1088+ }
1089+
1090+ if (integer ) {
1091+ return json_decode_integer (mantissa , mantissa_digits , negative , start , state -> cursor );
1092+ }
1093+
1094+ // Adjust exponent based on decimal point position
1095+ if (decimal_point_pos >= 0 ) {
1096+ exponent -= (mantissa_digits - decimal_point_pos );
1097+ }
1098+
1099+ return json_decode_float (config , mantissa , mantissa_digits , exponent , negative , start , state -> cursor );
1100+ }
1101+
1102+ static inline VALUE json_parse_positive_number (JSON_ParserState * state , JSON_ParserConfig * config )
1103+ {
1104+ return json_parse_number (state , config , false, state -> cursor );
1105+ }
1106+
1107+ static inline VALUE json_parse_negative_number (JSON_ParserState * state , JSON_ParserConfig * config )
1108+ {
1109+ const char * start = state -> cursor ;
1110+ state -> cursor ++ ;
1111+ return json_parse_number (state , config , true, start );
1112+ }
1113+
10251114static VALUE json_parse_any (JSON_ParserState * state , JSON_ParserConfig * config )
10261115{
10271116 json_eat_whitespace (state );
@@ -1072,7 +1161,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10721161
10731162 raise_parse_error ("unexpected token %s" , state );
10741163 break ;
1075- case '-' :
1164+ case '-' : {
10761165 // Note: memcmp with a small power of two compile to an integer comparison
10771166 if ((state -> end - state -> cursor >= 9 ) && (memcmp (state -> cursor + 1 , "Infinity" , 8 ) == 0 )) {
10781167 if (config -> allow_nan ) {
@@ -1082,95 +1171,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10821171 raise_parse_error ("unexpected token %s" , state );
10831172 }
10841173 }
1085- // Fallthrough
1086- case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' : {
1087- bool integer = true;
1088-
1089- // Variables for Ryu optimization - extract digits during parsing
1090- uint64_t mantissa = 0 ;
1091- int mantissa_digits = 0 ;
1092- int32_t exponent = 0 ;
1093- bool negative = false;
1094- int decimal_point_pos = -1 ;
1095-
1096- // /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
1097- const char * start = state -> cursor ;
1098-
1099- // Handle optional negative sign
1100- if (* state -> cursor == '-' ) {
1101- negative = true;
1102- state -> cursor ++ ;
1103- if (state -> cursor >= state -> end || !rb_isdigit (* state -> cursor )) {
1104- raise_parse_error_at ("invalid number: %s" , state , start );
1105- }
1106- }
1107-
1108- // Parse integer part and extract mantissa digits
1109- while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1110- mantissa = mantissa * 10 + (* state -> cursor - '0' );
1111- mantissa_digits ++ ;
1112- state -> cursor ++ ;
1113- }
1114-
1115- if (RB_UNLIKELY (start [0 ] == '0' && mantissa_digits > 1 )) {
1116- raise_parse_error_at ("invalid number: %s" , state , start );
1117- } else if (RB_UNLIKELY (mantissa_digits > 1 && negative && start [1 ] == '0' )) {
1118- raise_parse_error_at ("invalid number: %s" , state , start );
1119- }
1120-
1121- // Parse fractional part
1122- if ((state -> cursor < state -> end ) && (* state -> cursor == '.' )) {
1123- integer = false;
1124- decimal_point_pos = mantissa_digits ; // Remember position of decimal point
1125- state -> cursor ++ ;
1126-
1127- if (state -> cursor == state -> end || !rb_isdigit (* state -> cursor )) {
1128- raise_parse_error_at ("invalid number: %s" , state , start );
1129- }
1130-
1131- while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1132- mantissa = mantissa * 10 + (* state -> cursor - '0' );
1133- mantissa_digits ++ ;
1134- state -> cursor ++ ;
1135- }
1136- }
1137-
1138- // Parse exponent
1139- if ((state -> cursor < state -> end ) && ((* state -> cursor == 'e' ) || (* state -> cursor == 'E' ))) {
1140- integer = false;
1141- state -> cursor ++ ;
1142-
1143- bool negative_exponent = false;
1144- if ((state -> cursor < state -> end ) && ((* state -> cursor == '-' ) || (* state -> cursor == '+' ))) {
1145- negative_exponent = (* state -> cursor == '-' );
1146- state -> cursor ++ ;
1147- }
1148-
1149- if (state -> cursor == state -> end || !rb_isdigit (* state -> cursor )) {
1150- raise_parse_error_at ("invalid number: %s" , state , start );
1151- }
1152-
1153- while ((state -> cursor < state -> end ) && rb_isdigit (* state -> cursor )) {
1154- exponent = exponent * 10 + (* state -> cursor - '0' );
1155- state -> cursor ++ ;
1156- }
1157-
1158- if (negative_exponent ) {
1159- exponent = - exponent ;
1160- }
1161- }
1162-
1163- if (integer ) {
1164- return json_push_value (state , config , json_decode_integer (mantissa , mantissa_digits , negative , start , state -> cursor ));
1165- }
1166-
1167- // Adjust exponent based on decimal point position
1168- if (decimal_point_pos >= 0 ) {
1169- exponent -= (mantissa_digits - decimal_point_pos );
1170- }
1171-
1172- return json_push_value (state , config , json_decode_float (config , mantissa , mantissa_digits , exponent , negative , start , state -> cursor ));
1174+ return json_push_value (state , config , json_parse_negative_number (state , config ));
1175+ break ;
11731176 }
1177+ case '0' : case '1' : case '2' : case '3' : case '4' : case '5' : case '6' : case '7' : case '8' : case '9' :
1178+ return json_push_value (state , config , json_parse_positive_number (state , config ));
1179+ break ;
11741180 case '"' : {
11751181 // %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
11761182 return json_parse_string (state , config , false);
0 commit comments