Skip to content

Commit c009c85

Browse files
authored
Merge pull request #883 from byroot/refactor-number-parsing
Refactor number parsing
2 parents e1f4224 + 1bf405e commit c009c85

File tree

1 file changed

+92
-89
lines changed

1 file changed

+92
-89
lines changed

ext/json/ext/parser/parser.c

Lines changed: 92 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,92 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
10221022
return Qfalse;
10231023
}
10241024

1025+
/*
 * Consume a run of ASCII decimal digits starting at state->cursor.
 *
 * Each digit is folded into *accumulator as (value * 10 + digit), so the
 * caller may pre-seed the accumulator to continue a previous run (this is how
 * the fractional digits are appended to the integer digits of a mantissa).
 * The arithmetic is unsigned 64-bit and therefore wraps modulo 2^64 on very
 * long runs; callers receive the digit count so they can detect that case.
 *
 * On return state->cursor points at the first byte that is not a digit (or at
 * state->end). Returns the number of digits consumed, 0 if there were none.
 */
static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulator)
{
    const char *const first = state->cursor;
    const char *pos = first;
    uint64_t value = *accumulator;

    for (; (pos < state->end) && rb_isdigit(*pos); pos++) {
        value = value * 10 + (*pos - '0');
    }

    *accumulator = value;
    state->cursor = pos;
    return (int)(pos - first);
}
1034+
1035+
/*
 * Parse a JSON number and return the decoded Ruby value.
 *
 * Accepted grammar: /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
 *
 * state    - parser state; state->cursor must point at the first character
 *            after the optional '-' sign. It is advanced past the number.
 * config   - parser configuration, forwarded to json_decode_float.
 * negative - true when the caller already consumed a leading '-'.
 * start    - first byte of the number (including the sign, if any); used for
 *            error reporting and forwarded to the decoders together with the
 *            final cursor position.
 *
 * Numbers without a fraction or exponent go to json_decode_integer, all
 * others to json_decode_float. A parse error is raised through
 * raise_parse_error_at for leading zeros, a sign without digits, a '.' without
 * digits, or an exponent marker without digits.
 */
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
{
    // Exponent magnitudes are saturated to this bound. Any value this large is
    // far outside the range of a double, and staying well below INT32_MAX
    // leaves headroom for further int32_t arithmetic on the exponent.
    // NOTE(review): json_decode_float is not visible here; confirm it does
    // not need exact exponents beyond this magnitude.
    enum { JSON_EXPONENT_LIMIT = 1000000000 };

    bool integer = true;

    // A lone '-' as the very last input byte leaves the cursor at `end`;
    // do not read past the buffer in that case.
    const char first_digit = (state->cursor < state->end) ? *state->cursor : '\0';

    // Variables for Ryu optimization - extract digits during parsing
    int64_t exponent = 0; // 64-bit so the adjustments below cannot overflow
    int decimal_point_pos = -1;
    uint64_t mantissa = 0;

    // Parse integer part and extract mantissa digits
    int mantissa_digits = json_parse_digits(state, &mantissa);

    // Reject leading zeros ("01") and a sign that is not followed by a digit ("-", "-x").
    if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
        raise_parse_error_at("invalid number: %s", state, start);
    }

    // Parse fractional part
    if ((state->cursor < state->end) && (*state->cursor == '.')) {
        integer = false;
        decimal_point_pos = mantissa_digits; // Remember position of decimal point
        state->cursor++;

        // Fractional digits are appended to the same mantissa accumulator.
        int fractional_digits = json_parse_digits(state, &mantissa);
        mantissa_digits += fractional_digits;

        if (RB_UNLIKELY(!fractional_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }
    }

    // Parse exponent
    if ((state->cursor < state->end) && ((rb_tolower(*state->cursor) == 'e'))) {
        integer = false;
        state->cursor++;

        bool negative_exponent = false;
        if ((state->cursor < state->end) && ((*state->cursor == '-') || (*state->cursor == '+'))) {
            negative_exponent = (*state->cursor == '-');
            state->cursor++;
        }

        const char *exponent_start = state->cursor;
        uint64_t abs_exponent = 0;
        int exponent_digits = json_parse_digits(state, &abs_exponent);

        if (RB_UNLIKELY(!exponent_digits)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }

        // Saturate instead of truncating: casting the raw 64-bit value to
        // int32_t would turn e.g. an exponent of 4294967296 into 0 on common
        // platforms. More than 10 significant digits always exceeds the limit
        // and is also the only way the 64-bit accumulator could have wrapped,
        // so leading zeros are skipped before counting.
        const char *significant = exponent_start;
        while ((significant < state->cursor) && (*significant == '0')) {
            significant++;
        }
        if (((state->cursor - significant) > 10) || (abs_exponent > (uint64_t)JSON_EXPONENT_LIMIT)) {
            abs_exponent = (uint64_t)JSON_EXPONENT_LIMIT;
        }

        exponent = negative_exponent ? -((int64_t)abs_exponent) : ((int64_t)abs_exponent);
    }

    if (integer) {
        return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
    }

    // Adjust exponent based on decimal point position: the mantissa holds the
    // fractional digits as if they were integer digits.
    if (decimal_point_pos >= 0) {
        exponent -= (mantissa_digits - decimal_point_pos);
    }

    // Bring the adjusted exponent back into the range representable as int32_t.
    if (exponent > JSON_EXPONENT_LIMIT) {
        exponent = JSON_EXPONENT_LIMIT;
    } else if (exponent < -JSON_EXPONENT_LIMIT) {
        exponent = -JSON_EXPONENT_LIMIT;
    }

    return json_decode_float(config, mantissa, mantissa_digits, (int32_t)exponent, negative, start, state->cursor);
}
1098+
1099+
/*
 * Parse an unsigned JSON number beginning at state->cursor.
 *
 * The current cursor position doubles as the start of the number that
 * json_parse_number uses for error reporting and decoding.
 */
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *number_start = state->cursor;

    return json_parse_number(state, config, false, number_start);
}
1103+
1104+
/*
 * Parse a JSON number that carries a leading sign character.
 *
 * The byte under state->cursor is skipped without being inspected (the caller
 * dispatches here on '-'), while its address is kept as the start of the
 * number so that errors and decoders see the sign as well.
 */
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    // Post-increment: remember where the sign is, then step over it.
    const char *sign_position = state->cursor++;

    return json_parse_number(state, config, true, sign_position);
}
1110+
10251111
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10261112
{
10271113
json_eat_whitespace(state);
@@ -1072,7 +1158,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10721158

10731159
raise_parse_error("unexpected token %s", state);
10741160
break;
1075-
case '-':
1161+
case '-': {
10761162
// Note: memcmp with a small power of two compile to an integer comparison
10771163
if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
10781164
if (config->allow_nan) {
@@ -1082,95 +1168,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10821168
raise_parse_error("unexpected token %s", state);
10831169
}
10841170
}
1085-
// Fallthrough
1086-
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
1087-
bool integer = true;
1088-
1089-
// Variables for Ryu optimization - extract digits during parsing
1090-
uint64_t mantissa = 0;
1091-
int mantissa_digits = 0;
1092-
int32_t exponent = 0;
1093-
bool negative = false;
1094-
int decimal_point_pos = -1;
1095-
1096-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
1097-
const char *start = state->cursor;
1098-
1099-
// Handle optional negative sign
1100-
if (*state->cursor == '-') {
1101-
negative = true;
1102-
state->cursor++;
1103-
if (state->cursor >= state->end || !rb_isdigit(*state->cursor)) {
1104-
raise_parse_error_at("invalid number: %s", state, start);
1105-
}
1106-
}
1107-
1108-
// Parse integer part and extract mantissa digits
1109-
while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
1110-
mantissa = mantissa * 10 + (*state->cursor - '0');
1111-
mantissa_digits++;
1112-
state->cursor++;
1113-
}
1114-
1115-
if (RB_UNLIKELY(start[0] == '0' && mantissa_digits > 1)) {
1116-
raise_parse_error_at("invalid number: %s", state, start);
1117-
} else if (RB_UNLIKELY(mantissa_digits > 1 && negative && start[1] == '0')) {
1118-
raise_parse_error_at("invalid number: %s", state, start);
1119-
}
1120-
1121-
// Parse fractional part
1122-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
1123-
integer = false;
1124-
decimal_point_pos = mantissa_digits; // Remember position of decimal point
1125-
state->cursor++;
1126-
1127-
if (state->cursor == state->end || !rb_isdigit(*state->cursor)) {
1128-
raise_parse_error_at("invalid number: %s", state, start);
1129-
}
1130-
1131-
while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
1132-
mantissa = mantissa * 10 + (*state->cursor - '0');
1133-
mantissa_digits++;
1134-
state->cursor++;
1135-
}
1136-
}
1137-
1138-
// Parse exponent
1139-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
1140-
integer = false;
1141-
state->cursor++;
1142-
1143-
bool negative_exponent = false;
1144-
if ((state->cursor < state->end) && ((*state->cursor == '-') || (*state->cursor == '+'))) {
1145-
negative_exponent = (*state->cursor == '-');
1146-
state->cursor++;
1147-
}
1148-
1149-
if (state->cursor == state->end || !rb_isdigit(*state->cursor)) {
1150-
raise_parse_error_at("invalid number: %s", state, start);
1151-
}
1152-
1153-
while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
1154-
exponent = exponent * 10 + (*state->cursor - '0');
1155-
state->cursor++;
1156-
}
1157-
1158-
if (negative_exponent) {
1159-
exponent = -exponent;
1160-
}
1161-
}
1162-
1163-
if (integer) {
1164-
return json_push_value(state, config, json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor));
1165-
}
1166-
1167-
// Adjust exponent based on decimal point position
1168-
if (decimal_point_pos >= 0) {
1169-
exponent -= (mantissa_digits - decimal_point_pos);
1170-
}
1171-
1172-
return json_push_value(state, config, json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor));
1171+
return json_push_value(state, config, json_parse_negative_number(state, config));
1172+
break;
11731173
}
1174+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1175+
return json_push_value(state, config, json_parse_positive_number(state, config));
1176+
break;
11741177
case '"': {
11751178
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
11761179
return json_parse_string(state, config, false);

0 commit comments

Comments
 (0)