Skip to content

Commit 2681b23

Browse files
committed
parser.c: Extract json_parse_number
1 parent e1f4224 commit 2681b23

File tree

1 file changed

+95
-89
lines changed

1 file changed

+95
-89
lines changed

ext/json/ext/parser/parser.c

Lines changed: 95 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,95 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
10221022
return Qfalse;
10231023
}
10241024

1025+
/*
 * Parse the digits of a JSON number starting at state->cursor.
 *
 * Accepted shape: (0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?
 * An optional leading '-' is NOT consumed here; the caller consumes it and
 * reports it through `negative`.
 *
 * state     Parser state; state->cursor is advanced past the number and is
 *           never read at or beyond state->end.
 * config    Forwarded unchanged to json_decode_float().
 * negative  True when the caller already consumed a leading '-'.
 * start     First byte of the literal (including any '-'); used for error
 *           reporting and forwarded to the decode helpers.
 *
 * Returns the value produced by json_decode_integer() when the literal has
 * neither a fraction nor an exponent, otherwise the value produced by
 * json_decode_float().
 *
 * Reports "invalid number: %s" through raise_parse_error_at() when:
 *   - the integer part starts with '0' and has more than one digit,
 *   - `negative` is set and no digit follows,
 *   - '.' is not followed by a digit,
 *   - the exponent marker (and optional sign) is not followed by a digit.
 * NOTE(review): raise_parse_error_at() is presumably non-returning (raises a
 * Ruby exception) -- confirm against its definition.
 */
static inline VALUE json_parse_number(JSON_ParserState *state, JSON_ParserConfig *config, bool negative, const char *start)
{
    bool integer = true;

    // Variables for Ryu optimization - extract digits during parsing
    uint64_t mantissa = 0;
    int mantissa_digits = 0;
    int32_t exponent = 0;
    int decimal_point_pos = -1;

    // The cursor may already sit at state->end (e.g. a lone '-' as the last
    // byte of the input), so guard the read. The value is only consulted when
    // at least two digits were consumed, hence the placeholder is never
    // observable.
    const char first_digit = (state->cursor < state->end) ? *state->cursor : '\0';

    // Parse integer part and extract mantissa digits.
    // NOTE(review): mantissa wraps (unsigned) for very long digit runs;
    // presumably the decode helpers use mantissa_digits to detect that and
    // re-read the text between start and the cursor -- confirm.
    while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
        mantissa = mantissa * 10 + (*state->cursor - '0');
        mantissa_digits++;
        state->cursor++;
    }

    if (RB_UNLIKELY((first_digit == '0' && mantissa_digits > 1) || (negative && mantissa_digits == 0))) {
        raise_parse_error_at("invalid number: %s", state, start);
    }

    // Parse fractional part
    if ((state->cursor < state->end) && (*state->cursor == '.')) {
        integer = false;
        decimal_point_pos = mantissa_digits; // Remember position of decimal point
        state->cursor++;

        if (state->cursor == state->end || !rb_isdigit(*state->cursor)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }

        while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
            mantissa = mantissa * 10 + (*state->cursor - '0');
            mantissa_digits++;
            state->cursor++;
        }
    }

    // Parse exponent
    if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
        integer = false;
        state->cursor++;

        bool negative_exponent = false;
        if ((state->cursor < state->end) && ((*state->cursor == '-') || (*state->cursor == '+'))) {
            negative_exponent = (*state->cursor == '-');
            state->cursor++;
        }

        if (state->cursor == state->end || !rb_isdigit(*state->cursor)) {
            raise_parse_error_at("invalid number: %s", state, start);
        }

        while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
            const int32_t digit = *state->cursor - '0';

            // Saturate instead of overflowing: signed overflow is undefined
            // behaviour and the exponent text is attacker-controlled
            // (e.g. "1e99999999999"). Once saturated the value stays pinned
            // at INT32_MAX while the remaining digits are still consumed.
            if (exponent > (INT32_MAX - digit) / 10) {
                exponent = INT32_MAX;
            } else {
                exponent = exponent * 10 + digit;
            }
            state->cursor++;
        }

        if (negative_exponent) {
            exponent = -exponent; // safe: exponent <= INT32_MAX
        }
    }

    if (integer) {
        return json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor);
    }

    // Adjust exponent based on decimal point position. Done in 64 bits and
    // clamped so that a large negative exponent combined with many fractional
    // digits cannot underflow int32_t.
    if (decimal_point_pos >= 0) {
        const int64_t fraction_digits = (int64_t)mantissa_digits - decimal_point_pos;
        const int64_t adjusted = (int64_t)exponent - fraction_digits;

        if (adjusted < INT32_MIN) {
            exponent = INT32_MIN;
        } else {
            exponent = (int32_t)adjusted;
        }
    }

    return json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor);
}
1101+
1102+
/*
 * Parse an unsigned JSON number whose first byte is at state->cursor.
 *
 * The current cursor position doubles as the start of the literal, and the
 * number is handed to json_parse_number() with the sign flag cleared.
 * Returns whatever json_parse_number() returns.
 */
static inline VALUE json_parse_positive_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    const char *literal_start = state->cursor;

    return json_parse_number(state, config, false, literal_start);
}
1106+
1107+
/*
 * Parse a JSON number whose sign character is at state->cursor.
 *
 * The sign position is remembered as the start of the literal, the cursor is
 * stepped over it, and the remaining digits are handed to
 * json_parse_number() with the sign flag set.
 * Returns whatever json_parse_number() returns.
 */
static inline VALUE json_parse_negative_number(JSON_ParserState *state, JSON_ParserConfig *config)
{
    // Post-increment: capture the sign position, then move past it.
    const char *sign_pos = state->cursor++;

    return json_parse_number(state, config, true, sign_pos);
}
1113+
10251114
static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10261115
{
10271116
json_eat_whitespace(state);
@@ -1072,7 +1161,7 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10721161

10731162
raise_parse_error("unexpected token %s", state);
10741163
break;
1075-
case '-':
1164+
case '-': {
10761165
// Note: memcmp with a small power of two compile to an integer comparison
10771166
if ((state->end - state->cursor >= 9) && (memcmp(state->cursor + 1, "Infinity", 8) == 0)) {
10781167
if (config->allow_nan) {
@@ -1082,95 +1171,12 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10821171
raise_parse_error("unexpected token %s", state);
10831172
}
10841173
}
1085-
// Fallthrough
1086-
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': {
1087-
bool integer = true;
1088-
1089-
// Variables for Ryu optimization - extract digits during parsing
1090-
uint64_t mantissa = 0;
1091-
int mantissa_digits = 0;
1092-
int32_t exponent = 0;
1093-
bool negative = false;
1094-
int decimal_point_pos = -1;
1095-
1096-
// /\A-?(0|[1-9]\d*)(\.\d+)?([Ee][-+]?\d+)?/
1097-
const char *start = state->cursor;
1098-
1099-
// Handle optional negative sign
1100-
if (*state->cursor == '-') {
1101-
negative = true;
1102-
state->cursor++;
1103-
if (state->cursor >= state->end || !rb_isdigit(*state->cursor)) {
1104-
raise_parse_error_at("invalid number: %s", state, start);
1105-
}
1106-
}
1107-
1108-
// Parse integer part and extract mantissa digits
1109-
while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
1110-
mantissa = mantissa * 10 + (*state->cursor - '0');
1111-
mantissa_digits++;
1112-
state->cursor++;
1113-
}
1114-
1115-
if (RB_UNLIKELY(start[0] == '0' && mantissa_digits > 1)) {
1116-
raise_parse_error_at("invalid number: %s", state, start);
1117-
} else if (RB_UNLIKELY(mantissa_digits > 1 && negative && start[1] == '0')) {
1118-
raise_parse_error_at("invalid number: %s", state, start);
1119-
}
1120-
1121-
// Parse fractional part
1122-
if ((state->cursor < state->end) && (*state->cursor == '.')) {
1123-
integer = false;
1124-
decimal_point_pos = mantissa_digits; // Remember position of decimal point
1125-
state->cursor++;
1126-
1127-
if (state->cursor == state->end || !rb_isdigit(*state->cursor)) {
1128-
raise_parse_error_at("invalid number: %s", state, start);
1129-
}
1130-
1131-
while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
1132-
mantissa = mantissa * 10 + (*state->cursor - '0');
1133-
mantissa_digits++;
1134-
state->cursor++;
1135-
}
1136-
}
1137-
1138-
// Parse exponent
1139-
if ((state->cursor < state->end) && ((*state->cursor == 'e') || (*state->cursor == 'E'))) {
1140-
integer = false;
1141-
state->cursor++;
1142-
1143-
bool negative_exponent = false;
1144-
if ((state->cursor < state->end) && ((*state->cursor == '-') || (*state->cursor == '+'))) {
1145-
negative_exponent = (*state->cursor == '-');
1146-
state->cursor++;
1147-
}
1148-
1149-
if (state->cursor == state->end || !rb_isdigit(*state->cursor)) {
1150-
raise_parse_error_at("invalid number: %s", state, start);
1151-
}
1152-
1153-
while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
1154-
exponent = exponent * 10 + (*state->cursor - '0');
1155-
state->cursor++;
1156-
}
1157-
1158-
if (negative_exponent) {
1159-
exponent = -exponent;
1160-
}
1161-
}
1162-
1163-
if (integer) {
1164-
return json_push_value(state, config, json_decode_integer(mantissa, mantissa_digits, negative, start, state->cursor));
1165-
}
1166-
1167-
// Adjust exponent based on decimal point position
1168-
if (decimal_point_pos >= 0) {
1169-
exponent -= (mantissa_digits - decimal_point_pos);
1170-
}
1171-
1172-
return json_push_value(state, config, json_decode_float(config, mantissa, mantissa_digits, exponent, negative, start, state->cursor));
1174+
return json_push_value(state, config, json_parse_negative_number(state, config));
1175+
break;
11731176
}
1177+
case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9':
1178+
return json_push_value(state, config, json_parse_positive_number(state, config));
1179+
break;
11741180
case '"': {
11751181
// %r{\A"[^"\\\t\n\x00]*(?:\\[bfnrtu\\/"][^"\\]*)*"}
11761182
return json_parse_string(state, config, false);

0 commit comments

Comments
 (0)