Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions ext/json/ext/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,32 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c

#define MAX_FAST_INTEGER_SIZE 18

#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
// Additional References:
// https://johnnylee-sde.github.io/Fast-numeric-string-to-int/
// http://0x80.pl/notesen/2014-10-12-parsing-decimal-numbers-part-1-swar.html
// Converts exactly eight ASCII decimal digits starting at p into their numeric
// value using SWAR arithmetic on a single 64-bit word.
// The caller must guarantee that 8 bytes are readable at p and that all of them
// are digits; nothing is validated here. The byte layout assumed is
// little-endian (first character in the least significant byte).
static inline uint32_t parse_eight_digits_unrolled(const char *p) {
    const uint64_t ascii_zeros = 0x3030303030303030;
    const uint64_t pair_mask = 0x000000FF000000FF;
    const uint64_t weights_even = 0x000F424000000064; // 100 + (1000000ULL << 32)
    const uint64_t weights_odd = 0x0000271000000001; // 1 + (10000ULL << 32)

    uint64_t chunk;
    memcpy(&chunk, p, sizeof(chunk));

    // Turn each ASCII byte into its digit value 0..9.
    chunk -= ascii_zeros;
    // Every even byte now holds a two-digit value: 10 * d[i] + d[i + 1].
    chunk = (chunk * 10) + (chunk >> 8); // chunk = (chunk * 2561) >> 8;

    // Weight the four two-digit groups; the final result accumulates in the
    // upper 32 bits of the sum.
    const uint64_t even_pairs = (chunk & pair_mask) * weights_even;
    const uint64_t odd_pairs = ((chunk >> 16) & pair_mask) * weights_odd;
    return (uint32_t) ((even_pairs + odd_pairs) >> 32);
}

// From: https://github.com/simdjson/simdjson/blob/32b301893c13d058095a07d9868edaaa42ee07aa/include/simdjson/generic/numberparsing.h#L333
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
// Returns non-zero when the eight bytes at p are all ASCII digits ('0'..'9'),
// and zero otherwise. Always reads exactly 8 bytes from p, so the caller must
// ensure that many bytes are available.
static inline int has_eight_consecutive_digits(const char *p) {
uint64_t val;
// Load the 8 bytes through memcpy rather than a pointer cast.
memcpy(&val, p, sizeof(uint64_t));
// Per byte: the high nibble must be 3, and the high nibble of (byte + 6) must
// also still be 3, which rejects 0x3A..0x3F. The two nibbles are packed into a
// single byte and all eight lanes are compared against 0x33 at once.
return (((val & 0xF0F0F0F0F0F0F0F0) | (((val + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4)) == 0x3333333333333333);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm thinking we could use that trick combined with clz (or similar) to know how many consecutive digits we have.

I suspect 8 consecutive digits aren't that common, but if we also had a 4-digit (uint32_t) version and a fast dispatch, that could help on more benchmarks.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I think we can simply do (comp & 0xFFFFFFFF) == 0x33333333 to check for 4 consecutive digits.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

4-byte version:

// Converts exactly four ASCII decimal digits starting at p into their numeric
// value (0..9999) using SWAR arithmetic on a 32-bit word.
// The caller must guarantee that 4 bytes are readable at p and that all of them
// are digits; nothing is validated here. Assumes a little-endian byte layout
// (first character in the least significant byte).
static inline uint32_t parse_four_digits_unrolled(const char *p) {
    uint32_t val;
    // Load only the four bytes that are actually consumed, so a buffer with
    // fewer than eight bytes remaining is never over-read.
    memcpy(&val, p, sizeof(val));

    const uint32_t mask = 0x000000FF;
    const uint32_t mul1 = 100;
    // Turn each ASCII byte into its digit value 0..9.
    val -= 0x30303030;
    // Bytes 0 and 2 now hold the two-digit values 10 * d0 + d1 and 10 * d2 + d3.
    val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8;
    // Combine: (first pair) * 100 + (second pair).
    val = ((val & mask) * mul1) + ((val >> 16) & mask);
    return val;
}

}
#endif

static VALUE json_decode_large_integer(const char *start, long len)
{
VALUE buffer_v;
Expand Down Expand Up @@ -1105,6 +1131,19 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
}
}

#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
while (state->cursor + 8 <= state->end) {
if (has_eight_consecutive_digits(state->cursor)) {
uint64_t val = (uint64_t) parse_eight_digits_unrolled(state->cursor);
mantissa = mantissa * 100000000 + val;
mantissa_digits += 8;
state->cursor += 8;
continue;
}
break;
}
#endif

// Parse integer part and extract mantissa digits
while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
mantissa = mantissa * 10 + (*state->cursor - '0');
Expand All @@ -1128,6 +1167,18 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
raise_parse_error_at("invalid number: %s", state, start);
}

#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
while (state->cursor + 8 <= state->end) {
if (has_eight_consecutive_digits(state->cursor)) {
uint64_t val = (uint64_t) parse_eight_digits_unrolled(state->cursor);
mantissa = mantissa * 100000000 + val;
mantissa_digits += 8;
state->cursor += 8;
continue;
}
break;
}
#endif
while ((state->cursor < state->end) && rb_isdigit(*state->cursor)) {
mantissa = mantissa * 10 + (*state->cursor - '0');
mantissa_digits++;
Expand Down
6 changes: 6 additions & 0 deletions test/json/json_parser_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ def test_parse_numbers
capture_output { assert_equal(Float::INFINITY, parse("23456789012E666")) }
end

# An integer far beyond the 64-bit range must round-trip exactly, and the same
# digits followed by ".0" must parse to the equivalent Float.
def test_parse_bignum
  digits = '1234567890' * 10
  expected = Integer(digits)
  assert_equal(expected, JSON.parse(digits))
  assert_equal(expected.to_f, JSON.parse("#{digits}.0"))
end

def test_parse_bigdecimals
assert_equal(BigDecimal, JSON.parse('{"foo": 9.01234567890123456789}', decimal_class: BigDecimal)["foo"].class)
assert_equal(BigDecimal("0.901234567890123456789E1"),JSON.parse('{"foo": 9.01234567890123456789}', decimal_class: BigDecimal)["foo"] )
Expand Down
Loading