From 7d0578211045a1e6874ea5a0c1df491163d60070 Mon Sep 17 00:00:00 2001 From: Pearl Li Date: Wed, 6 Apr 2022 18:58:02 -0700 Subject: [PATCH] Parse Inf as float --- src/field.jl | 127 +++++++++++++++++++++++---------------- src/utf8optimizations.jl | 91 +++++++++++++++------------- test/runtests.jl | 8 +++ 3 files changed, 132 insertions(+), 94 deletions(-) diff --git a/src/field.jl b/src/field.jl index 1ec8a20..7269dbe 100644 --- a/src/field.jl +++ b/src/field.jl @@ -140,6 +140,23 @@ end @inline _is_positive(str, i) = str[i]=='+' +@inline function _is_inf(str, i) + y = iterate(str, i) + if !isnothing(y) && lowercase(y[1]) == 'i' + i = y[2] + y = iterate(str, i) + if !isnothing(y) && lowercase(y[1]) == 'n' + i = y[2] + y = iterate(str, i) + return !isnothing(y) && lowercase(y[1]) == 'f' + else + return false + end + else + return false + end +end + const pre_comp_exp_double = Double64[Double64(10.0)^i for i=0:308] @inline function convert_to_double(f1::Int64, exp::Int) @@ -178,69 +195,76 @@ end i = y1[2] end - f1::Int64 = 0 + # check if inf + y2 = iterate(str, i) + if y2!==nothing && _is_inf(str, i) + f = F(Inf) + i = y2[2] + 2 + else + f1::Int64 = 0 - # read an integer up to the decimal point - f1, rval1, idecpt = parse_uint_and_stop(str, i, len, f1) - idecpt = read_digits(str, idecpt, len) # get any trailing digits - i = idecpt + # read an integer up to the decimal point + f1, rval1, idecpt = parse_uint_and_stop(str, i, len, f1) + idecpt = read_digits(str, idecpt, len) # get any trailing digits + i = idecpt - ie = i - frac_digits = 0 + ie = i + frac_digits = 0 - # next thing must be dec pt. - y2 = iterate(str, i) - if y2!==nothing && y2[1]=='.' - i =y2[2] - f1, rval2, ie = parse_uint_and_stop(str, i, len, f1) - # TODO This is incorrect for string types where a digit takes up - # more than one codeunit, we need to return the number of digits - # from parse_uint_and_stop instead. Ok for now because we are - # not handling any such string types. - frac_digits = ie - i - - ie = read_digits(str, ie, len) # get any trailing digits - elseif !rval1 # no first number, and now no deciaml point => invalid - @goto error - end + # next thing must be dec pt. + y2 = iterate(str, i) + if y2!==nothing && y2[1]=='.' + i =y2[2] + f1, rval2, ie = parse_uint_and_stop(str, i, len, f1) + # TODO This is incorrect for string types where a digit takes up + # more than one codeunit, we need to return the number of digits + # from parse_uint_and_stop instead. Ok for now because we are + # not handling any such string types. + frac_digits = ie - i + + ie = read_digits(str, ie, len) # get any trailing digits + elseif !rval1 # no first number, and now no deciaml point => invalid + @goto error + end - # Next thing must be exponent - i = ie - eval::Int32 = 0 + # Next thing must be exponent + i = ie + eval::Int32 = 0 - y3 = iterate(str, i) - if y3!==nothing && _is_e(str, i) - i = y3[2] - - y4 = iterate(str, i) - if y4!==nothing - enegate = false - if _is_negative(str, i) - enegate = true - i = y4[2] - elseif _is_positive(str, i) - i = y4[2] + y3 = iterate(str, i) + if y3!==nothing && _is_e(str, i) + i = y3[2] + + y4 = iterate(str, i) + if y4!==nothing + enegate = false + if _is_negative(str, i) + enegate = true + i = y4[2] + elseif _is_positive(str, i) + i = y4[2] + end + end + eval, rval3, i = parse_uint_and_stop(str, i, len, eval) + if enegate + eval *= Int32(-1) end end - eval, rval3, i = parse_uint_and_stop(str, i, len, eval) - if enegate - eval *= Int32(-1) - end - end - exp = eval - frac_digits + exp = eval - frac_digits - maxexp = 308 - minexp = -307 + maxexp = 308 + minexp = -307 - if frac_digits <= 15 && -22 <= exp <= 22 - if exp >= 0 - f = F(f1)*10.0^exp + if frac_digits <= 15 && -22 <= exp <= 22 + if exp >= 0 + f = F(f1)*10.0^exp + else + f = F(f1)/10.0^(-exp) + end else - f = F(f1)/10.0^(-exp) + f = convert_to_double(f1, exp) end - else - f = convert_to_double(f1, exp) end if negate @@ -754,4 +778,3 @@ function tryparsenext(f::Field{T}, str, i, len, opts) where {T} @label done return R(convert(T, res)), i end - diff --git a/src/utf8optimizations.jl b/src/utf8optimizations.jl index 59dda15..f988b84 100644 --- a/src/utf8optimizations.jl +++ b/src/utf8optimizations.jl @@ -140,62 +140,69 @@ const pre_comp_exp = Float64[10.0^i for i=0:22] i +=1 end - f1::Int64 = 0 + # check if inf + y2 = iterate(str, i) + if y2!==nothing && _is_inf(str, i) + f = F(Inf) + i = y2[2] + 2 + else + f1::Int64 = 0 - # read an integer up to the decimal point - f1, rval1, idecpt = parse_uint_and_stop(str, i, len, f1) - idecpt = read_digits(str, idecpt, len) # get any trailing digits - i = idecpt + # read an integer up to the decimal point + f1, rval1, idecpt = parse_uint_and_stop(str, i, len, f1) + idecpt = read_digits(str, idecpt, len) # get any trailing digits + i = idecpt - ie = i - frac_digits = 0 + ie = i + frac_digits = 0 - # next thing must be dec pt. - if i <= len && @inbounds(codeunit(str, i)) == 0x2e # Check for '.' - i += 1 - f1, rval2, ie = parse_uint_and_stop(str, i, len, f1) - frac_digits = ie - i + # next thing must be dec pt. + if i <= len && @inbounds(codeunit(str, i)) == 0x2e # Check for '.' + i += 1 + f1, rval2, ie = parse_uint_and_stop(str, i, len, f1) + frac_digits = ie - i - ie = read_digits(str, ie, len) # get any trailing digits - elseif !rval1 # no first number, and now no deciaml point => invalid - @goto error - end + ie = read_digits(str, ie, len) # get any trailing digits + elseif !rval1 # no first number, and now no deciaml point => invalid + @goto error + end - # Next thing must be exponent - i = ie - eval::Int32 = 0 + # Next thing must be exponent + i = ie + eval::Int32 = 0 - if i <= len && _is_e(str, i) - i += 1 + if i <= len && _is_e(str, i) + i += 1 - enegate = false - if i<=len - if _is_negative(str, i) - enegate = true - i += 1 - elseif _is_positive(str, i) - i += 1 + enegate = false + if i<=len + if _is_negative(str, i) + enegate = true + i += 1 + elseif _is_positive(str, i) + i += 1 + end + end + eval, rval3, i = parse_uint_and_stop(str, i, len, eval) + if enegate + eval *= Int32(-1) end end - eval, rval3, i = parse_uint_and_stop(str, i, len, eval) - if enegate - eval *= Int32(-1) - end - end - exp = eval - frac_digits + exp = eval - frac_digits - maxexp = 308 - minexp = -307 + maxexp = 308 + minexp = -307 - if frac_digits <= 15 && -22 <= exp <= 22 - if exp >= 0 - f = F(f1)*pre_comp_exp[exp+1] + if frac_digits <= 15 && -22 <= exp <= 22 + if exp >= 0 + f = F(f1)*pre_comp_exp[exp+1] + else + f = F(f1)/pre_comp_exp[-exp+1] + end else - f = F(f1)/pre_comp_exp[-exp+1] + f = convert_to_double(f1, exp) end - else - f = convert_to_double(f1, exp) end if negate diff --git a/test/runtests.jl b/test/runtests.jl index 4bdf90f..6da7353 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -75,6 +75,10 @@ import TextParse: fromtype, Percentage @test tryparsenext(fromtype(Float64), "-1e-12") |> unwrap == (-1.0e-12,7) @test tryparsenext(fromtype(Float64), "-1.0E-12", 1, 8) |> unwrap == (-1.0e-12,9) @test tryparsenext(fromtype(Float64), "5.e-3", 1, 5) |> unwrap == (5.0e-3,6) # 32 + @test tryparsenext(fromtype(Float64), "Inf", 1, 3) |> unwrap == (Inf,4) + @test tryparsenext(fromtype(Float64), "inf", 1, 3) |> unwrap == (Inf,4) + @test tryparsenext(fromtype(Float64), "INF", 1, 3) |> unwrap == (Inf,4) + @test tryparsenext(fromtype(Float64), "-Inf", 1, 4) |> unwrap == (-Inf,5) @test tryparsenext(Percentage(), "33%") |> unwrap == (.33,4) @test tryparsenext(Percentage(), "3.3%") |> unwrap == (.033,5) @@ -89,6 +93,10 @@ import TextParse: fromtype, Percentage @test tryparsenext(fromtype(Float64), SubString("-1e-12", 1)) |> unwrap == (-1.0e-12,7) @test tryparsenext(fromtype(Float64), SubString("-1.0E-12", 1), 1, 8) |> unwrap == (-1.0e-12,9) @test tryparsenext(fromtype(Float64), SubString("5.e-3", 1), 1, 5) |> unwrap == (5.0e-3,6) # 32 + @test tryparsenext(fromtype(Float64), SubString("Inf", 1), 1, 3) |> unwrap == (Inf,4) + @test tryparsenext(fromtype(Float64), SubString("inf", 1), 1, 3) |> unwrap == (Inf,4) + @test tryparsenext(fromtype(Float64), SubString("INF", 1), 1, 3) |> unwrap == (Inf,4) + @test tryparsenext(fromtype(Float64), SubString("-Inf", 1), 1, 4) |> unwrap == (-Inf,5) @test tryparsenext(Percentage(), SubString("33%", 1)) |> unwrap == (.33,4) @test tryparsenext(Percentage(), SubString("3.3%", 1)) |> unwrap == (.033,5)