From 7d0578211045a1e6874ea5a0c1df491163d60070 Mon Sep 17 00:00:00 2001
From: Pearl Li <pearlzli16@gmail.com>
Date: Wed, 6 Apr 2022 18:58:02 -0700
Subject: [PATCH] Parse Inf as float

---
 src/field.jl             | 127 +++++++++++++++++++++++----------------
 src/utf8optimizations.jl |  91 +++++++++++++++-------------
 test/runtests.jl         |   8 +++
 3 files changed, 132 insertions(+), 94 deletions(-)

diff --git a/src/field.jl b/src/field.jl
index 1ec8a20..7269dbe 100644
--- a/src/field.jl
+++ b/src/field.jl
@@ -140,6 +140,23 @@ end
 
 @inline _is_positive(str, i) = str[i]=='+'
 
+# Return `true` when `str` contains the three letters "inf" starting at index
+# `i`, compared ASCII case-insensitively ("Inf", "INF", "iNf", ...), and `false`
+# otherwise, including when `str` ends before three characters are read.
+# Each letter is matched explicitly instead of through `lowercase`, because the
+# Unicode-aware `lowercase` maps 'İ' (U+0130) to 'i' and would accept "İnf".
+# Characters are fetched with `iterate`; nothing is checked about what follows
+# the 'f'. No `len` bound is taken, so a caller that limits the parse range
+# must enforce that limit itself.
+@inline function _is_inf(str, i)
+    y = iterate(str, i)
+    (y !== nothing && (y[1] == 'i' || y[1] == 'I')) || return false
+    y = iterate(str, y[2])
+    (y !== nothing && (y[1] == 'n' || y[1] == 'N')) || return false
+    y = iterate(str, y[2])
+    return y !== nothing && (y[1] == 'f' || y[1] == 'F')
+end
+
 const pre_comp_exp_double = Double64[Double64(10.0)^i for i=0:308]
 
 @inline function convert_to_double(f1::Int64, exp::Int)
@@ -178,69 +195,76 @@ end
         i = y1[2]
     end
 
-    f1::Int64 = 0
+    # check if inf
+    y2 = iterate(str, i)
+    if y2!==nothing && _is_inf(str, i)
+        f = F(Inf)
+        i = y2[2] + 2
+    else
+        f1::Int64 = 0
 
-    # read an integer up to the decimal point
-    f1, rval1, idecpt = parse_uint_and_stop(str, i, len, f1)
-    idecpt = read_digits(str, idecpt, len) # get any trailing digits
-    i = idecpt
+        # read an integer up to the decimal point
+        f1, rval1, idecpt = parse_uint_and_stop(str, i, len, f1)
+        idecpt = read_digits(str, idecpt, len) # get any trailing digits
+        i = idecpt
 
-    ie = i
-    frac_digits = 0
+        ie = i
+        frac_digits = 0
 
-    # next thing must be dec pt.
-    y2 = iterate(str, i)
-    if y2!==nothing && y2[1]=='.'
-        i =y2[2]
-        f1, rval2, ie = parse_uint_and_stop(str, i, len, f1)
-        # TODO This is incorrect for string types where a digit takes up
-        # more than one codeunit, we need to return the number of digits
-        # from parse_uint_and_stop instead. Ok for now because we are
-        # not handling any such string types.
-        frac_digits = ie - i
-
-        ie = read_digits(str, ie, len) # get any trailing digits
-    elseif !rval1 # no first number, and now no deciaml point => invalid
-        @goto error
-    end
+        # next thing must be dec pt.
+        y2 = iterate(str, i)
+        if y2!==nothing && y2[1]=='.'
+            i =y2[2]
+            f1, rval2, ie = parse_uint_and_stop(str, i, len, f1)
+            # TODO This is incorrect for string types where a digit takes up
+            # more than one codeunit, we need to return the number of digits
+            # from parse_uint_and_stop instead. Ok for now because we are
+            # not handling any such string types.
+            frac_digits = ie - i
+
+            ie = read_digits(str, ie, len) # get any trailing digits
+        elseif !rval1 # no first number, and now no decimal point => invalid
+            @goto error
+        end
 
-    # Next thing must be exponent
-    i = ie
-    eval::Int32 = 0
+        # Next thing must be exponent
+        i = ie
+        eval::Int32 = 0
 
-    y3 = iterate(str, i)
-    if y3!==nothing && _is_e(str, i)
-        i = y3[2]
-
-        y4 = iterate(str, i)
-        if y4!==nothing
-            enegate = false
-            if _is_negative(str, i)
-                enegate = true
-                i = y4[2]
-            elseif _is_positive(str, i)
-                i = y4[2]
+        y3 = iterate(str, i)
+        if y3!==nothing && _is_e(str, i)
+            i = y3[2]
+
+            y4 = iterate(str, i)
+            if y4!==nothing
+                enegate = false
+                if _is_negative(str, i)
+                    enegate = true
+                    i = y4[2]
+                elseif _is_positive(str, i)
+                    i = y4[2]
+                end
+            end
+            eval, rval3, i = parse_uint_and_stop(str, i, len, eval)
+            if enegate
+                eval *= Int32(-1)
             end
         end
-        eval, rval3, i = parse_uint_and_stop(str, i, len, eval)
-        if enegate
-            eval *= Int32(-1)
-        end
-    end
 
-    exp = eval - frac_digits
+        exp = eval - frac_digits
 
-    maxexp = 308
-    minexp = -307
+        maxexp = 308
+        minexp = -307
 
-    if frac_digits <= 15 && -22 <= exp <= 22
-        if exp >= 0
-            f = F(f1)*10.0^exp
+        if frac_digits <= 15 && -22 <= exp <= 22
+            if exp >= 0
+                f = F(f1)*10.0^exp
+            else
+                f = F(f1)/10.0^(-exp)
+            end
         else
-            f = F(f1)/10.0^(-exp)
+            f = convert_to_double(f1, exp)
         end
-    else
-          f = convert_to_double(f1, exp)
     end
 
     if negate
@@ -754,4 +778,3 @@ function tryparsenext(f::Field{T}, str, i, len, opts) where {T}
     @label done
     return R(convert(T, res)), i
 end
-
diff --git a/src/utf8optimizations.jl b/src/utf8optimizations.jl
index 59dda15..f988b84 100644
--- a/src/utf8optimizations.jl
+++ b/src/utf8optimizations.jl
@@ -140,62 +140,69 @@ const pre_comp_exp = Float64[10.0^i for i=0:22]
         i +=1
     end
 
-    f1::Int64 = 0
+    # check if inf
+    y2 = iterate(str, i)
+    if y2!==nothing && _is_inf(str, i)
+        f = F(Inf)
+        i = y2[2] + 2
+    else
+        f1::Int64 = 0
 
-    # read an integer up to the decimal point
-    f1, rval1, idecpt = parse_uint_and_stop(str, i, len, f1)
-    idecpt = read_digits(str, idecpt, len) # get any trailing digits
-    i = idecpt
+        # read an integer up to the decimal point
+        f1, rval1, idecpt = parse_uint_and_stop(str, i, len, f1)
+        idecpt = read_digits(str, idecpt, len) # get any trailing digits
+        i = idecpt
 
-    ie = i
-    frac_digits = 0
+        ie = i
+        frac_digits = 0
 
-    # next thing must be dec pt.
-    if i <= len && @inbounds(codeunit(str, i)) == 0x2e # Check for '.'
-        i += 1
-        f1, rval2, ie = parse_uint_and_stop(str, i, len, f1)
-        frac_digits = ie - i
+        # next thing must be dec pt.
+        if i <= len && @inbounds(codeunit(str, i)) == 0x2e # Check for '.'
+            i += 1
+            f1, rval2, ie = parse_uint_and_stop(str, i, len, f1)
+            frac_digits = ie - i
 
-        ie = read_digits(str, ie, len) # get any trailing digits
-    elseif !rval1 # no first number, and now no deciaml point => invalid
-        @goto error
-    end
+            ie = read_digits(str, ie, len) # get any trailing digits
+        elseif !rval1 # no first number, and now no decimal point => invalid
+            @goto error
+        end
 
-    # Next thing must be exponent
-    i = ie
-    eval::Int32 = 0
+        # Next thing must be exponent
+        i = ie
+        eval::Int32 = 0
 
-    if i <= len && _is_e(str, i)
-        i += 1
+        if i <= len && _is_e(str, i)
+            i += 1
 
-        enegate = false
-        if i<=len
-            if _is_negative(str, i)
-                enegate = true
-                i += 1
-            elseif _is_positive(str, i)
-                i += 1
+            enegate = false
+            if i<=len
+                if _is_negative(str, i)
+                    enegate = true
+                    i += 1
+                elseif _is_positive(str, i)
+                    i += 1
+                end
+            end
+            eval, rval3, i = parse_uint_and_stop(str, i, len, eval)
+            if enegate
+                eval *= Int32(-1)
             end
         end
-        eval, rval3, i = parse_uint_and_stop(str, i, len, eval)
-        if enegate
-            eval *= Int32(-1)
-        end
-    end
 
-    exp = eval - frac_digits
+        exp = eval - frac_digits
 
-    maxexp = 308
-    minexp = -307
+        maxexp = 308
+        minexp = -307
 
-    if frac_digits <= 15 && -22 <= exp <= 22
-        if exp >= 0
-            f = F(f1)*pre_comp_exp[exp+1]
+        if frac_digits <= 15 && -22 <= exp <= 22
+            if exp >= 0
+                f = F(f1)*pre_comp_exp[exp+1]
+            else
+                f = F(f1)/pre_comp_exp[-exp+1]
+            end
         else
-            f = F(f1)/pre_comp_exp[-exp+1]
+            f = convert_to_double(f1, exp)
         end
-    else
-          f = convert_to_double(f1, exp)
     end
 
     if negate
diff --git a/test/runtests.jl b/test/runtests.jl
index 4bdf90f..6da7353 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -75,6 +75,10 @@ import TextParse: fromtype, Percentage
     @test tryparsenext(fromtype(Float64), "-1e-12") |> unwrap == (-1.0e-12,7)
     @test tryparsenext(fromtype(Float64), "-1.0E-12", 1, 8) |> unwrap == (-1.0e-12,9)
     @test tryparsenext(fromtype(Float64), "5.e-3", 1, 5) |> unwrap == (5.0e-3,6) # 32
+    @test tryparsenext(fromtype(Float64), "Inf", 1, 3) |> unwrap == (Inf,4)
+    @test tryparsenext(fromtype(Float64), "inf", 1, 3) |> unwrap == (Inf,4)
+    @test tryparsenext(fromtype(Float64), "INF", 1, 3) |> unwrap == (Inf,4)
+    @test tryparsenext(fromtype(Float64), "-Inf", 1, 4) |> unwrap == (-Inf,5)
     @test tryparsenext(Percentage(), "33%") |> unwrap == (.33,4)
     @test tryparsenext(Percentage(), "3.3%") |> unwrap == (.033,5)
 
@@ -89,6 +93,10 @@ import TextParse: fromtype, Percentage
     @test tryparsenext(fromtype(Float64), SubString("-1e-12", 1)) |> unwrap == (-1.0e-12,7)
     @test tryparsenext(fromtype(Float64), SubString("-1.0E-12", 1), 1, 8) |> unwrap == (-1.0e-12,9)
     @test tryparsenext(fromtype(Float64), SubString("5.e-3", 1), 1, 5) |> unwrap == (5.0e-3,6) # 32
+    @test tryparsenext(fromtype(Float64), SubString("Inf", 1), 1, 3) |> unwrap == (Inf,4)
+    @test tryparsenext(fromtype(Float64), SubString("inf", 1), 1, 3) |> unwrap == (Inf,4)
+    @test tryparsenext(fromtype(Float64), SubString("INF", 1), 1, 3) |> unwrap == (Inf,4)
+    @test tryparsenext(fromtype(Float64), SubString("-Inf", 1), 1, 4) |> unwrap == (-Inf,5)
     @test tryparsenext(Percentage(), SubString("33%", 1)) |> unwrap == (.33,4)
     @test tryparsenext(Percentage(), SubString("3.3%", 1)) |> unwrap == (.033,5)