Skip to content

Commit

Permalink
[GR-18163] Implement missing StringScanner methods #peek_byte, #scan_…
Browse files Browse the repository at this point in the history
…byte and #scan_integer

PullRequest: truffleruby/4477
  • Loading branch information
andrykonchin committed Feb 21, 2025
2 parents ffd7f0b + 271fd61 commit 335c63c
Show file tree
Hide file tree
Showing 27 changed files with 1,314 additions and 27 deletions.
22 changes: 22 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
# 25.0.0

New features:


Bug fixes:


Compatibility:

* Implement `StringScanner#{peek_byte,scan_byte,scan_integer,named_captures}` methods (#3788, @andrykonchin).
* Support String patterns in `StringScanner#{exist?,scan_until,skip_until,check_until,search_full}` methods (@andrykonchin).

Performance:


Changes:


Memory Footprint:


# 24.2.0

New features:
Expand Down
73 changes: 61 additions & 12 deletions lib/truffle/strscan.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class ScanError < StandardError
class StringScanner

Id = 'None$Id'.freeze
Version = '1.0.0'.freeze
Version = '3.1.3'.freeze

attr_reader :pos
alias_method :pointer, :pos
Expand Down Expand Up @@ -205,6 +205,10 @@ def matched_size
Primitive.match_data_byte_end(@match, 0) - Primitive.match_data_byte_begin(@match, 0) if @match
end

# Returns the named captures of the current match as a Hash.
# When there is no current match (@match is nil) an empty Hash is returned.
def named_captures
  current = @match
  return {} if current.nil?

  current.named_captures || {}
end

# Delegates to @match.post_match for the current match.
# Returns nil when there is no current match (@match is nil).
def post_match
  current = @match
  current.post_match unless current.nil?
end
Expand Down Expand Up @@ -244,6 +248,39 @@ def scan(pattern)
scan_internal pattern, true, true, true
end

# Consumes a single byte at the current position and returns it as an Integer.
#
# At end of string the current match is cleared and nil is returned; the
# position is left untouched. Otherwise a one-byte match is recorded in
# @match, @prev_pos is set to the old position and @pos moves forward by one.
def scan_byte
  if eos?
    @match = nil
    return
  end

  start = @pos
  finish = start + 1

  # presumably builds a MatchData spanning bytes start...finish of @string
  # (the helper is defined outside this view)
  @match = Primitive.matchdata_create_single_group(/./mn, @string, start, finish)
  @prev_pos = start
  @pos = finish

  @string.getbyte(start)
end

# Scans an integer literal at the current position and returns it as an Integer.
#
# base - 10 (optionally signed decimal digits) or 16 (optionally signed hex
#        digits with an optional "0x" prefix). Defaults to 10.
#
# Returns nil when #scan finds no such literal at the current position.
# Raises Encoding::CompatibilityError when the scanned string's encoding is
# not ASCII-compatible, and ArgumentError for any other base.
def scan_integer(base: 10)
  encoding = @string.encoding
  unless encoding.ascii_compatible?
    raise Encoding::CompatibilityError, "ASCII incompatible encoding: #{encoding.name}"
  end

  # The base is validated before anything is scanned, so an unsupported base
  # never moves the scan position.
  literal_pattern =
    case base
    when 10
      /[+-]?\d+/
    when 16
      /[+-]?(?:0x)?[0-9a-fA-F]+/
    else
      raise ArgumentError, "Unsupported integer base: #{base.inspect}, expected 10 or 16"
    end

  substr = scan(literal_pattern)
  return nil unless substr

  # NOTE(review): the meaning of the two trailing `true` flags is defined by
  # the primitive, which is not visible here - confirm before changing them.
  Primitive.string_to_inum(substr, base, true, true)
end

# Forwards pattern to scan_internal with the flag triple (true, true, false).
# NOTE(review): #scan passes (true, true, true), so the last flag is what
# distinguishes the two - presumably "match only at the current position";
# confirm against scan_internal's signature.
def scan_until(pattern)
  scan_internal(pattern, true, true, false)
end
Expand Down Expand Up @@ -312,19 +349,20 @@ def peek(len)
@string.byteslice(@pos, len)
end

# Returns the byte at the current position as an Integer without changing
# any scanner state; the result is whatever String#getbyte gives for @pos
# (nil when @pos is past the last byte).
def peek_byte
  offset = @pos
  @string.getbyte(offset)
end

# Obsolete alias for #peek: emits a deprecation warning when $VERBOSE is
# truthy, then returns whatever peek(len) returns.
def peep(len)
  if $VERBOSE
    warn('StringScanner#peep is obsolete; use #peek instead')
  end

  peek(len)
end

private def scan_check_args(pattern, headonly)
case pattern
when String
raise TypeError, 'wrong argument type String (expected Regexp)' unless headonly
when Regexp
else
unless Primitive.is_a?(pattern, Regexp) || Primitive.is_a?(pattern, String)
raise TypeError, "bad pattern argument: #{pattern.inspect}"
end

raise ArgumentError, 'uninitialized StringScanner object' unless @string
end

Expand All @@ -335,12 +373,13 @@ def peep(len)
scan_check_args(pattern, headonly)

if Primitive.is_a?(pattern, String)
md = scan_internal_string_pattern(pattern)
md = scan_internal_string_pattern(pattern, headonly)
else
start = @fixed_anchor ? 0 : @pos
md = Truffle::RegexpOperations.match_in_region pattern, @string, @pos, @string.bytesize, headonly, start
Primitive.matchdata_fixup_positions(md, start) if md
end

if md
@match = md
scan_internal_set_pos_and_str(advance_pos, getstr, md)
Expand All @@ -349,13 +388,23 @@ def peep(len)
end
end

private def scan_internal_string_pattern(pattern)
# always headonly=true, see #scan_check_args
private def scan_internal_string_pattern(pattern, headonly)
pos = @pos
if @string.byteslice(pos..).start_with?(pattern)
Primitive.matchdata_create_single_group(pattern, @string.dup, pos, pos + pattern.bytesize)

if headonly
if @string.byteslice(pos..).start_with?(pattern)
Primitive.matchdata_create_single_group(pattern, @string.dup, pos, pos + pattern.bytesize)
else
nil
end
else
nil
relative_pos = @string.byteslice(pos..).byteindex(pattern)
if relative_pos
found_pos = pos + relative_pos
Primitive.matchdata_create_single_group(pattern, @string.dup, found_pos, found_pos + pattern.bytesize)
else
nil
end
end
end

Expand Down
36 changes: 36 additions & 0 deletions spec/ruby/library/stringscanner/captures_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
require_relative '../../spec_helper'
require 'strscan'

# Specs for StringScanner#captures, run against the fixed sample string
# 'Fri Dec 12 1975 14:39'.
describe "StringScanner#captures" do
before do
@s = StringScanner.new('Fri Dec 12 1975 14:39')
end

it "returns the array of captured values of the most recent matching" do
@s.exist?(/(?<wday>\w+) (?<month>\w+) (?<day>\d+)/)
@s.captures.should == ["Fri", "Dec", "12"]
end

it "returns nil if the last match fails" do
@s.scan(/nope/)
@s.captures.should == nil
end

it "returns nil if there is no any match done" do
@s.captures.should == nil
end

# The next two guards split on strscan 3.0.8: the reported value for an
# optional group that did not participate in the match changes from "" to nil.
# The (?<day>\s+)? group is written so that it cannot match "12".
version_is StringScanner::Version, ""..."3.0.8" do # ruby_version_is ""..."3.3.3"
it "returns '' for an optional capturing group if it doesn't match" do
@s.exist?(/(?<wday>\w+) (?<month>\w+) (?<day>\s+)?/)
@s.captures.should == ["Fri", "Dec", ""]
end
end

version_is StringScanner::Version, "3.0.8" do # ruby_version_is "3.3.3"
it "returns nil for an optional capturing group if it doesn't match" do
@s.exist?(/(?<wday>\w+) (?<month>\w+) (?<day>\s+)?/)
@s.captures.should == ["Fri", "Dec", nil]
end
end
end
18 changes: 18 additions & 0 deletions spec/ruby/library/stringscanner/charpos_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
require_relative '../../spec_helper'
require 'strscan'

# Specs for StringScanner#charpos: the scan position expressed in characters.
describe "StringScanner#charpos" do
it "returns character index corresponding to the current position" do
s = StringScanner.new("abc")

# scan_until(/b/) consumes "ab", i.e. two characters
s.scan_until(/b/)
s.charpos.should == 2
end

it "is multi-byte character sensitive" do
s = StringScanner.new("abcädeföghi")

# "abcädefö" is eight characters; presumably more than eight bytes because
# ä and ö are multi-byte in the source encoding (assumed UTF-8 - confirm),
# which is what makes this differ from a byte position.
s.scan_until(/ö/)
s.charpos.should == 8
end
end
68 changes: 68 additions & 0 deletions spec/ruby/library/stringscanner/check_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,72 @@
@s.matched.should == nil
end

# Specs for StringScanner#[] with a capture-group name, called right after
# #check. @s is created outside this block; the examples rely on it starting
# with "This" (presumably set up in a `before` hook earlier in the file).
describe "#[] successive call with a capture group name" do
context "when #check was called with a Regexp pattern" do
it "returns matched substring when matching succeeded" do
@s.check(/(?<a>This)/)
@s.should.matched?
@s[:a].should == "This"
end

it "returns nil when matching failed" do
@s.check(/(?<a>2008)/)
@s.should_not.matched?
@s[:a].should be_nil
end
end

context "when #check was called with a String pattern" do
# A String pattern has no named groups, so looking one up after a successful
# match is version dependent: nil for strscan 3.1.1...3.1.3, IndexError from 3.1.3.
# https://github.com/ruby/strscan/issues/139
version_is StringScanner::Version, "3.1.1"..."3.1.3" do # ruby_version_is "3.4.0"..."3.4.3"
it "returns nil when matching succeeded" do
@s.check("This")
@s.should.matched?
@s[:a].should be_nil
end
end
version_is StringScanner::Version, "3.1.3" do # ruby_version_is "3.4"
it "raises IndexError when matching succeeded" do
@s.check("This")
@s.should.matched?
-> { @s[:a] }.should raise_error(IndexError)
end
end

it "returns nil when matching failed" do
@s.check("2008")
@s.should_not.matched?
@s[:a].should be_nil
end

it "returns a matching substring when given Integer index" do
@s.check("This")
@s[0].should == "This"
end

# A String-pattern match must not expose named groups left over from an
# earlier Regexp match on the same scanner.
# https://github.com/ruby/strscan/issues/135
version_is StringScanner::Version, "3.1.1"..."3.1.3" do # ruby_version_is "3.4.0"..."3.4.3"
it "ignores the previous matching with Regexp" do
@s.exist?(/(?<a>This)/)
@s.should.matched?
@s[:a].should == "This"

@s.check("This")
@s.should.matched?
@s[:a].should be_nil
end
end
# NOTE(review): this guard is open-ended ("3.1.3" and later), but its trailing
# ruby_version_is comment repeats the closed range of the guard above - it
# looks copy-pasted; confirm the intended Ruby version and update the comment.
version_is StringScanner::Version, "3.1.3" do # ruby_version_is "3.4.0"..."3.4.3"
it "ignores the previous matching with Regexp" do
@s.exist?(/(?<a>This)/)
@s.should.matched?
@s[:a].should == "This"

@s.check("This")
@s.should.matched?
-> { @s[:a] }.should raise_error(IndexError)
end
end
end
end
end
Loading

0 comments on commit 335c63c

Please sign in to comment.