From e87f70130d9e43681c56d14d6f7181d9104e98d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Kondzior?= Date: Tue, 28 Oct 2025 21:20:08 +0100 Subject: [PATCH] Make URI parsing more platform aware --- lib/ruby_indexer/lib/ruby_indexer/uri.rb | 57 ++++++++++++++++++------ lib/ruby_indexer/test/uri_test.rb | 30 ++++++------- sorbet/rbi/shims/uri.rbi | 16 +++++++ 3 files changed, 75 insertions(+), 28 deletions(-) diff --git a/lib/ruby_indexer/lib/ruby_indexer/uri.rb b/lib/ruby_indexer/lib/ruby_indexer/uri.rb index 6eeff8b478..61b1fed85c 100644 --- a/lib/ruby_indexer/lib/ruby_indexer/uri.rb +++ b/lib/ruby_indexer/lib/ruby_indexer/uri.rb @@ -12,22 +12,22 @@ class Generic # NOTE: We also define this in the shim PARSER = const_defined?(:RFC2396_PARSER) ? RFC2396_PARSER : DEFAULT_PARSER + # This unsafe regex is the same one used in the URI::RFC2396_REGEXP class with the exception of the fact that we + # do not include colon as a safe character. VS Code URIs always escape colons and we need to ensure we do the + # same to avoid inconsistencies in our URIs, which are used to identify resources + UNSAFE_REGEX = %r{[^\-_.!~*'()a-zA-Z\d;/?@&=+$,\[\]]} + class << self #: (path: String, ?fragment: String?, ?scheme: String, ?load_path_entry: String?) -> URI::Generic - def from_path(path:, fragment: nil, scheme: "file", load_path_entry: nil) - # This unsafe regex is the same one used in the URI::RFC2396_REGEXP class with the exception of the fact that we - # do not include colon as a safe character. VS Code URIs always escape colons and we need to ensure we do the - # same to avoid inconsistencies in our URIs, which are used to identify resources - unsafe_regex = %r{[^\-_.!~*'()a-zA-Z\d;/?@&=+$,\[\]]} - + def from_win_path(path:, fragment: nil, scheme: "file", load_path_entry: nil) # On Windows, if the path begins with the disk name, we need to add a leading slash to make it a valid URI escaped_path = if /^[A-Z]:/i.match?(path) - PARSER.escape("/#{path}", unsafe_regex) + PARSER.escape("/#{path}", UNSAFE_REGEX) elsif path.start_with?("//?/") # Some paths on Windows start with "//?/". This is a special prefix that allows for long file paths - PARSER.escape(path.delete_prefix("//?"), unsafe_regex) + PARSER.escape(path.delete_prefix("//?"), UNSAFE_REGEX) else - PARSER.escape(path, unsafe_regex) + PARSER.escape(path, UNSAFE_REGEX) end uri = build(scheme: scheme, path: escaped_path, fragment: fragment) @@ -38,6 +38,21 @@ def from_path(path:, fragment: nil, scheme: "file", load_path_entry: nil) uri end + + #: (path: String, ?fragment: String?, ?scheme: String, ?load_path_entry: String?) -> URI::Generic + def from_unix_path(path:, fragment: nil, scheme: "file", load_path_entry: nil) + escaped_path = PARSER.escape(path, UNSAFE_REGEX) + + uri = build(scheme: scheme, path: escaped_path, fragment: fragment) + + if load_path_entry + uri.require_path = path.delete_prefix("#{load_path_entry}/").delete_suffix(".rb") + end + + uri + end + + alias_method :from_path, Gem.win_platform? ? :from_win_path : :from_unix_path end #: String? @@ -52,14 +67,18 @@ def add_require_path_from_load_entry(load_path_entry) end #: -> String? - def to_standardized_path + # On Windows, when we're getting the file system path back from the URI, we need to remove the leading forward + # slash + def to_standardized_win_path parsed_path = path + return unless parsed_path + # we can bail out parsing if there is nothing to unescape + return parsed_path unless parsed_path.match?(/%[0-9A-Fa-f]{2}/) + unescaped_path = PARSER.unescape(parsed_path) - # On Windows, when we're getting the file system path back from the URI, we need to remove the leading forward - # slash if %r{^/[A-Z]:}i.match?(unescaped_path) unescaped_path.delete_prefix("/") else @@ -67,6 +86,18 @@ def to_standardized_path end end - alias_method :full_path, :to_standardized_path + #: -> String? + def to_standardized_unix_path + unescaped_path = path + return unless unescaped_path + + # we can bail out parsing if there is nothing to be unescaped + return unescaped_path unless unescaped_path.match?(/%[0-9A-Fa-f]{2}/) + + PARSER.unescape(unescaped_path) + end + + alias_method :to_standardized_path, Gem.win_platform? ? :to_standardized_win_path : :to_standardized_unix_path + alias_method :full_path, Gem.win_platform? ? :to_standardized_win_path : :to_standardized_unix_path end end diff --git a/lib/ruby_indexer/test/uri_test.rb b/lib/ruby_indexer/test/uri_test.rb index 8c064a1a77..39600066a9 100644 --- a/lib/ruby_indexer/test/uri_test.rb +++ b/lib/ruby_indexer/test/uri_test.rb @@ -6,38 +6,38 @@ module RubyIndexer class URITest < Minitest::Test def test_from_path_on_unix - uri = URI::Generic.from_path(path: "/some/unix/path/to/file.rb") + uri = URI::Generic.from_unix_path(path: "/some/unix/path/to/file.rb") assert_equal("/some/unix/path/to/file.rb", uri.path) end def test_from_path_on_windows - uri = URI::Generic.from_path(path: "C:/some/windows/path/to/file.rb") + uri = URI::Generic.from_win_path(path: "C:/some/windows/path/to/file.rb") assert_equal("/C%3A/some/windows/path/to/file.rb", uri.path) end def test_from_path_on_windows_with_lowercase_drive - uri = URI::Generic.from_path(path: "c:/some/windows/path/to/file.rb") + uri = URI::Generic.from_win_path(path: "c:/some/windows/path/to/file.rb") assert_equal("/c%3A/some/windows/path/to/file.rb", uri.path) end def test_to_standardized_path_on_unix - uri = URI::Generic.from_path(path: "/some/unix/path/to/file.rb") - assert_equal(uri.path, uri.to_standardized_path) + uri = URI::Generic.from_unix_path(path: "/some/unix/path/to/file.rb") + assert_equal(uri.path, uri.to_standardized_win_path) end def test_to_standardized_path_on_windows - uri = URI::Generic.from_path(path: "C:/some/windows/path/to/file.rb") - assert_equal("C:/some/windows/path/to/file.rb", uri.to_standardized_path) + uri = URI::Generic.from_win_path(path: "C:/some/windows/path/to/file.rb") + assert_equal("C:/some/windows/path/to/file.rb", uri.to_standardized_win_path) end def test_to_standardized_path_on_windows_with_lowercase_drive - uri = URI::Generic.from_path(path: "c:/some/windows/path/to/file.rb") - assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_path) + uri = URI::Generic.from_win_path(path: "c:/some/windows/path/to/file.rb") + assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_win_path) end def test_to_standardized_path_on_windows_with_received_uri uri = URI("file:///c%3A/some/windows/path/to/file.rb") - assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_path) + assert_equal("c:/some/windows/path/to/file.rb", uri.to_standardized_win_path) end def test_plus_signs_are_properly_unescaped @@ -52,8 +52,8 @@ def test_from_path_with_fragment end def test_from_path_windows_long_file_paths - uri = URI::Generic.from_path(path: "//?/C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb") - assert_equal("C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb", uri.to_standardized_path) + uri = URI::Generic.from_win_path(path: "//?/C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb") + assert_equal("C:/hostedtoolcache/windows/Ruby/3.3.1/x64/lib/ruby/3.3.0/open-uri.rb", uri.to_standardized_win_path) end def test_from_path_computes_require_path_when_load_path_entry_is_given @@ -70,14 +70,14 @@ def test_allows_adding_require_path_with_load_path_entry end def test_from_path_escapes_colon_characters - uri = URI::Generic.from_path(path: "c:/some/windows/path with/spaces/file.rb") - assert_equal("c:/some/windows/path with/spaces/file.rb", uri.to_standardized_path) + uri = URI::Generic.from_win_path(path: "c:/some/windows/path with/spaces/file.rb") + assert_equal("c:/some/windows/path with/spaces/file.rb", uri.to_standardized_win_path) assert_equal("file:///c%3A/some/windows/path%20with/spaces/file.rb", uri.to_s) end def test_from_path_with_unicode_characters path = "/path/with/unicode/文件.rb" - uri = URI::Generic.from_path(path: path) + uri = URI::Generic.from_unix_path(path: path) assert_equal(path, uri.to_standardized_path) assert_equal("file:///path/with/unicode/%E6%96%87%E4%BB%B6.rb", uri.to_s) end diff --git a/sorbet/rbi/shims/uri.rbi b/sorbet/rbi/shims/uri.rbi index 79ec8c59f8..21948f3128 100644 --- a/sorbet/rbi/shims/uri.rbi +++ b/sorbet/rbi/shims/uri.rbi @@ -5,6 +5,22 @@ module URI class Generic PARSER = T.let(const_defined?(:RFC2396_PARSER) ? RFC2396_PARSER : DEFAULT_PARSER, RFC2396_Parser) + + sig { returns(T.nilable(String)) } + def to_standardized_path; end + + sig { returns(T.nilable(String)) } + def full_path; end + + sig do + params( + path: String, + fragment: T.nilable(String), + scheme: String, + load_path_entry: T.nilable(String) + ).returns(::URI::Generic) + end + def self.from_path(path:, fragment: nil, scheme: "file", load_path_entry: nil); end end class File