diff --git a/tests/test_url.py b/tests/test_url.py
index 92a5bc3..cbb8c09 100644
--- a/tests/test_url.py
+++ b/tests/test_url.py
@@ -302,3 +302,25 @@ def test_percent_encoding_spaces() -> None:
     # Test that actual spaces get encoded properly
     url_with_spaces = URL("https://somepath.com/test") / "Test path" / "my test file.txt"
     assert str(url_with_spaces) == "https://somepath.com/test/Test%20path/my%20test%20file.txt"
+
+
+def test_colon_in_filename() -> None:
+    """Test that colons in filenames are not treated as scheme separators."""
+    # Reported bug: URL('http://www.example.com/abc:def.html') was truncated to 'http://www.example.com/def.html'
+    url = URL("http://www.example.com/abc:def.html")
+    assert str(url) == "http://www.example.com/abc:def.html"
+    assert url.name == "abc:def.html"
+    assert url.path == "/abc:def.html"
+
+    # Test various positions and uses of colons
+    assert str(URL("http://www.example.com/file:name.txt")) == "http://www.example.com/file:name.txt"
+    assert str(URL("http://www.example.com/path/to/file:v2.html")) == "http://www.example.com/path/to/file:v2.html"
+    assert str(URL("http://www.example.com/:colon.txt")) == "http://www.example.com/:colon.txt"
+    assert str(URL("http://www.example.com/colon:.txt")) == "http://www.example.com/colon:.txt"
+
+    # Test with query and fragment
+    url = URL("http://www.example.com/abc:def.html?key=value#frag")
+    assert url.name == "abc:def.html"
+    assert url.query == "key=value"
+    assert url.fragment == "frag"
+    assert str(url) == "http://www.example.com/abc:def.html?key=value#frag"
diff --git a/urlpath/__init__.py b/urlpath/__init__.py
index e16365a..7355c93 100644
--- a/urlpath/__init__.py
+++ b/urlpath/__init__.py
@@ -479,6 +479,27 @@ def path(self) -> str:
             + self.trailing_sep
         )
 
+    @property
+    @cached_property
+    def _name_parts(self) -> tuple[str, str, str]:
+        """Split super().name into (path, query, fragment) without using urlsplit.
+
+        We can't use urlsplit here because it treats colons as scheme separators,
+        which breaks filenames like 'abc:def.html'.
+
+        The fragment is cut off at the first '#'; the query is then cut off at the
+        first '?' in what remains, so a '?' that follows the '#' stays in the fragment.
+
+        Returns:
+            Tuple of (path, query, fragment) strings. The query and fragment exclude
+            their leading '?' / '#' and are empty strings when absent.
+        """
+        # str.partition returns an empty tail when the separator is missing,
+        # which is exactly the "no fragment" / "no query" result needed here.
+        before_fragment, _, fragment = super().name.partition("#")
+        path, _, query = before_fragment.partition("?")
+        return path, query, fragment
+
     @property
     @cached_property
     def name(self) -> str:
@@ -487,7 +508,7 @@ def name(self) -> str:
         Returns:
             The decoded filename or last path segment.
         """
-        return urllib.parse.unquote(urllib.parse.urlsplit(super().name).path.rstrip(self._flavour.sep))
+        return urllib.parse.unquote(self._name_parts[0].rstrip(self._flavour.sep))
 
     @property
     @cached_property
@@ -497,7 +518,7 @@ def query(self) -> str:
         Returns:
             The raw query string (without the leading '?').
         """
-        return urllib.parse.urlsplit(super().name).query
+        return self._name_parts[1]
 
     @property
     @cached_property
@@ -507,7 +528,7 @@ def fragment(self) -> str:
         Returns:
             The fragment string (without the leading '#').
         """
-        return urllib.parse.urlsplit(super().name).fragment
+        return self._name_parts[2]
 
     @property
     @cached_property
@@ -517,7 +538,7 @@ def trailing_sep(self) -> str:
         Returns:
             The trailing '/' characters, or empty string if none.
         """
-        match = re.search("(" + re.escape(self._flavour.sep) + "*)$", urllib.parse.urlsplit(super().name).path)
+        match = re.search("(" + re.escape(self._flavour.sep) + "*)$", self._name_parts[0])
         assert match is not None
         return match.group(0)
 