diff --git a/urlpath/_compat.py b/urlpath/_compat.py new file mode 100644 index 0000000..eb92331 --- /dev/null +++ b/urlpath/_compat.py @@ -0,0 +1,10 @@ +"""Python version compatibility helpers.""" + +from __future__ import annotations + +__all__ = ("IS_PY312_PLUS",) + +import sys + +# True on Python 3.12+, where pathlib's private internals changed (e.g. _PosixFlavour was removed) +IS_PY312_PLUS = sys.version_info >= (3, 12) diff --git a/urlpath/_flavour.py b/urlpath/_flavour.py index e48d982..e06ce9d 100644 --- a/urlpath/_flavour.py +++ b/urlpath/_flavour.py @@ -5,19 +5,20 @@ __all__ = ("_URLFlavour",) import posixpath -import sys +from typing import TYPE_CHECKING +from ._compat import IS_PY312_PLUS from ._utils import _url_splitroot # Python 3.12+ removed _PosixFlavour class, replaced with module-based approach -if sys.version_info >= (3, 12): - _PosixFlavour = None # noqa: F811 -else: +if not TYPE_CHECKING and not IS_PY312_PLUS: from pathlib import _PosixFlavour +else: + _PosixFlavour = object # Python 3.12+ compatibility: create flavour class or simple object -if sys.version_info >= (3, 12): +if IS_PY312_PLUS: # Python 3.12+: _flavour is a module, we create a simple object with required attributes class _URLFlavour: r"""Custom pathlib flavour for parsing URLs as filesystem paths (Python 3.12+). @@ -114,7 +115,7 @@ def normcase(self, path: str) -> str: else: # Python 3.9-3.11: Inherit from _PosixFlavour class - class _URLFlavour(_PosixFlavour): + class _URLFlavour(_PosixFlavour): # type: ignore[no-redef] r"""Custom pathlib flavour for parsing URLs as filesystem paths. 
Extends PosixFlavour to treat URLs as paths by: diff --git a/urlpath/_url.py b/urlpath/_url.py index 04fd527..7879380 100644 --- a/urlpath/_url.py +++ b/urlpath/_url.py @@ -9,7 +9,6 @@ import os import posixpath import re -import sys import urllib.parse from pathlib import PurePath from typing import Any @@ -17,8 +16,9 @@ import requests +from ._compat import IS_PY312_PLUS from ._flavour import _URLFlavour -from ._utils import FrozenMultiDict, cached_property, netlocjoin +from ._utils import FrozenMultiDict, cached_property, cleanup_escapes, netlocjoin try: import jmespath @@ -73,7 +73,7 @@ def __new__(cls, *args: Any) -> URL: Returns: New URL instance """ - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: # Python 3.12: Canonicalize for stricter PurePath validation # Note: This happens BEFORE _parse_args, so it's not redundant canonicalized_args = tuple(cls._canonicalize_arg(a) for a in args) @@ -95,17 +95,17 @@ def __init__(self, *args: Any) -> None: Args: *args: URL components (need to be canonicalized again for Python 3.12) """ - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: # Python 3.12: Must canonicalize args again (__init__ gets original args) canonicalized_args = tuple(self._canonicalize_arg(a) for a in args) if len(canonicalized_args) > 1: - combined = type(self)._combine_args(canonicalized_args) # type: ignore[attr-defined] + combined = type(self)._combine_args(canonicalized_args) super().__init__(*combined) else: super().__init__(*canonicalized_args) # else: Python < 3.12 doesn't call parent __init__ (it's object.__init__) - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: @classmethod def _combine_args(cls, canonicalized_args: tuple[str, ...]) -> tuple[str, ...]: @@ -184,10 +184,10 @@ def _parse_path(cls, path: str) -> tuple[str, str, list[str]]: return drv, root, tail_parts # Python 3.12 compatibility: _parts was replaced with _tail_cached - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: @property - def _parts(self) -> list[str]: # 
type: ignore[misc] + def _parts(self) -> list[str]: """Compatibility property for Python 3.12+ with manual caching. In Python 3.12, pathlib uses _tail_cached instead of _parts. This property @@ -202,7 +202,7 @@ def _parts(self) -> list[str]: # type: ignore[misc] """ # Check if we have a cached value if hasattr(self, "_parts_cache"): - return self._parts_cache # type: ignore[return-value] + return self._parts_cache self._ensure_parts_loaded() # In Python 3.12, the structure is: _raw_paths contains input, @@ -219,14 +219,14 @@ def _parts(self) -> list[str]: # type: ignore[misc] # Clean up \x00 escape in last part (used to escape / in query/fragment/trailing) if parts: - parts[-1] = parts[-1].replace("\\x00", "/") + parts[-1] = cleanup_escapes(parts[-1]) # Cache the result for future access object.__setattr__(self, "_parts_cache", parts) return parts @_parts.setter - def _parts(self, value: list[str]) -> None: # type: ignore[misc] + def _parts(self, value: list[str]) -> None: """Compatibility setter for Python 3.12+. Converts _parts list back to _tail_cached tuple. 
Clears the cache @@ -259,11 +259,7 @@ def _from_parts(cls, args: Any) -> URL: Returns: New URL instance """ - if sys.version_info >= (3, 12): - # Python 3.12 removed _from_parts, use direct construction - ret = cls(*args) - else: - ret = super()._from_parts(args) + ret = cls(*args) if IS_PY312_PLUS else super()._from_parts(args) ret._init() return ret @@ -284,7 +280,7 @@ def _from_parsed_parts(cls, drv: str, root: str, parts: list[str]) -> URL: """ # Python 3.12 changed _from_parsed_parts from classmethod to instance method # Signature changed from (drv, root, parts) to (self, drv, root, tail) - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: # In Python 3.12, we need to create an instance first and set _raw_paths self = object.__new__(cls) # Reconstruct the path string for _raw_paths @@ -378,12 +374,12 @@ def _bootstrap_legacy_parts(self) -> None: def _ensure_parts_loaded(self) -> None: """Ensure internal path parts are available across Python versions.""" - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: if hasattr(self, "_load_parts"): try: - _ = self._tail_cached # type: ignore[attr-defined] + _ = self._tail_cached except AttributeError: - self._load_parts() # type: ignore[attr-defined] + self._load_parts() else: self._bootstrap_legacy_parts() @@ -397,7 +393,7 @@ def _init(self) -> None: if self._parts: # trick to escape '/' in query and fragment and trailing - self._parts[-1] = self._parts[-1].replace("\\x00", "/") + self._parts[-1] = cleanup_escapes(self._parts[-1]) def _make_child(self, args: Any) -> URL: # replace by parts that have no query and have no fragment @@ -450,7 +446,7 @@ def joinpath(self, *pathsegments: Any) -> URL: >>> str(url / '/absolute') 'http://example.com/absolute' """ - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: # Python 3.12: Manually implement join logic # First, canonicalize all segments (handles webob.Request, etc.) 
canonicalized_segments = tuple(self._canonicalize_arg(seg) for seg in pathsegments) @@ -501,9 +497,9 @@ def joinpath(self, *pathsegments: Any) -> URL: else: return super().joinpath(*pathsegments) - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: - def __truediv__(self, key: Any) -> URL: # type: ignore[override] + def __truediv__(self, key: Any) -> URL: """Ensure the / operator reuses joinpath on Python 3.12+.""" return self.joinpath(key) @@ -690,8 +686,8 @@ def _name_parts(self) -> tuple[str, str, str]: """ full_name = super().name # In Python 3.12, super().name may have \x00 escape, clean it up - if sys.version_info >= (3, 12): - full_name = full_name.replace("\\x00", "/") + if IS_PY312_PLUS: + full_name = cleanup_escapes(full_name) # Fragment takes priority - everything after # is fragment fragment_idx = full_name.find("#") @@ -1236,7 +1232,7 @@ def __init__(self, *args: Any, root: Any = None) -> None: # The root argument is already handled in __new__ # In Python < 3.12, PurePath.__init__ does nothing, so we can't pass args # In Python 3.12, we need to canonicalize and pass args (without root kwarg) - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: # Must canonicalize args (__init__ receives original args) canonicalized_args = tuple(self._canonicalize_arg(a) for a in args) super().__init__(*canonicalized_args) @@ -1250,7 +1246,7 @@ def _from_parts(cls, args: Any) -> URL: a dynamic subclass and calls _from_parts again, causing infinite recursion. Instead, we use object.__new__ directly. 
""" - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: # Create instance using object.__new__ to bypass __new__ self = object.__new__(cls) # Set _raw_paths which is required for _load_parts @@ -1309,7 +1305,7 @@ def joinpath(self, *pathsegments: Any) -> JailedURL: >>> str(jail / '../../escape') # Prevented by _init 'http://example.com/app/' """ - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: chroot = self._chroot assert chroot is not None # Always set by __new__ @@ -1337,7 +1333,7 @@ def joinpath(self, *pathsegments: Any) -> JailedURL: ) ) joined = type(self)._combine_args( - (chroot_url_str, seg_str.lstrip("/"), *canonicalized_segments[i + 1 :]) # type: ignore[attr-defined] + (chroot_url_str, seg_str.lstrip("/"), *canonicalized_segments[i + 1 :]) ) return type(self)(*joined) @@ -1351,24 +1347,24 @@ def joinpath(self, *pathsegments: Any) -> JailedURL: "", ) ) - joined = type(self)._combine_args((clean_url_str, *canonicalized_segments)) # type: ignore[attr-defined] + joined = type(self)._combine_args((clean_url_str, *canonicalized_segments)) return type(self)(*joined) else: # Python < 3.12: use _make_child which handles jailed logic - result: JailedURL = super().joinpath(*pathsegments) # type: ignore[assignment] - return result + result = super().joinpath(*pathsegments) + return result # type: ignore[return-value] def _init(self) -> None: # Python 3.12+: Must call _load_parts() to initialize _drv, _root, _parts - if sys.version_info >= (3, 12) and hasattr(self, "_load_parts"): - self._load_parts() # type: ignore[attr-defined] + if IS_PY312_PLUS and hasattr(self, "_load_parts"): + self._load_parts() chroot = self._chroot assert chroot is not None # Always set by __new__ - if self._parts[: len(chroot.parts)] != list(chroot.parts): # type: ignore[has-type] + if self._parts[: len(chroot.parts)] != list(chroot.parts): self._drv, self._root, self._parts = chroot._drv, chroot._root, chroot._parts[:] - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: 
object.__setattr__(self, "_raw_paths", [str(chroot)]) if hasattr(self, "_parts_cache"): object.__delattr__(self, "_parts_cache") @@ -1398,7 +1394,7 @@ def resolve(self) -> URL: chroot = self._chroot assert chroot is not None # Always set by __new__ - if sys.version_info >= (3, 12): + if IS_PY312_PLUS: # Python 3.12: _parts is a property computed from _drv, _root, _tail_cached # The resolve logic for jailed URLs needs _parts to look like: # ["http://example.com/app/", "path", "to", "content", "..", "file"] diff --git a/urlpath/_utils.py b/urlpath/_utils.py index 02e4e6c..92767ca 100644 --- a/urlpath/_utils.py +++ b/urlpath/_utils.py @@ -2,7 +2,15 @@ from __future__ import annotations -__all__ = ("FrozenDict", "FrozenMultiDict", "MultiDictMixin", "cached_property", "netlocjoin", "_url_splitroot") +__all__ = ( + "FrozenDict", + "FrozenMultiDict", + "MultiDictMixin", + "cached_property", + "netlocjoin", + "_url_splitroot", + "cleanup_escapes", +) import functools import re @@ -79,7 +87,7 @@ def get_one( key: Any, default: Any = None, predicate: Callable[[Any], bool] | None = None, - type: Callable[[Any], Any] | None = None, + type_: Callable[[Any], Any] | None = None, ) -> Any: """Get the first value for a key that matches the predicate. @@ -87,10 +95,10 @@ def get_one( key: The dictionary key to look up default: Value to return if key not found or no value matches predicate predicate: Optional callable to filter values (e.g., from inspect.getmembers) - type: Optional callable to transform the returned value + type_: Optional callable to transform the returned value Returns: - The first matching value, optionally transformed by type callable, + The first matching value, optionally transformed by type_ callable, or default if no match found. 
""" try: @@ -100,7 +108,7 @@ def get_one( else: for value in values: if not predicate or predicate(value): - return value if not type else type(value) + return value if not type_ else type_(value) return default @@ -214,7 +222,22 @@ def _url_splitroot(part: str, sep: str = "/") -> tuple[str, str, str]: drive = urllib.parse.urlunsplit((scheme, netloc, "", "", "")) match = re.match(f"^({re.escape(sep)}*)(.*)$", path) - assert match is not None + assert match is not None # we're sure it's always valid for this regex root, path = match.groups() - return drive, root, path + + +def cleanup_escapes(text: str) -> str: + r"""Restore forward slashes that were masked by the internal escape marker. + + Replaces the internal escape marker (the literal four-character text \x00, + not a NUL byte) that stands in for forward slashes in query and fragment + components with regular forward slashes. + + Args: + text: String potentially containing \x00 escape markers + + Returns: + String with every \x00 marker replaced by / + """ + return text.replace("\\x00", "/")