Skip to content

Commit

Permalink
Fix memoisation of lazy parser & bump version
Browse files Browse the repository at this point in the history
Reported by @Rafiot: the lazy parser is not memoised, this has limited
effect on the basic / pure Python parser as its initialisation is
trivial, but it *significantly* impact the re2 and regex parsers as
they need to process regexes into a filter tree.

The memoization was mistakenly removed in #230: while refactoring
initialisation I removed the setting of the `parser` global.

- add a test to ensure the parser is correctly memoized, not
  re-instantiated every time
- reinstate setting the global
- add a mutex on `__getattr__`, it should only be used on first access
  and avoids two threads creating an expensive parser at the same
  time (which is a waste of CPU)

Fixes #253
  • Loading branch information
masklinn committed Feb 1, 2025
1 parent 5f5f338 commit ce12905
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "ua-parser"
description = "Python port of Browserscope's user agent parser"
version = "1.0.0"
version = "1.0.1"
readme = "README.rst"
requires-python = ">=3.9"
dependencies = ["ua-parser-builtins"]
Expand Down
29 changes: 21 additions & 8 deletions src/ua_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@
]

import importlib.util
from typing import Callable, Optional
import threading
from typing import Callable, Optional, cast

from .basic import Resolver as BasicResolver
from .caching import CachingResolver, S3Fifo as Cache
Expand Down Expand Up @@ -78,7 +79,7 @@
)


VERSION = (1, 0, 0)
VERSION = (1, 0, 1)


class Parser:
Expand Down Expand Up @@ -135,15 +136,27 @@ def parse_device(self: Resolver, ua: str) -> Optional[Device]:
initialisation, rather than pay for it at first call.
"""

_lazy_globals_lock = threading.Lock()


def __getattr__(name: str) -> Parser:
global parser
if name == "parser":
if RegexResolver or Re2Resolver or IS_GRAAL:
matchers = load_lazy_builtins()
else:
matchers = load_builtins()
return Parser.from_matchers(matchers)
with _lazy_globals_lock:
if name == "parser":
# if two threads access `ua_parser.parser` before it's
# initialised, the second one will wait until the first
# one's finished by which time the parser global should be
# set and can be returned with no extra work
if p := globals().get("parser"):
return cast(Parser, p)

if RegexResolver or Re2Resolver or IS_GRAAL:
matchers = load_lazy_builtins()
else:
matchers = load_builtins()
parser = Parser.from_matchers(matchers)
return parser

raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


Expand Down
17 changes: 17 additions & 0 deletions tests/test_convenience_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,23 @@
import ua_parser
from ua_parser import Domain, Parser, PartialResult, Result


def test_parser_memoized() -> None:
"""The global parser should be lazily instantiated but memoized"""
# ensure there is no global parser
vars(ua_parser).pop("parser", None)

p1 = ua_parser.parser
p2 = ua_parser.parser

assert p1 is p2

# force the creation of a clean parser
del ua_parser.parser
p3 = ua_parser.parser
assert p3 is not p1


def resolver(s: str, d: Domain) -> PartialResult:
return PartialResult(d, None, None, None, s)

Expand Down

0 comments on commit ce12905

Please sign in to comment.