From 92327e63a393d4db72bb896b950d1089aed47713 Mon Sep 17 00:00:00 2001 From: Brian Schubert Date: Sun, 15 Jun 2025 17:31:45 -0400 Subject: [PATCH 1/4] Generate introspection signatures for compiled functions --- mypyc/codegen/emitclass.py | 5 +- mypyc/codegen/emitfunc.py | 18 +++- mypyc/codegen/emitmodule.py | 13 ++- mypyc/doc/differences_from_python.rst | 3 +- mypyc/ir/func_ir.py | 85 +++++++++++++++++- mypyc/test-data/run-signatures.test | 125 ++++++++++++++++++++++++++ mypyc/test/test_run.py | 1 + 7 files changed, 242 insertions(+), 8 deletions(-) create mode 100644 mypyc/test-data/run-signatures.test diff --git a/mypyc/codegen/emitclass.py b/mypyc/codegen/emitclass.py index da3d14f9dafe..5bb58340bc59 100644 --- a/mypyc/codegen/emitclass.py +++ b/mypyc/codegen/emitclass.py @@ -6,7 +6,7 @@ from typing import Callable from mypyc.codegen.emit import Emitter, HeaderDeclaration, ReturnHandler -from mypyc.codegen.emitfunc import native_function_header +from mypyc.codegen.emitfunc import native_function_doc_initializer, native_function_header from mypyc.codegen.emitwrapper import ( generate_bin_op_wrapper, generate_bool_wrapper, @@ -841,7 +841,8 @@ def generate_methods_table(cl: ClassIR, name: str, emitter: Emitter) -> None: elif fn.decl.kind == FUNC_CLASSMETHOD: flags.append("METH_CLASS") - emitter.emit_line(" {}, NULL}},".format(" | ".join(flags))) + doc = native_function_doc_initializer(fn) + emitter.emit_line(" {}, {}}},".format(" | ".join(flags), doc)) # Provide a default __getstate__ and __setstate__ if not cl.has_method("__setstate__") and not cl.has_method("__getstate__"): diff --git a/mypyc/codegen/emitfunc.py b/mypyc/codegen/emitfunc.py index c854516825af..7971b19dc5bc 100644 --- a/mypyc/codegen/emitfunc.py +++ b/mypyc/codegen/emitfunc.py @@ -5,6 +5,7 @@ from typing import Final from mypyc.analysis.blockfreq import frequently_executed_blocks +from mypyc.codegen.cstring import c_string_initializer from mypyc.codegen.emit import DEBUG_ERRORS, Emitter, TracebackAndGotoHandler, c_array_initializer from mypyc.common import ( HAVE_IMMORTAL, @@ -16,7 +17,14 @@ TYPE_VAR_PREFIX, ) from mypyc.ir.class_ir import ClassIR -from mypyc.ir.func_ir import FUNC_CLASSMETHOD, FUNC_STATICMETHOD, FuncDecl, FuncIR, all_values +from mypyc.ir.func_ir import ( + FUNC_CLASSMETHOD, + FUNC_STATICMETHOD, + FuncDecl, + FuncIR, + all_values, + get_text_signature, +) from mypyc.ir.ops import ( ERR_FALSE, NAMESPACE_MODULE, @@ -105,6 +113,14 @@ def native_function_header(fn: FuncDecl, emitter: Emitter) -> str: ) +def native_function_doc_initializer(func: FuncIR) -> str: + text_sig = get_text_signature(func) + if text_sig is None: + return "NULL" + docstring = f"{text_sig}\n--\n\n" + return c_string_initializer(docstring.encode("ascii", errors="backslashreplace")) + + def generate_native_function( fn: FuncIR, emitter: Emitter, source_path: str, module_name: str ) -> None: diff --git a/mypyc/codegen/emitmodule.py b/mypyc/codegen/emitmodule.py index f914bfd6345d..b4dc43ba2dc5 100644 --- a/mypyc/codegen/emitmodule.py +++ b/mypyc/codegen/emitmodule.py @@ -30,7 +30,11 @@ from mypyc.codegen.cstring import c_string_initializer from mypyc.codegen.emit import Emitter, EmitterContext, HeaderDeclaration, c_array_initializer from mypyc.codegen.emitclass import generate_class, generate_class_type_decl -from mypyc.codegen.emitfunc import generate_native_function, native_function_header +from mypyc.codegen.emitfunc import ( + generate_native_function, + native_function_doc_initializer, + native_function_header, +) from mypyc.codegen.emitwrapper import ( generate_legacy_wrapper_function, generate_wrapper_function, @@ -915,11 +919,14 @@ def emit_module_methods( flag = "METH_FASTCALL" else: flag = "METH_VARARGS" + doc = native_function_doc_initializer(fn) emitter.emit_line( ( '{{"{name}", (PyCFunction){prefix}{cname}, {flag} | METH_KEYWORDS, ' - "NULL /* docstring */}}," - ).format(name=name, cname=fn.cname(emitter.names), prefix=PREFIX, flag=flag) + "{doc} /* docstring */}}," + ).format( + name=name, cname=fn.cname(emitter.names), prefix=PREFIX, flag=flag, doc=doc + ) ) emitter.emit_line("{NULL, NULL, 0, NULL}") emitter.emit_line("};") diff --git a/mypyc/doc/differences_from_python.rst b/mypyc/doc/differences_from_python.rst index 65ad709677af..d2fae7848e7f 100644 --- a/mypyc/doc/differences_from_python.rst +++ b/mypyc/doc/differences_from_python.rst @@ -316,7 +316,8 @@ non-exhaustive list of what won't work: - Instance ``__annotations__`` is usually not kept - Frames of compiled functions can't be inspected using ``inspect`` - Compiled methods aren't considered methods by ``inspect.ismethod`` -- ``inspect.signature`` chokes on compiled functions +- ``inspect.signature`` chokes on compiled functions with default arguments that + are not simple literals Profiling hooks and tracing *************************** diff --git a/mypyc/ir/func_ir.py b/mypyc/ir/func_ir.py index beef8def7f43..a18e419fd332 100644 --- a/mypyc/ir/func_ir.py +++ b/mypyc/ir/func_ir.py @@ -2,6 +2,7 @@ from __future__ import annotations +import inspect from collections.abc import Sequence from typing import Final @@ -11,13 +12,24 @@ Assign, AssignMulti, BasicBlock, + Box, ControlOp, DeserMaps, + Float, + Integer, LoadAddress, + LoadLiteral, Register, + TupleSet, Value, ) -from mypyc.ir.rtypes import RType, bitmap_rprimitive, deserialize_type +from mypyc.ir.rtypes import ( + RType, + bitmap_rprimitive, + deserialize_type, + is_bool_rprimitive, + is_none_rprimitive, +) from mypyc.namegen import NameGenerator @@ -379,3 +391,74 @@ def all_values_full(args: list[Register], blocks: list[BasicBlock]) -> list[Valu values.append(op) return values + + +_ARG_KIND_TO_INSPECT: Final = { + ArgKind.ARG_POS: inspect.Parameter.POSITIONAL_OR_KEYWORD, + ArgKind.ARG_OPT: inspect.Parameter.POSITIONAL_OR_KEYWORD, + ArgKind.ARG_STAR: inspect.Parameter.VAR_POSITIONAL, + ArgKind.ARG_NAMED: inspect.Parameter.KEYWORD_ONLY, + ArgKind.ARG_STAR2: inspect.Parameter.VAR_KEYWORD, + ArgKind.ARG_NAMED_OPT: inspect.Parameter.KEYWORD_ONLY, +} + +# Sentinel indicating a value that cannot be represented in a text signature. +_NOT_REPRESENTABLE = object() + + +def get_text_signature(fn: FuncIR) -> str | None: + """Return a text signature in CPython's internal doc format, or None + if the function's signature cannot be represented. + """ + parameters = [] + mark_self = fn.class_name is not None and fn.decl.kind != FUNC_STATICMETHOD + for arg in fn.decl.sig.args: + if arg.name.startswith("__bitmap") or arg.name == "__mypyc_self__": + continue + kind = ( + inspect.Parameter.POSITIONAL_ONLY if arg.pos_only else _ARG_KIND_TO_INSPECT[arg.kind] + ) + default: object = inspect.Parameter.empty + if arg.optional: + default = _find_default_argument(arg.name, fn.blocks) + if default is _NOT_REPRESENTABLE: + # This default argument cannot be represented in a __text_signature__ + return None + + curr_param = inspect.Parameter(arg.name, kind, default=default) + parameters.append(curr_param) + if mark_self: + # Parameter.__init__ does not accept $ + curr_param._name = f"${arg.name}" # type: ignore[attr-defined] + mark_self = False + sig = inspect.Signature(parameters) + return f"{fn.name}{sig}" + + +def _find_default_argument(name: str, blocks: list[BasicBlock]) -> object: + # Find assignment inserted by gen_arg_defaults. Assumed to be the first assignment. + for block in blocks: + for op in block.ops: + if isinstance(op, Assign) and op.dest.name == name: + return _extract_python_literal(op.src) + return _NOT_REPRESENTABLE + + +def _extract_python_literal(value: Value) -> object: + if isinstance(value, Integer): + if is_none_rprimitive(value.type): + return None + val = value.numeric_value() + return bool(val) if is_bool_rprimitive(value.type) else val + elif isinstance(value, Float): + return value.value + elif isinstance(value, LoadLiteral): + return value.value + elif isinstance(value, Box): + return _extract_python_literal(value.src) + elif isinstance(value, TupleSet): + items = tuple(_extract_python_literal(item) for item in value.items) + if any(itm is _NOT_REPRESENTABLE for itm in items): + return _NOT_REPRESENTABLE + return items + return _NOT_REPRESENTABLE diff --git a/mypyc/test-data/run-signatures.test b/mypyc/test-data/run-signatures.test new file mode 100644 index 000000000000..f8a9ca29b9fc --- /dev/null +++ b/mypyc/test-data/run-signatures.test @@ -0,0 +1,125 @@ +[case testSignaturesBasic] +import inspect + +def f1(): pass +def f2(x): pass +def f3(x, /): pass +def f4(*, x): pass +def f5(*x): pass +def f6(**x): pass +def f7(x=None): pass +def f8(x=None, /): pass +def f9(*, x=None): pass +def f10(a, /, b, c=None, *args, d=None, **h): pass + +def test_basic() -> None: + assert str(inspect.signature(f1)) == "()" + assert str(inspect.signature(f2)) == "(x)" + assert str(inspect.signature(f3)) == "(x, /)" + assert str(inspect.signature(f4)) == "(*, x)" + assert str(inspect.signature(f5)) == "(*x)" + assert str(inspect.signature(f6)) == "(**x)" + assert str(inspect.signature(f7)) == "(x=None)" + assert str(inspect.signature(f8)) == "(x=None, /)" + assert str(inspect.signature(f9)) == "(*, x=None)" + assert str(inspect.signature(f10)) == "(a, /, b, c=None, *args, d=None, **h)" + +[case testSignaturesValidDefaults] +import inspect + +def default_int(x=1): pass +def default_str(x="a"): pass +def default_float(x=1.0): pass +def default_true(x=True): pass +def default_false(x=False): pass +def default_none(x=None): pass +def default_tuple_empty(x=()): pass +def default_tuple_literals(x=(1, "a", 1.0, False, True, None, (), (1,2,(3,4)))): pass +def default_tuple_singleton(x=(1,)): pass + +def test_valid_defaults() -> None: + assert str(inspect.signature(default_int)) == "(x=1)" + assert str(inspect.signature(default_str)) == "(x='a')" + assert str(inspect.signature(default_float)) == "(x=1.0)" + assert str(inspect.signature(default_true)) == "(x=True)" + assert str(inspect.signature(default_false)) == "(x=False)" + assert str(inspect.signature(default_none)) == "(x=None)" + assert str(inspect.signature(default_tuple_empty)) == "(x=())" + assert str(inspect.signature(default_tuple_literals)) == "(x=(1, 'a', 1.0, False, True, None, (), (1, 2, (3, 4))))" + + # Check __text_signature__ directly since inspect.signature produces + # an incorrect signature for 1-tuple default arguments prior to + # Python 3.12 (cpython#102379). + # assert str(inspect.signature(default_tuple_singleton)) == "(x=(1,))" + assert getattr(default_tuple_singleton, "__text_signature__") == "(x=(1,))" + +[case testSignaturesStringDefaults] +import inspect + +def f1(x="'foo"): pass +def f2(x='"foo'): pass +def f3(x=""""Isn\'t," they said."""): pass +def f4(x="\\ \a \b \f \n \r \t \v \x00"): pass +def f5(x="\N{BANANA}sv"): pass + +def test_string_defaults() -> None: + assert str(inspect.signature(f1)) == """(x="'foo")""" + assert str(inspect.signature(f2)) == """(x='"foo')""" + assert str(inspect.signature(f3)) == r"""(x='"Isn\'t," they said.')""" + assert str(inspect.signature(f4)) == r"""(x='\\ \x07 \x08 \x0c \n \r \t \x0b \x00')""" + assert str(inspect.signature(f5)) == """(x='\U0001F34Csv')""" + +[case testSignaturesIrrepresentableDefaults] +import inspect +from typing import Any + +from testutil import assertRaises + +def bad1(x=[]): pass +def bad2(x={}): pass +def bad3(x=set()): pass +def bad4(x=int): pass +def bad5(x=lambda: None): pass +def bad6(x=bad1): pass +# note: inspect supports constant folding for defaults in text signatures +def bad7(x=1+2): pass +def bad8(x=1-2): pass +def bad9(x=1|2): pass +def bad10(x=float("nan")): pass +def bad11(x=([],)): pass + +def test_irrepresentable_defaults() -> None: + bad: Any + for bad in [bad1, bad2, bad3, bad4, bad5, bad6, bad7, bad8, bad9, bad10, bad11]: + assert bad.__text_signature__ is None, f"{bad.__name__} has unexpected __text_signature__" + with assertRaises(ValueError, "no signature found for builtin"): + inspect.signature(bad) + +[case testSignaturesMethods] +import inspect + +class Foo: + def f1(self, x): pass + @classmethod + def f2(cls, x): pass + @staticmethod + def f3(x): pass + +def test_methods() -> None: + assert getattr(Foo.f1, "__text_signature__") == "($self, x)" + assert str(inspect.signature(Foo.f1)) == "(self, /, x)" + + assert getattr(Foo.f2, "__text_signature__") == "($cls, x)" + assert str(inspect.signature(Foo.f2)) == "(x)" + + assert getattr(Foo.f3, "__text_signature__") == "(x)" + assert str(inspect.signature(Foo.f3)) == "(x)" + + assert getattr(Foo().f1, "__text_signature__") == "($self, x)" + assert str(inspect.signature(Foo().f1)) == "(x)" + + assert getattr(Foo().f2, "__text_signature__") == "($cls, x)" + assert str(inspect.signature(Foo().f2)) == "(x)" + + assert getattr(Foo().f3, "__text_signature__") == "(x)" + assert str(inspect.signature(Foo().f3)) == "(x)" diff --git a/mypyc/test/test_run.py b/mypyc/test/test_run.py index b96c4241f30d..407d9c8b3912 100644 --- a/mypyc/test/test_run.py +++ b/mypyc/test/test_run.py @@ -71,6 +71,7 @@ "run-attrs.test", "run-python37.test", "run-python38.test", + "run-signatures.test", ] if sys.version_info >= (3, 10): From 007e7144a6f46c6aca7aed148917e2dcbb5b008e Mon Sep 17 00:00:00 2001 From: Brian Schubert Date: Mon, 16 Jun 2025 10:50:02 -0400 Subject: [PATCH 2/4] Fix signature generation for historical positional-only syntax --- mypyc/ir/func_ir.py | 15 ++++++++++++--- mypyc/test-data/run-signatures.test | 22 ++++++++++++++++++++++ 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/mypyc/ir/func_ir.py b/mypyc/ir/func_ir.py index a18e419fd332..14cd4c176f68 100644 --- a/mypyc/ir/func_ir.py +++ b/mypyc/ir/func_ir.py @@ -412,11 +412,20 @@ def get_text_signature(fn: FuncIR) -> str | None: """ parameters = [] mark_self = fn.class_name is not None and fn.decl.kind != FUNC_STATICMETHOD - for arg in fn.decl.sig.args: + # Pre-scan for end of positional-only parameters. + # This is needed to handle signatures like 'def foo(self, __x)', where mypy + # currently sees 'self' as being positional-or-keyword and '__x' as positional-only. + pos_only_idx = -1 + for idx, arg in enumerate(fn.decl.sig.args): + if arg.pos_only and arg.kind in (ArgKind.ARG_POS, ArgKind.ARG_OPT): + pos_only_idx = idx + for idx, arg in enumerate(fn.decl.sig.args): if arg.name.startswith("__bitmap") or arg.name == "__mypyc_self__": continue kind = ( - inspect.Parameter.POSITIONAL_ONLY if arg.pos_only else _ARG_KIND_TO_INSPECT[arg.kind] + inspect.Parameter.POSITIONAL_ONLY + if idx <= pos_only_idx + else _ARG_KIND_TO_INSPECT[arg.kind] ) default: object = inspect.Parameter.empty if arg.optional: @@ -428,7 +437,7 @@ def get_text_signature(fn: FuncIR) -> str | None: curr_param = inspect.Parameter(arg.name, kind, default=default) parameters.append(curr_param) if mark_self: - # Parameter.__init__ does not accept $ + # Parameter.__init__/Parameter.replace do not accept $ curr_param._name = f"${arg.name}" # type: ignore[attr-defined] mark_self = False sig = inspect.Signature(parameters) diff --git a/mypyc/test-data/run-signatures.test b/mypyc/test-data/run-signatures.test index f8a9ca29b9fc..be6a8fdebe1d 100644 --- a/mypyc/test-data/run-signatures.test +++ b/mypyc/test-data/run-signatures.test @@ -123,3 +123,25 @@ def test_methods() -> None: assert getattr(Foo().f3, "__text_signature__") == "(x)" assert str(inspect.signature(Foo().f3)) == "(x)" + +[case testSignaturesHistoricalPositionalOnly] +import inspect + +def f1(__x): pass +def f2(__x, y): pass +def f3(*, __y): pass +def f4(x, *, __y): pass +def f5(__x, *, __y): pass + +class A: + def func(self, __x): pass + +def test_historical_positional_only() -> None: + assert str(inspect.signature(f1)) == "(__x, /)" + assert str(inspect.signature(f2)) == "(__x, /, y)" + assert str(inspect.signature(f3)) == "(*, __y)" + assert str(inspect.signature(f4)) == "(x, *, __y)" + assert str(inspect.signature(f5)) == "(__x, /, *, __y)" + + assert str(inspect.signature(A.func)) == "(self, __x, /)" + assert str(inspect.signature(A().func)) == "(__x, /)" From 617d952ff56ef0d1f370bb69d8d9507064cee52f Mon Sep 17 00:00:00 2001 From: Brian Schubert Date: Mon, 16 Jun 2025 11:26:21 -0400 Subject: [PATCH 3/4] Add special method test --- mypyc/test-data/run-signatures.test | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mypyc/test-data/run-signatures.test b/mypyc/test-data/run-signatures.test index be6a8fdebe1d..83db6430ddd9 100644 --- a/mypyc/test-data/run-signatures.test +++ b/mypyc/test-data/run-signatures.test @@ -104,26 +104,29 @@ class Foo: def f2(cls, x): pass @staticmethod def f3(x): pass + def __eq__(self, x: object): pass def test_methods() -> None: assert getattr(Foo.f1, "__text_signature__") == "($self, x)" - assert str(inspect.signature(Foo.f1)) == "(self, /, x)" - - assert getattr(Foo.f2, "__text_signature__") == "($cls, x)" - assert str(inspect.signature(Foo.f2)) == "(x)" - - assert getattr(Foo.f3, "__text_signature__") == "(x)" - assert str(inspect.signature(Foo.f3)) == "(x)" - assert getattr(Foo().f1, "__text_signature__") == "($self, x)" + assert str(inspect.signature(Foo.f1)) == "(self, /, x)" assert str(inspect.signature(Foo().f1)) == "(x)" + assert getattr(Foo.f2, "__text_signature__") == "($cls, x)" assert getattr(Foo().f2, "__text_signature__") == "($cls, x)" + assert str(inspect.signature(Foo.f2)) == "(x)" assert str(inspect.signature(Foo().f2)) == "(x)" + assert getattr(Foo.f3, "__text_signature__") == "(x)" assert getattr(Foo().f3, "__text_signature__") == "(x)" + assert str(inspect.signature(Foo.f3)) == "(x)" assert str(inspect.signature(Foo().f3)) == "(x)" + assert getattr(Foo.__eq__, "__text_signature__") == "($self, value, /)" + assert getattr(Foo().__eq__, "__text_signature__") == "($self, value, /)" + assert str(inspect.signature(Foo.__eq__)) == "(self, value, /)" + assert str(inspect.signature(Foo().__eq__)) == "(value, /)" + [case testSignaturesHistoricalPositionalOnly] import inspect From a4094d00ef9a7b3296cc3d751fb8876f7bcc43c7 Mon Sep 17 00:00:00 2001 From: Brian Schubert Date: Mon, 16 Jun 2025 16:32:49 -0400 Subject: [PATCH 4/4] Convert ternary to if statement as mypyc bug workaround See mypyc#1109 --- mypyc/ir/func_ir.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mypyc/ir/func_ir.py b/mypyc/ir/func_ir.py index 14cd4c176f68..5c1d7b1f2b2f 100644 --- a/mypyc/ir/func_ir.py +++ b/mypyc/ir/func_ir.py @@ -458,7 +458,9 @@ def _extract_python_literal(value: Value) -> object: if is_none_rprimitive(value.type): return None val = value.numeric_value() - return bool(val) if is_bool_rprimitive(value.type) else val + if is_bool_rprimitive(value.type): + return bool(val) + return val elif isinstance(value, Float): return value.value elif isinstance(value, LoadLiteral):