From 091c89681deabe9cad4061174413729765d748b4 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Wed, 1 Oct 2025 16:37:51 -0400 Subject: [PATCH 01/10] [mypyc] feat: make frozenset literal compilation deterministic --- mypyc/codegen/literals.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mypyc/codegen/literals.py b/mypyc/codegen/literals.py index 4cd41e0f4d32..ac58baf3f1ad 100644 --- a/mypyc/codegen/literals.py +++ b/mypyc/codegen/literals.py @@ -165,7 +165,15 @@ def _encode_collection_values( for i in range(count): value = value_by_index[i] result.append(str(len(value))) - for item in value: + if isinstance(value, frozenset): + # even though frozensets are not sorted in python, we need to sort the items here + # to improve the determinism of the generated C file, making it easier to compare + # differences between compilation units. + sort_keys_to_values = {str(v): v for v in value} + items = tuple(sort_keys_to-values[sort_key] for sort_key in sorted(sort_keys_to_values)) + else: + items = value + for item in items: assert _is_literal_value(item) index = self.literal_index(item) result.append(str(index)) From 9c389e130720b6049dcdd33d80b6050059e9410d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 1 Oct 2025 20:42:02 +0000 Subject: [PATCH 02/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/literals.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mypyc/codegen/literals.py b/mypyc/codegen/literals.py index ac58baf3f1ad..6df6d39e772a 100644 --- a/mypyc/codegen/literals.py +++ b/mypyc/codegen/literals.py @@ -170,9 +170,11 @@ def _encode_collection_values( # to improve the determinism of the generated C file, making it easier to compare # differences between compilation units. sort_keys_to_values = {str(v): v for v in value} - items = tuple(sort_keys_to-values[sort_key] for sort_key in sorted(sort_keys_to_values)) + items = tuple( + sort_keys_to - values[sort_key] for sort_key in sorted(sort_keys_to_values) + ) else: - items = value + items = value for item in items: assert _is_literal_value(item) index = self.literal_index(item) From 4f89fa9ae9f5d9b5b0276bc3ede43ab6922a0c59 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Wed, 1 Oct 2025 16:44:25 -0400 Subject: [PATCH 03/10] Update literals.py --- mypyc/codegen/literals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/literals.py b/mypyc/codegen/literals.py index 6df6d39e772a..6f706077f47d 100644 --- a/mypyc/codegen/literals.py +++ b/mypyc/codegen/literals.py @@ -171,7 +171,7 @@ def _encode_collection_values( # differences between compilation units. sort_keys_to_values = {str(v): v for v in value} items = tuple( - sort_keys_to - values[sort_key] for sort_key in sorted(sort_keys_to_values) + sort_keys_to_values[sort_key] for sort_key in sorted(sort_keys_to_values) ) else: items = value From ede7bf02e3e3f435f9e2b862b4b3f006e50e141c Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Wed, 1 Oct 2025 16:46:04 -0400 Subject: [PATCH 04/10] Update literals.py --- mypyc/codegen/literals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/literals.py b/mypyc/codegen/literals.py index 6f706077f47d..566d035c300c 100644 --- a/mypyc/codegen/literals.py +++ b/mypyc/codegen/literals.py @@ -169,7 +169,7 @@ def _encode_collection_values( # even though frozensets are not sorted in python, we need to sort the items here # to improve the determinism of the generated C file, making it easier to compare # differences between compilation units. - sort_keys_to_values = {str(v): v for v in value} + sort_keys_to_values = {str(v) + type(v).__name__: v for v in value} items = tuple( sort_keys_to_values[sort_key] for sort_key in sorted(sort_keys_to_values) ) From e67f4673a4754305a22f128bed8757677e6043a3 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 2 Oct 2025 02:09:59 -0400 Subject: [PATCH 05/10] Update literals.py --- mypyc/codegen/literals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/literals.py b/mypyc/codegen/literals.py index 566d035c300c..c796ff5a9e8a 100644 --- a/mypyc/codegen/literals.py +++ b/mypyc/codegen/literals.py @@ -168,7 +168,7 @@ def _encode_collection_values( if isinstance(value, frozenset): # even though frozensets are not sorted in python, we need to sort the items here # to improve the determinism of the generated C file, making it easier to compare - # differences between compilation units. + # differences in the C files generated by different versions of your code. sort_keys_to_values = {str(v) + type(v).__name__: v for v in value} items = tuple( sort_keys_to_values[sort_key] for sort_key in sorted(sort_keys_to_values) From 44407a5e728e66bf68347f4d5870357c6fdd32ab Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 2 Oct 2025 02:11:35 -0400 Subject: [PATCH 06/10] Update literals.py --- mypyc/codegen/literals.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mypyc/codegen/literals.py b/mypyc/codegen/literals.py index c796ff5a9e8a..63620f8d545b 100644 --- a/mypyc/codegen/literals.py +++ b/mypyc/codegen/literals.py @@ -169,10 +169,8 @@ def _encode_collection_values( # even though frozensets are not sorted in python, we need to sort the items here # to improve the determinism of the generated C file, making it easier to compare # differences in the C files generated by different versions of your code. - sort_keys_to_values = {str(v) + type(v).__name__: v for v in value} - items = tuple( - sort_keys_to_values[sort_key] for sort_key in sorted(sort_keys_to_values) - ) + sort_helper = {str(v) + type(v).__name__: v for v in value} + items = tuple(sort_helper[key] for key in sorted(sort_helper)) else: items = value for item in items: From f8f4700086cd5efef1981354b81f1e084fa0baf7 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 2 Oct 2025 12:31:17 -0400 Subject: [PATCH 07/10] recurse --- mypyc/codegen/literals.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/mypyc/codegen/literals.py b/mypyc/codegen/literals.py index 63620f8d545b..e050186c4431 100644 --- a/mypyc/codegen/literals.py +++ b/mypyc/codegen/literals.py @@ -141,7 +141,20 @@ def encoded_tuple_values(self) -> list[str]: return self._encode_collection_values(self.tuple_literals) def encoded_frozenset_values(self) -> list[str]: - return self._encode_collection_values(self.frozenset_literals) + def sort_frozenset_recursive(frozen: frozenset[object]) -> list[object]: + # even though frozensets are not sorted in python, we need to sort the items here + # to improve the determinism of the generated C file, making it easier to compare + # differences in the C files generated by different versions of your code. + sort_helper = {repr(v) + type(v).__name__: v for v in value} + items = [] + for key in sorted(sort_helper): + v = sort_helper[key] + if isinstance(v, frozenset): + v = sort_frozenset_items(v) + items.append(v) + return items + + return self._encode_collection_values({sort_frozenset_items(fr): i for fr, i in self.frozenset_literals.items()}) def _encode_collection_values( self, values: dict[tuple[object, ...], int] | dict[frozenset[object], int] @@ -165,15 +178,7 @@ def _encode_collection_values( for i in range(count): value = value_by_index[i] result.append(str(len(value))) - if isinstance(value, frozenset): - # even though frozensets are not sorted in python, we need to sort the items here - # to improve the determinism of the generated C file, making it easier to compare - # differences in the C files generated by different versions of your code. - sort_helper = {str(v) + type(v).__name__: v for v in value} - items = tuple(sort_helper[key] for key in sorted(sort_helper)) - else: - items = value - for item in items: + for item in value: assert _is_literal_value(item) index = self.literal_index(item) result.append(str(index)) From 34afe243a96572889e947524dcdc42f7a621193b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 2 Oct 2025 16:32:38 +0000 Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- mypyc/codegen/literals.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/mypyc/codegen/literals.py b/mypyc/codegen/literals.py index e050186c4431..e56895775936 100644 --- a/mypyc/codegen/literals.py +++ b/mypyc/codegen/literals.py @@ -154,7 +154,9 @@ def sort_frozenset_recursive(frozen: frozenset[object]) -> list[object]: items.append(v) return items - return self._encode_collection_values({sort_frozenset_items(fr): i for fr, i in self.frozenset_literals.items()}) + return self._encode_collection_values( + {sort_frozenset_items(fr): i for fr, i in self.frozenset_literals.items()} + ) def _encode_collection_values( self, values: dict[tuple[object, ...], int] | dict[frozenset[object], int] From 97377e39eeda72603a43242715dada090f3bf223 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 2 Oct 2025 12:42:53 -0400 Subject: [PATCH 09/10] Update literals.py --- mypyc/codegen/literals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mypyc/codegen/literals.py b/mypyc/codegen/literals.py index e56895775936..754245df904a 100644 --- a/mypyc/codegen/literals.py +++ b/mypyc/codegen/literals.py @@ -150,12 +150,12 @@ def sort_frozenset_recursive(frozen: frozenset[object]) -> list[object]: for key in sorted(sort_helper): v = sort_helper[key] if isinstance(v, frozenset): - v = sort_frozenset_items(v) + v = sort_frozenset_recursive(v) items.append(v) return items return self._encode_collection_values( - {sort_frozenset_items(fr): i for fr, i in self.frozenset_literals.items()} + {sort_frozenset_recursive(fr): i for fr, i in self.frozenset_literals.items()} ) def _encode_collection_values( From 1ad379a784687783a3df20c6801428c02fd75ed6 Mon Sep 17 00:00:00 2001 From: BobTheBuidler <70677534+BobTheBuidler@users.noreply.github.com> Date: Thu, 2 Oct 2025 12:43:57 -0400 Subject: [PATCH 10/10] Update literals.py --- mypyc/codegen/literals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mypyc/codegen/literals.py b/mypyc/codegen/literals.py index 754245df904a..9d69e1f2aeb3 100644 --- a/mypyc/codegen/literals.py +++ b/mypyc/codegen/literals.py @@ -141,7 +141,7 @@ def encoded_tuple_values(self) -> list[str]: return self._encode_collection_values(self.tuple_literals) def encoded_frozenset_values(self) -> list[str]: - def sort_frozenset_recursive(frozen: frozenset[object]) -> list[object]: + def sort_frozenset_recursive(value: frozenset[object]) -> list[object]: # even though frozensets are not sorted in python, we need to sort the items here # to improve the determinism of the generated C file, making it easier to compare # differences in the C files generated by different versions of your code.