From 8c18703d581651f797511a9ee590d319abf48e52 Mon Sep 17 00:00:00 2001 From: edanhub Date: Fri, 26 Sep 2025 12:43:59 +0200 Subject: [PATCH 1/5] pySCG: Adding explanation of the 'is' operator to CWE-595 Signed-off-by: edanhub --- .../CWE-697/CWE-595/README.md | 34 ++++++++++++++----- .../CWE-697/CWE-595/compliant01.py | 21 ++++++++---- .../CWE-697/CWE-595/noncompliant01.py | 9 +++++ 3 files changed, 50 insertions(+), 14 deletions(-) diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md index b30114ce1..7b958ef62 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md +++ b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md @@ -6,7 +6,7 @@ You want to implement the `__eq__` method on a class if you believe you ever wan ## Non-Compliant Code Example -The non-compliant code shows how the default comparison operator compares object references rather than the object values. Furthermore, it displays how this causes issues when comparing lists of objects, although it applies to other types of collections as well. Finally, it shows how the `in` operator also depends on the behavior of the `__eq__` method and, therefore, also returns a non-desirable result. +The non-compliant code shows how the default comparison operator compares object references rather than the object values. Furthermore, it displays how this causes issues when comparing lists of objects, although it applies to other types of collections as well. Then, it shows how the `in` operator also depends on the behavior of the `__eq__` method and, therefore, also returns a non-desirable result. Finally, it performs the comparison with the `is` operator, which checks as to whether the references point to the same object regardless of the stored value. 
[*noncompliant01.py:*](noncompliant01.py) @@ -27,31 +27,40 @@ print(Integer(12) == Integer(12)) print([Integer(12)] == [Integer(12)]) # And this is equally this will always be False as well print(Integer(12) in [Integer(10), Integer(12)]) +# The 'is' will return True only if both references point to the same object +a = Integer(12) +b = a +# Here, a and b point to the same Integer, so 'is' returns True +print(a is b) + +b = Integer(12) +# Even though b still points to an Integer of the same value, it is a new object, so 'is' returns False +print(a is b) ``` ## Compliant Solution -In this compliant solution the `__eq__` method is implemented and all the comparisons now correctly compares the object values, rather than the object reference. +In this compliant solution, the `__eq__` method is implemented and the comparisons that do not use `is` now correctly compare the object values, rather than the object reference. The `is` operator does not call `__eq__`, hence the last print will still display `False`. 
[*compliant01.py:*](compliant01.py) ```py """ Compliant Code Example """ - - + + class Integer: def __init__(self, value): self.value = value - + def __eq__(self, other): if isinstance(other, type(self)): return self.value == other.value if isinstance(other, int): return self.value == other return False - - + + ##################### # exploiting above code example ##################### @@ -59,9 +68,18 @@ class Integer: print(Integer(12) == Integer(12)) print([Integer(12)] == [Integer(12)]) print(Integer(12) in [Integer(10), Integer(12)]) - + # By adding the handling for int we also support print(Integer(12) == 12) +# The 'is' will return True only if both references point to the same object +a = Integer(12) +b = a +# Here, a and b point to the same Integer, so 'is' returns True +print(a is b) + +b = Integer(12) +# Since the 'is' operator does not call __eq__, print below will still return False +print(a is b) ``` diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/compliant01.py b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/compliant01.py index 26403152b..d4d60152f 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/compliant01.py +++ b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/compliant01.py @@ -1,20 +1,20 @@ # SPDX-FileCopyrightText: OpenSSF project contributors # SPDX-License-Identifier: MIT """ Compliant Code Example """ - - + + class Integer: def __init__(self, value): self.value = value - + def __eq__(self, other): if isinstance(other, type(self)): return self.value == other.value if isinstance(other, int): return self.value == other return False - - + + ##################### # exploiting above code example ##################### @@ -22,6 +22,15 @@ def __eq__(self, other): print(Integer(12) == Integer(12)) print([Integer(12)] == [Integer(12)]) print(Integer(12) in [Integer(10), Integer(12)]) - + # By adding the handling for int we also support print(Integer(12) == 12) +# The 'is' will return True only if both 
references point to the same object +a = Integer(12) +b = a +# Here, a and b point to the same Integer, so 'is' returns True +print(a is b) + +b = Integer(12) +# Since the 'is' operator does not call __eq__, print below will still return False +print(a is b) diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/noncompliant01.py b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/noncompliant01.py index eeb05ce09..7759a4f0a 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/noncompliant01.py +++ b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/noncompliant01.py @@ -16,3 +16,12 @@ def __init__(self, value): print([Integer(12)] == [Integer(12)]) # And this is equally this will always be False as well print(Integer(12) in [Integer(10), Integer(12)]) +# The 'is' will return True only if both references point to the same object +a = Integer(12) +b = a +# Here, a and b point to the same Integer, so 'is' returns True +print(a is b) + +b = Integer(12) +# Even though b still points to an Integer of the same value, it is a new object, so 'is' returns False +print(a is b) From d0e7050f326d2612beda92503b9804bcde5d7280 Mon Sep 17 00:00:00 2001 From: edanhub Date: Fri, 3 Oct 2025 13:19:32 +0200 Subject: [PATCH 2/5] CWE-595: Added code example for string interning and integer caching Signed-off-by: edanhub --- .../CWE-697/CWE-595/README.md | 55 ++++++++++++++++++- .../CWE-697/CWE-595/example01.py | 25 +++++++++ 2 files changed, 79 insertions(+), 1 deletion(-) create mode 100644 docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/example01.py diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md index 7b958ef62..95acc9bf2 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md +++ b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md @@ -1,9 +1,60 @@ # CWE-595: Comparison of Object References Instead of Object Contents -In Python, the 
`==` operator is implemented by the `__eq__` method on an object [[python.org data model 2023](https://docs.python.org/3/reference/datamodel.html?highlight=__eq__#object.__eq__)]. For built-in types like `int` and `str`, the comparison is implemented in the interpreter. The main issue comes when implementing custom classes, where the default implementation compares object references using the `is` operator. The `is` operator compares the identities of the objects, equivalent to `id(obj1) == id(obj2)`. The `id` function is built into Python, and in the CPython interpreter, the standard implementation, it returns the object's memory address [[de Langen 2023](https://realpython.com/python-is-identity-vs-equality/)]. +Prevent unexpected results by knowing the differences between comparison operators such as `==` and `is`. + + Python falls back to comparing objects' `id()` if the `__eq__` implementation is missing for a custom class. In Python, the `==` operator is implemented by the `__eq__` method on an object [[python.org data model 2023](https://docs.python.org/3/reference/datamodel.html?highlight=__eq__#object.__eq__)]. For built-in types like `int` and `str`, the comparison is implemented in the interpreter. The main issue comes when implementing custom classes, where the default implementation compares object references using the `is` operator. The `is` operator compares the identities of the objects, equivalent to `id(obj1) == id(obj2)`. The `id` function is built into Python, and in the CPython interpreter, the standard implementation, it returns the object's memory address [[de Langen 2023](https://realpython.com/python-is-identity-vs-equality/)]. You want to implement the `__eq__` method on a class if you believe you ever want to compare it to another object or find it in a list of objects. 
Actually, it is so common that the `dataclasses.dataclass` decorator by default implements it for you [[dataclasses — Data Classes — Python 3.11.4 documentation](https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass)]. +Be aware of Python's memory optimization for strings and numbers as demonstrated in `example01.py` code. +Python tries to avoid allocating more memory for the same string. The process of reusing already existing strings is a Python optimization technique known as **String interning** [[sys — System-specific parameters and functions — Python 3.11.4 documentation](https://docs.python.org/3/library/sys.html#sys.intern)] According to the documentation, "CPython keeps an array of integer objects for all integers between `-5` and `256`. When you create an `int` in that range you actually just get back a reference to the existing object." [[Integer objects — Python 3.11.4 documentation](https://docs.python.org/3/c-api/long.html#c.PyLong_FromLong)] + +_[example01.py:](example01.py)_ + +```py +""" Code Example """ + +print("-" * 10 + "Memory optimization with strings" + 10 * "-") +a = "foobar" +b = "foobar" +c = ''.join(["foo", "bar"]) +print(f"a is b: {a} is {b}?", a is b) +print(f"a is c: {a} is {c}?", a is c) +print(f"a == c: {a} == {c}?", a == c) +print(f"size? len(a)={len(a)} len(b)={len(b)} len(c)={len(c)}") + +print("-" * 10 + "Memory optimization with numbers" + 10 * "-") +a = b = 256 +print (f"{a} is {b}?", a is b) +a = b = 257 +print (f"{a} is {b}?", a is b) + +print("-" * 10 + "Memory optimization with numbers in a loop" + 10 * "-") +a = b = 255 +while(a is b): + a += 1 + b += 1 + print (f"{a} is {b}?", a is b) +``` + + __Output of example01.py:__ + +```bash +----------Memory optimization with strings---------- +a is b: foobar is foobar? True +a is c: foobar is foobar? False +a == c: foobar == foobar? True +size? len(a)=6 len(b)=6 len(c)=6 +----------Memory optimization with numbers---------- +256 is 256? True +257 is 257? 
True +----------Memory optimization with numbers in a loop---------- +256 is 256? True +257 is 257? False +``` + +The first set of print statements illustrates string interning. While `a` and `b` reuse the same object, `c` is created by joining two new strings, which results in an object with a different `id()`. The variables in the middle example both point to the same number object, which is why comparing them after `a = b = 257` still returns `True` even though `257` falls outside of the cached range. However, when assigning values in a loop, Python needs to allocate new objects for numbers greater than `256` and thus will create two separate objects as soon as it hits `257`. The way caching and interning work may differ between running a Python script from a file and using the REPL, which may produce different results when running `example01.py` in Python's interactive mode. + ## Non-Compliant Code Example The non-compliant code shows how the default comparison operator compares object references rather than the object values. Furthermore, it displays how this causes issues when comparing lists of objects, although it applies to other types of collections as well. Then, it shows how the `in` operator also depends on the behavior of the `__eq__` method and, therefore, also returns a non-desirable result. Finally, it performs the comparison with the `is` operator, which checks as to whether the references point to the same object regardless of the stored value. @@ -104,3 +155,5 @@ print(a is b) |[[python.org data model 2023](https://docs.python.org/3/reference/datamodel.html?highlight=__eq__#object.__eq__)]|[3. 
Data model — Python 3.11.3 documentation](https://docs.python.org/3/reference/datamodel.html?highlight=__eq__#object.__eq__)| |[[de Langen 2023](https://realpython.com/python-is-identity-vs-equality/)]|[Python '!=' Is Not 'is not': Comparing Objects in Python – Real Python](https://realpython.com/python-is-identity-vs-equality/)| |[[dataclasses — Data Classes — Python 3.11.4 documentation](https://docs.python.org/3/library/dataclasses.html#dataclasses.dataclass)]|[9. Classes — Python 3.11.3 documentation](https://docs.python.org/3/tutorial/classes.html)| +|[[sys — System-specific parameters and functions — Python 3.11.4 documentation](https://docs.python.org/3/library/sys.html#sys.intern)]|[sys — System-specific parameters and functions — Python 3.11.3 documentation](https://docs.python.org/3/library/sys.html#sys.intern)| +|[[Integer objects — Python 3.11.4 documentation](https://docs.python.org/3/c-api/long.html#c.PyLong_FromLong)]|[Integer objects — Python 3.11.4 documentation](https://docs.python.org/3/c-api/long.html#c.PyLong_FromLong)| diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/example01.py b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/example01.py new file mode 100644 index 000000000..cfc6ccacb --- /dev/null +++ b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/example01.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: OpenSSF project contributors +# SPDX-License-Identifier: MIT +""" Code Example """ + +print("-" * 10 + "Memory optimization with strings" + 10 * "-") +a = "foobar" +b = "foobar" +c = ''.join(["foo", "bar"]) +print(f"a is b: {a} is {b}?", a is b) +print(f"a is c: {a} is {c}?", a is c) +print(f"a == c: {a} == {c}?", a == c) +print(f"size? 
len(a)={len(a)} len(b)={len(b)} len(c)={len(c)}") + +print("-" * 10 + "Memory optimization with numbers" + 10 * "-") +a = b = 256 +print (f"{a} is {b}?", a is b) +a = b = 257 +print (f"{a} is {b}?", a is b) + +print("-" * 10 + "Memory optimization with numbers in a loop" + 10 * "-") +a = b = 255 +while(a is b): + a += 1 + b += 1 + print (f"{a} is {b}?", a is b) From 551425b77769185782a64f5bc72eb26e8455da18 Mon Sep 17 00:00:00 2001 From: edanhub Date: Fri, 3 Oct 2025 13:27:19 +0200 Subject: [PATCH 3/5] Fix markdown linter errors Signed-off-by: edanhub --- .../CWE-697/CWE-595/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md index 95acc9bf2..a9186cc2a 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md +++ b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md @@ -37,7 +37,7 @@ while(a is b): print (f"{a} is {b}?", a is b) ``` - __Output of example01.py:__ + **Output of example01.py:** ```bash ----------Memory optimization with strings---------- @@ -59,7 +59,7 @@ The first set of print statements illustrates string interning. While `a` and `b The non-compliant code shows how the default comparison operator compares object references rather than the object values. Furthermore, it displays how this causes issues when comparing lists of objects, although it applies to other types of collections as well. Then, it shows how the `in` operator also depends on the behavior of the `__eq__` method and, therefore, also returns a non-desirable result. Finally, it performs the comparison with the `is` operator, which checks as to whether the references point to the same object regardless of the stored value. 
-[*noncompliant01.py:*](noncompliant01.py) +_[noncompliant01.py:](noncompliant01.py)_ ```py """ Non-compliant Code Example """ @@ -94,7 +94,7 @@ print(a is b) In this compliant solution, the `__eq__` method is implemented and the comparisons that do not use `is` now correctly compare the object values, rather than the object reference. The `is` operator does not call `__eq__`, hence the last print will still display `False`. -[*compliant01.py:*](compliant01.py) +_[compliant01.py:](compliant01.py)_ ```py """ Compliant Code Example """ From a307c58fb551faa8a54c849b894d69f897e653aa Mon Sep 17 00:00:00 2001 From: Hubert Daniszewski <61824500+s19110@users.noreply.github.com> Date: Fri, 3 Oct 2025 13:39:28 +0200 Subject: [PATCH 4/5] Update docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md Co-authored-by: myteron Signed-off-by: Hubert Daniszewski <61824500+s19110@users.noreply.github.com> --- .../CWE-697/CWE-595/README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md index a9186cc2a..7902ab8a2 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md +++ b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md @@ -57,7 +57,13 @@ The first set of print statements illustrates string interning. While `a` and `b ## Non-Compliant Code Example -The non-compliant code shows how the default comparison operator compares object references rather than the object values. Furthermore, it displays how this causes issues when comparing lists of objects, although it applies to other types of collections as well. Then, it shows how the `in` operator also depends on the behavior of the `__eq__` method and, therefore, also returns a non-desirable result. 
Finally, it performs the comparison with the `is` operator, which checks as to whether the references point to the same object regardless of the stored value. +The `noncompliant01.py` code demonstrates potentially unexpected outcomes when using different comparisons. + +* The `==` operator using `__eq__`, checks value equality for most built-in types, but checks for reference equality if the `__eq__` is missing in a custom class. So 12 == 12 is True and Integer(12) == Integer(12) is False. +* The `==` comparing lists of objects, that also applies to other types of collections. +* The `in` operator also depends on the behavior of the `__eq__` method +* The `is` operator that checks whether the references point to the same object regardless of the stored value. + _[noncompliant01.py:](noncompliant01.py)_ From cd706453ff20d71761cc46e9fbcdc1ac576fd002 Mon Sep 17 00:00:00 2001 From: edanhub Date: Fri, 3 Oct 2025 19:09:07 +0200 Subject: [PATCH 5/5] Markdown linter fixes for the suggestion Signed-off-by: edanhub --- .../Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md index 7902ab8a2..0cf02bb73 100644 --- a/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md +++ b/docs/Secure-Coding-Guide-for-Python/CWE-697/CWE-595/README.md @@ -57,14 +57,13 @@ The first set of print statements illustrates string interning. While `a` and `b ## Non-Compliant Code Example -The `noncompliant01.py` code demonstrates potentially unexpected outcomes when using different comparisons. +The `noncompliant01.py` code demonstrates potentially unexpected outcomes when using different comparisons. * The `==` operator using `__eq__`, checks value equality for most built-in types, but checks for reference equality if the `__eq__` is missing in a custom class. 
So 12 == 12 is True and Integer(12) == Integer(12) is False. -* The `==` comparing lists of objects, that also applies to other types of collections. +* The `==` comparing lists of objects, that also applies to other types of collections. * The `in` operator also depends on the behavior of the `__eq__` method * The `is` operator that checks whether the references point to the same object regardless of the stored value. - _[noncompliant01.py:](noncompliant01.py)_ ```py