Skip to content

PYTHON-5289 Validate ignored bits are 0 on write for bson.BinaryVector #2397

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions bson/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from __future__ import annotations

import struct
import warnings
from enum import Enum
from typing import TYPE_CHECKING, Any, Optional, Sequence, Tuple, Type, Union, overload
from uuid import UUID
Expand Down Expand Up @@ -255,6 +256,9 @@ def __eq__(self, other: Any) -> bool:
self.dtype == other.dtype and self.padding == other.padding and self.data == other.data
)

def __len__(self) -> int:
"""Return the number of items in ``self.data``, as reported by ``len()``."""
return len(self.data)


class Binary(bytes):
"""Representation of BSON binary data.
Expand Down Expand Up @@ -471,6 +475,10 @@ def from_vector(

metadata = struct.pack("<sB", dtype.value, padding)
data = struct.pack(f"<{len(vector)}{format_str}", *vector) # type: ignore
if padding and len(vector) and not (data[-1] & ((1 << padding) - 1)) == 0:
raise ValueError(
"Vector has a padding P, but bits in the final byte lower than P are non-zero. They must be zero."
)
return cls(metadata + data, subtype=VECTOR_SUBTYPE)

def as_vector(self) -> BinaryVector:
Expand Down Expand Up @@ -522,6 +530,12 @@ def as_vector(self) -> BinaryVector:
dtype_format = "B"
format_string = f"<{n_values}{dtype_format}"
unpacked_uint8s = list(struct.unpack_from(format_string, self, position))
if padding and n_values and unpacked_uint8s[-1] & (1 << padding) - 1 != 0:
warnings.warn(
"Vector has a padding P, but bits in the final byte lower than P are non-zero. In the next major version, they must be zero.",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So in the next major version, this warning will become an error to match the behavior in from_vector?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. We will make the significant change (raising an exception instead of a warning) in the next major release and document it in the changelog and the API docs. The ticket for that is PYTHON-5280 and has 5.0 as its fix version.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add "pymongo version 5.0" rather than "the next major version"? This helps show them the version (5.0) and that the warning is coming from pymongo.

DeprecationWarning,
stacklevel=2,
)
return BinaryVector(unpacked_uint8s, dtype, padding)

else:
Expand Down
3 changes: 3 additions & 0 deletions doc/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,9 @@ PyMongo 4.13 brings a number of changes including:
or the `migration guide <https://www.mongodb.com/docs/languages/python/pymongo-driver/current/reference/migration/>`_ for more information.
- Fixed a bug where :class:`pymongo.write_concern.WriteConcern` repr was not eval-able
when using ``w="majority"``.
- Ignored bits in a BSON BinaryVector of PACKED_BIT dtype should be set to zero.
On writes, this is enforced and is a breaking change.
Reads from the database will not fail; however, a warning will be triggered.

Issues Resolved
...............
Expand Down
17 changes: 12 additions & 5 deletions test/test_bson.py
Original file line number Diff line number Diff line change
Expand Up @@ -739,7 +739,7 @@ def test_vector(self):
"""Tests of subtype 9"""
# We start with valid cases, across the 3 dtypes implemented.
# Work with a simple vector that can be interpreted as int8, float32, or ubyte
list_vector = [127, 7]
list_vector = [127, 8]
# As INT8, vector has length 2
binary_vector = Binary.from_vector(list_vector, BinaryVectorDtype.INT8)
vector = binary_vector.as_vector()
Expand All @@ -764,18 +764,18 @@ def test_vector(self):
uncompressed = ""
for val in list_vector:
uncompressed += format(val, "08b")
assert uncompressed[:-padding] == "0111111100000"
assert uncompressed[:-padding] == "0111111100001"

# It is worthwhile explicitly showing the values encoded to BSON
padded_doc = {"padded_vec": padded_vec}
assert (
encode(padded_doc)
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x07\x00"
== b"\x1a\x00\x00\x00\x05padded_vec\x00\x04\x00\x00\x00\t\x10\x03\x7f\x08\x00"
)
# and dumped to json
assert (
json_util.dumps(padded_doc)
== '{"padded_vec": {"$binary": {"base64": "EAN/Bw==", "subType": "09"}}}'
== '{"padded_vec": {"$binary": {"base64": "EAN/CA==", "subType": "09"}}}'
)

# FLOAT32 is also implemented
Expand All @@ -791,8 +791,15 @@ def test_vector(self):
else:
self.fail("Failed to raise an exception.")

# Test form of Binary.from_vector(BinaryVector)
# Test one must pass zeros for all ignored bits
try:
Binary.from_vector([255], BinaryVectorDtype.PACKED_BIT, padding=7)
except Exception as exc:
self.assertIsInstance(exc, ValueError)
else:
self.fail("Failed to raise an exception.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assertion should use assertRaises.


# Test form of Binary.from_vector(BinaryVector)
assert padded_vec == Binary.from_vector(
BinaryVector(list_vector, BinaryVectorDtype.PACKED_BIT, padding)
)
Expand Down
Loading