Skip to content

Commit fd8276b

Browse files
authored
VER: Release 0.43.1
See release notes.
2 parents dba53d7 + f7cf43b commit fd8276b

File tree

7 files changed

+56
-11
lines changed

7 files changed

+56
-11
lines changed

CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,14 @@
11
# Changelog
22

3+
## 0.43.1 - 2024-10-15
4+
5+
#### Enhancements
6+
- `DBNStore.to_parquet` now accepts keyword arguments, allowing `where` and `schema` to be specified for the underlying `pyarrow.parquet.ParquetWriter`
7+
- Improved record processing time for the `Live` client
8+
9+
#### Bug fixes
10+
- Fixed an issue where validating the checksum of a batch file loaded the entire file into memory
11+
312
## 0.43.0 - 2024-10-09
413

514
This release drops support for Python 3.8 which has reached end-of-life.

databento/common/dbnstore.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -963,7 +963,7 @@ def to_df(
963963
def to_parquet(
964964
self,
965965
path: PathLike[str] | str,
966-
price_type: Literal["fixed", "float"] = "float",
966+
price_type: PriceType | str = PriceType.FLOAT,
967967
pretty_ts: bool = True,
968968
map_symbols: bool = True,
969969
schema: Schema | str | None = None,
@@ -992,6 +992,9 @@ def to_parquet(
992992
This is only required when reading a DBN stream with mixed record types.
993993
mode : str, default "w"
994994
The file write mode to use, either "x" or "w".
995+
**kwargs : Any
996+
Keyword arguments to pass to the `pyarrow.parquet.ParquetWriter`.
997+
These can be used to override the default behavior of the writer.
995998
996999
Raises
9971000
------
@@ -1000,10 +1003,12 @@ def to_parquet(
10001003
If the DBN schema is unspecified and cannot be determined.
10011004
10021005
"""
1003-
if price_type == "decimal":
1006+
file_path = validate_file_write_path(path, "path", exist_ok=mode == "w")
1007+
price_type = validate_enum(price_type, PriceType, "price_type")
1008+
1009+
if price_type == PriceType.DECIMAL:
10041010
raise ValueError("the 'decimal' price type is not currently supported")
10051011

1006-
file_path = validate_file_write_path(path, "path", exist_ok=mode == "w")
10071012
schema = validate_maybe_enum(schema, Schema, "schema")
10081013
if schema is None:
10091014
if self.schema is None:
@@ -1025,8 +1030,8 @@ def to_parquet(
10251030
# Initialize the writer using the first DataFrame
10261031
parquet_schema = pa.Schema.from_pandas(frame)
10271032
writer = pq.ParquetWriter(
1028-
where=file_path,
1029-
schema=parquet_schema,
1033+
where=kwargs.pop("where", file_path),
1034+
schema=kwargs.pop("schema", parquet_schema),
10301035
**kwargs,
10311036
)
10321037
writer.write_table(

databento/historical/api/batch.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -431,7 +431,11 @@ def _download_batch_file(
431431
hash_algo, _, hash_hex = batch_download_file.hash_str.partition(":")
432432

433433
if hash_algo == "sha256":
434-
output_hash = hashlib.sha256(output_path.read_bytes())
434+
output_hash = hashlib.new(hash_algo)
435+
with open(output_path, "rb") as fd:
436+
while chunk := fd.read(32_000_000):
437+
output_hash.update(chunk)
438+
435439
if output_hash.hexdigest() != hash_hex:
436440
warn_msg = f"Downloaded file failed checksum validation: {output_path.name}"
437441
logger.warning(warn_msg)

databento/live/session.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ def __init__(
199199
self._metadata: SessionMetadata = metadata
200200
self._user_callbacks = user_callbacks
201201
self._user_streams = user_streams
202-
self._last_ts_event: pd.Timestamp | None = None
202+
self._last_ts_event: int | None = None
203203

204204
def received_metadata(self, metadata: databento_dbn.Metadata) -> None:
205205
if self._metadata:
@@ -228,7 +228,7 @@ def received_record(self, record: DBNRecord) -> None:
228228
self._dispatch_callbacks(record)
229229
if self._dbn_queue.is_enabled():
230230
self._queue_for_iteration(record)
231-
self._last_ts_event = record.pretty_ts_event
231+
self._last_ts_event = record.ts_event
232232

233233
return super().received_record(record)
234234

@@ -653,7 +653,7 @@ async def _reconnect(self) -> None:
653653
if self._protocol._last_ts_event is not None:
654654
gap_start = pd.Timestamp(self._protocol._last_ts_event, tz="UTC")
655655
elif self._metadata.data is not None:
656-
gap_start = self._metadata.data.start
656+
gap_start = pd.Timestamp(self._metadata.data.start, tz="UTC")
657657
else:
658658
gap_start = pd.Timestamp.utcnow()
659659

databento/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.43.0"
1+
__version__ = "0.43.1"

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "databento"
3-
version = "0.43.0"
3+
version = "0.43.1"
44
description = "Official Python client library for Databento"
55
authors = [
66
"Databento <[email protected]>",

tests/test_historical_bento.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,33 @@ def test_to_parquet(
731731
pd.testing.assert_frame_equal(actual, expected)
732732

733733

734+
def test_to_parquet_kwargs(
735+
monkeypatch: pytest.MonkeyPatch,
736+
tmp_path: Path,
737+
test_data: Callable[[Dataset, Schema], bytes],
738+
) -> None:
739+
# Arrange
740+
monkeypatch.setattr(databento.common.dbnstore, "PARQUET_CHUNK_SIZE", 1)
741+
stub_data = test_data(Dataset.GLBX_MDP3, Schema.MBO)
742+
data = DBNStore.from_bytes(data=stub_data)
743+
parquet_file = tmp_path / "test.parquet"
744+
745+
# Act
746+
expected = data.to_df()
747+
data.to_parquet(
748+
parquet_file,
749+
compression="zstd",
750+
write_statistics="false",
751+
)
752+
actual = pd.read_parquet(parquet_file)
753+
754+
# Replace None values with np.nan
755+
actual.fillna(value=np.nan)
756+
757+
# Assert
758+
pd.testing.assert_frame_equal(actual, expected)
759+
760+
734761
@pytest.mark.parametrize(
735762
"expected_schema",
736763
[pytest.param(schema, id=str(schema)) for schema in Schema.variants()],

0 commit comments

Comments
 (0)