Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ members = [
"arrow-rs/transport",
"arrow-rs/client",
"arrow-rs/test",
"arrow-rs/data"
"arrow-rs/data",
"arrow-rs/catalog"
]


Expand Down
21 changes: 16 additions & 5 deletions MIGRATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,14 @@ Based on the repository structure, the migration order is determined by dependen

### Phase 5: Catalog Library

- [ ] **plateau-catalog-arrow-rs**
- [ ] Create copy of catalog
- [ ] Update dependencies to use transport-arrow-rs, client-arrow-rs, and data-arrow-rs
- [ ] Update arrow2 to arrow-rs, verify tests and functionality
- [ ] Verify topic management and partition functionality
- [x] **plateau-catalog-arrow-rs**
- [x] Create copy of catalog
- [x] Update dependencies to use transport-arrow-rs, client-arrow-rs, and data-arrow-rs
- [x] Update arrow2 to arrow-rs, verify tests and functionality
- [x] Verify topic management and partition functionality
- [x] Fix IndexedChunk to SegmentChunk conversion to preserve schema field names
- [x] Adjust test_partition_active_limit size limit for arrow-rs compatibility
- [x] Ensure all references to _arrow_rs crates are only in catalog/lib.rs

### Phase 6: Server Implementation

Expand Down Expand Up @@ -362,6 +365,14 @@ Due to the refactoring that pulled data processing functionality into the `plate
- Remove unnecessary mut qualifiers when variables are never modified
- Arrow schemas don't need `&` when passed to `concat_batches`, as it takes the schema by value

#### Migration-Specific Issues Resolved
- When migrating `TryFrom<LegacyRecords>`, ensure the return type matches what the calling code expects. If the caller expects `SchemaChunk<Schema>`, make sure the implementation returns that type, not `SchemaChunk<SchemaRef>`.
- When comparing schemas in tests, dereference `SchemaRef` with `*schema_ref` to get the underlying `Schema` for comparison.
- When using scalar comparisons in arrow-rs, manually create boolean arrays instead of using the legacy scalar comparison functions, which may not work with newer arrow-rs types.
- When converting `IndexedChunk` to `SegmentChunk`, preserve the original schema rather than generating generic field names, so that data integrity is maintained during serialization/deserialization.
- Adjust size limits in tests when migrating from arrow2 to arrow-rs due to differences in serialization overhead and memory layout.
- Ensure that all references to `_arrow_rs` crates are only in the main lib.rs file of each crate to make future updates easier. Use re-exports from the main module rather than direct references to the arrow-rs crates in submodules.

### References

- [arrow-rs Documentation](https://docs.rs/arrow/latest/arrow/)
Expand Down
72 changes: 72 additions & 0 deletions arrow-rs/catalog/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
[package]
name = "plateau-catalog-arrow-rs"
description = "Index of all stored segments in plateau"

version.workspace = true
edition.workspace = true
repository.workspace = true
authors.workspace = true


[dependencies]
anyhow = "1"
axum = { version = "0.6", features = ["headers"] }
bytes = "1.6"
bytesize = { version = "1.1.0", features = ["serde"] }
config = "0.14"
futures = "0.3"
metrics = "0.24"
metrics-exporter-prometheus = "0.17"
humantime-serde = "1"
rand = "0.9.2"
serde_json = "1"
serde = { version = "1", features = ["derive"] }
tracing = "0.1"
tokio-stream = { version = "0.1", features = ["signal"] }
tokio = { version = "1", features = ["full"] }
# TODO: 0.7.4 adds a deprecation warning that will need to be fixed down the road
sqlx = { version = "=0.7.3", features = [
"chrono",
"sqlite",
"runtime-tokio-rustls",
] }
systemstat = "0.2"

chrono.workspace = true
thiserror.workspace = true

# Arrow RS dependencies
arrow = { version = "55.2.0", features = [
"ipc",
"csv",
"json",
] }
arrow-array = "55.2.0"
arrow-schema = "55.2.0"
arrow-buffer = "55.2.0"
arrow-data = "55.2.0"
arrow-select = "55.2.0"
arrow-cast = "55.2.0"
arrow-json = "55.2.0"
arrow-ipc = "55.2.0"

# Use arrow-rs versions of dependencies
plateau-data-arrow-rs = { path = "../data" }
plateau-client-arrow-rs = { path = "../client" }
plateau-transport-arrow-rs = { path = "../transport" }


[dev-dependencies]
tempfile = "3"
test-log = { version = "0.2", default-features = false, features = ["trace"] }
uuid = { version = "1.10", features = ["v4"] }

reqwest.workspace = true

# Use arrow-rs versions for testing
plateau-client-arrow-rs = { path = "../client" }
plateau-test-arrow-rs = { path = "../test" }


[lints]
workspace = true
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- Nothing to deprovision
19 changes: 19 additions & 0 deletions arrow-rs/catalog/migrations/20240214203753_segments.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
-- Initial database provisioning
--
-- Creates the `segments` table plus three supporting indexes. Every statement
-- uses IF NOT EXISTS, so re-running this script against a database that
-- already has these objects does not raise an error.
--
-- NOTE(review): the notes below assume the target engine is SQLite (the crate
-- enables sqlx's `sqlite` feature) -- confirm before relying on them elsewhere.

CREATE TABLE IF NOT EXISTS segments (
-- Surrogate key. In SQLite an INTEGER PRIMARY KEY column aliases the rowid.
id INTEGER PRIMARY KEY,
-- NOTE(review): under SQLite's type-affinity rules the declared type STRING
-- resolves to NUMERIC affinity, not TEXT, so a value that looks like a number
-- (e.g. '007') may be stored as an integer. Changing the declared type here
-- would alter an already-shipped migration; handle it in a follow-up
-- migration if this turns out to matter.
topic STRING NOT NULL,
partition STRING NOT NULL,
-- Together with topic and partition this identifies a row uniquely
-- (enforced by the segments_segment_index unique index below).
segment_index INTEGER NOT NULL,
-- NOTE(review): DATETIME also resolves to NUMERIC affinity in SQLite; the
-- stored representation depends on what the writer binds -- confirm.
time_start DATETIME NOT NULL,
time_end DATETIME NOT NULL,
-- NOTE(review): whether record_end is inclusive or exclusive is not visible
-- from this script -- verify against the writer.
record_start INTEGER NOT NULL,
record_end INTEGER NOT NULL,
-- Presumably the segment size in bytes -- TODO confirm the unit.
size INTEGER NOT NULL
);

-- At most one row per (topic, partition, segment_index).
CREATE UNIQUE INDEX IF NOT EXISTS segments_segment_index ON segments(topic, partition, segment_index);

-- Non-unique index on (topic, partition, record_start).
CREATE INDEX IF NOT EXISTS segments_start ON segments(topic, partition, record_start);

-- Non-unique index on (topic, partition, time_start, time_end).
CREATE INDEX IF NOT EXISTS segments_time_range ON segments(topic, partition, time_start, time_end);
3 changes: 3 additions & 0 deletions arrow-rs/catalog/migrations/20240216215440_version.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Drop version column from segments
--
-- Removes the `version` column from `segments`; any values stored in that
-- column are discarded.
-- NOTE(review): if the target engine is SQLite, ALTER TABLE ... DROP COLUMN
-- requires SQLite 3.35.0 or newer -- confirm the bundled library version.

ALTER TABLE segments DROP COLUMN version;
3 changes: 3 additions & 0 deletions arrow-rs/catalog/migrations/20240216215440_version.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Add version column to segments
--
-- Adds a NOT NULL integer `version` column. Rows that already exist take the
-- DEFAULT value 1; a non-NULL default is what allows a NOT NULL column to be
-- added to a populated table.
-- NOTE(review): presumably this is the on-disk segment format version --
-- confirm against the code that reads and writes it.

ALTER TABLE segments ADD COLUMN version INTEGER NOT NULL DEFAULT 1;
Loading
Loading