Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions rust/sedona-spatial-join/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -107,3 +107,8 @@ harness = false
name = "stream_repartitioner"
path = "bench/partitioning/stream_repartitioner.rs"
harness = false

[[bench]]
name = "flat_vs_rtree"
path = "bench/partitioning/flat_vs_rtree.rs"
harness = false
6 changes: 3 additions & 3 deletions rust/sedona-spatial-join/bench/partitioning/flat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@
// specific language governing permissions and limitations
// under the License.

mod common;

use std::hint::black_box;

use common::{default_extent, grid_partitions, sample_queries, GRID_DIM, QUERY_BATCH_SIZE};
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
use sedona_spatial_join::partitioning::{flat::FlatPartitioner, SpatialPartitioner};
use sedona_spatial_join::utils::internal_benchmark_util::{
default_extent, grid_partitions, sample_queries, GRID_DIM, QUERY_BATCH_SIZE,
};

fn bench_flat_partition_queries(c: &mut Criterion) {
let extent = default_extent();
Expand Down
86 changes: 86 additions & 0 deletions rust/sedona-spatial-join/bench/partitioning/flat_vs_rtree.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! Head-to-head benchmark of FlatPartitioner vs RTreePartitioner across
//! varying partition counts to find the optimal switch point.

use std::hint::black_box;

use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use sedona_spatial_join::partitioning::{
flat::FlatPartitioner, rtree::RTreePartitioner, SpatialPartitioner,
};
use sedona_spatial_join::utils::internal_benchmark_util::{
default_extent, grid_partitions, sample_queries, QUERY_BATCH_SIZE,
};

/// Grid dimensions to benchmark. Each `dim` yields a `dim x dim` grid, i.e.
/// `dim * dim` partitions:
/// 4x4=16, 5x5=25, 6x6=36, 8x8=64, 10x10=100, 16x16=256, 20x20=400
const GRID_DIMS: [usize; 7] = [4, 5, 6, 8, 10, 16, 20];

/// Benchmarks `FlatPartitioner` against `RTreePartitioner` on identical inputs.
///
/// For every entry in `GRID_DIMS`, a `dim x dim` grid of partitions is built
/// over the default extent and both partitioners are constructed from the same
/// bounding boxes. Each partitioner is then timed while partitioning the same
/// batch of `QUERY_BATCH_SIZE` query boxes. Results are reported in the
/// `flat_vs_rtree` group, keyed by partitioner name (`flat` / `rtree`) and the
/// partition count, with throughput measured in queries per iteration.
///
/// # Panics
///
/// Panics if either partitioner cannot be built or if a `partition` call
/// returns an error; both indicate a broken benchmark setup.
fn bench_flat_vs_rtree(c: &mut Criterion) {
    let extent = default_extent();

    // The query batch is derived only from the extent and the batch size (the
    // sampler uses a fixed seed), so generate it once and share it across all
    // grid sizes instead of rebuilding an identical batch on every iteration.
    let queries = sample_queries(&extent, QUERY_BATCH_SIZE);

    let mut group = c.benchmark_group("flat_vs_rtree");
    group.throughput(Throughput::Elements(QUERY_BATCH_SIZE as u64));

    for &dim in &GRID_DIMS {
        let num_partitions = dim * dim;
        let partitions = grid_partitions(&extent, dim);

        // Both partitioners index the exact same partition bounds so that the
        // comparison is apples-to-apples.
        let flat =
            FlatPartitioner::try_new(partitions.clone()).expect("failed to build FlatPartitioner");
        let rtree =
            RTreePartitioner::try_new(partitions).expect("failed to build RTreePartitioner");

        group.bench_with_input(
            BenchmarkId::new("flat", num_partitions),
            &flat,
            |b, partitioner| {
                b.iter(|| {
                    for query in &queries {
                        // `black_box` keeps the optimizer from eliding the lookup.
                        let result = partitioner
                            .partition(black_box(query))
                            .expect("partition failed");
                        black_box(result);
                    }
                });
            },
        );

        group.bench_with_input(
            BenchmarkId::new("rtree", num_partitions),
            &rtree,
            |b, partitioner| {
                b.iter(|| {
                    for query in &queries {
                        // `black_box` keeps the optimizer from eliding the lookup.
                        let result = partitioner
                            .partition(black_box(query))
                            .expect("partition failed");
                        black_box(result);
                    }
                });
            },
        );
    }

    group.finish();
}

// Register the benchmark function with Criterion and emit the entry point for
// this bench target (declared with `harness = false` in Cargo.toml).
criterion_group!(flat_vs_rtree, bench_flat_vs_rtree);
criterion_main!(flat_vs_rtree);
6 changes: 3 additions & 3 deletions rust/sedona-spatial-join/bench/partitioning/rtree.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@
// specific language governing permissions and limitations
// under the License.

mod common;

use std::hint::black_box;

use common::{default_extent, grid_partitions, sample_queries, GRID_DIM, QUERY_BATCH_SIZE};
use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use sedona_geometry::bounding_box::BoundingBox;
use sedona_spatial_join::partitioning::{rtree::RTreePartitioner, SpatialPartitioner};
use sedona_spatial_join::utils::internal_benchmark_util::{
default_extent, grid_partitions, sample_queries, GRID_DIM, QUERY_BATCH_SIZE,
};
const NODE_SIZES: [u16; 5] = [4, 8, 16, 32, 64]; // smaller node size => deeper tree

fn bench_rtree_partition_queries(c: &mut Criterion) {
Expand Down
115 changes: 0 additions & 115 deletions rust/sedona-spatial-join/src/build_index.rs

This file was deleted.

20 changes: 16 additions & 4 deletions rust/sedona-spatial-join/src/prepare.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ use crate::{
flat::FlatPartitioner,
kdb::KDBPartitioner,
round_robin::RoundRobinPartitioner,
rtree::RTreePartitioner,
stream_repartitioner::{SpilledPartition, SpilledPartitions, StreamRepartitioner},
PartitionedSide, SpatialPartition, SpatialPartitioner,
},
Expand Down Expand Up @@ -296,8 +297,14 @@ impl SpatialJoinComponentsBuilder {
Ok(build_partitioner)
}

/// Construct a `SpatialPartitioner` (e.g. Flat) from the statistics of partitioned build
/// side for partitioning the probe side.
/// The number of partitions above which the probe side uses an RTree
/// partitioner instead of a flat (linear-scan) partitioner. Benchmarks
/// show the crossover at ~36 partitions; 48 gives a comfortable margin.
const RTREE_PARTITION_THRESHOLD: usize = 48;

/// Construct a `SpatialPartitioner` for partitioning the probe side.
/// Uses a flat linear-scan partitioner when the number of partitions is
/// small, and switches to an RTree-based partitioner for larger counts.
fn create_spatial_partitioner_for_probe_side(
&self,
num_partitions: usize,
Expand All @@ -309,7 +316,7 @@ impl SpatialJoinComponentsBuilder {
) {
Box::new(BroadcastPartitioner::new(num_partitions))
} else {
// Build a flat partitioner using these partitions
// Collect partition bounding boxes from the spilled partitions
let mut partition_bounds = Vec::with_capacity(num_partitions);
for k in 0..num_partitions {
let partition = SpatialPartition::Regular(k as u32);
Expand All @@ -320,7 +327,12 @@ impl SpatialJoinComponentsBuilder {
.unwrap_or(BoundingBox::empty());
partition_bounds.push(partition_bound);
}
Box::new(FlatPartitioner::try_new(partition_bounds)?)

if num_partitions <= Self::RTREE_PARTITION_THRESHOLD {
Box::new(FlatPartitioner::try_new(partition_bounds)?)
} else {
Box::new(RTreePartitioner::try_new(partition_bounds)?)
}
};
Ok(probe_partitioner)
}
Expand Down
1 change: 1 addition & 0 deletions rust/sedona-spatial-join/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ pub(crate) mod arrow_utils;
pub(crate) mod bbox_sampler;
pub(crate) mod disposable_async_cell;
pub(crate) mod init_once_array;
pub mod internal_benchmark_util;
pub(crate) mod join_utils;
pub(crate) mod once_fut;
pub(crate) mod spill;
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
// under the License.

//! Shared helpers for partitioner benchmarks.
//!
//! This module is **not** part of the public API and exists only to share
//! utility code across benchmark binaries without resorting to a
//! module-level `#![allow(dead_code)]`.

use rand::{rngs::StdRng, RngExt, SeedableRng};
use sedona_geometry::{bounding_box::BoundingBox, interval::IntervalTrait};

pub const GRID_DIM: usize = 4; // 4x4 grid => 16 partitions like typical workloads
Expand Down Expand Up @@ -59,22 +62,27 @@ pub fn grid_partitions(extent: &BoundingBox, cells_per_axis: usize) -> Vec<Bound
}

pub fn sample_queries(extent: &BoundingBox, batch_size: usize) -> Vec<BoundingBox> {
let mut rng = StdRng::seed_from_u64(RNG_SEED);
let mut rng = fastrand::Rng::with_seed(RNG_SEED);
let characteristic_span = extent_span(extent) / 8.0;
(0..batch_size)
.map(|_| random_bbox(extent, &mut rng, characteristic_span))
.collect()
}

fn random_bbox(extent: &BoundingBox, rng: &mut impl RngExt, max_span: f64) -> BoundingBox {
/// Generate a random f64 in `[lo, hi)`.
///
/// `fastrand::Rng::f64` yields a value in `[0, 1)`, which is scaled by the
/// width `hi - lo` and shifted by `lo`, so the upper bound is exclusive.
/// NOTE(review): floating-point rounding could still produce exactly `hi` in
/// rare cases — confirm callers tolerate that.
fn random_f64_range(rng: &mut fastrand::Rng, lo: f64, hi: f64) -> f64 {
lo + rng.f64() * (hi - lo)
}

fn random_bbox(extent: &BoundingBox, rng: &mut fastrand::Rng, max_span: f64) -> BoundingBox {
let (min_x, max_x) = (extent.x().lo(), extent.x().hi());
let (min_y, max_y) = (extent.y().lo(), extent.y().hi());

let span_x = rng.random_range(0.01..max_span).min(max_x - min_x);
let span_y = rng.random_range(0.01..max_span).min(max_y - min_y);
let span_x = random_f64_range(rng, 0.01, max_span).min(max_x - min_x);
let span_y = random_f64_range(rng, 0.01, max_span).min(max_y - min_y);

let start_x = rng.random_range(min_x..=max_x - span_x);
let start_y = rng.random_range(min_y..=max_y - span_y);
let start_x = random_f64_range(rng, min_x, max_x - span_x);
let start_y = random_f64_range(rng, min_y, max_y - span_y);

BoundingBox::xy((start_x, start_x + span_x), (start_y, start_y + span_y))
}
Expand Down