Skip to content

Commit ee1e166

Browse files
feat(vortex-geo): native Geometry extension type with point support
Add a single logical extension type `vortex.geo.geometry` for GeoArrow-native geometry. The geometry kind (point, linestring, ...) and the CRS live in the extension metadata; the storage dtype is the kind's GeoArrow separated coordinate layout, and the coordinate dimension is recovered from the storage field names. Only point columns are supported end to end so far; other kinds are rejected at dtype construction until their scalar unpacking and kernels exist. Add a `vortex.geo.distance` scalar function computing planar (Euclidean) distance. The signature takes geometry operands and execution dispatches on their kinds, with a point x point kernel; operands are type-checked at construction to be non-nullable point columns sharing a CRS. Signed-off-by: Nemo Yu <zyu379@wisc.edu>
1 parent 8acef3a commit ee1e166

9 files changed

Lines changed: 1148 additions & 7 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-geo/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ wkb = { workspace = true }
2626
[dev-dependencies]
2727
geo-traits = { workspace = true }
2828
geo-types = { workspace = true }
29+
rstest = { workspace = true }
2930

3031
[lints]
3132
workspace = true
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Coordinate building blocks for the [`Geometry`](crate::extension::Geometry) extension type:
5+
//! the `Struct<x: f64, y: f64, {z: f64}, {m: f64}>` storage, its [`Dimension`], and the decoded
6+
//! [`Coordinate`] value.
7+
//!
8+
//! The coordinate fields, where `{}` marks a field that may be absent (every field present is
9+
//! non-nullable), are:
10+
//! - `x` — longitude or easting
11+
//! - `y` — latitude or northing
12+
//! - `z` — elevation
13+
//! - `m` — measure: an arbitrary per-point value such as distance along a route or a timestamp
14+
15+
use std::fmt::Display;
16+
use std::fmt::Formatter;
17+
18+
use vortex_array::ArrayRef;
19+
use vortex_array::ExecutionCtx;
20+
use vortex_array::arrays::ExtensionArray;
21+
use vortex_array::arrays::PrimitiveArray;
22+
use vortex_array::arrays::StructArray;
23+
use vortex_array::arrays::extension::ExtensionArrayExt;
24+
use vortex_array::arrays::struct_::StructArrayExt;
25+
use vortex_array::dtype::DType;
26+
use vortex_array::dtype::FieldNames;
27+
use vortex_array::dtype::Nullability;
28+
use vortex_array::dtype::PType;
29+
use vortex_array::scalar::Scalar;
30+
use vortex_error::VortexResult;
31+
use vortex_error::vortex_bail;
32+
use vortex_error::vortex_ensure;
33+
use vortex_error::vortex_err;
34+
35+
use crate::extension::Geometry;
36+
use crate::extension::GeometryKind;
37+
38+
/// Coordinate dimensions, matching GeoArrow. Field order is fixed: `x`, `y`, then `z` before `m`.
39+
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
40+
pub(crate) enum Dimension {
41+
/// 2D: `x`, `y`.
42+
Xy,
43+
/// 3D with elevation: `x`, `y`, `z`.
44+
Xyz,
45+
/// 3D with a measure: `x`, `y`, `m`.
46+
Xym,
47+
/// 4D: `x`, `y`, `z`, `m`.
48+
Xyzm,
49+
}
50+
51+
impl Dimension {
52+
/// Recover the dimension from a coordinate's field names, in GeoArrow order.
53+
pub(crate) fn from_field_names(names: &FieldNames) -> VortexResult<Dimension> {
54+
let mut strs = [""; 4];
55+
vortex_ensure!(
56+
names.len() <= strs.len(),
57+
"not a valid GeoArrow coordinate dimension: {names:?}"
58+
);
59+
for (slot, name) in strs.iter_mut().zip(names.iter()) {
60+
*slot = name.as_ref();
61+
}
62+
Ok(match &strs[..names.len()] {
63+
["x", "y"] => Dimension::Xy,
64+
["x", "y", "z"] => Dimension::Xyz,
65+
["x", "y", "m"] => Dimension::Xym,
66+
["x", "y", "z", "m"] => Dimension::Xyzm,
67+
_ => vortex_bail!("not a valid GeoArrow coordinate dimension: {names:?}"),
68+
})
69+
}
70+
}
71+
72+
/// A single decoded coordinate. `z` and `m` are `Some` exactly when the storage dimension
/// carries them.
///
/// This is the native value obtained by unpacking a point-kind
/// [`Geometry`](crate::extension::Geometry) scalar; everything else in the coordinate machinery
/// is crate-internal.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Coordinate {
    /// The `x` ordinate (longitude or easting).
    pub x: f64,
    /// The `y` ordinate (latitude or northing).
    pub y: f64,
    /// The `z` ordinate (elevation), if present.
    pub z: Option<f64>,
    /// The `m` ordinate (measure), if present.
    pub m: Option<f64>,
}

impl Coordinate {
    /// Build a 2D coordinate, leaving both `z` and `m` as `None`.
    pub fn xy(x: f64, y: f64) -> Self {
        Self {
            x,
            y,
            z: None,
            m: None,
        }
    }
}

/// Renders the coordinate as a WKT point, tagged `Z`, `M` or `ZM` according to which optional
/// ordinates are set.
impl Display for Coordinate {
    fn fmt(&self, fmt: &mut Formatter<'_>) -> std::fmt::Result {
        match *self {
            Coordinate {
                z: Some(z),
                m: Some(m),
                ..
            } => write!(fmt, "POINT ZM ({} {} {} {})", self.x, self.y, z, m),
            Coordinate {
                z: Some(z),
                m: None,
                ..
            } => write!(fmt, "POINT Z ({} {} {})", self.x, self.y, z),
            Coordinate {
                z: None,
                m: Some(m),
                ..
            } => write!(fmt, "POINT M ({} {} {})", self.x, self.y, m),
            Coordinate {
                z: None, m: None, ..
            } => write!(fmt, "POINT({} {})", self.x, self.y),
        }
    }
}
111+
112+
/// Validate that `dtype` is a coordinate struct of non-nullable `f64` fields, returning its
113+
/// [`Dimension`]. Any of the four GeoArrow dimensions validates.
114+
pub(crate) fn coordinate_dimension(dtype: &DType) -> VortexResult<Dimension> {
115+
let DType::Struct(fields, _) = dtype else {
116+
vortex_bail!("coordinate storage must be a Struct, was {dtype}");
117+
};
118+
for (name, field) in fields.names().iter().zip(fields.fields()) {
119+
vortex_ensure!(
120+
matches!(
121+
field,
122+
DType::Primitive(PType::F64, Nullability::NonNullable)
123+
),
124+
"coordinate field {name} must be non-nullable f64, was {field}"
125+
);
126+
}
127+
Dimension::from_field_names(fields.names())
128+
}
129+
130+
/// Decode a [`Coordinate`] from a coordinate `Struct<x, y, {z}, {m}>` scalar (`z`/`m` read iff
131+
/// present, so the same decoder serves every dimension).
132+
pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult<Coordinate> {
133+
let fields = scalar.as_struct();
134+
let required = |name: &str| -> VortexResult<f64> {
135+
f64::try_from(
136+
&fields
137+
.field(name)
138+
.ok_or_else(|| vortex_err!("coordinate missing {name}"))?,
139+
)
140+
};
141+
let optional = |name: &str| -> VortexResult<Option<f64>> {
142+
fields
143+
.field(name)
144+
.map(|value| f64::try_from(&value))
145+
.transpose()
146+
};
147+
Ok(Coordinate {
148+
x: required("x")?,
149+
y: required("y")?,
150+
z: optional("z")?,
151+
m: optional("m")?,
152+
})
153+
}
154+
155+
/// Decode a [`Coordinate`] from an extension-typed point scalar (unwrapped to its coordinate
156+
/// storage) or a bare coordinate `Struct` scalar. The per-row decode used by the distance fns.
157+
pub(crate) fn coordinate_from_scalar(scalar: &Scalar) -> VortexResult<Coordinate> {
158+
match scalar.as_extension_opt() {
159+
Some(ext_scalar) => coordinate_from_struct(&ext_scalar.to_storage_scalar()),
160+
None => coordinate_from_struct(scalar),
161+
}
162+
}
163+
164+
/// Validated, executed `x`/`y` columns of a point array. The bulk counterpart to [`Coordinate`];
165+
/// `z`/`m` are not executed.
166+
pub(crate) struct ParsedCoordinates {
167+
/// The flat `f64` `x` column.
168+
pub(crate) xs: PrimitiveArray,
169+
/// The flat `f64` `y` column.
170+
pub(crate) ys: PrimitiveArray,
171+
}
172+
173+
/// Validate a point column's geometry kind and coordinate storage (layout and non-nullability),
174+
/// then execute its `x`/`y` columns.
175+
pub(crate) fn parse_storage(
176+
points: &ArrayRef,
177+
ctx: &mut ExecutionCtx,
178+
) -> VortexResult<ParsedCoordinates> {
179+
if let Some(ext) = points.dtype().as_extension_opt()
180+
&& ext.is::<Geometry>()
181+
{
182+
let kind = ext.metadata::<Geometry>().kind()?;
183+
vortex_ensure!(
184+
kind == GeometryKind::Point,
185+
"expected a point column, was {kind}"
186+
);
187+
}
188+
let storage = points
189+
.clone()
190+
.execute::<ExtensionArray>(ctx)?
191+
.storage_array()
192+
.clone()
193+
.execute::<StructArray>(ctx)?;
194+
coordinate_dimension(storage.dtype())?;
195+
vortex_ensure!(
196+
!storage.dtype().is_nullable(),
197+
"coordinate storage must be non-nullable to read unmasked ordinates, was {}",
198+
storage.dtype()
199+
);
200+
let xs = storage
201+
.unmasked_field_by_name("x")?
202+
.clone()
203+
.execute::<PrimitiveArray>(ctx)?;
204+
let ys = storage
205+
.unmasked_field_by_name("y")?
206+
.clone()
207+
.execute::<PrimitiveArray>(ctx)?;
208+
Ok(ParsedCoordinates { xs, ys })
209+
}
210+
211+
#[cfg(test)]
mod tests {
    use vortex_array::IntoArray;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::ExtensionArray;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::arrays::StructArray;
    use vortex_array::dtype::FieldNames;
    use vortex_array::session::ArraySession;
    use vortex_array::validity::Validity;
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use super::Coordinate;
    use super::parse_storage;
    use crate::extension::Geometry;
    use crate::extension::GeometryKind;

    /// `Display` renders WKT, appending `z`/`m` whenever they are set.
    #[test]
    fn display_is_wkt() {
        let cases = [
            (None, None, "POINT(1 2)"),
            (Some(3.0), None, "POINT Z (1 2 3)"),
            (None, Some(4.0), "POINT M (1 2 4)"),
            (Some(3.0), Some(4.0), "POINT ZM (1 2 3 4)"),
        ];
        for (z, m, expected) in cases {
            let coordinate = Coordinate {
                x: 1.0,
                y: 2.0,
                z,
                m,
            };
            assert_eq!(coordinate.to_string(), expected);
        }
    }

    /// [`parse_storage`] reads the ordinate fields unmasked, so it has to refuse a nullable
    /// point column up front instead of decoding null rows as garbage ordinates.
    #[test]
    fn parse_rejects_nullable_points() -> VortexResult<()> {
        let session = VortexSession::empty().with::<ArraySession>();
        let mut ctx = session.create_execution_ctx();

        let xs = PrimitiveArray::from_iter(vec![1.0f64]).into_array();
        let ys = PrimitiveArray::from_iter(vec![2.0f64]).into_array();
        // The storage struct is built with `Validity::AllValid` rather than a non-nullable
        // validity.
        let nullable_storage = StructArray::try_new(
            FieldNames::from(["x", "y"]),
            vec![xs, ys],
            1,
            Validity::AllValid,
        )?
        .into_array();
        let point_dtype =
            Geometry::dtype(GeometryKind::Point, None, nullable_storage.dtype().clone())?;
        let points = ExtensionArray::new(point_dtype.erased(), nullable_storage).into_array();

        let parsed = parse_storage(&points, &mut ctx);
        assert!(parsed.is_err());
        Ok(())
    }
}

0 commit comments

Comments
 (0)