|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | +// SPDX-FileCopyrightText: Copyright the Vortex contributors |
| 3 | + |
| 4 | +//! Coordinate building blocks for the [`Geometry`](crate::extension::Geometry) extension type: |
| 5 | +//! the `Struct<x: f64, y: f64, {z: f64}, {m: f64}>` storage, its [`Dimension`], and the decoded |
| 6 | +//! [`Coordinate`] value. |
| 7 | +//! |
| 8 | +//! The coordinate fields, where `{}` marks a field that may be absent (every field present is |
| 9 | +//! non-nullable), are: |
| 10 | +//! - `x` — longitude or easting |
| 11 | +//! - `y` — latitude or northing |
| 12 | +//! - `z` — elevation |
| 13 | +//! - `m` — measure: an arbitrary per-point value such as distance along a route or a timestamp |
| 14 | +
|
| 15 | +use std::fmt::Display; |
| 16 | +use std::fmt::Formatter; |
| 17 | + |
| 18 | +use vortex_array::ArrayRef; |
| 19 | +use vortex_array::ExecutionCtx; |
| 20 | +use vortex_array::arrays::ExtensionArray; |
| 21 | +use vortex_array::arrays::PrimitiveArray; |
| 22 | +use vortex_array::arrays::StructArray; |
| 23 | +use vortex_array::arrays::extension::ExtensionArrayExt; |
| 24 | +use vortex_array::arrays::struct_::StructArrayExt; |
| 25 | +use vortex_array::dtype::DType; |
| 26 | +use vortex_array::dtype::FieldNames; |
| 27 | +use vortex_array::dtype::Nullability; |
| 28 | +use vortex_array::dtype::PType; |
| 29 | +use vortex_array::scalar::Scalar; |
| 30 | +use vortex_error::VortexResult; |
| 31 | +use vortex_error::vortex_bail; |
| 32 | +use vortex_error::vortex_ensure; |
| 33 | +use vortex_error::vortex_err; |
| 34 | + |
| 35 | +use crate::extension::Geometry; |
| 36 | +use crate::extension::GeometryKind; |
| 37 | + |
/// Coordinate dimensions, matching GeoArrow. Field order is fixed: `x`, `y`, then `z` before `m`.
///
/// Each variant names the exact sequence of ordinate fields a coordinate struct carries.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum Dimension {
    /// 2D: fields `x`, `y`.
    Xy,
    /// 3D with elevation: fields `x`, `y`, `z`.
    Xyz,
    /// 3D with a measure: fields `x`, `y`, `m`.
    Xym,
    /// 4D: fields `x`, `y`, `z`, `m`.
    Xyzm,
}
| 50 | + |
| 51 | +impl Dimension { |
| 52 | + /// Recover the dimension from a coordinate's field names, in GeoArrow order. |
| 53 | + pub(crate) fn from_field_names(names: &FieldNames) -> VortexResult<Dimension> { |
| 54 | + let mut strs = [""; 4]; |
| 55 | + vortex_ensure!( |
| 56 | + names.len() <= strs.len(), |
| 57 | + "not a valid GeoArrow coordinate dimension: {names:?}" |
| 58 | + ); |
| 59 | + for (slot, name) in strs.iter_mut().zip(names.iter()) { |
| 60 | + *slot = name.as_ref(); |
| 61 | + } |
| 62 | + Ok(match &strs[..names.len()] { |
| 63 | + ["x", "y"] => Dimension::Xy, |
| 64 | + ["x", "y", "z"] => Dimension::Xyz, |
| 65 | + ["x", "y", "m"] => Dimension::Xym, |
| 66 | + ["x", "y", "z", "m"] => Dimension::Xyzm, |
| 67 | + _ => vortex_bail!("not a valid GeoArrow coordinate dimension: {names:?}"), |
| 68 | + }) |
| 69 | + } |
| 70 | +} |
| 71 | + |
/// A single decoded coordinate. `x` and `y` are always present; `z`/`m` are `Some` iff the
/// storage dimension includes them.
///
/// This is the native value produced when unpacking a point-kind
/// [`Geometry`](crate::extension::Geometry) scalar; the rest of the coordinate machinery is
/// crate-internal.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct Coordinate {
    /// The x (longitude/easting) ordinate.
    pub x: f64,
    /// The y (latitude/northing) ordinate.
    pub y: f64,
    /// The `z` (elevation) ordinate, when the coordinate has one.
    pub z: Option<f64>,
    /// The `m` (measure) ordinate, when the coordinate has one.
    pub m: Option<f64>,
}

impl Coordinate {
    /// Build a 2D coordinate from `x` and `y`, leaving `z` and `m` as `None`.
    pub fn xy(x: f64, y: f64) -> Self {
        Self {
            x,
            y,
            z: None,
            m: None,
        }
    }
}

impl Display for Coordinate {
    /// Write the coordinate as a WKT point: `POINT(x y)`, `POINT Z (x y z)`,
    /// `POINT M (x y m)` or `POINT ZM (x y z m)`, depending on which optional ordinates are set.
    fn fmt(&self, out: &mut Formatter<'_>) -> std::fmt::Result {
        // The tag (and its spacing) depends only on which optional ordinates exist.
        let opening = match (self.z.is_some(), self.m.is_some()) {
            (false, false) => "POINT(",
            (true, false) => "POINT Z (",
            (false, true) => "POINT M (",
            (true, true) => "POINT ZM (",
        };
        write!(out, "{}{} {}", opening, self.x, self.y)?;
        // Optional ordinates follow in the fixed order `z` then `m`.
        for ordinate in self.z.iter().chain(self.m.iter()) {
            write!(out, " {}", ordinate)?;
        }
        out.write_str(")")
    }
}
| 111 | + |
| 112 | +/// Validate that `dtype` is a coordinate struct of non-nullable `f64` fields, returning its |
| 113 | +/// [`Dimension`]. Any of the four GeoArrow dimensions validates. |
| 114 | +pub(crate) fn coordinate_dimension(dtype: &DType) -> VortexResult<Dimension> { |
| 115 | + let DType::Struct(fields, _) = dtype else { |
| 116 | + vortex_bail!("coordinate storage must be a Struct, was {dtype}"); |
| 117 | + }; |
| 118 | + for (name, field) in fields.names().iter().zip(fields.fields()) { |
| 119 | + vortex_ensure!( |
| 120 | + matches!( |
| 121 | + field, |
| 122 | + DType::Primitive(PType::F64, Nullability::NonNullable) |
| 123 | + ), |
| 124 | + "coordinate field {name} must be non-nullable f64, was {field}" |
| 125 | + ); |
| 126 | + } |
| 127 | + Dimension::from_field_names(fields.names()) |
| 128 | +} |
| 129 | + |
| 130 | +/// Decode a [`Coordinate`] from a coordinate `Struct<x, y, {z}, {m}>` scalar (`z`/`m` read iff |
| 131 | +/// present, so the same decoder serves every dimension). |
| 132 | +pub(crate) fn coordinate_from_struct(scalar: &Scalar) -> VortexResult<Coordinate> { |
| 133 | + let fields = scalar.as_struct(); |
| 134 | + let required = |name: &str| -> VortexResult<f64> { |
| 135 | + f64::try_from( |
| 136 | + &fields |
| 137 | + .field(name) |
| 138 | + .ok_or_else(|| vortex_err!("coordinate missing {name}"))?, |
| 139 | + ) |
| 140 | + }; |
| 141 | + let optional = |name: &str| -> VortexResult<Option<f64>> { |
| 142 | + fields |
| 143 | + .field(name) |
| 144 | + .map(|value| f64::try_from(&value)) |
| 145 | + .transpose() |
| 146 | + }; |
| 147 | + Ok(Coordinate { |
| 148 | + x: required("x")?, |
| 149 | + y: required("y")?, |
| 150 | + z: optional("z")?, |
| 151 | + m: optional("m")?, |
| 152 | + }) |
| 153 | +} |
| 154 | + |
| 155 | +/// Decode a [`Coordinate`] from an extension-typed point scalar (unwrapped to its coordinate |
| 156 | +/// storage) or a bare coordinate `Struct` scalar. The per-row decode used by the distance fns. |
| 157 | +pub(crate) fn coordinate_from_scalar(scalar: &Scalar) -> VortexResult<Coordinate> { |
| 158 | + match scalar.as_extension_opt() { |
| 159 | + Some(ext_scalar) => coordinate_from_struct(&ext_scalar.to_storage_scalar()), |
| 160 | + None => coordinate_from_struct(scalar), |
| 161 | + } |
| 162 | +} |
| 163 | + |
/// Validated, executed `x`/`y` columns of a point array. The bulk counterpart to [`Coordinate`];
/// `z`/`m` are not executed.
///
/// Produced by [`parse_storage`].
pub(crate) struct ParsedCoordinates {
    /// The flat `f64` `x` column.
    pub(crate) xs: PrimitiveArray,
    /// The flat `f64` `y` column.
    pub(crate) ys: PrimitiveArray,
}
| 172 | + |
| 173 | +/// Validate a point column's geometry kind and coordinate storage (layout and non-nullability), |
| 174 | +/// then execute its `x`/`y` columns. |
| 175 | +pub(crate) fn parse_storage( |
| 176 | + points: &ArrayRef, |
| 177 | + ctx: &mut ExecutionCtx, |
| 178 | +) -> VortexResult<ParsedCoordinates> { |
| 179 | + if let Some(ext) = points.dtype().as_extension_opt() |
| 180 | + && ext.is::<Geometry>() |
| 181 | + { |
| 182 | + let kind = ext.metadata::<Geometry>().kind()?; |
| 183 | + vortex_ensure!( |
| 184 | + kind == GeometryKind::Point, |
| 185 | + "expected a point column, was {kind}" |
| 186 | + ); |
| 187 | + } |
| 188 | + let storage = points |
| 189 | + .clone() |
| 190 | + .execute::<ExtensionArray>(ctx)? |
| 191 | + .storage_array() |
| 192 | + .clone() |
| 193 | + .execute::<StructArray>(ctx)?; |
| 194 | + coordinate_dimension(storage.dtype())?; |
| 195 | + vortex_ensure!( |
| 196 | + !storage.dtype().is_nullable(), |
| 197 | + "coordinate storage must be non-nullable to read unmasked ordinates, was {}", |
| 198 | + storage.dtype() |
| 199 | + ); |
| 200 | + let xs = storage |
| 201 | + .unmasked_field_by_name("x")? |
| 202 | + .clone() |
| 203 | + .execute::<PrimitiveArray>(ctx)?; |
| 204 | + let ys = storage |
| 205 | + .unmasked_field_by_name("y")? |
| 206 | + .clone() |
| 207 | + .execute::<PrimitiveArray>(ctx)?; |
| 208 | + Ok(ParsedCoordinates { xs, ys }) |
| 209 | +} |
| 210 | + |
#[cfg(test)]
mod tests {
    use vortex_array::IntoArray;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::ExtensionArray;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::arrays::StructArray;
    use vortex_array::dtype::FieldNames;
    use vortex_array::session::ArraySession;
    use vortex_array::validity::Validity;
    use vortex_error::VortexResult;
    use vortex_session::VortexSession;

    use super::Coordinate;
    use super::parse_storage;
    use crate::extension::Geometry;
    use crate::extension::GeometryKind;

    /// `Display` emits WKT, appending `z`/`m` (and the matching tag) when they are present.
    #[test]
    fn display_is_wkt() {
        let cases = [
            (None, None, "POINT(1 2)"),
            (Some(3.0), None, "POINT Z (1 2 3)"),
            (None, Some(4.0), "POINT M (1 2 4)"),
            (Some(3.0), Some(4.0), "POINT ZM (1 2 3 4)"),
        ];
        for (z, m, expected) in cases {
            let rendered = Coordinate {
                x: 1.0,
                y: 2.0,
                z,
                m,
            }
            .to_string();
            assert_eq!(rendered, expected);
        }
    }

    /// [`parse_storage`] reads the coordinate fields unmasked, so a nullable point column must
    /// be rejected at parse time rather than decoding null rows as garbage ordinates.
    #[test]
    fn parse_rejects_nullable_points() -> VortexResult<()> {
        let session = VortexSession::empty().with::<ArraySession>();
        let mut ctx = session.create_execution_ctx();

        let x_column = PrimitiveArray::from_iter(vec![1.0f64]).into_array();
        let y_column = PrimitiveArray::from_iter(vec![2.0f64]).into_array();
        let coordinates = StructArray::try_new(
            FieldNames::from(["x", "y"]),
            vec![x_column, y_column],
            1,
            Validity::AllValid,
        )?
        .into_array();
        let point_dtype =
            Geometry::dtype(GeometryKind::Point, None, coordinates.dtype().clone())?;
        let points = ExtensionArray::new(point_dtype.erased(), coordinates).into_array();

        let outcome = parse_storage(&points, &mut ctx);
        assert!(outcome.is_err());
        Ok(())
    }
}
0 commit comments