33
44use std:: fmt:: Display ;
55use std:: ops:: Deref ;
6+ use std:: sync:: Arc ;
67
8+ use arrow_array:: ArrayRef as ArrowArrayRef ;
9+ use arrow_schema:: DataType ;
10+ use arrow_schema:: Field ;
11+ use arrow_schema:: extension:: ExtensionType ;
12+ use geoarrow:: array:: GenericWkbArray ;
13+ use geoarrow:: array:: IntoArrow ;
14+ use geoarrow:: array:: WkbViewArray ;
15+ use geoarrow:: datatypes:: Crs ;
16+ use geoarrow:: datatypes:: Metadata ;
17+ use geoarrow:: datatypes:: WkbType ;
718use prost:: Message ;
819use vortex_array:: ArrayRef ;
20+ use vortex_array:: ExecutionCtx ;
21+ use vortex_array:: IntoArray ;
922use vortex_array:: arrays:: ExtensionArray ;
1023use vortex_array:: arrays:: extension:: ExtensionArrayExt ;
24+ use vortex_array:: arrow:: ArrowExport ;
25+ use vortex_array:: arrow:: ArrowExportVTable ;
26+ use vortex_array:: arrow:: ArrowImport ;
27+ use vortex_array:: arrow:: ArrowImportVTable ;
28+ use vortex_array:: arrow:: ArrowSession ;
29+ use vortex_array:: arrow:: ArrowSessionExt ;
30+ use vortex_array:: arrow:: FromArrowArray ;
31+ use vortex_array:: dtype:: DType ;
1132use vortex_array:: dtype:: extension:: ExtDType ;
1233use vortex_array:: dtype:: extension:: ExtId ;
1334use vortex_array:: dtype:: extension:: ExtVTable ;
@@ -17,6 +38,8 @@ use vortex_error::VortexResult;
1738use vortex_error:: vortex_bail;
1839use vortex_error:: vortex_ensure;
1940use vortex_error:: vortex_err;
41+ use vortex_session:: registry:: CachedId ;
42+ use vortex_session:: registry:: Id ;
2043use wkb:: reader:: GeometryType ;
2144
2245use crate :: extension:: GeoMetadata ;
@@ -140,3 +163,155 @@ impl ExtVTable for WellKnownBinary {
140163 Wkb :: try_from_bytes ( storage_value. as_binary ( ) . as_slice ( ) )
141164 }
142165}
166+
167+ static ARROW_WKB : CachedId = CachedId :: new ( WkbType :: NAME ) ;
168+
169+ impl ArrowExportVTable for WellKnownBinary {
170+ fn arrow_ext_id ( & self ) -> Id {
171+ * ARROW_WKB
172+ }
173+
174+ fn vortex_id ( & self ) -> Id {
175+ self . id ( )
176+ }
177+
178+ fn to_arrow_field (
179+ & self ,
180+ name : & str ,
181+ dtype : & DType ,
182+ session : & ArrowSession ,
183+ ) -> VortexResult < Option < Field > > {
184+ let ext_type = dtype. as_extension ( ) ;
185+ let geo_metadata = ext_type. metadata :: < WellKnownBinary > ( ) ;
186+
187+ let mut field = session. to_arrow_field ( name, ext_type. storage_dtype ( ) ) ?;
188+ field. try_with_extension_type ( wkb_type ( geo_metadata) ) ?;
189+
190+ Ok ( Some ( field) )
191+ }
192+
193+ fn execute_arrow (
194+ & self ,
195+ array : ArrayRef ,
196+ target : & Field ,
197+ ctx : & mut ExecutionCtx ,
198+ ) -> VortexResult < ArrowExport > {
199+ let is_wkb = array
200+ . dtype ( )
201+ . as_extension_opt ( )
202+ . map ( |ext| ext. is :: < WellKnownBinary > ( ) )
203+ . unwrap_or ( false ) ;
204+ if !is_wkb {
205+ return Ok ( ArrowExport :: Unsupported ( array) ) ;
206+ }
207+
208+ let Ok ( wkb_meta) = target. try_extension_type :: < WkbType > ( ) else {
209+ return Ok ( ArrowExport :: Unsupported ( array) ) ;
210+ } ;
211+
212+ let executed = array. execute :: < ExtensionArray > ( ctx) ?;
213+ let storage = executed. storage_array ( ) . clone ( ) ;
214+
215+ let storage_field = Field :: new (
216+ String :: new ( ) ,
217+ target. data_type ( ) . clone ( ) ,
218+ target. is_nullable ( ) ,
219+ ) ;
220+ let session = ctx. session ( ) . clone ( ) ;
221+ let arrow_storage = session
222+ . arrow ( )
223+ . execute_arrow ( storage, Some ( & storage_field) , ctx) ?;
224+
225+ // Round-trip through the GeoArrow WKB array types: this validates that the storage
226+ // is a binary-family Arrow array and produces the canonical physical representation
227+ // expected for a `WkbType` extension field.
228+ let arrow_ref: ArrowArrayRef = match target. data_type ( ) {
229+ DataType :: Binary => Arc :: new (
230+ GenericWkbArray :: < i32 > :: try_from ( ( arrow_storage. as_ref ( ) , wkb_meta) )
231+ . map_err ( |e| vortex_err ! ( "failed to construct WkbArray: {e}" ) ) ?
232+ . into_arrow ( ) ,
233+ ) ,
234+ DataType :: LargeBinary => Arc :: new (
235+ GenericWkbArray :: < i64 > :: try_from ( ( arrow_storage. as_ref ( ) , wkb_meta) )
236+ . map_err ( |e| vortex_err ! ( "failed to construct LargeWkbArray: {e}" ) ) ?
237+ . into_arrow ( ) ,
238+ ) ,
239+ DataType :: BinaryView => Arc :: new (
240+ WkbViewArray :: try_from ( ( arrow_storage. as_ref ( ) , wkb_meta) )
241+ . map_err ( |e| vortex_err ! ( "failed to construct WkbViewArray: {e}" ) ) ?
242+ . into_arrow ( ) ,
243+ ) ,
244+ _ => unreachable ! ( "target data type was validated above" ) ,
245+ } ;
246+
247+ Ok ( ArrowExport :: Exported ( arrow_ref) )
248+ }
249+ }
250+
251+ impl ArrowImportVTable for WellKnownBinary {
252+ fn arrow_ext_id ( & self ) -> Id {
253+ * ARROW_WKB
254+ }
255+
256+ fn from_arrow_field ( & self , field : & Field ) -> VortexResult < Option < DType > > {
257+ let Ok ( wkb_meta) = field. try_extension_type :: < WkbType > ( ) else {
258+ return Ok ( None ) ;
259+ } ;
260+
261+ let storage_dtype = DType :: Binary ( field. is_nullable ( ) . into ( ) ) ;
262+ Ok ( Some ( DType :: Extension (
263+ ExtDType :: try_with_vtable ( WellKnownBinary , geo_metadata ( & wkb_meta) , storage_dtype) ?
264+ . erased ( ) ,
265+ ) ) )
266+ }
267+
268+ fn from_arrow_array (
269+ & self ,
270+ array : ArrowArrayRef ,
271+ field : & Field ,
272+ dtype : & DType ,
273+ ) -> VortexResult < ArrowImport > {
274+ let Some ( ext_dtype) = dtype. as_extension_opt ( ) else {
275+ return Ok ( ArrowImport :: Unsupported ( array) ) ;
276+ } ;
277+ if !ext_dtype. is :: < WellKnownBinary > ( )
278+ || field. try_extension_type :: < WkbType > ( ) . is_err ( )
279+ || !matches ! (
280+ array. data_type( ) ,
281+ DataType :: Binary | DataType :: LargeBinary | DataType :: BinaryView
282+ )
283+ {
284+ return Ok ( ArrowImport :: Unsupported ( array) ) ;
285+ }
286+
287+ let storage = ArrayRef :: from_arrow ( array. as_ref ( ) , field. is_nullable ( ) ) ?;
288+ Ok ( ArrowImport :: Imported (
289+ ExtensionArray :: new ( ext_dtype. clone ( ) , storage) . into_array ( ) ,
290+ ) )
291+ }
292+ }
293+
294+ fn wkb_type ( geo_metadata : & GeoMetadata ) -> WkbType {
295+ let metadata = Metadata :: new (
296+ geo_metadata
297+ . crs
298+ . as_ref ( )
299+ . map ( |crs| Crs :: from_unknown_crs_type ( crs. to_string ( ) ) )
300+ . unwrap_or_default ( ) ,
301+ None ,
302+ ) ;
303+ WkbType :: new ( Arc :: new ( metadata) )
304+ }
305+
306+ fn geo_metadata ( wkb_type : & WkbType ) -> GeoMetadata {
307+ let crs = wkb_type. metadata ( ) . crs ( ) . crs_value ( ) . map ( |value| {
308+ // `Crs::from_unknown_crs_type` stores the user's string verbatim as a JSON string
309+ // value, so prefer the raw string when available to round-trip cleanly. For other
310+ // CRS encodings (PROJJSON object, etc.), fall back to the JSON-encoded form.
311+ value
312+ . as_str ( )
313+ . map ( str:: to_string)
314+ . unwrap_or_else ( || value. to_string ( ) )
315+ } ) ;
316+ GeoMetadata { crs }
317+ }
0 commit comments