1
1
//! Arrays with owned data
2
+ //!
3
+ //! [`Array`] is a union of all array types supported by `marrow`.
2
4
use half:: f16;
3
5
4
6
use crate :: {
@@ -22,6 +24,8 @@ const _: () = {
22
24
} ;
23
25
24
26
/// An array with owned data
27
+ ///
28
+ /// The corresponding view is [`View`].
25
29
#[ derive( Clone , Debug , PartialEq ) ]
26
30
#[ non_exhaustive]
27
31
pub enum Array {
@@ -199,6 +203,10 @@ impl Array {
199
203
}
200
204
201
205
/// Get the view for this array
206
+ ///
207
+ /// While the data of the arrays is borrowed, any metadata is copied (e.g., field definitions
208
+ /// for structs). The reason is that views may be constructed from foreign Arrow implementations
209
+ /// with different ways to store metadata.
202
210
pub fn as_view ( & self ) -> View < ' _ > {
203
211
match self {
204
212
Self :: Null ( array) => View :: Null ( array. as_view ( ) ) ,
@@ -242,6 +250,8 @@ impl Array {
242
250
}
243
251
244
252
/// An array without data
253
+ ///
254
+ /// The corresponding view is [`NullView`].
245
255
#[ derive( Clone , Debug , PartialEq ) ]
246
256
pub struct NullArray {
247
257
/// The len of the array
@@ -256,6 +266,8 @@ impl NullArray {
256
266
}
257
267
258
268
/// A `bool` array
269
+ ///
270
+ /// The corresponding view is [`BooleanView`].
259
271
#[ derive( Clone , Debug , PartialEq ) ]
260
272
pub struct BooleanArray {
261
273
// Note: len is required to know how many bits of values are used
@@ -285,6 +297,8 @@ impl BooleanArray {
285
297
}
286
298
287
299
/// An array of primitive values
300
+ ///
301
+ /// The corresponding view is [`PrimitiveView`].
288
302
#[ derive( Clone , Debug , PartialEq ) ]
289
303
pub struct PrimitiveArray < T > {
290
304
/// The validity of the elements as a bitmap
@@ -307,6 +321,8 @@ impl<T> PrimitiveArray<T> {
307
321
}
308
322
309
323
/// An array of time values (e.g., `"12:53"`)
324
+ ///
325
+ /// The corresponding view is [`TimeView`].
310
326
#[ derive( Debug , Clone , PartialEq ) ]
311
327
pub struct TimeArray < T > {
312
328
/// The time unit of the values
@@ -332,6 +348,8 @@ impl<T> TimeArray<T> {
332
348
}
333
349
334
350
/// An array of timestamps with an optional timezone
351
+ ///
352
+ /// The corresponding view is [`TimestampView`].
335
353
#[ derive( Debug , Clone , PartialEq ) ]
336
354
337
355
pub struct TimestampArray {
@@ -361,6 +379,8 @@ impl TimestampArray {
361
379
}
362
380
363
381
/// An array of structs
382
+ ///
383
+ /// The corresponding view is [`StructView`].
364
384
#[ derive( Clone , Debug , PartialEq ) ]
365
385
pub struct StructArray {
366
386
/// The number of elements in the array
@@ -390,6 +410,8 @@ impl StructArray {
390
410
}
391
411
392
412
/// An array of maps
413
+ ///
414
+ /// The corresponding view is [`MapView`].
393
415
#[ derive( Clone , Debug , PartialEq ) ]
394
416
pub struct MapArray {
395
417
/// The validity of the elements as a bitmap
@@ -451,6 +473,8 @@ impl MapArray {
451
473
/// An array of lists
452
474
///
453
475
/// The value of element `i` is given by the pseudo code `elements[offsets[i]..offsets[i + 1]]`
476
+ ///
477
+ /// The corresponding view is [`ListView`].
454
478
#[ derive( Clone , Debug , PartialEq ) ]
455
479
pub struct ListArray < O > {
456
480
/// The validity of the elements as a bitmap
@@ -479,6 +503,10 @@ impl<O> ListArray<O> {
479
503
}
480
504
481
505
/// An array of lists of fixed size
506
+ ///
507
+ /// The value of element `i` is given by pseudo code `elements[(n * i)..(n * (i + 1))]`
508
+ ///
509
+ /// The corresponding view is [`FixedSizeListView`].
482
510
#[ derive( Clone , Debug , PartialEq ) ]
483
511
pub struct FixedSizeListArray {
484
512
/// The number of elements in this array, each a list with `n` children
@@ -512,6 +540,8 @@ impl FixedSizeListArray {
512
540
/// An array of bytes with varying sizes
513
541
///
514
542
/// The value of element `i` can be accessed by the pseudo code `data[offsets[i]..offsets[i + 1]]`
543
+ ///
544
+ /// The corresponding view is [`BytesView`].
515
545
#[ derive( Clone , Debug , PartialEq ) ]
516
546
pub struct BytesArray < O > {
517
547
/// The validity of the elements as a bitmap
@@ -537,6 +567,8 @@ impl<O> BytesArray<O> {
537
567
}
538
568
539
569
/// An array of byte vectors with fixed length
570
+ ///
571
+ /// The corresponding view is [`FixedSizeBinaryView`].
540
572
#[ derive( Clone , Debug , PartialEq ) ]
541
573
pub struct FixedSizeBinaryArray {
542
574
/// The number of bytes per element
@@ -563,7 +595,9 @@ impl FixedSizeBinaryArray {
563
595
564
596
/// An array of fixed point values
565
597
///
566
- /// The value of element `i` can be computed by the pseudo code: `values[i] * (10 ** -scale)`
598
+ /// The value of element `i` can be computed by the pseudo code: `values[i] * (10).pow(-scale)`
599
+ ///
600
+ /// The corresponding view is [`DecimalView`].
567
601
#[ derive( Clone , Debug , PartialEq ) ]
568
602
pub struct DecimalArray < T > {
569
603
/// The precision, i.e., the number of digits
@@ -594,6 +628,8 @@ impl<T> DecimalArray<T> {
594
628
/// An array that deduplicates elements
595
629
///
596
630
/// For element `i`, the value can be looked up by the pseudo code `values[indices[i]]`
631
+ ///
632
+ /// The corresponding view is [`DictionaryView`].
597
633
#[ derive( Clone , Debug , PartialEq ) ]
598
634
pub struct DictionaryArray {
599
635
/// The indices into the values array for each element
@@ -615,9 +651,20 @@ impl DictionaryArray {
615
651
/// A union of different data types
616
652
///
617
653
/// This corresponds roughly to Rust's enums. Each element has a type, which indicates the
618
- /// underlying array to use. For fast lookups the offsets into the underlying arrays are stored as
619
- /// well. For element `ì`, the value can be looked up by the pseudo code
620
- /// `fields[types[i]].1[offsets[i]]`.
654
+ /// underlying array to use.
655
+ ///
656
+ /// The Arrow format supports two types of unions: sparse unions and dense unions. In dense unions
657
+ /// the lengths of all fields sum to the overall length, whereas in sparse unions each field has
658
+ /// the same length as the overall array.
659
+ ///
660
+ /// The value of element `i` in a union can be looked up by the pseudo code
661
+ ///
662
+ /// - `fields[types[i]].1[offsets[i]]` (for dense unions)
663
+ /// - `fields[types[i]].1[i]` (for sparse unions)
664
+ ///
665
+ /// For sparse unions `offsets` must be `None`.
666
+ ///
667
+ /// The corresponding view is [`UnionView`].
621
668
#[ derive( Clone , Debug , PartialEq ) ]
622
669
pub struct UnionArray {
623
670
/// The type of each element
@@ -644,6 +691,11 @@ impl UnionArray {
644
691
}
645
692
646
693
/// An array with runs of deduplicated values
694
+ ///
695
+ /// The value for element `i` can be looked up via `values[j]` for `j` such that `run_ends[j - 1] <=
696
+ /// i && i < run_ends[j]`.
697
+ ///
698
+ /// The corresponding view is [`RunEndEncodedView`].
647
699
#[ derive( Clone , Debug , PartialEq ) ]
648
700
pub struct RunEndEncodedArray {
649
701
/// The metadata for the arrays
0 commit comments