Skip to content

Commit 3885935

Browse files
authored
Merge pull request #12 from chmp/release/0.2.0
Release/0.2.0
2 parents 53269ed + 55d6ffd commit 3885935

File tree

6 files changed

+65
-9
lines changed

6 files changed

+65
-9
lines changed

Cargo.lock

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

marrow/Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,13 @@
22
edition = "2021"
33

44
name = "marrow"
5-
version = "0.1.0"
5+
version = "0.2.0"
66
authors = ["Christopher Prohm <[email protected]>"]
77
description = "Minimalist Arrow interop"
88
readme = "../Readme.md"
99
repository = "https://github.com/chmp/marrow"
1010
license = "MIT"
11+
keywords = ["arrow"]
1112

1213
[package.metadata.docs.rs]
1314
# arrow-version:replace: features = ["arrow2-0-17", "arrow-{version}"]

marrow/src/array.rs

+56-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
//! Arrays with owned data
2+
//!
3+
//! [`Array`] is a union of all array types supported by `marrow`.
24
use half::f16;
35

46
use crate::{
@@ -22,6 +24,8 @@ const _: () = {
2224
};
2325

2426
/// An array with owned data
27+
///
28+
/// The corresponding view is [`View`].
2529
#[derive(Clone, Debug, PartialEq)]
2630
#[non_exhaustive]
2731
pub enum Array {
@@ -199,6 +203,10 @@ impl Array {
199203
}
200204

201205
/// Get the view for this array
206+
///
207+
/// While the data of the arrays is borrowed, any metadata is copied (e.g., field definitions
208+
/// for structs). The reason is that views may be constructed from foreign Arrow implementations
209+
/// with different ways to store metadata.
202210
pub fn as_view(&self) -> View<'_> {
203211
match self {
204212
Self::Null(array) => View::Null(array.as_view()),
@@ -242,6 +250,8 @@ impl Array {
242250
}
243251

244252
/// An array without data
253+
///
254+
/// The corresponding view is [`NullView`].
245255
#[derive(Clone, Debug, PartialEq)]
246256
pub struct NullArray {
247257
/// The len of the array
@@ -256,6 +266,8 @@ impl NullArray {
256266
}
257267

258268
/// A `bool` array
269+
///
270+
/// The corresponding view is [`BooleanView`].
259271
#[derive(Clone, Debug, PartialEq)]
260272
pub struct BooleanArray {
261273
// Note: len is required to know how many bits of values are used
@@ -285,6 +297,8 @@ impl BooleanArray {
285297
}
286298

287299
/// An array of primitive values
300+
///
301+
/// The corresponding view is [`PrimitiveView`].
288302
#[derive(Clone, Debug, PartialEq)]
289303
pub struct PrimitiveArray<T> {
290304
/// The validity of the elements as a bitmap
@@ -307,6 +321,8 @@ impl<T> PrimitiveArray<T> {
307321
}
308322

309323
/// An array of time values (e.g., `"12:53"`)
324+
///
325+
/// The corresponding view is [`TimeView`].
310326
#[derive(Debug, Clone, PartialEq)]
311327
pub struct TimeArray<T> {
312328
/// The time unit of the values
@@ -332,6 +348,8 @@ impl<T> TimeArray<T> {
332348
}
333349

334350
/// An array of timestamps with an optional timezone
351+
///
352+
/// The corresponding view is [`TimestampView`].
335353
#[derive(Debug, Clone, PartialEq)]
336354

337355
pub struct TimestampArray {
@@ -361,6 +379,8 @@ impl TimestampArray {
361379
}
362380

363381
/// An array of structs
382+
///
383+
/// The corresponding view is [`StructView`].
364384
#[derive(Clone, Debug, PartialEq)]
365385
pub struct StructArray {
366386
/// The number of elements in the array
@@ -390,6 +410,8 @@ impl StructArray {
390410
}
391411

392412
/// An array of maps
413+
///
414+
/// The corresponding view is [`MapView`].
393415
#[derive(Clone, Debug, PartialEq)]
394416
pub struct MapArray {
395417
/// The validity of the elements as a bitmap
@@ -451,6 +473,8 @@ impl MapArray {
451473
/// An array of lists
452474
///
453475
/// The value of element `i` is given by the pseudo code `elements[offsets[i]..offsets[i+1]]`
476+
///
477+
/// The corresponding view is [`ListView`].
454478
#[derive(Clone, Debug, PartialEq)]
455479
pub struct ListArray<O> {
456480
/// The validity of the elements as a bitmap
@@ -479,6 +503,10 @@ impl<O> ListArray<O> {
479503
}
480504

481505
/// An array of lists of fixed size
506+
///
507+
/// The value of element `i` is given by pseudo code `elements[(n * i)..(n * (i + 1))]`
508+
///
509+
/// The corresponding view is [`FixedSizeListView`].
482510
#[derive(Clone, Debug, PartialEq)]
483511
pub struct FixedSizeListArray {
484512
/// The number of elements in this array, each a list with `n` children
@@ -512,6 +540,8 @@ impl FixedSizeListArray {
512540
/// An array of bytes with varying sizes
513541
///
514542
/// The value of element `i` can be accessed by the pseudo code `data[offsets[i]..offsets[i + 1]]`
543+
///
544+
/// The corresponding view is [`BytesView`].
515545
#[derive(Clone, Debug, PartialEq)]
516546
pub struct BytesArray<O> {
517547
/// The validity of the elements as a bitmap
@@ -537,6 +567,8 @@ impl<O> BytesArray<O> {
537567
}
538568

539569
/// An array of byte vectors with fixed length
570+
///
571+
/// The corresponding view is [`FixedSizeBinaryView`].
540572
#[derive(Clone, Debug, PartialEq)]
541573
pub struct FixedSizeBinaryArray {
542574
/// The number of bytes per element
@@ -563,7 +595,9 @@ impl FixedSizeBinaryArray {
563595

564596
/// An array of fixed point values
565597
///
566-
/// The value of element `i` can be computed by the pseudo code: `values[i] * (10 ** -scale)`
598+
/// The value of element `i` can be computed by the pseudo code: `values[i] * (10).pow(-scale)`
599+
///
600+
/// The corresponding view is [`DecimalView`].
567601
#[derive(Clone, Debug, PartialEq)]
568602
pub struct DecimalArray<T> {
569603
/// The precision, i.e., the number of digits
@@ -594,6 +628,8 @@ impl<T> DecimalArray<T> {
594628
/// An array that deduplicates elements
595629
///
596630
/// For element `i`, the value can be looked up by the pseudo code `values[indices[i]]`
631+
///
632+
/// The corresponding view is [`DictionaryView`].
597633
#[derive(Clone, Debug, PartialEq)]
598634
pub struct DictionaryArray {
599635
/// The indices into the values array for each element
@@ -615,9 +651,20 @@ impl DictionaryArray {
615651
/// A union of different data types
616652
///
617653
/// This corresponds roughly to Rust's enums. Each element has a type, which indicates the
618-
/// underlying array to use. For fast lookups the offsets into the underlying arrays are stored as
619-
/// well. For element `ì`, the value can be looked up by the pseudo code
620-
/// `fields[types[i]].1[offsets[i]]`.
654+
/// underlying array to use.
655+
///
656+
/// The Arrow format supports two types of enums: sparse unions and dense unions. In dense unions
657+
/// the lengths of all fields sum to the overall length, whereas in sparse unions each field has
658+
/// the same length as the overall array.
659+
///
660+
/// The value of element `i` in a union can be looked up by the pseudo code
661+
///
662+
/// - `fields[types[i]].1[offsets[i]]` (for dense unions)
663+
/// - `fields[types[i]].1[i]` (for sparse unions)
664+
///
665+
/// For sparse unions `offsets` must be `None`.
666+
///
667+
/// The corresponding view is [`UnionView`].
621668
#[derive(Clone, Debug, PartialEq)]
622669
pub struct UnionArray {
623670
/// The type of each element
@@ -644,6 +691,11 @@ impl UnionArray {
644691
}
645692

646693
/// An array with runs of deduplicated values
694+
///
695+
/// The value for element `i` can be looked up via `values[j]` for `j` such that `run_ends[j - 1] <=
696+
/// i && i < run_ends[j]`.
697+
///
698+
/// The corresponding view is [`RunEndEncodedView`].
647699
#[derive(Clone, Debug, PartialEq)]
648700
pub struct RunEndEncodedArray {
649701
/// The metadata for the arrays

marrow/src/lib.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
//! - [`Array`][crate::array::Array]: an array with owned data
1515
//! - [`View`][crate::view::View]: an array with borrowed data
1616
//! - [`Field`][crate::datatypes::Field]: the data type and metadata of a field
17-
//! - [`DataType`][crate::datatypes::DataType]: data types of array
17+
//! - [`DataType`][crate::datatypes::DataType]: data types of arrays
1818
//!
1919
//! ## Conversions
2020
//!
@@ -50,7 +50,7 @@
5050
//! // build the arrow array
5151
//! let arrow_array = Int32Array::from(vec![Some(1), Some(2), Some(3)]);
5252
//!
53-
//! // construct the view into this array
53+
//! // construct a view of this array
5454
//! let marrow_view = View::try_from(&arrow_array as &dyn arrow::array::Array)?;
5555
//!
5656
//! // access the underlying data

marrow/src/view.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Arrays with borrowed data
22
//!
3-
//! Each view corresponds 1:1 to an array.
3+
//! [`View`] is the union of all views supported by `marrow`. Each view corresponds 1:1 to an array.
44
use half::f16;
55

66
use crate::{

test_with_arrow/Cargo.toml

+3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
[package]
2+
# make sure this package is not accidentally published
3+
publish = false
4+
25
name = "test_with_arrow"
36
version = "0.1.0"
47
edition = "2021"

0 commit comments

Comments
 (0)