diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 00000000..6697b3e5 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,5 @@ +[*] +tab_width = 4 + +[.*] +tab_width = 4 diff --git a/.gitmodules b/.gitmodules index 4a7f3792..592e6baa 100644 --- a/.gitmodules +++ b/.gitmodules @@ -2,3 +2,11 @@ path = amadeus-testing url = https://github.com/alecmocatta/amadeus-testing.git branch = master +[submodule "amadeus-parquet/thrift"] + path = amadeus-parquet/thrift + url = https://github.com/apache/thrift.git + branch = master +[submodule "amadeus-parquet/parquet-format"] + path = amadeus-parquet/parquet-format + url = https://github.com/apache/parquet-format.git + branch = master diff --git a/.mergify.yml b/.mergify.yml index ff967fa0..4cc93b93 100644 --- a/.mergify.yml +++ b/.mergify.yml @@ -7,7 +7,6 @@ pull_request_rules: - "#approved-reviews-by>=1" - "#review-requested=0" - "#changes-requested-reviews-by=0" - - "#commented-reviews-by=0" actions: merge: method: merge @@ -21,7 +20,6 @@ pull_request_rules: - author=alecmocatta # https://github.com/Mergifyio/mergify-engine/issues/451 - "#review-requested=0" - "#changes-requested-reviews-by=0" - - "#commented-reviews-by=0" actions: merge: method: merge diff --git a/Cargo.toml b/Cargo.toml index 53050f7a..2c13402e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ [package] name = "amadeus" -version = "0.1.3" +version = "0.1.4" license = "Apache-2.0" authors = ["Alec Mocatta "] categories = ["concurrency", "science", "database", "parser-implementations", "text-processing"] @@ -14,7 +14,7 @@ parquet postgres aws s3 cloudfront elb json csv logs hadoop hdfs arrow common cr """ repository = "https://github.com/alecmocatta/amadeus" homepage = "https://github.com/alecmocatta/amadeus" -documentation = "https://docs.rs/amadeus/0.1.3" +documentation = "https://docs.rs/amadeus/0.1.4" readme = "README.md" edition = "2018" @@ -36,23 +36,20 @@ doc = ["amadeus-aws/doc", "amadeus-commoncrawl/doc", "amadeus-parquet/doc", "ama features = ["doc", "constellation", "aws", "commoncrawl", "postgres", "csv", "json"] [dependencies] -amadeus-core = { version = "=0.1.3", path = "amadeus-core" } -amadeus-derive = { version = "=0.1.3", path = "amadeus-derive" } -amadeus-types = { version = "=0.1.3", path = "amadeus-types" } -amadeus-aws = { version = "=0.1.3", path = "amadeus-aws", optional = true } -amadeus-commoncrawl = { version = "=0.1.3", path = "amadeus-commoncrawl", optional = true } -amadeus-parquet = { version = "=0.1.3", path = "amadeus-parquet", optional = true } -amadeus-postgres = { version = "=0.1.3", path = "amadeus-postgres", optional = true } -amadeus-serde = { version = "=0.1.3", path = "amadeus-serde", optional = true } +amadeus-core = { version = "=0.1.4", path = "amadeus-core" } +amadeus-derive = { version = "=0.1.4", path = "amadeus-derive" } +amadeus-types = { version = "=0.1.4", path = "amadeus-types" } +amadeus-aws = { version = "=0.1.4", path = "amadeus-aws", optional = true } +amadeus-commoncrawl = { version = "=0.1.4", path = "amadeus-commoncrawl", optional = true } +amadeus-parquet = { version = "=0.1.4", path = "amadeus-parquet", optional = true } +amadeus-postgres = { version = "=0.1.4", path = "amadeus-postgres", optional = true } +amadeus-serde = { version = "=0.1.4", path = "amadeus-serde", optional = true } constellation-rs = { version = "0.1", default-features = false, optional = true } futures-preview = "=0.3.0-alpha.19" pin-utils = "0.1.0-alpha.4" serde = { version = "1.0", features = ["derive"] } serde_closure = "0.2" 
-serde_traitobject = "0.1.6" - -# pin; broken for some reason -bincode = "=1.0.1" +serde_traitobject = "0.2" [dev-dependencies] either = { version = "1.5", features = ["serde"] } diff --git a/README.md b/README.md index a3cbc0eb..8c6276f5 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@

-[Docs](https://docs.rs/amadeus/0.1.3)
+[Docs](https://docs.rs/amadeus/0.1.4)

diff --git a/amadeus-aws/Cargo.toml b/amadeus-aws/Cargo.toml index ae2f61f8..0d57ee06 100644 --- a/amadeus-aws/Cargo.toml +++ b/amadeus-aws/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "amadeus-aws" -version = "0.1.3" +version = "0.1.4" license = "Apache-2.0" authors = ["Alec Mocatta "] categories = ["concurrency", "science", "database", "parser-implementations", "text-processing"] @@ -10,7 +10,7 @@ Harmonious distributed data analysis in Rust. """ repository = "https://github.com/alecmocatta/amadeus" homepage = "https://github.com/alecmocatta/amadeus" -documentation = "https://docs.rs/amadeus/0.1.3" +documentation = "https://docs.rs/amadeus/0.1.4" readme = "README.md" edition = "2018" @@ -22,8 +22,8 @@ maintenance = { status = "actively-developed" } doc = [] [dependencies] -amadeus-core = { version = "=0.1.3", path = "../amadeus-core" } -amadeus-types = { version = "=0.1.3", path = "../amadeus-types" } +amadeus-core = { version = "=0.1.4", path = "../amadeus-core" } +amadeus-types = { version = "=0.1.4", path = "../amadeus-types" } chrono = { version = "0.4", default-features = false } flate2 = "1.0" futures-01 = { package = "futures", version = "0.1" } diff --git a/amadeus-aws/src/cloudfront.rs b/amadeus-aws/src/cloudfront.rs index 05ab926c..65dc3c2c 100644 --- a/amadeus-aws/src/cloudfront.rs +++ b/amadeus-aws/src/cloudfront.rs @@ -59,6 +59,7 @@ impl Source for Cloudfront { .into_dist_iter() .flat_map(FnMut!(move |key: String| { let client = S3Client::new(region.clone()); + let mut errors = 0; ResultExpand( loop { match self::block_on_01(self::retry(|| { @@ -68,11 +69,15 @@ impl Source for Cloudfront { ..GetObjectRequest::default() }) })) { - Err(RusotoError::HttpDispatch(_)) => continue, + Err(RusotoError::HttpDispatch(_)) if errors < 10 => { + errors += 1; + continue; + } Err(RusotoError::Unknown(response)) - if response.status.is_server_error() => + if response.status.is_server_error() && errors < 10 => { - continue + errors += 1; + continue; } res => break res, } diff --git a/amadeus-aws/src/file.rs b/amadeus-aws/src/file.rs index 43984525..e6631f7c 100644 --- a/amadeus-aws/src/file.rs +++ b/amadeus-aws/src/file.rs @@ -206,6 +206,7 @@ impl Page for S3Page { Box::pin(async move { let len: u64 = buf.len().try_into().unwrap(); let mut cursor = io::Cursor::new(buf); + let mut errors = 0; while len - cursor.position() > 0 { let (start, end) = (offset + cursor.position(), offset + len - 1); let res = Compat01As03::new(self.client.get_object(GetObjectRequest { @@ -216,9 +217,15 @@ impl Page for S3Page { })); let res = res.await; match res { - Err(RusotoError::HttpDispatch(_)) => continue, - Err(RusotoError::Unknown(response)) if response.status.is_server_error() => { - continue + Err(RusotoError::HttpDispatch(_)) if errors < 10 => { + errors += 1; + continue; + } + Err(RusotoError::Unknown(response)) + if response.status.is_server_error() && errors < 10 => + { + errors += 1; + continue; } _ => (), } diff --git a/amadeus-aws/src/lib.rs b/amadeus-aws/src/lib.rs index 4958c035..5301861b 100644 --- a/amadeus-aws/src/lib.rs +++ b/amadeus-aws/src/lib.rs @@ -1,4 +1,4 @@ -#![doc(html_root_url = "https://docs.rs/amadeus-aws/0.1.3")] +#![doc(html_root_url = "https://docs.rs/amadeus-aws/0.1.4")] #![feature(type_alias_impl_trait)] mod cloudfront; diff --git a/amadeus-commoncrawl/Cargo.toml b/amadeus-commoncrawl/Cargo.toml index 8d117a38..26a3e0b0 100644 --- a/amadeus-commoncrawl/Cargo.toml +++ b/amadeus-commoncrawl/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "amadeus-commoncrawl" -version = "0.1.3" 
+version = "0.1.4" license = "MIT OR Apache-2.0" authors = ["Stephen Becker IV ", "Alec Mocatta "] categories = ["concurrency", "science", "database", "parser-implementations", "text-processing"] @@ -10,7 +10,7 @@ Harmonious distributed data analysis in Rust. """ repository = "https://github.com/alecmocatta/amadeus" homepage = "https://github.com/alecmocatta/amadeus" -documentation = "https://docs.rs/amadeus/0.1.3" +documentation = "https://docs.rs/amadeus/0.1.4" readme = "README.md" edition = "2018" @@ -22,8 +22,8 @@ maintenance = { status = "actively-developed" } doc = [] [dependencies] -amadeus-core = { version = "=0.1.3", path = "../amadeus-core" } -amadeus-types = { version = "=0.1.3", path = "../amadeus-types" } +amadeus-core = { version = "=0.1.4", path = "../amadeus-core" } +amadeus-types = { version = "=0.1.4", path = "../amadeus-types" } flate2 = "1.0" reqwest = "0.9" reqwest_resume = "0.2" diff --git a/amadeus-commoncrawl/src/lib.rs b/amadeus-commoncrawl/src/lib.rs index 716514a1..6e1ff69b 100644 --- a/amadeus-commoncrawl/src/lib.rs +++ b/amadeus-commoncrawl/src/lib.rs @@ -1,4 +1,4 @@ -#![doc(html_root_url = "https://docs.rs/amadeus-commoncrawl/0.1.3")] +#![doc(html_root_url = "https://docs.rs/amadeus-commoncrawl/0.1.4")] #![feature(type_alias_impl_trait)] mod commoncrawl; diff --git a/amadeus-core/Cargo.toml b/amadeus-core/Cargo.toml index 80816bd1..d9fd79d5 100644 --- a/amadeus-core/Cargo.toml +++ b/amadeus-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "amadeus-core" -version = "0.1.3" +version = "0.1.4" license = "Apache-2.0" authors = ["Alec Mocatta "] categories = ["concurrency", "science", "database", "parser-implementations", "text-processing"] @@ -10,7 +10,7 @@ Harmonious distributed data analysis in Rust. """ repository = "https://github.com/alecmocatta/amadeus" homepage = "https://github.com/alecmocatta/amadeus" -documentation = "https://docs.rs/amadeus/0.1.3" +documentation = "https://docs.rs/amadeus/0.1.4" readme = "README.md" edition = "2018" diff --git a/amadeus-core/src/dist_iter.rs b/amadeus-core/src/dist_iter.rs index 9f384467..5aa25035 100644 --- a/amadeus-core/src/dist_iter.rs +++ b/amadeus-core/src/dist_iter.rs @@ -27,7 +27,7 @@ use serde_closure::*; use std::{cmp::Ordering, hash::Hash, iter, marker::PhantomData, ops::FnMut, vec}; use crate::{ - into_dist_iter::IntoDistributedIterator, pool::{ProcessPool, ProcessSend} + into_dist_iter::IntoDistributedIterator, pool::{ProcessPool, ProcessSend}, util::type_coerce }; pub use self::{ @@ -263,12 +263,7 @@ pub trait DistributedIterator { } fn next_task(&mut self) -> Option { self.0.next_task().map(|task| { - ConnectConsumer( - task, - self.1.task(), - unsafe { type_transmute(self.2.task()) }, - PhantomData, - ) + ConnectConsumer(task, self.1.task(), type_coerce(self.2.task()), PhantomData) }) } } @@ -313,7 +308,7 @@ pub trait DistributedIterator { } } ConsumerReducerHack::<&A::Item>::run(&b, &item, &mut |item| { - i(Sum2::B(unsafe { type_transmute(item) })) + i(Sum2::B(type_coerce(item))) }) | a.run(item, &mut |item| i(Sum2::A(item))) }) } @@ -549,13 +544,13 @@ pub trait DistributedIterator { } } +#[must_use] pub trait DistributedIteratorMulti { type Item; type Task: ConsumerMulti + ProcessSend; fn task(&self) -> Self::Task; - #[must_use] fn for_each(self, f: F) -> ForEach where F: FnMut(Self::Item) + Clone + ProcessSend, @@ -565,7 +560,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(ForEach::new(self, f)) } - #[must_use] fn inspect(self, f: F) -> Inspect where F: FnMut(&Self::Item) + Clone 
+ ProcessSend, @@ -574,7 +568,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_iterator_multi(Inspect::new(self, f)) } - #[must_use] fn update(self, f: F) -> Update where F: FnMut(&mut Self::Item) + Clone + ProcessSend, @@ -583,7 +576,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_iterator_multi(Update::new(self, f)) } - #[must_use] fn map(self, f: F) -> Map where F: FnMut(Self::Item) -> B + Clone + ProcessSend, @@ -592,7 +584,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_iterator_multi(Map::new(self, f)) } - #[must_use] fn flat_map(self, f: F) -> FlatMap where F: FnMut(Self::Item) -> B + Clone + ProcessSend, @@ -602,7 +593,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_iterator_multi(FlatMap::new(self, f)) } - #[must_use] fn filter(self, f: F) -> Filter where F: FnMut(&Self::Item) -> bool + Clone + ProcessSend, @@ -620,7 +610,6 @@ pub trait DistributedIteratorMulti { // Chain::new(self, chain.into_dist_iter()) // } - #[must_use] fn fold(self, identity: ID, op: F) -> Fold where ID: FnMut() -> B + Clone + ProcessSend, @@ -632,7 +621,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(Fold::new(self, identity, op)) } - #[must_use] fn count(self) -> Count where Self::Item: 'static, @@ -641,7 +629,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(Count::new(self)) } - #[must_use] fn sum(self) -> Sum where B: iter::Sum + iter::Sum + ProcessSend, @@ -651,7 +638,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(Sum::new(self)) } - #[must_use] fn combine(self, f: F) -> Combine where F: FnMut(Self::Item, Self::Item) -> Self::Item + Clone + ProcessSend, @@ -661,7 +647,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(Combine::new(self, f)) } - #[must_use] fn max(self) -> Max where Self::Item: Ord + ProcessSend, @@ -670,7 +655,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(Max::new(self)) } - #[must_use] fn max_by(self, f: F) -> MaxBy where F: FnMut(&Self::Item, &Self::Item) -> Ordering + Clone + ProcessSend, @@ -680,7 +664,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(MaxBy::new(self, f)) } - #[must_use] fn max_by_key(self, f: F) -> MaxByKey where F: FnMut(&Self::Item) -> B + Clone + ProcessSend, @@ -691,7 +674,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(MaxByKey::new(self, f)) } - #[must_use] fn min(self) -> Min where Self::Item: Ord + ProcessSend, @@ -700,7 +682,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(Min::new(self)) } - #[must_use] fn min_by(self, f: F) -> MinBy where F: FnMut(&Self::Item, &Self::Item) -> Ordering + Clone + ProcessSend, @@ -710,7 +691,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(MinBy::new(self, f)) } - #[must_use] fn min_by_key(self, f: F) -> MinByKey where F: FnMut(&Self::Item) -> B + Clone + ProcessSend, @@ -721,7 +701,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(MinByKey::new(self, f)) } - #[must_use] fn most_frequent(self, n: usize, probability: f64, tolerance: f64) -> MostFrequent where Self::Item: Hash + Eq + Clone + ProcessSend, @@ -730,7 +709,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(MostFrequent::new(self, n, probability, tolerance)) } - #[must_use] fn most_distinct( self, n: usize, probability: f64, tolerance: f64, error_rate: f64, ) -> MostDistinct @@ -748,7 +726,6 @@ pub trait DistributedIteratorMulti { )) } - #[must_use] fn sample_unstable(self, samples: 
usize) -> SampleUnstable where Self::Item: ProcessSend, @@ -757,7 +734,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(SampleUnstable::new(self, samples)) } - #[must_use] fn all(self, f: F) -> All where F: FnMut(Self::Item) -> bool + Clone + ProcessSend, @@ -767,7 +743,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(All::new(self, f)) } - #[must_use] fn any(self, f: F) -> Any where F: FnMut(Self::Item) -> bool + Clone + ProcessSend, @@ -777,7 +752,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer(Any::new(self, f)) } - #[must_use] fn collect(self) -> Collect where B: FromDistributedIterator, @@ -786,7 +760,6 @@ pub trait DistributedIteratorMulti { _assert_distributed_reducer::(Collect::new(self)) } - #[must_use] fn cloned<'a, T>(self) -> Cloned where T: Clone + 'a, @@ -830,21 +803,3 @@ pub trait DistributedReducer, Source, B> { type ReduceB: Reducer::Output, Output = B>; fn reducers(self) -> (I, Self::ReduceAFactory, Self::ReduceB); } - -unsafe fn type_transmute(t1: T1) -> T2 { - assert_eq!( - ( - ::std::intrinsics::type_name::(), - ::std::mem::size_of::(), - ::std::mem::align_of::() - ), - ( - ::std::intrinsics::type_name::(), - ::std::mem::size_of::(), - ::std::mem::align_of::() - ) - ); - let ret = ::std::mem::transmute_copy(&t1); - ::std::mem::forget(t1); - ret -} diff --git a/amadeus-core/src/lib.rs b/amadeus-core/src/lib.rs index 2ac8d7f6..2a5c89f7 100644 --- a/amadeus-core/src/lib.rs +++ b/amadeus-core/src/lib.rs @@ -1,4 +1,4 @@ -#![doc(html_root_url = "https://docs.rs/amadeus-core/0.1.3")] +#![doc(html_root_url = "https://docs.rs/amadeus-core/0.1.4")] #![feature(atomic_min_max)] #![feature(specialization)] #![feature(never_type)] diff --git a/amadeus-core/src/pool.rs b/amadeus-core/src/pool.rs index 1cc8b2c0..2d4bb115 100644 --- a/amadeus-core/src/pool.rs +++ b/amadeus-core/src/pool.rs @@ -10,6 +10,7 @@ type Result = std::result::Result>; pub trait ProcessPool: Send + Sync + RefUnwindSafe + UnwindSafe + Unpin { fn processes(&self) -> usize; + fn threads(&self) -> usize; fn spawn(&self, work: F) -> Pin> + Send>> where F: FnOnce() -> T + ProcessSend, @@ -38,6 +39,9 @@ where fn processes(&self) -> usize { (*self).processes() } + fn threads(&self) -> usize { + (*self).threads() + } fn spawn(&self, work: F) -> Pin> + Send>> where F: FnOnce() -> T + ProcessSend, diff --git a/amadeus-core/src/util.rs b/amadeus-core/src/util.rs index 0e37eca6..ba20c923 100644 --- a/amadeus-core/src/util.rs +++ b/amadeus-core/src/util.rs @@ -1,5 +1,5 @@ use serde::{Deserialize, Serialize}; -use std::{error, fmt, io, marker::PhantomData, sync::Arc}; +use std::{any::type_name, error, fmt, io, marker::PhantomData, sync::Arc}; use crate::dist_iter::{Consumer, DistributedIterator}; @@ -92,3 +92,28 @@ impl Consumer for ImplConsumer { unreachable!() } } + +pub fn type_coerce(a: A) -> B { + try_type_coerce(a) + .unwrap_or_else(|| panic!("can't coerce {} to {}", type_name::(), type_name::())) +} +pub fn try_type_coerce(a: A) -> Option { + trait Eq { + fn eq(self) -> Option; + } + + struct Foo(A, PhantomData); + + impl Eq for Foo { + default fn eq(self) -> Option { + None + } + } + impl Eq for Foo { + fn eq(self) -> Option { + Some(self.0) + } + } + + Foo::(a, PhantomData).eq() +} diff --git a/amadeus-derive/Cargo.toml b/amadeus-derive/Cargo.toml index 58141147..8d2ee75f 100644 --- a/amadeus-derive/Cargo.toml +++ b/amadeus-derive/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "amadeus-derive" -version = "0.1.3" +version = "0.1.4" license = "Apache-2.0" 
authors = ["Alec Mocatta "] categories = ["concurrency", "science", "database", "parser-implementations", "text-processing"] @@ -10,7 +10,7 @@ Harmonious distributed data analysis in Rust. """ repository = "https://github.com/alecmocatta/amadeus" homepage = "https://github.com/alecmocatta/amadeus" -documentation = "https://docs.rs/amadeus/0.1.3" +documentation = "https://docs.rs/amadeus/0.1.4" readme = "README.md" edition = "2018" diff --git a/amadeus-derive/src/lib.rs b/amadeus-derive/src/lib.rs index 983004c5..916bbd58 100644 --- a/amadeus-derive/src/lib.rs +++ b/amadeus-derive/src/lib.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#![doc(html_root_url = "https://docs.rs/amadeus-derive/0.1.3")] +#![doc(html_root_url = "https://docs.rs/amadeus-derive/0.1.4")] #![recursion_limit = "400"] #![allow(clippy::useless_let_if_seq)] @@ -451,9 +451,9 @@ fn impl_struct( #parquet_includes #postgres_includes #serde_includes - pub use ::amadeus_types::{DowncastImpl, Downcast, DowncastError, Value, Group, SchemaIncomplete}; + pub use ::amadeus_types::{AmadeusOrd, DowncastFrom, Downcast, DowncastError, Value, Group, SchemaIncomplete}; pub use #amadeus_path::data::Data; - pub use ::std::{boxed::Box, clone::Clone, collections::HashMap, convert::{From, Into}, cmp::PartialEq, default::Default, error::Error, fmt::{self, Debug, Write}, marker::{Send, Sync}, result::Result::{self, Ok, Err}, string::String, vec, vec::Vec, option::Option::{self, Some, None}, iter::Iterator}; + pub use ::std::{boxed::Box, clone::Clone, collections::HashMap, convert::{From, Into}, cmp::{Ordering, PartialEq}, default::Default, error::Error, fmt::{self, Debug, Write}, marker::{Send, Sync}, result::Result::{self, Ok, Err}, string::String, vec, vec::Vec, option::Option::{self, Some, None}, iter::Iterator}; } #parquet_derives @@ -463,8 +463,22 @@ fn impl_struct( #[automatically_derived] impl #impl_generics __::Data for #name #ty_generics #where_clause_with_data {} - impl #impl_generics __::DowncastImpl<__::Value> for #name #ty_generics #where_clause_with_data { - fn downcast_impl(t: __::Value) -> __::Result { + #[automatically_derived] + impl #impl_generics __::AmadeusOrd for #name #ty_generics #where_clause_with_data { + fn amadeus_cmp(&self, other: &Self) -> __::Ordering { + let mut ord = __::Ordering::Equal; + #( + ord = if let __::Ordering::Equal = ord { + self.#field_names1.amadeus_cmp(&other.#field_names1) + } else { ord }; + )* + ord + } + } + + #[automatically_derived] + impl #impl_generics __::DowncastFrom<__::Value> for #name #ty_generics #where_clause_with_data { + fn downcast_from(t: __::Value) -> __::Result { let group = t.into_group()?; let field_names = group.field_names().map(__::Clone::clone); let mut fields = group.into_fields().into_iter(); @@ -485,6 +499,7 @@ fn impl_struct( } } + #[automatically_derived] impl #impl_generics __::From<#name #ty_generics> for __::Value where #where_clause_with_data { fn from(value: #name #ty_generics) -> Self { __::Value::Group(__::Group::new(__::vec![ diff --git a/amadeus-parquet/Cargo.toml b/amadeus-parquet/Cargo.toml index 430815ae..f2e96ba6 100644 --- a/amadeus-parquet/Cargo.toml +++ b/amadeus-parquet/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "amadeus-parquet" -version = "0.1.3" +version = "0.1.4" license = "Apache-2.0" authors = ["Alec Mocatta ", "Apache Arrow "] categories = ["concurrency", "science", "database", "parser-implementations", "text-processing"] @@ -10,7 +10,7 @@ An Apache Parquet implementation in Rust. 
""" repository = "https://github.com/alecmocatta/amadeus" homepage = "https://github.com/alecmocatta/amadeus" -documentation = "https://docs.rs/amadeus/0.1.3" +documentation = "https://docs.rs/amadeus/0.1.4" readme = "README.md" edition = "2018" @@ -22,8 +22,8 @@ maintenance = { status = "actively-developed" } doc = [] [dependencies] -amadeus-core = { version = "=0.1.3", path = "../amadeus-core" } -amadeus-types = { version = "=0.1.3", path = "../amadeus-types" } +amadeus-core = { version = "=0.1.4", path = "../amadeus-core" } +amadeus-types = { version = "=0.1.4", path = "../amadeus-types" } brotli = "3.3" byteorder = "1.2" chrono = { version = "0.4", default-features = false } @@ -32,13 +32,12 @@ fxhash = "0.2" linked-hash-map = "0.5" lz4 = "1.23" num-bigint = "0.2" -parquet-format = "2.6.0" quick-error = "1.2.2" serde = { version = "1.0", features = ["derive"] } serde_closure = "0.2" snap = "0.2" sum = "0.1" -thrift = "0.12.0" +thrift = "0.13" zstd = "0.4" [dev-dependencies] diff --git a/amadeus-parquet/generate.sh b/amadeus-parquet/generate.sh new file mode 100755 index 00000000..ce298aa9 --- /dev/null +++ b/amadeus-parquet/generate.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +set -o errexit; set -o pipefail; set -o nounset; set -o xtrace; shopt -s nullglob; + +# sudo apt-get install libtool automake pkgconf bison flex + +pushd thrift + +./bootstrap.sh + +./configure --without-as3 --without-cpp --without-qt5 --without-c_glib --without-csharp \ +--without-java --without-erlang --without-nodejs --without-nodets --without-lua --without-python \ +--without-py3 --without-perl --without-php --without-php_extension --without-dart --without-ruby \ +--without-haskell --without-go --without-swift --without-rs --without-cl --without-haxe \ +--without-netstd --without-d + +make + +popd + +thrift/compiler/cpp/thrift --gen rs parquet-format/src/main/thrift/parquet.thrift && \ +mv parquet.rs src/internal/format.rs \ No newline at end of file diff --git a/amadeus-parquet/parquet-format b/amadeus-parquet/parquet-format new file mode 160000 index 00000000..d041aac7 --- /dev/null +++ b/amadeus-parquet/parquet-format @@ -0,0 +1 @@ +Subproject commit d041aac7a451a615abf8953a9b07f443fa34f2cd diff --git a/amadeus-parquet/src/internal/basic.rs b/amadeus-parquet/src/internal/basic.rs index 5f0a9599..183a78cd 100644 --- a/amadeus-parquet/src/internal/basic.rs +++ b/amadeus-parquet/src/internal/basic.rs @@ -20,9 +20,7 @@ use std::{fmt, str}; -use parquet_format as parquet; - -use crate::internal::errors::ParquetError; +use crate::internal::{errors::ParquetError, format as parquet}; // ---------------------------------------------------------------------- // Types from the Thrift definition diff --git a/amadeus-parquet/src/internal/file/metadata.rs b/amadeus-parquet/src/internal/file/metadata.rs index 35ea09ef..96d3d2a2 100644 --- a/amadeus-parquet/src/internal/file/metadata.rs +++ b/amadeus-parquet/src/internal/file/metadata.rs @@ -35,10 +35,8 @@ use std::rc::Rc; -use parquet_format::{ColumnChunk, ColumnMetaData, RowGroup}; - use crate::internal::{ - basic::{ColumnOrder, Compression, Encoding, Type}, errors::{ParquetError, Result}, file::statistics::{self, Statistics}, schema::types::{ + basic::{ColumnOrder, Compression, Encoding, Type}, errors::{ParquetError, Result}, file::statistics::{self, Statistics}, format::{ColumnChunk, ColumnMetaData, RowGroup}, schema::types::{ ColumnDescPtr, ColumnDescriptor, ColumnPath, SchemaDescPtr, SchemaDescriptor, Type as SchemaType, TypePtr } }; @@ -248,6 +246,9 @@ impl 
RowGroupMetaData { total_byte_size: self.total_byte_size, num_rows: self.num_rows, sorting_columns: None, + file_offset: None, + total_compressed_size: None, + ordinal: None, } } } @@ -483,6 +484,7 @@ impl ColumnChunkMetaData { dictionary_page_offset: self.dictionary_page_offset, statistics: statistics::to_thrift(self.statistics.as_ref()), encoding_stats: None, + bloom_filter_offset: None, }; ColumnChunk { @@ -493,6 +495,8 @@ impl ColumnChunkMetaData { offset_index_length: None, column_index_offset: None, column_index_length: None, + crypto_metadata: None, + encrypted_column_metadata: None, } } } diff --git a/amadeus-parquet/src/internal/file/properties.rs b/amadeus-parquet/src/internal/file/properties.rs index 50a29b90..4225421f 100644 --- a/amadeus-parquet/src/internal/file/properties.rs +++ b/amadeus-parquet/src/internal/file/properties.rs @@ -63,7 +63,11 @@ const DEFAULT_DICTIONARY_PAGE_SIZE_LIMIT: usize = DEFAULT_PAGE_SIZE; const DEFAULT_STATISTICS_ENABLED: bool = true; const DEFAULT_MAX_STATISTICS_SIZE: usize = 4096; const DEFAULT_MAX_ROW_GROUP_SIZE: usize = 128 * 1024 * 1024; -const DEFAULT_CREATED_BY: &str = concat!("parquet-rs version ", env!("CARGO_PKG_VERSION")); +const DEFAULT_CREATED_BY: &str = concat!( + env!("CARGO_PKG_NAME"), + " version ", + env!("CARGO_PKG_VERSION") +); /// Parquet writer version. /// diff --git a/amadeus-parquet/src/internal/file/reader.rs b/amadeus-parquet/src/internal/file/reader.rs index 89179694..ea32437d 100644 --- a/amadeus-parquet/src/internal/file/reader.rs +++ b/amadeus-parquet/src/internal/file/reader.rs @@ -23,15 +23,12 @@ use std::{ }; use byteorder::{ByteOrder, LittleEndian}; -use parquet_format::{ - ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData, PageHeader, PageType -}; use thrift::protocol::TCompactInputProtocol; use crate::internal::{ basic::{ColumnOrder, Compression, Encoding, Type}, column::{ page::{Page, PageReader}, reader::{ColumnReader, ColumnReaderImpl} - }, compression::{create_codec, Codec}, errors::{ParquetError, Result}, file::{metadata::*, statistics, FOOTER_SIZE, PARQUET_MAGIC}, record::{ParquetData, Predicate, RowIter}, schema::types::{self, SchemaDescriptor}, util::{ + }, compression::{create_codec, Codec}, errors::{ParquetError, Result}, file::{metadata::*, statistics, FOOTER_SIZE, PARQUET_MAGIC}, format::{ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData, PageHeader, PageType}, record::{ParquetData, Predicate, RowIter}, schema::types::{self, SchemaDescriptor}, util::{ io::{BufReader, FileSource}, memory::ByteBufferPtr } }; @@ -667,7 +664,6 @@ impl PageReader for SerializedPageReader { #[cfg(test)] mod tests { - use parquet_format::TypeDefinedOrder; use std::collections::HashMap; use test::Bencher; @@ -675,7 +671,7 @@ mod tests { use super::*; use crate::internal::{ - basic::SortOrder, column::reader::{get_typed_column_reader, ColumnReader}, data_type::*, file::reader::{FileReader, RowGroupReader, SerializedFileReader}, schema::types::{ColumnPath, Type as SchemaType}, util::test_common::{get_temp_file, get_test_file, get_test_path} + basic::SortOrder, column::reader::{get_typed_column_reader, ColumnReader}, data_type::*, file::reader::{FileReader, RowGroupReader, SerializedFileReader}, format::TypeDefinedOrder, schema::types::{ColumnPath, Type as SchemaType}, util::test_common::{get_temp_file, get_test_file, get_test_path} }; #[test] diff --git a/amadeus-parquet/src/internal/file/statistics.rs b/amadeus-parquet/src/internal/file/statistics.rs index 8396abc5..848bbf5b 100644 --- 
a/amadeus-parquet/src/internal/file/statistics.rs +++ b/amadeus-parquet/src/internal/file/statistics.rs @@ -40,9 +40,8 @@ use std::{fmt, ptr}; use byteorder::{ByteOrder, LittleEndian}; -use parquet_format::Statistics as TStatistics; -use crate::internal::{basic::Type, data_type::*}; +use crate::internal::{basic::Type, data_type::*, format::Statistics as TStatistics}; // Macro to generate methods create Statistics. macro_rules! statistics_new_func { diff --git a/amadeus-parquet/src/internal/file/writer.rs b/amadeus-parquet/src/internal/file/writer.rs index 817189d5..d5698244 100644 --- a/amadeus-parquet/src/internal/file/writer.rs +++ b/amadeus-parquet/src/internal/file/writer.rs @@ -23,7 +23,6 @@ use std::{ }; use byteorder::{ByteOrder, LittleEndian}; -use parquet_format as parquet; use thrift::protocol::{TCompactOutputProtocol, TOutputProtocol}; use crate::internal::{ @@ -31,7 +30,7 @@ use crate::internal::{ page::{CompressedPage, Page, PageWriteSpec, PageWriter}, writer::{get_column_writer, ColumnWriter} }, errors::{ParquetError, Result}, file::{ metadata::*, properties::WriterPropertiesPtr, statistics::to_thrift as statistics_to_thrift, FOOTER_SIZE, PARQUET_MAGIC - }, schema::types::{self, SchemaDescPtr, SchemaDescriptor, TypePtr}, util::io::{FileSink, Position} + }, format as parquet, schema::types::{self, SchemaDescPtr, SchemaDescriptor, TypePtr}, util::io::{FileSink, Position} }; // ---------------------------------------------------------------------- @@ -167,6 +166,8 @@ impl SerializedFileWriter { key_value_metadata: None, created_by: Some(self.props.created_by().to_owned()), column_orders: None, + encryption_algorithm: None, + footer_signing_key_metadata: None, }; // Write file metadata diff --git a/amadeus-parquet/src/internal/format.rs b/amadeus-parquet/src/internal/format.rs new file mode 100644 index 00000000..897a3bde --- /dev/null +++ b/amadeus-parquet/src/internal/format.rs @@ -0,0 +1,5196 @@ +// Autogenerated by Thrift Compiler (0.13.0) +// DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + +#![allow(unused_imports)] +#![allow(unused_extern_crates)] +#![cfg_attr(feature = "cargo-clippy", allow(too_many_arguments, type_complexity))] +#![cfg_attr(rustfmt, rustfmt_skip)] + +extern crate thrift; + +use thrift::OrderedFloat; +use std::cell::RefCell; +use std::collections::{BTreeMap, BTreeSet}; +use std::convert::{From, TryFrom}; +use std::default::Default; +use std::error::Error; +use std::fmt; +use std::fmt::{Display, Formatter}; +use std::rc::Rc; + +use thrift::{ApplicationError, ApplicationErrorKind, ProtocolError, ProtocolErrorKind, TThriftClient}; +use thrift::protocol::{TFieldIdentifier, TListIdentifier, TMapIdentifier, TMessageIdentifier, TMessageType, TInputProtocol, TOutputProtocol, TSetIdentifier, TStructIdentifier, TType}; +use thrift::protocol::field_id; +use thrift::protocol::verify_expected_message_type; +use thrift::protocol::verify_expected_sequence_number; +use thrift::protocol::verify_expected_service_call; +use thrift::protocol::verify_required_field_exists; +use thrift::server::TProcessor; + +/// Types supported by Parquet. These types are intended to be used in combination +/// with the encodings to control the on disk storage format. +/// For example INT16 is not included as a type since a good encoding of INT32 +/// would handle this. 
+#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum Type { + Boolean = 0, + Int32 = 1, + Int64 = 2, + Int96 = 3, + Float = 4, + Double = 5, + ByteArray = 6, + FixedLenByteArray = 7, +} + +impl Type { + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(*self as i32) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + Type::try_from(enum_value) } +} + +impl TryFrom for Type { + type Error = thrift::Error; fn try_from(i: i32) -> Result { + match i { + 0 => Ok(Type::Boolean), + 1 => Ok(Type::Int32), + 2 => Ok(Type::Int64), + 3 => Ok(Type::Int96), + 4 => Ok(Type::Float), + 5 => Ok(Type::Double), + 6 => Ok(Type::ByteArray), + 7 => Ok(Type::FixedLenByteArray), + _ => { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + format!("cannot convert enum constant {} to Type", i) + ) + ) + ) + }, + } + } +} + +/// Common types used by frameworks(e.g. hive, pig) using parquet. This helps map +/// between types in those frameworks to the base types in parquet. This is only +/// metadata and not needed to read or write the data. +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum ConvertedType { + /// a BYTE_ARRAY actually contains UTF8 encoded chars + Utf8 = 0, + /// a map is converted as an optional field containing a repeated key/value pair + Map = 1, + /// a key/value pair is converted into a group of two fields + MapKeyValue = 2, + /// a list is converted into an optional field containing a repeated field for its + /// values + List = 3, + /// an enum is converted into a binary field + Enum = 4, + /// A decimal value. + /// + /// This may be used to annotate binary or fixed primitive types. The + /// underlying byte array stores the unscaled value encoded as two's + /// complement using big-endian byte order (the most significant byte is the + /// zeroth element). The value of the decimal is the value * 10^{-scale}. + /// + /// This must be accompanied by a (maximum) precision and a scale in the + /// SchemaElement. The precision specifies the number of digits in the decimal + /// and the scale stores the location of the decimal point. For example 1.23 + /// would have precision 3 (3 total digits) and scale 2 (the decimal point is + /// 2 digits over). + Decimal = 5, + /// A Date + /// + /// Stored as days since Unix epoch, encoded as the INT32 physical type. + /// + Date = 6, + /// A time + /// + /// The total number of milliseconds since midnight. The value is stored + /// as an INT32 physical type. + TimeMillis = 7, + /// A time. + /// + /// The total number of microseconds since midnight. The value is stored as + /// an INT64 physical type. + TimeMicros = 8, + /// A date/time combination + /// + /// Date and time recorded as milliseconds since the Unix epoch. Recorded as + /// a physical type of INT64. + TimestampMillis = 9, + /// A date/time combination + /// + /// Date and time recorded as microseconds since the Unix epoch. The value is + /// stored as an INT64 physical type. + TimestampMicros = 10, + /// An unsigned integer value. + /// + /// The number describes the maximum number of meaningful data bits in + /// the stored value. 8, 16 and 32 bit values are stored using the + /// INT32 physical type. 64 bit values are stored using the INT64 + /// physical type. + /// + Uint8 = 11, + Uint16 = 12, + Uint32 = 13, + Uint64 = 14, + /// A signed integer value. 
+ /// + /// The number describes the maximum number of meaningful data bits in + /// the stored value. 8, 16 and 32 bit values are stored using the + /// INT32 physical type. 64 bit values are stored using the INT64 + /// physical type. + /// + Int8 = 15, + Int16 = 16, + Int32 = 17, + Int64 = 18, + /// An embedded JSON document + /// + /// A JSON document embedded within a single UTF8 column. + Json = 19, + /// An embedded BSON document + /// + /// A BSON document embedded within a single BINARY column. + Bson = 20, + /// An interval of time + /// + /// This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 + /// This data is composed of three separate little endian unsigned + /// integers. Each stores a component of a duration of time. The first + /// integer identifies the number of months associated with the duration, + /// the second identifies the number of days associated with the duration + /// and the third identifies the number of milliseconds associated with + /// the provided duration. This duration of time is independent of any + /// particular timezone or date. + Interval = 21, +} + +impl ConvertedType { + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(*self as i32) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + ConvertedType::try_from(enum_value) } +} + +impl TryFrom for ConvertedType { + type Error = thrift::Error; fn try_from(i: i32) -> Result { + match i { + 0 => Ok(ConvertedType::Utf8), + 1 => Ok(ConvertedType::Map), + 2 => Ok(ConvertedType::MapKeyValue), + 3 => Ok(ConvertedType::List), + 4 => Ok(ConvertedType::Enum), + 5 => Ok(ConvertedType::Decimal), + 6 => Ok(ConvertedType::Date), + 7 => Ok(ConvertedType::TimeMillis), + 8 => Ok(ConvertedType::TimeMicros), + 9 => Ok(ConvertedType::TimestampMillis), + 10 => Ok(ConvertedType::TimestampMicros), + 11 => Ok(ConvertedType::Uint8), + 12 => Ok(ConvertedType::Uint16), + 13 => Ok(ConvertedType::Uint32), + 14 => Ok(ConvertedType::Uint64), + 15 => Ok(ConvertedType::Int8), + 16 => Ok(ConvertedType::Int16), + 17 => Ok(ConvertedType::Int32), + 18 => Ok(ConvertedType::Int64), + 19 => Ok(ConvertedType::Json), + 20 => Ok(ConvertedType::Bson), + 21 => Ok(ConvertedType::Interval), + _ => { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + format!("cannot convert enum constant {} to ConvertedType", i) + ) + ) + ) + }, + } + } +} + +/// Representation of Schemas +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum FieldRepetitionType { + /// This field is required (can not be null) and each record has exactly 1 value. + Required = 0, + /// The field is optional (can be null) and each record has 0 or 1 values. 
+ Optional = 1, + /// The field is repeated and can contain 0 or more values + Repeated = 2, +} + +impl FieldRepetitionType { + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(*self as i32) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + FieldRepetitionType::try_from(enum_value) } +} + +impl TryFrom for FieldRepetitionType { + type Error = thrift::Error; fn try_from(i: i32) -> Result { + match i { + 0 => Ok(FieldRepetitionType::Required), + 1 => Ok(FieldRepetitionType::Optional), + 2 => Ok(FieldRepetitionType::Repeated), + _ => { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + format!("cannot convert enum constant {} to FieldRepetitionType", i) + ) + ) + ) + }, + } + } +} + +/// Encodings supported by Parquet. Not all encodings are valid for all types. These +/// enums are also used to specify the encoding of definition and repetition levels. +/// See the accompanying doc for the details of the more complicated encodings. +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum Encoding { + /// Default encoding. + /// BOOLEAN - 1 bit per value. 0 is false; 1 is true. + /// INT32 - 4 bytes per value. Stored as little-endian. + /// INT64 - 8 bytes per value. Stored as little-endian. + /// FLOAT - 4 bytes per value. IEEE. Stored as little-endian. + /// DOUBLE - 8 bytes per value. IEEE. Stored as little-endian. + /// BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. + /// FIXED_LEN_BYTE_ARRAY - Just the bytes. + Plain = 0, + /// Deprecated: Dictionary encoding. The values in the dictionary are encoded in the + /// plain type. + /// in a data page use RLE_DICTIONARY instead. + /// in a Dictionary page use PLAIN instead + PlainDictionary = 2, + /// Group packed run length encoding. Usable for definition/repetition levels + /// encoding and Booleans (on one bit: 0 is false; 1 is true.) + Rle = 3, + /// Bit packed encoding. This can only be used if the data has a known max + /// width. Usable for definition/repetition levels encoding. + BitPacked = 4, + /// Delta encoding for integers. This can be used for int columns and works best + /// on sorted data + DeltaBinaryPacked = 5, + /// Encoding for byte arrays to separate the length values and the data. The lengths + /// are encoded using DELTA_BINARY_PACKED + DeltaLengthByteArray = 6, + /// Incremental-encoded byte array. Prefix lengths are encoded using DELTA_BINARY_PACKED. + /// Suffixes are stored as delta length byte arrays. 
+ DeltaByteArray = 7, + /// Dictionary encoding: the ids are encoded using the RLE encoding + RleDictionary = 8, +} + +impl Encoding { + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(*self as i32) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + Encoding::try_from(enum_value) } +} + +impl TryFrom for Encoding { + type Error = thrift::Error; fn try_from(i: i32) -> Result { + match i { + 0 => Ok(Encoding::Plain), + 2 => Ok(Encoding::PlainDictionary), + 3 => Ok(Encoding::Rle), + 4 => Ok(Encoding::BitPacked), + 5 => Ok(Encoding::DeltaBinaryPacked), + 6 => Ok(Encoding::DeltaLengthByteArray), + 7 => Ok(Encoding::DeltaByteArray), + 8 => Ok(Encoding::RleDictionary), + _ => { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + format!("cannot convert enum constant {} to Encoding", i) + ) + ) + ) + }, + } + } +} + +/// Supported compression algorithms. +/// +/// Codecs added in 2.4 can be read by readers based on 2.4 and later. +/// Codec support may vary between readers based on the format version and +/// libraries available at runtime. Gzip, Snappy, and LZ4 codecs are +/// widely available, while Zstd and Brotli require additional libraries. +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum CompressionCodec { + Uncompressed = 0, + Snappy = 1, + Gzip = 2, + Lzo = 3, + Brotli = 4, + Lz4 = 5, + Zstd = 6, +} + +impl CompressionCodec { + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(*self as i32) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + CompressionCodec::try_from(enum_value) } +} + +impl TryFrom for CompressionCodec { + type Error = thrift::Error; fn try_from(i: i32) -> Result { + match i { + 0 => Ok(CompressionCodec::Uncompressed), + 1 => Ok(CompressionCodec::Snappy), + 2 => Ok(CompressionCodec::Gzip), + 3 => Ok(CompressionCodec::Lzo), + 4 => Ok(CompressionCodec::Brotli), + 5 => Ok(CompressionCodec::Lz4), + 6 => Ok(CompressionCodec::Zstd), + _ => { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + format!("cannot convert enum constant {} to CompressionCodec", i) + ) + ) + ) + }, + } + } +} + +#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum PageType { + DataPage = 0, + IndexPage = 1, + DictionaryPage = 2, + DataPageV2 = 3, +} + +impl PageType { + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(*self as i32) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + PageType::try_from(enum_value) } +} + +impl TryFrom for PageType { + type Error = thrift::Error; fn try_from(i: i32) -> Result { + match i { + 0 => Ok(PageType::DataPage), + 1 => Ok(PageType::IndexPage), + 2 => Ok(PageType::DictionaryPage), + 3 => Ok(PageType::DataPageV2), + _ => { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + format!("cannot convert enum constant {} to PageType", i) + ) + ) + ) + }, + } + } +} + +/// Enum to annotate whether lists of min/max elements inside ColumnIndex +/// are ordered and if so, in which direction. 
+#[derive(Copy, Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum BoundaryOrder { + Unordered = 0, + Ascending = 1, + Descending = 2, +} + +impl BoundaryOrder { + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + o_prot.write_i32(*self as i32) + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let enum_value = i_prot.read_i32()?; + BoundaryOrder::try_from(enum_value) } +} + +impl TryFrom for BoundaryOrder { + type Error = thrift::Error; fn try_from(i: i32) -> Result { + match i { + 0 => Ok(BoundaryOrder::Unordered), + 1 => Ok(BoundaryOrder::Ascending), + 2 => Ok(BoundaryOrder::Descending), + _ => { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + format!("cannot convert enum constant {} to BoundaryOrder", i) + ) + ) + ) + }, + } + } +} + +// +// Statistics +// + +/// Statistics per row group and per page +/// All fields are optional. +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Statistics { + /// DEPRECATED: min and max value of the column. Use min_value and max_value. + /// + /// Values are encoded using PLAIN encoding, except that variable-length byte + /// arrays do not include a length prefix. + /// + /// These fields encode min and max values determined by signed comparison + /// only. New files should use the correct order for a column's logical type + /// and store the values in the min_value and max_value fields. + /// + /// To support older readers, these may be set when the column order is + /// signed. + pub max: Option>, + pub min: Option>, + /// count of null value in the column + pub null_count: Option, + /// count of distinct values occurring + pub distinct_count: Option, + /// Min and max values for the column, determined by its ColumnOrder. + /// + /// Values are encoded using PLAIN encoding, except that variable-length byte + /// arrays do not include a length prefix. 
+ pub max_value: Option>, + pub min_value: Option>, +} + +impl Statistics { + pub fn new(max: F1, min: F2, null_count: F3, distinct_count: F4, max_value: F5, min_value: F6) -> Statistics where F1: Into>>, F2: Into>>, F3: Into>, F4: Into>, F5: Into>>, F6: Into>> { + Statistics { + max: max.into(), + min: min.into(), + null_count: null_count.into(), + distinct_count: distinct_count.into(), + max_value: max_value.into(), + min_value: min_value.into(), + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option> = None; + let mut f_2: Option> = None; + let mut f_3: Option = None; + let mut f_4: Option = None; + let mut f_5: Option> = None; + let mut f_6: Option> = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_bytes()?; + f_1 = Some(val); + }, + 2 => { + let val = i_prot.read_bytes()?; + f_2 = Some(val); + }, + 3 => { + let val = i_prot.read_i64()?; + f_3 = Some(val); + }, + 4 => { + let val = i_prot.read_i64()?; + f_4 = Some(val); + }, + 5 => { + let val = i_prot.read_bytes()?; + f_5 = Some(val); + }, + 6 => { + let val = i_prot.read_bytes()?; + f_6 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = Statistics { + max: f_1, + min: f_2, + null_count: f_3, + distinct_count: f_4, + max_value: f_5, + min_value: f_6, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("Statistics"); + o_prot.write_struct_begin(&struct_ident)?; + if let Some(ref fld_var) = self.max { + o_prot.write_field_begin(&TFieldIdentifier::new("max", TType::String, 1))?; + o_prot.write_bytes(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(ref fld_var) = self.min { + o_prot.write_field_begin(&TFieldIdentifier::new("min", TType::String, 2))?; + o_prot.write_bytes(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(fld_var) = self.null_count { + o_prot.write_field_begin(&TFieldIdentifier::new("null_count", TType::I64, 3))?; + o_prot.write_i64(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(fld_var) = self.distinct_count { + o_prot.write_field_begin(&TFieldIdentifier::new("distinct_count", TType::I64, 4))?; + o_prot.write_i64(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(ref fld_var) = self.max_value { + o_prot.write_field_begin(&TFieldIdentifier::new("max_value", TType::String, 5))?; + o_prot.write_bytes(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(ref fld_var) = self.min_value { + o_prot.write_field_begin(&TFieldIdentifier::new("min_value", TType::String, 6))?; + o_prot.write_bytes(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for Statistics { + fn default() -> Self { + Statistics{ + max: Some(Vec::new()), + min: Some(Vec::new()), + null_count: Some(0), + distinct_count: Some(0), + max_value: Some(Vec::new()), + min_value: Some(Vec::new()), + } + } +} + +// +// StringType +// + +/// Empty structs to use as logical type annotations +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct StringType { +} + 
+impl StringType { + pub fn new() -> StringType { + StringType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = StringType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("StringType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for StringType { + fn default() -> Self { + StringType{} + } +} + +// +// UUIDType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct UUIDType { +} + +impl UUIDType { + pub fn new() -> UUIDType { + UUIDType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = UUIDType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("UUIDType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for UUIDType { + fn default() -> Self { + UUIDType{} + } +} + +// +// MapType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct MapType { +} + +impl MapType { + pub fn new() -> MapType { + MapType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = MapType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("MapType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for MapType { + fn default() -> Self { + MapType{} + } +} + +// +// ListType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct ListType { +} + +impl ListType { + pub fn new() -> ListType { + ListType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = ListType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = 
TStructIdentifier::new("ListType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for ListType { + fn default() -> Self { + ListType{} + } +} + +// +// EnumType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct EnumType { +} + +impl EnumType { + pub fn new() -> EnumType { + EnumType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = EnumType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("EnumType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for EnumType { + fn default() -> Self { + EnumType{} + } +} + +// +// DateType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct DateType { +} + +impl DateType { + pub fn new() -> DateType { + DateType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = DateType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("DateType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for DateType { + fn default() -> Self { + DateType{} + } +} + +// +// NullType +// + +/// Logical type to annotate a column that is always null. +/// +/// Sometimes when discovering the schema of existing data, values are always +/// null and the physical type can't be determined. This annotation signals +/// the case where the physical type was guessed from all null values. 
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct NullType { +} + +impl NullType { + pub fn new() -> NullType { + NullType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = NullType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("NullType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for NullType { + fn default() -> Self { + NullType{} + } +} + +// +// DecimalType +// + +/// Decimal logical type annotation +/// +/// To maintain forward-compatibility in v1, implementations using this logical +/// type must also set scale and precision on the annotated SchemaElement. +/// +/// Allowed for physical types: INT32, INT64, FIXED, and BINARY +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct DecimalType { + pub scale: i32, + pub precision: i32, +} + +impl DecimalType { + pub fn new(scale: i32, precision: i32) -> DecimalType { + DecimalType { + scale: scale, + precision: precision, + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_i32()?; + f_1 = Some(val); + }, + 2 => { + let val = i_prot.read_i32()?; + f_2 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("DecimalType.scale", &f_1)?; + verify_required_field_exists("DecimalType.precision", &f_2)?; + let ret = DecimalType { + scale: f_1.expect("auto-generated code should have checked for presence of required fields"), + precision: f_2.expect("auto-generated code should have checked for presence of required fields"), + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("DecimalType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("scale", TType::I32, 1))?; + o_prot.write_i32(self.scale)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("precision", TType::I32, 2))?; + o_prot.write_i32(self.precision)?; + o_prot.write_field_end()?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// MilliSeconds +// + +/// Time units for logical types +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct MilliSeconds { +} + +impl MilliSeconds { + pub fn new() -> MilliSeconds { + MilliSeconds {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match 
field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = MilliSeconds {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("MilliSeconds"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for MilliSeconds { + fn default() -> Self { + MilliSeconds{} + } +} + +// +// MicroSeconds +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct MicroSeconds { +} + +impl MicroSeconds { + pub fn new() -> MicroSeconds { + MicroSeconds {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = MicroSeconds {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("MicroSeconds"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for MicroSeconds { + fn default() -> Self { + MicroSeconds{} + } +} + +// +// NanoSeconds +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct NanoSeconds { +} + +impl NanoSeconds { + pub fn new() -> NanoSeconds { + NanoSeconds {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = NanoSeconds {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("NanoSeconds"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for NanoSeconds { + fn default() -> Self { + NanoSeconds{} + } +} + +// +// TimeUnit +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum TimeUnit { + MILLIS(MilliSeconds), + MICROS(MicroSeconds), + NANOS(NanoSeconds), +} + +impl TimeUnit { + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let mut ret: Option = None; + let mut received_field_count = 0; + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = MilliSeconds::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(TimeUnit::MILLIS(val)); + } + received_field_count += 1; + }, + 2 => { + let val = MicroSeconds::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(TimeUnit::MICROS(val)); + } + received_field_count += 1; + }, + 3 => { + let val = NanoSeconds::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(TimeUnit::NANOS(val)); + } + 
received_field_count += 1; + }, + _ => { + i_prot.skip(field_ident.field_type)?; + received_field_count += 1; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + if received_field_count == 0 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received empty union from remote TimeUnit" + ) + ) + ) + } else if received_field_count > 1 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received multiple fields for union from remote TimeUnit" + ) + ) + ) + } else { + Ok(ret.expect("return value should have been constructed")) + } + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("TimeUnit"); + o_prot.write_struct_begin(&struct_ident)?; + match *self { + TimeUnit::MILLIS(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("MILLIS", TType::Struct, 1))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + TimeUnit::MICROS(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("MICROS", TType::Struct, 2))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + TimeUnit::NANOS(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("NANOS", TType::Struct, 3))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// TimestampType +// + +/// Timestamp logical type annotation +/// +/// Allowed for physical types: INT64 +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct TimestampType { + pub is_adjusted_to_u_t_c: bool, + pub unit: TimeUnit, +} + +impl TimestampType { + pub fn new(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> TimestampType { + TimestampType { + is_adjusted_to_u_t_c: is_adjusted_to_u_t_c, + unit: unit, + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_bool()?; + f_1 = Some(val); + }, + 2 => { + let val = TimeUnit::read_from_in_protocol(i_prot)?; + f_2 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("TimestampType.is_adjusted_to_u_t_c", &f_1)?; + verify_required_field_exists("TimestampType.unit", &f_2)?; + let ret = TimestampType { + is_adjusted_to_u_t_c: f_1.expect("auto-generated code should have checked for presence of required fields"), + unit: f_2.expect("auto-generated code should have checked for presence of required fields"), + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("TimestampType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("isAdjustedToUTC", TType::Bool, 1))?; + o_prot.write_bool(self.is_adjusted_to_u_t_c)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("unit", TType::Struct, 2))?; + self.unit.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// TimeType +// + +/// 
Time logical type annotation +/// +/// Allowed for physical types: INT32 (millis), INT64 (micros, nanos) +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct TimeType { + pub is_adjusted_to_u_t_c: bool, + pub unit: TimeUnit, +} + +impl TimeType { + pub fn new(is_adjusted_to_u_t_c: bool, unit: TimeUnit) -> TimeType { + TimeType { + is_adjusted_to_u_t_c: is_adjusted_to_u_t_c, + unit: unit, + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_bool()?; + f_1 = Some(val); + }, + 2 => { + let val = TimeUnit::read_from_in_protocol(i_prot)?; + f_2 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("TimeType.is_adjusted_to_u_t_c", &f_1)?; + verify_required_field_exists("TimeType.unit", &f_2)?; + let ret = TimeType { + is_adjusted_to_u_t_c: f_1.expect("auto-generated code should have checked for presence of required fields"), + unit: f_2.expect("auto-generated code should have checked for presence of required fields"), + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("TimeType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("isAdjustedToUTC", TType::Bool, 1))?; + o_prot.write_bool(self.is_adjusted_to_u_t_c)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("unit", TType::Struct, 2))?; + self.unit.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// IntType +// + +/// Integer logical type annotation +/// +/// bitWidth must be 8, 16, 32, or 64. 
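+// NOTE: a construction sketch (illustrative only, not thrift-generated
+// output): an INT32 column holding unsigned 16-bit values would be annotated
+// with a bit width of 16 and is_signed = false.
+#[cfg(test)]
+fn example_int_type() {
+  let uint16 = IntType::new(16, false);
+  assert_eq!(uint16.bit_width, 16);
+  assert!(!uint16.is_signed);
+}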
+/// +/// Allowed for physical types: INT32, INT64 +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct IntType { + pub bit_width: i8, + pub is_signed: bool, +} + +impl IntType { + pub fn new(bit_width: i8, is_signed: bool) -> IntType { + IntType { + bit_width: bit_width, + is_signed: is_signed, + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_i8()?; + f_1 = Some(val); + }, + 2 => { + let val = i_prot.read_bool()?; + f_2 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("IntType.bit_width", &f_1)?; + verify_required_field_exists("IntType.is_signed", &f_2)?; + let ret = IntType { + bit_width: f_1.expect("auto-generated code should have checked for presence of required fields"), + is_signed: f_2.expect("auto-generated code should have checked for presence of required fields"), + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("IntType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("bitWidth", TType::I08, 1))?; + o_prot.write_i8(self.bit_width)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("isSigned", TType::Bool, 2))?; + o_prot.write_bool(self.is_signed)?; + o_prot.write_field_end()?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// JsonType +// + +/// Embedded JSON logical type annotation +/// +/// Allowed for physical types: BINARY +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct JsonType { +} + +impl JsonType { + pub fn new() -> JsonType { + JsonType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = JsonType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("JsonType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for JsonType { + fn default() -> Self { + JsonType{} + } +} + +// +// BsonType +// + +/// Embedded BSON logical type annotation +/// +/// Allowed for physical types: BINARY +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct BsonType { +} + +impl BsonType { + pub fn new() -> BsonType { + BsonType {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + 
i_prot.read_struct_end()?; + let ret = BsonType {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("BsonType"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for BsonType { + fn default() -> Self { + BsonType{} + } +} + +// +// LogicalType +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum LogicalType { + STRING(StringType), + MAP(MapType), + LIST(ListType), + ENUM(EnumType), + DECIMAL(DecimalType), + DATE(DateType), + TIME(TimeType), + TIMESTAMP(TimestampType), + INTEGER(IntType), + UNKNOWN(NullType), + JSON(JsonType), + BSON(BsonType), + UUID(UUIDType), +} + +impl LogicalType { + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let mut ret: Option = None; + let mut received_field_count = 0; + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = StringType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::STRING(val)); + } + received_field_count += 1; + }, + 2 => { + let val = MapType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::MAP(val)); + } + received_field_count += 1; + }, + 3 => { + let val = ListType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::LIST(val)); + } + received_field_count += 1; + }, + 4 => { + let val = EnumType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::ENUM(val)); + } + received_field_count += 1; + }, + 5 => { + let val = DecimalType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::DECIMAL(val)); + } + received_field_count += 1; + }, + 6 => { + let val = DateType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::DATE(val)); + } + received_field_count += 1; + }, + 7 => { + let val = TimeType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::TIME(val)); + } + received_field_count += 1; + }, + 8 => { + let val = TimestampType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::TIMESTAMP(val)); + } + received_field_count += 1; + }, + 10 => { + let val = IntType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::INTEGER(val)); + } + received_field_count += 1; + }, + 11 => { + let val = NullType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::UNKNOWN(val)); + } + received_field_count += 1; + }, + 12 => { + let val = JsonType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::JSON(val)); + } + received_field_count += 1; + }, + 13 => { + let val = BsonType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::BSON(val)); + } + received_field_count += 1; + }, + 14 => { + let val = UUIDType::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(LogicalType::UUID(val)); + } + received_field_count += 1; + }, + _ => { + i_prot.skip(field_ident.field_type)?; + received_field_count += 1; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + if received_field_count == 0 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received 
empty union from remote LogicalType" + ) + ) + ) + } else if received_field_count > 1 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received multiple fields for union from remote LogicalType" + ) + ) + ) + } else { + Ok(ret.expect("return value should have been constructed")) + } + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("LogicalType"); + o_prot.write_struct_begin(&struct_ident)?; + match *self { + LogicalType::STRING(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("STRING", TType::Struct, 1))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::MAP(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("MAP", TType::Struct, 2))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::LIST(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("LIST", TType::Struct, 3))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::ENUM(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("ENUM", TType::Struct, 4))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::DECIMAL(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("DECIMAL", TType::Struct, 5))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::DATE(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("DATE", TType::Struct, 6))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::TIME(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("TIME", TType::Struct, 7))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::TIMESTAMP(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("TIMESTAMP", TType::Struct, 8))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::INTEGER(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("INTEGER", TType::Struct, 10))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::UNKNOWN(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("UNKNOWN", TType::Struct, 11))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::JSON(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("JSON", TType::Struct, 12))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::BSON(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("BSON", TType::Struct, 13))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + LogicalType::UUID(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("UUID", TType::Struct, 14))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// SchemaElement +// + +/// Represents a element inside a schema definition. +/// - if it is a group (inner node) then type is undefined and num_children is defined +/// - if it is a primitive type (leaf) then type is defined and num_children is undefined +/// the nodes are listed in depth first traversal order. +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct SchemaElement { + /// Data type for this field. 
Not set if the current element is a non-leaf node
+  pub type_: Option<Type>,
+  /// If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values.
+  /// Otherwise, if specified, this is the maximum bit length to store any of the values.
+  /// (e.g. a low cardinality INT col could have this set to 3). Note that this is
+  /// in the schema, and therefore fixed for the entire file.
+  pub type_length: Option<i32>,
+  /// Repetition of the field. The root of the schema does not have a repetition_type.
+  /// All other nodes must have one.
+  pub repetition_type: Option<FieldRepetitionType>,
+  /// Name of the field in the schema
+  pub name: String,
+  /// Nested fields. Since thrift does not support nested fields,
+  /// the nesting is flattened to a single list by a depth-first traversal.
+  /// The children count is used to construct the nested relationship.
+  /// This field is not set when the element is a primitive type
+  pub num_children: Option<i32>,
+  /// When the schema is the result of a conversion from another model,
+  /// this is used to record the original type to help with cross conversion.
+  pub converted_type: Option<ConvertedType>,
+  /// Used when this column contains decimal data.
+  /// See the DECIMAL converted type for more details.
+  pub scale: Option<i32>,
+  pub precision: Option<i32>,
+  /// When the original schema supports field ids, this will save the
+  /// original field id in the parquet schema
+  pub field_id: Option<i32>,
+  /// The logical type of this SchemaElement
+  ///
+  /// LogicalType replaces ConvertedType, but ConvertedType is still required
+  /// for some logical types to ensure forward-compatibility in format v1.
+  pub logical_type: Option<LogicalType>,
+}
+
+impl SchemaElement {
+  pub fn new<F1, F2, F3, F5, F6, F7, F8, F9, F10>(type_: F1, type_length: F2, repetition_type: F3, name: String, num_children: F5, converted_type: F6, scale: F7, precision: F8, field_id: F9, logical_type: F10) -> SchemaElement where F1: Into<Option<Type>>, F2: Into<Option<i32>>, F3: Into<Option<FieldRepetitionType>>, F5: Into<Option<i32>>, F6: Into<Option<ConvertedType>>, F7: Into<Option<i32>>, F8: Into<Option<i32>>, F9: Into<Option<i32>>, F10: Into<Option<LogicalType>> {
+    SchemaElement {
+      type_: type_.into(),
+      type_length: type_length.into(),
+      repetition_type: repetition_type.into(),
+      name: name,
+      num_children: num_children.into(),
+      converted_type: converted_type.into(),
+      scale: scale.into(),
+      precision: precision.into(),
+      field_id: field_id.into(),
+      logical_type: logical_type.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<SchemaElement> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Type> = None;
+    let mut f_2: Option<i32> = None;
+    let mut f_3: Option<FieldRepetitionType> = None;
+    let mut f_4: Option<String> = None;
+    let mut f_5: Option<i32> = None;
+    let mut f_6: Option<ConvertedType> = None;
+    let mut f_7: Option<i32> = None;
+    let mut f_8: Option<i32> = None;
+    let mut f_9: Option<i32> = None;
+    let mut f_10: Option<LogicalType> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = Type::read_from_in_protocol(i_prot)?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = i_prot.read_i32()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = FieldRepetitionType::read_from_in_protocol(i_prot)?;
+          f_3 = Some(val);
+        },
+        4 => {
+          let val = i_prot.read_string()?;
+          f_4 = Some(val);
+        },
+        5 => {
+          let val = i_prot.read_i32()?;
+          f_5 = Some(val);
+        },
+        6 => {
+          let val = ConvertedType::read_from_in_protocol(i_prot)?;
+          f_6 = Some(val);
+        },
+        7 => {
+          let val = i_prot.read_i32()?;
+          f_7 = Some(val);
+        },
+        8 => {
+          let val = i_prot.read_i32()?;
+          f_8 = Some(val);
+        },
+        9 => {
+          let val = i_prot.read_i32()?;
+          f_9 = Some(val);
+        },
+        10 => {
+ let val = LogicalType::read_from_in_protocol(i_prot)?; + f_10 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("SchemaElement.name", &f_4)?; + let ret = SchemaElement { + type_: f_1, + type_length: f_2, + repetition_type: f_3, + name: f_4.expect("auto-generated code should have checked for presence of required fields"), + num_children: f_5, + converted_type: f_6, + scale: f_7, + precision: f_8, + field_id: f_9, + logical_type: f_10, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("SchemaElement"); + o_prot.write_struct_begin(&struct_ident)?; + if let Some(ref fld_var) = self.type_ { + o_prot.write_field_begin(&TFieldIdentifier::new("type", TType::I32, 1))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(fld_var) = self.type_length { + o_prot.write_field_begin(&TFieldIdentifier::new("type_length", TType::I32, 2))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(ref fld_var) = self.repetition_type { + o_prot.write_field_begin(&TFieldIdentifier::new("repetition_type", TType::I32, 3))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + () + } else { + () + } + o_prot.write_field_begin(&TFieldIdentifier::new("name", TType::String, 4))?; + o_prot.write_string(&self.name)?; + o_prot.write_field_end()?; + if let Some(fld_var) = self.num_children { + o_prot.write_field_begin(&TFieldIdentifier::new("num_children", TType::I32, 5))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(ref fld_var) = self.converted_type { + o_prot.write_field_begin(&TFieldIdentifier::new("converted_type", TType::I32, 6))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(fld_var) = self.scale { + o_prot.write_field_begin(&TFieldIdentifier::new("scale", TType::I32, 7))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(fld_var) = self.precision { + o_prot.write_field_begin(&TFieldIdentifier::new("precision", TType::I32, 8))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(fld_var) = self.field_id { + o_prot.write_field_begin(&TFieldIdentifier::new("field_id", TType::I32, 9))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(ref fld_var) = self.logical_type { + o_prot.write_field_begin(&TFieldIdentifier::new("logicalType", TType::Struct, 10))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + () + } else { + () + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// DataPageHeader +// + +/// Data page header +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct DataPageHeader { + /// Number of values, including NULLs, in this data page. 
* + pub num_values: i32, + /// Encoding used for this data page * + pub encoding: Encoding, + /// Encoding used for definition levels * + pub definition_level_encoding: Encoding, + /// Encoding used for repetition levels * + pub repetition_level_encoding: Encoding, + /// Optional statistics for the data in this page* + pub statistics: Option, +} + +impl DataPageHeader { + pub fn new(num_values: i32, encoding: Encoding, definition_level_encoding: Encoding, repetition_level_encoding: Encoding, statistics: F5) -> DataPageHeader where F5: Into> { + DataPageHeader { + num_values: num_values, + encoding: encoding, + definition_level_encoding: definition_level_encoding, + repetition_level_encoding: repetition_level_encoding, + statistics: statistics.into(), + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + let mut f_3: Option = None; + let mut f_4: Option = None; + let mut f_5: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_i32()?; + f_1 = Some(val); + }, + 2 => { + let val = Encoding::read_from_in_protocol(i_prot)?; + f_2 = Some(val); + }, + 3 => { + let val = Encoding::read_from_in_protocol(i_prot)?; + f_3 = Some(val); + }, + 4 => { + let val = Encoding::read_from_in_protocol(i_prot)?; + f_4 = Some(val); + }, + 5 => { + let val = Statistics::read_from_in_protocol(i_prot)?; + f_5 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("DataPageHeader.num_values", &f_1)?; + verify_required_field_exists("DataPageHeader.encoding", &f_2)?; + verify_required_field_exists("DataPageHeader.definition_level_encoding", &f_3)?; + verify_required_field_exists("DataPageHeader.repetition_level_encoding", &f_4)?; + let ret = DataPageHeader { + num_values: f_1.expect("auto-generated code should have checked for presence of required fields"), + encoding: f_2.expect("auto-generated code should have checked for presence of required fields"), + definition_level_encoding: f_3.expect("auto-generated code should have checked for presence of required fields"), + repetition_level_encoding: f_4.expect("auto-generated code should have checked for presence of required fields"), + statistics: f_5, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("DataPageHeader"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("num_values", TType::I32, 1))?; + o_prot.write_i32(self.num_values)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("encoding", TType::I32, 2))?; + self.encoding.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("definition_level_encoding", TType::I32, 3))?; + self.definition_level_encoding.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("repetition_level_encoding", TType::I32, 4))?; + self.repetition_level_encoding.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + if let Some(ref fld_var) = self.statistics { + o_prot.write_field_begin(&TFieldIdentifier::new("statistics", 
TType::Struct, 5))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + () + } else { + () + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// IndexPageHeader +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct IndexPageHeader { +} + +impl IndexPageHeader { + pub fn new() -> IndexPageHeader { + IndexPageHeader {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = IndexPageHeader {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("IndexPageHeader"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for IndexPageHeader { + fn default() -> Self { + IndexPageHeader{} + } +} + +// +// DictionaryPageHeader +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct DictionaryPageHeader { + /// Number of values in the dictionary * + pub num_values: i32, + /// Encoding using this dictionary page * + pub encoding: Encoding, + /// If true, the entries in the dictionary are sorted in ascending order * + pub is_sorted: Option, +} + +impl DictionaryPageHeader { + pub fn new(num_values: i32, encoding: Encoding, is_sorted: F3) -> DictionaryPageHeader where F3: Into> { + DictionaryPageHeader { + num_values: num_values, + encoding: encoding, + is_sorted: is_sorted.into(), + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + let mut f_3: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_i32()?; + f_1 = Some(val); + }, + 2 => { + let val = Encoding::read_from_in_protocol(i_prot)?; + f_2 = Some(val); + }, + 3 => { + let val = i_prot.read_bool()?; + f_3 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("DictionaryPageHeader.num_values", &f_1)?; + verify_required_field_exists("DictionaryPageHeader.encoding", &f_2)?; + let ret = DictionaryPageHeader { + num_values: f_1.expect("auto-generated code should have checked for presence of required fields"), + encoding: f_2.expect("auto-generated code should have checked for presence of required fields"), + is_sorted: f_3, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("DictionaryPageHeader"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("num_values", TType::I32, 1))?; + o_prot.write_i32(self.num_values)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("encoding", TType::I32, 2))?; + self.encoding.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + if let Some(fld_var) = self.is_sorted { + 
o_prot.write_field_begin(&TFieldIdentifier::new("is_sorted", TType::Bool, 3))?; + o_prot.write_bool(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// DataPageHeaderV2 +// + +/// New page format allowing reading levels without decompressing the data +/// Repetition and definition levels are uncompressed +/// The remaining section containing the data is compressed if is_compressed is true +/// +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct DataPageHeaderV2 { + /// Number of values, including NULLs, in this data page. * + pub num_values: i32, + /// Number of NULL values, in this data page. + /// Number of non-null = num_values - num_nulls which is also the number of values in the data section * + pub num_nulls: i32, + /// Number of rows in this data page. which means pages change on record boundaries (r = 0) * + pub num_rows: i32, + /// Encoding used for data in this page * + pub encoding: Encoding, + /// length of the definition levels + pub definition_levels_byte_length: i32, + /// length of the repetition levels + pub repetition_levels_byte_length: i32, + /// whether the values are compressed. + /// Which means the section of the page between + /// definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) + /// is compressed with the compression_codec. + /// If missing it is considered compressed + pub is_compressed: Option, + /// optional statistics for this column chunk + pub statistics: Option, +} + +impl DataPageHeaderV2 { + pub fn new(num_values: i32, num_nulls: i32, num_rows: i32, encoding: Encoding, definition_levels_byte_length: i32, repetition_levels_byte_length: i32, is_compressed: F7, statistics: F8) -> DataPageHeaderV2 where F7: Into>, F8: Into> { + DataPageHeaderV2 { + num_values: num_values, + num_nulls: num_nulls, + num_rows: num_rows, + encoding: encoding, + definition_levels_byte_length: definition_levels_byte_length, + repetition_levels_byte_length: repetition_levels_byte_length, + is_compressed: is_compressed.into(), + statistics: statistics.into(), + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + let mut f_3: Option = None; + let mut f_4: Option = None; + let mut f_5: Option = None; + let mut f_6: Option = None; + let mut f_7: Option = None; + let mut f_8: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_i32()?; + f_1 = Some(val); + }, + 2 => { + let val = i_prot.read_i32()?; + f_2 = Some(val); + }, + 3 => { + let val = i_prot.read_i32()?; + f_3 = Some(val); + }, + 4 => { + let val = Encoding::read_from_in_protocol(i_prot)?; + f_4 = Some(val); + }, + 5 => { + let val = i_prot.read_i32()?; + f_5 = Some(val); + }, + 6 => { + let val = i_prot.read_i32()?; + f_6 = Some(val); + }, + 7 => { + let val = i_prot.read_bool()?; + f_7 = Some(val); + }, + 8 => { + let val = Statistics::read_from_in_protocol(i_prot)?; + f_8 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("DataPageHeaderV2.num_values", &f_1)?; + verify_required_field_exists("DataPageHeaderV2.num_nulls", &f_2)?; + 
verify_required_field_exists("DataPageHeaderV2.num_rows", &f_3)?; + verify_required_field_exists("DataPageHeaderV2.encoding", &f_4)?; + verify_required_field_exists("DataPageHeaderV2.definition_levels_byte_length", &f_5)?; + verify_required_field_exists("DataPageHeaderV2.repetition_levels_byte_length", &f_6)?; + let ret = DataPageHeaderV2 { + num_values: f_1.expect("auto-generated code should have checked for presence of required fields"), + num_nulls: f_2.expect("auto-generated code should have checked for presence of required fields"), + num_rows: f_3.expect("auto-generated code should have checked for presence of required fields"), + encoding: f_4.expect("auto-generated code should have checked for presence of required fields"), + definition_levels_byte_length: f_5.expect("auto-generated code should have checked for presence of required fields"), + repetition_levels_byte_length: f_6.expect("auto-generated code should have checked for presence of required fields"), + is_compressed: f_7, + statistics: f_8, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("DataPageHeaderV2"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("num_values", TType::I32, 1))?; + o_prot.write_i32(self.num_values)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("num_nulls", TType::I32, 2))?; + o_prot.write_i32(self.num_nulls)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("num_rows", TType::I32, 3))?; + o_prot.write_i32(self.num_rows)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("encoding", TType::I32, 4))?; + self.encoding.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("definition_levels_byte_length", TType::I32, 5))?; + o_prot.write_i32(self.definition_levels_byte_length)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("repetition_levels_byte_length", TType::I32, 6))?; + o_prot.write_i32(self.repetition_levels_byte_length)?; + o_prot.write_field_end()?; + if let Some(fld_var) = self.is_compressed { + o_prot.write_field_begin(&TFieldIdentifier::new("is_compressed", TType::Bool, 7))?; + o_prot.write_bool(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(ref fld_var) = self.statistics { + o_prot.write_field_begin(&TFieldIdentifier::new("statistics", TType::Struct, 8))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + () + } else { + () + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// SplitBlockAlgorithm +// + +/// Block-based algorithm type annotation. 
* +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct SplitBlockAlgorithm { +} + +impl SplitBlockAlgorithm { + pub fn new() -> SplitBlockAlgorithm { + SplitBlockAlgorithm {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = SplitBlockAlgorithm {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("SplitBlockAlgorithm"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for SplitBlockAlgorithm { + fn default() -> Self { + SplitBlockAlgorithm{} + } +} + +// +// BloomFilterAlgorithm +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum BloomFilterAlgorithm { + BLOCK(SplitBlockAlgorithm), +} + +impl BloomFilterAlgorithm { + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let mut ret: Option = None; + let mut received_field_count = 0; + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = SplitBlockAlgorithm::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(BloomFilterAlgorithm::BLOCK(val)); + } + received_field_count += 1; + }, + _ => { + i_prot.skip(field_ident.field_type)?; + received_field_count += 1; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + if received_field_count == 0 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received empty union from remote BloomFilterAlgorithm" + ) + ) + ) + } else if received_field_count > 1 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received multiple fields for union from remote BloomFilterAlgorithm" + ) + ) + ) + } else { + Ok(ret.expect("return value should have been constructed")) + } + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("BloomFilterAlgorithm"); + o_prot.write_struct_begin(&struct_ident)?; + match *self { + BloomFilterAlgorithm::BLOCK(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("BLOCK", TType::Struct, 1))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// XxHash +// + +/// Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash +/// algorithm. It uses 64 bits version of xxHash. 
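+// NOTE: the union readers in this file (TimeUnit, LogicalType,
+// BloomFilterAlgorithm, ...) enforce that exactly one field arrives on the
+// wire: zero fields or more than one field both surface as a ProtocolError
+// of kind InvalidData rather than as a mis-tagged value.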
+/// +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct XxHash { +} + +impl XxHash { + pub fn new() -> XxHash { + XxHash {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = XxHash {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("XxHash"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for XxHash { + fn default() -> Self { + XxHash{} + } +} + +// +// BloomFilterHash +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum BloomFilterHash { + XXHASH(XxHash), +} + +impl BloomFilterHash { + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let mut ret: Option = None; + let mut received_field_count = 0; + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = XxHash::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(BloomFilterHash::XXHASH(val)); + } + received_field_count += 1; + }, + _ => { + i_prot.skip(field_ident.field_type)?; + received_field_count += 1; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + if received_field_count == 0 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received empty union from remote BloomFilterHash" + ) + ) + ) + } else if received_field_count > 1 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received multiple fields for union from remote BloomFilterHash" + ) + ) + ) + } else { + Ok(ret.expect("return value should have been constructed")) + } + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("BloomFilterHash"); + o_prot.write_struct_begin(&struct_ident)?; + match *self { + BloomFilterHash::XXHASH(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("XXHASH", TType::Struct, 1))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// Uncompressed +// + +/// The compression used in the Bloom filter. 
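+// NOTE: an assembly sketch (illustrative only, not thrift-generated output)
+// pairing this hash with the split-block algorithm and the uncompressed
+// codec defined later in this file; 1024 is an arbitrary example bitset size.
+#[cfg(test)]
+fn example_bloom_filter_header() -> BloomFilterHeader {
+  BloomFilterHeader::new(
+    1024,
+    BloomFilterAlgorithm::BLOCK(SplitBlockAlgorithm::new()),
+    BloomFilterHash::XXHASH(XxHash::new()),
+    BloomFilterCompression::UNCOMPRESSED(Uncompressed::new()),
+  )
+}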
+/// +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct Uncompressed { +} + +impl Uncompressed { + pub fn new() -> Uncompressed { + Uncompressed {} + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + let ret = Uncompressed {}; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("Uncompressed"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +impl Default for Uncompressed { + fn default() -> Self { + Uncompressed{} + } +} + +// +// BloomFilterCompression +// + +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub enum BloomFilterCompression { + UNCOMPRESSED(Uncompressed), +} + +impl BloomFilterCompression { + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + let mut ret: Option = None; + let mut received_field_count = 0; + i_prot.read_struct_begin()?; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = Uncompressed::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(BloomFilterCompression::UNCOMPRESSED(val)); + } + received_field_count += 1; + }, + _ => { + i_prot.skip(field_ident.field_type)?; + received_field_count += 1; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + if received_field_count == 0 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received empty union from remote BloomFilterCompression" + ) + ) + ) + } else if received_field_count > 1 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received multiple fields for union from remote BloomFilterCompression" + ) + ) + ) + } else { + Ok(ret.expect("return value should have been constructed")) + } + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("BloomFilterCompression"); + o_prot.write_struct_begin(&struct_ident)?; + match *self { + BloomFilterCompression::UNCOMPRESSED(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("UNCOMPRESSED", TType::Struct, 1))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// BloomFilterHeader +// + +/// Bloom filter header is stored at beginning of Bloom filter data of each column +/// and followed by its bitset. +/// +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct BloomFilterHeader { + /// The size of bitset in bytes * + pub num_bytes: i32, + /// The algorithm for setting bits. * + pub algorithm: BloomFilterAlgorithm, + /// The hash function used for Bloom filter. 
*
+  pub hash: BloomFilterHash,
+  /// The compression used in the Bloom filter *
+  pub compression: BloomFilterCompression,
+}
+
+impl BloomFilterHeader {
+  pub fn new(num_bytes: i32, algorithm: BloomFilterAlgorithm, hash: BloomFilterHash, compression: BloomFilterCompression) -> BloomFilterHeader {
+    BloomFilterHeader {
+      num_bytes: num_bytes,
+      algorithm: algorithm,
+      hash: hash,
+      compression: compression,
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<BloomFilterHeader> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<i32> = None;
+    let mut f_2: Option<BloomFilterAlgorithm> = None;
+    let mut f_3: Option<BloomFilterHash> = None;
+    let mut f_4: Option<BloomFilterCompression> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = i_prot.read_i32()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let val = BloomFilterAlgorithm::read_from_in_protocol(i_prot)?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let val = BloomFilterHash::read_from_in_protocol(i_prot)?;
+          f_3 = Some(val);
+        },
+        4 => {
+          let val = BloomFilterCompression::read_from_in_protocol(i_prot)?;
+          f_4 = Some(val);
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("BloomFilterHeader.num_bytes", &f_1)?;
+    verify_required_field_exists("BloomFilterHeader.algorithm", &f_2)?;
+    verify_required_field_exists("BloomFilterHeader.hash", &f_3)?;
+    verify_required_field_exists("BloomFilterHeader.compression", &f_4)?;
+    let ret = BloomFilterHeader {
+      num_bytes: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      algorithm: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      hash: f_3.expect("auto-generated code should have checked for presence of required fields"),
+      compression: f_4.expect("auto-generated code should have checked for presence of required fields"),
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("BloomFilterHeader");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("numBytes", TType::I32, 1))?;
+    o_prot.write_i32(self.num_bytes)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("algorithm", TType::Struct, 2))?;
+    self.algorithm.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("hash", TType::Struct, 3))?;
+    self.hash.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("compression", TType::Struct, 4))?;
+    self.compression.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// PageHeader
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct PageHeader {
+  /// the type of the page: indicates which of the *_header fields is set *
+  pub type_: PageType,
+  /// Uncompressed page size in bytes (not including this header) *
+  pub uncompressed_page_size: i32,
+  /// Compressed (and potentially encrypted) page size in bytes, not including this header *
+  pub compressed_page_size: i32,
+  /// The 32-bit CRC for the page, to be calculated as follows:
+  /// - Using the standard CRC32 algorithm
+  /// - On the data only, i.e. this header should not be included.
'Data' + /// hereby refers to the concatenation of the repetition levels, the + /// definition levels and the column value, in this exact order. + /// - On the encoded versions of the repetition levels, definition levels and + /// column values + /// - On the compressed versions of the repetition levels, definition levels + /// and column values where possible; + /// - For v1 data pages, the repetition levels, definition levels and column + /// values are always compressed together. If a compression scheme is + /// specified, the CRC shall be calculated on the compressed version of + /// this concatenation. If no compression scheme is specified, the CRC + /// shall be calculated on the uncompressed version of this concatenation. + /// - For v2 data pages, the repetition levels and definition levels are + /// handled separately from the data and are never compressed (only + /// encoded). If a compression scheme is specified, the CRC shall be + /// calculated on the concatenation of the uncompressed repetition levels, + /// uncompressed definition levels and the compressed column values. + /// If no compression scheme is specified, the CRC shall be calculated on + /// the uncompressed concatenation. + /// If enabled, this allows for disabling checksumming in HDFS if only a few + /// pages need to be read. + /// + pub crc: Option, + pub data_page_header: Option, + pub index_page_header: Option, + pub dictionary_page_header: Option, + pub data_page_header_v2: Option, +} + +impl PageHeader { + pub fn new(type_: PageType, uncompressed_page_size: i32, compressed_page_size: i32, crc: F4, data_page_header: F5, index_page_header: F6, dictionary_page_header: F7, data_page_header_v2: F8) -> PageHeader where F4: Into>, F5: Into>, F6: Into>, F7: Into>, F8: Into> { + PageHeader { + type_: type_, + uncompressed_page_size: uncompressed_page_size, + compressed_page_size: compressed_page_size, + crc: crc.into(), + data_page_header: data_page_header.into(), + index_page_header: index_page_header.into(), + dictionary_page_header: dictionary_page_header.into(), + data_page_header_v2: data_page_header_v2.into(), + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + let mut f_3: Option = None; + let mut f_4: Option = None; + let mut f_5: Option = None; + let mut f_6: Option = None; + let mut f_7: Option = None; + let mut f_8: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = PageType::read_from_in_protocol(i_prot)?; + f_1 = Some(val); + }, + 2 => { + let val = i_prot.read_i32()?; + f_2 = Some(val); + }, + 3 => { + let val = i_prot.read_i32()?; + f_3 = Some(val); + }, + 4 => { + let val = i_prot.read_i32()?; + f_4 = Some(val); + }, + 5 => { + let val = DataPageHeader::read_from_in_protocol(i_prot)?; + f_5 = Some(val); + }, + 6 => { + let val = IndexPageHeader::read_from_in_protocol(i_prot)?; + f_6 = Some(val); + }, + 7 => { + let val = DictionaryPageHeader::read_from_in_protocol(i_prot)?; + f_7 = Some(val); + }, + 8 => { + let val = DataPageHeaderV2::read_from_in_protocol(i_prot)?; + f_8 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("PageHeader.type_", &f_1)?; + 
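+    // Only type_, uncompressed_page_size and compressed_page_size are
+    // required; the per-page-type *_header fields stay optional, and the
+    // generated code does not check that the one matching type_ is actually
+    // populated; callers must validate that themselves.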
verify_required_field_exists("PageHeader.uncompressed_page_size", &f_2)?; + verify_required_field_exists("PageHeader.compressed_page_size", &f_3)?; + let ret = PageHeader { + type_: f_1.expect("auto-generated code should have checked for presence of required fields"), + uncompressed_page_size: f_2.expect("auto-generated code should have checked for presence of required fields"), + compressed_page_size: f_3.expect("auto-generated code should have checked for presence of required fields"), + crc: f_4, + data_page_header: f_5, + index_page_header: f_6, + dictionary_page_header: f_7, + data_page_header_v2: f_8, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("PageHeader"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("type", TType::I32, 1))?; + self.type_.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("uncompressed_page_size", TType::I32, 2))?; + o_prot.write_i32(self.uncompressed_page_size)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("compressed_page_size", TType::I32, 3))?; + o_prot.write_i32(self.compressed_page_size)?; + o_prot.write_field_end()?; + if let Some(fld_var) = self.crc { + o_prot.write_field_begin(&TFieldIdentifier::new("crc", TType::I32, 4))?; + o_prot.write_i32(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(ref fld_var) = self.data_page_header { + o_prot.write_field_begin(&TFieldIdentifier::new("data_page_header", TType::Struct, 5))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(ref fld_var) = self.index_page_header { + o_prot.write_field_begin(&TFieldIdentifier::new("index_page_header", TType::Struct, 6))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(ref fld_var) = self.dictionary_page_header { + o_prot.write_field_begin(&TFieldIdentifier::new("dictionary_page_header", TType::Struct, 7))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + () + } else { + () + } + if let Some(ref fld_var) = self.data_page_header_v2 { + o_prot.write_field_begin(&TFieldIdentifier::new("data_page_header_v2", TType::Struct, 8))?; + fld_var.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + () + } else { + () + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// KeyValue +// + +/// Wrapper struct to store key values +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct KeyValue { + pub key: String, + pub value: Option, +} + +impl KeyValue { + pub fn new(key: String, value: F2) -> KeyValue where F2: Into> { + KeyValue { + key: key, + value: value.into(), + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_string()?; + f_1 = Some(val); + }, + 2 => { + let val = i_prot.read_string()?; + f_2 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + 
verify_required_field_exists("KeyValue.key", &f_1)?; + let ret = KeyValue { + key: f_1.expect("auto-generated code should have checked for presence of required fields"), + value: f_2, + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("KeyValue"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("key", TType::String, 1))?; + o_prot.write_string(&self.key)?; + o_prot.write_field_end()?; + if let Some(ref fld_var) = self.value { + o_prot.write_field_begin(&TFieldIdentifier::new("value", TType::String, 2))?; + o_prot.write_string(fld_var)?; + o_prot.write_field_end()?; + () + } else { + () + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// SortingColumn +// + +/// Wrapper struct to specify sort order +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct SortingColumn { + /// The column index (in this row group) * + pub column_idx: i32, + /// If true, indicates this column is sorted in descending order. * + pub descending: bool, + /// If true, nulls will come before non-null values, otherwise, + /// nulls go at the end. + pub nulls_first: bool, +} + +impl SortingColumn { + pub fn new(column_idx: i32, descending: bool, nulls_first: bool) -> SortingColumn { + SortingColumn { + column_idx: column_idx, + descending: descending, + nulls_first: nulls_first, + } + } + pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result { + i_prot.read_struct_begin()?; + let mut f_1: Option = None; + let mut f_2: Option = None; + let mut f_3: Option = None; + loop { + let field_ident = i_prot.read_field_begin()?; + if field_ident.field_type == TType::Stop { + break; + } + let field_id = field_id(&field_ident)?; + match field_id { + 1 => { + let val = i_prot.read_i32()?; + f_1 = Some(val); + }, + 2 => { + let val = i_prot.read_bool()?; + f_2 = Some(val); + }, + 3 => { + let val = i_prot.read_bool()?; + f_3 = Some(val); + }, + _ => { + i_prot.skip(field_ident.field_type)?; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + verify_required_field_exists("SortingColumn.column_idx", &f_1)?; + verify_required_field_exists("SortingColumn.descending", &f_2)?; + verify_required_field_exists("SortingColumn.nulls_first", &f_3)?; + let ret = SortingColumn { + column_idx: f_1.expect("auto-generated code should have checked for presence of required fields"), + descending: f_2.expect("auto-generated code should have checked for presence of required fields"), + nulls_first: f_3.expect("auto-generated code should have checked for presence of required fields"), + }; + Ok(ret) + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("SortingColumn"); + o_prot.write_struct_begin(&struct_ident)?; + o_prot.write_field_begin(&TFieldIdentifier::new("column_idx", TType::I32, 1))?; + o_prot.write_i32(self.column_idx)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("descending", TType::Bool, 2))?; + o_prot.write_bool(self.descending)?; + o_prot.write_field_end()?; + o_prot.write_field_begin(&TFieldIdentifier::new("nulls_first", TType::Bool, 3))?; + o_prot.write_bool(self.nulls_first)?; + o_prot.write_field_end()?; + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// PageEncodingStats +// + +/// statistics of a given page type and encoding +#[derive(Clone, 
+//
+// PageEncodingStats
+//
+
+/// statistics of a given page type and encoding
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct PageEncodingStats {
+  /// the page type (data/dic/...) *
+  pub page_type: PageType,
+  /// encoding of the page *
+  pub encoding: Encoding,
+  /// number of pages of this type with this encoding *
+  pub count: i32,
+}
+
+impl PageEncodingStats {
+  pub fn new(page_type: PageType, encoding: Encoding, count: i32) -> PageEncodingStats {
+    PageEncodingStats {
+      page_type: page_type,
+      encoding: encoding,
+      count: count,
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<PageEncodingStats> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<PageType> = None;
+    let mut f_2: Option<Encoding> = None;
+    let mut f_3: Option<i32> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => f_1 = Some(PageType::read_from_in_protocol(i_prot)?),
+        2 => f_2 = Some(Encoding::read_from_in_protocol(i_prot)?),
+        3 => f_3 = Some(i_prot.read_i32()?),
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("PageEncodingStats.page_type", &f_1)?;
+    verify_required_field_exists("PageEncodingStats.encoding", &f_2)?;
+    verify_required_field_exists("PageEncodingStats.count", &f_3)?;
+    let ret = PageEncodingStats {
+      page_type: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      encoding: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      count: f_3.expect("auto-generated code should have checked for presence of required fields"),
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("PageEncodingStats");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("page_type", TType::I32, 1))?;
+    self.page_type.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("encoding", TType::I32, 2))?;
+    self.encoding.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("count", TType::I32, 3))?;
+    o_prot.write_i32(self.count)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// ColumnMetaData
+//
+
+/// Description for column metadata
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct ColumnMetaData {
+  /// Type of this column *
+  pub type_: Type,
+  /// Set of all encodings used for this column. The purpose is to validate
+  /// whether we can decode those pages. *
+  pub encodings: Vec<Encoding>,
+  /// Path in schema *
+  pub path_in_schema: Vec<String>,
+  /// Compression codec *
+  pub codec: CompressionCodec,
+  /// Number of values in this column *
+  pub num_values: i64,
+  /// total byte size of all uncompressed pages in this column chunk (including the headers) *
+  pub total_uncompressed_size: i64,
+  /// total byte size of all compressed, and potentially encrypted, pages
+  /// in this column chunk (including the headers) *
+  pub total_compressed_size: i64,
+  /// Optional key/value metadata *
+  pub key_value_metadata: Option<Vec<KeyValue>>,
+  /// Byte offset from beginning of file to first data page *
+  pub data_page_offset: i64,
+  /// Byte offset from beginning of file to root index page *
+  pub index_page_offset: Option<i64>,
+  /// Byte offset from the beginning of file to first (only) dictionary page *
+  pub dictionary_page_offset: Option<i64>,
+  /// optional statistics for this column chunk
+  pub statistics: Option<Statistics>,
+  /// Set of all encodings used for pages in this column chunk.
+  /// This information can be used to determine if all data pages are
+  /// dictionary encoded for example *
+  pub encoding_stats: Option<Vec<PageEncodingStats>>,
+  /// Byte offset from beginning of file to Bloom filter data. *
+  pub bloom_filter_offset: Option<i64>,
+}
+
+impl ColumnMetaData {
+  pub fn new<F8, F10, F11, F12, F13, F14>(type_: Type, encodings: Vec<Encoding>, path_in_schema: Vec<String>, codec: CompressionCodec, num_values: i64, total_uncompressed_size: i64, total_compressed_size: i64, key_value_metadata: F8, data_page_offset: i64, index_page_offset: F10, dictionary_page_offset: F11, statistics: F12, encoding_stats: F13, bloom_filter_offset: F14) -> ColumnMetaData where F8: Into<Option<Vec<KeyValue>>>, F10: Into<Option<i64>>, F11: Into<Option<i64>>, F12: Into<Option<Statistics>>, F13: Into<Option<Vec<PageEncodingStats>>>, F14: Into<Option<i64>> {
+    ColumnMetaData {
+      type_: type_,
+      encodings: encodings,
+      path_in_schema: path_in_schema,
+      codec: codec,
+      num_values: num_values,
+      total_uncompressed_size: total_uncompressed_size,
+      total_compressed_size: total_compressed_size,
+      key_value_metadata: key_value_metadata.into(),
+      data_page_offset: data_page_offset,
+      index_page_offset: index_page_offset.into(),
+      dictionary_page_offset: dictionary_page_offset.into(),
+      statistics: statistics.into(),
+      encoding_stats: encoding_stats.into(),
+      bloom_filter_offset: bloom_filter_offset.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<ColumnMetaData> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Type> = None;
+    let mut f_2: Option<Vec<Encoding>> = None;
+    let mut f_3: Option<Vec<String>> = None;
+    let mut f_4: Option<CompressionCodec> = None;
+    let mut f_5: Option<i64> = None;
+    let mut f_6: Option<i64> = None;
+    let mut f_7: Option<i64> = None;
+    let mut f_8: Option<Vec<KeyValue>> = None;
+    let mut f_9: Option<i64> = None;
+    let mut f_10: Option<i64> = None;
+    let mut f_11: Option<i64> = None;
+    let mut f_12: Option<Statistics> = None;
+    let mut f_13: Option<Vec<PageEncodingStats>> = None;
+    let mut f_14: Option<i64> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => f_1 = Some(Type::read_from_in_protocol(i_prot)?),
+        2 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<Encoding> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(Encoding::read_from_in_protocol(i_prot)?);
+          }
+          i_prot.read_list_end()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<String> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(i_prot.read_string()?);
+          }
+          i_prot.read_list_end()?;
+          f_3 = Some(val);
+        },
+        4 => f_4 = Some(CompressionCodec::read_from_in_protocol(i_prot)?),
+        5 => f_5 = Some(i_prot.read_i64()?),
+        6 => f_6 = Some(i_prot.read_i64()?),
+        7 => f_7 = Some(i_prot.read_i64()?),
+        8 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<KeyValue> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(KeyValue::read_from_in_protocol(i_prot)?);
+          }
+          i_prot.read_list_end()?;
+          f_8 = Some(val);
+        },
+        9 => f_9 = Some(i_prot.read_i64()?),
+        10 => f_10 = Some(i_prot.read_i64()?),
+        11 => f_11 = Some(i_prot.read_i64()?),
+        12 => f_12 = Some(Statistics::read_from_in_protocol(i_prot)?),
+        13 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<PageEncodingStats> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(PageEncodingStats::read_from_in_protocol(i_prot)?);
+          }
+          i_prot.read_list_end()?;
+          f_13 = Some(val);
+        },
+        14 => f_14 = Some(i_prot.read_i64()?),
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("ColumnMetaData.type_", &f_1)?;
+    verify_required_field_exists("ColumnMetaData.encodings", &f_2)?;
+    verify_required_field_exists("ColumnMetaData.path_in_schema", &f_3)?;
+    verify_required_field_exists("ColumnMetaData.codec", &f_4)?;
+    verify_required_field_exists("ColumnMetaData.num_values", &f_5)?;
+    verify_required_field_exists("ColumnMetaData.total_uncompressed_size", &f_6)?;
+    verify_required_field_exists("ColumnMetaData.total_compressed_size", &f_7)?;
+    verify_required_field_exists("ColumnMetaData.data_page_offset", &f_9)?;
+    let ret = ColumnMetaData {
+      type_: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      encodings: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      path_in_schema: f_3.expect("auto-generated code should have checked for presence of required fields"),
+      codec: f_4.expect("auto-generated code should have checked for presence of required fields"),
+      num_values: f_5.expect("auto-generated code should have checked for presence of required fields"),
+      total_uncompressed_size: f_6.expect("auto-generated code should have checked for presence of required fields"),
+      total_compressed_size: f_7.expect("auto-generated code should have checked for presence of required fields"),
+      key_value_metadata: f_8,
+      data_page_offset: f_9.expect("auto-generated code should have checked for presence of required fields"),
+      index_page_offset: f_10,
+      dictionary_page_offset: f_11,
+      statistics: f_12,
+      encoding_stats: f_13,
+      bloom_filter_offset: f_14,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("ColumnMetaData");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("type", TType::I32, 1))?;
+    self.type_.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("encodings", TType::List, 2))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::I32, self.encodings.len() as i32))?;
+    for e in &self.encodings {
+      e.write_to_out_protocol(o_prot)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("path_in_schema", TType::List, 3))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::String, self.path_in_schema.len() as i32))?;
+    for e in &self.path_in_schema {
+      o_prot.write_string(e)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("codec", TType::I32, 4))?;
+    self.codec.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("num_values", TType::I64, 5))?;
+    o_prot.write_i64(self.num_values)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("total_uncompressed_size", TType::I64, 6))?;
+    o_prot.write_i64(self.total_uncompressed_size)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("total_compressed_size", TType::I64, 7))?;
+    o_prot.write_i64(self.total_compressed_size)?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.key_value_metadata {
+      o_prot.write_field_begin(&TFieldIdentifier::new("key_value_metadata", TType::List, 8))?;
+      o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, fld_var.len() as i32))?;
+      for e in fld_var {
+        e.write_to_out_protocol(o_prot)?;
+      }
+      o_prot.write_list_end()?;
+      o_prot.write_field_end()?;
+    }
+    o_prot.write_field_begin(&TFieldIdentifier::new("data_page_offset", TType::I64, 9))?;
+    o_prot.write_i64(self.data_page_offset)?;
+    o_prot.write_field_end()?;
+    if let Some(fld_var) = self.index_page_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("index_page_offset", TType::I64, 10))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(fld_var) = self.dictionary_page_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("dictionary_page_offset", TType::I64, 11))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(ref fld_var) = self.statistics {
+      o_prot.write_field_begin(&TFieldIdentifier::new("statistics", TType::Struct, 12))?;
+      fld_var.write_to_out_protocol(o_prot)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(ref fld_var) = self.encoding_stats {
+      o_prot.write_field_begin(&TFieldIdentifier::new("encoding_stats", TType::List, 13))?;
+      o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, fld_var.len() as i32))?;
+      for e in fld_var {
+        e.write_to_out_protocol(o_prot)?;
+      }
+      o_prot.write_list_end()?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(fld_var) = self.bloom_filter_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("bloom_filter_offset", TType::I64, 14))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
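Since `read_from_in_protocol` round-trips into plain Rust data, building metadata by hand is just a call to the generated `new`, whose optional parameters accept anything `Into<Option<_>>`. A sketch; the enum variant names `Type::Int64`, `Encoding::Plain` and `CompressionCodec::Uncompressed` are assumed from the Thrift Rust generator's naming convention and may differ in the generated enums:

```rust
// Hypothetical helper: required fields are positional; `None` and bare values
// both satisfy the Into<Option<_>> bounds on the optional ones.
fn minimal_column_meta(num_values: i64, data_page_offset: i64) -> ColumnMetaData {
	ColumnMetaData::new(
		Type::Int64,                    // assumed variant name (physical type)
		vec![Encoding::Plain],          // assumed variant name
		vec!["id".to_string()],         // path_in_schema to the leaf
		CompressionCodec::Uncompressed, // assumed variant name
		num_values,
		0,    // total_uncompressed_size: filled in once pages are written
		0,    // total_compressed_size: likewise
		None, // key_value_metadata
		data_page_offset,
		None, // index_page_offset
		None, // dictionary_page_offset
		None, // statistics
		None, // encoding_stats
		None, // bloom_filter_offset
	)
}
```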
+//
+// EncryptionWithFooterKey
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct EncryptionWithFooterKey {
+}
+
+impl EncryptionWithFooterKey {
+  pub fn new() -> EncryptionWithFooterKey {
+    EncryptionWithFooterKey {}
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<EncryptionWithFooterKey> {
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = EncryptionWithFooterKey {};
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("EncryptionWithFooterKey");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+impl Default for EncryptionWithFooterKey {
+  fn default() -> Self {
+    EncryptionWithFooterKey{}
+  }
+}
+
+//
+// EncryptionWithColumnKey
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct EncryptionWithColumnKey {
+  /// Column path in schema *
+  pub path_in_schema: Vec<String>,
+  /// Retrieval metadata of column encryption key *
+  pub key_metadata: Option<Vec<u8>>,
+}
+
+impl EncryptionWithColumnKey {
+  pub fn new<F2>(path_in_schema: Vec<String>, key_metadata: F2) -> EncryptionWithColumnKey where F2: Into<Option<Vec<u8>>> {
+    EncryptionWithColumnKey {
+      path_in_schema: path_in_schema,
+      key_metadata: key_metadata.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<EncryptionWithColumnKey> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<String>> = None;
+    let mut f_2: Option<Vec<u8>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<String> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(i_prot.read_string()?);
+          }
+          i_prot.read_list_end()?;
+          f_1 = Some(val);
+        },
+        2 => f_2 = Some(i_prot.read_bytes()?),
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("EncryptionWithColumnKey.path_in_schema", &f_1)?;
+    let ret = EncryptionWithColumnKey {
+      path_in_schema: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      key_metadata: f_2,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("EncryptionWithColumnKey");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("path_in_schema", TType::List, 1))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::String, self.path_in_schema.len() as i32))?;
+    for e in &self.path_in_schema {
+      o_prot.write_string(e)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.key_metadata {
+      o_prot.write_field_begin(&TFieldIdentifier::new("key_metadata", TType::String, 2))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
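For the column-key case, `path_in_schema` names the leaf column and `key_metadata` is opaque to Parquet itself; it only hints to the reader's key-retrieval layer which key to fetch (e.g. a key identifier). A hypothetical construction:

```rust
// Hypothetical usage: tag a leaf column as encrypted with its own key.
fn ssn_column_crypto() -> EncryptionWithColumnKey {
	EncryptionWithColumnKey::new(
		vec!["user".to_string(), "ssn".to_string()], // path to the leaf column
		b"key-id:42".to_vec(),                       // lifted to Some(..) via Into
	)
}
```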
+//
+// ColumnCryptoMetaData
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum ColumnCryptoMetaData {
+  ENCRYPTIONWITHFOOTERKEY(EncryptionWithFooterKey),
+  ENCRYPTIONWITHCOLUMNKEY(EncryptionWithColumnKey),
+}
+
+impl ColumnCryptoMetaData {
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<ColumnCryptoMetaData> {
+    let mut ret: Option<ColumnCryptoMetaData> = None;
+    let mut received_field_count = 0;
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = EncryptionWithFooterKey::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(ColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(val));
+          }
+          received_field_count += 1;
+        },
+        2 => {
+          let val = EncryptionWithColumnKey::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(ColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(val));
+          }
+          received_field_count += 1;
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+          received_field_count += 1;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    if received_field_count == 0 {
+      Err(thrift::Error::Protocol(ProtocolError::new(ProtocolErrorKind::InvalidData, "received empty union from remote ColumnCryptoMetaData")))
+    } else if received_field_count > 1 {
+      Err(thrift::Error::Protocol(ProtocolError::new(ProtocolErrorKind::InvalidData, "received multiple fields for union from remote ColumnCryptoMetaData")))
+    } else {
+      Ok(ret.expect("return value should have been constructed"))
+    }
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("ColumnCryptoMetaData");
+    o_prot.write_struct_begin(&struct_ident)?;
+    match *self {
+      ColumnCryptoMetaData::ENCRYPTIONWITHFOOTERKEY(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("ENCRYPTION_WITH_FOOTER_KEY", TType::Struct, 1))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+      ColumnCryptoMetaData::ENCRYPTIONWITHCOLUMNKEY(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("ENCRYPTION_WITH_COLUMN_KEY", TType::Struct, 2))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// ColumnChunk
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct ColumnChunk {
+  /// File where column data is stored. If not set, assumed to be same file as
+  /// metadata. This path is relative to the current file.
+  ///
+  pub file_path: Option<String>,
+  /// Byte offset in file_path to the ColumnMetaData *
+  pub file_offset: i64,
+  /// Column metadata for this chunk. This is the same content as what is at
+  /// file_path/file_offset. Having it here has it replicated in the file
+  /// metadata.
+  ///
+  pub meta_data: Option<ColumnMetaData>,
+  /// File offset of ColumnChunk's OffsetIndex *
+  pub offset_index_offset: Option<i64>,
+  /// Size of ColumnChunk's OffsetIndex, in bytes *
+  pub offset_index_length: Option<i32>,
+  /// File offset of ColumnChunk's ColumnIndex *
+  pub column_index_offset: Option<i64>,
+  /// Size of ColumnChunk's ColumnIndex, in bytes *
+  pub column_index_length: Option<i32>,
+  /// Crypto metadata of encrypted columns *
+  pub crypto_metadata: Option<ColumnCryptoMetaData>,
+  /// Encrypted column metadata for this chunk *
+  pub encrypted_column_metadata: Option<Vec<u8>>,
+}
+
+impl ColumnChunk {
+  pub fn new<F1, F3, F4, F5, F6, F7, F8, F9>(file_path: F1, file_offset: i64, meta_data: F3, offset_index_offset: F4, offset_index_length: F5, column_index_offset: F6, column_index_length: F7, crypto_metadata: F8, encrypted_column_metadata: F9) -> ColumnChunk where F1: Into<Option<String>>, F3: Into<Option<ColumnMetaData>>, F4: Into<Option<i64>>, F5: Into<Option<i32>>, F6: Into<Option<i64>>, F7: Into<Option<i32>>, F8: Into<Option<ColumnCryptoMetaData>>, F9: Into<Option<Vec<u8>>> {
+    ColumnChunk {
+      file_path: file_path.into(),
+      file_offset: file_offset,
+      meta_data: meta_data.into(),
+      offset_index_offset: offset_index_offset.into(),
+      offset_index_length: offset_index_length.into(),
+      column_index_offset: column_index_offset.into(),
+      column_index_length: column_index_length.into(),
+      crypto_metadata: crypto_metadata.into(),
+      encrypted_column_metadata: encrypted_column_metadata.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<ColumnChunk> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<String> = None;
+    let mut f_2: Option<i64> = None;
+    let mut f_3: Option<ColumnMetaData> = None;
+    let mut f_4: Option<i64> = None;
+    let mut f_5: Option<i32> = None;
+    let mut f_6: Option<i64> = None;
+    let mut f_7: Option<i32> = None;
+    let mut f_8: Option<ColumnCryptoMetaData> = None;
+    let mut f_9: Option<Vec<u8>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => f_1 = Some(i_prot.read_string()?),
+        2 => f_2 = Some(i_prot.read_i64()?),
+        3 => f_3 = Some(ColumnMetaData::read_from_in_protocol(i_prot)?),
+        4 => f_4 = Some(i_prot.read_i64()?),
+        5 => f_5 = Some(i_prot.read_i32()?),
+        6 => f_6 = Some(i_prot.read_i64()?),
+        7 => f_7 = Some(i_prot.read_i32()?),
+        8 => f_8 = Some(ColumnCryptoMetaData::read_from_in_protocol(i_prot)?),
+        9 => f_9 = Some(i_prot.read_bytes()?),
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("ColumnChunk.file_offset", &f_2)?;
+    let ret = ColumnChunk {
+      file_path: f_1,
+      file_offset: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      meta_data: f_3,
+      offset_index_offset: f_4,
+      offset_index_length: f_5,
+      column_index_offset: f_6,
+      column_index_length: f_7,
+      crypto_metadata: f_8,
+      encrypted_column_metadata: f_9,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("ColumnChunk");
+    o_prot.write_struct_begin(&struct_ident)?;
+    if let Some(ref fld_var) = self.file_path {
+      o_prot.write_field_begin(&TFieldIdentifier::new("file_path", TType::String, 1))?;
+      o_prot.write_string(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    o_prot.write_field_begin(&TFieldIdentifier::new("file_offset", TType::I64, 2))?;
+    o_prot.write_i64(self.file_offset)?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.meta_data {
+      o_prot.write_field_begin(&TFieldIdentifier::new("meta_data", TType::Struct, 3))?;
+      fld_var.write_to_out_protocol(o_prot)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(fld_var) = self.offset_index_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("offset_index_offset", TType::I64, 4))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(fld_var) = self.offset_index_length {
+      o_prot.write_field_begin(&TFieldIdentifier::new("offset_index_length", TType::I32, 5))?;
+      o_prot.write_i32(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(fld_var) = self.column_index_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("column_index_offset", TType::I64, 6))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(fld_var) = self.column_index_length {
+      o_prot.write_field_begin(&TFieldIdentifier::new("column_index_length", TType::I32, 7))?;
+      o_prot.write_i32(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(ref fld_var) = self.crypto_metadata {
+      o_prot.write_field_begin(&TFieldIdentifier::new("crypto_metadata", TType::Struct, 8))?;
+      fld_var.write_to_out_protocol(o_prot)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(ref fld_var) = self.encrypted_column_metadata {
+      o_prot.write_field_begin(&TFieldIdentifier::new("encrypted_column_metadata", TType::String, 9))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
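One practical consequence of these offsets: the first byte of a chunk's page data is the dictionary page when one exists, and the first data page otherwise. A sketch of that lookup (hypothetical helper; note `meta_data` is optional, e.g. when the column metadata is encrypted):

```rust
// Where does this chunk's page data start in the file?
fn chunk_start(chunk: &ColumnChunk) -> Option<i64> {
	let m = chunk.meta_data.as_ref()?;
	Some(match m.dictionary_page_offset {
		// The (only) dictionary page precedes the data pages.
		Some(dict) => dict.min(m.data_page_offset),
		None => m.data_page_offset,
	})
}
```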
+//
+// RowGroup
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct RowGroup {
+  /// Metadata for each column chunk in this row group.
+  /// This list must have the same order as the SchemaElement list in FileMetaData.
+  ///
+  pub columns: Vec<ColumnChunk>,
+  /// Total byte size of all the uncompressed column data in this row group *
+  pub total_byte_size: i64,
+  /// Number of rows in this row group *
+  pub num_rows: i64,
+  /// If set, specifies a sort ordering of the rows in this RowGroup.
+  /// The sorting columns can be a subset of all the columns.
+  pub sorting_columns: Option<Vec<SortingColumn>>,
+  /// Byte offset from beginning of file to first page (data or dictionary)
+  /// in this row group *
+  pub file_offset: Option<i64>,
+  /// Total byte size of all compressed (and potentially encrypted) column data
+  /// in this row group *
+  pub total_compressed_size: Option<i64>,
+  /// Row group ordinal in the file *
+  pub ordinal: Option<i16>,
+}
+
+impl RowGroup {
+  pub fn new<F4, F5, F6, F7>(columns: Vec<ColumnChunk>, total_byte_size: i64, num_rows: i64, sorting_columns: F4, file_offset: F5, total_compressed_size: F6, ordinal: F7) -> RowGroup where F4: Into<Option<Vec<SortingColumn>>>, F5: Into<Option<i64>>, F6: Into<Option<i64>>, F7: Into<Option<i16>> {
+    RowGroup {
+      columns: columns,
+      total_byte_size: total_byte_size,
+      num_rows: num_rows,
+      sorting_columns: sorting_columns.into(),
+      file_offset: file_offset.into(),
+      total_compressed_size: total_compressed_size.into(),
+      ordinal: ordinal.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<RowGroup> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<ColumnChunk>> = None;
+    let mut f_2: Option<i64> = None;
+    let mut f_3: Option<i64> = None;
+    let mut f_4: Option<Vec<SortingColumn>> = None;
+    let mut f_5: Option<i64> = None;
+    let mut f_6: Option<i64> = None;
+    let mut f_7: Option<i16> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<ColumnChunk> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(ColumnChunk::read_from_in_protocol(i_prot)?);
+          }
+          i_prot.read_list_end()?;
+          f_1 = Some(val);
+        },
+        2 => f_2 = Some(i_prot.read_i64()?),
+        3 => f_3 = Some(i_prot.read_i64()?),
+        4 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<SortingColumn> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(SortingColumn::read_from_in_protocol(i_prot)?);
+          }
+          i_prot.read_list_end()?;
+          f_4 = Some(val);
+        },
+        5 => f_5 = Some(i_prot.read_i64()?),
+        6 => f_6 = Some(i_prot.read_i64()?),
+        7 => f_7 = Some(i_prot.read_i16()?),
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("RowGroup.columns", &f_1)?;
+    verify_required_field_exists("RowGroup.total_byte_size", &f_2)?;
+    verify_required_field_exists("RowGroup.num_rows", &f_3)?;
+    let ret = RowGroup {
+      columns: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      total_byte_size: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      num_rows: f_3.expect("auto-generated code should have checked for presence of required fields"),
+      sorting_columns: f_4,
+      file_offset: f_5,
+      total_compressed_size: f_6,
+      ordinal: f_7,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("RowGroup");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("columns", TType::List, 1))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, self.columns.len() as i32))?;
+    for e in &self.columns {
+      e.write_to_out_protocol(o_prot)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("total_byte_size", TType::I64, 2))?;
+    o_prot.write_i64(self.total_byte_size)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("num_rows", TType::I64, 3))?;
+    o_prot.write_i64(self.num_rows)?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.sorting_columns {
+      o_prot.write_field_begin(&TFieldIdentifier::new("sorting_columns", TType::List, 4))?;
+      o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, fld_var.len() as i32))?;
+      for e in fld_var {
+        e.write_to_out_protocol(o_prot)?;
+      }
+      o_prot.write_list_end()?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(fld_var) = self.file_offset {
+      o_prot.write_field_begin(&TFieldIdentifier::new("file_offset", TType::I64, 5))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(fld_var) = self.total_compressed_size {
+      o_prot.write_field_begin(&TFieldIdentifier::new("total_compressed_size", TType::I64, 6))?;
+      o_prot.write_i64(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(fld_var) = self.ordinal {
+      o_prot.write_field_begin(&TFieldIdentifier::new("ordinal", TType::I16, 7))?;
+      o_prot.write_i16(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// TypeDefinedOrder
+//
+
+/// Empty struct to signal the order defined by the physical or logical type
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct TypeDefinedOrder {
+}
+
+impl TypeDefinedOrder {
+  pub fn new() -> TypeDefinedOrder {
+    TypeDefinedOrder {}
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<TypeDefinedOrder> {
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = TypeDefinedOrder {};
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("TypeDefinedOrder");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+impl Default for TypeDefinedOrder {
+  fn default() -> Self {
+    TypeDefinedOrder{}
+  }
+}
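Because `total_compressed_size` is optional at the row-group level, readers sometimes have to recompute it from the chunks. A sketch (hypothetical helper; chunks whose `meta_data` is absent contribute nothing):

```rust
// Recompute a row group's compressed footprint from its column chunks.
fn compressed_size(rg: &RowGroup) -> i64 {
	rg.columns
		.iter()
		.filter_map(|c| c.meta_data.as_ref())
		.map(|m| m.total_compressed_size)
		.sum()
}
```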
+//
+// ColumnOrder
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum ColumnOrder {
+  TYPEORDER(TypeDefinedOrder),
+}
+
+impl ColumnOrder {
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<ColumnOrder> {
+    let mut ret: Option<ColumnOrder> = None;
+    let mut received_field_count = 0;
+    i_prot.read_struct_begin()?;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let val = TypeDefinedOrder::read_from_in_protocol(i_prot)?;
+          if ret.is_none() {
+            ret = Some(ColumnOrder::TYPEORDER(val));
+          }
+          received_field_count += 1;
+        },
+        _ => {
+          i_prot.skip(field_ident.field_type)?;
+          received_field_count += 1;
+        },
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    if received_field_count == 0 {
+      Err(thrift::Error::Protocol(ProtocolError::new(ProtocolErrorKind::InvalidData, "received empty union from remote ColumnOrder")))
+    } else if received_field_count > 1 {
+      Err(thrift::Error::Protocol(ProtocolError::new(ProtocolErrorKind::InvalidData, "received multiple fields for union from remote ColumnOrder")))
+    } else {
+      Ok(ret.expect("return value should have been constructed"))
+    }
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("ColumnOrder");
+    o_prot.write_struct_begin(&struct_ident)?;
+    match *self {
+      ColumnOrder::TYPEORDER(ref f) => {
+        o_prot.write_field_begin(&TFieldIdentifier::new("TYPE_ORDER", TType::Struct, 1))?;
+        f.write_to_out_protocol(o_prot)?;
+        o_prot.write_field_end()?;
+      },
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// PageLocation
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct PageLocation {
+  /// Offset of the page in the file *
+  pub offset: i64,
+  /// Size of the page, including header. Sum of compressed_page_size and header
+  /// length
+  pub compressed_page_size: i32,
+  /// Index within the RowGroup of the first row of the page; this means pages
+  /// change on record boundaries (r = 0).
+  pub first_row_index: i64,
+}
+
+impl PageLocation {
+  pub fn new(offset: i64, compressed_page_size: i32, first_row_index: i64) -> PageLocation {
+    PageLocation {
+      offset: offset,
+      compressed_page_size: compressed_page_size,
+      first_row_index: first_row_index,
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<PageLocation> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<i64> = None;
+    let mut f_2: Option<i32> = None;
+    let mut f_3: Option<i64> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => f_1 = Some(i_prot.read_i64()?),
+        2 => f_2 = Some(i_prot.read_i32()?),
+        3 => f_3 = Some(i_prot.read_i64()?),
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("PageLocation.offset", &f_1)?;
+    verify_required_field_exists("PageLocation.compressed_page_size", &f_2)?;
+    verify_required_field_exists("PageLocation.first_row_index", &f_3)?;
+    let ret = PageLocation {
+      offset: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      compressed_page_size: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      first_row_index: f_3.expect("auto-generated code should have checked for presence of required fields"),
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("PageLocation");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("offset", TType::I64, 1))?;
+    o_prot.write_i64(self.offset)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("compressed_page_size", TType::I32, 2))?;
+    o_prot.write_i32(self.compressed_page_size)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("first_row_index", TType::I64, 3))?;
+    o_prot.write_i64(self.first_row_index)?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// OffsetIndex
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct OffsetIndex {
+  /// PageLocations, ordered by increasing PageLocation.offset. It is required
+  /// that page_locations[i].first_row_index < page_locations[i+1].first_row_index.
+  pub page_locations: Vec<PageLocation>,
+}
+
+impl OffsetIndex {
+  pub fn new(page_locations: Vec<PageLocation>) -> OffsetIndex {
+    OffsetIndex {
+      page_locations: page_locations,
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<OffsetIndex> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<PageLocation>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<PageLocation> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(PageLocation::read_from_in_protocol(i_prot)?);
+          }
+          i_prot.read_list_end()?;
+          f_1 = Some(val);
+        },
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("OffsetIndex.page_locations", &f_1)?;
+    let ret = OffsetIndex {
+      page_locations: f_1.expect("auto-generated code should have checked for presence of required fields"),
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("OffsetIndex");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("page_locations", TType::List, 1))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, self.page_locations.len() as i32))?;
+    for e in &self.page_locations {
+      e.write_to_out_protocol(o_prot)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
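The ordering invariant above makes row lookup a binary search over `first_row_index`. A sketch (hypothetical helper):

```rust
// Find the page containing row `r`, relying on the strictly increasing
// `first_row_index` guarantee documented on OffsetIndex.
fn page_for_row(index: &OffsetIndex, r: i64) -> Option<&PageLocation> {
	let locs = &index.page_locations;
	match locs.binary_search_by(|p| p.first_row_index.cmp(&r)) {
		Ok(i) => Some(&locs[i]),      // r is the first row of page i
		Err(0) => None,               // r precedes the first page
		Err(i) => Some(&locs[i - 1]), // r falls inside page i-1
	}
}
```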
+//
+// ColumnIndex
+//
+
+/// Description for ColumnIndex.
+/// Each `<array-field>[i]` refers to the page at OffsetIndex.page_locations[i]
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct ColumnIndex {
+  /// A list of Boolean values to determine the validity of the corresponding
+  /// min and max values. If true, a page contains only null values, and writers
+  /// have to set the corresponding entries in min_values and max_values to
+  /// byte[0], so that all lists have the same length. If false, the
+  /// corresponding entries in min_values and max_values must be valid.
+  pub null_pages: Vec<bool>,
+  /// Two lists containing lower and upper bounds for the values of each page.
+  /// These may be the actual minimum and maximum values found on a page, but
+  /// can also be (more compact) values that do not exist on a page. For
+  /// example, instead of storing "Blart Versenwald III", a writer may set
+  /// min_values[i]="B", max_values[i]="C". Such more compact values must still
+  /// be valid values within the column's logical type. Readers must make sure
+  /// that list entries are populated before using them by inspecting null_pages.
+  pub min_values: Vec<Vec<u8>>,
+  pub max_values: Vec<Vec<u8>>,
+  /// Stores whether both min_values and max_values are ordered and if so, in
+  /// which direction. This allows readers to perform binary searches in both
+  /// lists. Readers cannot assume that max_values[i] <= min_values[i+1], even
+  /// if the lists are ordered.
+  pub boundary_order: BoundaryOrder,
+  /// A list containing the number of null values for each page *
+  pub null_counts: Option<Vec<i64>>,
+}
+
+impl ColumnIndex {
+  pub fn new<F5>(null_pages: Vec<bool>, min_values: Vec<Vec<u8>>, max_values: Vec<Vec<u8>>, boundary_order: BoundaryOrder, null_counts: F5) -> ColumnIndex where F5: Into<Option<Vec<i64>>> {
+    ColumnIndex {
+      null_pages: null_pages,
+      min_values: min_values,
+      max_values: max_values,
+      boundary_order: boundary_order,
+      null_counts: null_counts.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<ColumnIndex> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<bool>> = None;
+    let mut f_2: Option<Vec<Vec<u8>>> = None;
+    let mut f_3: Option<Vec<Vec<u8>>> = None;
+    let mut f_4: Option<BoundaryOrder> = None;
+    let mut f_5: Option<Vec<i64>> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<bool> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(i_prot.read_bool()?);
+          }
+          i_prot.read_list_end()?;
+          f_1 = Some(val);
+        },
+        2 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<Vec<u8>> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(i_prot.read_bytes()?);
+          }
+          i_prot.read_list_end()?;
+          f_2 = Some(val);
+        },
+        3 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<Vec<u8>> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(i_prot.read_bytes()?);
+          }
+          i_prot.read_list_end()?;
+          f_3 = Some(val);
+        },
+        4 => f_4 = Some(BoundaryOrder::read_from_in_protocol(i_prot)?),
+        5 => {
+          let list_ident = i_prot.read_list_begin()?;
+          let mut val: Vec<i64> = Vec::with_capacity(list_ident.size as usize);
+          for _ in 0..list_ident.size {
+            val.push(i_prot.read_i64()?);
+          }
+          i_prot.read_list_end()?;
+          f_5 = Some(val);
+        },
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    verify_required_field_exists("ColumnIndex.null_pages", &f_1)?;
+    verify_required_field_exists("ColumnIndex.min_values", &f_2)?;
+    verify_required_field_exists("ColumnIndex.max_values", &f_3)?;
+    verify_required_field_exists("ColumnIndex.boundary_order", &f_4)?;
+    let ret = ColumnIndex {
+      null_pages: f_1.expect("auto-generated code should have checked for presence of required fields"),
+      min_values: f_2.expect("auto-generated code should have checked for presence of required fields"),
+      max_values: f_3.expect("auto-generated code should have checked for presence of required fields"),
+      boundary_order: f_4.expect("auto-generated code should have checked for presence of required fields"),
+      null_counts: f_5,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("ColumnIndex");
+    o_prot.write_struct_begin(&struct_ident)?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("null_pages", TType::List, 1))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::Bool, self.null_pages.len() as i32))?;
+    for e in &self.null_pages {
+      o_prot.write_bool(*e)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("min_values", TType::List, 2))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::String, self.min_values.len() as i32))?;
+    for e in &self.min_values {
+      o_prot.write_bytes(e)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("max_values", TType::List, 3))?;
+    o_prot.write_list_begin(&TListIdentifier::new(TType::String, self.max_values.len() as i32))?;
+    for e in &self.max_values {
+      o_prot.write_bytes(e)?;
+    }
+    o_prot.write_list_end()?;
+    o_prot.write_field_end()?;
+    o_prot.write_field_begin(&TFieldIdentifier::new("boundary_order", TType::I32, 4))?;
+    self.boundary_order.write_to_out_protocol(o_prot)?;
+    o_prot.write_field_end()?;
+    if let Some(ref fld_var) = self.null_counts {
+      o_prot.write_field_begin(&TFieldIdentifier::new("null_counts", TType::List, 5))?;
+      o_prot.write_list_begin(&TListIdentifier::new(TType::I64, fld_var.len() as i32))?;
+      for e in fld_var {
+        o_prot.write_i64(*e)?;
+      }
+      o_prot.write_list_end()?;
+      o_prot.write_field_end()?;
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+//
+// AesGcmV1
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct AesGcmV1 {
+  /// AAD prefix *
+  pub aad_prefix: Option<Vec<u8>>,
+  /// Unique file identifier part of AAD suffix *
+  pub aad_file_unique: Option<Vec<u8>>,
+  /// In files encrypted with AAD prefix without storing it,
+  /// readers must supply the prefix *
+  pub supply_aad_prefix: Option<bool>,
+}
+
+impl AesGcmV1 {
+  pub fn new<F1, F2, F3>(aad_prefix: F1, aad_file_unique: F2, supply_aad_prefix: F3) -> AesGcmV1 where F1: Into<Option<Vec<u8>>>, F2: Into<Option<Vec<u8>>>, F3: Into<Option<bool>> {
+    AesGcmV1 {
+      aad_prefix: aad_prefix.into(),
+      aad_file_unique: aad_file_unique.into(),
+      supply_aad_prefix: supply_aad_prefix.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<AesGcmV1> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<u8>> = None;
+    let mut f_2: Option<Vec<u8>> = None;
+    let mut f_3: Option<bool> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => f_1 = Some(i_prot.read_bytes()?),
+        2 => f_2 = Some(i_prot.read_bytes()?),
+        3 => f_3 = Some(i_prot.read_bool()?),
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = AesGcmV1 {
+      aad_prefix: f_1,
+      aad_file_unique: f_2,
+      supply_aad_prefix: f_3,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("AesGcmV1");
+    o_prot.write_struct_begin(&struct_ident)?;
+    if let Some(ref fld_var) = self.aad_prefix {
+      o_prot.write_field_begin(&TFieldIdentifier::new("aad_prefix", TType::String, 1))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(ref fld_var) = self.aad_file_unique {
+      o_prot.write_field_begin(&TFieldIdentifier::new("aad_file_unique", TType::String, 2))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(fld_var) = self.supply_aad_prefix {
+      o_prot.write_field_begin(&TFieldIdentifier::new("supply_aad_prefix", TType::Bool, 3))?;
+      o_prot.write_bool(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
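A reader using this index must check `null_pages[i]` before trusting `min_values[i]`/`max_values[i]`. A sketch of page pruning for a point lookup (hypothetical helper; it also assumes plain bytewise comparison is valid for the column's type, which the caller must establish, e.g. via `boundary_order` and the column's sort order):

```rust
// Return the indices of pages whose [min, max] range could contain `needle`.
fn pages_maybe_containing(idx: &ColumnIndex, needle: &[u8]) -> Vec<usize> {
	(0..idx.null_pages.len())
		.filter(|&i| {
			// All-null pages carry byte[0] placeholders: skip them outright.
			!idx.null_pages[i]
				&& idx.min_values[i].as_slice() <= needle
				&& needle <= idx.max_values[i].as_slice()
		})
		.collect()
}
```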
+impl Default for AesGcmV1 {
+  fn default() -> Self {
+    AesGcmV1{
+      aad_prefix: Some(Vec::new()),
+      aad_file_unique: Some(Vec::new()),
+      supply_aad_prefix: Some(false),
+    }
+  }
+}
+
+//
+// AesGcmCtrV1
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct AesGcmCtrV1 {
+  /// AAD prefix *
+  pub aad_prefix: Option<Vec<u8>>,
+  /// Unique file identifier part of AAD suffix *
+  pub aad_file_unique: Option<Vec<u8>>,
+  /// In files encrypted with AAD prefix without storing it,
+  /// readers must supply the prefix *
+  pub supply_aad_prefix: Option<bool>,
+}
+
+impl AesGcmCtrV1 {
+  pub fn new<F1, F2, F3>(aad_prefix: F1, aad_file_unique: F2, supply_aad_prefix: F3) -> AesGcmCtrV1 where F1: Into<Option<Vec<u8>>>, F2: Into<Option<Vec<u8>>>, F3: Into<Option<bool>> {
+    AesGcmCtrV1 {
+      aad_prefix: aad_prefix.into(),
+      aad_file_unique: aad_file_unique.into(),
+      supply_aad_prefix: supply_aad_prefix.into(),
+    }
+  }
+  pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<AesGcmCtrV1> {
+    i_prot.read_struct_begin()?;
+    let mut f_1: Option<Vec<u8>> = None;
+    let mut f_2: Option<Vec<u8>> = None;
+    let mut f_3: Option<bool> = None;
+    loop {
+      let field_ident = i_prot.read_field_begin()?;
+      if field_ident.field_type == TType::Stop {
+        break;
+      }
+      let field_id = field_id(&field_ident)?;
+      match field_id {
+        1 => f_1 = Some(i_prot.read_bytes()?),
+        2 => f_2 = Some(i_prot.read_bytes()?),
+        3 => f_3 = Some(i_prot.read_bool()?),
+        _ => i_prot.skip(field_ident.field_type)?,
+      };
+      i_prot.read_field_end()?;
+    }
+    i_prot.read_struct_end()?;
+    let ret = AesGcmCtrV1 {
+      aad_prefix: f_1,
+      aad_file_unique: f_2,
+      supply_aad_prefix: f_3,
+    };
+    Ok(ret)
+  }
+  pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+    let struct_ident = TStructIdentifier::new("AesGcmCtrV1");
+    o_prot.write_struct_begin(&struct_ident)?;
+    if let Some(ref fld_var) = self.aad_prefix {
+      o_prot.write_field_begin(&TFieldIdentifier::new("aad_prefix", TType::String, 1))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(ref fld_var) = self.aad_file_unique {
+      o_prot.write_field_begin(&TFieldIdentifier::new("aad_file_unique", TType::String, 2))?;
+      o_prot.write_bytes(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    if let Some(fld_var) = self.supply_aad_prefix {
+      o_prot.write_field_begin(&TFieldIdentifier::new("supply_aad_prefix", TType::Bool, 3))?;
+      o_prot.write_bool(fld_var)?;
+      o_prot.write_field_end()?;
+    }
+    o_prot.write_field_stop()?;
+    o_prot.write_struct_end()
+  }
+}
+
+impl Default for AesGcmCtrV1 {
+  fn default() -> Self {
+    AesGcmCtrV1{
+      aad_prefix: Some(Vec::new()),
+      aad_file_unique: Some(Vec::new()),
+      supply_aad_prefix: Some(false),
+    }
+  }
+}
+
+//
+// EncryptionAlgorithm
+//
+
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub enum EncryptionAlgorithm {
+  AESGCMV1(AesGcmV1),
+  AESGCMCTRV1(AesGcmCtrV1),
+}
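Both variants carry the same AAD-prefix contract: when `supply_aad_prefix` is set, the prefix was not stored in the file and the reader must provide it out of band. A sketch of that check (hypothetical helper):

```rust
// Does decrypting this file require the caller to supply the AAD prefix?
fn needs_external_aad_prefix(alg: &EncryptionAlgorithm) -> bool {
	match alg {
		EncryptionAlgorithm::AESGCMV1(a) => a.supply_aad_prefix.unwrap_or(false),
		EncryptionAlgorithm::AESGCMCTRV1(a) => a.supply_aad_prefix.unwrap_or(false),
	}
}
```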
AesGcmCtrV1::read_from_in_protocol(i_prot)?; + if ret.is_none() { + ret = Some(EncryptionAlgorithm::AESGCMCTRV1(val)); + } + received_field_count += 1; + }, + _ => { + i_prot.skip(field_ident.field_type)?; + received_field_count += 1; + }, + }; + i_prot.read_field_end()?; + } + i_prot.read_struct_end()?; + if received_field_count == 0 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received empty union from remote EncryptionAlgorithm" + ) + ) + ) + } else if received_field_count > 1 { + Err( + thrift::Error::Protocol( + ProtocolError::new( + ProtocolErrorKind::InvalidData, + "received multiple fields for union from remote EncryptionAlgorithm" + ) + ) + ) + } else { + Ok(ret.expect("return value should have been constructed")) + } + } + pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> { + let struct_ident = TStructIdentifier::new("EncryptionAlgorithm"); + o_prot.write_struct_begin(&struct_ident)?; + match *self { + EncryptionAlgorithm::AESGCMV1(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("AES_GCM_V1", TType::Struct, 1))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + EncryptionAlgorithm::AESGCMCTRV1(ref f) => { + o_prot.write_field_begin(&TFieldIdentifier::new("AES_GCM_CTR_V1", TType::Struct, 2))?; + f.write_to_out_protocol(o_prot)?; + o_prot.write_field_end()?; + }, + } + o_prot.write_field_stop()?; + o_prot.write_struct_end() + } +} + +// +// FileMetaData +// + +/// Description for file metadata +#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] +pub struct FileMetaData { + /// Version of this file * + pub version: i32, + /// Parquet schema for this file. This schema contains metadata for all the columns. + /// The schema is represented as a tree with a single root. The nodes of the tree + /// are flattened to a list by doing a depth-first traversal. + /// The column metadata contains the path in the schema for that column which can be + /// used to map columns to nodes in the schema. + /// The first element is the root * + pub schema: Vec, + /// Number of rows in this file * + pub num_rows: i64, + /// Row groups in this file * + pub row_groups: Vec, + /// Optional key/value metadata * + pub key_value_metadata: Option>, + /// String for application that wrote this file. This should be in the format + /// version (build ). + /// e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) + /// + pub created_by: Option, + /// Sort order used for the min_value and max_value fields of each column in + /// this file. Sort orders are listed in the order matching the columns in the + /// schema. The indexes are not necessary the same though, because only leaf + /// nodes of the schema are represented in the list of sort orders. + /// + /// Without column_orders, the meaning of the min_value and max_value fields is + /// undefined. To ensure well-defined behaviour, if min_value and max_value are + /// written to a Parquet file, column_orders must be written as well. + /// + /// The obsolete min and max fields are always sorted by signed comparison + /// regardless of column_orders. + pub column_orders: Option>, + /// Encryption algorithm. This field is set only in encrypted files + /// with plaintext footer. Files with encrypted footer store algorithm id + /// in FileCryptoMetaData structure. + pub encryption_algorithm: Option, + /// Retrieval metadata of key used for signing the footer. 
+
+//
+// FileMetaData
+//
+
+/// Description for file metadata
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct FileMetaData {
+	/// Version of this file *
+	pub version: i32,
+	/// Parquet schema for this file. This schema contains metadata for all the columns.
+	/// The schema is represented as a tree with a single root. The nodes of the tree
+	/// are flattened to a list by doing a depth-first traversal.
+	/// The column metadata contains the path in the schema for that column which can be
+	/// used to map columns to nodes in the schema.
+	/// The first element is the root *
+	pub schema: Vec<SchemaElement>,
+	/// Number of rows in this file *
+	pub num_rows: i64,
+	/// Row groups in this file *
+	pub row_groups: Vec<RowGroup>,
+	/// Optional key/value metadata *
+	pub key_value_metadata: Option<Vec<KeyValue>>,
+	/// String for application that wrote this file. This should be in the format
+	/// <Application> version <App Version> (build <App Build Hash>).
+	/// e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55)
+	///
+	pub created_by: Option<String>,
+	/// Sort order used for the min_value and max_value fields of each column in
+	/// this file. Sort orders are listed in the order matching the columns in the
+	/// schema. The indexes are not necessarily the same though, because only leaf
+	/// nodes of the schema are represented in the list of sort orders.
+	///
+	/// Without column_orders, the meaning of the min_value and max_value fields is
+	/// undefined. To ensure well-defined behaviour, if min_value and max_value are
+	/// written to a Parquet file, column_orders must be written as well.
+	///
+	/// The obsolete min and max fields are always sorted by signed comparison
+	/// regardless of column_orders.
+	pub column_orders: Option<Vec<ColumnOrder>>,
+	/// Encryption algorithm. This field is set only in encrypted files
+	/// with plaintext footer. Files with encrypted footer store algorithm id
+	/// in FileCryptoMetaData structure.
+	pub encryption_algorithm: Option<EncryptionAlgorithm>,
+	/// Retrieval metadata of key used for signing the footer.
+	/// Used only in encrypted files with plaintext footer.
+	pub footer_signing_key_metadata: Option<Vec<u8>>,
+}
+
+impl FileMetaData {
+	pub fn new<F5, F6, F7, F8, F9>(version: i32, schema: Vec<SchemaElement>, num_rows: i64, row_groups: Vec<RowGroup>, key_value_metadata: F5, created_by: F6, column_orders: F7, encryption_algorithm: F8, footer_signing_key_metadata: F9) -> FileMetaData where F5: Into<Option<Vec<KeyValue>>>, F6: Into<Option<String>>, F7: Into<Option<Vec<ColumnOrder>>>, F8: Into<Option<EncryptionAlgorithm>>, F9: Into<Option<Vec<u8>>> {
+		FileMetaData {
+			version,
+			schema,
+			num_rows,
+			row_groups,
+			key_value_metadata: key_value_metadata.into(),
+			created_by: created_by.into(),
+			column_orders: column_orders.into(),
+			encryption_algorithm: encryption_algorithm.into(),
+			footer_signing_key_metadata: footer_signing_key_metadata.into(),
+		}
+	}
+	pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<FileMetaData> {
+		i_prot.read_struct_begin()?;
+		let mut f_1: Option<i32> = None;
+		let mut f_2: Option<Vec<SchemaElement>> = None;
+		let mut f_3: Option<i64> = None;
+		let mut f_4: Option<Vec<RowGroup>> = None;
+		let mut f_5: Option<Vec<KeyValue>> = None;
+		let mut f_6: Option<String> = None;
+		let mut f_7: Option<Vec<ColumnOrder>> = None;
+		let mut f_8: Option<EncryptionAlgorithm> = None;
+		let mut f_9: Option<Vec<u8>> = None;
+		loop {
+			let field_ident = i_prot.read_field_begin()?;
+			if field_ident.field_type == TType::Stop {
+				break;
+			}
+			let field_id = field_id(&field_ident)?;
+			match field_id {
+				1 => {
+					let val = i_prot.read_i32()?;
+					f_1 = Some(val);
+				},
+				2 => {
+					let list_ident = i_prot.read_list_begin()?;
+					let mut val: Vec<SchemaElement> = Vec::with_capacity(list_ident.size as usize);
+					for _ in 0..list_ident.size {
+						let list_elem_12 = SchemaElement::read_from_in_protocol(i_prot)?;
+						val.push(list_elem_12);
+					}
+					i_prot.read_list_end()?;
+					f_2 = Some(val);
+				},
+				3 => {
+					let val = i_prot.read_i64()?;
+					f_3 = Some(val);
+				},
+				4 => {
+					let list_ident = i_prot.read_list_begin()?;
+					let mut val: Vec<RowGroup> = Vec::with_capacity(list_ident.size as usize);
+					for _ in 0..list_ident.size {
+						let list_elem_13 = RowGroup::read_from_in_protocol(i_prot)?;
+						val.push(list_elem_13);
+					}
+					i_prot.read_list_end()?;
+					f_4 = Some(val);
+				},
+				5 => {
+					let list_ident = i_prot.read_list_begin()?;
+					let mut val: Vec<KeyValue> = Vec::with_capacity(list_ident.size as usize);
+					for _ in 0..list_ident.size {
+						let list_elem_14 = KeyValue::read_from_in_protocol(i_prot)?;
+						val.push(list_elem_14);
+					}
+					i_prot.read_list_end()?;
+					f_5 = Some(val);
+				},
+				6 => {
+					let val = i_prot.read_string()?;
+					f_6 = Some(val);
+				},
+				7 => {
+					let list_ident = i_prot.read_list_begin()?;
+					let mut val: Vec<ColumnOrder> = Vec::with_capacity(list_ident.size as usize);
+					for _ in 0..list_ident.size {
+						let list_elem_15 = ColumnOrder::read_from_in_protocol(i_prot)?;
+						val.push(list_elem_15);
+					}
+					i_prot.read_list_end()?;
+					f_7 = Some(val);
+				},
+				8 => {
+					let val = EncryptionAlgorithm::read_from_in_protocol(i_prot)?;
+					f_8 = Some(val);
+				},
+				9 => {
+					let val = i_prot.read_bytes()?;
+					f_9 = Some(val);
+				},
+				_ => {
+					i_prot.skip(field_ident.field_type)?;
+				},
+			};
+			i_prot.read_field_end()?;
+		}
+		i_prot.read_struct_end()?;
+		verify_required_field_exists("FileMetaData.version", &f_1)?;
+		verify_required_field_exists("FileMetaData.schema", &f_2)?;
+		verify_required_field_exists("FileMetaData.num_rows", &f_3)?;
+		verify_required_field_exists("FileMetaData.row_groups", &f_4)?;
+		let ret = FileMetaData {
+			version: f_1.expect("auto-generated code should have checked for presence of required fields"),
+			schema: f_2.expect("auto-generated code should have checked for presence of required fields"),
+			num_rows: f_3.expect("auto-generated code should have checked for presence of required fields"),
+			row_groups: f_4.expect("auto-generated code should have checked for presence of required fields"),
+			key_value_metadata: f_5,
+			created_by: f_6,
+			column_orders: f_7,
+			encryption_algorithm: f_8,
+			footer_signing_key_metadata: f_9,
+		};
+		Ok(ret)
+	}
+	pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+		let struct_ident = TStructIdentifier::new("FileMetaData");
+		o_prot.write_struct_begin(&struct_ident)?;
+		o_prot.write_field_begin(&TFieldIdentifier::new("version", TType::I32, 1))?;
+		o_prot.write_i32(self.version)?;
+		o_prot.write_field_end()?;
+		o_prot.write_field_begin(&TFieldIdentifier::new("schema", TType::List, 2))?;
+		o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, self.schema.len() as i32))?;
+		for e in &self.schema {
+			e.write_to_out_protocol(o_prot)?;
+		}
+		o_prot.write_list_end()?;
+		o_prot.write_field_end()?;
+		o_prot.write_field_begin(&TFieldIdentifier::new("num_rows", TType::I64, 3))?;
+		o_prot.write_i64(self.num_rows)?;
+		o_prot.write_field_end()?;
+		o_prot.write_field_begin(&TFieldIdentifier::new("row_groups", TType::List, 4))?;
+		o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, self.row_groups.len() as i32))?;
+		for e in &self.row_groups {
+			e.write_to_out_protocol(o_prot)?;
+		}
+		o_prot.write_list_end()?;
+		o_prot.write_field_end()?;
+		if let Some(ref fld_var) = self.key_value_metadata {
+			o_prot.write_field_begin(&TFieldIdentifier::new("key_value_metadata", TType::List, 5))?;
+			o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, fld_var.len() as i32))?;
+			for e in fld_var {
+				e.write_to_out_protocol(o_prot)?;
+			}
+			o_prot.write_list_end()?;
+			o_prot.write_field_end()?;
+		}
+		if let Some(ref fld_var) = self.created_by {
+			o_prot.write_field_begin(&TFieldIdentifier::new("created_by", TType::String, 6))?;
+			o_prot.write_string(fld_var)?;
+			o_prot.write_field_end()?;
+		}
+		if let Some(ref fld_var) = self.column_orders {
+			o_prot.write_field_begin(&TFieldIdentifier::new("column_orders", TType::List, 7))?;
+			o_prot.write_list_begin(&TListIdentifier::new(TType::Struct, fld_var.len() as i32))?;
+			for e in fld_var {
+				e.write_to_out_protocol(o_prot)?;
+			}
+			o_prot.write_list_end()?;
+			o_prot.write_field_end()?;
+		}
+		if let Some(ref fld_var) = self.encryption_algorithm {
+			o_prot.write_field_begin(&TFieldIdentifier::new("encryption_algorithm", TType::Struct, 8))?;
+			fld_var.write_to_out_protocol(o_prot)?;
+			o_prot.write_field_end()?;
+		}
+		if let Some(ref fld_var) = self.footer_signing_key_metadata {
+			o_prot.write_field_begin(&TFieldIdentifier::new("footer_signing_key_metadata", TType::String, 9))?;
+			o_prot.write_bytes(fld_var)?;
+			o_prot.write_field_end()?;
+		}
+		o_prot.write_field_stop()?;
+		o_prot.write_struct_end()
+	}
+}
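Note that `write_list_end` is called once after each element loop, not per element, and the required fields (`version`, `schema`, `num_rows`, `row_groups`) are checked by `verify_required_field_exists` before the struct is assembled, so the `expect` calls cannot fire. A sketch of wiring this reader up, assuming `footer_bytes` is exactly the compact-thrift `FileMetaData` blob that precedes the trailing `PAR1` magic of a Parquet file:

```rust
use thrift::protocol::TCompactInputProtocol;

// Sketch: decode the footer metadata from its raw thrift bytes.
fn decode_footer(mut footer_bytes: &[u8]) -> thrift::Result<FileMetaData> {
	// &[u8] implements std::io::Read, which the thrift crate accepts
	// as a read transport.
	let mut prot = TCompactInputProtocol::new(&mut footer_bytes);
	FileMetaData::read_from_in_protocol(&mut prot)
}
```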
+
+//
+// FileCryptoMetaData
+//
+
+/// Crypto metadata for files with encrypted footer *
+#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct FileCryptoMetaData {
+	/// Encryption algorithm. This field is only used for files
+	/// with encrypted footer. Files with plaintext footer store algorithm id
+	/// inside footer (FileMetaData structure).
+	pub encryption_algorithm: EncryptionAlgorithm,
+	/// Retrieval metadata of key used for encryption of footer,
+	/// and (possibly) columns *
+	pub key_metadata: Option<Vec<u8>>,
+}
+
+impl FileCryptoMetaData {
+	pub fn new<F2>(encryption_algorithm: EncryptionAlgorithm, key_metadata: F2) -> FileCryptoMetaData where F2: Into<Option<Vec<u8>>> {
+		FileCryptoMetaData {
+			encryption_algorithm,
+			key_metadata: key_metadata.into(),
+		}
+	}
+	pub fn read_from_in_protocol(i_prot: &mut dyn TInputProtocol) -> thrift::Result<FileCryptoMetaData> {
+		i_prot.read_struct_begin()?;
+		let mut f_1: Option<EncryptionAlgorithm> = None;
+		let mut f_2: Option<Vec<u8>> = None;
+		loop {
+			let field_ident = i_prot.read_field_begin()?;
+			if field_ident.field_type == TType::Stop {
+				break;
+			}
+			let field_id = field_id(&field_ident)?;
+			match field_id {
+				1 => {
+					let val = EncryptionAlgorithm::read_from_in_protocol(i_prot)?;
+					f_1 = Some(val);
+				},
+				2 => {
+					let val = i_prot.read_bytes()?;
+					f_2 = Some(val);
+				},
+				_ => {
+					i_prot.skip(field_ident.field_type)?;
+				},
+			};
+			i_prot.read_field_end()?;
+		}
+		i_prot.read_struct_end()?;
+		verify_required_field_exists("FileCryptoMetaData.encryption_algorithm", &f_1)?;
+		let ret = FileCryptoMetaData {
+			encryption_algorithm: f_1.expect("auto-generated code should have checked for presence of required fields"),
+			key_metadata: f_2,
+		};
+		Ok(ret)
+	}
+	pub fn write_to_out_protocol(&self, o_prot: &mut dyn TOutputProtocol) -> thrift::Result<()> {
+		let struct_ident = TStructIdentifier::new("FileCryptoMetaData");
+		o_prot.write_struct_begin(&struct_ident)?;
+		o_prot.write_field_begin(&TFieldIdentifier::new("encryption_algorithm", TType::Struct, 1))?;
+		self.encryption_algorithm.write_to_out_protocol(o_prot)?;
+		o_prot.write_field_end()?;
+		if let Some(ref fld_var) = self.key_metadata {
+			o_prot.write_field_begin(&TFieldIdentifier::new("key_metadata", TType::String, 2))?;
+			o_prot.write_bytes(fld_var)?;
+			o_prot.write_field_end()?;
+		}
+		o_prot.write_field_stop()?;
+		o_prot.write_struct_end()
+	}
+}
+
diff --git a/amadeus-parquet/src/internal/mod.rs b/amadeus-parquet/src/internal/mod.rs
index f754ce55..6a2aa55b 100644
--- a/amadeus-parquet/src/internal/mod.rs
+++ b/amadeus-parquet/src/internal/mod.rs
@@ -58,3 +58,5 @@ mod encodings;
 pub mod file;
 pub mod record;
 pub mod schema;
+#[allow(unused_results, renamed_and_removed_lints)]
+mod format;
diff --git a/amadeus-parquet/src/internal/record/impls.rs b/amadeus-parquet/src/internal/record/impls.rs
index c703c077..19aeb0a5 100644
--- a/amadeus-parquet/src/internal/record/impls.rs
+++ b/amadeus-parquet/src/internal/record/impls.rs
@@ -1,9 +1,14 @@
 use linked_hash_map::LinkedHashMap;
 use std::{
-	collections::HashMap, convert::{TryFrom, TryInto}, fmt, hash::Hash, marker::PhantomData, string::FromUtf8Error, sync::Arc
+	collections::HashMap, convert::{TryFrom, TryInto}, fmt, hash::{BuildHasher, Hash}, marker::PhantomData, string::FromUtf8Error, sync::Arc
 };
 use sum::{Sum2, Sum3};
 
+use amadeus_core::util::type_coerce;
+use amadeus_types::{
+	Bson, Date, DateTime, DateTimeWithoutTimezone, DateWithoutTimezone, Decimal, Enum, Group, IpAddr, Json, Time, TimeWithoutTimezone, Timezone, Url, Value, Webpage
+};
+
 #[cfg(debug_assertions)]
 use crate::internal::schema::parser::parse_message_type;
 use crate::internal::{
@@ -11,15 +16,12 @@ use crate::internal::{ basic::{LogicalType, Repetition}, column::reader::ColumnReader, data_type::{ BoolType, ByteArrayType, DoubleType, FixedLenByteArrayType, FloatType, Int32Type, Int64Type, Int96, Int96Type }, errors::{ParquetError, Result}, record::{ display::{DisplayFmt, DisplaySchemaGroup}, reader::{ -
BoolReader, BoxFixedLenByteArrayReader, BoxReader, ByteArrayReader, F32Reader, F64Reader, FixedLenByteArrayReader, GroupReader, I32Reader, I64Reader, I96Reader, KeyValueReader, MapReader, OptionReader, RepeatedReader, RootReader, TryIntoReader, TupleReader, ValueReader + BoolReader, BoxFixedLenByteArrayReader, BoxReader, ByteArrayReader, F32Reader, F64Reader, FixedLenByteArrayReader, GroupReader, I32Reader, I64Reader, I96Reader, KeyValueReader, MapReader, OptionReader, RepeatedReader, RootReader, TryIntoReader, TupleReader, ValueReader, VecU8Reader }, schemas::{ - BoolSchema, BoxSchema, BsonSchema, ByteArraySchema, DateSchema, DateTimeSchema, DecimalSchema, EnumSchema, F32Schema, F64Schema, FixedByteArraySchema, GroupSchema, I16Schema, I32Schema, I64Schema, I8Schema, JsonSchema, ListSchema, ListSchemaType, MapSchema, OptionSchema, RootSchema, StringSchema, TimeSchema, TupleSchema, U16Schema, U32Schema, U64Schema, U8Schema, ValueSchema + BoolSchema, BoxSchema, BsonSchema, ByteArraySchema, DateSchema, DateTimeSchema, DecimalSchema, EnumSchema, F32Schema, F64Schema, FixedByteArraySchema, GroupSchema, I16Schema, I32Schema, I64Schema, I8Schema, JsonSchema, ListSchema, ListSchemaType, MapSchema, OptionSchema, RootSchema, StringSchema, TimeSchema, TupleSchema, U16Schema, U32Schema, U64Schema, U8Schema, ValueSchema, VecU8Schema }, triplet::TypedTripletIter, types::{downcast, Downcast, Root}, ParquetData, Reader, Schema }, schema::types::{ColumnPath, Type} }; -use amadeus_types::{ - Bson, Date, DateTime, DateTimeWithoutTimezone, DateWithoutTimezone, Decimal, Enum, Group, IpAddr, Json, List, Map, Time, TimeWithoutTimezone, Timezone, Url, Value, Webpage -}; //////////////////////////////////////////////////////////////////////////////// @@ -48,28 +50,6 @@ macro_rules! 
via_string { //////////////////////////////////////////////////////////////////////////////// -impl ParquetData for Vec { - type Schema = ByteArraySchema; - type Reader = ByteArrayReader; - - fn parse(schema: &Type, repetition: Option) -> Result<(String, Self::Schema)> { - Value::parse(schema, repetition).and_then(downcast) - } - - fn reader( - _schema: &Self::Schema, path: &mut Vec, def_level: i16, rep_level: i16, - paths: &mut HashMap, batch_size: usize, - ) -> Self::Reader { - let col_path = ColumnPath::new(path.to_vec()); - let col_reader = paths.remove(&col_path).unwrap(); - ByteArrayReader { - column: TypedTripletIter::::new( - def_level, rep_level, col_reader, batch_size, - ), - } - } -} - impl ParquetData for Bson { type Schema = BsonSchema; type Reader = impl Reader; @@ -83,7 +63,7 @@ impl ParquetData for Bson { paths: &mut HashMap, batch_size: usize, ) -> Self::Reader { MapReader( - Vec::::reader(&schema.0, path, def_level, rep_level, paths, batch_size), + byte_array_reader(&schema.0, path, def_level, rep_level, paths, batch_size), |x| Ok(From::from(x)), ) } @@ -102,7 +82,7 @@ impl ParquetData for String { paths: &mut HashMap, batch_size: usize, ) -> Self::Reader { MapReader( - Vec::::reader(&schema.0, path, def_level, rep_level, paths, batch_size), + byte_array_reader(&schema.0, path, def_level, rep_level, paths, batch_size), |x| { String::from_utf8(x) .map_err(|err: FromUtf8Error| ParquetError::General(err.to_string())) @@ -232,34 +212,21 @@ where default fn parse( schema: &Type, repetition: Option, ) -> Result<(String, Self::Schema)> { - T::parse(schema, repetition) - .map(|(name, schema)| (name, unsafe { known_type(BoxSchema(schema)) })) + T::parse(schema, repetition).map(|(name, schema)| (name, type_coerce(BoxSchema(schema)))) } default fn reader( schema: &Self::Schema, path: &mut Vec, def_level: i16, rep_level: i16, paths: &mut HashMap, batch_size: usize, ) -> Self::Reader { - let schema = unsafe { known_type::<&Self::Schema, &BoxSchema>(schema) }; + let schema = type_coerce::<&Self::Schema, &BoxSchema>(schema); let ret = BoxReader(T::reader( &schema.0, path, def_level, rep_level, paths, batch_size, )); - unsafe { known_type(ret) } + type_coerce(ret) } } -/// This is used until specialization can handle groups of items together -unsafe fn known_type(a: A) -> B { - use std::mem; - assert_eq!( - (mem::size_of::(), mem::align_of::()), - (mem::size_of::(), mem::align_of::()) - ); - let ret = mem::transmute_copy(&a); - mem::forget(a); - ret -} - //////////////////////////////////////////////////////////////////////////////// impl ParquetData for Decimal { @@ -290,7 +257,7 @@ impl ParquetData for Decimal { precision, scale, } => DecimalReader::Array { - reader: >::reader( + reader: byte_array_reader( byte_array_schema, path, def_level, @@ -317,7 +284,7 @@ pub enum DecimalReader { scale: u8, }, Array { - reader: as ParquetData>::Reader, + reader: ByteArrayReader, precision: u32, scale: u32, }, @@ -497,21 +464,22 @@ pub(super) fn parse_list(schema: &Type) -> Result", - ))) + Err(ParquetError::General(String::from("Couldn't parse Vec"))) } -impl ParquetData for List +impl ParquetData for Vec where T: ParquetData, { - type Schema = ListSchema; - type Reader = impl Reader; + default type Schema = ListSchema; + default type Reader = RepeatedReader; - fn parse(schema: &Type, repetition: Option) -> Result<(String, Self::Schema)> { + default fn parse( + schema: &Type, repetition: Option, + ) -> Result<(String, Self::Schema)> { if repetition == Some(Repetition::Required) { - return 
parse_list::(schema).map(|schema2| (schema.name().to_owned(), schema2)); + return parse_list::(schema) + .map(|schema2| (schema.name().to_owned(), type_coerce(schema2))); } // A repeated field that is neither contained by a `LIST`- or `MAP`-annotated // group nor annotated by `LIST` or `MAP` should be interpreted as a @@ -520,70 +488,114 @@ where if repetition == Some(Repetition::Repeated) { return Ok(( schema.name().to_owned(), - ListSchema( + type_coerce(ListSchema( T::parse(&schema, Some(Repetition::Required))?.1, ListSchemaType::Repeated, - ), + )), )); } - Err(ParquetError::General(String::from( - "Couldn't parse List", - ))) + Err(ParquetError::General(String::from("Couldn't parse Vec"))) } - fn reader( + default fn reader( schema: &Self::Schema, path: &mut Vec, def_level: i16, rep_level: i16, paths: &mut HashMap, batch_size: usize, ) -> Self::Reader { - MapReader( - match schema.1 { - ListSchemaType::List(ref list_name, ref element_name) => { - let list_name = list_name.as_ref().map(|x| &**x).unwrap_or("list"); - let element_name = element_name.as_ref().map(|x| &**x).unwrap_or("element"); - - path.push(list_name.to_owned()); - path.push(element_name.to_owned()); - let reader = T::reader( - &schema.0, - path, - def_level + 1, - rep_level + 1, - paths, - batch_size, - ); - let _ = path.pop().unwrap(); - let _ = path.pop().unwrap(); + let schema: &ListSchema = type_coerce(schema); + type_coerce(list_reader::( + schema, path, def_level, rep_level, paths, batch_size, + )) + } +} - RepeatedReader { reader } - } - ListSchemaType::ListCompat(ref element_name) => { - path.push(element_name.to_owned()); - let reader = T::reader( - &schema.0, - path, - def_level + 1, - rep_level + 1, - paths, - batch_size, - ); - let _ = path.pop().unwrap(); +fn list_reader( + schema: &ListSchema, path: &mut Vec, def_level: i16, rep_level: i16, + paths: &mut HashMap, batch_size: usize, +) -> RepeatedReader +where + T: ParquetData, +{ + match &schema.1 { + ListSchemaType::List(ref list_name, ref element_name) => { + let list_name = list_name.as_ref().map(|x| &**x).unwrap_or("list"); + let element_name = element_name.as_ref().map(|x| &**x).unwrap_or("element"); + + path.push(list_name.to_owned()); + path.push(element_name.to_owned()); + let reader = T::reader( + &schema.0, + path, + def_level + 1, + rep_level + 1, + paths, + batch_size, + ); + let _ = path.pop().unwrap(); + let _ = path.pop().unwrap(); - RepeatedReader { reader } - } - ListSchemaType::Repeated => { - let reader = T::reader( - &schema.0, - path, - def_level + 1, - rep_level + 1, - paths, - batch_size, - ); - RepeatedReader { reader } - } - }, - |x| Ok(From::from(x)), - ) + RepeatedReader { reader } + } + ListSchemaType::ListCompat(ref element_name) => { + path.push(element_name.to_owned()); + let reader = T::reader( + &schema.0, + path, + def_level + 1, + rep_level + 1, + paths, + batch_size, + ); + let _ = path.pop().unwrap(); + + RepeatedReader { reader } + } + ListSchemaType::Repeated => { + let reader = T::reader( + &schema.0, + path, + def_level + 1, + rep_level + 1, + paths, + batch_size, + ); + RepeatedReader { reader } + } + } +} + +fn byte_array_reader( + _schema: &ByteArraySchema, path: &mut Vec, def_level: i16, rep_level: i16, + paths: &mut HashMap, batch_size: usize, +) -> ByteArrayReader { + let col_path = ColumnPath::new(path.to_vec()); + let col_reader = paths.remove(&col_path).unwrap(); + ByteArrayReader { + column: TypedTripletIter::::new( + def_level, rep_level, col_reader, batch_size, + ), + } +} + +impl ParquetData for Vec { + 
type Schema = VecU8Schema; + type Reader = VecU8Reader; + + fn parse(schema: &Type, repetition: Option) -> Result<(String, Self::Schema)> { + Value::parse(schema, repetition).and_then(downcast) + } + + fn reader( + schema: &Self::Schema, path: &mut Vec, def_level: i16, rep_level: i16, + paths: &mut HashMap, batch_size: usize, + ) -> Self::Reader { + match schema { + VecU8Schema::ByteArray(schema) => VecU8Reader::ByteArray(byte_array_reader( + schema, path, def_level, rep_level, paths, batch_size, + )), + VecU8Schema::List(schema) => VecU8Reader::List(list_reader::( + schema, path, def_level, rep_level, paths, batch_size, + )), + } } } @@ -630,14 +642,15 @@ pub(super) fn parse_map( } } Err(ParquetError::General(String::from( - "Couldn't parse Map", + "Couldn't parse HashMap", ))) } -impl ParquetData for Map +impl ParquetData for HashMap where K: ParquetData + Hash + Eq, V: ParquetData, + S: BuildHasher + Default, { type Schema = MapSchema; type Reader = impl Reader; @@ -647,7 +660,7 @@ where return parse_map::(schema).map(|schema2| (schema.name().to_owned(), schema2)); } Err(ParquetError::General(String::from( - "Couldn't parse Map", + "Couldn't parse HashMap", ))) } @@ -687,7 +700,7 @@ where keys_reader, values_reader, }, - |x: Vec<_>| Ok(From::from(x.into_iter().collect::>())), + |x: Vec<_>| Ok(From::from(x.into_iter().collect::>())), ) } } @@ -1882,11 +1895,9 @@ impl ParquetData for Value { schema, path, def_level, rep_level, paths, batch_size, )) } - ValueSchema::ByteArray(ref schema) => { - ValueReader::ByteArray( as ParquetData>::reader( - schema, path, def_level, rep_level, paths, batch_size, - )) - } + ValueSchema::ByteArray(ref schema) => ValueReader::ByteArray(byte_array_reader( + schema, path, def_level, rep_level, paths, batch_size, + )), ValueSchema::Bson(ref schema) => ValueReader::Bson(::reader( schema, path, def_level, rep_level, paths, batch_size, )), @@ -1902,12 +1913,17 @@ impl ParquetData for Value { schema, path, def_level, rep_level, paths, batch_size, )), ValueSchema::List(ref schema) => { - ValueReader::List(Box::new( as ParquetData>::reader( - schema, path, def_level, rep_level, paths, batch_size, + ValueReader::List(Box::new( as ParquetData>::reader( + type_coerce(&**schema), + path, + def_level, + rep_level, + paths, + batch_size, ))) } ValueSchema::Map(ref schema) => { - ValueReader::Map(Box::new( as ParquetData>::reader( + ValueReader::Map(Box::new( as ParquetData>::reader( schema, path, def_level, rep_level, paths, batch_size, ))) } diff --git a/amadeus-parquet/src/internal/record/reader.rs b/amadeus-parquet/src/internal/record/reader.rs index b13a9cb5..4a70aa2a 100644 --- a/amadeus-parquet/src/internal/record/reader.rs +++ b/amadeus-parquet/src/internal/record/reader.rs @@ -36,9 +36,7 @@ use crate::internal::{ BoolType, ByteArrayType, DoubleType, FixedLenByteArrayType, FloatType, Int32Type, Int64Type, Int96, Int96Type }, errors::{ParquetError, Result}, file::reader::{FileReader, RowGroupReader}, schema::types::{ColumnPath, SchemaDescPtr, SchemaDescriptor, Type} }; -use amadeus_types::{ - Bson, Date, DateTime, Decimal, Enum, Group, Json, List, Map, Time, Value, ValueRequired -}; +use amadeus_types::{Bson, Date, DateTime, Decimal, Enum, Group, Json, Time, Value, ValueRequired}; /// Default batch size for a reader const DEFAULT_BATCH_SIZE: usize = 1024; @@ -155,6 +153,56 @@ impl Reader for FixedLenByteArrayReader { reader_passthrough!(column); } +pub enum VecU8Reader { + ByteArray(ByteArrayReader), + List(RepeatedReader<::Reader>), +} +impl Reader for VecU8Reader 
{ + type Item = Vec; + + #[inline] + fn read(&mut self, def_level: i16, rep_level: i16) -> Result { + match self { + VecU8Reader::ByteArray(byte_array_reader) => { + byte_array_reader.read(def_level, rep_level) + } + VecU8Reader::List(list_reader) => list_reader.read(def_level, rep_level), + } + } + + #[inline] + fn advance_columns(&mut self) -> Result<()> { + match self { + VecU8Reader::ByteArray(byte_array_reader) => byte_array_reader.advance_columns(), + VecU8Reader::List(list_reader) => list_reader.advance_columns(), + } + } + + #[inline] + fn has_next(&self) -> bool { + match self { + VecU8Reader::ByteArray(byte_array_reader) => byte_array_reader.has_next(), + VecU8Reader::List(list_reader) => list_reader.has_next(), + } + } + + #[inline] + fn current_def_level(&self) -> i16 { + match self { + VecU8Reader::ByteArray(byte_array_reader) => byte_array_reader.current_def_level(), + VecU8Reader::List(list_reader) => list_reader.current_def_level(), + } + } + + #[inline] + fn current_rep_level(&self) -> i16 { + match self { + VecU8Reader::ByteArray(byte_array_reader) => byte_array_reader.current_rep_level(), + VecU8Reader::List(list_reader) => list_reader.current_rep_level(), + } + } +} + pub struct BoxFixedLenByteArrayReader { pub(super) column: TypedTripletIter, pub(super) marker: PhantomData, @@ -353,13 +401,13 @@ pub enum ValueReader { Time( for B where - A: DowncastImpl, + A: DowncastFrom, { fn downcast(self) -> Result { - A::downcast_impl(self) + A::downcast_from(self) } } -impl DowncastImpl for Box +impl DowncastFrom for Box where - B: DowncastImpl, + B: DowncastFrom, { - fn downcast_impl(t: A) -> Result + fn downcast_from(t: A) -> Result where Self: Sized, { diff --git a/amadeus-types/src/list.rs b/amadeus-types/src/list.rs deleted file mode 100644 index b3bbf391..00000000 --- a/amadeus-types/src/list.rs +++ /dev/null @@ -1,87 +0,0 @@ -//! Implement [`Record`] for [`List`]. - -use serde::{Deserialize, Serialize}; -use std::{ - fmt::{self, Debug}, ops::Index, slice::{self, SliceIndex}, vec -}; - -// use internal::{ -// basic::{LogicalType, Repetition}, -// column::reader::ColumnReader, -// errors::{ParquetError, Result}, -// record::{ -// reader::{MapReader, RepeatedReader}, -// schemas::{ListSchema, ListSchemaType}, -// Reader, Record, -// }, -// schema::types::{ColumnPath, Type}, -// }; - -/// [`List`](List) corresponds to the [List logical type](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists). -#[derive(Clone, Hash, Eq, PartialOrd, Serialize, Deserialize)] -pub struct List(Vec); - -impl List { - pub fn len(&self) -> usize { - self.0.len() - } - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns an iterator over references to the elements of the List. - pub fn iter(&self) -> slice::Iter<'_, T> { - self.0.iter() - } -} -impl IntoIterator for List { - type Item = T; - type IntoIter = vec::IntoIter; - - /// Creates an iterator over the elements of the List. 
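`VecU8Reader` above is the one reader that must dispatch at run time between two physical layouts for `Vec<u8>`. A hypothetical illustration of the same split on the schema side (`VecU8Schema`'s two variants are taken from their use in `impls.rs` above; this helper is not part of the diff):

```rust
// A Vec<u8> column may be stored either as a Parquet byte_array or as
// list<u8>; the schema variant decides which reader arm is built.
fn describe(schema: &VecU8Schema) -> &'static str {
	match schema {
		VecU8Schema::ByteArray(_) => "stored as a Parquet byte_array",
		VecU8Schema::List(_) => "stored as a Parquet list<u8>",
	}
}
```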
- fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} -// impl From for internal::record::types::List { -// fn from(list: List) -> Self { -// unimplemented!() -// } -// } - -impl From> for List { - fn from(vec: Vec) -> Self { - Self(vec) - } -} -impl Into> for List { - fn into(self) -> Vec { - self.0 - } -} -impl PartialEq> for List -where - T: PartialEq, -{ - fn eq(&self, other: &List) -> bool { - self.0 == other.0 - } -} -impl Index for List -where - I: SliceIndex<[T]>, -{ - type Output = >::Output; - - fn index(&self, index: I) -> &Self::Output { - self.0.index(index) - } -} -impl Debug for List -where - T: Debug, -{ - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_list().entries(self.iter()).finish() - } -} diff --git a/amadeus-types/src/map.rs b/amadeus-types/src/map.rs deleted file mode 100644 index 0be8f81a..00000000 --- a/amadeus-types/src/map.rs +++ /dev/null @@ -1,121 +0,0 @@ -//! Implement [`Record`] for [`Map`]. - -use serde::{Deserialize, Serialize}; -use std::{ - borrow::Borrow, cmp::Ordering, collections::{hash_map, HashMap}, fmt::{self, Debug}, hash::Hash -}; - -// use internal::{ -// basic::{LogicalType, Repetition}, -// column::reader::ColumnReader, -// errors::{ParquetError, Result}, -// record::{ -// reader::{KeyValueReader, MapReader}, -// schemas::MapSchema, -// Reader, Record, -// }, -// schema::types::{ColumnPath, Type}, -// }; - -/// [`Map`](Map) corresponds to the [Map logical type](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#maps). -#[derive(Clone, Eq, Serialize, Deserialize)] -pub struct Map(HashMap); - -impl Map -where - K: Hash + Eq, -{ - pub fn len(&self) -> usize { - self.0.len() - } - pub fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Returns a reference to the value corresponding to the key. - pub fn get(&self, k: &Q) -> Option<&V> - where - K: Borrow, - Q: Hash + Eq, - { - self.0.get(k) - } - - /// Returns an iterator over the `(ref key, ref value)` pairs of the Map. - pub fn iter(&self) -> hash_map::Iter<'_, K, V> { - self.0.iter() - } -} -impl IntoIterator for Map -where - K: Hash + Eq, -{ - type Item = (K, V); - type IntoIter = hash_map::IntoIter; - - /// Creates an iterator over the `(key, value)` pairs of the Map. 
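With the `Map` wrapper deleted here, a plain `HashMap` converts straight into a dynamic value via the `From` impl added later in this diff. A sketch, assuming `amadeus_types::Value` is in scope:

```rust
use std::collections::HashMap;

// No Map newtype needed: keys and values convert element-wise into Value.
fn to_value() -> Value {
	let mut m = HashMap::new();
	m.insert("answer".to_string(), 42u64);
	m.into() // yields Value::Map
}
```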
- fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} -// impl From for internal::record::types::Map { -// fn from(map: Map) -> Self { -// unimplemented!() -// } -// } -impl From> for Map -where - K: Hash + Eq, -{ - fn from(hashmap: HashMap) -> Self { - Self(hashmap) - } -} -impl Into> for Map -where - K: Hash + Eq, -{ - fn into(self) -> HashMap { - self.0 - } -} -impl PartialEq> for Map -where - K: Eq + Hash, - V: PartialEq, -{ - fn eq(&self, other: &Map) -> bool { - if self.0.len() != other.0.len() { - return false; - } - - self.0 - .iter() - .all(|(key, value)| other.0.get(key).map_or(false, |v| *value == *v)) - } -} -impl PartialOrd> for Map -where - K: Eq + Hash, - V: PartialOrd, -{ - fn partial_cmp(&self, other: &Map) -> Option { - if self.0.len() != other.0.len() { - return None; - } - None - // TODO - // self.0 - // .iter() - // .all(|(key, value)| other.0.get(key).map_or(false, |v| *value.partial_cmp(*v))) - } -} -impl Debug for Map -where - K: Hash + Eq + Debug, - V: Debug, -{ - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_map().entries(self.iter()).finish() - } -} diff --git a/amadeus-types/src/ord.rs b/amadeus-types/src/ord.rs new file mode 100644 index 00000000..0d108b1d --- /dev/null +++ b/amadeus-types/src/ord.rs @@ -0,0 +1,158 @@ +use std::{cmp::Ordering, collections::HashMap, hash::BuildHasher}; + +pub trait AmadeusOrd { + fn amadeus_cmp(&self, other: &Self) -> Ordering; +} + +macro_rules! ord { + ($($t:ty)*) => {$( + impl AmadeusOrd for $t { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ord::cmp(self, other) + } + } + )*} +} +ord!(bool u8 i8 u16 i16 u32 i32 u64 i64 String); + +impl AmadeusOrd for f32 { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ord::cmp( + &ordered_float::OrderedFloat(*self), + &ordered_float::OrderedFloat(*other), + ) + } +} +impl AmadeusOrd for f64 { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ord::cmp( + &ordered_float::OrderedFloat(*self), + &ordered_float::OrderedFloat(*other), + ) + } +} + +impl<'a, T> AmadeusOrd for &'a T +where + T: AmadeusOrd + ?Sized, +{ + fn amadeus_cmp(&self, other: &Self) -> Ordering { + (**self).amadeus_cmp(&**other) + } +} + +impl AmadeusOrd for Box +where + T: AmadeusOrd, +{ + fn amadeus_cmp(&self, other: &Self) -> Ordering { + (**self).amadeus_cmp(&**other) + } +} + +/// Sort `None` as larger than any non-`None` value +impl AmadeusOrd for Option +where + T: AmadeusOrd, +{ + fn amadeus_cmp(&self, other: &Self) -> Ordering { + match (self, other) { + (Some(a), Some(b)) => a.amadeus_cmp(b), + (None, None) => Ordering::Equal, + (None, Some(_)) => Ordering::Greater, + (Some(_), None) => Ordering::Less, + } + } +} + +impl AmadeusOrd for Vec +where + T: AmadeusOrd, +{ + fn amadeus_cmp(&self, other: &Self) -> Ordering { + for i in 0..self.len().min(other.len()) { + match self[i].amadeus_cmp(&other[i]) { + Ordering::Equal => (), + res => return res, + } + } + self.len().cmp(&other.len()) + } +} + +impl AmadeusOrd for HashMap +where + K: AmadeusOrd, + V: AmadeusOrd, + S: BuildHasher, +{ + fn amadeus_cmp(&self, other: &Self) -> Ordering { + let mut keys: Vec<(&K, &V, bool)> = self + .iter() + .map(|(k, v)| (k, v, false)) + .chain(other.iter().map(|(k, v)| (k, v, true))) + .collect(); + keys.sort_by(|(a, _, _), (b, _, _)| a.amadeus_cmp(b)); + let mut keys = &*keys; + while keys.len() >= 2 { + let ((a_k, a_v, a_r), (b_k, b_v, b_r)) = (keys[0], keys[1]); + if !a_r && b_r { + match a_k.amadeus_cmp(b_k) { + Ordering::Equal => (), + res => return res, + } + match 
a_v.amadeus_cmp(b_v) { + Ordering::Equal => (), + res => return res, + } + keys = &keys[2..]; + } else if !a_r { + return Ordering::Greater; + } else { + return Ordering::Less; + } + } + if keys.len() == 1 { + if !keys[0].2 { + Ordering::Greater + } else { + Ordering::Less + } + } else { + Ordering::Equal + } + } +} + +macro_rules! ord { + ($($i:tt)*) => {$( + impl AmadeusOrd for [T; $i] + where + T: AmadeusOrd + { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + for i in 0..$i { + match self[i].amadeus_cmp(&other[i]) { + Ordering::Equal => (), + res => return res, + } + } + Ordering::Equal + } + } + )*}; +} +array!(ord); + +macro_rules! ord { + ($len:tt $($t:ident $i:tt)*) => { + impl<$($t,)*> AmadeusOrd for ($($t,)*) where $($t: AmadeusOrd,)* { + #[allow(unused_variables)] + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ordering::Equal + $(.then_with(|| self.$i.amadeus_cmp(&other.$i)))* + } + } + }; +} +tuple!(ord); diff --git a/amadeus-types/src/time.rs b/amadeus-types/src/time.rs index 9e0bd88f..b6fda381 100644 --- a/amadeus-types/src/time.rs +++ b/amadeus-types/src/time.rs @@ -10,6 +10,8 @@ use std::{ cmp::Ordering, convert::TryInto, error::Error, fmt::{self, Display}, str::FromStr }; +use super::AmadeusOrd; + const JULIAN_DAY_OF_EPOCH: i64 = 2_440_588; const GREGORIAN_DAY_OF_EPOCH: i64 = 719_163; @@ -237,7 +239,7 @@ impl Timezone { } impl PartialOrd for Timezone { fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) + Some(Ord::cmp(self, other)) } } impl Ord for Timezone { @@ -250,6 +252,11 @@ impl Ord for Timezone { } } } +impl AmadeusOrd for Timezone { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ord::cmp(self, other) + } +} impl Display for Timezone { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.as_chrono().fmt(f) @@ -377,6 +384,11 @@ impl Date { ) } } +impl AmadeusOrd for Date { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ord::cmp(self, other) + } +} /// Corresponds to RFC 3339 and ISO 8601 string `%Y-%m-%d%:z` impl Display for Date { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -444,6 +456,11 @@ impl Time { self.timezone } } +impl AmadeusOrd for Time { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ord::cmp(self, other) + } +} /// Corresponds to RFC 3339 and ISO 8601 string `%H:%M:%S%.9f%:z` impl Display for Time { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -548,6 +565,11 @@ impl DateTime { ) } } +impl AmadeusOrd for DateTime { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ord::cmp(self, other) + } +} /// Corresponds to RFC 3339 and ISO 8601 string `%Y-%m-%dT%H:%M:%S%.9f%:z` impl Display for DateTime { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -577,6 +599,11 @@ pub struct Duration { days: i64, nanos: i64, } +impl AmadeusOrd for Duration { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ord::cmp(self, other) + } +} // Parquet's [Date logical type](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#date) is i32 days from Unix epoch // Postgres https://www.postgresql.org/docs/11/datatype-datetime.html is 4713 BC to 5874897 AD @@ -641,6 +668,11 @@ impl DateWithoutTimezone { NaiveDate::from_num_days_from_ce_opt((self.0 + GREGORIAN_DAY_OF_EPOCH).try_into().ok()?) 
} } +impl AmadeusOrd for DateWithoutTimezone { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ord::cmp(self, other) + } +} impl Display for DateWithoutTimezone { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.as_chrono().expect(TODO).fmt(f) @@ -755,6 +787,11 @@ impl TimeWithoutTimezone { // self.0 // } } +impl AmadeusOrd for TimeWithoutTimezone { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ord::cmp(self, other) + } +} impl Display for TimeWithoutTimezone { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.as_chrono().expect(TODO).fmt(f) @@ -916,6 +953,11 @@ impl DateTimeWithoutTimezone { // ) // } } +impl AmadeusOrd for DateTimeWithoutTimezone { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + Ord::cmp(self, other) + } +} impl Display for DateTimeWithoutTimezone { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { self.as_chrono().expect(TODO).fmt(f) diff --git a/amadeus-types/src/value.rs b/amadeus-types/src/value.rs index 1f0ee695..1edca017 100644 --- a/amadeus-types/src/value.rs +++ b/amadeus-types/src/value.rs @@ -6,11 +6,11 @@ use fxhash::FxBuildHasher; use linked_hash_map::LinkedHashMap; use serde::{Deserialize, Serialize}; use std::{ - cmp::Ordering, collections::HashMap, convert::TryInto, hash::{Hash, Hasher}, sync::Arc + cmp::Ordering, collections::HashMap, convert::TryInto, hash::{BuildHasher, Hash, Hasher}, sync::Arc }; use super::{ - Bson, Date, DateTime, DateTimeWithoutTimezone, DateWithoutTimezone, Decimal, Downcast, DowncastError, DowncastImpl, Enum, Group, IpAddr, Json, List, Map, Time, TimeWithoutTimezone, Timezone, Url, ValueRequired, Webpage + AmadeusOrd, Bson, Date, DateTime, DateTimeWithoutTimezone, DateWithoutTimezone, Decimal, Downcast, DowncastError, DowncastFrom, Enum, Group, IpAddr, Json, Time, TimeWithoutTimezone, Timezone, Url, ValueRequired, Webpage }; #[derive(Clone, PartialEq, Debug)] @@ -34,7 +34,6 @@ pub enum SchemaIncomplete { DateTimeWithoutTimezone, Timezone, Decimal, - ByteArray, Bson, String, Json, @@ -71,7 +70,6 @@ pub enum Schema { DateTimeWithoutTimezone, Timezone, Decimal, - ByteArray, Bson, String, Json, @@ -85,7 +83,7 @@ pub enum Schema { Option(Box), } -/// Represents any valid Parquet value. +/// Represents any valid Amadeus value. #[derive(Clone, PartialEq, Debug, Serialize, Deserialize)] pub enum Value { // Primitive types @@ -129,8 +127,6 @@ pub enum Value { Timezone(Timezone), /// Decimal value. Decimal(Decimal), - /// General binary value. - ByteArray(Vec), /// BSON binary value. Bson(Bson), /// UTF-8 encoded character string. @@ -148,9 +144,9 @@ pub enum Value { // Complex types /// List of elements. - List(List), + List(Vec), /// Map of key-value pairs. - Map(Map), + Map(HashMap), /// Struct, child elements are tuples of field-value pairs. Group(Group), /// Optional element. 
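`Value::List` and `Value::Map` now hold `Vec` and `HashMap` directly, so building and taking apart dynamic values uses std types end to end. A sketch, assuming the `is_list`/`into_list` accessors kept by this file:

```rust
fn demo() {
	let row = Value::List(vec![Value::U8(1), Value::String("two".into())]);
	assert!(row.is_list());
	// No List wrapper to peel off any more:
	let items: Vec<Value> = row.into_list().unwrap();
	assert_eq!(items.len(), 2);
}
```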
@@ -186,7 +182,6 @@ mod optional_value { ValueRequired::DateTimeWithoutTimezone(value) => serializer.serialize_some(&value), ValueRequired::Timezone(value) => serializer.serialize_some(&value), ValueRequired::Decimal(value) => serializer.serialize_some(&value), - ValueRequired::ByteArray(value) => serializer.serialize_some(&value), ValueRequired::Bson(value) => serializer.serialize_some(&value), ValueRequired::String(value) => serializer.serialize_some(&value), ValueRequired::Json(value) => serializer.serialize_some(&value), @@ -286,24 +281,20 @@ impl Hash for Value { Self::Decimal(_value) => { 14u8.hash(state); } - Self::ByteArray(value) => { - 15u8.hash(state); - value.hash(state); - } Self::Bson(value) => { - 16u8.hash(state); + 15u8.hash(state); value.hash(state); } Self::String(value) => { - 17u8.hash(state); + 16u8.hash(state); value.hash(state); } Self::Json(value) => { - 18u8.hash(state); + 17u8.hash(state); value.hash(state); } Self::Enum(value) => { - 19u8.hash(state); + 18u8.hash(state); value.hash(state); } Self::Url(value) => { @@ -311,25 +302,25 @@ impl Hash for Value { value.hash(state); } Self::Webpage(value) => { - 19u8.hash(state); + 20u8.hash(state); value.hash(state); } Self::IpAddr(value) => { - 19u8.hash(state); + 21u8.hash(state); value.hash(state); } Self::List(value) => { - 20u8.hash(state); + 22u8.hash(state); value.hash(state); } Self::Map(_value) => { - 21u8.hash(state); + 23u8.hash(state); } Self::Group(_value) => { - 22u8.hash(state); + 24u8.hash(state); } Self::Option(value) => { - 23u8.hash(state); + 25u8.hash(state); value.hash(state); } } @@ -360,7 +351,6 @@ impl PartialOrd for Value { } (Self::Timezone(a), Self::Timezone(b)) => a.partial_cmp(b), (Self::Decimal(a), Self::Decimal(b)) => a.partial_cmp(b), - (Self::ByteArray(a), Self::ByteArray(b)) => a.partial_cmp(b), (Self::Bson(a), Self::Bson(b)) => a.partial_cmp(b), (Self::String(a), Self::String(b)) => a.partial_cmp(b), (Self::Json(a), Self::Json(b)) => a.partial_cmp(b), @@ -376,6 +366,45 @@ impl PartialOrd for Value { } } } +impl AmadeusOrd for Value { + fn amadeus_cmp(&self, other: &Self) -> Ordering { + match (self, other) { + (Self::Bool(a), Self::Bool(b)) => a.amadeus_cmp(b), + (Self::U8(a), Self::U8(b)) => a.amadeus_cmp(b), + (Self::I8(a), Self::I8(b)) => a.amadeus_cmp(b), + (Self::U16(a), Self::U16(b)) => a.amadeus_cmp(b), + (Self::I16(a), Self::I16(b)) => a.amadeus_cmp(b), + (Self::U32(a), Self::U32(b)) => a.amadeus_cmp(b), + (Self::I32(a), Self::I32(b)) => a.amadeus_cmp(b), + (Self::U64(a), Self::U64(b)) => a.amadeus_cmp(b), + (Self::I64(a), Self::I64(b)) => a.amadeus_cmp(b), + (Self::F32(a), Self::F32(b)) => a.amadeus_cmp(b), + (Self::F64(a), Self::F64(b)) => a.amadeus_cmp(b), + (Self::Date(a), Self::Date(b)) => a.amadeus_cmp(b), + (Self::DateWithoutTimezone(a), Self::DateWithoutTimezone(b)) => a.amadeus_cmp(b), + (Self::Time(a), Self::Time(b)) => a.amadeus_cmp(b), + (Self::TimeWithoutTimezone(a), Self::TimeWithoutTimezone(b)) => a.amadeus_cmp(b), + (Self::DateTime(a), Self::DateTime(b)) => a.amadeus_cmp(b), + (Self::DateTimeWithoutTimezone(a), Self::DateTimeWithoutTimezone(b)) => { + a.amadeus_cmp(b) + } + (Self::Timezone(a), Self::Timezone(b)) => a.amadeus_cmp(b), + (Self::Decimal(a), Self::Decimal(b)) => a.amadeus_cmp(b), + (Self::Bson(a), Self::Bson(b)) => a.amadeus_cmp(b), + (Self::String(a), Self::String(b)) => a.amadeus_cmp(b), + (Self::Json(a), Self::Json(b)) => a.amadeus_cmp(b), + (Self::Enum(a), Self::Enum(b)) => a.amadeus_cmp(b), + (Self::Url(a), Self::Url(b)) => a.amadeus_cmp(b), + 
(Self::Webpage(a), Self::Webpage(b)) => a.amadeus_cmp(b), + (Self::IpAddr(a), Self::IpAddr(b)) => a.amadeus_cmp(b), + (Self::List(a), Self::List(b)) => a.amadeus_cmp(b), + (Self::Map(a), Self::Map(b)) => a.amadeus_cmp(b), + (Self::Group(a), Self::Group(b)) => a.amadeus_cmp(b), + (Self::Option(a), Self::Option(b)) => a.amadeus_cmp(b), + _ => unimplemented!(), + } + } +} impl Value { fn type_name(&self) -> &'static str { @@ -399,7 +428,6 @@ impl Value { Self::DateTimeWithoutTimezone(_value) => "date_time_without_timezone", Self::Timezone(_value) => "timezone", Self::Decimal(_value) => "decimal", - Self::ByteArray(_value) => "byte_array", Self::Bson(_value) => "bson", Self::String(_value) => "string", Self::Json(_value) => "json", @@ -1041,39 +1069,6 @@ impl Value { } } - /// Returns true if the `Value` is an ByteArray. Returns false otherwise. - pub fn is_byte_array(&self) -> bool { - if let Self::ByteArray(_) = self { - true - } else { - false - } - } - - /// If the `Value` is an ByteArray, return a reference to it. Returns Err otherwise. - pub fn as_byte_array(&self) -> Result<&Vec, DowncastError> { - if let Self::ByteArray(ret) = self { - Ok(ret) - } else { - Err(DowncastError { - from: self.type_name(), - to: "byte_array", - }) - } - } - - /// If the `Value` is an ByteArray, return it. Returns Err otherwise. - pub fn into_byte_array(self) -> Result, DowncastError> { - if let Self::ByteArray(ret) = self { - Ok(ret) - } else { - Err(DowncastError { - from: self.type_name(), - to: "byte_array", - }) - } - } - /// Returns true if the `Value` is an Bson. Returns false otherwise. pub fn is_bson(&self) -> bool { if let Self::Bson(_) = self { @@ -1315,7 +1310,7 @@ impl Value { } /// If the `Value` is an List, return a reference to it. Returns Err otherwise. - pub fn as_list(&self) -> Result<&List, DowncastError> { + pub fn as_list(&self) -> Result<&Vec, DowncastError> { if let Self::List(ret) = self { Ok(ret) } else { @@ -1327,7 +1322,7 @@ impl Value { } /// If the `Value` is an List, return it. Returns Err otherwise. - pub fn into_list(self) -> Result, DowncastError> { + pub fn into_list(self) -> Result, DowncastError> { if let Self::List(ret) = self { Ok(ret) } else { @@ -1348,7 +1343,7 @@ impl Value { } /// If the `Value` is an Map, return a reference to it. Returns Err otherwise. - pub fn as_map(&self) -> Result<&Map, DowncastError> { + pub fn as_map(&self) -> Result<&HashMap, DowncastError> { if let Self::Map(ret) = self { Ok(ret) } else { @@ -1360,7 +1355,7 @@ impl Value { } /// If the `Value` is an Map, return it. Returns Err otherwise. 
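Since `Value::ByteArray` is gone, a `Vec<u8>` enters the dynamic layer as a list of `U8`s and comes back out through the generic `Vec<T>` downcast. A sketch, assuming the `Downcast` trait and `DowncastError` are in scope:

```rust
fn bytes_round_trip() -> Result<(), DowncastError> {
	// In: the generic From<Vec<T>> impl produces a Value::List of U8s.
	let v: Value = vec![1u8, 2, 3].into();
	// Out: the generic Vec<T> downcast recovers the bytes element-wise.
	let bytes: Vec<u8> = v.downcast()?;
	assert_eq!(bytes, vec![1, 2, 3]);
	Ok(())
}
```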
- pub fn into_map(self) -> Result, DowncastError> { + pub fn into_map(self) -> Result, DowncastError> { if let Self::Map(ret) = self { Ok(ret) } else { @@ -1533,11 +1528,6 @@ impl From for Value { Self::Decimal(value) } } -impl From> for Value { - fn from(value: Vec) -> Self { - Self::ByteArray(value) - } -} impl From for Value { fn from(value: Bson) -> Self { Self::Bson(value) @@ -1573,37 +1563,38 @@ impl From for Value { Self::IpAddr(value) } } -impl From> for Value +impl From> for Value where T: Into, { - default fn from(value: List) -> Self { - Self::List(List::from( - value.into_iter().map(Into::into).collect::>(), - )) + default fn from(value: Vec) -> Self { + Self::List(value.into_iter().map(Into::into).collect::>()) } } -impl From> for Value { - fn from(value: List) -> Self { +#[doc(hidden)] +impl From> for Value { + fn from(value: Vec) -> Self { Self::List(value) } } -impl From> for Value +impl From> for Value where K: Into + Hash + Eq, V: Into, + S: BuildHasher, { - default fn from(value: Map) -> Self { - Self::Map(Map::from( + default fn from(value: HashMap) -> Self { + Self::Map( value .into_iter() .map(|(k, v)| (k.into(), v.into())) - .collect::>(), - )) + .collect(), + ) } } -impl From> for Value { - fn from(value: Map) -> Self { +#[doc(hidden)] +impl From> for Value { + fn from(value: HashMap) -> Self { Self::Map(value) } } @@ -1624,6 +1615,7 @@ where ) } } +#[doc(hidden)] impl From> for Value { fn from(value: Option) -> Self { Self::Option(value.map(|x| >::from(x).unwrap())) @@ -1639,10 +1631,13 @@ where } macro_rules! array_from { ($($i:tt)*) => {$( - impl From<[u8; $i]> for Value { - fn from(value: [u8; $i]) -> Self { - let x: Box<[u8]> = Box::new(value); - let x: Vec = x.into(); + impl From<[T; $i]> for Value + where + T: Into + { + fn from(value: [T; $i]) -> Self { + let x: Box<[T]> = Box::new(value); + let x: Vec = x.into(); x.into() } } @@ -1664,214 +1659,216 @@ tuple!(tuple_from); // Downcast implementations for Value so we can try downcasting it to a specific type if // we know it. 
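The `From` impls above and the downcast impls that follow are two halves of one round trip; the array conversions in particular are now generic over `T` rather than hard-coded to `[u8; N]`. A sketch (hypothetical test, same scope assumptions as above):

```rust
fn array_round_trip() -> Result<(), DowncastError> {
	// In: [T; N] converts for any T: Into<Value>, not just u8.
	let v: Value = [10i32, 20, 30].into();
	// Out: the matching array downcast recovers the fixed-size array.
	let back: [i32; 3] = v.downcast()?;
	assert_eq!(back, [10, 20, 30]);
	Ok(())
}
```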
-impl DowncastImpl for Value { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for Value { + fn downcast_from(self_: Value) -> Result { Ok(self_) } } -impl DowncastImpl for bool { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for bool { + fn downcast_from(self_: Value) -> Result { self_.into_bool() } } -impl DowncastImpl for u8 { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for u8 { + fn downcast_from(self_: Value) -> Result { self_.into_u8() } } -impl DowncastImpl for i8 { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for i8 { + fn downcast_from(self_: Value) -> Result { self_.into_i8() } } -impl DowncastImpl for u16 { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for u16 { + fn downcast_from(self_: Value) -> Result { self_.into_u16() } } -impl DowncastImpl for i16 { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for i16 { + fn downcast_from(self_: Value) -> Result { self_.into_i16() } } -impl DowncastImpl for u32 { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for u32 { + fn downcast_from(self_: Value) -> Result { self_.into_u32() } } -impl DowncastImpl for i32 { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for i32 { + fn downcast_from(self_: Value) -> Result { self_.into_i32() } } -impl DowncastImpl for u64 { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for u64 { + fn downcast_from(self_: Value) -> Result { self_.into_u64() } } -impl DowncastImpl for i64 { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for i64 { + fn downcast_from(self_: Value) -> Result { self_.into_i64() } } -impl DowncastImpl for f32 { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for f32 { + fn downcast_from(self_: Value) -> Result { self_.into_f32() } } -impl DowncastImpl for f64 { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for f64 { + fn downcast_from(self_: Value) -> Result { self_.into_f64() } } -impl DowncastImpl for Date { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for Date { + fn downcast_from(self_: Value) -> Result { self_.into_date() } } -impl DowncastImpl for DateWithoutTimezone { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for DateWithoutTimezone { + fn downcast_from(self_: Value) -> Result { self_.into_date_without_timezone() } } -impl DowncastImpl for Time { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for Time { + fn downcast_from(self_: Value) -> Result { self_.into_time() } } -impl DowncastImpl for TimeWithoutTimezone { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for TimeWithoutTimezone { + fn downcast_from(self_: Value) -> Result { self_.into_time_without_timezone() } } -impl DowncastImpl for DateTime { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for DateTime { + fn downcast_from(self_: Value) -> Result { self_.into_date_time() } } -impl DowncastImpl for DateTimeWithoutTimezone { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for DateTimeWithoutTimezone { + fn downcast_from(self_: Value) -> Result { self_.into_date_time_without_timezone() } } -impl DowncastImpl for Timezone { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for Timezone { + fn downcast_from(self_: Value) -> Result { self_.into_timezone() } } -impl DowncastImpl for Decimal { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for Decimal { + fn downcast_from(self_: Value) -> 
Result { self_.into_decimal() } } -impl DowncastImpl for Vec { - fn downcast_impl(self_: Value) -> Result { - self_.into_byte_array() - } -} -impl DowncastImpl for Bson { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for Bson { + fn downcast_from(self_: Value) -> Result { self_.into_bson() } } -impl DowncastImpl for String { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for String { + fn downcast_from(self_: Value) -> Result { self_.into_string() } } -impl DowncastImpl for Json { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for Json { + fn downcast_from(self_: Value) -> Result { self_.into_json() } } -impl DowncastImpl for Enum { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for Enum { + fn downcast_from(self_: Value) -> Result { self_.into_enum() } } -impl DowncastImpl for Url { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for Url { + fn downcast_from(self_: Value) -> Result { self_.into_url() } } -impl DowncastImpl for Webpage<'static> { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for Webpage<'static> { + fn downcast_from(self_: Value) -> Result { self_.into_webpage() } } -impl DowncastImpl for IpAddr { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for IpAddr { + fn downcast_from(self_: Value) -> Result { self_.into_ip_addr() } } -impl DowncastImpl for List +impl DowncastFrom for Vec where - T: DowncastImpl, + T: DowncastFrom, { - default fn downcast_impl(self_: Value) -> Result { + default fn downcast_from(self_: Value) -> Result { self_.into_list().and_then(|list| { list.into_iter() .map(Downcast::downcast) .collect::, _>>() - .map(List::from) }) } } -impl DowncastImpl for List { - fn downcast_impl(self_: Value) -> Result { +#[doc(hidden)] +impl DowncastFrom for Vec { + fn downcast_from(self_: Value) -> Result { self_.into_list() } } -impl DowncastImpl for Map +impl DowncastFrom for HashMap where - K: DowncastImpl + Hash + Eq, - V: DowncastImpl, + K: DowncastFrom + Hash + Eq, + V: DowncastFrom, + S: BuildHasher + Default, { - default fn downcast_impl(self_: Value) -> Result { + default fn downcast_from(self_: Value) -> Result { self_.into_map().and_then(|map| { map.into_iter() .map(|(k, v)| Ok((k.downcast()?, v.downcast()?))) - .collect::, _>>() - .map(Map::from) + .collect() }) } } -impl DowncastImpl for Map { - fn downcast_impl(self_: Value) -> Result { +#[doc(hidden)] +#[allow(clippy::implicit_hasher)] +impl DowncastFrom for HashMap { + fn downcast_from(self_: Value) -> Result { self_.into_map() } } -impl DowncastImpl for Group { - fn downcast_impl(self_: Value) -> Result { +impl DowncastFrom for Group { + fn downcast_from(self_: Value) -> Result { self_.into_group() } } -impl DowncastImpl for Option +impl DowncastFrom for Option where - T: DowncastImpl, + T: DowncastFrom, { - default fn downcast_impl(self_: Value) -> Result { + default fn downcast_from(self_: Value) -> Result { match self_.into_option()? { Some(t) => t.downcast().map(Some), None => Ok(None), } } } -impl DowncastImpl for Option { - fn downcast_impl(self_: Value) -> Result { +#[doc(hidden)] +impl DowncastFrom for Option { + fn downcast_from(self_: Value) -> Result { self_.into_option() } } macro_rules! 
array_downcast { ($($i:tt)*) => {$( - impl DowncastImpl for [u8; $i] { - fn downcast_impl(self_: Value) -> Result { + impl DowncastFrom for [T; $i] + where + T: DowncastFrom + { + fn downcast_from(self_: Value) -> Result { let err = DowncastError { from: self_.type_name(), - to: stringify!([u8; $i]), + to: stringify!([T; $i]), }; - let x: Box<[u8]> = self_.into_byte_array().map_err(|_| err)?.into_boxed_slice(); - (&*x).try_into().map_err(|_| err) + let x: Box<[T]> = >::downcast_from(self_).map_err(|_| err)?.into_boxed_slice(); + let x: Box = x.try_into().map_err(|_| err)?; + Ok(*x) } } )*} @@ -1879,8 +1876,8 @@ macro_rules! array_downcast { array!(array_downcast); macro_rules! tuple_downcast { ($len:tt $($t:ident $i:tt)*) => ( - impl<$($t,)*> DowncastImpl for ($($t,)*) where $($t: DowncastImpl,)* { - fn downcast_impl(self_: Value) -> Result { + impl<$($t,)*> DowncastFrom for ($($t,)*) where $($t: DowncastFrom,)* { + fn downcast_from(self_: Value) -> Result { #[allow(unused_mut, unused_variables)] let mut fields = self_.into_group()?.into_fields().into_iter(); if fields.len() != $len { @@ -2002,13 +1999,13 @@ impl PartialEq for Value { .unwrap_or(false) } } -impl PartialEq> for Value { - fn eq(&self, other: &Vec) -> bool { - self.as_byte_array() - .map(|byte_array| byte_array == other) - .unwrap_or(false) - } -} +// impl PartialEq> for Value { +// fn eq(&self, other: &Vec) -> bool { +// self.as_byte_array() +// .map(|byte_array| byte_array == other) +// .unwrap_or(false) +// } +// } impl PartialEq for Value { fn eq(&self, other: &Bson) -> bool { self.as_bson().map(|bson| bson == other).unwrap_or(false) @@ -2050,20 +2047,21 @@ impl PartialEq for Value { .unwrap_or(false) } } -impl PartialEq> for Value +impl PartialEq> for Value where Value: PartialEq, { - fn eq(&self, other: &List) -> bool { + fn eq(&self, other: &Vec) -> bool { self.as_list().map(|list| list == other).unwrap_or(false) } } -impl PartialEq> for Value +impl PartialEq> for Value where Value: PartialEq + PartialEq, K: Hash + Eq + Clone + Into, + S: BuildHasher, { - fn eq(&self, other: &Map) -> bool { + fn eq(&self, other: &HashMap) -> bool { self.as_map() .map(|map| { if map.len() != other.len() { @@ -2095,7 +2093,43 @@ where fn eq(&self, other: &Option) -> bool { self.as_option() .map(|option| match (&option, other) { - (Some(a), Some(b)) if a.eq(b) => true, + (Some(a), Some(b)) => match a { + ValueRequired::Bool(value) => &Value::Bool(*value) == b, + ValueRequired::U8(value) => &Value::U8(*value) == b, + ValueRequired::I8(value) => &Value::I8(*value) == b, + ValueRequired::U16(value) => &Value::U16(*value) == b, + ValueRequired::I16(value) => &Value::I16(*value) == b, + ValueRequired::U32(value) => &Value::U32(*value) == b, + ValueRequired::I32(value) => &Value::I32(*value) == b, + ValueRequired::U64(value) => &Value::U64(*value) == b, + ValueRequired::I64(value) => &Value::I64(*value) == b, + ValueRequired::F32(value) => &Value::F32(*value) == b, + ValueRequired::F64(value) => &Value::F64(*value) == b, + ValueRequired::Date(value) => &Value::Date(*value) == b, + ValueRequired::DateWithoutTimezone(value) => { + &Value::DateWithoutTimezone(*value) == b + } + ValueRequired::Time(value) => &Value::Time(*value) == b, + ValueRequired::TimeWithoutTimezone(value) => { + &Value::TimeWithoutTimezone(*value) == b + } + ValueRequired::DateTime(value) => &Value::DateTime(*value) == b, + ValueRequired::DateTimeWithoutTimezone(value) => { + &Value::DateTimeWithoutTimezone(*value) == b + } + ValueRequired::Timezone(value) => 
&Value::Timezone(*value) == b, + ValueRequired::Decimal(value) => &Value::Decimal(value.clone()) == b, + ValueRequired::Bson(value) => &Value::Bson(value.clone()) == b, + ValueRequired::String(value) => &Value::String(value.clone()) == b, + ValueRequired::Json(value) => &Value::Json(value.clone()) == b, + ValueRequired::Enum(value) => &Value::Enum(value.clone()) == b, + ValueRequired::Url(value) => &Value::Url(value.clone()) == b, + ValueRequired::Webpage(value) => &Value::Webpage(value.clone()) == b, + ValueRequired::IpAddr(value) => &Value::IpAddr(*value) == b, + ValueRequired::List(value) => &Value::List(value.clone()) == b, + ValueRequired::Map(value) => &Value::Map(value.clone()) == b, + ValueRequired::Group(value) => &Value::Group(value.clone()) == b, + }, (None, None) => true, _ => false, }) diff --git a/amadeus-types/src/value_required.rs b/amadeus-types/src/value_required.rs index e6fae30e..a2424136 100644 --- a/amadeus-types/src/value_required.rs +++ b/amadeus-types/src/value_required.rs @@ -2,11 +2,11 @@ //! Parquet value. use std::{ - cmp::Ordering, hash::{Hash, Hasher} + cmp::Ordering, collections::HashMap, hash::{Hash, Hasher} }; use super::{ - Bson, Date, DateTime, DateTimeWithoutTimezone, DateWithoutTimezone, Decimal, Enum, Group, IpAddr, Json, List, Map, Time, TimeWithoutTimezone, Timezone, Url, Value, Webpage + AmadeusOrd, Bson, Date, DateTime, DateTimeWithoutTimezone, DateWithoutTimezone, Decimal, Enum, Group, IpAddr, Json, Time, TimeWithoutTimezone, Timezone, Url, Value, Webpage }; /// Represents any valid required Parquet value. Exists to avoid [`Value`] being recursive @@ -54,8 +54,6 @@ pub enum ValueRequired { Timezone(Timezone), /// Decimal value. Decimal(Decimal), - /// General binary value. - ByteArray(Vec), /// BSON binary value. Bson(Bson), /// UTF-8 encoded character string. @@ -73,9 +71,9 @@ pub enum ValueRequired { // Complex types /// List of elements. - List(List), + List(Vec), /// Map of key-value pairs. - Map(Map), + Map(HashMap), /// Struct, child elements are tuples of field-value pairs. 
 	Group(Group),
 }
@@ -157,24 +155,20 @@ impl Hash for ValueRequired {
 			Self::Decimal(_value) => {
 				14u8.hash(state);
 			}
-			Self::ByteArray(value) => {
-				15u8.hash(state);
-				value.hash(state);
-			}
 			Self::Bson(value) => {
-				16u8.hash(state);
+				15u8.hash(state);
 				value.hash(state);
 			}
 			Self::String(value) => {
-				17u8.hash(state);
+				16u8.hash(state);
 				value.hash(state);
 			}
 			Self::Json(value) => {
-				18u8.hash(state);
+				17u8.hash(state);
 				value.hash(state);
 			}
 			Self::Enum(value) => {
-				19u8.hash(state);
+				18u8.hash(state);
 				value.hash(state);
 			}
 			Self::Url(value) => {
@@ -182,22 +176,22 @@ impl Hash for ValueRequired {
 				value.hash(state);
 			}
 			Self::Webpage(value) => {
-				19u8.hash(state);
+				20u8.hash(state);
 				value.hash(state);
 			}
 			Self::IpAddr(value) => {
-				19u8.hash(state);
+				21u8.hash(state);
 				value.hash(state);
 			}
 			Self::List(value) => {
-				20u8.hash(state);
+				22u8.hash(state);
 				value.hash(state);
 			}
 			Self::Map(_value) => {
-				21u8.hash(state);
+				23u8.hash(state);
 			}
 			Self::Group(_value) => {
-				22u8.hash(state);
+				24u8.hash(state);
 			}
 		}
 	}
@@ -227,7 +221,6 @@ impl PartialOrd for ValueRequired {
 			}
 			(Self::Timezone(a), Self::Timezone(b)) => a.partial_cmp(b),
 			(Self::Decimal(a), Self::Decimal(b)) => a.partial_cmp(b),
-			(Self::ByteArray(a), Self::ByteArray(b)) => a.partial_cmp(b),
 			(Self::Bson(a), Self::Bson(b)) => a.partial_cmp(b),
 			(Self::String(a), Self::String(b)) => a.partial_cmp(b),
 			(Self::Json(a), Self::Json(b)) => a.partial_cmp(b),
@@ -242,26 +235,43 @@ impl PartialOrd for ValueRequired {
 		}
 	}
 }
-
-impl ValueRequired {
-	pub(crate) fn eq<T>(&self, other: &T) -> bool
-	where
-		Value: PartialEq<T>,
-	{
-		let self_ = unsafe { std::ptr::read(self) };
-		let self_: Value = self_.into();
-		let ret = &self_ == other;
-		std::mem::forget(self_);
-		ret
+impl AmadeusOrd for ValueRequired {
+	fn amadeus_cmp(&self, other: &Self) -> Ordering {
+		match (self, other) {
+			(Self::Bool(a), Self::Bool(b)) => a.amadeus_cmp(b),
+			(Self::U8(a), Self::U8(b)) => a.amadeus_cmp(b),
+			(Self::I8(a), Self::I8(b)) => a.amadeus_cmp(b),
+			(Self::U16(a), Self::U16(b)) => a.amadeus_cmp(b),
+			(Self::I16(a), Self::I16(b)) => a.amadeus_cmp(b),
+			(Self::U32(a), Self::U32(b)) => a.amadeus_cmp(b),
+			(Self::I32(a), Self::I32(b)) => a.amadeus_cmp(b),
+			(Self::U64(a), Self::U64(b)) => a.amadeus_cmp(b),
+			(Self::I64(a), Self::I64(b)) => a.amadeus_cmp(b),
+			(Self::F32(a), Self::F32(b)) => a.amadeus_cmp(b),
+			(Self::F64(a), Self::F64(b)) => a.amadeus_cmp(b),
+			(Self::Date(a), Self::Date(b)) => a.amadeus_cmp(b),
+			(Self::DateWithoutTimezone(a), Self::DateWithoutTimezone(b)) => a.amadeus_cmp(b),
+			(Self::Time(a), Self::Time(b)) => a.amadeus_cmp(b),
+			(Self::TimeWithoutTimezone(a), Self::TimeWithoutTimezone(b)) => a.amadeus_cmp(b),
+			(Self::DateTime(a), Self::DateTime(b)) => a.amadeus_cmp(b),
+			(Self::DateTimeWithoutTimezone(a), Self::DateTimeWithoutTimezone(b)) => {
+				a.amadeus_cmp(b)
+			}
+			(Self::Timezone(a), Self::Timezone(b)) => a.amadeus_cmp(b),
+			(Self::Decimal(a), Self::Decimal(b)) => a.amadeus_cmp(b),
+			(Self::Bson(a), Self::Bson(b)) => a.amadeus_cmp(b),
+			(Self::String(a), Self::String(b)) => a.amadeus_cmp(b),
+			(Self::Json(a), Self::Json(b)) => a.amadeus_cmp(b),
+			(Self::Enum(a), Self::Enum(b)) => a.amadeus_cmp(b),
+			(Self::Url(a), Self::Url(b)) => a.amadeus_cmp(b),
+			(Self::Webpage(a), Self::Webpage(b)) => a.amadeus_cmp(b),
+			(Self::IpAddr(a), Self::IpAddr(b)) => a.amadeus_cmp(b),
+			(Self::List(a), Self::List(b)) => a.amadeus_cmp(b),
+			(Self::Map(a), Self::Map(b)) => a.amadeus_cmp(b),
+			(Self::Group(a), Self::Group(b)) => a.amadeus_cmp(b),
+			_ => unimplemented!(),
+		}
 	}
-	// pub(crate) fn as_value<F, O>(&self, f: F) -> O
-	// where
-	// 	F: FnOnce(&Value) -> O,
-	// {
-	// 	let self_ = unsafe { std::ptr::read(self) };
-	// 	let self_: ManuallyDrop<Value> = ManuallyDrop::new(self_.into());
-	// 	f(&self_)
-	// }
 }
 
 impl From<ValueRequired> for Value {
@@ -286,7 +296,6 @@ impl From<ValueRequired> for Value {
 			ValueRequired::DateTimeWithoutTimezone(value) => Self::DateTimeWithoutTimezone(value),
 			ValueRequired::Timezone(value) => Self::Timezone(value),
 			ValueRequired::Decimal(value) => Self::Decimal(value),
-			ValueRequired::ByteArray(value) => Self::ByteArray(value),
 			ValueRequired::Bson(value) => Self::Bson(value),
 			ValueRequired::String(value) => Self::String(value),
 			ValueRequired::Json(value) => Self::Json(value),
@@ -323,7 +332,6 @@ impl From<Value> for Option<ValueRequired> {
 			Value::DateTimeWithoutTimezone(value) => ValueRequired::DateTimeWithoutTimezone(value),
 			Value::Timezone(value) => ValueRequired::Timezone(value),
 			Value::Decimal(value) => ValueRequired::Decimal(value),
-			Value::ByteArray(value) => ValueRequired::ByteArray(value),
 			Value::Bson(value) => ValueRequired::Bson(value),
 			Value::String(value) => ValueRequired::String(value),
 			Value::Json(value) => ValueRequired::Json(value),
@@ -360,7 +368,6 @@ impl From<Value> for Option<ValueRequired> {
 // 			ValueRequired::Time(value) => value.serialize(serializer),
 // 			ValueRequired::DateTime(value) => value.serialize(serializer),
 // 			ValueRequired::Decimal(value) => value.serialize(serializer),
-// 			ValueRequired::ByteArray(value) => value.serialize(serializer),
 // 			ValueRequired::Bson(value) => value.serialize(serializer),
 // 			ValueRequired::String(value) => value.serialize(serializer),
 // 			ValueRequired::Json(value) => value.serialize(serializer),
diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 5387a07c..3976c0bb 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -14,7 +14,7 @@ jobs:
     endpoint: alecmocatta
     default:
       rust_toolchain: nightly
-      rust_lint_toolchain: nightly-2019-10-15
+      rust_lint_toolchain: nightly-2019-11-01
       rust_flags: ''
       rust_features: 'aws;commoncrawl;parquet;postgres;csv;json;constellation aws commoncrawl parquet postgres csv json'
       rust_doc_features: 'doc constellation aws commoncrawl postgres csv json'
diff --git a/examples/common_crawl.rs b/examples/common_crawl.rs
index 502e780e..bea9d9e4 100644
--- a/examples/common_crawl.rs
+++ b/examples/common_crawl.rs
@@ -36,8 +36,6 @@
 // #![warn(clippy::pedantic)]
 #![allow(where_clauses_object_safety, clippy::all)]
 
-use serde_closure::FnMut;
-
 use amadeus::prelude::*;
 use constellation::{init, Resources};
 use data::Webpage;
diff --git a/src/data.rs b/src/data.rs
index 872821e6..1d46608d 100644
--- a/src/data.rs
+++ b/src/data.rs
@@ -1,6 +1,6 @@
 use ::serde::{Deserialize, Serialize};
 use std::{
-	cmp::Ordering, fmt::Debug, hash::{Hash, Hasher}
+	cmp::Ordering, collections::HashMap, fmt::Debug, hash::{BuildHasher, Hash, Hasher}
 };
 
 #[cfg(feature = "parquet")]
@@ -19,17 +19,16 @@ use std::any::Any as SerdeData;
 
 pub use amadeus_derive::Data;
 pub use amadeus_types::{
-	Bson, Date, DateTime, DateTimeWithoutTimezone, DateWithoutTimezone, Decimal, Downcast, DowncastImpl, Enum, Group, IpAddr, Json, List, Map, Time, TimeWithoutTimezone, Timezone, Url, Value, Webpage
+	AmadeusOrd, Bson, Date, DateTime, DateTimeWithoutTimezone, DateWithoutTimezone, Decimal, Downcast, DowncastFrom, Enum, Group, IpAddr, Json, Time, TimeWithoutTimezone, Timezone, Url, Value, Webpage
 };
 
 pub trait Data:
 	Clone
-	+ PartialEq
-	+ PartialOrd
+	+ AmadeusOrd
 	+ ParquetData
 	+ PostgresData
 	+ SerdeData
-	+ DowncastImpl<Value>
+	+ DowncastFrom<Value>
 	+ Into<Value>
 	+ Debug
 	+ Send
@@ -59,11 +58,12 @@ pub struct CastError;
 
 impl<T> Data for Option<T> where T: Data {}
 impl<T> Data for Box<T> where T: Data {}
-impl<T> Data for List<T> where T: Data {}
-impl<K, V> Data for Map<K, V>
+impl<T> Data for Vec<T> where T: Data {}
+impl<K, V, S> Data for HashMap<K, V, S>
 where
 	K: Hash + Eq + Data,
 	V: Data,
+	S: BuildHasher + Clone + Default + Send + 'static,
 {
 }
@@ -72,12 +72,13 @@ macro_rules! impl_data {
 		impl Data for $t {}
 	)*);
 }
-impl_data!(bool u8 i8 u16 i16 u32 i32 u64 i64 f32 f64 String Vec<u8> Bson Json Enum Decimal Group Date DateWithoutTimezone Time TimeWithoutTimezone DateTime DateTimeWithoutTimezone Timezone Value Webpage<'static> Url IpAddr);
+impl_data!(bool u8 i8 u16 i16 u32 i32 u64 i64 f32 f64 String Bson Json Enum Decimal Group Date DateWithoutTimezone Time TimeWithoutTimezone DateTime DateTimeWithoutTimezone Timezone Value Webpage<'static> Url IpAddr);
 
 // Implement Record for common array lengths, copied from arrayvec
 macro_rules! array {
 	($($i:tt)*) => {$(
 		impl Data for [u8; $i] {}
+		// TODO: impl<T> Data for [T; $i] where T: Data {}
 	)*};
 }
 amadeus_types::array!(array);
@@ -90,6 +91,7 @@ macro_rules! tuple {
 amadeus_types::tuple!(tuple);
 
 #[cfg(feature = "amadeus-serde")]
+#[doc(hidden)]
 pub mod serde_data {
 	use super::Data;
 	use serde::{Deserializer, Serializer};
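The src/data.rs hunks above swap the custom `List`/`Map` types for `Vec`/`HashMap` in the `Data` bounds, and replace `PartialEq + PartialOrd` with `AmadeusOrd`. A sketch of what a downstream record can now look like (the struct and its fields are illustrative, not from this patch):

	use amadeus::prelude::*;
	use std::collections::HashMap;

	// Vec<T> and HashMap<K, V, S> implement Data directly, so they can be fields.
	#[derive(Data, Clone, PartialEq, Debug)]
	struct PageStats {
		url: Url,
		links: Vec<String>,
		term_counts: HashMap<String, u32>,
	}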
diff --git a/src/lib.rs b/src/lib.rs
index b9411a2a..c78fb534 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,7 +4,7 @@
 //!
 //! This library is very nascent. 3 parts: process pool; sources/sinks (Data/Value); analytics;
 
-#![doc(html_root_url = "https://docs.rs/amadeus/0.1.3")]
+#![doc(html_root_url = "https://docs.rs/amadeus/0.1.4")]
 #![doc(
 	html_logo_url = "https://raw.githubusercontent.com/alecmocatta/amadeus/master/logo.svg?sanitize=true"
 )]
@@ -43,7 +43,8 @@
 	clippy::module_inception,
 	clippy::unreadable_literal,
 	clippy::default_trait_access,
-	clippy::match_same_arms
+	clippy::match_same_arms,
+	clippy::must_use_candidate
 )]
 
 pub mod data;
@@ -59,17 +60,17 @@ pub use crate::{
 
 pub mod prelude {
 	#[cfg(feature = "constellation")]
-	#[doc(inline)]
+	#[doc(no_inline)]
 	pub use super::pool::ProcessPool;
 	#[cfg(feature = "aws")]
-	#[doc(inline)]
+	#[doc(no_inline)]
 	pub use super::source::aws::{AwsError, AwsRegion, CloudfrontRow, S3Directory, S3File};
-	#[doc(inline)]
+	#[doc(no_inline)]
 	pub use super::{
 		data,
 		data::{
-			Date, DateTime, DateTimeWithoutTimezone, DateWithoutTimezone, Decimal, Downcast, DowncastImpl, Enum, Group, List, Map, Time, TimeWithoutTimezone, Timezone
+			Date, DateTime, DateTimeWithoutTimezone, DateWithoutTimezone, Decimal, Downcast, DowncastFrom, Enum, Group, Time, TimeWithoutTimezone, Timezone
 		},
 		dist_iter::{DistributedIteratorMulti, Identity}, pool::LocalPool, pool::ThreadPool, source::*, Data, DistributedIterator, FromDistributedIterator, FutureExt1, IntoDistributedIterator, IteratorExt, Value
 	};
-	#[doc(inline)]
-	pub use amadeus_core::pool::{LocalPool as _, ProcessPool as _, ThreadPool as _};
+	#[doc(no_inline)]
+	pub use serde_closure::{Fn, FnMut, FnOnce};
 }
diff --git a/src/pool.rs b/src/pool.rs
index 9ce45a05..072a8829 100644
--- a/src/pool.rs
+++ b/src/pool.rs
@@ -26,6 +26,9 @@ mod process_pool_impls {
 		fn processes(&self) -> usize {
 			ProcessPool::processes(self)
 		}
+		fn threads(&self) -> usize {
+			ProcessPool::threads(self)
+		}
 		fn spawn<F, T>(&self, work: F) -> Pin<Box<dyn Future<Output = Result<T, Self::Error>> + Send>>
 		where
 			F: FnOnce() -> T + ProcessSend,
@@ -37,6 +40,9 @@ mod process_pool_impls {
 	impl Pool for ThreadPool {
 		fn processes(&self) -> usize {
+			1
+		}
+		fn threads(&self) -> usize {
 			ThreadPool::threads(self)
 		}
 		fn spawn<F, T>(&self, work: F) -> Pin<Box<dyn Future<Output = Result<T, Self::Error>> + Send>>
@@ -52,6 +58,9 @@ mod process_pool_impls {
 		fn processes(&self) -> usize {
 			1
 		}
+		fn threads(&self) -> usize {
+			1
+		}
 		fn spawn<F, T>(&self, work: F) -> Pin<Box<dyn Future<Output = Result<T, Self::Error>> + Send>>
 		where
 			F: FnOnce() -> T + ProcessSend,
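With the src/pool.rs hunks above, every `Pool` impl reports `threads()` alongside `processes()`: `ThreadPool` claims one process and its real thread count, `LocalPool` one of each. A rough sketch of sizing work with the pair, assuming the trait is `amadeus_core::pool::Pool` as the removed prelude re-export suggests (the helper itself is illustrative, not from this patch):

	use amadeus_core::pool::Pool;

	// Total worker slots: processes x threads per process.
	fn total_workers<P: Pool>(pool: &P) -> usize {
		pool.processes() * pool.threads()
	}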
diff --git a/src/pool/local.rs b/src/pool/local.rs
index e45ccbcd..b2b8c1ac 100644
--- a/src/pool/local.rs
+++ b/src/pool/local.rs
@@ -3,17 +3,15 @@ use std::panic;
 use super::util::Panicked;
 
 #[derive(Copy, Clone, Default, Debug)]
-pub struct LocalPool;
+pub struct LocalPool(());
 impl LocalPool {
 	pub fn new() -> Self {
-		LocalPool
-	}
-	pub fn processes(&self) -> usize {
-		1
+		LocalPool(())
 	}
 	pub fn spawn<F: FnOnce() -> T, T>(
 		&self, work: F,
 	) -> impl std::future::Future<Output = Result<T, Panicked>> {
+		let _self = self;
 		let ret = panic::catch_unwind(panic::AssertUnwindSafe(work));
 		let ret = ret.map_err(Panicked::from);
 		futures::future::ready(ret)
diff --git a/src/pool/process.rs b/src/pool/process.rs
index c58b4921..1c6f1611 100644
--- a/src/pool/process.rs
+++ b/src/pool/process.rs
@@ -61,6 +61,7 @@ impl Queued<T> {
 #[derive(Debug)]
 struct ProcessPoolInner {
 	processes: Vec<Process>,
+	threads: usize,
 	i: RoundRobin,
 }
 impl ProcessPoolInner {
@@ -111,12 +112,16 @@ impl ProcessPoolInner {
 		let i = RoundRobin::new(0, processes_vec.len());
 		Ok(Self {
 			processes: processes_vec,
+			threads,
 			i,
 		})
 	}
 	fn processes(&self) -> usize {
 		self.processes.len()
 	}
+	fn threads(&self) -> usize {
+		self.threads
+	}
 	async fn spawn<F: FnOnce() -> T + ProcessSend, T: ProcessSend>(
 		&self, work: F,
 	) -> Result<T, Panicked> {
@@ -197,6 +202,9 @@ impl ProcessPool {
 	pub fn processes(&self) -> usize {
 		self.0.processes()
 	}
+	pub fn threads(&self) -> usize {
+		self.0.threads()
+	}
 	pub fn spawn<F: FnOnce() -> T + ProcessSend, T: ProcessSend>(
 		&self, work: F,
 	) -> impl Future<Output = Result<T, Panicked>> {
@@ -206,6 +214,16 @@ impl ProcessPool {
 	}
 }
 
+impl Clone for ProcessPool {
+	/// Cloning a pool will create a new handle to the pool.
+	/// The behavior is similar to [Arc](https://doc.rust-lang.org/stable/std/sync/struct.Arc.html).
+	///
+	/// We could for example submit jobs from multiple threads concurrently.
+	fn clone(&self) -> Self {
+		Self(self.0.clone())
+	}
+}
+
 impl UnwindSafe for ProcessPool {}
 impl RefUnwindSafe for ProcessPool {}
diff --git a/src/pool/thread.rs b/src/pool/thread.rs
index 30e2010b..6a05bc19 100644
--- a/src/pool/thread.rs
+++ b/src/pool/thread.rs
@@ -215,6 +215,16 @@ impl ThreadPool {
 	}
 }
 
+impl Clone for ThreadPool {
+	/// Cloning a pool will create a new handle to the pool.
+	/// The behavior is similar to [Arc](https://doc.rust-lang.org/stable/std/sync/struct.Arc.html).
+	///
+	/// We could for example submit jobs from multiple threads concurrently.
+	fn clone(&self) -> Self {
+		Self(self.0.clone())
+	}
+}
+
 impl UnwindSafe for ThreadPool {}
 impl RefUnwindSafe for ThreadPool {}
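`ProcessPool` and `ThreadPool` are now `Clone`, each clone being another Arc-like handle onto the same pool, per the doc comments above. A sketch of submitting work through a cloned handle from a second thread; `FnOnce!` is the serde_closure macro the prelude now re-exports, and `ThreadPool::spawn` is assumed to mirror the `ProcessPool::spawn` signature shown above:

	fn fan_out(pool: &ThreadPool) {
		let handle = pool.clone(); // same pool, new handle
		std::thread::spawn(move || {
			// Submit a job through the clone; the returned future
			// resolves to the closure's result.
			let _job = handle.spawn(FnOnce!(|| 1 + 1));
		});
	}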
diff --git a/tests/cloudfront.rs b/tests/cloudfront.rs
index bca94a6b..b74aa62f 100644
--- a/tests/cloudfront.rs
+++ b/tests/cloudfront.rs
@@ -1,7 +1,5 @@
 #![allow(where_clauses_object_safety)]
 
-use serde_closure::FnMut;
-
 use amadeus::prelude::*;
 #[cfg(feature = "constellation")]
 use constellation::*;
diff --git a/tests/commoncrawl.rs b/tests/commoncrawl.rs
index 4a4bcfb2..cfc789c8 100644
--- a/tests/commoncrawl.rs
+++ b/tests/commoncrawl.rs
@@ -1,7 +1,5 @@
 #![allow(where_clauses_object_safety)]
 
-use serde_closure::FnMut;
-
 #[cfg(feature = "constellation")]
 use constellation::*;
 use std::{
diff --git a/tests/csv.rs b/tests/csv.rs
index d34f2495..5f54fe5d 100644
--- a/tests/csv.rs
+++ b/tests/csv.rs
@@ -1,6 +1,5 @@
 #[cfg(feature = "constellation")]
 use constellation::*;
-use serde_closure::FnMut;
 use std::{
 	env, path::PathBuf, time::{Duration, SystemTime}
 };
diff --git a/tests/into_dist_iter.rs b/tests/into_dist_iter.rs
index 7d61fab5..e14e2000 100644
--- a/tests/into_dist_iter.rs
+++ b/tests/into_dist_iter.rs
@@ -1,7 +1,5 @@
 #![allow(where_clauses_object_safety)]
 
-use serde_closure::FnMut;
-
 use either::Either;
 
 use amadeus::prelude::*;
diff --git a/tests/json.rs b/tests/json.rs
index 6341f3e9..c192a811 100644
--- a/tests/json.rs
+++ b/tests/json.rs
@@ -1,6 +1,5 @@
 #[cfg(feature = "constellation")]
 use constellation::*;
-use serde_closure::FnMut;
 use std::{
 	env, path::PathBuf, time::{Duration, SystemTime}
 };
diff --git a/tests/panic.rs b/tests/panic.rs
index abaabec2..5ac160fb 100644
--- a/tests/panic.rs
+++ b/tests/panic.rs
@@ -1,7 +1,5 @@
 #![allow(where_clauses_object_safety)]
 
-use serde_closure::FnMut;
-
 #[cfg(feature = "constellation")]
 use constellation::*;
 use std::{
diff --git a/tests/parquet.rs b/tests/parquet.rs
index 6c46309b..423db860 100644
--- a/tests/parquet.rs
+++ b/tests/parquet.rs
@@ -6,9 +6,8 @@
 
 #[cfg(feature = "constellation")]
 use constellation::*;
-use serde_closure::FnMut;
 use std::{
-	env, path::PathBuf, time::{Duration, SystemTime}
+	collections::HashMap, env, path::PathBuf, time::{Duration, SystemTime}
 };
 // use test::Bencher;
 
@@ -116,7 +115,7 @@ fn run(pool: &P) -> Duration {
 		);
 	}
 
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct StockSimulatedDerived {
 		bp1: Option<f64>,
 		bp2: Option<f64>,
@@ -167,7 +166,7 @@ fn run(pool: &P) -> Duration {
 		42_000
 	);
 
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct StockSimulatedDerivedProjection1 {
 		bs5: Option<f64>,
 		__index_level_0__: Option<i64>,
@@ -199,7 +198,7 @@ fn run(pool: &P) -> Duration {
 		42_000
 	);
 
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct StockSimulatedDerivedProjection2 {}
 
 	let rows = Parquet::<_, StockSimulatedDerivedProjection2>::new(vec![PathBuf::from(
@@ -239,7 +238,7 @@ fn run(pool: &P) -> Duration {
 		DateTime,
 	);
 
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct TenKayVeeTwoDerived {
 		binary_field: Vec<u8>,
 		int32_field: i32,
@@ -303,7 +302,7 @@ fn run(pool: &P) -> Duration {
 		Option<DateTime>,
 	);
 
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct AlltypesDictionaryDerived {
 		id: Option<i32>,
 		bool_col: Option<bool>,
@@ -370,7 +369,7 @@ fn run(pool: &P) -> Duration {
 		Option<DateTime>,
 	);
 
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct AlltypesPlainDerived {
 		id: Option<i32>,
 		bool_col: Option<bool>,
@@ -437,7 +436,7 @@ fn run(pool: &P) -> Duration {
 		Option<DateTime>,
 	);
 
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct AlltypesPlainSnappyDerived {
 		id: Option<i32>,
 		bool_col: Option<bool>,
@@ -518,13 +517,10 @@ fn run(pool: &P) -> Duration {
 	// ))]
 	// );
 
-	type NestedLists = (
-		Option<List<Option<List<Option<List<Option<String>>>>>>>,
-		i32,
-	);
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	type NestedLists = (Option<Vec<Option<Vec<Option<Vec<Option<String>>>>>>>, i32);
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct NestedListsDerived {
-		a: Option<List<Option<List<Option<List<Option<String>>>>>>>,
+		a: Option<Vec<Option<Vec<Option<Vec<Option<String>>>>>>>,
 		b: i32,
 	}
 	let rows = Parquet::<_, NestedLists>::new(vec![PathBuf::from(
@@ -565,10 +561,14 @@ fn run(pool: &P) -> Duration {
 		3
 	);
 
-	type NestedMaps = (Option<Map<String, Option<Map<i32, bool>>>>, i32, f64);
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	type NestedMaps = (
+		Option<HashMap<String, Option<HashMap<i32, bool>>>>,
+		i32,
+		f64,
+	);
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct NestedMapsDerived {
-		a: Option<Map<String, Option<Map<i32, bool>>>>,
+		a: Option<HashMap<String, Option<HashMap<i32, bool>>>>,
 		b: i32,
 		c: f64,
 	}
@@ -612,49 +612,49 @@
 	type Nonnullable = (
 		i64,
-		List<i32>,
-		List<List<i32>>,
-		Map<String, i32>,
-		List<Map<String, i32>>,
+		Vec<i32>,
+		Vec<Vec<i32>>,
+		HashMap<String, i32>,
+		Vec<HashMap<String, i32>>,
 		(
 			i32,
-			List<i32>,
-			(List<List<(i32, String)>>,),
-			Map<String, ((List<f64>,),)>,
+			Vec<i32>,
+			(Vec<Vec<(i32, String)>>,),
+			HashMap<String, ((Vec<f64>,),)>,
 		),
 	);
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct NonnullableDerived {
 		#[amadeus(name = "ID")]
 		id: i64,
 		#[amadeus(name = "Int_Array")]
-		int_array: List<i32>,
-		int_array_array: List<List<i32>>,
+		int_array: Vec<i32>,
+		int_array_array: Vec<Vec<i32>>,
 		#[amadeus(name = "Int_Map")]
-		int_map: Map<String, i32>,
-		int_map_array: List<Map<String, i32>>,
+		int_map: HashMap<String, i32>,
+		int_map_array: Vec<HashMap<String, i32>>,
 		#[amadeus(name = "nested_Struct")]
 		nested_struct: NonnullableDerivedInner,
 	}
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct NonnullableDerivedInner {
 		a: i32,
 		#[amadeus(name = "B")]
-		b: List<i32>,
+		b: Vec<i32>,
 		c: NonnullableDerivedInnerInner,
 		#[amadeus(name = "G")]
-		g: Map<String, ((List<f64>,),)>,
+		g: HashMap<String, ((Vec<f64>,),)>,
 	}
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct NonnullableDerivedInnerInner {
 		#[amadeus(name = "D")]
-		d: List<List<NonnullableDerivedInnerInnerInner>>,
+		d: Vec<Vec<NonnullableDerivedInnerInnerInner>>,
 	}
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct NonnullableDerivedInnerInnerInner {
 		e: i32,
 		f: String,
@@ -700,31 +700,31 @@
 	type Nullable = (
 		Option<i64>,
-		Option<List<Option<i32>>>,
-		Option<List<Option<List<Option<i32>>>>>,
-		Option<Map<String, Option<i32>>>,
-		Option<List<Option<Map<String, Option<i32>>>>>,
+		Option<Vec<Option<i32>>>,
+		Option<Vec<Option<Vec<Option<i32>>>>>,
+		Option<HashMap<String, Option<i32>>>,
+		Option<Vec<Option<HashMap<String, Option<i32>>>>>,
 		Option<(
 			Option<i32>,
-			Option<List<Option<i32>>>,
-			Option<(Option<List<Option<List<Option<(Option<i32>, Option<String>)>>>>>,)>,
-			Option<Map<String, Option<(Option<(Option<List<Option<f64>>>,)>,)>>>,
+			Option<Vec<Option<i32>>>,
+			Option<(Option<Vec<Option<Vec<Option<(Option<i32>, Option<String>)>>>>>,)>,
+			Option<HashMap<String, Option<(Option<(Option<Vec<Option<f64>>>,)>,)>>>,
 		)>,
 	);
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct NullableDerived {
 		id: Option<i64>,
-		int_array: Option<List<Option<i32>>>,
+		int_array: Option<Vec<Option<i32>>>,
 		#[amadeus(name = "int_array_Array")]
-		int_array_array: Option<List<Option<List<Option<i32>>>>>,
-		int_map: Option<Map<String, Option<i32>>>,
+		int_array_array: Option<Vec<Option<Vec<Option<i32>>>>>,
+		int_map: Option<HashMap<String, Option<i32>>>,
 		#[amadeus(name = "int_Map_Array")]
-		int_map_array: Option<List<Option<Map<String, Option<i32>>>>>,
+		int_map_array: Option<Vec<Option<HashMap<String, Option<i32>>>>>,
 		nested_struct: Option<(
 			Option<i32>,
-			Option<List<Option<i32>>>,
-			Option<(Option<List<Option<List<Option<(Option<i32>, Option<String>)>>>>>,)>,
-			Option<Map<String, Option<(Option<(Option<List<Option<f64>>>,)>,)>>>,
+			Option<Vec<Option<i32>>>,
+			Option<(Option<Vec<Option<Vec<Option<(Option<i32>, Option<String>)>>>>>,)>,
+			Option<HashMap<String, Option<(Option<(Option<Vec<Option<f64>>>,)>,)>>>,
 		)>,
 	}
 	let rows = Parquet::<_, Nullable>::new(vec![PathBuf::from(
@@ -766,7 +766,7 @@ fn run(pool: &P) -> Duration {
 	);
 
 	type Nulls = (Option<(Option<i32>,)>,);
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct NullsDerived {
 		b_struct: Option<(Option<i32>,)>,
 	}
@@ -808,12 +808,12 @@ fn run(pool: &P) -> Duration {
 		8
 	);
 
-	type Repeated = (i32, Option<(List<(i64, Option<String>)>,)>);
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	type Repeated = (i32, Option<(Vec<(i64, Option<String>)>,)>);
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct RepeatedDerived {
 		id: i32,
 		#[amadeus(name = "phoneNumbers")]
-		phone_numbers: Option<(List<(i64, Option<String>)>,)>,
+		phone_numbers: Option<(Vec<(i64, Option<String>)>,)>,
 	}
 	let rows = Parquet::<_, Repeated>::new(vec![PathBuf::from(
 		"amadeus-testing/parquet/repeated_no_annotation.parquet",
@@ -853,14 +853,14 @@ fn run(pool: &P) -> Duration {
 		6
 	);
 
-	type TestDatapage = (Option<String>, i32, f64, bool, Option<List<i32>>);
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	type TestDatapage = (Option<String>, i32, f64, bool, Option<Vec<i32>>);
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct TestDatapageDerived {
 		a: Option<String>,
 		b: i32,
 		c: f64,
 		d: bool,
-		e: Option<List<i32>>,
+		e: Option<Vec<i32>>,
 	}
 	let rows = Parquet::<_, TestDatapage>::new(vec![PathBuf::from(
 		"amadeus-testing/parquet/datapage_v2.snappy.parquet",
@@ -900,7 +900,7 @@ fn run(pool: &P) -> Duration {
 		5
 	);
 
-	#[derive(Data, Clone, PartialEq, PartialOrd, Debug)]
+	#[derive(Data, Clone, PartialEq, Debug)]
 	struct CommitsDerived {
 		id: Option<String>,
 		delay: Option<i32>,
diff --git a/tests/postgres.rs b/tests/postgres.rs
index 9e481ac8..9e0dbec9 100644
--- a/tests/postgres.rs
+++ b/tests/postgres.rs
@@ -1,6 +1,5 @@
 #[cfg(feature = "constellation")]
 use constellation::*;
-use serde_closure::FnMut;
 use std::{env, time::SystemTime};
 
 use amadeus::prelude::*;
diff --git a/tests/threads.rs b/tests/threads.rs
index abf80f9e..f79678ce 100644
--- a/tests/threads.rs
+++ b/tests/threads.rs
@@ -1,7 +1,5 @@
 #![allow(where_clauses_object_safety)]
 
-use serde_closure::FnOnce;
-
 #[cfg(feature = "constellation")]
 use constellation::*;
 use rand::{Rng, SeedableRng};
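The `use serde_closure::FnMut;`/`FnOnce` removals throughout the tests lean on the new prelude re-export in the src/lib.rs hunk, so the macros now arrive with the glob import. A sketch of the resulting pattern (the closure itself is illustrative):

	use amadeus::prelude::*; // now also brings in Fn!, FnMut! and FnOnce!

	fn main() {
		// A serializable closure, as the distributed iterators require.
		let _double = FnMut!(|x: i32| x * 2);
	}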