@@ -92,75 +92,25 @@ std::string ParquetVersionToString(ParquetVersion::type ver) {
9292
9393namespace {
9494
95- enum class StatsMinMaxMode {
96- // Ignore min/max fields because their ordering is unknown or unsupported.
97- kDiscard ,
98- // Use legacy min/max fields for files without column orders.
99- kLegacy ,
100- // Use min_value/max_value fields with the column's well-defined order.
101- kNormal ,
102- };
103-
104- StatsMinMaxMode GetStatsMinMaxMode (const ColumnDescriptor& descr) {
95+ StatisticsMinMaxField GetStatisticsMinMaxField (const ColumnDescriptor& descr) {
96+ if (descr.sort_order () == SortOrder::UNKNOWN ) {
97+ return StatisticsMinMaxField::kInvalid ;
98+ }
10599 switch (descr.column_order ().get_order ()) {
106100 case ColumnOrder::TYPE_DEFINED_ORDER :
107- return descr.sort_order () != SortOrder::UNKNOWN ? StatsMinMaxMode::kNormal
108- : StatsMinMaxMode::kDiscard ;
101+ return StatisticsMinMaxField::kMinValueMaxValue ;
109102 case ColumnOrder::UNDEFINED :
110- return descr.sort_order () != SortOrder::UNKNOWN ? StatsMinMaxMode::kLegacy
111- : StatsMinMaxMode::kDiscard ;
103+ return StatisticsMinMaxField::kLegacyMinMax ;
112104 case ColumnOrder::UNKNOWN :
113- return StatsMinMaxMode:: kDiscard ;
105+ return StatisticsMinMaxField:: kInvalid ;
114106 }
115- return StatsMinMaxMode::kDiscard ;
116- }
117-
118- } // namespace
119-
120- static EncodedStatistics EncodedStatisticsFromThrift (const format::Statistics& statistics,
121- StatsMinMaxMode min_max) {
122- EncodedStatistics out;
123-
124- switch (min_max) {
125- case StatsMinMaxMode::kNormal :
126- if (statistics.__isset .max_value ) {
127- out.set_max (statistics.max_value );
128- if (statistics.__isset .is_max_value_exact ) {
129- out.is_max_value_exact = statistics.is_max_value_exact ;
130- }
131- }
132- if (statistics.__isset .min_value ) {
133- out.set_min (statistics.min_value );
134- if (statistics.__isset .is_min_value_exact ) {
135- out.is_min_value_exact = statistics.is_min_value_exact ;
136- }
137- }
138- break ;
139- case StatsMinMaxMode::kLegacy :
140- if (statistics.__isset .max ) {
141- out.set_max (statistics.max );
142- }
143- if (statistics.__isset .min ) {
144- out.set_min (statistics.min );
145- }
146- break ;
147- case StatsMinMaxMode::kDiscard :
148- break ;
149- }
150- if (statistics.__isset .null_count ) {
151- out.set_null_count (statistics.null_count );
152- }
153- if (statistics.__isset .distinct_count ) {
154- out.set_distinct_count (statistics.distinct_count );
155- }
156-
157- return out;
107+ return StatisticsMinMaxField::kInvalid ;
158108}
159109
160110template <typename DType>
161- static std::shared_ptr<Statistics> MakeTypedColumnStats (
162- const format::ColumnMetaData& metadata, const ColumnDescriptor* descr,
163- ::arrow::MemoryPool* pool) {
111+ std::shared_ptr<Statistics> MakeTypedColumnStats (const format::ColumnMetaData& metadata,
112+ const ColumnDescriptor* descr,
113+ ::arrow::MemoryPool* pool) {
164114 const auto & statistics = metadata.statistics ;
165115 const std::string kEmpty = " " ;
166116 const std::string* encoded_min = &kEmpty ;
@@ -169,8 +119,8 @@ static std::shared_ptr<Statistics> MakeTypedColumnStats(
169119 std::optional<bool > min_exact = std::nullopt ;
170120 std::optional<bool > max_exact = std::nullopt ;
171121
172- switch (GetStatsMinMaxMode (*descr)) {
173- case StatsMinMaxMode:: kNormal :
122+ switch (GetStatisticsMinMaxField (*descr)) {
123+ case StatisticsMinMaxField:: kMinValueMaxValue :
174124 encoded_min = &statistics.min_value ;
175125 encoded_max = &statistics.max_value ;
176126 has_min_max = statistics.__isset .max_value && statistics.__isset .min_value ;
@@ -181,12 +131,12 @@ static std::shared_ptr<Statistics> MakeTypedColumnStats(
181131 ? std::optional<bool >(statistics.is_max_value_exact )
182132 : std::nullopt ;
183133 break ;
184- case StatsMinMaxMode:: kLegacy :
134+ case StatisticsMinMaxField:: kLegacyMinMax :
185135 encoded_min = &statistics.min ;
186136 encoded_max = &statistics.max ;
187137 has_min_max = statistics.__isset .max && statistics.__isset .min ;
188138 break ;
189- case StatsMinMaxMode:: kDiscard :
139+ case StatisticsMinMaxField:: kInvalid :
190140 break ;
191141 }
192142
@@ -197,8 +147,6 @@ static std::shared_ptr<Statistics> MakeTypedColumnStats(
197147 max_exact, pool);
198148}
199149
200- namespace {
201-
202150std::shared_ptr<geospatial::GeoStatistics> MakeColumnGeometryStats (
203151 const format::ColumnMetaData& metadata, const ColumnDescriptor* descr) {
204152 if (metadata.__isset .geospatial_statistics ) {
@@ -411,9 +359,8 @@ class ColumnChunkMetaData::ColumnChunkMetaDataImpl {
411359 {
412360 const std::lock_guard<std::mutex> guard (stats_mutex_);
413361 if (possible_encoded_stats_ == nullptr ) {
414- possible_encoded_stats_ =
415- std::make_shared<EncodedStatistics>(EncodedStatisticsFromThrift (
416- column_metadata_->statistics , GetStatsMinMaxMode (*descr_)));
362+ possible_encoded_stats_ = std::make_shared<EncodedStatistics>(
363+ FromThrift (column_metadata_->statistics , GetStatisticsMinMaxField (*descr_)));
417364 }
418365 }
419366 return writer_version_->HasCorrectStatistics (type (), *possible_encoded_stats_,
0 commit comments