@@ -1033,15 +1033,15 @@ impl ArrowColumnWriterFactory {
10331033
10341034        match  data_type { 
10351035            _ if  data_type. is_primitive ( )  => out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?) , 
1036-             ArrowDataType :: FixedSizeBinary ( _)  | ArrowDataType :: Boolean  | ArrowDataType :: Null  => out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?) , 
1036+             ArrowDataType :: FixedSizeBinary ( _)  | ArrowDataType :: Boolean  | ArrowDataType :: Null  => { 
1037+                 out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?) 
1038+             } 
10371039            ArrowDataType :: LargeBinary 
10381040            | ArrowDataType :: Binary 
10391041            | ArrowDataType :: Utf8 
10401042            | ArrowDataType :: LargeUtf8 
10411043            | ArrowDataType :: BinaryView 
1042-             | ArrowDataType :: Utf8View  => { 
1043-                 out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) 
1044-             } 
1044+             | ArrowDataType :: Utf8View  => out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) , 
10451045            ArrowDataType :: List ( f) 
10461046            | ArrowDataType :: LargeList ( f) 
10471047            | ArrowDataType :: FixedSizeList ( f,  _)  => { 
@@ -1058,21 +1058,29 @@ impl ArrowColumnWriterFactory {
10581058                    self . get_arrow_column_writer ( f[ 1 ] . data_type ( ) ,  props,  leaves,  out) ?
10591059                } 
10601060                _ => unreachable ! ( "invalid map type" ) , 
1061-             } 
1061+             } , 
10621062            ArrowDataType :: Dictionary ( _,  value_type)  => match  value_type. as_ref ( )  { 
1063-                 ArrowDataType :: Utf8  | ArrowDataType :: LargeUtf8  | ArrowDataType :: Binary  | ArrowDataType :: LargeBinary  => { 
1064-                     out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) 
1065-                 } 
1063+                 ArrowDataType :: Utf8 
1064+                 | ArrowDataType :: LargeUtf8 
1065+                 | ArrowDataType :: Binary 
1066+                 | ArrowDataType :: LargeBinary  => out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) , 
10661067                ArrowDataType :: Utf8View  | ArrowDataType :: BinaryView  => { 
10671068                    out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) 
10681069                } 
1069-                 ArrowDataType :: FixedSizeBinary ( _)  => { 
1070+                 ArrowDataType :: FixedSizeBinary ( _)  => out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) , 
1071+                 _ => out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?) , 
1072+             } , 
1073+             ArrowDataType :: RunEndEncoded ( _run_ends,  value_type)  => match  value_type. data_type ( )  { 
1074+                 ArrowDataType :: Utf8 
1075+                 | ArrowDataType :: LargeUtf8 
1076+                 | ArrowDataType :: Binary 
1077+                 | ArrowDataType :: LargeBinary  => out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) , 
1078+                 ArrowDataType :: Utf8View  | ArrowDataType :: BinaryView  => { 
10701079                    out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) 
10711080                } 
1072-                 _ => { 
1073-                     out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?) 
1074-                 } 
1075-             } 
1081+                 ArrowDataType :: FixedSizeBinary ( _)  => out. push ( bytes ( leaves. next ( ) . unwrap ( ) ) ?) , 
1082+                 _ => out. push ( col ( leaves. next ( ) . unwrap ( ) ) ?) , 
1083+             } , 
10761084            _ => return  Err ( ParquetError :: NYI ( 
10771085                format ! ( 
10781086                    "Attempting to write an Arrow type {data_type:?} to parquet that is not yet implemented" 
@@ -1166,6 +1174,7 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
11661174                        write_primitive ( typed,  array. values ( ) ,  levels) 
11671175                    } 
11681176                } , 
1177+                 ArrowDataType :: RunEndEncoded ( _run_ends,  _value_type)  => todo ! ( ) , 
11691178                _ => { 
11701179                    let  array = arrow_cast:: cast ( column,  & ArrowDataType :: Int32 ) ?; 
11711180                    let  array = array. as_primitive :: < Int32Type > ( ) ; 
@@ -1248,6 +1257,7 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
12481257                        write_primitive ( typed,  array. values ( ) ,  levels) 
12491258                    } 
12501259                } , 
1260+                 ArrowDataType :: RunEndEncoded ( _run_ends,  _values)  => todo ! ( ) , 
12511261                _ => { 
12521262                    let  array = arrow_cast:: cast ( column,  & ArrowDataType :: Int64 ) ?; 
12531263                    let  array = array. as_primitive :: < Int64Type > ( ) ; 
@@ -1324,6 +1334,7 @@ fn write_leaf(writer: &mut ColumnWriter<'_>, levels: &ArrayLevels) -> Result<usi
13241334                    let  array = column. as_primitive :: < Float16Type > ( ) ; 
13251335                    get_float_16_array_slice ( array,  indices) 
13261336                } 
1337+                 ArrowDataType :: RunEndEncoded ( _run_ends,  _values)  => todo ! ( ) , 
13271338                _ => { 
13281339                    return  Err ( ParquetError :: NYI ( 
13291340                        "Attempting to write an Arrow type that is not yet implemented" . to_string ( ) , 
@@ -4293,4 +4304,50 @@ mod tests {
42934304        assert_eq ! ( get_dict_page_size( col0_meta) ,  1024  *  1024 ) ; 
42944305        assert_eq ! ( get_dict_page_size( col1_meta) ,  1024  *  1024  *  4 ) ; 
42954306    } 
4307+ 
/// Writes a run-end-encoded string column through `ArrowWriter` and reads the file back,
/// checking that the column comes back with a plain `Utf8` schema and that no logical rows
/// were lost along the way.
#[test]
fn arrow_writer_run_end_encoded() {
    // Number of logical rows in each of the two runs ("alpha" and "beta").
    const RUN_LEN: usize = 1000;
    // Pinned explicitly so the batch-count assertion below does not depend on whatever
    // default batch size the reader builder happens to use.
    const BATCH_SIZE: usize = 1024;

    // Create a run array of strings: two runs of RUN_LEN identical values each.
    let mut builder = StringRunBuilder::<Int16Type>::new();
    builder.extend(
        std::iter::repeat(Some("alpha"))
            .take(RUN_LEN)
            .chain(std::iter::repeat(Some("beta")).take(RUN_LEN)),
    );
    let run_array: RunArray<Int16Type> = builder.finish();
    let schema = Arc::new(Schema::new(vec![Field::new(
        "ree",
        run_array.data_type().clone(),
        run_array.is_nullable(),
    )]));

    // Write to parquet
    let mut parquet_bytes: Vec<u8> = Vec::new();
    let mut writer = ArrowWriter::try_new(&mut parquet_bytes, schema.clone(), None).unwrap();
    let batch = RecordBatch::try_new(schema, vec![Arc::new(run_array)]).unwrap();
    writer.write(&batch).unwrap();
    writer.close().unwrap();

    // The schema read back is plain Utf8, neither dictionary nor run-end encoded.
    let expected_schema = Arc::new(Schema::new(vec![Field::new(
        "ree",
        arrow_schema::DataType::Utf8,
        false,
    )]));

    // Read from parquet
    let bytes = Bytes::from(parquet_bytes);
    let reader = ParquetRecordBatchReaderBuilder::try_new(bytes)
        .unwrap()
        .with_batch_size(BATCH_SIZE);
    assert_eq!(reader.schema(), &expected_schema);
    let batches: Vec<_> = reader
        .build()
        .unwrap()
        .collect::<ArrowResult<Vec<_>>>()
        .unwrap();
    // 2 * RUN_LEN = 2000 rows read at BATCH_SIZE = 1024 rows per batch gives two batches.
    assert_eq!(batches.len(), 2);
    // Count rows in total
    let total_rows = batches.iter().map(|b| b.num_rows()).sum::<usize>();
    assert_eq!(total_rows, 2 * RUN_LEN);
}
42964353} 
0 commit comments