@@ -22,8 +22,9 @@ import Data.Vector.Internal.Check (HasCallStack)
2222import DataFrame.Errors (DataFrameException (.. ))
2323import DataFrame.Internal.Column (Column (.. ), Columnable , atIndicesStable )
2424import DataFrame.Internal.DataFrame (DataFrame (.. ), unsafeGetColumn )
25- import DataFrame.Internal.Expression (Expr (Col ))
25+ import DataFrame.Internal.Expression (Expr (Col ), getColumns )
2626import DataFrame.Operations.Core (columnNames , dimensions )
27+ import DataFrame.Operations.Transformations (derive )
2728import System.Random (Random (randomR ), RandomGen )
2829import Type.Reflection (typeRep )
2930
@@ -38,15 +39,40 @@ instance Eq SortOrder where
3839 (==) (Desc _) (Desc _) = True
3940 (==) _ _ = False
4041
41- getSortColumnName :: SortOrder -> T. Text
42- getSortColumnName (Asc (Col n)) = n
43- getSortColumnName (Desc (Col n)) = n
44- getSortColumnName _ = error " Sorting on compound column"
{- | Column names that 'getColumns' reports for the expression wrapped by a
'SortOrder'. The sort direction plays no role in the result.
-}
sortOrderColumns :: SortOrder -> [T.Text]
sortOrderColumns sortOrd = case sortOrd of
    Asc expr -> getColumns expr
    Desc expr -> getColumns expr
4545
{- | Direction flag for a 'SortOrder': 'True' for 'Asc', 'False' for 'Desc'.

NOTE(review): how the comparator interprets this flag is not visible in this
part of the file; confirm against the call site before relying on the name.
-}
mustFlipCompare :: SortOrder -> Bool
mustFlipCompare sortOrd = case sortOrd of
    Asc _ -> True
    Desc _ -> False
4949
{- | Rewrite a list of 'SortOrder's so that every compound expression is
replaced by a reference to a synthetic column on a working dataframe.

The sort orders are visited left to right. Each one is handed to
'materializeSortOrder' together with its zero-based position (used to name
the synthetic column) and the dataframe produced by the previous step.
The result pairs the rewritten sort orders, in their original order, with
the final working dataframe.
-}
prepareSortColumns :: [SortOrder] -> DataFrame -> ([SortOrder], DataFrame)
prepareSortColumns sortOrds df =
    let (working, rewritten) = L.mapAccumL step df (zip [0 ..] sortOrds)
     in (rewritten, working)
  where
    -- 'mapAccumL' wants (accumulator, output); 'materializeSortOrder'
    -- returns (output, accumulator), so swap the pair here.
    step frame (position, sortOrd) =
        let (sortOrd', frame') = materializeSortOrder position sortOrd frame
         in (frame', sortOrd')
62+
{- | Turn a single 'SortOrder' into one that refers to a column by name.

A sort order that already wraps a bare 'Col' is returned unchanged together
with the untouched dataframe. For any other expression, the expression is
passed to 'derive' under the name @syntheticName i@, and the sort order is
rewritten to a 'Col' of that name at the same element type, keeping the
original direction.
-}
materializeSortOrder :: Int -> SortOrder -> DataFrame -> (SortOrder, DataFrame)
materializeSortOrder i ord df = case ord of
    Asc (Col _) -> (ord, df)
    Desc (Col _) -> (ord, df)
    -- The pattern signature binds the element type so the replacement 'Col'
    -- is built at the same type as the expression it stands in for.
    Asc (e :: Expr a) -> (Asc (Col name :: Expr a), derive name e df)
    Desc (e :: Expr a) -> (Desc (Col name :: Expr a), derive name e df)
  where
    name = syntheticName i
72+
{- | Name of the temporary column that holds the materialized sort expression
at position @i@: a fixed prefix, the decimal index, and a fixed suffix.
-}
syntheticName :: Int -> T.Text
syntheticName i = T.pack (" __sortBy_synthetic_" ++ show i ++ " __")
75+
5076{- | O(k log n) Sorts the rows of the dataframe by the given sort orders.
5177
5278> sortBy Ascending ["Age"] df
@@ -56,22 +82,24 @@ sortBy ::
5682 DataFrame ->
5783 DataFrame
5884sortBy sortOrds df
59- | any ( `notElem` columnNames df) names =
85+ | not ( null missing) =
6086 throw $
6187 ColumnsNotFoundException
62- (names L. \\ columnNames df)
88+ missing
6389 " sortBy"
6490 (columnNames df)
6591 | otherwise =
6692 let
67- comparators = map (`sortOrderComparator` df) sortOrds
93+ (sortOrds', df') = prepareSortColumns sortOrds df
94+ comparators = map (`sortOrderComparator` df') sortOrds'
6895 compositeCompare i j = mconcat [c i j | c <- comparators]
69- nRows = fst (dataframeDimensions df)
96+ nRows = fst (dataframeDimensions df' )
7097 indexes = sortIndices compositeCompare nRows
7198 in
7299 df{columns = V. map (atIndicesStable indexes) (columns df)}
73100 where
74- names = map getSortColumnName sortOrds
101+ referenced = L. nub (concatMap sortOrderColumns sortOrds)
102+ missing = referenced L. \\ columnNames df
75103
76104{- | Build a row-index comparator from a SortOrder and a DataFrame.
77105The Ord dictionary is recovered from the SortOrder GADT.
0 commit comments