Skip to content

Commit c342e42

Browse files
committed
refactor: Move mutable column to its own GADT.
1 parent 649fb4f commit c342e42

3 files changed

Lines changed: 26 additions & 25 deletions

File tree

src/DataFrame/IO/CSV.hs

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ import Control.Applicative ((<$>), (<|>), (<*>), (<*), (*>), many)
2424
import Control.Monad (forM_, zipWithM_, unless, void)
2525
import Data.Attoparsec.Text
2626
import Data.Char
27-
import DataFrame.Internal.Column (Column(..), freezeColumn', writeColumn, columnLength)
27+
import DataFrame.Internal.Column (Column(..), MutableColumn(..), freezeColumn', writeColumn, columnLength)
2828
import DataFrame.Internal.DataFrame (DataFrame(..))
2929
import DataFrame.Internal.Parsing
3030
import DataFrame.Operations.Typing
@@ -107,13 +107,13 @@ readSeparated c opts path = do
107107
}
108108
{-# INLINE readSeparated #-}
109109

110-
getInitialDataVectors :: Int -> VM.IOVector Column -> [T.Text] -> IO ()
110+
getInitialDataVectors :: Int -> VM.IOVector MutableColumn -> [T.Text] -> IO ()
111111
getInitialDataVectors n mCol xs = do
112112
forM_ (zip [0..] xs) $ \(i, x) -> do
113113
col <- case inferValueType x of
114-
"Int" -> MutableUnboxedColumn <$> ((VUM.unsafeNew n :: IO (VUM.IOVector Int)) >>= \c -> VUM.unsafeWrite c 0 (fromMaybe 0 $ readInt x) >> return c)
115-
"Double" -> MutableUnboxedColumn <$> ((VUM.unsafeNew n :: IO (VUM.IOVector Double)) >>= \c -> VUM.unsafeWrite c 0 (fromMaybe 0 $ readDouble x) >> return c)
116-
_ -> MutableBoxedColumn <$> ((VM.unsafeNew n :: IO (VM.IOVector T.Text)) >>= \c -> VM.unsafeWrite c 0 x >> return c)
114+
"Int" -> MUnboxedColumn <$> ((VUM.unsafeNew n :: IO (VUM.IOVector Int)) >>= \c -> VUM.unsafeWrite c 0 (fromMaybe 0 $ readInt x) >> return c)
115+
"Double" -> MUnboxedColumn <$> ((VUM.unsafeNew n :: IO (VUM.IOVector Double)) >>= \c -> VUM.unsafeWrite c 0 (fromMaybe 0 $ readDouble x) >> return c)
116+
_ -> MBoxedColumn <$> ((VM.unsafeNew n :: IO (VM.IOVector T.Text)) >>= \c -> VM.unsafeWrite c 0 x >> return c)
117117
VM.unsafeWrite mCol i col
118118
{-# INLINE getInitialDataVectors #-}
119119

@@ -128,7 +128,7 @@ inferValueType s = let
128128
{-# INLINE inferValueType #-}
129129

130130
-- | Reads rows from the handle and stores values in mutable vectors.
131-
fillColumns :: Int -> Char -> VM.IOVector Column -> VM.IOVector [(Int, T.Text)] -> Handle -> IO ()
131+
fillColumns :: Int -> Char -> VM.IOVector MutableColumn -> VM.IOVector [(Int, T.Text)] -> Handle -> IO ()
132132
fillColumns n c mutableCols nullIndices handle = do
133133
input <- newIORef (mempty :: T.Text)
134134
forM_ [1..n] $ \i -> do
@@ -148,7 +148,7 @@ fillColumns n c mutableCols nullIndices handle = do
148148
{-# INLINE fillColumns #-}
149149

150150
-- | Writes a value into the appropriate column, resizing the vector if necessary.
151-
writeValue :: VM.IOVector Column -> VM.IOVector [(Int, T.Text)] -> Int -> Int -> T.Text -> IO ()
151+
writeValue :: VM.IOVector MutableColumn -> VM.IOVector [(Int, T.Text)] -> Int -> Int -> T.Text -> IO ()
152152
writeValue mutableCols nullIndices count colIndex value = do
153153
col <- VM.unsafeRead mutableCols colIndex
154154
res <- writeColumn count value col
@@ -157,7 +157,7 @@ writeValue mutableCols nullIndices count colIndex value = do
157157
{-# INLINE writeValue #-}
158158

159159
-- | Freezes a mutable vector into an immutable one, trimming it to the actual row count.
160-
freezeColumn :: VM.IOVector Column -> V.Vector [(Int, T.Text)] -> ReadOptions -> Int -> IO Column
160+
freezeColumn :: VM.IOVector MutableColumn -> V.Vector [(Int, T.Text)] -> ReadOptions -> Int -> IO Column
161161
freezeColumn mutableCols nulls opts colIndex = do
162162
col <- VM.unsafeRead mutableCols colIndex
163163
freezeColumn' (nulls V.! colIndex) col

src/DataFrame/Internal/Column.hs

Lines changed: 10 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -54,9 +54,10 @@ data Column where
5454
GroupedBoxedColumn :: Columnable a => VB.Vector (VB.Vector a) -> Column
5555
GroupedUnboxedColumn :: (Columnable a, VU.Unbox a) => VB.Vector (VU.Vector a) -> Column
5656
GroupedOptionalColumn :: (Columnable a) => VB.Vector (VB.Vector (Maybe a)) -> Column
57-
-- These are used purely for I/O, not to store live data.
58-
MutableBoxedColumn :: Columnable a => VBM.IOVector a -> Column
59-
MutableUnboxedColumn :: (Columnable a, VU.Unbox a) => VUM.IOVector a -> Column
57+
58+
data MutableColumn where
59+
MBoxedColumn :: Columnable a => VBM.IOVector a -> MutableColumn
60+
MUnboxedColumn :: (Columnable a, VU.Unbox a) => VUM.IOVector a -> MutableColumn
6061

6162
-- | A TypedColumn is a wrapper around our type-erased column.
6263
-- It is used to type check expressions on columns.
@@ -515,14 +516,14 @@ zipWithColumns f left right = let
515516
{-# INLINE zipWithColumns #-}
516517

517518
-- Functions for mutable columns (intended for IO).
518-
writeColumn :: Int -> T.Text -> Column -> IO (Either T.Text Bool)
519-
writeColumn i value (MutableBoxedColumn (col :: VBM.IOVector a)) = let
519+
writeColumn :: Int -> T.Text -> MutableColumn -> IO (Either T.Text Bool)
520+
writeColumn i value (MBoxedColumn (col :: VBM.IOVector a)) = let
520521
in case testEquality (typeRep @a) (typeRep @T.Text) of
521522
Just Refl -> (if isNullish value
522523
then VBM.unsafeWrite col i "" >> return (Left $! value)
523524
else VBM.unsafeWrite col i value >> return (Right True))
524525
Nothing -> return (Left value)
525-
writeColumn i value (MutableUnboxedColumn (col :: VUM.IOVector a)) =
526+
writeColumn i value (MUnboxedColumn (col :: VUM.IOVector a)) =
526527
case testEquality (typeRep @a) (typeRep @Int) of
527528
Just Refl -> case readInt value of
528529
Just v -> VUM.unsafeWrite col i v >> return (Right True)
@@ -535,12 +536,12 @@ writeColumn i value (MutableUnboxedColumn (col :: VUM.IOVector a)) =
535536
writeColumn _ _ _ = error "Cannot write to immutable column"
536537
{-# INLINE writeColumn #-}
537538

538-
freezeColumn' :: [(Int, T.Text)] -> Column -> IO Column
539-
freezeColumn' nulls (MutableBoxedColumn col)
539+
freezeColumn' :: [(Int, T.Text)] -> MutableColumn -> IO Column
540+
freezeColumn' nulls (MBoxedColumn col)
540541
| null nulls = BoxedColumn <$> VB.unsafeFreeze col
541542
| all (isNullish . snd) nulls = OptionalColumn . VB.imap (\i v -> if i `elem` map fst nulls then Nothing else Just v) <$> VB.unsafeFreeze col
542543
| otherwise = BoxedColumn . VB.imap (\i v -> if i `elem` map fst nulls then Left (fromMaybe (error "") (lookup i nulls)) else Right v) <$> VB.unsafeFreeze col
543-
freezeColumn' nulls (MutableUnboxedColumn col)
544+
freezeColumn' nulls (MUnboxedColumn col)
544545
| null nulls = UnboxedColumn <$> VU.unsafeFreeze col
545546
| all (isNullish . snd) nulls = VU.unsafeFreeze col >>= \c -> return $ OptionalColumn $ VB.generate (VU.length c) (\i -> if i `elem` map fst nulls then Nothing else Just (c VU.! i))
546547
| otherwise = VU.unsafeFreeze col >>= \c -> return $ BoxedColumn $ VB.generate (VU.length c) (\i -> if i `elem` map fst nulls then Left (fromMaybe (error "") (lookup i nulls)) else Right (c VU.! i))

src/DataFrame/Lazy/IO/CSV.hs

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ import Control.Applicative ((<$>), (<|>), (<*>), (<*), (*>), many)
2424
import Control.Monad (forM_, zipWithM_, unless, when, void, replicateM_)
2525
import Data.Attoparsec.Text
2626
import Data.Char
27-
import DataFrame.Internal.Column (Column(..), freezeColumn', writeColumn, columnLength)
27+
import DataFrame.Internal.Column (Column(..), MutableColumn(..), freezeColumn', writeColumn, columnLength)
2828
import DataFrame.Internal.DataFrame (DataFrame(..))
2929
import DataFrame.Internal.Parsing
3030
import DataFrame.Operations.Typing
@@ -122,13 +122,13 @@ readSeparated c opts path = do
122122
}, (pos, unconsumed, r + 1))
123123
{-# INLINE readSeparated #-}
124124

125-
getInitialDataVectors :: Int -> VM.IOVector Column -> [T.Text] -> IO ()
125+
getInitialDataVectors :: Int -> VM.IOVector MutableColumn -> [T.Text] -> IO ()
126126
getInitialDataVectors n mCol xs = do
127127
forM_ (zip [0..] xs) $ \(i, x) -> do
128128
col <- case inferValueType x of
129-
"Int" -> MutableUnboxedColumn <$> ((VUM.unsafeNew n :: IO (VUM.IOVector Int)) >>= \c -> VUM.unsafeWrite c 0 (fromMaybe 0 $ readInt x) >> return c)
130-
"Double" -> MutableUnboxedColumn <$> ((VUM.unsafeNew n :: IO (VUM.IOVector Double)) >>= \c -> VUM.unsafeWrite c 0 (fromMaybe 0 $ readDouble x) >> return c)
131-
_ -> MutableBoxedColumn <$> ((VM.unsafeNew n :: IO (VM.IOVector T.Text)) >>= \c -> VM.unsafeWrite c 0 x >> return c)
129+
"Int" -> MUnboxedColumn <$> ((VUM.unsafeNew n :: IO (VUM.IOVector Int)) >>= \c -> VUM.unsafeWrite c 0 (fromMaybe 0 $ readInt x) >> return c)
130+
"Double" -> MUnboxedColumn <$> ((VUM.unsafeNew n :: IO (VUM.IOVector Double)) >>= \c -> VUM.unsafeWrite c 0 (fromMaybe 0 $ readDouble x) >> return c)
131+
_ -> MBoxedColumn <$> ((VM.unsafeNew n :: IO (VM.IOVector T.Text)) >>= \c -> VM.unsafeWrite c 0 x >> return c)
132132
VM.unsafeWrite mCol i col
133133
{-# INLINE getInitialDataVectors #-}
134134

@@ -154,7 +154,7 @@ readSingleLine c unused handle = parseWith (TIO.hGetChunk handle) (parseRow c) u
154154
return (row, unconsumed)
155155

156156
-- | Reads rows from the handle and stores values in mutable vectors.
157-
fillColumns :: Int -> Char -> VM.IOVector Column -> VM.IOVector [(Int, T.Text)] -> T.Text -> Handle -> IO (T.Text, Int)
157+
fillColumns :: Int -> Char -> VM.IOVector MutableColumn -> VM.IOVector [(Int, T.Text)] -> T.Text -> Handle -> IO (T.Text, Int)
158158
fillColumns n c mutableCols nullIndices unused handle = do
159159
input <- newIORef unused
160160
rowsRead <- newIORef (0 :: Int)
@@ -179,7 +179,7 @@ fillColumns n c mutableCols nullIndices unused handle = do
179179
{-# INLINE fillColumns #-}
180180

181181
-- | Writes a value into the appropriate column, resizing the vector if necessary.
182-
writeValue :: VM.IOVector Column -> VM.IOVector [(Int, T.Text)] -> Int -> Int -> T.Text -> IO ()
182+
writeValue :: VM.IOVector MutableColumn -> VM.IOVector [(Int, T.Text)] -> Int -> Int -> T.Text -> IO ()
183183
writeValue mutableCols nullIndices count colIndex value = do
184184
col <- VM.unsafeRead mutableCols colIndex
185185
res <- writeColumn count value col
@@ -188,7 +188,7 @@ writeValue mutableCols nullIndices count colIndex value = do
188188
{-# INLINE writeValue #-}
189189

190190
-- | Freezes a mutable vector into an immutable one, trimming it to the actual row count.
191-
freezeColumn :: VM.IOVector Column -> V.Vector [(Int, T.Text)] -> ReadOptions -> Int -> IO Column
191+
freezeColumn :: VM.IOVector MutableColumn -> V.Vector [(Int, T.Text)] -> ReadOptions -> Int -> IO Column
192192
freezeColumn mutableCols nulls opts colIndex = do
193193
col <- VM.unsafeRead mutableCols colIndex
194194
freezeColumn' (nulls V.! colIndex) col

0 commit comments

Comments
 (0)