diff --git a/bench-unions/Bench/Unions.hs b/bench-unions/Bench/Unions.hs new file mode 100644 index 000000000..d5b6609a1 --- /dev/null +++ b/bench-unions/Bench/Unions.hs @@ -0,0 +1,561 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE DuplicateRecordFields #-} +{-# LANGUAGE NondecreasingIndentation #-} +{-# LANGUAGE OverloadedStrings #-} + +{-# OPTIONS_GHC -Wno-orphans #-} + +{- | Benchmarks for table unions of an LSM tree. + +Here is a summary of the table union benchmark. +Note that /all/ function calls are made through +the "Database.LSMTree.Simple" module's API. + +__Phase 1: Setup__ + +The benchmark will setup an initial set of @--table-count@ tables +to be unioned together during "__Phase 2__." +The size of each generated table is the same +and is equal to @--initial-size@. +Each created table has @--initial-size@ insertions operations performed on it +before being written out to disk as a snapshot. +The @--initial-size@ inserted keys in each table are randomly selected +from the following range. +Each key is unique, meaning that keys are randomly sampled +from the range without replacement. + +\[ +\left[\quad 0,\quad 2 * initialSize \quad\right) +\] + +Additionally, the directory in which to isolate the benchmark environment +is specified via the @--bench-dir@ command line option. + +__Phase 2: Measurement__ + +When generating measurements for the table unions, +the benchmark will reload the snapshots of the tables generated +in __Phase 1__ from disk. +Subsequently, the tables will be "incrementally unioned" together. + +Once the tables have been loaded and the union initiated, +serveral iterations of lookups will be performed. +One iteration involves performing a @--batch-count@ number of batches +of @--batch-size@ lookups each. +We measure the time spent on running an iteration, +and we compute how many lookups per second were performed during the iteration. + +First, 50 iterations are performed /without/ supplying any credits to the unioned table. +This establishes a base-line performance picture. +Iterations \( \left[ 0, 50 \right) \) measure lookups per seconds +on the unioned table with \(100\%\) of the debt remaining. + +Subsequently, 100 more iterations are performed. +Before each of these iterations, +a fixed number of credits are supplied to the incremental union table. +The series of measurements allows reasoning about table performance over time +as the tables debt decreases (at a uniform rate). +The number of credits supplied before each iteration is +\(1%\) of the total starting debt. +After 100 steps, \(100\%\) of the debt will be paid off. +Iterations \( \left[ 50, 100 \right) \) measure lookups per second +on the unioned table as the remaining debt decreases. + +Finally, 50 concluding iterations are performed. +Since no debt is remaining, no credits are supplied. +Rather, these measurements create a "post-payoff" performance picture. +Iterations \( \left[ 150, 200 \right) \) measure lookups per seconds +on to the unioned table with \(0\%\) of the debt remaining. + +__Results__ + +An informative gnuplot script and data file of the benchmark measurements is +generated and placed in the @bench-unions@ directory. +Run the following command in a shell to generate a PNG of the graph. + +@ + cd bench-unions && gnuplot unions-bench.gnuplot && cd .. +@ + +TODO: explain the baseline table + +TODO: explain the seed + +TODO: explain collisions analysis +-} +module Bench.Unions (main) where + +import Control.Applicative ((<**>)) +import Control.Concurrent.Async (forConcurrently_) +import Control.Monad (forM_, void, (>=>)) +import Control.Monad.State.Strict (MonadState (..), runState) +import qualified Data.ByteString.Short as BS +import Data.Foldable (traverse_) +import Data.IORef +import qualified Data.List as List +import Data.List.NonEmpty (NonEmpty (..)) +import qualified Data.List.NonEmpty as NE +import Data.Monoid +import qualified Data.Primitive as P +import qualified Data.Set as Set +import qualified Data.Vector as V +import Data.Word (Word64) +import qualified Options.Applicative as O +import Prelude hiding (lookup) +import qualified System.Clock as Clock +import System.Directory (createDirectoryIfMissing) +import System.IO +import System.Mem (performMajorGC) +import qualified System.Random as Random +import System.Random (StdGen) +import Text.Printf (printf) +import qualified Text.Read as Read + +import Database.LSMTree.Extras (groupsOfN) +import qualified Database.LSMTree.Extras.Random as Random +import Database.LSMTree.Internal.ByteString (byteArrayToSBS) + +import qualified Database.LSMTree.Simple as LSM + +------------------------------------------------------------------------------- +-- Constant Values +------------------------------------------------------------------------------- + +baselineTableID :: Int +baselineTableID = 0 + +baselineTableName :: LSM.SnapshotName +baselineTableName = makeTableName baselineTableID + +defaultBenchDir :: FilePath +defaultBenchDir = "_bench_unions" + +defaultInitialSize :: Int +defaultInitialSize = 1_000_000 + +defaultTableCount :: Int +defaultTableCount = 10 + +-- | The default seed is the first 20 digits of Pi. +defaultSeed :: Int +defaultSeed = 1415926535897932384 + +------------------------------------------------------------------------------- +-- Keys and values +------------------------------------------------------------------------------- + +type K = BS.ShortByteString +type V = BS.ShortByteString + +label :: LSM.SnapshotLabel +label = LSM.SnapshotLabel "K V B" + +-- | We generate 34 byte keys by using a PRNG to extend a word64 to 32 bytes +-- and then appending two constant bytes. This corresponds relatively closely +-- to UTxO keys, which are 32 byte cryptographic hashes, followed by two bytes +-- which are typically the 16bit value 0 or 1 (a transaction output index). +-- +makeKey :: Word64 -> K +makeKey seed = + case P.runPrimArray $ do + v <- P.newPrimArray 5 + let g0 = Random.mkStdGen (fromIntegral seed) + let (!w0, !g1) = Random.uniform g0 + P.writePrimArray v 0 w0 + let (!w1, !g2) = Random.uniform g1 + P.writePrimArray v 1 w1 + let (!w2, !g3) = Random.uniform g2 + P.writePrimArray v 2 w2 + let (!w3, _g4) = Random.uniform g3 + P.writePrimArray v 3 w3 + P.writePrimArray v 4 0x3d3d3d3d3d3d3d3d -- ======== + case v of + P.MutablePrimArray mba -> do + _ <- P.resizeMutableByteArray (P.MutableByteArray mba) 34 + pure v + + of (P.PrimArray ba :: P.PrimArray Word64) -> + byteArrayToSBS (P.ByteArray ba) + +-- We use constant value. This shouldn't affect anything. +theValue :: V +theValue = BS.replicate 60 120 -- 'x' +{-# NOINLINE theValue #-} + +------------------------------------------------------------------------------- +-- Options and commands +------------------------------------------------------------------------------- + +data GlobalOpts = GlobalOpts + { rootDir :: !FilePath -- ^ Session directory. + , tableCount :: !Int -- ^ Number of tables in the benchmark + , initialSize :: !Int -- ^ Initial size of each table in the benchmark + , seed :: !Int -- ^ The seed for the RNG for deterministic behavior, + -- use system entropy when not explicitly provided. + } + deriving stock Show + +data RunOpts = RunOpts + { batchCount :: !Int + , batchSize :: !Int + } + deriving stock Show + +data Cmd + -- | Setup benchmark: generate initial LSM trees etc. + = CmdSetup + + -- | Run collision analysis. + | CmdCollisions + + -- | Run the actual benchmark + | CmdRun RunOpts + deriving stock Show + +------------------------------------------------------------------------------- +-- command line interface +------------------------------------------------------------------------------- + +globalOptsP :: O.Parser GlobalOpts +globalOptsP = pure GlobalOpts + <*> O.option O.str (O.long "bench-dir" <> O.value defaultBenchDir <> O.showDefault <> O.help "Benchmark directory to put files in") + <*> O.option (positiveParser "number of tables") (O.long "table-count" <> O.value defaultTableCount <> O.showDefault <> O.help "Number of tables to benchmark") + <*> O.option (positiveParser "size of tables") (O.long "initial-size" <> O.value defaultInitialSize <> O.showDefault <> O.help "Initial size of each table") + <*> O.option O.auto (O.long "seed" <> O.value defaultSeed <> O.showDefault <> O.help "Random seed, defaults to the first 20 digits of Pi") + where + positiveParser str = + let validator v + | v >= 1 = Right v + | otherwise = Left $ unwords + [ "Non-positive number for", str <> ":", show v ] + in O.eitherReader $ Read.readEither >=> validator + +cmdP :: O.Parser Cmd +cmdP = O.subparser $ mconcat + [ O.command "setup" $ O.info + (CmdSetup <$ O.helper) + (O.progDesc "Setup benchmark by generating required tables") + , O.command "collisions" $ O.info + (CmdCollisions <$ O.helper) + (O.progDesc "Collision analysis, compute shared keys between tables") + , O.command "run" $ O.info + (CmdRun <$> runOptsP <**> O.helper) + (O.progDesc "Proper run, measuring performance and generating a benchmark report") + ] + +runOptsP :: O.Parser RunOpts +runOptsP = pure RunOpts + <*> O.option O.auto (O.long "batch-count" <> O.value 200 <> O.showDefault <> O.help "Batch count") + <*> O.option O.auto (O.long "batch-size" <> O.value 256 <> O.showDefault <> O.help "Batch size") + +------------------------------------------------------------------------------- +-- measurements +------------------------------------------------------------------------------- + + +-- | Returns number of seconds elapsed +timed :: IO a -> IO (a, Double) +timed action = do + performMajorGC + t1 <- Clock.getTime Clock.Monotonic + x <- action + t2 <- Clock.getTime Clock.Monotonic + performMajorGC + let !t = fromIntegral (Clock.toNanoSecs (Clock.diffTimeSpec t2 t1)) * 1e-9 + pure (x, t) + +-- | Returns number of seconds elapsed +timed_ :: IO () -> IO Double +timed_ action = do + ((), t) <- timed action + pure t + +------------------------------------------------------------------------------- +-- Setup +------------------------------------------------------------------------------- + +doSetup :: GlobalOpts -> IO () +doSetup gopts = do + -- Define some constants + let populationBatchSize = 256 + entryCount = initialSize gopts + -- The key size is twice the specified size because we will delete half + -- of the keys in the domain of each table uniformly at random. + keyMax = 2 * entryCount - 1 + keyMin = 0 + tableIDs = tableRange gopts + + -- Setup RNG + let tableRNGs = deriveSetupRNGs gopts $ length tableIDs + + -- Ensure that our mount point exists on the real file system + createDirectoryIfMissing True $ rootDir gopts + + -- Populate the specified number of tables + LSM.withSession (rootDir gopts) $ \session -> do + -- Create a "baseline" table + -- + -- We create a single table that *already has* all the same key value pairs + -- which exist in the union of all tables *which are going* to be unioned. + -- This way we can compare performance of the union of tables to a + -- "baseline" table since both share all the same key value pairs. + table_0 <- LSM.newTable @K @V session + + forConcurrently_ (NE.zip tableIDs tableRNGs) $ \(tID, tRNG) -> do + -- Create a new table + table_n <- LSM.newTable @K @V session + -- Populate the table in batches + forM_ (groupsOfN (populationBatchSize * 2) [ keyMin .. keyMax ]) $ \batch -> do + let prunedBatch = + Random.sampleUniformWithoutReplacement + tRNG (NE.length batch `div` 2) $ NE.toList batch + let keyInserts = V.fromList [ + (makeKey (fromIntegral k), theValue) + | k <- prunedBatch + ] + -- Insert the batch of the randomly selected keys + -- into both the baseline table (0) and the current table + LSM.inserts table_0 keyInserts + LSM.inserts table_n keyInserts + LSM.saveSnapshot (makeTableName tID) label table_n + + -- Finally, save the baseline table + LSM.saveSnapshot baselineTableName label table_0 + +makeTableName :: Show a => a -> LSM.SnapshotName +makeTableName n = LSM.toSnapshotName $ "bench_" <> show n + +tableRange :: GlobalOpts -> NonEmpty Int +tableRange gopts = + let n1 = succ baselineTableID + in NE.fromList [ n1 .. tableCount gopts + baselineTableID ] + +------------------------------------------------------------------------------- +-- Collision analysis +------------------------------------------------------------------------------- + +-- | Count duplicate keys in all tables that will be unioned together +doCollisionAnalysis :: GlobalOpts -> IO () +doCollisionAnalysis gopts = do + LSM.withSession (rootDir gopts) $ \session -> do + seenRef <- newIORef Set.empty + dupRef <- newIORef Set.empty + + forM_ (tableRange gopts) $ \tID -> do + let name = makeTableName tID + LSM.withTableFromSnapshot session name label $ \(table :: LSM.Table K V) -> do + LSM.withCursor table $ \cursor -> do + streamCursor cursor $ \(k, _) -> do + seen <- readIORef seenRef + if Set.member k seen then + modifyIORef dupRef $ Set.insert k + else + modifyIORef seenRef $ Set.insert k + + seen <- readIORef seenRef + dups <- readIORef dupRef + printf "Keys seen at least once: %d\n" $ Set.size seen + printf "Keys seen at least twice: %d\n" $ Set.size dups + +streamCursor :: LSM.Cursor K V -> ((K, V) -> IO ()) -> IO () +streamCursor cursor f = go + where + go = LSM.next cursor >>= \case + Nothing -> pure () + Just kv -> f kv >> go + +------------------------------------------------------------------------------- +-- run +------------------------------------------------------------------------------- + +doRun :: GlobalOpts -> RunOpts -> IO () +doRun gopts opts = do + -- Perform 3 measurement phases + -- * Phase 1: Measure performance before supplying any credits. + -- * Phase 2: Measure performance as credits are incrementally supplied and debt is repaid. + -- * Phase 3: Measure performance when debt is 0. + + let rng = deriveRunRNG gopts + dataPath = "bench-unions/unions-bench.dat" + + withFile dataPath WriteMode $ \h -> do + hPutStrLn h "# iteration \t baseline (ops/sec) \t union (ops/sec) \t union debt" + + LSM.withSession (rootDir gopts) $ \session -> do + -- Load the baseline table + LSM.withTableFromSnapshot session baselineTableName label + $ \baselineTable -> do + -- Load the union tables + withTablesFromSnapshots session label (makeTableName <$> tableRange gopts) + $ \inputTables -> do + -- Start the incremental union + LSM.withIncrementalUnions inputTables $ \unionedTable -> do + let measurePerformance :: Int -> Maybe LSM.UnionCredits -> IO () + measurePerformance iteration mayCredits = do + LSM.supplyUnionCredits unionedTable `traverse_` mayCredits + LSM.UnionDebt currDebt <- LSM.remainingUnionDebt unionedTable + + baselineOpsSec <- timeOpsPerSecond gopts opts baselineTable rng + unionOpsSec <- timeOpsPerSecond gopts opts unionedTable rng + + printf "iteration: %d, baseline: %7.01f ops/sec, union: %7.01f ops/sec, debt: %d\n" + iteration baselineOpsSec unionOpsSec currDebt + + hPutStrLn h $ unwords [ show iteration, show baselineOpsSec + , show unionOpsSec, show currDebt ] + + LSM.UnionDebt totalDebt <- LSM.remainingUnionDebt unionedTable + + -- Phase 1 measurements: Debt = 100% + forM_ [0..50-1] $ \step -> do + measurePerformance step Nothing + + -- Phase 2 measurements: Debt ∈ [0%, 99%] + forM_ [50..150-1] $ \step -> do + let creditsPerIteration = LSM.UnionCredits ((totalDebt + 99) `div` 100) + measurePerformance step (Just creditsPerIteration) + + -- Phase 3 measurements: Debt = 0% + forM_ [150..200-1] $ \step -> do + measurePerformance step Nothing + +-- | Exception-safe opening of multiple snapshots +withTablesFromSnapshots :: + LSM.Session + -> LSM.SnapshotLabel + -> (NonEmpty LSM.SnapshotName) + -> (NonEmpty (LSM.Table K V) -> IO a) + -> IO a +withTablesFromSnapshots session snapLabel (x0 :| xs0) k = do + LSM.withTableFromSnapshot session x0 snapLabel $ \t0 -> go' (NE.singleton t0) xs0 + where + go' acc [] = k (NE.reverse acc) + go' acc (x:xs) = + LSM.withTableFromSnapshot session x snapLabel $ \t -> + go' (t NE.<| acc) xs + +-- | Returns operations per second +timeOpsPerSecond :: GlobalOpts -> RunOpts -> LSM.Table K V -> StdGen -> IO Double +timeOpsPerSecond gopts opts table g = do + t <- timed_ $ + sequentialIterations + (initialSize gopts) (batchSize opts) (batchCount opts) table g + + let ops = batchCount opts * batchSize opts + opsPerSec = fromIntegral ops / t + + pure opsPerSec + +------------------------------------------------------------------------------- +-- PRNG initialisation +------------------------------------------------------------------------------- + +deriveSetupRNGs :: GlobalOpts -> Int -> NonEmpty Random.StdGen +deriveSetupRNGs gOpts amount = + let -- g1 is reserved for the run command + (_g1, !g2) = deriveInitialForkedRNGs gOpts + in NE.fromList $ take amount $ List.unfoldr (Just . Random.splitGen) g2 + +deriveRunRNG :: GlobalOpts -> Random.StdGen +deriveRunRNG gOpts = + let -- g2 and its splits are reserved for the setup command + (!g1, _g2) = deriveInitialForkedRNGs gOpts + in g1 + +deriveInitialForkedRNGs :: GlobalOpts -> (Random.StdGen, Random.StdGen) +deriveInitialForkedRNGs = Random.splitGen . Random.mkStdGen . seed + +------------------------------------------------------------------------------- +-- Batch generation +------------------------------------------------------------------------------- + +generateBatch :: + Int -- ^ initial size of the table + -> Int -- ^ batch size + -> StdGen + -> (StdGen, V.Vector K) +generateBatch initialSize batchSize g = + (g', V.map makeKey lookups) + where + (!g', lookups) = generateBatch' initialSize batchSize g + +{-# INLINE generateBatch' #-} +generateBatch' :: + Int -- ^ initial size of the table + -> Int -- ^ batch size + -> StdGen + -> (StdGen, V.Vector Word64) +generateBatch' initialSize batchSize g = (g', lookups) + where + randomKey :: StdGen -> (Word64, StdGen) + randomKey = Random.uniformR (0, 2 * fromIntegral initialSize - 1) + + lookups :: V.Vector Word64 + (lookups, !g') = + runState (V.replicateM batchSize (state randomKey)) g + +------------------------------------------------------------------------------- +-- sequential +------------------------------------------------------------------------------- + +{-# INLINE sequentialIteration #-} +sequentialIteration :: + Int -- ^ initial size of the table + -> Int -- ^ batch size + -> LSM.Table K V + -> StdGen + -> IO StdGen +sequentialIteration !initialSize !batchSize !tbl !g = do + let (!g', ls) = generateBatch initialSize batchSize g + + -- lookups + !_ <- LSM.lookups tbl ls + + -- continue to the next batch + pure g' + +sequentialIterations :: + Int -- ^ initial size of the table + -> Int -- ^ batch size + -> Int -- ^ batch count + -> LSM.Table K V + -> StdGen + -> IO () +sequentialIterations !initialSize !batchSize !batchCount !tbl !g0 = do + void $ forFoldM_ g0 [ 0 .. batchCount - 1 ] $ \_b g -> + sequentialIteration initialSize batchSize tbl g + +------------------------------------------------------------------------------- +-- main +------------------------------------------------------------------------------- + +main :: IO () +main = do + hSetBuffering stdout NoBuffering +#ifdef NO_IGNORE_ASSERTS + putStrLn "WARNING: Benchmarking in debug mode." + putStrLn " To benchmark in release mode, pass:" + putStrLn " --project-file=cabal.project.release" +#endif + (gopts, cmd) <- O.customExecParser prefs cliP + print gopts + print cmd + case cmd of + CmdCollisions -> doCollisionAnalysis gopts + CmdRun opts -> doRun gopts opts + CmdSetup -> doSetup gopts + where + cliP = O.info ((,) <$> globalOptsP <*> cmdP <**> O.helper) O.fullDesc + prefs = O.prefs $ O.showHelpOnEmpty <> O.helpShowGlobals <> O.subparserInline + +------------------------------------------------------------------------------- +-- general utils +------------------------------------------------------------------------------- + +forFoldM_ :: Monad m => s -> [a] -> (a -> s -> m s) -> m s +forFoldM_ !s0 xs0 f = go s0 xs0 + where + go !s [] = pure s + go !s (x:xs) = do + !s' <- f x s + go s' xs diff --git a/bench-unions/Main.hs b/bench-unions/Main.hs new file mode 100644 index 000000000..f29abfb97 --- /dev/null +++ b/bench-unions/Main.hs @@ -0,0 +1,6 @@ +module Main (main) where + +import qualified Bench.Unions + +main :: IO () +main = Bench.Unions.main diff --git a/bench-unions/unions-bench.dat b/bench-unions/unions-bench.dat new file mode 100644 index 000000000..3f993241b --- /dev/null +++ b/bench-unions/unions-bench.dat @@ -0,0 +1,201 @@ +# iteration baseline (ops/sec) union (ops/sec) union debt +0 270435.00950219884 78848.98202622362 23750011 +1 284344.70422066783 70935.13680007386 23750011 +2 292068.85651072365 76117.21959644658 23750011 +3 293393.49871294515 75251.17180331067 23750011 +4 277532.7307711082 76616.18948687254 23750011 +5 282077.4541716418 77923.19395678357 23750011 +6 291626.58842082275 79817.59758341247 23750011 +7 282599.89140326437 75109.49541458584 23750011 +8 292618.5384840185 75512.43375419179 23750011 +9 291869.3843005077 78976.47743926324 23750011 +10 297169.5844956877 78668.30852753064 23750011 +11 286610.599587695 76330.8289342413 23750011 +12 280651.63252779824 77557.77463059168 23750011 +13 288061.02500799537 78070.01838902687 23750011 +14 290204.9086981348 76542.2147760733 23750011 +15 287933.3407322454 75739.25350155069 23750011 +16 289937.75166717183 76111.07874408204 23750011 +17 291920.00491018564 78684.56254833356 23750011 +18 284838.22854810837 79847.39628003737 23750011 +19 292788.1185483537 78458.93966475972 23750011 +20 292373.71711038373 76331.51741100667 23750011 +21 288929.1758238264 76341.22090579226 23750011 +22 291494.952702268 77910.7945323653 23750011 +23 262662.8772758494 77631.18400630096 23750011 +24 278838.3452937465 79268.85087278658 23750011 +25 288572.83800497063 75237.08948223019 23750011 +26 290299.94101751567 75959.71644973708 23750011 +27 289374.95642420085 75283.47300681961 23750011 +28 286841.1745626198 75716.82978709076 23750011 +29 289379.2169779329 80026.71179102019 23750011 +30 291163.274793649 79996.0548195652 23750011 +31 282125.1329108307 77037.3266581533 23750011 +32 288113.37135113066 75242.65388482083 23750011 +33 287518.7773066326 75907.2693314036 23750011 +34 289837.93389255257 75934.27102211447 23750011 +35 289518.5563130047 76656.66575657837 23750011 +36 287220.8923265365 78326.24272527618 23750011 +37 291713.87479329406 76321.49316370946 23750011 +38 294136.6439469663 76971.68044553205 23750011 +39 287103.6205078699 78810.14778549717 23750011 +40 292893.2027176095 77910.12552387921 23750011 +41 288009.88845950784 76905.82533808656 23750011 +42 296528.6834917582 78575.97714269023 23750011 +43 281455.59245335753 77092.99980926108 23750011 +44 282233.12439537956 78498.69203164238 23750011 +45 287098.001957156 78710.00391313242 23750011 +46 293631.0642212888 78353.41354313104 23750011 +47 291406.34106003505 80761.34234808892 23750011 +48 291888.7458291977 79596.70761029066 23750011 +49 286065.42144102126 77952.04580150402 23750011 +50 292436.82741957146 76538.77625645715 23512510 +51 291507.6505243451 70478.58577273347 23275009 +52 286134.5042523555 79435.90515790442 23037508 +53 282399.2027341008 80235.75559234167 22800007 +54 292980.03191731335 73254.21223130531 22562506 +55 284991.9424651142 81300.0991019626 22325005 +56 291183.6903647271 79833.11959107031 22087504 +57 292955.80667818856 80198.62467566508 21850003 +58 286275.12456308724 81717.84769585555 21612502 +59 290168.2600162867 81373.68778690323 21375001 +60 287823.5326937598 81991.38852453299 21137500 +61 288447.9757512603 79243.72341034513 20899999 +62 293583.72707144776 79332.54665093463 20662498 +63 293555.35735345073 79009.61996905183 20424997 +64 289821.8046483749 78078.77642554266 20187496 +65 290431.58837421413 84370.5533875802 19949995 +66 273639.4385235117 84175.34988010273 19712494 +67 290314.688080399 83691.9000000925 19474993 +68 290763.989765789 84829.0219700871 19237492 +69 294835.9557216053 86578.50920444708 18999991 +70 294555.05996922404 87095.79261540528 18762490 +71 290861.5895757433 86315.32349160843 18524989 +72 291360.73812874145 85308.5853127341 18287488 +73 275272.3824256412 88734.04935922656 18049987 +74 292539.25426841766 85745.16737222212 17812486 +75 292374.8924979777 84994.16186390443 17574985 +76 292320.2970394428 84235.85664748496 17337484 +77 296590.54226286046 87065.23794878516 17099983 +78 285970.33768204105 87650.93223040657 16862482 +79 293618.7195651222 90585.57959842899 16624981 +80 290142.782606763 85948.12341152717 16387480 +81 290218.6014932427 84575.88230340928 16149979 +82 287992.3048456145 86081.77621459846 15912478 +83 280569.3960810056 85591.4551937241 15674977 +84 292539.38631453214 85229.21519332279 15437476 +85 292073.6632825236 88124.77229879849 15199975 +86 292308.5763479029 86383.22118226721 14962474 +87 292380.3104223456 85077.62727809533 14724973 +88 286128.91717193753 83726.10018233761 14487472 +89 289687.3345755187 81575.35560836473 14249971 +90 292356.56821170944 90520.26522437712 14012470 +91 294593.76934638136 86623.5949564075 13774969 +92 292042.59117207496 91045.10311103323 13537468 +93 290829.22358600236 87219.45001503317 13299967 +94 292268.7884162919 89794.5805684005 13062466 +95 284993.37176108995 84855.05348138904 12824965 +96 291589.67446336447 88293.58883368778 12587464 +97 286884.9958233859 89027.40796216587 12349963 +98 289975.19613338914 86563.1625859566 12112462 +99 287307.124750942 88471.43702765886 11874961 +100 279535.9851150582 84203.0652040546 11637460 +101 177010.45283024933 84972.9648305678 11399959 +102 285180.06483735866 86723.68558156378 11162458 +103 290811.96632170235 88430.23778438426 10924957 +104 292385.3127890392 89274.08159763696 10687456 +105 293905.7151728602 85456.04836120678 10449955 +106 288837.7451766339 85996.32761081017 10212454 +107 282722.9662922549 97943.32035354784 9974943 +108 290014.37349752587 94676.59189979771 9737442 +109 291124.2863425017 97559.70610664441 9499941 +110 293707.20103112335 96475.88860553975 9262440 +111 293252.72839159676 95134.64180095124 9024939 +112 287799.56055705226 92729.44381794214 8787438 +113 289015.9738790072 93675.93969001756 8549937 +114 280285.55492335587 95288.6845608332 8312436 +115 285174.3148329226 93706.58229277191 8074935 +116 294115.74632229656 95618.85178253261 7837434 +117 292688.0488701235 99453.58433136284 7599933 +118 279152.8570057264 95194.10762752903 7362432 +119 291038.3135739445 100467.2397739253 7124931 +120 277145.5455165115 94962.50031945514 6887430 +121 289351.6343645597 96113.10097142676 6649929 +122 290165.031928638 95104.11483964522 6412428 +123 290572.6803883712 94154.90349643737 6174927 +124 287742.00620110997 94978.7854865322 5937426 +125 290652.66424749413 99721.56414862679 5699925 +126 288176.75447312935 94659.29870238666 5462424 +127 291964.39288941247 93380.66930777111 5224923 +128 292652.162105314 93879.08236306734 4987422 +129 294808.21260366024 93472.05396720032 4749921 +130 283532.3853431721 94093.54043213766 4512420 +131 283014.063068291 96642.33697720809 4274919 +132 291010.95301000105 97805.26475883259 4037418 +133 293211.48746714456 95581.15156567172 3799917 +134 290821.47102657583 94231.06922067291 3562416 +135 280732.6934682242 94602.23186213626 3324915 +136 282595.6503208669 89180.51785259499 3087414 +137 286840.8194183929 96753.53747279057 2849913 +138 293443.13914466137 89648.57731428568 2612412 +139 292214.6094862793 94989.31123499028 2374911 +140 290131.14545436547 96759.6767925933 2137410 +141 286901.4155687826 94939.0723033371 1899909 +142 278207.37814878183 94951.96131769851 1662408 +143 290487.48793228733 95515.78766929201 1424907 +144 294280.1841173168 92906.3276977365 1187406 +145 291894.46193141217 96357.03974906112 949905 +146 290904.3504904704 98821.43071887837 712404 +147 289685.01043810905 96182.28105221462 474903 +148 291648.87152848975 93408.55541328367 237402 +149 292524.01795016497 547131.1488110594 0 +150 287100.30569734017 540337.6686179364 0 +151 289151.0851693392 541379.7935321272 0 +152 285291.28089332633 558270.8137261609 0 +153 285484.49140907184 639442.438166041 0 +154 290266.738862203 589163.5990747692 0 +155 288927.72634318745 559787.3437854376 0 +156 286775.56815771124 546038.0635242126 0 +157 292531.186309833 579294.262545166 0 +158 285300.3804558585 579842.0734427742 0 +159 270097.2477755144 567785.6309381422 0 +160 289471.77473234123 544411.9984108699 0 +161 290641.09504890797 538753.8933912953 0 +162 286181.9920495736 630487.8301071731 0 +163 290103.75793212093 570378.8261209395 0 +164 274401.0945559535 538652.0113481774 0 +165 294041.3952739726 605165.0506120097 0 +166 286132.07047155703 602865.5471123764 0 +167 295226.6498236788 560583.5876607276 0 +168 291422.89265019965 542653.4745911201 0 +169 291953.7345478639 571719.9316464155 0 +170 295557.4829777506 571505.9926310106 0 +171 290326.7004966452 643649.654453163 0 +172 290849.7972117957 640675.7848178258 0 +173 291553.2347108202 607374.7369382313 0 +174 292410.7380099218 663501.0839884165 0 +175 288967.3061373935 620844.4347674629 0 +176 294468.33977059147 665598.969652795 0 +177 285543.5475632536 640298.4591192569 0 +178 292566.4950644175 553644.4668896332 0 +179 286388.2858420449 590583.6648387872 0 +180 284648.7115225965 557030.8047391397 0 +181 290641.603203152 579252.8945504725 0 +182 291848.2436330441 553001.4341854578 0 +183 285171.91006557975 563149.8227876398 0 +184 289350.88542585983 553655.3211012706 0 +185 282636.73443548486 584611.3819245948 0 +186 291866.84033731755 577547.4155711972 0 +187 276845.56731828995 556827.2193849651 0 +188 276998.7533487161 586004.6053781465 0 +189 287816.1514970231 597208.7535966373 0 +190 291613.37033633614 584931.5130310345 0 +191 288271.50004434143 632766.959529424 0 +192 288607.5571482985 634087.8520545233 0 +193 283248.97797047114 627653.675608574 0 +194 287722.91276470566 604715.1434986674 0 +195 292162.7679865842 631442.509570578 0 +196 289012.49893257295 634480.9044049431 0 +197 291123.2716233118 574519.2219488156 0 +198 282803.61004331725 563891.6497613896 0 +199 291613.1361488593 655290.6754888586 0 diff --git a/bench-unions/unions-bench.gnuplot b/bench-unions/unions-bench.gnuplot new file mode 100644 index 000000000..5c2ab191f --- /dev/null +++ b/bench-unions/unions-bench.gnuplot @@ -0,0 +1,14 @@ +set term png size 1800, 1200 +set output "unions-bench.png" +set title "Lookup performance on incremental union tables" +# set subtitle "blah" + +set xlabel "Iteration" +set grid xtics +set xtics 0,50,150 + +set ylabel "Lookups per second" + +plot "unions-bench.dat" using 1 : 2 title "Baseline table" axis x1y1, \ + "unions-bench.dat" using 1 : 3 title "Union table" axis x1y1, \ + "unions-bench.dat" using 1 : 4 title "Current union debt" axis x1y2 diff --git a/bench-unions/unions-bench.png b/bench-unions/unions-bench.png new file mode 100644 index 000000000..ae501b24b Binary files /dev/null and b/bench-unions/unions-bench.png differ diff --git a/cabal.project.release b/cabal.project.release index 6c37f80ad..d97d05787 100644 --- a/cabal.project.release +++ b/cabal.project.release @@ -10,7 +10,7 @@ packages: tests: True benchmarks: True --- this prevents occurence of Hackage bloomfilter anywhere in the install plan +-- this prevents occurrence of Hackage bloomfilter anywhere in the install plan -- that is overconstraining, as we'd only need to make sure lsm-tree -- doesn't depend on Hackage bloomfilter. -- Luckily, bloomfilter is not commonly used package, so this is good enough. diff --git a/lsm-tree.cabal b/lsm-tree.cabal index 0ae17ab31..f05d07216 100644 --- a/lsm-tree.cabal +++ b/lsm-tree.cabal @@ -963,6 +963,29 @@ library mcg , base <5 , primes +benchmark unions-bench + import: language, warnings, wno-x-partial + type: exitcode-stdio-1.0 + hs-source-dirs: bench-unions + main-is: Main.hs + other-modules: Bench.Unions + build-depends: + , async + , base + , bytestring + , clock + , containers + , directory + , lsm-tree + , lsm-tree:extras + , mtl + , optparse-applicative + , primitive + , random + , vector + + ghc-options: -rtsopts -with-rtsopts=-T -threaded + flag measure-batch-latency description: Measure the latency for individual batches of updates and lookups