Skip to content

Commit 32be305

Browse files
committed
Lift the 8-byte key restriction for the compact index
Now that `topBits64` is fully safe, remove the 8-byte key constraint when using the compact index. Instead, the config option for the index type includes a hint not to use the compact index if their keys are too small, because that will lead to bad performance.
1 parent d102d4c commit 32be305

File tree

10 files changed

+19
-74
lines changed

10 files changed

+19
-74
lines changed

bench/micro/Bench/Database/LSMTree/Internal/Index.hs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,13 @@ import Criterion.Main (Benchmark, Benchmarkable, bench, bgroup, env,
1111
import Data.List (foldl')
1212
-- foldl' is included in the Prelude from base 4.20 onwards
1313
#endif
14-
import Database.LSMTree.Extras.Generators (getKeyForIndexCompact,
15-
mkPages, toAppends)
14+
import Database.LSMTree.Extras.Generators (mkPages, toAppends)
1615
-- also for @Arbitrary@ instantiation of @SerialisedKey@
1716
import Database.LSMTree.Extras.Index (Append, append)
1817
import Database.LSMTree.Internal.Index (Index,
1918
IndexType (Compact, Ordinary), newWithDefaults, search,
2019
unsafeEnd)
21-
import Database.LSMTree.Internal.Serialise
22-
(SerialisedKey (SerialisedKey))
20+
import Database.LSMTree.Internal.Serialise (SerialisedKey)
2321
import Test.QuickCheck (choose, vector)
2422
import Test.QuickCheck.Gen (Gen (MkGen))
2523
import Test.QuickCheck.Random (mkQCGen)
@@ -61,8 +59,7 @@ generated (MkGen exec) = exec (mkQCGen 411) 30
6159
keysForIndexCompact :: Int -- ^ Number of keys
6260
-> [SerialisedKey] -- ^ Constructed keys
6361
keysForIndexCompact = vector >>>
64-
generated >>>
65-
map (getKeyForIndexCompact >>> SerialisedKey)
62+
generated
6663

6764
{-|
6865
Constructs append operations whose serialised keys conform to the key size

src-extras/Database/LSMTree/Extras/Generators.hs

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ module Database.LSMTree.Extras.Generators (
3333
, genRawBytesSized
3434
, packRawBytesPinnedOrUnpinned
3535
, LargeRawBytes (..)
36-
, isKeyForIndexCompact
37-
, KeyForIndexCompact (..)
3836
, BiasedKey (..)
3937
-- * helpers
4038
, shrinkVec
@@ -510,28 +508,6 @@ instance Arbitrary LargeRawBytes where
510508

511509
deriving newtype instance SerialiseValue LargeRawBytes
512510

513-
-- Serialised keys for the compact index must be at least 8 bytes long.
514-
515-
genKeyForIndexCompact :: Gen RawBytes
516-
genKeyForIndexCompact =
517-
genRawBytesN =<< QC.sized (\s -> QC.chooseInt (8, s + 8))
518-
519-
isKeyForIndexCompact :: RawBytes -> Bool
520-
isKeyForIndexCompact rb = RB.size rb >= 8
521-
522-
newtype KeyForIndexCompact =
523-
KeyForIndexCompact { getKeyForIndexCompact :: RawBytes }
524-
deriving stock (Eq, Ord, Show)
525-
526-
instance Arbitrary KeyForIndexCompact where
527-
arbitrary =
528-
KeyForIndexCompact <$> genKeyForIndexCompact
529-
shrink (KeyForIndexCompact rawBytes) =
530-
[KeyForIndexCompact rawBytes' | rawBytes' <- shrink rawBytes,
531-
isKeyForIndexCompact rawBytes']
532-
533-
deriving newtype instance SerialiseKey KeyForIndexCompact
534-
535511
-- we try to make collisions and close keys more likely (very crudely)
536512
arbitraryBiasedKey :: (RawBytes -> k) -> Gen RawBytes -> Gen k
537513
arbitraryBiasedKey fromRB genUnbiased = fromRB <$> frequency

src/Database/LSMTree.hs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ module Database.LSMTree (
140140
serialiseKeyIdentity,
141141
serialiseKeyIdentityUpToSlicing,
142142
serialiseKeyPreservesOrdering,
143-
serialiseKeyMinimalSize,
144143
serialiseValueIdentity,
145144
serialiseValueIdentityUpToSlicing,
146145
packSlice,
@@ -216,7 +215,7 @@ import Database.LSMTree.Internal.Config
216215
DiskCachePolicy (..), FencePointerIndexType (..),
217216
LevelNo (..), MergePolicy (..), MergeSchedule (..),
218217
SizeRatio (..), TableConfig (..), WriteBufferAlloc (..),
219-
defaultTableConfig, serialiseKeyMinimalSize)
218+
defaultTableConfig)
220219
import Database.LSMTree.Internal.Config.Override
221220
(OverrideDiskCachePolicy (..))
222221
import Database.LSMTree.Internal.Entry (NumEntries (..))

src/Database/LSMTree/Internal/Config.hs

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ module Database.LSMTree.Internal.Config (
2020
-- * Fence pointer index
2121
, FencePointerIndexType (..)
2222
, indexTypeForRun
23-
, serialiseKeyMinimalSize
2423
-- * Disk cache policy
2524
, DiskCachePolicy (..)
2625
, diskCachePolicyForLevel
@@ -32,11 +31,9 @@ import Control.DeepSeq (NFData (..))
3231
import Database.LSMTree.Internal.Index (IndexType)
3332
import qualified Database.LSMTree.Internal.Index as Index
3433
(IndexType (Compact, Ordinary))
35-
import qualified Database.LSMTree.Internal.RawBytes as RB
3634
import Database.LSMTree.Internal.Run (RunDataCaching (..))
3735
import Database.LSMTree.Internal.RunAcc (RunBloomFilterAlloc (..))
3836
import Database.LSMTree.Internal.RunBuilder (RunParams (..))
39-
import Database.LSMTree.Internal.Serialise.Class (SerialiseKey (..))
4037

4138
newtype LevelNo = LevelNo Int
4239
deriving stock (Show, Eq, Ord)
@@ -303,12 +300,7 @@ data FencePointerIndexType =
303300
| {- |
304301
Compact indexes are designed for the case where the keys in the database are uniformly distributed, e.g., when the keys are hashes.
305302
306-
When using a compact index, the 'Database.LSMTree.Internal.Serialise.Class.serialiseKey' function must satisfy the following additional law:
307-
308-
[Minimal size]
309-
@'Database.LSMTree.Internal.RawBytes.size' ('Database.LSMTree.Internal.Serialise.Class.serialiseKey' x) >= 8@
310-
311-
Use 'serialiseKeyMinimalSize' to test this law.
303+
When using a compact index, the 'Database.LSMTree.Internal.Serialise.Class.serialiseKey' function must ideally produce more than 8 bytes
312304
-}
313305
CompactIndex
314306
deriving stock (Eq, Show)
@@ -321,10 +313,6 @@ indexTypeForRun :: FencePointerIndexType -> IndexType
321313
indexTypeForRun CompactIndex = Index.Compact
322314
indexTypeForRun OrdinaryIndex = Index.Ordinary
323315

324-
-- | Test the __Minimal size__ law for the 'CompactIndex' option.
325-
serialiseKeyMinimalSize :: SerialiseKey k => k -> Bool
326-
serialiseKeyMinimalSize x = RB.size (serialiseKey x) >= 8
327-
328316
{-------------------------------------------------------------------------------
329317
Disk cache policy
330318
-------------------------------------------------------------------------------}

src/Database/LSMTree/Simple.hs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ module Database.LSMTree.Simple (
132132
serialiseKeyIdentity,
133133
serialiseKeyIdentityUpToSlicing,
134134
serialiseKeyPreservesOrdering,
135-
serialiseKeyMinimalSize,
136135
serialiseValueIdentity,
137136
serialiseValueIdentityUpToSlicing,
138137
packSlice,
@@ -178,9 +177,8 @@ import Database.LSMTree (BloomFilterAlloc, CursorClosedError (..),
178177
TableTooLargeError (..), UnionCredits (..), UnionDebt (..),
179178
WriteBufferAlloc, isValidSnapshotName, packSlice,
180179
serialiseKeyIdentity, serialiseKeyIdentityUpToSlicing,
181-
serialiseKeyMinimalSize, serialiseKeyPreservesOrdering,
182-
serialiseValueIdentity, serialiseValueIdentityUpToSlicing,
183-
toSnapshotName)
180+
serialiseKeyPreservesOrdering, serialiseValueIdentity,
181+
serialiseValueIdentityUpToSlicing, toSnapshotName)
184182
import qualified Database.LSMTree as LSMT
185183
import Prelude hiding (lookup, take, takeWhile)
186184

test/Test/Database/LSMTree/Generators.hs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,9 +63,6 @@ tests = testGroup "Test.Database.LSMTree.Generators" [
6363
prop_arbitraryAndShrinkPreserveInvariant
6464
(\(LargeRawBytes rb) -> labelRawBytes rb)
6565
(deepseqInvariant @LargeRawBytes)
66-
, testGroup "KeyForIndexCompact" $
67-
prop_arbitraryAndShrinkPreserveInvariant noTags $
68-
isKeyForIndexCompact . getKeyForIndexCompact
6966
, testGroup "BiasedKey" $
7067
prop_arbitraryAndShrinkPreserveInvariant
7168
(labelTestKOps @BiasedKey)

test/Test/Database/LSMTree/Internal/Index/Compact.hs

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ import Data.Word
2828
import Database.LSMTree.Extras
2929
import Database.LSMTree.Extras.Generators (ChunkSize (..),
3030
LogicalPageSummaries, LogicalPageSummary (..), Pages (..),
31-
genRawBytes, isKeyForIndexCompact, labelPages, toAppends)
31+
genRawBytes, labelPages, toAppends)
3232
import Database.LSMTree.Extras.Index (Append (..), appendToCompact)
3333
import Database.LSMTree.Internal.BitMath
3434
import Database.LSMTree.Internal.Chunk as Chunk (toByteString)
@@ -54,9 +54,7 @@ import Text.Printf (printf)
5454

5555
tests :: TestTree
5656
tests = testGroup "Test.Database.LSMTree.Internal.Index.Compact" [
57-
testGroup "TestKey" $
58-
prop_arbitraryAndShrinkPreserveInvariant @TestKey noTags isTestKey
59-
, testProperty "prop_distribution @TestKey" $
57+
testProperty "prop_distribution @TestKey" $
6058
prop_distribution @TestKey
6159
, testProperty "prop_searchMinMaxKeysAfterConstruction" $
6260
prop_searchMinMaxKeysAfterConstruction @TestKey 100
@@ -173,15 +171,12 @@ instance Arbitrary TestKey where
173171
-- Shrink keys extensively: most failures will occur in small counterexamples,
174172
-- so we don't have to limit the number of shrinks as much.
175173
shrink (TestKey bytes) = [
176-
TestKey bytes'
174+
testkey'
177175
| let RawBytes vec = bytes
178176
, vec' <- VP.fromList <$> shrink (VP.toList vec)
179-
, let bytes' = RawBytes vec'
180-
, isKeyForIndexCompact bytes'
177+
, let testkey' = TestKey $ RawBytes vec'
181178
]
182179

183-
isTestKey :: TestKey -> Bool
184-
isTestKey (TestKey bytes) = isKeyForIndexCompact bytes
185180

186181
{-------------------------------------------------------------------------------
187182
Properties

test/Test/Database/LSMTree/Internal/Lookup.hs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ import qualified Database.LSMTree.Internal.Run as Run
6060
import Database.LSMTree.Internal.RunAcc as Run
6161
import Database.LSMTree.Internal.RunBuilder
6262
(RunDataCaching (CacheRunData), RunParams (RunParams))
63-
import Database.LSMTree.Internal.Serialise
63+
import Database.LSMTree.Internal.Serialise as Serialise
6464
import Database.LSMTree.Internal.Serialise.Class
6565
import Database.LSMTree.Internal.UniqCounter
6666
import qualified Database.LSMTree.Internal.WriteBuffer as WB
@@ -563,14 +563,10 @@ liftShrink3InMemLookupData shrinkKey shrinkValue shrinkBlob InMemLookupData{ run
563563
shrinkEntry = liftShrink2 shrinkValue shrinkBlob
564564

565565
genSerialisedKey :: Gen SerialisedKey
566-
genSerialisedKey = frequency [
567-
(9, arbitrary `suchThat` (\k -> sizeofKey k >= 8))
568-
, (1, do x <- getSmall <$> arbitrary
569-
pure $ SerialisedKey (RB.pack [0,0,0,0,0,0,0, x]))
570-
]
566+
genSerialisedKey = Serialise.serialiseKey <$> arbitraryBoundedIntegral @Word64
571567

572568
shrinkSerialisedKey :: SerialisedKey -> [SerialisedKey]
573-
shrinkSerialisedKey k = [k' | k' <- shrink k, sizeofKey k' >= 8]
569+
shrinkSerialisedKey k = Serialise.serialiseKey <$> shrink (Serialise.deserialiseKey k :: Word64)
574570

575571
genSerialisedValue :: Gen SerialisedValue
576572
genSerialisedValue = frequency [ (50, arbitrary), (1, genLongValue) ]

test/Test/Database/LSMTree/Internal/RunAcc.hs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -140,8 +140,7 @@ fromProtoValue (Proto.Value bs) = SerialisedValue . RB.fromShortByteString $ SBS
140140
fromProtoBlobRef :: Proto.BlobRef -> BlobSpan
141141
fromProtoBlobRef (Proto.BlobRef x y) = BlobSpan x y
142142

143-
-- | Wrapper around 'PageLogical' that generates nearly-full pages, and
144-
-- keys that are always large enough (>= 8 bytes) for the compact index.
143+
-- | Wrapper around 'PageLogical' that generates nearly-full pages.
145144
newtype PageLogical' = PageLogical' { getPrototypeKOps :: [(Proto.Key, Proto.Operation)] }
146145
deriving stock Show
147146

@@ -150,7 +149,7 @@ getRealKOps = fmap fromProtoKOp . getPrototypeKOps
150149

151150
instance Arbitrary PageLogical' where
152151
arbitrary = PageLogical' <$>
153-
Proto.genPageContentFits Proto.DiskPage4k (Proto.MinKeySize 8)
152+
Proto.genPageContentFits Proto.DiskPage4k Proto.noMinKeySize
154153
shrink (PageLogical' page) =
155154
[ PageLogical' page' | page' <- shrink page ]
156155

test/Test/Database/LSMTree/StateMachine.hs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,12 @@ import qualified Database.LSMTree as R
9292
import Database.LSMTree.Class (Entry (..), LookupResult (..))
9393
import qualified Database.LSMTree.Class as Class
9494
import Database.LSMTree.Extras (showPowersOf)
95-
import Database.LSMTree.Extras.Generators (KeyForIndexCompact)
95+
import Database.LSMTree.Extras.Generators ()
9696
import Database.LSMTree.Extras.NoThunks (propNoThunks)
9797
import qualified Database.LSMTree.Internal.Config as R
9898
(TableConfig (TableConfig))
9999
import Database.LSMTree.Internal.Serialise (SerialisedBlob,
100-
SerialisedValue)
100+
SerialisedKey, SerialisedValue)
101101
import qualified Database.LSMTree.Internal.Types as R.Types
102102
import qualified Database.LSMTree.Internal.Unsafe as R.Unsafe
103103
import qualified Database.LSMTree.Model.IO as ModelIO
@@ -567,7 +567,7 @@ handleFsError = Model.ErrFsError . displayException
567567
Key and value types
568568
-------------------------------------------------------------------------------}
569569

570-
newtype Key = Key KeyForIndexCompact
570+
newtype Key = Key SerialisedKey
571571
deriving stock (Show, Eq, Ord)
572572
deriving newtype (Arbitrary, R.SerialiseKey)
573573

0 commit comments

Comments
 (0)