Skip to content

Commit a1e5359

Browse files
authored
Merge pull request #5884 from unisonweb/topic/direct-murmur
Implement a direct murmur hash on values that omits type references
2 parents c63c0f7 + b2b8dbc commit a1e5359

File tree

32 files changed

+1044
-703
lines changed

32 files changed

+1044
-703
lines changed

lib/unison-util-bytes/package.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ library:
1717
- bytestring-to-vector
1818
- deepseq
1919
- memory
20+
- murmur-hash
2021
- primitive
2122
- stringsearch
2223
- text

lib/unison-util-bytes/src/Unison/Util/Bytes.hs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ module Unison.Util.Bytes
5353
zlibDecompress,
5454
gzipCompress,
5555
gzipDecompress,
56+
hash64AddBytes,
5657
)
5758
where
5859

@@ -68,6 +69,7 @@ import Data.ByteString qualified as B
6869
import Data.ByteString.Lazy qualified as LB
6970
import Data.ByteString.Lazy.Search qualified as SS
7071
import Data.Char
72+
import Data.Digest.Murmur64 (Hash64, hash64AddInt)
7173
import Data.Primitive.ByteArray
7274
( ByteArray (ByteArray),
7375
copyByteArrayToPtr,
@@ -418,5 +420,12 @@ toWord8s bs = chunks bs >>= V.toList
418420
fromWord8s :: [Word8] -> Bytes
419421
fromWord8s bs = snoc empty (V.fromList bs)
420422

423+
-- Adds the bytes of the value to a hash. This does not depend on the
424+
-- chunking or splitting of the bytes values, just on the bytes.
425+
hash64AddBytes :: Bytes -> Hash64 -> Hash64
426+
hash64AddBytes bs h = foldl' addChunk h $ underlying bs
427+
where
428+
addChunk = V.foldl' (\h b -> hash64AddInt (fromIntegral b) h)
429+
421430
instance Show Bytes where
422431
show bs = toWord8s (toBase16 bs) >>= \w -> [chr (fromIntegral w)]

lib/unison-util-bytes/unison-util-bytes.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ library
5858
, bytestring-to-vector
5959
, deepseq
6060
, memory
61+
, murmur-hash
6162
, primitive
6263
, stringsearch
6364
, text

parser-typechecker/package.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ library:
3232
- megaparsec
3333
- mmorph
3434
- mtl
35+
- murmur-hash
3536
- mutable-containers
3637
- network-uri
3738
- nonempty-containers

parser-typechecker/src/Unison/Builtin.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -486,6 +486,7 @@ builtinsSrc =
486486
B "Universal.>=" $ forall1 "a" (\a -> a --> a --> boolean),
487487
B "Universal.<=" $ forall1 "a" (\a -> a --> a --> boolean),
488488
B "Universal.murmurHash" $ forall1 "a" (\a -> a --> nat),
489+
B "Universal.murmurHashUntyped" $ forall1 "a" (\a -> a --> nat),
489490
B "bug" $ forall1 "a" (\a -> forall1 "b" (\b -> a --> b)),
490491
B "todo" $ forall1 "a" (\a -> forall1 "b" (\b -> a --> b)),
491492
B "Any.Any" $ forall1 "a" (\a -> a --> anyt),

parser-typechecker/src/Unison/Util/EnumContainers.hs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ module Unison.Util.EnumContainers
2222
lookupWithDefault,
2323
mapWithKey,
2424
foldMapWithKey,
25+
foldlWithKey,
2526
mapToList,
2627
(!),
2728
findMin,
@@ -171,6 +172,11 @@ mapWithKey f (EM m) = EM $ IM.mapWithKey (f . intToKey) m
171172
foldMapWithKey :: (EnumKey k) => (Monoid m) => (k -> a -> m) -> EnumMap k a -> m
172173
foldMapWithKey f (EM m) = IM.foldMapWithKey (f . intToKey) m
173174

175+
{-# INLINE foldlWithKey #-}
176+
foldlWithKey :: (EnumKey k) => (r -> k -> a -> r) -> r -> EnumMap k a -> r
177+
foldlWithKey f z (EM m) =
178+
IM.foldlWithKey' (\r k x -> f r (intToKey k) x) z m
179+
174180
{-# INLINE mapToList #-}
175181
mapToList :: (EnumKey k) => EnumMap k a -> [(k, a)]
176182
mapToList (EM m) = first intToKey <$> IM.toList m

parser-typechecker/src/Unison/Util/Text.hs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
module Unison.Util.Text where
66

7+
import Data.Digest.Murmur64 (Hash64, Hashable64 (..))
78
import Data.Foldable (toList)
89
import Data.List (foldl', unfoldr)
910
import Data.List qualified as L
@@ -184,6 +185,15 @@ dropWhileMax p = go 0
184185
| otherwise = (total, empty)
185186
{-# INLINE dropWhileMax #-}
186187

188+
-- Adds the codepoints of the Text into a murmur hash. This depends only
189+
-- on the characters, so adding texts chunked in different ways, or
190+
-- multiple texts that would concatenate to the same text will give the
191+
-- same result.
192+
hash64AddText :: Text -> Hash64 -> Hash64
193+
hash64AddText (Text tx) h = foldl' addChunk h tx
194+
where
195+
addChunk h (Chunk _ tx) = T.foldl' (flip hash64Add) h tx
196+
187197
instance Eq Chunk where (Chunk n a) == (Chunk n2 a2) = n == n2 && a == a2
188198

189199
instance Ord Chunk where (Chunk _ a) `compare` (Chunk _ a2) = compare a a2

parser-typechecker/unison-parser-typechecker.cabal

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ library
214214
, megaparsec
215215
, mmorph
216216
, mtl
217+
, murmur-hash
217218
, mutable-containers
218219
, network-uri
219220
, nonempty-containers

unison-runtime/src/Unison/Runtime/ANF.hs

Lines changed: 1 addition & 170 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ import Unison.Prelude
119119
import Unison.PrettyPrintEnv (PrettyPrintEnv, termName)
120120
import Unison.Reference (Id, Reference, Reference' (Builtin, DerivedId), toShortHash)
121121
import Unison.ReferentPrime qualified as Rfn
122+
import Unison.Runtime.ANF.POp (POp (..))
122123
import Unison.Runtime.Array qualified as PA
123124
import Unison.Runtime.Foreign.Function.Type (ForeignFunc (..))
124125
import Unison.Runtime.InternalError (internalBug)
@@ -1468,176 +1469,6 @@ litRef (C _) = Ty.charRef
14681469
litRef (LM _) = Ty.termLinkRef
14691470
litRef (LY _) = Ty.typeLinkRef
14701471

1471-
-- Note: Enum/Bounded instances should only be used for things like
1472-
-- getting a list of all ops. Using auto-generated numberings for
1473-
-- serialization, for instance, could cause observable changes to
1474-
-- formats that we want to control and version.
1475-
data POp
1476-
= -- Int
1477-
ADDI -- +
1478-
| SUBI -- -
1479-
| MULI
1480-
| DIVI -- /
1481-
| SGNI -- sgn
1482-
| NEGI -- neg
1483-
| MODI -- mod
1484-
| POWI -- pow
1485-
| SHLI -- shiftl
1486-
| SHRI -- shiftr
1487-
| ANDI -- and
1488-
| IORI -- or
1489-
| XORI -- xor
1490-
| COMI -- complement
1491-
| INCI -- inc
1492-
| DECI -- dec
1493-
| LEQI -- <=
1494-
| LESI -- <
1495-
| EQLI -- ==
1496-
| NEQI -- !=
1497-
| TRNC -- truncate0
1498-
-- Nat
1499-
| ADDN -- +
1500-
| SUBN -- -
1501-
| DRPN -- drop
1502-
| MULN
1503-
| DIVN -- /
1504-
| MODN -- mod
1505-
| TZRO -- trailingZeros
1506-
| LZRO -- leadingZeros
1507-
| POPC -- popCount
1508-
| POWN -- pow
1509-
| SHLN -- shiftl
1510-
| SHRN -- shiftr
1511-
| ANDN -- and
1512-
| IORN -- or
1513-
| XORN -- xor
1514-
| COMN -- complement
1515-
| INCN -- inc
1516-
| DECN -- dec
1517-
| LEQN -- <=
1518-
| LESN -- <
1519-
| EQLN -- ==
1520-
| NEQN -- !=
1521-
-- Float
1522-
| ADDF -- +
1523-
| SUBF -- -
1524-
| MULF
1525-
| DIVF -- /
1526-
| MINF -- min
1527-
| MAXF -- max
1528-
| LEQF -- <=
1529-
| LESF -- <
1530-
| EQLF -- ==
1531-
| NEQF -- !=
1532-
| POWF -- pow
1533-
| EXPF -- exp
1534-
| SQRT -- sqrt
1535-
| LOGF -- log
1536-
| LOGB -- logBase
1537-
| ABSF -- abs
1538-
| CEIL -- ceil
1539-
| FLOR -- floor
1540-
| TRNF -- truncate
1541-
| RNDF -- round
1542-
-- Trig
1543-
| COSF -- cos
1544-
| ACOS -- acos
1545-
| COSH -- cosh
1546-
| ACSH -- acosh
1547-
| SINF -- sin
1548-
| ASIN -- asin
1549-
| SINH -- sinh
1550-
| ASNH -- asinh
1551-
| TANF -- tan
1552-
| ATAN -- atan
1553-
| TANH -- tanh
1554-
| ATNH -- atanh
1555-
| ATN2 -- atan2
1556-
-- Text
1557-
| CATT -- ++
1558-
| TAKT -- take
1559-
| DRPT -- drop
1560-
| SIZT -- size
1561-
| IXOT -- indexOf
1562-
| UCNS -- uncons
1563-
| USNC -- unsnoc
1564-
| EQLT -- ==
1565-
| LEQT -- <=
1566-
| PAKT -- pack
1567-
| UPKT -- unpack
1568-
-- Sequence
1569-
| CATS -- ++
1570-
| TAKS -- take
1571-
| DRPS -- drop
1572-
| SIZS -- size
1573-
| CONS -- cons
1574-
| SNOC -- snoc
1575-
| IDXS -- at
1576-
| BLDS -- build
1577-
| VWLS -- viewl
1578-
| VWRS -- viewr
1579-
| SPLL -- splitl
1580-
| SPLR -- splitr
1581-
-- Bytes
1582-
| PAKB -- pack
1583-
| UPKB -- unpack
1584-
| TAKB -- take
1585-
| DRPB -- drop
1586-
| IXOB -- indexOf
1587-
| IDXB -- index
1588-
| SIZB -- size
1589-
| FLTB -- flatten
1590-
| CATB -- append
1591-
-- Conversion
1592-
| ITOF -- intToFloat
1593-
| NTOF -- natToFloat
1594-
| ITOT -- intToText
1595-
| NTOT -- natToText
1596-
| TTOI -- textToInt
1597-
| TTON -- textToNat
1598-
| TTOF -- textToFloat
1599-
| FTOT -- floatToText
1600-
| CAST -- runtime type cast for unboxed values.
1601-
| -- Concurrency
1602-
FORK -- fork
1603-
| -- Universal operations
1604-
EQLU -- ==
1605-
| CMPU -- compare
1606-
| LEQU -- <=
1607-
| LESU -- <
1608-
| EROR -- error
1609-
| -- Code
1610-
MISS -- isMissing
1611-
| CACH -- cache_
1612-
| LKUP -- lookup
1613-
| LOAD -- load
1614-
| CVLD -- validate
1615-
| SDBX -- sandbox
1616-
| VALU -- value
1617-
| TLTT -- Term.Link.toText
1618-
-- Debug
1619-
| PRNT -- print
1620-
| INFO -- info
1621-
| TRCE -- trace
1622-
| DBTX -- debugText
1623-
| -- STM
1624-
ATOM -- atomically
1625-
| TFRC -- try force
1626-
| SDBL -- sandbox link list
1627-
| SDBV -- sandbox check for Values
1628-
-- Refs
1629-
| REFN -- Ref.new
1630-
| REFR -- Ref.read
1631-
| REFW -- Ref.write
1632-
| RCAS -- Ref.cas
1633-
| RRFC -- Ref.readForCas
1634-
| TIKR -- Ref.Ticket.read
1635-
-- Bools
1636-
| NOTB -- not
1637-
| ANDB -- and
1638-
| IORB -- or
1639-
deriving (Show, Eq, Ord, Enum, Bounded)
1640-
16411472
type ANormal ref = ABTN.Term (ANormalF ref)
16421473

16431474
type Cte v = CTE v (ANormal Reference v)

0 commit comments

Comments
 (0)