diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5d18ec50..dcdc4113 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -28,7 +28,7 @@ jobs: - uses: erlef/setup-beam@v1 with: otp-version: ${{ matrix.erlang_version }} - gleam-version: "1.11.0" + gleam-version: "1.13.0" - run: gleam test --target erlang - run: gleam format --check src test @@ -44,7 +44,7 @@ jobs: - uses: erlef/setup-beam@v1 with: otp-version: "28" - gleam-version: "1.11.0" + gleam-version: "1.13.0" - uses: actions/setup-node@v4 with: node-version: ${{ matrix.node_version }} @@ -62,7 +62,7 @@ jobs: - uses: erlef/setup-beam@v1 with: otp-version: "28" - gleam-version: "1.11.0" + gleam-version: "1.13.0" - uses: oven-sh/setup-bun@v2 with: bun-version: ${{ matrix.bun_version }} @@ -80,7 +80,7 @@ jobs: - uses: erlef/setup-beam@v1 with: otp-version: "28" - gleam-version: "1.11.0" + gleam-version: "1.13.0" - uses: denoland/setup-deno@v1 with: deno-version: ${{ matrix.deno_version }} diff --git a/gleam.toml b/gleam.toml index 31eb0487..ef52224a 100644 --- a/gleam.toml +++ b/gleam.toml @@ -1,6 +1,6 @@ name = "gleam_stdlib" version = "0.67.1" -gleam = ">= 1.11.0" +gleam = ">= 1.13.0" licences = ["Apache-2.0"] description = "A standard library for the Gleam programming language" diff --git a/src/dict.mjs b/src/dict.mjs index f39cd546..b61fcd39 100644 --- a/src/dict.mjs +++ b/src/dict.mjs @@ -3,7 +3,9 @@ * These types can be checked using the typescript compiler with "checkjs" option. */ -import { isEqual } from "./gleam.mjs"; +import { isEqual, Result$Error, Result$Ok } from "./gleam.mjs"; + +// -- HASH -------------------------------------------------------------------- const referenceMap = /* @__PURE__ */ new WeakMap(); const tempDataView = /* @__PURE__ */ new DataView( @@ -149,845 +151,561 @@ export function getHash(u) { } } -/** - * @template K,V - * @typedef {ArrayNode | IndexNode | CollisionNode} Node - */ -/** - * @template K,V - * @typedef {{ type: typeof ENTRY, k: K, v: V }} Entry - */ -/** - * @template K,V - * @typedef {{ type: typeof ARRAY_NODE, size: number, array: (undefined | Entry | Node)[] }} ArrayNode - */ -/** - * @template K,V - * @typedef {{ type: typeof INDEX_NODE, bitmap: number, array: (Entry | Node)[] }} IndexNode - */ -/** - * @template K,V - * @typedef {{ type: typeof COLLISION_NODE, hash: number, array: Entry[] }} CollisionNode - */ -/** - * @typedef {{ val: boolean }} Flag - */ -const SHIFT = 5; // number of bits you need to shift by to get the next bucket -const BUCKET_SIZE = Math.pow(2, SHIFT); -const MASK = BUCKET_SIZE - 1; // used to zero out all bits not in the bucket -const MAX_INDEX_NODE = BUCKET_SIZE / 2; // when does index node grow into array node -const MIN_ARRAY_NODE = BUCKET_SIZE / 4; // when does array node shrink to index node -const ENTRY = 0; -const ARRAY_NODE = 1; -const INDEX_NODE = 2; -const COLLISION_NODE = 3; - -/** @type {IndexNode} */ -const EMPTY = { - type: INDEX_NODE, - bitmap: 0, - array: [], -}; -/** - * Mask the hash to get only the bucket corresponding to shift - * @param {number} hash - * @param {number} shift - * @returns {number} - */ -function mask(hash, shift) { - return (hash >>> shift) & MASK; -} +// -- DICT -------------------------------------------------------------------- /** - * Set only the Nth bit where N is the masked hash - * @param {number} hash - * @param {number} shift - * @returns {number} + * An implementation of the CHAMP data structure, an optimised HAMT. + * + * See: M. J. Steindorfer, J.J. Vinju (2015). 
Optimizing Hash-Array Mapped Tries for + Fast and Lean Immutable JVM Collections. Available: https://michael.steindorfer.name/publications/oopsla15.pdf */ -function bitpos(hash, shift) { - return 1 << mask(hash, shift); +export default class Dict { + constructor(size, root) { + this.size = size; + this.root = root; + } + + /** + * @deprecated Use the `fold` function instead. + */ + forEach(f) { + fold(this, null, (_, k, v) => f(v, k)); + } } -/** - * Count the number of 1 bits in a number - * @param {number} x - * @returns {number} - */ -function bitcount(x) { - x -= (x >> 1) & 0x55555555; - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - x = (x + (x >> 4)) & 0x0f0f0f0f; - x += x >> 8; - x += x >> 16; - return x & 0x7f; +/// The power-of-2 branching factor for the dict. For example, a value of `5` indicates a 32-ary tree. +const bits = 5; +const mask = (1 << bits) - 1; + +/// This symbol is used internally to avoid constructing results. +const noElementMarker = Symbol(); + +/// This symbol is used to store the "generation" on a node. +/// Using a symbol makes the property not enumerable, which means the generation +/// will be ignored during equality checks. +const generationKey = Symbol(); + +// Some commonly used constants throughout the code. +const emptyNode = /* @__PURE__ */ newNode(0); +const emptyDict = /* @__PURE__ */ new Dict(0, emptyNode); +const errorNil = /* @__PURE__ */ Result$Error(undefined); + +function makeNode(generation, datamap, nodemap, data) { + // The order of fields is important, as they define the order `isEqual` will + // compare our fields. Putting the bitmaps first means that equality can + // early-out if the bitmaps are not equal. + return { + // A node is a high-arity (32 in practice) hybrid tree node. + // Hybrid means that it stores data directly as well as pointers to child nodes. + // + // Each node contains 2 bitmaps: + // - The datamap has a bit set if that slot in the node contains direct data + // - The nodemap has a bit set if that slot in the node contains another node. + // + // Both are exclusive to on another, so datamap & nodemap == 0. + // + // Every key/hash value directly correlates to a specific bit by using a trie + // suffix (least significant bits first) encoding. + // For example, if the last 5 bits of the hash are 1101, the bit to check for + // that value is the 13th bit. + datamap, + nodemap, + // The slots itself are stored in a single contiguous array that contains + // both direct k/v-pairs and child nodes. + // + // The direct children come first, followed by the child nodes in _reverse order_: + // + // 7654321 + // datamap: 1000100 + // nodemap: 10011 + // data: [key3, value3, key7, value7, child5, child2, child1] + // -------------------------> <--------------------- + // datamap nodemap + // + // Every `1` bit in the datamap corresponds to a pair of [key, value] entries, + // and every `1` bit in the nodemap corresponds to a child node entry. + // + // Children are stored in reverse order to avoid having to store or calculate an + // "offset" value to skip over the direct children. + data, + // The generation is used to track which nodes need to be copied during transient updates. + // Using a symbol here makes `isEqual` ignore this field. 
+ [generationKey]: generation, + }; } -/** - * Calculate the array index of an item in a bitmap index node - * @param {number} bitmap - * @param {number} bit - * @returns {number} - */ -function index(bitmap, bit) { - return bitcount(bitmap & (bit - 1)); +function newNode(generation) { + return makeNode(generation, 0, 0, []); } /** - * Efficiently copy an array and set one value at an index - * @template T - * @param {T[]} arr - * @param {number} at - * @param {T} val - * @returns {T[]} + * Copies a node and its data array if it's from another generation, making it safe + * to mutate the node. */ -function cloneAndSet(arr, at, val) { - const len = arr.length; - const out = new Array(len); - for (let i = 0; i < len; ++i) { - out[i] = arr[i]; +function copyNode(node, generation) { + if (node[generationKey] === generation) { + return node; } - out[at] = val; - return out; + + const newData = node.data.slice(0); + return makeNode(generation, node.datamap, node.nodemap, newData); } /** - * Efficiently copy an array and insert one value at an index - * @template T - * @param {T[]} arr - * @param {number} at - * @param {T} val - * @returns {T[]} + * Copies a node if needed ands sets a new value. */ -function spliceIn(arr, at, val) { - const len = arr.length; - const out = new Array(len + 1); - let i = 0; - let g = 0; - while (i < at) { - out[g++] = arr[i++]; - } - out[g++] = val; - while (i < len) { - out[g++] = arr[i++]; +function copyAndSet(node, generation, idx, val) { + if (node.data[idx] === val) { + return node; } - return out; + + // Using copyNode is faster than a specialised implementation. + node = copyNode(node, generation); + node.data[idx] = val; + return node; } /** - * Efficiently copy an array and remove one value at an index - * @template T - * @param {T[]} arr - * @param {number} at - * @returns {T[]} + * Copies a node if needed, and then inserts a new key-value pair. */ -function spliceOut(arr, at) { - const len = arr.length; - const out = new Array(len - 1); - let i = 0; - let g = 0; - while (i < at) { - out[g++] = arr[i++]; - } - ++i; - while (i < len) { - out[g++] = arr[i++]; - } - return out; +function copyAndInsertPair(node, generation, bit, idx, key, val) { + const data = node.data; + const length = data.length; + + // the fastest way to insert a pair is to always copy. 
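+  // `idx` already points at the first of the two slots the new pair occupies,
+  // so everything before it is copied as-is and the rest is shifted right by two.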
+ const newData = new Array(length + 2); + + let readIndex = 0; + let writeIndex = 0; + + while (readIndex < idx) newData[writeIndex++] = data[readIndex++]; + newData[writeIndex++] = key; + newData[writeIndex++] = val; + while (readIndex < length) newData[writeIndex++] = data[readIndex++]; + + return makeNode(generation, node.datamap | bit, node.nodemap, newData); } -/** - * Create a new node containing two entries - * @template K,V - * @param {number} shift - * @param {K} key1 - * @param {V} val1 - * @param {number} key2hash - * @param {K} key2 - * @param {V} val2 - * @returns {Node} - */ -function createNode(shift, key1, val1, key2hash, key2, val2) { - const key1hash = getHash(key1); - if (key1hash === key2hash) { - return { - type: COLLISION_NODE, - hash: key1hash, - array: [ - { type: ENTRY, k: key1, v: val1 }, - { type: ENTRY, k: key2, v: val2 }, - ], - }; +function copyAndRemovePair(node, generation, bit, idx) { + node = copyNode(node, generation); + + const data = node.data; + const length = data.length; + for (let w = idx, r = idx + 2; r < length; ++r, ++w) { + data[w] = data[r]; } - const addedLeaf = { val: false }; - return assoc( - assocIndex(EMPTY, shift, key1hash, key1, val1, addedLeaf), - shift, - key2hash, - key2, - val2, - addedLeaf, - ); + data.pop(); + data.pop(); + + node.datamap ^= bit; + return node; } -/** - * @template T,K,V - * @callback AssocFunction - * @param {T} root - * @param {number} shift - * @param {number} hash - * @param {K} key - * @param {V} val - * @param {Flag} addedLeaf - * @returns {Node} - */ -/** - * Associate a node with a new entry, creating a new node - * @template T,K,V - * @type {AssocFunction,K,V>} - */ -function assoc(root, shift, hash, key, val, addedLeaf) { - switch (root.type) { - case ARRAY_NODE: - return assocArray(root, shift, hash, key, val, addedLeaf); - case INDEX_NODE: - return assocIndex(root, shift, hash, key, val, addedLeaf); - case COLLISION_NODE: - return assocCollision(root, shift, hash, key, val, addedLeaf); - } +export function make() { + return emptyDict; } -/** - * @template T,K,V - * @type {AssocFunction,K,V>} - */ -function assocArray(root, shift, hash, key, val, addedLeaf) { - const idx = mask(hash, shift); - const node = root.array[idx]; - // if the corresponding index is empty set the index to a newly created node - if (node === undefined) { - addedLeaf.val = true; - return { - type: ARRAY_NODE, - size: root.size + 1, - array: cloneAndSet(root.array, idx, { type: ENTRY, k: key, v: val }), - }; - } - if (node.type === ENTRY) { - // if keys are equal replace the entry - if (isEqual(key, node.k)) { - if (val === node.v) { - return root; - } - return { - type: ARRAY_NODE, - size: root.size, - array: cloneAndSet(root.array, idx, { - type: ENTRY, - k: key, - v: val, - }), - }; - } - // otherwise upgrade the entry to a node and insert - addedLeaf.val = true; - return { - type: ARRAY_NODE, - size: root.size, - array: cloneAndSet( - root.array, - idx, - createNode(shift + SHIFT, node.k, node.v, hash, key, val), - ), - }; - } - // otherwise call assoc on the child node - const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf); - // if the child node hasn't changed just return the old root - if (n === node) { - return root; + +export function from(iterable) { + let transient = toTransient(emptyDict); + for (const [key, value] of iterable) { + transient = destructiveTransientInsert(key, value, transient); } - // otherwise set the index to the new node - return { - type: ARRAY_NODE, - size: root.size, - array: 
cloneAndSet(root.array, idx, n), - }; + return fromTransient(transient); } -/** - * @template T,K,V - * @type {AssocFunction,K,V>} - */ -function assocIndex(root, shift, hash, key, val, addedLeaf) { - const bit = bitpos(hash, shift); - const idx = index(root.bitmap, bit); - // if there is already a item at this hash index.. - if ((root.bitmap & bit) !== 0) { - // if there is a node at the index (not an entry), call assoc on the child node - const node = root.array[idx]; - if (node.type !== ENTRY) { - const n = assoc(node, shift + SHIFT, hash, key, val, addedLeaf); - if (n === node) { - return root; - } - return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet(root.array, idx, n), - }; - } - // otherwise there is an entry at the index - // if the keys are equal replace the entry with the updated value - const nodeKey = node.k; - if (isEqual(key, nodeKey)) { - if (val === node.v) { - return root; - } - return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet(root.array, idx, { - type: ENTRY, - k: key, - v: val, - }), - }; - } - // if the keys are not equal, replace the entry with a new child node - addedLeaf.val = true; - return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet( - root.array, - idx, - createNode(shift + SHIFT, nodeKey, node.v, hash, key, val), - ), - }; - } else { - // else there is currently no item at the hash index - const n = root.array.length; - // if the number of nodes is at the maximum, expand this node into an array node - if (n >= MAX_INDEX_NODE) { - // create a 32 length array for the new array node (one for each bit in the hash) - const nodes = new Array(32); - // create and insert a node for the new entry - const jdx = mask(hash, shift); - nodes[jdx] = assocIndex(EMPTY, shift + SHIFT, hash, key, val, addedLeaf); - let j = 0; - let bitmap = root.bitmap; - // place each item in the index node into the correct spot in the array node - // loop through all 32 bits / array positions - for (let i = 0; i < 32; i++) { - if ((bitmap & 1) !== 0) { - const node = root.array[j++]; - nodes[i] = node; - } - // shift the bitmap to process the next bit - bitmap = bitmap >>> 1; - } - return { - type: ARRAY_NODE, - size: n + 1, - array: nodes, - }; + +export function size(dict) { + return dict.size; +} + +export function get(dict, key) { + const result = lookup(dict.root, key, getHash(key)); + return result !== noElementMarker ? Result$Ok(result) : errorNil; +} + +export function has(dict, key) { + return lookup(dict.root, key, getHash(key)) !== noElementMarker; +} + +function lookup(node, key, hash) { + for (let shift = 0; shift < 32; shift += bits) { + const data = node.data; + const bit = hashbit(hash, shift); + + if (node.nodemap & bit) { + // we found our hash inside the nodemap, so we can continue our search there. + node = data[data.length - 1 - index(node.nodemap, bit)]; + } else if (node.datamap & bit) { + // we store this hash directly! + // + // this also means that there are no other values with the same + // hash prefix in this dict. + // + // We still need to check if the key matches, but if it does we know for + // sure that this is the correct value, and if it doesn't that we don't + // contain the value in question. + const dataidx = Math.imul(index(node.datamap, bit), 2); + return isEqual(key, data[dataidx]) ? 
data[dataidx + 1] : noElementMarker; } else { - // else there is still space in this index node - // simply insert a new entry at the hash index - const newArray = spliceIn(root.array, idx, { - type: ENTRY, - k: key, - v: val, - }); - addedLeaf.val = true; - return { - type: INDEX_NODE, - bitmap: root.bitmap | bit, - array: newArray, - }; + // if the hash bit is not set in neither bitmaps, we immediately know that + // this key cannot be inside this dict. + return noElementMarker; } } -} -/** - * @template T,K,V - * @type {AssocFunction,K,V>} - */ -function assocCollision(root, shift, hash, key, val, addedLeaf) { - // if there is a hash collision - if (hash === root.hash) { - const idx = collisionIndexOf(root, key); - // if this key already exists replace the entry with the new value - if (idx !== -1) { - const entry = root.array[idx]; - if (entry.v === val) { - return root; - } - return { - type: COLLISION_NODE, - hash: hash, - array: cloneAndSet(root.array, idx, { type: ENTRY, k: key, v: val }), - }; + + // our shift has exceeded 32 bits. Everything that follows is + // implicitely an overflow node and only contains direct children. + const overflow = node.data; + for (let i = 0; i < overflow.length; i += 2) { + if (isEqual(key, overflow[i])) { + return overflow[i + 1]; } - // otherwise insert the entry at the end of the array - const size = root.array.length; - addedLeaf.val = true; - return { - type: COLLISION_NODE, - hash: hash, - array: cloneAndSet(root.array, size, { type: ENTRY, k: key, v: val }), - }; } - // if there is no hash collision, upgrade to an index node - return assoc( - { - type: INDEX_NODE, - bitmap: bitpos(root.hash, shift), - array: [root], - }, - shift, - hash, - key, - val, - addedLeaf, - ); + + return noElementMarker; } + /** - * Find the index of a key in the collision node's array - * @template K,V - * @param {CollisionNode} root - * @param {K} key - * @returns {number} - */ -function collisionIndexOf(root, key) { - const size = root.array.length; - for (let i = 0; i < size; i++) { - if (isEqual(key, root.array[i].k)) { - return i; - } - } - return -1; + * We use "transient" values to allow for safer internal mutations of the data + * structure. This is an optimisation only. No mutable API is exposed to the user. + * + * Transients are to be treated as having a linear (single-use, think rust) type. + * A transient value becomes invalid as soon as it's passed to one of the functions. + * + * Internally, we track a "generation" value on each node. If the generation + * doesn't match the one for the current transient, we have to copy - the node + * could still be referenced by another dict instance! + * After that, no other references than the transient one exists, so it's safe + * to mutate in place. + */ +export function toTransient(dict) { + return { + generation: nextGeneration(dict), + root: dict.root, + size: dict.size, + dict: dict, + }; } + /** - * @template T,K,V - * @callback FindFunction - * @param {T} root - * @param {number} shift - * @param {number} hash - * @param {K} key - * @returns {undefined | Entry} - */ -/** - * Return the found entry or undefined if not present in the root - * @template K,V - * @type {FindFunction,K,V>} + * Consume a transient, producing a normal Dict again. 
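+ *
+ * If the root was never swapped out, the original dict instance is returned,
+ * so a transient that saw no effective change preserves reference equality.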
*/ -function find(root, shift, hash, key) { - switch (root.type) { - case ARRAY_NODE: - return findArray(root, shift, hash, key); - case INDEX_NODE: - return findIndex(root, shift, hash, key); - case COLLISION_NODE: - return findCollision(root, key); +export function fromTransient(transient) { + if (transient.root === transient.dict.root) { + return transient.dict; } + + return new Dict(transient.size, transient.root); } + /** + * Find and allocate the next generation id. + * * @template K,V - * @type {FindFunction,K,V>} + * @param {Dict} dict + * @returns {number} */ -function findArray(root, shift, hash, key) { - const idx = mask(hash, shift); - const node = root.array[idx]; - if (node === undefined) { - return undefined; +function nextGeneration(dict) { + const root = dict.root; + if (root[generationKey] < Number.MAX_SAFE_INTEGER) { + return root[generationKey] + 1; } - if (node.type !== ENTRY) { - return find(node, shift + SHIFT, hash, key); - } - if (isEqual(key, node.k)) { - return node; + + // we have reached MAX_SAFE_INTEGER generations - + // at this point, we have to walk the dictionary once to reset the counter + // on every node. This is safe since it's part of the contract for transient + // that only one of them exists at any given time. + // + const queue = [root]; + while (queue.length) { + // order doesn't matter, so we can use push/pop for faster array usage. + const node = queue.pop(); + + // reset the generation to 0 + node[generationKey] = 0; + + // queue all other referenced nodes + const nodeStart = Math.imul(popcount(node.datamap), 2); + for (let i = nodeStart; i < node.data.length; ++i) { + queue.push(node.data[i]); + } } - return undefined; + + return 1; } -/** - * @template K,V - * @type {FindFunction,K,V>} - */ -function findIndex(root, shift, hash, key) { - const bit = bitpos(hash, shift); - if ((root.bitmap & bit) === 0) { - return undefined; - } - const idx = index(root.bitmap, bit); - const node = root.array[idx]; - if (node.type !== ENTRY) { - return find(node, shift + SHIFT, hash, key); - } - if (isEqual(key, node.k)) { - return node; + +/// Insert is the second-most performance-sensitive operation. +/// We use a global "transient" value here to avoid doing a memory allocation. +const globalTransient = /* @__PURE__ */ toTransient(emptyDict); + +export function insert(dict, key, value) { + globalTransient.generation = nextGeneration(dict); + globalTransient.size = dict.size; + + const hash = getHash(key); + const root = insertIntoNode(globalTransient, dict.root, key, value, hash, 0); + if (root === dict.root) { + return dict; } - return undefined; + + return new Dict(globalTransient.size, root); } + /** - * @template K,V - * @param {CollisionNode} root - * @param {K} key - * @returns {undefined | Entry} + * Consume a transient, writing a new key/value pair into the dictionary it + * represents. If the key already exists, it will be overwritten. + * + * Returns a new transient. 
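+ *
+ * As with every transient operation, the reference passed in must be treated
+ * as consumed once this call returns.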
*/ -function findCollision(root, key) { - const idx = collisionIndexOf(root, key); - if (idx < 0) { - return undefined; - } - return root.array[idx]; +export function destructiveTransientInsert(key, value, transient) { + const hash = getHash(key); + transient.root = insertIntoNode(transient, transient.root, key, value, hash, 0); + return transient; } + /** - * @template T,K,V - * @callback WithoutFunction - * @param {T} root - * @param {number} shift - * @param {number} hash - * @param {K} key - * @returns {undefined | Node} + * Consume a transient, writing a new key/value pair if the key doesn't exist or updating + * the existing value with a function if it does. + * + * Returns a new transient. */ -/** - * Remove an entry from the root, returning the updated root. - * Returns undefined if the node should be removed from the parent. - * @template K,V - * @type {WithoutFunction,K,V>} - * */ -function without(root, shift, hash, key) { - switch (root.type) { - case ARRAY_NODE: - return withoutArray(root, shift, hash, key); - case INDEX_NODE: - return withoutIndex(root, shift, hash, key); - case COLLISION_NODE: - return withoutCollision(root, key); +export function destructiveTransientUpdateWith(key, fun, value, transient) { + const hash = getHash(key); + + const existing = lookup(transient.root, key, hash); + if (existing !== noElementMarker) { + value = fun(existing); } + transient.root = insertIntoNode(transient, transient.root, key, value, hash, 0); + return transient; } -/** - * @template K,V - * @type {WithoutFunction,K,V>} - */ -function withoutArray(root, shift, hash, key) { - const idx = mask(hash, shift); - const node = root.array[idx]; - if (node === undefined) { - return root; // already empty - } - let n = undefined; - // if node is an entry and the keys are not equal there is nothing to remove - // if node is not an entry do a recursive call - if (node.type === ENTRY) { - if (!isEqual(node.k, key)) { - return root; // no changes - } - } else { - n = without(node, shift + SHIFT, hash, key); - if (n === node) { - return root; // no changes - } - } - // if the recursive call returned undefined the node should be removed - if (n === undefined) { - // if the number of child nodes is at the minimum, pack into an index node - if (root.size <= MIN_ARRAY_NODE) { - const arr = root.array; - const out = new Array(root.size - 1); - let i = 0; - let j = 0; - let bitmap = 0; - while (i < idx) { - const nv = arr[i]; - if (nv !== undefined) { - out[j] = nv; - bitmap |= 1 << i; - ++j; - } - ++i; - } - ++i; // skip copying the removed node - while (i < arr.length) { - const nv = arr[i]; - if (nv !== undefined) { - out[j] = nv; - bitmap |= 1 << i; - ++j; - } - ++i; + +function insertIntoNode(transient, node, key, value, hash, shift) { + const data = node.data; + const generation = transient.generation; + + // 1. Overflow Node + // overflow nodes only contain key/value-pairs. we walk the data linearly trying to find a match. 
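+  // A key only reaches this depth once all 32 bits of its hash have been
+  // consumed (the shift grows by `bits` per level), so entries here can no
+  // longer be distinguished by hash alone.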
+ if (shift > 32) { + for (let i = 0; i < data.length; i += 2) { + if (isEqual(key, data[i])) { + return copyAndSet(node, generation, i + 1, value); } - return { - type: INDEX_NODE, - bitmap: bitmap, - array: out, - }; } - return { - type: ARRAY_NODE, - size: root.size - 1, - array: cloneAndSet(root.array, idx, n), - }; - } - return { - type: ARRAY_NODE, - size: root.size, - array: cloneAndSet(root.array, idx, n), - }; -} -/** - * @template K,V - * @type {WithoutFunction,K,V>} - */ -function withoutIndex(root, shift, hash, key) { - const bit = bitpos(hash, shift); - if ((root.bitmap & bit) === 0) { - return root; // already empty - } - const idx = index(root.bitmap, bit); - const node = root.array[idx]; - // if the item is not an entry - if (node.type !== ENTRY) { - const n = without(node, shift + SHIFT, hash, key); - if (n === node) { - return root; // no changes - } - // if not undefined, the child node still has items, so update it - if (n !== undefined) { - return { - type: INDEX_NODE, - bitmap: root.bitmap, - array: cloneAndSet(root.array, idx, n), - }; - } - // otherwise the child node should be removed - // if it was the only child node, remove this node from the parent - if (root.bitmap === bit) { - return undefined; - } - // otherwise just remove the child node - return { - type: INDEX_NODE, - bitmap: root.bitmap ^ bit, - array: spliceOut(root.array, idx), - }; + + transient.size += 1; + return copyAndInsertPair(node, generation, 0, data.length, key, value); } - // otherwise the item is an entry, remove it if the key matches - if (isEqual(key, node.k)) { - if (root.bitmap === bit) { - return undefined; - } - return { - type: INDEX_NODE, - bitmap: root.bitmap ^ bit, - array: spliceOut(root.array, idx), - }; + + const bit = hashbit(hash, shift); + + // 2. Child Node + // We have to check first if there is already a child node we have to traverse to. + if (node.nodemap & bit) { + const nodeidx = data.length - 1 - index(node.nodemap, bit); + + let child = data[nodeidx]; + child = insertIntoNode(transient, child, key, value, hash, shift + bits); + return copyAndSet(node, generation, nodeidx, child); } - return root; -} -/** - * @template K,V - * @param {CollisionNode} root - * @param {K} key - * @returns {undefined | Node} - */ -function withoutCollision(root, key) { - const idx = collisionIndexOf(root, key); - // if the key not found, no changes - if (idx < 0) { - return root; + + // 3. New Data Node + // No child node and no data node exists yet, so we can potentially just insert a new value. + const dataidx = Math.imul(index(node.datamap, bit), 2); + if ((node.datamap & bit) === 0) { + transient.size += 1; + return copyAndInsertPair(node, generation, bit, dataidx, key, value); } - // otherwise the entry was found, remove it - // if it was the only entry in this node, remove the whole node - if (root.array.length === 1) { - return undefined; + + // 4. Existing Data Node + // We have a match that we can update, or remove. + if (isEqual(key, data[dataidx])) { + return copyAndSet(node, generation, dataidx + 1, value); } - // otherwise just remove the entry - return { - type: COLLISION_NODE, - hash: root.hash, - array: spliceOut(root.array, idx), - }; + + // 5. Collision + // There is no child node, but a data node with the same hash, but with a different key. + // To resolve this, we push both nodes down one level. 
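+  // If the two hashes keep colliding on every level below, the recursion
+  // eventually stores both pairs side by side in an overflow node.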
+ const childShift = shift + bits; + + let child = emptyNode; + child = insertIntoNode(transient, emptyNode, key, value, hash, childShift); + + const key2 = data[dataidx]; + const value2 = data[dataidx + 1]; + const hash2 = getHash(key2); + child = insertIntoNode(transient, child, key2, value2, hash2, childShift); + + // we inserted 2 elements, but implicitely deleted the one we pushed down from the datamap. + transient.size -= 1; + + // remove the old data pair, and insert the new child node. + const length = data.length; + const nodeidx = length - 1 - index(node.nodemap, bit); + + // writing these loops in javascript instead of a combination of splices + // turns out to be faster. Copying always turned out to be faster. + const newData = new Array(length - 1); + + let readIndex = 0; + let writeIndex = 0; + + // [0..dataidx, skip 2 elements, ..nodeidx, newChild, ..rest] + while (readIndex < dataidx) newData[writeIndex++] = data[readIndex++]; + readIndex += 2; + while (readIndex <= nodeidx) newData[writeIndex++] = data[readIndex++]; + newData[writeIndex++] = child; + while (readIndex < length) newData[writeIndex++] = data[readIndex++]; + + return makeNode(generation, node.datamap ^ bit, node.nodemap | bit, newData); } + /** - * @template K,V - * @param {undefined | Node} root - * @param {(value:V,key:K)=>void} fn - * @returns {void} + * Consume a transient, removing a key if it exists. + * Returns a new transient. */ -function forEach(root, fn) { - if (root === undefined) { - return; - } - const items = root.array; - const size = items.length; - for (let i = 0; i < size; i++) { - const item = items[i]; - if (item === undefined) { - continue; - } - if (item.type === ENTRY) { - fn(item.v, item.k); - continue; - } - forEach(item, fn); - } +export function destructiveTransientDelete(key, transient) { + const hash = getHash(key); + transient.root = deleteFromNode(transient, transient.root, key, hash, 0); + return transient; } -/** - * Extra wrapper to keep track of Dict size and clean up the API - * @template K,V - */ -export default class Dict { - /** - * @template V - * @param {Record} o - * @returns {Dict} - */ - static fromObject(o) { - const keys = Object.keys(o); - /** @type Dict */ - let m = Dict.new(); - for (let i = 0; i < keys.length; i++) { - const k = keys[i]; - m = m.set(k, o[k]); +function deleteFromNode(transient, node, key, hash, shift) { + const data = node.data; + const generation = transient.generation; + + // 1. Overflow Node + // overflow nodes only contain key/value-pairs. we walk the data linearly trying to find a match. 
+ if (shift > 32) { + for (let i = 0; i < data.length; i += 2) { + if (isEqual(key, data[i])) { + transient.size -= 1; + return copyAndRemovePair(node, generation, 0, i); + } } - return m; - } - /** - * @template K,V - * @param {Map} o - * @returns {Dict} - */ - static fromMap(o) { - /** @type Dict */ - let m = Dict.new(); - o.forEach((v, k) => { - m = m.set(k, v); - }); - return m; + return node; } - static new() { - return new Dict(undefined, 0); - } + const bit = hashbit(hash, shift); + const dataidx = Math.imul(index(node.datamap, bit), 2); - /** - * @param {undefined | Node} root - * @param {number} size - */ - constructor(root, size) { - this.root = root; - this.size = size; - } - /** - * @template NotFound - * @param {K} key - * @param {NotFound} notFound - * @returns {NotFound | V} - */ - get(key, notFound) { - if (this.root === undefined) { - return notFound; - } - const found = find(this.root, 0, getHash(key), key); - if (found === undefined) { - return notFound; + // 2. Child Node + // We have to check first if there is already a child node we have to traverse to. + if ((node.nodemap & bit) !== 0) { + const nodeidx = data.length - 1 - index(node.nodemap, bit); + + let child = data[nodeidx]; + child = deleteFromNode(transient, child, key, hash, shift + bits); + + // the node did change, so let's copy to incorporate that change. + if (child.nodemap !== 0 || child.data.length > 2) { + return copyAndSet(node, generation, nodeidx, child); } - return found.v; + + // this node only has a single data (k/v-pair) child. + // to restore the CHAMP invariant, we "pull" that pair up into ourselves. + // this ensures that every tree stays in its single optimal representation, + // and allows dicts to be structurally compared. + const length = data.length; + const newData = new Array(length + 1); + + let readIndex = 0; + let writeIndex = 0; + + while (readIndex < dataidx) newData[writeIndex++] = data[readIndex++]; + newData[writeIndex++] = child.data[0]; + newData[writeIndex++] = child.data[1]; + while (readIndex < nodeidx) newData[writeIndex++] = data[readIndex++]; + readIndex++; + while (readIndex < length) newData[writeIndex++] = data[readIndex++]; + + return makeNode(generation, node.datamap | bit, node.nodemap ^ bit, newData); } - /** - * @param {K} key - * @param {V} val - * @returns {Dict} - */ - set(key, val) { - const addedLeaf = { val: false }; - const root = this.root === undefined ? EMPTY : this.root; - const newRoot = assoc(root, 0, getHash(key), key, val, addedLeaf); - if (newRoot === this.root) { - return this; - } - return new Dict(newRoot, addedLeaf.val ? this.size + 1 : this.size); + + // 3. Data Node + // There is no data entry here, or it is a prefix for a different key + if ((node.datamap & bit) === 0 || !isEqual(key, data[dataidx])) { + return node; } - /** - * @param {K} key - * @returns {Dict} - */ - delete(key) { - if (this.root === undefined) { - return this; - } - const newRoot = without(this.root, 0, getHash(key), key); - if (newRoot === this.root) { - return this; + + // we found a data entry that we can delete. + transient.size -= 1; + return copyAndRemovePair(node, generation, bit, dataidx); +} + +export function map(dict, fun) { + // map can never modify the structure, so we can walk the dictionary directly, + // but still move to a new generation to make sure we get a new copy of every node. 
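+  // copyNode only copies a node whose generation differs from the current one,
+  // so bumping the generation here is what makes the in-place writes below safe.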
+ const generation = nextGeneration(dict); + const root = copyNode(dict.root, generation); + const queue = [root]; + + while (queue.length) { + // order doesn't matter, so we can use push/pop for faster array usage. + const node = queue.pop(); + const data = node.data; + // every node contains popcount(datamap) direct entries + const edgesStart = Math.imul(popcount(node.datamap), 2); + for (let i = 0; i < edgesStart; i += 2) { + // we copied the node while queueing it, so direct mutation here is safe. + data[i + 1] = fun(data[i], data[i + 1]); } - if (newRoot === undefined) { - return Dict.new(); + // the remaining entries are other nodes we can queue + for (let i = edgesStart; i < data.length; ++i) { + // copy the node first to make it safe to mutate + data[i] = copyNode(data[i], generation); + queue.push(data[i]); } - return new Dict(newRoot, this.size - 1); } - /** - * @param {K} key - * @returns {boolean} - */ - has(key) { - if (this.root === undefined) { - return false; + + return new Dict(dict.size, root); +} + +export function fold(dict, state, fun) { + const queue = [dict.root]; + + while (queue.length) { + // order doesn't matter, so we can use push/pop for faster array usage. + const node = queue.pop(); + const data = node.data; + // every node contains popcount(datamap) direct entries + const edgesStart = Math.imul(popcount(node.datamap), 2); + for (let i = 0; i < edgesStart; i += 2) { + state = fun(state, data[i], data[i + 1]); } - return find(this.root, 0, getHash(key), key) !== undefined; - } - /** - * @returns {[K,V][]} - */ - entries() { - if (this.root === undefined) { - return []; + // the remaining entries are child nodes we can queue. + for (let i = edgesStart; i < data.length; ++i) { + queue.push(data[i]); } - /** @type [K,V][] */ - const result = []; - this.forEach((v, k) => result.push([k, v])); - return result; } - /** - * - * @param {(val:V,key:K)=>void} fn - */ - forEach(fn) { - forEach(this.root, fn); - } - hashCode() { - let h = 0; - this.forEach((v, k) => { - h = (h + hashMerge(getHash(v), getHash(k))) | 0; - }); - return h; - } - /** - * @param {unknown} o - * @returns {boolean} - */ - equals(o) { - if (!(o instanceof Dict) || this.size !== o.size) { - return false; - } - try { - this.forEach((v, k) => { - if (!isEqual(o.get(k, !v), v)) { - throw unequalDictSymbol; - } - }); - return true; - } catch (e) { - if (e === unequalDictSymbol) { - return false; - } + return state; +} - throw e; - } - } +/** + * How many `1` bits are set in a 32-bit integer. + */ +function popcount(n) { + n -= (n >>> 1) & 0x55555555; + n = (n & 0x33333333) + ((n >>> 2) & 0x33333333); + return Math.imul((n + (n >>> 4)) & 0x0f0f0f0f, 0x01010101) >>> 24; } -// This is thrown internally in Dict.equals() so that it returns false as soon -// as a non-matching key is found -const unequalDictSymbol = /* @__PURE__ */ Symbol(); +/** + * Given a population bitmap and a bit selected from that map, returns + * how many less significant 1 bits there are. + * + * For example, index(10101, 100) returns 1, since there is a single less + * significant `1` bit. This translates to the 0-based "index" of that bit. + */ +function index(bitmap, bit) { + return popcount(bitmap & (bit - 1)); +} + +/** + * Extracts a single slice ofthe hash, and returns a bitmask for the resulting value. + * For example, if the slice returns 5, this function returns 10000 = 1 << 5. 
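+ * (In other words, only the bit at position 5, counting from the least
+ * significant bit, is set in the returned mask.)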
+ */ +function hashbit(hash, shift) { + return 1 << ((hash >>> shift) & mask); +} diff --git a/src/gleam/dict.gleam b/src/gleam/dict.gleam index 29427278..556246ce 100644 --- a/src/gleam/dict.gleam +++ b/src/gleam/dict.gleam @@ -16,6 +16,25 @@ import gleam/option.{type Option} /// pub type Dict(key, value) +/// "TransientDict" is a mutable view on a dictionary used internally by the +/// javascript target. No mutable API is exposed to the user. +/// +/// Transients are to be treated as having a linear (single-use, think rust) type. +/// A transient value becomes invalid as soon as it's passed to one of the functions. +type TransientDict(key, value) + +/// Convert a normal Dict to a transient dict. +/// A transient dict is a mutable copy of the original. +@external(erlang, "gleam_stdlib", "identity") +@external(javascript, "../dict.mjs", "toTransient") +fn to_transient(dict: Dict(key, value)) -> TransientDict(key, value) + +/// Convert a transient dict back into a normal dict, freezing its contents. +/// Using the transient after this point is highly unsafe and leads to undefined behavior. +@external(erlang, "gleam_stdlib", "identity") +@external(javascript, "../dict.mjs", "fromTransient") +fn from_transient(transient: TransientDict(key, value)) -> Dict(key, value) + /// Determines the number of key-value pairs in the dict. /// This function runs in constant time and does not need to iterate the dict. /// @@ -32,7 +51,7 @@ pub type Dict(key, value) /// ``` /// @external(erlang, "maps", "size") -@external(javascript, "../gleam_stdlib.mjs", "map_size") +@external(javascript, "../dict.mjs", "size") pub fn size(dict: Dict(k, v)) -> Int /// Determines whether or not the dict is empty. @@ -76,8 +95,9 @@ pub fn is_empty(dict: Dict(k, v)) -> Bool { /// ``` /// @external(erlang, "maps", "to_list") -@external(javascript, "../gleam_stdlib.mjs", "map_to_list") -pub fn to_list(dict: Dict(k, v)) -> List(#(k, v)) +pub fn to_list(dict: Dict(k, v)) -> List(#(k, v)) { + fold(dict, from: [], with: fn(acc, key, value) { [#(key, value), ..acc] }) +} /// Converts a list of 2-element tuples `#(key, value)` to a dict. /// @@ -86,16 +106,17 @@ pub fn to_list(dict: Dict(k, v)) -> List(#(k, v)) /// @external(erlang, "maps", "from_list") pub fn from_list(list: List(#(k, v))) -> Dict(k, v) { - from_list_loop(list, new()) + from_list_loop(to_transient(new()), list) } fn from_list_loop( - over list: List(#(k, v)), - from initial: Dict(k, v), + transient: TransientDict(k, v), + list: List(#(k, v)), ) -> Dict(k, v) { case list { - [] -> initial - [#(key, value), ..rest] -> from_list_loop(rest, insert(initial, key, value)) + [] -> from_transient(transient) + [#(key, value), ..rest] -> + from_list_loop(transient_insert(key, value, transient), rest) } } @@ -113,19 +134,18 @@ fn from_list_loop( /// // -> False /// ``` /// +@external(javascript, "../dict.mjs", "has") pub fn has_key(dict: Dict(k, v), key: k) -> Bool { do_has_key(key, dict) } @external(erlang, "maps", "is_key") -fn do_has_key(key: k, dict: Dict(k, v)) -> Bool { - get(dict, key) != Error(Nil) -} +fn do_has_key(key: k, dict: Dict(k, v)) -> Bool /// Creates a fresh dict that contains no values. /// @external(erlang, "maps", "new") -@external(javascript, "../gleam_stdlib.mjs", "new_map") +@external(javascript, "../dict.mjs", "make") pub fn new() -> Dict(k, v) /// Fetches a value from a dict for a given key. 
@@ -146,7 +166,7 @@ pub fn new() -> Dict(k, v) /// ``` /// @external(erlang, "gleam_stdlib", "map_get") -@external(javascript, "../gleam_stdlib.mjs", "map_get") +@external(javascript, "../dict.mjs", "get") pub fn get(from: Dict(k, v), get: k) -> Result(v, Nil) /// Inserts a value into the dict with the given key. @@ -166,14 +186,22 @@ pub fn get(from: Dict(k, v), get: k) -> Result(v, Nil) /// // -> from_list([#("a", 5)]) /// ``` /// +@external(javascript, "../dict.mjs", "insert") pub fn insert(into dict: Dict(k, v), for key: k, insert value: v) -> Dict(k, v) { do_insert(key, value, dict) } @external(erlang, "maps", "put") -@external(javascript, "../gleam_stdlib.mjs", "map_insert") fn do_insert(key: k, value: v, dict: Dict(k, v)) -> Dict(k, v) +@external(erlang, "maps", "put") +@external(javascript, "../dict.mjs", "destructiveTransientInsert") +fn transient_insert( + key: k, + value: v, + transient: TransientDict(k, v), +) -> TransientDict(k, v) + /// Updates all values in a given dict by calling a given function on each key /// and value. /// @@ -185,15 +213,13 @@ fn do_insert(key: k, value: v, dict: Dict(k, v)) -> Dict(k, v) /// // -> from_list([#(3, 9), #(2, 8)]) /// ``` /// +@external(javascript, "../dict.mjs", "map") pub fn map_values(in dict: Dict(k, v), with fun: fn(k, v) -> a) -> Dict(k, a) { do_map_values(fun, dict) } @external(erlang, "maps", "map") -fn do_map_values(f: fn(k, v) -> a, dict: Dict(k, v)) -> Dict(k, a) { - let f = fn(dict, k, v) { insert(dict, k, f(k, v)) } - fold(dict, from: new(), with: f) -} +fn do_map_values(f: fn(k, v) -> a, dict: Dict(k, v)) -> Dict(k, a) /// Gets a list of all keys in a given dict. /// @@ -210,21 +236,7 @@ fn do_map_values(f: fn(k, v) -> a, dict: Dict(k, v)) -> Dict(k, a) { /// @external(erlang, "maps", "keys") pub fn keys(dict: Dict(k, v)) -> List(k) { - do_keys_loop(to_list(dict), []) -} - -fn do_keys_loop(list: List(#(k, v)), acc: List(k)) -> List(k) { - case list { - [] -> reverse_and_concat(acc, []) - [#(key, _value), ..rest] -> do_keys_loop(rest, [key, ..acc]) - } -} - -fn reverse_and_concat(remaining: List(a), accumulator: List(a)) -> List(a) { - case remaining { - [] -> accumulator - [first, ..rest] -> reverse_and_concat(rest, [first, ..accumulator]) - } + fold(dict, [], fn(acc, key, _value) { [key, ..acc] }) } /// Gets a list of all values in a given dict. 
@@ -242,15 +254,7 @@ fn reverse_and_concat(remaining: List(a), accumulator: List(a)) -> List(a) { /// @external(erlang, "maps", "values") pub fn values(dict: Dict(k, v)) -> List(v) { - let list_of_pairs = to_list(dict) - do_values_loop(list_of_pairs, []) -} - -fn do_values_loop(list: List(#(k, v)), acc: List(v)) -> List(v) { - case list { - [] -> reverse_and_concat(acc, []) - [#(_key, value), ..rest] -> do_values_loop(rest, [value, ..acc]) - } + fold(dict, [], fn(acc, _key, value) { [value, ..acc] }) } /// Creates a new dict from a given dict, minus any entries that a given function @@ -279,14 +283,14 @@ pub fn filter( @external(erlang, "maps", "filter") fn do_filter(f: fn(k, v) -> Bool, dict: Dict(k, v)) -> Dict(k, v) { - let insert = fn(dict, k, v) { - case f(k, v) { - True -> insert(dict, k, v) - False -> dict + to_transient(new()) + |> fold(over: dict, with: fn(transient, key, value) { + case f(key, value) { + True -> transient_insert(key, value, transient) + False -> transient } - } - - fold(dict, from: new(), with: insert) + }) + |> from_transient } /// Creates a new dict from a given dict, only including any entries for which the @@ -312,23 +316,21 @@ pub fn take(from dict: Dict(k, v), keeping desired_keys: List(k)) -> Dict(k, v) @external(erlang, "maps", "with") fn do_take(desired_keys: List(k), dict: Dict(k, v)) -> Dict(k, v) { - do_take_loop(dict, desired_keys, new()) + do_take_loop(dict, desired_keys, to_transient(new())) } fn do_take_loop( dict: Dict(k, v), desired_keys: List(k), - acc: Dict(k, v), + acc: TransientDict(k, v), ) -> Dict(k, v) { - let insert = fn(taken, key) { - case get(dict, key) { - Ok(value) -> insert(taken, key, value) - Error(_) -> taken - } - } case desired_keys { - [] -> acc - [first, ..rest] -> do_take_loop(dict, rest, insert(acc, first)) + [] -> from_transient(acc) + [key, ..rest] -> + case get(dict, key) { + Ok(value) -> do_take_loop(dict, rest, transient_insert(key, value, acc)) + Error(_) -> do_take_loop(dict, rest, acc) + } } } @@ -348,20 +350,7 @@ fn do_take_loop( /// @external(erlang, "maps", "merge") pub fn merge(into dict: Dict(k, v), from new_entries: Dict(k, v)) -> Dict(k, v) { - new_entries - |> to_list - |> fold_inserts(dict) -} - -fn fold_inserts(new_entries: List(#(k, v)), dict: Dict(k, v)) -> Dict(k, v) { - case new_entries { - [] -> dict - [first, ..rest] -> fold_inserts(rest, insert_pair(dict, first)) - } -} - -fn insert_pair(dict: Dict(k, v), pair: #(k, v)) -> Dict(k, v) { - insert(dict, pair.0, pair.1) + combine(dict, new_entries, fn(_, new_entry) { new_entry }) } /// Creates a new dict from a given dict with all the same entries except for the @@ -380,12 +369,12 @@ fn insert_pair(dict: Dict(k, v), pair: #(k, v)) -> Dict(k, v) { /// ``` /// pub fn delete(from dict: Dict(k, v), delete key: k) -> Dict(k, v) { - do_delete(key, dict) + to_transient(dict) |> transient_delete(key, _) |> from_transient } @external(erlang, "maps", "remove") -@external(javascript, "../gleam_stdlib.mjs", "map_remove") -fn do_delete(a: k, b: Dict(k, v)) -> Dict(k, v) +@external(javascript, "../dict.mjs", "destructiveTransientDelete") +fn transient_delete(a: k, b: TransientDict(k, v)) -> TransientDict(k, v) /// Creates a new dict from a given dict with all the same entries except any with /// keys found in a given list. 
@@ -408,9 +397,21 @@ fn do_delete(a: k, b: Dict(k, v)) -> Dict(k, v) /// ``` /// pub fn drop(from dict: Dict(k, v), drop disallowed_keys: List(k)) -> Dict(k, v) { + do_drop(disallowed_keys, dict) +} + +@external(erlang, "maps", "without") +fn do_drop(disallowed_keys: List(k), dict: Dict(k, v)) -> Dict(k, v) { + drop_loop(to_transient(dict), disallowed_keys) +} + +fn drop_loop( + transient: TransientDict(k, v), + disallowed_keys: List(k), +) -> Dict(k, v) { case disallowed_keys { - [] -> dict - [first, ..rest] -> drop(delete(dict, first), rest) + [] -> from_transient(transient) + [key, ..rest] -> drop_loop(transient_delete(key, transient), rest) } } @@ -473,24 +474,18 @@ pub fn upsert( /// // -> "abc" /// ``` /// +@external(javascript, "../dict.mjs", "fold") pub fn fold( over dict: Dict(k, v), from initial: acc, with fun: fn(acc, k, v) -> acc, ) -> acc { - fold_loop(to_list(dict), initial, fun) + let fun = fn(key, value, acc) { fun(acc, key, value) } + do_fold(fun, initial, dict) } -fn fold_loop( - list: List(#(k, v)), - initial: acc, - fun: fn(acc, k, v) -> acc, -) -> acc { - case list { - [] -> initial - [#(k, v), ..rest] -> fold_loop(rest, fun(initial, k, v), fun) - } -} +@external(erlang, "maps", "fold") +fn do_fold(fun: fn(k, v, acc) -> acc, initial: acc, dict: Dict(k, v)) -> acc /// Calls a function for each key and value in a dict, discarding the return /// value. @@ -540,9 +535,56 @@ pub fn combine( other: Dict(k, v), with fun: fn(v, v) -> v, ) -> Dict(k, v) { - use acc, key, value <- fold(over: dict, from: other) - case get(acc, key) { - Ok(other_value) -> insert(acc, key, fun(value, other_value)) - Error(_) -> insert(acc, key, value) + do_combine(fn(_, l, r) { fun(l, r) }, dict, other) +} + +@external(erlang, "maps", "merge_with") +fn do_combine( + combine: fn(k, v, v) -> v, + left: Dict(k, v), + right: Dict(k, v), +) -> Dict(k, v) { + let #(big, small, combine) = case size(left) >= size(right) { + True -> #(left, right, combine) + False -> #(right, left, fn(k, l, r) { combine(k, r, l) }) + } + + to_transient(big) + |> fold(over: small, with: fn(transient, key, value) { + let update = fn(existing) { combine(key, existing, value) } + transient_update_with(key, update, value, transient) + }) + |> from_transient +} + +@external(erlang, "maps", "update_with") +@external(javascript, "../dict.mjs", "destructiveTransientUpdateWith") +fn transient_update_with( + key: k, + fun: fn(v) -> v, + init: v, + transient: TransientDict(k, v), +) -> TransientDict(k, v) + +@internal +pub fn group(key: fn(v) -> k, list: List(v)) -> Dict(k, List(v)) { + group_loop(to_transient(new()), key, list) +} + +fn group_loop( + transient: TransientDict(k, List(v)), + to_key: fn(v) -> k, + list: List(v), +) -> Dict(k, List(v)) { + case list { + [] -> from_transient(transient) + [value, ..rest] -> { + let key = to_key(value) + let update = fn(existing) { [value, ..existing] } + + transient + |> transient_update_with(key, update, [value], _) + |> group_loop(to_key, rest) + } } } diff --git a/src/gleam/list.gleam b/src/gleam/list.gleam index 68362a43..5a172969 100644 --- a/src/gleam/list.gleam +++ b/src/gleam/list.gleam @@ -296,25 +296,7 @@ pub fn rest(list: List(a)) -> Result(List(a), Nil) { /// ``` /// pub fn group(list: List(v), by key: fn(v) -> k) -> Dict(k, List(v)) { - group_loop(list, key, dict.new()) -} - -fn group_loop( - list: List(v), - to_key: fn(v) -> k, - groups: Dict(k, List(v)), -) -> Dict(k, List(v)) { - case list { - [] -> groups - [first, ..rest] -> { - let key = to_key(first) - let groups = case 
dict.get(groups, key) { - Error(_) -> dict.insert(groups, key, [first]) - Ok(existing) -> dict.insert(groups, key, [first, ..existing]) - } - group_loop(rest, to_key, groups) - } - } + dict.group(key, list) } /// Returns a new list containing only the elements from the first list for diff --git a/src/gleam_stdlib.mjs b/src/gleam_stdlib.mjs index ebac45f4..490443fb 100644 --- a/src/gleam_stdlib.mjs +++ b/src/gleam_stdlib.mjs @@ -13,12 +13,11 @@ import { CustomType, } from "./gleam.mjs"; import { Some, None } from "./gleam/option.mjs"; -import Dict from "./dict.mjs"; +import { default as Dict, fold as dict_fold, get as dict_get, from as dict_from_iterable } from "./dict.mjs"; import { classify } from "./gleam/dynamic.mjs"; import { DecodeError } from "./gleam/dynamic/decode.mjs"; const Nil = undefined; -const NOT_FOUND = {}; export function identity(x) { return x; @@ -471,34 +470,6 @@ export function utf_codepoint_to_int(utf_codepoint) { return utf_codepoint.value; } -export function new_map() { - return Dict.new(); -} - -export function map_size(map) { - return map.size; -} - -export function map_to_list(map) { - return List.fromArray(map.entries()); -} - -export function map_remove(key, map) { - return map.delete(key); -} - -export function map_get(map, key) { - const value = map.get(key, NOT_FOUND); - if (value === NOT_FOUND) { - return new Error(Nil); - } - return new Ok(value); -} - -export function map_insert(key, value, map) { - return map.set(key, value); -} - function unsafe_percent_decode(string) { return decodeURIComponent(string || ""); } @@ -737,11 +708,13 @@ class Inspector { #dict(map) { let body = "dict.from_list(["; let first = true; - map.forEach((value, key) => { + + body = dict_fold(map, body, (body, key, value) => { if (!first) body = body + ", "; - body = body + "#(" + this.inspect(key) + ", " + this.inspect(value) + ")"; first = false; + return body + "#(" + this.inspect(key) + ", " + this.inspect(value) + ")"; }); + return body + "])"; } @@ -947,7 +920,12 @@ export function list_to_array(list) { export function index(data, key) { // Dictionaries and dictionary-like objects can be indexed - if (data instanceof Dict || data instanceof WeakMap || data instanceof Map) { + if (data instanceof Dict) { + const result = dict_get(data, key); + return new Ok(result.isOk() ? new Some(result[0]) : new None()); + } + + if (data instanceof WeakMap || data instanceof Map) { const token = {}; const entry = data.get(key, token); if (entry === token) return new Ok(new None()); @@ -1007,7 +985,7 @@ export function dict(data) { return new Ok(data); } if (data instanceof Map || data instanceof WeakMap) { - return new Ok(Dict.fromMap(data)); + return new Ok(dict_from_iterable(data)); } if (data == null) { return new Error("Dict"); @@ -1017,7 +995,7 @@ export function dict(data) { } const proto = Object.getPrototypeOf(data); if (proto === Object.prototype || proto === null) { - return new Ok(Dict.fromObject(data)); + return new Ok(dict_from_iterable(Object.entries(data))); } return new Error("Dict"); } diff --git a/test/gleam/dict_test.gleam b/test/gleam/dict_test.gleam index a225b0d0..03b5ad90 100644 --- a/test/gleam/dict_test.gleam +++ b/test/gleam/dict_test.gleam @@ -391,3 +391,202 @@ pub fn combine_with_no_overlapping_keys_test() { assert dict.combine(map1, map2, fn(one, _) { one }) == dict.from_list([#("a", 1), #("b", 2), #("c", 3), #("d", 4)]) } + +// Enums without fields all hash to 0 due to how the hash function works - +// we use this fact here to produce and test collisions. 
+// +// Object.keys() returns [] for variants without fields, so the hash always +// stays on it's initial value. +type CollidingKey { + CollidingKey1 + CollidingKey2 +} + +pub fn hash_collision_overflow_test() { + let d = + dict.new() |> dict.insert(CollidingKey1, 1) |> dict.insert(CollidingKey2, 2) + + assert dict.size(d) == 2 + assert dict.get(d, CollidingKey1) == Ok(1) + assert dict.get(d, CollidingKey2) == Ok(2) + + let d = dict.delete(d, CollidingKey1) + + assert dict.size(d) == 1 + assert dict.get(d, CollidingKey1) == Error(Nil) + assert dict.get(d, CollidingKey2) == Ok(2) +} + +fn test_random_operations( + initial_seed: Int, + num_ops: Int, + key_space: Int, + initial: dict.Dict(Int, Int), +) -> Nil { + test_random_operations_loop( + initial_seed, + prng(initial_seed), + num_ops, + key_space, + dict.to_list(initial), + initial, + ) +} + +fn test_random_operations_loop( + initial_seed: Int, + seed: Int, + remaining: Int, + key_space: Int, + proplist: List(#(Int, Int)), + dict: dict.Dict(Int, Int), +) -> Nil { + case remaining > 0 { + False -> { + assert_dict_matches_proplist(dict, proplist, initial_seed) + } + True -> { + let seed = prng(seed) + let op_choice = seed % 2 + let seed = prng(seed) + let key = seed % key_space + + case op_choice { + // Insert + 0 -> { + let new_proplist = list.key_set(proplist, key, key * 2) + let new_dict = dict.insert(dict, key, key * 2) + test_random_operations_loop( + initial_seed, + seed, + remaining - 1, + key_space, + new_proplist, + new_dict, + ) + } + // Delete + _ -> { + let new_proplist = case list.key_pop(proplist, key) { + Ok(#(_, remaining)) -> remaining + Error(Nil) -> proplist + } + let new_dict = dict.delete(dict, key) + test_random_operations_loop( + initial_seed, + seed, + remaining - 1, + key_space, + new_proplist, + new_dict, + ) + } + } + } + } +} + +fn run_many_random_tests( + count count: Int, + ops_per_test ops_per_test: Int, + key_space key_space: Int, + initial dict: dict.Dict(Int, Int), +) -> Nil { + case count { + 0 -> Nil + _ -> { + let start_seed = int.random(0x7fffffff) + test_random_operations(start_seed, ops_per_test, key_space, dict) + run_many_random_tests( + count: count - 1, + ops_per_test: ops_per_test, + key_space: key_space, + initial: dict, + ) + } + } +} + +pub fn random_operations_small_test() { + run_many_random_tests( + count: 100, + ops_per_test: 50, + key_space: 32, + initial: dict.new(), + ) +} + +pub fn random_operations_medium_test() { + run_many_random_tests( + count: 100, + ops_per_test: 50, + key_space: 200, + initial: range_dict(50), + ) +} + +pub fn random_operations_large_test() { + run_many_random_tests( + count: 100, + ops_per_test: 1000, + key_space: 2000, + initial: range_dict(1000), + ) +} + +fn range_dict(size) { + list.range(1, size) + |> list.map(fn(x) { #(x, x) }) + |> dict.from_list +} + +fn prng(state: Int) -> Int { + { state * 48_271 } % 0x7FFFFFFF +} + +fn assert_dict_matches_proplist( + d: dict.Dict(k, v), + proplist: List(#(k, v)), + seed: Int, +) -> Nil { + case dict.size(d) == list.length(proplist) { + True -> Nil + False -> + panic as { + "Size mismatch with seed " + <> int.to_string(seed) + <> ": dict.size=" + <> int.to_string(dict.size(d)) + <> " proplist.size=" + <> int.to_string(list.length(proplist)) + } + } + + list.each(proplist, fn(pair) { + let #(key, value) = pair + let result = dict.get(d, key) + + case result == Ok(value) { + True -> Nil + False -> + panic as { + "Get mismatch with seed " + <> int.to_string(seed) + <> ": key=" + <> string.inspect(key) + <> ", value=" + 
<> string.inspect(value) + <> ", dict.get=" + <> string.inspect(result) + } + } + }) + + case d == dict.from_list(proplist) { + True -> Nil + False -> + panic as { + "Structural equality failed with seed " <> int.to_string(seed) + } + } +} diff --git a/test/gleeunit/should.gleam b/test/gleeunit/should.gleam index 11cd82b7..99cd16c4 100644 --- a/test/gleeunit/should.gleam +++ b/test/gleeunit/should.gleam @@ -8,11 +8,11 @@ pub fn equal(a: t, b: t) -> Nil { True -> Nil _ -> panic as string.concat([ - "\n", - string.inspect(a), - "\nshould equal\n", - string.inspect(b), - ]) + "\n", + string.inspect(a), + "\nshould equal\n", + string.inspect(b), + ]) } } @@ -21,11 +21,11 @@ pub fn not_equal(a: t, b: t) -> Nil { True -> Nil _ -> panic as string.concat([ - "\n", - string.inspect(a), - "\nshould not equal\n", - string.inspect(b), - ]) + "\n", + string.inspect(a), + "\nshould not equal\n", + string.inspect(b), + ]) } }