Skip to content

MDB_PREFIX_COMPRESSION delete rebalance fails with MDB_PROBLEM when moving F_BIGDATA node #3

Description

@thesophiaxu

Reproduction:

// repro_prefix_overflow_delete.c
#include "dlmdb.h"

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>

// Terminate the repro when a step fails.
// rc:    return code of the step; anything other than MDB_SUCCESS is a failure.
// stage: short label for the step, printed in front of the error text.
// On failure, prints "<stage>: <message> (<code>)" to stderr and exits with status 1.
static void check(int rc, const char *stage) {
  if (rc == MDB_SUCCESS) {
    return;
  }

  const char *message = mdb_strerror(rc);
  fprintf(stderr, "%s: %s (%d)\n", stage, message, rc);
  exit(1);
}

// Encode x into out[0..7] as big-endian: out[0] receives the most significant
// byte and out[7] the least significant one.
static void u64be(unsigned char out[8], uint64_t x) {
  for (unsigned idx = 0; idx < 8; ++idx) {
    // Byte idx sits (7 - idx) bytes above the low end of x.
    unsigned shift = 8u * (7u - idx);
    out[idx] = (unsigned char)((x >> shift) & 0xff);
  }
}

// Store one record in its own write transaction.
// k: key, written as an 8-byte big-endian integer.
// n: length of the value in bytes; the value is n copies of 'A'.
// Returns MDB_SUCCESS on success, ENOMEM if the value buffer cannot be
// allocated, or the error code of the first failing mdb_* call.
static int put_one(MDB_env *env, MDB_dbi dbi, uint64_t k, size_t n) {
  MDB_txn *txn = NULL;
  unsigned char keybuf[8];

  // Ask for at least one byte so that a NULL result always means the
  // allocation failed (malloc(0) is allowed to return NULL on success).
  unsigned char *valbuf = malloc(n ? n : 1);
  if (!valbuf) {
    return ENOMEM;
  }
  memset(valbuf, 'A', n);
  u64be(keybuf, k);

  MDB_val key = {sizeof keybuf, keybuf};
  MDB_val val = {n, valbuf};

  int rc = mdb_txn_begin(env, NULL, 0, &txn);
  if (rc == MDB_SUCCESS) rc = mdb_put(txn, dbi, &key, &val, MDB_APPEND);
  if (rc == MDB_SUCCESS) rc = mdb_txn_commit(txn);
  // Begin or put failed: abort only if a transaction handle was produced.
  else if (txn) mdb_txn_abort(txn);

  free(valbuf);
  return rc;
}

// Delete the record whose key is k (8-byte big-endian) in its own write
// transaction. Returns MDB_SUCCESS on success, otherwise the error code of
// the first failing mdb_* call.
static int del_one(MDB_env *env, MDB_dbi dbi, uint64_t k) {
  unsigned char encoded[8];
  u64be(encoded, k);

  MDB_val key = {sizeof encoded, encoded};
  MDB_txn *txn = NULL;

  int rc = mdb_txn_begin(env, NULL, 0, &txn);
  if (rc == MDB_SUCCESS) {
    rc = mdb_del(txn, dbi, &key, NULL);
  }

  if (rc != MDB_SUCCESS) {
    // Begin or delete failed: abort only if a transaction handle was produced.
    if (txn) {
      mdb_txn_abort(txn);
    }
    return rc;
  }

  return mdb_txn_commit(txn);
}

// Reproduction driver: creates a fresh environment under /tmp, opens a named
// database with MDB_PREFIX_COMPRESSION, inserts five records, then deletes
// keys 1017 and 1020. Exits 0 if the final delete succeeds and 1 otherwise.
int main(void) {
  const char *dir = "/tmp/dlmdb-prefix-overflow-delete-repro";

  // Records inserted before the deletes, in this exact order:
  // key, value length in bytes, and the stage label handed to check().
  static const struct {
    uint64_t key;
    size_t len;
    const char *stage;
  } inserts[] = {
    {1013, 8110, "put 1013"},
    {1014, 9000, "put 1014"},
    {1017, 8100, "put 1017"},
    {1019, 100,  "put 1019"},
    {1020, 1,    "put 1020"},
  };

  MDB_env *env = NULL;
  MDB_txn *txn = NULL;
  MDB_dbi dbi;

  // Start from an empty directory so earlier runs cannot affect the result.
  system("rm -rf /tmp/dlmdb-prefix-overflow-delete-repro");
  int mkdir_rc = mkdir(dir, 0700);
  check(mkdir_rc == 0 ? MDB_SUCCESS : errno, "mkdir");

  check(mdb_env_create(&env), "env create");
  check(mdb_env_set_mapsize(env, 1024UL * 1024UL * 1024UL), "mapsize");
  check(mdb_env_set_maxdbs(env, 16), "maxdbs");
  check(mdb_env_open(env, dir, 0, 0600), "env open");

  // Open (creating if needed) the prefix-compressed database in its own txn.
  check(mdb_txn_begin(env, NULL, 0, &txn), "open txn");
  check(mdb_dbi_open(txn, "repro",
        MDB_CREATE | MDB_PREFIX_COMPRESSION, &dbi), "dbi open");
  check(mdb_txn_commit(txn), "open commit");

  for (size_t i = 0; i < sizeof inserts / sizeof inserts[0]; ++i) {
    check(put_one(env, dbi, inserts[i].key, inserts[i].len), inserts[i].stage);
  }

  check(del_one(env, dbi, 1017), "del 1017");

  // This is the delete the issue reports as failing. Its result is printed
  // instead of being passed to check() so that the cleanup below still runs.
  int rc = del_one(env, dbi, 1020);
  printf("del 1020: %s (%d)\n", mdb_strerror(rc), rc);

  mdb_dbi_close(env, dbi);
  mdb_env_close(env);
  system("rm -rf /tmp/dlmdb-prefix-overflow-delete-repro");

  if (rc != MDB_SUCCESS) {
    return 1;
  }
  return 0;
}
lldb -b -o 'breakpoint set -n mdb_txn_mark_error' -o run -o bt -o quit /tmp/dlmdb-repro
(lldb) target create "/tmp/dlmdb-repro"
Current executable set to '/tmp/dlmdb-repro' (arm64).
(lldb) breakpoint set -n mdb_txn_mark_error
Breakpoint 1: where = dlmdb-repro`mdb_txn_mark_error + 12 at mdb.c:1794:7, address = 0x0000000100011d9c
(lldb) run
Process 18810 launched: '/tmp/dlmdb-repro' (arm64)
Process 18810 stopped
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
    frame #0: 0x0000000100011d9c dlmdb-repro`mdb_txn_mark_error(txn=0x00000001005a6120, err=-30779) at mdb.c:1794:7
   1791 static void
   1792 mdb_txn_mark_error(MDB_txn *txn, int err)
   1793 {
-> 1794         if (!txn)
   1795                 return;
   1796         txn->mt_last_err = err ? err : MDB_BAD_TXN;
   1797         txn->mt_flags |= MDB_TXN_ERROR;
Target 0: (dlmdb-repro) stopped.
(lldb) bt
* thread #1, queue = 'com.apple.main-thread', stop reason = breakpoint 1.1
  * frame #0: 0x0000000100011d9c dlmdb-repro`mdb_txn_mark_error(txn=0x00000001005a6120, err=-30779) at mdb.c:1794:7
    frame #1: 0x00000001000284e4 dlmdb-repro`mdb_cursor_del0(mc=0x000000016fdfe5a8) at mdb.c:15057:3
    frame #2: 0x000000010000b2d4 dlmdb-repro`_mdb_cursor_del(mc=0x000000016fdfe5a8, flags=32) at mdb.c:12890:9
    frame #3: 0x000000010000caa0 dlmdb-repro`mdb_del0(txn=0x00000001005a6120, dbi=2, key=0x000000016fdfebc0, data=0x0000000000000000, flags=32) at mdb.c:15121:8
    frame #4: 0x000000010000c974 dlmdb-repro`mdb_del(txn=0x00000001005a6120, dbi=2, key=0x000000016fdfebc0, data=0x0000000000000000) at mdb.c:15082:9
    frame #5: 0x00000001000009d8 dlmdb-repro`del_one(env=0x00000001005a56c0, dbi=2, k=1020) at repro_prefix_overflow_delete.c:51:31
    frame #6: 0x0000000100000718 dlmdb-repro`main at repro_prefix_overflow_delete.c:84:12
    frame #7: 0x000000018c03fe00 dyld`start + 6992
(lldb) quit

Below is a succinct (unverified) Codex-written diagnosis:

Codex-written diagnosis — exact failing case:
  1. A DBI is opened with MDB_PREFIX_COMPRESSION.
  2. Some large values are inserted, so at least one leaf node is stored as F_BIGDATA on an overflow page.
  3. A delete causes B-tree leaf rebalancing.
  4. During rebalance, DLMDB moves an existing overflow-valued node from a neighbor leaf into index 0 of another leaf.
  5. Because it lands at index 0, it becomes the prefix-compressed leaf’s “trunk” key, so DLMDB calls the prefix leaf rebuild path.
  6. That rebuild path treats F_BIGDATA as valid only when a new overflow page was just allocated.
    The bug is basically this logic in mdb_leaf_rebuild_after_trunk_insert:
if (F_ISSET(new_flags, F_BIGDATA)) {
    if (!ofp)
        return MDB_PROBLEM;
    insert_entry->data_payload = sizeof(pgno_t);
    insert_entry->data_ptr = (unsigned char *)&ofp->mp_pgno;
}

That assumption is wrong during mdb_node_move. In this path, the overflow page already exists. No new ofp is allocated, so ofp == NULL, but new_data->mv_data already points at the existing overflow page number stored in the source node.

In the repro, the moved overflow node is key 1014 with its 9000-byte value. Deleting 1017 and then 1020 triggers a rebalance that tries to move 1014 into the first slot of the destination leaf, which hits the incorrect ofp == NULL check.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Fields

    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions