Skip to content

Commit 3b04578

Browse files
committed
btree: Implement faster binary search algorithm
This implements a binary search algorithm for B-Trees that reduces branching to the absolute minimum necessary for a binary search. It also enables the compiler to inline the comparator, ensuring that the only slowdown when doing a binary search comes from waiting for memory accesses. Consumers must opt into using the faster algorithm. Signed-off-by: Richard Yao <[email protected]>
1 parent 7381ddf commit 3b04578

File tree

5 files changed

+74
-9
lines changed

5 files changed

+74
-9
lines changed

include/sys/btree.h

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,12 @@ typedef struct zfs_btree_index {
105105
boolean_t bti_before;
106106
} zfs_btree_index_t;
107107

108-
typedef struct btree {
108+
typedef struct btree zfs_btree_t;
109+
110+
struct btree {
109111
int (*bt_compar) (const void *, const void *);
112+
void * (*bt_find_in_buf) (zfs_btree_t *, uint8_t *, uint32_t,
113+
const void *, zfs_btree_index_t *);
110114
size_t bt_elem_size;
111115
size_t bt_leaf_size;
112116
uint32_t bt_leaf_cap;
@@ -115,7 +119,33 @@ typedef struct btree {
115119
uint64_t bt_num_nodes;
116120
zfs_btree_hdr_t *bt_root;
117121
zfs_btree_leaf_t *bt_bulk; // non-null if bulk loading
118-
} zfs_btree_t;
122+
};
123+
124+
/*
 * ZFS_BTREE_FIND_IN_BUF_FUNC(NAME, T, COMP)
 *
 * Defines a static function NAME, with the signature of the bt_find_in_buf
 * callback, that binary-searches a sorted buffer of nelems elements of type
 * T for value, using the comparator COMP. Because COMP is named directly
 * in the expansion rather than called through a pointer, the compiler is
 * free to inline it.
 *
 * The loop advances the cursor by multiplying the comparison result instead
 * of branching on it, so the only loop branch is the loop condition itself.
 *
 * On return, where->bti_offset is the element index (not a byte offset) of
 * the match or of the insertion point, and where->bti_before is B_FALSE on
 * a match and B_TRUE otherwise. The function returns a pointer to the
 * matching element, or NULL if value is not in the buffer.
 *
 * T must be the element type actually stored in the buffer; the search
 * strides by sizeof (T). COMP must return a negative, zero or positive
 * value for less-than, equal and greater-than respectively.
 */
#define ZFS_BTREE_FIND_IN_BUF_FUNC(NAME, T, COMP)			\
static void *								\
NAME(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,			\
    const void *value, zfs_btree_index_t *where)			\
{									\
	T *i = (T *)buf;						\
	(void) tree;							\
									\
	/* An empty buffer has no element that may be compared. */	\
	if (nelems == 0) {						\
		where->bti_offset = 0;					\
		where->bti_before = B_TRUE;				\
		return (NULL);						\
	}								\
									\
	/* Invariant: the answer lies within [i, i + nelems). */	\
	while (nelems > 1) {						\
		uint32_t half = nelems / 2;				\
		nelems -= half;						\
		i += (COMP(&i[half - 1], value) < 0) * half;		\
	}								\
									\
	int comp = COMP(i, value);					\
	/* Pointer subtraction on T * already counts elements. */	\
	where->bti_offset = (size_t)(i - (T *)buf) + (comp < 0);	\
									\
	if (comp == 0) {						\
		where->bti_before = B_FALSE;				\
		return (i);						\
	}								\
									\
	where->bti_before = B_TRUE;					\
	return (NULL);							\
}
119149

120150
/*
121151
* Allocate and deallocate caches for btree nodes.

module/zfs/btree.c

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,10 @@ zfs_btree_create(zfs_btree_t *tree, int (*compar) (const void *, const void *),
198198
zfs_btree_create_custom(tree, compar, size, BTREE_LEAF_SIZE);
199199
}
200200

201+
static void *
202+
zfs_btree_find_in_buf(zfs_btree_t *tree, uint8_t *buf, uint32_t nelems,
203+
const void *value, zfs_btree_index_t *where);
204+
201205
void
202206
zfs_btree_create_custom(zfs_btree_t *tree,
203207
int (*compar) (const void *, const void *),
@@ -208,6 +212,7 @@ zfs_btree_create_custom(zfs_btree_t *tree,
208212
ASSERT3U(size, <=, esize / 2);
209213
memset(tree, 0, sizeof (*tree));
210214
tree->bt_compar = compar;
215+
tree->bt_find_in_buf = zfs_btree_find_in_buf;
211216
tree->bt_elem_size = size;
212217
tree->bt_leaf_size = lsize;
213218
tree->bt_leaf_cap = P2ALIGN(esize / size, 2);
@@ -303,7 +308,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
303308
* element in the last leaf, it's in the last leaf or
304309
* it's not in the tree.
305310
*/
306-
void *d = zfs_btree_find_in_buf(tree,
311+
void *d = tree->bt_find_in_buf(tree,
307312
last_leaf->btl_elems +
308313
last_leaf->btl_hdr.bth_first * size,
309314
last_leaf->btl_hdr.bth_count, value, &idx);
@@ -327,7 +332,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
327332
for (node = (zfs_btree_core_t *)tree->bt_root; depth < tree->bt_height;
328333
node = (zfs_btree_core_t *)node->btc_children[child], depth++) {
329334
ASSERT3P(node, !=, NULL);
330-
void *d = zfs_btree_find_in_buf(tree, node->btc_elems,
335+
void *d = tree->bt_find_in_buf(tree, node->btc_elems,
331336
node->btc_hdr.bth_count, value, &idx);
332337
EQUIV(d != NULL, !idx.bti_before);
333338
if (d != NULL) {
@@ -347,7 +352,7 @@ zfs_btree_find(zfs_btree_t *tree, const void *value, zfs_btree_index_t *where)
347352
*/
348353
zfs_btree_leaf_t *leaf = (depth == 0 ?
349354
(zfs_btree_leaf_t *)tree->bt_root : (zfs_btree_leaf_t *)node);
350-
void *d = zfs_btree_find_in_buf(tree, leaf->btl_elems +
355+
void *d = tree->bt_find_in_buf(tree, leaf->btl_elems +
351356
leaf->btl_hdr.bth_first * size,
352357
leaf->btl_hdr.bth_count, value, &idx);
353358

@@ -671,7 +676,7 @@ zfs_btree_insert_into_parent(zfs_btree_t *tree, zfs_btree_hdr_t *old_node,
671676
zfs_btree_hdr_t *par_hdr = &parent->btc_hdr;
672677
zfs_btree_index_t idx;
673678
ASSERT(zfs_btree_is_core(par_hdr));
674-
VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
679+
VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
675680
par_hdr->bth_count, buf, &idx), ==, NULL);
676681
ASSERT(idx.bti_before);
677682
uint32_t offset = idx.bti_offset;
@@ -897,7 +902,7 @@ zfs_btree_find_parent_idx(zfs_btree_t *tree, zfs_btree_hdr_t *hdr)
897902
}
898903
zfs_btree_index_t idx;
899904
zfs_btree_core_t *parent = hdr->bth_parent;
900-
VERIFY3P(zfs_btree_find_in_buf(tree, parent->btc_elems,
905+
VERIFY3P(tree->bt_find_in_buf(tree, parent->btc_elems,
901906
parent->btc_hdr.bth_count, buf, &idx), ==, NULL);
902907
ASSERT(idx.bti_before);
903908
ASSERT3U(idx.bti_offset, <=, parent->btc_hdr.bth_count);

module/zfs/dsl_scan.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4203,13 +4203,17 @@ ext_size_compare(const void *x, const void *y)
42034203
return (TREE_CMP(*a, *b));
42044204
}
42054205

4206+
/*
 * Generate ext_size_find_in_buf(): a binary search over uint64_t elements
 * ordered by ext_size_compare(). ext_size_create() installs it as the size
 * tree's bt_find_in_buf callback.
 */
ZFS_BTREE_FIND_IN_BUF_FUNC(ext_size_find_in_buf, uint64_t,
4207+
ext_size_compare)
4208+
42064209
static void
42074210
ext_size_create(range_tree_t *rt, void *arg)
42084211
{
42094212
(void) rt;
42104213
zfs_btree_t *size_tree = arg;
42114214

42124215
zfs_btree_create(size_tree, ext_size_compare, sizeof (uint64_t));
4216+
size_tree->bt_find_in_buf = ext_size_find_in_buf;
42134217
}
42144218

42154219
static void

module/zfs/metaslab.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1412,6 +1412,13 @@ metaslab_size_tree_full_load(range_tree_t *rt)
14121412
range_tree_walk(rt, metaslab_size_sorted_add, &arg);
14131413
}
14141414

1415+
1416+
/*
 * Generate the binary search used for RANGE_SEG32 size trees: elements are
 * range_seg32_t ordered by metaslab_rangesize32_compare(). Installed by
 * metaslab_rt_create().
 */
ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize32_in_buf,
1417+
range_seg32_t, metaslab_rangesize32_compare)
1418+
1419+
/*
 * Generate the binary search used for RANGE_SEG64 size trees: elements are
 * range_seg64_t ordered by metaslab_rangesize64_compare(). Installed by
 * metaslab_rt_create().
 */
ZFS_BTREE_FIND_IN_BUF_FUNC(metaslab_rt_find_rangesize64_in_buf,
1420+
range_seg64_t, metaslab_rangesize64_compare)
1421+
14151422
/*
14161423
* Create any block allocator specific components. The current allocators
14171424
* rely on using both a size-ordered range_tree_t and an array of uint64_t's.
@@ -1428,15 +1435,20 @@ metaslab_rt_create(range_tree_t *rt, void *arg)
14281435
case RANGE_SEG32:
14291436
size = sizeof (range_seg32_t);
14301437
compare = metaslab_rangesize32_compare;
1438+
zfs_btree_create(size_tree, compare, size);
1439+
size_tree->bt_find_in_buf =
1440+
metaslab_rt_find_rangesize32_in_buf;
14311441
break;
14321442
case RANGE_SEG64:
14331443
size = sizeof (range_seg64_t);
14341444
compare = metaslab_rangesize64_compare;
1445+
zfs_btree_create(size_tree, compare, size);
1446+
size_tree->bt_find_in_buf =
1447+
metaslab_rt_find_rangesize64_in_buf;
14351448
break;
14361449
default:
14371450
panic("Invalid range seg type %d", rt->rt_type);
14381451
}
1439-
zfs_btree_create(size_tree, compare, size);
14401452
mrap->mra_floor_shift = metaslab_by_size_min_shift;
14411453
}
14421454

module/zfs/range_tree.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,15 @@ range_tree_seg_gap_compare(const void *x1, const void *x2)
187187
return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
188188
}
189189

190+
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg32_find_in_buf, range_seg64_t,
191+
range_tree_seg32_compare)
192+
193+
/*
 * Generate the binary search used for RANGE_SEG64 range trees: elements are
 * range_seg64_t ordered by range_tree_seg64_compare(). Installed by
 * range_tree_create_gap().
 */
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg64_find_in_buf, range_seg64_t,
194+
range_tree_seg64_compare)
195+
196+
ZFS_BTREE_FIND_IN_BUF_FUNC(range_tree_seg_gap_find_in_buf, range_seg64_t,
197+
range_tree_seg_gap_compare)
198+
190199
range_tree_t *
191200
range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
192201
void *arg, uint64_t start, uint64_t shift, uint64_t gap)
@@ -201,19 +210,24 @@ range_tree_create_gap(const range_tree_ops_t *ops, range_seg_type_t type,
201210
case RANGE_SEG32:
202211
size = sizeof (range_seg32_t);
203212
compare = range_tree_seg32_compare;
213+
zfs_btree_create(&rt->rt_root, compare, size);
214+
rt->rt_root.bt_find_in_buf = range_tree_seg32_find_in_buf;
204215
break;
205216
case RANGE_SEG64:
206217
size = sizeof (range_seg64_t);
207218
compare = range_tree_seg64_compare;
219+
zfs_btree_create(&rt->rt_root, compare, size);
220+
rt->rt_root.bt_find_in_buf = range_tree_seg64_find_in_buf;
208221
break;
209222
case RANGE_SEG_GAP:
210223
size = sizeof (range_seg_gap_t);
211224
compare = range_tree_seg_gap_compare;
225+
zfs_btree_create(&rt->rt_root, compare, size);
226+
rt->rt_root.bt_find_in_buf = range_tree_seg_gap_find_in_buf;
212227
break;
213228
default:
214229
panic("Invalid range seg type %d", type);
215230
}
216-
zfs_btree_create(&rt->rt_root, compare, size);
217231

218232
rt->rt_ops = ops;
219233
rt->rt_gap = gap;

0 commit comments

Comments
 (0)