Skip to content

Commit 5c13848

Browse files
committed
hash: fix numerous issues with weak hash tables
1. We don't normalize garbage collected entries. Doing that created a false free bucket, so GETHASH and SETHASH stopped searching for entry too early. Fixes #761. 2. MAPHASH does not map over garbate collected entries. Previously MAPHASH did not call copy_entry, so it was always accessing key/val even if they were recently garbage collected, effectively mapping over NIL in the place of the collected value 3. HASH-TABLE-CONTENT had a similar issue to MAPHASH. 4. REMHASH did fill the hole, but didn't replace the moved entry with a free bucket, moreover we've used the value to comptue the hash when computing the distance function. Moreover we introduce an optimization, where SETHASH first tries to fill a garbage collected entry in the bucket before it tries to extend the table.
1 parent 63f0ba1 commit 5c13848

1 file changed

Lines changed: 72 additions & 50 deletions

File tree

src/c/hash.d

Lines changed: 72 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,33 @@ _ecl_remhash_generic(cl_object key, cl_object hashtable)
553553

554554
/*
555555
* WEAK HASH TABLES
556+
*
557+
* Entries in a weak hash table may disappear without explicit REMHASH. Our
558+
* implementation handles collisions with open addressing, that is we put the
559+
* colliding element at the first free spot starting from its hash.
560+
*
561+
* Until recently we've implemented REMHASH by inserting "tombstones", that is
562+
* by simply invalidating the entry. Currently after removing the hash we fill
563+
* the hole by shifting elements after it to the left and that yields some benefits
564+
* for scenarios where we frequently add and remove elements.
565+
*
566+
* Since weak entries may disappear at any time, we need to either fill holes in
567+
* GETHASH/SETHASH too, or we need to revert back to inserting "tombstones".
568+
* Notice that some functions are common to hash tables with and without weak
569+
* entries - for example MAPHASH. These functions assume that entry indexes do
570+
* not change while iterating, so we can't shift values in copy_entry unless we
571+
* customize these functions too.
572+
*
573+
* For reasons above weak entries are not normalized to OBJNULL but rather we
574+
* leave the weak entry in the same place as a tombstone. SETHASH reuses these
575+
* entries while REMHASH behaves the same for all hash tables.
576+
*
577+
* [key=OBJNULL, value=OBJNULL] - free bucket
578+
* [key=ECL_NIL, value=OBJNULL] - tombstone
579+
* [key=OBJNULL, value=ECL_NIL] - tombstone copy
580+
*
556581
*/
582+
557583
#ifndef ECL_WEAK_HASH
558584
#define copy_entry(e,h) *(e)
559585
#endif
@@ -647,6 +673,11 @@ copy_entry(struct ecl_hashtable_entry *e, cl_object h)
647673
{
648674
if (e->key == OBJNULL) {
649675
return *e;
676+
} else if (e->value == OBJNULL) {
677+
struct ecl_hashtable_entry output = *e;
678+
output.key = OBJNULL;
679+
output.value = ECL_NIL;
680+
return output;
650681
} else {
651682
struct ecl_hashtable_entry output = *e;
652683
switch (h->hash.weak) {
@@ -678,80 +709,84 @@ copy_entry(struct ecl_hashtable_entry *e, cl_object h)
678709
default:
679710
return output;
680711
}
681-
h->hash.entries--;
682712
output.key = OBJNULL;
683713
output.value = ECL_NIL;
684-
return *e = output;
685-
}
686-
}
687-
688-
/* Returns the original entry p, and makes a normalized copy to aux. */
689-
static struct ecl_hashtable_entry *
690-
_ecl_weak_hash_loop(cl_hashkey h, cl_object key, cl_object hashtable,
691-
struct ecl_hashtable_entry *aux)
692-
{
693-
cl_index i, hsize = hashtable->hash.size;
694-
for (i = h % hsize; ; i = (i + 1) % hsize) {
695-
struct ecl_hashtable_entry *p = hashtable->hash.data + i;
696-
struct ecl_hashtable_entry e = *aux = copy_entry(p, hashtable);
697-
if (e.key == OBJNULL || _ecl_hash_test(hashtable, key, e.key)) {
698-
return p;
699-
}
714+
e->key = ECL_NIL;
715+
e->value = OBJNULL;
716+
return output;
700717
}
701718
}
702719

703720
static cl_object
704721
_ecl_gethash_weak(cl_object key, cl_object hashtable, cl_object def)
705722
{
723+
cl_index i, hsize = hashtable->hash.size;
706724
cl_hashkey h = _ecl_hash_key(hashtable, key);
707-
struct ecl_hashtable_entry aux[1];
708-
_ecl_weak_hash_loop(h, key, hashtable, aux);
709-
if (aux->key == OBJNULL) {
710-
return def;
725+
struct ecl_hashtable_entry *p, e;
726+
for (i = h % hsize; ; i = (i + 1) % hsize) {
727+
p = hashtable->hash.data + i;
728+
e = copy_entry(p, hashtable);
729+
if (p->key == OBJNULL) return def;
730+
if (p->value == OBJNULL) continue; /* skip the tombstone */
731+
if (_ecl_hash_test(hashtable, key, e.key)) return e.value;
711732
}
712-
return aux->value;
713733
}
714734

715735
static cl_object
716736
_ecl_sethash_weak(cl_object key, cl_object hashtable, cl_object value)
717737
{
738+
cl_index i, hsize = hashtable->hash.size;
718739
cl_hashkey h = _ecl_hash_key(hashtable, key);
719-
struct ecl_hashtable_entry aux[1];
720-
struct ecl_hashtable_entry *e;
740+
struct ecl_hashtable_entry e, *p, *f = NULL;
721741
AGAIN:
722-
e = _ecl_weak_hash_loop(h, key, hashtable, aux);
723-
if (aux->key == OBJNULL) {
742+
for (i = h % hsize; ; i = (i + 1) % hsize) {
743+
p = hashtable->hash.data + i;
744+
e = copy_entry(p, hashtable);
745+
if (p->key == OBJNULL) { break; }
746+
if (p->value == OBJNULL) { f = p; continue; }
747+
if (_ecl_hash_test(hashtable, key, e.key)) {
748+
f = p;
749+
break;
750+
}
751+
}
752+
if (p->key == OBJNULL && f == NULL) {
724753
cl_index i = hashtable->hash.entries + 1;
725754
if (i >= hashtable->hash.limit) {
726755
hashtable = ecl_extend_hashtable(hashtable);
727756
goto AGAIN;
728757
}
729758
hashtable->hash.entries = i;
730-
e->key = _ecl_store_key(hashtable, key);
759+
f = p;
731760
}
732-
e->value = _ecl_store_val(hashtable, value);
761+
f->key = _ecl_store_key(hashtable, key);
762+
f->value = _ecl_store_val(hashtable, value);
733763
return hashtable;
734764
}
735765

736-
737766
static bool
738767
_ecl_remhash_weak(cl_object key, cl_object hashtable)
739768
{
740769
cl_index i, hsize = hashtable->hash.size;
741770
cl_hashkey h = _ecl_hash_key(hashtable, key);
771+
struct ecl_hashtable_entry *p, e;
742772
for (i = h % hsize; ; i = (i + 1) % hsize) {
743-
struct ecl_hashtable_entry *p = hashtable->hash.data + i;
744-
struct ecl_hashtable_entry e = copy_entry(p, hashtable);
745-
if (e.key == OBJNULL) return 0;
773+
p = hashtable->hash.data + i;
774+
e = copy_entry(p, hashtable);
775+
if (p->key == OBJNULL) return 0;
776+
/* We could try to shift consecutive tombstones here(!) */
777+
if (e.key == OBJNULL) continue;
746778
if (_ecl_hash_test(hashtable, key, e.key)) {
747779
cl_index j = (i+1) % hsize, k;
780+
struct ecl_hashtable_entry *q, f;
748781
for (k = 1; k <= hsize; j = (j+1) % hsize, k++) {
749-
struct ecl_hashtable_entry *q = hashtable->hash.data + j;
750-
struct ecl_hashtable_entry f = copy_entry(q, hashtable);
782+
q = hashtable->hash.data + j;
783+
f = copy_entry(q, hashtable);
751784
if (f.key == OBJNULL) {
785+
p->key = OBJNULL;
786+
p->value = OBJNULL;
752787
break;
753788
}
754-
cl_hashkey hf = _ecl_hash_key(hashtable, f.value);
789+
cl_hashkey hf = _ecl_hash_key(hashtable, f.key);
755790
cl_index m = hf % hsize;
756791
cl_index d = (j >= m) ? (j - m) : (j + hsize - m);
757792
if (k <= d) {
@@ -1418,23 +1453,10 @@ cl_maphash(cl_object fun, cl_object ht)
14181453
j = i;
14191454
do {
14201455
j = (j == 0) ? hsize-1 : j - 1;
1421-
/* FIXME copy the entry! We don't want map over collected entries. */
1422-
struct ecl_hashtable_entry e = ht->hash.data[j];
1456+
struct ecl_hashtable_entry e = copy_entry(ht->hash.data + j, ht);
14231457
if (e.key != OBJNULL) {
14241458
cl_object key = e.key;
14251459
cl_object val = e.value;
1426-
switch (ht->hash.weak) {
1427-
case ecl_htt_weak_key:
1428-
key = si_weak_pointer_value(key);
1429-
break;
1430-
case ecl_htt_weak_value:
1431-
val = si_weak_pointer_value(val);
1432-
break;
1433-
case ecl_htt_weak_key_and_value:
1434-
case ecl_htt_weak_key_or_value:
1435-
key = si_weak_pointer_value(key);
1436-
val = si_weak_pointer_value(val);
1437-
}
14381460
funcall(3, fun, key, val);
14391461
}
14401462
} while (j != i);
@@ -1448,7 +1470,7 @@ si_hash_table_content(cl_object ht)
14481470
cl_object output = ECL_NIL;
14491471
assert_type_hash_table(@[ext::hash-table-content], 2, ht);
14501472
for (i = 0; i < ht->hash.size; i++) {
1451-
struct ecl_hashtable_entry e = ht->hash.data[i];
1473+
struct ecl_hashtable_entry e = copy_entry(ht->hash.data + i, ht);
14521474
if (e.key != OBJNULL)
14531475
output = ecl_cons(ecl_cons(e.key, e.value), output);
14541476
}

0 commit comments

Comments
 (0)