1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-05-13 17:20:21 +02:00

Add address map and set

This commit is contained in:
Andy Wingo 2022-04-12 21:41:26 +02:00
parent b0b4c4d893
commit d425620d37
5 changed files with 655 additions and 0 deletions

45
address-hash.h Normal file
View file

@ -0,0 +1,45 @@
#ifndef ADDRESS_HASH_H
#define ADDRESS_HASH_H
#include <stdint.h>
// Scramble the bits of X into a hash value of the same width.  Both
// mixers below are invertible; unhash_address undoes them.
static uintptr_t hash_address(uintptr_t x) {
  if (sizeof (x) >= 8) {
    // 64-bit words: Sebastiano Vigna's splitmix64 integer mixer, from
    // https://prng.di.unimi.it/splitmix64.c.
    x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9U;
    x = (x ^ (x >> 27)) * 0x94d049bb133111ebU;
    return x ^ (x >> 31);
  }
  // Narrower words: Chris Wellons's lowbias32, from
  // https://nullprogram.com/blog/2018/07/31/.
  x = (x ^ (x >> 16)) * 0x7feb352dU;
  x = (x ^ (x >> 15)) * 0x846ca68bU;
  return x ^ (x >> 16);
}
// Recover the original value from a hash produced by hash_address.  The
// inverse mixers come from https://nullprogram.com/blog/2018/07/31/.
static uintptr_t unhash_address(uintptr_t x) {
  if (sizeof (x) >= 8) {
    // Undo the 64-bit mixer: same steps in reverse order, using the
    // multiplicative inverses of the forward constants.
    x = (x ^ (x >> 31) ^ (x >> 62)) * 0x319642b2d24d8ec3U;
    x = (x ^ (x >> 27) ^ (x >> 54)) * 0x96de1b173f119089U;
    return x ^ (x >> 30) ^ (x >> 60);
  }
  // Undo the 32-bit mixer.
  x = (x ^ (x >> 16)) * 0x43021123U;
  x = (x ^ (x >> 15) ^ (x >> 30)) * 0x1d69e2a5U;
  return x ^ (x >> 16);
}
#endif // ADDRESS_HASH_H

210
address-map.h Normal file
View file

@ -0,0 +1,210 @@
#ifndef ADDRESS_MAP_H
#define ADDRESS_MAP_H
#include <malloc.h>
#include <stdint.h>
#include <string.h>
#include "address-hash.h"
// One key/value pair stored in a hash_map slot.  Field order matters:
// hash_map_do_insert builds entries with a positional { k, v } literal.
struct hash_map_entry {
uintptr_t k; // key; its low bits also select the entry's home slot
uintptr_t v; // value associated with the key
};
// Open-addressed hash table from uintptr_t keys to uintptr_t values.
// Slot occupancy is tracked in a separate bitvector rather than with a
// sentinel key.
struct hash_map {
struct hash_map_entry *data; // array of `size` entries
size_t size; // total number of slots; a power of 2
size_t n_items; // number of items in map
uint8_t *bits; // bitvector indicating set slots, one bit per slot
};
// Forget every entry: mark all slots vacant and zero the item count.
// Capacity is left as it is.
static void hash_map_clear(struct hash_map *map) {
  size_t bitmap_bytes = map->size / 8;
  map->n_items = 0;
  memset(map->bits, 0, bitmap_bytes);
}
// Initialize MAP as an empty table with SIZE slots.  SIZE must be a
// nonzero power of 2, because slot indices are computed by masking with
// SIZE - 1.
//
// Traps if the requested table cannot be allocated, rather than leaving
// null pointers in MAP to be dereferenced later.  Release the table with
// hash_map_destroy.
static void hash_map_init(struct hash_map *map, size_t size) {
  // Guard the entry-array byte count against size_t overflow.
  if (size > SIZE_MAX / sizeof(struct hash_map_entry))
    __builtin_trap();
  // One occupancy bit per slot, rounded up so that tables with fewer
  // than 8 slots still get a whole byte.
  size_t bitmap_bytes = size / 8 + (size % 8 != 0);
  map->size = size;
  map->n_items = 0;
  map->data = malloc(sizeof(struct hash_map_entry) * size);
  map->bits = malloc(bitmap_bytes);
  if (!map->data || !map->bits)
    __builtin_trap();
  // Every slot starts out vacant.
  memset(map->bits, 0, bitmap_bytes);
}
// Release the storage owned by MAP.  MAP itself is not freed and must be
// re-initialized before it is used again.
static void hash_map_destroy(struct hash_map *map) {
  uint8_t *occupancy = map->bits;
  free(map->data);
  free(occupancy);
}
// Reduce an arbitrary index (or key) to a valid slot number by masking
// with size - 1; this relies on the table size being a power of 2.
static size_t hash_map_slot_index(struct hash_map *map, size_t idx) {
  size_t mask = map->size - 1;
  return idx & mask;
}
// Return a pointer to the entry stored in the slot that IDX maps to.
// Whether the slot is actually occupied is not checked here.
static struct hash_map_entry* hash_map_slot_entry(struct hash_map *map,
                                                  size_t idx) {
  size_t slot = hash_map_slot_index(map, idx);
  return map->data + slot;
}
// Return 1 if the slot that IDX maps to is vacant, 0 if it holds an entry.
static int hash_map_slot_is_empty(struct hash_map *map, size_t idx) {
  size_t slot = hash_map_slot_index(map, idx);
  uint8_t byte = map->bits[slot / 8];
  int occupied = (byte >> (slot % 8)) & 1;
  return occupied == 0;
}
// Mark the slot that IDX maps to as occupied and count one more item.
// The caller is responsible for storing the entry itself.
static void hash_map_slot_acquire(struct hash_map *map, size_t idx) {
  size_t slot = hash_map_slot_index(map, idx);
  uint8_t *byte = &map->bits[slot / 8];
  *byte |= (1 << (slot % 8));
  map->n_items += 1;
}
// Mark the slot that IDX maps to as vacant and count one item fewer.
static void hash_map_slot_release(struct hash_map *map, size_t idx) {
  size_t slot = hash_map_slot_index(map, idx);
  uint8_t *byte = &map->bits[slot / 8];
  *byte &= ~(1 << (slot % 8));
  map->n_items -= 1;
}
// Return how many slots the entry stored at IDX sits past its home slot
// (the slot its key masks to), counted modulo the table size.
static size_t hash_map_slot_distance(struct hash_map *map, size_t idx) {
  uintptr_t key = hash_map_slot_entry(map, idx)->k;
  return hash_map_slot_index(map, idx - key);
}
// Return nonzero when the table has more than 8 slots and is at most
// one-eighth full.
static int hash_map_should_shrink(struct hash_map *map) {
  if (map->size <= 8)
    return 0;
  size_t low_water = map->size >> 3;
  return map->n_items <= low_water;
}
// Return nonzero when no more than one-eighth of the slots remain vacant.
static int hash_map_should_grow(struct hash_map *map) {
  size_t reserve = map->size >> 3;
  size_t high_water = map->size - reserve;
  return map->n_items >= high_water;
}
// Store K -> V in MAP, overwriting the value if K is already present.
// Never resizes: the caller must ensure at least one slot is vacant,
// otherwise the search for a vacant slot below does not terminate.
static void hash_map_do_insert(struct hash_map *map, uintptr_t k, uintptr_t v) {
  size_t probe = 0;

  // Step past occupants that are farther from their home slot than we
  // currently are from ours.
  for (; !hash_map_slot_is_empty(map, k + probe); probe++)
    if (probe >= hash_map_slot_distance(map, k + probe))
      break;

  // Among occupants at exactly our distance, look for the key itself.
  for (; !hash_map_slot_is_empty(map, k + probe); probe++) {
    if (hash_map_slot_distance(map, k + probe) != probe)
      break;
    struct hash_map_entry *occupant = hash_map_slot_entry(map, k + probe);
    if (occupant->k == k) {
      occupant->v = v;
      return;
    }
  }

  // The new entry belongs at `target`.  Find the end of the occupied run
  // that starts there, claim the vacant slot just past it, and shift the
  // run up by one to open a hole.
  size_t target = k + probe;
  size_t run = 0;
  while (!hash_map_slot_is_empty(map, target + run))
    run++;
  hash_map_slot_acquire(map, target + run);
  for (size_t i = run; i > 0; i--)
    *hash_map_slot_entry(map, target + i) =
      *hash_map_slot_entry(map, target + i - 1);

  struct hash_map_entry *hole = hash_map_slot_entry(map, target);
  hole->k = k;
  hole->v = v;
}
static void hash_map_populate(struct hash_map *dst, struct hash_map *src) {
for (size_t i = 0; i < src->size; i++)
if (!hash_map_slot_is_empty(src, i))
hash_map_do_insert(dst, hash_map_slot_entry(src, i)->k,
hash_map_slot_entry(src, i)->v);
}
static void hash_map_grow(struct hash_map *map) {
struct hash_map fresh;
hash_map_init(&fresh, map->size << 1);
hash_map_populate(&fresh, map);
hash_map_destroy(map);
memcpy(map, &fresh, sizeof(fresh));
}
static void hash_map_shrink(struct hash_map *map) {
struct hash_map fresh;
hash_map_init(&fresh, map->size >> 1);
hash_map_populate(&fresh, map);
hash_map_destroy(map);
memcpy(map, &fresh, sizeof(fresh));
}
// Store K -> V in MAP, growing the table first if it is near capacity.
// An existing value for K is overwritten.
static void hash_map_insert(struct hash_map *map, uintptr_t k, uintptr_t v) {
  int nearly_full = hash_map_should_grow(map);
  if (nearly_full) {
    hash_map_grow(map);
  }
  hash_map_do_insert(map, k, v);
}
// Remove key K from MAP.  K must be present: the function traps if the
// probe reaches a vacant slot without finding it.  May shrink the table.
static void hash_map_remove(struct hash_map *map, uintptr_t k) {
  // Probe forward from K's home slot until the key turns up.
  size_t pos = k;
  for (;;) {
    if (hash_map_slot_is_empty(map, pos))
      __builtin_trap();
    if (hash_map_slot_entry(map, pos)->k == k)
      break;
    pos++;
  }

  // Close the gap: pull each following entry back by one slot until we
  // reach a vacant slot or an entry that already sits in its home slot.
  for (;;) {
    size_t next = pos + 1;
    if (hash_map_slot_is_empty(map, next))
      break;
    if (hash_map_slot_distance(map, next) == 0)
      break;
    *hash_map_slot_entry(map, pos) = *hash_map_slot_entry(map, next);
    pos = next;
  }

  hash_map_slot_release(map, pos);
  if (hash_map_should_shrink(map))
    hash_map_shrink(map);
}
// Return 1 if key K is present in MAP, otherwise 0.
static int hash_map_contains(struct hash_map *map, uintptr_t k) {
  size_t slot = k;
  while (!hash_map_slot_is_empty(map, slot)) {
    if (hash_map_slot_entry(map, slot)->k == k)
      return 1;
    // An occupant closer to its home than we are to ours means K would
    // have been placed before it, so K is absent.
    size_t probed = slot - k;
    if (hash_map_slot_distance(map, slot) < probed)
      return 0;
    slot++;
  }
  return 0;
}
static uintptr_t hash_map_lookup(struct hash_map *map, uintptr_t k, uintptr_t default_) {
for (size_t slot = k; !hash_map_slot_is_empty(map, slot); slot++) {
if (hash_map_slot_entry(map, slot)->k == k)
return hash_map_slot_entry(map, slot)->v;
if (hash_map_slot_distance(map, slot) < (slot - k))
break;
}
return default_;
}
static inline void hash_map_for_each (struct hash_map *map,
void (*f)(uintptr_t, uintptr_t, void*),
void *data) __attribute__((always_inline));
static inline void hash_map_for_each(struct hash_map *map,
void (*f)(uintptr_t, uintptr_t, void*),
void *data) {
for (size_t i = 0; i < map->size; i++)
if (!hash_map_slot_is_empty(map, i))
f(hash_map_slot_entry(map, i)->k, hash_map_slot_entry(map, i)->v, data);
}
// Map keyed by addresses.  The address_map_* functions hash each address
// with hash_address before using it as a key in the underlying hash_map.
struct address_map {
struct hash_map hash_map; // underlying table, keyed by hashed address
};
// Initialize MAP as an empty address map with an 8-slot table.
static void address_map_init(struct address_map *map) {
  struct hash_map *table = &map->hash_map;
  hash_map_init(table, 8);
}
// Release the storage owned by MAP.
static void address_map_destroy(struct address_map *map) {
  struct hash_map *table = &map->hash_map;
  hash_map_destroy(table);
}
// Remove every entry from MAP while keeping its current capacity.
static void address_map_clear(struct address_map *map) {
  struct hash_map *table = &map->hash_map;
  hash_map_clear(table);
}
// Associate ADDR with V, replacing any value already stored for ADDR.
static void address_map_add(struct address_map *map, uintptr_t addr, uintptr_t v) {
  uintptr_t key = hash_address(addr);
  hash_map_insert(&map->hash_map, key, v);
}
// Remove ADDR from MAP.  ADDR must be present; hash_map_remove traps
// otherwise.
static void address_map_remove(struct address_map *map, uintptr_t addr) {
  uintptr_t key = hash_address(addr);
  hash_map_remove(&map->hash_map, key);
}
// Return 1 if ADDR has an entry in MAP, otherwise 0.
static int address_map_contains(struct address_map *map, uintptr_t addr) {
  uintptr_t key = hash_address(addr);
  return hash_map_contains(&map->hash_map, key);
}
// Return the value stored for ADDR, or DEFAULT_ if ADDR has no entry.
static uintptr_t address_map_lookup(struct address_map *map, uintptr_t addr,
                                    uintptr_t default_) {
  uintptr_t key = hash_address(addr);
  return hash_map_lookup(&map->hash_map, key, default_);
}
// Closure handed to hash_map_for_each by address_map_for_each: bundles
// the caller's callback with its opaque argument.
struct address_map_for_each_data {
void (*f)(uintptr_t, uintptr_t, void *); // caller's callback: (address, value, data)
void *data; // passed through to f unchanged
};
// Adapter used by address_map_for_each: turns the stored hashed key K
// back into an address before invoking the caller's callback.
static void address_map_do_for_each(uintptr_t k, uintptr_t v, void *data) {
  struct address_map_for_each_data *closure = data;
  uintptr_t addr = unhash_address(k);
  closure->f(addr, v, closure->data);
}
static inline void address_map_for_each (struct address_map *map,
void (*f)(uintptr_t, uintptr_t, void*),
void *data) __attribute__((always_inline));
static inline void address_map_for_each (struct address_map *map,
void (*f)(uintptr_t, uintptr_t, void*),
void *data) {
struct address_map_for_each_data for_each_data = { f, data };
hash_map_for_each(&map->hash_map, address_map_do_for_each, &for_each_data);
}
#endif // ADDRESS_MAP_H

193
address-set.h Normal file
View file

@ -0,0 +1,193 @@
#ifndef ADDRESS_SET_H
#define ADDRESS_SET_H
#include <malloc.h>
#include <stdint.h>
#include <string.h>
#include "address-hash.h"
// Open-addressed hash set of uintptr_t values.  Slot occupancy is
// tracked in a separate bitvector rather than with a sentinel value.
struct hash_set {
uintptr_t *data; // array of `size` values
size_t size; // total number of slots; a power of 2
size_t n_items; // number of items in set
uint8_t *bits; // bitvector indicating set slots, one bit per slot
};
// Forget every member: mark all slots vacant and zero the item count.
// Capacity is left as it is.
static void hash_set_clear(struct hash_set *set) {
  size_t bitmap_bytes = set->size / 8;
  set->n_items = 0;
  memset(set->bits, 0, bitmap_bytes);
}
// Initialize SET as an empty table with SIZE slots.  SIZE must be a
// nonzero power of 2, because slot indices are computed by masking with
// SIZE - 1.
//
// Traps if the requested table cannot be allocated, rather than leaving
// null pointers in SET to be dereferenced later.  Release the table with
// hash_set_destroy.
static void hash_set_init(struct hash_set *set, size_t size) {
  // Guard the value-array byte count against size_t overflow.
  if (size > SIZE_MAX / sizeof(uintptr_t))
    __builtin_trap();
  // One occupancy bit per slot, rounded up so that tables with fewer
  // than 8 slots still get a whole byte.
  size_t bitmap_bytes = size / 8 + (size % 8 != 0);
  set->size = size;
  set->n_items = 0;
  set->data = malloc(sizeof(uintptr_t) * size);
  set->bits = malloc(bitmap_bytes);
  if (!set->data || !set->bits)
    __builtin_trap();
  // Every slot starts out vacant.
  memset(set->bits, 0, bitmap_bytes);
}
// Release the storage owned by SET.  SET itself is not freed and must be
// re-initialized before it is used again.
static void hash_set_destroy(struct hash_set *set) {
  uint8_t *occupancy = set->bits;
  free(set->data);
  free(occupancy);
}
// Reduce an arbitrary index (or value) to a valid slot number by masking
// with size - 1; this relies on the table size being a power of 2.
static size_t hash_set_slot_index(struct hash_set *set, size_t idx) {
  size_t mask = set->size - 1;
  return idx & mask;
}
// Return 1 if the slot that IDX maps to is vacant, 0 if it holds a value.
static int hash_set_slot_is_empty(struct hash_set *set, size_t idx) {
  size_t slot = hash_set_slot_index(set, idx);
  uint8_t byte = set->bits[slot / 8];
  int occupied = (byte >> (slot % 8)) & 1;
  return occupied == 0;
}
// Read the value stored in the slot that IDX maps to.  Whether the slot
// is actually occupied is not checked here.
static uintptr_t hash_set_slot_ref(struct hash_set *set, size_t idx) {
  size_t slot = hash_set_slot_index(set, idx);
  return set->data[slot];
}
// Write V into the slot that IDX maps to.  The occupancy bit is not
// touched; see hash_set_slot_acquire.
static void hash_set_slot_set(struct hash_set *set, size_t idx, uintptr_t v) {
  size_t slot = hash_set_slot_index(set, idx);
  set->data[slot] = v;
}
// Mark the slot that IDX maps to as occupied and count one more item.
// The caller is responsible for storing the value itself.
static void hash_set_slot_acquire(struct hash_set *set, size_t idx) {
  size_t slot = hash_set_slot_index(set, idx);
  uint8_t *byte = &set->bits[slot / 8];
  *byte |= (1 << (slot % 8));
  set->n_items += 1;
}
// Mark the slot that IDX maps to as vacant and count one item fewer.
static void hash_set_slot_release(struct hash_set *set, size_t idx) {
  size_t slot = hash_set_slot_index(set, idx);
  uint8_t *byte = &set->bits[slot / 8];
  *byte &= ~(1 << (slot % 8));
  set->n_items -= 1;
}
// Return how many slots the value stored at IDX sits past its home slot
// (the slot the value itself masks to), counted modulo the table size.
static size_t hash_set_slot_distance(struct hash_set *set, size_t idx) {
  uintptr_t occupant = hash_set_slot_ref(set, idx);
  return hash_set_slot_index(set, idx - occupant);
}
// Return nonzero when the table has more than 8 slots and is at most
// one-eighth full.
static int hash_set_should_shrink(struct hash_set *set) {
  if (set->size <= 8)
    return 0;
  size_t low_water = set->size >> 3;
  return set->n_items <= low_water;
}
// Return nonzero when no more than one-eighth of the slots remain vacant.
static int hash_set_should_grow(struct hash_set *set) {
  size_t reserve = set->size >> 3;
  size_t high_water = set->size - reserve;
  return set->n_items >= high_water;
}
// Add V to SET; a no-op if V is already a member.  Never resizes: the
// caller must ensure at least one slot is vacant, otherwise the search
// for a vacant slot below does not terminate.
static void hash_set_do_insert(struct hash_set *set, uintptr_t v) {
  size_t probe = 0;

  // Step past occupants that are farther from their home slot than we
  // currently are from ours.
  for (; !hash_set_slot_is_empty(set, v + probe); probe++)
    if (probe >= hash_set_slot_distance(set, v + probe))
      break;

  // Among occupants at exactly our distance, look for V itself.
  for (; !hash_set_slot_is_empty(set, v + probe); probe++) {
    if (hash_set_slot_distance(set, v + probe) != probe)
      break;
    if (hash_set_slot_ref(set, v + probe) == v)
      return;
  }

  // V belongs at `target`.  Find the end of the occupied run that starts
  // there, claim the vacant slot just past it, and shift the run up by
  // one to open a hole.
  size_t target = v + probe;
  size_t run = 0;
  while (!hash_set_slot_is_empty(set, target + run))
    run++;
  hash_set_slot_acquire(set, target + run);
  for (size_t i = run; i > 0; i--)
    hash_set_slot_set(set, target + i, hash_set_slot_ref(set, target + i - 1));
  hash_set_slot_set(set, target, v);
}
// Insert every member of SRC into DST.  DST is not resized here, so it
// must already have room for the additions.
static void hash_set_populate(struct hash_set *dst, struct hash_set *src) {
  for (size_t slot = 0; slot < src->size; slot++) {
    if (hash_set_slot_is_empty(src, slot))
      continue;
    uintptr_t member = hash_set_slot_ref(src, slot);
    hash_set_do_insert(dst, member);
  }
}
static void hash_set_grow(struct hash_set *set) {
struct hash_set fresh;
hash_set_init(&fresh, set->size << 1);
hash_set_populate(&fresh, set);
hash_set_destroy(set);
memcpy(set, &fresh, sizeof(fresh));
}
static void hash_set_shrink(struct hash_set *set) {
struct hash_set fresh;
hash_set_init(&fresh, set->size >> 1);
hash_set_populate(&fresh, set);
hash_set_destroy(set);
memcpy(set, &fresh, sizeof(fresh));
}
// Add V to SET, growing the table first if it is near capacity.
static void hash_set_insert(struct hash_set *set, uintptr_t v) {
  int nearly_full = hash_set_should_grow(set);
  if (nearly_full) {
    hash_set_grow(set);
  }
  hash_set_do_insert(set, v);
}
// Remove V from SET.  V must be a member: the function traps if the
// probe reaches a vacant slot without finding it.  May shrink the table.
static void hash_set_remove(struct hash_set *set, uintptr_t v) {
  // Probe forward from V's home slot until the value turns up.
  size_t pos = v;
  for (;;) {
    if (hash_set_slot_is_empty(set, pos))
      __builtin_trap();
    if (hash_set_slot_ref(set, pos) == v)
      break;
    pos++;
  }

  // Close the gap: pull each following value back by one slot until we
  // reach a vacant slot or a value that already sits in its home slot.
  for (;;) {
    size_t next = pos + 1;
    if (hash_set_slot_is_empty(set, next))
      break;
    if (hash_set_slot_distance(set, next) == 0)
      break;
    hash_set_slot_set(set, pos, hash_set_slot_ref(set, next));
    pos = next;
  }

  hash_set_slot_release(set, pos);
  if (hash_set_should_shrink(set))
    hash_set_shrink(set);
}
// Return 1 if V is a member of SET, otherwise 0.
static int hash_set_contains(struct hash_set *set, uintptr_t v) {
  size_t slot = v;
  while (!hash_set_slot_is_empty(set, slot)) {
    if (hash_set_slot_ref(set, slot) == v)
      return 1;
    // An occupant closer to its home than we are to ours means V would
    // have been placed before it, so V is absent.
    size_t probed = slot - v;
    if (hash_set_slot_distance(set, slot) < probed)
      return 0;
    slot++;
  }
  return 0;
}
// Call F(value, DATA) once for every member of SET, in slot order.  The
// prototype exists only to carry the always_inline attribute.
static inline void hash_set_for_each (struct hash_set *set,
                                      void (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
static inline void hash_set_for_each(struct hash_set *set,
                                     void (*f)(uintptr_t, void*), void *data) {
  for (size_t slot = 0; slot < set->size; slot++) {
    if (hash_set_slot_is_empty(set, slot))
      continue;
    f(hash_set_slot_ref(set, slot), data);
  }
}
// Set of addresses.  The address_set_* functions hash each address with
// hash_address before storing it in the underlying hash_set.
struct address_set {
struct hash_set hash_set; // underlying table of hashed addresses
};
// Initialize SET as an empty address set with an 8-slot table.
static void address_set_init(struct address_set *set) {
  struct hash_set *table = &set->hash_set;
  hash_set_init(table, 8);
}
// Release the storage owned by SET.
static void address_set_destroy(struct address_set *set) {
  struct hash_set *table = &set->hash_set;
  hash_set_destroy(table);
}
// Remove every member from SET while keeping its current capacity.
static void address_set_clear(struct address_set *set) {
  struct hash_set *table = &set->hash_set;
  hash_set_clear(table);
}
// Add ADDR to SET; a no-op if it is already a member.
static void address_set_add(struct address_set *set, uintptr_t addr) {
  uintptr_t hashed = hash_address(addr);
  hash_set_insert(&set->hash_set, hashed);
}
// Remove ADDR from SET.  ADDR must be a member; hash_set_remove traps
// otherwise.
static void address_set_remove(struct address_set *set, uintptr_t addr) {
  uintptr_t hashed = hash_address(addr);
  hash_set_remove(&set->hash_set, hashed);
}
// Return 1 if ADDR is a member of SET, otherwise 0.
static int address_set_contains(struct address_set *set, uintptr_t addr) {
  uintptr_t hashed = hash_address(addr);
  return hash_set_contains(&set->hash_set, hashed);
}
static void address_set_union(struct address_set *set, struct address_set *other) {
while (set->hash_set.size < other->hash_set.size)
hash_set_grow(&set->hash_set);
hash_set_populate(&set->hash_set, &other->hash_set);
}
// Closure handed to hash_set_for_each by address_set_for_each: bundles
// the caller's callback with its opaque argument.
struct address_set_for_each_data {
void (*f)(uintptr_t, void *); // caller's callback: (address, data)
void *data; // passed through to f unchanged
};
// Adapter used by address_set_for_each: turns the stored hashed value V
// back into an address before invoking the caller's callback.
static void address_set_do_for_each(uintptr_t v, void *data) {
  struct address_set_for_each_data *closure = data;
  uintptr_t addr = unhash_address(v);
  closure->f(addr, closure->data);
}
static inline void address_set_for_each (struct address_set *set,
void (*f)(uintptr_t, void*), void *data) __attribute__((always_inline));
static inline void address_set_for_each (struct address_set *set,
void (*f)(uintptr_t, void*), void *data) {
struct address_set_for_each_data for_each_data = { f, data };
hash_set_for_each(&set->hash_set, address_set_do_for_each, &for_each_data);
}
#endif // ADDRESS_SET_H

109
test-address-map.c Normal file
View file

@ -0,0 +1,109 @@
#include <stdio.h>
#include "address-map.h"
#define COUNT (1000 * 1000)
// for-each callback: copy the entry (ADDR, VAL) into the address_map
// passed as DATA, reporting addresses outside [0, COUNT) and addresses
// that the destination map already contains.
static void add_to_other(uintptr_t addr, uintptr_t val, void *data) {
  struct address_map *other = data;
  // %zu expects a size_t; uintptr_t need not be the same type, so cast.
  if (addr >= COUNT)
    fprintf(stdout, "unexpected address: %zu\n", (size_t)addr);
  // Each address should be visited once, so finding it in the
  // destination already means a duplicate, not a missing entry.
  if (address_map_contains(other, addr))
    fprintf(stdout, "unexpectedly present: %zu\n", (size_t)addr);
  address_map_add(other, addr, val);
}
// Exercise and time address_map: add, look up, clear, overwrite, remove
// and iterate over COUNT keys, printing the item count and table size
// after each phase.  Returns 1 at the first failed check.
int main(int argc, char *arv[]) {
struct address_map set;
address_map_init(&set);
// Phase 1: map each i in [0, COUNT) to -i (an unsigned wrap-around value).
for (size_t i = 0; i < COUNT; i++)
address_map_add(&set, i, -i);
fprintf(stdout, "after initial add, %zu/%zu\n", set.hash_map.n_items,
set.hash_map.size);
// Every inserted key must be present and carry the value just stored.
for (size_t i = 0; i < COUNT; i++) {
if (!address_map_contains(&set, i)) {
fprintf(stdout, "missing: %zu\n", i);
return 1;
}
if (address_map_lookup(&set, i, -1) != -i) {
fprintf(stdout, "missing: %zu\n", i);
return 1;
}
}
// Keys in [COUNT, 2*COUNT) were never inserted and must be absent.
for (size_t i = COUNT; i < COUNT * 2; i++) {
if (address_map_contains(&set, i)) {
fprintf(stdout, "unexpectedly present: %zu\n", i);
return 1;
}
}
// Phase 2: clear, then re-add the same keys with value 0.
address_map_clear(&set);
fprintf(stdout, "after clear, %zu/%zu\n", set.hash_map.n_items,
set.hash_map.size);
for (size_t i = 0; i < COUNT; i++)
address_map_add(&set, i, 0);
// Now update.
fprintf(stdout, "after re-add, %zu/%zu\n", set.hash_map.n_items,
set.hash_map.size);
// Adding existing keys again overwrites their values with i + 1.
for (size_t i = 0; i < COUNT; i++)
address_map_add(&set, i, i + 1);
fprintf(stdout, "after idempotent re-add, %zu/%zu\n", set.hash_map.n_items,
set.hash_map.size);
for (size_t i = 0; i < COUNT; i++) {
if (!address_map_contains(&set, i)) {
fprintf(stdout, "missing: %zu\n", i);
return 1;
}
if (address_map_lookup(&set, i, -1) != i + 1) {
fprintf(stdout, "missing: %zu\n", i);
return 1;
}
}
// Phase 3: remove every key one at a time.
for (size_t i = 0; i < COUNT; i++)
address_map_remove(&set, i);
fprintf(stdout, "after one-by-one removal, %zu/%zu\n", set.hash_map.n_items,
set.hash_map.size);
for (size_t i = COUNT; i < 2 * COUNT; i++) {
if (address_map_contains(&set, i)) {
fprintf(stdout, "unexpectedly present: %zu\n", i);
return 1;
}
}
// Phase 4: refill with value i + 2, then copy into a second map through
// address_map_for_each and check the copy.
for (size_t i = 0; i < COUNT; i++)
address_map_add(&set, i, i + 2);
struct address_map set2;
address_map_init(&set2);
address_map_for_each(&set, add_to_other, &set2);
fprintf(stdout, "after for-each set, %zu/%zu\n", set2.hash_map.n_items,
set2.hash_map.size);
for (size_t i = 0; i < COUNT; i++) {
if (address_map_lookup(&set2, i, -1) != i + 2) {
fprintf(stdout, "missing: %zu\n", i);
return 1;
}
}
address_map_destroy(&set2);
// Phase 5: benchmark loops.  burnin is chosen so that each loop performs
// 1000 * 1000 * 1000 operations in total.
size_t burnin = 1000 * 1000 * 1000 / COUNT;
fprintf(stdout, "beginning clear then add %zu items, %zu times\n",
(size_t)COUNT, burnin);
for (size_t j = 0; j < burnin; j++) {
address_map_clear(&set);
for (size_t i = 0; i < COUNT; i++)
address_map_add(&set, i, i + 3);
}
fprintf(stdout, "after burnin, %zu/%zu\n", set.hash_map.n_items,
set.hash_map.size);
fprintf(stdout, "beginning lookup %zu items, %zu times\n",
(size_t)COUNT, burnin);
for (size_t j = 0; j < burnin; j++) {
for (size_t i = 0; i < COUNT; i++) {
if (address_map_lookup(&set, i, -1) != i + 3) {
fprintf(stdout, "missing: %zu\n", i);
return 1;
}
}
}
fprintf(stdout, "after burnin, %zu/%zu\n", set.hash_map.n_items,
set.hash_map.size);
address_map_destroy(&set);
}

98
test-address-set.c Normal file
View file

@ -0,0 +1,98 @@
#include <stdio.h>
#include "address-set.h"
#define COUNT (1000 * 1000)
// for-each callback: remove ADDR from the address_set passed as DATA,
// reporting addresses outside [0, COUNT) and addresses that the other
// set does not contain.
static void remove_from_other(uintptr_t addr, void *data) {
  struct address_set *other = data;
  // %zu expects a size_t; uintptr_t need not be the same type, so cast.
  if (addr >= COUNT)
    fprintf(stdout, "unexpected address: %zu\n", (size_t)addr);
  if (!address_set_contains(other, addr))
    fprintf(stdout, "missing: %zu\n", (size_t)addr);
  // Traps inside hash_set_remove if ADDR really is absent.
  address_set_remove(other, addr);
}
// Exercise and time address_set: add, test membership, clear, re-add,
// remove, union and iterate over COUNT addresses, printing the item
// count and table size after each phase.  Returns 1 at the first failed
// check.
int main(int argc, char *arv[]) {
struct address_set set;
address_set_init(&set);
// Phase 1: add each i in [0, COUNT).
for (size_t i = 0; i < COUNT; i++)
address_set_add(&set, i);
fprintf(stdout, "after initial add, %zu/%zu\n", set.hash_set.n_items,
set.hash_set.size);
// Every added address must be a member.
for (size_t i = 0; i < COUNT; i++) {
if (!address_set_contains(&set, i)) {
fprintf(stdout, "missing: %zu\n", i);
return 1;
}
}
// Addresses in [COUNT, 2*COUNT) were never added and must be absent.
for (size_t i = COUNT; i < COUNT * 2; i++) {
if (address_set_contains(&set, i)) {
fprintf(stdout, "unexpectedly present: %zu\n", i);
return 1;
}
}
// Phase 2: clear, then add the same addresses again.
address_set_clear(&set);
fprintf(stdout, "after clear, %zu/%zu\n", set.hash_set.n_items,
set.hash_set.size);
for (size_t i = 0; i < COUNT; i++)
address_set_add(&set, i);
// Do it twice.
fprintf(stdout, "after re-add, %zu/%zu\n", set.hash_set.n_items,
set.hash_set.size);
// Adding addresses that are already members.
for (size_t i = 0; i < COUNT; i++)
address_set_add(&set, i);
fprintf(stdout, "after idempotent re-add, %zu/%zu\n", set.hash_set.n_items,
set.hash_set.size);
for (size_t i = 0; i < COUNT; i++) {
if (!address_set_contains(&set, i)) {
fprintf(stdout, "missing: %zu\n", i);
return 1;
}
}
// Phase 3: remove every address one at a time.
for (size_t i = 0; i < COUNT; i++)
address_set_remove(&set, i);
fprintf(stdout, "after one-by-one removal, %zu/%zu\n", set.hash_set.n_items,
set.hash_set.size);
for (size_t i = COUNT; i < 2 * COUNT; i++) {
if (address_set_contains(&set, i)) {
fprintf(stdout, "unexpectedly present: %zu\n", i);
return 1;
}
}
// Phase 4: refill, copy into a second set with address_set_union, then
// remove each address from the copy through address_set_for_each.
for (size_t i = 0; i < COUNT; i++)
address_set_add(&set, i);
struct address_set set2;
address_set_init(&set2);
address_set_union(&set2, &set);
fprintf(stdout, "populated set2, %zu/%zu\n", set2.hash_set.n_items,
set2.hash_set.size);
address_set_for_each(&set, remove_from_other, &set2);
fprintf(stdout, "after for-each removal, %zu/%zu\n", set2.hash_set.n_items,
set2.hash_set.size);
address_set_destroy(&set2);
// Phase 5: benchmark loops.  burnin is chosen so that each loop performs
// 1000 * 1000 * 1000 operations in total.
size_t burnin = 1000 * 1000 * 1000 / COUNT;
fprintf(stdout, "beginning clear then add %zu items, %zu times\n",
(size_t)COUNT, burnin);
for (size_t j = 0; j < burnin; j++) {
address_set_clear(&set);
for (size_t i = 0; i < COUNT; i++)
address_set_add(&set, i);
}
fprintf(stdout, "after burnin, %zu/%zu\n", set.hash_set.n_items,
set.hash_set.size);
fprintf(stdout, "beginning lookup %zu items, %zu times\n",
(size_t)COUNT, burnin);
for (size_t j = 0; j < burnin; j++) {
for (size_t i = 0; i < COUNT; i++) {
if (!address_set_contains(&set, i)) {
fprintf(stdout, "missing: %zu\n", i);
return 1;
}
}
}
fprintf(stdout, "after burnin, %zu/%zu\n", set.hash_set.n_items,
set.hash_set.size);
address_set_destroy(&set);
}