1
Fork 0
mirror of https://git.savannah.gnu.org/git/guile.git synced 2025-06-27 21:40:34 +02:00

nofl: Refactor SWAR mark-matching routines

We are going to try to use fewer bits for mark state.
This commit is contained in:
Andy Wingo 2025-03-05 10:08:03 +01:00
parent c410992d55
commit e780d27959
2 changed files with 75 additions and 19 deletions

View file

@ -26,23 +26,80 @@ load_eight_aligned_bytes(uint8_t *ptr) {
return word;
}
// Select, in every byte of BYTES, the bits that are also set in MASK.
// The result is nonzero exactly when the AND of BYTES with the
// broadcast mask is nonzero.  (broadcast_byte is defined elsewhere;
// presumably it replicates MASK into each of the eight byte lanes --
// confirm against its definition.)
static inline uint64_t
match_bytes_against_bits(uint64_t bytes, uint8_t mask) {
  uint64_t lanes_mask = broadcast_byte(mask);
  return lanes_mask & bytes;
}
// Scan the bytes starting at PTR, eight at a time, for the first word
// in which match_bytes_against_bits reports a match against MASK.
//
// Returns the offset from PTR computed as the offset of the matching
// word plus count_zero_bytes(match), or LIMIT if the scan reaches LIMIT
// without a match.  (count_zero_bytes is defined elsewhere; presumably
// it yields the index of the first nonzero byte -- confirm.)
//
// NOTE(review): because whole words are examined, a match located in
// the last word visited can yield an offset that is not below LIMIT;
// callers presumably arrange for LIMIT to end on a word boundary or
// clamp the result -- confirm.
static size_t
scan_for_byte_with_bits(uint8_t *ptr, size_t limit, uint8_t mask) {
  size_t n = 0;
  size_t unaligned = ((uintptr_t) ptr) & 7;
  if (unaligned) {
    // PTR is not 8-byte aligned: load the enclosing aligned word and
    // shift out the bytes that precede PTR.  The shift fills the high
    // bytes with zeroes, which have no bits set and so cannot match.
    uint64_t bytes = load_eight_aligned_bytes(ptr - unaligned) >> (unaligned * 8);
    uint64_t match = match_bytes_against_bits(bytes, mask);
    if (match)
      return count_zero_bytes(match);
    // Skip to the next aligned word.
    n += 8 - unaligned;
  }

  // From here on PTR + N is 8-byte aligned.
  for (; n < limit; n += 8) {
    uint64_t bytes = load_eight_aligned_bytes(ptr + n);
    uint64_t match = match_bytes_against_bits(bytes, mask);
    if (match)
      return n + count_zero_bytes(match);
  }

  return limit;
}
// Test each byte of BYTES, restricted to the bits in MASK, for equality
// with TAG1 or TAG2.  In the result, every byte whose masked value
// equals either tag has the single bit (MASK + 1) set; all other bytes
// are zero.  (Assumes broadcast_byte, defined elsewhere, replicates its
// argument into all eight byte lanes -- confirm.)
static inline uint64_t
match_bytes_against_2_tags(uint64_t bytes, uint8_t mask, uint8_t tag1,
                           uint8_t tag2)
{
  // Precondition: both tags lie entirely within the mask.
  GC_ASSERT_EQ(tag1 & mask, tag1);
  GC_ASSERT_EQ(tag2 & mask, tag2);
  // Precondition: the high bit of the mask byte is clear, so the
  // per-byte additions below cannot carry into the neighbouring byte.
  GC_ASSERT_EQ(mask & 0x7f, mask);
  // Precondition: the mask is a nonempty run of low bits of the byte.
  GC_ASSERT(mask);
  GC_ASSERT_EQ(mask & (mask + 1), 0);

  uint64_t lanes_mask = broadcast_byte(mask);
  uint64_t lanes_probe = broadcast_byte(mask + 1);
  uint64_t lanes_tag1 = broadcast_byte(tag1);
  uint64_t lanes_tag2 = broadcast_byte(tag2);

  uint64_t field = bytes & lanes_mask;
  // (field ^ tag) is zero only where the byte equals the tag; adding the
  // mask then sets the probe bit (MASK + 1) exactly where it is nonzero.
  uint64_t differs_from_tag1 = (field ^ lanes_tag1) + lanes_mask;
  uint64_t differs_from_tag2 = (field ^ lanes_tag2) + lanes_mask;
  // Probe bit set where the byte matches neither tag; flip it so that it
  // is set where the byte matches at least one.
  uint64_t matches_neither =
    differs_from_tag1 & differs_from_tag2 & lanes_probe;
  return matches_neither ^ lanes_probe;
}
static size_t
scan_for_byte_with_tags(uint8_t *ptr, size_t limit, uint8_t mask,
uint8_t tag1, uint8_t tag2) {
// The way we handle unaligned reads by padding high bytes with zeroes assumes
// that all-zeroes is not a matching byte.
GC_ASSERT(tag1 && tag2);
size_t n = 0;
size_t unaligned = ((uintptr_t) ptr) & 7;
if (unaligned) {
uint64_t bytes = load_eight_aligned_bytes(ptr - unaligned) >> (unaligned * 8);
uint64_t match = match_bytes_against_2_tags(bytes, mask, tag1, tag2);
if (match)
return count_zero_bytes(match);
n += 8 - unaligned;
}
for(; n < limit; n += 8) {
uint64_t bytes = load_eight_aligned_bytes(ptr + n);
uint64_t match = match_bytes_against_2_tags(bytes, mask, tag1, tag2);
if (match)
return n + count_zero_bytes(match);
}
return limit;