Skip to content

Commit

Permalink
Change symbol table to cwisstable
Browse files Browse the repository at this point in the history
  • Loading branch information
djwatson committed Oct 25, 2023
1 parent 56395ea commit 5367b95
Show file tree
Hide file tree
Showing 5 changed files with 114 additions and 130 deletions.
9 changes: 2 additions & 7 deletions src/readbc.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,7 @@ static gc_obj read_symbol(FILE *fptr, uint64_t num) {
}
str->str[len] = '\0';

auto res = symbol_table_find_cstr(str->str);
gc_obj val;
if (res == nullptr) {
val = symbol_table_insert(str, true);
} else {
val = tag_symbol(res);
}
auto val = symbol_table_insert(str, true);

arrput(symbols, val);
return val;
Expand Down Expand Up @@ -286,6 +280,7 @@ static void read_const_table(FILE *fptr, uint64_t const_offset) {
}

static bcfunc *readbc(FILE *fptr) {
sym_table_init();
auto const_offset = const_table_sz;
arrfree(symbols);

Expand Down
201 changes: 105 additions & 96 deletions src/symbol_table.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,122 +7,125 @@
#include <stdlib.h> // for calloc, free, size_t
#include <string.h> // for strcmp

#include <xxhash.h>

#include "third-party/cwisstable.h"

#include "defs.h"
#include "gc.h"
#include "types.h" // for string_s, symbol

#define auto __auto_type

/* FNV-1a */
uint64_t str_hash(const char *str) {
const char *p = str;
uint64_t hash = 0xcbf29ce484222325;
typedef struct {
uint32_t len;
const char* str;
} StringView;

while (*p++ != 0) {
hash ^= *p;
hash *= 0x100000001b3;
static inline size_t djb2_hash(const char *s)
{
size_t h = 5381;
char c;
while ((c = *s++)) {
h = (h << 5) + h + c;
}

return hash;
return h;
}
// Hash callback used for lookups keyed by a StringView. Hashes the bytes at
// val->str up to the first NUL via djb2_hash; val->len is not consulted.
static inline size_t MyMap_StringView_hash(const StringView* val) {
  const char *key = val->str;
  return djb2_hash(key);
}
// Equality callback used for lookups keyed by a StringView: compares the key
// `a` against the table entry `b`. The entry is asserted to be a tagged
// symbol; it matches when the symbol's name has the same length as the key
// and the first a->len bytes are identical.
static inline bool MyMap_StringView_eq(const StringView* a, const gc_obj* b) {
  auto entry = *b;
  assert(is_symbol(entry));
  auto sym_name = get_sym_name(to_symbol(entry));

  // Cheap length check first; only compare bytes when lengths agree.
  if (a->len != to_fixnum(sym_name->len)) {
    return false;
  }
  return memcmp(sym_name->str, a->str, a->len) == 0;
}
// Hash callback for stored entries. `val` points at a gc_obj slot that is
// asserted to hold a tagged symbol; the hash is djb2_hash over the symbol's
// name string.
static inline size_t kCStrPolicy_hash(const void* val) {
  auto entry = *(const gc_obj*)val;
  assert(is_symbol(entry));
  return djb2_hash(get_sym_name(to_symbol(entry))->str);
}
// Equality callback for two stored entries. Both `a` and `b` point at gc_obj
// slots asserted to hold tagged symbols; they are equal when their names have
// the same length and identical bytes.
static inline bool kCStrPolicy_eq(const void* a, const void* b) {
  auto lhs = *(const gc_obj*)a;
  assert(is_symbol(lhs));
  auto lhs_name = get_sym_name(to_symbol(lhs));

  auto rhs = *(const gc_obj*)b;
  assert(is_symbol(rhs));
  auto rhs_name = get_sym_name(to_symbol(rhs));

  // Decode the left length once; it is also the memcmp byte count.
  auto lhs_len = to_fixnum(lhs_name->len);
  if (lhs_len != to_fixnum(rhs_name->len)) {
    return false;
  }
  return memcmp(lhs_name->str, rhs_name->str, lhs_len) == 0;
}

CWISS_DECLARE_FLAT_SET_POLICY(kCStrPolicy, gc_obj,
(key_hash, kCStrPolicy_hash),
(key_eq, kCStrPolicy_eq));
CWISS_DECLARE_HASHSET_WITH(MyMap, gc_obj, kCStrPolicy);
CWISS_DECLARE_LOOKUP(MyMap, StringView);

// string_s* to symbol* hash table.
// Size must be power of two.
// Bottom bits may be tombstone.
// Open coded.

// TODO(djwatson) weak GC syms, and evict entries when they are collected.

// Non-empty default table so we don't have to null check.
static table empty_table = {0, 0};
table *sym_table = &empty_table;
static MyMap sym_table;
static bool inited = false;
// Creates the global symbol set the first time it is called. While `inited`
// is set, further calls return without touching the table.
void sym_table_init() {
  if (inited) {
    return;
  }
  inited = true;
  sym_table = MyMap_new(1);
}

static symbol *symbol_table_find_internal(const char *str, const uint64_t len);
symbol *symbol_table_find(string_s *str) {
return symbol_table_find_cstr(str->str);
return symbol_table_find_internal(str->str, to_fixnum(str->len));
}

EXPORT symbol *symbol_table_find_cstr(const char *str) {
auto hash = str_hash(str);

auto mask = sym_table->sz - 1;
for (size_t i = 0; i < sym_table->sz; i++) {
auto cur = &sym_table->entries[(i + hash) & mask];
if (cur->value == 0) {
return NULL;
}

symbol *curs = to_symbol(*cur);
string_s *sym_name = get_sym_name(curs);
if (strcmp(sym_name->str, str) == 0) {
return curs;
} // Mismatched comparison, continue.
}

return NULL;
return symbol_table_find_internal(str, strlen(str));
}

static void rehash();
static void symbol_table_insert_sym(symbol *sym) {
if ((sym_table->cnt + 1) > (sym_table->sz / 2)) {
rehash();
}
sym_table->cnt++;

string_s *sym_name = get_sym_name(sym);
auto hash = str_hash(sym_name->str);
auto mask = sym_table->sz - 1;

for (size_t i = 0; i < sym_table->sz; i++) {
auto cur = &sym_table->entries[(i + hash) & mask];
if (cur->value == 0 ||
strcmp(get_sym_name(to_symbol(*cur))->str, sym_name->str) == 0) {
// Insert here.
*cur = tag_sym(sym);
return;
} // Mismatched comparison, continue.
}

// Definitely should find a spot.
assert(false);
}
static symbol *symbol_table_find_internal(const char *str, const uint64_t len) {
assert(inited);

static void rehash() {
auto old = sym_table;
auto new_sz = old->sz * 2;
if (new_sz == 0) {
new_sz = 2;
}
// TODO(djwatson) realloc+memset?
sym_table = calloc(sizeof(table) + sizeof(symbol *) * new_sz, 1);
if (!sym_table) {
printf("symbol_table: calloc error\n");
exit(-1);
}
sym_table->sz = new_sz;
sym_table->cnt = 0;

// Rehash items.
for (size_t i = 0; i < old->sz; i++) {
auto cur = &old->entries[i];
if (cur->value != 0) {
symbol_table_insert_sym(to_symbol(*cur));
}
}
StringView s = {.len = len, .str = str};

if (old != &empty_table) {
free(old);
auto it = MyMap_cfind_by_StringView(&sym_table, &s);
auto entry = MyMap_CIter_get(&it);
if (!entry) {
return NULL;
}
return to_symbol(*entry);
}

void symbol_table_clear() {
if (sym_table != &empty_table) {
free(sym_table);
sym_table = &empty_table;
}
assert(inited);
MyMap_destroy(&sym_table);
inited = false;
}

gc_obj symbol_table_insert(string_s *str, bool can_alloc) {
assert(symbol_table_find(str) == NULL);
assert(inited);

StringView s = {.len = to_fixnum(str->len), .str = str->str};
auto res = MyMap_deferred_insert_by_StringView(&sym_table, &s);
auto entry = MyMap_Iter_get(&res.iter);
if (!res.inserted) {
return *entry;
}

// GC may fire below, so we need to be careful about saving str, and
// potentially the new symbol. We need something valid in the
// symbol table for GC rooting, so might as well put the string
// there.
*entry = tag_string(str);

// Build a new symbol.
// Must dup the string, since strings are not immutable.
auto strlen = to_fixnum(str->len);
Expand All @@ -135,13 +138,15 @@ gc_obj symbol_table_insert(string_s *str, bool can_alloc) {
return FALSE_REP;
}
}
// Reload str after gc.
str = to_string(*entry);

// Note re-load of str after allocation.
// str now saved in sym
*sym = (symbol){
SYMBOL_TAG, 0, tag_string(str), (gc_obj){.value = UNDEFINED_TAG},
0, NULL};

// Save new symbol in frame[ra].
// Save new symbol in a GC root
gc_obj result = tag_symbol(sym);
// DUP the string, so that this one is immutable.
// Note that original is in sym->name temporarily
Expand All @@ -165,16 +170,20 @@ gc_obj symbol_table_insert(string_s *str, bool can_alloc) {
// Re-load str after GC
memcpy(str2->str, to_string(sym->name)->str, strlen + 1);
sym->name = tag_string(str2);
symbol_table_insert_sym(sym);
// symbol_table_insert_sym(sym);

*entry = result;

return result;
}

void symbol_table_for_each(for_each_cb cb) {
for (size_t i = 0; i < sym_table->sz; i++) {
auto cur = &sym_table->entries[i];
if (cur->value != 0) {
cb(&sym_table->entries[i]);
}
assert(inited);

auto it = MyMap_iter(&sym_table);
auto entry = MyMap_Iter_get(&it);
while (entry) {
cb(entry);
entry = MyMap_Iter_next(&it);
}
}
11 changes: 1 addition & 10 deletions src/symbol_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
typedef struct string_s string_s;
typedef struct symbol symbol;

void sym_table_init();
symbol *symbol_table_find(string_s *str);
// Inserts a string_s, making a copy of it for the new symbol.
// Returns the tagged symbol object, or 0 if can_alloc = false and we
Expand All @@ -18,15 +19,5 @@ gc_obj symbol_table_insert(string_s *str, bool can_alloc);
symbol *symbol_table_find_cstr(const char *str);
void symbol_table_clear();

// GC needs access.
typedef struct table {
size_t cnt; // Number of objects currently in hash.
size_t sz; // Size of backing buffer.

gc_obj entries[];
} table;

typedef void (*for_each_cb)(gc_obj *field);
void symbol_table_for_each(for_each_cb cb);

extern table *sym_table;
3 changes: 3 additions & 0 deletions src/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,9 @@ MAYBE_UNUSED static inline bool is_cons(gc_obj obj) {
MAYBE_UNUSED static inline bool is_ptr(gc_obj obj) {
return get_tag(obj) == PTR_TAG;
}
// Returns true when get_tag(obj) equals SYMBOL_TAG, i.e. obj is tagged as a
// symbol.
MAYBE_UNUSED static inline bool is_symbol(gc_obj obj) {
return get_tag(obj) == SYMBOL_TAG;
}
MAYBE_UNUSED static inline bool is_literal(gc_obj obj) {
return get_tag(obj) == LITERAL_TAG;
}
Expand Down
20 changes: 3 additions & 17 deletions src/vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ static bool should_jit() {
}
return false;
}
//#define CHECK_RECORD_START(cnt) should_jit()
#define CHECK_RECORD_START(cnt) should_jit()
#else
#define CHECK_RECORD_START(cnt) unlikely((hotmap[hotmap_hash(pc)]--) <= cnt)
#endif
Expand Down Expand Up @@ -1324,26 +1324,12 @@ END_LIBRARY_FUNC
ALIGNED8 gc_obj vm_string_symbol(gc_obj in) {
auto str = to_string(in);

auto res = symbol_table_find(str);
if (res) {
return tag_symbol(res);
}
auto inserted = symbol_table_insert(str, false);
if (!inserted
.value) { // TODO(djwatson) cleanup and put in symbol_table_insert?
return FALSE_REP;
}
return inserted;
return symbol_table_insert(str, false);
}

LIBRARY_FUNC_B_LOAD_NAME("STRING->SYMBOL", STRING_SYMBOL) {
LOAD_TYPE_WITH_CHECK(str, string_s, fb, STRING_TAG);
auto res = symbol_table_find(str);
if (res) {
frame[ra] = tag_symbol(res);
} else {
frame[ra] = symbol_table_insert(str, true);
}
frame[ra] = symbol_table_insert(str, true);
}
END_LIBRARY_FUNC

Expand Down

0 comments on commit 5367b95

Please sign in to comment.