Skip to content

bpo-40602: Optimize _Py_hashtable for pointer keys #20051

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 12, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 25 additions & 15 deletions Include/internal/pycore_hashtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,17 @@ typedef struct {

/* Forward declaration */
struct _Py_hashtable_t;
typedef struct _Py_hashtable_t _Py_hashtable_t;

typedef Py_uhash_t (*_Py_hashtable_hash_func) (struct _Py_hashtable_t *ht,
typedef Py_uhash_t (*_Py_hashtable_hash_func) (_Py_hashtable_t *ht,
const void *pkey);
typedef int (*_Py_hashtable_compare_func) (struct _Py_hashtable_t *ht,
typedef int (*_Py_hashtable_compare_func) (_Py_hashtable_t *ht,
const void *pkey,
const _Py_hashtable_entry_t *he);
typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *ht,
const void *pkey);
typedef int (*_Py_hashtable_get_func) (_Py_hashtable_t *ht,
const void *pkey, void *data);

typedef struct {
/* allocate a memory block */
Expand All @@ -93,18 +98,19 @@ typedef struct {


/* _Py_hashtable: table */

typedef struct _Py_hashtable_t {
struct _Py_hashtable_t {
size_t num_buckets;
size_t entries; /* Total number of entries in the table. */
_Py_slist_t *buckets;
size_t key_size;
size_t data_size;

_Py_hashtable_get_func get_func;
_Py_hashtable_get_entry_func get_entry_func;
_Py_hashtable_hash_func hash_func;
_Py_hashtable_compare_func compare_func;
_Py_hashtable_allocator_t alloc;
} _Py_hashtable_t;
};

/* hash a pointer (void*) */
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
Expand Down Expand Up @@ -176,10 +182,12 @@ PyAPI_FUNC(int) _Py_hashtable_set(

Don't call this function directly; use the _Py_HASHTABLE_GET_ENTRY()
macro instead. */
PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey);
/* Inline front-end for entry lookup.

   Asserts that the key size supplied by the caller matches the key size
   recorded in the table, then forwards to the lookup routine stored in
   ht->get_entry_func and returns its result unchanged. */
static inline _Py_hashtable_entry_t *
_Py_hashtable_get_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
{
    assert(key_size == ht->key_size);

    _Py_hashtable_get_entry_func lookup = ht->get_entry_func;
    return lookup(ht, pkey);
}

#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
_Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
Expand All @@ -189,12 +197,14 @@ PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
exists, return 0 if the entry does not exist.

Don't call this function directly; use the _Py_HASHTABLE_GET() macro instead. */
PyAPI_FUNC(int) _Py_hashtable_get(
_Py_hashtable_t *ht,
size_t key_size,
const void *pkey,
size_t data_size,
void *data);
/* Inline front-end for reading the data associated with a key.

   Asserts that the key and data sizes supplied by the caller match the
   sizes recorded in the table, then forwards to the routine stored in
   ht->get_func and returns its result unchanged. */
static inline int
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
                  size_t data_size, void *data)
{
    assert(key_size == ht->key_size);
    assert(data_size == ht->data_size);

    _Py_hashtable_get_func getter = ht->get_func;
    return getter(ht, pkey, data);
}

#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
_Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
Expand Down
207 changes: 128 additions & 79 deletions Python/hashtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ Py_uhash_t
_Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
{
void *key;

_Py_HASHTABLE_READ_KEY(ht, pkey, key);
return (Py_uhash_t)_Py_HashPointer(key);
}
Expand Down Expand Up @@ -137,61 +136,6 @@ round_size(size_t s)
}


/* Create an empty hash table.

   key_size / data_size: sizes recorded in the table for keys and data.
   init_size: passed through round_size() to obtain the bucket count.
   hash_func / compare_func: callbacks stored in the table.
   allocator: memory functions to use; when NULL, PyMem_Malloc() and
              PyMem_Free() are used. The allocator is copied into the table.

   Return the new table, or NULL if allocating either the table or its
   bucket array fails. */
_Py_hashtable_t *
_Py_hashtable_new_full(size_t key_size, size_t data_size,
                       size_t init_size,
                       _Py_hashtable_hash_func hash_func,
                       _Py_hashtable_compare_func compare_func,
                       _Py_hashtable_allocator_t *allocator)
{
    _Py_hashtable_allocator_t alloc;
    if (allocator != NULL) {
        alloc = *allocator;
    }
    else {
        alloc.malloc = PyMem_Malloc;
        alloc.free = PyMem_Free;
    }

    _Py_hashtable_t *table = (_Py_hashtable_t *)alloc.malloc(sizeof(*table));
    if (table == NULL) {
        return NULL;
    }

    size_t nbuckets = round_size(init_size);
    size_t nbytes = nbuckets * sizeof(table->buckets[0]);

    table->buckets = alloc.malloc(nbytes);
    if (table->buckets == NULL) {
        /* Don't leak the table header when the bucket array can't be
           allocated. */
        alloc.free(table);
        return NULL;
    }
    /* Every bucket starts out empty. */
    memset(table->buckets, 0, nbytes);

    table->num_buckets = nbuckets;
    table->entries = 0;
    table->key_size = key_size;
    table->data_size = data_size;
    table->hash_func = hash_func;
    table->compare_func = compare_func;
    table->alloc = alloc;
    return table;
}


/* Create an empty hash table with HASHTABLE_MIN_SIZE as the initial size
   and no custom allocator. Thin wrapper around _Py_hashtable_new_full();
   returns whatever that function returns. */
_Py_hashtable_t *
_Py_hashtable_new(size_t key_size, size_t data_size,
                  _Py_hashtable_hash_func hash_func,
                  _Py_hashtable_compare_func compare_func)
{
    /* NULL makes _Py_hashtable_new_full() fall back to PyMem_Malloc() and
       PyMem_Free(). */
    _Py_hashtable_allocator_t *default_allocator = NULL;

    return _Py_hashtable_new_full(key_size, data_size, HASHTABLE_MIN_SIZE,
                                  hash_func, compare_func,
                                  default_allocator);
}


size_t
_Py_hashtable_size(_Py_hashtable_t *ht)
{
Expand Down Expand Up @@ -251,23 +195,20 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht)


_Py_hashtable_entry_t *
_Py_hashtable_get_entry(_Py_hashtable_t *ht,
size_t key_size, const void *pkey)
_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *pkey)
{
Py_uhash_t key_hash;
size_t index;
_Py_hashtable_entry_t *entry;

assert(key_size == ht->key_size);

key_hash = ht->hash_func(ht, pkey);
index = key_hash & (ht->num_buckets - 1);

for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry))
Py_uhash_t key_hash = ht->hash_func(ht, pkey);
size_t index = key_hash & (ht->num_buckets - 1);
_Py_hashtable_entry_t *entry = entry = TABLE_HEAD(ht, index);
while (1) {
if (entry == NULL) {
return NULL;
}
if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) {
break;
}
entry = ENTRY_NEXT(entry);
}

return entry;
}

Expand Down Expand Up @@ -324,7 +265,7 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
/* Don't write the assertion on a single line because it is interesting
to know the duplicated entry if the assertion failed. The entry can
be read using a debugger. */
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
entry = ht->get_entry_func(ht, pkey);
assert(entry == NULL);
#endif

Expand Down Expand Up @@ -352,18 +293,62 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,


int
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size,const void *pkey,
size_t data_size, void *data)
_Py_hashtable_get_generic(_Py_hashtable_t *ht, const void *pkey, void *data)
{
_Py_hashtable_entry_t *entry;

assert(data != NULL);
_Py_hashtable_entry_t *entry = ht->get_entry_func(ht, pkey);
if (entry != NULL) {
ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
return 1;
}
else {
return 0;
}
}

entry = _Py_hashtable_get_entry(ht, key_size, pkey);
if (entry == NULL)

// Specialized for:
//   key_size == sizeof(void*)
//   hash_func == _Py_hashtable_hash_ptr
//   compare_func == _Py_hashtable_compare_direct
//
// Look up the entry whose key equals the pointer-sized key stored at pkey.
// The hash function is called directly and the key comparison is done
// inline with memcmp(), instead of going through ht->hash_func and
// ht->compare_func.
//
// Return the matching entry, or NULL if the bucket chain holds no match.
_Py_hashtable_entry_t *
_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *pkey)
{
    Py_uhash_t key_hash = _Py_hashtable_hash_ptr(ht, pkey);
    // Masking with (num_buckets - 1) selects the bucket; this presumably
    // relies on num_buckets being a power of two (TODO confirm against
    // round_size()).
    size_t index = key_hash & (ht->num_buckets - 1);
    _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
    while (1) {
        if (entry == NULL) {
            return NULL;
        }
        // Compare the cached hash first; only compare key bytes when the
        // hashes are equal.
        if (entry->key_hash == key_hash) {
            const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
            if (memcmp(pkey, pkey2, sizeof(void*)) == 0) {
                break;
            }
        }
        entry = ENTRY_NEXT(entry);
    }
    return entry;
}


// Specialized for:
// key_size == sizeof(void*)
// hash_func == _Py_hashtable_hash_ptr
// compare_func == _Py_hashtable_compare_direct
int
_Py_hashtable_get_ptr(_Py_hashtable_t *ht, const void *pkey, void *data)
{
assert(data != NULL);
_Py_hashtable_entry_t *entry = _Py_hashtable_get_entry_ptr(ht, pkey);
if (entry != NULL) {
ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
return 1;
}
else {
return 0;
ENTRY_READ_PDATA(ht, entry, data_size, data);
return 1;
}
}


Expand Down Expand Up @@ -454,6 +439,70 @@ hashtable_rehash(_Py_hashtable_t *ht)
}


/* Create an empty hash table.

   key_size / data_size: sizes recorded in the table for keys and data.
   init_size: passed through round_size() to obtain the bucket count.
   hash_func / compare_func: callbacks stored in the table.
   allocator: memory functions to use; when NULL, PyMem_Malloc() and
              PyMem_Free() are used. The allocator is copied into the table.

   The table's get_func / get_entry_func are set to the pointer-specialized
   routines when key_size == sizeof(void*), hash_func is
   _Py_hashtable_hash_ptr and compare_func is _Py_hashtable_compare_direct;
   otherwise the generic routines are used.

   Return the new table, or NULL if allocating either the table or its
   bucket array fails. */
_Py_hashtable_t *
_Py_hashtable_new_full(size_t key_size, size_t data_size,
                       size_t init_size,
                       _Py_hashtable_hash_func hash_func,
                       _Py_hashtable_compare_func compare_func,
                       _Py_hashtable_allocator_t *allocator)
{
    _Py_hashtable_allocator_t alloc;
    if (allocator != NULL) {
        alloc = *allocator;
    }
    else {
        alloc.malloc = PyMem_Malloc;
        alloc.free = PyMem_Free;
    }

    _Py_hashtable_t *table = (_Py_hashtable_t *)alloc.malloc(sizeof(*table));
    if (table == NULL) {
        return NULL;
    }

    size_t nbuckets = round_size(init_size);
    size_t nbytes = nbuckets * sizeof(table->buckets[0]);

    table->buckets = alloc.malloc(nbytes);
    if (table->buckets == NULL) {
        /* Don't leak the table header when the bucket array can't be
           allocated. */
        alloc.free(table);
        return NULL;
    }
    /* Every bucket starts out empty. */
    memset(table->buckets, 0, nbytes);

    table->num_buckets = nbuckets;
    table->entries = 0;
    table->key_size = key_size;
    table->data_size = data_size;
    table->hash_func = hash_func;
    table->compare_func = compare_func;
    table->alloc = alloc;

    /* Pick the lookup routines once, at creation time. */
    if (key_size == sizeof(void*)
        && hash_func == _Py_hashtable_hash_ptr
        && compare_func == _Py_hashtable_compare_direct)
    {
        table->get_func = _Py_hashtable_get_ptr;
        table->get_entry_func = _Py_hashtable_get_entry_ptr;
    }
    else {
        table->get_func = _Py_hashtable_get_generic;
        table->get_entry_func = _Py_hashtable_get_entry_generic;
    }
    return table;
}


/* Create an empty hash table with HASHTABLE_MIN_SIZE as the initial size
   and no custom allocator. Thin wrapper around _Py_hashtable_new_full();
   returns whatever that function returns. */
_Py_hashtable_t *
_Py_hashtable_new(size_t key_size, size_t data_size,
                  _Py_hashtable_hash_func hash_func,
                  _Py_hashtable_compare_func compare_func)
{
    /* NULL makes _Py_hashtable_new_full() fall back to PyMem_Malloc() and
       PyMem_Free(). */
    _Py_hashtable_allocator_t *default_allocator = NULL;

    return _Py_hashtable_new_full(key_size, data_size, HASHTABLE_MIN_SIZE,
                                  hash_func, compare_func,
                                  default_allocator);
}


void
_Py_hashtable_clear(_Py_hashtable_t *ht)
{
Expand Down