Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Swisstable Hash implementation for dict #41

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 45 additions & 30 deletions Include/internal/pycore_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ Py_ssize_t _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyOb
int _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value);

#define DKIX_EMPTY (-1)
#define DKIX_DUMMY (-2) /* Used internally */
#define DKIX_ERROR (-3)

typedef enum {
Expand All @@ -61,6 +60,39 @@ typedef enum {
DICT_KEYS_SPLIT = 2
} DictKeysKind;

// Currently, only 8-wide groups are supported.
// A 16-wide group would be possible when SSE2 is available, but NEON cannot optimize it.
#define GROUP_WIDTH (8)

// Control area of one group, viewable either byte-by-byte (c) or as a
// single 64-bit word (u64).  The two views cover exactly the same bytes
// only while GROUP_WIDTH is 8.
// NOTE(review): presumably one control byte per slot, Swiss-table style;
// the byte encoding is not visible here -- confirm in dictobject.c.
typedef union {
char c[GROUP_WIDTH];
uint64_t u64;
} group_control;

// Group layout with 1-byte indices: the control area followed by
// GROUP_WIDTH uint8_t index slots.  DK_GROUP_SIZE() uses sizeof(group8)
// when DK_LOG_SIZE(dk) <= 8.
// NOTE(review): index[] presumably holds positions in dk_entries -- confirm.
typedef struct {
group_control control;
uint8_t index[GROUP_WIDTH];
} group8; // 16byte

// Group layout with 2-byte indices: the control area followed by
// GROUP_WIDTH uint16_t index slots.  DK_GROUP_SIZE() uses sizeof(group16)
// when 8 < DK_LOG_SIZE(dk) <= 16.
// NOTE(review): index[] presumably holds positions in dk_entries -- confirm.
typedef struct {
group_control control;
uint16_t index[GROUP_WIDTH];
} group16; // 24byte

// Group layout with 4-byte indices: the control area followed by
// GROUP_WIDTH uint32_t index slots.  DK_GROUP_SIZE() uses sizeof(group32)
// when 16 < DK_LOG_SIZE(dk) <= 32.
// NOTE(review): index[] presumably holds positions in dk_entries -- confirm.
typedef struct {
group_control control;
uint32_t index[GROUP_WIDTH];
} group32; // 40byte

// Group layout with 8-byte indices: the control area followed by
// GROUP_WIDTH uint64_t index slots.  DK_GROUP_SIZE() falls back to
// sizeof(group64) when DK_LOG_SIZE(dk) > 32.
// NOTE(review): index[] presumably holds positions in dk_entries -- confirm.
typedef struct {
group_control control;
uint64_t index[GROUP_WIDTH];
} group64; // 72byte

// TODO: group64 could use uint8_t index[7*GROUP_WIDTH] instead.
// Then sizeof(group64) becomes 64 bytes, which is cache-line friendly.


/* See dictobject.c for actual layout of DictKeysObject */
struct _dictkeysobject {
Py_ssize_t dk_refcnt;
Expand All @@ -80,20 +112,7 @@ struct _dictkeysobject {
/* Number of used entries in dk_entries. */
Py_ssize_t dk_nentries;

/* Actual hash table of dk_size entries. It holds indices in dk_entries,
or DKIX_EMPTY(-1) or DKIX_DUMMY(-2).

Indices must be: 0 <= indice < USABLE_FRACTION(dk_size).

The size in bytes of an indice depends on dk_size:

- 1 byte if dk_size <= 0xff (char*)
- 2 bytes if dk_size <= 0xffff (int16_t*)
- 4 bytes if dk_size <= 0xffffffff (int32_t*)
- 8 bytes otherwise (int64_t*)

Dynamically sized, SIZEOF_VOID_P is minimum. */
char dk_indices[]; /* char is required to avoid strict aliasing. */
unsigned char dk_groups[]; /* char is required to avoid strict aliasing. */

/* "PyDictKeyEntry dk_entries[dk_usable];" array follows:
see the DK_ENTRIES() macro */
Expand All @@ -113,23 +132,19 @@ struct _dictvalues {
PyObject *values[1];
};

#define DK_LOG_SIZE(dk) ((dk)->dk_log2_size)
#if SIZEOF_VOID_P > 4
/* dk_log2_size plus 3.  DK_GROUPS() treats dk_log2_size as log2 of the
   group count, and 3 == log2(GROUP_WIDTH) while GROUP_WIDTH is 8, so this
   is presumably log2 of the total slot count -- TODO confirm. */
#define DK_LOG_SIZE(dk) ((dk)->dk_log2_size+3)

#define DK_SIZE(dk) (((int64_t)1)<<DK_LOG_SIZE(dk))
#define DK_IXSIZE(dk) \
(DK_LOG_SIZE(dk) <= 7 ? \
1 : DK_LOG_SIZE(dk) <= 15 ? \
2 : DK_LOG_SIZE(dk) <= 31 ? \
4 : sizeof(int64_t))
#else
#define DK_SIZE(dk) (1<<DK_LOG_SIZE(dk))
#define DK_IXSIZE(dk) \
(DK_LOG_SIZE(dk) <= 7 ? \
1 : DK_LOG_SIZE(dk) <= 15 ? \
2 : sizeof(int32_t))
#endif
/* Number of groups in the table: 2**dk_log2_size, computed as int64_t. */
#define DK_GROUPS(dk) (((int64_t)1)<<((dk)->dk_log2_size))

/* Size in bytes of a single group for this keys object.
   The group layout is chosen from DK_LOG_SIZE(dk):
     <= 8   -> group8  (uint8_t indices)
     <= 16  -> group16 (uint16_t indices)
     <= 32  -> group32 (uint32_t indices)
     else   -> group64 (uint64_t indices)
   Note: the argument is expanded more than once, so it must not have
   side effects. */
#define DK_GROUP_SIZE(dk) \
(DK_LOG_SIZE(dk) <= 8 ? sizeof(group8) \
: DK_LOG_SIZE(dk) <= 16 ? sizeof(group16) \
: DK_LOG_SIZE(dk) <= 32 ? sizeof(group32) \
: sizeof(group64))

#define DK_ENTRIES(dk) \
((PyDictKeyEntry*)(&((int8_t*)((dk)->dk_indices))[DK_SIZE(dk) * DK_IXSIZE(dk)]))
((PyDictKeyEntry*)(&((dk)->dk_groups)[DK_GROUP_SIZE(dk) << ((dk)->dk_log2_size)]))

extern uint64_t _pydict_global_version;

Expand Down
Loading