Skip to content

Commit

Permalink
Binary search for pkgimage metadata (#48940)
Browse files Browse the repository at this point in the history
Co-authored-by: Jameson Nash <vtjnash@gmail.com>
  • Loading branch information
pchintalapudi and vtjnash authored Mar 29, 2023
1 parent 38d24e5 commit bc33c81
Show file tree
Hide file tree
Showing 3 changed files with 134 additions and 22 deletions.
126 changes: 125 additions & 1 deletion src/gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,13 @@ pagetable_t memory_map;
// List of marked big objects. Not per-thread. Accessed only by master thread.
bigval_t *big_objects_marked = NULL;

// Eytzinger tree of images. Used for very fast jl_object_in_image queries during gc
// See https://algorithmica.org/en/eytzinger
static arraylist_t eytzinger_image_tree;
static arraylist_t eytzinger_idxs;
static uintptr_t gc_img_min;
static uintptr_t gc_img_max;

// -- Finalization --
// `ptls->finalizers` and `finalizer_list_marked` might have tagged pointers.
// If an object pointer has the lowest bit set, the next pointer is an unboxed c function pointer.
Expand All @@ -183,6 +190,118 @@ arraylist_t finalizer_list_marked;
arraylist_t to_finalize;
JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;

// qsort comparator: orders two uintptr_t values (passed by address)
// ascending, returning the conventional negative/zero/positive result.
static int ptr_cmp(const void *l, const void *r)
{
    const uintptr_t a = *(const uintptr_t*)l;
    const uintptr_t b = *(const uintptr_t*)r;
    if (a < b)
        return -1;
    if (a > b)
        return 1;
    return 0;
}

// Build an eytzinger (BFS-order implicit binary tree) layout in `dest`
// from the sorted array `src`, by in-order traversal of the implicit tree.
// - src:  sorted input values
// - dest: output array of the same length, receiving the Eytzinger layout
// - i:    next index of `src` to consume (pass 0 at the root call)
// - k:    1-based heap index of the node being filled (pass 1 at the root)
// - n:    number of elements
// Returns the count of `src` elements consumed so far; the root call
// returns n. The return type is size_t (the original returned int) so the
// index is not implicitly truncated when n exceeds INT_MAX; the caller
// ignores the root return value, so this is backward-compatible.
static size_t eytzinger(uintptr_t *src, uintptr_t *dest, size_t i, size_t k, size_t n)
{
    if (k <= n) {
        // left subtree first, then this node, then right subtree:
        // an in-order walk assigns sorted values to BFS positions
        i = eytzinger(src, dest, i, 2 * k, n);
        dest[k-1] = src[i];
        i++;
        i = eytzinger(src, dest, i, 2 * k + 1, n);
    }
    return i;
}

// Lower-bound search in the Eytzinger-layout image tree: returns the index
// (into eytzinger_image_tree.items) of the largest entry strictly less than
// `obj`, or n (the sentinel slot at the end) when obj is outside the
// [gc_img_min, gc_img_max] range covered by any image.
static size_t eyt_obj_idx(jl_value_t *obj) JL_NOTSAFEPOINT
{
    // The tree holds an even number of blob start/end entries plus one
    // sentinel, so len - 1 is the entry count.
    size_t n = eytzinger_image_tree.len - 1;
    if (n == 0)
        return n;
    assert(n % 2 == 0 && "Eytzinger tree not even length!");
    uintptr_t cmp = (uintptr_t) obj;
    // Fast reject: outside the global min/max of all blob boundaries.
    if (cmp <= gc_img_min || cmp > gc_img_max)
        return n;
    uintptr_t *tree = (uintptr_t*)eytzinger_image_tree.items;
    size_t k = 1;
    // Descend the implicit tree; each step shifts in one bit recording
    // whether we went right (cmp greater) or left.
    // note that k preserves the history of how we got to the current node
    while (k <= n) {
        int greater = (cmp > tree[k - 1]);
        k <<= 1;
        k |= greater;
    }
    // Free to assume k is nonzero, since we start with k = 1
    // and cmp > gc_img_min
    // This shift does a fast revert of the path until we get
    // to a node that evaluated less than cmp.
    k >>= (__builtin_ctzll(k) + 1);
    assert(k != 0);
    assert(k <= n && "Eytzinger tree index out of bounds!");
    assert(tree[k - 1] < cmp && "Failed to find lower bound for object!");
    return k - 1;
}

// Rebuild the Eytzinger search tree after jl_linkage_blobs gains new image
// blob boundaries (each blob contributes a start pointer and an end
// pointer). Also rebuilds eytzinger_idxs, which maps each tree slot back to
// its blob number for external_blob_index.
// used in staticdata.c after we add an image
void rebuild_image_blob_tree(void)
{
    // Both lists carry one extra sentinel slot beyond the blob boundaries,
    // hence the +1 when computing how much to grow.
    size_t inc = 1 + jl_linkage_blobs.len - eytzinger_image_tree.len;
    assert(eytzinger_idxs.len == eytzinger_image_tree.len);
    assert(eytzinger_idxs.max == eytzinger_image_tree.max);
    arraylist_grow(&eytzinger_idxs, inc);
    arraylist_grow(&eytzinger_image_tree, inc);
    // Sentinel slot: blob count in the index map, an odd value (1) in the
    // tree so a miss decodes as "outside image".
    eytzinger_idxs.items[eytzinger_idxs.len - 1] = (void*)jl_linkage_blobs.len;
    eytzinger_image_tree.items[eytzinger_image_tree.len - 1] = (void*)1; // outside image
    for (size_t i = 0; i < jl_linkage_blobs.len; i++) {
        assert((uintptr_t) jl_linkage_blobs.items[i] % 4 == 0 && "Linkage blob not 4-byte aligned!");
        // We abuse the pointer here a little so that a couple of properties are true:
        // 1. a start and an end are never the same value. This simplifies the binary search.
        // 2. ends are always after starts. This also simplifies the binary search.
        // We assume that there exist no 0-size blobs, but that's a safe assumption
        // since it means nothing could be there anyway
        uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
        // Even indices are blob starts (low bit left clear), odd indices are
        // blob ends (low bit set) — eyt_obj_in_img relies on this encoding.
        eytzinger_idxs.items[i] = (void*)(val + (i & 1));
    }
    // Sort the boundary values (the sentinel slot is excluded), then lay
    // them out in Eytzinger order into the tree.
    qsort(eytzinger_idxs.items, eytzinger_idxs.len - 1, sizeof(void*), ptr_cmp);
    gc_img_min = (uintptr_t) eytzinger_idxs.items[0];
    gc_img_max = (uintptr_t) eytzinger_idxs.items[eytzinger_idxs.len - 2] + 1;
    eytzinger((uintptr_t*)eytzinger_idxs.items, (uintptr_t*)eytzinger_image_tree.items, 0, 1, eytzinger_idxs.len - 1);
    // Reuse the scratch memory to store the indices
    // Still O(nlogn) because binary search
    for (size_t i = 0; i < jl_linkage_blobs.len; i ++) {
        uintptr_t val = (uintptr_t) jl_linkage_blobs.items[i];
        // This is the same computation as in the prior for loop
        uintptr_t eyt_val = val + (i & 1);
        // Search for eyt_val + 1 so the lower-bound lookup lands exactly on
        // the eyt_val entry; verify the tree round-trips every boundary.
        size_t eyt_idx = eyt_obj_idx((jl_value_t*)(eyt_val + 1)); assert(eyt_idx < eytzinger_idxs.len - 1);
        assert(eytzinger_image_tree.items[eyt_idx] == (void*)eyt_val && "Eytzinger tree failed to find object!");
        // Map the tree slot back to a blob number: ends map to the
        // out-of-range sentinel, starts map to blob i/2.
        if (i & 1)
            eytzinger_idxs.items[eyt_idx] = (void*)n_linkage_blobs();
        else
            eytzinger_idxs.items[eyt_idx] = (void*)(i / 2);
    }
}

// Fast membership test: is `obj` inside any pkgimage blob?
static int eyt_obj_in_img(jl_value_t *obj) JL_NOTSAFEPOINT
{
    assert((uintptr_t) obj % 4 == 0 && "Object not 4-byte aligned!");
    int pos = eyt_obj_idx(obj);
    // Blob starts were recorded with the low bit clear and blob ends with
    // the low bit set (see rebuild_image_blob_tree). The lower-bound lookup
    // therefore lands on a start entry (even) exactly when obj lies inside
    // a blob, and on an end entry (odd) when it lies in a gap.
    uintptr_t entry = (uintptr_t)eytzinger_image_tree.items[pos];
    return !(entry & 1);
}

// Map an object to the index of the image blob containing it, or to the
// blob count when it is in no image.
size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT
{
    assert((uintptr_t) v % 4 == 0 && "Object not 4-byte aligned!");
    int slot = eyt_obj_idx(v);
    // eytzinger_idxs maps each tree slot to its blob number, and invalid
    // slots were pre-filled with the blob count, so a miss already comes
    // back as the out-of-range sentinel — no extra handling needed.
    return (size_t) eytzinger_idxs.items[slot];
}

// Exported wrapper around the static helper; GC-internal callers use
// eyt_obj_in_img directly so it can be inlined into the mark loop.
uint8_t jl_object_in_image(jl_value_t *obj) JL_NOTSAFEPOINT
{
    return eyt_obj_in_img(obj) ? 1 : 0;
}

NOINLINE uintptr_t gc_get_stack_ptr(void)
{
return (uintptr_t)jl_get_frame_addr();
Expand Down Expand Up @@ -2270,7 +2389,8 @@ FORCE_INLINE void gc_mark_outrefs(jl_ptls_t ptls, jl_gc_markqueue_t *mq, void *_
uint8_t bits = (gc_old(o->header) && !mark_reset_age) ? GC_OLD_MARKED : GC_MARKED;
int update_meta = __likely(!meta_updated && !gc_verifying);
int foreign_alloc = 0;
if (update_meta && jl_object_in_image(new_obj)) {
// directly point at eyt_obj_in_img to encourage inlining
if (update_meta && eyt_obj_in_img(new_obj)) {
foreign_alloc = 1;
update_meta = 0;
}
Expand Down Expand Up @@ -3245,6 +3365,10 @@ void jl_gc_init(void)

arraylist_new(&finalizer_list_marked, 0);
arraylist_new(&to_finalize, 0);
arraylist_new(&eytzinger_image_tree, 0);
arraylist_new(&eytzinger_idxs, 0);
arraylist_push(&eytzinger_idxs, (void*)0);
arraylist_push(&eytzinger_image_tree, (void*)1); // outside image

gc_num.interval = default_collect_interval;
last_long_collect_interval = default_collect_interval;
Expand Down
23 changes: 2 additions & 21 deletions src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -954,28 +954,9 @@ STATIC_INLINE size_t n_linkage_blobs(void) JL_NOTSAFEPOINT
return jl_image_relocs.len;
}

// TODO: make this a binary search
STATIC_INLINE size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT {
    // Linear scan over the (start, end] ranges of all linkage blobs;
    // returns the blob index, or nblobs when v is in no blob.
    uintptr_t p = (uintptr_t)v;
    size_t nblobs = n_linkage_blobs();
    assert(jl_linkage_blobs.len == 2*nblobs);
    size_t i;
    for (i = 0; i < nblobs; i++) {
        uintptr_t start = (uintptr_t)jl_linkage_blobs.items[2*i];
        uintptr_t stop = (uintptr_t)jl_linkage_blobs.items[2*i + 1];
        // the last object may be a singleton (v is shifted by a type tag,
        // so we use exclusive-inclusive bounds here)
        if (start < p && p <= stop)
            break;
    }
    return i;
}
size_t external_blob_index(jl_value_t *v) JL_NOTSAFEPOINT;

STATIC_INLINE uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT {
    // external_blob_index returns the blob count as its "not found"
    // sentinel, so any other value means v lives in some image blob.
    return external_blob_index(v) != n_linkage_blobs();
}
uint8_t jl_object_in_image(jl_value_t* v) JL_NOTSAFEPOINT;

typedef struct {
LLVMOrcThreadSafeModuleRef TSM;
Expand Down
7 changes: 7 additions & 0 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -2366,6 +2366,10 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
jl_write_relocations(&s);
}

// This ensures that we can use the low bit of addresses for
// identifying end pointers in gc's eytzinger search.
write_padding(&sysimg, 4 - (sysimg.size % 4));

if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)) {
jl_printf(
JL_STDERR,
Expand Down Expand Up @@ -2658,6 +2662,8 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle)
// }
#endif

extern void rebuild_image_blob_tree(void);

static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl_array_t *depmods, uint64_t checksum,
/* outputs */ jl_array_t **restored, jl_array_t **init_order,
jl_array_t **extext_methods,
Expand Down Expand Up @@ -3151,6 +3157,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
arraylist_push(&jl_linkage_blobs, (void*)image_base);
arraylist_push(&jl_linkage_blobs, (void*)(image_base + sizeof_sysimg + sizeof(uintptr_t)));
arraylist_push(&jl_image_relocs, (void*)relocs_base);
rebuild_image_blob_tree();

// jl_printf(JL_STDOUT, "%ld blobs to link against\n", jl_linkage_blobs.len >> 1);
jl_gc_enable(en);
Expand Down

9 comments on commit bc33c81

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Executing the daily package evaluation, I will reply here when finished:

@nanosoldier runtests(isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your package evaluation job has completed - possible new issues were detected.
A full report can be found here.

@vtjnash
Copy link
Member

@vtjnash vtjnash commented on bc33c81 Apr 3, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nanosoldier runbenchmarks(ALL, isdaily = true)

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - possible performance regressions were detected. A full report can be found here.

@vtjnash
Copy link
Member

@vtjnash vtjnash commented on bc33c81 Apr 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Whoops, something regressed parse_json and made it take twice as long / allocate more

@vtjnash
Copy link
Member

@vtjnash vtjnash commented on bc33c81 Apr 4, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nanosoldier runbenchmarks("parse_json", vs="@124abaa73c06e4c73c6cc6d470dbaa08eb9d4e28")

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your benchmark job has completed - no performance regressions were detected. A full report can be found here.

@maleadt
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the last daily run where PkgEval didn't result in many packages timing out after their tests completed, so let's see if that's still the case (or if the issue is related to the kernel upgrade on the server):

@nanosoldier runtests()

@nanosoldier
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Your package evaluation job has completed - possible issues were detected.
A full report can be found here.

Please sign in to comment.