Skip to content

Commit

Permalink
staticdata: handle cycles in datatypes (#52752)
Browse files Browse the repository at this point in the history
Handle any sort of cycle encountered in the datatype super fields by
always deferring that field until later and setting a deferred mechanism
for updating the field only after the supertype is ready.

Fix #52660

(cherry picked from commit c94b1a3)
  • Loading branch information
vtjnash authored and KristofferC committed Jan 24, 2024
1 parent 2481fdf commit cc26004
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 71 deletions.
147 changes: 84 additions & 63 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,6 @@ static arraylist_t deser_sym;
static htable_t external_objects;

static htable_t serialization_order; // to break cycles, mark all objects that are serialized
static htable_t unique_ready; // as we serialize types, we need to know if all reachable objects are also already serialized. This tracks whether `immediate` has been set for all of them.
static htable_t nullptrs;
// FIFO queue for objects to be serialized. Anything requiring fixup upon deserialization
// must be "toplevel" in this queue. For types, parameters and field types must appear
Expand Down Expand Up @@ -463,6 +462,7 @@ typedef struct {
arraylist_t relocs_list; // a list of (location, target) pairs, see description at top
arraylist_t gctags_list; // "
arraylist_t uniquing_types; // a list of locations that reference types that must be de-duplicated
arraylist_t uniquing_super; // a list of datatypes, used in super fields, that need to be marked in uniquing_types once they are reached, for handling unique-ing of them on deserialization
arraylist_t uniquing_objs; // a list of locations that reference non-types that must be de-duplicated
arraylist_t fixup_types; // a list of locations of types requiring (re)caching
arraylist_t fixup_objs; // a list of locations of objects requiring (re)caching
Expand Down Expand Up @@ -726,14 +726,13 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
{
jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
jl_queue_for_serialization_(s, (jl_value_t*)t, 1, immediate);
const jl_datatype_layout_t *layout = t->layout;

if (!recursive)
goto done_fields;

if (s->incremental && jl_is_datatype(v) && immediate) {
jl_datatype_t *dt = (jl_datatype_t*)v;
// ensure super is queued (though possibly not yet handled, since it may have cycles)
jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, 1);
// ensure all type parameters are recached
jl_queue_for_serialization_(s, (jl_value_t*)dt->parameters, 1, 1);
jl_value_t *singleton = dt->instance;
Expand All @@ -743,7 +742,7 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
// (it may get serialized from elsewhere though)
record_field_change(&dt->instance, jl_nothing);
}
immediate = 0; // do not handle remaining fields immediately (just field types remains)
goto done_fields; // for now
}
if (s->incremental && jl_is_method_instance(v)) {
jl_method_instance_t *mi = (jl_method_instance_t*)v;
Expand Down Expand Up @@ -800,11 +799,9 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_
}
}


if (immediate) // must be things that can be recursively handled, and valid as type parameters
assert(jl_is_immutable(t) || jl_is_typevar(v) || jl_is_symbol(v) || jl_is_svec(v));

const jl_datatype_layout_t *layout = t->layout;
if (layout->npointers == 0) {
// bitstypes do not require recursion
}
Expand Down Expand Up @@ -860,22 +857,35 @@ done_fields: ;

// We've encountered an item we need to cache
void **bp = ptrhash_bp(&serialization_order, v);
assert(*bp != (void*)(uintptr_t)-1);
if (s->incremental) {
void **bp2 = ptrhash_bp(&unique_ready, v);
if (*bp2 == HT_NOTFOUND)
assert(*bp == (void*)(uintptr_t)-2);
else if (*bp != (void*)(uintptr_t)-2)
return;
}
else {
assert(*bp == (void*)(uintptr_t)-2);
}
assert(*bp == (void*)(uintptr_t)-2);
arraylist_push(&serialization_queue, (void*) v);
size_t idx = serialization_queue.len - 1;
assert(serialization_queue.len < ((uintptr_t)1 << RELOC_TAG_OFFSET) && "too many items to serialize");

*bp = (void*)((char*)HT_NOTFOUND + 1 + idx);

// DataType is very unusual, in that some of the fields need to be pre-order, and some
// (notably super) must not be (even if `jl_queue_for_serialization_` would otherwise
// try to promote itself to be immediate)
if (s->incremental && jl_is_datatype(v) && immediate && recursive) {
jl_datatype_t *dt = (jl_datatype_t*)v;
void **bp = ptrhash_bp(&serialization_order, (void*)dt->super);
if (*bp != (void*)-2) {
// if super is already on the stack of things to handle when this returns, do
// not try to handle it now
jl_queue_for_serialization_(s, (jl_value_t*)dt->super, 1, immediate);
}
immediate = 0;
char *data = (char*)jl_data_ptr(v);
size_t i, np = layout->npointers;
for (i = 0; i < np; i++) {
uint32_t ptr = jl_ptr_offset(t, i);
if (ptr * sizeof(jl_value_t*) == offsetof(jl_datatype_t, super))
continue; // skip the super field, since it might not be quite validly ordered
int mutabl = 1;
jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr], mutabl);
jl_queue_for_serialization_(s, fld, 1, immediate);
}
}
}

static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, int recursive, int immediate) JL_GC_DISABLED
Expand All @@ -894,28 +904,19 @@ static void jl_queue_for_serialization_(jl_serializer_state *s, jl_value_t *v, i
}

void **bp = ptrhash_bp(&serialization_order, v);
if (*bp == HT_NOTFOUND) {
*bp = (void*)(uintptr_t)(immediate ? -2 : -1);
}
else {
if (!s->incremental || !immediate || !recursive)
return;
void **bp2 = ptrhash_bp(&unique_ready, v);
if (*bp2 == HT_NOTFOUND)
*bp2 = v; // now is unique_ready
else {
assert(*bp != (void*)(uintptr_t)-1);
return; // already was unique_ready
}
assert(*bp != (void*)(uintptr_t)-2); // should be unique_ready then
if (*bp == (void*)(uintptr_t)-1)
*bp = (void*)(uintptr_t)-2; // now immediate
}
assert(!immediate || *bp != (void*)(uintptr_t)-2);
if (*bp == HT_NOTFOUND)
*bp = (void*)(uintptr_t)-1; // now enqueued
else if (!s->incremental || !immediate || !recursive || *bp != (void*)(uintptr_t)-1)
return;

if (immediate)
if (immediate) {
*bp = (void*)(uintptr_t)-2; // now immediate
jl_insert_into_serialization_queue(s, v, recursive, immediate);
else
}
else {
arraylist_push(&object_worklist, (void*)v);
}
}

// Do a pre-order traversal of the to-serialize worklist, in the identical order
Expand Down Expand Up @@ -1065,8 +1066,10 @@ static void record_uniquing(jl_serializer_state *s, jl_value_t *fld, uintptr_t o
if (s->incremental && jl_needs_serialization(s, fld) && needs_uniquing(fld)) {
if (jl_is_datatype(fld) || jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(fld)))
arraylist_push(&s->uniquing_types, (void*)(uintptr_t)offset);
else
else if (jl_is_method_instance(fld))
arraylist_push(&s->uniquing_objs, (void*)(uintptr_t)offset);
else
assert(0 && "unknown object type with needs_uniquing set");
}
}

Expand Down Expand Up @@ -1224,7 +1227,15 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
write_pointerfield(s, (jl_value_t*)mi->sparam_vals);
continue;
}
else if (!jl_is_datatype(v)) {
else if (jl_is_datatype(v)) {
for (size_t i = 0; i < s->uniquing_super.len; i++) {
if (s->uniquing_super.items[i] == (void*)v) {
s->uniquing_super.items[i] = arraylist_pop(&s->uniquing_super);
arraylist_push(&s->uniquing_types, (void*)(uintptr_t)(reloc_offset|3));
}
}
}
else {
assert(jl_is_datatype_singleton(t) && "unreachable");
}
}
Expand Down Expand Up @@ -1589,6 +1600,9 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
ios_write(s->const_data, (char*)&dyn, sizeof(jl_fielddescdyn_t));
}
}
void *superidx = ptrhash_get(&serialization_order, dt->super);
if (s->incremental && superidx != HT_NOTFOUND && (char*)superidx - 1 - (char*)HT_NOTFOUND > item && needs_uniquing((jl_value_t*)dt->super))
arraylist_push(&s->uniquing_super, dt->super);
}
else if (jl_is_typename(v)) {
assert(f == s->s);
Expand Down Expand Up @@ -1633,6 +1647,7 @@ static void jl_write_values(jl_serializer_state *s) JL_GC_DISABLED
}
}
}
assert(s->uniquing_super.len == 0);
}

// In deserialization, create Symbols and set up the
Expand Down Expand Up @@ -2397,7 +2412,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2));
}
htable_new(&serialization_order, 25000);
htable_new(&unique_ready, 0);
htable_new(&nullptrs, 0);
arraylist_new(&object_worklist, 0);
arraylist_new(&serialization_queue, 0);
Expand All @@ -2420,6 +2434,7 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
arraylist_new(&s.relocs_list, 0);
arraylist_new(&s.gctags_list, 0);
arraylist_new(&s.uniquing_types, 0);
arraylist_new(&s.uniquing_super, 0);
arraylist_new(&s.uniquing_objs, 0);
arraylist_new(&s.fixup_types, 0);
arraylist_new(&s.fixup_objs, 0);
Expand Down Expand Up @@ -2652,6 +2667,11 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
arraylist_free(&object_worklist);
arraylist_free(&serialization_queue);
arraylist_free(&layout_table);
arraylist_free(&s.uniquing_types);
arraylist_free(&s.uniquing_super);
arraylist_free(&s.uniquing_objs);
arraylist_free(&s.fixup_types);
arraylist_free(&s.fixup_objs);
arraylist_free(&s.ccallable_list);
arraylist_free(&s.relocs_list);
arraylist_free(&s.gctags_list);
Expand All @@ -2661,7 +2681,6 @@ static void jl_save_system_image_to_stream(ios_t *f, jl_array_t *mod_array,
if (worklist)
htable_free(&external_objects);
htable_free(&serialization_order);
htable_free(&unique_ready);
htable_free(&nullptrs);
htable_free(&symbol_table);
htable_free(&fptr_to_id);
Expand Down Expand Up @@ -3026,31 +3045,43 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
uintptr_t item = (uintptr_t)s.uniquing_types.items[i];
// check whether we are operating on the typetag
// (needing to ignore GC bits) or a regular field
int tag = (item & 1) == 1;
// check whether this is a gvar index
int gvar = (item & 2) == 2;
// and check whether this is a gvar index
int tag = (item & 3);
item &= ~(uintptr_t)3;
uintptr_t *pfld;
jl_value_t **obj, *newobj;
if (gvar) {
if (tag == 3) {
obj = (jl_value_t**)(image_base + item);
pfld = NULL;
for (size_t i = 0; i < delay_list.len; i += 2) {
if (obj == (jl_value_t **)delay_list.items[i + 0]) {
pfld = (uintptr_t*)delay_list.items[i + 1];
delay_list.items[i + 1] = arraylist_pop(&delay_list);
delay_list.items[i + 0] = arraylist_pop(&delay_list);
break;
}
}
assert(pfld);
}
else if (tag == 2) {
if (image->gvars_base == NULL)
continue;
item >>= 2;
assert(item < s.gvar_record->size / sizeof(reloc_t));
pfld = sysimg_gvars(image->gvars_base, image->gvars_offsets, item);
obj = *(jl_value_t***)pfld;
assert(tag == 0);
}
else {
pfld = (uintptr_t*)(image_base + item);
if (tag)
if (tag == 1)
obj = (jl_value_t**)jl_typeof(jl_valueof(pfld));
else
obj = *(jl_value_t***)pfld;
if ((char*)obj > (char*)pfld) {
// this must be the super field
assert(tag == 0);
arraylist_push(&delay_list, pfld);
arraylist_push(&delay_list, obj);
arraylist_push(&delay_list, pfld);
ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid
*pfld = (uintptr_t)NULL;
continue;
Expand Down Expand Up @@ -3100,25 +3131,14 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
assert(newobj != jl_nothing);
arraylist_push(&cleanup_list, (void*)obj);
}
if (tag)
if (tag == 1)
*pfld = (uintptr_t)newobj | GC_OLD | GC_IN_IMAGE;
else
*pfld = (uintptr_t)newobj;
assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
assert(jl_typetagis(obj, otyp));
}
// A few fields (reached via super) might be self-recursive. This is rare, but handle them now.
// They cannot be instances though, since the type must fully exist before the singleton field can be allocated
for (size_t i = 0; i < delay_list.len; ) {
uintptr_t *pfld = (uintptr_t*)delay_list.items[i++];
jl_value_t **obj = (jl_value_t **)delay_list.items[i++];
assert(jl_is_datatype(obj));
jl_datatype_t *dt = (jl_datatype_t*)obj[0];
assert(jl_is_datatype(dt));
jl_value_t *newobj = (jl_value_t*)dt;
*pfld = (uintptr_t)newobj;
assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
}
assert(delay_list.len == 0);
arraylist_free(&delay_list);
// now that all the fields of dt are assigned and unique, copy them into
// their final newdt memory location: this ensures we do not accidentally
Expand Down Expand Up @@ -3166,11 +3186,12 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl
for (size_t i = 0; i < s.uniquing_objs.len; i++) {
uintptr_t item = (uintptr_t)s.uniquing_objs.items[i];
// check whether this is a gvar index
int gvar = (item & 2) == 2;
int tag = (item & 3);
assert(tag == 0 || tag == 2);
item &= ~(uintptr_t)3;
uintptr_t *pfld;
jl_value_t **obj, *newobj;
if (gvar) {
if (tag == 2) {
if (image->gvars_base == NULL)
continue;
item >>= 2;
Expand Down
13 changes: 6 additions & 7 deletions src/staticdata_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,15 @@ int must_be_new_dt(jl_value_t *t, htable_t *news, char *image_base, size_t sizeo
jl_datatype_t *dt = (jl_datatype_t*)t;
assert(jl_object_in_image((jl_value_t*)dt->name) && "type_in_worklist mistake?");
jl_datatype_t *super = dt->super;
// check if super is news, since then we must be new also
// (it is also possible that super is indeterminate now, wait for `t`
// to be resolved, then will be determined later and fixed up by the
// delay_list, for this and any other references to it).
while (super != jl_any_type) {
assert(super);
// fast-path: check if super is in news, since then we must be new also
// (it is also possible that super is indeterminate or NULL right now,
// waiting for `t` to be resolved, then will be determined later as
// soon as possible afterwards).
while (super != NULL && super != jl_any_type) {
if (ptrhash_has(news, (void*)super))
return 1;
if (!(image_base < (char*)super && (char*)super <= image_base + sizeof_sysimg))
break; // fast-path for rejection of super
break; // the rest must all be non-new
// otherwise super might be something that was not cached even though a later supertype might be
// for example while handling `Type{Mask{4, U} where U}`, if we have `Mask{4, U} <: AbstractSIMDVector{4}`
super = super->super;
Expand Down
6 changes: 5 additions & 1 deletion test/precompile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,8 @@ precompile_test_harness(false) do dir
d = den(a)
return h
end
abstract type AbstractAlgebraMap{A} end
struct GAPGroupHomomorphism{A, B} <: AbstractAlgebraMap{GAPGroupHomomorphism{B, A}} end
end
""")
write(Foo2_file,
Expand All @@ -130,7 +132,7 @@ precompile_test_harness(false) do dir
write(Foo_file,
"""
module $Foo_module
import $FooBase_module, $FooBase_module.typeA
import $FooBase_module, $FooBase_module.typeA, $FooBase_module.GAPGroupHomomorphism
import $Foo2_module: $Foo2_module, override, overridenc
import $FooBase_module.hash
import Test
Expand Down Expand Up @@ -211,6 +213,8 @@ precompile_test_harness(false) do dir
Base.convert(::Type{Some{Value18343}}, ::Value18343{Some}) = 2
Base.convert(::Type{Ref}, ::Value18343{T}) where {T} = 3
const GAPType1 = GAPGroupHomomorphism{Nothing, Nothing}
const GAPType2 = GAPGroupHomomorphism{1, 2}
# issue #28297
mutable struct Result
Expand Down

0 comments on commit cc26004

Please sign in to comment.