Skip to content

Commit

Permalink
Mark phase prefetching. (#73375)
Browse files Browse the repository at this point in the history
This adds prefetching to the mark phase.

The idea is that once we have established that an object is in one of the generations we want to collect, we prefetch its memory before we determine whether we have marked it already. This is because the mark bit is in the object itself, and thus requires accessing the object's memory.

As the prefetching will take some time to take effect, we park the object in a queue (see type mark_queue_t below). We then retrieve an older object from the queue, and test whether it has been marked. This should be faster, because we have issued a prefetch for this older object's memory a while back.

In quite a few places we now need to drain the queue to ensure correctness - see calls to drain_mark_queue().
  • Loading branch information
PeterSolMS authored Aug 9, 2022
1 parent b8554be commit 4570911
Show file tree
Hide file tree
Showing 2 changed files with 221 additions and 18 deletions.
215 changes: 197 additions & 18 deletions src/coreclr/gc/gc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2854,6 +2854,8 @@ size_t gc_heap::expand_mechanisms_per_heap[max_expand_mechanisms_count];

size_t gc_heap::interesting_mechanism_bits_per_heap[max_gc_mechanism_bits_count];

mark_queue_t gc_heap::mark_queue;

#endif // MULTIPLE_HEAPS

/* end of per heap static initialization */
Expand Down Expand Up @@ -23453,7 +23455,7 @@ BOOL gc_heap::gc_mark (uint8_t* o, uint8_t* low, uint8_t* high, int condemned_ge
{
return FALSE;
}
set_marked(o);
set_marked (o);
return TRUE;
}
return FALSE;
Expand Down Expand Up @@ -23777,14 +23779,31 @@ void gc_heap::save_post_plug_info (uint8_t* last_pinned_plug, uint8_t* last_obje
}
}

//#define PREFETCH
// enable on processors known to have a useful prefetch instruction
#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)
#define PREFETCH
#endif

#ifdef PREFETCH
__declspec(naked) void __fastcall Prefetch(void* addr)
inline void Prefetch(void* addr)
{
__asm {
PREFETCHT0 [ECX]
ret
};
#ifdef TARGET_WINDOWS

#if defined(TARGET_AMD64) || defined(TARGET_X86)

#ifndef _MM_HINT_T0
#define _MM_HINT_T0 1
#endif
_mm_prefetch((const char*)addr, _MM_HINT_T0);
#elif defined(TARGET_ARM64)
__prefetch((const char*)addr);
#endif //defined(TARGET_AMD64) || defined(TARGET_X86)

#elif defined(TARGET_UNIX) || defined(TARGET_OSX)
__builtin_prefetch(addr);
#else //!(TARGET_WINDOWS || TARGET_UNIX || TARGET_OSX)
UNREFERENCED_PARAMETER(addr);
#endif //TARGET_WINDOWS
}
#else //PREFETCH
inline void Prefetch (void* addr)
Expand Down Expand Up @@ -23840,6 +23859,114 @@ BOOL ref_p (uint8_t* r)
return (straight_ref_p (r) || partial_object_p (r));
}

// construct an empty mark queue: no object is parked in any slot yet,
// and insertion starts at slot 0
mark_queue_t::mark_queue_t() : curr_slot_index(0)
{
    size_t i = 0;
    while (i < slot_count)
    {
        slot_table[i++] = nullptr;
    }
}

// place an object in the mark queue
// returns a *different* object or nullptr
// if a non-null object is returned, that object is newly marked
// object o *must* be in a condemned generation
FORCEINLINE
uint8_t *mark_queue_t::queue_mark(uint8_t *o)
{
    // start pulling the object's memory - which contains its mark bit -
    // into the cache
    Prefetch (o);

    // while the prefetch takes effect, swap the incoming object with
    // whichever object has been sitting in the current slot; by now the
    // older object's prefetch has (hopefully) completed
    size_t index = curr_slot_index;
    uint8_t* parked = slot_table[index];
    slot_table[index] = o;
    curr_slot_index = (index + 1) % slot_count;

    // nothing was parked here yet, or the parked object is already marked
    if ((parked == nullptr) || marked (parked))
    {
        return nullptr;
    }

    // this touches the parked object's memory (the mark bit lives in the
    // method table word of the object itself)
    set_marked (parked);
    return parked;
}

// place an object in the mark queue
// returns a *different* object or nullptr
// if a non-null object is returned, that object is newly marked
// check first whether the object o is indeed in a condemned generation
FORCEINLINE
uint8_t *mark_queue_t::queue_mark(uint8_t *o, int condemned_gen)
{
#ifdef USE_REGIONS
    // reject pointers outside the GC heap, and objects whose region belongs
    // to a generation we are not collecting (short-circuit keeps the region
    // lookup from running on non-heap pointers)
    if (!is_in_heap_range (o) ||
        ((condemned_gen != max_generation) &&
         (gc_heap::get_region_gen_num (o) > condemned_gen)))
    {
        return nullptr;
    }
    return queue_mark (o);
#else //USE_REGIONS
    // the segment-based build encodes the condemned range in gc_low/gc_high,
    // so callers pass -1 here
    assert (condemned_gen == -1);

#ifdef MULTIPLE_HEAPS
    if (o != nullptr)
    {
        gc_heap* hp = gc_heap::heap_of_gc (o);
        assert (hp);
        if ((o >= hp->gc_low) && (o < hp->gc_high))
        {
            return queue_mark (o);
        }
    }
    return nullptr;
#else //MULTIPLE_HEAPS
    return ((o >= gc_heap::gc_low) && (o < gc_heap::gc_high)) ? queue_mark (o) : nullptr;
#endif //MULTIPLE_HEAPS
#endif //USE_REGIONS
}

// retrieve a newly marked object from the queue
// returns nullptr if there is no such object
uint8_t* mark_queue_t::get_next_marked()
{
    // scan at most one full revolution of the ring, starting at the
    // current slot, emptying each slot as we inspect it
    size_t index = curr_slot_index;
    for (size_t inspected = 0; inspected < slot_count; inspected++)
    {
        uint8_t* candidate = slot_table[index];
        slot_table[index] = nullptr;
        index = (index + 1) % slot_count;
        if ((candidate != nullptr) && !marked (candidate))
        {
            set_marked (candidate);
            // remember where to resume so later calls keep draining from here
            curr_slot_index = index;
            return candidate;
        }
    }
    return nullptr;
}

// the queue must be fully drained before destruction - an object left in a
// slot would mean a reachable object was never traced
mark_queue_t::~mark_queue_t()
{
    size_t i = slot_count;
    while (i > 0)
    {
        i--;
        assert (slot_table[i] == nullptr);
    }
}

void gc_heap::mark_object_simple1 (uint8_t* oo, uint8_t* start THREAD_NUMBER_DCL)
{
SERVER_SC_MARK_VOLATILE(uint8_t*)* mark_stack_tos = (SERVER_SC_MARK_VOLATILE(uint8_t*)*)mark_stack_array;
Expand Down Expand Up @@ -23899,9 +24026,8 @@ void gc_heap::mark_object_simple1 (uint8_t* oo, uint8_t* start THREAD_NUMBER_DCL

go_through_object_cl (method_table(oo), oo, s, ppslot,
{
uint8_t* o = *ppslot;
Prefetch(o);
if (gc_mark (o, gc_low, gc_high, condemned_gen))
uint8_t* o = mark_queue.queue_mark(*ppslot, condemned_gen);
if (o != nullptr)
{
if (full_p)
{
Expand Down Expand Up @@ -23997,9 +24123,8 @@ void gc_heap::mark_object_simple1 (uint8_t* oo, uint8_t* start THREAD_NUMBER_DCL
go_through_object (method_table(oo), oo, s, ppslot,
start, use_start, (oo + s),
{
uint8_t* o = *ppslot;
Prefetch(o);
if (gc_mark (o, gc_low, gc_high,condemned_gen))
uint8_t* o = mark_queue.queue_mark(*ppslot, condemned_gen);
if (o != nullptr)
{
if (full_p)
{
Expand Down Expand Up @@ -24438,16 +24563,17 @@ gc_heap::mark_object_simple (uint8_t** po THREAD_NUMBER_DCL)
snoop_stat.objects_checked_count++;
#endif //SNOOP_STATS

if (gc_mark1 (o))
o = mark_queue.queue_mark (o);
if (o != nullptr)
{
m_boundary (o);
size_t s = size (o);
add_to_promoted_bytes (o, s, thread);
{
go_through_object_cl (method_table(o), o, s, poo,
{
uint8_t* oo = *poo;
if (gc_mark (oo, gc_low, gc_high, condemned_gen))
uint8_t* oo = mark_queue.queue_mark(*poo, condemned_gen);
if (oo != nullptr)
{
m_boundary (oo);
add_to_promoted_bytes (oo, thread);
Expand Down Expand Up @@ -24484,6 +24610,45 @@ void gc_heap::mark_object (uint8_t* o THREAD_NUMBER_DCL)
#endif //USE_REGIONS
}

// Empty the mark queue: mark every object still parked in it and trace
// through each object's outgoing references, mirroring the tracing done in
// mark_object_simple. Callers invoke this wherever correctness requires all
// reachable objects to have their mark bits actually set (e.g. before
// dependent-handle scanning and before firing per-root mark events).
void gc_heap::drain_mark_queue ()
{
    // with regions, queue_mark needs the condemned generation to decide
    // whether a referenced object is subject to this GC; the segment-based
    // path encodes that in gc_low/gc_high and uses -1 as a sentinel
    // (see the assert in the two-argument queue_mark)
    int condemned_gen =
#ifdef USE_REGIONS
        settings.condemned_generation;
#else
        -1;
#endif //USE_REGIONS

#ifdef MULTIPLE_HEAPS
    THREAD_FROM_HEAP;
#else
    const int thread = 0;
#endif //MULTIPLE_HEAPS

    uint8_t* o;
    // get_next_marked empties the queue as a side effect, marking and
    // returning each not-yet-marked object it finds
    while ((o = mark_queue.get_next_marked()) != nullptr)
    {
        m_boundary (o);
        size_t s = size (o);
        add_to_promoted_bytes (o, s, thread);
        if (contain_pointers_or_collectible (o))
        {
            // trace the object's references; newly marked children are
            // traced recursively via mark_object_simple1
            go_through_object_cl (method_table(o), o, s, poo,
            {
                uint8_t* oo = mark_queue.queue_mark(*poo, condemned_gen);
                if (oo != nullptr)
                {
                    m_boundary (oo);
                    add_to_promoted_bytes (oo, thread);
                    if (contain_pointers_or_collectible (oo))
                        mark_object_simple1 (oo, oo THREAD_NUMBER_ARG);
                }
            }
            );
        }
    }
}

#ifdef BACKGROUND_GC

#ifdef USE_REGIONS
Expand Down Expand Up @@ -25660,6 +25825,8 @@ void gc_heap::scan_dependent_handles (int condemned_gen_number, ScanContext *sc,
if (GCScan::GcDhUnpromotedHandlesExist(sc))
s_fUnpromotedHandles = TRUE;

drain_mark_queue();

// Synchronize all the threads so we can read our state variables safely. The shared variable
// s_fScanRequired, indicating whether we should scan the tables or terminate the loop, will be set by
// a single thread inside the join.
Expand Down Expand Up @@ -25761,6 +25928,8 @@ void gc_heap::scan_dependent_handles (int condemned_gen_number, ScanContext *sc,
if (process_mark_overflow(condemned_gen_number))
fUnscannedPromotions = true;

drain_mark_queue();

// Perform the scan and set the flag if any promotions resulted.
if (GCScan::GcDhReScan(sc))
fUnscannedPromotions = true;
Expand Down Expand Up @@ -26187,6 +26356,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
if ((condemned_gen_number == max_generation) && (num_sizedrefs > 0))
{
GCScan::GcScanSizedRefs(GCHeap::Promote, condemned_gen_number, max_generation, &sc);
drain_mark_queue();
fire_mark_event (ETW::GC_ROOT_SIZEDREF, current_promoted_bytes, last_promoted_bytes);

#ifdef MULTIPLE_HEAPS
Expand All @@ -26210,26 +26380,30 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
GCScan::GcScanRoots(GCHeap::Promote,
condemned_gen_number, max_generation,
&sc);
drain_mark_queue();
fire_mark_event (ETW::GC_ROOT_STACK, current_promoted_bytes, last_promoted_bytes);

#ifdef BACKGROUND_GC
if (gc_heap::background_running_p())
{
scan_background_roots (GCHeap::Promote, heap_number, &sc);
drain_mark_queue();
fire_mark_event (ETW::GC_ROOT_BGC, current_promoted_bytes, last_promoted_bytes);
}
#endif //BACKGROUND_GC

#ifdef FEATURE_PREMORTEM_FINALIZATION
dprintf(3, ("Marking finalization data"));
finalize_queue->GcScanRoots(GCHeap::Promote, heap_number, 0);
drain_mark_queue();
fire_mark_event (ETW::GC_ROOT_FQ, current_promoted_bytes, last_promoted_bytes);
#endif // FEATURE_PREMORTEM_FINALIZATION

dprintf(3,("Marking handle table"));
GCScan::GcScanHandles(GCHeap::Promote,
condemned_gen_number, max_generation,
&sc);
drain_mark_queue();
fire_mark_event (ETW::GC_ROOT_HANDLES, current_promoted_bytes, last_promoted_bytes);

if (!full_p)
Expand Down Expand Up @@ -26341,6 +26515,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
update_old_card_survived();
#endif //USE_REGIONS

drain_mark_queue();
fire_mark_event (ETW::GC_ROOT_OLDER, current_promoted_bytes, last_promoted_bytes);
}
}
Expand All @@ -26349,6 +26524,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
if (do_mark_steal_p)
{
mark_steal();
drain_mark_queue();
fire_mark_event (ETW::GC_ROOT_STEAL, current_promoted_bytes, last_promoted_bytes);
}
#endif //MH_SC_MARK
Expand All @@ -26362,6 +26538,7 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
// handle table has been fully promoted.
GCScan::GcDhInitialScan(GCHeap::Promote, condemned_gen_number, max_generation, &sc);
scan_dependent_handles(condemned_gen_number, &sc, true);
drain_mark_queue();
fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes);

#ifdef MULTIPLE_HEAPS
Expand Down Expand Up @@ -26444,12 +26621,14 @@ void gc_heap::mark_phase (int condemned_gen_number, BOOL mark_only_p)
#ifdef FEATURE_PREMORTEM_FINALIZATION
dprintf (3, ("Finalize marking"));
finalize_queue->ScanForFinalization (GCHeap::Promote, condemned_gen_number, mark_only_p, __this);
drain_mark_queue();
fire_mark_event (ETW::GC_ROOT_NEW_FQ, current_promoted_bytes, last_promoted_bytes);
GCToEEInterface::DiagWalkFReachableObjects(__this);

// Scan dependent handles again to promote any secondaries associated with primaries that were promoted
// for finalization. As before scan_dependent_handles will also process any mark stack overflow.
scan_dependent_handles(condemned_gen_number, &sc, false);
drain_mark_queue();
fire_mark_event (ETW::GC_ROOT_DH_HANDLES, current_promoted_bytes, last_promoted_bytes);
#endif //FEATURE_PREMORTEM_FINALIZATION

Expand Down Expand Up @@ -31927,7 +32106,7 @@ uint8_t* tree_search (uint8_t* tree, uint8_t* old_address)
assert (candidate < tree);
candidate = tree;
tree = tree + cn;
Prefetch (tree - 8);
Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left);
continue;
}
else
Expand All @@ -31938,7 +32117,7 @@ uint8_t* tree_search (uint8_t* tree, uint8_t* old_address)
if ((cn = node_left_child (tree)) != 0)
{
tree = tree + cn;
Prefetch (tree - 8);
Prefetch (&((plug_and_pair*)tree)[-1].m_pair.left);
continue;
}
else
Expand Down
Loading

0 comments on commit 4570911

Please sign in to comment.