Skip to content

Commit 485ed8e

Browse files
kgmichaelgsharp
authored andcommitted
[mono] Optimize startup vtable setup (dotnet#101312)
* Add new [ptr, ptr] -> ptr simdhash variant for caching * Cache mono_class_implement_interface_slow because we perform many redundant calls to it during application startup * Verify cache in checked builds
1 parent 6c90aae commit 485ed8e

8 files changed

+196
-9
lines changed

src/mono/mono/metadata/CMakeLists.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,8 @@ endif()
4545
set(imported_native_sources
4646
../../../native/containers/dn-simdhash.c
4747
../../../native/containers/dn-simdhash-string-ptr.c
48-
../../../native/containers/dn-simdhash-u32-ptr.c)
48+
../../../native/containers/dn-simdhash-u32-ptr.c
49+
../../../native/containers/dn-simdhash-ptrpair-ptr.c)
4950

5051
set(metadata_common_sources
5152
appdomain.c

src/mono/mono/metadata/class-setup-vtable.c

+8-1
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,13 @@ mono_method_get_method_definition (MonoMethod *method)
773773
static gboolean
774774
verify_class_overrides (MonoClass *klass, MonoMethod **overrides, int onum)
775775
{
776+
// on windows and arm, we define NDEBUG for release builds
777+
// on browser and wasi, we define DEBUG for debug builds
778+
#ifdef ENABLE_CHECKED_BUILD
779+
if (klass->image == mono_defaults.corlib)
780+
return TRUE;
781+
#endif
782+
776783
int i;
777784

778785
for (i = 0; i < onum; ++i) {
@@ -1760,7 +1767,7 @@ mono_class_setup_vtable_general (MonoClass *klass, MonoMethod **overrides, int o
17601767
MonoMethod *override = iface_overrides [i*2 + 1];
17611768
if (mono_class_is_gtd (override->klass)) {
17621769
override = mono_class_inflate_generic_method_full_checked (override, ic, mono_class_get_context (ic), error);
1763-
}
1770+
}
17641771
// there used to be code here to inflate decl if decl->is_inflated, but in https://github.com/dotnet/runtime/pull/64102#discussion_r790019545 we
17651772
// think that this does not correspond to any real code.
17661773
if (!apply_override (klass, ic, vtable, decl, override, &override_map, &override_class_map, &conflict_map))

src/mono/mono/metadata/class.c

+111-5
Original file line numberDiff line numberDiff line change
@@ -4331,12 +4331,16 @@ mono_class_is_variant_compatible_slow (MonoClass *klass, MonoClass *oklass)
43314331
}
43324332
return TRUE;
43334333
}
4334-
/*Check if @candidate implements the interface @target*/
4334+
43354335
static gboolean
4336-
mono_class_implement_interface_slow (MonoClass *target, MonoClass *candidate)
4336+
mono_class_implement_interface_slow_cached (MonoClass *target, MonoClass *candidate, dn_simdhash_ptrpair_ptr_t *cache);
4337+
4338+
static gboolean
4339+
mono_class_implement_interface_slow_uncached (MonoClass *target, MonoClass *candidate, dn_simdhash_ptrpair_ptr_t *cache)
43374340
{
43384341
ERROR_DECL (error);
43394342
int i;
4343+
43404344
gboolean is_variant = mono_class_has_variant_generic_params (target);
43414345

43424346
if (is_variant && MONO_CLASS_IS_INTERFACE_INTERNAL (candidate)) {
@@ -4365,7 +4369,7 @@ mono_class_implement_interface_slow (MonoClass *target, MonoClass *candidate)
43654369
return TRUE;
43664370
if (is_variant && mono_class_is_variant_compatible_slow (target, iface_class))
43674371
return TRUE;
4368-
if (mono_class_implement_interface_slow (target, iface_class))
4372+
if (mono_class_implement_interface_slow_cached (target, iface_class, cache))
43694373
return TRUE;
43704374
}
43714375
}
@@ -4390,7 +4394,7 @@ mono_class_implement_interface_slow (MonoClass *target, MonoClass *candidate)
43904394
if (is_variant && mono_class_is_variant_compatible_slow (target, candidate_interfaces [i]))
43914395
return TRUE;
43924396

4393-
if (mono_class_implement_interface_slow (target, candidate_interfaces [i]))
4397+
if (mono_class_implement_interface_slow_cached (target, candidate_interfaces [i], cache))
43944398
return TRUE;
43954399
}
43964400
}
@@ -4400,6 +4404,107 @@ mono_class_implement_interface_slow (MonoClass *target, MonoClass *candidate)
44004404
return FALSE;
44014405
}
44024406

4407+
// #define LOG_INTERFACE_CACHE_HITS 1
4408+
4409+
#if LOG_INTERFACE_CACHE_HITS
4410+
static gint64 implement_interface_hits = 0, implement_interface_misses = 0;
4411+
4412+
static void
4413+
log_hit_rate (dn_simdhash_ptrpair_ptr_t *cache)
4414+
{
4415+
gint64 total_calls = implement_interface_hits + implement_interface_misses;
4416+
if ((total_calls % 500) != 0)
4417+
return;
4418+
double hit_rate = implement_interface_hits * 100.0 / total_calls;
4419+
g_printf ("implement_interface cache hit rate: %f (%lld total calls). Overflow count: %u\n", hit_rate, total_calls, dn_simdhash_overflow_count (cache));
4420+
}
4421+
#endif
4422+
4423+
static gboolean
4424+
mono_class_implement_interface_slow_cached (MonoClass *target, MonoClass *candidate, dn_simdhash_ptrpair_ptr_t *cache)
4425+
{
4426+
gpointer cached_result = NULL;
4427+
dn_ptrpair_t key = { target, candidate };
4428+
gboolean result = 0, cache_hit = 0;
4429+
4430+
// Skip the caching logic for exact matches
4431+
if (candidate == target)
4432+
return TRUE;
4433+
4434+
cache_hit = dn_simdhash_ptrpair_ptr_try_get_value (cache, key, &cached_result);
4435+
if (cache_hit) {
4436+
// Testing shows a cache hit rate of 60% on S.R.Tests and S.T.J.Tests,
4437+
// and 40-50% for small app startup. Near-zero overflow count.
4438+
#if LOG_INTERFACE_CACHE_HITS
4439+
implement_interface_hits++;
4440+
log_hit_rate (cache);
4441+
#endif
4442+
result = (cached_result != NULL);
4443+
#ifndef ENABLE_CHECKED_BUILD
4444+
return result;
4445+
#endif
4446+
}
4447+
4448+
gboolean uncached_result = mono_class_implement_interface_slow_uncached (target, candidate, cache);
4449+
4450+
if (!cache_hit) {
4451+
#if LOG_INTERFACE_CACHE_HITS
4452+
implement_interface_misses++;
4453+
log_hit_rate (cache);
4454+
#endif
4455+
dn_simdhash_ptrpair_ptr_try_add (cache, key, uncached_result ? GUINT_TO_POINTER(1) : NULL);
4456+
}
4457+
4458+
#ifdef ENABLE_CHECKED_BUILD
4459+
if (cache_hit) {
4460+
if (result != uncached_result)
4461+
g_print (
4462+
"Cache mismatch for %s.%s and %s.%s: cached=%d, uncached=%d\n",
4463+
m_class_get_name_space (target), m_class_get_name (target),
4464+
m_class_get_name_space (candidate), m_class_get_name (candidate),
4465+
result, uncached_result
4466+
);
4467+
g_assert (result == uncached_result);
4468+
}
4469+
#endif
4470+
return uncached_result;
4471+
}
4472+
4473+
static dn_simdhash_ptrpair_ptr_t *implement_interface_scratch_cache = NULL;
4474+
4475+
/*Check if @candidate implements the interface @target*/
4476+
static gboolean
4477+
mono_class_implement_interface_slow (MonoClass *target, MonoClass *candidate)
4478+
{
4479+
gpointer cas_result;
4480+
gboolean result;
4481+
dn_simdhash_ptrpair_ptr_t *cache = (dn_simdhash_ptrpair_ptr_t *)mono_atomic_xchg_ptr ((volatile gpointer *)&implement_interface_scratch_cache, NULL);
4482+
if (!cache)
4483+
// Roughly 64KB of memory usage and big enough to have fast lookups
4484+
// Smaller is viable but makes the hit rate worse
4485+
cache = dn_simdhash_ptrpair_ptr_new (2048, NULL);
4486+
else if (dn_simdhash_count (cache) >= 2250) {
4487+
// FIXME: 2250 is arbitrary (roughly 256 11-item buckets w/load factor)
4488+
// One step down reduces hit rate by approximately 2-4%
4489+
// HACK: Only clear the scratch cache once it gets too big.
4490+
// The pattern is that (especially during startup), we have lots
4491+
// of mono_class_implement_interface_slow calls back to back that
4492+
// perform similar checks, so keeping the cache data around between
4493+
// sequential calls will potentially optimize them a lot.
4494+
dn_simdhash_clear (cache);
4495+
}
4496+
4497+
result = mono_class_implement_interface_slow_cached (target, candidate, cache);
4498+
4499+
// Under most circumstances we won't have multiple threads competing to run implement_interface_slow,
4500+
// so it's not worth making this thread-local and potentially keeping a cache instance around per-thread.
4501+
cas_result = mono_atomic_cas_ptr ((volatile gpointer *)&implement_interface_scratch_cache, cache, NULL);
4502+
if (cas_result != NULL)
4503+
dn_simdhash_free (cache);
4504+
4505+
return result;
4506+
}
4507+
44034508
/*
44044509
* Check if @oklass can be assigned to @klass.
44054510
* This function does the same as mono_class_is_assignable_from_internal but is safe to be used from mono_class_init_internal context.
@@ -4416,8 +4521,9 @@ mono_class_is_assignable_from_slow (MonoClass *target, MonoClass *candidate)
44164521
return TRUE;
44174522

44184523
/*If target is not an interface there is no need to check them.*/
4419-
if (MONO_CLASS_IS_INTERFACE_INTERNAL (target))
4524+
if (MONO_CLASS_IS_INTERFACE_INTERNAL (target)) {
44204525
return mono_class_implement_interface_slow (target, candidate);
4526+
}
44214527

44224528
if (m_class_is_delegate (target) && mono_class_has_variant_generic_params (target))
44234529
return mono_class_is_variant_compatible (target, candidate, FALSE);

src/native/containers/containers.cmake

+2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ list(APPEND SHARED_CONTAINER_SOURCES
1313
# dn-simdhash-string-ptr.c
1414
# dn-simdhash-u32-ptr.c
1515
# dn-simdhash-ptr-ptr.c
16+
# dn-simdhash-ght-compatible.c
17+
# dn-simdhash-ptrpair-ptr.c
1618
)
1719

1820
list(APPEND SHARED_CONTAINER_HEADERS
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
4+
#include <config.h>
5+
#include "dn-simdhash.h"
6+
7+
#include "dn-simdhash-utils.h"
8+
9+
typedef struct dn_ptrpair_t {
10+
void *first;
11+
void *second;
12+
} dn_ptrpair_t;
13+
14+
static inline uint32_t
15+
dn_ptrpair_t_hash (dn_ptrpair_t key)
16+
{
17+
return (MurmurHash3_32_ptr(key.first, 0) ^ MurmurHash3_32_ptr(key.second, 1));
18+
}
19+
20+
static inline uint8_t
21+
dn_ptrpair_t_equals (dn_ptrpair_t lhs, dn_ptrpair_t rhs)
22+
{
23+
return (lhs.first == rhs.first) && (lhs.second == rhs.second);
24+
}
25+
26+
#define DN_SIMDHASH_T dn_simdhash_ptrpair_ptr
27+
#define DN_SIMDHASH_KEY_T dn_ptrpair_t
28+
#define DN_SIMDHASH_VALUE_T void *
29+
#define DN_SIMDHASH_KEY_HASHER(hash, key) dn_ptrpair_t_hash(key)
30+
#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) dn_ptrpair_t_equals(lhs, rhs)
31+
#if SIZEOF_VOID_P == 8
32+
// 192 bytes holds 12 16-byte blocks, so 11 keys and one suffix table
33+
#define DN_SIMDHASH_BUCKET_CAPACITY 11
34+
#else
35+
// 128 bytes holds 16 8-byte blocks, so 14 keys and one suffix table
36+
#define DN_SIMDHASH_BUCKET_CAPACITY 14
37+
#endif
38+
39+
#include "dn-simdhash-specialization.h"

src/native/containers/dn-simdhash-specializations.h

+15
Original file line numberDiff line numberDiff line change
@@ -59,4 +59,19 @@ typedef struct dn_simdhash_str_key dn_simdhash_str_key;
5959

6060
#include "dn-simdhash-ght-compatible.h"
6161

62+
63+
typedef struct dn_ptrpair_t {
64+
void *first, *second;
65+
} dn_ptrpair_t;
66+
67+
#define DN_SIMDHASH_T dn_simdhash_ptrpair_ptr
68+
#define DN_SIMDHASH_KEY_T dn_ptrpair_t
69+
#define DN_SIMDHASH_VALUE_T void *
70+
71+
#include "dn-simdhash-specialization-declarations.h"
72+
73+
#undef DN_SIMDHASH_T
74+
#undef DN_SIMDHASH_KEY_T
75+
#undef DN_SIMDHASH_VALUE_T
76+
6277
#endif

src/native/containers/dn-simdhash.c

+14-2
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,7 @@ dn_simdhash_clear (dn_simdhash_t *hash)
119119
if (hash->vtable.destroy_all)
120120
hash->vtable.destroy_all(hash);
121121
hash->count = 0;
122-
// TODO: Scan through buckets sequentially and only erase ones with data in them
123-
// Maybe skip erasing the key slots too?
122+
// TODO: Implement a fast clear algorithm that scans buckets and only clears ones w/nonzero count
124123
memset(hash->buffers.buckets, 0, hash->buffers.buckets_length * hash->meta->bucket_size_bytes);
125124
// Skip this for performance; memset is especially slow in wasm
126125
// memset(hash->buffers.values, 0, hash->buffers.values_length * hash->meta->value_size);
@@ -140,6 +139,19 @@ dn_simdhash_count (dn_simdhash_t *hash)
140139
return hash->count;
141140
}
142141

142+
uint32_t
143+
dn_simdhash_overflow_count (dn_simdhash_t *hash)
144+
{
145+
assert(hash);
146+
uint32_t result = 0;
147+
for (uint32_t bucket_index = 0; bucket_index < hash->buffers.buckets_length; bucket_index++) {
148+
uint8_t *suffixes = ((uint8_t *)hash->buffers.buckets) + (bucket_index * hash->meta->bucket_size_bytes);
149+
uint8_t cascade_count = suffixes[DN_SIMDHASH_CASCADED_SLOT];
150+
result += cascade_count;
151+
}
152+
return result;
153+
}
154+
143155
void
144156
dn_simdhash_ensure_capacity (dn_simdhash_t *hash, uint32_t capacity)
145157
{

src/native/containers/dn-simdhash.h

+5
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,11 @@ dn_simdhash_capacity (dn_simdhash_t *hash);
144144
uint32_t
145145
dn_simdhash_count (dn_simdhash_t *hash);
146146

147+
// Returns the estimated number of items that have overflowed out of a bucket.
148+
// WARNING: This is expensive to calculate.
149+
uint32_t
150+
dn_simdhash_overflow_count (dn_simdhash_t *hash);
151+
147152
// Automatically resizes the table if it is too small to hold the requested number
148153
// of items. Will not shrink the table if it is already bigger.
149154
void

0 commit comments

Comments
 (0)