Skip to content

Commit 25af093

Browse files
kg authored and Ruihan-Yin committed
[wasm] Optimize bundled_resources key creation, hashing, and comparison (dotnet#101460)
Optimize bundled_resources key creation and hashing. Migrate bundled_resources to simdhash.
1 parent f1600ff commit 25af093

File tree

5 files changed

+80
-41
lines changed

5 files changed

+80
-41
lines changed

src/mono/mono/eglib/eglib-remap.h

+1
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@
9898
#define g_log_set_fatal_mask monoeg_g_log_set_fatal_mask
9999
#define g_logv monoeg_g_logv
100100
#define g_memdup monoeg_g_memdup
101+
#define g_memrchr monoeg_g_memrchr
101102
#define g_mem_set_vtable monoeg_g_mem_set_vtable
102103
#define g_mem_get_vtable monoeg_g_mem_get_vtable
103104
#define g_mkdtemp monoeg_g_mkdtemp

src/mono/mono/eglib/glib.h

+1
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,7 @@ gchar *g_strchug (gchar *str);
375375
gchar *g_strchomp (gchar *str);
376376
gchar *g_strnfill (gsize length, gchar fill_char);
377377
gsize g_strnlen (const char*, gsize);
378+
const gchar *g_memrchr (const char *s, char c, size_t n);
378379

379380
void g_strdelimit (char *string, char delimiter, char new_delimiter);
380381

src/mono/mono/eglib/gstr.c

+9
Original file line numberDiff line numberDiff line change
@@ -783,3 +783,12 @@ g_strnlen (const char* s, gsize n)
783783
for (i = 0; i < n && s [i]; ++i) ;
784784
return i;
785785
}
786+
787+
/*
 * g_memrchr:
 * Scans the first n bytes of s backwards and returns a pointer to the last
 * byte equal to c, or NULL when no byte in that range matches. The scan is
 * bounded purely by n; NUL bytes inside the range are not treated specially.
 */
const gchar *
g_memrchr (const char *s, char c, size_t n)
{
	for (size_t remaining = n; remaining > 0; remaining--) {
		size_t idx = remaining - 1;
		if (s[idx] == c)
			return s + idx;
	}

	return NULL;
}

src/mono/mono/metadata/bundled-resources-internals.h

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ typedef enum {
1717

1818
typedef void (*free_bundled_resource_func)(void *, void*);
1919

20+
// WARNING: The layout of these structs cannot change because EmitBundleBase.cs depends on it!
2021
typedef struct _MonoBundledResource {
2122
MonoBundledResourceType type;
2223
const char *id;

src/mono/mono/metadata/bundled-resources.c

+68-41
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@
88
#include <mono/metadata/appdomain.h>
99
#include <mono/metadata/bundled-resources-internals.h>
1010
#include <mono/metadata/webcil-loader.h>
11+
#include "../native/containers/dn-simdhash-specializations.h"
12+
#include "../native/containers/dn-simdhash-utils.h"
1113

12-
static GHashTable *bundled_resources = NULL;
14+
static dn_simdhash_ght_t *bundled_resources = NULL;
15+
static dn_simdhash_ptr_ptr_t *bundled_resource_key_lookup_table = NULL;
1316
static bool bundled_resources_contains_assemblies = false;
1417
static bool bundled_resources_contains_satellite_assemblies = false;
1518

@@ -31,8 +34,10 @@ mono_bundled_resources_free (void)
3134
{
3235
g_assert (mono_runtime_is_shutting_down ());
3336

34-
g_hash_table_destroy (bundled_resources);
37+
dn_simdhash_free (bundled_resources);
38+
dn_simdhash_free (bundled_resource_key_lookup_table);
3539
bundled_resources = NULL;
40+
bundled_resource_key_lookup_table = NULL;
3641

3742
bundled_resources_contains_assemblies = false;
3843
bundled_resources_contains_satellite_assemblies = false;
@@ -50,6 +55,12 @@ bundled_resources_value_destroy_func (void *resource)
5055
MonoBundledResource *value = (MonoBundledResource *)resource;
5156
if (value->free_func)
5257
value->free_func (resource, value->free_data);
58+
59+
char *key;
60+
if (dn_simdhash_ptr_ptr_try_get_value (bundled_resource_key_lookup_table, (void *)value->id, (void **)&key)) {
61+
dn_simdhash_ptr_ptr_try_remove (bundled_resource_key_lookup_table, (void *)value->id);
62+
g_free (key);
63+
}
5364
}
5465

5566
static bool
@@ -62,48 +73,51 @@ bundled_resources_is_known_assembly_extension (const char *ext)
6273
#endif
6374
}
6475

65-
static gboolean
66-
bundled_resources_resource_id_equal (const char *id_one, const char *id_two)
76+
// If a bundled resource has a known assembly extension, we strip the extension from its name
77+
// This ensures that lookups for foo.dll will work even if the assembly is in a webcil container
78+
static char *
79+
key_from_id (const char *id, char *buffer, guint buffer_len)
6780
{
68-
const char *extension_one = strrchr (id_one, '.');
69-
const char *extension_two = strrchr (id_two, '.');
70-
if (extension_one && extension_two && bundled_resources_is_known_assembly_extension (extension_one) && bundled_resources_is_known_assembly_extension (extension_two)) {
71-
size_t len_one = extension_one - id_one;
72-
size_t len_two = extension_two - id_two;
73-
return (len_one == len_two) && !strncmp (id_one, id_two, len_one);
81+
size_t id_length = strlen (id),
82+
extension_offset = -1;
83+
const char *extension = g_memrchr (id, '.', id_length);
84+
if (extension)
85+
extension_offset = extension - id;
86+
if (!buffer) {
87+
// Add space for .dll and null terminator
88+
buffer_len = (guint)(id_length + 6);
89+
buffer = g_malloc (buffer_len);
7490
}
91+
buffer[0] = 0;
7592

76-
return !strcmp (id_one, id_two);
93+
if (extension_offset && bundled_resources_is_known_assembly_extension (extension)) {
94+
// Subtract from buffer_len to make sure we have space for .dll
95+
g_strlcpy (buffer, id, MIN(buffer_len - 4, extension_offset + 2));
96+
strcat (buffer, "dll");
97+
} else {
98+
g_strlcpy (buffer, id, MIN(buffer_len, id_length + 1));
99+
}
100+
101+
return buffer;
77102
}
78103

79-
static guint
80-
bundled_resources_resource_id_hash (const char *id)
104+
static gboolean
105+
bundled_resources_resource_id_equal (const char *key_one, const char *key_two)
81106
{
82-
const char *current = id;
83-
const char *extension = NULL;
84-
guint previous_hash = 0;
85-
guint hash = 0;
86-
87-
while (*current) {
88-
hash = (hash << 5) - (hash + *current);
89-
if (*current == '.') {
90-
extension = current;
91-
previous_hash = hash;
92-
}
93-
current++;
94-
}
95-
96-
// alias all extensions to .dll
97-
if (extension && bundled_resources_is_known_assembly_extension (extension)) {
98-
hash = previous_hash;
99-
hash = (hash << 5) - (hash + 'd');
100-
hash = (hash << 5) - (hash + 'l');
101-
hash = (hash << 5) - (hash + 'l');
102-
}
107+
return strcmp (key_one, key_two) == 0;
108+
}
103109

104-
return hash;
110+
static guint32
111+
bundled_resources_resource_id_hash (const char *key)
112+
{
113+
// FIXME: Seed
114+
// FIXME: We should cache the hash code so rehashes are cheaper
115+
return MurmurHash3_32_streaming ((const uint8_t *)key, 0);
105116
}
106117

118+
static MonoBundledResource *
119+
bundled_resources_get (const char *id);
120+
107121
//---------------------------------------------------------------------------------------
108122
//
109123
// mono_bundled_resources_add handles bundling of many types of resources to circumvent
@@ -130,7 +144,11 @@ mono_bundled_resources_add (MonoBundledResource **resources_to_bundle, uint32_t
130144
g_assert (!domain);
131145

132146
if (!bundled_resources)
133-
bundled_resources = g_hash_table_new_full ((GHashFunc)bundled_resources_resource_id_hash, (GEqualFunc)bundled_resources_resource_id_equal, NULL, bundled_resources_value_destroy_func);
147+
// FIXME: Choose a good initial capacity to avoid rehashes during startup. I picked one at random
148+
bundled_resources = dn_simdhash_ght_new_full ((GHashFunc)bundled_resources_resource_id_hash, (GEqualFunc)bundled_resources_resource_id_equal, NULL, bundled_resources_value_destroy_func, 2048, NULL);
149+
150+
if (!bundled_resource_key_lookup_table)
151+
bundled_resource_key_lookup_table = dn_simdhash_ptr_ptr_new (2048, NULL);
134152

135153
bool assemblyAdded = false;
136154
bool satelliteAssemblyAdded = false;
@@ -143,7 +161,13 @@ mono_bundled_resources_add (MonoBundledResource **resources_to_bundle, uint32_t
143161
if (resource_to_bundle->type == MONO_BUNDLED_SATELLITE_ASSEMBLY)
144162
satelliteAssemblyAdded = true;
145163

146-
g_hash_table_insert (bundled_resources, (gpointer) resource_to_bundle->id, resource_to_bundle);
164+
// Generate the hash key for the id (strip certain extensions) and store it
165+
// so that we can free it later when freeing the bundled data
166+
char *key = key_from_id (resource_to_bundle->id, NULL, 0);
167+
dn_simdhash_ptr_ptr_try_add (bundled_resource_key_lookup_table, (void *)resource_to_bundle->id, key);
168+
169+
g_assert (dn_simdhash_ght_try_add (bundled_resources, (gpointer) key, resource_to_bundle));
170+
// g_assert (bundled_resources_get (resource_to_bundle->id) == resource_to_bundle);
147171
}
148172

149173
if (assemblyAdded)
@@ -172,7 +196,12 @@ bundled_resources_get (const char *id)
172196
if (!bundled_resources)
173197
return NULL;
174198

175-
return g_hash_table_lookup (bundled_resources, id);
199+
char key_buffer[1024];
200+
key_from_id(id, key_buffer, sizeof(key_buffer));
201+
202+
MonoBundledResource *result = NULL;
203+
dn_simdhash_ght_try_get_value (bundled_resources, key_buffer, (void **)&result);
204+
return result;
176205
}
177206

178207
//---------------------------------------------------------------------------------------
@@ -364,9 +393,7 @@ bool
364393
mono_bundled_resources_get_data_resource_values (const char *id, const uint8_t **data_out, uint32_t *size_out)
365394
{
366395
MonoBundledDataResource *bundled_data_resource = bundled_resources_get_data_resource (id);
367-
if (!bundled_data_resource ||
368-
!bundled_data_resource->data.data ||
369-
bundled_data_resource->data.size == 0)
396+
if (!bundled_data_resource || !bundled_data_resource->data.data)
370397
return false;
371398

372399
if (data_out)

0 commit comments

Comments
 (0)