Skip to content

Commit 0c58523

Browse files
committed
Optimize bundled_resources key creation, hashing, and comparison
1 parent b5ee986 commit 0c58523

File tree

3 files changed

+79
-42
lines changed

3 files changed

+79
-42
lines changed

src/mono/mono/metadata/CMakeLists.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ set(imported_native_sources
4646
../../../native/containers/dn-simdhash.c
4747
../../../native/containers/dn-simdhash-string-ptr.c
4848
../../../native/containers/dn-simdhash-u32-ptr.c
49-
../../../native/containers/dn-simdhash-ptrpair-ptr.c)
49+
../../../native/containers/dn-simdhash-ptrpair-ptr.c
50+
../../../native/containers/dn-simdhash-ptr-ptr.c
51+
../../../native/containers/dn-simdhash-ght-compatible.c)
5052

5153
set(metadata_common_sources
5254
appdomain.c

src/mono/mono/metadata/bundled-resources-internals.h

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ typedef enum {
1717

1818
typedef void (*free_bundled_resource_func)(void *, void*);
1919

20+
// WARNING: The layout of these structs cannot change because EmitBundleBase.cs depends on it!
2021
typedef struct _MonoBundledResource {
2122
MonoBundledResourceType type;
2223
const char *id;

src/mono/mono/metadata/bundled-resources.c

+75-41
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,11 @@
88
#include <mono/metadata/appdomain.h>
99
#include <mono/metadata/bundled-resources-internals.h>
1010
#include <mono/metadata/webcil-loader.h>
11+
#include "../native/containers/dn-simdhash-specializations.h"
12+
#include "../native/containers/dn-simdhash-utils.h"
1113

12-
static GHashTable *bundled_resources = NULL;
14+
static dn_simdhash_ght_t *bundled_resources = NULL;
15+
static dn_simdhash_ptr_ptr_t *bundled_resource_key_lookup_table = NULL;
1316
static bool bundled_resources_contains_assemblies = false;
1417
static bool bundled_resources_contains_satellite_assemblies = false;
1518

@@ -31,8 +34,10 @@ mono_bundled_resources_free (void)
3134
{
3235
g_assert (mono_runtime_is_shutting_down ());
3336

34-
g_hash_table_destroy (bundled_resources);
37+
dn_simdhash_free (bundled_resources);
38+
dn_simdhash_free (bundled_resource_key_lookup_table);
3539
bundled_resources = NULL;
40+
bundled_resource_key_lookup_table = NULL;
3641

3742
bundled_resources_contains_assemblies = false;
3843
bundled_resources_contains_satellite_assemblies = false;
@@ -50,6 +55,12 @@ bundled_resources_value_destroy_func (void *resource)
5055
MonoBundledResource *value = (MonoBundledResource *)resource;
5156
if (value->free_func)
5257
value->free_func (resource, value->free_data);
58+
59+
char *key;
60+
if (dn_simdhash_ptr_ptr_try_get_value (bundled_resource_key_lookup_table, (void *)value->id, (void **)&key)) {
61+
dn_simdhash_ptr_ptr_try_remove (bundled_resource_key_lookup_table, (void *)value->id);
62+
g_free (key);
63+
}
5364
}
5465

5566
static bool
@@ -62,48 +73,58 @@ bundled_resources_is_known_assembly_extension (const char *ext)
6273
#endif
6374
}
6475

65-
static gboolean
66-
bundled_resources_resource_id_equal (const char *id_one, const char *id_two)
76+
// strrchr calls strlen, so we need to do a search with known length instead
77+
// for some reason memrchr is defined in a header but the build fails when we try to use it
78+
static const char *
79+
g_memrchr (const char *s, char c, size_t n)
6780
{
68-
const char *extension_one = strrchr (id_one, '.');
69-
const char *extension_two = strrchr (id_two, '.');
70-
if (extension_one && extension_two && bundled_resources_is_known_assembly_extension (extension_one) && bundled_resources_is_known_assembly_extension (extension_two)) {
71-
size_t len_one = extension_one - id_one;
72-
size_t len_two = extension_two - id_two;
73-
return (len_one == len_two) && !strncmp (id_one, id_two, len_one);
74-
}
75-
76-
return !strcmp (id_one, id_two);
81+
while (n--)
82+
if (s[n] == c)
83+
return (void *)(s + n);
84+
return NULL;
7785
}
7886

79-
static guint
80-
bundled_resources_resource_id_hash (const char *id)
87+
// If a bundled resource has a known assembly extension, we strip the extension from its name
88+
// This ensures that lookups for foo.dll will work even if the assembly is in a webcil container
89+
static char *
90+
key_from_id (const char *id, char *buffer, guint buffer_len)
8191
{
82-
const char *current = id;
83-
const char *extension = NULL;
84-
guint previous_hash = 0;
85-
guint hash = 0;
86-
87-
while (*current) {
88-
hash = (hash << 5) - (hash + *current);
89-
if (*current == '.') {
90-
extension = current;
91-
previous_hash = hash;
92-
}
93-
current++;
92+
size_t id_length = strlen (id),
93+
extension_offset = -1;
94+
const char *extension = g_memrchr (id, '.', id_length);
95+
if (extension)
96+
extension_offset = extension - id;
97+
if (!buffer) {
98+
buffer_len = (guint)(id_length + 1);
99+
buffer = g_malloc (buffer_len);
94100
}
101+
buffer[0] = 0;
95102

96-
// alias all extensions to .dll
97-
if (extension && bundled_resources_is_known_assembly_extension (extension)) {
98-
hash = previous_hash;
99-
hash = (hash << 5) - (hash + 'd');
100-
hash = (hash << 5) - (hash + 'l');
101-
hash = (hash << 5) - (hash + 'l');
102-
}
103+
if (extension_offset && bundled_resources_is_known_assembly_extension (extension))
104+
g_strlcpy(buffer, id, MIN(buffer_len, extension_offset + 1));
105+
else
106+
g_strlcpy(buffer, id, MIN(buffer_len, id_length + 1));
107+
108+
return buffer;
109+
}
110+
111+
static gboolean
112+
bundled_resources_resource_id_equal (const char *key_one, const char *key_two)
113+
{
114+
return strcmp (key_one, key_two) == 0;
115+
}
103116

104-
return hash;
117+
static guint32
118+
bundled_resources_resource_id_hash (const char *key)
119+
{
120+
// FIXME: Seed
121+
// FIXME: We should cache the hash code so rehashes are cheaper
122+
return MurmurHash3_32_streaming ((const uint8_t *)key, 0);
105123
}
106124

125+
static MonoBundledResource *
126+
bundled_resources_get (const char *id);
127+
107128
//---------------------------------------------------------------------------------------
108129
//
109130
// mono_bundled_resources_add handles bundling of many types of resources to circumvent
@@ -130,7 +151,11 @@ mono_bundled_resources_add (MonoBundledResource **resources_to_bundle, uint32_t
130151
g_assert (!domain);
131152

132153
if (!bundled_resources)
133-
bundled_resources = g_hash_table_new_full ((GHashFunc)bundled_resources_resource_id_hash, (GEqualFunc)bundled_resources_resource_id_equal, NULL, bundled_resources_value_destroy_func);
154+
// FIXME: Choose a good initial capacity to avoid rehashes during startup. I picked one at random
155+
bundled_resources = dn_simdhash_ght_new_full ((GHashFunc)bundled_resources_resource_id_hash, (GEqualFunc)bundled_resources_resource_id_equal, NULL, bundled_resources_value_destroy_func, 2048, NULL);
156+
157+
if (!bundled_resource_key_lookup_table)
158+
bundled_resource_key_lookup_table = dn_simdhash_ptr_ptr_new (2048, NULL);
134159

135160
bool assemblyAdded = false;
136161
bool satelliteAssemblyAdded = false;
@@ -143,7 +168,13 @@ mono_bundled_resources_add (MonoBundledResource **resources_to_bundle, uint32_t
143168
if (resource_to_bundle->type == MONO_BUNDLED_SATELLITE_ASSEMBLY)
144169
satelliteAssemblyAdded = true;
145170

146-
g_hash_table_insert (bundled_resources, (gpointer) resource_to_bundle->id, resource_to_bundle);
171+
// Generate the hash key for the id (strip certain extensions) and store it
172+
// so that we can free it later when freeing the bundled data
173+
char *key = key_from_id (resource_to_bundle->id, NULL, 0);
174+
dn_simdhash_ptr_ptr_try_add (bundled_resource_key_lookup_table, (void *)resource_to_bundle->id, key);
175+
176+
g_assert (dn_simdhash_ght_try_add (bundled_resources, (gpointer) key, resource_to_bundle));
177+
// g_assert (bundled_resources_get (resource_to_bundle->id) == resource_to_bundle);
147178
}
148179

149180
if (assemblyAdded)
@@ -172,7 +203,12 @@ bundled_resources_get (const char *id)
172203
if (!bundled_resources)
173204
return NULL;
174205

175-
return g_hash_table_lookup (bundled_resources, id);
206+
char key_buffer[1024];
207+
key_from_id(id, key_buffer, 1024);
208+
209+
MonoBundledResource *result = NULL;
210+
dn_simdhash_ght_try_get_value (bundled_resources, key_buffer, (void **)&result);
211+
return result;
176212
}
177213

178214
//---------------------------------------------------------------------------------------
@@ -364,9 +400,7 @@ bool
364400
mono_bundled_resources_get_data_resource_values (const char *id, const uint8_t **data_out, uint32_t *size_out)
365401
{
366402
MonoBundledDataResource *bundled_data_resource = bundled_resources_get_data_resource (id);
367-
if (!bundled_data_resource ||
368-
!bundled_data_resource->data.data ||
369-
bundled_data_resource->data.size == 0)
403+
if (!bundled_data_resource || !bundled_data_resource->data.data)
370404
return false;
371405

372406
if (data_out)

0 commit comments

Comments
 (0)