Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.x] Shader goodies: async. compilation + caching (ubershader approach) #53411

Merged
merged 5 commits into from
Nov 9, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions core/hash_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ class HashMap {
TKey key;
TData data;

Pair() {}
Pair(const TKey &p_key) :
key(p_key),
data() {}
Pair(const TKey &p_key, const TData &p_data) :
key(p_key),
data(p_data) {
Expand Down Expand Up @@ -90,6 +92,12 @@ class HashMap {
const TData &value() const {
return pair.value();
}

// Builds an element whose pair holds p_key and a value-initialized TData.
// NOTE(review): `hash` and `next` are not set here; create_element() assigns both right after construction.
Element(const TKey &p_key) :
pair(p_key) {}
// Copies the cached hash and the key/data pair from p_other.
// NOTE(review): `next` is deliberately not copied here; the caller is expected to relink the new element into its own bucket list.
Element(const Element &p_other) :
hash(p_other.hash),
pair(p_other.pair.key, p_other.pair.data) {}
};

private:
Expand Down Expand Up @@ -192,14 +200,12 @@ class HashMap {

Element *create_element(const TKey &p_key) {
/* if element doesn't exist, create it */
Element *e = memnew(Element);
Element *e = memnew(Element(p_key));
ERR_FAIL_COND_V_MSG(!e, nullptr, "Out of memory.");
uint32_t hash = Hasher::hash(p_key);
uint32_t index = hash & ((1 << hash_table_power) - 1);
e->next = hash_table[index];
e->hash = hash;
e->pair.key = p_key;
e->pair.data = TData();

hash_table[index] = e;
elements++;
Expand Down Expand Up @@ -228,9 +234,7 @@ class HashMap {
const Element *e = p_t.hash_table[i];

while (e) {
Element *le = memnew(Element); /* local element */

*le = *e; /* copy data */
Element *le = memnew(Element(*e)); /* local element */

/* add to list and reassign pointers */
le->next = hash_table[i];
Expand Down
6 changes: 6 additions & 0 deletions core/os/os.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,12 @@ const char *OS::get_video_driver_name(int p_driver) const {
}
}

// Base implementation: reports that no offscreen GL context is available.
// Declared virtual in os.h, so platform-specific OS classes can override it.
bool OS::is_offscreen_gl_available() const {
return false;
}

// Base implementation: intentionally a no-op; p_current is ignored.
// Declared virtual in os.h, so platform-specific OS classes can override it.
void OS::set_offscreen_gl_current(bool p_current) {}

int OS::get_audio_driver_count() const {
return AudioDriverManager::get_driver_count();
}
Expand Down
3 changes: 3 additions & 0 deletions core/os/os.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ class OS {
virtual const char *get_video_driver_name(int p_driver) const;
virtual int get_current_video_driver() const = 0;

virtual bool is_offscreen_gl_available() const;
virtual void set_offscreen_gl_current(bool p_current);

virtual int get_audio_driver_count() const;
virtual const char *get_audio_driver_name(int p_driver) const;

Expand Down
5 changes: 5 additions & 0 deletions core/project_settings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ void ProjectSettings::set_restart_if_changed(const String &p_name, bool p_restar
props[p_name].restart_if_changed = p_restart;
}

// Sets the `hide_from_editor` flag of the existing project setting p_name.
// Fails with an error message (and changes nothing) if no setting named p_name exists.
void ProjectSettings::set_hide_from_editor(const String &p_name, bool p_hide_from_editor) {
ERR_FAIL_COND_MSG(!props.has(p_name), "Request for nonexistent project setting: " + p_name + ".");
props[p_name].hide_from_editor = p_hide_from_editor;
}

void ProjectSettings::set_ignore_value_in_docs(const String &p_name, bool p_ignore) {
ERR_FAIL_COND_MSG(!props.has(p_name), "Request for nonexistent project setting: " + p_name + ".");
#ifdef DEBUG_METHODS_ENABLED
Expand Down
2 changes: 1 addition & 1 deletion core/project_settings.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,9 @@ class ProjectSettings : public Object {

void set_initial_value(const String &p_name, const Variant &p_value);
void set_restart_if_changed(const String &p_name, bool p_restart);
void set_hide_from_editor(const String &p_name, bool p_hide_from_editor);
void set_ignore_value_in_docs(const String &p_name, bool p_ignore);
bool get_ignore_value_in_docs(const String &p_name) const;

bool property_can_revert(const String &p_name);
Variant property_get_revert(const String &p_name);

Expand Down
133 changes: 133 additions & 0 deletions core/threaded_callable_queue.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/*************************************************************************/
/* threaded_callable_queue.h */
/*************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/*************************************************************************/
/* Copyright (c) 2007-2021 Juan Linietsky, Ariel Manzur. */
/* Copyright (c) 2014-2021 Godot Engine contributors (cf. AUTHORS.md). */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/*************************************************************************/

#ifndef THREADED_CALLABLE_QUEUE_H
#define THREADED_CALLABLE_QUEUE_H

#include "core/local_vector.h"
#include "core/ordered_hash_map.h"
#include "core/os/mutex.h"
#include "core/os/semaphore.h"
#include "core/os/thread.h"

#include <functional>

template <class K>
class ThreadedCallableQueue {
public:
using Job = std::function<void()>;

private:
bool exit;
Thread thread;
BinaryMutex mutex;
Semaphore sem;
OrderedHashMap<K, Job> queue;

static void _thread_func(void *p_user_data);

public:
void enqueue(K p_key, Job p_job);
void cancel(K p_key);

ThreadedCallableQueue();
~ThreadedCallableQueue();
};

template <class K>
void ThreadedCallableQueue<K>::_thread_func(void *p_user_data) {
ThreadedCallableQueue *self = static_cast<ThreadedCallableQueue *>(p_user_data);

while (true) {
self->sem.wait();
self->mutex.lock();
if (self->exit) {
self->mutex.unlock();
break;
}

typename OrderedHashMap<K, Job>::Element E = self->queue.front();
// Defense about implementation bugs (excessive posts)
if (!E) {
ERR_PRINT("Semaphore unlocked, the queue is empty. Bug?");
self->mutex.unlock();
// --- Defense end
} else {
LocalVector<Job> jobs;
jobs.push_back(E.value());
self->queue.erase(E);
self->mutex.unlock();

for (uint32_t i = 0; i < jobs.size(); i++) {
jobs[i]();
}
}
}

self->mutex.lock();
for (typename OrderedHashMap<K, Job>::Element E = self->queue.front(); E; E = E.next()) {
Job job = E.value();
job();
}
self->mutex.unlock();
}

template <class K>
void ThreadedCallableQueue<K>::enqueue(K p_key, Job p_job) {
MutexLock lock(mutex);
ERR_FAIL_COND(exit);
ERR_FAIL_COND(queue.has(p_key));
queue.insert(p_key, p_job);
sem.post();
}

template <class K>
void ThreadedCallableQueue<K>::cancel(K p_key) {
MutexLock lock(mutex);
ERR_FAIL_COND(exit);
if (queue.erase(p_key)) {
sem.wait();
}
}

template <class K>
ThreadedCallableQueue<K>::ThreadedCallableQueue() :
exit(false) {
thread.start(&_thread_func, this);
}

template <class K>
ThreadedCallableQueue<K>::~ThreadedCallableQueue() {
exit = true;
sem.post();
thread.wait_to_finish();
}

#endif // THREADED_CALLABLE_QUEUE_H
33 changes: 33 additions & 0 deletions doc/classes/ProjectSettings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -1222,6 +1222,39 @@
If [code]true[/code] and available on the target Android device, enables high floating point precision for all shader computations in GLES2.
[b]Warning:[/b] High floating point precision can be extremely slow on older devices and is often not available at all. Use with caution.
</member>
<member name="rendering/gles3/shaders/log_active_async_compiles_count" type="bool" setter="" getter="" default="false">
If [code]true[/code], every time an asynchronous shader compilation or an asynchronous shader reconstruction from cache starts or finishes, a line will be logged telling how many of those are happening.
If the platform doesn't support parallel shader compile, but only the compile queue via a secondary GL context, what the message will tell is the number of shader compiles currently queued.
[b]Note:[/b] This setting is only meaningful if [code]rendering/gles3/shaders/shader_compilation_mode[/code] is [b]not[/b] [code]Synchronous[/code].
</member>
<member name="rendering/gles3/shaders/max_simultaneous_compiles" type="int" setter="" getter="" default="2">
This is the maximum number of shaders that can be compiled (or reconstructed from cache) at the same time.
At runtime, while that count is reached, other shaders that can be asynchronously compiled will just use their fallback, without their setup being started until the count gets lower.
This is a way to balance the CPU work between running the game and compiling the shaders. The goal is to have as many asynchronous compiles in flight as possible without impacting the responsiveness of the game, which beyond some point would destroy the benefits of asynchronous compilation. In other words, you may be able to afford that the FPS lowers a bit, and that will already be better than the stalling that synchronous compilation could cause.
The default value is a conservative one, so you are advised to tweak it according to the hardware you are targeting.
[b]Note:[/b] This setting is only meaningful if [code]rendering/gles3/shaders/shader_compilation_mode[/code] is [b]not[/b] [code]Synchronous[/code].
</member>
<member name="rendering/gles3/shaders/max_simultaneous_compiles.mobile" type="int" setter="" getter="" default="1">
The default is a very conservative override for [code]rendering/gles3/shaders/max_simultaneous_compiles[/code].
Depending on the specific devices you are targeting, you may want to raise it.
[b]Note:[/b] This setting is only meaningful if [code]rendering/gles3/shaders/shader_compilation_mode[/code] is [b]not[/b] [code]Synchronous[/code].
</member>
<member name="rendering/gles3/shaders/shader_cache_size_mb" type="int" setter="" getter="" default="512">
The maximum size, in megabytes, that the ubershader cache can grow up to. On startup, the least recently used entries will be deleted until the total size is within bounds.
[b]Note:[/b] This setting is only meaningful if [code]rendering/gles3/shaders/shader_compilation_mode[/code] is set to [code]Asynchronous + Cache[/code].
</member>
<member name="rendering/gles3/shaders/shader_cache_size_mb.mobile" type="int" setter="" getter="" default="128">
An override for [code]rendering/gles3/shaders/shader_cache_size_mb[/code], so a smaller maximum size can be configured for mobile platforms, where storage space is more limited.
[b]Note:[/b] This setting is only meaningful if [code]rendering/gles3/shaders/shader_compilation_mode[/code] is set to [code]Asynchronous + Cache[/code].
</member>
<member name="rendering/gles3/shaders/shader_compilation_mode" type="int" setter="" getter="" default="0">
If set to [code]Asynchronous[/code] and available on the target device, asynchronous compilation of shaders is enabled (in contrast to [code]Synchronous[/code]).
That means that when a shader is first used under some new rendering situation, the game won't stall while such shader is being compiled. Instead, a fallback will be used and the real shader will be compiled in the background. Once the actual shader is compiled, it will be used from then on whenever it is needed to draw a frame.
Depending on the async mode configured for a given material/shader, the fallback will be an "ubershader" (the default) or just skip rendering any item it is applied to.
An ubershader is a very complex shader, slow but suited to any rendering situation, that the engine generates internally so it can be used from the beginning while the traditional conditioned, optimized version of it is being compiled.
In order to save some loading time, you can use [code]Asynchronous + Cache[/code], which also causes the ubershaders to be cached into storage so they can be ready faster next time they are used (provided the platform provides support for it).
[b]Warning:[/b] Async. compilation is currently only supported for spatial and particle materials/shaders.
</member>
<member name="rendering/limits/buffers/blend_shape_max_buffer_size_kb" type="int" setter="" getter="" default="4096">
Max buffer size for blend shapes. Any blend shape bigger than this will not work.
</member>
Expand Down
11 changes: 11 additions & 0 deletions doc/classes/SpatialMaterial.xml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,10 @@
<member name="ao_texture_channel" type="int" setter="set_ao_texture_channel" getter="get_ao_texture_channel" enum="SpatialMaterial.TextureChannel">
Specifies the channel of the [member ao_texture] in which the ambient occlusion information is stored. This is useful when you store the information for multiple effects in a single texture. For example if you stored metallic in the red channel, roughness in the blue, and ambient occlusion in the green you could reduce the number of textures you use.
</member>
<member name="async_mode" type="int" setter="set_async_mode" getter="get_async_mode" enum="SpatialMaterial.AsyncMode" default="0">
If [member ProjectSettings.rendering/gles3/shaders/shader_compilation_mode] is [code]Asynchronous[/code] (with or without cache), this determines how this material must behave in regards to asynchronous shader compilation.
[constant ASYNC_MODE_VISIBLE] is the default and the best for most cases.
</member>
<member name="clearcoat" type="float" setter="set_clearcoat" getter="get_clearcoat">
Sets the strength of the clearcoat effect. Setting to [code]0[/code] looks the same as disabling the clearcoat effect.
</member>
Expand Down Expand Up @@ -639,5 +643,12 @@
<constant name="DISTANCE_FADE_OBJECT_DITHER" value="3" enum="DistanceFadeMode">
Smoothly fades the object out based on the object's distance from the camera using a dither approach. Dithering discards pixels based on a set pattern to smoothly fade without enabling transparency. On certain hardware this can be faster than [constant DISTANCE_FADE_PIXEL_ALPHA].
</constant>
<constant name="ASYNC_MODE_VISIBLE" value="0" enum="AsyncMode">
The real conditioned shader needed on each situation will be sent for background compilation. In the meantime, a very complex shader that adapts to every situation will be used ("ubershader"). This ubershader is much slower to render, but will keep the game running without stalling to compile. Once shader compilation is done, the ubershader is replaced by the traditional optimized shader.
</constant>
<constant name="ASYNC_MODE_HIDDEN" value="1" enum="AsyncMode">
Anything with this material applied won't be rendered while this material's shader is being compiled.
This is useful for optimization, in cases where the visuals won't suffer from having certain non-essential elements missing during the short time their shaders are being compiled.
</constant>
</constants>
</class>
9 changes: 9 additions & 0 deletions doc/classes/VisualServer.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2525,6 +2525,15 @@
Sets the default clear color which is used when a specific clear color has not been selected.
</description>
</method>
<method name="set_shader_async_hidden_forbidden">
<return type="void" />
<argument index="0" name="forbidden" type="bool" />
<description>
If asynchronous shader compilation is enabled, this controls whether [constant SpatialMaterial.ASYNC_MODE_HIDDEN] is obeyed.
For instance, you may want to enable this temporarily before taking a screenshot. This ensures everything is visible even if shaders with async mode [i]hidden[/i] are not ready yet.
Reflection probes use this internally to ensure they capture everything regardless of whether the shaders are ready or not.
</description>
</method>
<method name="set_shader_time_scale">
<return type="void" />
<argument index="0" name="scale" type="float" />
Expand Down
3 changes: 3 additions & 0 deletions drivers/dummy/rasterizer_dummy.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,9 @@ class RasterizerStorageDummy : public RasterizerStorage {
void shader_get_custom_defines(RID p_shader, Vector<String> *p_defines) const {}
void shader_remove_custom_define(RID p_shader, const String &p_define) {}

// Dummy rasterizer stubs: the setter ignores p_forbidden and the getter always reports false.
void set_shader_async_hidden_forbidden(bool p_forbidden) {}
bool is_shader_async_hidden_forbidden() { return false; }

/* COMMON MATERIAL API */

RID material_create() { return RID(); }
Expand Down
3 changes: 3 additions & 0 deletions drivers/gles2/rasterizer_storage_gles2.h
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,9 @@ class RasterizerStorageGLES2 : public RasterizerStorage {
virtual void shader_get_custom_defines(RID p_shader, Vector<String> *p_defines) const;
virtual void shader_remove_custom_define(RID p_shader, const String &p_define);

// Stubs: the setter ignores p_forbidden and the getter always reports false.
// NOTE(review): presumably because asynchronous shader compilation is only implemented for GLES3 — confirm.
void set_shader_async_hidden_forbidden(bool p_forbidden) {}
bool is_shader_async_hidden_forbidden() { return false; }

void _update_shader(Shader *p_shader) const;
void update_dirty_shaders();

Expand Down
Loading