Merge branch 'master' into timestamps

KomputeProject · Mar 6, 2021 · eb47d52 · eb47d52
2 parents 6f5a8f8 + 515c3b6
commit eb47d52
Show file tree

Hide file tree

Showing 26 changed files with 1,188 additions and 815 deletions.
diff --git a/Makefile b/Makefile
@@ -163,6 +163,9 @@ generate_python_docstrings:
 	python -m pybind11_mkdoc \
 		-o python/src/docstrings.hpp \
 		single_include/kompute/Kompute.hpp \
+		-Iexternal/fmt/include/ \
+		-Iexternal/spdlog/include/ \
+		-Iexternal/glslang/ \
 		-I/usr/include/c++/7.5.0/
 
 install_python_reqs:

diff --git a/README.md b/README.md
@@ -51,7 +51,7 @@ The C++ interface provides low level access to the native components of Kompute
 
 void kompute(const std::string& shader) {
 
-    // 1. Create Kompute Manager with default settings (device 0 and first compute compatible queue)
+    // 1. Create Kompute Manager with default settings (device 0, first queue and no extensions)
     kp::Manager mgr; 
 
     // 2. Create and initialise Kompute Tensors through manager
@@ -71,14 +71,16 @@ void kompute(const std::string& shader) {
     auto algorithm = mgr.algorithm(params,
                                    kp::Shader::compile_source(shader),
                                    workgroup,
-                                   specConsts);
+                                   specConsts,
+                                   pushConstsA);
 
     // 4. Run operation synchronously using sequence
     mgr.sequence()
         ->record<kp::OpTensorSyncDevice>(params)
-        ->record<kp::OpAlgoDispatch>(algorithm, pushConstsA)
-        ->record<kp::OpAlgoDispatch>(algorithm, pushConstsB)
-        ->eval();
+        ->record<kp::OpAlgoDispatch>(algorithm) // Binds default push consts
+        ->eval() // Evaluates the two recorded operations
+        ->record<kp::OpAlgoDispatch>(algorithm, pushConstsB) // Overrides push consts
+        ->eval(); // Evaluates only last recorded operation
 
     // 5. Sync results from the GPU asynchronously
     sq = mgr.sequence()
@@ -138,7 +140,7 @@ The [Python package](https://kompute.cc/overview/python-package.html) provides a
 ```python
 
 def kompute(shader):
-    # 1. Create Kompute Manager with default settings (device 0 and first compute compatible queue)
+    # 1. Create Kompute Manager with default settings (device 0, first queue and no extensions)
     mgr = kp.Manager()
 
     # 2. Create and initialise Kompute Tensors through manager
@@ -155,14 +157,17 @@ def kompute(shader):
     push_consts_a = [2]
     push_consts_b = [3]
 
-    algo = mgr.algorithm(params, kp.Shader.compile_source(shader), workgroup, spec_consts)
+    spirv = kp.Shader.compile_source(shader)
+
+    algo = mgr.algorithm(params, spirv, workgroup, spec_consts, push_consts_a)
 
     # 4. Run operation synchronously using sequence
     (mgr.sequence()
         .record(kp.OpTensorSyncDevice(params))
-        .record(kp.OpAlgoDispatch(algo, push_consts_a))
-        .record(kp.OpAlgoDispatch(algo, push_consts_b))
-        .eval())
+        .record(kp.OpAlgoDispatch(algo)) # Binds default push consts provided
+        .eval() # evaluates the two recorded ops
+        .record(kp.OpAlgoDispatch(algo, push_consts_b)) # Overrides push consts
+        .eval()) # evaluates only the last recorded op
 
     # 5. Sync results from the GPU asynchronously
     sq = mgr.sequence()
@@ -429,6 +434,12 @@ We appreciate PRs and Issues. If you want to contribute try checking the "Good f
 * Uses doxygen and sphinx for documentation and autodocs
 * Uses vcpkg for finding the dependencies, it's the recommended set up to retrieve the libraries
 
+If you want to run with Vulkan debug layers, you can enable them by setting the `KOMPUTE_ENV_DEBUG_LAYERS` environment variable, for example:
+
+```
+export KOMPUTE_ENV_DEBUG_LAYERS="VK_LAYER_LUNARG_api_dump"
+```
+
 ##### Updating documentation
 
 To update the documentation you will need to:

diff --git a/docs/overview/advanced-examples.rst b/docs/overview/advanced-examples.rst
@@ -23,6 +23,63 @@ End-to-end examples
 * `Android NDK Mobile Kompute ML Application <https://towardsdatascience.com/gpu-accelerated-machine-learning-in-your-mobile-applications-using-the-android-ndk-vulkan-kompute-1e9da37b7617>`_
 * `Game Development Kompute ML in Godot Engine <https://towardsdatascience.com/supercharging-game-development-with-gpu-accelerated-ml-using-vulkan-kompute-the-godot-game-engine-4e75a84ea9f0>`_
 
+Add Vulkan Extensions
+^^^^^^^^^^^^^^^^^^^^^
+
+Kompute provides a simple way to add Vulkan extensions through kp::Manager initialisation. When debug is enabled, the logs show which extensions were requested and which of those were actually added, based on the extensions available on the current driver.
+
+The example below shows how you can enable the "VK_EXT_shader_atomic_float" extension so that atomicAdd can be used with floats in shaders.
+
+.. code-block:: cpp
+   :linenos:
+
+   int main() {
+       std::string shader(R"(
+             #version 450
+
+             #extension GL_EXT_shader_atomic_float: enable
+
+             layout(push_constant) uniform PushConstants {
+               float x;
+               float y;
+               float z;
+             } pcs;
+
+             layout (local_size_x = 1) in;
+
+             layout(set = 0, binding = 0) buffer a { float pa[]; };
+
+             void main() {
+                 atomicAdd(pa[0], pcs.x);
+                 atomicAdd(pa[1], pcs.y);
+                 atomicAdd(pa[2], pcs.z);
+             })");
+
+       std::vector<uint32_t> spirv = kp::Shader::compile_source(shader);
+
+       std::shared_ptr<kp::Sequence> sq = nullptr;
+
+       {
+           kp::Manager mgr(0, {}, { "VK_EXT_shader_atomic_float" });
+
+           std::shared_ptr<kp::Tensor> tensor = mgr.tensor({ 0, 0, 0 });
+
+           std::shared_ptr<kp::Algorithm> algo =
+             mgr.algorithm({ tensor }, spirv, kp::Workgroup({ 1 }), {}, { 0.0, 0.0, 0.0 });
+
+           sq = mgr.sequence()
+                  ->record<kp::OpTensorSyncDevice>({ tensor })
+                  ->record<kp::OpAlgoDispatch>(algo,
+                                               kp::Constants{ 0.1, 0.2, 0.3 })
+                  ->record<kp::OpAlgoDispatch>(algo,
+                                               kp::Constants{ 0.3, 0.2, 0.1 })
+                  ->record<kp::OpTensorSyncLocal>({ tensor })
+                  ->eval();
+
+           EXPECT_EQ(tensor->data(), kp::Constants({ 0.4, 0.4, 0.4 }));
+       }
+   }
+
 
 Your Custom Kompute Operation
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^