diff --git a/ci/licenses_golden/licenses_flutter b/ci/licenses_golden/licenses_flutter index a59702e59de55..76984cb37eef9 100644 --- a/ci/licenses_golden/licenses_flutter +++ b/ci/licenses_golden/licenses_flutter @@ -1091,6 +1091,7 @@ ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/constants.glsl + ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/gaussian.glsl + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/gradient.glsl + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/path.glsl + ../../../flutter/LICENSE +ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/prefix_sum.glsl + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/texture.glsl + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/transform.glsl + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/types.glsl + ../../../flutter/LICENSE @@ -1560,6 +1561,7 @@ ORIGIN: ../../../flutter/impeller/renderer/pipeline_descriptor.cc + ../../../flu ORIGIN: ../../../flutter/impeller/renderer/pipeline_descriptor.h + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/pipeline_library.cc + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/pipeline_library.h + ../../../flutter/LICENSE +ORIGIN: ../../../flutter/impeller/renderer/prefix_sum_test.comp + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/render_pass.cc + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/render_pass.h + ../../../flutter/LICENSE ORIGIN: ../../../flutter/impeller/renderer/render_target.cc + ../../../flutter/LICENSE @@ -3713,6 +3715,7 @@ FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/constants.glsl FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/gaussian.glsl FILE: 
../../../flutter/impeller/compiler/shader_lib/impeller/gradient.glsl FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/path.glsl +FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/prefix_sum.glsl FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/texture.glsl FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/transform.glsl FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/types.glsl @@ -4182,6 +4185,7 @@ FILE: ../../../flutter/impeller/renderer/pipeline_descriptor.cc FILE: ../../../flutter/impeller/renderer/pipeline_descriptor.h FILE: ../../../flutter/impeller/renderer/pipeline_library.cc FILE: ../../../flutter/impeller/renderer/pipeline_library.h +FILE: ../../../flutter/impeller/renderer/prefix_sum_test.comp FILE: ../../../flutter/impeller/renderer/render_pass.cc FILE: ../../../flutter/impeller/renderer/render_pass.h FILE: ../../../flutter/impeller/renderer/render_target.cc diff --git a/impeller/compiler/shader_lib/impeller/prefix_sum.glsl b/impeller/compiler/shader_lib/impeller/prefix_sum.glsl new file mode 100644 index 0000000000000..a232d4da5dd85 --- /dev/null +++ b/impeller/compiler/shader_lib/impeller/prefix_sum.glsl @@ -0,0 +1,38 @@ +// Copyright 2013 The Flutter Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Parallel exclusive prefix sum computes the prefix in place in storage. +// BLOCK_SIZE is the overall storage size while ident must be the global +// x identifier. 
+#define ExclusivePrefixSum(ident, storage, BLOCK_SIZE) \
+  do {                                                 \
+    uint offset = 1;                                   \
+    for (uint n = (BLOCK_SIZE) / 2; n > 0; n /= 2) {   \
+      if ((ident) < n) {                               \
+        uint ai = offset * (2 * (ident) + 1) - 1;      \
+        uint bi = offset * (2 * (ident) + 2) - 1;      \
+        storage[bi] += storage[ai];                    \
+      }                                                \
+      offset *= 2;                                     \
+      barrier();                                       \
+    }                                                  \
+                                                       \
+    if ((ident) == 0) {                                \
+      storage[(BLOCK_SIZE) - 1] = 0;                   \
+    }                                                  \
+    barrier();                                         \
+                                                       \
+    for (uint n = 1; n < (BLOCK_SIZE); n *= 2) {       \
+      offset /= 2;                                     \
+      barrier();                                       \
+      if ((ident) < n) {                               \
+        uint ai = offset * (2 * (ident) + 1) - 1;      \
+        uint bi = offset * (2 * (ident) + 2) - 1;      \
+        uint temp = storage[ai];                       \
+        storage[ai] = storage[bi];                     \
+        storage[bi] += temp;                           \
+      }                                                \
+    }                                                  \
+    barrier();                                         \
+  } while (false)
diff --git a/impeller/renderer/BUILD.gn b/impeller/renderer/BUILD.gn
index 7175e8d60b05a..67bf32d03da3f 100644
--- a/impeller/renderer/BUILD.gn
+++ b/impeller/renderer/BUILD.gn
@@ -22,6 +22,7 @@ if (impeller_enable_compute) {
     shaders = [
       "stroke.comp",
       "path_polyline.comp",
+      "prefix_sum_test.comp",
     ]
   }
 
diff --git a/impeller/renderer/compute_unittests.cc b/impeller/renderer/compute_unittests.cc
index 8790396f5aefd..c96171f8d204e 100644
--- a/impeller/renderer/compute_unittests.cc
+++ b/impeller/renderer/compute_unittests.cc
@@ -18,6 +18,7 @@
 #include "impeller/renderer/compute_command.h"
 #include "impeller/renderer/compute_pipeline_builder.h"
 #include "impeller/renderer/pipeline_library.h"
+#include "impeller/renderer/prefix_sum_test.comp.h"
 
 namespace impeller {
 namespace testing {
@@ -103,6 +104,119 @@ TEST_P(ComputeTest, CanCreateComputePass) {
   latch.Wait();
 }
 
+// Scans the inputs 1..kCount with the prefix sum test shader in one thread
+// group and checks the inclusive sums read back from the output buffer.
+TEST_P(ComputeTest, CanComputePrefixSum) {
+  using CS = PrefixSumTestComputeShader;
+  auto context = GetContext();
+  ASSERT_TRUE(context);
+  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
+
+  using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
+  auto pipeline_desc =
+      SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
+  ASSERT_TRUE(pipeline_desc.has_value());
+  auto
compute_pipeline =
+      context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
+  ASSERT_TRUE(compute_pipeline);
+
+  auto cmd_buffer = context->CreateCommandBuffer();
+  auto pass = cmd_buffer->CreateComputePass();
+  ASSERT_TRUE(pass && pass->IsValid());
+
+  static constexpr size_t kCount = 5;
+
+  pass->SetGridSize(ISize(kCount, 1));
+  pass->SetThreadGroupSize(ISize(kCount, 1));
+
+  ComputeCommand cmd;
+  cmd.label = "Compute";
+  cmd.pipeline = compute_pipeline;
+
+  CS::InputData<kCount> input_data;
+  input_data.count = kCount;
+  for (size_t i = 0; i < kCount; i++) {
+    input_data.data[i] = 1 + i;
+  }
+
+  auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
+      context, "Output Buffer");
+
+  CS::BindInputData(
+      cmd, pass->GetTransientsBuffer().EmplaceStorageBuffer(input_data));
+  CS::BindOutputData(cmd, output_buffer->AsBufferView());
+
+  ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
+  ASSERT_TRUE(pass->EncodeCommands());
+
+  fml::AutoResetWaitableEvent latch;
+  ASSERT_TRUE(cmd_buffer->SubmitCommands(
+      [&latch, output_buffer](CommandBuffer::Status status) {
+        EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
+
+        auto view = output_buffer->AsBufferView();
+        EXPECT_EQ(view.range.length, sizeof(CS::OutputData<kCount>));
+
+        CS::OutputData<kCount>* output =
+            reinterpret_cast<CS::OutputData<kCount>*>(view.contents);
+        EXPECT_TRUE(output);
+
+        constexpr uint32_t expected[kCount] = {1, 3, 6, 10, 15};
+        for (size_t i = 0; i < kCount; i++) {
+          auto computed_sum = output->data[i];
+          EXPECT_EQ(computed_sum, expected[i]);
+        }
+        latch.Signal();
+      }));
+
+  latch.Wait();
+}
+
+TEST_P(ComputeTest, CanComputePrefixSumLargeInteractive) {
+  using CS = PrefixSumTestComputeShader;
+
+  auto context = GetContext();
+  ASSERT_TRUE(context);
+  ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
+
+  auto callback = [&](RenderPass& render_pass) -> bool {
+    using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
+    auto pipeline_desc =
+        SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
+    auto compute_pipeline =
+        context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
+
+    auto cmd_buffer = context->CreateCommandBuffer();
+    auto pass = cmd_buffer->CreateComputePass();
+
+    static constexpr size_t kCount = 1023;  // BLOCK_SIZE - 1 in the shader.
+
+    pass->SetGridSize(ISize(kCount, 1));
+
+    ComputeCommand cmd;
+    cmd.label = "Compute";
+    cmd.pipeline = compute_pipeline;
+
+    CS::InputData<kCount> input_data;
+    input_data.count = kCount;
+    for (size_t i = 0; i < kCount; i++) {
+      input_data.data[i] = 1 + i;
+    }
+
+    auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
+        context, "Output Buffer");
+
+    CS::BindInputData(
+        cmd, pass->GetTransientsBuffer().EmplaceStorageBuffer(input_data));
+    CS::BindOutputData(cmd, output_buffer->AsBufferView());
+
+    pass->AddCommand(std::move(cmd));
+    pass->EncodeCommands();
+    return cmd_buffer->SubmitCommands();  // Output is not verified here.
+  };
+  ASSERT_TRUE(OpenPlaygroundHere(callback));
+}
+
 TEST_P(ComputeTest, MultiStageInputAndOutput) {
   using CS1 = Stage1ComputeShader;
   using Stage1PipelineBuilder = ComputePipelineBuilder<CS1>;
diff --git a/impeller/renderer/prefix_sum_test.comp b/impeller/renderer/prefix_sum_test.comp
new file mode 100644
index 0000000000000..e964d551cf6fe
--- /dev/null
+++ b/impeller/renderer/prefix_sum_test.comp
@@ -0,0 +1,48 @@
+// Copyright 2013 The Flutter Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+layout(local_size_x = 512, local_size_y = 1) in;
+layout(std430) buffer;
+
+#include <impeller/prefix_sum.glsl>
+
+// Capacity of the shared scratch array and the BLOCK_SIZE argument handed to
+// ExclusivePrefixSum.
+// NOTE(review): local_size_x is 512 while BLOCK_SIZE is 1024, and main() indexes
+// the scratch array by global invocation id -- confirm that dispatches stay
+// within one thread group of at most BLOCK_SIZE invocations.
+#define BLOCK_SIZE 1024
+
+layout(binding = 0) readonly buffer InputData {
+  uint count;
+  uint data[];
+}
+input_data;
+
+layout(binding = 1) writeonly buffer OutputData {
+  uint data[];
+}
+output_data;
+
+// Needs to be number of threads per threadgroup.
+shared uint memory[BLOCK_SIZE];
+
+void main() {
+  uint ident = gl_GlobalInvocationID.x;  // NOTE(review): also indexes `memory`.
+
+  uint value = 0;  // Invocations at or past `count` contribute zero.
+  if (ident < input_data.count) {
+    value = input_data.data[ident];
+  }
+
+  memory[ident] = value;
+  barrier();  // Sync before the scan reads slots written by other invocations.
+
+  ExclusivePrefixSum(ident, memory, BLOCK_SIZE);
+
+  if (ident < input_data.count) {
+    // Convert exclusive to inclusive sum.
+    output_data.data[ident] = memory[ident] + value;
+  }
+}
diff --git a/impeller/tools/malioc.json b/impeller/tools/malioc.json
index 0315be91636a6..c3fda3a9d4c8c 100644
--- a/impeller/tools/malioc.json
+++ b/impeller/tools/malioc.json
@@ -14027,6 +14027,68 @@
       }
     }
   },
+  "flutter/impeller/renderer/prefix_sum_test.comp.vkspv": {
+    "Mali-G78": {
+      "core": "Mali-G78",
+      "filename": "flutter/impeller/renderer/prefix_sum_test.comp.vkspv",
+      "has_uniform_computation": true,
+      "type": "Compute",
+      "variants": {
+        "Main": {
+          "fp16_arithmetic": null,
+          "has_stack_spilling": false,
+          "performance": {
+            "longest_path_bound_pipelines": [
+              "load_store"
+            ],
+            "longest_path_cycles": [
+              2.549999952316284,
+              0.0,
+              2.549999952316284,
+              1.0,
+              72.0,
+              0.0
+            ],
+            "pipelines": [
+              "arith_total",
+              "arith_fma",
+              "arith_cvt",
+              "arith_sfu",
+              "load_store",
+              "texture"
+            ],
+            "shortest_path_bound_pipelines": [
+              "load_store"
+            ],
+            "shortest_path_cycles": [
+              0.949999988079071,
+              0.0,
+              0.949999988079071,
+              0.0,
+              1.0,
+              0.0
+            ],
+            "total_bound_pipelines": [
+              "load_store"
+            ],
+            "total_cycles": [
+              2.549999952316284,
+              0.0,
+              2.549999952316284,
+              1.0,
+              72.0,
+              0.0
+            ]
+          },
+          "shared_storage_used": 4096,
+          "stack_spill_bytes": 0,
+          "thread_occupancy": 100,
+          "uniform_registers_used": 8,
+          "work_registers_used": 21
+        }
+      }
+    }
+  },
   "flutter/impeller/renderer/stroke.comp.vkspv": {
     "Mali-G78": {
       "core": "Mali-G78",