Skip to content
This repository was archived by the owner on Feb 25, 2025. It is now read-only.

Commit 82c30a0

Browse files
author
Jonah Williams
authored
[Impeller] Create reusable prefix sum. (#42167)
Creates a reusable function-like macro for performing a prefix sum. Eventually, we'll need this for polyline decomposition. ![image](https://github.com/flutter/engine/assets/8975114/3d9af2b2-f1ea-413a-ac1f-d4a69211388e) Reasonably fast with the maximum (1024) input elements.
1 parent 1ed9fc0 commit 82c30a0

File tree

6 files changed

+260
-0
lines changed

6 files changed

+260
-0
lines changed

ci/licenses_golden/licenses_flutter

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,7 @@ ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/constants.glsl +
10911091
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/gaussian.glsl + ../../../flutter/LICENSE
10921092
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/gradient.glsl + ../../../flutter/LICENSE
10931093
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/path.glsl + ../../../flutter/LICENSE
1094+
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/prefix_sum.glsl + ../../../flutter/LICENSE
10941095
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/texture.glsl + ../../../flutter/LICENSE
10951096
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/transform.glsl + ../../../flutter/LICENSE
10961097
ORIGIN: ../../../flutter/impeller/compiler/shader_lib/impeller/types.glsl + ../../../flutter/LICENSE
@@ -1560,6 +1561,7 @@ ORIGIN: ../../../flutter/impeller/renderer/pipeline_descriptor.cc + ../../../flu
15601561
ORIGIN: ../../../flutter/impeller/renderer/pipeline_descriptor.h + ../../../flutter/LICENSE
15611562
ORIGIN: ../../../flutter/impeller/renderer/pipeline_library.cc + ../../../flutter/LICENSE
15621563
ORIGIN: ../../../flutter/impeller/renderer/pipeline_library.h + ../../../flutter/LICENSE
1564+
ORIGIN: ../../../flutter/impeller/renderer/prefix_sum_test.comp + ../../../flutter/LICENSE
15631565
ORIGIN: ../../../flutter/impeller/renderer/render_pass.cc + ../../../flutter/LICENSE
15641566
ORIGIN: ../../../flutter/impeller/renderer/render_pass.h + ../../../flutter/LICENSE
15651567
ORIGIN: ../../../flutter/impeller/renderer/render_target.cc + ../../../flutter/LICENSE
@@ -3716,6 +3718,7 @@ FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/constants.glsl
37163718
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/gaussian.glsl
37173719
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/gradient.glsl
37183720
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/path.glsl
3721+
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/prefix_sum.glsl
37193722
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/texture.glsl
37203723
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/transform.glsl
37213724
FILE: ../../../flutter/impeller/compiler/shader_lib/impeller/types.glsl
@@ -4185,6 +4188,7 @@ FILE: ../../../flutter/impeller/renderer/pipeline_descriptor.cc
41854188
FILE: ../../../flutter/impeller/renderer/pipeline_descriptor.h
41864189
FILE: ../../../flutter/impeller/renderer/pipeline_library.cc
41874190
FILE: ../../../flutter/impeller/renderer/pipeline_library.h
4191+
FILE: ../../../flutter/impeller/renderer/prefix_sum_test.comp
41884192
FILE: ../../../flutter/impeller/renderer/render_pass.cc
41894193
FILE: ../../../flutter/impeller/renderer/render_pass.h
41904194
FILE: ../../../flutter/impeller/renderer/render_target.cc
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// Copyright 2013 The Flutter Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style license that can be
3+
// found in the LICENSE file.
4+
5+
// Parallel exclusive prefix sum, computed in place in `storage`: after the
// macro runs, each element holds the sum of the elements that preceded it.
6+
// BLOCK_SIZE is the overall storage size while ident must be the global
7+
// x identifier.
//
// How it works:
//   1. Reduction pass: with strides 1, 2, 4, ... invocations whose `ident` is
//      below the current pair count add storage[ai] into storage[bi].
//   2. Invocation 0 clears storage[BLOCK_SIZE - 1].
//   3. Distribution pass: the strides are walked back down; each active
//      invocation moves storage[bi] into storage[ai] and adds the old
//      storage[ai] into storage[bi].
//
// Usage notes:
//   * The macro calls barrier() between steps, outside of the `ident < n`
//     checks, so every invocation that takes part in the barrier has to execute
//     the macro, not only the ones that end up touching `storage`.
//   * At most BLOCK_SIZE / 2 invocations do work in any step (`ident < n`).
//   * The arguments are substituted textually and used several times (for
//     example `2 * ident + 1`), so pass plain identifiers or parenthesized,
//     side-effect-free expressions.
//   * The body declares its own `offset`, `n`, `ai`, `bi` and `temp` inside a
//     do/while scope; the invocation site supplies the trailing semicolon.
//
// NOTE(review): the halving/doubling loops look like they assume BLOCK_SIZE is
// a power of two - confirm before using other sizes.
// NOTE(review): barrier() only synchronizes invocations of one workgroup, so
// when `storage` is workgroup-shared memory `ident` presumably has to be unique
// within a single workgroup - confirm for multi-workgroup dispatches.
8+
#define ExclusivePrefixSum(ident, storage, BLOCK_SIZE) \
9+
do { \
10+
uint offset = 1; \
11+
for (uint n = BLOCK_SIZE / 2; n > 0; n /= 2) { \
12+
if (ident < n) { \
13+
uint ai = offset * (2 * ident + 1) - 1; \
14+
uint bi = offset * (2 * ident + 2) - 1; \
15+
storage[bi] += storage[ai]; \
16+
} \
17+
offset *= 2; \
18+
barrier(); \
19+
} \
20+
\
21+
if (ident == 0) { \
22+
storage[BLOCK_SIZE - 1] = 0; \
23+
} \
24+
barrier(); \
25+
\
26+
for (uint n = 1; n < BLOCK_SIZE; n *= 2) { \
27+
offset /= 2; \
28+
barrier(); \
29+
if (ident < n) { \
30+
uint ai = offset * (2 * ident + 1) - 1; \
31+
uint bi = offset * (2 * ident + 2) - 1; \
32+
uint temp = storage[ai]; \
33+
storage[ai] = storage[bi]; \
34+
storage[bi] += temp; \
35+
} \
36+
} \
37+
barrier(); \
38+
} while (false)

impeller/renderer/BUILD.gn

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ if (impeller_enable_compute) {
2222
shaders = [
2323
"stroke.comp",
2424
"path_polyline.comp",
25+
"prefix_sum_test.comp",
2526
]
2627
}
2728

impeller/renderer/compute_unittests.cc

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "impeller/renderer/compute_command.h"
1919
#include "impeller/renderer/compute_pipeline_builder.h"
2020
#include "impeller/renderer/pipeline_library.h"
21+
#include "impeller/renderer/prefix_sum_test.comp.h"
2122

2223
namespace impeller {
2324
namespace testing {
@@ -103,6 +104,117 @@ TEST_P(ComputeTest, CanCreateComputePass) {
103104
latch.Wait();
104105
}
105106

107+
// Dispatches the prefix-sum test shader over kCount (5) inputs holding 1..5 and
// checks the output buffer against the hard-coded running totals
// {1, 3, 6, 10, 15}.
TEST_P(ComputeTest, CanComputePrefixSum) {
108+
using CS = PrefixSumTestComputeShader;
109+
auto context = GetContext();
110+
ASSERT_TRUE(context);
111+
ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
112+
113+
// Build the compute pipeline for the test shader; bail out early if any step
// fails so later dereferences are safe.
using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
114+
auto pipeline_desc =
115+
SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
116+
ASSERT_TRUE(pipeline_desc.has_value());
117+
auto compute_pipeline =
118+
context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
119+
ASSERT_TRUE(compute_pipeline);
120+
121+
auto cmd_buffer = context->CreateCommandBuffer();
122+
auto pass = cmd_buffer->CreateComputePass();
123+
ASSERT_TRUE(pass && pass->IsValid());
124+
125+
static constexpr size_t kCount = 5;
126+
127+
// Grid size and thread group size are both kCount x 1.
pass->SetGridSize(ISize(kCount, 1));
128+
pass->SetThreadGroupSize(ISize(kCount, 1));
129+
130+
ComputeCommand cmd;
131+
cmd.label = "Compute";
132+
cmd.pipeline = compute_pipeline;
133+
134+
// Inputs are 1, 2, ..., kCount.
CS::InputData<kCount> input_data;
135+
input_data.count = kCount;
136+
for (size_t i = 0; i < kCount; i++) {
137+
input_data.data[i] = 1 + i;
138+
}
139+
140+
// The output lives in a host-visible buffer so the completion callback can
// read it back through the buffer view.
auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
141+
context, "Output Buffer");
142+
143+
CS::BindInputData(
144+
cmd, pass->GetTransientsBuffer().EmplaceStorageBuffer(input_data));
145+
CS::BindOutputData(cmd, output_buffer->AsBufferView());
146+
147+
ASSERT_TRUE(pass->AddCommand(std::move(cmd)));
148+
ASSERT_TRUE(pass->EncodeCommands());
149+
150+
// The completion callback signals `latch`; the test blocks on latch.Wait()
// below until that has happened.
fml::AutoResetWaitableEvent latch;
151+
ASSERT_TRUE(cmd_buffer->SubmitCommands(
152+
[&latch, output_buffer](CommandBuffer::Status status) {
153+
EXPECT_EQ(status, CommandBuffer::Status::kCompleted);
154+
155+
auto view = output_buffer->AsBufferView();
156+
EXPECT_EQ(view.range.length, sizeof(CS::OutputData<kCount>));
157+
158+
CS::OutputData<kCount>* output =
159+
reinterpret_cast<CS::OutputData<kCount>*>(view.contents);
160+
EXPECT_TRUE(output);
161+
162+
// expected[i] is the sum of inputs 0..i inclusive: 1, 1+2, 1+2+3, ...
constexpr uint32_t expected[kCount] = {1, 3, 6, 10, 15};
163+
for (size_t i = 0; i < kCount; i++) {
164+
auto computed_sum = output->data[i];
165+
EXPECT_EQ(computed_sum, expected[i]);
166+
}
167+
latch.Signal();
168+
}));
169+
170+
latch.Wait();
171+
}
172+
173+
// Playground variant of the prefix-sum test: the callback passed to
// OpenPlaygroundHere builds and submits a 1023-element prefix-sum dispatch.
// The output buffer is bound but never read back, so this test does not verify
// the computed sums.
TEST_P(ComputeTest, CanComputePrefixSumLargeInteractive) {
174+
using CS = PrefixSumTestComputeShader;
175+
176+
auto context = GetContext();
177+
ASSERT_TRUE(context);
178+
ASSERT_TRUE(context->GetCapabilities()->SupportsCompute());
179+
180+
// `render_pass` is not used by the callback body; all work goes through a
// separately created command buffer and compute pass.
auto callback = [&](RenderPass& render_pass) -> bool {
181+
// NOTE(review): unlike CanComputePrefixSum, the pipeline descriptor,
// pipeline and compute pass are not checked for validity here.
using SamplePipelineBuilder = ComputePipelineBuilder<CS>;
182+
auto pipeline_desc =
183+
SamplePipelineBuilder::MakeDefaultPipelineDescriptor(*context);
184+
auto compute_pipeline =
185+
context->GetPipelineLibrary()->GetPipeline(pipeline_desc).Get();
186+
187+
auto cmd_buffer = context->CreateCommandBuffer();
188+
auto pass = cmd_buffer->CreateComputePass();
189+
190+
static constexpr size_t kCount = 1023;
191+
192+
// Only the grid size is set; no explicit thread group size is given
// (CanComputePrefixSum sets both).
pass->SetGridSize(ISize(kCount, 1));
193+
194+
ComputeCommand cmd;
195+
cmd.label = "Compute";
196+
cmd.pipeline = compute_pipeline;
197+
198+
// Inputs are 1, 2, ..., kCount.
CS::InputData<kCount> input_data;
199+
input_data.count = kCount;
200+
for (size_t i = 0; i < kCount; i++) {
201+
input_data.data[i] = 1 + i;
202+
}
203+
204+
auto output_buffer = CreateHostVisibleDeviceBuffer<CS::OutputData<kCount>>(
205+
context, "Output Buffer");
206+
207+
CS::BindInputData(
208+
cmd, pass->GetTransientsBuffer().EmplaceStorageBuffer(input_data));
209+
CS::BindOutputData(cmd, output_buffer->AsBufferView());
210+
211+
// The results of AddCommand and EncodeCommands are ignored; only the
// submission result is reported back to the playground.
pass->AddCommand(std::move(cmd));
212+
pass->EncodeCommands();
213+
return cmd_buffer->SubmitCommands();
214+
};
215+
ASSERT_TRUE(OpenPlaygroundHere(callback));
216+
}
217+
106218
TEST_P(ComputeTest, MultiStageInputAndOutput) {
107219
using CS1 = Stage1ComputeShader;
108220
using Stage1PipelineBuilder = ComputePipelineBuilder<CS1>;
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
// Copyright 2013 The Flutter Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style license that can be
3+
// found in the LICENSE file.
4+
5+
// Workgroup size declared by the shader: 512 x 1 invocations.
layout(local_size_x = 512, local_size_y = 1) in;
6+
layout(std430) buffer;
7+
8+
#include <impeller/prefix_sum.glsl>
9+
10+
// Size of the shared scratch array, also passed to ExclusivePrefixSum as its
// BLOCK_SIZE argument.
#define BLOCK_SIZE 1024
11+
12+
// Input values: `count` is the number of valid entries in `data`.
layout(binding = 0) readonly buffer InputData {
13+
uint count;
14+
uint data[];
15+
}
16+
input_data;
17+
18+
// Output values: main() writes one entry per input entry.
layout(binding = 1) writeonly buffer OutputData {
19+
uint data[];
20+
}
21+
output_data;
22+
23+
// Needs to be number of threads per threadgroup.
// NOTE(review): BLOCK_SIZE is 1024 while local_size_x above is 512, so the
// array size and the declared workgroup size do not match the statement
// above - confirm which value is intended.
24+
shared uint memory[BLOCK_SIZE];
25+
26+
// Loads one input value per invocation into shared memory, runs the exclusive
// prefix sum over it, and writes the inclusive sum for each valid input index.
void main() {
27+
// NOTE(review): the global invocation id is used to index workgroup-shared
// memory and is not checked against BLOCK_SIZE; this presumably relies on
// the dispatch covering at most BLOCK_SIZE invocations in a single
// workgroup - confirm.
uint ident = gl_GlobalInvocationID.x;
28+
29+
// Invocations past the end of the input contribute 0.
uint value = 0;
30+
if (ident < input_data.count) {
31+
value = input_data.data[ident];
32+
}
33+
34+
// Each invocation writes only its own slot; the barrier makes those writes
// visible before the scan starts reading neighbouring slots.
memory[ident] = value;
35+
barrier();
36+
37+
// Called outside of any branch so that every invocation reaches the
// barrier() calls inside the macro.
ExclusivePrefixSum(ident, memory, BLOCK_SIZE);
38+
39+
if (ident < input_data.count) {
40+
// Convert exclusive to inclusive sum.
41+
output_data.data[ident] = memory[ident] + value;
42+
}
43+
}

impeller/tools/malioc.json

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14027,6 +14027,68 @@
1402714027
}
1402814028
}
1402914029
},
14030+
"flutter/impeller/renderer/prefix_sum_test.comp.vkspv": {
14031+
"Mali-G78": {
14032+
"core": "Mali-G78",
14033+
"filename": "flutter/impeller/renderer/prefix_sum_test.comp.vkspv",
14034+
"has_uniform_computation": true,
14035+
"type": "Compute",
14036+
"variants": {
14037+
"Main": {
14038+
"fp16_arithmetic": null,
14039+
"has_stack_spilling": false,
14040+
"performance": {
14041+
"longest_path_bound_pipelines": [
14042+
"load_store"
14043+
],
14044+
"longest_path_cycles": [
14045+
2.549999952316284,
14046+
0.0,
14047+
2.549999952316284,
14048+
1.0,
14049+
72.0,
14050+
0.0
14051+
],
14052+
"pipelines": [
14053+
"arith_total",
14054+
"arith_fma",
14055+
"arith_cvt",
14056+
"arith_sfu",
14057+
"load_store",
14058+
"texture"
14059+
],
14060+
"shortest_path_bound_pipelines": [
14061+
"load_store"
14062+
],
14063+
"shortest_path_cycles": [
14064+
0.949999988079071,
14065+
0.0,
14066+
0.949999988079071,
14067+
0.0,
14068+
1.0,
14069+
0.0
14070+
],
14071+
"total_bound_pipelines": [
14072+
"load_store"
14073+
],
14074+
"total_cycles": [
14075+
2.549999952316284,
14076+
0.0,
14077+
2.549999952316284,
14078+
1.0,
14079+
72.0,
14080+
0.0
14081+
]
14082+
},
14083+
"shared_storage_used": 4096,
14084+
"stack_spill_bytes": 0,
14085+
"thread_occupancy": 100,
14086+
"uniform_registers_used": 8,
14087+
"work_registers_used": 21
14088+
}
14089+
}
14090+
}
14091+
},
1403014092
"flutter/impeller/renderer/stroke.comp.vkspv": {
1403114093
"Mali-G78": {
1403214094
"core": "Mali-G78",

0 commit comments

Comments
 (0)