-
Notifications
You must be signed in to change notification settings - Fork 0
/
lfbLL.cpp
266 lines (224 loc) · 7.69 KB
/
lfbLL.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
/* Copyright 2011 Pyarelal Knowles, under GNU LGPL (see LICENCE.txt) */
#include <assert.h>
#include <stdio.h>
#include <string>
#include <vector>
#include <map>
#include <set>
#include <pyarlib/gpu.h>
#include "lfbLL.h"
#include "extraglenums.h"
//makes shaders/lfbLL.glsl available under the name lfbLLGLSL; the LFB_LL constructor passes
//RESOURCE(lfbLLGLSL) to Shader::include() the first time an instance is created
EMBED(lfbLLGLSL, shaders/lfbLL.glsl);
//scratch buffer shared by every LFB_LL instance. count() creates it on first use, copies the
//atomic counter buffer into it and maps this copy for reading rather than the counter itself.
//this is stupid, but it works
//NOTE(review): nothing in the visible code releases or deletes this buffer - confirm that is intended
static TextureBuffer* justOneInt = NULL;
//Constructs a linked list LFB: hands the embedded lfbLL.glsl source to the shader
//include system (first instance only) and creates the buffer objects used by this method
LFB_LL::LFB_LL()
{
	//counts are only kept when something asks for them
	lfbNeedsCounts = false;

	//the embedded shader source only has to be registered once, so the first
	//instance to be constructed does it and flags that it has been done
	static bool embedRegistered = false;
	if (embedRegistered == false)
	{
		Shader::include("lfbLL.glsl", RESOURCE(lfbLLGLSL));
		embedRegistered = true;
	}

	//atomic counter used to allocate list nodes
	alloc = new GPUBuffer(GL_ATOMIC_COUNTER_BUFFER);

	//unsigned int buffers: list heads, next pointers and counts
	headPtrs = new TextureBuffer(GL_R32UI);
	nextPtrs = new TextureBuffer(GL_R32UI);
	counts = new TextureBuffer(GL_R32UI);

	//fragment data, stored in whatever format lfbDataType currently names
	data = new TextureBuffer(lfbDataType);
}
//Destroys the LFB: calls release() on each buffer and deletes the buffer object.
//Each buffer is released before it is deleted.
LFB_LL::~LFB_LL()
{
	//atomic counter
	alloc->release();
	delete alloc;

	//head pointers
	headPtrs->release();
	delete headPtrs;

	//next pointers
	nextPtrs->release();
	delete nextPtrs;

	//fragment data
	data->release();
	delete data;

	//counts
	counts->release();
	delete counts;
}
//Sizes the atomic counter and head pointer buffers, resizes the fragment pool if the
//data format changed, and records the resulting buffer sizes in the memory table.
//size: not read here - the head pointer buffer is sized from totalPixels
//returns: always true
bool LFB_LL::_resize(vec2i size)
{
	const size_t uintBytes = sizeof(unsigned int);

	//a single counter, plus one head pointer for every pixel
	alloc->resize(uintBytes);
	headPtrs->resize(uintBytes * totalPixels);

	//a different data type probably means a different element size,
	//in which case the pool has to be resized for the same number of fragments
	if (data->setFormat(lfbDataType))
	{
		resizePool(allocFragments);
	}

	//memory usage bookkeeping
	memory["Counter"] = alloc->size();
	memory["Head Ptrs"] = headPtrs->size();
	memory["Counts"] = counts->size();

	return true;
}
//Records the fragment total and resizes the fragment pool (next pointers and data) if the
//current allocation is too small, or larger than needed by more than the LFB_UNDERALLOCATE factor.
//allocs: value of the allocation counter, which includes the reserved null element at index zero
//returns: true if the pool buffers were resized, false if the existing allocation was kept
bool LFB_LL::resizePool(size_t allocs)
{
	//element zero is the null pointer, so it is not counted as a fragment.
	//allocs can be zero (count() passes zero when it fails to read the counter), and an
	//unguarded allocs - 1 would then wrap around to a huge fragment total
	totalFragments = allocs > 0 ? allocs - 1 : 0;

	//never size the pool for fewer than 32 elements
	allocs = mymax(allocs, (size_t)32);

	//only resize if necessary
	//comparing using ->size and lfbDataStride allows for changes in lfbDataStride
	if (allocs * lfbDataStride < data->size() * LFB_UNDERALLOCATE || data->size() < allocs * lfbDataStride)
	{
		//allocate a little more than needed
		allocFragments = (int)(allocs * LFB_OVERALLOCATE);
		nextPtrs->resize(allocFragments * sizeof(unsigned int));
		data->resize(allocFragments * lfbDataStride);

		//memory usage bookkeeping
		memory["Next Ptrs"] = nextPtrs->size();
		memory["Data"] = data->size();

		if (!data->object) //debugging
			printf("Error resizing pool data %zux%i=%.2fMB\n", allocFragments, lfbDataStride, allocFragments * lfbDataStride / 1024.0 / 1024.0);
		return true;
	}
	return false;
}
//Resets the buffers ready for a render pass: the allocation counter is set to one, the
//head pointers are zeroed, and the counts buffer is either sized and zeroed (when counts
//are wanted) or released if it currently exists
void LFB_LL::initBuffers()
{
	//index zero is reserved as the NULL pointer, so allocation starts from one.
	//this costs 4 extra bytes of pool, but means zeroBuffer() can be reused for the head pointers
	static const int firstFreeIndex = 1;
	CHECKERROR;
	alloc->buffer(&firstFreeIndex, sizeof(firstFreeIndex));
	CHECKERROR;

	//a zero head pointer is a NULL pointer, i.e. an empty list
	zeroBuffer(headPtrs);

	const bool wantCounts = computeCounts || lfbNeedsCounts;
	if (!wantCounts)
	{
		//counts are not needed - drop the buffer if there is one
		if (*counts)
			counts->release();
		return;
	}

	//one count per pixel, starting from zero
	counts->resize(sizeof(unsigned int) * totalPixels);
	zeroBuffer(counts);
}
//Adds the shader defines for this LFB method on top of those set by LFBBase.
//program: shader to receive the defines
void LFB_LL::setDefines(Shader& program)
{
	//base class defines come first
	LFBBase::setDefines(program);

	//counts are required if either flag is set
	const bool requireCounts = computeCounts || lfbNeedsCounts;

	program.define("LFB_METHOD_H", "lfbLL.glsl");
	program.define("LFB_REQUIRE_COUNTS", requireCounts);
}
//Binds the atomic counter and passes the LFB info struct and buffers to a shader.
//program: shader to set the uniforms on
//suffix: appended to every uniform name (lfbInfo, headPtrs, nextPtrs, data, counts)
//returns: false if a buffer that is needed has no GL object yet, otherwise true
bool LFB_LL::setUniforms(Shader& program, std::string suffix)
{
	assert(allocFragments < ((size_t)1<<31)-1); //current GLSL code only supports 32 bit signed int image unit addressing

	//the counter and head pointers must always exist
	if (!(alloc->object && headPtrs->object))
	{
		return false;
	}

	//once past PRE_INIT, a non-empty pool must have both of its buffers too
	const bool poolMissing = !(nextPtrs->object && data->object);
	if (poolMissing && state != PRE_INIT && allocFragments > 0)
	{
		return false;
	}

	//uniform names, each with the caller's suffix
	const std::string infoUniform = "lfbInfo" + suffix;
	const std::string headUniform = "headPtrs" + suffix;
	const std::string nextUniform = "nextPtrs" + suffix;
	const std::string dataUniform = "data" + suffix;
	const std::string countsUniform = "counts" + suffix;

	//the allocation counter goes on atomic counter binding zero
	glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, *alloc);

	//info struct: the size is only set when it is known, the allocation always
	if (size2D.x > 0)
	{
		program.set(infoUniform + ".size", size2D);
	}
	program.set(infoUniform + ".fragAlloc", (int)allocFragments);

	//buffers are exposed either as bindless handles or via image units
	int exposeAs = bindless ? Shader::BINDLESS : Shader::IMAGE_UNIT;

	//head pointers were checked above; the remaining buffers are only set if they exist
	program.set(exposeAs, headUniform, *headPtrs);
	if (nextPtrs->object)
	{
		program.set(exposeAs, nextUniform, *nextPtrs);
	}
	if (data->object)
	{
		program.set(exposeAs, dataUniform, *data);
	}
	if (*counts)
	{
		program.set(exposeAs, countsUniform, *counts);
	}

	return true;
}
//Starts a frame: runs the base class begin(), then resets the buffers via initBuffers()
//with memory barriers issued before and after the reset.
//returns: always true
bool LFB_LL::begin()
{
//mark the start of the frame for profiler averaging
//if (profile) profile->begin();
//parent begin - may trigger ::resize()
LFBBase::begin();
//zero alloc counter and head pointers
//barriers are issued first for image and atomic counter access, then the buffers are reset
glMemoryBarrierEXT(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT_EXT);
glMemoryBarrierEXT(GL_ATOMIC_COUNTER_BARRIER_BIT_EXT);
initBuffers();
//barrier for the buffer updates made by initBuffers()
glMemoryBarrierEXT(GL_BUFFER_UPDATE_BARRIER_BIT_EXT);
//profiler label for the time taken to reset the buffers
if (profile) profile->time("Zero");
return true; //full render, including colour
}
//Reads back the allocation counter after a render pass and resizes the fragment pool to match.
//returns: true if the pool was resized and the buffers re-initialised, in which case the caller
//must render again; false if the render is complete, or if the counter could not be read
bool LFB_LL::count()
{
	glMemoryBarrierEXT(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT_EXT);
	glMemoryBarrierEXT(GL_ATOMIC_COUNTER_BARRIER_BIT_EXT);
	if (profile) profile->time("Render");
	LFBBase::count();

	//read the alloc atomic counter to find the number of fragments (or pages)
	//if this number is greater than that allocated, a re-render is needed
	unsigned int numAllocs = 0;
	bool counterRead = false;
#if 0
	//in some strange circumstances (found during the grid tests) this is really slow
	unsigned int* d = (unsigned int*)alloc->map(true, false);
	if (d)
	{
		numAllocs = d[0];
		alloc->unmap();
		counterRead = true;
	}
#else
	//this is stupid but works, somehow avoiding the sync/blocking issue
	if (!justOneInt)
	{
		justOneInt = new TextureBuffer(GL_R32UI);
		justOneInt->resize(alloc->size());
	}
	alloc->copy(justOneInt);
	unsigned int* d = (unsigned int*)justOneInt->map(true, false);
	if (d)
	{
		numAllocs = d[0];
		justOneInt->unmap();
		counterRead = true;
	}
	else
		printf("Error: Unable to map alloc counter for linked list LFB\n");
#endif
	if (profile) profile->time("Read Total");

	//without a counter value there is nothing to size the pool from. passing zero on to
	//resizePool() would record an empty frame and could shrink the pool and force a
	//needless re-render, so leave the pool as it is
	if (!counterRead)
		return false;

	//FIXME: if pool is reducing in size a re-render is still done
	if (resizePool(numAllocs))
	{
		printf("Linked List Re-Render\n");
		initBuffers();
		return true; //please do a second pass - we didn't allocate enough
	}
	else
		return false; //render is done, no second pass needed
}
//Finishes a frame: runs the base class end() and issues barriers for image and atomic counter access.
//returns: totalFragments, as last recorded by resizePool()
size_t LFB_LL::end()
{
LFBBase::end();
glMemoryBarrierEXT(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT_EXT);
glMemoryBarrierEXT(GL_ATOMIC_COUNTER_BARRIER_BIT_EXT);
//NOTE: profile will average this time, so for it to become significant many re-renders must be done
if (profile) profile->time("Re-Render");
//no subtraction is needed here: resizePool() already took one off the counter value
//for the "null" element at index zero when it set totalFragments
return totalFragments;
}
//returns: the name identifying this LFB method
std::string LFB_LL::getName()
{
	return std::string("LinkedListLFB");
}
//Builds a histogram of the values held in the counts buffer.
//histogram: output, cleared first; histogram[n] is the number of counts entries equal to n
//returns: the result of LFBBase::getDepthHistogram() when the counts buffer is empty,
//false if the counts buffer cannot be mapped, otherwise true
bool LFB_LL::getDepthHistogram(std::vector<unsigned int>& histogram)
{
	//no counts buffer - fall back to the base class implementation
	if (!counts->size())
		return LFBBase::getDepthHistogram(histogram);

	histogram.clear();

	unsigned int* l = (unsigned int*)counts->map(true, false);
	assert(l);
	//assert() compiles away under NDEBUG, so a failed map must still be
	//rejected here rather than dereferenced in the loop below
	if (!l)
		return false;

	const size_t pixels = getTotalPixels();
	for (size_t i = 0; i < pixels; ++i)
	{
		const unsigned int depth = l[i];
		assert(depth < 1024);

		//grow the histogram on demand so that depth is a valid index
		if (histogram.size() <= depth)
			histogram.resize(depth+1, 0);
		histogram[depth]++;
	}

	bool ok = counts->unmap();
	assert(ok);
	(void)ok; //only read by the assert, which is compiled out under NDEBUG
	return true;
}