Skip to content

Commit 662e896

Browse files
committed
Move curve from constant mem to device ptr.
Tests pass (Windows, Release, Seatbelts=ON, 1038 Pass, 5 Disabled) Note, Environment cache is still in constant memory.
1 parent 14d4530 commit 662e896

File tree

8 files changed

+162
-139
lines changed

8 files changed

+162
-139
lines changed

include/flamegpu/exception/FLAMEGPUDeviceException_device.cuh

+8-8
Original file line numberDiff line numberDiff line change
@@ -85,16 +85,16 @@ class DeviceException {
8585
// Only the thread which first reported error gets to output
8686
if (hasError) {
8787
// Only output once
88-
if (buff[0]->format_string[0])
88+
if (buff[1]->format_string[0])
8989
return;
9090
// Copy the format string
9191
unsigned int eos = 0;
9292
for (eos = 0; eos < DeviceExceptionBuffer::FORMAT_BUFF_LEN; ++eos)
9393
if (format[eos] == '\0')
9494
break;
95-
memcpy(buff[0]->format_string, format, eos * sizeof(char));
95+
memcpy(buff[1]->format_string, format, eos * sizeof(char));
9696
// Process args
97-
subformat_recurse(buff[0], args...);
97+
subformat_recurse(buff[1], args...);
9898
}
9999
}
100100

@@ -139,14 +139,14 @@ class DeviceException {
139139
if (hasError) {
140140
// Copy file location
141141
const size_t file_len = strlen(file);
142-
memcpy(buff[0]->file_path, file, file_len);
142+
memcpy(buff[1]->file_path, file, file_len);
143143
// Copy line no
144-
buff[0]->line_no = line;
144+
buff[1]->line_no = line;
145145
// Copy block/thread indices
146146
const uint3 bid3 = blockIdx;
147-
memcpy(buff[0]->block_id, &bid3, sizeof(unsigned int) * 3);
147+
memcpy(buff[1]->block_id, &bid3, sizeof(unsigned int) * 3);
148148
const uint3 tid3 = threadIdx;
149-
memcpy(buff[0]->thread_id, &tid3, sizeof(unsigned int) * 3);
149+
memcpy(buff[1]->thread_id, &tid3, sizeof(unsigned int) * 3);
150150
}
151151
}
152152
/**
@@ -190,7 +190,7 @@ __device__ inline void DeviceException::subformat(DeviceExceptionBuffer *buff, c
190190
__device__ unsigned int DeviceException::getErrorCount() {
191191
extern __shared__ DeviceExceptionBuffer* buff[];
192192
// Are we the first exception
193-
return atomicInc(&buff[0]->error_count, UINT_MAX);
193+
return atomicInc(&buff[1]->error_count, UINT_MAX);
194194
}
195195
#endif
196196
#else

include/flamegpu/runtime/AgentFunction.cuh

+14-5
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@ typedef void(AgentFunctionWrapper)(
1818
exception::DeviceExceptionBuffer *error_buffer,
1919
#endif
2020
detail::curve::Curve::NamespaceHash instance_id_hash,
21+
#ifndef __CUDACC_RTC__
22+
const detail::curve::Curve::CurveTable *d_curve_table,
23+
#endif
2124
detail::curve::Curve::NamespaceHash agent_func_name_hash,
2225
detail::curve::Curve::NamespaceHash messagename_inp_hash,
2326
detail::curve::Curve::NamespaceHash messagename_outp_hash,
@@ -58,6 +61,9 @@ __global__ void agent_function_wrapper(
5861
exception::DeviceExceptionBuffer *error_buffer,
5962
#endif
6063
detail::curve::Curve::NamespaceHash instance_id_hash,
64+
#ifndef __CUDACC_RTC__
65+
const detail::curve::Curve::CurveTable* d_curve_table,
66+
#endif
6167
detail::curve::Curve::NamespaceHash agent_func_name_hash,
6268
detail::curve::Curve::NamespaceHash messagename_inp_hash,
6369
detail::curve::Curve::NamespaceHash messagename_outp_hash,
@@ -70,18 +76,21 @@ __global__ void agent_function_wrapper(
7076
unsigned int *scanFlag_agentDeath,
7177
unsigned int *scanFlag_messageOutput,
7278
unsigned int *scanFlag_agentOutput) {
73-
#if !defined(SEATBELTS) || SEATBELTS
74-
// We place this at the start of shared memory, so we can locate it anywhere in device code without a reference
75-
extern __shared__ exception::DeviceExceptionBuffer *buff[];
79+
// We place these at the start of shared memory, so we can locate it anywhere in device code without a reference
7680
if (threadIdx.x == 0) {
77-
buff[0] = error_buffer;
81+
extern __shared__ const void* sm[];
82+
#ifndef __CUDACC_RTC__
83+
sm[0] = d_curve_table;
84+
#endif
85+
#if !defined(SEATBELTS) || SEATBELTS
86+
sm[1] = error_buffer;
87+
#endif
7888
}
7989

8090
#if defined(__CUDACC__) // @todo - This should not be required. This template should only ever be processed by a CUDA compiler.
8191
// Sync the block after Thread 0 has written to shared.
8292
__syncthreads();
8393
#endif // __CUDACC__
84-
#endif
8594
// Must be terminated here, else AgentRandom has bounds issues inside DeviceAPI constructor
8695
if (DeviceAPI<MessageIn, MessageOut>::getThreadIndex() >= popNo)
8796
return;

include/flamegpu/runtime/AgentFunctionCondition.cuh

+19-9
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ typedef void(AgentFunctionConditionWrapper)(
1414
exception::DeviceExceptionBuffer *error_buffer,
1515
#endif
1616
detail::curve::Curve::NamespaceHash instance_id_hash,
17+
#ifndef __CUDACC_RTC__
18+
const detail::curve::Curve::CurveTable* d_curve_table,
19+
#endif
1720
detail::curve::Curve::NamespaceHash agent_func_name_hash,
1821
const unsigned int popNo,
1922
curandState *d_rng,
@@ -37,21 +40,28 @@ __global__ void agent_function_condition_wrapper(
3740
exception::DeviceExceptionBuffer *error_buffer,
3841
#endif
3942
detail::curve::Curve::NamespaceHash instance_id_hash,
43+
#ifndef __CUDACC_RTC__
44+
const detail::curve::Curve::CurveTable* d_curve_table,
45+
#endif
4046
detail::curve::Curve::NamespaceHash agent_func_name_hash,
4147
const unsigned int popNo,
4248
curandState *d_rng,
4349
unsigned int *scanFlag_conditionResult) {
44-
#if !defined(SEATBELTS) || SEATBELTS
45-
// We place this at the start of shared memory, so we can locate it anywhere in device code without a reference
46-
extern __shared__ exception::DeviceExceptionBuffer *shared_mem[];
50+
// We place these at the start of shared memory, so we can locate it anywhere in device code without a reference
4751
if (threadIdx.x == 0) {
48-
shared_mem[0] = error_buffer;
49-
}
50-
// @todo - this tempalte should onyl ever be seen by a cuda compiler.
51-
#if defined(__CUDACC__)
52-
__syncthreads();
53-
#endif
52+
extern __shared__ const void* sm[];
53+
#ifndef __CUDACC_RTC__
54+
sm[0] = d_curve_table;
5455
#endif
56+
#if !defined(SEATBELTS) || SEATBELTS
57+
sm[1] = error_buffer;
58+
#endif
59+
}
60+
61+
#if defined(__CUDACC__) // @todo - This should not be required. This template should only ever be processed by a CUDA compiler.
62+
// Sync the block after Thread 0 has written to shared.
63+
__syncthreads();
64+
#endif // __CUDACC__
5565
// Must be terminated here, else AgentRandom has bounds issues inside DeviceAPI constructor
5666
if (ReadOnlyDeviceAPI::getThreadIndex() >= popNo)
5767
return;

include/flamegpu/runtime/DeviceAPI.cuh

+8-2
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,12 @@ class ReadOnlyDeviceAPI {
4141
template<typename AgentFunctionCondition>
4242
friend __global__ void agent_function_condition_wrapper(
4343
#if !defined(SEATBELTS) || SEATBELTS
44-
exception::DeviceExceptionBuffer *error_buffer,
44+
exception::DeviceExceptionBuffer *,
4545
#endif
4646
detail::curve::Curve::NamespaceHash,
47+
#ifndef __CUDACC_RTC__
48+
const detail::curve::Curve::CurveTable *,
49+
#endif
4750
detail::curve::Curve::NamespaceHash,
4851
const unsigned int,
4952
curandState *,
@@ -153,9 +156,12 @@ class DeviceAPI {
153156
template<typename AgentFunction, typename _MessageIn, typename _MessageOut>
154157
friend __global__ void agent_function_wrapper(
155158
#if !defined(SEATBELTS) || SEATBELTS
156-
exception::DeviceExceptionBuffer *error_buffer,
159+
exception::DeviceExceptionBuffer *,
157160
#endif
158161
detail::curve::Curve::NamespaceHash,
162+
#ifndef __CUDACC_RTC__
163+
const detail::curve::Curve::CurveTable *,
164+
#endif
159165
detail::curve::Curve::NamespaceHash,
160166
detail::curve::Curve::NamespaceHash,
161167
detail::curve::Curve::NamespaceHash,

0 commit comments

Comments
 (0)