Skip to content

Commit 0215b37

Browse files
authored
Merge pull request #139 from RSpliet/dev
Assortment of OpenCL fixes
2 parents 0898436 + 55a05c1 commit 0215b37

File tree

1 file changed

+53
-1
lines changed

1 file changed

+53
-1
lines changed

libopencl/opencl_runtime_api.cc

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,15 +263,27 @@ void _cl_kernel::SetKernelArg(
263263

264264
cl_int _cl_kernel::bind_args( gpgpu_ptx_sim_arg_list_t &arg_list )
265265
{
266+
size_t offset = 0;
267+
266268
assert( arg_list.empty() );
267269
unsigned k=0;
268270
std::map<unsigned, arg_info>::iterator i;
269271
for( i = m_args.begin(); i!=m_args.end(); i++ ) {
270272
if( i->first != k )
271273
return CL_INVALID_KERNEL_ARGS;
274+
272275
arg_info arg = i->second;
273-
gpgpu_ptx_sim_arg param( arg.m_arg_value, arg.m_arg_size, 0);
276+
const symbol *sym = m_kernel_impl->get_arg(i->first);
277+
const type_info_key &t = sym->type()->get_key();
278+
279+
int align = (t.get_alignment_spec() == -1) ? arg.m_arg_size : t.get_alignment_spec();
280+
if( offset % align )
281+
offset += (align - (offset % align));
282+
283+
gpgpu_ptx_sim_arg param( arg.m_arg_value, arg.m_arg_size, offset );
274284
arg_list.push_front( param );
285+
286+
offset += arg.m_arg_size;
275287
k++;
276288
}
277289
return CL_SUCCESS;
@@ -950,6 +962,17 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
950962
gpgpu_ptx_sim_memcpy_symbol( "%_global_block_offset", zeros, 3 * sizeof(int), 0, 1, gpu );
951963
}
952964
kernel_info_t *grid = gpgpu_opencl_ptx_sim_init_grid(kernel->get_implementation(),params,GridDim,BlockDim,gpu);
965+
966+
//do dynamic PDOM analysis for performance simulation scenario
967+
std::string kname = grid->name();
968+
function_info *kernel_func_info = grid->entry();
969+
if (kernel_func_info->is_pdom_set()) {
970+
printf("GPGPU-Sim PTX: PDOM analysis already done for %s \n", kname.c_str() );
971+
} else {
972+
printf("GPGPU-Sim PTX: finding reconvergence points for \'%s\'...\n", kname.c_str() );
973+
kernel_func_info->do_pdom();
974+
kernel_func_info->set_pdom();
975+
}
953976
if ( g_ptx_sim_mode )
954977
gpgpu_opencl_ptx_sim_main_func( grid );
955978
else
@@ -1255,6 +1278,35 @@ clGetProgramInfo(cl_program program,
12551278
return CL_SUCCESS;
12561279
}
12571280

1281+
/**
 * Minimal implementation of the OpenCL clGetProgramBuildInfo query.
 *
 * Every supported query is answered with a fixed value:
 *   - CL_PROGRAM_BUILD_STATUS  -> CL_BUILD_SUCCESS
 *   - CL_PROGRAM_BUILD_OPTIONS -> "" (empty string)
 *   - CL_PROGRAM_BUILD_LOG     -> "" (empty string)
 *   - CL_PROGRAM_BINARY_TYPE   -> CL_PROGRAM_BINARY_TYPE_EXECUTABLE
 *
 * @param program              Program object being queried; must not be null.
 * @param device               Not consulted by this implementation.
 * @param param_name           Which piece of build information is requested.
 * @param param_value_size     Size in bytes of the buffer behind param_value.
 * @param param_value          Destination buffer for the result.
 * @param param_value_size_ret Destination for the result size.
 *
 * NOTE(review): the handling of param_value_size, param_value and
 * param_value_size_ret is done entirely inside the CL_CASE / CL_STRING_CASE
 * macros, which are defined elsewhere in this file -- confirm their
 * null-pointer and size checks there.
 *
 * @return CL_INVALID_PROGRAM if program is null, CL_INVALID_VALUE for an
 *         unsupported param_name, CL_SUCCESS when the switch falls through to
 *         the end (the macros may return an error code of their own).
 */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetProgramBuildInfo (cl_program program,
                       cl_device_id device,
                       cl_program_build_info param_name,
                       size_t param_value_size,
                       void * param_value,
                       size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
{
   // The OpenCL specification mandates CL_INVALID_PROGRAM for an invalid
   // program object; a null handle can never be valid.
   if( !program )
      return CL_INVALID_PROGRAM;

   // Not referenced directly below; presumably used by the CL_STRING_CASE
   // macro expansion -- verify against the macro definition.
   char *buf = static_cast<char*>(param_value);

   switch( param_name ) {
   case CL_PROGRAM_BUILD_STATUS:
      CL_CASE( cl_build_status, CL_BUILD_SUCCESS );
      break;
   case CL_PROGRAM_BUILD_OPTIONS:
   case CL_PROGRAM_BUILD_LOG:
      // No build options or build log are recorded, so both report "".
      CL_STRING_CASE( "" );
      break;
   case CL_PROGRAM_BINARY_TYPE:
      CL_CASE( cl_program_binary_type, CL_PROGRAM_BINARY_TYPE_EXECUTABLE );
      break;
   default:
      return CL_INVALID_VALUE;
   }

   return CL_SUCCESS;
}
1309+
12581310
extern CL_API_ENTRY cl_int CL_API_CALL
12591311
clEnqueueCopyBuffer(cl_command_queue command_queue,
12601312
cl_mem src_buffer,

0 commit comments

Comments
 (0)