@@ -263,15 +263,27 @@ void _cl_kernel::SetKernelArg(
263263
264264cl_int _cl_kernel::bind_args ( gpgpu_ptx_sim_arg_list_t &arg_list )
265265{
266+ size_t offset = 0 ;
267+
266268 assert ( arg_list.empty () );
267269 unsigned k=0 ;
268270 std::map<unsigned , arg_info>::iterator i;
269271 for ( i = m_args.begin (); i!=m_args.end (); i++ ) {
270272 if ( i->first != k )
271273 return CL_INVALID_KERNEL_ARGS;
274+
272275 arg_info arg = i->second ;
273- gpgpu_ptx_sim_arg param ( arg.m_arg_value , arg.m_arg_size , 0 );
276+ const symbol *sym = m_kernel_impl->get_arg (i->first );
277+ const type_info_key &t = sym->type ()->get_key ();
278+
279+ int align = (t.get_alignment_spec () == -1 ) ? arg.m_arg_size : t.get_alignment_spec ();
280+ if ( offset % align )
281+ offset += (align - (offset % align));
282+
283+ gpgpu_ptx_sim_arg param ( arg.m_arg_value , arg.m_arg_size , offset );
274284 arg_list.push_front ( param );
285+
286+ offset += arg.m_arg_size ;
275287 k++;
276288 }
277289 return CL_SUCCESS;
@@ -950,6 +962,17 @@ clEnqueueNDRangeKernel(cl_command_queue command_queue,
950962 gpgpu_ptx_sim_memcpy_symbol ( " %_global_block_offset" , zeros, 3 * sizeof (int ), 0 , 1 , gpu );
951963 }
952964 kernel_info_t *grid = gpgpu_opencl_ptx_sim_init_grid (kernel->get_implementation (),params,GridDim,BlockDim,gpu);
965+
966+ // do dynamic PDOM analysis for performance simulation scenario
967+ std::string kname = grid->name ();
968+ function_info *kernel_func_info = grid->entry ();
969+ if (kernel_func_info->is_pdom_set ()) {
970+ printf (" GPGPU-Sim PTX: PDOM analysis already done for %s \n " , kname.c_str () );
971+ } else {
972+ printf (" GPGPU-Sim PTX: finding reconvergence points for \' %s\' ...\n " , kname.c_str () );
973+ kernel_func_info->do_pdom ();
974+ kernel_func_info->set_pdom ();
975+ }
953976 if ( g_ptx_sim_mode )
954977 gpgpu_opencl_ptx_sim_main_func ( grid );
955978 else
@@ -1255,6 +1278,35 @@ clGetProgramInfo(cl_program program,
12551278 return CL_SUCCESS;
12561279}
12571280
1281+ extern CL_API_ENTRY cl_int CL_API_CALL
1282+ clGetProgramBuildInfo (cl_program program,
1283+ cl_device_id device,
1284+ cl_program_build_info param_name,
1285+ size_t param_value_size,
1286+ void * param_value,
1287+ size_t * param_value_size_ret) CL_API_SUFFIX__VERSION_1_0
1288+ {
1289+ char *buf = (char *)param_value;
1290+
1291+ switch ( param_name ) {
1292+ case CL_PROGRAM_BUILD_STATUS:
1293+ CL_CASE ( cl_build_status, CL_BUILD_SUCCESS );
1294+ break ;
1295+ case CL_PROGRAM_BUILD_OPTIONS:
1296+ case CL_PROGRAM_BUILD_LOG:
1297+ CL_STRING_CASE ( " " );
1298+ break ;
1299+ case CL_PROGRAM_BINARY_TYPE:
1300+ CL_CASE ( cl_program_binary_type, CL_PROGRAM_BINARY_TYPE_EXECUTABLE );
1301+ break ;
1302+ default :
1303+ return CL_INVALID_VALUE;
1304+ break ;
1305+ }
1306+
1307+ return CL_SUCCESS;
1308+ }
1309+
12581310extern CL_API_ENTRY cl_int CL_API_CALL
12591311clEnqueueCopyBuffer (cl_command_queue command_queue,
12601312 cl_mem src_buffer,
0 commit comments