diff --git a/examples/tools/ompt/README.md b/examples/tools/ompt/README.md new file mode 100644 index 000000000..2901723a2 --- /dev/null +++ b/examples/tools/ompt/README.md @@ -0,0 +1,54 @@ +OMPT target support: Examples to demonstrate how a tool would use the OMPT target APIs +======================================================================================= + +The examples simulate how a tool is expected to use OMPT target +support. The tool would register callbacks and call OMPT runtime entry +points to start and stop device tracing, if required. The tool would +have an OpenMP thread call these runtime entry points to control +device tracing. When certain events occur, the OpenMP runtime would +invoke the event callbacks so that the tool can establish the event +context. If device tracing has been requested, the OpenMP runtime +would collect and manage trace records in buffers. When a buffer fills +up or if an OpenMP thread requests explicit flushing of trace records, +an OpenMP runtime helper thread would invoke a buffer-completion +callback. The buffer-completion callback is implemented by the tool +and would typically traverse the trace records returned as part of the +callback. Once the trace records are returned, they can be correlated +to the context established earlier through the event callbacks. + +Here are the steps: +(1) The tool has to define a function called ompt_start_tool with +C-linkage and the appropriate signature as defined by the OpenMP +spec. This function provides 2 function pointers as part of the +returned object, one for an initialization function and the other for +a finalization function. + +(2) The tool has to define the initialization and the finalization +functions referred to above. The initialization function is invoked by +the OpenMP runtime with an input lookup parameter. 
Typically, the +initialization function would use the lookup parameter to obtain a +handle to the function ompt_set_callback that is implemented by the +OpenMP runtime. Using this handle, the tool can then register +callbacks. In our examples for OMPT target, some common callbacks +registered include device initialization, data transfer operations, +and target submit. + +(3) The device initialize callback, implemented by the tool, is +invoked by the OpenMP device plugin runtime during device +initialization with a lookup parameter. This callback would look up +entry points (such as ompt_start_trace) for device tracing so that the +tool can control the regions that should be traced. + +(4) The ompt_start_trace entry point expects 2 function pointers, one +for an allocation function that will be invoked by the OpenMP runtime +for allocating space for trace record buffers. The other one is a +buffer-completion callback function that will be invoked by an OpenMP +runtime helper thread for returning trace records to the tool. The +tool is expected to use the entry point, ompt_get_record_ompt, to +inspect a trace record at a given cursor and the entry point, +ompt_advance_buffer_cursor, to traverse the returned trace records. + +(5) If device tracing is desired, calls to entry points, +ompt_set_trace_ompt, ompt_start_trace, ompt_flush_trace, and +ompt_stop_trace will be injected into the OpenMP program by the tool +to control the type and region of tracing. diff --git a/examples/tools/ompt/veccopy-ompt-target-tracing/Makefile b/examples/tools/ompt/veccopy-ompt-target-tracing/Makefile new file mode 100644 index 000000000..0328bd33e --- /dev/null +++ b/examples/tools/ompt/veccopy-ompt-target-tracing/Makefile @@ -0,0 +1,129 @@
+#-----------------------------------------------------------------------
+#
+#  Makefile: clang OpenMP offload demo Makefile for both amdgcn and
+#            nvptx targets. amdgcn GPU targets begin with "gfx". nvptx
+#            targets begin with sm_. Example: To build and run on k4000
+#            do this:
+#
+#  export AOMP_GPU=sm_30
+#  make run
+#
+#  Run "make help" to see other options for this Makefile
+
+TESTNAME = veccopy-ompt-target-tracing
+TESTSRC  = veccopy-ompt-target-tracing.c
+# Header included by $(TESTSRC); listed as a prerequisite of the build
+# rules below so that editing it triggers a rebuild.
+TESTHDR  = callbacks.h
+
+# Host machine name; simply expanded so that uname runs only once.
+UNAMEP := $(shell uname -m)
+AOMP_CPUTARGET = $(UNAMEP)-pc-linux-gnu
+ifeq ($(UNAMEP),ppc64le)
+  AOMP_CPUTARGET = ppc64le-linux-gnu
+endif
+
+# --- Standard Makefile check for AOMP installation ---
+ifeq ("$(wildcard $(AOMP))","")
+  ifneq ($(AOMP),)
+    $(warning AOMP not found at $(AOMP))
+  endif
+  AOMP = $(HOME)/rocm/aomp
+  ifeq ("$(wildcard $(AOMP))","")
+    $(warning AOMP not found at $(AOMP))
+    AOMP = /usr/lib/aomp
+    ifeq ("$(wildcard $(AOMP))","")
+      $(warning AOMP not found at $(AOMP))
+      $(error Please install AOMP or correctly set env-var AOMP)
+    endif
+  endif
+endif
+# --- End Standard Makefile check for AOMP installation ---
+INSTALLED_GPU = $(shell $(AOMP)/bin/mygpu -d gfx900)# Default AOMP_GPU is gfx900 which is vega
+# Same condition as "AOMP_GPU ?= $(INSTALLED_GPU)", but the value is
+# expanded once here instead of re-running mygpu on every reference.
+ifeq ($(origin AOMP_GPU),undefined)
+  AOMP_GPU := $(INSTALLED_GPU)
+endif
+CC = $(AOMP)/bin/clang
+
+# A GPU name containing "sm_" selects the nvptx offload target,
+# anything else selects amdgcn.
+ifeq (sm_,$(findstring sm_,$(AOMP_GPU)))
+  AOMP_GPUTARGET = nvptx64-nvidia-cuda
+else
+  AOMP_GPUTARGET = amdgcn-amd-amdhsa
+endif
+
+# Sorry, clang openmp requires these complex options
+CFLAGS = -O3 -target $(AOMP_CPUTARGET) -fopenmp -fopenmp-targets=$(AOMP_GPUTARGET) -Xopenmp-target=$(AOMP_GPUTARGET) -march=$(AOMP_GPU)
+
+ifeq ($(OFFLOAD_DEBUG),1)
+  $(info    DEBUG Mode ON)
+  CCENV  = env LIBRARY_PATH=$(AOMP)/lib-debug
+  RUNENV = LIBOMPTARGET_DEBUG=1
+endif
+
+ifeq ($(VERBOSE),1)
+  $(info    Compilation VERBOSE Mode ON)
+  CFLAGS += -v
+endif
+
+ifeq ($(TEMPS),1)
+  $(info    Compilation and linking save-temp Mode ON)
+  CFLAGS += -save-temps
+endif
+
+ifeq (sm_,$(findstring sm_,$(AOMP_GPU)))
+  CUDA   ?= /usr/local/cuda
+  LFLAGS += -L$(CUDA)/targets/$(UNAMEP)-linux/lib -lcudart
+endif
+
+CFLAGS += $(EXTRA_CFLAGS)
+
+# These targets are commands, not files.
+.PHONY: run run_obin help clean
+
+# ----- Demo compile and link in one step, no object code saved
+# Only the .c prerequisites are handed to the compiler; the header is a
+# dependency, not an input.
+$(TESTNAME): $(TESTSRC) $(TESTHDR)
+	$(CCENV) $(CC) $(CFLAGS) $(LFLAGS) $(filter %.c,$^) -o $@
+
+run: $(TESTNAME)
+	$(RUNENV) ./$(TESTNAME)
+
+# ---- Demo compile and link in two steps, object saved
+$(TESTNAME).o: $(TESTSRC) $(TESTHDR)
+	$(CCENV) $(CC) -c $(CFLAGS) $(filter %.c,$^) -o $@
+
+obin: $(TESTNAME).o
+	$(CCENV) $(CC) $(CFLAGS) $(LFLAGS) $^ -o $@
+
+run_obin: obin
+	$(RUNENV) ./obin
+
+help:
+	@echo
+	@echo "Source[s]: $(TESTSRC)"
+	@echo "Application binary: $(TESTNAME)"
+	@echo "Target GPU: $(AOMP_GPU)"
+	@echo "Target triple: $(AOMP_GPUTARGET)"
+	@echo "AOMP compiler: $(CC)"
+	@echo "Compile flags: $(CFLAGS)"
+ifeq (sm_,$(findstring sm_,$(AOMP_GPU)))
+	@echo "CUDA installation: $(CUDA)"
+endif
+	@echo
+	@echo "This Makefile supports these targets:"
+	@echo
+	@echo " make // Builds $(TESTNAME) "
+	@echo " make run // Executes $(TESTNAME) "
+	@echo
+	@echo " make $(TESTNAME).o // build object file "
+	@echo " make obin // Link object file to build binary "
+	@echo " make run_obin // Execute obin "
+	@echo
+	@echo " make clean"
+	@echo " make help"
+	@echo
+	@echo "Environment variables used by this Makefile:"
+	@echo " AOMP_GPU= Target GPU, e.g sm_30, default=gfx900. To build for"
+	@echo " Nvidia GPUs, set AOMP_GPU=sm_60 or appropriate sm_"
+	@echo " AOMP= AOMP install dir, default=/usr/lib/aomp"
+	@echo " EXTRA_CFLAGS= extra arguments for compiler"
+	@echo " OFFLOAD_DEBUG=n if n=1, compile and run in Debug mode"
+	@echo " VERBOSE=n if n=1, add verbose output"
+	@echo " TEMPS=1 do not delete intermediate files"
+ifeq (sm_,$(findstring sm_,$(AOMP_GPU)))
+	@echo " CUDA= CUDA install dir, default=/usr/local/cuda"
+endif
+	@echo
+
+# Cleanup anything this makefile can create
+clean:
+	rm -f $(TESTNAME) obin *.i *.ii *.bc *.lk a.out-* *.ll *.s *.o *.cubin
diff --git a/examples/tools/ompt/veccopy-ompt-target-tracing/README b/examples/tools/ompt/veccopy-ompt-target-tracing/README new file mode 100644 index 000000000..260ed868b --- /dev/null +++ b/examples/tools/ompt/veccopy-ompt-target-tracing/README @@ -0,0 +1,10 @@ +An illustration of how a tool would use OMPT target support for a +simple vector copy OpenMP program. + +To compile and run: +make run + +For help: +make help + +Example output is in example_run.log. diff --git a/examples/tools/ompt/veccopy-ompt-target-tracing/callbacks.h b/examples/tools/ompt/veccopy-ompt-target-tracing/callbacks.h new file mode 100644 index 000000000..26366fb4e --- /dev/null +++ b/examples/tools/ompt/veccopy-ompt-target-tracing/callbacks.h @@ -0,0 +1,305 @@
+// Needed for the assert() calls made by the callbacks in this header
+#include <assert.h>
+
+// Available at $INSTALL_DIR/include/omp-tools.h
+#include <omp-tools.h>
+
+// Use helper macro from llvm repo, llvm-project/openmp/runtime/test/ompt/callback.h
+// Registers the function on_<name> for the event <name> through the
+// ompt_set_callback handle and prints a message when the runtime
+// answers ompt_set_never.
+#define register_ompt_callback_t(name, type)                                  \
+  do {                                                                        \
+    type f_##name = &on_##name;                                               \
+    if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \
+      printf("0: Could not register callback '" #name "'\n");                 \
+  } while (0)
+
+#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t)
+
+// Tool related code below. The tool is expected to provide the
+// following definitions, some of them optionally. 
+
+#define OMPT_BUFFER_REQUEST_SIZE 256
+
+// Utilities
+
+// Prints one trace record returned through the buffer-completion
+// callback: first a header line with the fields common to every record,
+// then a detail line selected by the record type. A null record is
+// ignored. Record types other than target, target_data_op and
+// target_submit (plain or _emi variant) hit assert(0).
+static void print_record_ompt(ompt_record_ompt_t *rec) {
+  if (!rec)
+    return;
+
+  printf("rec=%p type=%d time=%lu thread_id=%lu target_id=%lu\n",
+         rec, rec->type, rec->time, rec->thread_id, rec->target_id);
+
+  switch (rec->type) {
+  case ompt_callback_target:
+  case ompt_callback_target_emi: {
+    const ompt_record_target_t *tgt = &rec->record.target;
+    printf("\tRecord Target: kind=%d endpoint=%d device=%d task_id=%lu target_id=%lu codeptr=%p\n",
+           tgt->kind, tgt->endpoint, tgt->device_num,
+           tgt->task_id, tgt->target_id, tgt->codeptr_ra);
+    break;
+  }
+  case ompt_callback_target_data_op:
+  case ompt_callback_target_data_op_emi: {
+    const ompt_record_target_data_op_t *op = &rec->record.target_data_op;
+    // duration is the record's end_time minus the common start time
+    printf("\t Record DataOp: host_op_id=%lu optype=%d src_addr=%p src_device=%d "
+           "dest_addr=%p dest_device=%d bytes=%lu end_time=%lu duration=%lu ns codeptr=%p\n",
+           op->host_op_id, op->optype,
+           op->src_addr, op->src_device_num,
+           op->dest_addr, op->dest_device_num,
+           op->bytes, op->end_time,
+           op->end_time - rec->time,
+           op->codeptr_ra);
+    break;
+  }
+  case ompt_callback_target_submit:
+  case ompt_callback_target_submit_emi: {
+    const ompt_record_target_kernel_t *krn = &rec->record.target_kernel;
+    printf("\t Record Submit: host_op_id=%lu requested_num_teams=%u granted_num_teams=%u "
+           "end_time=%lu duration=%lu ns\n",
+           krn->host_op_id, krn->requested_num_teams,
+           krn->granted_num_teams, krn->end_time,
+           krn->end_time - rec->time);
+    break;
+  }
+  default:
+    assert(0);
+    break;
+  }
+}
+
+// 
Deallocation routine that will be called by the tool when a buffer
+// previously allocated by the buffer-request callback is no longer required.
+// The deallocation method must match the allocation routine. Here
+// free is used for corresponding malloc.
+// The address is printed before the buffer is released so that the
+// pointer value is never read after free().
+static void delete_buffer_ompt(ompt_buffer_t *buffer) {
+  printf("Deallocated %p\n", buffer);
+  free(buffer);
+}
+
+// OMPT entry point handles
+// ompt_set_callback is filled in by ompt_initialize; the tracing
+// handles stay null until the device-initialize callback looks them up.
+static ompt_set_callback_t ompt_set_callback = 0;
+static ompt_set_trace_ompt_t ompt_set_trace_ompt = 0;
+static ompt_start_trace_t ompt_start_trace = 0;
+static ompt_flush_trace_t ompt_flush_trace = 0;
+static ompt_stop_trace_t ompt_stop_trace = 0;
+static ompt_get_record_ompt_t ompt_get_record_ompt = 0;
+static ompt_advance_buffer_cursor_t ompt_advance_buffer_cursor = 0;
+
+// OMPT callbacks
+
+// Trace record callbacks
+// Allocation routine
+// Hands a freshly malloc'ed buffer of OMPT_BUFFER_REQUEST_SIZE bytes to
+// the caller through *buffer and reports its size through *bytes. If
+// the allocation fails, *buffer is null and *bytes is set to 0 so that
+// the caller is never told about storage that does not exist.
+// NOTE(review): the OpenMP specification describes *bytes == 0 as "no
+// buffer provided" -- confirm against the runtime in use.
+static void on_ompt_callback_buffer_request (
+  int device_num,
+  ompt_buffer_t **buffer,
+  size_t *bytes
+) {
+  *buffer = malloc(OMPT_BUFFER_REQUEST_SIZE);
+  if (*buffer == NULL) {
+    *bytes = 0;
+    printf("Failed to allocate %zu bytes in buffer request callback\n",
+           (size_t)OMPT_BUFFER_REQUEST_SIZE);
+    return;
+  }
+  *bytes = OMPT_BUFFER_REQUEST_SIZE;
+  printf("Allocated %zu bytes at %p in buffer request callback\n", *bytes, *buffer);
+}
+
+// This function is called by an OpenMP runtime helper thread for
+// returning trace records from a buffer.
+// Note: This callback must handle a null begin cursor. Currently,
+// ompt_get_record_ompt, print_record_ompt, and
+// ompt_advance_buffer_cursor handle a null cursor. 
+static void on_ompt_callback_buffer_complete (
+  int device_num,
+  ompt_buffer_t *buffer,
+  size_t bytes, /* bytes returned in this callback */
+  ompt_buffer_cursor_t begin,
+  int buffer_owned
+) {
+  printf("Executing buffer complete callback: %d %p %zu %p %d\n",
+         device_num, buffer, bytes, (void*)begin, buffer_owned);
+
+  // Traverse the returned records only when both traversal entry points
+  // were resolved by the device-initialize callback; calling through a
+  // null handle would crash.
+  if (ompt_get_record_ompt && ompt_advance_buffer_cursor) {
+    int status = 1;
+    ompt_buffer_cursor_t cursor = begin;
+    while (status) {
+      ompt_record_ompt_t *rec = ompt_get_record_ompt(buffer, cursor);
+      print_record_ompt(rec);
+      // The last argument receives the next cursor position, so the
+      // address of the cursor variable is passed. The loop ends when
+      // the entry point returns 0.
+      status = ompt_advance_buffer_cursor(NULL, /* TODO device */
+                                          buffer,
+                                          bytes,
+                                          cursor,
+                                          &cursor);
+    }
+  }
+  // The tool releases the buffer only when the callback says it owns it
+  if (buffer_owned) delete_buffer_ompt(buffer);
+}
+
+// Utility routine to enable the desired tracing modes
+// Enables tracing of target, target_data_op and target_submit events.
+// Returns ompt_set_error when the entry point has not been looked up
+// yet; otherwise returns ompt_set_always without inspecting the results
+// of the individual ompt_set_trace_ompt calls.
+static ompt_set_result_t set_trace_ompt() {
+  if (!ompt_set_trace_ompt) return ompt_set_error;
+
+  ompt_set_trace_ompt(0, 1, ompt_callback_target);
+  ompt_set_trace_ompt(0, 1, ompt_callback_target_data_op);
+  ompt_set_trace_ompt(0, 1, ompt_callback_target_submit);
+
+  return ompt_set_always;
+}
+
+// Starts tracing, registering the buffer-request and buffer-complete
+// callbacks defined in this file. Returns 0 when the entry point is
+// not available, otherwise whatever ompt_start_trace returns.
+static int start_trace() {
+  if (!ompt_start_trace) return 0;
+  return ompt_start_trace(0, &on_ompt_callback_buffer_request,
+                          &on_ompt_callback_buffer_complete);
+}
+
+// Requests a flush of the trace records. Returns 0 when the entry
+// point is not available, otherwise the result of ompt_flush_trace.
+static int flush_trace() {
+  if (!ompt_flush_trace) return 0;
+  return ompt_flush_trace(0);
+}
+
+// Stops tracing. Returns 0 when the entry point is not available,
+// otherwise the result of ompt_stop_trace.
+static int stop_trace() {
+  if (!ompt_stop_trace) return 0;
+  return ompt_stop_trace(0);
+}
+
+// Synchronous callbacks
+// The device init callback must obtain the handles to the tracing
+// entry points, if required. 
+// A null lookup argument means no tracing entry points can be obtained;
+// the callback then only reports that and returns.
+static void on_ompt_callback_device_initialize(
+  int device_num,
+  const char *type,
+  ompt_device_t *device,
+  ompt_function_lookup_t lookup,
+  const char *documentation
+) {
+  printf("Init: device_num=%d type=%s device=%p lookup=%p doc=%p\n",
+         device_num, type, device, lookup, documentation);
+  if (!lookup) {
+    printf("Trace collection disabled on device %d\n", device_num);
+    return;
+  }
+
+  // Resolve the tracing entry points by name. A failed lookup leaves
+  // the handle null, which the helper routines check before use.
+  ompt_set_trace_ompt = (ompt_set_trace_ompt_t) lookup("ompt_set_trace_ompt");
+  ompt_start_trace = (ompt_start_trace_t) lookup("ompt_start_trace");
+  ompt_flush_trace = (ompt_flush_trace_t) lookup("ompt_flush_trace");
+  ompt_stop_trace = (ompt_stop_trace_t) lookup("ompt_stop_trace");
+  ompt_get_record_ompt = (ompt_get_record_ompt_t) lookup("ompt_get_record_ompt");
+  ompt_advance_buffer_cursor = (ompt_advance_buffer_cursor_t) lookup("ompt_advance_buffer_cursor");
+
+  set_trace_ompt();
+
+  // In many scenarios, this will be a good place to start the
+  // trace. If start_trace is called from the main program before this
+  // callback is dispatched, the start_trace handle will be null. This
+  // is because this device_init callback is invoked during the first
+  // target construct implementation.
+
+  start_trace();
+}
+
+// Called at device finalize
+static void on_ompt_callback_device_finalize(
+  int device_num
+) {
+  printf("Callback Fini: device_num=%d\n", device_num);
+}
+
+// Called at device load time
+// A null filename is printed as "(null)" instead of being handed to %s.
+static void on_ompt_callback_device_load(
+  int device_num,
+  const char *filename,
+  int64_t offset_in_file,
+  void *vma_in_file,
+  size_t bytes,
+  void *host_addr,
+  void *device_addr,
+  uint64_t module_id
+) {
+  printf("Load: device_num:%d filename:%s host_adddr:%p device_addr:%p bytes:%zu\n",
+         device_num, filename ? filename : "(null)", host_addr, device_addr, bytes);
+}
+
+// Data transfer
+static void on_ompt_callback_target_data_op(
+  ompt_id_t target_id,
+  ompt_id_t host_op_id,
+  ompt_target_data_op_t optype,
+  void *src_addr,
+  int src_device_num,
+  void *dest_addr,
+  int dest_device_num,
+  size_t bytes,
+  const void *codeptr_ra
+) {
+  assert(codeptr_ra != 0);
+  // At least one of src and dest must be non-null; either one alone may
+  // be null.
+  assert(src_addr != 0 || dest_addr != 0);
+  printf(" Callback DataOp: target_id=%lu host_op_id=%lu optype=%d src=%p src_device_num=%d "
+         "dest=%p dest_device_num=%d bytes=%zu code=%p\n",
+         target_id, host_op_id, optype, src_addr, src_device_num,
+         dest_addr, dest_device_num, bytes, codeptr_ra);
+}
+
+// Target region
+static void on_ompt_callback_target(
+  ompt_target_t kind,
+  ompt_scope_endpoint_t endpoint,
+  int device_num,
+  ompt_data_t *task_data,
+  ompt_id_t target_id,
+  const void *codeptr_ra
+) {
+  assert(codeptr_ra != 0);
+  printf("Callback Target: target_id=%lu kind=%d endpoint=%d device_num=%d code=%p\n",
+         target_id, kind, endpoint, device_num, codeptr_ra);
+}
+
+// Target launch
+// requested_num_teams is unsigned and is printed with %u, matching the
+// trace-record printer.
+static void on_ompt_callback_target_submit(
+  ompt_id_t target_id,
+  ompt_id_t host_op_id,
+  unsigned int requested_num_teams
+) {
+  printf(" Callback Submit: target_id=%lu host_op_id=%lu req_num_teams=%u\n",
+         target_id, host_op_id, requested_num_teams);
+}
+
+// Init functions
+// Tool initializer handed to the runtime through ompt_start_tool.
+// Obtains ompt_set_callback through lookup and registers the callbacks
+// defined above. Returns 0 when ompt_set_callback cannot be obtained
+// and 1 otherwise.
+int ompt_initialize(
+  ompt_function_lookup_t lookup,
+  int initial_device_num,
+  ompt_data_t *tool_data)
+{
+  ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
+
+  if (!ompt_set_callback) return 0; // failed
+
+  register_ompt_callback(ompt_callback_device_initialize);
+  register_ompt_callback(ompt_callback_device_finalize);
+  register_ompt_callback(ompt_callback_device_load);
+  register_ompt_callback(ompt_callback_target_data_op);
+  register_ompt_callback(ompt_callback_target);
+  register_ompt_callback(ompt_callback_target_submit);
+
+  return 1; //success
+}
+
+// Tool finalizer; this example has nothing to release.
+void ompt_finalize(ompt_data_t *tool_data)
+{
+}
+
+// ompt_start_tool must be defined for a tool to use OMPT
+// Returns a pointer to a static result object that carries the
+// initializer and finalizer above.
+#ifdef __cplusplus
+extern "C" {
+#endif
+ompt_start_tool_result_t *ompt_start_tool(
+  unsigned int omp_version,
+  const char *runtime_version)
+{
+  static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0};
+  return &ompt_start_tool_result;
+}
+#ifdef __cplusplus
+}
+#endif
diff --git a/examples/tools/ompt/veccopy-ompt-target-tracing/example_run.log b/examples/tools/ompt/veccopy-ompt-target-tracing/example_run.log new file mode 100644 index 000000000..92b9d62d9 --- /dev/null +++ b/examples/tools/ompt/veccopy-ompt-target-tracing/example_run.log @@ -0,0 +1,113 @@ +Init: device_num=0 type=AMD gfx906 device=0x1b7c1b0 lookup=0x14e1227cf430 doc=(nil) +Load: device_num:0 filename:(null) host_adddr:0x2012b0 device_addr:(nil) bytes:14504 +Callback Target: target_id=1 kind=1 endpoint=1 device_num=0 code=0x206900 +Allocated 256 bytes at 0x1bb4a90 in buffer request callback + Callback DataOp: target_id=1 host_op_id=2 optype=1 src=0x7fffde8f2f40 src_device_num=0 dest=(nil) dest_device_num=0 bytes=400000 code=0x14e12cbf9264 + Callback DataOp: target_id=1 host_op_id=3 optype=2 src=0x7fffde8f2f40 src_device_num=0 dest=0x14e121404000 dest_device_num=0 bytes=400000 code=0x14e12cbf9142 +Allocated 256 bytes at 0x1bda3c0 in buffer request callback + Callback DataOp: target_id=1 host_op_id=4 optype=1 src=0x7fffde8914c0 src_device_num=0 
dest=(nil) dest_device_num=0 bytes=400000 code=0x14e12cbf9264 + Callback DataOp: target_id=1 host_op_id=5 optype=2 src=0x7fffde8914c0 src_device_num=0 dest=0x14e121466000 dest_device_num=0 bytes=400000 code=0x14e12cbf9142 +Executing buffer complete callback: 0 0x1bb4a90 208 0x1bb4a90 0 +rec=0x1bb4a90 type=8 time=1642116169956984774 thread_id=0 target_id=1 + Record Target: kind=1 endpoint=1 device=0 task_id=0 target_id=1 codeptr=0x206900 +rec=0x1bb4af8 type=9 time=1642116169956994413 thread_id=0 target_id=1 + Record DataOp: host_op_id=2 optype=1 src_addr=0x14e121404000 src_device=0 dest_addr=0x7fffde8f2f40 dest_device=0 bytes=400000 end_time=1642116169956997278 duration=2865 ns codeptr=0x14e12cbf9264 +Executing buffer complete callback: 0 0x1bb4a90 0 (nil) 1 +Deallocated 0x1bb4a90 +Allocated 256 bytes at 0x1bda7c0 in buffer request callback + Callback Submit: target_id=1 host_op_id=6 req_num_teams=1 +Executing buffer complete callback: 0 0x1bda3c0 208 0x1bda3c0 0 +rec=0x1bda3c0 type=9 time=1642116169956998631 thread_id=0 target_id=1 + Record DataOp: host_op_id=3 optype=2 src_addr=0x7fffde8f2f40 src_device=0 dest_addr=0x14e121404000 dest_device=0 bytes=400000 end_time=1642116169958280777 duration=1282146 ns codeptr=0x14e12cbf9142 +rec=0x1bda428 type=9 time=1642116169958283763 thread_id=0 target_id=1 + Record DataOp: host_op_id=4 optype=1 src_addr=0x14e121466000 src_device=0 dest_addr=0x7fffde8914c0 dest_device=0 bytes=400000 end_time=1642116169958285566 duration=1803 ns codeptr=0x14e12cbf9264 +Executing buffer complete callback: 0 0x1bda3c0 0 (nil) 1 +Deallocated 0x1bda3c0 + Callback DataOp: target_id=1 host_op_id=7 optype=3 src=0x14e121466000 src_device_num=0 dest=0x7fffde8914c0 dest_device_num=0 bytes=400000 code=0x14e12cc1641a +Allocated 256 bytes at 0x1bdac10 in buffer request callback + Callback DataOp: target_id=1 host_op_id=8 optype=3 src=0x14e121404000 src_device_num=0 dest=0x7fffde8f2f40 dest_device_num=0 bytes=400000 code=0x14e12cc1641a +Executing buffer 
complete callback: 0 0x1bda7c0 208 0x1bda7c0 0 +rec=0x1bda7c0 type=9 time=1642116169958287389 thread_id=0 target_id=1 + Record DataOp: host_op_id=5 optype=2 src_addr=0x7fffde8914c0 src_device=0 dest_addr=0x14e121466000 dest_device=0 bytes=400000 end_time=1642116169958678419 duration=391030 ns codeptr=0x14e12cbf9142 +rec=0x1bda828 type=10 time=1642116169958684641 thread_id=0 target_id=1 + Record Submit: host_op_id=6 requested_num_teams=1 granted_num_teams=1 end_time=1642116169958943992 duration=259351 ns +Executing buffer complete callback: 0 0x1bda7c0 0 (nil) 1 +Deallocated 0x1bda7c0 + Callback DataOp: target_id=1 host_op_id=9 optype=4 src=0x14e121466000 src_device_num=0 dest=(nil) dest_device_num=0 bytes=0 code=0x14e12cbfafe0 +Allocated 256 bytes at 0x1bdae50 in buffer request callback + Callback DataOp: target_id=1 host_op_id=10 optype=4 src=0x14e121404000 src_device_num=0 dest=(nil) dest_device_num=0 bytes=0 code=0x14e12cbfafe0 +Allocated 256 bytes at 0x1bdb0a0 in buffer request callback +Callback Target: target_id=1 kind=1 endpoint=2 device_num=0 code=0x206900 +Executing buffer complete callback: 0 0x1bdac10 208 0x1bdac10 0 +rec=0x1bdac10 type=9 time=1642116169958946827 thread_id=0 target_id=1 + Record DataOp: host_op_id=7 optype=3 src_addr=0x14e121466000 src_device=0 dest_addr=0x7fffde8914c0 dest_device=0 bytes=400000 end_time=1642116169959299916 duration=353089 ns codeptr=0x14e12cc1641a +Executing buffer complete callback: 0 0x1bdb0a0 104 0x1bdb0a0 0 +rec=0x1bdb0a0 type=8 time=1642116169959662812 thread_id=0 target_id=1 + Record Target: kind=1 endpoint=2 device=0 task_id=0 target_id=1 codeptr=0x206900 +rec=0x1bdac78 type=9 time=1642116169959302000 thread_id=0 target_id=1 + Record DataOp: host_op_id=8 optype=3 src_addr=0x14e121404000 src_device=0 dest_addr=0x7fffde8f2f40 dest_device=0 bytes=400000 end_time=1642116169959647053 duration=345053 ns codeptr=0x14e12cc1641a +Executing buffer complete callback: 0 0x1bdae50 208 0x1bdae50 0 +rec=0x1bdae50 type=9 
time=1642116169959650148 thread_id=0 target_id=1 + Record DataOp: host_op_id=9 optype=4 src_addr=0x14e121466000 src_device=0 dest_addr=(nil) dest_device=0 bytes=0 end_time=1642116169959658645 duration=8497 ns codeptr=0x14e12cbfafe0 +rec=0x1bdaeb8 type=9 time=1642116169959660448 thread_id=0 target_id=1 + Record DataOp: host_op_id=10 optype=4 src_addr=0x14e121404000 src_device=0 dest_addr=(nil) dest_device=0 bytes=0 end_time=1642116169959661320 duration=872 ns codeptr=0x14e12cbfafe0 +Executing buffer complete callback: 0 0x1bdae50 0 (nil) 1 +Deallocated 0x1bdae50 +Executing buffer complete callback: 0 0x1bdac10 0 (nil) 1 +Deallocated 0x1bdac10 +Callback Target: target_id=11 kind=1 endpoint=1 device_num=0 code=0x206a42 + Callback DataOp: target_id=11 host_op_id=12 optype=1 src=0x7fffde8f2f40 src_device_num=0 dest=(nil) dest_device_num=0 bytes=400000 code=0x14e12cbf9264 +Allocated 256 bytes at 0x1bda3c0 in buffer request callback + Callback DataOp: target_id=11 host_op_id=13 optype=2 src=0x7fffde8f2f40 src_device_num=0 dest=0x14e121404000 dest_device_num=0 bytes=400000 code=0x14e12cbf9142 +Executing buffer complete callback: 0 0x1bdb0a0 104 0x1bdb108 0 +rec=0x1bdb108 type=8 time=1642116169959999580 thread_id=0 target_id=11 + Record Target: kind=1 endpoint=1 device=0 task_id=0 target_id=11 codeptr=0x206a42 +Executing buffer complete callback: 0 0x1bdb0a0 0 (nil) 1 +Deallocated 0x1bdb0a0 + Callback DataOp: target_id=11 host_op_id=14 optype=1 src=0x7fffde8914c0 src_device_num=0 dest=(nil) dest_device_num=0 bytes=400000 code=0x14e12cbf9264 +Allocated 256 bytes at 0x1bdac10 in buffer request callback + Callback DataOp: target_id=11 host_op_id=15 optype=2 src=0x7fffde8914c0 src_device_num=0 dest=0x14e121466000 dest_device_num=0 bytes=400000 code=0x14e12cbf9142 +Executing buffer complete callback: 0 0x1bda3c0 208 0x1bda3c0 0 +rec=0x1bda3c0 type=9 time=1642116169960003487 thread_id=0 target_id=11 + Record DataOp: host_op_id=12 optype=1 src_addr=0x14e121404000 src_device=0 
dest_addr=0x7fffde8f2f40 dest_device=0 bytes=400000 end_time=1642116169960016231 duration=12744 ns codeptr=0x14e12cbf9264 +rec=0x1bda428 type=9 time=1642116169960018335 thread_id=0 target_id=11 + Record DataOp: host_op_id=13 optype=2 src_addr=0x7fffde8f2f40 src_device=0 dest_addr=0x14e121404000 dest_device=0 bytes=400000 end_time=1642116169960375962 duration=357627 ns codeptr=0x14e12cbf9142 +Executing buffer complete callback: 0 0x1bda3c0 0 (nil) 1 +Deallocated 0x1bda3c0 + Callback Submit: target_id=11 host_op_id=16 req_num_teams=0 +Allocated 256 bytes at 0x1bda7c0 in buffer request callback + Callback DataOp: target_id=11 host_op_id=17 optype=3 src=0x14e121466000 src_device_num=0 dest=0x7fffde8914c0 dest_device_num=0 bytes=400000 code=0x14e12cc1641a +Executing buffer complete callback: 0 0x1bdac10 208 0x1bdac10 0 +rec=0x1bdac10 type=9 time=1642116169960378567 thread_id=0 target_id=11 + Record DataOp: host_op_id=14 optype=1 src_addr=0x14e121466000 src_device=0 dest_addr=0x7fffde8914c0 dest_device=0 bytes=400000 end_time=1642116169960384909 duration=6342 ns codeptr=0x14e12cbf9264 +rec=0x1bdac78 type=9 time=1642116169960386793 thread_id=0 target_id=11 + Record DataOp: host_op_id=15 optype=2 src_addr=0x7fffde8914c0 src_device=0 dest_addr=0x14e121466000 dest_device=0 bytes=400000 end_time=1642116169960725143 duration=338350 ns codeptr=0x14e12cbf9142 +Executing buffer complete callback: 0 0x1bdac10 0 (nil) 1 +Deallocated 0x1bdac10 + Callback DataOp: target_id=11 host_op_id=18 optype=3 src=0x14e121404000 src_device_num=0 dest=0x7fffde8f2f40 dest_device_num=0 bytes=400000 code=0x14e12cc1641a +Allocated 256 bytes at 0x1bb4a90 in buffer request callback + Callback DataOp: target_id=11 host_op_id=19 optype=4 src=0x14e121466000 src_device_num=0 dest=(nil) dest_device_num=0 bytes=0 code=0x14e12cbfafe0 + Callback DataOp: target_id=11 host_op_id=20 optype=4 src=0x14e121404000 src_device_num=0 dest=(nil) dest_device_num=0 bytes=0 code=0x14e12cbfafe0 +Allocated 256 bytes at 
0x1bdae50 in buffer request callback +Callback Target: target_id=11 kind=1 endpoint=2 device_num=0 code=0x206a42 +Executing buffer complete callback: 0 0x1bda7c0 208 0x1bda7c0 0 +rec=0x1bda7c0 type=10 time=1642116169960728950 thread_id=0 target_id=11 + Record Submit: host_op_id=16 requested_num_teams=0 granted_num_teams=240 end_time=1642116169960762193 duration=33243 ns +rec=0x1bda828 type=9 time=1642116169960764307 thread_id=0 target_id=11 + Record DataOp: host_op_id=17 optype=3 src_addr=0x14e121466000 src_device=0 dest_addr=0x7fffde8914c0 dest_device=0 bytes=400000 end_time=1642116169961102026 duration=337719 ns codeptr=0x14e12cc1641a +Executing buffer complete callback: 0 0x1bda7c0 0 (nil) 1 +Deallocated 0x1bda7c0 +Executing buffer complete callback: 0 0x1bb4a90 208 0x1bb4a90 0 +rec=0x1bb4a90 type=9 time=1642116169961113187 thread_id=0 target_id=11 + Record DataOp: host_op_id=18 optype=3 src_addr=0x14e121404000 src_device=0 dest_addr=0x7fffde8f2f40 dest_device=0 bytes=400000 end_time=1642116169961464352 duration=351165 ns codeptr=0x14e12cc1641a +rec=0x1bb4af8 type=9 time=1642116169961466776 thread_id=0 target_id=11 + Record DataOp: host_op_id=19 optype=4 src_addr=0x14e121466000 src_device=0 dest_addr=(nil) dest_device=0 bytes=0 end_time=1642116169961468550 duration=1774 ns codeptr=0x14e12cbfafe0 +Executing buffer complete callback: 0 0x1bb4a90 0 (nil) 1 +Deallocated 0x1bb4a90 +Executing buffer complete callback: 0 0x1bdae50 208 0x1bdae50 0 +rec=0x1bdae50 type=9 time=1642116169961470363 thread_id=0 target_id=11 + Record DataOp: host_op_id=20 optype=4 src_addr=0x14e121404000 src_device=0 dest_addr=(nil) dest_device=0 bytes=0 end_time=1642116169961475924 duration=5561 ns codeptr=0x14e12cbfafe0 +rec=0x1bdaeb8 type=8 time=1642116169961477016 thread_id=0 target_id=11 + Record Target: kind=1 endpoint=2 device=0 task_id=0 target_id=11 codeptr=0x206a42 +Success +Callback Fini: device_num=0 diff --git 
a/examples/tools/ompt/veccopy-ompt-target-tracing/veccopy-ompt-target-tracing.c b/examples/tools/ompt/veccopy-ompt-target-tracing/veccopy-ompt-target-tracing.c new file mode 100644 index 000000000..52e1b5f9f --- /dev/null +++ b/examples/tools/ompt/veccopy-ompt-target-tracing/veccopy-ompt-target-tracing.c @@ -0,0 +1,57 @@ +#include +#include +#include + +#include "callbacks.h" + +// Calls to start/stop/flush_trace to be injected by the tool + +int main() +{ + int N = 100000; + + int a[N]; + int b[N]; + + int i; + + for (i=0; i