Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add write result file function for Nvidia GPU and AMD GPU. #586

Merged
merged 2 commits into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -734,7 +734,7 @@ local: $(L_APPS) likwid.lua
@echo "===> Setting Lua scripts to run from current directory"
@PWD=$(shell pwd)
@for APP in $(L_APPS); do \
sed -i -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" -e "s/<RELEASE>/$(RELEASE)/g" -e "s/<GITCOMMIT>/$(GITCOMMIT)/g" -e "s/<MINOR>/$(MINOR)/g" -e "s+$(PREFIX)/bin/likwid-lua+$(PWD)/ext/lua/lua+" -e "s+$(PREFIX)/share/lua/?.lua+$(PWD)/?.lua+" -e "s+$(PREFIX)/bin/likwid-pin+$(PWD)/likwid-pin+" -e "s+$(PREFIX)/bin/likwid-perfctr+$(PWD)/likwid-perfctr+" $$APP; \
sed -i -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" -e "s/<RELEASE>/$(RELEASE)/g" -e "s/<GITCOMMIT>/$(GITCOMMIT)/g" -e "s/<MINOR>/$(MINOR)/g" -e "s+$(PREFIX)/bin/likwid-lua+$(PWD)/ext/lua/lua+" -e "s+$(PREFIX)/share/lua/?.lua+$(PWD)/?.lua+" -e "s+$(PREFIX)/bin/likwid-pin+$(PWD)/likwid-pin+" -e "s+$(PREFIX)/bin/likwid-perfctr+$(PWD)/likwid-perfctr+" -e "s+$(PREFIX)/lib+$(PWD)+" $$APP; \
chmod +x $$APP; \
done
@sed -i -e "s/<VERSION>/$(VERSION)/g" -e "s/<DATE>/$(DATE)/g" -e "s/<RELEASE>/$(RELEASE)/g" -e "s+$(PREFIX)/lib+$(PWD)+g" -e "s+$(PREFIX)/share/likwid/perfgroups+$(PWD)/groups+g" -e "s/<GITCOMMIT>/$(GITCOMMIT)/g" -e "s/<MINOR>/$(MINOR)/g" likwid.lua;
Expand Down
2 changes: 2 additions & 0 deletions make/config_defines.mk
Original file line number Diff line number Diff line change
Expand Up @@ -294,10 +294,12 @@ endif

ifeq ($(strip $(NVIDIA_INTERFACE)),true)
DEFINES += -DLIKWID_WITH_NVMON
BUILDAPPDAEMON = true
endif

ifeq ($(strip $(ROCM_INTERFACE)),true)
DEFINES += -DLIKWID_WITH_ROCMON -D__HIP_PLATFORM_HCC__
BUILDAPPDAEMON = true
endif

ifeq ($(strip $(BUILDDAEMON)),true)
Expand Down
2 changes: 1 addition & 1 deletion src/access-daemon/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ CFLAGS += -std=c99 -fPIC -pie -fPIE -fstack-protector
ifeq ($(COMPILER),GCCX86)
CFLAGS += -m32
endif
CPPFLAGS := $(DEFINES) $(INCLUDES) -L$(PREFIX)/lib
CPPFLAGS := $(DEFINES) $(INCLUDES) -L../../lib

ifeq ($(COMPILER),GCCARMv8)
all:
Expand Down
4 changes: 2 additions & 2 deletions src/access-daemon/appDaemon.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ static int parse_gpustr(char* gpuStr, int* numGpus, int** gpuIds)
{
// Create bstring
bstring bGpuStr = bfromcstr(gpuStr);

int (*ownatoi)(const char*) = atoi;
// Parse list
struct bstrList* gpuTokens = bsplit(bGpuStr,',');
int tmpNumGpus = gpuTokens->qty;
Expand All @@ -129,7 +129,7 @@ static int parse_gpustr(char* gpuStr, int* numGpus, int** gpuIds)
// Parse ids to int
for (int i = 0; i < tmpNumGpus; i++)
{
tmpGpuIds[i] = atoi(bdata(gpuTokens->entry[i]));
tmpGpuIds[i] = ownatoi(bdata(gpuTokens->entry[i]));
}

// Copy data
Expand Down
10 changes: 5 additions & 5 deletions src/applications/likwid-perfctr.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1541,14 +1541,14 @@ if use_marker == true then
---------------------------
if nvSupported and #cuda_event_string_list > 0 then
if likwid.access(nvMarkerFile, "e") >= 0 then
results, metrics = likwid.getNvMarkerResults(nvMarkerFile, markergpulist, nan2value)
results, metrics = likwid.getMarkerResultsCuda(nvMarkerFile, gpulist_cuda, nan2value)
if not results then
print_stderr("Failure reading GPU Marker API result file.")
print_stderr("Failure reading Nv Marker API result file.")
elseif #results == 0 then
print_stderr("No regions could be found in GPU Marker API result file.")
print_stderr("No regions could be found in Nv Marker API result file.")
else
for r = 1, #results do
likwid.printGpuOutput(results[r], metrics[r], gpulist_cuda, r, print_stats)
likwid.printOutputCuda(results[r], metrics[r], gpulist_cuda, r, print_stats)
end
end
likwid.destroyNvMarkerFile()
Expand All @@ -1561,7 +1561,7 @@ if use_marker == true then
---------------------------
if rocmSupported and #rocm_event_string_list > 0 then
if likwid.access(rocmMarkerFile, "e") >= 0 then
results, metrics = likwid.getMarkerResultsRocm(rocmMarkerFile, markerrocmgpulist, nan2value)
results, metrics = likwid.getMarkerResultsRocm(rocmMarkerFile, gpulist_rocm, nan2value)
if not results then
print_stderr("Failure reading ROCM Marker API result file.")
elseif #results == 0 then
Expand Down
83 changes: 17 additions & 66 deletions src/applications/likwid.lua
Original file line number Diff line number Diff line change
Expand Up @@ -201,17 +201,17 @@ likwid.nvGetNameOfCounter = likwid_nvGetNameOfCounter
likwid.nvSupported = likwid_nvSupported
likwid.readNvMarkerFile = likwid_readNvMarkerFile
likwid.destroyNvMarkerFile = likwid_destroyNvMarkerFile
likwid.nvMarkerNumRegions = nvmon_getNumberOfRegions
likwid.nvMarkerRegionGroup = nvmon_getGroupOfRegion
likwid.nvMarkerRegionTag = nvmon_getTagOfRegion
likwid.nvMarkerRegionEvents = likwid_markerRegionEvents
likwid.nvMarkerRegionMetrics = likwid_markerRegionMetrics
likwid.nvMarkerRegionGpulist = likwid_markerRegionGpulist
likwid.nvMarkerRegionGpus = likwid_markerRegionGpus
likwid.nvMarkerRegionTime = likwid_markerRegionTime
likwid.nvMarkerRegionCount = likwid_markerRegionCount
likwid.nvMarkerRegionResult = likwid_markerRegionResult
likwid.nvMarkerRegionMetric = likwid_markerRegionMetric
likwid.nvMarkerNumRegions = likwid_nvMarkerNumRegions
likwid.nvMarkerRegionGroup = likwid_nvMarkerRegionGroup
likwid.nvMarkerRegionTag = likwid_nvMarkerRegionTag
likwid.nvMarkerRegionEvents = likwid_nvMarkerRegionEvents
likwid.nvMarkerRegionMetrics = likwid_nvMarkerRegionMetrics
likwid.nvMarkerRegionGpulist = likwid_nvMarkerRegionGpulist
likwid.nvMarkerRegionGpus = likwid_nvMarkerRegionGpus
likwid.nvMarkerRegionTime = likwid_nvMarkerRegionTime
likwid.nvMarkerRegionCount = likwid_nvMarkerRegionCount
likwid.nvMarkerRegionResult = likwid_nvMarkerRegionResult
likwid.nvMarkerRegionMetric = likwid_nvMarkerRegionMetric
likwid.nvInit = likwid_nvInit
likwid.nvAddEventSet = likwid_nvAddEventSet
likwid.nvFinalize = likwid_nvFinalize
Expand Down Expand Up @@ -1366,59 +1366,10 @@ end
likwid.getArch = llikwid_getArch


local function getGpuMarkerResults(filename, gpulist, nan2value)
local gputopo = likwid.getGpuTopology()
local ret = likwid.readNvMarkerFile(filename)
if ret < 0 then
return nil, nil
elseif ret == 0 then
return {}, {}
end
if not nan2value then
nan2value = '-'
end
results = {}
metrics = {}
for i=1, likwid.nvMarkerNumRegions() do
local regionName = likwid.nvMarkerRegionTag(i)
local groupID = likwid.nvMarkerRegionGroup(i)
local regionGPUs = likwid.nvMarkerRegionGpus(i)
results[i] = {}
metrics[i] = {}
results[i][groupID] = {}
metrics[i][groupID] = {}
for k=1, likwid.nvMarkerRegionEvents(i) do
local eventName = likwid.nvGetNameOfEvent(groupID, k)
local counterName = likwid.nvGetNameOfCounter(groupID, k)
results[i][groupID][k] = {}
for j=1, regionGPUs do
results[i][groupID][k][j] = likwid.nvMarkerRegionResult(i,k,j)
if results[i][groupID][k][j] ~= results[i][groupID][k][j] then
results[i][groupID][k][j] = nan2value
end
end
end
if likwid.nvMarkerRegionMetrics(groupID) > 0 then
for k=1, likwid.nvMarkerRegionMetrics(groupID) do
local metricName = likwid.getNameOfMetric(groupID, k)
metrics[i][likwid.nvMarkerRegionGroup(i)][k] = {}
for j=1, regionGPUs do
metrics[i][groupID][k][j] = likwid.nvMarkerRegionMetric(i,k,j)
if metrics[i][groupID][k][j] ~= metrics[i][groupID][k][j] then
metrics[i][groupID][k][j] = nan2value
end
end
end
end
end
return results, metrics
end

likwid.getGpuMarkerResults = getGpuMarkerResults

local function printGpuOutput(results, metrics, gpulist, region, stats)
local function printOutputCuda(results, metrics, gpulist, region, stats)
local maxLineFields = 0
local gputopo = likwid.getGpuTopology()
local gputopo = likwid.getCudaTopology()
local regionName = likwid.nvMarkerRegionTag(region)
local regionGPUs = likwid.nvMarkerRegionGpus(region)
local cur_gpulist = gpulist
Expand Down Expand Up @@ -1571,10 +1522,10 @@ local function printGpuOutput(results, metrics, gpulist, region, stats)
end
end

likwid.printGpuOutput = printGpuOutput
likwid.printOutputCuda = printOutputCuda

local function getNvMarkerResults(filename, gpulist, nan2value)
local gputopo = likwid.getGpuTopology()
local function getMarkerResultsCuda(filename, gpulist, nan2value)
local gputopo = likwid.getCudaTopology()
local ret = likwid.readNvMarkerFile(filename)
if ret < 0 then
return nil, nil
Expand Down Expand Up @@ -1621,7 +1572,7 @@ local function getNvMarkerResults(filename, gpulist, nan2value)
return results, metrics
end

likwid.getNvMarkerResults = getNvMarkerResults
likwid.getMarkerResultsCuda = getMarkerResultsCuda

local function getMarkerResultsRocm(filename, gpulist, nan2value)
local gputopo = likwid.getGpuTopology_rocm()
Expand Down
49 changes: 31 additions & 18 deletions src/includes/likwid-marker.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,39 +102,43 @@ Shortcut for likwid_markerClose() if compiled with -DLIKWID_PERFMON. Otherwise n
*/
/*!
\def LIKWID_NVMARKER_INIT
Shortcut for likwid_gpuMarkerInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
Shortcut for nvmon_markerInit() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_THREADINIT
Shortcut for likwid_gpuMarkerThreadInit() if compiled with -DLIKWID_PERFMON. Otherwise no operation is performed
No operation is performed. This macro exists only for symmetry with the CPU MarkerAPI
*/
/*!
\def LIKWID_NVMARKER_REGISTER(regionTag)
Shortcut for likwid_gpuMarkerRegisterRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
Shortcut for nvmon_markerRegisterRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_START(regionTag)
Shortcut for likwid_gpuMarkerStartRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
Shortcut for nvmon_markerStartRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_STOP(regionTag)
Shortcut for likwid_gpuMarkerStopRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
Shortcut for nvmon_markerStopRegion() with \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count)
Shortcut for likwid_gpuMarkerGetRegion() for \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
Shortcut for nvmon_markerGetRegion() for \a regionTag if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_SWITCH
Shortcut for likwid_gpuMarkerNextGroup() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
Shortcut for nvmon_markerNextGroup() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_RESET(regionTag)
Shortcut for likwid_gpuMarkerResetRegion() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
Shortcut for nvmon_markerResetRegion() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_CLOSE
Shortcut for likwid_gpuMarkerClose() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
Shortcut for nvmon_markerClose() if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/*!
\def LIKWID_NVMARKER_WRITE_FILE(markerfile)
Shortcut for nvmon_markerWriteFile() with \a markerfile if compiled with -DLIKWID_NVMON. Otherwise no operation is performed
*/
/** @}*/

Expand All @@ -143,16 +147,18 @@ Shortcut for likwid_gpuMarkerClose() if compiled with -DLIKWID_NVMON. Otherwise
#define LIKWID_WITH_NVMON
#endif
#include <likwid.h>
#define LIKWID_NVMARKER_INIT likwid_gpuMarkerInit()
#define LIKWID_NVMARKER_THREADINIT likwid_gpuMarkerThreadInit()
#define LIKWID_NVMARKER_SWITCH likwid_gpuMarkerNextGroup()
#define LIKWID_NVMARKER_REGISTER(regionTag) likwid_gpuMarkerRegisterRegion(regionTag)
#define LIKWID_NVMARKER_START(regionTag) likwid_gpuMarkerStartRegion(regionTag)
#define LIKWID_NVMARKER_STOP(regionTag) likwid_gpuMarkerStopRegion(regionTag)
#define LIKWID_NVMARKER_CLOSE likwid_gpuMarkerClose()
#define LIKWID_NVMARKER_RESET(regionTag) likwid_gpuMarkerResetRegion(regionTag)
#define LIKWID_NVMARKER_INIT nvmon_markerInit()
#define LIKWID_NVMARKER_THREADINIT
#define LIKWID_NVMARKER_SWITCH nvmon_markerNextGroup()
#define LIKWID_NVMARKER_REGISTER(regionTag) nvmon_markerRegisterRegion(regionTag)
#define LIKWID_NVMARKER_START(regionTag) nvmon_markerStartRegion(regionTag)
#define LIKWID_NVMARKER_STOP(regionTag) nvmon_markerStopRegion(regionTag)
#define LIKWID_NVMARKER_CLOSE nvmon_markerClose()
#define LIKWID_NVMARKER_RESET(regionTag) nvmon_markerResetRegion(regionTag)
#define LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count) \
likwid_gpuMarkerGetRegion(regionTag, ngpus, nevents, events, time, count)
nvmon_markerGetRegion(regionTag, ngpus, nevents, events, time, count)
#define LIKWID_NVMARKER_WRITE_FILE(markerfile) \
nvmon_markerWriteFile(markerfile)
#else /* LIKWID_NVMON */
#define LIKWID_NVMARKER_INIT
#define LIKWID_NVMARKER_THREADINIT
Expand All @@ -163,6 +169,7 @@ Shortcut for likwid_gpuMarkerClose() if compiled with -DLIKWID_NVMON. Otherwise
#define LIKWID_NVMARKER_CLOSE
/* No-op stub: takes the same six parameters as the enabled variant so call sites compile without NVMON support. */
#define LIKWID_NVMARKER_GET(regionTag, ngpus, nevents, events, time, count)
#define LIKWID_NVMARKER_RESET(regionTag)
#define LIKWID_NVMARKER_WRITE_FILE(markerfile)
#endif /* LIKWID_NVMON */


Expand Down Expand Up @@ -205,6 +212,10 @@ Shortcut for rocmon_markerResetRegion() if compiled with -DLIKWID_ROCMON. Otherw
\def ROCMON_MARKER_CLOSE
Shortcut for rocmon_markerClose() if compiled with -DLIKWID_ROCMON. Otherwise no operation is performed
*/
/*!
\def ROCMON_MARKER_WRITE_FILE(filename)
Shortcut for rocmon_markerWriteFile() with \a filename if compiled with -DLIKWID_ROCMON. Otherwise no operation is performed
*/
/** @}*/

#ifdef LIKWID_ROCMON
Expand All @@ -221,6 +232,7 @@ Shortcut for rocmon_markerClose() if compiled with -DLIKWID_ROCMON. Otherwise no
#define ROCMON_MARKER_CLOSE rocmon_markerClose()
#define ROCMON_MARKER_RESET(regionTag) rocmon_markerResetRegion(regionTag)
#define ROCMON_MARKER_GET(regionTag, ngpus, nevents, events, time, count) rocmon_markerGetRegion(regionTag, ngpus, nevents, events, time, count)
#define ROCMON_MARKER_WRITE_FILE(filename) rocmon_markerWriteFile(filename)
#else /* LIKWID_ROCMON */
#define ROCMON_MARKER_INIT
#define ROCMON_MARKER_THREADINIT
Expand All @@ -231,6 +243,7 @@ Shortcut for rocmon_markerClose() if compiled with -DLIKWID_ROCMON. Otherwise no
#define ROCMON_MARKER_CLOSE
/* No-op stub: takes the same six parameters as the enabled variant so call sites compile without ROCMON support. */
#define ROCMON_MARKER_GET(regionTag, ngpus, nevents, events, time, count)
#define ROCMON_MARKER_RESET(regionTag)
#define ROCMON_MARKER_WRITE_FILE(filename)
#endif /* LIKWID_ROCMON */


Expand Down
25 changes: 25 additions & 0 deletions src/includes/likwid.h
Original file line number Diff line number Diff line change
Expand Up @@ -2318,6 +2318,13 @@ extern void nvmon_markerGetRegion(const char *regionTag, int *nr_gpus,
int *nr_events, double **events,
double *time, int *count)
__attribute__((visibility("default")));
/*! \brief Write the output file of the NvMarker API
@param [in] markerfile Filename for NvMarker API results
@return 0 or negative error number
*/
extern int nvmon_markerWriteFile(const char* markerfile)
__attribute__((visibility("default")));


/*! \brief Read the output file of the NvMarker API
@param [in] filename Filename with NvMarker API results
Expand Down Expand Up @@ -3090,6 +3097,24 @@ Reset the values of all configured counters and timers.
int rocmon_markerResetRegion(const char *regionTag)
__attribute__((visibility("default")));

/*! \brief Write measurement data to file

Write current values to file
@param [in] markerfile Filename for writing
@return Error code of write operation
*/
int rocmon_markerWriteFile(const char *markerfile)
__attribute__((visibility("default")));

/*! \brief Select next group to measure

Must be called in parallel region of the application to switch group on every
CPU.
*/
extern void rocmon_markerNextGroup(void)
__attribute__((visibility("default")));


/*! \brief Read the output file of the RocmonMarker API
@param [in] filename Filename with RocmonMarker API results
@return 0 or negative error number
Expand Down
Loading
Loading