From 93777642ee439a34bc59f7e8684c2e68bf9c7c01 Mon Sep 17 00:00:00 2001 From: xzl Date: Mon, 26 Aug 2024 13:26:49 +0800 Subject: [PATCH] feat: update rocminfo to 5.7.1 ROCm Application for Reporting System Info Issue: https://github.com/orgs/linuxdeepin/discussions/10383 Log: update repo --- CMakeLists.txt | 245 +++++ License.txt | 38 + README.md | 45 +- cmake_modules/utils.cmake | 162 +++ cmake_modules/version_util.sh | 40 + debian/changelog | 117 +- debian/compat | 1 - debian/control | 38 +- debian/copyright | 87 +- debian/gbp.conf | 4 + debian/manpages | 2 + debian/not-installed | 1 + debian/patches/cmakefile-fixes.patch | 25 + debian/patches/cross.patch | 31 + debian/patches/fix-escape-sequence.patch | 74 ++ debian/patches/series | 3 + debian/rocm_agent_enumerator.1 | 23 + debian/rocminfo.1 | 9 + debian/rocminfo.install | 2 + debian/rules | 11 +- debian/salsa-ci.yml | 7 + debian/tests/control | 3 + debian/tests/enumerator | 3 + debian/tests/rocminfo | 9 + debian/upstream/metadata | 5 + debian/watch | 6 + rocm_agent_enumerator | 263 +++++ rocminfo.cc | 1234 ++++++++++++++++++++++ 28 files changed, 2451 insertions(+), 37 deletions(-) create mode 100755 CMakeLists.txt create mode 100644 License.txt create mode 100755 cmake_modules/utils.cmake create mode 100755 cmake_modules/version_util.sh delete mode 100644 debian/compat create mode 100644 debian/gbp.conf create mode 100644 debian/manpages create mode 100644 debian/not-installed create mode 100644 debian/patches/cmakefile-fixes.patch create mode 100644 debian/patches/cross.patch create mode 100644 debian/patches/fix-escape-sequence.patch create mode 100644 debian/patches/series create mode 100644 debian/rocm_agent_enumerator.1 create mode 100644 debian/rocminfo.1 create mode 100644 debian/rocminfo.install create mode 100644 debian/salsa-ci.yml create mode 100644 debian/tests/control create mode 100644 debian/tests/enumerator create mode 100644 debian/tests/rocminfo create mode 100644 debian/upstream/metadata 
create mode 100644 debian/watch create mode 100755 rocm_agent_enumerator create mode 100755 rocminfo.cc diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100755 index 0000000..ce13531 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,245 @@ +# +# GCC 4.8 or higher compiler required. +# +# Required Defines on cmake command line +# +# 1) Set location of ROCR header files (required) +# +# ROCM_DIR="Root for RocM install" +# +# 2) Set ROCRTST_BLD_TYPE to either "Debug" or "Release". +# If not set, the default value is "Debug" is bound. +# +# ROCRTST_BLD_TYPE=Debug or ROCRTST_BLD_TYPE=Release +# +# 3) Set ROCRTST_BLD_BITS to either "32" or "64" +# If not set, the default value of "64" is bound. +# +# ROCRTST_BLD_BITS=32 or ROCRTST_BLD_BITS=64 +# +# Building rocminfo +# +# 1) Create build folder e.g. "rocminfo/build" - any name will do +# 2) Cd into build folder +# 3) Run cmake, passing in the above defines, as needed/required: +# "cmake -DROCM_DIR= .." +# 4) Run "make" +# +# Upon a successful build, the executable "rocminfo" will be in the +# build directory. 
+# +# Currently support for Windows platform is not present +# + +# +# Minimum version of cmake required +# +cmake_minimum_required(VERSION 3.6.3) + +set(ROCMINFO_EXE "rocminfo") +set(PROJECT_NAME ${ROCMINFO_EXE}) +project (${PROJECT_NAME}) + +include ( GNUInstallDirs ) +if(WIN32) + message("This sample is not supported on Windows platform") + return() +endif() + +## Set default module path if not already set +if(NOT DEFINED CMAKE_MODULE_PATH) + set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules/") +endif() +## Include common cmake modules +include(utils) + +# +# Process input variables +# + +find_package(hsa-runtime64 1.0 REQUIRED ) + +string(TOLOWER "${ROCRTST_BLD_TYPE}" tmp) +if("${tmp}" STREQUAL release) + set(BUILD_TYPE "Release") + set(ISDEBUG 0) +else() + set(BUILD_TYPE "Debug") + set(ISDEBUG 1) +endif() + +# The following default version values should be updated as appropriate for +# ABI breaks (update MAJOR and MINOR), and ABI/API additions (update MINOR). +# Until ABI stabilizes VERSION_MAJOR will be 0. This should be over-ridden +# by git tags (through "git describe") when they are present. +set(PKG_VERSION_MAJOR 1) +set(PKG_VERSION_MINOR 0) +set(PKG_VERSION_PATCH 0) +set(PKG_VERSION_NUM_COMMIT 0) + +################# Determine the library version ######################### +## Setup the package version based on git tags. 
+set(PKG_VERSION_GIT_TAG_PREFIX "rocminfo_pkg_ver") + +find_program (GIT NAMES git) + +get_package_version_number("1.0.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT) +# VERSION_* variables should be set by get_version_from_tag +message("Package version: ${PKG_VERSION_STR}") + +# +# Print out the build configuration being used: +# +# Build Src directory +# Build Binary directory +# Build Type: Debug Vs Release, 32 Vs 64 +# Compiler Version, etc +# +message("") +message("Build Configuration:") +message("-----------BuildType: " ${BUILD_TYPE}) +message("------------Compiler: " ${CMAKE_CXX_COMPILER}) +message("-------------Version: " ${CMAKE_CXX_COMPILER_VERSION}) +message("--------Proj Src Dir: " ${PROJECT_SOURCE_DIR}) +message("--------Proj Bld Dir: " ${PROJECT_BINARY_DIR}) +message("--------Proj Lib Dir: " ${PROJECT_BINARY_DIR}/lib) +message("--------Proj Exe Dir: " ${PROJECT_BINARY_DIR}/bin) +message("") + + +# +# Set the build type based on user input +# +set(CMAKE_BUILD_TYPE ${BUILD_TYPE}) +# +# Flag to enable / disable verbose output. +# +SET( CMAKE_VERBOSE_MAKEFILE on ) +# +# Compiler pre-processor definitions. 
+# +# Define MACRO "DEBUG" if build type is "Debug" +if(${BUILD_TYPE} STREQUAL "Debug") +add_definitions(-DDEBUG) +endif() + +add_definitions(-DLITTLEENDIAN_CPU=1) + +# +# Linux Compiler options +# +set(ROCMINFO_CXX_FLAGS -std=c++11) +set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -fexceptions) +set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -fno-rtti) +set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -fno-math-errno) +set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -fno-threadsafe-statics) +set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -fmerge-all-constants) +set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -fms-extensions) +set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -Werror) +set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -Wall) +set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -m64) + +# +# Extend the compiler flags for 64-bit builds +# +if((${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64") OR (${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "AMD64")) + set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -msse -msse2) +endif() + +# +# Add compiler flags to include symbol information for debug builds +# +if(ISDEBUG) + set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -ggdb -O0) +endif() + +########################### +# rocm_agent_enumerator +########################### + +configure_file(rocm_agent_enumerator rocm_agent_enumerator COPYONLY) + + +########################### +# RocR Info +########################### +aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} ROCMINFO_SOURCES) +add_executable(${ROCMINFO_EXE} ${ROCMINFO_SOURCES}) +target_link_libraries(${ROCMINFO_EXE} hsa-runtime64::hsa-runtime64) + +target_compile_options(${ROCMINFO_EXE} PRIVATE ${ROCMINFO_CXX_FLAGS}) + +########################### +# Install directives +########################### +install ( + FILES ${CMAKE_CURRENT_BINARY_DIR}/${ROCMINFO_EXE} + PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + DESTINATION ${CMAKE_INSTALL_BINDIR} ) +install ( + FILES 
${CMAKE_CURRENT_BINARY_DIR}/rocm_agent_enumerator + PERMISSIONS OWNER_READ OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE + DESTINATION ${CMAKE_INSTALL_BINDIR} ) + +########################### +# Packaging directives +########################### +set(CPACK_PACKAGE_NAME "${PROJECT_NAME}") +set(CPACK_PACKAGE_VENDOR "Advanced Micro Devices, Inc.") +set(CPACK_PACKAGE_VERSION_MAJOR "${PKG_VERSION_MAJOR}") +set(CPACK_PACKAGE_VERSION_MINOR "${PKG_VERSION_MINOR}") +set(CPACK_PACKAGE_VERSION_PATCH "${PKG_VERSION_PATCH}") +set(CPACK_PACKAGE_CONTACT "AMD Rocminfo Support ") +set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/License.txt") +set( CPACK_RPM_PACKAGE_LICENSE "NCSA" ) +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Radeon Open Compute (ROCm) Runtime rocminfo tool") + +#Install license file +install(FILES ${CPACK_RESOURCE_FILE_LICENSE} DESTINATION ${CMAKE_INSTALL_DOCDIR}) + +#Make proper version for appending +#Default Value is 99999, setting it first +set(ROCM_VERSION_FOR_PACKAGE "99999") +if(DEFINED ENV{ROCM_LIBPATCH_VERSION}) + set(ROCM_VERSION_FOR_PACKAGE $ENV{ROCM_LIBPATCH_VERSION}) +endif() + +#Debian package specific variables +set(CPACK_DEBIAN_PACKAGE_DEPENDS "hsa-rocr, kmod, pciutils") +set(CPACK_DEBIAN_PACKAGE_HOMEPAGE ${CPACK_DEBIAN_PACKAGE_HOMEPAGE} CACHE STRING "https://github.com/RadeonOpenCompute/ROCm") +if (DEFINED ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) + set(CPACK_DEBIAN_PACKAGE_RELEASE $ENV{CPACK_DEBIAN_PACKAGE_RELEASE}) +else() + set(CPACK_DEBIAN_PACKAGE_RELEASE "local") +endif() +if ( ROCM_DEP_ROCMCORE ) + string ( APPEND CPACK_DEBIAN_PACKAGE_DEPENDS ", rocm-core" ) +endif() + +#RPM package specific variables +set(CPACK_RPM_PACKAGE_REQUIRES "hsa-rocr kmod pciutils") +if(DEFINED CPACK_PACKAGING_INSTALL_PREFIX) + set ( CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION "${CPACK_PACKAGING_INSTALL_PREFIX} ${CPACK_PACKAGING_INSTALL_PREFIX}/${CMAKE_INSTALL_BINDIR}" ) +endif() +if(DEFINED ENV{CPACK_RPM_PACKAGE_RELEASE}) + 
set(CPACK_RPM_PACKAGE_RELEASE $ENV{CPACK_RPM_PACKAGE_RELEASE}) +else() + set(CPACK_RPM_PACKAGE_RELEASE "local") +endif() +if ( ROCM_DEP_ROCMCORE ) + string ( APPEND CPACK_RPM_PACKAGE_REQUIRES " rocm-core" ) +endif() + +#Set rpm distro +if(CPACK_RPM_PACKAGE_RELEASE) + set(CPACK_RPM_PACKAGE_RELEASE_DIST ON) +endif() + +#Prepare final version for the CPACK use +set(CPACK_PACKAGE_VERSION "${CPACK_PACKAGE_VERSION_MAJOR}.${CPACK_PACKAGE_VERSION_MINOR}.${CPACK_PACKAGE_VERSION_PATCH}.${ROCM_VERSION_FOR_PACKAGE}") + +#Set the names now using CPACK utility +set(CPACK_DEBIAN_FILE_NAME "DEB-DEFAULT") +set(CPACK_RPM_FILE_NAME "RPM-DEFAULT") + +include ( CPack ) diff --git a/License.txt b/License.txt new file mode 100644 index 0000000..4cc6799 --- /dev/null +++ b/License.txt @@ -0,0 +1,38 @@ +The University of Illinois/NCSA +Open Source License (NCSA) + +Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved. + +Developed by: + + AMD Research and AMD HSA Software Development + + Advanced Micro Devices, Inc. + + www.amd.com + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal with the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + + - Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimers in + the documentation and/or other materials provided with the distribution. 
+ - Neither the names of Advanced Micro Devices, Inc, + nor the names of its contributors may be used to endorse or promote + products derived from this Software without specific prior written + permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS WITH THE SOFTWARE. + diff --git a/README.md b/README.md index 9ebb840..e49dd58 100644 --- a/README.md +++ b/README.md @@ -1 +1,44 @@ -# template-repository \ No newline at end of file +# rocminfo +ROCm Application for Reporting System Info + +## To Build +Use the standard cmake build procedure to build rocminfo. The location of ROCM +root (parent directory containing ROCM headers and libraries) must be provided +as a cmake argument using the standard CMAKE_PREFIX_PATH cmake variable. + +After cloning the rocminfo git repo, please make sure to do a git-fetch --tags +to get the tags residing on the repo. These tags are used for versioning. +For example, + +$ git fetch --tags origin + +Building from the CMakeLists.txt directory might look like this: + +mkdir -p build + +cd build + +cmake -DCMAKE_PREFIX_PATH=/opt/rocm .. + +make + +cd .. + +Upon a successful build the binary, rocminfo, and the python script, +rocm_agent_enumerator, will be in the build folder. + +## Execution + +"rocminfo" gives information about the HSA system attributes and agents. + +"rocm_agent_enumerator" prints the list of available AMD GCN ISA or architecture names. With the option '-name', it prints out available architectures names obtained from rocminfo. Otherwise, it generates ISA in one of five different ways: + +1. 
ROCM_TARGET_LST : a user defined environment variable, set to the path and filename where to find the "target.lst" file. This can be used in an install environment with sandbox, where execution of "rocminfo" is not possible. + +2. target.lst : user-supplied text file, in the same folder as "rocm_agent_enumerator". This is used in a container setting where ROCm stack may usually not available. + +3. HSA topology : gathers the information from the HSA node topology in /sys/class/kfd/kfd/topology/nodes/ + +4. lspci : enumerate PCI bus and locate supported devices from a hard-coded lookup table. + +5. rocminfo : a tool shipped with this script to enumerate GPU agents available on a working ROCm stack. diff --git a/cmake_modules/utils.cmake b/cmake_modules/utils.cmake new file mode 100755 index 0000000..5e1c19b --- /dev/null +++ b/cmake_modules/utils.cmake @@ -0,0 +1,162 @@ +################################################################################ +## +## The University of Illinois/NCSA +## Open Source License (NCSA) +## +## Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved. +## +## Developed by: +## +## AMD Research and AMD HSA Software Development +## +## Advanced Micro Devices, Inc. +## +## www.amd.com +## +## Permission is hereby granted, free of charge, to any person obtaining a copy +## of this software and associated documentation files (the "Software"), to +## deal with the Software without restriction, including without limitation +## the rights to use, copy, modify, merge, publish, distribute, sublicense, +## and#or sell copies of the Software, and to permit persons to whom the +## Software is furnished to do so, subject to the following conditions: +## +## - Redistributions of source code must retain the above copyright notice, +## this list of conditions and the following disclaimers. 
+## - Redistributions in binary form must reproduce the above copyright +## notice, this list of conditions and the following disclaimers in +## the documentation and#or other materials provided with the distribution. +## - Neither the names of Advanced Micro Devices, Inc, +## nor the names of its contributors may be used to endorse or promote +## products derived from this Software without specific prior written +## permission. +## +## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +## THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR +## OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +## ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +## DEALINGS WITH THE SOFTWARE. +## +################################################################################ + +## Parses the VERSION_STRING variable and places +## the first, second and third number values in +## the major, minor and patch variables. 
+function( parse_version VERSION_STRING ) + + string ( FIND ${VERSION_STRING} "-" STRING_INDEX ) + + if ( ${STRING_INDEX} GREATER -1 ) + math ( EXPR STRING_INDEX "${STRING_INDEX} + 1" ) + string ( SUBSTRING ${VERSION_STRING} ${STRING_INDEX} -1 VERSION_BUILD ) + endif () + + string ( REGEX MATCHALL "[0123456789]+" VERSIONS ${VERSION_STRING} ) + list ( LENGTH VERSIONS VERSION_COUNT ) + + if ( ${VERSION_COUNT} GREATER 0) + list ( GET VERSIONS 0 MAJOR ) + set ( VERSION_MAJOR ${MAJOR} PARENT_SCOPE ) + set ( TEMP_VERSION_STRING "${MAJOR}" ) + endif () + + if ( ${VERSION_COUNT} GREATER 1 ) + list ( GET VERSIONS 1 MINOR ) + set ( VERSION_MINOR ${MINOR} PARENT_SCOPE ) + set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${MINOR}" ) + endif () + + if ( ${VERSION_COUNT} GREATER 2 ) + list ( GET VERSIONS 2 PATCH ) + set ( VERSION_PATCH ${PATCH} PARENT_SCOPE ) + set ( TEMP_VERSION_STRING "${TEMP_VERSION_STRING}.${PATCH}" ) + endif () + + set ( VERSION_STRING "${TEMP_VERSION_STRING}" PARENT_SCOPE ) + +endfunction () + +## Gets the current version of the repository +## using versioning tags and git describe. +## Passes back a packaging version string +## and a library version string. 
+function(get_version_from_tag DEFAULT_VERSION_STRING VERSION_PREFIX GIT) + parse_version ( ${DEFAULT_VERSION_STRING} ) + + if ( GIT ) + execute_process ( COMMAND git describe --tags --dirty --long --match ${VERSION_PREFIX}-[0-9.]* + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE GIT_TAG_STRING + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RESULT ) + if ( ${RESULT} EQUAL 0 ) + parse_version ( ${GIT_TAG_STRING} ) + endif () + + endif () + + set( VERSION_STRING "${VERSION_STRING}" PARENT_SCOPE ) + set( VERSION_MAJOR "${VERSION_MAJOR}" PARENT_SCOPE ) + set( VERSION_MINOR "${VERSION_MINOR}" PARENT_SCOPE ) +endfunction() + +function(num_change_since_prev_pkg VERSION_PREFIX) + find_program(get_commits NAMES version_util.sh + PATHS ${CMAKE_CURRENT_SOURCE_DIR}/cmake_modules) + if (get_commits) + execute_process( COMMAND ${get_commits} -c ${VERSION_PREFIX} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE NUM_COMMITS + OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RESULT ) + + set(NUM_COMMITS "${NUM_COMMITS}" PARENT_SCOPE ) + + if ( ${RESULT} EQUAL 0 ) + message("${NUM_COMMITS} were found since previous release") + else() + message("Unable to determine number of commits since previous release") + endif() + else() + message("WARNING: Didn't find version_util.sh") + set(NUM_COMMITS "unknown" PARENT_SCOPE ) + endif() +endfunction() + +function(get_package_version_number DEFAULT_VERSION_STRING VERSION_PREFIX GIT) + get_version_from_tag(${DEFAULT_VERSION_STRING} ${VERSION_PREFIX} GIT) + num_change_since_prev_pkg(${VERSION_PREFIX}) + + set(PKG_VERSION_STR "${VERSION_STRING}.${NUM_COMMITS}") + if (DEFINED ENV{ROCM_BUILD_ID}) + set(VERSION_ID $ENV{ROCM_BUILD_ID}) + else() + set(VERSION_ID "local_build-0") + endif() + + set(PKG_VERSION_STR "${PKG_VERSION_STR}.${VERSION_ID}") + + if (GIT) + execute_process(COMMAND git rev-parse --short HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE VERSION_HASH + 
OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE RESULT ) + if( ${RESULT} EQUAL 0 ) + # Check for dirty workspace. + execute_process(COMMAND git diff --quiet + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + RESULT_VARIABLE RESULT ) + if(${RESULT} EQUAL 1) + set(VERSION_HASH "${VERSION_HASH}-dirty") + endif() + else() + set( VERSION_HASH "unknown" ) + endif() + else() + set( VERSION_HASH "unknown" ) + endif() + set(PKG_VERSION_STR "${PKG_VERSION_STR}-${VERSION_HASH}") + set(PKG_VERSION_STR ${PKG_VERSION_STR} PARENT_SCOPE) +endfunction() diff --git a/cmake_modules/version_util.sh b/cmake_modules/version_util.sh new file mode 100755 index 0000000..5c1ded9 --- /dev/null +++ b/cmake_modules/version_util.sh @@ -0,0 +1,40 @@ +#!/bin/bash + +# Handle commandline args +while [ "$1" != "" ]; do + case $1 in + -c ) # Commits since prevous tag + TARGET="count" ;; + * ) + TARGET="count" + break ;; + esac + shift 1 +done +TAG_PREFIX=$1 +reg_ex="${TAG_PREFIX}*" + +commits_since_last_tag() { + TAG_ARR=(`git tag --sort=committerdate -l ${reg_ex} | tail -2`) + PREVIOUS_TAG=${TAG_ARR[0]} + CURRENT_TAG=${TAG_ARR[1]} + + PREV_CMT_NUM=`git rev-list --count $PREVIOUS_TAG` + CURR_CMT_NUM=`git rev-list --count $CURRENT_TAG` + + # Commits since prevous tag: + if [[ -z $PREV_CMT_NUM || -z $CURR_CMT_NUM ]]; then + let NUM_COMMITS="0" + else + let NUM_COMMITS="${CURR_CMT_NUM}-${PREV_CMT_NUM}" + fi + echo $NUM_COMMITS +} + +case $TARGET in + count) commits_since_last_tag ;; + *) die "Invalid target $target" ;; +esac + +exit 0 + diff --git a/debian/changelog b/debian/changelog index bad88e2..8815027 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,116 @@ -template-repository (1.0-1) unstable; urgency=medium +rocminfo (5.7.1-3) unstable; urgency=medium - * Initial release + * Fix autopkgtests on systems without AMD GPUs - -- Tsic404 Sat, 28 Jan 2023 13:46:49 +0800 + -- Cordell Bloor Tue, 19 Mar 2024 17:55:24 -0600 + +rocminfo (5.7.1-2) unstable; urgency=medium + + * Add 
d/p/fix-escape-sequence.patch to fix invalid escape sequence warning + when running rocm_agent_enumerator. (Closes: #1065405) + * Add trivial autopkgtests. + * Add d/gbp.conf to ensure consistent settings. + * Tidy up d/p/cmakefile-fixes.patch. + * Use d/not-installed to control exclude files. + + -- Cordell Bloor Mon, 18 Mar 2024 18:32:35 -0600 + +rocminfo (5.7.1-1) unstable; urgency=medium + + * Migrate to unstable. + * d/control: update maintainer name for consistency + * d/control: add self to uploaders + + -- Cordell Bloor Fri, 23 Feb 2024 15:14:34 -0700 + +rocminfo (5.7.1-1~exp1) experimental; urgency=medium + + * Team upload. + + * New upstream version 5.7.1 + * Refresh patches and drop those applied upstream + * Update upstream URL + * d/control: fix grammar in package description + + -- Cordell Bloor Tue, 09 Jan 2024 17:56:14 -0700 + +rocminfo (5.2.3-3) unstable; urgency=medium + + * Team upload. + + * add d/p/fix-crash-on-kfd-permission-error.patch (Closes: #1031423) + * d/control: update standards version to 4.6.2 + * d/copyright: update for debian files + + -- Cordell Bloor Thu, 02 Mar 2023 21:36:26 -0700 + +rocminfo (5.2.3-2) unstable; urgency=medium + + * cross.patch: fix failure to cross-build from source. + Thanks to Helmut Grohne for the heavy lifting (Closes: #1024611) + * d/salsa-ci.yml: disable i386 while dependencies are missing. + * d/control: rocminfo depends on kmod. + Thanks to Cordell Bloor (Closes: #1026931) + * d/copyright: update copyright year. + + -- Étienne Mollier Mon, 02 Jan 2023 22:35:17 +0100 + +rocminfo (5.2.3-1) unstable; urgency=medium + + * Migrate ROCm 5.2.3 to unstable. 
+ + -- Étienne Mollier Wed, 19 Oct 2022 19:52:21 +0200 + +rocminfo (5.2.3-1~0exp0) experimental; urgency=medium + + * New upstream version + * Standards-Version: 4.6.1 (routine-update) + * Reorder sequence of d/control fields by cme (routine-update) + * Remove trailing whitespace in debian/copyright (routine-update) + * Add salsa-ci file (routine-update) + * Rules-Requires-Root: no (routine-update) + * Set upstream metadata fields: Bug-Database, Bug-Submit, Repository, + Repository-Browse. + * d/control: add myself to uploaders. + * d/control: bump build-dependency to ROCm 5.2. + + -- Étienne Mollier Wed, 07 Sep 2022 21:32:54 +0200 + +rocminfo (5.1.0-1) unstable; urgency=medium + + * New upstream version 5.1.0 + * Add the missing runtime Dep pciutils. + * Bump ROCm B-D to (>= 5.1.0~). + + -- Mo Zhou Fri, 08 Apr 2022 10:27:05 -0400 + +rocminfo (5.0.0-1) unstable; urgency=medium + + [ Maxime Chambonnet ] + * New upstream version 5.0.0 + * Hardened in d/rules + * Update maintainer address to debian-ai@l.d.o. + * propagated roc runtime soname rollback to d/control BD + * extend auto clean to patches in case of crash + * Remove uupdate from d/watch + * Removed auto-clean improved: using > gbp buildpackage + --git-export-dir instead, which first clones the source nearby. + + [ Mo Zhou ] + * Add version requirement on ROCm B-Ds. + * Get rid of the duplicated license install. + * Upload to unstable. + + -- Mo Zhou Tue, 15 Feb 2022 10:41:42 -0500 + +rocminfo (3.3.0-1~exp1) experimental; urgency=medium + + * Initial release. (Closes: #960982) + + [ Mo Zhou ] + * Initial packaging. + + [ Norbert Preining ] + * Update to 3.3.0. 
+ + -- Norbert Preining Wed, 20 May 2020 09:19:54 +0900 diff --git a/debian/compat b/debian/compat deleted file mode 100644 index b4de394..0000000 --- a/debian/compat +++ /dev/null @@ -1 +0,0 @@ -11 diff --git a/debian/control b/debian/control index cb7c4a0..9d023c3 100644 --- a/debian/control +++ b/debian/control @@ -1,15 +1,29 @@ -Source: template-repository -Section: unknown +Source: rocminfo +Maintainer: Debian ROCm Team +Uploaders: Mo Zhou , + Norbert Preining , + Maxime Chambonnet , + Étienne Mollier , + Cordell Bloor +Section: devel Priority: optional -Maintainer: Tsic404 -Build-Depends: debhelper (>= 11) -Standards-Version: 4.1.3 -Homepage: https://github.com/deepin-community/template-repository -#Vcs-Browser: https://salsa.debian.org/debian/deepin-community-template-repository -#Vcs-Git: https://salsa.debian.org/debian/deepin-community-template-repository.git +Build-Depends: debhelper-compat (= 13), + cmake, + libhsakmt-dev (>= 5.7.0~), + libhsa-runtime-dev (>= 5.7.1~) +Standards-Version: 4.6.2 +Vcs-Browser: https://salsa.debian.org/rocm-team/rocminfo +Vcs-Git: https://salsa.debian.org/rocm-team/rocminfo.git +Homepage: https://github.com/ROCm/rocminfo +Rules-Requires-Root: no -Package: template-repository +Package: rocminfo Architecture: any -Depends: ${shlibs:Depends}, ${misc:Depends} -Description: - +Depends: ${misc:Depends}, + ${shlibs:Depends}, + python3, + pciutils, + kmod +Description: ROCm Application for Reporting System Info + rocminfo gives information about the HSA system attributes and agents. + This package is part of the AMD ROCm software stack. 
diff --git a/debian/copyright b/debian/copyright index f5c805e..d24b3a8 100644 --- a/debian/copyright +++ b/debian/copyright @@ -1,22 +1,71 @@ Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ -Upstream-Name: template-repository -Source: https://github.com/deepin-community/template-repository +Upstream-Name: AMD +Source: https://github.com/ROCm/rocminfo Files: * -Copyright: 2023 Tsic404 -License: GPL-2+ - This package is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - . - This package is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - . - You should have received a copy of the GNU General Public License - along with this program. If not, see - . - On Debian systems, the complete text of the GNU General - Public License version 2 can be found in "/usr/share/common-licenses/GPL-2". +Copyright: 2014-2023, Advanced Micro Devices, Inc. +License: Illinois-NCSA + The University of Illinois/NCSA + Open Source License (NCSA) + . + Copyright (c) 2014-2017, Advanced Micro Devices, Inc. All rights reserved. + . + Developed by: + . + AMD Research and AMD HSA Software Development + . + Advanced Micro Devices, Inc. + . + www.amd.com + . + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal with the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + . 
+ - Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimers in + the documentation and/or other materials provided with the distribution. + - Neither the names of Advanced Micro Devices, Inc, + nor the names of its contributors may be used to endorse or promote + products derived from this Software without specific prior written + permission. + . + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS WITH THE SOFTWARE. + +Files: debian/* +Copyright: 2019-2022, Mo Zhou + 2020, Norbert Preining + 2022-2023, Étienne Mollier + 2023,2024, Cordell Bloor +License: Expat + +License: Expat + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + . + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + . + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/debian/gbp.conf b/debian/gbp.conf new file mode 100644 index 0000000..bfa58d8 --- /dev/null +++ b/debian/gbp.conf @@ -0,0 +1,4 @@ +[DEFAULT] +pristine-tar = True +[pq] +patch-numbers = False diff --git a/debian/manpages b/debian/manpages new file mode 100644 index 0000000..17ddc31 --- /dev/null +++ b/debian/manpages @@ -0,0 +1,2 @@ +debian/rocminfo.1 +debian/rocm_agent_enumerator.1 diff --git a/debian/not-installed b/debian/not-installed new file mode 100644 index 0000000..6a881fd --- /dev/null +++ b/debian/not-installed @@ -0,0 +1 @@ +usr/share/doc/rocminfo/License.txt diff --git a/debian/patches/cmakefile-fixes.patch b/debian/patches/cmakefile-fixes.patch new file mode 100644 index 0000000..01e914b --- /dev/null +++ b/debian/patches/cmakefile-fixes.patch @@ -0,0 +1,25 @@ +Description: Debian git tags differ from upstream, breaking this cmake mechanism +Forwarded: not-needed +--- + CMakeLists.txt | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -78,12 +78,12 @@ + set(PKG_VERSION_NUM_COMMIT 0) + + ################# Determine the library version ######################### +-## Setup the package version based on git tags. +-set(PKG_VERSION_GIT_TAG_PREFIX "rocminfo_pkg_ver") ++### Setup the package version based on git tags. 
++#set(PKG_VERSION_GIT_TAG_PREFIX "rocminfo_pkg_ver") + +-find_program (GIT NAMES git) ++#find_program (GIT NAMES git) + +-get_package_version_number("1.0.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT) ++#get_package_version_number("1.0.0" ${PKG_VERSION_GIT_TAG_PREFIX} GIT) + # VERSION_* variables should be set by get_version_from_tag + message("Package version: ${PKG_VERSION_STR}") + diff --git a/debian/patches/cross.patch b/debian/patches/cross.patch new file mode 100644 index 0000000..f47bfc7 --- /dev/null +++ b/debian/patches/cross.patch @@ -0,0 +1,31 @@ +Description: rocminfo FTCBFS: confused architecture terminology + rocminfo fails to cross build from source, because it passes invalid + compiler flags. The detection is broken due to confusing architecture + terminologies. The system we are building on is called "build" in Debian + and GNU, but for cmake it is called "host". The system we are building + for is called "host" in Debian and GNU, but for cmake it is empty (it + doesn't have a name). 
+Author: Helmut Grohne +Bug: https://github.com/ROCm/rocminfo/issues/60 +Bug-Debian: https://bugs.debian.org/1024611 +Reviewed-by: Étienne Mollier +Last-Update: 2022-11-22 +--- +This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ +--- a/CMakeLists.txt ++++ b/CMakeLists.txt +@@ -137,12 +137,12 @@ + set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -fms-extensions) + set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -Werror) + set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -Wall) +-set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -m64) ++#set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -m64) + + # + # Extend the compiler flags for 64-bit builds + # +-if((${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "x86_64") OR (${CMAKE_HOST_SYSTEM_PROCESSOR} STREQUAL "AMD64")) ++if((${CMAKE_SYSTEM_PROCESSOR} STREQUAL "x86_64") OR (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")) + set(ROCMINFO_CXX_FLAGS ${ROCMINFO_CXX_FLAGS} -msse -msse2) + endif() + diff --git a/debian/patches/fix-escape-sequence.patch b/debian/patches/fix-escape-sequence.patch new file mode 100644 index 0000000..7a1d352 --- /dev/null +++ b/debian/patches/fix-escape-sequence.patch @@ -0,0 +1,74 @@ +From 1c14fd02037ba59200954328e1af2da851b829dc Mon Sep 17 00:00:00 2001 +From: getianao +Date: Thu, 2 Jun 2022 12:29:44 +0800 +Subject: [PATCH] Fix invalid escape sequence deprecation in Python3 + +Bug-Debian: https://bugs.debian.org/1065405 +Bug: https://github.com/ROCm/rocminfo/pull/55 +Forwarded: https://github.com/ROCm/rocminfo/pull/55 +Applied-Upstream: 6.1.0 +--- + rocm_agent_enumerator | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/rocm_agent_enumerator ++++ b/rocm_agent_enumerator +@@ -81,7 +81,7 @@ + return func + return deco + +-@staticVars(search_term=re.compile("gfx[0-9a-fA-F]+")) ++@staticVars(search_term=re.compile(r"gfx[0-9a-fA-F]+")) + def getGCNISA(line, match_from_beginning = False): + if match_from_beginning is True: + result = getGCNISA.search_term.match(line) +@@ -92,7 +92,7 @@ + return 
result.group(0) + return None + +-@staticVars(search_name=re.compile("gfx[0-9a-fA-F]+(:[-+:\w]+)?")) ++@staticVars(search_name=re.compile(r"gfx[0-9a-fA-F]+(:[-+:\w]+)?")) + def getGCNArchName(line): + result = getGCNArchName.search_name.search(line) + +@@ -135,8 +135,8 @@ + break + # run rocminfo + rocminfo_output = subprocess.Popen(rocminfo_executable, stdout=subprocess.PIPE).communicate()[0].decode("utf-8").split('\n') +- term1 = re.compile("Cannot allocate memory") +- term2 = re.compile("HSA_STATUS_ERROR_OUT_OF_RESOURCES") ++ term1 = re.compile(r"Cannot allocate memory") ++ term2 = re.compile(r"HSA_STATUS_ERROR_OUT_OF_RESOURCES") + done = 1 + for line in rocminfo_output: + if term1.search(line) is not None or term2.search(line) is not None: +@@ -149,9 +149,9 @@ + + # search AMDGCN gfx ISA + if search_arch_name is True: +- line_search_term = re.compile("\A\s+Name:\s+(amdgcn-amd-amdhsa--gfx\d+)") ++ line_search_term = re.compile(r"\A\s+Name:\s+(amdgcn-amd-amdhsa--gfx\d+)") + else: +- line_search_term = re.compile("\A\s+Name:\s+(gfx\d+)") ++ line_search_term = re.compile(r"\A\s+Name:\s+(gfx\d+)") + for line in rocminfo_output: + if line_search_term.match(line) is not None: + if search_arch_name is True: +@@ -172,7 +172,7 @@ + except: + lspci_output = [] + +- target_search_term = re.compile("1002:\w+") ++ target_search_term = re.compile(r"1002:\w+") + for line in lspci_output: + search_result = target_search_term.search(line) + if search_result is not None: +@@ -196,7 +196,7 @@ + if os.path.isdir(node_path): + prop_path = node_path + '/properties' + if os.path.isfile(prop_path) and os.access(prop_path, os.R_OK): +- target_search_term = re.compile("gfx_target_version.+") ++ target_search_term = re.compile(r"gfx_target_version.+") + with open(prop_path) as f: + try: + line = f.readline() diff --git a/debian/patches/series b/debian/patches/series new file mode 100644 index 0000000..fc1ea23 --- /dev/null +++ b/debian/patches/series @@ -0,0 +1,3 @@ +cmakefile-fixes.patch 
+cross.patch +fix-escape-sequence.patch diff --git a/debian/rocm_agent_enumerator.1 b/debian/rocm_agent_enumerator.1 new file mode 100644 index 0000000..18fb238 --- /dev/null +++ b/debian/rocm_agent_enumerator.1 @@ -0,0 +1,23 @@ +.TH ROCM-AGENT-ENUMERATOR "1" "May 2020" "rocm_agent_enumerator 3.3.0" "User Commands" +.SH NAME +rocm_agent_enumerator \- list all available AMD GCN ISA +.SH SYNOPSIS +.PP +.B rocm_agent_enumerator +.SH DESCRIPTION +.B rocm_agent_enumerator +uses four different ways to generate the list of +available AMD Graphics Core Next ISA: + +1. ROCM_TARGET_LST : a user-defined environment variable, set to the path +and filename where to find the "target.lst" file. This can be used in an +install environment with a sandbox, where execution of "rocminfo" is not possible. + +2. target.lst : user-supplied text file, in the same folder as +"rocm_agent_enumerator". This is used in a container setting where the ROCm +stack may not be available. + +3. rocminfo : a tool shipped with this script to enumerate GPU agents +available on a working ROCm stack. + +4. lspci : enumerate PCI bus and locate supported devices from a hard-coded lookup table. diff --git a/debian/rocminfo.1 b/debian/rocminfo.1 new file mode 100644 index 0000000..bccd20f --- /dev/null +++ b/debian/rocminfo.1 @@ -0,0 +1,9 @@ +.TH ROCMINFO "1" "May 2020" "rocminfo 3.3.0" "User Commands" +.SH NAME +rocminfo \- ROCm Application for Reporting System Info +.SH SYNOPSIS +.PP +.B rocminfo +.SH DESCRIPTION +.B rocminfo +gives information about the HSA system attributes and agents.
diff --git a/debian/rocminfo.install b/debian/rocminfo.install new file mode 100644 index 0000000..8e1846f --- /dev/null +++ b/debian/rocminfo.install @@ -0,0 +1,2 @@ +usr/bin/rocminfo +usr/bin/rocm_agent_enumerator diff --git a/debian/rules b/debian/rules index 2d33f6a..755485e 100755 --- a/debian/rules +++ b/debian/rules @@ -1,4 +1,13 @@ #!/usr/bin/make -f +export DEB_BUILD_MAINT_OPTIONS = hardening=+all %: - dh $@ + dh $@ -Scmake + +override_dh_auto_configure: + dh_auto_configure -- \ + -DROCM_DIR=/usr \ + -DCMAKE_INSTALL_PREFIX=/usr + +override_dh_auto_install: + dh_auto_install --destdir=debian/tmp diff --git a/debian/salsa-ci.yml b/debian/salsa-ci.yml new file mode 100644 index 0000000..47352ee --- /dev/null +++ b/debian/salsa-ci.yml @@ -0,0 +1,7 @@ +--- +include: + - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/salsa-ci.yml + - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/pipeline-jobs.yml + +variables: + SALSA_CI_DISABLE_BUILD_PACKAGE_I386: 1 diff --git a/debian/tests/control b/debian/tests/control new file mode 100644 index 0000000..e43095c --- /dev/null +++ b/debian/tests/control @@ -0,0 +1,3 @@ +Tests: rocminfo, enumerator +Depends: @, sed +Restrictions: superficial, skippable diff --git a/debian/tests/enumerator b/debian/tests/enumerator new file mode 100644 index 0000000..a7efe15 --- /dev/null +++ b/debian/tests/enumerator @@ -0,0 +1,3 @@ +#!/bin/sh +set -e +rocm_agent_enumerator diff --git a/debian/tests/rocminfo b/debian/tests/rocminfo new file mode 100644 index 0000000..10b0a28 --- /dev/null +++ b/debian/tests/rocminfo @@ -0,0 +1,9 @@ +#!/bin/sh +set -e +if [ -d /sys/class/kfd ]; then + echo 'Running rocminfo and expecting GPU information' + rocminfo +else + echo 'Running rocminfo and expecting no GPU found' + rocminfo | sed '/ROCk module is NOT/,$b;$q1' +fi diff --git a/debian/upstream/metadata b/debian/upstream/metadata new file mode 100644 index 0000000..5dd650c --- /dev/null +++ b/debian/upstream/metadata @@ -0,0 +1,5 
@@ +--- +Bug-Database: https://github.com/ROCm/rocminfo/issues +Bug-Submit: https://github.com/ROCm/rocminfo/issues/new +Repository: https://github.com/ROCm/rocminfo.git +Repository-Browse: https://github.com/ROCm/rocminfo diff --git a/debian/watch b/debian/watch new file mode 100644 index 0000000..e4f9056 --- /dev/null +++ b/debian/watch @@ -0,0 +1,6 @@ +version=4 +opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%rocminfo-$1.tar.gz%" \ +https://github.com/ROCm/rocminfo/tags \ +(?:.*?/)?rocm-v?(\d[\d.]*)\.tar\.gz debian + + diff --git a/rocm_agent_enumerator b/rocm_agent_enumerator new file mode 100755 index 0000000..b901e47 --- /dev/null +++ b/rocm_agent_enumerator @@ -0,0 +1,263 @@ +#!/usr/bin/env python3 + +import os +import re +import subprocess +import sys +import time + +# get current working directory +CWD = os.path.dirname(os.path.realpath(__file__)) + +ISA_TO_ID = { + # Kaveri - Temporary + "gfx700" : [0x1304, 0x1305, 0x1306, 0x1307, 0x1309, 0x130a, 0x130b, 0x130c, + 0x130d, 0x130e, 0x130f, 0x1310, 0x1311, 0x1312, 0x1313, 0x1315, + 0x1316, 0x1317, 0x1318, 0x131b, 0x131c, 0x131d], + # Hawaii + "gfx701" : [0x67a0, 0x67a1, 0x67a2, 0x67a8, 0x67a9, 0x67aa, 0x67b0, 0x67b1, + 0x67b8, 0x67b9, 0x67ba, 0x67be], + # Carrizo + "gfx801" : [0x9870, 0x9874, 0x9875, 0x9876, 0x9877, 0x98e4], + # Tonga + "gfx802" : [0x6920, 0x6921, 0x6928, 0x6929, 0x692b, 0x692f, 0x6930, 0x6938, + 0x6939], + # Fiji + "gfx803" : [0x7300, 0x730f, + # Polaris10 + 0x67c0, 0x67c1, 0x67c2, 0x67c4, 0x67c7, 0x67c8, 0x67c9, 0x67ca, + 0x67cc, 0x67cf, 0x6fdf, + # Polaris11 + 0x67d0, 0x67df, 0x67e0, 0x67e1, 0x67e3, 0x67e7, 0x67e8, 0x67e9, + 0x67eb, 0x67ef, 0x67ff, + # Polaris12 + 0x6980, 0x6981, 0x6985, 0x6986, 0x6987, 0x6995, 0x6997, 0x699f, + # VegaM + 0x694c, 0x694e, 0x694f], + # Vega10 + "gfx900" : [0x6860, 0x6861, 0x6862, 0x6863, 0x6864, 0x6867, 0x6868, 0x6869, + 0x6869, 0x686a, 0x686b, 0x686c, 0x686d, 0x686e, 0x686f, 0x687f], + # Raven + "gfx902" : [0x15dd, 0x15d8], + # Vega12 + "gfx904" : 
[0x69a0, 0x69a1, 0x69a2, 0x69a3, 0x69af], + # Vega20 + "gfx906" : [0x66a0, 0x66a1, 0x66a2, 0x66a3, 0x66a4, 0x66a7, 0x66af], + # Arcturus + "gfx908" : [0x738c, 0x7388, 0x738e, 0x7390], + # Aldebaran + "gfx90a" : [0x7408, 0x740c, 0x740f, 0x7410], + # Renoir + "gfx90c" : [0x15e7, 0x1636, 0x1638, 0x164c], + # Navi10 + "gfx1010" : [0x7310, 0x7312, 0x7318, 0x7319, 0x731a, 0x731b, 0x731e, 0x731f], + # Navi12 + "gfx1011" : [0x7360, 0x7362], + # Navi14 + "gfx1012" : [0x7340, 0x7341, 0x7347, 0x734f], + # Cyan_Skillfish + "gfx1013" : [0x13f9, 0x13fa, 0x13fb, 0x13fc, 0x13f3], + # Sienna_Cichlid + "gfx1030" : [0x73a0, 0x73a1, 0x73a2, 0x73a3, 0x73a5, 0x73a8, 0x73a9, 0x73ab, + 0x73ac, 0x73ad, 0x73ae, 0x73af, 0x73bf], + # Navy_Flounder + "gfx1031" : [0x73c0, 0x73c1, 0x73c3, 0x73da, 0x73db, 0x73dc, 0x73dd, 0x73de, + 0x73df], + # Dimgray_Cavefish + "gfx1032" : [0x73e0, 0x73e1, 0x73e2, 0x73e3, 0x73e8, 0x73e9, 0x73ea, 0x73eb, + 0x73ec, 0x73ed, 0x73ef, 0x73ff], + # Van Gogh + "gfx1033" : [0x163f], + # Beige_Goby + "gfx1034" : [0x7420, 0x7421, 0x7422, 0x7423, 0x743f], + # Yellow_Carp + "gfx1035" : [0x164d, 0x1681] +} + +def staticVars(**kwargs): + def deco(func): + for k in kwargs: + setattr(func, k, kwargs[k]) + return func + return deco + +@staticVars(search_term=re.compile("gfx[0-9a-fA-F]+")) +def getGCNISA(line, match_from_beginning = False): + if match_from_beginning is True: + result = getGCNISA.search_term.match(line) + else: + result = getGCNISA.search_term.search(line) + + if result is not None: + return result.group(0) + return None + +@staticVars(search_name=re.compile("gfx[0-9a-fA-F]+(:[-+:\w]+)?")) +def getGCNArchName(line): + result = getGCNArchName.search_name.search(line) + + if result is not None: + return result.group(0) + return None + +def readFromTargetLstFile(): + target_list = [] + + # locate target.lst using environment variable or + # it should be placed at the same directory with this script + target_lst_path = os.environ.get("ROCM_TARGET_LST"); + if 
target_lst_path is None: + target_lst_path = os.path.join(CWD, "target.lst") + if os.path.isfile(target_lst_path): + with open(target_lst_path, 'r') as target_lst_file: + for line in target_lst_file: + # for target.lst match from beginning so targets can be disabled by + # commenting it out + target = getGCNISA(line, match_from_beginning = True) + if target is not None: + target_list.append(target) + + return target_list + +def readFromROCMINFO(search_arch_name = False): + target_list = [] + # locate rocminfo binary which should be placed at the same directory with + # this script + rocminfo_executable = os.path.join(CWD, "rocminfo") + + try: + t0 = time.time() + while 1: + t1 = time.time() + # quit after retrying rocminfo for a minute. + if t1 - t0 > 60.0: + print("Timeout querying rocminfo. Are you compiling with more than 254 threads?") + break + # run rocminfo + rocminfo_output = subprocess.Popen(rocminfo_executable, stdout=subprocess.PIPE).communicate()[0].decode("utf-8").split('\n') + term1 = re.compile("Cannot allocate memory") + term2 = re.compile("HSA_STATUS_ERROR_OUT_OF_RESOURCES") + done = 1 + for line in rocminfo_output: + if term1.search(line) is not None or term2.search(line) is not None: + done = 0 + break + if done: + break + except: + rocminfo_output = [] + + # search AMDGCN gfx ISA + if search_arch_name is True: + line_search_term = re.compile("\A\s+Name:\s+(amdgcn-amd-amdhsa--gfx\d+)") + else: + line_search_term = re.compile("\A\s+Name:\s+(gfx\d+)") + for line in rocminfo_output: + if line_search_term.match(line) is not None: + if search_arch_name is True: + target = getGCNArchName(line) + else: + target = getGCNISA(line) + if target is not None: + target_list.append(target) + + return target_list + +def readFromLSPCI(): + target_list = [] + + try: + # run lspci + lspci_output = subprocess.Popen(["/usr/bin/lspci", "-n", "-d", "1002:"], stdout=subprocess.PIPE).communicate()[0].decode("utf-8").split('\n') + except: + lspci_output = [] + +
target_search_term = re.compile("1002:\w+") + for line in lspci_output: + search_result = target_search_term.search(line) + if search_result is not None: + device_id = int(search_result.group(0).split(':')[1], 16) + # try lookup from ISA_TO_ID dict + for target in ISA_TO_ID.keys(): + for target_device_id in ISA_TO_ID[target]: + if device_id == target_device_id: + target_list.append(target) + break + + return target_list + +def readFromKFD(): + target_list = [] + + topology_dir = '/sys/class/kfd/kfd/topology/nodes/' + if os.path.isdir(topology_dir): + for node in sorted(os.listdir(topology_dir)): + node_path = os.path.join(topology_dir, node) + if os.path.isdir(node_path): + prop_path = node_path + '/properties' + if os.path.isfile(prop_path) and os.access(prop_path, os.R_OK): + target_search_term = re.compile("gfx_target_version.+") + with open(prop_path) as f: + try: + line = f.readline() + except PermissionError: + # We may have a subsystem (e.g. scheduler) limiting device visibility which + # could cause a permission error. + line = '' + while line != '' : + search_result = target_search_term.search(line) + if search_result is not None: + device_id = int(search_result.group(0).split(' ')[1], 10) + if device_id != 0: + major_ver = int((device_id / 10000) % 100) + minor_ver = int((device_id / 100) % 100) + stepping_ver = int(device_id % 100) + target_list.append("gfx" + format(major_ver, 'd') + format(minor_ver, 'x') + format(stepping_ver, 'x')) + line = f.readline() + + return target_list + +def main(): + if len(sys.argv) == 2 and sys.argv[1] == '-name' : + """ Prints the list of available AMD GCN target names extracted from rocminfo, a tool + shipped with this script to enumerate GPU agents available on a working ROCm stack.""" + target_list = readFromROCMINFO(True) + else: + """Prints the list of available AMD GCN ISA + + The program collects the list in 5 different ways, in the order of + precedence: + + 1.
ROCM_TARGET_LST : a user-defined environment variable, set to the path and + filename where to find the "target.lst" file. This can be + used in an install environment with a sandbox, where + execution of "rocminfo" is not possible. + 2. target.lst : user-supplied text file. This is used in a container setting + where the ROCm stack may not be available. + 3. HSA topology : gathers the information from the HSA node topology in + /sys/class/kfd/kfd/topology/nodes/ + 4. lspci : enumerate PCI bus and locate supported devices from a hard-coded + lookup table. + 5. rocminfo : a tool shipped with this script to enumerate GPU agents + available on a working ROCm stack. + """ + target_list = readFromTargetLstFile() + + if len(target_list) == 0: + target_list = readFromKFD() + + if len(target_list) == 0: + target_list = readFromLSPCI() + + if len(target_list) == 0: + target_list = readFromROCMINFO() + + # workaround to cope with existing rocm_agent_enumerator behavior where gfx000 + # would always be returned + print("gfx000") + + for gfx in target_list: + print(gfx) + +if __name__ == "__main__": + main() diff --git a/rocminfo.cc b/rocminfo.cc new file mode 100755 index 0000000..42f6cfd --- /dev/null +++ b/rocminfo.cc @@ -0,0 +1,1234 @@ +/* + * ============================================================================= + * ROC Runtime Conformance Release License + * ============================================================================= + * The University of Illinois/NCSA + * Open Source License (NCSA) + * + * Copyright (c) 2017, Advanced Micro Devices, Inc. + * All rights reserved. + * + * Developed by: + * + * AMD Research and AMD ROC Software Development + * + * Advanced Micro Devices, Inc.
+ * + * www.amd.com + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal with the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimers. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimers in + * the documentation and/or other materials provided with the distribution. + * - Neither the names of , + * nor the names of its contributors may be used to endorse or promote + * products derived from this Software without specific prior written + * permission. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS WITH THE SOFTWARE. 
+ * + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "hsa/hsa.h" +#include "hsa/hsa_ext_amd.h" + +#define COL_BLU "\x1B[34m" +#define COL_KCYN "\x1B[36m" +#define COL_GRN "\x1B[32m" +#define COL_NRM "\x1B[0m" +#define COL_RED "\x1B[31m" +#define COL_MAG "\x1B[35m" +#define COL_WHT "\x1B[37m" +#define COL_YEL "\x1B[33m" +#define COL_RESET "\033[0m" + +#define RET_IF_HSA_ERR(err) { \ + if ((err) != HSA_STATUS_SUCCESS) { \ + char err_val[12]; \ + char* err_str = NULL; \ + if (hsa_status_string(err, \ + (const char**)&err_str) != HSA_STATUS_SUCCESS) { \ + snprintf(&(err_val[0]), sizeof(err_val), "%#x", (uint32_t)err); \ + err_str = &(err_val[0]); \ + } \ + printf("%shsa api call failure at: %s:%d\n", \ + COL_RED, __FILE__, __LINE__); \ + printf("%sCall returned %s\n", COL_RED, err_str); \ + printf("%s", COL_RESET); \ + return (err); \ + } \ +} + +// This structure holds system information acquired through hsa info related +// calls, and is later used for reference when displaying the information. +struct system_info_t { + uint16_t major, minor; + uint64_t timestamp_frequency = 0; + uint64_t max_wait = 0; + hsa_endianness_t endianness; + hsa_machine_model_t machine_model; + bool mwaitx_enabled; + bool dmabuf_support; +}; + +// This structure holds agent information acquired through hsa info related +// calls, and is later used for reference when displaying the information. 
+struct agent_info_t { + char name[64]; + char uuid[24]; + char vendor_name[64]; + char device_mkt_name[64]; + hsa_agent_feature_t agent_feature; + hsa_profile_t agent_profile; + hsa_default_float_rounding_mode_t float_rounding_mode; + uint32_t max_queue; + uint32_t queue_min_size; + uint32_t queue_max_size; + hsa_queue_type_t queue_type; + uint32_t node; + hsa_device_type_t device_type; + uint32_t cache_size[4]; + uint32_t chip_id; + uint32_t asic_revision; + uint32_t cacheline_size; + uint32_t max_clock_freq; + uint32_t internal_node_id; + uint32_t max_addr_watch_pts; + // HSA_AMD_AGENT_INFO_MEMORY_WIDTH is deprecated, so exclude + // uint32_t mem_max_freq; Not supported by get_info + uint32_t compute_unit; + uint32_t wavefront_size; + uint32_t workgroup_max_size; + uint32_t grid_max_size; + uint32_t fbarrier_max_size; + uint32_t max_waves_per_cu; + uint32_t simds_per_cu; + uint32_t shader_engs; + uint32_t shader_arrs_per_sh_eng; + hsa_isa_t agent_isa; + hsa_dim3_t grid_max_dim; + uint16_t workgroup_max_dim[3]; + uint16_t bdf_id; + bool fast_f16; + uint32_t pkt_processor_ucode_ver; + uint32_t sdma_ucode_ver; + hsa_amd_iommu_version_t iommu_support; +}; + +// This structure holds memory pool information acquired through hsa info +// related calls, and is later used for reference when displaying the +// information. +typedef struct { + uint32_t segment; + size_t pool_size; + bool alloc_allowed; + size_t alloc_granule; + size_t pool_alloc_alignment; + bool pl_access; + uint32_t global_flag; +} pool_info_t; + +// This structure holds ISA information acquired through hsa info +// related calls, and is later used for reference when displaying the +// information. 
+struct isa_info_t { + char *name_str; + uint32_t workgroup_max_size; + hsa_dim3_t grid_max_dim; + uint64_t grid_max_size; + uint32_t fbarrier_max_size; + uint16_t workgroup_max_dim[3]; + bool def_rounding_modes[3]; + bool base_rounding_modes[3]; + bool mach_models[2]; + bool profiles[2]; + bool fast_f16; +}; + +// This structure holds cache information acquired through hsa info +// related calls, and is later used for reference when displaying the +// information. +struct cache_info_t { + char *name_str; + uint8_t level; + uint32_t size; +}; + +static const uint32_t kLabelFieldSize = 25; +static const uint32_t kValueFieldSize = 35; +static const uint32_t kIndentSize = 2; + +enum rocmi_int_format { + ROCMI_INT_FORMAT_DEC = 1, + ROCMI_INT_FORMAT_HEX = 2, +}; + +// Make the most common format the default +std::string int_to_string(uint32_t i, + uint32_t fmt = ROCMI_INT_FORMAT_DEC|ROCMI_INT_FORMAT_HEX) { + std::stringstream sd; + bool need_parens = false; + + if (fmt & ROCMI_INT_FORMAT_DEC) { + if (need_parens) { + sd << "("; + } + sd << i; + if (need_parens) { + sd << ") "; + } + need_parens = true; + } + + if (fmt & ROCMI_INT_FORMAT_HEX) { + if (need_parens) { + sd << "(0x"; + } + sd << std::hex << i; + if (need_parens) { + sd << ") "; + } + } + + return sd.str(); +} + +static void printLabelInt(char const *l, int d, uint32_t indent_lvl = 0) { + std::string ind(kIndentSize * indent_lvl, ' '); + + printf("%s%-*s%-*d\n", ind.c_str(), kLabelFieldSize, l, kValueFieldSize, d); +} +static void printLabelStr(char const *l, char const *s, + uint32_t indent_lvl = 0) { + std::string ind(kIndentSize * indent_lvl, ' '); + printf("%s%-*s%-*s\n", ind.c_str(), kLabelFieldSize, l, kValueFieldSize, s); +} +static void printLabelStr(char const *l, std::string const &s, + uint32_t indent_lvl = 0) { + std::string ind(kIndentSize * indent_lvl, ' '); + printf("%s%-*s%-*s\n", ind.c_str(), kLabelFieldSize, l, kValueFieldSize, + s.c_str()); +} +static void printLabel(char const *l, bool 
newline = false, + uint32_t indent_lvl = 0) { + std::string ind(kIndentSize * indent_lvl, ' '); + + printf("%s%-*s", ind.c_str(), kLabelFieldSize, l); + + if (newline) { + printf("\n"); + } +} +static void printValueStr(char const *s, bool newline = true) { + printf("%-*s%s", kValueFieldSize, s, newline ? "\n" : ""); +} + +// Acquire system information; returns the first failing HSA status, if any +static hsa_status_t AcquireSystemInfo(system_info_t *sys_info) { + hsa_status_t err; + + // Get Major and Minor version of runtime + err = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MAJOR, &sys_info->major); + RET_IF_HSA_ERR(err); + err = hsa_system_get_info(HSA_SYSTEM_INFO_VERSION_MINOR, &sys_info->minor); + RET_IF_HSA_ERR(err); + + // Get timestamp frequency + err = hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, + &sys_info->timestamp_frequency); + RET_IF_HSA_ERR(err); + + // Get maximum duration of a signal wait operation + err = hsa_system_get_info(HSA_SYSTEM_INFO_SIGNAL_MAX_WAIT, + &sys_info->max_wait); + RET_IF_HSA_ERR(err); + + // Get Endianness of the system + err = hsa_system_get_info(HSA_SYSTEM_INFO_ENDIANNESS, &sys_info->endianness); + RET_IF_HSA_ERR(err); + + // Get machine model info + err = hsa_system_get_info(HSA_SYSTEM_INFO_MACHINE_MODEL, + &sys_info->machine_model); + RET_IF_HSA_ERR(err); + + // Get mwaitx mode + err = hsa_system_get_info(HSA_AMD_SYSTEM_INFO_MWAITX_ENABLED, &sys_info->mwaitx_enabled); + RET_IF_HSA_ERR(err); + // Get DMABuf support + err = hsa_system_get_info(HSA_AMD_SYSTEM_INFO_DMABUF_SUPPORTED, + &sys_info->dmabuf_support); + RET_IF_HSA_ERR(err); + return err; +} + +static void DisplaySystemInfo(system_info_t const *sys_info) { + printLabel("Runtime Version:"); + printf("%d.%d\n", sys_info->major, sys_info->minor); + printLabel("System Timestamp Freq.:"); + printf("%fMHz\n", sys_info->timestamp_frequency / 1e6); + printLabel("Sig.
Max Wait Duration:"); + printf("%lu (0x%lX) (timestamp count)\n", sys_info->max_wait, + sys_info->max_wait); + + printLabel("Machine Model:"); + if (HSA_MACHINE_MODEL_SMALL == sys_info->machine_model) { + printValueStr("SMALL"); + } else if (HSA_MACHINE_MODEL_LARGE == sys_info->machine_model) { + printValueStr("LARGE"); + } + + printLabel("System Endianness:"); + if (HSA_ENDIANNESS_LITTLE == sys_info->endianness) { + printValueStr("LITTLE"); + } else if (HSA_ENDIANNESS_BIG == sys_info->endianness) { + printValueStr("BIG"); + } + + printLabel("Mwaitx:"); + printf("%s\n", sys_info->mwaitx_enabled ? "ENABLED" : "DISABLED"); + + printLabel("DMAbuf Support:"); + printf("%s\n", sys_info->dmabuf_support ? "YES" : "NO"); + + printf("\n"); +} + +static hsa_status_t +AcquireAgentInfo(hsa_agent_t agent, agent_info_t *agent_i) { + hsa_status_t err; + // Get agent name and vendor + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_i->name); + RET_IF_HSA_ERR(err); + + // Get UUID, an Ascii string, of a ROCm device + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_UUID, + &agent_i->uuid); + + // Get device's vendor name + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_VENDOR_NAME, + &agent_i->vendor_name); + RET_IF_HSA_ERR(err); + + // Get device marketing name + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME, + &agent_i->device_mkt_name); + RET_IF_HSA_ERR(err); + + // Get agent feature + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FEATURE, + &agent_i->agent_feature); + RET_IF_HSA_ERR(err); + + // Get profile supported by the agent + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_PROFILE, + &agent_i->agent_profile); + RET_IF_HSA_ERR(err); + + // Get floating-point rounding mode + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEFAULT_FLOAT_ROUNDING_MODE, + &agent_i->float_rounding_mode); + RET_IF_HSA_ERR(err); + + // Get max number of queue + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUES_MAX, + 
&agent_i->max_queue); + RET_IF_HSA_ERR(err); + + // Get queue min size + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MIN_SIZE, + &agent_i->queue_min_size); + RET_IF_HSA_ERR(err); + + // Get queue max size + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_MAX_SIZE, + &agent_i->queue_max_size); + RET_IF_HSA_ERR(err); + + // Get queue type + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_QUEUE_TYPE, + &agent_i->queue_type); + RET_IF_HSA_ERR(err); + + // Get agent node + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_NODE, &agent_i->node); + RET_IF_HSA_ERR(err); + + // Get device type + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, + &agent_i->device_type); + RET_IF_HSA_ERR(err); + + if (HSA_DEVICE_TYPE_GPU == agent_i->device_type) { + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_ISA, &agent_i->agent_isa); + RET_IF_HSA_ERR(err); + } + + // Get cache size + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_CACHE_SIZE, + agent_i->cache_size); + RET_IF_HSA_ERR(err); + + // Get chip id + err = hsa_agent_get_info(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_CHIP_ID, + &agent_i->chip_id); + RET_IF_HSA_ERR(err); + + // Get asic revision + err = hsa_agent_get_info(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_ASIC_REVISION, + &agent_i->asic_revision); + RET_IF_HSA_ERR(err); + + // Get cacheline size + err = hsa_agent_get_info(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_CACHELINE_SIZE, + &agent_i->cacheline_size); + RET_IF_HSA_ERR(err); + + // Get Max clock frequency + err = hsa_agent_get_info(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, + &agent_i->max_clock_freq); + RET_IF_HSA_ERR(err); + + // Internal Driver node ID + err = hsa_agent_get_info(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_DRIVER_NODE_ID, + &agent_i->internal_node_id); + RET_IF_HSA_ERR(err); + + // Max number of watch points on mem. addr. 
ranges to generate exeception + // events + err = hsa_agent_get_info(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_MAX_ADDRESS_WATCH_POINTS, + &agent_i->max_addr_watch_pts); + RET_IF_HSA_ERR(err); + + // Get Agent BDFID + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_BDFID, &agent_i->bdf_id); + RET_IF_HSA_ERR(err); + + // Get Max Memory Clock + // Not supported by hsa_agent_get_info + // err = hsa_agent_get_info(agent,d + // (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY, + // &agent_i->mem_max_freq); + // RET_IF_HSA_ERR(err); + + // Get Num SIMDs per CU + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, + &agent_i->simds_per_cu); + RET_IF_HSA_ERR(err); + + // Get Num Shader Engines + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SHADER_ENGINES, + &agent_i->shader_engs); + RET_IF_HSA_ERR(err); + + // Get Num Shader Arrays per Shader engine + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_NUM_SHADER_ARRAYS_PER_SE, + &agent_i->shader_arrs_per_sh_eng); + RET_IF_HSA_ERR(err); + + // Get number of Compute Unit + err = hsa_agent_get_info(agent, + (hsa_agent_info_t) HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, + &agent_i->compute_unit); + RET_IF_HSA_ERR(err); + + // Check if the agent is kernel agent + if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) { + // Get flaf of fast_f16 operation + err = hsa_agent_get_info(agent, + HSA_AGENT_INFO_FAST_F16_OPERATION, &agent_i->fast_f16); + RET_IF_HSA_ERR(err); + + // Get wavefront size + err = hsa_agent_get_info(agent, + HSA_AGENT_INFO_WAVEFRONT_SIZE, &agent_i->wavefront_size); + RET_IF_HSA_ERR(err); + + // Get max total number of work-items in a workgroup + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, + &agent_i->workgroup_max_size); + RET_IF_HSA_ERR(err); + + // Get max number of work-items of each dimension of a work-group + err = hsa_agent_get_info(agent, 
HSA_AGENT_INFO_WORKGROUP_MAX_DIM, + &agent_i->workgroup_max_dim); + RET_IF_HSA_ERR(err); + + // Get max number of a grid per dimension + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_DIM, + &agent_i->grid_max_dim); + RET_IF_HSA_ERR(err); + + // Get max total number of work-items in a grid + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_GRID_MAX_SIZE, + &agent_i->grid_max_size); + RET_IF_HSA_ERR(err); + + // Get max number of fbarriers per work group + err = hsa_agent_get_info(agent, HSA_AGENT_INFO_FBARRIER_MAX_SIZE, + &agent_i->fbarrier_max_size); + RET_IF_HSA_ERR(err); + + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU, + &agent_i->max_waves_per_cu); + RET_IF_HSA_ERR(err); + + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_UCODE_VERSION, + &agent_i->pkt_processor_ucode_ver); + RET_IF_HSA_ERR(err); + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_SDMA_UCODE_VERSION, + &agent_i->sdma_ucode_ver); + RET_IF_HSA_ERR(err); + err = hsa_agent_get_info(agent, + (hsa_agent_info_t)HSA_AMD_AGENT_INFO_IOMMU_SUPPORT, + &agent_i->iommu_support); + RET_IF_HSA_ERR(err); + } + return err; +} + +static void DisplayAgentInfo(agent_info_t *agent_i) { + printLabelStr("Name:", agent_i->name, 1); + printLabelStr("Uuid:", agent_i->uuid, 1); + printLabelStr("Marketing Name:", agent_i->device_mkt_name, 1); + printLabelStr("Vendor Name:", agent_i->vendor_name, 1); + + printLabel("Feature:", false, 1); + if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH + && agent_i->agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH) { + printValueStr("KERNEL_DISPATCH & AGENT_DISPATCH"); + } else if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) { + printValueStr("KERNEL_DISPATCH"); + } else if (agent_i->agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH) { + printValueStr("AGENT_DISPATCH"); + } else { + printValueStr("None specified"); + } + + printLabel("Profile:", false, 1); + if 
(HSA_PROFILE_BASE == agent_i->agent_profile) { + printValueStr("BASE_PROFILE"); + } else if (HSA_PROFILE_FULL == agent_i->agent_profile) { + printValueStr("FULL_PROFILE"); + } else { + printValueStr("Unknown"); + } + + printLabel("Float Round Mode:", false, 1); + if (HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO == agent_i->float_rounding_mode) { + printValueStr("ZERO"); + } else if (HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR == + agent_i->float_rounding_mode) { + printValueStr("NEAR"); + } else { + printValueStr("Not Supported"); + } + + printLabelStr("Max Queue Number:", int_to_string(agent_i->max_queue), 1); + + printLabelStr("Queue Min Size:", int_to_string(agent_i->queue_min_size), 1); + + printLabelStr("Queue Max Size:", int_to_string(agent_i->queue_max_size), 1); + + if (HSA_QUEUE_TYPE_MULTI == agent_i->queue_type) { + printLabelStr("Queue Type:", "MULTI", 1); + } else if (HSA_QUEUE_TYPE_SINGLE == agent_i->queue_type) { + printLabelStr("Queue Type:", "SINGLE", 1); + } else { + printLabelStr("Queue Type:", "Unknown", 1); + } + + printLabelInt("Node:", agent_i->node, 1); + + printLabel("Device Type:", false, 1); + if (HSA_DEVICE_TYPE_CPU == agent_i->device_type) { + printValueStr("CPU"); + } else if (HSA_DEVICE_TYPE_GPU == agent_i->device_type) { + printValueStr("GPU"); + } else { + printValueStr("DSP"); + } + + printLabel("Cache Info:", true, 1); + + for (int i = 0; i < 4; i++) { + if (agent_i->cache_size[i]) { + std::string tmp_str("L"); + tmp_str += std::to_string(i+1); + tmp_str += ":"; + printLabel(tmp_str.c_str(), false, 2); + + // tmp_str = std::to_string(agent_i->cache_size[i]/1024); + tmp_str = int_to_string(agent_i->cache_size[i]/1024); + tmp_str += "KB"; + printValueStr(tmp_str.c_str()); + } + } + + printLabelStr("Chip ID:", int_to_string(agent_i->chip_id), 1); + printLabelStr("ASIC Revision:", int_to_string(agent_i->asic_revision), 1); + printLabelStr("Cacheline Size:", int_to_string(agent_i->cacheline_size), 1); + printLabelInt("Max Clock Freq. 
(MHz):", agent_i->max_clock_freq, 1); + printLabelInt("BDFID:", agent_i->bdf_id, 1); + printLabelInt("Internal Node ID:", agent_i->internal_node_id, 1); + printLabelInt("Compute Unit:", agent_i->compute_unit, 1); + printLabelInt("SIMDs per CU:", agent_i->simds_per_cu, 1); + printLabelInt("Shader Engines:", agent_i->shader_engs, 1); + printLabelInt("Shader Arrs. per Eng.:", agent_i->shader_arrs_per_sh_eng, 1); + printLabelInt("WatchPts on Addr. Ranges:", agent_i->max_addr_watch_pts, 1); + + printLabel("Features:", false, 1); + if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) { + printf("%s", "KERNEL_DISPATCH "); + } + if (agent_i->agent_feature & HSA_AGENT_FEATURE_AGENT_DISPATCH) { + printf("%s", "AGENT_DISPATCH"); + } + if (agent_i->agent_feature == 0) { + printf("None"); + } + printf("\n"); + + if (agent_i->agent_feature & HSA_AGENT_FEATURE_KERNEL_DISPATCH) { + printLabelStr("Fast F16 Operation:", + agent_i->fast_f16 ? "TRUE":"FALSE", 1); + + printLabelStr("Wavefront Size:", + int_to_string(agent_i->wavefront_size), 1); + + printLabelStr("Workgroup Max Size:", + int_to_string(agent_i->workgroup_max_size), 1); + printLabel("Workgroup Max Size per Dimension:", true, 1); + printLabelStr("x", + int_to_string(static_cast(agent_i->workgroup_max_dim[0])), 2); + printLabelStr("y", + int_to_string(static_cast(agent_i->workgroup_max_dim[1])), 2); + printLabelStr("z", + int_to_string(static_cast(agent_i->workgroup_max_dim[2])), 2); + + printLabelStr("Max Waves Per CU:", + int_to_string(agent_i->max_waves_per_cu), 1); + printLabelStr("Max Work-item Per CU:", + int_to_string(agent_i->wavefront_size*agent_i->max_waves_per_cu), 1); + + printLabelStr("Grid Max Size:", int_to_string(agent_i->grid_max_size), 1); + printLabel("Grid Max Size per Dimension:", true, 1); + printLabelStr("x", int_to_string(agent_i->grid_max_dim.x), 2); + printLabelStr("y", int_to_string(agent_i->grid_max_dim.y), 2); + printLabelStr("z", int_to_string(agent_i->grid_max_dim.z), 2); + + 
printLabelInt("Max fbarriers/Workgrp:", agent_i->fbarrier_max_size, 1); + + printLabelInt("Packet Processor uCode::", agent_i->pkt_processor_ucode_ver, 1); + printLabelInt("SDMA engine uCode::", agent_i->sdma_ucode_ver, 1); + printLabelStr("IOMMU Support::", + agent_i->iommu_support == HSA_IOMMU_SUPPORT_V2 ? "V2" : "None", 1); + } +} + +static hsa_status_t AcquirePoolInfo(hsa_amd_memory_pool_t pool, + pool_info_t *pool_i) { + hsa_status_t err; + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_i->global_flag); + RET_IF_HSA_ERR(err); + + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, + &pool_i->segment); + RET_IF_HSA_ERR(err); + + // Get the size of the POOL + err = hsa_amd_memory_pool_get_info(pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, + &pool_i->pool_size); + RET_IF_HSA_ERR(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, + &pool_i->alloc_allowed); + RET_IF_HSA_ERR(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, + &pool_i->alloc_granule); + RET_IF_HSA_ERR(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, + &pool_i->pool_alloc_alignment); + RET_IF_HSA_ERR(err); + + err = hsa_amd_memory_pool_get_info(pool, + HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, + &pool_i->pl_access); + RET_IF_HSA_ERR(err); + + return HSA_STATUS_SUCCESS; +} + +static void MakeGlobalFlagsString(uint32_t global_flag, std::string* out_str) { + *out_str = ""; + + std::vector flags; + + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & global_flag) { + flags.push_back("KERNARG"); + } + + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & global_flag) { + flags.push_back("FINE GRAINED"); + } + + if (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_COARSE_GRAINED & global_flag) { + flags.push_back("COARSE GRAINED"); + } + + if (flags.size() > 0) { + *out_str += flags[0]; + } + + for (size_t i = 1; i 
< flags.size(); i++) { + *out_str += ", " + flags[i]; + } +} + +static void DumpSegment(pool_info_t *pool_i, uint32_t ind_lvl) { + std::string seg_str; + std::string tmp_str; + + printLabel("Segment:", false, ind_lvl); + + switch (pool_i->segment) { + case HSA_AMD_SEGMENT_GLOBAL: + MakeGlobalFlagsString(pool_i->global_flag, &tmp_str); + seg_str += "GLOBAL; FLAGS: " + tmp_str; + break; + + case HSA_AMD_SEGMENT_READONLY: + seg_str += "READONLY"; + break; + + case HSA_AMD_SEGMENT_PRIVATE: + seg_str += "PRIVATE"; + break; + + case HSA_AMD_SEGMENT_GROUP: + seg_str += "GROUP"; + break; + + default: + printf("Not Supported\n"); + break; + } + printValueStr(seg_str.c_str()); +} + +static void DisplayPoolInfo(pool_info_t *pool_i, uint32_t indent) { + DumpSegment(pool_i, indent); + + size_t sz = pool_i->pool_size/1024; + printLabelStr("Size:", int_to_string(sz) + "KB", indent); + printLabelStr("Allocatable:", (pool_i->alloc_allowed ? "TRUE" : "FALSE"), + indent); + std::string gr_str = std::to_string(pool_i->alloc_granule/1024)+"KB"; + printLabelStr("Alloc Granule:", gr_str.c_str(), indent); + + std::string al_str = std::to_string(pool_i->pool_alloc_alignment/1024)+"KB"; + printLabelStr("Alloc Alignment:", al_str.c_str(), indent); + + printLabelStr("Accessible by all:", (pool_i->pl_access ? 
"TRUE" : "FALSE"), + indent); +} + +static hsa_status_t +AcquireAndDisplayMemPoolInfo(const hsa_amd_memory_pool_t pool, + uint32_t indent) { + hsa_status_t err; + pool_info_t pool_i; + + err = AcquirePoolInfo(pool, &pool_i); + RET_IF_HSA_ERR(err); + + DisplayPoolInfo(&pool_i, 3); + + return err; +} + +static hsa_status_t get_pool_info(hsa_amd_memory_pool_t pool, void* data) { + hsa_status_t err; + int* p_int = reinterpret_cast(data); + (*p_int)++; + + std::string pool_str("Pool "); + pool_str += std::to_string(*p_int); + printLabel(pool_str.c_str(), true, 2); + + err = AcquireAndDisplayMemPoolInfo(pool, 3); + RET_IF_HSA_ERR(err); + + return err; +} + +static hsa_status_t AcquireISAInfo(hsa_isa_t isa, isa_info_t *isa_i) { + hsa_status_t err; + uint32_t name_len; + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME_LENGTH, &name_len); + RET_IF_HSA_ERR(err); + + isa_i->name_str = new char[name_len]; + if (isa_i->name_str == nullptr) { + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } + + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_NAME, isa_i->name_str); + RET_IF_HSA_ERR(err); + + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_MACHINE_MODELS, + isa_i->mach_models); + RET_IF_HSA_ERR(err); + + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_PROFILES, isa_i->profiles); + RET_IF_HSA_ERR(err); + + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_DEFAULT_FLOAT_ROUNDING_MODES, + isa_i->def_rounding_modes); + RET_IF_HSA_ERR(err); + + err = hsa_isa_get_info_alt(isa, + HSA_ISA_INFO_BASE_PROFILE_DEFAULT_FLOAT_ROUNDING_MODES, + isa_i->base_rounding_modes); + RET_IF_HSA_ERR(err); + + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FAST_F16_OPERATION, + &isa_i->fast_f16); + RET_IF_HSA_ERR(err); + + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_DIM, + &isa_i->workgroup_max_dim); + RET_IF_HSA_ERR(err); + + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_WORKGROUP_MAX_SIZE, + &isa_i->workgroup_max_size); + RET_IF_HSA_ERR(err); + + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_DIM, 
+ &isa_i->grid_max_dim); + RET_IF_HSA_ERR(err); + + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_GRID_MAX_SIZE, + &isa_i->grid_max_size); + RET_IF_HSA_ERR(err); + + err = hsa_isa_get_info_alt(isa, HSA_ISA_INFO_FBARRIER_MAX_SIZE, + &isa_i->fbarrier_max_size); + RET_IF_HSA_ERR(err); + + return err; +} + +static void DisplayISAInfo(isa_info_t *isa_i, uint32_t indent) { + printLabelStr("Name:", isa_i->name_str, indent); + + std::string models(""); + if (isa_i->mach_models[HSA_MACHINE_MODEL_SMALL]) { + models = "HSA_MACHINE_MODEL_SMALL "; + } + if (isa_i->mach_models[HSA_MACHINE_MODEL_LARGE]) { + models += "HSA_MACHINE_MODEL_LARGE"; + } + printLabelStr("Machine Models:", models.c_str(), indent); + + std::string profiles(""); + if (isa_i->profiles[HSA_PROFILE_BASE]) { + profiles = "HSA_PROFILE_BASE "; + } + if (isa_i->profiles[HSA_PROFILE_FULL]) { + profiles += "HSA_PROFILE_FULL"; + } + printLabelStr("Profiles:", profiles.c_str(), indent); + + std::string rounding_modes(""); + if (isa_i->def_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT]) { + rounding_modes = "DEFAULT "; + } + if (isa_i->def_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO]) { + rounding_modes += "ZERO "; + } + if (isa_i->def_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR]) { + rounding_modes += "NEAR"; + } + printLabelStr("Default Rounding Mode:", rounding_modes.c_str(), indent); + + rounding_modes = ""; + if (isa_i->base_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_DEFAULT]) { + rounding_modes = "DEFAULT "; + } + if (isa_i->base_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_ZERO]) { + rounding_modes += "ZERO "; + } + if (isa_i->base_rounding_modes[HSA_DEFAULT_FLOAT_ROUNDING_MODE_NEAR]) { + rounding_modes += "NEAR"; + } + printLabelStr("Default Rounding Mode:", rounding_modes.c_str(), indent); + + printLabelStr("Fast f16:", (isa_i->fast_f16 ? 
"TRUE" : "FALSE"), indent); + + printLabelStr("Workgroup Max Size:", + int_to_string(isa_i->workgroup_max_size), indent); + printLabel("Workgroup Max Size per Dimension:", true, indent); + printLabelStr("x", int_to_string( + static_cast(isa_i->workgroup_max_dim[0])), indent+1); + printLabelStr("y", int_to_string( + static_cast(isa_i->workgroup_max_dim[1])), indent+1); + printLabelStr("z", int_to_string( + static_cast(isa_i->workgroup_max_dim[2])), indent+1); + + printLabelStr("Grid Max Size:", int_to_string(isa_i->grid_max_size), indent); + printLabel("Grid Max Size per Dimension:", true, indent); + printLabelStr("x", int_to_string(isa_i->grid_max_dim.x), indent+1); + printLabelStr("y", int_to_string(isa_i->grid_max_dim.y), indent+1); + printLabelStr("z", int_to_string(isa_i->grid_max_dim.z), indent+1); + + printLabelInt("FBarrier Max Size:", isa_i->fbarrier_max_size, indent); +} + +static hsa_status_t +AcquireAndDisplayISAInfo(const hsa_isa_t isa, uint32_t indent) { + hsa_status_t err; + isa_info_t isa_i; + + isa_i.name_str = nullptr; + err = AcquireISAInfo(isa, &isa_i); + RET_IF_HSA_ERR(err); + + DisplayISAInfo(&isa_i, 3); + + if (isa_i.name_str != nullptr) { + delete []isa_i.name_str; + } + return err; +} +static hsa_status_t get_isa_info(hsa_isa_t isa, void* data) { + hsa_status_t err; + int* isa_int = reinterpret_cast(data); + (*isa_int)++; + + std::string isa_str("ISA "); + isa_str += std::to_string(*isa_int); + printLabel(isa_str.c_str(), true, 2); + + err = AcquireAndDisplayISAInfo(isa, 3); + RET_IF_HSA_ERR(err); + + return err; +} +// Cache info dump is ifdef'd out as it generates a lot of output that is +// not that interesting. Define ENABLE_CACHE_DUMP if this is of interest. 
+#ifdef ENABLE_CACHE_DUMP +static void DisplayCacheInfo(cache_info_t *cache_i, uint32_t indent) { + printLabelStr("Name:", cache_i->name_str, indent); + + printLabelInt("Level:", cache_i->level, indent); + printLabelInt("Size:", cache_i->size, indent); +} + +static hsa_status_t AcquireCacheInfo(hsa_cache_t cache, cache_info_t *cache_i) { + hsa_status_t err; + uint32_t name_len; + err = hsa_cache_get_info(cache, HSA_CACHE_INFO_NAME_LENGTH, &name_len); + RET_IF_HSA_ERR(err); + + cache_i->name_str = new char[name_len]; + if (cache_i->name_str == nullptr) { + return HSA_STATUS_ERROR_OUT_OF_RESOURCES; + } + + err = hsa_cache_get_info(cache, HSA_CACHE_INFO_NAME, cache_i->name_str); + RET_IF_HSA_ERR(err); + + err = hsa_cache_get_info(cache, HSA_CACHE_INFO_LEVEL, &cache_i->level); + RET_IF_HSA_ERR(err); + + err = hsa_cache_get_info(cache, HSA_CACHE_INFO_SIZE, &cache_i->size); + RET_IF_HSA_ERR(err); + return err; +} + +static hsa_status_t +AcquireAndDisplayCacheInfo(const hsa_cache_t cache, uint32_t indent) { + hsa_status_t err; + cache_info_t cache_i; + + err = AcquireCacheInfo(cache, &cache_i); + RET_IF_HSA_ERR(err); + + DisplayCacheInfo(&cache_i, 3); + + if (cache_i.name_str != nullptr) { + delete []cache_i.name_str; + } + + return err; +} + +static hsa_status_t get_cache_info(hsa_cache_t cache, void* data) { + hsa_status_t err; + int* cache_int = reinterpret_cast(data); + (*cache_int)++; + + std::string cache_str("Cache L"); + cache_str += std::to_string(*cache_int); + printLabel(cache_str.c_str(), true, 2); + + err = AcquireAndDisplayCacheInfo(cache, 3); + RET_IF_HSA_ERR(err); + + return err; +} +#endif // ENABLE_CACHE_DUMP +static hsa_status_t +AcquireAndDisplayAgentInfo(hsa_agent_t agent, void* data) { + int pool_number = 0; + int isa_number = 0; + + hsa_status_t err; + agent_info_t agent_i; + + int *agent_number = reinterpret_cast(data); + (*agent_number)++; + + err = AcquireAgentInfo(agent, &agent_i); + RET_IF_HSA_ERR(err); + + printLabel("*******", true); + 
std::string agent_ind("Agent "); + agent_ind += std::to_string(*agent_number).c_str(); + printLabel(agent_ind.c_str(), true); + printLabel("*******", true); + + DisplayAgentInfo(&agent_i); + + printLabel("Pool Info:", true, 1); + err = hsa_amd_agent_iterate_memory_pools(agent, get_pool_info, &pool_number); + RET_IF_HSA_ERR(err); + + printLabel("ISA Info:", true, 1); + err = hsa_agent_iterate_isas(agent, get_isa_info, &isa_number); + if (err == HSA_STATUS_ERROR_INVALID_AGENT) { + printLabel("N/A", true, 2); + return HSA_STATUS_SUCCESS; + } + RET_IF_HSA_ERR(err); + +#if ENABLE_CACHE_DUMP + int cache_number = 0; + printLabel("Cache Info:", true, 1); + err = hsa_agent_iterate_caches(agent, get_cache_info, &cache_number); + if (err == HSA_STATUS_ERROR_INVALID_AGENT) { + printLabel("N/A", true, 2); + return HSA_STATUS_SUCCESS; + } +#endif + RET_IF_HSA_ERR(err); + + return HSA_STATUS_SUCCESS; +} + +int CheckInitialState(void) { + // Check kernel module for ROCk is loaded + + std::ifstream amdgpu_initstate("/sys/module/amdgpu/initstate"); + if (amdgpu_initstate){ + std::stringstream buffer; + buffer << amdgpu_initstate.rdbuf(); + amdgpu_initstate.close(); + + std::string line; + bool is_live = false; + while (std::getline(buffer, line)){ + if (line.find( "live" ) != std::string::npos){ + is_live = true; + break; + } + } + if (is_live){ + printf("%sROCk module is loaded%s\n", COL_WHT, COL_RESET); + } else { + printf("%sROCk module is NOT live, possibly no GPU devices%s\n", + COL_RED, COL_RESET); + return -1; + } + } else { + printf("%sROCk module is NOT loaded, possibly no GPU devices%s\n", + COL_RED, COL_RESET); + return -1; + } + + // Check if user belongs to the group for /dev/kfd (e.g. "video" or + // "render") + // @note: User who are not members of "video" + // group cannot access DRM services + char u_name[32]; + bool member = false; + struct passwd *pw; + int num_groups = 0; + gid_t *groups; + + // Check if we can open /dev/kfd as read-write. 
If not, try to + // diagnose common reasons why you can't. + int open_kfd = open("/dev/kfd", O_RDWR); + if (open_kfd >= 0) { + close(open_kfd); + return 0; + } + + printf("%sUnable to open /dev/kfd read-write: %s%s\n", + COL_RED, strerror(errno), COL_RESET); + + const char *kfd_gr_name = NULL; + + struct stat sb; + if (stat("/dev/kfd", &sb) == 0) { + // The owner of kfd was renamed, so avoid hard-coding the + // name. Check whatever group owns it. + if (struct group *kfd_gr = getgrgid(sb.st_gid)) + kfd_gr_name = kfd_gr->gr_name; + } + + if (!kfd_gr_name) + kfd_gr_name = "video"; + + struct group *gr_s = getgrnam(kfd_gr_name); // NOLINT + if (gr_s == nullptr) { + printf("%sFailed to get group info to check" + " for %s group membership%s\n", COL_RED, kfd_gr_name, + COL_RESET); + return -1; + } + + if (getlogin_r(u_name, 32)) { + printf("%sFailed to get user name to check for" + " %s group membership%s\n", COL_RED, kfd_gr_name, + COL_RESET); + return -1; + } + + pw = getpwnam(u_name); // NOLINT + if (pw == NULL) { + printf("%sFailed to find pwd entry for user %s%s\n", + COL_RED, u_name, COL_RESET); + return -1; + } + + (void)getgrouplist(u_name, pw->pw_gid, NULL, &num_groups); + groups = new gid_t[num_groups]; + if (getgrouplist(u_name, pw->pw_gid, groups, &num_groups) == -1) { + printf("%sFailed to get user group list%s\n", COL_RED, COL_RESET); + delete []groups; + return -1; + } + + for (int i = 0; i < num_groups; ++i) { + if (gr_s->gr_gid == groups[i]) { + printf("%s%s is member of %s group%s\n", COL_WHT, u_name, kfd_gr_name, COL_RESET); + member = true; + break; + } + } + if (member == false) { + printf("%s%s is not member of \"%s\" group, the default DRM access " + "group. 
Users must be a member of the \"%s\" group or another" + " DRM access group in order for ROCm applications to run " + "successfully%s.\n", COL_RED, u_name, kfd_gr_name, kfd_gr_name, COL_RESET); + } + + delete []groups; + return -1; +} + +// Print out all static information known to HSA about the target system. +// Throughout this program, the Acquire-type functions make HSA calls to +// interate through HSA objects and then perform HSA get_info calls to +// acccumulate information about those objects. Corresponding to each +// Acquire-type function is a Display* function which display the +// accumulated data in a formatted way. +int main(int argc, char* argv[]) { + hsa_status_t err; + + if (CheckInitialState()) { + return 1; + } + err = hsa_init(); + RET_IF_HSA_ERR(err) + + // Acquire and display system information + system_info_t sys_info; + + // This function will call HSA get_info functions to gather information + // about the system. + err = AcquireSystemInfo(&sys_info); + RET_IF_HSA_ERR(err); + + printLabel("=====================", true); + printLabel("HSA System Attributes", true); + printLabel("=====================", true); + DisplaySystemInfo(&sys_info); + + // Iterate through every agent and get and display their info + printLabel("==========", true); + printLabel("HSA Agents", true); + printLabel("==========", true); + uint32_t agent_ind = 0; + err = hsa_iterate_agents(AcquireAndDisplayAgentInfo, &agent_ind); + RET_IF_HSA_ERR(err); + + printLabel("*** Done ***", true); + + err = hsa_shut_down(); + RET_IF_HSA_ERR(err); + return 0; +} + +#undef RET_IF_HSA_ERR