Skip to content

Commit

Permalink
Remove profiling nvtx from develop branch.
Browse files Browse the repository at this point in the history
This causes trouble for continuous integration and is apparently not supported on all platforms.
Since it is a debug function, it's just as well to remove it from the mainstream tree.
  • Loading branch information
Carsten Griwodz committed Aug 12, 2024
1 parent bdd37e5 commit 5c37b81
Show file tree
Hide file tree
Showing 12 changed files with 1 addition and 103 deletions.
13 changes: 0 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/${CMAKE_SYSTEM_NAME}-$

option(PopSift_BUILD_EXAMPLES "Build PopSift applications." ON)
option(PopSift_BUILD_DOCS "Build PopSift documentation." OFF)
option(PopSift_USE_NVTX_PROFILING "Use CUDA NVTX for profiling." OFF)
option(PopSift_ERRCHK_AFTER_KERNEL "Synchronize and check CUDA error after every kernel." OFF)
option(PopSift_USE_POSITION_INDEPENDENT_CODE "Generate position independent code." ON)
option(PopSift_USE_GRID_FILTER "Switch off grid filtering to massively reduce compile time while debugging other things." ON)
Expand Down Expand Up @@ -90,10 +89,6 @@ find_package(CUDAToolkit)
message(STATUS "CUDA Version is ${CUDAToolkit_VERSION}")
set(CUDA_VERSION ${CUDAToolkit_VERSION})

if(PopSift_USE_NVTX_PROFILING)
message(STATUS "PROFILING CPU CODE: NVTX is in use")
endif()

if(PopSift_ERRCHK_AFTER_KERNEL)
message(STATUS "Synchronizing and checking errors after every kernel call")
list(APPEND CUDA_NVCC_FLAGS "-DERRCHK_AFTER_KERNEL")
Expand Down Expand Up @@ -150,13 +145,6 @@ else()
set(DISABLE_GRID_FILTER 0)
endif()

if(PopSift_USE_NVTX_PROFILING)
# library required for NVTX profiling of the CPU
set(PopSift_USE_NVTX 1)
else()
set(PopSift_USE_NVTX 0)
endif()

add_subdirectory(src)

if(PopSift_BUILD_DOCS)
Expand Down Expand Up @@ -194,7 +182,6 @@ message(STATUS "Build Shared libs: " ${BUILD_SHARED_LIBS})
message(STATUS "Build examples: " ${PopSift_BUILD_EXAMPLES})
message(STATUS "Build documentation: " ${PopSift_BUILD_DOCS})
message(STATUS "Generate position independent code: " ${CMAKE_POSITION_INDEPENDENT_CODE})
message(STATUS "Use CUDA NVTX for profiling: " ${PopSift_USE_NVTX_PROFILING})
message(STATUS "Synchronize and check CUDA error after every kernel: " ${PopSift_ERRCHK_AFTER_KERNEL})
message(STATUS "Grid filtering: " ${PopSift_USE_GRID_FILTER})
message(STATUS "Additional warning for CUDA nvcc: " ${PopSift_NVCC_WARNINGS})
Expand Down
2 changes: 1 addition & 1 deletion appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ install:
before_build:
- md build
- cd build
- cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_NVTX_PROFILING:BOOL=OFF -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake ..
- cmake -G "Visual Studio 17 2022" -A x64 -T v143,host=x64,cuda="%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" -DBUILD_SHARED_LIBS:BOOL=ON -DPopSift_USE_GRID_FILTER:BOOL=OFF -DPopSift_BUILD_DOCS:BOOL=OFF -DPopSift_USE_POSITION_INDEPENDENT_CODE:BOOL=ON -DPopSift_BUILD_EXAMPLES:BOOL=ON -DCMAKE_BUILD_TYPE=%configuration% -DCMAKE_TOOLCHAIN_FILE=c:/tools/vcpkg/scripts/buildsystems/vcpkg.cmake ..
- ls -l

build:
Expand Down
1 change: 0 additions & 1 deletion cmake/sift_config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,4 @@
#define POPSIFT_HAVE_SHFL_DOWN_SYNC() @PopSift_HAVE_SHFL_DOWN_SYNC@
#define POPSIFT_HAVE_NORMF() @PopSift_HAVE_NORMF@
#define POPSIFT_DISABLE_GRID_FILTER() @DISABLE_GRID_FILTER@
#define POPSIFT_USE_NVTX() @PopSift_USE_NVTX@

3 changes: 0 additions & 3 deletions cudaInstallAppveyor.cmd
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ echo Downloading CUDA toolkit 12 for Windows 10

appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvcc.zip
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.5.82-archive.zip -Filename cuda_cudart.zip
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.5.82-archive.zip -Filename cuda_nvtx.zip
appveyor DownloadFile https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.5.82-archive.zip -Filename vs_integration.zip
dir

echo Unzipping CUDA toolkit 12
tar -xf cuda_nvcc.zip
tar -xf cuda_cudart.zip
tar -xf cuda_nvtx.zip
tar -xf vs_integration.zip
dir

Expand All @@ -22,7 +20,6 @@ mkdir "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras"
echo Copying toolkit files to install dir(s)
xcopy cuda_cudart-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
xcopy cuda_nvcc-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
xcopy cuda_nvtx-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5" /s /e /i /y
xcopy visual_studio_integration-windows-x86_64-12.5.82-archive "%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v12.5\extras" /s /e /i /y


Expand Down
6 changes: 0 additions & 6 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,6 @@ target_link_libraries(popsift
CUDA::cudart
Threads::Threads)

if(PopSift_USE_NVTX_PROFILING)
target_link_libraries(popsift
PUBLIC
CUDA::nvtx3)
endif()

set_target_properties(popsift PROPERTIES VERSION ${PROJECT_VERSION})
set_target_properties(popsift PROPERTIES DEBUG_POSTFIX "d")
set_target_properties(popsift PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
Expand Down
6 changes: 0 additions & 6 deletions src/popsift/popsift.cu
Original file line number Diff line number Diff line change
Expand Up @@ -438,18 +438,12 @@ void SiftJob::setImg( popsift::ImageBase* img )

popsift::ImageBase* SiftJob::getImg()
{
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
_nvtx_id = nvtxRangeStartA( "inserting image" );
#endif
return _img;
}

void SiftJob::setFeatures( popsift::FeaturesBase* f )
{
_p.set_value( f );
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
nvtxRangeEnd( _nvtx_id );
#endif
}

popsift::FeaturesHost* SiftJob::get()
Expand Down
10 changes: 0 additions & 10 deletions src/popsift/popsift.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,6 @@
#include <thread>
#include <vector>

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangeStartA(a)
#define nvtxRangeEnd(a)
#endif

/* user parameters */
namespace popsift
{
Expand All @@ -50,9 +43,6 @@ class SiftJob
unsigned char* _imageData;
popsift::ImageBase* _img;
std::exception_ptr _err;
#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
nvtxRangeId_t _nvtx_id;
#endif

public:

Expand Down
9 changes: 0 additions & 9 deletions src/popsift/s_filtergrid.cu
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,6 @@
#include "sift_extremum.h"
#include "sift_pyramid.h"

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangePushA(a)
#define nvtxRangePop()
#endif

#if ! POPSIFT_IS_DEFINED(POPSIFT_DISABLE_GRID_FILTER)

#include <thrust/copy.h>
Expand Down Expand Up @@ -317,9 +310,7 @@ int Pyramid::extrema_filter_grid( const Config& conf, int ext_total )
}
}

nvtxRangePushA( "writing back count" );
writeDescCountersToDevice( );
nvtxRangePop( );

return ret_ext_total;
}
Expand Down
23 changes: 0 additions & 23 deletions src/popsift/s_image.cu
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,6 @@
#include <fstream>
#include <iostream>

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangePushA(a)
#define nvtxRangePop()
#endif

using namespace std;

namespace popsift {
Expand Down Expand Up @@ -98,8 +91,6 @@ void Image::resetDimensions( int w, int h )
destroyTexture( );
createTexture( );
} else {
nvtxRangePushA( "reallocating host-side image memory" );

_max_w = max( w, _max_w );
_max_h = max( h, _max_h );
_input_image_h.freeHost( popsift::CudaAllocated );
Expand All @@ -111,21 +102,15 @@ void Image::resetDimensions( int w, int h )

destroyTexture( );
createTexture( );

nvtxRangePop(); // "reallocating host-side image memory"
}
}

void Image::allocate( int w, int h )
{
nvtxRangePushA( "allocating host-side image memory" );

_input_image_h.allocHost( w, h, popsift::CudaAllocated );
_input_image_d.allocDev( w, h );

createTexture( );

nvtxRangePop(); // "allocating host-side image memory"
}

void Image::destroyTexture( )
Expand Down Expand Up @@ -222,8 +207,6 @@ void ImageFloat::resetDimensions( int w, int h )
destroyTexture( );
createTexture( );
} else {
nvtxRangePushA( "reallocating host-side image memory" );

_max_w = max( w, _max_w );
_max_h = max( h, _max_h );
_input_image_h.freeHost( popsift::CudaAllocated );
Expand All @@ -235,21 +218,15 @@ void ImageFloat::resetDimensions( int w, int h )

destroyTexture( );
createTexture( );

nvtxRangePop(); // "reallocating host-side image memory"
}
}

void ImageFloat::allocate( int w, int h )
{
nvtxRangePushA( "allocating host-side image memory" );

_input_image_h.allocHost( w, h, popsift::CudaAllocated );
_input_image_d.allocDev( w, h );

createTexture( );

nvtxRangePop(); // "allocating host-side image memory"
}

void ImageFloat::destroyTexture( )
Expand Down
7 changes: 0 additions & 7 deletions src/popsift/s_orientation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,6 @@
#include <cmath>
#include <cstdio>

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangePushA(a)
#define nvtxRangePop()
#endif

using namespace popsift;
using namespace std;

Expand Down
10 changes: 0 additions & 10 deletions src/popsift/sift_desc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,6 @@
#include <cstdio>
#include <iostream>

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangePushA(a)
#define nvtxRangePop()
#endif

using namespace popsift;
using namespace std;

Expand Down Expand Up @@ -55,11 +48,8 @@ using namespace std;
__host__
void Pyramid::descriptors( const Config& conf )
{
nvtxRangePushA("Reading orientation count");

readDescCountersFromDevice( _octaves[0].getStream() );
cudaStreamSynchronize( _octaves[0].getStream() );
nvtxRangePop( );

for( int octave=_num_octaves-1; octave>=0; octave-- )
// for( int octave=0; octave<_num_octaves; octave++ )
Expand Down
14 changes: 0 additions & 14 deletions src/popsift/sift_pyramid.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,6 @@
#define mkdir(path, perm) _mkdir(path)
#endif

#if POPSIFT_IS_DEFINED(POPSIFT_USE_NVTX)
#include <nvtx3/nvToolsExtCuda.h>
#else
#define nvtxRangePushA(a)
#define nvtxRangePop()
#endif

#define PYRAMID_PRINT_DEBUG 0

using namespace std;
Expand Down Expand Up @@ -285,22 +278,18 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf )

readDescCountersFromDevice();

nvtxRangePushA( "download descriptors" );
FeaturesHost* features = new FeaturesHost( hct.ext_total, hct.ori_total );

if( hct.ext_total == 0 || hct.ori_total == 0 )
{
nvtxRangePop();
return features;
}

dim3 grid( grid_divide( hct.ext_total, 32 ) );
prep_features<<<grid,32,0,_download_stream>>>( features->getDescriptors(), up_fac );
POP_SYNC_CHK;

nvtxRangePushA( "register host memory" );
features->pin( );
nvtxRangePop();
popcuda_memcpy_async( features->getFeatures(),
dobuf_shadow.features,
hct.ext_total * sizeof(Feature),
Expand All @@ -313,10 +302,7 @@ FeaturesHost* Pyramid::get_descriptors( const Config& conf )
cudaMemcpyDeviceToHost,
_download_stream );
cudaStreamSynchronize( _download_stream );
nvtxRangePushA( "unregister host memory" );
features->unpin( );
nvtxRangePop();
nvtxRangePop();

return features;
}
Expand Down

0 comments on commit 5c37b81

Please sign in to comment.