Skip to content

Commit

Permalink
ENH: CUDA projectors can now process itk::Image<itk::Vector<float,3>>
Browse files Browse the repository at this point in the history
  • Loading branch information
cyrilmory committed Jun 1, 2018
1 parent 5717b6d commit 1e75fbc
Show file tree
Hide file tree
Showing 12 changed files with 319 additions and 397 deletions.
2 changes: 1 addition & 1 deletion applications/rtkbackprojections/rtkbackprojections.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ int main(int argc, char * argv[])
break;
case(bp_arg_CudaBackProjection):
#ifdef RTK_USE_CUDA
bp = rtk::CudaBackProjectionImageFilter::New();
bp = rtk::CudaBackProjectionImageFilter<OutputImageType>::New();
#else
std::cerr << "The program has not been compiled with cuda option" << std::endl;
return EXIT_FAILURE;
Expand Down
2 changes: 1 addition & 1 deletion applications/rtkfieldofview/rtkfieldofview.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ int main(int argc, char * argv[])
typedef rtk::BackProjectionImageFilter<MaskImgType, MaskImgType> BPType;
BPType::Pointer bp = BPType::New();
#ifdef RTK_USE_CUDA
typedef rtk::CudaBackProjectionImageFilter BPCudaType;
typedef rtk::CudaBackProjectionImageFilter<MaskImgType> BPCudaType;
if(!strcmp(args_info.hardware_arg, "cuda") )
bp = BPCudaType::New();
#endif
Expand Down
15 changes: 10 additions & 5 deletions include/rtkCudaBackProjectionImageFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,21 +45,22 @@ namespace rtk
*
* \ingroup Projector CudaImageToImageFilter
*/
class RTK_EXPORT CudaBackProjectionImageFilter :
public itk::CudaInPlaceImageFilter< itk::CudaImage<float,3>, itk::CudaImage<float,3>,
BackProjectionImageFilter< itk::CudaImage<float,3>, itk::CudaImage<float,3> > >

template <class ImageType = itk::CudaImage<float,3> >
class ITK_EXPORT CudaBackProjectionImageFilter :
public itk::CudaInPlaceImageFilter< ImageType, ImageType,
BackProjectionImageFilter< ImageType, ImageType> >
{
public:
/** Standard class typedefs. */
typedef itk::CudaImage<float,3> ImageType;
typedef BackProjectionImageFilter< ImageType, ImageType> BackProjectionImageFilterType;
typedef CudaBackProjectionImageFilter Self;
typedef itk::CudaInPlaceImageFilter<ImageType, ImageType,
BackProjectionImageFilterType> Superclass;
typedef itk::SmartPointer<Self> Pointer;
typedef itk::SmartPointer<const Self> ConstPointer;

typedef ImageType::RegionType OutputImageRegionType;
typedef typename ImageType::RegionType OutputImageRegionType;
typedef itk::CudaImage<float, 2> ProjectionImageType;
typedef ProjectionImageType::Pointer ProjectionImagePointer;

Expand All @@ -82,6 +83,10 @@ class RTK_EXPORT CudaBackProjectionImageFilter :

} // end namespace rtk

#ifndef ITK_MANUAL_INSTANTIATION
#include "rtkCudaBackProjectionImageFilter.hxx"
#endif

#endif //end conditional definition of the class

#endif
4 changes: 2 additions & 2 deletions include/rtkCudaBackProjectionImageFilter.hcu
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ CUDA_back_project(int projSize[3],
float *dev_vol_in,
float *dev_vol_out,
float *dev_img,
double radiusCylindricalDetector
);
double radiusCylindricalDetector,
unsigned int vectorLength);

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@
*
*=========================================================================*/

#ifndef rtkCudaBackProjectionImageFilter_hxx
#define rtkCudaBackProjectionImageFilter_hxx

#include "rtkConfiguration.h"
//Conditional definition of the class to pass ITKHeaderTest
#ifdef RTK_USE_CUDA

#include "rtkCudaBackProjectionImageFilter.h"
#include "rtkCudaUtilities.hcu"
#include "rtkCudaBackProjectionImageFilter.hcu"
Expand All @@ -28,13 +35,15 @@
namespace rtk
{

CudaBackProjectionImageFilter
template <class ImageType>
CudaBackProjectionImageFilter<ImageType>
::CudaBackProjectionImageFilter()
{
}

template <class ImageType>
void
CudaBackProjectionImageFilter
CudaBackProjectionImageFilter<ImageType>
::GPUGenerateData()
{
const unsigned int Dimension = ImageType::ImageDimension;
Expand All @@ -44,7 +53,7 @@ ::GPUGenerateData()
itkGenericExceptionMacro("The CUDA voxel based back projection image filter can only handle slabs of at most 1024 projections")

// Rotation center (assumed to be at 0 yet)
ImageType::PointType rotCenterPoint;
typename ImageType::PointType rotCenterPoint;
rotCenterPoint.Fill(0.0);
itk::ContinuousIndex<double, Dimension> rotCenterIndex;
this->GetInput(0)->TransformPhysicalPointToContinuousIndex(rotCenterPoint, rotCenterIndex);
Expand Down Expand Up @@ -73,7 +82,8 @@ ::GPUGenerateData()

float *stackGPUPointer = *(float**)( this->GetInput(1)->GetCudaDataManager()->GetGPUBufferPointer() );
ptrdiff_t projSize = this->GetInput(1)->GetBufferedRegion().GetSize()[0] *
this->GetInput(1)->GetBufferedRegion().GetSize()[1];
this->GetInput(1)->GetBufferedRegion().GetSize()[1] *
itk::NumericTraits<typename ImageType::PixelType>::GetLength();
stackGPUPointer += projSize * (iFirstProj-this->GetInput(1)->GetBufferedRegion().GetIndex()[2]);

// Allocate a large matrix to hold the matrix of all projections
Expand All @@ -83,7 +93,7 @@ ::GPUGenerateData()
float *fprojPPToProjIndex = new float[9];

// Projection physical point to projection index matrix
itk::Matrix<double, 3, 3> projPPToProjIndex = GetProjectionPhysicalPointToProjectionIndexMatrix();
itk::Matrix<double, 3, 3> projPPToProjIndex = this->GetProjectionPhysicalPointToProjectionIndexMatrix();

// Correction for non-zero indices in the projections
itk::Matrix<double, 3, 3> matrixIdxProj;
Expand All @@ -103,7 +113,7 @@ ::GPUGenerateData()
// Volume index to projection physical point matrix
// normalized to have a correct backprojection weight
// (1 at the isocenter)
ProjectionMatrixType volIndexToProjPP = GetVolumeIndexToProjectionPhysicalPointMatrix(iProj);
typename BackProjectionImageFilterType::ProjectionMatrixType volIndexToProjPP = this->GetVolumeIndexToProjectionPhysicalPointMatrix(iProj);

// Correction for non-zero indices in the volume
volIndexToProjPP = volIndexToProjPP.GetVnlMatrix() * matrixIdxVol.GetVnlMatrix();
Expand All @@ -113,7 +123,7 @@ ::GPUGenerateData()
perspFactor += volIndexToProjPP[Dimension-1][j] * rotCenterIndex[j];
volIndexToProjPP /= perspFactor;

ProjectionMatrixType matrix = ProjectionMatrixType(projPPToProjIndex.GetVnlMatrix() * volIndexToProjPP.GetVnlMatrix());
typename BackProjectionImageFilterType::ProjectionMatrixType matrix = typename BackProjectionImageFilterType::ProjectionMatrixType(projPPToProjIndex.GetVnlMatrix() * volIndexToProjPP.GetVnlMatrix());

// Fill float arrays with matrices coefficients, to be passed to GPU
for (int j = 0; j < 12; j++)
Expand All @@ -123,6 +133,8 @@ ::GPUGenerateData()
}
}

const unsigned int vectorLength = itk::PixelTraits<typename ImageType::PixelType>::Dimension;

for (unsigned int i=0; i<nProj; i+=SLAB_SIZE)
{
// If nProj is not a multiple of SLAB_SIZE, the last slab will contain less than SLAB_SIZE projections
Expand All @@ -137,8 +149,8 @@ ::GPUGenerateData()
pin,
pout,
stackGPUPointer + projSize * i,
this->m_Geometry->GetRadiusCylindricalDetector()
);
this->m_Geometry->GetRadiusCylindricalDetector(),
vectorLength);

// Re-use the output as input
pin = pout;
Expand All @@ -150,3 +162,7 @@ ::GPUGenerateData()
}

} // end namespace rtk

#endif //end conditional definition of the class

#endif
5 changes: 3 additions & 2 deletions include/rtkCudaForwardProjectionImageFilter.hcu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include "rtkWin32Header.h"

void
RTK_EXPORT CUDA_forward_project( int projSize[3],
RTK_EXPORT CUDA_forward_project(int projSize[3],
int volSize[3],
float *translatedProjectionIndexTransformMatrices,
float* translatedVolumeTransformMatrices,
Expand All @@ -35,6 +35,7 @@ RTK_EXPORT CUDA_forward_project( int projSize[3],
float box_min[3],
float box_max[3],
float spacing[3],
bool useCudaTexture);
bool useCudaTexture,
const unsigned int vectorLength);

#endif
10 changes: 7 additions & 3 deletions include/rtkCudaForwardProjectionImageFilter.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,9 @@ CudaForwardProjectionImageFilter<TInputImage,
const unsigned int Dimension = TInputImage::ImageDimension;
const unsigned int iFirstProj = this->GetInput(0)->GetRequestedRegion().GetIndex(Dimension-1);
const unsigned int nProj = this->GetInput(0)->GetRequestedRegion().GetSize(Dimension-1);
const unsigned int nPixelsPerProj = this->GetOutput()->GetBufferedRegion().GetSize(0) *
this->GetOutput()->GetBufferedRegion().GetSize(1);
const unsigned int nPixelsPerProj = this->GetOutput()->GetBufferedRegion().GetSize(0)
* this->GetOutput()->GetBufferedRegion().GetSize(1)
* itk::NumericTraits<typename TInputImage::PixelType>::GetLength();

itk::Vector<double, 4> source_position;

Expand Down Expand Up @@ -178,6 +179,8 @@ CudaForwardProjectionImageFilter<TInputImage,
}

int projectionOffset = 0;
const unsigned int vectorLength = itk::PixelTraits<typename TInputImage::PixelType>::Dimension;

for (unsigned int i=0; i<nProj; i+=SLAB_SIZE)
{
// If nProj is not a multiple of SLAB_SIZE, the last slab will contain less than SLAB_SIZE projections
Expand All @@ -198,7 +201,8 @@ CudaForwardProjectionImageFilter<TInputImage,
boxMin,
boxMax,
spacing,
m_UseCudaTexture);
m_UseCudaTexture,
vectorLength);
}

delete[] translatedProjectionIndexTransformMatrices;
Expand Down
78 changes: 78 additions & 0 deletions include/rtkCudaUtilities.hcu
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
#include <itkMacro.h>
#undef ITK_STATIC
#include <cuda.h>
#include <cublas_v2.h>

#define CUDA_CHECK_ERROR \
{ \
Expand Down Expand Up @@ -108,5 +109,82 @@ inline __host__ __device__ float dot_vector(float3 u, float3 v)
{
return u.x*v.x + u.y*v.y + u.z*v.z;
}
inline __host__ void prepareTextureObject(int size[3],
float *dev_ptr,
cudaArray **&componentArrays,
unsigned int nComponents,
cudaTextureObject_t *tex,
bool isProjections)
{
// Create CUBLAS context
cublasHandle_t handle;
cublasCreate(&handle);

// create texture object
cudaResourceDesc resDesc;
memset(&resDesc, 0, sizeof(resDesc));
resDesc.resType = cudaResourceTypeArray;

cudaTextureDesc texDesc;
memset(&texDesc, 0, sizeof(texDesc));
texDesc.readMode = cudaReadModeElementType;
for (unsigned int component = 0; component < nComponents; component++)
{
if (isProjections)
texDesc.addressMode[component] = cudaAddressModeBorder;
else
texDesc.addressMode[component] = cudaAddressModeClamp;
}
texDesc.filterMode = cudaFilterModeLinear;

static cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(32,0,0,0, cudaChannelFormatKindFloat);
cudaExtent volExtent = make_cudaExtent(size[0], size[1], size[2]);

// Allocate an intermediate memory space to extract the components of the input volume
float *singleComponent;
int numel = size[0] * size[1] * size[2];
cudaMalloc(&singleComponent, numel * sizeof(float));
float one = 1.0;

// Copy image data to arrays. The tricky part is the make_cudaPitchedPtr.
// The best way to understand it is to read
// http://stackoverflow.com/questions/16119943/how-and-when-should-i-use-pitched-pointer-with-the-cuda-api
for (unsigned int component = 0; component < nComponents; component++)
{
// Reset the intermediate memory
cudaMemset((void *)singleComponent, 0, numel * sizeof(float));

// Fill it with the current component
float * pComponent = dev_ptr + component;
cublasSaxpy(handle, numel, &one, pComponent, nComponents, singleComponent, 1);

// Allocate the cudaArray. Projections use layered arrays, volumes use default 3D arrays
if (isProjections)
cudaMalloc3DArray((cudaArray**)& componentArrays[component], &channelDesc, volExtent, cudaArrayLayered);
else
cudaMalloc3DArray((cudaArray**)& componentArrays[component], &channelDesc, volExtent);

// Fill it with the current singleComponent
cudaMemcpy3DParms CopyParams = {0};
CopyParams.srcPtr = make_cudaPitchedPtr(singleComponent, size[0] * sizeof(float), size[0], size[1]);
CopyParams.dstArray = (cudaArray*) componentArrays[component];
CopyParams.extent = volExtent;
CopyParams.kind = cudaMemcpyDeviceToDevice;
cudaMemcpy3D(&CopyParams);
CUDA_CHECK_ERROR;

// Fill in the texture object with all this information
resDesc.res.array.array = componentArrays[component];
cudaCreateTextureObject(&tex[component], &resDesc, &texDesc, NULL);
CUDA_CHECK_ERROR;
}

// Intermediate memory is no longer needed
cudaFree(singleComponent);

// Destroy CUBLAS context
cublasDestroy(handle);
}

//static cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc<float>();
#endif
4 changes: 2 additions & 2 deletions include/rtkIterativeConeBeamReconstructionFilter.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ namespace rtk
bp = rtk::JosephBackProjectionImageFilter<itk::CudaImage<float, 3>, itk::CudaImage<float, 3> >::New();
break;
case(BP_CUDAVOXELBASED):
bp = rtk::CudaBackProjectionImageFilter::New();
bp = rtk::CudaBackProjectionImageFilter<itk::CudaImage<float, 3>>::New();
break;
case(BP_CUDARAYCAST):
bp = rtk::CudaRayCastBackProjectionImageFilter::New();
Expand All @@ -140,7 +140,7 @@ namespace rtk
bp = rtk::JosephBackProjectionImageFilter<itk::CudaImage<float, 3>, itk::CudaImage<float, 3> >::New();
break;
case(BP_CUDAVOXELBASED):
bp = rtk::CudaBackProjectionImageFilter::New();
bp = rtk::CudaBackProjectionImageFilter<itk::CudaImage<float, 3>>::New();
break;
case(BP_CUDARAYCAST):
bp = rtk::CudaRayCastBackProjectionImageFilter::New();
Expand Down
1 change: 0 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ if (RTK_USE_CUDA)

set(RTK_SRCS ${RTK_SRCS}
rtkCudaAverageOutOfROIImageFilter.cxx
rtkCudaBackProjectionImageFilter.cxx
rtkCudaConjugateGradientImageFilter_3f.cxx
rtkCudaConjugateGradientImageFilter_4f.cxx
rtkCudaConstantVolumeSeriesSource.cxx
Expand Down
Loading

0 comments on commit 1e75fbc

Please sign in to comment.