From 4a9d474fbeb2801fc47d07b1e4f627dd740563d3 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Wed, 23 May 2018 16:21:20 +0200 Subject: [PATCH] Clean up some GPU- and non-GPU-related pixel tracking code (#49) - clean up GPU- and non-GPU-related pixel tracking code and build files - drop obsolete or unused classes - rename RawToDigiGPU to SiPixelRawToDigiGPUKernel - move the definitions in SiPixelRawToDigiGPUKernel to the `pixelgpudetails` namespace --- .../SiPixelRawToDigi/plugins/BuildFile.xml | 4 +- .../SiPixelRawToDigi/plugins/DetParamBits.h | 39 - .../SiPixelRawToDigi/plugins/EventInfoGPU.h | 11 - .../SiPixelRawToDigi/plugins/RawToDigiGPU.cu | 693 ----------------- .../SiPixelRawToDigi/plugins/RawToDigiGPU.h | 216 ------ .../SiPixelRawToDigi/plugins/SealModule.cc | 8 - .../plugins/SiPixelDigiToRaw.cc | 5 +- .../plugins/SiPixelFedCablingMapGPU.cc | 45 +- .../plugins/SiPixelFedCablingMapGPU.h | 54 +- .../plugins/SiPixelRawToDigi.cc | 4 + .../plugins/SiPixelRawToDigiGPU.cc | 44 +- .../plugins/SiPixelRawToDigiGPU.h | 32 +- .../plugins/SiPixelRawToDigiGPUKernel.cu | 696 ++++++++++++++++++ .../plugins/SiPixelRawToDigiGPUKernel.h | 212 ++++++ .../SiPixelRecHits/interface/pixelCPEforGPU.h | 317 ++++---- .../SiPixelRecHits/plugins/PixelRecHits.cu | 4 +- .../SiPixelRecHits/plugins/PixelRecHits.h | 6 +- .../plugins/SiPixelRecHitGPU.cc | 18 +- .../SiPixelRecHits/plugins/gpuPixelRecHits.h | 96 ++- 19 files changed, 1221 insertions(+), 1283 deletions(-) delete mode 100644 EventFilter/SiPixelRawToDigi/plugins/DetParamBits.h delete mode 100644 EventFilter/SiPixelRawToDigi/plugins/EventInfoGPU.h delete mode 100644 EventFilter/SiPixelRawToDigi/plugins/RawToDigiGPU.cu delete mode 100644 EventFilter/SiPixelRawToDigi/plugins/RawToDigiGPU.h delete mode 100644 EventFilter/SiPixelRawToDigi/plugins/SealModule.cc create mode 100644 EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPUKernel.cu create mode 100644 EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPUKernel.h diff --git a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml index 790b772fc6feb..3e047e2f90b57 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml +++ b/EventFilter/SiPixelRawToDigi/plugins/BuildFile.xml @@ -1,8 +1,8 @@ - + - + diff --git a/EventFilter/SiPixelRawToDigi/plugins/DetParamBits.h b/EventFilter/SiPixelRawToDigi/plugins/DetParamBits.h deleted file mode 100644 index 8cc944c3c7f25..0000000000000 --- a/EventFilter/SiPixelRawToDigi/plugins/DetParamBits.h +++ /dev/null @@ -1,39 +0,0 @@ -// Sushil Dubey, Shashi Dugad, TIFR -#ifndef DETPARAMBITS_H -#define DETPARAMBITS_H -typedef unsigned int uint; -//reference -//http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/d3/db2/PixelROC_8cc_source.html#l00197 -const uint layerStartBit_ = 20; -const uint ladderStartBit_ = 12; -const uint moduleStartBit_ = 2; - -const uint panelStartBit_ = 10; -const uint diskStartBit_ = 18; -const uint bladeStartBit_ = 12; - -const uint layerMask_ = 0xF; -const uint ladderMask_ = 0xFF; -const uint moduleMask_ = 0x3FF; -const uint panelMask_ = 0x3; -const uint diskMask_ = 0xF; -const uint bladeMask_ = 0x3F; - -// __host__ __device__ bool isBarrel(uint rawId) { -// return (1==((rawId>>25)&0x7)); -// } - -__host__ __device__ int getLayer(uint rawId) { - int layer = (rawId >> layerStartBit_) & layerMask_; - return layer; -} - -__host__ __device__ int getDisk(uint rawId) { - // int side =1; - // unsigned int panel = ((rawId>>panelStartBit_) & panelMask_); - // if(panel==1) side = -1; - unsigned int disk = int((rawId>>diskStartBit_) & diskMask_); - // return disk*side; - return disk; -} -#endif diff --git a/EventFilter/SiPixelRawToDigi/plugins/EventInfoGPU.h b/EventFilter/SiPixelRawToDigi/plugins/EventInfoGPU.h deleted file mode 100644 index 1170717e4adf8..0000000000000 --- a/EventFilter/SiPixelRawToDigi/plugins/EventInfoGPU.h +++ /dev/null @@ -1,11 +0,0 @@ -/*Sushil Dubey, Shashi Dugad, TIFR -*/ - -#ifndef EVENTINFO_GPU -#define EVENTINFO_GPU - -const int NEVENT = 1 ; //optimal number of events to run simultaneously, -// using 4 cuda stream, hence it should be multiple of 4 -const int NMODULE = 1856; // for phase 1, we have 1856 modules - -#endif diff --git a/EventFilter/SiPixelRawToDigi/plugins/RawToDigiGPU.cu b/EventFilter/SiPixelRawToDigi/plugins/RawToDigiGPU.cu deleted file mode 100644 index 88f68f460441e..0000000000000 --- a/EventFilter/SiPixelRawToDigi/plugins/RawToDigiGPU.cu +++ /dev/null @@ -1,693 +0,0 @@ -/* Sushil Dubey, Shashi Dugad, TIFR, July 2017 - * - * File Name: RawToDigiGPU.cu - * Description: It converts Raw data into Digi Format on GPU - * then it converts adc -> electron and - * applies the adc threshold to needed for clustering - * Finaly the Output of RawToDigi data is given to pixelClusterizer - * -**/ - -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" -#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" - - - -// System includes -#include -#include -#include -#include -#include -#include -#include -#include - -// CUDA runtime -#include -#include -#include -#include -#include -#include -#include - -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "EventInfoGPU.h" -#include "RawToDigiGPU.h" -#include "SiPixelFedCablingMapGPU.h" - -context initDeviceMemory() { - - using namespace gpuClustering; - context c; - - // Number of words for all the feds - constexpr uint32_t MAX_WORD08_SIZE = MAX_FED * MAX_WORD * sizeof(uint8_t); - constexpr uint32_t MAX_WORD32_SIZE = MAX_FED * MAX_WORD * sizeof(uint32_t); - constexpr uint32_t MAX_WORD16_SIZE = MAX_FED * MAX_WORD * sizeof(uint16_t); - constexpr uint32_t vsize = sizeof(GPU::SimpleVector); - constexpr uint32_t esize = sizeof(error_obj); - constexpr uint32_t MAX_ERROR_SIZE = MAX_FED * MAX_WORD * esize; - - cudaCheck(cudaMalloc((void**) & c.word_d, MAX_WORD32_SIZE)); - cudaCheck(cudaMalloc((void**) & c.fedId_d, MAX_WORD08_SIZE)); - cudaCheck(cudaMalloc((void**) & c.pdigi_d, MAX_WORD32_SIZE)); // to store thepacked digi - cudaCheck(cudaMalloc((void**) & c.xx_d, MAX_WORD16_SIZE)); // to store the x and y coordinate - cudaCheck(cudaMalloc((void**) & c.yy_d, MAX_WORD16_SIZE)); - cudaCheck(cudaMalloc((void**) & c.adc_d, MAX_WORD16_SIZE)); - - cudaCheck(cudaMalloc((void**) & c.moduleInd_d, MAX_WORD16_SIZE)); - cudaCheck(cudaMalloc((void**) & c.rawIdArr_d, MAX_WORD32_SIZE)); - cudaCheck(cudaMalloc((void**) & c.error_d, vsize)); - cudaCheck(cudaMalloc((void**) & c.data_d, MAX_ERROR_SIZE)); - - // for the clusterizer - cudaCheck(cudaMalloc((void**) & c.clus_d, MAX_WORD32_SIZE)); // cluser index in module - - cudaCheck(cudaMalloc((void**) & c.moduleStart_d, (MaxNumModules+1)*sizeof(uint32_t) )); - cudaCheck(cudaMalloc((void**) & c.clusInModule_d, (MaxNumModules)*sizeof(uint32_t) )); - cudaCheck(cudaMalloc((void**) & c.moduleId_d, (MaxNumModules)*sizeof(uint32_t) )); - - cudaCheck(cudaMalloc((void**) & c.debug_d, MAX_WORD32_SIZE)); - - // create a CUDA stream - cudaCheck(cudaStreamCreate(&c.stream)); - - return c; -} - - -void freeMemory(context & c) { - // free the GPU memory - cudaCheck(cudaFree(c.word_d)); - cudaCheck(cudaFree(c.fedId_d)); - cudaCheck(cudaFree(c.pdigi_d)); - cudaCheck(cudaFree(c.xx_d)); - cudaCheck(cudaFree(c.yy_d)); - cudaCheck(cudaFree(c.adc_d)); - cudaCheck(cudaFree(c.moduleInd_d)); - cudaCheck(cudaFree(c.rawIdArr_d)); - cudaCheck(cudaFree(c.error_d)); - cudaCheck(cudaFree(c.data_d)); - - // these are for the clusterizer (to be moved) - cudaCheck(cudaFree(c.moduleStart_d)); - cudaCheck(cudaFree(c.clus_d)); - cudaCheck(cudaFree(c.clusInModule_d)); - cudaCheck(cudaFree(c.moduleId_d)); - cudaCheck(cudaFree(c.debug_d)); - - - // destroy the CUDA stream - cudaCheck(cudaStreamDestroy(c.stream)); -} - - -__device__ uint32_t getLink(uint32_t ww) { - return ((ww >> LINK_shift) & LINK_mask); -} - - -__device__ uint32_t getRoc(uint32_t ww) { - return ((ww >> ROC_shift ) & ROC_mask); -} - - -__device__ uint32_t getADC(uint32_t ww) { - return ((ww >> ADC_shift) & ADC_mask); -} - - -__device__ bool isBarrel(uint32_t rawId) { - return (1==((rawId>>25)&0x7)); -} - - - -__device__ DetIdGPU getRawId(const SiPixelFedCablingMapGPU * Map, uint32_t fed, uint32_t link, uint32_t roc) { - uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; - DetIdGPU detId = { Map->RawId[index], Map->rocInDet[index], Map->moduleId[index] }; - return detId; -} - - -//reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html -//http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 -// Convert local pixel to global pixel -__device__ Pixel frameConversion(bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, Pixel local) { - - int slopeRow = 0, slopeCol = 0; - int rowOffset = 0, colOffset = 0; - - if (bpix) { - - if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1 - if (rocIdInDetUnit < 8) { - slopeRow = 1; - slopeCol = -1; - rowOffset = 0; - colOffset = (8-rocIdInDetUnit)*numColsInRoc-1; - } - else { - slopeRow = -1; - slopeCol = 1; - rowOffset = 2*numRowsInRoc-1; - colOffset = (rocIdInDetUnit-8)*numColsInRoc; - } // if roc - } - else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1 - if (rocIdInDetUnit < 8) { - slopeRow = -1; - slopeCol = 1; - rowOffset = 2*numRowsInRoc-1; - colOffset = rocIdInDetUnit * numColsInRoc; - } - else { - slopeRow = 1; - slopeCol = -1; - rowOffset = 0; - colOffset = (16-rocIdInDetUnit)*numColsInRoc-1; - } - } - - } - else { // fpix - if (side==-1) { // pannel 1 - if (rocIdInDetUnit < 8) { - slopeRow = 1; - slopeCol = -1; - rowOffset = 0; - colOffset = (8-rocIdInDetUnit)*numColsInRoc-1; - } - else { - slopeRow = -1; - slopeCol = 1; - rowOffset = 2*numRowsInRoc-1; - colOffset = (rocIdInDetUnit-8)*numColsInRoc; - } - } - else { // pannel 2 - if (rocIdInDetUnit < 8) { - slopeRow = 1; - slopeCol = -1; - rowOffset = 0; - colOffset = (8-rocIdInDetUnit)*numColsInRoc-1; - } - else { - slopeRow = -1; - slopeCol = 1; - rowOffset = 2*numRowsInRoc-1; - colOffset = (rocIdInDetUnit-8)*numColsInRoc; - } - - } // side - - } - - uint32_t gRow = rowOffset+slopeRow*local.row; - uint32_t gCol = colOffset+slopeCol*local.col; - //printf("Inside frameConversion row: %u, column: %u\n",gRow, gCol); - Pixel global = {gRow, gCol}; - return global; -} - - -__device__ uint32_t conversionError(uint32_t fedId, uint32_t status, bool debug = false) -{ - - uint32_t errorType = 0; - - // debug = true; - - switch (status) { - case(1) : { - if (debug) printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId ); - errorType = 35; - break; - } - case(2) : { - if (debug) printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); - errorType = 36; - break; - } - case(3) : { - if (debug) printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId); - errorType = 37; - break; - } - case(4) : { - if (debug) printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); - errorType = 38; - break; - } - default: if (debug) printf("Cabling check returned unexpected result, status = %i\n", status); - }; - - return errorType; - -} - - -__device__ bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) -{ - uint32_t numRowsInRoc = 80; - uint32_t numColsInRoc = 52; - - /// row and collumn in ROC representation - return ( (rocRow < numRowsInRoc) & (rocCol < numColsInRoc) ); -} - - -__device__ bool dcolIsValid(uint32_t dcol, uint32_t pxid) -{ - return ( (dcol < 26) & (2 <= pxid) & (pxid < 162) ); -} - - -__device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *Map, bool debug = false) -{ - - int errorType = (errorWord >> ROC_shift) & ERROR_mask; - if (errorType < 25) return false; - bool errorFound = false; - - switch (errorType) { - case(25) : { - errorFound = true; - uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1; - if (index > 1 && index <= Map->size){ - if (!(link == Map->link[index] && 1 == Map->roc[index])) errorFound = false; - } - if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n"); - break; - } - case(26) : { - if (debug) printf("Gap word found (errorType = 26)\n"); - errorFound = true; - break; - } - case(27) : { - if (debug) printf("Dummy word found (errorType = 27)\n"); - errorFound = true; - break; - } - case(28) : { - if (debug) printf("Error fifo nearly full (errorType = 28)\n"); - errorFound = true; - break; - } - case(29) : { - if (debug) printf("Timeout on a channel (errorType = 29)\n"); - if ((errorWord >> OMIT_ERR_shift) & OMIT_ERR_mask) { - if (debug) printf("...first errorType=29 error, this gets masked out\n"); - } - errorFound = true; - break; - } - case(30) : { - if (debug) printf("TBM error trailer (errorType = 30)\n"); - int StateMatch_bits = 4; - int StateMatch_shift = 8; - uint32_t StateMatch_mask = ~(~uint32_t(0) << StateMatch_bits); - int StateMatch = (errorWord >> StateMatch_shift) & StateMatch_mask; - if ( StateMatch != 1 && StateMatch != 8 ) { - if (debug) printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); - } - if ( StateMatch == 1 ) errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 - errorFound = true; - break; - } - case(31) : { - if (debug) printf("Event number error (errorType = 31)\n"); - errorFound = true; - break; - } - default: errorFound = false; - - }; - - return errorFound? errorType : 0; - -} - - -__device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *Map, bool debug = false) -{ - - uint32_t rID = 0xffffffff; - - switch (errorType) { - case 25 : case 30 : case 31 : case 36 : case 40 : { - //set dummy values for cabling just to get detId from link - //cabling.dcol = 0; - //cabling.pxid = 2; - uint32_t roc = 1; - uint32_t link = (errWord >> LINK_shift) & LINK_mask; - - uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; - if(rID_temp != 9999) rID = rID_temp; - break; - } - case 29 : { - int chanNmbr = 0; - const int DB0_shift = 0; - const int DB1_shift = DB0_shift + 1; - const int DB2_shift = DB1_shift + 1; - const int DB3_shift = DB2_shift + 1; - const int DB4_shift = DB3_shift + 1; - const uint32_t DataBit_mask = ~(~uint32_t(0) << 1); - - int CH1 = (errWord >> DB0_shift) & DataBit_mask; - int CH2 = (errWord >> DB1_shift) & DataBit_mask; - int CH3 = (errWord >> DB2_shift) & DataBit_mask; - int CH4 = (errWord >> DB3_shift) & DataBit_mask; - int CH5 = (errWord >> DB4_shift) & DataBit_mask; - int BLOCK_bits = 3; - int BLOCK_shift = 8; - uint32_t BLOCK_mask = ~(~uint32_t(0) << BLOCK_bits); - int BLOCK = (errWord >> BLOCK_shift) & BLOCK_mask; - int localCH = 1*CH1+2*CH2+3*CH3+4*CH4+5*CH5; - if (BLOCK%2==0) chanNmbr=(BLOCK/2)*9+localCH; - else chanNmbr = ((BLOCK-1)/2)*9+4+localCH; - if ((chanNmbr < 1)||(chanNmbr > 36)) break; // signifies unexpected result - - // set dummy values for cabling just to get detId from link if in Barrel - //cabling.dcol = 0; - //cabling.pxid = 2; - uint32_t roc = 1; - uint32_t link = chanNmbr; - uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; - if(rID_temp != 9999) rID = rID_temp; - break; - } - case 37 : case 38: { - //cabling.dcol = 0; - //cabling.pxid = 2; - uint32_t roc = (errWord >> ROC_shift) & ROC_mask; - uint32_t link = (errWord >> LINK_shift) & LINK_mask; - uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; - if(rID_temp != 9999) rID = rID_temp; - break; - } - - default : break; - - }; - - return rID; - -} - - -/*---------- -* Name: applyADCthreshold_kernel() -* Desc: converts adc count to electrons and then applies the -* threshold on each channel. -* make pixel to 0 if it is below the threshold -* Input: xx_d[], yy_d[], layer_d[], wordCounter, adc[], ADCThreshold -*----------- -* Output: xx_adc[], yy_adc[] with pixel threshold applied -*/ -// kernel to apply adc threshold on the channels - - -// Felice: gains and pedestals are not the same for each pixel. This code should be rewritten to take -// in account local gains/pedestals -// __global__ void applyADCthreshold_kernel(const uint32_t *xx_d, const uint32_t *yy_d, const uint32_t *layer_d, uint32_t *adc, const uint32_t wordCounter, -// const ADCThreshold adcThreshold, uint32_t *xx_adc, uint32_t *yy_adc ) { -// int tid = threadIdx.x; -// int gIndex = blockDim.x*blockIdx.x+tid; -// if (gIndex=adcThreshold.theFirstStack_) { -// if (adcThreshold.theStackADC_==1 && adcOld==1) { -// adcNew = int(255*135); // Arbitrarily use overflow value. -// } -// if (adcThreshold.theStackADC_ >1 && adcThreshold.theStackADC_!=255 && adcOld>=1){ -// adcNew = int((adcOld-1) * gain * 255/float(adcThreshold.theStackADC_-1)); -// } -// } -// -// if (adcNew >adcThreshold.thePixelThreshold ) { -// xx_adc[gIndex]=xx_d[gIndex]; -// yy_adc[gIndex]=yy_d[gIndex]; -// } -// else { -// xx_adc[gIndex]=0; // 0: dead pixel -// yy_adc[gIndex]=0; -// } -// adc[gIndex] = adcNew; -// } -// } - - -// Kernel to perform Raw to Digi conversion -__global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *Map, const uint32_t wordCounter, const uint32_t *Word, const uint8_t *fedIds, - uint16_t * XX, uint16_t * YY, uint16_t * ADC, - uint32_t * pdigi, uint32_t *rawIdArr, uint16_t * moduleId, - GPU::SimpleVector *err, - bool useQualityInfo, bool includeErrors, bool debug) -{ - uint32_t blockId = blockIdx.x; - uint32_t threadId = threadIdx.x; - - bool skipROC = false; - //if (threadId==0) printf("Event: %u blockId: %u start: %u end: %u\n", eventno, blockId, begin, end); - - for (int aaa=0; aaa<1; ++aaa) { // too many coninue below.... (to be fixed) - auto gIndex = threadId + blockId*blockDim.x; - if (gIndex < wordCounter) { - - uint32_t fedId = fedIds[gIndex/2]; // +1200; - - // initialize (too many coninue below) - pdigi[gIndex] = 0; - rawIdArr[gIndex] = 0; - moduleId[gIndex] = 9999; - - uint32_t ww = Word[gIndex]; // Array containing 32 bit raw data - if (ww == 0) { - //noise and dead channels are ignored - XX[gIndex] = 0; // 0 is an indicator of a noise/dead channel - YY[gIndex] = 0; // skip these pixels during clusterization - ADC[gIndex] = 0; - continue ; // 0: bad word - } - - uint32_t link = getLink(ww); // Extract link - uint32_t roc = getRoc(ww); // Extract Roc in link - DetIdGPU detId = getRawId(Map, fedId, link, roc); - - uint32_t errorType = checkROC(ww, fedId, link, Map, debug); - skipROC = (roc < maxROCIndex) ? false : (errorType != 0); - if (includeErrors and skipROC) - { - uint32_t rID = getErrRawID(fedId, ww, errorType, Map, debug); - err->emplace_back(rID, ww, errorType, fedId); - continue; - } - - uint32_t rawId = detId.RawId; - uint32_t rocIdInDetUnit = detId.rocInDet; - bool barrel = isBarrel(rawId); - - uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; - if (useQualityInfo) { - - skipROC = Map->badRocs[index]; - if (skipROC) continue; - - } - skipROC = Map->modToUnp[index]; - if (skipROC) continue; - - uint32_t layer = 0;//, ladder =0; - int side = 0, panel = 0, module = 0;//disk = 0,blade = 0 - - if (barrel) - { - layer = (rawId >> layerStartBit_) & layerMask_; - module = (rawId >> moduleStartBit_) & moduleMask_; - side = (module < 5)? -1 : 1; - } - else { - // endcap ids - layer = 0; - panel = (rawId >> panelStartBit_) & panelMask_; - //disk = (rawId >> diskStartBit_) & diskMask_ ; - side = (panel == 1)? -1 : 1; - //blade = (rawId>>bladeStartBit_) & bladeMask_; - } - - // ***special case of layer to 1 be handled here - Pixel localPix; - if (layer == 1) { - uint32_t col = (ww >> COL_shift) & COL_mask; - uint32_t row = (ww >> ROW_shift) & ROW_mask; - localPix.row = row; - localPix.col = col; - if (includeErrors) { - if (not rocRowColIsValid(row, col)) { - uint32_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays - err->emplace_back(rawId, ww, error, fedId); - if(debug) printf("BPIX1 Error status: %i\n", error); - continue; - } - } - } else { - // ***conversion rules for dcol and pxid - uint32_t dcol = (ww >> DCOL_shift) & DCOL_mask; - uint32_t pxid = (ww >> PXID_shift) & PXID_mask; - uint32_t row = numRowsInRoc - pxid/2; - uint32_t col = dcol*2 + pxid%2; - localPix.row = row; - localPix.col = col; - if (includeErrors and not dcolIsValid(dcol, pxid)) { - uint32_t error = conversionError(fedId, 3, debug); - err->emplace_back(rawId, ww, error, fedId); - if(debug) printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); - continue; - } - } - - Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); - XX[gIndex] = globalPix.row ; // origin shifting by 1 0-159 - YY[gIndex] = globalPix.col ; // origin shifting by 1 0-415 - ADC[gIndex] = getADC(ww); - pdigi[gIndex] = pack(globalPix.row,globalPix.col,ADC[gIndex]); - moduleId[gIndex] = detId.moduleId; - rawIdArr[gIndex] = rawId; - } // end of if (gIndex < end) - } // end fake loop -} // end of Raw to Digi kernel - - -// kernel wrapper called from runRawToDigi_kernel -void RawToDigi_wrapper( - context & c, - const SiPixelFedCablingMapGPU* cablingMapDevice, SiPixelGainForHLTonGPU * const ped, - const uint32_t wordCounter, uint32_t *word, const uint32_t fedCounter, uint8_t *fedId_h, - bool convertADCtoElectrons, - uint32_t * pdigi_h, uint32_t *rawIdArr_h, - GPU::SimpleVector *error_h, GPU::SimpleVector *error_h_tmp, error_obj *data_h, - uint16_t * adc_h, int32_t * clus_h, - bool useQualityInfo, bool includeErrors, bool debug, uint32_t & nModulesActive) -{ - const int threadsPerBlock = 512; - const int blocks = (wordCounter + threadsPerBlock-1) /threadsPerBlock; // fill it all - - - assert(0 == wordCounter%2); - // wordCounter is the total no of words in each event to be trasfered on device - cudaCheck(cudaMemcpyAsync(&c.word_d[0], &word[0], wordCounter*sizeof(uint32_t), cudaMemcpyDefault, c.stream)); - cudaCheck(cudaMemcpyAsync(&c.fedId_d[0], &fedId_h[0], wordCounter*sizeof(uint8_t)/2, cudaMemcpyDefault, c.stream)); - - constexpr uint32_t vsize = sizeof(GPU::SimpleVector); - constexpr uint32_t esize = sizeof(error_obj); - cudaCheck(cudaMemcpyAsync(c.error_d, error_h_tmp, vsize, cudaMemcpyDefault, c.stream)); - - // Launch rawToDigi kernel - RawToDigi_kernel<<>>( - cablingMapDevice, - wordCounter, - c.word_d, - c.fedId_d, - c.xx_d, c.yy_d, c.adc_d, - c.pdigi_d, - c.rawIdArr_d, - c.moduleInd_d, - c.error_d, - useQualityInfo, - includeErrors, - debug); - cudaCheck(cudaGetLastError()); - - // copy data to host variable - - cudaCheck(cudaMemcpyAsync(pdigi_h, c.pdigi_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, c.stream)); - cudaCheck(cudaMemcpyAsync(rawIdArr_h, c.rawIdArr_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, c.stream)); - - if (includeErrors) { - cudaCheck(cudaMemcpyAsync(error_h, c.error_d, vsize, cudaMemcpyDefault, c.stream)); - cudaStreamSynchronize(c.stream); - error_h->set_data(data_h); - int size = error_h->size(); - cudaCheck(cudaMemcpyAsync(data_h, c.data_d, size*esize, cudaMemcpyDefault, c.stream)); - } - // End of Raw2Digi and passing data for cluserisation - - { - // clusterizer ... - using namespace gpuClustering; - int threadsPerBlock = 256; - int blocks = (wordCounter + threadsPerBlock - 1) / threadsPerBlock; - - - assert(ped); - gpuCalibPixel::calibDigis<<>>( - c.moduleInd_d, - c.xx_d, c.yy_d, c.adc_d, - ped, - wordCounter - ); - - cudaCheck(cudaGetLastError()); - - // calibrated adc - cudaCheck(cudaMemcpyAsync(adc_h, c.adc_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, c.stream)); - - /* - std::cout - << "CUDA countModules kernel launch with " << blocks - << " blocks of " << threadsPerBlock << " threads\n"; - */ - - uint32_t nModules=0; - cudaCheck(cudaMemcpyAsync(c.moduleStart_d, &nModules, sizeof(uint32_t), cudaMemcpyDefault, c.stream)); - - countModules<<>>(c.moduleInd_d, c.moduleStart_d, c.clus_d, wordCounter); - cudaCheck(cudaGetLastError()); - - cudaCheck(cudaMemcpyAsync(&nModules, c.moduleStart_d, sizeof(uint32_t), cudaMemcpyDefault, c.stream)); - - // std::cout << "found " << nModules << " Modules active" << std::endl; - - - threadsPerBlock = 256; - blocks = nModules; - - /* - std::cout - << "CUDA findClus kernel launch with " << blocks - << " blocks of " << threadsPerBlock << " threads\n"; - */ - - cudaCheck(cudaMemsetAsync(c.clusInModule_d, 0, (MaxNumModules)*sizeof(uint32_t),c.stream)); - - findClus<<>>( - c.moduleInd_d, - c.xx_d, c.yy_d, c.adc_d, - c.moduleStart_d, - c.clusInModule_d, c.moduleId_d, - c.clus_d, - c.debug_d, - wordCounter - ); - - // clusters - cudaCheck(cudaMemcpyAsync(clus_h, c.clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, c.stream)); - - - cudaStreamSynchronize(c.stream); - cudaCheck(cudaGetLastError()); - - nModulesActive = nModules; - - } // end clusterizer scope - -} diff --git a/EventFilter/SiPixelRawToDigi/plugins/RawToDigiGPU.h b/EventFilter/SiPixelRawToDigi/plugins/RawToDigiGPU.h deleted file mode 100644 index 44a820db45aba..0000000000000 --- a/EventFilter/SiPixelRawToDigi/plugins/RawToDigiGPU.h +++ /dev/null @@ -1,216 +0,0 @@ -/*Sushil Dubey, Shashi Dugad, TIFR - * - */ - -#ifndef RAWTODIGIGPU_H -#define RAWTODIGIGPU_H - -#include - -#include "SiPixelFedCablingMapGPU.h" -#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" -#include - -const uint32_t layerStartBit_ = 20; -const uint32_t ladderStartBit_ = 12; -const uint32_t moduleStartBit_ = 2; - -const uint32_t panelStartBit_ = 10; -const uint32_t diskStartBit_ = 18; -const uint32_t bladeStartBit_ = 12; - -const uint32_t layerMask_ = 0xF; -const uint32_t ladderMask_ = 0xFF; -const uint32_t moduleMask_ = 0x3FF; -const uint32_t panelMask_ = 0x3; -const uint32_t diskMask_ = 0xF; -const uint32_t bladeMask_ = 0x3F; - -const uint32_t LINK_bits = 6; -const uint32_t ROC_bits = 5; -const uint32_t DCOL_bits = 5; -const uint32_t PXID_bits = 8; -const uint32_t ADC_bits = 8; -// special for layer 1 -const uint32_t LINK_bits1 = 6; -const uint32_t ROC_bits1 = 5; -const uint32_t COL_bits1_l1 = 6; -const uint32_t ROW_bits1_l1 = 7; -const uint32_t OMIT_ERR_bits = 1; - -const uint32_t maxROCIndex = 8; -const uint32_t numRowsInRoc = 80; -const uint32_t numColsInRoc = 52; - -const uint32_t MAX_WORD = 2000; - -const uint32_t ADC_shift = 0; -const uint32_t PXID_shift = ADC_shift + ADC_bits; -const uint32_t DCOL_shift = PXID_shift + PXID_bits; -const uint32_t ROC_shift = DCOL_shift + DCOL_bits; -const uint32_t LINK_shift = ROC_shift + ROC_bits1; -// special for layer 1 ROC -const uint32_t ROW_shift = ADC_shift + ADC_bits; -const uint32_t COL_shift = ROW_shift + ROW_bits1_l1; -const uint32_t OMIT_ERR_shift = 20; - -const uint32_t LINK_mask = ~(~uint32_t(0) << LINK_bits1); -const uint32_t ROC_mask = ~(~uint32_t(0) << ROC_bits1); -const uint32_t COL_mask = ~(~uint32_t(0) << COL_bits1_l1); -const uint32_t ROW_mask = ~(~uint32_t(0) << ROW_bits1_l1); -const uint32_t DCOL_mask = ~(~uint32_t(0) << DCOL_bits); -const uint32_t PXID_mask = ~(~uint32_t(0) << PXID_bits); -const uint32_t ADC_mask = ~(~uint32_t(0) << ADC_bits); -const uint32_t ERROR_mask = ~(~uint32_t(0) << ROC_bits1); -const uint32_t OMIT_ERR_mask = ~(~uint32_t(0) << OMIT_ERR_bits); - -struct DetIdGPU { - uint32_t RawId; - uint32_t rocInDet; - uint32_t moduleId; -}; - -struct Pixel { - uint32_t row; - uint32_t col; -}; - - -namespace gpudetails{ - -class Packing { - public: - using PackedDigiType = uint32_t; - - // Constructor: pre-computes masks and shifts from field widths -__host__ __device__ -inline - constexpr Packing(unsigned int row_w, unsigned int column_w, - unsigned int time_w, unsigned int adc_w) : - row_width(row_w), column_width(column_w), adc_width(adc_w) - ,row_shift(0) - ,column_shift(row_shift + row_w) - ,time_shift(column_shift + column_w) - ,adc_shift(time_shift + time_w) - ,row_mask(~(~0U << row_w)) - ,column_mask( ~(~0U << column_w)) - ,time_mask(~(~0U << time_w)) - ,adc_mask(~(~0U << adc_w)) - ,rowcol_mask(~(~0U << (column_w+row_w))) - ,max_row(row_mask) - ,max_column(column_mask) - ,max_adc(adc_mask){} - - - uint32_t row_width; - uint32_t column_width; - uint32_t adc_width; - - uint32_t row_shift; - uint32_t column_shift; - uint32_t time_shift; - uint32_t adc_shift; - - PackedDigiType row_mask; - PackedDigiType column_mask; - PackedDigiType time_mask; - PackedDigiType adc_mask; - PackedDigiType rowcol_mask; - - - uint32_t max_row; - uint32_t max_column; - uint32_t max_adc; - }; - - -// const PixelChannelIdentifier::Packing PixelChannelIdentifier::thePacking( 11, 11, 0, 10); // row, col, time, adc - - -__host__ __device__ -inline -constexpr gpudetails::Packing packing() { return gpudetails::Packing(11, 11, 0, 10);} - -} - -// constexpr Packing thePacking = packing(); - -__host__ __device__ -inline uint32_t pack(uint32_t row, uint32_t col, uint32_t adc) { - constexpr gpudetails::Packing thePacking = gpudetails::packing(); - adc = std::min(adc, thePacking.max_adc); - - return (row << thePacking.row_shift) | - (col << thePacking.column_shift) | - (adc << thePacking.adc_shift); - -} - -struct error_obj { - uint32_t rawId; - uint32_t word; - unsigned char errorType; - unsigned char fedId; - __host__ __device__ error_obj(uint32_t a_, uint32_t b_, unsigned char c_, unsigned char d_): - rawId(a_), word(b_), errorType(c_), fedId(d_) {} -}; - -// configuration and memory buffers alocated on the GPU -struct context { - cudaStream_t stream; - - uint32_t * word_d; - uint8_t * fedId_d; - uint32_t * pdigi_d; - uint16_t * xx_d; - uint16_t * yy_d; - uint16_t * adc_d; - - uint16_t * moduleInd_d; - uint32_t * rawIdArr_d; - - GPU::SimpleVector * error_d; - error_obj * data_d; - - // these are for the clusterizer (to be moved) - uint32_t * moduleStart_d; - int32_t * clus_d; - uint32_t * clusInModule_d; - uint32_t * moduleId_d; - - uint32_t * debug_d; -}; - - -// wrapper function to call RawToDigi on the GPU from host side -void RawToDigi_wrapper(context &, const SiPixelFedCablingMapGPU* cablingMapDevice, - SiPixelGainForHLTonGPU * const ped, - const uint32_t wordCounter, uint32_t *word, - const uint32_t fedCounter, uint8_t *fedId_h, - bool convertADCtoElectrons, uint32_t * pdigi_h, - uint32_t *rawIdArr_h, GPU::SimpleVector *error_h, - GPU::SimpleVector *error_h_tmp, error_obj *data_h, - uint16_t * adc_h, int32_t * clus_h, - bool useQualityInfo, bool includeErrors, bool debug, - uint32_t & nModulesActive); - -// void initCablingMap(); -context initDeviceMemory(); -void freeMemory(context &); - -// reference cmssw/RecoLocalTracker/SiPixelClusterizer -// all are runtime const, should be specified in python _cfg.py -struct ADCThreshold { - const int thePixelThreshold = 1000; // default Pixel threshold in electrons - const int theSeedThreshold = 1000; //seed thershold in electrons not used in our algo - const float theClusterThreshold = 4000; // Cluster threshold in electron - const int ConversionFactor = 65; // adc to electron conversion factor - - // following are the default value - // it should be i python script - const int theStackADC_ = 255; // the maximum adc count for stack layer - const int theFirstStack_ = 5; // the index of the fits stack layer - const double theElectronPerADCGain_ = 600; //ADC to electron conversion -}; - -#endif diff --git a/EventFilter/SiPixelRawToDigi/plugins/SealModule.cc b/EventFilter/SiPixelRawToDigi/plugins/SealModule.cc deleted file mode 100644 index 30ed2eb6e8850..0000000000000 --- a/EventFilter/SiPixelRawToDigi/plugins/SealModule.cc +++ /dev/null @@ -1,8 +0,0 @@ -#include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Framework/interface/MakerMacros.h" - -#include "SiPixelRawToDigi.h" -#include "SiPixelDigiToRaw.h" - -DEFINE_FWK_MODULE(SiPixelDigiToRaw); -DEFINE_FWK_MODULE(SiPixelRawToDigi); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiToRaw.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiToRaw.cc index cb0999d51c4db..cd6199e05ae9c 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiToRaw.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelDigiToRaw.cc @@ -147,5 +147,6 @@ void SiPixelDigiToRaw::produce( edm::Event& ev, } -// ----------------------------------------------------------------------------- - +// declare this as a framework plugin +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(SiPixelDigiToRaw); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelFedCablingMapGPU.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelFedCablingMapGPU.cc index e96af4789b1ce..16fa6281e5c7b 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelFedCablingMapGPU.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelFedCablingMapGPU.cc @@ -1,3 +1,4 @@ +// C++ includes #include #include #include @@ -5,21 +6,51 @@ #include #include +// CUDA includes #include -#include "SiPixelFedCablingMapGPU.h" - -#include "FWCore/MessageLogger/interface/MessageLogger.h" +// CMSSW includes #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" +#include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" #include "CondFormats/SiPixelObjects/interface/SiPixelQuality.h" -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" #include "Geometry/CommonDetUnit/interface/GeomDetType.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelGainCalibrationForHLT.h" -#include "CondFormats/SiPixelObjects/interface/SiPixelGainForHLTonGPU.h" +// local includes +#include "SiPixelFedCablingMapGPU.h" + +void allocateCablingMap(SiPixelFedCablingMapGPU* & cablingMapHost, SiPixelFedCablingMapGPU* & cablingMapDevice) { + cudaCheck(cudaMallocHost((void**) & cablingMapHost, sizeof(SiPixelFedCablingMapGPU))); + cudaCheck(cudaMalloc((void**) & cablingMapDevice, sizeof(SiPixelFedCablingMapGPU))); + cudaCheck(cudaMalloc((void**) & cablingMapHost->fed, MAX_SIZE_BYTE_INT)); + cudaCheck(cudaMalloc((void**) & cablingMapHost->link, MAX_SIZE_BYTE_INT)); + cudaCheck(cudaMalloc((void**) & cablingMapHost->roc, MAX_SIZE_BYTE_INT)); + cudaCheck(cudaMalloc((void**) & cablingMapHost->RawId, MAX_SIZE_BYTE_INT)); + cudaCheck(cudaMalloc((void**) & cablingMapHost->rocInDet, MAX_SIZE_BYTE_INT)); + cudaCheck(cudaMalloc((void**) & cablingMapHost->moduleId, MAX_SIZE_BYTE_INT)); + cudaCheck(cudaMalloc((void**) & cablingMapHost->badRocs, MAX_SIZE_BYTE_BOOL)); + cudaCheck(cudaMalloc((void**) & cablingMapHost->modToUnp, MAX_SIZE_BYTE_BOOL)); + cudaCheck(cudaMemcpy(cablingMapDevice, cablingMapHost, sizeof(SiPixelFedCablingMapGPU), cudaMemcpyDefault)); +} + +void deallocateCablingMap(SiPixelFedCablingMapGPU* cablingMapHost, SiPixelFedCablingMapGPU* cablingMapDevice) { + cudaCheck(cudaFree(cablingMapHost->fed)); + cudaCheck(cudaFree(cablingMapHost->link)); + cudaCheck(cudaFree(cablingMapHost->roc)); + cudaCheck(cudaFree(cablingMapHost->RawId)); + cudaCheck(cudaFree(cablingMapHost->rocInDet)); + cudaCheck(cudaFree(cablingMapHost->moduleId)); + cudaCheck(cudaFree(cablingMapHost->modToUnp)); + cudaCheck(cudaFree(cablingMapHost->badRocs)); + cudaCheck(cudaFree(cablingMapDevice)); + cudaCheck(cudaFreeHost(cablingMapHost)); +} -#include "HeterogeneousCore/CUDAUtilities/interface/CUDAHostAllocator.h" void processCablingMap(SiPixelFedCablingMap const& cablingMap, TrackerGeometry const& trackerGeom, SiPixelFedCablingMapGPU* cablingMapHost, SiPixelFedCablingMapGPU* cablingMapDevice, diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelFedCablingMapGPU.h b/EventFilter/SiPixelRawToDigi/plugins/SiPixelFedCablingMapGPU.h index 3cd35f54480d5..27355cb9d96c7 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelFedCablingMapGPU.h +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelFedCablingMapGPU.h @@ -1,10 +1,9 @@ -#ifndef SiPixelFedCablingMapGPU_h -#define SiPixelFedCablingMapGPU_h +#ifndef EventFilter_SiPixelRawToDigi_plugins_SiPixelFedCablingMapGPU_h +#define EventFilter_SiPixelRawToDigi_plugins_SiPixelFedCablingMapGPU_h +// C++ includes #include -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - class SiPixelFedCablingMap; class SiPixelQuality; class TrackerGeometry; @@ -34,39 +33,22 @@ struct SiPixelFedCablingMapGPU { unsigned char * badRocs; }; -inline -void allocateCablingMap(SiPixelFedCablingMapGPU* & cablingMapHost, SiPixelFedCablingMapGPU* & cablingMapDevice) { - cudaCheck(cudaMallocHost((void**) & cablingMapHost, sizeof(SiPixelFedCablingMapGPU))); - cudaCheck(cudaMalloc((void**) & cablingMapDevice, sizeof(SiPixelFedCablingMapGPU))); - cudaCheck(cudaMalloc((void**) & cablingMapHost->fed, MAX_SIZE_BYTE_INT)); - cudaCheck(cudaMalloc((void**) & cablingMapHost->link, MAX_SIZE_BYTE_INT)); - cudaCheck(cudaMalloc((void**) & cablingMapHost->roc, MAX_SIZE_BYTE_INT)); - cudaCheck(cudaMalloc((void**) & cablingMapHost->RawId, MAX_SIZE_BYTE_INT)); - cudaCheck(cudaMalloc((void**) & cablingMapHost->rocInDet, MAX_SIZE_BYTE_INT)); - cudaCheck(cudaMalloc((void**) & cablingMapHost->moduleId, MAX_SIZE_BYTE_INT)); - cudaCheck(cudaMalloc((void**) & cablingMapHost->badRocs, MAX_SIZE_BYTE_BOOL)); - cudaCheck(cudaMalloc((void**) & cablingMapHost->modToUnp, MAX_SIZE_BYTE_BOOL)); - cudaCheck(cudaMemcpy(cablingMapDevice, cablingMapHost, sizeof(SiPixelFedCablingMapGPU), cudaMemcpyDefault)); -} - -inline -void deallocateCablingMap(SiPixelFedCablingMapGPU* cablingMapHost, SiPixelFedCablingMapGPU* cablingMapDevice) { - cudaCheck(cudaFree(cablingMapHost->fed)); - cudaCheck(cudaFree(cablingMapHost->link)); - cudaCheck(cudaFree(cablingMapHost->roc)); - cudaCheck(cudaFree(cablingMapHost->RawId)); - cudaCheck(cudaFree(cablingMapHost->rocInDet)); - cudaCheck(cudaFree(cablingMapHost->moduleId)); - cudaCheck(cudaFree(cablingMapHost->modToUnp)); - cudaCheck(cudaFree(cablingMapHost->badRocs)); - cudaCheck(cudaFree(cablingMapDevice)); - cudaCheck(cudaFreeHost(cablingMapHost)); -} +void allocateCablingMap(SiPixelFedCablingMapGPU* & cablingMapHost, + SiPixelFedCablingMapGPU* & cablingMapDevice); -void processCablingMap(SiPixelFedCablingMap const& cablingMap, TrackerGeometry const& trackerGeom, - SiPixelFedCablingMapGPU* cablingMapHost, SiPixelFedCablingMapGPU* cablingMapDevice, const SiPixelQuality* badPixelInfo, std::set const& modules); +void deallocateCablingMap(SiPixelFedCablingMapGPU* cablingMapHost, + SiPixelFedCablingMapGPU* cablingMapDevice); -void processGainCalibration(SiPixelGainCalibrationForHLT const & gains, TrackerGeometry const& trackerGeom, SiPixelGainForHLTonGPU * & gainsOnGPU, SiPixelGainForHLTonGPU_DecodingStructure * & gainDataOnGPU); +void processCablingMap(SiPixelFedCablingMap const& cablingMap, + TrackerGeometry const& trackerGeom, + SiPixelFedCablingMapGPU* cablingMapHost, + SiPixelFedCablingMapGPU* cablingMapDevice, + SiPixelQuality const* badPixelInfo, + std::set const& modules); -#endif // SiPixelFedCablingMapGPU_h +void processGainCalibration(SiPixelGainCalibrationForHLT const& gains, + TrackerGeometry const& trackerGeom, + SiPixelGainForHLTonGPU * & gainsOnGPU, + SiPixelGainForHLTonGPU_DecodingStructure * & gainDataOnGPU); +#endif // EventFilter_SiPixelRawToDigi_plugins_SiPixelFedCablingMapGPU_h diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigi.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigi.cc index 9b93f63741df4..dc3c9e4c68f68 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigi.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigi.cc @@ -324,3 +324,7 @@ void SiPixelRawToDigi::produce( edm::Event& ev, ev.put(std::move(disabled_channelcollection)); } } + +// declare this as a framework plugin +#include "FWCore/Framework/interface/MakerMacros.h" +DEFINE_FWK_MODULE(SiPixelRawToDigi); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPU.cc b/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPU.cc index e9f4d3654f4c9..92089d50cb58e 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPU.cc +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPU.cc @@ -1,17 +1,21 @@ // This code is an entry point for GPU based pixel track reconstruction for HLT // Modified by Sushil and Shashi for this purpose July-2017 +// C++ includes #include #include #include #include +// CUDA kincludes #include #include +// ROOT includes #include #include +// CMSSW includes #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingMap.h" #include "CondFormats/SiPixelObjects/interface/SiPixelFedCablingTree.h" #include "CondFormats/SiPixelObjects/interface/SiPixelQuality.h" @@ -21,34 +25,28 @@ #include "DataFormats/FEDRawData/interface/FEDNumbering.h" #include "DataFormats/FEDRawData/interface/FEDRawData.h" #include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" +#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" #include "DataFormats/SiPixelDetId/interface/PixelFEDChannel.h" #include "DataFormats/SiPixelDigi/interface/PixelDigi.h" #include "DataFormats/SiPixelRawData/interface/SiPixelRawDataError.h" - -#include "DataFormats/SiPixelCluster/interface/SiPixelCluster.h" - - - -#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" -#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" - #include "EventFilter/SiPixelRawToDigi/interface/PixelDataFormatter.h" #include "EventFilter/SiPixelRawToDigi/interface/PixelUnpackingRegions.h" #include "FWCore/Framework/interface/ConsumesCollector.h" #include "FWCore/Framework/interface/ESHandle.h" #include "FWCore/Framework/interface/ESTransientHandle.h" +#include "FWCore/Framework/interface/MakerMacros.h" #include "FWCore/MessageLogger/interface/MessageLogger.h" #include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" #include "FWCore/ParameterSet/interface/ParameterSetDescription.h" #include "FWCore/PluginManager/interface/ModuleDef.h" -#include "FWCore/Framework/interface/MakerMacros.h" +#include "Geometry/Records/interface/TrackerDigiGeometryRecord.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" -#include "EventInfoGPU.h" -#include "RawToDigiGPU.h" +// local includes #include "SiPixelFedCablingMapGPU.h" #include "SiPixelRawToDigiGPU.h" -#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" - +#include "SiPixelRawToDigiGPUKernel.h" namespace { struct AccretionCluster { @@ -159,7 +157,7 @@ SiPixelRawToDigiGPU::SiPixelRawToDigiGPU( const edm::ParameterSet& conf ) // device copy of GPU friendly cablng map allocateCablingMap(cablingMapGPUHost_, cablingMapGPUDevice_); - int WSIZE = MAX_FED * MAX_WORD * sizeof(unsigned int); + int WSIZE = MAX_FED * pixelgpudetails::MAX_WORD * sizeof(unsigned int); cudaMallocHost(&word, sizeof(unsigned int)*WSIZE); cudaMallocHost(&fedId_h, sizeof(unsigned char)*WSIZE); @@ -170,21 +168,21 @@ SiPixelRawToDigiGPU::SiPixelRawToDigiGPU( const edm::ParameterSet& conf ) cudaMallocHost(&adc_h, sizeof(uint16_t)*WSIZE); cudaMallocHost(&clus_h, sizeof(int32_t)*WSIZE); - uint32_t vsize = sizeof(GPU::SimpleVector); - uint32_t esize = sizeof(error_obj); + uint32_t vsize = sizeof(GPU::SimpleVector); + uint32_t esize = sizeof(pixelgpudetails::error_obj); cudaCheck(cudaMallocHost(&error_h, vsize)); cudaCheck(cudaMallocHost(&error_h_tmp, vsize)); - cudaCheck(cudaMallocHost(&data_h, MAX_FED*MAX_WORD*esize)); + cudaCheck(cudaMallocHost(&data_h, MAX_FED*pixelgpudetails::MAX_WORD*esize)); // allocate memory for RawToDigi on GPU - context_ = initDeviceMemory(); + context_ = pixelgpudetails::initDeviceMemory(); - new (error_h) GPU::SimpleVector(MAX_FED*MAX_WORD, data_h); - new (error_h_tmp) GPU::SimpleVector(MAX_FED*MAX_WORD, context_.data_d); + new (error_h) GPU::SimpleVector(MAX_FED*pixelgpudetails::MAX_WORD, data_h); + new (error_h_tmp) GPU::SimpleVector(MAX_FED*pixelgpudetails::MAX_WORD, context_.data_d); assert(error_h->size() == 0); - assert(error_h->capacity() == static_cast(MAX_FED*MAX_WORD)); + assert(error_h->capacity() == static_cast(MAX_FED*pixelgpudetails::MAX_WORD)); assert(error_h_tmp->size() == 0); - assert(error_h_tmp->capacity() == static_cast(MAX_FED*MAX_WORD)); + assert(error_h_tmp->capacity() == static_cast(MAX_FED*pixelgpudetails::MAX_WORD)); } // ----------------------------------------------------------------------------- @@ -479,7 +477,7 @@ SiPixelRawToDigiGPU::produce( edm::Event& ev, const edm::EventSetup& es) auto size = error_h->size(); for (auto i = 0; i < size; i++) { - error_obj err = (*error_h)[i]; + pixelgpudetails::error_obj err = (*error_h)[i]; if (err.errorType != 0) { SiPixelRawDataError error(err.word, err.errorType, err.fedId + 1200); errors[err.rawId].push_back(error); diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPU.h b/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPU.h index 2a4e0d1df3ccd..76f7f7f256e70 100644 --- a/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPU.h +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPU.h @@ -6,32 +6,30 @@ * for pixel subdetector */ -#include "FWCore/Framework/interface/ESWatcher.h" -#include "FWCore/Framework/interface/stream/EDProducer.h" -#include "FWCore/Framework/interface/EventSetup.h" -#include "FWCore/Framework/interface/Event.h" -#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTService.h" #include "CondFormats/DataRecord/interface/SiPixelFedCablingMapRcd.h" #include "CondFormats/DataRecord/interface/SiPixelQualityRcd.h" #include "DataFormats/FEDRawData/interface/FEDRawDataCollection.h" - -#include "CalibTracker/SiPixelESProducers/interface/SiPixelGainCalibrationForHLTService.h" #include "DataFormats/TrackerCommon/interface/TrackerTopology.h" - - #include "FWCore/Framework/interface/ConsumesCollector.h" +#include "FWCore/Framework/interface/ESWatcher.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/EventSetup.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" #include "FWCore/Utilities/interface/CPUTimer.h" -#include "RawToDigiGPU.h" +#include "SiPixelRawToDigiGPUKernel.h" +class PixelUnpackingRegions; class SiPixelFedCablingTree; class SiPixelFedCabling; class SiPixelQuality; class TH1D; -class PixelUnpackingRegions; - class SiPixelGainForHLTonGPU; struct SiPixelGainForHLTonGPU_DecodingStructure; -class SiPixelRawToDigiGPU : public edm::stream::EDProducer<> { + +class SiPixelRawToDigiGPU : public edm::stream::EDProducer<> +{ public: /// ctor @@ -77,12 +75,12 @@ class SiPixelRawToDigiGPU : public edm::stream::EDProducer<> { // to store the output uint32_t *pdigi_h, *rawIdArr_h; // host copy of output uint16_t * adc_h; int32_t * clus_h; // host copy of calib&clus output - error_obj *data_h = nullptr; - GPU::SimpleVector *error_h = nullptr; - GPU::SimpleVector *error_h_tmp = nullptr; + pixelgpudetails::error_obj *data_h = nullptr; + GPU::SimpleVector *error_h = nullptr; + GPU::SimpleVector *error_h_tmp = nullptr; // configuration and memory buffers alocated on the GPU - context context_; + pixelgpudetails::context context_; SiPixelFedCablingMapGPU * cablingMapGPUHost_; SiPixelFedCablingMapGPU * cablingMapGPUDevice_; diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPUKernel.cu b/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPUKernel.cu new file mode 100644 index 0000000000000..d8cfc643f4461 --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPUKernel.cu @@ -0,0 +1,696 @@ +/* Sushil Dubey, Shashi Dugad, TIFR, July 2017 + * + * File Name: RawToDigiGPU.cu + * Description: It converts Raw data into Digi Format on GPU + * then it converts adc -> electron and + * applies the adc threshold to needed for clustering + * Finaly the Output of RawToDigi data is given to pixelClusterizer + * +**/ + +// C++ includes +#include +#include +#include +#include +#include +#include +#include +#include + +// CUDA includes +#include +#include +#include +#include +#include +#include +#include + +// CMSSW includes +#include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuCalibPixel.h" +#include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" + +// local includes +#include "SiPixelFedCablingMapGPU.h" +#include "SiPixelRawToDigiGPUKernel.h" + +namespace pixelgpudetails { + + pixelgpudetails::context initDeviceMemory() { + + using namespace gpuClustering; + pixelgpudetails::context c; + + // Number of words for all the feds + constexpr uint32_t MAX_WORD08_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * sizeof(uint8_t); + constexpr uint32_t MAX_WORD32_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * sizeof(uint32_t); + constexpr uint32_t MAX_WORD16_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * sizeof(uint16_t); + constexpr uint32_t vsize = sizeof(GPU::SimpleVector); + constexpr uint32_t esize = sizeof(pixelgpudetails::error_obj); + constexpr uint32_t MAX_ERROR_SIZE = MAX_FED * pixelgpudetails::MAX_WORD * esize; + + cudaCheck(cudaMalloc((void**) & c.word_d, MAX_WORD32_SIZE)); + cudaCheck(cudaMalloc((void**) & c.fedId_d, MAX_WORD08_SIZE)); + cudaCheck(cudaMalloc((void**) & c.pdigi_d, MAX_WORD32_SIZE)); // to store thepacked digi + cudaCheck(cudaMalloc((void**) & c.xx_d, MAX_WORD16_SIZE)); // to store the x and y coordinate + cudaCheck(cudaMalloc((void**) & c.yy_d, MAX_WORD16_SIZE)); + cudaCheck(cudaMalloc((void**) & c.adc_d, MAX_WORD16_SIZE)); + + cudaCheck(cudaMalloc((void**) & c.moduleInd_d, MAX_WORD16_SIZE)); + cudaCheck(cudaMalloc((void**) & c.rawIdArr_d, MAX_WORD32_SIZE)); + cudaCheck(cudaMalloc((void**) & c.error_d, vsize)); + cudaCheck(cudaMalloc((void**) & c.data_d, MAX_ERROR_SIZE)); + + // for the clusterizer + cudaCheck(cudaMalloc((void**) & c.clus_d, MAX_WORD32_SIZE)); // cluser index in module + + cudaCheck(cudaMalloc((void**) & c.moduleStart_d, (MaxNumModules+1)*sizeof(uint32_t) )); + cudaCheck(cudaMalloc((void**) & c.clusInModule_d, (MaxNumModules)*sizeof(uint32_t) )); + cudaCheck(cudaMalloc((void**) & c.moduleId_d, (MaxNumModules)*sizeof(uint32_t) )); + + cudaCheck(cudaMalloc((void**) & c.debug_d, MAX_WORD32_SIZE)); + + // create a CUDA stream + cudaCheck(cudaStreamCreate(&c.stream)); + + return c; + } + + + void freeMemory(pixelgpudetails::context & c) { + // free the GPU memory + cudaCheck(cudaFree(c.word_d)); + cudaCheck(cudaFree(c.fedId_d)); + cudaCheck(cudaFree(c.pdigi_d)); + cudaCheck(cudaFree(c.xx_d)); + cudaCheck(cudaFree(c.yy_d)); + cudaCheck(cudaFree(c.adc_d)); + cudaCheck(cudaFree(c.moduleInd_d)); + cudaCheck(cudaFree(c.rawIdArr_d)); + cudaCheck(cudaFree(c.error_d)); + cudaCheck(cudaFree(c.data_d)); + + // these are for the clusterizer (to be moved) + cudaCheck(cudaFree(c.moduleStart_d)); + cudaCheck(cudaFree(c.clus_d)); + cudaCheck(cudaFree(c.clusInModule_d)); + cudaCheck(cudaFree(c.moduleId_d)); + cudaCheck(cudaFree(c.debug_d)); + + + // destroy the CUDA stream + cudaCheck(cudaStreamDestroy(c.stream)); + } + + + __device__ uint32_t getLink(uint32_t ww) { + return ((ww >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask); + } + + + __device__ uint32_t getRoc(uint32_t ww) { + return ((ww >> pixelgpudetails::ROC_shift ) & pixelgpudetails::ROC_mask); + } + + + __device__ uint32_t getADC(uint32_t ww) { + return ((ww >> pixelgpudetails::ADC_shift) & pixelgpudetails::ADC_mask); + } + + + __device__ bool isBarrel(uint32_t rawId) { + return (1==((rawId>>25)&0x7)); + } + + + + __device__ pixelgpudetails::DetIdGPU getRawId(const SiPixelFedCablingMapGPU * Map, uint32_t fed, uint32_t link, uint32_t roc) { + uint32_t index = fed * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; + pixelgpudetails::DetIdGPU detId = { Map->RawId[index], Map->rocInDet[index], Map->moduleId[index] }; + return detId; + } + + + //reference http://cmsdoxygen.web.cern.ch/cmsdoxygen/CMSSW_9_2_0/doc/html/dd/d31/FrameConversion_8cc_source.html + //http://cmslxr.fnal.gov/source/CondFormats/SiPixelObjects/src/PixelROC.cc?v=CMSSW_9_2_0#0071 + // Convert local pixel to pixelgpudetails::global pixel + __device__ pixelgpudetails::Pixel frameConversion(bool bpix, int side, uint32_t layer, uint32_t rocIdInDetUnit, pixelgpudetails::Pixel local) { + + int slopeRow = 0, slopeCol = 0; + int rowOffset = 0, colOffset = 0; + + if (bpix) { + + if (side == -1 && layer != 1) { // -Z side: 4 non-flipped modules oriented like 'dddd', except Layer 1 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; + } + else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2*pixelgpudetails::numRowsInRoc-1; + colOffset = (rocIdInDetUnit-8)*pixelgpudetails::numColsInRoc; + } // if roc + } + else { // +Z side: 4 non-flipped modules oriented like 'pppp', but all 8 in layer1 + if (rocIdInDetUnit < 8) { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2*pixelgpudetails::numRowsInRoc-1; + colOffset = rocIdInDetUnit * pixelgpudetails::numColsInRoc; + } + else { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (16-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; + } + } + + } + else { // fpix + if (side==-1) { // pannel 1 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; + } + else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2*pixelgpudetails::numRowsInRoc-1; + colOffset = (rocIdInDetUnit-8)*pixelgpudetails::numColsInRoc; + } + } + else { // pannel 2 + if (rocIdInDetUnit < 8) { + slopeRow = 1; + slopeCol = -1; + rowOffset = 0; + colOffset = (8-rocIdInDetUnit)*pixelgpudetails::numColsInRoc-1; + } + else { + slopeRow = -1; + slopeCol = 1; + rowOffset = 2*pixelgpudetails::numRowsInRoc-1; + colOffset = (rocIdInDetUnit-8)*pixelgpudetails::numColsInRoc; + } + + } // side + + } + + uint32_t gRow = rowOffset+slopeRow*local.row; + uint32_t gCol = colOffset+slopeCol*local.col; + //printf("Inside frameConversion row: %u, column: %u\n",gRow, gCol); + pixelgpudetails::Pixel global = {gRow, gCol}; + return global; + } + + + __device__ uint32_t conversionError(uint32_t fedId, uint32_t status, bool debug = false) + { + + uint32_t errorType = 0; + + // debug = true; + + switch (status) { + case(1) : { + if (debug) printf("Error in Fed: %i, invalid channel Id (errorType = 35\n)", fedId ); + errorType = 35; + break; + } + case(2) : { + if (debug) printf("Error in Fed: %i, invalid ROC Id (errorType = 36)\n", fedId); + errorType = 36; + break; + } + case(3) : { + if (debug) printf("Error in Fed: %i, invalid dcol/pixel value (errorType = 37)\n", fedId); + errorType = 37; + break; + } + case(4) : { + if (debug) printf("Error in Fed: %i, dcol/pixel read out of order (errorType = 38)\n", fedId); + errorType = 38; + break; + } + default: if (debug) printf("Cabling check returned unexpected result, status = %i\n", status); + }; + + return errorType; + + } + + + __device__ bool rocRowColIsValid(uint32_t rocRow, uint32_t rocCol) + { + uint32_t numRowsInRoc = 80; + uint32_t numColsInRoc = 52; + + /// row and collumn in ROC representation + return ((rocRow < numRowsInRoc) & (rocCol < numColsInRoc)); + } + + + __device__ bool dcolIsValid(uint32_t dcol, uint32_t pxid) + { + return ((dcol < 26) & (2 <= pxid) & (pxid < 162)); + } + + + __device__ uint32_t checkROC(uint32_t errorWord, uint32_t fedId, uint32_t link, const SiPixelFedCablingMapGPU *Map, bool debug = false) + { + + int errorType = (errorWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ERROR_mask; + if (errorType < 25) return false; + bool errorFound = false; + + switch (errorType) { + case(25) : { + errorFound = true; + uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + 1; + if (index > 1 && index <= Map->size){ + if (!(link == Map->link[index] && 1 == Map->roc[index])) errorFound = false; + } + if (debug&errorFound) printf("Invalid ROC = 25 found (errorType = 25)\n"); + break; + } + case(26) : { + if (debug) printf("Gap word found (errorType = 26)\n"); + errorFound = true; + break; + } + case(27) : { + if (debug) printf("Dummy word found (errorType = 27)\n"); + errorFound = true; + break; + } + case(28) : { + if (debug) printf("Error fifo nearly full (errorType = 28)\n"); + errorFound = true; + break; + } + case(29) : { + if (debug) printf("Timeout on a channel (errorType = 29)\n"); + if ((errorWord >> pixelgpudetails::OMIT_ERR_shift) & pixelgpudetails::OMIT_ERR_mask) { + if (debug) printf("...first errorType=29 error, this gets masked out\n"); + } + errorFound = true; + break; + } + case(30) : { + if (debug) printf("TBM error trailer (errorType = 30)\n"); + int StateMatch_bits = 4; + int StateMatch_shift = 8; + uint32_t StateMatch_mask = ~(~uint32_t(0) << StateMatch_bits); + int StateMatch = (errorWord >> StateMatch_shift) & StateMatch_mask; + if ( StateMatch != 1 && StateMatch != 8 ) { + if (debug) printf("FED error 30 with unexpected State Bits (errorType = 30)\n"); + } + if ( StateMatch == 1 ) errorType = 40; // 1=Overflow -> 40, 8=number of ROCs -> 30 + errorFound = true; + break; + } + case(31) : { + if (debug) printf("Event number error (errorType = 31)\n"); + errorFound = true; + break; + } + default: errorFound = false; + + }; + + return errorFound? errorType : 0; + + } + + + __device__ uint32_t getErrRawID(uint32_t fedId, uint32_t errWord, uint32_t errorType, const SiPixelFedCablingMapGPU *Map, bool debug = false) + { + + uint32_t rID = 0xffffffff; + + switch (errorType) { + case 25 : case 30 : case 31 : case 36 : case 40 : { + //set dummy values for cabling just to get detId from link + //cabling.dcol = 0; + //cabling.pxid = 2; + uint32_t roc = 1; + uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; + + uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; + if(rID_temp != 9999) rID = rID_temp; + break; + } + case 29 : { + int chanNmbr = 0; + const int DB0_shift = 0; + const int DB1_shift = DB0_shift + 1; + const int DB2_shift = DB1_shift + 1; + const int DB3_shift = DB2_shift + 1; + const int DB4_shift = DB3_shift + 1; + const uint32_t DataBit_mask = ~(~uint32_t(0) << 1); + + int CH1 = (errWord >> DB0_shift) & DataBit_mask; + int CH2 = (errWord >> DB1_shift) & DataBit_mask; + int CH3 = (errWord >> DB2_shift) & DataBit_mask; + int CH4 = (errWord >> DB3_shift) & DataBit_mask; + int CH5 = (errWord >> DB4_shift) & DataBit_mask; + int BLOCK_bits = 3; + int BLOCK_shift = 8; + uint32_t BLOCK_mask = ~(~uint32_t(0) << BLOCK_bits); + int BLOCK = (errWord >> BLOCK_shift) & BLOCK_mask; + int localCH = 1*CH1+2*CH2+3*CH3+4*CH4+5*CH5; + if (BLOCK%2==0) chanNmbr=(BLOCK/2)*9+localCH; + else chanNmbr = ((BLOCK-1)/2)*9+4+localCH; + if ((chanNmbr < 1)||(chanNmbr > 36)) break; // signifies unexpected result + + // set dummy values for cabling just to get detId from link if in Barrel + //cabling.dcol = 0; + //cabling.pxid = 2; + uint32_t roc = 1; + uint32_t link = chanNmbr; + uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; + if(rID_temp != 9999) rID = rID_temp; + break; + } + case 37 : case 38: { + //cabling.dcol = 0; + //cabling.pxid = 2; + uint32_t roc = (errWord >> pixelgpudetails::ROC_shift) & pixelgpudetails::ROC_mask; + uint32_t link = (errWord >> pixelgpudetails::LINK_shift) & pixelgpudetails::LINK_mask; + uint32_t rID_temp = getRawId(Map, fedId, link, roc).RawId; + if(rID_temp != 9999) rID = rID_temp; + break; + } + + default : break; + + }; + + return rID; + + } + + + /*---------- + * Name: applyADCthreshold_kernel() + * Desc: converts adc count to electrons and then applies the + * threshold on each channel. + * make pixel to 0 if it is below the threshold + * Input: xx_d[], yy_d[], layer_d[], wordCounter, adc[], ADCThreshold + *----------- + * Output: xx_adc[], yy_adc[] with pixel threshold applied + */ + // kernel to apply adc threshold on the channels + + + // Felice: gains and pedestals are not the same for each pixel. This code should be rewritten to take + // in account local gains/pedestals + // __global__ void applyADCthreshold_kernel(const uint32_t *xx_d, const uint32_t *yy_d, const uint32_t *layer_d, uint32_t *adc, const uint32_t wordCounter, + // const ADCThreshold adcThreshold, uint32_t *xx_adc, uint32_t *yy_adc ) { + // int tid = threadIdx.x; + // int gIndex = blockDim.x*blockIdx.x+tid; + // if (gIndex=adcThreshold.theFirstStack_) { + // if (adcThreshold.theStackADC_==1 && adcOld==1) { + // adcNew = int(255*135); // Arbitrarily use overflow value. + // } + // if (adcThreshold.theStackADC_ >1 && adcThreshold.theStackADC_!=255 && adcOld>=1){ + // adcNew = int((adcOld-1) * gain * 255/float(adcThreshold.theStackADC_-1)); + // } + // } + // + // if (adcNew >adcThreshold.thePixelThreshold ) { + // xx_adc[gIndex]=xx_d[gIndex]; + // yy_adc[gIndex]=yy_d[gIndex]; + // } + // else { + // xx_adc[gIndex]=0; // 0: dead pixel + // yy_adc[gIndex]=0; + // } + // adc[gIndex] = adcNew; + // } + // } + + + // Kernel to perform Raw to Digi conversion + __global__ void RawToDigi_kernel(const SiPixelFedCablingMapGPU *Map, const uint32_t wordCounter, const uint32_t *Word, const uint8_t *fedIds, + uint16_t * XX, uint16_t * YY, uint16_t * ADC, + uint32_t * pdigi, uint32_t *rawIdArr, uint16_t * moduleId, + GPU::SimpleVector *err, + bool useQualityInfo, bool includeErrors, bool debug) + { + uint32_t blockId = blockIdx.x; + uint32_t threadId = threadIdx.x; + + bool skipROC = false; + //if (threadId==0) printf("Event: %u blockId: %u start: %u end: %u\n", eventno, blockId, begin, end); + + for (int aaa=0; aaa<1; ++aaa) { // too many coninue below.... (to be fixed) + auto gIndex = threadId + blockId*blockDim.x; + if (gIndex < wordCounter) { + + uint32_t fedId = fedIds[gIndex/2]; // +1200; + + // initialize (too many coninue below) + pdigi[gIndex] = 0; + rawIdArr[gIndex] = 0; + moduleId[gIndex] = 9999; + + uint32_t ww = Word[gIndex]; // Array containing 32 bit raw data + if (ww == 0) { + //noise and dead channels are ignored + XX[gIndex] = 0; // 0 is an indicator of a noise/dead channel + YY[gIndex] = 0; // skip these pixels during clusterization + ADC[gIndex] = 0; + continue ; // 0: bad word + } + + uint32_t link = getLink(ww); // Extract link + uint32_t roc = getRoc(ww); // Extract Roc in link + pixelgpudetails::DetIdGPU detId = getRawId(Map, fedId, link, roc); + + uint32_t errorType = checkROC(ww, fedId, link, Map, debug); + skipROC = (roc < pixelgpudetails::maxROCIndex) ? false : (errorType != 0); + if (includeErrors and skipROC) + { + uint32_t rID = getErrRawID(fedId, ww, errorType, Map, debug); + err->emplace_back(rID, ww, errorType, fedId); + continue; + } + + uint32_t rawId = detId.RawId; + uint32_t rocIdInDetUnit = detId.rocInDet; + bool barrel = isBarrel(rawId); + + uint32_t index = fedId * MAX_LINK * MAX_ROC + (link-1) * MAX_ROC + roc; + if (useQualityInfo) { + + skipROC = Map->badRocs[index]; + if (skipROC) continue; + + } + skipROC = Map->modToUnp[index]; + if (skipROC) continue; + + uint32_t layer = 0;//, ladder =0; + int side = 0, panel = 0, module = 0;//disk = 0,blade = 0 + + if (barrel) + { + layer = (rawId >> pixelgpudetails::layerStartBit) & pixelgpudetails::layerMask; + module = (rawId >> pixelgpudetails::moduleStartBit) & pixelgpudetails::moduleMask; + side = (module < 5)? -1 : 1; + } + else { + // endcap ids + layer = 0; + panel = (rawId >> pixelgpudetails::panelStartBit) & pixelgpudetails::panelMask; + //disk = (rawId >> diskStartBit_) & diskMask_ ; + side = (panel == 1)? -1 : 1; + //blade = (rawId>>bladeStartBit_) & bladeMask_; + } + + // ***special case of layer to 1 be handled here + pixelgpudetails::Pixel localPix; + if (layer == 1) { + uint32_t col = (ww >> pixelgpudetails::COL_shift) & pixelgpudetails::COL_mask; + uint32_t row = (ww >> pixelgpudetails::ROW_shift) & pixelgpudetails::ROW_mask; + localPix.row = row; + localPix.col = col; + if (includeErrors) { + if (not rocRowColIsValid(row, col)) { + uint32_t error = conversionError(fedId, 3, debug); //use the device function and fill the arrays + err->emplace_back(rawId, ww, error, fedId); + if(debug) printf("BPIX1 Error status: %i\n", error); + continue; + } + } + } else { + // ***conversion rules for dcol and pxid + uint32_t dcol = (ww >> pixelgpudetails::DCOL_shift) & pixelgpudetails::DCOL_mask; + uint32_t pxid = (ww >> pixelgpudetails::PXID_shift) & pixelgpudetails::PXID_mask; + uint32_t row = pixelgpudetails::numRowsInRoc - pxid/2; + uint32_t col = dcol*2 + pxid%2; + localPix.row = row; + localPix.col = col; + if (includeErrors and not dcolIsValid(dcol, pxid)) { + uint32_t error = conversionError(fedId, 3, debug); + err->emplace_back(rawId, ww, error, fedId); + if(debug) printf("Error status: %i %d %d %d %d\n", error, dcol, pxid, fedId, roc); + continue; + } + } + + pixelgpudetails::Pixel globalPix = frameConversion(barrel, side, layer, rocIdInDetUnit, localPix); + XX[gIndex] = globalPix.row ; // origin shifting by 1 0-159 + YY[gIndex] = globalPix.col ; // origin shifting by 1 0-415 + ADC[gIndex] = getADC(ww); + pdigi[gIndex] = pixelgpudetails::pack(globalPix.row,globalPix.col,ADC[gIndex]); + moduleId[gIndex] = detId.moduleId; + rawIdArr[gIndex] = rawId; + } // end of if (gIndex < end) + } // end fake loop + } // end of Raw to Digi kernel + + + // kernel wrapper called from runRawToDigi_kernel + void RawToDigi_wrapper( + pixelgpudetails::context & c, + const SiPixelFedCablingMapGPU* cablingMapDevice, SiPixelGainForHLTonGPU * const ped, + const uint32_t wordCounter, uint32_t *word, const uint32_t fedCounter, uint8_t *fedId_h, + bool convertADCtoElectrons, + uint32_t * pdigi_h, uint32_t *rawIdArr_h, + GPU::SimpleVector *error_h, GPU::SimpleVector *error_h_tmp, pixelgpudetails::error_obj *data_h, + uint16_t * adc_h, int32_t * clus_h, + bool useQualityInfo, bool includeErrors, bool debug, uint32_t & nModulesActive) + { + const int threadsPerBlock = 512; + const int blocks = (wordCounter + threadsPerBlock-1) /threadsPerBlock; // fill it all + + + assert(0 == wordCounter%2); + // wordCounter is the total no of words in each event to be trasfered on device + cudaCheck(cudaMemcpyAsync(&c.word_d[0], &word[0], wordCounter*sizeof(uint32_t), cudaMemcpyDefault, c.stream)); + cudaCheck(cudaMemcpyAsync(&c.fedId_d[0], &fedId_h[0], wordCounter*sizeof(uint8_t)/2, cudaMemcpyDefault, c.stream)); + + constexpr uint32_t vsize = sizeof(GPU::SimpleVector); + constexpr uint32_t esize = sizeof(pixelgpudetails::error_obj); + cudaCheck(cudaMemcpyAsync(c.error_d, error_h_tmp, vsize, cudaMemcpyDefault, c.stream)); + + // Launch rawToDigi kernel + RawToDigi_kernel<<>>( + cablingMapDevice, + wordCounter, + c.word_d, + c.fedId_d, + c.xx_d, c.yy_d, c.adc_d, + c.pdigi_d, + c.rawIdArr_d, + c.moduleInd_d, + c.error_d, + useQualityInfo, + includeErrors, + debug); + cudaCheck(cudaGetLastError()); + + // copy data to host variable + + cudaCheck(cudaMemcpyAsync(pdigi_h, c.pdigi_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, c.stream)); + cudaCheck(cudaMemcpyAsync(rawIdArr_h, c.rawIdArr_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, c.stream)); + + if (includeErrors) { + cudaCheck(cudaMemcpyAsync(error_h, c.error_d, vsize, cudaMemcpyDefault, c.stream)); + cudaStreamSynchronize(c.stream); + error_h->set_data(data_h); + int size = error_h->size(); + cudaCheck(cudaMemcpyAsync(data_h, c.data_d, size*esize, cudaMemcpyDefault, c.stream)); + } + // End of Raw2Digi and passing data for cluserisation + + { + // clusterizer ... + using namespace gpuClustering; + int threadsPerBlock = 256; + int blocks = (wordCounter + threadsPerBlock - 1) / threadsPerBlock; + + + assert(ped); + gpuCalibPixel::calibDigis<<>>( + c.moduleInd_d, + c.xx_d, c.yy_d, c.adc_d, + ped, + wordCounter + ); + + cudaCheck(cudaGetLastError()); + + // calibrated adc + cudaCheck(cudaMemcpyAsync(adc_h, c.adc_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, c.stream)); + + /* + std::cout + << "CUDA countModules kernel launch with " << blocks + << " blocks of " << threadsPerBlock << " threads\n"; + */ + + uint32_t nModules=0; + cudaCheck(cudaMemcpyAsync(c.moduleStart_d, &nModules, sizeof(uint32_t), cudaMemcpyDefault, c.stream)); + + countModules<<>>(c.moduleInd_d, c.moduleStart_d, c.clus_d, wordCounter); + cudaCheck(cudaGetLastError()); + + cudaCheck(cudaMemcpyAsync(&nModules, c.moduleStart_d, sizeof(uint32_t), cudaMemcpyDefault, c.stream)); + + // std::cout << "found " << nModules << " Modules active" << std::endl; + + + threadsPerBlock = 256; + blocks = nModules; + + /* + std::cout + << "CUDA findClus kernel launch with " << blocks + << " blocks of " << threadsPerBlock << " threads\n"; + */ + + cudaCheck(cudaMemsetAsync(c.clusInModule_d, 0, (MaxNumModules)*sizeof(uint32_t),c.stream)); + + findClus<<>>( + c.moduleInd_d, + c.xx_d, c.yy_d, c.adc_d, + c.moduleStart_d, + c.clusInModule_d, c.moduleId_d, + c.clus_d, + c.debug_d, + wordCounter + ); + + // clusters + cudaCheck(cudaMemcpyAsync(clus_h, c.clus_d, wordCounter*sizeof(uint32_t), cudaMemcpyDefault, c.stream)); + + + cudaStreamSynchronize(c.stream); + cudaCheck(cudaGetLastError()); + + nModulesActive = nModules; + + } // end clusterizer scope + + } + +} diff --git a/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPUKernel.h b/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPUKernel.h new file mode 100644 index 0000000000000..0bebde676bc28 --- /dev/null +++ b/EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPUKernel.h @@ -0,0 +1,212 @@ +#ifndef EventFilter_SiPixelRawToDigi_plugins_SiPixelRawToDigiGPUKernel_h +#define EventFilter_SiPixelRawToDigi_plugins_SiPixelRawToDigiGPUKernel_h + +#include +#include + +#include "HeterogeneousCore/CUDAUtilities/interface/GPUSimpleVector.h" +#include "SiPixelFedCablingMapGPU.h" + +namespace pixelgpudetails { + + // Phase 1 geometry constants + const uint32_t layerStartBit = 20; + const uint32_t ladderStartBit = 12; + const uint32_t moduleStartBit = 2; + + const uint32_t panelStartBit = 10; + const uint32_t diskStartBit = 18; + const uint32_t bladeStartBit = 12; + + const uint32_t layerMask = 0xF; + const uint32_t ladderMask = 0xFF; + const uint32_t moduleMask = 0x3FF; + const uint32_t panelMask = 0x3; + const uint32_t diskMask = 0xF; + const uint32_t bladeMask = 0x3F; + + const uint32_t LINK_bits = 6; + const uint32_t ROC_bits = 5; + const uint32_t DCOL_bits = 5; + const uint32_t PXID_bits = 8; + const uint32_t ADC_bits = 8; + + // special for layer 1 + const uint32_t LINK_bits_l1 = 6; + const uint32_t ROC_bits_l1 = 5; + const uint32_t COL_bits_l1 = 6; + const uint32_t ROW_bits_l1 = 7; + const uint32_t OMIT_ERR_bits = 1; + + const uint32_t maxROCIndex = 8; + const uint32_t numRowsInRoc = 80; + const uint32_t numColsInRoc = 52; + + const uint32_t MAX_WORD = 2000; + + const uint32_t ADC_shift = 0; + const uint32_t PXID_shift = ADC_shift + ADC_bits; + const uint32_t DCOL_shift = PXID_shift + PXID_bits; + const uint32_t ROC_shift = DCOL_shift + DCOL_bits; + const uint32_t LINK_shift = ROC_shift + ROC_bits_l1; + // special for layer 1 ROC + const uint32_t ROW_shift = ADC_shift + ADC_bits; + const uint32_t COL_shift = ROW_shift + ROW_bits_l1; + const uint32_t OMIT_ERR_shift = 20; + + const uint32_t LINK_mask = ~(~uint32_t(0) << LINK_bits_l1); + const uint32_t ROC_mask = ~(~uint32_t(0) << ROC_bits_l1); + const uint32_t COL_mask = ~(~uint32_t(0) << COL_bits_l1); + const uint32_t ROW_mask = ~(~uint32_t(0) << ROW_bits_l1); + const uint32_t DCOL_mask = ~(~uint32_t(0) << DCOL_bits); + const uint32_t PXID_mask = ~(~uint32_t(0) << PXID_bits); + const uint32_t ADC_mask = ~(~uint32_t(0) << ADC_bits); + const uint32_t ERROR_mask = ~(~uint32_t(0) << ROC_bits_l1); + const uint32_t OMIT_ERR_mask = ~(~uint32_t(0) << OMIT_ERR_bits); + + struct DetIdGPU { + uint32_t RawId; + uint32_t rocInDet; + uint32_t moduleId; + }; + + struct Pixel { + uint32_t row; + uint32_t col; + }; + + class Packing { + public: + using PackedDigiType = uint32_t; + + // Constructor: pre-computes masks and shifts from field widths + __host__ __device__ + inline + constexpr Packing(unsigned int row_w, unsigned int column_w, + unsigned int time_w, unsigned int adc_w) : + row_width(row_w), + column_width(column_w), + adc_width(adc_w), + row_shift(0), + column_shift(row_shift + row_w), + time_shift(column_shift + column_w), + adc_shift(time_shift + time_w), + row_mask(~(~0U << row_w)), + column_mask( ~(~0U << column_w)), + time_mask(~(~0U << time_w)), + adc_mask(~(~0U << adc_w)), + rowcol_mask(~(~0U << (column_w+row_w))), + max_row(row_mask), + max_column(column_mask), + max_adc(adc_mask) + { } + + uint32_t row_width; + uint32_t column_width; + uint32_t adc_width; + + uint32_t row_shift; + uint32_t column_shift; + uint32_t time_shift; + uint32_t adc_shift; + + PackedDigiType row_mask; + PackedDigiType column_mask; + PackedDigiType time_mask; + PackedDigiType adc_mask; + PackedDigiType rowcol_mask; + + uint32_t max_row; + uint32_t max_column; + uint32_t max_adc; + }; + + __host__ __device__ + inline + constexpr Packing packing() { + return Packing(11, 11, 0, 10); + } + + + __host__ __device__ + inline + uint32_t pack(uint32_t row, uint32_t col, uint32_t adc) { + constexpr Packing thePacking = packing(); + adc = std::min(adc, thePacking.max_adc); + + return (row << thePacking.row_shift) | + (col << thePacking.column_shift) | + (adc << thePacking.adc_shift); + } + + struct error_obj { + uint32_t rawId; + uint32_t word; + unsigned char errorType; + unsigned char fedId; + + __host__ __device__ + error_obj(uint32_t a, uint32_t b, unsigned char c, unsigned char d): + rawId(a), + word(b), + errorType(c), + fedId(d) + { } + }; + + // configuration and memory buffers alocated on the GPU + struct context { + cudaStream_t stream; + + uint32_t * word_d; + uint8_t * fedId_d; + uint32_t * pdigi_d; + uint16_t * xx_d; + uint16_t * yy_d; + uint16_t * adc_d; + uint16_t * moduleInd_d; + uint32_t * rawIdArr_d; + + GPU::SimpleVector * error_d; + error_obj * data_d; + + // these are for the clusterizer (to be moved) + uint32_t * moduleStart_d; + int32_t * clus_d; + uint32_t * clusInModule_d; + uint32_t * moduleId_d; + uint32_t * debug_d; + }; + + // wrapper function to call RawToDigi on the GPU from host side + void RawToDigi_wrapper(context &, const SiPixelFedCablingMapGPU* cablingMapDevice, + SiPixelGainForHLTonGPU * const ped, + const uint32_t wordCounter, uint32_t *word, + const uint32_t fedCounter, uint8_t *fedId_h, + bool convertADCtoElectrons, uint32_t * pdigi_h, + uint32_t *rawIdArr_h, GPU::SimpleVector *error_h, + GPU::SimpleVector *error_h_tmp, error_obj *data_h, + uint16_t * adc_h, int32_t * clus_h, + bool useQualityInfo, bool includeErrors, bool debug, + uint32_t & nModulesActive); + + // void initCablingMap(); + context initDeviceMemory(); + void freeMemory(context &); + + // see RecoLocalTracker/SiPixelClusterizer + // all are runtime const, should be specified in python _cfg.py + struct ADCThreshold { + const int thePixelThreshold = 1000; // default Pixel threshold in electrons + const int theSeedThreshold = 1000; // seed thershold in electrons not used in our algo + const float theClusterThreshold = 4000; // cluster threshold in electron + const int ConversionFactor = 65; // adc to electron conversion factor + + const int theStackADC_ = 255; // the maximum adc count for stack layer + const int theFirstStack_ = 5; // the index of the fits stack layer + const double theElectronPerADCGain_ = 600; // ADC to electron conversion + }; + +} + +#endif // EventFilter_SiPixelRawToDigi_plugins_SiPixelRawToDigiGPUKernel_h diff --git a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h index a76ca821e70da..6add3a78b96e6 100644 --- a/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h +++ b/RecoLocalTracker/SiPixelRecHits/interface/pixelCPEforGPU.h @@ -19,12 +19,11 @@ namespace pixelCPEforGPU { struct CommonParams { float theThicknessB; float theThicknessE; - float thePitchX; + float thePitchX; float thePitchY; }; struct DetParams { - bool isBarrel; bool isPosZ; uint16_t layer; @@ -39,7 +38,6 @@ namespace pixelCPEforGPU { float x0,y0,z0; // the vertex in the local coord of the detector Frame frame; - }; @@ -51,10 +49,9 @@ namespace pixelCPEforGPU { CommonParams const & commonParams() const {return *m_commonParams;} constexpr DetParams const & detParams(int i) const {return m_detParams[i];} - }; - // SOA! (on device) + // SOA (on device) template struct ClusParamsT { uint32_t minRow[N]; @@ -66,12 +63,12 @@ namespace pixelCPEforGPU { int32_t Q_l_X[N]; int32_t Q_f_Y[N]; int32_t Q_l_Y[N]; - + int32_t charge[N]; float xpos[N]; float ypos[N]; - + float xerr[N]; float yerr[N]; }; @@ -85,177 +82,171 @@ namespace pixelCPEforGPU { // x,y local position on det auto gvx = x - detParams.x0; auto gvy = y - detParams.y0; - auto gvz = -1.f/detParams.z0; - // normalization not required as only ratio used... + auto gvz = -1.f / detParams.z0; + // normalization not required as only ratio used... // calculate angles - cotalpha = gvx*gvz; - cotbeta = gvy*gvz; + cotalpha = gvx * gvz; + cotbeta = gvy * gvz; } constexpr inline - float correction( - int sizeM1, - int Q_f, //!< Charge in the first pixel. - int Q_l, //!< Charge in the last pixel. - uint16_t upper_edge_first_pix, //!< As the name says. - uint16_t lower_edge_last_pix, //!< As the name says. - float lorentz_shift, //!< L-shift at half thickness - float theThickness, //detector thickness - float cot_angle, //!< cot of alpha_ or beta_ - float pitch, //!< thePitchX or thePitchY - bool first_is_big, //!< true if the first is big - bool last_is_big //!< true if the last is big - ) -{ - if (0==sizeM1) return 0; // size1 - float W_eff=0; - bool simple=true; - if (1==sizeM1) { // size 2 - //--- Width of the clusters minus the edge (first and last) pixels. - //--- In the note, they are denoted x_F and x_L (and y_F and y_L) - // assert(lower_edge_last_pix>=upper_edge_first_pix); - auto W_inner = pitch * float(lower_edge_last_pix-upper_edge_first_pix); // in cm - - //--- Predicted charge width from geometry - auto W_pred = theThickness * cot_angle // geometric correction (in cm) - - lorentz_shift; // (in cm) &&& check fpix! - - W_eff = std::abs( W_pred ) - W_inner; - - //--- If the observed charge width is inconsistent with the expectations - //--- based on the track, do *not* use W_pred-W_innner. Instead, replace - //--- it with an *average* effective charge width, which is the average - //--- length of the edge pixels. - // - simple = ( W_eff < 0.0f ) | ( W_eff > pitch ); // this produces "large" regressions for very small numeric differences... - - } - if (simple) { - //--- Total length of the two edge pixels (first+last) - float sum_of_edge = 2.0f; - if (first_is_big) sum_of_edge += 1.0f; - if (last_is_big) sum_of_edge += 1.0f; - W_eff = pitch * 0.5f * sum_of_edge; // ave. length of edge pixels (first+last) (cm) - } - - - //--- Finally, compute the position in this projection - float Qdiff = Q_l - Q_f; - float Qsum = Q_l + Q_f; - - //--- Temporary fix for clusters with both first and last pixel with charge = 0 - if(Qsum==0) Qsum=1.0f; - return 0.5f*(Qdiff/Qsum) * W_eff; - + float correction( + int sizeM1, + int Q_f, //!< Charge in the first pixel. + int Q_l, //!< Charge in the last pixel. + uint16_t upper_edge_first_pix, //!< As the name says. + uint16_t lower_edge_last_pix, //!< As the name says. + float lorentz_shift, //!< L-shift at half thickness + float theThickness, //detector thickness + float cot_angle, //!< cot of alpha_ or beta_ + float pitch, //!< thePitchX or thePitchY + bool first_is_big, //!< true if the first is big + bool last_is_big ) //!< true if the last is big + { + if (0 == sizeM1) // size 1 + return 0; + + float W_eff = 0; + bool simple = true; + if (1 == sizeM1) { // size 2 + //--- Width of the clusters minus the edge (first and last) pixels. + //--- In the note, they are denoted x_F and x_L (and y_F and y_L) + // assert(lower_edge_last_pix >= upper_edge_first_pix); + auto W_inner = pitch * float(lower_edge_last_pix - upper_edge_first_pix); // in cm + + //--- Predicted charge width from geometry + auto W_pred = theThickness * cot_angle // geometric correction (in cm) + - lorentz_shift; // (in cm) &&& check fpix! + + W_eff = std::abs(W_pred) - W_inner; + + //--- If the observed charge width is inconsistent with the expectations + //--- based on the track, do *not* use W_pred-W_inner. Instead, replace + //--- it with an *average* effective charge width, which is the average + //--- length of the edge pixels. + simple = (W_eff < 0.0f) | (W_eff > pitch); // this produces "large" regressions for very small numeric differences... + } + + if (simple) { + //--- Total length of the two edge pixels (first+last) + float sum_of_edge = 2.0f; + if (first_is_big) sum_of_edge += 1.0f; + if (last_is_big) sum_of_edge += 1.0f; + W_eff = pitch * 0.5f * sum_of_edge; // ave. length of edge pixels (first+last) (cm) + } + + //--- Finally, compute the position in this projection + float Qdiff = Q_l - Q_f; + float Qsum = Q_l + Q_f; + + //--- Temporary fix for clusters with both first and last pixel with charge = 0 + if (Qsum == 0) + Qsum = 1.0f; + + return 0.5f * (Qdiff/Qsum) * W_eff; } constexpr inline void position(CommonParams const & comParams, DetParams const & detParams, ClusParams & cp, uint32_t ic) { - //--- Upper Right corner of Lower Left pixel -- in measurement frame - uint16_t llx = cp.minRow[ic]+1; - uint16_t lly = cp.minCol[ic]+1; - - //--- Lower Left corner of Upper Right pixel -- in measurement frame - uint16_t urx = cp.maxRow[ic]; - uint16_t ury = cp.maxCol[ic]; - - auto llxl = phase1PixelTopology::localX(llx); - auto llyl = phase1PixelTopology::localY(lly); - auto urxl = phase1PixelTopology::localX(urx); - auto uryl = phase1PixelTopology::localY(ury); - - auto mx = llxl+urxl; - auto my = llyl+uryl; - - // apply the lorentz offset correction - auto xPos = detParams.shiftX + comParams.thePitchX*(0.5f*float(mx)+float(phase1PixelTopology::xOffset)); - auto yPos = detParams.shiftY + comParams.thePitchY*(0.5f*float(my)+float(phase1PixelTopology::yOffset)); - - float cotalpha=0, cotbeta=0; - - - computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); - - auto thickness = detParams.isBarrel ? comParams.theThicknessB : comParams.theThicknessE; - - auto xcorr = correction( - cp.maxRow[ic]-cp.minRow[ic], - cp.Q_f_X[ic], cp.Q_l_X[ic], - llxl, urxl, - detParams.chargeWidthX, // lorentz shift in cm - thickness, - cotalpha, - comParams.thePitchX, - phase1PixelTopology::isBigPixX( cp.minRow[ic] ), - phase1PixelTopology::isBigPixX( cp.maxRow[ic] ) - ); - - - auto ycorr = correction( - cp.maxCol[ic]-cp.minCol[ic], - cp.Q_f_Y[ic], cp.Q_l_Y[ic], - llyl, uryl, - detParams.chargeWidthY, // lorentz shift in cm - thickness, - cotbeta, - comParams.thePitchY, - phase1PixelTopology::isBigPixY( cp.minCol[ic] ), - phase1PixelTopology::isBigPixY( cp.maxCol[ic] ) - ); - - cp.xpos[ic]=xPos+xcorr; - cp.ypos[ic]=yPos+ycorr; - + //--- Upper Right corner of Lower Left pixel -- in measurement frame + uint16_t llx = cp.minRow[ic]+1; + uint16_t lly = cp.minCol[ic]+1; + + //--- Lower Left corner of Upper Right pixel -- in measurement frame + uint16_t urx = cp.maxRow[ic]; + uint16_t ury = cp.maxCol[ic]; + + auto llxl = phase1PixelTopology::localX(llx); + auto llyl = phase1PixelTopology::localY(lly); + auto urxl = phase1PixelTopology::localX(urx); + auto uryl = phase1PixelTopology::localY(ury); + + auto mx = llxl+urxl; + auto my = llyl+uryl; + + // apply the lorentz offset correction + auto xPos = detParams.shiftX + comParams.thePitchX*(0.5f*float(mx)+float(phase1PixelTopology::xOffset)); + auto yPos = detParams.shiftY + comParams.thePitchY*(0.5f*float(my)+float(phase1PixelTopology::yOffset)); + + float cotalpha=0, cotbeta=0; + + computeAnglesFromDet(detParams, xPos, yPos, cotalpha, cotbeta); + + auto thickness = detParams.isBarrel ? comParams.theThicknessB : comParams.theThicknessE; + + auto xcorr = correction( + cp.maxRow[ic]-cp.minRow[ic], + cp.Q_f_X[ic], cp.Q_l_X[ic], + llxl, urxl, + detParams.chargeWidthX, // lorentz shift in cm + thickness, + cotalpha, + comParams.thePitchX, + phase1PixelTopology::isBigPixX(cp.minRow[ic]), + phase1PixelTopology::isBigPixX(cp.maxRow[ic]) ); + + auto ycorr = correction( + cp.maxCol[ic]-cp.minCol[ic], + cp.Q_f_Y[ic], cp.Q_l_Y[ic], + llyl, uryl, + detParams.chargeWidthY, // lorentz shift in cm + thickness, + cotbeta, + comParams.thePitchY, + phase1PixelTopology::isBigPixY(cp.minCol[ic]), + phase1PixelTopology::isBigPixY(cp.maxCol[ic]) ); + + cp.xpos[ic]=xPos+xcorr; + cp.ypos[ic]=yPos+ycorr; } - // FIXME these are errors form Run1 constexpr inline void error(CommonParams const & comParams, DetParams const & detParams, ClusParams & cp, uint32_t ic) { - // Edge cluster errors - cp.xerr[ic]= 0.0050; - cp.yerr[ic]= 0.0085; - - - constexpr float xerr_barrel_l1[] = {0.00115, 0.00120, 0.00088}; - constexpr float xerr_barrel_l1_def = 0.01030; - constexpr float yerr_barrel_l1[] = {0.00375,0.00230,0.00250,0.00250,0.00230,0.00230,0.00210,0.00210,0.00240}; - constexpr float yerr_barrel_l1_def=0.00210; - constexpr float xerr_barrel_ln[]= {0.00115, 0.00120, 0.00088}; - constexpr float xerr_barrel_ln_def=0.01030; - constexpr float yerr_barrel_ln[]= {0.00375,0.00230,0.00250,0.00250,0.00230,0.00230,0.00210,0.00210,0.00240}; - constexpr float yerr_barrel_ln_def=0.00210; - constexpr float xerr_endcap[]= {0.0020, 0.0020}; - constexpr float xerr_endcap_def=0.0020; - constexpr float yerr_endcap[]= {0.00210}; - constexpr float yerr_endcap_def=0.00210; - - // is edgy? - bool isEdgeX = cp.minRow[ic]==0 || cp.maxRow[ic]==phase1PixelTopology::lastRowInModule; - bool isEdgeY = cp.minCol[ic]==0 || cp.maxCol[ic]==phase1PixelTopology::lastColInModule; - - if (!isEdgeX) { - auto sx = cp.maxRow[ic]-cp.minRow[ic]; - if (!detParams.isBarrel ) { - cp.xerr[ic] = sx // CMSSW headers -#include "EventFilter/SiPixelRawToDigi/plugins/RawToDigiGPU.h" +#include "EventFilter/SiPixelRawToDigi/plugins/SiPixelRawToDigiGPUKernel.h" #include "HeterogeneousCore/CUDAUtilities/interface/cudaCheck.h" #include "RecoLocalTracker/SiPixelClusterizer/plugins/gpuClustering.h" #include "PixelRecHits.h" @@ -35,7 +35,7 @@ HitsOnGPU allocHitsOnGPU() { HitsOnCPU pixelRecHits_wrapper( - context const & c, + pixelgpudetails::context const & c, pixelCPEforGPU::ParamsOnGPU const * cpeParams, uint32_t ndigis, uint32_t nModules, // active modules (with digis) diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h index 75976ad697a1a..9126ab1c8abd1 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h +++ b/RecoLocalTracker/SiPixelRecHits/plugins/PixelRecHits.h @@ -8,7 +8,9 @@ namespace pixelCPEforGPU { struct ParamsOnGPU; } -struct context; +namespace pixelgpudetails { + struct context; +} struct HitsOnGPU{ uint32_t * hitsModuleStart_d; @@ -32,7 +34,7 @@ struct HitsOnCPU { HitsOnGPU allocHitsOnGPU(); HitsOnCPU pixelRecHits_wrapper( - context const & c, + pixelgpudetails::context const & c, pixelCPEforGPU::ParamsOnGPU const * cpeParams, uint32_t ndigis, uint32_t nModules, // active modules (with digis) diff --git a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitGPU.cc b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitGPU.cc index aa9c2751c3848..d2ad7d5cc2806 100644 --- a/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitGPU.cc +++ b/RecoLocalTracker/SiPixelRecHits/plugins/SiPixelRecHitGPU.cc @@ -136,7 +136,7 @@ using namespace std; // Step B*: create CPE edm::ESHandle hCPE; std::string cpeName_ = conf_.getParameter("CPE"); - es.get().get(cpeName_,hCPE); + es.get().get(cpeName_, hCPE); cpe_ = dynamic_cast< const PixelCPEBase* >(&(*hCPE)); /// do it on GPU.... @@ -152,14 +152,14 @@ using namespace std; } assert(fcpe->d_paramsOnGPU); - auto hoc = pixelRecHits_wrapper(* (context const *)(gprod[0]),fcpe->d_paramsOnGPU,gprod[1],gprod[2], hitsOnGPU_); + auto hoc = pixelRecHits_wrapper(* (pixelgpudetails::context const *)(gprod[0]), fcpe->d_paramsOnGPU, gprod[1], gprod[2], hitsOnGPU_); // Step C: Iterate over DetIds and invoke the strip CPE algorithm // on each DetUnit // std::cout << "Number of Clusers on CPU " << (*input).data().size() << std::endl; - run( input, *output, geom,hoc ); + run( input, *output, geom, hoc ); // std::cout << "Number of Hits on CPU " << (*output).data().size() << std::endl; output->shrink_to_fit(); @@ -192,7 +192,7 @@ using namespace std; auto gind = genericDet->index(); const PixelGeomDetUnit * pixDet = dynamic_cast(genericDet); assert(pixDet); - SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(output,detid); + SiPixelRecHitCollectionNew::FastFiller recHitsOnDetUnit(output, detid); auto fc = hoc.hitsModuleStart[gind]; auto lc = hoc.hitsModuleStart[gind+1]; auto nhits = lc-fc; @@ -201,10 +201,10 @@ using namespace std; uint32_t ngh=0; for (uint32_t i=0; i=96 && hoc.charge[fc+i]<4000) ) continue; - ind[ngh]=i;std::push_heap(ind,ind+ngh+1,[&](auto a, auto b) { return mrp[a]size()); for (auto const & clust : *DSViter) { @@ -233,8 +233,8 @@ using namespace std; << gind <<'/'<=MaxClusInModule); - assert(nclus<=MaxClusInModule); + assert(blockDim.x >= MaxClusInModule); + assert(nclus <= MaxClusInModule); auto ic = threadIdx.x; - - if (ic::max(); clusParams.maxRow[ic] = 0; clusParams.minCol[ic] = std::numeric_limits::max(); clusParams.maxCol[ic] = 0; - clusParams.charge[ic] = 0; - clusParams.Q_f_X[ic] = 0; clusParams.Q_l_X[ic] = 0; clusParams.Q_f_Y[ic] = 0; clusParams.Q_l_Y[ic] = 0; } - - first+=threadIdx.x; - - __syncthreads(); + first += threadIdx.x; + __syncthreads(); // one thead per "digi" - - for (int i=first; i=nclus) return; + if (ic >= nclus) return; first = hitsModuleStart[me]; auto h = first+ic; // output index in global memory - assert(h<2000*256); + assert(h < 2000*256); + + pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); + pixelCPEforGPU::error(cpeParams->commonParams(), cpeParams->detParams(me), clusParams, ic); - pixelCPEforGPU::position(cpeParams->commonParams(), cpeParams->detParams(me), clusParams,ic); - pixelCPEforGPU::error(cpeParams->commonParams(), cpeParams->detParams(me), clusParams,ic); - chargeh[h] = clusParams.charge[ic]; - if (local) { - xh[h]= clusParams.xpos[ic]; - yh[h]= clusParams.ypos[ic]; + if (local) { + xh[h] = clusParams.xpos[ic]; + yh[h] = clusParams.ypos[ic]; } else { - cpeParams->detParams(me).frame.toGlobal(clusParams.xpos[ic],clusParams.ypos[ic], - xh[h],yh[h],zh[h] - ); + cpeParams->detParams(me).frame.toGlobal(clusParams.xpos[ic], clusParams.ypos[ic], + xh[h], yh[h], zh[h] ); } - xe[h]= clusParams.xerr[ic]; - ye[h]= clusParams.yerr[ic]; - mr[h]= clusParams.minRow[ic]; + xe[h] = clusParams.xerr[ic]; + ye[h] = clusParams.yerr[ic]; + mr[h] = clusParams.minRow[ic]; } }