From 44f31f4120279ff0141979715592138411ab5778 Mon Sep 17 00:00:00 2001 From: Piotr Jarosik Date: Fri, 5 Nov 2021 11:21:39 +0100 Subject: [PATCH 1/3] Increased the maximum voltage for esaote probes to 90 V. --- arrus/cfg/default.dict | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arrus/cfg/default.dict b/arrus/cfg/default.dict index c54058af4..ae46f3409 100644 --- a/arrus/cfg/default.dict +++ b/arrus/cfg/default.dict @@ -358,7 +358,7 @@ probe_models: [ }, voltage_range: { begin: 0, - end: 75 + end: 90 } }, { @@ -375,7 +375,7 @@ probe_models: [ }, voltage_range: { begin: 0, - end: 75 + end: 90 } }, { @@ -391,7 +391,7 @@ probe_models: [ }, voltage_range: { begin: 0, - end: 75 + end: 90 } }, { @@ -407,7 +407,7 @@ probe_models: [ }, voltage_range: { begin: 0, - end: 75 + end: 90 } }, { From 02d71df25ae921fa73410ddcc2ad49744c2e5187 Mon Sep 17 00:00:00 2001 From: Piotr Jarosik Date: Fri, 5 Nov 2021 11:26:37 +0100 Subject: [PATCH 2/3] Fixed temporary variable cleanup in iq 2 lri CUDA kernel reconstruction. --- api/python/arrus/utils/iq_raw_2_lri.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/python/arrus/utils/iq_raw_2_lri.cu b/api/python/arrus/utils/iq_raw_2_lri.cu index 81e6c0573..4b5bcdc28 100644 --- a/api/python/arrus/utils/iq_raw_2_lri.cu +++ b/api/python/arrus/utils/iq_raw_2_lri.cu @@ -84,6 +84,8 @@ iqRaw2Lri(complex *iqLri, const complex *iqRaw, (xPix[x] - xElem[txApLstElem[iTx]])*cosf(txAngZX[iTx])) >= 0.f)) ? 1.f : 0.f; } pixWgh = 0.0f; + pix.real(0.0f); + pix.imag(0.0f); if (txApod != 0.0f) { for (int iRx = 0; iRx < nRx; iRx++) { From 906f931fa60e0a32f05823ae8fcd4651fbea2cec Mon Sep 17 00:00:00 2001 From: Piotr Jarosik Date: Fri, 12 Nov 2021 09:46:01 +0100 Subject: [PATCH 3/3] Improved iq raw 2 LRI performance. Moved the probe geometry to constant memory, changed kernel grid of threads dimensions. --- api/python/arrus/utils/imaging.py | 47 +++++-- api/python/arrus/utils/iq_raw_2_lri.cu | 174 ++++++++++++------------- 2 files changed, 122 insertions(+), 99 deletions(-) diff --git a/api/python/arrus/utils/imaging.py b/api/python/arrus/utils/imaging.py index 33a006ac5..5d63244e3 100644 --- a/api/python/arrus/utils/imaging.py +++ b/api/python/arrus/utils/imaging.py @@ -1435,18 +1435,21 @@ def __init__(self, x_grid, z_grid, rx_tang_limits=None): def set_pkgs(self, num_pkg, **kwargs): if num_pkg is np: - raise ValueError("Currently reconstructLri operation is " - "implemented for GPU only.") + raise ValueError("ReconstructLri operation is implemented for GPU only.") def prepare(self, const_metadata): from pathlib import Path import os + import cupy as cp + current_dir = os.path.dirname(os.path.join(os.path.abspath(__file__))) _kernel_source = Path(os.path.join(current_dir, "iq_raw_2_lri.cu")).read_text() - self._kernel = self.num_pkg.RawKernel(_kernel_source, "iqRaw2Lri") + self._kernel_module = self.num_pkg.RawModule(code=_kernel_source) + self._kernel = self._kernel_module.get_function("iqRaw2Lri") + self._z_elem_const = self._kernel_module.get_global("zElemConst") + self._tang_elem_const = self._kernel_module.get_global("tangElemConst") # INPUT PARAMETERS. - # Input data shape. self.n_tx, self.n_rx, self.n_samples = const_metadata.input_shape @@ -1461,10 +1464,13 @@ def prepare(self, const_metadata): self.output_buffer = self.num_pkg.zeros(output_shape, dtype=self.num_pkg.complex64) x_block_size = min(self.x_size, 16) z_block_size = min(self.z_size, 16) - self.block_size = (z_block_size, x_block_size, 1) + tx_block_size = min(self.n_tx, 4) + self.block_size = (z_block_size, x_block_size, tx_block_size) self.grid_size = (int((self.z_size-1)//z_block_size + 1), int((self.x_size-1)//x_block_size + 1), - self.n_tx) + int((self.n_tx-1)//tx_block_size + 1)) + print(self.block_size) + print(self.grid_size) self.x_pix = self.num_pkg.asarray(self.x_grid, dtype=self.num_pkg.float32) self.z_pix = self.num_pkg.asarray(self.z_grid, dtype=self.num_pkg.float32) @@ -1478,11 +1484,23 @@ def prepare(self, const_metadata): element_pos_x = probe_model.element_pos_x element_pos_z = probe_model.element_pos_z element_angle_tang = np.tan(probe_model.element_angle) - self.x_elem = self.num_pkg.asarray(element_pos_x, dtype=self.num_pkg.float32) - self.z_elem = self.num_pkg.asarray(element_pos_z, dtype=self.num_pkg.float32) - self.tang_elem = self.num_pkg.asarray(element_angle_tang, dtype=self.num_pkg.float32) + self.n_elements = probe_model.n_elements + device_props = cp.cuda.runtime.getDeviceProperties(0) + if device_props["totalConstMem"] < 256*3*4: # 3 float32 arrays, 256 elements max + raise ValueError("There is not enough constant memory available!") + + x_elem = np.asarray(element_pos_x, dtype=self.num_pkg.float32) + self._x_elem_const = _get_const_memory_array( + self._kernel_module, name="xElemConst", input_array=x_elem) + z_elem = np.asarray(element_pos_z, dtype=self.num_pkg.float32) + self._z_elem_const = _get_const_memory_array( + self._kernel_module, name="zElemConst", input_array=z_elem) + tang_elem = np.asarray(element_angle_tang, dtype=self.num_pkg.float32) + self._tang_elem_const = _get_const_memory_array( + self._kernel_module, name="tangElemConst", input_array=tang_elem) + # TX aperture description # Convert the sequence to the positions of the aperture centers tx_rx_params = arrus.kernels.imaging.compute_tx_rx_params( @@ -1530,8 +1548,7 @@ def process(self, data): params = ( self.output_buffer, data, - self.x_elem, self.z_elem, self.tang_elem, self.n_elements, # DONE - self.n_tx, self.n_samples, + self.n_elements, self.n_tx, self.n_samples, self.z_pix, self.z_size, self.x_pix, self.x_size, self.sos, self.fs, self.fn, @@ -1757,3 +1774,11 @@ def process(self, data): self._remap_fn(data) return self._output_buffer + +def _get_const_memory_array(module, name, input_array): + import cupy as cp + const_arr_ptr = module.get_global(name) + const_arr = cp.ndarray(shape=input_array.shape, dtype=input_array.dtype, + memptr=const_arr_ptr) + const_arr.set(input_array) + return const_arr \ No newline at end of file diff --git a/api/python/arrus/utils/iq_raw_2_lri.cu b/api/python/arrus/utils/iq_raw_2_lri.cu index 4b5bcdc28..45bfed761 100644 --- a/api/python/arrus/utils/iq_raw_2_lri.cu +++ b/api/python/arrus/utils/iq_raw_2_lri.cu @@ -2,11 +2,14 @@ #define CUDART_PI_F 3.141592654f +__constant__ float zElemConst[256]; +__constant__ float xElemConst[256]; +__constant__ float tangElemConst[256]; + extern "C" __global__ void iqRaw2Lri(complex *iqLri, const complex *iqRaw, - const float *xElem, const float *zElem, const float *tangElem, const int nElem, - const int nTx, const int nSamp, + const int nElem, const int nTx, const int nSamp, const float *zPix, const int nZPix, const float *xPix, const int nXPix, float const sos, float const fs, float const fn, @@ -19,8 +22,9 @@ iqRaw2Lri(complex *iqLri, const complex *iqRaw, int z = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.y * blockDim.y + threadIdx.y; + int iTx = blockIdx.z * blockDim.z + threadIdx.z; - if (z >= nZPix || x >= nXPix) { + if(z >= nZPix || x >= nXPix || iTx >= nTx) { return; } @@ -36,93 +40,87 @@ iqRaw2Lri(complex *iqLri, const complex *iqRaw, const float centRxTang = (maxRxTang + minRxTang) * 0.5f; complex pix(0.0f, 0.0f), samp(0.0f, 0.0f), modFactor; - for (int iTx = 0; iTx < nTx; ++iTx) { - int txOffset = iTx*nSamp*nRx; - - if (!isinf(txFoc[iTx])) { - /* STA */ - float zFoc = txApCentZ[iTx] + txFoc[iTx]*cosf(txAngZX[iTx]); - float xFoc = txApCentX[iTx] + txFoc[iTx]*sinf(txAngZX[iTx]); - - float pixFocArrang; - - if (txFoc[iTx] <= 0.0f) { - /* Virtual Point Source BEHIND probe surface */ - // Valid pixels are assumed to be always in front of the focal point (VSP) - pixFocArrang = 1.0f; - } - else { - /* Virtual Point Source IN FRONT OF probe surface */ - // Projection of the Foc-Pix vector on the ApCent-Foc vector (dot product) ... - // to determine if the pixel is behind (-) or in front of (+) the focal point (VSP). - pixFocArrang = (((zPix[z]-zFoc)*(zFoc-txApCentZ[iTx]) + - (xPix[x]-xFoc)*(xFoc-txApCentX[iTx])) >= 0.f) ? 1.f : -1.f; - } - txDist = hypotf(zPix[z] - zFoc, xPix[x] - xFoc); - txDist *= pixFocArrang; // Compensation for the Pix-Foc arrangement - txDist += txFoc[iTx]; // Compensation for the reference time being the moment when txApCent fires. - - // Projections of Foc-Pix vector on the rotated Foc-ApEdge vectors (dot products) ... - // to determine if the pixel is in the sonified area (dot product >= 0). - // Foc-ApEdgeFst vector is rotated left, Foc-ApEdgeLst vector is rotated right. - txApod = ( ( (-(xElem[txApFstElem[iTx]] - xFoc)*(zPix[z] - zFoc) + - (zElem[txApFstElem[iTx]] - zFoc)*(xPix[x] - xFoc))*pixFocArrang >= 0.f ) && - ( ( (xElem[txApLstElem[iTx]] - xFoc)*(zPix[z] - zFoc) - - (zElem[txApLstElem[iTx]] - zFoc)*(xPix[x] - xFoc))*pixFocArrang >= 0.f ) ) ? 1.f : 0.f; - } - else { - /* PWI */ - txDist = (zPix[z] - txApCentZ[iTx]) * cosf(txAngZX[iTx]) + - (xPix[x] - txApCentX[iTx]) * sinf(txAngZX[iTx]); - - // Projections of ApEdge-Pix vector on the rotated unit vector of tx direction (dot products) ... - // to determine if the pixel is in the sonified area (dot product >= 0). - // For ApEdgeFst, the vector is rotated left, for ApEdgeLst the vector is rotated right. - txApod = (((-(zPix[z] - zElem[txApFstElem[iTx]])*sinf(txAngZX[iTx]) + - (xPix[x] - xElem[txApFstElem[iTx]])*cosf(txAngZX[iTx])) >= 0.f) && - (((zPix[z] - zElem[txApLstElem[iTx]])*sinf(txAngZX[iTx]) - - (xPix[x] - xElem[txApLstElem[iTx]])*cosf(txAngZX[iTx])) >= 0.f)) ? 1.f : 0.f; + int txOffset = iTx * nSamp * nRx; + + if(!isinf(txFoc[iTx])) { + /* STA */ + float zFoc = txApCentZ[iTx] + txFoc[iTx] * cosf(txAngZX[iTx]); + float xFoc = txApCentX[iTx] + txFoc[iTx] * sinf(txAngZX[iTx]); + + float pixFocArrang; + + if(txFoc[iTx] <= 0.0f) { + /* Virtual Point Source BEHIND probe surface */ + // Valid pixels are assumed to be always in front of the focal point (VSP) + pixFocArrang = 1.0f; + } else { + /* Virtual Point Source IN FRONT OF probe surface */ + // Projection of the Foc-Pix vector on the ApCent-Foc vector (dot product) ... + // to determine if the pixel is behind (-) or in front of (+) the focal point (VSP). + pixFocArrang = (((zPix[z] - zFoc) * (zFoc - txApCentZ[iTx]) + + (xPix[x] - xFoc) * (xFoc - txApCentX[iTx])) >= 0.f) ? 1.f : -1.f; } - pixWgh = 0.0f; - pix.real(0.0f); - pix.imag(0.0f); - - if (txApod != 0.0f) { - for (int iRx = 0; iRx < nRx; iRx++) { - iElem = iRx + rxApOrigElem[iTx]; - if (iElem < 0 || iElem >= nElem) continue; - - rxDist = hypotf(xPix[x] - xElem[iElem], zPix[z] - zElem[iElem]); - rxTang = __fdividef(xPix[x] - xElem[iElem], zPix[z] - zElem[iElem]); - rxTang = __fdividef(rxTang - tangElem[iElem], 1.f + rxTang*tangElem[iElem]); - if (rxTang < minRxTang || rxTang > maxRxTang) continue; - - rxApod = (rxTang - centRxTang) * rngRxTangInv; - rxApod = __expf(-rxApod * rxApod * twoSigSqrInv); - - time = (txDist + rxDist)*sosInv + initDel; - iSamp = time * fs; - if (iSamp < 0.0f || iSamp >= static_cast(nSamp - 1)) { - continue; - } - offset = txOffset + iRx*nSamp; - interpWgh = modff(iSamp, &iSamp); - int intSamp = int(iSamp); - - __sincosf(omega*time, &modSin, &modCos); - complex modFactor = complex(modCos, modSin); - - samp = iqRaw[offset+intSamp]*(1-interpWgh) + iqRaw[offset+intSamp+1]*interpWgh; - pix += samp*modFactor*rxApod; - pixWgh += rxApod; + txDist = hypotf(zPix[z] - zFoc, xPix[x] - xFoc); + txDist *= pixFocArrang; // Compensation for the Pix-Foc arrangement + txDist += txFoc[iTx]; // Compensation for the reference time being the moment when txApCent fires. + + // Projections of Foc-Pix vector on the rotated Foc-ApEdge vectors (dot products) ... + // to determine if the pixel is in the sonified area (dot product >= 0). + // Foc-ApEdgeFst vector is rotated left, Foc-ApEdgeLst vector is rotated right. + txApod = (((-(xElemConst[txApFstElem[iTx]] - xFoc) * (zPix[z] - zFoc) + + (zElemConst[txApFstElem[iTx]] - zFoc) * (xPix[x] - xFoc)) * pixFocArrang >= 0.f) && + (((xElemConst[txApLstElem[iTx]] - xFoc) * (zPix[z] - zFoc) - + (zElemConst[txApLstElem[iTx]] - zFoc) * (xPix[x] - xFoc)) * pixFocArrang >= 0.f)) ? 1.f : 0.f; + } else { + /* PWI */ + txDist = (zPix[z] - txApCentZ[iTx]) * cosf(txAngZX[iTx]) + + (xPix[x] - txApCentX[iTx]) * sinf(txAngZX[iTx]); + + // Projections of ApEdge-Pix vector on the rotated unit vector of tx direction (dot products) ... + // to determine if the pixel is in the sonified area (dot product >= 0). + // For ApEdgeFst, the vector is rotated left, for ApEdgeLst the vector is rotated right. + txApod = (((-(zPix[z] - zElemConst[txApFstElem[iTx]]) * sinf(txAngZX[iTx]) + + (xPix[x] - xElemConst[txApFstElem[iTx]]) * cosf(txAngZX[iTx])) >= 0.f) && + (((zPix[z] - zElemConst[txApLstElem[iTx]]) * sinf(txAngZX[iTx]) - + (xPix[x] - xElemConst[txApLstElem[iTx]]) * cosf(txAngZX[iTx])) >= 0.f)) ? 1.f : 0.f; + } + pixWgh = 0.0f; + pix.real(0.0f); + pix.imag(0.0f); + + if(txApod != 0.0f) { + for(int iRx = 0; iRx < nRx; iRx++) { + iElem = iRx + rxApOrigElem[iTx]; + if(iElem < 0 || iElem >= nElem) continue; + + rxDist = hypotf(xPix[x] - xElemConst[iElem], zPix[z] - zElemConst[iElem]); + rxTang = __fdividef(xPix[x] - xElemConst[iElem], zPix[z] - zElemConst[iElem]); + rxTang = __fdividef(rxTang - tangElemConst[iElem], 1.f + rxTang * tangElemConst[iElem]); + if(rxTang < minRxTang || rxTang > maxRxTang) continue; + + rxApod = (rxTang - centRxTang) * rngRxTangInv; + rxApod = __expf(-rxApod * rxApod * twoSigSqrInv); + + time = (txDist + rxDist) * sosInv + initDel; + iSamp = time * fs; + if(iSamp < 0.0f || iSamp >= static_cast(nSamp - 1)) { + continue; } - } - if(pixWgh == 0.0f) { - iqLri[z + x*nZPix + iTx*nZPix*nXPix] = complex(0.0f, 0.0f); - } - else { - iqLri[z + x * nZPix + iTx * nZPix * nXPix] = pix/pixWgh*txApod; + offset = txOffset + iRx * nSamp; + interpWgh = modff(iSamp, &iSamp); + int intSamp = int(iSamp); + + __sincosf(omega * time, &modSin, &modCos); + complex modFactor = complex(modCos, modSin); + + samp = iqRaw[offset + intSamp] * (1 - interpWgh) + iqRaw[offset + intSamp + 1] * interpWgh; + pix += samp * modFactor * rxApod; + pixWgh += rxApod; } } - + if(pixWgh == 0.0f) { + iqLri[z + x * nZPix + iTx * nZPix * nXPix] = complex(0.0f, 0.0f); + } else { + iqLri[z + x * nZPix + iTx * nZPix * nXPix] = pix / pixWgh * txApod; + } } \ No newline at end of file