From 440c08a43531430246adccaf75b6b1fe2c728d8f Mon Sep 17 00:00:00 2001 From: Piotr Jarosik Date: Sun, 3 Apr 2022 15:04:38 +0200 Subject: [PATCH] Fixed TX apodization for CUDA kernel for 3D. --- api/python/arrus/utils/imaging.py | 107 +++++++++++++--------- api/python/arrus/utils/iq_raw_2_lri_3d.cu | 31 ++++--- 2 files changed, 83 insertions(+), 55 deletions(-) diff --git a/api/python/arrus/utils/imaging.py b/api/python/arrus/utils/imaging.py index 8482099f4..bb872dada 100644 --- a/api/python/arrus/utils/imaging.py +++ b/api/python/arrus/utils/imaging.py @@ -2175,10 +2175,7 @@ class ReconstructLri3D(Operation): """ def __init__(self, x_grid, y_grid, z_grid, tx_foc, tx_ang_zx, tx_ang_zy, - tx_ap_cent_x, tx_ap_cent_y, speed_of_sound, rx_tang_limits=None): - self.tx_ap_cent_y = tx_ap_cent_y - self.tx_ap_cent_x = tx_ap_cent_x self.tx_ang_zy = tx_ang_zy self.tx_ang_zx = tx_ang_zx self.tx_foc = tx_foc @@ -2194,6 +2191,17 @@ def set_pkgs(self, num_pkg, **kwargs): if num_pkg is np: raise ValueError("ReconstructLri3D operation is implemented for GPU only.") + def _get_aperture_boundaries(self, apertures): + def get_min_max_x_y(ap): + cords = np.argwhere(ap) + y, x = zip(*cords) + return np.min(x), np.max(x), np.min(y), np.max(y) + min_max_x_y = (get_min_max_x_y(aperture) for aperture in apertures) + min_x, max_x, min_y, max_y = zip(*min_max_x_y) + min_x, max_x = np.atleast_1d(min_x), np.atleast_1d(max_x) + min_y, max_y = np.atleast_1d(min_y), np.atleast_1d(max_y) + return min_x, max_x, min_y, max_y + def prepare(self, const_metadata): import cupy as cp @@ -2220,7 +2228,7 @@ def prepare(self, const_metadata): self.y_size = len(self.y_grid) self.x_size = len(self.x_grid) self.z_size = len(self.z_grid) - output_shape = (self.n_seq, self.n_tx, self.y_size, self.x_size, self.z_size) + output_shape = (self.n_seq, self.y_size, self.x_size, self.z_size) self.output_buffer = self.num_pkg.zeros(output_shape, dtype=self.num_pkg.complex64) x_block_size = min(self.z_size, 8) y_block_size = min(self.x_size, 8) @@ -2242,26 +2250,20 @@ def prepare(self, const_metadata): self.tx_ang_zx = self.num_pkg.asarray(self.tx_ang_zx).astype(np.float32) self.tx_ang_zy = self.num_pkg.asarray(self.tx_ang_zy).astype(np.float32) - # Could be determined automatically based on - self.tx_ap_cent_x = self.num_pkg.asarray(self.tx_ap_cent_x).astype(np.float32) - self.tx_ap_cent_y = self.num_pkg.asarray(self.tx_ap_cent_y).astype(np.float32) - # Probe description # TODO specific for Vermon mat-3d probe. probe_model = const_metadata.context.device.probe.model pitch = 0.3e-3 # probe_model.pitch self.n_elements = 32 + n_rows_x = self.n_elements + n_rows_y = self.n_elements + 3 # General regular position of elements - element_pos = np.arange(-(self.n_elements - 1)/2, self.n_elements/2) - element_pos_x = element_pos*pitch - element_pos_y = np.zeros_like(element_pos) - # E.g. rows - # 0:8 -> 0:8 - # 8:16 -> 9:17 (+ 1 offset) - # 16:24 -> 18:26 (+2 offset) - # 24:32 -> 27:35 (+ 3 offset) - for i in range(4): - element_pos_y[i*8:(i+1)*8] = (element_pos[i*8:(i+1)*8]+i)*pitch + element_pos_x = np.linspace(-(n_rows_x - 1)/2, (n_rows_x - 1)/2, num=n_rows_x) + element_pos_x = element_pos_x*pitch + + element_pos_y = np.linspace(-(n_rows_y - 1)/2, (n_rows_y - 1)/2, num=n_rows_y) + element_pos_y = element_pos_y*pitch + element_pos_y = np.delete(element_pos_y, (8, 17, 26)) element_pos_x = element_pos_x.astype(np.float32) element_pos_y = element_pos_y.astype(np.float32) @@ -2282,24 +2284,44 @@ def get_min_max_x_y(aperture): return np.min(x), np.max(x), np.min(y), np.max(y) # TODO assumption, that probe has the same number elements in both dimensions - apertures = (tx_rx.tx.aperture.reshape((self.n_elements, self.n_elements)) for tx_rx in seq.ops) - min_max_x_y = (get_min_max_x_y(aperture) for aperture in apertures) - min_x, max_x, min_y, max_y = zip(*min_max_x_y) - min_x, max_x = np.atleast_1d(min_x), np.atleast_1d(max_x) - min_y, max_y = np.atleast_1d(min_y), np.atleast_1d(max_y) - self.tx_ap_first_elem_x = self.num_pkg.asarray(min_x, dtype=self.num_pkg.int32) - self.tx_ap_last_elem_x = self.num_pkg.asarray(max_x, dtype=self.num_pkg.int32) - self.tx_ap_first_elem_y = self.num_pkg.asarray(min_y, dtype=self.num_pkg.int32) - self.tx_ap_last_elem_y = self.num_pkg.asarray(max_y, dtype=self.num_pkg.int32) - - # Find the tx_center_delay + tx_apertures = (tx_rx.tx.aperture.reshape((self.n_elements, self.n_elements)) for tx_rx in seq.ops) + rx_apertures = (tx_rx.rx.aperture.reshape((self.n_elements, self.n_elements)) for tx_rx in seq.ops) + tx_bounds = self._get_aperture_boundaries(tx_apertures) + rx_bounds = self._get_aperture_boundaries(rx_apertures) + txap_min_x, txap_max_x, txap_min_y, txap_max_y = tx_bounds + rxap_min_x, rxap_max_x, rxap_min_y, rxap_max_y = rx_bounds + rxap_size_x = set((rxap_max_x-rxap_min_x).tolist()) + rxap_size_y = set((rxap_max_y-rxap_min_y).tolist()) + if len(rxap_size_x) > 1 or len(rxap_size_y) > 1: + raise ValueError("Each TX/RX aperture should have the same square aperture size.") + rxap_size_x = next(iter(rxap_size_x)) + rxap_size_y = next(iter(rxap_size_y)) + # The above can be also compared with the size of data, but right we are not doing it here + + self.tx_ap_first_elem_x = self.num_pkg.asarray(txap_min_x, dtype=self.num_pkg.int32) + self.tx_ap_last_elem_x = self.num_pkg.asarray(txap_max_x, dtype=self.num_pkg.int32) + self.tx_ap_first_elem_y = self.num_pkg.asarray(txap_min_y, dtype=self.num_pkg.int32) + self.tx_ap_last_elem_y = self.num_pkg.asarray(txap_max_y, dtype=self.num_pkg.int32) + # RX AP + self.rx_ap_first_elem_x = self.num_pkg.asarray(rxap_min_x, dtype=self.num_pkg.int32) + self.rx_ap_first_elem_y = self.num_pkg.asarray(rxap_min_y, dtype=self.num_pkg.int32) + + # Find the center of TX aperture. + # TODO note: this method assumes that all TX/RXs have a rectangle TX aperture # 1. Find the position of the center. - ap_center_x = (element_pos_x[min_x] + element_pos_x[max_x])/2 - ap_center_y = (element_pos_y[min_y] + element_pos_y[max_y])/2 - ap_center_elem_x = np.interp(ap_center_x, element_pos_x, np.arange(len(element_pos_x))) - ap_center_elem_y = np.interp(ap_center_y, element_pos_y, np.arange(len(element_pos_y))) - ap_center_elem_x = np.round(ap_center_elem_x).astype(np.int32) - ap_center_elem_y = np.round(ap_center_elem_y).astype(np.int32) + tx_ap_center_x = (element_pos_x[txap_min_x] + element_pos_x[txap_max_x])/2 + tx_ap_center_y = (element_pos_y[txap_min_y] + element_pos_y[txap_max_y])/2 + # element index -> element position + ap_center_elem_x = np.interp(tx_ap_center_x, element_pos_x, np.arange(len(element_pos_x))) + ap_center_elem_y = np.interp(tx_ap_center_y, element_pos_y, np.arange(len(element_pos_y))) + # TODO Currently 'floor' NN, consider interpolating into + # the center delay + ap_center_elem_x = np.floor(ap_center_elem_x).astype(np.int32) + ap_center_elem_y = np.floor(ap_center_elem_y).astype(np.int32) + self.tx_ap_cent_x = self.num_pkg.asarray(tx_ap_center_x).astype(np.float32) + self.tx_ap_cent_y = self.num_pkg.asarray(tx_ap_center_y).astype(np.float32) + + # FIND THE TX_CENTER_DELAY # Make sure, that for all TX/RXs: # The center element is in the aperture # All TX/RX have (almost) the same delay in the aperture's center @@ -2309,15 +2331,16 @@ def get_min_max_x_y(aperture): tx_center_x = ap_center_elem_x[i] tx_center_y = ap_center_elem_y[i] aperture = tx.aperture.reshape((self.n_elements, self.n_elements)) - # TODO the below will work only for full aperture transmits - delays = tx.delays.reshape((self.n_elements, self.n_elements)) + delays = np.zeros(aperture.shape) + delays[:] = np.nan + delays[np.where(aperture)] = tx.delays.flatten() if not aperture[tx_center_y, tx_center_x]: # The aperture's center should transmit signal raise ValueError("TX aperture center should be turned on.") if tx_center_delay is None: tx_center_delay = delays[tx_center_y, tx_center_x] else: - # Make sure that the center' delays is the same for all TX/RXs + # Make sure that the center' delays is the same position for all TX/RXs current_center_delay = delays[tx_center_y, tx_center_x] if not np.isclose(tx_center_delay, current_center_delay): raise ValueError(f"TX/RX {i}: center delay is not close " @@ -2327,7 +2350,7 @@ def get_min_max_x_y(aperture): f"Center delays should be equalized " f"for all TX/RXs. ") - # Min/max tang + # MIN/MAX TANG if self.rx_tang_limits is not None: self.min_tang, self.max_tang = self.rx_tang_limits else: @@ -2338,7 +2361,6 @@ def get_min_max_x_y(aperture): burst_factor = ref_tx.excitation.n_periods / (2*self.fn) self.initial_delay = -start_sample/65e6+burst_factor+tx_center_delay self.initial_delay = self.num_pkg.float32(self.initial_delay) - self.rx_apod = scipy.signal.windows.hamming(20).astype(np.float32) self.rx_apod = self.num_pkg.asarray(self.rx_apod) self.n_rx_apod = self.num_pkg.int32(len(self.rx_apod)) @@ -2361,7 +2383,8 @@ def process(self, data): self.min_tang, self.max_tang, self.min_tang, self.max_tang, self.initial_delay, - self.rx_apod, self.n_rx_apod + self.rx_apod, self.n_rx_apod, + self.rx_ap_first_elem_x, self.rx_ap_first_elem_y ) self._kernel(self.grid_size, self.block_size, params) - return self.output_buffer \ No newline at end of file + return self.output_buffer diff --git a/api/python/arrus/utils/iq_raw_2_lri_3d.cu b/api/python/arrus/utils/iq_raw_2_lri_3d.cu index 742c7a699..4341275a7 100644 --- a/api/python/arrus/utils/iq_raw_2_lri_3d.cu +++ b/api/python/arrus/utils/iq_raw_2_lri_3d.cu @@ -42,8 +42,11 @@ __global__ void iqRaw2Lri3D(complex *iqLri, const complex *input, const float minRxTangZX, const float maxRxTangZX, const float minRxTangZY, const float maxRxTangZY, const float initDel, - const float *rxApod, const int nRxApod) { - + const float *rxApod, const int nRxApod, + // list of positions (x, y) of the first element of rx aperture + // (assuming rectangle aperture, the first element is the one in the top left corner) + const int *rxApFstElemX, const int *rxApFstElemY +) { int z = blockIdx.x * blockDim.x + threadIdx.x; int x = blockIdx.y * blockDim.y + threadIdx.y; int y = blockIdx.z * blockDim.z + threadIdx.z; @@ -97,11 +100,11 @@ __global__ void iqRaw2Lri3D(complex *iqLri, const complex *input, // to determine if the pixel is in the sonified area (dot product >= 0). // Foc-ApEdgeFst vector is rotated left, Foc-ApEdgeLst vector is rotated right. txApod = ( - ((-(zPix[z] - zFoc)*(xElemConst[txApFstElemX[iTx]] - xFoc) - (xPix[x] - xFoc)*zFoc) * pixFocArrang >= 0.f) && - ( ((zPix[z] - zFoc)*(xElemConst[txApLstElemX[iTx]] - xFoc) + (xPix[x] - xFoc)*zFoc) * pixFocArrang >= 0.f) && - ((-(zPix[z] - zFoc)*(yElemConst[txApFstElemY[iTx]] - yFoc) - (yPix[y] - yFoc)*zFoc) * pixFocArrang >= 0.f) && - (( (zPix[z] - zFoc)*(yElemConst[txApLstElemY[iTx]] - yFoc) + (yPix[y] - yFoc)*zFoc) * pixFocArrang >= 0.f) - ) ? 1.f : 0.f; + ((-(zPix[z] - zFoc)*(xElemConst[txApFstElemX[iTx]] - xFoc) - (xPix[x] - xFoc)*zFoc) * pixFocArrang >= 0.f) && + ( ((zPix[z] - zFoc)*(xElemConst[txApLstElemX[iTx]] - xFoc) + (xPix[x] - xFoc)*zFoc) * pixFocArrang >= 0.f) && + ((-(zPix[z] - zFoc)*(yElemConst[txApFstElemY[iTx]] - yFoc) - (yPix[y] - yFoc)*zFoc) * pixFocArrang >= 0.f) && + (( (zPix[z] - zFoc)*(yElemConst[txApLstElemY[iTx]] - yFoc) + (yPix[y] - yFoc)*zFoc) * pixFocArrang >= 0.f) + ) ? 1.f : 0.f; } else { /* PWI */ @@ -111,9 +114,9 @@ __global__ void iqRaw2Lri3D(complex *iqLri, const complex *input, // to determine if the pixel is in the sonified area (dot product >= 0). // For ApEdgeFst, the vector is rotated left, for ApEdgeLst the vector is rotated right. txApod = (((-sinf(txAngZX[iTx])*zPix[z] + cosf(txAngZX[iTx]) * (xPix[x]-xElemConst[txApFstElemX[iTx]])) >= 0.f) && - ( (sinf(txAngZX[iTx])*zPix[z] - cosf(txAngZX[iTx]) * (xPix[x]-xElemConst[txApLstElemX[iTx]])) >= 0.f) && - ( (-sinf(txAngZY[iTx])*zPix[z] + cosf(txAngZY[iTx]) * (yPix[x]-yElemConst[txApFstElemY[iTx]])) >= 0.f) && - ( (sinf(txAngZY[iTx])*zPix[z] - cosf(txAngZY[iTx]) * (yPix[x]-yElemConst[txApLstElemY[iTx]])) >= 0.f )) ? 1.f : 0.f; + ( (sinf(txAngZX[iTx])*zPix[z] - cosf(txAngZX[iTx]) * (xPix[x]-xElemConst[txApLstElemX[iTx]])) >= 0.f) && + ( (-sinf(txAngZY[iTx])*zPix[z] + cosf(txAngZY[iTx]) * (yPix[y]-yElemConst[txApFstElemY[iTx]])) >= 0.f) && + ( (sinf(txAngZY[iTx])*zPix[z] - cosf(txAngZY[iTx]) * (yPix[y]-yElemConst[txApLstElemY[iTx]])) >= 0.f )) ? 1.f : 0.f; } pix.real(0.0f); @@ -122,15 +125,17 @@ __global__ void iqRaw2Lri3D(complex *iqLri, const complex *input, if (txApod != 0.f) { for (int iElemY = 0; iElemY < nElemY; ++iElemY) { - rxTang = (yPix[y]-yElemConst[iElemY])*zDistInv; + int rxFstElemY = iElemY+rxApFstElemY[iTx]; // global, the position in the full probe's aperture + rxTang = (yPix[y]-yElemConst[rxFstElemY])*zDistInv; if (rxTang < minRxTangZY || rxTang > maxRxTangZY) continue; rxApodY = interpLinearNormalized(rxApod, nRxApod, (rxTang-minRxTangZY)*rngRxTangZYInv); for (int iElemX = 0; iElemX < nElemX; ++iElemX) { + int rxFstElemX = iElemX+rxApFstElemX[iTx]; offset = iTx*nElemY*nElemX*nSamp + iElemY*nElemX*nSamp + iElemX*nSamp; - rxTang = (xPix[x] - xElemConst[iElemX])*zDistInv; + rxTang = (xPix[x] - xElemConst[rxFstElemX])*zDistInv; if (rxTang < minRxTangZX || rxTang > maxRxTangZX) continue; rxApodX = interpLinearNormalized(rxApod, nRxApod, (rxTang-minRxTangZX)*rngRxTangZXInv); - rxDist = ownHypotf(zPix[z], xPix[x] - xElemConst[iElemX], yPix[y] - yElemConst[iElemY]); + rxDist = ownHypotf(zPix[z], xPix[x] - xElemConst[rxFstElemX], yPix[y] - yElemConst[rxFstElemY]); time = (txDist+rxDist)*sosInv + initDel; iSamp = time*fs; if (iSamp < 0 || iSamp > static_cast(nSamp-1)) continue;