Optimize zoom out implementation with separate padding kernel #125

Merged: 8 commits, Nov 21, 2021
@@ -169,74 +169,71 @@
output_tensor[(blockIdx.z * pitch) +
((out_pixel_h + out_h_start) * input_w) +
(out_pixel_w + out_w_start)] = sum_;
}
}

// replicate along top edge
if (out_pixel_h == 0) {
for (int ik = 0; ik < out_h_start; ik++)
output_tensor[(blockIdx.z * pitch) +
((out_pixel_h + ik) * input_w) +
(out_pixel_w + out_w_start)] = sum_;
}

// replicate along bottom edge
if (out_pixel_h == (output_h - 1)) {
for (int ik = 1; ik <= out_h_end; ik++)
output_tensor[(blockIdx.z * pitch) +
((out_h_start + out_pixel_h + ik) * input_w) +
(out_pixel_w + out_w_start)] = sum_;
}

// replicate along left edge
if (out_pixel_w == 0) {
for (int ik = 0; ik < out_w_start; ik++)
output_tensor[(blockIdx.z * pitch) +
((out_pixel_h + out_h_start) * input_w) + ik] = sum_;
}

// replicate along right edge
if (out_pixel_w == (output_w - 1)) {
for (int ik = 1; ik <= out_w_end; ik++)
output_tensor[(blockIdx.z * pitch) +
((out_pixel_h + out_h_start) * input_w) +
(out_pixel_w + out_w_start + ik)] = sum_;
}
__global__ void zoomout_edge_pad(float *output_tensor, int height, int width,
int pitch, int no_padding_h_start,
int no_padding_w_start,
int no_padding_h_end, int no_padding_w_end) {
// H -> block Y, row
// W -> block X, col
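  // Each thread handles one pixel of the padded output; threads that land in
  // the border copy the value of the nearest pixel of the interior region
  // already written by the zoom kernel.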

  // corner replication is not very friendly when there is a large area to
  // patch: a single thread issues the stores
  // ToDo: Consider adding another kernel for corner padding
int out_pixel_h = blockIdx.y * blockDim.y + threadIdx.y;
int out_pixel_w = blockIdx.x * blockDim.x + threadIdx.x;
int image_start_offset = blockIdx.z * pitch;

// top left corner
if (out_pixel_h == 0 && out_pixel_w == 0) {
for (int ik = 0; ik < out_h_start; ik++) {
for (int il = 0; il < out_w_start; il++)
output_tensor[(blockIdx.z * pitch) + (ik * input_w) + il] = sum_;
}
}
// top right corner
if (out_pixel_h == 0 && out_pixel_w == (output_w - 1)) {
for (int ik = 0; ik < out_h_start; ik++) {
for (int il = 1; il <= out_w_end; il++)
output_tensor[(blockIdx.z * pitch) + (ik * input_w) +
(out_pixel_w + out_w_start + il)] = sum_;
}
}
// bottom left corner
if (out_pixel_h == (output_h - 1) && out_pixel_w == 0) {
for (int ik = 1; ik <= out_h_end; ik++) {
for (int il = 0; il < out_w_start; il++)
output_tensor[(blockIdx.z * pitch) +
((out_h_start + out_pixel_h + ik) * input_w) +
il] = sum_;
}
}
// bottom right corner
if (out_pixel_h == (output_h - 1) && out_pixel_w == (output_w - 1)) {
for (int ik = 1; ik <= out_h_end; ik++) {
for (int il = 1; il <= out_w_end; il++)
output_tensor[(blockIdx.z * pitch) +
((out_h_start + out_pixel_h + ik) * input_w) +
(out_pixel_w + out_w_start + il)] = sum_;
}
  // no_padding_w_end = no_padding_w_start + cropped width (and likewise for
  // height): the interior region already written by the zoom kernel.
  int out_location = image_start_offset + (out_pixel_h * width) + out_pixel_w;

  if (out_pixel_h < height && out_pixel_w < width) {
    if (out_pixel_h < no_padding_h_start && out_pixel_w >= no_padding_w_start
        && out_pixel_w < no_padding_w_end) {
      // top pad
      output_tensor[out_location] = output_tensor[image_start_offset +
          (no_padding_h_start * width) + out_pixel_w];
    } else if (out_pixel_h >= no_padding_h_end
               && out_pixel_w >= no_padding_w_start
               && out_pixel_w < no_padding_w_end) {
      // bottom pad
      output_tensor[out_location] = output_tensor[image_start_offset +
          ((no_padding_h_end - 1) * width) + out_pixel_w];
    } else if (out_pixel_w < no_padding_w_start
               && out_pixel_h >= no_padding_h_start
               && out_pixel_h < no_padding_h_end) {
      // left pad
      output_tensor[out_location] = output_tensor[image_start_offset +
          (out_pixel_h * width) + no_padding_w_start];
    } else if (out_pixel_w >= no_padding_w_end
               && out_pixel_h >= no_padding_h_start
               && out_pixel_h < no_padding_h_end) {
      // right pad
      output_tensor[out_location] = output_tensor[image_start_offset +
          (out_pixel_h * width) + (no_padding_w_end - 1)];
    } else if (out_pixel_h < no_padding_h_start
               && out_pixel_w < no_padding_w_start) {
      // top-left corner
      output_tensor[out_location] = output_tensor[image_start_offset +
          (no_padding_h_start * width) + no_padding_w_start];
    } else if (out_pixel_h < no_padding_h_start
               && out_pixel_w >= no_padding_w_end) {
      // top-right corner
      output_tensor[out_location] = output_tensor[image_start_offset +
          (no_padding_h_start * width) + (no_padding_w_end - 1)];
    } else if (out_pixel_h >= no_padding_h_end
               && out_pixel_w < no_padding_w_start) {
      // bottom-left corner
      output_tensor[out_location] = output_tensor[image_start_offset +
          ((no_padding_h_end - 1) * width) + no_padding_w_start];
    } else if (out_pixel_h >= no_padding_h_end
               && out_pixel_w >= no_padding_w_end) {
      // bottom-right corner
      output_tensor[out_location] = output_tensor[image_start_offset +
          ((no_padding_h_end - 1) * width) + (no_padding_w_end - 1)];
    }
}
}
@@ -15,9 +15,13 @@ def get_input_arr():
return arr


def get_zoomed_data():
def get_zoomed_data(zoomout=False):
dirname = os.path.dirname(__file__)
img1 = Image.open(os.path.join(os.path.abspath(dirname), "zoomed.png"))
if not zoomout:
img1 = Image.open(os.path.join(os.path.abspath(dirname), "zoomed.png"))
else:
img1 = Image.open(os.path.join(os.path.abspath(dirname),
"zoomout_padded.png"))
arr_o = np.asarray(img1)
arr_o = np.transpose(arr_o)
return arr_o
@@ -54,3 +58,21 @@ def test_rand_zoom_batchinput():

for i in range(np_output.shape[0]):
assert np.allclose(np_output[i], zoomed_arr)


def test_rand_zoomout_numpy_input():
arr = get_input_arr()
zoomed_arr = get_zoomed_data(True)
output = its.rand_zoom(arr, prob=1.0, min_zoom=0.85, max_zoom=0.85)
assert np.allclose(output, zoomed_arr)


def test_rand_zoomout_batchinput():
arr = get_input_arr()
zoomed_arr = get_zoomed_data(True)
arr_batch = np.stack((arr,) * 8, axis=0)
np_output = its.rand_zoom(arr_batch, prob=1.0, min_zoom=0.85, max_zoom=0.85)
assert np_output.shape[0] == 8

for i in range(np_output.shape[0]):
assert np.allclose(np_output[i], zoomed_arr)
@@ -16,9 +16,13 @@ def get_input_arr():
return arr


def get_zoomed_data():
def get_zoomed_data(zoomout=False):
dirname = os.path.dirname(__file__)
img1 = Image.open(os.path.join(os.path.abspath(dirname), "zoomed.png"))
if not zoomout:
img1 = Image.open(os.path.join(os.path.abspath(dirname), "zoomed.png"))
else:
img1 = Image.open(os.path.join(os.path.abspath(dirname),
"zoomout_padded.png"))
arr_o = np.asarray(img1)
arr_o = np.transpose(arr_o)
return arr_o
@@ -59,3 +63,30 @@ def test_zoom_batchinput():

for i in range(np_output.shape[0]):
assert np.allclose(np_output[i], zoomed_arr)


def test_zoomout_numpy_input():
arr = get_input_arr()
zoomed_arr = get_zoomed_data(True)
output = its.zoom(arr, [0.85, 0.85])
assert np.allclose(output, zoomed_arr)


def test_zoomout_cupy_input():
arr = get_input_arr()
zoomed_arr = get_zoomed_data(True)
cupy_arr = cupy.asarray(arr)
cupy_output = its.zoom(cupy_arr, [0.85, 0.85])
np_output = cupy.asnumpy(cupy_output)
assert np.allclose(np_output, zoomed_arr)


def test_zoomout_batchinput():
arr = get_input_arr()
zoomed_arr = get_zoomed_data(True)
arr_batch = np.stack((arr,) * 8, axis=0)
np_output = its.zoom(arr_batch, [0.85, 0.85])
assert np_output.shape[0] == 8

for i in range(np_output.shape[0]):
assert np.allclose(np_output[i], zoomed_arr)
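
A minimal usage sketch mirroring these zoom-out tests (not part of the diff; the CHW float32 input and its shape are illustrative assumptions): a zoom factor below 1.0 zooms out, and the new padding kernel replicates edge pixels so the output keeps the input shape.

import numpy as np
import cucim.core.operations.intensity as its

img = np.random.random((3, 100, 100)).astype(np.float32)  # hypothetical (C, H, W) input
out = its.zoom(img, [0.85, 0.85])  # zoom out: content scaled to 85% in H and W
assert out.shape == img.shape  # border is padded back to the input size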
(Binary file not shown: presumably the new zoomout_padded.png reference image used by the zoom-out tests.)
11 changes: 11 additions & 0 deletions python/cucim/src/cucim/core/operations/intensity/zoom.py
@@ -160,6 +160,17 @@ def get_block_size(output_size_cu, H, W):
np.int32(pad_dims[1][0]),
np.int32(pad_dims[1][1])),
shared_mem=smem_size)
# padding kernel
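# zoomout_edge_pad launches one thread per output pixel over the full H x W
# plane for every image and channel (grid z = C * N); border threads replicate
# the value of the nearest interior pixel, where the interior bounds are the
# no_padding_* start/end arguments passed below.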
kernel = CUDA_KERNELS.get_function("zoomout_edge_pad")
grid = (int((W - 1) / block_config[0] + 1),
int((H - 1) / block_config[1] + 1),
C * N)
kernel(grid, block_config,
args=(result, np.int32(H), np.int32(W), np.int32(pitch),
np.int32(pad_dims[0][0]), np.int32(pad_dims[1][0]),
np.int32(pad_dims[0][0] + output_size_cu[2]),
np.int32(pad_dims[1][0] + output_size_cu[3])))

else:
raise Exception("Can only handle simultaneous \
expansion(or shrinkage) in both H,W dimension, \