Skip to content

Commit

Permalink
plan: overwrite existing strides when setting data layout
Browse files Browse the repository at this point in the history
Reported-by: HJA Bird <hugh.bird@codeplay.com>
  • Loading branch information
evetsso authored Sep 4, 2024
1 parent d272efd commit 0d02305
Show file tree
Hide file tree
Showing 3 changed files with 93 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Documentation for rocFFT is available at
### Fixes

* Fixed incorrect results from 2-kernel 3D FFT plans that used non-default output strides.
* Fixed plan descriptions so that they can be reused with different strides for different plans.

## rocFFT 1.0.30 for ROCm 6.3.0

Expand Down
90 changes: 90 additions & 0 deletions clients/tests/unit_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,96 @@ TEST(rocfft_UnitTest, plan_description)
ASSERT_TRUE(rocfft_status_success == rocfft_plan_destroy(plan));
}

TEST(rocfft_UnitTest, plan_description_reuse)
{
    // One plan description is created up front and then handed to
    // several plans in turn.  Its data layout is re-set to a different
    // stride before each plan is built, and every strided transform
    // must reproduce the output of the first one.
    using complex_t = rocfft_complex<float>;

    // known input samples, one per FFT element
    static const complex_t input[8]{{-0.100, 0.380},
                                    {0.0166, 0.439},
                                    {-0.475, 0.212},
                                    {0.440, -0.432},
                                    {0.445, 0.0589},
                                    {0.296, 0.164},
                                    {-0.084, 0.077},
                                    {0.320, 0.087}};

    // every transform below is a 1D FFT of this many elements
    const size_t length = 8;

    // the single description shared by all plans in this test
    rocfft_plan_description desc = nullptr;
    ASSERT_EQ(rocfft_plan_description_create(&desc), rocfft_status_success);

    // filled in by the first (stride-1) iteration and used as the
    // reference for every later stride
    std::vector<complex_t> output;

    for(const size_t stride : {1, 2, 4})
    {
        // re-set the layout on the shared description; input and
        // output use the same stride and distance
        ASSERT_EQ(rocfft_plan_description_set_data_layout(desc,
                                                          rocfft_array_type_complex_interleaved,
                                                          rocfft_array_type_complex_interleaved,
                                                          nullptr,
                                                          nullptr,
                                                          1,
                                                          &stride,
                                                          length * stride,
                                                          1,
                                                          &stride,
                                                          length * stride),
                  rocfft_status_success);

        // host buffer starts out all zero; the known samples are then
        // written at multiples of the current stride
        std::vector<complex_t> data_host(length * stride, {0.0, 0.0});
        size_t                 offset = 0;
        for(const complex_t& sample : input)
        {
            data_host[offset] = sample;
            offset += stride;
        }

        // upload the host buffer to a device allocation of equal size
        const size_t        data_bytes = sizeof(complex_t) * data_host.size();
        gpubuf_t<complex_t> data_dev;
        ASSERT_EQ(data_dev.alloc(data_bytes), hipSuccess);
        void* data_dev_ptr = data_dev.data();
        ASSERT_EQ(hipMemcpy(data_dev_ptr, data_host.data(), data_bytes, hipMemcpyHostToDevice),
                  hipSuccess);

        // build an in-place plan from the shared description, execute
        // it, and read the result back into the host buffer
        rocfft_plan plan = nullptr;
        ASSERT_EQ(rocfft_plan_create(&plan,
                                     rocfft_placement_inplace,
                                     rocfft_transform_type_complex_forward,
                                     rocfft_precision_single,
                                     1,
                                     &length,
                                     1,
                                     desc),
                  rocfft_status_success);
        ASSERT_EQ(rocfft_execute(plan, &data_dev_ptr, nullptr, nullptr), rocfft_status_success);
        ASSERT_EQ(hipMemcpy(data_host.data(), data_dev_ptr, data_bytes, hipMemcpyDeviceToHost),
                  hipSuccess);
        ASSERT_EQ(hipDeviceSynchronize(), hipSuccess);

        if(!output.empty())
        {
            // a reference already exists: each strided element must
            // equal the corresponding element of the first run
            for(size_t i = 0; i < length; ++i)
            {
                ASSERT_EQ(data_host[i * stride], output[i]);
            }
        }
        else
        {
            // first iteration: keep this result as the reference
            output = data_host;
        }

        ASSERT_EQ(rocfft_plan_destroy(plan), rocfft_status_success);
    }

    ASSERT_EQ(rocfft_plan_description_destroy(desc), rocfft_status_success);
}

// Check whether logs can be emitted from multiple threads properly
TEST(rocfft_UnitTest, log_multithreading)
{
Expand Down
2 changes: 2 additions & 0 deletions library/src/plan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,7 @@ rocfft_status rocfft_plan_description_set_data_layout(rocfft_plan_description de

if(in_strides != nullptr)
{
description->inStrides.clear();
std::copy(
in_strides, in_strides + in_strides_size, std::back_inserter(description->inStrides));
}
Expand All @@ -431,6 +432,7 @@ rocfft_status rocfft_plan_description_set_data_layout(rocfft_plan_description de

if(out_strides != nullptr)
{
description->outStrides.clear();
std::copy(out_strides,
out_strides + out_strides_size,
std::back_inserter(description->outStrides));
Expand Down

0 comments on commit 0d02305

Please sign in to comment.