Skip to content

Commit 972f6ef

Browse files
committed
add placement new instead of assignment
Signed-off-by: Fedorov, Andrey <andrey.fedorov@intel.com>
1 parent 768d3a3 commit 972f6ef

File tree

3 files changed

+47
-20
lines changed

3 files changed

+47
-20
lines changed

sycl/include/CL/sycl/detail/group_sort_impl.hpp

Lines changed: 33 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -66,11 +66,22 @@ struct GetValueType<sycl::multi_ptr<ElementType, Space>> {
6666
using type = ElementType;
6767
};
6868

69+
// since we couldn't assign data to raw memory, it's better to use placement for
70+
// first assignment
71+
template <typename Acc, typename T>
72+
void set_value(Acc ptr, const std::size_t idx, const T &val, bool is_first) {
73+
if (is_first) {
74+
::new (ptr + idx) T(val);
75+
} else {
76+
ptr[idx] = val;
77+
}
78+
}
79+
6980
template <typename InAcc, typename OutAcc, typename Compare>
7081
void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
7182
const std::size_t start_1, const std::size_t end_1,
7283
const std::size_t end_2, const std::size_t start_out, Compare comp,
73-
const std::size_t chunk) {
84+
const std::size_t chunk, bool is_first) {
7485
const std::size_t start_2 = end_1;
7586
// Borders of the sequences to merge within this call
7687
const std::size_t local_start_1 =
@@ -98,7 +109,9 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
98109
const std::size_t l_shift_1 = local_start_1 - start_1;
99110
const std::size_t l_shift_2 = l_search_bound_2 - start_2;
100111

101-
out_acc1[start_out + l_shift_1 + l_shift_2] = local_l_item_1;
112+
// out_acc1[start_out + l_shift_1 + l_shift_2] = local_l_item_1;
113+
set_value(out_acc1, start_out + l_shift_1 + l_shift_2, local_l_item_1,
114+
is_first);
102115

103116
std::size_t r_search_bound_2{};
104117
// find right border in 2nd sequence
@@ -109,7 +122,9 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
109122
const auto r_shift_1 = local_end_1 - 1 - start_1;
110123
const auto r_shift_2 = r_search_bound_2 - start_2;
111124

112-
out_acc1[start_out + r_shift_1 + r_shift_2] = local_r_item_1;
125+
// out_acc1[start_out + r_shift_1 + r_shift_2] = local_r_item_1;
126+
set_value(out_acc1, start_out + r_shift_1 + r_shift_2, local_r_item_1,
127+
is_first);
113128
}
114129

115130
// Handle intermediate items
@@ -123,7 +138,8 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
123138
const std::size_t shift_1 = idx - start_1;
124139
const std::size_t shift_2 = l_search_bound_2 - start_2;
125140

126-
out_acc1[start_out + shift_1 + shift_2] = intermediate_item_1;
141+
set_value(out_acc1, start_out + shift_1 + shift_2, intermediate_item_1,
142+
is_first);
127143
}
128144
}
129145
// Process 2nd sequence
@@ -136,7 +152,8 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
136152
const std::size_t l_shift_1 = l_search_bound_1 - start_1;
137153
const std::size_t l_shift_2 = local_start_2 - start_2;
138154

139-
out_acc1[start_out + l_shift_1 + l_shift_2] = local_l_item_2;
155+
set_value(out_acc1, start_out + l_shift_1 + l_shift_2, local_l_item_2,
156+
is_first);
140157

141158
std::size_t r_search_bound_1{};
142159
// find right border in 1st sequence
@@ -147,7 +164,8 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
147164
const std::size_t r_shift_1 = r_search_bound_1 - start_1;
148165
const std::size_t r_shift_2 = local_end_2 - 1 - start_2;
149166

150-
out_acc1[start_out + r_shift_1 + r_shift_2] = local_r_item_2;
167+
set_value(out_acc1, start_out + r_shift_1 + r_shift_2, local_r_item_2,
168+
is_first);
151169
}
152170

153171
// Handle intermediate items
@@ -161,7 +179,8 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
161179
const std::size_t shift_1 = l_search_bound_1 - start_1;
162180
const std::size_t shift_2 = idx - start_2;
163181

164-
out_acc1[start_out + shift_1 + shift_2] = intermediate_item_2;
182+
set_value(out_acc1, start_out + shift_1 + shift_2, intermediate_item_2,
183+
is_first);
165184
}
166185
}
167186
}
@@ -196,6 +215,7 @@ void merge_sort(Group group, Iter first, const std::size_t n, Compare comp,
196215

197216
T *temp = reinterpret_cast<T *>(scratch);
198217
bool data_in_temp = false;
218+
bool is_first = true;
199219
std::size_t sorted_size = 1;
200220
while (sorted_size * chunk < n) {
201221
const std::size_t start_1 =
@@ -205,14 +225,18 @@ void merge_sort(Group group, Iter first, const std::size_t n, Compare comp,
205225
const std::size_t offset = chunk * (idx % sorted_size);
206226

207227
if (!data_in_temp) {
208-
merge(offset, first, temp, start_1, end_1, end_2, start_1, comp, chunk);
228+
merge(offset, first, temp, start_1, end_1, end_2, start_1, comp, chunk,
229+
is_first);
209230
} else {
210-
merge(offset, temp, first, start_1, end_1, end_2, start_1, comp, chunk);
231+
merge(offset, temp, first, start_1, end_1, end_2, start_1, comp, chunk,
232+
/*is_first*/ false);
211233
}
212234
id.barrier();
213235

214236
data_in_temp = !data_in_temp;
215237
sorted_size *= 2;
238+
if (is_first)
239+
is_first = false;
216240
}
217241

218242
// copy back if data is in a temporary storage

sycl/include/sycl/ext/oneapi/experimental/group_helpers_sorters.hpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ template <typename Compare = std::less<>> class default_sorter {
5151
(void)g;
5252
(void)first;
5353
(void)last;
54-
throw sycl::exception(std::error_code(PI_INVALID_DEVICE, sycl::sycl_category()),
54+
throw sycl::exception(
55+
std::error_code(PI_INVALID_DEVICE, sycl::sycl_category()),
5556
"default_sorter constructor is not supported on host device.");
56-
5757
#endif
5858
}
5959

@@ -64,30 +64,31 @@ template <typename Compare = std::less<>> class default_sorter {
6464
auto id = sycl::detail::Builder::getNDItem<Group::dimensions>();
6565
uint32_t local_id = id.get_local_id();
6666
T *temp = reinterpret_cast<T *>(scratch);
67-
temp[local_id] = val;
67+
::new (temp + local_id) T(val);
6868
sycl::detail::merge_sort(g, temp, range_size, comp,
6969
scratch + range_size * sizeof(T));
7070
val = temp[local_id];
7171
}
7272
// TODO: it's better to add else branch
7373
#else
7474
(void)g;
75-
throw sycl::exception(std::error_code(PI_INVALID_DEVICE, sycl::sycl_category()),
75+
throw sycl::exception(
76+
std::error_code(PI_INVALID_DEVICE, sycl::sycl_category()),
7677
"default_sorter operator() is not supported on host device.");
7778
#endif
7879
return val;
7980
}
8081

8182
template <typename T>
82-
static constexpr std::size_t memory_required(sycl::memory_scope scope,
83+
static constexpr std::size_t memory_required(sycl::memory_scope,
8384
std::size_t range_size) {
84-
return range_size * sizeof(T);
85+
return range_size * sizeof(T) + alignof(T);
8586
}
8687

8788
template <typename T, int dim = 1>
8889
static constexpr std::size_t memory_required(sycl::memory_scope scope,
8990
sycl::range<dim> r) {
90-
return 2 * r.size() * sizeof(T);
91+
return 2 * memory_required<T>(scope, r.size());
9192
}
9293
};
9394

sycl/include/sycl/ext/oneapi/group_sort.hpp

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -79,8 +79,9 @@ sort_over_group(Group group, T value, Sorter sorter) {
7979
(void)group;
8080
(void)value;
8181
(void)sorter;
82-
throw sycl::exception(std::error_code(PI_INVALID_DEVICE, sycl::sycl_category()),
83-
"Group algorithms are not supported on host device.");
82+
throw sycl::exception(
83+
std::error_code(PI_INVALID_DEVICE, sycl::sycl_category()),
84+
"Group algorithms are not supported on host device.");
8485
#endif
8586
}
8687

@@ -113,8 +114,9 @@ joint_sort(Group group, Iter first, Iter last, Sorter sorter) {
113114
(void)first;
114115
(void)last;
115116
(void)sorter;
116-
throw sycl::exception(std::error_code(PI_INVALID_DEVICE, sycl::sycl_category()),
117-
"Group algorithms are not supported on host device.");
117+
throw sycl::exception(
118+
std::error_code(PI_INVALID_DEVICE, sycl::sycl_category()),
119+
"Group algorithms are not supported on host device.");
118120
#endif
119121
}
120122

0 commit comments

Comments
 (0)