@@ -66,11 +66,22 @@ struct GetValueType<sycl::multi_ptr<ElementType, Space>> {
6666 using type = ElementType;
6767};
6868
// Copy-constructs an object of the slot's own element type in `slot` from
// `val`. `slot` is treated as raw storage: only its address is used, nothing
// is read from it and nothing is destroyed first.
template <typename Elem, typename T>
void construct_in_slot(Elem &slot, const T &val) {
  // Building Elem (not T) keeps this consistent with what `slot = val` would
  // store when the two types differ.
  ::new (static_cast<void *>(&slot)) Elem(val);
}

// Stores `val` into element `idx` of `ptr`.
//
// Plain assignment is only valid on an already constructed object, so the very
// first write into raw (uninitialized) memory has to create the element with
// placement new instead.
//
//   ptr      - destination; anything whose operator[] yields an lvalue
//              reference to the element (raw pointer or pointer-like type).
//   idx      - index of the destination element.
//   val      - value to store.
//   is_first - true when the destination element has not been constructed
//              yet; false when it already holds a live object.
template <typename Acc, typename T>
void set_value(Acc ptr, const std::size_t idx, const T &val, bool is_first) {
  if (is_first) {
    // Both branches are compiled for every Acc, so the slot is reached through
    // operator[] rather than through `ptr + idx`.
    construct_in_slot(ptr[idx], val);
  } else {
    ptr[idx] = val;
  }
}
79+
6980template <typename InAcc, typename OutAcc, typename Compare>
7081void merge (const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
7182 const std::size_t start_1, const std::size_t end_1,
7283 const std::size_t end_2, const std::size_t start_out, Compare comp,
73- const std::size_t chunk) {
84+ const std::size_t chunk, bool is_first ) {
7485 const std::size_t start_2 = end_1;
7586 // Borders of the sequences to merge within this call
7687 const std::size_t local_start_1 =
@@ -98,7 +109,9 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
98109 const std::size_t l_shift_1 = local_start_1 - start_1;
99110 const std::size_t l_shift_2 = l_search_bound_2 - start_2;
100111
101- out_acc1[start_out + l_shift_1 + l_shift_2] = local_l_item_1;
112+ // out_acc1[start_out + l_shift_1 + l_shift_2] = local_l_item_1;
113+ set_value (out_acc1, start_out + l_shift_1 + l_shift_2, local_l_item_1,
114+ is_first);
102115
103116 std::size_t r_search_bound_2{};
104117 // find right border in 2nd sequence
@@ -109,7 +122,9 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
109122 const auto r_shift_1 = local_end_1 - 1 - start_1;
110123 const auto r_shift_2 = r_search_bound_2 - start_2;
111124
112- out_acc1[start_out + r_shift_1 + r_shift_2] = local_r_item_1;
125+ // out_acc1[start_out + r_shift_1 + r_shift_2] = local_r_item_1;
126+ set_value (out_acc1, start_out + r_shift_1 + r_shift_2, local_r_item_1,
127+ is_first);
113128 }
114129
115130 // Handle intermediate items
@@ -123,7 +138,8 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
123138 const std::size_t shift_1 = idx - start_1;
124139 const std::size_t shift_2 = l_search_bound_2 - start_2;
125140
126- out_acc1[start_out + shift_1 + shift_2] = intermediate_item_1;
141+ set_value (out_acc1, start_out + shift_1 + shift_2, intermediate_item_1,
142+ is_first);
127143 }
128144 }
129145 // Process 2nd sequence
@@ -136,7 +152,8 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
136152 const std::size_t l_shift_1 = l_search_bound_1 - start_1;
137153 const std::size_t l_shift_2 = local_start_2 - start_2;
138154
139- out_acc1[start_out + l_shift_1 + l_shift_2] = local_l_item_2;
155+ set_value (out_acc1, start_out + l_shift_1 + l_shift_2, local_l_item_2,
156+ is_first);
140157
141158 std::size_t r_search_bound_1{};
142159 // find right border in 1st sequence
@@ -147,7 +164,8 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
147164 const std::size_t r_shift_1 = r_search_bound_1 - start_1;
148165 const std::size_t r_shift_2 = local_end_2 - 1 - start_2;
149166
150- out_acc1[start_out + r_shift_1 + r_shift_2] = local_r_item_2;
167+ set_value (out_acc1, start_out + r_shift_1 + r_shift_2, local_r_item_2,
168+ is_first);
151169 }
152170
153171 // Handle intermediate items
@@ -161,7 +179,8 @@ void merge(const std::size_t offset, InAcc &in_acc1, OutAcc &out_acc1,
161179 const std::size_t shift_1 = l_search_bound_1 - start_1;
162180 const std::size_t shift_2 = idx - start_2;
163181
164- out_acc1[start_out + shift_1 + shift_2] = intermediate_item_2;
182+ set_value (out_acc1, start_out + shift_1 + shift_2, intermediate_item_2,
183+ is_first);
165184 }
166185 }
167186}
@@ -196,6 +215,7 @@ void merge_sort(Group group, Iter first, const std::size_t n, Compare comp,
196215
197216 T *temp = reinterpret_cast <T *>(scratch);
198217 bool data_in_temp = false ;
218+ bool is_first = true ;
199219 std::size_t sorted_size = 1 ;
200220 while (sorted_size * chunk < n) {
201221 const std::size_t start_1 =
@@ -205,14 +225,18 @@ void merge_sort(Group group, Iter first, const std::size_t n, Compare comp,
205225 const std::size_t offset = chunk * (idx % sorted_size);
206226
207227 if (!data_in_temp) {
208- merge (offset, first, temp, start_1, end_1, end_2, start_1, comp, chunk);
228+ merge (offset, first, temp, start_1, end_1, end_2, start_1, comp, chunk,
229+ is_first);
209230 } else {
210- merge (offset, temp, first, start_1, end_1, end_2, start_1, comp, chunk);
231+ merge (offset, temp, first, start_1, end_1, end_2, start_1, comp, chunk,
232+ /* is_first*/ false );
211233 }
212234 id.barrier ();
213235
214236 data_in_temp = !data_in_temp;
215237 sorted_size *= 2 ;
238+ if (is_first)
239+ is_first = false ;
216240 }
217241
218242 // copy back if data is in a temporary storage
0 commit comments