@@ -330,15 +330,20 @@ inline void DoSplitStreams(const uint8_t* src, int width, int64_t nvalues,
330330 while (nvalues >= kBlockSize ) {
331331 for (int stream = 0 ; stream < width; ++stream) {
332332 uint8_t * dest = dest_streams[stream];
333+ #if !ARROW_LITTLE_ENDIAN
334+ const int src_stream = width - 1 - stream;
335+ #else
336+ const int src_stream = stream;
337+ #endif
333338 for (int i = 0 ; i < kBlockSize ; i += 8 ) {
334- uint64_t a = src[stream + i * width];
335- uint64_t b = src[stream + (i + 1 ) * width];
336- uint64_t c = src[stream + (i + 2 ) * width];
337- uint64_t d = src[stream + (i + 3 ) * width];
338- uint64_t e = src[stream + (i + 4 ) * width];
339- uint64_t f = src[stream + (i + 5 ) * width];
340- uint64_t g = src[stream + (i + 6 ) * width];
341- uint64_t h = src[stream + (i + 7 ) * width];
339+ uint64_t a = src[src_stream + i * width];
340+ uint64_t b = src[src_stream + (i + 1 ) * width];
341+ uint64_t c = src[src_stream + (i + 2 ) * width];
342+ uint64_t d = src[src_stream + (i + 3 ) * width];
343+ uint64_t e = src[src_stream + (i + 4 ) * width];
344+ uint64_t f = src[src_stream + (i + 5 ) * width];
345+ uint64_t g = src[src_stream + (i + 6 ) * width];
346+ uint64_t h = src[src_stream + (i + 7 ) * width];
342347#if ARROW_LITTLE_ENDIAN
343348 uint64_t r = a | (b << 8 ) | (c << 16 ) | (d << 24 ) | (e << 32 ) | (f << 40 ) |
344349 (g << 48 ) | (h << 56 );
@@ -357,8 +362,14 @@ inline void DoSplitStreams(const uint8_t* src, int width, int64_t nvalues,
357362 // Epilog
358363 for (int stream = 0 ; stream < width; ++stream) {
359364 uint8_t * dest = dest_streams[stream];
365+ #if !ARROW_LITTLE_ENDIAN
366+ // On big-endian, reverse byte order: stream 0 gets LSB (at highest address)
367+ const int src_stream = width - 1 - stream;
368+ #else
369+ const int src_stream = stream;
370+ #endif
360371 for (int64_t i = 0 ; i < nvalues; ++i) {
361- dest[i] = src[stream + i * width];
372+ dest[i] = src[src_stream + i * width];
362373 }
363374 }
364375}
@@ -375,25 +386,22 @@ inline void DoMergeStreams(const uint8_t** src_streams, int width, int64_t nvalu
375386 const uint8_t * src = src_streams[stream];
376387 for (int i = 0 ; i < kBlockSize ; i += 8 ) {
377388 uint64_t v = arrow::util::SafeLoadAs<uint64_t >(&src[i]);
378- #if ARROW_LITTLE_ENDIAN
379- dest[stream + i * width] = static_cast <uint8_t >(v);
380- dest[stream + (i + 1 ) * width] = static_cast <uint8_t >(v >> 8 );
381- dest[stream + (i + 2 ) * width] = static_cast <uint8_t >(v >> 16 );
382- dest[stream + (i + 3 ) * width] = static_cast <uint8_t >(v >> 24 );
383- dest[stream + (i + 4 ) * width] = static_cast <uint8_t >(v >> 32 );
384- dest[stream + (i + 5 ) * width] = static_cast <uint8_t >(v >> 40 );
385- dest[stream + (i + 6 ) * width] = static_cast <uint8_t >(v >> 48 );
386- dest[stream + (i + 7 ) * width] = static_cast <uint8_t >(v >> 56 );
389+ #if !ARROW_LITTLE_ENDIAN
390+ // Byte-stream-split format stores bytes in little-endian order.
391+ // On big-endian, byteswap after loading and write to reversed stream position.
392+ v = ::arrow::bit_util::ByteSwap (v);
393+ const int dest_stream = width - 1 - stream;
387394#else
388- dest[stream + i * width] = static_cast <uint8_t >(v >> 56 );
389- dest[stream + (i + 1 ) * width] = static_cast <uint8_t >(v >> 48 );
390- dest[stream + (i + 2 ) * width] = static_cast <uint8_t >(v >> 40 );
391- dest[stream + (i + 3 ) * width] = static_cast <uint8_t >(v >> 32 );
392- dest[stream + (i + 4 ) * width] = static_cast <uint8_t >(v >> 24 );
393- dest[stream + (i + 5 ) * width] = static_cast <uint8_t >(v >> 16 );
394- dest[stream + (i + 6 ) * width] = static_cast <uint8_t >(v >> 8 );
395- dest[stream + (i + 7 ) * width] = static_cast <uint8_t >(v);
395+ const int dest_stream = stream;
396396#endif
397+ dest[dest_stream + i * width] = static_cast <uint8_t >(v);
398+ dest[dest_stream + (i + 1 ) * width] = static_cast <uint8_t >(v >> 8 );
399+ dest[dest_stream + (i + 2 ) * width] = static_cast <uint8_t >(v >> 16 );
400+ dest[dest_stream + (i + 3 ) * width] = static_cast <uint8_t >(v >> 24 );
401+ dest[dest_stream + (i + 4 ) * width] = static_cast <uint8_t >(v >> 32 );
402+ dest[dest_stream + (i + 5 ) * width] = static_cast <uint8_t >(v >> 40 );
403+ dest[dest_stream + (i + 6 ) * width] = static_cast <uint8_t >(v >> 48 );
404+ dest[dest_stream + (i + 7 ) * width] = static_cast <uint8_t >(v >> 56 );
397405 }
398406 src_streams[stream] += kBlockSize ;
399407 }
@@ -404,8 +412,13 @@ inline void DoMergeStreams(const uint8_t** src_streams, int width, int64_t nvalu
404412 // Epilog
405413 for (int stream = 0 ; stream < width; ++stream) {
406414 const uint8_t * src = src_streams[stream];
415+ #if !ARROW_LITTLE_ENDIAN
416+ const int dest_stream = width - 1 - stream;
417+ #else
418+ const int dest_stream = stream;
419+ #endif
407420 for (int64_t i = 0 ; i < nvalues; ++i) {
408- dest[stream + i * width] = src[i];
421+ dest[dest_stream + i * width] = src[i];
409422 }
410423 }
411424}
0 commit comments