Skip to content

Commit

Permalink
Force inlining memcpy for short data
Browse files Browse the repository at this point in the history
This work is based on @derbeyn's patch provided in #6678. I reworked it to
be more inclusive (it now works with both gcc and icc) and to cover more
standard lengths (4, 8, 16).

Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
Signed-off-by: Nadia Derbey <Nadia.Derbey@atos.net>
  • Loading branch information
bosilca committed May 29, 2019
1 parent fee929c commit 5e2dba2
Showing 1 changed file with 25 additions and 2 deletions.
27 changes: 25 additions & 2 deletions opal/datatype/opal_datatype_memcpy.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,30 @@
#ifndef OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED
#define OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED

/* Copy BLENGTH bytes from SRC to DST; expands to a plain memcpy() call expression. */
#define MEMCPY( DST, SRC, BLENGTH ) memcpy( (DST), (SRC), (BLENGTH) )
/*
 * MEMCPY( DST, SRC, BLENGTH ) copies BLENGTH bytes from SRC to DST.
 *
 * This macro is called whenever we are packing/unpacking a DDT that is
 * built with basic datatypes. The lengths 4, 8 and 16 are special-cased:
 * calling memcpy() with a literal size lets the compiler inline the copy
 * as plain load/store instructions instead of a library call, which was
 * reported to divide the latency by up to 2 during DDT exchanges.
 *
 * Typed assignments such as *(int*)(DST) = *(int*)(SRC) are deliberately
 * avoided: they require suitably aligned pointers, break the effective
 * type (strict aliasing) rules, and depend on sizeof(int) and
 * sizeof(long double). A fixed-size memcpy() has none of these
 * constraints and still gets inlined.
 *
 * Each argument is evaluated exactly once, so expressions with side
 * effects (e.g. ptr++) are safe to pass. The macro expands to a statement,
 * not an expression. The locals carry a long prefix so that they cannot
 * capture identifiers used in the caller's arguments.
 */
#define MEMCPY( DST, SRC, BLENGTH )                                          \
    do {                                                                     \
        void *opal_dt_memcpy_dst_ = (DST);                                   \
        const void *opal_dt_memcpy_src_ = (SRC);                             \
        const size_t opal_dt_memcpy_len_ = (size_t)(BLENGTH);                \
        switch( opal_dt_memcpy_len_ ) {                                      \
        case 4:   /* typically an int or a float */                          \
            memcpy( opal_dt_memcpy_dst_, opal_dt_memcpy_src_, 4 );           \
            break;                                                           \
        case 8:   /* typically a double or a 64 bits integer */              \
            memcpy( opal_dt_memcpy_dst_, opal_dt_memcpy_src_, 8 );           \
            break;                                                           \
        case 16:  /* typically a long double or a double complex */          \
            memcpy( opal_dt_memcpy_dst_, opal_dt_memcpy_src_, 16 );          \
            break;                                                           \
        default:  /* any other length: regular memcpy() */                   \
            memcpy( opal_dt_memcpy_dst_, opal_dt_memcpy_src_,                \
                    opal_dt_memcpy_len_ );                                   \
            break;                                                           \
        }                                                                    \
    } while (0)

#endif /* OPAL_DATATYPE_MEMCPY_H_HAS_BEEN_INCLUDED */

0 comments on commit 5e2dba2

Please sign in to comment.