Skip to content

Commit

Permalink
Optimization for blocklen == 1
Browse files Browse the repository at this point in the history
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
  • Loading branch information
bosilca committed Jun 24, 2019
1 parent 7cd3aba commit 41aab40
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 8 deletions.
20 changes: 18 additions & 2 deletions opal/datatype/opal_datatype_pack.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
size_t do_now, do_now_bytes;
size_t blocklen_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size;
unsigned char* _memory = (*memory) + _elem->disp;
unsigned char* _packed = *packed;

Expand All @@ -46,6 +46,21 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
if( cando_count > *(COUNT) )
cando_count = *(COUNT);

if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */
*(COUNT) -= cando_count;
for(; cando_count > 0; cando_count--) {
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf,
(CONVERTOR)->pDesc, (CONVERTOR)->count );
DO_DEBUG( opal_output( 0, "pack 2. memcpy( %p, %p, %lu ) => space %lu\n",
(void*)_packed, (void*)_memory, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); );
MEMCPY_CSUM( _packed, _memory, blocklen_bytes, (CONVERTOR) );
_packed += blocklen_bytes;
_memory += _elem->extent;
}
goto update_and_return;
}
blocklen_bytes *= _elem->blocklen;

/**
* First check if we already did something on this element ? The COUNT is the number
* of remaining predefined types in the current elem, not how many predefined types
Expand Down Expand Up @@ -92,14 +107,15 @@ pack_predefined_data( opal_convertor_t* CONVERTOR,
assert( cando_count < _elem->blocklen );
do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size;
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
(CONVERTOR)->pDesc, (CONVERTOR)->count );
(CONVERTOR)->pDesc, (CONVERTOR)->count );
DO_DEBUG( opal_output( 0, "pack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
(void*)_packed, (void*)_memory, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE)) ); );
MEMCPY_CSUM( _packed, _memory, do_now_bytes, (CONVERTOR) );
_memory += do_now_bytes;
_packed += do_now_bytes;
}

update_and_return:
*(memory) = _memory - _elem->disp;
*(SPACE) -= (_packed - *packed);
*(packed) = _packed;
Expand Down
17 changes: 13 additions & 4 deletions opal/datatype/opal_datatype_position.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,8 @@ position_single_block(opal_convertor_t* CONVERTOR,
}

/**
* Advance the current position in the convertor based using the
* current element and a left-over counter. Update the head pointer
* and the leftover byte space.
* Advance the convertor's position according to the current element and the remaining
* count. Update the pointer and the remaining space accordingly.
*/
static inline void
position_predefined_data( opal_convertor_t* CONVERTOR,
Expand All @@ -79,14 +78,23 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
size_t total_count = _elem->count * _elem->blocklen;
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
size_t do_now, do_now_bytes;
size_t do_now, do_now_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size;
unsigned char* _memory = (*POINTER) + _elem->disp;

assert( *(COUNT) <= _elem->count * _elem->blocklen);

if( cando_count > *(COUNT) )
cando_count = *(COUNT);

if( 1 == _elem->blocklen ) {
DO_DEBUG( opal_output( 0, "position( %p, %" PRIsize_t " ) x (count %" PRIsize_t ", extent %ld) => space %lu [prolog]\n",
(void*)_memory, (unsigned long)do_now_bytes, cando_count, _elem->extent, (unsigned long)(*SPACE) ); );
_memory += cando_count * _elem->extent;
*SPACE -= cando_count * do_now_bytes;
*COUNT -= cando_count;
goto update_and_return;
}

/**
* First check if we already did something on this element ?
*/
Expand Down Expand Up @@ -139,6 +147,7 @@ position_predefined_data( opal_convertor_t* CONVERTOR,
SPACE, do_now_bytes, COUNT, do_now );
}

update_and_return:
*(POINTER) = _memory - _elem->disp;
}

Expand Down
20 changes: 18 additions & 2 deletions opal/datatype/opal_datatype_unpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR,
const ddt_elem_desc_t* _elem = &((ELEM)->elem);
size_t cando_count = (*SPACE) / opal_datatype_basicDatatypes[_elem->common.type]->size;
size_t do_now, do_now_bytes;
size_t blocklen_bytes = _elem->blocklen * opal_datatype_basicDatatypes[_elem->common.type]->size;
size_t blocklen_bytes = opal_datatype_basicDatatypes[_elem->common.type]->size;
unsigned char* _memory = (*memory) + _elem->disp;
unsigned char* _packed = *packed;

Expand All @@ -46,6 +46,21 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR,
if( cando_count > *(COUNT) )
cando_count = *(COUNT);

if( 1 == _elem->blocklen ) { /* Do as many full blocklen as possible */
*(COUNT) -= cando_count;
for(; cando_count > 0; cando_count--) {
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, blocklen_bytes, (CONVERTOR)->pBaseBuf,
(CONVERTOR)->pDesc, (CONVERTOR)->count );
DO_DEBUG( opal_output( 0, "unpack 2. memcpy( %p, %p, %lu ) => space %lu\n",
(void*)_memory, (void*)_packed, (unsigned long)blocklen_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); );
MEMCPY_CSUM( _memory, _packed, blocklen_bytes, (CONVERTOR) );
_packed += blocklen_bytes;
_memory += _elem->extent;
}
goto update_and_return;
}
blocklen_bytes *= _elem->blocklen;

/**
* First check if we already did something on this element ? The COUNT is the number
* of remaining predefined types in the current elem, not how many predefined types
Expand Down Expand Up @@ -92,14 +107,15 @@ unpack_predefined_data( opal_convertor_t* CONVERTOR,
assert( cando_count < _elem->blocklen );
do_now_bytes = cando_count * opal_datatype_basicDatatypes[_elem->common.type]->size;
OPAL_DATATYPE_SAFEGUARD_POINTER( _memory, do_now_bytes, (CONVERTOR)->pBaseBuf,
(CONVERTOR)->pDesc, (CONVERTOR)->count );
(CONVERTOR)->pDesc, (CONVERTOR)->count );
DO_DEBUG( opal_output( 0, "unpack 3. memcpy( %p, %p, %lu ) => space %lu [epilog]\n",
(void*)_memory, (void*)_packed, (unsigned long)do_now_bytes, (unsigned long)(*(SPACE) - (_packed - *(packed))) ); );
MEMCPY_CSUM( _memory, _packed, do_now_bytes, (CONVERTOR) );
_memory += do_now_bytes;
_packed += do_now_bytes;
}

update_and_return:
*(memory) = _memory - _elem->disp;
*(SPACE) -= (_packed - *packed);
*(packed) = _packed;
Expand Down

0 comments on commit 41aab40

Please sign in to comment.