Skip to content

Commit

Permalink
Optimize the pack/unpack of contiguous-with-gaps datatypes.
Browse files Browse the repository at this point in the history
Signed-off-by: George Bosilca <bosilca@icl.utk.edu>
  • Loading branch information
bosilca committed Jun 24, 2019
1 parent 31a20f5 commit 7cd3aba
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 68 deletions.
10 changes: 4 additions & 6 deletions opal/datatype/opal_datatype_pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,22 +129,20 @@ opal_pack_homogeneous_contig_with_gaps_function( opal_convertor_t* pConv,
}
/* We can directly provide pointers into the user buffers (like convertor_raw) */
if( NULL == iov[0].iov_base ) {
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp;
user_memory = pConv->pBaseBuf + pData->true_lb;

for( idx = 0; (idx < (*out_size)) && stack[0].count; idx++ ) {
iov[idx].iov_base = user_memory;
iov[idx].iov_base = user_memory + stack[0].disp + stack[1].disp;
iov[idx].iov_len = stack[1].count;
COMPUTE_CSUM( iov[idx].iov_base, iov[idx].iov_len, pConv );

user_memory += extent;
pConv->bConverted += stack[1].count;

stack[0].disp += extent;
stack[0].count--;
stack[1].disp = 0;
stack[1].disp = 0;
stack[1].count = pData->size; /* we might need this to update the partial
* length for the first iteration */

user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp;
}
goto update_status_and_return;
}
Expand Down
106 changes: 44 additions & 62 deletions opal/datatype/opal_datatype_unpack.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,97 +70,79 @@ opal_unpack_homogeneous_contig_function( opal_convertor_t* pConv,
{
const opal_datatype_t *pData = pConv->pDesc;
unsigned char *user_memory, *packed_buffer;
uint32_t iov_count, i;
size_t remaining, length, initial_bytes_converted = pConv->bConverted;
uint32_t iov_idx, i;
size_t remaining, initial_bytes_converted = pConv->bConverted;
dt_stack_t* stack = pConv->pStack;
ptrdiff_t extent = pData->ub - pData->lb;

DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov_count %d )\n",
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( pBaseBuf %p, iov count %d )\n",
(void*)pConv->pBaseBuf, *out_size ); );
if( stack[1].type != opal_datatype_uint1.id ) {
stack[1].count *= opal_datatype_basicDatatypes[stack[1].type]->size;
stack[1].type = opal_datatype_uint1.id;
}

for( iov_count = 0; iov_count < (*out_size); iov_count++ ) {
remaining = pConv->local_size - pConv->bConverted;
if( 0 == remaining ) break; /* we're done this time */
if( remaining > iov[iov_count].iov_len )
remaining = iov[iov_count].iov_len;
packed_buffer = (unsigned char*)iov[iov_count].iov_base;
pConv->bConverted += remaining; /* how much will get unpacked this time */
user_memory = pConv->pBaseBuf + pData->true_lb;

if( (ptrdiff_t)pData->size == extent ) {
user_memory += pConv->bConverted;
DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %" PRIsize_t "\n",
(void*)user_memory, (void*)packed_buffer, remaining ); );
if( (ptrdiff_t)pData->size == extent ) {
for( iov_idx = 0; iov_idx < (*out_size); iov_idx++ ) {
remaining = pConv->local_size - pConv->bConverted;
if( 0 == remaining ) break; /* we're done this time */
if( remaining > iov[iov_idx].iov_len )
remaining = iov[iov_idx].iov_len;

packed_buffer = (unsigned char*)iov[iov_idx].iov_base;
user_memory = pConv->pBaseBuf + pData->true_lb + pConv->bConverted;

/* contiguous data or basic datatype with count */
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining,
pConv->pBaseBuf, pData, pConv->count );
DO_DEBUG( opal_output( 0, "1. unpack contig dest %p src %p length %" PRIsize_t "\n",
(void*)user_memory, (void*)packed_buffer, remaining ); );
DO_DEBUG( opal_output( 0, "unpack contig [%d] dest %p src %p length %" PRIsize_t "\n",
iov_idx, (void*)user_memory, (void*)packed_buffer, remaining ); );
MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv );
} else {
user_memory += stack[0].disp + stack[1].disp;
pConv->bConverted += remaining; /* how much will get unpacked this time */
}
} else {
for( iov_idx = 0; iov_idx < (*out_size); iov_idx++ ) {
remaining = pConv->local_size - pConv->bConverted;
if( 0 == remaining ) break; /* we're done this time */
if( remaining > iov[iov_idx].iov_len )
remaining = iov[iov_idx].iov_len;

packed_buffer = (unsigned char*)iov[iov_idx].iov_base;
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp;
pConv->bConverted += remaining; /* how much will get unpacked this time */

for( i = 0; stack[1].count <= remaining; i++ ) { /* partial or full data */
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, stack[1].count, pConv->pBaseBuf,
pData, pConv->count );
DO_DEBUG( opal_output( 0, "unpack gaps [%d] dest %p src %p length %" PRIsize_t " [%d]\n",
iov_idx, (void*)user_memory, (void*)packed_buffer, stack[1].count, i ); );
MEMCPY_CSUM( user_memory, packed_buffer, stack[1].count, pConv );

DO_DEBUG( opal_output( 0, "unpack_homogeneous_contig( user_memory %p, packed_buffer %p length %" PRIsize_t "\n",
(void*)user_memory, (void*)packed_buffer, remaining ); );
packed_buffer += stack[1].count;
remaining -= stack[1].count;

length = (0 == pConv->stack_pos ? 0 : stack[1].count); /* left over from the last unpack */
/* complete the last copy */
if( (pData->size != length) && (length <= remaining) ) {
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, length, pConv->pBaseBuf,
pData, pConv->count );
DO_DEBUG( opal_output( 0, "2. unpack dest %p src %p length %" PRIsize_t "\n",
(void*)user_memory, (void*)packed_buffer, length ); );
MEMCPY_CSUM( user_memory, packed_buffer, length, pConv );
packed_buffer += length;
remaining -= length;
stack[1].count -= length;
stack[1].disp += length; /* just in case, we overwrite this below */
if( 0 == stack[1].count) { /* one completed element */
stack[0].count--;
stack[0].disp += extent;
if( 0 == stack[0].count )
break;
stack[1].count = pData->size;
stack[1].disp = 0;
}
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp + stack[1].disp;
}
stack[0].count--;
stack[0].disp += extent;
stack[1].count = pData->size;
stack[1].disp = 0;

for( i = 0; pData->size <= remaining; i++ ) {
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, pData->size, pConv->pBaseBuf,
pData, pConv->count );
DO_DEBUG( opal_output( 0, "3. unpack dest %p src %p length %" PRIsize_t "\n",
(void*)user_memory, (void*)packed_buffer, pData->size ); );
MEMCPY_CSUM( user_memory, packed_buffer, pData->size, pConv );
packed_buffer += pData->size;
user_memory += extent;
remaining -= pData->size;
user_memory = pConv->pBaseBuf + pData->true_lb + stack[0].disp;
}
stack[0].count -= i;
stack[0].disp += (i * extent);

/* Copy the last bits */
if( 0 != remaining ) {
OPAL_DATATYPE_SAFEGUARD_POINTER( user_memory, remaining, pConv->pBaseBuf,
pData, pConv->count );
DO_DEBUG( opal_output( 0, "4. unpack dest %p src %p length %" PRIsize_t "\n",
(void*)user_memory, (void*)packed_buffer, remaining ); );
DO_DEBUG( opal_output( 0, "unpack gaps [%d] dest %p src %p length %" PRIsize_t " [epilog]\n",
iov_idx, (void*)user_memory, (void*)packed_buffer, remaining ); );
MEMCPY_CSUM( user_memory, packed_buffer, remaining, pConv );
stack[1].count -= remaining;
stack[1].disp += remaining; /* keep the += in case we are copying less than the datatype size */
if( 0 == stack[1].count ) { /* prepare for the next element */
stack[1].count = pData->size;
stack[1].disp = 0;
}
assert( stack[1].count );
}
}
}
*out_size = iov_count; /* we only reach this line after the for loop successfully completes */
*out_size = iov_idx; /* we only reach this line after the for loop successfully completes */
*max_data = pConv->bConverted - initial_bytes_converted;
if( pConv->bConverted == pConv->local_size ) pConv->flags |= CONVERTOR_COMPLETED;
return !!(pConv->flags & CONVERTOR_COMPLETED); /* done or not */
Expand Down

0 comments on commit 7cd3aba

Please sign in to comment.