Skip to content

Commit

Permalink
Fixed #312, an infinite loop in MemoryPool when calling allocate() on…
Browse files Browse the repository at this point in the history
… an empty allocator and added unit test to verify this works. Fixed an issue where constructing a MemoryPool with too small a superblock size caused a hang instead of printing out the error message. Added more description of the user interface for MemoryPool.
  • Loading branch information
gmackey committed Jun 2, 2016
1 parent b983b58 commit 170e66f
Show file tree
Hide file tree
Showing 6 changed files with 189 additions and 75 deletions.
166 changes: 97 additions & 69 deletions core/src/Kokkos_MemoryPool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,8 @@ struct count_allocated_blocks {

/// \class MemoryPool
/// \brief Bitset based memory manager for pools of same-sized chunks of memory.
/// \tparam Device Kokkos device that gives the execution and memory space the
/// allocator will be used in.
///
/// MemoryPool is a memory space that can be on host or device. It provides a
/// pool memory allocator for fast allocation of same-sized chunks of memory.
Expand Down Expand Up @@ -659,6 +661,13 @@ class MemoryPool {
MemoryPool & operator = ( MemoryPool && ) = default;
MemoryPool & operator = ( const MemoryPool & ) = default;

/// \brief Initializes the memory pool.
/// \param memspace The memory space from which the memory pool will allocate memory.
/// \param total_size The requested memory amount controlled by the allocator. The
/// actual amount is rounded up to the smallest multiple of the
/// superblock size >= the requested size.
/// \param log2_superblock_size Log2 of the size of superblocks used by the allocator.
/// In most use cases, the default value should work.
inline
MemoryPool( const backend_memory_space & memspace,
size_t total_size, size_t log2_superblock_size = 20 )
Expand All @@ -675,8 +684,8 @@ class MemoryPool {
m_partfull_sb_size( m_ceil_num_sb * m_num_block_size / CHAR_BIT ),
m_total_size( m_data_size + m_sb_blocks_size + m_empty_sb_size + m_partfull_sb_size ),
m_data(0),
m_active( "Active superblocks", m_num_block_size ),
m_sb_header( "Superblock headers", m_num_sb ),
m_active( "Active superblocks" ),
m_sb_header( "Superblock headers" ),
m_track()
{
// Assumption. The minimum block size must be a power of 2.
Expand Down Expand Up @@ -715,6 +724,11 @@ class MemoryPool {
Kokkos::abort( "" );
}

// Allocate memory for Views. This is done here instead of at construction
// so that the runtime checks can be performed before allocating memory.
resize(m_active, m_num_block_size );
resize(m_sb_header, m_num_sb );

// Allocate superblock memory.
typedef Impl::SharedAllocationRecord< backend_memory_space, void > SharedRecord;
SharedRecord * rec =
Expand Down Expand Up @@ -742,7 +756,7 @@ class MemoryPool {

deep_copy(m_active, host_active);

// Initialize the blocksize info
// Initialize the blocksize info.
for ( size_t i = 0; i < m_num_block_size; ++i ) {
uint32_t lg_block_size = i + LG_MIN_BLOCK_SIZE;
uint32_t blocks_per_sb = m_sb_size >> lg_block_size;
Expand Down Expand Up @@ -803,7 +817,11 @@ class MemoryPool {
#endif
}

/// \brief Claim chunks of untracked memory from the pool.
/// \brief Allocate a chunk of memory.
/// \param alloc_size Size of the requested allocation in number of bytes.
///
/// The function returns a void pointer to a memory location on success and
/// NULL on failure.
KOKKOS_FUNCTION
void * allocate( size_t alloc_size ) const
{
Expand Down Expand Up @@ -919,9 +937,9 @@ class MemoryPool {
}

if ( need_new_sb ) {
sb_id = find_superblock( block_size_id, sb_id );
uint32_t new_sb_id = find_superblock( block_size_id, sb_id );

if ( sb_id == INVALID_SUPERBLOCK ) {
if ( new_sb_id == sb_id ) {
allocation_done = true;
#ifdef KOKKOS_MEMPOOL_PRINT_INFO
printf( "** No superblocks available. **\n" );
Expand All @@ -930,6 +948,9 @@ class MemoryPool {
#endif
#endif
}
else {
sb_id = new_sb_id;
}
}
}
}
Expand All @@ -946,88 +967,92 @@ class MemoryPool {
return p;
}

/// \brief Release claimed memory back into the pool.
/// \brief Release allocated memory back to the pool.
/// \param alloc_ptr Pointer to chunk of memory previously allocated by
/// the allocator.
/// \param alloc_size Size of the allocated memory in number of bytes.
KOKKOS_FUNCTION
void deallocate( void * alloc_ptr, size_t alloc_size ) const
{
char * ap = static_cast<char *>( alloc_ptr );

#ifdef KOKKOS_MEMPOOL_PRINTERR
// Verify that the pointer is controlled by this pool.
if ( ap < m_data || ap + alloc_size > m_data + m_data_size ) {
printf( "\n** MemoryPool::deallocate() ADDRESS_OUT_OF_RANGE(0x%llx) **\n",
reinterpret_cast<uint64_t>( alloc_ptr ) );
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
fflush( stdout );
#endif
Kokkos::abort( "" );
}
#endif

// Get the superblock for the address. This can be calculated by math on
// the address since the superblocks are stored contiguously in one memory
// chunk.
uint32_t sb_id = ( ap - m_data ) >> m_lg_sb_size;
// Only deallocate memory controlled by this pool.
if ( ap >= m_data && ap + alloc_size <= m_data + m_data_size ) {
// Get the superblock for the address. This can be calculated by math on
// the address since the superblocks are stored contiguously in one memory
// chunk.
uint32_t sb_id = ( ap - m_data ) >> m_lg_sb_size;

// Get the starting position for this superblock's bits in the bitset.
uint32_t pos_base = sb_id << m_lg_max_sb_blocks;
// Get the starting position for this superblock's bits in the bitset.
uint32_t pos_base = sb_id << m_lg_max_sb_blocks;

// Get the relative position for this memory location's bit in the bitset.
uint32_t offset = ( ap - m_data ) - ( size_t(sb_id) << m_lg_sb_size );
uint32_t lg_block_size = m_sb_header(sb_id).m_lg_block_size;
uint32_t block_size_id = lg_block_size - LG_MIN_BLOCK_SIZE;
uint32_t pos_rel = offset >> lg_block_size;
// Get the relative position for this memory location's bit in the bitset.
uint32_t offset = ( ap - m_data ) - ( size_t(sb_id) << m_lg_sb_size );
uint32_t lg_block_size = m_sb_header(sb_id).m_lg_block_size;
uint32_t block_size_id = lg_block_size - LG_MIN_BLOCK_SIZE;
uint32_t pos_rel = offset >> lg_block_size;

bool success;
unsigned prev_val;
bool success;
unsigned prev_val;

Kokkos::tie( success, prev_val ) = m_sb_blocks.fetch_word_reset( pos_base + pos_rel );
Kokkos::tie( success, prev_val ) = m_sb_blocks.fetch_word_reset( pos_base + pos_rel );

if ( success ) {
uint32_t page_fill_level = Kokkos::Impl::bit_count( prev_val );
// If the memory location was previously deallocated, do nothing.
if ( success ) {
uint32_t page_fill_level = Kokkos::Impl::bit_count( prev_val );

if ( page_fill_level == 1 ) {
// This page is now empty. Increment the number of empty pages for the
// superblock.
uint32_t empty_pages = atomic_fetch_add( &m_sb_header(sb_id).m_empty_pages, 1 );
if ( page_fill_level == 1 ) {
// This page is now empty. Increment the number of empty pages for the
// superblock.
uint32_t empty_pages = atomic_fetch_add( &m_sb_header(sb_id).m_empty_pages, 1 );

if ( !volatile_load( &m_sb_header(sb_id).m_is_active ) &&
empty_pages == m_blocksize_info[block_size_id].m_pages_per_sb - 1 )
{
// This deallocation caused the superblock to be empty. Change the
// superblock category from partially full to empty.
unsigned pos = block_size_id * m_ceil_num_sb + sb_id;
if ( !volatile_load( &m_sb_header(sb_id).m_is_active ) &&
empty_pages == m_blocksize_info[block_size_id].m_pages_per_sb - 1 )
{
// This deallocation caused the superblock to be empty. Change the
// superblock category from partially full to empty.
unsigned pos = block_size_id * m_ceil_num_sb + sb_id;

if ( m_partfull_sb.reset( pos ) ) {
// Reset the empty pages and block size for the superblock.
volatile_store( &m_sb_header(sb_id).m_empty_pages, uint32_t(0) );
volatile_store( &m_sb_header(sb_id).m_lg_block_size, uint32_t(0) );
if ( m_partfull_sb.reset( pos ) ) {
// Reset the empty pages and block size for the superblock.
volatile_store( &m_sb_header(sb_id).m_empty_pages, uint32_t(0) );
volatile_store( &m_sb_header(sb_id).m_lg_block_size, uint32_t(0) );

memory_fence();
memory_fence();

m_empty_sb.set( sb_id );
m_empty_sb.set( sb_id );
}
}
}
}
else if ( page_fill_level == m_blocksize_info[block_size_id].m_page_full_level ) {
// This page is no longer full. Decrement the number of full pages for
// the superblock.
uint32_t full_pages = atomic_fetch_sub( &m_sb_header(sb_id).m_full_pages, 1 );

if ( !volatile_load( &m_sb_header(sb_id).m_is_active ) &&
full_pages == m_blocksize_info[block_size_id].m_sb_full_level )
{
// This deallocation caused the number of full pages to decrease below
// the full threshold. Change the superblock category from full to
// partially full.
unsigned pos = block_size_id * m_ceil_num_sb + sb_id;
m_partfull_sb.set( pos );
else if ( page_fill_level == m_blocksize_info[block_size_id].m_page_full_level ) {
// This page is no longer full. Decrement the number of full pages for
// the superblock.
uint32_t full_pages = atomic_fetch_sub( &m_sb_header(sb_id).m_full_pages, 1 );

if ( !volatile_load( &m_sb_header(sb_id).m_is_active ) &&
full_pages == m_blocksize_info[block_size_id].m_sb_full_level )
{
// This deallocation caused the number of full pages to decrease below
// the full threshold. Change the superblock category from full to
// partially full.
unsigned pos = block_size_id * m_ceil_num_sb + sb_id;
m_partfull_sb.set( pos );
}
}
}
}
#ifdef KOKKOS_MEMPOOL_PRINTERR
else {
printf( "\n** MemoryPool::deallocate() ADDRESS_OUT_OF_RANGE(0x%llx) **\n",
reinterpret_cast<uint64_t>( alloc_ptr ) );
#ifdef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
fflush( stdout );
#endif
}
#endif
}

/// \brief Tests if the memory pool is empty.
/// \brief Tests if the memory pool has no more memory available to allocate.
KOKKOS_INLINE_FUNCTION
bool is_empty() const
{
Expand Down Expand Up @@ -1273,9 +1298,12 @@ class MemoryPool {
/// \brief Finds a superblock with free space to become a new active superblock.
///
/// If this function is called, the current active superblock needs to be replaced
/// because it is full. All threads that encounter a full active superblock call
/// this function. Only one will replace the active superblock. The others spin
/// on a lock to wait until the active superblock has been replaced.
/// because it is full. Initially, only the thread that sets the active superblock
/// to full calls this function. Other threads can still allocate from the "full"
/// active superblock because a full superblock still has locations available. If
/// a thread tries to allocate from the active superblock when it has no free
/// locations, then that thread will call this function, too, and spin on a lock
/// waiting until the active superblock has been replaced.
KOKKOS_FUNCTION
uint32_t find_superblock( int block_size_id, uint32_t old_sb ) const
{
Expand Down
4 changes: 4 additions & 0 deletions core/unit_test/TestCuda_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,10 @@ TEST_F( cuda , memory_pool )
TestMemoryPool::test_mempool2< device_type >( 64, 4, 100000, 200000 );

Kokkos::Cuda::fence();

TestMemoryPool::test_memory_exhaustion< Kokkos::Cuda >();

Kokkos::Cuda::fence();
}

}
Expand Down
88 changes: 82 additions & 6 deletions core/unit_test/TestMemoryPool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -334,10 +334,6 @@ bool test_mempool( size_t chunk_size, size_t total_size )
timer.reset();
#endif

// Tests:
// test for correct behvior when out of memory
// test for correct behvior when interleaving allocate() and deallocate()

{
allocate_memory< pointer_view, pool_memory_space >
am( pointers, num_chunks, chunk_size, mempool );
Expand Down Expand Up @@ -647,8 +643,8 @@ void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes,
#ifdef TESTMEMORYPOOL_PRINT
printf( "\n*** test_mempool2() ***\n" );
printf( " num_chunk_sizes: %12zu\n", num_chunk_sizes );
printf( " base_chunk_size: %12zu\n", base_chunk_size );
printf( " ceil_base_chunk_size: %12zu\n", ceil_base_chunk_size );
printf( " base_chunk_size: %12u\n", base_chunk_size );
printf( " ceil_base_chunk_size: %12u\n", ceil_base_chunk_size );
printf( " phase1_size: %12zu\n", phase1_size );
printf( " phase2_size: %12zu\n", phase2_size );
printf( " phase3_size: %12zu\n", phase3_size );
Expand Down Expand Up @@ -727,6 +723,86 @@ void test_mempool2( unsigned base_chunk_size, size_t num_chunk_sizes,
#endif
}

// Tests for correct behavior when the allocator is out of memory.
template < class Device >
void test_memory_exhaustion()
{
#ifdef TESTMEMORYPOOL_PRINT
typedef typename Device::execution_space execution_space;
#endif
typedef typename Device::memory_space memory_space;
typedef Device device_type;
typedef Kokkos::View< pointer_obj *, device_type > pointer_view;
typedef Kokkos::Experimental::MemoryPool< device_type > pool_memory_space;

// The allocator will have a single superblock, and allocations will all be
// of the same chunk size. The allocation loop will attempt to allocate
// twice the number of chunks as are available in the allocator. The
// deallocation loop will only free the successfully allocated chunks.

size_t chunk_size = 128;
size_t num_chunks = 128;
size_t half_num_chunks = num_chunks / 2;
size_t superblock_size = chunk_size * half_num_chunks;
size_t lg_superblock_size =
Kokkos::Impl::integral_power_of_two( superblock_size );

pointer_view pointers( "pointers", num_chunks );

#ifdef TESTMEMORYPOOL_PRINT
std::cout << "\n*** test_memory_exhaustion() ***" << std::endl;

double elapsed_time = 0;
Kokkos::Timer timer;
#endif

pool_memory_space mempool( memory_space(), superblock_size,
lg_superblock_size );

#ifdef TESTMEMORYPOOL_PRINT
execution_space::fence();
elapsed_time = timer.seconds();
print_results( "initialize mempool: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool.print_status();
#endif
timer.reset();
#endif

{
allocate_memory< pointer_view, pool_memory_space >
am( pointers, num_chunks, chunk_size, mempool );
}

#ifdef TESTMEMORYPOOL_PRINT
execution_space::fence();
elapsed_time = timer.seconds();
print_results( "allocate chunks: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool.print_status();
#endif
timer.reset();
#endif

{
// In parallel, the allocations that succeeded were not put contiguously
// into the pointers View. The whole View can still be looped over and
// have deallocate called because deallocate will just do nothing for NULL
// pointers.
deallocate_memory< pointer_view, pool_memory_space >
dm( pointers, num_chunks, chunk_size, mempool );
}

#ifdef TESTMEMORYPOOL_PRINT
execution_space::fence();
elapsed_time = timer.seconds();
print_results( "deallocate chunks: ", elapsed_time );
#ifdef TESTMEMORYPOOL_PRINT_STATUS
mempool.print_status();
#endif
#endif
}

}

#ifdef TESTMEMORYPOOL_PRINT
Expand Down
2 changes: 2 additions & 0 deletions core/unit_test/TestOpenMP_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,8 @@ TEST_F( openmp , memory_pool )
ASSERT_TRUE( val );

TestMemoryPool::test_mempool2< Kokkos::OpenMP >( 64, 4, 1000000, 2000000 );

TestMemoryPool::test_memory_exhaustion< Kokkos::OpenMP >();
}

//----------------------------------------------------------------------------
Expand Down
2 changes: 2 additions & 0 deletions core/unit_test/TestSerial.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,8 @@ TEST_F( serial , memory_pool )
ASSERT_TRUE( val );

TestMemoryPool::test_mempool2< Kokkos::Serial >( 64, 4, 1000000, 2000000 );

TestMemoryPool::test_memory_exhaustion< Kokkos::Serial >();
}

//----------------------------------------------------------------------------
Expand Down
Loading

0 comments on commit 170e66f

Please sign in to comment.