Skip to content

Commit

Permalink
Simplified partial preemption algorithm. (#730)
Browse files Browse the repository at this point in the history
- Simplified partial preemption algorithm for groups with multiple
sequences.
- Removed dividing into single sequence and multiple sequence path.
  • Loading branch information
popovaan authored Aug 2, 2024
1 parent 97a05e1 commit 66f9d62
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 66 deletions.
47 changes: 5 additions & 42 deletions src/cpp/src/block_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -251,52 +251,17 @@ class BlockManager {
return m_block_table[seq_id];
}

const size_t free_rightest_blocks(SequenceGroup::Ptr sequence_group) {
size_t blocks_released = 0;
const size_t free_group_partially(SequenceGroup::Ptr sequence_group, size_t num_required_blocks) {
size_t blocks_num = std::ceil(num_required_blocks / sequence_group->get_not_finished_sequences().size());
auto running_sequences = sequence_group->get_not_finished_sequences();
std::set<size_t> blocks_released_indices;
for (size_t idx = 0; idx < running_sequences.size(); ++idx) {
auto seq_id = running_sequences[idx]->get_id();
OPENVINO_ASSERT(m_block_table.count(seq_id) > 0, "Invalid sequence group.");
auto block_table = m_block_table[seq_id];
if (free_last_block(seq_id)) {
blocks_released++;
}
}
return blocks_released;
}

const bool free_group_partially_multiple_runnning_sequence(SequenceGroup::Ptr sequence_group, size_t num_required_blocks, size_t& phisical_blocks_released, size_t& logical_blocks_released) {
phisical_blocks_released = 0;
logical_blocks_released = 0;
while (num_required_blocks > phisical_blocks_released) {
size_t released_count = free_rightest_blocks(sequence_group);
logical_blocks_released += 1;
if (get_number_of_blocks_occupied_by_sequence(sequence_group) == 0) {
break;
}
phisical_blocks_released += released_count;
free_sequence_partially(seq_id, blocks_num);
}
return num_required_blocks <= phisical_blocks_released;
}

const bool free_group_partially_single_runnning_sequence(SequenceGroup::Ptr sequence_group, size_t num_required_blocks, size_t& phisical_blocks_released) {
auto sequences = sequence_group->get_not_finished_sequences();
OPENVINO_ASSERT(sequences.size() == 1);
auto running_sequence = sequences[0];
auto seq_id = running_sequence->get_id();
if (!has_block_table(seq_id)) {
// no blocks are allocated for this sequence, so it can't be preempted
return false;
}
auto block_table = get_block_table(seq_id);
auto prev_blocks_count = num_free_blocks();
free_sequence_partially_single_runnning_sequence(seq_id, num_required_blocks);

// calculate the number of released blocks
phisical_blocks_released = num_free_blocks() - prev_blocks_count;

return num_required_blocks <= phisical_blocks_released;
return blocks_num;
}

const size_t get_number_of_blocks_occupied_by_sequence(SequenceGroup::Ptr sequence_group) {
Expand Down Expand Up @@ -399,15 +364,13 @@ class BlockManager {
return block_table[block_idx]->is_free();
}

void free_sequence_partially_single_runnning_sequence(size_t seq_id, size_t block_num) {
// this method is applicable only for groups with single sequences
void free_sequence_partially(size_t seq_id, size_t block_num) {

auto block_table = m_block_table[seq_id];
OPENVINO_ASSERT(block_table.size() >= block_num);
for (size_t idx = 0; idx < block_num; idx++) {
size_t block_idx = m_block_table[seq_id].size() - idx - 1;
m_allocator.free(block_table[block_idx]);
OPENVINO_ASSERT(block_table[block_idx]->is_free());
}
m_block_table[seq_id].resize(m_block_table[seq_id].size() - block_num);

Expand Down
29 changes: 6 additions & 23 deletions src/cpp/src/scheduler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,31 +117,14 @@ class Scheduler {
return m_block_manager.num_free_blocks() > prev_blocks_count;
}

if (num_running_sequences > 1) {
size_t phisycal_blocks_released;
size_t logical_blocks_released;
m_block_manager.free_group_partially_multiple_runnning_sequence(sequence_group, blocks_needed, phisycal_blocks_released, logical_blocks_released);

// calculate the number of preempted tokens
auto tokens_in_last_block = processed_tokens % block_size;
if (tokens_in_last_block == 0) {
tokens_in_last_block = block_size;
}
preempted_tokens = tokens_in_last_block + std::max<size_t>((int)logical_blocks_released - 1, 0) * block_size;
size_t logical_blocks_released = m_block_manager.free_group_partially(sequence_group, blocks_needed);

// calculate the number of preempted tokens
auto tokens_in_last_block = processed_tokens % block_size;
if (tokens_in_last_block == 0) {
tokens_in_last_block = block_size;
}
else {
OPENVINO_ASSERT(num_running_sequences == 1);
size_t phisycal_blocks_released;
m_block_manager.free_group_partially_single_runnning_sequence(sequence_group, blocks_needed, phisycal_blocks_released);

// calculate the number of preempted tokens
auto tokens_in_last_block = processed_tokens % block_size;
if (tokens_in_last_block == 0) {
tokens_in_last_block = block_size;
}
preempted_tokens = tokens_in_last_block + std::max<size_t>((int)phisycal_blocks_released - 1, 0) * block_size;
}
preempted_tokens = tokens_in_last_block + std::max<size_t>((int)logical_blocks_released - 1, 0) * block_size;

// case when preemption requires preempt prompt tokens
if (!m_config.dynamic_split_fuse && processed_tokens - preempted_tokens < sequence_group->get_prompt_len()) {
Expand Down
2 changes: 1 addition & 1 deletion tests/cpp/block_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ TEST(TestBlockManager, general_test) {
EXPECT_EQ(bm.get_block_table(seq_id).size(), 6);
EXPECT_EQ(bm.num_free_blocks(), 0);

bm.free_sequence_partially_single_runnning_sequence(seq_id, 4);
bm.free_sequence_partially(seq_id, 4);
EXPECT_EQ(bm.get_block_table(seq_id).size(), 2);
EXPECT_EQ(bm.num_free_blocks(), 4);

Expand Down

0 comments on commit 66f9d62

Please sign in to comment.