Adding heuristic based batch_mem_size allocation #1300

Closed · wants to merge 2 commits

evadb/optimizer/rules/rules.py: 31 changes (30 additions & 1 deletion)

@@ -916,7 +916,31 @@ def apply(self, before: LogicalLoadData, context: OptimizerContext):
         )
         yield after
 
+class BatchMemSizeCalculator:
+    def __init__(self, context: 'OptimizerContext'):
+        self.context = context
+
+    def calculate_batch_mem_size(self, before: 'LogicalGet') -> int:
+        import psutil
+
+        # Get the available system memory
+        available_memory = psutil.virtual_memory().available
+
+        # Define a fraction of available memory to be used for batch processing
+        memory_fraction = 0.5
+
+        # If the target list is None, use a configuration value for batch_mem_size
+        if before.target_list is None:
+            batch_mem_size = self.context.db.config.get_value("executor", "batch_mem_size")
+        else:
+            # Calculate batch_mem_size based on the number of columns and available memory
+            num_columns = len(before.target_list)
+            batch_mem_size = int(available_memory * memory_fraction / num_columns)
+
+        # Ensure batch_mem_size is within a certain range, e.g., between 100 and 1000
+        batch_mem_size = max(100, min(batch_mem_size, 1000))
+        return batch_mem_size
 
 class LogicalGetToSeqScan(Rule):
     def __init__(self):
         pattern = Pattern(OperatorType.LOGICALGET)

@@ -932,8 +956,13 @@ def apply(self, before: LogicalGet, context: OptimizerContext):
         # Configure the batch_mem_size. It decides the number of rows
         # read in a batch from storage engine.
         # Todo: Experiment heuristics.
+
+        # Calculate batch_mem_size using the heuristic defined in calculate_batch_mem_size
+
+        batch_mem_size = self.batch_mem_size_calculator.calculate_batch_mem_size(before)
+
+        # Create a SeqScanPlan for further processing
         after = SeqScanPlan(None, before.target_list, before.alias)
-        batch_mem_size = context.db.config.get_value("executor", "batch_mem_size")
         after.append_child(
             StoragePlan(
                 before.table_obj,
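
Read on its own, the heuristic in calculate_batch_mem_size boils down to a few lines. The standalone sketch below reproduces it outside the optimizer: the function name estimate_batch_mem_size and the example column count are illustrative choices, while the 0.5 memory fraction and the 100-1000 clamp are taken from the class in this diff; psutil is the only dependency.

import psutil


def estimate_batch_mem_size(num_columns: int,
                            memory_fraction: float = 0.5,
                            lower: int = 100,
                            upper: int = 1000) -> int:
    # Split a fraction of the currently available RAM across the projected
    # columns, then clamp the result to the [lower, upper] range.
    available_memory = psutil.virtual_memory().available
    batch_mem_size = int(available_memory * memory_fraction / num_columns)
    return max(lower, min(batch_mem_size, upper))


# Example: with roughly 8 GiB available and 4 projected columns the raw
# estimate is about 1 GiB, so the upper clamp of 1000 is what comes back.
print(estimate_batch_mem_size(num_columns=4))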
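
apply() also expects self.batch_mem_size_calculator to have been set on the rule, which this diff does not show; setting that wiring question aside, both branches of calculate_batch_mem_size can be exercised with small stand-in objects. Everything prefixed Fake below is a test double invented for this sketch, not an EvaDB API, and BatchMemSizeCalculator is assumed to be importable from the module shown above.

from types import SimpleNamespace


class FakeConfig:
    # Stand-in for the EvaDB config lookup used on the fallback path.
    def get_value(self, category, key):
        return 30000000  # arbitrary configured batch_mem_size


fake_context = SimpleNamespace(db=SimpleNamespace(config=FakeConfig()))
calc = BatchMemSizeCalculator(fake_context)

# Heuristic branch: three projected columns share half of the available memory,
# so on most machines the result is clamped down to the upper bound of 1000.
print(calc.calculate_batch_mem_size(SimpleNamespace(target_list=["a", "b", "c"])))

# Fallback branch: with no target list the configured value is used, but it still
# passes through the max/min clamp, so 30000000 also comes back as 1000.
print(calc.calculate_batch_mem_size(SimpleNamespace(target_list=None)))

One point worth noting for reviewers: because the clamp sits outside the if/else, the configured batch_mem_size on the fallback path is also forced into the 100-1000 range, which may or may not be intended.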