-
Notifications
You must be signed in to change notification settings - Fork 44
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Scheduler optimization #185
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,7 +24,7 @@ | |
from temporian.implementation.numpy import evaluation as np_eval | ||
from temporian.implementation.numpy.data.event_set import EventSet | ||
from temporian.core.graph import infer_graph | ||
from temporian.core.schedule import Schedule | ||
from temporian.core.schedule import Schedule, ScheduleStep | ||
from temporian.core.operators.leak import LeakOperator | ||
|
||
EvaluationQuery = Union[ | ||
|
@@ -143,7 +143,7 @@ def run( | |
|
||
if verbose == 1: | ||
print( | ||
f"Run {len(schedule.ordered_operators)} operators", | ||
f"Run {len(schedule.steps)} operators", | ||
file=sys.stderr, | ||
) | ||
|
||
|
@@ -208,7 +208,8 @@ def build_schedule( | |
# Operators ready to be computed (i.e. ready to be added to "planned_ops") | ||
# as all their inputs are already computed by "planned_ops" or specified by | ||
# "inputs". | ||
ready_ops: Set[Operator] = set() | ||
ready_ops: List[Operator] = [] | ||
ready_ops_set: Set[Operator] = set() | ||
|
||
# "node_to_op[e]" is the list of operators with node "e" as input. | ||
node_to_op: Dict[EventSetNode, List[Operator]] = defaultdict(lambda: []) | ||
|
@@ -222,25 +223,50 @@ def build_schedule( | |
for op in graph.operators: | ||
num_pending_inputs = 0 | ||
for input_node in op.inputs.values(): | ||
node_to_op[input_node].append(op) | ||
if input_node in graph.inputs: | ||
# This input is already available | ||
continue | ||
node_to_op[input_node].append(op) | ||
num_pending_inputs += 1 | ||
if num_pending_inputs == 0: | ||
# Ready to be scheduled | ||
ready_ops.add(op) | ||
ready_ops.append(op) | ||
ready_ops_set.add(op) | ||
else: | ||
# Some of the inputs are missing. | ||
op_to_num_pending_inputs[op] = num_pending_inputs | ||
|
||
# Make evaluation order deterministic. | ||
# | ||
# Execute the op with smallest internal ordered id first. | ||
ready_ops.sort(key=lambda op: op._internal_ordered_id, reverse=True) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This only affects the ops that depend directly on the inputs (current ops in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This sort ensures that the execution order of the operators is deterministic. This has no impact on the result. However, we have unit tests that check some of the internals that were affected by this non-deterministic evaluation. With this change, the unit tests are simpler :) While the results are not impacted, the order of execution could change the speed and RAM usage of executing the graph. Making the order of execution deterministic reduces the risk of flakiness in resource-constrained environments. |
||
|
||
# Compute the schedule | ||
while ready_ops: | ||
# Get an op ready to be scheduled | ||
op = ready_ops.pop() | ||
ready_ops_set.remove(op) | ||
|
||
# Nodes released after the op is executed | ||
released_nodes = [] | ||
for input in op.inputs.values(): | ||
if input in outputs: | ||
continue | ||
if input not in node_to_op: | ||
continue | ||
# The list of ops that depends on this input (including the current | ||
# op "op"). | ||
input_usage = node_to_op[input] | ||
input_usage.remove(op) | ||
|
||
if not input_usage: | ||
released_nodes.append(input) | ||
del node_to_op[input] | ||
|
||
# Schedule the op | ||
schedule.ordered_operators.append(op) | ||
schedule.steps.append( | ||
ScheduleStep(op=op, released_nodes=released_nodes) | ||
) | ||
|
||
# Update all the ops that depends on "op". Enlist the ones that are | ||
# ready to be computed | ||
|
@@ -256,7 +282,8 @@ def build_schedule( | |
|
||
if num_missing_inputs == 0: | ||
# "new_op" can be computed | ||
ready_ops.add(new_op) | ||
ready_ops.append(new_op) | ||
ready_ops_set.add(new_op) | ||
del op_to_num_pending_inputs[new_op] | ||
|
||
assert not op_to_num_pending_inputs | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -72,6 +72,8 @@ def __exit__(self, exc_type, exc_val, traceback): | |
class Operator(ABC): | ||
"""Interface definition and common logic for operators.""" | ||
|
||
next_internal_id: int = 0 | ||
|
||
def __init__(self): | ||
self._inputs: Dict[str, EventSetNode] = {} | ||
self._outputs: Dict[str, EventSetNode] = {} | ||
|
@@ -81,10 +83,18 @@ def __init__(self): | |
attr.key: attr.type for attr in self._definition.attributes | ||
} | ||
|
||
# Id of the operator object such that an operator A instantiated before | ||
# an operator B will have a smaller _internal_ordered_id. | ||
# | ||
# _internal_ordered_id is used to ensure the deterministic graph | ||
# evaluation. | ||
self._internal_ordered_id = Operator.next_internal_id | ||
Operator.next_internal_id += 1 | ||
Comment on lines
+91
to
+92
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this necessary? could we instead ensure that graph.operators is always in the same order? e.g. by using an ordered set instead of a set in the graph's _operators, _nodes, etc. while building the schedule (ordered set in python is a dict with no values :) ). not sure if this would suffice? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The alternative solution would be to replace some of the set-inputs in the API and intermediate functions by lists. I don't see that as ideal. The other benefit of this change is that "id" is now a small number that is easier for people to read in error messages. Ideally, I would like to update all the "ids" with this mechanism. The third benefit is that an "id()" is not guaranteed to be unique for two objects with non-overlapping lifetimes. This is not an issue for our current code, but this is a property that is not great for what we use it for. |
||
|
||
def __repr__(self): | ||
return ( | ||
f"Operator(key={self.definition().key!r}, id={id(self)!r}," | ||
f" attributes={self.attributes!r})" | ||
f"Operator(key={self.definition().key!r}," | ||
f" id={self._internal_ordered_id}, attributes={self.attributes!r})" | ||
) | ||
|
||
@property | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's safe to remove
ready_ops_set
now we got ready_ops
. I don't see its purpose. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"ready_ops" and "ready_ops_set" contain the same data. Those two containers have different properties and costs (one is a list, the other is a set). The list is great for a FILO, while the set is great to check for presence of an item.
You mean there is a situation where this code will not work? If so, can you give details?