Skip to content

Commit

Permalink
add_to_package decorator hook. (#834)
Browse files Browse the repository at this point in the history
* `add_to_package` decorator hook.
- Cherry picking diff from card branch.

* comment fix.

* Support including the same file under a different name
- Fixed comment.

* Updated comment.

* updated comment with more context.

* Refactoring logic for readability/correctness.

* - Simplifying logic
- nit fixes (spaces after commas)
- lifecycle PNG updated.
  • Loading branch information
valayDave authored Dec 9, 2021
1 parent 747b86c commit 5305899
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 1 deletion.
4 changes: 3 additions & 1 deletion docs/lifecycle.dot
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ digraph Metaflow {
validate_dag [label="{graph|validate}", fillcolor=lightpink2]
init_environment [label="{environment|init_environment}", fillcolor=palegreen2]
package_init [label="{decorator|package_init}", fillcolor=lightblue2]
add_custom_package [label="{decorator|add_to_package}", fillcolor=lightblue2]
add_to_package [label="{environment|add_to_package}", fillcolor=palegreen2]
package [label="{package|create}", fillcolor=lightpink2]
}
Expand Down Expand Up @@ -148,7 +149,8 @@ digraph Metaflow {
/* package */
validate_dag -> init_environment
init_environment -> package_init
package_init -> add_to_package
package_init -> add_custom_package
add_custom_package -> add_to_package
add_to_package -> package
package -> command_run
package -> stepfunctions_create
Expand Down
Binary file modified docs/lifecycle.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
14 changes: 14 additions & 0 deletions metaflow/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,20 @@ def package_init(self, flow, step_name, environment):
"""
pass

def add_to_package(self):
    """
    Return the extra files this decorator needs inside the code package.

    `MetaflowPackage.path_tuples` invokes this hook for every decorator of
    every step while it generates the contents of the code package tarball.

    Each entry is a `(file_path, arcname)` tuple:
      - `file_path` is the location of the file on the local file system;
      - `arcname` is the path of the file inside the constructed tarball,
        i.e. where the file ends up once the tarball is decompressed.

    Several decorators may return the same `(file_path, arcname)` pair; it
    is packaged only once. Mapping one `arcname` to different `file_path`s
    raises `NonUniqueFileNameToFilePathMappingException`.

    The default implementation contributes no files.

    Returns a list of `(file_path, arcname)` tuples.
    """
    return []

def step_task_retry_count(self):
"""
Called to determine the number of times this task should be retried.
Expand Down
31 changes: 31 additions & 0 deletions metaflow/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,25 @@
from itertools import chain

from .metaflow_config import DEFAULT_PACKAGE_SUFFIXES
from .exception import MetaflowException
from .util import to_unicode
from . import R

DEFAULT_SUFFIXES_LIST = DEFAULT_PACKAGE_SUFFIXES.split(",")


class NonUniqueFileNameToFilePathMappingException(MetaflowException):
    """
    Raised when decorators map one name in the code package to several files.

    `MetaflowPackage.path_tuples` raises this when two `add_to_package`
    decorator hooks return the same `file_name` (the name inside the code
    package) paired with different local `file_path`s.
    """

    headline = "Non Unique file path for a file name included in code package"

    def __init__(self, filename, file_paths, lineno=None):
        """
        Build the error message for a conflicting file name.

        filename   : the name inside the code package that is ambiguous.
        file_paths : iterable of the conflicting local paths for that name.
        lineno     : optional line number forwarded to `MetaflowException`.
        """
        # Coerce every path with `str` so that path-like objects (e.g.
        # `pathlib.Path`) returned by a decorator do not make `join` raise a
        # TypeError that would hide the real problem.
        joined_paths = ", ".join(str(path) for path in file_paths)
        msg = (
            "Filename %s included in the code package includes multiple different paths for the same name : %s.\n"
            "The `filename` in the `add_to_package` decorator hook requires a unique `file_path` to `file_name` mapping"
            % (filename, joined_paths)
        )
        super().__init__(msg=msg, lineno=lineno)


class MetaflowPackage(object):
def __init__(self, flow, environment, echo, suffixes=DEFAULT_SUFFIXES_LIST):
self.suffixes = list(set().union(suffixes, DEFAULT_SUFFIXES_LIST))
Expand All @@ -31,6 +44,7 @@ def __init__(self, flow, environment, echo, suffixes=DEFAULT_SUFFIXES_LIST):
)

self.flow_name = flow.name
self._flow = flow
self.create_time = time.time()
environment.init_environment(echo)
for step in flow:
Expand Down Expand Up @@ -75,6 +89,23 @@ def path_tuples(self):
addl_suffixes=self.metaflow_extensions_addl_suffixes,
):
yield path_tuple

# Any custom packages exposed via decorators
deco_module_paths = {}
for step in self._flow:
for deco in step.decorators:
for path_tuple in deco.add_to_package():
file_path, file_name = path_tuple
# Check if the path is not duplicated as
# many steps can have the same packages being imported
if file_name not in deco_module_paths:
deco_module_paths[file_name] = file_path
yield path_tuple
elif deco_module_paths[file_name] != file_path:
raise NonUniqueFileNameToFilePathMappingException(
file_name, [deco_module_paths[file_name], file_path]
)

# the package folders for environment
for path_tuple in self.environment.add_to_package():
yield path_tuple
Expand Down

0 comments on commit 5305899

Please sign in to comment.