Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix an issue introduced in 2.8.4 that prevented pickled MetaflowObjec… #1392

Merged
merged 3 commits into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions metaflow/client/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,10 +516,12 @@ def __setstate__(self, state):
self._UNPICKLE_FUNC[version](self, state["data"])
else:
# For backward compatibility: handles pickled objects that were serialized without a __getstate__ override
# If _namespace_check is missing from the state, we default it to False for
# the same reason as the one given in __getstate__
self.__init__(
pathspec=state.get("_pathspec", None),
attempt=state.get("_attempt", None),
_namespace_check=state.get("_namespace_check", True),
_namespace_check=state.get("_namespace_check", False),
)

def __getstate__(self):
Expand All @@ -531,12 +533,16 @@ def __getstate__(self):
from this object) are pickled (serialized) in a later version of Metaflow, it may not be possible
to unpickle (deserialize) them in a previous version of Metaflow.
"""
# Note that we set _namespace_check to False because we want the user to
# be able to access this object even after unpickling it. If we set it to
# True, it would check the namespace again at the time of unpickling even
# if the user properly got the object in the first place and pickled it.
return {
"version": "2.8.4",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This will still be a problem with older versions that do not understand this new dictionary format. How do we handle that case? One option is to not use this custom dictionary format that carries a version.

"data": [
self.pathspec,
self._attempt,
self._namespace_check,
False,
],
}

Expand Down
18 changes: 17 additions & 1 deletion test/core/tests/current_singleton.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ def step_start(self):
self.task_data = {current.pathspec: self.uuid}
self.tags = current.tags
self.task_obj = current.task
self.run_obj = current.run

@steps(1, ["join"])
def step_join(self):
Expand Down Expand Up @@ -72,6 +73,7 @@ def step_join(self):
self.task_data[current.pathspec] = self.uuid
self.tags.update(current.tags)
self.task_obj = current.task
self.run_obj = current.run

@steps(2, ["all"])
def step_all(self):
Expand All @@ -97,6 +99,9 @@ def step_all(self):

def check_results(self, flow, checker):
run = checker.get_run()
from metaflow import get_namespace

checker_namespace = get_namespace()
if run is None:
# very basic sanity check for CLI
for step in flow:
Expand All @@ -105,17 +110,28 @@ def check_results(self, flow, checker):
step.name, "project_names", {"current_singleton"}
)
else:
from metaflow import Task
from metaflow import Task, namespace

task_data = run.data.task_data
for pathspec, uuid in task_data.items():
assert_equals(Task(pathspec).data.uuid, uuid)

# Override the namespace for the pickling/unpickling checks
namespace("non-existent-namespace-to-test-namespacecheck")
for step in run:
for task in step:
assert_equals(task.data.step_name, step.id)
pathspec = "/".join(task.pathspec.split("/")[-4:])
assert_equals(task.data.uuid, task_data[pathspec])
assert_equals(task.data.task_obj.pathspec, task.pathspec)
# Check that we can navigate up and down from pickled objects even in a
# different namespace
assert_equals(task.data.parent.parent.id, task.data.run_obj.id)
assert_equals(
task.data.run_obj[task.data.step_name].id, task.data.step_name
)
# Restore the original namespace for these tests
namespace(checker_namespace)
assert_equals(run.data.run_obj.pathspec, run.pathspec)
assert_equals(run.data.project_names, {"current_singleton"})
assert_equals(run.data.branch_names, {"user.tester"})
Expand Down