Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check ckpt dir exists before checking for metadata file in legacy sharding check #2835

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
9 changes: 9 additions & 0 deletions composer/utils/checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,9 +342,17 @@ def format(self, state: State, is_deepspeed: bool = False, keep_placeholders: bo
def is_checkpoint_legacy_sharded(object_store: Optional[ObjectStore], source_path: str):
metadata_path = str(Path(source_path) / Path('.metadata'))
if object_store is None:
# If the directory doesn't exist, then there is no checkpoint at all.
if os.path.exists(source_path):
raise FileNotFoundError(f"Couldn't find the directory {source_path}")
mvpatel2000 marked this conversation as resolved.
Show resolved Hide resolved
# If directory does exist, but metadata file doesn't, then it's a legacy checkpoint.
return not os.path.exists(metadata_path)
else:
# If the prefix doesn't exist, then there is no checkpoint at all.
if len(object_store.list_objects(prefix=source_path)) == 0:
raise FileNotFoundError(f"Couldn't find the prefix {object_store.get_uri(object_name=source_path)}")
try:
# If prefix does exist, but metadata file doesn't, then it's a legacy checkpoint.
with tempfile.TemporaryDirectory() as temp_dir:
metadata_destination = os.path.join(str(temp_dir), '.metadata')
if isinstance(object_store, ObjectStore):
Expand All @@ -358,6 +366,7 @@ def is_checkpoint_legacy_sharded(object_store: Optional[ObjectStore], source_pat
destination=metadata_destination,
)
return False
# If prefix does exist, and metadata file does exist, then it's a new non-legacy checkpoint.
except FileNotFoundError:
return True

Expand Down
Loading