Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix for 'upload_to_hf_hub()' path mismatch with 'save()' #3977

Merged
merged 16 commits into from
Mar 24, 2024
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
a4b9b3d
fix for 'upload_to_hf_hub()' path mismatch with 'save()'
sanjaydasgupta Mar 22, 2024
2f80119
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 22, 2024
e11a52c
added one unit test 'test_upload_to_hf_hub__validate_upload_parameters2'
sanjaydasgupta Mar 22, 2024
b9f3819
Merge remote-tracking branch 'origin/upload-hf-issue-3925' into uploa…
sanjaydasgupta Mar 22, 2024
7257808
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 22, 2024
f749c0c
passed additional argument 'model_path_is_experiment_path=False' to '…
sanjaydasgupta Mar 22, 2024
5c58b4c
always normalize any user-provided path to the experiment-path, modif…
sanjaydasgupta Mar 23, 2024
fa92d13
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 23, 2024
c56c42f
The unit test added earlier is no longer correct or relevant
sanjaydasgupta Mar 23, 2024
a62b1f4
Merge remote-tracking branch 'origin/upload-hf-issue-3925' into uploa…
sanjaydasgupta Mar 23, 2024
8cea3e2
minor docstring edit - just to kick off CI
sanjaydasgupta Mar 23, 2024
c68132e
updated the docstrings for 'model_weight' and other minor code changes
sanjaydasgupta Mar 23, 2024
8b072d2
used global literals 'MODEL_FILE_NAME' and 'MODEL_WEIGHTS_FILE_NAME' …
sanjaydasgupta Mar 23, 2024
c632b52
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Mar 23, 2024
43656be
Merge branch 'ludwig-ai:master' into upload-hf-issue-3925
sanjaydasgupta Mar 24, 2024
aa1750e
fixed a few formatting errors within docstrings
sanjaydasgupta Mar 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 28 additions & 12 deletions ludwig/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@
from ludwig.features.feature_registries import update_config_with_metadata, update_config_with_model
from ludwig.globals import (
LUDWIG_VERSION,
MODEL_FILE_NAME,
MODEL_HYPERPARAMETERS_FILE_NAME,
MODEL_WEIGHTS_FILE_NAME,
set_disable_progressbar,
TRAIN_SET_METADATA_FILE_NAME,
TRAINING_CHECKPOINTS_DIR_PATH,
Expand Down Expand Up @@ -110,6 +112,7 @@
from ludwig.utils.torch_utils import DEVICE
from ludwig.utils.trainer_utils import get_training_report
from ludwig.utils.types import DataFrame, TorchDevice
from ludwig.utils.upload_utils import HuggingFaceHub

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -1938,35 +1941,48 @@ def upload_to_hf_hub(

# Inputs

:param repo_id (`str`):
:param repo_id: (`str`)
A namespace (user or an organization) and a repo name separated
by a `/`.
:param model_path (`str`):
The path of the saved model. This is the top level directory where
the models weights as well as other associated training artifacts
are saved.
:param private (`bool`, *optional*, defaults to `False`):
:param model_path: (`str`)
The path of the saved model. This is either (a) the folder where
the 'model_weights' folder and the 'model_hyperparameters.json' file
are stored, or (b) the parent of that folder.
:param private: (`bool`, *optional*, defaults to `False`)
Whether the model repo should be private.
:param repo_type (`str`, *optional*):
:param repo_type: (`str`, *optional*)
Set to `"dataset"` or `"space"` if uploading to a dataset or
space, `None` or `"model"` if uploading to a model. Default is
`None`.
:param commit_message (`str`, *optional*):
:param commit_message: (`str`, *optional*)
The summary / title / first line of the generated commit. Defaults to:
`f"Upload {path_in_repo} with huggingface_hub"`
:param commit_description (`str` *optional*):
:param commit_description: (`str` *optional*)
The description of the generated commit

# Returns

:return: (bool) True for success, False for failure.
"""
if os.path.exists(os.path.join(model_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists(
os.path.join(model_path, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME)
):
experiment_path = model_path
elif os.path.exists(os.path.join(model_path, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists(
os.path.join(model_path, MODEL_HYPERPARAMETERS_FILE_NAME)
):
experiment_path = os.path.dirname(model_path)
else:
raise ValueError(
f"Can't find 'model_weights' and '{MODEL_HYPERPARAMETERS_FILE_NAME}' either at "
f"'{model_path}' or at '{model_path}/model'"
)
model_service = get_upload_registry()["hf_hub"]
hub = model_service()
hub: HuggingFaceHub = model_service()
hub.login()
upload_status = hub.upload(
upload_status: bool = hub.upload(
repo_id=repo_id,
model_path=model_path,
model_path=experiment_path,
repo_type=repo_type,
private=private,
commit_message=commit_message,
Expand Down
6 changes: 3 additions & 3 deletions ludwig/automl/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
from ludwig.contrib import add_contrib_callback_args
from ludwig.data.cache.types import CacheableDataset
from ludwig.datasets import load_dataset_uris
from ludwig.globals import LUDWIG_VERSION
from ludwig.globals import LUDWIG_VERSION, MODEL_FILE_NAME
from ludwig.hyperopt.run import hyperopt
from ludwig.schema.model_config import ModelConfig
from ludwig.types import ModelConfigDict
Expand Down Expand Up @@ -101,10 +101,10 @@ def best_model(self) -> Optional[LudwigModel]:
# Read remote URIs using Ludwig's internal remote file loading APIs, as
# Ray's do not handle custom credentials at the moment.
with use_credentials(self._creds):
return LudwigModel.load(os.path.join(ckpt_path, "model"))
return LudwigModel.load(os.path.join(ckpt_path, MODEL_FILE_NAME))
else:
with checkpoint.as_directory() as ckpt_path:
return LudwigModel.load(os.path.join(ckpt_path, "model"))
return LudwigModel.load(os.path.join(ckpt_path, MODEL_FILE_NAME))


@PublicAPI
Expand Down
11 changes: 8 additions & 3 deletions ludwig/benchmarking/artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import dataclass
from typing import Any, Dict

from ludwig.globals import MODEL_FILE_NAME
from ludwig.types import ModelConfigDict, TrainingSetMetadataDict
from ludwig.utils.data_utils import load_json, load_yaml

Expand Down Expand Up @@ -55,7 +56,11 @@ def build_benchmarking_result(benchmarking_config: dict, experiment_idx: int):
description=load_json(os.path.join(experiment_run_path, "description.json")),
test_statistics=load_json(os.path.join(experiment_run_path, "test_statistics.json")),
training_statistics=load_json(os.path.join(experiment_run_path, "training_statistics.json")),
model_hyperparameters=load_json(os.path.join(experiment_run_path, "model", "model_hyperparameters.json")),
training_progress=load_json(os.path.join(experiment_run_path, "model", "training_progress.json")),
training_set_metadata=load_json(os.path.join(experiment_run_path, "model", "training_set_metadata.json")),
model_hyperparameters=load_json(
os.path.join(experiment_run_path, MODEL_FILE_NAME, "model_hyperparameters.json")
),
training_progress=load_json(os.path.join(experiment_run_path, MODEL_FILE_NAME, "training_progress.json")),
training_set_metadata=load_json(
os.path.join(experiment_run_path, MODEL_FILE_NAME, "training_set_metadata.json")
),
)
4 changes: 2 additions & 2 deletions ludwig/benchmarking/summary_dataclasses.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import ludwig.modules.metric_modules # noqa: F401
from ludwig.benchmarking.utils import format_memory, format_time
from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME
from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME
from ludwig.modules.metric_registry import get_metric_classes, metric_feature_type_registry # noqa: F401
from ludwig.types import ModelConfigDict
from ludwig.utils.data_utils import load_json
Expand Down Expand Up @@ -209,7 +209,7 @@ def build_metrics_summary(experiment_local_directory: str) -> MetricsSummary:
e.g. local_experiment_repo/ames_housing/some_experiment/
"""
config = load_json(
os.path.join(experiment_local_directory, "experiment_run", "model", MODEL_HYPERPARAMETERS_FILE_NAME)
os.path.join(experiment_local_directory, "experiment_run", MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME)
)
report = load_json(os.path.join(experiment_local_directory, "experiment_run", "test_statistics.json"))
output_feature_type: str = config["output_features"][0]["type"]
Expand Down
8 changes: 4 additions & 4 deletions ludwig/benchmarking/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from ludwig.constants import BINARY, CATEGORY
from ludwig.datasets import model_configs_for_dataset
from ludwig.datasets.loaders.dataset_loader import DatasetLoader
from ludwig.globals import CONFIG_YAML
from ludwig.globals import CONFIG_YAML, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME
from ludwig.utils.data_utils import load_yaml
from ludwig.utils.dataset_utils import get_repeatable_train_val_test_split
from ludwig.utils.defaults import default_random_seed
Expand Down Expand Up @@ -251,9 +251,9 @@ def delete_model_checkpoints(output_directory: str):
Args:
output_directory: output directory of the hyperopt run.
"""
shutil.rmtree(os.path.join(output_directory, "model", "training_checkpoints"), ignore_errors=True)
if os.path.isfile(os.path.join(output_directory, "model", "model_weights")):
os.remove(os.path.join(output_directory, "model", "model_weights"))
shutil.rmtree(os.path.join(output_directory, MODEL_FILE_NAME, "training_checkpoints"), ignore_errors=True)
if os.path.isfile(os.path.join(output_directory, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)):
os.remove(os.path.join(output_directory, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME))


def delete_hyperopt_outputs(output_directory: str):
Expand Down
4 changes: 2 additions & 2 deletions ludwig/contribs/mlflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from ludwig.api_annotations import DeveloperAPI, PublicAPI
from ludwig.callbacks import Callback
from ludwig.constants import TRAINER
from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME
from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, TRAIN_SET_METADATA_FILE_NAME
from ludwig.types import TrainingSetMetadataDict
from ludwig.utils.data_utils import chunk_dict, flatten_dict, save_json, to_json_dict
from ludwig.utils.package_utils import LazyLoader
Expand Down Expand Up @@ -258,7 +258,7 @@ def _log_mlflow(log_metrics, steps, save_path, should_continue, log_artifacts: b
def _log_artifacts(output_directory):
for fname in os.listdir(output_directory):
lpath = os.path.join(output_directory, fname)
if fname == "model":
if fname == MODEL_FILE_NAME:
_log_model(lpath)
else:
mlflow.log_artifact(lpath)
Expand Down
8 changes: 4 additions & 4 deletions ludwig/contribs/mlflow/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from mlflow.utils.model_utils import _get_flavor_configuration

from ludwig.api_annotations import DeveloperAPI
from ludwig.globals import MODEL_HYPERPARAMETERS_FILE_NAME
from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME
from ludwig.utils.data_utils import load_json

FLAVOR_NAME = "ludwig"
Expand Down Expand Up @@ -97,7 +97,7 @@ def save_model(
path = os.path.abspath(path)
if os.path.exists(path):
raise MlflowException(f"Path '{path}' already exists")
model_data_subpath = "model"
model_data_subpath = MODEL_FILE_NAME
model_data_path = os.path.join(path, model_data_subpath)
os.makedirs(path)
if mlflow_model is None:
Expand Down Expand Up @@ -267,7 +267,7 @@ def export_model(model_path, output_path, registered_model_name=None):
if not model_path.startswith("runs:/") or output_path is not None:
# No run specified, so in order to register the model in mlflow, we need
# to create a new run and upload the model as an artifact first
output_path = output_path or "model"
output_path = output_path or MODEL_FILE_NAME
log_model(
_CopyModel(model_path),
artifact_path=output_path,
Expand Down Expand Up @@ -295,7 +295,7 @@ def log_saved_model(lpath):
"""
log_model(
_CopyModel(lpath),
artifact_path="model",
artifact_path=MODEL_FILE_NAME,
)


Expand Down
1 change: 1 addition & 0 deletions ludwig/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

LUDWIG_VERSION = "0.10.1.dev"

MODEL_FILE_NAME = "model"
MODEL_WEIGHTS_FILE_NAME = "model_weights"
MODEL_HYPERPARAMETERS_FILE_NAME = "model_hyperparameters.json"
TRAIN_SET_METADATA_FILE_NAME = "training_set_metadata.json"
Expand Down
7 changes: 4 additions & 3 deletions ludwig/hyperopt/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from ludwig.backend.ray import initialize_ray
from ludwig.callbacks import Callback
from ludwig.constants import MAXIMIZE, TEST, TRAINER, TRAINING, TYPE, VALIDATION
from ludwig.globals import MODEL_FILE_NAME
from ludwig.hyperopt.results import HyperoptResults, TrialResults
from ludwig.hyperopt.syncer import RemoteSyncer
from ludwig.hyperopt.utils import load_json_values, substitute_parameters
Expand Down Expand Up @@ -125,7 +126,7 @@ def ignore_dot_files(src, files):
return [f for f in files if f.startswith(".")]

with tune.checkpoint_dir(step=progress_tracker.tune_checkpoint_num) as checkpoint_dir:
checkpoint_model = os.path.join(checkpoint_dir, "model")
checkpoint_model = os.path.join(checkpoint_dir, MODEL_FILE_NAME)
# Atomic copying of the checkpoints
if not os.path.isdir(checkpoint_model):
copy_id = uuid.uuid4()
Expand Down Expand Up @@ -411,7 +412,7 @@ def _evaluate_best_model(
debug,
):
best_model = LudwigModel.load(
os.path.join(best_model_path, "model"),
os.path.join(best_model_path, MODEL_FILE_NAME),
backend=backend,
gpus=gpus,
gpu_memory_limit=gpu_memory_limit,
Expand Down Expand Up @@ -549,7 +550,7 @@ def on_trainer_train_setup(self, trainer, save_path, is_coordinator):
os.rename(save_path, tmp_path)

try:
safe_move_file(os.path.join(ckpt_path, "model"), save_path)
safe_move_file(os.path.join(ckpt_path, MODEL_FILE_NAME), save_path)
except Exception:
# Rollback from partial changes. Remove the save_path
# and move the original save_path back.
Expand Down
3 changes: 2 additions & 1 deletion ludwig/trainers/base.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
from abc import ABC, abstractmethod

from ludwig.data.dataset.base import Dataset
from ludwig.globals import MODEL_FILE_NAME
from ludwig.schema.trainer import BaseTrainerConfig
from ludwig.types import ModelConfigDict
from ludwig.utils.defaults import default_random_seed


class BaseTrainer(ABC):
@abstractmethod
def train(self, training_set, validation_set=None, test_set=None, save_path="model", **kwargs):
def train(self, training_set, validation_set=None, test_set=None, save_path=MODEL_FILE_NAME, **kwargs):
raise NotImplementedError()

@abstractmethod
Expand Down
3 changes: 2 additions & 1 deletion ludwig/trainers/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from ludwig.distributed.base import DistributedStrategy, LocalStrategy
from ludwig.globals import (
is_progressbar_disabled,
MODEL_FILE_NAME,
MODEL_HYPERPARAMETERS_FILE_NAME,
TRAINING_CHECKPOINTS_DIR_PATH,
TRAINING_PROGRESS_TRACKER_FILE_NAME,
Expand Down Expand Up @@ -831,7 +832,7 @@ def train(
training_set,
validation_set=None,
test_set=None,
save_path="model",
save_path=MODEL_FILE_NAME,
return_state_dict: bool = False,
**kwargs,
):
Expand Down
9 changes: 7 additions & 2 deletions ludwig/trainers/trainer_lightgbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,12 @@
from ludwig.distributed import init_dist_strategy
from ludwig.distributed.base import DistributedStrategy, LocalStrategy
from ludwig.features.feature_utils import LudwigFeatureDict
from ludwig.globals import is_progressbar_disabled, TRAINING_CHECKPOINTS_DIR_PATH, TRAINING_PROGRESS_TRACKER_FILE_NAME
from ludwig.globals import (
is_progressbar_disabled,
MODEL_FILE_NAME,
TRAINING_CHECKPOINTS_DIR_PATH,
TRAINING_PROGRESS_TRACKER_FILE_NAME,
)
from ludwig.models.gbm import GBM
from ludwig.models.predictor import Predictor
from ludwig.modules.metric_modules import get_improved_fn, get_initial_validation_value
Expand Down Expand Up @@ -562,7 +567,7 @@ def train(
training_set: Union["Dataset", "RayDataset"], # noqa: F821
validation_set: Optional[Union["Dataset", "RayDataset"]], # noqa: F821
test_set: Optional[Union["Dataset", "RayDataset"]], # noqa: F821
save_path="model",
save_path=MODEL_FILE_NAME,
**kwargs,
):
# ====== General setup =======
Expand Down
3 changes: 2 additions & 1 deletion ludwig/trainers/trainer_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from ludwig.data.dataset.base import Dataset
from ludwig.distributed.base import DistributedStrategy, LocalStrategy
from ludwig.features.feature_utils import LudwigFeatureDict
from ludwig.globals import MODEL_FILE_NAME
from ludwig.models.llm import LLM
from ludwig.models.predictor import LlmFineTunePredictor, LlmPredictor
from ludwig.modules.metric_modules import get_initial_validation_value
Expand Down Expand Up @@ -140,7 +141,7 @@ def train(
training_set: Dataset,
validation_set: Optional[Dataset] = None,
test_set: Optional[Dataset] = None,
save_path: str = "model",
save_path: str = MODEL_FILE_NAME,
return_state_dict: bool = False,
**kwargs,
):
Expand Down
25 changes: 20 additions & 5 deletions ludwig/upload.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import argparse
import logging
import os
import sys
from typing import Optional

from ludwig.globals import MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME, MODEL_WEIGHTS_FILE_NAME
from ludwig.utils.print_utils import get_logging_level_registry
from ludwig.utils.upload_utils import HuggingFaceHub, Predibase

Expand Down Expand Up @@ -38,9 +40,9 @@ def upload_cli(
A namespace (user or an organization) and a repo name separated
by a `/`.
model_path (`str`):
The path of the saved model. This is the top level directory where
the models weights as well as other associated training artifacts
are saved.
The path of the saved model. This is the parent-folder of the folder
where the 'model_weights' folder and the 'model_hyperparameters.json' file
are stored.
private (`bool`, *optional*, defaults to `False`):
Whether the model repo should be private.
repo_type (`str`, *optional*):
Expand All @@ -60,10 +62,23 @@ def upload_cli(
`"predibase"`.
"""
model_service = get_upload_registry().get(service, "hf_hub")
hub = model_service()
hub: HuggingFaceHub = model_service()
if os.path.exists(os.path.join(model_path, MODEL_FILE_NAME, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists(
os.path.join(model_path, MODEL_FILE_NAME, MODEL_HYPERPARAMETERS_FILE_NAME)
):
experiment_path = model_path
elif os.path.exists(os.path.join(model_path, MODEL_WEIGHTS_FILE_NAME)) and os.path.exists(
os.path.join(model_path, MODEL_HYPERPARAMETERS_FILE_NAME)
):
experiment_path = os.path.normpath(os.path.join(model_path, ".."))
else:
raise ValueError(
f"Can't find 'model_weights' and '{MODEL_HYPERPARAMETERS_FILE_NAME}' either at "
f"'{model_path}' or at '{model_path}/model'"
)
hub.upload(
repo_id=repo_id,
model_path=model_path,
model_path=experiment_path,
repo_type=repo_type,
private=private,
commit_message=commit_message,
Expand Down
Loading
Loading