Skip to content

Commit

Permalink
Store mlflow tracking URI to ensure consistency across processes (#2927)
Browse files Browse the repository at this point in the history
  • Loading branch information
tgaddair authored Jan 13, 2023
1 parent dd2123d commit f433b89
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
2 changes: 1 addition & 1 deletion ludwig/contribs/mlflow/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class MlflowCallback(Callback):
def __init__(self, tracking_uri=None, log_artifacts: bool = True):
if tracking_uri:
mlflow.set_tracking_uri(tracking_uri)
self.tracking_uri = mlflow.get_tracking_uri()

active_run = mlflow.active_run()
if active_run is not None:
Expand All @@ -56,7 +57,6 @@ def __init__(self, tracking_uri=None, log_artifacts: bool = True):
self.external_run = False

self.run_ended = False
self.tracking_uri = tracking_uri
self.training_set_metadata = None
self.config = None
self.save_in_background = True
Expand Down
26 changes: 26 additions & 0 deletions tests/integration_tests/test_mlflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,3 +170,29 @@ def test_export_mlflow_local(tmpdir):
output_path = os.path.join(tmpdir, "data/results/mlflow")
export_mlflow(model_path, output_path)
assert set(os.listdir(output_path)) == {"MLmodel", "model", "conda.yaml"}


@pytest.mark.distributed
def test_mlflow_ray(tmpdir, ray_cluster_2cpu):
    """Smoke-test training on the Ray backend with an ``MlflowCallback`` attached.

    Builds a tiny sequence -> category config, generates a 32-row synthetic
    CSV under ``tmpdir``, and trains for 2 epochs with ``backend="ray"``.
    The test makes no assertions of its own: it passes as long as training
    finishes without raising and ``train`` returns a 3-element result.

    Args:
        tmpdir: pytest temporary directory used for the dataset and outputs.
        ray_cluster_2cpu: fixture requested for its side effect only; it is
            never referenced in the body (presumably it starts a 2-CPU Ray
            cluster -- confirm against the fixture definition).
    """
    features_in = [sequence_feature(reduce_output="sum")]
    features_out = [category_feature(vocab_size=2, reduce_input="sum", output_feature=True)]

    # Keep the run small: 2 epochs over 32 examples in batches of 8.
    trainer_settings = {"epochs": 2, "batch_size": 8}
    config = {
        "input_features": features_in,
        "output_features": features_out,
        "combiner": {"type": "concat", "output_size": 14},
        TRAINER: trainer_settings,
    }

    csv_path = os.path.join(tmpdir, "train.csv")
    data_csv = generate_data(features_in, features_out, csv_path, num_examples=32)

    # The callback is created without an explicit tracking URI on purpose;
    # it falls back to whatever mlflow currently reports.
    model = LudwigModel(config, callbacks=[MlflowCallback()], backend="ray")

    # Unpack the 3-tuple to keep the same arity check as a plain call would skip.
    _stats, _preprocessed, _results_dir = model.train(
        training_set=data_csv,
        experiment_name="mlflow_test",
        output_directory=os.path.join(tmpdir, "output"),
    )

0 comments on commit f433b89

Please sign in to comment.