Skip to content

Commit

Permalink
Refactor non-distributed automl utils into a separate directory. (#2296)
Browse files (browse the repository at this point in the history)
  • Loading branch information
justinxzhao authored Jul 22, 2022
1 parent 987626f commit b8a4cbe
Show file tree
Hide file tree
Showing 12 changed files with 18 additions and 12 deletions.
1 change: 1 addition & 0 deletions ludwig/automl/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from ludwig.automl.automl import auto_train, cli_init_config, create_auto_config, train_with_config # noqa
2 changes: 1 addition & 1 deletion ludwig/automl/auto_tune_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
raise ImportError(" ray is not installed. In order to use auto_train please run pip install ludwig[ray]")

from ludwig.api import LudwigModel
from ludwig.automl.utils import get_model_type
from ludwig.constants import (
AUTOML_DEFAULT_TEXT_ENCODER,
AUTOML_LARGE_TEXT_DATASET,
Expand All @@ -29,6 +28,7 @@
)
from ludwig.data.preprocessing import preprocess_for_training
from ludwig.features.feature_registries import update_config_with_metadata
from ludwig.utils.automl.utils import get_model_type
from ludwig.utils.defaults import merge_with_defaults
from ludwig.utils.torch_utils import initialize_pytorch

Expand Down
9 changes: 7 additions & 2 deletions ludwig/automl/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
from ludwig.api import LudwigModel
from ludwig.automl.auto_tune_config import memory_tune_config
from ludwig.automl.base_config import _create_default_config, _get_reference_configs, DatasetInfo, get_dataset_info
from ludwig.automl.ray_utils import _ray_init, get_available_resources
from ludwig.automl.utils import _add_transfer_config, get_model_type, has_imbalanced_output, set_output_feature_metric
from ludwig.constants import (
AUTOML_DEFAULT_IMAGE_ENCODER,
AUTOML_DEFAULT_TABULAR_MODEL,
Expand All @@ -34,6 +32,13 @@
from ludwig.contrib import add_contrib_callback_args
from ludwig.globals import LUDWIG_VERSION
from ludwig.hyperopt.run import hyperopt
from ludwig.utils.automl.ray_utils import _ray_init, get_available_resources
from ludwig.utils.automl.utils import (
_add_transfer_config,
get_model_type,
has_imbalanced_output,
set_output_feature_metric,
)
from ludwig.utils.defaults import default_random_seed
from ludwig.utils.misc_utils import merge_dict
from ludwig.utils.print_utils import print_ludwig
Expand Down
8 changes: 4 additions & 4 deletions ludwig/automl/base_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@
import pandas as pd
from dataclasses_json import dataclass_json, LetterCase

from ludwig.automl.data_source import DataframeSource, DataSource
from ludwig.automl.field_info import FieldConfig, FieldInfo, FieldMetadata
from ludwig.automl.ray_utils import _ray_init, get_available_resources
from ludwig.automl.type_inference import infer_type, should_exclude
from ludwig.constants import COMBINER, EXECUTOR, HYPEROPT, SCHEDULER, SEARCH_ALG, TEXT, TYPE
from ludwig.utils.automl.data_source import DataframeSource, DataSource
from ludwig.utils.automl.field_info import FieldConfig, FieldInfo, FieldMetadata
from ludwig.utils.automl.ray_utils import _ray_init, get_available_resources
from ludwig.utils.automl.type_inference import infer_type, should_exclude
from ludwig.utils.data_utils import load_dataset, load_yaml
from ludwig.utils.defaults import default_random_seed

Expand Down
Empty file added ludwig/utils/automl/__init__.py
Empty file.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from abc import ABC, abstractmethod
from typing import List, Tuple

from ludwig.automl.utils import avg_num_tokens
from ludwig.utils.audio_utils import is_audio_score
from ludwig.utils.automl.utils import avg_num_tokens
from ludwig.utils.image_utils import is_image_score
from ludwig.utils.types import DataFrame

Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Set

from ludwig.automl.field_info import FieldInfo
from ludwig.constants import AUDIO, BINARY, CATEGORY, DATE, IMAGE, NUMBER, TEXT
from ludwig.utils import strings_utils
from ludwig.utils.automl.field_info import FieldInfo

# For a given feature, the highest percentage of distinct values out of the total number of rows that we might still
# assign the CATEGORY type.
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

import pytest

from ludwig.automl.field_info import FieldInfo
from ludwig.automl.type_inference import infer_type, should_exclude
from ludwig.constants import AUDIO, BINARY, CATEGORY, DATE, IMAGE, NUMBER, TEXT
from ludwig.data.dataset_synthesizer import generate_string
from ludwig.utils.automl.field_info import FieldInfo
from ludwig.utils.automl.type_inference import infer_type, should_exclude

ROW_COUNT = 100
TARGET_NAME = "target"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import pandas as pd
import pytest

from ludwig.automl.utils import avg_num_tokens
from ludwig.utils.automl.utils import avg_num_tokens


@pytest.mark.parametrize("field,expected", [(pd.Series([None]), 0), (pd.Series(["string1", "string2", "string3"]), 1)])
Expand Down

0 comments on commit b8a4cbe

Please sign in to comment.