From f02e8da1c894e6f384c0d4e2447e4509fe74054a Mon Sep 17 00:00:00 2001 From: Daniel <63580393+danrgll@users.noreply.github.com> Date: Mon, 4 Dec 2023 10:48:24 +0100 Subject: [PATCH] fix issue regarding tests and search space from yaml file --- docs/pipeline_space.md | 39 +++++++++++---- src/neps/search_spaces/search_space.py | 50 +++++++++++-------- .../config_including_wrong_types.yaml | 2 +- .../correct_config.yaml | 14 ++++++ .../correct_config_including_types.yaml | 19 ++++++- .../test_search_space.py | 21 ++++++++ 6 files changed, 110 insertions(+), 35 deletions(-) diff --git a/docs/pipeline_space.md b/docs/pipeline_space.md index 3eaeb57e..75148816 100644 --- a/docs/pipeline_space.md +++ b/docs/pipeline_space.md @@ -7,7 +7,6 @@ In NePS, defining the Search Space is one of two essential tasks. You can define To define the Search Space using a Python dictionary, follow these steps: - Create a Python dictionary that specifies the parameters and their respective ranges. For example: ```python @@ -15,19 +14,18 @@ search_space = { "learning_rate": neps.FloatParameter(lower=0.00001, upper=0.1, log=True), "num_epochs": neps.IntegerParameter(lower=3, upper=30, is_fidelity=True), "optimizer": neps.CategoricalParameter(choices=["adam", "sgd", "rmsprop"]), - "dropout_rate": neps.FloatParameter(value=0.5) + "dropout_rate": neps.FloatParameter(value=0.5), } - ``` ## Option 2: Using a YAML File -Create a YAML file (e.g., search_space.yaml) with the parameter definitions following this structure. +Create a YAML file (e.g., search_space.yaml) with the parameter definitions following this structure. ```yaml search_space: # important to start with learning_rate: - lower: 0.00001 + lower: 2e-3 # or 2*10^-3 upper: 0.1 log: true @@ -43,50 +41,69 @@ search_space: # important to start with value: 0.5 ... ``` + Ensure your YAML file starts with `search_space:`. This is the root key under which all parameter configurations are defined. 
## Option 3: Using ConfigSpace + For users familiar with the ConfigSpace library, can also define the Search Space through ConfigurationSpace() + ```python from configspace import ConfigurationSpace, UniformFloatHyperparameter configspace = ConfigurationSpace() -configspace.add_hyperparameter(UniformFloatHyperparameter("learning_rate", 0.00001, 0.1, log=True)) +configspace.add_hyperparameter( + UniformFloatHyperparameter("learning_rate", 0.00001, 0.1, log=True) +) ``` -Link: https://github.com/automl/ConfigSpace + +For additional information on ConfigSpace and its features, please visit the following link: +https://github.com/automl/ConfigSpace # Supported HyperParameter Types ### FloatParameter and IntegerParameter + - **Expected Arguments:** - `lower`: The minimum value of the parameter. - `upper`: The maximum value of the parameter. - **Optional Arguments:** + - `type`: Specifies the data type of the parameter. Accepted Values: 'int', 'integer', or 'float'. - `log`: Indicates if the parameter uses a logarithmic scale (default: False). - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). - - `default`: Sets a prior central value for the parameter (default: None. + - `default`: Sets a prior central value for the parameter (default: None). - `default_confidence`: Specifies the confidence level of the default value, - indicating how strongly the prior - should be considered default: "low". + indicating how strongly the prior + should be considered (default: "low"). ### Categorical Parameter + - **Expected Arguments:** - `choices`: A list of discrete options that the parameter can take. - **Optional Arguments:** + - `type`: Specifies the data type of the parameter. Accepted Values: 'cat' or 'categorical'. - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). - `default`: Sets a prior central value for the parameter (default: None. 
- `default_confidence`: Specifies the confidence level of the default value, - indicating how strongly the prior + indicating how strongly the prior should be considered default: "low". ### ConstantParameter + - **Expected Arguments:** - `value`: The fixed value for the parameter. - **Optional Arguments:** + - `type`: Specifies the data type of the parameter. Accepted Values: 'const' or 'constant'. - `is_fidelity`: Marks the parameter as a fidelity parameter (default: False). # Supported ArchitectureParameter Types +**Note**: The definition of Search Space from a YAML file is limited to supporting only Hyperparameter Types. + +If you are interested in exploring Architecture, particularly Hierarchical parameters, you can find detailed examples and usage in the following resources: + +- [Basic Usage Examples](https://github.com/automl/neps/tree/master/neps_examples/basic_usage) - This link provides basic usage examples that can help you understand the fundamentals of Architecture parameters. +- [Experimental Examples](https://github.com/automl/neps/tree/master/neps_examples/experimental) - For more advanced and experimental use cases, including Hierarchical parameters, check out this collection of examples. diff --git a/src/neps/search_spaces/search_space.py b/src/neps/search_spaces/search_space.py index 1679f0ee..f7b449d9 100644 --- a/src/neps/search_spaces/search_space.py +++ b/src/neps/search_spaces/search_space.py @@ -146,18 +146,19 @@ def pipeline_space_from_yaml(yaml_file_path): if not isinstance(details["lower"], int) or not isinstance( details["upper"], int ): - # for numbers like 1e2 - details["lower"] = int( - convert_scientific_notation(details["lower"]) - ) - details["upper"] = int( - convert_scientific_notation(details["upper"]) - ) - else: - raise TypeError( - f"'lower' and 'upper' must be integer for " - f"integer parameter '{name}'." 
- ) + try: + # for numbers like 1e2 and 10^ + details["lower"] = int( + convert_scientific_notation(details["lower"]) + ) + details["upper"] = int( + convert_scientific_notation(details["upper"]) + ) + except ValueError as e: + raise TypeError( + f"'lower' and 'upper' must be integer for " + f"integer parameter '{name}'." + ) from e pipeline_space[name] = IntegerParameter( lower=details["lower"], @@ -178,14 +179,19 @@ def pipeline_space_from_yaml(yaml_file_path): if not isinstance(details["lower"], float) or not isinstance( details["upper"], float ): - # for numbers like 1e-5 - details["lower"] = convert_scientific_notation(details["lower"]) - details["upper"] = convert_scientific_notation(details["upper"]) - else: - raise TypeError( - f"'lower' and 'upper' must be integer for " - f"integer parameter '{name}'." - ) + try: + # for numbers like 1e-5 and 10^ + details["lower"] = convert_scientific_notation( + details["lower"] + ) + details["upper"] = convert_scientific_notation( + details["upper"] + ) + except ValueError as e: + raise TypeError( + f"'lower' and 'upper' must be integer for " + f"integer parameter '{name}'." 
+ ) from e pipeline_space[name] = FloatParameter( lower=details["lower"], @@ -243,7 +249,7 @@ def convert_scientific_notation(value, show_usage_flag=False): e_notation_pattern = r"^-?\d+(\.\d+)?[eE]-?\d+$" # Pattern for '10^' style notation, with optional base and multiplication symbol - ten_power_notation_pattern = r"^(-?\d+)?(\.\d+)?\*?10\^(-?\d+)$" + ten_power_notation_pattern = r"^(-?\d+)?(\.\d+)?[xX*]?10\^(-?\d+)$" if isinstance(value, str): # Remove all whitespace from the string @@ -260,7 +266,7 @@ def convert_scientific_notation(value, show_usage_flag=False): if decimal: base = base + decimal base = float(base) if base else 1 # Default to 1 if base is empty - value = base * (10 ** float(exponent)) + value = format(base * (10 ** float(exponent)), "e") if show_usage_flag is True: return float(value), True else: diff --git a/tests/test_yaml_search_space/config_including_wrong_types.yaml b/tests/test_yaml_search_space/config_including_wrong_types.yaml index 7cd7deda..69ac631c 100644 --- a/tests/test_yaml_search_space/config_including_wrong_types.yaml +++ b/tests/test_yaml_search_space/config_including_wrong_types.yaml @@ -6,7 +6,7 @@ search_space: log: true num_epochs: - type: float + type: cat lower: 3 upper: 30 is_fidelity: True diff --git a/tests/test_yaml_search_space/correct_config.yaml b/tests/test_yaml_search_space/correct_config.yaml index 48ebe477..71a5b114 100644 --- a/tests/test_yaml_search_space/correct_config.yaml +++ b/tests/test_yaml_search_space/correct_config.yaml @@ -9,6 +9,20 @@ search_space: upper: 30 log: false is_fidelity: True + batch_size: + type: int + lower: 10^2 + upper: 3 * 10^4 + log: true + is_fidelity: false + + sec_learning_rate: + lower: 3.3e-5 + upper: 1E-1 + + parameter_ex: + lower: 3.3 x 10^-5 + upper: 3.2*10^1 optimizer: choices: ["adam", "sgd", "rmsprop"] diff --git a/tests/test_yaml_search_space/correct_config_including_types.yaml b/tests/test_yaml_search_space/correct_config_including_types.yaml index 5fdb7400..751f0db1 
100644 --- a/tests/test_yaml_search_space/correct_config_including_types.yaml +++ b/tests/test_yaml_search_space/correct_config_including_types.yaml @@ -6,11 +6,28 @@ search_space: log: true num_epochs: - type: int + type: integer lower: 3 upper: 30 is_fidelity: True + batch_size: + type: "int" + lower: 10^2 + upper: 3 * 10^4 + log: true + is_fidelity: false + + sec_learning_rate: + type: "float" + lower: 3.3e-5 + upper: 1E-1 + + parameter_ex: + type: float + lower: 3.3 x 10^-5 + upper: 3.2*10^1 + optimizer: type: cat choices: ["adam", "sgd", "rmsprop"] diff --git a/tests/test_yaml_search_space/test_search_space.py b/tests/test_yaml_search_space/test_search_space.py index 7bfbcf70..4ec7814c 100644 --- a/tests/test_yaml_search_space/test_search_space.py +++ b/tests/test_yaml_search_space/test_search_space.py @@ -27,6 +27,27 @@ def test_correct_yaml_file(path): assert pipeline_space["num_epochs"].is_fidelity is True assert pipeline_space["num_epochs"].default is None assert pipeline_space["num_epochs"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["batch_size"], IntegerParameter) + assert pipeline_space["batch_size"].lower == 100 + assert pipeline_space["batch_size"].upper == 30000 + assert pipeline_space["batch_size"].log is True + assert pipeline_space["batch_size"].is_fidelity is False + assert pipeline_space["batch_size"].default is None + assert pipeline_space["batch_size"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["sec_learning_rate"], FloatParameter) + assert pipeline_space["sec_learning_rate"].lower == 3.3e-5 + assert pipeline_space["sec_learning_rate"].upper == 0.1 + assert pipeline_space["sec_learning_rate"].log is False + assert pipeline_space["sec_learning_rate"].is_fidelity is False + assert pipeline_space["sec_learning_rate"].default is None + assert pipeline_space["sec_learning_rate"].default_confidence_score == 0.5 + assert isinstance(pipeline_space["parameter_ex"], FloatParameter) + assert 
pipeline_space["parameter_ex"].lower == 3.3e-5 + assert pipeline_space["parameter_ex"].upper == 32.0 + assert pipeline_space["parameter_ex"].log is False + assert pipeline_space["parameter_ex"].is_fidelity is False + assert pipeline_space["parameter_ex"].default is None + assert pipeline_space["parameter_ex"].default_confidence_score == 0.5 assert isinstance(pipeline_space["optimizer"], CategoricalParameter) assert pipeline_space["optimizer"].choices == ["adam", "sgd", "rmsprop"] assert pipeline_space["optimizer"].is_fidelity is False