-
Notifications
You must be signed in to change notification settings - Fork 442
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding random suggestion algorithm implementation and manifests #540
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
FROM python:3

# COPY is the recommended instruction for plain local files; ADD's extra
# behaviour (archive extraction, URL fetch) is not needed here.
COPY . /usr/src/app/github.com/kubeflow/katib
WORKDIR /usr/src/app/github.com/kubeflow/katib/cmd/suggestion/random/v1alpha2
RUN pip install --no-cache-dir -r requirements.txt
# Repo root (for `pkg.*` imports) plus the generated v1alpha2 Python API dir.
ENV PYTHONPATH=/usr/src/app/github.com/kubeflow/katib:/usr/src/app/github.com/kubeflow/katib/pkg/api/v1alpha2/python

ENTRYPOINT ["python", "main.py"]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import grpc | ||
import time | ||
from pkg.api.v1alpha2.python import api_pb2_grpc | ||
from pkg.suggestion.v1alpha2.random_service import RandomService | ||
from concurrent import futures | ||
|
||
_ONE_DAY_IN_SECONDS = 60 * 60 * 24 | ||
DEFAULT_PORT = "0.0.0.0:6789" | ||
|
||
def serve():
    """Start the random-suggestion gRPC server and block until interrupted.

    Registers a ``RandomService`` on a gRPC server backed by a 10-worker
    thread pool, binds it (without TLS) to ``DEFAULT_PORT`` and starts it.
    The calling thread then sleeps in a loop; on ``KeyboardInterrupt`` the
    server is stopped immediately (grace period 0).
    """
    server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
    api_pb2_grpc.add_SuggestionServicer_to_server(RandomService(), server)
    server.add_insecure_port(DEFAULT_PORT)
    print("Listening...")
    server.start()
    try:
        # start() returns right away, so keep the main thread alive here.
        while True:
            time.sleep(_ONE_DAY_IN_SECONDS)
    except KeyboardInterrupt:
        server.stop(0)
|
||
if __name__ == "__main__": | ||
serve() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
grpcio | ||
duecredit | ||
cloudpickle==0.5.6 | ||
numpy>=1.13.3 | ||
scikit-learn>=0.19.0 | ||
scipy>=0.19.1 | ||
forestci | ||
protobuf | ||
googleapis-common-protos |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Deployment for the random suggestion service.
# apps/v1 replaces the removed extensions/v1beta1 Deployment API and
# requires an explicit selector matching the pod template labels.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: katib-suggestion-random
  namespace: kubeflow
  labels:
    app: katib
    component: suggestion-random
spec:
  replicas: 1
  selector:
    matchLabels:
      app: katib
      component: suggestion-random
  template:
    metadata:
      name: katib-suggestion-random
      labels:
        app: katib
        component: suggestion-random
    spec:
      containers:
      - name: katib-suggestion-random
        image: katib/suggestion-random
        ports:
        - name: api
          containerPort: 6789
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# ClusterIP Service exposing the random suggestion gRPC port (6789) for
# pods labelled app=katib, component=suggestion-random.
apiVersion: v1
kind: Service
metadata:
  name: katib-suggestion-random
  namespace: kubeflow
  labels:
    app: katib
    component: suggestion-random
spec:
  type: ClusterIP
  ports:
  - port: 6789
    protocol: TCP
    name: api
  selector:
    app: katib
    component: suggestion-random
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import numpy as np | ||
from sklearn.preprocessing import MinMaxScaler | ||
|
||
class ParameterConfig:
    """
    Class to hold the parameter configuration for an experiment.

    Attributes:
        name_ids (dict): Mapping from a parameter name to the index of that
            parameter in the other fields.
        dim (int): Dimension of the vectors created when parameter assignments
            are mapped to a vector. Each int, double, or discrete parameter
            adds one to the dimension, and each categorical parameter adds
            the number of feasible values for that parameter due to one-hot
            encoding.
        lower_bounds (ndarray): The lower bounds for each parameter in the
            search space, stored with shape (1, dim).
        upper_bounds (ndarray): The upper bounds for each parameter in the
            search space, stored with shape (1, dim).
        parameter_types (list): The type of each parameter.
        names (list): The name of each parameter.
        discrete_info (list): A list of dicts where each dict contains the
            information for a single discrete parameter. An example of a dict
            is {"name": "discrete_parameter", "values": [2, 3, 5]}.
        categorical_info (list): A list of dicts where each dict contains the
            information for a single categorical parameter. An example dict is
            {"name": "cat_param", "values": ["true", "false"], "number": 2}.
    """

    def __init__(self, name_ids, dim, lower_bounds, upper_bounds,
                 parameter_types, names, discrete_info, categorical_info):
        """Store the configuration and normalise the bounds to (1, dim) arrays.

        Raises:
            ValueError: If ``names`` contains duplicates, or if the bounds
                cannot be reshaped to ``(1, dim)``.
        """
        self.name_ids = name_ids
        self.dim = dim
        self.lower_bounds = np.array(lower_bounds).reshape((1, dim))
        self.upper_bounds = np.array(upper_bounds).reshape((1, dim))
        self.parameter_types = parameter_types
        self.names = names
        self.discrete_info = discrete_info
        self.categorical_info = categorical_info
        if len(self.names) != len(set(self.names)):
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError("Parameter names are not unique.")

    def create_scaler(self):
        """Return a ``MinMaxScaler`` fitted on the two bound rows."""
        # Two rows (lower, upper) are enough for the scaler to learn the range
        # of every dimension.
        search_space = np.concatenate(
            (self.lower_bounds, self.upper_bounds), axis=0)
        scaler = MinMaxScaler()
        scaler.fit(search_space)
        return scaler

    def random_sample(self):
        """Draw one uniform sample between the bounds, with shape (1, dim)."""
        return np.random.uniform(self.lower_bounds, self.upper_bounds,
                                 size=(1, self.dim))
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
""" | ||
Module containing helper functions to translate objects that come | ||
to/from the grpc API into the format accepted/returned by the different | ||
suggestion generation algorithms. | ||
""" | ||
from collections.abc import Iterable | ||
from pkg.api.v1alpha2.python import api_pb2 | ||
import numpy as np | ||
from .parameter import ParameterConfig | ||
|
||
|
||
def _deal_with_discrete(feasible_values, current_value): | ||
""" function to embed the current values to the feasible discrete space""" | ||
diff = np.subtract(feasible_values, current_value) | ||
diff = np.absolute(diff) | ||
return feasible_values[np.argmin(diff)] | ||
|
||
|
||
def _deal_with_categorical(feasible_values, one_hot_values): | ||
""" function to do the one hot encoding of the categorical values """ | ||
index = np.argmax(one_hot_values) | ||
return feasible_values[int(index)] | ||
|
||
|
||
def parse_parameter_configs(parameter_configs):
    """Build a ``ParameterConfig`` from an iterable of parameter specs.

    Each spec must expose ``name``, ``parameter_type`` and ``feasible_space``
    (with ``min``/``max`` for DOUBLE and INT, ``list`` for DISCRETE and
    CATEGORICAL). DOUBLE, INT and DISCRETE parameters contribute one
    dimension each; a CATEGORICAL parameter contributes one dimension per
    feasible value, bounded by 0 and 1.

    Raises:
        ValueError: If a parameter has a type other than DOUBLE, INT,
            DISCRETE or CATEGORICAL.
    """
    # TODO(review): reviewer suggested this belongs in the ParameterConfig
    # class.
    name_ids = {}
    lower_bounds = []
    upper_bounds = []
    parameter_types = []
    names = []
    discrete_info = []
    categorical_info = []
    for param_idx, param in enumerate(parameter_configs):
        name_ids[param.name] = param_idx
        parameter_types.append(param.parameter_type)
        names.append(param.name)
        if param.parameter_type == api_pb2.DOUBLE:
            lower_bounds.append(float(param.feasible_space.min))
            upper_bounds.append(float(param.feasible_space.max))
        elif param.parameter_type == api_pb2.INT:
            lower_bounds.append(int(param.feasible_space.min))
            upper_bounds.append(int(param.feasible_space.max))
        elif param.parameter_type == api_pb2.DISCRETE:
            discrete_values = [int(x) for x in param.feasible_space.list]
            lower_bounds.append(min(discrete_values))
            upper_bounds.append(max(discrete_values))
            discrete_info.append(
                {"name": param.name, "values": discrete_values})
        elif param.parameter_type == api_pb2.CATEGORICAL:
            # One column per feasible value, each bounded by 0 and 1.
            num_feasible = len(param.feasible_space.list)
            lower_bounds.extend([0] * num_feasible)
            upper_bounds.extend([1] * num_feasible)
            categorical_info.append({
                "name": param.name,
                "values": param.feasible_space.list,
                "number": num_feasible,
            })
        else:
            # Previously this fell through and reused the previous
            # parameter's bounds (or raised NameError on the first one).
            raise ValueError(
                "Unsupported parameter type {!r} for parameter {!r}.".format(
                    param.parameter_type, param.name))
    # Every bound entry corresponds to exactly one vector dimension.
    dim = len(lower_bounds)
    return ParameterConfig(name_ids,
                           dim,
                           lower_bounds,
                           upper_bounds,
                           parameter_types,
                           names,
                           discrete_info,
                           categorical_info)
|
||
|
||
def parse_previous_observations(parameters_list, dim, name_id, types, categorical_info):
    """Encode past parameter assignments as a ``(len(parameters_list), dim)`` array.

    Args:
        parameters_list: One iterable of assignments per observation; each
            assignment exposes ``name`` and ``value``.
        dim: Total number of columns in the encoded vectors.
        name_id: Mapping from parameter name to its index in ``types``.
        types: Parameter type for each parameter index.
        categorical_info: List of dicts with ``name``, ``values`` and
            ``number`` for every categorical parameter.

    Returns:
        ndarray: DOUBLE/INT/DISCRETE values are stored as floats in their
        single column; CATEGORICAL values are one-hot encoded across
        ``number`` columns. Columns follow the parameter index order, so the
        order of assignments inside an observation does not matter.
    """
    scalar_types = (api_pb2.DOUBLE, api_pb2.INT, api_pb2.DISCRETE)
    categorical_by_name = {}
    for ci in categorical_info:
        categorical_by_name.setdefault(ci["name"], ci)
    names_by_id = {idx: name for name, idx in name_id.items()}

    # First column of each parameter, laid out in parameter-index order.
    column_start = []
    next_column = 0
    for idx, param_type in enumerate(types):
        column_start.append(next_column)
        if param_type in scalar_types:
            next_column += 1
        elif param_type == api_pb2.CATEGORICAL:
            ci = categorical_by_name.get(names_by_id.get(idx))
            if ci is not None:
                next_column += ci["number"]

    parsed_X = np.zeros(shape=(len(parameters_list), dim))
    for row_idx, parameters in enumerate(parameters_list):
        for p in parameters:
            map_id = name_id[p.name]
            start = column_start[map_id]
            if types[map_id] in scalar_types:
                parsed_X[row_idx, start] = float(p.value)
            elif types[map_id] == api_pb2.CATEGORICAL:
                ci = categorical_by_name.get(p.name)
                if ci is not None:
                    value_num = ci["values"].index(p.value)
                    parsed_X[row_idx, start + value_num] = 1
    return parsed_X
|
||
|
||
def parse_metric(y_train, goal):
    """Convert observed metric values to a float64 ndarray.

    The values are negated when ``goal`` is ``api_pb2.MINIMIZE`` (presumably
    so that downstream code can always maximize -- confirm with callers).

    Returns:
        ndarray: A new float64 array; ``y_train`` itself is not modified.
    """
    y_array = np.array(y_train, dtype=np.float64)
    if goal == api_pb2.MINIMIZE:
        y_array *= -1
    return y_array
|
||
|
||
def parse_x_next_vector(x_next, param_types, param_names, discrete_info, categorical_info):
    """Convert an encoded suggestion vector into per-parameter assignments.

    Args:
        x_next: Encoded suggestion. An ndarray is squeezed along axis 0
            first, so it is expected to have shape ``(1, dim)``.
        param_types: Type of each parameter, in vector order.
        param_names: Name of each parameter, in vector order.
        discrete_info: List of dicts with ``name`` and ``values`` for every
            discrete parameter.
        categorical_info: List of dicts with ``name``, ``values`` and
            ``number`` for every categorical parameter.

    Returns:
        list: One dict per parameter with keys ``name``, ``value``, ``type``.

    Raises:
        ValueError: If a parameter type is unsupported, or a discrete or
            categorical parameter has no matching info entry.
    """
    # NOTE(review): reviewer nit -- the "parse_*" names are misleading, since
    # nothing here parses a string.
    counter = 0
    result = []
    if isinstance(x_next, np.ndarray):
        x_next = x_next.squeeze(axis=0)
    for par_type, par_name in zip(param_types, param_names):
        if par_type == api_pb2.INT:
            value = int(round(x_next[counter], 0))
            counter += 1
        elif par_type == api_pb2.DOUBLE:
            value = float(x_next[counter])
            counter += 1
        elif par_type == api_pb2.DISCRETE:
            for param in discrete_info:
                if param["name"] == par_name:
                    value = _deal_with_discrete(param["values"],
                                                x_next[counter])
                    counter += 1
                    break
            else:
                # Without this, the previous parameter's value was reused.
                raise ValueError(
                    "No discrete info for parameter {!r}.".format(par_name))
        elif par_type == api_pb2.CATEGORICAL:
            for param in categorical_info:
                if param["name"] == par_name:
                    value = _deal_with_categorical(
                        feasible_values=param["values"],
                        one_hot_values=x_next[counter:counter + param["number"]],
                    )
                    counter += param["number"]
                    break
            else:
                raise ValueError(
                    "No categorical info for parameter {!r}.".format(par_name))
        else:
            raise ValueError(
                "Unsupported parameter type {!r} for parameter {!r}.".format(
                    par_type, par_name))
        result.append({"name": par_name, "value": value, "type": par_type})
    return result
|
||
|
||
def parse_x_next_tuple(x_next, param_types, param_names):
    """Pair each value of ``x_next`` with its parameter type and name.

    Returns:
        list: One dict per parameter with keys ``name``, ``type`` and
        ``value``; the value is converted with ``str()``. The three inputs
        are zipped, so the result is as long as the shortest of them.
    """
    return [
        {"name": param_name, "type": param_type, "value": str(value)}
        for value, param_type, param_name in zip(x_next, param_types, param_names)
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
from logging import getLogger, StreamHandler, INFO, DEBUG | ||
from pkg.api.v1alpha2.python import api_pb2 | ||
from pkg.api.v1alpha2.python import api_pb2_grpc | ||
import grpc | ||
from . import parsing_util | ||
|
||
class RandomService(api_pb2_grpc.SuggestionServicer):
    """Suggestion servicer that proposes uniformly random parameter values."""

    def __init__(self):
        # Host name and port used to reach the Katib manager.
        self.manager_addr = "katib-manager"
        self.manager_port = 6789

    def _get_experiment(self, name):
        """Fetch the experiment called ``name`` from the Katib manager."""
        # TODO(review): reviewer asked whether this should be part of a
        # common lib.
        # Import the submodule explicitly: ``import grpc`` alone does not
        # guarantee that ``grpc.beta.implementations`` is loaded.
        from grpc.beta import implementations as beta_implementations

        channel = beta_implementations.insecure_channel(
            self.manager_addr, self.manager_port)
        with api_pb2.beta_create_Manager_stub(channel) as client:
            # Second positional argument is presumably the call timeout in
            # seconds (beta stub convention) -- confirm against the stub.
            exp = client.GetExperiment(
                api_pb2.GetExperimentRequest(experiment_name=name), 10)
            return exp.experiment

    def GetSuggestions(self, request, context):
        """
        Main function to provide suggestion.

        Looks up the experiment named in ``request``, draws
        ``request.request_number`` random samples from its parameter space
        and returns them as trials in a ``GetSuggestionsReply``. Every
        assigned value is sent as a string.
        """
        experiment = self._get_experiment(request.experiment_name)
        parameter_config = parsing_util.parse_parameter_configs(
            experiment.spec.parameter_specs.parameters)

        reply = api_pb2.GetSuggestionsReply()
        for _ in range(request.request_number):
            sample = parameter_config.random_sample()
            suggestion = parsing_util.parse_x_next_vector(
                sample,
                parameter_config.parameter_types,
                parameter_config.names,
                parameter_config.discrete_info,
                parameter_config.categorical_info)
            trial_spec = api_pb2.TrialSpec()
            trial_spec.experiment_name = request.experiment_name
            for param in suggestion:
                trial_spec.parameter_assignments.assignments.add(
                    name=param['name'], value=str(param['value']))
            reply.trials.add(spec=trial_spec)

        return reply
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do these methods need to be in this class? They seem to be specific to Bayesian optimization and random search respectively, so maybe they should just be in the specific algorithms.
Also the file name should be parameter_config.py.