Skip to content
This repository has been archived by the owner on Jan 5, 2023. It is now read-only.

Merge kubeflow/katib/master to lyft/katib/master #1

Merged
merged 6 commits into from
Jun 20, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmd/cli/command/get-model.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func getModel(cmd *cobra.Command, opt *getModelOpt) {
log.Fatalf("GetModels failed: %v", err)
}
if len(r.StudyOverviews) == 0 {
log.Println("No Study fond")
log.Println("No Study found")
return
}
for _, si := range r.StudyOverviews {
Expand Down
38 changes: 11 additions & 27 deletions cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,21 +87,10 @@ func (s *server) GetTrials(ctx context.Context, in *pb.GetTrialsRequest) (*pb.Ge
}

func (s *server) GetSuggestions(ctx context.Context, in *pb.GetSuggestionsRequest) (*pb.GetSuggestionsReply, error) {
suggestAlgorithm := ""
if in.SuggestionAlgorithm != "" {
suggestAlgorithm = in.SuggestionAlgorithm
} else {
sc, err := dbIf.GetStudyConfig(in.StudyId)
if err != nil {
return &pb.GetSuggestionsReply{Trials: []*pb.Trial{}}, err
}
suggestAlgorithm = sc.DefaultSuggestionAlgorithm
}
if suggestAlgorithm == "" {
if in.SuggestionAlgorithm == "" {
return &pb.GetSuggestionsReply{Trials: []*pb.Trial{}}, errors.New("No suggest algorithm specified")
}

conn, err := grpc.Dial("vizier-suggestion-"+suggestAlgorithm+":6789", grpc.WithInsecure())
conn, err := grpc.Dial("vizier-suggestion-"+in.SuggestionAlgorithm+":6789", grpc.WithInsecure())
if err != nil {
return &pb.GetSuggestionsReply{Trials: []*pb.Trial{}}, err
}
Expand Down Expand Up @@ -149,20 +138,10 @@ func (s *server) GetWorkers(ctx context.Context, in *pb.GetWorkersRequest) (*pb.
}

func (s *server) GetShouldStopWorkers(ctx context.Context, in *pb.GetShouldStopWorkersRequest) (*pb.GetShouldStopWorkersReply, error) {
EarlyStoppingAlgorithm := ""
if in.EarlyStoppingAlgorithm != "" {
EarlyStoppingAlgorithm = in.EarlyStoppingAlgorithm
} else {
sc, err := dbIf.GetStudyConfig(in.StudyId)
if err != nil {
return &pb.GetShouldStopWorkersReply{}, err
}
EarlyStoppingAlgorithm = sc.DefaultEarlyStoppingAlgorithm
}
if EarlyStoppingAlgorithm == "" {
if in.EarlyStoppingAlgorithm == "" {
return &pb.GetShouldStopWorkersReply{}, errors.New("No EarlyStopping Algorithm specified")
}
conn, err := grpc.Dial("vizier-earlystopping-"+EarlyStoppingAlgorithm+":6789", grpc.WithInsecure())
conn, err := grpc.Dial("vizier-earlystopping-"+in.EarlyStoppingAlgorithm+":6789", grpc.WithInsecure())
if err != nil {
return &pb.GetShouldStopWorkersReply{}, err
}
Expand All @@ -188,9 +167,14 @@ func (s *server) GetMetrics(ctx context.Context, in *pb.GetMetricsRequest) (*pb.
}
mls := make([]*pb.MetricsLogSet, len(in.WorkerIds))
for i, w := range in.WorkerIds {
wr, err := s.GetWorkers(ctx, &pb.GetWorkersRequest{WorkerId: w})
if err != nil {
return &pb.GetMetricsReply{}, err
}
mls[i] = &pb.MetricsLogSet{
WorkerId: w,
MetricsLogs: make([]*pb.MetricsLog, len(mNames)),
WorkerId: w,
MetricsLogs: make([]*pb.MetricsLog, len(mNames)),
WorkerStatus: wr.Workers[0].Status,
}
for j, m := range mNames {
ls, err := dbIf.GetWorkerLogs(w, &kdb.GetWorkerLogOpts{Name: m})
Expand Down
Binary file added docs/images/SystemFlow.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
12 changes: 5 additions & 7 deletions examples/GKEDemo/git-issue-summarize-demo.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,11 @@ import (
)

var studyConfig = api.StudyConfig{
Name: "grid-demo",
Owner: "katib",
OptimizationType: api.OptimizationType_MAXIMIZE,
OptimizationGoal: 0.99,
DefaultSuggestionAlgorithm: "grid",
DefaultEarlyStoppingAlgorithm: "medianstopping",
ObjectiveValueName: "Validation-accuracy",
Name: "grid-demo",
Owner: "katib",
OptimizationType: api.OptimizationType_MAXIMIZE,
OptimizationGoal: 0.99,
ObjectiveValueName: "Validation-accuracy",
Metrics: []string{
"accuracy",
},
Expand Down
9 changes: 8 additions & 1 deletion examples/MinikubeDemo.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,21 @@ Wait until all components will be Running status.

Then, start port-forward for katib services `6789 -> manager` and `3000 -> UI`.

kubectl v1.10 or later:
```
$ kubectl -n katib port-forward svc/vizier-core 6789:6789 &
$ kubectl -n katib port-forward svc/modeldb-frontend 3000:3000 &
```

kubectl v1.9 or earlier:
```
$ kubectl -n katib port-forward $(kubectl -n katib get pod -o=name | grep vizier-core | sed -e "s@pods\/@@") 6789:6789 &
$ kubectl -n katib port-forward $(kubectl -n katib get pod -o=name | grep modeldb-frontend | sed -e "s@pods\/@@") 3000:3000 &
```

To start HyperParameter Tuning, you need a katib client.
It calls the Katib API to create a study, get suggestions, run trials, and get metrics.
The details of the system flow for the client and katib components is [here](https://docs.google.com/presentation/d/1Dk4XxKfVncb2v2CUDAd3OhM7XLyG3B9jEuuVmMiCIMg/edit#slide=id.g3b424a8f63_2_146).
The details of the system flow for the client and katib components are shown [here](../docs/images/SystemFlow.png).

An example of a client is [here](./client-example.go).
The client will read three config files.
Expand Down
23 changes: 13 additions & 10 deletions examples/client-example.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ func main() {
//RunTrials
workerIds := runTrials(c, studyId, getSuggestReply)

for !isCompletedAllWorker(c, studyId) {
for true {
time.Sleep(10 * time.Second)
getMetricsRequest := &api.GetMetricsRequest{
StudyId: studyId,
Expand All @@ -52,6 +52,9 @@ func main() {
}
//Save or Update model on ModelDB
SaveOrUpdateModel(c, getMetricsReply)
if isCompletedAllWorker(c, getMetricsReply.MetricsLogSets) {
break
}
}
}

Expand Down Expand Up @@ -217,15 +220,15 @@ func SaveOrUpdateModel(c api.ManagerClient, getMetricsReply *api.GetMetricsReply
}
}

func isCompletedAllWorker(c api.ManagerClient, studyId string) bool {
ctx := context.Background()
getWorkerRequest := &api.GetWorkersRequest{StudyId: studyId}
getWorkerReply, err := c.GetWorkers(ctx, getWorkerRequest)
if err != nil {
log.Fatalf("GetWorker Error %v", err)
}
for _, w := range getWorkerReply.Workers {
if w.Status != api.State_COMPLETED {
func isCompletedAllWorker(c api.ManagerClient, ms []*api.MetricsLogSet) bool {
// ctx := context.Background()
// getWorkerRequest := &api.GetWorkersRequest{StudyId: studyId}
// getWorkerReply, err := c.GetWorkers(ctx, getWorkerRequest)
// if err != nil {
// log.Fatalf("GetWorker Error %v", err)
// }
for _, mls := range ms {
if mls.WorkerStatus != api.State_COMPLETED {
return false
}
}
Expand Down
6 changes: 2 additions & 4 deletions examples/study-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ parameterconfigs:
- name: --lr
parametertype: 1
feasible:
min: "0.03"
max: "0.07"
min: "0.01"
max: "0.03"
- name: --num-layers
parametertype: 2
feasible:
Expand All @@ -21,8 +21,6 @@ parameterconfigs:
- sgd
- adam
- ftrl
defaultsuggestionalgorithm: random
defaultearlystoppingalgorithm: medianstopping
objectivevaluename: Validation-accuracy
metrics:
- accuracy
Expand Down
Loading