From c2fc53200b3c185fa5765f40897a09d1db3c73dc Mon Sep 17 00:00:00 2001 From: Robbert van der Gugten Date: Wed, 18 Nov 2020 10:28:28 +0100 Subject: [PATCH 1/6] Add waitAllProcesses to metricsCollector config --- .../v1beta1/file-metricscollector/main.go | 10 ++++++++-- pkg/metricscollector/v1beta1/common/const.go | 2 +- pkg/util/v1beta1/katibconfig/config.go | 7 ++++--- pkg/webhook/v1beta1/pod/inject_webhook.go | 9 ++++++--- pkg/webhook/v1beta1/pod/inject_webhook_test.go | 15 ++++++++++++++- 5 files changed, 33 insertions(+), 10 deletions(-) diff --git a/cmd/metricscollector/v1beta1/file-metricscollector/main.go b/cmd/metricscollector/v1beta1/file-metricscollector/main.go index f149899cfe8..cf0c6b5ae8c 100644 --- a/cmd/metricscollector/v1beta1/file-metricscollector/main.go +++ b/cmd/metricscollector/v1beta1/file-metricscollector/main.go @@ -107,7 +107,7 @@ var ( metricFilters = flag.String("f", "", "Metric filters") pollInterval = flag.Duration("p", common.DefaultPollInterval, "Poll interval between running processes check") timeout = flag.Duration("timeout", common.DefaultTimeout, "Timeout before invoke error during running processes check") - waitAll = flag.Bool("w", common.DefaultWaitAll, "Whether wait for all other main process of container exiting") + waitAllProcesses = flag.String("w", common.DefaultWaitAllProcesses, "Whether wait for all other main process of container exiting") stopRules stopRulesFlag isEarlyStopped = false ) @@ -353,10 +353,16 @@ func main() { go printMetricsFile(*metricsFilePath) } + waitAll, err := strconv.ParseBool(*waitAllProcesses) + if err != nil { + klog.Errorf("Cannot parse %s to bool, defaulting to waitAllProcesses=%s", *waitAllProcesses, common.DefaultWaitAllProcesses) + waitAll, _ = strconv.ParseBool(common.DefaultWaitAllProcesses) + } + wopts := common.WaitPidsOpts{ PollInterval: *pollInterval, Timeout: *timeout, - WaitAll: *waitAll, + WaitAll: waitAll, CompletedMarkedDirPath: filepath.Dir(*metricsFilePath), } if err := common.WaitMainProcesses(wopts); err != nil { diff --git a/pkg/metricscollector/v1beta1/common/const.go b/pkg/metricscollector/v1beta1/common/const.go index dab8a798146..cf1a8934a85 100644 --- a/pkg/metricscollector/v1beta1/common/const.go +++ b/pkg/metricscollector/v1beta1/common/const.go @@ -27,7 +27,7 @@ const ( // To run without timeout set value to 0 DefaultTimeout = 0 // DefaultWaitAll is the default value whether wait for all other main process of container exiting - DefaultWaitAll = true + DefaultWaitAllProcesses = "true" // TrainingCompleted is the job finished marker in $$$$.pid file when main training process is completed TrainingCompleted = "completed" diff --git a/pkg/util/v1beta1/katibconfig/config.go b/pkg/util/v1beta1/katibconfig/config.go index a53399a41b4..04b24a18251 100644 --- a/pkg/util/v1beta1/katibconfig/config.go +++ b/pkg/util/v1beta1/katibconfig/config.go @@ -28,9 +28,10 @@ type SuggestionConfig struct { // MetricsCollectorConfig is the JSON metrics collector structure in Katib config. type MetricsCollectorConfig struct { - Image string `json:"image"` - ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy"` - Resource corev1.ResourceRequirements `json:"resources"` + Image string `json:"image"` + ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy"` + Resource corev1.ResourceRequirements `json:"resources"` + WaitAllProcesses string `json:"waitAllProcesses"` } // EarlyStoppingConfig is the JSON early stopping structure in Katib config. diff --git a/pkg/webhook/v1beta1/pod/inject_webhook.go b/pkg/webhook/v1beta1/pod/inject_webhook.go index a32f83a018e..56797445d28 100644 --- a/pkg/webhook/v1beta1/pod/inject_webhook.go +++ b/pkg/webhook/v1beta1/pod/inject_webhook.go @@ -204,15 +204,15 @@ func (s *sidecarInjector) getMetricsCollectorContainer(trial *trialsv1beta1.Tria newRule := rule.Name + ";" + rule.Value + ";" + string(rule.Comparison) + ";" + strconv.Itoa(rule.StartStep) earlyStoppingRules = append(earlyStoppingRules, newRule) } + metricsCollectorConfigData, err := katibconfig.GetMetricsCollectorConfigData(mc.Collector.Kind, s.client) - args, err := s.getMetricsCollectorArgs(trial, metricNames, mc, earlyStoppingRules) + args, err := s.getMetricsCollectorArgs(trial, metricNames, mc, metricsCollectorConfigData, earlyStoppingRules) if err != nil { return nil, err } sidecarContainerName := getSidecarContainerName(trial.Spec.MetricsCollector.Collector.Kind) - metricsCollectorConfigData, err := katibconfig.GetMetricsCollectorConfigData(mc.Collector.Kind, s.client) if err != nil { return nil, err } @@ -287,7 +287,7 @@ func (s *sidecarInjector) getKatibJob(object *unstructured.Unstructured, namespa return jobKind, jobName, nil } -func (s *sidecarInjector) getMetricsCollectorArgs(trial *trialsv1beta1.Trial, metricNames string, mc common.MetricsCollectorSpec, esRules []string) ([]string, error) { +func (s *sidecarInjector) getMetricsCollectorArgs(trial *trialsv1beta1.Trial, metricNames string, mc common.MetricsCollectorSpec, metricsCollectorConfigData katibconfig.MetricsCollectorConfig, esRules []string) ([]string, error) { args := []string{"-t", trial.Name, "-m", metricNames, "-o-type", string(trial.Spec.Objective.Type), "-s-db", katibmanagerv1beta1.GetDBManagerAddr()} if mountPath, _ := getMountPath(mc); mountPath != "" { args = append(args, "-path", mountPath) @@ -295,6 +295,9 @@ func (s *sidecarInjector) getMetricsCollectorArgs(trial *trialsv1beta1.Trial, me if mc.Source != nil && mc.Source.Filter != nil && len(mc.Source.Filter.MetricsFormat) > 0 { args = append(args, "-f", strings.Join(mc.Source.Filter.MetricsFormat, ";")) } + if metricsCollectorConfigData.WaitAllProcesses != "" { + args = append(args, "-w", metricsCollectorConfigData.WaitAllProcesses) + } // Add stop rules and service endpoint for Early Stopping if len(esRules) > 0 { for _, rule := range esRules { diff --git a/pkg/webhook/v1beta1/pod/inject_webhook_test.go b/pkg/webhook/v1beta1/pod/inject_webhook_test.go index 4863d128295..0a2ba0905c3 100644 --- a/pkg/webhook/v1beta1/pod/inject_webhook_test.go +++ b/pkg/webhook/v1beta1/pod/inject_webhook_test.go @@ -3,6 +3,7 @@ package pod import ( "context" "fmt" + "github.com/kubeflow/katib/pkg/util/v1beta1/katibconfig" "path/filepath" "reflect" "sync" @@ -293,6 +294,7 @@ func TestGetMetricsCollectorArgs(t *testing.T) { MetricNames string MCSpec common.MetricsCollectorSpec EarlyStoppingRules []string + KatibConfig katibconfig.MetricsCollectorConfig ExpectedArgs []string Name string Err bool @@ -305,12 +307,16 @@ func TestGetMetricsCollectorArgs(t *testing.T) { Kind: common.StdOutCollector, }, }, + KatibConfig: katibconfig.MetricsCollectorConfig{ + WaitAllProcesses: "false", + }, ExpectedArgs: []string{ "-t", testTrialName, "-m", testMetricName, "-o-type", string(testObjective), "-s-db", katibDBAddress, "-path", common.DefaultFilePath, + "-w", "false", }, Name: "StdOut MC", }, @@ -333,6 +339,7 @@ func TestGetMetricsCollectorArgs(t *testing.T) { }, }, }, + KatibConfig: katibconfig.MetricsCollectorConfig{}, ExpectedArgs: []string{ "-t", testTrialName, "-m", testMetricName, @@ -356,6 +363,7 @@ func TestGetMetricsCollectorArgs(t *testing.T) { }, }, }, + KatibConfig: katibconfig.MetricsCollectorConfig{}, ExpectedArgs: []string{ "-t", testTrialName, "-m", testMetricName, @@ -373,6 +381,7 @@ func TestGetMetricsCollectorArgs(t *testing.T) { Kind: common.CustomCollector, }, }, + KatibConfig: katibconfig.MetricsCollectorConfig{}, ExpectedArgs: []string{ "-t", testTrialName, "-m", testMetricName, @@ -394,6 +403,7 @@ func TestGetMetricsCollectorArgs(t *testing.T) { }, }, }, + KatibConfig: katibconfig.MetricsCollectorConfig{}, ExpectedArgs: []string{ "-t", testTrialName, "-m", testMetricName, @@ -411,6 +421,7 @@ func TestGetMetricsCollectorArgs(t *testing.T) { Kind: common.PrometheusMetricCollector, }, }, + KatibConfig: katibconfig.MetricsCollectorConfig{}, ExpectedArgs: []string{ "-t", testTrialName, "-m", testMetricName, @@ -428,6 +439,7 @@ func TestGetMetricsCollectorArgs(t *testing.T) { }, }, EarlyStoppingRules: earlyStoppingRules, + KatibConfig: katibconfig.MetricsCollectorConfig{}, ExpectedArgs: []string{ "-t", testTrialName, "-m", testMetricName, @@ -452,6 +464,7 @@ func TestGetMetricsCollectorArgs(t *testing.T) { }, }, EarlyStoppingRules: earlyStoppingRules, + KatibConfig: katibconfig.MetricsCollectorConfig{}, Name: "Trial with invalid Experiment label name. Suggestion is not created", Err: true, }, @@ -465,7 +478,7 @@ func TestGetMetricsCollectorArgs(t *testing.T) { }, timeout).ShouldNot(gomega.HaveOccurred()) for _, tc := range testCases { - args, err := si.getMetricsCollectorArgs(tc.Trial, tc.MetricNames, tc.MCSpec, tc.EarlyStoppingRules) + args, err := si.getMetricsCollectorArgs(tc.Trial, tc.MetricNames, tc.MCSpec, tc.KatibConfig, tc.EarlyStoppingRules) if !tc.Err && err != nil { t.Errorf("Case: %v failed. Expected nil, got %v", tc.Name, err) From 59e63f65f89a55d137d64ab279674e390429e282 Mon Sep 17 00:00:00 2001 From: Robbert van der Gugten Date: Wed, 18 Nov 2020 10:44:02 +0100 Subject: [PATCH 2/6] wait_all_processes config for python metricscollector main --- .../v1beta1/tfevent-metricscollector/main.py | 11 +++++++++-- pkg/metricscollector/v1beta1/common/const.py | 2 +- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py b/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py index e5fe18d0fb4..0f84c5a4d68 100644 --- a/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py +++ b/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py @@ -24,7 +24,7 @@ def parse_options(): parser.add_argument("-f", "--metric_filters", type=str, default="") parser.add_argument("-p", "--poll_interval", type=int, default=const.DEFAULT_POLL_INTERVAL) parser.add_argument("-timeout", "--timeout", type=int, default=const.DEFAULT_TIMEOUT) - parser.add_argument("-w", "--wait_all", type=bool, default=const.DEFAULT_WAIT_ALL) + parser.add_argument("-w", "--wait_all_processes", type=str, default=const.DEFAULT_WAIT_ALL) opt = parser.parse_args() return opt @@ -38,6 +38,13 @@ def parse_options(): logger.addHandler(handler) logger.propagate = False opt = parse_options() + try: + wait_all_processes = bool(opt.wait_all_processes) + except: + logger.error( + "Cannot parse {} to bool, defaulting to waitAllProcesses={}".format(opt.wait_all_processes, + const.DEFAULT_WAIT_ALL)) + wait_all_processes = bool(const.DEFAULT_WAIT_ALL) db_manager_server = opt.db_manager_server_addr.split(':') if len(db_manager_server) != 2: raise Exception("Invalid Katib DB manager service address: %s" % @@ -46,7 +53,7 @@ def parse_options(): WaitMainProcesses( pool_interval=opt.poll_interval, timout=opt.timeout, - wait_all=opt.wait_all, + wait_all=wait_all_processes, completed_marked_dir=opt.metrics_file_dir) mc = MetricsCollector(opt.metric_names.split(';')) diff --git a/pkg/metricscollector/v1beta1/common/const.py b/pkg/metricscollector/v1beta1/common/const.py index fdcf4156dfb..ae9d1a70b7c 100644 --- a/pkg/metricscollector/v1beta1/common/const.py +++ b/pkg/metricscollector/v1beta1/common/const.py @@ -3,7 +3,7 @@ # Default value for timeout before invoke error during running processes check DEFAULT_TIMEOUT = 0 # Default value whether wait for all other main process of container exiting -DEFAULT_WAIT_ALL = True +DEFAULT_WAIT_ALL = "True" # Default value for directory where TF event metrics are reported DEFAULT_METRICS_FILE_DIR = "/log" # Job finished marker in $$$$.pid file when main process is completed From 3f5d81302a9ff2f43c30ab70bf9c6d0bc49b525e Mon Sep 17 00:00:00 2001 From: Robbert van der Gugten Date: Wed, 18 Nov 2020 11:14:57 +0100 Subject: [PATCH 3/6] Correct boolean check --- .../v1beta1/tfevent-metricscollector/main.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py b/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py index 0f84c5a4d68..41de9060611 100644 --- a/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py +++ b/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py @@ -38,13 +38,7 @@ def parse_options(): logger.addHandler(handler) logger.propagate = False opt = parse_options() - try: - wait_all_processes = bool(opt.wait_all_processes) - except: - logger.error( - "Cannot parse {} to bool, defaulting to waitAllProcesses={}".format(opt.wait_all_processes, - const.DEFAULT_WAIT_ALL)) - wait_all_processes = bool(const.DEFAULT_WAIT_ALL) + wait_all_processes = opt.wait_all_processes.lower() != "false" db_manager_server = opt.db_manager_server_addr.split(':') if len(db_manager_server) != 2: raise Exception("Invalid Katib DB manager service address: %s" % From 76d9373de0e797e063623bdd5193d2751d435841 Mon Sep 17 00:00:00 2001 From: Robbert van der Gugten Date: Fri, 20 Nov 2020 16:16:39 +0100 Subject: [PATCH 4/6] waitAllProcesses config as bool --- cmd/metricscollector/v1beta1/file-metricscollector/main.go | 6 +----- .../v1beta1/tfevent-metricscollector/main.py | 2 +- pkg/metricscollector/v1beta1/common/const.py | 2 +- pkg/util/v1beta1/katibconfig/config.go | 6 +++--- pkg/webhook/v1beta1/pod/inject_webhook.go | 4 ++-- pkg/webhook/v1beta1/pod/inject_webhook_test.go | 4 ++-- 6 files changed, 10 insertions(+), 14 deletions(-) diff --git a/cmd/metricscollector/v1beta1/file-metricscollector/main.go b/cmd/metricscollector/v1beta1/file-metricscollector/main.go index cf0c6b5ae8c..ced2ec2d800 100644 --- a/cmd/metricscollector/v1beta1/file-metricscollector/main.go +++ b/cmd/metricscollector/v1beta1/file-metricscollector/main.go @@ -353,11 +353,7 @@ func main() { go printMetricsFile(*metricsFilePath) } - waitAll, err := strconv.ParseBool(*waitAllProcesses) - if err != nil { - klog.Errorf("Cannot parse %s to bool, defaulting to waitAllProcesses=%s", *waitAllProcesses, common.DefaultWaitAllProcesses) - waitAll, _ = strconv.ParseBool(common.DefaultWaitAllProcesses) - } + waitAll, _ := strconv.ParseBool(*waitAllProcesses) wopts := common.WaitPidsOpts{ PollInterval: *pollInterval, diff --git a/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py b/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py index 41de9060611..6fbf47ff237 100644 --- a/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py +++ b/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py @@ -38,7 +38,7 @@ def parse_options(): logger.addHandler(handler) logger.propagate = False opt = parse_options() - wait_all_processes = opt.wait_all_processes.lower() != "false" + wait_all_processes = opt.wait_all_processes.lower() == "true" db_manager_server = opt.db_manager_server_addr.split(':') if len(db_manager_server) != 2: raise Exception("Invalid Katib DB manager service address: %s" % diff --git a/pkg/metricscollector/v1beta1/common/const.py b/pkg/metricscollector/v1beta1/common/const.py index ae9d1a70b7c..3c418816a71 100644 --- a/pkg/metricscollector/v1beta1/common/const.py +++ b/pkg/metricscollector/v1beta1/common/const.py @@ -3,7 +3,7 @@ # Default value for timeout before invoke error during running processes check DEFAULT_TIMEOUT = 0 # Default value whether wait for all other main process of container exiting -DEFAULT_WAIT_ALL = "True" +DEFAULT_WAIT_ALL_PROCESSES = "True" # Default value for directory where TF event metrics are reported DEFAULT_METRICS_FILE_DIR = "/log" # Job finished marker in $$$$.pid file when main process is completed diff --git a/pkg/util/v1beta1/katibconfig/config.go b/pkg/util/v1beta1/katibconfig/config.go index 04b24a18251..933cee335b8 100644 --- a/pkg/util/v1beta1/katibconfig/config.go +++ b/pkg/util/v1beta1/katibconfig/config.go @@ -29,9 +29,9 @@ type SuggestionConfig struct { // MetricsCollectorConfig is the JSON metrics collector structure in Katib config. type MetricsCollectorConfig struct { Image string `json:"image"` - ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy"` - Resource corev1.ResourceRequirements `json:"resources"` - WaitAllProcesses string `json:"waitAllProcesses"` + ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy,omitempty"` + Resource corev1.ResourceRequirements `json:"resources,omitempty"` + WaitAllProcesses *bool `json:"waitAllProcesses,omitempty"` } // EarlyStoppingConfig is the JSON early stopping structure in Katib config. diff --git a/pkg/webhook/v1beta1/pod/inject_webhook.go b/pkg/webhook/v1beta1/pod/inject_webhook.go index 56797445d28..26f19f9cfeb 100644 --- a/pkg/webhook/v1beta1/pod/inject_webhook.go +++ b/pkg/webhook/v1beta1/pod/inject_webhook.go @@ -295,8 +295,8 @@ func (s *sidecarInjector) getMetricsCollectorArgs(trial *trialsv1beta1.Trial, me if mc.Source != nil && mc.Source.Filter != nil && len(mc.Source.Filter.MetricsFormat) > 0 { args = append(args, "-f", strings.Join(mc.Source.Filter.MetricsFormat, ";")) } - if metricsCollectorConfigData.WaitAllProcesses != "" { - args = append(args, "-w", metricsCollectorConfigData.WaitAllProcesses) + if metricsCollectorConfigData.WaitAllProcesses != nil { + args = append(args, "-w", strconv.FormatBool(*metricsCollectorConfigData.WaitAllProcesses)) } // Add stop rules and service endpoint for Early Stopping if len(esRules) > 0 { diff --git a/pkg/webhook/v1beta1/pod/inject_webhook_test.go b/pkg/webhook/v1beta1/pod/inject_webhook_test.go index 0a2ba0905c3..c8a90430859 100644 --- a/pkg/webhook/v1beta1/pod/inject_webhook_test.go +++ b/pkg/webhook/v1beta1/pod/inject_webhook_test.go @@ -254,7 +254,7 @@ func TestGetMetricsCollectorArgs(t *testing.T) { testMetricName := "accuracy" katibDBAddress := fmt.Sprintf("katib-db-manager.%v:%v", testNamespace, consts.DefaultSuggestionPort) katibEarlyStopAddress := fmt.Sprintf("%v-%v.%v:%v", testSuggestionName, testAlgorithm, testNamespace, consts.DefaultEarlyStoppingPort) - + waitAllProcessesValue := false testPath := "/test/path" earlyStoppingRules := []string{ @@ -308,7 +308,7 @@ func TestGetMetricsCollectorArgs(t *testing.T) { }, }, KatibConfig: katibconfig.MetricsCollectorConfig{ - WaitAllProcesses: "false", + WaitAllProcesses: &waitAllProcessesValue, }, ExpectedArgs: []string{ "-t", testTrialName, From d87fd6b6d602acb486d1506fe6f1ebb47ff00f00 Mon Sep 17 00:00:00 2001 From: Robbert van der Gugten Date: Mon, 23 Nov 2020 09:52:19 +0100 Subject: [PATCH 5/6] add omitempty to suggestion and earlystopping --- pkg/util/v1beta1/katibconfig/config.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pkg/util/v1beta1/katibconfig/config.go b/pkg/util/v1beta1/katibconfig/config.go index 933cee335b8..db35f13d903 100644 --- a/pkg/util/v1beta1/katibconfig/config.go +++ b/pkg/util/v1beta1/katibconfig/config.go @@ -18,12 +18,12 @@ import ( // SuggestionConfig is the JSON suggestion structure in Katib config. type SuggestionConfig struct { Image string `json:"image"` - ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy"` - Resource corev1.ResourceRequirements `json:"resources"` - ServiceAccountName string `json:"serviceAccountName"` - VolumeMountPath string `json:"volumeMountPath"` - PersistentVolumeClaimSpec corev1.PersistentVolumeClaimSpec `json:"persistentVolumeClaimSpec"` - PersistentVolumeSpec corev1.PersistentVolumeSpec `json:"persistentVolumeSpec"` + ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy,omitempty"` + Resource corev1.ResourceRequirements `json:"resources,omitempty"` + ServiceAccountName string `json:"serviceAccountName,omitempty"` + VolumeMountPath string `json:"volumeMountPath,omitempty"` + PersistentVolumeClaimSpec corev1.PersistentVolumeClaimSpec `json:"persistentVolumeClaimSpec,omitempty"` + PersistentVolumeSpec corev1.PersistentVolumeSpec `json:"persistentVolumeSpec,omitempty"` } // MetricsCollectorConfig is the JSON metrics collector structure in Katib config. @@ -37,7 +37,7 @@ type MetricsCollectorConfig struct { // EarlyStoppingConfig is the JSON early stopping structure in Katib config. type EarlyStoppingConfig struct { Image string `json:"image"` - ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy"` + ImagePullPolicy corev1.PullPolicy `json:"imagePullPolicy,omitempty"` } // GetSuggestionConfigData gets the config data for the given suggestion algorithm name. From 0df10350974997083e79cf6eff8da9c1a6158f7c Mon Sep 17 00:00:00 2001 From: Robbert van der Gugten Date: Mon, 23 Nov 2020 17:14:09 +0100 Subject: [PATCH 6/6] correct default config --- cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py b/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py index 6fbf47ff237..d9d8c71e414 100644 --- a/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py +++ b/cmd/metricscollector/v1beta1/tfevent-metricscollector/main.py @@ -24,7 +24,7 @@ def parse_options(): parser.add_argument("-f", "--metric_filters", type=str, default="") parser.add_argument("-p", "--poll_interval", type=int, default=const.DEFAULT_POLL_INTERVAL) parser.add_argument("-timeout", "--timeout", type=int, default=const.DEFAULT_TIMEOUT) - parser.add_argument("-w", "--wait_all_processes", type=str, default=const.DEFAULT_WAIT_ALL) + parser.add_argument("-w", "--wait_all_processes", type=str, default=const.DEFAULT_WAIT_ALL_PROCESSES) opt = parser.parse_args() return opt