From e348a869242ae08f0334c20a16433e8899669e25 Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Thu, 11 Jan 2018 12:30:59 -0800 Subject: [PATCH 1/6] Fix the junit XML file format. * Test failures weren't being properly reported to gubernator because we had the wrong XML format. We need to use a nested failure tag and specify the message inside the tag. --- py/test_util.py | 9 ++++++--- py/test_util_test.py | 6 +++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/py/test_util.py b/py/test_util.py index 0e706148c6..d15d98e91e 100644 --- a/py/test_util.py +++ b/py/test_util.py @@ -122,14 +122,17 @@ def create_junit_xml_file(test_cases, output_path, gcs_client=None): # If the time isn't set and no message is set we interpret that as # the test not being run. if not c.time and not c.failure: - attrib["failure"] = "Test was not run." + c.failure = "Test was not run." - if c.failure: - attrib["failure"] = c.failure e = ElementTree.Element("testcase", attrib) root.append(e) + if c.failure: + f = ElementTree.Element("failure") + f.text = c.failure + e.append(f) + t = ElementTree.ElementTree(root) logging.info("Creating %s", output_path) if output_path.startswith("gs://"): diff --git a/py/test_util_test.py b/py/test_util_test.py index 84649d5191..70d0c23eb8 100644 --- a/py/test_util_test.py +++ b/py/test_util_test.py @@ -28,9 +28,9 @@ def test_write_xml(self): print(output) expected = ("""""" """""" - """""") + """failed for some reason.""" + """""") self.assertEquals(expected, output) From 126117c63ed4fccb6d3e5c07dd4c4103152d0cf3 Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Thu, 11 Jan 2018 13:26:49 -0800 Subject: [PATCH 2/6] Turn a failed test into a non-zero exit status. --- py/deploy.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/py/deploy.py b/py/deploy.py index 3ae21a8ec0..942804e969 100755 --- a/py/deploy.py +++ b/py/deploy.py @@ -115,6 +115,12 @@ def test(args): util.run(["helm", "test", "tf-job"]) except subprocess.CalledProcessError as e: t.failure = "helm test failed;\n" + e.output + # Reraise the exception so that the prow job will fail and the test + # is marked as a failure. + # TODO(jlewi): It would be better to this wholistically; e.g. by + # processing all the junit xml files and checking for any failures. This + # should be more tractable when we migrate off Airflow to Argo. + raise finally: t.time = time.time() - start t.name = "e2e-test" From 6d096032b21f3d79891b69b5973509a993db025a Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Thu, 11 Jan 2018 13:30:17 -0800 Subject: [PATCH 3/6] Fix lint. --- py/prow.py | 1 - 1 file changed, 1 deletion(-) diff --git a/py/prow.py b/py/prow.py index 94f40c2cf6..c9640eebe3 100644 --- a/py/prow.py +++ b/py/prow.py @@ -18,7 +18,6 @@ import logging import os import re -import subprocess import time from py import util # pylint: disable=no-name-in-module From 845c2b69e81a0e26b24bb38f1259f4046c7ff76a Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Thu, 11 Jan 2018 14:07:49 -0800 Subject: [PATCH 4/6] Start adding a step to report all errors. --- py/deploy.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/py/deploy.py b/py/deploy.py index 942804e969..75b7ed2519 100755 --- a/py/deploy.py +++ b/py/deploy.py @@ -8,6 +8,7 @@ import logging import os import subprocess +import sys import tempfile import time @@ -136,6 +137,13 @@ def teardown(args): zone = args.zone util.delete_cluster(gke, cluster_name, project, zone) +def check_junit_xml_files(args): + """Check all the junit xml files and return a non zero exit code if there + are failures. + """ + gcs_client = storage.Client(project=args.project) + + def add_common_args(parser): """Add common command line arguments to a parser. @@ -212,9 +220,24 @@ def main(): # pylint: disable=too-many-locals parser_teardown.set_defaults(func=teardown) add_common_args(parser_teardown) + + ############################################################################# + # Check the junit files. + # + parser_check = subparsers.add_parser( + "check_junit", + help="Check junit files and return non-zero exit code if any had failures.") + parser_check.set_defaults(func=check_junit_xml_files) + add_common_args(parser_check) + # parse the args and call whatever function was selected args = parser.parse_args() - args.func(args) + + result = args.func(args) + + if result: + logging.error("Exiting with code: %s", result) + sys.exit(result) if __name__ == "__main__": main() From c2ec56fdb8b33bc2792587af9016c46252f21a3b Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Thu, 11 Jan 2018 14:14:47 -0800 Subject: [PATCH 5/6] The flag should be --controller-config-file. * This looks like a regression introduced by all the refactoring. --- cmd/tf_operator/app/options/options.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/tf_operator/app/options/options.go b/cmd/tf_operator/app/options/options.go index b2e7289ef3..7f98852ee0 100644 --- a/cmd/tf_operator/app/options/options.go +++ b/cmd/tf_operator/app/options/options.go @@ -41,5 +41,5 @@ func (s *ServerOption) AddFlags(fs *flag.FlagSet) { fs.IntVar(&s.ChaosLevel, "chaos-level", -1, "DO NOT USE IN PRODUCTION - level of chaos injected into the TfJob created by the operator.") fs.BoolVar(&s.PrintVersion, "version", false, "Show version and quit") fs.DurationVar(&s.GCInterval, "gc-interval", 10*time.Minute, "GC interval") - fs.StringVar(&s.ControllerConfigFile, "controller_config_file", "", "Path to file containing the controller config.") + fs.StringVar(&s.ControllerConfigFile, "controller-config-file", "", "Path to file containing the controller config.") } From 37ad402acd30bd20358e93a661946e1927a077af Mon Sep 17 00:00:00 2001 From: Jeremy Lewi Date: Thu, 11 Jan 2018 14:20:47 -0800 Subject: [PATCH 6/6] Revert to head. --- py/deploy.py | 25 +------------------------ py/prow.py | 1 + 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/py/deploy.py b/py/deploy.py index 75b7ed2519..942804e969 100755 --- a/py/deploy.py +++ b/py/deploy.py @@ -8,7 +8,6 @@ import logging import os import subprocess -import sys import tempfile import time @@ -137,13 +136,6 @@ def teardown(args): zone = args.zone util.delete_cluster(gke, cluster_name, project, zone) -def check_junit_xml_files(args): - """Check all the junit xml files and return a non zero exit code if there - are failures. - """ - gcs_client = storage.Client(project=args.project) - - def add_common_args(parser): """Add common command line arguments to a parser. @@ -220,24 +212,9 @@ def main(): # pylint: disable=too-many-locals parser_teardown.set_defaults(func=teardown) add_common_args(parser_teardown) - - ############################################################################# - # Check the junit files. - # - parser_check = subparsers.add_parser( - "check_junit", - help="Check junit files and return non-zero exit code if any had failures.") - parser_check.set_defaults(func=check_junit_xml_files) - add_common_args(parser_check) - # parse the args and call whatever function was selected args = parser.parse_args() - - result = args.func(args) - - if result: - logging.error("Exiting with code: %s", result) - sys.exit(result) + args.func(args) if __name__ == "__main__": main() diff --git a/py/prow.py b/py/prow.py index c9640eebe3..94f40c2cf6 100644 --- a/py/prow.py +++ b/py/prow.py @@ -18,6 +18,7 @@ import logging import os import re +import subprocess import time from py import util # pylint: disable=no-name-in-module