Skip to content

Commit

Permalink
Make the test robust to test flakes due to problems initializing the …
Browse files Browse the repository at this point in the history
…ksonnet app.

* Skip installing the default registries because we don't need them, and talking
  to Git just creates a source of flakiness.

* Add retries to setting up the ksonnet app.

Fix kubeflow#1128
  • Loading branch information
jlewi committed Jul 6, 2018
1 parent a50c2fb commit b63041f
Showing 1 changed file with 26 additions and 4 deletions.
30 changes: 26 additions & 4 deletions testing/tf_job_simple_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
import argparse
import logging
import os

import re
import subprocess
from kubeflow.testing import test_helper, util
from retrying import retry

Expand Down Expand Up @@ -67,12 +68,33 @@ def wait_for_tf_job():
raise Exception("Could not find services with label tf_job_name=mycnnjob")
logging.info("Found services with label tf_job_name=mycnnjob")

@retry(stop_max_attempt_number=3)
def test_tf_job_simple(test_case): # pylint: disable=redefined-outer-name
args = parse_args()
util.run(["ks", "init", "tf-job-simple-app"])
try:
util.run(["ks", "init", "tf-job-simple-app", "--skip-default-registries"])
except subprocess.CalledProcessError as e:
# Keep going if the app already exists. This is a sign that a previous
# attempt failed and we are retrying.
if not re.search(".*already exists.*", e.output):
raise

os.chdir("tf-job-simple-app")
util.run(["ks", "registry", "add", "kubeflow", args.src_dir + "/kubeflow"])
util.run(["ks", "pkg", "install", "kubeflow/examples"])
try:
util.run(["ks", "pkg", "install", "kubeflow/examples"])
except subprocess.CalledProcessError as e:
# Keep going if the registry has already been added.
# This is a sign that a previous attempt failed and we are retrying.
if not re.search(".*already exists.*", e.output):
raise

try:
util.run(["ks", "pkg", "install", "kubeflow/examples"])
except subprocess.CalledProcessError as e:
# Keep going if the package has already been added.
# This is a sign that a previous attempt failed and we are retrying.
if not re.search(".*already exists.*", e.output):
raise

if args.tf_job_version == "v1alpha2":
prototype_name = "tf-job-simple"
Expand Down

0 comments on commit b63041f

Please sign in to comment.