Add acceptance tests (#2081)
## Changes
- A new kind of test is added: acceptance tests. See acceptance/README.md
for an explanation.
- A few existing tests are converted to acceptance tests by moving their
databricks.yml into acceptance/ and adding corresponding script files.

As these tests run against the compiled binary and can capture the full
output of the command, they are useful for supporting major changes such
as refactoring internal logging/diagnostics or complex variable
interpolation.

These are currently run as part of 'make test', but the intention is to
run them as part of the integration tests as well.
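
For example, the suite can be run on its own with plain `go test` (a plausible invocation; `TestAccept` is the entry point defined in acceptance_test.go below):

```bash
go test ./acceptance -v -run TestAccept
```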

### Benefits

- The full binary is tested, exactly as users get it.
  - We're not testing a custom set of mutators, as many existing tests do.
- Nothing is mocked; the real SDK is used (although the HTTP endpoint is
not a real Databricks environment).
- Easy to maintain: output can be updated automatically.
- The external environment, such as env vars, CLI args, and the
.databrickscfg location, is easy to set up.

### Gaps

The tests currently share the test server, and there is a single global
place to define handlers. We should have a way for tests to override or
add new handlers.

## Tests
I manually checked that output of new acceptance tests matches previous
asserts.
denik authored Jan 8, 2025
1 parent 8fd793b commit 185bbd2
Showing 58 changed files with 1,462 additions and 464 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -1,6 +1,6 @@
 default: build
 
-PACKAGES=./libs/... ./internal/... ./cmd/... ./bundle/... .
+PACKAGES=./acceptance/... ./libs/... ./internal/... ./cmd/... ./bundle/... .
 
 GOTESTSUM_FORMAT ?= pkgname-and-test-fails

19 changes: 19 additions & 0 deletions acceptance/README.md
@@ -0,0 +1,19 @@
Acceptance tests are blackbox tests that are run against the compiled binary.

Currently these tests run against a "fake" HTTP server pretending to be the Databricks API. However, they will be extended to run against a real environment as regular integration tests.

To author a test:
- Add a new directory under `acceptance`. Any level of nesting is supported.
- Add a `databricks.yml` there.
- Add a `script` with commands to run, e.g. `$CLI bundle validate`. A test case is recognized by the presence of `script`.

The test runner runs the script, captures its output, and compares it with the `output.txt` file in the same directory.

To write `output.txt` for the first time, or to overwrite it with the current output, set the `TESTS_OUTPUT=OVERWRITE` env var.
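
For example (assuming the suite is invoked via `go test`, per the Makefile change in this commit):

```bash
TESTS_OUTPUT=OVERWRITE go test ./acceptance
```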

The scripts are run with `bash -e`, so any errors are propagated. Failures are captured in `output.txt` by appending an `Exit code: N` line at the end.

For more complex tests one can also use the following (see the example after this list):
- the `errcode` helper: if the command fails with a non-zero code, it appends `Exit code: N` to the output but returns success to the caller (bash), allowing the script to continue;
- the `trace` helper: prints the arguments before executing the command;
- custom output files: redirect output to a custom file (its name must start with `out`), e.g. `$CLI bundle validate > out.txt 2> out.error.txt`.
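
Putting it together, a hypothetical test at `acceptance/bundle/my_test/` (invented path) would contain a `databricks.yml`, an `output.txt`, and a `script` along these lines:

```bash
# trace echoes the command line before running it:
trace $CLI bundle validate -o json | jq .bundle.name

# A command expected to fail: errcode appends "Exit code: N" to the output
# while still returning success, so the script continues past the failure.
errcode $CLI bundle validate -t hypothetical_missing_target

# Extra output files must have names starting with "out" so the runner
# compares them against checked-in copies:
$CLI bundle validate -o json > out.validate.json
```

Running once with `TESTS_OUTPUT=OVERWRITE` then records `output.txt` and `out.validate.json` for review.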
302 changes: 302 additions & 0 deletions acceptance/acceptance_test.go
@@ -0,0 +1,302 @@
package acceptance_test

import (
    "errors"
    "fmt"
    "io"
    "os"
    "os/exec"
    "path/filepath"
    "runtime"
    "slices"
    "sort"
    "strings"
    "testing"
    "time"

    "github.com/databricks/cli/internal/testutil"
    "github.com/databricks/cli/libs/env"
    "github.com/databricks/cli/libs/testdiff"
    "github.com/stretchr/testify/require"
)

var KeepTmp = os.Getenv("KEEP_TMP") != ""

const (
    EntryPointScript = "script"
    CleanupScript    = "script.cleanup"
    PrepareScript    = "script.prepare"
)

var Scripts = map[string]bool{
    EntryPointScript: true,
    CleanupScript:    true,
    PrepareScript:    true,
}

func TestAccept(t *testing.T) {
    execPath := BuildCLI(t)
    // $CLI is what test scripts are using
    t.Setenv("CLI", execPath)

    server := StartServer(t)
    AddHandlers(server)
    // Redirect API access to local server:
    t.Setenv("DATABRICKS_HOST", fmt.Sprintf("http://127.0.0.1:%d", server.Port))
    t.Setenv("DATABRICKS_TOKEN", "dapi1234")

    homeDir := t.TempDir()
    // Do not read user's ~/.databrickscfg
    t.Setenv(env.HomeEnvVar(), homeDir)

    testDirs := getTests(t)
    require.NotEmpty(t, testDirs)
    for _, dir := range testDirs {
        t.Run(dir, func(t *testing.T) {
            t.Parallel()
            runTest(t, dir)
        })
    }
}

func getTests(t *testing.T) []string {
    testDirs := make([]string, 0, 128)

    err := filepath.Walk(".", func(path string, info os.FileInfo, err error) error {
        if err != nil {
            return err
        }
        name := filepath.Base(path)
        if name == EntryPointScript {
            // Presence of 'script' marks a test case in this directory
            testDirs = append(testDirs, filepath.Dir(path))
        }
        return nil
    })
    require.NoError(t, err)

    sort.Strings(testDirs)
    return testDirs
}

func runTest(t *testing.T, dir string) {
    var tmpDir string
    var err error
    if KeepTmp {
        tempDirBase := filepath.Join(os.TempDir(), "acceptance")
        _ = os.Mkdir(tempDirBase, 0o755)
        tmpDir, err = os.MkdirTemp(tempDirBase, "")
        require.NoError(t, err)
        t.Logf("Created directory: %s", tmpDir)
    } else {
        tmpDir = t.TempDir()
    }

    scriptContents := readMergedScriptContents(t, dir)
    testutil.WriteFile(t, filepath.Join(tmpDir, EntryPointScript), scriptContents)

    inputs := make(map[string]bool, 2)
    outputs := make(map[string]bool, 2)
    err = CopyDir(dir, tmpDir, inputs, outputs)
    require.NoError(t, err)

    args := []string{"bash", "-euo", "pipefail", EntryPointScript}
    cmd := exec.Command(args[0], args[1:]...)
    cmd.Dir = tmpDir
    outB, err := cmd.CombinedOutput()

    out := formatOutput(string(outB), err)
    out = strings.ReplaceAll(out, os.Getenv("CLI"), "$CLI")
    doComparison(t, filepath.Join(dir, "output.txt"), "script output", out)

    for key := range outputs {
        if key == "output.txt" {
            // handled above
            continue
        }
        pathNew := filepath.Join(tmpDir, key)
        newValBytes, err := os.ReadFile(pathNew)
        if err != nil {
            if errors.Is(err, os.ErrNotExist) {
                t.Errorf("%s: expected to find this file but could not (%s)", key, tmpDir)
            } else {
                t.Errorf("%s: could not read: %s", key, err)
            }
            continue
        }
        pathExpected := filepath.Join(dir, key)
        doComparison(t, pathExpected, pathNew, string(newValBytes))
    }

    // Make sure there are no unaccounted-for new files
    files, err := os.ReadDir(tmpDir)
    require.NoError(t, err)

    for _, f := range files {
        name := f.Name()
        if _, ok := inputs[name]; ok {
            continue
        }
        if _, ok := outputs[name]; ok {
            continue
        }
        t.Errorf("Unexpected output: %s", f)
        if strings.HasPrefix(name, "out") {
            // We have a new file starting with "out".
            // Show its contents & support overwrite mode for it:
            pathNew := filepath.Join(tmpDir, name)
            newVal := testutil.ReadFile(t, pathNew)
            doComparison(t, filepath.Join(dir, name), filepath.Join(tmpDir, name), newVal)
        }
    }
}

func doComparison(t *testing.T, pathExpected, pathNew, valueNew string) {
    valueNew = testdiff.NormalizeNewlines(valueNew)
    valueExpected := string(readIfExists(t, pathExpected))
    valueExpected = testdiff.NormalizeNewlines(valueExpected)
    testdiff.AssertEqualTexts(t, pathExpected, pathNew, valueExpected, valueNew)
    if testdiff.OverwriteMode {
        if valueNew != "" {
            t.Logf("Overwriting: %s", pathExpected)
            testutil.WriteFile(t, pathExpected, valueNew)
        } else {
            t.Logf("Removing: %s", pathExpected)
            _ = os.Remove(pathExpected)
        }
    }
}

// Returns combined script.prepare (root) + script.prepare (parent) + ... + script + ... + script.cleanup (parent) + ...
// Note: cleanups are not executed if the main script fails; that's not a huge issue, since everything runs in a temp dir.
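//
// For example, for the test at "bundle/override/clusters", the merged script
// consists of, in order, whichever of these files exist:
//
//     script.prepare                          (acceptance/ root)
//     bundle/script.prepare
//     bundle/override/script.prepare
//     bundle/override/clusters/script.prepare
//     bundle/override/clusters/script
//     bundle/override/clusters/script.cleanup
//     bundle/override/script.cleanup
//     bundle/script.cleanup
//     script.cleanup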
func readMergedScriptContents(t *testing.T, dir string) string {
    scriptContents := testutil.ReadFile(t, filepath.Join(dir, EntryPointScript))
    prepares := []string{}
    cleanups := []string{}

    for {
        x := readIfExists(t, filepath.Join(dir, CleanupScript))
        if len(x) > 0 {
            cleanups = append(cleanups, string(x))
        }

        x = readIfExists(t, filepath.Join(dir, PrepareScript))
        if len(x) > 0 {
            prepares = append(prepares, string(x))
        }

        if dir == "" || dir == "." {
            break
        }

        dir = filepath.Dir(dir)
        require.True(t, filepath.IsLocal(dir))
    }

    slices.Reverse(prepares)
    prepares = append(prepares, scriptContents)
    prepares = append(prepares, cleanups...)
    return strings.Join(prepares, "\n")
}

func BuildCLI(t *testing.T) string {
    cwd, err := os.Getwd()
    require.NoError(t, err)
    execPath := filepath.Join(cwd, "build", "databricks")
    if runtime.GOOS == "windows" {
        execPath += ".exe"
    }

    start := time.Now()
    args := []string{"go", "build", "-mod", "vendor", "-o", execPath}
    cmd := exec.Command(args[0], args[1:]...)
    cmd.Dir = ".."
    out, err := cmd.CombinedOutput()
    elapsed := time.Since(start)
    t.Logf("%s took %s", args, elapsed)
    require.NoError(t, err, "go build failed: %s: %s\n%s", args, err, out)
    if len(out) > 0 {
        t.Logf("go build output: %s: %s", args, out)
    }

    // Quick check + warm up cache:
    cmd = exec.Command(execPath, "--version")
    out, err = cmd.CombinedOutput()
    require.NoError(t, err, "%s --version failed: %s\n%s", execPath, err, out)
    return execPath
}

func copyFile(src, dst string) error {
    in, err := os.Open(src)
    if err != nil {
        return err
    }
    defer in.Close()

    out, err := os.Create(dst)
    if err != nil {
        return err
    }
    defer out.Close()

    _, err = io.Copy(out, in)
    return err
}

func formatOutput(out string, err error) string {
    if err == nil {
        return out
    }
    if exiterr, ok := err.(*exec.ExitError); ok {
        exitCode := exiterr.ExitCode()
        out += fmt.Sprintf("\nExit code: %d\n", exitCode)
    } else {
        out += fmt.Sprintf("\nError: %s\n", err)
    }
    return out
}

func readIfExists(t *testing.T, path string) []byte {
    data, err := os.ReadFile(path)
    if err == nil {
        return data
    }

    if !errors.Is(err, os.ErrNotExist) {
        t.Fatalf("%s: %s", path, err)
    }
    return []byte{}
}

func CopyDir(src, dst string, inputs, outputs map[string]bool) error {
    return filepath.Walk(src, func(path string, info os.FileInfo, err error) error {
        if err != nil {
            return err
        }
        name := info.Name()

        relPath, err := filepath.Rel(src, path)
        if err != nil {
            return err
        }

        if strings.HasPrefix(name, "out") {
            outputs[relPath] = true
            return nil
        } else {
            inputs[relPath] = true
        }

        if _, ok := Scripts[name]; ok {
            return nil
        }

        destPath := filepath.Join(dst, relPath)

        if info.IsDir() {
            return os.MkdirAll(destPath, info.Mode())
        }

        return copyFile(path, destPath)
    })
}
1 change: 1 addition & 0 deletions acceptance/build/.gitignore
@@ -0,0 +1 @@
databricks
@@ -1,9 +1,6 @@
 bundle:
   name: clusters
 
-workspace:
-  host: https://acme.cloud.databricks.com/
-
 resources:
   clusters:
     foo:
33 changes: 33 additions & 0 deletions acceptance/bundle/override/clusters/output.txt
@@ -0,0 +1,33 @@

>>> $CLI bundle validate -o json -t default
{
  "autoscale": {
    "max_workers": 7,
    "min_workers": 2
  },
  "cluster_name": "foo",
  "custom_tags": {},
  "node_type_id": "i3.xlarge",
  "num_workers": 2,
  "spark_conf": {
    "spark.executor.memory": "2g"
  },
  "spark_version": "13.3.x-scala2.12"
}

>>> $CLI bundle validate -o json -t development
{
  "autoscale": {
    "max_workers": 3,
    "min_workers": 1
  },
  "cluster_name": "foo-override",
  "custom_tags": {},
  "node_type_id": "m5.xlarge",
  "num_workers": 3,
  "spark_conf": {
    "spark.executor.memory": "4g",
    "spark.executor.memory2": "4g"
  },
  "spark_version": "15.2.x-scala2.12"
}
2 changes: 2 additions & 0 deletions acceptance/bundle/override/clusters/script
@@ -0,0 +1,2 @@
trace $CLI bundle validate -o json -t default | jq .resources.clusters.foo
trace $CLI bundle validate -o json -t development | jq .resources.clusters.foo
@@ -1,9 +1,6 @@
 bundle:
   name: override_job_cluster
 
-workspace:
-  host: https://acme.cloud.databricks.com/
-
 resources:
   jobs:
     foo: