From fdb918b4fd3f465beb81f4de0a519b456576736d Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 13 Jun 2024 16:21:02 +0200 Subject: [PATCH 1/5] Avoid multiple file tree traversals on bundle deploy --- bundle/bundle.go | 4 ++++ bundle/deploy/files/upload.go | 2 +- bundle/deploy/state_update.go | 16 ++-------------- cmd/bundle/sync.go | 3 ++- cmd/sync/sync.go | 2 +- libs/sync/sync.go | 19 ++++++++++++------- 6 files changed, 22 insertions(+), 24 deletions(-) diff --git a/bundle/bundle.go b/bundle/bundle.go index 1dc98656a7..cb1273a771 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -16,6 +16,7 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/bundle/env" "github.com/databricks/cli/bundle/metadata" + "github.com/databricks/cli/libs/fileset" "github.com/databricks/cli/libs/folders" "github.com/databricks/cli/libs/git" "github.com/databricks/cli/libs/locker" @@ -50,6 +51,9 @@ type Bundle struct { clientOnce sync.Once client *databricks.WorkspaceClient + // Files that are synced to the workspace.file_path + SyncedFiles []fileset.File + // Stores an initialized copy of this bundle's Terraform wrapper. Terraform *tfexec.Terraform diff --git a/bundle/deploy/files/upload.go b/bundle/deploy/files/upload.go index fa20ed4eaf..eb63495302 100644 --- a/bundle/deploy/files/upload.go +++ b/bundle/deploy/files/upload.go @@ -23,7 +23,7 @@ func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return diag.FromErr(err) } - err = sync.RunOnce(ctx) + b.SyncedFiles, err = sync.RunOnce(ctx) if err != nil { return diag.FromErr(err) } diff --git a/bundle/deploy/state_update.go b/bundle/deploy/state_update.go index 6903a9f875..8e99d3f972 100644 --- a/bundle/deploy/state_update.go +++ b/bundle/deploy/state_update.go @@ -11,7 +11,6 @@ import ( "time" "github.com/databricks/cli/bundle" - "github.com/databricks/cli/bundle/deploy/files" "github.com/databricks/cli/internal/build" "github.com/databricks/cli/libs/diag" "github.com/databricks/cli/libs/log" @@ -40,19 +39,8 @@ func (s *stateUpdate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnost state.CliVersion = build.GetInfo().Version state.Version = DeploymentStateVersion - // Get the current file list. - sync, err := files.GetSync(ctx, bundle.ReadOnly(b)) - if err != nil { - return diag.FromErr(err) - } - - files, err := sync.GetFileList(ctx) - if err != nil { - return diag.FromErr(err) - } - - // Update the state with the current file list. - fl, err := FromSlice(files) + // Update the state with the current list of synced files. + fl, err := FromSlice(b.SyncedFiles) if err != nil { return diag.FromErr(err) } diff --git a/cmd/bundle/sync.go b/cmd/bundle/sync.go index 72ad8eb3a1..df3e087c2d 100644 --- a/cmd/bundle/sync.go +++ b/cmd/bundle/sync.go @@ -72,7 +72,8 @@ func newSyncCommand() *cobra.Command { return s.RunContinuous(ctx) } - return s.RunOnce(ctx) + _, err = s.RunOnce(ctx) + return err } return cmd diff --git a/cmd/sync/sync.go b/cmd/sync/sync.go index e5f1bfc9e6..bab4515937 100644 --- a/cmd/sync/sync.go +++ b/cmd/sync/sync.go @@ -135,7 +135,7 @@ func New() *cobra.Command { if f.watch { err = s.RunContinuous(ctx) } else { - err = s.RunOnce(ctx) + _, err = s.RunOnce(ctx) } s.Close() diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 585e8a887e..6d7ec76c1d 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -152,36 +152,41 @@ func (s *Sync) notifyComplete(ctx context.Context, d diff) { s.seq++ } -func (s *Sync) RunOnce(ctx context.Context) error { +// Upload all files in the file tree rooted at the local path configured in the +// SyncOptions to the remote path configured in the SyncOptions. +// +// Returns the list of files tracked by the syncer during the run, and an error +// if any occurred. +func (s *Sync) RunOnce(ctx context.Context) ([]fileset.File, error) { files, err := s.GetFileList(ctx) if err != nil { - return err + return files, err } change, err := s.snapshot.diff(ctx, files) if err != nil { - return err + return files, err } s.notifyStart(ctx, change) if change.IsEmpty() { s.notifyComplete(ctx, change) - return nil + return files, nil } err = s.applyDiff(ctx, change) if err != nil { - return err + return files, err } err = s.snapshot.Save(ctx) if err != nil { log.Errorf(ctx, "cannot store snapshot: %s", err) - return err + return files, err } s.notifyComplete(ctx, change) - return nil + return files, nil } func (s *Sync) GetFileList(ctx context.Context) ([]fileset.File, error) { From 3a5bc9ef8385cdfcaf35a5e6ad1fb64c9528cf40 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 13 Jun 2024 16:22:31 +0200 Subject: [PATCH 2/5] - --- libs/sync/sync.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 6d7ec76c1d..202ea36f32 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -155,8 +155,8 @@ func (s *Sync) notifyComplete(ctx context.Context, d diff) { // Upload all files in the file tree rooted at the local path configured in the // SyncOptions to the remote path configured in the SyncOptions. // -// Returns the list of files tracked by the syncer during the run, and an error -// if any occurred. +// Returns the list of files tracked (and synchronized) by the syncer during the run, +// and an error if any occurred. func (s *Sync) RunOnce(ctx context.Context) ([]fileset.File, error) { files, err := s.GetFileList(ctx) if err != nil { From 1bac5930007035a456093e749b0a77ec1bc70d40 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Thu, 13 Jun 2024 16:26:53 +0200 Subject: [PATCH 3/5] fix build --- libs/sync/sync.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/sync/sync.go b/libs/sync/sync.go index 202ea36f32..12b1f1d05f 100644 --- a/libs/sync/sync.go +++ b/libs/sync/sync.go @@ -236,7 +236,7 @@ func (s *Sync) RunContinuous(ctx context.Context) error { case <-ctx.Done(): return ctx.Err() case <-ticker.C: - err := s.RunOnce(ctx) + _, err := s.RunOnce(ctx) if err != nil { return err } From 33044043c12e8def0633d7ed0552df7df0e82d6e Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Fri, 14 Jun 2024 14:28:21 +0200 Subject: [PATCH 4/5] fix tests --- bundle/deploy/state_update_test.go | 101 +++++++++++++---------------- 1 file changed, 45 insertions(+), 56 deletions(-) diff --git a/bundle/deploy/state_update_test.go b/bundle/deploy/state_update_test.go index dd8a1336ec..4638f99779 100644 --- a/bundle/deploy/state_update_test.go +++ b/bundle/deploy/state_update_test.go @@ -10,19 +10,23 @@ import ( "github.com/databricks/cli/bundle/config" "github.com/databricks/cli/internal/build" "github.com/databricks/cli/internal/testutil" - databrickscfg "github.com/databricks/databricks-sdk-go/config" - "github.com/databricks/databricks-sdk-go/experimental/mocks" + "github.com/databricks/cli/libs/fileset" + "github.com/databricks/cli/libs/vfs" "github.com/databricks/databricks-sdk-go/service/iam" - "github.com/databricks/databricks-sdk-go/service/workspace" - "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" ) -func TestStateUpdate(t *testing.T) { - s := &stateUpdate{} +func setupBundleForStateUpdate(t *testing.T) *bundle.Bundle { + tmpDir := t.TempDir() + + testutil.Touch(t, tmpDir, "test1.py") + testutil.TouchNotebook(t, tmpDir, "test2.py") + + files, err := fileset.New(vfs.MustNew(tmpDir)).All() + require.NoError(t, err) - b := &bundle.Bundle{ - RootPath: t.TempDir(), + return &bundle.Bundle{ + RootPath: tmpDir, Config: config.Root{ Bundle: config.Bundle{ Target: "default", @@ -37,22 +41,14 @@ func TestStateUpdate(t *testing.T) { }, }, }, + SyncedFiles: files, } +} - testutil.Touch(t, b.RootPath, "test1.py") - testutil.Touch(t, b.RootPath, "test2.py") - - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &databrickscfg.Config{ - Host: "https://test.com", - } - b.SetWorkpaceClient(m.WorkspaceClient) - - wsApi := m.GetMockWorkspaceAPI() - wsApi.EXPECT().GetStatusByPath(mock.Anything, "/files").Return(&workspace.ObjectInfo{ - ObjectType: "DIRECTORY", - }, nil) +func TestStateUpdate(t *testing.T) { + s := &stateUpdate{} + b := setupBundleForStateUpdate(t) ctx := context.Background() diags := bundle.Apply(ctx, b, s) @@ -63,7 +59,15 @@ func TestStateUpdate(t *testing.T) { require.NoError(t, err) require.Equal(t, int64(1), state.Seq) - require.Len(t, state.Files, 3) + require.Equal(t, state.Files, Filelist{ + { + LocalPath: "test1.py", + }, + { + LocalPath: "test2.py", + IsNotebook: true, + }, + }) require.Equal(t, build.GetInfo().Version, state.CliVersion) diags = bundle.Apply(ctx, b, s) @@ -74,45 +78,22 @@ func TestStateUpdate(t *testing.T) { require.NoError(t, err) require.Equal(t, int64(2), state.Seq) - require.Len(t, state.Files, 3) + require.Equal(t, state.Files, Filelist{ + { + LocalPath: "test1.py", + }, + { + LocalPath: "test2.py", + IsNotebook: true, + }, + }) require.Equal(t, build.GetInfo().Version, state.CliVersion) } func TestStateUpdateWithExistingState(t *testing.T) { s := &stateUpdate{} - b := &bundle.Bundle{ - RootPath: t.TempDir(), - Config: config.Root{ - Bundle: config.Bundle{ - Target: "default", - }, - Workspace: config.Workspace{ - StatePath: "/state", - FilePath: "/files", - CurrentUser: &config.User{ - User: &iam.User{ - UserName: "test-user", - }, - }, - }, - }, - } - - testutil.Touch(t, b.RootPath, "test1.py") - testutil.Touch(t, b.RootPath, "test2.py") - - m := mocks.NewMockWorkspaceClient(t) - m.WorkspaceClient.Config = &databrickscfg.Config{ - Host: "https://test.com", - } - b.SetWorkpaceClient(m.WorkspaceClient) - - wsApi := m.GetMockWorkspaceAPI() - wsApi.EXPECT().GetStatusByPath(mock.Anything, "/files").Return(&workspace.ObjectInfo{ - ObjectType: "DIRECTORY", - }, nil) - + b := setupBundleForStateUpdate(t) ctx := context.Background() // Create an existing state file. @@ -144,6 +125,14 @@ func TestStateUpdateWithExistingState(t *testing.T) { require.NoError(t, err) require.Equal(t, int64(11), state.Seq) - require.Len(t, state.Files, 3) + require.Equal(t, state.Files, Filelist{ + { + LocalPath: "test1.py", + }, + { + LocalPath: "test2.py", + IsNotebook: true, + }, + }) require.Equal(t, build.GetInfo().Version, state.CliVersion) } From a3364270bfbabdf5152849a50fb3171582a3f773 Mon Sep 17 00:00:00 2001 From: Shreyas Goenka Date: Mon, 17 Jun 2024 11:43:38 +0200 Subject: [PATCH 5/5] SyncedFiles -> Files --- bundle/bundle.go | 2 +- bundle/deploy/files/upload.go | 2 +- bundle/deploy/state_update.go | 2 +- bundle/deploy/state_update_test.go | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/bundle/bundle.go b/bundle/bundle.go index cb1273a771..482614b9a6 100644 --- a/bundle/bundle.go +++ b/bundle/bundle.go @@ -52,7 +52,7 @@ type Bundle struct { client *databricks.WorkspaceClient // Files that are synced to the workspace.file_path - SyncedFiles []fileset.File + Files []fileset.File // Stores an initialized copy of this bundle's Terraform wrapper. Terraform *tfexec.Terraform diff --git a/bundle/deploy/files/upload.go b/bundle/deploy/files/upload.go index eb63495302..2c126623e6 100644 --- a/bundle/deploy/files/upload.go +++ b/bundle/deploy/files/upload.go @@ -23,7 +23,7 @@ func (m *upload) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics { return diag.FromErr(err) } - b.SyncedFiles, err = sync.RunOnce(ctx) + b.Files, err = sync.RunOnce(ctx) if err != nil { return diag.FromErr(err) } diff --git a/bundle/deploy/state_update.go b/bundle/deploy/state_update.go index 8e99d3f972..bfdb308c46 100644 --- a/bundle/deploy/state_update.go +++ b/bundle/deploy/state_update.go @@ -40,7 +40,7 @@ func (s *stateUpdate) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnost state.Version = DeploymentStateVersion // Update the state with the current list of synced files. - fl, err := FromSlice(b.SyncedFiles) + fl, err := FromSlice(b.Files) if err != nil { return diag.FromErr(err) } diff --git a/bundle/deploy/state_update_test.go b/bundle/deploy/state_update_test.go index 4638f99779..ed72439d2f 100644 --- a/bundle/deploy/state_update_test.go +++ b/bundle/deploy/state_update_test.go @@ -41,7 +41,7 @@ func setupBundleForStateUpdate(t *testing.T) *bundle.Bundle { }, }, }, - SyncedFiles: files, + Files: files, } }