Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raise an error when there are multiple local libraries with the same basename used #2297

Merged
merged 5 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions acceptance/bundle/artifacts/same_name_libraries/databricks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
bundle:
name: same_name_libraries

variables:
cluster:
default:
spark_version: 15.4.x-scala2.12
node_type_id: i3.xlarge
data_security_mode: SINGLE_USER
num_workers: 0
spark_conf:
spark.master: "local[*, 4]"
spark.databricks.cluster.profile: singleNode
custom_tags:
ResourceClass: SingleNode

artifacts:
whl1:
type: whl
path: ./whl1
whl2:
type: whl
path: ./whl2

resources:
jobs:
test:
name: "test"
tasks:
- task_key: task1
new_cluster: ${var.cluster}
python_wheel_task:
entry_point: main
package_name: my_default_python
libraries:
- whl: ./whl1/dist/*.whl
- task_key: task2
new_cluster: ${var.cluster}
python_wheel_task:
entry_point: main
package_name: my_default_python
libraries:
- whl: ./whl2/dist/*.whl
- task_key: task3
new_cluster: ${var.cluster}
python_wheel_task:
entry_point: main
package_name: my_default_python
libraries:
- whl: ./whl1/dist/*.whl
14 changes: 14 additions & 0 deletions acceptance/bundle/artifacts/same_name_libraries/output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

>>> errcode [CLI] bundle deploy
Building whl1...
Building whl2...
Error: Duplicate local library name my_default_python-0.0.1-py3-none-any.whl
at resources.jobs.test.tasks[0].libraries[0].whl
resources.jobs.test.tasks[1].libraries[0].whl
in databricks.yml:36:15
databricks.yml:43:15

Local library names must be unique


Exit code: 1
2 changes: 2 additions & 0 deletions acceptance/bundle/artifacts/same_name_libraries/script
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
trace errcode $CLI bundle deploy
rm -rf whl1 whl2
Empty file.
36 changes: 36 additions & 0 deletions acceptance/bundle/artifacts/same_name_libraries/whl1/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
setup.py configuration script describing how to build and package this project.

This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_default_python project.
"""

from setuptools import setup, find_packages

import sys

sys.path.append("./src")

import my_default_python

setup(
name="my_default_python",
version=my_default_python.__version__,
url="https://databricks.com",
author="[USERNAME]",
description="wheel file based on my_default_python/src",
packages=find_packages(where="./src"),
package_dir={"": "src"},
entry_points={
"packages": [
"main=my_default_python.main:main",
],
},
install_requires=[
# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
"setuptools"
],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.0.1"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
print("hello")
36 changes: 36 additions & 0 deletions acceptance/bundle/artifacts/same_name_libraries/whl2/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
setup.py configuration script describing how to build and package this project.

This file is primarily used by the setuptools library and typically should not
be executed directly. See README.md for how to deploy, test, and run
the my_default_python project.
"""

from setuptools import setup, find_packages

import sys

sys.path.append("./src")

import my_default_python

setup(
name="my_default_python",
version=my_default_python.__version__,
url="https://databricks.com",
author="[USERNAME]",
description="wheel file based on my_default_python/src",
packages=find_packages(where="./src"),
package_dir={"": "src"},
entry_points={
"packages": [
"main=my_default_python.main:main",
],
},
install_requires=[
# Dependencies in case the output wheel file is used as a library dependency.
# For defining dependencies, when this package is used in Databricks, see:
# https://docs.databricks.com/dev-tools/bundles/library-dependencies.html
"setuptools"
],
)
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__version__ = "0.0.1"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
print("hello")
2 changes: 1 addition & 1 deletion bundle/libraries/expand_glob_references.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func expandLibraries(b *bundle.Bundle, p dyn.Path, v dyn.Value) (diag.Diagnostic

for _, match := range matches {
output = append(output, dyn.NewValue(map[string]dyn.Value{
libType: dyn.V(match),
libType: dyn.NewValue(match, lib.Locations()),
}, lib.Locations()))
}
}
Expand Down
97 changes: 97 additions & 0 deletions bundle/libraries/same_name_libraries.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
package libraries

import (
"context"
"path/filepath"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
)

type checkForSameNameLibraries struct{}

var patterns = []dyn.Pattern{
taskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
forEachTaskLibrariesPattern.Append(dyn.AnyIndex(), dyn.AnyKey()),
envDepsPattern.Append(dyn.AnyIndex()),
}

type libData struct {
fullPath string
locations []dyn.Location
paths []dyn.Path
}

func (c checkForSameNameLibraries) Apply(ctx context.Context, b *bundle.Bundle) diag.Diagnostics {
var diags diag.Diagnostics
libs := make(map[string]*libData)

err := b.Config.Mutate(func(v dyn.Value) (dyn.Value, error) {
var err error
for _, pattern := range patterns {
v, err = dyn.MapByPattern(v, pattern, func(p dyn.Path, lv dyn.Value) (dyn.Value, error) {
libPath := lv.MustString()
// If not local library, skip the check
if !IsLibraryLocal(libPath) {
return lv, nil
}

libFullPath := lv.MustString()
lib := filepath.Base(libFullPath)
// If the same basename was seen already but full path is different
// then it's a duplicate. Add the location to the location list.
lp, ok := libs[lib]
if !ok {
libs[lib] = &libData{
fullPath: libFullPath,
locations: []dyn.Location{lv.Location()},
paths: []dyn.Path{p},
}
} else if lp.fullPath != libFullPath {
lp.locations = append(lp.locations, lv.Location())
lp.paths = append(lp.paths, p)
}

return lv, nil
})
if err != nil {
return dyn.InvalidValue, err
}
}

if err != nil {
return dyn.InvalidValue, err
}

return v, nil
})

// Iterate over all the libraries and check if there are any duplicates.
// Duplicates will have more than one location.
// If there are duplicates, add a diagnostic.
for lib, lv := range libs {
if len(lv.locations) > 1 {
diags = append(diags, diag.Diagnostic{
Severity: diag.Error,
Summary: "Duplicate local library name " + lib,
Detail: "Local library names must be unique",
Locations: lv.locations,
Paths: lv.paths,
})
}
}
if err != nil {
diags = diags.Extend(diag.FromErr(err))
}

return diags
}

func (c checkForSameNameLibraries) Name() string {
return "CheckForSameNameLibraries"
}

func CheckForSameNameLibraries() bundle.Mutator {
return checkForSameNameLibraries{}
}
121 changes: 121 additions & 0 deletions bundle/libraries/same_name_libraries_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package libraries

import (
"context"
"testing"

"github.com/databricks/cli/bundle"
"github.com/databricks/cli/bundle/config"
"github.com/databricks/cli/bundle/config/resources"
"github.com/databricks/cli/bundle/internal/bundletest"
"github.com/databricks/cli/libs/diag"
"github.com/databricks/cli/libs/dyn"
"github.com/databricks/databricks-sdk-go/service/compute"
"github.com/databricks/databricks-sdk-go/service/jobs"
"github.com/stretchr/testify/require"
)

func TestSameNameLibraries(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"test": {
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
Libraries: []compute.Library{
{
Whl: "full/path/test.whl",
},
},
},
{
Libraries: []compute.Library{
{
Whl: "other/path/test.whl",
},
},
},
},
},
},
},
},
},
}

bundletest.SetLocation(b, "resources.jobs.test.tasks[0]", []dyn.Location{
{File: "databricks.yml", Line: 10, Column: 1},
})
bundletest.SetLocation(b, "resources.jobs.test.tasks[1]", []dyn.Location{
{File: "databricks.yml", Line: 20, Column: 1},
})

diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries())
require.Len(t, diags, 1)
require.Equal(t, diag.Error, diags[0].Severity)
require.Equal(t, "Duplicate local library name test.whl", diags[0].Summary)
require.Equal(t, []dyn.Location{
{File: "databricks.yml", Line: 10, Column: 1},
{File: "databricks.yml", Line: 20, Column: 1},
}, diags[0].Locations)

paths := make([]string, 0)
for _, p := range diags[0].Paths {
paths = append(paths, p.String())
}
require.Equal(t, []string{
"resources.jobs.test.tasks[0].libraries[0].whl",
"resources.jobs.test.tasks[1].libraries[0].whl",
}, paths)
}

func TestSameNameLibrariesWithUniqueLibraries(t *testing.T) {
b := &bundle.Bundle{
Config: config.Root{
Resources: config.Resources{
Jobs: map[string]*resources.Job{
"test": {
JobSettings: &jobs.JobSettings{
Tasks: []jobs.Task{
{
Libraries: []compute.Library{
{
Whl: "full/path/test-0.1.1.whl",
},

{
Whl: "cowsay",
},
},
},
{
Libraries: []compute.Library{
{
Whl: "other/path/test-0.1.0.whl",
},

{
Whl: "cowsay",
},
},
},
{
Libraries: []compute.Library{
{
Whl: "full/path/test-0.1.1.whl", // Use the same library as the first task
},
},
},
},
},
},
},
},
},
}

diags := bundle.Apply(context.Background(), b, CheckForSameNameLibraries())
require.Empty(t, diags)
}
5 changes: 5 additions & 0 deletions bundle/phases/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,11 @@ func Deploy(outputHandler sync.OutputHandler) bundle.Mutator {
mutator.ValidateGitDetails(),
artifacts.CleanUp(),
libraries.ExpandGlobReferences(),
// libraries.CheckForSameNameLibraries() needs to be run after we expand glob references so we
// know what are the actual library paths.
// libraries.ExpandGlobReferences() has to be run after the libraries are built and thus this
// mutator is part of the deploy step rather than validate.
libraries.CheckForSameNameLibraries(),
libraries.Upload(),
trampoline.TransformWheelTask(),
files.Upload(outputHandler),
Expand Down