Skip to content

Commit

Permalink
PythonMutator: propagate source locations
Browse files Browse the repository at this point in the history
  • Loading branch information
kanterov committed Sep 20, 2024
1 parent 18bec2b commit 33f4b73
Show file tree
Hide file tree
Showing 5 changed files with 424 additions and 30 deletions.
6 changes: 6 additions & 0 deletions bundle/config/experimental.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ type PyDABs struct {
// These packages are imported to discover resources, resource generators, and mutators.
// This list can include namespace packages, which causes the import of nested packages.
Import []string `json:"import,omitempty"`

// LoadLocations is a flag to enable loading Python source locations from the PyDABs.
//
// Locations are only supported since PyDABs 0.6.0, and because of that,
// this flag is disabled by default.
LoadLocations bool `json:"load_locations,omitempty"`
}

// Command is a named string type.
// NOTE(review): presumably holds a command to execute; its usage is not visible in this hunk — confirm.
type Command string
Expand Down
163 changes: 163 additions & 0 deletions bundle/config/mutator/python/python_locations.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
package python

import (
"encoding/json"
"fmt"
"io"
"path/filepath"

"github.com/databricks/cli/libs/dyn"
)

// generatedFileName is the base name of the file whose locations
// mergePythonLocations drops when a Python location is available.
const generatedFileName = "__generated_by_pydabs__.yml"

// pythonLocations is a trie node used for efficient location lookup for a given path.
//
// Each node may carry a location of its own and links to child nodes, which are
// addressed either by a string key or by an integer index.
type pythonLocations struct {
// descendants referenced by key, e.g. '.foo'
keys map[string]*pythonLocations

// descendants referenced by index, e.g. '[0]'
indexes map[int]*pythonLocations

// location for the current node if it exists
location dyn.Location

// if true, location is present
exists bool
}

// pythonLocationEntry is the JSON shape of a single record decoded by parsePythonLocations.
type pythonLocationEntry struct {
// Path is parsed with dyn.NewPathFromString to address a node in the trie
Path string `json:"path"`
// File is copied as-is into dyn.Location.File
File string `json:"file"`
// Line is copied as-is into dyn.Location.Line
Line int `json:"line"`
// Column is copied as-is into dyn.Location.Column
Column int `json:"column"`
}

// mergePythonLocations rewrites the locations of the nodes in value using the
// locations reported by the Python mutator.
//
// For each visited path, the exact or closest-ancestor Python location (if any)
// becomes the first location of the node. Existing locations are kept after it,
// except for those pointing at the generated YAML file and for copies of the
// Python location itself. Nodes without a matching Python location are returned
// unchanged.
//
// The primary use-case is to merge locations.json with output.json, so that any
// validation errors will point to Python source code instead of generated YAML.
func mergePythonLocations(value dyn.Value, locations *pythonLocations) (dyn.Value, error) {
	visit := func(path dyn.Path, node dyn.Value) (dyn.Value, error) {
		pyLocation, found := findPythonLocation(locations, path)
		if !found {
			return node, nil
		}

		// the first item in the list is the "last" location used for error reporting
		merged := []dyn.Location{pyLocation}

		for _, existing := range node.Locations() {
			// drop locations of the generated file, and don't add
			// duplicates if the Python location is already present
			fromGeneratedFile := filepath.Base(existing.File) == generatedFileName
			if fromGeneratedFile || existing == pyLocation {
				continue
			}

			merged = append(merged, existing)
		}

		return node.WithLocations(merged), nil
	}

	return dyn.Walk(value, visit)
}

// parsePythonLocations parses locations.json from the Python mutator.
//
// The locations file is a stream of JSON objects with the pythonLocationEntry
// structure (one per line). Every entry is inserted into a freshly created trie
// under its parsed path.
//
// It returns an error, and no trie, if an entry cannot be decoded or if its
// path cannot be parsed. The underlying error is wrapped with %w so that
// callers can inspect the cause with errors.Is and errors.As.
func parsePythonLocations(input io.Reader) (*pythonLocations, error) {
	decoder := json.NewDecoder(input)
	locations := newPythonLocations()

	for decoder.More() {
		var entry pythonLocationEntry

		if err := decoder.Decode(&entry); err != nil {
			return nil, fmt.Errorf("failed to parse python location: %w", err)
		}

		path, err := dyn.NewPathFromString(entry.Path)
		if err != nil {
			return nil, fmt.Errorf("failed to parse python location: %w", err)
		}

		location := dyn.Location{
			File:   entry.File,
			Line:   entry.Line,
			Column: entry.Column,
		}

		putPythonLocation(locations, path, location)
	}

	return locations, nil
}

// putPythonLocation records location in the trie at the node addressed by path.
//
// Missing intermediate nodes are created on the way down. A location already
// stored at the target node is overwritten.
func putPythonLocation(trie *pythonLocations, path dyn.Path, location dyn.Location) {
	node := trie

	for _, component := range path {
		// a component with a non-empty key addresses a keyed child,
		// anything else is treated as an index
		if key := component.Key(); key != "" {
			child, ok := node.keys[key]
			if !ok {
				child = newPythonLocations()
				node.keys[key] = child
			}

			node = child
			continue
		}

		index := component.Index()
		child, ok := node.indexes[index]
		if !ok {
			child = newPythonLocations()
			node.indexes[index] = child
		}

		node = child
	}

	node.exists = true
	node.location = location
}

// newPythonLocations allocates an empty trie node: both child maps are
// initialized and no location is set.
func newPythonLocations() *pythonLocations {
	node := new(pythonLocations)
	node.keys = map[string]*pythonLocations{}
	node.indexes = map[int]*pythonLocations{}

	return node
}

// findPythonLocation walks the trie along path and returns the location of the
// deepest visited node that has one, i.e. the exact location or the one of the
// closest ancestor. The walk stops at the first component without a child node.
//
// The second result is false if neither the path nor any of its ancestors
// (including the root) has a location.
func findPythonLocation(locations *pythonLocations, path dyn.Path) (dyn.Location, bool) {
	node := locations
	best, found := locations.location, locations.exists

	for _, component := range path {
		var (
			child *pythonLocations
			ok    bool
		)

		if key := component.Key(); key != "" {
			child, ok = node.keys[key]
		} else {
			child, ok = node.indexes[component.Index()]
		}

		if !ok {
			break
		}

		node = child
		if node.exists {
			best, found = node.location, true
		}
	}

	return best, found
}
127 changes: 127 additions & 0 deletions bundle/config/mutator/python/python_locations_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package python

import (
"bytes"
"testing"

"github.com/databricks/cli/libs/dyn"
assert "github.com/databricks/cli/libs/dyn/dynassert"
)

// TestMergeLocations checks how mergePythonLocations combines a Python location
// registered for "foo" with the locations already present on a value tree.
func TestMergeLocations(t *testing.T) {
pythonLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
generatedLocation := dyn.Location{File: generatedFileName, Line: 1, Column: 1}
yamlLocation := dyn.Location{File: "foo.yml", Line: 1, Column: 1}

// only "foo" has a Python location; its children resolve to it as the closest ancestor
locations := newPythonLocations()
putPythonLocation(locations, dyn.MustPathFromString("foo"), pythonLocation)

input := dyn.NewValue(
map[string]dyn.Value{
"foo": dyn.NewValue(
map[string]dyn.Value{
"bar": dyn.NewValue("baz", []dyn.Location{yamlLocation, pythonLocation}),
"baz": dyn.NewValue("baz", []dyn.Location{yamlLocation}),
"qux": dyn.NewValue("baz", []dyn.Location{generatedLocation, yamlLocation}),
},
[]dyn.Location{},
),
"bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}),
},
[]dyn.Location{yamlLocation},
)

expected := dyn.NewValue(
map[string]dyn.Value{
"foo": dyn.NewValue(
map[string]dyn.Value{
// pythonLocation was present before, we don't duplicate it, only move it to the beginning
"bar": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
// pythonLocation is prepended to the list if absent
"baz": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
// generatedLocation is replaced by pythonLocation
"qux": dyn.NewValue("baz", []dyn.Location{pythonLocation, yamlLocation}),
},
[]dyn.Location{pythonLocation},
),
// if location is unknown, we keep it as-is
"bar": dyn.NewValue("baz", []dyn.Location{generatedLocation}),
},
// the root has no Python location, so its locations are untouched
[]dyn.Location{yamlLocation},
)

actual, err := mergePythonLocations(input, locations)

assert.NoError(t, err)
assert.Equal(t, expected, actual)
}

// TestFindLocation checks that a lookup for a path that was stored in the trie
// returns the location stored for exactly that path.
func TestFindLocation(t *testing.T) {
	parentLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
	childLocation := dyn.Location{File: "foo.py", Line: 2, Column: 1}

	trie := newPythonLocations()
	putPythonLocation(trie, dyn.MustPathFromString("foo"), parentLocation)
	putPythonLocation(trie, dyn.MustPathFromString("foo.bar"), childLocation)

	got, found := findPythonLocation(trie, dyn.MustPathFromString("foo.bar"))

	assert.True(t, found)
	assert.Equal(t, childLocation, got)
}

// TestFindLocation_indexPathComponent checks that a path ending in an index
// component ("foo.bar[0]") resolves to the location stored for that element.
func TestFindLocation_indexPathComponent(t *testing.T) {
	fooLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
	barLocation := dyn.Location{File: "foo.py", Line: 2, Column: 1}
	elementLocation := dyn.Location{File: "foo.py", Line: 3, Column: 1}

	trie := newPythonLocations()
	putPythonLocation(trie, dyn.MustPathFromString("foo"), fooLocation)
	putPythonLocation(trie, dyn.MustPathFromString("foo.bar"), barLocation)
	putPythonLocation(trie, dyn.MustPathFromString("foo.bar[0]"), elementLocation)

	got, found := findPythonLocation(trie, dyn.MustPathFromString("foo.bar[0]"))

	assert.True(t, found)
	assert.Equal(t, elementLocation, got)
}

// TestFindLocation_closestAncestorLocation checks that a lookup for a path that
// is not in the trie ("foo.bar.baz") falls back to the location of its deepest
// stored ancestor ("foo.bar").
func TestFindLocation_closestAncestorLocation(t *testing.T) {
	fooLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
	barLocation := dyn.Location{File: "foo.py", Line: 2, Column: 1}

	trie := newPythonLocations()
	putPythonLocation(trie, dyn.MustPathFromString("foo"), fooLocation)
	putPythonLocation(trie, dyn.MustPathFromString("foo.bar"), barLocation)

	got, found := findPythonLocation(trie, dyn.MustPathFromString("foo.bar.baz"))

	assert.True(t, found)
	assert.Equal(t, barLocation, got)
}

// TestFindLocation_unknownLocation checks that a lookup reports "not found"
// when neither the path nor any of its ancestors has a stored location.
func TestFindLocation_unknownLocation(t *testing.T) {
	fooLocation := dyn.Location{File: "foo.py", Line: 1, Column: 1}
	barLocation := dyn.Location{File: "foo.py", Line: 2, Column: 1}

	trie := newPythonLocations()
	putPythonLocation(trie, dyn.MustPathFromString("foo"), fooLocation)
	putPythonLocation(trie, dyn.MustPathFromString("foo.bar"), barLocation)

	_, found := findPythonLocation(trie, dyn.MustPathFromString("bar"))

	assert.False(t, found)
}

// TestParsePythonLocations checks that a single JSON entry is decoded and
// stored in the trie under the key taken from its "path" field.
func TestParsePythonLocations(t *testing.T) {
	const payload = `{"path": "foo", "file": "foo.py", "line": 1, "column": 2}`
	want := dyn.Location{File: "foo.py", Line: 1, Column: 2}

	trie, err := parsePythonLocations(bytes.NewBufferString(payload))

	assert.NoError(t, err)

	node := trie.keys["foo"]
	assert.True(t, node.exists)
	assert.Equal(t, want, node.location)
}
Loading

0 comments on commit 33f4b73

Please sign in to comment.