Add semantic tokens for escape sequences; fix escape sequences in textmate grammar
kralicky committed Apr 2, 2024
1 parent 65b87a7 commit d4dcfff
Showing 5 changed files with 173 additions and 6 deletions.
26 changes: 24 additions & 2 deletions editors/vscode/syntaxes/protobuf.tmLanguage.json
@@ -47,12 +47,34 @@
{
"name": "string.quoted.double.protobuf",
"begin": "\"",
"end": "\""
"end": "\"",
"patterns": [
{
"include": "#string_escape_char"
}
]
},
{
"name": "string.quoted.single.protobuf",
"begin": "'",
"end": "'"
"end": "'",
"patterns": [
{
"include": "#string_escape_char"
}
]
}
]
},
"string_escape_char": {
"patterns": [
{
"match": "\\\\([0-7]{1,3}|[abfnrtv\\\\'\"]|[xX][0-9a-fA-F]{1,2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})",
"name": "constant.character.escape.protobuf"
},
{
"match": "\\\\[^0-7xuUabfnrtv\\'\"]",
"name": "invalid.illegal.unknown-escape.protobuf"
}
]
},
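As a quick illustration of the two rules above, consider a hypothetical snippet (not part of this commit):

option (example.note) = "ok \n \x7f \u1234 \377 bad \q \8";

The first pattern marks \n, \x7f, \u1234, and \377 as constant.character.escape.protobuf; the second flags \q and \8 (a backslash followed by anything outside the known escape set) as invalid.illegal.unknown-escape.protobuf.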
2 changes: 1 addition & 1 deletion go.mod
@@ -11,7 +11,7 @@ require (
github.com/google/uuid v1.6.0
github.com/kralicky/codegen v0.0.0-20240307225947-51de80fcb2f3
github.com/kralicky/gpkg v0.0.0-20240119195700-64f32830b14f
github.com/kralicky/protocompile v0.0.0-20240401205051-3af207f8546c
github.com/kralicky/protocompile v0.0.0-20240402185503-7487e2c98aec
github.com/kralicky/tools-lite v0.0.0-20240313161632-60bfa88304ff
github.com/mattn/go-tty v0.0.5
github.com/spf13/cobra v1.8.0
4 changes: 2 additions & 2 deletions go.sum
@@ -42,8 +42,8 @@ github.com/kralicky/codegen v0.0.0-20240307225947-51de80fcb2f3 h1:BWm3t2mZaeu4hg
github.com/kralicky/codegen v0.0.0-20240307225947-51de80fcb2f3/go.mod h1:R99TvehuNoyLOBwWThdNuQXnJT8d4gJcRYfZtB2Mw7s=
github.com/kralicky/gpkg v0.0.0-20240119195700-64f32830b14f h1:MsNe8A51V+7Fu5OMXSl8SK02erPJ40vFs2zDHn89w1g=
github.com/kralicky/gpkg v0.0.0-20240119195700-64f32830b14f/go.mod h1:vOkwMjs49XmP/7Xfo9ZL6eg2ei51lmtD/4U/Az5GTq8=
github.com/kralicky/protocompile v0.0.0-20240401205051-3af207f8546c h1:HrrAwQQDVyDFYbGFKNhOYtUd7HTlu/Y+qVGiVWhEiwg=
github.com/kralicky/protocompile v0.0.0-20240401205051-3af207f8546c/go.mod h1:UjvEi6Zifxkfhm/hW7RXpluHcBfN+VnePXcUtgHws8U=
github.com/kralicky/protocompile v0.0.0-20240402185503-7487e2c98aec h1:flnp8h/gqns0/a2MnQohtQiPTMNnXRbBZ0vFKEU92V8=
github.com/kralicky/protocompile v0.0.0-20240402185503-7487e2c98aec/go.mod h1:UjvEi6Zifxkfhm/hW7RXpluHcBfN+VnePXcUtgHws8U=
github.com/kralicky/tools-lite v0.0.0-20240313161632-60bfa88304ff h1:akxm/czMYHdr1xIGm6wmddABmc4M9KDcqksdwpIJx8A=
github.com/kralicky/tools-lite v0.0.0-20240313161632-60bfa88304ff/go.mod h1:V8GGYRLr40bvX/W3nZFxG+6S3iDFWn6o5J3NGDClr8U=
github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
94 changes: 93 additions & 1 deletion pkg/lsp/semantic.go
@@ -1,9 +1,11 @@
package lsp

import (
"bytes"
"fmt"
"log/slog"
"os"
"regexp"
"slices"
"sort"
"strings"
@@ -440,6 +442,8 @@ func init() {
celEnv, _ = celEnv.Extend(cel.EnableMacroCallTracking())
}

var escapeCharRegex = regexp.MustCompile(`\\([0-7]{1,3}|[abfnrtv\\'"]|[xX][0-9a-fA-F]{1,2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})`)

func (s *semanticItems) inspect(node ast.Node, walkOptions ...ast.WalkOption) {
tracker := &paths.AncestorTracker{}
// check if node is a non-nil interface to a nil pointer
@@ -459,7 +463,11 @@ func (s *semanticItems) inspect(node ast.Node, walkOptions ...ast.WalkOption) {
s.mkcomments(node)
return true
}
s.mktokens(node, tracker.Path(), semanticTypeString, 0)
if bytes.Contains(node.Raw, []byte{'\\'}) {
s.inspectStringLiteralWithEscapeSequences(node, tracker.Path())
} else {
s.mktokens(node, tracker.Path(), semanticTypeString, 0)
}
case *ast.UintLiteralNode:
s.mktokens(node, tracker.Path(), semanticTypeNumber, 0)
case *ast.FloatLiteralNode:
@@ -681,6 +689,90 @@ func (s *semanticItems) inspectCompoundIdent(compoundIdent *ast.CompoundIdentNod
}
}

func (s *semanticItems) inspectStringLiteralWithEscapeSequences(node *ast.StringLiteralNode, path protopath.Path) {
// for strings containing escape sequences, create multiple tokens
// for each part of the string
// example: "\0\001\a\b\f\n\r\t\v\\\'\"\xfe" -> ["\0", "\001", "\a", "\b", "\f", "\n", "\r", "\t", "\v", "\\", "\'", "\"", "\xfe"]
indexes := escapeCharRegex.FindAllIndex(node.Raw, -1)

info := s.AST().NodeInfo(node)
if !info.IsValid() {
return
}

line := uint32(info.Start().Line - 1)
start := uint32(info.Start().Col - 1)

// first token is the opening quote
s.items = append(s.items, semanticItem{
lang: tokenLanguageProto,
line: line,
start: start,
len: 1,
typ: semanticTypeString,
node: node,
path: path,
})
start++

i := 0
for _, match := range indexes {
if i < match[0] {
// regular string
item := semanticItem{
lang: tokenLanguageProto,
line: line,
start: start + uint32(i),
len: uint32(match[0]) - uint32(i),
typ: semanticTypeString,
node: node,
path: path,
}
s.items = append(s.items, item)
i = match[0]
}
if i == match[0] {
// escape sequence
item := semanticItem{
lang: tokenLanguageProto,
line: line,
start: start + uint32(i),
len: uint32(match[1]) - uint32(match[0]),
typ: semanticTypeRegexp,
node: node,
path: path,
}
s.items = append(s.items, item)
i = match[1]
}
}
if i < len(node.Raw) {
// after the last escape sequence but before the closing quote
item := semanticItem{
lang: tokenLanguageProto,
line: line,
start: start + uint32(i),
len: uint32(len(node.Raw) - i),
typ: semanticTypeString,
node: node,
path: path,
}
s.items = append(s.items, item)
i = len(node.Raw)
}

// last token is the closing quote
s.items = append(s.items, semanticItem{
lang: tokenLanguageProto,
line: line,
start: start + uint32(i),
len: 1,
typ: semanticTypeString,
node: node,
path: path,
})
}

func (s *semanticItems) inspectFieldLiteral(node ast.Node, val *ast.ValueNode, path protopath.Path) {
if s.maybeLinkRes == nil {
return
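To make the new splitting logic concrete, here is a standalone sketch of how escapeCharRegex partitions a raw string body into plain and escaped segments (illustrative only; the real method also converts each segment into a semanticItem with line and column offsets):

package main

import (
	"fmt"
	"regexp"
)

// Same pattern as escapeCharRegex in pkg/lsp/semantic.go.
var escapeCharRegex = regexp.MustCompile(`\\([0-7]{1,3}|[abfnrtv\\'"]|[xX][0-9a-fA-F]{1,2}|u[0-9a-fA-F]{4}|U[0-9a-fA-F]{8})`)

func main() {
	// Raw string content as the lexer stores it: backslashes are literal bytes.
	raw := []byte(`foo\xFFbar\u1234baz\t`)

	i := 0
	for _, m := range escapeCharRegex.FindAllIndex(raw, -1) {
		if i < m[0] {
			fmt.Printf("string %q\n", raw[i:m[0]]) // plain run -> semanticTypeString
		}
		fmt.Printf("escape %q\n", raw[m[0]:m[1]]) // escape -> semanticTypeRegexp
		i = m[1]
	}
	if i < len(raw) {
		fmt.Printf("string %q\n", raw[i:]) // trailing plain run
	}
	// Output:
	// string "foo"
	// escape "\\xFF"
	// string "bar"
	// escape "\\u1234"
	// string "baz"
	// escape "\\t"
}

Escape segments reuse the regexp semantic token type, presumably so that standard color themes, which already style regex escapes distinctly, give them a contrasting color inside string literals.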
53 changes: 53 additions & 0 deletions test/semantic_test.go
@@ -69,6 +69,9 @@ message Y {
x: {[bufbuild.protocompile.test2.y]: {x: {}}}
}
];
optional bytes escaped_bytes = 3 [default = "\0\001\a\b\f\n\r\t\v\\\'\"\xfe"];
optional string utf8_string = 4 [default = "\341\210\264"]; // this is utf-8 for \u1234
optional string mixed_string = 5 [default = "foo\xFFbar\u1234baz\t"];
}
`
@@ -235,6 +238,56 @@ message Y {
{Token: "bufbuild.protocompile.test2.y", TokenType: "property"},
{Token: "]", TokenType: "operator"},
{Token: "x", TokenType: "property"},
{Token: "optional", TokenType: "keyword"},
{Token: "bytes", TokenType: "type", Mod: "defaultLibrary"},
{Token: "escaped_bytes", TokenType: "variable", Mod: "definition"},
{Token: "=", TokenType: "operator"},
{Token: "3", TokenType: "number"},
{Token: "default", TokenType: "keyword"},
{Token: "=", TokenType: "operator"},
{Token: `"`, TokenType: "string"},
{Token: `\0`, TokenType: "regexp"},
{Token: `\001`, TokenType: "regexp"},
{Token: `\a`, TokenType: "regexp"},
{Token: `\b`, TokenType: "regexp"},
{Token: `\f`, TokenType: "regexp"},
{Token: `\n`, TokenType: "regexp"},
{Token: `\r`, TokenType: "regexp"},
{Token: `\t`, TokenType: "regexp"},
{Token: `\v`, TokenType: "regexp"},
{Token: `\\`, TokenType: "regexp"},
{Token: `\'`, TokenType: "regexp"},
{Token: `\"`, TokenType: "regexp"},
{Token: `\xfe`, TokenType: "regexp"},
{Token: `"`, TokenType: "string"},
{Token: "optional", TokenType: "keyword"},
{Token: "string", TokenType: "type", Mod: "defaultLibrary"},
{Token: "utf8_string", TokenType: "variable", Mod: "definition"},
{Token: "=", TokenType: "operator"},
{Token: "4", TokenType: "number"},
{Token: "default", TokenType: "keyword"},
{Token: "=", TokenType: "operator"},
{Token: `"`, TokenType: "string"},
{Token: `\341`, TokenType: "regexp"},
{Token: `\210`, TokenType: "regexp"},
{Token: `\264`, TokenType: "regexp"},
{Token: `"`, TokenType: "string"},
{Token: "// this is utf-8 for \\u1234", TokenType: "comment"},
{Token: "optional", TokenType: "keyword"},
{Token: "string", TokenType: "type", Mod: "defaultLibrary"},
{Token: "mixed_string", TokenType: "variable", Mod: "definition"},
{Token: "=", TokenType: "operator"},
{Token: "5", TokenType: "number"},
{Token: "default", TokenType: "keyword"},
{Token: "=", TokenType: "operator"},
{Token: `"`, TokenType: "string"},
{Token: "foo", TokenType: "string"},
{Token: `\xFF`, TokenType: "regexp"},
{Token: "bar", TokenType: "string"},
{Token: `\u1234`, TokenType: "regexp"},
{Token: "baz", TokenType: "string"},
{Token: `\t`, TokenType: "regexp"},
{Token: `"`, TokenType: "string"},
}
if x := cmp.Diff(want, tokens); x != "" {
t.Errorf("Semantic tokens do not match (-want +got):\n%s", x)
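The comment on utf8_string checks out: the octal escapes \341 \210 \264 are the bytes 0xE1 0x88 0xB4, which is the UTF-8 encoding of U+1234. A quick standalone verification (not part of this commit):

package main

import "fmt"

func main() {
	// \341 \210 \264 in octal are 0xE1 0x88 0xB4,
	// the UTF-8 encoding of U+1234 (ETHIOPIC SYLLABLE SEE).
	b := []byte{0341, 0210, 0264}
	fmt.Printf("%s %U\n", b, []rune(string(b))) // prints: ሴ [U+1234]
}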
