Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Map type annotations to source text #345

Closed
wants to merge 49 commits into from
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
Show all changes
49 commits
Select commit Hold shift + click to select a range
6282f16
RFC/WIP: map type annotations to source text
timholy Feb 5, 2023
453b5ef
Handle more complicated examples
timholy Feb 6, 2023
9048ffa
Improve robustness
timholy Feb 8, 2023
50b7c9b
Apply suggestions from code review
timholy Feb 9, 2023
4a92541
handle static parameter
aviatesk Feb 10, 2023
ef29b44
use :cyan for annotating stable type
aviatesk Feb 10, 2023
51da034
handle prefix op call nicely
aviatesk Feb 10, 2023
0304f1a
Add TypedSyntax subdir package
timholy Feb 17, 2023
85a8726
README tweaks
timholy Feb 17, 2023
fa54151
Prevent matching in `->` and `do` blocks
timholy Feb 19, 2023
f5a265e
Delete sourcetext.jl from Cthulhu
timholy Feb 19, 2023
8136e6a
Ambiguity test: assign outside of inner fcn
timholy Feb 19, 2023
661ba33
Support `[ref]` nodes
timholy Feb 19, 2023
81618ab
WIP refactor to use args
timholy Feb 21, 2023
e5d4233
Finish refactor
timholy Feb 22, 2023
4d152b8
Improve matching, support tuple-destructuring
timholy Feb 22, 2023
3e3cd7c
Add duplication test
timholy Feb 22, 2023
12e4b5d
Don't error on kwargs
timholy Feb 22, 2023
fefeb0e
Support mcrocall & arg::T in funcdefs
timholy Feb 23, 2023
d79a50b
Fix `+=`, partial fix for literals
timholy Feb 23, 2023
db1feaf
Modernize & test `printstyled`
timholy Feb 23, 2023
08e4346
Wire new framework into Cthulhu
timholy Feb 23, 2023
dda4ed6
Support `where`, unnamed arguments
timholy Feb 23, 2023
b4b8a03
Pass settings down during descend
timholy Feb 23, 2023
f1780c3
Improve `return`, ambiguous nodes
timholy Feb 24, 2023
c5bfc38
Show callsites with source
timholy Feb 24, 2023
9cafb61
Sub-menu: print source-text
timholy Feb 24, 2023
6a14767
Truncate body when filling keywords
timholy Feb 26, 2023
d8a54c2
Implement toggling, make source-view default
timholy Feb 26, 2023
847b4fd
Update TypedSyntax README
timholy Feb 26, 2023
12ed91b
Describe new source-mapping in README
timholy Feb 26, 2023
7f3758c
Handle duplicate slotnames in call
timholy Feb 27, 2023
dc9e6ca
Respect Cthulhu's `type_annotations`
timholy Feb 27, 2023
92d1b30
Fix several sources of test failures
timholy Feb 27, 2023
c0bebeb
Support not showing type-annotations
timholy Feb 27, 2023
be00100
Simplify toggles
timholy Feb 27, 2023
821c3c2
Update terminal tests
timholy Feb 27, 2023
7dafee4
Require JuliaSyntax 0.3.2
timholy Feb 27, 2023
b338996
Print type-annotation with [ref] nodes
timholy Feb 27, 2023
9828003
Update images for simpler toggles menu
timholy Feb 27, 2023
cf9d003
README: indicate that more help is coming
timholy Feb 27, 2023
2989933
Better fallbacks for failure to retrieve source
timholy Feb 28, 2023
0eb8178
Handle varargs
timholy Feb 28, 2023
0d587ab
Apply suggestions from code review
timholy Feb 28, 2023
f91cd1b
Simplify getting src & mappings
timholy Feb 28, 2023
072d83e
Update src/Cthulhu.jl
timholy Feb 28, 2023
28a9b92
Update TypedSyntax/src/show.jl
timholy Feb 28, 2023
708b8b2
Merge branch 'master' into teh/sourcetext
timholy Feb 28, 2023
73fc650
Printing improvements
timholy Feb 28, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ version = "2.7.8"
CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
FoldingTrees = "1eca21be-9b9b-4ed8-839a-6d8ae26b1781"
InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4"
Preferences = "21216c6a-2e73-6563-6e65-726566657250"
REPL = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
SnoopPrecompile = "66db9d55-30c0-4569-8b51-7e840670fc0c"
Expand All @@ -16,6 +17,7 @@ Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
[compat]
CodeTracking = "0.5, 1"
FoldingTrees = "1"
JuliaSyntax = "0.3"
Preferences = "1"
SnoopPrecompile = "1"
julia = "1.7"
Expand Down
21 changes: 21 additions & 0 deletions TypedSyntax/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2023 Tim Holy <tim.holy@gmail.com> and contributors

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
19 changes: 19 additions & 0 deletions TypedSyntax/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
name = "TypedSyntax"
uuid = "d265eb64-f81a-44ad-a842-4247ee1503de"
authors = ["Tim Holy <tim.holy@gmail.com> and contributors"]
version = "0.1.0"

[deps]
CodeTracking = "da1fd8a2-8d9e-5ec2-8556-3022fb5608a2"
JuliaSyntax = "70703baa-626e-46a2-a12c-08ffd08c73b4"

[compat]
CodeTracking = "1"
JuliaSyntax = "0.3"
julia = "1"

[extras]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test"]
104 changes: 104 additions & 0 deletions TypedSyntax/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# TypedSyntax

This package aims to map types, as determined via type-inference, back to the source code as written by the developer. It can be used to understand program behavior and identify causes of "type instability" (inference failures) without the need to read Julia's [intermediate representations](https://docs.julialang.org/en/v1/devdocs/ast/) of code.

This package is built on [JuliaSyntax](https://github.com/JuliaLang/JuliaSyntax.jl) and extends it by attaching type annotations to the nodes of its syntax trees. Here's a demo:

```julia
julia> using TypedSyntax

julia> f(x, y, z) = x + y * z;

julia> node = TypedSyntaxNode(f, (Float64, Int, Float32))
line:col│ byte_range │ tree │ type or call idxs
1:1 │ 1:22 │[=]
1:1 │ 1:10 │ [call] │Int64[]
1:1 │ 1:1 │ f │TypedSyntax.NotFound
1:3 │ 3:3 │ x │Float64
1:6 │ 6:6 │ y │Int64
1:9 │ 9:9 │ z │Float32
1:13 │ 13:22 │ [call-i] │Float64
1:14 │ 14:14 │ x │Float64
1:16 │ 16:16 │ +
1:17 │ 17:22 │ [call-i] │Float32
1:18 │ 18:18 │ y │Int64
1:20 │ 20:20 │ *
1:22 │ 22:22 │ z │Float32
```

The right hand column is the new information added by `TypedSyntaxNode`: each is either a type or a list of integers (indicating a failure to map to a unique type in the type-inferred code).

You can also display this in a form closer to the original source code, but with type-annotations:

```julia
julia> printstyled(stdout, node; hide_type_stable=false)
f(x::Float64, y::Int64, z::Float32)::Float64 = (x::Float64 + (y::Int64 * z::Float32)::Float32)::Float64
```

`hide_type_stable=true` (which is the default) will suppress printing of concrete types, so you need to set it to `false` if you want to see all the types.

The default is aimed at identifying sources of "type instability" (poor inferrability):

```julia
julia> printstyled(stdout, TypedSyntaxNode(f, (Float64, Int, Real)))
```

which produces

<code>f(x, y, z::<b>Real</b>)::<b>Any</b> = (x + (y * z::<b>Real</b>)::<b>Any</b>)::<b>Any</b></code>

The boldfaced text above is typically printed in color in the REPL:

- red indicates non-concrete types
- yellow indicates a "small union" of concrete types. These usually pose no issues, unless there are too many combinations of such unions.

Printing with color can be suppressed with the keyword argument `iswarn=false`.

## Caveats

TypedSyntax aims for accuracy, but there are a number of factors that pose challenges.
First, anonymous and internal functions appear as part of the source text, but internally Julia handles these as separate type-inferred methods, and these are hidden from the annotator.
Therefore, in

```julia
julia> sumfirst(c) = sum(x -> first(x), c); # better to use `sum(first, c)` but this is just an illustration

julia> printstyled(stdout, TypedSyntaxNode(sumfirst, (Vector{Any},)))
sumfirst(c)::Any = sum(x -> first(x), c)::Any
```

`x` and `first(x)` both have type `Any`, but they are not annotated as such because they are hidden inside the anonymous function.

Second, because not all expressions can be matched, there are cases where some of the matches are ambiguous.
Consider the following example:

```julia
julia> firstfirst(c) = map(x -> first(x), first(c));

julia> TypedSyntaxNode(firstfirst, (Vector{Any},))
line:col│ byte_range │ tree │ type or call idxs
1:1 │ 1:44 │[=]
1:1 │ 1:13 │ [call] │Int64[]
1:1 │ 1:10 │ firstfirst │TypedSyntax.NotFound
1:12 │ 12:12 │ c │Vector{Any}
1:17 │ 17:44 │ [call] │Any
1:17 │ 17:19 │ map │TypedSyntax.NotFound
1:21 │ 21:33 │ [->]
1:21 │ 21:21 │ x │TypedSyntax.NotFound
1:26 │ 26:33 │ [call] │[3]
1:26 │ 26:30 │ first │TypedSyntax.NotFound
1:32 │ 32:32 │ x │TypedSyntax.NotFound
1:36 │ 36:43 │ [call] │[3]
1:36 │ 36:40 │ first │TypedSyntax.NotFound
1:42 │ 42:42 │ c │Vector{Any}
```

Note that the two `[call]` expressions involving `first` are marked with "type" `[3]`.
Since this vector has only one element, it means that only one type-inferred call to `first` could be found.
However, there were two source statements "competing" to be assigned to it.
Since TypedSyntax could not uniquely resolve which source statement made the call, both candidates are marked in yellow with `::NF` (for "not found"):

```julia
julia> printstyled(stdout, TypedSyntaxNode(firstfirst, (Vector{Any},)))
firstfirst(c)::Any = map(x -> first(x)::NF, first(c)::NF)::Any
```
15 changes: 15 additions & 0 deletions TypedSyntax/src/TypedSyntax.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# TypedSyntax: attaches type information taken from type-inferred code (`CodeInfo`)
# to the nodes of JuliaSyntax syntax trees.
module TypedSyntax

# Compiler data structures consumed by the matching code in node.jl.
using Core: CodeInfo, MethodInstance
# Only the JuliaSyntax names that are actually used are brought into scope.
using JuliaSyntax: JuliaSyntax, TreeNode, AbstractSyntaxData, SyntaxNode, GreenNode, SyntaxHead, SourceFile,
head, kind, children, haschildren, untokenize, first_byte, last_byte, source_line, source_location,
@K_str, is_infix_op_call, is_prefix_op_call
using Base.Meta: isexpr
# CodeTracking supplies `definition`, used to fetch the source text of a method.
using CodeTracking

# The typed tree node type is the only exported name.
export TypedSyntaxNode

# node.jl: the node data type and the passes that map inferred types onto the tree.
include("node.jl")
# NOTE(review): show.jl is not visible here; presumably it holds the display/printing code.
include("show.jl")

end
200 changes: 200 additions & 0 deletions TypedSyntax/src/node.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@

# Payload stored in every `TypedSyntaxNode`. The `source`, `raw`, `position` and `val`
# fields are copied from the data of the `SyntaxNode` being converted; `typedsource`
# and `typ` are the additions that carry inference information.
mutable struct TypedSyntaxData <: AbstractSyntaxData
# Source file the node was parsed from.
source::SourceFile
# Type-inferred code that the tree is matched against.
typedsource::CodeInfo
# Underlying green (raw) syntax node.
raw::GreenNode{SyntaxHead}
# Position of the node within `source`.
position::Int
# Value of the node (e.g. the `Symbol` of an identifier).
val::Any
# Mutable on purpose: later passes overwrite a list of candidate indices with a resolved type.
typ::Any # can either be a Type, `nothing`, or an `idxs::Vector{Int}` of *potential* call matches `src.code[idxs]`
end

# A syntax-tree node whose payload is a `TypedSyntaxData`.
const TypedSyntaxNode = TreeNode{TypedSyntaxData}

# Marker type stored in `typ` for identifiers that match no entry of `src.slotnames`.
struct NotFound end
# struct Unmatched end

# Build a typed syntax tree for the method of `f` selected by the argument types `t`.
#
# The method's source text is fetched with `definition(String, m)`, parsed with
# JuliaSyntax (extra `kwargs` are forwarded to `JuliaSyntax.parse`), and matched
# against the unoptimized type-inferred code returned by `getsrc`.
# The inferred return type is stored in the root node's `val` field.
#
# Throws an `ArgumentError` when the source text of the method cannot be retrieved.
function TypedSyntaxNode(@nospecialize(f), @nospecialize(t); kwargs...)
    m = which(f, t)
    def = definition(String, m)
    # `definition` returns `nothing` when the source cannot be located; fail with a
    # clear message instead of an opaque destructuring error.
    def === nothing && throw(ArgumentError("could not retrieve source text for method $m"))
    sourcetext, lineno = def
    rootnode = JuliaSyntax.parse(SyntaxNode, sourcetext; filename=string(m.file), first_line=lineno, kwargs...)
    src, rt = getsrc(f, t)
    # `lineno - m.line` is the offset between the line where the definition text starts
    # and the line recorded on the method; it is passed down as `Δline`.
    node = TypedSyntaxNode(rootnode, src, lineno - m.line)
    node.data.val = rt
    return node
end

# Convert an already-parsed `SyntaxNode` tree into a typed tree by running the three
# matching passes against the type-inferred code `src`.
# `Δline` is forwarded to pass 1, which subtracts it from each call's source line.
function TypedSyntaxNode(node::SyntaxNode, src::CodeInfo, Δline=0)
    # One bucket per statement of `src.code`; pass 1 records in bucket `i` every tree
    # node that might correspond to statement `i`.
    taken = [TypedSyntaxNode[] for _ in eachindex(src.code)]
    # Pass 1 collects all candidate matches, pass 2 resolves the unambiguous ones,
    # pass 3 propagates types upward.
    candidates = typednode_pass1(node, src, nothing, taken, Δline)
    resolved = typednode_pass2!(candidates, src, taken)
    return typednode_pass3!(resolved)
end

# During pass1, we list all possible matches for each call.
# Then during pass2, we use the fact that inference preserves order to say that if there are the
# same number of call sites in the sourcetext as we found in the typed code,
# we can line them up one-to-one.
# A case where that *won't* happen is a line like `ntuple(i -> a[i], b[j])`, where the anonymous
# function will not be in `src` and so there will be two `getindex` calls in the sourcetext but only one in `src`.
# Pass 1: recursively copy `node` into a `TypedSyntaxNode` tree attached to `parent`.
# - Identifiers get the slot type of the first slot with the same name, or `NotFound`.
# - `call` nodes with children get the vector of candidate statement indices found by
#   `match_call`; the new node is also recorded in `taken[i]` for each candidate `i`.
# - Every other node gets `typ = nothing`.
# Returns the newly created node.
function typednode_pass1(node::SyntaxNode, src::CodeInfo, parent, taken, Δline)
    nodehead = head(node)
    orig = node.data
    # All new payloads share everything except the `typ` field.
    withtype(typ) = TypedSyntaxData(orig.source, src, orig.raw, orig.position, orig.val, typ)

    if kind(nodehead) == K"Identifier"
        # Leaf: look the name up among the slots of the typed code.
        name = node.val::Symbol
        slot = findfirst(==(name), src.slotnames)
        slottype = slot === nothing ? NotFound : src.slottypes[slot]
        return TreeNode(parent, nothing, withtype(slottype))
    end

    typ = nothing
    if kind(nodehead) == K"call" && haschildren(node)
        line = source_line(node.source, node.position)
        # For infix calls the callee is the second child, otherwise the first.
        callee = node.children[1 + is_infix_op_call(nodehead)]
        typ = match_call(JuliaSyntax.sourcetext(callee), src, line - Δline, src.parent) # FIXME: match arg types too
    end

    # The new node must exist before its children so they can point back to it;
    # the children vector is filled in afterwards.
    newparent = TreeNode(parent, nothing, withtype(typ))
    if haschildren(node)
        newparent.children = TypedSyntaxNode[
            typednode_pass1(child, src, newparent, taken, Δline) for child in children(node)
        ]
    end

    # Register this node as a candidate for each statement it may correspond to.
    if typ isa Vector{Int}
        for idx in typ
            push!(taken[idx], newparent)
        end
    end
    return newparent
end

# Pass 2: resolve every call that can be assigned a type unambiguously.
#
# `taken[i]` holds the tree nodes that pass 1 found as candidates for statement `i`
# of `src.code`.
# - One candidate: it receives `src.ssavaluetypes[i]`.
# - Several candidates: look at the following statements that share the same code
#   location and have the same candidate list. If there are exactly as many such
#   statements as candidates, pair them up in order.
# Resolved buckets are emptied. Returns `tnode`.
function typednode_pass2!(tnode, src, taken)
    n = length(src.code)
    for i in eachindex(src.code)
        candidates = taken[i]
        ncand = length(candidates)
        if ncand == 1
            only(candidates).data.typ = src.ssavaluetypes[i]
            empty!(candidates)
        elseif ncand > 1
            # Multiple calls map to this one. If the number mapping is equal to the
            # number of duplicates, we can unambiguously assign them by order.
            mapsame, lineidx = [i], src.codelocs[i]
            j = i + 1
            while j <= n && src.codelocs[j] == lineidx
                if taken[j] == candidates
                    push!(mapsame, j)
                end
                j += 1
            end
            if length(mapsame) == ncand
                for (tidx, j) in enumerate(mapsame)
                    # Write through `.data`, exactly like the single-candidate branch,
                    # rather than relying on property forwarding of the node type.
                    candidates[tidx].data.typ = src.ssavaluetypes[j]
                end
                for j in mapsame
                    empty!(taken[j])
                end
            end
        end
    end
    return tnode # while this is the only direct use of `tnode`, we accessed it via `taken` so this is clearer
end

# Propagate typ upward in tree
# Pass 3: walk the tree depth-first and give each `return` node the `typ` of its
# single child. Returns `tnode`.
function typednode_pass3!(tnode)
    # Children first, so that nested `return`s are handled before their ancestors.
    foreach(typednode_pass3!, children(tnode))
    if kind(tnode) == K"return"
        returned = only(children(tnode))
        tnode.data.typ = returned.typ
    end
    return tnode
end

# Return the index into `src.linetable` of the entry that covers line `lineno`:
# the first entry whose `line` is not less than `lineno`, clamped to the table length,
# and stepped back by one when that entry starts after `lineno`.
function find_codeloc(src, lineno)
    table = src.linetable
    idx = searchsortedfirst(table, lineno; lt=(linenode, line) -> linenode.line < line)
    idx = min(idx, length(table))
    # An entry starting past `lineno` means `lineno` belongs to the previous entry
    # (handles multiline statements).
    return table[idx].line > lineno ? idx - 1 : idx
end
# Return the range of indices into `src.codelocs` whose code location is the one
# that `find_codeloc` reports for `lineno`.
function find_coderange(src, lineno)
    locs = src.codelocs
    clidx = find_codeloc(src, lineno)
    first_idx = searchsortedfirst(locs, clidx)
    # NOTE(review): this compares a code-location index with a source line number;
    # confirm that `lineno` (rather than `clidx`) is the intended right-hand side.
    if locs[first_idx] > lineno
        first_idx += 1
    end
    last_idx = searchsortedlast(locs, clidx)
    return first_idx:last_idx
end

# Find the statements of `src.code` that may correspond to a call to `callname`
# written on source line `lineno`.
#
# Arguments:
# - `callname`: source text of the callee, compared against stringified names.
# - `src`: the type-inferred code; `codelocs`, `code`, `slotnames`, `ssavaluetypes`
#   and `linetable` are read.
# - `lineno`: line number used to select the code location via `find_codeloc`.
# - `mi`: method instance, used only to look up static-parameter names.
#
# Returns a `Vector{Int}` of indices into `src.code` (possibly empty).
# Throws an error if a callee is of a kind that is not handled below.
function match_call(callname, src, lineno, mi)
    clidx = find_codeloc(src, lineno)
    codeidxs = Int[]
    # Scan the run of statements that share the code location `clidx`.
    i, n = searchsortedfirst(src.codelocs, clidx) - 1, lastindex(src.codelocs)
    while i < n
        i += 1
        src.codelocs[i] == clidx || break
        stmt = src.code[i]
        isa(stmt, Expr) || continue
        if stmt.head == :(=)
            # For assignments, the call (if any) is the right-hand side.
            stmt = stmt.args[2]
            isa(stmt, Expr) || continue
        end
        stmt.head ∈ (:call, :invoke) || continue
        f = stmt.args[1]
        if isa(f, GlobalRef) && f.mod === Core && f.name == :_apply_iterate # handle vararg calls
            # Sanity check
            fiter = stmt.args[2]
            @assert isa(fiter, GlobalRef) && fiter.name == :iterate
            f = stmt.args[3] # get the actual call
        end
        if isa(f, Core.SlotNumber)
            # Callee is a slot: compare by slot name.
            fname = src.slotnames[f.id]
            if string(fname) == callname
                push!(codeidxs, i)
                continue
            end
        end
        if isa(f, Core.SSAValue)
            # Callee is an SSA value: compare against its inferred type (or constant value).
            fname = src.ssavaluetypes[f.id]
            if isa(fname, Core.Const)
                fname = fname.val
            end
            fname = string(fname)
            # Either string may carry extra leading text, so accept a suffix match
            # in both directions.
            if (endswith(fname, callname) || endswith(callname, fname))
                push!(codeidxs, i)
                continue
            end
        end
        if isa(f, Core.SlotNumber) || isa(f, Core.SSAValue)
            @warn "unhandled slot or SSAValue in $stmt"
            continue
        end
        if isexpr(f, :static_parameter)
            varname = sparam_name(mi, f.args[1]::Int)
            # `sparam_name` returns a `Symbol` while `callname` is source text, so
            # stringify before comparing (a `String == Symbol` test is always false).
            callname == string(varname) && push!(codeidxs, i)
            continue
        end
        isa(f, GlobalRef) || error("expected GlobalRef, got ", f)
        string(f.name) == callname && push!(codeidxs, i)
    end
    return codeidxs
end

# Return the name (a `Symbol`) of the `i`-th static parameter of the method behind
# `mi`, found by peeling `i - 1` `UnionAll` layers off the method signature.
function sparam_name(mi::MethodInstance, i::Int)
    layer = (mi.def::Method).sig::UnionAll
    remaining = i
    # Each `UnionAll` layer binds one type variable; the outermost is parameter 1.
    while remaining != 1
        layer = layer.body::UnionAll
        remaining -= 1
    end
    return layer.var.name
end

# Return the single `code_typed` result for `f` with argument types `t`, requesting
# unoptimized code with source-level debug info.
# Errors (via `only`) unless `code_typed` yields exactly one result.
function getsrc(@nospecialize(f), @nospecialize(t))
    return only(code_typed(f, t; debuginfo=:source, optimize=false))
end
Loading