From 24165b847614a01f3ba48c68623967147f25a729 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sat, 6 Jul 2024 08:52:50 -0700 Subject: [PATCH 1/2] Hashing fixup, equality support, and serialization support (with tests) --- Project.toml | 4 +++- src/green_tree.jl | 1 + src/kinds.jl | 14 +++++++++++++- src/source_files.jl | 6 ++++++ src/syntax_tree.jl | 11 +++++++++++ test/runtests.jl | 1 + test/serialization.jl | 27 +++++++++++++++++++++++++++ 7 files changed, 62 insertions(+), 2 deletions(-) create mode 100644 test/serialization.jl diff --git a/Project.toml b/Project.toml index 6ffbaa40..e4215bd3 100644 --- a/Project.toml +++ b/Project.toml @@ -4,13 +4,15 @@ authors = ["Claire Foster and contributors"] version = "0.4.6" [compat] +Serialization = "1.0" julia = "1.0" [deps] [extras] Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" +Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Test", "Logging"] +test = ["Test", "Serialization", "Logging"] diff --git a/src/green_tree.jl b/src/green_tree.jl index 28b3f3fb..c4df5163 100644 --- a/src/green_tree.jl +++ b/src/green_tree.jl @@ -38,6 +38,7 @@ head(node::GreenNode) = node.head Base.summary(node::GreenNode) = summary(node.head) +Base.hash(node::GreenNode, h::UInt) = hash((node.head, node.span, node.args), h) function Base.:(==)(n1::GreenNode, n2::GreenNode) n1.head == n2.head && n1.span == n2.span && n1.args == n2.args end diff --git a/src/kinds.jl b/src/kinds.jl index 6de2f26a..f6706dd2 100644 --- a/src/kinds.jl +++ b/src/kinds.jl @@ -922,7 +922,7 @@ const _kind_names = """ K"name" - Kind(namestr) + Kind(id) `Kind` is a type tag for specifying the type of tokens and interior nodes of a syntax tree. 
Abstractly, this tag is used to define our own *sum types* for @@ -999,6 +999,18 @@ function Base.show(io::IO, k::Kind) print(io, "K\"$(convert(String, k))\"") end +# Save the string representation rather than the bit pattern so that kinds +# can be serialized and deserialized across different JuliaSyntax versions. The name is framed by a one-byte length prefix. +function Base.write(io::IO, k::Kind) + str = convert(String, k) + nb = write(io, UInt8(sizeof(str))) # prefix is a byte count (not a character count) because `read` below consumes bytes + nb + write(io, str) # return the total number of bytes written, per the `Base.write` contract +end +function Base.read(io::IO, ::Type{Kind}) + len = read(io, UInt8) + str = String(read(io, len)) + convert(Kind, str) +end + #------------------------------------------------------------------------------- """ diff --git a/src/source_files.jl b/src/source_files.jl index a8051a59..5ea4611c 100644 --- a/src/source_files.jl +++ b/src/source_files.jl @@ -23,6 +23,12 @@ struct SourceFile line_starts::Vector{Int} end +Base.hash(s::SourceFile, h::UInt) = hash((s.code, s.byte_offset, s.filename, s.first_line, s.line_starts), h) +function Base.var"=="(a::SourceFile, b::SourceFile) + a.code == b.code && a.byte_offset == b.byte_offset && a.filename == b.filename && + a.first_line == b.first_line && a.line_starts == b.line_starts +end + function SourceFile(code::AbstractString; filename=nothing, first_line=1, first_index=1) line_starts = Int[1] diff --git a/src/syntax_tree.jl b/src/syntax_tree.jl index 02ef17f4..e4dfa90b 100644 --- a/src/syntax_tree.jl +++ b/src/syntax_tree.jl @@ -17,6 +17,12 @@ mutable struct TreeNode{NodeData} # ? prevent others from using this with Node end end +# Exclude parent from hash and equality checks. This means that subtrees can compare equal. 
+Base.hash(node::TreeNode, h::UInt) = hash((node.children, node.data), h) +function Base.var"=="(a::TreeNode{T}, b::TreeNode{T}) where T + a.children == b.children && a.data == b.data +end + # Implement "pass-through" semantics for field access: access fields of `data` # as if they were part of `TreeNode` function Base.getproperty(node::TreeNode, name::Symbol) @@ -44,6 +50,11 @@ struct SyntaxData <: AbstractSyntaxData val::Any end +Base.hash(data::SyntaxData, h::UInt) = hash((data.source, data.raw, data.position, data.val), h) +function Base.var"=="(a::SyntaxData, b::SyntaxData) + a.source == b.source && a.raw == b.raw && a.position == b.position && a.val == b.val +end + """ SyntaxNode(source::SourceFile, raw::GreenNode{SyntaxHead}; keep_parens=false, position::Integer=1) diff --git a/test/runtests.jl b/test/runtests.jl index bf2f93fb..317f993d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -37,3 +37,4 @@ if VERSION >= v"1.6" include("parse_packages.jl") end +include("serialization.jl") diff --git a/test/serialization.jl b/test/serialization.jl new file mode 100644 index 00000000..5332e43d --- /dev/null +++ b/test/serialization.jl @@ -0,0 +1,27 @@ +using Serialization + +@testset "Equality" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] + x = JuliaSyntax.parse(T, "f(x) = x + 2") + y = JuliaSyntax.parse(T, "f(x) = x + 2") + z = JuliaSyntax.parse(T, "f(x) = 2 + x") + @test x == y + @test x != z + @test y != z +end + +@testset "Hashing" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] + x = hash(JuliaSyntax.parse(T, "f(x) = x + 2"))::UInt + y = hash(JuliaSyntax.parse(T, "f(x) = x + 2"))::UInt + z = hash(JuliaSyntax.parse(T, "f(x) = 2 + x"))::UInt + @test x == y # Correctness + @test x != z # Collision + @test y != z # Collision +end + +@testset "Serialization" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] + x = JuliaSyntax.parse(T, "f(x) = x + 2") + f = tempname() + serialize(f, x) + y = deserialize(f) + @test x == y +end From 
38208514f05cd44f1e0ab86ab0c33188509dd042 Mon Sep 17 00:00:00 2001 From: Lilith Orion Hafner Date: Sat, 6 Jul 2024 09:07:49 -0700 Subject: [PATCH 2/2] 1.0 compat --- src/source_files.jl | 2 +- src/syntax_tree.jl | 4 ++-- test/serialization.jl | 26 ++++++++++++++------------ 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/source_files.jl b/src/source_files.jl index 5ea4611c..0ae8f385 100644 --- a/src/source_files.jl +++ b/src/source_files.jl @@ -24,7 +24,7 @@ struct SourceFile end Base.hash(s::SourceFile, h::UInt) = hash((s.code, s.byte_offset, s.filename, s.first_line, s.line_starts), h) -function Base.var"=="(a::SourceFile, b::SourceFile) +function Base.:(==)(a::SourceFile, b::SourceFile) a.code == b.code && a.byte_offset == b.byte_offset && a.filename == b.filename && a.first_line == b.first_line && a.line_starts == b.line_starts end diff --git a/src/syntax_tree.jl b/src/syntax_tree.jl index e4dfa90b..608b9ce4 100644 --- a/src/syntax_tree.jl +++ b/src/syntax_tree.jl @@ -19,7 +19,7 @@ end # Exclude parent from hash and equality checks. This means that subtrees can compare equal. 
Base.hash(node::TreeNode, h::UInt) = hash((node.children, node.data), h) -function Base.var"=="(a::TreeNode{T}, b::TreeNode{T}) where T +function Base.:(==)(a::TreeNode{T}, b::TreeNode{T}) where T a.children == b.children && a.data == b.data end @@ -51,7 +51,7 @@ struct SyntaxData <: AbstractSyntaxData end Base.hash(data::SyntaxData, h::UInt) = hash((data.source, data.raw, data.position, data.val), h) -function Base.var"=="(a::SyntaxData, b::SyntaxData) +function Base.:(==)(a::SyntaxData, b::SyntaxData) a.source == b.source && a.raw == b.raw && a.position == b.position && a.val == b.val end diff --git a/test/serialization.jl b/test/serialization.jl index 5332e43d..5d194f05 100644 --- a/test/serialization.jl +++ b/test/serialization.jl @@ -1,27 +1,29 @@ using Serialization -@testset "Equality" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] - x = JuliaSyntax.parse(T, "f(x) = x + 2") - y = JuliaSyntax.parse(T, "f(x) = x + 2") - z = JuliaSyntax.parse(T, "f(x) = 2 + x") +@testset "Equality $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] + x = JuliaSyntax.parsestmt(T, "f(x) = x + 2") + y = JuliaSyntax.parsestmt(T, "f(x) = x + 2") + z = JuliaSyntax.parsestmt(T, "f(x) = 2 + x") @test x == y @test x != z @test y != z end -@testset "Hashing" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] - x = hash(JuliaSyntax.parse(T, "f(x) = x + 2"))::UInt - y = hash(JuliaSyntax.parse(T, "f(x) = x + 2"))::UInt - z = hash(JuliaSyntax.parse(T, "f(x) = 2 + x"))::UInt +@testset "Hashing $T" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] + x = hash(JuliaSyntax.parsestmt(T, "f(x) = x + 2"))::UInt + y = hash(JuliaSyntax.parsestmt(T, "f(x) = x + 2"))::UInt + z = hash(JuliaSyntax.parsestmt(T, "f(x) = 2 + x"))::UInt @test x == y # Correctness @test x != z # Collision @test y != z # Collision end -@testset "Serialization" for T in [Expr, SyntaxNode, JuliaSyntax.GreenNode] - x = JuliaSyntax.parse(T, "f(x) = x + 2") +@testset "Serialization $T" for T in [Expr, SyntaxNode, 
JuliaSyntax.GreenNode] + x = JuliaSyntax.parsestmt(T, "f(x) = x + 2") f = tempname() - serialize(f, x) - y = deserialize(f) + open(f, "w") do io + serialize(io, x) + end + y = open(deserialize, f, "r") @test x == y end