Skip to content

Commit 9559411

Browse files
authored
Merge pull request #4 from zombocom/schneems/lotza-syntax-errors
Multiple tests
2 parents 74a1b9a + c2188e4 commit 9559411

File tree

8 files changed

+479
-12
lines changed

8 files changed

+479
-12
lines changed

lib/syntax_error_search.rb

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,72 @@ def self.indent(string)
1414
end
1515
end
1616

17+
# This will tell you if the `code_lines` would be valid
18+
# if you removed the `without_lines`. In short it's a
19+
# way to detect if we've found the lines with syntax errors
20+
# in our document yet.
21+
#
22+
# code_lines = [
23+
# CodeLine.new(line: "def foo\n", index: 0),
24+
# CodeLine.new(line: " def bar\n", index: 1),
25+
# CodeLine.new(line: "end\n", index: 2)
26+
# ]
27+
#
28+
# SyntaxErrorSearch.valid_without?(
29+
# without_lines: code_lines[1],
30+
# code_lines: code_lines
31+
# ) # => true
32+
#
33+
# SyntaxErrorSearch.valid?(code_lines) # => false
34+
def self.valid_without?(without_lines: , code_lines:)
  # `without_lines` may be a single line, a flat array or nested arrays;
  # normalize it to a flat list before subtracting it from the document.
  remaining = code_lines - Array(without_lines).flatten

  # Nothing left over counts as valid; otherwise defer to the
  # syntax check on whatever lines remain.
  remaining.empty? || valid?(remaining)
end
43+
44+
# Returns truthy if a given input source is valid syntax
45+
#
46+
# SyntaxErrorSearch.valid?(<<~EOM) # => true
47+
# def foo
48+
# end
49+
# EOM
50+
#
51+
# SyntaxErrorSearch.valid?(<<~EOM) # => false
52+
# def foo
53+
# def bar # Syntax error here
54+
# end
55+
# EOM
56+
#
57+
# You can also pass in an array of lines and they'll be
58+
# joined before evaluating
59+
#
60+
# SyntaxErrorSearch.valid?(
61+
# [
62+
# "def foo\n",
63+
# "end\n"
64+
# ]
65+
# ) # => true
66+
#
67+
# SyntaxErrorSearch.valid?(
68+
# [
69+
# "def foo\n",
70+
# " def bar\n", # Syntax error here
71+
# "end\n"
72+
# ]
73+
# ) # => false
74+
#
75+
# As an FYI the CodeLine class instances respond to `to_s`
76+
# so passing a CodeLine in as an object or as an array
77+
# will convert it to its code representation.
1878
def self.valid?(source)
1979
source = source.join if source.is_a?(Array)
2080
source = source.to_s
2181

2282
# Parser writes to stderr even if you catch the error
23-
#
2483
stderr = $stderr
2584
$stderr = StringIO.new
2685

@@ -37,3 +96,4 @@ def self.valid?(source)
3796
require_relative "syntax_error_search/code_block"
3897
require_relative "syntax_error_search/code_frontier"
3998
require_relative "syntax_error_search/code_search"
99+
require_relative "syntax_error_search/display_invalid_blocks"

lib/syntax_error_search/code_frontier.rb

Lines changed: 152 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,143 @@
11
module SyntaxErrorSearch
22
# This class is responsible for generating, storing, and sorting code blocks
3+
#
4+
# The search algorithm for finding our syntax errors isn't in this class, but
5+
# this class holds the bulk of the logic for generating, storing, detecting
6+
# and filtering invalid code.
7+
#
8+
# This is loosely based on the idea of a "frontier" for searching for a path
9+
# example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
10+
#
11+
# In this case our path is going from code with a syntax error to code without a
12+
# syntax error. We're currently doing that by evaluating individual lines
13+
# with respect to indentation and other whitespace (empty lines). As represented
14+
# by individual "code blocks".
15+
#
16+
# This class does not just store the frontier that we're searching, but is responsible
17+
# for generating new code blocks as well. This is not ideal, but the state of generating
18+
# and evaluating paths i.e. codeblocks is very tightly coupled.
19+
#
20+
# ## Creation
21+
#
22+
# This example code is re-used in the other sections
23+
#
24+
# Example:
25+
#
26+
# code_lines = [
27+
# CodeLine.new(line: "def cinco\n", index: 0),
28+
# CodeLine.new(line: " def dog\n", index: 1), # Syntax error 1
29+
# CodeLine.new(line: " def cat\n", index: 2), # Syntax error 2
30+
# CodeLine.new(line: "end\n", index: 3)
31+
# ]
32+
#
33+
# frontier = CodeFrontier.new(code_lines: code_lines)
34+
#
35+
# frontier << frontier.next_block if frontier.next_block?
36+
# frontier << frontier.next_block if frontier.next_block?
37+
#
38+
# frontier.holds_all_syntax_errors? # => true
39+
# block = frontier.pop
40+
# frontier.holds_all_syntax_errors? # => false
41+
# frontier << block
42+
# frontier.holds_all_syntax_errors? # => true
43+
#
44+
# frontier.detect_invalid_blocks.map(&:to_s) # =>
45+
# [
46+
# "def dog\n",
47+
# "def cat\n"
48+
# ]
49+
#
50+
# ## Block Generation
51+
#
52+
# Currently code blocks are generated based off of indentation. With the idea that blocks are,
53+
# well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
54+
# then we also need to remove those lines from our generation code so we don't generate the same block
55+
# twice by accident.
56+
#
57+
# This block generation is currently done via the "indent_hash" internally by starting at the outer
58+
# most indentation.
59+
#
60+
# Example:
61+
#
62+
# ```
63+
# def river
64+
# puts "lol" # <=== Start looking here and expand outwards
65+
# end
66+
# ```
67+
#
68+
# Generating new code blocks is a little verbose but looks like this:
69+
#
70+
# frontier << frontier.next_block if frontier.next_block?
71+
#
72+
# Once a block is in the frontier, it can be popped off:
73+
#
74+
# frontier.pop
75+
# # => <# CodeBlock >
76+
#
77+
# ## Block (frontier) storage, ordering and retrieval
78+
#
79+
# Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
80+
# The array is sorted by indentation order, so that when a block is popped off the array, the one with
81+
# the largest current indentation is evaluated first.
82+
#
83+
# For example, if we have these two blocks in the frontier:
84+
#
85+
# ```
86+
# # Block A - 0 spaces for indentation
87+
#
88+
# def cinco
89+
# puts "lol"
90+
# end
91+
# ```
92+
#
93+
# ```
94+
# # Block B - 2 spaces for indentation
95+
#
96+
# def river
97+
# puts "hehe"
98+
# end
99+
# ```
100+
#
101+
# The "Block B" has more current indentation, so it would be evaluated first.
102+
#
103+
# ## Frontier evaluation (Find the syntax error)
104+
#
105+
# Another key difference between this and a normal search "frontier" is that we're not checking if
106+
# an individual code block meets the goal (turning invalid code to valid code) since there can
107+
# be multiple syntax errors and this will require multiple code blocks. To handle this, we're
108+
# evaluating all the contents of the frontier at the same time to see if the solution exists in any
109+
# of our search blocks.
110+
#
111+
# # Using the previously generated frontier
112+
#
113+
# frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
114+
# frontier.holds_all_syntax_errors? # => false
115+
#
116+
# frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
117+
# frontier.holds_all_syntax_errors? # => true
118+
#
119+
# ## Detect invalid blocks (Filter for smallest solution)
120+
#
121+
# After we prove that a solution exists and we've found it to be in our frontier, we can stop searching.
122+
# Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
123+
# of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
124+
#
125+
# # Using the previously generated frontier
126+
#
127+
# frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
128+
# frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
129+
# frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
130+
# frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
131+
#
132+
# frontier.count # => 4
133+
# frontier.detect_invalid_blocks.length # => 2
134+
# frontier.detect_invalid_blocks.map(&:to_s) # =>
135+
# [
136+
# "def dog\n",
137+
# "def cat\n"
138+
# ]
139+
#
140+
# Once invalid blocks are found and filtered, then they can be passed to a formatter.
3141
class CodeFrontier
4142
def initialize(code_lines: )
5143
@code_lines = code_lines
@@ -13,33 +151,36 @@ def initialize(code_lines: )
13151
end
14152
end
15153

154+
# Returns the number of code blocks currently stored in the frontier
# (delegates to `@frontier.count`)
def count
155+
@frontier.count
156+
end
157+
16158
# Returns true if the document is valid with all lines
17159
# removed. By default it checks all blocks present in
18160
# the frontier array, but can be used for arbitrary arrays
19161
# of codeblocks as well
20162
def holds_all_syntax_errors?(block_array = @frontier)
21-
lines = @code_lines
22-
block_array.each do |block|
23-
lines -= block.lines
163+
without_lines = block_array.map do |block|
164+
block.lines
24165
end
25166

26-
return true if lines.empty?
27-
28-
CodeBlock.new(
29-
code_lines: @code_lines,
30-
lines: lines
31-
).valid?
167+
SyntaxErrorSearch.valid_without?(
168+
without_lines: without_lines,
169+
code_lines: @code_lines
170+
)
32171
end
33172

34173
# Returns a code block with the largest indentation possible
35174
def pop
36175
return nil if empty?
37176

38-
self << next_block unless @indent_hash.empty?
39-
40177
return @frontier.pop
41178
end
42179

180+
# Returns true while `@indent_hash` still has entries, i.e. while
# `next_block` (which reads from `@indent_hash`) has something left
# to build a block from. Check this before calling `next_block`.
def next_block?
181+
!@indent_hash.empty?
182+
end
183+
43184
def next_block
44185
indent = @indent_hash.keys.sort.last
45186
lines = @indent_hash[indent].first

lib/syntax_error_search/code_search.rb

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,27 @@
11
module SyntaxErrorSearch
2+
# Searches code for a syntax error
3+
#
4+
# The bulk of the heavy lifting is done by the CodeFrontier
5+
#
6+
# The flow looks like this:
7+
#
8+
# ## Syntax error detection
9+
#
10+
# When the frontier holds the syntax error, we can stop searching
11+
#
12+
#
13+
# search = CodeSearch.new(<<~EOM)
14+
# def dog
15+
# def lol
16+
# end
17+
# EOM
18+
#
19+
# search.call
20+
#
21+
# search.invalid_blocks.map(&:to_s) # =>
22+
# # => ["def lol\n"]
23+
#
24+
#
225
class CodeSearch
326
private; attr_reader :frontier; public
427
public; attr_reader :invalid_blocks
@@ -13,6 +36,8 @@ def initialize(string)
1336

1437
def call
1538
until frontier.holds_all_syntax_errors?
39+
frontier << frontier.next_block if frontier.next_block?
40+
1641
block = frontier.pop
1742

1843
if block.valid?
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
module SyntaxErrorSearch
  # Used for formatting invalid blocks
  #
  # Given the code blocks that were identified as holding the syntax
  # error, this prints the document to `io` with line numbers and
  # emphasizes (bold + italics) every line that belongs to one of the
  # given blocks.
  #
  # Each block must respond to `lines` and `code_lines`; each line must
  # respond to `line_number`, `hidden?`, `empty?` and `to_s`.
  #
  #   DisplayInvalidBlocks.new(blocks, io: $stderr, filename: "foo.rb").call
  class DisplayInvalidBlocks
    attr_reader :filename

    # block_array - Array of blocks to highlight. May be empty, in which
    #               case no code is listed.
    # io:         - Object responding to `puts` that receives the output
    # filename:   - Optional name printed above the code listing
    def initialize(block_array, io: $stderr, filename: nil)
      @filename = filename
      @io = io
      @blocks = block_array
      @lines = @blocks.map(&:lines).flatten

      # All blocks share one document; an empty block array has none.
      @code_lines = @blocks.empty? ? [] : @blocks.first.code_lines

      # Width of the line number gutter. It is derived from the largest
      # line number that can be printed (not from the last invalid line)
      # so that every row of the listing lines up.
      @digit_count = @code_lines.map(&:line_number).max.to_s.length

      @invalid_line_hash = @lines.each_with_object({}) {|line, h| h[line] = true}
    end

    # Writes the explanation banner, the optional file name and the
    # annotated code listing to the configured io.
    def call
      @io.puts <<~EOM

        SyntaxErrorSearch: A syntax error was detected

        This code has an unmatched `end` this is caused by either
        missing a syntax keyword (`def`, `do`, etc.) or inclusion
        of an extra `end` line

      EOM
      @io.puts("file: #{filename}") if filename
      @io.puts <<~EOM
        simplified:

        #{code_with_filename(indent: 2)}
      EOM
    end

    # Returns the numbered code listing wrapped in a ``` fence, with every
    # line of the result prefixed by `indent` spaces.
    def code_with_filename(indent: 0)
      fenced = "```\n#{code_with_lines}```\n"
      padding = " " * indent

      fenced.each_line.map {|l| padding + l }.join
    end

    # Returns the visible (non-hidden) lines of the document as one
    # string, each prefixed with its right-aligned line number. Lines
    # that belong to an invalid block are wrapped in ANSI bold/italics.
    def code_with_lines
      @code_lines.reject(&:hidden?).map do |line|
        number = line.line_number.to_s.rjust(@digit_count)

        # Empty lines get only their number, never any highlighting
        next "#{number}#{line}" if line.empty?

        numbered = "#{number} #{line}"
        if @invalid_line_hash[line]
          # Bold, italics; the reset is only emitted when a highlight
          # was actually started
          "\e[1;3m#{numbered}\e[0m"
        else
          numbered
        end
      end.join
    end
  end
end

0 commit comments

Comments
 (0)