11module SyntaxErrorSearch
22 # This class is responsible for generating, storing, and sorting code blocks
3+ #
4+ # The search algorithm for finding our syntax errors isn't in this class, but
5+ # this class holds the bulk of the logic for generating, storing, detecting
6+ # and filtering invalid code.
7+ #
8+ # This is loosely based on the idea of a "frontier" for searching for a path
9+ # example: https://en.wikipedia.org/wiki/Dijkstra%27s_algorithm
10+ #
11+ # In this case our path is going from code with a syntax error to code without a
12+ # syntax error. We're currently doing that by evaluating individual lines
13+ # with respect to indentation and other whitespace (empty lines). As represented
14+ # by individual "code blocks".
15+ #
16+ # This class does not just store the frontier that we're searching, but is responsible
17+ # for generating new code blocks as well. This is not ideal, but the state of generating
18+ # and evaluating paths i.e. codeblocks is very tightly coupled.
19+ #
20+ # ## Creation
21+ #
22+ # This example code is re-used in the other sections
23+ #
24+ # Example:
25+ #
26+ # code_lines = [
27+ # CodeLine.new(line: "def cinco\n", index: 0),
28+ # CodeLine.new(line: " def dog\n", index: 1), # Syntax error 1
29+ # CodeLine.new(line: " def cat\n", index: 2), # Syntax error 2
30+ # CodeLine.new(line: "end\n", index: 3)
31+ # ]
32+ #
33+ # frontier = CodeFrontier.new(code_lines: code_lines)
34+ #
35+ # frontier << frontier.next_block if frontier.next_block?
36+ # frontier << frontier.next_block if frontier.next_block?
37+ #
38+ # frontier.holds_all_syntax_errors? # => true
39+ # block = frontier.pop
40+ # frontier.holds_all_syntax_errors? # => false
41+ # frontier << block
42+ # frontier.holds_all_syntax_errors? # => true
43+ #
44+ # frontier.detect_invalid_blocks.map(&:to_s) # =>
45+ # [
46+ # "def dog\n",
47+ # "def cat\n"
48+ # ]
49+ #
50+ # ## Block Generation
51+ #
52+ # Currently code blocks are generated based off of indentation. With the idea that blocks are,
53+ # well, indented. Once a code block is added to the frontier or it is expanded, or it is generated
54+ # then we also need to remove those lines from our generation code so we don't generate the same block
55+ # twice by accident.
56+ #
57+ # Block generation is currently done internally via the "indent_hash", starting at the
58+ # largest (most deeply nested) indentation.
59+ #
60+ # Example:
61+ #
62+ # ```
63+ # def river
64+ # puts "lol" # <=== Start looking here and expand outwards
65+ # end
66+ # ```
67+ #
68+ # Generating new code blocks is a little verbose but looks like this:
69+ #
70+ # frontier << frontier.next_block if frontier.next_block?
71+ #
72+ # Once a block is in the frontier, it can be popped off:
73+ #
74+ # frontier.pop
75+ # # => <# CodeBlock >
76+ #
77+ # ## Block (frontier) storage, ordering and retrieval
78+ #
79+ # Once a block is generated it is stored internally in a frontier array. This is very similar to a search algorithm.
80+ # The array is sorted by indentation order, so that when a block is popped off the array, the one with
81+ # the largest current indentation is evaluated first.
82+ #
83+ # For example, if we have these two blocks in the frontier:
84+ #
85+ # ```
86+ # # Block A - 0 spaces for indentation
87+ #
88+ # def cinco
89+ # puts "lol"
90+ # end
91+ # ```
92+ #
93+ # ```
94+ # # Block B - 2 spaces for indentation
95+ #
96+ # def river
97+ # puts "hehe"
98+ # end
99+ # ```
100+ #
101+ # The "Block B" has more current indentation, so it would be evaluated first.
102+ #
103+ # ## Frontier evaluation (Find the syntax error)
104+ #
105+ # Another key difference between this and a normal search "frontier" is that we're not checking if
106+ # an individual code block meets the goal (turning invalid code to valid code) since there can
107+ # be multiple syntax errors and this will require multiple code blocks. To handle this, we're
108+ # evaluating all the contents of the frontier at the same time to see if the solution exists in any
109+ # of our search blocks.
110+ #
111+ # # Using the previously generated frontier
112+ #
113+ # frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
114+ # frontier.holds_all_syntax_errors? # => false
115+ #
116+ # frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
117+ # frontier.holds_all_syntax_errors? # => true
118+ #
119+ # ## Detect invalid blocks (Filter for smallest solution)
120+ #
121+ # After we prove that a solution exists and we've found it to be in our frontier, we can stop searching.
122+ # Once we've done this, we need to search through the existing frontier code blocks to find the minimum combination
123+ # of blocks that hold the solution. This is done in: `detect_invalid_blocks`.
124+ #
125+ # # Using the previously generated frontier
126+ #
127+ # frontier << CodeBlock.new(lines: code_lines[0], code_lines: code_lines)
128+ # frontier << CodeBlock.new(lines: code_lines[1], code_lines: code_lines)
129+ # frontier << CodeBlock.new(lines: code_lines[2], code_lines: code_lines)
130+ # frontier << CodeBlock.new(lines: code_lines[3], code_lines: code_lines)
131+ #
132+ # frontier.count # => 4
133+ # frontier.detect_invalid_blocks.length # => 2
134+ # frontier.detect_invalid_blocks.map(&:to_s) # =>
135+ # [
136+ # "def dog\n",
137+ # "def cat\n"
138+ # ]
139+ #
140+ # Once invalid blocks are found and filtered, then they can be passed to a formatter.
3141 class CodeFrontier
4142 def initialize ( code_lines : )
5143 @code_lines = code_lines
@@ -13,33 +151,36 @@ def initialize(code_lines: )
13151 end
14152 end
15153
# Returns the number of code blocks currently stored in the frontier array.
def count
  @frontier.count
end
157+
# Returns true if the document is valid once the lines of all the given
# blocks are removed. By default it checks all blocks present in
# the frontier array, but can be used for arbitrary arrays
# of codeblocks as well.
#
# block_array - Array of objects responding to `lines` (defaults to the
#               frontier array).
#
# The validity check itself is delegated to
# SyntaxErrorSearch.valid_without?; this method only gathers the lines.
def holds_all_syntax_errors?(block_array = @frontier)
  # One entry per block, each entry being that block's `lines`.
  without_lines = block_array.map(&:lines)

  SyntaxErrorSearch.valid_without?(
    without_lines: without_lines,
    code_lines: @code_lines
  )
end
33172
# Returns a code block with the largest indentation possible
#
# Removes and returns the last element of the frontier array, or nil when
# `empty?` reports there is nothing left. This method does not generate
# new blocks; callers add them explicitly, e.g.
# `frontier << frontier.next_block if frontier.next_block?`.
def pop
  return nil if empty?

  @frontier.pop
end
42179
# Returns true while the indent hash still has entries, i.e. there are
# lines left from which `next_block` can build another block.
def next_block?
  !@indent_hash.empty?
end
183+
43184 def next_block
44185 indent = @indent_hash . keys . sort . last
45186 lines = @indent_hash [ indent ] . first
0 commit comments