@@ -42,11 +42,22 @@ internal class ParserStructure<in Output>(
4242}
4343
4444/* *
45- * Concatenates a list of parser structures into a single structure, processing them in reverse order.
46- * Simplifies the result by merging number spans and handling unconditional modifications.
45+ * Concatenates a list of parser structures into a single *valid* structure.
46+ *
47+ * A *valid* parser is one where, if numeric values are parsed consecutively without a separator
48+ * (or with zero-width [UnconditionalModification] separators) between them,
49+ * they are represented as a single [NumberSpanParserOperation].
4750 */
4851internal fun <T > List<ParserStructure<T>>.concat (): ParserStructure <T > {
49- // Invariant: only called when simplifiedParserStructure.operations is non-empty
52+ /* *
53+ * Returns a *valid* parser obtained by prepending [baseOperations] followed by [numberSpan]
54+ * to [simplifiedParserStructure],
55+ * while ensuring that [unconditionalModifications] are preserved in the result.
56+ *
57+ * Requirements:
58+ * - [simplifiedParserStructure] must have non-empty [ParserStructure.operations].
59+ * - [simplifiedParserStructure] is a *valid* parser.
60+ */
5061 fun mergeOperations (
5162 baseOperations : List <ParserOperation <T >>,
5263 numberSpan : List <NumberConsumer <T >>? ,
@@ -78,33 +89,48 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
7889 return ParserStructure (mergedOperations, simplifiedParserStructure.followedBy)
7990 }
8091
81- // Simplifies this parser and appends [other] to all execution paths.
82- // Merges number spans, collects unconditional modifications, and flattens alternatives.
92+ /* *
93+ * Returns a *valid* parser obtained by prepending *any* parser `this` to a *valid* parser [other].
94+ */
8395 fun ParserStructure<T>.simplifyAndAppend (other : ParserStructure <T >): ParserStructure <T > {
8496 val newOperations = mutableListOf<ParserOperation <T >>()
8597 var currentNumberSpan: MutableList <NumberConsumer <T >>? = null
8698 val unconditionalModifications = mutableListOf<UnconditionalModification <T >>()
8799
100+ // Loop invariant:
101+ //
102+ //                                                        |- zero-width parsers interspersing the number span
103+ //                                                        |
104+ //                                            unconditionalModifications
105+ //                                           \-------------------------/
106+ // operation, ..., operation, number, number, UnconditionalModification, number, operation, operation
107+ // \_______________________/  \______________ . . . . . . . . . . . . . ______/  \_______/
108+ //       newOperations                        currentNumberSpan                     op
109+ //             |                                      |                             |- next operation
110+ //             |- operations where spans of           |- the continued span of
111+ //                number parsers are merged into         number parsers
112+ //                `NumberSpanParserOperation`
88113 for (op in operations) {
89- if (op is NumberSpanParserOperation ) {
90- if (currentNumberSpan != null ) {
114+ when (op) {
115+ is NumberSpanParserOperation -> if (currentNumberSpan != null ) {
91116 currentNumberSpan.addAll(op.consumers)
92117 } else {
93118 currentNumberSpan = op.consumers.toMutableList()
94119 }
95- } else if (op is UnconditionalModification ) {
96- unconditionalModifications.add(op)
97- } else {
98- if (currentNumberSpan != null ) {
99- newOperations.add(NumberSpanParserOperation (currentNumberSpan))
100- currentNumberSpan = null
101- newOperations.addAll(unconditionalModifications)
102- unconditionalModifications.clear()
120+ is UnconditionalModification -> unconditionalModifications.add(op)
121+ else -> {
122+ if (currentNumberSpan != null ) {
123+ newOperations.add(NumberSpanParserOperation (currentNumberSpan))
124+ currentNumberSpan = null
125+ newOperations.addAll(unconditionalModifications)
126+ unconditionalModifications.clear()
127+ }
128+ newOperations.add(op)
103129 }
104- newOperations.add(op)
105130 }
106131 }
107132
133+ // *Valid* parsers resulting from appending [other] to every parser in `this.followedBy`.
108134 val mergedTails = followedBy.flatMap {
109135 val simplified = it.simplifyAndAppend(other)
110136 // Parser `ParserStructure(emptyList(), p)` is equivalent to `p`,
@@ -116,8 +142,12 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
116142 else
117143 listOf (simplified)
118144 }.ifEmpty {
145+ // We only enter this branch if [followedBy] is empty.
146+ // In that case, [mergedTails] is exactly `listOf(other)`.
147+ // We optimize this common case here as a fast-path and to reduce indirection in the resulting parser.
119148 if (other.operations.isNotEmpty()) {
120- // The invariant is preserved: other.operations is non-empty
149+ // Directly append `other` to the simplified `this`.
150+ // The call is valid: `other.operations` is non-empty
121151 return mergeOperations(newOperations, currentNumberSpan, unconditionalModifications, other)
122152 }
123153 // [other] has no operations, just alternatives; use them as our tails
@@ -156,6 +186,15 @@ internal fun <T> List<ParserStructure<T>>.concat(): ParserStructure<T> {
156186 }
157187 }
158188
189+ // Loop invariant:
190+ //
191+ // this = Parser, ..., Parser, operations, operations, operations, Parser, Parser, ...
192+ //                     \____/  \________________________________/  \_________________/
193+ //                     parser   accumulatedOperations.reversed()         result
194+ //                       |                     |                           |- simplified parser
195+ //                       |                     |- span of parsers without branching
196+ //                       |
197+ //                       |- next parser to be processed
159198 for (parser in this .asReversed()) {
160199 if (parser.followedBy.isEmpty()) {
161200 accumulatedOperations.add(parser.operations)
0 commit comments