diff --git a/kson-lib/src/commonMain/kotlin/org/kson/Kson.kt b/kson-lib/src/commonMain/kotlin/org/kson/Kson.kt index 749c6747e..ad296f030 100644 --- a/kson-lib/src/commonMain/kotlin/org/kson/Kson.kt +++ b/kson-lib/src/commonMain/kotlin/org/kson/Kson.kt @@ -346,7 +346,7 @@ private fun convertTokens(internalTokens: List): List { contentStart = contentToken.lexeme.location.start } contentEnd = contentToken.lexeme.location.end - contentBuilder.append(contentToken.value) + contentBuilder.append(contentToken.lexeme.text) } // Add the open quote token diff --git a/kson-tooling-lib/src/commonMain/kotlin/org.kson/navigation/KsonValuePathBuilder.kt b/kson-tooling-lib/src/commonMain/kotlin/org.kson/navigation/KsonValuePathBuilder.kt index a6b9b2d12..8a215914f 100644 --- a/kson-tooling-lib/src/commonMain/kotlin/org.kson/navigation/KsonValuePathBuilder.kt +++ b/kson-tooling-lib/src/commonMain/kotlin/org.kson/navigation/KsonValuePathBuilder.kt @@ -189,7 +189,7 @@ class KsonValuePathBuilder(private val document: String, private val location: C targetNode is KsonObject && forDefinition -> { // Extract the property name from the token - val propertyName = lastToken.value + val propertyName = lastToken.lexeme.text path + propertyName } // Location is outside the token - target the parent element (for completions) @@ -245,4 +245,4 @@ class KsonValuePathBuilder(private val document: String, private val location: C } -} \ No newline at end of file +} diff --git a/kson-tooling-lib/src/commonTest/kotlin/org/kson/SchemaInfoDisplayTest.kt b/kson-tooling-lib/src/commonTest/kotlin/org/kson/SchemaInfoDisplayTest.kt index 107a4bdcf..589bb56a7 100644 --- a/kson-tooling-lib/src/commonTest/kotlin/org/kson/SchemaInfoDisplayTest.kt +++ b/kson-tooling-lib/src/commonTest/kotlin/org/kson/SchemaInfoDisplayTest.kt @@ -283,7 +283,7 @@ class SchemaInfoDisplayTest { """ // Get hover info for the name field (line 2, pointing to "John") - val hoverInfo = KsonTooling.getSchemaInfoAtLocation(document, schema, 2, 14) + val hoverInfo = KsonTooling.getSchemaInfoAtLocation(document, schema, 2, 15) assertNotNull(hoverInfo) assertTrue(hoverInfo.contains("User's full name")) assertTrue(hoverInfo.contains("*Type:* `string`")) diff --git a/kson-tooling-lib/src/commonTest/kotlin/org/kson/SchemaInfoLocationTest.kt b/kson-tooling-lib/src/commonTest/kotlin/org/kson/SchemaInfoLocationTest.kt index 6158e572f..eba8d35d1 100644 --- a/kson-tooling-lib/src/commonTest/kotlin/org/kson/SchemaInfoLocationTest.kt +++ b/kson-tooling-lib/src/commonTest/kotlin/org/kson/SchemaInfoLocationTest.kt @@ -149,7 +149,7 @@ class SchemaInfoLocationTest { """.trimIndent() val hoverInfo = getInfoAtCaret(schema, """ - email: 'user@example.com' + email: 'user@example.com' """.trimIndent()) assertNotNull(hoverInfo, "Expected hover info but got null") @@ -473,4 +473,4 @@ class SchemaInfoLocationTest { assertTrue(hoverInfo.contains("Item name from ref"), "Expected description from resolved ref. Got: $hoverInfo") assertTrue(hoverInfo.contains("*Type:* `string`"), "Expected type from resolved ref. 
Got: $hoverInfo") } -} \ No newline at end of file +} diff --git a/src/commonMain/kotlin/org/kson/ast/Ast.kt b/src/commonMain/kotlin/org/kson/ast/Ast.kt index 091381a0a..5e244b78c 100644 --- a/src/commonMain/kotlin/org/kson/ast/Ast.kt +++ b/src/commonMain/kotlin/org/kson/ast/Ast.kt @@ -10,10 +10,18 @@ import org.kson.parser.NumberParser import org.kson.parser.NumberParser.ParsedNumber import org.kson.tools.IndentType import org.kson.parser.Parser +import org.kson.parser.Token +import org.kson.parser.TokenType.COMMENT +import org.kson.parser.TokenType.WHITESPACE +import org.kson.parser.behavior.IdentityContentTransformer +import org.kson.parser.behavior.KsonContentTransformer import org.kson.parser.behavior.StringQuote import org.kson.parser.behavior.StringQuote.* import org.kson.parser.behavior.StringUnquoted +import org.kson.parser.behavior.embedblock.EmbedContentTransformer import org.kson.parser.behavior.embedblock.EmbedObjectKeys +import org.kson.parser.behavior.quotedstring.QuotedStringContentTransformer +import org.kson.stdlibx.exceptions.ShouldNotHappenException import org.kson.tools.FormattingStyle.* interface AstNode { @@ -36,6 +44,11 @@ interface AstNode { */ val location: Location + /** + * The sequence of tokens that defines this [AstNode] in the originally parsed source + */ + val sourceTokens: List + /** * Abstract representation of the indentation to apply when serializing an AST as source code */ @@ -91,7 +104,11 @@ interface AstNode { /** * Base [AstNode] to be subclassed by all Kson AST Node classes */ -sealed class AstNodeImpl(override val location: Location) : AstNode { +sealed class AstNodeImpl(override val sourceTokens: List) : AstNode { + override val location: Location by lazy { + Location.merge(sourceTokens.first().lexeme.location, sourceTokens.last().lexeme.location) + } + /** * Transpiles this [AstNode] to the given [compileTarget] source, respecting the configuration in the given * [CompileTarget] @@ -128,10 +145,11 @@ sealed class AstNodeImpl(override val location: Location) : AstNode { * two implementations: the concrete `Impl` version for valid [AstNode]s and the "shadow" `Error` implementation * which patches the AST with an [AstNodeError] where an [AstNodeImpl] would otherwise go */ -open class AstNodeError(private val invalidSource: String, location: Location) : AstNode, AstNodeImpl(location) { +open class AstNodeError(sourceTokens: List) : AstNode, AstNodeImpl(sourceTokens) { override fun toSourceInternal(indent: Indent, nextNode: AstNode?, compileTarget: CompileTarget): String { return when (compileTarget) { is Kson, is Yaml -> { + val invalidSource = sourceTokens.joinToString("") { it.lexeme.text } invalidSource.split("\n") .joinToString("\n") { line -> indent.firstLineIndent() + line @@ -146,8 +164,8 @@ open class AstNodeError(private val invalidSource: String, location: Location) : * the grammar documented on [Parser] */ interface KsonValueNode : AstNode -class KsonValueNodeError(content: String, location: Location) : KsonValueNode, AstNodeError(content, location) -abstract class KsonValueNodeImpl(location: Location) : KsonValueNode, AstNodeImpl(location) +class KsonValueNodeError(sourceTokens: List) : KsonValueNode, AstNodeError(sourceTokens) +abstract class KsonValueNodeImpl(sourceTokens: List) : KsonValueNode, AstNodeImpl(sourceTokens) /** * Any kson entity is either the [KsonRoot] of the document, an [ObjectPropertyNode] @@ -160,14 +178,14 @@ interface Documented { } interface KsonRoot : AstNode -class KsonRootError(content: String, location: 
Location) : KsonRoot, AstNodeError(content, location) +class KsonRootError(sourceTokens: List) : KsonRoot, AstNodeError(sourceTokens) class KsonRootImpl( val rootNode: KsonValueNode, private val trailingContent: List, override val comments: List, private val documentEndComments: List, - location: Location -) : KsonRoot, AstNodeImpl(location), Documented { + sourceTokens: List +) : KsonRoot, AstNodeImpl(sourceTokens), Documented { /** * Produces valid [compileTarget] source code for the AST rooted at this [KsonRoot] @@ -225,7 +243,7 @@ private fun isTrailingContent(nextNode: AstNode?): Boolean { return nextNode is KsonValueNode } -class ObjectNode(val properties: List, location: Location) : KsonValueNodeImpl(location) { +class ObjectNode(val properties: List, sourceTokens: List) : KsonValueNodeImpl(sourceTokens) { override fun toSourceInternal(indent: Indent, nextNode: AstNode?, compileTarget: CompileTarget): String { if (properties.isEmpty()) { return "${indent.firstLineIndent()}{}" @@ -332,10 +350,10 @@ class ObjectNode(val properties: List, location: Location) : } interface ObjectKeyNode : StringNode -class ObjectKeyNodeError(content: String, location: Location) : ObjectKeyNode, AstNodeError(content, location) +class ObjectKeyNodeError(sourceTokens: List) : ObjectKeyNode, AstNodeError(sourceTokens) class ObjectKeyNodeImpl( val key: StringNode -) : ObjectKeyNode, AstNodeImpl(key.location) { +) : ObjectKeyNode, AstNodeImpl(key.sourceTokens) { override fun toSourceInternal(indent: Indent, nextNode: AstNode?, compileTarget: CompileTarget): String { val keyOutput = key.toSourceWithNext(indent, null, compileTarget) return "$keyOutput:" @@ -343,14 +361,14 @@ class ObjectKeyNodeImpl( } interface ObjectPropertyNode : AstNode -class ObjectPropertyNodeError(content: String, location: Location) : ObjectPropertyNode, AstNodeError(content, location) +class ObjectPropertyNodeError(sourceTokens: List) : ObjectPropertyNode, AstNodeError(sourceTokens) class ObjectPropertyNodeImpl( val key: ObjectKeyNode, val value: KsonValueNode, override val comments: List, - location: Location + sourceTokens: List ) : - ObjectPropertyNode, AstNodeImpl(location), Documented { + ObjectPropertyNode, AstNodeImpl(sourceTokens), Documented { override fun toSourceInternal(indent: Indent, nextNode: AstNode?, compileTarget: CompileTarget): String { return when (compileTarget) { is Kson -> { @@ -424,8 +442,8 @@ class ObjectPropertyNodeImpl( class ListNode( val elements: List, - location: Location -) : KsonValueNodeImpl(location) { + sourceTokens: List +) : KsonValueNodeImpl(sourceTokens) { private sealed class ListDelimiters(val open: Char, val close: Char) { data object AngleBrackets : ListDelimiters('<', '>') data object SquareBrackets : ListDelimiters('[', ']') @@ -558,9 +576,11 @@ class ListNode( } interface ListElementNode : AstNode -class ListElementNodeError(content: String, location: Location) : AstNodeError(content, location), ListElementNode -class ListElementNodeImpl(val value: KsonValueNode, override val comments: List, location: Location) : - ListElementNode, AstNodeImpl(location), Documented { +class ListElementNodeError(sourceTokens: List) : AstNodeError(sourceTokens), ListElementNode +class ListElementNodeImpl(val value: KsonValueNode, + override val comments: List, + sourceTokens: List) : + ListElementNode, AstNodeImpl(sourceTokens), Documented { override fun toSourceInternal(indent: Indent, nextNode: AstNode?, compileTarget: CompileTarget): String { return when (compileTarget) { @@ -600,24 +620,41 @@ class 
ListElementNodeImpl(val value: KsonValueNode, override val comments: List< } interface StringNode : KsonValueNode -abstract class StringNodeImpl(location: Location) : StringNode, KsonValueNodeImpl(location) { +abstract class StringNodeImpl(sourceTokens: List) : StringNode, KsonValueNodeImpl(sourceTokens) { abstract val stringContent: String - val processedStringContent: String by lazy { - unescapeStringContent(stringContent) - } + abstract val processedStringContent: String + + abstract val contentTransformer: KsonContentTransformer } -/** - * Note: [ksonEscapedStringContent] is expected to be the exact content of a [stringQuote]-delimited [Kson] string, - * including all escapes, but excluding the outer quotes. A [Kson] string is escaped identically to a Json string, - * except that [Kson] allows raw whitespace to be embedded in strings - */ -open class QuotedStringNode( - private val ksonEscapedStringContent: String, - private val stringQuote: StringQuote, - location: Location -) : StringNodeImpl(location) { +class QuotedStringNode( + sourceTokens: List, + // TODO this should not be nullable + private val stringQuote: StringQuote?, +) : StringNodeImpl(sourceTokens) { + + override val contentTransformer: QuotedStringContentTransformer by lazy { + QuotedStringContentTransformer(stringContent, location) + } + + override val processedStringContent: String by lazy { + if (stringQuote != null) { + contentTransformer.processedContent + } else { + stringContent + + } + } + + /** + * Note: [stringContent] is the exact content of a [stringQuote]-delimited [Kson] string, + * including all escapes, but excluding the outer quotes. A [Kson] string is escaped identically to a Json string, + * except that [Kson] allows raw whitespace to be embedded in strings + */ + override val stringContent: String by lazy { + renderTokens(sourceTokens) + } /** * An "unquoted" Kson string: i.e. a valid Kson string with all escapes intact except for quote escapes. @@ -625,11 +662,7 @@ open class QuotedStringNode( * to obtain a fully valid KsonString */ private val unquotedString: String by lazy { - stringQuote.unescapeQuotes(ksonEscapedStringContent) - } - - override val stringContent: String by lazy { - unquotedString + stringQuote?.unescapeQuotes(stringContent) ?: stringContent } override fun toSourceInternal(indent: Indent, nextNode: AstNode?, compileTarget: CompileTarget): String { @@ -668,7 +701,19 @@ open class QuotedStringNode( } } -class UnquotedStringNode(override val stringContent: String, location: Location) : StringNodeImpl(location) { +open class UnquotedStringNode(sourceTokens: List) : StringNodeImpl(sourceTokens) { + override val processedStringContent: String by lazy { + stringContent + } + + override val contentTransformer: IdentityContentTransformer by lazy { + IdentityContentTransformer(stringContent, location) + } + + override val stringContent: String by lazy { + renderTokens(sourceTokens) + } + val yamlReservedKeywords = setOf( // Boolean true values "y", "Y", "yes", "Yes", "YES", @@ -704,14 +749,23 @@ class UnquotedStringNode(override val stringContent: String, location: Location) } /** - * Callers are in charge of ensuring that `stringValue` is parseable by [NumberParser] + * Callers are in charge of ensuring that [sourceTokens] are fully valid, i.e. 
that the + * [stringValue] produced by them is parseable by [NumberParser] */ -class NumberNode(stringValue: String, location: Location) : KsonValueNodeImpl(location) { +class NumberNode(sourceTokens: List) : KsonValueNodeImpl(sourceTokens) { + val stringValue: String by lazy { + renderTokens(sourceTokens) + } + + val contentTransformer: IdentityContentTransformer by lazy { + IdentityContentTransformer(stringValue, location) + } + val value: ParsedNumber by lazy { val parsedNumber = NumberParser(stringValue).parse() parsedNumber.number ?: throw IllegalStateException( "Hitting this indicates a parser bug: unparseable " + - "strings should be passed here but we got: " + stringValue + "strings NEVER should be passed here but we got: " + stringValue ) } @@ -724,7 +778,13 @@ class NumberNode(stringValue: String, location: Location) : KsonValueNodeImpl(lo } } -class TrueNode(location: Location) : KsonValueNodeImpl(location) { +abstract class BooleanNode(sourceTokens: List) : KsonValueNodeImpl(sourceTokens) { + abstract val value: Boolean +} + +class TrueNode(sourceTokens: List) : BooleanNode(sourceTokens) { + override val value = true + override fun toSourceInternal(indent: Indent, nextNode: AstNode?, compileTarget: CompileTarget): String { return when (compileTarget) { is Kson, is Yaml, is Json -> { @@ -734,7 +794,9 @@ class TrueNode(location: Location) : KsonValueNodeImpl(location) { } } -class FalseNode(location: Location) : KsonValueNodeImpl(location) { +class FalseNode(sourceTokens: List) : BooleanNode(sourceTokens) { + override val value = false + override fun toSourceInternal(indent: Indent, nextNode: AstNode?, compileTarget: CompileTarget): String { return when (compileTarget) { is Kson, is Yaml, is Json -> { @@ -744,7 +806,7 @@ class FalseNode(location: Location) : KsonValueNodeImpl(location) { } } -class NullNode(location: Location) : KsonValueNodeImpl(location) { +class NullNode(sourceTokens: List) : KsonValueNodeImpl(sourceTokens) { override fun toSourceInternal(indent: Indent, nextNode: AstNode?, compileTarget: CompileTarget): String { return when (compileTarget) { is Kson, is Yaml, is Json -> { @@ -758,16 +820,13 @@ class EmbedBlockNode( val embedTagNode: StringNodeImpl?, val metadataTagNode: StringNodeImpl?, val embedContentNode: StringNodeImpl, - embedDelim: EmbedDelim, - location: Location + sourceTokens: List ) : - KsonValueNodeImpl(location) { + KsonValueNodeImpl(sourceTokens) { - private val embedTag: String = embedTagNode?.stringContent ?: "" - private val metadataTag: String = metadataTagNode?.stringContent ?: "" - private val embedContent: String by lazy { - embedDelim.unescapeEmbedContent(embedContentNode.stringContent) - } + private val embedTag: String = embedTagNode?.processedStringContent ?: "" + private val metadataTag: String = metadataTagNode?.processedStringContent ?: "" + private val embedContent: String = embedContentNode.processedStringContent override fun toSourceInternal(indent: Indent, nextNode: AstNode?, compileTarget: CompileTarget): String { return when (compileTarget) { @@ -925,3 +984,41 @@ class EmbedBlockNode( } } } + +class EmbedBlockContentNode( + sourceTokens: List, + private val embedDelim: EmbedDelim +) : StringNodeImpl(sourceTokens) { + override val stringContent: String by lazy { + renderTokens(sourceTokens) + } + + override val contentTransformer: EmbedContentTransformer by lazy { + EmbedContentTransformer( + rawContent = stringContent, + embedDelim = embedDelim, + rawLocation = this.location + ) + } + + override val processedStringContent: 
String by lazy { + contentTransformer.processedContent + } + + override fun toSourceInternal( + indent: Indent, + nextNode: AstNode?, + compileTarget: CompileTarget + ): String { + /** + * [EmbedBlockNode] renders this using its [processedStringContent] property + */ + throw ShouldNotHappenException("this node is render by ${EmbedBlockNode::class.simpleName}") + } +} + +private fun renderTokens(sourceTokens: List): String { + return sourceTokens + .filter { it.tokenType != WHITESPACE && it.tokenType != COMMENT } + .joinToString("") { it.lexeme.text } +} diff --git a/src/commonMain/kotlin/org/kson/ast/Escaping.kt b/src/commonMain/kotlin/org/kson/ast/Escaping.kt index 3955a15b6..a2eb3dccc 100644 --- a/src/commonMain/kotlin/org/kson/ast/Escaping.kt +++ b/src/commonMain/kotlin/org/kson/ast/Escaping.kt @@ -96,110 +96,3 @@ private fun appendSurrogatePair(sb: StringBuilder, codePoint: Int) { appendUnicodeEscape(sb, high) appendUnicodeEscape(sb, low) } - -/** - * Unescape a string by converting escape sequences back to their original characters. - * This is the reverse operation of [renderForJsonString]. - * - * @param stringContent to unescape - * @return the unescaped string - */ -fun unescapeStringContent(stringContent: String): String { - val sb = StringBuilder(stringContent.length) - var i = 0 - - while (i < stringContent.length) { - val char = stringContent[i] - - if (char == '\\' && i + 1 < stringContent.length) { - when (val escaped = stringContent[i + 1]) { - '"', '\\', '/' -> { - sb.append(escaped) - i += 2 - } - 'b' -> { - sb.append('\b') - i += 2 - } - 'f' -> { - sb.append('\u000C') - i += 2 - } - 'n' -> { - sb.append('\n') - i += 2 - } - 'r' -> { - sb.append('\r') - i += 2 - } - 't' -> { - sb.append('\t') - i += 2 - } - 'u' -> { - val (chars, consumed) = handleUnicodeEscape(stringContent.substring(i)) - for (c in chars) { - sb.append(c) - } - i += consumed - } - else -> { - // Unknown escape sequence, append backslash as is - sb.append(char) - i++ - } - } - } else { - sb.append(char) - i++ - } - } - - return sb.toString() -} - -/** - * Handles Unicode escape sequences including surrogate pairs. 
- * - * @param input the string containing the Unicode escape starting with \u - * @return Pair of (characters produced, characters consumed from input) - */ -private fun handleUnicodeEscape(input: String): Pair { - // Check if we have enough characters for a Unicode escape (\uXXXX = 6 chars) - if (input.length < 6) { - // Not enough characters for a valid Unicode escape - return Pair(charArrayOf('\\'), 1) - } - - // Check if this is actually a Unicode escape - if (input[0] != '\\' || input[1] != 'u') { - return Pair(charArrayOf('\\'), 1) - } - - val hexStr = input.substring(2, 6) - val codePoint = hexStr.toIntOrNull(16) ?: run { - // Invalid hex sequence, return backslash - return Pair(charArrayOf('\\'), 1) - } - - // Check for high surrogate - if (codePoint.toChar().isHighSurrogate()) { - // Look for low surrogate - if (input.length >= 12 && - input[6] == '\\' && - input[7] == 'u') { - - val lowHexStr = input.substring(8, 12) - val lowCodePoint = lowHexStr.toIntOrNull(16) - - if (lowCodePoint != null && lowCodePoint.toChar().isLowSurrogate()) { - // Valid surrogate pair - return both surrogates and consumed 12 chars - return Pair(charArrayOf(codePoint.toChar(), lowCodePoint.toChar()), 12) - } - } - } - - // Regular Unicode character or unpaired surrogate - consumed 6 chars - return Pair(charArrayOf(codePoint.toChar()), 6) -} diff --git a/src/commonMain/kotlin/org/kson/parser/KsonBuilder.kt b/src/commonMain/kotlin/org/kson/parser/KsonBuilder.kt index db28b8a76..6c72c1764 100644 --- a/src/commonMain/kotlin/org/kson/parser/KsonBuilder.kt +++ b/src/commonMain/kotlin/org/kson/parser/KsonBuilder.kt @@ -41,14 +41,8 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo } }) - override fun getValue(firstTokenIndex: Int, lastTokenIndex: Int): String { + override fun getSourceTokens(firstTokenIndex: Int, lastTokenIndex: Int): List { return tokens.subList(firstTokenIndex, lastTokenIndex + 1) - .filter { it.tokenType != WHITESPACE && it.tokenType != COMMENT } - .joinToString("") { it.value } - } - - override fun getRawText(firstTokenIndex: Int, lastTokenIndex: Int): String { - return tokens.subList(firstTokenIndex, lastTokenIndex + 1).joinToString("") { it.lexeme.text } } override fun getLocation(firstTokenIndex: Int, lastTokenIndex: Int): Location { @@ -115,7 +109,7 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo if (!ignoreErrors) { walkForMessages(rootMarker, messageSink) } - return unsafeAstCreate(rootMarker) { KsonRootError(it, rootMarker.getLocation()) } + return unsafeAstCreate(rootMarker) { KsonRootError(it) } } /** @@ -172,19 +166,19 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo throw ShouldNotHappenException("These tokens do not generate their own AST nodes") } FALSE -> { - FalseNode(marker.getLocation()) + FalseNode(marker.getSourceTokens()) } UNQUOTED_STRING -> { - UnquotedStringNode(marker.getValue(), marker.getLocation()) + UnquotedStringNode(marker.getSourceTokens()) } NULL -> { - NullNode(marker.getLocation()) + NullNode(marker.getSourceTokens()) } NUMBER -> { - NumberNode(marker.getValue(), marker.getLocation()) + NumberNode(marker.getSourceTokens()) } TRUE -> { - TrueNode(marker.getLocation()) + TrueNode(marker.getSourceTokens()) } else -> { // Kotlin seems to having trouble validating that our when is exhaustive here, so we @@ -201,30 +195,33 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo * [Parser.embedBlock] ensures we always find an [EMBED_OPEN_DELIM], * here, which 
we use to understand which [EmbedDelim] is in use here */ - val embedDelimChar = childMarkers.find { it.element == EMBED_OPEN_DELIM }?.getValue() + val embedDelimChar = childMarkers.find { it.element == EMBED_OPEN_DELIM }?.getRawText() ?: throw ShouldNotHappenException("The parser should have ensured we could find an open delim here") val embedTagNode = childMarkers.find { it.element == EMBED_TAG }?.let{ QuotedStringNode( - StringQuote.SingleQuote.escapeQuotes(it.getValue()), - StringQuote.SingleQuote, - it.getLocation() + it.getSourceTokens(), + // TODO this needs to be formalized as a "quoted" string, even if the quotes are novel + // then this should pass the quote type, not null + null ) } val metadataNode = childMarkers.find { it.element == EMBED_METADATA }?.let{ QuotedStringNode( - StringQuote.SingleQuote.escapeQuotes(it.getValue()), - StringQuote.SingleQuote, - it.getLocation() + it.getSourceTokens(), + // TODO this needs to be formalized as a "quoted" string, even if the quotes are novel + // then this should pass the quote type, not null + null ) } + val embedDelim = EmbedDelim.fromString(embedDelimChar) + val embedContentNode = childMarkers.find { it.element == EMBED_CONTENT }?.let{ - QuotedStringNode( - StringQuote.SingleQuote.escapeQuotes(it.getValue()), - StringQuote.SingleQuote, - it.getLocation() + EmbedBlockContentNode( + it.getSourceTokens(), + embedDelim ) } ?: throw ShouldNotHappenException("Embed block should always have embed content") @@ -232,8 +229,7 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo embedTagNode, metadataNode, embedContentNode, - EmbedDelim.fromString(embedDelimChar), - marker.getLocation()) + marker.getSourceTokens()) } OBJECT_KEY -> { /** @@ -242,7 +238,7 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo */ val stringContentMark = childMarkers.first() val objectKey = when (stringContentMark.element) { - UNQUOTED_STRING -> UnquotedStringNode(stringContentMark.getValue(), marker.getLocation()) + UNQUOTED_STRING -> UnquotedStringNode(stringContentMark.getSourceTokens()) QUOTED_STRING -> quoteStringToStringNode(stringContentMark) else -> { throw ShouldNotHappenException("unless our assumptions about keyword parsing have been invalidated") @@ -253,37 +249,40 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo DASH_LIST, DASH_DELIMITED_LIST, BRACKET_LIST -> { val listElementNodes = childMarkers.map { listElementMarker -> unsafeAstCreate(listElementMarker) { - ListElementNodeError(it, listElementMarker.getLocation()) + ListElementNodeError(it) } } - ListNode(listElementNodes, marker.getLocation()) + ListNode( + listElementNodes, + marker.getSourceTokens() + ) } LIST_ELEMENT -> { val comments = marker.getComments() val listElementValue: KsonValueNode = if (childMarkers.size == 1) { - unsafeAstCreate(childMarkers.first()) { KsonValueNodeError(it, childMarkers.first().getLocation()) } + unsafeAstCreate(childMarkers.first()) { KsonValueNodeError(it) } } else { throw FatalParseException("list element markers should mark exactly one value") } ListElementNodeImpl( listElementValue, comments, - marker.getLocation()) + marker.getSourceTokens()) } OBJECT -> { val propertyNodes = childMarkers.map { property -> unsafeAstCreate(property) { - ObjectPropertyNodeError(it, property.getLocation()) + ObjectPropertyNodeError(it) } } - val embedBlockNode = decodeEmbedBlock(propertyNodes, marker.getLocation()) + val embedBlockNode = decodeEmbedBlock(propertyNodes, marker) if (embedBlockNode != null) 
{ return embedBlockNode } - ObjectNode(propertyNodes, marker.getLocation()) + ObjectNode(propertyNodes, marker.getSourceTokens()) } OBJECT_PROPERTY -> { val comments = marker.getComments() @@ -297,24 +296,24 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo val keywordMark = childMarkers.getOrNull(0) ?: throw ShouldNotHappenException("should have a keyword marker") val keyNode: ObjectKeyNode = unsafeAstCreate(keywordMark) { - ObjectKeyNodeError(it, keywordMark.getLocation()) + ObjectKeyNodeError(it) } val valueMark = childMarkers.getOrNull(1) val ksonValueNode: KsonValueNode = if (valueMark == null) { - KsonValueNodeError("", marker.getLocation()) + KsonValueNodeError(marker.getSourceTokens()) } else { unsafeAstCreate(valueMark) { - KsonValueNodeError(it, marker.getLocation()) + KsonValueNodeError(it) } } if (keyNode is ObjectKeyNodeError || ksonValueNode is KsonValueNodeError) { - ObjectPropertyNodeError(marker.getRawText().trim(), marker.getLocation()) + ObjectPropertyNodeError(marker.getSourceTokens().dropLastWhile { it.tokenType == WHITESPACE }) } else { ObjectPropertyNodeImpl( keyNode, ksonValueNode, comments, - marker.getLocation() + marker.getSourceTokens() ) } } @@ -335,12 +334,12 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo } val rootNode = unsafeAstCreate(childMarkers[0]) { - KsonValueNodeError(it, marker.getLocation()) + KsonValueNodeError(it) } val erroneousTrailingContent = childMarkers.drop(1).map { unsafeAstCreate(it) { - KsonValueNodeError(it, marker.getLocation()) + KsonValueNodeError(it) } } @@ -348,7 +347,7 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo erroneousTrailingContent, comments, eofToken.comments, - marker.getLocation()) + marker.getSourceTokens()) } else -> { // Kotlin seems to having trouble validating that our when is exhaustive here, so we @@ -374,7 +373,7 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo */ private fun decodeEmbedBlock( propertyNodes: List, - location: Location + marker: KsonMarker ): EmbedBlockNode? { if (propertyNodes.size > 3){ return null } @@ -382,7 +381,7 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo val propertiesMap = propertyNodes.mapNotNull { prop -> (prop as? ObjectPropertyNodeImpl)?.let { property -> val keyString = (property.key as? ObjectKeyNodeImpl)?.key as? StringNodeImpl - keyString?.stringContent?.let { key -> + keyString?.processedStringContent?.let { key -> key to (property.value as? 
StringNodeImpl) } } @@ -398,19 +397,12 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo val embedContentProperty = propertiesMap[EmbedObjectKeys.EMBED_CONTENT.key] ?: throw ShouldNotHappenException("should have been validated for nullability above") - val escapedContent = EmbedDelim.Percent.escapeEmbedContent(embedContentProperty.processedStringContent) - val embedContentValue = QuotedStringNode( - StringQuote.SingleQuote.escapeQuotes(escapedContent), - StringQuote.SingleQuote, - embedContentProperty.location - ) return EmbedBlockNode( embedTagValue, embedMetadataValue, - embedContentValue, - EmbedDelim.Percent, - location + embedContentProperty, + marker.getSourceTokens() ) } @@ -420,14 +412,18 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo */ private fun quoteStringToStringNode(marker: KsonMarker): QuotedStringNode { /** - * [Parser.string] ensures that a [QUOTED_STRING] contains its [STRING_OPEN_QUOTE] - * and [STRING_CLOSE_QUOTE] + * [Parser.string] ensures that a [QUOTED_STRING] starts with [STRING_OPEN_QUOTE] */ - val quotedString = marker.getValue() + val quotedString = marker.getRawText() val stringDelim = quotedString.first() - val stringContent = quotedString.drop(1).dropLast(1) - return QuotedStringNode(stringContent, StringQuote.fromChar(stringDelim), marker.getLocation()) + val stringContentTokens = marker.getSourceTokens() + // drop the open quote token + .drop(1) + // and take everything up to the close quote (or the end, whichever comes first) + .takeWhile { it.tokenType != TokenType.STRING_CLOSE_QUOTE } + + return QuotedStringNode(stringContentTokens, StringQuote.fromChar(stringDelim)) } /** @@ -440,9 +436,9 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo * @param errorNodeGenerator a lambda to wrap an [ERROR] [marker]'s content in the appropriately typed * [AstNodeError] to be used in place of the node we can't create */ - private fun unsafeAstCreate(marker: KsonMarker, errorNodeGenerator: (errorContent: String) -> A): A { + private fun unsafeAstCreate(marker: KsonMarker, errorNodeGenerator: (errorContent: List) -> A): A { if (marker.element == ERROR ) { - return errorNodeGenerator(marker.getRawText()) + return errorNodeGenerator(marker.getSourceTokens()) } val nodeToCast = toAst(marker) @Suppress("UNCHECKED_CAST") // see method doc for suppress rationale @@ -461,14 +457,9 @@ class KsonBuilder(private val tokens: List, private val ignoreErrors: Boo */ private interface MarkerBuilderContext { /** - * Get the parsed [String] value for the range of tokens from [firstTokenIndex] to [lastTokenIndex], inclusive + * Get the underlying source [Token]s for the range [firstTokenIndex] to [lastTokenIndex], inclusive */ - fun getValue(firstTokenIndex: Int, lastTokenIndex: Int): String - - /** - * Get the raw underlying text for the range of tokens from [firstTokenIndex] to [lastTokenIndex], inclusive - */ - fun getRawText(firstTokenIndex: Int, lastTokenIndex: Int): String + fun getSourceTokens(firstTokenIndex: Int, lastTokenIndex: Int): List /** * Get the location of the underlying text for the range of tokens from [firstTokenIndex] to [lastTokenIndex], @@ -527,15 +518,11 @@ private class KsonMarker(private val context: MarkerBuilderContext, private val } fun getRawText(): String { - return context.getRawText(this.firstTokenIndex, this.lastTokenIndex) + return context.getSourceTokens(this.firstTokenIndex, this.lastTokenIndex).joinToString("") { it.lexeme.text } } - fun getValue(): String { - return 
context.getValue(this.firstTokenIndex, this.lastTokenIndex) - } - - fun getLocation() : Location { - return context.getLocation(this.firstTokenIndex, this.lastTokenIndex) + fun getSourceTokens(): List { + return context.getSourceTokens(this.firstTokenIndex, this.lastTokenIndex) } /** @@ -673,4 +660,4 @@ private class KsonMarker(private val context: MarkerBuilderContext, private val markedError = CoreParseMessage(message) done(ERROR) } -} \ No newline at end of file +} diff --git a/src/commonMain/kotlin/org/kson/parser/Lexer.kt b/src/commonMain/kotlin/org/kson/parser/Lexer.kt index d15f9db0f..31e112db9 100644 --- a/src/commonMain/kotlin/org/kson/parser/Lexer.kt +++ b/src/commonMain/kotlin/org/kson/parser/Lexer.kt @@ -6,7 +6,6 @@ import org.kson.stdlibx.collections.toImmutableMap import org.kson.parser.TokenType.* import org.kson.parser.behavior.StringUnquoted import org.kson.parser.behavior.embedblock.EmbedDelim.* -import org.kson.parser.behavior.embedblock.EmbedBlockIndent import org.kson.stdlibx.exceptions.ShouldNotHappenException private val KEYWORDS = @@ -250,10 +249,6 @@ data class Token( * The [Lexeme] (raw token text and original location) for this token */ val lexeme: Lexeme, - /** - * The final lexed [value] of this token, extracted (and possibly transformed) from [lexeme] - */ - val value: String, /** * The comments that the scanner found for this token. * @@ -324,7 +319,7 @@ class Lexer(source: String, gapFree: Boolean = false) { scan() } - addToken(EOF, Lexeme("", sourceScanner.currentLocation()), "") + addToken(EOF, Lexeme("", sourceScanner.currentLocation())) return tokens.toList() } @@ -391,7 +386,7 @@ class Lexer(source: String, gapFree: Boolean = false) { private fun comment(): String { val commentToken = extractCommentToken() tokens.add(commentToken) - return commentToken.value + return commentToken.lexeme.text } /** @@ -402,7 +397,7 @@ class Lexer(source: String, gapFree: Boolean = false) { while (sourceScanner.peek() != '\n' && !sourceScanner.eof()) sourceScanner.advance() val commentLexeme = sourceScanner.extractLexeme() - return Token(COMMENT, commentLexeme, commentLexeme.text, emptyList()) + return Token(COMMENT, commentLexeme, emptyList()) } /** @@ -424,11 +419,21 @@ class Lexer(source: String, gapFree: Boolean = false) { val lexeme = sourceScanner.extractLexeme() val type: TokenType = KEYWORDS[lexeme.text] ?: UNQUOTED_STRING - addToken(type, lexeme, lexeme.text) + addToken(type, lexeme) } private fun quotedString(delimiter: Char) { var hasUntokenizedStringCharacters = false + + if (sourceScanner.peek() == delimiter || sourceScanner.eof()) { + // empty string + addLiteralToken(STRING_CONTENT) + if (sourceScanner.peek() == delimiter) { + sourceScanner.advance() + addLiteralToken(STRING_CLOSE_QUOTE) + } + return + } while (sourceScanner.peek() != delimiter) { val nextStringChar = sourceScanner.peek() ?: break @@ -525,9 +530,7 @@ class Lexer(source: String, gapFree: Boolean = false) { // extract our embed tag (note: may be empty, that's supported) val embedTagLexeme = sourceScanner.extractLexeme() addToken( - EMBED_TAG, embedTagLexeme, - // trim any trailing whitespace from the embed tag's value - embedTagLexeme.text.trim() + EMBED_TAG, embedTagLexeme ) if(sourceScanner.peek() == ':') { @@ -542,9 +545,7 @@ class Lexer(source: String, gapFree: Boolean = false) { // extract our embed metadata (note: may be empty, that's supported) val embedMetadataLexeme = sourceScanner.extractLexeme() addToken( - EMBED_METADATA, embedMetadataLexeme, - // trim any trailing whitespace from 
the embed tag's value - embedMetadataLexeme.text.trim() + EMBED_METADATA, embedMetadataLexeme ) } @@ -574,9 +575,7 @@ class Lexer(source: String, gapFree: Boolean = false) { } val embedBlockLexeme = sourceScanner.extractLexeme() - - val trimmedEmbedBlockContent = EmbedBlockIndent(embedBlockLexeme.text).trimMinimumIndent() - addToken(EMBED_CONTENT, embedBlockLexeme, trimmedEmbedBlockContent) + addToken(EMBED_CONTENT, embedBlockLexeme) /** * We scanned everything that wasn't an [TokenType.EMBED_CLOSE_DELIM] into our embed content, @@ -614,7 +613,7 @@ class Lexer(source: String, gapFree: Boolean = false) { */ private fun addLiteralToken(tokenType: TokenType): Location { val lexeme = sourceScanner.extractLexeme() - addToken(tokenType, lexeme, lexeme.text) + addToken(tokenType, lexeme) return lexeme.location } @@ -623,11 +622,11 @@ class Lexer(source: String, gapFree: Boolean = false) { * * @return the location of the added [Token] */ - private fun addToken(type: TokenType, lexeme: Lexeme, value: String): Location { + private fun addToken(type: TokenType, lexeme: Lexeme): Location { val commentMetadata = commentMetadataForCurrentToken(type) - tokens.add(Token(type, lexeme, value, commentMetadata.comments)) + tokens.add(Token(type, lexeme, commentMetadata.comments)) for (commentLookaheadTokens in commentMetadata.lookaheadTokens) { tokens.add(commentLookaheadTokens) @@ -675,7 +674,6 @@ class Lexer(source: String, gapFree: Boolean = false) { Token( WHITESPACE, whitespaceLexeme, - whitespaceLexeme.text, emptyList() ) ) @@ -683,7 +681,7 @@ class Lexer(source: String, gapFree: Boolean = false) { val trailingComment = if (sourceScanner.peek() == '#') { val commentToken = extractCommentToken() trailingCommentTokens.add(commentToken) - commentToken.value + commentToken.lexeme.text } else { "" } diff --git a/src/commonMain/kotlin/org/kson/parser/behavior/KsonContentTransformer.kt b/src/commonMain/kotlin/org/kson/parser/behavior/KsonContentTransformer.kt new file mode 100644 index 000000000..3a495f435 --- /dev/null +++ b/src/commonMain/kotlin/org/kson/parser/behavior/KsonContentTransformer.kt @@ -0,0 +1,132 @@ +package org.kson.parser.behavior + +import org.kson.parser.Coordinates +import org.kson.parser.Location + +/** + * [KsonContentTransformer] is responsible for taking [rawContent] (content directly from a source + * KSON file) and performing transformations (such as unescaping and/or indent stripping) to transform + * it into the [processedContent] value that this [rawContent] represents. + * + * In addition to this [rawContent] -> [processedContent] transformation, [KsonContentTransformer] maintains the + * state needed to act as a source map from [processedContent] -> [rawContent] allowing [Location]s relative + * to [processedContent] to be mapped back their [Location]s in [rawContent] (and hence the source KSON document). + * This is key for bubbling messages up from sub-parsers that may log messages on the embedded content up to + * the original source for that embedded content. + * + * @param rawContent The raw content from the original KSON document + * @param rawLocation Where rawContent exists in the original KSON document + */ +abstract class KsonContentTransformer( + protected val rawContent: String, + protected val rawLocation: Location +) { + /** + * The processed content after all transformations + */ + abstract val processedContent: String + + /** + * Maps a position range in the processed content to a [Location] in the original KSON source. 
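// Illustrative sketch (not part of this patch): how a caller might exercise this offset-based
// mapping, using only the Location/Coordinates constructors and the IdentityContentTransformer
// shown elsewhere in this diff. The concrete numbers are invented for the example.
fun identityMappingExample() {
    // Pretend the raw content "hello" sits on line 2, columns 6..11, at offsets 20..25 of the source.
    val rawLocation = Location(Coordinates(2, 6), Coordinates(2, 11), 20, 25)
    val transformer = IdentityContentTransformer("hello", rawLocation)

    // Offsets 1..3 of the processed content ("el") map straight back to the source:
    // line 2, columns 7..9, offsets 21..23, since the identity transformation changes nothing.
    println(transformer.mapToOriginal(1, 3))
}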
+ * + * @param processedStart Character offset in processedContent (0-based) + * @param processedEnd Character offset in processedContent (0-based, exclusive) + * @return Location in the original KSON source document + */ + fun mapToOriginal(processedStart: Int, processedEnd: Int): Location { + // Step 1: Map offsets through the transformation pipeline (reverse) + val rawStart = mapProcessedOffsetToRawOffset(processedStart) + val rawEnd = mapProcessedOffsetToRawOffset(processedEnd) + + // Step 2: Compute line/column coordinates within rawContent + val startCoordsInRaw = computeCoordinatesInString(rawContent, rawStart) + val endCoordsInRaw = computeCoordinatesInString(rawContent, rawEnd) + + // Step 3: Add these offsets to the base location's coordinates + val finalStart = addCoordinates(rawLocation.start, startCoordsInRaw) + val finalEnd = addCoordinates(rawLocation.start, endCoordsInRaw) + + // Step 4: Compute absolute character offsets in the original document + val finalStartOffset = rawLocation.startOffset + rawStart + val finalEndOffset = rawLocation.startOffset + rawEnd + + return Location(finalStart, finalEnd, finalStartOffset, finalEndOffset) + } + + /** + * Maps a position range specified as line/column coordinates in the processed content to a Location in the + * original KSON source. + * + * @param startLine Line number in processedContent (0-based) + * @param startColumn Column number in processedContent (0-based) + * @param endLine Line number in processedContent (0-based) + * @param endColumn Column number in processedContent (0-based) + * @return Location in the original KSON source document + */ + fun mapToOriginal(startLine: Int, startColumn: Int, endLine: Int, endColumn: Int): Location { + val processedStart = computeOffsetFromCoordinates(processedContent, startLine, startColumn) + val processedEnd = computeOffsetFromCoordinates(processedContent, endLine, endColumn) + return mapToOriginal(processedStart, processedEnd) + } + + /** + * Maps a single offset in processed content to an offset in raw content. + * + * Subclasses implement this to handle their specific transformation logic + * (e.g., escaping, indent trimming, or both). + */ + protected abstract fun mapProcessedOffsetToRawOffset(processedOffset: Int): Int + + /** + * Computes line/column coordinates for a given offset within a string. + */ + protected fun computeCoordinatesInString(text: String, offset: Int): Coordinates { + val upToOffset = text.take(offset) + val line = upToOffset.count { it == '\n' } + val lastNewline = upToOffset.lastIndexOf('\n') + val column = if (lastNewline == -1) offset else offset - lastNewline - 1 + return Coordinates(line, column) + } + + /** + * Computes a character offset from line/column coordinates within a string. 
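// A tiny worked example of the conversion described above, assuming the same 0-based lines and
// columns used throughout this patch: in the text "ab\ncd", line 1 / column 1 denotes the 'd'.
fun coordinateToOffsetExample() {
    val text = "ab\ncd"
    // Walking past "ab\n" consumes 3 characters to reach line 1; adding the column gives offset 4.
    val offset = 3 + 1
    check(text[offset] == 'd')
}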
+ */ + protected fun computeOffsetFromCoordinates(text: String, line: Int, column: Int): Int { + var currentLine = 0 + var offset = 0 + + while (currentLine < line && offset < text.length) { + if (text[offset] == '\n') { + currentLine++ + } + offset++ + } + + return offset + column + } + + /** + * Adds coordinate offsets to a base coordinate + */ + protected fun addCoordinates(base: Coordinates, offset: Coordinates): Coordinates { + return if (offset.line == 0) { + // No change to the line, so just add columns + Coordinates(base.line, base.column + offset.column) + } else { + // Offset puts us on a later line, so offset.column is the absolute column on that line + Coordinates(base.line + offset.line, offset.column) + } + } +} + +/** + * A [KsonContentTransformer] for content that performs the "identity" transformation, i.e. no transformation + */ +class IdentityContentTransformer(content: String, location: Location) + : KsonContentTransformer(content, location) { + override val processedContent = content + + override fun mapProcessedOffsetToRawOffset(processedOffset: Int): Int { + return processedOffset + } +} diff --git a/src/commonMain/kotlin/org/kson/parser/behavior/StringQuote.kt b/src/commonMain/kotlin/org/kson/parser/behavior/StringQuote.kt index a1302b7a4..b36d1a19f 100644 --- a/src/commonMain/kotlin/org/kson/parser/behavior/StringQuote.kt +++ b/src/commonMain/kotlin/org/kson/parser/behavior/StringQuote.kt @@ -44,6 +44,7 @@ sealed class StringQuote(private val quoteChar: Char) { * @return a copy of [escapedContent] with all delimiter escapes processed */ fun unescapeQuotes(escapedContent: String): String { + // unescape any escaped internal quotes return escapedContent.replace(escapedDelimiterString, delimiterString) } diff --git a/src/commonMain/kotlin/org/kson/parser/behavior/embedblock/EmbedBlockIndent.kt b/src/commonMain/kotlin/org/kson/parser/behavior/embedblock/EmbedBlockIndent.kt index 6a55b0b74..eedeafd21 100644 --- a/src/commonMain/kotlin/org/kson/parser/behavior/embedblock/EmbedBlockIndent.kt +++ b/src/commonMain/kotlin/org/kson/parser/behavior/embedblock/EmbedBlockIndent.kt @@ -64,4 +64,4 @@ class EmbedBlockIndent(embedContent: String) { private fun isInlineWhitespace(char: Char?): Boolean { return char == ' ' || char == '\r' || char == '\t' } -} \ No newline at end of file +} diff --git a/src/commonMain/kotlin/org/kson/parser/behavior/embedblock/EmbedContentTransformer.kt b/src/commonMain/kotlin/org/kson/parser/behavior/embedblock/EmbedContentTransformer.kt new file mode 100644 index 000000000..19876d226 --- /dev/null +++ b/src/commonMain/kotlin/org/kson/parser/behavior/embedblock/EmbedContentTransformer.kt @@ -0,0 +1,83 @@ +package org.kson.parser.behavior.embedblock + +import org.kson.parser.Location +import org.kson.parser.behavior.KsonContentTransformer + +/** + * A [KsonContentTransformer] for Embed Blocks, handling the processing from raw KSON source to actual String + * value, and maintaining a [Location] source-map back + * + * @param rawContent The raw embed content from the original KSON document + * @param embedDelim The delimiter used to delimit [rawContent] (needed to perform unescaping) + * @param rawLocation Where rawContent exists in the original KSON document + */ +class EmbedContentTransformer( + rawContent: String, + embedDelim: EmbedDelim, + rawLocation: Location +) : KsonContentTransformer(rawContent, rawLocation) { + + override val processedContent: String + + /** + * [sortedEscapeOffsets] and [minIndent] are all the state needed to perform source 
mapping + * from [processedContent] back to [rawContent] + */ + private val sortedEscapeOffsets: List = embedDelim.findEscapePositions(rawContent).toList() + private val minIndent: Int + + init { + /** + * Transformation pipeline: + * 1. Unescape the content + * 2. Trim the minimum indent + */ + val unescapedContent = embedDelim.unescapeEmbedContent(rawContent) + val indentTrimmer = EmbedBlockIndent(unescapedContent) + minIndent = indentTrimmer.computeMinimumIndent() + processedContent = indentTrimmer.trimMinimumIndent() + } + + + /** + * Maps a single offset in processed content to an offset in raw content. + * + * Reverse transformation pipeline: + * 1. Add back trimmed indentation (per line) + * 2. Add back removed escape backslashes + */ + override fun mapProcessedOffsetToRawOffset(processedOffset: Int): Int { + // Step 1: Map processed → unescaped (add back trimmed indent) + val processedUpToOffset = processedContent.take(processedOffset) + val lineBreaks = processedUpToOffset.count { it == '\n' } + // Each line (including the first) had minIndent characters removed + val indentAdjustment = (lineBreaks + 1) * minIndent + val unescapedOffset = processedOffset + indentAdjustment + + // Step 2: Map unescaped → raw (add back escape backslashes) + val rawOffset = mapUnescapedOffsetToRawOffset(unescapedOffset) + + return rawOffset + } + + /** + * Maps an offset in unescaped content to an offset in raw content. + * + * For each escape backslash that was removed: + * - Determine where it would have appeared in unescaped content + * - If before our target position, add 1 to the offset + */ + private fun mapUnescapedOffsetToRawOffset(unescapedOffset: Int): Int { + var shift = 0 + for (rawEscapePos in sortedEscapeOffsets) { + // Where does this escape appear after removing previous escapes? + val unescapedEscapePos = rawEscapePos - shift + if (unescapedEscapePos < unescapedOffset) { + shift++ + } else { + break + } + } + return unescapedOffset + shift + } +} diff --git a/src/commonMain/kotlin/org/kson/parser/behavior/embedblock/EmbedDelim.kt b/src/commonMain/kotlin/org/kson/parser/behavior/embedblock/EmbedDelim.kt index 99ada7734..5100bbc40 100644 --- a/src/commonMain/kotlin/org/kson/parser/behavior/embedblock/EmbedDelim.kt +++ b/src/commonMain/kotlin/org/kson/parser/behavior/embedblock/EmbedDelim.kt @@ -96,6 +96,27 @@ sealed class EmbedDelim(val char: Char) { return content.replace(hasEscapesPattern, "$delimCharForRegex\$1$delimCharForRegex") } + /** + * Finds all positions in the content where escape backslashes appear. + * Returns a sequence of character offsets where backslashes are used to escape + * the delimiter character, in ascending order (left-to-right as they appear in content). + * + * For example, in "%\%", the escape backslash is at position 1. + * In "%\\%", the escape backslash is at position 1 (position 2 is preserved in output). 
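// Illustrative sketch (not part of this patch) of how these escape offsets drive the reverse
// mapping in EmbedContentTransformer above: each escape that precedes a position pushes the raw
// offset one character to the right.
fun escapeOffsetExample() {
    // Raw embed content "a%\%b" unescapes to "a%%b"; findEscapePositions reports the backslash at raw offset 2.
    val sortedEscapeOffsets = listOf(2)
    val unescapedOffsetOfB = 3

    var shift = 0
    for (rawEscapePos in sortedEscapeOffsets) {
        // position of this escape after earlier escapes were removed
        if (rawEscapePos - shift < unescapedOffsetOfB) shift++ else break
    }
    // 'b' sits at unescaped offset 3 and raw offset 4
    check(unescapedOffsetOfB + shift == 4)
}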
+ * + * @param content The raw embed content + * @return Sequence of character offsets (0-based) where escape backslashes appear, in ascending order + */ + fun findEscapePositions(content: String): Sequence { + return hasEscapesPattern.findAll(content) + .map { matchResult -> + // The pattern matches: delimChar + backslash(es) + delimChar + // The first backslash (right after the first delimChar) is the escape backslash + // that gets removed during unescaping + matchResult.range.first + 1 + } + } + override fun toString(): String { return char.toString() } diff --git a/src/commonMain/kotlin/org/kson/parser/behavior/quotedstring/QuotedStringContentTransformer.kt b/src/commonMain/kotlin/org/kson/parser/behavior/quotedstring/QuotedStringContentTransformer.kt new file mode 100644 index 000000000..18d3ce8d6 --- /dev/null +++ b/src/commonMain/kotlin/org/kson/parser/behavior/quotedstring/QuotedStringContentTransformer.kt @@ -0,0 +1,188 @@ +package org.kson.parser.behavior.quotedstring + +import org.kson.parser.Location +import org.kson.parser.behavior.KsonContentTransformer + +/** + * A [KsonContentTransformer] for quoted KSON Strings, handling the processing from raw KSON source to actual String + * value, and maintaining a [Location] source-map back + * + * @param rawContent The raw quoted string content from the original KSON document (without surrounding quotes) + * @param rawLocation Where rawContent exists in the original KSON document + */ +class QuotedStringContentTransformer( + rawContent: String, + rawLocation: Location +) : KsonContentTransformer(rawContent, rawLocation) { + // The processed content after all transformations + override val processedContent: String + + /** + * [escapeInfoList] contains information about all escape sequences in [rawContent]. + * This is all the state needed to perform source mapping from [processedContent] back to [rawContent]. + */ + private val escapeInfoList: List + + init { + val (unescapedContent, escapes) = unescapeAndTrackEscapes(rawContent) + processedContent = unescapedContent + escapeInfoList = escapes + } + + /** + * Maps a single offset in processed content to an offset in raw content. + * + * For each escape sequence that was processed: + * - Determine where it appears in the processed content + * - If before our target position, add the difference between raw and processed length + */ + override fun mapProcessedOffsetToRawOffset(processedOffset: Int): Int { + var shift = 0 + + for (escape in escapeInfoList) { + // Calculate where this escape appears in processed content + // It appears at its raw position minus the cumulative shift from previous escapes + val escapeProcessedPos = escape.rawPosition - shift + + if (escapeProcessedPos < processedOffset) { + // This escape is before our target position, so we need to account for it + // The escape took 'rawLength' chars in raw but 'processedLength' chars in processed + shift += (escape.rawLength - escape.processedLength) + } else { + break + } + } + + return processedOffset + shift + } +} + +/** + * Information about an escape sequence in the raw content. 
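// Concrete numbers for the shift computation above, using the escape bookkeeping described below
// (an invented example, not taken from the patch's tests).
fun unicodeEscapeShiftExample() {
    val raw = """\u0041bc"""   // 8 raw characters; the six-character escape collapses to 'A'
    val processed = "Abc"
    // The escape is recorded as (rawPosition = 0, rawLength = 6, processedLength = 1), so any
    // processed offset past it shifts by 6 - 1 = 5 when mapped back to the raw content.
    val processedOffsetOfC = processed.indexOf('c')   // 2
    val rawOffsetOfC = processedOffsetOfC + (6 - 1)   // 7
    check(raw[rawOffsetOfC] == 'c')
}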
+ * @param rawPosition The position in rawContent where the backslash starts + * @param rawLength The total length of the escape sequence in rawContent (e.g., "\n" is 2, "\u0041" is 6) + * @param processedLength The length of the resulting character(s) in processedContent (usually 1, but can be 2 for surrogate pairs) + */ +private data class EscapeInfo( + val rawPosition: Int, + val rawLength: Int, + val processedLength: Int +) + +@Deprecated("Only supports testing to ensure behavior after a refactor. Will be removed.") +fun unescapeStringContent(content: String): String { + return unescapeAndTrackEscapes(content).first +} + +/** + * Unescapes the content and tracks all escape sequences for source mapping. + * Returns a pair of (unescaped content, list of escape info). + */ +private fun unescapeAndTrackEscapes(content: String): Pair> { + val sb = StringBuilder(content.length) + val escapes = mutableListOf() + + var i = 0 + while (i < content.length) { + val char = content[i] + + if (char == '\\' && i + 1 < content.length) { + val rawStart = i + when (val escaped = content[i + 1]) { + '"', '\\', '/', '\'' -> { + sb.append(escaped) + escapes.add(EscapeInfo(rawStart, 2, 1)) + i += 2 + } + 'b' -> { + sb.append('\b') + escapes.add(EscapeInfo(rawStart, 2, 1)) + i += 2 + } + 'f' -> { + sb.append('\u000C') + escapes.add(EscapeInfo(rawStart, 2, 1)) + i += 2 + } + 'n' -> { + sb.append('\n') + escapes.add(EscapeInfo(rawStart, 2, 1)) + i += 2 + } + 'r' -> { + sb.append('\r') + escapes.add(EscapeInfo(rawStart, 2, 1)) + i += 2 + } + 't' -> { + sb.append('\t') + escapes.add(EscapeInfo(rawStart, 2, 1)) + i += 2 + } + 'u' -> { + val (chars, consumed) = handleUnicodeEscape(content.substring(i)) + for (c in chars) { + sb.append(c) + } + escapes.add(EscapeInfo(rawStart, consumed, chars.size)) + i += consumed + } + else -> { + // Unknown escape sequence, append backslash as is + sb.append(char) + i++ + } + } + } else { + sb.append(char) + i++ + } + } + + return Pair(sb.toString(), escapes) +} + +/** + * Handles Unicode escape sequences including surrogate pairs. 
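// Illustrative example (not part of this patch) of the surrogate-pair case handled here: the two
// escapes \uD83D\uDE00 decode to a high/low surrogate pair that together encode U+1F600.
fun surrogatePairExample() {
    val escaped = "\\uD83D\\uDE00"                           // 12 source characters
    val high = escaped.substring(2, 6).toInt(16).toChar()    // 0xD83D, a high surrogate
    val low = escaped.substring(8, 12).toInt(16).toChar()    // 0xDE00, a low surrogate
    check(high.isHighSurrogate() && low.isLowSurrogate())
    // Both chars are produced and all 12 characters are consumed, matching the (chars, consumed) pair below.
    println(charArrayOf(high, low).concatToString())
}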
+ * + * @param input the string containing the Unicode escape starting with \u + * @return Pair of (characters produced, characters consumed from input) + */ +private fun handleUnicodeEscape(input: String): Pair { + // Check if we have enough characters for a Unicode escape (\uXXXX = 6 chars) + if (input.length < 6) { + // Not enough characters for a valid Unicode escape + return Pair(charArrayOf('\\'), 1) + } + + // Check if this is actually a Unicode escape + if (input[0] != '\\' || input[1] != 'u') { + return Pair(charArrayOf('\\'), 1) + } + + val hexStr = input.substring(2, 6) + val codePoint = hexStr.toIntOrNull(16) ?: run { + // Invalid hex sequence, return backslash + return Pair(charArrayOf('\\'), 1) + } + + // Check for high surrogate + if (codePoint.toChar().isHighSurrogate()) { + // Look for low surrogate + if (input.length >= 12 && + input[6] == '\\' && + input[7] == 'u') { + + val lowHexStr = input.substring(8, 12) + val lowCodePoint = lowHexStr.toIntOrNull(16) + + if (lowCodePoint != null && lowCodePoint.toChar().isLowSurrogate()) { + // Valid surrogate pair - return both surrogates and consumed 12 chars + return Pair(charArrayOf(codePoint.toChar(), lowCodePoint.toChar()), 12) + } + } + } + + // Regular Unicode character or unpaired surrogate - consumed 6 chars + return Pair(charArrayOf(codePoint.toChar()), 6) +} diff --git a/src/commonMain/kotlin/org/kson/tools/Formatter.kt b/src/commonMain/kotlin/org/kson/tools/Formatter.kt index 59a2eabc3..35979aeb3 100644 --- a/src/commonMain/kotlin/org/kson/tools/Formatter.kt +++ b/src/commonMain/kotlin/org/kson/tools/Formatter.kt @@ -7,6 +7,7 @@ import org.kson.parser.Token import org.kson.parser.TokenType import org.kson.ast.AstNode import org.kson.parser.TokenType.* +import org.kson.parser.behavior.embedblock.EmbedBlockIndent /** * Format the given Kson source according to [formatterConfig] @@ -122,7 +123,8 @@ class IndentFormatter( // write out anything we've read before this embed block result.append(prefixWithIndent(lineContent.joinToString(""), nesting.size)) // write out the lines of the embed content, indenting the whole block appropriately - result.append(prefixWithIndent(token.value, embedContentIndent, true)) + val trimmedEmbedContent = EmbedBlockIndent(token.lexeme.text).trimMinimumIndent() + result.append(prefixWithIndent(trimmedEmbedContent, embedContentIndent, true)) tokenIndex++ // write the rest of the trailing content from this line while (tokenIndex < line.size) { diff --git a/src/commonMain/kotlin/org/kson/value/KsonValue.kt b/src/commonMain/kotlin/org/kson/value/KsonValue.kt index a77798089..f452b7205 100644 --- a/src/commonMain/kotlin/org/kson/value/KsonValue.kt +++ b/src/commonMain/kotlin/org/kson/value/KsonValue.kt @@ -15,7 +15,12 @@ import org.kson.stdlibx.exceptions.ShouldNotHappenException * [location] (which we consider metadata). The ability to treat these [KsonValue]s as _values_ leads to * more ergonomic code than having a strict equals that incorporates [location]. 
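// Minimal sketch of the equality contract described above, using simplified stand-in types rather
// than the real KsonValue classes: equals/hashCode compare only the underlying value, while the
// location remains available as metadata.
class LocatedValue(val value: String, val line: Int, val column: Int) {
    override fun equals(other: Any?) = other is LocatedValue && other.value == value
    override fun hashCode() = value.hashCode()
}

fun valueEqualityExample() {
    val a = LocatedValue("x", line = 1, column = 0)
    val b = LocatedValue("x", line = 7, column = 4)
    check(a == b)               // equal as values
    check(a.line != b.line)     // even though they came from different places in the document
}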
*/ -sealed class KsonValue(val location: Location) { +sealed class KsonValue(protected val astNode: KsonValueNode) { + + val location: Location by lazy { + astNode.location + } + /** * Ensure all our [KsonValue] classes implement their [equals] and [hashCode] * NOTE: this [equals] and [hashCode] must be logical equality of the underlying values, and @@ -36,7 +41,7 @@ class KsonObject( * * For a direct [String] key to [KsonValue] value lookup for this [KsonObject], so [propertyLookup] */ - val propertyMap: Map, location: Location) : KsonValue(location) { + val propertyMap: Map, astNode: KsonValueNode) : KsonValue(astNode) { /** * Convenience lookup with the [String] keys pointing directly to the regular [KsonValue] values */ @@ -57,7 +62,7 @@ class KsonObject( } } -class KsonList(val elements: List, location: Location) : KsonValue(location) { +class KsonList(val elements: List, astNode: ListNode) : KsonValue(astNode) { override fun equals(other: Any?): Boolean { if (this === other) return true if (other !is KsonList) return false @@ -74,11 +79,15 @@ class KsonList(val elements: List, location: Location) : KsonValue(lo } } -class EmbedBlock( - val embedTag: KsonString?, - val metadataTag: KsonString?, - val embedContent: KsonString, - location: Location) : KsonValue(location) { +class EmbedBlock(embedBlockNode: EmbedBlockNode) : KsonValue(embedBlockNode) { + private val embedTagNode = embedBlockNode.embedTagNode + private val metadataTagNode = embedBlockNode.metadataTagNode + private val embedContentNode = embedBlockNode.embedContentNode + + val embedTag: KsonString? = embedTagNode?.let { KsonString(it) } + val metadataTag: KsonString? = metadataTagNode?.let { KsonString(it) } + val embedContent: KsonString = KsonString(embedContentNode) + override fun equals(other: Any?): Boolean { if (this === other) return true if (other !is EmbedBlock) return false @@ -89,18 +98,33 @@ class EmbedBlock( fun asKsonObject(): KsonObject { return KsonObject( buildMap { - embedTag?.let { - val embedTagKey = KsonString(EmbedObjectKeys.EMBED_TAG.key, embedTag.location) - put(embedTagKey.value, KsonObjectProperty(embedTagKey, it)) + embedTagNode?.let { + val embedTagStringNode = object: UnquotedStringNode(embedTagNode.sourceTokens) { + override val stringContent = EmbedObjectKeys.EMBED_TAG.key + override val processedStringContent = EmbedObjectKeys.EMBED_TAG.key + } + put(EmbedObjectKeys.EMBED_TAG.key, + KsonObjectProperty(KsonString(embedTagStringNode), + KsonString(it))) } - metadataTag?.let { - val embedMetadataKey = KsonString(EmbedObjectKeys.EMBED_METADATA.key, metadataTag.location) - put(embedMetadataKey.value, KsonObjectProperty(embedMetadataKey, it)) + metadataTagNode?.let { + val metadataTagStringNode = object: UnquotedStringNode(metadataTagNode.sourceTokens) { + override val stringContent = EmbedObjectKeys.EMBED_METADATA.key + override val processedStringContent = EmbedObjectKeys.EMBED_METADATA.key + } + put(EmbedObjectKeys.EMBED_METADATA.key, + KsonObjectProperty(KsonString(metadataTagStringNode), + KsonString(it))) } - val embedContentKey = KsonString(EmbedObjectKeys.EMBED_CONTENT.key, embedContent.location) - put(embedContentKey. 
value, KsonObjectProperty(embedContentKey, embedContent)) + val embedContentStringNode = object: UnquotedStringNode(embedContentNode.sourceTokens) { + override val stringContent = EmbedObjectKeys.EMBED_CONTENT.key + override val processedStringContent = EmbedObjectKeys.EMBED_CONTENT.key + } + put(EmbedObjectKeys.EMBED_CONTENT.key, + KsonObjectProperty(KsonString(embedContentStringNode), + KsonString(embedContentNode))) }, - location + astNode ) } @@ -109,7 +133,20 @@ class EmbedBlock( } } -class KsonString(val value: String, location: Location) : KsonValue(location) { +class KsonString(private val stringNode: StringNodeImpl) : KsonValue(stringNode), SubParseable { + val value: String by lazy { + stringNode.processedStringContent + } + + override fun subOffsetLocation(subStartOffset: Int, subEndOffset: Int): Location { + return stringNode.contentTransformer.mapToOriginal(subStartOffset,subEndOffset) + } + + override fun subCoordinatesLocation(subStartLine: Int, subStartColumn: Int, subEndLine: Int, subEndColumn: Int): Location { + return stringNode.contentTransformer + .mapToOriginal(subStartLine, subStartColumn, subEndLine, subEndColumn) + } + override fun equals(other: Any?): Boolean { if (this === other) return true if (other !is KsonString) return false @@ -122,7 +159,18 @@ class KsonString(val value: String, location: Location) : KsonValue(location) { } } -class KsonNumber(val value: NumberParser.ParsedNumber, location: Location) : KsonValue(location) { +class KsonNumber(private val numberNode: NumberNode) : KsonValue(numberNode), SubParseable { + val value = numberNode.value + + override fun subOffsetLocation(subStartOffset: Int, subEndOffset: Int): Location { + return numberNode.contentTransformer.mapToOriginal(subStartOffset,subEndOffset) + } + + override fun subCoordinatesLocation(subStartLine: Int, subStartColumn: Int, subEndLine: Int, subEndColumn: Int): Location { + return numberNode.contentTransformer + .mapToOriginal(subStartLine, subStartColumn, subEndLine, subEndColumn) + } + override fun equals(other: Any?): Boolean { if (this === other) return true if (other !is KsonNumber) return false @@ -150,7 +198,9 @@ class KsonNumber(val value: NumberParser.ParsedNumber, location: Location) : Kso } } -class KsonBoolean(val value: Boolean, location: Location) : KsonValue(location) { +class KsonBoolean(astNode: BooleanNode) : KsonValue(astNode) { + val value = astNode.value + + override fun equals(other: Any?): Boolean { if (this === other) return true if (other !is KsonBoolean) return false @@ -163,7 +213,7 @@ class KsonBoolean(val value: Boolean, location: Location) { } } -class KsonNull(location: Location) : KsonValue(location) { +class KsonNull(astNode: NullNode) : KsonValue(astNode) { override fun equals(other: Any?): Boolean { return other is KsonNull } @@ -191,25 +241,20 @@ fun AstNode.toKsonValue(): KsonValue { val keyName = propKey.key.toKsonValue() as KsonString keyName.value to KsonObjectProperty(keyName, propImpl.value.toKsonValue()) }, - location) + this) } is ListNode -> KsonList(elements.map { elem -> val listElementNode = elem as? ListElementNodeImpl ?: throw ShouldNotHappenException("this AST is fully valid") listElementNode.value.toKsonValue() - }, location) - is EmbedBlockNode -> EmbedBlock( - embedTagNode?.toKsonValue() as? KsonString, - metadataTagNode?.toKsonValue() as?
KsonString, - embedContentNode.toKsonValue() as KsonString, - location - ) - is StringNodeImpl -> KsonString(processedStringContent, location) - is NumberNode -> KsonNumber(value, location) - is TrueNode -> KsonBoolean(true, location) - is FalseNode -> KsonBoolean(false, location) - is NullNode -> KsonNull(location) + }, this) + is EmbedBlockNode -> EmbedBlock(this) + is StringNodeImpl -> KsonString(this) + is NumberNode -> KsonNumber(this) + is TrueNode -> KsonBoolean(this) + is FalseNode -> KsonBoolean(this) + is NullNode -> KsonNull(this) is KsonValueNodeImpl -> this.toKsonValue() is ObjectKeyNodeImpl -> { throw ShouldNotHappenException("these properties are processed above in the ${ObjectNode::class.simpleName} case") diff --git a/src/commonMain/kotlin/org/kson/value/SubParseable.kt b/src/commonMain/kotlin/org/kson/value/SubParseable.kt new file mode 100644 index 000000000..db344f29c --- /dev/null +++ b/src/commonMain/kotlin/org/kson/value/SubParseable.kt @@ -0,0 +1,21 @@ +package org.kson.value + +import org.kson.parser.Location + +/** + * Interface for [KsonValue]s whose values may be sub-parsed by external parsers + */ +interface SubParseable { + /** + * Convert an offset range within this [SubParseable] [KsonValue] into a [org.kson.parser.Location] in the original + * KSON source + */ + fun subOffsetLocation(subStartOffset: Int, subEndOffset: Int): Location + + /** + * Convert a line/column coordinate range within this [SubParseable] [KsonValue] into a [Location] in the original + * KSON source + */ + fun subCoordinatesLocation(subStartLine: Int, subStartColumn: Int, + subEndLine: Int, subEndColumn: Int,): Location +} diff --git a/src/commonTest/kotlin/org/kson/KsonCoreTestEmbedBlock.kt b/src/commonTest/kotlin/org/kson/KsonCoreTestEmbedBlock.kt index ef0d42d62..8a5bdab14 100644 --- a/src/commonTest/kotlin/org/kson/KsonCoreTestEmbedBlock.kt +++ b/src/commonTest/kotlin/org/kson/KsonCoreTestEmbedBlock.kt @@ -71,7 +71,7 @@ class KsonCoreTestEmbedBlock : KsonCoreTest { assertParsesTo( """ - %sql: ::::::::::::database:::::: + %sql: ::::::::::::database:::::: select * from something %% """, diff --git a/src/commonTest/kotlin/org/kson/KsonValueNavigationTest.kt b/src/commonTest/kotlin/org/kson/KsonValueNavigationTest.kt index 7b82a3ce3..c2b683c01 100644 --- a/src/commonTest/kotlin/org/kson/KsonValueNavigationTest.kt +++ b/src/commonTest/kotlin/org/kson/KsonValueNavigationTest.kt @@ -148,7 +148,7 @@ class KsonNavigationUtilTest { // Find location inside 'Springfield' val result = KsonValueNavigation.navigateToLocationWithPath( sampleKson, - Coordinates(4, 8) // Line with "city: 'Springfield'" + Coordinates(4, 9) // Line with "city: 'Springfield'" ) assertNotNull(result) @@ -227,7 +227,7 @@ class KsonNavigationUtilTest { // metadata.tags[1] = 'author' on line 17 (0-indexed: line 16) val result = KsonValueNavigation.navigateToLocationWithPath( sampleKson, - Coordinates(16, 6) // Inside 'author' + Coordinates(16, 7) // Inside 'author' ) assertNotNull(result) @@ -280,4 +280,4 @@ class KsonNavigationUtilTest { assertEquals(listOf("outer", "inner"), result.pathFromRoot) } -} \ No newline at end of file +} diff --git a/src/commonTest/kotlin/org/kson/parser/EscapingTest.kt b/src/commonTest/kotlin/org/kson/parser/EscapingTest.kt index 032610c78..3f5d8f206 100644 --- a/src/commonTest/kotlin/org/kson/parser/EscapingTest.kt +++ b/src/commonTest/kotlin/org/kson/parser/EscapingTest.kt @@ -1,7 +1,7 @@ package org.kson.parser import org.kson.ast.renderForJsonString -import 
org.kson.ast.unescapeStringContent +import org.kson.parser.behavior.quotedstring.unescapeStringContent import org.kson.testSupport.validateJson import kotlin.test.Test import kotlin.test.assertEquals diff --git a/src/commonTest/kotlin/org/kson/parser/LexerTest.kt b/src/commonTest/kotlin/org/kson/parser/LexerTest.kt index 644e003e3..3138b2a64 100644 --- a/src/commonTest/kotlin/org/kson/parser/LexerTest.kt +++ b/src/commonTest/kotlin/org/kson/parser/LexerTest.kt @@ -76,7 +76,6 @@ class LexerTest { // assert EOF renders how we want when we render token lists to strings assertEquals("", eofToken.lexeme.text, "EOF Token's raw text should be empty (can't render an EOF)") - assertEquals("", eofToken.value, "EOF Token's value should be empty (can't render an EOF)") return tokens.subList(0, tokens.size - 1) } @@ -448,66 +447,6 @@ class LexerTest { ) } - @Test - fun testEmbedBlockIndentTrimming() { - val oneLineEmbedTokens = assertTokenizesTo( - """ - % - this is a raw embed - %% - """, - listOf(EMBED_OPEN_DELIM, EMBED_PREAMBLE_NEWLINE, EMBED_CONTENT, EMBED_CLOSE_DELIM) - ) - - assertEquals("this is a raw embed\n", oneLineEmbedTokens[2].value) - - val mulitLineEmbedTokens = assertTokenizesTo( - """ - %sql - this is a multi-line - raw embed - who's indent will be determined by - the leftmost line - %% - """, - listOf(EMBED_OPEN_DELIM, EMBED_TAG, EMBED_PREAMBLE_NEWLINE, EMBED_CONTENT, EMBED_CLOSE_DELIM) - ) - - assertEquals( - """ - this is a multi-line - raw embed - who's indent will be determined by - the leftmost line - - """.trimIndent(), - mulitLineEmbedTokens[3].value - ) - - val mulitLineIndentedEmbedTokens = assertTokenizesTo( - """ - %sql - this is a multi-line - raw embed - who's indent will be determined by - the leftmost line, - which is the end delimiter in this case - %% - """, - listOf(EMBED_OPEN_DELIM, EMBED_TAG, EMBED_PREAMBLE_NEWLINE, EMBED_CONTENT, EMBED_CLOSE_DELIM) - ) - - assertEquals( - """ this is a multi-line - raw embed - who's indent will be determined by - the leftmost line, - which is the end delimiter in this case -""", - mulitLineIndentedEmbedTokens[3].value - ) - } - @Test fun testEmbedBlockTrialingWhitespace() { val trailingNewlineTokens = assertTokenizesTo( @@ -515,11 +454,11 @@ class LexerTest { % this should have a newline at the end %% - """, + """.trimIndent(), listOf(EMBED_OPEN_DELIM, EMBED_PREAMBLE_NEWLINE, EMBED_CONTENT, EMBED_CLOSE_DELIM) ) - assertEquals("this should have a newline at the end\n", trailingNewlineTokens[2].value) + assertEquals("this should have a newline at the end\n", trailingNewlineTokens[2].lexeme.text) val trailingSpacesTokens = assertTokenizesTo( """ @@ -528,7 +467,7 @@ class LexerTest { should have four trailing spaces and a newline at the end %% - """, + """.trimIndent(), listOf(EMBED_OPEN_DELIM, EMBED_PREAMBLE_NEWLINE, EMBED_CONTENT, EMBED_CLOSE_DELIM) ) @@ -539,7 +478,7 @@ class LexerTest { spaces and a newline at the end """.trimIndent(), - trailingSpacesTokens[2].value + trailingSpacesTokens[2].lexeme.text ) val zeroTrailingWhitespaceTokens = assertTokenizesTo( @@ -547,13 +486,13 @@ class LexerTest { % this on the other hand, should have spaces but no newline at the end %% - """, + """.trimIndent(), listOf(EMBED_OPEN_DELIM, EMBED_PREAMBLE_NEWLINE, EMBED_CONTENT, EMBED_CLOSE_DELIM) ) assertEquals( - "this on the other hand,\nshould have spaces but no newline at the end ", - zeroTrailingWhitespaceTokens[2].value + " this on the other hand,\n should have spaces but no newline at the end ", + zeroTrailingWhitespaceTokens[2].lexeme.text 
) } @@ -664,9 +603,9 @@ class LexerTest { listOf(UNQUOTED_STRING, COLON, STRING_OPEN_QUOTE, STRING_CONTENT, STRING_CLOSE_QUOTE) ) - assertEquals("a_key", tokens[0].value) - assertEquals("\"", tokens[2].value) - assertEquals("a_value", tokens[3].value) + assertEquals("a_key", tokens[0].lexeme.text) + assertEquals("\"", tokens[2].lexeme.text) + assertEquals("a_value", tokens[3].lexeme.text) } @Test @@ -716,9 +655,9 @@ class LexerTest { ) // sanity check the tokens are lexing to what we expect - assertEquals("string with 'unescaped' and ", tokens[1].value) - assertEquals("\\\"", tokens[2].value) - assertEquals("embedded", tokens[3].value) + assertEquals("string with 'unescaped' and ", tokens[1].lexeme.text) + assertEquals("\\\"", tokens[2].lexeme.text) + assertEquals("embedded", tokens[3].lexeme.text) } @Test @@ -731,9 +670,9 @@ class LexerTest { ) // sanity check the tokens are lexing to what we expect - assertEquals("string with \"unescaped\" and ", tokens[1].value) - assertEquals("\\'", tokens[2].value) - assertEquals("embedded", tokens[3].value) + assertEquals("string with \"unescaped\" and ", tokens[1].lexeme.text) + assertEquals("\\'", tokens[2].lexeme.text) + assertEquals("embedded", tokens[3].lexeme.text) } @Test @@ -778,11 +717,11 @@ class LexerTest { """ % these double %\% percents are embedded but escaped%% - """, + """.trimIndent(), listOf(EMBED_OPEN_DELIM, EMBED_PREAMBLE_NEWLINE, EMBED_CONTENT, EMBED_CLOSE_DELIM) ) - assertEquals("these double %\\% percents are embedded but escaped", singleEscapeTokens[2].value) + assertEquals("these double %\\% percents are embedded but escaped", singleEscapeTokens[2].lexeme.text) } @Test @@ -791,11 +730,11 @@ class LexerTest { """ $ these double $\$ dollars are embedded but escaped$$ - """, + """.trimIndent(), listOf(EMBED_OPEN_DELIM, EMBED_PREAMBLE_NEWLINE, EMBED_CONTENT, EMBED_CLOSE_DELIM) ) - assertEquals("these double $\\$ dollars are embedded but escaped", singleEscapeTokens[2].value) + assertEquals("these double $\\$ dollars are embedded but escaped", singleEscapeTokens[2].lexeme.text) } @Test diff --git a/src/commonTest/kotlin/org/kson/parser/ParserTest.kt b/src/commonTest/kotlin/org/kson/parser/ParserTest.kt index a6fa263d7..17797ad6d 100644 --- a/src/commonTest/kotlin/org/kson/parser/ParserTest.kt +++ b/src/commonTest/kotlin/org/kson/parser/ParserTest.kt @@ -20,8 +20,8 @@ class ParserTest { @Test fun testSanityCheckParse() { val nullTokenStream = listOf( - Token(TokenType.NULL, Lexeme("null", Location.create(0, 0, 0, 4, 0, 4)), "null"), - Token(TokenType.EOF, Lexeme("", Location.create(0, 4, 0, 4, 4, 4)), "") + Token(TokenType.NULL, Lexeme("null", Location.create(0, 0, 0, 4, 0, 4))), + Token(TokenType.EOF, Lexeme("", Location.create(0, 4, 0, 4, 4, 4))) ) val builder = KsonBuilder(nullTokenStream) Parser(builder).parse() diff --git a/src/commonTest/kotlin/org/kson/parser/behavior/embedblock/EmbedContentTransformerTest.kt b/src/commonTest/kotlin/org/kson/parser/behavior/embedblock/EmbedContentTransformerTest.kt new file mode 100644 index 000000000..70d3dfc11 --- /dev/null +++ b/src/commonTest/kotlin/org/kson/parser/behavior/embedblock/EmbedContentTransformerTest.kt @@ -0,0 +1,384 @@ +package org.kson.parser.behavior.embedblock + +import org.kson.parser.Coordinates +import org.kson.parser.Location +import kotlin.test.Test +import kotlin.test.assertEquals + +class EmbedContentTransformerTest { + + @Test + fun testSimpleSingleLineNoEscapesNoIndent() { + val rawEmbedContent = "hello" + val baseLocation = Location( + Coordinates(10, 5), + 
Coordinates(10, 10), + 100, + 105 + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + // Verify no transformation occurred + assertEquals("hello", transformer.processedContent) + + // Position at "he|llo" (offset 2) + val result = transformer.mapToOriginal(2, 3) + + // Maps to offset 7 on line 10 of `baseLocation` + assertEquals(Coordinates(10, 7), result.start) + assertEquals(Coordinates(10, 8), result.end) + assertEquals(102, result.startOffset) + assertEquals(103, result.endOffset) + } + + @Test + fun testSingleLineWithEscape() { + val rawEmbedContent = """{ "key": "val%\%ue" }""" + val processed = """{ "key": "val%%ue" }""" + val baseLocation = Location( + Coordinates(5, 0), + Coordinates(5, rawEmbedContent.length), + 50, + 50 + rawEmbedContent.length + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Position at "val|%%|ue" in processed (offsets 13-15) + val result = transformer.mapToOriginal(13, 15) + + // In rawEmbedContent, this should map to "val|%\%|ue" (offsets 13-16) + assertEquals(Coordinates(5, 13), result.start) + assertEquals(Coordinates(5, 16), result.end) + assertEquals(63, result.startOffset) + assertEquals(66, result.endOffset) + } + + @Test + fun testMultiLineWithUniformIndent() { + val rawEmbedContent = """| { + | "key": "value" + | }""".trimMargin() + // Processed (indent trimmed): + val processed = """|{ + | "key": "value" + |}""".trimMargin() + + val baseLocation = Location( + Coordinates(10, 0), + Coordinates(12, 5), + 100, + 100 + rawEmbedContent.length + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Processed line 1, columns 2-7: "key" + val result = transformer.mapToOriginal(1, 2, 1, 7) + + // In rawEmbedContent, this is line 11 (second line), columns 6-11 (after 4 spaces indent) + assertEquals(Coordinates(11, 6), result.start) + assertEquals(Coordinates(11, 11), result.end) + } + + @Test + fun testMultiLineWithEscapes() { + val rawEmbedContent = " line 1\n line %\\% 2\n line 3" + val processed = "line 1\nline %% 2\nline 3" + + val baseLocation = Location( + Coordinates(5, 0), + Coordinates(7, 10), + 50, + 50 + rawEmbedContent.length + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Position at "%%" in processed (line 1, columns 5-7) + val result = transformer.mapToOriginal(1, 5, 1, 7) + + // Line 1 of `processed` is line 6 of `baseLocation` + // The "%\%" starts at position 9 in `baseLocation` and ends three characters later at 12 accounting for + // the escape slash + assertEquals(Coordinates(6, 9), result.start) + assertEquals(Coordinates(6, 12), result.end) + } + + @Test + fun testOffsetBasedAPI() { + val rawEmbedContent = " hello" + val processed = "hello" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, 7), + 0, + 7 + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map offset 0-5 in 
processed + val result = transformer.mapToOriginal(0, 5) + + // Should map to offset 2-7 in rawEmbedContent (accounting for indent) + assertEquals(Coordinates(0, 2), result.start) + assertEquals(Coordinates(0, 7), result.end) + assertEquals(2, result.startOffset) + assertEquals(7, result.endOffset) + } + + @Test + fun testLineColumnBasedAPI() { + val rawEmbedContent = " line 1\n line 2" + val processed = "line 1\nline 2" + val baseLocation = Location( + Coordinates(10, 0), + Coordinates(11, 8), + 100, + 118 + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map line 1, columns 0-4 in processed ("line") + val result = transformer.mapToOriginal( + startLine = 1, + startColumn = 0, + endLine = 1, + endColumn = 4 + ) + + // Should map to line 11, columns 2-6 in original + assertEquals(Coordinates(11, 2), result.start) + assertEquals(Coordinates(11, 6), result.end) + } + + @Test + fun testEmptyContent() { + val rawEmbedContent = "" + val processed = "" + val baseLocation = Location( + Coordinates(5, 10), + Coordinates(5, 10), + 100, + 100 + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map zero-length range at start + val result = transformer.mapToOriginal(0, 0) + + assertEquals(Coordinates(5, 10), result.start) + assertEquals(Coordinates(5, 10), result.end) + assertEquals(100, result.startOffset) + assertEquals(100, result.endOffset) + } + + @Test + fun testMultipleEscapesOnSameLine() { + val rawEmbedContent = """a%\%b%\%c""" + val processed = "a%%b%%c" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawEmbedContent.length), + 0, + rawEmbedContent.length + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map "%%b%%" in processed (offsets 1-7) + val result = transformer.mapToOriginal(1, 7) + + // In rawEmbedContent: "%\%b%\%" (offsets 1-9) + assertEquals(Coordinates(0, 1), result.start) + assertEquals(Coordinates(0, 9), result.end) + assertEquals(1, result.startOffset) + assertEquals(9, result.endOffset) + } + + @Test + fun testEscapeAtStartOfContent() { + val rawEmbedContent = """%\%hello""" + val processed = "%%hello" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawEmbedContent.length), + 0, + rawEmbedContent.length + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map "%%" at start (offsets 0-2) + val result = transformer.mapToOriginal(0, 2) + + // In rawEmbedContent: "%\%" (offsets 0-3) + assertEquals(Coordinates(0, 0), result.start) + assertEquals(Coordinates(0, 3), result.end) + } + + @Test + fun testCombinedEscapesAndIndent() { + val rawEmbedContent = """ a%\%b""" + val processed = "a%%b" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawEmbedContent.length), + 0, + rawEmbedContent.length + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, 
transformer.processedContent) + + // Map "a%%" in processed (offsets 0-3) + val result = transformer.mapToOriginal(0, 3) + + // In rawEmbedContent: " a%\%" (offsets 4-9) + assertEquals(Coordinates(0, 4), result.start) + assertEquals(Coordinates(0, 8), result.end) + } + + @Test + fun testMultiLineVaryingIndent() { + val rawEmbedContent = " line1\n line2\n line3" + val processed = "line1\n line2\nline3" + + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(2, 8), + 0, + rawEmbedContent.length + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map "line2" on second line in processed (line 1, columns 2-7) + val result = transformer.mapToOriginal(1, 2, 1, 7) + + assertEquals(Coordinates(1, 4), result.start) + assertEquals(Coordinates(1, 9), result.end) + } + + @Test + fun testZeroLengthRange() { + val rawEmbedContent = " hello" + val processed = "hello" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, 7), + 0, + 7 + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Cursor at position 3 in processed + val result = transformer.mapToOriginal(3, 3) + + // Should map to position 5 in rawEmbedContent (3 + 2 indent) + assertEquals(Coordinates(0, 5), result.start) + assertEquals(Coordinates(0, 5), result.end) + assertEquals(5, result.startOffset) + assertEquals(5, result.endOffset) + } + + @Test + fun testDoubleEscapedDelimiter() { + val rawEmbedContent = "%\\\\\\%" + val processed = "%\\\\%" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawEmbedContent.length), + 0, + rawEmbedContent.length + ) + + val transformer = EmbedContentTransformer( + rawContent = rawEmbedContent, + embedDelim = EmbedDelim.Percent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map the whole processed content + val result = transformer.mapToOriginal(0, processed.length) + + assertEquals(Coordinates(0, 0), result.start) + assertEquals(Coordinates(0, rawEmbedContent.length), result.end) + } +} diff --git a/src/commonTest/kotlin/org/kson/parser/behavior/quotedstring/QuotedStringContentTransformerTest.kt b/src/commonTest/kotlin/org/kson/parser/behavior/quotedstring/QuotedStringContentTransformerTest.kt new file mode 100644 index 000000000..cdf9e7a0c --- /dev/null +++ b/src/commonTest/kotlin/org/kson/parser/behavior/quotedstring/QuotedStringContentTransformerTest.kt @@ -0,0 +1,505 @@ +package org.kson.parser.behavior.quotedstring + +import org.kson.parser.Coordinates +import org.kson.parser.Location +import kotlin.test.Test +import kotlin.test.assertEquals + +class QuotedStringContentTransformerTest { + + @Test + fun testSimpleSingleLineNoEscapes() { + val rawQuotedContent = "hello world" + val baseLocation = Location( + Coordinates(10, 5), + Coordinates(10, 16), + 100, + 111 + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + // Verify no transformation occurred + assertEquals("hello world", transformer.processedContent) + + // Position at "hello| |world" (offset 5-6) + val result = transformer.mapToOriginal(5, 6) + + // Maps to offset 10-11 on line 10 of `baseLocation` + assertEquals(Coordinates(10, 10), result.start) + 
assertEquals(Coordinates(10, 11), result.end) + assertEquals(105, result.startOffset) + assertEquals(106, result.endOffset) + } + + @Test + fun testSingleLineWithSimpleEscape() { + val rawQuotedContent = """hello\nworld""" + val processed = "hello\nworld" + val baseLocation = Location( + Coordinates(5, 0), + Coordinates(5, rawQuotedContent.length), + 50, + 50 + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Position at the newline character in processed (offset 5-6) + val result = transformer.mapToOriginal(5, 6) + + // In rawQuotedContent, this should map to "\n" (offsets 5-7) + assertEquals(Coordinates(5, 5), result.start) + assertEquals(Coordinates(5, 7), result.end) + assertEquals(55, result.startOffset) + assertEquals(57, result.endOffset) + } + + @Test + fun testMultipleEscapesOnSameLine() { + val rawQuotedContent = """tab\there\tand\tthere""" + val processed = "tab\there\tand\tthere" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawQuotedContent.length), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map "and" in processed (offsets 9-12) + // In raw: "tab\there\t|and|\tthere" + val result = transformer.mapToOriginal(9, 12) + + // Positions shift by two in the raw due to the escapes + assertEquals(Coordinates(0, 11), result.start) + assertEquals(Coordinates(0, 14), result.end) + } + + @Test + fun testEscapedQuotes() { + val rawQuotedContent = """say \"hello\"""" + val processed = """say "hello"""" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawQuotedContent.length), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map the word "hello" in processed (offsets 5-10) + val result = transformer.mapToOriginal(5, 10) + + // In raw: say \"|hello|\" + // Positions: 0-3 (say ) + 4-5 (\") + 6-10 (hello) + assertEquals(Coordinates(0, 6), result.start) + assertEquals(Coordinates(0, 11), result.end) + } + + @Test + fun testEscapedBackslash() { + val rawQuotedContent = """path\\to\\file""" + val processed = """path\to\file""" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawQuotedContent.length), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map "to" in processed (offsets 5-7) + val result = transformer.mapToOriginal(5, 7) + + // In raw: path\\|to|\\file + // Positions: 0-3 (path) + 4-5 (\\) + 6-7 (to) + assertEquals(Coordinates(0, 6), result.start) + assertEquals(Coordinates(0, 8), result.end) + } + + @Test + fun testUnicodeEscape() { + val rawQuotedContent = """A\u0041B""" + val processed = "AAB" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawQuotedContent.length), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map the escaped 'A' in processed (offset 1-2) + val 
result = transformer.mapToOriginal(1, 2) + + // In raw: A|\u0041|B (positions 1-7) + assertEquals(Coordinates(0, 1), result.start) + assertEquals(Coordinates(0, 7), result.end) + } + + @Test + fun testSurrogatePair() { + // \uD83D\uDE00 is the surrogate pair for the grinning face emoji 😀 + val rawQuotedContent = """\uD83D\uDE00""" + val processed = "\uD83D\uDE00" // The actual emoji + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawQuotedContent.length), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map the emoji in processed (offsets 0-2, as it's 2 chars in Kotlin) + val result = transformer.mapToOriginal(0, 2) + + // Should map to the entire escape sequence (12 chars: \uD83D\uDE00) + assertEquals(Coordinates(0, 0), result.start) + assertEquals(Coordinates(0, 12), result.end) + } + + @Test + fun testMultiLineWithRawWhitespace() { + val rawQuotedContent = "line 1\nline 2\nline 3" + val processed = "line 1\nline 2\nline 3" + val baseLocation = Location( + Coordinates(10, 0), + Coordinates(12, 6), + 100, + 120 + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + // No transformation for raw whitespace + assertEquals(processed, transformer.processedContent) + + // Map "line 2" on the second line (line 1, columns 0-6 in processed) + val result = transformer.mapToOriginal(1, 0, 1, 6) + + // Should map to line 11, columns 0-6 + assertEquals(Coordinates(11, 0), result.start) + assertEquals(Coordinates(11, 6), result.end) + } + + @Test + fun testMultiLineWithEscapedWhitespace() { + val rawQuotedContent = """line 1\nline 2""" + val processed = "line 1\nline 2" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawQuotedContent.length), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map "line 2" in processed (line 1, columns 0-6) + val result = transformer.mapToOriginal(1, 0, 1, 6) + + // In raw, this is still on line 0, starting after "\n" + // "line 1\n|line 2|" + // Positions: 0-5 (line 1) + 6-7 (\n) + 8-13 (line 2) + assertEquals(Coordinates(0, 8), result.start) + assertEquals(Coordinates(0, 14), result.end) + } + + @Test + fun testMixedRawAndEscapedNewlines() { + val rawQuotedContent = "line 1\nline 2\\nline 3" + val processed = "line 1\nline 2\nline 3" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(1, 13), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map "line 3" in processed (line 2, columns 0-6) + val result = transformer.mapToOriginal(2, 0, 2, 6) + + // In raw: "line 1\nline 2\n|line 3|" + // The second newline is escaped, so it's on the same line in raw + assertEquals(Coordinates(1, 8), result.start) + assertEquals(Coordinates(1, 14), result.end) + } + + @Test + fun testOffsetBasedAPI() { + val rawQuotedContent = """hello\tworld""" + val processed = "hello\tworld" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawQuotedContent.length), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + 
rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map offset 0-5 in processed ("hello") + val result = transformer.mapToOriginal(0, 5) + + assertEquals(Coordinates(0, 0), result.start) + assertEquals(Coordinates(0, 5), result.end) + assertEquals(0, result.startOffset) + assertEquals(5, result.endOffset) + } + + @Test + fun testLineColumnBasedAPI() { + val rawQuotedContent = "line 1\nline 2" + val processed = "line 1\nline 2" + val baseLocation = Location( + Coordinates(10, 0), + Coordinates(11, 6), + 100, + 113 + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map line 1, columns 0-4 in processed ("line") + val result = transformer.mapToOriginal( + startLine = 1, + startColumn = 0, + endLine = 1, + endColumn = 4 + ) + + // Should map to line 11, columns 0-4 in original + assertEquals(Coordinates(11, 0), result.start) + assertEquals(Coordinates(11, 4), result.end) + } + + @Test + fun testEmptyContent() { + val rawQuotedContent = "" + val processed = "" + val baseLocation = Location( + Coordinates(5, 10), + Coordinates(5, 10), + 100, + 100 + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map zero-length range at start + val result = transformer.mapToOriginal(0, 0) + + assertEquals(Coordinates(5, 10), result.start) + assertEquals(Coordinates(5, 10), result.end) + assertEquals(100, result.startOffset) + assertEquals(100, result.endOffset) + } + + @Test + fun testEscapeAtStartOfContent() { + val rawQuotedContent = """\nhello""" + val processed = "\nhello" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawQuotedContent.length), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map "\n" at start (offsets 0-1) + val result = transformer.mapToOriginal(0, 1) + + // In rawQuotedContent: "\n" (offsets 0-2) + assertEquals(Coordinates(0, 0), result.start) + assertEquals(Coordinates(0, 2), result.end) + } + + @Test + fun testAllCommonEscapes() { + val rawQuotedContent = """\\\/\b\f\n\r\t\"""" + """\'""" + val processed = "\\/\b\u000C\n\r\t\"'" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawQuotedContent.length), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map the entire processed content + val result = transformer.mapToOriginal(0, processed.length) + + assertEquals(Coordinates(0, 0), result.start) + assertEquals(Coordinates(0, rawQuotedContent.length), result.end) + } + + @Test + fun testZeroLengthRange() { + val rawQuotedContent = """hello\nworld""" + val processed = "hello\nworld" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawQuotedContent.length), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Cursor at the newline position in processed (offset 5) + val result = 
transformer.mapToOriginal(5, 5) + + // Should map to position 5 in rawQuotedContent (start of \n) + assertEquals(Coordinates(0, 5), result.start) + assertEquals(Coordinates(0, 5), result.end) + assertEquals(5, result.startOffset) + assertEquals(5, result.endOffset) + } + + @Test + fun testComplexMixedContent() { + val rawQuotedContent = """Hello\n\tWorld!\nThis is a "test" with 'quotes' and unicode: \u0041""" + val processed = "Hello\n\tWorld!\nThis is a \"test\" with 'quotes' and unicode: A" + val baseLocation = Location( + Coordinates(0, 0), + Coordinates(0, rawQuotedContent.length), + 0, + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map "test" in processed + // In processed: after "This is a \"" + val testStart = processed.indexOf("test") + val testEnd = testStart + 4 + val result = transformer.mapToOriginal(testStart, testEnd) + + // Verify the mapping makes sense (should be after the escaped quote) + val rawTestStart = rawQuotedContent.indexOf("test") + assertEquals(rawTestStart, result.start.column) + } + + @Test + fun testMultilineSelectionInComplexMixedContent() { + val rawQuotedContent = "Hello\n\tWorld!\nThis is a \"test\" with 'quotes' and unicode: \\u0041" + val processed = "Hello\n\tWorld!\nThis is a \"test\" with 'quotes' and unicode: A" + val baseLocation = Location( + Coordinates(4, 0), + Coordinates(4, rawQuotedContent.length), + 5, + 5 + rawQuotedContent.length + ) + + val transformer = QuotedStringContentTransformer( + rawContent = rawQuotedContent, + rawLocation = baseLocation + ) + + assertEquals(processed, transformer.processedContent) + + // Map "\tWorld!\nThis is a "test" with 'quotes' and unicode: A" in processed + val result = transformer.mapToOriginal(6, 59) + + // Raw text for this test goes from position 6 to 64 in rawQuotedContent plus the baseLocation start of 5 + assertEquals(11, result.startOffset) + assertEquals(69, result.endOffset) + + assertEquals(5, result.start.line) + assertEquals(0, result.start.column) + assertEquals(6, result.end.line) + assertEquals(50, result.end.column) + } +} diff --git a/src/commonTest/kotlin/org/kson/value/SubParseableTest.kt b/src/commonTest/kotlin/org/kson/value/SubParseableTest.kt new file mode 100644 index 000000000..423d133e0 --- /dev/null +++ b/src/commonTest/kotlin/org/kson/value/SubParseableTest.kt @@ -0,0 +1,193 @@ +package org.kson.value + +import org.kson.KsonCore +import org.kson.KsonCoreTestError +import org.kson.parser.Location +import kotlin.test.Test +import kotlin.test.assertEquals +import kotlin.test.assertNotNull + +class SubParseableTest: KsonCoreTestError { + + /** + * Given a chunk of KSON source ([ksonSourceWithSRCTag]) which has a String that begins with "SRC", assert that + * [org.kson.parser.Location] [embeddedSourceLocation] in [ksonSourceWithSRCTag] is translated to [expectedOriginalSourceLocation] + * using the [SubParseable] interface of [KsonString] + */ + fun assertSubLocationMessageLogged(ksonSourceWithSRCTag: String, + embeddedSourceLocation: Location, + expectedOriginalSourceLocation: Location + ) { + val parseResult = KsonCore.parseToAst(ksonSourceWithSRCTag) + val ksonValue = parseResult.ksonValue + assertNotNull(ksonValue) + val embeddedSrcString = findEmbeddedSRC(ksonValue) + assertNotNull( + embeddedSrcString, + "No embedded src found. Does ksonSource have a string that starts with SRC??" 
+ ) + assertEquals( + expectedOriginalSourceLocation, + embeddedSrcString.subOffsetLocation(embeddedSourceLocation.startOffset, embeddedSourceLocation.endOffset), + "should properly map back to original source location using offsets" + ) + + assertEquals( + expectedOriginalSourceLocation, + embeddedSrcString.subCoordinatesLocation( + embeddedSourceLocation.start.line, + embeddedSourceLocation.start.column, + embeddedSourceLocation.end.line, + embeddedSourceLocation.end.column + ), + "should properly map back to original source location using line/column data" + ) + } + + /** + * Return the first found [KsonString] in the given [KsonValue] that starts with "SRC". This is a bit informal, + * but it makes for an easy way for these tests to tag the string they would like to describe a sub-location of + */ + private fun findEmbeddedSRC(ksonValue: KsonValue): KsonString? { + when (ksonValue) { + is EmbedBlock -> { + return ksonValue.embedTag?.let { findEmbeddedSRC(it) } ?: + ksonValue.metadataTag?.let { findEmbeddedSRC(it) } ?: + findEmbeddedSRC(ksonValue.embedContent) + } + is KsonString -> { + if (!ksonValue.value.startsWith("SRC")) { + return null + } + return ksonValue + } + is KsonObject -> { + ksonValue.propertyMap.values.forEach { + val ksonString = findEmbeddedSRC(it.propName) ?: + findEmbeddedSRC(it.propValue) + if (ksonString != null) { return ksonString } + } + return null + } + is KsonList -> { + ksonValue.elements.forEach { + val ksonString = findEmbeddedSRC(it) + if (ksonString != null) { + return ksonString + } + } + return null + } + is KsonNumber, is KsonBoolean, is KsonNull -> return null + } + } + + @Test + fun testSubLocationInSimpleStrings() { + val ksonUnquotedString = """ + key: SRCa_simple_string + """.trimIndent() + + assertSubLocationMessageLogged( + ksonUnquotedString, + // location of "simple" in SRC-denoted KSON string + Location.Companion.create(0, 5, 0, 11, 5, 11), + // location of "simple" in original source + Location.Companion.create(0, 10, 0, 16, 10, 16)) + + val ksonQuotedPlainString = """ + key: 'SRCa plain quoted string' + """.trimIndent() + + assertSubLocationMessageLogged( + ksonQuotedPlainString, + // location of " quoted " in SRC-denoted KSON string + Location.Companion.create(0, 10, 0, 18, 10, 18), + Location.Companion.create(0, 16, 0, 24, 16, 24)) + + val ksonQuotedStringWithNewlines = """ + key: 'SRCa quoted string + with newlines' + """.trimIndent() + + assertSubLocationMessageLogged( + ksonQuotedStringWithNewlines, + // location of "ted string\nwith new" in SRC-denoted KSON string + Location.Companion.create(0, 8, 1, 8, 8, 27), + Location.Companion.create(0, 14, 1, 8, 14, 33)) + } + + @Test + fun testSubLocationInStringWithEscapedQuotes() { + val ksonStringWithEscapes = """ + key: 'SRCthis string has \'escaped\' quotes inside it' + """.trimIndent() + + assertSubLocationMessageLogged( + ksonStringWithEscapes, + // location of "quotes" in the SRC-denoted KSON string + Location.create(0, 29, 0, 35, 29, 35), + Location.Companion.create(0, 37, 0, 43, 37, 43)) + } + + @Test + fun testSubLocationInStringWithEscaping() { + val ksonStringWithEscapes = """ + key: 'SRCthis string\t\n\t has escapes' + """.trimIndent() + + assertSubLocationMessageLogged( + ksonStringWithEscapes, + // location of "g\t\n\t h" in the SRC-denoted KSON string + Location.Companion.create(0, 13, 1, 3, 13, 19), + Location.Companion.create(0, 19, 0, 28, 19, 28)) + } + + @Test + fun testSubLocationInPlainEmbed() { + val ksonPlainEmbed = """ + % + this is a very simple embed, with no 
escapes or + indent stripping. It is equivalent to a string + with newlines + %% + """.trimIndent() + + // TODO test this case + } + + @Test + fun testSubLocationInEmbedWithIndent() { + val ksonIndentedEmbed = """ + key: % + SRCthis is an indented simple embed, with no escapes + for testing accuracy of sub-location generation + %% + """.trimIndent() + + // TODO test this case + } + + @Test + fun testSubLocationInEmbedWithEscapes() { + val ksonIndentedEmbed = """ + % + SRCthis is an indent-free embed block with escaped %\% + embed delimiters %\\\\% for testing accuracy of + sub-location generation + %% + """.trimIndent() + + // TODO test this case + + val ksonIndentedEmbedWithEscapes = """ + key: % + this is an indented embed block with escaped %\% + embed delimiters %\\\\% for testing accuracy of + sub-location generation + %% + """.trimIndent() + + // TODO test this case + } +} diff --git a/tooling/language-server-protocol/src/test/core/features/DocumentHighlightService.test.ts b/tooling/language-server-protocol/src/test/core/features/DocumentHighlightService.test.ts index b94856714..5515de4c2 100644 --- a/tooling/language-server-protocol/src/test/core/features/DocumentHighlightService.test.ts +++ b/tooling/language-server-protocol/src/test/core/features/DocumentHighlightService.test.ts @@ -151,9 +151,9 @@ describe('DocumentHighlightService', () => { // Should highlight all three properties at the same level const ranges = highlights.map(h => h.range); - assertDeepIncludes(ranges, {start: {line: 3, character: 12}, end: {line: 3, character: 19}}); // "prop1" - assertDeepIncludes(ranges, {start: {line: 4, character: 12}, end: {line: 4, character: 19}}); // "prop2" - assertDeepIncludes(ranges, {start: {line: 5, character: 12}, end: {line: 5, character: 19}}); // "prop3" + assertDeepIncludes(ranges, {start: {line: 3, character: 13}, end: {line: 3, character: 18}}); // "prop1" + assertDeepIncludes(ranges, {start: {line: 4, character: 13}, end: {line: 4, character: 18}}); // "prop2" + assertDeepIncludes(ranges, {start: {line: 5, character: 13}, end: {line: 5, character: 18}}); // "prop3" }); }); @@ -180,8 +180,8 @@ describe('DocumentHighlightService', () => { // Should highlight keys from first object only const ranges = highlights.map(h => h.range); - assertDeepIncludes(ranges, {start: {line: 2, character: 8}, end: {line: 2, character: 12}}); // "id" - assertDeepIncludes(ranges, {start: {line: 3, character: 8}, end: {line: 3, character: 14}}); // "name" + assertDeepIncludes(ranges, {start: {line: 2, character: 9}, end: {line: 2, character: 11}}); // "id" + assertDeepIncludes(ranges, {start: {line: 3, character: 9}, end: {line: 3, character: 13}}); // "name" }); it('should not highlight keys from different objects in array', () => { @@ -206,8 +206,8 @@ describe('DocumentHighlightService', () => { // Should highlight keys from second object only const ranges = highlights.map(h => h.range); - assertDeepIncludes(ranges, {start: {line: 6, character: 8}, end: {line: 6, character: 12}}); // "id" - assertDeepIncludes(ranges, {start: {line: 7, character: 8}, end: {line: 7, character: 14}}); // "name" + assertDeepIncludes(ranges, {start: {line: 6, character: 9}, end: {line: 6, character: 11}}); // "id" + assertDeepIncludes(ranges, {start: {line: 7, character: 9}, end: {line: 7, character: 13}}); // "name" }); }); @@ -231,9 +231,9 @@ describe('DocumentHighlightService', () => { // Should highlight all three top-level keys const ranges = highlights.map(h => h.range); - assertDeepIncludes(ranges, {start: 
{line: 1, character: 4}, end: {line: 1, character: 12}}); // "simple" - assertDeepIncludes(ranges, {start: {line: 2, character: 4}, end: {line: 2, character: 13}}); // "complex" - assertDeepIncludes(ranges, {start: {line: 5, character: 4}, end: {line: 5, character: 13}}); // "another" + assertDeepIncludes(ranges, {start: {line: 1, character: 5}, end: {line: 1, character: 11}}); // "simple" + assertDeepIncludes(ranges, {start: {line: 2, character: 5}, end: {line: 2, character: 12}}); // "complex" + assertDeepIncludes(ranges, {start: {line: 5, character: 5}, end: {line: 5, character: 12}}); // "another" }); it('should handle duplicate property keys', () => { @@ -258,12 +258,12 @@ describe('DocumentHighlightService', () => { // Check that at least one "value" key is highlighted const valueHighlights = ranges.filter(r => - r.start.character === 4 && r.end.character === 11 + r.start.character === 5 && r.end.character === 10 ); assert.ok(valueHighlights.length >= 1, 'Should highlight at least one "value" key'); // Check that "other" is highlighted - assertDeepIncludes(ranges, {start: {line: 4, character: 4}, end: {line: 4, character: 11}}); // "other" + assertDeepIncludes(ranges, {start: {line: 4, character: 5}, end: {line: 4, character: 10}}); // "other" }); }); @@ -351,9 +351,9 @@ describe('DocumentHighlightService', () => { // Should still highlight the single key assert.strictEqual(highlights.length, 1); assert.deepStrictEqual(highlights[0].range, { - start: {line: 1, character: 4}, - end: {line: 1, character: 13} + start: {line: 1, character: 5}, + end: {line: 1, character: 12} }); }); }); -}); \ No newline at end of file +}); diff --git a/tooling/lsp-clients/vscode/test/suite/schema-loading.test.ts b/tooling/lsp-clients/vscode/test/suite/schema-loading.test.ts index 989a55b7b..2173e358c 100644 --- a/tooling/lsp-clients/vscode/test/suite/schema-loading.test.ts +++ b/tooling/lsp-clients/vscode/test/suite/schema-loading.test.ts @@ -305,7 +305,7 @@ describeNode('Schema Loading Tests', () => { await new Promise(resolve => setTimeout(resolve, 500)); // Request completions inside the array (should suggest enum values) - const position = new vscode.Position(2, 11); // After "features: -""" + const position = new vscode.Position(2, 12); // After "features: -""" try { const completions = await waitForCompletions(document, position); @@ -397,4 +397,4 @@ describeNode('Schema Loading Tests', () => { await cleanUp(testFileUri); } }).timeout(10000); -}); \ No newline at end of file +});
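
To make the intent of the new SubParseable surface concrete, here is a minimal usage sketch. It is not part of the diff above: it relies only on the KsonCore.parseToAst, KsonObject.propertyMap / propValue, and KsonString.subOffsetLocation calls exercised in SubParseableTest, while the external checker that consumes the string value is purely hypothetical.

import org.kson.KsonCore
import org.kson.value.KsonObject
import org.kson.value.KsonString

fun main() {
    // A KSON document whose quoted string value will be handed off to an external tool
    val source = "query: 'select * frm table'"
    val root = KsonCore.parseToAst(source).ksonValue as KsonObject
    val query = root.propertyMap.getValue("query").propValue as KsonString

    // Hypothetical external checker: it flags "frm" as a range of offsets into query.value,
    // i.e. into the processed string content, not into the original document
    val badStart = query.value.indexOf("frm")
    val badEnd = badStart + "frm".length

    // SubParseable maps those processed-content offsets back to a Location in the original
    // KSON source, accounting for the string's position and any escape sequences
    val location = query.subOffsetLocation(badStart, badEnd)
    println("'frm' occupies offsets ${location.startOffset}..${location.endOffset} of the original document")
}

The same pattern should apply to embed blocks: EmbedBlock.embedContent is a KsonString backed by EmbedContentTransformer, so a sub-parser running over the trimmed, unescaped embed content can report ranges that map back through subOffsetLocation or subCoordinatesLocation to the indented, escaped text in the source document.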