Skip to content

Commit

Permalink
feat(markdown): add support for Shire code fence parsing
Browse files Browse the repository at this point in the history
- Added support for parsing Shire code fences (`<shire>` and `</shire>` tags).
- Enhanced `parseAll` to handle mixed Shire and Markdown content.
- Updated test cases to validate Shire code fence parsing.
  • Loading branch information
phodal committed Jan 14, 2025
1 parent f391a59 commit b72753c
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 20 deletions.
Original file line number Diff line number Diff line change
@@ -1,29 +1,52 @@
package com.phodal.shirecore.utils.markdown

import ai.grazie.nlp.utils.length
import com.intellij.lang.Language
import com.phodal.shirecore.utils.markdown.CodeFenceLanguage.findLanguage
import com.phodal.shirecore.utils.markdown.CodeFenceLanguage.lookupFileExt

class CodeFence(
val ideaLanguage: Language,
val text: String,
var isComplete: Boolean,
val extension: String?,
val originLanguage: String? = null
val originLanguage: String? = null,
) {
companion object {
private var lastTxtBlock: CodeFence? = null
val shireStartRegex = Regex("<shire>")
val shireEndRegex = Regex("</shire>")

fun parse(content: String): CodeFence {
val regex = Regex("```([\\w#+\\s]*)")
val markdownRegex = Regex("```([\\w#+\\s]*)")

val lines = content.replace("\\n", "\n").lines()

// 检查是否存在 shire 开始标签
val startMatch = shireStartRegex.find(content)
if (startMatch != null) {
val endMatch = shireEndRegex.find(content)
val isComplete = endMatch != null

// 提取内容:如果有结束标签就截取中间内容,没有就取整个后续内容
val shireContent = if (isComplete) {
content.substring(startMatch.range.last + 1, endMatch.range.first).trim()
} else {
content.substring(startMatch.range.last + 1).trim()
}

return CodeFence(findLanguage("Shire"), shireContent, isComplete, "shire", "Shire")
}

// 原有的 Markdown 代码块解析逻辑
var codeStarted = false
var codeClosed = false
var languageId: String? = null
val codeBuilder = StringBuilder()

for (line in lines) {
if (!codeStarted) {
val matchResult: MatchResult? = regex.find(line.trimStart())
val matchResult: MatchResult? = markdownRegex.find(line.trimStart())
if (matchResult != null) {
val substring = matchResult.groups[1]?.value
languageId = substring
Expand All @@ -38,9 +61,9 @@ class CodeFence(
}

val trimmedCode = codeBuilder.trim().toString()
val language = CodeFenceLanguage.findLanguage(languageId ?: "")
val language = findLanguage(languageId ?: "")
val extension =
language.associatedFileType?.defaultExtension ?: CodeFenceLanguage.lookupFileExt(languageId ?: "txt")
language.associatedFileType?.defaultExtension ?: lookupFileExt(languageId ?: "txt")

return if (trimmedCode.isEmpty()) {
CodeFence(language, "", codeClosed, extension, languageId)
Expand All @@ -51,6 +74,47 @@ class CodeFence(

fun parseAll(content: String): List<CodeFence> {
val codeFences = mutableListOf<CodeFence>()
var currentIndex = 0

val startMatches = shireStartRegex.findAll(content)
for (startMatch in startMatches) {
// 处理标签前的文本
if (startMatch.range.first > currentIndex) {
val beforeText = content.substring(currentIndex, startMatch.range.first)
if (beforeText.isNotEmpty()) {
parseMarkdownContent(beforeText, codeFences)
}
}

// 处理 shire 标签内容
val searchRegion = content.substring(startMatch.range.first)
val endMatch = shireEndRegex.find(searchRegion)
val isComplete = endMatch != null

val shireContent = if (isComplete) {
searchRegion.substring(startMatch.range.length, endMatch!!.range.first).trim()
} else {
searchRegion.substring(startMatch.range.length).trim()
}

codeFences.add(CodeFence(findLanguage("Shire"), shireContent, isComplete, "shire", "Shire"))
currentIndex = if (isComplete) {
startMatch.range.first + endMatch!!.range.last + 1
} else {
content.length
}
}

// 处理最后剩余的内容
if (currentIndex < content.length) {
val remainingContent = content.substring(currentIndex)
parseMarkdownContent(remainingContent, codeFences)
}

return codeFences
}

private fun parseMarkdownContent(content: String, codeFences: MutableList<CodeFence>) {
val regex = Regex("```([\\w#+\\s]*)")
val lines = content.replace("\\n", "\n").lines()

Expand All @@ -59,18 +123,12 @@ class CodeFence(
val codeBuilder = StringBuilder()
val textBuilder = StringBuilder()

for ((index, line) in lines.withIndex()) {
for (line in lines) {
if (!codeStarted) {
val matchResult = regex.find(line.trimStart())
if (matchResult != null) {
if (textBuilder.isNotEmpty()) {
val textBlock = CodeFence(
CodeFenceLanguage.findLanguage("markdown"),
textBuilder.trim().toString(),
false,
"txt"
)

val textBlock = CodeFence(findLanguage("markdown"), textBuilder.trim().toString(), true, "txt")
lastTxtBlock = textBlock
codeFences.add(textBlock)
textBuilder.clear()
Expand All @@ -82,10 +140,6 @@ class CodeFence(
textBuilder.append(line).append("\n")
}
} else {
if (lastTxtBlock != null && lastTxtBlock?.isComplete == false) {
lastTxtBlock!!.isComplete = true
}

if (line.startsWith("```")) {
val codeContent = codeBuilder.trim().toString()
val codeFence = parse("```$languageId\n$codeContent\n```")
Expand All @@ -101,7 +155,7 @@ class CodeFence(
}
}

val ideaLanguage = CodeFenceLanguage.findLanguage(languageId ?: "markdown")
val ideaLanguage = findLanguage(languageId ?: "markdown")
if (textBuilder.isNotEmpty()) {
val normal = CodeFence(ideaLanguage, textBuilder.trim().toString(), true, null, languageId)
codeFences.add(normal)
Expand All @@ -117,8 +171,6 @@ class CodeFence(
codeFences.add(defaultLanguage)
}
}

return codeFences
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.phodal.shirecore.utils.markdown

import com.intellij.testFramework.fixtures.BasePlatformTestCase
import kotlin.collections.get

class CodeFenceTest : BasePlatformTestCase() {

Expand Down Expand Up @@ -142,4 +143,84 @@ class CodeFenceTest : BasePlatformTestCase() {
assertEquals(last.ideaLanguage.displayName, "Markdown")
assertEquals(true, last.isComplete)
}


fun testShouldParseHtmlCode() {
val content = """
// patch to call tools for step 3 with DevIns language, should use DevIns code fence
<shire>
/patch:src/main/index.html
```patch
// the index.html code
```
</shire>
""".trimIndent()
val code = CodeFence.parse(content)
assertEquals(
code.text, """
/patch:src/main/index.html
```patch
// the index.html code
```
""".trimIndent()
)
assertTrue(code.isComplete)
}

fun testShouldParseUndoneHtmlCode() {
val content = """
// patch to call tools for step 3 with DevIns language, should use DevIns code fence
<shire>
/patch:src/main/index.html
```patch
// the index.html code
```
""".trimIndent()
val code = CodeFence.parse(content)
assertFalse(code.isComplete)
assertEquals(
code.text, """
/patch:src/main/index.html
```patch
// the index.html code
```
""".trimIndent()
)
}

/// parse all with shires
fun testShouldParseAllWithDevin() {
val content = """
|<shire>
|// the index.html code
|</shire>
|
|```java
|public class HelloWorld {
| public static void main(String[] args) {
| System.out.println("Hello, World");
| }
|}
|```
""".trimMargin()

val codeFences = CodeFence.parseAll(content)
assertEquals(codeFences.size, 3)
assertEquals(
codeFences[0].text, """
|// the index.html code
""".trimMargin()
)
assertTrue(codeFences[0].isComplete)
assertEquals(
codeFences[2].text, """
|public class HelloWorld {
| public static void main(String[] args) {
| System.out.println("Hello, World");
| }
|}
""".trimMargin()
)
assertTrue(codeFences[2].isComplete)
}
}

0 comments on commit b72753c

Please sign in to comment.