diff --git a/core/src/main/java/com/facebook/ktfmt/kdoc/KDocFormatter.kt b/core/src/main/java/com/facebook/ktfmt/kdoc/KDocFormatter.kt index ba310ad7..323a70ec 100644 --- a/core/src/main/java/com/facebook/ktfmt/kdoc/KDocFormatter.kt +++ b/core/src/main/java/com/facebook/ktfmt/kdoc/KDocFormatter.kt @@ -73,51 +73,7 @@ object KDocFormatter { else this } - when (tokenType) { - KDocTokens.START -> tokens.add(Token(BEGIN_KDOC, tokenText)) - KDocTokens.END -> tokens.add(Token(END_KDOC, tokenText)) - KDocTokens.LEADING_ASTERISK -> Unit // Ignore, no need to output anything - KDocTokens.TAG_NAME -> tokens.add(Token(TAG, tokenText)) - KDocTokens.CODE_BLOCK_TEXT -> tokens.add(Token(CODE, tokenText)) - KDocTokens.MARKDOWN_INLINE_LINK, KDocTokens.MARKDOWN_LINK -> { - tokens.add(Token(MARKDOWN_LINK, tokenText)) - } - KDocTokens.TEXT -> { - if (tokenText.isBlank()) { - tokens.add(Token(WHITESPACE, " ")) - } else { - val words = tokenText.trim().split(" +".toRegex()) - var first = true - for (word in words) { - if (first) { - if (word == "-" || word == "*" || word.matches(NUMBERED_LIST_PATTERN)) { - tokens.add(Token(LIST_ITEM_OPEN_TAG, "")) - } - first = false - } - // If the KDoc is malformed (e.g. unclosed code block) KDocLexer doesn't report an - // END_KDOC properly. 
We want to recover in such cases - if (word == "*/") { - tokens.add(Token(END_KDOC, word)) - } else if (word == "```") { - tokens.add(Token(CODE_BLOCK_MARKER, word)) - } else { - tokens.add(Token(LITERAL, word)) - tokens.add(Token(WHITESPACE, " ")) - } - } - } - } - WHITE_SPACE -> { - if (previousType === KDocTokens.TAG_NAME || previousType === KDocTokens.MARKDOWN_LINK) { - tokens.add(Token(WHITESPACE, " ")) - } else if (previousType == KDocTokens.LEADING_ASTERISK || - tokenText.count { it == '\n' } >= 2) { - tokens.add(Token(BLANK_LINE, "")) - } - } - else -> throw RuntimeException("Unexpected: $tokenType") - } + processToken(tokenType, tokens, tokenText, previousType) previousType = tokenType kDocLexer.advance() @@ -126,6 +82,55 @@ object KDocFormatter { return makeSingleLineIfPossible(blockIndent, result, maxLineLength) } + private fun processToken( + tokenType: IElementType?, + tokens: MutableList, + tokenText: String, + previousType: IElementType? + ) { + when (tokenType) { + KDocTokens.START -> tokens.add(Token(BEGIN_KDOC, tokenText)) + KDocTokens.END -> tokens.add(Token(END_KDOC, tokenText)) + KDocTokens.LEADING_ASTERISK -> Unit // Ignore, no need to output anything + KDocTokens.TAG_NAME -> tokens.add(Token(TAG, tokenText)) + KDocTokens.CODE_BLOCK_TEXT -> tokens.add(Token(CODE, tokenText)) + KDocTokens.MARKDOWN_INLINE_LINK, KDocTokens.MARKDOWN_LINK -> { + tokens.add(Token(MARKDOWN_LINK, tokenText)) + } + KDocTokens.TEXT -> { + var first = true + for (word in tokenizeKdocText(tokenText)) { + if (word.first().isWhitespace()) { // a collapsed whitespace run from tokenizeKdocText + tokens.add(Token(WHITESPACE, " ")) + continue + } + if (first) { // only the first non-whitespace word may open a list item + if (word == "-" || word == "*" || word.matches(NUMBERED_LIST_PATTERN)) { + tokens.add(Token(LIST_ITEM_OPEN_TAG, "")) + } + first = false + } + // If the KDoc is malformed (e.g. unclosed code block), KDocLexer doesn't report an + // END_KDOC properly. 
We want to recover in such cases + if (word == "*/") { + tokens.add(Token(END_KDOC, word)) + } else if (word == "```") { + tokens.add(Token(CODE_BLOCK_MARKER, word)) + } else { + tokens.add(Token(LITERAL, word)) + } + } + } + WHITE_SPACE -> { + if (previousType == KDocTokens.LEADING_ASTERISK || tokenText.count { it == '\n' } >= 2) { + tokens.add(Token(BLANK_LINE, "")) + } else { + tokens.add(Token(WHITESPACE, " ")) + } + } + else -> throw RuntimeException("Unexpected: $tokenType") + } + } private fun render(input: List, blockIndent: Int, maxLineLength: Int): String { val output = KDocWriter(blockIndent, maxLineLength) for (token in input) { @@ -173,4 +178,32 @@ object KDocFormatter { } return input } + + /** + * Splits [s] into alternating whitespace and non-whitespace parts and lazily yields them in + * order. + * + * Each run of whitespace characters is yielded as a single `" "`, including leading and + * trailing runs. An empty [s] yields nothing. + * + * Example: `" one two three "` becomes `[" ", "one", " ", "two", " ", "three", " "]`. See tests + * for more examples. 
+ */ + fun tokenizeKdocText(s: String) = sequence { + if (s.isEmpty()) { + return@sequence + } + var mark = 0 + var inWhitespace = s[0].isWhitespace() + for (i in 1..s.lastIndex) { + if (inWhitespace == s[i].isWhitespace()) { + continue + } + val result = if (inWhitespace) " " else s.substring(mark, i) + inWhitespace = s[i].isWhitespace() + mark = i + yield(result) + } + yield(if (inWhitespace) " " else s.substring(mark, s.length)) + } } diff --git a/core/src/main/java/com/facebook/ktfmt/kdoc/KDocWriter.kt b/core/src/main/java/com/facebook/ktfmt/kdoc/KDocWriter.kt index 2b959640..675d1636 100644 --- a/core/src/main/java/com/facebook/ktfmt/kdoc/KDocWriter.kt +++ b/core/src/main/java/com/facebook/ktfmt/kdoc/KDocWriter.kt @@ -188,7 +188,6 @@ internal class KDocWriter(private val blockIndent: Int, private val maxLineLengt fun writeMarkdownLink(token: Token) { writeToken(token) - requestWhitespace(CONDITIONAL_WHITESPACE) } override fun toString(): String { diff --git a/core/src/test/java/com/facebook/ktfmt/FormatterKtTest.kt b/core/src/test/java/com/facebook/ktfmt/FormatterKtTest.kt index 1f0a1dd9..93a6be89 100644 --- a/core/src/test/java/com/facebook/ktfmt/FormatterKtTest.kt +++ b/core/src/test/java/com/facebook/ktfmt/FormatterKtTest.kt @@ -3787,40 +3787,45 @@ class FormatterKtTest { |""".trimMargin()) @Test - fun `add spaces after links in Kdoc`() { - val code = - """ + fun `don't add spaces after links in Kdoc`() = + assertFormatted( + """ |/** Here are some links [AnotherClass][AnotherClass2]hello */ |class MyClass {} - |""".trimMargin() - val expected = - """ - |/** Here are some links [AnotherClass] [AnotherClass2] hello */ + |""".trimMargin()) + + @Test + fun `don't remove spaces after links in Kdoc`() = + assertFormatted( + """ + |/** Please see [onNext] (which has more details) */ |class MyClass {} - |""".trimMargin() - assertThatFormatting(code).isEqualTo(expected) - } + |""".trimMargin()) @Test - fun `add spaces between links in KDoc`() { - val code = - """ + 
fun `link anchor in KDoc are preserved`() = + assertFormatted( + """ + |/** [link anchor](the URL for the link anchor goes here) */ + |class MyClass {} + |""".trimMargin()) + + @Test + fun `don't add spaces between links in KDoc (because they're actually references)`() = + assertFormatted( + """ |/** Here are some links [AnotherClass][AnotherClass2] */ |class MyClass {} - |""".trimMargin() - val expected = - """ - |/** Here are some links [AnotherClass] [AnotherClass2] */ + | + |/** The final produced value may have [size][ByteString.size] < [bufferSize]. */ |class MyClass {} - |""".trimMargin() - assertThatFormatting(code).isEqualTo(expected) - } + |""".trimMargin()) @Test fun `collapse spaces after links in KDoc`() { val code = """ - |/** Here are some links [Class1],[Class2] [Class3]. hello */ + |/** Here are some links [Class1], [Class2] [Class3]. hello */ |class MyClass {} |""".trimMargin() val expected = @@ -3864,6 +3869,9 @@ class FormatterKtTest { """ |/** Enjoy this link [linkstuff]. */ |class MyClass {} + | + |/** There are many [FooObject]s. */ + |class MyClass {} |""".trimMargin()) @Test diff --git a/core/src/test/java/com/facebook/ktfmt/kdoc/KDocFormatterTest.kt b/core/src/test/java/com/facebook/ktfmt/kdoc/KDocFormatterTest.kt new file mode 100644 index 00000000..b1734de8 --- /dev/null +++ b/core/src/test/java/com/facebook/ktfmt/kdoc/KDocFormatterTest.kt @@ -0,0 +1,43 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.facebook.ktfmt.kdoc + +import com.facebook.ktfmt.kdoc.KDocFormatter.tokenizeKdocText +import com.google.common.truth.Truth.assertThat +import org.junit.Test +import org.junit.runner.RunWith +import org.junit.runners.JUnit4 + +@RunWith(JUnit4::class) +class KDocFormatterTest { + @Test + fun testTokenizeKdocText() { + assertThat(tokenizeKdocText(" one two three ").asIterable()) + .containsExactly(" ", "one", " ", "two", " ", "three", " ") + .inOrder() + assertThat(tokenizeKdocText("one two three ").asIterable()) + .containsExactly("one", " ", "two", " ", "three", " ") + .inOrder() + assertThat(tokenizeKdocText("one two three").asIterable()) + .containsExactly("one", " ", "two", " ", "three") + .inOrder() + assertThat(tokenizeKdocText("onetwothree").asIterable()) + .containsExactly("onetwothree") + .inOrder() + assertThat(tokenizeKdocText("").asIterable()).isEmpty() + } +}