Don't add spaces in KDoc, not even after links.

Summary: This fixes #190 This collapses multiple spaces into a single space but never adds any spaces anywhere. Reviewed By: hick209 Differential Revision: D27322773 fbshipit-source-id: 3cd264e2908e044a368285787ab2620214746d25
facebook · Mar 25, 2021 · cd3f6c1 · cd3f6c1
1 parent 07a8e81
commit cd3f6c1
Show file tree

Hide file tree

Showing 4 changed files with 150 additions and 67 deletions.
diff --git a/core/src/main/java/com/facebook/ktfmt/kdoc/KDocFormatter.kt b/core/src/main/java/com/facebook/ktfmt/kdoc/KDocFormatter.kt
@@ -73,51 +73,7 @@ object KDocFormatter {
  else this
  }
 
- when (tokenType) {
- KDocTokens.START -> tokens.add(Token(BEGIN_KDOC, tokenText))
- KDocTokens.END -> tokens.add(Token(END_KDOC, tokenText))
- KDocTokens.LEADING_ASTERISK -> Unit // Ignore, no need to output anything
- KDocTokens.TAG_NAME -> tokens.add(Token(TAG, tokenText))
- KDocTokens.CODE_BLOCK_TEXT -> tokens.add(Token(CODE, tokenText))
- KDocTokens.MARKDOWN_INLINE_LINK, KDocTokens.MARKDOWN_LINK -> {
- tokens.add(Token(MARKDOWN_LINK, tokenText))
- }
- KDocTokens.TEXT -> {
- if (tokenText.isBlank()) {
- tokens.add(Token(WHITESPACE, " "))
- } else {
- val words = tokenText.trim().split(" +".toRegex())
- var first = true
- for (word in words) {
- if (first) {
- if (word == "-" || word == "*" || word.matches(NUMBERED_LIST_PATTERN)) {
- tokens.add(Token(LIST_ITEM_OPEN_TAG, ""))
- }
- first = false
- }
- // If the KDoc is malformed (e.g. unclosed code block) KDocLexer doesn't report an
- // END_KDOC properly. We want to recover in such cases
- if (word == "*/") {
- tokens.add(Token(END_KDOC, word))
- } else if (word == "```") {
- tokens.add(Token(CODE_BLOCK_MARKER, word))
- } else {
- tokens.add(Token(LITERAL, word))
- tokens.add(Token(WHITESPACE, " "))
- }
- }
- }
- }
- WHITE_SPACE -> {
- if (previousType === KDocTokens.TAG_NAME || previousType === KDocTokens.MARKDOWN_LINK) {
- tokens.add(Token(WHITESPACE, " "))
- } else if (previousType == KDocTokens.LEADING_ASTERISK ||
- tokenText.count { it == '\n' } >= 2) {
- tokens.add(Token(BLANK_LINE, ""))
- }
- }
- else -> throw RuntimeException("Unexpected: $tokenType")
- }
+ processToken(tokenType, tokens, tokenText, previousType)
 
  previousType = tokenType
  kDocLexer.advance()
@@ -126,6 +82,55 @@ object KDocFormatter {
  return makeSingleLineIfPossible(blockIndent, result, maxLineLength)
  }
 
+ private fun processToken(
+ tokenType: IElementType?,
+ tokens: MutableList<Token>,
+ tokenText: String,
+ previousType: IElementType?
+ ) {
+ when (tokenType) {
+ KDocTokens.START -> tokens.add(Token(BEGIN_KDOC, tokenText))
+ KDocTokens.END -> tokens.add(Token(END_KDOC, tokenText))
+ KDocTokens.LEADING_ASTERISK -> Unit // Ignore, no need to output anything
+ KDocTokens.TAG_NAME -> tokens.add(Token(TAG, tokenText))
+ KDocTokens.CODE_BLOCK_TEXT -> tokens.add(Token(CODE, tokenText))
+ KDocTokens.MARKDOWN_INLINE_LINK, KDocTokens.MARKDOWN_LINK -> {
+ tokens.add(Token(MARKDOWN_LINK, tokenText))
+ }
+ KDocTokens.TEXT -> {
+ var first = true
+ for (word in tokenizeKdocText(tokenText)) {
+ if (word.first().isWhitespace()) {
+ tokens.add(Token(WHITESPACE, " "))
+ continue
+ }
+ if (first) {
+ if (word == "-" || word == "*" || word.matches(NUMBERED_LIST_PATTERN)) {
+ tokens.add(Token(LIST_ITEM_OPEN_TAG, ""))
+ }
+ first = false
+ }
+ // If the KDoc is malformed (e.g. unclosed code block) KDocLexer doesn't report an
+ // END_KDOC properly. We want to recover in such cases
+ if (word == "*/") {
+ tokens.add(Token(END_KDOC, word))
+ } else if (word == "```") {
+ tokens.add(Token(CODE_BLOCK_MARKER, word))
+ } else {
+ tokens.add(Token(LITERAL, word))
+ }
+ }
+ }
+ WHITE_SPACE -> {
+ if (previousType == KDocTokens.LEADING_ASTERISK || tokenText.count { it == '\n' } >= 2) {
+ tokens.add(Token(BLANK_LINE, ""))
+ } else {
+ tokens.add(Token(WHITESPACE, " "))
+ }
+ }
+ else -> throw RuntimeException("Unexpected: $tokenType")
+ }
+ }
  private fun render(input: List<Token>, blockIndent: Int, maxLineLength: Int): String {
  val output = KDocWriter(blockIndent, maxLineLength)
  for (token in input) {
@@ -173,4 +178,32 @@ object KDocFormatter {
  }
  return input
  }
+
+ /**
+ * tokenizeKdocText splits 's' by whitespace, and returns both whitespace and non-whitespace
+ * parts.
+ *
+ * Multiple adjacent whitespace characters are collapsed into one. Trailing and leading spaces are
+ * included in the result.
+ *
+ * Example: `" one two three "` becomes `[" ", "one", " ", "two", " ", "three", " "]`. See tests
+ * for more examples.
+ */
+ fun tokenizeKdocText(s: String) = sequence {
+ if (s.isEmpty()) {
+ return@sequence
+ }
+ var mark = 0
+ var inWhitespace = s[0].isWhitespace()
+ for (i in 1..s.lastIndex) {
+ if (inWhitespace == s[i].isWhitespace()) {
+ continue
+ }
+ val result = if (inWhitespace) " " else s.substring(mark, i)
+ inWhitespace = s[i].isWhitespace()
+ mark = i
+ yield(result)
+ }
+ yield(if (inWhitespace) " " else s.substring(mark, s.length))
+ }
 }
diff --git a/core/src/main/java/com/facebook/ktfmt/kdoc/KDocWriter.kt b/core/src/main/java/com/facebook/ktfmt/kdoc/KDocWriter.kt
@@ -188,7 +188,6 @@ internal class KDocWriter(private val blockIndent: Int, private val maxLineLengt
 
  fun writeMarkdownLink(token: Token) {
  writeToken(token)
- requestWhitespace(CONDITIONAL_WHITESPACE)
  }
 
  override fun toString(): String {

diff --git a/core/src/test/java/com/facebook/ktfmt/FormatterKtTest.kt b/core/src/test/java/com/facebook/ktfmt/FormatterKtTest.kt
@@ -3787,40 +3787,45 @@ class FormatterKtTest {
  |""".trimMargin())
 
  @Test
- fun `add spaces after links in Kdoc`() {
- val code =
- """
+ fun `don't add spaces after links in Kdoc`() =
+  assertFormatted(
+  """
  |/** Here are some links [AnotherClass][AnotherClass2]hello */
  |class MyClass {}
- |""".trimMargin()
- val expected =
- """
- |/** Here are some links [AnotherClass] [AnotherClass2] hello */
+ |""".trimMargin())
+
+ @Test
+ fun `don't remove spaces after links in Kdoc`() =
+ assertFormatted(
+ """
+ |/** Please see [onNext] (which has more details) */
  |class MyClass {}
- |""".trimMargin()
- assertThatFormatting(code).isEqualTo(expected)
- }
+ |""".trimMargin())
 
  @Test
- fun `add spaces between links in KDoc`() {
- val code =
- """
+ fun `link anchor in KDoc are preserved`() =
+ assertFormatted(
+ """
+ |/** [link anchor](the URL for the link anchor goes here) */
+ |class MyClass {}
+ |""".trimMargin())
+
+ @Test
+ fun `don't add spaces between links in KDoc (because they're actually references)`() =
+ assertFormatted(
+ """
  |/** Here are some links [AnotherClass][AnotherClass2] */
  |class MyClass {}
- |""".trimMargin()
- val expected =
- """
- |/** Here are some links [AnotherClass] [AnotherClass2] */
+ |
+ |/** The final produced value may have [size][ByteString.size] < [bufferSize]. */
  |class MyClass {}
- |""".trimMargin()
- assertThatFormatting(code).isEqualTo(expected)
- }
+ |""".trimMargin())
 
  @Test
  fun `collapse spaces after links in KDoc`() {
  val code =
  """
- |/** Here are some links [Class1],[Class2] [Class3]. hello */
+ |/** Here are some links [Class1], [Class2] [Class3]. hello */
  |class MyClass {}
  |""".trimMargin()
  val expected =
@@ -3864,6 +3869,9 @@ class FormatterKtTest {
  """
  |/** Enjoy this link [linkstuff]. */
  |class MyClass {}
+ |
+ |/** There are many [FooObject]s. */
+ |class MyClass {}
  |""".trimMargin())
 
  @Test

diff --git a/core/src/test/java/com/facebook/ktfmt/kdoc/KDocFormatterTest.kt b/core/src/test/java/com/facebook/ktfmt/kdoc/KDocFormatterTest.kt
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) Facebook, Inc. and its affiliates.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.facebook.ktfmt.kdoc
+
+import com.facebook.ktfmt.kdoc.KDocFormatter.tokenizeKdocText
+import com.google.common.truth.Truth.assertThat
+import org.junit.Test
+import org.junit.runner.RunWith
+import org.junit.runners.JUnit4
+
+@RunWith(JUnit4::class)
+class KDocFormatterTest {
+ @Test
+ fun testTokenizeKdocText() {
+ assertThat(tokenizeKdocText(" one two three ").asIterable())
+ .containsExactly(" ", "one", " ", "two", " ", "three", " ")
+ .inOrder()
+ assertThat(tokenizeKdocText("one two three ").asIterable())
+ .containsExactly("one", " ", "two", " ", "three", " ")
+ .inOrder()
+ assertThat(tokenizeKdocText("one two three").asIterable())
+ .containsExactly("one", " ", "two", " ", "three")
+ .inOrder()
+ assertThat(tokenizeKdocText("onetwothree").asIterable())
+ .containsExactly("onetwothree")
+ .inOrder()
+ assertThat(tokenizeKdocText("").asIterable()).isEmpty()
+ }
+}