Skip to content

Commit

Permalink
Fix text block incidental whitespace handling
Browse files Browse the repository at this point in the history
This commit fixes the handling of incidental whitespace on the last
line of a text block when parsing, and fixes the formatting of text
blocks so that indentation and non-incidental whitespace isn't
removed.

There was a bug in parsing the last line of text blocks that needed
to be fixed, and then the formatter needed special handling for
serializing text blocks.

Closes #2141
  • Loading branch information
mtdowling committed Feb 16, 2024
1 parent f89e236 commit 40855c1
Show file tree
Hide file tree
Showing 6 changed files with 232 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -237,22 +237,16 @@ private static List<CharSequence> lines(final CharSequence text) {
/**
* Computes the number of leading whitespace characters in a string.
*
* <p>This method returns -1 if the string is empty or if the string
* contains only whitespace. When determining the whitespace of the
* last line, the length of the line is returned if the entire
* line consists of only spaces. This ensures that the whitespace
* of the last line can be used to influence the indentation of the
* content as a whole (a significant trailing line policy).
* <p>This method returns -1 if the string contains only whitespace. When determining the whitespace of the
* last line, the length of the line is returned if the entire line consists of only spaces. This ensures that
* the whitespace of the last line can be used to influence the indentation of the content as a whole
* (a significant trailing line policy).
*
* @param line Line to search for whitespace.
* @param isLastLine Whether or not this is the last line.
* @return Returns the last whitespace index starting at 0 or -1.
*/
private static int computeLeadingWhitespace(CharSequence line, boolean isLastLine) {
if (line.length() == 0) {
return -1;
}

for (int offset = 0; offset < line.length(); offset++) {
if (line.charAt(offset) != ' ') {
return offset;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,13 @@
import static org.hamcrest.Matchers.nullValue;
import static org.hamcrest.Matchers.startsWith;

import java.util.stream.Stream;
import org.hamcrest.MatcherAssert;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

public class IdlInternalTokenizerTest {

Expand Down Expand Up @@ -181,4 +185,81 @@ public void returnsCapturedDocsInRange() {
assertThat(lines, equalTo("Hi\nThere\n123\n456"));
MatcherAssert.assertThat(tokenizer.removePendingDocCommentLines(), nullValue());
}

public static Stream<Arguments> textBlockTests() {
return Stream.of(
Arguments.of(
"\"\"\"\n"
+ " Hello\n"
+ " - Indented\"\"\"\n",
"Hello\n - Indented"
),
Arguments.of(
"\"\"\"\n"
+ " Hello\n"
+ " - Indented\n"
+ " \"\"\"\n",
"Hello\n - Indented\n"
),
Arguments.of(
"\"\"\"\n"
+ " Hello\n"
+ " - Indented\n"
+ "\"\"\"\n",
" Hello\n - Indented\n"
),
Arguments.of(
"\"\"\"\n"
+ " Hello\"\"\"\n",
"Hello"
),
Arguments.of(
"\"\"\"\n"
+ " Hello\n"
+ "\n"
+ " - Indented\n"
+ "\"\"\"\n",
" Hello\n\n - Indented\n"
),
Arguments.of(
"\"\"\"\n"
+ " \n" // only WS doesn't influence line length calculations.
+ " Hello\n"
+ " \n" // only WS doesn't influence line length calculations.
+ " \"\"\"",
"\nHello\n\n"
),
Arguments.of(
"\"\"\"\n"
+ "\n" // empty lines are incidental whitespace.
+ " Hello\n"
+ " \n" // only WS doesn't influence line length calculations.
+ " \"\"\"",
"\nHello\n\n"
),
Arguments.of(
"\"\"\"\n"
+ "\n" // empty lines are incidental whitespace.
+ "Hello\n"
+ "\n"
+ "\n"
+ "\"\"\"",
"\nHello\n\n\n"
),
Arguments.of(
"\"\"\"\n"
+ "\"\"\"",
""
)
);
}

@ParameterizedTest
@MethodSource("textBlockTests")
public void parsesTextBlocks(String model, String stringValue) {
IdlInternalTokenizer tokenizer = new IdlInternalTokenizer("a.smithy", model);
tokenizer.expect(IdlToken.TEXT_BLOCK);

assertThat(tokenizer.getCurrentTokenStringSlice().toString(), equalTo(stringValue));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,13 @@

import com.opencastsoftware.prettier4j.Doc;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import software.amazon.smithy.model.shapes.ShapeId;
import software.amazon.smithy.utils.StringUtils;

final class FormatVisitor {

Expand Down Expand Up @@ -447,11 +446,49 @@ Doc visit(TreeCursor cursor) {
case TEXT_BLOCK: {
// Dispersing the lines of the text block preserves any indentation applied from formatting parent
// nodes.
List<Doc> lines = Arrays.stream(tree.concatTokens().split(System.lineSeparator()))
.map(String::trim)
.map(Doc::text)
.collect(Collectors.toList());
return Doc.intersperse(Doc.line(), lines);

// We need to rebuild the text block to remove any incidental leading whitespace. The easiest way to
// do that is to use the already parsed and resolved value from the lexer.
String stringValue = cursor.getTree()
.tokens()
.findFirst()
.orElseThrow(() -> new RuntimeException("TEXT_BLOCK cursor does not have an IDL token"))
.getStringContents();

// If the last character is a newline, then the closing triple quote must be on the next line.
boolean endQuoteOnNextLine = stringValue.endsWith("\n") || stringValue.endsWith("\r");

List<Doc> resultLines = new ArrayList<>();
resultLines.add(Doc.text("\"\"\""));

String[] inputLines = stringValue.split("\\r?\\n", -1);
for (int i = 0; i < inputLines.length; i++) {
boolean lastLine = i == inputLines.length - 1;

// If this is the last line and the ending quote is on the next line, then skip the extra line.
if (endQuoteOnNextLine && lastLine) {
break;
}

String lineValue = inputLines[i];

// Trim trailing whitespace.
// TODO: This may need to be configurable.
lineValue = StringUtils.stripEnd(lineValue, null);

// Add the closing quote to this line if it needs to be on the last line.
if (lastLine) {
lineValue += "\"\"\"";
}

resultLines.add(Doc.text(lineValue));
}

if (endQuoteOnNextLine) {
resultLines.add(Doc.text("\"\"\""));
}

return Doc.intersperse(Doc.line(), resultLines);
}

case TOKEN:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ metadata validators = [
configuration: {
selector: """
operation -[input]-> structure > member
:test(member > string:not([trait|enum]))
:test(member > string:not([trait|enum]))
:test(member > string:not([trait|length]))
:test(member > string:not([trait|aws.api#arnReference]))
:test(member > string:not([trait|aws.api#providesPassRole]))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ $version: "2.0"

namespace smithy.example

// This will look exactly the same when formatted.
@documentation(
"""
This is the documentation for Foo.
Lorem ipsum dolor."""
)
string Foo

// This will look exactly the same when formatted.
@documentation(
"""
This is the documentation for Bar.
Expand All @@ -17,6 +19,7 @@ string Foo
)
string Bar

// The quotes and text will be indented inside the parens when formatted.
@documentation(
"""
This is the documentation for Baz.
Expand All @@ -25,10 +28,61 @@ string Bar
)
string Baz

// The opening quotes will be indented and move to the next line, the contents of the block will remain indented
// because indentation is based on the closing quote, the closing quote will be indented, and the closing paren on
// the next line.
@documentation(
"""
This is the documentation for Bux.
Lorem ipsum dolor.
This is the documentation for Bux.
Lorem ipsum dolor.
"""
)
string Bux

// The opening quote will move to the next line, indentend, the content will remain the same, the closing quote
// will stay on last line, and the closing paren will move to the next line.
@documentation(
"""
This doc string must not have the leading whitespace altered.
{
"foo": true,
bar: [
false
]
}"""
)
string JsonDocsNoTrailingNewLine

// The opening quote will move to the next line, indentend, the content will remain the same, the closing quote
// will stay on its own line, and the closing paren will move to the next line.
@documentation(
"""
This doc string must not have the leading whitespace altered.
{
"foo": true,
bar: [
false
]
}
"""
)
string JsonDocsTrailingNewLine

// Ensure extra newlines at the end are included.
@documentation(
"""
Hello with extra newlines.
"""
)
string ExtraTrailingNewlines

// Moves the quotes to the same indentation level and the closing parens to the next line.
@documentation(
"""
"""
)
string EmptyTextBlock
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ $version: "2.0"

namespace smithy.example

// This will look exactly the same when formatted.
@documentation(
"""
This is the documentation for Foo.
Lorem ipsum dolor."""
)
string Foo

// This will look exactly the same when formatted.
@documentation(
"""
This is the documentation for Bar.
Expand All @@ -17,6 +19,7 @@ string Foo
)
string Bar

// The quotes and text will be indented inside the parens when formatted.
@documentation(
"""
This is the documentation for Baz.
Expand All @@ -25,8 +28,51 @@ Lorem ipsum dolor.
)
string Baz

// The opening quotes will be indented and move to the next line, the contents of the block will remain indented
// because indentation is based on the closing quote, the closing quote will be indented, and the closing paren on
// the next line.
@documentation("""
This is the documentation for Bux.
Lorem ipsum dolor.
""")
string Bux

// The opening quote will move to the next line, indentend, the content will remain the same, the closing quote
// will stay on last line, and the closing paren will move to the next line.
@documentation("""
This doc string must not have the leading whitespace altered.
{
"foo": true,
bar: [
false
]
}""")
string JsonDocsNoTrailingNewLine

// The opening quote will move to the next line, indentend, the content will remain the same, the closing quote
// will stay on its own line, and the closing paren will move to the next line.
@documentation("""
This doc string must not have the leading whitespace altered.
{
"foo": true,
bar: [
false
]
}
""")
string JsonDocsTrailingNewLine

// Ensure extra newlines at the end are included.
@documentation("""
Hello with extra newlines.
""")
string ExtraTrailingNewlines

// Moves the quotes to the same indentation level and the closing parens to the next line.
@documentation("""
""")
string EmptyTextBlock

0 comments on commit 40855c1

Please sign in to comment.