Skip to content

Commit

Permalink
Merge pull request #134 from JohannesKaufmann/improve-hard-line-break-2
Browse files Browse the repository at this point in the history
improve-hard-line-break-2
  • Loading branch information
JohannesKaufmann authored Jan 7, 2025
2 parents e128d43 + f6aab8f commit 3d2ff94
Show file tree
Hide file tree
Showing 17 changed files with 208 additions and 127 deletions.
10 changes: 10 additions & 0 deletions internal/textutils/consecutive_newlines.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,19 @@
package textutils

import (
"bytes"
"unicode/utf8"
)

// TrimUnnecessaryHardLineBreaks removes trailing spaces that sit directly in
// front of a blank line, so that the paragraph break is rendered as a plain
// "\n\n". Each pattern below is applied to the whole content in order, and the
// result of one pass is the input of the next.
//
// A line that only contains a space but is NOT preceded by trailing spaces
// ("\n \n") is left untouched; that replacement is kept disabled.
//
// NOTE(review): a Markdown hard line break is normally written with two
// trailing spaces — confirm the number of spaces in these patterns.
func TrimUnnecessaryHardLineBreaks(content []byte) []byte {
	paragraphBreak := []byte("\n\n")

	// The order and the repetition are significant: a later pass can match
	// text that only became adjacent after an earlier replacement.
	patterns := [][]byte{
		[]byte(" \n\n"),
		[]byte(" \n \n"),
		[]byte(" \n \n"),
	}

	for _, pattern := range patterns {
		content = bytes.ReplaceAll(content, pattern, paragraphBreak)
	}

	return content
}

func TrimConsecutiveNewlines(input []byte) []byte {
var result []byte
newlineCount := 0
Expand Down
85 changes: 52 additions & 33 deletions internal/textutils/consecutive_newlines_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,32 @@ func TestTrimConsecutiveNewlines(t *testing.T) {

// Double newline cases
{"double newline", "a\n\nb", "a\n\nb"},
{"double newline with spaces", "a \n\nb", "a \n\nb"},
{"double newline with spaces", "a \n\nb", "a\n\nb"},
{"spaces between newlines", "a\n \nb", "a\n \nb"},
// Note: It should not change the spaces *after* the newlines since this could impact lists
{"spaces after double newline", "a\n\n b", "a\n\n b"},

// Triple+ newline cases
{"triple newline", "a\n\n\nb", "a\n\nb"},
{"quad newline", "a\n\n\n\nb", "a\n\nb"},
{"triple newline with spaces", "a \n\n\nb", "a \n\nb"},
{"triple newline with spaces", "a \n\n\nb", "a\n\nb"},

// Multiple segment cases
{"multiple segments", "a\n\nb\n\nc", "a\n\nb\n\nc"},
{"multiple segments with spaces", "a \n\nb \n\nc", "a \n\nb \n\nc"},
{"multiple segments with spaces", "a \n\nb \n\nc", "a\n\nb\n\nc"},

// Spaces at end of line
{"hard-line-break followed by text", "a \nb", "a \nb"},
{"hard-line-break followed by newline", "a \n\nb", "a \n\nb"},
{"hard-line-break followed by newline", "a \n\nb", "a\n\nb"},

// Edge cases
{"only newlines", "\n\n\n", "\n\n"},
{"only spaces", " ", " "},

{"leading and trailing newlines", "\n\n\ntext\n\n\n", "\n\ntext\n\n"},
{"newlines and spaces", " \n \n \n \n ", " \n \n "},
{"newlines and spaces 1", " \n \n \n \n ", "\n\n "},
{"newlines and spaces 2", "a \n \nb", "a\n\nb"},
{"newlines and spaces 3", "a \n \nb", "a\n\nb"},

{"leading spaces", " a", " a"},
{"leading newline 1", "\na", "\na"},
Expand All @@ -60,14 +63,20 @@ func TestTrimConsecutiveNewlines(t *testing.T) {
// UTF-8 cases
{"german special chars", "äöü\n\n\näöü", "äöü\n\näöü"},
{"utf8 chars", "🌟\n\n\n🌟\n\n\n🌟", "🌟\n\n🌟\n\n🌟"},

// Markdown
// Note: The sublist needs to be indented by " -"
{"indented sublist", "- The main list\n \n - The sublist", "- The main list\n \n - The sublist"},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := string(TrimConsecutiveNewlines([]byte(tt.input)))
if got != tt.expected {
output := TrimConsecutiveNewlines([]byte(tt.input))
output = TrimUnnecessaryHardLineBreaks(output)

if string(output) != tt.expected {
t.Errorf("\ninput: %q\nexpected: %q\ngot: %q",
tt.input, tt.expected, got,
tt.input, tt.expected, string(output),
)
}
})
Expand All @@ -77,49 +86,59 @@ func TestTrimConsecutiveNewlines(t *testing.T) {
func TestTrimConsecutiveNewlines_Allocs(t *testing.T) {
const N = 1000

var avg float64
/*
avg = testing.AllocsPerRun(N, func() {
t.Run("no newlines", func(t *testing.T) {
var expectedAverage float64 = 1

actualAverage := testing.AllocsPerRun(N, func() {
input := []byte("abc")
output := TrimConsecutiveNewlines(input)
_ = output
})
if avg != 0 {
t.Errorf("with no newlines there should be no allocations but got %f", avg)
if actualAverage != expectedAverage {
t.Errorf("expected %f allocations but got %f", expectedAverage, actualAverage)
}
})
t.Run("exactly two newlines", func(t *testing.T) {
var expectedAverage float64 = 1

avg = testing.AllocsPerRun(N, func() {
actualAverage := testing.AllocsPerRun(N, func() {
input := []byte("abc\n\nabc")
output := TrimConsecutiveNewlines(input)
_ = output
})
if avg != 0 {
t.Errorf("with only two newlines there should be no allocations but got %f", avg)
if actualAverage != expectedAverage {
t.Errorf("expected %f allocations but got %f", expectedAverage, actualAverage)
}
*/
})
t.Run("three newlines", func(t *testing.T) {
var expectedAverage float64 = 1

avg = testing.AllocsPerRun(N, func() {
input := []byte("abc\n\n\nabc")
output := TrimConsecutiveNewlines(input)
_ = output
actualAverage := testing.AllocsPerRun(N, func() {
input := []byte("abc\n\n\nabc")
output := TrimConsecutiveNewlines(input)
_ = output
})
if actualAverage != expectedAverage {
t.Errorf("expected %f allocations but got %f", expectedAverage, actualAverage)
}
})
if avg != 1 {
t.Errorf("with three newlines there should be 1 allocation but got %f", avg)
}
t.Run("many newlines", func(t *testing.T) {
var expectedAverage float64 = 16

avg = testing.AllocsPerRun(N, func() {
input := []byte("abc\n\n\n\n\n\nabc\n\n\n\n\n\nabc\n\n\n\n\n\nabc\n\n\n\n\n\nabc\n\n\n\n\n\nabc")
output := TrimConsecutiveNewlines(input)
_ = output
actualAverage := testing.AllocsPerRun(N, func() {
input := bytes.Repeat([]byte("abc\n\n\n\n\n\nabc"), 1000)
output := TrimConsecutiveNewlines(input)
_ = output
})
if actualAverage != expectedAverage {
t.Errorf("expected %f allocations but got %f", expectedAverage, actualAverage)
}
})
if avg != 3 {
t.Errorf("with many newlines there should be 3 allocation but got %f", avg)
}
}

const Repeat = 10

func BenchmarkTrimConsecutiveNewlines(b *testing.B) {
const Repeat = 10

runs := []struct {
desc string
input []byte
Expand Down
72 changes: 43 additions & 29 deletions internal/textutils/escape_multiline.go
Original file line number Diff line number Diff line change
@@ -1,42 +1,56 @@
package textutils

// EscapeMultiLine deals with multiline content inside a link or a heading.
func EscapeMultiLine(content []byte) []byte {
content = TrimConsecutiveNewlines(content)
import (
"bytes"
"unicode"
)

var (
doubleSpace = []byte{' ', ' '}

newContent := make([]byte, 0, len(content))
newlineBreak = []byte{'\n'}
hardLineBreak = []byte{' ', ' ', '\n'}
escapedNoContentLineBreak = []byte{'\\', '\n'}
)

startNormal := 0
lineHasContent := false
for index, char := range content {
isNewline := char == '\n'
isSpace := char == ' ' || char == ' '
// EscapeMultiLine deals with multiline content inside a link or a heading.
func EscapeMultiLine(content []byte) []byte {
parts := bytes.Split(content, newlineBreak)
if len(parts) == 1 {
return content
}

isFirstNewline := isNewline && lineHasContent
isLastNewline := isNewline && !lineHasContent
output := make([]byte, 0, len(content))
for i := range parts {
trimmedLeft := bytes.TrimLeftFunc(parts[i], unicode.IsSpace)

if isFirstNewline {
newContent = append(newContent, content[startNormal:index]...)
newContent = append(newContent, '\n')
if len(trimmedLeft) == 0 {
// A blank line would interrupt the link.
// So we need to escape the line
output = append(output, escapedNoContentLineBreak...)
continue
}

startNormal = index + 1
lineHasContent = false
isLast := i == len(parts)-1
if isLast {
// For the last line we don't need to add any "\n" anymore
output = append(output, trimmedLeft...)
continue
}

// Now decide what ending we want:
if bytes.HasSuffix(trimmedLeft, doubleSpace) {
// We already have " " so adding a "\n" is enough
output = append(output, trimmedLeft...)
output = append(output, newlineBreak...)
continue
} else {
// We *prefer* having a hard-line-break " \n"
output = append(output, trimmedLeft...)
output = append(output, hardLineBreak...)
continue
} else if isLastNewline {
newContent = append(newContent, '\\')
newContent = append(newContent, '\n')

startNormal = index + 1
lineHasContent = false
} else if !isSpace {
lineHasContent = true
} else if isSpace && !lineHasContent {
startNormal = index + 1
}
}

newContent = append(newContent, content[startNormal:]...)

return newContent
return output
}
69 changes: 15 additions & 54 deletions internal/textutils/escape_multiline_test.go
Original file line number Diff line number Diff line change
@@ -1,33 +1,10 @@
package textutils

import (
"bytes"
"strings"
"testing"
)

var newline = []byte{'\n'}
var escape = []byte{'\\'}

// EscapeMultiLine_Old is the previous implementation of EscapeMultiLine.
//
// It trims surrounding whitespace from the content, passes it through
// TrimConsecutiveNewlines and then trims every line individually. A line
// that is empty after trimming is replaced by the escape marker, and the
// lines are joined again with the newline separator. Content that is empty
// after the initial trimming is returned as-is.
func EscapeMultiLine_Old(content []byte) []byte {
	content = TrimConsecutiveNewlines(bytes.TrimSpace(content))
	if len(content) == 0 {
		return content
	}

	lines := bytes.Split(content, newline)
	for idx, line := range lines {
		line = bytes.TrimSpace(line)
		if len(line) == 0 {
			// Substitute the escape marker for a blank line.
			line = escape
		}
		lines[idx] = line
	}

	return bytes.Join(lines, newline)
}

func TestEscapeMultiLine(t *testing.T) {
var tests = []struct {
Name string
Expand All @@ -48,19 +25,19 @@ func TestEscapeMultiLine(t *testing.T) {
{
Name: "one newline",
Text: "A\nB",
Expected: "A\nB",
Expected: "A \nB",
},
{
Name: "two newlines",
Text: "A\n\nB",
Expected: "A\n\\\nB",
Expected: "A \n\\\nB",
},
{

Name: "many newlines",
// Will be max two newlines characters
Text: "line 1\n\n\n\nline 2",
Expected: "line 1\n\\\nline 2",
Expected: "line 1 \n\\\nline 2",
},

{
Expand All @@ -74,64 +51,48 @@ line3
line4`,
Expected: `line1
line2
Expected: `line1
line2
\
line3
line3
\
line4`,
},

{
Name: "empty line with a space",
Text: "line 1\n \nline 2",
Expected: "line 1\n\\\nline 2",
Expected: "line 1 \n\\\nline 2",
},

{
Name: "content has a space",
Text: "a\n\n b",
Expected: "a\n\\\nb",
Expected: "a \n\\\nb",
},
{
Name: "content is indented",
Text: "line 1\n line 2\n\tline 3",
Expected: "line 1\nline 2\nline 3",
Expected: "line 1 \nline 2 \nline 3",
},

// TODO: keep existing "\" characters?
}

for _, test := range tests {
t.Run(test.Name, func(t *testing.T) {
t.Run("old", func(t *testing.T) {
output := EscapeMultiLine_Old([]byte(test.Text))

if string(output) != test.Expected {
t.Errorf("expected '%s' but got '%s'", test.Expected, string(output))
}
})
t.Run("new", func(t *testing.T) {
output := EscapeMultiLine([]byte(test.Text))

if string(output) != test.Expected {
t.Errorf("expected '%s' but got '%s'", test.Expected, string(output))
}
})
input := TrimConsecutiveNewlines([]byte(test.Text))
output := EscapeMultiLine(input)

if string(output) != test.Expected {
t.Errorf("expected '%s' but got '%s'", test.Expected, string(output))
}
})

}
}

func BenchmarkEscapeMultiLine(b *testing.B) {

b.Run("old", func(b *testing.B) {
input := []byte(strings.Repeat("line 1\n\n \nline 2", 100))

for i := 0; i < b.N; i++ {
_ = EscapeMultiLine_Old(input)
}
})
b.Run("new", func(b *testing.B) {
input := []byte(strings.Repeat("line 1\n\n \nline 2", 100))

Expand Down
1 change: 1 addition & 0 deletions plugin/base/base.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ func (b *base) postRenderTrimContent(ctx converter.Context, result []byte) []byt

// Remove too many newlines
result = textutils.TrimConsecutiveNewlines(result)
result = textutils.TrimUnnecessaryHardLineBreaks(result)

return result
}
Expand Down
Loading

0 comments on commit 3d2ff94

Please sign in to comment.