From c4dd88c584aed91808a0850ba239c7a9ef9d581a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20=C4=8C=C3=AD=C5=BEek?=
Date: Wed, 1 Apr 2020 00:24:52 +0200
Subject: [PATCH] Respect original whitespace instead of using hardcoded 0x20.

Do not merge ASCII and non-ASCII whitespace.
Make sure non-ASCII whitespace is moved out of inline elements
to prevent generating broken Markdown.

Fix #102.
Fix #250.
---
 src/node.js     | 35 +++++++++++++++++++------------
 test/index.html | 55 ++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 76 insertions(+), 14 deletions(-)

diff --git a/src/node.js b/src/node.js
index 675e9799..83b4a7a5 100644
--- a/src/node.js
+++ b/src/node.js
@@ -18,24 +18,33 @@ function isBlank (node) {
 }
 
 function flankingWhitespace (node) {
-  var leading = ''
-  var trailing = ''
+  if (node.isBlock) return { leading: '', trailing: '' }
 
-  if (!node.isBlock) {
-    var hasLeading = /^\s/.test(node.textContent)
-    var hasTrailing = /\s$/.test(node.textContent)
-    var blankWithSpaces = node.isBlank && hasLeading && hasTrailing
+  var edges = edgeWhitespace(node.textContent)
 
-    if (hasLeading && !isFlankedByWhitespace('left', node)) {
-      leading = ' '
-    }
+  // abandon leading ASCII WS if left-flanked by ASCII WS
+  if (edges.leadingAscii && isFlankedByWhitespace('left', node)) {
+    edges.leading = edges.leadingNonAscii
+  }
 
-    if (!blankWithSpaces && hasTrailing && !isFlankedByWhitespace('right', node)) {
-      trailing = ' '
-    }
+  // abandon trailing ASCII WS if right-flanked by ASCII WS
+  if (edges.trailingAscii && isFlankedByWhitespace('right', node)) {
+    edges.trailing = edges.trailingNonAscii
   }
 
-  return { leading: leading, trailing: trailing }
+  return { leading: edges.leading, trailing: edges.trailing }
+}
+
+function edgeWhitespace (string) {
+  var m = string.match(/^(([ \t\r\n]*)(\s*))[\s\S]*?((\s*?)([ \t\r\n]*))$/)
+  return {
+    leading: m[1], // whole string for whitespace-only strings
+    leadingAscii: m[2],
+    leadingNonAscii: m[3],
+    trailing: m[4], // empty for whitespace-only strings
+    trailingNonAscii: m[5],
+    trailingAscii: m[6]
+  }
 }
 
 function isFlankedByWhitespace (side, node) {
diff --git a/test/index.html b/test/index.html
index 67118a90..0973f4ae 100644
--- a/test/index.html
+++ b/test/index.html
@@ -899,7 +899,7 @@

This is a header.

Foo Bar

-
Foo Bar
+
Foo Bar
@@ -958,6 +958,59 @@

This is a header.

```
+
+
+

Foo   Bar

+
+
Foo   Bar
+
+ +
+
 1. First
 2. Second
+
 1. First  
+ 2. Second
+
+ +
+
foo bar
+
_foo_ bar
+
+ +
+
foo  bar
+
_foo_  bar
+
+ +
+
foo  bar
+
_foo_  bar
+
+ +
+
foo  bar
+
_foo_  bar
+
+ +
+
foo bar
+
foo _bar_
+
+ +
+
foo  bar
+
foo  _bar_
+
+ +
+
foo  bar
+
foo  _bar_
+
+ +
+
foo  bar
+
foo  _bar_
+
+