Skip to content

Commit e489bf5

Browse files
authored
Recognize non-BMP punctuation & symbols (#297)
Closes #296.
1 parent 38d2938 commit e489bf5

File tree

2 files changed

+38
-2
lines changed

2 files changed

+38
-2
lines changed

lib/inlines.js

+22-2
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,10 @@ var match = function(re) {
127127

128128
// Returns the code for the character at the current subject position, or -1
129129
// if there are no more characters.
130+
// This function must be non-BMP aware because the Unicode category of its result is used.
130131
var peek = function() {
131132
if (this.pos < this.subject.length) {
132-
return this.subject.charCodeAt(this.pos);
133+
return this.subject.codePointAt(this.pos);
133134
} else {
134135
return -1;
135136
}
@@ -270,7 +271,7 @@ var scanDelims = function(cc) {
270271
return null;
271272
}
272273

273-
char_before = startpos === 0 ? "\n" : this.subject.charAt(startpos - 1);
274+
char_before = previousChar(this.subject, startpos);
274275

275276
cc_after = this.peek();
276277
if (cc_after === -1) {
@@ -304,6 +305,25 @@ var scanDelims = function(cc) {
304305
}
305306
this.pos = startpos;
306307
return { numdelims: numdelims, can_open: can_open, can_close: can_close };
308+
309+
/**
 * Returns the character that ends immediately before index `pos` in `str`,
 * treating a UTF-16 surrogate pair as a single (two-code-unit) character.
 *
 * @param {string} str - The string to inspect.
 * @param {number} pos - Index (in UTF-16 code units) of the current position.
 * @returns {string} "\n" when `pos` is 0; the full surrogate pair when the
 *     two preceding code units form one; otherwise the single preceding
 *     code unit (including an unpaired surrogate, returned as-is).
 */
function previousChar(str, pos) {
    if (pos === 0) {
        // No previous character: report a newline instead.
        return "\n";
    }
    var previous_cc = str.charCodeAt(pos - 1);
    // Not a low surrogate: a plain BMP code unit, return it directly.
    if ((previous_cc & 0xfc00) !== 0xdc00) {
        return str.charAt(pos - 1);
    }
    // charCodeAt returns NaN when pos - 2 is out of range, and
    // NaN & 0xfc00 === 0, so the check below also covers pos === 1.
    var two_previous_cc = str.charCodeAt(pos - 2);
    // The low surrogate is not preceded by a high surrogate: it is unpaired,
    // so return just that one code unit.
    if ((two_previous_cc & 0xfc00) !== 0xd800) {
        return str.charAt(pos - 1);
    }
    // Valid surrogate pair: return both code units as one character.
    return str.slice(pos - 2, pos);
}
307327
};
308328

309329
// Handle a delimiter marker for emphasis or a quote.

test/regression.txt

+16
Original file line numberDiff line numberDiff line change
@@ -546,3 +546,19 @@ foo <!-- test --> more -->
546546
<p> 全角スペース (U+3000) 全形空白 </p>
547547
<p>ZWNBSP (U+FEFF) ZWNBSP</p>
548548
````````````````````````````````
549+
550+
#296
551+
```````````````````````````````` example
552+
a**a∇**a
553+
554+
a**∇a**a
555+
556+
a**a𝜵**a
557+
558+
a**𝜵a**a
559+
.
560+
<p>a**a∇**a</p>
561+
<p>a**∇a**a</p>
562+
<p>a**a𝜵**a</p>
563+
<p>a**𝜵a**a</p>
564+
````````````````````````````````

0 commit comments

Comments
 (0)