@@ -127,9 +127,10 @@ var match = function(re) {
127
127
128
128
// Returns the code for the character at the current subject position, or -1
129
129
// if there are no more characters.
130
+ // This function must be non-BMP aware because the Unicode category of its result is used.
130
131
var peek = function ( ) {
131
132
if ( this . pos < this . subject . length ) {
132
- return this . subject . charCodeAt ( this . pos ) ;
133
+ return this . subject . codePointAt ( this . pos ) ;
133
134
} else {
134
135
return - 1 ;
135
136
}
@@ -270,7 +271,7 @@ var scanDelims = function(cc) {
270
271
return null ;
271
272
}
272
273
273
- char_before = startpos === 0 ? "\n" : this . subject . charAt ( startpos - 1 ) ;
274
+ char_before = previousChar ( this . subject , startpos ) ;
274
275
275
276
cc_after = this . peek ( ) ;
276
277
if ( cc_after === - 1 ) {
@@ -304,6 +305,25 @@ var scanDelims = function(cc) {
304
305
}
305
306
this . pos = startpos ;
306
307
return { numdelims : numdelims , can_open : can_open , can_close : can_close } ;
308
+
309
// Returns the character that ends immediately before index `pos` in `str`.
//
// - When `pos` is 0 there is no previous character and "\n" is returned.
// - When the code unit at `pos - 1` is a low surrogate preceded by a high
//   surrogate, both code units are returned together as one two-unit string.
// - Otherwise the single code unit at `pos - 1` is returned, which includes
//   the case of an unpaired low surrogate.
function previousChar(str, pos) {
    if (pos === 0) {
        return "\n";
    }
    var previous_char = str.charAt(pos - 1);
    var previous_cc = str.charCodeAt(pos - 1);
    // Not a low surrogate, so it cannot be the second half of a surrogate
    // pair: return the single code unit.
    if ((previous_cc & 0xfc00) !== 0xdc00) {
        return previous_char;
    }
    // charCodeAt returns NaN when pos - 2 is out of range, and
    // NaN & 0xfc00 is 0, so the test below also covers a low surrogate
    // sitting at index 0.
    var two_previous_cc = str.charCodeAt(pos - 2);
    if ((two_previous_cc & 0xfc00) !== 0xd800) {
        // Unpaired low surrogate: no high surrogate precedes it, so return
        // it on its own.
        return previous_char;
    }
    // High surrogate followed by low surrogate: return the whole pair.
    return str.slice(pos - 2, pos);
}
307
327
} ;
308
328
309
329
// Handle a delimiter marker for emphasis or a quote.
0 commit comments