diff --git a/src/highlight.js b/src/highlight.js
index ad0bb19815..8815c80c80 100644
--- a/src/highlight.js
+++ b/src/highlight.js
@@ -237,6 +237,47 @@ https://highlightjs.org/
);
}
+ // joinRe builds the string regexps.join(separator), except that each \N
+ // backreference is shifted by the capture groups counted in earlier regexps.
+ function joinRe(regexps, separator) {
+ // backreferenceRe matches an open parenthesis or a numbered backreference.
+ // To avoid an incorrect parse, it additionally matches the following:
+ // - [...] character classes, where parentheses and escapes mean something else
+ // - other escape sequences (e.g. an escaped parenthesis), so they are not
+ // misparsed as interesting elements
+ // - non-capturing or lookahead parentheses, which do not capture. These
+ // follow the '(' with a '?'.
+ var backreferenceRe = /\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./; // no 'g' flag: exec always scans from index 0
+ var numCaptures = 0; // capturing groups counted so far, across all regexps
+ var ret = ''; // joined pattern source being built
+ for (var i = 0; i < regexps.length; i++) {
+ var offset = numCaptures; // groups counted in the regexps before this one
+ var re = reStr(regexps[i]); // NOTE(review): reStr is defined elsewhere; its result is used as a string here
+ if (i > 0) {
+ ret += separator;
+ }
+ while (re.length > 0) { // consume `re` from left to right
+ var match = backreferenceRe.exec(re);
+ if (match == null) { // nothing interesting left: copy the tail verbatim
+ ret += re;
+ break;
+ }
+ ret += re.substring(0, match.index); // text before the match is copied unchanged
+ re = re.substring(match.index + match[0].length); // drop what was consumed so the next exec starts after it
+ if (match[0][0] == '\\' && match[1]) { // match[1] is only set by the \N alternative
+ // Renumber the backreference by this regexp's offset.
+ ret += '\\' + String(Number(match[1]) + offset);
+ } else {
+ ret += match[0];
+ if (match[0] == '(') { // a bare '(' is counted as a capture; '(?' is not
+ numCaptures++;
+ }
+ }
+ }
+ }
+ return ret;
+ }
+
function compileMode(mode, parent) {
if (mode.compiled)
return;
@@ -302,12 +343,12 @@ https://highlightjs.org/
var terminators =
mode.contains.map(function(c) {
- return c.beginKeywords ? '\\.?(' + c.begin + ')\\.?' : c.begin;
+ return c.beginKeywords ? '\\.?(?:' + c.begin + ')\\.?' : c.begin;
})
.concat([mode.terminator_end, mode.illegal])
.map(reStr)
.filter(Boolean);
- mode.terminators = terminators.length ? langRe(terminators.join('|'), true) : {exec: function(/*s*/) {return null;}};
+ mode.terminators = terminators.length ? langRe(joinRe(terminators, '|'), true) : {exec: function(/*s*/) {return null;}};
}
compileMode(language);
diff --git a/src/languages/cpp.js b/src/languages/cpp.js
index f91e3f4a61..7817d6da09 100644
--- a/src/languages/cpp.js
+++ b/src/languages/cpp.js
@@ -19,13 +19,7 @@ function(hljs) {
illegal: '\\n',
contains: [hljs.BACKSLASH_ESCAPE]
},
- {
- // TODO: This does not handle raw string literals with prefixes. Using
- // a single regex with backreferences would work (note to use *?
- // instead of * to make it non-greedy), but the mode.terminators
- // computation in highlight.js breaks the counting.
- begin: '(u8?|U|L)?R"\\(', end: '\\)"',
- },
+ { begin: /(?:u8?|U|L)?R"([^()\\ ]{0,16})\((?:.|\n)*?\)\1"/ },
{
begin: '\'\\\\?.', end: '\'',
illegal: '.'
diff --git a/test/markup/cpp/string-literals.expect.txt b/test/markup/cpp/string-literals.expect.txt
index 1210b01e5e..4c5baca14f 100644
--- a/test/markup/cpp/string-literals.expect.txt
+++ b/test/markup/cpp/string-literals.expect.txt
@@ -10,21 +10,47 @@
auto char_multi = R"(Hello
"normal"
-muliline
+multiline
string.)";
auto utf8_multi = u8R"(Hello
"utf-8"
-muliline
+multiline
string)";
auto utf16_multi = uR"(Hello
"utf-16"
-muliline
+multiline
string)";
auto utf32_multi = UR"(Hello
"utf-32"
-muliline
+multiline
string)";
+
+auto char_multi = R"blah1(Hello
+"normal"
+multiline
+)"
+)blah"
+string.)blah1";
+auto utf8_multi = u8R"blah2(Hello
+"utf-8"
+multiline
+)"
+)blah"
+string)blah2";
+auto utf16_multi = uR"blah3(Hello
+"utf-16"
+multiline
+)"
+)blah"
+string)blah3";
+auto utf32_multi = UR"blah4(Hello
+"utf-32"
+multiline
+)"
+)blah"
+string)blah4";
+
#include <stdio>
#include "lib.h"
diff --git a/test/markup/cpp/string-literals.txt b/test/markup/cpp/string-literals.txt
index 68b8bd411a..9939edba3e 100644
--- a/test/markup/cpp/string-literals.txt
+++ b/test/markup/cpp/string-literals.txt
@@ -10,21 +10,47 @@ auto wide_char = L"Hello wchar_t string";
// Raw string literals (multiline)
auto char_multi = R"(Hello
"normal"
-muliline
+multiline
string.)";
auto utf8_multi = u8R"(Hello
"utf-8"
-muliline
+multiline
string)";
auto utf16_multi = uR"(Hello
"utf-16"
-muliline
+multiline
string)";
auto utf32_multi = UR"(Hello
"utf-32"
-muliline
+multiline
string)";
+// Raw string literals with delimiter (multiline)
+auto char_multi = R"blah1(Hello
+"normal"
+multiline
+)"
+)blah"
+string.)blah1";
+auto utf8_multi = u8R"blah2(Hello
+"utf-8"
+multiline
+)"
+)blah"
+string)blah2";
+auto utf16_multi = uR"blah3(Hello
+"utf-16"
+multiline
+)"
+)blah"
+string)blah3";
+auto utf32_multi = UR"blah4(Hello
+"utf-32"
+multiline
+)"
+)blah"
+string)blah4";
+
// Meta strings
#include
#include "lib.h"