From 930dddc609ab00e74b385f46b070fd0f4b4b15bf Mon Sep 17 00:00:00 2001 From: Anthony Fu Date: Wed, 4 Sep 2024 18:10:49 +0200 Subject: [PATCH] feat: allow to swap regex constructor for JavaScript engine --- packages/core/src/engines/javascript.ts | 32 +++++++++++++++---------- packages/core/src/index.ts | 2 +- packages/core/src/types/engines.ts | 7 ++++++ 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/packages/core/src/engines/javascript.ts b/packages/core/src/engines/javascript.ts index 3c22aecd1..aa2789e58 100644 --- a/packages/core/src/engines/javascript.ts +++ b/packages/core/src/engines/javascript.ts @@ -3,6 +3,23 @@ import type { JavaScriptRegexEngineOptions, PatternScanner, RegexEngine, RegexEn const MAX = 4294967295 +/** + * The default RegExp constructor for JavaScript regex engine. + */ +export function defaultJavaScriptRegexConstructor(pattern: string): RegExp { + return onigurumaToRegexp( + pattern + .replace(/\|\\G(\||\))/g, '$1') + .replace(/(\(|\|)\\G\|/g, '$1') + // YAML specific handling; TODO: move to tm-grammars + .replaceAll('[^\\s[-?:,\\[\\]{}#&*!|>\'"%@`]]', '[^\\s\\-?:,\\[\\]{}#&*!|>\'"%@`]'), + { + flags: 'dgm', + ignoreContiguousAnchors: true, + }, + ) +} + export class JavaScriptScanner implements PatternScanner { regexps: (RegExp | null)[] @@ -10,6 +27,7 @@ export class JavaScriptScanner implements PatternScanner { public patterns: string[], public cache: Map, public forgiving: boolean, + public regexConstructor: (pattern: string) => RegExp = defaultJavaScriptRegexConstructor, ) { this.regexps = patterns.map((p) => { const cached = cache?.get(p) @@ -22,17 +40,7 @@ export class JavaScriptScanner implements PatternScanner { throw cached } try { - const regex = onigurumaToRegexp( - p - .replace(/\|\\G(\||\))/g, '$1') - .replace(/(\(|\|)\\G\|/g, '$1') - // YAML specific handling; TODO: move to tm-grammars - .replaceAll('[^\\s[-?:,\\[\\]{}#&*!|>\'"%@`]]', '[^\\s\\-?:,\\[\\]{}#&*!|>\'"%@`]'), - { - flags: 'dgm', - ignoreContiguousAnchors: true, - }, - ) + const regex = regexConstructor(p) cache?.set(p, regex) return regex } @@ -126,7 +134,7 @@ export function createJavaScriptRegexEngine(options: JavaScriptRegexEngineOption return { createScanner(patterns: string[]) { - return new JavaScriptScanner(patterns, cache, forgiving) + return new JavaScriptScanner(patterns, cache, forgiving, options.regexConstructor) }, createString(s: string) { return { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index d6bc99c56..a13c157d3 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -9,7 +9,7 @@ export { createShikiInternal, getShikiInternal, setDefaultWasmLoader } from './c // Engines export { createWasmOnigEngine, loadWasm } from './engines/wasm' -export { createJavaScriptRegexEngine } from './engines/javascript' +export { createJavaScriptRegexEngine, defaultJavaScriptRegexConstructor } from './engines/javascript' // TextMate Utilities export { normalizeTheme } from './textmate/normalize-theme' diff --git a/packages/core/src/types/engines.ts b/packages/core/src/types/engines.ts index dd5ea752e..ab24bdc3e 100644 --- a/packages/core/src/types/engines.ts +++ b/packages/core/src/types/engines.ts @@ -41,4 +41,11 @@ export interface JavaScriptRegexEngineOptions { * Cache for regex patterns. */ cache?: Map + + /** + * Custom pattern to RegExp constructor. + * + * By default `oniguruma-to-js` is used. + */ + regexConstructor?: (pattern: string) => RegExp }