From 90fa148573cc740e89eee8a4d2b25cd855412723 Mon Sep 17 00:00:00 2001 From: Anthony Fu Date: Thu, 21 Dec 2023 20:01:53 +0100 Subject: [PATCH] feat: introduce lazy embedded, more efficient bundle (#59) --- packages/shikiji-core/src/registry.ts | 24 +++-- packages/shikiji-core/src/types.ts | 5 + packages/shikiji/scripts/prepare/langs.ts | 49 +++++++--- .../shikiji/src/assets/langs-bundle-web.ts | 95 +++++++++++++++++- packages/shikiji/test/bundle.test.ts | 19 ++++ .../test/{index.test.ts => general.test.ts} | 97 ++++++------------- vitest.config.ts | 2 + 7 files changed, 200 insertions(+), 91 deletions(-) create mode 100644 packages/shikiji/test/bundle.test.ts rename packages/shikiji/test/{index.test.ts => general.test.ts} (70%) diff --git a/packages/shikiji-core/src/registry.ts b/packages/shikiji-core/src/registry.ts index 31a9cd74..c931beb9 100644 --- a/packages/shikiji-core/src/registry.ts +++ b/packages/shikiji-core/src/registry.ts @@ -60,21 +60,17 @@ export class Registry extends TextMateRegistry { if (this.getGrammar(lang.name)) return + const embeddedLazilyBy = new Set(Object.values(this._langMap).filter(i => i.embeddedLangsLazy?.includes(lang.name))) + this._resolver.addLanguage(lang) - const embeddedLanguages = lang.embeddedLangs?.reduce(async (acc, l, idx) => { - if (!this.getLoadedLanguages().includes(l) && this._resolver.getLangRegistration(l)) { - await this._resolver.loadGrammar(this._resolver.getLangRegistration(l).scopeName) - acc[this._resolver.getLangRegistration(l).scopeName] = idx + 2 - return acc - } - }, {} as any) const grammarConfig: IGrammarConfiguration = { - embeddedLanguages, balancedBracketSelectors: lang.balancedBracketSelectors || ['*'], unbalancedBracketSelectors: lang.unbalancedBracketSelectors || [], } + // @ts-expect-error Private members, set this to override the previous grammar (that can be a stub) + this._syncRegistry._rawGrammars.set(lang.scopeName, lang) const g = await this.loadGrammarWithConfiguration(lang.scopeName, 1, 
grammarConfig) this._resolvedGrammars[lang.name] = g! if (lang.aliases) { @@ -82,6 +78,18 @@ export class Registry extends TextMateRegistry { this.alias[alias] = lang.name }) } + + // If there is a language that embeds this language lazily, we need to reload it + if (embeddedLazilyBy.size) { + for (const e of embeddedLazilyBy) { + delete this._resolvedGrammars[e.name] + // @ts-expect-error clear cache + this._syncRegistry?._injectionGrammars?.delete(e.scopeName) + // @ts-expect-error clear cache + this._syncRegistry?._grammars?.delete(e.scopeName) + await this.loadLanguage(this._langMap[e.name]) + } + } } async init() { diff --git a/packages/shikiji-core/src/types.ts b/packages/shikiji-core/src/types.ts index 9b586ef5..1c35f347 100644 --- a/packages/shikiji-core/src/types.ts +++ b/packages/shikiji-core/src/types.ts @@ -195,6 +195,11 @@ export interface LanguageRegistration extends RawGrammar { * languages for each parent language. */ embeddedLangs?: string[] + /** + * A list of languages the current language embeds lazily. + * Unlike `embeddedLangs`, the embedded languages will not be loaded automatically. + */ + embeddedLangsLazy?: string[] balancedBracketSelectors?: string[] unbalancedBracketSelectors?: string[] diff --git a/packages/shikiji/scripts/prepare/langs.ts b/packages/shikiji/scripts/prepare/langs.ts index b37c71b4..48145719 100644 --- a/packages/shikiji/scripts/prepare/langs.ts +++ b/packages/shikiji/scripts/prepare/langs.ts @@ -4,6 +4,15 @@ import fg from 'fast-glob' import type { LanguageRegistration } from 'shikiji-core' import { COMMENT_HEAD } from './constants' +/** + * Languages that include a lot of embedded langs. + * We only load on-demand for these langs. 
+ */ +const LANGS_LAZY_EMBEDDED = [ + 'markdown', + 'mdx', +] + export async function prepareLangs() { const allLangFiles = await fg('*.json', { cwd: './node_modules/tm-grammars/grammars', @@ -30,13 +39,13 @@ export async function prepareLangs() { aliases: lang.aliases, } - // F# and Markdown has circular dependency - if (lang.name === 'fsharp' && json.embeddedLangs) - json.embeddedLangs = json.embeddedLangs.filter((i: string) => i !== 'markdown') + // We don't load all the embedded langs for markdown + if (LANGS_LAZY_EMBEDDED.includes(lang.name)) { + json.embeddedLangsLazy = json.embeddedLangs + json.embeddedLangs = [] + } - const deps: string[] = [ - ...(json.embeddedLangs || []), - ] + const deps: string[] = json.embeddedLangs || [] await fs.writeFile(`./src/assets/langs/${lang.name}.ts`, `${COMMENT_HEAD} import type { LanguageRegistration } from 'shikiji-core' @@ -51,15 +60,34 @@ ${[ ' lang', ].join(',\n') || ''} ] -`, 'utf-8') +`.replace(/\n\n+/g, '\n\n'), 'utf-8') } async function writeLanguageBundleIndex( fileName: string, ids: string[], - exclude: string[] = [], ) { - const bundled = ids.map(id => grammars.find(i => i.name === id)!).filter(i => !exclude.includes(i.name)) + // We flatten all the embedded langs + const bundledIds = new Set(ids) + let changed = true + while (changed) { + changed = false + for (const id of bundledIds) { + if (LANGS_LAZY_EMBEDDED.includes(id)) + continue + const lang = grammars.find(i => i.name === id) + if (!lang) + continue + for (const e of lang.embedded || []) { + if (!bundledIds.has(e)) { + bundledIds.add(e) + changed = true + } + } + } + } + + const bundled = Array.from(bundledIds).map(id => grammars.find(i => i.name === id)!).filter(Boolean) const info = bundled.map(i => ({ id: i.name, @@ -103,8 +131,5 @@ export const bundledLanguages = { ...grammars.filter(i => i.categories?.includes('web')).map(i => i.name), 'shellscript', ], - [ - 'coffee', - ], ) } diff --git a/packages/shikiji/src/assets/langs-bundle-web.ts 
b/packages/shikiji/src/assets/langs-bundle-web.ts index ec4657e0..c8fdbeaa 100644 --- a/packages/shikiji/src/assets/langs-bundle-web.ts +++ b/packages/shikiji/src/assets/langs-bundle-web.ts @@ -14,11 +14,37 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [ 'name': 'Blade', 'import': (() => import('./langs/blade')) as DynamicImportLanguageRegistration }, + { + 'id': 'c', + 'name': 'C', + 'import': (() => import('./langs/c')) as DynamicImportLanguageRegistration + }, + { + 'id': 'coffee', + 'name': 'CoffeeScript', + 'aliases': [ + 'coffeescript' + ], + 'import': (() => import('./langs/coffee')) as DynamicImportLanguageRegistration + }, + { + 'id': 'cpp', + 'name': 'C++', + 'aliases': [ + 'c++' + ], + 'import': (() => import('./langs/cpp')) as DynamicImportLanguageRegistration + }, { 'id': 'css', 'name': 'CSS', 'import': (() => import('./langs/css')) as DynamicImportLanguageRegistration }, + { + 'id': 'glsl', + 'name': 'GLSL', + 'import': (() => import('./langs/glsl')) as DynamicImportLanguageRegistration + }, { 'id': 'graphql', 'name': 'GraphQL', @@ -55,6 +81,11 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [ 'name': 'Imba', 'import': (() => import('./langs/imba')) as DynamicImportLanguageRegistration }, + { + 'id': 'java', + 'name': 'Java', + 'import': (() => import('./langs/java')) as DynamicImportLanguageRegistration + }, { 'id': 'javascript', 'name': 'JavaScript', @@ -78,6 +109,11 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [ 'name': 'JSON', 'import': (() => import('./langs/json')) as DynamicImportLanguageRegistration }, + { + 'id': 'json5', + 'name': 'JSON5', + 'import': (() => import('./langs/json5')) as DynamicImportLanguageRegistration + }, { 'id': 'jsonc', 'name': 'JSON with Comments', @@ -103,6 +139,19 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [ 'name': 'Less', 'import': (() => import('./langs/less')) as DynamicImportLanguageRegistration }, + { + 'id': 'lua', + 'name': 'Lua', + 'import': 
(() => import('./langs/lua')) as DynamicImportLanguageRegistration + }, + { + 'id': 'markdown', + 'name': 'Markdown', + 'aliases': [ + 'md' + ], + 'import': (() => import('./langs/markdown')) as DynamicImportLanguageRegistration + }, { 'id': 'marko', 'name': 'Marko', @@ -136,6 +185,27 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [ ], 'import': (() => import('./langs/pug')) as DynamicImportLanguageRegistration }, + { + 'id': 'python', + 'name': 'Python', + 'aliases': [ + 'py' + ], + 'import': (() => import('./langs/python')) as DynamicImportLanguageRegistration + }, + { + 'id': 'r', + 'name': 'R', + 'import': (() => import('./langs/r')) as DynamicImportLanguageRegistration + }, + { + 'id': 'ruby', + 'name': 'Ruby', + 'aliases': [ + 'rb' + ], + 'import': (() => import('./langs/ruby')) as DynamicImportLanguageRegistration + }, { 'id': 'sass', 'name': 'Sass', @@ -157,6 +227,11 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [ ], 'import': (() => import('./langs/shellscript')) as DynamicImportLanguageRegistration }, + { + 'id': 'sql', + 'name': 'SQL', + 'import': (() => import('./langs/sql')) as DynamicImportLanguageRegistration + }, { 'id': 'stylus', 'name': 'Stylus', @@ -170,6 +245,11 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [ 'name': 'Svelte', 'import': (() => import('./langs/svelte')) as DynamicImportLanguageRegistration }, + { + 'id': 'toml', + 'name': 'TOML', + 'import': (() => import('./langs/toml')) as DynamicImportLanguageRegistration + }, { 'id': 'tsx', 'name': 'TSX', @@ -202,6 +282,19 @@ export const bundledLanguagesInfo: BundledLanguageInfo[] = [ 'id': 'wgsl', 'name': 'WGSL', 'import': (() => import('./langs/wgsl')) as DynamicImportLanguageRegistration + }, + { + 'id': 'xml', + 'name': 'XML', + 'import': (() => import('./langs/xml')) as DynamicImportLanguageRegistration + }, + { + 'id': 'yaml', + 'name': 'YAML', + 'aliases': [ + 'yml' + ], + 'import': (() => import('./langs/yaml')) as 
DynamicImportLanguageRegistration } ] @@ -209,7 +302,7 @@ export const bundledLanguagesBase = Object.fromEntries(bundledLanguagesInfo.map( export const bundledLanguagesAlias = Object.fromEntries(bundledLanguagesInfo.flatMap(i => i.aliases?.map(a => [a, i.import]) || [])) -export type BundledLanguage = 'astro' | 'bash' | 'blade' | 'css' | 'gql' | 'graphql' | 'haml' | 'handlebars' | 'hbs' | 'html' | 'http' | 'imba' | 'jade' | 'javascript' | 'jinja' | 'jison' | 'js' | 'json' | 'jsonc' | 'jsonl' | 'jsx' | 'julia' | 'less' | 'marko' | 'mdc' | 'mdx' | 'php' | 'postcss' | 'pug' | 'sass' | 'scss' | 'sh' | 'shell' | 'shellscript' | 'styl' | 'stylus' | 'svelte' | 'ts' | 'tsx' | 'typescript' | 'vue' | 'vue-html' | 'wasm' | 'wgsl' | 'zsh' +export type BundledLanguage = 'astro' | 'bash' | 'blade' | 'c' | 'c++' | 'coffee' | 'coffeescript' | 'cpp' | 'css' | 'glsl' | 'gql' | 'graphql' | 'haml' | 'handlebars' | 'hbs' | 'html' | 'http' | 'imba' | 'jade' | 'java' | 'javascript' | 'jinja' | 'jison' | 'js' | 'json' | 'json5' | 'jsonc' | 'jsonl' | 'jsx' | 'julia' | 'less' | 'lua' | 'markdown' | 'marko' | 'md' | 'mdc' | 'mdx' | 'php' | 'postcss' | 'pug' | 'py' | 'python' | 'r' | 'rb' | 'ruby' | 'sass' | 'scss' | 'sh' | 'shell' | 'shellscript' | 'sql' | 'styl' | 'stylus' | 'svelte' | 'toml' | 'ts' | 'tsx' | 'typescript' | 'vue' | 'vue-html' | 'wasm' | 'wgsl' | 'xml' | 'yaml' | 'yml' | 'zsh' export const bundledLanguages = { ...bundledLanguagesBase, diff --git a/packages/shikiji/test/bundle.test.ts b/packages/shikiji/test/bundle.test.ts new file mode 100644 index 00000000..cd4429ac --- /dev/null +++ b/packages/shikiji/test/bundle.test.ts @@ -0,0 +1,19 @@ +import { expect, it } from 'vitest' + +it('bundle-full', async () => { + const highlighter = await import('shikiji/bundle/full').then(r => r.getHighlighter({ + langs: Object.keys(r.bundledLanguages), + })) + + expect(highlighter.getLoadedLanguages().length) + .toMatchInlineSnapshot(`233`) +}) + +it('bundle-web', async () => { + const 
highlighter = await import('shikiji/bundle/web').then(r => r.getHighlighter({ + langs: Object.keys(r.bundledLanguages), + })) + + expect(highlighter.getLoadedLanguages().length) + .toMatchInlineSnapshot(`72`) +}) diff --git a/packages/shikiji/test/index.test.ts b/packages/shikiji/test/general.test.ts similarity index 70% rename from packages/shikiji/test/index.test.ts rename to packages/shikiji/test/general.test.ts index e558cec1..4db8cff8 100644 --- a/packages/shikiji/test/index.test.ts +++ b/packages/shikiji/test/general.test.ts @@ -55,107 +55,38 @@ describe('should', () => { expect(shiki.getLoadedLanguages().sort()) .toMatchInlineSnapshot(` [ - "bash", - "bat", - "batch", - "bibtex", - "c", - "c#", - "c++", - "clj", - "clojure", - "cmd", "coffee", "coffeescript", - "cpp", - "cpp-macro", - "cs", - "csharp", "css", - "dart", - "diff", - "docker", - "dockerfile", - "elixir", - "erl", - "erlang", - "f#", - "fs", - "fsharp", - "git-commit", - "git-rebase", - "glsl", - "gnuplot", - "go", "gql", "graphql", - "groovy", - "handlebars", - "haskell", - "hbs", - "hs", "html", - "ini", "jade", - "java", "javascript", "js", "json", "json5", "jsonc", "jsx", - "julia", - "latex", "less", - "lua", - "make", - "makefile", "markdown", "markdown-vue", "md", - "objc", - "objective-c", - "perl", - "perl6", - "php", - "powershell", - "properties", - "ps", - "ps1", "pug", - "py", - "python", - "r", - "raku", - "rb", - "rs", - "ruby", - "rust", "sass", - "scala", "scss", - "sh", - "shell", - "shellscript", - "sql", "styl", "stylus", - "swift", - "tex", "toml", "ts", "tsx", "typescript", - "vb", "vue", "vue-directives", "vue-interpolations", "vue-sfc-style-variable-injection", - "xml", - "xsl", "yaml", "yml", - "zsh", ] `) }) @@ -167,9 +98,10 @@ describe('should', () => { await shiki.loadTheme('min-dark') await shiki.loadLanguage('md') await shiki.loadLanguage('js') + await shiki.loadLanguage('ts') expect(shiki.getLoadedLanguages().length) - .toMatchInlineSnapshot(`91`) + 
.toMatchInlineSnapshot(`6`) expect(shiki.getLoadedThemes()) .toMatchInlineSnapshot(` @@ -187,6 +119,31 @@ describe('should', () => { console.log(1) \`\`\`" `) + + expect(shiki.codeToHtml('```ts\nconsole.log(1)\n```', { lang: 'md', theme: 'min-dark' })) + .toMatchInlineSnapshot(` + "
\`\`\`ts
+        console.log(1)
+        \`\`\`
" + `) + + // This should be unstyled + expect(shiki.codeToHtml('```cpp\nint a = 1;\n```', { lang: 'md', theme: 'min-dark' })) + .toMatchInlineSnapshot(` + "
\`\`\`cpp
+        int a = 1;
+        \`\`\`
" + `) + + await shiki.loadLanguage('cpp') + + // This should be styled + expect(shiki.codeToHtml('```cpp\nint a = 1;\n```', { lang: 'md', theme: 'min-dark' })) + .toMatchInlineSnapshot(` + "
\`\`\`cpp
+        int a = 1;
+        \`\`\`
" + `) }) }) diff --git a/vitest.config.ts b/vitest.config.ts index d2db7fb1..fb37c8d2 100644 --- a/vitest.config.ts +++ b/vitest.config.ts @@ -17,6 +17,8 @@ export default defineConfig({ 'markdown-it-shikiji': fileURLToPath(new URL('./packages/markdown-it-shikiji/src/index.ts', import.meta.url)), 'shikiji/wasm': fileURLToPath(new URL('./packages/shikiji/src/wasm.ts', import.meta.url)), 'shikiji/core': fileURLToPath(new URL('./packages/shikiji/src/core.ts', import.meta.url)), + 'shikiji/bundle/full': fileURLToPath(new URL('./packages/shikiji/src/bundle-full.ts', import.meta.url)), + 'shikiji/bundle/web': fileURLToPath(new URL('./packages/shikiji/src/bundle-web.ts', import.meta.url)), 'shikiji': fileURLToPath(new URL('./packages/shikiji/src/index.ts', import.meta.url)), }, },