From 253ef1f8621123dc9b70d1c2d0a18888cff4556c Mon Sep 17 00:00:00 2001 From: Leo McArdle Date: Mon, 16 Sep 2024 13:31:51 +0100 Subject: [PATCH] feat(syntax-highlight): do client side, support more languages (#11654) Co-authored-by: Florian Dieminger --- build/blog.ts | 4 +- build/code-headers.ts | 45 +++++ build/curriculum.ts | 4 +- build/index.ts | 6 +- build/syntax-highlight.ts | 139 --------------- client/src/blog/post.tsx | 7 +- client/src/document/code/syntax-highlight.tsx | 168 ++++++++++++++++++ client/src/document/hooks.ts | 16 +- client/src/document/index.tsx | 8 +- client/src/plus/ai-help/index.tsx | 20 +-- package.json | 1 + yarn.lock | 5 + 12 files changed, 247 insertions(+), 176 deletions(-) create mode 100644 build/code-headers.ts delete mode 100644 build/syntax-highlight.ts create mode 100644 client/src/document/code/syntax-highlight.tsx diff --git a/build/blog.ts b/build/blog.ts index 715115345a98..4caafa0bc912 100644 --- a/build/blog.ts +++ b/build/blog.ts @@ -28,7 +28,7 @@ import { postProcessSmallerHeadingIDs, } from "./utils.js"; import { slugToFolder } from "../libs/slug-utils/index.js"; -import { syntaxHighlight } from "./syntax-highlight.js"; +import { wrapCodeExamples } from "./code-headers.js"; import { wrapTables } from "./wrap-tables.js"; import { Doc } from "../libs/types/document.js"; import { extractSections } from "./extract-sections.js"; @@ -391,7 +391,7 @@ export async function buildPost( doc.hasMathML = true; } $("div.hidden").remove(); - syntaxHighlight($, doc); + wrapCodeExamples($); injectNoTranslate($); injectLoadingLazyAttributes($); postProcessExternalLinks($); diff --git a/build/code-headers.ts b/build/code-headers.ts new file mode 100644 index 000000000000..8b0823fb32a5 --- /dev/null +++ b/build/code-headers.ts @@ -0,0 +1,45 @@ +import * as cheerio from "cheerio"; + +// Over the years we have accumulated some weird
 tags whose
+// brush is more or less "junk".
+// TODO: Perhaps, if you have a doc with <pre> tags that matches
+// this, it should become a flaw.
+const IGNORE = new Set(["none", "text", "plain", "unix"]);
+
+/**
+ * Mutate the `$` instance by adding headers to <pre> tags containing code blocks.
+ *
+ */
+export function wrapCodeExamples($: cheerio.CheerioAPI) {
+  // Our content will be like this: `<pre class="brush:js">` or
+  // `<pre class="brush: js">` so we're technically not looking for an exact
+  // match. The wildcard would technically match `<pre class="brushetta">`
+  // too. But within the loop, we do a more careful regex on the class name
+  // and only proceed if it's something sensible.
+  $("pre[class*=brush]").each((_, element) => {
+    // The language is whatever string comes after the `brush(:)`
+    // portion of the class name.
+    const $pre = $(element);
+
+    const className = $pre.attr("class").toLowerCase();
+    const match = className.match(/brush:?\s*([\w_-]+)/);
+    if (!match) {
+      return;
+    }
+    const name = match[1].replace("-nolint", "");
+    if (IGNORE.has(name)) {
+      // Seems to exist a couple of these in our docs. Just bail.
+      return;
+    }
+    const code = $pre.text();
+    $pre.wrapAll(`
`); + if (!$pre.hasClass("hidden")) { + $( + `
${name}
` + ).insertBefore($pre); + } + const $code = $("").text(code); + + $pre.empty().append($code); + }); +} diff --git a/build/curriculum.ts b/build/curriculum.ts index 863e338070ff..068c7fc15f4e 100644 --- a/build/curriculum.ts +++ b/build/curriculum.ts @@ -9,7 +9,7 @@ import { DocParent } from "../libs/types/document.js"; import { CURRICULUM_TITLE, DEFAULT_LOCALE } from "../libs/constants/index.js"; import * as kumascript from "../kumascript/index.js"; import LANGUAGES_RAW from "../libs/languages/index.js"; -import { syntaxHighlight } from "./syntax-highlight.js"; +import { wrapCodeExamples } from "./code-headers.js"; import { escapeRegExp, injectLoadingLazyAttributes, @@ -321,7 +321,7 @@ export async function buildCurriculumPage( doc.hasMathML = true; } $("div.hidden").remove(); - syntaxHighlight($, doc); + wrapCodeExamples($); injectNoTranslate($); injectLoadingLazyAttributes($); postProcessCurriculumLinks($, (p: string | undefined) => { diff --git a/build/index.ts b/build/index.ts index c9cb88533910..ccacb3b2b8ef 100644 --- a/build/index.ts +++ b/build/index.ts @@ -28,7 +28,7 @@ import { } from "./flaws/index.js"; import { checkImageReferences, checkImageWidths } from "./check-images.js"; import { getPageTitle } from "./page-title.js"; -import { syntaxHighlight } from "./syntax-highlight.js"; +import { wrapCodeExamples } from "./code-headers.js"; import { formatNotecards } from "./format-notecards.js"; import buildOptions from "./build-options.js"; import LANGUAGES_RAW from "../libs/languages/index.js"; @@ -456,8 +456,8 @@ export async function buildDocument( plainHTML = $.html(); } - // Apply syntax highlighting all
 tags.
-  syntaxHighlight($, doc);
+  // Add headers to all <pre> tags with code.
+  wrapCodeExamples($);
 
   // Post process HTML so that the right elements gets tagged so they
   // *don't* get translated by tools like Google Translate.
diff --git a/build/syntax-highlight.ts b/build/syntax-highlight.ts
deleted file mode 100644
index d27a6902a60c..000000000000
--- a/build/syntax-highlight.ts
+++ /dev/null
@@ -1,139 +0,0 @@
-import Prism from "prismjs";
-import loadLanguages from "prismjs/components/index.js";
-import "prism-svelte";
-import * as cheerio from "cheerio";
-import { createHmac } from "node:crypto";
-import { SAMPLE_SIGN_KEY } from "../libs/env/index.js";
-
-const lazy = (creator) => {
-  let res;
-  let processed = false;
-  return (...args) => {
-    if (processed) return res;
-    res = creator.apply(this, args);
-    processed = true;
-    return res;
-  };
-};
-
-const loadAllLanguages = lazy(() => {
-  // Some languages are always loaded by Prism, so we can omit them here:
-  // - Markup (atom, html, markup, mathml, rss, ssml, svg, xml)
-  // - CSS (css)
-  // - C-like (clike)
-  // - JavaScript (javascript, js)
-  loadLanguages([
-    "apacheconf",
-    "bash",
-    "batch",
-    "c",
-    "cpp",
-    "cs",
-    "diff",
-    "django",
-    "glsl",
-    "go",
-    "handlebars",
-    "http",
-    "ignore",
-    "ini",
-    "java",
-    "json",
-    "jsx",
-    "latex",
-    "less",
-    "md",
-    "nginx",
-    "php",
-    "powershell",
-    "pug",
-    "python",
-    "regex",
-    "rust",
-    "scss",
-    "sql",
-    // 'svelte', // Loaded by `prism-svelte` extension
-    "toml",
-    "tsx",
-    "typescript",
-    "uri",
-    "wasm",
-    "webidl",
-    "yaml",
-  ]);
-});
-
-// Add things to this list to help make things convenient. Sometimes
-// there are `
` whose name is not that which
-// Prism expects. It'd be hard to require that content writers
-// have to stick to the exact naming conventions that Prism uses
-// because Prism is an implementation detail.
-const ALIASES = new Map([
-  ["sh", "shell"],
-  ["vue", "markup"], // See https://github.com/PrismJS/prism/issues/1665#issuecomment-536529608
-]);
-
-// Over the years we have accumulated some weird <pre> tags whose
-// brush is more or less "junk".
-// TODO: Perhaps, if you have a doc with <pre> tags that matches
-// this, it should become a flaw.
-const IGNORE = new Set(["none", "text", "plain", "unix"]);
-
-/**
- * Mutate the `$` instance for by looking for <pre> tags that can be
- * syntax highlighted with Prism.
- *
- */
-export function syntaxHighlight($: cheerio.CheerioAPI, doc) {
-  loadAllLanguages();
-
-  // Our content will be like this: `<pre class="brush:js">` or
-  // `<pre class="brush: js">` so we're technically not looking for an exact
-  // match. The wildcard would technically match `<pre class="brushetta">`
-  // too. But within the loop, we do a more careful regex on the class name
-  // and only proceed if it's something sensible we can use in Prism.
-  $("pre[class*=brush]").each((_, element) => {
-    // The language is whatever string comes after the `brush(:)`
-    // portion of the class name.
-    const $pre = $(element);
-
-    const className = $pre.attr("class").toLowerCase();
-    const match = className.match(/brush:?\s*([\w_-]+)/);
-    if (!match) {
-      return;
-    }
-    let name = match[1].replace("-nolint", "");
-    if (ALIASES.has(name)) {
-      name = ALIASES.get(name);
-    }
-    if (IGNORE.has(name)) {
-      // Seems to exist a couple of these in our docs. Just bail.
-      return;
-    }
-    const code = $pre.text();
-    if (SAMPLE_SIGN_KEY) {
-      const hmac = createHmac("sha256", SAMPLE_SIGN_KEY);
-      hmac.update(name.toLowerCase());
-      hmac.update(code);
-      const signature = hmac.digest("base64");
-      $pre.attr("data-signature", signature);
-    }
-    $pre.wrapAll(`
`); - if (!$pre.hasClass("hidden")) { - $( - `
${name}
` - ).insertBefore($pre); - } - const grammar = Prism.languages[name]; - if (!grammar) { - console.warn( - `Unable to find a Prism grammar for '${name}' found in ${doc.mdn_url}` - ); - return; // bail! - } - const html = Prism.highlight(code, grammar, name); - const $code = $("").html(html); - - $pre.empty().append($code); - }); -} diff --git a/client/src/blog/post.tsx b/client/src/blog/post.tsx index 75282a72fc3d..4f817184489b 100644 --- a/client/src/blog/post.tsx +++ b/client/src/blog/post.tsx @@ -14,10 +14,7 @@ import { BlogPostLimitedMetadata, AuthorMetadata, } from "../../../libs/types/blog"; -import { - useCopyExamplesToClipboardAndAIExplain, - useRunSample, -} from "../document/hooks"; +import { useDecorateCodeExamples, useRunSample } from "../document/hooks"; import { DEFAULT_LOCALE } from "../../../libs/constants"; import { SignUpSection as NewsletterSignUp } from "../newsletter"; import { TOC } from "../document/organisms/toc"; @@ -190,7 +187,7 @@ export function BlogPost(props: HydrationData) { ); const { doc, blogMeta } = data || props || {}; useRunSample(doc); - useCopyExamplesToClipboardAndAIExplain(doc); + useDecorateCodeExamples(doc); return ( <> {doc && blogMeta && ( diff --git a/client/src/document/code/syntax-highlight.tsx b/client/src/document/code/syntax-highlight.tsx new file mode 100644 index 000000000000..d60461d0502c --- /dev/null +++ b/client/src/document/code/syntax-highlight.tsx @@ -0,0 +1,168 @@ +import Prism from "prismjs"; +import components from "prismjs/components"; +import { useMemo, useState, useEffect } from "react"; + +Prism.manual = true; + +const PRISM_LANGUAGES = components.languages as Record< + string, + { + alias?: string | string[]; + require?: string | string[]; + optional?: string | string[]; + [key: string]: any; + } +>; + +// Add things to this list to help make things convenient. Sometimes +// there are `
` whose name is not that which
+// Prism expects. It'd be hard to require that content writers
+// have to stick to the exact naming conventions that Prism uses
+// because Prism is an implementation detail.
+// Maps alternative brush names to the Prism language id that implements
+// them: one hand-maintained entry, followed by every alias declared in
+// Prism's own component metadata. On duplicate keys the later entry wins.
+const ALIASES = new Map<string, string>();
+// See https://github.com/PrismJS/prism/issues/1665#issuecomment-536529608
+ALIASES.set("vue", "markup");
+for (const [lang, config] of Object.entries(PRISM_LANGUAGES)) {
+  if (!config.alias) {
+    continue;
+  }
+  // The metadata stores a lone alias as a plain string; normalise to a list.
+  const aliases =
+    typeof config.alias === "string" ? [config.alias] : config.alias;
+  for (const alias of aliases) {
+    ALIASES.set(alias, lang);
+  }
+}
+
+/**
+ * Props accepted by `CodeWithSyntaxHighlight`: any standard HTML attribute
+ * (forwarded to the rendered element) plus the language to highlight with.
+ */
+interface HighlightedCodeProps extends React.HTMLAttributes<HTMLElement> {
+  /** Brush / language name; when omitted the children are rendered as-is. */
+  language?: string;
+  /** The source text to display; it is stringified before highlighting. */
+  children: React.ReactNode;
+}
+
+/**
+ * Render `children` inside a `<code>` element, syntax highlighted with Prism
+ * when a grammar for `language` is (or becomes) available.
+ *
+ * The first render highlights synchronously with whatever grammar is already
+ * registered; an effect then loads the grammar on demand and stores the
+ * highlighted markup in state. Until markup is available, the raw children
+ * are rendered unhighlighted.
+ *
+ * @param language Brush / language name; no highlighting is attempted without it.
+ * @param children Source text to display.
+ * @param props Remaining HTML attributes, spread onto the `<code>` element.
+ */
+export function CodeWithSyntaxHighlight({
+  language,
+  children,
+  ...props
+}: HighlightedCodeProps) {
+  const initial = useMemo(
+    // needed to prevent flashing
+    () =>
+      language ? highlightStringSync(String(children), language) : undefined,
+    [children, language]
+  );
+  const [html, setHtml] = useState(initial);
+
+  useEffect(() => {
+    // Set by the cleanup below so that a highlight which resolves after
+    // `children`/`language` changed (or after unmount) cannot overwrite the
+    // result belonging to the newer input.
+    let cancelled = false;
+    if (language) {
+      (async () => {
+        const highlighted = await highlightString(String(children), language);
+        if (!cancelled) {
+          setHtml(highlighted);
+        }
+      })();
+    }
+    return () => {
+      cancelled = true;
+    };
+  }, [children, language]);
+
+  // `html` is markup produced by Prism, so it has to be injected as HTML;
+  // without it, fall back to rendering the children as plain text.
+  return html ? (
+    <code {...props} dangerouslySetInnerHTML={{ __html: html }} />
+  ) : (
+    <code {...props}>{children}</code>
+  );
+}
+
+/**
+ * Syntax highlight the text content of `element` in place.
+ *
+ * The element's text is highlighted with the grammar for `language` (loaded
+ * on demand). When highlighting produces markup, the element's content is
+ * replaced by that markup wrapped in a `<code>` element; otherwise the
+ * element is left untouched.
+ *
+ * @param element Element whose text content is the code to highlight.
+ * @param language Brush / language name (aliases are resolved internally).
+ */
+export async function highlightElement(element: Element, language: string) {
+  const highlighted = await highlightString(
+    element.textContent || "",
+    language
+  );
+  if (highlighted) {
+    element.innerHTML = `<code>${highlighted}</code>`;
+  }
+}
+
+/**
+ * Highlight `text` as `language`, importing the Prism grammar first if it is
+ * not registered yet.
+ *
+ * @param text Source code to highlight.
+ * @param language Brush / language name; aliases are resolved via `ALIASES`.
+ * @returns The highlighted markup, or `undefined` when the grammar could not
+ *   be imported or highlighting failed.
+ */
+async function highlightString(
+  text: string,
+  language: string
+): Promise<string | undefined> {
+  const resolvedLanguage = ALIASES.get(language) || language;
+
+  try {
+    await importLanguage(resolvedLanguage);
+  } catch {
+    // Import failures are not fatal: the caller keeps the unhighlighted text.
+    return;
+  }
+
+  return highlightStringSync(text, language);
+}
+
+/**
+ * Highlight `text` with a grammar Prism already has registered; never loads
+ * anything.
+ *
+ * @param text Source code to highlight.
+ * @param language Brush / language name; aliases are resolved via `ALIASES`.
+ * @returns The highlighted markup, or `undefined` when no grammar is
+ *   registered for the language or Prism throws while highlighting.
+ */
+function highlightStringSync(
+  text: string,
+  language: string
+): string | undefined {
+  const grammarName = ALIASES.get(language) || language;
+  const grammar = Prism.languages[grammarName];
+  if (!grammar) {
+    return undefined;
+  }
+  try {
+    return Prism.highlight(text, grammar, grammarName);
+  } catch {
+    console.warn("Syntax highlighting: prism error");
+    return undefined;
+  }
+}
+
+/**
+ * Make sure the Prism grammar for `language` is registered, dynamically
+ * importing its component when `Prism.languages` does not have it yet.
+ *
+ * Dependencies listed under `require` in Prism's component metadata are
+ * imported first; if any of them fails, this function returns without
+ * importing the language itself. Dependencies listed under `optional` are
+ * attempted too, but their failures are ignored.
+ *
+ * @param language Prism language id (highlightString resolves aliases before
+ *   calling; dependency names are passed through as listed in the metadata).
+ * @param recursiveDepth Nesting level of dependency imports, used to abort
+ *   on runaway recursion.
+ * @throws If the recursion limit is exceeded, if `language` has no entry in
+ *   Prism's component metadata, or if importing the component fails.
+ */
+async function importLanguage(language: string, recursiveDepth = 0) {
+  if (recursiveDepth > 100) {
+    console.warn("Syntax highlighting: recursion error");
+    throw new Error("Syntax highlighting: recursion error");
+  }
+
+  if (Prism.languages[language]) {
+    // Grammar already registered: nothing to load.
+    return;
+  }
+
+  if (language === "svelte") {
+    // Svelte comes from a separate package rather than from Prism's own
+    // components directory.
+    try {
+      await import(
+        /* webpackChunkName: "prism-svelte" */
+        "prism-svelte"
+      );
+    } catch (e) {
+      console.warn(
+        `Syntax highlighting: failed to import ${language} prism language`
+      );
+      throw e;
+    }
+    return;
+  }
+
+  const config = PRISM_LANGUAGES[language];
+  if (!config) {
+    // Fail with a descriptive error instead of the TypeError that reading
+    // `config.require` on `undefined` would otherwise produce.
+    throw new Error(`Syntax highlighting: unknown prism language ${language}`);
+  }
+
+  if (config.require) {
+    try {
+      await Promise.all(
+        (typeof config.require === "string"
+          ? [config.require]
+          : config.require
+        ).map((dependency) => importLanguage(dependency, recursiveDepth + 1))
+      );
+    } catch {
+      // A hard dependency is unavailable, so give up on this language
+      // without raising; the caller simply finds no grammar registered.
+      return;
+    }
+  }
+  if (config.optional) {
+    await Promise.allSettled(
+      (typeof config.optional === "string"
+        ? [config.optional]
+        : config.optional
+      ).map((dependency) => importLanguage(dependency, recursiveDepth + 1))
+    );
+  }
+  try {
+    await import(
+      /* webpackChunkName: "[request]" */
+      /* webpackExclude: /\.min\.js$/ */
+      `prismjs/components/prism-${language}.js`
+    );
+  } catch (e) {
+    console.warn(
+      `Syntax highlighting: failed to import ${language} prism language`
+    );
+    throw e;
+  }
+}
diff --git a/client/src/document/hooks.ts b/client/src/document/hooks.ts
index 38c9114cea10..c75aa69515a5 100644
--- a/client/src/document/hooks.ts
+++ b/client/src/document/hooks.ts
@@ -95,15 +95,11 @@ export function useRunSample(doc: Doc | undefined) {
     });
   }, [doc, isServer, locale]);
 }
-export function useCopyExamplesToClipboardAndAIExplain(doc: Doc | undefined) {
+
+export function useDecorateCodeExamples(doc: Doc | undefined) {
   const location = useLocation();
-  const isServer = useIsServer();
 
   useEffect(() => {
-    if (isServer) {
-      return;
-    }
-
     if (!doc) {
       return;
     }
@@ -122,8 +118,14 @@ export function useCopyExamplesToClipboardAndAIExplain(doc: Doc | undefined) {
         } else {
           addCopyToClipboardButton(element, header);
         }
+        import("./code/syntax-highlight").then(({ highlightElement }) => {
+          highlightElement(
+            element,
+            header?.querySelector(".language-name")?.textContent || "plain"
+          );
+        });
       });
-  }, [doc, location, isServer]);
+  }, [doc, location]);
 }
 
 /**
diff --git a/client/src/document/index.tsx b/client/src/document/index.tsx
index bc19fe391833..52ab4700bb75 100644
--- a/client/src/document/index.tsx
+++ b/client/src/document/index.tsx
@@ -6,11 +6,7 @@ import { WRITER_MODE, PLACEMENT_ENABLED } from "../env";
 import { useGA } from "../ga-context";
 import { useIsServer, useLocale } from "../hooks";
 
-import {
-  useDocumentURL,
-  useCopyExamplesToClipboardAndAIExplain,
-  useRunSample,
-} from "./hooks";
+import { useDocumentURL, useDecorateCodeExamples, useRunSample } from "./hooks";
 import { Doc } from "../../../libs/types/document";
 // Ingredients
 import { Prose } from "./ingredients/prose";
@@ -124,7 +120,7 @@ export function Document(props /* TODO: define a TS interface for this */) {
   useIncrementFrequentlyViewed(doc);
   useRunSample(doc);
   //useCollectSample(doc);
-  useCopyExamplesToClipboardAndAIExplain(doc);
+  useDecorateCodeExamples(doc);
   useInteractiveExamplesTelemetry();
 
   React.useEffect(() => {
diff --git a/client/src/plus/ai-help/index.tsx b/client/src/plus/ai-help/index.tsx
index 9f44827e36c0..d0fb7d464d80 100644
--- a/client/src/plus/ai-help/index.tsx
+++ b/client/src/plus/ai-help/index.tsx
@@ -1,4 +1,3 @@
-import Prism from "prismjs";
 import {
   Children,
   MutableRefObject,
@@ -59,6 +58,7 @@ import {
 } from "./constants";
 import InternalLink from "../../ui/atoms/internal-link";
 import { isPlusSubscriber } from "../../utils";
+import { CodeWithSyntaxHighlight } from "../../document/code/syntax-highlight";
 
 type Category = "apis" | "css" | "html" | "http" | "js" | "learn";
 
@@ -482,19 +482,15 @@ function AIHelpAssistantResponse({
               },
               code: ({ className, children, ...props }) => {
                 const match = /language-(\w+)/.exec(className || "");
-                const lang = Prism.languages[match?.[1]];
-                return lang ? (
-                  
-                ) : (
-                  
+                    {...props}
+                  >
                     {children}
-                  
+                  
                 );
               },
             }}
diff --git a/package.json b/package.json
index afa3fe85a40d..49093261accb 100644
--- a/package.json
+++ b/package.json
@@ -168,6 +168,7 @@
     "@types/js-yaml": "^4.0.9",
     "@types/mdast": "^4.0.4",
     "@types/node": "^18.19.50",
+    "@types/prismjs": "^1.26.4",
     "@types/react": "^18.3.6",
     "@types/react-dom": "^18.3.0",
     "@types/react-modal": "^3.16.3",
diff --git a/yarn.lock b/yarn.lock
index 8aa543b8c81b..7aea3c3f91b7 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -3310,6 +3310,11 @@
   resolved "https://registry.yarnpkg.com/@types/prettier/-/prettier-2.7.3.tgz#3e51a17e291d01d17d3fc61422015a933af7a08f"
   integrity sha512-+68kP9yzs4LMp7VNh8gdzMSPZFL44MLGqiHWvttYJe+6qnuVr4Ek9wSBQoveqY/r+LwjCcU29kNVkidwim+kYA==
 
+"@types/prismjs@^1.26.4":
+  version "1.26.4"
+  resolved "https://registry.yarnpkg.com/@types/prismjs/-/prismjs-1.26.4.tgz#1a9e1074619ce1d7322669e5b46fbe823925103a"
+  integrity sha512-rlAnzkW2sZOjbqZ743IHUhFcvzaGbqijwOu8QZnZCjfQzBqFE3s4lOTJEsxikImav9uzz/42I+O7YUs1mWgMlg==
+
 "@types/prop-types@*":
   version "15.7.12"
   resolved "https://registry.yarnpkg.com/@types/prop-types/-/prop-types-15.7.12.tgz#12bb1e2be27293c1406acb6af1c3f3a1481d98c6"