diff --git a/kit/preprocessors/docstring.js b/kit/preprocessors/docstring.js index 86904589..fe61569c 100644 --- a/kit/preprocessors/docstring.js +++ b/kit/preprocessors/docstring.js @@ -1,25 +1,25 @@ import domUtils from "domutils"; import htmlparser2 from "htmlparser2"; -import { replaceAsync, renderSvelteChars } from "./utils.js"; +import { replaceAsync, renderSvelteChars, generateTagRegex } from "./utils.js"; import { mdsvexPreprocess } from "./mdsvex/index.js"; // Preprocessor that converts markdown into Docstring // svelte component using mdsvexPreprocess export const docstringPreprocess = { markup: async ({ content, filename }) => { - const REGEX_DOCSTRING = /(((?!).)*)<\/docstring>/gms; - const REGEX_NAME = /(((?!).)*)<\/name>/ms; - const REGEX_ANCHOR = /(((?!).)*)<\/anchor>/ms; - const REGEX_SIGNATURE = /(((?!).)*)<\/parameters>/ms; - const REGEX_PARAMSDESC = /(((?!).)*)<\/paramsdesc>/ms; - const REGEX_PARAMSGROUPS = /(((?!).)*)<\/paramgroups>/ms; - const REGEX_RETDESC = /(((?!).)*)<\/retdesc>/ms; - const REGEX_RETTYPE = /(((?!).)*)<\/rettype>/ms; - const REGEX_YIELDESC = /(((?!).)*)<\/yieldesc>/ms; - const REGEX_YIELDTYPE = /(((?!).)*)<\/yieldtype>/ms; - const REGEX_RAISEDESC = /(((?!).)*)<\/raises>/ms; - const REGEX_RAISETYPE = /(((?!).)*)<\/raisederrors>/ms; - const REGEX_SOURCE = /(((?!).)*)<\/source>/ms; + const REGEX_DOCSTRING = generateTagRegex("docstring", true); + const REGEX_NAME = generateTagRegex("name"); + const REGEX_ANCHOR = generateTagRegex("anchor"); + const REGEX_SIGNATURE = generateTagRegex("parameters"); + const REGEX_PARAMSDESC = generateTagRegex("paramsdesc"); + const REGEX_PARAMSGROUPS = generateTagRegex("paramgroups"); + const REGEX_RETDESC = generateTagRegex("retdesc"); + const REGEX_RETTYPE = generateTagRegex("rettype"); + const REGEX_YIELDESC = generateTagRegex("yieldesc"); + const REGEX_YIELDTYPE = generateTagRegex("yieldtype"); + const REGEX_RAISEDESC = generateTagRegex("raises"); + const REGEX_RAISETYPE = 
generateTagRegex("raisederrors"); + const REGEX_SOURCE = generateTagRegex("source"); const REGEX_TIP = /(((?!).)*)<\/Tip>/gms; const REGEX_CHANGED = /<(Added|Changed|Deprecated) version="([0-9.v]+)" ?\/?>((((?!<(Added|Changed|Deprecated) version="([0-9.v]+)"\/?>).)*)<\/(Added|Changed|Deprecated)>)?/gms; diff --git a/kit/preprocessors/frameworkcontent.js b/kit/preprocessors/frameworkcontent.js index cedf6d98..1fab62a4 100644 --- a/kit/preprocessors/frameworkcontent.js +++ b/kit/preprocessors/frameworkcontent.js @@ -1,14 +1,13 @@ -import { replaceAsync } from "./utils.js"; +import { replaceAsync, generateTagRegex } from "./utils.js"; // Preprocessor that converts markdown into FrameworkContent // svelte component using mdsvexPreprocess export const frameworkcontentPreprocess = { markup: async ({ content }) => { - const REGEX_FRAMEWORKCONTENT = - /(((?!).)*)<\/frameworkcontent>/gms; - const REGEX_PYTORCH = /(((?!).)*)<\/pt>/ms; - const REGEX_TENSORFLOW = /(((?!).)*)<\/tf>/ms; - const REGEX_JAX = /(((?!).)*)<\/jax>/ms; + const REGEX_FRAMEWORKCONTENT = generateTagRegex("frameworkcontent", true); + const REGEX_PYTORCH = generateTagRegex("pt"); + const REGEX_TENSORFLOW = generateTagRegex("tf"); + const REGEX_JAX = generateTagRegex("jax"); content = await replaceAsync(content, REGEX_FRAMEWORKCONTENT, async (_, fwcontentBody) => { const FRAMEWORKS = [ diff --git a/kit/preprocessors/inferenceSnippet.js b/kit/preprocessors/inferenceSnippet.js index 5c9a92e3..44c63d8c 100644 --- a/kit/preprocessors/inferenceSnippet.js +++ b/kit/preprocessors/inferenceSnippet.js @@ -1,14 +1,13 @@ -import { replaceAsync } from "./utils.js"; +import { replaceAsync, generateTagRegex } from "./utils.js"; // Preprocessor that converts markdown into InferenceApi // svelte component using mdsvexPreprocess export const inferenceSnippetPreprocess = { markup: async ({ content }) => { - const REGEX_FRAMEWORKCONTENT = - /(((?!).)*)<\/inferencesnippet>/gms; - const REGEX_PYTHON = /(((?!).)*)<\/python>/ms; - 
/**
 * Build a regex that captures the content between an html-like opening tag
 * and the nearest following closing tag of the same name.
 * Used for parsing the custom tag syntax handled by the preprocessors.
 *
 * The capture is lazy (`.*?`), so each match stops at the first closing tag
 * that follows the opening tag. The `s` flag lets `.` span newlines, so
 * multi-line tag bodies are captured as a single group.
 *
 * The tag name is escaped before being placed in the pattern, so characters
 * that are special in a regex (e.g. `.`, `+`, `(`) are matched literally
 * instead of changing the pattern or throwing a SyntaxError.
 *
 * example: generateTagRegex("inferenceSnippet", true)
 *            -> /<inferenceSnippet>(.*?)<\/inferenceSnippet>/gms
 *
 * @param {string} tag - The name of the tag to match content within.
 * @param {boolean} [global=false] - Whether to create a global pattern that matches all occurrences.
 * @returns {RegExp} - A fresh RegExp whose first capture group is the tag body.
 */
export function generateTagRegex(tag, global = false) {
  // Escape regex metacharacters so the tag name is always matched literally.
  const escapedTag = String(tag).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const flags = global ? "msg" : "ms";
  // A new RegExp is created on every call, so a global pattern's `lastIndex`
  // state is never shared between callers.
  return new RegExp(`<${escapedTag}>(.*?)<\\/${escapedTag}>`, flags);
}