Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor tag regex #429

Merged
merged 1 commit into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions kit/preprocessors/docstring.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
import domUtils from "domutils";
import htmlparser2 from "htmlparser2";
import { replaceAsync, renderSvelteChars } from "./utils.js";
import { replaceAsync, renderSvelteChars, generateTagRegex } from "./utils.js";
import { mdsvexPreprocess } from "./mdsvex/index.js";

// Preprocessor that converts markdown into Docstring
// svelte component using mdsvexPreprocess
export const docstringPreprocess = {
markup: async ({ content, filename }) => {
const REGEX_DOCSTRING = /<docstring>(((?!<docstring>).)*)<\/docstring>/gms;
const REGEX_NAME = /<name>(((?!<name>).)*)<\/name>/ms;
const REGEX_ANCHOR = /<anchor>(((?!<anchor>).)*)<\/anchor>/ms;
const REGEX_SIGNATURE = /<parameters>(((?!<parameters>).)*)<\/parameters>/ms;
const REGEX_PARAMSDESC = /<paramsdesc>(((?!<paramsdesc>).)*)<\/paramsdesc>/ms;
const REGEX_PARAMSGROUPS = /<paramgroups>(((?!<paramgroups>).)*)<\/paramgroups>/ms;
const REGEX_RETDESC = /<retdesc>(((?!<retdesc>).)*)<\/retdesc>/ms;
const REGEX_RETTYPE = /<rettype>(((?!<rettype>).)*)<\/rettype>/ms;
const REGEX_YIELDESC = /<yieldesc>(((?!<yieldesc>).)*)<\/yieldesc>/ms;
const REGEX_YIELDTYPE = /<yieldtype>(((?!<yieldtype>).)*)<\/yieldtype>/ms;
const REGEX_RAISEDESC = /<raises>(((?!<raises>).)*)<\/raises>/ms;
const REGEX_RAISETYPE = /<raisederrors>(((?!<raisederrors>).)*)<\/raisederrors>/ms;
const REGEX_SOURCE = /<source>(((?!<source>).)*)<\/source>/ms;
const REGEX_DOCSTRING = generateTagRegex("docstring", true);
const REGEX_NAME = generateTagRegex("name");
const REGEX_ANCHOR = generateTagRegex("anchor");
const REGEX_SIGNATURE = generateTagRegex("parameters");
const REGEX_PARAMSDESC = generateTagRegex("paramsdesc");
const REGEX_PARAMSGROUPS = generateTagRegex("paramgroups");
const REGEX_RETDESC = generateTagRegex("retdesc");
const REGEX_RETTYPE = generateTagRegex("rettype");
const REGEX_YIELDESC = generateTagRegex("yieldesc");
const REGEX_YIELDTYPE = generateTagRegex("yieldtype");
const REGEX_RAISEDESC = generateTagRegex("raises");
const REGEX_RAISETYPE = generateTagRegex("raisederrors");
const REGEX_SOURCE = generateTagRegex("source");
const REGEX_TIP = /<Tip( warning={true})?>(((?!<Tip( warning={true})?>).)*)<\/Tip>/gms;
const REGEX_CHANGED =
/<(Added|Changed|Deprecated) version="([0-9.v]+)" ?\/?>((((?!<(Added|Changed|Deprecated) version="([0-9.v]+)"\/?>).)*)<\/(Added|Changed|Deprecated)>)?/gms;
Expand Down
11 changes: 5 additions & 6 deletions kit/preprocessors/frameworkcontent.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import { replaceAsync } from "./utils.js";
import { replaceAsync, generateTagRegex } from "./utils.js";

// Preprocessor that converts markdown into FrameworkContent
// svelte component using mdsvexPreprocess
export const frameworkcontentPreprocess = {
markup: async ({ content }) => {
const REGEX_FRAMEWORKCONTENT =
/<frameworkcontent>(((?!<frameworkcontent>).)*)<\/frameworkcontent>/gms;
const REGEX_PYTORCH = /<pt>(((?!<pt>).)*)<\/pt>/ms;
const REGEX_TENSORFLOW = /<tf>(((?!<tf>).)*)<\/tf>/ms;
const REGEX_JAX = /<jax>(((?!<jax>).)*)<\/jax>/ms;
const REGEX_FRAMEWORKCONTENT = generateTagRegex("frameworkcontent", true);
const REGEX_PYTORCH = generateTagRegex("pt");
const REGEX_TENSORFLOW = generateTagRegex("tf");
const REGEX_JAX = generateTagRegex("jax");

content = await replaceAsync(content, REGEX_FRAMEWORKCONTENT, async (_, fwcontentBody) => {
const FRAMEWORKS = [
Expand Down
11 changes: 5 additions & 6 deletions kit/preprocessors/inferenceSnippet.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import { replaceAsync } from "./utils.js";
import { replaceAsync, generateTagRegex } from "./utils.js";

// Preprocessor that converts markdown into InferenceApi
// svelte component using mdsvexPreprocess
export const inferenceSnippetPreprocess = {
markup: async ({ content }) => {
const REGEX_FRAMEWORKCONTENT =
/<inferencesnippet>(((?!<inferencesnippet>).)*)<\/inferencesnippet>/gms;
const REGEX_PYTHON = /<python>(((?!<python>).)*)<\/python>/ms;
const REGEX_JS = /<js>(((?!<js>).)*)<\/js>/ms;
const REGEX_CURL = /<curl>(((?!<curl>).)*)<\/curl>/ms;
const REGEX_FRAMEWORKCONTENT = generateTagRegex("inferencesnippet", true);
const REGEX_PYTHON = generateTagRegex("python");
const REGEX_JS = generateTagRegex("js");
const REGEX_CURL = generateTagRegex("curl");
const FRAMEWORKS = [
{ framework: "python", REGEX_FW: REGEX_PYTHON, isExist: false },
{ framework: "js", REGEX_FW: REGEX_JS, isExist: false },
Expand Down
11 changes: 5 additions & 6 deletions kit/preprocessors/tokenizersLang.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import { replaceAsync } from "./utils.js";
import { replaceAsync, generateTagRegex } from "./utils.js";

// Preprocessor that converts markdown into TokenizersLanguageContent
// svelte component using mdsvexPreprocess
export const tokenizersLangPreprocess = {
markup: async ({ content }) => {
const REGEX_FRAMEWORKCONTENT =
/<tokenizerslangcontent>(((?!<tokenizerslangcontent>).)*)<\/tokenizerslangcontent>/gms;
const REGEX_PYTHON = /<python>(((?!<python>).)*)<\/python>/ms;
const RGEX_RUST = /<rust>(((?!<rust>).)*)<\/rust>/ms;
const REGEX_NODE = /<node>(((?!<node>).)*)<\/node>/ms;
const REGEX_FRAMEWORKCONTENT = generateTagRegex("tokenizerslangcontent", true);
const REGEX_PYTHON = generateTagRegex("python");
const RGEX_RUST = generateTagRegex("rust");
const REGEX_NODE = generateTagRegex("node");
const FRAMEWORKS = [
{ framework: "python", REGEX_FW: REGEX_PYTHON, isExist: false },
{ framework: "rust", REGEX_FW: RGEX_RUST, isExist: false },
Expand Down
14 changes: 14 additions & 0 deletions kit/preprocessors/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,17 @@ export async function replaceAsync(string, searchValue, replacer) {
/**
 * Turn the double-escaped entities `&amp;lcub;` and `&amp;lt;` back into the
 * literal characters `{` and `<`.
 * @param {string} code - Text that may contain the double-escaped entities.
 * @returns {string} The text with every occurrence of both entities replaced.
 */
export function renderSvelteChars(code) {
  // First pass: restore opening curly braces.
  const withCurlyBraces = code.replace(/&amp;lcub;/g, "{");
  // Second pass: restore less-than signs.
  const withAngleBrackets = withCurlyBraces.replace(/&amp;lt;/g, "<");
  return withAngleBrackets;
}

/**
 * Create a regex that captures an html-like opening and closing tag pair and
 * the content between them. Used for parsing hf custom syntax.
 *
 * The content is exposed as capture group 1. Matching is lazy, so it stops at
 * the first closing tag, and the `s` flag lets the content span multiple lines.
 * Only the bare `<tag>` form is matched (no attributes). The tag name is
 * treated as literal text: regex metacharacters in it are escaped.
 *
 * example: generateTagRegex("inferencesnippet", true) -> /<inferencesnippet>(.*?)<\/inferencesnippet>/gms
 * @param {string} tag - The name of the tag to match content within.
 * @param {boolean} [global=false] - Whether to create a global pattern that matches all occurrences.
 * @returns {RegExp} - The generated RegExp pattern.
 */
export function generateTagRegex(tag, global = false) {
  // Escape regex metacharacters so a tag such as "a.b" or "c++" is matched
  // literally instead of being interpreted as (possibly invalid) regex syntax.
  const escapedTag = String(tag).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const flags = global ? "msg" : "ms";
  return new RegExp(`<${escapedTag}>(.*?)<\\/${escapedTag}>`, flags);
}
Loading