Skip to content

Commit

Permalink
Refactor tag regex (#429)
Browse files Browse the repository at this point in the history
  • Loading branch information
mishig25 committed Oct 19, 2023
1 parent 0ebb485 commit f4ad3d7
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 32 deletions.
28 changes: 14 additions & 14 deletions kit/preprocessors/docstring.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
import domUtils from "domutils";
import htmlparser2 from "htmlparser2";
import { replaceAsync, renderSvelteChars } from "./utils.js";
import { replaceAsync, renderSvelteChars, generateTagRegex } from "./utils.js";
import { mdsvexPreprocess } from "./mdsvex/index.js";

// Preprocessor that converts markdown into Docstring
// svelte component using mdsvexPreprocess
export const docstringPreprocess = {
markup: async ({ content, filename }) => {
const REGEX_DOCSTRING = /<docstring>(((?!<docstring>).)*)<\/docstring>/gms;
const REGEX_NAME = /<name>(((?!<name>).)*)<\/name>/ms;
const REGEX_ANCHOR = /<anchor>(((?!<anchor>).)*)<\/anchor>/ms;
const REGEX_SIGNATURE = /<parameters>(((?!<parameters>).)*)<\/parameters>/ms;
const REGEX_PARAMSDESC = /<paramsdesc>(((?!<paramsdesc>).)*)<\/paramsdesc>/ms;
const REGEX_PARAMSGROUPS = /<paramgroups>(((?!<paramgroups>).)*)<\/paramgroups>/ms;
const REGEX_RETDESC = /<retdesc>(((?!<retdesc>).)*)<\/retdesc>/ms;
const REGEX_RETTYPE = /<rettype>(((?!<rettype>).)*)<\/rettype>/ms;
const REGEX_YIELDESC = /<yieldesc>(((?!<yieldesc>).)*)<\/yieldesc>/ms;
const REGEX_YIELDTYPE = /<yieldtype>(((?!<yieldtype>).)*)<\/yieldtype>/ms;
const REGEX_RAISEDESC = /<raises>(((?!<raises>).)*)<\/raises>/ms;
const REGEX_RAISETYPE = /<raisederrors>(((?!<raisederrors>).)*)<\/raisederrors>/ms;
const REGEX_SOURCE = /<source>(((?!<source>).)*)<\/source>/ms;
const REGEX_DOCSTRING = generateTagRegex("docstring", true);
const REGEX_NAME = generateTagRegex("name");
const REGEX_ANCHOR = generateTagRegex("anchor");
const REGEX_SIGNATURE = generateTagRegex("parameters");
const REGEX_PARAMSDESC = generateTagRegex("paramsdesc");
const REGEX_PARAMSGROUPS = generateTagRegex("paramgroups");
const REGEX_RETDESC = generateTagRegex("retdesc");
const REGEX_RETTYPE = generateTagRegex("rettype");
const REGEX_YIELDESC = generateTagRegex("yieldesc");
const REGEX_YIELDTYPE = generateTagRegex("yieldtype");
const REGEX_RAISEDESC = generateTagRegex("raises");
const REGEX_RAISETYPE = generateTagRegex("raisederrors");
const REGEX_SOURCE = generateTagRegex("source");
const REGEX_TIP = /<Tip( warning={true})?>(((?!<Tip( warning={true})?>).)*)<\/Tip>/gms;
const REGEX_CHANGED =
/<(Added|Changed|Deprecated) version="([0-9.v]+)" ?\/?>((((?!<(Added|Changed|Deprecated) version="([0-9.v]+)"\/?>).)*)<\/(Added|Changed|Deprecated)>)?/gms;
Expand Down
11 changes: 5 additions & 6 deletions kit/preprocessors/frameworkcontent.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import { replaceAsync } from "./utils.js";
import { replaceAsync, generateTagRegex } from "./utils.js";

// Preprocessor that converts markdown into FrameworkContent
// svelte component using mdsvexPreprocess
export const frameworkcontentPreprocess = {
markup: async ({ content }) => {
const REGEX_FRAMEWORKCONTENT =
/<frameworkcontent>(((?!<frameworkcontent>).)*)<\/frameworkcontent>/gms;
const REGEX_PYTORCH = /<pt>(((?!<pt>).)*)<\/pt>/ms;
const REGEX_TENSORFLOW = /<tf>(((?!<tf>).)*)<\/tf>/ms;
const REGEX_JAX = /<jax>(((?!<jax>).)*)<\/jax>/ms;
const REGEX_FRAMEWORKCONTENT = generateTagRegex("frameworkcontent", true);
const REGEX_PYTORCH = generateTagRegex("pt");
const REGEX_TENSORFLOW = generateTagRegex("tf");
const REGEX_JAX = generateTagRegex("jax");

content = await replaceAsync(content, REGEX_FRAMEWORKCONTENT, async (_, fwcontentBody) => {
const FRAMEWORKS = [
Expand Down
11 changes: 5 additions & 6 deletions kit/preprocessors/inferenceSnippet.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import { replaceAsync } from "./utils.js";
import { replaceAsync, generateTagRegex } from "./utils.js";

// Preprocessor that converts markdown into InferenceApi
// svelte component using mdsvexPreprocess
export const inferenceSnippetPreprocess = {
markup: async ({ content }) => {
const REGEX_FRAMEWORKCONTENT =
/<inferencesnippet>(((?!<inferencesnippet>).)*)<\/inferencesnippet>/gms;
const REGEX_PYTHON = /<python>(((?!<python>).)*)<\/python>/ms;
const REGEX_JS = /<js>(((?!<js>).)*)<\/js>/ms;
const REGEX_CURL = /<curl>(((?!<curl>).)*)<\/curl>/ms;
const REGEX_FRAMEWORKCONTENT = generateTagRegex("inferencesnippet", true);
const REGEX_PYTHON = generateTagRegex("python");
const REGEX_JS = generateTagRegex("js");
const REGEX_CURL = generateTagRegex("curl");
const FRAMEWORKS = [
{ framework: "python", REGEX_FW: REGEX_PYTHON, isExist: false },
{ framework: "js", REGEX_FW: REGEX_JS, isExist: false },
Expand Down
11 changes: 5 additions & 6 deletions kit/preprocessors/tokenizersLang.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import { replaceAsync } from "./utils.js";
import { replaceAsync, generateTagRegex } from "./utils.js";

// Preprocessor that converts markdown into TokenizersLanguageContent
// svelte component using mdsvexPreprocess
export const tokenizersLangPreprocess = {
markup: async ({ content }) => {
const REGEX_FRAMEWORKCONTENT =
/<tokenizerslangcontent>(((?!<tokenizerslangcontent>).)*)<\/tokenizerslangcontent>/gms;
const REGEX_PYTHON = /<python>(((?!<python>).)*)<\/python>/ms;
const RGEX_RUST = /<rust>(((?!<rust>).)*)<\/rust>/ms;
const REGEX_NODE = /<node>(((?!<node>).)*)<\/node>/ms;
const REGEX_FRAMEWORKCONTENT = generateTagRegex("tokenizerslangcontent", true);
const REGEX_PYTHON = generateTagRegex("python");
const RGEX_RUST = generateTagRegex("rust");
const REGEX_NODE = generateTagRegex("node");
const FRAMEWORKS = [
{ framework: "python", REGEX_FW: REGEX_PYTHON, isExist: false },
{ framework: "rust", REGEX_FW: RGEX_RUST, isExist: false },
Expand Down
14 changes: 14 additions & 0 deletions kit/preprocessors/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,17 @@ export async function replaceAsync(string, searchValue, replacer) {
/**
 * Restore the literal `{` and `<` characters in a piece of code.
 *
 * Every occurrence of the text `&amp;lcub;` is replaced with `{`, and then
 * every occurrence of `&amp;lt;` is replaced with `<`. All other text is
 * returned untouched.
 *
 * @param {string} code - Text containing the `&amp;`-prefixed placeholders.
 * @returns {string} The text with both placeholders substituted.
 */
export function renderSvelteChars(code) {
	const withCurlyBraces = code.replace(/&amp;lcub;/g, "{");
	const withAngleBrackets = withCurlyBraces.replace(/&amp;lt;/g, "<");
	return withAngleBrackets;
}

/**
 * Create a regex that matches an html-like `<tag>...</tag>` pair and captures
 * the text between the opening and closing tag in group 1.
 * Used for parsing the hf custom doc syntax.
 *
 * The content is matched lazily, so a match ends at the first closing tag
 * that follows the opening tag. The `m` and `s` flags are always set; `s`
 * lets the captured content span multiple lines.
 *
 * The tag name is matched literally: regex metacharacters in `tag` are
 * escaped before the pattern is built.
 *
 * example: generateTagRegex("inferencesnippet", true) -> /<inferencesnippet>(.*?)<\/inferencesnippet>/gms
 * @param {string} tag - The name of the tag to match content within.
 * @param {boolean} [global=false] - Whether to create a global pattern that matches all occurrences.
 * @returns {RegExp} - The generated RegExp pattern.
 */
export function generateTagRegex(tag, global = false) {
	const flags = global ? "msg" : "ms";
	// Escape regex metacharacters so the tag cannot change the meaning of the
	// pattern (e.g. "a.b" matching "<aXb>") or make it invalid (e.g. "c++").
	const escapedTag = String(tag).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
	return new RegExp(`<${escapedTag}>(.*?)<\\/${escapedTag}>`, flags);
}

0 comments on commit f4ad3d7

Please sign in to comment.