Skip to content

Commit

Permalink
New directory for preprocessors (#425)
Browse files Browse the repository at this point in the history
* New directory for preprocessors

* #427

* #426
  • Loading branch information
mishig25 authored Oct 19, 2023
1 parent dcc8b46 commit 0ebb485
Show file tree
Hide file tree
Showing 9 changed files with 686 additions and 668 deletions.
667 changes: 0 additions & 667 deletions kit/preprocess.js

This file was deleted.

198 changes: 198 additions & 0 deletions kit/preprocessors/docstring.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
import domUtils from "domutils";
import htmlparser2 from "htmlparser2";
import { replaceAsync, renderSvelteChars } from "./utils.js";
import { mdsvexPreprocess } from "./mdsvex/index.js";

// Preprocessor that converts every <docstring>…</docstring> block into a
// <Docstring /> svelte component; markdown fields are rendered with mdsvexPreprocess
export const docstringPreprocess = {
  /**
   * Replaces each `<docstring>` block found in `content` by a self-closing
   * `<Docstring ... />` component whose props are built from the block's inner tags.
   *
   * @param {{ content: string, filename: string }} input - file text and the
   *   filename that is forwarded to `mdsvexPreprocess.markup`.
   * @returns {Promise<{ code: string }>} the rewritten file text.
   * @throws {TypeError} when a docstring lacks `<name>`, `<anchor>` or
   *   `<parameters>`, or when a declared parameter group lacks its title/content tag.
   */
  markup: async ({ content, filename }) => {
    const REGEX_DOCSTRING = /<docstring>(((?!<docstring>).)*)<\/docstring>/gms;
    const REGEX_NAME = /<name>(((?!<name>).)*)<\/name>/ms;
    const REGEX_ANCHOR = /<anchor>(((?!<anchor>).)*)<\/anchor>/ms;
    const REGEX_SIGNATURE = /<parameters>(((?!<parameters>).)*)<\/parameters>/ms;
    const REGEX_PARAMSDESC = /<paramsdesc>(((?!<paramsdesc>).)*)<\/paramsdesc>/ms;
    const REGEX_PARAMSGROUPS = /<paramgroups>(((?!<paramgroups>).)*)<\/paramgroups>/ms;
    const REGEX_RETDESC = /<retdesc>(((?!<retdesc>).)*)<\/retdesc>/ms;
    const REGEX_RETTYPE = /<rettype>(((?!<rettype>).)*)<\/rettype>/ms;
    const REGEX_YIELDESC = /<yieldesc>(((?!<yieldesc>).)*)<\/yieldesc>/ms;
    const REGEX_YIELDTYPE = /<yieldtype>(((?!<yieldtype>).)*)<\/yieldtype>/ms;
    const REGEX_RAISEDESC = /<raises>(((?!<raises>).)*)<\/raises>/ms;
    const REGEX_RAISETYPE = /<raisederrors>(((?!<raisederrors>).)*)<\/raisederrors>/ms;
    const REGEX_SOURCE = /<source>(((?!<source>).)*)<\/source>/ms;
    const REGEX_IS_GETSET_DESC = /<isgetsetdescriptor>/ms;

    // Optional tags whose body is rendered through mdsvex and emitted as a prop.
    // Order matters: it is the order in which props are appended to the component.
    // Yield tags deliberately reuse the return* props; <yieldtype> also sets isYield.
    const MARKDOWN_PROPS = [
      { regex: REGEX_RETDESC, prop: "returnDescription", extra: "" },
      { regex: REGEX_RETTYPE, prop: "returnType", extra: "" },
      { regex: REGEX_YIELDESC, prop: "returnDescription", extra: "" },
      { regex: REGEX_YIELDTYPE, prop: "returnType", extra: "isYield={true} " },
      { regex: REGEX_RAISEDESC, prop: "raiseDescription", extra: "" },
      { regex: REGEX_RAISETYPE, prop: "raiseType", extra: "" },
    ];

    // Same tag pattern as the literals above, for the numbered parameter-group tags.
    const tagRegex = (tag) => new RegExp(`<${tag}>(((?!<${tag}>).)*)</${tag}>`, "ms");

    const renderMarkdown = async (markdown) => {
      const { code } = await mdsvexPreprocess.markup({ content: markdown, filename });
      return code;
    };

    const code = await replaceAsync(content, REGEX_DOCSTRING, async (_, rawBody) => {
      const docstringBody = renderSvelteChars(rawBody);

      const name = requireTagBody(docstringBody, REGEX_NAME, "name", filename);
      const anchor = requireTagBody(docstringBody, REGEX_ANCHOR, "anchor", filename);
      const signature = requireTagBody(docstringBody, REGEX_SIGNATURE, "parameters", filename);

      // `signature` is spliced in verbatim as a JS expression (not JSON-stringified).
      let svelteComponent = `<Docstring name={${JSON.stringify(
        unescapeUnderscores(name)
      )}} anchor={${JSON.stringify(anchor)}} parameters={${signature}} `;

      const paramsDescMatch = docstringBody.match(REGEX_PARAMSDESC);
      if (paramsDescMatch) {
        // escape }} by adding void character `&zwnj;` in between
        const markdown = paramsDescMatch[1].replace(/}}/g, "}&zwnj;}");
        const html = renderCallouts(await renderMarkdown(markdown));
        const entries = extractParameterEntries(html, anchor);
        // the prop is only emitted when the rendered description contains a <ul>
        if (entries) {
          svelteComponent += ` parametersDescription={${JSON.stringify(entries)}} `;
        }
      }

      const sourceMatch = docstringBody.match(REGEX_SOURCE);
      if (sourceMatch) {
        svelteComponent += ` source={${JSON.stringify(sourceMatch[1])}} `;
      }

      // rendered one after the other to keep the mdsvex calls in document order
      for (const { regex, prop, extra } of MARKDOWN_PROPS) {
        const match = docstringBody.match(regex);
        if (match) {
          const rendered = await renderMarkdown(match[1]);
          svelteComponent += ` ${prop}={${JSON.stringify(rendered)}} ${extra}`;
        }
      }

      if (docstringBody.match(REGEX_IS_GETSET_DESC)) {
        svelteComponent += ` isGetSetDescriptor={true} `;
      }

      const groupsMatch = docstringBody.match(REGEX_PARAMSGROUPS);
      // a non-numeric count parses to NaN, which fails the `> 0` check below
      const nParamGroups = groupsMatch ? Number.parseInt(groupsMatch[1], 10) : 0;
      if (nParamGroups > 0) {
        const parameterGroups = [];
        for (let groupId = 1; groupId <= nParamGroups; groupId++) {
          const titleTag = `paramsdesc${groupId}title`;
          const contentTag = `paramsdesc${groupId}`;
          const title = requireTagBody(docstringBody, tagRegex(titleTag), titleTag, filename);
          const groupMarkdown = requireTagBody(
            docstringBody,
            tagRegex(contentTag),
            contentTag,
            filename
          );
          // NOTE(review): unlike <paramsdesc>, group content is neither `}}`-escaped
          // nor run through the <Tip>/<Added>/… rendering — kept as-is, confirm intent.
          const groupHtml = await renderMarkdown(groupMarkdown);
          // a group without a <ul> still gets an (empty) entry list
          const parametersDescription = extractParameterEntries(groupHtml, anchor) || [];
          parameterGroups.push({ title, parametersDescription });
        }
        svelteComponent += ` parameterGroups={${JSON.stringify(parameterGroups)}} `;
      }

      svelteComponent += ` />\n`;
      return svelteComponent;
    });

    return { code };
  },
};

/**
 * Returns the first capture group of `regex` in `docstringBody`.
 * Throws a descriptive TypeError (instead of failing on `null[1]`) when the
 * tag is absent, so the offending file and tag are visible in the build log.
 */
function requireTagBody(docstringBody, regex, tagName, filename) {
  const match = docstringBody.match(regex);
  if (!match) {
    throw new TypeError(
      `docstringPreprocess: <docstring> block is missing required <${tagName}> tag (file: ${filename})`
    );
  }
  return match[1];
}

/**
 * Builds the "course-tip" <div> markup shared by <Tip> and <Added>/<Changed>/<Deprecated>.
 * `color` is "green" or "orange"; `innerHtml` is placed on its own line inside the div.
 */
function calloutBox(color, innerHtml) {
  const orangeClass = color === "orange" ? "course-tip-orange" : "";
  return [
    "<div",
    `class="course-tip ${orangeClass} bg-gradient-to-br dark:bg-gradient-to-r before:border-${color}-500 dark:before:border-${color}-800 from-${color}-50 dark:from-gray-900 to-white dark:to-gray-950 border border-${color}-50 text-${color}-700 dark:text-gray-400"`,
    ">",
    innerHtml,
    "</div>",
  ].join("\n");
}

/**
 * Replaces <Tip> and <Added>/<Changed>/<Deprecated> components found inside
 * rendered parameter descriptions by plain "course-tip" divs.
 */
function renderCallouts(html) {
  const REGEX_TIP = /<Tip( warning={true})?>(((?!<Tip( warning={true})?>).)*)<\/Tip>/gms;
  const REGEX_CHANGED =
    /<(Added|Changed|Deprecated) version="([0-9.v]+)" ?\/?>((((?!<(Added|Changed|Deprecated) version="([0-9.v]+)"\/?>).)*)<\/(Added|Changed|Deprecated)>)?/gms;

  return (
    html
      // warning tips are orange, regular tips green
      .replace(REGEX_TIP, (_, isWarning, tipContent) =>
        calloutBox(isWarning ? "orange" : "green", tipContent)
      )
      // the closing tag (and thus the description) is optional for these components
      .replace(REGEX_CHANGED, (_, componentType, version, __, descriptionContent) => {
        const color = /Added|Changed/.test(componentType) ? "green" : "orange";
        return calloutBox(
          color,
          `<p class="font-medium">${componentType} in ${version}</p>\n${descriptionContent || ""}`
        );
      })
  );
}

/**
 * Parses rendered HTML and turns the element children of its first <ul> into
 * `{ anchor, description, name }` entries (name = text of the item's first <strong>).
 * Returns `null` when the HTML contains no <ul>.
 */
function extractParameterEntries(html, anchor) {
  const dom = htmlparser2.parseDocument(html);
  const lists = domUtils.getElementsByTagName("ul", dom);
  if (!lists.length) {
    return null;
  }

  return lists[0].childNodes
    .filter(({ type }) => type === "tag")
    .map((itemEl) => {
      const nameEl = domUtils.getElementsByTagName("strong", itemEl)[0];
      const name = domUtils.innerText(nameEl);
      let description = domUtils.getInnerHTML(itemEl).trim();

      // strip enclosing paragraph tags <p> & </p>
      if (description.startsWith("<p>")) {
        description = description.slice("<p>".length);
      }
      if (description.endsWith("</p>")) {
        description = description.slice(0, -"</p>".length);
      }

      return { anchor: `${anchor}.${name}`, description, name };
    });
}

/**
 * Docstring names in the mdx file carry needlessly escaped underscores (`\_`);
 * this turns every `\_` back into a plain `_`.
 */
function unescapeUnderscores(content) {
  const ESCAPED_UNDERSCORE = "\\_";
  const pieces = content.split(ESCAPED_UNDERSCORE);
  return pieces.join("_");
}
42 changes: 42 additions & 0 deletions kit/preprocessors/frameworkcontent.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import { replaceAsync } from "./utils.js";

// Preprocessor that rewrites each <frameworkcontent>…</frameworkcontent> block
// into a FrameworkContent svelte component, with one <Markdown> slot per
// framework tag (<pt>, <tf>, <jax>) present in the block
export const frameworkcontentPreprocess = {
  markup: async ({ content }) => {
    const REGEX_FRAMEWORKCONTENT =
      /<frameworkcontent>(((?!<frameworkcontent>).)*)<\/frameworkcontent>/gms;
    // [slot/prop name, regex capturing that framework's section], in output order
    const FRAMEWORK_TAGS = [
      ["pytorch", /<pt>(((?!<pt>).)*)<\/pt>/ms],
      ["tensorflow", /<tf>(((?!<tf>).)*)<\/tf>/ms],
      ["jax", /<jax>(((?!<jax>).)*)<\/jax>/ms],
    ];

    const code = await replaceAsync(content, REGEX_FRAMEWORKCONTENT, async (_, fwcontentBody) => {
      const props = [];
      const slots = [];

      for (const [framework, regex] of FRAMEWORK_TAGS) {
        const found = fwcontentBody.match(regex);
        const isPresent = found !== null;
        // every framework gets a boolean prop, present or not
        props.push(`${framework}={${isPresent}}`);
        if (isPresent) {
          // blank lines around the body keep it a standalone markdown block
          slots.push(
            [
              `<svelte:fragment slot="${framework}">`,
              "<Markdown>",
              `\n\n${found[1]}\n\n`,
              "</Markdown>",
              "</svelte:fragment>",
            ].join("\n")
          );
        }
      }

      return `\n\n<FrameworkContent ${props.join(" ")}>\n${slots.join("")}\n</FrameworkContent>\n\n`;
    });

    return { code };
  },
};
5 changes: 5 additions & 0 deletions kit/preprocessors/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
export { frameworkcontentPreprocess } from "./frameworkcontent.js";
export { docstringPreprocess } from "./docstring.js";
export { inferenceSnippetPreprocess } from "./inferenceSnippet.js";
export { tokenizersLangPreprocess } from "./tokenizersLang.js";
export { mdsvexPreprocess } from "./mdsvex/index.js";
41 changes: 41 additions & 0 deletions kit/preprocessors/inferenceSnippet.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import { replaceAsync } from "./utils.js";

// Preprocessor that rewrites each <inferencesnippet>…</inferencesnippet> block
// into an InferenceApi svelte component, with one <Markdown> slot per
// language tag (<python>, <js>, <curl>) present in the block
export const inferenceSnippetPreprocess = {
  /**
   * @param {{ content: string }} input - file text to transform.
   * @returns {Promise<{ code: string }>} the text with every snippet block replaced.
   */
  markup: async ({ content }) => {
    const REGEX_INFERENCESNIPPET =
      /<inferencesnippet>(((?!<inferencesnippet>).)*)<\/inferencesnippet>/gms;
    const REGEX_PYTHON = /<python>(((?!<python>).)*)<\/python>/ms;
    const REGEX_JS = /<js>(((?!<js>).)*)<\/js>/ms;
    const REGEX_CURL = /<curl>(((?!<curl>).)*)<\/curl>/ms;
    // Immutable lookup table only: presence flags are computed per snippet below.
    // Keeping mutable `isExist` flags here made a language found in one snippet
    // show up as `={true}` on every later snippet of the same file.
    const FRAMEWORKS = [
      { framework: "python", REGEX_FW: REGEX_PYTHON },
      { framework: "js", REGEX_FW: REGEX_JS },
      { framework: "curl", REGEX_FW: REGEX_CURL },
    ];

    const code = await replaceAsync(content, REGEX_INFERENCESNIPPET, async (_, fwcontentBody) => {
      const props = [];
      let svelteSlots = "";

      for (const { framework, REGEX_FW } of FRAMEWORKS) {
        const found = fwcontentBody.match(REGEX_FW);
        const isExist = found !== null;
        props.push(`${framework}={${isExist}}`);
        if (isExist) {
          // blank lines around the body keep it a standalone markdown block
          svelteSlots += [
            `<svelte:fragment slot="${framework}">`,
            "<Markdown>",
            `\n\n${found[1]}\n\n`,
            "</Markdown>",
            "</svelte:fragment>",
          ].join("\n");
        }
      }

      return `<InferenceApi ${props.join(" ")}>\n${svelteSlots}\n</InferenceApi>`;
    });

    return { code };
  },
};
Loading

0 comments on commit 0ebb485

Please sign in to comment.