Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
rowanc1 committed Jan 27, 2025
1 parent fad8f67 commit 8a8a210
Show file tree
Hide file tree
Showing 13 changed files with 529 additions and 91 deletions.
58 changes: 58 additions & 0 deletions packages/markdown-it-myst/src/inlineAttributes.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// inlineAttributes.spec.ts
import { describe, expect, test } from 'vitest';
import { inlineOptionsToTokens, tokenizeInlineAttributes } from './inlineAttributes';

describe('parseRoleHeader', () => {
  // Headers that tokenize cleanly, paired with the exact token list expected
  // back from `tokenizeInlineAttributes`.
  const validHeaders: Array<[string, ReturnType<typeof tokenizeInlineAttributes>]> = [
    ['simple', [{ kind: 'bare', value: 'simple' }]],
    [
      'someRole .cls1 .cls2',
      [
        { kind: 'bare', value: 'someRole' },
        { kind: 'class', value: 'cls1' },
        { kind: 'class', value: 'cls2' },
      ],
    ],
    [
      'myRole #foo',
      [
        { kind: 'bare', value: 'myRole' },
        { kind: 'id', value: 'foo' },
      ],
    ],
    [
      'myRole .red #xyz attr="value"',
      [
        { kind: 'bare', value: 'myRole' },
        { kind: 'class', value: 'red' },
        { kind: 'id', value: 'xyz' },
        { kind: 'attr', key: 'attr', value: 'value' },
      ],
    ],
    [
      'roleName data="some \\"escaped\\" text"',
      [
        { kind: 'bare', value: 'roleName' },
        { kind: 'attr', key: 'data', value: 'some "escaped" text' },
      ],
    ],
  ];

  // Headers rejected by `inlineOptionsToTokens`: [case label, header, expected error message].
  const invalidHeaders: Array<[string, string, string]> = [
    ['Missing name', '.classOnly', 'Missing mandatory role name as the first token'],
    [
      'Extra bare token after name',
      'myRole anotherWord',
      'No additional bare tokens allowed after the first token',
    ],
    ['Multiple IDs', 'myRole #first #second', 'Cannot have more than one ID defined'],
    ['ID starts with a digit', 'myRole #1bad', 'ID cannot start with a number: "1bad"'],
  ];

  test.each(validHeaders)('parses valid header: %s', (input, tokens) => {
    expect(tokenizeInlineAttributes(input)).toEqual(tokens);
  });

  test.each(invalidHeaders)('throws error: %s', (_label, input, message) => {
    // Every case here throws before the markdown-it state is touched,
    // so a null state is sufficient.
    const convert = () => inlineOptionsToTokens(input, 0, null as any);
    expect(convert).toThrow(message);
  });
});
114 changes: 114 additions & 0 deletions packages/markdown-it-myst/src/inlineAttributes.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import type StateCore from 'markdown-it/lib/rules_core/state_core.js';
import { nestedPartToTokens } from './nestedParse.js';
import type Token from 'markdown-it/lib/token.js';

/**
 * Structured form of an inline attribute header such as
 * `name .class #id key="value"`.
 *
 * NOTE(review): this type is not referenced by the functions visible in this
 * file; the field descriptions below are inferred from the tokenizer's token
 * kinds and should be confirmed against the type's consumers.
 */
export type InlineAttributes = {
// Leading bare token of the header (presumably the role name).
name: string;
// Value of the `#id` token, without the leading `#` (presumably at most one).
id?: string;
// Values of the `.class` tokens, without the leading `.` (presumably in header order).
classes?: string[];
// `key="value"` pairs (presumably keyed by attribute name).
attrs?: Record<string, string>;
};

/** A single token produced by {@link tokenizeInlineAttributes}. */
export type InlineAttributeToken =
  | { kind: 'class'; value: string }
  | { kind: 'id'; value: string }
  | { kind: 'attr'; key: string; value: string }
  | { kind: 'bare'; value: string };

/**
 * Tokenizes an inline-attributes header (the text between `{` and `}` of a role).
 *
 * Tokens are produced in source order:
 * - `.className`      => `{ kind: 'class', value }` (leading `.` removed)
 * - `#something`      => `{ kind: 'id', value }` (leading `#` removed; digits are
 *                        accepted here and left for the caller to validate)
 * - `key="someValue"` => `{ kind: 'attr', key, value }` (value unescaped, may be empty)
 * - anything else     => `{ kind: 'bare', value }` (a run of non-whitespace characters)
 *
 * Inside a quoted value a backslash escapes the following character, so `\"` does
 * not terminate the value. On output `\"` becomes `"` and `\\` becomes `\`; any
 * other backslash sequence is passed through unchanged.
 *
 * @param header - the raw header text, e.g. `myRole .red #xyz attr="value"`
 * @returns the tokens found in the header; an empty array for an empty or
 *   whitespace-only header
 */
export function tokenizeInlineAttributes(header: string): InlineAttributeToken[] {
  // This pattern uses four alternations, tried in this order at each position:
  // 1) (\.[A-Za-z0-9_-]+)                       => matches `.className`
  // 2) (#[A-Za-z0-9_:.~-]+)                     => matches `#id` (relaxed to allow digits)
  // 3) ([a-zA-Z0-9_:.-]+)="((?:\\.|[^\\"])*)"   => matches key="value" with possible escapes
  // 4) ([^\s]+)                                 => matches leftover / bare tokens
  const pattern =
    /(\.[A-Za-z0-9_-]+)|(#[A-Za-z0-9_:.~-]+)|([a-zA-Z0-9_:.-]+)="((?:\\.|[^\\"])*)"|([^\s]+)/g;

  const results: InlineAttributeToken[] = [];

  for (const match of header.matchAll(pattern)) {
    const [, classGroup, idGroup, attrKey, attrVal, bareGroup] = match;

    if (classGroup) {
      results.push({ kind: 'class', value: classGroup.slice(1) });
    } else if (idGroup) {
      results.push({ kind: 'id', value: idGroup.slice(1) });
    } else if (attrKey && attrVal !== undefined) {
      // `attrVal` may legitimately be the empty string (key=""), hence the explicit
      // undefined check rather than a truthiness test.
      // Unescape `\"` and `\\`: the pattern above treats both as escape pairs, so
      // both must be collapsed; otherwise an escaped backslash would stay doubled
      // and a value ending in a single backslash could not be written at all.
      const unescaped = attrVal.replace(/\\(["\\])/g, '$1');
      results.push({ kind: 'attr', key: attrKey, value: unescaped });
    } else if (bareGroup) {
      results.push({ kind: 'bare', value: bareGroup });
    }
  }

  return results;
}

/**
 * Splits an inline role header into the role name and a flat list of
 * markdown-it tokens describing its options.
 *
 * The header is tokenized with `tokenizeInlineAttributes`. The first token must
 * be a bare word and becomes the role name. Each `.class` / `#id` option yields a
 * `myst_option_open` / `myst_option_close` pair; each `key="value"` option is
 * handed to `nestedPartToTokens`, and the first token it returns (if any) is
 * tagged with the key, the raw value and the option metadata.
 *
 * @param header - text found between `{` and `}` of the role
 * @param lineNumber - line number forwarded to `nestedPartToTokens`
 * @param state - markdown-it core state, used to construct tokens
 * @returns the role name and the option tokens, in header order
 * @throws Error if the header does not start with a bare name, defines more than
 *   one ID, contains a second bare token, or has an ID starting with a digit
 */
export function inlineOptionsToTokens(
  header: string,
  lineNumber: number,
  state: StateCore,
): { name: string; tokens: Token[] } {
  const [first, ...options] = tokenizeInlineAttributes(header);

  // The leading token must be a “bare” token => the role name
  if (first === undefined || first.kind !== 'bare') {
    throw new Error('Missing mandatory role name as the first token');
  }
  const name = first.value;

  // Survey the remaining tokens once, then report problems in a fixed order:
  // duplicate IDs first, stray bare tokens second.
  let idCount = 0;
  let sawBare = false;
  for (const { kind } of options) {
    if (kind === 'id') {
      idCount += 1;
    } else if (kind === 'bare') {
      sawBare = true;
    }
  }
  if (idCount > 1) {
    // TODO: change this to a warning and take the last ID
    throw new Error('Cannot have more than one ID defined');
  }
  if (sawBare) {
    // TODO: Choose to open this up to boolean attributes
    throw new Error('No additional bare tokens allowed after the first token');
  }

  const collected: Token[] = [];
  for (const opt of options) {
    if (opt.kind === 'id' && /^[0-9]/.test(opt.value)) {
      throw new Error(`ID cannot start with a number: "${opt.value}"`);
    }

    if (opt.kind === 'attr') {
      // lineNumber mapping assumes each option is only one line;
      // not necessarily true for yaml options.
      const nested = nestedPartToTokens(
        'myst_option',
        opt.value,
        lineNumber,
        state,
        'run_roles',
        true,
      );
      if (nested.length) {
        const [head] = nested;
        head.info = opt.key;
        head.content = opt.value;
        head.meta = { location: 'inline', ...opt };
      }
      collected.push(...nested);
      continue;
    }

    // class / id (and bare, which is rejected above): a simple open/close pair
    // with the original source spelling stored as the content.
    const open = new state.Token('myst_option_open', '', 1);
    const close = new state.Token('myst_option_close', '', -1);
    const sigil = opt.kind === 'class' ? '.' : opt.kind === 'id' ? '#' : '';
    open.info = opt.kind;
    open.content = `${sigil}${opt.value}`;
    open.meta = { location: 'inline', ...opt };
    collected.push(open, close);
  }

  return { name, tokens: collected };
}
2 changes: 1 addition & 1 deletion packages/markdown-it-myst/src/nestedParse.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ export function nestedPartToTokens(
state: StateCore,
pluginRuleName: string,
inline: boolean,
) {
): Token[] {
if (!part) return [];
const openToken = new state.Token(`${partName}_open`, '', 1);
openToken.content = part;
Expand Down
60 changes: 37 additions & 23 deletions packages/markdown-it-myst/src/roles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import type MarkdownIt from 'markdown-it/lib';
import type StateCore from 'markdown-it/lib/rules_core/state_core.js';
import type StateInline from 'markdown-it/lib/rules_inline/state_inline.js';
import { nestedPartToTokens } from './nestedParse.js';
import { inlineOptionsToTokens } from './inlineAttributes.js';

export function rolePlugin(md: MarkdownIt): void {
md.inline.ruler.before('backticks', 'parse_roles', roleRule);
Expand All @@ -21,39 +22,46 @@ export function rolePlugin(md: MarkdownIt): void {
};
}

// MyST role syntax, e.g. {role .class #id attr="value"}`text`
// The pattern is anchored at the start of the input and captures everything
// between { and }, then the backticks and the body between them.
// Negative lookahead/lookbehind keep the backtick run and the body from
// starting or ending on an extra backtick.
let _x: RegExp;
try {
// This pattern has three capturing groups:
// 1) Everything inside { }, e.g. `something .class #id attr="value"`
//    (surrounding whitespace is excluded from the capture)
// 2) The sequence of backticks; the same sequence (\2) must close the role
// 3) The actual content between those backticks (at least one character)
// NOTE(review): built from a string, presumably so that an engine without
// look-behind support throws here, inside the try, instead of failing to
// parse the whole file - confirm.
_x = new RegExp('^\\{\\s*([^}]+?)\\s*\\}(`+)(?!`)(.+?)(?<!`)\\2(?!`)');
} catch (err) {
// Safari does not support negative look-behinds
// This is a slightly down-graded variant: it omits the look-behind, so it does
// not check that the content ends on a character other than a backtick.
_x = /^\{\s*([^}]+?)\s*\}(`+)(?!`)(.+?)\2(?!`)/;
}

const ROLE_PATTERN = _x;

function roleRule(state: StateInline, silent: boolean): boolean {
// Check if the role is escaped
if (state.src.charCodeAt(state.pos - 1) === 0x5c) {
/* \ */
// TODO: this could be improved in the case of edge case '\\{', also multi-line
if (state.src.charCodeAt(state.pos - 1) === 0x5c /* '\' */) {
return false;
}

const match = ROLE_PATTERN.exec(state.src.slice(state.pos));
if (match == null) return false;
const [str, name, , content] = match;

// match[1] = everything inside the braces
// match[2] = sequence of backticks
// match[3] = content inside those backticks
const [str, header, , content] = match;
if (!silent) {
const token = state.push('role', '', 0);
token.meta = { name };
token.info = header;
token.content = content;
(token as any).col = [state.pos, state.pos + str.length];
}
state.pos += str.length;
return true;
}

// MyST role syntax format e.g. {role}`text`
// TODO: support role with no value e.g. {role}``
let _x: RegExp;
try {
// This regex must be defined like this or Safari will crash
_x = new RegExp('^\\{\\s*([a-zA-Z_\\-+:]{1,36})\\s*\\}(`+)(?!`)(.+?)(?<!`)\\2(?!`)');
} catch (error) {
// Safari does not support negative look-behinds
// This is a slightly down-graded variant, as it does not require a space.
_x = /^\{\s*([a-zA-Z_\-+:]{1,36})\s*\}(`+)(?!`)(.+?)\2(?!`)/;
}
const ROLE_PATTERN = _x;

/** Run all roles, replacing the original token */
function runRoles(state: StateCore): boolean {
for (const token of state.tokens) {
Expand All @@ -64,25 +72,31 @@ function runRoles(state: StateCore): boolean {
try {
const { map } = token;
const { content, col } = child as any;
const { name, tokens: optTokens } = inlineOptionsToTokens(
child.info,
map?.[0] ?? 0,
state,
);
const roleOpen = new state.Token('parsed_role_open', '', 1);
roleOpen.content = content;
roleOpen.hidden = true;
roleOpen.info = child.meta.name;
roleOpen.info = name;
roleOpen.meta = { header: child.info };
roleOpen.block = false;
roleOpen.map = map;
(roleOpen as any).col = col;
const contentTokens = roleContentToTokens(content, map ? map[0] : 0, state);
const contentTokens = roleContentToTokens(content, map?.[0] ?? 0, state);
const roleClose = new state.Token('parsed_role_close', '', -1);
roleClose.block = false;
roleClose.hidden = true;
roleOpen.info = child.meta.name;
const newTokens = [roleOpen, ...contentTokens, roleClose];
roleOpen.info = name;
const newTokens = [roleOpen, ...optTokens, ...contentTokens, roleClose];
childTokens.push(...newTokens);
} catch (err) {
const errorToken = new state.Token('role_error', '', 0);
errorToken.content = child.content;
errorToken.info = child.info;
errorToken.meta = child.meta;
errorToken.meta = child.meta ?? {};
errorToken.map = child.map;
errorToken.meta.error_message = (err as Error).message;
errorToken.meta.error_name = (err as Error).name;
Expand Down
47 changes: 46 additions & 1 deletion packages/markdown-it-myst/tests/roles.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,51 @@ describe('parses roles', () => {
const mdit = MarkdownIt().use(plugin);
const tokens = mdit.parse('{ ab c }`hello`', {});
expect(tokens.map((t) => t.type)).toEqual(['paragraph_open', 'inline', 'paragraph_close']);
expect(tokens[1].children?.map((t) => t.type)).toEqual(['text', 'code_inline']);
expect(tokens[1].children?.map((t) => t.type)).toEqual(['role_error']);
});
it('inline role has attributes', () => {
const mdit = MarkdownIt().use(plugin);
const tokens = mdit.parse('{ab .c #my-id something="_blah_"}`hello`', {});
expect(tokens.map((t) => t.type)).toEqual(['paragraph_open', 'inline', 'paragraph_close']);
expect(tokens[1].children?.map((t) => t.type)).toEqual([
'parsed_role_open',
'myst_option_open',
'myst_option_close',
'myst_option_open',
'myst_option_close',
'myst_option_open',
'inline',
'myst_option_close',
'role_body_open',
'inline',
'role_body_close',
'parsed_role_close',
]);
const role = tokens[1];
expect(role.children?.[0].content).toBe('hello');
expect(role.children?.[0].info).toBe('ab');
expect(role.children?.[0].meta.header).toBe('ab .c #my-id something="_blah_"');
// Classes
expect(role.children?.[1].info).toBe('class');
expect(role.children?.[1].content).toBe('.c');
expect(role.children?.[1].meta).toEqual({ location: 'inline', kind: 'class', value: 'c' });
// IDs
expect(role.children?.[3].info).toBe('id');
expect(role.children?.[3].content).toBe('#my-id');
expect(role.children?.[3].meta).toEqual({ location: 'inline', kind: 'id', value: 'my-id' });
// Attributes
expect(role.children?.[5].info).toBe('something');
expect(role.children?.[5].content).toBe('_blah_');
expect(role.children?.[5].meta).toEqual({
location: 'inline',
kind: 'attr',
key: 'something',
value: '_blah_',
});
// Inline parse
expect(role.children?.[6].info).toBe('');
expect(role.children?.[6].content).toBe('_blah_');
expect(role.children?.[6].children?.length).toBe(3);
expect(role.children?.[6].children?.[0].tag).toBe('em');
});
});
2 changes: 2 additions & 0 deletions packages/myst-common/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ export type RoleData = {
name: string;
node: Role;
body?: ParseTypes;
options?: Record<string, ParseTypes>;
};

export type DirectiveContext = {
Expand All @@ -106,6 +107,7 @@ export type RoleSpec = {
name: string;
alias?: string[];
doc?: string;
options?: Record<string, OptionDefinition>;
body?: BodyDefinition;
validate?: (data: RoleData, vfile: VFile) => RoleData;
run: (data: RoleData, vfile: VFile) => GenericNode[];
Expand Down
2 changes: 1 addition & 1 deletion packages/myst-parser/src/directives.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import type {
import { RuleId, fileError, fileWarn } from 'myst-common';
import { selectAll } from 'unist-util-select';
import type { VFile } from 'vfile';
import { contentFromNode } from './roles.js';
import { contentFromNode } from './utils.js';
import type { Directive } from 'myst-spec';

type MystDirectiveNode = GenericNode & {
Expand Down
2 changes: 2 additions & 0 deletions packages/myst-parser/src/fromMarkdown.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ const UNHIDDEN_TOKENS = new Set([
'directive_option_close',
'directive_body_open',
'directive_body_close',
'myst_option_open',
'myst_option_close',
'parsed_role_open',
'parsed_role_close',
'role_body_open',
Expand Down
Loading

0 comments on commit 8a8a210

Please sign in to comment.