diff --git a/doc/learn/build-a-syntax-tree.md b/doc/learn/build-a-syntax-tree.md new file mode 100644 index 000000000000..48be9fc83e2e --- /dev/null +++ b/doc/learn/build-a-syntax-tree.md @@ -0,0 +1,140 @@ +--- +group: recipe +index: 8 +title: Building a content syntax tree +description: How to build content with syntax trees +tags: + - mdast + - hast + - xast + - builder + - hyperscript + - jsx + - typescript +author: Christian Murphy +authorGithub: ChristianMurphy +published: 2020-06-09 +modified: 2020-06-15 +--- + +## How to build a syntax tree + +It’s often useful to build new (fragments of) syntax trees when adding or +replacing content. +It’s possible to create trees with plain object and array literals (JSON) or +programmatically with a small utility. +Finally, it’s even possible to use JSX to build trees. + +### JSON + +The most basic way to create a tree is with plain objects and arrays. For some +extra type safety, this can be checked with the types for the given syntax tree +language, in this case mdast: + +```ts +import type {Root} from 'mdast' + +// Note the `: Root` is a TypeScript annotation. Remove it (and the import) for plain JavaScript. +const mdast: Root = { + type: 'root', + children: [ + { + type: 'paragraph', + children: [ + { + type: 'text', + value: 'example' + } + ] + } + ] +} +``` + +#### `unist-builder` + +It’s also possible to build trees with [`unist-builder`](https://github.com/syntax-tree/unist-builder#readme). +It allows a more concise, hyperscript (similar to `React.createElement`) like +syntax: + +```ts +import {u} from 'unist-builder' + +const mdast = u('root', [ + u('paragraph', [ + u('text', 'example') + ]) +]) +``` + +#### `hastscript` + +When working with hast (HTML), [`hastscript`](https://github.com/syntax-tree/hastscript#readme) +can be used.
+ +```ts +import {h, s} from 'hastscript' + +console.log( + h('div#some-id.foo', [ + h('span', 'some text'), + h('input', {type: 'text', value: 'foo'}), + h('a.alpha.bravo.charlie', {download: true}, 'delta') + ]) +) + +// SVG: +console.log( + s('svg', {xmlns: 'http://www.w3.org/2000/svg', viewbox: '0 0 500 500'}, [ + s('title', 'SVG `<circle>` element'), + s('circle', {cx: 120, cy: 120, r: 100}) + ]) +) +``` + +hastscript can also be used as a JSX pragma: + +```tsx +/** @jsx h @jsxFrag null */ +import {h} from 'hastscript' + +console.log( +
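<div class="foo" id="some-id"> + <span>some text</span> + <input type="text" value="foo" /> + <a class="alpha bravo charlie" download>delta</a> + </div>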
+) +``` + +#### `xastscript` + +When working with xast (XML), [`xastscript`](https://github.com/syntax-tree/xastscript#readme) +can be used. + +```ts +import {x} from 'xastscript' + +console.log( + x('album', {id: 123}, [ + x('name', 'Exile in Guyville'), + x('artist', 'Liz Phair'), + x('releasedate', '1993-06-22') + ]) +) +``` + +xastscript can also be used as a JSX pragma: + +```tsx +/** @jsx x @jsxFrag null */ +import {x} from 'xastscript' + +console.log( + <album id={123}> + <name>Born in the U.S.A.</name> + <artist>Bruce Springsteen</artist> + <releasedate>1984-04-06</releasedate> + </album> +) +``` diff --git a/doc/learn/node-type-narrowing-in-typescript.md b/doc/learn/node-type-narrowing-in-typescript.md new file mode 100644 index 000000000000..987f6e7f53af --- /dev/null +++ b/doc/learn/node-type-narrowing-in-typescript.md @@ -0,0 +1,90 @@ +--- +group: recipe +index: 10 +title: Node type narrowing in TypeScript +description: How to narrow a generic node to a more specific node type +tags: + - typescript + - unist + - mdast +author: Christian Murphy +authorGithub: ChristianMurphy +published: 2020-06-09 +modified: 2020-06-15 +--- + +## How to narrow generic `Node` to specific syntax types + +To work with a specific node type or a set of node types we need to +[narrow](https://www.typescriptlang.org/docs/handbook/2/narrowing.html) their +type. +For example, we can take a `Node` and perform a type safe check to get a more +specific type like a `Link`. +Unified provides a utility to help with this and there are some TypeScript +language features which can also help. +Let’s first take a look at `unist-util-is`. + +[`unist-util-is`](https://github.com/syntax-tree/unist-util-is#readme) takes a +`Node` and a `Test` and returns whether the test passes. +It can be used as a [TypeScript type predicate](https://www.typescriptlang.org/docs/handbook/2/narrowing.html#using-type-predicates) +which when used as a condition (such as in an if-statement) tells TypeScript +to narrow a node.
+ +For example: + +```ts +import type {Node, Literal} from 'unist' +import type {List, Blockquote, Strong, Emphasis, Heading} from 'mdast' +import {is, convert} from 'unist-util-is' + +// `Node` could come from a plugin, a utility, or be passed into a function +// here we hard code a Node for testing purposes +const node: Node = {type: 'example'} + +if (is<List>(node, 'list')) { + // If we're here, node is List. + // + // 'list' is compared to node.type to make sure they match + // true means a match, false means no match + // + // <List> tells TypeScript to ensure 'list' matches List.type + // and that if 'list' matches both node.type and List.type + // we know that node is List within this if condition. +} + +if (is<Strong | Emphasis>(node, ['strong', 'emphasis'])) { + // If we get here, node is Strong or Emphasis + + // If we want an even more specific type, we can use a discriminated union + // https://www.typescriptlang.org/docs/handbook/2/narrowing.html#discriminated-unions + if (node.type === 'emphasis') { + // If we get here, node is Emphasis + } +} + +if (is<Heading>(node, {type: 'heading', depth: 1})) { + // If we get here, node is Heading + // + // TypeScript checks that the properties used in the Test + // are valid attributes of <Heading> + // + // It does not narrow node.depth to only be 1, + // which can be done with <Heading & {depth: 1}> +} + +// For advanced use cases, another predicate can be passed to `is` +if (is<Literal>(node, (node: Node): node is Literal => 'value' in node)) { + // If we get here, node is one of the Literal types + // + // Here any comparison function can be used, as long as it is a predicate + // https://www.typescriptlang.org/docs/handbook/2/narrowing.html#using-type-predicates + // and as long as the predicate and generic match. + // For example here, <Literal> and `is Literal` match. +} + +// Reusable predicates can also be created using any `Test` +const isBlockquote = convert<Blockquote>
('blockquote') +if (isBlockquote(node)) { + // If we get here, node is Blockquote +} +``` diff --git a/doc/learn/syntax-trees-with-typescript.md b/doc/learn/syntax-trees-with-typescript.md new file mode 100644 index 000000000000..6ac18763bbc4 --- /dev/null +++ b/doc/learn/syntax-trees-with-typescript.md @@ -0,0 +1,277 @@ +--- +group: guide +title: Typing syntax trees with TypeScript +description: Guide that shows how to use types packages to work with syntax trees +author: Christian Murphy +authorGithub: ChristianMurphy +tags: + - typescript + - unist + - mdast + - hast + - xast +published: 2020-06-09 +modified: 2020-06-15 +--- + +## Working with syntax trees in TypeScript + +This guide will introduce you to using unist and unified with TypeScript. + +### Contents + +* [The basics](#the-basics) +* [unist](#unist) +* [mdast (markdown)](#mdast-markdown) +* [hast (HTML)](#hast-html) +* [xast (XML)](#xast-xml) +* [Summary](#summary) +* [Next steps](#next-steps) + +### The basics + +All unified syntax trees are based off [unist (**uni**versal **s**yntax **t**ree)](https://github.com/syntax-tree/unist). +The core types are available in a types only package: [`@types/unist`](https://www.npmjs.com/package/@types/unist). +The main type is `Node`. +Everything else extends it. +`Literal` and `Parent` are more practical types which also extend `Node`. + +The types provided by unist are abstract interfaces. +In many cases, you will instead use more specific interfaces depending on what +language you’re working with. +Each language supported by unified, like markdown, HTML, and XML, has its own +syntax tree standard which extends `unist`. + +Let’s take a look at these. + +### unist + +#### `Node` + +`Node` is the syntactic unit of syntax trees. +Each node extends `Node` (sometimes indirectly through `Literal` or `Parent`) +and sets `type` to a [string literal](https://www.typescriptlang.org/docs/handbook/2/everyday-types.html#literal-types). 
+The type field tells us what kind of content the node is. +This field uniquely identifies a kind of content (in TypeScript parlance a +[discriminated union](https://www.typescriptlang.org/docs/handbook/2/narrowing.html#discriminated-unions)). +For example in markdown (mdast) `Node` will be extended to make different things +such as a `Heading` or `Link`, which respectively use a `type` field +of `'heading'` and `'link'`. + +A node can optionally include a `Data` interface at the `data` field. +This is an object (dictionary) that stores extra metadata which is not standard +to the node but defined by the ecosystem (utilities and plugins). + +When a syntax tree is parsed from a file, it includes positional information: +a `Position` interface at the `position` field. +This describes where the node occurred in the source file. + +```ts +/** + * Syntactic units in unist syntax trees are called nodes. + */ +interface Node { + /** + * The variant of a node. + */ + type: string + + /** + * Information from the ecosystem. + */ + data?: Data | undefined + + /** + * Location of a node in a source document. + * Must not be present if a node is generated. + */ + position?: Position | undefined +} + +/** + * Information associated by the ecosystem with the node. + * Space is guaranteed to never be specified by unist or specifications + * implementing unist. + */ +export interface Data { + [key: string]: unknown +} + +/** + * Location of a node in a source file. + */ +export interface Position { + /** + * Place of the first character of the parsed source region. + */ + start: Point + + /** + * Place of the first character after the parsed source region. + */ + end: Point + + /** + * Start column at each index (plus start line) in the source region, + * for elements that span multiple lines. + */ + indent?: number[] | undefined +} + +``` + +#### `Literal` + +`Literal` extends `Node` and adds a `value` property. 
+For example a markdown `Code` node extends `Literal` and sets `value` to be a `string`. + +```ts +/** + * Nodes containing a value. + */ +export interface Literal extends Node { + value: unknown +} +``` + +#### `Parent` + +`Parent` extends `Node` and adds `children`. +Children represent other content which is inside or a part of this node. + +```ts +/** + * Nodes containing other nodes. + */ +export interface Parent extends Node { + /** + * List representing the children of a node. + */ + children: Node[]; +} +``` + +#### Pulling unist into a project + +Install: + +```bash +npm install --save-dev @types/unist +``` + +To import the types into a TypeScript file, use: + +```ts +import type {Node, Literal, Parent} from 'unist' +``` + +or into a [JSDoc TypeScript](https://www.typescriptlang.org/docs/handbook/intro-to-js-ts.html) project with: + +```js +/** + * @typedef {import('unist').Node} Node + * @typedef {import('unist').Literal} Literal + * @typedef {import('unist').Parent} Parent + */ +``` + +### mdast (markdown) + +[mdast (**m**arkdown **a**bstract **s**yntax **t**ree)](https://github.com/syntax-tree/mdast#readme) +extends unist with types specific for markdown such as `Heading`, `Code`, +`Link`, and many more. +A full list of nodes can be found in the [specification](https://github.com/syntax-tree/mdast#readme). +The types are available in a types only package: [`@types/mdast`](https://www.npmjs.com/package/@types/mdast). 
+ +Install: + +```bash +npm install --save-dev @types/mdast +``` + +To import the types into a TypeScript file, use: + +```ts +import type {Heading, Code, Link} from 'mdast' +``` + +To import the types in [JSDoc TypeScript](https://www.typescriptlang.org/docs/handbook/intro-to-js-ts.html), use: + +```js +/** + * @typedef {import('mdast').Heading} Heading + * @typedef {import('mdast').Code} Code + * @typedef {import('mdast').Link} Link + */ +``` + +### hast (HTML) + +[hast (**h**ypertext **a**bstract **s**yntax **t**ree)](https://github.com/syntax-tree/hast#readme) +extends unist with types specific for HTML such as `Element`, `Comment`, +`DocType`, and many more. +A full list of nodes can be found in the [specification](https://github.com/syntax-tree/hast#readme). +The types are available in a types only package: [`@types/hast`](https://www.npmjs.com/package/@types/hast). + +Install: + +```bash +npm install --save-dev @types/hast +``` + +To import the types into a TypeScript file, use: + +```ts +import type {Element, Comment, DocType} from 'hast' +``` + +To import the types in [JSDoc TypeScript](https://www.typescriptlang.org/docs/handbook/intro-to-js-ts.html), use: + +```js +/** + * @typedef {import('hast').Element} Element + * @typedef {import('hast').Comment} Comment + * @typedef {import('hast').DocType} DocType + */ +``` + +### xast (XML) + +[xast (e**x**tensible **a**bstract **s**yntax **t**ree)](https://github.com/syntax-tree/xast#readme) +extends unist with types specific for XML such as `Element`, `CData`, +`Instruction`, and many more. +A full list of nodes can be found in the [specification](https://github.com/syntax-tree/xast#readme). +The types are available in a types only package: [`@types/xast`](https://www.npmjs.com/package/@types/xast).
+ +Install: + +```bash +npm install --save-dev @types/xast +``` + +To import the types into a TypeScript file, use: + +```ts +import type {Element, CData, Instruction} from 'xast' +``` + +To import the types in [JSDoc TypeScript](https://www.typescriptlang.org/docs/handbook/intro-to-js-ts.html), use: + +```js +/** + * @typedef {import('xast').Element} Element + * @typedef {import('xast').CData} CData + * @typedef {import('xast').Instruction} Instruction + */ +``` + +### Summary + +* Unified provides types for each language’s syntax tree +* These types can be imported into TypeScript projects and into JSDoc projects + +### Next steps + +* [Learn to traverse syntax trees with TypeScript](/learn/recipe/tree-traversal-typescript/) +* [Learn to narrow `Node` to a more specific type with TypeScript](/learn/recipe/node-type-narrowing-in-typescript/) +* [Learn to build content with syntax trees in TypeScript](/learn/recipe/build-a-syntax-tree/) diff --git a/doc/learn/tree-traversal-typescript.md b/doc/learn/tree-traversal-typescript.md new file mode 100644 index 000000000000..bce3aab761e3 --- /dev/null +++ b/doc/learn/tree-traversal-typescript.md @@ -0,0 +1,129 @@ +--- +group: recipe +index: 9 +title: Tree traversal with TypeScript +description: How to do tree traversal (also known as walking or visiting a tree) +tags: + - unist + - tree + - traverse + - walk + - visit +author: Christian Murphy +authorGithub: ChristianMurphy +published: 2020-06-09 +modified: 2020-06-11 +--- + +## How to traverse a syntax tree + +:notebook: Please read the [introduction to tree traversal in JavaScript](./tree-traversal/) +before reading this section. + +A frequent task when working with unified is to traverse trees to find certain +nodes and then do something with them (often validating or transforming +them). +There are several type-safe utilities provided by unified to help with this.
+ +### `unist-util-visit` + +[`unist-util-visit`](https://github.com/syntax-tree/unist-util-visit#readme) +takes a syntax tree, a `Test`, and a callback. +The callback is called for each node in the tree that passes `Test`. + +For example, if we want to increase the heading level of all headings in a +markdown document: + +```ts +import remark from 'remark' +import type {Node} from 'unist' +import type {Heading} from 'mdast' +import {visit} from 'unist-util-visit' + +const markdownFile = await remark() + .use(() => (mdast: Node) => { + visit( + mdast, + // Check that the Node is a heading: + 'heading', + (node: Heading) => { + node.depth += 1 + } + ) + }) + .process('## Hello, *World*!') + +console.log(markdownFile.toString()) +``` + +Or if we want to make all ordered lists in a markdown document unordered: + +```ts +import remark from 'remark' +import type {Node} from 'unist' +import type {List} from 'mdast' +import {visit} from 'unist-util-visit' + +const markdownFile = await remark() + .use(() => (mdast: Node) => { + visit( + mdast, + // Check that the Node is a list and that it is ordered: + {type: 'list', ordered: true}, + (node: List) => { + node.ordered = false + } + ) + }) + .process('1. list item') + +console.log(markdownFile.toString()) +``` + +### `unist-util-visit-parents` + +Sometimes it’s necessary to know the ancestors of a node (all its parents). +[`unist-util-visit-parents`](https://github.com/syntax-tree/unist-util-visit-parents) +is like `unist-util-visit` but includes a list of all parent nodes.
+ +For example if we want to check if all markdown `ListItem` are inside a `List` +we could: + +```ts +import remark from 'remark' +import type {Node, Parent} from 'unist' +import type {ListItem} from 'mdast' +import {visitParents} from 'unist-util-visit-parents' + +remark() + .use(() => (mdast: Node) => { + visitParents(mdast, 'listItem', (listItem: ListItem, parents: Parent[]) => { + if (!parents.some((parent) => parent.type === 'list')) { + console.warn('listItem is outside a list') + } + }) + }) + .process('1. list item') +``` + +### `unist-util-select` + +Sometimes CSS selectors are easier to read than several (nested) if/else +statements. +[`unist-util-select`](https://github.com/syntax-tree/unist-util-select) lets +you do that. +For example if we want to find all `Paragraph`s that are somewhere in a +`Blockquote`, we could: + +```ts +import remark from 'remark' +import type {Node} from 'unist' +import {selectAll} from 'unist-util-select' + +remark() + .use(() => (mdast: Node) => { + const matches = selectAll('blockquote paragraph', mdast) + console.log(matches) + }) + .process('1. list item') +```