From 23a304fa4075ecfa6448aa344cbfb0d0e3a0976b Mon Sep 17 00:00:00 2001 From: Nozomu Ikuta <16436160+NozomuIkuta@users.noreply.github.com> Date: Sun, 26 Nov 2023 23:18:06 +0900 Subject: [PATCH 1/3] feat: add i18n support --- .vitepress/config.mts | 362 ++++++--- .../theme/components/AppBlogPostHeader.vue | 5 +- src/docs/guide/introduction.md | 2 +- src/index.md | 4 +- src/ja/blog/2022-02-10-js-tooling-research.md | 8 + src/ja/blog/2022-08-08-linter-research.md | 8 + src/ja/blog/2023-11-07-announcing-oxc.md | 8 + src/ja/blog/2023-11-08-announcing-oxlint.md | 8 + src/ja/blog/index.md | 68 ++ src/ja/docs/contribute/codegen.md | 6 + src/ja/docs/contribute/development.md | 67 ++ src/ja/docs/contribute/formatter.md | 9 + src/ja/docs/contribute/introduction.md | 41 + src/ja/docs/contribute/linter.md | 37 + src/ja/docs/contribute/minifier.md | 20 + src/ja/docs/contribute/parser.md | 35 + src/ja/docs/contribute/performance.md | 71 ++ src/ja/docs/contribute/prettier.md | 6 + src/ja/docs/contribute/resolver.md | 11 + src/ja/docs/contribute/showcase.md | 11 + src/ja/docs/contribute/transformer.md | 9 + src/ja/docs/contribute/vscode.md | 18 + src/ja/docs/guide/benchmarks.md | 8 + src/ja/docs/guide/introduction.md | 31 + src/ja/docs/guide/philosophy.md | 8 + src/ja/docs/guide/usage/benchmarks.md | 6 + src/ja/docs/guide/usage/linter.md | 167 ++++ src/ja/docs/guide/usage/parser.md | 72 ++ src/ja/docs/guide/usage/philosophy.md | 6 + src/ja/docs/guide/usage/resolver.md | 80 ++ src/ja/docs/learn/architecture/formatter.md | 6 + .../docs/learn/architecture/introduction.md | 6 + src/ja/docs/learn/architecture/linter.md | 8 + src/ja/docs/learn/architecture/minifier.md | 6 + src/ja/docs/learn/architecture/parser.md | 29 + src/ja/docs/learn/architecture/resolver.md | 6 + src/ja/docs/learn/architecture/transformer.md | 6 + src/ja/docs/learn/ecmascript/grammar.md | 691 +++++++++++++++++ src/ja/docs/learn/ecmascript/spec.md | 146 ++++ src/ja/docs/learn/ecosystem.md | 25 + 
src/ja/docs/learn/performance.md | 719 ++++++++++++++++++ src/ja/docs/learn/references.md | 50 ++ src/ja/index.md | 45 ++ 43 files changed, 2838 insertions(+), 97 deletions(-) create mode 100644 src/ja/blog/2022-02-10-js-tooling-research.md create mode 100644 src/ja/blog/2022-08-08-linter-research.md create mode 100644 src/ja/blog/2023-11-07-announcing-oxc.md create mode 100644 src/ja/blog/2023-11-08-announcing-oxlint.md create mode 100644 src/ja/blog/index.md create mode 100644 src/ja/docs/contribute/codegen.md create mode 100644 src/ja/docs/contribute/development.md create mode 100644 src/ja/docs/contribute/formatter.md create mode 100644 src/ja/docs/contribute/introduction.md create mode 100644 src/ja/docs/contribute/linter.md create mode 100644 src/ja/docs/contribute/minifier.md create mode 100644 src/ja/docs/contribute/parser.md create mode 100644 src/ja/docs/contribute/performance.md create mode 100644 src/ja/docs/contribute/prettier.md create mode 100644 src/ja/docs/contribute/resolver.md create mode 100644 src/ja/docs/contribute/showcase.md create mode 100644 src/ja/docs/contribute/transformer.md create mode 100644 src/ja/docs/contribute/vscode.md create mode 100644 src/ja/docs/guide/benchmarks.md create mode 100644 src/ja/docs/guide/introduction.md create mode 100644 src/ja/docs/guide/philosophy.md create mode 100644 src/ja/docs/guide/usage/benchmarks.md create mode 100644 src/ja/docs/guide/usage/linter.md create mode 100644 src/ja/docs/guide/usage/parser.md create mode 100644 src/ja/docs/guide/usage/philosophy.md create mode 100644 src/ja/docs/guide/usage/resolver.md create mode 100644 src/ja/docs/learn/architecture/formatter.md create mode 100644 src/ja/docs/learn/architecture/introduction.md create mode 100644 src/ja/docs/learn/architecture/linter.md create mode 100644 src/ja/docs/learn/architecture/minifier.md create mode 100644 src/ja/docs/learn/architecture/parser.md create mode 100644 src/ja/docs/learn/architecture/resolver.md create mode 100644 
src/ja/docs/learn/architecture/transformer.md create mode 100644 src/ja/docs/learn/ecmascript/grammar.md create mode 100644 src/ja/docs/learn/ecmascript/spec.md create mode 100644 src/ja/docs/learn/ecosystem.md create mode 100644 src/ja/docs/learn/performance.md create mode 100644 src/ja/docs/learn/references.md create mode 100644 src/ja/index.md diff --git a/.vitepress/config.mts b/.vitepress/config.mts index b931a451da..7c6661abf6 100644 --- a/.vitepress/config.mts +++ b/.vitepress/config.mts @@ -1,5 +1,7 @@ +import { dirname } from "node:path"; +import { fileURLToPath } from "node:url"; import { defineConfig } from "vitepress"; -import blogSidebar from "./sidebar.blog.json"; +import BLOG_SIDEBAR from "./sidebar.blog.json"; export default defineConfig({ srcDir: "src", @@ -89,132 +91,308 @@ export default defineConfig({ themeConfig: { siteTitle: "OXC", logo: "https://raw.githubusercontent.com/oxc-project/oxc-assets/main/logo-round.png", - logoLink: "/", search: { provider: "local", }, - nav: [ - { text: "Guide", link: "/docs/guide/introduction" }, - { text: "Learn", link: "/docs/learn/architecture/introduction" }, - { text: "Contribute", link: "/docs/contribute/introduction" }, - { text: "Blog", link: "/blog/2022-02-10-js-tooling-research" }, - { - text: "Playground", - target: "_blank", - link: "https://oxc-project.github.io/oxc/playground/", - }, - ], socialLinks: [ { icon: "twitter", link: "https://x.com/boshen_c" }, { icon: "discord", link: "https://discord.gg/9uXCAwqQZW" }, { icon: "github", link: "https://github.com/oxc-project" }, ], - editLink: { - pattern: "https://github.com/oxc-project/oxc/edit/main/src/:path", - }, lastUpdated: { formatOptions: { dateStyle: "full", }, }, - footer: { - copyright: "© 2023 OXC Project", - }, - sidebar: { - "/docs/guide/": [ - { - text: "Getting Started", - items: [ - { text: "Introduction", link: "/docs/guide/introduction" }, - { text: "Benchmarks", link: "/docs/guide/benchmarks" }, - ], - }, - { - text: "Usage", - items: 
[ - { text: "Linter", link: "/docs/guide/usage/linter" }, - { text: "Parser", link: "/docs/guide/usage/parser" }, - { text: "Resolver", link: "/docs/guide/usage/resolver" }, - ], - }, - ], - "/docs/learn/": [ - { - text: "Architecture", - items: [ - { - text: "Introduction", - link: "/docs/learn/architecture/introduction", - }, + }, + locales: { + root: { + label: "English", + lang: "en", + themeConfig: { + nav: [ + { text: "Guide", link: "/docs/guide/introduction" }, + { text: "Learn", link: "/docs/learn/architecture/introduction" }, + { text: "Contribute", link: "/docs/contribute/introduction" }, + { text: "Blog", link: "/blog/2022-02-10-js-tooling-research" }, + { + text: "Playground", + target: "_blank", + link: "https://oxc-project.github.io/oxc/playground/", + }, + ], + sidebar: { + "/docs/guide/": [ { - text: "Parser", - link: "/docs/learn/architecture/parser", + text: "Getting Started", + items: [ + { text: "Introduction", link: "/docs/guide/introduction" }, + { text: "Benchmarks", link: "/docs/guide/benchmarks" }, + ], }, { - text: "Linter", - link: "/docs/learn/architecture/linter", + text: "Usage", + items: [ + { text: "Linter", link: "/docs/guide/usage/linter" }, + { text: "Parser", link: "/docs/guide/usage/parser" }, + { text: "Resolver", link: "/docs/guide/usage/resolver" }, + ], }, + ], + "/docs/learn/": [ { - text: "Resolver", - link: "/docs/learn/architecture/resolver", + text: "Architecture", + items: [ + { + text: "Introduction", + link: "/docs/learn/architecture/introduction", + }, + { + text: "Parser", + link: "/docs/learn/architecture/parser", + }, + { + text: "Linter", + link: "/docs/learn/architecture/linter", + }, + { + text: "Resolver", + link: "/docs/learn/architecture/resolver", + }, + { + text: "Transformer", + link: "/docs/learn/architecture/transformer", + }, + { + text: "Formatter", + link: "/docs/learn/architecture/formatter", + }, + { + text: "Minifier", + link: "/docs/learn/architecture/minifier", + }, + ], }, { - text: 
"Transformer", - link: "/docs/learn/architecture/transformer", + text: "ECMAScript", + items: [ + { text: "Spec", link: "/docs/learn/ecmascript/spec" }, + { + text: "Grammar", + link: "/docs/learn/ecmascript/grammar", + }, + ], }, + { text: "Performance", link: "/docs/learn/performance" }, + { text: "Ecosystem", link: "/docs/learn/ecosystem" }, + { text: "References", link: "/docs/learn/references" }, + ], + "/docs/contribute/": [ { - text: "Formatter", - link: "/docs/learn/architecture/formatter", + text: "Contributing Guide", + items: [ + { text: "Introduction", link: "/docs/contribute/introduction" }, + { text: "Development", link: "/docs/contribute/development" }, + ], }, { - text: "Minifier", - link: "/docs/learn/architecture/minifier", + text: "Domain", + items: [ + { text: "Parser", link: "/docs/contribute/parser" }, + { text: "Linter", link: "/docs/contribute/linter" }, + { text: "Prettier", link: "/docs/contribute/prettier" }, + { text: "Resolver", link: "/docs/contribute/resolver" }, + { + text: "Transformer", + link: "/docs/contribute/transformer", + }, + { text: "Formatter", link: "/docs/contribute/formatter" }, + { text: "Codegen", link: "/docs/contribute/codegen" }, + { text: "Minifier", link: "/docs/contribute/minifier" }, + { text: "VSCode", link: "/docs/contribute/vscode" }, + ], }, + { text: "Performance", link: "/docs/contribute/performance" }, + { text: "Showcase", link: "/docs/contribute/showcase" }, ], + "/blog/": BLOG_SIDEBAR, + }, + editLink: { + pattern: "https://github.com/oxc-project/oxc/edit/main/src/:path", + text: "Edit this page", + }, + footer: { + copyright: "© 2023 OXC Project", + }, + }, + }, + ja: { + label: "日本語", + lang: "ja", + themeConfig: { + search: { + provider: "local", + options: { + locales: { + ja: { + translations: { + button: { + buttonText: "検索する", + buttonAriaLabel: "検索する", + }, + modal: { + noResultsText: "見つかりませんでした", + resetButtonTitle: "リセットする", + footer: { + selectText: "選ぶ", + navigateText: "切り替える", + 
closeText: "閉じる", + }, + }, + }, + }, + }, + }, }, - { - text: "ECMAScript", - items: [ - { text: "Spec", link: "/docs/learn/ecmascript/spec" }, + nav: [ + { text: "ガイド", link: "/ja/docs/guide/introduction" }, + { text: "学ぶ", link: "/ja/docs/learn/architecture/introduction" }, + { text: "貢献", link: "/ja/docs/contribute/introduction" }, + { text: "ブログ", link: "/ja/blog/2022-02-10-js-tooling-research" }, + { + text: "プレイグラウンド", + target: "_blank", + link: "https://oxc-project.github.io/oxc/playground/", + }, + ], + sidebar: { + "/ja/docs/guide/": [ { - text: "Grammar", - link: "/docs/learn/ecmascript/grammar", + text: "はじめる", + items: [ + { + text: "イントロダクション", + link: "/ja/docs/guide/introduction", + }, + { text: "ベンチマーク", link: "/ja/docs/guide/benchmarks" }, + ], + }, + { + text: "使いかた", + items: [ + { text: "リンタ", link: "/ja/docs/guide/usage/linter" }, + { text: "パーサ", link: "/ja/docs/guide/usage/parser" }, + { text: "リザルバ", link: "/ja/docs/guide/usage/resolver" }, + ], }, ], - }, - { text: "Performance", link: "/docs/learn/performance" }, - { text: "Ecosystem", link: "/docs/learn/ecosystem" }, - { text: "References", link: "/docs/learn/references" }, - ], - "/docs/contribute/": [ - { - text: "Contributing Guide", - items: [ - { text: "Introduction", link: "/docs/contribute/introduction" }, - { text: "Development", link: "/docs/contribute/development" }, + "/ja/docs/learn/": [ + { + text: "アーキテクチャ", + items: [ + { + text: "イントロダクション", + link: "/ja/docs/learn/architecture/introduction", + }, + { + text: "パーサ", + link: "/ja/docs/learn/architecture/parser", + }, + { + text: "リンタ", + link: "/ja/docs/learn/architecture/linter", + }, + { + text: "リザルバ", + link: "/ja/docs/learn/architecture/resolver", + }, + { + text: "トランスフォーマ", + link: "/ja/docs/learn/architecture/transformer", + }, + { + text: "フォーマッタ", + link: "/ja/docs/learn/architecture/formatter", + }, + { + text: "モディファイア", + link: "/ja/docs/learn/architecture/minifier", + }, + ], + }, + { + text: "ECMAScript", + 
items: [ + { text: "仕様", link: "/ja/docs/learn/ecmascript/spec" }, + { + text: "文法", + link: "/ja/docs/learn/ecmascript/grammar", + }, + ], + }, + { text: "パフォーマンス", link: "/ja/docs/learn/performance" }, + { text: "エコシステム", link: "/ja/docs/learn/ecosystem" }, + { text: "参考文献", link: "/ja/docs/learn/references" }, ], - }, - { - text: "Domain", - items: [ - { text: "Parser", link: "/docs/contribute/parser" }, - { text: "Linter", link: "/docs/contribute/linter" }, - { text: "Prettier", link: "/docs/contribute/prettier" }, - { text: "Resolver", link: "/docs/contribute/resolver" }, + "/ja/docs/contribute/": [ + { + text: "Contributing Guide", + items: [ + { + text: "Introduction", + link: "/ja/docs/contribute/introduction", + }, + { + text: "Development", + link: "/ja/docs/contribute/development", + }, + ], + }, { - text: "Transformer", - link: "/docs/contribute/transformer", + text: "Domain", + items: [ + { text: "Parser", link: "/ja/docs/contribute/parser" }, + { text: "Linter", link: "/ja/docs/contribute/linter" }, + { text: "Prettier", link: "/ja/docs/contribute/prettier" }, + { text: "Resolver", link: "/ja/docs/contribute/resolver" }, + { + text: "Transformer", + link: "/ja/docs/contribute/transformer", + }, + { text: "Formatter", link: "/ja/docs/contribute/formatter" }, + { text: "Codegen", link: "/ja/docs/contribute/codegen" }, + { text: "Minifier", link: "/ja/docs/contribute/minifier" }, + { text: "VSCode", link: "/ja/docs/contribute/vscode" }, + ], }, - { text: "Formatter", link: "/docs/contribute/formatter" }, - { text: "Codegen", link: "/docs/contribute/codegen" }, - { text: "Minifier", link: "/docs/contribute/minifier" }, - { text: "VSCode", link: "/docs/contribute/vscode" }, + { text: "Performance", link: "/ja/docs/contribute/performance" }, + { text: "Showcase", link: "/ja/docs/contribute/showcase" }, ], + "/ja/blog/": BLOG_SIDEBAR.map(({ text, link }) => ({ + text, + link: `/ja${link}`, + })), + }, + editLink: { + pattern: 
"https://github.com/oxc-project/oxc/edit/main/src/:path", + text: "このページを編集する", + }, + footer: { + copyright: "© 2023 OXC プロジェクト", }, - { text: "Performance", link: "/docs/contribute/performance" }, - { text: "Showcase", link: "/docs/contribute/showcase" }, - ], - "/blog/": blogSidebar, + }, + }, + }, + vite: { + resolve: { + alias: { + "@components": `${dirname( + fileURLToPath(import.meta.url), + )}/theme/components`, + "@constants": `${dirname( + fileURLToPath(import.meta.url), + )}/theme/constants`, + }, }, }, }); diff --git a/.vitepress/theme/components/AppBlogPostHeader.vue b/.vitepress/theme/components/AppBlogPostHeader.vue index df59141cb2..174f6edf2e 100644 --- a/.vitepress/theme/components/AppBlogPostHeader.vue +++ b/.vitepress/theme/components/AppBlogPostHeader.vue @@ -6,6 +6,7 @@ import type { TeamMember } from '../constants/team' const vitePressData = useData() const title = computed(() => vitePressData.frontmatter.value.title) +const lang = computed(() => vitePressData.lang.value) const authors = computed(() => (vitePressData.frontmatter.value.authors as TeamMember['id'][]).flatMap((id) => { const member = TEAM_MEMBERS_MAP[id] @@ -20,7 +21,7 @@ const authors = computed(() => (vitePressData.frontmatter.value.authors as TeamM })) const date = computed(() => { const filePath = vitePressData.page.value.filePath - const result = filePath.match(/^blog\/(?\d{4}-\d{2}-\d{2})-.*$/) + const result = filePath.match(/blog\/(?\d{4}-\d{2}-\d{2})-.*$/) const { date } = result?.groups ?? 
{} if (date) { @@ -36,7 +37,7 @@ const datetime = ref('') onMounted(() => { watchEffect(() => { if (date.value) { - datetime.value = new Intl.DateTimeFormat(vitePressData.lang.value, { dateStyle: 'long' }).format(date.value) + datetime.value = new Intl.DateTimeFormat(lang.value, { dateStyle: 'long' }).format(date.value) } }) }) diff --git a/src/docs/guide/introduction.md b/src/docs/guide/introduction.md index 9b9c15bd6d..c20a80175f 100644 --- a/src/docs/guide/introduction.md +++ b/src/docs/guide/introduction.md @@ -5,7 +5,7 @@ outline: deep # What is OXC? diff --git a/src/index.md b/src/index.md index a8f8a3cabd..b2828c8bc5 100644 --- a/src/index.md +++ b/src/index.md @@ -15,11 +15,11 @@ hero: link: https://github.com/oxc-project/oxc features: - title: Parser ✅ - details: 2x faster than swc + details: 2x faster than SWC link: /docs/guide/usage/parser linkText: Learn more - title: Linter ✅ - details: 50 - 100x faster than ESLint + details: 50~100x faster than ESLint link: /docs/guide/usage/linter linkText: Learn more - title: Resolver ✅ diff --git a/src/ja/blog/2022-02-10-js-tooling-research.md b/src/ja/blog/2022-02-10-js-tooling-research.md new file mode 100644 index 0000000000..29943d1fcd --- /dev/null +++ b/src/ja/blog/2022-02-10-js-tooling-research.md @@ -0,0 +1,8 @@ +--- +title: High Performance JavaScript Toolchain +outline: deep +authors: + - boshen +--- + + diff --git a/src/ja/blog/2022-08-08-linter-research.md b/src/ja/blog/2022-08-08-linter-research.md new file mode 100644 index 0000000000..d4f872c532 --- /dev/null +++ b/src/ja/blog/2022-08-08-linter-research.md @@ -0,0 +1,8 @@ +--- +title: A research on JavaScript linters +outline: deep +authors: + - boshen +--- + + diff --git a/src/ja/blog/2023-11-07-announcing-oxc.md b/src/ja/blog/2023-11-07-announcing-oxc.md new file mode 100644 index 0000000000..9d0ed83cdb --- /dev/null +++ b/src/ja/blog/2023-11-07-announcing-oxc.md @@ -0,0 +1,8 @@ +--- +title: Announcing Oxc +outline: deep +authors: + - boshen +--- + 
+ diff --git a/src/ja/blog/2023-11-08-announcing-oxlint.md b/src/ja/blog/2023-11-08-announcing-oxlint.md new file mode 100644 index 0000000000..01da0cb315 --- /dev/null +++ b/src/ja/blog/2023-11-08-announcing-oxlint.md @@ -0,0 +1,8 @@ +--- +title: Oxlint General Availability +outline: deep +authors: + - boshen +--- + + diff --git a/src/ja/blog/index.md b/src/ja/blog/index.md new file mode 100644 index 0000000000..aa7934145c --- /dev/null +++ b/src/ja/blog/index.md @@ -0,0 +1,68 @@ +--- +outline: deep +--- + +[Rome](https://github.com/rome/tools) uses a different set of techniques for parsing JavaScript and TypeScript. +This tutorial summarizes them in learning order for better understanding. + +## History + +- The Rome codebase was rewritten from TypeScript to Rust, see [Rome will be rewritten in Rust](https://rome.tools/blog/2021/09/21/rome-will-be-rewritten-in-rust) +- The decision was made after talking to the author of [rslint](https://github.com/rslint/rslint) and [rust-analyzer](https://github.com/rust-lang/rust-analyzer) +- rust-analyzer proved that IDE-centric tools built around concrete syntax tree are possible +- rslint proved that it is possible to write a JavaScript parser in Rust, with the same base libraries as rust-analyzer +- Rome ported the rslint codebase to their own repo with permission from rslint's author + +## Concrete Syntax Tree + +- The base library is called [rowan](https://github.com/rust-analyzer/rowan), see [overview of rowan](https://github.com/rust-lang/rust-analyzer/blob/master/docs/dev/syntax.md) +- Rowan, also known as red-green trees, is named after the real green [rowan tree](https://en.wikipedia.org/wiki/Rowan) that makes red berries +- The origin of red-green trees is described in this [blog post](https://ericlippert.com/2012/06/08/red-green-trees/), by the authors of the C# programming language +- The whole point of rowan is to define a lossless concrete syntax tree (CST) that describes all the details of the source code and 
provides a set of traversal APIs (parent, children, siblings, etc) +- Read the advantage of having a CST over an AST: [Pure AST based linting sucks](https://rdambrosio016.github.io/rust/2020/09/18/pure-ast-based-linting-sucks.html) +- CST provides the ability to build a fully recoverable parser + +## Grammar + +- Just like an AST, we need to define the grammar. The grammar is auto-generated by using [xtask/codegen](https://github.com/rome/tools/tree/main/xtask/codegen) +- The grammar is generated from the [ungrammar](https://github.com/rust-analyzer/ungrammar) DSL +- The input `ungrammar` source file is in [xtask/codegen/js.ungram](https://github.com/rome/tools/blob/main/xtask/codegen/js.ungram) +- The output of the codegen is in [rome_js_syntax/src/generated](https://github.com/rome/tools/tree/main/crates/rome_js_syntax/src/generated) + +## Entry Point + +The Rome codebase is getting large and slightly difficult to find the parser entry point. + +For first-time contributors, the `rome_cli` crate is the binary entry point for running the code: + +```bash +cargo run -p rome_cli + +touch test.js +cargo run -p rome_cli -- check ./test.js +``` + +`rome_cli` will eventually call `rome_js_parser::parse` + +```rust reference +https://github.com/rome/tools/blob/9815467c66688773bc1bb6ef9a5b2d86ca7b3682/crates/rome_js_parser/src/parse.rs#L178-L187 +``` + +and finally the actual parsing code + +```rust reference +https://github.com/rome/tools/blob/9815467c66688773bc1bb6ef9a5b2d86ca7b3682/crates/rome_js_parser/src/syntax/program.rs#L14-L17 +``` + +## Contributing + +- [CONTRIBUTING.md](https://github.com/rome/tools/blob/main/CONTRIBUTING.md) has instructions on how to contribute +- [rome_js_parser crate doc](https://rome.github.io/tools/rome_js_parser/index.html) has some more details on the parser +- See [`cargo codegen test`](https://github.com/rome/tools/tree/main/xtask/codegen#cargo-codegen-test) for working with parser tests +- See [`cargo 
coverage`](https://github.com/rome/tools/tree/main/xtask/coverage) for working with conformance tests +- Join the [Discord Server](https://discord.com/invite/rome) for inquiries + +:::info +The JavaScript / TypeScript parser is 99% complete, the best way to help is to test Rome in your own codebases +or take a look at the [issues on Github](https://github.com/rome/tools/issues). +::: diff --git a/src/ja/docs/contribute/codegen.md b/src/ja/docs/contribute/codegen.md new file mode 100644 index 0000000000..a176a42488 --- /dev/null +++ b/src/ja/docs/contribute/codegen.md @@ -0,0 +1,6 @@ +--- +title: Codegen +outline: deep +--- + +# Codegen diff --git a/src/ja/docs/contribute/development.md b/src/ja/docs/contribute/development.md new file mode 100644 index 0000000000..975e51f455 --- /dev/null +++ b/src/ja/docs/contribute/development.md @@ -0,0 +1,67 @@ +--- +title: Development +outline: deep +--- + +# Development + +This page explains how to set up the environment for OXC development. + +## Clone Repository + +```bash +git clone --recurse-submodules --shallow-submodules git@github.com:oxc-project/oxc.git +``` + +The `--recurse-submodules` and `--shallow-submodules` flags are needed to initialize git submodules that are needed to run conformance tests. + +## Set Up Project + +### Install Rust + +If you have not yet installed Rust, follow [the official instruction](https://www.rust-lang.org/tools/install) and install Rust. + +After installing Rust, run the following command at the project root: + +```bash +rustup show +``` + +`rustup show` reads the `./rust-toolchain.toml` file and installs the correct Rust toolchain and components for this project. 
+ +### Install Project Tools + +#### `cargo-binstall` + +Some Cargo tools are required to develop OXC, and it is recommended to use [cargo binstall](https://github.com/cargo-bins/cargo-binstall), which provides a low-complexity mechanism to install rust binaries and is a faster way than building them from source by running `cargo install`. + +```bash +cargo install cargo-binstall +``` + +You can also download [the pre-compiled binary](https://github.com/cargo-bins/cargo-binstall#installation) and save it in `~/.cargo/bin`. + +#### `just` + +OXC utilizes [`just`](https://github.com/casey/just), which is a handy way to save and run project-specific commands: + +```bash +cargo binstall just -y +``` + +#### Dependencies + +Run the following command in `justfile` at the project root to install dependencies: + +```bash +just init +``` + +You can see the list of available commands by running `just`. + +You can run `just ready` (or, `just r` in short) to make sure the whole project builds and runs correctly. + +--- + +Now you are ready to develop OXC! +You can check out [good first issues](https://github.com/oxc-project/oxc/contribute) or ask us on [Discord](https://discord.gg/9uXCAwqQZW). diff --git a/src/ja/docs/contribute/formatter.md b/src/ja/docs/contribute/formatter.md new file mode 100644 index 0000000000..5093991676 --- /dev/null +++ b/src/ja/docs/contribute/formatter.md @@ -0,0 +1,9 @@ +--- +title: Formatter +outline: deep +--- + +# Formatter + +While [prettier] has established itself as the de facto code formatter for JavaScript, there is a significant demand in the developer community for a less opinionated alternative. Recognizing this need, our ambition is to undertake research and development to create a new JavaScript formatter that offers increased flexibility and customization options. +Unfortunately we are currently lacking the resources to do so. 
diff --git a/src/ja/docs/contribute/introduction.md b/src/ja/docs/contribute/introduction.md new file mode 100644 index 0000000000..2b22fae6d1 --- /dev/null +++ b/src/ja/docs/contribute/introduction.md @@ -0,0 +1,41 @@ +--- +title: Introduction +outline: deep +--- + +# Introduction + +Thank you for your interest in contributing to the OXC project! +Before starting, please make sure to read the following rules and policies. + +## General Rules + +- We welcome and appreciate any form of contributions. +- Please create an issue or discussion if you want to make an architectural change. + +## PR Rules + +- We [prefer smaller PRs](https://graphite.dev/blog/how-large-prs-slow-down-development) for faster development. +- Try stacked PRs with [graphite](https://graphite.dev) if you are given access permission to the repository. + +## Action Policy + +Taken from [Astral's values](https://astral-sh.notion.site/Astral-s-Values-0ed6a642bcc84e91af6836b2373572f5): + +> We bias towards action, even in the face of uncertainty. We favor _pragmatic doing_ over **prolonged debating**; we favor asking for _forgiveness_ over _permission_. We value **decisiveness — especially** when a decision isn’t clear cut, and **especially** when a decision is reversible. +> +> A bias towards action is _not_ the same as recklessness. Rather, it’s a bias towards making _responsible_ decisions and acting on them with _urgency_, even if we’re left with lingering ambiguity or known unknowns. + +## Development Policy + +- All performance issues (runtime and compilation speed) are considered as bugs in this project. +- Embrace data-oriented design. +- APIs should be simple and well-documented. +- Third-party dependencies should be minimal. +- Avoid the `regex` crate when possible. Regexes are slow, most of them can be rewritten in a performant way by using Rust iterator and string methods. +- Avoid macros, traits or any Rust techniques that would penalize compilation speed. 
+ +## Maintenance Policy + +- Monitor code coverage for unused code. Aim for 99% code coverage. +- CI time should be actively monitored and reduced to speed up merging of PRs. The current CI time on GitHub actions is around 3 minutes. diff --git a/src/ja/docs/contribute/linter.md b/src/ja/docs/contribute/linter.md new file mode 100644 index 0000000000..5bd2390d0e --- /dev/null +++ b/src/ja/docs/contribute/linter.md @@ -0,0 +1,37 @@ +--- +title: Linter +outline: deep +--- + +# Linter + +## Development + +Create a `./test.ts` and then + +```bash +just watch "run -p oxc_cli --bin oxlint -- test.ts" +``` + +## Rule generation + +Create a new lint rule by providing the ESLint name + +```bash +just new-rule name +``` + +Then add the rule to `crates/oxc_linter/src/rules.rs`: + +1. Add to `mod` +2. Add to `oxc_macros::declare_all_lint_rules` at the bottom of the file + +For other plugins, there are also: + +```bash +just new-jest-rule name +just new-ts-rule name +just new-unicorn-rule name +just new-react-rule name +just new-jsx-a11y-rule name +``` diff --git a/src/ja/docs/contribute/minifier.md b/src/ja/docs/contribute/minifier.md new file mode 100644 index 0000000000..5b633397cc --- /dev/null +++ b/src/ja/docs/contribute/minifier.md @@ -0,0 +1,20 @@ +--- +title: Minifier +outline: deep +--- + +# Minifier + +JavaScript minification plays a crucial role in optimizing website performance as it reduces the amount of data sent to users, +resulting in faster page loads. +This holds tremendous economic value, particularly for e-commerce websites, where every second can equate to millions of dollars. + +However, existing minifiers typically require a trade-off between compression quality and speed. +You have to choose between the slowest for the best compression or the fastest for less compression. +But what if we could develop a faster minifier without compromising on compression? 
+ +We are actively working on a prototype that aims to achieve this goal, +by porting all test cases from well-known minifiers such as [google-closure-compiler], [terser], [esbuild], and [tdewolff-minify]. + +Preliminary results indicate that we are on track to achieve our objectives. +With the Oxc minifier, you can expect faster minification times without sacrificing compression quality. diff --git a/src/ja/docs/contribute/parser.md b/src/ja/docs/contribute/parser.md new file mode 100644 index 0000000000..6ea93e3ae4 --- /dev/null +++ b/src/ja/docs/contribute/parser.md @@ -0,0 +1,35 @@ +--- +title: Parser +outline: deep +--- + +# Parser + +We aim to be the fastest Rust-based ready-for-production parser. + +## Conformance Tests + +```bash +just c +``` + +Aliased to `just coverage`, runs the following conformance test suites by using the conformance runner found in [tasks/coverage](https://github.com/oxc-project/oxc/tree/main/tasks/coverage). + +### Test262 + +JavaScript has the [ECMAScript Test Suite](https://github.com/tc39/test262) called Test262. +The goal of Test262 is to provide test material that covers every observable behavior specified in the specification. +Parser conformance uses the [parse phase tests](https://github.com/tc39/test262/blob/main/INTERPRETING.md#negative). + +### Babel + +When new language features are added to JavaScript, it is required to have them implemented by Babel, +this means Babel has another set of [parser tests](https://github.com/babel/babel/tree/main/packages/babel-parser/test). + +### TypeScript + +The TypeScript conformance tests can be found [here](https://github.com/microsoft/TypeScript/tree/main/tests/cases/conformance). + +## Test Runner + +Rome has implemented a test runner for the above test suites, they can be found in [xtask/coverage](https://github.com/rome/tools/tree/main/xtask/coverage). 
diff --git a/src/ja/docs/contribute/performance.md b/src/ja/docs/contribute/performance.md new file mode 100644 index 0000000000..eb25139886 --- /dev/null +++ b/src/ja/docs/contribute/performance.md @@ -0,0 +1,71 @@ +--- +title: Performance +outline: deep +--- + +# Performance Tuning + +## Compile Time + +While Rust has gained a reputation for its comparatively slower compilation speed, +we have dedicated significant effort to fine-tune the Rust compilation speed. +Our aim is to minimize any impact on your development workflow, +ensuring that developing your own Oxc based tools remains a smooth and efficient experience. + +This is demonstrated by our [CI runs](https://github.com/oxc-project/oxc/actions/workflows/ci.yml?query=branch%3Amain), +where warm runs complete in 5 minutes. + +## Profile + +### Mac Xcode Instruments + +Mac Xcode instruments can be used to produce a CPU profile. + +To install Xcode Instruments, install the Command Line Tools: + +```bash +xcode-select --install +``` + +For normal Rust builds, [`cargo instruments`](https://github.com/cmyr/cargo-instruments) can be used as the glue +for profiling and creating the trace file. + +First, change the profile for showing debug symbols. + +```toml +[profile.release] +debug = 1 # debug info with line tables only +strip = false # do not strip symbols +``` + +Then build the project + +```bash +cargo build --release -p oxc_cli --bin oxlint +``` + +The binary is located at `./target/release/oxlint` once the project is built. + +Under the hood, `cargo instruments` invokes the `xcrun` command, equivalent to + +```bash +xcrun xctrace record --template 'Time Profile' --output . --launch -- /path/to/oxc/target/release/oxlint --quiet +``` + +Running the command above produces the following output + +``` +Starting recording with the Time Profiler template. Launching process: oxlint. +Ctrl-C to stop the recording +Target app exited, ending recording... +Recording completed. Saving output file... 
+Output file saved as: Launch_oxlint_2023-09-03_4.41.45 PM_EB179B85.trace +``` + +Open the trace file `open Launch_oxlint_2023-09-03_4.41.45\ PM_EB179B85.trace`. + +To see a top down trace: + +1. On the top panel, click CPUs +2. On the left input box, click `x` then select `Time Profiler` +3. At the bottom panel, click "Call Tree", turn on "Invert Call Tree" and turn off separate by thread. diff --git a/src/ja/docs/contribute/prettier.md b/src/ja/docs/contribute/prettier.md new file mode 100644 index 0000000000..14a62b7548 --- /dev/null +++ b/src/ja/docs/contribute/prettier.md @@ -0,0 +1,6 @@ +--- +title: Prettier +outline: deep +--- + +# Prettier diff --git a/src/ja/docs/contribute/resolver.md b/src/ja/docs/contribute/resolver.md new file mode 100644 index 0000000000..729560262c --- /dev/null +++ b/src/ja/docs/contribute/resolver.md @@ -0,0 +1,11 @@ +--- +title: Resolver +outline: deep +--- + +# Resolver + +Module resolution plays a crucial role in JavaScript tooling, especially for tasks like multi-file analysis or bundling. However, it can often become a performance bottleneck. +To address this, we are actively working on porting [enhanced-resolve]. + +[eslint-plugin-import] will be our first application for the resolver, since it is currently a performance and complexity blocker for a lot of projects. 
diff --git a/src/ja/docs/contribute/showcase.md b/src/ja/docs/contribute/showcase.md new file mode 100644 index 0000000000..679f3595f8 --- /dev/null +++ b/src/ja/docs/contribute/showcase.md @@ -0,0 +1,11 @@ +--- +title: Showcase +outline: deep +--- + +# Showcase + +Example PRs adding `oxlint`: + +- [Affine](https://github.com/toeverything/AFFiNE/pull/4867) +- [Rspack](https://github.com/oxc-project/rspack/pull/3999) diff --git a/src/ja/docs/contribute/transformer.md b/src/ja/docs/contribute/transformer.md new file mode 100644 index 0000000000..79ba0f7950 --- /dev/null +++ b/src/ja/docs/contribute/transformer.md @@ -0,0 +1,9 @@ +--- +title: Transformer +outline: deep +--- + +# Transformer + +A transformer is responsible for turning higher versions of ECMAScript to a lower version that can be used in older browsers. +We are currently focusing on an esnext to es2015 transpiler. See the [umbrella issue](https://github.com/oxc-project/oxc/issues/974) for details. diff --git a/src/ja/docs/contribute/vscode.md b/src/ja/docs/contribute/vscode.md new file mode 100644 index 0000000000..a76998d2be --- /dev/null +++ b/src/ja/docs/contribute/vscode.md @@ -0,0 +1,18 @@ +--- +title: VSCode Extension +outline: deep +--- + +# VSCode Extension + +## Development + +Build the extension and run it inside vscode: + +1. `pnpm install` +2. `pnpm run build` +3. `pnpm run package` +4. open vscode and run the command palette "Extensions: Install from VSIX..." +5. find the `oxc-vscode-x.x.x.vsix` file from `./editor/vscode` directory +6. open a `.js` / `.ts` file, add `debugger;` and save +7. 
see the warning `eslint(no-debugger): debugger statement is not allowed - oxc` diff --git a/src/ja/docs/guide/benchmarks.md b/src/ja/docs/guide/benchmarks.md new file mode 100644 index 0000000000..64cf42dc0b --- /dev/null +++ b/src/ja/docs/guide/benchmarks.md @@ -0,0 +1,8 @@ +--- +title: Benchmarks +outline: deep +--- + +# Benchmarks + +TBD diff --git a/src/ja/docs/guide/introduction.md b/src/ja/docs/guide/introduction.md new file mode 100644 index 0000000000..c20a80175f --- /dev/null +++ b/src/ja/docs/guide/introduction.md @@ -0,0 +1,31 @@ +--- +title: Introduction +outline: deep +--- + + + +# What is OXC? + +The Oxidation Compiler is a collection of high-performance tools for the JavaScript and TypeScript language. + +We are building a parser, linter, formatter, transpiler, minifier, resolver ... all written in Rust. + +Our goal is to create and empower the fastest and most user friendly tools for [The Third Age of JavaScript](https://www.swyx.io/js-third-age). + +## Philosophy + +This project shares the same philosophies as [Biome][biome] and [Ruff][ruff]. + +1. JavaScript tooling could be rewritten in a more performant language. +2. An integrated toolchain can tap into efficiencies that are not available to a disparate set of tools. 
+ +## Core Team + + + +[biome]: https://biomejs.dev +[ruff]: https://beta.ruff.rs diff --git a/src/ja/docs/guide/philosophy.md b/src/ja/docs/guide/philosophy.md new file mode 100644 index 0000000000..09d8841f18 --- /dev/null +++ b/src/ja/docs/guide/philosophy.md @@ -0,0 +1,8 @@ +--- +title: Philosophy +outline: deep +--- + +# Philosophy + +TBD diff --git a/src/ja/docs/guide/usage/benchmarks.md b/src/ja/docs/guide/usage/benchmarks.md new file mode 100644 index 0000000000..a01c30d629 --- /dev/null +++ b/src/ja/docs/guide/usage/benchmarks.md @@ -0,0 +1,6 @@ +--- +title: Benchmarks +outline: deep +--- + +# Benchmarks diff --git a/src/ja/docs/guide/usage/linter.md b/src/ja/docs/guide/usage/linter.md new file mode 100644 index 0000000000..183147cbdc --- /dev/null +++ b/src/ja/docs/guide/usage/linter.md @@ -0,0 +1,167 @@ +--- +title: Linter +outline: deep +badges: + - src: https://img.shields.io/npm/dw/oxlint + alt: npm + - src: https://img.shields.io/github/stars/oxc-project/oxc + alt: GitHub Repo stars +--- + + + +# oxlint + +## Features + +- Runs 50-100x faster than ESLint, and scales with the number of CPU cores ([Benchmark](https://github.com/oxc-project/bench-javascript-linter)) +- No configuration required +- Convention over configuration +- Reports errors and useless code by default +- Supports [.eslintignore](https://eslint.org/docs/latest/use/configure/ignore#the-eslintignore-file) +- Supports [ESLint comment disabling](https://eslint.org/docs/latest/use/configure/rules#disabling-rules) + +## Adoptions + +- A 5M LOC typescript codebase previously running ESLint parallelized across 48 workers in CI taking 75 mins (12m wall time), it is now 8 seconds on a single worker. 
([Source](https://twitter.com/boshen_c/status/1714827365136929029)) + +## Installation + +Install [oxlint](https://www.npmjs.com/package/oxlint): + +::: code-group + +```sh [npm] +$ npm add -D oxlint +``` + +```sh [pnpm] +$ pnpm add -D oxlint +``` + +```sh [yarn] +$ yarn add -D oxlint +``` + +```sh [bun] +$ bun add -D oxlint +``` + +::: + +You can also run oxlint directly: + +::: code-group + +```sh [npm] +$ npx oxlint@latest +``` + +```sh [pnpm] +$ pnpm dlx oxlint@latest +``` + +```sh [yarn] +$ yarn dlx oxlint@latest +``` + +```sh [bun] +$ bunx oxlint@latest +``` + +```sh [deno] +$ deno run oxlint@latest +``` + +::: + +You can download the binary files from [the latest GitHub releases](https://github.com/oxc-project/oxc/releases/latest). + +## Integration + +### IDEs + +#### VSCode Extension + +You can download [the official VSCode extension](https://marketplace.visualstudio.com/items?itemName=oxc.oxc-vscode). + +#### Vim / Nvim (coc) + +Add the following config to `coc-settings.json`: + +```javascript +{ + "languageserver": { + "oxc": { + "command": "oxc_vscode", + "filetypes": [ + "typescript", + "javascript" + ], + "rootPatterns": [ + ".git" + ] + } + } +} +``` + +### Continuous Integration + +It is recommended to run `oxlint` before `eslint` for faster feedback loops, +since `oxlint` only takes a few seconds to run. + +#### GitHub Actions + +```yaml +jobs: + oxlint: + name: Lint JS + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - run: npx --yes oxlint@latest +``` + +### Nix + +Check [nixpkgs repository](https://github.com/NixOS/nixpkgs/blob/master/pkgs/development/tools/oxlint/default.nix) for more detail. + +## Commands + +- `npx oxlint@latest --rules` shows the list of rules +- `npx oxlint@latest --help` shows usage instructions + + ``` + Usage: oxlint [-A=NAME | -D=NAME]... [--fix] [PATH]... + + Allowing / Denying Multiple Lints + For example `-D correctness -A no-debugger` or `-A all -D no-debugger`. 
+ The default category is "-D correctness". + Use "--rules" for rule names. + Use "--help --help" for rule categories. + -A, --allow=NAME Allow the rule or category (suppress the lint) + -D, --deny=NAME Deny the rule or category (emit an error) + + Enable Plugins + --import-plugin Enable the experimental import plugin and detect ESM problems + --jest-plugin Enable the Jest plugin and detect test problems + --jsx-a11y-plugin Enable the JSX-a11y plugin and detect accessibility problems + + Available positional items: + PATH Single file, single path or list of paths + + Available options: + -h, --help Prints help information + ``` + +## System Requirements + +`oxlint` is built for the following systems: + +- darwin-arm64 +- darwin-x64 +- linux-arm64 +- linux-x64 +- win32-arm64 +- win32-x64 diff --git a/src/ja/docs/guide/usage/parser.md b/src/ja/docs/guide/usage/parser.md new file mode 100644 index 0000000000..eee8bea027 --- /dev/null +++ b/src/ja/docs/guide/usage/parser.md @@ -0,0 +1,72 @@ +--- +title: Parser +outline: deep +badges: + - src: https://img.shields.io/npm/dw/oxc-parser + alt: npm +--- + + + +# Parser + +## Features + +- 2x faster then [SWC][url-swc] parser +- By far the fastest and most conformant JavaScript and TypeScript (including JSX and TSX) parser written in Rust + +You can check [benchmark][url-benchmark] for more detail. 
+ +## Installation + +### Rust + +Install crates: + +```sh +$ cargo install oxc +``` + +```sh +$ cargo install oxc_ast +``` + +```sh +$ cargo install oxc_parser +``` + +- The umbrella crate [oxc][url-oxc-crate] exports all public crates from this repository +- The AST and parser crates [oxc_ast][url-oxc-ast-crate] and [oxc_parser][url-oxc-parser-crate] are production ready + +### Node.js + +Install [oxc-parser][url-oxc-parser-npm]: + +::: code-group + +```sh [npm] +$ npm add -D oxc-parser +``` + +```sh [pnpm] +$ pnpm add -D oxc-parser +``` + +```sh [yarn] +$ yarn add -D oxc-parser +``` + +```sh [bun] +$ bun add -D oxc-parser +``` + +::: + + + +[url-swc]: https://swc.rs +[url-benchmark]: https://github.com/oxc-project/bench-javascript-parser-written-in-rust +[url-oxc-crate]: https://docs.rs/oxc +[url-oxc-ast-crate]: https://docs.rs/oxc_ast +[url-oxc-parser-crate]: https://docs.rs/oxc_parser +[url-oxc-parser-npm]: https://www.npmjs.com/package/oxc-parser diff --git a/src/ja/docs/guide/usage/philosophy.md b/src/ja/docs/guide/usage/philosophy.md new file mode 100644 index 0000000000..9e7a08fe1c --- /dev/null +++ b/src/ja/docs/guide/usage/philosophy.md @@ -0,0 +1,6 @@ +--- +title: Philosophy +outline: deep +--- + +# Philosophy diff --git a/src/ja/docs/guide/usage/resolver.md b/src/ja/docs/guide/usage/resolver.md new file mode 100644 index 0000000000..b1382b62bd --- /dev/null +++ b/src/ja/docs/guide/usage/resolver.md @@ -0,0 +1,80 @@ +--- +title: Resolver +outline: deep +badges: + - src: https://img.shields.io/npm/dw/oxc-resolver + alt: npm +--- + + + +# Resolver + +Node.js Module Resolution. + +- Feature complete +- All configuration options are aligned with enhanced-resolve + +## Installation + +### Rust + +Install [oxc_resolver][url-oxc-resolver-crate] crate: + +```sh +$ cargo install oxc_resolver +``` + +You should also check [documentation][url-oxc-resolver-docs]. 
+ +### Node.js + +Install [oxc-resolver][url-oxc-resolver-npm]: + +::: code-group + +```sh [npm] +$ npm add -D oxc-parser +``` + +```sh [pnpm] +$ pnpm add -D oxc-parser +``` + +```sh [yarn] +$ yarn add -D oxc-parser +``` + +```sh [bun] +$ bun add -D oxc-parser +``` + +::: + +## Example + +```rust +use std::{env, path::PathBuf}; + +use oxc_resolver::{ResolveOptions, Resolver}; + +fn main() { + let path = env::args().nth(1).expect("require path"); + let request = env::args().nth(2).expect("require request"); + let path = PathBuf::from(path).canonicalize().unwrap(); + + println!("path: {path:?}"); + println!("request: {request}"); + + match Resolver::new(ResolveOptions::default()).resolve(path, &request) { + Err(error) => println!("Error: {error}"), + Ok(resolution) => println!("Resolved: {}", resolution.full_path().to_string_lossy()), + } +} +``` + + + +[url-oxc-resolver-crate]: https://crates.io/oxc_resolver +[url-oxc-resolver-docs]: https://docs.rs/oxc_resolver +[url-oxc-resolver-npm]: https://www.npmjs.com/package/oxc-resolver diff --git a/src/ja/docs/learn/architecture/formatter.md b/src/ja/docs/learn/architecture/formatter.md new file mode 100644 index 0000000000..8db419eec6 --- /dev/null +++ b/src/ja/docs/learn/architecture/formatter.md @@ -0,0 +1,6 @@ +--- +title: Formatter +outline: deep +--- + +# Formatter diff --git a/src/ja/docs/learn/architecture/introduction.md b/src/ja/docs/learn/architecture/introduction.md new file mode 100644 index 0000000000..e3cf93830d --- /dev/null +++ b/src/ja/docs/learn/architecture/introduction.md @@ -0,0 +1,6 @@ +--- +title: Introduction +outline: deep +--- + +# Introduction diff --git a/src/ja/docs/learn/architecture/linter.md b/src/ja/docs/learn/architecture/linter.md new file mode 100644 index 0000000000..c68456bf33 --- /dev/null +++ b/src/ja/docs/learn/architecture/linter.md @@ -0,0 +1,8 @@ +--- +title: Linter +outline: deep +--- + +# Example + +See `crates/oxc_linter/examples/linter.rs` for the bare minimum linter 
implementation. diff --git a/src/ja/docs/learn/architecture/minifier.md b/src/ja/docs/learn/architecture/minifier.md new file mode 100644 index 0000000000..1ea8ee19cd --- /dev/null +++ b/src/ja/docs/learn/architecture/minifier.md @@ -0,0 +1,6 @@ +--- +title: Minifier +outline: deep +--- + +# Minifier diff --git a/src/ja/docs/learn/architecture/parser.md b/src/ja/docs/learn/architecture/parser.md new file mode 100644 index 0000000000..bf51cd0ba2 --- /dev/null +++ b/src/ja/docs/learn/architecture/parser.md @@ -0,0 +1,29 @@ +--- +title: Parser +outline: deep +--- + +# Parser + +Oxc maintains its own AST and parser, which is by far the fastest and most conformant JavaScript and TypeScript (including JSX and TSX) parser written in Rust. + +As the parser often represents a key performance bottleneck in JavaScript tooling, +any minor improvements can have a cascading effect on our downstream tools. +By developing our parser, we have the opportunity to explore and implement well-researched performance techniques. + +While many existing JavaScript tools rely on [estree] as their AST specification, +a notable drawback is its abundance of ambiguous nodes. +This ambiguity often leads to confusion during development with [estree]. + +The Oxc AST differs slightly from the [estree] AST by removing ambiguous nodes and introducing distinct types. +For example, instead of using a generic [estree] `Identifier`, +the Oxc AST provides specific types such as `BindingIdentifier`, `IdentifierReference`, and `IdentifierName`. + +This clear distinction greatly enhances the development experience by aligning more closely with the ECMAScript specification. 
+ +## How is it so fast + +- AST is allocated in a [memory arena](https://crates.io/crates/bumpalo) for fast AST memory allocation and deallocation +- Short strings are inlined by [CompactString](https://crates.io/crates/compact_str) +- No other heap allocations are done except the above two +- Scope binding, symbol resolution and some syntax errors are not done in the parser, they are delegated to the semantic analyzer diff --git a/src/ja/docs/learn/architecture/resolver.md b/src/ja/docs/learn/architecture/resolver.md new file mode 100644 index 0000000000..7f306a6972 --- /dev/null +++ b/src/ja/docs/learn/architecture/resolver.md @@ -0,0 +1,6 @@ +--- +title: Resolver +outline: deep +--- + +# Resolver diff --git a/src/ja/docs/learn/architecture/transformer.md b/src/ja/docs/learn/architecture/transformer.md new file mode 100644 index 0000000000..a5e019e3bd --- /dev/null +++ b/src/ja/docs/learn/architecture/transformer.md @@ -0,0 +1,6 @@ +--- +title: Transformer +outline: deep +--- + +# Transformer diff --git a/src/ja/docs/learn/ecmascript/grammar.md b/src/ja/docs/learn/ecmascript/grammar.md new file mode 100644 index 0000000000..79ddca7ac5 --- /dev/null +++ b/src/ja/docs/learn/ecmascript/grammar.md @@ -0,0 +1,691 @@ +--- +title: Grammar +outline: deep +--- + +JavaScript has one of the most challenging grammar to parse, +this tutorial details all the sweat and tears I had while learning it. + +## LL(1) Grammar + +According to [Wikipedia](https://en.wikipedia.org/wiki/LL_grammar), + +> an LL grammar is a context-free grammar that can be parsed by an LL parser, which parses the input from Left to right + +The first **L** means the scanning the source from **L**eft to right, +and the second **L** means the construction of a **L**eftmost derivation tree. + +Context-free and the (1) in LL(1) means a tree can be constructed by just peeking at the next token and nothing else. 
+ +LL Grammars are of particular interest in academia because we are lazy human beings and we want to write programs that generate parsers automatically so we don't need to write parsers by hand. + +Unfortunately, most industrial programming languages do not have a nice LL(1) grammar, +and this applies to JavaScript too. + +:::info +Mozilla started the [jsparagus](https://github.com/mozilla-spidermonkey/jsparagus) project a few years ago +and wrote a [LALR parser generator in Python](https://github.com/mozilla-spidermonkey/jsparagus/tree/master/jsparagus). +They haven't updated it much in the past two years and they sent a strong message at the end of [js-quirks.md](https://github.com/mozilla-spidermonkey/jsparagus/blob/master/js-quirks.md) + +> What have we learned today? +> +> - Do not write a JS parser. +> - JavaScript has some syntactic horrors in it. But hey, you don't make the world's most widely used programming language by avoiding all mistakes. You do it by shipping a serviceable tool, in the right circumstances, for the right users. + +::: + +--- + +The only practical way to parse JavaScript is to write a recursive descent parser by hand because of the nature of its grammar, +so let's learn all the quirks in the grammar before we shoot ourselves in the foot. + +The list below starts simple and will become difficult to grasp, +so please take grab a coffee and take your time. + +## Identifiers + +There are three types of identifiers defined in `#sec-identifiers`, + +``` +IdentifierReference[Yield, Await] : +BindingIdentifier[Yield, Await] : +LabelIdentifier[Yield, Await] : +``` + +`estree` and some ASTs do not distinguish the above identifiers, +and the specification does not explain them in plain text. + +`BindingIdentifier`s are declarations and `IdentifierReference`s are references to binding identifiers. 
+For example in `var foo = bar`, `foo` is a `BindingIdentifier` and `bar` is a `IdentifierReference` in the grammar: + +``` +VariableDeclaration[In, Yield, Await] : + BindingIdentifier[?Yield, ?Await] Initializer[?In, ?Yield, ?Await] opt + +Initializer[In, Yield, Await] : + = AssignmentExpression[?In, ?Yield, ?Await] +``` + +follow `AssignmentExpression` into `PrimaryExpression` we get + +``` +PrimaryExpression[Yield, Await] : + IdentifierReference[?Yield, ?Await] +``` + +Declaring these identifiers differently in the AST will greatly simply downstream tools, especially for semantic analysis. + +```rust +pub struct BindingIdentifier { + pub node: Node, + pub name: Atom, +} + +pub struct IdentifierReference { + pub node: Node, + pub name: Atom, +} +``` + +--- + +## Class and Strict Mode + +ECMAScript Class is born after strict mode, so they decided that everything inside a class must be strict mode for simplicity. +It is stated as such in `#sec-class-definitions` with just a `Node: A class definition is always strict mode code.` + +It is easy to declare strict mode by associating it with function scopes, but a `class` declaration does not have a scope, +we need to keep an extra state just for parsing classes. + +```rust reference +https://github.com/swc-project/swc/blob/f9c4eff94a133fa497778328fa0734aa22d5697c/crates/swc_ecma_parser/src/parser/class_and_fn.rs#L85 +``` + +--- + +## Legacy Octal and Use Strict + +`#sec-string-literals-early-errors` disallows escaped legacy octal inside strings `"\01"`: + +``` +EscapeSequence :: + LegacyOctalEscapeSequence + NonOctalDecimalEscapeSequence + +It is a Syntax Error if the source text matched by this production is strict mode code. +``` + +The best place to detect this is inside the lexer, it can ask the parser for strict mode state and throw errors accordingly. 
+ +But, this becomes impossible when mixed with directives: + +```javascript reference +https://github.com/tc39/test262/blob/747bed2e8aaafe8fdf2c65e8a10dd7ae64f66c47/test/language/literals/string/legacy-octal-escape-sequence-prologue-strict.js#L16-L19 +``` + +`use strict` is declared after the escaped legacy octal, yet the syntax error needs to be thrown. +Fortunately, no real code uses directives with legacy octals ... unless you want to pass the test262 case from above. + +--- + +## Non-simple Parameter and Strict Mode + +Identical function parameters is allowed in non-strict mode `function foo(a, a) { }`, +and we can forbid this by adding `use strict`: `function foo(a, a) { "use strict" }`. +Later on in es6, other grammars were added to function parameters, for example `function foo({ a }, b = c) {}`. + +Now, what happens if we write the following where "01" is a strict mode error? + +```javaScript +function foo(value=(function() { return "\01" }())) { + "use strict"; + return value; +} +``` + +More specifically, what should we do if there is a strict mode syntax error inside the parameters thinking from the parser perspective? +So in `#sec-function-definitions-static-semantics-early-errors`, it just bans this by stating + +``` +FunctionDeclaration : +FunctionExpression : + +It is a Syntax Error if FunctionBodyContainsUseStrict of FunctionBody is true and IsSimpleParameterList of FormalParameters is false. +``` + +Chrome throws this error with a mysterious message "Uncaught SyntaxError: Illegal 'use strict' directive in function with non-simple parameter list". + +A more in-depth explanation is described in [this blog post](https://humanwhocodes.com/blog/2016/10/the-ecmascript-2016-change-you-probably-dont-know/) by the author of ESLint. 
+ +:::info + +Fun fact, the above rule does not apply if we are targeting `es5` in TypeScript, it transpiles to + +```javaScript +function foo(a, b) { + "use strict"; + if (b === void 0) { b = "\01"; } +} +``` + +::: + +--- + +## Parenthesized Expression + +Parenthesized expressions are supposed to not have any semantic meanings? +For instance the AST for `((x))` can just be a single `IdentifierReference`, not `ParenthesizedExpression` -> `ParenthesizedExpression` -> `IdentifierReference`. +And this is the case for JavaScript grammar. + +But ... who would have thought it can have run-time meanings. +Found in [this estree issue](https://github.com/estree/estree/issues/194), it shows that + +```javascript +> fn = function () {}; +> fn.name +< "fn" + +> (fn) = function () {}; +> fn.name +< '' +``` + +So eventually acorn and babel added the `preserveParens` option for compatibility. + +--- + +## Function Declaration in If Statement + +If we follow the grammar precisely in `#sec-ecmascript-language-statements-and-declarations`: + +``` +Statement[Yield, Await, Return] : + ... lots of statements + +Declaration[Yield, Await] : + ... declarations +``` + +The `Statement` node we define for our AST would obviously not contain `Declaration`, + +but in Annex B `#sec-functiondeclarations-in-ifstatement-statement-clauses`, +it allows declaration inside the statement position of `if` statements in non-strict mode: + +```javascript +if (x) function foo() {} +else function bar() {} +``` + +--- + +## Label statement is legit + +We probably have never written a single line of labelled statement, but it is legit in modern JavaScript and not banned by strict mode. + +The following syntax is correct, it returns a labelled statement (not object literal). 
```javascript +<Foo + bar={() => { + baz: "quaz"; + }} +/> +// ^^^^^^^^^^^ `LabelledStatement` +```
+ +--- + +## Block-Level Function Declarations + +In Annex B.3.2 `#sec-block-level-function-declarations-web-legacy-compatibility-semantics`, +an entire page is dedicated to explain how `FunctionDeclaration` is supposed to behave in `Block` statements. +It boils down to + +```javascript reference +https://github.com/acornjs/acorn/blob/11735729c4ebe590e406f952059813f250a4cbd1/acorn/src/scope.js#L30-L35 +``` + +The name of a `FunctionDeclaration` needs to be treated the same as a `var` declaration if its inside a function declaration. +This code snippet errors with a re-declaration error since `bar` is inside a block scope: + +```javascript +function foo() { + if (true) { + var bar; + function bar() {} // redeclaration error + } +} +``` + +meanwhile, the following does not error because it is inside a function scope, function `bar` is treated as a var declaration: + +```javascript +function foo() { + var bar; + function bar() {} +} +``` + +--- + +## Grammar Context + +The syntactic grammar has 5 context parameters for allowing and disallowing certain constructs, +namely `[In]`, `[Return]`, `[Yield]`, `[Await]` and `[Default]`. + +It is best to keep a context during parsing, for example in Rome: + +```rust reference +https://github.com/rome/tools/blob/5a059c0413baf1d54436ac0c149a829f0dfd1f4d/crates/rome_js_parser/src/state.rs#L404-L425 +``` + +And toggle and check these flags accordingly by following the grammar. 
+ +## AssignmentPattern vs BindingPattern + +In `estree`, the left-hand side of an `AssignmentExpression` is a `Pattern`: + +``` +extend interface AssignmentExpression { + left: Pattern; +} +``` + +and the left-hand side of a `VariableDeclarator` is a `Pattern`: + +``` +interface VariableDeclarator <: Node { + type: "VariableDeclarator"; + id: Pattern; + init: Expression | null; +} +``` + +A `Pattern` can be a `Identifier`, `ObjectPattern`, `ArrayPattern`: + +``` +interface Identifier <: Expression, Pattern { + type: "Identifier"; + name: string; +} + +interface ObjectPattern <: Pattern { + type: "ObjectPattern"; + properties: [ AssignmentProperty ]; +} + +interface ArrayPattern <: Pattern { + type: "ArrayPattern"; + elements: [ Pattern | null ]; +} +``` + +But from the specification perspective, we have the following JavaScript: + +```javascript +// AssignmentExpression: +{ foo } = bar; + ^^^ IdentifierReference +[ foo ] = bar; + ^^^ IdentifierReference + +// VariableDeclarator +var { foo } = bar; + ^^^ BindingIdentifier +var [ foo ] = bar; + ^^^ BindingIdentifier +``` + +This starts to become confusing because we now have a situation where we cannot directly distinguish whether the `Identifier` is a `BindingIdentifier` or a `IdentifierReference` +inside a `Pattern`: + +```rust +enum Pattern { + Identifier, // Is this a `BindingIdentifier` or a `IdentifierReference`? + ArrayPattern, + ObjectPattern, +} +``` + +This will lead to all sorts of unnecessary code further down the parser pipeline. +For example, when setting up the scope for semantic analysis, we need to inspect the parents of this `Identifier` +to determine whether we should bind it to the scope or not. + +A better solution is to fully understand the specification and decide what to do. 
+ +The grammar for `AssignmentExpression` and `VariableDeclaration` are defined as: + +``` +13.15 Assignment Operators + +AssignmentExpression[In, Yield, Await] : + LeftHandSideExpression[?Yield, ?Await] = AssignmentExpression[?In, ?Yield, ?Await] + +13.15.5 Destructuring Assignment + +In certain circumstances when processing an instance of the production +AssignmentExpression : LeftHandSideExpression = AssignmentExpression +the interpretation of LeftHandSideExpression is refined using the following grammar: + +AssignmentPattern[Yield, Await] : + ObjectAssignmentPattern[?Yield, ?Await] + ArrayAssignmentPattern[?Yield, ?Await] +``` + +``` +14.3.2 Variable Statement + +VariableDeclaration[In, Yield, Await] : + BindingIdentifier[?Yield, ?Await] Initializer[?In, ?Yield, ?Await]opt + BindingPattern[?Yield, ?Await] Initializer[?In, ?Yield, ?Await] +``` + +The specification distinguishes this two grammar by defining them separately with an `AssignmentPattern` and a `BindingPattern`. + +So in situations like this, do not be afraid to deviate from `estree` and define extra AST nodes for our parser: + +```rust +enum BindingPattern { + BindingIdentifier, + ObjectBindingPattern, + ArrayBindingPattern, +} + +enum AssignmentPattern { + IdentifierReference, + ObjectAssignmentPattern, + ArrayAssignmentPattern, +} +``` + +I was in a super confusing state for a whole week until I finally reached enlightenment: +we need to define an `AssignmentPattern` node and a `BindingPattern` node instead of a single `Pattern` node. + +- `estree` must be correct because people have been using it for years so it cannot be wrong? +- how are we going to cleanly distinguish the `Identifier`s inside the patterns without defining two separate nodes? + I just cannot find where the grammar is? +- After a whole day of navigating the specification ... 
+ the grammar for `AssignmentPattern` is in the 5th subsection of the main section "13.15 Assignment Operators" with the subtitle "Supplemental Syntax" 🤯 - + this is really out of place because all grammar is defined in the main section, not like this one defined after the "Runtime Semantics" section + +--- + +:::caution +The following cases are really difficult to grasp. Here be dragons. +::: + +## Ambiguous Grammar + +Let's first think like a parser and solve the problem - given the `/` token, is it a division operator or the start of a regex expression? + +```javascript +a / b; +a / / regex /; +a /= / regex /; +/ regex / / b; +/=/ / /=/; +``` + +It is almost impossible, isn't it? Let's break these down and follow the grammar. + +The first thing we need to understand is that the syntactic grammar drives the lexical grammar as stated in `#sec-ecmascript-language-lexical-grammar` + +> There are several situations where the identification of lexical input elements is sensitive to the syntactic grammar context that is consuming the input elements. + +This means that the parser is responsible for telling the lexer which token to return next. +The above example indicates that the lexer needs to return either a `/` token or a `RegExp` token. +For getting the correct `/` or `RegExp` token, the specification says: + +> The InputElementRegExp goal symbol is used in all syntactic grammar contexts where a RegularExpressionLiteral is permitted ... +> In all other contexts, InputElementDiv is used as the lexical goal symbol. 
+ +And the syntax for `InputElementDiv` and `InputElementRegExp` is: + +``` +InputElementDiv :: + WhiteSpace + LineTerminator + Comment + CommonToken + DivPunctuator <---------- the `/` and `/=` token + RightBracePunctuator + +InputElementRegExp :: + WhiteSpace + LineTerminator + Comment + CommonToken + RightBracePunctuator + RegularExpressionLiteral <-------- the `RegExp` token +``` + +This means whenever the grammar reaches `RegularExpressionLiteral`, `/` needs to be tokenized as a `RegExp` token (and throw an error if it does not have a matching `/`). +In all other cases, we'll tokenize `/` as a slash token. + +Let's walk through an example: + +``` +a / / regex / +^ ------------ PrimaryExpression:: IdentifierReference + ^ ---------- MultiplicativeExpression: MultiplicativeExpression MultiplicativeOperator ExponentiationExpression + ^^^^^^^^ - PrimaryExpression: RegularExpressionLiteral +``` + +This statement does not match any other start of `Statement`, +so it'll go down the `ExpressionStatement` route: + +`ExpressionStatement` --> `Expression` --> `AssignmentExpression` --> ... --> +`MultiplicativeExpression` --> ... --> +`MemberExpression` --> `PrimaryExpression` --> `IdentifierReference`. + +We stopped at `IdentifierReference` and not `RegularExpressionLiteral`, +so the statement "In all other contexts, InputElementDiv is used as the lexical goal symbol" applies. +The first slash is a `DivPunctuator` token. + +Since this is a `DivPunctuator` token, +the grammar `MultiplicativeExpression: MultiplicativeExpression MultiplicativeOperator ExponentiationExpression` is matched, +the right-hand side is expected to be an `ExponentiationExpression`. + +Now we are at the second slash in `a / /`. 
+By following `ExponentiationExpression`, +we reach `PrimaryExpression: RegularExpressionLiteral` because `RegularExpressionLiteral` is the only matching grammar with a `/`: + +``` +RegularExpressionLiteral :: + / RegularExpressionBody / RegularExpressionFlags +``` + +This second `/` will be tokenized as `RegExp` because +the specification states "The InputElementRegExp goal symbol is used in all syntactic grammar contexts where a RegularExpressionLiteral is permitted". + +:::info +As an exercise, try and follow the grammar for `/=/ / /=/`. +::: + +--- + +## Cover Grammar + +Read the [V8 blog post](https://v8.dev/blog/understanding-ecmascript-part-4) on this topic first. + +To summarize, the specification states the following three cover grammars: + +#### CoverParenthesizedExpressionAndArrowParameterList + +``` +PrimaryExpression[Yield, Await] : + CoverParenthesizedExpressionAndArrowParameterList[?Yield, ?Await] + +When processing an instance of the production +PrimaryExpression[Yield, Await] : CoverParenthesizedExpressionAndArrowParameterList[?Yield, ?Await] + the interpretation of CoverParenthesizedExpressionAndArrowParameterList is refined using the following grammar: + +ParenthesizedExpression[Yield, Await] : + ( Expression[+In, ?Yield, ?Await] ) +``` + +``` +ArrowFunction[In, Yield, Await] : + ArrowParameters[?Yield, ?Await] [no LineTerminator here] => ConciseBody[?In] + +ArrowParameters[Yield, Await] : + BindingIdentifier[?Yield, ?Await] + CoverParenthesizedExpressionAndArrowParameterList[?Yield, ?Await] +``` + +These definitions define: + +```javascript +let foo = (a, b, c); // SequenceExpression +let bar = (a, b, c) => {}; // ArrowExpression + ^^^^^^^^^ CoverParenthesizedExpressionAndArrowParameterList +``` + +A simple but cumbersome approach to solving this problem is to parse it as a `Vec<Expression>` first, +then write a converter function to convert it to an `ArrowParameters` node, i.e. each individual `Expression` needs to be converted to a `BindingPattern`. 
+ +It should be noted that, if we are building the scope tree within the parser, +i.e. create the scope for arrow expression during parsing, +but do not create one for a sequence expression, +it is not obvious how to do this. [esbuild](https://github.com/evanw/esbuild) solved this problem by creating a temporary scope first, +and then dropping it if it is not an `ArrowExpression`. + +This is stated in its [architecture document](https://github.com/evanw/esbuild/blob/master/docs/architecture.md#symbols-and-scopes): + +> This is mostly pretty straightforward except for a few places where the parser has pushed a scope and is in the middle of parsing a declaration only to discover that it's not a declaration after all. This happens in TypeScript when a function is forward-declared without a body, and in JavaScript when it's ambiguous whether a parenthesized expression is an arrow function or not until we reach the => token afterwards. This would be solved by doing three passes instead of two so we finish parsing before starting to set up scopes and declare symbols, but we're trying to do this in just two passes. So instead we call popAndDiscardScope() or popAndFlattenScope() instead of popScope() to modify the scope tree later if our assumptions turn out to be incorrect. 
+ +--- + +#### CoverCallExpressionAndAsyncArrowHead + +``` +CallExpression : + CoverCallExpressionAndAsyncArrowHead + +When processing an instance of the production +CallExpression : CoverCallExpressionAndAsyncArrowHead +the interpretation of CoverCallExpressionAndAsyncArrowHead is refined using the following grammar: + +CallMemberExpression[Yield, Await] : + MemberExpression[?Yield, ?Await] Arguments[?Yield, ?Await] +``` + +``` +AsyncArrowFunction[In, Yield, Await] : + CoverCallExpressionAndAsyncArrowHead[?Yield, ?Await] [no LineTerminator here] => AsyncConciseBody[?In] + +CoverCallExpressionAndAsyncArrowHead[Yield, Await] : + MemberExpression[?Yield, ?Await] Arguments[?Yield, ?Await] + +When processing an instance of the production +AsyncArrowFunction : CoverCallExpressionAndAsyncArrowHead => AsyncConciseBody +the interpretation of CoverCallExpressionAndAsyncArrowHead is refined using the following grammar: + +AsyncArrowHead : + async [no LineTerminator here] ArrowFormalParameters[~Yield, +Await] +``` + +These definitions define: + +```javascript +async (a, b, c); // CallExpression +async (a, b, c) => {} // AsyncArrowFunction +^^^^^^^^^^^^^^^ CoverCallExpressionAndAsyncArrowHead +``` + +This looks strange because `async` is not a keyword. The first `async` is a function name. + +--- + +#### CoverInitializedName + +``` +13.2.5 Object Initializer + +ObjectLiteral[Yield, Await] : + ... + +PropertyDefinition[Yield, Await] : + CoverInitializedName[?Yield, ?Await] + +Note 3: In certain contexts, ObjectLiteral is used as a cover grammar for a more restricted secondary grammar. +The CoverInitializedName production is necessary to fully cover these secondary grammars. However, use of this production results in an early Syntax Error in normal contexts where an actual ObjectLiteral is expected. 
+ +13.2.5.1 Static Semantics: Early Errors + +In addition to describing an actual object initializer the ObjectLiteral productions are also used as a cover grammar for ObjectAssignmentPattern and may be recognized as part of a CoverParenthesizedExpressionAndArrowParameterList. When ObjectLiteral appears in a context where ObjectAssignmentPattern is required the following Early Error rules are not applied. In addition, they are not applied when initially parsing a CoverParenthesizedExpressionAndArrowParameterList or CoverCallExpressionAndAsyncArrowHead. + +PropertyDefinition : CoverInitializedName + * It is a Syntax Error if any source text is matched by this production. +``` + +``` +13.15.1 Static Semantics: Early Errors + +AssignmentExpression : LeftHandSideExpression = AssignmentExpression +If LeftHandSideExpression is an ObjectLiteral or an ArrayLiteral, the following Early Error rules are applied: + * LeftHandSideExpression must cover an AssignmentPattern. +``` + +These definitions define: + +```javascript +({ prop = value } = {}); // ObjectAssignmentPattern +({ prop = value }); // ObjectLiteral with SyntaxError +``` + +Parsers need to parse `ObjectLiteral` with `CoverInitializedName`, +and throw the syntax error if it does not reach `=` for `ObjectAssignmentPattern`. + +As an exercise, which one of the following `=` should throw a syntax error? + +```javascript +let { x = 1 } = { x = 1 } = { x = 1 } +``` diff --git a/src/ja/docs/learn/ecmascript/spec.md b/src/ja/docs/learn/ecmascript/spec.md new file mode 100644 index 0000000000..e418a5df4a --- /dev/null +++ b/src/ja/docs/learn/ecmascript/spec.md @@ -0,0 +1,146 @@ +--- +title: Specification +outline: deep +--- + +[The ECMAScript® 2023 Language Specification](https://tc39.es/ecma262/) details everything about the JavaScript language, so anyone can implement their own JavaScript engine. 
+ +The following chapters need to be studied for our parser: + +- Chapter 5: Notational Conventions +- Chapter 11: ECMAScript Language: Source Text +- Chapter 12: ECMAScript Language: Lexical Grammar +- Chapter 13 - 16: Expressions, Statements, Functions, Classes, Scripts and Modules +- Annex B: Additional ECMAScript Features for Web Browsers +- Annex C: The Strict Mode of ECMAScript + +For navigation inside the specification: + +- Anything clickable has a permanent link, they are shown on the URL as anchors, for example `#sec-identifiers` +- Hovering over things may show a tooltip, clicking on `References` shows all its references + +## Notational Conventions + +[Chapter 5.1.5 Grammar Notation](https://tc39.es/ecma262/#sec-grammar-notation) is the section we need to read. + +The things to note here are: + +### Recursion + +This is how lists are presented in the grammar. + +``` +ArgumentList : + AssignmentExpression + ArgumentList , AssignmentExpression +``` + +means + +```javascript +a, b = 1, c = 2 +^_____________^ ArgumentList + ^__________^ ArgumentList, AssignmentExpression, + ^___^ AssignmentExpression +``` + +### Optional + +The `_opt_` suffix for optional syntax. For example, + +``` +VariableDeclaration : + BindingIdentifier Initializer_opt +``` + +means + +```javascript +var binding_identifier; +var binding_identifier = Initializer; + ______________ Initializer_opt +``` + +### Parameters + +The `[Return]` and `[In]` are parameters of the grammar. + +For example + +``` +ScriptBody : + StatementList[~Yield, ~Await, ~Return] +``` + +means top-level yield, await and return are not allowed in scripts, but + +``` +ModuleItem : + ImportDeclaration + ExportDeclaration + StatementListItem[~Yield, +Await, ~Return] +``` + +allows for top-level await. + +## Source Text + +[Chapter 11.2 Types of Source Code](https://tc39.es/ecma262/#sec-types-of-source-code) tells us that +there is a huge distinction between script code and module code. 
+And there is a `use strict` mode that makes the grammar saner by disallowing old JavaScript behaviors. + +**Script Code** is not strict, `use strict` needs to be inserted at the top of the file to make script code strict. +In HTML we write `<script src="javascript.js"></script>`. + +**Module Code** is automatically strict. +In HTML we write `<script type="module" src="main.mjs"></script>`. + +## ECMAScript Language: Lexical Grammar + +For a more in-depth explanation, read the V8 blog on [Understanding the ECMAScript spec](https://v8.dev/blog/understanding-ecmascript-part-3). + +### [Automatic Semicolon Insertion](https://tc39.es/ecma262/#sec-automatic-semicolon-insertion) + +This section describes all the rules where we can omit a semicolon while writing JavaScript. +All the explanation boils down to + +```rust + pub fn asi(&mut self) -> Result<()> { + if self.eat(Kind::Semicolon) || self.can_insert_semicolon() { + return Ok(()); + } + let range = self.prev_node_end..self.cur_token().start; + Err(SyntaxError::AutoSemicolonInsertion(range.into())) + } + + pub const fn can_insert_semicolon(&self) -> bool { + self.cur_token().is_on_new_line || matches!(self.cur_kind(), Kind::RCurly | Kind::Eof) + } +``` + +The `asi` function needs to be manually called where applicable, for example at the end of a statement: + +```rust + fn parse_debugger_statement(&mut self) -> Result<Statement<'a>> { + let node = self.start_node(); + self.expect(Kind::Debugger)?; + // highlight-next-line + self.asi()?; + self.ast.debugger_statement(self.finish_node(node)) + } +``` + +:::info + +This section on asi is written with a parser in mind, +it explicitly states that the source text is parsed from left to right, +which makes it almost impossible to write the parser in any other way. +The author of jsparagus made a rant about this [here](https://github.com/mozilla-spidermonkey/jsparagus/blob/master/js-quirks.md#automatic-semicolon-insertion-). 
+ +> The specification for this feature is both very-high-level and weirdly procedural (“When, as the source text is parsed from left to right, a token is encountered...”, as if the specification is telling a story about a browser. As far as I know, this is the only place in the spec where anything is assumed or implied about the internal implementation details of parsing.) But it would be hard to specify ASI any other way. + +::: + +## Expressions, Statements, Functions, Classes, Scripts and Modules + +It takes a while to understand the syntactic grammar, then apply them to writing a parser. diff --git a/src/ja/docs/learn/ecosystem.md b/src/ja/docs/learn/ecosystem.md new file mode 100644 index 0000000000..4182f937eb --- /dev/null +++ b/src/ja/docs/learn/ecosystem.md @@ -0,0 +1,25 @@ +--- +title: Ecosystem +outline: deep +--- + +Contributing to oxc will benefit the whole frontend development community. +It would be a great pleasure if you could join as well. + +We are being integrated into: + +## [Rspack](https://www.rspack.dev) + +A fast Rust-based web bundler. + +## Rolldown + +To be announced. + +## [Ezno Type Checker](https://github.com/kaleidawave/ezno) + +A JavaScript compiler and TypeScript checker written in Rust with a focus on static analysis and runtime performance. + +## [Tyvm](https://github.com/zackradisic/tyvm) + +An experimental bytecode interpreter for type-level Typescript. diff --git a/src/ja/docs/learn/performance.md b/src/ja/docs/learn/performance.md new file mode 100644 index 0000000000..79f64e18d0 --- /dev/null +++ b/src/ja/docs/learn/performance.md @@ -0,0 +1,719 @@ +--- +title: Performance +outline: deep +--- + +# Pursuit of Performance on Building a JavaScript Compiler + +Originally posted on https://rustmagazine.org/issue-3/javascript-compiler/ + +## On Performance + +After two years of writing Rust, performance has become an ingrained discipline for me - it boils down to +**allocate less memory** and **use fewer CPU cycles**. 
+ +However, achieving optimal performance can be difficult without the knowledge of the problem domain or awareness of potential solutions. + +I will take you on my journey of performance and optimization in the following sections. +My preferred method of learning is through a combination of research, trial, and error, +so the following sections will be organized as such. + +# Parsing + +Oxc is a standard compiler that includes an abstract syntax tree (AST), a lexer, and a recursive descent parser. + +## Abstract Syntax Tree (AST) + +The first architectural design for a compiler is its AST. + +All JavaScript tools work on the AST level, for example: + +- A linter (e.g. ESLint) checks the AST for errors +- A formatter (e.g.prettier) prints the AST back to JavaScript text +- A minifier (e.g. terser) transforms the AST +- A bundler connects all import and export statements between ASTs from different files + +It will be painful to build these tools if the AST is not user-friendly. + +For JavaScript, the most used AST specification is [estree](https://github.com/estree/estree). +My first AST version replicates estree: + +```rust +pub struct Program { + pub node: Node, + pub body: Vec, +} + +pub enum Statement { + VariableDeclarationStatement(VariableDeclaration), +} + +pub struct VariableDeclaration { + pub node: Node, + pub declarations: Vec, +} +``` + +In Rust, declaring a tree is relatively straightforward, as it involves using structs and enums. + +### Memory Allocation + +I worked on this version of AST for a couple of months while writing the parser. +And one day I decided to profile it. The profiler showed the program was spending a lot of time calling `drop`. + +💡 Nodes of the AST are allocated on the heap via `Box` or `Vec`, they are allocated individually so they are dropped in sequential order. + +Is there a solution to mitigate this? 
+ +So while working on the parser I studied some of the other JavaScript parsers written in Rust, +mainly [ratel](https://github.com/ratel-rust/ratel-core) and [jsparagus](https://github.com/mozilla-spidermonkey/jsparagus). + +Both of these parsers declare their AST with a lifetime annotation, + +```rust +pub enum Statement<'ast> { + Expression(ExpressionNode<'ast>), +} +``` + +and they have an accompanying file called `arena.rs`. + +I did not understand what it does so I neglected it until I started reading about their usage of memory arenas: +[bumpalo](https://docs.rs/bumpalo/latest/bumpalo/) and [toolshed](https://docs.rs/toolshed/latest/toolshed/struct.Arena.html). + +In summary, a memory arena allocates memory upfront in chunks or pages and deallocates it all at once when the arena is dropped. +The AST is allocated on the arena so dropping the AST is a fast operation. + +Another nice side effect that comes with this is that, +the AST is constructed in a specific order, and tree traversal also follows the same order, resulting in linear memory access during the visitation process. +This access pattern will be efficient since all nearby memory will be read into the CPU cache in pages, resulting in faster access times. + +Unfortunately it can be challenging for Rust beginners to use memory arenas because all data structures and relevant functions need to be parameterized by lifetime annotations. +It took me five attempts to allocate the AST inside `bumpalo`. + +Changing to a memory arena for the AST resulted in around a 20% performance improvement. 
+ +### Enum Sizes + +Due to the recursive nature of ASTs, we need to define the types in a way to avoid the "recursive without indirection" error: + +``` +error[E0072]: recursive types `Enum` and `Variant` have infinite size + --> crates/oxc_linter/src/lib.rs:1:1 + | +1 | enum Enum { + | ^^^^^^^^^ +2 | Variant(Variant), + | ------- recursive without indirection +3 | } +4 | struct Variant { + | ^^^^^^^^^^^^^^ +5 | field: Enum, + | ---- recursive without indirection + | +help: insert some indirection (e.g., a `Box`, `Rc`, or `&`) to break the cycle + | +2 ~ Variant(Box<Variant>), +3 | } +4 | struct Variant { +5 ~ field: Box<Enum>, +``` + +There are two ways to do this. Either box the enum in the enum variant or box the struct field. + +I found the same question in the Rust forum back in 2017, +[Is there a better way to represent an abstract syntax tree?](https://users.rust-lang.org/t/is-there-a-better-way-to-represent-an-abstract-syntax-tree/9549/4) + +Aleksey (matklad) told us to box the enum variants to keep the `Expression` enum small. But what does this mean? + +As it turns out, the memory layout of a Rust enum is dependent on the sizes of all its variants, and its total byte size depends on the largest variant. +For example, the following enum will take up 56 bytes (1 byte for the tag, 48 bytes for the payload, and 8 bytes for alignment). + +```rust +enum Enum { + A, // 0 byte payload + B(String), // 24 byte payload + C { first: String, last: String }, // 48 byte payload +} +``` + +In a typical JavaScript AST, the `Expression` enum holds 45 variants and the `Statement` enum holds 20 variants. They take up more than 200 bytes if not boxed by enum variants. +These 200 bytes have to be passed around, and also accessed every time we do a `matches!(expr, Expression::Variant(_))` check, which is not very cache friendly for performance. + +So to make memory access efficient, it is best to box the enum variants. 
+ +The [perf-book](https://nnethercote.github.io/perf-book/type-sizes.html) describes additional info on how to find large types. + +I also copied the test for restricting small enum sizes. + +```rust +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +#[test] +fn no_bloat_enum_sizes() { + use std::mem::size_of; + use crate::ast::*; + assert_eq!(size_of::<Statement>(), 16); + assert_eq!(size_of::<Expression>(), 16); + assert_eq!(size_of::<Declaration>(), 16); +} +``` + +Boxing the enum variants resulted in around a 10% speed-up. + +### Span + +Occasionally, we may not realize that a smaller memory footprint is possible until we spend some extra time examining the data structures. + +In this instance, the leaf of all AST nodes contains a small data structure called the "span", which is used for storing the byte offset from the source text and comprises two `usize`s. + +```rust +pub struct Node { + pub start: usize, + pub end: usize, +} +``` + +It was [pointed out to me](https://github.com/Boshen/oxc/pull/4#pullrequestreview-1294538874) that I can safely change `usize` to `u32` +to reduce peak memory, because an offset that does not fit in a `u32` would require a source file larger than 4GB. + +Changing to `u32` improved performance by [up to 5% on large files](https://github.com/Boshen/oxc/pull/31). + +### Strings and Identifiers + +Inside the AST, one may attempt to use a string reference to the source text for identifier names and string literals. + +```rust +pub struct StringLiteral<'a> { + pub value: &'a str, +} + +pub struct Identifier<'a> { + pub name: &'a str, +} +``` + +But unfortunately in JavaScript, strings and identifiers can have [escape sequences](https://mathiasbynens.be/notes/javascript-escapes), +e.g. `'\251'`, `'\xA9'` and `'©'` are the same for the copyright symbol. + +This implies that we must compute the escaped values and allocate a new `String`. 
+ +### String interning + +When there are lots of heap-allocated strings, +a technique called [string interning](https://en.wikipedia.org/wiki/String_interning) can be used to reduce total memory by storing only one copy of each distinct string value. + +[string-cache](https://crates.io/crates/string_cache) is a popular and widely used library published by the servo team. +Initially, I used the `string-cache` library for identifiers and strings in the AST. +The performance of the parser was fast in a single thread, +but when I started implementing the linter where there are multiple parsers running in parallel with rayon, +CPU utilization was at about 50% of all cores. + +Upon profiling, a method called `parking_lot::raw_mutex::RawMutex::lock_slow` showed up at the top of the execution time. +I did not know much about locks and multi-core programming, +but a global lock was just strange to start with, +so I decided to remove the `string-cache` library to enable full CPU utilization. + +Removing `string-cache` from the AST improved the performance of parallel parsing by about 30%. + +#### string-cache + +Half a year later, while working on another performance-critical project, +the `string-cache` library resurfaced. It was blocking all the threads during parallel text parsing. + +I decided to study what `string-cache` does because I was +prepared this time after reading the book [Rust Atomics and Locks](https://marabos.nl/atomics/) by Mara Bos. + +Here are the +[relevant](https://github.com/servo/string-cache/blob/6c044c91bb3d8212dae931152a7895f498574f71/src/dynamic_set.rs#L41-L42) +[code](https://github.com/servo/string-cache/blob/6c044c91bb3d8212dae931152a7895f498574f71/src/atom.rs#L204) +around the lock. Please note that the code was written eight years ago in 2015. + +```rust +pub(crate) static DYNAMIC_SET: Lazy<Mutex<Set>> = Lazy::new(|| { + Mutex::new({ + +// ... 
in another place +let ptr: std::ptr::NonNull = + DYNAMIC_SET.lock().insert(string_to_add, hash.g); +``` + +So this is straightforward. It locks the data structure `Set` every time a string is being inserted. +As this routine is called frequently within a parser, its performance is impacted negatively by synchronization. + +Now let's take a look at the [`Set` data structure](https://github.com/servo/string-cache/blob/6c044c91bb3d8212dae931152a7895f498574f71/src/dynamic_set.rs#L53-L86) +and see what it does: + +```rust +pub(crate) fn insert(&mut self, string: Cow, hash: u32) -> NonNull { + let bucket_index = (hash & BUCKET_MASK) as usize; + { + let mut ptr: Option<&mut Box> = self.buckets[bucket_index].as_mut(); + + while let Some(entry) = ptr.take() { + if entry.hash == hash && *entry.string == *string { + if entry.ref_count.fetch_add(1, SeqCst) > 0 { + return NonNull::from(&mut **entry); + } + entry.ref_count.fetch_sub(1, SeqCst); + break; + } + ptr = entry.next_in_bucket.as_mut(); + } + } + debug_assert!(mem::align_of::() >= ENTRY_ALIGNMENT); + let string = string.into_owned(); + let mut entry = Box::new(Entry { + next_in_bucket: self.buckets[bucket_index].take(), + hash, + ref_count: AtomicIsize::new(1), + string: string.into_boxed_str(), + }); + let ptr = NonNull::from(&mut *entry); + self.buckets[bucket_index] = Some(entry); + + ptr +} +``` + +It looks like it is looking for a bucket to store the string and it inserts the string if it is not in the bucket. + +💡 Is this linear probing? If this is linear probing then this `Set` is just a `HashMap` without saying it is a `HashMap`. +💡 If this is a `HashMap`, then `Mutex` is a concurrent hashmap. + +Although the solution may seem straightforward when we know what to look for, it took me a month to figure this out because I was unaware of the issue. +When it became evident that this is just a concurrent hashmap, applying the Mutex to the buckets instead of the entire hashmap was a clear and logical solution. 
+Within an hour of implementing this change, I submitted a pull request and was happy with the outcome 😃. + +``` +https://github.com/servo/string-cache/pull/268 +``` + +It is worth mentioning that string interning is a battlefield within the Rust community. +For the example shown in [this blog post](https://dev.to/cad97/string-interners-in-rust-797), +there are single-threaded libraries such as `string-interner`, `lasso`, `lalrpop-intern`, `intaglio` and `strena`. + +Since we are parsing files in parallel, an option is to utilize a multi-threaded string interner library such as [`ustr`](https://crates.io/crates/ustr). +However, after profiling both `ustr` and the enhanced version of `string-cache`, it became apparent that the performance was still below expectations compared to the approach I am going to explain below. + +Some preliminary guesses for the sub-par performance are: + +- Hashing - the interners need to hash the string for deduplication +- Indirection - we need to read the string value from a "far away" heap, which is not cache friendly + +### String Inlining + +So we are back to the initial problem of having to allocate lots of strings. +Fortunately, there is a partial solution to this problem if we look at what kind of data we are dealing with: +short JavaScript variable names and some short strings. +There is a technique called string inlining, +where we store all of the bytes of a string on the stack. + +In essence, we want the following enum to store our string. + +```rust +enum Str { + Static(&'static str), + Inline(InlineRepresentation), + Heap(String), +} +``` + +To minimize the size of the enum, `InlineRepresentation` should have the same size as `String`. + +```rust +#[cfg(all(target_arch = "x86_64", target_pointer_width = "64"))] +#[test] +fn test_size() { + use std::mem::size_of; + assert_eq!(size_of::<String>(), size_of::<InlineRepresentation>()); +} +``` + +Many crates in the Rust community aim to optimize memory usage. This is yet another battlefield within the community. 
+The most popular ones are + +- [smol_str](https://crates.io/crates/smol_str) +- [smartstring](https://crates.io/crates/smartstring) +- [compact_str](https://crates.io/crates/compact_str) +- [flexstr](https://crates.io/crates/flexstr) + +Each of these crates has unique characteristics and approaches to achieving memory optimization, leading to a variety of trade-offs and considerations when choosing which one to use. +For example `smol_str` and `flexstr` clones are O(1). +`flexstr` can store 22 bytes, `smol_str` and `smartstring` can store 23 bytes, and `compact_str` can store 24 bytes on 64-bit systems. + +[https://fasterthanli.me](https://fasterthanli.me) has a [deep dive](https://fasterthanli.me/articles/small-strings-in-rust) on this topic. + +Changing `String` to `compact_str::CompactStr` reduced memory allocations by a large amount. + +## Lexer + +### Token + +The job of the lexer (also known as tokenizer) is to turn source text into structured data called a token. + +```rust +pub struct Token { + pub kind: Kind, +} +``` + +To make it easier to work with, a token kind is typically defined as an enum in Rust. The variants of the enum hold the corresponding data for each token. + +```rust +pub enum Kind { + // Keywords + For, + While, + ... + // Literals + String(String), + Num(f64), + ... +} +``` + +This enum currently uses 32 bytes, and a lexer often needs to construct millions of these token `Kind`s. +Every time it constructs a `Kind::For` or `Kind::While`, it has to allocate 32 bytes of memory on the stack. + +A clever way to improve this is to break up the enum variant to keep `Kind` to a single byte and move the values into another enum, + +```rust +pub struct Token<'a> { + pub kind: Kind, + pub value: TokenValue +} + +pub enum TokenValue { + None, + String(String), + Num(f64), +} +``` + +Since we control all the parsing code, it is our job to keep this safe by always declaring the corresponding token value to its kind. 
+ +While a `TokenValue` of 32 bytes is already quite small, it may still have a negative impact on performance because it is allocated frequently. + +Let's take a look at the `String` type and see what we can find, by using the "go-to definition" in our code editors, +we'll go through `String` -> `Vec` -> `RawVec`: + +```rust +pub struct String { + vec: Vec, +} + +pub struct Vec { + buf: RawVec, + len: usize, +} + +pub struct RawVec { + ptr: Unique, + cap: usize, + alloc: A, +} +``` + +As advertised, a `String` is just a `Vec` of `u8`s, and a `Vec` has a length and a capacity field. +Since we are never going to mutate this string, an optimization in terms of memory usage would be to drop the cap field and use a string slice (`&str`) instead. + +```rust +pub enum TokenValue<'a> { + None, + String(&'a str), + Num(f64), +} +``` + +`TokenValue` becomes 24 bytes. + +While using a string slice instead of String in `TokenValue` would reduce memory usage, it does come with the downside of adding a lifetime annotation. +This can lead to issues with the borrow checker and the lifetime annotation will propagate to the rest of the codebase, making our code somewhat difficult to manage. +I lost the borrow checking game 8 months ago but [finally won](https://github.com/Boshen/oxc/pull/174) when I revisited this. + +When it makes sense, we can always go for the owned version of the immutable data instead of using references. +For example `Box` for `String` and `Box<[u8]>` for `Vec`. + +In summary, we can always come up with tricks to keep our data structures small, +and it will sometimes reward us performance improvement. + +### Cow + +I first encountered the term `Cow` when I was studying jsparagus's code, +it has an infrastructure called [`AutoCow`](https://github.com/mozilla-spidermonkey/jsparagus/blob/212f6bdbc2cae909e7d5cfebf36284560c3c4ef4/crates/parser/src/lexer.rs#L2256). + +I vaguely understood what the code was doing. 
+When a JavaScript string is being tokenized, +it allocates a new string when it encounters an escaped sequence or it returns the original string slice if it doesn't: + +```rust +fn finish(&mut self, lexer: &Lexer<'alloc>) -> &'alloc str { + match self.value.take() { + Some(arena_string) => arena_string.into_bump_str(), + None => &self.start[..self.start.len() - lexer.chars.as_str().len()], + } +} +``` + +This is clever because 99.9% of the time it will not allocate a new string because escaped strings are rare. + +But the term `Cow` or "clone-on-write smart pointer" never made sense to me. + +> The type Cow is a smart pointer providing clone-on-write functionality: it can enclose and provide immutable access to borrowed data, and clone the data lazily when mutation or ownership is required. The type is designed to work with general borrowed data via the Borrow trait. + +If you are new to Rust (like I was), then this description just doesn't help (I still don't understand what it is talking about). + +It was [pointed out to me](https://twitter.com/zack_overflow/status/1620387950264713216) that `clone-on-write` is +just a use case of this data structure. A better name should be called `RefOrOwned` because it is a type that contains either +owned data or a reference. + +### SIMD + +When I was going through the old Rust blogs, the [Announcing the Portable SIMD Project Group](https://blog.rust-lang.org/inside-rust/2020/09/29/Portable-SIMD-PG.html) +caught my attention. +I always wanted to play around with SIMD but never got the chance. +After some research, I found a use case that may apply to a parser: +[How quickly can you remove spaces from a string?](https://lemire.me/blog/2017/01/20/how-quickly-can-you-remove-spaces-from-a-string) by Daniel Lemire. +So it turns out this has been done before, in a JSON parser called RapidJSON, +which [uses SIMD to remove whitespaces](https://rapidjson.org/md_doc_internals.html#SkipwhitespaceWithSIMD). 
+ +So eventually with the help of portable-SIMD and RapidJSON's code, +not only did I manage to [skip whitespaces](https://github.com/Boshen/oxc/pull/26), +I also managed to [skip multi-line comments](https://github.com/Boshen/oxc/pull/23) as well. + +Both changes improved the performance by a few percent. + +### Keyword match + +At the top of the performance profile, +there is a hot code path that takes about 1 - 2% of the total execution time. + +It tries to match a string to a JavaScript keyword: + +```rust +fn match_keyword(s: &str) -> Self { + match s { + "as" => As, + "do" => Do, + "if" => If, + ... + "constructor" => Constructor, + _ => Ident, + } +} +``` + +With the addition of TypeScript, there are 84 strings for us to match from. +After some research, I found a blog from V8 [Blazingly fast parsing, part 1: optimizing the scanner](https://v8.dev/blog/scanner), +it describes its [keyword matching code](https://source.chromium.org/chromium/chromium/src/+/main:v8/src/parsing/keywords-gen.h) in detail. + +> Since the list of keywords is static, we can compute a perfect hash function that for each identifier gives us at most one candidate keyword. V8 uses gperf to compute this function. The result computes a hash from the length and first two identifier characters to find the single candidate keyword. We only compare the identifier with the keyword if the length of that keyword matches the input identifier length. + +So a quick hash plus an integer comparison should be faster than 84 string comparisons. +But we tried [again](https://github.com/Boshen/oxc/pull/140) and [again](https://github.com/Boshen/oxc/pull/171) to no avail. + +As it turns out, [LLVM already optimized our code](https://github.com/Boshen/oxc/issues/151#issuecomment-1464818336). 
+By using `--emit=llvm-ir` on `rustc`, we find the relevant code: + +``` + switch i64 %s.1, label %bb6 [ + i64 2, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit.i" + i64 3, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit280.i" + i64 4, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit325.i" + i64 5, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit380.i" + i64 6, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit450.i" + i64 7, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit540.i" + i64 8, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit590.i" + i64 9, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit625.i" + i64 10, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit655.i" + i64 11, label %"_ZN4core5slice3cmp81_$LT$impl$u20$core..cmp..PartialEq$LT$$u5b$B$u5d$$GT$$u20$for$u20$$u5b$A$u5d$$GT$2eq17h46d405acb5da4997E.exit665.i" + ], !dbg !191362 +``` + +`%s` is the string, `%s.1` is its length ... it is branching on the string length! The compiler is smarter than us 😃. + +(Yes, we got so serious with this so we started looking at LLVM IR and assembly code.) 
+ +Later on, [@strager](https://twitter.com/strager) posted a very educational YouTube video [Faster than Rust and C++: the PERFECT hash table](https://www.youtube.com/watch?v=DMQ_HcNSOAI) on this topic. +The video taught us a systematic approach to reasoning about fine-tuning performance problems. + +In the end, we concluded that the simple keyword match is enough for us since it was only about 1 - 2% of the performance, +and the effort is not worth it after spending a few days on it - Rust does not have all the pieces we need to build this perfect hashmap. + +## Linter + +A linter is a program that analyzes the source code for problems. + +The simplest linter visits each AST node and checks for rules. +[The visitor pattern](https://rust-unofficial.github.io/patterns/patterns/behavioural/visitor.html) can be used: + +```rust +pub trait Visit<'a>: Sized { + // ... lots of visit functions + + fn visit_debugger_statement(&mut self, stmt: &'a DebuggerStatement) { + // report error + } +} +``` + +### Parent Pointing Tree + +It is easy to go down the AST by using visitors, but what if we want to go up the tree to collect some information? + +This problem is particularly challenging to solve in Rust, because it is not possible to add a pointer to the nodes of the AST. + +Let's forget about ASTs for a second and focus on generic trees with the property of a node having a pointer to its parent. +To build a generic tree, each tree node needs to be the same type `Node`, we can reference their parent by using `Rc`: + +```rust +struct Node { + parent: Option<Rc<Node>>, +} +``` + +It is tedious to work with this pattern if we need mutation, and +it is not performant because the nodes have to be dropped at different times. + +A more efficient solution is to use a `Vec` as its backing storage and use indexes for pointers.
+ +```rust +struct Tree { + nodes: Vec<Node> +} + +struct Node { + parent: Option<usize> // index into `nodes` +} +``` + +[`indextree`](https://crates.io/crates/indextree) is a nice library for this task. + +Back to our AST, we can build an `indextree` by having the nodes point to an enum that wraps every single kind of AST node. +We call this the untyped AST. + +```rust +struct Node<'a> { + kind: AstKind<'a> +} + +enum AstKind<'a> { + BlockStatement(&'a BlockStatement<'a>), + // ... + ArrayExpression(&'a ArrayExpression<'a>), + // ... + Class(&'a Class<'a>), + // ... +} +``` + +The last missing piece is to have callbacks inside the visitor pattern that build this tree. + +```rust +pub trait Visit<'a> { + fn enter_node(&mut self, _kind: AstKind<'a>) {} + fn leave_node(&mut self, _kind: AstKind<'a>) {} + + fn visit_block_statement(&mut self, stmt: &'a BlockStatement<'a>) { + let kind = AstKind::BlockStatement(stmt); + self.enter_node(kind); + self.visit_statements(&stmt.body); + self.leave_node(kind); + } +} + +impl<'a> Visit<'a> for TreeBuilder<'a> { + fn enter_node(&mut self, kind: AstKind<'a>) { + self.push_ast_node(kind); + } + + fn leave_node(&mut self, kind: AstKind<'a>) { + self.pop_ast_node(); + } +} +``` + +The final data structure becomes `indextree::Arena<Node<'a>>` where each `Node` has a pointer to an `AstKind<'a>`. +`indextree::Node::parent` can be called to get the parent of any node. + +The nice benefit of making this parent pointing tree is that it becomes convenient to visit AST nodes without having to implement any visitors. +A linter becomes a simple loop over all the nodes inside the `indextree`: + +```rust +for node in nodes { + match node.get().kind { + AstKind::DebuggerStatement(stmt) => { + // report error + } + _ => {} + } +} +``` + +A full example is provided [here](https://github.com/Boshen/oxc/blob/main/crates/oxc_linter/examples/linter.rs). + +At first glance, this process may seem slow and inefficient.
+However, visiting the typed AST through a memory arena and pushing a pointer into `indextree` are efficient linear memory access patterns. +The current benchmark indicates that this approach is 84 times faster than ESLint, so it is certainly fast enough for our purposes. + +### Processing files in parallel + +The linter uses the [ignore](https://crates.io/crates/ignore) crate for directory traversal; +it supports `.gitignore` and adds additional ignore files such as `.eslintignore`. + +A small problem with this crate is that it does not have a parallel interface: +there is no `par_iter` for `ignore::Walk::new(".")`. + +Instead, [primitives need to be used](https://github.com/Boshen/oxc/blob/b51c2df3cc43b9f7d57380acc1552fac7db75fab/crates/oxc_cli/src/lint/runner.rs#L116-L139): + +```rust +let walk = Walk::new(&self.options); +rayon::spawn(move || { + walk.iter().for_each(|path| { + tx_path.send(path).unwrap(); + }); +}); + +let linter = Arc::clone(&self.linter); +rayon::spawn(move || { + while let Ok(path) = rx_path.recv() { + let tx_error = tx_error.clone(); + let linter = Arc::clone(&linter); + rayon::spawn(move || { + if let Some(diagnostics) = Self::lint_path(&linter, &path) { + tx_error.send(diagnostics).unwrap(); + } + drop(tx_error); + }); + } +}); +``` + +This unlocks a useful feature where we can print all diagnostics in a single thread, which leads us to the final topic of this article. + +### Printing is slow + +Printing the diagnostics was fast, but I have been working on this project for so long that it felt like an eternity to print thousands of diagnostic messages every time I run the linter on huge monorepos.
+So I started searching through the Rust GitHub issues and eventually found the relevant ones: + +- [io::Stdout should use block buffering when appropriate](https://github.com/rust-lang/rust/issues/60673) +- [stdin and stdout performance considerations are not documented](https://github.com/rust-lang/rust/issues/106133) + +In summary, a `println!` call will lock `stdout` every time it encounters a newline, this is called line buffering. +To make things print faster, we need to opt-in for block buffering which is [documented here](https://rust-cli.github.io/book/tutorial/output.html#a-note-on-printing-performance). + +```rust +use std::io::{self, Write}; + +let stdout = io::stdout(); // get the global stdout entity +let mut handle = io::BufWriter::new(stdout); // optional: wrap that handle in a buffer +writeln!(handle, "foo: {}", 42); // add `?` if you care about errors here +``` + +Or acquire the lock on stdout. + +```rust +let stdout = io::stdout(); // get the global stdout entity +let mut handle = stdout.lock(); // acquire a lock on it +writeln!(handle, "foo: {}", 42); // add `?` if you care about errors here +``` diff --git a/src/ja/docs/learn/references.md b/src/ja/docs/learn/references.md new file mode 100644 index 0000000000..f891b0231f --- /dev/null +++ b/src/ja/docs/learn/references.md @@ -0,0 +1,50 @@ +--- +title: References +outline: deep +--- + +# References + +## Projects Using Oxc + +## 📚 Learning Resources + +- My small tutorial on [how to write a JavaScript Parser in Rust](https://oxc-project.github.io/javascript-parser-in-rust) +- My small article [Pursuit of Performance on Building a JavaScript Compiler](https://rustmagazine.org/issue-3/javascript-compiler/) +- [Crafting Interpreters](https://craftinginterpreters.com) +- [Andrew Kelley - Practical DOD](https://vimeo.com/649009599) + +## Parsers (in active development) + +- Rust + - [swc](https://swc.rs) + - [Biome](https://biomejs.dev) + - 
[jsparagus](https://github.com/mozilla-spidermonkey/jsparagus) + - [ratel](https://github.com/ratel-rust/ratel-core) + - [boa](https://github.com/lastmjs/boa-azle) +- JavaScript + - [acorn](https://github.com/acornjs/acorn) + - [babel](https://babeljs.io) +- Go + - [esbuild](https://esbuild.github.io) +- C++ + - [quick-lint-js](https://github.com/quick-lint/quick-lint-js) + +## AST + +- [estree](https://github.com/estree/estree) +- [swc_ecma_ast](https://github.com/swc-project/swc/tree/main/crates/swc_ecma_ast/src) +- [babel ast](https://github.com/babel/babel/blob/main/packages/babel-types/src/ast-types/generated/index.ts) +- [jsparagus](https://gist.github.com/Boshen/0b481a058cd715576aaf1624d2c6d469) + +## Rust + +- [The Rust Performance Book](https://nnethercote.github.io/perf-book/introduction.html) + +## Blog Posts + +- [Blazingly fast parsing, part 1: optimizing the scanner](https://v8.dev/blog/scanner) +- [Blazingly fast parsing, part 2: lazy parsing](https://v8.dev/blog/preparser) +- [Understanding ECMAScript](https://v8.dev/blog/tags/understanding-ecmascript) +- [Simple but Powerful Pratt Parsing](https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html) +- [JS syntactic quirks](https://github.com/mozilla-spidermonkey/jsparagus/blob/master/js-quirks.md) diff --git a/src/ja/index.md b/src/ja/index.md new file mode 100644 index 0000000000..228b867457 --- /dev/null +++ b/src/ja/index.md @@ -0,0 +1,45 @@ +--- +layout: home +hero: + name: "The JavaScript Oxidation Compiler" + tagline: Rust製JavaScriptツール群 + image: + src: https://raw.githubusercontent.com/oxc-project/oxc-assets/main/logo-round.png + alt: The JavaScript Oxidation Compiler + actions: + - theme: brand + text: はじめる + link: /ja/docs/guide/introduction + - theme: alt + text: GitHubで見る + link: https://github.com/oxc-project/oxc +features: + - title: パーサ ✅ + details: SWCの2倍速 + link: /ja/docs/guide/usage/parser + linkText: さらに学ぶ + - title: リンタ ✅ + details: ESLintの50~100倍速 + link: 
/ja/docs/guide/usage/linter + linkText: さらに学ぶ + - title: リザルバ ✅ + details: enhanced-resolveの28倍速 + link: /ja/docs/guide/usage/resolver + linkText: さらに学ぶ + - title: フォーマッタ 🚧 + details: Prettier互換 + - title: トランスフォーマ 🚧 + details: Babel互換 + - title: ミニファイア 🚧 + details: より早く効率的な圧縮 + - title: Rspack ✅ + details: Webpack互換 + link: https://rspack.dev + external: true + - title: Rolldown + details: Rust製Rollup + - title: Ezno Type Checker + details: 新しいTypeScript型チェッカー + link: https://github.com/kaleidawave/ezno + external: true +--- From 7b221ed6dfae97623d66676efd60b12b9dc5bdbc Mon Sep 17 00:00:00 2001 From: Nozomu Ikuta <16436160+NozomuIkuta@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:01:53 +0900 Subject: [PATCH 2/3] fix: undo translation of some words --- .vitepress/config.mts | 18 +++++++++--------- src/ja/index.md | 14 +++++++------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.vitepress/config.mts b/.vitepress/config.mts index 7c6661abf6..8c274d1aac 100644 --- a/.vitepress/config.mts +++ b/.vitepress/config.mts @@ -280,9 +280,9 @@ export default defineConfig({ { text: "使いかた", items: [ - { text: "リンタ", link: "/ja/docs/guide/usage/linter" }, - { text: "パーサ", link: "/ja/docs/guide/usage/parser" }, - { text: "リザルバ", link: "/ja/docs/guide/usage/resolver" }, + { text: "Linter", link: "/ja/docs/guide/usage/linter" }, + { text: "Parser", link: "/ja/docs/guide/usage/parser" }, + { text: "Resolver", link: "/ja/docs/guide/usage/resolver" }, ], }, ], @@ -295,27 +295,27 @@ export default defineConfig({ link: "/ja/docs/learn/architecture/introduction", }, { - text: "パーサ", + text: "Parser", link: "/ja/docs/learn/architecture/parser", }, { - text: "リンタ", + text: "Linter", link: "/ja/docs/learn/architecture/linter", }, { - text: "リザルバ", + text: "Resolver", link: "/ja/docs/learn/architecture/resolver", }, { - text: "トランスフォーマ", + text: "Transformer", link: "/ja/docs/learn/architecture/transformer", }, { - text: "フォーマッタ", + text: "Formatter", link: 
"/ja/docs/learn/architecture/formatter", }, { - text: "モディファイア", + text: "Modifier", link: "/ja/docs/learn/architecture/minifier", }, ], diff --git a/src/ja/index.md b/src/ja/index.md index 228b867457..949fe73121 100644 --- a/src/ja/index.md +++ b/src/ja/index.md @@ -2,7 +2,7 @@ layout: home hero: name: "The JavaScript Oxidation Compiler" - tagline: Rust製JavaScriptツール群 + tagline: Rust製JavaScriptツールコレクション image: src: https://raw.githubusercontent.com/oxc-project/oxc-assets/main/logo-round.png alt: The JavaScript Oxidation Compiler @@ -14,23 +14,23 @@ hero: text: GitHubで見る link: https://github.com/oxc-project/oxc features: - - title: パーサ ✅ + - title: Parser ✅ details: SWCの2倍速 link: /ja/docs/guide/usage/parser linkText: さらに学ぶ - - title: リンタ ✅ + - title: Linter ✅ details: ESLintの50~100倍速 link: /ja/docs/guide/usage/linter linkText: さらに学ぶ - - title: リザルバ ✅ + - title: Resolver ✅ details: enhanced-resolveの28倍速 link: /ja/docs/guide/usage/resolver linkText: さらに学ぶ - - title: フォーマッタ 🚧 + - title: Formatter 🚧 details: Prettier互換 - - title: トランスフォーマ 🚧 + - title: Transformer 🚧 details: Babel互換 - - title: ミニファイア 🚧 + - title: Minifier 🚧 details: より早く効率的な圧縮 - title: Rspack ✅ details: Webpack互換 From 0245076cb2b333005a94df585505a41f31c685a3 Mon Sep 17 00:00:00 2001 From: Nozomu Ikuta <16436160+NozomuIkuta@users.noreply.github.com> Date: Mon, 27 Nov 2023 11:19:40 +0900 Subject: [PATCH 3/3] chore: separate locale configs --- .vitepress/config.en.mts | 125 +++++++++++++++++ .vitepress/config.ja.mts | 161 ++++++++++++++++++++++ .vitepress/config.mts | 281 +-------------------------------------- 3 files changed, 290 insertions(+), 277 deletions(-) create mode 100644 .vitepress/config.en.mts create mode 100644 .vitepress/config.ja.mts diff --git a/.vitepress/config.en.mts b/.vitepress/config.en.mts new file mode 100644 index 0000000000..ccdf7ae230 --- /dev/null +++ b/.vitepress/config.en.mts @@ -0,0 +1,125 @@ +import type { DefaultTheme, LocaleConfig } from "vitepress"; +import BLOG_SIDEBAR 
from "./sidebar.blog.json"; + +export const EN_LOCALE_CONFIG: LocaleConfig = { + root: { + label: "English", + lang: "en", + themeConfig: { + nav: [ + { text: "Guide", link: "/docs/guide/introduction" }, + { text: "Learn", link: "/docs/learn/architecture/introduction" }, + { text: "Contribute", link: "/docs/contribute/introduction" }, + { text: "Blog", link: "/blog/2022-02-10-js-tooling-research" }, + { + text: "Playground", + target: "_blank", + link: "https://oxc-project.github.io/oxc/playground/", + }, + ], + sidebar: { + "/docs/guide/": [ + { + text: "Getting Started", + items: [ + { text: "Introduction", link: "/docs/guide/introduction" }, + { text: "Benchmarks", link: "/docs/guide/benchmarks" }, + ], + }, + { + text: "Usage", + items: [ + { text: "Linter", link: "/docs/guide/usage/linter" }, + { text: "Parser", link: "/docs/guide/usage/parser" }, + { text: "Resolver", link: "/docs/guide/usage/resolver" }, + ], + }, + ], + "/docs/learn/": [ + { + text: "Architecture", + items: [ + { + text: "Introduction", + link: "/docs/learn/architecture/introduction", + }, + { + text: "Parser", + link: "/docs/learn/architecture/parser", + }, + { + text: "Linter", + link: "/docs/learn/architecture/linter", + }, + { + text: "Resolver", + link: "/docs/learn/architecture/resolver", + }, + { + text: "Transformer", + link: "/docs/learn/architecture/transformer", + }, + { + text: "Formatter", + link: "/docs/learn/architecture/formatter", + }, + { + text: "Minifier", + link: "/docs/learn/architecture/minifier", + }, + ], + }, + { + text: "ECMAScript", + items: [ + { text: "Spec", link: "/docs/learn/ecmascript/spec" }, + { + text: "Grammar", + link: "/docs/learn/ecmascript/grammar", + }, + ], + }, + { text: "Performance", link: "/docs/learn/performance" }, + { text: "Ecosystem", link: "/docs/learn/ecosystem" }, + { text: "References", link: "/docs/learn/references" }, + ], + "/docs/contribute/": [ + { + text: "Contributing Guide", + items: [ + { text: "Introduction", link: 
"/docs/contribute/introduction" }, + { text: "Development", link: "/docs/contribute/development" }, + ], + }, + { + text: "Domain", + items: [ + { text: "Parser", link: "/docs/contribute/parser" }, + { text: "Linter", link: "/docs/contribute/linter" }, + { text: "Prettier", link: "/docs/contribute/prettier" }, + { text: "Resolver", link: "/docs/contribute/resolver" }, + { + text: "Transformer", + link: "/docs/contribute/transformer", + }, + { text: "Formatter", link: "/docs/contribute/formatter" }, + { text: "Codegen", link: "/docs/contribute/codegen" }, + { text: "Minifier", link: "/docs/contribute/minifier" }, + { text: "VSCode", link: "/docs/contribute/vscode" }, + ], + }, + { text: "Performance", link: "/docs/contribute/performance" }, + { text: "Showcase", link: "/docs/contribute/showcase" }, + ], + "/blog/": BLOG_SIDEBAR, + }, + editLink: { + pattern: "https://github.com/oxc-project/oxc/edit/main/src/:path", + text: "Edit this page", + }, + footer: { + copyright: "© 2023 OXC Project", + }, + }, + }, +}; diff --git a/.vitepress/config.ja.mts b/.vitepress/config.ja.mts new file mode 100644 index 0000000000..35b527e548 --- /dev/null +++ b/.vitepress/config.ja.mts @@ -0,0 +1,161 @@ +import type { DefaultTheme, LocaleConfig } from "vitepress"; +import BLOG_SIDEBAR from "./sidebar.blog.json"; + +export const JA_LOCALE_CONFIG: LocaleConfig = { + ja: { + label: "日本語", + lang: "ja", + themeConfig: { + search: { + provider: "local", + options: { + locales: { + ja: { + translations: { + button: { + buttonText: "検索する", + buttonAriaLabel: "検索する", + }, + modal: { + noResultsText: "見つかりませんでした", + resetButtonTitle: "リセットする", + footer: { + selectText: "選ぶ", + navigateText: "切り替える", + closeText: "閉じる", + }, + }, + }, + }, + }, + }, + }, + nav: [ + { text: "ガイド", link: "/ja/docs/guide/introduction" }, + { text: "学ぶ", link: "/ja/docs/learn/architecture/introduction" }, + { text: "貢献", link: "/ja/docs/contribute/introduction" }, + { text: "ブログ", link: 
"/ja/blog/2022-02-10-js-tooling-research" }, + { + text: "プレイグラウンド", + target: "_blank", + link: "https://oxc-project.github.io/oxc/playground/", + }, + ], + sidebar: { + "/ja/docs/guide/": [ + { + text: "はじめる", + items: [ + { + text: "イントロダクション", + link: "/ja/docs/guide/introduction", + }, + { text: "ベンチマーク", link: "/ja/docs/guide/benchmarks" }, + ], + }, + { + text: "使いかた", + items: [ + { text: "Linter", link: "/ja/docs/guide/usage/linter" }, + { text: "Parser", link: "/ja/docs/guide/usage/parser" }, + { text: "Resolver", link: "/ja/docs/guide/usage/resolver" }, + ], + }, + ], + "/ja/docs/learn/": [ + { + text: "アーキテクチャ", + items: [ + { + text: "イントロダクション", + link: "/ja/docs/learn/architecture/introduction", + }, + { + text: "Parser", + link: "/ja/docs/learn/architecture/parser", + }, + { + text: "Linter", + link: "/ja/docs/learn/architecture/linter", + }, + { + text: "Resolver", + link: "/ja/docs/learn/architecture/resolver", + }, + { + text: "Transformer", + link: "/ja/docs/learn/architecture/transformer", + }, + { + text: "Formatter", + link: "/ja/docs/learn/architecture/formatter", + }, + { + text: "Modifier", + link: "/ja/docs/learn/architecture/minifier", + }, + ], + }, + { + text: "ECMAScript", + items: [ + { text: "仕様", link: "/ja/docs/learn/ecmascript/spec" }, + { + text: "文法", + link: "/ja/docs/learn/ecmascript/grammar", + }, + ], + }, + { text: "パフォーマンス", link: "/ja/docs/learn/performance" }, + { text: "エコシステム", link: "/ja/docs/learn/ecosystem" }, + { text: "参考文献", link: "/ja/docs/learn/references" }, + ], + "/ja/docs/contribute/": [ + { + text: "Contributing Guide", + items: [ + { + text: "Introduction", + link: "/ja/docs/contribute/introduction", + }, + { + text: "Development", + link: "/ja/docs/contribute/development", + }, + ], + }, + { + text: "Domain", + items: [ + { text: "Parser", link: "/ja/docs/contribute/parser" }, + { text: "Linter", link: "/ja/docs/contribute/linter" }, + { text: "Prettier", link: "/ja/docs/contribute/prettier" }, + { 
text: "Resolver", link: "/ja/docs/contribute/resolver" }, + { + text: "Transformer", + link: "/ja/docs/contribute/transformer", + }, + { text: "Formatter", link: "/ja/docs/contribute/formatter" }, + { text: "Codegen", link: "/ja/docs/contribute/codegen" }, + { text: "Minifier", link: "/ja/docs/contribute/minifier" }, + { text: "VSCode", link: "/ja/docs/contribute/vscode" }, + ], + }, + { text: "Performance", link: "/ja/docs/contribute/performance" }, + { text: "Showcase", link: "/ja/docs/contribute/showcase" }, + ], + "/ja/blog/": BLOG_SIDEBAR.map(({ text, link }) => ({ + text, + link: `/ja${link}`, + })), + }, + editLink: { + pattern: "https://github.com/oxc-project/oxc/edit/main/src/:path", + text: "このページを編集する", + }, + footer: { + copyright: "© 2023 OXC プロジェクト", + }, + }, + }, +}; diff --git a/.vitepress/config.mts b/.vitepress/config.mts index 8c274d1aac..5387512517 100644 --- a/.vitepress/config.mts +++ b/.vitepress/config.mts @@ -1,7 +1,8 @@ import { dirname } from "node:path"; import { fileURLToPath } from "node:url"; import { defineConfig } from "vitepress"; -import BLOG_SIDEBAR from "./sidebar.blog.json"; +import { EN_LOCALE_CONFIG } from "./config.en.mjs"; +import { JA_LOCALE_CONFIG } from "./config.ja.mjs"; export default defineConfig({ srcDir: "src", @@ -106,282 +107,8 @@ export default defineConfig({ }, }, locales: { - root: { - label: "English", - lang: "en", - themeConfig: { - nav: [ - { text: "Guide", link: "/docs/guide/introduction" }, - { text: "Learn", link: "/docs/learn/architecture/introduction" }, - { text: "Contribute", link: "/docs/contribute/introduction" }, - { text: "Blog", link: "/blog/2022-02-10-js-tooling-research" }, - { - text: "Playground", - target: "_blank", - link: "https://oxc-project.github.io/oxc/playground/", - }, - ], - sidebar: { - "/docs/guide/": [ - { - text: "Getting Started", - items: [ - { text: "Introduction", link: "/docs/guide/introduction" }, - { text: "Benchmarks", link: "/docs/guide/benchmarks" }, - ], - }, - { - 
text: "Usage", - items: [ - { text: "Linter", link: "/docs/guide/usage/linter" }, - { text: "Parser", link: "/docs/guide/usage/parser" }, - { text: "Resolver", link: "/docs/guide/usage/resolver" }, - ], - }, - ], - "/docs/learn/": [ - { - text: "Architecture", - items: [ - { - text: "Introduction", - link: "/docs/learn/architecture/introduction", - }, - { - text: "Parser", - link: "/docs/learn/architecture/parser", - }, - { - text: "Linter", - link: "/docs/learn/architecture/linter", - }, - { - text: "Resolver", - link: "/docs/learn/architecture/resolver", - }, - { - text: "Transformer", - link: "/docs/learn/architecture/transformer", - }, - { - text: "Formatter", - link: "/docs/learn/architecture/formatter", - }, - { - text: "Minifier", - link: "/docs/learn/architecture/minifier", - }, - ], - }, - { - text: "ECMAScript", - items: [ - { text: "Spec", link: "/docs/learn/ecmascript/spec" }, - { - text: "Grammar", - link: "/docs/learn/ecmascript/grammar", - }, - ], - }, - { text: "Performance", link: "/docs/learn/performance" }, - { text: "Ecosystem", link: "/docs/learn/ecosystem" }, - { text: "References", link: "/docs/learn/references" }, - ], - "/docs/contribute/": [ - { - text: "Contributing Guide", - items: [ - { text: "Introduction", link: "/docs/contribute/introduction" }, - { text: "Development", link: "/docs/contribute/development" }, - ], - }, - { - text: "Domain", - items: [ - { text: "Parser", link: "/docs/contribute/parser" }, - { text: "Linter", link: "/docs/contribute/linter" }, - { text: "Prettier", link: "/docs/contribute/prettier" }, - { text: "Resolver", link: "/docs/contribute/resolver" }, - { - text: "Transformer", - link: "/docs/contribute/transformer", - }, - { text: "Formatter", link: "/docs/contribute/formatter" }, - { text: "Codegen", link: "/docs/contribute/codegen" }, - { text: "Minifier", link: "/docs/contribute/minifier" }, - { text: "VSCode", link: "/docs/contribute/vscode" }, - ], - }, - { text: "Performance", link: 
"/docs/contribute/performance" }, - { text: "Showcase", link: "/docs/contribute/showcase" }, - ], - "/blog/": BLOG_SIDEBAR, - }, - editLink: { - pattern: "https://github.com/oxc-project/oxc/edit/main/src/:path", - text: "Edit this page", - }, - footer: { - copyright: "© 2023 OXC Project", - }, - }, - }, - ja: { - label: "日本語", - lang: "ja", - themeConfig: { - search: { - provider: "local", - options: { - locales: { - ja: { - translations: { - button: { - buttonText: "検索する", - buttonAriaLabel: "検索する", - }, - modal: { - noResultsText: "見つかりませんでした", - resetButtonTitle: "リセットする", - footer: { - selectText: "選ぶ", - navigateText: "切り替える", - closeText: "閉じる", - }, - }, - }, - }, - }, - }, - }, - nav: [ - { text: "ガイド", link: "/ja/docs/guide/introduction" }, - { text: "学ぶ", link: "/ja/docs/learn/architecture/introduction" }, - { text: "貢献", link: "/ja/docs/contribute/introduction" }, - { text: "ブログ", link: "/ja/blog/2022-02-10-js-tooling-research" }, - { - text: "プレイグラウンド", - target: "_blank", - link: "https://oxc-project.github.io/oxc/playground/", - }, - ], - sidebar: { - "/ja/docs/guide/": [ - { - text: "はじめる", - items: [ - { - text: "イントロダクション", - link: "/ja/docs/guide/introduction", - }, - { text: "ベンチマーク", link: "/ja/docs/guide/benchmarks" }, - ], - }, - { - text: "使いかた", - items: [ - { text: "Linter", link: "/ja/docs/guide/usage/linter" }, - { text: "Parser", link: "/ja/docs/guide/usage/parser" }, - { text: "Resolver", link: "/ja/docs/guide/usage/resolver" }, - ], - }, - ], - "/ja/docs/learn/": [ - { - text: "アーキテクチャ", - items: [ - { - text: "イントロダクション", - link: "/ja/docs/learn/architecture/introduction", - }, - { - text: "Parser", - link: "/ja/docs/learn/architecture/parser", - }, - { - text: "Linter", - link: "/ja/docs/learn/architecture/linter", - }, - { - text: "Resolver", - link: "/ja/docs/learn/architecture/resolver", - }, - { - text: "Transformer", - link: "/ja/docs/learn/architecture/transformer", - }, - { - text: "Formatter", - link: 
"/ja/docs/learn/architecture/formatter", - }, - { - text: "Modifier", - link: "/ja/docs/learn/architecture/minifier", - }, - ], - }, - { - text: "ECMAScript", - items: [ - { text: "仕様", link: "/ja/docs/learn/ecmascript/spec" }, - { - text: "文法", - link: "/ja/docs/learn/ecmascript/grammar", - }, - ], - }, - { text: "パフォーマンス", link: "/ja/docs/learn/performance" }, - { text: "エコシステム", link: "/ja/docs/learn/ecosystem" }, - { text: "参考文献", link: "/ja/docs/learn/references" }, - ], - "/ja/docs/contribute/": [ - { - text: "Contributing Guide", - items: [ - { - text: "Introduction", - link: "/ja/docs/contribute/introduction", - }, - { - text: "Development", - link: "/ja/docs/contribute/development", - }, - ], - }, - { - text: "Domain", - items: [ - { text: "Parser", link: "/ja/docs/contribute/parser" }, - { text: "Linter", link: "/ja/docs/contribute/linter" }, - { text: "Prettier", link: "/ja/docs/contribute/prettier" }, - { text: "Resolver", link: "/ja/docs/contribute/resolver" }, - { - text: "Transformer", - link: "/ja/docs/contribute/transformer", - }, - { text: "Formatter", link: "/ja/docs/contribute/formatter" }, - { text: "Codegen", link: "/ja/docs/contribute/codegen" }, - { text: "Minifier", link: "/ja/docs/contribute/minifier" }, - { text: "VSCode", link: "/ja/docs/contribute/vscode" }, - ], - }, - { text: "Performance", link: "/ja/docs/contribute/performance" }, - { text: "Showcase", link: "/ja/docs/contribute/showcase" }, - ], - "/ja/blog/": BLOG_SIDEBAR.map(({ text, link }) => ({ - text, - link: `/ja${link}`, - })), - }, - editLink: { - pattern: "https://github.com/oxc-project/oxc/edit/main/src/:path", - text: "このページを編集する", - }, - footer: { - copyright: "© 2023 OXC プロジェクト", - }, - }, - }, + ...EN_LOCALE_CONFIG, + ...JA_LOCALE_CONFIG, }, vite: { resolve: {