Skip to content

Commit 576be20

Browse files
committed
feat(linter/plugins): support selectors DSL (#14435)
Implement support for selectors DSL in Oxlint JS plugins. The implementation is based on ESLint's implementation. Like ESLint, it uses [esquery](https://www.npmjs.com/package/esquery) as the underlying selector parser and matcher. One detail is not implemented. ESLint ensures that visit functions are called in order of their "specificity" with the most specific called first. This implementation just calls them in the order they're defined. This is a shortcoming and will need to be fixed in future, but I imagine that in practice visitation order makes no difference for the vast majority of rules. I've made what I hope are some optimizations versus ESLint's version. Notably: 1. Making simple selectors which unconditionally visit a subset of nodes (e.g. `:matches(FunctionDeclaration, FunctionExpression)`) bypass the selector match process - as it'd always return `true` anyway. 2. Faster implementations of union and intersection.
1 parent ccdb5ea commit 576be20

File tree

13 files changed

+785
-20
lines changed

13 files changed

+785
-20
lines changed

apps/oxlint/package.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,10 @@
3636
"dist"
3737
],
3838
"devDependencies": {
39+
"@types/esquery": "^1.5.4",
40+
"@types/estree": "^1.0.8",
3941
"eslint": "^9.36.0",
42+
"esquery": "^1.6.0",
4043
"execa": "^9.6.0",
4144
"jiti": "^2.6.0",
4245
"tsdown": "^0.15.5",

apps/oxlint/src-js/generated/type_ids.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,3 +174,4 @@ export const NODE_TYPE_IDS_MAP = new Map([
174174

175175
export const NODE_TYPES_COUNT = 165;
176176
export const LEAF_NODE_TYPES_COUNT = 27;
177+
export const FUNCTION_NODE_TYPE_IDS = [30, 55, 56];

apps/oxlint/src-js/generated/visitor.d.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,4 +380,5 @@ export interface VisitorObject {
380380
'TSTypeReference:exit'?: (node: ESTree.TSTypeReference) => void;
381381
TSUnionType?: (node: ESTree.TSUnionType) => void;
382382
'TSUnionType:exit'?: (node: ESTree.TSUnionType) => void;
383+
[key: string]: (node: ESTree.Node) => void;
383384
}
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
import esquery from 'esquery';
2+
import visitorKeys from '../generated/keys.js';
3+
import { FUNCTION_NODE_TYPE_IDS, NODE_TYPE_IDS_MAP } from '../generated/type_ids.js';
4+
// @ts-expect-error we need to generate `.d.ts` file for this module
5+
import { ancestors } from '../generated/walk.js';
6+
7+
import type { ESQueryOptions, Selector as EsquerySelector } from 'esquery';
8+
import type { Node as EsqueryNode } from 'estree';
9+
import type { Node, VisitFn } from './types.ts';
10+
11+
// Local alias for `Object.keys`.
const { keys: ObjectKeys } = Object;

// Local bindings for the two `esquery` entry points used in this module.
const esqueryMatches = esquery.matches,
  esqueryParse = esquery.parse;

// Numeric ID of an AST node type.
type NodeTypeId = number;
16+
17+
// Keys of a node which `fallback` must not report: `parent`, `range` and `loc`.
const filterKey = (key: string) => key !== 'parent' && key !== 'range' && key !== 'loc';

// Options to call `esquery.matches` with.
const ESQUERY_OPTIONS: ESQueryOptions = {
  nodeTypeKey: 'type',
  visitorKeys,
  // List the keys of a node, minus the ones rejected by `filterKey`.
  fallback: (node: EsqueryNode) => ObjectKeys(node).filter(filterKey),
  // esquery delegates class selectors (`:function`, `:statement`, ...) to this option when it is provided,
  // so it has to implement the class semantics itself - a constant `false` would make
  // every class selector unmatchable.
  matchClass: matchesSelectorClass,
};

/**
 * Decide whether `node` belongs to the selector class `className`.
 *
 * Supported classes are `statement`, `declaration`, `pattern`, `expression` and `function`,
 * compared case-insensitively. Membership is derived from the node's `type`:
 *
 * * `statement`: type ends with `Statement` or `Declaration`.
 * * `declaration`: type ends with `Declaration`.
 * * `pattern`: type ends with `Pattern`, or node is an expression.
 * * `expression`: see `isExpressionNode`.
 * * `function`: `FunctionDeclaration`, `FunctionExpression` or `ArrowFunctionExpression`.
 *
 * @param className - Class name from the selector, without the leading `:`
 * @param node - AST node being tested
 * @param ancestry - Ancestors of `node` as supplied by esquery (closest ancestor first - NOTE(review): confirm
 *   the ancestors array handed to `esquery.matches` follows that order)
 * @returns `true` if `node` is a member of the class. Unknown class names never match.
 */
function matchesSelectorClass(className: string, node: EsqueryNode, ancestry: EsqueryNode[]): boolean {
  const type: string = node.type;

  switch (className.toLowerCase()) {
    case 'statement':
      // Declarations are statements too
      return type.endsWith('Statement') || type.endsWith('Declaration');

    case 'declaration':
      return type.endsWith('Declaration');

    case 'pattern':
      // Expressions are patterns too
      return type.endsWith('Pattern') || isExpressionNode(type, ancestry);

    case 'expression':
      return isExpressionNode(type, ancestry);

    case 'function':
      return type === 'FunctionDeclaration' || type === 'FunctionExpression' || type === 'ArrowFunctionExpression';

    default:
      // Unknown class. Treat as matching nothing rather than throwing in the middle of AST traversal.
      return false;
  }
}

/**
 * Check if a node with type `type` counts as an expression.
 *
 * An `Identifier` whose closest ancestor is a `MetaProperty` (e.g. `new` in `new.target`)
 * is not considered an expression.
 *
 * @param type - Type of the node being tested
 * @param ancestry - Ancestors of the node, closest first
 * @returns `true` if node is an expression
 */
function isExpressionNode(type: string, ancestry: EsqueryNode[]): boolean {
  if (type.endsWith('Expression') || type.endsWith('Literal') || type === 'MetaProperty') return true;
  if (type !== 'Identifier') return false;

  const parent = ancestry[0];
  return parent === undefined || parent.type !== 'MetaProperty';
}
26+
/**
 * Parsed selector.
 */
interface Selector {
  /**
   * IDs of the node types this selector matches, or `null` if selector matches all types.
   */
  typeIds: NodeTypeId[] | null;

  /**
   * `esquery` selector object for this selector.
   */
  esquerySelector: EsquerySelector;

  /**
   * `true` if selector applies matching beyond just filtering on node type.
   *
   * * `FunctionExpression > Identifier` is complex.
   * * `:matches(FunctionExpression, FunctionDeclaration)` is not complex.
   *
   * Primarily this exists to make simple `:matches` faster.
   */
  isComplex: boolean;

  /**
   * Number of attributes in selector. Used for calculating selector's specificity.
   */
  attributeCount: number;

  /**
   * Number of identifiers in selector. Used for calculating selector's specificity.
   */
  identifierCount: number;
}
42+
43+
// Parsed `Selector`s, keyed by the selector string they were parsed from.
const cache = new Map<string, Selector>();

// Shared empty array of node type IDs.
const EMPTY_TYPE_IDS_ARRAY: NodeTypeId[] = [];
47+
48+
/**
 * Turn a selector string into the `Selector` object describing it.
 *
 * Results are memoized per selector string, so parsing the same string again
 * returns the same `Selector` object.
 *
 * @param key - Selector string e.g. `Program > VariableDeclaration`
 * @returns `Selector` object
 */
export function parseSelector(key: string): Selector {
  // Fast path: this key has been parsed before
  const cached = cache.get(key);
  if (cached !== undefined) return cached;

  // Slow path: parse with `esquery`, then analyse the result
  const esquerySelector = esqueryParse(key);
  const parsed: Selector = {
    typeIds: null,
    esquerySelector,
    isComplex: false,
    attributeCount: 0,
    identifierCount: 0,
  };
  // `analyzeSelector` updates `isComplex` and the counters on `parsed` while working out the type IDs
  parsed.typeIds = analyzeSelector(esquerySelector, parsed);

  // Remember the result for subsequent calls
  cache.set(key, parsed);
  return parsed;
}
76+
77+
/**
 * Analyse an `EsquerySelector` to determine:
 *
 * 1. What node types it matches on.
 * 2. Whether it is "simple" or "complex" - "simple" matches a subset of node types without further conditions.
 * 3. Its specificity (number of identifiers and attributes).
 *
 * This function traverses the `EsquerySelector` and calls itself recursively.
 * It returns an array of node type IDs which the selector may match.
 *
 * The returned array may be a shared array (`EMPTY_TYPE_IDS_ARRAY`, `FUNCTION_NODE_TYPE_IDS`),
 * so callers must not mutate it.
 *
 * @param esquerySelector - `EsquerySelector` to analyse.
 * @param selector - `Selector` which has its `isComplex`, `attributeCount`, and `identifierCount` updated.
 * @returns Array of node type IDs the selector matches, or `null` if it matches all nodes.
 */
function analyzeSelector(esquerySelector: EsquerySelector, selector: Selector): NodeTypeId[] | null {
  switch (esquerySelector.type) {
    case 'identifier': {
      selector.identifierCount++;

      const typeId = NODE_TYPE_IDS_MAP.get(esquerySelector.value);
      // If the type is invalid, just treat this selector as not matching any types.
      // But still increment `identifierCount`.
      // This matches ESLint's behavior.
      return typeId === void 0 ? EMPTY_TYPE_IDS_ARRAY : [typeId];
    }

    case 'not':
      // Child selectors are analysed only for their effect on `selector` (counts and complexity).
      // A negation does not narrow the set of types, so the result is always "all types".
      for (let i = 0, childSelectors = esquerySelector.selectors, len = childSelectors.length; i < len; i++) {
        analyzeSelector(childSelectors[i], selector);
      }
      selector.isComplex = true;
      return null;

    case 'matches': {
      // OR matcher. Matches a node if any of child selectors matches it.
      let nodeTypes: NodeTypeId[] | null = [];
      for (let i = 0, childSelectors = esquerySelector.selectors, len = childSelectors.length; i < len; i++) {
        // Every child is analysed, even once the result is known to be `null`,
        // so that counts and complexity on `selector` are complete
        const childNodeTypes = analyzeSelector(childSelectors[i], selector);
        if (childNodeTypes === null) {
          nodeTypes = null;
        } else if (nodeTypes !== null) {
          nodeTypes.push(...childNodeTypes);
        }
      }
      if (nodeTypes === null) return null;
      // De-duplicate
      // TODO: Faster way to do this? Sort and then dedupe manually?
      return [...new Set(nodeTypes)];
    }

    case 'compound': {
      // AND matcher. Only matches a node if all child selectors match it.
      const childSelectors = esquerySelector.selectors,
        len = childSelectors.length;
      // TODO: Can `childSelectors` have 0 length?
      if (len === 0) return [];

      let nodeTypes: NodeTypeId[] | null = null;
      for (let i = 0; i < len; i++) {
        const childNodeTypes = analyzeSelector(childSelectors[i], selector);

        // If child selector matches all types, does not narrow the types the selector matches
        if (childNodeTypes === null) continue;

        if (nodeTypes === null) {
          // First child selector which matches specific types
          nodeTypes = childNodeTypes;
        } else {
          // Selector only matches intersection of all child selectors.
          // Capture the narrowed (non-null) array in a `const` - the callback must not read
          // the reassigned `let` binding, which is typed as nullable inside a closure.
          // TODO: Could make this faster if `analyzeSelector` always returned an ordered array.
          const typesSoFar: NodeTypeId[] = nodeTypes;
          nodeTypes = childNodeTypes.filter(nodeType => typesSoFar.includes(nodeType));
        }
      }
      return nodeTypes;
    }

    case 'attribute':
    case 'field':
    case 'nth-child':
    case 'nth-last-child':
      selector.isComplex = true;
      selector.attributeCount++;
      return null;

    case 'child':
    case 'descendant':
    case 'sibling':
    case 'adjacent':
      selector.isComplex = true;
      // Left side only contributes to counts. The node which is matched is described by the right side.
      analyzeSelector(esquerySelector.left, selector);
      return analyzeSelector(esquerySelector.right, selector);

    case 'class':
      // TODO: Should TS function types be included in `FUNCTION_NODE_TYPE_IDS`?
      // This TODO comment is from ESLint's implementation. Not sure what it means!
      // TODO: Abstract into JSLanguage somehow.
      if (esquerySelector.name === 'function') return FUNCTION_NODE_TYPE_IDS;
      selector.isComplex = true;
      return null;

    case 'wildcard':
      return null;

    default:
      selector.isComplex = true;
      return null;
  }
}
185+
186+
/**
 * Create a guarded version of a visit function, which only runs the original
 * when the provided `EsquerySelector` matches the AST node it is called with.
 *
 * IMPORTANT: The match is evaluated against `ancestors` from `generated/walk.js`.
 * That array must hold the ancestors of the AST node passed to the returned visit function,
 * so the returned visit function can only be called during AST traversal.
 *
 * @param visitFn - Visit function to wrap
 * @param esquerySelector - `EsquerySelector` object
 * @returns Wrapped visit function
 */
export function wrapVisitFnWithSelectorMatch(visitFn: VisitFn, esquerySelector: EsquerySelector): VisitFn {
  return (node: Node) => {
    const isMatch = esqueryMatches(node as unknown as EsqueryNode, esquerySelector, ancestors, ESQUERY_OPTIONS);
    if (isMatch) visitFn(node);
  };
}

apps/oxlint/src-js/plugins/visitor.ts

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
// it will avoid full GC runs, which should greatly improve performance.
7474

7575
import { LEAF_NODE_TYPES_COUNT, NODE_TYPE_IDS_MAP, NODE_TYPES_COUNT } from '../generated/type_ids.js';
76+
import { parseSelector, wrapVisitFnWithSelectorMatch } from './selector.js';
7677

7778
import type { CompiledVisitorEntry, EnterExit, Node, VisitFn, Visitor } from './types.ts';
7879

@@ -212,17 +213,49 @@ export function addVisitorToCompiled(visitor: Visitor): void {
212213
for (let i = 0; i < keysLen; i++) {
213214
let name = keys[i];
214215

215-
const visitFn = (visitor as { [key: string]: VisitFn })[name];
216+
let visitFn = visitor[name];
216217
if (typeof visitFn !== 'function') {
217218
throw new TypeError(`'${name}' property of visitor object is not a function`);
218219
}
219220

220221
const isExit = name.endsWith(':exit');
221222
if (isExit) name = name.slice(0, -5);
222223

223-
const typeId = NODE_TYPE_IDS_MAP.get(name);
224-
if (typeId === void 0) throw new Error(`Unknown node type '${name}' in visitor object`);
225-
addVisitFn(typeId, isExit, visitFn);
224+
// TODO: Combine the two hashmaps `NODE_TYPE_IDS_MAP` and selectors cache into one `Map`
225+
// to avoid 2 hashmap lookups for selectors?
226+
let typeId = NODE_TYPE_IDS_MAP.get(name);
227+
if (typeId !== void 0) {
228+
// Single type visit function e.g. `Program`
229+
addVisitFn(typeId, isExit, visitFn);
230+
continue;
231+
}
232+
233+
// `*` matches any node without any filtering, so no need to wrap it
234+
if (name !== '*') {
235+
// Selector.
236+
// Parse selector.
237+
// Wrap `visitFn` so it only executes if the selector matches.
238+
// If selector is simple (unconditionally matches certain types e.g. `:matches(X, Y)`), skip wrapping.
239+
const selector = parseSelector(name);
240+
if (selector.isComplex) visitFn = wrapVisitFnWithSelectorMatch(visitFn, selector.esquerySelector);
241+
242+
const { typeIds } = selector;
243+
if (typeIds !== null) {
244+
// Selector matches a specific set of node types
245+
for (let i = 0, len = typeIds.length; i < len; i++) {
246+
addVisitFn(typeIds[i], isExit, visitFn);
247+
}
248+
continue;
249+
}
250+
}
251+
252+
// `*` selector or some other selector that matches all node types
253+
for (typeId = 0; typeId < LEAF_NODE_TYPES_COUNT; typeId++) {
254+
addLeafVisitFn(typeId, isExit, visitFn);
255+
}
256+
for (; typeId < NODE_TYPES_COUNT; typeId++) {
257+
addNonLeafVisitFn(typeId, isExit, visitFn);
258+
}
226259
}
227260
}
228261

@@ -326,6 +359,8 @@ function addNonLeafVisitFn(typeId: number, isExit: boolean, visitFn: VisitFn): v
326359
export function finalizeCompiledVisitor() {
327360
if (hasActiveVisitors === false) return false;
328361

362+
// TODO: Visit functions need to be ordered by specificity of their selectors, with most specific first
363+
329364
// Merge visit functions for node types which have multiple visitors from different rules,
330365
// or enter+exit functions for leaf nodes
331366
for (let i = mergedLeafVisitorTypeIds.length - 1; i >= 0; i--) {

0 commit comments

Comments
 (0)