From 6c093b643bd3a34dd5aeba2a17e01a941e162239 Mon Sep 17 00:00:00 2001 From: Vadim Kibana <82822460+vadimkibana@users.noreply.github.com> Date: Fri, 26 Jul 2024 21:47:48 +0200 Subject: [PATCH] [ES|QL] Function AST node subtypes (#189268) ## Summary Closes https://github.com/elastic/kibana/issues/189259 - Introduces `subtype` property for *function* AST node types. This allows to discriminate between real functions an various expression types. ### Checklist Delete any items that are not applicable to this PR. - [x] [Unit or functional tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html) were updated or added to match the most common scenarios ### For maintainers - [x] This was checked for breaking API changes and was [labeled appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process) --- .../src/__tests__/ast.function.test.ts | 227 ++++++++++++++++++ packages/kbn-esql-ast/src/ast_factory.ts | 1 + packages/kbn-esql-ast/src/ast_helpers.ts | 14 +- packages/kbn-esql-ast/src/ast_walker.ts | 32 ++- packages/kbn-esql-ast/src/types.ts | 58 ++++- packages/kbn-esql-ast/src/walker/walker.ts | 24 +- 6 files changed, 329 insertions(+), 27 deletions(-) create mode 100644 packages/kbn-esql-ast/src/__tests__/ast.function.test.ts diff --git a/packages/kbn-esql-ast/src/__tests__/ast.function.test.ts b/packages/kbn-esql-ast/src/__tests__/ast.function.test.ts new file mode 100644 index 0000000000000..e40320a514593 --- /dev/null +++ b/packages/kbn-esql-ast/src/__tests__/ast.function.test.ts @@ -0,0 +1,227 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +import { getAstAndSyntaxErrors as parse } from '../ast_parser'; +import { Walker } from '../walker'; + +describe('function AST nodes', () => { + describe('"variadic-call"', () => { + it('function call with a single argument', () => { + const query = 'ROW fn(1)'; + const { ast, errors } = parse(query); + + expect(errors.length).toBe(0); + expect(ast).toMatchObject([ + { + type: 'command', + name: 'row', + args: [ + { + type: 'function', + name: 'fn', + args: [ + { + type: 'literal', + value: 1, + }, + ], + }, + ], + }, + ]); + }); + + it('function call with multiple argument', () => { + const query = 'ROW fn(1, 2, 3)'; + const { ast, errors } = parse(query); + + expect(errors.length).toBe(0); + expect(ast).toMatchObject([ + { + type: 'command', + name: 'row', + args: [ + { + type: 'function', + name: 'fn', + args: [ + { + type: 'literal', + value: 1, + }, + { + type: 'literal', + value: 2, + }, + { + type: 'literal', + value: 3, + }, + ], + }, + ], + }, + ]); + }); + }); + + describe('"unary-expression"', () => { + it('logical NOT', () => { + const query = 'FROM a | STATS NOT b'; + const { ast, errors } = parse(query); + const fn = Walker.findFunction(ast, ({ name }) => name === 'not'); + + expect(errors.length).toBe(0); + expect(fn).toMatchObject({ + type: 'function', + subtype: 'unary-expression', + name: 'not', + args: [expect.any(Object)], + }); + }); + + // Currently arithmetic unary expressions, like "-x", are transformed to + // binary expressions: "-1 * x". Enable this test once unary expressions + // are supported. + it.skip('arithmetic', () => { + const query = 'FROM a | STATS -a'; + const { ast, errors } = parse(query); + const fn = Walker.findFunction(ast, ({ name }) => name === '*'); + + expect(errors.length).toBe(0); + expect(fn).toMatchObject({ + type: 'function', + subtype: 'unary-expression', + name: '-', + args: [expect.any(Object)], + }); + }); + }); + + describe('"postfix-unary-expression"', () => { + it('IS [NOT] NULL', () => { + const query = 'FROM a | STATS a IS NOT NULL'; + const { ast, errors } = parse(query); + const fn = Walker.findFunction(ast, ({ name }) => name === 'is not null'); + + expect(errors.length).toBe(0); + expect(fn).toMatchObject({ + type: 'function', + subtype: 'postfix-unary-expression', + name: 'is not null', + args: [expect.any(Object)], + }); + }); + }); + + describe('"binary-expression"', () => { + it('arithmetic and logical operations', () => { + const ops = ['+', '-', '*', '/', '%', 'and', 'or', '>', '>=', '<', '<=', '==', '!=']; + + for (const op of ops) { + const query = `ROW 1 ${op} 2`; + const { ast, errors } = parse(query); + + expect(errors.length).toBe(0); + expect(ast).toMatchObject([ + { + type: 'command', + name: 'row', + args: [ + { + type: 'function', + subtype: 'binary-expression', + name: op, + args: [ + { + type: 'literal', + value: 1, + }, + { + type: 'literal', + value: 2, + }, + ], + }, + ], + }, + ]); + } + }); + + it('logical IN', () => { + const query = 'FROM a | STATS a IN (1, 2, 3)'; + const { ast, errors } = parse(query); + const fn = Walker.findFunction(ast, ({ name }) => name === 'in'); + + expect(errors.length).toBe(0); + expect(fn).toMatchObject({ + type: 'function', + subtype: 'binary-expression', + name: 'in', + args: [expect.any(Object), expect.any(Object)], + }); + }); + + it('logical NOT IN', () => { + const query = 'FROM a | STATS a NOT IN (1, 2, 3)'; + const { ast, errors } = parse(query); + const fn = Walker.findFunction(ast, ({ name }) => name === 'not_in'); + + expect(errors.length).toBe(0); + expect(fn).toMatchObject({ + type: 'function', + subtype: 'binary-expression', + name: 'not_in', + args: [expect.any(Object), expect.any(Object)], + }); + }); + + it('regex expression', () => { + const query = 'FROM a | STATS a LIKE "adsf"'; + const { ast, errors } = parse(query); + const fn = Walker.findFunction(ast, ({ name }) => name === 'like'); + + expect(errors.length).toBe(0); + expect(fn).toMatchObject({ + type: 'function', + subtype: 'binary-expression', + name: 'like', + args: [expect.any(Object), expect.any(Object)], + }); + }); + + it('assignment in ENRICH .. WITH clause', () => { + const query = 'FROM a | ENRICH b ON c WITH d = e'; + const { ast, errors } = parse(query); + const fn = Walker.findFunction(ast, ({ name }) => name === '='); + + expect(errors.length).toBe(0); + expect(fn).toMatchObject({ + type: 'function', + subtype: 'binary-expression', + name: '=', + args: [expect.any(Object), expect.any(Object)], + }); + }); + + it('assignment in STATS', () => { + const query = 'FROM a | STATS b = c'; + const { ast, errors } = parse(query); + const fn = Walker.findFunction(ast, ({ name }) => name === '='); + + expect(errors.length).toBe(0); + expect(fn).toMatchObject({ + type: 'function', + subtype: 'binary-expression', + name: '=', + args: [expect.any(Object), expect.any(Object)], + }); + }); + }); +}); diff --git a/packages/kbn-esql-ast/src/ast_factory.ts b/packages/kbn-esql-ast/src/ast_factory.ts index 18c2a3c7ecbec..f389be63b3afd 100644 --- a/packages/kbn-esql-ast/src/ast_factory.ts +++ b/packages/kbn-esql-ast/src/ast_factory.ts @@ -76,6 +76,7 @@ export class AstListener implements ESQLParserListener { this.ast.push(commandAst); commandAst.text = ctx.getText(); if (textExistsAndIsValid(ctx.INFO().getText())) { + // TODO: these probably should not be functions, instead use "column", like: INFO ? commandAst?.args.push(createFunction('info', ctx, getPosition(ctx.INFO().symbol))); } } diff --git a/packages/kbn-esql-ast/src/ast_helpers.ts b/packages/kbn-esql-ast/src/ast_helpers.ts index f3c5ad0b0df18..cc6488d5bbc0c 100644 --- a/packages/kbn-esql-ast/src/ast_helpers.ts +++ b/packages/kbn-esql-ast/src/ast_helpers.ts @@ -35,6 +35,7 @@ import type { ESQLCommandMode, ESQLInlineCast, ESQLUnknownItem, + FunctionSubtype, } from './types'; export function nonNullable(v: T): v is NonNullable { @@ -187,12 +188,13 @@ export function createTimeUnit(ctx: QualifiedIntegerLiteralContext): ESQLTimeInt }; } -export function createFunction( +export function createFunction( name: string, ctx: ParserRuleContext, - customPosition?: ESQLLocation -): ESQLFunction { - return { + customPosition?: ESQLLocation, + subtype?: Subtype +): ESQLFunction { + const node: ESQLFunction = { type: 'function', name, text: ctx.getText(), @@ -200,6 +202,10 @@ export function createFunction( args: [], incomplete: Boolean(ctx.exception), }; + if (subtype) { + node.subtype = subtype; + } + return node; } function walkFunctionStructure( diff --git a/packages/kbn-esql-ast/src/ast_walker.ts b/packages/kbn-esql-ast/src/ast_walker.ts index a5e2117fe26db..1b603975cf68f 100644 --- a/packages/kbn-esql-ast/src/ast_walker.ts +++ b/packages/kbn-esql-ast/src/ast_walker.ts @@ -194,7 +194,7 @@ export function getEnrichClauses(ctx: EnrichCommandContext) { } } if (args.length) { - const fn = createFunction('=', clause); + const fn = createFunction('=', clause, undefined, 'binary-expression'); fn.args.push(args[0], args[1] ? [args[1]] : []); option.args.push(fn); } @@ -207,7 +207,7 @@ export function getEnrichClauses(ctx: EnrichCommandContext) { } function visitLogicalNot(ctx: LogicalNotContext) { - const fn = createFunction('not', ctx); + const fn = createFunction('not', ctx, undefined, 'unary-expression'); fn.args.push(...collectBooleanExpression(ctx.booleanExpression())); // update the location of the assign based on arguments const argsLocationExtends = computeLocationExtends(fn); @@ -216,7 +216,7 @@ function visitLogicalNot(ctx: LogicalNotContext) { } function visitLogicalAndsOrs(ctx: LogicalBinaryContext) { - const fn = createFunction(ctx.AND() ? 'and' : 'or', ctx); + const fn = createFunction(ctx.AND() ? 'and' : 'or', ctx, undefined, 'binary-expression'); fn.args.push(...collectBooleanExpression(ctx._left), ...collectBooleanExpression(ctx._right)); // update the location of the assign based on arguments const argsLocationExtends = computeLocationExtends(fn); @@ -225,7 +225,7 @@ function visitLogicalAndsOrs(ctx: LogicalBinaryContext) { } function visitLogicalIns(ctx: LogicalInContext) { - const fn = createFunction(ctx.NOT() ? 'not_in' : 'in', ctx); + const fn = createFunction(ctx.NOT() ? 'not_in' : 'in', ctx, undefined, 'binary-expression'); const [left, ...list] = ctx.valueExpression_list(); const leftArg = visitValueExpression(left); if (leftArg) { @@ -264,7 +264,12 @@ function visitValueExpression(ctx: ValueExpressionContext) { } if (ctx instanceof ComparisonContext) { const comparisonNode = ctx.comparisonOperator(); - const comparisonFn = createFunction(getComparisonName(comparisonNode), comparisonNode); + const comparisonFn = createFunction( + getComparisonName(comparisonNode), + comparisonNode, + undefined, + 'binary-expression' + ); comparisonFn.args.push( visitOperatorExpression(ctx._left)!, visitOperatorExpression(ctx._right)! @@ -283,7 +288,7 @@ function visitOperatorExpression( if (ctx instanceof ArithmeticUnaryContext) { const arg = visitOperatorExpression(ctx.operatorExpression()); // this is a number sign thing - const fn = createFunction('*', ctx); + const fn = createFunction('*', ctx, undefined, 'binary-expression'); fn.args.push(createFakeMultiplyLiteral(ctx)); if (arg) { fn.args.push(arg); @@ -291,7 +296,7 @@ function visitOperatorExpression( return fn; } if (ctx instanceof ArithmeticBinaryContext) { - const fn = createFunction(getMathOperation(ctx), ctx); + const fn = createFunction(getMathOperation(ctx), ctx, undefined, 'binary-expression'); const args = [visitOperatorExpression(ctx._left), visitOperatorExpression(ctx._right)]; for (const arg of args) { if (arg) { @@ -443,7 +448,12 @@ export function visitPrimaryExpression(ctx: PrimaryExpressionContext): ESQLAstIt } if (ctx instanceof FunctionContext) { const functionExpressionCtx = ctx.functionExpression(); - const fn = createFunction(functionExpressionCtx.identifier().getText().toLowerCase(), ctx); + const fn = createFunction( + functionExpressionCtx.identifier().getText().toLowerCase(), + ctx, + undefined, + 'variadic-call' + ); const asteriskArg = functionExpressionCtx.ASTERISK() ? createColumnStar(functionExpressionCtx.ASTERISK()!) : undefined; @@ -494,7 +504,7 @@ function collectRegexExpression(ctx: BooleanExpressionContext): ESQLFunction[] { const negate = regex.NOT(); const likeType = regex._kind.text?.toLowerCase() || ''; const fnName = `${negate ? 'not_' : ''}${likeType}`; - const fn = createFunction(fnName, regex); + const fn = createFunction(fnName, regex, undefined, 'binary-expression'); const arg = visitValueExpression(regex.valueExpression()); if (arg) { fn.args.push(arg); @@ -514,7 +524,7 @@ function collectIsNullExpression(ctx: BooleanExpressionContext) { } const negate = ctx.NOT(); const fnName = `is${negate ? ' not ' : ' '}null`; - const fn = createFunction(fnName, ctx); + const fn = createFunction(fnName, ctx, undefined, 'postfix-unary-expression'); const arg = visitValueExpression(ctx.valueExpression()); if (arg) { fn.args.push(arg); @@ -547,7 +557,7 @@ export function collectBooleanExpression(ctx: BooleanExpressionContext | undefin export function visitField(ctx: FieldContext) { if (ctx.qualifiedName() && ctx.ASSIGN()) { - const fn = createFunction(ctx.ASSIGN()!.getText(), ctx); + const fn = createFunction(ctx.ASSIGN()!.getText(), ctx, undefined, 'binary-expression'); fn.args.push( createColumn(ctx.qualifiedName()!), collectBooleanExpression(ctx.booleanExpression()) diff --git a/packages/kbn-esql-ast/src/types.ts b/packages/kbn-esql-ast/src/types.ts index 78f2dd58bec89..257a004e78f10 100644 --- a/packages/kbn-esql-ast/src/types.ts +++ b/packages/kbn-esql-ast/src/types.ts @@ -67,11 +67,67 @@ export interface ESQLCommandMode extends ESQLAstBaseItem { type: 'mode'; } -export interface ESQLFunction extends ESQLAstBaseItem { +/** + * We coalesce all function calls and expressions into a single "function" + * node type. This subtype is used to distinguish between different types + * of function calls and expressions. + * + * - `variadic-call` is a function call with any number of arguments: fn(a, b, c, ...) + * - `unary-expression` is a unary expression: -a, +a, NOT a, ... + * - `binary-expression` is a binary expression: a + b, a - b, a * b, ... + */ +export type FunctionSubtype = + | 'variadic-call' // fn(a, b, c, ...) + | 'unary-expression' // -a, +a, NOT a, ... + | 'postfix-unary-expression' // a IS NULL, a IS NOT NULL, ... + | 'binary-expression'; // a + b, a - b, a * b, ... + +export interface ESQLFunction< + Subtype extends FunctionSubtype = FunctionSubtype, + Name extends string = string +> extends ESQLAstBaseItem { type: 'function'; + + /** + * Default is 'variadic-call'. + */ + subtype?: Subtype; + + args: ESQLAstItem[]; +} + +export interface ESQLFunctionCallExpression extends ESQLFunction<'variadic-call'> { + subtype: 'variadic-call'; args: ESQLAstItem[]; } +export interface ESQLUnaryExpression extends ESQLFunction<'unary-expression'> { + subtype: 'unary-expression'; + args: [ESQLAstItem]; +} + +export interface ESQLPostfixUnaryExpression extends ESQLFunction<'postfix-unary-expression'> { + subtype: 'postfix-unary-expression'; + args: [ESQLAstItem]; +} + +export interface ESQLBinaryExpression + extends ESQLFunction<'binary-expression', BinaryExpressionOperator> { + subtype: 'binary-expression'; + args: [ESQLAstItem, ESQLAstItem]; +} + +export type BinaryExpressionOperator = + | BinaryExpressionArithmeticOperator + | BinaryExpressionAssignmentOperator + | BinaryExpressionComparisonOperator + | BinaryExpressionRegexOperator; + +export type BinaryExpressionArithmeticOperator = '+' | '-' | '*' | '/' | '%'; +export type BinaryExpressionAssignmentOperator = '='; +export type BinaryExpressionComparisonOperator = '==' | '=~' | '!=' | '<' | '<=' | '>' | '>='; +export type BinaryExpressionRegexOperator = 'like' | 'not_like' | 'rlike' | 'not_rlike'; + export interface ESQLInlineCast extends ESQLAstBaseItem { type: 'inlineCast'; value: ValueType; diff --git a/packages/kbn-esql-ast/src/walker/walker.ts b/packages/kbn-esql-ast/src/walker/walker.ts index 7fbec8644694e..20e052d211fe1 100644 --- a/packages/kbn-esql-ast/src/walker/walker.ts +++ b/packages/kbn-esql-ast/src/walker/walker.ts @@ -42,6 +42,8 @@ export interface WalkerOptions { visitUnknown?: (node: ESQLUnknownItem) => void; } +export type WalkerAstNode = ESQLAstNode | ESQLAstNode[]; + /** * Iterates over all nodes in the AST and calls the appropriate visitor * functions. @@ -64,7 +66,7 @@ export class Walker { /** * Walks the AST and calls the appropriate visitor functions. */ - public static readonly walk = (node: Node, options: WalkerOptions): Walker => { + public static readonly walk = (node: WalkerAstNode, options: WalkerOptions): Walker => { const walker = new Walker(options); walker.walk(node); return walker; @@ -88,7 +90,7 @@ export class Walker { * * @param node AST node to extract parameters from. */ - public static readonly params = (node: Node): ESQLParamLiteral[] => { + public static readonly params = (node: WalkerAstNode): ESQLParamLiteral[] => { const params: ESQLParamLiteral[] = []; Walker.walk(node, { visitLiteral: (param) => { @@ -101,21 +103,21 @@ export class Walker { }; /** - * Returns the first function that matches the predicate. + * Finds the first function that matches the predicate. * - * @param node AST subtree to search in. - * @param predicate Function to test each function with. - * @returns The first function that matches the predicate. + * @param node AST node from which to search for a function + * @param predicate Callback function to determine if the function is found + * @returns The first function that matches the predicate */ public static readonly findFunction = ( - node: Node, - predicate: (fn: ESQLFunction) => boolean + node: WalkerAstNode, + predicate: (node: ESQLFunction) => boolean ): ESQLFunction | undefined => { let found: ESQLFunction | undefined; Walker.walk(node, { - visitFunction: (fn) => { - if (!found && predicate(fn)) { - found = fn; + visitFunction: (func) => { + if (!found && predicate(func)) { + found = func; } }, });