diff --git a/.github/get-sonatype-credentials.sh b/.github/get-sonatype-credentials.sh
new file mode 100755
index 00000000000..f1660f509d5
--- /dev/null
+++ b/.github/get-sonatype-credentials.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Fetch the Sonatype credentials from AWS Secrets Manager, mask them in the job log, and export them through GITHUB_ENV.
+set -euo pipefail  # stop on a failed lookup or an unset GITHUB_ENV instead of exporting empty credentials
+SONATYPE_USERNAME=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-username --query SecretString --output text)
+SONATYPE_PASSWORD=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-password --query SecretString --output text)
+echo "::add-mask::$SONATYPE_USERNAME"
+echo "::add-mask::$SONATYPE_PASSWORD"
+echo "SONATYPE_USERNAME=$SONATYPE_USERNAME" >> "$GITHUB_ENV"
+echo "SONATYPE_PASSWORD=$SONATYPE_PASSWORD" >> "$GITHUB_ENV"
\ No newline at end of file
diff --git a/.github/workflows/maven-publish.yml b/.github/workflows/maven-publish.yml
index 6ea76fd783a..019ff7384eb 100644
--- a/.github/workflows/maven-publish.yml
+++ b/.github/workflows/maven-publish.yml
@@ -8,6 +8,9 @@ on:
- 1.*
- 2.*
+env:
+ SNAPSHOT_REPO_URL: https://aws.oss.sonatype.org/content/repositories/snapshots/
+
jobs:
build-and-publish-snapshots:
strategy:
@@ -30,33 +33,11 @@ jobs:
role-to-assume: ${{ secrets.PUBLISH_SNAPSHOTS_ROLE }}
aws-region: us-east-1
- # Create the initial direct-query directory structure
- - name: Create direct-query directory structure in repository
+ - name: get credentials
run: |
# Get credentials for publishing
- export SONATYPE_USERNAME=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-username --query SecretString --output text)
- export SONATYPE_PASSWORD=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-password --query SecretString --output text)
- echo "::add-mask::$SONATYPE_USERNAME"
- echo "::add-mask::$SONATYPE_PASSWORD"
-
- # Create a placeholder file
- TEMP_DIR=$(mktemp -d)
- echo "Directory placeholder - $(date)" > "${TEMP_DIR}/.placeholder"
-
- # Upload the placeholder file to create the directory structure
- echo "Creating initial directory structure..."
- curl -X PUT -u "${SONATYPE_USERNAME}:${SONATYPE_PASSWORD}" \
- --upload-file "${TEMP_DIR}/.placeholder" \
- "https://aws.oss.sonatype.org/content/repositories/snapshots/org/opensearch/direct-query/.placeholder"
-
- # Clean up
- rm -rf "${TEMP_DIR}"
- echo "Directory structure created"
+ .github/get-sonatype-credentials.sh
- name: publish snapshots to maven
run: |
- export SONATYPE_USERNAME=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-username --query SecretString --output text)
- export SONATYPE_PASSWORD=$(aws secretsmanager get-secret-value --secret-id maven-snapshots-password --query SecretString --output text)
- echo "::add-mask::$SONATYPE_USERNAME"
- echo "::add-mask::$SONATYPE_PASSWORD"
- ./gradlew publishPluginZipPublicationToSnapshotsRepository
+ ./gradlew publishPluginZipPublicationToSnapshotsRepository
\ No newline at end of file
diff --git a/.github/workflows/publish-snapshots-and-grammar-files.yml b/.github/workflows/publish-snapshots-and-grammar-files.yml
new file mode 100644
index 00000000000..7417c5a39ba
--- /dev/null
+++ b/.github/workflows/publish-snapshots-and-grammar-files.yml
@@ -0,0 +1,354 @@
+name: Publish snapshots and grammar files to maven
+
+on:
+ workflow_dispatch:
+ push:
+ branches:
+ - main
+ - 1.*
+ - 2.*
+
+env:
+ SNAPSHOT_REPO_URL: https://aws.oss.sonatype.org/content/repositories/snapshots/
+
+jobs:
+ publish-grammar-files:
+ strategy:
+ fail-fast: false
+ if: github.repository == 'opensearch-project/sql'
+ runs-on: ubuntu-latest
+
+ permissions:
+ id-token: write
+ contents: write
+
+ env:
+ TARGET_REPO_PATH: org/opensearch/language-grammar
+
+ steps:
+ - uses: actions/setup-java@v3
+ with:
+ distribution: temurin # Temurin is a distribution of adoptium
+ java-version: 21
+ - uses: actions/checkout@v3
+ - uses: aws-actions/configure-aws-credentials@v1.7.0
+ with:
+ role-to-assume: ${{ secrets.PUBLISH_SNAPSHOTS_ROLE }}
+ aws-region: us-east-1
+
+ # Get Maven credentials
+ - name: Setup publishing credentials
+ id: creds
+ run: |
+ .github/get-sonatype-credentials.sh
+
+ # Extract the version from language-grammar/build.gradle; fail fast when it cannot be determined
+ - name: Set version
+   id: set_version
+   run: |
+     VERSION=$(grep -m 1 "version = " ./language-grammar/build.gradle | cut -d "'" -f 2)
+     [ -n "${VERSION}" ] || { echo "Error: could not read version from language-grammar/build.gradle" >&2; exit 1; }
+     echo "VERSION=${VERSION}" >> "$GITHUB_OUTPUT"; echo "Using version: ${VERSION}"
+
+ # Capture commit ID
+ - name: Set commit ID
+ id: set_commit
+ run: |
+ COMMIT_ID=$(git log -1 --format='%H')
+ echo "commit_id=${COMMIT_ID}" >> $GITHUB_OUTPUT
+ echo "Using commit ID: ${COMMIT_ID}"
+
+ # Create ZIP of grammar files
+ - name: Package grammar files
+ run: |
+ # Create directory for the zip content
+ mkdir -p grammar_files
+
+ # Copy all .g4 files to the directory
+ find ./language-grammar/src/main/antlr4 -name "*.g4" -type f -exec cp {} grammar_files/ \;
+
+ # List the files that will be included in the zip
+ echo "Files to be included in the zip:"
+ ls -la grammar_files/
+
+ # Create zip file
+ cd grammar_files
+ zip -r ../grammar.zip ./*
+ cd ..
+
+ # Check the zip file
+ ls -la grammar.zip
+
+ - name: Prepare for Maven publishing
+   run: |
+     # Maven coordinates of the grammar artifact
+     ARTIFACT_ID="language-grammar"
+     GROUP_ID="org.opensearch"
+     VERSION="${{ steps.set_version.outputs.VERSION }}"
+
+     # Local repository layout: group id with dots turned into path separators
+     MAVEN_LOCAL_PATH="${HOME}/.m2/repository/${GROUP_ID//.//}/${ARTIFACT_ID}/${VERSION}"
+     mkdir -p "${MAVEN_LOCAL_PATH}"
+
+     # Copy the zip file to Maven directory with proper naming
+     MAVEN_ZIP_NAME="${ARTIFACT_ID}-${VERSION}.zip"
+     cp grammar.zip "${MAVEN_LOCAL_PATH}/${MAVEN_ZIP_NAME}"
+
+     # Generate a well-formed POM (unquoted EOF so the shell expands the coordinates)
+     cat > "${MAVEN_LOCAL_PATH}/${ARTIFACT_ID}-${VERSION}.pom" << EOF
+     <?xml version="1.0" encoding="UTF-8"?>
+     <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+       <modelVersion>4.0.0</modelVersion>
+       <groupId>${GROUP_ID}</groupId>
+       <artifactId>${ARTIFACT_ID}</artifactId>
+       <version>${VERSION}</version>
+       <packaging>zip</packaging>
+       <description>OpenSearch Language Grammar Files</description>
+     </project>
+     EOF
+
+     echo "Grammar files prepared for Maven publishing as version ${VERSION}"
+
+ # Generate checksums for the Maven artifacts (overwrite, never append, so each file holds exactly one digest)
+ - name: Generate checksums
+   run: |
+     for i in $(find "${HOME}/.m2/repository/org/opensearch/" -name "*.pom" -type f); do sha512sum "$i" | awk '{print $1}' > "$i.sha512"; done
+     for i in $(find "${HOME}/.m2/repository/org/opensearch/" -name "*.zip" -type f); do sha512sum "$i" | awk '{print $1}' > "$i.sha512"; done
+     for i in $(find "${HOME}/.m2/repository/org/opensearch/" -name "*.pom" -type f); do sha256sum "$i" | awk '{print $1}' > "$i.sha256"; done
+     for i in $(find "${HOME}/.m2/repository/org/opensearch/" -name "*.zip" -type f); do sha256sum "$i" | awk '{print $1}' > "$i.sha256"; done
+
+ # Checkout build libraries for publishing scripts
+ - uses: actions/checkout@v4
+ with:
+ repository: 'opensearch-project/opensearch-build-libraries'
+ path: 'build'
+
+ # Publish to Maven
+ - name: Publish to Maven
+ run: |
+ # Copy local Maven repo to build directory
+ cd build/resources/publish/
+ cp -a $HOME/.m2/repository/* ./
+
+ # Run the publish script
+ ./publish-snapshot.sh ./
+
+ # Best-effort: record the commit ID in the version-level maven-metadata.xml
+ - name: Add commit ID to metadata
+   run: |
+     COMMIT_ID="${{ steps.set_commit.outputs.commit_id }}"
+     ARTIFACT_ID="language-grammar"
+     VERSION="${{ steps.set_version.outputs.VERSION }}"
+
+     TEMP_DIR=$(mktemp -d)
+     METADATA_FILE="${TEMP_DIR}/maven-metadata.xml"
+
+     # Download existing metadata; -f keeps HTTP error pages out of the file, || true defers to the check below
+     META_URL="${SNAPSHOT_REPO_URL}org/opensearch/${ARTIFACT_ID}/${VERSION}/maven-metadata.xml"
+     curl -sf -u "${SONATYPE_USERNAME}:${SONATYPE_PASSWORD}" -o "${METADATA_FILE}" "${META_URL}" || true
+
+     if [ -s "${METADATA_FILE}" ]; then
+       cp "${METADATA_FILE}" "${METADATA_FILE}.bak"
+
+       # Insert a commitId element right after the opening versioning tag; copy every other line unchanged
+       awk -v commit="${COMMIT_ID}" '
+         /<versioning>/ {
+           print $0
+           print "  <commitId>" commit "</commitId>"
+           next
+         }
+         {print}
+       ' "${METADATA_FILE}.bak" > "${METADATA_FILE}"
+
+       # Upload modified metadata (-f turns an HTTP error into a failed step)
+       curl -f -X PUT -u "${SONATYPE_USERNAME}:${SONATYPE_PASSWORD}" --upload-file "${METADATA_FILE}" "${META_URL}"
+
+       # Recompute checksums so they match the modified metadata
+       cd "${TEMP_DIR}"
+       sha256sum "maven-metadata.xml" | awk '{print $1}' > "maven-metadata.xml.sha256"
+       sha512sum "maven-metadata.xml" | awk '{print $1}' > "maven-metadata.xml.sha512"
+
+       curl -f -X PUT -u "${SONATYPE_USERNAME}:${SONATYPE_PASSWORD}" --upload-file "maven-metadata.xml.sha256" "${META_URL}.sha256"
+       curl -f -X PUT -u "${SONATYPE_USERNAME}:${SONATYPE_PASSWORD}" --upload-file "maven-metadata.xml.sha512" "${META_URL}.sha512"
+
+       echo "Version metadata updated with commit ID"
+     else
+       echo "Failed to download metadata, skipping commit ID addition"
+     fi
+
+     rm -rf "${TEMP_DIR}"
+
+ publish-async-query-core:
+ strategy:
+ fail-fast: false
+ if: github.repository == 'opensearch-project/sql'
+ runs-on: ubuntu-latest
+
+ permissions:
+ id-token: write
+ contents: write
+
+ steps:
+ - uses: actions/setup-java@v3
+ with:
+ distribution: temurin
+ java-version: 21
+ - uses: actions/checkout@v3
+ - uses: aws-actions/configure-aws-credentials@v1.7.0
+ with:
+ role-to-assume: ${{ secrets.PUBLISH_SNAPSHOTS_ROLE }}
+ aws-region: us-east-1
+
+ # Get and mask credentials once in a dedicated step
+ - name: Setup publishing credentials
+ id: creds
+ run: |
+ .github/get-sonatype-credentials.sh
+
+ # Capture the commit ID for metadata purposes
+ - name: Set commit ID
+ id: set_commit
+ run: |
+ COMMIT_ID=$(git log -1 --format='%H')
+ echo "commit_id=${COMMIT_ID}" >> $GITHUB_OUTPUT
+ echo "Using commit ID: ${COMMIT_ID}"
+
+ # Derive the version to publish from async-query-core/build.gradle
+ - name: Extract version from build.gradle
+   id: extract_version
+   run: |
+     # Take the first archiveVersion.set('...') value found in the build file
+     VERSION=$(grep -m 1 "archiveVersion.set" ./async-query-core/build.gradle | sed -n "s/.*archiveVersion.set('\([^']*\)').*/\1/p")
+     [ -n "${VERSION}" ] || { echo "Error: archiveVersion not found in async-query-core/build.gradle" >&2; exit 1; }
+     # Add -SNAPSHOT suffix for snapshot repository if not already present
+     if [[ "${VERSION}" != *-SNAPSHOT ]]; then
+       VERSION="${VERSION}-SNAPSHOT"
+     fi
+
+     echo "VERSION=${VERSION}" >> "$GITHUB_OUTPUT"
+     echo "Version: ${VERSION}"
+
+ - name: Build and publish shadow JAR
+   run: |
+     # Build the shadow JAR
+     ./gradlew :async-query-core:shadowJar
+
+     # Maven coordinates of the published artifact
+     ARTIFACT_ID="async-query-core"
+     GROUP_PATH="org/opensearch"
+     VERSION="${{ steps.extract_version.outputs.VERSION }}"
+
+     # Pick the first *-all.jar produced by the shadowJar task
+     SHADOW_JAR=$(find ./async-query-core/build/libs/ -name "*-all.jar" | head -n 1)
+
+     if [ -z "$SHADOW_JAR" ]; then
+       echo "Error: Shadow JAR not found!"
+       exit 1
+     fi
+
+     # Create directory structure in local Maven repository
+     MAVEN_LOCAL_PATH="${HOME}/.m2/repository/${GROUP_PATH}/${ARTIFACT_ID}/${VERSION}"
+     mkdir -p "${MAVEN_LOCAL_PATH}"
+
+     # Copy the shadow JAR to the local Maven repository with proper naming
+     MAVEN_JAR_NAME="${ARTIFACT_ID}-${VERSION}.jar"
+     cp "${SHADOW_JAR}" "${MAVEN_LOCAL_PATH}/${MAVEN_JAR_NAME}"
+
+     # Generate a well-formed POM (unquoted EOF so the shell expands the coordinates)
+     cat > "${MAVEN_LOCAL_PATH}/${ARTIFACT_ID}-${VERSION}.pom" << EOF
+     <?xml version="1.0" encoding="UTF-8"?>
+     <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+       <modelVersion>4.0.0</modelVersion>
+       <groupId>org.opensearch</groupId>
+       <artifactId>${ARTIFACT_ID}</artifactId>
+       <version>${VERSION}</version>
+     </project>
+     EOF
+
+     echo "Shadow JAR and POM published to local Maven repository for version ${VERSION}"
+
+ # Checkout opensearch-build-libraries repository for publishing scripts
+ - uses: actions/checkout@v4
+ with:
+ repository: 'opensearch-project/opensearch-build-libraries'
+ path: 'build'
+
+ - name: Generate SHA checksums for JAR and POM files
+   run: |
+     for i in $(find "${HOME}/.m2/repository/org/opensearch/" -name "*.pom" -type f); do sha512sum "$i" | awk '{print $1}' > "$i.sha512"; done
+     for i in $(find "${HOME}/.m2/repository/org/opensearch/" -name "*.jar" -type f); do sha512sum "$i" | awk '{print $1}' > "$i.sha512"; done
+     for i in $(find "${HOME}/.m2/repository/org/opensearch/" -name "*.pom" -type f); do sha256sum "$i" | awk '{print $1}' > "$i.sha256"; done
+     for i in $(find "${HOME}/.m2/repository/org/opensearch/" -name "*.jar" -type f); do sha256sum "$i" | awk '{print $1}' > "$i.sha256"; done
+
+ - name: Install XML tools
+ run: sudo apt-get update && sudo apt-get install -y xmlstarlet
+
+ - name: Publish snapshots to maven
+ run: |
+ # Publish snapshots to maven
+ cd build/resources/publish/
+ cp -a $HOME/.m2/repository/* ./
+ ./publish-snapshot.sh ./
+
+ - name: Update version metadata with commit ID
+   run: |
+     COMMIT_ID="${{ steps.set_commit.outputs.commit_id }}"
+     ARTIFACT_ID="async-query-core"
+     VERSION="${{ steps.extract_version.outputs.VERSION }}"
+
+     # Add commit ID to version-specific metadata file
+     echo "Processing commit ID for version: ${VERSION}"
+
+     TEMP_DIR=$(mktemp -d)
+     METADATA_FILE="${TEMP_DIR}/maven-metadata.xml"
+
+     # Download metadata from repository
+     META_URL="${SNAPSHOT_REPO_URL}org/opensearch/${ARTIFACT_ID}/${VERSION}/maven-metadata.xml"
+     echo "Downloading metadata from ${META_URL}"
+
+     # -f keeps HTTP error pages out of the file; || true lets the explicit check below report the failure
+     curl -sf -u "${SONATYPE_USERNAME}:${SONATYPE_PASSWORD}" -o "${METADATA_FILE}" "${META_URL}" || true
+
+     # If successful, modify and upload back
+     if [ -s "${METADATA_FILE}" ]; then
+       echo "Modifying metadata for ${VERSION}"
+       cp "${METADATA_FILE}" "${METADATA_FILE}.bak"
+
+       # Insert a commitId element right after the opening versioning tag; copy every other line unchanged
+       awk -v commit="${COMMIT_ID}" '
+         /<versioning>/ {
+           print $0
+           print "  <commitId>" commit "</commitId>"
+           next
+         }
+         {print}
+       ' "${METADATA_FILE}.bak" > "${METADATA_FILE}"
+
+       # Upload modified file back (-f turns an HTTP error into a failed step)
+       echo "Uploading modified metadata to ${META_URL}"
+       curl -f -X PUT -u "${SONATYPE_USERNAME}:${SONATYPE_PASSWORD}" --upload-file "${METADATA_FILE}" "${META_URL}"
+
+       # Recompute the SHA checksums so they match the modified metadata
+       cd "${TEMP_DIR}"
+       sha256sum "maven-metadata.xml" | awk '{print $1}' > "maven-metadata.xml.sha256"
+       sha512sum "maven-metadata.xml" | awk '{print $1}' > "maven-metadata.xml.sha512"
+
+       # Upload the checksums
+       curl -f -X PUT -u "${SONATYPE_USERNAME}:${SONATYPE_PASSWORD}" --upload-file "maven-metadata.xml.sha256" "${META_URL}.sha256"
+       curl -f -X PUT -u "${SONATYPE_USERNAME}:${SONATYPE_PASSWORD}" --upload-file "maven-metadata.xml.sha512" "${META_URL}.sha512"
+       cd -
+
+       echo "Updated metadata and checksums for ${VERSION}"
+     else
+       echo "Failed to download metadata for ${VERSION} or file is empty"
+       exit 1
+     fi
+
+     # Clean up
+     rm -rf "${TEMP_DIR}"
+
+     echo "Version metadata updated with commit ID"
\ No newline at end of file
diff --git a/language-grammar/build.gradle b/language-grammar/build.gradle
new file mode 100644
index 00000000000..177cc069011
--- /dev/null
+++ b/language-grammar/build.gradle
@@ -0,0 +1,13 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+plugins {
+ id 'java'
+ id 'java-library'
+}
+
+group = 'org.opensearch'
+version = '0.1.0-SNAPSHOT'
+description = 'OpenSearch Language Grammar Files'
\ No newline at end of file
diff --git a/language-grammar/src/main/antlr4/FlintSparkSqlExtensions.g4 b/language-grammar/src/main/antlr4/FlintSparkSqlExtensions.g4
new file mode 100644
index 00000000000..46e814e9f56
--- /dev/null
+++ b/language-grammar/src/main/antlr4/FlintSparkSqlExtensions.g4
@@ -0,0 +1,208 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+grammar FlintSparkSqlExtensions;
+
+import SparkSqlBase;
+
+
+// Flint SQL Syntax Extension
+
+singleStatement
+ : statement SEMICOLON* EOF
+ ;
+
+statement
+ : skippingIndexStatement
+ | coveringIndexStatement
+ | materializedViewStatement
+ | indexManagementStatement
+ | indexJobManagementStatement
+ ;
+
+skippingIndexStatement
+ : createSkippingIndexStatement
+ | refreshSkippingIndexStatement
+ | describeSkippingIndexStatement
+ | alterSkippingIndexStatement
+ | dropSkippingIndexStatement
+ | vacuumSkippingIndexStatement
+ | analyzeSkippingIndexStatement
+ ;
+
+createSkippingIndexStatement
+ : CREATE SKIPPING INDEX (IF NOT EXISTS)?
+ ON tableName
+ LEFT_PAREN indexColTypeList RIGHT_PAREN
+ whereClause?
+ (WITH LEFT_PAREN propertyList RIGHT_PAREN)?
+ ;
+
+refreshSkippingIndexStatement
+ : REFRESH SKIPPING INDEX ON tableName
+ ;
+
+describeSkippingIndexStatement
+ : (DESC | DESCRIBE) SKIPPING INDEX ON tableName
+ ;
+
+alterSkippingIndexStatement
+ : ALTER SKIPPING INDEX
+ ON tableName
+ WITH LEFT_PAREN propertyList RIGHT_PAREN
+ ;
+
+dropSkippingIndexStatement
+ : DROP SKIPPING INDEX ON tableName
+ ;
+
+vacuumSkippingIndexStatement
+ : VACUUM SKIPPING INDEX ON tableName
+ ;
+
+coveringIndexStatement
+ : createCoveringIndexStatement
+ | refreshCoveringIndexStatement
+ | showCoveringIndexStatement
+ | describeCoveringIndexStatement
+ | alterCoveringIndexStatement
+ | dropCoveringIndexStatement
+ | vacuumCoveringIndexStatement
+ ;
+
+createCoveringIndexStatement
+ : CREATE INDEX (IF NOT EXISTS)? indexName
+ ON tableName
+ LEFT_PAREN indexColumns=multipartIdentifierPropertyList RIGHT_PAREN
+ whereClause?
+ (WITH LEFT_PAREN propertyList RIGHT_PAREN)?
+ ;
+
+refreshCoveringIndexStatement
+ : REFRESH INDEX indexName ON tableName
+ ;
+
+showCoveringIndexStatement
+ : SHOW (INDEX | INDEXES) ON tableName
+ ;
+
+describeCoveringIndexStatement
+ : (DESC | DESCRIBE) INDEX indexName ON tableName
+ ;
+
+alterCoveringIndexStatement
+ : ALTER INDEX indexName
+ ON tableName
+ WITH LEFT_PAREN propertyList RIGHT_PAREN
+ ;
+
+dropCoveringIndexStatement
+ : DROP INDEX indexName ON tableName
+ ;
+
+vacuumCoveringIndexStatement
+ : VACUUM INDEX indexName ON tableName
+ ;
+
+analyzeSkippingIndexStatement
+ : ANALYZE SKIPPING INDEX ON tableName
+ ;
+
+materializedViewStatement
+ : createMaterializedViewStatement
+ | refreshMaterializedViewStatement
+ | showMaterializedViewStatement
+ | describeMaterializedViewStatement
+ | alterMaterializedViewStatement
+ | dropMaterializedViewStatement
+ | vacuumMaterializedViewStatement
+ ;
+
+createMaterializedViewStatement
+ : CREATE MATERIALIZED VIEW (IF NOT EXISTS)? mvName=multipartIdentifier
+ AS query=materializedViewQuery
+ (WITH LEFT_PAREN propertyList RIGHT_PAREN)?
+ ;
+
+refreshMaterializedViewStatement
+ : REFRESH MATERIALIZED VIEW mvName=multipartIdentifier
+ ;
+
+showMaterializedViewStatement
+ : SHOW MATERIALIZED (VIEW | VIEWS) IN catalogDb=multipartIdentifier
+ ;
+
+describeMaterializedViewStatement
+ : (DESC | DESCRIBE) MATERIALIZED VIEW mvName=multipartIdentifier
+ ;
+
+alterMaterializedViewStatement
+ : ALTER MATERIALIZED VIEW mvName=multipartIdentifier
+ WITH LEFT_PAREN propertyList RIGHT_PAREN
+ ;
+
+dropMaterializedViewStatement
+ : DROP MATERIALIZED VIEW mvName=multipartIdentifier
+ ;
+
+vacuumMaterializedViewStatement
+ : VACUUM MATERIALIZED VIEW mvName=multipartIdentifier
+ ;
+
+indexManagementStatement
+ : showFlintIndexStatement
+ ;
+
+showFlintIndexStatement
+ : SHOW FLINT (INDEX | INDEXES)
+ IN catalogDb=multipartIdentifier #showFlintIndex
+ | SHOW FLINT (INDEX | INDEXES) EXTENDED
+ IN catalogDb=multipartIdentifier #showFlintIndexExtended
+ ;
+
+indexJobManagementStatement
+ : recoverIndexJobStatement
+ ;
+
+recoverIndexJobStatement
+ : RECOVER INDEX JOB identifier
+ ;
+
+/*
+ * Match all remaining tokens in non-greedy way
+ * so WITH clause won't be captured by this rule.
+ */
+materializedViewQuery
+ : .+?
+ ;
+
+whereClause
+ : WHERE filterCondition
+ ;
+
+filterCondition
+ : .+?
+ ;
+
+indexColTypeList
+ : indexColType (COMMA indexColType)*
+ ;
+
+indexColType
+ : multipartIdentifier skipType=(PARTITION | VALUE_SET | MIN_MAX | BLOOM_FILTER)
+ (LEFT_PAREN skipParams RIGHT_PAREN)?
+ ;
+
+skipParams
+ : propertyValue (COMMA propertyValue)*
+ ;
+
+indexName
+ : identifier
+ ;
+
+tableName
+ : multipartIdentifier
+ ;
diff --git a/language-grammar/src/main/antlr4/OpenSearchLegacySqlLexer.g4 b/language-grammar/src/main/antlr4/OpenSearchLegacySqlLexer.g4
new file mode 100644
index 00000000000..eb075d12ba5
--- /dev/null
+++ b/language-grammar/src/main/antlr4/OpenSearchLegacySqlLexer.g4
@@ -0,0 +1,352 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+MySQL (Positive Technologies) grammar
+The MIT License (MIT).
+Copyright (c) 2015-2017, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies.
+Copyright (c) 2017, Ivan Khudyashev (IHudyashov@ptsecurity.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+lexer grammar OpenSearchLegacySqlLexer;
+
+channels { SQLCOMMENT, ERRORCHANNEL }
+
+
+// SKIP
+
+SPACE: [ \t\r\n]+ -> channel(HIDDEN);
+SPEC_SQL_COMMENT: '/*!' .+? '*/' -> channel(SQLCOMMENT);
+COMMENT_INPUT: '/*' .*? '*/' -> channel(HIDDEN);
+LINE_COMMENT: (
+ ('-- ' | '#') ~[\r\n]* ('\r'? '\n' | EOF)
+ | '--' ('\r'? '\n' | EOF)
+ ) -> channel(HIDDEN);
+
+
+// Keywords
+// Common Keywords
+
+ALL: 'ALL';
+AND: 'AND';
+AS: 'AS';
+ASC: 'ASC';
+BETWEEN: 'BETWEEN';
+BY: 'BY';
+CASE: 'CASE';
+CAST: 'CAST';
+CROSS: 'CROSS';
+DATETIME: 'DATETIME';
+DESC: 'DESC';
+DESCRIBE: 'DESCRIBE';
+DISTINCT: 'DISTINCT';
+DOUBLE: 'DOUBLE';
+ELSE: 'ELSE';
+EXISTS: 'EXISTS';
+FALSE: 'FALSE';
+FLOAT: 'FLOAT';
+FROM: 'FROM';
+GROUP: 'GROUP';
+HAVING: 'HAVING';
+IN: 'IN';
+INNER: 'INNER';
+INT: 'INT';
+IS: 'IS';
+JOIN: 'JOIN';
+LEFT: 'LEFT';
+LIKE: 'LIKE';
+LIMIT: 'LIMIT';
+LONG: 'LONG';
+MATCH: 'MATCH';
+NATURAL: 'NATURAL';
+NOT: 'NOT';
+NULL_LITERAL: 'NULL';
+ON: 'ON';
+OR: 'OR';
+ORDER: 'ORDER';
+OUTER: 'OUTER';
+REGEXP: 'REGEXP';
+RIGHT: 'RIGHT';
+SELECT: 'SELECT';
+SHOW: 'SHOW';
+STRING: 'STRING';
+THEN: 'THEN';
+TRUE: 'TRUE';
+UNION: 'UNION';
+USING: 'USING';
+WHEN: 'WHEN';
+WHERE: 'WHERE';
+
+
+// OD SQL special keyword
+MISSING: 'MISSING';
+EXCEPT: 'MINUS';
+
+
+// Group function Keywords
+
+AVG: 'AVG';
+COUNT: 'COUNT';
+MAX: 'MAX';
+MIN: 'MIN';
+SUM: 'SUM';
+
+
+// Common function Keywords
+
+SUBSTRING: 'SUBSTRING';
+TRIM: 'TRIM';
+YEAR: 'YEAR';
+STRCMP: 'STRCMP';
+
+
+// Keywords, but can be ID
+// Common Keywords, but can be ID
+
+END: 'END';
+FULL: 'FULL';
+OFFSET: 'OFFSET';
+
+
+// PRIVILEGES
+
+TABLES: 'TABLES';
+
+
+// Common function names
+
+ABS: 'ABS';
+ACOS: 'ACOS';
+ADD: 'ADD';
+ASCII: 'ASCII';
+ASIN: 'ASIN';
+ATAN: 'ATAN';
+ATAN2: 'ATAN2';
+CBRT: 'CBRT';
+CEIL: 'CEIL';
+CONCAT: 'CONCAT';
+CONCAT_WS: 'CONCAT_WS';
+COS: 'COS';
+COSH: 'COSH';
+COT: 'COT';
+CURDATE: 'CURDATE';
+DATE: 'DATE';
+DATE_FORMAT: 'DATE_FORMAT';
+DAYOFMONTH: 'DAYOFMONTH';
+DEGREES: 'DEGREES';
+E: 'E';
+EXP: 'EXP';
+EXPM1: 'EXPM1';
+FLOOR: 'FLOOR';
+IF: 'IF';
+IFNULL: 'IFNULL';
+ISNULL: 'ISNULL';
+LENGTH: 'LENGTH';
+LN: 'LN';
+LOCATE: 'LOCATE';
+LOG: 'LOG';
+LOG10: 'LOG10';
+LOG2: 'LOG2';
+LOWER: 'LOWER';
+LTRIM: 'LTRIM';
+MAKETIME: 'MAKETIME';
+MODULUS: 'MODULUS';
+MONTH: 'MONTH';
+MONTHNAME: 'MONTHNAME';
+MULTIPLY: 'MULTIPLY';
+NOW: 'NOW';
+PI: 'PI';
+POW: 'POW';
+POWER: 'POWER';
+RADIANS: 'RADIANS';
+RAND: 'RAND';
+REPLACE: 'REPLACE';
+RINT: 'RINT';
+ROUND: 'ROUND';
+RTRIM: 'RTRIM';
+SIGN: 'SIGN';
+SIGNUM: 'SIGNUM';
+SIN: 'SIN';
+SINH: 'SINH';
+SQRT: 'SQRT';
+SUBTRACT: 'SUBTRACT';
+TAN: 'TAN';
+TIMESTAMP: 'TIMESTAMP';
+UPPER: 'UPPER';
+
+D: 'D';
+T: 'T';
+TS: 'TS';
+LEFT_BRACE: '{';
+RIGHT_BRACE: '}';
+
+
+// OD SQL special functions
+DATE_HISTOGRAM: 'DATE_HISTOGRAM';
+DAY_OF_MONTH: 'DAY_OF_MONTH';
+DAY_OF_YEAR: 'DAY_OF_YEAR';
+DAY_OF_WEEK: 'DAY_OF_WEEK';
+EXCLUDE: 'EXCLUDE';
+EXTENDED_STATS: 'EXTENDED_STATS';
+FIELD: 'FIELD';
+FILTER: 'FILTER';
+GEO_BOUNDING_BOX: 'GEO_BOUNDING_BOX';
+GEO_CELL: 'GEO_CELL';
+GEO_DISTANCE: 'GEO_DISTANCE';
+GEO_DISTANCE_RANGE: 'GEO_DISTANCE_RANGE';
+GEO_INTERSECTS: 'GEO_INTERSECTS';
+GEO_POLYGON: 'GEO_POLYGON';
+HISTOGRAM: 'HISTOGRAM';
+HOUR_OF_DAY: 'HOUR_OF_DAY';
+INCLUDE: 'INCLUDE';
+IN_TERMS: 'IN_TERMS';
+MATCHPHRASE: 'MATCHPHRASE';
+MATCH_PHRASE: 'MATCH_PHRASE';
+MATCHQUERY: 'MATCHQUERY';
+MATCH_QUERY: 'MATCH_QUERY';
+MINUTE_OF_DAY: 'MINUTE_OF_DAY';
+MINUTE_OF_HOUR: 'MINUTE_OF_HOUR';
+MONTH_OF_YEAR: 'MONTH_OF_YEAR';
+MULTIMATCH: 'MULTIMATCH';
+MULTI_MATCH: 'MULTI_MATCH';
+NESTED: 'NESTED';
+PERCENTILES: 'PERCENTILES';
+REGEXP_QUERY: 'REGEXP_QUERY';
+REVERSE_NESTED: 'REVERSE_NESTED';
+QUERY: 'QUERY';
+RANGE: 'RANGE';
+SCORE: 'SCORE';
+SECOND_OF_MINUTE: 'SECOND_OF_MINUTE';
+STATS: 'STATS';
+TERM: 'TERM';
+TERMS: 'TERMS';
+TOPHITS: 'TOPHITS';
+WEEK_OF_YEAR: 'WEEK_OF_YEAR';
+WILDCARDQUERY: 'WILDCARDQUERY';
+WILDCARD_QUERY: 'WILDCARD_QUERY';
+
+
+// Operators
+
+// Operators. Arithmetics
+
+STAR: '*';
+DIVIDE: '/';
+MODULE: '%';
+PLUS: '+';
+MINUS: '-';
+DIV: 'DIV';
+MOD: 'MOD';
+
+
+// Operators. Comparison
+
+EQUAL_SYMBOL: '=';
+GREATER_SYMBOL: '>';
+LESS_SYMBOL: '<';
+EXCLAMATION_SYMBOL: '!';
+
+
+// Operators. Bit
+
+BIT_NOT_OP: '~';
+BIT_OR_OP: '|';
+BIT_AND_OP: '&';
+BIT_XOR_OP: '^';
+
+
+// Constructor symbols
+
+DOT: '.';
+LR_BRACKET: '(';
+RR_BRACKET: ')';
+COMMA: ',';
+SEMI: ';';
+AT_SIGN: '@';
+ZERO_DECIMAL: '0';
+ONE_DECIMAL: '1';
+TWO_DECIMAL: '2';
+SINGLE_QUOTE_SYMB: '\'';
+DOUBLE_QUOTE_SYMB: '"';
+REVERSE_QUOTE_SYMB: '`';
+COLON_SYMB: ':';
+
+
+// Literal Primitives
+
+START_NATIONAL_STRING_LITERAL: 'N' SQUOTA_STRING;
+STRING_LITERAL: SQUOTA_STRING;
+DECIMAL_LITERAL: DEC_DIGIT+;
+HEXADECIMAL_LITERAL: 'X' '\'' (HEX_DIGIT HEX_DIGIT)+ '\''
+ | '0X' HEX_DIGIT+;
+
+REAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+
+ | DEC_DIGIT+ '.' EXPONENT_NUM_PART
+ | (DEC_DIGIT+)? '.' (DEC_DIGIT+ EXPONENT_NUM_PART)
+ | DEC_DIGIT+ EXPONENT_NUM_PART;
+NULL_SPEC_LITERAL: '\\' 'N';
+BIT_STRING: BIT_STRING_L;
+
+
+
+// Hack for dotID
+// Prevent recognizing the string .123somelatin as ((.123), FLOAT_LITERAL), ((somelatin), ID);
+// it must be recognized as ((.), DOT), (123somelatin, ID)
+
+DOT_ID: '.' ID_LITERAL;
+
+
+
+// Identifiers
+
+ID: ID_LITERAL;
+// DOUBLE_QUOTE_ID: '"' ~'"'+ '"';
+REVERSE_QUOTE_ID: '`' ~'`'+ '`';
+DOUBLE_QUOTE_ID: DQUOTA_STRING;
+BACKTICK_QUOTE_ID: BQUOTA_STRING;
+STRING_USER_NAME: (
+ SQUOTA_STRING | DQUOTA_STRING
+ | BQUOTA_STRING | ID_LITERAL
+ ) '@'
+ (
+ SQUOTA_STRING | DQUOTA_STRING
+ | BQUOTA_STRING | ID_LITERAL
+ );
+
+
+// Fragments for Literal primitives
+
+fragment EXPONENT_NUM_PART: 'E' [-+]? DEC_DIGIT+;
+fragment ID_LITERAL: [A-Z_$0-9@]*?[A-Z_$]+?[A-Z_$\-0-9]*;
+fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"';
+fragment SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\'';
+fragment BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`';
+fragment HEX_DIGIT: [0-9A-F];
+fragment DEC_DIGIT: [0-9];
+fragment BIT_STRING_L: 'B' '\'' [01]+ '\'';
+
+
+
+// Last tokens must generate Errors
+
+ERROR_RECOGNITION: . -> channel(ERRORCHANNEL);
diff --git a/language-grammar/src/main/antlr4/OpenSearchLegacySqlParser.g4 b/language-grammar/src/main/antlr4/OpenSearchLegacySqlParser.g4
new file mode 100644
index 00000000000..217aa5eaa39
--- /dev/null
+++ b/language-grammar/src/main/antlr4/OpenSearchLegacySqlParser.g4
@@ -0,0 +1,561 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+MySQL (Positive Technologies) grammar
+The MIT License (MIT).
+Copyright (c) 2015-2017, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies.
+Copyright (c) 2017, Ivan Khudyashev (IHudyashov@ptsecurity.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+parser grammar OpenSearchLegacySqlParser;
+
+
+options { tokenVocab = OpenSearchLegacySqlLexer; }
+// Top Level Description
+
+// Root rule
+root
+ : sqlStatement? SEMI? EOF // the statement and the trailing SEMI are each optional; input must end at EOF
+ ;
+
+// Only SELECT, DELETE, SHOW and DESCRIBE are supported for now
+sqlStatement
+ : dmlStatement
+ | administrationStatement
+ | utilityStatement
+ ;
+
+dmlStatement
+ : selectStatement
+ ;
+
+ // Data Manipulation Language
+
+ // Primary DML Statements
+selectStatement
+ : querySpecification # simpleSelect
+ | queryExpression # parenthesisSelect
+ | querySpecification unionStatement+ orderByClause? limitClause? # unionSelect
+ | querySpecification minusStatement+ orderByClause? limitClause? # minusSelect
+ ;
+
+
+// Detailed DML Statements
+
+orderByClause
+ : ORDER BY orderByExpression (',' orderByExpression)*
+ ;
+
+orderByExpression
+ : expression order = (ASC | DESC)?
+ ;
+
+tableSources
+ : tableSource (',' tableSource)*
+ ;
+
+tableSource
+ : tableSourceItem joinPart* # tableSourceBase
+ | '(' tableSourceItem joinPart* ')' # tableSourceNested
+ ;
+
+tableSourceItem
+ : tableName (AS? alias = uid)? # atomTableItem
+ | (selectStatement | '(' parenthesisSubquery = selectStatement ')') AS? alias = uid # subqueryTableItem
+ | '(' tableSources ')' # tableSourcesItem
+ ;
+
+joinPart
+ : (INNER | CROSS)? JOIN tableSourceItem (ON expression | USING '(' uidList ')')? # innerJoin
+ | (LEFT | RIGHT) OUTER? JOIN tableSourceItem (ON expression | USING '(' uidList ')')? # outerJoin
+ | NATURAL ((LEFT | RIGHT) OUTER?)? JOIN tableSourceItem # naturalJoin
+ ;
+
+// Select Statement's Details
+queryExpression
+ : '(' querySpecification ')'
+ | '(' queryExpression ')'
+ ;
+
+querySpecification
+ : SELECT selectSpec* selectElements fromClause orderByClause? limitClause?
+ ;
+
+unionStatement
+ : UNION unionType = (ALL | DISTINCT)? (querySpecification | queryExpression)
+ ;
+
+minusStatement
+ : EXCEPT (querySpecification | queryExpression)
+ ;
+
+selectSpec
+ : (ALL | DISTINCT)
+ ;
+
+selectElements
+ : (star = '*' | selectElement) (',' selectElement)*
+ ;
+
+selectElement
+ : fullId '.' '*' # selectStarElement
+ | fullColumnName (AS? uid)? # selectColumnElement
+ | functionCall (AS? uid)? # selectFunctionElement
+ | expression (AS? uid)? # selectExpressionElement
+ | NESTED '(' fullId DOT STAR ')' # selectNestedStarElement
+ ;
+
+fromClause
+ : FROM tableSources (WHERE whereExpr = expression)? (GROUP BY groupByItem (',' groupByItem)*)? (HAVING havingExpr = expression)?
+ ;
+
+groupByItem
+ : expression order = (ASC | DESC)?
+ ;
+
+limitClause
+ : LIMIT ((offset = limitClauseAtom ',')? limit = limitClauseAtom | limit = limitClauseAtom OFFSET offset = limitClauseAtom) // two spellings: "LIMIT [offset ,] limit" or "LIMIT limit OFFSET offset"
+ ;
+
+limitClauseAtom
+ : decimalLiteral
+ ;
+
+// SHOW/DESCRIBE statements
+administrationStatement
+ : showStatement
+ ;
+
+showStatement
+ : SHOW showSchemaEntity (schemaFormat = (FROM | IN) uid)? showFilter?
+ ;
+
+utilityStatement
+ : simpleDescribeStatement
+ ;
+
+simpleDescribeStatement
+ : command = DESCRIBE tableName (column = uid | pattern = STRING_LITERAL)?
+ ;
+
+showFilter
+ : LIKE STRING_LITERAL
+ | WHERE expression
+ ;
+
+showSchemaEntity
+ : FULL? TABLES
+ ;
+
+// Common Clauses
+
+// DB Objects
+fullId
+ : uid (DOT_ID | '.' uid)?
+ ;
+
+tableName
+ : fullId # simpleTableName
+ | uid STAR # tableNamePattern
+ | uid DIVIDE uid # tableAndTypeName
+ ;
+
+fullColumnName
+ : uid dottedId*
+ ;
+
+uid
+ : simpleId
+ | REVERSE_QUOTE_ID
+ ;
+
+simpleId
+ : ID
+ | DOT_ID // note: allowing DOT_ID in simpleId is a broad scope; move DOT_ID up to tableName if it needs narrowing
+ | DOUBLE_QUOTE_ID
+ | BACKTICK_QUOTE_ID
+ | keywordsCanBeId
+ | functionNameBase
+ ;
+
+dottedId
+ : DOT_ID
+ | '.' uid
+ ;
+
+// Literals
+decimalLiteral
+ : DECIMAL_LITERAL
+ | ZERO_DECIMAL
+ | ONE_DECIMAL
+ | TWO_DECIMAL
+ ;
+
+stringLiteral
+ // one plain or national string literal, then zero or more further STRING_LITERAL tokens
+ : (STRING_LITERAL | START_NATIONAL_STRING_LITERAL) STRING_LITERAL*
+ ;
+
+booleanLiteral
+ : TRUE
+ | FALSE
+ ;
+
+nullNotnull
+ : NOT? (NULL_LITERAL | NULL_SPEC_LITERAL)
+ ;
+
+constant
+ : stringLiteral
+ | decimalLiteral
+ | '-' decimalLiteral
+ | booleanLiteral
+ | REAL_LITERAL
+ | BIT_STRING
+ | NOT? nullLiteral = (NULL_LITERAL | NULL_SPEC_LITERAL)
+ | LEFT_BRACE dateType = (D | T | TS | DATE | TIME | TIMESTAMP) stringLiteral RIGHT_BRACE
+ ;
+
+// Common Lists
+uidList
+ : uid (',' uid)*
+ ;
+
+expressions
+ : expression (',' expression)*
+ ;
+
+aggregateFunction
+ : functionAsAggregatorFunction # functionAsAggregatorFunctionCall
+ | aggregateWindowedFunction # aggregateWindowedFunctionCall
+ ;
+
+scalarFunction
+ : scalarFunctionName '(' nestedFunctionArgs+ ')' # nestedFunctionCall
+ | scalarFunctionName '(' functionArgs? ')' # scalarFunctionCall
+ ;
+
+functionCall
+ : aggregateFunction # aggregateFunctionCall
+ | scalarFunctionName '(' aggregateWindowedFunction ')' # aggregationAsArgFunctionCall
+ | scalarFunction # scalarFunctionsCall
+ | specificFunction # specificFunctionCall
+ | fullId '(' functionArgs? ')' # udfFunctionCall
+ ;
+
+specificFunction
+ : CAST '(' expression AS convertedDataType ')' # dataTypeFunctionCall
+ | CASE expression caseFuncAlternative+ (ELSE elseArg = functionArg)? END # caseFunctionCall // CASE with a leading expression before the WHEN branches
+ | CASE caseFuncAlternative+ (ELSE elseArg = functionArg)? END # caseFunctionCall // CASE with WHEN branches only; reuses the same label as the alternative above
+ ;
+
+caseFuncAlternative
+ : WHEN condition = functionArg THEN consequent = functionArg
+ ;
+
+convertedDataType
+ : typeName = DATETIME
+ | typeName = INT
+ | typeName = DOUBLE
+ | typeName = LONG
+ | typeName = FLOAT
+ | typeName = STRING
+ ;
+
+aggregateWindowedFunction
+ : (AVG | MAX | MIN | SUM) '(' aggregator = (ALL | DISTINCT)? functionArg ')'
+ | COUNT '(' (starArg = '*' | aggregator = ALL? functionArg) ')'
+ | COUNT '(' aggregator = DISTINCT functionArgs ')'
+ ;
+
+functionAsAggregatorFunction
+ : (AVG | MAX | MIN | SUM) '(' aggregator = (ALL | DISTINCT)? functionCall ')'
+ | COUNT '(' aggregator = (ALL | DISTINCT)? functionCall ')'
+ ;
+
+scalarFunctionName
+ : functionNameBase
+ ;
+/*
+Separated aggregate to function-aggregator and nonfunction-aggregator aggregations.
+
+Current related rules: aggregateWindowedFunction, functionAsAggregatorFunction, aggregateFunction, functionCall
+Original rules: functionCall (as is shown in below changes), no aggregateWindowedFunction, no functionAsAggregatorFunction,
+ no aggregateFunction
+
+====
+
+Separated function argument rule to nonFunctionCall and functionCall
+functions with functionCall arguments are taken as nested functions
+
+Current related rules: functionArgs, functionArg, nestedFunctionArgs
+Original rules:
+functionArgs
+ : (constant | fullColumnName | functionCall | expression)
+ (
+ ','
+ (constant | fullColumnName | functionCall | expression)
+ )*
+ ;
+
+functionArg
+ : constant | fullColumnName | functionCall | expression
+ ;
+
+====
+
+Accordingly functionCall rule is changed by separating scalar functions
+to nested functions and non-nested functions.
+Current related rules: functionCall, scalarFunction
+Original rule:
+functionCall
+ : specificFunction #specificFunctionCall
+ | aggregateWindowedFunction #aggregateFunctionCall
+ | scalarFunctionName '(' functionArgs? ')' #scalarFunctionCall
+ | fullId '(' functionArgs? ')' #udfFunctionCall
+ ;
+*/
+
+
+functionArgs
+ : (constant | fullColumnName | expression) (',' (constant | fullColumnName | expression))*
+ ;
+
+functionArg
+ : constant
+ | fullColumnName
+ | expression
+ ;
+
+nestedFunctionArgs
+ : functionCall (',' functionArgs)?
+ ;
+
+// Expressions, predicates
+
+// Simplified approach for expression
+expression
+ : notOperator = (NOT | '!') expression # notExpression
+ | expression logicalOperator expression # logicalExpression
+ | predicate IS NOT? testValue = (TRUE | FALSE | MISSING) # isExpression
+ | predicate # predicateExpression
+ ;
+
+predicate
+ : predicate NOT? IN '(' (selectStatement | expressions) ')' # inPredicate
+ | predicate IS nullNotnull # isNullPredicate
+ | left = predicate comparisonOperator right = predicate # binaryComparisonPredicate
+ | predicate NOT? BETWEEN predicate AND predicate # betweenPredicate
+ | predicate NOT? LIKE predicate # likePredicate
+ | predicate NOT? regex = REGEXP predicate # regexpPredicate
+ | expressionAtom # expressionAtomPredicate
+ ;
+
+// Add in ASTVisitor nullNotnull in constant
+expressionAtom
+ : constant # constantExpressionAtom
+ | fullColumnName # fullColumnNameExpressionAtom
+ | functionCall # functionCallExpressionAtom
+ | unaryOperator expressionAtom # unaryExpressionAtom
+ | '(' expression (',' expression)* ')' # nestedExpressionAtom
+ | EXISTS '(' selectStatement ')' # existsExpessionAtom
+ | '(' selectStatement ')' # subqueryExpessionAtom
+ | left = expressionAtom bitOperator right = expressionAtom # bitExpressionAtom
+ | left = expressionAtom mathOperator right = expressionAtom # mathExpressionAtom
+ ;
+
+unaryOperator
+ : '!'
+ | '~'
+ | '+'
+ | '-'
+ | NOT
+ ;
+
+comparisonOperator
+ : '='
+ | '>'
+ | '<'
+ | '<' '='
+ | '>' '='
+ | '<' '>'
+ | '!' '='
+ ;
+
+logicalOperator
+ : AND
+ | '&' '&'
+ | OR
+ | '|' '|'
+ ;
+
+bitOperator
+ : '<' '<'
+ | '>' '>'
+ | '&'
+ | '^'
+ | '|'
+ ;
+
+mathOperator
+ : '*'
+ | '/'
+ | '%'
+ | DIV
+ | MOD
+ | '+'
+ | '-'
+ ;
+
+// Simple id sets
+// (keywords that can also be used as identifiers)
+keywordsCanBeId
+ : FULL
+ | FIELD
+ | D
+ | T
+ | TS // OD SQL and ODBC special
+ | COUNT
+ | MIN
+ | MAX
+ | AVG
+ | SUM
+ ;
+
+functionNameBase
+ : openSearchFunctionNameBase
+ | ABS
+ | ACOS
+ | ADD
+ | ASCII
+ | ASIN
+ | ATAN
+ | ATAN2
+ | CBRT
+ | CEIL
+ | CONCAT
+ | CONCAT_WS
+ | COS
+ | COSH
+ | COT
+ | CURDATE
+ | DATE
+ | DATE_FORMAT
+ | DAYOFMONTH
+ | DEGREES
+ | E
+ | EXP
+ | EXPM1
+ | FLOOR
+ | IF
+ | IFNULL
+ | ISNULL
+ | LEFT
+ | LENGTH
+ | LN
+ | LOCATE
+ | LOG
+ | LOG10
+ | LOG2
+ | LOWER
+ | LTRIM
+ | MAKETIME
+ | MODULUS
+ | MONTH
+ | MONTHNAME
+ | MULTIPLY
+ | NOW
+ | PI
+ | POW
+ | POWER
+ | RADIANS
+ | RAND
+ | REPLACE
+ | RIGHT
+ | RINT
+ | ROUND
+ | RTRIM
+ | SIGN
+ | SIGNUM
+ | SIN
+ | SINH
+ | SQRT
+ | SUBSTRING
+ | SUBTRACT
+ | TAN
+ | TIMESTAMP
+ | TRIM
+ | UPPER
+ | YEAR
+ | ADDDATE
+ | ADDTIME
+ | GREATEST
+ | LEAST
+ | STRCMP
+ ;
+
+openSearchFunctionNameBase
+ : DATE_HISTOGRAM
+ | DAY_OF_MONTH
+ | DAY_OF_YEAR
+ | DAY_OF_WEEK
+ | EXCLUDE
+ | EXTENDED_STATS
+ | FILTER
+ | GEO_BOUNDING_BOX
+ | GEO_CELL
+ | GEO_DISTANCE
+ | GEO_DISTANCE_RANGE
+ | GEO_INTERSECTS
+ | GEO_POLYGON
+ | INCLUDE
+ | IN_TERMS
+ | HISTOGRAM
+ | HOUR_OF_DAY
+ | MATCHPHRASE
+ | MATCH_PHRASE
+ | MATCHQUERY
+ | MATCH_QUERY
+ | MINUTE_OF_DAY
+ | MINUTE_OF_HOUR
+ | MISSING
+ | MONTH_OF_YEAR
+ | MULTIMATCH
+ | MULTI_MATCH
+ | NESTED
+ | PERCENTILES
+ | QUERY
+ | RANGE
+ | REGEXP_QUERY
+ | REVERSE_NESTED
+ | SCORE
+ | SECOND_OF_MINUTE
+ | STATS
+ | TERM
+ | TERMS
+ | TOPHITS
+ | WEEK_OF_YEAR
+ | WILDCARDQUERY
+ | WILDCARD_QUERY
+ ;
diff --git a/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4 b/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4
new file mode 100644
index 00000000000..b7dc4b7286d
--- /dev/null
+++ b/language-grammar/src/main/antlr4/OpenSearchPPLLexer.g4
@@ -0,0 +1,516 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+
+lexer grammar OpenSearchPPLLexer;
+
+channels { WHITESPACE, ERRORCHANNEL } // custom channels; of these, only ERRORCHANNEL is targeted by a rule in this lexer
+options { caseInsensitive = true; } // literals and character sets in this lexer match both upper and lower case
+
+// COMMAND KEYWORDS
+SEARCH: 'SEARCH';
+DESCRIBE: 'DESCRIBE';
+SHOW: 'SHOW';
+FROM: 'FROM';
+WHERE: 'WHERE';
+FIELDS: 'FIELDS';
+RENAME: 'RENAME';
+STATS: 'STATS';
+EVENTSTATS: 'EVENTSTATS';
+DEDUP: 'DEDUP';
+SORT: 'SORT';
+EVAL: 'EVAL';
+HEAD: 'HEAD';
+TOP_APPROX: 'TOP_APPROX';
+TOP: 'TOP';
+RARE_APPROX: 'RARE_APPROX';
+RARE: 'RARE';
+PARSE: 'PARSE';
+METHOD: 'METHOD';
+REGEX: 'REGEX';
+PUNCT: 'PUNCT';
+GROK: 'GROK';
+PATTERN: 'PATTERN';
+PATTERNS: 'PATTERNS';
+NEW_FIELD: 'NEW_FIELD';
+KMEANS: 'KMEANS';
+AD: 'AD';
+ML: 'ML';
+FILLNULL: 'FILLNULL';
+EXPAND: 'EXPAND';
+FLATTEN: 'FLATTEN';
+TRENDLINE: 'TRENDLINE';
+APPENDCOL: 'APPENDCOL';
+
+//Native JOIN KEYWORDS
+JOIN: 'JOIN';
+ON: 'ON';
+INNER: 'INNER';
+OUTER: 'OUTER';
+FULL: 'FULL';
+SEMI: 'SEMI';
+ANTI: 'ANTI';
+CROSS: 'CROSS';
+LEFT_HINT: 'HINT.LEFT';
+RIGHT_HINT: 'HINT.RIGHT';
+
+//CORRELATION KEYWORDS
+CORRELATE: 'CORRELATE';
+SELF: 'SELF';
+EXACT: 'EXACT';
+APPROXIMATE: 'APPROXIMATE';
+SCOPE: 'SCOPE';
+MAPPING: 'MAPPING';
+
+//EXPLAIN KEYWORDS
+EXPLAIN: 'EXPLAIN';
+FORMATTED: 'FORMATTED';
+COST: 'COST';
+CODEGEN: 'CODEGEN';
+EXTENDED: 'EXTENDED';
+SIMPLE: 'SIMPLE';
+
+// COMMAND ASSIST KEYWORDS
+AS: 'AS';
+BY: 'BY';
+SOURCE: 'SOURCE';
+INDEX: 'INDEX';
+D: 'D';
+DESC: 'DESC';
+DATASOURCES: 'DATASOURCES';
+USING: 'USING';
+WITH: 'WITH';
+
+// SORT FIELD KEYWORDS
+// TODO #963: Implement 'num', 'str', and 'ip' sort syntax
+AUTO: 'AUTO';
+STR: 'STR';
+IP: 'IP';
+NUM: 'NUM';
+
+// FIELDSUMMARY keywords
+FIELDSUMMARY: 'FIELDSUMMARY';
+INCLUDEFIELDS: 'INCLUDEFIELDS';
+NULLS: 'NULLS';
+
+//TRENDLINE KEYWORDS
+SMA: 'SMA';
+WMA: 'WMA';
+
+// APPENDCOL options
+OVERRIDE: 'OVERRIDE';
+
+// ARGUMENT KEYWORDS
+KEEPEMPTY: 'KEEPEMPTY';
+CONSECUTIVE: 'CONSECUTIVE';
+DEDUP_SPLITVALUES: 'DEDUP_SPLITVALUES';
+PARTITIONS: 'PARTITIONS';
+ALLNUM: 'ALLNUM';
+DELIM: 'DELIM';
+CENTROIDS: 'CENTROIDS';
+ITERATIONS: 'ITERATIONS';
+DISTANCE_TYPE: 'DISTANCE_TYPE';
+NUMBER_OF_TREES: 'NUMBER_OF_TREES';
+SHINGLE_SIZE: 'SHINGLE_SIZE';
+SAMPLE_SIZE: 'SAMPLE_SIZE';
+OUTPUT_AFTER: 'OUTPUT_AFTER';
+TIME_DECAY: 'TIME_DECAY';
+ANOMALY_RATE: 'ANOMALY_RATE';
+CATEGORY_FIELD: 'CATEGORY_FIELD';
+TIME_FIELD: 'TIME_FIELD';
+TIME_ZONE: 'TIME_ZONE';
+TRAINING_DATA_SIZE: 'TRAINING_DATA_SIZE';
+ANOMALY_SCORE_THRESHOLD: 'ANOMALY_SCORE_THRESHOLD';
+APPEND: 'APPEND';
+
+// COMPARISON FUNCTION KEYWORDS
+CASE: 'CASE';
+ELSE: 'ELSE';
+IN: 'IN';
+EXISTS: 'EXISTS';
+
+// LOGICAL KEYWORDS
+NOT: 'NOT';
+OR: 'OR';
+AND: 'AND';
+XOR: 'XOR';
+TRUE: 'TRUE';
+FALSE: 'FALSE';
+REGEXP: 'REGEXP';
+
+// DATETIME, INTERVAL AND UNIT KEYWORDS
+CONVERT_TZ: 'CONVERT_TZ';
+DATETIME: 'DATETIME';
+DAY: 'DAY';
+DAY_HOUR: 'DAY_HOUR';
+DAY_MICROSECOND: 'DAY_MICROSECOND';
+DAY_MINUTE: 'DAY_MINUTE';
+DAY_OF_YEAR: 'DAY_OF_YEAR';
+DAY_SECOND: 'DAY_SECOND';
+HOUR: 'HOUR';
+HOUR_MICROSECOND: 'HOUR_MICROSECOND';
+HOUR_MINUTE: 'HOUR_MINUTE';
+HOUR_OF_DAY: 'HOUR_OF_DAY';
+HOUR_SECOND: 'HOUR_SECOND';
+INTERVAL: 'INTERVAL';
+MICROSECOND: 'MICROSECOND';
+MILLISECOND: 'MILLISECOND';
+MINUTE: 'MINUTE';
+MINUTE_MICROSECOND: 'MINUTE_MICROSECOND';
+MINUTE_OF_DAY: 'MINUTE_OF_DAY';
+MINUTE_OF_HOUR: 'MINUTE_OF_HOUR';
+MINUTE_SECOND: 'MINUTE_SECOND';
+MONTH: 'MONTH';
+MONTH_OF_YEAR: 'MONTH_OF_YEAR';
+QUARTER: 'QUARTER';
+SECOND: 'SECOND';
+SECOND_MICROSECOND: 'SECOND_MICROSECOND';
+SECOND_OF_MINUTE: 'SECOND_OF_MINUTE';
+WEEK: 'WEEK';
+WEEK_OF_YEAR: 'WEEK_OF_YEAR';
+YEAR: 'YEAR';
+YEAR_MONTH: 'YEAR_MONTH';
+
+// DATASET TYPES
+DATAMODEL: 'DATAMODEL';
+LOOKUP: 'LOOKUP';
+SAVEDSEARCH: 'SAVEDSEARCH';
+
+// CONVERTED DATA TYPES
+INT: 'INT';
+INTEGER: 'INTEGER';
+DOUBLE: 'DOUBLE';
+LONG: 'LONG';
+FLOAT: 'FLOAT';
+STRING: 'STRING';
+BOOLEAN: 'BOOLEAN';
+
+// SPECIAL CHARACTERS AND OPERATORS
+PIPE: '|';
+COMMA: ',';
+DOT: '.';
+EQUAL: '=';
+GREATER: '>';
+LESS: '<';
+NOT_GREATER: '<' '=';
+NOT_LESS: '>' '=';
+NOT_EQUAL: '!' '=';
+PLUS: '+';
+MINUS: '-';
+STAR: '*';
+DIVIDE: '/';
+MODULE: '%';
+EXCLAMATION_SYMBOL: '!';
+COLON: ':';
+LT_PRTHS: '(';
+RT_PRTHS: ')';
+LT_SQR_PRTHS: '[';
+RT_SQR_PRTHS: ']';
+SINGLE_QUOTE: '\'';
+DOUBLE_QUOTE: '"';
+BACKTICK: '`';
+ARROW: '->';
+
+// Operators. Bit
+
+BIT_NOT_OP: '~';
+BIT_AND_OP: '&';
+BIT_XOR_OP: '^';
+
+// AGGREGATIONS
+AVG: 'AVG';
+COUNT: 'COUNT';
+DISTINCT_COUNT: 'DISTINCT_COUNT';
+DISTINCT_COUNT_APPROX: 'DISTINCT_COUNT_APPROX';
+ESTDC: 'ESTDC';
+ESTDC_ERROR: 'ESTDC_ERROR';
+MAX: 'MAX';
+MEAN: 'MEAN';
+MEDIAN: 'MEDIAN';
+MIN: 'MIN';
+MODE: 'MODE';
+RANGE: 'RANGE';
+STDEV: 'STDEV';
+STDEVP: 'STDEVP';
+SUM: 'SUM';
+SUMSQ: 'SUMSQ';
+VAR_SAMP: 'VAR_SAMP';
+VAR_POP: 'VAR_POP';
+STDDEV_SAMP: 'STDDEV_SAMP';
+STDDEV_POP: 'STDDEV_POP';
+PERCENTILE: 'PERCENTILE';
+PERCENTILE_APPROX: 'PERCENTILE_APPROX';
+TAKE: 'TAKE';
+FIRST: 'FIRST';
+LAST: 'LAST';
+LIST: 'LIST';
+VALUES: 'VALUES';
+PER_DAY: 'PER_DAY';
+PER_HOUR: 'PER_HOUR';
+PER_MINUTE: 'PER_MINUTE';
+PER_SECOND: 'PER_SECOND';
+RATE: 'RATE';
+SPARKLINE: 'SPARKLINE';
+C: 'C';
+DC: 'DC';
+
+// BASIC FUNCTIONS
+ABS: 'ABS';
+CBRT: 'CBRT';
+CEIL: 'CEIL';
+CEILING: 'CEILING';
+CONV: 'CONV';
+CRC32: 'CRC32';
+E: 'E';
+EXP: 'EXP';
+FLOOR: 'FLOOR';
+LN: 'LN';
+LOG: 'LOG';
+LOG10: 'LOG10';
+LOG2: 'LOG2';
+MOD: 'MOD';
+PI: 'PI';
+POSITION: 'POSITION';
+POW: 'POW';
+POWER: 'POWER';
+RAND: 'RAND';
+ROUND: 'ROUND';
+SIGN: 'SIGN';
+SIGNUM: 'SIGNUM';
+SQRT: 'SQRT';
+TRUNCATE: 'TRUNCATE';
+
+// TRIGONOMETRIC FUNCTIONS
+ACOS: 'ACOS';
+ASIN: 'ASIN';
+ATAN: 'ATAN';
+ATAN2: 'ATAN2';
+COS: 'COS';
+COT: 'COT';
+DEGREES: 'DEGREES';
+RADIANS: 'RADIANS';
+SIN: 'SIN';
+TAN: 'TAN';
+
+// CRYPTOGRAPHIC FUNCTIONS
+MD5: 'MD5';
+SHA1: 'SHA1';
+SHA2: 'SHA2';
+
+// DATE AND TIME FUNCTIONS
+ADDDATE: 'ADDDATE';
+ADDTIME: 'ADDTIME';
+CURDATE: 'CURDATE';
+CURRENT_DATE: 'CURRENT_DATE';
+CURRENT_TIME: 'CURRENT_TIME';
+CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
+CURRENT_TIMEZONE: 'CURRENT_TIMEZONE';
+CURTIME: 'CURTIME';
+DATE: 'DATE';
+DATEDIFF: 'DATEDIFF';
+DATE_ADD: 'DATE_ADD';
+DATE_FORMAT: 'DATE_FORMAT';
+DATE_SUB: 'DATE_SUB';
+DAYNAME: 'DAYNAME';
+DAYOFMONTH: 'DAYOFMONTH';
+DAYOFWEEK: 'DAYOFWEEK';
+DAYOFYEAR: 'DAYOFYEAR';
+DAY_OF_MONTH: 'DAY_OF_MONTH';
+DAY_OF_WEEK: 'DAY_OF_WEEK';
+DURATION: 'DURATION';
+EXTRACT: 'EXTRACT';
+FROM_DAYS: 'FROM_DAYS';
+FROM_UNIXTIME: 'FROM_UNIXTIME';
+GET_FORMAT: 'GET_FORMAT';
+LAST_DAY: 'LAST_DAY';
+LOCALTIME: 'LOCALTIME';
+LOCALTIMESTAMP: 'LOCALTIMESTAMP';
+MAKEDATE: 'MAKEDATE';
+MAKE_DATE: 'MAKE_DATE';
+MAKETIME: 'MAKETIME';
+MONTHNAME: 'MONTHNAME';
+NOW: 'NOW';
+PERIOD_ADD: 'PERIOD_ADD';
+PERIOD_DIFF: 'PERIOD_DIFF';
+SEC_TO_TIME: 'SEC_TO_TIME';
+STR_TO_DATE: 'STR_TO_DATE';
+SUBDATE: 'SUBDATE';
+SUBTIME: 'SUBTIME';
+SYSDATE: 'SYSDATE';
+TIME: 'TIME';
+TIMEDIFF: 'TIMEDIFF';
+TIMESTAMP: 'TIMESTAMP';
+TIMESTAMPADD: 'TIMESTAMPADD';
+TIMESTAMPDIFF: 'TIMESTAMPDIFF';
+TIME_FORMAT: 'TIME_FORMAT';
+TIME_TO_SEC: 'TIME_TO_SEC';
+TO_DAYS: 'TO_DAYS';
+TO_SECONDS: 'TO_SECONDS';
+UNIX_TIMESTAMP: 'UNIX_TIMESTAMP';
+UTC_DATE: 'UTC_DATE';
+UTC_TIME: 'UTC_TIME';
+UTC_TIMESTAMP: 'UTC_TIMESTAMP';
+WEEKDAY: 'WEEKDAY';
+YEARWEEK: 'YEARWEEK';
+
+// RELATIVE TIME FUNCTIONS
+RELATIVE_TIMESTAMP: 'RELATIVE_TIMESTAMP';
+EARLIEST: 'EARLIEST';
+LATEST: 'LATEST';
+
+// TEXT FUNCTIONS
+SUBSTR: 'SUBSTR';
+SUBSTRING: 'SUBSTRING';
+LTRIM: 'LTRIM';
+RTRIM: 'RTRIM';
+TRIM: 'TRIM';
+TO: 'TO';
+LOWER: 'LOWER';
+UPPER: 'UPPER';
+CONCAT: 'CONCAT';
+CONCAT_WS: 'CONCAT_WS';
+LENGTH: 'LENGTH';
+STRCMP: 'STRCMP';
+RIGHT: 'RIGHT';
+LEFT: 'LEFT';
+ASCII: 'ASCII';
+LOCATE: 'LOCATE';
+REPLACE: 'REPLACE';
+REVERSE: 'REVERSE';
+CAST: 'CAST';
+
+// JSON TEXT FUNCTIONS
+JSON: 'JSON';
+JSON_OBJECT: 'JSON_OBJECT';
+JSON_ARRAY: 'JSON_ARRAY';
+JSON_ARRAY_LENGTH: 'JSON_ARRAY_LENGTH';
+TO_JSON_STRING: 'TO_JSON_STRING';
+JSON_EXTRACT: 'JSON_EXTRACT';
+JSON_DELETE : 'JSON_DELETE';
+JSON_KEYS: 'JSON_KEYS';
+JSON_VALID: 'JSON_VALID';
+JSON_APPEND: 'JSON_APPEND';
+JSON_EXTEND : 'JSON_EXTEND';
+JSON_SET: 'JSON_SET';
+//JSON_ARRAY_ALL_MATCH: 'JSON_ARRAY_ALL_MATCH';
+//JSON_ARRAY_ANY_MATCH: 'JSON_ARRAY_ANY_MATCH';
+//JSON_ARRAY_FILTER: 'JSON_ARRAY_FILTER';
+//JSON_ARRAY_MAP: 'JSON_ARRAY_MAP';
+//JSON_ARRAY_REDUCE: 'JSON_ARRAY_REDUCE';
+
+// COLLECTION FUNCTIONS
+ARRAY: 'ARRAY';
+ARRAY_LENGTH: 'ARRAY_LENGTH';
+
+// LAMBDA FUNCTIONS
+//EXISTS: 'EXISTS';
+FORALL: 'FORALL';
+FILTER: 'FILTER';
+TRANSFORM: 'TRANSFORM';
+REDUCE: 'REDUCE';
+
+// BOOL FUNCTIONS
+LIKE: 'LIKE';
+ISNULL: 'ISNULL';
+ISNOTNULL: 'ISNOTNULL';
+BETWEEN: 'BETWEEN';
+CIDRMATCH: 'CIDRMATCH';
+ISPRESENT: 'ISPRESENT';
+ISEMPTY: 'ISEMPTY';
+ISBLANK: 'ISBLANK';
+
+// FLOWCONTROL FUNCTIONS
+IFNULL: 'IFNULL';
+NULLIF: 'NULLIF';
+IF: 'IF';
+TYPEOF: 'TYPEOF';
+
+//OTHER CONDITIONAL EXPRESSIONS
+COALESCE: 'COALESCE';
+
+//GEOLOCATION FUNCTIONS
+GEOIP: 'GEOIP';
+
+//GEOLOCATION PROPERTIES
+COUNTRY_ISO_CODE: 'COUNTRY_ISO_CODE';
+COUNTRY_NAME: 'COUNTRY_NAME';
+CONTINENT_NAME: 'CONTINENT_NAME';
+REGION_ISO_CODE: 'REGION_ISO_CODE';
+REGION_NAME: 'REGION_NAME';
+CITY_NAME: 'CITY_NAME';
+LOCATION: 'LOCATION';
+
+// RELEVANCE FUNCTIONS AND PARAMETERS
+MATCH: 'MATCH';
+MATCH_PHRASE: 'MATCH_PHRASE';
+MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX';
+MATCH_BOOL_PREFIX: 'MATCH_BOOL_PREFIX';
+SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING';
+MULTI_MATCH: 'MULTI_MATCH';
+QUERY_STRING: 'QUERY_STRING';
+
+ALLOW_LEADING_WILDCARD: 'ALLOW_LEADING_WILDCARD';
+ANALYZE_WILDCARD: 'ANALYZE_WILDCARD';
+ANALYZER: 'ANALYZER';
+AUTO_GENERATE_SYNONYMS_PHRASE_QUERY:'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY';
+BOOST: 'BOOST';
+CUTOFF_FREQUENCY: 'CUTOFF_FREQUENCY';
+DEFAULT_FIELD: 'DEFAULT_FIELD';
+DEFAULT_OPERATOR: 'DEFAULT_OPERATOR';
+ENABLE_POSITION_INCREMENTS: 'ENABLE_POSITION_INCREMENTS';
+ESCAPE: 'ESCAPE';
+FLAGS: 'FLAGS';
+FUZZY_MAX_EXPANSIONS: 'FUZZY_MAX_EXPANSIONS';
+FUZZY_PREFIX_LENGTH: 'FUZZY_PREFIX_LENGTH';
+FUZZY_TRANSPOSITIONS: 'FUZZY_TRANSPOSITIONS';
+FUZZY_REWRITE: 'FUZZY_REWRITE';
+FUZZINESS: 'FUZZINESS';
+LENIENT: 'LENIENT';
+LOW_FREQ_OPERATOR: 'LOW_FREQ_OPERATOR';
+MAX_DETERMINIZED_STATES: 'MAX_DETERMINIZED_STATES';
+MAX_EXPANSIONS: 'MAX_EXPANSIONS';
+MINIMUM_SHOULD_MATCH: 'MINIMUM_SHOULD_MATCH';
+OPERATOR: 'OPERATOR';
+PHRASE_SLOP: 'PHRASE_SLOP';
+PREFIX_LENGTH: 'PREFIX_LENGTH';
+QUOTE_ANALYZER: 'QUOTE_ANALYZER';
+QUOTE_FIELD_SUFFIX: 'QUOTE_FIELD_SUFFIX';
+REWRITE: 'REWRITE';
+SLOP: 'SLOP';
+TIE_BREAKER: 'TIE_BREAKER';
+TYPE: 'TYPE';
+ZERO_TERMS_QUERY: 'ZERO_TERMS_QUERY';
+
+// SPAN KEYWORDS
+SPAN: 'SPAN';
+MS: 'MS';
+S: 'S';
+M: 'M';
+H: 'H';
+W: 'W';
+Q: 'Q';
+Y: 'Y';
+
+
+// LITERALS AND VALUES
+//STRING_LITERAL: DQUOTA_STRING | SQUOTA_STRING | BQUOTA_STRING;
+ID: ID_LITERAL; // declared after the keyword tokens, so a keyword wins when both match the same text
+CLUSTER: CLUSTER_PREFIX_LITERAL; // identifier-like text ending in ':'
+INTEGER_LITERAL: DEC_DIGIT+;
+DECIMAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+; // integer part is optional; digits after the dot are required
+
+fragment DATE_SUFFIX: ([\-.][*0-9]+)+; // one or more groups of '-' or '.' followed by digits and/or '*'
+fragment ID_LITERAL: [@*A-Z]+?[*A-Z_\-0-9]*; // starts with '@', '*' or a letter; may continue with '*', letters, '_', '-' or digits
+fragment CLUSTER_PREFIX_LITERAL: [*A-Z]+?[*A-Z_\-0-9]* COLON; // like ID_LITERAL but without a leading '@', followed by ':'
+ID_DATE_SUFFIX: CLUSTER_PREFIX_LITERAL? ID_LITERAL DATE_SUFFIX; // optional cluster prefix, an identifier, then a date suffix
+DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"'; // double-quoted text; backslash escapes any character, "" is accepted inside
+SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\''; // single-quoted text; backslash escapes any character, '' is accepted inside
+BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`'; // backtick-quoted text; backslash escapes any character, `` is accepted inside
+fragment DEC_DIGIT: [0-9];
+
+LINE_COMMENT: '//' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN); // runs to end of line; a backslash followed by a newline continues the comment
+BLOCK_COMMENT: '/*' .*? '*/' -> channel(HIDDEN); // non-greedy, so the comment ends at the first */
+
+ERROR_RECOGNITION: . -> channel(ERRORCHANNEL); // catch-all for any single character. NOTE(review): no rule in this lexer matches whitespace, so whitespace outside strings and comments also lands here - confirm this is intended
diff --git a/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4 b/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4
new file mode 100644
index 00000000000..cae57b53181
--- /dev/null
+++ b/language-grammar/src/main/antlr4/OpenSearchPPLParser.g4
@@ -0,0 +1,1208 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+parser grammar OpenSearchPPLParser;
+
+
+options { tokenVocab = OpenSearchPPLLexer; }
+root
+ : pplStatement? EOF
+ ;
+
+// statement
+pplStatement
+ : dmlStatement
+ ;
+
+dmlStatement
+ : (explainCommand PIPE)? queryStatement
+ ;
+
+queryStatement
+ : pplCommands (PIPE commands)*
+ ;
+
+subSearch
+ : searchCommand (PIPE commands)*
+ ;
+
+// commands
+pplCommands
+ : searchCommand
+ | describeCommand
+ ;
+
+commands
+ : whereCommand
+ | correlateCommand
+ | joinCommand
+ | fieldsCommand
+ | statsCommand
+ | dedupCommand
+ | sortCommand
+ | headCommand
+ | topCommand
+ | rareCommand
+ | evalCommand
+ | grokCommand
+ | parseCommand
+ | patternsCommand
+ | lookupCommand
+ | renameCommand
+ | fillnullCommand
+ | fieldsummaryCommand
+ | flattenCommand
+ | expandCommand
+ | trendlineCommand
+ | appendcolCommand
+ ;
+
+commandName
+ : SEARCH
+ | DESCRIBE
+ | SHOW
+ | AD
+ | ML
+ | KMEANS
+ | WHERE
+ | CORRELATE
+ | JOIN
+ | FIELDS
+ | STATS
+ | EVENTSTATS
+ | DEDUP
+ | EXPLAIN
+ | SORT
+ | HEAD
+ | TOP
+ | TOP_APPROX
+ | RARE
+ | RARE_APPROX
+ | EVAL
+ | GROK
+ | PARSE
+ | PATTERNS
+ | LOOKUP
+ | RENAME
+ | EXPAND
+ | FILLNULL
+ | FIELDSUMMARY
+ | FLATTEN
+ | TRENDLINE
+ | APPENDCOL
+ ;
+
+searchCommand
+ : SEARCH? fromClause # searchFrom // source only
+ | SEARCH? fromClause logicalExpression # searchFromFilter // source first, then filter
+ | SEARCH? logicalExpression fromClause # searchFilterFrom // filter first, then source
+ ;
+
+fieldsummaryCommand
+ : FIELDSUMMARY fieldsummaryParameter* // keyword followed by zero or more parameters
+ ;
+
+fieldsummaryParameter
+ : INCLUDEFIELDS EQUAL fieldList # fieldsummaryIncludeFields
+ | NULLS EQUAL booleanLiteral # fieldsummaryNulls
+ ;
+
+describeCommand
+ : DESCRIBE tableSourceClause
+ ;
+
+explainCommand
+ : EXPLAIN explainMode
+ ;
+
+explainMode
+ : FORMATTED
+ | COST
+ | CODEGEN
+ | EXTENDED
+ | SIMPLE
+ ;
+
+showDataSourcesCommand
+ : SHOW DATASOURCES
+ ;
+
+whereCommand
+ : WHERE logicalExpression
+ ;
+
+correlateCommand
+ : CORRELATE correlationType FIELDS LT_PRTHS fieldList RT_PRTHS scopeClause? mappingList // scope clause is optional; mapping list is required
+ ;
+
+correlationType
+ : SELF
+ | EXACT
+ | APPROXIMATE
+ ;
+
+scopeClause
+ : SCOPE LT_PRTHS fieldExpression COMMA value = literalValue (unit = timespanUnit)? RT_PRTHS
+ ;
+
+mappingList
+ : MAPPING LT_PRTHS mappingClause (COMMA mappingClause)* RT_PRTHS // one or more comma-separated mapping clauses in parentheses
+ ;
+
+mappingClause
+ : left = qualifiedName comparisonOperator right = qualifiedName # mappingCompareExpr
+ ;
+
+fieldsCommand
+ : FIELDS (PLUS | MINUS)? fieldList
+ ;
+
+renameCommand
+ : RENAME renameClasue (COMMA renameClasue)*
+ ;
+
+statsCommand
+ : (STATS | EVENTSTATS) (PARTITIONS EQUAL partitions = integerLiteral)? (ALLNUM EQUAL allnum = booleanLiteral)? (DELIM EQUAL delim = stringLiteral)? statsAggTerm (COMMA statsAggTerm)* statsByClause? (DEDUP_SPLITVALUES EQUAL dedupsplit = booleanLiteral)? // options, then one or more aggregation terms, then an optional BY clause
+ ;
+
+dedupCommand
+ : DEDUP (number = integerLiteral)? fieldList (KEEPEMPTY EQUAL keepempty = booleanLiteral)? (CONSECUTIVE EQUAL consecutive = booleanLiteral)?
+ ;
+
+sortCommand
+ : SORT sortbyClause
+ ;
+
+evalCommand
+ : EVAL evalClause (COMMA evalClause)*
+ ;
+
+headCommand
+ : HEAD (number = integerLiteral)? (FROM from = integerLiteral)?
+ ;
+
+topCommand
+ : (TOP | TOP_APPROX) (number = integerLiteral)? fieldList byClause? // optional count, required field list, optional BY clause
+ ;
+
+rareCommand
+ : (RARE | RARE_APPROX) (number = integerLiteral)? fieldList byClause? // optional count, required field list, optional BY clause
+ ;
+
+grokCommand
+ : GROK source_field = expression pattern = stringLiteral // source expression followed by a string pattern
+ ;
+
+parseCommand
+ : PARSE source_field = expression pattern = stringLiteral // source expression followed by a string pattern
+ ;
+
+patternsCommand
+ : PATTERNS patternsParameter* source_field = expression // zero or more parameters, then the source expression
+ ;
+
+patternsParameter
+ : NEW_FIELD EQUAL new_field = stringLiteral // new_field = '<string>'
+ | PATTERN EQUAL pattern = stringLiteral // pattern = '<string>'
+ ;
+
+patternsMethod
+ : PUNCT
+ | REGEX
+ ;
+
+// lookup
+lookupCommand
+ : LOOKUP tableSource lookupMappingList ((APPEND | REPLACE) outputCandidateList)?
+ ;
+
+lookupMappingList
+ : lookupPair (COMMA lookupPair)*
+ ;
+
+outputCandidateList
+ : lookupPair (COMMA lookupPair)*
+ ;
+
+ // The lookup pair will generate a K-V pair.
+ // The format is Key -> Alias(outputFieldName, inputField), Value -> outputField. For example:
+ // 1. When lookupPair is "name AS cName", the key will be Alias(cName, Field(name)), the value will be Field(cName)
+ // 2. When lookupPair is "dept", the key is Alias(dept, Field(dept)), value is Field(dept)
+lookupPair
+ : inputField = fieldExpression (AS outputField = fieldExpression)?
+ ;
+
+fillnullCommand
+ : FILLNULL (fillNullWithTheSameValue
+ | fillNullWithFieldVariousValues)
+ ;
+
+fillNullWithTheSameValue
+ : WITH nullReplacement = valueExpression IN nullableFieldList = fieldList
+ ;
+
+fillNullWithFieldVariousValues
+ : USING nullableReplacementExpression (COMMA nullableReplacementExpression)*
+ ;
+
+nullableReplacementExpression
+ : nullableField = fieldExpression EQUAL nullableReplacement = valueExpression
+ ;
+
+expandCommand
+ : EXPAND fieldExpression (AS alias = qualifiedName)?
+ ;
+
+flattenCommand
+ : FLATTEN fieldExpression (AS alias = identifierSeq)?
+ ;
+
+trendlineCommand
+ : TRENDLINE (SORT sortField)? trendlineClause+ // optional sort field, then one or more trendline clauses
+ ;
+
+trendlineClause
+ : trendlineType LT_PRTHS numberOfDataPoints = INTEGER_LITERAL COMMA field = fieldExpression RT_PRTHS (AS alias = qualifiedName)?
+ ;
+
+trendlineType
+ : SMA
+ | WMA
+ ;
+
+appendcolCommand
+ : APPENDCOL (OVERRIDE EQUAL override = booleanLiteral)? LT_SQR_PRTHS commands (PIPE commands)* RT_SQR_PRTHS
+ ;
+
+kmeansCommand
+ : KMEANS kmeansParameter* // keyword followed by zero or more parameters
+ ;
+
+kmeansParameter
+ : CENTROIDS EQUAL centroids = integerLiteral // integer-valued
+ | ITERATIONS EQUAL iterations = integerLiteral // integer-valued
+ | DISTANCE_TYPE EQUAL distance_type = stringLiteral // string-valued
+ ;
+
+adCommand
+ : AD adParameter* // keyword followed by zero or more parameters
+ ;
+
+adParameter // each alternative is KEY EQUAL value, with the value's literal type fixed per key
+ : NUMBER_OF_TREES EQUAL number_of_trees = integerLiteral
+ | SHINGLE_SIZE EQUAL shingle_size = integerLiteral
+ | SAMPLE_SIZE EQUAL sample_size = integerLiteral
+ | OUTPUT_AFTER EQUAL output_after = integerLiteral
+ | TIME_DECAY EQUAL time_decay = decimalLiteral
+ | ANOMALY_RATE EQUAL anomaly_rate = decimalLiteral
+ | CATEGORY_FIELD EQUAL category_field = stringLiteral
+ | TIME_FIELD EQUAL time_field = stringLiteral
+ | DATE_FORMAT EQUAL date_format = stringLiteral
+ | TIME_ZONE EQUAL time_zone = stringLiteral
+ | TRAINING_DATA_SIZE EQUAL training_data_size = integerLiteral
+ | ANOMALY_SCORE_THRESHOLD EQUAL anomaly_score_threshold = decimalLiteral
+ ;
+
+mlCommand
+ : ML mlArg* // keyword followed by zero or more name = value arguments
+ ;
+
+mlArg
+ : argName = ident EQUAL argValue = literalValue // a single name = literal pair
+ ;
+
+// clauses
+fromClause
+ : SOURCE EQUAL tableOrSubqueryClause
+ | INDEX EQUAL tableOrSubqueryClause
+ ;
+
+tableOrSubqueryClause
+ : LT_SQR_PRTHS subSearch RT_SQR_PRTHS (AS alias = qualifiedName)?
+ | tableSourceClause
+ ;
+
+// One tableSourceClause will generate one Relation node with/without one alias
+// even if the relation contains more than one table source.
+// These table sources in one relation will be read one by one in OpenSearch.
+// But it may have different behaviours in different execution backends.
+// For example, a Spark UnresolvedRelation node only accepts one data source.
+tableSourceClause
+ : tableSource (COMMA tableSource)* (AS alias = qualifiedName)?
+ ;
+
+// join
+joinCommand
+ : joinType JOIN sideAlias joinHintList? joinCriteria? right = tableOrSubqueryClause // hints and ON criteria are optional; the right side is required
+ ;
+
+joinType
+ : INNER? // INNER is optional, so this alternative also matches nothing (a bare JOIN)
+ | CROSS
+ | LEFT OUTER?
+ | RIGHT OUTER?
+ | FULL OUTER?
+ | LEFT? SEMI
+ | LEFT? ANTI
+ ;
+
+sideAlias
+ : (LEFT EQUAL leftAlias = qualifiedName)? COMMA? (RIGHT EQUAL rightAlias = qualifiedName)? // every part is optional, so this rule may match nothing
+ ;
+
+joinCriteria
+ : ON logicalExpression
+ ;
+
+joinHintList
+ : hintPair (COMMA? hintPair)*
+ ;
+
+hintPair
+ : leftHintKey = LEFT_HINT DOT ID EQUAL leftHintValue = ident #leftHint // LEFT_HINT is the single lexer token 'HINT.LEFT'
+ | rightHintKey = RIGHT_HINT DOT ID EQUAL rightHintValue = ident #rightHint // RIGHT_HINT is the single lexer token 'HINT.RIGHT'
+ ;
+
+renameClasue
+ : orignalField = wcFieldExpression AS renamedField = wcFieldExpression
+ ;
+
+byClause
+ : BY fieldList
+ ;
+
+statsByClause
+ : BY fieldList // fields only
+ | BY bySpanClause // span only
+ | BY bySpanClause COMMA fieldList // span first, then fields; there is no alternative with the fields before the span
+ ;
+
+bySpanClause
+ : spanClause (AS alias = qualifiedName)?
+ ;
+
+spanClause
+ : SPAN LT_PRTHS fieldExpression COMMA value = literalValue (unit = timespanUnit)? RT_PRTHS // SPAN(field, value [unit]); the unit is optional
+ ;
+
+sortbyClause
+ : sortField (COMMA sortField)*
+ ;
+
+evalClause
+ : fieldExpression EQUAL expression
+ | geoipCommand
+ ;
+
+geoipCommand
+ : fieldExpression EQUAL GEOIP LT_PRTHS ipAddress = functionArg (COMMA properties = geoIpPropertyList)? RT_PRTHS
+ ;
+
+// aggregation terms
+statsAggTerm
+ : statsFunction (AS alias = wcFieldExpression)?
+ ;
+
+// aggregation functions
+statsFunction
+ : statsFunctionName LT_PRTHS valueExpression RT_PRTHS # statsFunctionCall // named aggregate applied to exactly one expression
+ | COUNT LT_PRTHS RT_PRTHS # countAllFunctionCall // COUNT with an empty argument list
+ | (DISTINCT_COUNT | DC | DISTINCT_COUNT_APPROX) LT_PRTHS valueExpression RT_PRTHS # distinctCountFunctionCall // three keywords share this one alternative
+ | percentileFunctionName = (PERCENTILE | PERCENTILE_APPROX) LT_PRTHS valueExpression COMMA percent = integerLiteral RT_PRTHS # percentileFunctionCall // percent must be an integer literal
+ ;
+
+statsFunctionName
+ : AVG
+ | COUNT
+ | SUM
+ | MIN
+ | MAX
+ | STDDEV_SAMP
+ | STDDEV_POP
+ ;
+
+// expressions
+expression
+ : logicalExpression
+ | valueExpression
+ ;
+
+logicalExpression
+ : NOT logicalExpression # logicalNot
+ | LT_PRTHS logicalExpression RT_PRTHS # parentheticLogicalExpr
+ | comparisonExpression # comparsion // label spelling (sic) kept as-is: ANTLR derives the generated context class name from it
+ | left = logicalExpression (AND)? right = logicalExpression # logicalAnd // AND is optional: two adjacent expressions with no operator also take this alternative
+ | left = logicalExpression OR right = logicalExpression # logicalOr // binary alternatives are listed AND, OR, XOR; ANTLR gives earlier left-recursive alternatives higher precedence
+ | left = logicalExpression XOR right = logicalExpression # logicalXor
+ | booleanExpression # booleanExpr
+ ;
+
+comparisonExpression
+ : left = valueExpression comparisonOperator right = valueExpression # compareExpr
+ | valueExpression NOT? IN valueList # inExpr
+ | expr1 = functionArg NOT? BETWEEN expr2 = functionArg AND expr3 = functionArg # between
+ ;
+
+valueExpressionList
+ : valueExpression
+ | LT_PRTHS valueExpression (COMMA valueExpression)* RT_PRTHS
+ ;
+
+valueExpression
+ : left = valueExpression binaryOperator = (STAR | DIVIDE | MODULE) right = valueExpression # binaryArithmetic // listed first, so these bind tighter than PLUS / MINUS below
+ | left = valueExpression binaryOperator = (PLUS | MINUS) right = valueExpression # binaryArithmetic // shares the binaryArithmetic label with the alternative above
+ | primaryExpression # valueExpressionDefault
+ | positionFunction # positionFunctionCall
+ | caseFunction # caseExpr
+ | timestampFunction # timestampFunctionCall
+ | LT_PRTHS valueExpression RT_PRTHS # parentheticValueExpr
+ | LT_SQR_PRTHS subSearch RT_SQR_PRTHS # scalarSubqueryExpr
+ | ident ARROW expression # lambda // single-parameter lambda, no parentheses
+ | LT_PRTHS ident (COMMA ident)+ RT_PRTHS ARROW expression # lambda // parenthesised form requires at least two parameters
+ ;
+
+primaryExpression
+ : evalFunctionCall
+ | fieldExpression
+ | literalValue
+ | dataTypeFunctionCall
+ ;
+
+positionFunction
+ : positionFunctionName LT_PRTHS functionArg IN functionArg RT_PRTHS
+ ;
+
+booleanExpression
+ : booleanFunctionCall # booleanFunctionCallExpr
+ | valueExpressionList NOT? IN LT_SQR_PRTHS subSearch RT_SQR_PRTHS # inSubqueryExpr
+ | EXISTS LT_SQR_PRTHS subSearch RT_SQR_PRTHS # existsSubqueryExpr
+ | cidrMatchFunctionCall # cidrFunctionCallExpr
+ ;
+
+caseFunction
+ : CASE LT_PRTHS logicalExpression COMMA valueExpression (COMMA logicalExpression COMMA valueExpression)* (ELSE valueExpression)? RT_PRTHS // one or more condition/value pairs, then an optional ELSE value
+ ;
+
+relevanceExpression
+ : singleFieldRelevanceFunction
+ | multiFieldRelevanceFunction
+ ;
+
+// Field is a single column
+singleFieldRelevanceFunction
+ : singleFieldRelevanceFunctionName LT_PRTHS field = relevanceField COMMA query = relevanceQuery (COMMA relevanceArg)* RT_PRTHS
+ ;
+
+// Field is a list of columns
+multiFieldRelevanceFunction
+ : multiFieldRelevanceFunctionName LT_PRTHS LT_SQR_PRTHS field = relevanceFieldAndWeight (COMMA field = relevanceFieldAndWeight)* RT_SQR_PRTHS COMMA query = relevanceQuery (COMMA relevanceArg)* RT_PRTHS
+ ;
+
+// tables
+tableSource
+ : tableQualifiedName
+ | ID_DATE_SUFFIX
+ ;
+
+tableFunction
+ : qualifiedName LT_PRTHS functionArgs RT_PRTHS
+ ;
+
+// fields
+fieldList
+ : fieldExpression (COMMA fieldExpression)*
+ ;
+
+wcFieldList
+ : wcFieldExpression (COMMA wcFieldExpression)*
+ ;
+
+sortField
+ : (PLUS | MINUS)? sortFieldExpression
+ ;
+
+sortFieldExpression
+ : fieldExpression // bare field
+
+ // TODO #963: Implement 'num', 'str', and 'ip' sort syntax
+ | AUTO LT_PRTHS fieldExpression RT_PRTHS // keyword-wrapped forms: each takes exactly one field
+ | STR LT_PRTHS fieldExpression RT_PRTHS
+ | IP LT_PRTHS fieldExpression RT_PRTHS
+ | NUM LT_PRTHS fieldExpression RT_PRTHS
+ ;
+
+fieldExpression
+ : qualifiedName
+ ;
+
+wcFieldExpression
+ : wcQualifiedName
+ ;
+
+// functions
+evalFunctionCall
+ : evalFunctionName LT_PRTHS functionArgs RT_PRTHS
+ ;
+
+// cast function
+dataTypeFunctionCall
+ : CAST LT_PRTHS expression AS convertedDataType RT_PRTHS
+ ;
+
+// boolean functions
+booleanFunctionCall
+ : conditionFunctionBase LT_PRTHS functionArgs RT_PRTHS
+ ;
+
+cidrMatchFunctionCall
+ : CIDRMATCH LT_PRTHS ipAddress = functionArg COMMA cidrBlock = functionArg RT_PRTHS
+ ;
+
+convertedDataType
+ : typeName = DATE
+ | typeName = TIME
+ | typeName = TIMESTAMP
+ | typeName = INT
+ | typeName = INTEGER
+ | typeName = DOUBLE
+ | typeName = LONG
+ | typeName = FLOAT
+ | typeName = STRING
+ | typeName = BOOLEAN
+ ;
+
+evalFunctionName
+ : mathematicalFunctionName
+ | dateTimeFunctionName
+ | textFunctionName
+ | conditionFunctionBase
+ | systemFunctionName
+ | positionFunctionName
+ | coalesceFunctionName
+ | cryptographicFunctionName
+ | jsonFunctionName
+ | collectionFunctionName
+ | lambdaFunctionName
+ ;
+
+functionArgs
+ : (functionArg (COMMA functionArg)*)? // comma-separated arguments; the whole list may be empty
+ ;
+
+functionArg
+ : (ident EQUAL)? valueExpression // optional "ident EQUAL" prefix in front of the argument value
+ ;
+
+relevanceArg
+ : relevanceArgName EQUAL relevanceArgValue
+ ;
+
+relevanceArgName
+ : ALLOW_LEADING_WILDCARD
+ | ANALYZER
+ | ANALYZE_WILDCARD
+ | AUTO_GENERATE_SYNONYMS_PHRASE_QUERY
+ | BOOST
+ | CUTOFF_FREQUENCY
+ | DEFAULT_FIELD
+ | DEFAULT_OPERATOR
+ | ENABLE_POSITION_INCREMENTS
+ | ESCAPE
+ | FIELDS
+ | FLAGS
+ | FUZZINESS
+ | FUZZY_MAX_EXPANSIONS
+ | FUZZY_PREFIX_LENGTH
+ | FUZZY_REWRITE
+ | FUZZY_TRANSPOSITIONS
+ | LENIENT
+ | LOW_FREQ_OPERATOR
+ | MAX_DETERMINIZED_STATES
+ | MAX_EXPANSIONS
+ | MINIMUM_SHOULD_MATCH
+ | OPERATOR
+ | PHRASE_SLOP
+ | PREFIX_LENGTH
+ | QUOTE_ANALYZER
+ | QUOTE_FIELD_SUFFIX
+ | REWRITE
+ | SLOP
+ | TIE_BREAKER
+ | TIME_ZONE
+ | TYPE
+ | ZERO_TERMS_QUERY
+ ;
+
+relevanceFieldAndWeight
+ : field = relevanceField // field with no weight
+ | field = relevanceField weight = relevanceFieldWeight // weight written directly after the field
+ | field = relevanceField BIT_XOR_OP weight = relevanceFieldWeight // weight separated from the field by BIT_XOR_OP
+ ;
+
+relevanceFieldWeight
+ : integerLiteral
+ | decimalLiteral
+ ;
+
+relevanceField
+ : qualifiedName
+ | stringLiteral
+ ;
+
+relevanceQuery
+ : relevanceArgValue
+ ;
+
+relevanceArgValue
+ : qualifiedName
+ | literalValue
+ ;
+
+mathematicalFunctionName
+ : ABS
+ | CBRT
+ | CEIL
+ | CEILING
+ | CONV
+ | CRC32
+ | E
+ | EXP
+ | FLOOR
+ | LN
+ | LOG
+ | LOG10
+ | LOG2
+ | MOD
+ | PI
+ | POW
+ | POWER
+ | RAND
+ | ROUND
+ | SIGN
+ | SIGNUM
+ | SQRT
+ | TRUNCATE
+ | trigonometricFunctionName
+ ;
+
+trigonometricFunctionName
+ : ACOS
+ | ASIN
+ | ATAN
+ | ATAN2
+ | COS
+ | COT
+ | DEGREES
+ | RADIANS
+ | SIN
+ | TAN
+ ;
+
+cryptographicFunctionName
+ : MD5
+ | SHA1
+ | SHA2
+ ;
+
+dateTimeFunctionName
+ : ADDDATE
+ | ADDTIME
+ | CONVERT_TZ
+ | CURDATE
+ | CURRENT_DATE
+ | CURRENT_TIME
+ | CURRENT_TIMESTAMP
+ | CURRENT_TIMEZONE
+ | CURTIME
+ | DATE
+ | DATEDIFF
+ | DATETIME
+ | DATE_ADD
+ | DATE_FORMAT
+ | DATE_SUB
+ | DAY
+ | DAYNAME
+ | DAYOFMONTH
+ | DAYOFWEEK
+ | DAYOFYEAR
+ | DAY_OF_MONTH
+ | DAY_OF_WEEK
+ | DAY_OF_YEAR
+ | FROM_DAYS
+ | FROM_UNIXTIME
+ | HOUR
+ | HOUR_OF_DAY
+ | LAST_DAY
+ | LOCALTIME
+ | LOCALTIMESTAMP
+ | MAKEDATE
+ | MAKE_DATE
+ | MAKETIME
+ | MICROSECOND
+ | MINUTE
+ | MINUTE_OF_DAY
+ | MINUTE_OF_HOUR
+ | MONTH
+ | MONTHNAME
+ | MONTH_OF_YEAR
+ | NOW
+ | PERIOD_ADD
+ | PERIOD_DIFF
+ | QUARTER
+ | SECOND
+ | SECOND_OF_MINUTE
+ | SEC_TO_TIME
+ | STR_TO_DATE
+ | SUBDATE
+ | SUBTIME
+ | SYSDATE
+ | TIME
+ | TIMEDIFF
+ | TIMESTAMP
+ | TIME_FORMAT
+ | TIME_TO_SEC
+ | TO_DAYS
+ | TO_SECONDS
+ | UNIX_TIMESTAMP
+ | UTC_DATE
+ | UTC_TIME
+ | UTC_TIMESTAMP
+ | WEEK
+ | WEEKDAY
+ | WEEK_OF_YEAR
+ | YEAR
+ | YEARWEEK
+ | relativeTimeFunctionName
+ ;
+
+relativeTimeFunctionName
+ : RELATIVE_TIMESTAMP
+ | EARLIEST
+ | LATEST
+ ;
+
+getFormatFunction
+ : GET_FORMAT LT_PRTHS getFormatType COMMA functionArg RT_PRTHS
+ ;
+
+getFormatType
+ : DATE
+ | DATETIME
+ | TIME
+ | TIMESTAMP
+ ;
+
+extractFunction
+ : EXTRACT LT_PRTHS datetimePart FROM functionArg RT_PRTHS
+ ;
+
+simpleDateTimePart
+ : MICROSECOND
+ | SECOND
+ | MINUTE
+ | HOUR
+ | DAY
+ | WEEK
+ | MONTH
+ | QUARTER
+ | YEAR
+ ;
+
+complexDateTimePart
+ : SECOND_MICROSECOND
+ | MINUTE_MICROSECOND
+ | MINUTE_SECOND
+ | HOUR_MICROSECOND
+ | HOUR_SECOND
+ | HOUR_MINUTE
+ | DAY_MICROSECOND
+ | DAY_SECOND
+ | DAY_MINUTE
+ | DAY_HOUR
+ | YEAR_MONTH
+ ;
+
+datetimePart
+ : simpleDateTimePart
+ | complexDateTimePart
+ ;
+
+timestampFunction
+ : timestampFunctionName LT_PRTHS simpleDateTimePart COMMA firstArg = functionArg COMMA secondArg = functionArg RT_PRTHS
+ ;
+
+timestampFunctionName
+ : TIMESTAMPADD
+ | TIMESTAMPDIFF
+ ;
+
+// Condition functions, treated as returning a boolean value (see booleanFunctionCall); also callable via evalFunctionName
+conditionFunctionBase
+ : LIKE
+ | IF
+ | ISNULL
+ | ISNOTNULL
+ | IFNULL
+ | NULLIF
+ | ISPRESENT
+ | JSON_VALID
+ | EARLIEST
+ | LATEST
+ | ISEMPTY
+ | ISBLANK
+ ;
+
+systemFunctionName
+ : TYPEOF
+ ;
+
+textFunctionName
+ : SUBSTR
+ | SUBSTRING
+ | TRIM
+ | LTRIM
+ | RTRIM
+ | LOWER
+ | UPPER
+ | CONCAT
+ | CONCAT_WS
+ | LENGTH
+ | STRCMP
+ | RIGHT
+ | LEFT
+ | ASCII
+ | LOCATE
+ | REPLACE
+ | REVERSE
+ | ISEMPTY
+ | ISBLANK
+ ;
+
+jsonFunctionName
+ : JSON
+ | JSON_OBJECT
+ | JSON_ARRAY
+ | JSON_ARRAY_LENGTH
+ | TO_JSON_STRING
+ | JSON_EXTRACT
+ | JSON_DELETE
+ | JSON_APPEND
+ | JSON_KEYS
+ | JSON_VALID
+ | JSON_EXTEND
+ | JSON_SET
+// | JSON_ARRAY_ALL_MATCH
+// | JSON_ARRAY_ANY_MATCH
+// | JSON_ARRAY_FILTER
+// | JSON_ARRAY_MAP
+// | JSON_ARRAY_REDUCE
+ ;
+
+collectionFunctionName
+ : ARRAY
+ | ARRAY_LENGTH
+ ;
+
+lambdaFunctionName
+ : FORALL
+ | EXISTS
+ | FILTER
+ | TRANSFORM
+ | REDUCE
+ ;
+
+positionFunctionName
+ : POSITION
+ ;
+
+coalesceFunctionName
+ : COALESCE
+ ;
+
+geoIpPropertyList
+ : geoIpProperty (COMMA geoIpProperty)*
+ ;
+
+geoIpProperty
+ : COUNTRY_ISO_CODE
+ | COUNTRY_NAME
+ | CONTINENT_NAME
+ | REGION_ISO_CODE
+ | REGION_NAME
+ | CITY_NAME
+ | TIME_ZONE
+ | LOCATION
+ ;
+
+// operators
+comparisonOperator
+ : EQUAL
+ | NOT_EQUAL
+ | LESS
+ | NOT_LESS
+ | GREATER
+ | NOT_GREATER
+ | REGEXP // accepted in the same position as the other comparison tokens
+ ;
+
+singleFieldRelevanceFunctionName
+ : MATCH
+ | MATCH_PHRASE
+ | MATCH_BOOL_PREFIX
+ | MATCH_PHRASE_PREFIX
+ ;
+
+multiFieldRelevanceFunctionName
+ : SIMPLE_QUERY_STRING
+ | MULTI_MATCH
+ | QUERY_STRING
+ ;
+
+// literals and values
+literalValue
+ : stringLiteral
+ | integerLiteral
+ | decimalLiteral
+ | booleanLiteral
+ | datetimeLiteral //#datetime
+ | intervalLiteral
+ ;
+
+intervalLiteral
+ : INTERVAL valueExpression intervalUnit
+ ;
+
+stringLiteral
+ : DQUOTA_STRING
+ | SQUOTA_STRING
+ ;
+
+integerLiteral
+ : (PLUS | MINUS)? INTEGER_LITERAL
+ ;
+
+decimalLiteral
+ : (PLUS | MINUS)? DECIMAL_LITERAL
+ ;
+
+booleanLiteral
+ : TRUE
+ | FALSE
+ ;
+
+// Date and time literals, following ANSI SQL-92
+datetimeLiteral
+ : dateLiteral
+ | timeLiteral
+ | timestampLiteral
+ ;
+
+dateLiteral
+ : DATE date = stringLiteral
+ ;
+
+timeLiteral
+ : TIME time = stringLiteral
+ ;
+
+timestampLiteral
+ : TIMESTAMP timestamp = stringLiteral
+ ;
+
+intervalUnit
+ : MICROSECOND
+ | SECOND
+ | MINUTE
+ | HOUR
+ | DAY
+ | WEEK
+ | MONTH
+ | QUARTER
+ | YEAR
+ | SECOND_MICROSECOND
+ | MINUTE_MICROSECOND
+ | MINUTE_SECOND
+ | HOUR_MICROSECOND
+ | HOUR_SECOND
+ | HOUR_MINUTE
+ | DAY_MICROSECOND
+ | DAY_SECOND
+ | DAY_MINUTE
+ | DAY_HOUR
+ | YEAR_MONTH
+ ;
+
+timespanUnit
+ : MS
+ | S
+ | M
+ | H
+ | D
+ | W
+ | Q
+ | Y
+ | MILLISECOND
+ | SECOND
+ | MINUTE
+ | HOUR
+ | DAY
+ | WEEK
+ | MONTH
+ | QUARTER
+ | YEAR
+ ;
+
+valueList
+ : LT_PRTHS literalValue (COMMA literalValue)* RT_PRTHS
+ ;
+
+qualifiedName
+ : ident (DOT ident)* # identsAsQualifiedName
+ ;
+
+identifierSeq
+ : qualifiedName (COMMA qualifiedName)* # identsAsQualifiedNameSeq
+ | LT_PRTHS qualifiedName (COMMA qualifiedName)* RT_PRTHS # identsAsQualifiedNameSeq
+ ;
+
+tableQualifiedName
+ : tableIdent (DOT ident)* # identsAsTableQualifiedName
+ ;
+
+wcQualifiedName
+ : wildcard (DOT wildcard)* # identsAsWildcardQualifiedName
+ ;
+
+ident
+ : (DOT)? ID // a leading DOT is accepted in front of a plain ID
+ | BACKTICK ident BACKTICK // recursive: an ident wrapped in BACKTICK tokens
+ | BQUOTA_STRING
+ | keywordsCanBeId // keywords that may also be used as identifiers
+ ;
+
+tableIdent
+ : (CLUSTER)? ident
+ ;
+
+wildcard
+ : ident (MODULE ident)* (MODULE)? // idents joined by MODULE tokens, optionally ending with one more MODULE
+ | SINGLE_QUOTE wildcard SINGLE_QUOTE // recursive: the same pattern wrapped in a quote-token pair
+ | DOUBLE_QUOTE wildcard DOUBLE_QUOTE
+ | BACKTICK wildcard BACKTICK
+ ;
+
+keywordsCanBeId
+ : D // OD SQL and ODBC special
+ | timespanUnit
+ | SPAN
+ | evalFunctionName
+ | relevanceArgName
+ | intervalUnit
+ | dateTimeFunctionName
+ | textFunctionName
+ | jsonFunctionName
+ | mathematicalFunctionName
+ | positionFunctionName
+ | cryptographicFunctionName
+ | singleFieldRelevanceFunctionName
+ | multiFieldRelevanceFunctionName
+ | commandName
+ | comparisonOperator
+ | explainMode
+ | correlationType
+ | geoIpProperty
+ // commands assist keywords
+ | GEOIP
+ | OVERRIDE
+ | ARROW
+ | IN
+ | SOURCE
+ | INDEX
+ | DESC
+ | DATASOURCES
+ | FROM
+ | PATTERN
+ | NEW_FIELD
+ | SCOPE
+ | MAPPING
+ | WITH
+ | USING
+ | CAST
+ | GET_FORMAT
+ | EXTRACT
+ | INTERVAL
+ | PLUS
+ | MINUS
+ | INCLUDEFIELDS
+ | NULLS
+ // ARGUMENT KEYWORDS
+ | KEEPEMPTY
+ | CONSECUTIVE
+ | DEDUP_SPLITVALUES
+ | PARTITIONS
+ | ALLNUM
+ | DELIM
+ | CENTROIDS
+ | ITERATIONS
+ | DISTANCE_TYPE
+ | NUMBER_OF_TREES
+ | SHINGLE_SIZE
+ | SAMPLE_SIZE
+ | OUTPUT_AFTER
+ | TIME_DECAY
+ | ANOMALY_RATE
+ | CATEGORY_FIELD
+ | TIME_FIELD
+ | TIME_ZONE
+ | TRAINING_DATA_SIZE
+ | ANOMALY_SCORE_THRESHOLD
+ // AGGREGATIONS
+ | statsFunctionName
+ | DISTINCT_COUNT
+ | DISTINCT_COUNT_APPROX
+ | PERCENTILE
+ | PERCENTILE_APPROX
+ | ESTDC
+ | ESTDC_ERROR
+ | MEAN
+ | MEDIAN
+ | MODE
+ | RANGE
+ | STDEV
+ | STDEVP
+ | SUMSQ
+ | VAR_SAMP
+ | VAR_POP
+ | TAKE
+ | FIRST
+ | LAST
+ | LIST
+ | VALUES
+ | PER_DAY
+ | PER_HOUR
+ | PER_MINUTE
+ | PER_SECOND
+ | RATE
+ | SPARKLINE
+ | C
+ | DC
+ // JOIN TYPE
+ | OUTER
+ | INNER
+ | CROSS
+ | LEFT
+ | RIGHT
+ | FULL
+ | SEMI
+ | ANTI
+ | BETWEEN
+ | CIDRMATCH
+ | trendlineType
+ // SORT FIELD KEYWORDS
+ | AUTO
+ | STR
+ | IP
+ | NUM
+ ;
diff --git a/language-grammar/src/main/antlr4/OpenSearchSQLLexer.g4 b/language-grammar/src/main/antlr4/OpenSearchSQLLexer.g4
new file mode 100644
index 00000000000..ba7c5be85ab
--- /dev/null
+++ b/language-grammar/src/main/antlr4/OpenSearchSQLLexer.g4
@@ -0,0 +1,484 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+MySQL (Positive Technologies) grammar
+The MIT License (MIT).
+Copyright (c) 2015-2017, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies.
+Copyright (c) 2017, Ivan Khudyashev (IHudyashov@ptsecurity.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+lexer grammar OpenSearchSQLLexer;
+
+channels { SQLCOMMENT, ERRORCHANNEL }
+
+
+// SKIP
+
+SPACE: [ \t\r\n]+ -> channel(HIDDEN); // whitespace is routed to the hidden channel
+SPEC_SQL_COMMENT: '/*!' .+? '*/' -> channel(SQLCOMMENT); // block comment opening with an extra '!' and a non-empty body; goes to its own channel
+COMMENT_INPUT: '/*' .*? '*/' -> channel(HIDDEN); // ordinary block comment; body may be empty
+LINE_COMMENT: (
+ ('-- ' | '#') ~[\r\n]* ('\r'? '\n' | EOF) // "-- " (note the trailing space) or "#", up to the line break or end of input
+ | '--' ('\r'? '\n' | EOF) // a bare "--" immediately followed by a line break or end of input
+ ) -> channel(HIDDEN);
+
+
+// Keywords
+// Common Keywords
+
+ALL: 'ALL';
+AND: 'AND';
+AS: 'AS';
+ASC: 'ASC';
+BOOLEAN: 'BOOLEAN';
+BETWEEN: 'BETWEEN';
+BY: 'BY';
+CASE: 'CASE';
+CAST: 'CAST';
+CROSS: 'CROSS';
+COLUMNS: 'COLUMNS';
+DATETIME: 'DATETIME';
+DELETE: 'DELETE';
+DESC: 'DESC';
+DESCRIBE: 'DESCRIBE';
+DISTINCT: 'DISTINCT';
+DOUBLE: 'DOUBLE';
+ELSE: 'ELSE';
+EXISTS: 'EXISTS';
+FALSE: 'FALSE';
+FLOAT: 'FLOAT';
+FIRST: 'FIRST';
+FROM: 'FROM';
+GROUP: 'GROUP';
+HAVING: 'HAVING';
+IN: 'IN';
+INNER: 'INNER';
+INT: 'INT';
+INTEGER: 'INTEGER';
+IS: 'IS';
+JOIN: 'JOIN';
+LAST: 'LAST';
+LEFT: 'LEFT';
+LIKE: 'LIKE';
+LIMIT: 'LIMIT';
+LONG: 'LONG';
+MATCH: 'MATCH';
+NATURAL: 'NATURAL';
+MISSING_LITERAL: 'MISSING';
+NOT: 'NOT';
+NULL_LITERAL: 'NULL';
+NULLS: 'NULLS';
+ON: 'ON';
+OR: 'OR';
+ORDER: 'ORDER';
+OUTER: 'OUTER';
+OVER: 'OVER';
+PARTITION: 'PARTITION';
+REGEXP: 'REGEXP';
+RIGHT: 'RIGHT';
+SELECT: 'SELECT';
+SHOW: 'SHOW';
+STRING: 'STRING';
+THEN: 'THEN';
+TRUE: 'TRUE';
+UNION: 'UNION';
+USING: 'USING';
+WHEN: 'WHEN';
+WHERE: 'WHERE';
+
+
+// OD SQL special keyword
+MISSING: 'MISSING'; // NOTE(review): same literal as MISSING_LITERAL declared earlier; ANTLR picks the earlier rule on a tie, so this token looks unreachable — confirm
+EXCEPT: 'MINUS'; // the token is named EXCEPT but matches the keyword text MINUS
+
+
+// Group function Keywords
+
+AVG: 'AVG';
+COUNT: 'COUNT';
+MAX: 'MAX';
+MIN: 'MIN';
+SUM: 'SUM';
+VAR_POP: 'VAR_POP';
+VAR_SAMP: 'VAR_SAMP';
+VARIANCE: 'VARIANCE';
+STD: 'STD';
+STDDEV: 'STDDEV';
+STDDEV_POP: 'STDDEV_POP';
+STDDEV_SAMP: 'STDDEV_SAMP';
+
+
+// Common function Keywords
+
+SUBSTRING: 'SUBSTRING';
+TRIM: 'TRIM';
+
+// Keywords, but can be ID
+// Common Keywords, but can be ID
+
+END: 'END';
+FULL: 'FULL';
+OFFSET: 'OFFSET';
+
+// INTERVAL AND UNIT KEYWORDS
+INTERVAL: 'INTERVAL';
+MICROSECOND: 'MICROSECOND';
+SECOND: 'SECOND';
+MINUTE: 'MINUTE';
+HOUR: 'HOUR';
+DAY: 'DAY';
+WEEK: 'WEEK';
+MONTH: 'MONTH';
+QUARTER: 'QUARTER';
+YEAR: 'YEAR';
+SECOND_MICROSECOND: 'SECOND_MICROSECOND';
+MINUTE_MICROSECOND: 'MINUTE_MICROSECOND';
+MINUTE_SECOND: 'MINUTE_SECOND';
+HOUR_MICROSECOND: 'HOUR_MICROSECOND';
+HOUR_SECOND: 'HOUR_SECOND';
+HOUR_MINUTE: 'HOUR_MINUTE';
+DAY_MICROSECOND: 'DAY_MICROSECOND';
+DAY_SECOND: 'DAY_SECOND';
+DAY_MINUTE: 'DAY_MINUTE';
+DAY_HOUR: 'DAY_HOUR';
+YEAR_MONTH: 'YEAR_MONTH';
+
+
+// PRIVILEGES
+
+TABLES: 'TABLES';
+
+
+// Common function names
+
+ABS: 'ABS';
+ACOS: 'ACOS';
+ADD: 'ADD';
+ADDTIME: 'ADDTIME';
+ASCII: 'ASCII';
+ASIN: 'ASIN';
+ATAN: 'ATAN';
+ATAN2: 'ATAN2';
+CBRT: 'CBRT';
+CEIL: 'CEIL';
+CEILING: 'CEILING';
+CONCAT: 'CONCAT';
+CONCAT_WS: 'CONCAT_WS';
+CONV: 'CONV';
+CONVERT_TZ: 'CONVERT_TZ';
+COS: 'COS';
+COSH: 'COSH';
+COT: 'COT';
+CRC32: 'CRC32';
+CURDATE: 'CURDATE';
+CURTIME: 'CURTIME';
+CURRENT_DATE: 'CURRENT_DATE';
+CURRENT_TIME: 'CURRENT_TIME';
+CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
+DATE: 'DATE';
+DATE_ADD: 'DATE_ADD';
+DATE_FORMAT: 'DATE_FORMAT';
+DATE_SUB: 'DATE_SUB';
+DATEDIFF: 'DATEDIFF';
+DAYNAME: 'DAYNAME';
+DAYOFMONTH: 'DAYOFMONTH';
+DAYOFWEEK: 'DAYOFWEEK';
+DAYOFYEAR: 'DAYOFYEAR';
+DEGREES: 'DEGREES';
+DIVIDE: 'DIVIDE';
+E: 'E';
+EXP: 'EXP';
+EXPM1: 'EXPM1';
+EXTRACT: 'EXTRACT';
+FLOOR: 'FLOOR';
+FROM_DAYS: 'FROM_DAYS';
+FROM_UNIXTIME: 'FROM_UNIXTIME';
+GET_FORMAT: 'GET_FORMAT';
+IF: 'IF';
+IFNULL: 'IFNULL';
+ISNULL: 'ISNULL';
+LAST_DAY: 'LAST_DAY';
+LENGTH: 'LENGTH';
+LN: 'LN';
+LOCALTIME: 'LOCALTIME';
+LOCALTIMESTAMP: 'LOCALTIMESTAMP';
+LOCATE: 'LOCATE';
+LOG: 'LOG';
+LOG10: 'LOG10';
+LOG2: 'LOG2';
+LOWER: 'LOWER';
+LTRIM: 'LTRIM';
+MAKEDATE: 'MAKEDATE';
+MAKETIME: 'MAKETIME';
+MODULUS: 'MODULUS';
+MONTHNAME: 'MONTHNAME';
+MULTIPLY: 'MULTIPLY';
+NOW: 'NOW';
+NULLIF: 'NULLIF';
+PERIOD_ADD: 'PERIOD_ADD';
+PERIOD_DIFF: 'PERIOD_DIFF';
+PI: 'PI';
+POSITION: 'POSITION';
+POW: 'POW';
+POWER: 'POWER';
+RADIANS: 'RADIANS';
+RAND: 'RAND';
+REPLACE: 'REPLACE';
+RINT: 'RINT';
+ROUND: 'ROUND';
+RTRIM: 'RTRIM';
+REVERSE: 'REVERSE';
+SEC_TO_TIME: 'SEC_TO_TIME';
+SIGN: 'SIGN';
+SIGNUM: 'SIGNUM';
+SIN: 'SIN';
+SINH: 'SINH';
+SQRT: 'SQRT';
+STR_TO_DATE: 'STR_TO_DATE';
+SUBDATE: 'SUBDATE';
+SUBTIME: 'SUBTIME';
+SUBTRACT: 'SUBTRACT';
+SYSDATE: 'SYSDATE';
+TAN: 'TAN';
+TIME: 'TIME';
+TIMEDIFF: 'TIMEDIFF';
+TIME_FORMAT: 'TIME_FORMAT';
+TIME_TO_SEC: 'TIME_TO_SEC';
+TIMESTAMP: 'TIMESTAMP';
+TRUNCATE: 'TRUNCATE';
+TO_DAYS: 'TO_DAYS';
+TO_SECONDS: 'TO_SECONDS';
+UNIX_TIMESTAMP: 'UNIX_TIMESTAMP';
+UPPER: 'UPPER';
+UTC_DATE: 'UTC_DATE';
+UTC_TIME: 'UTC_TIME';
+UTC_TIMESTAMP: 'UTC_TIMESTAMP';
+
+D: 'D';
+T: 'T';
+TS: 'TS';
+LEFT_BRACE: '{';
+RIGHT_BRACE: '}';
+
+
+// Window function names
+DENSE_RANK: 'DENSE_RANK';
+RANK: 'RANK';
+ROW_NUMBER: 'ROW_NUMBER';
+
+// OD SQL special functions
+DATE_HISTOGRAM: 'DATE_HISTOGRAM';
+DAY_OF_MONTH: 'DAY_OF_MONTH';
+DAY_OF_YEAR: 'DAY_OF_YEAR';
+DAY_OF_WEEK: 'DAY_OF_WEEK';
+EXCLUDE: 'EXCLUDE';
+EXTENDED_STATS: 'EXTENDED_STATS';
+FIELD: 'FIELD';
+FILTER: 'FILTER';
+GEO_BOUNDING_BOX: 'GEO_BOUNDING_BOX';
+GEO_CELL: 'GEO_CELL';
+GEO_DISTANCE: 'GEO_DISTANCE';
+GEO_DISTANCE_RANGE: 'GEO_DISTANCE_RANGE';
+GEO_INTERSECTS: 'GEO_INTERSECTS';
+GEO_POLYGON: 'GEO_POLYGON';
+HISTOGRAM: 'HISTOGRAM';
+HOUR_OF_DAY: 'HOUR_OF_DAY';
+INCLUDE: 'INCLUDE';
+IN_TERMS: 'IN_TERMS';
+MATCHPHRASE: 'MATCHPHRASE';
+MATCH_PHRASE: 'MATCH_PHRASE';
+MATCHPHRASEQUERY: 'MATCHPHRASEQUERY';
+SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING';
+QUERY_STRING: 'QUERY_STRING';
+MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX';
+MATCHQUERY: 'MATCHQUERY';
+MATCH_QUERY: 'MATCH_QUERY';
+MINUTE_OF_DAY: 'MINUTE_OF_DAY';
+MINUTE_OF_HOUR: 'MINUTE_OF_HOUR';
+MONTH_OF_YEAR: 'MONTH_OF_YEAR';
+MULTIMATCH: 'MULTIMATCH';
+MULTI_MATCH: 'MULTI_MATCH';
+MULTIMATCHQUERY: 'MULTIMATCHQUERY';
+NESTED: 'NESTED';
+PERCENTILES: 'PERCENTILES';
+PERCENTILE: 'PERCENTILE';
+PERCENTILE_APPROX: 'PERCENTILE_APPROX';
+REGEXP_QUERY: 'REGEXP_QUERY';
+REVERSE_NESTED: 'REVERSE_NESTED';
+QUERY: 'QUERY';
+RANGE: 'RANGE';
+SCORE: 'SCORE';
+SCOREQUERY: 'SCOREQUERY';
+SCORE_QUERY: 'SCORE_QUERY';
+SECOND_OF_MINUTE: 'SECOND_OF_MINUTE';
+STATS: 'STATS';
+TERM: 'TERM';
+TERMS: 'TERMS';
+TIMESTAMPADD: 'TIMESTAMPADD';
+TIMESTAMPDIFF: 'TIMESTAMPDIFF';
+TOPHITS: 'TOPHITS';
+TYPEOF: 'TYPEOF';
+WEEK_OF_YEAR: 'WEEK_OF_YEAR';
+WEEKOFYEAR: 'WEEKOFYEAR';
+WEEKDAY: 'WEEKDAY';
+WILDCARDQUERY: 'WILDCARDQUERY';
+WILDCARD_QUERY: 'WILDCARD_QUERY';
+
+// TEXT FUNCTIONS
+SUBSTR: 'SUBSTR';
+STRCMP: 'STRCMP';
+
+// DATE AND TIME FUNCTIONS
+ADDDATE: 'ADDDATE';
+YEARWEEK: 'YEARWEEK';
+
+// RELEVANCE FUNCTIONS AND PARAMETERS
+ALLOW_LEADING_WILDCARD: 'ALLOW_LEADING_WILDCARD';
+ANALYZER: 'ANALYZER';
+ANALYZE_WILDCARD: 'ANALYZE_WILDCARD';
+AUTO_GENERATE_SYNONYMS_PHRASE_QUERY:'AUTO_GENERATE_SYNONYMS_PHRASE_QUERY';
+BOOST: 'BOOST';
+CASE_INSENSITIVE: 'CASE_INSENSITIVE';
+CUTOFF_FREQUENCY: 'CUTOFF_FREQUENCY';
+DEFAULT_FIELD: 'DEFAULT_FIELD';
+DEFAULT_OPERATOR: 'DEFAULT_OPERATOR';
+ESCAPE: 'ESCAPE';
+ENABLE_POSITION_INCREMENTS: 'ENABLE_POSITION_INCREMENTS';
+FIELDS: 'FIELDS';
+FLAGS: 'FLAGS';
+FUZZINESS: 'FUZZINESS';
+FUZZY_MAX_EXPANSIONS: 'FUZZY_MAX_EXPANSIONS';
+FUZZY_PREFIX_LENGTH: 'FUZZY_PREFIX_LENGTH';
+FUZZY_REWRITE: 'FUZZY_REWRITE';
+FUZZY_TRANSPOSITIONS: 'FUZZY_TRANSPOSITIONS';
+LENIENT: 'LENIENT';
+LOW_FREQ_OPERATOR: 'LOW_FREQ_OPERATOR';
+MAX_DETERMINIZED_STATES: 'MAX_DETERMINIZED_STATES';
+MAX_EXPANSIONS: 'MAX_EXPANSIONS';
+MINIMUM_SHOULD_MATCH: 'MINIMUM_SHOULD_MATCH';
+OPERATOR: 'OPERATOR';
+PHRASE_SLOP: 'PHRASE_SLOP';
+PREFIX_LENGTH: 'PREFIX_LENGTH';
+QUOTE_ANALYZER: 'QUOTE_ANALYZER';
+QUOTE_FIELD_SUFFIX: 'QUOTE_FIELD_SUFFIX';
+REWRITE: 'REWRITE';
+SLOP: 'SLOP';
+TIE_BREAKER: 'TIE_BREAKER';
+TIME_ZONE: 'TIME_ZONE';
+TYPE: 'TYPE';
+ZERO_TERMS_QUERY: 'ZERO_TERMS_QUERY';
+HIGHLIGHT: 'HIGHLIGHT';
+HIGHLIGHT_PRE_TAGS: 'PRE_TAGS';
+HIGHLIGHT_POST_TAGS: 'POST_TAGS';
+
+// RELEVANCE FUNCTIONS
+MATCH_BOOL_PREFIX: 'MATCH_BOOL_PREFIX';
+// Operators
+
+// Operators. Arithmetic
+
+STAR: '*';
+SLASH: '/';
+MODULE: '%';
+PLUS: '+';
+MINUS: '-';
+DIV: 'DIV';
+MOD: 'MOD';
+
+
+// Operators. Comparison
+
+EQUAL_SYMBOL: '=';
+GREATER_SYMBOL: '>';
+LESS_SYMBOL: '<';
+EXCLAMATION_SYMBOL: '!';
+
+
+// Operators. Bit
+
+BIT_NOT_OP: '~';
+BIT_OR_OP: '|';
+BIT_AND_OP: '&';
+BIT_XOR_OP: '^';
+
+
+// Constructor symbols
+
+DOT: '.';
+LR_BRACKET: '(';
+RR_BRACKET: ')';
+LT_SQR_PRTHS: '[';
+RT_SQR_PRTHS: ']';
+COMMA: ',';
+SEMI: ';';
+AT_SIGN: '@';
+ZERO_DECIMAL: '0';
+ONE_DECIMAL: '1';
+TWO_DECIMAL: '2';
+SINGLE_QUOTE_SYMB: '\'';
+DOUBLE_QUOTE_SYMB: '"';
+REVERSE_QUOTE_SYMB: '`';
+COLON_SYMB: ':';
+
+
+// Literal Primitives
+
+START_NATIONAL_STRING_LITERAL: 'N' SQUOTA_STRING; // N immediately followed by a single-quoted string
+STRING_LITERAL: SQUOTA_STRING; // only single-quoted text; double-quoted text is tokenized as DOUBLE_QUOTE_ID
+DECIMAL_LITERAL: DEC_DIGIT+; // unsigned run of digits: no sign, no fraction
+HEXADECIMAL_LITERAL: 'X' '\'' (HEX_DIGIT HEX_DIGIT)+ '\'' // quoted form: hex digits must come in pairs
+ | '0X' HEX_DIGIT+; // prefixed form: any positive number of hex digits
+
+REAL_LITERAL: (DEC_DIGIT+)? '.' DEC_DIGIT+ // e.g. .5 or 1.5
+ | DEC_DIGIT+ '.' EXPONENT_NUM_PART // e.g. 1.E3
+ | (DEC_DIGIT+)? '.' (DEC_DIGIT+ EXPONENT_NUM_PART) // e.g. .5E3 or 1.5E3
+ | DEC_DIGIT+ EXPONENT_NUM_PART; // e.g. 1E3
+NULL_SPEC_LITERAL: '\\' 'N'; // a backslash followed by N
+BIT_STRING: BIT_STRING_L;
+
+
+
+// Identifiers
+
+ID: ID_LITERAL;
+DOUBLE_QUOTE_ID: DQUOTA_STRING;
+BACKTICK_QUOTE_ID: BQUOTA_STRING;
+
+
+// Fragments for Literal primitives
+fragment EXPONENT_NUM_PART: 'E' [-+]? DEC_DIGIT+; // E, optional sign, one or more digits
+fragment DQUOTA_STRING: '"' ( '\\'. | '""' | ~('"'| '\\') )* '"'; // accepts backslash-escaped characters and doubled quotes inside the string
+fragment SQUOTA_STRING: '\'' ('\\'. | '\'\'' | ~('\'' | '\\'))* '\''; // same shape with single quotes
+fragment BQUOTA_STRING: '`' ( '\\'. | '``' | ~('`'|'\\'))* '`'; // same shape with backticks
+fragment HEX_DIGIT: [0-9A-F]; // upper-case letters only — presumably input is upper-cased before lexing; confirm
+fragment DEC_DIGIT: [0-9];
+fragment BIT_STRING_L: 'B' '\'' [01]+ '\''; // B followed by one or more quoted binary digits
+
+// Identifiers cannot start with a single '_' since this is an OpenSearch reserved
+// metadata field. Two underscores (or more) is acceptable, such as '__field'.
+fragment ID_LITERAL: ([@*A-Z_])+?[*A-Z_\-0-9]*; // NOTE(review): this pattern alone still matches a single leading '_'; presumably the restriction is enforced elsewhere — confirm
+
+// Last tokens must generate Errors
+
+ERROR_RECOGNITION: . -> channel(ERRORCHANNEL); // catch-all: any single character no earlier rule matched is sent to ERRORCHANNEL
diff --git a/language-grammar/src/main/antlr4/OpenSearchSQLParser.g4 b/language-grammar/src/main/antlr4/OpenSearchSQLParser.g4
new file mode 100644
index 00000000000..5f7361160b3
--- /dev/null
+++ b/language-grammar/src/main/antlr4/OpenSearchSQLParser.g4
@@ -0,0 +1,844 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+MySQL (Positive Technologies) grammar
+The MIT License (MIT).
+Copyright (c) 2015-2017, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies.
+Copyright (c) 2017, Ivan Khudyashev (IHudyashov@ptsecurity.com)
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+*/
+
+parser grammar OpenSearchSQLParser;
+
+
+options { tokenVocab = OpenSearchSQLLexer; }
+// Top Level Description
+
+// Root rule
+
+root
+ : sqlStatement? SEMI? EOF
+ ;
+
+// SELECT queries plus SHOW / DESCRIBE admin statements
+sqlStatement
+ : dmlStatement
+ | adminStatement
+ ;
+
+dmlStatement
+ : selectStatement
+ ;
+
+// Data Manipulation Language
+
+// Primary DML Statements
+selectStatement
+ : querySpecification # simpleSelect
+ ;
+
+adminStatement
+ : showStatement
+ | describeStatement
+ ;
+
+showStatement
+ : SHOW TABLES tableFilter
+ ;
+
+describeStatement
+ : DESCRIBE TABLES tableFilter columnFilter?
+ ;
+
+columnFilter
+ : COLUMNS LIKE showDescribePattern
+ ;
+
+tableFilter
+ : LIKE showDescribePattern
+ ;
+
+showDescribePattern
+ : stringLiteral
+ ;
+
+// Select Statement's Details
+querySpecification
+ : selectClause fromClause? limitClause?
+ ;
+
+selectClause
+ : SELECT selectSpec? selectElements
+ ;
+
+selectSpec
+ : (ALL | DISTINCT)
+ ;
+
+selectElements
+ : (star = STAR | selectElement) (COMMA selectElement)*
+ ;
+
+selectElement
+ : expression (AS? alias)?
+ ;
+
+fromClause
+ : FROM relation (whereClause)? (groupByClause)? (havingClause)? (orderByClause)? // Place it under FROM for now but actually not necessary ex. A UNION B ORDER BY
+
+ ;
+
+relation
+ : tableName (AS? alias)? # tableAsRelation
+ | LR_BRACKET subquery = querySpecification RR_BRACKET AS? alias # subqueryAsRelation
+ ;
+
+whereClause
+ : WHERE expression
+ ;
+
+groupByClause
+ : GROUP BY groupByElements
+ ;
+
+groupByElements
+ : groupByElement (COMMA groupByElement)*
+ ;
+
+groupByElement
+ : expression
+ ;
+
+havingClause
+ : HAVING expression
+ ;
+
+orderByClause
+ : ORDER BY orderByElement (COMMA orderByElement)*
+ ;
+
+orderByElement
+ : expression order = (ASC | DESC)? (NULLS (FIRST | LAST))?
+ ;
+
+limitClause
+ : LIMIT (offset = decimalLiteral COMMA)? limit = decimalLiteral
+ | LIMIT limit = decimalLiteral OFFSET offset = decimalLiteral
+ ;
+
+// Window Function's Details
+windowFunctionClause
+ : function = windowFunction overClause
+ ;
+
+windowFunction // the function part of a window call; windowFunctionClause appends the OVER clause
+ : functionName = (ROW_NUMBER | RANK | DENSE_RANK) LR_BRACKET functionArgs RR_BRACKET # scalarWindowFunction // functionArgs already matches an empty list, so wrapping it in '?' is redundant
+ | aggregateFunction # aggregateWindowFunction // any aggregate call may be used as a window function
+ ;
+
+overClause
+ : OVER LR_BRACKET partitionByClause? orderByClause? RR_BRACKET
+ ;
+
+partitionByClause
+ : PARTITION BY expression (COMMA expression)*
+ ;
+
+// Literals
+constant
+ : stringLiteral # string
+ | sign? decimalLiteral # signedDecimal
+ | sign? realLiteral # signedReal
+ | booleanLiteral # boolean
+ | datetimeLiteral # datetime
+ | intervalLiteral # interval
+ | nullLiteral # null
+ // Doesn't support the following types for now
+ //| BIT_STRING
+ //| NOT? nullLiteral=(NULL_LITERAL | NULL_SPEC_LITERAL)
+ ;
+
+decimalLiteral
+ : DECIMAL_LITERAL
+ | ZERO_DECIMAL
+ | ONE_DECIMAL
+ | TWO_DECIMAL
+ ;
+
+numericLiteral
+ : decimalLiteral
+ | realLiteral
+ ;
+
+stringLiteral
+ : STRING_LITERAL
+ | DOUBLE_QUOTE_ID
+ ;
+
+booleanLiteral
+ : TRUE
+ | FALSE
+ ;
+
+realLiteral
+ : REAL_LITERAL
+ ;
+
+sign
+ : PLUS
+ | MINUS
+ ;
+
+nullLiteral
+ : NULL_LITERAL
+ ;
+
+// Date and Time Literal, follow ANSI 92
+datetimeLiteral
+ : dateLiteral
+ | timeLiteral
+ | timestampLiteral
+ ;
+
+dateLiteral
+ : DATE date = stringLiteral
+ | LEFT_BRACE (DATE | D) date = stringLiteral RIGHT_BRACE
+ ;
+
+timeLiteral
+ : TIME time = stringLiteral
+ | LEFT_BRACE (TIME | T) time = stringLiteral RIGHT_BRACE
+ ;
+
+timestampLiteral
+ : TIMESTAMP timestamp = stringLiteral
+ | LEFT_BRACE (TIMESTAMP | TS) timestamp = stringLiteral RIGHT_BRACE
+ ;
+
+// Actually, these constants are shortcuts to the corresponding functions
+datetimeConstantLiteral
+ : CURRENT_DATE
+ | CURRENT_TIME
+ | CURRENT_TIMESTAMP
+ | LOCALTIME
+ | LOCALTIMESTAMP
+ | UTC_TIMESTAMP
+ | UTC_DATE
+ | UTC_TIME
+ ;
+
+intervalLiteral
+ : INTERVAL expression intervalUnit
+ ;
+
+intervalUnit
+ : MICROSECOND
+ | SECOND
+ | MINUTE
+ | HOUR
+ | DAY
+ | WEEK
+ | MONTH
+ | QUARTER
+ | YEAR
+ | SECOND_MICROSECOND
+ | MINUTE_MICROSECOND
+ | MINUTE_SECOND
+ | HOUR_MICROSECOND
+ | HOUR_SECOND
+ | HOUR_MINUTE
+ | DAY_MICROSECOND
+ | DAY_SECOND
+ | DAY_MINUTE
+ | DAY_HOUR
+ | YEAR_MONTH
+ ;
+
+// predicates
+
+// Simplified approach for expression
+expression
+ : NOT expression # notExpression
+ | left = expression AND right = expression # andExpression
+ | left = expression OR right = expression # orExpression
+ | predicate # predicateExpression
+ ;
+
+predicate // left-recursive rule: ANTLR gives alternatives listed earlier the higher precedence
+ : expressionAtom # expressionAtomPredicate
+ | left = predicate comparisonOperator right = predicate # binaryComparisonPredicate
+ | predicate IS nullNotnull # isNullPredicate
+ | predicate NOT? BETWEEN predicate AND predicate # betweenPredicate
+ | left = predicate NOT? LIKE right = predicate # likePredicate
+ | left = predicate REGEXP right = predicate # regexpPredicate
+ | predicate NOT? IN LR_BRACKET expressions RR_BRACKET # inPredicate // named bracket tokens, as everywhere else in this grammar
+ ;
+
+expressions // one or more expressions separated by commas (used by the IN predicate)
+ : expression (COMMA expression)*
+ ;
+
+expressionAtom
+ : constant # constantExpressionAtom
+ | columnName # fullColumnNameExpressionAtom
+ | functionCall # functionCallExpressionAtom
+ | LR_BRACKET expression RR_BRACKET # nestedExpressionAtom
+ | left = expressionAtom mathOperator = (STAR | SLASH | MODULE) right = expressionAtom # mathExpressionAtom
+ | left = expressionAtom mathOperator = (PLUS | MINUS) right = expressionAtom # mathExpressionAtom
+ ;
+
+comparisonOperator // two-character operators are built from two single-character tokens
+ : EQUAL_SYMBOL // =
+ | GREATER_SYMBOL // >
+ | LESS_SYMBOL // <
+ | LESS_SYMBOL EQUAL_SYMBOL // <=
+ | GREATER_SYMBOL EQUAL_SYMBOL // >=
+ | LESS_SYMBOL GREATER_SYMBOL // <>
+ | EXCLAMATION_SYMBOL EQUAL_SYMBOL // !=
+ ;
+
+nullNotnull
+ : NOT? NULL_LITERAL
+ ;
+
+functionCall
+ : nestedFunctionName LR_BRACKET allTupleFields RR_BRACKET # nestedAllFunctionCall
+ | scalarFunctionName LR_BRACKET functionArgs RR_BRACKET # scalarFunctionCall
+ | specificFunction # specificFunctionCall
+ | windowFunctionClause # windowFunctionCall
+ | aggregateFunction # aggregateFunctionCall
+ | aggregateFunction (orderByClause)? filterClause # filteredAggregationFunctionCall
+ | scoreRelevanceFunction # scoreRelevanceFunctionCall
+ | relevanceFunction # relevanceFunctionCall
+ | highlightFunction # highlightFunctionCall
+ | positionFunction # positionFunctionCall
+ | extractFunction # extractFunctionCall
+ | getFormatFunction # getFormatFunctionCall
+ | timestampFunction # timestampFunctionCall
+ ;
+
+timestampFunction
+ : timestampFunctionName LR_BRACKET simpleDateTimePart COMMA firstArg = functionArg COMMA secondArg = functionArg RR_BRACKET
+ ;
+
+timestampFunctionName
+ : TIMESTAMPADD
+ | TIMESTAMPDIFF
+ ;
+
+getFormatFunction
+ : GET_FORMAT LR_BRACKET getFormatType COMMA functionArg RR_BRACKET
+ ;
+
+getFormatType
+ : DATE
+ | DATETIME
+ | TIME
+ | TIMESTAMP
+ ;
+
+extractFunction
+ : EXTRACT LR_BRACKET datetimePart FROM functionArg RR_BRACKET
+ ;
+
+simpleDateTimePart
+ : MICROSECOND
+ | SECOND
+ | MINUTE
+ | HOUR
+ | DAY
+ | WEEK
+ | MONTH
+ | QUARTER
+ | YEAR
+ ;
+
+complexDateTimePart
+ : SECOND_MICROSECOND
+ | MINUTE_MICROSECOND
+ | MINUTE_SECOND
+ | HOUR_MICROSECOND
+ | HOUR_SECOND
+ | HOUR_MINUTE
+ | DAY_MICROSECOND
+ | DAY_SECOND
+ | DAY_MINUTE
+ | DAY_HOUR
+ | YEAR_MONTH
+ ;
+
+datetimePart
+ : simpleDateTimePart
+ | complexDateTimePart
+ ;
+
+highlightFunction
+ : HIGHLIGHT LR_BRACKET relevanceField (COMMA highlightArg)* RR_BRACKET
+ ;
+
+positionFunction
+ : POSITION LR_BRACKET functionArg IN functionArg RR_BRACKET
+ ;
+
+matchQueryAltSyntaxFunction
+ : field = relevanceField EQUAL_SYMBOL MATCH_QUERY LR_BRACKET query = relevanceQuery RR_BRACKET
+ ;
+
+scalarFunctionName
+ : mathematicalFunctionName
+ | dateTimeFunctionName
+ | textFunctionName
+ | flowControlFunctionName
+ | systemFunctionName
+ | nestedFunctionName
+ ;
+
+specificFunction
+ : CASE expression caseFuncAlternative+ (ELSE elseArg = functionArg)? END # caseFunctionCall // CASE with a leading expression before the WHEN branches
+ | CASE caseFuncAlternative+ (ELSE elseArg = functionArg)? END # caseFunctionCall // CASE made only of WHEN ... THEN branches
+ | CAST LR_BRACKET expression AS convertedDataType RR_BRACKET # dataTypeFunctionCall // named bracket tokens, matching the other rules
+ ;
+
+relevanceFunction
+ : noFieldRelevanceFunction
+ | singleFieldRelevanceFunction
+ | multiFieldRelevanceFunction
+ | altSingleFieldRelevanceFunction
+ | altMultiFieldRelevanceFunction
+ ;
+
+scoreRelevanceFunction
+ : scoreRelevanceFunctionName LR_BRACKET relevanceFunction (COMMA weight = relevanceFieldWeight)? RR_BRACKET
+ ;
+
+noFieldRelevanceFunction
+ : noFieldRelevanceFunctionName LR_BRACKET query = relevanceQuery (COMMA relevanceArg)* RR_BRACKET
+ ;
+
+// Field is a single column
+singleFieldRelevanceFunction
+ : singleFieldRelevanceFunctionName LR_BRACKET field = relevanceField COMMA query = relevanceQuery (COMMA relevanceArg)* RR_BRACKET
+ ;
+
+// Field is a list of columns
+multiFieldRelevanceFunction
+ : multiFieldRelevanceFunctionName LR_BRACKET LT_SQR_PRTHS field = relevanceFieldAndWeight (COMMA field = relevanceFieldAndWeight)* RT_SQR_PRTHS COMMA query = relevanceQuery (COMMA relevanceArg)* RR_BRACKET
+ | multiFieldRelevanceFunctionName LR_BRACKET alternateMultiMatchQuery COMMA alternateMultiMatchField (COMMA relevanceArg)* RR_BRACKET
+ ;
+
+altSingleFieldRelevanceFunction
+ : field = relevanceField EQUAL_SYMBOL altSyntaxFunctionName = altSingleFieldRelevanceFunctionName LR_BRACKET query = relevanceQuery (COMMA relevanceArg)* RR_BRACKET
+ ;
+
+altMultiFieldRelevanceFunction
+ : field = relevanceField EQUAL_SYMBOL altSyntaxFunctionName = altMultiFieldRelevanceFunctionName LR_BRACKET query = relevanceQuery (COMMA relevanceArg)* RR_BRACKET
+ ;
+
+convertedDataType
+ : typeName = DATE
+ | typeName = TIME
+ | typeName = TIMESTAMP
+ | typeName = INT
+ | typeName = INTEGER
+ | typeName = DOUBLE
+ | typeName = LONG
+ | typeName = FLOAT
+ | typeName = STRING
+ | typeName = BOOLEAN
+ ;
+
+caseFuncAlternative
+ : WHEN condition = functionArg THEN consequent = functionArg
+ ;
+
+aggregateFunction
+ : functionName = aggregationFunctionName LR_BRACKET functionArg RR_BRACKET # regularAggregateFunctionCall
+ | COUNT LR_BRACKET STAR RR_BRACKET # countStarFunctionCall
+ | COUNT LR_BRACKET DISTINCT functionArg RR_BRACKET # distinctCountFunctionCall
+ | percentileApproxFunction # percentileApproxFunctionCall
+ ;
+
+percentileApproxFunction
+ : (PERCENTILE | PERCENTILE_APPROX) LR_BRACKET aggField = functionArg
+ COMMA percent = numericLiteral (COMMA compression = numericLiteral)? RR_BRACKET
+ ;
+
+filterClause
+ : FILTER LR_BRACKET WHERE expression RR_BRACKET
+ ;
+
+aggregationFunctionName
+ : AVG
+ | COUNT
+ | SUM
+ | MIN
+ | MAX
+ | VAR_POP
+ | VAR_SAMP
+ | VARIANCE
+ | STD
+ | STDDEV
+ | STDDEV_POP
+ | STDDEV_SAMP
+ ;
+
+mathematicalFunctionName
+ : ABS
+ | CBRT
+ | CEIL
+ | CEILING
+ | CONV
+ | CRC32
+ | E
+ | EXP
+ | EXPM1
+ | FLOOR
+ | LN
+ | LOG
+ | LOG10
+ | LOG2
+ | MOD
+ | PI
+ | POW
+ | POWER
+ | RAND
+ | RINT
+ | ROUND
+ | SIGN
+ | SIGNUM
+ | SQRT
+ | TRUNCATE
+ | trigonometricFunctionName
+ | arithmeticFunctionName
+ ;
+
+trigonometricFunctionName
+ : ACOS
+ | ASIN
+ | ATAN
+ | ATAN2
+ | COS
+ | COSH
+ | COT
+ | DEGREES
+ | RADIANS
+ | SIN
+ | SINH
+ | TAN
+ ;
+
+arithmeticFunctionName
+ : ADD
+ | SUBTRACT
+ | MULTIPLY
+ | DIVIDE
+ | MOD // NOTE(review): mathematicalFunctionName also lists MOD directly, ahead of its arithmeticFunctionName alternative - this entry looks redundant there; confirm
+ | MODULUS
+ ;
+
+dateTimeFunctionName
+ : datetimeConstantLiteral
+ | ADDDATE
+ | ADDTIME
+ | CONVERT_TZ
+ | CURDATE
+ | CURTIME
+ | DATE
+ | DATE_ADD
+ | DATE_FORMAT
+ | DATE_SUB
+ | DATEDIFF
+ | DATETIME
+ | DAY
+ | DAYNAME
+ | DAYOFMONTH
+ | DAY_OF_MONTH
+ | DAYOFWEEK
+ | DAYOFYEAR
+ | DAY_OF_YEAR
+ | DAY_OF_WEEK
+ | FROM_DAYS
+ | FROM_UNIXTIME
+ | HOUR
+ | HOUR_OF_DAY
+ | LAST_DAY
+ | MAKEDATE
+ | MAKETIME
+ | MICROSECOND
+ | MINUTE
+ | MINUTE_OF_DAY
+ | MINUTE_OF_HOUR
+ | MONTH
+ | MONTHNAME
+ | MONTH_OF_YEAR
+ | NOW
+ | PERIOD_ADD
+ | PERIOD_DIFF
+ | QUARTER
+ | SEC_TO_TIME
+ | SECOND
+ | SECOND_OF_MINUTE
+ | SUBDATE
+ | SUBTIME
+ | SYSDATE
+ | STR_TO_DATE
+ | TIME
+ | TIME_FORMAT
+ | TIME_TO_SEC
+ | TIMEDIFF
+ | TIMESTAMP
+ | TO_DAYS
+ | TO_SECONDS
+ | UNIX_TIMESTAMP
+ | WEEK
+ | WEEKDAY
+ | WEEK_OF_YEAR
+ | WEEKOFYEAR
+ | YEAR
+ | YEARWEEK
+ ;
+
+// Names of the built-in string functions; each keyword is listed exactly once.
+textFunctionName
+ : SUBSTR
+ | SUBSTRING
+ | TRIM
+ | LTRIM
+ | RTRIM
+ | LOWER
+ | UPPER
+ | CONCAT
+ | CONCAT_WS
+ | LENGTH
+ | STRCMP
+ | RIGHT
+ | LEFT
+ | ASCII
+ | LOCATE
+ | REPLACE
+ | REVERSE
+ ;
+
+flowControlFunctionName
+ : IF
+ | IFNULL
+ | NULLIF
+ | ISNULL
+ ;
+
+noFieldRelevanceFunctionName
+ : QUERY
+ ;
+
+systemFunctionName
+ : TYPEOF
+ ;
+
+nestedFunctionName
+ : NESTED
+ ;
+
+scoreRelevanceFunctionName
+ : SCORE
+ | SCOREQUERY
+ | SCORE_QUERY
+ ;
+
+singleFieldRelevanceFunctionName
+ : MATCH
+ | MATCHQUERY
+ | MATCH_QUERY
+ | MATCH_PHRASE
+ | MATCHPHRASE
+ | MATCHPHRASEQUERY
+ | MATCH_BOOL_PREFIX
+ | MATCH_PHRASE_PREFIX
+ | WILDCARD_QUERY
+ | WILDCARDQUERY
+ ;
+
+multiFieldRelevanceFunctionName
+ : MULTI_MATCH
+ | MULTIMATCH
+ | MULTIMATCHQUERY
+ | SIMPLE_QUERY_STRING
+ | QUERY_STRING
+ ;
+
+altSingleFieldRelevanceFunctionName
+ : MATCH_QUERY
+ | MATCHQUERY
+ | MATCH_PHRASE
+ | MATCHPHRASE
+ ;
+
+altMultiFieldRelevanceFunctionName
+ : MULTI_MATCH
+ | MULTIMATCH
+ ;
+
+functionArgs
+ : (functionArg (COMMA functionArg)*)?
+ ;
+
+functionArg
+ : expression
+ ;
+
+relevanceArg
+ : relevanceArgName EQUAL_SYMBOL relevanceArgValue
+ | argName = stringLiteral EQUAL_SYMBOL argVal = relevanceArgValue
+ ;
+
+highlightArg
+ : highlightArgName EQUAL_SYMBOL highlightArgValue
+ ;
+
+relevanceArgName
+ : ALLOW_LEADING_WILDCARD
+ | ANALYZER
+ | ANALYZE_WILDCARD
+ | AUTO_GENERATE_SYNONYMS_PHRASE_QUERY
+ | BOOST
+ | CASE_INSENSITIVE
+ | CUTOFF_FREQUENCY
+ | DEFAULT_FIELD
+ | DEFAULT_OPERATOR
+ | ENABLE_POSITION_INCREMENTS
+ | ESCAPE
+ | FIELDS
+ | FLAGS
+ | FUZZINESS
+ | FUZZY_MAX_EXPANSIONS
+ | FUZZY_PREFIX_LENGTH
+ | FUZZY_REWRITE
+ | FUZZY_TRANSPOSITIONS
+ | LENIENT
+ | LOW_FREQ_OPERATOR
+ | MAX_DETERMINIZED_STATES
+ | MAX_EXPANSIONS
+ | MINIMUM_SHOULD_MATCH
+ | OPERATOR
+ | PHRASE_SLOP
+ | PREFIX_LENGTH
+ | QUOTE_ANALYZER
+ | QUOTE_FIELD_SUFFIX
+ | REWRITE
+ | SLOP
+ | TIE_BREAKER
+ | TIME_ZONE
+ | TYPE
+ | ZERO_TERMS_QUERY
+ ;
+
+highlightArgName
+ : HIGHLIGHT_POST_TAGS
+ | HIGHLIGHT_PRE_TAGS
+ ;
+
+relevanceFieldAndWeight
+ : field = relevanceField
+ | field = relevanceField weight = relevanceFieldWeight
+ | field = relevanceField BIT_XOR_OP weight = relevanceFieldWeight
+ ;
+
+relevanceFieldWeight
+ : numericLiteral
+ ;
+
+relevanceField
+ : qualifiedName
+ | stringLiteral
+ ;
+
+relevanceQuery
+ : relevanceArgValue
+ ;
+
+relevanceArgValue
+ : qualifiedName
+ | constant
+ ;
+
+highlightArgValue
+ : stringLiteral
+ ;
+
+alternateMultiMatchArgName
+ : FIELDS
+ | QUERY
+ | stringLiteral
+ ;
+
+alternateMultiMatchQuery
+ : argName = alternateMultiMatchArgName EQUAL_SYMBOL argVal = relevanceArgValue
+ ;
+
+alternateMultiMatchField
+ : argName = alternateMultiMatchArgName EQUAL_SYMBOL argVal = relevanceArgValue
+ | argName = alternateMultiMatchArgName EQUAL_SYMBOL LT_SQR_PRTHS argVal = relevanceArgValue RT_SQR_PRTHS
+ ;
+
+// Identifiers
+tableName
+ : qualifiedName
+ ;
+
+columnName
+ : qualifiedName
+ ;
+
+allTupleFields
+ : path = qualifiedName DOT STAR
+ ;
+
+alias
+ : ident
+ ;
+
+qualifiedName
+ : ident (DOT ident)*
+ ;
+
+ident
+ : DOT? ID
+ | BACKTICK_QUOTE_ID
+ | keywordsCanBeId
+ | scalarFunctionName
+ ;
+
+keywordsCanBeId
+ : FULL
+ | FIELD
+ | D
+ | T
+ | TS // OD SQL and ODBC special
+ | COUNT
+ | SUM
+ | AVG
+ | MAX
+ | MIN
+ | FIRST
+ | LAST
+ | TYPE // TODO: Type is keyword required by relevancy function. Remove this when relevancy functions moved out
+ ;
diff --git a/language-grammar/src/main/antlr4/SparkSqlBase.g4 b/language-grammar/src/main/antlr4/SparkSqlBase.g4
new file mode 100644
index 00000000000..c53c61adfde
--- /dev/null
+++ b/language-grammar/src/main/antlr4/SparkSqlBase.g4
@@ -0,0 +1,246 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+/*
+ * This file contains code from the Apache Spark project (original license below).
+ * It contains modifications, which are licensed as above:
+ */
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+grammar SparkSqlBase;
+
+// Copy from Spark 3.3.1 SqlBaseParser.g4 and SqlBaseLexer.g4
+
+@members {
+ /**
+ * When true, parser should throw ParseException for unclosed bracketed comment.
+ */
+ public boolean has_unclosed_bracketed_comment = false;
+
+ /**
+ * Verify whether current token is a valid decimal token (which contains dot).
+ * Returns true if the character that follows the token is not a digit or letter or underscore.
+ *
+ * For example:
+ * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
+ * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
+ * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
+ * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed
+ * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
+ * which is not a digit or letter or underscore.
+ */
+ public boolean isValidDecimal() {
+   // Peek at the character right after the candidate token; nothing is consumed.
+   int following = _input.LA(1);
+   boolean upperLetter = 'A' <= following && following <= 'Z';
+   boolean digit = '0' <= following && following <= '9';
+   // A trailing letter, digit or underscore means the text continues into a longer
+   // token, so the candidate must not be accepted as a decimal.
+   return !(upperLetter || digit || following == '_');
+ }
+
+ /**
+ * This method will be called when we see '/*' and try to match it as a bracketed comment.
+ * If the next character is '+', it should be parsed as hint later, and we cannot match
+ * it as a bracketed comment.
+ *
+ * Returns true if the next character is '+'.
+ */
+ public boolean isHint() {
+   // _input.LA(1) is a lookahead: it inspects the next character without consuming it.
+   final int upcoming = _input.LA(1);
+   // A '+' directly after the comment opener means the text should be parsed as a
+   // hint later, so it must not be matched as a bracketed comment.
+   final boolean startsHint = upcoming == '+';
+   return startsHint;
+ }
+
+ /**
+ * Called by the BRACKETED_COMMENT lexer rule when it reaches the end of the character
+ * stream without having matched a closing marker, i.e. the comment was never closed.
+ * Records that fact in has_unclosed_bracketed_comment; this method does not fail by
+ * itself, the flag is set so that the failure can be raised later.
+ */
+ public void markUnclosedComment() {
+ has_unclosed_bracketed_comment = true;
+ }
+}
+
+
+multipartIdentifierPropertyList
+ : multipartIdentifierProperty (COMMA multipartIdentifierProperty)*
+ ;
+
+multipartIdentifierProperty
+ : multipartIdentifier (options=propertyList)?
+ ;
+
+propertyList
+ : property (COMMA property)*
+ ;
+
+property
+ : key=propertyKey (EQ? value=propertyValue)?
+ ;
+
+propertyKey
+ : identifier (DOT identifier)*
+ | STRING
+ ;
+
+propertyValue
+ : INTEGER_VALUE
+ | DECIMAL_VALUE
+ | booleanValue
+ | STRING
+ ;
+
+booleanValue
+ : TRUE | FALSE
+ ;
+
+
+multipartIdentifier
+ : parts+=identifier (DOT parts+=identifier)*
+ ;
+
+identifier
+ : IDENTIFIER #unquotedIdentifier
+ | quotedIdentifier #quotedIdentifierAlternative
+ | nonReserved #unquotedIdentifier
+ ;
+
+quotedIdentifier
+ : BACKQUOTED_IDENTIFIER
+ ;
+
+nonReserved
+ : DROP | SKIPPING | INDEX
+ ;
+
+
+// Flint lexical tokens
+
+BLOOM_FILTER: 'BLOOM_FILTER';
+MIN_MAX: 'MIN_MAX';
+SKIPPING: 'SKIPPING';
+VALUE_SET: 'VALUE_SET';
+
+
+// Spark lexical tokens
+
+SEMICOLON: ';';
+
+LEFT_PAREN: '(';
+RIGHT_PAREN: ')';
+COMMA: ',';
+DOT: '.';
+
+
+AS: 'AS';
+ALTER: 'ALTER';
+ANALYZE: 'ANALYZE';
+CREATE: 'CREATE';
+DESC: 'DESC';
+DESCRIBE: 'DESCRIBE';
+DROP: 'DROP';
+EXISTS: 'EXISTS';
+EXTENDED: 'EXTENDED';
+FALSE: 'FALSE';
+FLINT: 'FLINT';
+IF: 'IF';
+IN: 'IN';
+INDEX: 'INDEX';
+INDEXES: 'INDEXES';
+JOB: 'JOB';
+MATERIALIZED: 'MATERIALIZED';
+NOT: 'NOT';
+ON: 'ON';
+PARTITION: 'PARTITION';
+RECOVER: 'RECOVER';
+REFRESH: 'REFRESH';
+SHOW: 'SHOW';
+TRUE: 'TRUE';
+VACUUM: 'VACUUM';
+VIEW: 'VIEW';
+VIEWS: 'VIEWS';
+WHERE: 'WHERE';
+WITH: 'WITH';
+
+
+EQ : '=' | '==';
+MINUS: '-';
+
+
+STRING
+ : '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
+ | '"' ( ~('"'|'\\') | ('\\' .) )* '"'
+ | 'R\'' (~'\'')* '\''
+ | 'R"'(~'"')* '"'
+ ;
+
+INTEGER_VALUE
+ : DIGIT+
+ ;
+
+DECIMAL_VALUE
+ : DECIMAL_DIGITS {isValidDecimal()}?
+ ;
+
+IDENTIFIER
+ : (LETTER | DIGIT | '_')+
+ ;
+
+BACKQUOTED_IDENTIFIER
+ : '`' ( ~'`' | '``' )* '`'
+ ;
+
+fragment DECIMAL_DIGITS
+ : DIGIT+ '.' DIGIT*
+ | '.' DIGIT+
+ ;
+
+fragment DIGIT
+ : [0-9]
+ ;
+
+fragment LETTER
+ : [A-Z]
+ ;
+
+SIMPLE_COMMENT
+ : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN)
+ ;
+
+BRACKETED_COMMENT
+ : '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN)
+ ;
+
+WS
+ : [ \r\n\t]+ -> channel(HIDDEN)
+ ;
+
+// Catch-all for anything we can't recognize.
+// We use this to be able to ignore and recover all the text
+// when splitting statements with DelimiterLexer
+UNRECOGNIZED
+ : .
+ ;
\ No newline at end of file
diff --git a/language-grammar/src/main/antlr4/SqlBaseLexer.g4 b/language-grammar/src/main/antlr4/SqlBaseLexer.g4
new file mode 100644
index 00000000000..fb440ef8d37
--- /dev/null
+++ b/language-grammar/src/main/antlr4/SqlBaseLexer.g4
@@ -0,0 +1,546 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar.
+ */
+
+lexer grammar SqlBaseLexer;
+
+@members {
+ /**
+ * When true, parser should throw ParseException for unclosed bracketed comment.
+ */
+ public boolean has_unclosed_bracketed_comment = false;
+
+ /**
+ * Verify whether current token is a valid decimal token (which contains dot).
+ * Returns true if the character that follows the token is not a digit or letter or underscore.
+ *
+ * For example:
+ * For char stream "2.3", "2." is not a valid decimal token, because it is followed by digit '3'.
+ * For char stream "2.3_", "2.3" is not a valid decimal token, because it is followed by '_'.
+ * For char stream "2.3W", "2.3" is not a valid decimal token, because it is followed by 'W'.
+ * For char stream "12.0D 34.E2+0.12 " 12.0D is a valid decimal token because it is followed
+ * by a space. 34.E2 is a valid decimal token because it is followed by symbol '+'
+ * which is not a digit or letter or underscore.
+ */
+ public boolean isValidDecimal() {
+   // Look one character past the candidate token without consuming anything.
+   int lookahead = _input.LA(1);
+   boolean isUpperLetter = lookahead >= 'A' && lookahead <= 'Z';
+   boolean isDigit = lookahead >= '0' && lookahead <= '9';
+   boolean isUnderscore = lookahead == '_';
+   // Any of these means the token text keeps going, so this is not a valid decimal.
+   return !(isUpperLetter || isDigit || isUnderscore);
+ }
+
+ /**
+ * This method will be called when we see '/*' and try to match it as a bracketed comment.
+ * If the next character is '+', it should be parsed as hint later, and we cannot match
+ * it as a bracketed comment.
+ *
+ * Returns true if the next character is '+'.
+ */
+ public boolean isHint() {
+   // Lookahead only: the next character is inspected but stays in the stream.
+   final int lookahead = _input.LA(1);
+   // A '+' right after the comment opener means the text should be parsed as a hint
+   // later, so the bracketed-comment rule must not match it.
+   final boolean opensHint = lookahead == '+';
+   return opensHint;
+ }
+
+ /**
+ * This method will be called when the character stream ends and try to find out the
+ * unclosed bracketed comment.
+ * If the method be called, it means the end of the entire character stream match,
+ * and we set the flag and fail later.
+ */
+ public void markUnclosedComment() {
+ has_unclosed_bracketed_comment = true;
+ }
+}
+
+SEMICOLON: ';';
+
+LEFT_PAREN: '(';
+RIGHT_PAREN: ')';
+COMMA: ',';
+DOT: '.';
+LEFT_BRACKET: '[';
+RIGHT_BRACKET: ']';
+
+// NOTE: If you add a new token in the list below, you should update the list of keywords
+// and reserved tag in `docs/sql-ref-ansi-compliance.md#sql-keywords`, and
+// modify `ParserUtils.toExprAlias()` which assumes all keywords are between `ADD` and `ZONE`.
+
+//============================
+// Start of the keywords list
+//============================
+//--SPARK-KEYWORD-LIST-START
+ADD: 'ADD';
+AFTER: 'AFTER';
+ALL: 'ALL';
+ALTER: 'ALTER';
+ALWAYS: 'ALWAYS';
+ANALYZE: 'ANALYZE';
+AND: 'AND';
+ANTI: 'ANTI';
+ANY: 'ANY';
+ANY_VALUE: 'ANY_VALUE';
+ARCHIVE: 'ARCHIVE';
+ARRAY: 'ARRAY';
+AS: 'AS';
+ASC: 'ASC';
+AT: 'AT';
+AUTHORIZATION: 'AUTHORIZATION';
+BETWEEN: 'BETWEEN';
+BIGINT: 'BIGINT';
+BINARY: 'BINARY';
+BOOLEAN: 'BOOLEAN';
+BOTH: 'BOTH';
+BUCKET: 'BUCKET';
+BUCKETS: 'BUCKETS';
+BY: 'BY';
+BYTE: 'BYTE';
+CACHE: 'CACHE';
+CASCADE: 'CASCADE';
+CASE: 'CASE';
+CAST: 'CAST';
+CATALOG: 'CATALOG';
+CATALOGS: 'CATALOGS';
+CHANGE: 'CHANGE';
+CHAR: 'CHAR';
+CHARACTER: 'CHARACTER';
+CHECK: 'CHECK';
+CLEAR: 'CLEAR';
+CLUSTER: 'CLUSTER';
+CLUSTERED: 'CLUSTERED';
+CODEGEN: 'CODEGEN';
+COLLATE: 'COLLATE';
+COLLECTION: 'COLLECTION';
+COLUMN: 'COLUMN';
+COLUMNS: 'COLUMNS';
+COMMENT: 'COMMENT';
+COMMIT: 'COMMIT';
+COMPACT: 'COMPACT';
+COMPACTIONS: 'COMPACTIONS';
+COMPUTE: 'COMPUTE';
+CONCATENATE: 'CONCATENATE';
+CONSTRAINT: 'CONSTRAINT';
+COST: 'COST';
+CREATE: 'CREATE';
+CROSS: 'CROSS';
+CUBE: 'CUBE';
+CURRENT: 'CURRENT';
+CURRENT_DATE: 'CURRENT_DATE';
+CURRENT_TIME: 'CURRENT_TIME';
+CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
+CURRENT_USER: 'CURRENT_USER';
+DAY: 'DAY';
+DAYS: 'DAYS';
+DAYOFYEAR: 'DAYOFYEAR';
+DATA: 'DATA';
+DATE: 'DATE';
+DATABASE: 'DATABASE';
+DATABASES: 'DATABASES';
+DATEADD: 'DATEADD';
+DATE_ADD: 'DATE_ADD';
+DATEDIFF: 'DATEDIFF';
+DATE_DIFF: 'DATE_DIFF';
+DBPROPERTIES: 'DBPROPERTIES';
+DEC: 'DEC';
+DECIMAL: 'DECIMAL';
+DEFAULT: 'DEFAULT';
+DEFINED: 'DEFINED';
+DELETE: 'DELETE';
+DELIMITED: 'DELIMITED';
+DESC: 'DESC';
+DESCRIBE: 'DESCRIBE';
+DFS: 'DFS';
+DIRECTORIES: 'DIRECTORIES';
+DIRECTORY: 'DIRECTORY';
+DISTINCT: 'DISTINCT';
+DISTRIBUTE: 'DISTRIBUTE';
+DIV: 'DIV';
+DOUBLE: 'DOUBLE';
+DROP: 'DROP';
+ELSE: 'ELSE';
+END: 'END';
+ESCAPE: 'ESCAPE';
+ESCAPED: 'ESCAPED';
+EXCEPT: 'EXCEPT';
+EXCHANGE: 'EXCHANGE';
+EXCLUDE: 'EXCLUDE';
+EXISTS: 'EXISTS';
+EXPLAIN: 'EXPLAIN';
+EXPORT: 'EXPORT';
+EXTENDED: 'EXTENDED';
+EXTERNAL: 'EXTERNAL';
+EXTRACT: 'EXTRACT';
+FALSE: 'FALSE';
+FETCH: 'FETCH';
+FIELDS: 'FIELDS';
+FILTER: 'FILTER';
+FILEFORMAT: 'FILEFORMAT';
+FIRST: 'FIRST';
+FLOAT: 'FLOAT';
+FOLLOWING: 'FOLLOWING';
+FOR: 'FOR';
+FOREIGN: 'FOREIGN';
+FORMAT: 'FORMAT';
+FORMATTED: 'FORMATTED';
+FROM: 'FROM';
+FULL: 'FULL';
+FUNCTION: 'FUNCTION';
+FUNCTIONS: 'FUNCTIONS';
+GENERATED: 'GENERATED';
+GLOBAL: 'GLOBAL';
+GRANT: 'GRANT';
+GROUP: 'GROUP';
+GROUPING: 'GROUPING';
+HAVING: 'HAVING';
+BINARY_HEX: 'X';
+HOUR: 'HOUR';
+HOURS: 'HOURS';
+IDENTIFIER_KW: 'IDENTIFIER';
+IF: 'IF';
+IGNORE: 'IGNORE';
+IMPORT: 'IMPORT';
+IN: 'IN';
+INCLUDE: 'INCLUDE';
+INDEX: 'INDEX';
+INDEXES: 'INDEXES';
+INNER: 'INNER';
+INPATH: 'INPATH';
+INPUTFORMAT: 'INPUTFORMAT';
+INSERT: 'INSERT';
+INTERSECT: 'INTERSECT';
+INTERVAL: 'INTERVAL';
+INT: 'INT';
+INTEGER: 'INTEGER';
+INTO: 'INTO';
+IS: 'IS';
+ITEMS: 'ITEMS';
+JOIN: 'JOIN';
+KEYS: 'KEYS';
+LAST: 'LAST';
+LATERAL: 'LATERAL';
+LAZY: 'LAZY';
+LEADING: 'LEADING';
+LEFT: 'LEFT';
+LIKE: 'LIKE';
+ILIKE: 'ILIKE';
+LIMIT: 'LIMIT';
+LINES: 'LINES';
+LIST: 'LIST';
+LOAD: 'LOAD';
+LOCAL: 'LOCAL';
+LOCATION: 'LOCATION';
+LOCK: 'LOCK';
+LOCKS: 'LOCKS';
+LOGICAL: 'LOGICAL';
+LONG: 'LONG';
+MACRO: 'MACRO';
+MAP: 'MAP';
+MATCHED: 'MATCHED';
+MERGE: 'MERGE';
+MICROSECOND: 'MICROSECOND';
+MICROSECONDS: 'MICROSECONDS';
+MILLISECOND: 'MILLISECOND';
+MILLISECONDS: 'MILLISECONDS';
+MINUTE: 'MINUTE';
+MINUTES: 'MINUTES';
+MONTH: 'MONTH';
+MONTHS: 'MONTHS';
+MSCK: 'MSCK';
+NAME: 'NAME';
+NAMESPACE: 'NAMESPACE';
+NAMESPACES: 'NAMESPACES';
+NANOSECOND: 'NANOSECOND';
+NANOSECONDS: 'NANOSECONDS';
+NATURAL: 'NATURAL';
+NO: 'NO';
+NOT: 'NOT' | '!';
+NULL: 'NULL';
+NULLS: 'NULLS';
+NUMERIC: 'NUMERIC';
+OF: 'OF';
+OFFSET: 'OFFSET';
+ON: 'ON';
+ONLY: 'ONLY';
+OPTION: 'OPTION';
+OPTIONS: 'OPTIONS';
+OR: 'OR';
+ORDER: 'ORDER';
+OUT: 'OUT';
+OUTER: 'OUTER';
+OUTPUTFORMAT: 'OUTPUTFORMAT';
+OVER: 'OVER';
+OVERLAPS: 'OVERLAPS';
+OVERLAY: 'OVERLAY';
+OVERWRITE: 'OVERWRITE';
+PARTITION: 'PARTITION';
+PARTITIONED: 'PARTITIONED';
+PARTITIONS: 'PARTITIONS';
+PERCENTILE_CONT: 'PERCENTILE_CONT';
+PERCENTILE_DISC: 'PERCENTILE_DISC';
+PERCENTLIT: 'PERCENT';
+PIVOT: 'PIVOT';
+PLACING: 'PLACING';
+POSITION: 'POSITION';
+PRECEDING: 'PRECEDING';
+PRIMARY: 'PRIMARY';
+PRINCIPALS: 'PRINCIPALS';
+PROPERTIES: 'PROPERTIES';
+PURGE: 'PURGE';
+QUARTER: 'QUARTER';
+QUERY: 'QUERY';
+RANGE: 'RANGE';
+REAL: 'REAL';
+RECORDREADER: 'RECORDREADER';
+RECORDWRITER: 'RECORDWRITER';
+RECOVER: 'RECOVER';
+REDUCE: 'REDUCE';
+REFERENCES: 'REFERENCES';
+REFRESH: 'REFRESH';
+RENAME: 'RENAME';
+REPAIR: 'REPAIR';
+REPEATABLE: 'REPEATABLE';
+REPLACE: 'REPLACE';
+RESET: 'RESET';
+RESPECT: 'RESPECT';
+RESTRICT: 'RESTRICT';
+REVOKE: 'REVOKE';
+RIGHT: 'RIGHT';
+RLIKE: 'RLIKE' | 'REGEXP';
+ROLE: 'ROLE';
+ROLES: 'ROLES';
+ROLLBACK: 'ROLLBACK';
+ROLLUP: 'ROLLUP';
+ROW: 'ROW';
+ROWS: 'ROWS';
+SECOND: 'SECOND';
+SECONDS: 'SECONDS';
+SCHEMA: 'SCHEMA';
+SCHEMAS: 'SCHEMAS';
+SELECT: 'SELECT';
+SEMI: 'SEMI';
+SEPARATED: 'SEPARATED';
+SERDE: 'SERDE';
+SERDEPROPERTIES: 'SERDEPROPERTIES';
+SESSION_USER: 'SESSION_USER';
+SET: 'SET';
+SETMINUS: 'MINUS';
+SETS: 'SETS';
+SHORT: 'SHORT';
+SHOW: 'SHOW';
+SKEWED: 'SKEWED';
+SMALLINT: 'SMALLINT';
+SOME: 'SOME';
+SORT: 'SORT';
+SORTED: 'SORTED';
+SOURCE: 'SOURCE';
+START: 'START';
+STATISTICS: 'STATISTICS';
+STORED: 'STORED';
+STRATIFY: 'STRATIFY';
+STRING: 'STRING';
+STRUCT: 'STRUCT';
+SUBSTR: 'SUBSTR';
+SUBSTRING: 'SUBSTRING';
+SYNC: 'SYNC';
+SYSTEM_TIME: 'SYSTEM_TIME';
+SYSTEM_VERSION: 'SYSTEM_VERSION';
+TABLE: 'TABLE';
+TABLES: 'TABLES';
+TABLESAMPLE: 'TABLESAMPLE';
+TARGET: 'TARGET';
+TBLPROPERTIES: 'TBLPROPERTIES';
+TEMPORARY: 'TEMPORARY' | 'TEMP';
+TERMINATED: 'TERMINATED';
+THEN: 'THEN';
+TIME: 'TIME';
+TIMESTAMP: 'TIMESTAMP';
+TIMESTAMP_LTZ: 'TIMESTAMP_LTZ';
+TIMESTAMP_NTZ: 'TIMESTAMP_NTZ';
+TIMESTAMPADD: 'TIMESTAMPADD';
+TIMESTAMPDIFF: 'TIMESTAMPDIFF';
+TINYINT: 'TINYINT';
+TO: 'TO';
+TOUCH: 'TOUCH';
+TRAILING: 'TRAILING';
+TRANSACTION: 'TRANSACTION';
+TRANSACTIONS: 'TRANSACTIONS';
+TRANSFORM: 'TRANSFORM';
+TRIM: 'TRIM';
+TRUE: 'TRUE';
+TRUNCATE: 'TRUNCATE';
+TRY_CAST: 'TRY_CAST';
+TYPE: 'TYPE';
+UNARCHIVE: 'UNARCHIVE';
+UNBOUNDED: 'UNBOUNDED';
+UNCACHE: 'UNCACHE';
+UNION: 'UNION';
+UNIQUE: 'UNIQUE';
+UNKNOWN: 'UNKNOWN';
+UNLOCK: 'UNLOCK';
+UNPIVOT: 'UNPIVOT';
+UNSET: 'UNSET';
+UPDATE: 'UPDATE';
+USE: 'USE';
+USER: 'USER';
+USING: 'USING';
+VALUES: 'VALUES';
+VARCHAR: 'VARCHAR';
+VERSION: 'VERSION';
+VIEW: 'VIEW';
+VIEWS: 'VIEWS';
+VOID: 'VOID';
+WEEK: 'WEEK';
+WEEKS: 'WEEKS';
+WHEN: 'WHEN';
+WHERE: 'WHERE';
+WINDOW: 'WINDOW';
+WITH: 'WITH';
+WITHIN: 'WITHIN';
+YEAR: 'YEAR';
+YEARS: 'YEARS';
+ZONE: 'ZONE';
+//--SPARK-KEYWORD-LIST-END
+//============================
+// End of the keywords list
+//============================
+
+EQ : '=' | '==';
+NSEQ: '<=>';
+NEQ : '<>';
+NEQJ: '!=';
+LT : '<';
+LTE : '<=' | '!>';
+GT : '>';
+GTE : '>=' | '!<';
+
+PLUS: '+';
+MINUS: '-';
+ASTERISK: '*';
+SLASH: '/';
+PERCENT: '%';
+TILDE: '~';
+AMPERSAND: '&';
+PIPE: '|';
+CONCAT_PIPE: '||';
+HAT: '^';
+COLON: ':';
+ARROW: '->';
+FAT_ARROW : '=>';
+HENT_START: '/*+';
+HENT_END: '*/';
+QUESTION: '?';
+
+STRING_LITERAL
+ : '\'' ( ~('\''|'\\') | ('\\' .) )* '\''
+ | 'R\'' (~'\'')* '\''
+ | 'R"'(~'"')* '"'
+ ;
+
+DOUBLEQUOTED_STRING
+ :'"' ( ~('"'|'\\') | ('\\' .) )* '"'
+ ;
+
+// NOTE: If you move a numeric literal, you should modify `ParserUtils.toExprAlias()`
+// which assumes all numeric literals are between `BIGINT_LITERAL` and `BIGDECIMAL_LITERAL`.
+
+BIGINT_LITERAL
+ : DIGIT+ 'L'
+ ;
+
+SMALLINT_LITERAL
+ : DIGIT+ 'S'
+ ;
+
+TINYINT_LITERAL
+ : DIGIT+ 'Y'
+ ;
+
+INTEGER_VALUE
+ : DIGIT+
+ ;
+
+EXPONENT_VALUE
+ : DIGIT+ EXPONENT
+ | DECIMAL_DIGITS EXPONENT {isValidDecimal()}?
+ ;
+
+DECIMAL_VALUE
+ : DECIMAL_DIGITS {isValidDecimal()}?
+ ;
+
+FLOAT_LITERAL
+ : DIGIT+ EXPONENT? 'F'
+ | DECIMAL_DIGITS EXPONENT? 'F' {isValidDecimal()}?
+ ;
+
+DOUBLE_LITERAL
+ : DIGIT+ EXPONENT? 'D'
+ | DECIMAL_DIGITS EXPONENT? 'D' {isValidDecimal()}?
+ ;
+
+BIGDECIMAL_LITERAL
+ : DIGIT+ EXPONENT? 'BD'
+ | DECIMAL_DIGITS EXPONENT? 'BD' {isValidDecimal()}?
+ ;
+
+IDENTIFIER
+ : (LETTER | DIGIT | '_')+
+ ;
+
+BACKQUOTED_IDENTIFIER
+ : '`' ( ~'`' | '``' )* '`'
+ ;
+
+fragment DECIMAL_DIGITS
+ : DIGIT+ '.' DIGIT*
+ | '.' DIGIT+
+ ;
+
+fragment EXPONENT
+ : 'E' [+-]? DIGIT+
+ ;
+
+fragment DIGIT
+ : [0-9]
+ ;
+
+fragment LETTER
+ : [A-Z]
+ ;
+
+SIMPLE_COMMENT
+ : '--' ('\\\n' | ~[\r\n])* '\r'? '\n'? -> channel(HIDDEN)
+ ;
+
+BRACKETED_COMMENT
+ : '/*' {!isHint()}? ( BRACKETED_COMMENT | . )*? ('*/' | {markUnclosedComment();} EOF) -> channel(HIDDEN)
+ ;
+
+WS
+ : [ \r\n\t]+ -> channel(HIDDEN)
+ ;
+
+// Catch-all for anything we can't recognize.
+// We use this to be able to ignore and recover all the text
+// when splitting statements with DelimiterLexer
+UNRECOGNIZED
+ : .
+ ;
diff --git a/language-grammar/src/main/antlr4/SqlBaseParser.g4 b/language-grammar/src/main/antlr4/SqlBaseParser.g4
new file mode 100644
index 00000000000..04128216be0
--- /dev/null
+++ b/language-grammar/src/main/antlr4/SqlBaseParser.g4
@@ -0,0 +1,1875 @@
+/*
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * This file is an adaptation of Presto's presto-parser/src/main/antlr4/com/facebook/presto/sql/parser/SqlBase.g4 grammar.
+ */
+
+parser grammar SqlBaseParser;
+
+options { tokenVocab = SqlBaseLexer; }
+
+@members { // flags injected into the generated parser class; public, presumably so the code creating the parser can set them - confirm
+ /**
+ * When false (the default), INTERSECT binds tighter than UNION, EXCEPT and MINUS, as in the SQL standard.
+ * When true, all four set operators share one precedence level. Read by the predicates in queryTerm.
+ */
+ public boolean legacy_setops_precedence_enabled = false;
+
+ /**
+ * When false (the default), a literal with an exponent is converted into
+ * double type rather than decimal type. Not referenced by the rules in this part of the grammar.
+ */
+ public boolean legacy_exponent_literal_as_decimal_enabled = false;
+
+ /**
+ * When true, keywords behave as the ANSI SQL standard prescribes. Defaults to false.
+ */
+ public boolean SQL_standard_keyword_behavior = false;
+
+ /**
+ * When true, double-quoted literals are treated as identifiers rather than as strings. Defaults to false.
+ */
+ public boolean double_quoted_identifiers = false;
+}
+
+singleStatement  // entry rule: one statement, any number of trailing semicolons, then end of input
+    : statement SEMICOLON* EOF
+    ;
+
+singleExpression  // entry rule: one optionally aliased expression
+    : namedExpression EOF
+    ;
+
+singleTableIdentifier  // entry rule: table name with an optional db qualifier
+    : tableIdentifier EOF
+    ;
+
+singleMultipartIdentifier  // entry rule: dot-separated identifier
+    : multipartIdentifier EOF
+    ;
+
+singleFunctionIdentifier  // entry rule: function name with an optional db qualifier
+    : functionIdentifier EOF
+    ;
+
+singleDataType  // entry rule: one data type
+    : dataType EOF
+    ;
+
+singleTableSchema  // entry rule: comma-separated column definitions
+    : colTypeList EOF
+    ;
+
+statement // every top-level statement form; keep the alternative order, since ANTLR resolves ambiguous input in favour of the earliest alternative
+ : query #statementDefault
+ | ctes? dmlStatementNoWith #dmlStatement
+ | USE identifierReference #use
+ | USE namespace identifierReference #useNamespace
+ | SET CATALOG (identifier | stringLit) #setCatalog
+ | CREATE namespace (IF NOT EXISTS)? identifierReference
+ (commentSpec |
+ locationSpec |
+ (WITH (DBPROPERTIES | PROPERTIES) propertyList))* #createNamespace
+ | ALTER namespace identifierReference
+ SET (DBPROPERTIES | PROPERTIES) propertyList #setNamespaceProperties
+ | ALTER namespace identifierReference
+ SET locationSpec #setNamespaceLocation
+ | DROP namespace (IF EXISTS)? identifierReference
+ (RESTRICT | CASCADE)? #dropNamespace
+ | SHOW namespaces ((FROM | IN) multipartIdentifier)?
+ (LIKE? pattern=stringLit)? #showNamespaces
+ | createTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider?
+ createTableClauses
+ (AS? query)? #createTable
+ | CREATE TABLE (IF NOT EXISTS)? target=tableIdentifier
+ LIKE source=tableIdentifier
+ (tableProvider |
+ rowFormat |
+ createFileFormat |
+ locationSpec |
+ (TBLPROPERTIES tableProps=propertyList))* #createTableLike
+ | replaceTableHeader (LEFT_PAREN createOrReplaceTableColTypeList RIGHT_PAREN)? tableProvider?
+ createTableClauses
+ (AS? query)? #replaceTable
+ | ANALYZE TABLE identifierReference partitionSpec? COMPUTE STATISTICS
+ (identifier | FOR COLUMNS identifierSeq | FOR ALL COLUMNS)? #analyze
+ | ANALYZE TABLES ((FROM | IN) identifierReference)? COMPUTE STATISTICS
+ (identifier)? #analyzeTables
+ | ALTER TABLE identifierReference
+ ADD (COLUMN | COLUMNS)
+ columns=qualifiedColTypeWithPositionList #addTableColumns
+ | ALTER TABLE identifierReference
+ ADD (COLUMN | COLUMNS)
+ LEFT_PAREN columns=qualifiedColTypeWithPositionList RIGHT_PAREN #addTableColumns
+ | ALTER TABLE table=identifierReference
+ RENAME COLUMN
+ from=multipartIdentifier TO to=errorCapturingIdentifier #renameTableColumn
+ | ALTER TABLE identifierReference
+ DROP (COLUMN | COLUMNS) (IF EXISTS)?
+ LEFT_PAREN columns=multipartIdentifierList RIGHT_PAREN #dropTableColumns
+ | ALTER TABLE identifierReference
+ DROP (COLUMN | COLUMNS) (IF EXISTS)?
+ columns=multipartIdentifierList #dropTableColumns
+ | ALTER (TABLE | VIEW) from=identifierReference
+ RENAME TO to=multipartIdentifier #renameTable
+ | ALTER (TABLE | VIEW) identifierReference
+ SET TBLPROPERTIES propertyList #setTableProperties
+ | ALTER (TABLE | VIEW) identifierReference
+ UNSET TBLPROPERTIES (IF EXISTS)? propertyList #unsetTableProperties
+ | ALTER TABLE table=identifierReference
+ (ALTER | CHANGE) COLUMN? column=multipartIdentifier
+ alterColumnAction? #alterTableAlterColumn
+ | ALTER TABLE table=identifierReference partitionSpec?
+ CHANGE COLUMN?
+ colName=multipartIdentifier colType colPosition? #hiveChangeColumn
+ | ALTER TABLE table=identifierReference partitionSpec?
+ REPLACE COLUMNS
+ LEFT_PAREN columns=qualifiedColTypeWithPositionList
+ RIGHT_PAREN #hiveReplaceColumns
+ | ALTER TABLE identifierReference (partitionSpec)?
+ SET SERDE stringLit (WITH SERDEPROPERTIES propertyList)? #setTableSerDe
+ | ALTER TABLE identifierReference (partitionSpec)?
+ SET SERDEPROPERTIES propertyList #setTableSerDe
+ | ALTER (TABLE | VIEW) identifierReference ADD (IF NOT EXISTS)?
+ partitionSpecLocation+ #addTablePartition
+ | ALTER TABLE identifierReference
+ from=partitionSpec RENAME TO to=partitionSpec #renameTablePartition
+ | ALTER (TABLE | VIEW) identifierReference
+ DROP (IF EXISTS)? partitionSpec (COMMA partitionSpec)* PURGE? #dropTablePartitions
+ | ALTER TABLE identifierReference
+ (partitionSpec)? SET locationSpec #setTableLocation
+ | ALTER TABLE identifierReference RECOVER PARTITIONS #recoverPartitions
+ | DROP TABLE (IF EXISTS)? identifierReference PURGE? #dropTable
+ | DROP VIEW (IF EXISTS)? identifierReference #dropView
+ | CREATE (OR REPLACE)? (GLOBAL? TEMPORARY)?
+ VIEW (IF NOT EXISTS)? identifierReference
+ identifierCommentList?
+ (commentSpec |
+ (PARTITIONED ON identifierList) |
+ (TBLPROPERTIES propertyList))*
+ AS query #createView
+ | CREATE (OR REPLACE)? GLOBAL? TEMPORARY VIEW
+ tableIdentifier (LEFT_PAREN colTypeList RIGHT_PAREN)? tableProvider
+ (OPTIONS propertyList)? #createTempViewUsing
+ | ALTER VIEW identifierReference AS? query #alterViewQuery
+ | CREATE (OR REPLACE)? TEMPORARY? FUNCTION (IF NOT EXISTS)?
+ identifierReference AS className=stringLit
+ (USING resource (COMMA resource)*)? #createFunction
+ | DROP TEMPORARY? FUNCTION (IF EXISTS)? identifierReference #dropFunction
+ | EXPLAIN (LOGICAL | FORMATTED | EXTENDED | CODEGEN | COST)?
+ statement #explain
+ | SHOW TABLES ((FROM | IN) identifierReference)?
+ (LIKE? pattern=stringLit)? #showTables
+ | SHOW TABLE EXTENDED ((FROM | IN) ns=identifierReference)?
+ LIKE pattern=stringLit partitionSpec? #showTableExtended
+ | SHOW TBLPROPERTIES table=identifierReference
+ (LEFT_PAREN key=propertyKey RIGHT_PAREN)? #showTblProperties
+ | SHOW COLUMNS (FROM | IN) table=identifierReference
+ ((FROM | IN) ns=multipartIdentifier)? #showColumns
+ | SHOW VIEWS ((FROM | IN) identifierReference)?
+ (LIKE? pattern=stringLit)? #showViews
+ | SHOW PARTITIONS identifierReference partitionSpec? #showPartitions
+ | SHOW identifier? FUNCTIONS ((FROM | IN) ns=identifierReference)?
+ (LIKE? (legacy=multipartIdentifier | pattern=stringLit))? #showFunctions
+ | SHOW CREATE TABLE identifierReference (AS SERDE)? #showCreateTable
+ | SHOW CURRENT namespace #showCurrentNamespace
+ | SHOW CATALOGS (LIKE? pattern=stringLit)? #showCatalogs
+ | (DESC | DESCRIBE) FUNCTION EXTENDED? describeFuncName #describeFunction
+ | (DESC | DESCRIBE) namespace EXTENDED?
+ identifierReference #describeNamespace
+ | (DESC | DESCRIBE) TABLE? option=(EXTENDED | FORMATTED)?
+ identifierReference partitionSpec? describeColName? #describeRelation
+ | (DESC | DESCRIBE) QUERY? query #describeQuery
+ | COMMENT ON namespace identifierReference IS
+ comment #commentNamespace
+ | COMMENT ON TABLE identifierReference IS comment #commentTable
+ | REFRESH TABLE identifierReference #refreshTable
+ | REFRESH FUNCTION identifierReference #refreshFunction
+ | REFRESH (stringLit | .*?) #refreshResource // .*? is a non-greedy wildcard over any tokens
+ | CACHE LAZY? TABLE identifierReference
+ (OPTIONS options=propertyList)? (AS? query)? #cacheTable
+ | UNCACHE TABLE (IF EXISTS)? identifierReference #uncacheTable
+ | CLEAR CACHE #clearCache
+ | LOAD DATA LOCAL? INPATH path=stringLit OVERWRITE? INTO TABLE
+ identifierReference partitionSpec? #loadData
+ | TRUNCATE TABLE identifierReference partitionSpec? #truncateTable
+ | (MSCK)? REPAIR TABLE identifierReference
+ (option=(ADD|DROP|SYNC) PARTITIONS)? #repairTable
+ | op=(ADD | LIST) identifier .*? #manageResource
+ | SET ROLE .*? #failNativeCommand // shares its label with the unsupportedHiveNativeCommands alternative at the end
+ | SET TIME ZONE interval #setTimeZone
+ | SET TIME ZONE timezone #setTimeZone
+ | SET TIME ZONE .*? #setTimeZone // wildcard fallback after the two structured SET TIME ZONE forms
+ | SET configKey EQ configValue #setQuotedConfiguration // quoted-key forms are listed before the wildcard SET forms below
+ | SET configKey (EQ .*?)? #setConfiguration
+ | SET .*? EQ configValue #setQuotedConfiguration
+ | SET .*? #setConfiguration // last SET form: accepts any remaining tokens
+ | RESET configKey #resetQuotedConfiguration
+ | RESET .*? #resetConfiguration
+ | CREATE INDEX (IF NOT EXISTS)? identifier ON TABLE?
+ identifierReference (USING indexType=identifier)?
+ LEFT_PAREN columns=multipartIdentifierPropertyList RIGHT_PAREN
+ (OPTIONS options=propertyList)? #createIndex
+ | DROP INDEX (IF EXISTS)? identifier ON TABLE? identifierReference #dropIndex
+ | unsupportedHiveNativeCommands .*? #failNativeCommand // final fallback: a listed keyword prefix followed by any tokens
+ ;
+
+timezone  // operand of SET TIME ZONE in statement: a string literal or the LOCAL keyword
+    : stringLit
+    | LOCAL
+    ;
+
+configKey  // key of the quoted SET / RESET forms in statement
+    : quotedIdentifier
+    ;
+
+configValue  // value of the quoted SET forms in statement
+    : backQuotedIdentifier
+    ;
+
+unsupportedHiveNativeCommands  // keyword prefixes that statement labels #failNativeCommand; kw1..kw6 label the matched keywords
+    : kw1=CREATE kw2=ROLE
+    | kw1=DROP kw2=ROLE
+    | kw1=GRANT kw2=ROLE?
+    | kw1=REVOKE kw2=ROLE?
+    | kw1=SHOW kw2=GRANT
+    | kw1=SHOW kw2=ROLE kw3=GRANT?
+    | kw1=SHOW kw2=PRINCIPALS
+    | kw1=SHOW kw2=ROLES
+    | kw1=SHOW kw2=CURRENT kw3=ROLES
+    | kw1=EXPORT kw2=TABLE
+    | kw1=IMPORT kw2=TABLE
+    | kw1=SHOW kw2=COMPACTIONS
+    | kw1=SHOW kw2=CREATE kw3=TABLE
+    | kw1=SHOW kw2=TRANSACTIONS
+    | kw1=SHOW kw2=INDEXES
+    | kw1=SHOW kw2=LOCKS
+    | kw1=CREATE kw2=INDEX
+    | kw1=DROP kw2=INDEX
+    | kw1=ALTER kw2=INDEX
+    | kw1=LOCK kw2=TABLE
+    | kw1=LOCK kw2=DATABASE
+    | kw1=UNLOCK kw2=TABLE
+    | kw1=UNLOCK kw2=DATABASE
+    | kw1=CREATE kw2=TEMPORARY kw3=MACRO
+    | kw1=DROP kw2=TEMPORARY kw3=MACRO
+    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=CLUSTERED
+    | kw1=ALTER kw2=TABLE tableIdentifier kw3=CLUSTERED kw4=BY
+    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SORTED
+    | kw1=ALTER kw2=TABLE tableIdentifier kw3=SKEWED kw4=BY
+    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=SKEWED
+    | kw1=ALTER kw2=TABLE tableIdentifier kw3=NOT kw4=STORED kw5=AS kw6=DIRECTORIES
+    | kw1=ALTER kw2=TABLE tableIdentifier kw3=SET kw4=SKEWED kw5=LOCATION
+    | kw1=ALTER kw2=TABLE tableIdentifier kw3=EXCHANGE kw4=PARTITION
+    | kw1=ALTER kw2=TABLE tableIdentifier kw3=ARCHIVE kw4=PARTITION
+    | kw1=ALTER kw2=TABLE tableIdentifier kw3=UNARCHIVE kw4=PARTITION
+    | kw1=ALTER kw2=TABLE tableIdentifier kw3=TOUCH
+    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=COMPACT
+    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=CONCATENATE
+    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=SET kw4=FILEFORMAT
+    | kw1=ALTER kw2=TABLE tableIdentifier partitionSpec? kw3=REPLACE kw4=COLUMNS
+    | kw1=START kw2=TRANSACTION
+    | kw1=COMMIT
+    | kw1=ROLLBACK
+    | kw1=DFS
+    ;
+
+createTableHeader  // CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] name
+    : CREATE TEMPORARY? EXTERNAL? TABLE (IF NOT EXISTS)? identifierReference
+    ;
+
+replaceTableHeader  // [CREATE OR] REPLACE TABLE name
+    : (CREATE OR)? REPLACE TABLE identifierReference
+    ;
+
+bucketSpec  // CLUSTERED BY (cols) [SORTED BY (cols)] INTO n BUCKETS
+    : CLUSTERED BY identifierList
+      (SORTED BY orderedIdentifierList)?
+      INTO INTEGER_VALUE BUCKETS
+    ;
+
+skewSpec  // SKEWED BY (cols) ON flat-or-nested constant list [STORED AS DIRECTORIES]
+    : SKEWED BY identifierList
+      ON (constantList | nestedConstantList)
+      (STORED AS DIRECTORIES)?
+    ;
+
+locationSpec  // LOCATION followed by a string literal
+    : LOCATION stringLit
+    ;
+
+commentSpec  // COMMENT followed by a string literal
+    : COMMENT stringLit
+    ;
+
+query  // optional WITH clause, then the set-operation tree, then ordering / limit clauses
+    : ctes? queryTerm queryOrganization
+    ;
+
+insertInto  // INSERT header only; the rule that references it supplies the query or body that follows
+    : INSERT OVERWRITE TABLE? identifierReference (partitionSpec (IF NOT EXISTS)?)? (BY NAME | identifierList)?  #insertOverwriteTable
+    | INSERT INTO TABLE? identifierReference partitionSpec? (IF NOT EXISTS)? (BY NAME | identifierList)?  #insertIntoTable
+    | INSERT INTO TABLE? identifierReference REPLACE whereClause  #insertIntoReplaceWhere
+    | INSERT OVERWRITE LOCAL? DIRECTORY path=stringLit rowFormat? createFileFormat?  #insertOverwriteHiveDir
+    | INSERT OVERWRITE LOCAL? DIRECTORY (path=stringLit)? tableProvider (OPTIONS options=propertyList)?  #insertOverwriteDir
+    ;
+
+partitionSpecLocation  // partition spec with an optional LOCATION
+    : partitionSpec locationSpec?
+    ;
+
+partitionSpec  // PARTITION (val, val, ...)
+    : PARTITION LEFT_PAREN partitionVal (COMMA partitionVal)* RIGHT_PAREN
+    ;
+
+partitionVal  // column name alone, column = constant, or column = DEFAULT
+    : identifier (EQ constant)?
+    | identifier EQ DEFAULT
+    ;
+
+namespace  // the three singular keywords are interchangeable wherever this rule is referenced
+    : NAMESPACE
+    | DATABASE
+    | SCHEMA
+    ;
+
+namespaces  // plural counterparts, referenced by SHOW in statement
+    : NAMESPACES
+    | DATABASES
+    | SCHEMAS
+    ;
+
+describeFuncName  // operand of DESCRIBE FUNCTION: a name, a string literal, or an operator symbol / keyword
+    : identifierReference
+    | stringLit
+    | comparisonOperator
+    | arithmeticOperator
+    | predicateOperator
+    ;
+
+describeColName  // dot-separated column path; each segment is collected in nameParts
+    : nameParts+=identifier (DOT nameParts+=identifier)*
+    ;
+
+ctes  // WITH followed by one or more comma-separated named queries
+    : WITH namedQuery (COMMA namedQuery)*
+    ;
+
+namedQuery  // name [(column aliases)] [AS] (query)
+    : name=errorCapturingIdentifier (columnAliases=identifierList)? AS? LEFT_PAREN query RIGHT_PAREN
+    ;
+
+tableProvider  // USING followed by a multipart identifier
+    : USING multipartIdentifier
+    ;
+
+createTableClauses  // zero or more of the clauses below, in any order; the grammar itself does not reject repeats
+    : ( OPTIONS options=expressionPropertyList
+      | PARTITIONED BY partitioning=partitionFieldList
+      | skewSpec
+      | bucketSpec
+      | rowFormat
+      | createFileFormat
+      | locationSpec
+      | commentSpec
+      | TBLPROPERTIES tableProps=propertyList )*
+    ;
+
+propertyList  // parenthesised, comma-separated properties; at least one is required
+    : LEFT_PAREN property (COMMA property)* RIGHT_PAREN
+    ;
+
+property  // key alone, key value, or key = value
+    : key=propertyKey (EQ? value=propertyValue)?
+    ;
+
+propertyKey  // dot-separated identifiers or a string literal
+    : identifier (DOT identifier)*
+    | stringLit
+    ;
+
+propertyValue  // integer, decimal, boolean or string literal
+    : INTEGER_VALUE
+    | DECIMAL_VALUE
+    | booleanValue
+    | stringLit
+    ;
+
+expressionPropertyList  // same shape as propertyList, with expression-valued entries
+    : LEFT_PAREN expressionProperty (COMMA expressionProperty)* RIGHT_PAREN
+    ;
+
+expressionProperty  // key alone, key expr, or key = expr
+    : key=propertyKey (EQ? value=expression)?
+    ;
+
+constantList  // (c1, c2, ...)
+    : LEFT_PAREN constant (COMMA constant)* RIGHT_PAREN
+    ;
+
+nestedConstantList  // ((c1, c2), (c3, c4), ...)
+    : LEFT_PAREN constantList (COMMA constantList)* RIGHT_PAREN
+    ;
+
+createFileFormat  // STORED AS <format> or STORED BY <handler>
+    : STORED AS fileFormat
+    | STORED BY storageHandler
+    ;
+
+fileFormat  // explicit input / output format strings, or a single identifier
+    : INPUTFORMAT inFmt=stringLit OUTPUTFORMAT outFmt=stringLit  #tableFileFormat
+    | identifier  #genericFileFormat
+    ;
+
+storageHandler  // string literal with optional WITH SERDEPROPERTIES (...)
+    : stringLit (WITH SERDEPROPERTIES propertyList)?
+    ;
+
+resource  // identifier followed by a string literal; referenced by CREATE FUNCTION ... USING in statement
+    : identifier stringLit
+    ;
+
+dmlStatementNoWith  // INSERT, multi-insert, DELETE, UPDATE and MERGE; statement puts the optional WITH clause in front
+    : insertInto query  #singleInsertQuery
+    | fromClause multiInsertQueryBody+  #multiInsertQuery
+    | DELETE FROM identifierReference tableAlias whereClause?  #deleteFromTable
+    | UPDATE identifierReference tableAlias setClause whereClause?  #updateTable
+    | MERGE INTO target=identifierReference targetAlias=tableAlias
+        USING (source=identifierReference |
+               LEFT_PAREN sourceQuery=query RIGHT_PAREN) sourceAlias=tableAlias
+        ON mergeCondition=booleanExpression
+        matchedClause*
+        notMatchedClause*
+        notMatchedBySourceClause*  #mergeIntoTable
+    ;
+
+identifierReference  // IDENTIFIER_KW ( expression ), or a plain multipart identifier
+    : IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN
+    | multipartIdentifier
+    ;
+
+queryOrganization  // every clause is optional, but the ones present must appear in this order
+    : (ORDER BY order+=sortItem (COMMA order+=sortItem)*)?
+      (CLUSTER BY clusterBy+=expression (COMMA clusterBy+=expression)*)?
+      (DISTRIBUTE BY distributeBy+=expression (COMMA distributeBy+=expression)*)?
+      (SORT BY sort+=sortItem (COMMA sort+=sortItem)*)?
+      windowClause?
+      (LIMIT (ALL | limit=expression))?
+      (OFFSET offset=expression)?
+    ;
+
+multiInsertQueryBody  // one INSERT header plus its body; repeated after a shared FROM in dmlStatementNoWith
+    : insertInto fromStatementBody
+    ;
+
+queryTerm // left-recursive set-operation rule; alternative order encodes precedence, so do not reorder
+ : queryPrimary #queryTermDefault
+ | left=queryTerm {legacy_setops_precedence_enabled}? // legacy flag on: all four set operators share this one level
+ operator=(INTERSECT | UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation
+ | left=queryTerm {!legacy_setops_precedence_enabled}? // legacy flag off: INTERSECT is listed first and so binds tighter
+ operator=INTERSECT setQuantifier? right=queryTerm #setOperation
+ | left=queryTerm {!legacy_setops_precedence_enabled}? // legacy flag off: UNION, EXCEPT and SETMINUS at the lower level
+ operator=(UNION | EXCEPT | SETMINUS) setQuantifier? right=queryTerm #setOperation
+ ;
+
+queryPrimary  // operand of a set operation
+    : querySpecification  #queryPrimaryDefault
+    | fromStatement  #fromStmt
+    | TABLE identifierReference  #table
+    | inlineTable  #inlineTableDefault1
+    | LEFT_PAREN query RIGHT_PAREN  #subquery
+    ;
+
+sortItem  // expression [ASC | DESC] [NULLS LAST | NULLS FIRST]
+    : expression ordering=(ASC | DESC)? (NULLS nullOrder=(LAST | FIRST))?
+    ;
+
+fromStatement  // FROM-first form: one FROM clause followed by one or more bodies
+    : fromClause fromStatementBody+
+    ;
+
+fromStatementBody  // transform or select body of a FROM-first statement; both end with queryOrganization
+    : transformClause
+      whereClause?
+      queryOrganization
+    | selectClause
+      lateralView*
+      whereClause?
+      aggregationClause?
+      havingClause?
+      windowClause?
+      queryOrganization
+    ;
+
+querySpecification  // transform-style or SELECT-style query block; everything after the first clause is optional
+    : transformClause
+      fromClause?
+      lateralView*
+      whereClause?
+      aggregationClause?
+      havingClause?
+      windowClause?  #transformQuerySpecification
+    | selectClause
+      fromClause?
+      lateralView*
+      whereClause?
+      aggregationClause?
+      havingClause?
+      windowClause?  #regularQuerySpecification
+    ;
+
+transformClause  // SELECT TRANSFORM(...), MAP ... or REDUCE ..., then a mandatory USING script with optional row formats and schema
+    : ( SELECT kind=TRANSFORM LEFT_PAREN setQuantifier? expressionSeq RIGHT_PAREN
+      | kind=MAP setQuantifier? expressionSeq
+      | kind=REDUCE setQuantifier? expressionSeq )
+      inRowFormat=rowFormat?
+      (RECORDWRITER recordWriter=stringLit)?
+      USING script=stringLit
+      (AS (identifierSeq | colTypeList | LEFT_PAREN (identifierSeq | colTypeList) RIGHT_PAREN))?
+      outRowFormat=rowFormat?
+      (RECORDREADER recordReader=stringLit)?
+    ;
+
+selectClause  // SELECT, any number of hints, optional DISTINCT / ALL, then the select list
+    : SELECT (hints+=hint)* setQuantifier? namedExpressionSeq
+    ;
+
+setClause  // SET followed by comma-separated assignments; referenced by UPDATE in dmlStatementNoWith
+    : SET assignmentList
+    ;
+
+matchedClause  // WHEN MATCHED [AND cond] THEN action
+    : WHEN MATCHED (AND matchedCond=booleanExpression)? THEN matchedAction
+    ;
+notMatchedClause  // WHEN NOT MATCHED [BY TARGET] [AND cond] THEN action
+    : WHEN NOT MATCHED (BY TARGET)? (AND notMatchedCond=booleanExpression)? THEN notMatchedAction
+    ;
+
+notMatchedBySourceClause  // WHEN NOT MATCHED BY SOURCE [AND cond] THEN action
+    : WHEN NOT MATCHED BY SOURCE (AND notMatchedBySourceCond=booleanExpression)? THEN notMatchedBySourceAction
+    ;
+
+matchedAction  // DELETE, UPDATE SET *, or UPDATE SET assignments
+    : DELETE
+    | UPDATE SET ASTERISK
+    | UPDATE SET assignmentList
+    ;
+
+notMatchedAction  // INSERT *, or INSERT (columns) VALUES (expressions)
+    : INSERT ASTERISK
+    | INSERT LEFT_PAREN columns=multipartIdentifierList RIGHT_PAREN
+        VALUES LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
+    ;
+
+notMatchedBySourceAction  // DELETE or UPDATE SET assignments; there is no UPDATE SET * form here
+    : DELETE
+    | UPDATE SET assignmentList
+    ;
+
+assignmentList  // one or more comma-separated assignments
+    : assignment (COMMA assignment)*
+    ;
+
+assignment  // multipart identifier = expression
+    : key=multipartIdentifier EQ value=expression
+    ;
+
+whereClause  // WHERE followed by a boolean expression
+    : WHERE booleanExpression
+    ;
+
+havingClause  // HAVING followed by a boolean expression
+    : HAVING booleanExpression
+    ;
+
+hint  // one or more hint statements between HENT_START and HENT_END; the separating comma is optional
+    : HENT_START hintStatements+=hintStatement (COMMA? hintStatements+=hintStatement)* HENT_END
+    ;
+
+hintStatement  // hint name alone, or a name with parenthesised primary-expression parameters
+    : hintName=identifier
+    | hintName=identifier LEFT_PAREN parameters+=primaryExpression (COMMA parameters+=primaryExpression)* RIGHT_PAREN
+    ;
+
+fromClause  // FROM with comma-separated relations, then optional lateral views, PIVOT and UNPIVOT
+    : FROM relation (COMMA relation)* lateralView* pivotClause? unpivotClause?
+    ;
+
+temporalClause  // [FOR] version AS OF <version>, or [FOR] time AS OF <value expression>
+    : FOR? (SYSTEM_VERSION | VERSION) AS OF version
+    | FOR? (SYSTEM_TIME | TIMESTAMP) AS OF timestamp=valueExpression
+    ;
+
+aggregationClause  // GROUP BY in two syntaxes: grouping analytics inside the list, or a plain list with a trailing modifier
+    : GROUP BY groupingExpressionsWithGroupingAnalytics+=groupByClause
+        (COMMA groupingExpressionsWithGroupingAnalytics+=groupByClause)*
+    | GROUP BY groupingExpressions+=expression (COMMA groupingExpressions+=expression)* (
+        WITH kind=ROLLUP
+      | WITH kind=CUBE
+      | kind=GROUPING SETS LEFT_PAREN groupingSet (COMMA groupingSet)* RIGHT_PAREN)?
+    ;
+
+groupByClause  // one GROUP BY item: grouping analytics or a plain expression
+    : groupingAnalytics
+    | expression
+    ;
+
+groupingAnalytics  // ROLLUP(...), CUBE(...) or GROUPING SETS(...); GROUPING SETS elements may nest
+    : (ROLLUP | CUBE) LEFT_PAREN groupingSet (COMMA groupingSet)* RIGHT_PAREN
+    | GROUPING SETS LEFT_PAREN groupingElement (COMMA groupingElement)* RIGHT_PAREN
+    ;
+
+groupingElement  // element of GROUPING SETS: nested analytics or a grouping set
+    : groupingAnalytics
+    | groupingSet
+    ;
+
+groupingSet  // parenthesised, possibly empty expression list, or a single bare expression
+    : LEFT_PAREN (expression (COMMA expression)*)? RIGHT_PAREN
+    | expression
+    ;
+
+pivotClause  // PIVOT ( aggregates FOR column(s) IN ( value, ... ) )
+    : PIVOT LEFT_PAREN aggregates=namedExpressionSeq FOR pivotColumn IN LEFT_PAREN pivotValues+=pivotValue (COMMA pivotValues+=pivotValue)* RIGHT_PAREN RIGHT_PAREN
+    ;
+
+pivotColumn  // a single identifier, or a parenthesised identifier list
+    : identifiers+=identifier
+    | LEFT_PAREN identifiers+=identifier (COMMA identifiers+=identifier)* RIGHT_PAREN
+    ;
+
+pivotValue  // expression with an optional alias
+    : expression (AS? identifier)?
+    ;
+
+unpivotClause  // UNPIVOT [INCLUDE | EXCLUDE NULLS] ( operator ) [[AS] alias]
+    : UNPIVOT nullOperator=unpivotNullClause? LEFT_PAREN
+        operator=unpivotOperator
+      RIGHT_PAREN (AS? identifier)?
+    ;
+
+unpivotNullClause  // INCLUDE NULLS or EXCLUDE NULLS
+    : (INCLUDE | EXCLUDE) NULLS
+    ;
+
+unpivotOperator  // single-value-column form or multi-value-column form
+    : unpivotSingleValueColumnClause | unpivotMultiValueColumnClause
+    ;
+
+unpivotSingleValueColumnClause  // value column FOR name column IN ( column [alias], ... )
+    : unpivotValueColumn FOR unpivotNameColumn IN LEFT_PAREN unpivotColumns+=unpivotColumnAndAlias (COMMA unpivotColumns+=unpivotColumnAndAlias)* RIGHT_PAREN
+    ;
+
+unpivotMultiValueColumnClause  // ( value columns ) FOR name column IN ( ( columns ) [alias], ... )
+    : LEFT_PAREN unpivotValueColumns+=unpivotValueColumn (COMMA unpivotValueColumns+=unpivotValueColumn)* RIGHT_PAREN
+      FOR unpivotNameColumn
+      IN LEFT_PAREN unpivotColumnSets+=unpivotColumnSet (COMMA unpivotColumnSets+=unpivotColumnSet)* RIGHT_PAREN
+    ;
+
+unpivotColumnSet  // parenthesised column list with an optional alias
+    : LEFT_PAREN unpivotColumns+=unpivotColumn (COMMA unpivotColumns+=unpivotColumn)* RIGHT_PAREN unpivotAlias?
+    ;
+
+unpivotValueColumn  // a single identifier
+    : identifier
+    ;
+
+unpivotNameColumn  // a single identifier
+    : identifier
+    ;
+
+unpivotColumnAndAlias  // column with an optional alias
+    : unpivotColumn unpivotAlias?
+    ;
+
+unpivotColumn  // a multipart identifier
+    : multipartIdentifier
+    ;
+
+unpivotAlias  // [AS] identifier
+    : AS? identifier
+    ;
+
+lateralView  // LATERAL VIEW [OUTER] name(args) tableAlias [[AS] col, ...]
+    : LATERAL VIEW OUTER? qualifiedName LEFT_PAREN (expression (COMMA expression)*)? RIGHT_PAREN tblName=identifier (AS? colName+=identifier (COMMA colName+=identifier)*)?
+    ;
+
+setQuantifier  // DISTINCT or ALL
+    : DISTINCT
+    | ALL
+    ;
+
+relation  // [LATERAL] primary relation followed by any number of joins / pivots / unpivots
+    : LATERAL? relationPrimary relationExtension*
+    ;
+
+relationExtension  // a join, PIVOT or UNPIVOT applied to the relation built so far
+    : joinRelation
+    | pivotClause
+    | unpivotClause
+    ;
+
+joinRelation  // typed join with optional criteria, or NATURAL join (which takes no criteria)
+    : joinType JOIN LATERAL? right=relationPrimary joinCriteria?
+    | NATURAL joinType JOIN LATERAL? right=relationPrimary
+    ;
+
+joinType  // the first alternative can match nothing, so a bare JOIN keyword is accepted
+    : INNER?
+    | CROSS
+    | LEFT OUTER?
+    | LEFT? SEMI
+    | RIGHT OUTER?
+    | FULL OUTER?
+    | LEFT? ANTI
+    ;
+
+joinCriteria  // ON <boolean expression> or USING (columns)
+    : ON booleanExpression
+    | USING identifierList
+    ;
+
+sample  // TABLESAMPLE ( [method] ) [REPEATABLE ( seed )]
+    : TABLESAMPLE LEFT_PAREN sampleMethod? RIGHT_PAREN (REPEATABLE LEFT_PAREN seed=INTEGER_VALUE RIGHT_PAREN)?
+    ;
+
+sampleMethod  // by percentage, by row count, by bucket, or by a bare expression
+    : negativeSign=MINUS? percentage=(INTEGER_VALUE | DECIMAL_VALUE) PERCENTLIT  #sampleByPercentile
+    | expression ROWS  #sampleByRows
+    | sampleType=BUCKET numerator=INTEGER_VALUE OUT OF denominator=INTEGER_VALUE
+        (ON (identifier | qualifiedName LEFT_PAREN RIGHT_PAREN))?  #sampleByBucket
+    | bytes=expression  #sampleByBytes
+    ;
+
+identifierList  // parenthesised identifierSeq
+    : LEFT_PAREN identifierSeq RIGHT_PAREN
+    ;
+
+identifierSeq  // one or more comma-separated identifiers, collected in ident
+    : ident+=errorCapturingIdentifier (COMMA ident+=errorCapturingIdentifier)*
+    ;
+
+orderedIdentifierList  // parenthesised, comma-separated ordered identifiers
+    : LEFT_PAREN orderedIdentifier (COMMA orderedIdentifier)* RIGHT_PAREN
+    ;
+
+orderedIdentifier  // identifier with an optional ASC / DESC
+    : ident=errorCapturingIdentifier ordering=(ASC | DESC)?
+    ;
+
+identifierCommentList  // parenthesised, comma-separated identifiers, each with an optional comment
+    : LEFT_PAREN identifierComment (COMMA identifierComment)* RIGHT_PAREN
+    ;
+
+identifierComment  // identifier [COMMENT 'text']
+    : identifier commentSpec?
+    ;
+
+relationPrimary  // table reference, parenthesised query or relation, inline VALUES table, or table-valued function
+    : identifierReference temporalClause?
+      sample? tableAlias  #tableName
+    | LEFT_PAREN query RIGHT_PAREN sample? tableAlias  #aliasedQuery
+    | LEFT_PAREN relation RIGHT_PAREN sample? tableAlias  #aliasedRelation
+    | inlineTable  #inlineTableDefault2
+    | functionTable  #tableValuedFunction
+    ;
+
+inlineTable  // VALUES expr, expr, ... followed by a (possibly empty) table alias
+    : VALUES expression (COMMA expression)* tableAlias
+    ;
+
+functionTableSubqueryArgument  // TABLE name, TABLE ( name ), or TABLE ( query )
+    : TABLE identifierReference
+    | TABLE LEFT_PAREN identifierReference RIGHT_PAREN
+    | TABLE LEFT_PAREN query RIGHT_PAREN
+    ;
+
+functionTableNamedArgumentExpression  // key => TABLE argument
+    : key=identifier FAT_ARROW table=functionTableSubqueryArgument
+    ;
+
+functionTableReferenceArgument  // positional or named TABLE argument
+    : functionTableSubqueryArgument
+    | functionTableNamedArgumentExpression
+    ;
+
+functionTableArgument  // TABLE argument or ordinary function argument
+    : functionTableReferenceArgument
+    | functionArgument
+    ;
+
+functionTable  // name ( [arg, ...] ) followed by a (possibly empty) table alias
+    : funcName=functionName LEFT_PAREN
+      (functionTableArgument (COMMA functionTableArgument)*)?
+      RIGHT_PAREN tableAlias
+    ;
+
+tableAlias  // the whole body is optional, so this rule can match nothing: [[AS] name [(columns)]]
+    : (AS? strictIdentifier identifierList?)?
+    ;
+
+rowFormat  // ROW FORMAT SERDE ..., or ROW FORMAT DELIMITED with optional terminator clauses in fixed order
+    : ROW FORMAT SERDE name=stringLit (WITH SERDEPROPERTIES props=propertyList)?  #rowFormatSerde
+    | ROW FORMAT DELIMITED
+        (FIELDS TERMINATED BY fieldsTerminatedBy=stringLit (ESCAPED BY escapedBy=stringLit)?)?
+        (COLLECTION ITEMS TERMINATED BY collectionItemsTerminatedBy=stringLit)?
+        (MAP KEYS TERMINATED BY keysTerminatedBy=stringLit)?
+        (LINES TERMINATED BY linesSeparatedBy=stringLit)?
+        (NULL DEFINED AS nullDefinedAs=stringLit)?  #rowFormatDelimited
+    ;
+
+multipartIdentifierList  // one or more comma-separated multipart identifiers
+    : multipartIdentifier (COMMA multipartIdentifier)*
+    ;
+
+multipartIdentifier  // dot-separated identifier; each segment is collected in parts
+    : parts+=errorCapturingIdentifier (DOT parts+=errorCapturingIdentifier)*
+    ;
+
+multipartIdentifierPropertyList  // comma-separated multipart identifiers, each with optional OPTIONS
+    : multipartIdentifierProperty (COMMA multipartIdentifierProperty)*
+    ;
+
+multipartIdentifierProperty  // multipart identifier [OPTIONS (...)]
+    : multipartIdentifier (OPTIONS options=propertyList)?
+    ;
+
+tableIdentifier  // [db.]table
+    : (db=errorCapturingIdentifier DOT)? table=errorCapturingIdentifier
+    ;
+
+functionIdentifier  // [db.]function
+    : (db=errorCapturingIdentifier DOT)? function=errorCapturingIdentifier
+    ;
+
+namedExpression  // expression with an optional alias: a single name or a parenthesised identifier list
+    : expression (AS? (name=errorCapturingIdentifier | identifierList))?
+    ;
+
+namedExpressionSeq  // one or more comma-separated named expressions
+    : namedExpression (COMMA namedExpression)*
+    ;
+
+partitionFieldList  // parenthesised, comma-separated partition fields
+    : LEFT_PAREN fields+=partitionField (COMMA fields+=partitionField)* RIGHT_PAREN
+    ;
+
+partitionField  // a transform, or a column definition (name plus type)
+    : transform  #partitionTransform
+    | colType  #partitionColumn
+    ;
+
+transform  // a bare qualified name, or name(arg, ...)
+    : qualifiedName  #identityTransform
+    | transformName=identifier
+      LEFT_PAREN argument+=transformArgument (COMMA argument+=transformArgument)* RIGHT_PAREN  #applyTransform
+    ;
+
+transformArgument  // qualified name or constant
+    : qualifiedName
+    | constant
+    ;
+
+expression  // alias for booleanExpression, the top of the expression hierarchy
+    : booleanExpression
+    ;
+
+namedArgumentExpression  // key => expression
+    : key=identifier FAT_ARROW value=expression
+    ;
+
+functionArgument  // positional expression or named argument
+    : expression
+    | namedArgumentExpression
+    ;
+
+expressionSeq  // one or more comma-separated expressions
+    : expression (COMMA expression)*
+    ;
+
+booleanExpression // left-recursive; among the binary alternatives the earlier one binds tighter, so AND binds tighter than OR
+ : NOT booleanExpression #logicalNot
+ | EXISTS LEFT_PAREN query RIGHT_PAREN #exists
+ | valueExpression predicate? #predicated // a value expression with an optional trailing predicate
+ | left=booleanExpression operator=AND right=booleanExpression #logicalBinary
+ | left=booleanExpression operator=OR right=booleanExpression #logicalBinary
+ ;
+
+predicate  // suffix applied to a valueExpression by booleanExpression; kind records the matched keyword
+    : NOT? kind=BETWEEN lower=valueExpression AND upper=valueExpression
+    | NOT? kind=IN LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN
+    | NOT? kind=IN LEFT_PAREN query RIGHT_PAREN
+    | NOT? kind=RLIKE pattern=valueExpression
+    | NOT? kind=(LIKE | ILIKE) quantifier=(ANY | SOME | ALL) (LEFT_PAREN RIGHT_PAREN | LEFT_PAREN expression (COMMA expression)* RIGHT_PAREN)
+    | NOT? kind=(LIKE | ILIKE) pattern=valueExpression (ESCAPE escapeChar=stringLit)?
+    | IS NOT? kind=NULL
+    | IS NOT? kind=(TRUE | FALSE | UNKNOWN)
+    | IS NOT? kind=DISTINCT FROM right=valueExpression
+    ;
+
+valueExpression // left-recursive; binary alternatives are listed from tightest to loosest binding, so do not reorder
+ : primaryExpression #valueExpressionDefault
+ | operator=(MINUS | PLUS | TILDE) valueExpression #arithmeticUnary
+ | left=valueExpression operator=(ASTERISK | SLASH | PERCENT | DIV) right=valueExpression #arithmeticBinary
+ | left=valueExpression operator=(PLUS | MINUS | CONCAT_PIPE) right=valueExpression #arithmeticBinary
+ | left=valueExpression operator=AMPERSAND right=valueExpression #arithmeticBinary
+ | left=valueExpression operator=HAT right=valueExpression #arithmeticBinary
+ | left=valueExpression operator=PIPE right=valueExpression #arithmeticBinary
+ | left=valueExpression comparisonOperator right=valueExpression #comparison // comparisons bind loosest within this rule
+ ;
+
+datetimeUnit  // unit keywords accepted by the timestampadd / timestampdiff alternatives of primaryExpression
+    : YEAR | QUARTER | MONTH
+    | WEEK | DAY | DAYOFYEAR
+    | HOUR | MINUTE | SECOND | MILLISECOND | MICROSECOND
+    ;
+
+primaryExpression // atoms and postfix forms; left-recursive through #subscript and #dereference, so keep the alternative order
+ : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER | USER) #currentLike
+ | name=(TIMESTAMPADD | DATEADD | DATE_ADD) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA unitsAmount=valueExpression COMMA timestamp=valueExpression RIGHT_PAREN #timestampadd
+ | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) LEFT_PAREN (unit=datetimeUnit | invalidUnit=stringLit) COMMA startTimestamp=valueExpression COMMA endTimestamp=valueExpression RIGHT_PAREN #timestampdiff
+ | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase
+ | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase
+ | name=(CAST | TRY_CAST) LEFT_PAREN expression AS dataType RIGHT_PAREN #cast
+ | STRUCT LEFT_PAREN (argument+=namedExpression (COMMA argument+=namedExpression)*)? RIGHT_PAREN #struct
+ | FIRST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #first
+ | ANY_VALUE LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #any_value
+ | LAST LEFT_PAREN expression (IGNORE NULLS)? RIGHT_PAREN #last
+ | POSITION LEFT_PAREN substr=valueExpression IN str=valueExpression RIGHT_PAREN #position
+ | constant #constantDefault
+ | ASTERISK #star
+ | qualifiedName DOT ASTERISK #star
+ | LEFT_PAREN namedExpression (COMMA namedExpression)+ RIGHT_PAREN #rowConstructor // needs at least two items; one item falls to #parenthesizedExpression
+ | LEFT_PAREN query RIGHT_PAREN #subqueryExpression
+ | functionName LEFT_PAREN (setQuantifier? argument+=functionArgument
+ (COMMA argument+=functionArgument)*)? RIGHT_PAREN
+ (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)?
+ (nullsOption=(IGNORE | RESPECT) NULLS)? ( OVER windowSpec)? #functionCall
+ | identifier ARROW expression #lambda
+ | LEFT_PAREN identifier (COMMA identifier)+ RIGHT_PAREN ARROW expression #lambda
+ | value=primaryExpression LEFT_BRACKET index=valueExpression RIGHT_BRACKET #subscript
+ | identifier #columnReference
+ | base=primaryExpression DOT fieldName=identifier #dereference
+ | LEFT_PAREN expression RIGHT_PAREN #parenthesizedExpression
+ | EXTRACT LEFT_PAREN field=identifier FROM source=valueExpression RIGHT_PAREN #extract
+ | (SUBSTR | SUBSTRING) LEFT_PAREN str=valueExpression (FROM | COMMA) pos=valueExpression
+ ((FOR | COMMA) len=valueExpression)? RIGHT_PAREN #substring
+ | TRIM LEFT_PAREN trimOption=(BOTH | LEADING | TRAILING)? (trimStr=valueExpression)?
+ FROM srcStr=valueExpression RIGHT_PAREN #trim
+ | OVERLAY LEFT_PAREN input=valueExpression PLACING replace=valueExpression
+ FROM position=valueExpression (FOR length=valueExpression)? RIGHT_PAREN #overlay
+ | name=(PERCENTILE_CONT | PERCENTILE_DISC) LEFT_PAREN percentage=valueExpression RIGHT_PAREN
+ WITHIN GROUP LEFT_PAREN ORDER BY sortItem RIGHT_PAREN
+ (FILTER LEFT_PAREN WHERE where=booleanExpression RIGHT_PAREN)? ( OVER windowSpec)? #percentile
+ ;
+
+literalType  // keyword placed before a string literal in a typed constant; any other identifier is captured as unsupportedType
+    : DATE
+    | TIMESTAMP | TIMESTAMP_LTZ | TIMESTAMP_NTZ
+    | INTERVAL
+    | BINARY_HEX
+    | unsupportedType=identifier
+    ;
+
+constant  // literal values and parameter markers
+    : NULL  #nullLiteral
+    | QUESTION  #posParameterLiteral
+    | COLON identifier  #namedParameterLiteral
+    | interval  #intervalLiteral
+    | literalType stringLit  #typeConstructor
+    | number  #numericLiteral
+    | booleanValue  #booleanLiteral
+    | stringLit+  #stringLiteral
+    ;
+
+comparisonOperator  // comparison tokens accepted by the #comparison alternative of valueExpression
+    : EQ | NEQ | NEQJ | LT | LTE | GT | GTE | NSEQ
+    ;
+
+arithmeticOperator  // arithmetic and bitwise operator tokens; referenced by describeFuncName
+    : PLUS | MINUS | ASTERISK | SLASH | PERCENT | DIV | TILDE | AMPERSAND | PIPE | CONCAT_PIPE | HAT
+    ;
+
+predicateOperator  // logical / predicate keywords; referenced by describeFuncName
+    : OR | AND | IN | NOT
+    ;
+
+booleanValue  // TRUE or FALSE
+    : TRUE | FALSE
+    ;
+
+interval  // INTERVAL followed by a multi-unit or a unit-to-unit body
+    : INTERVAL (errorCapturingMultiUnitsInterval | errorCapturingUnitToUnitInterval)
+    ;
+
+errorCapturingMultiUnitsInterval  // multi-unit body; a trailing unit-to-unit part is still parsed - NOTE(review): presumably so it can be reported as an error later, confirm
+    : body=multiUnitsInterval unitToUnitInterval?
+    ;
+
+multiUnitsInterval  // one or more value / unit pairs
+    : (intervalValue unit+=unitInMultiUnits)+
+    ;
+
+errorCapturingUnitToUnitInterval  // unit-to-unit body; trailing extra parts are captured under error1 / error2
+    : body=unitToUnitInterval (error1=multiUnitsInterval | error2=unitToUnitInterval)?
+    ;
+
+unitToUnitInterval  // value unit TO unit
+    : value=intervalValue from=unitInUnitToUnit TO to=unitInUnitToUnit
+    ;
+
+intervalValue  // optional sign, then an integer, decimal or string literal
+    : (PLUS | MINUS)?
+      (INTEGER_VALUE | DECIMAL_VALUE | stringLit)
+    ;
+
+unitInMultiUnits  // singular and plural unit keywords, from nanosecond to year
+    : NANOSECOND | NANOSECONDS | MICROSECOND | MICROSECONDS | MILLISECOND | MILLISECONDS
+    | SECOND | SECONDS | MINUTE | MINUTES | HOUR | HOURS | DAY | DAYS | WEEK | WEEKS
+    | MONTH | MONTHS | YEAR | YEARS
+    ;
+
+unitInUnitToUnit  // singular unit keywords allowed on either side of TO
+    : SECOND | MINUTE | HOUR | DAY | MONTH | YEAR
+    ;
+
+colPosition  // FIRST, or AFTER <column>; position records which keyword matched
+    : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier
+    ;
+
+type // Type name as used by the primitiveDataType alternative of dataType.
+ : BOOLEAN
+ | TINYINT | BYTE
+ | SMALLINT | SHORT
+ | INT | INTEGER
+ | BIGINT | LONG
+ | FLOAT | REAL
+ | DOUBLE
+ | DATE
+ | TIMESTAMP | TIMESTAMP_NTZ | TIMESTAMP_LTZ
+ | STRING
+ | CHARACTER | CHAR
+ | VARCHAR
+ | BINARY
+ | DECIMAL | DEC | NUMERIC
+ | VOID
+ | INTERVAL
+ | ARRAY | STRUCT | MAP // bare complex-type keywords, without the angle-bracket form handled in dataType
+ | unsupportedType=identifier // any other identifier is accepted and captured under the label unsupportedType
+ ;
+
+dataType // Full data type: complex, interval, or primitive with optional integer arguments.
+ : complex=ARRAY LT dataType GT #complexDataType
+ | complex=MAP LT dataType COMMA dataType GT #complexDataType
+ | complex=STRUCT (LT complexColTypeList? GT | NEQ) #complexDataType // NOTE(review): NEQ presumably covers an empty `<>` lexed as one token - confirm against the lexer
+ | INTERVAL from=(YEAR | MONTH) (TO to=MONTH)? #yearMonthIntervalDataType
+ | INTERVAL from=(DAY | HOUR | MINUTE | SECOND)
+ (TO to=(HOUR | MINUTE | SECOND))? #dayTimeIntervalDataType
+ | type (LEFT_PAREN INTEGER_VALUE
+ (COMMA INTEGER_VALUE)* RIGHT_PAREN)? #primitiveDataType // optional parenthesised, comma-separated integer arguments
+ ;
+
+qualifiedColTypeWithPositionList // Comma-separated, non-empty list of qualifiedColTypeWithPosition.
+ : qualifiedColTypeWithPosition (COMMA qualifiedColTypeWithPosition)*
+ ;
+
+qualifiedColTypeWithPosition // Multipart column name, its data type, then any number of descriptors in any order.
+ : name=multipartIdentifier dataType colDefinitionDescriptorWithPosition*
+ ;
+
+colDefinitionDescriptorWithPosition // One column descriptor; unlike colDefinitionOption it allows colPosition and has no generationExpression.
+ : NOT NULL
+ | defaultExpression
+ | commentSpec
+ | colPosition
+ ;
+
+defaultExpression // DEFAULT keyword followed by an expression.
+ : DEFAULT expression
+ ;
+
+colTypeList // Comma-separated, non-empty list of colType.
+ : colType (COMMA colType)*
+ ;
+
+colType // Column name and data type, then optional NOT NULL and optional comment, in that fixed order.
+ : colName=errorCapturingIdentifier dataType (NOT NULL)? commentSpec?
+ ;
+
+createOrReplaceTableColTypeList // Comma-separated, non-empty list of createOrReplaceTableColType.
+ : createOrReplaceTableColType (COMMA createOrReplaceTableColType)*
+ ;
+
+createOrReplaceTableColType // Column name and data type, then any number of colDefinitionOption in any order.
+ : colName=errorCapturingIdentifier dataType colDefinitionOption*
+ ;
+
+colDefinitionOption // One column option; the grammar itself does not prevent an option from being repeated.
+ : NOT NULL
+ | defaultExpression
+ | generationExpression
+ | commentSpec
+ ;
+
+generationExpression // GENERATED ALWAYS AS followed by a parenthesised expression.
+ : GENERATED ALWAYS AS LEFT_PAREN expression RIGHT_PAREN
+ ;
+
+complexColTypeList // Comma-separated, non-empty list of complexColType (the field list of STRUCT in dataType).
+ : complexColType (COMMA complexColType)*
+ ;
+
+complexColType // Field name, optional COLON, data type, then optional NOT NULL and optional comment.
+ : identifier COLON? dataType (NOT NULL)? commentSpec?
+ ;
+
+whenClause // One WHEN ... THEN ... branch; the two expressions are labelled condition and result.
+ : WHEN condition=expression THEN result=expression
+ ;
+
+windowClause // WINDOW keyword followed by one or more comma-separated named windows.
+ : WINDOW namedWindow (COMMA namedWindow)*
+ ;
+
+namedWindow // `<name> AS <windowSpec>`.
+ : name=errorCapturingIdentifier AS windowSpec
+ ;
+
+windowSpec // Either a reference to a window by name (windowRef) or an inline definition (windowDef).
+ : name=errorCapturingIdentifier #windowRef
+ | LEFT_PAREN name=errorCapturingIdentifier RIGHT_PAREN #windowRef // same reference, parenthesised
+ | LEFT_PAREN
+ ( CLUSTER BY partition+=expression (COMMA partition+=expression)* // CLUSTER BY form: partition expressions only
+ | ((PARTITION | DISTRIBUTE) BY partition+=expression (COMMA partition+=expression)*)? // otherwise: optional partitioning...
+ ((ORDER | SORT) BY sortItem (COMMA sortItem)*)?) // ...followed by optional ordering
+ windowFrame?
+ RIGHT_PAREN #windowDef
+ ;
+
+windowFrame // RANGE or ROWS frame, given either a single start bound or BETWEEN start AND end.
+ : frameType=RANGE start=frameBound
+ | frameType=ROWS start=frameBound
+ | frameType=RANGE BETWEEN start=frameBound AND end=frameBound
+ | frameType=ROWS BETWEEN start=frameBound AND end=frameBound
+ ;
+
+frameBound // One frame boundary; boundType holds the PRECEDING, FOLLOWING or CURRENT token.
+ : UNBOUNDED boundType=(PRECEDING | FOLLOWING)
+ | boundType=CURRENT ROW
+ | expression boundType=(PRECEDING | FOLLOWING)
+ ;
+
+qualifiedNameList // Comma-separated, non-empty list of qualifiedName.
+ : qualifiedName (COMMA qualifiedName)*
+ ;
+
+functionName // Everything accepted in function-name position.
+ : IDENTIFIER_KW LEFT_PAREN expression RIGHT_PAREN // IDENTIFIER_KW applied to a parenthesised expression
+ | identFunc=IDENTIFIER_KW // IDENTIFIER itself is also a valid function name.
+ | qualifiedName
+ | FILTER // FILTER, LEFT and RIGHT keyword tokens are listed explicitly as function names
+ | LEFT
+ | RIGHT
+ ;
+
+qualifiedName // One or more identifiers separated by DOT.
+ : identifier (DOT identifier)*
+ ;
+
+// This rule explicitly captures malformed identifiers such as test-table, which should be written as `test-table`.
+// Use errorCapturingIdentifier instead of identifier only where the symbol that immediately follows is not an
+// expression; otherwise valid expressions such as "a-b" could be recognized as a single identifier.
+errorCapturingIdentifier // An identifier followed by the (possibly empty) errorCapturingIdentifierExtra tail.
+ : identifier errorCapturingIdentifierExtra
+ ;
+
+// extra left-factoring grammar
+errorCapturingIdentifierExtra // Tail of errorCapturingIdentifier; the label records whether a `-name` suffix was present.
+ : (MINUS identifier)+ #errorIdent // one or more `- identifier` suffixes
+ | #realIdent // empty alternative: nothing follows the identifier
+ ;
+
+identifier // A strictIdentifier, plus strictNonReserved keywords when the predicate below allows them.
+ : strictIdentifier
+ | {!SQL_standard_keyword_behavior}? strictNonReserved // semantic predicate: only when SQL_standard_keyword_behavior is false
+ ;
+
+strictIdentifier // Plain or quoted identifier, or a keyword from the non-reserved list selected by SQL_standard_keyword_behavior.
+ : IDENTIFIER #unquotedIdentifier
+ | quotedIdentifier #quotedIdentifierAlternative
+ | {SQL_standard_keyword_behavior}? ansiNonReserved #unquotedIdentifier // flag true: ansiNonReserved keywords
+ | {!SQL_standard_keyword_behavior}? nonReserved #unquotedIdentifier // flag false: nonReserved keywords
+ ;
+
+quotedIdentifier // Back-quoted identifier, or a double-quoted one when the predicate below allows it.
+ : BACKQUOTED_IDENTIFIER
+ | {double_quoted_identifiers}? DOUBLEQUOTED_STRING // only when double_quoted_identifiers is true; stringLit uses the negated predicate
+ ;
+
+backQuotedIdentifier // Only the back-quoted form, regardless of double_quoted_identifiers.
+ : BACKQUOTED_IDENTIFIER
+ ;
+
+number // Numeric literal with an optional leading MINUS; the label identifies the literal kind.
+ : {!legacy_exponent_literal_as_decimal_enabled}? MINUS? EXPONENT_VALUE #exponentLiteral // flag false: exponent and decimal forms get separate labels
+ | {!legacy_exponent_literal_as_decimal_enabled}? MINUS? DECIMAL_VALUE #decimalLiteral
+ | {legacy_exponent_literal_as_decimal_enabled}? MINUS? (EXPONENT_VALUE | DECIMAL_VALUE) #legacyDecimalLiteral // flag true: both forms share one label
+ | MINUS? INTEGER_VALUE #integerLiteral
+ | MINUS? BIGINT_LITERAL #bigIntLiteral
+ | MINUS? SMALLINT_LITERAL #smallIntLiteral
+ | MINUS? TINYINT_LITERAL #tinyIntLiteral
+ | MINUS? DOUBLE_LITERAL #doubleLiteral
+ | MINUS? FLOAT_LITERAL #floatLiteral
+ | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral
+ ;
+
+alterColumnAction // One column-alteration action (going by the rule name; the referencing rule is outside this chunk).
+ : TYPE dataType
+ | commentSpec
+ | colPosition
+ | setOrDrop=(SET | DROP) NOT NULL // the SET or DROP token is captured as setOrDrop
+ | SET defaultExpression
+ | dropDefault=DROP DEFAULT // the DROP token is captured as dropDefault
+ ;
+
+stringLit // String literal; double-quoted text counts as a string only when the predicate below allows it.
+ : STRING_LITERAL
+ | {!double_quoted_identifiers}? DOUBLEQUOTED_STRING // only when double_quoted_identifiers is false; quotedIdentifier uses the opposite predicate
+ ;
+
+comment // Comment value: a string literal or the NULL keyword.
+ : stringLit
+ | NULL
+ ;
+
+version // Version value: an integer or a string literal.
+ : INTEGER_VALUE
+ | stringLit
+ ;
+
+// When `SQL_standard_keyword_behavior=true`, there are 2 kinds of keywords in Spark SQL.
+// - Reserved keywords:
+// Keywords that are reserved and can't be used as identifiers for table, view, column,
+// function, alias, etc.
+// - Non-reserved keywords:
+// Keywords that have a special meaning only in particular contexts and can be used as
+// identifiers in other contexts. For example, `EXPLAIN SELECT ...` is a command, but EXPLAIN
+// can be used as identifiers in other places.
+// You can find the full keywords list by searching "Start of the keywords list" in this file.
+// The non-reserved keywords are listed below. Keywords not in this list are reserved keywords.
+ansiNonReserved // Referenced from strictIdentifier under the {SQL_standard_keyword_behavior}? predicate.
+//--ANSI-NON-RESERVED-START
+ : ADD
+ | AFTER
+ | ALTER
+ | ALWAYS
+ | ANALYZE
+ | ANTI
+ | ANY_VALUE
+ | ARCHIVE
+ | ARRAY
+ | ASC
+ | AT
+ | BETWEEN
+ | BIGINT
+ | BINARY
+ | BINARY_HEX
+ | BOOLEAN
+ | BUCKET
+ | BUCKETS
+ | BY
+ | BYTE
+ | CACHE
+ | CASCADE
+ | CATALOG
+ | CATALOGS
+ | CHANGE
+ | CHAR
+ | CHARACTER
+ | CLEAR
+ | CLUSTER
+ | CLUSTERED
+ | CODEGEN
+ | COLLECTION
+ | COLUMNS
+ | COMMENT
+ | COMMIT
+ | COMPACT
+ | COMPACTIONS
+ | COMPUTE
+ | CONCATENATE
+ | COST
+ | CUBE
+ | CURRENT
+ | DATA
+ | DATABASE
+ | DATABASES
+ | DATE
+ | DATEADD
+ | DATE_ADD
+ | DATEDIFF
+ | DATE_DIFF
+ | DAY
+ | DAYS
+ | DAYOFYEAR
+ | DBPROPERTIES
+ | DEC
+ | DECIMAL
+ | DEFAULT
+ | DEFINED
+ | DELETE
+ | DELIMITED
+ | DESC
+ | DESCRIBE
+ | DFS
+ | DIRECTORIES
+ | DIRECTORY
+ | DISTRIBUTE
+ | DIV
+ | DOUBLE
+ | DROP
+ | ESCAPED
+ | EXCHANGE
+ | EXCLUDE
+ | EXISTS
+ | EXPLAIN
+ | EXPORT
+ | EXTENDED
+ | EXTERNAL
+ | EXTRACT
+ | FIELDS
+ | FILEFORMAT
+ | FIRST
+ | FLOAT
+ | FOLLOWING
+ | FORMAT
+ | FORMATTED
+ | FUNCTION
+ | FUNCTIONS
+ | GENERATED
+ | GLOBAL
+ | GROUPING
+ | HOUR
+ | HOURS
+ | IDENTIFIER_KW
+ | IF
+ | IGNORE
+ | IMPORT
+ | INCLUDE
+ | INDEX
+ | INDEXES
+ | INPATH
+ | INPUTFORMAT
+ | INSERT
+ | INT
+ | INTEGER
+ | INTERVAL
+ | ITEMS
+ | KEYS
+ | LAST
+ | LAZY
+ | LIKE
+ | ILIKE
+ | LIMIT
+ | LINES
+ | LIST
+ | LOAD
+ | LOCAL
+ | LOCATION
+ | LOCK
+ | LOCKS
+ | LOGICAL
+ | LONG
+ | MACRO
+ | MAP
+ | MATCHED
+ | MERGE
+ | MICROSECOND
+ | MICROSECONDS
+ | MILLISECOND
+ | MILLISECONDS
+ | MINUTE
+ | MINUTES
+ | MONTH
+ | MONTHS
+ | MSCK
+ | NAME
+ | NAMESPACE
+ | NAMESPACES
+ | NANOSECOND
+ | NANOSECONDS
+ | NO
+ | NULLS
+ | NUMERIC
+ | OF
+ | OPTION
+ | OPTIONS
+ | OUT
+ | OUTPUTFORMAT
+ | OVER
+ | OVERLAY
+ | OVERWRITE
+ | PARTITION
+ | PARTITIONED
+ | PARTITIONS
+ | PERCENTLIT
+ | PIVOT
+ | PLACING
+ | POSITION
+ | PRECEDING
+ | PRINCIPALS
+ | PROPERTIES
+ | PURGE
+ | QUARTER
+ | QUERY
+ | RANGE
+ | REAL
+ | RECORDREADER
+ | RECORDWRITER
+ | RECOVER
+ | REDUCE
+ | REFRESH
+ | RENAME
+ | REPAIR
+ | REPEATABLE
+ | REPLACE
+ | RESET
+ | RESPECT
+ | RESTRICT
+ | REVOKE
+ | RLIKE
+ | ROLE
+ | ROLES
+ | ROLLBACK
+ | ROLLUP
+ | ROW
+ | ROWS
+ | SCHEMA
+ | SCHEMAS
+ | SECOND
+ | SECONDS
+ | SEMI
+ | SEPARATED
+ | SERDE
+ | SERDEPROPERTIES
+ | SET
+ | SETMINUS
+ | SETS
+ | SHORT
+ | SHOW
+ | SKEWED
+ | SMALLINT
+ | SORT
+ | SORTED
+ | SOURCE
+ | START
+ | STATISTICS
+ | STORED
+ | STRATIFY
+ | STRING
+ | STRUCT
+ | SUBSTR
+ | SUBSTRING
+ | SYNC
+ | SYSTEM_TIME
+ | SYSTEM_VERSION
+ | TABLES
+ | TABLESAMPLE
+ | TARGET
+ | TBLPROPERTIES
+ | TEMPORARY
+ | TERMINATED
+ | TIMESTAMP
+ | TIMESTAMP_LTZ
+ | TIMESTAMP_NTZ
+ | TIMESTAMPADD
+ | TIMESTAMPDIFF
+ | TINYINT
+ | TOUCH
+ | TRANSACTION
+ | TRANSACTIONS
+ | TRANSFORM
+ | TRIM
+ | TRUE
+ | TRUNCATE
+ | TRY_CAST
+ | TYPE
+ | UNARCHIVE
+ | UNBOUNDED
+ | UNCACHE
+ | UNLOCK
+ | UNPIVOT
+ | UNSET
+ | UPDATE
+ | USE
+ | VALUES
+ | VARCHAR
+ | VERSION
+ | VIEW
+ | VIEWS
+ | VOID
+ | WEEK
+ | WEEKS
+ | WINDOW
+ | YEAR
+ | YEARS
+ | ZONE
+//--ANSI-NON-RESERVED-END
+ ;
+
+// When `SQL_standard_keyword_behavior=false`, there are 2 kinds of keywords in Spark SQL.
+// - Non-reserved keywords:
+// Same definition as the one when `SQL_standard_keyword_behavior=true`.
+// - Strict-non-reserved keywords:
+// A strict version of non-reserved keywords, which cannot be used as a table alias.
+// You can find the full keywords list by searching "Start of the keywords list" in this file.
+// The strict-non-reserved keywords are listed in `strictNonReserved`.
+// The non-reserved keywords are listed in `nonReserved`.
+// These 2 together contain all the keywords.
+strictNonReserved // Referenced from identifier (not strictIdentifier), only when SQL_standard_keyword_behavior is false.
+ : ANTI
+ | CROSS
+ | EXCEPT
+ | FULL
+ | INNER
+ | INTERSECT
+ | JOIN
+ | LATERAL
+ | LEFT
+ | NATURAL
+ | ON
+ | RIGHT
+ | SEMI
+ | SETMINUS
+ | UNION
+ | USING
+ ;
+
+nonReserved // Referenced from strictIdentifier under the {!SQL_standard_keyword_behavior}? predicate.
+//--DEFAULT-NON-RESERVED-START
+ : ADD
+ | AFTER
+ | ALL
+ | ALTER
+ | ALWAYS
+ | ANALYZE
+ | AND
+ | ANY
+ | ANY_VALUE
+ | ARCHIVE
+ | ARRAY
+ | AS
+ | ASC
+ | AT
+ | AUTHORIZATION
+ | BETWEEN
+ | BIGINT
+ | BINARY
+ | BINARY_HEX
+ | BOOLEAN
+ | BOTH
+ | BUCKET
+ | BUCKETS
+ | BY
+ | BYTE
+ | CACHE
+ | CASCADE
+ | CASE
+ | CAST
+ | CATALOG
+ | CATALOGS
+ | CHANGE
+ | CHAR
+ | CHARACTER
+ | CHECK
+ | CLEAR
+ | CLUSTER
+ | CLUSTERED
+ | CODEGEN
+ | COLLATE
+ | COLLECTION
+ | COLUMN
+ | COLUMNS
+ | COMMENT
+ | COMMIT
+ | COMPACT
+ | COMPACTIONS
+ | COMPUTE
+ | CONCATENATE
+ | CONSTRAINT
+ | COST
+ | CREATE
+ | CUBE
+ | CURRENT
+ | CURRENT_DATE
+ | CURRENT_TIME
+ | CURRENT_TIMESTAMP
+ | CURRENT_USER
+ | DATA
+ | DATABASE
+ | DATABASES
+ | DATE
+ | DATEADD
+ | DATE_ADD
+ | DATEDIFF
+ | DATE_DIFF
+ | DAY
+ | DAYS
+ | DAYOFYEAR
+ | DBPROPERTIES
+ | DEC
+ | DECIMAL
+ | DEFAULT
+ | DEFINED
+ | DELETE
+ | DELIMITED
+ | DESC
+ | DESCRIBE
+ | DFS
+ | DIRECTORIES
+ | DIRECTORY
+ | DISTINCT
+ | DISTRIBUTE
+ | DIV
+ | DOUBLE
+ | DROP
+ | ELSE
+ | END
+ | ESCAPE
+ | ESCAPED
+ | EXCHANGE
+ | EXCLUDE
+ | EXISTS
+ | EXPLAIN
+ | EXPORT
+ | EXTENDED
+ | EXTERNAL
+ | EXTRACT
+ | FALSE
+ | FETCH
+ | FILTER
+ | FIELDS
+ | FILEFORMAT
+ | FIRST
+ | FLOAT
+ | FOLLOWING
+ | FOR
+ | FOREIGN
+ | FORMAT
+ | FORMATTED
+ | FROM
+ | FUNCTION
+ | FUNCTIONS
+ | GENERATED
+ | GLOBAL
+ | GRANT
+ | GROUP
+ | GROUPING
+ | HAVING
+ | HOUR
+ | HOURS
+ | IDENTIFIER_KW
+ | IF
+ | IGNORE
+ | IMPORT
+ | IN
+ | INCLUDE
+ | INDEX
+ | INDEXES
+ | INPATH
+ | INPUTFORMAT
+ | INSERT
+ | INT
+ | INTEGER
+ | INTERVAL
+ | INTO
+ | IS
+ | ITEMS
+ | KEYS
+ | LAST
+ | LAZY
+ | LEADING
+ | LIKE
+ // LONG is listed once, after LOGICAL; a second LONG alternative here would be unreachable
+ | ILIKE
+ | LIMIT
+ | LINES
+ | LIST
+ | LOAD
+ | LOCAL
+ | LOCATION
+ | LOCK
+ | LOCKS
+ | LOGICAL
+ | LONG
+ | MACRO
+ | MAP
+ | MATCHED
+ | MERGE
+ | MICROSECOND
+ | MICROSECONDS
+ | MILLISECOND
+ | MILLISECONDS
+ | MINUTE
+ | MINUTES
+ | MONTH
+ | MONTHS
+ | MSCK
+ | NAME
+ | NAMESPACE
+ | NAMESPACES
+ | NANOSECOND
+ | NANOSECONDS
+ | NO
+ | NOT
+ | NULL
+ | NULLS
+ | NUMERIC
+ | OF
+ | OFFSET
+ | ONLY
+ | OPTION
+ | OPTIONS
+ | OR
+ | ORDER
+ | OUT
+ | OUTER
+ | OUTPUTFORMAT
+ | OVER
+ | OVERLAPS
+ | OVERLAY
+ | OVERWRITE
+ | PARTITION
+ | PARTITIONED
+ | PARTITIONS
+ | PERCENTILE_CONT
+ | PERCENTILE_DISC
+ | PERCENTLIT
+ | PIVOT
+ | PLACING
+ | POSITION
+ | PRECEDING
+ | PRIMARY
+ | PRINCIPALS
+ | PROPERTIES
+ | PURGE
+ | QUARTER
+ | QUERY
+ | RANGE
+ | REAL
+ | RECORDREADER
+ | RECORDWRITER
+ | RECOVER
+ | REDUCE
+ | REFERENCES
+ | REFRESH
+ | RENAME
+ | REPAIR
+ | REPEATABLE
+ | REPLACE
+ | RESET
+ | RESPECT
+ | RESTRICT
+ | REVOKE
+ | RLIKE
+ | ROLE
+ | ROLES
+ | ROLLBACK
+ | ROLLUP
+ | ROW
+ | ROWS
+ | SCHEMA
+ | SCHEMAS
+ | SECOND
+ | SECONDS
+ | SELECT
+ | SEPARATED
+ | SERDE
+ | SERDEPROPERTIES
+ | SESSION_USER
+ | SET
+ | SETS
+ | SHORT
+ | SHOW
+ | SKEWED
+ | SMALLINT
+ | SOME
+ | SORT
+ | SORTED
+ | SOURCE
+ | START
+ | STATISTICS
+ | STORED
+ | STRATIFY
+ | STRING
+ | STRUCT
+ | SUBSTR
+ | SUBSTRING
+ | SYNC
+ | SYSTEM_TIME
+ | SYSTEM_VERSION
+ | TABLE
+ | TABLES
+ | TABLESAMPLE
+ | TARGET
+ | TBLPROPERTIES
+ | TEMPORARY
+ | TERMINATED
+ | THEN
+ | TIME
+ | TIMESTAMP
+ | TIMESTAMP_LTZ
+ | TIMESTAMP_NTZ
+ | TIMESTAMPADD
+ | TIMESTAMPDIFF
+ | TINYINT
+ | TO
+ | TOUCH
+ | TRAILING
+ | TRANSACTION
+ | TRANSACTIONS
+ | TRANSFORM
+ | TRIM
+ | TRUE
+ | TRUNCATE
+ | TRY_CAST
+ | TYPE
+ | UNARCHIVE
+ | UNBOUNDED
+ | UNCACHE
+ | UNIQUE
+ | UNKNOWN
+ | UNLOCK
+ | UNPIVOT
+ | UNSET
+ | UPDATE
+ | USE
+ | USER
+ | VALUES
+ | VARCHAR
+ | VERSION
+ | VIEW
+ | VIEWS
+ | VOID
+ | WEEK
+ | WEEKS
+ | WHEN
+ | WHERE
+ | WINDOW
+ | WITH
+ | WITHIN
+ | YEAR
+ | YEARS
+ | ZONE
+//--DEFAULT-NON-RESERVED-END
+ ;
diff --git a/settings.gradle b/settings.gradle
index ba38e3aa427..fc4bc43f463 100644
--- a/settings.gradle
+++ b/settings.gradle
@@ -20,6 +20,7 @@ include 'benchmarks'
include 'datasources'
include 'async-query-core'
include 'async-query'
+include 'language-grammar'
// exclude integ-test/doctest in case of offline build since they need downloads
if (!gradle.startParameter.offline) {