Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
import org.opensearch.sql.ast.tree.RelationSubquery;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Reverse;
import org.opensearch.sql.ast.tree.SPath;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.SubqueryAlias;
import org.opensearch.sql.ast.tree.TableFunction;
Expand Down Expand Up @@ -237,6 +238,10 @@ public T visitParse(Parse node, C context) {
return visitChildren(node, context);
}

/**
 * Visits an {@link SPath} node. This default implementation applies no spath-specific handling
 * and simply delegates to {@code visitChildren}.
 *
 * @param node the spath node being visited
 * @param context visitor context, passed through unchanged
 * @return whatever {@code visitChildren} returns for this node
 */
public T visitSpath(SPath node, C context) {
return visitChildren(node, context);
}

/**
 * Visits a {@link Let} node. This default implementation simply delegates to
 * {@code visitChildren}.
 *
 * @param node the let node being visited
 * @param context visitor context, passed through unchanged
 * @return whatever {@code visitChildren} returns for this node
 */
public T visitLet(Let node, C context) {
return visitChildren(node, context);
}
Expand Down
5 changes: 5 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.RelationSubquery;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.SPath;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.Sort.SortOption;
import org.opensearch.sql.ast.tree.SubqueryAlias;
Expand Down Expand Up @@ -520,6 +521,10 @@ public static Parse parse(
return new Parse(parseMethod, sourceField, pattern, arguments, input);
}

/**
 * Builds an {@link SPath} plan node from its four components.
 *
 * @param input plan passed as the first constructor argument (the node's child)
 * @param inField name of the input field
 * @param outField name of the output field; {@link SPath} declares this field nullable
 * @param path path string handed to the node unchanged
 * @return the newly constructed {@link SPath} node
 */
public static SPath spath(UnresolvedPlan input, String inField, String outField, String path) {
return new SPath(input, inField, outField, path);
}

public static Patterns patterns(
UnresolvedPlan input,
UnresolvedExpression sourceField,
Expand Down
55 changes: 55 additions & 0 deletions core/src/main/java/org/opensearch/sql/ast/tree/SPath.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package org.opensearch.sql.ast.tree;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing license header


import com.google.common.collect.ImmutableList;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.EqualsAndHashCode;
import lombok.RequiredArgsConstructor;
import lombok.ToString;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.opensearch.sql.ast.AbstractNodeVisitor;
import org.opensearch.sql.ast.dsl.AstDSL;

@ToString
@EqualsAndHashCode(callSuper = false)
@RequiredArgsConstructor
@AllArgsConstructor
public class SPath extends UnresolvedPlan {
private UnresolvedPlan child;

private final String inField;

@Nullable private final String outField;

private final String path;

/**
 * Sets {@code child} as this node's child plan, replacing any previous one.
 *
 * @param child the plan to attach beneath this node
 * @return this node, so calls can be chained
 */
@Override
public UnresolvedPlan attach(UnresolvedPlan child) {
this.child = child;
return this;
}

/**
 * Returns this node's children as an immutable list.
 *
 * @return a single-element list holding the attached child, or an empty list when no child has
 *     been attached yet
 */
@Override
public List<UnresolvedPlan> getChild() {
  if (this.child != null) {
    return ImmutableList.of(this.child);
  }
  return ImmutableList.of();
}

/**
 * Dispatches this node to the visitor's {@code visitSpath} method.
 *
 * @param nodeVisitor the visitor to dispatch to
 * @param context visitor context, passed through unchanged
 * @return the value produced by {@code nodeVisitor.visitSpath}
 */
@Override
public <T, C> T accept(AbstractNodeVisitor<T, C> nodeVisitor, C context) {
return nodeVisitor.visitSpath(this, context);
}

/**
 * Expresses this spath as an equivalent eval over the same child:
 * {@code <destination> = json_extract(<inField>, '<path>')}.
 *
 * @return an {@link Eval} node that assigns the extracted value to the destination field
 */
public Eval rewriteAsEval() {
  // With no explicit output field, the extracted value is stored under the path itself.
  final String destination = (this.outField != null) ? this.outField : this.path;

  return AstDSL.eval(
      this.child,
      AstDSL.let(
          AstDSL.field(destination),
          AstDSL.function(
              "json_extract", AstDSL.field(this.inField), AstDSL.stringLiteral(this.path))));
}
Comment on lines +48 to +54
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the core behavior, the rest is plumbing/parsing boilerplate.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If spath is translated to eval, is the spath node still required?

Copy link
Collaborator Author

@Swiddis Swiddis Aug 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We're going to be adding more functionality to it later, so I think it's worthwhile to have the full node here. We could probably be more efficient by parsing it directly into an eval at the tree level if it stays as just this step, though.

}
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@
import org.opensearch.sql.ast.tree.RareTopN;
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.SPath;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.Sort.SortOption;
import org.opensearch.sql.ast.tree.SubqueryAlias;
Expand Down Expand Up @@ -479,6 +480,11 @@ public RelNode visitParse(Parse node, CalcitePlanContext context) {
return context.relBuilder.peek();
}

/**
 * Plans an spath node by rewriting it to an eval ({@code node.rewriteAsEval()}) and handing the
 * result to {@code visitEval}; there is no spath-specific planning logic here.
 *
 * @param node the spath node to plan
 * @param context the Calcite planning context
 * @return the {@link RelNode} returned by {@code visitEval} for the rewritten node
 */
@Override
public RelNode visitSpath(SPath node, CalcitePlanContext context) {
return visitEval(node.rewriteAsEval(), context);
}

@Override
public RelNode visitPatterns(Patterns node, CalcitePlanContext context) {
visitChildren(node, context);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,9 @@ private static boolean isScalarObject(Object obj) {
}

private static String doJsonize(Object candidate) {
if (isScalarObject(candidate)) {
if (candidate == null) {
return "null"; // Matches isScalarObject, but not toString-able.
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Usability patch. Without this, JSON_EXTRACT raises exceptions on any missing fields (which kills extracting from flexibly-typed inputs, one of the major reasons to rely on string types).

The user-facing error without this on a missing value is java.lang.NullPointerException: Cannot invoke "Object.toString()" because "candidate" is null.

} else if (isScalarObject(candidate)) {
return candidate.toString();
} else {
return JsonFunctions.jsonize(candidate);
Expand Down
80 changes: 80 additions & 0 deletions docs/user/ppl/cmd/spath.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
=============
spath
=============

.. rubric:: Table of contents

.. contents::
:local:
:depth: 2


Description
============
| The `spath` command allows extracting fields from structured text data. It currently allows selecting from JSON data with JSON paths.

Version
=======
3.3.0

Syntax
============
spath input=<field> [output=<field>] [path=]<path>


* input: mandatory. The field to scan for JSON data.
* output: optional. The destination field that the data will be loaded to. Defaults to the value of `path`.
* path: mandatory. The path of the data to load for the object. For more information on path syntax, see `json_extract <../functions/json.rst#json_extract>`_.

Note
=====
The `spath` command currently does not support pushdown behavior for extraction. It will be slow on large datasets. It's generally better to index fields needed for filtering directly instead of using `spath` to filter nested fields.

Example 1: Simple Field Extraction
==================================

The simplest use of `spath` is to extract a single field. This example extracts `n` from the `doc` field, which is of type `text`.

PPL query::

PPL> source=test_spath | spath input=doc n;
fetched rows / total rows = 3/3
+----------+---+
| doc | n |
|----------+---|
| {"n": 1} | 1 |
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

results is string? "1"

Copy link
Collaborator Author

@Swiddis Swiddis Aug 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, returns a string -- we can't really have it dynamically choose the type until we have the ability to dynamically make schemas at planning time

| {"n": 2} | 2 |
| {"n": 3} | 3 |
+----------+---+

Example 2: Lists & Nesting
============================

These queries demonstrate more JSON path uses, like traversing nested fields and extracting list elements.

PPL query::

PPL> source=test_spath | spath input=doc output=first_element list{0} | spath input=doc output=all_elements list{} | spath input=doc output=nested nest_out.nest_in;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the expectation of conflict output?

###
POST {{baseUrl}}/tttt/_bulk
Content-Type: application/x-ndjson

{ "index": { "_id": 1 } }
{ "@timestamp": "2025-08-25 00:00:01", "id": 1, "nested_out": {"nested_in": "success"} }
{ "index": { "_id": 2 } }
{ "@timestamp": "2025-08-25 00:00:02", "id": 2, "nested_out": {"nested_in": "failed"} }

###
POST {{baseUrl}}/_plugins/_ppl/
Content-Type: application/x-ndjson

{
  "query": "source=tttt | eval doc='{\"list\": [1, 2, 3, 4], \"nest_out\": {\"nest_in\": \"a\"}}' | spath input=doc output=nested_out nest_out.nest_in | fields nested_out.nested_in"
}


###
POST {{baseUrl}}/_plugins/_ppl/
Content-Type: application/x-ndjson

{
  "query": "source=tttt | eval doc='{\"list\": [1, 2, 3, 4], \"nest_out\": {\"nest_in\": \"a\"}}' | spath input=doc output=nested_out nest_out.nest_in | fields nested_out"
}

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as with eval, it overwrites the existing field. This seems correct to me

fetched rows / total rows = 3/3
+------------------------------------------------------+---------------+--------------+--------+
| doc | first_element | all_elements | nested |
|------------------------------------------------------+---------------+--------------+--------|
| {"list": [1, 2, 3, 4], "nest_out": {"nest_in": "a"}} | 1 | [1,2,3,4] | a |
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

result is string? "[1,2,3,4]"

"null", and "[]"

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could also return null instead of "null", but it seems consistent with the whole "stringify into json" rule that the rest of the json_extract method has going on

| {"list": [], "nest_out": {"nest_in": "a"}} | null | [] | a |
| {"list": [5, 6], "nest_out": {"nest_in": "a"}} | 5 | [5,6] | a |
+------------------------------------------------------+---------------+--------------+--------+

Example 3: Sum of inner elements
================================

The example shows extracting an inner field and doing statistics on it, using the docs from example 1. It also demonstrates that `spath` always returns strings for inner types.

PPL query::

PPL> source=test_spath | spath input=doc n | eval n=cast(n as int) | stats sum(n);
fetched rows / total rows = 1/1
+--------+
| sum(n) |
|--------|
| 6 |
+--------+
2 changes: 2 additions & 0 deletions docs/user/ppl/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@ The query start with search command and then flowing a set of command delimited

- `sort command <cmd/sort.rst>`_

- `spath command <cmd/spath.rst>`_

- `stats command <cmd/stats.rst>`_

- `subquery (aka subsearch) command <cmd/subquery.rst>`_
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.calcite.remote;

import static org.opensearch.sql.util.MatcherUtils.rows;
import static org.opensearch.sql.util.MatcherUtils.schema;
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
import static org.opensearch.sql.util.MatcherUtils.verifySchema;

import java.io.IOException;
import org.json.JSONObject;
import org.junit.jupiter.api.Test;
import org.opensearch.client.Request;
import org.opensearch.sql.ppl.PPLIntegTestCase;

public class CalcitePPLSpathCommandIT extends PPLIntegTestCase {
/**
 * Prepares the cluster for the spath tests: runs the base initialization, enables Calcite, loads
 * the bank index, and indexes three documents into {@code test_spath}.
 *
 * @throws Exception if base initialization or any indexing request fails
 */
@Override
public void init() throws Exception {
  super.init();
  enableCalcite();

  loadIndex(Index.BANK);

  // Index documents 1..3; each `doc` field holds a JSON *string*: {"n": 1}, {"n": 2}, {"n": 3}.
  // `refresh=true` is set on every request, matching the original setup.
  for (int id = 1; id <= 3; id++) {
    Request request = new Request("PUT", "/test_spath/_doc/" + id + "?refresh=true");
    request.setJsonEntity("{\"doc\": \"{\\\"n\\\": " + id + "}\"}");
    client().performRequest(request);
  }
}

/**
 * Extracts {@code n} from the JSON string in {@code doc} into {@code result} and checks that the
 * column is typed as a string with the values "1", "2" and "3".
 */
@Test
public void testSimpleSpath() throws IOException {
  final String query = "source=test_spath | spath input=doc output=result path=n | fields result";
  JSONObject response = executeQuery(query);

  verifySchema(response, schema("result", "string"));
  verifyDataRows(response, rows("1"), rows("2"), rows("3"));
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: maybe add another error case of malformed JSON / invalid paths

}
6 changes: 6 additions & 0 deletions ppl/src/main/antlr/OpenSearchPPLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ HEAD: 'HEAD';
TOP: 'TOP';
RARE: 'RARE';
PARSE: 'PARSE';
SPATH: 'SPATH';
REGEX: 'REGEX';
PUNCT: 'PUNCT';
GROK: 'GROK';
Expand Down Expand Up @@ -114,6 +115,9 @@ ANOMALY_SCORE_THRESHOLD: 'ANOMALY_SCORE_THRESHOLD';
APPEND: 'APPEND';
COUNTFIELD: 'COUNTFIELD';
SHOWCOUNT: 'SHOWCOUNT';
INPUT: 'INPUT';
OUTPUT: 'OUTPUT';
PATH: 'PATH';

// COMPARISON FUNCTION KEYWORDS
CASE: 'CASE';
Expand Down Expand Up @@ -202,6 +206,8 @@ LT_PRTHS: '(';
RT_PRTHS: ')';
LT_SQR_PRTHS: '[';
RT_SQR_PRTHS: ']';
LT_CURLY: '{';
RT_CURLY: '}';
SINGLE_QUOTE: '\'';
DOUBLE_QUOTE: '"';
BACKTICK: '`';
Expand Down
27 changes: 27 additions & 0 deletions ppl/src/main/antlr/OpenSearchPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ commands
| rareCommand
| grokCommand
| parseCommand
| spathCommand
| patternsCommand
| lookupCommand
| kmeansCommand
Expand Down Expand Up @@ -187,6 +188,28 @@ parseCommand
: PARSE (source_field = expression) (pattern = stringLiteral)
;

// spath command: the SPATH keyword followed by any number of parameters, in any order.
// The grammar itself does not require any particular parameter to be present.
spathCommand
: SPATH spathParameter*
;

// A single spath parameter: `input=<expr>`, `output=<expr>`, or a path.
// The `path=` prefix is optional, so a bare path is also accepted.
spathParameter
: (INPUT EQUAL input = expression)
| (OUTPUT EQUAL output = expression)
| ((PATH EQUAL)? path = indexablePath)
;

// One or more path elements separated by dots, e.g. `nest_out.nest_in`.
indexablePath
: pathElement (DOT pathElement)*
;

// A path segment: an identifier, optionally followed by a curly-brace array access.
pathElement
: ident pathArrayAccess?
;

// Curly-brace array access with an optional integer index: `{0}` or `{}`
// (used as `list{0}` and `list{}` in the spath user documentation).
pathArrayAccess
: LT_CURLY (INTEGER_LITERAL)? RT_CURLY
;

patternsMethod
: PUNCT
| REGEX
Expand Down Expand Up @@ -1199,6 +1222,10 @@ keywordsCanBeId
| ANOMALY_SCORE_THRESHOLD
| COUNTFIELD
| SHOWCOUNT
| PATH
| INPUT
| OUTPUT

// AGGREGATIONS AND WINDOW
| statsFunctionName
| windowFunctionName
Expand Down
29 changes: 29 additions & 0 deletions ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
import org.opensearch.sql.ast.tree.Relation;
import org.opensearch.sql.ast.tree.Rename;
import org.opensearch.sql.ast.tree.Reverse;
import org.opensearch.sql.ast.tree.SPath;
import org.opensearch.sql.ast.tree.Sort;
import org.opensearch.sql.ast.tree.SubqueryAlias;
import org.opensearch.sql.ast.tree.TableFunction;
Expand Down Expand Up @@ -527,6 +528,34 @@ public UnresolvedPlan visitParseCommand(OpenSearchPPLParser.ParseCommandContext
return new Parse(ParseMethod.REGEX, sourceField, pattern, ImmutableMap.of());
}

@Override
public UnresolvedPlan visitSpathCommand(OpenSearchPPLParser.SpathCommandContext ctx) {
String inField = null;
String outField = null;
String path = null;

for (OpenSearchPPLParser.SpathParameterContext param : ctx.spathParameter()) {
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If an argument appears multiple times, we overwrite it with the new value.

This is consistent with how the other commands behave under the same scenario.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even input parameter can be multiple? I think it may be worth adding this comment in code.

if (param.input != null) {
inField = param.input.getText();
}
if (param.output != null) {
outField = param.output.getText();
}
if (param.path != null) {
path = param.path.getText();
}
}

if (inField == null) {
throw new IllegalArgumentException("`input` parameter is required for `spath`");
}
if (path == null) {
throw new IllegalArgumentException("`path` parameter is required for `spath`");
}

return new SPath(inField, outField, path);
}

@Override
public UnresolvedPlan visitPatternsCommand(OpenSearchPPLParser.PatternsCommandContext ctx) {
UnresolvedExpression sourceField = internalVisitExpression(ctx.source_field);
Expand Down
Loading
Loading