Skip to content

Allow parameter for ES|QL SAMPLE #129392

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jun 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion x-pack/plugin/esql/src/main/antlr/EsqlBaseParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -316,5 +316,5 @@ completionCommand
;

sampleCommand
: DEV_SAMPLE probability=decimalValue
: DEV_SAMPLE probability=constant
;
Original file line number Diff line number Diff line change
Expand Up @@ -1168,7 +1168,7 @@ public enum Cap {
LUCENE_QUERY_EVALUATOR_QUERY_REWRITE,

/**
* Support parameters for LiMIT command.
* Support parameters for LIMIT command.
*/
PARAMETER_FOR_LIMIT,

Expand Down Expand Up @@ -1202,7 +1202,12 @@ public enum Cap {
*/
KNN_FUNCTION(Build.current().isSnapshot()),

LIKE_WITH_LIST_OF_PATTERNS;
LIKE_WITH_LIST_OF_PATTERNS,

/**
* Support parameters for SAMPLE command.
*/
PARAMETER_FOR_SAMPLE(Build.current().isSnapshot());

private final boolean enabled;

Expand Down

Large diffs are not rendered by default.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
import java.util.function.Function;

import static java.util.Collections.emptyList;
import static org.elasticsearch.xpack.esql.common.Failure.fail;
import static org.elasticsearch.xpack.esql.core.type.DataType.KEYWORD;
import static org.elasticsearch.xpack.esql.core.util.StringUtils.WILDCARD;
import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputExpressions;
Expand Down Expand Up @@ -783,7 +784,15 @@ public Literal inferenceId(EsqlBaseParser.IdentifierOrParameterContext ctx) {
}

public PlanFactory visitSampleCommand(EsqlBaseParser.SampleCommandContext ctx) {
var probability = visitDecimalValue(ctx.probability);
return plan -> new Sample(source(ctx), probability, plan);
Source source = source(ctx);
Object val = expression(ctx.probability).fold(FoldContext.small() /* TODO remove me */);
if (val instanceof Double probability && probability > 0.0 && probability < 1.0) {
return input -> new Sample(source, new Literal(source, probability, DataType.DOUBLE), input);
} else {
throw new ParsingException(
source(ctx),
"invalid value for SAMPLE probability [" + val + "], expecting a number between 0 and 1, exclusive"
);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,16 @@
import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.search.aggregations.bucket.sampler.random.RandomSamplingQuery;
import org.elasticsearch.xpack.esql.capabilities.PostAnalysisVerificationAware;
import org.elasticsearch.xpack.esql.capabilities.TelemetryAware;
import org.elasticsearch.xpack.esql.common.Failures;
import org.elasticsearch.xpack.esql.core.expression.Expression;
import org.elasticsearch.xpack.esql.core.expression.FoldContext;
import org.elasticsearch.xpack.esql.core.expression.Foldables;
import org.elasticsearch.xpack.esql.core.tree.NodeInfo;
import org.elasticsearch.xpack.esql.core.tree.Source;
import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput;

import java.io.IOException;
import java.util.Objects;

import static org.elasticsearch.xpack.esql.common.Failure.fail;

public class Sample extends UnaryPlan implements TelemetryAware, PostAnalysisVerificationAware {
public class Sample extends UnaryPlan implements TelemetryAware {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Sample", Sample::new);

private final Expression probability;
Expand Down Expand Up @@ -92,13 +85,4 @@ public boolean equals(Object obj) {

return Objects.equals(probability, other.probability) && Objects.equals(child(), other.child());
}

@Override
public void postAnalysisVerification(Failures failures) {
try {
RandomSamplingQuery.checkProbabilityRange((double) Foldables.valueOf(FoldContext.small(), probability));
} catch (IllegalArgumentException e) {
failures.add(fail(probability, e.getMessage()));
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,6 @@
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultEnrichResolution;
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.indexWithDateDateNanosUnionType;
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping;
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.randomValueOtherThanTest;
import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.tsdbIndexResolution;
import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY;
import static org.elasticsearch.xpack.esql.core.type.DataType.DATETIME;
Expand Down Expand Up @@ -3465,20 +3464,6 @@ public void testRrfError() {
assertThat(e.getMessage(), containsString("Unknown column [_id]"));
}

public void testRandomSampleProbability() {
assumeTrue("requires SAMPLE capability", EsqlCapabilities.Cap.SAMPLE_V3.isEnabled());

var e = expectThrows(VerificationException.class, () -> analyze("FROM test | SAMPLE 1."));
assertThat(e.getMessage(), containsString("RandomSampling probability must be strictly between 0.0 and 1.0, was [1.0]"));

e = expectThrows(VerificationException.class, () -> analyze("FROM test | SAMPLE .0"));
assertThat(e.getMessage(), containsString("RandomSampling probability must be strictly between 0.0 and 1.0, was [0.0]"));

double p = randomValueOtherThanTest(d -> 0 < d && d < 1, () -> randomDoubleBetween(0, Double.MAX_VALUE, false));
e = expectThrows(VerificationException.class, () -> analyze("FROM test | SAMPLE " + p));
assertThat(e.getMessage(), containsString("RandomSampling probability must be strictly between 0.0 and 1.0, was [" + p + "]"));
}

// TODO There's too much boilerplate involved here! We need a better way of creating FieldCapabilitiesResponses from a mapping or index.
private static FieldCapabilitiesIndexResponse fieldCapabilitiesIndexResponse(
String indexName,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,25 @@ public void testInvalidLimit() {
assertEquals("1:13: Invalid value for LIMIT [-1], expecting a non negative integer", error("row a = 1 | limit -1"));
}

public void testInvalidSample() {
assertEquals(
"1:13: invalid value for SAMPLE probability [foo], expecting a number between 0 and 1, exclusive",
error("row a = 1 | sample \"foo\"")
);
assertEquals(
"1:13: invalid value for SAMPLE probability [-1.0], expecting a number between 0 and 1, exclusive",
error("row a = 1 | sample -1.0")
);
assertEquals(
"1:13: invalid value for SAMPLE probability [0], expecting a number between 0 and 1, exclusive",
error("row a = 1 | sample 0")
);
assertEquals(
"1:13: invalid value for SAMPLE probability [1], expecting a number between 0 and 1, exclusive",
error("row a = 1 | sample 1")
);
}

private String error(String query) {
ParsingException e = expectThrows(ParsingException.class, () -> defaultAnalyzer.analyze(parser.createStatement(query)));
String message = e.getMessage();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.startsWith;

//@TestLogging(value = "org.elasticsearch.xpack.esql:TRACE", reason = "debug")
public class StatementParserTests extends AbstractStatementParserTests {
Expand Down Expand Up @@ -3485,8 +3486,15 @@ public void testSample() {
assumeTrue("SAMPLE requires corresponding capability", EsqlCapabilities.Cap.SAMPLE_V3.isEnabled());
expectError("FROM test | SAMPLE .1 2", "line 1:23: extraneous input '2' expecting <EOF>");
expectError("FROM test | SAMPLE .1 \"2\"", "line 1:23: extraneous input '\"2\"' expecting <EOF>");
expectError("FROM test | SAMPLE 1", "line 1:20: mismatched input '1' expecting {DECIMAL_LITERAL, '+', '-'}");
expectError("FROM test | SAMPLE", "line 1:19: mismatched input '<EOF>' expecting {DECIMAL_LITERAL, '+', '-'}");
expectError(
"FROM test | SAMPLE 1",
"line 1:13: invalid value for SAMPLE probability [1], expecting a number between 0 and 1, exclusive"
);
expectThrows(
ParsingException.class,
startsWith("line 1:19: mismatched input '<EOF>' expecting {"),
() -> statement("FROM test | SAMPLE")
);
}

static Alias alias(String name, Expression value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -413,20 +413,22 @@ FROM EVAL SORT LIMIT with documents_found:
- match: {values.2: ["1",2.0,null,true,123,1674835275193]}

---
"Test Unnamed Input Params Also For Limit":
"Test Unnamed Input Params Also For Limit And Sample":
- requires:
test_runner_features: [ capabilities ]
capabilities:
- method: POST
path: /_query
parameters: [ ]
capabilities: [ parameter_for_limit ]
capabilities: [ parameter_for_limit, parameter_for_sample ]
reason: "named or positional parameters"
- do:
esql.query:
body:
query: 'from test | eval x = ?, y = ?, z = ?, t = ?, u = ?, v = ? | keep x, y, z, t, u, v | limit ?'
params: ["1", 2.0, null, true, 123, 1674835275193, 3]
# The "| sort time" is to work around https://github.com/elastic/elasticsearch/issues/120272
# TODO: remove it when the issue is fixed
query: 'from test | sort time | sample ? | eval x = ?, y = ?, z = ?, t = ?, u = ?, v = ? | keep x, y, z, t, u, v | limit ?'
params: [0.999999999999, "1", 2.0, null, true, 123, 1674835275193, 3]

- length: {columns: 6}
- match: {columns.0.name: "x"}
Expand Down Expand Up @@ -455,14 +457,16 @@ FROM EVAL SORT LIMIT with documents_found:
- method: POST
path: /_query
parameters: [ ]
capabilities: [ parameter_for_limit ]
capabilities: [ parameter_for_limit, parameter_for_sample ]
reason: "named or positional parameters"

- do:
esql.query:
body:
query: 'from test | eval x = ?, y = ?, z = ?, t = ?, u = ?, v = ? | keep x, y, z, t, u, v | limit ?'
params: [{"n1" : "1"}, {"n2" : 2.0}, {"n3" : null}, {"n4" : true}, {"n5" : 123}, {"n6": 1674835275193}, {"l": 3}]
# The "| sort time" is to work around https://github.com/elastic/elasticsearch/issues/120272
# TODO: remove it when the issue is fixed
query: 'from test | sort time | sample ? | eval x = ?, y = ?, z = ?, t = ?, u = ?, v = ? | keep x, y, z, t, u, v | limit ?'
params: [{"s": 0.99999999999}, {"n1" : "1"}, {"n2" : 2.0}, {"n3" : null}, {"n4" : true}, {"n5" : 123}, {"n6": 1674835275193}, {"l": 3}]

- length: {columns: 6}
- match: {columns.0.name: "x"}
Expand Down Expand Up @@ -519,14 +523,16 @@ FROM EVAL SORT LIMIT with documents_found:
- method: POST
path: /_query
parameters: [ ]
capabilities: [ parameter_for_limit ]
capabilities: [ parameter_for_limit, parameter_for_sample ]
reason: "named or positional parameters for field names"

- do:
esql.query:
body:
query: 'from test | stats x = count(?f1), y = sum(?f2) by ?f3 | sort ?f3 | keep ?f3, x, y | limit ?l'
params: [{"f1" : {"identifier" : "time"}}, {"f2" : { "identifier" : "count" }}, {"f3" : { "identifier" : "color"}}, {"l": 3}]
# The "| sort time" is to work around https://github.com/elastic/elasticsearch/issues/120272
# TODO: remove it when the issue is fixed
query: 'from test | sort time | sample ?s | stats x = count(?f1), y = sum(?f2) by ?f3 | sort ?f3 | keep ?f3, x, y | limit ?l'
params: [{"s": 0.99999999999}, {"f1" : {"identifier" : "time"}}, {"f2" : { "identifier" : "count" }}, {"f3" : { "identifier" : "color"}}, {"l": 3}]

- length: {columns: 3}
- match: {columns.0.name: "color"}
Expand Down