Skip to content

Commit 8d3ca4e

Browse files
authored
Allow format sort values of date fields (#70357)
If a search after request targets multiple indices and one of its sort fields has type `date` in one index but `date_nanos` in other indices, then Elasticsearch won't interpret the search_after parameter correctly in every target index. By default, the sort value of a date field is a long of milliseconds since the epoch, while the sort value of a date_nanos field is a long of nanoseconds. This commit introduces the `format` parameter in the sort field so that a sort value of a date or date_nanos field will be formatted using a date format in a search response. The examples below illustrate how to use this new parameter. ```js { "query": { "match_all": {} }, "sort": [ { "timestamp": { "order": "asc", "format": "strict_date_optional_time_nanos" } } ] } ``` ```js { "query": { "match_all": {} }, "sort": [ { "timestamp": { "order": "asc", "format": "strict_date_optional_time_nanos" } } ], "search_after": [ "2015-01-01T12:10:30.123456789Z" // in `strict_date_optional_time_nanos` format ] } ``` Closes #69192
1 parent 9edb151 commit 8d3ca4e

File tree

9 files changed

+396
-27
lines changed

9 files changed

+396
-27
lines changed

docs/reference/search/search-your-data/paginate-search-results.asciidoc

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@ NOTE: Search after requests have optimizations that make them faster when the so
8181
order is `_shard_doc` and total hits are not tracked. If you want to iterate over all documents regardless of the
8282
order, this is the most efficient option.
8383

84+
IMPORTANT: If the `sort` field is a <<date,`date`>> in some target data streams or indices
85+
but a <<date_nanos,`date_nanos`>> field in other targets, use the `numeric_type` parameter
86+
to convert the values to a single resolution and the `format` parameter to specify a
87+
<<mapping-date-format, date format>> for the `sort` field. Otherwise, {es} won't interpret
88+
the search after parameter correctly in each request.
89+
8490
[source,console]
8591
----
8692
GET /_search
@@ -96,7 +102,7 @@ GET /_search
96102
"keep_alive": "1m"
97103
},
98104
"sort": [ <2>
99-
{"@timestamp": "asc"}
105+
{"@timestamp": {"order": "asc", "format": "strict_date_optional_time_nanos", "numeric_type" : "date_nanos" }}
100106
]
101107
}
102108
----
@@ -107,7 +113,7 @@ GET /_search
107113

108114
The search response includes an array of `sort` values for each hit. If you used
109115
a PIT, a tiebreaker is included as the last `sort` value for each hit.
110-
This tiebreaker called `_shard_doc` is added automically on every search requests that use a PIT.
116+
This tiebreaker, called `_shard_doc`, is added automatically to every search request that uses a PIT.
111117
The `_shard_doc` value is the combination of the shard index within the PIT and Lucene's internal doc ID;
112118
it is unique per document and constant within a PIT.
113119
You can also add the tiebreaker explicitly in the search request to customize the order:
@@ -127,7 +133,7 @@ GET /_search
127133
"keep_alive": "1m"
128134
},
129135
"sort": [ <2>
130-
{"@timestamp": "asc"},
136+
{"@timestamp": {"order": "asc", "format": "strict_date_optional_time_nanos"}},
131137
{"_shard_doc": "desc"}
132138
]
133139
}
@@ -156,7 +162,7 @@ GET /_search
156162
"_score" : null,
157163
"_source" : ...,
158164
"sort" : [ <2>
159-
4098435132000,
165+
"2021-05-20T05:30:04.832Z",
160166
4294967298 <3>
161167
]
162168
}
@@ -190,10 +196,10 @@ GET /_search
190196
"keep_alive": "1m"
191197
},
192198
"sort": [
193-
{"@timestamp": "asc"}
199+
{"@timestamp": {"order": "asc", "format": "strict_date_optional_time_nanos"}}
194200
],
195201
"search_after": [ <2>
196-
4098435132000,
202+
"2021-05-20T05:30:04.832Z",
197203
4294967298
198204
],
199205
"track_total_hits": false <3>

docs/reference/search/search-your-data/sort-search-results.asciidoc

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ PUT /my-index-000001
3131
GET /my-index-000001/_search
3232
{
3333
"sort" : [
34-
{ "post_date" : {"order" : "asc"}},
34+
{ "post_date" : {"order" : "asc", "format": "strict_date_optional_time_nanos"}},
3535
"user",
3636
{ "name" : "desc" },
3737
{ "age" : "desc" },
@@ -51,8 +51,25 @@ should sort by `_doc`. This especially helps when <<scroll-search-results,scroll
5151
[discrete]
5252
=== Sort Values
5353

54-
The sort values for each document returned are also returned as part of
55-
the response.
54+
The search response includes `sort` values for each document. Use the `format`
55+
parameter to specify a <<built-in-date-formats,date format>> for the `sort`
56+
values of <<date,`date`>> and <<date_nanos,`date_nanos`>> fields. The following
57+
search returns `sort` values for the `post_date` field in the
58+
`strict_date_optional_time_nanos` format.
59+
60+
[source,console]
61+
--------------------------------------------------
62+
GET /my-index-000001/_search
63+
{
64+
"sort" : [
65+
{ "post_date" : {"format": "strict_date_optional_time_nanos"}}
66+
],
67+
"query" : {
68+
"term" : { "user" : "kimchy" }
69+
}
70+
}
71+
--------------------------------------------------
72+
// TEST[continued]
5673

5774
[discrete]
5875
=== Sort Order

rest-api-spec/src/main/resources/rest-api-spec/test/search/90_search_after.yml

Lines changed: 131 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,3 +252,134 @@
252252
size: 1
253253
sort: ["_shard_doc"]
254254
search_after: [ 0L ]
255+
256+
---
257+
"Format sort values":
258+
- skip:
259+
version: " - 7.12.99"
260+
reason: Format sort output is introduced in 7.13
261+
262+
- do:
263+
indices.create:
264+
index: test
265+
body:
266+
mappings:
267+
properties:
268+
timestamp:
269+
type: date
270+
format: yyyy-MM-dd HH:mm:ss.SSS
271+
- do:
272+
indices.create:
273+
index: test_nanos
274+
body:
275+
mappings:
276+
properties:
277+
timestamp:
278+
type: date_nanos
279+
format: dd/MM/yyyy HH:mm:ss.SSS
280+
- do:
281+
bulk:
282+
refresh: true
283+
index: test
284+
body: |
285+
{"index":{}}
286+
{"timestamp":"2021-10-13 00:30:04.828"}
287+
{"index":{}}
288+
{"timestamp":"2021-06-11 04:30:04.828"}
289+
{"index":{}}
290+
{"timestamp":"2021-02-11 08:30:04.828"}
291+
- do:
292+
bulk:
293+
refresh: true
294+
index: test_nanos
295+
body: |
296+
{"index":{}}
297+
{"timestamp":"21/08/2021 03:30:04.732"}
298+
{"index":{}}
299+
{"timestamp":"20/05/2021 05:30:04.832"}
300+
{"index":{}}
301+
{"timestamp":"15/04/2021 06:30:04.821"}
302+
303+
- do:
304+
search:
305+
index: test
306+
body:
307+
size: 1
308+
sort: [{timestamp: {"order" : "asc", "format": "strict_date_optional_time_nanos"}}]
309+
- match: {hits.total.value: 3 }
310+
- length: {hits.hits: 1 }
311+
- match: {hits.hits.0._source.timestamp: "2021-02-11 08:30:04.828" }
312+
- match: {hits.hits.0.sort: ["2021-02-11T08:30:04.828Z"] }
313+
314+
- do:
315+
search:
316+
index: test
317+
body:
318+
size: 1
319+
sort: [{timestamp: {"order" : "asc", "format": "strict_date_optional_time_nanos"}}]
320+
search_after: ["2021-02-11T08:30:04.828Z"]
321+
- match: {hits.total.value: 3 }
322+
- length: {hits.hits: 1 }
323+
- match: {hits.hits.0._source.timestamp: "2021-06-11 04:30:04.828" }
324+
- match: {hits.hits.0.sort: ["2021-06-11T04:30:04.828Z"] }
325+
326+
# mismatch format
327+
- do:
328+
catch: /failed to parse date field/
329+
search:
330+
index: test
331+
body:
332+
size: 1
333+
sort: [{ timestamp: {"order" : "asc", "format": "yyyy-MM-dd HH:mm:ss.SSS"}}]
334+
search_after: [ "2021-02-11T08:30:04.828Z" ]
335+
- do:
336+
catch: /failed to parse date field/
337+
search:
338+
index: test
339+
body:
340+
size: 1
341+
sort: [ { timestamp: { "order": "asc", "format": "epoch_millis" } } ]
342+
search_after: [ "2021-02-11T08:30:04.828Z" ]
343+
- do:
344+
search:
345+
index: test
346+
body:
347+
size: 1
348+
sort: [{timestamp: {"order" : "asc", "format": "yyyy-MM-dd | HH:mm:ss.SSS"}}]
349+
search_after: ["2021-02-11 | 08:30:04.828"]
350+
- match: {hits.total.value: 3 }
351+
- length: {hits.hits: 1 }
352+
- match: {hits.hits.0._source.timestamp: "2021-06-11 04:30:04.828" }
353+
- match: {hits.hits.0.sort: ["2021-06-11 | 04:30:04.828"] }
354+
355+
# Mixed two types with numeric
356+
- do:
357+
search:
358+
index: tes*
359+
body:
360+
size: 2
361+
sort: [ { timestamp: { "order": "asc", "format": "strict_date_optional_time_nanos", "numeric_type": "date_nanos" } } ]
362+
- match: { hits.total.value: 6 }
363+
- length: { hits.hits: 2 }
364+
- match: { hits.hits.0._index: test }
365+
- match: { hits.hits.0._source.timestamp: "2021-02-11 08:30:04.828" }
366+
- match: { hits.hits.0.sort: [ "2021-02-11T08:30:04.828Z" ] }
367+
- match: { hits.hits.1._index: test_nanos }
368+
- match: { hits.hits.1._source.timestamp: "15/04/2021 06:30:04.821" }
369+
- match: { hits.hits.1.sort: [ "2021-04-15T06:30:04.821Z" ] }
370+
371+
- do:
372+
search:
373+
index: test*
374+
body:
375+
size: 2
376+
sort: [ { timestamp: { "order": "asc", "format": "strict_date_optional_time_nanos", "numeric_type": "date" } } ]
377+
search_after: [ "2021-04-15T06:30:04.821Z" ]
378+
- match: { hits.total.value: 6 }
379+
- length: { hits.hits: 2 }
380+
- match: { hits.hits.0._index: test_nanos }
381+
- match: { hits.hits.0._source.timestamp: "20/05/2021 05:30:04.832" }
382+
- match: { hits.hits.0.sort: [ "2021-05-20T05:30:04.832Z" ] }
383+
- match: { hits.hits.1._index: test }
384+
- match: { hits.hits.1._source.timestamp: "2021-06-11 04:30:04.828" }
385+
- match: { hits.hits.1.sort: [ "2021-06-11T04:30:04.828Z" ] }

server/src/internalClusterTest/java/org/elasticsearch/search/searchafter/SearchAfterIT.java

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,20 @@
99
package org.elasticsearch.search.searchafter;
1010

1111
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
12+
import org.elasticsearch.action.index.IndexRequest;
1213
import org.elasticsearch.action.index.IndexRequestBuilder;
1314
import org.elasticsearch.action.search.SearchPhaseExecutionException;
1415
import org.elasticsearch.action.search.SearchRequestBuilder;
1516
import org.elasticsearch.action.search.SearchResponse;
1617
import org.elasticsearch.action.search.ShardSearchFailure;
18+
import org.elasticsearch.action.support.WriteRequest;
19+
import org.elasticsearch.cluster.metadata.IndexMetadata;
1720
import org.elasticsearch.common.UUIDs;
21+
import org.elasticsearch.common.settings.Settings;
1822
import org.elasticsearch.common.xcontent.XContentBuilder;
23+
import org.elasticsearch.rest.RestStatus;
1924
import org.elasticsearch.search.SearchHit;
25+
import org.elasticsearch.search.sort.SortBuilders;
2026
import org.elasticsearch.search.sort.SortOrder;
2127
import org.elasticsearch.test.ESIntegTestCase;
2228
import org.hamcrest.Matchers;
@@ -30,6 +36,9 @@
3036
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
3137
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
3238
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
39+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFailures;
40+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
41+
import static org.hamcrest.Matchers.arrayContaining;
3342
import static org.hamcrest.Matchers.containsString;
3443
import static org.hamcrest.Matchers.equalTo;
3544

@@ -183,6 +192,80 @@ public void testWithSimpleTypes() throws Exception {
183192
assertSearchFromWithSortValues(INDEX_NAME, TYPE_NAME, documents, reqSize);
184193
}
185194

195+
public void testWithCustomFormatSortValueOfDateField() throws Exception {
196+
final XContentBuilder mappings = jsonBuilder();
197+
mappings.startObject().startObject("properties");
198+
{
199+
mappings.startObject("start_date");
200+
mappings.field("type", "date");
201+
mappings.field("format", "yyyy-MM-dd");
202+
mappings.endObject();
203+
}
204+
{
205+
mappings.startObject("end_date");
206+
mappings.field("type", "date");
207+
mappings.field("format", "yyyy-MM-dd");
208+
mappings.endObject();
209+
}
210+
mappings.endObject().endObject();
211+
assertAcked(client().admin().indices().prepareCreate("test")
212+
.setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, between(1, 3)))
213+
.addMapping("_doc", mappings));
214+
215+
216+
client().prepareBulk().setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE)
217+
.add(new IndexRequest("test").id("1").source("start_date", "2019-03-24", "end_date", "2020-01-21"))
218+
.add(new IndexRequest("test").id("2").source("start_date", "2018-04-23", "end_date", "2021-02-22"))
219+
.add(new IndexRequest("test").id("3").source("start_date", "2015-01-22", "end_date", "2022-07-23"))
220+
.add(new IndexRequest("test").id("4").source("start_date", "2016-02-21", "end_date", "2024-03-24"))
221+
.add(new IndexRequest("test").id("5").source("start_date", "2017-01-20", "end_date", "2025-05-28"))
222+
.get();
223+
224+
SearchResponse resp = client().prepareSearch("test")
225+
.addSort(SortBuilders.fieldSort("start_date").setFormat("dd/MM/yyyy"))
226+
.addSort(SortBuilders.fieldSort("end_date").setFormat("yyyy-MM-dd"))
227+
.setSize(2)
228+
.get();
229+
assertNoFailures(resp);
230+
assertThat(resp.getHits().getHits()[0].getSortValues(), arrayContaining("22/01/2015", "2022-07-23"));
231+
assertThat(resp.getHits().getHits()[1].getSortValues(), arrayContaining("21/02/2016", "2024-03-24"));
232+
233+
resp = client().prepareSearch("test")
234+
.addSort(SortBuilders.fieldSort("start_date").setFormat("dd/MM/yyyy"))
235+
.addSort(SortBuilders.fieldSort("end_date").setFormat("yyyy-MM-dd"))
236+
.searchAfter(new String[]{"21/02/2016", "2024-03-24"})
237+
.setSize(2)
238+
.get();
239+
assertNoFailures(resp);
240+
assertThat(resp.getHits().getHits()[0].getSortValues(), arrayContaining("20/01/2017", "2025-05-28"));
241+
assertThat(resp.getHits().getHits()[1].getSortValues(), arrayContaining("23/04/2018", "2021-02-22"));
242+
243+
resp = client().prepareSearch("test")
244+
.addSort(SortBuilders.fieldSort("start_date").setFormat("dd/MM/yyyy"))
245+
.addSort(SortBuilders.fieldSort("end_date")) // it's okay because end_date has the format "yyyy-MM-dd"
246+
.searchAfter(new String[]{"21/02/2016", "2024-03-24"})
247+
.setSize(2)
248+
.get();
249+
assertNoFailures(resp);
250+
assertThat(resp.getHits().getHits()[0].getSortValues(), arrayContaining("20/01/2017", 1748390400000L));
251+
assertThat(resp.getHits().getHits()[1].getSortValues(), arrayContaining("23/04/2018", 1613952000000L));
252+
253+
SearchRequestBuilder searchRequest = client().prepareSearch("test")
254+
.addSort(SortBuilders.fieldSort("start_date").setFormat("dd/MM/yyyy"))
255+
.addSort(SortBuilders.fieldSort("end_date").setFormat("epoch_millis"))
256+
.searchAfter(new Object[]{"21/02/2016", 1748390400000L})
257+
.setSize(2);
258+
assertNoFailures(searchRequest.get());
259+
260+
searchRequest = client().prepareSearch("test")
261+
.addSort(SortBuilders.fieldSort("start_date").setFormat("dd/MM/yyyy"))
262+
.addSort(SortBuilders.fieldSort("end_date").setFormat("epoch_millis")) // wrong format
263+
.searchAfter(new Object[]{"21/02/2016", "23/04/2018"})
264+
.setSize(2);
265+
assertFailures(searchRequest, RestStatus.BAD_REQUEST,
266+
containsString("failed to parse date field [23/04/2018] with format [epoch_millis]"));
267+
}
268+
186269
private static class ListComparator implements Comparator<List> {
187270
@Override
188271
public int compare(List o1, List o2) {

0 commit comments

Comments
 (0)