diff --git a/docs/build.gradle b/docs/build.gradle index 81b698a2a1b1..7029c46b0ecc 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -144,23 +144,28 @@ Closure setupTwitter = { String name, int count -> type: date likes: type: long + location: + properties: + city: + type: keyword + country: + type: keyword - do: bulk: index: twitter refresh: true body: |''' for (int i = 0; i < count; i++) { - String user, text + String body if (i == 0) { - user = 'kimchy' - text = 'trying out Elasticsearch' + body = """{"user": "kimchy", "message": "trying out Elasticsearch", "date": "2009-11-15T14:12:12", "likes": 0, + "location": { "city": "Amsterdam", "country": "Netherlands" }}""" } else { - user = 'test' - text = "some message with the number $i" + body = """{"user": "test", "message": "some message with the number $i", "date": "2009-11-15T14:12:12", "likes": $i}""" } buildRestTests.setups[name] += """ {"index":{"_id": "$i"}} - {"user": "$user", "message": "$text", "date": "2009-11-15T14:12:12", "likes": $i}""" + $body""" } } setupTwitter('twitter', 5) diff --git a/docs/reference/aggregations/misc.asciidoc b/docs/reference/aggregations/misc.asciidoc index 6d099f295565..7a7277aced5c 100644 --- a/docs/reference/aggregations/misc.asciidoc +++ b/docs/reference/aggregations/misc.asciidoc @@ -105,7 +105,8 @@ GET /twitter/_search?typed_keys "aggregations": { "top_users": { "top_hits": { - "size": 1 + "size": 1, + "_source": ["user", "likes", "message"] } } } @@ -141,9 +142,8 @@ In the response, the aggregations names will be changed to respectively `date_hi "_id": "0", "_score": 1.0, "_source": { - "date": "2009-11-15T14:12:12", - "message": "trying out Elasticsearch", "user": "kimchy", + "message": "trying out Elasticsearch", "likes": 0 } } @@ -167,12 +167,12 @@ request. 
This is the case for Terms, Significant Terms and Percentiles aggregati also contains information about the type of the targeted field: `lterms` (for a terms aggregation on a Long field), `sigsterms` (for a significant terms aggregation on a String field), `tdigest_percentiles` (for a percentile aggregation based on the TDigest algorithm). - + [[indexing-aggregation-results]] == Indexing aggregation results with {transforms} - -<> enable you to convert existing {es} indices -into summarized indices, which provide opportunities for new insights and -analytics. You can use {transforms} to persistently index your aggregation + +<> enable you to convert existing {es} indices +into summarized indices, which provide opportunities for new insights and +analytics. You can use {transforms} to persistently index your aggregation results into entity-centric indices. diff --git a/docs/reference/docs/get.asciidoc b/docs/reference/docs/get.asciidoc index 90984d53a97f..c820ad1439f5 100644 --- a/docs/reference/docs/get.asciidoc +++ b/docs/reference/docs/get.asciidoc @@ -241,7 +241,11 @@ The API returns the following result: "user" : "kimchy", "date" : "2009-11-15T14:12:12", "likes": 0, - "message" : "trying out Elasticsearch" + "message" : "trying out Elasticsearch", + "location" : { + "city": "Amsterdam", + "country": "Netherlands" + } } } -------------------------------------------------- diff --git a/docs/reference/modules/cross-cluster-search.asciidoc b/docs/reference/modules/cross-cluster-search.asciidoc index 29bed7cbcfc5..5cbd24d7320e 100644 --- a/docs/reference/modules/cross-cluster-search.asciidoc +++ b/docs/reference/modules/cross-cluster-search.asciidoc @@ -76,7 +76,8 @@ GET /cluster_one:twitter/_search "match": { "user": "kimchy" } - } + }, + "_source": ["user", "message", "likes"] } -------------------------------------------------- // TEST[continued] @@ -113,7 +114,6 @@ The API returns the following response: "_score": 1, "_source": { "user": "kimchy", - "date": 
"2009-11-15T14:12:12", "message": "trying out Elasticsearch", "likes": 0 } @@ -147,7 +147,8 @@ GET /twitter,cluster_one:twitter,cluster_two:twitter/_search "match": { "user": "kimchy" } - } + }, + "_source": ["user", "message", "likes"] } -------------------------------------------------- // TEST[continued] @@ -184,7 +185,6 @@ The API returns the following response: "_score": 2, "_source": { "user": "kimchy", - "date": "2009-11-15T14:12:12", "message": "trying out Elasticsearch", "likes": 0 } @@ -195,7 +195,6 @@ The API returns the following response: "_score": 1, "_source": { "user": "kimchy", - "date": "2009-11-15T14:12:12", "message": "trying out Elasticsearch", "likes": 0 } @@ -206,7 +205,6 @@ The API returns the following response: "_score": 1, "_source": { "user": "kimchy", - "date": "2009-11-15T14:12:12", "message": "trying out Elasticsearch", "likes": 0 } diff --git a/docs/reference/search/search-fields.asciidoc b/docs/reference/search/search-fields.asciidoc index f0d31e86594a..de0d470d9e3e 100644 --- a/docs/reference/search/search-fields.asciidoc +++ b/docs/reference/search/search-fields.asciidoc @@ -4,33 +4,212 @@ By default, each hit in the search response includes the document <>, which is the entire JSON object that was -provided when indexing the document. If you only need certain source fields in -the search response, you can use the <> to -restrict what parts of the source are returned. +provided when indexing the document. To retrieve specific fields in the search +response, you can use the `fields` parameter: -Returning fields using only the document source has some limitations: +[source,console] +---- +POST twitter/_search +{ + "query": { + "match": { + "message": "elasticsearch" + } + }, + "fields": ["user", "date"], + "_source": false +} +---- +// TEST[setup:twitter] -* The `_source` field does not include <> or -<>. Likewise, a field in the source does not contain -values copied using the <> mapping parameter. 
-* Since the `_source` is stored as a single field in Lucene, the whole source -object must be loaded and parsed, even if only a small number of fields are -needed. +The `fields` parameter consults both a document's `_source` and the index +mappings to load and return values. Because it makes use of the mappings, +`fields` has some advantages over referencing the `_source` directly: it +accepts <> and <>, and +also formats field values like dates in a consistent way. -To avoid these limitations, you can: +A document's `_source` is stored as a single field in Lucene. So the whole +`_source` object must be loaded and parsed even if only a small number of +fields are requested. To avoid this limitation, you can try another option for +loading fields: * Use the <<docvalue-fields,`docvalue_fields`>> parameter to get values for selected fields. This can be a good choice when returning a fairly small number of fields that support doc values, such as keywords and dates. -* Use the <<stored-fields,`stored_fields`>> parameter to get the values for specific stored fields. (Fields that use the <> mapping option.) +* Use the <<stored-fields,`stored_fields`>> parameter to +get the values for specific stored fields (fields that use the +<> mapping option). -You can find more detailed information on each of these methods in the +You can find more detailed information on each of these methods in the following sections: -* <<source-filtering>> +* <<search-fields-param>> * <<docvalue-fields>> * <<stored-fields>> +* <<source-filtering>> + +[discrete] +[[search-fields-param]] +=== Fields + +The `fields` parameter allows for retrieving a list of document fields in +the search response. It consults both the document `_source` and the index +mappings to return each value in a standardized way that matches its mapping +type. By default, date fields are formatted according to the +<> parameter in their mappings. 
+ +.*Example* +[%collapsible] +==== +The following search request uses the `fields` parameter to retrieve values +for the `user` field, all fields starting with `location.`, and the +`date` field: + +[source,console] +---- +POST twitter/_search +{ + "query": { + "match": { + "message": "elasticsearch" + } + }, + "fields": [ + "user", + "location.*", <1> + { + "field": "date", + "format": "epoch_millis" <2> + } + ], + "_source": false +} +---- +// TEST[continued] + +<1> Both full field names and wildcard patterns are accepted. +<2> Using object notation, you can pass a `format` parameter to apply a custom + format for the field's values. This is currently supported for + <> and <>, which + accept a <>. + +The values are returned as a flat list in the `fields` section in each hit: + +[source,console-result] +---- +{ + "took" : 2, + "timed_out" : false, + "_shards" : { + "total" : 1, + "successful" : 1, + "skipped" : 0, + "failed" : 0 + }, + "hits" : { + "total" : { + "value" : 1, + "relation" : "eq" + }, + "max_score" : 1.0, + "hits" : [ + { + "_index" : "twitter", + "_id" : "0", + "_score" : 1.0, + "fields" : { + "user" : [ + "kimchy" + ], + "date" : [ + "1258294332000" + ], + "location.city": [ + "Amsterdam" + ], + "location.country": [ + "Netherlands" + ] + } + } + ] + } +} +---- +// TESTRESPONSE[s/"took" : 2/"took": $body.took/] +// TESTRESPONSE[s/"max_score" : 1.0/"max_score" : $body.hits.max_score/] +// TESTRESPONSE[s/"_score" : 1.0/"_score" : $body.hits.hits.0._score/] + +Only leaf fields are returned -- `fields` does not allow for fetching entire +objects. + +==== + +The `fields` parameter handles field types like <> and +<> whose values aren't always present in +the `_source`. Other mapping options are also respected, including +<>, <> and +<>. + +[discrete] +[[docvalue-fields]] +=== Doc value fields + +You can use the <> parameter to return +<> for one or more fields in the search response. 
+ +Doc values store the same values as the `_source` but in an on-disk, +column-based structure that's optimized for sorting and aggregations. Since each +field is stored separately, {es} only reads the field values that were requested +and can avoid loading the whole document `_source`. + +Doc values are stored for supported fields by default. However, doc values are +not supported for <> or +{plugins}/mapper-annotated-text-usage.html[`text_annotated`] fields. + +.*Example* +[%collapsible] +==== +The following search request uses the `docvalue_fields` parameter to retrieve +doc values for the `user` field, all fields starting with `location.`, and the +`date` field: + + +[source,console] +---- +GET twitter/_search +{ + "query": { + "match": { + "message": "elasticsearch" + } + }, + "docvalue_fields": [ + "user", + "location.*", <1> + { + "field": "date", + "format": "epoch_millis" <2> + } + ] +} +---- +// TEST[continued] + +<1> Both full field names and wildcard patterns are accepted. +<2> Using object notation, you can pass a `format` parameter to apply a custom + format for the field's doc values. <> support a + <>. <> support a + https://docs.oracle.com/javase/8/docs/api/java/text/DecimalFormat.html[DecimalFormat + pattern]. Other field datatypes do not support the `format` parameter. +==== + +TIP: You cannot use the `docvalue_fields` parameter to retrieve doc values for +nested objects. If you specify a nested object, the search returns an empty +array (`[ ]`) for the field. To access nested fields, use the +<> parameter's `docvalue_fields` +property. [discrete] [[source-filtering]] @@ -122,7 +301,6 @@ GET /_search ---- ==== - [discrete] [[docvalue-fields]] === Doc value fields @@ -184,7 +362,6 @@ array (`[ ]`) for the field. To access nested fields, use the <> parameter's `docvalue_fields` property. 
- [discrete] [[stored-fields]] === Stored fields diff --git a/docs/reference/search/search.asciidoc b/docs/reference/search/search.asciidoc index 6833db79dfe5..5ff39b788a6b 100644 --- a/docs/reference/search/search.asciidoc +++ b/docs/reference/search/search.asciidoc @@ -626,7 +626,11 @@ The API returns the following response: "date": "2009-11-15T14:12:12", "likes": 0, "message": "trying out Elasticsearch", - "user": "kimchy" + "user": "kimchy", + "location": { + "city": "Amsterdam", + "country": "Netherlands" + } } } ] @@ -715,7 +719,11 @@ The API returns the following response: "user": "kimchy", "message": "trying out Elasticsearch", "date": "2009-11-15T14:12:12", - "likes": 0 + "likes": 0, + "location": { + "city": "Amsterdam", + "country": "Netherlands" + } } } ]