Skip to content

Commit

Permalink
Merge branch 'main' into fc-cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
elasticmachine committed Sep 23, 2024
2 parents 69a018d + 5750696 commit 4baff96
Show file tree
Hide file tree
Showing 177 changed files with 2,224 additions and 1,232 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ if (providers.systemProperty('idea.active').getOrNull() == 'true') {
vmParameters = [
'-ea',
'-Djava.security.manager=allow',
'-Djava.locale.providers=SPI,CLDR',
'-Djava.locale.providers=CLDR',
'-Des.nativelibs.path="' + testLibraryPath + '"',
// TODO: only open these for mockito when it is modularized
'--add-opens=java.base/java.security.cert=ALL-UNNAMED',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public void execute(Task t) {
mkdirs(test.getWorkingDir().toPath().resolve("temp").toFile());

// TODO remove once jvm.options are added to test system properties
test.systemProperty("java.locale.providers", "SPI,CLDR");
test.systemProperty("java.locale.providers", "CLDR");
}
});
test.getJvmArgumentProviders().add(nonInputProperties);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.EsExecutors;
import org.elasticsearch.core.UpdateForV9;

import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -61,7 +60,7 @@ static List<String> systemJvmOptions(Settings nodeSettings, final Map<String, St
"-Dlog4j.shutdownHookEnabled=false",
"-Dlog4j2.disable.jmx=true",
"-Dlog4j2.formatMsgNoLookups=true",
"-Djava.locale.providers=" + getLocaleProviders(),
"-Djava.locale.providers=CLDR",
maybeEnableNativeAccess(),
maybeOverrideDockerCgroup(distroType),
maybeSetActiveProcessorCount(nodeSettings),
Expand All @@ -73,16 +72,6 @@ static List<String> systemJvmOptions(Settings nodeSettings, final Map<String, St
).filter(e -> e.isEmpty() == false).collect(Collectors.toList());
}

@UpdateForV9 // only use CLDR in v9+
private static String getLocaleProviders() {
/*
* Specify SPI to load IsoCalendarDataProvider (see #48209), specifying the first day of week as Monday.
* When on pre-23, use COMPAT instead to maintain existing date formats as much as we can.
* When on JDK 23+, use the default CLDR locale database, as COMPAT was removed in JDK 23.
*/
return Runtime.version().feature() >= 23 ? "SPI,CLDR" : "SPI,COMPAT";
}

/*
* The virtual file /proc/self/cgroup should list the current cgroup
* membership. For each hierarchy, you can follow the cgroup path from
Expand Down
6 changes: 6 additions & 0 deletions docs/changelog/112645.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 112645
summary: Add support for multi-value dimensions
area: Mapping
type: enhancement
issues:
- 110387
5 changes: 5 additions & 0 deletions docs/changelog/112768.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 112768
summary: Deduplicate Kuromoji User Dictionary
area: Search
type: enhancement
issues: []
6 changes: 6 additions & 0 deletions docs/changelog/113172.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 113172
summary: "[ESQL] Add finish() elapsed time to aggregation profiling times"
area: ES|QL
type: enhancement
issues:
- 112950
5 changes: 5 additions & 0 deletions docs/changelog/113280.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 113280
summary: Warn for model load failures if they have a status code <500
area: Machine Learning
type: bug
issues: []
8 changes: 7 additions & 1 deletion docs/plugins/analysis-kuromoji.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ unknown words. It can be set to:

Whether punctuation should be discarded from the output. Defaults to `true`.

`lenient`::

Whether the `user_dictionary` should be deduplicated on the provided `text`.
Defaults to `false`, in which case duplicate entries generate an error.

`user_dictionary`::
+
--
Expand Down Expand Up @@ -221,7 +226,8 @@ PUT kuromoji_sample
"type": "kuromoji_tokenizer",
"mode": "extended",
"discard_punctuation": "false",
"user_dictionary": "userdict_ja.txt"
"user_dictionary": "userdict_ja.txt",
"lenient": "true"
}
},
"analyzer": {
Expand Down
9 changes: 7 additions & 2 deletions docs/plugins/analysis-nori.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ It can be set to:

Whether punctuation should be discarded from the output. Defaults to `true`.

`lenient`::

Whether the `user_dictionary` should be deduplicated on the provided `text`.
Defaults to `false`, in which case duplicate entries generate an error.

`user_dictionary`::
+
--
Expand Down Expand Up @@ -104,7 +109,8 @@ PUT nori_sample
"type": "nori_tokenizer",
"decompound_mode": "mixed",
"discard_punctuation": "false",
"user_dictionary": "userdict_ko.txt"
"user_dictionary": "userdict_ko.txt",
"lenient": "true"
}
},
"analyzer": {
Expand Down Expand Up @@ -299,7 +305,6 @@ Which responds with:
}
--------------------------------------------------


[[analysis-nori-speech]]
==== `nori_part_of_speech` token filter

Expand Down
1 change: 0 additions & 1 deletion docs/reference/mapping/types/keyword.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ index setting limits the number of dimensions in an index.
Dimension fields have the following constraints:

* The `doc_values` and `index` mapping parameters must be `true`.
* Field values cannot be an <<array,array or multi-value>>.
// end::dimension[]
* Dimension values are used to identify a document’s time series. If dimension values are altered in any way during indexing, the document will be stored as belonging to a different time series than intended. As a result there are additional constraints:
** The field cannot use a <<normalizer,`normalizer`>>.
Expand Down
8 changes: 8 additions & 0 deletions docs/reference/release-notes/8.15.0.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ To work around this issue, you have a number of options:
<<esql-kibana-enable,disable ES|QL queries in {kib}>>
** Change the default data view in Discover to a smaller set of indices and/or one with fewer mapping conflicts.

* Synthetic source bug. Synthetic source may fail generating the _source at runtime, causing failures in get APIs or
partial failures in the search APIs. The result is that for the affected documents the _source can't be retrieved.
There is no workaround and the only option is to upgrade to 8.15.2 when released.
+
If you use synthetic source then you may be affected by this bug if the following is true:
** If you have more fields than the `index.mapping.total_fields.limit` setting allows.
** If you use dynamic mappings and the `index.mapping.total_fields.ignore_dynamic_beyond_limit` setting is enabled.

[[breaking-8.15.0]]
[float]
=== Breaking changes
Expand Down
9 changes: 8 additions & 1 deletion docs/reference/release-notes/8.15.1.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,20 @@ To work around this issue, you have a number of options:
<<esql-kibana-enable,disable ES|QL queries in {kib}>>
** Change the default data view in Discover to a smaller set of indices and/or one with fewer mapping conflicts.

* Index Stats, Node Stats and Cluster Stats API can return a null pointer exception if an index contains a `dense_vector` field
* Index Stats, Node Stats and Cluster Stats API can return a null pointer exception if an index contains a `dense_vector` field
but there is an index segment that does not contain any documents with a dense vector field ({es-pull}112720[#112720]). Workarounds:
** If the affected index already contains documents with a dense vector field, force merge the index to a single segment.
** If the affected index does not already contain documents with a dense vector field, index a document with a dense vector field
and then force merge to a single segment.
** If the affected index's `dense_vector` fields are unused, reindex without the `dense_vector` fields.

* Synthetic source bug. Synthetic source may fail generating the _source at runtime, causing failures in get APIs or
partial failures in the search APIs. The result is that for the affected documents the _source can't be retrieved.
There is no workaround and the only option is to upgrade to 8.15.2 when released.
+
If you use synthetic source then you may be affected by this bug if the following is true:
** If you have more fields than the `index.mapping.total_fields.limit` setting allows.
** If you use dynamic mappings and the `index.mapping.total_fields.ignore_dynamic_beyond_limit` setting is enabled.

[[bug-8.15.1]]
[float]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,8 @@ GET /_security/service/elastic/fleet-server
"monitor",
"create_index",
"auto_configure",
"maintenance"
"maintenance",
"view_index_metadata"
],
"allow_restricted_indices": false
},
Expand All @@ -265,7 +266,8 @@ GET /_security/service/elastic/fleet-server
"monitor",
"create_index",
"auto_configure",
"maintenance"
"maintenance",
"view_index_metadata"
],
"allow_restricted_indices": false
}
Expand Down
70 changes: 52 additions & 18 deletions docs/reference/search/retriever.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,43 @@ retrievers) *only* the query element is allowed.
[[standard-retriever-example]]
==== Example

[source,js]
////
[source,console]
----
PUT /restaurants
{
"mappings": {
"properties": {
"region": { "type": "keyword" },
"year": { "type": "keyword" },
"vector": {
"type": "dense_vector",
"dims": 3
}
}
}
}
POST /restaurants/_bulk?refresh
{"index":{}}
{"region": "Austria", "year": "2019", "vector": [10, 22, 77]}
{"index":{}}
{"region": "France", "year": "2019", "vector": [10, 22, 78]}
{"index":{}}
{"region": "Austria", "year": "2020", "vector": [10, 22, 79]}
{"index":{}}
{"region": "France", "year": "2020", "vector": [10, 22, 80]}
----
// TESTSETUP
[source,console]
--------------------------------------------------
DELETE /restaurants
--------------------------------------------------
// TEARDOWN
////

[source,console]
----
GET /restaurants/_search
{
Expand Down Expand Up @@ -109,9 +145,8 @@ GET /restaurants/_search
}
}
----
// NOTCONSOLE
<1> Opens the `retriever` object.
<2> The `standard` retriever is used for definining traditional {es} queries.
<2> The `standard` retriever is used for defining traditional {es} queries.
<3> The entry point for defining the search query.
<4> The `bool` object allows for combining multiple query clauses logically.
<5> The `should` array indicates conditions under which a document will match. Documents matching these conditions will increase their relevancy score.
Expand Down Expand Up @@ -171,9 +206,9 @@ The parameters `query_vector` and `query_vector_builder` cannot be used together
[[knn-retriever-example]]
==== Example

[source,js]
[source,console]
----
GET my-embeddings/_search
GET /restaurants/_search
{
"retriever": {
"knn": { <1>
Expand All @@ -185,8 +220,7 @@ GET my-embeddings/_search
}
}
----
// NOTCONSOLE

// TEST[continued]
<1> Configuration for k-nearest neighbor (knn) search, which is based on vector similarity.
<2> Specifies the field name that contains the vectors.
<3> The query vector against which document vectors are compared in the `knn` search.
Expand Down Expand Up @@ -223,7 +257,7 @@ the retriever tree.

A simple hybrid search example (lexical search + dense vector search) combining a `standard` retriever with a `knn` retriever using RRF:

[source,js]
[source,console]
----
GET /restaurants/_search
{
Expand All @@ -234,7 +268,7 @@ GET /restaurants/_search
"standard": { <3>
"query": {
"multi_match": {
"query": "San Francisco",
"query": "Austria",
"fields": [
"city",
"region"
Expand All @@ -258,7 +292,7 @@ GET /restaurants/_search
}
}
----
// NOTCONSOLE
// TEST[continued]
<1> Defines a retriever tree with an RRF retriever.
<2> The sub-retriever array.
<3> The first sub-retriever is a `standard` retriever.
Expand All @@ -272,7 +306,7 @@ GET /restaurants/_search

A more complex hybrid search example (lexical search + ELSER sparse vector search + dense vector search) using RRF:

[source,js]
[source,console]
----
GET movies/_search
{
Expand Down Expand Up @@ -316,7 +350,7 @@ GET movies/_search
}
}
----
// NOTCONSOLE
// TEST[skip:uses ELSER]

[[text-similarity-reranker-retriever]]
==== Text Similarity Re-ranker Retriever
Expand Down Expand Up @@ -390,7 +424,7 @@ A text similarity re-ranker retriever is a compound retriever. Child retrievers
This example enables out-of-the-box semantic search by re-ranking top documents using the Cohere Rerank API. This approach eliminate the need to generate and store embeddings for all indexed documents.
This requires a <<infer-service-cohere,Cohere Rerank inference endpoint>> using the `rerank` task type.

[source,js]
[source,console]
----
GET /index/_search
{
Expand All @@ -414,7 +448,7 @@ GET /index/_search
}
}
----
// NOTCONSOLE
// TEST[skip:uses ML]

[discrete]
[[text-similarity-reranker-retriever-example-eland]]
Expand Down Expand Up @@ -452,7 +486,7 @@ eland_import_hub_model \
+
. Create an inference endpoint for the `rerank` task
+
[source,js]
[source,console]
----
PUT _inference/rerank/my-msmarco-minilm-model
{
Expand All @@ -464,11 +498,11 @@ PUT _inference/rerank/my-msmarco-minilm-model
}
}
----
// NOTCONSOLE
// TEST[skip:uses ML]
+
. Define a `text_similarity_rerank` retriever.
+
[source,js]
[source,console]
----
POST movies/_search
{
Expand All @@ -490,7 +524,7 @@ POST movies/_search
}
}
----
// NOTCONSOLE
// TEST[skip:uses ML]
+
This retriever uses a standard `match` query to search the `movie` index for films tagged with the genre "drama".
It then re-ranks the results based on semantic similarity to the text in the `inference_text` parameter, using the model we uploaded to {es}.
Expand Down
4 changes: 4 additions & 0 deletions modules/aggregations/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@ dependencies {
compileOnly(project(':modules:lang-painless:spi'))
clusterModules(project(':modules:lang-painless'))
}

tasks.named("yamlRestCompatTestTransform").configure({ task ->
task.skipTest("aggregations/date_agg_per_day_of_week/Date aggregartion per day of week", "week-date behaviour has changed")
})
Loading

0 comments on commit 4baff96

Please sign in to comment.