Skip to content

Commit

Permalink
Merge branch 'main' into fc-cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
elasticmachine committed Sep 23, 2024
2 parents 69a018d + 5750696 commit 4baff96
Show file tree
Hide file tree
Showing 177 changed files with 2,224 additions and 1,232 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ if (providers.systemProperty('idea.active').getOrNull() == 'true') {
vmParameters = [
'-ea',
'-Djava.security.manager=allow',
'-Djava.locale.providers=SPI,CLDR',
'-Djava.locale.providers=CLDR',
'-Des.nativelibs.path="' + testLibraryPath + '"',
// TODO: only open these for mockito when it is modularized
'--add-opens=java.base/java.security.cert=ALL-UNNAMED',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ public void execute(Task t) {
mkdirs(test.getWorkingDir().toPath().resolve("temp").toFile());

// TODO remove once jvm.options are added to test system properties
test.systemProperty("java.locale.providers", "SPI,CLDR");
test.systemProperty("java.locale.providers", "CLDR");
}
});
test.getJvmArgumentProviders().add(nonInputProperties);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.EsExecutors;
import org.elasticsearch.core.UpdateForV9;

import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -61,7 +60,7 @@ static List<String> systemJvmOptions(Settings nodeSettings, final Map<String, St
"-Dlog4j.shutdownHookEnabled=false",
"-Dlog4j2.disable.jmx=true",
"-Dlog4j2.formatMsgNoLookups=true",
"-Djava.locale.providers=" + getLocaleProviders(),
"-Djava.locale.providers=CLDR",
maybeEnableNativeAccess(),
maybeOverrideDockerCgroup(distroType),
maybeSetActiveProcessorCount(nodeSettings),
Expand All @@ -73,16 +72,6 @@ static List<String> systemJvmOptions(Settings nodeSettings, final Map<String, St
).filter(e -> e.isEmpty() == false).collect(Collectors.toList());
}

@UpdateForV9 // only use CLDR in v9+
private static String getLocaleProviders() {
/*
* Specify SPI to load IsoCalendarDataProvider (see #48209), specifying the first day of week as Monday.
* When on pre-23, use COMPAT instead to maintain existing date formats as much as we can.
* When on JDK 23+, use the default CLDR locale database, as COMPAT was removed in JDK 23.
*/
return Runtime.version().feature() >= 23 ? "SPI,CLDR" : "SPI,COMPAT";
}

/*
* The virtual file /proc/self/cgroup should list the current cgroup
* membership. For each hierarchy, you can follow the cgroup path from
Expand Down
6 changes: 6 additions & 0 deletions docs/changelog/112645.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 112645
summary: Add support for multi-value dimensions
area: Mapping
type: enhancement
issues:
- 110387
5 changes: 5 additions & 0 deletions docs/changelog/112768.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 112768
summary: Deduplicate Kuromoji User Dictionary
area: Search
type: enhancement
issues: []
6 changes: 6 additions & 0 deletions docs/changelog/113172.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 113172
summary: "[ESQL] Add finish() elapsed time to aggregation profiling times"
area: ES|QL
type: enhancement
issues:
- 112950
5 changes: 5 additions & 0 deletions docs/changelog/113280.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 113280
summary: Warn for model load failures if they have a status code <500
area: Machine Learning
type: bug
issues: []
8 changes: 7 additions & 1 deletion docs/plugins/analysis-kuromoji.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ unknown words. It can be set to:

Whether punctuation should be discarded from the output. Defaults to `true`.

`lenient`::

Whether the `user_dictionary` should be deduplicated on the provided `text`.
Defaults to `false`, in which case duplicate entries generate an error.

`user_dictionary`::
+
--
Expand Down Expand Up @@ -221,7 +226,8 @@ PUT kuromoji_sample
"type": "kuromoji_tokenizer",
"mode": "extended",
"discard_punctuation": "false",
"user_dictionary": "userdict_ja.txt"
"user_dictionary": "userdict_ja.txt",
"lenient": "true"
}
},
"analyzer": {
Expand Down
9 changes: 7 additions & 2 deletions docs/plugins/analysis-nori.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,11 @@ It can be set to:

Whether punctuation should be discarded from the output. Defaults to `true`.

`lenient`::

Whether the `user_dictionary` should be deduplicated on the provided `text`.
Defaults to `false`, in which case duplicate entries generate an error.

`user_dictionary`::
+
--
Expand Down Expand Up @@ -104,7 +109,8 @@ PUT nori_sample
"type": "nori_tokenizer",
"decompound_mode": "mixed",
"discard_punctuation": "false",
"user_dictionary": "userdict_ko.txt"
"user_dictionary": "userdict_ko.txt",
"lenient": "true"
}
},
"analyzer": {
Expand Down Expand Up @@ -299,7 +305,6 @@ Which responds with:
}
--------------------------------------------------


[[analysis-nori-speech]]
==== `nori_part_of_speech` token filter

Expand Down
1 change: 0 additions & 1 deletion docs/reference/mapping/types/keyword.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ index setting limits the number of dimensions in an index.
Dimension fields have the following constraints:

* The `doc_values` and `index` mapping parameters must be `true`.
* Field values cannot be an <<array,array or multi-value>>.
// end::dimension[]
* Dimension values are used to identify a document’s time series. If dimension values are altered in any way during indexing, the document will be stored as belonging to a different time series than intended. As a result there are additional constraints:
** The field cannot use a <<normalizer,`normalizer`>>.
Expand Down
8 changes: 8 additions & 0 deletions docs/reference/release-notes/8.15.0.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ To work around this issue, you have a number of options:
<<esql-kibana-enable,disable ES|QL queries in {kib}>>
** Change the default data view in Discover to a smaller set of indices and/or one with fewer mapping conflicts.

* Synthetic source bug. Synthetic source may fail generating the _source at runtime, causing failures in get APIs or
partial failures in the search APIs. The result is that for the affected documents the _source can't be retrieved.
There is no workaround and the only option is to upgrade to 8.15.2 when released.
+
If you use synthetic source then you may be affected by this bug if the following is true:
** If you have more fields than the `index.mapping.total_fields.limit` setting allows.
** If you use dynamic mappings and the `index.mapping.total_fields.ignore_dynamic_beyond_limit` setting is enabled.

[[breaking-8.15.0]]
[float]
=== Breaking changes
Expand Down
9 changes: 8 additions & 1 deletion docs/reference/release-notes/8.15.1.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,20 @@ To work around this issue, you have a number of options:
<<esql-kibana-enable,disable ES|QL queries in {kib}>>
** Change the default data view in Discover to a smaller set of indices and/or one with fewer mapping conflicts.

* Index Stats, Node Stats and Cluster Stats API can return a null pointer exception if an index contains a `dense_vector` field
* Index Stats, Node Stats and Cluster Stats API can return a null pointer exception if an index contains a `dense_vector` field
but there is an index segment that does not contain any documents with a dense vector field ({es-pull}112720[#112720]). Workarounds:
** If the affected index already contains documents with a dense vector field, force merge the index to a single segment.
** If the affected index does not already contain documents with a dense vector field, index a document with a dense vector field
and then force merge to a single segment.
** If the affected index's `dense_vector` fields are unused, reindex without the `dense_vector` fields.

* Synthetic source bug. Synthetic source may fail generating the _source at runtime, causing failures in get APIs or
partial failures in the search APIs. The result is that for the affected documents the _source can't be retrieved.
There is no workaround and the only option is to upgrade to 8.15.2 when released.
+
If you use synthetic source then you may be affected by this bug if the following is true:
** If you have more fields than the `index.mapping.total_fields.limit` setting allows.
** If you use dynamic mappings and the `index.mapping.total_fields.ignore_dynamic_beyond_limit` setting is enabled.

[[bug-8.15.1]]
[float]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,8 @@ GET /_security/service/elastic/fleet-server
"monitor",
"create_index",
"auto_configure",
"maintenance"
"maintenance",
"view_index_metadata"
],
"allow_restricted_indices": false
},
Expand All @@ -265,7 +266,8 @@ GET /_security/service/elastic/fleet-server
"monitor",
"create_index",
"auto_configure",
"maintenance"
"maintenance",
"view_index_metadata"
],
"allow_restricted_indices": false
}
Expand Down
70 changes: 52 additions & 18 deletions docs/reference/search/retriever.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,43 @@ retrievers) *only* the query element is allowed.
[[standard-retriever-example]]
==== Example

[source,js]
////
[source,console]
----
PUT /restaurants
{
"mappings": {
"properties": {
"region": { "type": "keyword" },
"year": { "type": "keyword" },
"vector": {
"type": "dense_vector",
"dims": 3
}
}
}
}
POST /restaurants/_bulk?refresh
{"index":{}}
{"region": "Austria", "year": "2019", "vector": [10, 22, 77]}
{"index":{}}
{"region": "France", "year": "2019", "vector": [10, 22, 78]}
{"index":{}}
{"region": "Austria", "year": "2020", "vector": [10, 22, 79]}
{"index":{}}
{"region": "France", "year": "2020", "vector": [10, 22, 80]}
----
// TESTSETUP
[source,console]
--------------------------------------------------
DELETE /restaurants
--------------------------------------------------
// TEARDOWN
////

[source,console]
----
GET /restaurants/_search
{
Expand Down Expand Up @@ -109,9 +145,8 @@ GET /restaurants/_search
}
}
----
// NOTCONSOLE
<1> Opens the `retriever` object.
<2> The `standard` retriever is used for definining traditional {es} queries.
<2> The `standard` retriever is used for defining traditional {es} queries.
<3> The entry point for defining the search query.
<4> The `bool` object allows for combining multiple query clauses logically.
<5> The `should` array indicates conditions under which a document will match. Documents matching these conditions will increase their relevancy score.
Expand Down Expand Up @@ -171,9 +206,9 @@ The parameters `query_vector` and `query_vector_builder` cannot be used together
[[knn-retriever-example]]
==== Example

[source,js]
[source,console]
----
GET my-embeddings/_search
GET /restaurants/_search
{
"retriever": {
"knn": { <1>
Expand All @@ -185,8 +220,7 @@ GET my-embeddings/_search
}
}
----
// NOTCONSOLE

// TEST[continued]
<1> Configuration for k-nearest neighbor (knn) search, which is based on vector similarity.
<2> Specifies the field name that contains the vectors.
<3> The query vector against which document vectors are compared in the `knn` search.
Expand Down Expand Up @@ -223,7 +257,7 @@ the retriever tree.

A simple hybrid search example (lexical search + dense vector search) combining a `standard` retriever with a `knn` retriever using RRF:

[source,js]
[source,console]
----
GET /restaurants/_search
{
Expand All @@ -234,7 +268,7 @@ GET /restaurants/_search
"standard": { <3>
"query": {
"multi_match": {
"query": "San Francisco",
"query": "Austria",
"fields": [
"city",
"region"
Expand All @@ -258,7 +292,7 @@ GET /restaurants/_search
}
}
----
// NOTCONSOLE
// TEST[continued]
<1> Defines a retriever tree with an RRF retriever.
<2> The sub-retriever array.
<3> The first sub-retriever is a `standard` retriever.
Expand All @@ -272,7 +306,7 @@ GET /restaurants/_search

A more complex hybrid search example (lexical search + ELSER sparse vector search + dense vector search) using RRF:

[source,js]
[source,console]
----
GET movies/_search
{
Expand Down Expand Up @@ -316,7 +350,7 @@ GET movies/_search
}
}
----
// NOTCONSOLE
// TEST[skip:uses ELSER]

[[text-similarity-reranker-retriever]]
==== Text Similarity Re-ranker Retriever
Expand Down Expand Up @@ -390,7 +424,7 @@ A text similarity re-ranker retriever is a compound retriever. Child retrievers
This example enables out-of-the-box semantic search by re-ranking top documents using the Cohere Rerank API. This approach eliminate the need to generate and store embeddings for all indexed documents.
This requires a <<infer-service-cohere,Cohere Rerank inference endpoint>> using the `rerank` task type.

[source,js]
[source,console]
----
GET /index/_search
{
Expand All @@ -414,7 +448,7 @@ GET /index/_search
}
}
----
// NOTCONSOLE
// TEST[skip:uses ML]

[discrete]
[[text-similarity-reranker-retriever-example-eland]]
Expand Down Expand Up @@ -452,7 +486,7 @@ eland_import_hub_model \
+
. Create an inference endpoint for the `rerank` task
+
[source,js]
[source,console]
----
PUT _inference/rerank/my-msmarco-minilm-model
{
Expand All @@ -464,11 +498,11 @@ PUT _inference/rerank/my-msmarco-minilm-model
}
}
----
// NOTCONSOLE
// TEST[skip:uses ML]
+
. Define a `text_similarity_rerank` retriever.
+
[source,js]
[source,console]
----
POST movies/_search
{
Expand All @@ -490,7 +524,7 @@ POST movies/_search
}
}
----
// NOTCONSOLE
// TEST[skip:uses ML]
+
This retriever uses a standard `match` query to search the `movie` index for films tagged with the genre "drama".
It then re-ranks the results based on semantic similarity to the text in the `inference_text` parameter, using the model we uploaded to {es}.
Expand Down
4 changes: 4 additions & 0 deletions modules/aggregations/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -45,3 +45,7 @@ dependencies {
compileOnly(project(':modules:lang-painless:spi'))
clusterModules(project(':modules:lang-painless'))
}

tasks.named("yamlRestCompatTestTransform").configure({ task ->
task.skipTest("aggregations/date_agg_per_day_of_week/Date aggregartion per day of week", "week-date behaviour has changed")
})
Loading

0 comments on commit 4baff96

Please sign in to comment.