diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index 0c0ede8c3a076..a886220c84cda 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -109,3 +109,11 @@ EOF Agent information from gobld EOF fi + +# Amazon Linux 2 has DNS resolution issues with resource-based hostnames in EC2 +# We have many functional tests that try to lookup and resolve the hostname of the local machine in a particular way +# And they fail. This sets up a manual entry for the hostname in dnsmasq. +if [[ -f /etc/os-release ]] && grep -q '"Amazon Linux 2"' /etc/os-release; then + echo "$(hostname -i | cut -d' ' -f 2) $(hostname -f)." | sudo tee /etc/dnsmasq.hosts + sudo systemctl restart dnsmasq.service +fi diff --git a/.buildkite/pipelines/intake.template.yml b/.buildkite/pipelines/intake.template.yml index 1a513971b2c10..f530f237113a9 100644 --- a/.buildkite/pipelines/intake.template.yml +++ b/.buildkite/pipelines/intake.template.yml @@ -7,7 +7,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait - label: part1 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart1 @@ -17,7 +16,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part2 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart2 timeout_in_minutes: 300 @@ -26,7 +24,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part3 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart3 timeout_in_minutes: 300 @@ -35,7 +32,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part4 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart4 timeout_in_minutes: 300 @@ -44,7 +40,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part5 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5 timeout_in_minutes: 300 @@ -53,7 +48,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - group: bwc-snapshots steps: - label: "{{matrix.BWC_VERSION}} / bwc-snapshots" @@ -67,7 +61,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: "{{matrix.BWC_VERSION}}" - label: rest-compat @@ -78,7 +71,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait - trigger: elasticsearch-dra-workflow label: Trigger DRA snapshot workflow diff --git a/.buildkite/pipelines/intake.yml b/.buildkite/pipelines/intake.yml index a6af8bd35c7a0..e44a1e67e9d59 100644 --- a/.buildkite/pipelines/intake.yml +++ b/.buildkite/pipelines/intake.yml @@ -8,7 +8,6 @@ steps: image: 
family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait - label: part1 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart1 @@ -18,7 +17,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part2 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart2 timeout_in_minutes: 300 @@ -27,7 +25,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part3 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart3 timeout_in_minutes: 300 @@ -36,7 +33,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part4 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart4 timeout_in_minutes: 300 @@ -45,7 +41,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part5 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5 timeout_in_minutes: 300 @@ -54,7 +49,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - group: bwc-snapshots steps: - label: "{{matrix.BWC_VERSION}} / bwc-snapshots" @@ -68,7 +62,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: "{{matrix.BWC_VERSION}}" - label: rest-compat @@ -79,7 +72,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait - trigger: elasticsearch-dra-workflow label: Trigger DRA snapshot workflow diff --git a/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml b/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml index 1f69b8faa7ab4..8cf2a8aacbece 100644 --- a/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml +++ b/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml @@ -15,7 +15,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait - trigger: "elasticsearch-lucene-snapshot-tests" build: diff --git a/.buildkite/pipelines/lucene-snapshot/run-tests.yml b/.buildkite/pipelines/lucene-snapshot/run-tests.yml index 49c3396488d82..c76c54a56494e 100644 --- a/.buildkite/pipelines/lucene-snapshot/run-tests.yml +++ b/.buildkite/pipelines/lucene-snapshot/run-tests.yml @@ -7,7 +7,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait: null - label: part1 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart1 @@ -17,7 +16,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 
250 - label: part2 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart2 timeout_in_minutes: 300 @@ -26,7 +24,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part3 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart3 timeout_in_minutes: 300 @@ -35,7 +32,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part4 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart4 timeout_in_minutes: 300 @@ -44,7 +40,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part5 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5 timeout_in_minutes: 300 @@ -53,7 +48,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - group: bwc-snapshots steps: - label: "{{matrix.BWC_VERSION}} / bwc-snapshots" @@ -70,7 +64,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: "{{matrix.BWC_VERSION}}" - label: rest-compat @@ -81,4 +74,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/periodic-packaging.bwc.template.yml b/.buildkite/pipelines/periodic-packaging.bwc.template.yml index 8a6fa2553b204..b06bc80d3535d 100644 --- a/.buildkite/pipelines/periodic-packaging.bwc.template.yml +++ b/.buildkite/pipelines/periodic-packaging.bwc.template.yml @@ -11,6 +11,5 @@ image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: $BWC_VERSION diff --git a/.buildkite/pipelines/periodic-packaging.yml b/.buildkite/pipelines/periodic-packaging.yml index 115873552e056..ac207fca5e3ed 100644 --- a/.buildkite/pipelines/periodic-packaging.yml +++ b/.buildkite/pipelines/periodic-packaging.yml @@ -44,7 +44,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.0.1 @@ -61,7 +60,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.1.3 @@ -78,7 +76,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.2.3 @@ -95,7 +92,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.3.3 @@ -112,7 +108,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.4.3 @@ -129,7 +124,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.5.3 @@ -146,7 +140,6 
@@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.6.2 @@ -163,7 +156,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.7.1 @@ -180,7 +172,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.8.2 @@ -197,7 +188,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.9.2 @@ -214,7 +204,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.10.4 @@ -231,7 +220,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.11.4 @@ -248,7 +236,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.12.2 @@ -265,7 +252,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.13.4 @@ -282,7 +268,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.14.3 @@ -299,7 +284,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.15.3 @@ -316,7 +300,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.16.0 @@ -333,7 +316,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 9.0.0 diff --git a/.buildkite/pipelines/periodic-platform-support.yml b/.buildkite/pipelines/periodic-platform-support.yml index 29feb5b832ee2..86e0623ba5b87 100644 --- a/.buildkite/pipelines/periodic-platform-support.yml +++ b/.buildkite/pipelines/periodic-platform-support.yml @@ -28,7 +28,6 @@ steps: localSsds: 1 localSsdInterface: nvme machineType: custom-32-98304 - diskSizeGb: 250 env: {} - group: platform-support-windows steps: diff --git a/.buildkite/pipelines/periodic.bwc.template.yml b/.buildkite/pipelines/periodic.bwc.template.yml index b22270dbf221c..43a0a7438d656 100644 --- a/.buildkite/pipelines/periodic.bwc.template.yml +++ b/.buildkite/pipelines/periodic.bwc.template.yml @@ -7,7 +7,6 @@ machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: $BWC_VERSION retry: diff --git a/.buildkite/pipelines/periodic.template.yml b/.buildkite/pipelines/periodic.template.yml index 5048916a9cac9..201c34058a409 100644 --- a/.buildkite/pipelines/periodic.template.yml +++ b/.buildkite/pipelines/periodic.template.yml @@ -25,7 +25,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: example-plugins command: |- cd $$WORKSPACE/plugins/examples @@ -37,7 +36,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - group: java-fips-matrix steps: - label: "{{matrix.ES_RUNTIME_JAVA}} / 
{{matrix.GRADLE_TASK}} / java-fips-matrix" @@ -59,7 +57,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" GRADLE_TASK: "{{matrix.GRADLE_TASK}}" @@ -69,14 +66,13 @@ steps: matrix: setup: ES_RUNTIME_JAVA: - - openjdk17 + - openjdk21 BWC_VERSION: $BWC_LIST agents: provider: gcp image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" BWC_VERSION: "{{matrix.BWC_VERSION}}" @@ -102,7 +98,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" GRADLE_TASK: "{{matrix.GRADLE_TASK}}" @@ -120,7 +115,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" BWC_VERSION: "{{matrix.BWC_VERSION}}" @@ -156,7 +150,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / azure command: | export azure_storage_container=elasticsearch-ci-thirdparty @@ -171,7 +164,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / gcs command: | export google_storage_bucket=elasticsearch-ci-thirdparty @@ -186,7 +178,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / geoip command: | .ci/scripts/run-gradle.sh :modules:ingest-geoip:internalClusterTest -Dtests.jvm.argline="-Dgeoip_use_service=true" @@ -196,7 +187,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / s3 command: | export amazon_s3_bucket=elasticsearch-ci.us-west-2 @@ -211,7 +201,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: Upload Snyk Dependency Graph command: .ci/scripts/run-gradle.sh uploadSnykDependencyGraph -PsnykTargetReference=$BUILDKITE_BRANCH env: @@ -222,8 +211,7 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - if: build.branch == "main" || build.branch == "7.17" + if: build.branch == "main" || build.branch == "8.x" || build.branch == "7.17" - label: check-branch-consistency command: .ci/scripts/run-gradle.sh branchConsistency timeout_in_minutes: 15 @@ -231,7 +219,6 @@ steps: provider: gcp image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-2 - diskSizeGb: 250 - label: check-branch-protection-rules command: .buildkite/scripts/branch-protection.sh timeout_in_minutes: 5 diff --git a/.buildkite/pipelines/periodic.yml b/.buildkite/pipelines/periodic.yml index fa7e84fae160b..cbca7f820c7b7 100644 --- a/.buildkite/pipelines/periodic.yml +++ b/.buildkite/pipelines/periodic.yml @@ -11,7 +11,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.0.1 retry: @@ -31,7 +30,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.1.3 retry: @@ -51,7 +49,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk 
preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.2.3 retry: @@ -71,7 +68,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.3.3 retry: @@ -91,7 +87,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.4.3 retry: @@ -111,7 +106,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.5.3 retry: @@ -131,7 +125,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.6.2 retry: @@ -151,7 +144,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.7.1 retry: @@ -171,7 +163,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.8.2 retry: @@ -191,7 +182,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.9.2 retry: @@ -211,7 +201,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.10.4 retry: @@ -231,7 +220,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.11.4 retry: @@ -251,7 +239,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.12.2 retry: @@ -271,7 +258,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.13.4 retry: @@ -291,7 +277,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.14.3 retry: @@ -311,7 +296,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.15.3 retry: @@ -331,7 +315,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.16.0 retry: @@ -351,7 +334,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 9.0.0 retry: @@ -386,7 +368,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: example-plugins command: |- cd $$WORKSPACE/plugins/examples @@ -398,7 +379,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - group: java-fips-matrix steps: - label: "{{matrix.ES_RUNTIME_JAVA}} / {{matrix.GRADLE_TASK}} / java-fips-matrix" @@ -420,7 +400,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" GRADLE_TASK: "{{matrix.GRADLE_TASK}}" @@ -430,14 +409,13 @@ steps: matrix: setup: ES_RUNTIME_JAVA: - - openjdk17 + - openjdk21 BWC_VERSION: ["8.15.3", "8.16.0", "9.0.0"] agents: provider: gcp image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" BWC_VERSION: "{{matrix.BWC_VERSION}}" @@ -463,7 +441,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: 
"{{matrix.ES_RUNTIME_JAVA}}" GRADLE_TASK: "{{matrix.GRADLE_TASK}}" @@ -481,7 +458,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" BWC_VERSION: "{{matrix.BWC_VERSION}}" @@ -517,7 +493,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / azure command: | export azure_storage_container=elasticsearch-ci-thirdparty @@ -532,7 +507,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / gcs command: | export google_storage_bucket=elasticsearch-ci-thirdparty @@ -547,7 +521,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / geoip command: | .ci/scripts/run-gradle.sh :modules:ingest-geoip:internalClusterTest -Dtests.jvm.argline="-Dgeoip_use_service=true" @@ -557,7 +530,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / s3 command: | export amazon_s3_bucket=elasticsearch-ci.us-west-2 @@ -572,7 +544,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: Upload Snyk Dependency Graph command: .ci/scripts/run-gradle.sh uploadSnykDependencyGraph -PsnykTargetReference=$BUILDKITE_BRANCH env: @@ -583,7 +554,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 if: build.branch == "main" || build.branch == "7.17" - label: check-branch-consistency command: .ci/scripts/run-gradle.sh branchConsistency @@ -592,7 +562,6 @@ steps: provider: gcp image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-2 - diskSizeGb: 250 - label: check-branch-protection-rules command: .buildkite/scripts/branch-protection.sh timeout_in_minutes: 5 diff --git a/.buildkite/pipelines/pull-request/build-benchmark.yml b/.buildkite/pipelines/pull-request/build-benchmark.yml index 96330bee03638..8d3215b8393ce 100644 --- a/.buildkite/pipelines/pull-request/build-benchmark.yml +++ b/.buildkite/pipelines/pull-request/build-benchmark.yml @@ -22,4 +22,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/bwc-snapshots.yml b/.buildkite/pipelines/pull-request/bwc-snapshots.yml index 8f59e593b286f..5a9fc2d938ac0 100644 --- a/.buildkite/pipelines/pull-request/bwc-snapshots.yml +++ b/.buildkite/pipelines/pull-request/bwc-snapshots.yml @@ -18,4 +18,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/cloud-deploy.yml b/.buildkite/pipelines/pull-request/cloud-deploy.yml index 2932f874c5cf8..ce8e8206d51ff 100644 --- a/.buildkite/pipelines/pull-request/cloud-deploy.yml +++ b/.buildkite/pipelines/pull-request/cloud-deploy.yml @@ -11,4 +11,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/docs-check.yml b/.buildkite/pipelines/pull-request/docs-check.yml index 3bf1e43697a7c..2201eb2d1e4ea 100644 --- 
a/.buildkite/pipelines/pull-request/docs-check.yml +++ b/.buildkite/pipelines/pull-request/docs-check.yml @@ -12,4 +12,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/eql-correctness.yml b/.buildkite/pipelines/pull-request/eql-correctness.yml index d85827d10e886..8f7ca6942c0e9 100644 --- a/.buildkite/pipelines/pull-request/eql-correctness.yml +++ b/.buildkite/pipelines/pull-request/eql-correctness.yml @@ -7,4 +7,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/example-plugins.yml b/.buildkite/pipelines/pull-request/example-plugins.yml index fb4a17fb214cb..18d0de6594980 100644 --- a/.buildkite/pipelines/pull-request/example-plugins.yml +++ b/.buildkite/pipelines/pull-request/example-plugins.yml @@ -16,4 +16,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/full-bwc.yml b/.buildkite/pipelines/pull-request/full-bwc.yml index c404069bd0e60..d3fa8eccaf7d9 100644 --- a/.buildkite/pipelines/pull-request/full-bwc.yml +++ b/.buildkite/pipelines/pull-request/full-bwc.yml @@ -13,4 +13,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/packaging-upgrade-tests.yml b/.buildkite/pipelines/pull-request/packaging-upgrade-tests.yml index 970dafbb28647..c62cf23310422 100644 --- a/.buildkite/pipelines/pull-request/packaging-upgrade-tests.yml +++ b/.buildkite/pipelines/pull-request/packaging-upgrade-tests.yml @@ -18,6 +18,5 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: $BWC_VERSION diff --git a/.buildkite/pipelines/pull-request/part-1-fips.yml b/.buildkite/pipelines/pull-request/part-1-fips.yml index 99544e7f5a80b..42f930c1bde9a 100644 --- a/.buildkite/pipelines/pull-request/part-1-fips.yml +++ b/.buildkite/pipelines/pull-request/part-1-fips.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-1.yml b/.buildkite/pipelines/pull-request/part-1.yml index b4b9d5469ec41..3d467c6c41e43 100644 --- a/.buildkite/pipelines/pull-request/part-1.yml +++ b/.buildkite/pipelines/pull-request/part-1.yml @@ -7,4 +7,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-2-fips.yml b/.buildkite/pipelines/pull-request/part-2-fips.yml index 36a9801547d78..6a3647ceb50ae 100644 --- a/.buildkite/pipelines/pull-request/part-2-fips.yml +++ b/.buildkite/pipelines/pull-request/part-2-fips.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-2.yml b/.buildkite/pipelines/pull-request/part-2.yml index 12bd78cf895fd..43de69bbcd945 100644 --- a/.buildkite/pipelines/pull-request/part-2.yml +++ b/.buildkite/pipelines/pull-request/part-2.yml @@ -7,4 +7,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk 
- diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-3-fips.yml b/.buildkite/pipelines/pull-request/part-3-fips.yml index 4a2df3026e782..cee3ea153acb9 100644 --- a/.buildkite/pipelines/pull-request/part-3-fips.yml +++ b/.buildkite/pipelines/pull-request/part-3-fips.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-3.yml b/.buildkite/pipelines/pull-request/part-3.yml index 6991c05da85c6..12abae7634822 100644 --- a/.buildkite/pipelines/pull-request/part-3.yml +++ b/.buildkite/pipelines/pull-request/part-3.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-4-fips.yml b/.buildkite/pipelines/pull-request/part-4-fips.yml index 734f8af816895..11a50456ca4c0 100644 --- a/.buildkite/pipelines/pull-request/part-4-fips.yml +++ b/.buildkite/pipelines/pull-request/part-4-fips.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-4.yml b/.buildkite/pipelines/pull-request/part-4.yml index 59f2f2898a590..af11f08953d07 100644 --- a/.buildkite/pipelines/pull-request/part-4.yml +++ b/.buildkite/pipelines/pull-request/part-4.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-5-fips.yml b/.buildkite/pipelines/pull-request/part-5-fips.yml index 801b812bb99c0..4e193ac751086 100644 --- a/.buildkite/pipelines/pull-request/part-5-fips.yml +++ b/.buildkite/pipelines/pull-request/part-5-fips.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-5.yml b/.buildkite/pipelines/pull-request/part-5.yml index c7e50631d1cdd..306ce7533d0ed 100644 --- a/.buildkite/pipelines/pull-request/part-5.yml +++ b/.buildkite/pipelines/pull-request/part-5.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/precommit.yml b/.buildkite/pipelines/pull-request/precommit.yml index 8d1458b1b60c8..f6548dfeed9b2 100644 --- a/.buildkite/pipelines/pull-request/precommit.yml +++ b/.buildkite/pipelines/pull-request/precommit.yml @@ -10,4 +10,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/rest-compatibility.yml b/.buildkite/pipelines/pull-request/rest-compatibility.yml index 16144a2a0780f..a69810e23d960 100644 --- a/.buildkite/pipelines/pull-request/rest-compatibility.yml +++ b/.buildkite/pipelines/pull-request/rest-compatibility.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/validate-changelogs.yml b/.buildkite/pipelines/pull-request/validate-changelogs.yml index 296ef11637118..9451d321a9b39 100644 --- a/.buildkite/pipelines/pull-request/validate-changelogs.yml +++ b/.buildkite/pipelines/pull-request/validate-changelogs.yml @@ -7,4 +7,3 @@ 
steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/internal/DecodeBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/internal/DecodeBenchmark.java index 284324b3d9206..b8f0a11e21c8f 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/internal/DecodeBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/index/codec/tsdb/internal/DecodeBenchmark.java @@ -12,6 +12,7 @@ import org.apache.lucene.store.ByteArrayDataInput; import org.apache.lucene.store.ByteArrayDataOutput; import org.apache.lucene.store.DataOutput; +import org.elasticsearch.index.codec.tsdb.DocValuesForUtil; import org.openjdk.jmh.infra.Blackhole; import java.io.IOException; @@ -43,7 +44,7 @@ public void setupInvocation(int bitsPerValue) { @Override public void benchmark(int bitsPerValue, Blackhole bh) throws IOException { - forUtil.decode(bitsPerValue, this.dataInput, this.output); + DocValuesForUtil.decode(bitsPerValue, this.dataInput, this.output); bh.consume(this.output); } } diff --git a/docs/changelog/111852.yaml b/docs/changelog/111852.yaml new file mode 100644 index 0000000000000..c043cab43ebbd --- /dev/null +++ b/docs/changelog/111852.yaml @@ -0,0 +1,5 @@ +pr: 111852 +summary: Add DeBERTa-V2/V3 tokenizer +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/changelog/113216.yaml b/docs/changelog/113216.yaml new file mode 100644 index 0000000000000..dec0b991fdacf --- /dev/null +++ b/docs/changelog/113216.yaml @@ -0,0 +1,10 @@ +pr: 113216 +summary: "[Inference API] Deprecate elser service" +area: Machine Learning +type: deprecation +issues: [] +deprecation: + title: "[Inference API] Deprecate elser service" + area: REST API + details: The `elser` service of the inference API will be removed in an upcoming release. Please use the elasticsearch service instead. + impact: In the current version there is no impact. In a future version, users of the `elser` service will no longer be able to use it, and will be required to use the `elasticsearch` service to access elser through the inference API. 
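Editor's note: for readers following the migration path this deprecation changelog points to, here is a minimal hedged sketch of creating a sparse-embedding endpoint through the `elasticsearch` service instead of the deprecated `elser` service. The endpoint name `my-elser-endpoint` and the adaptive-allocation bounds are illustrative assumptions, not values taken from this patch.

[source,console]
----
// Illustrative only: endpoint id and allocation bounds are assumptions
PUT _inference/sparse_embedding/my-elser-endpoint
{
  "service": "elasticsearch",
  "service_settings": {
    "adaptive_allocations": {
      "enabled": true,
      "min_number_of_allocations": 1,
      "max_number_of_allocations": 4
    },
    "num_threads": 1,
    "model_id": ".elser_model_2"
  }
}
----

A request shaped like this should keep working after the `elser` service is removed, since it targets the `elasticsearch` service directly.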
diff --git a/docs/changelog/113498.yaml b/docs/changelog/113498.yaml new file mode 100644 index 0000000000000..93b21a1d171eb --- /dev/null +++ b/docs/changelog/113498.yaml @@ -0,0 +1,5 @@ +pr: 113498 +summary: Listing all available databases in the _ingest/geoip/database API +area: Ingest Node +type: enhancement +issues: [] diff --git a/docs/changelog/113561.yaml b/docs/changelog/113561.yaml new file mode 100644 index 0000000000000..d00eac7685bcc --- /dev/null +++ b/docs/changelog/113561.yaml @@ -0,0 +1,5 @@ +pr: 113561 +summary: Add link to Circuit Breaker "Data too large" exception message +area: Infra/Circuit Breakers +type: enhancement +issues: [] diff --git a/docs/changelog/113690.yaml b/docs/changelog/113690.yaml new file mode 100644 index 0000000000000..bd5f1245f471e --- /dev/null +++ b/docs/changelog/113690.yaml @@ -0,0 +1,5 @@ +pr: 113690 +summary: Add object param for keeping synthetic source +area: Mapping +type: enhancement +issues: [] diff --git a/docs/changelog/113910.yaml b/docs/changelog/113910.yaml new file mode 100644 index 0000000000000..aa9d3b61fe768 --- /dev/null +++ b/docs/changelog/113910.yaml @@ -0,0 +1,5 @@ +pr: 113910 +summary: Do not expand dots when storing objects in ignored source +area: Logs +type: bug +issues: [] diff --git a/docs/changelog/113911.yaml b/docs/changelog/113911.yaml new file mode 100644 index 0000000000000..5c2f93a6ea76a --- /dev/null +++ b/docs/changelog/113911.yaml @@ -0,0 +1,5 @@ +pr: 113911 +summary: Enable OpenAI Streaming +area: Machine Learning +type: enhancement +issues: [] diff --git a/docs/changelog/113988.yaml b/docs/changelog/113988.yaml new file mode 100644 index 0000000000000..d55e7eb2db326 --- /dev/null +++ b/docs/changelog/113988.yaml @@ -0,0 +1,5 @@ +pr: 113988 +summary: Track search and fetch failure stats +area: Stats +type: enhancement +issues: [] diff --git a/docs/changelog/113989.yaml b/docs/changelog/113989.yaml new file mode 100644 index 0000000000000..7bf50b52d9e07 --- /dev/null +++ b/docs/changelog/113989.yaml @@ -0,0 +1,5 @@ +pr: 113989 +summary: Add `max_multipart_parts` setting to S3 repository +area: Snapshot/Restore +type: enhancement +issues: [] diff --git a/docs/reference/esql/esql-query-api.asciidoc b/docs/reference/esql/esql-query-api.asciidoc index c8c735b73d2a4..d1db21043a5b5 100644 --- a/docs/reference/esql/esql-query-api.asciidoc +++ b/docs/reference/esql/esql-query-api.asciidoc @@ -79,6 +79,8 @@ For syntax, refer to <>. (Optional, boolean) If provided and `true` the response will include an extra `profile` object with information about how the query was executed. It provides insight into the performance of each part of the query. This is for human debugging as the object's format might change at any time. +Think of this like https://www.postgresql.org/docs/current/sql-explain.html[EXPLAIN ANALYZE] or +https://en.wikipedia.org/wiki/Query_plan[EXPLAIN PLAN]. `query`:: (Required, string) {esql} query to run. For syntax, refer to <>. @@ -109,4 +111,6 @@ Values for the search results. `profile`:: (object) Profile describing the execution of the query. Only returned if `profile` was sent in the body. -The object itself is for human debugging and can change at any time. +The object itself is for human debugging and can change at any time. Think of this like +https://www.postgresql.org/docs/current/sql-explain.html[EXPLAIN ANALYZE] or +https://en.wikipedia.org/wiki/Query_plan[EXPLAIN PLAN]. 
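Editor's note: the `profile` flag documented in the hunk above is easiest to grasp from a request shape, so here is a minimal hedged sketch. The index name `library` and the query text are placeholders, not anything defined by this change.

[source,console]
----
// Ask ES|QL to return execution profiling data alongside the results
POST /_query
{
  "query": "FROM library | STATS COUNT(*) BY author | SORT author | LIMIT 5",
  "profile": true
}
----

With `profile` set to `true`, the response carries an extra `profile` object alongside `columns` and `values`; as the docs above note, its layout is intended for human debugging and may change at any time.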
diff --git a/docs/reference/mapping/fields/synthetic-source.asciidoc b/docs/reference/mapping/fields/synthetic-source.asciidoc index ccea38cf602da..902b6c26611e5 100644 --- a/docs/reference/mapping/fields/synthetic-source.asciidoc +++ b/docs/reference/mapping/fields/synthetic-source.asciidoc @@ -32,18 +32,25 @@ space. Additional latency can be avoided by not loading `_source` field in queri [[synthetic-source-fields]] ===== Supported fields -Synthetic `_source` is supported by all field types. Depending on implementation details, field types have different properties when used with synthetic `_source`. +Synthetic `_source` is supported by all field types. Depending on implementation details, field types have different +properties when used with synthetic `_source`. -<> construct synthetic `_source` using existing data, most commonly <> and <>. For these field types, no additional space is needed to store the contents of `_source` field. Due to the storage layout of <>, the generated `_source` field undergoes <> compared to original document. +<> construct synthetic `_source` using existing data, most +commonly <> and <>. For these field types, no additional space +is needed to store the contents of `_source` field. Due to the storage layout of <>, the +generated `_source` field undergoes <> compared to original document. -For all other field types, the original value of the field is stored as is, in the same way as the `_source` field in non-synthetic mode. In this case there are no modifications and field data in `_source` is the same as in the original document. Similarly, malformed values of fields that use <> or <> need to be stored as is. This approach is less storage efficient since data needed for `_source` reconstruction is stored in addition to other data required to index the field (like `doc_values`). +For all other field types, the original value of the field is stored as is, in the same way as the `_source` field in +non-synthetic mode. In this case there are no modifications and field data in `_source` is the same as in the original +document. Similarly, malformed values of fields that use <> or +<> need to be stored as is. This approach is less storage efficient since data needed for +`_source` reconstruction is stored in addition to other data required to index the field (like `doc_values`). [[synthetic-source-restrictions]] ===== Synthetic `_source` restrictions -Synthetic `_source` cannot be used together with field mappings that use <>. - -Some field types have additional restrictions. These restrictions are documented in the **synthetic `_source`** section of the field type's <>. +Some field types have additional restrictions. These restrictions are documented in the **synthetic `_source`** section +of the field type's <>. [[synthetic-source-modifications]] ===== Synthetic `_source` modifications @@ -144,6 +151,42 @@ Will become: ---- // TEST[s/^/{"_source":/ s/\n$/}/] +This impacts how source contents can be referenced in <>. 
For instance, referencing +a script in its original source form will return null: + +[source,js] +---- +"script": { "source": """ emit(params._source['foo.bar.baz']) """ } +---- +// NOTCONSOLE + +Instead, source references need to be in line with the mapping structure: + +[source,js] +---- +"script": { "source": """ emit(params._source['foo']['bar']['baz']) """ } +---- +// NOTCONSOLE + +or simply + +[source,js] +---- +"script": { "source": """ emit(params._source.foo.bar.baz) """ } +---- +// NOTCONSOLE + +The following <> are preferable as, in addition to being agnostic to the +mapping structure, they make use of docvalues if available and fall back to synthetic source only when needed. This +reduces source synthesizing, a slow and costly operation. + +[source,js] +---- +"script": { "source": """ emit(field('foo.bar.baz').get(null)) """ } +"script": { "source": """ emit($('foo.bar.baz', null)) """ } +---- +// NOTCONSOLE + [[synthetic-source-modifications-alphabetical]] ====== Alphabetical sorting Synthetic `_source` fields are sorted alphabetically. The @@ -155,18 +198,99 @@ that ordering. [[synthetic-source-modifications-ranges]] ====== Representation of ranges -Range field values (e.g. `long_range`) are always represented as inclusive on both sides with bounds adjusted accordingly. See <>. +Range field values (e.g. `long_range`) are always represented as inclusive on both sides with bounds adjusted +accordingly. See <>. [[synthetic-source-precision-loss-for-point-types]] ====== Reduced precision of `geo_point` values -Values of `geo_point` fields are represented in synthetic `_source` with reduced precision. See <>. +Values of `geo_point` fields are represented in synthetic `_source` with reduced precision. See +<>. + +[[synthetic-source-keep]] +====== Minimizing source modifications + +It is possible to avoid synthetic source modifications for a particular object or field, at extra storage cost. +This is controlled through param `synthetic_source_keep` with the following option: + + - `none`: synthetic source diverges from the original source as described above (default). + - `arrays`: arrays of the corresponding field or object preserve the original element ordering and duplicate elements. +The synthetic source fragment for such arrays is not guaranteed to match the original source exactly, e.g. array +`[1, 2, [5], [[4, [3]]], 5]` may appear as-is or in an equivalent format like `[1, 2, 5, 4, 3, 5]`. The exact format +may change in the future, in an effort to reduce the storage overhead of this option. +- `all`: the source for both singleton instances and arrays of the corresponding field or object gets recorded. When +applied to objects, the source of all sub-objects and sub-fields gets captured. Furthermore, the original source of +arrays gets captured and appears in synthetic source with no modifications. 
+ +For instance: + +[source,console,id=create-index-with-synthetic-source-keep] +---- +PUT idx_keep +{ + "mappings": { + "_source": { + "mode": "synthetic" + }, + "properties": { + "path": { + "type": "object", + "synthetic_source_keep": "all" + }, + "ids": { + "type": "integer", + "synthetic_source_keep": "arrays" + } + } + } +} +---- +// TEST + +[source,console,id=synthetic-source-keep-example] +---- +PUT idx_keep/_doc/1 +{ + "path": { + "to": [ + { "foo": [3, 2, 1] }, + { "foo": [30, 20, 10] } + ], + "bar": "baz" + }, + "ids": [ 200, 100, 300, 100 ] +} +---- +// TEST[s/$/\nGET idx_keep\/_doc\/1?filter_path=_source\n/] + +returns the original source, with no array deduplication and sorting: + +[source,console-result] +---- +{ + "path": { + "to": [ + { "foo": [3, 2, 1] }, + { "foo": [30, 20, 10] } + ], + "bar": "baz" + }, + "ids": [ 200, 100, 300, 100 ] +} +---- +// TEST[s/^/{"_source":/ s/\n$/}/] +The option for capturing the source of arrays can be applied at index level, by setting +`index.mapping.synthetic_source_keep` to `arrays`. This applies to all objects and fields in the index, except for +the ones with explicit overrides of `synthetic_source_keep` set to `none`. In this case, the storage overhead grows +with the number and sizes of arrays present in source of each document, naturally. [[synthetic-source-fields-native-list]] ===== Field types that support synthetic source with no storage overhead -The following field types support synthetic source using data from <> or <>, and require no additional storage space to construct the `_source` field. +The following field types support synthetic source using data from <> or +>, and require no additional storage space to construct the `_source` field. -NOTE: If you enable the <> or <> settings, then additional storage is required to store ignored field values for these types. +NOTE: If you enable the <> or <> settings, then +additional storage is required to store ignored field values for these types. ** <> ** {plugins}/mapper-annotated-text-usage.html#annotated-text-synthetic-source[`annotated-text`] diff --git a/docs/reference/mapping/types/semantic-text.asciidoc b/docs/reference/mapping/types/semantic-text.asciidoc index d0fdf0145aa58..07abbff986643 100644 --- a/docs/reference/mapping/types/semantic-text.asciidoc +++ b/docs/reference/mapping/types/semantic-text.asciidoc @@ -63,12 +63,14 @@ PUT my-index-000002 `inference_id`:: (Required, string) {infer-cap} endpoint that will be used to generate the embeddings for the field. +This parameter cannot be updated. Use the <> to create the endpoint. If `search_inference_id` is specified, the {infer} endpoint defined by `inference_id` will only be used at index time. `search_inference_id`:: (Optional, string) {infer-cap} endpoint that will be used to generate embeddings at query time. +You can update this parameter by using the <>. Use the <> to create the endpoint. If not specified, the {infer} endpoint defined by `inference_id` will be used at both index and query time. diff --git a/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc b/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc index de9a35e0d29b8..f1bd238a64fbf 100644 --- a/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc @@ -50,7 +50,7 @@ PUT _inference/sparse_embedding/my-elser-endpoint <1> be used and ELSER creates sparse vectors. 
The `inference_id` is `my-elser-endpoint`. <2> The `elser` service is used in this example. -<3> This setting enables and configures {ml-docs}/ml-nlp-elser.html#elser-adaptive-allocations[adaptive allocations]. +<3> This setting enables and configures adaptive allocations. Adaptive allocations make it possible for ELSER to automatically scale up or down resources based on the current load on the process. [NOTE] diff --git a/docs/reference/snapshot-restore/repository-s3.asciidoc b/docs/reference/snapshot-restore/repository-s3.asciidoc index a75a1a3ce1042..1f55296139cd3 100644 --- a/docs/reference/snapshot-restore/repository-s3.asciidoc +++ b/docs/reference/snapshot-restore/repository-s3.asciidoc @@ -261,9 +261,11 @@ multiple deployments may share the same bucket. `chunk_size`:: - (<>) Big files can be broken down into chunks during snapshotting if needed. - Specify the chunk size as a value and unit, for example: - `1TB`, `1GB`, `10MB`. Defaults to the maximum size of a blob in the S3 which is `5TB`. + (<>) The maximum size of object that {es} will write to the repository + when creating a snapshot. Files which are larger than `chunk_size` will be chunked into several + smaller objects. {es} may also split a file across multiple objects to satisfy other constraints + such as the `max_multipart_parts` limit. Defaults to `5TB` which is the + https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[maximum size of an object in AWS S3]. `compress`:: @@ -292,6 +294,14 @@ include::repository-shared-settings.asciidoc[] size allowed by S3. Defaults to `100mb` or `5%` of JVM heap, whichever is smaller. +`max_multipart_parts` :: + + (<>) The maximum number of parts that {es} will write during a multipart upload + of a single object. Files which are larger than `buffer_size × max_multipart_parts` will be + chunked into several smaller objects. {es} may also split a file across multiple objects to + satisfy other constraints such as the `chunk_size` limit. Defaults to `10000` which is the + https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html[maximum number of parts in a multipart upload in AWS S3]. + `canned_acl`:: The S3 repository supports all diff --git a/modules/data-streams/src/main/java/org/elasticsearch/datastreams/rest/RestGetDataStreamsAction.java b/modules/data-streams/src/main/java/org/elasticsearch/datastreams/rest/RestGetDataStreamsAction.java index 7a27eddfaf8c7..3456f4b679474 100644 --- a/modules/data-streams/src/main/java/org/elasticsearch/datastreams/rest/RestGetDataStreamsAction.java +++ b/modules/data-streams/src/main/java/org/elasticsearch/datastreams/rest/RestGetDataStreamsAction.java @@ -41,9 +41,9 @@ public class RestGetDataStreamsAction extends BaseRestHandler { IndicesOptions.ConcreteTargetOptions.IGNORE_UNAVAILABLE, IndicesOptions.WildcardOptions.ALLOW_NO_INDICES, IndicesOptions.GatekeeperOptions.IGNORE_THROTTLED, - DataStream.isFailureStoreFeatureFlagEnabled() ? IndicesOptions.FailureStoreOptions.FAILURE_STORE : "name", "verbose" - ) + ), + DataStream.isFailureStoreFeatureFlagEnabled() ? 
Set.of(IndicesOptions.FailureStoreOptions.FAILURE_STORE) : Set.of() ) ); diff --git a/modules/ingest-geoip/build.gradle b/modules/ingest-geoip/build.gradle index b50fc86282d1f..4312221b33937 100644 --- a/modules/ingest-geoip/build.gradle +++ b/modules/ingest-geoip/build.gradle @@ -88,3 +88,8 @@ tasks.named("dependencyLicenses").configure { artifacts { restTests(new File(projectDir, "src/yamlRestTest/resources/rest-api-spec/test")) } + +tasks.named("yamlRestCompatTestTransform").configure({ task -> + task.skipTest("ingest_geoip/40_geoip_databases/Test adding, getting, and removing geoip databases", + "get databases behavior began returning more results in 8.16") +}) diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java index ce15e02e6efcc..940231b12c894 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java @@ -20,11 +20,13 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.hash.MessageDigests; import org.elasticsearch.common.logging.HeaderWarning; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.CheckedRunnable; import org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Tuple; import org.elasticsearch.env.Environment; import org.elasticsearch.gateway.GatewayService; @@ -37,6 +39,7 @@ import org.elasticsearch.watcher.ResourceWatcherService; import java.io.Closeable; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.UncheckedIOException; import java.nio.file.FileAlreadyExistsException; @@ -51,8 +54,10 @@ import java.security.MessageDigest; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -541,6 +546,35 @@ public Set getConfigDatabases() { return configDatabases.getConfigDatabases().keySet(); } + public Map getConfigDatabasesDetail() { + Map allDatabases = new HashMap<>(); + for (Map.Entry entry : configDatabases.getConfigDatabases().entrySet()) { + DatabaseReaderLazyLoader databaseReaderLazyLoader = entry.getValue(); + try { + allDatabases.put( + entry.getKey(), + new ConfigDatabaseDetail( + entry.getKey(), + databaseReaderLazyLoader.getMd5(), + databaseReaderLazyLoader.getBuildDateMillis(), + databaseReaderLazyLoader.getDatabaseType() + ) + ); + } catch (FileNotFoundException e) { + /* + * Since there is nothing to prevent a database from being deleted while this method is running, it is possible we get an + * exception here because the file no longer exists. We just log it and move on -- it's preferable to synchronization. 
+ */ + logger.trace(Strings.format("Unable to get metadata for config database %s", entry.getKey()), e); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + return allDatabases; + } + + public record ConfigDatabaseDetail(String name, @Nullable String md5, @Nullable Long buildDateInMillis, @Nullable String type) {} + public Set getFilesInTemp() { try (Stream files = Files.list(geoipTmpDirectory)) { return files.map(Path::getFileName).map(Path::toString).collect(Collectors.toSet()); diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java index dff083ea0cde8..e160c8ad1543f 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseReaderLazyLoader.java @@ -63,6 +63,7 @@ class DatabaseReaderLazyLoader implements IpDatabase { // cache the database type so that we do not re-read it on every pipeline execution final SetOnce databaseType; + final SetOnce buildDate; private volatile boolean deleteDatabaseFileOnShutdown; private final AtomicInteger currentUsages = new AtomicInteger(0); @@ -74,6 +75,7 @@ class DatabaseReaderLazyLoader implements IpDatabase { this.loader = createDatabaseLoader(databasePath); this.databaseReader = new SetOnce<>(); this.databaseType = new SetOnce<>(); + this.buildDate = new SetOnce<>(); } /** @@ -277,4 +279,15 @@ private Optional lookup(Reader reader, String ip, Class getNamedWriteables() { new NamedWriteableRegistry.Entry(PersistentTaskParams.class, GEOIP_DOWNLOADER, GeoIpTaskParams::new), new NamedWriteableRegistry.Entry(PersistentTaskState.class, ENTERPRISE_GEOIP_DOWNLOADER, EnterpriseGeoIpTaskState::new), new NamedWriteableRegistry.Entry(PersistentTaskParams.class, ENTERPRISE_GEOIP_DOWNLOADER, EnterpriseGeoIpTaskParams::new), - new NamedWriteableRegistry.Entry(Task.Status.class, GEOIP_DOWNLOADER, GeoIpDownloaderStats::new) + new NamedWriteableRegistry.Entry(Task.Status.class, GEOIP_DOWNLOADER, GeoIpDownloaderStats::new), + new NamedWriteableRegistry.Entry( + DatabaseConfiguration.Provider.class, + DatabaseConfiguration.Maxmind.NAME, + DatabaseConfiguration.Maxmind::new + ), + new NamedWriteableRegistry.Entry( + DatabaseConfiguration.Provider.class, + DatabaseConfiguration.Local.NAME, + DatabaseConfiguration.Local::new + ), + new NamedWriteableRegistry.Entry( + DatabaseConfiguration.Provider.class, + DatabaseConfiguration.Web.NAME, + DatabaseConfiguration.Web::new + ) ); } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java index b8b48e0f738a5..3399b71879e26 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java @@ -9,13 +9,16 @@ package org.elasticsearch.ingest.geoip.direct; +import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.cluster.metadata.MetadataCreateIndexService; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.NamedWriteable; import org.elasticsearch.common.io.stream.StreamInput; import 
org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ObjectParser; import org.elasticsearch.xcontent.ParseField; import org.elasticsearch.xcontent.ToXContentObject; import org.elasticsearch.xcontent.XContentBuilder; @@ -34,19 +37,19 @@ * That is, it has an id e.g. "my_db_config_1" and it says "download the file named XXXX from SomeCompany, and here's the * magic token to use to do that." */ -public record DatabaseConfiguration(String id, String name, Maxmind maxmind) implements Writeable, ToXContentObject { +public record DatabaseConfiguration(String id, String name, Provider provider) implements Writeable, ToXContentObject { // id is a user selected signifier like 'my_domain_db' // name is the name of a file that can be downloaded (like 'GeoIP2-Domain') - // a configuration will have a 'type' like "maxmind", and that might have some more details, + // a configuration will have a 'provider' like "maxmind", and that might have some more details, // for now, though the important thing is that the json has to have it even though we don't model it meaningfully in this class public DatabaseConfiguration { // these are invariants, not actual validation Objects.requireNonNull(id); Objects.requireNonNull(name); - Objects.requireNonNull(maxmind); + Objects.requireNonNull(provider); } /** @@ -76,25 +79,49 @@ public record DatabaseConfiguration(String id, String name, Maxmind maxmind) imp ); private static final ParseField NAME = new ParseField("name"); - private static final ParseField MAXMIND = new ParseField("maxmind"); + private static final ParseField MAXMIND = new ParseField(Maxmind.NAME); + private static final ParseField WEB = new ParseField(Web.NAME); + private static final ParseField LOCAL = new ParseField(Local.NAME); private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( "database", false, (a, id) -> { String name = (String) a[0]; - Maxmind maxmind = (Maxmind) a[1]; - return new DatabaseConfiguration(id, name, maxmind); + Provider provider; + if (a[1] != null) { + provider = (Maxmind) a[1]; + } else if (a[2] != null) { + provider = (Web) a[2]; + } else { + provider = (Local) a[3]; + } + return new DatabaseConfiguration(id, name, provider); } ); static { PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME); - PARSER.declareObject(ConstructingObjectParser.constructorArg(), (parser, id) -> Maxmind.PARSER.apply(parser, null), MAXMIND); + PARSER.declareObject( + ConstructingObjectParser.optionalConstructorArg(), + (parser, id) -> Maxmind.PARSER.apply(parser, null), + MAXMIND + ); + PARSER.declareObject(ConstructingObjectParser.optionalConstructorArg(), (parser, id) -> Web.PARSER.apply(parser, null), WEB); + PARSER.declareObject(ConstructingObjectParser.optionalConstructorArg(), (parser, id) -> Local.PARSER.apply(parser, null), LOCAL); } public DatabaseConfiguration(StreamInput in) throws IOException { - this(in.readString(), in.readString(), new Maxmind(in)); + this(in.readString(), in.readString(), readProvider(in)); + } + + private static Provider readProvider(StreamInput in) throws IOException { + if (in.getTransportVersion().onOrAfter(TransportVersions.INGEST_GEO_DATABASE_PROVIDERS)) { + return in.readNamedWriteable(Provider.class); + } else { + // prior to the above version, everything was always a maxmind, so this half of the if is logical + return new Maxmind(in.readString()); + } } public static 
DatabaseConfiguration parse(XContentParser parser, String id) { @@ -105,14 +132,27 @@ public static DatabaseConfiguration parse(XContentParser parser, String id) { public void writeTo(StreamOutput out) throws IOException { out.writeString(id); out.writeString(name); - maxmind.writeTo(out); + if (out.getTransportVersion().onOrAfter(TransportVersions.INGEST_GEO_DATABASE_PROVIDERS)) { + out.writeNamedWriteable(provider); + } else { + if (provider instanceof Maxmind maxmind) { + out.writeString(maxmind.accountId); + } else { + /* + * The existence of a non-Maxmind providers is gated on the feature get_database_configuration_action.multi_node, and + * get_database_configuration_action.multi_node is only available on or after + * TransportVersions.INGEST_GEO_DATABASE_PROVIDERS. + */ + assert false : "non-maxmind DatabaseConfiguration.Provider [" + provider.getWriteableName() + "]"; + } + } } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); builder.field("name", name); - builder.field("maxmind", maxmind); + builder.field(provider.getWriteableName(), provider); builder.endObject(); return builder; } @@ -168,7 +208,24 @@ public ActionRequestValidationException validate() { return err.validationErrors().isEmpty() ? null : err; } - public record Maxmind(String accountId) implements Writeable, ToXContentObject { + public boolean isReadOnly() { + return provider.isReadOnly(); + } + + /** + * A marker interface that all providers need to implement. + */ + public interface Provider extends NamedWriteable, ToXContentObject { + boolean isReadOnly(); + } + + public record Maxmind(String accountId) implements Provider { + public static final String NAME = "maxmind"; + + @Override + public String getWriteableName() { + return NAME; + } public Maxmind { // this is an invariant, not actual validation @@ -206,5 +263,90 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.endObject(); return builder; } + + @Override + public boolean isReadOnly() { + return false; + } + } + + public record Local(String type) implements Provider { + public static final String NAME = "local"; + + private static final ParseField TYPE = new ParseField("type"); + + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("database", false, (a, id) -> { + String type = (String) a[0]; + return new Local(type); + }); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), TYPE); + } + + public Local(StreamInput in) throws IOException { + this(in.readString()); + } + + public static Local parse(XContentParser parser) { + return PARSER.apply(parser, null); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(type); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("type", type); + builder.endObject(); + return builder; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public boolean isReadOnly() { + return true; + } + } + + public record Web() implements Provider { + public static final String NAME = "web"; + + private static final ObjectParser PARSER = new ObjectParser<>("database", Web::new); + + public Web(StreamInput in) throws IOException { + this(); + } + + public static Web parse(XContentParser parser) { + return PARSER.apply(parser, null); + } + + @Override + public void 
writeTo(StreamOutput out) throws IOException {} + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.endObject(); + return builder; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public boolean isReadOnly() { + return true; + } } } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java index 41be25987a31b..b5343f17e47b6 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java @@ -49,7 +49,12 @@ public DatabaseConfiguration getDatabase() { } public static Request parseRequest(TimeValue masterNodeTimeout, TimeValue ackTimeout, String id, XContentParser parser) { - return new Request(masterNodeTimeout, ackTimeout, DatabaseConfiguration.parse(parser, id)); + DatabaseConfiguration database = DatabaseConfiguration.parse(parser, id); + if (database.isReadOnly()) { + throw new IllegalArgumentException("Database " + id + " is read only"); + } else { + return new Request(masterNodeTimeout, ackTimeout, database); + } } @Override diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java index 088cea04cef87..b73b2fd4beb08 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java @@ -91,6 +91,8 @@ protected void masterOperation(Task task, Request request, ClusterState state, A final IngestGeoIpMetadata geoIpMeta = state.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); if (geoIpMeta.getDatabases().containsKey(id) == false) { throw new ResourceNotFoundException("Database configuration not found: {}", id); + } else if (geoIpMeta.getDatabases().get(id).database().isReadOnly()) { + throw new IllegalArgumentException("Database " + id + " is read only"); } deleteDatabaseConfigurationTaskQueue.submitTask( Strings.format("delete-geoip-database-configuration-[%s]", id), diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java index 0660a9ff0491d..c83c40e56b749 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java @@ -9,7 +9,6 @@ package org.elasticsearch.ingest.geoip.direct; -import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.FailedNodeException; import org.elasticsearch.action.support.ActionFilters; @@ -19,19 +18,28 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.regex.Regex; import 
org.elasticsearch.features.FeatureService; +import org.elasticsearch.ingest.geoip.DatabaseNodeService; +import org.elasticsearch.ingest.geoip.GeoIpTaskState; import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; import org.elasticsearch.injection.guice.Inject; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; import org.elasticsearch.tasks.Task; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; +import java.util.Base64; +import java.util.Collection; +import java.util.Comparator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; import static org.elasticsearch.ingest.IngestGeoIpFeatures.GET_DATABASE_CONFIGURATION_ACTION_MULTI_NODE; @@ -43,6 +51,7 @@ public class TransportGetDatabaseConfigurationAction extends TransportNodesActio List> { private final FeatureService featureService; + private final DatabaseNodeService databaseNodeService; @Inject public TransportGetDatabaseConfigurationAction( @@ -50,7 +59,8 @@ public TransportGetDatabaseConfigurationAction( ClusterService clusterService, ThreadPool threadPool, ActionFilters actionFilters, - FeatureService featureService + FeatureService featureService, + DatabaseNodeService databaseNodeService ) { super( GetDatabaseConfigurationAction.NAME, @@ -61,6 +71,7 @@ public TransportGetDatabaseConfigurationAction( threadPool.executor(ThreadPool.Names.MANAGEMENT) ); this.featureService = featureService; + this.databaseNodeService = databaseNodeService; } @Override @@ -74,9 +85,19 @@ protected void doExecute( * TransportGetDatabaseConfigurationAction used to be a TransportMasterNodeAction, and not all nodes in the cluster have been * updated. So we don't want to send node requests to the other nodes because they will blow up. Instead, we just return * the information that we used to return from the master node (it doesn't make any difference that this might not be the master - * node, because we're only reading the cluster state). + * node, because we're only reading the cluster state). Because older nodes only know about the Maxmind provider type, we filter + * out all others here to avoid causing problems on those nodes. 
*/ - newResponseAsync(task, request, createActionContext(task, request), List.of(), List.of(), listener); + newResponseAsync( + task, + request, + createActionContext(task, request).stream() + .filter(database -> database.database().provider() instanceof DatabaseConfiguration.Maxmind) + .toList(), + List.of(), + List.of(), + listener + ); } else { super.doExecute(task, request, listener); } @@ -97,28 +118,79 @@ protected List createActionContext(Task task, Get ); } - final IngestGeoIpMetadata geoIpMeta = clusterService.state().metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); List results = new ArrayList<>(); - + PersistentTasksCustomMetadata tasksMetadata = PersistentTasksCustomMetadata.getPersistentTasksCustomMetadata( + clusterService.state() + ); for (String id : ids) { - if (Regex.isSimpleMatchPattern(id)) { - for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { - if (Regex.simpleMatch(id, entry.getKey())) { - results.add(entry.getValue()); + results.addAll(getWebDatabases(tasksMetadata, id)); + results.addAll(getMaxmindDatabases(clusterService, id)); + } + return results; + } + + /* + * This returns read-only database information about the databases managed by the standard downloader + */ + private static Collection getWebDatabases(PersistentTasksCustomMetadata tasksMetadata, String id) { + List webDatabases = new ArrayList<>(); + if (tasksMetadata != null) { + PersistentTasksCustomMetadata.PersistentTask maybeGeoIpTask = tasksMetadata.getTask("geoip-downloader"); + if (maybeGeoIpTask != null) { + GeoIpTaskState geoIpTaskState = (GeoIpTaskState) maybeGeoIpTask.getState(); + if (geoIpTaskState != null) { + Map databases = geoIpTaskState.getDatabases(); + for (String databaseFileName : databases.keySet()) { + String databaseName = getDatabaseNameForFileName(databaseFileName); + String databaseId = getDatabaseIdForFileName(DatabaseConfiguration.Web.NAME, databaseFileName); + if ((Regex.isSimpleMatchPattern(id) && Regex.simpleMatch(id, databaseId)) || id.equals(databaseId)) { + webDatabases.add( + new DatabaseConfigurationMetadata( + new DatabaseConfiguration(databaseId, databaseName, new DatabaseConfiguration.Web()), + -1, + databases.get(databaseFileName).lastUpdate() + ) + ); + } } } - } else { - DatabaseConfigurationMetadata meta = geoIpMeta.getDatabases().get(id); - if (meta == null) { - throw new ResourceNotFoundException("database configuration not found: {}", id); - } else { - results.add(meta); + } + } + return webDatabases; + } + + private static String getDatabaseIdForFileName(String providerType, String databaseFileName) { + return "_" + providerType + "_" + Base64.getEncoder().encodeToString(databaseFileName.getBytes(StandardCharsets.UTF_8)); + } + + private static String getDatabaseNameForFileName(String databaseFileName) { + return databaseFileName.endsWith(".mmdb") + ? databaseFileName.substring(0, databaseFileName.length() + 1 - ".mmmdb".length()) + : databaseFileName; + } + + /* + * This returns information about databases that are downloaded from maxmind. 
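+     * (These cluster-state entries keep their user-chosen ids, e.g. "my_database_1", unlike the synthetic ids built by
+     * getDatabaseIdForFileName above, where for instance the downloader-managed file "MyCustomGeoLite2-City.mmdb" surfaces as
+     * "_web_TXlDdXN0b21HZW9MaXRlMi1DaXR5Lm1tZGI=".)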
+ */ + private static Collection getMaxmindDatabases(ClusterService clusterService, String id) { + List maxmindDatabases = new ArrayList<>(); + final IngestGeoIpMetadata geoIpMeta = clusterService.state().metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + if (Regex.isSimpleMatchPattern(id)) { + for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { + if (Regex.simpleMatch(id, entry.getKey())) { + maxmindDatabases.add(entry.getValue()); } } + } else { + DatabaseConfigurationMetadata meta = geoIpMeta.getDatabases().get(id); + if (meta != null) { + maxmindDatabases.add(meta); + } } - return results; + return maxmindDatabases; } + @Override protected void newResponseAsync( Task task, GetDatabaseConfigurationAction.Request request, @@ -127,13 +199,47 @@ protected void newResponseAsync( List failures, ActionListener listener ) { - ActionListener.run( - listener, - l -> ActionListener.respondAndRelease( + ActionListener.run(listener, l -> { + List combinedResults = new ArrayList<>(results); + combinedResults.addAll( + deduplicateNodeResponses(responses, results.stream().map(result -> result.database().name()).collect(Collectors.toSet())) + ); + ActionListener.respondAndRelease( l, - new GetDatabaseConfigurationAction.Response(results, clusterService.getClusterName(), responses, failures) + new GetDatabaseConfigurationAction.Response(combinedResults, clusterService.getClusterName(), responses, failures) + ); + }); + } + + /* + * This deduplicates the nodeResponses by name, favoring the most recent. This is because each node is reporting the local databases + * that it has, and we don't want to report duplicates to the user. It also filters out any that already exist in the set of + * preExistingNames. This is because the non-local databases take precedence, so any local database with the same name as a non-local + * one will not be used. + * Non-private for unit testing + */ + static Collection deduplicateNodeResponses( + List nodeResponses, + Set preExistingNames + ) { + /* + * Each node reports the list of databases that are in its config/ingest-geoip directory. For the sake of this API we assume all + * local databases with the same name are the same database, and deduplicate by name and just return the newest. 
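+     * For example, if three nodes each report a local database with the same name but modified dates 1, 1000 and 3, only the entry
+     * with modified date 1000 survives (see testDeduplicateNodeResponses in TransportGetDatabaseConfigurationActionTests below).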
+ */ + return nodeResponses.stream() + .flatMap(response -> response.getDatabases().stream()) + .collect( + Collectors.groupingBy( + database -> database.database().name(), + Collectors.maxBy(Comparator.comparing(DatabaseConfigurationMetadata::modifiedDate)) + ) ) - ); + .values() + .stream() + .filter(Optional::isPresent) + .map(Optional::get) + .filter(database -> preExistingNames.contains(database.database().name()) == false) + .toList(); } @Override @@ -157,7 +263,48 @@ protected GetDatabaseConfigurationAction.NodeResponse newNodeResponse(StreamInpu @Override protected GetDatabaseConfigurationAction.NodeResponse nodeOperation(GetDatabaseConfigurationAction.NodeRequest request, Task task) { - return new GetDatabaseConfigurationAction.NodeResponse(transportService.getLocalNode(), List.of()); + final Set ids; + if (request.getDatabaseIds().length == 0) { + // if we did not ask for a specific name, then return all databases + ids = Set.of("*"); + } else { + ids = new LinkedHashSet<>(Arrays.asList(request.getDatabaseIds())); + } + if (ids.size() > 1 && ids.stream().anyMatch(Regex::isSimpleMatchPattern)) { + throw new IllegalArgumentException( + "wildcard only supports a single value, please use comma-separated values or a single wildcard value" + ); + } + + List results = new ArrayList<>(); + for (String id : ids) { + results.addAll(getLocalDatabases(databaseNodeService, id)); + } + return new GetDatabaseConfigurationAction.NodeResponse(transportService.getLocalNode(), results); } + /* + * This returns information about the databases that users have put in the config/ingest-geoip directory on the node. + */ + private static List getLocalDatabases(DatabaseNodeService databaseNodeService, String id) { + List localDatabases = new ArrayList<>(); + Map configDatabases = databaseNodeService.getConfigDatabasesDetail(); + for (DatabaseNodeService.ConfigDatabaseDetail configDatabase : configDatabases.values()) { + String databaseId = getDatabaseIdForFileName(DatabaseConfiguration.Local.NAME, configDatabase.name()); + if ((Regex.isSimpleMatchPattern(id) && Regex.simpleMatch(id, databaseId)) || id.equals(databaseId)) { + localDatabases.add( + new DatabaseConfigurationMetadata( + new DatabaseConfiguration( + databaseId, + getDatabaseNameForFileName(configDatabase.name()), + new DatabaseConfiguration.Local(configDatabase.type()) + ), + -1, + configDatabase.buildDateInMillis() == null ? 
-1 : configDatabase.buildDateInMillis() + ) + ); + } + } + return localDatabases; + } } diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java index 231a2a856815c..6a98cd532604b 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.ingest.geoip; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; import org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata; @@ -21,6 +22,12 @@ import java.util.Map; public class IngestGeoIpMetadataTests extends AbstractChunkedSerializingTestCase { + + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + return new NamedWriteableRegistry(new IngestGeoIpPlugin().getNamedWriteables()); + } + @Override protected IngestGeoIpMetadata doParseInstance(XContentParser parser) throws IOException { return IngestGeoIpMetadata.fromXContent(parser); diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java index 847f9c5bf7d4a..476a30d86ee05 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java @@ -9,7 +9,9 @@ package org.elasticsearch.ingest.geoip.direct; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.ingest.geoip.IngestGeoIpPlugin; import org.elasticsearch.test.AbstractXContentSerializingTestCase; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xcontent.XContentParser; @@ -21,6 +23,11 @@ public class DatabaseConfigurationMetadataTests extends AbstractXContentSerializingTestCase { + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + return new NamedWriteableRegistry(new IngestGeoIpPlugin().getNamedWriteables()); + } + private String id; @Override diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java index bb11f71b26d03..33356ad4235dc 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java @@ -9,8 +9,12 @@ package org.elasticsearch.ingest.geoip.direct; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.ingest.geoip.IngestGeoIpPlugin; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.Local; import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.Maxmind; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.Web; import org.elasticsearch.test.AbstractXContentSerializingTestCase; 
import org.elasticsearch.xcontent.XContentParser; @@ -21,6 +25,11 @@ public class DatabaseConfigurationTests extends AbstractXContentSerializingTestCase { + @Override + protected NamedWriteableRegistry getNamedWriteableRegistry() { + return new NamedWriteableRegistry(new IngestGeoIpPlugin().getNamedWriteables()); + } + private String id; @Override @@ -35,26 +44,39 @@ protected DatabaseConfiguration createTestInstance() { } public static DatabaseConfiguration randomDatabaseConfiguration(String id) { - return new DatabaseConfiguration(id, randomFrom(MAXMIND_NAMES), new Maxmind(randomAlphaOfLength(5))); + DatabaseConfiguration.Provider provider = switch (between(0, 2)) { + case 0 -> new Maxmind(randomAlphaOfLength(5)); + case 1 -> new Web(); + case 2 -> new Local(randomAlphaOfLength(10)); + default -> throw new AssertionError("failure, got illegal switch case"); + }; + return new DatabaseConfiguration(id, randomFrom(MAXMIND_NAMES), provider); } @Override protected DatabaseConfiguration mutateInstance(DatabaseConfiguration instance) { switch (between(0, 2)) { case 0: - return new DatabaseConfiguration(instance.id() + randomAlphaOfLength(2), instance.name(), instance.maxmind()); + return new DatabaseConfiguration(instance.id() + randomAlphaOfLength(2), instance.name(), instance.provider()); case 1: return new DatabaseConfiguration( instance.id(), randomValueOtherThan(instance.name(), () -> randomFrom(MAXMIND_NAMES)), - instance.maxmind() + instance.provider() ); case 2: - return new DatabaseConfiguration( - instance.id(), - instance.name(), - new Maxmind(instance.maxmind().accountId() + randomAlphaOfLength(2)) - ); + DatabaseConfiguration.Provider provider = instance.provider(); + DatabaseConfiguration.Provider modifiedProvider; + if (provider instanceof Maxmind maxmind) { + modifiedProvider = new Maxmind(((Maxmind) instance.provider()).accountId() + randomAlphaOfLength(2)); + } else if (provider instanceof Web) { + modifiedProvider = new Maxmind(randomAlphaOfLength(20)); // can't modify a Web + } else if (provider instanceof Local local) { + modifiedProvider = new Local(local.type() + randomAlphaOfLength(2)); + } else { + throw new AssertionError("Unexpected provider type: " + provider.getClass()); + } + return new DatabaseConfiguration(instance.id(), instance.name(), modifiedProvider); default: throw new AssertionError("failure, got illegal switch case"); } diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationActionTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationActionTests.java new file mode 100644 index 0000000000000..988b50311186d --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationActionTests.java @@ -0,0 +1,131 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.test.ESTestCase; + +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +import static org.elasticsearch.ingest.geoip.direct.GetDatabaseConfigurationAction.NodeResponse; +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.Mockito.mock; + +public class TransportGetDatabaseConfigurationActionTests extends ESTestCase { + public void testDeduplicateNodeResponses() { + { + List nodeResponses = new ArrayList<>(); + Set preExistingNames = Set.of(); + Collection deduplicated = TransportGetDatabaseConfigurationAction.deduplicateNodeResponses( + nodeResponses, + preExistingNames + ); + assertTrue(deduplicated.isEmpty()); + } + { + List nodeResponses = List.of( + generateTestNodeResponse(List.of()), + generateTestNodeResponse(List.of()), + generateTestNodeResponse(List.of()) + ); + Set preExistingNames = Set.of(); + Collection deduplicated = TransportGetDatabaseConfigurationAction.deduplicateNodeResponses( + nodeResponses, + preExistingNames + ); + assertTrue(deduplicated.isEmpty()); + } + { + // 3 nodes with 3 overlapping responses. We expect the deduplicated collection to include 1, 2, 3, and 4. + List nodeResponses = List.of( + generateTestNodeResponse(List.of("1", "2", "3")), + generateTestNodeResponse(List.of("1", "2", "3")), + generateTestNodeResponse(List.of("1", "4")) + ); + Set preExistingNames = Set.of(); + Collection deduplicated = TransportGetDatabaseConfigurationAction.deduplicateNodeResponses( + nodeResponses, + preExistingNames + ); + assertThat(deduplicated.size(), equalTo(4)); + assertThat( + deduplicated.stream().map(database -> database.database().name()).collect(Collectors.toSet()), + equalTo(Set.of("1", "2", "3", "4")) + ); + } + { + /* + * 3 nodes with 3 overlapping responses, but this time we're also passing in a set of pre-existing names that overlap with + * two of them. So we expect the deduplicated collection to include 1 and 4. + */ + List nodeResponses = List.of( + generateTestNodeResponse(List.of("1", "2", "3")), + generateTestNodeResponse(List.of("1", "2", "3")), + generateTestNodeResponse(List.of("1", "4")) + ); + Set preExistingNames = Set.of("2", "3", "5"); + Collection deduplicated = TransportGetDatabaseConfigurationAction.deduplicateNodeResponses( + nodeResponses, + preExistingNames + ); + assertThat(deduplicated.size(), equalTo(2)); + assertThat( + deduplicated.stream().map(database -> database.database().name()).collect(Collectors.toSet()), + equalTo(Set.of("1", "4")) + ); + } + { + /* + * Here 3 nodes report the same database, but with different modified dates and versions. We expect the one with the highest + * modified date to win out. 
+ */ + List nodeResponses = List.of( + generateTestNodeResponseFromDatabases(List.of(generateTestDatabase("1", 1))), + generateTestNodeResponseFromDatabases(List.of(generateTestDatabase("1", 1000))), + generateTestNodeResponseFromDatabases(List.of(generateTestDatabase("1", 3))) + ); + Set preExistingNames = Set.of("2", "3", "5"); + Collection deduplicated = TransportGetDatabaseConfigurationAction.deduplicateNodeResponses( + nodeResponses, + preExistingNames + ); + assertThat(deduplicated.size(), equalTo(1)); + DatabaseConfigurationMetadata result = deduplicated.iterator().next(); + assertThat(result, equalTo(nodeResponses.get(1).getDatabases().get(0))); + } + } + + private NodeResponse generateTestNodeResponse(List databaseNames) { + List databases = databaseNames.stream().map(this::generateTestDatabase).toList(); + return generateTestNodeResponseFromDatabases(databases); + } + + private NodeResponse generateTestNodeResponseFromDatabases(List databases) { + DiscoveryNode discoveryNode = mock(DiscoveryNode.class); + return new NodeResponse(discoveryNode, databases); + } + + private DatabaseConfigurationMetadata generateTestDatabase(String databaseName) { + return generateTestDatabase(databaseName, randomLongBetween(0, Long.MAX_VALUE)); + } + + private DatabaseConfigurationMetadata generateTestDatabase(String databaseName, long modifiedDate) { + DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration( + randomAlphaOfLength(50), + databaseName, + new DatabaseConfiguration.Local(randomAlphaOfLength(20)) + ); + return new DatabaseConfigurationMetadata(databaseConfiguration, randomLongBetween(0, Long.MAX_VALUE), modifiedDate); + } +} diff --git a/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml b/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml index 6809443fdfbc3..04fd2ac6a8189 100644 --- a/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml +++ b/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml @@ -1,7 +1,7 @@ setup: - requires: - cluster_features: ["geoip.downloader.database.configuration"] - reason: "geoip downloader database configuration APIs added in 8.15" + cluster_features: ["geoip.downloader.database.configuration", "get_database_configuration_action.multi_node"] + reason: "geoip downloader database configuration APIs added in 8.15, and updated in 8.16 to return more results" --- "Test adding, getting, and removing geoip databases": @@ -41,6 +41,17 @@ setup: } - match: { acknowledged: true } + - do: + catch: /illegal_argument_exception/ + ingest.put_geoip_database: + id: "_web_TXlDdXN0b21HZW9MaXRlMi1DaXR5Lm1tZGI=" + body: > + { + "name": "GeoIP2-City", + "web": { + } + } + - do: ingest.get_geoip_database: id: "my_database_1" @@ -52,19 +63,37 @@ setup: - do: ingest.get_geoip_database: {} - - length: { databases: 2 } + - length: { databases: 6 } - do: ingest.get_geoip_database: id: "my_database_1,my_database_2" - length: { databases: 2 } + - do: + ingest.get_geoip_database: + id: "_web_TXlDdXN0b21HZW9MaXRlMi1DaXR5Lm1tZGI=" + - length: { databases: 1 } + - match: { databases.0.id: "_web_TXlDdXN0b21HZW9MaXRlMi1DaXR5Lm1tZGI=" } + - gte: { databases.0.modified_date_millis: -1 } + - match: { databases.0.database.name: "MyCustomGeoLite2-City" } + - do: ingest.delete_geoip_database: id: "my_database_1" + - do: + catch: /resource_not_found_exception/ + 
ingest.delete_geoip_database: + id: "_web_TXlDdXN0b21HZW9MaXRlMi1DaXR5Lm1tZGI=" + - do: ingest.get_geoip_database: {} + - length: { databases: 5 } + + - do: + ingest.get_geoip_database: + id: "my_database_2" - length: { databases: 1 } - match: { databases.0.id: "my_database_2" } - gte: { databases.0.modified_date_millis: 0 } diff --git a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java index f919284d8e897..af385eeac6a5b 100644 --- a/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java +++ b/modules/repository-s3/src/main/java/org/elasticsearch/repositories/s3/S3Repository.java @@ -140,6 +140,11 @@ class S3Repository extends MeteredBlobStoreRepository { MAX_FILE_SIZE_USING_MULTIPART ); + /** + * Maximum parts number for multipart upload. (see https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html) + */ + static final Setting MAX_MULTIPART_PARTS = Setting.intSetting("max_multipart_parts", 10_000, 1, 10_000); + /** * Sets the S3 storage class type for the backup files. Values may be standard, reduced_redundancy, * standard_ia, onezone_ia and intelligent_tiering. Defaults to standard. @@ -253,7 +258,9 @@ class S3Repository extends MeteredBlobStoreRepository { } this.bufferSize = BUFFER_SIZE_SETTING.get(metadata.settings()); - this.chunkSize = CHUNK_SIZE_SETTING.get(metadata.settings()); + var maxChunkSize = CHUNK_SIZE_SETTING.get(metadata.settings()); + var maxPartsNum = MAX_MULTIPART_PARTS.get(metadata.settings()); + this.chunkSize = objectSizeLimit(maxChunkSize, bufferSize, maxPartsNum); // We make sure that chunkSize is bigger or equal than/to bufferSize if (this.chunkSize.getBytes() < bufferSize.getBytes()) { @@ -302,6 +309,20 @@ private static Map buildLocation(RepositoryMetadata metadata) { return Map.of("base_path", BASE_PATH_SETTING.get(metadata.settings()), "bucket", BUCKET_SETTING.get(metadata.settings())); } + /** + * Calculates S3 object size limit based on 2 constraints: maximum object(chunk) size + * and maximum number of parts for multipart upload. + * https://docs.aws.amazon.com/AmazonS3/latest/userguide/qfacts.html + * + * @param chunkSize s3 object size + * @param bufferSize s3 multipart upload part size + * @param maxPartsNum s3 multipart upload max parts number + */ + private static ByteSizeValue objectSizeLimit(ByteSizeValue chunkSize, ByteSizeValue bufferSize, int maxPartsNum) { + var bytes = Math.min(chunkSize.getBytes(), bufferSize.getBytes() * maxPartsNum); + return ByteSizeValue.ofBytes(bytes); + } + /** * Holds a reference to delayed repository operation {@link Scheduler.Cancellable} so it can be cancelled should the repository be * closed concurrently. 
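A quick sketch of the arithmetic behind the new MAX_MULTIPART_PARTS cap: the effective chunk size becomes min(chunk_size, buffer_size * max_multipart_parts), so with the values exercised by the tests below a 5TB chunk_size is clamped to 100MB * 10,000 = 1,000,000 MB (~976.5 GB), while a 1GB chunk_size is already under the cap and is left unchanged. The class below is only an illustration, not part of the patch; it reuses ByteSizeValue and mirrors the private S3Repository#objectSizeLimit helper.

import org.elasticsearch.common.unit.ByteSizeValue;

// Illustrative only: mirrors S3Repository#objectSizeLimit to show how the cap is computed.
class ChunkSizeCapSketch {
    static ByteSizeValue objectSizeLimit(ByteSizeValue chunkSize, ByteSizeValue bufferSize, int maxPartsNum) {
        // the effective object (chunk) size is limited by both the chunk_size setting
        // and the total size addressable by a multipart upload (part size * max parts)
        return ByteSizeValue.ofBytes(Math.min(chunkSize.getBytes(), bufferSize.getBytes() * maxPartsNum));
    }

    public static void main(String[] args) {
        // chunk_size=5TB, buffer_size=100MB, max_multipart_parts=10000 -> clamped to 1,000,000 MB (~976.5 GB)
        System.out.println(objectSizeLimit(ByteSizeValue.ofGb(5 * 1024), ByteSizeValue.ofMb(100), 10_000));
        // chunk_size=1GB is already below the cap, so it is returned unchanged
        System.out.println(objectSizeLimit(ByteSizeValue.ofGb(1), ByteSizeValue.ofMb(100), 10_000));
    }
}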
diff --git a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java index 1eab59ebb0eb7..3817af4def888 100644 --- a/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java +++ b/modules/repository-s3/src/test/java/org/elasticsearch/repositories/s3/S3RepositoryTests.java @@ -175,4 +175,37 @@ public void testAnalysisFailureDetail() { } } + // ensures that chunkSize is limited to chunk_size setting, when buffer_size * parts_num is bigger + public void testChunkSizeLimit() { + var meta = new RepositoryMetadata( + "dummy-repo", + "mock", + Settings.builder() + .put(S3Repository.BUCKET_SETTING.getKey(), "bucket") + .put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "1GB") + .put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB") + .put(S3Repository.MAX_MULTIPART_PARTS.getKey(), 10_000) // ~1TB + .build() + ); + try (var repo = createS3Repo(meta)) { + assertEquals(ByteSizeValue.ofGb(1), repo.chunkSize()); + } + } + + // ensures that chunkSize is limited to buffer_size * parts_num, when chunk_size setting is bigger + public void testPartsNumLimit() { + var meta = new RepositoryMetadata( + "dummy-repo", + "mock", + Settings.builder() + .put(S3Repository.BUCKET_SETTING.getKey(), "bucket") + .put(S3Repository.CHUNK_SIZE_SETTING.getKey(), "5TB") + .put(S3Repository.BUFFER_SIZE_SETTING.getKey(), "100MB") + .put(S3Repository.MAX_MULTIPART_PARTS.getKey(), 10_000) + .build() + ); + try (var repo = createS3Repo(meta)) { + assertEquals(ByteSizeValue.ofMb(1_000_000), repo.chunkSize()); + } + } } diff --git a/muted-tests.yml b/muted-tests.yml index 22ecb333a1908..5b2c59c1dc8c0 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -278,9 +278,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testCreateJobsWithIndexNameOption issue: https://github.com/elastic/elasticsearch/issues/113528 -- class: org.elasticsearch.validation.DotPrefixClientYamlTestSuiteIT - method: test {p0=dot_prefix/10_basic/Deprecated index template with a dot prefix index pattern} - issue: https://github.com/elastic/elasticsearch/issues/113529 - class: org.elasticsearch.xpack.ml.integration.MlJobIT method: testCantCreateJobWithSameID issue: https://github.com/elastic/elasticsearch/issues/113581 @@ -345,14 +342,21 @@ tests: - class: org.elasticsearch.xpack.inference.TextEmbeddingCrudIT method: testPutE5Small_withPlatformAgnosticVariant issue: https://github.com/elastic/elasticsearch/issues/113983 -- class: org.elasticsearch.test.rest.ClientYamlTestSuiteIT - issue: https://github.com/elastic/elasticsearch/issues/114013 - class: org.elasticsearch.xpack.rank.rrf.RRFRankClientYamlTestSuiteIT method: test {yaml=rrf/700_rrf_retriever_search_api_compatibility/rrf retriever with top-level collapse} issue: https://github.com/elastic/elasticsearch/issues/114019 - class: org.elasticsearch.xpack.inference.TextEmbeddingCrudIT method: testPutE5WithTrainedModelAndInference issue: https://github.com/elastic/elasticsearch/issues/114023 +- class: org.elasticsearch.xpack.rank.rrf.RRFRetrieverBuilderIT + method: testRRFWithCollapse + issue: https://github.com/elastic/elasticsearch/issues/114074 +- class: org.elasticsearch.xpack.rank.rrf.RRFRetrieverBuilderIT + method: testMultipleRRFRetrievers + issue: https://github.com/elastic/elasticsearch/issues/114079 +- class: org.elasticsearch.xpack.inference.TextEmbeddingCrudIT + method: testPutE5Small_withPlatformSpecificVariant 
+ issue: https://github.com/elastic/elasticsearch/issues/113950 # Examples: # diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index ed1cf905f7e9d..a742e83255bbb 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -57,5 +57,4 @@ tasks.named("precommit").configure { tasks.named("yamlRestCompatTestTransform").configure({task -> task.skipTest("indices.sort/10_basic/Index Sort", "warning does not exist for compatibility") task.skipTest("search/330_fetch_fields/Test search rewrite", "warning does not exist for compatibility") - task.skipTestsByFilePattern("indices.create/synthetic_source*.yml", "@UpdateForV9 -> tests do not pass after bumping API version to 9 [ES-9597]") }) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index a999bb7816065..a871d2ac0ae15 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -197,7 +197,7 @@ empty object with unmapped fields: --- disabled root object: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.ignored_source.dont_expand_dots"] reason: requires tracking ignored source - do: @@ -222,17 +222,19 @@ disabled root object: index: test - match: { hits.total.value: 1 } - - match: { hits.hits.0._source.name: aaaa } - - match: { hits.hits.0._source.some_string: AaAa } - - match: { hits.hits.0._source.some_int: 1000 } - - match: { hits.hits.0._source.some_double: 123.456789 } - - match: { hits.hits.0._source.a.very.deeply.nested.field: AAAA } - + - match: + hits.hits.0._source: + name: aaaa + some_string: AaAa + some_int: 1000 + some_double: 123.456789 + some_bool: true + a.very.deeply.nested.field: AAAA --- disabled object: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.ignored_source.dont_expand_dots"] reason: requires tracking ignored source - do: @@ -261,14 +263,15 @@ disabled object: - match: { hits.total.value: 1 } - match: { hits.hits.0._source.name: aaaa } - - match: { hits.hits.0._source.path.some_int: 1000 } - - match: { hits.hits.0._source.path.to.a.very.deeply.nested.field: AAAA } - + - match: + hits.hits.0._source.path: + some_int: 1000 + to.a.very.deeply.nested.field: AAAA --- disabled object contains array: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.ignored_source.dont_expand_dots"] reason: requires tracking ignored source - do: @@ -297,10 +300,12 @@ disabled object contains array: - match: { hits.total.value: 1 } - match: { hits.hits.0._source.name: aaaa } - - match: { hits.hits.0._source.path.0.some_int: 1000 } - - match: { hits.hits.0._source.path.0.to.a.very.deeply.nested.field: AAAA } - - match: { hits.hits.0._source.path.1.some_double: 10.0 } - - match: { hits.hits.0._source.path.1.some_bool: true } + - match: + hits.hits.0._source.path: + - some_int: 1000 + to.a.very.deeply.nested.field: AAAA + - some_double: 10.0 + some_bool: true --- @@ -429,7 +434,7 @@ mixed disabled and enabled objects: --- object with dynamic override: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.ignored_source.dont_expand_dots"] reason: requires tracking ignored source - do: @@ -467,7 +472,7 @@ object with dynamic 
override: - match: { hits.hits.0._source.name: a } - match: { hits.hits.0._source.path_no.name: foo } - match: { hits.hits.0._source.path_no.some_int: 10 } - - match: { hits.hits.0._source.path_no.to.a.very.deeply.nested.field: A } + - match: { hits.hits.0._source.path_no.to: { a.very.deeply.nested.field: A } } - match: { hits.hits.0._source.path_runtime.name: bar } - match: { hits.hits.0._source.path_runtime.some_int: 20 } - match: { hits.hits.0._source.path_runtime.to.a.very.deeply.nested.field: B } @@ -524,7 +529,7 @@ subobject with dynamic override: --- object array in object with dynamic override: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.synthetic_source_keep"] reason: requires tracking ignored source - do: @@ -895,7 +900,7 @@ doubly nested object: --- subobjects auto: - requires: - cluster_features: ["mapper.subobjects_auto"] + cluster_features: ["mapper.subobjects_auto", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source and supporting subobjects auto setting - do: @@ -920,7 +925,7 @@ subobjects auto: id: type: keyword stored: - store_array_source: true + synthetic_source_keep: arrays properties: span: properties: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml index 7d7be765631e5..dfe6c9820a16a 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml @@ -1,7 +1,71 @@ +--- +object param - store complex object: + - requires: + cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"] + reason: requires tracking ignored source + + - do: + indices.create: + index: test + body: + mappings: + _source: + mode: synthetic + properties: + id: + type: integer + stored: + synthetic_source_keep: all + properties: + object_array: + properties: + trace: + type: keyword + nested: + type: nested + kw: + type: keyword + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "id": 1, "stored": { "object_array": [ {"trace": "B"}, {"trace": "A"} ], "nested": [ {"foo": 20}, {"foo": 10} ], "kw": 100 } }' + - '{ "create": { } }' + - '{ "id": 2, "stored": { "object_array": { "trace": ["D", "C"] }, "nested": { "bar": [ 40, 30] }, "kw": 200, "baz": "2000" } }' + - '{ "create": { } }' + - '{ "id": 3, "stored": [ { "object_array": { "trace": "E" } }, { "nested": { "bar": [ 60, 50] } }, { "kw": 300 } ] }' + + - do: + search: + index: test + sort: id + + - match: { hits.hits.0._source.id: 1 } + - match: { hits.hits.0._source.stored.object_array.0.trace: B } + - match: { hits.hits.0._source.stored.object_array.1.trace: A } + - match: { hits.hits.0._source.stored.nested.0.foo: 20 } + - match: { hits.hits.0._source.stored.nested.1.foo: 10 } + - match: { hits.hits.0._source.stored.kw: 100 } + + - match: { hits.hits.1._source.id: 2 } + - match: { hits.hits.1._source.stored.object_array.trace: [D, C] } + - match: { hits.hits.1._source.stored.nested.bar: [40, 30] } + - match: { hits.hits.1._source.stored.kw: 200 } + - match: { hits.hits.1._source.stored.baz: "2000" } + + - match: { hits.hits.2._source.id: 3 } + - match: { hits.hits.2._source.stored.0.object_array.trace: E } + - match: { hits.hits.2._source.stored.1.nested.bar: [ 60, 50 ] } + - match: { 
hits.hits.2._source.stored.2.kw: 300 } + + --- object param - object array: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source - do: @@ -25,7 +89,7 @@ object param - object array: id: type: keyword stored: - store_array_source: true + synthetic_source_keep: arrays properties: span: properties: @@ -65,7 +129,7 @@ object param - object array: --- object param - object array within array: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source - do: @@ -77,10 +141,10 @@ object param - object array within array: mode: synthetic properties: stored: - store_array_source: true + synthetic_source_keep: arrays properties: path: - store_array_source: true + synthetic_source_keep: arrays properties: to: properties: @@ -108,7 +172,7 @@ object param - object array within array: --- object param - no object array: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source - do: @@ -120,7 +184,7 @@ object param - no object array: mode: synthetic properties: stored: - store_array_source: true + synthetic_source_keep: arrays properties: span: properties: @@ -150,7 +214,7 @@ object param - no object array: --- object param - field ordering in object array: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source - do: @@ -164,7 +228,7 @@ object param - field ordering in object array: a: type: keyword b: - store_array_source: true + synthetic_source_keep: arrays properties: aa: type: keyword @@ -173,7 +237,7 @@ object param - field ordering in object array: c: type: keyword d: - store_array_source: true + synthetic_source_keep: arrays properties: aa: type: keyword @@ -199,7 +263,7 @@ object param - field ordering in object array: --- object param - nested object array next to other fields: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source - do: @@ -215,7 +279,7 @@ object param - nested object array next to other fields: b: properties: c: - store_array_source: true + synthetic_source_keep: arrays properties: aa: type: keyword @@ -255,7 +319,7 @@ object param - nested object array next to other fields: --- object param - nested object with stored array: - requires: - cluster_features: ["mapper.track_ignored_source"] + cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source - do: @@ -272,7 +336,7 @@ object param - nested object with stored array: type: nested nested_array_stored: type: nested - store_array_source: true + synthetic_source_keep: all - do: bulk: @@ -304,7 +368,7 @@ object param - nested object with stored array: --- index param - nested array within array: - requires: - cluster_features: ["mapper.synthetic_source_keep"] + cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"] reason: requires tracking ignored source - do: @@ -322,7 +386,7 @@ index param - nested array within array: to: properties: some: - store_array_source: true + synthetic_source_keep: 
arrays properties: id: type: integer @@ -351,7 +415,7 @@ index param - nested array within array: # 112156 stored field under object with store_array_source: - requires: - cluster_features: ["mapper.source.synthetic_source_stored_fields_advance_fix"] + cluster_features: ["mapper.source.synthetic_source_stored_fields_advance_fix", "mapper.bwc_workaround_9_0"] reason: requires bug fix to be implemented - do: @@ -369,7 +433,7 @@ stored field under object with store_array_source: name: type: keyword obj: - store_array_source: true + synthetic_source_keep: arrays properties: foo: type: keyword @@ -740,7 +804,7 @@ field param - nested array within array: --- index param - root arrays: - requires: - cluster_features: ["mapper.synthetic_source_keep"] + cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"] reason: requires keeping array source - do: @@ -772,6 +836,9 @@ index param - root arrays: properties: id: type: keyword + obj_default: + type: object + synthetic_source_keep: none - do: bulk: @@ -782,6 +849,8 @@ index param - root arrays: - '{ "id": 1, "leaf": [30, 20, 10], "leaf_default": [30, 20, 10], "obj": [ { "trace": { "id": "a" }, "span": { "id": "1" } }, { "trace": { "id": "b" }, "span": { "id": "1" } } ] }' - '{ "create": { } }' - '{ "id": 2, "leaf": [130, 120, 110], "leaf_default": [130, 120, 110], "obj": [ { "trace": { "id": "aa" }, "span": { "id": "2" } }, { "trace": { "id": "bb" }, "span": { "id": "2" } } ] }' + - '{ "create": { } }' + - '{ "id": 3, "obj_default": [ { "trace": { "id": "bb" }, "span": { "id": "2" } }, { "trace": { "id": "aa" }, "span": { "id": "2" } } ] }' - do: search: @@ -799,13 +868,17 @@ index param - root arrays: - match: { hits.hits.1._source.id: 2 } - match: { hits.hits.1._source.leaf: [ 130, 120, 110 ] } - - match: { hits.hits.0._source.leaf_default: [10, 20, 30] } + - match: { hits.hits.1._source.leaf_default: [110, 120, 130] } - length: { hits.hits.1._source.obj: 2 } - match: { hits.hits.1._source.obj.0.trace.id: aa } - match: { hits.hits.1._source.obj.0.span.id: "2" } - match: { hits.hits.1._source.obj.1.trace.id: bb } - match: { hits.hits.1._source.obj.1.span.id: "2" } + - match: { hits.hits.2._source.id: 3 } + - match: { hits.hits.2._source.obj_default.trace.id: [aa, bb] } + - match: { hits.hits.2._source.obj_default.span.id: "2" } + --- index param - dynamic root arrays: diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/stats/SearchStatsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/stats/SearchStatsIT.java index d0bae2b9ee47e..9f40b1928dce6 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/stats/SearchStatsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/stats/SearchStatsIT.java @@ -13,23 +13,35 @@ import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse; import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse; import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.routing.GroupShardsIterator; import org.elasticsearch.cluster.routing.ShardIterator; import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.index.mapper.OnScriptError; import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.index.query.RangeQueryBuilder; import 
org.elasticsearch.index.search.stats.SearchStats.Stats; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.ScriptPlugin; +import org.elasticsearch.script.LongFieldScript; import org.elasticsearch.script.MockScriptPlugin; import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptContext; +import org.elasticsearch.script.ScriptEngine; import org.elasticsearch.script.ScriptType; import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder; +import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.lookup.Source; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.json.JsonXContent; import java.util.Collection; import java.util.Collections; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.function.Function; @@ -53,7 +65,7 @@ public class SearchStatsIT extends ESIntegTestCase { @Override protected Collection> nodePlugins() { - return Collections.singleton(CustomScriptPlugin.class); + return List.of(CustomScriptPlugin.class, FailingFieldPlugin.class); } public static class CustomScriptPlugin extends MockScriptPlugin { @@ -68,6 +80,50 @@ protected Map, Object>> pluginScripts() { } } + public static class FailingFieldPlugin extends Plugin implements ScriptPlugin { + + @Override + public ScriptEngine getScriptEngine(Settings settings, Collection> contexts) { + return new ScriptEngine() { + @Override + public String getType() { + return "failing_field"; + } + + @Override + @SuppressWarnings("unchecked") + public FactoryType compile( + String name, + String code, + ScriptContext context, + Map params + ) { + return (FactoryType) new LongFieldScript.Factory() { + @Override + public LongFieldScript.LeafFactory newFactory( + String fieldName, + Map params, + SearchLookup searchLookup, + OnScriptError onScriptError + ) { + return ctx -> new LongFieldScript(fieldName, params, searchLookup, onScriptError, ctx) { + @Override + public void execute() { + throw new IllegalArgumentException("Accessing failing field"); + } + }; + } + }; + } + + @Override + public Set> getSupportedContexts() { + return Set.of(LongFieldScript.CONTEXT); + } + }; + } + } + @Override protected int numberOfReplicas() { return 0; @@ -244,4 +300,64 @@ protected int numAssignedShards(String... 
indices) { GroupShardsIterator allAssignedShardsGrouped = state.routingTable().allAssignedShardsGrouped(indices, true); return allAssignedShardsGrouped.size(); } + + public void testFailureStats() throws Exception { + String indexName = "test"; + XContentBuilder mapping = JsonXContent.contentBuilder().startObject(); + mapping.startObject("runtime"); + { + mapping.startObject("fail_me"); + { + mapping.field("type", "long"); + mapping.startObject("script").field("source", "").field("lang", "failing_field").endObject(); + } + mapping.endObject(); + } + mapping.endObject(); + mapping.endObject(); + int numOfShards = between(1, 5); + client().admin() + .indices() + .prepareCreate(indexName) + .setSettings(Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, numOfShards)) + .setMapping(mapping) + .get(); + int numDocs = between(20, 100); + for (int i = 1; i < numDocs; i++) { + index(indexName, Integer.toString(i), Map.of("position", i)); + } + refresh(indexName); + int numQueries = between(1, 10); + long failedQueries = 0; + for (int q = 0; q < numQueries; q++) { + expectThrows(Exception.class, () -> { + client().prepareSearch(indexName) + .setQuery(new RangeQueryBuilder("fail_me").gt(10)) + .setAllowPartialSearchResults(true) + .get(); + }); + failedQueries += numOfShards; + var stats = client().admin().indices().prepareStats(indexName).all().get().getTotal().search.getTotal(); + assertThat(stats.getQueryCount(), equalTo(0L)); + assertThat(stats.getQueryFailure(), equalTo(failedQueries)); + assertThat(stats.getFetchCount(), equalTo(0L)); + assertThat(stats.getFetchFailure(), equalTo(0L)); + } + int numFetches = between(1, 10); + for (int q = 0; q < numFetches; q++) { + expectThrows(Exception.class, () -> { + client().prepareSearch(indexName) + .setQuery(new RangeQueryBuilder("position").gt(0)) + .setFetchSource(false) + .addFetchField("fail_me") + .setSize(1000) + .get(); + }); + var stats = client().admin().indices().prepareStats(indexName).all().get().getTotal().search.getTotal(); + assertThat(stats.getQueryCount(), equalTo((q + 1L) * numOfShards)); + assertThat(stats.getQueryFailure(), equalTo(failedQueries)); + assertThat(stats.getFetchCount(), equalTo(0L)); + assertThat(stats.getFetchFailure(), equalTo((q + 1L) * numOfShards)); + } + } } diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index 7ff0ed1bbe82c..c55436f85a6e3 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -232,6 +232,8 @@ static TransportVersion def(int id) { public static final TransportVersion ESQL_CCS_EXECUTION_INFO = def(8_756_00_0); public static final TransportVersion REGEX_AND_RANGE_INTERVAL_QUERIES = def(8_757_00_0); public static final TransportVersion RRF_QUERY_REWRITE = def(8_758_00_0); + public static final TransportVersion SEARCH_FAILURE_STATS = def(8_759_00_0); + public static final TransportVersion INGEST_GEO_DATABASE_PROVIDERS = def(8_760_00_0); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/bootstrap/ESPolicy.java b/server/src/main/java/org/elasticsearch/bootstrap/ESPolicy.java index e07e0608c1383..845303abe6baf 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/ESPolicy.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/ESPolicy.java @@ -16,6 +16,7 @@ import java.io.IOException; import java.net.SocketPermission; import java.net.URL; +import java.security.AllPermission; import java.security.CodeSource; import java.security.Permission; import java.security.PermissionCollection; @@ -39,6 +40,7 @@ final class ESPolicy extends Policy { static final String UNTRUSTED_RESOURCE = "untrusted.policy"; private static final String ALL_FILE_MASK = "read,readlink,write,delete,execute"; + private static final AllPermission ALL_PERMISSION = new AllPermission(); final Policy template; final Policy untrusted; @@ -124,7 +126,7 @@ public boolean implies(ProtectionDomain domain, Permission permission) { * It's helpful to use the infrastructure around FilePermission here to do the directory structure check with implies * so we use ALL_FILE_MASK mask to check if we can do something with this file, whatever the actual operation we're requesting */ - return canAccessSecuredFile(location, new FilePermission(permission.getName(), ALL_FILE_MASK)); + return canAccessSecuredFile(domain, new FilePermission(permission.getName(), ALL_FILE_MASK)); } if (location != null) { @@ -157,15 +159,24 @@ public boolean implies(ProtectionDomain domain, Permission permission) { } @SuppressForbidden(reason = "We get given an URL by the security infrastructure") - private boolean canAccessSecuredFile(URL location, FilePermission permission) { - if (location == null) { + private boolean canAccessSecuredFile(ProtectionDomain domain, FilePermission permission) { + if (domain == null || domain.getCodeSource() == null || domain.getCodeSource().getLocation() == null) { return false; } + // If the domain in question has AllPermission - only true of sources built into the JDK, as we prevent AllPermission from being + // configured in Elasticsearch - then it has access to this file. 
+ + if (system.implies(domain, ALL_PERMISSION)) { + return true; + } + URL location = domain.getCodeSource().getLocation(); + // check the source Set accessibleSources = securedFiles.get(permission); if (accessibleSources != null) { // simple case - single-file referenced directly + return accessibleSources.contains(location); } else { // there's a directory reference in there somewhere diff --git a/server/src/main/java/org/elasticsearch/bootstrap/Security.java b/server/src/main/java/org/elasticsearch/bootstrap/Security.java index f22413f9abd12..dc6de9a6b2c91 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/Security.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/Security.java @@ -47,6 +47,7 @@ import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.function.Consumer; @@ -236,17 +237,25 @@ private static Map> readSecuredConfigFiles( for (Map.Entry> ps : settingPatterns) { if (ps.getKey().matcher(setting).matches()) { // add the setting value to the secured files for these codebase URLs - Path file = environment.configFile().resolve(environment.settings().get(setting)); - if (file.startsWith(environment.configFile()) == false) { - throw new IllegalStateException(ps.getValue() + " tried to grant access to file outside config directory " + file); - } - if (logger.isDebugEnabled()) { - ps.getValue() - .forEach( - url -> logger.debug("Jar {} securing access to config file {} through setting {}", url, file, setting) + String settingValue = environment.settings().get(setting); + // Some settings can also be an HTTPS URL in addition to a file path; if that's the case just skip this one. + // If the setting shouldn't be an HTTPS URL, that'll be caught by that setting's validation later in the process. + // HTTP (no S) URLs are not supported. 
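Editor's note, not part of the patch: the ESPolicy change above starts the secured-file check from the ProtectionDomain instead of a bare URL, lets JDK code through (only built-in code sources hold AllPermission, since Elasticsearch refuses to configure it), and only then consults the secured-files map. A stripped-down sketch of that flow, with a hypothetical class name and only the direct-reference branch shown:

import java.io.FilePermission;
import java.net.URL;
import java.security.AllPermission;
import java.security.Policy;
import java.security.ProtectionDomain;
import java.util.Map;
import java.util.Set;

final class SecuredFileCheck {
    private static final AllPermission ALL_PERMISSION = new AllPermission();

    private final Policy system;                               // the JDK's built-in policy
    private final Map<FilePermission, Set<URL>> securedFiles;  // secured file -> code sources allowed to touch it

    SecuredFileCheck(Policy system, Map<FilePermission, Set<URL>> securedFiles) {
        this.system = system;
        this.securedFiles = securedFiles;
    }

    boolean canAccessSecuredFile(ProtectionDomain domain, FilePermission permission) {
        if (domain == null || domain.getCodeSource() == null || domain.getCodeSource().getLocation() == null) {
            return false; // nothing to match against
        }
        // JDK classes carry AllPermission under the system policy; they may read any secured file.
        if (system.implies(domain, ALL_PERMISSION)) {
            return true;
        }
        URL location = domain.getCodeSource().getLocation();
        Set<URL> accessibleSources = securedFiles.get(permission);
        // simple case: the file is referenced directly by one or more code sources
        return accessibleSources != null && accessibleSources.contains(location);
    }
}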
+ if (settingValue.toLowerCase(Locale.ROOT).startsWith("https://") == false) { + Path file = environment.configFile().resolve(settingValue); + if (file.startsWith(environment.configFile()) == false) { + throw new IllegalStateException( + ps.getValue() + " tried to grant access to file outside config directory " + file ); + } + if (logger.isDebugEnabled()) { + ps.getValue() + .forEach( + url -> logger.debug("Jar {} securing access to config file {} through setting {}", url, file, setting) + ); + } + securedConfigFiles.computeIfAbsent(file.toString(), k -> new HashSet<>()).addAll(ps.getValue()); } - securedConfigFiles.computeIfAbsent(file.toString(), k -> new HashSet<>()).addAll(ps.getValue()); } } } diff --git a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java index 4b0a0c5e77ebb..b059113b4098c 100644 --- a/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java +++ b/server/src/main/java/org/elasticsearch/common/ReferenceDocs.java @@ -80,6 +80,7 @@ public enum ReferenceDocs { FLOOD_STAGE_WATERMARK, X_OPAQUE_ID, FORMING_SINGLE_NODE_CLUSTERS, + CIRCUIT_BREAKER_ERRORS, // this comment keeps the ';' on the next line so every entry above has a trailing ',' which makes the diff for adding new links cleaner ; diff --git a/server/src/main/java/org/elasticsearch/common/breaker/ChildMemoryCircuitBreaker.java b/server/src/main/java/org/elasticsearch/common/breaker/ChildMemoryCircuitBreaker.java index 6d8510d27f27a..9669e78a119b9 100644 --- a/server/src/main/java/org/elasticsearch/common/breaker/ChildMemoryCircuitBreaker.java +++ b/server/src/main/java/org/elasticsearch/common/breaker/ChildMemoryCircuitBreaker.java @@ -10,6 +10,7 @@ package org.elasticsearch.common.breaker; import org.apache.logging.log4j.Logger; +import org.elasticsearch.common.ReferenceDocs; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.indices.breaker.BreakerSettings; import org.elasticsearch.indices.breaker.HierarchyCircuitBreakerService; @@ -87,7 +88,8 @@ public void circuitBreak(String fieldName, long bytesNeeded) { + memoryBytesLimit + "/" + ByteSizeValue.ofBytes(memoryBytesLimit) - + "]"; + + "]; for more information, see " + + ReferenceDocs.CIRCUIT_BREAKER_ERRORS; logger.debug(() -> format("%s", message)); throw new CircuitBreakingException(message, bytesNeeded, memoryBytesLimit, durability); } diff --git a/server/src/main/java/org/elasticsearch/common/settings/Setting.java b/server/src/main/java/org/elasticsearch/common/settings/Setting.java index 6ad20b9fc6d16..a0b6e665042d0 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/Setting.java +++ b/server/src/main/java/org/elasticsearch/common/settings/Setting.java @@ -1629,6 +1629,27 @@ public static > Setting enumSetting( return new Setting<>(key, defaultValue.toString(), e -> Enum.valueOf(clazz, e.toUpperCase(Locale.ROOT)), validator, properties); } + /** + * Creates a setting where the allowed values are defined as enum constants. All enum constants must be uppercase. + * + * @param the generics type parameter reflecting the actual type of the enum + * @param clazz the enum class + * @param defaultValue a default value function that returns the default values string representation. + * @param key the key for the setting + * @param validator validator for this setting + * @param properties properties for this setting like scope, filtering... 
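Editor's note, not part of the patch: the Security.java hunk above skips secured-setting values that are HTTPS URLs and keeps the containment check for file paths. A small sketch of just that decision, under the assumption that URL validation happens later in the setting's own validator (names are illustrative):

import java.nio.file.Path;
import java.util.Locale;

final class SecuredConfigFileResolver {
    // Returns null for HTTPS URLs (handled by the setting's own validation),
    // otherwise resolves against the config directory and rejects escapes.
    static Path resolveOrSkip(Path configDir, String settingValue) {
        if (settingValue.toLowerCase(Locale.ROOT).startsWith("https://")) {
            return null;
        }
        Path file = configDir.resolve(settingValue);
        if (file.startsWith(configDir) == false) {
            throw new IllegalStateException("tried to grant access to file outside config directory " + file);
        }
        return file;
    }
}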
+ * @return the setting object + */ + public static > Setting enumSetting( + Class clazz, + Function defaultValue, + String key, + Validator validator, + Property... properties + ) { + return new Setting<>(key, defaultValue, e -> Enum.valueOf(clazz, e.toUpperCase(Locale.ROOT)), validator, properties); + } + /** * Creates a setting where the allowed values are defined as enum constants. All enum constants must be uppercase. * diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java index f3d758f4fc8b7..ae372ea8194bc 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch814Codec.java @@ -67,7 +67,7 @@ public Elasticsearch814Codec() { */ public Elasticsearch814Codec(Zstd814StoredFieldsFormat.Mode mode) { super("Elasticsearch814", lucene99Codec); - this.storedFieldsFormat = new Zstd814StoredFieldsFormat(mode); + this.storedFieldsFormat = mode.getFormat(); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java index 00711c7ecc306..27ff19a9d8e40 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java +++ b/server/src/main/java/org/elasticsearch/index/codec/Elasticsearch816Codec.java @@ -28,9 +28,13 @@ */ public class Elasticsearch816Codec extends CodecService.DeduplicateFieldInfosCodec { + private static final Lucene912Codec LUCENE_912_CODEC = new Lucene912Codec(); + private static final PostingsFormat defaultPostingsFormat = new Lucene912PostingsFormat(); + private static final DocValuesFormat defaultDVFormat = new Lucene90DocValuesFormat(); + private static final KnnVectorsFormat defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat(); + private final StoredFieldsFormat storedFieldsFormat; - private final PostingsFormat defaultPostingsFormat; private final PostingsFormat postingsFormat = new PerFieldPostingsFormat() { @Override public PostingsFormat getPostingsFormatForField(String field) { @@ -38,7 +42,6 @@ public PostingsFormat getPostingsFormatForField(String field) { } }; - private final DocValuesFormat defaultDVFormat; private final DocValuesFormat docValuesFormat = new PerFieldDocValuesFormat() { @Override public DocValuesFormat getDocValuesFormatForField(String field) { @@ -46,7 +49,6 @@ public DocValuesFormat getDocValuesFormatForField(String field) { } }; - private final KnnVectorsFormat defaultKnnVectorsFormat; private final KnnVectorsFormat knnVectorsFormat = new PerFieldKnnVectorsFormat() { @Override public KnnVectorsFormat getKnnVectorsFormatForField(String field) { @@ -64,11 +66,8 @@ public Elasticsearch816Codec() { * worse space-efficiency or vice-versa. 
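Editor's note, not part of the patch: the new enumSetting overload above takes the default as a function of the settings instead of a fixed constant, so the default can depend on another setting. A hedged usage sketch; the enum, keys, and validator are made up for illustration and are not part of the patch:

import org.elasticsearch.common.settings.Setting;
import org.elasticsearch.common.settings.Settings;

enum CompressionMode { FAST, SIZE }

final class ExampleSettings {
    // Hypothetical: default to SIZE when another flag is set, FAST otherwise.
    static final Setting<CompressionMode> COMPRESSION_MODE = Setting.enumSetting(
        CompressionMode.class,
        settings -> settings.getAsBoolean("index.example.optimize_for_size", false)
            ? CompressionMode.SIZE.toString()
            : CompressionMode.FAST.toString(),
        "index.example.compression_mode",
        value -> {},                     // no extra validation in this sketch
        Setting.Property.IndexScope
    );
}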
*/ public Elasticsearch816Codec(Zstd814StoredFieldsFormat.Mode mode) { - super("Elasticsearch816", new Lucene912Codec()); - this.storedFieldsFormat = new Zstd814StoredFieldsFormat(mode); - this.defaultPostingsFormat = new Lucene912PostingsFormat(); - this.defaultDVFormat = new Lucene90DocValuesFormat(); - this.defaultKnnVectorsFormat = new Lucene99HnswVectorsFormat(); + super("Elasticsearch816", LUCENE_912_CODEC); + this.storedFieldsFormat = mode.getFormat(); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java index 671931ac7154a..648913098ff0d 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtil.java @@ -21,17 +21,10 @@ public class DocValuesForUtil { private static final int BITS_IN_FIVE_BYTES = 5 * Byte.SIZE; private static final int BITS_IN_SIX_BYTES = 6 * Byte.SIZE; private static final int BITS_IN_SEVEN_BYTES = 7 * Byte.SIZE; - private final int blockSize; - private final byte[] encoded; + private static final int blockSize = ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE; + private final byte[] encoded = new byte[1024]; - public DocValuesForUtil() { - this(ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); - } - - private DocValuesForUtil(int blockSize) { - this.blockSize = blockSize; - this.encoded = new byte[1024]; - } + public DocValuesForUtil() {} public static int roundBits(int bitsPerValue) { if (bitsPerValue > 24 && bitsPerValue <= 32) { @@ -74,7 +67,7 @@ private void encodeFiveSixOrSevenBytesPerValue(long[] in, int bitsPerValue, fina out.writeBytes(this.encoded, bytesPerValue * in.length); } - public void decode(int bitsPerValue, final DataInput in, long[] out) throws IOException { + public static void decode(int bitsPerValue, final DataInput in, long[] out) throws IOException { if (bitsPerValue <= 24) { ForUtil.decode(bitsPerValue, in, out); } else if (bitsPerValue <= 32) { @@ -88,7 +81,7 @@ public void decode(int bitsPerValue, final DataInput in, long[] out) throws IOEx } } - private void decodeFiveSixOrSevenBytesPerValue(int bitsPerValue, final DataInput in, long[] out) throws IOException { + private static void decodeFiveSixOrSevenBytesPerValue(int bitsPerValue, final DataInput in, long[] out) throws IOException { // NOTE: we expect multibyte values to be written "least significant byte" first int bytesPerValue = bitsPerValue / Byte.SIZE; long mask = (1L << bitsPerValue) - 1; diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java index f152a0b0601a2..4e95ce34dc410 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesEncoder.java @@ -275,7 +275,7 @@ void decodeOrdinals(DataInput in, long[] out, int bitsPerOrd) throws IOException Arrays.fill(out, runLen, out.length, v2); } else if (encoding == 2) { // bit-packed - forUtil.decode(bitsPerOrd, in, out); + DocValuesForUtil.decode(bitsPerOrd, in, out); } else if (encoding == 3) { // cycle encoding int cycleLength = (int) v1; @@ -299,7 +299,7 @@ void decode(DataInput in, long[] out) throws IOException { final int bitsPerValue = token >>> 3; if (bitsPerValue != 0) { - forUtil.decode(bitsPerValue, in, out); + 
DocValuesForUtil.decode(bitsPerValue, in, out); } else { Arrays.fill(out, 0L); } diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java index a887516e5e7cc..e3c2daddba80e 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/ES87TSDBDocValuesProducer.java @@ -355,7 +355,7 @@ public TermsEnum termsEnum() throws IOException { } } - private abstract class BaseSortedSetDocValues extends SortedSetDocValues { + private abstract static class BaseSortedSetDocValues extends SortedSetDocValues { final SortedSetEntry entry; final IndexInput data; diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java index 7a8d09c02ba3b..b1e91ad75e9a2 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES813FlatVectorFormat.java @@ -36,7 +36,7 @@ public class ES813FlatVectorFormat extends KnnVectorsFormat { static final String NAME = "ES813FlatVectorFormat"; - private final FlatVectorsFormat format = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE); + private static final FlatVectorsFormat format = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE); /** * Sole constructor diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java index 4313aa40cf13e..4bf396e8d5ad1 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES814ScalarQuantizedVectorsFormat.java @@ -49,6 +49,10 @@ public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat { private static final FlatVectorsFormat rawVectorFormat = new Lucene99FlatVectorsFormat(DefaultFlatVectorScorer.INSTANCE); + static final FlatVectorsScorer flatVectorScorer = new ESFlatVectorsScorer( + new ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE) + ); + /** The minimum confidence interval */ private static final float MINIMUM_CONFIDENCE_INTERVAL = 0.9f; @@ -60,7 +64,6 @@ public class ES814ScalarQuantizedVectorsFormat extends FlatVectorsFormat { * calculated as `1-1/(vector_dimensions + 1)` */ public final Float confidenceInterval; - final FlatVectorsScorer flatVectorScorer; private final byte bits; private final boolean compress; @@ -83,7 +86,6 @@ public ES814ScalarQuantizedVectorsFormat(Float confidenceInterval, int bits, boo throw new IllegalArgumentException("bits must be one of: 4, 7, 8; bits=" + bits); } this.confidenceInterval = confidenceInterval; - this.flatVectorScorer = new ESFlatVectorsScorer(new ScalarQuantizedVectorScorer(DefaultFlatVectorScorer.INSTANCE)); this.bits = (byte) bits; this.compress = compress; } diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormat.java index 2df0757a8b8ee..af771b6a27f19 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormat.java +++ 
b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorFormat.java @@ -22,7 +22,7 @@ public class ES815BitFlatVectorFormat extends KnnVectorsFormat { static final String NAME = "ES815BitFlatVectorFormat"; - private final FlatVectorsFormat format = new ES815BitFlatVectorsFormat(); + private static final FlatVectorsFormat format = new ES815BitFlatVectorsFormat(); /** * Sole constructor diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java index f1ae4e3fdeded..5969c9d5db6d7 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815BitFlatVectorsFormat.java @@ -27,7 +27,7 @@ class ES815BitFlatVectorsFormat extends FlatVectorsFormat { - private final FlatVectorsFormat delegate = new Lucene99FlatVectorsFormat(FlatBitVectorScorer.INSTANCE); + private static final FlatVectorsFormat delegate = new Lucene99FlatVectorsFormat(FlatBitVectorScorer.INSTANCE); protected ES815BitFlatVectorsFormat() { super("ES815BitFlatVectorsFormat"); diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java index 55271719a4574..5e4656ea94c5b 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/ES815HnswBitVectorsFormat.java @@ -30,7 +30,7 @@ public class ES815HnswBitVectorsFormat extends KnnVectorsFormat { private final int maxConn; private final int beamWidth; - private final FlatVectorsFormat flatVectorsFormat = new ES815BitFlatVectorsFormat(); + private static final FlatVectorsFormat flatVectorsFormat = new ES815BitFlatVectorsFormat(); public ES815HnswBitVectorsFormat() { this(16, 100); diff --git a/server/src/main/java/org/elasticsearch/index/codec/zstd/Zstd814StoredFieldsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/zstd/Zstd814StoredFieldsFormat.java index 84871b5c811dd..6aa77b7222696 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/zstd/Zstd814StoredFieldsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/zstd/Zstd814StoredFieldsFormat.java @@ -52,17 +52,23 @@ public enum Mode { BEST_COMPRESSION(3, BEST_COMPRESSION_BLOCK_SIZE, 2048); final int level, blockSizeInBytes, blockDocCount; + final Zstd814StoredFieldsFormat format; Mode(int level, int blockSizeInBytes, int blockDocCount) { this.level = level; this.blockSizeInBytes = blockSizeInBytes; this.blockDocCount = blockDocCount; + this.format = new Zstd814StoredFieldsFormat(this); + } + + public Zstd814StoredFieldsFormat getFormat() { + return format; } } private final Mode mode; - public Zstd814StoredFieldsFormat(Mode mode) { + private Zstd814StoredFieldsFormat(Mode mode) { super("ZstdStoredFields814", new ZstdCompressionMode(mode.level), mode.blockSizeInBytes, mode.blockDocCount, 10); this.mode = mode; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index c82621baa717a..19bd4f9980baf 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -135,7 +135,7 @@ private void 
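Editor's note, not part of the patch: the Zstd814StoredFieldsFormat change above makes the constructor private and has each Mode constant own its single format instance, so codecs share one format per mode instead of allocating a new one each time. A generic sketch of that enum-as-holder pattern with placeholder names:

// Sketch: each enum constant eagerly builds and caches the object configured for it.
final class StoredFields {
    private final Mode mode;

    private StoredFields(Mode mode) {   // private: instances only come from Mode
        this.mode = mode;
    }

    Mode mode() {
        return mode;
    }

    enum Mode {
        BEST_SPEED(0),
        BEST_COMPRESSION(3);

        final int level;
        final StoredFields format;

        Mode(int level) {
            this.level = level;
            this.format = new StoredFields(this); // one shared instance per constant
        }

        StoredFields getFormat() {
            return format;
        }
    }
}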
internalParseDocument(MetadataFieldMapper[] metadataFieldsMappers, new IgnoredSourceFieldMapper.NameValue( MapperService.SINGLE_MAPPING_NAME, 0, - XContentDataHelper.encodeToken(context.parser()), + context.encodeFlattenedToken(), context.doc() ) ); @@ -236,7 +236,7 @@ private static List parseDocForMissingValues var leaf = fields.get(fullName); // There may be multiple matches for array elements, don't use #remove. if (leaf != null) { parser.nextToken(); // Advance the parser to the value to be read. - result.add(leaf.cloneWithValue(XContentDataHelper.encodeToken(parser))); + result.add(leaf.cloneWithValue(context.encodeFlattenedToken())); parser.nextToken(); // Skip the token ending the value. fieldName = null; } @@ -402,7 +402,7 @@ static void parseObjectOrNested(DocumentParserContext context) throws IOExceptio new IgnoredSourceFieldMapper.NameValue( context.parent().fullPath(), context.parent().fullPath().lastIndexOf(context.parent().leafName()), - XContentDataHelper.encodeToken(parser), + context.encodeFlattenedToken(), context.doc() ) ); @@ -421,20 +421,21 @@ static void parseObjectOrNested(DocumentParserContext context) throws IOExceptio throwOnConcreteValue(context.parent(), currentFieldName, context); } + if (context.canAddIgnoredField() && getSourceKeepMode(context, context.parent().sourceKeepMode()) == Mapper.SourceKeepMode.ALL) { + context = context.addIgnoredFieldFromContext( + new IgnoredSourceFieldMapper.NameValue( + context.parent().fullPath(), + context.parent().fullPath().lastIndexOf(context.parent().leafName()), + null, + context.doc() + ) + ); + token = context.parser().currentToken(); + parser = context.parser(); + } + if (context.parent().isNested()) { // Handle a nested object that doesn't contain an array. Arrays are handled in #parseNonDynamicArray. - if (context.parent().storeArraySource() && context.canAddIgnoredField()) { - context = context.addIgnoredFieldFromContext( - new IgnoredSourceFieldMapper.NameValue( - context.parent().fullPath(), - context.parent().fullPath().lastIndexOf(context.parent().leafName()), - null, - context.doc() - ) - ); - token = context.parser().currentToken(); - parser = context.parser(); - } context = context.createNestedContext((NestedObjectMapper) context.parent()); } @@ -651,12 +652,11 @@ private static void parseObjectDynamic(DocumentParserContext context, String cur if (context.dynamic() == ObjectMapper.Dynamic.FALSE) { failIfMatchesRoutingPath(context, currentFieldName); if (context.canAddIgnoredField()) { - // read everything up to end object and store it context.addIgnoredField( IgnoredSourceFieldMapper.NameValue.fromContext( context, context.path().pathAsText(currentFieldName), - XContentDataHelper.encodeToken(context.parser()) + context.encodeFlattenedToken() ) ); } else { @@ -742,7 +742,7 @@ private static void parseArrayDynamic(DocumentParserContext context, String curr IgnoredSourceFieldMapper.NameValue.fromContext( context, context.path().pathAsText(currentFieldName), - XContentDataHelper.encodeToken(context.parser()) + context.encodeFlattenedToken() ) ); } else { @@ -760,7 +760,7 @@ private static void parseArrayDynamic(DocumentParserContext context, String curr IgnoredSourceFieldMapper.NameValue.fromContext( context, context.path().pathAsText(currentFieldName), - XContentDataHelper.encodeToken(context.parser()) + context.encodeFlattenedToken() ) ); } catch (IOException e) { @@ -801,8 +801,8 @@ private static void parseNonDynamicArray( // Check if we need to record the array source. This only applies to synthetic source. 
if (context.canAddIgnoredField()) { boolean objectRequiresStoringSource = mapper instanceof ObjectMapper objectMapper - && (objectMapper.storeArraySource() - || (context.sourceKeepModeFromIndexSettings() == Mapper.SourceKeepMode.ARRAYS + && (getSourceKeepMode(context, objectMapper.sourceKeepMode()) == Mapper.SourceKeepMode.ALL + || (getSourceKeepMode(context, objectMapper.sourceKeepMode()) == Mapper.SourceKeepMode.ARRAYS && objectMapper instanceof NestedObjectMapper == false)); boolean fieldWithFallbackSyntheticSource = mapper instanceof FieldMapper fieldMapper && fieldMapper.syntheticSourceMode() == FieldMapper.SyntheticSourceMode.FALLBACK; @@ -817,17 +817,15 @@ private static void parseNonDynamicArray( } else if (mapper instanceof ObjectMapper objectMapper && (objectMapper.isEnabled() == false)) { // No need to call #addIgnoredFieldFromContext as both singleton and array instances of this object // get tracked through ignored source. - context.addIgnoredField( - IgnoredSourceFieldMapper.NameValue.fromContext(context, fullPath, XContentDataHelper.encodeToken(context.parser())) - ); + context.addIgnoredField(IgnoredSourceFieldMapper.NameValue.fromContext(context, fullPath, context.encodeFlattenedToken())); return; } } // In synthetic source, if any array element requires storing its source as-is, it takes precedence over // elements from regular source loading that are then skipped from the synthesized array source. - // To prevent this, we track each array name, to check if it contains any sub-arrays in its elements. - context = context.cloneForArray(fullPath); + // To prevent this, we track that parsing sub-context is within array scope. + context = context.maybeCloneForArray(mapper); XContentParser parser = context.parser(); XContentParser.Token token; @@ -933,7 +931,7 @@ private static void parseDynamicValue(DocumentParserContext context, String curr IgnoredSourceFieldMapper.NameValue.fromContext( context, context.path().pathAsText(currentFieldName), - XContentDataHelper.encodeToken(context.parser()) + context.encodeFlattenedToken() ) ); } @@ -944,7 +942,7 @@ private static void parseDynamicValue(DocumentParserContext context, String curr IgnoredSourceFieldMapper.NameValue.fromContext( context, context.path().pathAsText(currentFieldName), - XContentDataHelper.encodeToken(context.parser()) + context.encodeFlattenedToken() ) ); } @@ -1043,7 +1041,7 @@ protected void parseCreateField(DocumentParserContext context) { if (context.dynamic() == ObjectMapper.Dynamic.RUNTIME && context.canAddIgnoredField()) { try { context.addIgnoredField( - IgnoredSourceFieldMapper.NameValue.fromContext(context, path, XContentDataHelper.encodeToken(context.parser())) + IgnoredSourceFieldMapper.NameValue.fromContext(context, path, context.encodeFlattenedToken()) ); } catch (IOException e) { throw new IllegalArgumentException( @@ -1115,15 +1113,7 @@ protected SyntheticSourceSupport syntheticSourceSupport() { private static class NoOpObjectMapper extends ObjectMapper { NoOpObjectMapper(String name, String fullPath) { - super( - name, - fullPath, - Explicit.IMPLICIT_TRUE, - Optional.empty(), - Explicit.IMPLICIT_FALSE, - Dynamic.RUNTIME, - Collections.emptyMap() - ); + super(name, fullPath, Explicit.IMPLICIT_TRUE, Optional.empty(), Optional.empty(), Dynamic.RUNTIME, Collections.emptyMap()); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index c2970d8716147..eebe95e260dcf 100644 
--- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -12,6 +12,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.time.DateFormatter; import org.elasticsearch.core.Tuple; import org.elasticsearch.index.IndexMode; @@ -110,7 +111,7 @@ public int get() { private final Set ignoredFields; private final List ignoredFieldValues; private final List ignoredFieldsMissingValues; - private String parentArrayField; + private boolean inArrayScope; private final Map> dynamicMappers; private final DynamicMapperSize dynamicMappersSize; @@ -142,7 +143,7 @@ private DocumentParserContext( Set ignoreFields, List ignoredFieldValues, List ignoredFieldsWithNoSource, - String parentArrayField, + boolean inArrayScope, Map> dynamicMappers, Map dynamicObjectMappers, Map> dynamicRuntimeFields, @@ -163,7 +164,7 @@ private DocumentParserContext( this.ignoredFields = ignoreFields; this.ignoredFieldValues = ignoredFieldValues; this.ignoredFieldsMissingValues = ignoredFieldsWithNoSource; - this.parentArrayField = parentArrayField; + this.inArrayScope = inArrayScope; this.dynamicMappers = dynamicMappers; this.dynamicObjectMappers = dynamicObjectMappers; this.dynamicRuntimeFields = dynamicRuntimeFields; @@ -187,7 +188,7 @@ private DocumentParserContext(ObjectMapper parent, ObjectMapper.Dynamic dynamic, in.ignoredFields, in.ignoredFieldValues, in.ignoredFieldsMissingValues, - in.parentArrayField, + in.inArrayScope, in.dynamicMappers, in.dynamicObjectMappers, in.dynamicRuntimeFields, @@ -218,7 +219,7 @@ protected DocumentParserContext( new HashSet<>(), new ArrayList<>(), new ArrayList<>(), - null, + false, new HashMap<>(), new HashMap<>(), new HashMap<>(), @@ -323,10 +324,7 @@ public final void deduplicateIgnoredFieldValues(final Set fullNames) { public final DocumentParserContext addIgnoredFieldFromContext(IgnoredSourceFieldMapper.NameValue ignoredFieldWithNoSource) throws IOException { if (canAddIgnoredField()) { - if (parentArrayField != null - && parent != null - && parentArrayField.equals(parent.fullPath()) - && parent instanceof NestedObjectMapper == false) { + if (inArrayScope) { // The field is an array within an array, store all sub-array elements. ignoredFieldsMissingValues.add(ignoredFieldWithNoSource); return cloneWithRecordedSource(); @@ -341,6 +339,20 @@ public final DocumentParserContext addIgnoredFieldFromContext(IgnoredSourceField return this; } + /** + * Wraps {@link XContentDataHelper#encodeToken}, disabling dot expansion from {@link DotExpandingXContentParser}. + * This helps avoid producing duplicate names in the same scope, due to expanding dots to objects. + * For instance: { "a.b": "b", "a.c": "c" } => { "a": { "b": "b" }, "a": { "c": "c" } } + * This can happen when storing parts of document source that are not indexed (e.g. disabled objects). + */ + BytesRef encodeFlattenedToken() throws IOException { + boolean old = path().isWithinLeafObject(); + path().setWithinLeafObject(true); + BytesRef encoded = XContentDataHelper.encodeToken(parser()); + path().setWithinLeafObject(old); + return encoded; + } + /** * Return the collection of fields that are missing their source values. */ @@ -349,14 +361,17 @@ public final Collection getIgnoredFieldsMiss } /** - * Clones the current context to mark it as an array. 
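Editor's note, not part of the patch: encodeFlattenedToken exists because expanding each dotted key on its own re-creates the parent object once per key, which is exactly the duplicate-name problem the javadoc example describes. A tiny standalone illustration using plain maps (no Elasticsearch types involved):

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

final class DotExpansionDemo {
    // Expanding each dotted key independently yields one "a" object per key...
    static List<Map<String, Object>> expandEachKey(Map<String, Object> flat) {
        List<Map<String, Object>> expanded = new ArrayList<>();
        for (Map.Entry<String, Object> e : flat.entrySet()) {
            String[] parts = e.getKey().split("\\.");
            expanded.add(Map.of(parts[0], Map.of(parts[1], e.getValue())));
        }
        return expanded;
    }

    public static void main(String[] args) {
        Map<String, Object> flat = new LinkedHashMap<>();
        flat.put("a.b", "b");
        flat.put("a.c", "c");
        // ...so the re-encoded source would contain "a" twice: [{a={b=b}}, {a={c=c}}]
        System.out.println(expandEachKey(flat));
        // Keeping the token flattened ("within leaf object") preserves {"a.b":"b","a.c":"c"} as-is.
    }
}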
Records the full name of the array field, to check for sub-arrays. + * Clones the current context to mark it as an array, if it's not already marked, or restore it if it's within a nested object. * Applies to synthetic source only. */ - public final DocumentParserContext cloneForArray(String fullName) throws IOException { - if (canAddIgnoredField()) { - DocumentParserContext subcontext = switchParser(parser()); - subcontext.parentArrayField = fullName; - return subcontext; + public final DocumentParserContext maybeCloneForArray(Mapper mapper) throws IOException { + if (canAddIgnoredField() && mapper instanceof ObjectMapper) { + boolean isNested = mapper instanceof NestedObjectMapper; + if ((inArrayScope == false && isNested == false) || (inArrayScope && isNested)) { + DocumentParserContext subcontext = switchParser(parser()); + subcontext.inArrayScope = inArrayScope == false; + return subcontext; + } } return this; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java index d57edb757ba10..296c2c5311d9a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java @@ -57,6 +57,7 @@ public class IgnoredSourceFieldMapper extends MetadataFieldMapper { public static final TypeParser PARSER = new FixedTypeParser(context -> new IgnoredSourceFieldMapper(context.getIndexSettings())); static final NodeFeature TRACK_IGNORED_SOURCE = new NodeFeature("mapper.track_ignored_source"); + static final NodeFeature DONT_EXPAND_DOTS_IN_IGNORED_SOURCE = new NodeFeature("mapper.ignored_source.dont_expand_dots"); /* Setting to disable encoding and writing values for this field. diff --git a/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java b/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java index d88bdf6e50615..0ecd3cc588d5b 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/Mapper.java @@ -14,6 +14,8 @@ import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.util.StringLiteralDeduplicator; import org.elasticsearch.features.NodeFeature; +import org.elasticsearch.index.IndexMode; +import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; import org.elasticsearch.xcontent.ToXContentFragment; @@ -83,11 +85,18 @@ public void toXContent(XContentBuilder builder) throws IOException { // Setting to SourceKeepMode.ALL is equivalent to disabling synthetic source, so this is not allowed. 
public static final Setting SYNTHETIC_SOURCE_KEEP_INDEX_SETTING = Setting.enumSetting( SourceKeepMode.class, + settings -> { + var indexMode = IndexSettings.MODE.get(settings); + if (indexMode == IndexMode.LOGSDB) { + return SourceKeepMode.ARRAYS.toString(); + } else { + return SourceKeepMode.NONE.toString(); + } + }, "index.mapping.synthetic_source_keep", - SourceKeepMode.NONE, value -> { if (value == SourceKeepMode.ALL) { - throw new IllegalArgumentException("index.mapping.synthetic_source_keep can't be set to [" + value.toString() + "]"); + throw new IllegalArgumentException("index.mapping.synthetic_source_keep can't be set to [" + value + "]"); } }, Setting.Property.IndexScope, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index cf8f391813c09..4f90bd6e6f2c9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -21,9 +21,15 @@ * Spec for mapper-related features. */ public class MapperFeatures implements FeatureSpecification { + + // Used to avoid noise in mixed cluster and rest compatibility tests. Must not be backported to 8.x branch. + // This label gets added to tests with such failures before merging with main, then removed when backported to 8.x. + public static final NodeFeature BWC_WORKAROUND_9_0 = new NodeFeature("mapper.bwc_workaround_9_0"); + @Override public Set getFeatures() { return Set.of( + BWC_WORKAROUND_9_0, IgnoredSourceFieldMapper.TRACK_IGNORED_SOURCE, PassThroughObjectMapper.PASS_THROUGH_PRIORITY, RangeFieldMapper.NULL_VALUES_OFF_BY_ONE_FIX, @@ -52,6 +58,6 @@ public Set getFeatures() { @Override public Set getTestFeatures() { - return Set.of(RangeFieldMapper.DATE_RANGE_INDEXING_FIX); + return Set.of(RangeFieldMapper.DATE_RANGE_INDEXING_FIX, IgnoredSourceFieldMapper.DONT_EXPAND_DOTS_IN_IGNORED_SOURCE); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java index fc5f28dd51c9d..d0e0dcb6b97ba 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java @@ -98,6 +98,17 @@ public NestedObjectMapper build(MapperBuilderContext context) { } else { nestedTypePath = fullPath; } + if (sourceKeepMode.orElse(SourceKeepMode.NONE) == SourceKeepMode.ARRAYS) { + throw new MapperException( + "parameter [ " + + Mapper.SYNTHETIC_SOURCE_KEEP_PARAM + + " ] can't be set to [" + + SourceKeepMode.ARRAYS + + "] for nested object [" + + fullPath + + "]" + ); + } final Query nestedTypeFilter = NestedPathFieldMapper.filter(indexCreatedVersion, nestedTypePath); NestedMapperBuilderContext nestedContext = new NestedMapperBuilderContext( context.buildFullName(leafName()), @@ -115,7 +126,7 @@ public NestedObjectMapper build(MapperBuilderContext context) { buildMappers(nestedContext), enabled, dynamic, - storeArraySource, + sourceKeepMode, includeInParent, includeInRoot, parentTypeFilter, @@ -213,7 +224,7 @@ public MapperBuilderContext createChildContext(String name, Dynamic dynamic) { Map mappers, Explicit enabled, ObjectMapper.Dynamic dynamic, - Explicit storeArraySource, + Optional sourceKeepMode, Explicit includeInParent, Explicit includeInRoot, Query parentTypeFilter, @@ -222,7 +233,7 @@ public MapperBuilderContext createChildContext(String name, Dynamic 
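Editor's note, not part of the patch: with the defaulting function above, index.mapping.synthetic_source_keep should resolve to arrays for LogsDB indices and none otherwise when not set explicitly, and all remains rejected. A hedged, test-style check of that expectation (assumes the usual Elasticsearch Settings/Mapper imports and that index.mode is the key backing IndexSettings.MODE):

import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.Mapper;

final class SyntheticSourceKeepDefaultCheck {
    public static void main(String[] args) {
        Settings logsdb = Settings.builder().put("index.mode", "logsdb").build();
        Settings standard = Settings.EMPTY;

        // Expected per the new default function: ARRAYS for logsdb, NONE otherwise.
        System.out.println(Mapper.SYNTHETIC_SOURCE_KEEP_INDEX_SETTING.get(logsdb) == Mapper.SourceKeepMode.ARRAYS);
        System.out.println(Mapper.SYNTHETIC_SOURCE_KEEP_INDEX_SETTING.get(standard) == Mapper.SourceKeepMode.NONE);
    }
}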
dynamic) { Function bitsetProducer, IndexSettings indexSettings ) { - super(name, fullPath, enabled, Optional.empty(), storeArraySource, dynamic, mappers); + super(name, fullPath, enabled, Optional.empty(), sourceKeepMode, dynamic, mappers); this.parentTypeFilter = parentTypeFilter; this.nestedTypePath = nestedTypePath; this.nestedTypeFilter = nestedTypeFilter; @@ -283,7 +294,7 @@ NestedObjectMapper withoutMappers() { Map.of(), enabled, dynamic, - storeArraySource, + sourceKeepMode, includeInParent, includeInRoot, parentTypeFilter, @@ -310,8 +321,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (isEnabled() != Defaults.ENABLED) { builder.field("enabled", enabled.value()); } - if (storeArraySource != Defaults.STORE_ARRAY_SOURCE) { - builder.field(STORE_ARRAY_SOURCE_PARAM, storeArraySource.value()); + if (sourceKeepMode.isPresent()) { + builder.field(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, sourceKeepMode.get()); } serializeMappers(builder, params); return builder.endObject(); @@ -359,7 +370,7 @@ public ObjectMapper merge(Mapper mergeWith, MapperMergeContext parentMergeContex mergeResult.mappers(), mergeResult.enabled(), mergeResult.dynamic(), - mergeResult.trackArraySource(), + mergeResult.sourceKeepMode(), incInParent, incInRoot, parentTypeFilter, @@ -393,8 +404,8 @@ protected MapperMergeContext createChildContext(MapperMergeContext mapperMergeCo @Override public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { - if (storeArraySource()) { - // IgnoredSourceFieldMapper integration takes care of writing the source for nested objects that enabled store_array_source. + if (sourceKeepMode.orElse(SourceKeepMode.NONE) == SourceKeepMode.ALL) { + // IgnoredSourceFieldMapper integration takes care of writing the source for the nested object. return SourceLoader.SyntheticFieldLoader.NOTHING; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java index 40019566adaa8..0b9727aa66c8a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/ObjectMapper.java @@ -45,6 +45,7 @@ public class ObjectMapper extends Mapper { public static final String CONTENT_TYPE = "object"; static final String STORE_ARRAY_SOURCE_PARAM = "store_array_source"; static final NodeFeature SUBOBJECTS_AUTO = new NodeFeature("mapper.subobjects_auto"); + // No-op. All uses of this feature were reverted but node features can't be removed. 
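Editor's note, not part of the patch: the NestedObjectMapper builder change above rejects synthetic_source_keep: arrays on nested objects (array handling is already part of nested semantics), while all stays allowed. A sketch of a mapping that would trip the new check, built in the same XContentBuilder style as the test earlier in the patch; the field name "events" is hypothetical:

import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.json.JsonXContent;

final class NestedKeepArraysExample {
    // Builds { "properties": { "events": { "type": "nested", "synthetic_source_keep": "arrays" } } },
    // which the new NestedObjectMapper.Builder check rejects with a MapperException.
    static XContentBuilder rejectedMapping() throws Exception {
        XContentBuilder b = JsonXContent.contentBuilder().startObject();
        b.startObject("properties");
        {
            b.startObject("events");
            {
                b.field("type", "nested");
                b.field("synthetic_source_keep", "arrays");
            }
            b.endObject();
        }
        b.endObject();
        return b.endObject();
    }
}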
static final NodeFeature SUBOBJECTS_AUTO_FIXES = new NodeFeature("mapper.subobjects_auto_fixes"); /** @@ -127,7 +128,7 @@ static Dynamic getRootDynamic(MappingLookup mappingLookup) { public static class Builder extends Mapper.Builder { protected Optional subobjects; protected Explicit enabled = Explicit.IMPLICIT_TRUE; - protected Explicit storeArraySource = Defaults.STORE_ARRAY_SOURCE; + protected Optional sourceKeepMode = Optional.empty(); protected Dynamic dynamic; protected final List mappersBuilders = new ArrayList<>(); @@ -141,8 +142,8 @@ public Builder enabled(boolean enabled) { return this; } - public Builder storeArraySource(boolean value) { - this.storeArraySource = Explicit.explicitBoolean(value); + public Builder sourceKeepMode(SourceKeepMode sourceKeepMode) { + this.sourceKeepMode = Optional.of(sourceKeepMode); return this; } @@ -245,7 +246,7 @@ public ObjectMapper build(MapperBuilderContext context) { context.buildFullName(leafName()), enabled, subobjects, - storeArraySource, + sourceKeepMode, dynamic, buildMappers(context.createChildContext(leafName(), dynamic)) ); @@ -307,7 +308,10 @@ protected static boolean parseObjectOrDocumentTypeProperties( builder.enabled(XContentMapValues.nodeBooleanValue(fieldNode, fieldName + ".enabled")); return true; } else if (fieldName.equals(STORE_ARRAY_SOURCE_PARAM)) { - builder.storeArraySource(XContentMapValues.nodeBooleanValue(fieldNode, fieldName + ".store_array_source")); + builder.sourceKeepMode(SourceKeepMode.ARRAYS); + return true; + } else if (fieldName.equals(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM)) { + builder.sourceKeepMode(SourceKeepMode.from(fieldNode.toString())); return true; } else if (fieldName.equals("properties")) { if (fieldNode instanceof Collection && ((Collection) fieldNode).isEmpty()) { @@ -434,7 +438,7 @@ private static void validateFieldName(String fieldName, IndexVersion indexCreate protected final Explicit enabled; protected final Optional subobjects; - protected final Explicit storeArraySource; + protected final Optional sourceKeepMode; protected final Dynamic dynamic; protected final Map mappers; @@ -444,7 +448,7 @@ private static void validateFieldName(String fieldName, IndexVersion indexCreate String fullPath, Explicit enabled, Optional subobjects, - Explicit storeArraySource, + Optional sourceKeepMode, Dynamic dynamic, Map mappers ) { @@ -454,7 +458,7 @@ private static void validateFieldName(String fieldName, IndexVersion indexCreate this.fullPath = internFieldName(fullPath); this.enabled = enabled; this.subobjects = subobjects; - this.storeArraySource = storeArraySource; + this.sourceKeepMode = sourceKeepMode; this.dynamic = dynamic; if (mappers == null) { this.mappers = Map.of(); @@ -482,7 +486,7 @@ public Builder newBuilder(IndexVersion indexVersionCreated) { * This is typically used in the context of a mapper merge when there's not enough budget to add the entire object. 
*/ ObjectMapper withoutMappers() { - return new ObjectMapper(leafName(), fullPath, enabled, subobjects, storeArraySource, dynamic, Map.of()); + return new ObjectMapper(leafName(), fullPath, enabled, subobjects, sourceKeepMode, dynamic, Map.of()); } @Override @@ -520,8 +524,8 @@ public final Subobjects subobjects() { return subobjects.orElse(Subobjects.ENABLED); } - public final boolean storeArraySource() { - return storeArraySource.value(); + public final Optional sourceKeepMode() { + return sourceKeepMode; } @Override @@ -550,7 +554,7 @@ public ObjectMapper merge(Mapper mergeWith, MapperMergeContext parentMergeContex fullPath, mergeResult.enabled, mergeResult.subObjects, - mergeResult.trackArraySource, + mergeResult.sourceKeepMode, mergeResult.dynamic, mergeResult.mappers ); @@ -559,7 +563,7 @@ public ObjectMapper merge(Mapper mergeWith, MapperMergeContext parentMergeContex protected record MergeResult( Explicit enabled, Optional subObjects, - Explicit trackArraySource, + Optional sourceKeepMode, Dynamic dynamic, Map mappers ) { @@ -593,26 +597,31 @@ static MergeResult build(ObjectMapper existing, ObjectMapper mergeWithObject, Ma } else { subObjects = existing.subobjects; } - final Explicit trackArraySource; - if (mergeWithObject.storeArraySource.explicit()) { + final Optional sourceKeepMode; + if (mergeWithObject.sourceKeepMode.isPresent()) { if (reason == MergeReason.INDEX_TEMPLATE) { - trackArraySource = mergeWithObject.storeArraySource; - } else if (existing.storeArraySource != mergeWithObject.storeArraySource) { + sourceKeepMode = mergeWithObject.sourceKeepMode; + } else if (existing.sourceKeepMode.isEmpty() || existing.sourceKeepMode.get() != mergeWithObject.sourceKeepMode.get()) { throw new MapperException( - "the [store_array_source] parameter can't be updated for the object mapping [" + existing.fullPath() + "]" + "the [ " + + Mapper.SYNTHETIC_SOURCE_KEEP_PARAM + + " ] parameter can't be updated for the object mapping [" + + existing.fullPath() + + "]" ); } else { - trackArraySource = existing.storeArraySource; + sourceKeepMode = existing.sourceKeepMode; } } else { - trackArraySource = existing.storeArraySource; + sourceKeepMode = existing.sourceKeepMode; } + MapperMergeContext objectMergeContext = existing.createChildContext(parentMergeContext, existing.leafName()); Map mergedMappers = buildMergedMappers(existing, mergeWithObject, objectMergeContext, subObjects); return new MergeResult( enabled, subObjects, - trackArraySource, + sourceKeepMode, mergeWithObject.dynamic != null ? 
mergeWithObject.dynamic : existing.dynamic, mergedMappers ); @@ -733,6 +742,12 @@ private void ensureFlattenable(MapperBuilderContext context, ContentPath path) { + ")" ); } + if (sourceKeepMode.isPresent()) { + throwAutoFlatteningException( + path, + "the value of [" + Mapper.SYNTHETIC_SOURCE_KEEP_PARAM + "] is [ " + sourceKeepMode.get() + " ]" + ); + } if (isEnabled() == false) { throwAutoFlatteningException(path, "the value of [enabled] is [false]"); } @@ -774,8 +789,8 @@ void toXContent(XContentBuilder builder, Params params, ToXContent custom) throw if (subobjects.isPresent()) { builder.field("subobjects", subobjects.get().printedValue); } - if (storeArraySource != Defaults.STORE_ARRAY_SOURCE) { - builder.field(STORE_ARRAY_SOURCE_PARAM, storeArraySource.value()); + if (sourceKeepMode.isPresent()) { + builder.field("synthetic_source_keep", sourceKeepMode.get()); } if (custom != null) { custom.toXContent(builder, params); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/PassThroughObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/PassThroughObjectMapper.java index 9ef36b99a57c5..80f845d626a2f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/PassThroughObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/PassThroughObjectMapper.java @@ -82,6 +82,7 @@ public PassThroughObjectMapper build(MapperBuilderContext context) { leafName(), context.buildFullName(leafName()), enabled, + sourceKeepMode, dynamic, buildMappers(context.createChildContext(leafName(), timeSeriesDimensionSubFields.value(), dynamic)), timeSeriesDimensionSubFields, @@ -99,13 +100,14 @@ public PassThroughObjectMapper build(MapperBuilderContext context) { String name, String fullPath, Explicit enabled, + Optional sourceKeepMode, Dynamic dynamic, Map mappers, Explicit timeSeriesDimensionSubFields, int priority ) { // Subobjects are not currently supported. 
- super(name, fullPath, enabled, Optional.of(Subobjects.DISABLED), Explicit.IMPLICIT_FALSE, dynamic, mappers); + super(name, fullPath, enabled, Optional.of(Subobjects.DISABLED), sourceKeepMode, dynamic, mappers); this.timeSeriesDimensionSubFields = timeSeriesDimensionSubFields; this.priority = priority; if (priority < 0) { @@ -115,7 +117,16 @@ public PassThroughObjectMapper build(MapperBuilderContext context) { @Override PassThroughObjectMapper withoutMappers() { - return new PassThroughObjectMapper(leafName(), fullPath(), enabled, dynamic, Map.of(), timeSeriesDimensionSubFields, priority); + return new PassThroughObjectMapper( + leafName(), + fullPath(), + enabled, + sourceKeepMode, + dynamic, + Map.of(), + timeSeriesDimensionSubFields, + priority + ); } @Override @@ -158,6 +169,7 @@ public PassThroughObjectMapper merge(Mapper mergeWith, MapperMergeContext parent leafName(), fullPath(), mergeResult.enabled(), + mergeResult.sourceKeepMode(), mergeResult.dynamic(), mergeResult.mappers(), containsDimensions, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java index 878f9c92fa552..ce983e8a327c9 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/RootObjectMapper.java @@ -113,7 +113,7 @@ public RootObjectMapper build(MapperBuilderContext context) { leafName(), enabled, subobjects, - storeArraySource, + sourceKeepMode, dynamic, buildMappers(context.createChildContext(null, dynamic)), new HashMap<>(runtimeFields), @@ -135,7 +135,7 @@ public RootObjectMapper build(MapperBuilderContext context) { String name, Explicit enabled, Optional subobjects, - Explicit trackArraySource, + Optional sourceKeepMode, Dynamic dynamic, Map mappers, Map runtimeFields, @@ -144,12 +144,17 @@ public RootObjectMapper build(MapperBuilderContext context) { Explicit dateDetection, Explicit numericDetection ) { - super(name, name, enabled, subobjects, trackArraySource, dynamic, mappers); + super(name, name, enabled, subobjects, sourceKeepMode, dynamic, mappers); this.runtimeFields = runtimeFields; this.dynamicTemplates = dynamicTemplates; this.dynamicDateTimeFormatters = dynamicDateTimeFormatters; this.dateDetection = dateDetection; this.numericDetection = numericDetection; + if (sourceKeepMode.orElse(SourceKeepMode.NONE) == SourceKeepMode.ALL) { + throw new MapperParsingException( + "root object can't be configured with [" + Mapper.SYNTHETIC_SOURCE_KEEP_PARAM + ":" + SourceKeepMode.ALL + "]" + ); + } } @Override @@ -166,7 +171,7 @@ RootObjectMapper withoutMappers() { leafName(), enabled, subobjects, - storeArraySource, + sourceKeepMode, dynamic, Map.of(), Map.of(), @@ -282,7 +287,7 @@ public RootObjectMapper merge(Mapper mergeWith, MapperMergeContext parentMergeCo leafName(), mergeResult.enabled(), mergeResult.subObjects(), - mergeResult.trackArraySource(), + mergeResult.sourceKeepMode(), mergeResult.dynamic(), mergeResult.mappers(), Map.copyOf(runtimeFields), diff --git a/server/src/main/java/org/elasticsearch/index/search/stats/SearchStats.java b/server/src/main/java/org/elasticsearch/index/search/stats/SearchStats.java index f6521960be290..ff514091979c3 100644 --- a/server/src/main/java/org/elasticsearch/index/search/stats/SearchStats.java +++ b/server/src/main/java/org/elasticsearch/index/search/stats/SearchStats.java @@ -9,6 +9,7 @@ package org.elasticsearch.index.search.stats; +import 
org.elasticsearch.TransportVersions; import org.elasticsearch.common.Strings; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -45,6 +46,9 @@ public static class Stats implements Writeable, ToXContentFragment { private long suggestTimeInMillis; private long suggestCurrent; + private long queryFailure; + private long fetchFailure; + private Stats() { // for internal use, initializes all counts to 0 } @@ -53,9 +57,11 @@ public Stats( long queryCount, long queryTimeInMillis, long queryCurrent, + long queryFailure, long fetchCount, long fetchTimeInMillis, long fetchCurrent, + long fetchFailure, long scrollCount, long scrollTimeInMillis, long scrollCurrent, @@ -66,10 +72,12 @@ public Stats( this.queryCount = queryCount; this.queryTimeInMillis = queryTimeInMillis; this.queryCurrent = queryCurrent; + this.queryFailure = queryFailure; this.fetchCount = fetchCount; this.fetchTimeInMillis = fetchTimeInMillis; this.fetchCurrent = fetchCurrent; + this.fetchFailure = fetchFailure; this.scrollCount = scrollCount; this.scrollTimeInMillis = scrollTimeInMillis; @@ -96,16 +104,47 @@ private Stats(StreamInput in) throws IOException { suggestCount = in.readVLong(); suggestTimeInMillis = in.readVLong(); suggestCurrent = in.readVLong(); + + if (in.getTransportVersion().onOrAfter(TransportVersions.SEARCH_FAILURE_STATS)) { + queryFailure = in.readVLong(); + fetchFailure = in.readVLong(); + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVLong(queryCount); + out.writeVLong(queryTimeInMillis); + out.writeVLong(queryCurrent); + + out.writeVLong(fetchCount); + out.writeVLong(fetchTimeInMillis); + out.writeVLong(fetchCurrent); + + out.writeVLong(scrollCount); + out.writeVLong(scrollTimeInMillis); + out.writeVLong(scrollCurrent); + + out.writeVLong(suggestCount); + out.writeVLong(suggestTimeInMillis); + out.writeVLong(suggestCurrent); + + if (out.getTransportVersion().onOrAfter(TransportVersions.SEARCH_FAILURE_STATS)) { + out.writeVLong(queryFailure); + out.writeVLong(fetchFailure); + } } public void add(Stats stats) { queryCount += stats.queryCount; queryTimeInMillis += stats.queryTimeInMillis; queryCurrent += stats.queryCurrent; + queryFailure += stats.queryFailure; fetchCount += stats.fetchCount; fetchTimeInMillis += stats.fetchTimeInMillis; fetchCurrent += stats.fetchCurrent; + fetchFailure += stats.fetchFailure; scrollCount += stats.scrollCount; scrollTimeInMillis += stats.scrollTimeInMillis; @@ -119,9 +158,11 @@ public void add(Stats stats) { public void addForClosingShard(Stats stats) { queryCount += stats.queryCount; queryTimeInMillis += stats.queryTimeInMillis; + queryFailure += stats.queryFailure; fetchCount += stats.fetchCount; fetchTimeInMillis += stats.fetchTimeInMillis; + fetchFailure += stats.fetchFailure; scrollCount += stats.scrollCount; scrollTimeInMillis += stats.scrollTimeInMillis; @@ -148,6 +189,10 @@ public long getQueryCurrent() { return queryCurrent; } + public long getQueryFailure() { + return queryFailure; + } + public long getFetchCount() { return fetchCount; } @@ -164,6 +209,10 @@ public long getFetchCurrent() { return fetchCurrent; } + public long getFetchFailure() { + return fetchFailure; + } + public long getScrollCount() { return scrollCount; } @@ -200,34 +249,17 @@ public static Stats readStats(StreamInput in) throws IOException { return new Stats(in); } - @Override - public void writeTo(StreamOutput out) throws IOException { - out.writeVLong(queryCount); - 
out.writeVLong(queryTimeInMillis); - out.writeVLong(queryCurrent); - - out.writeVLong(fetchCount); - out.writeVLong(fetchTimeInMillis); - out.writeVLong(fetchCurrent); - - out.writeVLong(scrollCount); - out.writeVLong(scrollTimeInMillis); - out.writeVLong(scrollCurrent); - - out.writeVLong(suggestCount); - out.writeVLong(suggestTimeInMillis); - out.writeVLong(suggestCurrent); - } - @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field(Fields.QUERY_TOTAL, queryCount); builder.humanReadableField(Fields.QUERY_TIME_IN_MILLIS, Fields.QUERY_TIME, getQueryTime()); builder.field(Fields.QUERY_CURRENT, queryCurrent); + builder.field(Fields.QUERY_FAILURE, queryFailure); builder.field(Fields.FETCH_TOTAL, fetchCount); builder.humanReadableField(Fields.FETCH_TIME_IN_MILLIS, Fields.FETCH_TIME, getFetchTime()); builder.field(Fields.FETCH_CURRENT, fetchCurrent); + builder.field(Fields.FETCH_FAILURE, fetchFailure); builder.field(Fields.SCROLL_TOTAL, scrollCount); builder.humanReadableField(Fields.SCROLL_TIME_IN_MILLIS, Fields.SCROLL_TIME, getScrollTime()); @@ -248,9 +280,11 @@ public boolean equals(Object o) { return queryCount == that.queryCount && queryTimeInMillis == that.queryTimeInMillis && queryCurrent == that.queryCurrent + && queryFailure == that.queryFailure && fetchCount == that.fetchCount && fetchTimeInMillis == that.fetchTimeInMillis && fetchCurrent == that.fetchCurrent + && fetchFailure == that.fetchFailure && scrollCount == that.scrollCount && scrollTimeInMillis == that.scrollTimeInMillis && scrollCurrent == that.scrollCurrent @@ -265,9 +299,11 @@ public int hashCode() { queryCount, queryTimeInMillis, queryCurrent, + queryFailure, fetchCount, fetchTimeInMillis, fetchCurrent, + fetchFailure, scrollCount, scrollTimeInMillis, scrollCurrent, @@ -377,10 +413,12 @@ static final class Fields { static final String QUERY_TIME = "query_time"; static final String QUERY_TIME_IN_MILLIS = "query_time_in_millis"; static final String QUERY_CURRENT = "query_current"; + static final String QUERY_FAILURE = "query_failure"; static final String FETCH_TOTAL = "fetch_total"; static final String FETCH_TIME = "fetch_time"; static final String FETCH_TIME_IN_MILLIS = "fetch_time_in_millis"; static final String FETCH_CURRENT = "fetch_current"; + static final String FETCH_FAILURE = "fetch_failure"; static final String SCROLL_TOTAL = "scroll_total"; static final String SCROLL_TIME = "scroll_time"; static final String SCROLL_TIME_IN_MILLIS = "scroll_time_in_millis"; diff --git a/server/src/main/java/org/elasticsearch/index/search/stats/ShardSearchStats.java b/server/src/main/java/org/elasticsearch/index/search/stats/ShardSearchStats.java index f86727991a8b2..6e6f744f6b719 100644 --- a/server/src/main/java/org/elasticsearch/index/search/stats/ShardSearchStats.java +++ b/server/src/main/java/org/elasticsearch/index/search/stats/ShardSearchStats.java @@ -65,10 +65,14 @@ public void onPreQueryPhase(SearchContext searchContext) { @Override public void onFailedQueryPhase(SearchContext searchContext) { - computeStats( - searchContext, - searchContext.hasOnlySuggest() ?
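Editor's note, not part of the patch: the new failure counters are appended at the end of the Stats wire format and are read and written only when both sides are on SEARCH_FAILURE_STATS or later, which keeps mixed-version clusters compatible. A condensed sketch of that gating pattern, showing only the new fields (the real class serializes many more counters):

import java.io.IOException;
import org.elasticsearch.TransportVersions;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;

final class FailureCounters {
    long queryFailure;
    long fetchFailure;

    FailureCounters() {}

    FailureCounters(StreamInput in) throws IOException {
        // Older nodes never send these fields; leave them at 0 in that case.
        if (in.getTransportVersion().onOrAfter(TransportVersions.SEARCH_FAILURE_STATS)) {
            queryFailure = in.readVLong();
            fetchFailure = in.readVLong();
        }
    }

    void writeTo(StreamOutput out) throws IOException {
        // Symmetric check: only append the new fields when the receiver understands them.
        if (out.getTransportVersion().onOrAfter(TransportVersions.SEARCH_FAILURE_STATS)) {
            out.writeVLong(queryFailure);
            out.writeVLong(fetchFailure);
        }
    }
}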
statsHolder -> statsHolder.suggestCurrent.dec() : statsHolder -> statsHolder.queryCurrent.dec() - ); + computeStats(searchContext, statsHolder -> { + if (searchContext.hasOnlySuggest()) { + statsHolder.suggestCurrent.dec(); + } else { + statsHolder.queryCurrent.dec(); + statsHolder.queryFailure.inc(); + } + }); } @Override @@ -89,7 +93,10 @@ public void onPreFetchPhase(SearchContext searchContext) { @Override public void onFailedFetchPhase(SearchContext searchContext) { - computeStats(searchContext, statsHolder -> statsHolder.fetchCurrent.dec()); + computeStats(searchContext, statsHolder -> { + statsHolder.fetchCurrent.dec(); + statsHolder.fetchFailure.inc(); + }); } @Override @@ -163,14 +170,19 @@ static final class StatsHolder { final CounterMetric scrollCurrent = new CounterMetric(); final CounterMetric suggestCurrent = new CounterMetric(); + final CounterMetric queryFailure = new CounterMetric(); + final CounterMetric fetchFailure = new CounterMetric(); + SearchStats.Stats stats() { return new SearchStats.Stats( queryMetric.count(), TimeUnit.NANOSECONDS.toMillis(queryMetric.sum()), queryCurrent.count(), + queryFailure.count(), fetchMetric.count(), TimeUnit.NANOSECONDS.toMillis(fetchMetric.sum()), fetchCurrent.count(), + fetchFailure.count(), scrollMetric.count(), TimeUnit.MICROSECONDS.toMillis(scrollMetric.sum()), scrollCurrent.count(), diff --git a/server/src/main/java/org/elasticsearch/indices/PostRecoveryMerger.java b/server/src/main/java/org/elasticsearch/indices/PostRecoveryMerger.java index 44b5d857946cc..17382e7854e90 100644 --- a/server/src/main/java/org/elasticsearch/indices/PostRecoveryMerger.java +++ b/server/src/main/java/org/elasticsearch/indices/PostRecoveryMerger.java @@ -46,14 +46,14 @@ class PostRecoveryMerger { private static final boolean TRIGGER_MERGE_AFTER_RECOVERY; static { - final var propertyValue = System.getProperty("es.trigger_merge_after_recovery_8_515_00_0"); + final var propertyValue = System.getProperty("es.trigger_merge_after_recovery"); if (propertyValue == null) { TRIGGER_MERGE_AFTER_RECOVERY = true; } else if ("false".equals(propertyValue)) { TRIGGER_MERGE_AFTER_RECOVERY = false; } else { throw new IllegalStateException( - "system property [es.trigger_merge_after_recovery_8_515_00_0] may only be set to [false], but was [" + propertyValue + "]" + "system property [es.trigger_merge_after_recovery] may only be set to [false], but was [" + propertyValue + "]" ); } } diff --git a/server/src/main/java/org/elasticsearch/indices/breaker/HierarchyCircuitBreakerService.java b/server/src/main/java/org/elasticsearch/indices/breaker/HierarchyCircuitBreakerService.java index d72909806240c..b1b0f0201ebbe 100644 --- a/server/src/main/java/org/elasticsearch/indices/breaker/HierarchyCircuitBreakerService.java +++ b/server/src/main/java/org/elasticsearch/indices/breaker/HierarchyCircuitBreakerService.java @@ -11,6 +11,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.elasticsearch.common.ReferenceDocs; import org.elasticsearch.common.breaker.ChildMemoryCircuitBreaker; import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.breaker.CircuitBreakingException; @@ -474,7 +475,8 @@ public void accept(String key, CircuitBreaker breaker) { appendBytesSafe(message, (long) (breaker.getUsed() * breaker.getOverhead())); } }); - message.append("]"); + message.append("]; for more information, see "); + message.append(ReferenceDocs.CIRCUIT_BREAKER_ERRORS); return message.toString(); } diff --git 
a/server/src/main/java/org/elasticsearch/inference/InferenceServiceRegistry.java b/server/src/main/java/org/elasticsearch/inference/InferenceServiceRegistry.java index 40b4e37f36509..f1ce94173a550 100644 --- a/server/src/main/java/org/elasticsearch/inference/InferenceServiceRegistry.java +++ b/server/src/main/java/org/elasticsearch/inference/InferenceServiceRegistry.java @@ -46,7 +46,13 @@ public Map getServices() { } public Optional getService(String serviceName) { - return Optional.ofNullable(services.get(serviceName)); + + if ("elser".equals(serviceName)) { // ElserService.NAME before removal + // here we are aliasing the elser service to use the elasticsearch service instead + return Optional.ofNullable(services.get("elasticsearch")); // ElasticsearchInternalService.NAME + } else { + return Optional.ofNullable(services.get(serviceName)); + } } public List getNamedWriteables() { diff --git a/server/src/main/java/org/elasticsearch/inference/InferenceServiceResults.java b/server/src/main/java/org/elasticsearch/inference/InferenceServiceResults.java index 5724a738209e2..34c8ffcb82d09 100644 --- a/server/src/main/java/org/elasticsearch/inference/InferenceServiceResults.java +++ b/server/src/main/java/org/elasticsearch/inference/InferenceServiceResults.java @@ -52,7 +52,7 @@ default boolean isStreaming() { * When {@link #isStreaming()} is {@code true}, the InferenceAction.Results will subscribe to this publisher. * Implementations should follow the {@link java.util.concurrent.Flow.Publisher} spec to stream the chunks. */ - default Flow.Publisher publisher() { + default Flow.Publisher publisher() { assert isStreaming() == false : "This must be implemented when isStreaming() == true"; throw new UnsupportedOperationException("This must be implemented when isStreaming() == true"); } diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json index 71be3d333ec3f..c8fa98b196c7b 100644 --- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json +++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.json @@ -2,4 +2,4 @@ "Content moved to reference-docs-links.txt", "This is a temporary placeholder to satisfy sub check_elasticsearch_links in the docs build", "Remove with @UpdateForV10 (if not before)" -] +] \ No newline at end of file diff --git a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.txt b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.txt index 190bbd3c319b4..ab9a6b253be7a 100644 --- a/server/src/main/resources/org/elasticsearch/common/reference-docs-links.txt +++ b/server/src/main/resources/org/elasticsearch/common/reference-docs-links.txt @@ -42,3 +42,4 @@ MAX_SHARDS_PER_NODE size-your-shards FLOOD_STAGE_WATERMARK fix-watermark-errors.html X_OPAQUE_ID api-conventions.html#x-opaque-id FORMING_SINGLE_NODE_CLUSTERS modules-discovery-bootstrap-cluster.html#modules-discovery-bootstrap-cluster-joining +CIRCUIT_BREAKER_ERRORS circuit-breaker-errors.html \ No newline at end of file diff --git a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java index b4996db40f823..77d00f0e5a068 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ 
b/server/src/test/java/org/elasticsearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -615,6 +615,8 @@ private static CommonStats createShardLevelCommonStats() { ++iota, ++iota, ++iota, + ++iota, + ++iota, ++iota ); Map groupStats = new HashMap<>(); diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtilTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtilTests.java index 72295743608c3..7da5463ea46ff 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtilTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/DocValuesForUtilTests.java @@ -70,11 +70,10 @@ public void testEncodeDecode() throws IOException { { // decode IndexInput in = d.openInput("test.bin", IOContext.READONCE); - final DocValuesForUtil forUtil = new DocValuesForUtil(); final long[] restored = new long[ES87TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; for (int i = 0; i < iterations; ++i) { final int bitsPerValue = in.readByte(); - forUtil.decode(bitsPerValue, in, restored); + DocValuesForUtil.decode(bitsPerValue, in, restored); assertArrayEquals( Arrays.toString(restored), ArrayUtil.copyOfSubArray( diff --git a/server/src/test/java/org/elasticsearch/index/mapper/FieldAliasMapperValidationTests.java b/server/src/test/java/org/elasticsearch/index/mapper/FieldAliasMapperValidationTests.java index d48c5550631cd..e385177b87147 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/FieldAliasMapperValidationTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/FieldAliasMapperValidationTests.java @@ -179,7 +179,7 @@ private static ObjectMapper createObjectMapper(String name) { name, Explicit.IMPLICIT_TRUE, Optional.empty(), - Explicit.IMPLICIT_FALSE, + Optional.empty(), ObjectMapper.Dynamic.FALSE, emptyMap() ); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/FieldTypeLookupTests.java b/server/src/test/java/org/elasticsearch/index/mapper/FieldTypeLookupTests.java index 18c4f393bc696..ae793bc3b329e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/FieldTypeLookupTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/FieldTypeLookupTests.java @@ -20,6 +20,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import static java.util.Collections.emptyList; @@ -433,6 +434,7 @@ private PassThroughObjectMapper createPassThroughMapper(String name, Map { b.startArray("path"); + b.startObject().field("int_value", 20).endObject(); b.startObject().field("int_value", 10).endObject(); + b.endArray(); + b.field("bool_value", true); + }); + assertEquals(""" + {"bool_value":true,"path":[{"int_value":20},{"int_value":10}]}""", syntheticSource); + } + + public void testIndexStoredArraySourceRootObjectArrayWithBypass() throws IOException { + DocumentMapper documentMapper = createMapperServiceWithStoredArraySource(syntheticSourceMapping(b -> { + b.startObject("path"); + { + b.field("type", "object"); + b.field("synthetic_source_keep", "none"); + b.startObject("properties"); + { + b.startObject("int_value").field("type", "integer").endObject(); + } + b.endObject(); + } + b.endObject(); + b.startObject("bool_value").field("type", "boolean").endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.startArray("path"); b.startObject().field("int_value", 20).endObject(); + b.startObject().field("int_value", 10).endObject(); b.endArray(); 
b.field("bool_value", true); }); assertEquals(""" - {"bool_value":true,"path":[{"int_value":10},{"int_value":20}]}""", syntheticSource); + {"bool_value":true,"path":{"int_value":[10,20]}}""", syntheticSource); } public void testIndexStoredArraySourceNestedValueArray() throws IOException { @@ -622,6 +648,12 @@ public void testIndexStoredArraySourceNestedValueArrayDisabled() throws IOExcept { b.startObject("int_value").field("type", "integer").field(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, "none").endObject(); b.startObject("bool_value").field("type", "boolean").endObject(); + b.startObject("obj").field("type", "object").field(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, "none"); + b.startObject("properties"); + { + b.startObject("foo").field("type", "integer").endObject(); + } + b.endObject().endObject(); } b.endObject(); } @@ -632,11 +664,17 @@ public void testIndexStoredArraySourceNestedValueArrayDisabled() throws IOExcept { b.array("int_value", new int[] { 30, 20, 10 }); b.field("bool_value", true); + b.startArray("obj"); + { + b.startObject().field("foo", 2).endObject(); + b.startObject().field("foo", 1).endObject(); + } + b.endArray(); } b.endObject(); }); assertEquals(""" - {"path":{"bool_value":true,"int_value":[10,20,30]}}""", syntheticSource); + {"path":{"bool_value":true,"int_value":[10,20,30],"obj":{"foo":[1,2]}}}""", syntheticSource); } public void testFieldStoredArraySourceNestedValueArray() throws IOException { @@ -674,8 +712,8 @@ public void testFieldStoredSourceNestedValue() throws IOException { b.field("type", "object"); b.startObject("properties"); { - b.startObject("default").field("type", "float").field(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, "none").endObject(); - b.startObject("source_kept").field("type", "float").field(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, "all").endObject(); + b.startObject("default").field("type", "float").field("synthetic_source_keep", "none").endObject(); + b.startObject("source_kept").field("type", "float").field("synthetic_source_keep", "all").endObject(); b.startObject("bool_value").field("type", "boolean").endObject(); } b.endObject(); @@ -738,7 +776,7 @@ public void testRootArray() throws IOException { b.startObject("path"); { b.field("type", "object"); - b.field("store_array_source", true); + b.field("synthetic_source_keep", "arrays"); b.startObject("properties"); { b.startObject("int_value").field("type", "integer").endObject(); @@ -765,7 +803,7 @@ public void testNestedArray() throws IOException { b.field("type", "object"); b.startObject("properties"); { - b.startObject("to").field("type", "object").field("store_array_source", true); + b.startObject("to").field("type", "object").field("synthetic_source_keep", "arrays"); { b.startObject("properties"); { @@ -835,10 +873,10 @@ public void testArrayWithinArray() throws IOException { DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { b.startObject("path"); { - b.field("type", "object").field("store_array_source", true); + b.field("type", "object").field("synthetic_source_keep", "arrays"); b.startObject("properties"); { - b.startObject("to").field("type", "object").field("store_array_source", true); + b.startObject("to").field("type", "object").field("synthetic_source_keep", "arrays"); { b.startObject("properties"); { @@ -893,7 +931,7 @@ public void testObjectArrayAndValue() throws IOException { { b.startObject("stored"); { - b.field("type", "object").field("store_array_source", true); + b.field("type", "object").field("synthetic_source_keep", "arrays"); 
b.startObject("properties").startObject("leaf").field("type", "integer").endObject().endObject(); } b.endObject(); @@ -926,6 +964,79 @@ public void testObjectArrayAndValue() throws IOException { {"path":{"stored":[{"leaf":10},{"leaf":20}]}}""", syntheticSource); } + public void testDeeplyNestedObjectArrayAndValue() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { + b.startObject("path").startObject("properties").startObject("to").startObject("properties"); + { + b.startObject("stored"); + { + b.field("type", "object").field("store_array_source", true); + b.startObject("properties").startObject("leaf").field("type", "integer").endObject().endObject(); + } + b.endObject(); + } + b.endObject().endObject().endObject().endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.startArray("path"); + { + b.startObject(); + { + b.startObject("to").startArray("stored"); + { + b.startObject().field("leaf", 10).endObject(); + } + b.endArray().endObject(); + } + b.endObject(); + b.startObject(); + { + b.startObject("to").startObject("stored").field("leaf", 20).endObject().endObject(); + } + b.endObject(); + } + b.endArray(); + }); + assertEquals(""" + {"path":{"to":{"stored":[{"leaf":10},{"leaf":20}]}}}""", syntheticSource); + } + + public void testObjectArrayAndValueInNestedObject() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { + b.startObject("path").startObject("properties").startObject("to").startObject("properties"); + { + b.startObject("stored"); + { + b.field("type", "nested").field("dynamic", false); + } + b.endObject(); + } + b.endObject().endObject().endObject().endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.startArray("path"); + { + b.startObject(); + { + b.startObject("to").startArray("stored"); + { + b.startObject().field("leaf", 10).endObject(); + } + b.endArray().endObject(); + } + b.endObject(); + b.startObject(); + { + b.startObject("to").startObject("stored").field("leaf", 20).endObject().endObject(); + } + b.endObject(); + } + b.endArray(); + }); + assertEquals(""" + {"path":{"to":{"stored":[{"leaf":10},{"leaf":20}]}}}""", syntheticSource); + } + public void testObjectArrayAndValueDisabledObject() throws IOException { DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { b.startObject("path").field("type", "object").startObject("properties"); @@ -1061,7 +1172,7 @@ public void testStoredArrayWithinHigherLevelArray() throws IOException { b.field("type", "object"); b.startObject("properties"); { - b.startObject("to").field("type", "object").field("store_array_source", true); + b.startObject("to").field("type", "object").field("synthetic_source_keep", "arrays"); { b.startObject("properties"); { @@ -1107,6 +1218,42 @@ public void testStoredArrayWithinHigherLevelArray() throws IOException { {"path":{"to":[{"name":"A"},{"name":"B"},{"name":"C"},{"name":"D"}]}}""", booleanValue), syntheticSource); } + public void testObjectWithKeepAll() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { + b.startObject("path"); + { + b.field("type", "object").field("synthetic_source_keep", "all"); + b.startObject("properties"); + { + b.startObject("a").field("type", "object").endObject(); + b.startObject("b").field("type", "integer").endObject(); + } + b.endObject(); + } + b.endObject(); + 
b.startObject("id").field("type", "integer").endObject(); + })).documentMapper(); + var syntheticSource = syntheticSource(documentMapper, b -> { + b.startObject("path"); + { + b.startArray("a"); + { + b.startObject().field("foo", 30).endObject(); + b.startObject().field("foo", 20).endObject(); + b.startObject().field("foo", 10).endObject(); + b.startObject().field("bar", 20).endObject(); + b.startObject().field("bar", 10).endObject(); + } + b.endArray(); + b.array("b", 4, 1, 3, 2); + } + b.endObject(); + b.field("id", 10); + }); + assertEquals(""" + {"id":10,"path":{"a":[{"foo":30},{"foo":20},{"foo":10},{"bar":20},{"bar":10}],"b":[4,1,3,2]}}""", syntheticSource); + } + public void testFallbackFieldWithinHigherLevelArray() throws IOException { DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { b.startObject("path"); @@ -1140,7 +1287,7 @@ public void testFallbackFieldWithinHigherLevelArray() throws IOException { public void testFieldOrdering() throws IOException { DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { b.startObject("A").field("type", "integer").endObject(); - b.startObject("B").field("type", "object").field("store_array_source", true); + b.startObject("B").field("type", "object").field("synthetic_source_keep", "arrays"); { b.startObject("properties"); { @@ -1151,7 +1298,7 @@ public void testFieldOrdering() throws IOException { } b.endObject(); b.startObject("C").field("type", "integer").endObject(); - b.startObject("D").field("type", "object").field("store_array_source", true); + b.startObject("D").field("type", "object").field("synthetic_source_keep", "arrays"); { b.startObject("properties"); { @@ -1189,7 +1336,7 @@ public void testNestedObjectWithField() throws IOException { DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { b.startObject("path").field("type", "nested"); { - b.field("store_array_source", true); + b.field("synthetic_source_keep", "all"); b.startObject("properties"); { b.startObject("foo").field("type", "keyword").endObject(); @@ -1211,7 +1358,7 @@ public void testNestedObjectWithArray() throws IOException { DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { b.startObject("path").field("type", "nested"); { - b.field("store_array_source", true); + b.field("synthetic_source_keep", "all"); b.startObject("properties"); { b.startObject("foo").field("type", "keyword").endObject(); @@ -1244,7 +1391,7 @@ public void testNestedSubobjectWithField() throws IOException { b.startObject("int_value").field("type", "integer").endObject(); b.startObject("to").field("type", "nested"); { - b.field("store_array_source", true); + b.field("synthetic_source_keep", "all"); b.startObject("properties"); { b.startObject("foo").field("type", "keyword").endObject(); @@ -1285,7 +1432,7 @@ public void testNestedSubobjectWithArray() throws IOException { b.startObject("int_value").field("type", "integer").endObject(); b.startObject("to").field("type", "nested"); { - b.field("store_array_source", true); + b.field("synthetic_source_keep", "all"); b.startObject("properties"); { b.startObject("foo").field("type", "keyword").endObject(); @@ -1325,7 +1472,7 @@ public void testNestedSubobjectWithArray() throws IOException { public void testNestedObjectIncludeInRoot() throws IOException { DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { - b.startObject("path").field("type", "nested").field("store_array_source", true).field("include_in_root", true); 
+ b.startObject("path").field("type", "nested").field("synthetic_source_keep", "all").field("include_in_root", true); { b.startObject("properties"); { @@ -1599,7 +1746,7 @@ public void testStoredNestedSubObjectWithNameOverlappingParentName() throws IOEx b.startObject("path"); b.startObject("properties"); { - b.startObject("at").field("type", "nested").field("store_array_source", "true").endObject(); + b.startObject("at").field("type", "nested").field("synthetic_source_keep", "all").endObject(); } b.endObject(); b.endObject(); @@ -1646,6 +1793,107 @@ public void testCopyToLogicInsideObject() throws IOException { assertEquals("{\"path\":{\"at\":\"A\"}}", syntheticSource); } + public void testDynamicIgnoredObjectWithFlatFields() throws IOException { + DocumentMapper documentMapper = createMapperService(topMapping(b -> { + b.startObject("_source").field("mode", "synthetic").endObject(); + b.field("dynamic", false); + })).documentMapper(); + + CheckedConsumer document = b -> { + b.startObject("top"); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + }; + + var syntheticSource = syntheticSource(documentMapper, document); + assertEquals("{\"top\":{\"file.name\":\"A\",\"file.line\":10}}", syntheticSource); + + CheckedConsumer documentWithArray = b -> { + b.startArray("top"); + b.startObject(); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + b.startObject(); + b.field("file.name", "B"); + b.field("file.line", 20); + b.endObject(); + b.endArray(); + }; + + var syntheticSourceWithArray = syntheticSource(documentMapper, documentWithArray); + assertEquals(""" + {"top":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}""", syntheticSourceWithArray); + } + + public void testDisabledRootObjectWithFlatFields() throws IOException { + DocumentMapper documentMapper = createMapperService(topMapping(b -> { + b.startObject("_source").field("mode", "synthetic").endObject(); + b.field("enabled", false); + })).documentMapper(); + + CheckedConsumer document = b -> { + b.startObject("top"); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + }; + + var syntheticSource = syntheticSource(documentMapper, document); + assertEquals("{\"top\":{\"file.name\":\"A\",\"file.line\":10}}", syntheticSource); + + CheckedConsumer documentWithArray = b -> { + b.startArray("top"); + b.startObject(); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + b.startObject(); + b.field("file.name", "B"); + b.field("file.line", 20); + b.endObject(); + b.endArray(); + }; + + var syntheticSourceWithArray = syntheticSource(documentMapper, documentWithArray); + assertEquals(""" + {"top":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}""", syntheticSourceWithArray); + } + + public void testDisabledObjectWithFlatFields() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { + b.startObject("top").field("type", "object").field("enabled", false).endObject(); + })).documentMapper(); + + CheckedConsumer document = b -> { + b.startObject("top"); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + }; + + var syntheticSource = syntheticSource(documentMapper, document); + assertEquals("{\"top\":{\"file.name\":\"A\",\"file.line\":10}}", syntheticSource); + + CheckedConsumer documentWithArray = b -> { + b.startArray("top"); + b.startObject(); + b.field("file.name", "A"); + b.field("file.line", 10); + b.endObject(); + b.startObject(); + 
b.field("file.name", "B"); + b.field("file.line", 20); + b.endObject(); + b.endArray(); + }; + + var syntheticSourceWithArray = syntheticSource(documentMapper, documentWithArray); + assertEquals(""" + {"top":[{"file.name":"A","file.line":10},{"file.name":"B","file.line":20}]}""", syntheticSourceWithArray); + } + protected void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader) throws IOException { // We exclude ignored source field since in some cases it contains an exact copy of a part of document source. diff --git a/server/src/test/java/org/elasticsearch/index/mapper/MappingLookupTests.java b/server/src/test/java/org/elasticsearch/index/mapper/MappingLookupTests.java index 1381df07789b5..fd44e68df19a8 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/MappingLookupTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/MappingLookupTests.java @@ -84,7 +84,7 @@ public void testSubfieldOverride() { "object", Explicit.EXPLICIT_TRUE, Optional.empty(), - Explicit.IMPLICIT_FALSE, + Optional.empty(), ObjectMapper.Dynamic.TRUE, Collections.singletonMap("object.subfield", fieldMapper) ); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java index 0a954115e77f6..be1469e25f24d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/NestedObjectMapperTests.java @@ -1571,14 +1571,14 @@ public void testNestedMapperFilters() throws Exception { public void testStoreArraySourceinSyntheticSourceMode() throws IOException { DocumentMapper mapper = createDocumentMapper(syntheticSourceMapping(b -> { - b.startObject("o").field("type", "nested").field(ObjectMapper.STORE_ARRAY_SOURCE_PARAM, true).endObject(); + b.startObject("o").field("type", "nested").field("synthetic_source_keep", "all").endObject(); })); assertNotNull(mapper.mapping().getRoot().getMapper("o")); } public void testStoreArraySourceNoopInNonSyntheticSourceMode() throws IOException { DocumentMapper mapper = createDocumentMapper(mapping(b -> { - b.startObject("o").field("type", "nested").field(ObjectMapper.STORE_ARRAY_SOURCE_PARAM, true).endObject(); + b.startObject("o").field("type", "nested").field("synthetic_source_keep", "all").endObject(); })); assertNotNull(mapper.mapping().getRoot().getMapper("o")); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java index 3312c94e8a0e1..64eee39532c31 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/ObjectMapperTests.java @@ -167,7 +167,7 @@ public void testMergeEnabledForIndexTemplates() throws IOException { assertNotNull(objectMapper); assertFalse(objectMapper.isEnabled()); assertEquals(ObjectMapper.Subobjects.ENABLED, objectMapper.subobjects()); - assertFalse(objectMapper.storeArraySource()); + assertTrue(objectMapper.sourceKeepMode().isEmpty()); // Setting 'enabled' to true is allowed, and updates the mapping. 
update = Strings.toString( @@ -189,7 +189,7 @@ public void testMergeEnabledForIndexTemplates() throws IOException { assertNotNull(objectMapper); assertTrue(objectMapper.isEnabled()); assertEquals(ObjectMapper.Subobjects.AUTO, objectMapper.subobjects()); - assertTrue(objectMapper.storeArraySource()); + assertEquals(Mapper.SourceKeepMode.ARRAYS, objectMapper.sourceKeepMode().orElse(Mapper.SourceKeepMode.NONE)); } public void testFieldReplacementForIndexTemplates() throws IOException { @@ -678,14 +678,14 @@ public void testSyntheticSourceDocValuesFieldWithout() throws IOException { public void testStoreArraySourceinSyntheticSourceMode() throws IOException { DocumentMapper mapper = createDocumentMapper(syntheticSourceMapping(b -> { - b.startObject("o").field("type", "object").field(ObjectMapper.STORE_ARRAY_SOURCE_PARAM, true).endObject(); + b.startObject("o").field("type", "object").field("synthetic_source_keep", "arrays").endObject(); })); assertNotNull(mapper.mapping().getRoot().getMapper("o")); } public void testStoreArraySourceNoopInNonSyntheticSourceMode() throws IOException { DocumentMapper mapper = createDocumentMapper(mapping(b -> { - b.startObject("o").field("type", "object").field(ObjectMapper.STORE_ARRAY_SOURCE_PARAM, true).endObject(); + b.startObject("o").field("type", "object").field("synthetic_source_keep", "arrays").endObject(); })); assertNotNull(mapper.mapping().getRoot().getMapper("o")); } @@ -727,7 +727,7 @@ private ObjectMapper createObjectMapperWithAllParametersSet(CheckedConsumer createMapperService(mapping)); + assertThat(e.getMessage(), containsString("root object can't be configured with [synthetic_source_keep:all]")); + } + public void testWithoutMappers() throws IOException { RootObjectMapper shallowRoot = createRootObjectMapperWithAllParametersSet(b -> {}, b -> {}); RootObjectMapper root = createRootObjectMapperWithAllParametersSet(b -> { diff --git a/server/src/test/java/org/elasticsearch/index/search/stats/SearchStatsTests.java b/server/src/test/java/org/elasticsearch/index/search/stats/SearchStatsTests.java index 06df262a090ae..a4430e1c1499d 100644 --- a/server/src/test/java/org/elasticsearch/index/search/stats/SearchStatsTests.java +++ b/server/src/test/java/org/elasticsearch/index/search/stats/SearchStatsTests.java @@ -22,9 +22,9 @@ public void testShardLevelSearchGroupStats() throws Exception { // let's create two dummy search stats with groups Map groupStats1 = new HashMap<>(); Map groupStats2 = new HashMap<>(); - groupStats2.put("group1", new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)); - SearchStats searchStats1 = new SearchStats(new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 0, groupStats1); - SearchStats searchStats2 = new SearchStats(new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 0, groupStats2); + groupStats2.put("group1", new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)); + SearchStats searchStats1 = new SearchStats(new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 0, groupStats1); + SearchStats searchStats2 = new SearchStats(new Stats(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 0, groupStats2); // adding these two search stats and checking group stats are correct searchStats1.add(searchStats2); diff --git a/server/src/test/java/org/elasticsearch/indices/breaker/HierarchyCircuitBreakerServiceTests.java b/server/src/test/java/org/elasticsearch/indices/breaker/HierarchyCircuitBreakerServiceTests.java index 156460d320ee2..610e87b50d365 100644 --- a/server/src/test/java/org/elasticsearch/indices/breaker/HierarchyCircuitBreakerServiceTests.java +++ 
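// A hedged sketch of the accessor these assertions exercise (illustrative only; the enum constants
// NONE/ARRAYS/ALL are inferred from the "none"/"arrays"/"all" values used throughout this change, and
// an empty Optional means the parameter was not set on the mapper):
//
//     Optional<Mapper.SourceKeepMode> keep = objectMapper.sourceKeepMode();
//     if (keep.orElse(Mapper.SourceKeepMode.NONE) == Mapper.SourceKeepMode.ARRAYS) {
//         // array sources under this object are kept verbatim when synthetic _source is rebuilt
//     }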
b/server/src/test/java/org/elasticsearch/indices/breaker/HierarchyCircuitBreakerServiceTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.indices.breaker; +import org.elasticsearch.common.ReferenceDocs; import org.elasticsearch.common.breaker.ChildMemoryCircuitBreaker; import org.elasticsearch.common.breaker.CircuitBreaker; import org.elasticsearch.common.breaker.CircuitBreakingException; @@ -911,9 +912,11 @@ public double getOverhead() { ), oneOf( "[parent] Data too large, data for [test] would be [3/3b], which is larger than the limit of [6/6b], " - + "usages [child=7/7b, otherChild=8/8b]", + + "usages [child=7/7b, otherChild=8/8b]; for more information, see " + + ReferenceDocs.CIRCUIT_BREAKER_ERRORS, "[parent] Data too large, data for [test] would be [3/3b], which is larger than the limit of [6/6b], " - + "usages [otherChild=8/8b, child=7/7b]" + + "usages [otherChild=8/8b, child=7/7b]; for more information, see " + + ReferenceDocs.CIRCUIT_BREAKER_ERRORS ) ); @@ -928,7 +931,8 @@ public double getOverhead() { ), equalTo( "[parent] Data too large, data for [test] would be [3/3b], which is larger than the limit of [6/6b], " - + "real usage: [2/2b], new bytes reserved: [1/1b], usages []" + + "real usage: [2/2b], new bytes reserved: [1/1b], usages []; for more information, see " + + ReferenceDocs.CIRCUIT_BREAKER_ERRORS ) ); @@ -945,7 +949,8 @@ public double getOverhead() { ), equalTo( "[parent] Data too large, data for [test] would be [-3], which is larger than the limit of [-6], " - + "real usage: [-2], new bytes reserved: [-1/-1b], usages [child1=-7]" + + "real usage: [-2], new bytes reserved: [-1/-1b], usages [child1=-7]; for more information, see " + + ReferenceDocs.CIRCUIT_BREAKER_ERRORS ) ); } finally { diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index a7d18ff782400..ca26779f3376d 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -1537,7 +1537,7 @@ public void testSyntheticSourceKeepNone() throws IOException { SyntheticSourceExample example = syntheticSourceSupportForKeepTests(shouldUseIgnoreMalformed()).example(1); DocumentMapper mapper = createDocumentMapper(syntheticSourceMapping(b -> { b.startObject("field"); - b.field(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, "none"); + b.field("synthetic_source_keep", "none"); example.mapping().accept(b); b.endObject(); })); @@ -1548,7 +1548,7 @@ public void testSyntheticSourceKeepAll() throws IOException { SyntheticSourceExample example = syntheticSourceSupportForKeepTests(shouldUseIgnoreMalformed()).example(1); DocumentMapper mapperAll = createDocumentMapper(syntheticSourceMapping(b -> { b.startObject("field"); - b.field(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, "all"); + b.field("synthetic_source_keep", "all"); example.mapping().accept(b); b.endObject(); })); @@ -1565,7 +1565,7 @@ public void testSyntheticSourceKeepArrays() throws IOException { SyntheticSourceExample example = syntheticSourceSupportForKeepTests(shouldUseIgnoreMalformed()).example(1); DocumentMapper mapperAll = createDocumentMapper(syntheticSourceMapping(b -> { b.startObject("field"); - b.field(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, randomFrom("arrays", "all")); // Both options keep array source. + b.field("synthetic_source_keep", randomFrom("arrays", "all")); // Both options keep array source. 
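// For orientation, a minimal sketch of the field-level mapping this builder produces (the field name
// "field" comes from the surrounding test; the "keyword" type is an illustrative assumption, since the
// real type is supplied by the example() support):
//
//     "properties": {
//       "field": { "type": "keyword", "synthetic_source_keep": "arrays" }
//     }
//
// Both "arrays" and "all" keep array source, so synthetic _source replays the original array values
// rather than a reconstructed form.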
example.mapping().accept(b); b.endObject(); })); diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java index 81e120511a40f..067d1b96e965e 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DataSourceRequest.java @@ -12,6 +12,7 @@ import org.elasticsearch.logsdb.datageneration.DataGeneratorSpecification; import org.elasticsearch.logsdb.datageneration.FieldType; import org.elasticsearch.logsdb.datageneration.fields.DynamicMapping; +import org.elasticsearch.test.ESTestCase; import java.util.Set; @@ -115,11 +116,15 @@ public DataSourceResponse.LeafMappingParametersGenerator accept(DataSourceHandle } } - record ObjectMappingParametersGenerator(boolean isNested) + record ObjectMappingParametersGenerator(boolean isRoot, boolean isNested) implements DataSourceRequest { public DataSourceResponse.ObjectMappingParametersGenerator accept(DataSourceHandler handler) { return handler.handle(this); } + + public String syntheticSourceKeepValue() { + return isRoot() ? ESTestCase.randomFrom("none", "arrays") : ESTestCase.randomFrom("none", "arrays", "all"); + } } } diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java index 89850cd56bbd0..69f839d461b40 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/datasource/DefaultMappingParametersHandler.java @@ -83,6 +83,9 @@ public DataSourceResponse.ObjectMappingParametersGenerator handle(DataSourceRequ if (ESTestCase.randomBoolean()) { parameters.put("dynamic", ESTestCase.randomFrom("true", "false", "strict")); } + if (ESTestCase.randomBoolean()) { + parameters.put(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, "all"); // [arrays] doesn't apply to nested objects + } return parameters; }); @@ -96,6 +99,9 @@ public DataSourceResponse.ObjectMappingParametersGenerator handle(DataSourceRequ if (ESTestCase.randomBoolean()) { parameters.put("enabled", ESTestCase.randomFrom("true", "false")); } + if (ESTestCase.randomBoolean()) { + parameters.put(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, request.syntheticSourceKeepValue()); + } return parameters; }); diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java index b5cd4f78aff95..ba168b221f572 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/NestedFieldDataGenerator.java @@ -28,7 +28,7 @@ public class NestedFieldDataGenerator implements FieldDataGenerator { this.mappingParameters = context.specification() .dataSource() - .get(new DataSourceRequest.ObjectMappingParametersGenerator(true)) + .get(new DataSourceRequest.ObjectMappingParametersGenerator(false, true)) .mappingGenerator() .get(); var dynamicMapping = context.determineDynamicMapping(mappingParameters); diff --git 
a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java index 27c27e31702f7..084310ac967fc 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/ObjectFieldDataGenerator.java @@ -28,7 +28,7 @@ public class ObjectFieldDataGenerator implements FieldDataGenerator { this.mappingParameters = context.specification() .dataSource() - .get(new DataSourceRequest.ObjectMappingParametersGenerator(false)) + .get(new DataSourceRequest.ObjectMappingParametersGenerator(false, false)) .mappingGenerator() .get(); var dynamicMapping = context.determineDynamicMapping(mappingParameters); diff --git a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java index e85d18a1dac12..2c7aa65d8c6d1 100644 --- a/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/logsdb/datageneration/fields/TopLevelObjectFieldDataGenerator.java @@ -37,7 +37,7 @@ public TopLevelObjectFieldDataGenerator(DataGeneratorSpecification specification this.mappingParameters = Map.of(); } else { this.mappingParameters = new HashMap<>( - specification.dataSource().get(new DataSourceRequest.ObjectMappingParametersGenerator(false)).mappingGenerator().get() + specification.dataSource().get(new DataSourceRequest.ObjectMappingParametersGenerator(true, false)).mappingGenerator().get() ); // Top-level object can't be disabled because @timestamp is a required field in data streams. 
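// A short reference sketch of how the new (isRoot, isNested) flags steer the generated
// "synthetic_source_keep" parameter (derived from DataSourceRequest.syntheticSourceKeepValue() and
// DefaultMappingParametersHandler above; the parameter is only emitted when randomBoolean() allows it):
//
//     new DataSourceRequest.ObjectMappingParametersGenerator(true, false);   // top level: "none" | "arrays"
//     new DataSourceRequest.ObjectMappingParametersGenerator(false, false);  // inner object: "none" | "arrays" | "all"
//     new DataSourceRequest.ObjectMappingParametersGenerator(false, true);   // nested: "all", since [arrays] doesn't apply
//
// The root object additionally rejects "all" outright (see the ObjectMapperTests assertion earlier in this diff).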
this.mappingParameters.remove("enabled"); diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java index e8c1aecb7abee..215973b5dece2 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java @@ -2394,7 +2394,7 @@ protected static void waitForActiveLicense(final RestClient restClient) throws E assertThat("Expecting non-null license status", status, notNullValue()); assertThat("Expecting active license", status, equalTo("active")); } - }); + }, 10, TimeUnit.MINUTES); } // TODO: replace usages of this with warning_regex or allowed_warnings_regex diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/StreamingChatCompletionResults.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/StreamingChatCompletionResults.java index 672ad74419c8f..05a181d3fc5b6 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/StreamingChatCompletionResults.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/StreamingChatCompletionResults.java @@ -7,22 +7,27 @@ package org.elasticsearch.xpack.core.inference.results; +import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ChunkedToXContent; +import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; import org.elasticsearch.inference.InferenceResults; import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.xcontent.ToXContent; import java.io.IOException; +import java.util.Deque; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.concurrent.Flow; +import static org.elasticsearch.xpack.core.inference.results.ChatCompletionResults.COMPLETION; + /** * Chat Completion results that only contain a Flow.Publisher. 
*/ -public record StreamingChatCompletionResults(Flow.Publisher publisher) implements InferenceServiceResults { +public record StreamingChatCompletionResults(Flow.Publisher publisher) implements InferenceServiceResults { @Override public boolean isStreaming() { @@ -58,4 +63,30 @@ public void writeTo(StreamOutput out) throws IOException { public Iterator toXContentChunked(ToXContent.Params params) { throw new UnsupportedOperationException("Not implemented"); } + + public record Results(Deque results) implements ChunkedToXContent { + @Override + public Iterator toXContentChunked(ToXContent.Params params) { + return Iterators.concat( + ChunkedToXContentHelper.startObject(), + ChunkedToXContentHelper.startArray(COMPLETION), + Iterators.flatMap(results.iterator(), d -> d.toXContentChunked(params)), + ChunkedToXContentHelper.endArray(), + ChunkedToXContentHelper.endObject() + ); + } + } + + public record Result(String delta) implements ChunkedToXContent { + private static final String RESULT = "delta"; + + @Override + public Iterator toXContentChunked(ToXContent.Params params) { + return Iterators.concat( + ChunkedToXContentHelper.startObject(), + ChunkedToXContentHelper.field(RESULT, delta), + ChunkedToXContentHelper.endObject() + ); + } + } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/MlInferenceNamedXContentProvider.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/MlInferenceNamedXContentProvider.java index 65e30072d9870..667d7bf63efc9 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/MlInferenceNamedXContentProvider.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/MlInferenceNamedXContentProvider.java @@ -40,6 +40,8 @@ import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenizationUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ClassificationConfig; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ClassificationConfigUpdate; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.DebertaV2Tokenization; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.DebertaV2TokenizationUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.EmptyConfigUpdate; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.FillMaskConfig; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.FillMaskConfigUpdate; @@ -547,6 +549,13 @@ public List getNamedXContentParsers() { (p, c) -> XLMRobertaTokenization.fromXContent(p, (boolean) c) ) ); + namedXContent.add( + new NamedXContentRegistry.Entry( + Tokenization.class, + new ParseField(DebertaV2Tokenization.NAME), + (p, c) -> DebertaV2Tokenization.fromXContent(p, (boolean) c) + ) + ); namedXContent.add( new NamedXContentRegistry.Entry( @@ -583,6 +592,13 @@ public List getNamedXContentParsers() { (p, c) -> XLMRobertaTokenizationUpdate.fromXContent(p) ) ); + namedXContent.add( + new NamedXContentRegistry.Entry( + TokenizationUpdate.class, + DebertaV2TokenizationUpdate.NAME, + (p, c) -> DebertaV2TokenizationUpdate.fromXContent(p) + ) + ); return namedXContent; } @@ -791,6 +807,7 @@ public List getNamedWriteables() { ); namedWriteables.add(new NamedWriteableRegistry.Entry(Tokenization.class, RobertaTokenization.NAME, RobertaTokenization::new)); namedWriteables.add(new NamedWriteableRegistry.Entry(Tokenization.class, XLMRobertaTokenization.NAME, XLMRobertaTokenization::new)); + namedWriteables.add(new 
NamedWriteableRegistry.Entry(Tokenization.class, DebertaV2Tokenization.NAME, DebertaV2Tokenization::new)); namedWriteables.add( new NamedWriteableRegistry.Entry( @@ -827,6 +844,9 @@ public List getNamedWriteables() { XLMRobertaTokenizationUpdate::new ) ); + namedWriteables.add( + new NamedWriteableRegistry.Entry(TokenizationUpdate.class, DebertaV2Tokenization.NAME, DebertaV2TokenizationUpdate::new) + ); return namedWriteables; } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/DebertaV2Tokenization.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/DebertaV2Tokenization.java new file mode 100644 index 0000000000000..ce5464832b6d5 --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/DebertaV2Tokenization.java @@ -0,0 +1,83 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.ml.inference.trainedmodel; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; + +public class DebertaV2Tokenization extends Tokenization { + + public static final String NAME = "deberta_v2"; + public static final String MASK_TOKEN = "[MASK]"; + + public static ConstructingObjectParser createParser(boolean ignoreUnknownFields) { + ConstructingObjectParser parser = new ConstructingObjectParser<>( + NAME, + ignoreUnknownFields, + a -> new DebertaV2Tokenization( + (Boolean) a[0], + (Boolean) a[1], + (Integer) a[2], + a[3] == null ? null : Truncate.fromString((String) a[3]), + (Integer) a[4] + ) + ); + declareCommonFields(parser); + return parser; + } + + private static final ConstructingObjectParser LENIENT_PARSER = createParser(true); + private static final ConstructingObjectParser STRICT_PARSER = createParser(false); + + public static DebertaV2Tokenization fromXContent(XContentParser parser, boolean lenient) { + return lenient ? 
LENIENT_PARSER.apply(parser, null) : STRICT_PARSER.apply(parser, null); + } + + public DebertaV2Tokenization( + Boolean doLowerCase, + Boolean withSpecialTokens, + Integer maxSequenceLength, + Truncate truncate, + Integer span + ) { + super(doLowerCase, withSpecialTokens, maxSequenceLength, truncate, span); + } + + public DebertaV2Tokenization(StreamInput in) throws IOException { + super(in); + } + + @Override + Tokenization buildWindowingTokenization(int updatedMaxSeqLength, int updatedSpan) { + return new DebertaV2Tokenization(doLowerCase, withSpecialTokens, updatedMaxSeqLength, truncate, updatedSpan); + } + + @Override + public String getMaskToken() { + return MASK_TOKEN; + } + + @Override + XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { + return builder; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public String getName() { + return NAME; + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/DebertaV2TokenizationUpdate.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/DebertaV2TokenizationUpdate.java new file mode 100644 index 0000000000000..683b27793402d --- /dev/null +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/DebertaV2TokenizationUpdate.java @@ -0,0 +1,90 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.core.ml.inference.trainedmodel; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; + +import java.io.IOException; +import java.util.Optional; + +public class DebertaV2TokenizationUpdate extends AbstractTokenizationUpdate { + public static final ParseField NAME = new ParseField(DebertaV2Tokenization.NAME); + + public static ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "deberta_v2_tokenization_update", + a -> new DebertaV2TokenizationUpdate(a[0] == null ? null : Tokenization.Truncate.fromString((String) a[0]), (Integer) a[1]) + ); + + static { + declareCommonParserFields(PARSER); + } + + public static DebertaV2TokenizationUpdate fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + + public DebertaV2TokenizationUpdate(@Nullable Tokenization.Truncate truncate, @Nullable Integer span) { + super(truncate, span); + } + + public DebertaV2TokenizationUpdate(StreamInput in) throws IOException { + super(in); + } + + @Override + public Tokenization apply(Tokenization originalConfig) { + if (originalConfig instanceof DebertaV2Tokenization debertaV2Tokenization) { + if (isNoop()) { + return debertaV2Tokenization; + } + + Tokenization.validateSpanAndTruncate(getTruncate(), getSpan()); + + if (getTruncate() != null && getTruncate().isInCompatibleWithSpan() == false) { + // When truncate value is incompatible with span wipe out + // the existing span setting to avoid an invalid combination of settings. 
+ // This avoids the user having to set span to the special unset value + return new DebertaV2Tokenization( + debertaV2Tokenization.doLowerCase(), + debertaV2Tokenization.withSpecialTokens(), + debertaV2Tokenization.maxSequenceLength(), + getTruncate(), + null + ); + } + + return new DebertaV2Tokenization( + debertaV2Tokenization.doLowerCase(), + debertaV2Tokenization.withSpecialTokens(), + debertaV2Tokenization.maxSequenceLength(), + Optional.ofNullable(this.getTruncate()).orElse(originalConfig.getTruncate()), + Optional.ofNullable(this.getSpan()).orElse(originalConfig.getSpan()) + ); + } + throw ExceptionsHelper.badRequestException( + "Tokenization config of type [{}] can not be updated with a request of type [{}]", + originalConfig.getName(), + getName() + ); + } + + @Override + public String getWriteableName() { + return NAME.getPreferredName(); + } + + @Override + public String getName() { + return NAME.getPreferredName(); + } +} diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/NlpConfigUpdate.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/NlpConfigUpdate.java index 92e44edcd1259..328c851d63be6 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/NlpConfigUpdate.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/NlpConfigUpdate.java @@ -42,7 +42,9 @@ public static TokenizationUpdate tokenizationFromMap(Map map) { RobertaTokenizationUpdate.NAME.getPreferredName(), RobertaTokenizationUpdate::new, XLMRobertaTokenizationUpdate.NAME.getPreferredName(), - XLMRobertaTokenizationUpdate::new + XLMRobertaTokenizationUpdate::new, + DebertaV2Tokenization.NAME, + DebertaV2TokenizationUpdate::new ); Map tokenizationConfig = null; diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/Tokenization.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/Tokenization.java index 4fec726b9fa5d..a7c46a68538c0 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/Tokenization.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/Tokenization.java @@ -36,7 +36,8 @@ public enum Truncate { public boolean isInCompatibleWithSpan() { return false; } - }; + }, + BALANCED; public boolean isInCompatibleWithSpan() { return true; diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/NlpConfigUpdateTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/NlpConfigUpdateTests.java index 8bc3a339ab0ee..83dc0b2a06376 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/NlpConfigUpdateTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/inference/trainedmodel/NlpConfigUpdateTests.java @@ -73,7 +73,9 @@ public void testTokenizationFromMap() { ); assertThat( e.getMessage(), - containsString("unknown tokenization type expecting one of [bert, bert_ja, mpnet, roberta, xlm_roberta] got [not_bert]") + containsString( + "unknown tokenization type expecting one of [bert, bert_ja, deberta_v2, mpnet, roberta, xlm_roberta] got [not_bert]" + ) ); } diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java 
b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java index 1fd670f836df3..439ebe34c7d4a 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BasicBlockTests.java @@ -830,16 +830,20 @@ public void testBooleanBlock() { randomBoolean() ? randomIntBetween(1, positionCount) : positionCount ); Boolean value = randomFrom(random(), null, true, false); - IntStream.range(0, positionCount).mapToObj(ii -> { + Boolean[] bools = IntStream.range(0, positionCount).mapToObj(ii -> { if (value == null) { return randomBoolean(); } return value; - }).forEach(vectorBuilder::appendBoolean); + }).toArray(Boolean[]::new); + Arrays.stream(bools).forEach(vectorBuilder::appendBoolean); BooleanVector vector = vectorBuilder.build(); assertSingleValueDenseBlock(vector.asBlock()); assertToMask(vector); - if (value != null) { + if (value == null) { + assertThat(vector.allTrue(), equalTo(Arrays.stream(bools).allMatch(v -> v))); + assertThat(vector.allFalse(), equalTo(Arrays.stream(bools).allMatch(v -> v == false))); + } else { if (value) { assertTrue(vector.allTrue()); assertFalse(vector.allFalse()); diff --git a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java index 6225aa1a6f2a0..929e38dc31751 100644 --- a/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java +++ b/x-pack/plugin/esql/compute/src/test/java/org/elasticsearch/compute/data/BigArrayVectorTests.java @@ -84,7 +84,10 @@ public void testBoolean() throws IOException { assertThat(mask.mask().getBoolean(p), equalTo(values[p])); } } - if (value != null) { + if (value == null) { + assertThat(vector.allTrue(), equalTo(Arrays.stream(values).allMatch(v -> v))); + assertThat(vector.allFalse(), equalTo(Arrays.stream(values).allMatch(v -> v == false))); + } else { if (value) { assertTrue(vector.allTrue()); assertFalse(vector.allFalse()); diff --git a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/ModelRegistryIT.java b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/ModelRegistryIT.java index 524cd5014c19e..ea8b32f36f54c 100644 --- a/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/ModelRegistryIT.java +++ b/x-pack/plugin/inference/src/internalClusterTest/java/org/elasticsearch/xpack/inference/integration/ModelRegistryIT.java @@ -28,11 +28,10 @@ import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xpack.inference.InferencePlugin; import org.elasticsearch.xpack.inference.registry.ModelRegistry; -import org.elasticsearch.xpack.inference.services.elser.ElserInternalModel; -import org.elasticsearch.xpack.inference.services.elser.ElserInternalService; -import org.elasticsearch.xpack.inference.services.elser.ElserInternalServiceSettingsTests; -import org.elasticsearch.xpack.inference.services.elser.ElserInternalServiceTests; -import org.elasticsearch.xpack.inference.services.elser.ElserMlNodeTaskSettingsTests; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalModel; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService; +import 
org.elasticsearch.xpack.inference.services.elasticsearch.ElserInternalServiceSettingsTests; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElserMlNodeTaskSettingsTests; import org.junit.Before; import java.io.IOException; @@ -118,10 +117,10 @@ public void testGetModel() throws Exception { assertEquals(model.getConfigurations().getService(), modelHolder.get().service()); - var elserService = new ElserInternalService( + var elserService = new ElasticsearchInternalService( new InferenceServiceExtension.InferenceServiceFactoryContext(mock(Client.class), mock(ThreadPool.class)) ); - ElserInternalModel roundTripModel = elserService.parsePersistedConfigWithSecrets( + ElasticsearchInternalModel roundTripModel = (ElasticsearchInternalModel) elserService.parsePersistedConfigWithSecrets( modelHolder.get().inferenceEntityId(), modelHolder.get().taskType(), modelHolder.get().settings(), @@ -277,7 +276,17 @@ public void testGetModelWithSecrets() throws InterruptedException { } private Model buildElserModelConfig(String inferenceEntityId, TaskType taskType) { - return ElserInternalServiceTests.randomModelConfig(inferenceEntityId, taskType); + return switch (taskType) { + case SPARSE_EMBEDDING -> new org.elasticsearch.xpack.inference.services.elasticsearch.ElserInternalModel( + inferenceEntityId, + taskType, + ElasticsearchInternalService.NAME, + ElserInternalServiceSettingsTests.createRandom(), + ElserMlNodeTaskSettingsTests.createRandom() + ); + default -> throw new IllegalArgumentException("task type " + taskType + " is not supported"); + }; + } protected void blockingCall(Consumer> function, AtomicReference response, AtomicReference error) @@ -300,7 +309,7 @@ private static Model buildModelWithUnknownField(String inferenceEntityId) { new ModelWithUnknownField( inferenceEntityId, TaskType.SPARSE_EMBEDDING, - ElserInternalService.NAME, + ElasticsearchInternalService.NAME, ElserInternalServiceSettingsTests.createRandom(), ElserMlNodeTaskSettingsTests.createRandom() ) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java index 336626cd1db20..02bddb6076d69 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferenceNamedWriteablesProvider.java @@ -64,9 +64,9 @@ import org.elasticsearch.xpack.inference.services.elasticsearch.CustomElandInternalTextEmbeddingServiceSettings; import org.elasticsearch.xpack.inference.services.elasticsearch.CustomElandRerankTaskSettings; import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalServiceSettings; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElserInternalServiceSettings; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElserMlNodeTaskSettings; import org.elasticsearch.xpack.inference.services.elasticsearch.MultilingualE5SmallInternalServiceSettings; -import org.elasticsearch.xpack.inference.services.elser.ElserInternalServiceSettings; -import org.elasticsearch.xpack.inference.services.elser.ElserMlNodeTaskSettings; import org.elasticsearch.xpack.inference.services.googleaistudio.completion.GoogleAiStudioCompletionServiceSettings; import org.elasticsearch.xpack.inference.services.googleaistudio.embeddings.GoogleAiStudioEmbeddingsServiceSettings; import 
org.elasticsearch.xpack.inference.services.googlevertexai.GoogleVertexAiSecretSettings; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java index f2f019490444e..0ab395f4bfa39 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java @@ -86,7 +86,6 @@ import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceFeature; import org.elasticsearch.xpack.inference.services.elastic.ElasticInferenceServiceSettings; import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService; -import org.elasticsearch.xpack.inference.services.elser.ElserInternalService; import org.elasticsearch.xpack.inference.services.googleaistudio.GoogleAiStudioService; import org.elasticsearch.xpack.inference.services.googlevertexai.GoogleVertexAiService; import org.elasticsearch.xpack.inference.services.huggingface.HuggingFaceService; @@ -229,7 +228,6 @@ public void loadExtensions(ExtensionLoader loader) { public List getInferenceServiceFactories() { return List.of( - ElserInternalService::new, context -> new HuggingFaceElserService(httpFactory.get(), serviceComponents.get()), context -> new HuggingFaceService(httpFactory.get(), serviceComponents.get()), context -> new OpenAiService(httpFactory.get(), serviceComponents.get()), diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java index 4186b281a35b5..d2a73b7df77c1 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/TransportInferenceAction.java @@ -11,6 +11,7 @@ import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.HandledTransportAction; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.common.xcontent.ChunkedToXContent; import org.elasticsearch.inference.InferenceService; @@ -42,6 +43,7 @@ public class TransportInferenceAction extends HandledTransportAction hasRequestCompletedFunction, ActionListener listener ) { - List docsInput = DocumentsOnlyInput.of(inferenceInputs).getInputs(); - OpenAiChatCompletionRequest request = new OpenAiChatCompletionRequest(docsInput, model); + var docsOnly = DocumentsOnlyInput.of(inferenceInputs); + var docsInput = docsOnly.getInputs(); + var stream = docsOnly.stream(); + OpenAiChatCompletionRequest request = new OpenAiChatCompletionRequest(docsInput, model, stream); execute(new ExecutableInferenceRequest(requestSender, logger, request, HANDLER, hasRequestCompletedFunction, listener)); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/OpenAiEmbeddingsRequestManager.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/OpenAiEmbeddingsRequestManager.java index 5c164f2eb9644..49fa15e5bc843 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/OpenAiEmbeddingsRequestManager.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/http/sender/OpenAiEmbeddingsRequestManager.java @@ -33,7 +33,7 @@ public class OpenAiEmbeddingsRequestManager extends OpenAiRequestManager { private static final ResponseHandler HANDLER = createEmbeddingsHandler(); private static ResponseHandler createEmbeddingsHandler() { - return new OpenAiResponseHandler("openai text embedding", OpenAiEmbeddingsResponseEntity::fromResponse); + return new OpenAiResponseHandler("openai text embedding", OpenAiEmbeddingsResponseEntity::fromResponse, false); } public static OpenAiEmbeddingsRequestManager of(OpenAiEmbeddingsModel model, Truncator truncator, ThreadPool threadPool) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiChatCompletionResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiChatCompletionResponseHandler.java index 08426dc4bbc4a..7607e5e4ed3a2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiChatCompletionResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiChatCompletionResponseHandler.java @@ -7,20 +7,14 @@ package org.elasticsearch.xpack.inference.external.openai; -import org.elasticsearch.inference.InferenceServiceResults; -import org.elasticsearch.xpack.core.inference.results.StreamingChatCompletionResults; import org.elasticsearch.xpack.inference.external.http.HttpResult; import org.elasticsearch.xpack.inference.external.http.retry.ResponseParser; import org.elasticsearch.xpack.inference.external.http.retry.RetryException; import org.elasticsearch.xpack.inference.external.request.Request; -import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventParser; -import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventProcessor; - -import java.util.concurrent.Flow; public class OpenAiChatCompletionResponseHandler extends OpenAiResponseHandler { public OpenAiChatCompletionResponseHandler(String requestType, ResponseParser parseFunction) { - super(requestType, parseFunction); + super(requestType, parseFunction, true); } @Override @@ -28,19 +22,4 @@ protected RetryException buildExceptionHandling429(Request request, HttpResult r // We don't retry, if the chat completion input is too large return new RetryException(false, buildError(RATE_LIMIT, request, result)); } - - @Override - public boolean canHandleStreamingResponses() { - return true; - } - - @Override - public InferenceServiceResults parseResult(Request request, Flow.Publisher flow) { - var serverSentEventProcessor = new ServerSentEventProcessor(new ServerSentEventParser()); - var openAiProcessor = new OpenAiStreamingProcessor(); - - flow.subscribe(serverSentEventProcessor); - serverSentEventProcessor.subscribe(openAiProcessor); - return new StreamingChatCompletionResults(openAiProcessor); - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandler.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandler.java index c23b94351c187..c193280e1978b 100644 --- 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandler.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandler.java @@ -9,6 +9,8 @@ import org.apache.logging.log4j.Logger; import org.elasticsearch.common.Strings; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.xpack.core.inference.results.StreamingChatCompletionResults; import org.elasticsearch.xpack.inference.external.http.HttpResult; import org.elasticsearch.xpack.inference.external.http.retry.BaseResponseHandler; import org.elasticsearch.xpack.inference.external.http.retry.ContentTooLargeException; @@ -16,8 +18,12 @@ import org.elasticsearch.xpack.inference.external.http.retry.RetryException; import org.elasticsearch.xpack.inference.external.request.Request; import org.elasticsearch.xpack.inference.external.response.openai.OpenAiErrorResponseEntity; +import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventParser; +import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventProcessor; import org.elasticsearch.xpack.inference.logging.ThrottlerManager; +import java.util.concurrent.Flow; + import static org.elasticsearch.xpack.inference.external.http.HttpUtils.checkForEmptyBody; import static org.elasticsearch.xpack.inference.external.http.retry.ResponseHandlerUtils.getFirstHeaderOrUnknown; @@ -38,8 +44,11 @@ public class OpenAiResponseHandler extends BaseResponseHandler { static final String OPENAI_SERVER_BUSY = "Received a server busy error status code"; - public OpenAiResponseHandler(String requestType, ResponseParser parseFunction) { + private final boolean canHandleStreamingResponses; + + public OpenAiResponseHandler(String requestType, ResponseParser parseFunction, boolean canHandleStreamingResponses) { super(requestType, parseFunction, OpenAiErrorResponseEntity::fromResponse); + this.canHandleStreamingResponses = canHandleStreamingResponses; } @Override @@ -120,4 +129,19 @@ static String buildRateLimitErrorMessage(HttpResult result) { return RATE_LIMIT + ". 
" + usageMessage; } + + @Override + public boolean canHandleStreamingResponses() { + return canHandleStreamingResponses; + } + + @Override + public InferenceServiceResults parseResult(Request request, Flow.Publisher flow) { + var serverSentEventProcessor = new ServerSentEventProcessor(new ServerSentEventParser()); + var openAiProcessor = new OpenAiStreamingProcessor(); + + flow.subscribe(serverSentEventProcessor); + serverSentEventProcessor.subscribe(openAiProcessor); + return new StreamingChatCompletionResults(openAiProcessor); + } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiStreamingProcessor.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiStreamingProcessor.java index dcda832091e05..803bae40b33ed 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiStreamingProcessor.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/openai/OpenAiStreamingProcessor.java @@ -9,27 +9,28 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.xcontent.ChunkedToXContent; -import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentParserConfiguration; import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xpack.core.inference.results.StreamingChatCompletionResults; import org.elasticsearch.xpack.inference.common.DelegatingProcessor; import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEvent; import org.elasticsearch.xpack.inference.external.response.streaming.ServerSentEventField; import java.io.IOException; import java.util.ArrayDeque; +import java.util.Collections; import java.util.Deque; import java.util.Iterator; -import java.util.Optional; +import java.util.Objects; +import java.util.function.Predicate; import static org.elasticsearch.common.xcontent.XContentParserUtils.ensureExpectedToken; -import static org.elasticsearch.core.Strings.format; -import static org.elasticsearch.xpack.core.inference.results.ChatCompletionResults.COMPLETION; +import static org.elasticsearch.common.xcontent.XContentParserUtils.parseList; +import static org.elasticsearch.xpack.inference.external.response.XContentUtils.consumeUntilObjectEnd; import static org.elasticsearch.xpack.inference.external.response.XContentUtils.moveToFirstToken; import static org.elasticsearch.xpack.inference.external.response.XContentUtils.positionParserAtTokenAfterField; @@ -105,7 +106,6 @@ public class OpenAiStreamingProcessor extends DelegatingProcessor, ChunkedToXContent> { private static final Logger log = LogManager.getLogger(OpenAiStreamingProcessor.class); private static final String FAILED_TO_FIND_FIELD_TEMPLATE = "Failed to find required field [%s] in OpenAI chat completions response"; - private static final String RESULT = "delta"; private static final String CHOICES_FIELD = "choices"; private static final String DELTA_FIELD = "delta"; @@ -115,19 +115,18 @@ public class OpenAiStreamingProcessor extends DelegatingProcessor item) { + protected void next(Deque item) throws Exception { var parserConfig = 
XContentParserConfiguration.EMPTY.withDeprecationHandler(LoggingDeprecationHandler.INSTANCE); - var results = new ArrayDeque(item.size()); + var results = new ArrayDeque(item.size()); for (ServerSentEvent event : item) { if (ServerSentEventField.DATA == event.name() && event.hasValue()) { try { var delta = parse(parserConfig, event); - delta.map(this::deltaChunk).ifPresent(results::offer); + delta.forEachRemaining(results::offer); } catch (Exception e) { log.warn("Failed to parse event from inference provider: {}", event); - onError(new IOException("Failed to parse event from inference provider.", e)); - return; + throw e; } } } @@ -135,13 +134,14 @@ protected void next(Deque item) { if (results.isEmpty()) { upstream().request(1); } else { - downstream().onNext(completionChunk(results.iterator())); + downstream().onNext(new StreamingChatCompletionResults.Results(results)); } } - private Optional parse(XContentParserConfiguration parserConfig, ServerSentEvent event) throws IOException { + private Iterator parse(XContentParserConfiguration parserConfig, ServerSentEvent event) + throws IOException { if (DONE_MESSAGE.equalsIgnoreCase(event.value())) { - return Optional.empty(); + return Collections.emptyIterator(); } try (XContentParser jsonParser = XContentFactory.xContent(XContentType.JSON).createParser(parserConfig, event.value())) { @@ -150,54 +150,38 @@ private Optional parse(XContentParserConfiguration parserConfig, ServerS XContentParser.Token token = jsonParser.currentToken(); ensureExpectedToken(XContentParser.Token.START_OBJECT, token, jsonParser); - // choices is an array, but since we don't send 'n' in the request then we only get one value in the result positionParserAtTokenAfterField(jsonParser, CHOICES_FIELD, FAILED_TO_FIND_FIELD_TEMPLATE); - jsonParser.nextToken(); - ensureExpectedToken(XContentParser.Token.START_OBJECT, jsonParser.currentToken(), jsonParser); + return parseList(jsonParser, parser -> { + ensureExpectedToken(XContentParser.Token.START_OBJECT, parser.currentToken(), parser); - positionParserAtTokenAfterField(jsonParser, DELTA_FIELD, FAILED_TO_FIND_FIELD_TEMPLATE); + positionParserAtTokenAfterField(parser, DELTA_FIELD, FAILED_TO_FIND_FIELD_TEMPLATE); - token = jsonParser.currentToken(); + var currentToken = parser.currentToken(); - ensureExpectedToken(XContentParser.Token.START_OBJECT, token, jsonParser); + ensureExpectedToken(XContentParser.Token.START_OBJECT, currentToken, parser); - while (token != null) { - if (token == XContentParser.Token.FIELD_NAME && jsonParser.currentName().equals(CONTENT_FIELD)) { - jsonParser.nextToken(); - var contentToken = jsonParser.currentToken(); - ensureExpectedToken(XContentParser.Token.VALUE_STRING, contentToken, jsonParser); - return Optional.ofNullable(jsonParser.text()); - } else if (token == XContentParser.Token.FIELD_NAME && jsonParser.currentName().equals(FINISH_REASON_FIELD)) { - jsonParser.nextToken(); - var contentToken = jsonParser.currentToken(); - ensureExpectedToken(XContentParser.Token.VALUE_STRING, contentToken, jsonParser); - if (STOP_MESSAGE.equalsIgnoreCase(jsonParser.text())) { - return Optional.empty(); - } + currentToken = parser.nextToken(); + if (currentToken == XContentParser.Token.END_OBJECT) { + consumeUntilObjectEnd(parser); // end choices + return ""; // stopped } - token = jsonParser.nextToken(); - } - throw new IllegalStateException(format(FAILED_TO_FIND_FIELD_TEMPLATE, CONTENT_FIELD)); + if (currentToken == XContentParser.Token.FIELD_NAME && parser.currentName().equals(CONTENT_FIELD)) { + 
parser.nextToken(); + } else { + positionParserAtTokenAfterField(parser, CONTENT_FIELD, FAILED_TO_FIND_FIELD_TEMPLATE); + } + ensureExpectedToken(XContentParser.Token.VALUE_STRING, parser.currentToken(), parser); + var content = parser.text(); + consumeUntilObjectEnd(parser); // end delta + consumeUntilObjectEnd(parser); // end choices + return content; + }).stream() + .filter(Objects::nonNull) + .filter(Predicate.not(String::isEmpty)) + .map(StreamingChatCompletionResults.Result::new) + .iterator(); } } - - private ChunkedToXContent deltaChunk(String delta) { - return params -> Iterators.concat( - ChunkedToXContentHelper.startObject(), - ChunkedToXContentHelper.field(RESULT, delta), - ChunkedToXContentHelper.endObject() - ); - } - - private ChunkedToXContent completionChunk(Iterator delta) { - return params -> Iterators.concat( - ChunkedToXContentHelper.startObject(), - ChunkedToXContentHelper.startArray(COMPLETION), - Iterators.flatMap(delta, d -> d.toXContentChunked(params)), - ChunkedToXContentHelper.endArray(), - ChunkedToXContentHelper.endObject() - ); - } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequest.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequest.java index 9fa6533161745..99a025e70d003 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequest.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequest.java @@ -32,11 +32,13 @@ public class OpenAiChatCompletionRequest implements OpenAiRequest { private final OpenAiAccount account; private final List input; private final OpenAiChatCompletionModel model; + private final boolean stream; - public OpenAiChatCompletionRequest(List input, OpenAiChatCompletionModel model) { + public OpenAiChatCompletionRequest(List input, OpenAiChatCompletionModel model, boolean stream) { this.account = OpenAiAccount.of(model, OpenAiChatCompletionRequest::buildDefaultUri); this.input = Objects.requireNonNull(input); this.model = Objects.requireNonNull(model); + this.stream = stream; } @Override @@ -45,7 +47,7 @@ public HttpRequest createHttpRequest() { ByteArrayEntity byteEntity = new ByteArrayEntity( Strings.toString( - new OpenAiChatCompletionRequestEntity(input, model.getServiceSettings().modelId(), model.getTaskSettings().user()) + new OpenAiChatCompletionRequestEntity(input, model.getServiceSettings().modelId(), model.getTaskSettings().user(), stream) ).getBytes(StandardCharsets.UTF_8) ); httpPost.setEntity(byteEntity); @@ -83,6 +85,11 @@ public String getInferenceEntityId() { return model.getInferenceEntityId(); } + @Override + public boolean isStreaming() { + return stream; + } + public static URI buildDefaultUri() throws URISyntaxException { return new URIBuilder().setScheme("https") .setHost(OpenAiUtils.HOST) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestEntity.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestEntity.java index c9aa225c77941..867a7ca80cbcb 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestEntity.java +++ 
b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestEntity.java @@ -25,19 +25,22 @@ public class OpenAiChatCompletionRequestEntity implements ToXContentObject { private static final String ROLE_FIELD = "role"; private static final String USER_FIELD = "user"; private static final String CONTENT_FIELD = "content"; + private static final String STREAM_FIELD = "stream"; private final List messages; private final String model; private final String user; + private final boolean stream; - public OpenAiChatCompletionRequestEntity(List messages, String model, String user) { + public OpenAiChatCompletionRequestEntity(List messages, String model, String user, boolean stream) { Objects.requireNonNull(messages); Objects.requireNonNull(model); this.messages = messages; this.model = model; this.user = user; + this.stream = stream; } @Override @@ -65,6 +68,10 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(USER_FIELD, user); } + if (stream) { + builder.field(STREAM_FIELD, true); + } + builder.endObject(); return builder; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java index 21b2df6af1ab6..71b38d7a0785a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java @@ -17,6 +17,7 @@ import org.elasticsearch.inference.InferenceServiceResults; import org.elasticsearch.inference.InputType; import org.elasticsearch.inference.Model; +import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSender; import org.elasticsearch.xpack.inference.external.http.sender.InferenceInputs; @@ -27,8 +28,10 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; public abstract class SenderService implements InferenceService { + protected static final Set COMPLETION_ONLY = Set.of(TaskType.COMPLETION); private final Sender sender; private final ServiceComponents serviceComponents; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsModel.java index 163e3dd654150..bbbae736dbeb9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsModel.java @@ -18,7 +18,7 @@ import org.elasticsearch.xpack.inference.external.action.ExecutableAction; import org.elasticsearch.xpack.inference.external.action.elastic.ElasticInferenceServiceActionVisitor; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; -import org.elasticsearch.xpack.inference.services.elser.ElserModels; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels; import java.net.URI; import java.net.URISyntaxException; diff --git 
a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsServiceSettings.java index 15b89525f7915..bbda1bb716794 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsServiceSettings.java @@ -17,7 +17,7 @@ import org.elasticsearch.inference.ServiceSettings; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; -import org.elasticsearch.xpack.inference.services.elser.ElserModels; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels; import org.elasticsearch.xpack.inference.services.settings.FilteredXContentObject; import org.elasticsearch.xpack.inference.services.settings.RateLimitSettings; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java index 1dd7a36315c19..23e806e01300a 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java @@ -32,7 +32,6 @@ import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfigUpdate; import org.elasticsearch.xpack.core.ml.utils.MlPlatformArchitecturesUtil; import org.elasticsearch.xpack.inference.InferencePlugin; -import org.elasticsearch.xpack.inference.services.elser.ElserInternalModel; import java.io.IOException; import java.util.EnumSet; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java index b0c0fb0b8e7cc..e274c641e30be 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalService.java @@ -7,11 +7,14 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.common.ValidationException; +import org.elasticsearch.common.logging.DeprecationCategory; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.core.Nullable; import org.elasticsearch.core.TimeValue; import org.elasticsearch.inference.ChunkedInferenceServiceResults; @@ -55,10 +58,13 @@ import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMap; import static 
org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.ELSER_V2_MODEL; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.ELSER_V2_MODEL_LINUX_X86; public class ElasticsearchInternalService extends BaseElasticsearchInternalService { public static final String NAME = "elasticsearch"; + public static final String OLD_ELSER_SERVICE_NAME = "elser"; static final String MULTILINGUAL_E5_SMALL_MODEL_ID = ".multilingual-e5-small"; static final String MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86 = ".multilingual-e5-small_linux-x86_64"; @@ -67,6 +73,9 @@ public class ElasticsearchInternalService extends BaseElasticsearchInternalServi MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86 ); + private static final Logger logger = LogManager.getLogger(ElasticsearchInternalService.class); + private static final DeprecationLogger DEPRECATION_LOGGER = DeprecationLogger.getLogger(ElasticsearchInternalService.class); + public ElasticsearchInternalService(InferenceServiceExtension.InferenceServiceFactoryContext context) { super(context); } @@ -94,19 +103,41 @@ public void parseRequestConfig( try { Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); Map taskSettingsMap = removeFromMap(config, ModelConfigurations.TASK_SETTINGS); + String serviceName = (String) config.remove(ModelConfigurations.SERVICE); // required for elser service in elasticsearch service throwIfNotEmptyMap(config, name()); String modelId = (String) serviceSettingsMap.get(ElasticsearchInternalServiceSettings.MODEL_ID); if (modelId == null) { - throw new ValidationException().addValidationError("Error parsing request config, model id is missing"); - } - if (MULTILINGUAL_E5_SMALL_VALID_IDS.contains(modelId)) { + if (OLD_ELSER_SERVICE_NAME.equals(serviceName)) { + // TODO complete deprecation of null model ID + // throw new ValidationException().addValidationError("Error parsing request config, model id is missing"); + DEPRECATION_LOGGER.critical( + DeprecationCategory.API, + "inference_api_null_model_id_in_elasticsearch_service", + "Putting elasticsearch service inference endpoints (including elser service) without a model_id field is" + + " deprecated and will be removed in a future release. Please specify a model_id field." 
+ ); + platformArch.accept( + modelListener.delegateFailureAndWrap( + (delegate, arch) -> elserCase(inferenceEntityId, taskType, config, arch, serviceSettingsMap, modelListener) + ) + ); + } else { + throw new IllegalArgumentException("Error parsing service settings, model_id must be provided"); + } + } else if (MULTILINGUAL_E5_SMALL_VALID_IDS.contains(modelId)) { platformArch.accept( modelListener.delegateFailureAndWrap( (delegate, arch) -> e5Case(inferenceEntityId, taskType, config, arch, serviceSettingsMap, modelListener) ) ); + } else if (ElserModels.isValidModel(modelId)) { + platformArch.accept( + modelListener.delegateFailureAndWrap( + (delegate, arch) -> elserCase(inferenceEntityId, taskType, config, arch, serviceSettingsMap, modelListener) + ) + ); } else { customElandCase(inferenceEntityId, taskType, serviceSettingsMap, taskSettingsMap, modelListener); } @@ -239,7 +270,86 @@ static boolean modelVariantValidForArchitecture(Set platformArchitecture // platform agnostic model is always compatible return true; } + return modelId.equals( + selectDefaultModelVariantBasedOnClusterArchitecture( + platformArchitectures, + MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86, + MULTILINGUAL_E5_SMALL_MODEL_ID + ) + ); + } + private void elserCase( + String inferenceEntityId, + TaskType taskType, + Map config, + Set platformArchitectures, + Map serviceSettingsMap, + ActionListener modelListener + ) { + var esServiceSettingsBuilder = ElasticsearchInternalServiceSettings.fromRequestMap(serviceSettingsMap); + final String defaultModelId = selectDefaultModelVariantBasedOnClusterArchitecture( + platformArchitectures, + ELSER_V2_MODEL_LINUX_X86, + ELSER_V2_MODEL + ); + if (false == defaultModelId.equals(esServiceSettingsBuilder.getModelId())) { + + if (esServiceSettingsBuilder.getModelId() == null) { + // TODO remove this case once we remove the option to not pass model ID + esServiceSettingsBuilder.setModelId(defaultModelId); + } else if (esServiceSettingsBuilder.getModelId().equals(ELSER_V2_MODEL)) { + logger.warn( + "The platform agnostic model [{}] was requested on Linux x86_64. " + + "It is recommended to use the optimized model instead [{}]", + ELSER_V2_MODEL, + ELSER_V2_MODEL_LINUX_X86 + ); + } else { + throw new IllegalArgumentException( + "Error parsing request config, model id does not match any models available on this platform. Was [" + + esServiceSettingsBuilder.getModelId() + + "]. You may need to use a platform agnostic model." + ); + } + } + + DEPRECATION_LOGGER.warn( + DeprecationCategory.API, + "inference_api_elser_service", + "The [{}] service is deprecated and will be removed in a future release. Use the [{}] service instead, with" + + " [model_id] set to [{}] in the [service_settings]", + OLD_ELSER_SERVICE_NAME, + ElasticsearchInternalService.NAME, + defaultModelId + ); + + if (modelVariantDoesNotMatchArchitecturesAndIsNotPlatformAgnostic(platformArchitectures, esServiceSettingsBuilder.getModelId())) { + throw new IllegalArgumentException( + "Error parsing request config, model id does not match any models available on this platform. 
Was [" + + esServiceSettingsBuilder.getModelId() + + "]" + ); + } + + throwIfNotEmptyMap(config, name()); + throwIfNotEmptyMap(serviceSettingsMap, name()); + + modelListener.onResponse( + new ElserInternalModel( + inferenceEntityId, + taskType, + NAME, + new ElserInternalServiceSettings(esServiceSettingsBuilder.build()), + ElserMlNodeTaskSettings.DEFAULT + ) + ); + } + + private static boolean modelVariantDoesNotMatchArchitecturesAndIsNotPlatformAgnostic( + Set platformArchitectures, + String modelId + ) { return modelId.equals( selectDefaultModelVariantBasedOnClusterArchitecture( platformArchitectures, @@ -276,6 +386,14 @@ public Model parsePersistedConfig(String inferenceEntityId, TaskType taskType, M NAME, new MultilingualE5SmallInternalServiceSettings(ElasticsearchInternalServiceSettings.fromPersistedMap(serviceSettingsMap)) ); + } else if (ElserModels.isValidModel(modelId)) { + return new ElserInternalModel( + inferenceEntityId, + taskType, + NAME, + new ElserInternalServiceSettings(ElasticsearchInternalServiceSettings.fromPersistedMap(serviceSettingsMap)), + ElserMlNodeTaskSettings.DEFAULT + ); } else { return createCustomElandModel( inferenceEntityId, diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettings.java index 1acf19c5373b7..f8b5837ef387e 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettings.java @@ -83,7 +83,7 @@ protected static ElasticsearchInternalServiceSettings.Builder fromMap( validationException ); - // model id is optional as the ELSER and E5 service will default it + // model id is optional as the ELSER service will default it. TODO make this a required field once the elser service is removed String modelId = extractOptionalString(map, MODEL_ID, ModelConfigurations.SERVICE_SETTINGS, validationException); if (numAllocations == null && adaptiveAllocationsSettings == null) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalModel.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalModel.java similarity index 93% rename from x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalModel.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalModel.java index bb668c314649d..827eb178f7633 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalModel.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalModel.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.inference.services.elser; +package org.elasticsearch.xpack.inference.services.elasticsearch; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; @@ -13,7 +13,6 @@ import org.elasticsearch.inference.TaskType; import org.elasticsearch.xpack.core.ml.action.CreateTrainedModelAssignmentAction; import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; -import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalModel; public class ElserInternalModel extends ElasticsearchInternalModel { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalServiceSettings.java similarity index 89% rename from x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettings.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalServiceSettings.java index fcbabd5a88fc6..f7bcd95c8bd28 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalServiceSettings.java @@ -5,14 +5,13 @@ * 2.0. */ -package org.elasticsearch.xpack.inference.services.elser; +package org.elasticsearch.xpack.inference.services.elasticsearch; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings; -import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalServiceSettings; import java.io.IOException; import java.util.Arrays; @@ -22,7 +21,7 @@ public class ElserInternalServiceSettings extends ElasticsearchInternalServiceSe public static final String NAME = "elser_mlnode_service_settings"; - public static ElasticsearchInternalServiceSettings.Builder fromRequestMap(Map map) { + public static Builder fromRequestMap(Map map) { ValidationException validationException = new ValidationException(); var baseSettings = ElasticsearchInternalServiceSettings.fromMap(map, validationException); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserMlNodeTaskSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserMlNodeTaskSettings.java similarity index 96% rename from x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserMlNodeTaskSettings.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserMlNodeTaskSettings.java index 9b9f6e41113e5..934edaa96a15c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserMlNodeTaskSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserMlNodeTaskSettings.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.inference.services.elser; +package org.elasticsearch.xpack.inference.services.elasticsearch; import org.elasticsearch.TransportVersion; import org.elasticsearch.TransportVersions; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserModels.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserModels.java similarity index 87% rename from x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserModels.java rename to x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserModels.java index af94d2813dd2c..37f528ea3a750 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserModels.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserModels.java @@ -5,7 +5,7 @@ * 2.0. */ -package org.elasticsearch.xpack.inference.services.elser; +package org.elasticsearch.xpack.inference.services.elasticsearch; import java.util.Set; @@ -23,7 +23,7 @@ public class ElserModels { ); public static boolean isValidModel(String model) { - return VALID_ELSER_MODEL_IDS.contains(model); + return model != null && VALID_ELSER_MODEL_IDS.contains(model); } public static boolean isValidEisModel(String model) { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java deleted file mode 100644 index d36b8eca7661e..0000000000000 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalService.java +++ /dev/null @@ -1,300 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- * - * this file has been contributed to by a Generative AI - */ - -package org.elasticsearch.xpack.inference.services.elser; - -import org.elasticsearch.ElasticsearchStatusException; -import org.elasticsearch.TransportVersion; -import org.elasticsearch.TransportVersions; -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.core.Nullable; -import org.elasticsearch.core.TimeValue; -import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; -import org.elasticsearch.inference.InferenceResults; -import org.elasticsearch.inference.InferenceServiceExtension; -import org.elasticsearch.inference.InferenceServiceResults; -import org.elasticsearch.inference.InputType; -import org.elasticsearch.inference.Model; -import org.elasticsearch.inference.ModelConfigurations; -import org.elasticsearch.inference.TaskType; -import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; -import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; -import org.elasticsearch.xpack.core.inference.results.SparseEmbeddingResults; -import org.elasticsearch.xpack.core.ml.action.InferModelAction; -import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; -import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; -import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TextExpansionConfigUpdate; -import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; -import org.elasticsearch.xpack.inference.services.ServiceUtils; -import org.elasticsearch.xpack.inference.services.elasticsearch.BaseElasticsearchInternalService; - -import java.util.ArrayList; -import java.util.EnumSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.function.Consumer; - -import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrThrowIfNull; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.throwIfNotEmptyMap; -import static org.elasticsearch.xpack.inference.services.elser.ElserModels.ELSER_V2_MODEL; -import static org.elasticsearch.xpack.inference.services.elser.ElserModels.ELSER_V2_MODEL_LINUX_X86; - -public class ElserInternalService extends BaseElasticsearchInternalService { - - public static final String NAME = "elser"; - - private static final String OLD_MODEL_ID_FIELD_NAME = "model_version"; - - public ElserInternalService(InferenceServiceExtension.InferenceServiceFactoryContext context) { - super(context); - } - - // for testing - ElserInternalService( - InferenceServiceExtension.InferenceServiceFactoryContext context, - Consumer>> platformArch - ) { - super(context, platformArch); - } - - @Override - protected EnumSet supportedTaskTypes() { - return EnumSet.of(TaskType.SPARSE_EMBEDDING); - } - - @Override - public void parseRequestConfig( - String inferenceEntityId, - TaskType taskType, - Map config, - ActionListener parsedModelListener - ) { - try { - Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); - var serviceSettingsBuilder = ElserInternalServiceSettings.fromRequestMap(serviceSettingsMap); - - Map taskSettingsMap; - // task settings are optional - if (config.containsKey(ModelConfigurations.TASK_SETTINGS)) { - taskSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.TASK_SETTINGS); - } else { - taskSettingsMap = Map.of(); - 
} - - var taskSettings = taskSettingsFromMap(taskType, taskSettingsMap); - - throwIfNotEmptyMap(config, NAME); - throwIfNotEmptyMap(serviceSettingsMap, NAME); - throwIfNotEmptyMap(taskSettingsMap, NAME); - - if (serviceSettingsBuilder.getModelId() == null) { - platformArch.accept(parsedModelListener.delegateFailureAndWrap((delegate, arch) -> { - serviceSettingsBuilder.setModelId( - selectDefaultModelVariantBasedOnClusterArchitecture(arch, ELSER_V2_MODEL_LINUX_X86, ELSER_V2_MODEL) - ); - parsedModelListener.onResponse( - new ElserInternalModel( - inferenceEntityId, - taskType, - NAME, - new ElserInternalServiceSettings(serviceSettingsBuilder.build()), - taskSettings - ) - ); - })); - } else { - parsedModelListener.onResponse( - new ElserInternalModel( - inferenceEntityId, - taskType, - NAME, - new ElserInternalServiceSettings(serviceSettingsBuilder.build()), - taskSettings - ) - ); - } - } catch (Exception e) { - parsedModelListener.onFailure(e); - } - } - - @Override - public ElserInternalModel parsePersistedConfigWithSecrets( - String inferenceEntityId, - TaskType taskType, - Map config, - Map secrets - ) { - return parsePersistedConfig(inferenceEntityId, taskType, config); - } - - @Override - public ElserInternalModel parsePersistedConfig(String inferenceEntityId, TaskType taskType, Map config) { - Map serviceSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.SERVICE_SETTINGS); - - // Change from old model_version field name to new model_id field name as of - // TransportVersions.ML_TEXT_EMBEDDING_INFERENCE_SERVICE_ADDED - if (serviceSettingsMap.containsKey(OLD_MODEL_ID_FIELD_NAME)) { - String modelId = ServiceUtils.removeAsType(serviceSettingsMap, OLD_MODEL_ID_FIELD_NAME, String.class); - serviceSettingsMap.put(ElserInternalServiceSettings.MODEL_ID, modelId); - } - - var serviceSettings = ElserInternalServiceSettings.fromPersistedMap(serviceSettingsMap); - - Map taskSettingsMap; - // task settings are optional - if (config.containsKey(ModelConfigurations.TASK_SETTINGS)) { - taskSettingsMap = removeFromMapOrThrowIfNull(config, ModelConfigurations.TASK_SETTINGS); - } else { - taskSettingsMap = Map.of(); - } - - var taskSettings = taskSettingsFromMap(taskType, taskSettingsMap); - - return new ElserInternalModel(inferenceEntityId, taskType, NAME, new ElserInternalServiceSettings(serviceSettings), taskSettings); - } - - @Override - public void infer( - Model model, - @Nullable String query, - List inputs, - boolean stream, - Map taskSettings, - InputType inputType, - TimeValue timeout, - ActionListener listener - ) { - // No task settings to override with requestTaskSettings - - try { - checkCompatibleTaskType(model.getConfigurations().getTaskType()); - } catch (Exception e) { - listener.onFailure(e); - return; - } - - var request = buildInferenceRequest( - model.getConfigurations().getInferenceEntityId(), - TextExpansionConfigUpdate.EMPTY_UPDATE, - inputs, - inputType, - timeout, - false // chunk - ); - - client.execute( - InferModelAction.INSTANCE, - request, - listener.delegateFailureAndWrap( - (l, inferenceResult) -> l.onResponse(SparseEmbeddingResults.of(inferenceResult.getInferenceResults())) - ) - ); - } - - public void chunkedInfer( - Model model, - List input, - Map taskSettings, - InputType inputType, - @Nullable ChunkingOptions chunkingOptions, - TimeValue timeout, - ActionListener> listener - ) { - chunkedInfer(model, null, input, taskSettings, inputType, chunkingOptions, timeout, listener); - } - - @Override - public void chunkedInfer( - Model model, - 
@Nullable String query, - List inputs, - Map taskSettings, - InputType inputType, - @Nullable ChunkingOptions chunkingOptions, - TimeValue timeout, - ActionListener> listener - ) { - try { - checkCompatibleTaskType(model.getConfigurations().getTaskType()); - } catch (Exception e) { - listener.onFailure(e); - return; - } - - var configUpdate = chunkingOptions != null - ? new TokenizationConfigUpdate(chunkingOptions.windowSize(), chunkingOptions.span()) - : new TokenizationConfigUpdate(null, null); - - var request = buildInferenceRequest( - model.getConfigurations().getInferenceEntityId(), - configUpdate, - inputs, - inputType, - timeout, - true // chunk - ); - - client.execute( - InferModelAction.INSTANCE, - request, - listener.delegateFailureAndWrap( - (l, inferenceResult) -> l.onResponse(translateChunkedResults(inferenceResult.getInferenceResults())) - ) - ); - } - - private void checkCompatibleTaskType(TaskType taskType) { - if (TaskType.SPARSE_EMBEDDING.isAnyOrSame(taskType) == false) { - throw new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), RestStatus.BAD_REQUEST); - } - } - - private static ElserMlNodeTaskSettings taskSettingsFromMap(TaskType taskType, Map config) { - if (taskType != TaskType.SPARSE_EMBEDDING) { - throw new ElasticsearchStatusException(TaskType.unsupportedTaskTypeErrorMsg(taskType, NAME), RestStatus.BAD_REQUEST); - } - - // no config options yet - return ElserMlNodeTaskSettings.DEFAULT; - } - - private List translateChunkedResults(List inferenceResults) { - var translated = new ArrayList(); - - for (var inferenceResult : inferenceResults) { - if (inferenceResult instanceof MlChunkedTextExpansionResults mlChunkedResult) { - translated.add(InferenceChunkedSparseEmbeddingResults.ofMlResult(mlChunkedResult)); - } else if (inferenceResult instanceof ErrorInferenceResults error) { - translated.add(new ErrorChunkedInferenceResults(error.getException())); - } else { - throw new ElasticsearchStatusException( - "Expected a chunked inference [{}] received [{}]", - RestStatus.INTERNAL_SERVER_ERROR, - MlChunkedTextExpansionResults.NAME, - inferenceResult.getWriteableName() - ); - } - } - return translated; - } - - @Override - public String name() { - return NAME; - } - - @Override - public TransportVersion getMinimalSupportedVersion() { - return TransportVersions.V_8_12_0; - } -} diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java index bba8721c48c88..fd5f7197475aa 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/openai/OpenAiService.java @@ -42,6 +42,7 @@ import java.util.List; import java.util.Map; +import java.util.Set; import static org.elasticsearch.xpack.inference.services.ServiceFields.MODEL_ID; import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidModelException; @@ -314,6 +315,11 @@ public TransportVersion getMinimalSupportedVersion() { return TransportVersions.ML_INFERENCE_RATE_LIMIT_SETTINGS_ADDED; } + @Override + public Set supportedStreamingTasks() { + return COMPLETION_ONLY; + } + /** * Model was originally defined in task settings, but it should * have been part of the service settings. 
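Taken together, the OpenAI hunks above thread a stream flag from DocumentsOnlyInput through OpenAiChatCompletionRequest and OpenAiChatCompletionRequestEntity (which now writes a "stream": true field when streaming is requested), let OpenAiResponseHandler report canHandleStreamingResponses and chain a ServerSentEventProcessor into OpenAiStreamingProcessor, and have OpenAiService advertise COMPLETION as its only streaming task. As a rough illustration of the wire format that chain consumes — this sketch is not part of the patch, and the endpoint URL, model name and OPENAI_API_KEY environment variable are placeholder assumptions — a minimal plain-JDK example of issuing a streaming chat completion and reading the Server-Sent Event frames:

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.util.stream.Stream;

public class OpenAiStreamSketch {
    public static void main(String[] args) throws Exception {
        // Request body roughly mirroring OpenAiChatCompletionRequestEntity with stream=true;
        // the "stream" field is only emitted when streaming is requested.
        String body = """
            {"messages":[{"role":"user","content":"Hello"}],"model":"gpt-4o-mini","n":1,"stream":true}""";

        HttpClient client = HttpClient.newHttpClient();
        HttpRequest request = HttpRequest.newBuilder(URI.create("https://api.openai.com/v1/chat/completions"))
            .header("Authorization", "Bearer " + System.getenv("OPENAI_API_KEY")) // placeholder credential
            .header("Content-Type", "application/json")
            .POST(HttpRequest.BodyPublishers.ofString(body))
            .build();

        // With stream=true the response arrives as Server-Sent Events; the interesting lines look like
        //   data: {"choices":[{"delta":{"content":"..."},"finish_reason":null}]}
        // and the stream ends with
        //   data: [DONE]
        HttpResponse<Stream<String>> response = client.send(request, HttpResponse.BodyHandlers.ofLines());

        response.body()
            .filter(line -> line.startsWith("data: "))
            .map(line -> line.substring("data: ".length()))
            .takeWhile(payload -> "[DONE]".equals(payload) == false)
            .forEach(payload -> System.out.println("chunk: " + payload));
    }
}

Each data: payload carries choices[].delta.content fragments and the stream closes with the [DONE] sentinel; the new OpenAiStreamingProcessor walks exactly those fields with the XContent parser (parseList over choices, then delta, then content) rather than by line-splitting as this sketch does, and surfaces the fragments as StreamingChatCompletionResults.
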
diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/ModelConfigurationsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/ModelConfigurationsTests.java index 5a1922fd200f5..03613901c7816 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/ModelConfigurationsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/ModelConfigurationsTests.java @@ -16,8 +16,8 @@ import org.elasticsearch.test.AbstractWireSerializingTestCase; import org.elasticsearch.xpack.core.inference.ChunkingSettingsFeatureFlag; import org.elasticsearch.xpack.inference.chunking.ChunkingSettingsTests; -import org.elasticsearch.xpack.inference.services.elser.ElserInternalServiceSettingsTests; -import org.elasticsearch.xpack.inference.services.elser.ElserMlNodeTaskSettings; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElserInternalServiceSettingsTests; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElserMlNodeTaskSettings; public class ModelConfigurationsTests extends AbstractWireSerializingTestCase { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/common/DelegatingProcessorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/common/DelegatingProcessorTests.java index 826eaf6cd6860..e86b909af91aa 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/common/DelegatingProcessorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/common/DelegatingProcessorTests.java @@ -14,6 +14,7 @@ import java.util.concurrent.atomic.AtomicReference; import static org.hamcrest.Matchers.notNullValue; +import static org.hamcrest.Matchers.nullValue; import static org.mockito.ArgumentMatchers.any; import static org.mockito.ArgumentMatchers.anyInt; import static org.mockito.ArgumentMatchers.eq; @@ -28,17 +29,25 @@ public class DelegatingProcessorTests extends ESTestCase { public static R onNext(DelegatingProcessor processor, T item) { var response = new AtomicReference(); + var error = new AtomicReference(); processor.onSubscribe(mock()); Flow.Subscriber downstream = mock(); + doAnswer(ans -> { response.set(ans.getArgument(0)); return null; }).when(downstream).onNext(any()); + doAnswer(ans -> { + error.set(ans.getArgument(0)); + return null; + }).when(downstream).onError(any()); + processor.subscribe(downstream); processor.onNext(item); + assertThat("onError should not be called", error.get(), nullValue()); assertThat("Response from processor was null", response.get(), notNullValue()); return response.get(); } @@ -46,7 +55,8 @@ public static R onNext(DelegatingProcessor processor, T item) { public static Throwable onError(DelegatingProcessor processor, T item) { var response = new AtomicReference(); - processor.onSubscribe(mock()); + Flow.Subscription upstream = mock(); + processor.onSubscribe(upstream); Flow.Subscriber downstream = mock(); doAnswer(ans -> { @@ -57,6 +67,7 @@ public static Throwable onError(DelegatingProcessor processor, T it processor.onNext(item); assertThat("Error from processor was null", response.get(), notNullValue()); + verify(upstream, times(1)).cancel(); return response.get(); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandlerTests.java 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandlerTests.java index bda709017f266..de5b2416cc766 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandlerTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/openai/OpenAiResponseHandlerTests.java @@ -42,7 +42,7 @@ public void testCheckForFailureStatusCode() { var mockRequest = RequestTests.mockRequest("id"); var httpResult = new HttpResult(httpResponse, new byte[] {}); - var handler = new OpenAiResponseHandler("", (request, result) -> null); + var handler = new OpenAiResponseHandler("", (request, result) -> null, false); // 200 ok when(statusLine.getStatusCode()).thenReturn(200); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/openai/OpenAiStreamingProcessorTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/openai/OpenAiStreamingProcessorTests.java index 992990a476a0c..a57e7c1b64c07 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/openai/OpenAiStreamingProcessorTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/openai/OpenAiStreamingProcessorTests.java @@ -111,12 +111,10 @@ public void testParseErrorCallsOnError() { item.offer(new ServerSentEvent(ServerSentEventField.DATA, "this isn't json")); var exception = onError(new OpenAiStreamingProcessor(), item); - assertThat(exception, instanceOf(IOException.class)); - assertThat(exception.getMessage(), equalTo("Failed to parse event from inference provider.")); - assertThat(exception.getCause(), instanceOf(XContentParseException.class)); + assertThat(exception, instanceOf(XContentParseException.class)); } - public void testEmptyResultsRequestsMoreData() { + public void testEmptyResultsRequestsMoreData() throws Exception { var emptyDeque = new ArrayDeque(); var processor = new OpenAiStreamingProcessor(); @@ -133,7 +131,7 @@ public void testEmptyResultsRequestsMoreData() { verify(downstream, times(0)).onNext(any()); } - public void testDoneMessageIsIgnored() { + public void testDoneMessageIsIgnored() throws Exception { var item = new ArrayDeque(); item.offer(new ServerSentEvent(ServerSentEventField.DATA, "[DONE]")); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestEntityTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestEntityTests.java index 0b61bf060fc5f..9d5492f9e9516 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestEntityTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestEntityTests.java @@ -21,7 +21,7 @@ public class OpenAiChatCompletionRequestEntityTests extends ESTestCase { public void testXContent_WritesUserWhenDefined() throws IOException { - var entity = new OpenAiChatCompletionRequestEntity(List.of("abc"), "model", "user"); + var entity = new OpenAiChatCompletionRequestEntity(List.of("abc"), "model", "user", false); XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); entity.toXContent(builder, null); @@ -33,7 +33,7 @@ public void testXContent_WritesUserWhenDefined() throws 
IOException { } public void testXContent_DoesNotWriteUserWhenItIsNull() throws IOException { - var entity = new OpenAiChatCompletionRequestEntity(List.of("abc"), "model", null); + var entity = new OpenAiChatCompletionRequestEntity(List.of("abc"), "model", null, false); XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON); entity.toXContent(builder, null); @@ -44,10 +44,10 @@ public void testXContent_DoesNotWriteUserWhenItIsNull() throws IOException { } public void testXContent_ThrowsIfModelIsNull() { - assertThrows(NullPointerException.class, () -> new OpenAiChatCompletionRequestEntity(List.of("abc"), null, "user")); + assertThrows(NullPointerException.class, () -> new OpenAiChatCompletionRequestEntity(List.of("abc"), null, "user", false)); } public void testXContent_ThrowsIfMessagesAreNull() { - assertThrows(NullPointerException.class, () -> new OpenAiChatCompletionRequestEntity(null, "model", "user")); + assertThrows(NullPointerException.class, () -> new OpenAiChatCompletionRequestEntity(null, "model", "user", false)); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestTests.java index b71508021eddd..b6ebfd02941f3 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/external/request/openai/OpenAiChatCompletionRequestTests.java @@ -87,6 +87,17 @@ public void testCreateRequest_WithDefaultUrlAndWithoutUserOrganization() throws assertThat(requestMap.get("n"), is(1)); } + public void testCreateRequest_WithStreaming() throws URISyntaxException, IOException { + var request = createRequest(null, null, "secret", "abc", "model", null, true); + var httpRequest = request.createHttpRequest(); + + assertThat(httpRequest.httpRequestBase(), instanceOf(HttpPost.class)); + var httpPost = (HttpPost) httpRequest.httpRequestBase(); + + var requestMap = entityAsMap(httpPost.getEntity().getContent()); + assertThat(requestMap.get("stream"), is(true)); + } + public void testTruncate_DoesNotReduceInputTextSize() throws URISyntaxException, IOException { var request = createRequest(null, null, "secret", "abcd", "model", null); var truncatedRequest = request.truncate(); @@ -117,9 +128,21 @@ public static OpenAiChatCompletionRequest createRequest( String input, String model, @Nullable String user + ) { + return createRequest(url, org, apiKey, input, model, user, false); + } + + public static OpenAiChatCompletionRequest createRequest( + @Nullable String url, + @Nullable String org, + String apiKey, + String input, + String model, + @Nullable String user, + boolean stream ) { var chatCompletionModel = OpenAiChatCompletionModelTests.createChatCompletionModel(url, org, apiKey, model, user); - return new OpenAiChatCompletionRequest(List.of(input), chatCompletionModel); + return new OpenAiChatCompletionRequest(List.of(input), chatCompletionModel, stream); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/InferenceEventsAssertion.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/InferenceEventsAssertion.java new file mode 100644 index 0000000000000..f23ea2aa414b2 --- /dev/null +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/InferenceEventsAssertion.java @@ -0,0 +1,186 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services; + +import org.elasticsearch.ElasticsearchStatusException; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.xcontent.ChunkedToXContent; +import org.elasticsearch.common.xcontent.XContentHelper; +import org.elasticsearch.inference.InferenceServiceResults; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentFactory; +import org.hamcrest.MatcherAssert; +import org.hamcrest.Matchers; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Iterator; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Flow; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; + +import static org.elasticsearch.xcontent.ToXContent.EMPTY_PARAMS; +import static org.hamcrest.CoreMatchers.is; + +/** + * Helper to chain together assertions for streaming {@link InferenceServiceResults}. + */ +public record InferenceEventsAssertion(Iterator events, Throwable error, boolean isComplete, int iterations) { + + public static InferenceEventsAssertion assertThat(InferenceServiceResults results) throws Exception { + return TestSubscriber.subscribeAndWait(results.publisher()).toAssertion(); + } + + public InferenceEventsAssertion hasFinishedStream() { + MatcherAssert.assertThat( + "Expected publisher to eventually call onComplete, but it stopped after [" + iterations + "] iterations.", + isComplete + ); + return this; + } + + public InferenceEventsAssertion hasNoErrors() { + MatcherAssert.assertThat("Expected no errors from stream.", error, Matchers.nullValue()); + return this; + } + + public InferenceEventsAssertion hasError() { + MatcherAssert.assertThat("Expected error from stream.", error, Matchers.notNullValue()); + return this; + } + + public InferenceEventsAssertion hasErrorWithStatusCode(int statusCode) { + hasError(); + Throwable t = error; + while (t != null) { + if (t instanceof ElasticsearchStatusException statusException) { + MatcherAssert.assertThat(statusException.status().getStatus(), Matchers.equalTo(statusCode)); + return this; + } + t = t.getCause(); + } + ESTestCase.fail(error, "Expected an underlying ElasticsearchStatusException."); + return this; + } + + public InferenceEventsAssertion hasErrorContaining(String message) { + hasError(); + Throwable t = error; + while (t != null) { + if (t.getMessage() != null && t.getMessage().contains(message)) { + return this; + } + t = t.getCause(); + } + ESTestCase.fail(error, "Expected exception to contain string: " + message); + return this; + } + + public InferenceEventsAssertion hasEvents(String... 
events) { + Arrays.stream(events).forEach(this::hasEvent); + return this; + } + + public InferenceEventsAssertion hasNoEvents() { + MatcherAssert.assertThat("Expected no items processed by the subscriber.", iterations, Matchers.is(0)); + return this; + } + + public InferenceEventsAssertion hasEvent(String event) { + MatcherAssert.assertThat( + "Subscriber returned [" + iterations + "] results, but we expect at least one more.", + this.events.hasNext(), + Matchers.is(true) + ); + MatcherAssert.assertThat(this.events.next(), Matchers.equalTo(event)); + return this; + } + + private static class TestSubscriber implements Flow.Subscriber { + private final CountDownLatch latch = new CountDownLatch(1); + private final AtomicInteger infiniteLoopCheck = new AtomicInteger(0); + private final Stream.Builder events = Stream.builder(); + private Throwable error; + private boolean isComplete; + private Flow.Subscription subscription; + + private static TestSubscriber subscribeAndWait(Flow.Publisher publisher) throws Exception { + var testSubscriber = new TestSubscriber(); + publisher.subscribe(testSubscriber); + // the subscriber will initiate response handling on another thread, so we need to wait for that thread to finish + try { + MatcherAssert.assertThat( + "Timed out waiting for publisher or mock web server to finish. Collected [" + + testSubscriber.infiniteLoopCheck.get() + + "] items.", + testSubscriber.latch.await(10, TimeUnit.SECONDS), + is(true) + ); + } catch (Exception e) { + // the test is about to fail, but stop the mock server from responding anyway + testSubscriber.subscription.cancel(); + throw e; + } + return testSubscriber; + } + + @Override + public void onSubscribe(Flow.Subscription subscription) { + this.subscription = subscription; + subscription.request(1); + } + + @Override + public void onNext(T item) { + if (infiniteLoopCheck.incrementAndGet() > 10) { + subscription.cancel(); + latch.countDown(); + return; + } + + try { + events.add(toJsonString(item)); + subscription.request(1); + } catch (IOException e) { + onError(e); + } + } + + @Override + public void onError(Throwable throwable) { + error = throwable; + isComplete = true; + latch.countDown(); + } + + @Override + public void onComplete() { + isComplete = true; + latch.countDown(); + } + + private String toJsonString(ChunkedToXContent chunkedToXContent) throws IOException { + try (var builder = XContentFactory.jsonBuilder()) { + chunkedToXContent.toXContentChunked(EMPTY_PARAMS).forEachRemaining(xContent -> { + try { + xContent.toXContent(builder, EMPTY_PARAMS); + } catch (IOException e) { + throw new IllegalStateException(e); + } + }); + return XContentHelper.convertToJson(BytesReference.bytes(builder), false, builder.contentType()); + } + } + + private InferenceEventsAssertion toAssertion() { + return new InferenceEventsAssertion(events.build().iterator(), error, isComplete, infiniteLoopCheck.get()); + } + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsModelTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsModelTests.java index af13ce7944685..c9f4234331221 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsModelTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsModelTests.java @@ -11,7 +11,7 @@ import org.elasticsearch.inference.EmptyTaskSettings; import org.elasticsearch.inference.TaskType; import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.xpack.inference.services.elser.ElserModels; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels; public class ElasticInferenceServiceSparseEmbeddingsModelTests extends ESTestCase { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsServiceSettingsTests.java index a2b36cf9abdd5..1751e1c3be5e8 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsServiceSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceSparseEmbeddingsServiceSettingsTests.java @@ -16,13 +16,13 @@ import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xpack.inference.services.ConfigurationParseContext; import org.elasticsearch.xpack.inference.services.ServiceFields; -import org.elasticsearch.xpack.inference.services.elser.ElserModels; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels; import java.io.IOException; import java.util.HashMap; import java.util.Map; -import static org.elasticsearch.xpack.inference.services.elser.ElserModelsTests.randomElserModel; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElserModelsTests.randomElserModel; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.is; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java index ab85e112418f5..d10c70c6f0f5e 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java @@ -36,7 +36,7 @@ import org.elasticsearch.xpack.inference.logging.ThrottlerManager; import org.elasticsearch.xpack.inference.results.SparseEmbeddingResultsTests; import org.elasticsearch.xpack.inference.services.ServiceFields; -import org.elasticsearch.xpack.inference.services.elser.ElserModels; +import org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels; import org.hamcrest.MatcherAssert; import org.hamcrest.Matchers; import org.junit.After; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettingsTests.java index 41afef88d22c6..419db748d793d 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettingsTests.java +++ 
b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceSettingsTests.java @@ -11,7 +11,6 @@ import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.test.AbstractWireSerializingTestCase; import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings; -import org.elasticsearch.xpack.inference.services.elser.ElserInternalServiceSettings; import java.io.IOException; import java.util.HashMap; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java index de9298f1b08dd..cd6da4c0ad8d8 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java @@ -9,10 +9,12 @@ package org.elasticsearch.xpack.inference.services.elasticsearch; +import org.apache.logging.log4j.Level; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.support.PlainActionFuture; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper; @@ -69,6 +71,8 @@ import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID; import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.MULTILINGUAL_E5_SMALL_MODEL_ID_LINUX_X86; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.NAME; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalService.OLD_ELSER_SERVICE_NAME; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; @@ -97,9 +101,11 @@ public void shutdownThreadPool() { } public void testParseRequestConfig() { + // Null model variant var service = createService(mock(Client.class)); - var settings = new HashMap(); - settings.put( + var config = new HashMap(); + config.put(ModelConfigurations.SERVICE, ElasticsearchInternalService.NAME); + config.put( ModelConfigurations.SERVICE_SETTINGS, new HashMap<>( Map.of(ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4) @@ -112,15 +118,16 @@ public void testParseRequestConfig() { ); var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, modelListener); + service.parseRequestConfig(randomInferenceEntityId, taskType, config, modelListener); } public void testParseRequestConfig_Misconfigured() { - // Null model variant + // Non-existent model variant { var service = createService(mock(Client.class)); - var settings = new HashMap(); - settings.put( + var config = new HashMap(); + config.put(ModelConfigurations.SERVICE, ElasticsearchInternalService.NAME); + config.put( 
ModelConfigurations.SERVICE_SETTINGS, new HashMap<>( Map.of(ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4) @@ -133,20 +140,21 @@ public void testParseRequestConfig_Misconfigured() { ); var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, modelListener); + service.parseRequestConfig(randomInferenceEntityId, taskType, config, modelListener); } // Invalid config map { var service = createService(mock(Client.class)); - var settings = new HashMap(); - settings.put( + var config = new HashMap(); + config.put(ModelConfigurations.SERVICE, ElasticsearchInternalService.NAME); + config.put( ModelConfigurations.SERVICE_SETTINGS, new HashMap<>( Map.of(ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4) ) ); - settings.put("not_a_valid_config_setting", randomAlphaOfLength(10)); + config.put("not_a_valid_config_setting", randomAlphaOfLength(10)); ActionListener modelListener = ActionListener.wrap( model -> fail("Model parsing should have failed"), @@ -154,7 +162,7 @@ public void testParseRequestConfig_Misconfigured() { ); var taskType = randomFrom(TaskType.TEXT_EMBEDDING, TaskType.RERANK, TaskType.SPARSE_EMBEDDING); - service.parseRequestConfig(randomInferenceEntityId, taskType, settings, modelListener); + service.parseRequestConfig(randomInferenceEntityId, taskType, config, modelListener); } } @@ -182,7 +190,7 @@ public void testParseRequestConfig_E5() { randomInferenceEntityId, TaskType.TEXT_EMBEDDING, settings, - getModelVerificationActionListener(e5ServiceSettings) + getE5ModelVerificationActionListener(e5ServiceSettings) ); } @@ -214,7 +222,7 @@ public void testParseRequestConfig_E5() { randomInferenceEntityId, TaskType.TEXT_EMBEDDING, settings, - getModelVerificationActionListener(e5ServiceSettings) + getE5ModelVerificationActionListener(e5ServiceSettings) ); } @@ -247,6 +255,106 @@ public void testParseRequestConfig_E5() { } } + public void testParseRequestConfig_elser() { + // General happy case + { + Client mockClient = mock(Client.class); + when(mockClient.threadPool()).thenReturn(threadPool); + var service = createService(mockClient); + var config = new HashMap(); + config.put(ModelConfigurations.SERVICE, OLD_ELSER_SERVICE_NAME); + config.put( + ModelConfigurations.SERVICE_SETTINGS, + new HashMap<>( + Map.of( + ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, + 1, + ElasticsearchInternalServiceSettings.NUM_THREADS, + 4, + ElasticsearchInternalServiceSettings.MODEL_ID, + ElserModels.ELSER_V2_MODEL + ) + ) + ); + + var elserServiceSettings = new ElserInternalServiceSettings(1, 4, ElserModels.ELSER_V2_MODEL, null); + + service.parseRequestConfig( + randomInferenceEntityId, + TaskType.SPARSE_EMBEDDING, + config, + getElserModelVerificationActionListener( + elserServiceSettings, + null, + "The [elser] service is deprecated and will be removed in a future release. 
Use the [elasticsearch] service " + + "instead, with [model_id] set to [.elser_model_2] in the [service_settings]" + ) + ); + } + + // null model ID returns elser model for the provided platform (not linux) + { + Client mockClient = mock(Client.class); + when(mockClient.threadPool()).thenReturn(threadPool); + var service = createService(mockClient); + var config = new HashMap(); + config.put(ModelConfigurations.SERVICE, OLD_ELSER_SERVICE_NAME); + config.put( + ModelConfigurations.SERVICE_SETTINGS, + new HashMap<>( + Map.of(ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, 1, ElasticsearchInternalServiceSettings.NUM_THREADS, 4) + ) + ); + + var elserServiceSettings = new ElserInternalServiceSettings(1, 4, ElserModels.ELSER_V2_MODEL, null); + + String criticalWarning = + "Putting elasticsearch service inference endpoints (including elser service) without a model_id field is" + + " deprecated and will be removed in a future release. Please specify a model_id field."; + String warnWarning = + "The [elser] service is deprecated and will be removed in a future release. Use the [elasticsearch] service " + + "instead, with [model_id] set to [.elser_model_2] in the [service_settings]"; + service.parseRequestConfig( + randomInferenceEntityId, + TaskType.SPARSE_EMBEDDING, + config, + getElserModelVerificationActionListener(elserServiceSettings, criticalWarning, warnWarning) + ); + assertWarnings(true, new DeprecationWarning(DeprecationLogger.CRITICAL, criticalWarning)); + } + + // Invalid service settings + { + Client mockClient = mock(Client.class); + when(mockClient.threadPool()).thenReturn(threadPool); + var service = createService(mockClient); + var config = new HashMap(); + config.put(ModelConfigurations.SERVICE, OLD_ELSER_SERVICE_NAME); + config.put( + ModelConfigurations.SERVICE_SETTINGS, + new HashMap<>( + Map.of( + ElasticsearchInternalServiceSettings.NUM_ALLOCATIONS, + 1, + ElasticsearchInternalServiceSettings.NUM_THREADS, + 4, + ElasticsearchInternalServiceSettings.MODEL_ID, + ElserModels.ELSER_V2_MODEL, + "not_a_valid_service_setting", + randomAlphaOfLength(10) + ) + ) + ); + + ActionListener modelListener = ActionListener.wrap( + model -> fail("Model parsing should have failed"), + e -> assertThat(e, instanceOf(ElasticsearchStatusException.class)) + ); + + service.parseRequestConfig(randomInferenceEntityId, TaskType.SPARSE_EMBEDDING, config, modelListener); + } + } + @SuppressWarnings("unchecked") public void testParseRequestConfig_Rerank() { // with task settings @@ -374,7 +482,7 @@ public void testParseRequestConfig_SparseEmbedding() { service.parseRequestConfig(randomInferenceEntityId, TaskType.SPARSE_EMBEDDING, settings, modelListener); } - private ActionListener getModelVerificationActionListener(MultilingualE5SmallInternalServiceSettings e5ServiceSettings) { + private ActionListener getE5ModelVerificationActionListener(MultilingualE5SmallInternalServiceSettings e5ServiceSettings) { return ActionListener.wrap(model -> { assertEquals( new MultilingualE5SmallModel( @@ -388,6 +496,30 @@ private ActionListener getModelVerificationActionListener(MultilingualE5S }, e -> { fail("Model parsing failed " + e.getMessage()); }); } + private ActionListener getElserModelVerificationActionListener( + ElserInternalServiceSettings elserServiceSettings, + String criticalWarning, + String warnWarning + ) { + return ActionListener.wrap(model -> { + assertWarnings( + true, + new DeprecationWarning(DeprecationLogger.CRITICAL, criticalWarning), + new DeprecationWarning(Level.WARN, warnWarning) + ); + 
assertEquals( + new ElserInternalModel( + randomInferenceEntityId, + TaskType.SPARSE_EMBEDDING, + NAME, + elserServiceSettings, + ElserMlNodeTaskSettings.DEFAULT + ), + model + ); + }, e -> { fail("Model parsing failed " + e.getMessage()); }); + } + public void testParsePersistedConfig() { // Null model variant diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalServiceSettingsTests.java similarity index 89% rename from x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettingsTests.java rename to x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalServiceSettingsTests.java index ffbdf1a5a6178..f4e97b2c2e5e0 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserInternalServiceSettingsTests.java @@ -5,18 +5,16 @@ * 2.0. */ -package org.elasticsearch.xpack.inference.services.elser; +package org.elasticsearch.xpack.inference.services.elasticsearch; import org.elasticsearch.TransportVersions; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.test.AbstractWireSerializingTestCase; -import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalServiceSettings; -import org.elasticsearch.xpack.inference.services.elasticsearch.ElasticsearchInternalServiceSettingsTests; import java.io.IOException; import java.util.HashSet; -import static org.elasticsearch.xpack.inference.services.elser.ElserModelsTests.randomElserModel; +import static org.elasticsearch.xpack.inference.services.elasticsearch.ElserModelsTests.randomElserModel; public class ElserInternalServiceSettingsTests extends AbstractWireSerializingTestCase { diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserMlNodeTaskSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserMlNodeTaskSettingsTests.java similarity index 93% rename from x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserMlNodeTaskSettingsTests.java rename to x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserMlNodeTaskSettingsTests.java index d55065a5f9b27..a7de3fe8b8fdc 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserMlNodeTaskSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserMlNodeTaskSettingsTests.java @@ -5,7 +5,7 @@ * 2.0. 
*/ -package org.elasticsearch.xpack.inference.services.elser; +package org.elasticsearch.xpack.inference.services.elasticsearch; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.test.AbstractWireSerializingTestCase; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserModelsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserModelsTests.java new file mode 100644 index 0000000000000..fa0148ac69df5 --- /dev/null +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElserModelsTests.java @@ -0,0 +1,39 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.inference.services.elasticsearch; + +import org.elasticsearch.test.ESTestCase; + +public class ElserModelsTests extends ESTestCase { + + public static String randomElserModel() { + return randomFrom(org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.VALID_ELSER_MODEL_IDS); + } + + public void testIsValidModel() { + assertTrue(org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.isValidModel(randomElserModel())); + } + + public void testIsValidEisModel() { + assertTrue( + org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.isValidEisModel( + org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.ELSER_V2_MODEL + ) + ); + } + + public void testIsInvalidModel() { + assertFalse(org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.isValidModel("invalid")); + } + + public void testIsInvalidEisModel() { + assertFalse( + org.elasticsearch.xpack.inference.services.elasticsearch.ElserModels.isValidEisModel(ElserModels.ELSER_V2_MODEL_LINUX_X86) + ); + } +} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceTests.java deleted file mode 100644 index 09abeb9b9b389..0000000000000 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserInternalServiceTests.java +++ /dev/null @@ -1,548 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. 
- * - * this file was contributed to by a generative AI - */ - -package org.elasticsearch.xpack.inference.services.elser; - -import org.elasticsearch.action.ActionListener; -import org.elasticsearch.client.internal.Client; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.inference.ChunkedInferenceServiceResults; -import org.elasticsearch.inference.ChunkingOptions; -import org.elasticsearch.inference.InferenceResults; -import org.elasticsearch.inference.InferenceServiceExtension; -import org.elasticsearch.inference.InputType; -import org.elasticsearch.inference.Model; -import org.elasticsearch.inference.ModelConfigurations; -import org.elasticsearch.inference.TaskType; -import org.elasticsearch.test.ESTestCase; -import org.elasticsearch.threadpool.TestThreadPool; -import org.elasticsearch.threadpool.ThreadPool; -import org.elasticsearch.xpack.core.inference.action.InferenceAction; -import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; -import org.elasticsearch.xpack.core.inference.results.InferenceChunkedSparseEmbeddingResults; -import org.elasticsearch.xpack.core.ml.action.InferModelAction; -import org.elasticsearch.xpack.core.ml.action.InferTrainedModelDeploymentAction; -import org.elasticsearch.xpack.core.ml.action.PutTrainedModelAction; -import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig; -import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; -import org.elasticsearch.xpack.core.ml.inference.results.InferenceChunkedTextExpansionResultsTests; -import org.elasticsearch.xpack.core.ml.inference.results.MlChunkedTextExpansionResults; -import org.elasticsearch.xpack.core.ml.inference.trainedmodel.TokenizationConfigUpdate; -import org.elasticsearch.xpack.inference.InferencePlugin; -import org.junit.After; -import org.junit.Before; -import org.mockito.ArgumentCaptor; -import org.mockito.Mockito; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; - -import static org.hamcrest.Matchers.containsString; -import static org.hamcrest.Matchers.hasSize; -import static org.hamcrest.Matchers.instanceOf; -import static org.mockito.ArgumentMatchers.any; -import static org.mockito.ArgumentMatchers.same; -import static org.mockito.Mockito.doAnswer; -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -public class ElserInternalServiceTests extends ESTestCase { - - private static ThreadPool threadPool; - - @Before - public void setUpThreadPool() { - threadPool = createThreadPool(InferencePlugin.inferenceUtilityExecutor(Settings.EMPTY)); - } - - @After - public void shutdownThreadPool() { - TestThreadPool.terminate(threadPool, 30, TimeUnit.SECONDS); - } - - public static Model randomModelConfig(String inferenceEntityId, TaskType taskType) { - return switch (taskType) { - case SPARSE_EMBEDDING -> new ElserInternalModel( - inferenceEntityId, - taskType, - ElserInternalService.NAME, - ElserInternalServiceSettingsTests.createRandom(), - ElserMlNodeTaskSettingsTests.createRandom() - ); - default -> throw new IllegalArgumentException("task type " + taskType + " is not supported"); - }; - } - - public void testParseConfigStrict() { - var service = createService(mock(Client.class)); - - var 
settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElserInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElserInternalServiceSettings.NUM_THREADS, - 4, - "model_id", - ".elser_model_1" - ) - ) - ); - settings.put(ModelConfigurations.TASK_SETTINGS, Map.of()); - - var expectedModel = new ElserInternalModel( - "foo", - TaskType.SPARSE_EMBEDDING, - ElserInternalService.NAME, - new ElserInternalServiceSettings(1, 4, ".elser_model_1", null), - ElserMlNodeTaskSettings.DEFAULT - ); - - var modelVerificationListener = getModelVerificationListener(expectedModel); - - service.parseRequestConfig("foo", TaskType.SPARSE_EMBEDDING, settings, modelVerificationListener); - - } - - public void testParseConfigWithoutModelId() { - Client mockClient = mock(Client.class); - when(mockClient.threadPool()).thenReturn(threadPool); - var service = createService(mockClient); - - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>(Map.of(ElserInternalServiceSettings.NUM_ALLOCATIONS, 1, ElserInternalServiceSettings.NUM_THREADS, 4)) - ); - - var expectedModel = new ElserInternalModel( - "foo", - TaskType.SPARSE_EMBEDDING, - ElserInternalService.NAME, - new ElserInternalServiceSettings(1, 4, ".elser_model_2", null), - ElserMlNodeTaskSettings.DEFAULT - ); - - var modelVerificationListener = getModelVerificationListener(expectedModel); - - service.parseRequestConfig("foo", TaskType.SPARSE_EMBEDDING, settings, modelVerificationListener); - - } - - public void testParseConfigLooseWithOldModelId() { - var service = createService(mock(Client.class)); - - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElserInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElserInternalServiceSettings.NUM_THREADS, - 4, - "model_version", - ".elser_model_1" - ) - ) - ); - settings.put(ModelConfigurations.TASK_SETTINGS, Map.of()); - - var expectedModel = new ElserInternalModel( - "foo", - TaskType.SPARSE_EMBEDDING, - ElserInternalService.NAME, - new ElserInternalServiceSettings(1, 4, ".elser_model_1", null), - ElserMlNodeTaskSettings.DEFAULT - ); - - var realModel = service.parsePersistedConfig("foo", TaskType.SPARSE_EMBEDDING, settings); - - assertEquals(expectedModel, realModel); - - } - - private static ActionListener getModelVerificationListener(ElserInternalModel expectedModel) { - return ActionListener.wrap( - (model) -> { assertEquals(expectedModel, model); }, - (e) -> fail("Model verification should not fail " + e.getMessage()) - ); - } - - public void testParseConfigStrictWithNoTaskSettings() { - var service = createService(mock(Client.class), Set.of("Aarch64")); - - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>(Map.of(ElserInternalServiceSettings.NUM_ALLOCATIONS, 1, ElserInternalServiceSettings.NUM_THREADS, 4)) - ); - - var expectedModel = new ElserInternalModel( - "foo", - TaskType.SPARSE_EMBEDDING, - ElserInternalService.NAME, - new ElserInternalServiceSettings(1, 4, ElserModels.ELSER_V2_MODEL, null), - ElserMlNodeTaskSettings.DEFAULT - ); - - var modelVerificationListener = getModelVerificationListener(expectedModel); - - service.parseRequestConfig("foo", TaskType.SPARSE_EMBEDDING, settings, modelVerificationListener); - } - - public void testParseConfigStrictWithUnknownSettings() { - - var service = createService(mock(Client.class)); - - for (boolean throwOnUnknown : new boolean[] { true, false }) { - { - 
var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElserInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElserInternalServiceSettings.NUM_THREADS, - 4, - ElserInternalServiceSettings.MODEL_ID, - ".elser_model_2" - ) - ) - ); - settings.put(ModelConfigurations.TASK_SETTINGS, Map.of()); - settings.put("foo", "bar"); - - ActionListener errorVerificationListener = ActionListener.wrap((model) -> { - if (throwOnUnknown) { - fail("Model verification should fail when throwOnUnknown is true"); - } - }, (e) -> { - if (throwOnUnknown) { - assertThat( - e.getMessage(), - containsString("Model configuration contains settings [{foo=bar}] unknown to the [elser] service") - ); - } else { - fail("Model verification should not fail when throwOnUnknown is false"); - } - }); - - if (throwOnUnknown == false) { - var parsed = service.parsePersistedConfigWithSecrets( - "foo", - TaskType.SPARSE_EMBEDDING, - settings, - Collections.emptyMap() - ); - } else { - - service.parseRequestConfig("foo", TaskType.SPARSE_EMBEDDING, settings, errorVerificationListener); - } - } - - { - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElserInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElserInternalServiceSettings.NUM_THREADS, - 4, - ElserInternalServiceSettings.MODEL_ID, - ".elser_model_2" - ) - ) - ); - settings.put(ModelConfigurations.TASK_SETTINGS, Map.of("foo", "bar")); - - ActionListener errorVerificationListener = ActionListener.wrap((model) -> { - if (throwOnUnknown) { - fail("Model verification should fail when throwOnUnknown is true"); - } - }, (e) -> { - if (throwOnUnknown) { - assertThat( - e.getMessage(), - containsString("Model configuration contains settings [{foo=bar}] unknown to the [elser] service") - ); - } else { - fail("Model verification should not fail when throwOnUnknown is false"); - } - }); - if (throwOnUnknown == false) { - var parsed = service.parsePersistedConfigWithSecrets( - "foo", - TaskType.SPARSE_EMBEDDING, - settings, - Collections.emptyMap() - ); - } else { - service.parseRequestConfig("foo", TaskType.SPARSE_EMBEDDING, settings, errorVerificationListener); - } - } - - { - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>( - Map.of( - ElserInternalServiceSettings.NUM_ALLOCATIONS, - 1, - ElserInternalServiceSettings.NUM_THREADS, - 4, - ElserInternalServiceSettings.MODEL_ID, - ".elser_model_2", - "foo", - "bar" - ) - ) - ); - settings.put(ModelConfigurations.TASK_SETTINGS, Map.of("foo", "bar")); - - ActionListener errorVerificationListener = ActionListener.wrap((model) -> { - if (throwOnUnknown) { - fail("Model verification should fail when throwOnUnknown is true"); - } - }, (e) -> { - if (throwOnUnknown) { - assertThat( - e.getMessage(), - containsString("Model configuration contains settings [{foo=bar}] unknown to the [elser] service") - ); - } else { - fail("Model verification should not fail when throwOnUnknown is false"); - } - }); - if (throwOnUnknown == false) { - var parsed = service.parsePersistedConfigWithSecrets( - "foo", - TaskType.SPARSE_EMBEDDING, - settings, - Collections.emptyMap() - ); - } else { - service.parseRequestConfig("foo", TaskType.SPARSE_EMBEDDING, settings, errorVerificationListener); - } - } - } - } - - public void testParseRequestConfig_DefaultModel() { - { - var service = createService(mock(Client.class), Set.of()); - var settings = new HashMap(); - settings.put( - 
ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>(Map.of(ElserInternalServiceSettings.NUM_ALLOCATIONS, 1, ElserInternalServiceSettings.NUM_THREADS, 4)) - ); - - ActionListener modelActionListener = ActionListener.wrap((model) -> { - assertEquals(".elser_model_2", ((ElserInternalModel) model).getServiceSettings().modelId()); - }, (e) -> { fail(e, "Model verification should not fail"); }); - - service.parseRequestConfig("foo", TaskType.SPARSE_EMBEDDING, settings, modelActionListener); - } - { - var service = createService(mock(Client.class), Set.of("linux-x86_64")); - var settings = new HashMap(); - settings.put( - ModelConfigurations.SERVICE_SETTINGS, - new HashMap<>(Map.of(ElserInternalServiceSettings.NUM_ALLOCATIONS, 1, ElserInternalServiceSettings.NUM_THREADS, 4)) - ); - - ActionListener modelActionListener = ActionListener.wrap((model) -> { - assertEquals(".elser_model_2_linux-x86_64", ((ElserInternalModel) model).getServiceSettings().modelId()); - }, (e) -> { fail(e, "Model verification should not fail"); }); - - service.parseRequestConfig("foo", TaskType.SPARSE_EMBEDDING, settings, modelActionListener); - } - } - - @SuppressWarnings("unchecked") - public void testChunkInfer() { - var mlTrainedModelResults = new ArrayList(); - mlTrainedModelResults.add(InferenceChunkedTextExpansionResultsTests.createRandomResults()); - mlTrainedModelResults.add(InferenceChunkedTextExpansionResultsTests.createRandomResults()); - mlTrainedModelResults.add(new ErrorInferenceResults(new RuntimeException("boom"))); - var response = new InferModelAction.Response(mlTrainedModelResults, "foo", true); - - ThreadPool threadpool = new TestThreadPool("test"); - Client client = mock(Client.class); - when(client.threadPool()).thenReturn(threadpool); - doAnswer(invocationOnMock -> { - var listener = (ActionListener) invocationOnMock.getArguments()[2]; - listener.onResponse(response); - return null; - }).when(client).execute(same(InferModelAction.INSTANCE), any(InferModelAction.Request.class), any(ActionListener.class)); - - var model = new ElserInternalModel( - "foo", - TaskType.SPARSE_EMBEDDING, - "elser", - new ElserInternalServiceSettings(1, 1, "elser", null), - new ElserMlNodeTaskSettings() - ); - var service = createService(client); - - var gotResults = new AtomicBoolean(); - var resultsListener = ActionListener.>wrap(chunkedResponse -> { - assertThat(chunkedResponse, hasSize(3)); - assertThat(chunkedResponse.get(0), instanceOf(InferenceChunkedSparseEmbeddingResults.class)); - var result1 = (InferenceChunkedSparseEmbeddingResults) chunkedResponse.get(0); - assertEquals(((MlChunkedTextExpansionResults) mlTrainedModelResults.get(0)).getChunks(), result1.getChunkedResults()); - assertThat(chunkedResponse.get(1), instanceOf(InferenceChunkedSparseEmbeddingResults.class)); - var result2 = (InferenceChunkedSparseEmbeddingResults) chunkedResponse.get(1); - assertEquals(((MlChunkedTextExpansionResults) mlTrainedModelResults.get(1)).getChunks(), result2.getChunkedResults()); - var result3 = (ErrorChunkedInferenceResults) chunkedResponse.get(2); - assertThat(result3.getException(), instanceOf(RuntimeException.class)); - assertThat(result3.getException().getMessage(), containsString("boom")); - gotResults.set(true); - }, ESTestCase::fail); - - service.chunkedInfer( - model, - null, - List.of("foo", "bar"), - Map.of(), - InputType.SEARCH, - new ChunkingOptions(null, null), - InferenceAction.Request.DEFAULT_TIMEOUT, - ActionListener.runAfter(resultsListener, () -> terminate(threadpool)) - ); - - if (gotResults.get() == 
false) { - terminate(threadpool); - } - assertTrue("Listener not called", gotResults.get()); - } - - @SuppressWarnings("unchecked") - public void testChunkInferSetsTokenization() { - var expectedSpan = new AtomicInteger(); - var expectedWindowSize = new AtomicReference(); - - ThreadPool threadpool = new TestThreadPool("test"); - Client client = mock(Client.class); - try { - when(client.threadPool()).thenReturn(threadpool); - doAnswer(invocationOnMock -> { - var request = (InferTrainedModelDeploymentAction.Request) invocationOnMock.getArguments()[1]; - assertThat(request.getUpdate(), instanceOf(TokenizationConfigUpdate.class)); - var update = (TokenizationConfigUpdate) request.getUpdate(); - assertEquals(update.getSpanSettings().span(), expectedSpan.get()); - assertEquals(update.getSpanSettings().maxSequenceLength(), expectedWindowSize.get()); - return null; - }).when(client) - .execute( - same(InferTrainedModelDeploymentAction.INSTANCE), - any(InferTrainedModelDeploymentAction.Request.class), - any(ActionListener.class) - ); - - var model = new ElserInternalModel( - "foo", - TaskType.SPARSE_EMBEDDING, - "elser", - new ElserInternalServiceSettings(1, 1, "elser", null), - new ElserMlNodeTaskSettings() - ); - var service = createService(client); - - expectedSpan.set(-1); - expectedWindowSize.set(null); - service.chunkedInfer( - model, - List.of("foo", "bar"), - Map.of(), - InputType.SEARCH, - null, - InferenceAction.Request.DEFAULT_TIMEOUT, - ActionListener.wrap(r -> fail("unexpected result"), e -> fail(e.getMessage())) - ); - - expectedSpan.set(-1); - expectedWindowSize.set(256); - service.chunkedInfer( - model, - List.of("foo", "bar"), - Map.of(), - InputType.SEARCH, - new ChunkingOptions(256, null), - InferenceAction.Request.DEFAULT_TIMEOUT, - ActionListener.wrap(r -> fail("unexpected result"), e -> fail(e.getMessage())) - ); - } finally { - terminate(threadpool); - } - } - - @SuppressWarnings("unchecked") - public void testPutModel() { - var client = mock(Client.class); - ArgumentCaptor argument = ArgumentCaptor.forClass(PutTrainedModelAction.Request.class); - - doAnswer(invocation -> { - var listener = (ActionListener) invocation.getArguments()[2]; - listener.onResponse(new PutTrainedModelAction.Response(mock(TrainedModelConfig.class))); - return null; - }).when(client).execute(Mockito.same(PutTrainedModelAction.INSTANCE), argument.capture(), any()); - - when(client.threadPool()).thenReturn(threadPool); - - var service = createService(client); - - var model = new ElserInternalModel( - "my-elser", - TaskType.SPARSE_EMBEDDING, - "elser", - new ElserInternalServiceSettings(1, 1, ".elser_model_2", null), - ElserMlNodeTaskSettings.DEFAULT - ); - - service.putModel(model, new ActionListener<>() { - @Override - public void onResponse(Boolean success) { - assertTrue(success); - } - - @Override - public void onFailure(Exception e) { - fail(e); - } - }); - - var putConfig = argument.getValue().getTrainedModelConfig(); - assertEquals("text_field", putConfig.getInput().getFieldNames().get(0)); - } - - private ElserInternalService createService(Client client) { - var context = new InferenceServiceExtension.InferenceServiceFactoryContext(client, threadPool); - return new ElserInternalService(context); - } - - private ElserInternalService createService(Client client, Set architectures) { - var context = new InferenceServiceExtension.InferenceServiceFactoryContext(client, threadPool); - return new ElserInternalService(context, (l) -> l.onResponse(architectures)); - } -} diff --git 
a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserModelsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserModelsTests.java deleted file mode 100644 index f56e941dcc8c0..0000000000000 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elser/ElserModelsTests.java +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0; you may not use this file except in compliance with the Elastic License - * 2.0. - */ - -package org.elasticsearch.xpack.inference.services.elser; - -import org.elasticsearch.test.ESTestCase; - -public class ElserModelsTests extends ESTestCase { - - public static String randomElserModel() { - return randomFrom(ElserModels.VALID_ELSER_MODEL_IDS); - } - - public void testIsValidModel() { - assertTrue(ElserModels.isValidModel(randomElserModel())); - } - - public void testIsValidEisModel() { - assertTrue(ElserModels.isValidEisModel(ElserModels.ELSER_V2_MODEL)); - } - - public void testIsInvalidModel() { - assertFalse(ElserModels.isValidModel("invalid")); - } - - public void testIsInvalidEisModel() { - assertFalse(ElserModels.isValidEisModel(ElserModels.ELSER_V2_MODEL_LINUX_X86)); - } -} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java index 508da45ac2fc2..cf1438b334478 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java @@ -38,8 +38,10 @@ import org.elasticsearch.xpack.inference.external.http.sender.HttpRequestSenderTests; import org.elasticsearch.xpack.inference.external.http.sender.Sender; import org.elasticsearch.xpack.inference.logging.ThrottlerManager; +import org.elasticsearch.xpack.inference.services.InferenceEventsAssertion; import org.elasticsearch.xpack.inference.services.ServiceFields; import org.elasticsearch.xpack.inference.services.openai.completion.OpenAiChatCompletionModel; +import org.elasticsearch.xpack.inference.services.openai.completion.OpenAiChatCompletionModelTests; import org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsModel; import org.elasticsearch.xpack.inference.services.openai.embeddings.OpenAiEmbeddingsModelTests; import org.hamcrest.CoreMatchers; @@ -1005,6 +1007,76 @@ public void testInfer_SendsRequest() throws IOException { } } + public void testInfer_StreamRequest() throws Exception { + String responseJson = """ + data: {\ + "id":"12345",\ + "object":"chat.completion.chunk",\ + "created":123456789,\ + "model":"gpt-4o-mini",\ + "system_fingerprint": "123456789",\ + "choices":[\ + {\ + "index":0,\ + "delta":{\ + "content":"hello, world"\ + },\ + "logprobs":null,\ + "finish_reason":null\ + }\ + ]\ + } + + """; + webServer.enqueue(new MockResponse().setResponseCode(200).setBody(responseJson)); + + var result = streamChatCompletion(); + + InferenceEventsAssertion.assertThat(result).hasFinishedStream().hasNoErrors().hasEvent(""" + {"completion":[{"delta":"hello, world"}]}"""); + } + + private InferenceServiceResults streamChatCompletion() throws IOException { + var 
senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); + try (var service = new OpenAiService(senderFactory, createWithEmptySettings(threadPool))) { + var model = OpenAiChatCompletionModelTests.createChatCompletionModel(getUrl(webServer), "org", "secret", "model", "user"); + PlainActionFuture listener = new PlainActionFuture<>(); + service.infer( + model, + null, + List.of("abc"), + true, + new HashMap<>(), + InputType.INGEST, + InferenceAction.Request.DEFAULT_TIMEOUT, + listener + ); + + return listener.actionGet(TIMEOUT); + } + } + + public void testInfer_StreamRequest_ErrorResponse() throws Exception { + String responseJson = """ + { + "error": { + "message": "You didn't provide an API key...", + "type": "invalid_request_error", + "param": null, + "code": null + } + }"""; + webServer.enqueue(new MockResponse().setResponseCode(401).setBody(responseJson)); + + var result = streamChatCompletion(); + + InferenceEventsAssertion.assertThat(result) + .hasFinishedStream() + .hasNoEvents() + .hasErrorWithStatusCode(401) + .hasErrorContaining("You didn't provide an API key..."); + } + public void testCheckModelConfig_IncludesMaxTokens() throws IOException { var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); diff --git a/x-pack/plugin/logsdb/build.gradle b/x-pack/plugin/logsdb/build.gradle index 5b7e45a90149d..466cf69243c8e 100644 --- a/x-pack/plugin/logsdb/build.gradle +++ b/x-pack/plugin/logsdb/build.gradle @@ -11,6 +11,7 @@ evaluationDependsOn(xpackModule('core')) apply plugin: 'elasticsearch.internal-es-plugin' apply plugin: 'elasticsearch.internal-java-rest-test' +apply plugin: 'elasticsearch.internal-yaml-rest-test' esplugin { name 'logsdb' @@ -30,3 +31,7 @@ dependencies { tasks.named("javaRestTest").configure { usesDefaultDistribution() } + +tasks.named('yamlRestTest') { + usesDefaultDistribution() +} diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbTestSuiteIT.java b/x-pack/plugin/logsdb/src/yamlRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbTestSuiteIT.java new file mode 100644 index 0000000000000..fcac791cb2057 --- /dev/null +++ b/x-pack/plugin/logsdb/src/yamlRestTest/java/org/elasticsearch/xpack/logsdb/LogsdbTestSuiteIT.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.logsdb; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.test.cluster.ElasticsearchCluster; +import org.elasticsearch.test.cluster.local.distribution.DistributionType; +import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; +import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; +import org.junit.ClassRule; + +public class LogsdbTestSuiteIT extends ESClientYamlSuiteTestCase { + + @ClassRule + public static final ElasticsearchCluster cluster = ElasticsearchCluster.local() + .distribution(DistributionType.DEFAULT) + .setting("xpack.security.enabled", "false") + .setting("xpack.license.self_generated.type", "trial") + .build(); + + public LogsdbTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { + super(testCandidate); + } + + @ParametersFactory + public static Iterable parameters() throws Exception { + return ESClientYamlSuiteTestCase.createParameters(); + } + + @Override + protected String getTestRestCluster() { + return cluster.getHttpAddresses(); + } + +} diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/10_setting.yml b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/10_setting.yml new file mode 100644 index 0000000000000..d597859c1e4d8 --- /dev/null +++ b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/10_setting.yml @@ -0,0 +1,39 @@ +--- +synthetic_source_keep defaults: + - requires: + test_runner_features: [ capabilities ] + capabilities: + - method: PUT + path: /{index} + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" + + - do: + indices.create: + index: test1 + body: + settings: + index: + mode: logsdb + + - do: + indices.create: + index: test2 + + - do: + indices.get_settings: + index: test1 + include_defaults: true + + - is_true: test1 + - match: { test1.settings.index.mode: "logsdb" } + - match: { test1.defaults.index.mapping.synthetic_source_keep: "arrays" } + + - do: + indices.get_settings: + index: test2 + include_defaults: true + + - is_true: test2 + - is_false: test2.settings.index.mode + - match: { test2.defaults.index.mapping.synthetic_source_keep: "none" } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizer.java index 464c8eac8c9dd..1b53a7642abf3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizer.java @@ -169,11 +169,6 @@ boolean isWithSpecialTokens() { return withSpecialTokens; } - @Override - int defaultSpanForChunking(int maxWindowSize) { - return (maxWindowSize - numExtraTokensForSingleSequence()) / 2; - } - @Override int getNumExtraTokensForSeqPair() { return 3; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaTokenizationResult.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaTokenizationResult.java new file mode 100644 index 0000000000000..2a50172fcc722 --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaTokenizationResult.java @@ -0,0 +1,143 @@ +/* + * Copyright Elasticsearch 
B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + * + * this file was contributed to by a Generative AI model + */ + +package org.elasticsearch.xpack.ml.inference.nlp.tokenizers; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentFactory; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization; +import org.elasticsearch.xpack.ml.inference.nlp.NlpTask; + +import java.io.IOException; +import java.util.List; +import java.util.function.Function; +import java.util.stream.IntStream; +import java.util.stream.Stream; + +public class DebertaTokenizationResult extends TokenizationResult { + static final String REQUEST_ID = "request_id"; + static final String TOKENS = "tokens"; + static final String ARG1 = "arg_1"; + static final String ARG2 = "arg_2"; + + private static final Logger logger = LogManager.getLogger(DebertaTokenizationResult.class); + + protected DebertaTokenizationResult(List vocab, List tokenizations, int padTokenId) { + super(vocab, tokenizations, padTokenId); + } + + @Override + public NlpTask.Request buildRequest(String requestId, Tokenization.Truncate t) throws IOException { + XContentBuilder builder = XContentFactory.jsonBuilder(); + builder.startObject(); + builder.field(REQUEST_ID, requestId); + writePaddedTokens(TOKENS, builder); + writeAttentionMask(ARG1, builder); + writeTokenTypeIds(ARG2, builder); + builder.endObject(); + + // BytesReference.bytes closes the builder + BytesReference jsonRequest = BytesReference.bytes(builder); + return new NlpTask.Request(this, jsonRequest); + } + + static class DebertaTokensBuilder implements TokenizationResult.TokensBuilder { + private final int clsTokenId; + private final int sepTokenId; + private final boolean withSpecialTokens; + protected final Stream.Builder tokenIds; + protected final Stream.Builder tokenMap; + protected int seqPairOffset = 0; + + DebertaTokensBuilder(int clsTokenId, int sepTokenId, boolean withSpecialTokens) { + this.clsTokenId = clsTokenId; + this.sepTokenId = sepTokenId; + this.withSpecialTokens = withSpecialTokens; + this.tokenIds = Stream.builder(); + this.tokenMap = Stream.builder(); + } + + @Override + public TokensBuilder addSequence(List tokenIds, List tokenMap) { + // DeBERTa-v2 single sequence: [CLS] X [SEP] + if (withSpecialTokens) { + this.tokenIds.add(IntStream.of(clsTokenId)); + this.tokenMap.add(IntStream.of(SPECIAL_TOKEN_POSITION)); + } + this.tokenIds.add(tokenIds.stream().mapToInt(Integer::valueOf)); + this.tokenMap.add(tokenMap.stream().mapToInt(Integer::valueOf)); + if (withSpecialTokens) { + this.tokenIds.add(IntStream.of(sepTokenId)); + this.tokenMap.add(IntStream.of(SPECIAL_TOKEN_POSITION)); + } + return this; + } + + @Override + public TokensBuilder addSequencePair( + List tokenId1s, + List tokenMap1, + List tokenId2s, + List tokenMap2 + ) { + if (tokenId1s.isEmpty() || tokenId2s.isEmpty()) { + throw new IllegalArgumentException("Both sequences must have at least one token"); + } + + // DeBERTa-v2 pair of sequences: [CLS] A [SEP] B [SEP] + if (withSpecialTokens) { + tokenIds.add(IntStream.of(clsTokenId)); + tokenMap.add(IntStream.of(SPECIAL_TOKEN_POSITION)); + } + 
tokenIds.add(tokenId1s.stream().mapToInt(Integer::valueOf)); + tokenMap.add(tokenMap1.stream().mapToInt(Integer::valueOf)); + int previouslyFinalMap = tokenMap1.get(tokenMap1.size() - 1); + if (withSpecialTokens) { + tokenIds.add(IntStream.of(sepTokenId)); + tokenMap.add(IntStream.of(SPECIAL_TOKEN_POSITION)); + } + tokenIds.add(tokenId2s.stream().mapToInt(Integer::valueOf)); + tokenMap.add(tokenMap2.stream().mapToInt(i -> i + previouslyFinalMap)); + if (withSpecialTokens) { + tokenIds.add(IntStream.of(sepTokenId)); + tokenMap.add(IntStream.of(SPECIAL_TOKEN_POSITION)); + } + seqPairOffset = withSpecialTokens ? tokenId1s.size() + 2 : tokenId1s.size(); + return this; + } + + @Override + public Tokens build( + List<String> input, + boolean truncated, + List<List<? extends DelimitedToken>> allTokens, + int spanPrev, + int seqId + ) { + return new Tokens( + input, + allTokens, + truncated, + tokenIds.build().flatMapToInt(Function.identity()).toArray(), + tokenMap.build().flatMapToInt(Function.identity()).toArray(), + spanPrev, + seqId, + seqPairOffset + ); + } + + @Override + public Tokens build(String input, boolean truncated, List<? extends DelimitedToken> allTokens, int spanPrev, int seqId) { + return TokensBuilder.super.build(input, truncated, allTokens, spanPrev, seqId); + } + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaV2Tokenizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaV2Tokenizer.java new file mode 100644 index 0000000000000..3f7094bcce29d --- /dev/null +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaV2Tokenizer.java @@ -0,0 +1,301 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0.
+ * + * This Java port of the DeBERTa-V2 tokenizer was derived from + * Microsoft's DeBERTa-V2 project at https://github.com/microsoft/DeBERTa + * and + * Huggingface's DeBERTa-V2 transformers + * project at https://github.com/huggingface/transformers/blob/main/src/transformers/models/deberta_v2/tokenization_deberta_v2.py + */ + +package org.elasticsearch.xpack.ml.inference.nlp.tokenizers; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.DebertaV2Tokenization; +import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper; +import org.elasticsearch.xpack.ml.inference.nlp.NlpTask; + +import java.io.IOException; +import java.io.Reader; +import java.io.UncheckedIOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.OptionalInt; +import java.util.Set; +import java.util.SortedMap; +import java.util.TreeMap; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +public class DebertaV2Tokenizer extends NlpTokenizer { + + public static final String UNKNOWN_TOKEN = "[UNK]"; + public static final String SEPARATOR_TOKEN = "[SEP]"; + public static final String PAD_TOKEN = "[PAD]"; + public static final String CLASS_TOKEN = "[CLS]"; + public static final String MASK_TOKEN = "[MASK]"; + + private static final Set<String> NEVER_SPLIT = Set.of(UNKNOWN_TOKEN, SEPARATOR_TOKEN, PAD_TOKEN, CLASS_TOKEN, MASK_TOKEN); + + private final DebertaAnalyzer debertaAnalyzer; + protected final List<String> originalVocab; + private final SortedMap<String, Integer> vocab; + protected final boolean withSpecialTokens; + protected final int sepTokenId; + private final int clsTokenId; + protected final int padTokenId; + private final int maxSequenceLength; + + protected DebertaV2Tokenizer( + List<String> originalVocab, + SortedMap<String, Integer> vocab, + List<Double> scores, + boolean withSpecialTokens, + int maxSequenceLength, + Set<String> neverSplit + ) throws IOException { + this.originalVocab = originalVocab; + this.debertaAnalyzer = new DebertaAnalyzer( + originalVocab, + scores, + new ArrayList<>(Sets.union(NEVER_SPLIT, neverSplit)), + UNKNOWN_TOKEN + ); + this.vocab = vocab; + this.withSpecialTokens = withSpecialTokens; + this.maxSequenceLength = maxSequenceLength; + if (vocab.containsKey(UNKNOWN_TOKEN) == false) { + throw ExceptionsHelper.conflictStatusException("stored vocabulary is missing required [{}] token", UNKNOWN_TOKEN); + } + if (vocab.containsKey(PAD_TOKEN) == false) { + throw ExceptionsHelper.conflictStatusException("stored vocabulary is missing required [{}] token", PAD_TOKEN); + } + this.padTokenId = vocab.get(PAD_TOKEN); + if (withSpecialTokens) { + Set<String> missingSpecialTokens = Sets.difference(Set.of(SEPARATOR_TOKEN, CLASS_TOKEN), vocab.keySet()); + if (missingSpecialTokens.isEmpty() == false) { + throw ExceptionsHelper.conflictStatusException("stored vocabulary is missing required {} token(s)", missingSpecialTokens); + } + this.sepTokenId = vocab.get(SEPARATOR_TOKEN); + this.clsTokenId = vocab.get(CLASS_TOKEN); + } else { + this.sepTokenId = -1; + this.clsTokenId = -1; + } + } + + @Override + int clsTokenId() { + return clsTokenId; + } + + @Override + int sepTokenId() { + return sepTokenId; + } + + @Override + int maxSequenceLength() { + return maxSequenceLength; + } + + @Override + boolean isWithSpecialTokens() { + return withSpecialTokens; + } + + @Override + int
numExtraTokensForSingleSequence() { + // https://github.com/huggingface/transformers/blob/v4.44.0/src/transformers/models/deberta_v2/tokenization_deberta_v2.py#L164 + // single sequence: [CLS] X [SEP] + return 2; + } + + @Override + int getNumExtraTokensForSeqPair() { + // https://github.com/huggingface/transformers/blob/v4.44.0/src/transformers/models/deberta_v2/tokenization_deberta_v2.py#L165 + // pair of sequences: [CLS] A [SEP] B [SEP] + return 3; + } + + @Override + public TokenizationResult buildTokenizationResult(List<TokenizationResult.Tokens> tokenizations) { + return new DebertaTokenizationResult(originalVocab, tokenizations, padTokenId); + } + + @Override + public NlpTask.RequestBuilder requestBuilder() { + return (inputs, requestId, truncate, span, windowSize) -> buildTokenizationResult( + IntStream.range(0, inputs.size()) + .boxed() + .flatMap(seqId -> tokenize(inputs.get(seqId), truncate, span, seqId, windowSize).stream()) + .collect(Collectors.toList()) + ).buildRequest(requestId, truncate); + } + + @Override + public OptionalInt getPadTokenId() { + return OptionalInt.of(padTokenId); + } + + @Override + public String getPadToken() { + return PAD_TOKEN; + } + + @Override + public OptionalInt getMaskTokenId() { + Integer maskId = vocab.get(MASK_TOKEN); + if (maskId == null) { + return OptionalInt.empty(); + } + return OptionalInt.of(maskId); + } + + @Override + public String getMaskToken() { + return MASK_TOKEN; + } + + @Override + public List<String> getVocabulary() { + return originalVocab; + } + + @Override + TokenizationResult.TokensBuilder createTokensBuilder(int clsTokenId, int sepTokenId, boolean withSpecialTokens) { + return new DebertaTokenizationResult.DebertaTokensBuilder(clsTokenId, sepTokenId, withSpecialTokens); + } + + public static DebertaV2Tokenizer.Builder builder(List<String> vocab, List<Double> scores, DebertaV2Tokenization tokenization) { + return new DebertaV2Tokenizer.Builder(vocab, scores, tokenization); + } + + public static class Builder { + + protected final List<String> originalVocab; + protected final List<Double> scores; + protected final SortedMap<String, Integer> vocab; + protected boolean withSpecialTokens; + protected int maxSequenceLength; + protected Set<String> neverSplit; + + protected Builder(List<String> vocab, List<Double> scores, DebertaV2Tokenization tokenization) { + this.originalVocab = vocab; + this.vocab = buildSortedVocab(vocab); + this.scores = scores; + this.withSpecialTokens = tokenization.withSpecialTokens(); + this.maxSequenceLength = tokenization.maxSequenceLength(); + } + + private static SortedMap<String, Integer> buildSortedVocab(List<String> vocab) { + SortedMap<String, Integer> sortedVocab = new TreeMap<>(); + for (int i = 0; i < vocab.size(); i++) { + sortedVocab.put(vocab.get(i), i); + } + return sortedVocab; + } + + public DebertaV2Tokenizer.Builder setNeverSplit(Set<String> neverSplit) { + this.neverSplit = neverSplit; + return this; + } + + public DebertaV2Tokenizer.Builder setMaxSequenceLength(int maxSequenceLength) { + this.maxSequenceLength = maxSequenceLength; + return this; + } + + /** + * Include CLS and SEP tokens + * @param withSpecialTokens if true include CLS and SEP tokens + * @return this + */ + public DebertaV2Tokenizer.Builder setWithSpecialTokens(boolean withSpecialTokens) { + this.withSpecialTokens = withSpecialTokens; + return this; + } + + public DebertaV2Tokenizer build() throws IOException { + if (neverSplit == null) { + neverSplit = Collections.emptySet(); + } + + return new DebertaV2Tokenizer(originalVocab, vocab, scores, withSpecialTokens, maxSequenceLength, neverSplit); + } + } + + @Override + public InnerTokenization innerTokenize(String seq) {
+ List tokenPositionMap = new ArrayList<>(); + try (TokenStream ts = debertaAnalyzer.tokenStream("input", seq)) { + ts.reset(); + PositionIncrementAttribute tokenPos = ts.addAttribute(PositionIncrementAttribute.class); + int currPos = -1; // the PositionIncrement starts at one, so this aligns the first token at position 0 + while (ts.incrementToken()) { + currPos += tokenPos.getPositionIncrement(); + tokenPositionMap.add(currPos); + } + } catch (IOException ex) { + throw new UncheckedIOException(ex); + } + return new InnerTokenization(new ArrayList<>(debertaAnalyzer.getTokens()), tokenPositionMap); + } + + @Override + public void close() { + this.debertaAnalyzer.close(); + } + + static class DebertaAnalyzer extends Analyzer { + private final List vocabulary; + private final List neverSplit; + private final double[] scores; + private UnigramTokenizer innerTokenizer; + private final String unknownToken; + private final PrecompiledCharMapNormalizer.Config normalizer; + + DebertaAnalyzer(List vocabulary, List scores, List neverSplit, String unknownToken) throws IOException { + this.vocabulary = vocabulary; + this.neverSplit = neverSplit; + this.unknownToken = unknownToken; + this.scores = new double[scores.size()]; + int i = 0; + for (Double s : scores) { + this.scores[i++] = s; + } + normalizer = PrecompiledCharMapNormalizer.fromBase64EncodedResource( + "/org/elasticsearch/xpack/ml/inference.nlp.tokenizers/spm_precompiled_normalizer.txt" + ); + } + + @Override + protected Reader initReader(String fieldName, Reader reader) { + if (normalizer.offsets().length > 0) { + return new PrecompiledCharMapNormalizer(normalizer.offsets(), normalizer.utf8str(), reader); + } + return reader; + } + + @Override + protected TokenStreamComponents createComponents(String fieldName) { + this.innerTokenizer = UnigramTokenizer.build(neverSplit, vocabulary, scores, unknownToken, true); + return new TokenStreamComponents(this.innerTokenizer); + } + + public List getTokens() { + if (innerTokenizer != null) { + return innerTokenizer.getTokenizedValues(); + } else { + return List.of(); + } + } + } +} diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizer.java index 5014eb269b081..0b4a5b651d8d4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizer.java @@ -11,6 +11,7 @@ import org.elasticsearch.core.Strings; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertJapaneseTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenization; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.DebertaV2Tokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.MPNetTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.RobertaTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization; @@ -26,6 +27,7 @@ import java.util.OptionalInt; import java.util.stream.Collectors; +import static java.lang.Math.min; import static org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig.TOKENIZATION; import static org.elasticsearch.xpack.core.ml.inference.trainedmodel.NlpConfig.VOCABULARY; @@ -48,7 +50,9 @@ public abstract class NlpTokenizer implements Releasable { abstract int 
getNumExtraTokensForSeqPair(); - abstract int defaultSpanForChunking(int maxWindowSize); + int defaultSpanForChunking(int maxWindowSize) { + return (maxWindowSize - numExtraTokensForSingleSequence()) / 2; + } public abstract TokenizationResult buildTokenizationResult(List tokenizations); @@ -85,7 +89,7 @@ public final List tokenize( if (numTokens > windowSize) { switch (truncate) { - case FIRST, SECOND -> { + case FIRST, SECOND, BALANCED -> { // only one sequence exists in this case isTruncated = true; tokenIds = tokenIds.subList(0, isWithSpecialTokens() ? windowSize - numExtraTokensForSingleSequence() : windowSize); tokenPositionMap = tokenPositionMap.subList( @@ -123,7 +127,7 @@ public final List tokenize( int splitStartPos = 0; int spanPrev = -1; while (splitEndPos < tokenIds.size()) { - splitEndPos = Math.min( + splitEndPos = min( splitStartPos + (isWithSpecialTokens() ? windowSize - numExtraTokensForSingleSequence() : windowSize), tokenIds.size() ); @@ -232,6 +236,29 @@ public TokenizationResult.Tokens tokenize( tokenIdsSeq2 = tokenIdsSeq2.subList(0, maxSequenceLength() - extraTokens - tokenIdsSeq1.size()); tokenPositionMapSeq2 = tokenPositionMapSeq2.subList(0, maxSequenceLength() - extraTokens - tokenIdsSeq1.size()); } + case BALANCED -> { + isTruncated = true; + int firstSequenceLength = 0; + + if (tokenIdsSeq2.size() > (maxSequenceLength() - getNumExtraTokensForSeqPair()) / 2) { + firstSequenceLength = min(tokenIdsSeq1.size(), (maxSequenceLength() - getNumExtraTokensForSeqPair()) / 2); + } else { + firstSequenceLength = min( + tokenIdsSeq1.size(), + maxSequenceLength() - tokenIdsSeq2.size() - getNumExtraTokensForSeqPair() + ); + } + int secondSequenceLength = min( + tokenIdsSeq2.size(), + maxSequenceLength() - firstSequenceLength - getNumExtraTokensForSeqPair() + ); + + tokenIdsSeq1 = tokenIdsSeq1.subList(0, firstSequenceLength); + tokenPositionMapSeq1 = tokenPositionMapSeq1.subList(0, firstSequenceLength); + + tokenIdsSeq2 = tokenIdsSeq2.subList(0, secondSequenceLength); + tokenPositionMapSeq2 = tokenPositionMapSeq2.subList(0, secondSequenceLength); + } case NONE -> throw ExceptionsHelper.badRequestException( "Input too large. 
The tokenized input length [{}] exceeds the maximum sequence length [{}]", numTokens, @@ -355,7 +382,7 @@ public List tokenize(String seq1, String seq2, Tokeni } while (splitEndPos < tokenIdsSeq2.size()) { - splitEndPos = Math.min(splitStartPos + trueMaxSeqLength, tokenIdsSeq2.size()); + splitEndPos = min(splitStartPos + trueMaxSeqLength, tokenIdsSeq2.size()); // Make sure we do not end on a word if (splitEndPos != tokenIdsSeq2.size()) { while (splitEndPos > splitStartPos + 1 @@ -447,6 +474,9 @@ public static NlpTokenizer build(Vocabulary vocabulary, Tokenization params) thr if (params instanceof XLMRobertaTokenization xlmRobertaTokenization) { return XLMRobertaTokenizer.builder(vocabulary.get(), vocabulary.scores(), xlmRobertaTokenization).build(); } + if (params instanceof DebertaV2Tokenization debertaV2Tokenization) { + return DebertaV2Tokenizer.builder(vocabulary.get(), vocabulary.scores(), debertaV2Tokenization).build(); + } throw new IllegalArgumentException("unknown tokenization type [" + params.getName() + "]"); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/RobertaTokenizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/RobertaTokenizer.java index e884e84faa85d..6d58d2e2dc2cf 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/RobertaTokenizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/RobertaTokenizer.java @@ -106,11 +106,6 @@ int getNumExtraTokensForSeqPair() { return 4; } - @Override - int defaultSpanForChunking(int maxWindowSize) { - return (maxWindowSize - numExtraTokensForSingleSequence()) / 2; - } - @Override int numExtraTokensForSingleSequence() { return 2; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/UnigramTokenizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/UnigramTokenizer.java index acb1f6c038ef9..31deac066cba2 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/UnigramTokenizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/UnigramTokenizer.java @@ -14,6 +14,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.UnicodeUtil; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.util.Maps; import org.elasticsearch.core.Nullable; @@ -49,7 +50,13 @@ public final class UnigramTokenizer extends Tokenizer { private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - static UnigramTokenizer build(List neverSplit, List dictionary, double[] scores, String unknownToken) { + static UnigramTokenizer build( + List neverSplit, + List dictionary, + double[] scores, + String unknownToken, + boolean byteFallback + ) { if (dictionary.isEmpty()) { throw new IllegalArgumentException("vocab empty"); } @@ -84,7 +91,8 @@ static UnigramTokenizer build(List neverSplit, List dictionary, Optional.ofNullable(tokenToId.get(new BytesRef(unknownToken))) .orElseThrow( () -> new IllegalArgumentException("provided vocabulary does not contain the unknown token of [" + unknownToken + "]") - ) + ), + byteFallback ); } @@ -94,7 +102,7 @@ static UnigramTokenizer build(List neverSplit, List dictionary, private final double 
minScore; // This may be configurable in the future - private final boolean fuseUnk = true; + private boolean fuseUnk = true; private final double[] vocabScores; private final CharTrie neverSplit; private final CharArraySet neverSplitHash; @@ -104,6 +112,7 @@ static UnigramTokenizer build(List neverSplit, List dictionary, // This is a buffer that is reused per token for decoding the normalized char-sequence into utf-8 bytes // It's usage is NOT thread safe private byte[] normalizedByteBuffer = new byte[128]; + private boolean byteFallback = false; // If true, decompose unknown pieces into UTF-8 byte pieces public UnigramTokenizer( double minScore, @@ -127,6 +136,31 @@ public UnigramTokenizer( this.whitespaceTokenizer = new SimpleWhitespaceTokenizer(); } + public UnigramTokenizer( + double minScore, + double[] vocabScores, + CharTrie neverSplit, + CharArraySet neverSplitHash, + Map vocabToId, + BytesTrie vocabTrie, + int unknownTokenId, + boolean byteFallback + ) { + super(); + this.tokens = new LinkedList<>(); + this.tokenizedValues = new ArrayList<>(); + this.minScore = minScore; + this.neverSplit = neverSplit; + this.neverSplitHash = neverSplitHash; + this.vocabToId = vocabToId; + this.vocabTrie = vocabTrie; + this.unknownTokenId = unknownTokenId; + this.vocabScores = vocabScores; + this.whitespaceTokenizer = new SimpleWhitespaceTokenizer(); + this.byteFallback = byteFallback; + this.fuseUnk = byteFallback == false; + } + List getTokenizedValues() { return tokenizedValues; } @@ -231,6 +265,21 @@ public boolean incrementToken() throws IOException { return false; } + private int[] decomposeBytePieces(byte[] bytes) { + assert this.byteFallback; + + int[] pieces = new int[bytes.length]; + for (int i = 0; i < bytes.length; i++) { + BytesRef decomposedToken = new BytesRef(Strings.format("<0x%02X>", bytes[i])); + Integer piece = vocabToId.get(decomposedToken); + if (piece == null) { + piece = unknownTokenId; + } + pieces[i] = piece; + } + return pieces; + } + /** * This algorithm does the following: * @@ -309,7 +358,21 @@ List tokenize(CharSequence inputSequence, IntToIntFuncti while (endsAtBytes > 0) { BestPathNode node = bestPathNodes[endsAtBytes]; int startsAtBytes = node.startsAtBytePos; - if (node.id == unknownTokenId && fuseUnk) { + if (node.id == unknownTokenId && byteFallback) { + CharSequence multiByteSequence = inputSequence.subSequence(node.startsAtCharPos, endsAtChars); + byte[] bytes = multiByteSequence.toString().getBytes(StandardCharsets.UTF_8); + int[] pieces = decomposeBytePieces(bytes); + for (int i = pieces.length - 1; i >= 0; i--) { + results.add( + new DelimitedToken.Encoded( + Strings.format("<0x%02X>", bytes[i]), + pieces[i], + offsetCorrection.apply(node.startsAtCharPos), + offsetCorrection.apply(startsAtBytes + i) + ) + ); + } + } else if (node.id == unknownTokenId && fuseUnk) { unknownTokens.add( new DelimitedToken.Encoded( new String(normalizedByteBuffer, startsAtBytes, endsAtBytes - startsAtBytes, StandardCharsets.UTF_8), diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/XLMRobertaTokenizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/XLMRobertaTokenizer.java index 7a856d8e4735a..0e8793eb374ca 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/XLMRobertaTokenizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/XLMRobertaTokenizer.java @@ -101,11 +101,6 @@ int getNumExtraTokensForSeqPair() 
{ return 4; } - @Override - int defaultSpanForChunking(int maxWindowSize) { - return (maxWindowSize - numExtraTokensForSingleSequence()) / 2; - } - @Override int numExtraTokensForSingleSequence() { return 2; @@ -284,7 +279,7 @@ protected Reader initReader(String fieldName, Reader reader) { @Override protected TokenStreamComponents createComponents(String fieldName) { - this.innerTokenizer = UnigramTokenizer.build(neverSplit, vocabulary, scores, unknownToken); + this.innerTokenizer = UnigramTokenizer.build(neverSplit, vocabulary, scores, unknownToken, false); return new TokenStreamComponents(this.innerTokenizer); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizerTests.java index 901fea45d9de9..ccebe3bf0ca98 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/BertTokenizerTests.java @@ -760,6 +760,119 @@ public void testTokenizeLargeInputMultiSequenceTruncation() { } + public void testTokenizeLargeInputMultiSequenceBalancedTruncation() { + try ( + BertTokenizer tokenizer = BertTokenizer.builder( + TEST_CASED_VOCAB, + new BertTokenization(null, true, 10, Tokenization.Truncate.BALANCED, -1) + ).build() + ) { + + { // both sequences are truncated + TokenizationResult.Tokens tokenization = tokenizer.tokenize( + "Elasticsearch is fun", + "Godzilla my little red car", + Tokenization.Truncate.BALANCED, + 0 + ); + + var tokenStream = Arrays.stream(tokenization.tokenIds()).mapToObj(TEST_CASED_VOCAB::get).collect(Collectors.toList()); + assertThat( + tokenStream, + contains( + BertTokenizer.CLASS_TOKEN, + "Elastic", + "##search", + "is", + BertTokenizer.SEPARATOR_TOKEN, + "God", + "##zilla", + "my", + "little", + BertTokenizer.SEPARATOR_TOKEN + ) + ); + } + + { // first sequence is too short to be truncated + TokenizationResult.Tokens tokenization = tokenizer.tokenize( + "Elasticsearch", + "Godzilla my little red car", + Tokenization.Truncate.BALANCED, + 0 + ); + + var tokenStream = Arrays.stream(tokenization.tokenIds()).mapToObj(TEST_CASED_VOCAB::get).collect(Collectors.toList()); + assertThat( + tokenStream, + contains( + BertTokenizer.CLASS_TOKEN, + "Elastic", + "##search", + BertTokenizer.SEPARATOR_TOKEN, + "God", + "##zilla", + "my", + "little", + "red", + BertTokenizer.SEPARATOR_TOKEN + ) + ); + } + + { // second sequence is too short to be truncated + TokenizationResult.Tokens tokenization = tokenizer.tokenize( + "Elasticsearch is my little red fun", + "Godzilla", + Tokenization.Truncate.BALANCED, + 0 + ); + + var tokenStream = Arrays.stream(tokenization.tokenIds()).mapToObj(TEST_CASED_VOCAB::get).collect(Collectors.toList()); + assertThat( + tokenStream, + contains( + BertTokenizer.CLASS_TOKEN, + "Elastic", + "##search", + "is", + "my", + "little", + BertTokenizer.SEPARATOR_TOKEN, + "God", + "##zilla", + BertTokenizer.SEPARATOR_TOKEN + ) + ); + } + + { // both sequences are too short to be truncated + TokenizationResult.Tokens tokenization = tokenizer.tokenize("Elasticsearch", "Godzilla", Tokenization.Truncate.BALANCED, 0); + + var tokenStream = Arrays.stream(tokenization.tokenIds()).mapToObj(TEST_CASED_VOCAB::get).collect(Collectors.toList()); + assertThat( + tokenStream, + contains( + BertTokenizer.CLASS_TOKEN, + "Elastic", + "##search", + BertTokenizer.SEPARATOR_TOKEN, 
+ "God", + "##zilla", + BertTokenizer.SEPARATOR_TOKEN + ) + ); + } + + expectThrows( + ElasticsearchStatusException.class, + () -> BertTokenizer.builder(TEST_CASED_VOCAB, new BertTokenization(null, true, 8, Tokenization.Truncate.NONE, -1)) + .build() + .tokenize("Elasticsearch is fun", "Godzilla my little red car", Tokenization.Truncate.NONE, 0) + ); + } + } + public void testMultiSeqRequiresSpecialTokens() { try ( BertTokenizer tokenizer = BertTokenizer.builder( diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaV2TokenizerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaV2TokenizerTests.java new file mode 100644 index 0000000000000..bbe509da67452 --- /dev/null +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/DebertaV2TokenizerTests.java @@ -0,0 +1,206 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.ml.inference.nlp.tokenizers; + +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.DebertaV2Tokenization; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; + +import static org.elasticsearch.xpack.ml.inference.nlp.tokenizers.DebertaV2Tokenizer.MASK_TOKEN; +import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.not; + +public class DebertaV2TokenizerTests extends ESTestCase { + + private static final List TEST_CASE_VOCAB = List.of( + DebertaV2Tokenizer.CLASS_TOKEN, + DebertaV2Tokenizer.PAD_TOKEN, + DebertaV2Tokenizer.SEPARATOR_TOKEN, + DebertaV2Tokenizer.UNKNOWN_TOKEN, + "▁Ela", + "stic", + "search", + "▁is", + "▁fun", + "▁God", + "z", + "illa", + "▁my", + "▁little", + "▁red", + "▁car", + "▁😀", + "▁🇸🇴", + MASK_TOKEN, + ".", + "<0xC2>", + "<0xAD>", + "▁" + ); + private static final List TEST_CASE_SCORES = List.of( + 0.0, + 0.0, + 0.0, + 0.0, + -12.535264015197754, + -12.300995826721191, + -13.255199432373047, + -7.402246475219727, + -11.201482772827148, + -10.576351165771484, + -7.898513317108154, + -10.230172157287598, + -9.18289566040039, + -11.451579093933105, + -10.858806610107422, + -10.214239120483398, + -10.230172157287598, + -9.451579093933105, + 0.0, + -3.0, + 1.0, + 2.0, + -7.97025 + ); + + private List tokenStrings(List tokens) { + return tokens.stream().map(DelimitedToken::toString).collect(Collectors.toList()); + } + + public void testTokenize() throws IOException { + try ( + DebertaV2Tokenizer tokenizer = DebertaV2Tokenizer.builder( + TEST_CASE_VOCAB, + TEST_CASE_SCORES, + new DebertaV2Tokenization(false, false, null, Tokenization.Truncate.NONE, -1) + ).build() + ) { + TokenizationResult.Tokens tokenization = tokenizer.tokenize("Elasticsearch fun", Tokenization.Truncate.NONE, -1, 0, null) + .get(0); + assertThat(tokenStrings(tokenization.tokens().get(0)), contains("▁Ela", "stic", "search", "▁fun")); + assertArrayEquals(new int[] { 4, 5, 6, 8 }, tokenization.tokenIds()); + assertArrayEquals(new int[] { 0, 1, 2, 3 }, tokenization.tokenMap()); + } + } + + public void testSurrogatePair() throws IOException { + try ( + 
DebertaV2Tokenizer tokenizer = DebertaV2Tokenizer.builder( + TEST_CASE_VOCAB, + TEST_CASE_SCORES, + new DebertaV2Tokenization(false, false, null, Tokenization.Truncate.NONE, -1) + ).build() + ) { + TokenizationResult.Tokens tokenization = tokenizer.tokenize( + "Elastic" + "\u00AD" + "search 😀" + "\u00AD" + " fun", + Tokenization.Truncate.NONE, + -1, + 0, + null + ).get(0); + assertArrayEquals(new int[] { 4, 5, 20, 21, 6, 16, 20, 21, 8 }, tokenization.tokenIds()); + assertThat( + tokenStrings(tokenization.tokens().get(0)), + contains("▁Ela", "stic", "<0xC2>", "<0xAD>", "search", "▁\uD83D\uDE00", "<0xC2>", "<0xAD>", "▁fun") + ); + + tokenization = tokenizer.tokenize("😀", Tokenization.Truncate.NONE, -1, 0, null).get(0); + assertThat(tokenStrings(tokenization.tokens().get(0)), contains("▁\uD83D\uDE00")); + + tokenization = tokenizer.tokenize("Elasticsearch 😀", Tokenization.Truncate.NONE, -1, 0, null).get(0); + assertThat(tokenStrings(tokenization.tokens().get(0)), contains("▁Ela", "stic", "search", "▁\uD83D\uDE00")); + + tokenization = tokenizer.tokenize("Elasticsearch 😀 fun", Tokenization.Truncate.NONE, -1, 0, null).get(0); + assertThat(tokenStrings(tokenization.tokens().get(0)), contains("▁Ela", "stic", "search", "▁\uD83D\uDE00", "▁fun")); + + } + } + + public void testMultiByteEmoji() throws IOException { + try ( + DebertaV2Tokenizer tokenizer = DebertaV2Tokenizer.builder( + TEST_CASE_VOCAB, + TEST_CASE_SCORES, + new DebertaV2Tokenization(false, false, null, Tokenization.Truncate.NONE, -1) + ).build() + ) { + TokenizationResult.Tokens tokenization = tokenizer.tokenize("🇸🇴", Tokenization.Truncate.NONE, -1, 0, null).get(0); + assertThat(tokenStrings(tokenization.tokens().get(0)), contains("▁🇸🇴")); + assertThat(tokenization.tokenIds()[0], not(equalTo(3))); // not the unknown token + + tokenization = tokenizer.tokenize("🏁", Tokenization.Truncate.NONE, -1, 0, null).get(0); + assertThat(tokenStrings(tokenization.tokens().get(0)), contains("▁", "<0xF0>", "<0x9F>", "<0x8F>", "<0x81>")); + // contains the 4-byte sequence representing the emoji which is not in the vocab, due to byteFallback enabled + } + } + + public void testTokenizeWithNeverSplit() throws IOException { + try ( + DebertaV2Tokenizer tokenizer = DebertaV2Tokenizer.builder( + TEST_CASE_VOCAB, + TEST_CASE_SCORES, + new DebertaV2Tokenization(false, true, null, Tokenization.Truncate.NONE, -1) + ).build() + ) { + TokenizationResult.Tokens tokenization = tokenizer.tokenize( + "Elasticsearch ." 
+ MASK_TOKEN + ".", + Tokenization.Truncate.NONE, + -1, + 0, + null + ).get(0); + assertThat(tokenStrings(tokenization.tokens().get(0)), contains("▁Ela", "stic", "search", "▁", ".", MASK_TOKEN, "▁", ".")); + } + } + + public void testMultiSeqTokenization() throws IOException { + try ( + DebertaV2Tokenizer tokenizer = DebertaV2Tokenizer.builder( + TEST_CASE_VOCAB, + TEST_CASE_SCORES, + new DebertaV2Tokenization(false, false, null, Tokenization.Truncate.NONE, -1) + ).setWithSpecialTokens(true).build() + ) { + TokenizationResult.Tokens tokenization = tokenizer.tokenize( + "Elasticsearch is fun", + "Godzilla my little red car", + Tokenization.Truncate.NONE, + 0 + ); + + var tokenStream = Arrays.stream(tokenization.tokenIds()).mapToObj(TEST_CASE_VOCAB::get).collect(Collectors.toList()); + assertThat( + tokenStream, + contains( + DebertaV2Tokenizer.CLASS_TOKEN, + "▁Ela", + "stic", + "search", + "▁is", + "▁fun", + DebertaV2Tokenizer.SEPARATOR_TOKEN, + "▁God", + "z", + "illa", + "▁my", + "▁little", + "▁red", + "▁car", + DebertaV2Tokenizer.SEPARATOR_TOKEN + ) + ); + } + } + +} diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizerTests.java index fc2a31a06e187..ad6f44e77aafc 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizerTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/NlpTokenizerTests.java @@ -10,6 +10,7 @@ import org.elasticsearch.test.ESTestCase; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertJapaneseTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.BertTokenization; +import org.elasticsearch.xpack.core.ml.inference.trainedmodel.DebertaV2Tokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.MPNetTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.RobertaTokenization; import org.elasticsearch.xpack.core.ml.inference.trainedmodel.Tokenization; @@ -44,6 +45,13 @@ public class NlpTokenizerTests extends ESTestCase { RobertaTokenizer.CLASS_TOKEN, RobertaTokenizer.MASK_TOKEN ); + public static final List DEBERTA_REQUIRED_VOCAB = List.of( + DebertaV2Tokenizer.UNKNOWN_TOKEN, + DebertaV2Tokenizer.SEPARATOR_TOKEN, + DebertaV2Tokenizer.PAD_TOKEN, + DebertaV2Tokenizer.CLASS_TOKEN, + DebertaV2Tokenizer.MASK_TOKEN + ); void validateBuilder(List vocab, Tokenization tokenization, Class expectedClass) throws IOException { Vocabulary vocabulary = new Vocabulary(vocab, "model-name", null, null); @@ -66,5 +74,8 @@ public void testBuildTokenizer() throws IOException { Tokenization xlmRoberta = new XLMRobertaTokenization(null, null, Tokenization.Truncate.NONE, -1); validateBuilder(ROBERTA_REQUIRED_VOCAB, xlmRoberta, XLMRobertaTokenizer.class); + + Tokenization debertaV2 = new DebertaV2Tokenization(false, null, null, Tokenization.Truncate.NONE, -1); + validateBuilder(DEBERTA_REQUIRED_VOCAB, debertaV2, DebertaV2Tokenizer.class); } } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/UnigramTokenizerTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/UnigramTokenizerTests.java index f97055b29ca7b..d1ce2fea9d1dc 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/UnigramTokenizerTests.java +++ 
b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/inference/nlp/tokenizers/UnigramTokenizerTests.java @@ -39,8 +39,24 @@ public void testSimpleTokenization() throws IOException { public void testLessSimpleTokenization() throws IOException { TestNLPAnalyzer analyzer = new TestNLPAnalyzer( - List.of(UNKNOWN_TOKEN, PREFIX + "ab", "cd", PREFIX + "abc", "a", "b", "c", "ABC", "abcdabcd", "q", "r", "qr", ""), - List.of(0.0, 0.0, -0.1, -0.2, -0.3, -0.4, -0.5, -0.5, 20.0, 20.5, 20.5, -0.5, 0.0), + List.of( + UNKNOWN_TOKEN, + PREFIX + "ab", + "cd", + PREFIX + "abc", + "a", + "b", + "c", + "ABC", + "abcdabcd", + "q", + "r", + "qr", + "", + "aa", + "aaaa" + ), + List.of(0.0, 0.0, -0.1, -0.2, -0.3, -0.4, -0.5, -0.5, 20.0, 20.5, 20.5, -0.5, 0.0, -13.5467, -14.9644), UNKNOWN_TOKEN, new PrecompiledCharMapNormalizer.Config(new int[0], "") ); @@ -53,6 +69,31 @@ public void testLessSimpleTokenization() throws IOException { assertAnalyzesToNoCharFilter(analyzer, " \nabcd \n\n abcc \n", new String[] { PREFIX + "ab", "cd", PREFIX + "abc", "c" }); } + public void testLessSimpleTokenizationForRepeatingCharacters() throws IOException { + TestNLPAnalyzer analyzer = new TestNLPAnalyzer( + List.of(UNKNOWN_TOKEN, "HH", "HHHH", PREFIX + "H", "HHH", PREFIX + "HH", PREFIX, PREFIX + "HHH"), + List.of(0.0, -13.5467, -14.9644, -9.17478, -15.1165, -13.201, -7.97025, -15.602), + UNKNOWN_TOKEN, + PrecompiledCharMapNormalizer.fromBase64EncodedResource( + "/org/elasticsearch/xpack/ml/inference.nlp.tokenizers/spm_precompiled_normalizer.txt" + ) + ); + + assertAnalyzesToNoCharFilter(analyzer, "HHHHHHHHHHHH", new String[] { PREFIX, "HHHH", "HHHH", "HHHH" }); + assertAnalyzesToNoCharFilter(analyzer, "HHHHHHHHHHH", new String[] { PREFIX + "HHH", "HHHH", "HHHH" }); + assertAnalyzesToNoCharFilter(analyzer, "HHHHHHHHHH", new String[] { PREFIX + "HH", "HHHH", "HHHH" }); + assertAnalyzesToNoCharFilter(analyzer, "HHHHHHHHH", new String[] { PREFIX + "H", "HHHH", "HHHH" }); + assertAnalyzesToNoCharFilter(analyzer, "HHHHHHHH", new String[] { PREFIX, "HHHH", "HHHH" }); + assertAnalyzesToNoCharFilter(analyzer, "HHHHHHH", new String[] { PREFIX + "HHH", "HHHH" }); + assertAnalyzesToNoCharFilter(analyzer, "HHHHHH", new String[] { PREFIX + "HH", "HHHH" }); + assertAnalyzesToNoCharFilter(analyzer, "HHHHH", new String[] { PREFIX + "H", "HHHH" }); + assertAnalyzesToNoCharFilter(analyzer, "HHHH", new String[] { PREFIX, "HHHH" }); + assertAnalyzesToNoCharFilter(analyzer, "HHH", new String[] { PREFIX + "HHH" }); + assertAnalyzesToNoCharFilter(analyzer, "HH", new String[] { PREFIX + "HH" }); + assertAnalyzesToNoCharFilter(analyzer, "H", new String[] { PREFIX + "H" }); + + } + public void testLessSimpleTokenizationWithNeverSplit() throws IOException { TestNLPAnalyzer analyzer = new TestNLPAnalyzer( List.of( @@ -153,7 +194,7 @@ protected Reader initReader(String fieldName, Reader reader) { @Override protected TokenStreamComponents createComponents(String fieldName) { - UnigramTokenizer tokenizer = UnigramTokenizer.build(NEVER_SPLIT, dictionary, scores, unknownToken); + UnigramTokenizer tokenizer = UnigramTokenizer.build(NEVER_SPLIT, dictionary, scores, unknownToken, false); return new TokenStreamComponents(tokenizer); } } diff --git a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/indices/IndexStatsMonitoringDocTests.java b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/indices/IndexStatsMonitoringDocTests.java index afeec7dd52b17..b6c059b7a0dcc 100644 --- 
a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/indices/IndexStatsMonitoringDocTests.java +++ b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/indices/IndexStatsMonitoringDocTests.java @@ -392,7 +392,7 @@ private static CommonStats mockCommonStats() { final IndexingStats.Stats indexingStats = new IndexingStats.Stats(++iota, ++iota, no, no, no, no, no, no, false, ++iota, no, no); commonStats.getIndexing().add(new IndexingStats(indexingStats)); - final SearchStats.Stats searchStats = new SearchStats.Stats(++iota, ++iota, no, no, no, no, no, no, no, no, no, no); + final SearchStats.Stats searchStats = new SearchStats.Stats(++iota, ++iota, no, no, no, no, no, no, no, no, no, no, no, no); commonStats.getSearch().add(new SearchStats(searchStats, no, null)); final SegmentsStats segmentsStats = new SegmentsStats(); diff --git a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/indices/IndicesStatsMonitoringDocTests.java b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/indices/IndicesStatsMonitoringDocTests.java index 0d1a0374d4fc3..6822f54633bdc 100644 --- a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/indices/IndicesStatsMonitoringDocTests.java +++ b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/indices/IndicesStatsMonitoringDocTests.java @@ -186,7 +186,7 @@ private CommonStats mockCommonStats() { final IndexingStats.Stats indexingStats = new IndexingStats.Stats(3L, 4L, 0L, 0L, 0L, 0L, 0L, 0L, true, 5L, 0, 0); commonStats.getIndexing().add(new IndexingStats(indexingStats)); - final SearchStats.Stats searchStats = new SearchStats.Stats(6L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L); + final SearchStats.Stats searchStats = new SearchStats.Stats(6L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L); commonStats.getSearch().add(new SearchStats(searchStats, 0L, null)); final BulkStats bulkStats = new BulkStats(0L, 0L, 0L, 0L, 0L); diff --git a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/node/NodeStatsMonitoringDocTests.java b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/node/NodeStatsMonitoringDocTests.java index 54f3ce634a25a..da23f27e1357e 100644 --- a/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/node/NodeStatsMonitoringDocTests.java +++ b/x-pack/plugin/monitoring/src/test/java/org/elasticsearch/xpack/monitoring/collector/node/NodeStatsMonitoringDocTests.java @@ -352,7 +352,7 @@ private static NodeStats mockNodeStats() { indicesCommonStats.getQueryCache().add(new QueryCacheStats(++iota, ++iota, ++iota, ++iota, no)); indicesCommonStats.getRequestCache().add(new RequestCacheStats(++iota, ++iota, ++iota, ++iota)); - final SearchStats.Stats searchStats = new SearchStats.Stats(++iota, ++iota, no, no, no, no, no, no, no, no, no, no); + final SearchStats.Stats searchStats = new SearchStats.Stats(++iota, ++iota, no, no, no, no, no, no, no, no, no, no, no, no); indicesCommonStats.getSearch().add(new SearchStats(searchStats, no, null)); final SegmentsStats segmentsStats = new SegmentsStats(); diff --git a/x-pack/plugin/otel-data/src/main/resources/component-templates/traces-otel@mappings.yaml b/x-pack/plugin/otel-data/src/main/resources/component-templates/traces-otel@mappings.yaml index a4c62efeed7a4..0e77bc208eed4 100644 --- 
a/x-pack/plugin/otel-data/src/main/resources/component-templates/traces-otel@mappings.yaml +++ b/x-pack/plugin/otel-data/src/main/resources/component-templates/traces-otel@mappings.yaml @@ -44,7 +44,7 @@ template: dropped_events_count: type: long links: - store_array_source: true + synthetic_source_keep: arrays properties: trace_id: type: keyword diff --git a/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_logs_tests.yml b/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_logs_tests.yml index 95367c97ce8f2..657453bf4ae9f 100644 --- a/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_logs_tests.yml +++ b/x-pack/plugin/otel-data/src/yamlRestTest/resources/rest-api-spec/test/20_logs_tests.yml @@ -48,9 +48,9 @@ setup: body: fields: ["*"] - length: { hits.hits: 1 } - - match: { hits.hits.0.fields.resource\.attributes\.host\.ip: ["0.0.0.0", "127.0.0.1"] } - - match: { hits.hits.0.fields.attributes\.foo: [1, 2, 3] } - - match: { hits.hits.0.fields.attributes\.bar: [a, b, c] } + - match: { hits.hits.0.fields.resource\.attributes\.host\.ip: ["127.0.0.1", "0.0.0.0"] } + - match: { hits.hits.0.fields.attributes\.foo: [3, 2, 1] } + - match: { hits.hits.0.fields.attributes\.bar: [b, c, a] } --- "Exception aliases": - do: diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtUtil.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtUtil.java index b345178e205c3..8b3f8ec09675a 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtUtil.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/jwt/JwtUtil.java @@ -228,7 +228,8 @@ public static byte[] readFileContents(final String jwkSetConfigKeyPkc, final Str throws SettingsException { try { final Path path = JwtUtil.resolvePath(environment, jwkSetPathPkc); - return Files.readAllBytes(path); + byte[] bytes = AccessController.doPrivileged((PrivilegedExceptionAction) () -> Files.readAllBytes(path)); + return bytes; } catch (Exception e) { throw new SettingsException( "Failed to read contents for setting [" + jwkSetConfigKeyPkc + "] value [" + jwkSetPathPkc + "].", diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosRealm.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosRealm.java index 6601d27d5a431..d5ef90f7f1664 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosRealm.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/kerberos/KerberosRealm.java @@ -29,6 +29,8 @@ import java.nio.file.Files; import java.nio.file.Path; +import java.security.AccessController; +import java.security.PrivilegedAction; import java.util.Collections; import java.util.List; import java.util.Map; @@ -101,19 +103,26 @@ public KerberosRealm(final RealmConfig config, final UserRoleMapper userRoleMapp this.threadPool = threadPool; this.keytabPath = config.env().configFile().resolve(config.getSetting(KerberosRealmSettings.HTTP_SERVICE_KEYTAB_PATH)); - if (Files.exists(keytabPath) == false) { + validateKeytab(this.keytabPath); + + this.enableKerberosDebug = config.getSetting(KerberosRealmSettings.SETTING_KRB_DEBUG_ENABLE); + this.removeRealmName = config.getSetting(KerberosRealmSettings.SETTING_REMOVE_REALM_NAME); + this.delegatedRealms = null; + } + + private static void 
validateKeytab(Path keytabPath) { + boolean fileExists = AccessController.doPrivileged((PrivilegedAction) () -> Files.exists(keytabPath)); + if (fileExists == false) { throw new IllegalArgumentException("configured service key tab file [" + keytabPath + "] does not exist"); } - if (Files.isDirectory(keytabPath)) { + boolean pathIsDir = AccessController.doPrivileged((PrivilegedAction) () -> Files.isDirectory(keytabPath)); + if (pathIsDir) { throw new IllegalArgumentException("configured service key tab file [" + keytabPath + "] is a directory"); } - if (Files.isReadable(keytabPath) == false) { + boolean isReadable = AccessController.doPrivileged((PrivilegedAction) () -> Files.isReadable(keytabPath)); + if (isReadable == false) { throw new IllegalArgumentException("configured service key tab file [" + keytabPath + "] must have read permission"); } - - this.enableKerberosDebug = config.getSetting(KerberosRealmSettings.SETTING_KRB_DEBUG_ENABLE); - this.removeRealmName = config.getSetting(KerberosRealmSettings.SETTING_REMOVE_REALM_NAME); - this.delegatedRealms = null; } @Override diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthenticator.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthenticator.java index c2e0caf7234cb..aa1946f445670 100644 --- a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthenticator.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthenticator.java @@ -93,6 +93,7 @@ import org.elasticsearch.xpack.core.security.authc.RealmSettings; import org.elasticsearch.xpack.core.security.authc.oidc.OpenIdConnectRealmSettings; import org.elasticsearch.xpack.core.ssl.SSLService; +import org.elasticsearch.xpack.security.PrivilegedFileWatcher; import org.elasticsearch.xpack.security.authc.jwt.JwtUtil; import java.io.IOException; @@ -366,8 +367,14 @@ private void validateAccessToken(AccessToken accessToken, JWT idToken) { private JWKSet readJwkSetFromFile(String jwkSetPath) throws IOException, ParseException { final Path path = realmConfig.env().configFile().resolve(jwkSetPath); // avoid using JWKSet.loadFile() as it does not close FileInputStream internally - String jwkSet = Files.readString(path, StandardCharsets.UTF_8); - return JWKSet.parse(jwkSet); + try { + String jwkSet = AccessController.doPrivileged( + (PrivilegedExceptionAction) () -> Files.readString(path, StandardCharsets.UTF_8) + ); + return JWKSet.parse(jwkSet); + } catch (PrivilegedActionException ex) { + throw (IOException) ex.getException(); + } } /** @@ -808,7 +815,7 @@ IDTokenValidator createIdTokenValidator(boolean addFileWatcherIfRequired) { private void setMetadataFileWatcher(String jwkSetPath) throws IOException { final Path path = realmConfig.env().configFile().resolve(jwkSetPath); - FileWatcher watcher = new FileWatcher(path); + FileWatcher watcher = new PrivilegedFileWatcher(path); watcher.addListener(new FileListener(LOGGER, () -> this.idTokenValidator.set(createIdTokenValidator(false)))); watcherService.add(watcher, ResourceWatcherService.Frequency.MEDIUM); } diff --git a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/saml/SamlRealm.java b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/saml/SamlRealm.java index 704875efa18f6..9adfd15e23207 100644 --- 
a/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/saml/SamlRealm.java +++ b/x-pack/plugin/security/src/main/java/org/elasticsearch/xpack/security/authc/saml/SamlRealm.java @@ -49,6 +49,7 @@ import org.elasticsearch.xpack.core.security.user.User; import org.elasticsearch.xpack.core.ssl.CertParsingUtils; import org.elasticsearch.xpack.core.ssl.SSLService; +import org.elasticsearch.xpack.security.PrivilegedFileWatcher; import org.elasticsearch.xpack.security.authc.Realms; import org.elasticsearch.xpack.security.authc.TokenService; import org.elasticsearch.xpack.security.authc.support.DelegatedAuthorizationSupport; @@ -774,7 +775,11 @@ private static final class SamlFilesystemMetadataResolver extends FilesystemMeta @Override protected byte[] fetchMetadata() throws ResolverException { assert assertNotTransportThread("fetching SAML metadata from a file"); - return super.fetchMetadata(); + try { + return AccessController.doPrivileged((PrivilegedExceptionAction) () -> super.fetchMetadata()); + } catch (PrivilegedActionException e) { + throw (ResolverException) e.getException(); + } } } @@ -806,7 +811,7 @@ private static Tuple(resolver, () -> resolveEntityDescriptor(resolver, entityId, path.toString(), true)); diff --git a/x-pack/plugin/security/src/main/plugin-metadata/plugin-security.policy b/x-pack/plugin/security/src/main/plugin-metadata/plugin-security.policy index b3d5e80e09dcd..d814dfbb1c117 100644 --- a/x-pack/plugin/security/src/main/plugin-metadata/plugin-security.policy +++ b/x-pack/plugin/security/src/main/plugin-metadata/plugin-security.policy @@ -6,6 +6,10 @@ grant { permission org.elasticsearch.SecuredConfigFileAccessPermission "x-pack/users"; // other security files specified by settings permission org.elasticsearch.SecuredConfigFileSettingAccessPermission "xpack.security.authc.realms.ldap.*.files.role_mapping"; + permission org.elasticsearch.SecuredConfigFileSettingAccessPermission "xpack.security.authc.realms.pki.*.files.role_mapping"; + permission org.elasticsearch.SecuredConfigFileSettingAccessPermission "xpack.security.authc.realms.jwt.*.pkc_jwkset_path"; + permission org.elasticsearch.SecuredConfigFileSettingAccessPermission "xpack.security.authc.realms.saml.*.idp.metadata.path"; + permission org.elasticsearch.SecuredConfigFileSettingAccessPermission "xpack.security.authc.realms.kerberos.*.keytab.path"; // needed for SAML permission java.util.PropertyPermission "org.apache.xml.security.ignoreLineBreaks", "read,write"; diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/security/authz_api_keys/40_document_level_security_synthetic_source.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/security/authz_api_keys/40_document_level_security_synthetic_source.yml index 769b9d848ba35..52abe0a3d83d7 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/security/authz_api_keys/40_document_level_security_synthetic_source.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/security/authz_api_keys/40_document_level_security_synthetic_source.yml @@ -186,7 +186,7 @@ Filter on object with stored source: type: keyword obj: type: object - store_array_source: true + synthetic_source_keep: arrays properties: secret: type: keyword
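
For illustration only, and not part of the change set above: the BALANCED truncation case added to NlpTokenizer splits the available token window between the two sequences, capping each at half of (maxSequenceLength - special tokens) while letting a short sequence donate its unused budget to the longer one. The following minimal, self-contained Java sketch restates that budgeting with hypothetical names (BalancedTruncationSketch, balancedLengths) and uses the worked numbers from the BertTokenizerTests case (window 10, 3 special tokens, sequences of 4 and 6 tokens keep 3 and 4 tokens respectively).

// Hypothetical standalone sketch of the BALANCED truncation budgeting; not the PR's API.
final class BalancedTruncationSketch {

    // Returns {keptFromSeq1, keptFromSeq2} for a window of maxSequenceLength tokens,
    // reserving extraTokens positions for special tokens such as [CLS]/[SEP].
    static int[] balancedLengths(int seq1Len, int seq2Len, int maxSequenceLength, int extraTokens) {
        int budget = maxSequenceLength - extraTokens;
        int first;
        if (seq2Len > budget / 2) {
            // The second sequence can fill its half, so cap the first at half the budget.
            first = Math.min(seq1Len, budget / 2);
        } else {
            // The second sequence is short; the first may also use whatever it leaves unused.
            first = Math.min(seq1Len, budget - seq2Len);
        }
        int second = Math.min(seq2Len, budget - first);
        return new int[] { first, second };
    }

    public static void main(String[] args) {
        // Worked example mirroring BertTokenizerTests: window 10, 3 special tokens,
        // "Elasticsearch is fun" -> 4 tokens, "Godzilla my little red car" -> 6 tokens.
        int[] kept = balancedLengths(4, 6, 10, 3);
        System.out.println(kept[0] + " + " + kept[1]); // prints "3 + 4"
    }
}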