From 3080ca8ae877a586dbd9ec283c1faa67a21547ae Mon Sep 17 00:00:00 2001 From: bellengao Date: Sun, 23 Feb 2020 16:41:39 +0800 Subject: [PATCH 01/10] Support array for string ingest processors --- .../ingest/processors/bytes.asciidoc | 2 +- .../reference/ingest/processors/gsub.asciidoc | 2 +- .../ingest/processors/html_strip.asciidoc | 2 +- .../ingest/processors/lowercase.asciidoc | 2 +- .../reference/ingest/processors/trim.asciidoc | 2 +- .../ingest/processors/uppercase.asciidoc | 2 +- .../ingest/processors/url-decode.asciidoc | 2 +- .../common/AbstractStringProcessor.java | 29 ++++++++++++++-- .../AbstractStringProcessorTestCase.java | 33 ++++++++++++++++++- 9 files changed, 66 insertions(+), 10 deletions(-) diff --git a/docs/reference/ingest/processors/bytes.asciidoc b/docs/reference/ingest/processors/bytes.asciidoc index 76f054cac64c2..82d7472d1ec01 100644 --- a/docs/reference/ingest/processors/bytes.asciidoc +++ b/docs/reference/ingest/processors/bytes.asciidoc @@ -1,6 +1,6 @@ [[bytes-processor]] === Bytes Processor -Converts a human readable byte value (e.g. 1kb) to its value in bytes (e.g. 1024). +Converts a human readable byte value (e.g. 1kb) to its value in bytes (e.g. 1024). If the field value is an array, all members will be converted. Supported human readable units are "b", "kb", "mb", "gb", "tb", "pb" case insensitive. An error will occur if the field is not a supported format or resultant value exceeds 2^63. diff --git a/docs/reference/ingest/processors/gsub.asciidoc b/docs/reference/ingest/processors/gsub.asciidoc index f6919eb1e95f1..1048d5ed387fd 100644 --- a/docs/reference/ingest/processors/gsub.asciidoc +++ b/docs/reference/ingest/processors/gsub.asciidoc @@ -1,7 +1,7 @@ [[gsub-processor]] === Gsub Processor Converts a string field by applying a regular expression and a replacement. -If the field is not a string, the processor will throw an exception. +If the field is not a string, the processor will throw an exception. 
If the field value is an array, all members will be converted. [[gsub-options]] .Gsub Options diff --git a/docs/reference/ingest/processors/html_strip.asciidoc b/docs/reference/ingest/processors/html_strip.asciidoc index 2fa3cd7bbb8ae..32450e977e008 100644 --- a/docs/reference/ingest/processors/html_strip.asciidoc +++ b/docs/reference/ingest/processors/html_strip.asciidoc @@ -1,6 +1,6 @@ [[htmlstrip-processor]] === HTML Strip Processor -Removes HTML from field. +Removes HTML from field. If the field value is an array, all members will be converted. NOTE: Each HTML tag is replaced with a `\n` character. diff --git a/docs/reference/ingest/processors/lowercase.asciidoc b/docs/reference/ingest/processors/lowercase.asciidoc index 878b74ed9ba24..96413c7b9b94f 100644 --- a/docs/reference/ingest/processors/lowercase.asciidoc +++ b/docs/reference/ingest/processors/lowercase.asciidoc @@ -1,6 +1,6 @@ [[lowercase-processor]] === Lowercase Processor -Converts a string to its lowercase equivalent. +Converts a string to its lowercase equivalent. If the field value is an array, all members will be converted. [[lowercase-options]] .Lowercase Options diff --git a/docs/reference/ingest/processors/trim.asciidoc b/docs/reference/ingest/processors/trim.asciidoc index 7c28767076ecc..1690e7cd45980 100644 --- a/docs/reference/ingest/processors/trim.asciidoc +++ b/docs/reference/ingest/processors/trim.asciidoc @@ -1,6 +1,6 @@ [[trim-processor]] === Trim Processor -Trims whitespace from field. +Trims whitespace from field. If the field value is an array, all members will be trimmed. NOTE: This only works on leading and trailing whitespace. 
diff --git a/docs/reference/ingest/processors/uppercase.asciidoc b/docs/reference/ingest/processors/uppercase.asciidoc index 7565be1c7c303..b967b83a1d478 100644 --- a/docs/reference/ingest/processors/uppercase.asciidoc +++ b/docs/reference/ingest/processors/uppercase.asciidoc @@ -1,6 +1,6 @@ [[uppercase-processor]] === Uppercase Processor -Converts a string to its uppercase equivalent. +Converts a string to its uppercase equivalent. If the field value is an array, all members will be converted. [[uppercase-options]] .Uppercase Options diff --git a/docs/reference/ingest/processors/url-decode.asciidoc b/docs/reference/ingest/processors/url-decode.asciidoc index 76fc00c80f679..f7003b2ad9646 100644 --- a/docs/reference/ingest/processors/url-decode.asciidoc +++ b/docs/reference/ingest/processors/url-decode.asciidoc @@ -1,6 +1,6 @@ [[urldecode-processor]] === URL Decode Processor -URL-decodes a string +URL-decodes a string. If the field value is an array, all members will be decoded. [[urldecode-options]] .URL Decode Options diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/AbstractStringProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/AbstractStringProcessor.java index 546519aa5f606..cd7050cd710d6 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/AbstractStringProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/AbstractStringProcessor.java @@ -24,6 +24,8 @@ import org.elasticsearch.ingest.IngestDocument; import org.elasticsearch.ingest.Processor; +import java.util.ArrayList; +import java.util.List; import java.util.Map; /** @@ -58,7 +60,8 @@ String getTargetField() { @Override public final IngestDocument execute(IngestDocument document) { - String val = document.getFieldValue(field, String.class, ignoreMissing); + Object val = document.getFieldValue(field, Object.class, ignoreMissing); + Object newValue; if (val == null && ignoreMissing) 
{ return document; @@ -66,7 +69,29 @@ public final IngestDocument execute(IngestDocument document) { throw new IllegalArgumentException("field [" + field + "] is null, cannot process it."); } - document.setFieldValue(targetField, process(val)); + if (val instanceof List) { + List list = (List) val; + List newList = new ArrayList<>(list.size()); + for (Object value : list) { + if (value instanceof String) { + newList.add(process((String) value)); + } else { + throw new IllegalArgumentException("field [" + field + "] of type [" + value.getClass().getName() + + "] cannot be cast to [" + String.class.getName() + "]"); + } + } + newValue = newList; + } else { + if (val instanceof String) { + newValue = process((String) val); + } else { + throw new IllegalArgumentException("field [" + field + "] of type [" + val.getClass().getName() + "] cannot be cast to [" + + String.class.getName() + "]"); + } + + } + + document.setFieldValue(targetField, newValue); return document; } diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AbstractStringProcessorTestCase.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AbstractStringProcessorTestCase.java index f667f84e5d7b1..1dd8033ea40fe 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AbstractStringProcessorTestCase.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AbstractStringProcessorTestCase.java @@ -24,8 +24,10 @@ import org.elasticsearch.ingest.RandomDocumentPicks; import org.elasticsearch.test.ESTestCase; +import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; +import java.util.List; import static org.elasticsearch.ingest.IngestDocumentMatcher.assertIngestDocument; import static org.hamcrest.Matchers.containsString; @@ -41,7 +43,7 @@ protected String modifyInput(String input) { protected abstract T expectedResult(String input); - protected Class expectedResultType(){ + protected Class 
expectedResultType() { return String.class; // most results types are Strings } @@ -52,6 +54,19 @@ public void testProcessor() throws Exception { Processor processor = newProcessor(fieldName, randomBoolean(), fieldName); processor.execute(ingestDocument); assertThat(ingestDocument.getFieldValue(fieldName, expectedResultType()), equalTo(expectedResult(fieldValue))); + + int numItems = randomIntBetween(1, 10); + List fieldValueList = new ArrayList<>(); + List expectedList = new ArrayList<>(); + for (int i = 0; i < numItems; i++) { + String randomString = RandomDocumentPicks.randomString(random()); + fieldValueList.add(modifyInput(randomString)); + expectedList.add(expectedResult(randomString)); + } + String multiValueFieldName = RandomDocumentPicks.addRandomField(random(), ingestDocument, fieldValueList); + Processor multiValueProcessor = newProcessor(multiValueFieldName, randomBoolean(), multiValueFieldName); + multiValueProcessor.execute(ingestDocument); + assertThat(ingestDocument.getFieldValue(multiValueFieldName, List.class), equalTo(expectedList)); } public void testFieldNotFound() throws Exception { @@ -94,6 +109,14 @@ public void testNonStringValue() throws Exception { Exception e = expectThrows(Exception.class, () -> processor.execute(ingestDocument)); assertThat(e.getMessage(), equalTo("field [" + fieldName + "] of type [java.lang.Integer] cannot be cast to [java.lang.String]")); + + List fieldValueList = new ArrayList<>(); + fieldValueList.add(randomInt()); + ingestDocument.setFieldValue(fieldName, fieldValueList); + Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument)); + assertThat(exception.getMessage(), equalTo("field [" + fieldName + + "] of type [java.lang.Integer] cannot be cast to [java.lang.String]") + ); } public void testNonStringValueWithIgnoreMissing() throws Exception { @@ -104,6 +127,14 @@ public void testNonStringValueWithIgnoreMissing() throws Exception { Exception e = expectThrows(Exception.class, () 
-> processor.execute(ingestDocument)); assertThat(e.getMessage(), equalTo("field [" + fieldName + "] of type [java.lang.Integer] cannot be cast to [java.lang.String]")); + + List fieldValueList = new ArrayList<>(); + fieldValueList.add(randomInt()); + ingestDocument.setFieldValue(fieldName, fieldValueList); + Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument)); + assertThat(exception.getMessage(), equalTo("field [" + fieldName + + "] of type [java.lang.Integer] cannot be cast to [java.lang.String]") + ); } public void testTargetField() throws Exception { From e35d1f274b7c620548ac3513e3332333c58b3532 Mon Sep 17 00:00:00 2001 From: bellengao Date: Tue, 17 Mar 2020 10:26:58 +0800 Subject: [PATCH 02/10] Update docs/reference/ingest/processors/bytes.asciidoc Co-Authored-By: Dan Hermann --- docs/reference/ingest/processors/bytes.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ingest/processors/bytes.asciidoc b/docs/reference/ingest/processors/bytes.asciidoc index 82d7472d1ec01..5a551f8a82eac 100644 --- a/docs/reference/ingest/processors/bytes.asciidoc +++ b/docs/reference/ingest/processors/bytes.asciidoc @@ -1,6 +1,6 @@ [[bytes-processor]] === Bytes Processor -Converts a human readable byte value (e.g. 1kb) to its value in bytes (e.g. 1024). If the field value is an array, all members will be converted. +Converts a human readable byte value (e.g. 1kb) to its value in bytes (e.g. 1024). If the field is an array of strings, all members of the array will be converted. Supported human readable units are "b", "kb", "mb", "gb", "tb", "pb" case insensitive. An error will occur if the field is not a supported format or resultant value exceeds 2^63. 
From ec399aaa268f0ece411d690489061495f471aba2 Mon Sep 17 00:00:00 2001 From: bellengao Date: Tue, 17 Mar 2020 10:27:26 +0800 Subject: [PATCH 03/10] Update docs/reference/ingest/processors/gsub.asciidoc Co-Authored-By: Dan Hermann --- docs/reference/ingest/processors/gsub.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ingest/processors/gsub.asciidoc b/docs/reference/ingest/processors/gsub.asciidoc index 1048d5ed387fd..2defa6e7cd138 100644 --- a/docs/reference/ingest/processors/gsub.asciidoc +++ b/docs/reference/ingest/processors/gsub.asciidoc @@ -1,7 +1,7 @@ [[gsub-processor]] === Gsub Processor Converts a string field by applying a regular expression and a replacement. -If the field is not a string, the processor will throw an exception. If the field value is an array, all members will be converted. +If the field is an array of strings, all members of the array will be converted. If any non-string values are encountered, the processor will throw an exception. [[gsub-options]] .Gsub Options From 10bc702c1e54c2b6e8773ad88023446b35ca90c7 Mon Sep 17 00:00:00 2001 From: bellengao Date: Tue, 17 Mar 2020 10:28:07 +0800 Subject: [PATCH 04/10] Update docs/reference/ingest/processors/html_strip.asciidoc Co-Authored-By: Dan Hermann --- docs/reference/ingest/processors/html_strip.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ingest/processors/html_strip.asciidoc b/docs/reference/ingest/processors/html_strip.asciidoc index 32450e977e008..bd4e8e8ccd920 100644 --- a/docs/reference/ingest/processors/html_strip.asciidoc +++ b/docs/reference/ingest/processors/html_strip.asciidoc @@ -1,6 +1,6 @@ [[htmlstrip-processor]] === HTML Strip Processor -Removes HTML from field. If the field value is an array, all members will be converted. +Removes HTML tags from the field. If the field is an array of strings, HTML tags will be removed from all members of the array. 
NOTE: Each HTML tag is replaced with a `\n` character. From 3770c3863ddcb3928e04fa160635860c4c44b107 Mon Sep 17 00:00:00 2001 From: bellengao Date: Tue, 17 Mar 2020 10:28:45 +0800 Subject: [PATCH 05/10] Update docs/reference/ingest/processors/lowercase.asciidoc Co-Authored-By: Dan Hermann --- docs/reference/ingest/processors/lowercase.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ingest/processors/lowercase.asciidoc b/docs/reference/ingest/processors/lowercase.asciidoc index 96413c7b9b94f..903d69625352f 100644 --- a/docs/reference/ingest/processors/lowercase.asciidoc +++ b/docs/reference/ingest/processors/lowercase.asciidoc @@ -1,6 +1,6 @@ [[lowercase-processor]] === Lowercase Processor -Converts a string to its lowercase equivalent. If the field value is an array, all members will be converted. +Converts a string to its lowercase equivalent. If the field is an array of strings, all members of the array will be converted. [[lowercase-options]] .Lowercase Options From 097add095bbdd778e50dffb8fca264733732d6b4 Mon Sep 17 00:00:00 2001 From: bellengao Date: Tue, 17 Mar 2020 10:29:01 +0800 Subject: [PATCH 06/10] Update docs/reference/ingest/processors/trim.asciidoc Co-Authored-By: Dan Hermann --- docs/reference/ingest/processors/trim.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ingest/processors/trim.asciidoc b/docs/reference/ingest/processors/trim.asciidoc index 1690e7cd45980..ef3611161e2e2 100644 --- a/docs/reference/ingest/processors/trim.asciidoc +++ b/docs/reference/ingest/processors/trim.asciidoc @@ -1,6 +1,6 @@ [[trim-processor]] === Trim Processor -Trims whitespace from field. If the field value is an array, all members will be trimmed. +Trims whitespace from the field. If the field is an array of strings, all members of the array will be trimmed. NOTE: This only works on leading and trailing whitespace. 
From 60568b9c6157c4a02655dc203ed34bab9367aa8a Mon Sep 17 00:00:00 2001 From: bellengao Date: Tue, 17 Mar 2020 10:29:23 +0800 Subject: [PATCH 07/10] Update docs/reference/ingest/processors/uppercase.asciidoc Co-Authored-By: Dan Hermann --- docs/reference/ingest/processors/uppercase.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ingest/processors/uppercase.asciidoc b/docs/reference/ingest/processors/uppercase.asciidoc index b967b83a1d478..3e26cedcf9cce 100644 --- a/docs/reference/ingest/processors/uppercase.asciidoc +++ b/docs/reference/ingest/processors/uppercase.asciidoc @@ -1,6 +1,6 @@ [[uppercase-processor]] === Uppercase Processor -Converts a string to its uppercase equivalent. If the field value is an array, all members will be converted. +Converts a string to its uppercase equivalent. If the field is an array of strings, all members of the array will be converted. [[uppercase-options]] .Uppercase Options From f93eef5d2f714540e1faa2a71a6c4ebfc6143360 Mon Sep 17 00:00:00 2001 From: bellengao Date: Tue, 17 Mar 2020 10:29:37 +0800 Subject: [PATCH 08/10] Update docs/reference/ingest/processors/url-decode.asciidoc Co-Authored-By: Dan Hermann --- docs/reference/ingest/processors/url-decode.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/ingest/processors/url-decode.asciidoc b/docs/reference/ingest/processors/url-decode.asciidoc index f7003b2ad9646..268fce1c18c2a 100644 --- a/docs/reference/ingest/processors/url-decode.asciidoc +++ b/docs/reference/ingest/processors/url-decode.asciidoc @@ -1,6 +1,6 @@ [[urldecode-processor]] === URL Decode Processor -URL-decodes a string. If the field value is an array, all members will be decoded. +URL-decodes a string. If the field is an array of strings, all members of the array will be decoded. 
[[urldecode-options]] .URL Decode Options From cf6256d8b71a5cd0b025cc622dd6a0572a23cea8 Mon Sep 17 00:00:00 2001 From: bellengao Date: Tue, 17 Mar 2020 10:30:28 +0800 Subject: [PATCH 09/10] Update modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/AbstractStringProcessor.java Co-Authored-By: Dan Hermann --- .../elasticsearch/ingest/common/AbstractStringProcessor.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/AbstractStringProcessor.java b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/AbstractStringProcessor.java index cd7050cd710d6..ded75e95be73d 100644 --- a/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/AbstractStringProcessor.java +++ b/modules/ingest-common/src/main/java/org/elasticsearch/ingest/common/AbstractStringProcessor.java @@ -76,8 +76,8 @@ public final IngestDocument execute(IngestDocument document) { if (value instanceof String) { newList.add(process((String) value)); } else { - throw new IllegalArgumentException("field [" + field + "] of type [" + value.getClass().getName() + - "] cannot be cast to [" + String.class.getName() + "]"); + throw new IllegalArgumentException("value [" + value + "] of type [" + value.getClass().getName() + + "] in list field [" + field + "] cannot be cast to [" + String.class.getName() + "]"); } } newValue = newList; From 7bc26cdf72fa011d0432790165c7ff4219a8f6d9 Mon Sep 17 00:00:00 2001 From: bellengao Date: Tue, 17 Mar 2020 15:00:09 +0800 Subject: [PATCH 10/10] modify error message of the test code --- .../common/AbstractStringProcessorTestCase.java | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AbstractStringProcessorTestCase.java b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AbstractStringProcessorTestCase.java index 
1dd8033ea40fe..b9c83be40fff7 100644 --- a/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AbstractStringProcessorTestCase.java +++ b/modules/ingest-common/src/test/java/org/elasticsearch/ingest/common/AbstractStringProcessorTestCase.java @@ -111,12 +111,12 @@ public void testNonStringValue() throws Exception { "] of type [java.lang.Integer] cannot be cast to [java.lang.String]")); List fieldValueList = new ArrayList<>(); - fieldValueList.add(randomInt()); + int randomValue = randomInt(); + fieldValueList.add(randomValue); ingestDocument.setFieldValue(fieldName, fieldValueList); Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument)); - assertThat(exception.getMessage(), equalTo("field [" + fieldName + - "] of type [java.lang.Integer] cannot be cast to [java.lang.String]") - ); + assertThat(exception.getMessage(), equalTo("value [" + randomValue + "] of type [java.lang.Integer] in list field [" + fieldName + + "] cannot be cast to [java.lang.String]")); } public void testNonStringValueWithIgnoreMissing() throws Exception { @@ -129,12 +129,12 @@ public void testNonStringValueWithIgnoreMissing() throws Exception { "] of type [java.lang.Integer] cannot be cast to [java.lang.String]")); List fieldValueList = new ArrayList<>(); - fieldValueList.add(randomInt()); + int randomValue = randomInt(); + fieldValueList.add(randomValue); ingestDocument.setFieldValue(fieldName, fieldValueList); Exception exception = expectThrows(Exception.class, () -> processor.execute(ingestDocument)); - assertThat(exception.getMessage(), equalTo("field [" + fieldName + - "] of type [java.lang.Integer] cannot be cast to [java.lang.String]") - ); + assertThat(exception.getMessage(), equalTo("value [" + randomValue + "] of type [java.lang.Integer] in list field [" + fieldName + + "] cannot be cast to [java.lang.String]")); } public void testTargetField() throws Exception {