diff --git a/docker/start_db.sh b/docker/start_db.sh index 168ce96b5..02f48db2e 100755 --- a/docker/start_db.sh +++ b/docker/start_db.sh @@ -2,7 +2,7 @@ # Configuration environment variables: # STARTER_MODE: (single|cluster|activefailover), default single -# DOCKER_IMAGE: ArangoDB docker image, default gcr.io/gcr-for-testing/arangodb/arangodb:latest +# DOCKER_IMAGE: ArangoDB docker image, default docker.io/arangodb/arangodb:latest # SSL: (true|false), default false # DATABASE_EXTENDED_NAMES: (true|false), default false # ARANGO_LICENSE_KEY: only required for ArangoDB Enterprise @@ -11,11 +11,11 @@ # STARTER_MODE=cluster SSL=true ./start_db.sh STARTER_MODE=${STARTER_MODE:=single} -DOCKER_IMAGE=${DOCKER_IMAGE:=gcr.io/gcr-for-testing/arangodb/arangodb:latest} +DOCKER_IMAGE=${DOCKER_IMAGE:=docker.io/arangodb/arangodb:latest} SSL=${SSL:=false} DATABASE_EXTENDED_NAMES=${DATABASE_EXTENDED_NAMES:=false} -STARTER_DOCKER_IMAGE=gcr.io/gcr-for-testing/arangodb/arangodb-starter:latest +STARTER_DOCKER_IMAGE=docker.io/arangodb/arangodb-starter:latest # exit when any command fails set -e diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/StopwordsAnalyzerProperties.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/StopwordsAnalyzerProperties.java index f114b641f..a82374b31 100644 --- a/src/main/java/com/arangodb/entity/arangosearch/analyzer/StopwordsAnalyzerProperties.java +++ b/src/main/java/com/arangodb/entity/arangosearch/analyzer/StopwordsAnalyzerProperties.java @@ -24,6 +24,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Objects; +import java.util.stream.Collectors; /** * @author Michele Rastelli @@ -38,26 +39,89 @@ private static String stringToHex(String str) { return hex.toString(); } + private static String hexToString(String hex) { + final StringBuilder result = new StringBuilder(); + for (int i = 0; i < hex.length() - 1; i += 2) { + String tempInHex = hex.substring(i, (i + 2)); + int decimal = 
Integer.parseInt(tempInHex, 16); + result.append((char) decimal); + } + return result.toString(); + } + public StopwordsAnalyzerProperties() { stopwords = new ArrayList<>(); + hex = true; } - private List stopwords; + private final List stopwords; + private final boolean hex; /** - * @return array of hex-encoded strings that describe the tokens to be discarded. + * @return list of hex-encoded strings that describe the tokens to be discarded. + * @deprecated use {@link #getStopwordsAsHexList()} instead */ + @Deprecated public List getStopwords() { - return stopwords; + return getStopwordsAsHexList(); + } + + /** + * @return list of verbatim strings that describe the tokens to be discarded; decoded from hex when the stopwords are stored hex-encoded. + */ + public List getStopwordsAsStringList() { + if (hex) { + return stopwords.stream() + .map(StopwordsAnalyzerProperties::hexToString) + .collect(Collectors.toList()); + } else { + return stopwords; + } } + /** + * @return list of hex-encoded strings that describe the tokens to be discarded; encoded to hex when the stopwords are stored verbatim. + */ + public List getStopwordsAsHexList() { + if (hex) { + return stopwords; + } else { + return stopwords.stream() + .map(StopwordsAnalyzerProperties::stringToHex) + .collect(Collectors.toList()); + } + } + + /** + * @return {@code true} if each string in {@link #stopwords} is stored hex-encoded, {@code false} if it is stored verbatim. 
+ */ + public boolean getHex() { + return hex; + } + + /** + * @param value stopword as a verbatim string; it is hex-encoded before being stored when {@link #getHex()} is true + * @return this + */ public StopwordsAnalyzerProperties addStopwordAsString(final String value) { - stopwords.add(stringToHex(value)); + if (hex) { + stopwords.add(stringToHex(value)); + } else { + stopwords.add(value); + } return this; } + /** + * @param value stopword as a hex-encoded string; it is decoded before being stored when {@link #getHex()} is false + * @return this + */ public StopwordsAnalyzerProperties addStopwordAsHex(final String value) { - stopwords.add(value); + if (hex) { + stopwords.add(value); + } else { + stopwords.add(hexToString(value)); + } return this; } @@ -66,11 +130,11 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; StopwordsAnalyzerProperties that = (StopwordsAnalyzerProperties) o; - return Objects.equals(stopwords, that.stopwords); + return hex == that.hex && Objects.equals(stopwords, that.stopwords); } @Override public int hashCode() { - return Objects.hash(stopwords); + return Objects.hash(stopwords, hex); } } diff --git a/src/test/java/com/arangodb/ArangoSearchTest.java b/src/test/java/com/arangodb/ArangoSearchTest.java index c3650185c..76e9aa953 100644 --- a/src/test/java/com/arangodb/ArangoSearchTest.java +++ b/src/test/java/com/arangodb/ArangoSearchTest.java @@ -870,15 +870,21 @@ public void stopwordsAnalyzer() { .addStopwordAsHex("616e64") .addStopwordAsString("the"); - assertThat(properties.getStopwords(), hasItem("616e64")); - assertThat(properties.getStopwords(), hasItem("746865")); + assertThat(properties.getStopwordsAsStringList(), hasItem("and")); + assertThat(properties.getStopwordsAsHexList(), hasItem("746865")); StopwordsAnalyzer analyzer = new StopwordsAnalyzer(); - analyzer.setName("test-" + UUID.randomUUID().toString()); + String name = "test-" + UUID.randomUUID().toString(); + analyzer.setName(name); analyzer.setProperties(properties); analyzer.setFeatures(features); createGetAndDeleteTypedAnalyzer(analyzer); + 
db.createSearchAnalyzer(analyzer); + String res = db.query("RETURN FLATTEN(TOKENS(SPLIT('the fox and the dog and a theater', ' '), @aName))", + Collections.singletonMap("aName", name), String.class).next(); + assertThat(res, is("[\"fox\",\"dog\",\"a\",\"theater\"]")); + db.deleteSearchAnalyzer(name); } @Test