-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
353 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
41 changes: 41 additions & 0 deletions
41
src/main/java/org/elasticsearch/search/fetch/analyzedtext/AnalyzedTextFetchContext.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.search.fetch.analyzedtext; | ||
|
||
|
||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; | ||
import org.elasticsearch.action.termvectors.TermVectorsRequest; | ||
import org.elasticsearch.search.fetch.FetchSubPhaseContext; | ||
|
||
public class AnalyzedTextFetchContext extends FetchSubPhaseContext { | ||
|
||
private AnalyzeRequest request; | ||
|
||
public AnalyzedTextFetchContext() { | ||
} | ||
|
||
public AnalyzeRequest getRequest() { | ||
return request; | ||
} | ||
|
||
public void setRequest(AnalyzeRequest request) { | ||
this.request = request; | ||
} | ||
} |
71 changes: 71 additions & 0 deletions
71
src/main/java/org/elasticsearch/search/fetch/analyzedtext/AnalyzedTextFetchParseElement.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.search.fetch.analyzedtext; | ||
|
||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; | ||
import org.elasticsearch.action.termvectors.TermVectorsRequest; | ||
import org.elasticsearch.common.xcontent.XContentBuilder; | ||
import org.elasticsearch.common.xcontent.XContentFactory; | ||
import org.elasticsearch.common.xcontent.XContentParser; | ||
import org.elasticsearch.common.xcontent.XContentType; | ||
import org.elasticsearch.script.SharedMethods; | ||
import org.elasticsearch.search.fetch.FetchSubPhase; | ||
import org.elasticsearch.search.fetch.FetchSubPhaseParseElement; | ||
import org.elasticsearch.search.internal.SearchContext; | ||
|
||
import java.util.Map; | ||
|
||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; | ||
|
||
|
||
public class AnalyzedTextFetchParseElement extends FetchSubPhaseParseElement<AnalyzedTextFetchContext> { | ||
@Override | ||
protected void innerParse(XContentParser parser, AnalyzedTextFetchContext analyzedTextFetchContext, SearchContext searchContext) | ||
throws Exception { | ||
|
||
XContentBuilder newBuilder = jsonBuilder(); | ||
newBuilder.copyCurrentStructure(parser); | ||
Map<String, Object> requestAsMap = SharedMethods.getSourceAsMap(newBuilder.string()); | ||
AnalyzeRequest request = new AnalyzeRequest(); | ||
if (requestAsMap.get("analyzer") != null) { | ||
request.analyzer((String) requestAsMap.remove("analyzer")); | ||
} | ||
if (requestAsMap.get("token_filters") != null) { | ||
request.tokenFilters((String[]) requestAsMap.remove("token_filters")); | ||
} | ||
if (requestAsMap.get("tokenizer") != null) { | ||
request.tokenizer((String) requestAsMap.remove("tokenizer")); | ||
} | ||
if (requestAsMap.get("char_filters") != null) { | ||
request.charFilters((String[]) requestAsMap.remove("char_filters")); | ||
} | ||
if (requestAsMap.get("field") != null) { | ||
request.field((String) requestAsMap.remove("field")); | ||
} | ||
assert requestAsMap.isEmpty(); | ||
analyzedTextFetchContext.setRequest(request); | ||
|
||
} | ||
|
||
@Override | ||
protected FetchSubPhase.ContextFactory<AnalyzedTextFetchContext> getContextFactory() { | ||
return AnalyzedTextFetchSubPhase.CONTEXT_FACTORY; | ||
} | ||
} |
164 changes: 164 additions & 0 deletions
164
src/main/java/org/elasticsearch/search/fetch/analyzedtext/AnalyzedTextFetchSubPhase.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.search.fetch.analyzedtext; | ||
|
||
import org.apache.lucene.analysis.Analyzer; | ||
import org.apache.lucene.analysis.TokenStream; | ||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; | ||
import org.elasticsearch.ElasticsearchException; | ||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; | ||
import org.elasticsearch.index.analysis.CharFilterFactory; | ||
import org.elasticsearch.index.analysis.CustomAnalyzer; | ||
import org.elasticsearch.index.analysis.TokenFilterFactory; | ||
import org.elasticsearch.index.analysis.TokenizerFactory; | ||
import org.elasticsearch.search.SearchHitField; | ||
import org.elasticsearch.search.SearchParseElement; | ||
import org.elasticsearch.search.fetch.FetchSubPhase; | ||
import org.elasticsearch.search.internal.InternalSearchHit; | ||
import org.elasticsearch.search.internal.InternalSearchHitField; | ||
import org.elasticsearch.search.internal.SearchContext; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.Collections; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
|
||
|
||
public class AnalyzedTextFetchSubPhase implements FetchSubPhase { | ||
|
||
public static final ContextFactory<AnalyzedTextFetchContext> CONTEXT_FACTORY = new ContextFactory<AnalyzedTextFetchContext>() { | ||
|
||
@Override | ||
public String getName() { | ||
return NAMES[0]; | ||
} | ||
|
||
@Override | ||
public AnalyzedTextFetchContext newContextInstance() { | ||
return new AnalyzedTextFetchContext(); | ||
} | ||
}; | ||
|
||
public AnalyzedTextFetchSubPhase() { | ||
} | ||
|
||
public static final String[] NAMES = {"analyzed_text"}; | ||
|
||
@Override | ||
public Map<String, ? extends SearchParseElement> parseElements() { | ||
return Collections.singletonMap(NAMES[0], new AnalyzedTextFetchParseElement()); | ||
} | ||
|
||
@Override | ||
public void hitsExecute(SearchContext context, InternalSearchHit[] hits) { | ||
} | ||
|
||
@Override | ||
public void hitExecute(SearchContext context, HitContext hitContext) { | ||
if (context.getFetchSubPhaseContext(CONTEXT_FACTORY).hitExecutionNeeded() == false) { | ||
return; | ||
} | ||
AnalyzeRequest request = context.getFetchSubPhaseContext(CONTEXT_FACTORY).getRequest(); | ||
Analyzer analyzer = null; | ||
boolean closeAnalyzer = false; | ||
String text = (String) context.lookup().source().extractValue(request.field()); | ||
if (analyzer == null && request.analyzer() != null) { | ||
analyzer = context.analysisService().analyzer(request.analyzer()); | ||
if (analyzer == null) { | ||
throw new IllegalArgumentException("failed to find analyzer [" + request.analyzer() + "]"); | ||
} | ||
} else if (request.tokenizer() != null) { | ||
TokenizerFactory tokenizerFactory; | ||
tokenizerFactory = context.analysisService().tokenizer(request.tokenizer()); | ||
if (tokenizerFactory == null) { | ||
throw new IllegalArgumentException("failed to find tokenizer under [" + request.tokenizer() + "]"); | ||
} | ||
TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0]; | ||
if (request.tokenFilters() != null && request.tokenFilters().length > 0) { | ||
tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().length]; | ||
for (int i = 0; i < request.tokenFilters().length; i++) { | ||
String tokenFilterName = request.tokenFilters()[i]; | ||
tokenFilterFactories[i] = context.analysisService().tokenFilter(tokenFilterName); | ||
if (tokenFilterFactories[i] == null) { | ||
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]"); | ||
} | ||
if (tokenFilterFactories[i] == null) { | ||
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]"); | ||
} | ||
} | ||
} | ||
|
||
CharFilterFactory[] charFilterFactories = new CharFilterFactory[0]; | ||
if (request.charFilters() != null && request.charFilters().length > 0) { | ||
charFilterFactories = new CharFilterFactory[request.charFilters().length]; | ||
for (int i = 0; i < request.charFilters().length; i++) { | ||
String charFilterName = request.charFilters()[i]; | ||
charFilterFactories[i] = context.analysisService().charFilter(charFilterName); | ||
if (charFilterFactories[i] == null) { | ||
throw new IllegalArgumentException("failed to find token char under [" + charFilterName + "]"); | ||
} | ||
if (charFilterFactories[i] == null) { | ||
throw new IllegalArgumentException("failed to find token char under [" + charFilterName + "]"); | ||
} | ||
} | ||
} | ||
analyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories, tokenFilterFactories); | ||
closeAnalyzer = true; | ||
} else { | ||
analyzer = context.analysisService().defaultIndexAnalyzer(); | ||
} | ||
if (analyzer == null) { | ||
throw new IllegalArgumentException("failed to find analyzer"); | ||
} | ||
|
||
List<String> tokens = simpleAnalyze(analyzer, text, request.field()); | ||
if (closeAnalyzer) { | ||
analyzer.close(); | ||
} | ||
|
||
if (hitContext.hit().fieldsOrNull() == null) { | ||
hitContext.hit().fields(new HashMap<String, SearchHitField>()); | ||
} | ||
SearchHitField hitField = hitContext.hit().fields().get(NAMES[0]); | ||
if (hitField == null) { | ||
hitField = new InternalSearchHitField(NAMES[0], new ArrayList<>(1)); | ||
hitContext.hit().fields().put(NAMES[0], hitField); | ||
} | ||
|
||
hitField.values().add(tokens); | ||
} | ||
|
||
private static List<String> simpleAnalyze(Analyzer analyzer, String text, String field) { | ||
List<String> tokens = new ArrayList<>(); | ||
try (TokenStream stream = analyzer.tokenStream(field, text)) { | ||
stream.reset(); | ||
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); | ||
while (stream.incrementToken()) { | ||
tokens.add(term.toString()); | ||
} | ||
stream.end(); | ||
} catch (IOException e) { | ||
throw new ElasticsearchException("failed to analyze", e); | ||
} | ||
return tokens; | ||
} | ||
} |
75 changes: 75 additions & 0 deletions
75
src/test/java/org/elasticsearch/search/fetch/analyzedtext/AnalyzedTextFetchIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
/* | ||
* Licensed to Elasticsearch under one or more contributor | ||
* license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright | ||
* ownership. Elasticsearch licenses this file to you under | ||
* the Apache License, Version 2.0 (the "License"); you may | ||
* not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
|
||
package org.elasticsearch.search.fetch.analyzedtext; | ||
|
||
import org.elasticsearch.action.search.SearchResponse; | ||
import org.elasticsearch.plugin.TokenPlugin; | ||
import org.elasticsearch.plugins.Plugin; | ||
import org.elasticsearch.search.SearchHit; | ||
import org.elasticsearch.search.SearchHitField; | ||
import org.elasticsearch.search.builder.SearchSourceBuilder; | ||
import org.elasticsearch.test.ESIntegTestCase; | ||
import org.junit.Test; | ||
|
||
import java.io.IOException; | ||
import java.util.Collection; | ||
import java.util.List; | ||
|
||
import static org.elasticsearch.client.Requests.indexRequest; | ||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; | ||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; | ||
|
||
|
||
public class AnalyzedTextFetchIT extends ESIntegTestCase { | ||
|
||
protected Collection<Class<? extends Plugin>> transportClientPlugins() { | ||
return pluginList(TokenPlugin.class); | ||
} | ||
|
||
@Override | ||
protected Collection<Class<? extends Plugin>> nodePlugins() { | ||
return pluginList(TokenPlugin.class); | ||
} | ||
|
||
public void testSimpleFetchAnalyzedText() throws IOException { | ||
|
||
client().index( | ||
indexRequest("test").type("type").id("1") | ||
.source(jsonBuilder().startObject().field("test", "I am sam i am").endObject())).actionGet(); | ||
client().admin().indices().prepareRefresh().execute().actionGet(); | ||
ensureGreen(); | ||
|
||
SearchSourceBuilder searchSource = SearchSourceBuilder.searchSource().ext(jsonBuilder().startObject() | ||
.startObject(AnalyzedTextFetchSubPhase.NAMES[0]) | ||
.field("field", "test") | ||
.endObject() | ||
.endObject()); | ||
SearchResponse response = client().prepareSearch().setSource(searchSource).get(); | ||
assertSearchResponse(response); | ||
logger.info(response.toString()); | ||
SearchHit hit = response.getHits().getAt(0); | ||
// get the fields from the response | ||
SearchHitField fields = hit.field(AnalyzedTextFetchSubPhase.NAMES[0]); | ||
List<String> termVectors = fields.getValue(); | ||
assertArrayEquals(termVectors.toArray(new String[termVectors.size()]), new String[]{"i", "am", "sam", "i", "am"}); | ||
logger.info("{}", termVectors); | ||
} | ||
|
||
} |