Skip to content

Commit

Permalink
Restore AnalyzedTextFetchSubPhase
Browse files Browse the repository at this point in the history
  • Loading branch information
imotov committed Jul 26, 2016
1 parent 14938e6 commit d2599dc
Show file tree
Hide file tree
Showing 6 changed files with 353 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,6 @@ public Object run() {
.execute(new ActionListener<PutStoredScriptResponse>() {
@Override
public void onResponse(PutStoredScriptResponse indexResponse) {
//TODO: Fix response
listener.onResponse(new TrainNaiveBayesResponse(id));
}

Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/elasticsearch/plugin/TokenPlugin.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.elasticsearch.script.pmml.PMMLModelScriptEngineService;
import org.elasticsearch.script.pmml.VectorScriptFactory;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.fetch.analyzedtext.AnalyzedTextFetchSubPhase;
import org.elasticsearch.search.fetch.termvectors.TermVectorsFetchSubPhase;

import java.util.Collections;
Expand Down Expand Up @@ -91,5 +92,6 @@ public void onModule(NetworkModule module) {

// Registers this plugin's custom fetch sub-phases with the search module so their
// parse elements ("term_vectors", "analyzed_text") are recognized in the search ext section.
public void onModule(SearchModule searchModule) {
searchModule.registerFetchSubPhase(new TermVectorsFetchSubPhase());
searchModule.registerFetchSubPhase(new AnalyzedTextFetchSubPhase());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.fetch.analyzedtext;


import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.termvectors.TermVectorsRequest;
import org.elasticsearch.search.fetch.FetchSubPhaseContext;

/**
 * Fetch sub-phase context that carries the {@link AnalyzeRequest} parsed from the
 * "analyzed_text" element of a search request's ext section, so the fetch phase
 * can re-analyze hit text according to the caller's analyzer settings.
 */
public class AnalyzedTextFetchContext extends FetchSubPhaseContext {

    // Parsed analyze request; remains null until the parse element populates it.
    private AnalyzeRequest analyzeRequest;

    public AnalyzedTextFetchContext() {
    }

    /** Returns the analyze request set by the parse element, or null if none was parsed. */
    public AnalyzeRequest getRequest() {
        return analyzeRequest;
    }

    /** Stores the analyze request for later use during hit execution. */
    public void setRequest(AnalyzeRequest request) {
        this.analyzeRequest = request;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.fetch.analyzedtext;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.termvectors.TermVectorsRequest;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.script.SharedMethods;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.FetchSubPhaseParseElement;
import org.elasticsearch.search.internal.SearchContext;

import java.util.List;
import java.util.Map;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;


/**
 * Parses the "analyzed_text" element from a search request's ext section into an
 * {@link AnalyzeRequest} and stores it on the {@link AnalyzedTextFetchContext}.
 *
 * Supported parameters: "analyzer", "tokenizer", "token_filters", "char_filters"
 * and "field". Any other parameter is rejected with {@link IllegalArgumentException}.
 */
public class AnalyzedTextFetchParseElement extends FetchSubPhaseParseElement<AnalyzedTextFetchContext> {
    @Override
    protected void innerParse(XContentParser parser, AnalyzedTextFetchContext analyzedTextFetchContext, SearchContext searchContext)
            throws Exception {
        // Re-serialize the current structure so it can be turned into a plain map.
        XContentBuilder newBuilder = jsonBuilder();
        newBuilder.copyCurrentStructure(parser);
        Map<String, Object> requestAsMap = SharedMethods.getSourceAsMap(newBuilder.string());
        AnalyzeRequest request = new AnalyzeRequest();
        if (requestAsMap.get("analyzer") != null) {
            request.analyzer((String) requestAsMap.remove("analyzer"));
        }
        if (requestAsMap.get("token_filters") != null) {
            // JSON arrays deserialize as List, not String[]; a direct (String[]) cast
            // would throw ClassCastException, so convert explicitly.
            request.tokenFilters(toStringArray(requestAsMap.remove("token_filters")));
        }
        if (requestAsMap.get("tokenizer") != null) {
            request.tokenizer((String) requestAsMap.remove("tokenizer"));
        }
        if (requestAsMap.get("char_filters") != null) {
            request.charFilters(toStringArray(requestAsMap.remove("char_filters")));
        }
        if (requestAsMap.get("field") != null) {
            request.field((String) requestAsMap.remove("field"));
        }
        // Fail loudly on unknown parameters instead of relying on a JVM assert that
        // is disabled in production, which would silently ignore user typos.
        if (requestAsMap.isEmpty() == false) {
            throw new IllegalArgumentException(
                    "unsupported parameters " + requestAsMap.keySet() + " in [analyzed_text] request");
        }
        analyzedTextFetchContext.setRequest(request);
    }

    // Converts a parsed JSON value (List of strings, String[], or single scalar)
    // into the String[] form expected by AnalyzeRequest.
    private static String[] toStringArray(Object value) {
        if (value instanceof String[]) {
            return (String[]) value;
        }
        if (value instanceof List) {
            List<?> list = (List<?>) value;
            String[] array = new String[list.size()];
            for (int i = 0; i < array.length; i++) {
                array[i] = (String) list.get(i);
            }
            return array;
        }
        return new String[]{value.toString()};
    }

    @Override
    protected FetchSubPhase.ContextFactory<AnalyzedTextFetchContext> getContextFactory() {
        return AnalyzedTextFetchSubPhase.CONTEXT_FACTORY;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.fetch.analyzedtext;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.search.SearchHitField;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.search.internal.InternalSearchHitField;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;


public class AnalyzedTextFetchSubPhase implements FetchSubPhase {

public static final ContextFactory<AnalyzedTextFetchContext> CONTEXT_FACTORY = new ContextFactory<AnalyzedTextFetchContext>() {

@Override
public String getName() {
return NAMES[0];
}

@Override
public AnalyzedTextFetchContext newContextInstance() {
return new AnalyzedTextFetchContext();
}
};

public AnalyzedTextFetchSubPhase() {
}

public static final String[] NAMES = {"analyzed_text"};

@Override
public Map<String, ? extends SearchParseElement> parseElements() {
return Collections.singletonMap(NAMES[0], new AnalyzedTextFetchParseElement());
}

@Override
public void hitsExecute(SearchContext context, InternalSearchHit[] hits) {
}

@Override
public void hitExecute(SearchContext context, HitContext hitContext) {
if (context.getFetchSubPhaseContext(CONTEXT_FACTORY).hitExecutionNeeded() == false) {
return;
}
AnalyzeRequest request = context.getFetchSubPhaseContext(CONTEXT_FACTORY).getRequest();
Analyzer analyzer = null;
boolean closeAnalyzer = false;
String text = (String) context.lookup().source().extractValue(request.field());
if (analyzer == null && request.analyzer() != null) {
analyzer = context.analysisService().analyzer(request.analyzer());
if (analyzer == null) {
throw new IllegalArgumentException("failed to find analyzer [" + request.analyzer() + "]");
}
} else if (request.tokenizer() != null) {
TokenizerFactory tokenizerFactory;
tokenizerFactory = context.analysisService().tokenizer(request.tokenizer());
if (tokenizerFactory == null) {
throw new IllegalArgumentException("failed to find tokenizer under [" + request.tokenizer() + "]");
}
TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
if (request.tokenFilters() != null && request.tokenFilters().length > 0) {
tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().length];
for (int i = 0; i < request.tokenFilters().length; i++) {
String tokenFilterName = request.tokenFilters()[i];
tokenFilterFactories[i] = context.analysisService().tokenFilter(tokenFilterName);
if (tokenFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]");
}
if (tokenFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]");
}
}
}

CharFilterFactory[] charFilterFactories = new CharFilterFactory[0];
if (request.charFilters() != null && request.charFilters().length > 0) {
charFilterFactories = new CharFilterFactory[request.charFilters().length];
for (int i = 0; i < request.charFilters().length; i++) {
String charFilterName = request.charFilters()[i];
charFilterFactories[i] = context.analysisService().charFilter(charFilterName);
if (charFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find token char under [" + charFilterName + "]");
}
if (charFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find token char under [" + charFilterName + "]");
}
}
}
analyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories, tokenFilterFactories);
closeAnalyzer = true;
} else {
analyzer = context.analysisService().defaultIndexAnalyzer();
}
if (analyzer == null) {
throw new IllegalArgumentException("failed to find analyzer");
}

List<String> tokens = simpleAnalyze(analyzer, text, request.field());
if (closeAnalyzer) {
analyzer.close();
}

if (hitContext.hit().fieldsOrNull() == null) {
hitContext.hit().fields(new HashMap<String, SearchHitField>());
}
SearchHitField hitField = hitContext.hit().fields().get(NAMES[0]);
if (hitField == null) {
hitField = new InternalSearchHitField(NAMES[0], new ArrayList<>(1));
hitContext.hit().fields().put(NAMES[0], hitField);
}

hitField.values().add(tokens);
}

private static List<String> simpleAnalyze(Analyzer analyzer, String text, String field) {
List<String> tokens = new ArrayList<>();
try (TokenStream stream = analyzer.tokenStream(field, text)) {
stream.reset();
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
while (stream.incrementToken()) {
tokens.add(term.toString());
}
stream.end();
} catch (IOException e) {
throw new ElasticsearchException("failed to analyze", e);
}
return tokens;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.fetch.analyzedtext;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.plugin.TokenPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHitField;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import org.junit.Test;

import java.io.IOException;
import java.util.Collection;
import java.util.List;

import static org.elasticsearch.client.Requests.indexRequest;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;


/**
 * Integration test for {@link AnalyzedTextFetchSubPhase}: indexes a document and
 * verifies that the "analyzed_text" ext element returns the analyzed tokens of a
 * source field as a hit field.
 */
public class AnalyzedTextFetchIT extends ESIntegTestCase {

    @Override
    protected Collection<Class<? extends Plugin>> transportClientPlugins() {
        return pluginList(TokenPlugin.class);
    }

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return pluginList(TokenPlugin.class);
    }

    public void testSimpleFetchAnalyzedText() throws IOException {
        // Index a single document and make it searchable.
        client().index(
                indexRequest("test").type("type").id("1")
                        .source(jsonBuilder().startObject().field("test", "I am sam i am").endObject())).actionGet();
        client().admin().indices().prepareRefresh().execute().actionGet();
        ensureGreen();

        // Request the analyzed tokens of the "test" field via the search ext section.
        SearchSourceBuilder searchSource = SearchSourceBuilder.searchSource().ext(jsonBuilder().startObject()
                .startObject(AnalyzedTextFetchSubPhase.NAMES[0])
                .field("field", "test")
                .endObject()
                .endObject());
        SearchResponse response = client().prepareSearch().setSource(searchSource).get();
        assertSearchResponse(response);
        logger.info(response.toString());
        SearchHit hit = response.getHits().getAt(0);
        // The analyzed tokens are attached to the hit under the sub-phase's name.
        SearchHitField field = hit.field(AnalyzedTextFetchSubPhase.NAMES[0]);
        List<String> tokens = field.getValue();
        // JUnit convention: expected array first, actual second, so failure messages
        // report the values the right way around.
        assertArrayEquals(new String[]{"i", "am", "sam", "i", "am"}, tokens.toArray(new String[tokens.size()]));
        logger.info("{}", tokens);
    }

}

0 comments on commit d2599dc

Please sign in to comment.