Skip to content

Commit

Permalink
Restore AnalyzedTextFetchSubPhase
Browse files Browse the repository at this point in the history
  • Loading branch information
imotov committed Jul 26, 2016
1 parent 14938e6 commit d2599dc
Show file tree
Hide file tree
Showing 6 changed files with 353 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,6 @@ public Object run() {
.execute(new ActionListener<PutStoredScriptResponse>() {
@Override
public void onResponse(PutStoredScriptResponse indexResponse) {
//TODO: Fix response
listener.onResponse(new TrainNaiveBayesResponse(id));
}

Expand Down
2 changes: 2 additions & 0 deletions src/main/java/org/elasticsearch/plugin/TokenPlugin.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.elasticsearch.script.pmml.PMMLModelScriptEngineService;
import org.elasticsearch.script.pmml.VectorScriptFactory;
import org.elasticsearch.search.SearchModule;
import org.elasticsearch.search.fetch.analyzedtext.AnalyzedTextFetchSubPhase;
import org.elasticsearch.search.fetch.termvectors.TermVectorsFetchSubPhase;

import java.util.Collections;
Expand Down Expand Up @@ -91,5 +92,6 @@ public void onModule(NetworkModule module) {

// Registers this plugin's custom fetch sub-phases with the search module so their
// parse elements ("term_vectors", "analyzed_text") are recognized in the search ext section.
public void onModule(SearchModule searchModule) {
searchModule.registerFetchSubPhase(new TermVectorsFetchSubPhase());
searchModule.registerFetchSubPhase(new AnalyzedTextFetchSubPhase());
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.fetch.analyzedtext;


import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.termvectors.TermVectorsRequest;
import org.elasticsearch.search.fetch.FetchSubPhaseContext;

/**
 * Fetch sub-phase context that carries the {@link AnalyzeRequest} parsed from the
 * "analyzed_text" element of a search request's ext section, so the fetch phase
 * can re-analyze hit text according to the caller's analyzer settings.
 */
public class AnalyzedTextFetchContext extends FetchSubPhaseContext {

    // Parsed analyze request; remains null until the parse element populates it.
    private AnalyzeRequest analyzeRequest;

    public AnalyzedTextFetchContext() {
    }

    /** Returns the analyze request set by the parse element, or null if none was parsed. */
    public AnalyzeRequest getRequest() {
        return analyzeRequest;
    }

    /** Stores the analyze request for later use during hit execution. */
    public void setRequest(AnalyzeRequest request) {
        this.analyzeRequest = request;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.fetch.analyzedtext;

import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.termvectors.TermVectorsRequest;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.script.SharedMethods;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.fetch.FetchSubPhaseParseElement;
import org.elasticsearch.search.internal.SearchContext;

import java.util.List;
import java.util.Map;

import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;


/**
 * Parses the "analyzed_text" element from a search request's ext section into an
 * {@link AnalyzeRequest} and stores it on the {@link AnalyzedTextFetchContext}.
 *
 * Supported parameters: "analyzer", "tokenizer", "token_filters", "char_filters"
 * and "field". Any other parameter is rejected with {@link IllegalArgumentException}.
 */
public class AnalyzedTextFetchParseElement extends FetchSubPhaseParseElement<AnalyzedTextFetchContext> {
    @Override
    protected void innerParse(XContentParser parser, AnalyzedTextFetchContext analyzedTextFetchContext, SearchContext searchContext)
            throws Exception {
        // Re-serialize the current structure so it can be turned into a plain map.
        XContentBuilder newBuilder = jsonBuilder();
        newBuilder.copyCurrentStructure(parser);
        Map<String, Object> requestAsMap = SharedMethods.getSourceAsMap(newBuilder.string());
        AnalyzeRequest request = new AnalyzeRequest();
        if (requestAsMap.get("analyzer") != null) {
            request.analyzer((String) requestAsMap.remove("analyzer"));
        }
        if (requestAsMap.get("token_filters") != null) {
            // JSON arrays deserialize as List, not String[]; a direct (String[]) cast
            // would throw ClassCastException, so convert explicitly.
            request.tokenFilters(toStringArray(requestAsMap.remove("token_filters")));
        }
        if (requestAsMap.get("tokenizer") != null) {
            request.tokenizer((String) requestAsMap.remove("tokenizer"));
        }
        if (requestAsMap.get("char_filters") != null) {
            request.charFilters(toStringArray(requestAsMap.remove("char_filters")));
        }
        if (requestAsMap.get("field") != null) {
            request.field((String) requestAsMap.remove("field"));
        }
        // Fail loudly on unknown parameters instead of relying on a JVM assert that
        // is disabled in production, which would silently ignore user typos.
        if (requestAsMap.isEmpty() == false) {
            throw new IllegalArgumentException(
                    "unsupported parameters " + requestAsMap.keySet() + " in [analyzed_text] request");
        }
        analyzedTextFetchContext.setRequest(request);
    }

    // Converts a parsed JSON value (List of strings, String[], or single scalar)
    // into the String[] form expected by AnalyzeRequest.
    private static String[] toStringArray(Object value) {
        if (value instanceof String[]) {
            return (String[]) value;
        }
        if (value instanceof List) {
            List<?> list = (List<?>) value;
            String[] array = new String[list.size()];
            for (int i = 0; i < array.length; i++) {
                array[i] = (String) list.get(i);
            }
            return array;
        }
        return new String[]{value.toString()};
    }

    @Override
    protected FetchSubPhase.ContextFactory<AnalyzedTextFetchContext> getContextFactory() {
        return AnalyzedTextFetchSubPhase.CONTEXT_FACTORY;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.fetch.analyzedtext;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.search.SearchHitField;
import org.elasticsearch.search.SearchParseElement;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.InternalSearchHit;
import org.elasticsearch.search.internal.InternalSearchHitField;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;


public class AnalyzedTextFetchSubPhase implements FetchSubPhase {

public static final ContextFactory<AnalyzedTextFetchContext> CONTEXT_FACTORY = new ContextFactory<AnalyzedTextFetchContext>() {

@Override
public String getName() {
return NAMES[0];
}

@Override
public AnalyzedTextFetchContext newContextInstance() {
return new AnalyzedTextFetchContext();
}
};

public AnalyzedTextFetchSubPhase() {
}

public static final String[] NAMES = {"analyzed_text"};

@Override
public Map<String, ? extends SearchParseElement> parseElements() {
return Collections.singletonMap(NAMES[0], new AnalyzedTextFetchParseElement());
}

@Override
public void hitsExecute(SearchContext context, InternalSearchHit[] hits) {
}

@Override
public void hitExecute(SearchContext context, HitContext hitContext) {
if (context.getFetchSubPhaseContext(CONTEXT_FACTORY).hitExecutionNeeded() == false) {
return;
}
AnalyzeRequest request = context.getFetchSubPhaseContext(CONTEXT_FACTORY).getRequest();
Analyzer analyzer = null;
boolean closeAnalyzer = false;
String text = (String) context.lookup().source().extractValue(request.field());
if (analyzer == null && request.analyzer() != null) {
analyzer = context.analysisService().analyzer(request.analyzer());
if (analyzer == null) {
throw new IllegalArgumentException("failed to find analyzer [" + request.analyzer() + "]");
}
} else if (request.tokenizer() != null) {
TokenizerFactory tokenizerFactory;
tokenizerFactory = context.analysisService().tokenizer(request.tokenizer());
if (tokenizerFactory == null) {
throw new IllegalArgumentException("failed to find tokenizer under [" + request.tokenizer() + "]");
}
TokenFilterFactory[] tokenFilterFactories = new TokenFilterFactory[0];
if (request.tokenFilters() != null && request.tokenFilters().length > 0) {
tokenFilterFactories = new TokenFilterFactory[request.tokenFilters().length];
for (int i = 0; i < request.tokenFilters().length; i++) {
String tokenFilterName = request.tokenFilters()[i];
tokenFilterFactories[i] = context.analysisService().tokenFilter(tokenFilterName);
if (tokenFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]");
}
if (tokenFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find token filter under [" + tokenFilterName + "]");
}
}
}

CharFilterFactory[] charFilterFactories = new CharFilterFactory[0];
if (request.charFilters() != null && request.charFilters().length > 0) {
charFilterFactories = new CharFilterFactory[request.charFilters().length];
for (int i = 0; i < request.charFilters().length; i++) {
String charFilterName = request.charFilters()[i];
charFilterFactories[i] = context.analysisService().charFilter(charFilterName);
if (charFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find token char under [" + charFilterName + "]");
}
if (charFilterFactories[i] == null) {
throw new IllegalArgumentException("failed to find token char under [" + charFilterName + "]");
}
}
}
analyzer = new CustomAnalyzer(tokenizerFactory, charFilterFactories, tokenFilterFactories);
closeAnalyzer = true;
} else {
analyzer = context.analysisService().defaultIndexAnalyzer();
}
if (analyzer == null) {
throw new IllegalArgumentException("failed to find analyzer");
}

List<String> tokens = simpleAnalyze(analyzer, text, request.field());
if (closeAnalyzer) {
analyzer.close();
}

if (hitContext.hit().fieldsOrNull() == null) {
hitContext.hit().fields(new HashMap<String, SearchHitField>());
}
SearchHitField hitField = hitContext.hit().fields().get(NAMES[0]);
if (hitField == null) {
hitField = new InternalSearchHitField(NAMES[0], new ArrayList<>(1));
hitContext.hit().fields().put(NAMES[0], hitField);
}

hitField.values().add(tokens);
}

private static List<String> simpleAnalyze(Analyzer analyzer, String text, String field) {
List<String> tokens = new ArrayList<>();
try (TokenStream stream = analyzer.tokenStream(field, text)) {
stream.reset();
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
while (stream.incrementToken()) {
tokens.add(term.toString());
}
stream.end();
} catch (IOException e) {
throw new ElasticsearchException("failed to analyze", e);
}
return tokens;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.search.fetch.analyzedtext;

import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.plugin.TokenPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHitField;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import org.junit.Test;

import java.io.IOException;
import java.util.Collection;
import java.util.List;

import static org.elasticsearch.client.Requests.indexRequest;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;


/**
 * Integration test for {@link AnalyzedTextFetchSubPhase}: indexes a document and
 * verifies that the "analyzed_text" ext element returns the analyzed tokens of a
 * source field as a hit field.
 */
public class AnalyzedTextFetchIT extends ESIntegTestCase {

    @Override
    protected Collection<Class<? extends Plugin>> transportClientPlugins() {
        return pluginList(TokenPlugin.class);
    }

    @Override
    protected Collection<Class<? extends Plugin>> nodePlugins() {
        return pluginList(TokenPlugin.class);
    }

    public void testSimpleFetchAnalyzedText() throws IOException {
        // Index a single document and make it searchable.
        client().index(
                indexRequest("test").type("type").id("1")
                        .source(jsonBuilder().startObject().field("test", "I am sam i am").endObject())).actionGet();
        client().admin().indices().prepareRefresh().execute().actionGet();
        ensureGreen();

        // Request the analyzed tokens of the "test" field via the search ext section.
        SearchSourceBuilder searchSource = SearchSourceBuilder.searchSource().ext(jsonBuilder().startObject()
                .startObject(AnalyzedTextFetchSubPhase.NAMES[0])
                .field("field", "test")
                .endObject()
                .endObject());
        SearchResponse response = client().prepareSearch().setSource(searchSource).get();
        assertSearchResponse(response);
        logger.info(response.toString());
        SearchHit hit = response.getHits().getAt(0);
        // The analyzed tokens are attached to the hit under the sub-phase's name.
        SearchHitField field = hit.field(AnalyzedTextFetchSubPhase.NAMES[0]);
        List<String> tokens = field.getValue();
        // JUnit convention: expected array first, actual second, so failure messages
        // report the values the right way around.
        assertArrayEquals(new String[]{"i", "am", "sam", "i", "am"}, tokens.toArray(new String[tokens.size()]));
        logger.info("{}", tokens);
    }

}

0 comments on commit d2599dc

Please sign in to comment.