Skip to content

Commit

Permalink
[lucene] adds SEARCH_MORE function for more like this search
Browse files Browse the repository at this point in the history
refs : #7408
  • Loading branch information
robfrank committed May 11, 2017
1 parent b8b556f commit 6212d1f
Show file tree
Hide file tree
Showing 11 changed files with 388 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ else if (value instanceof Double)
return ridField;
}

return new TextField(fieldName, value.toString(), store);
return new TextField(fieldName, value.toString(), Field.Store.YES);

}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public class OLuceneFunctionsFactory implements OSQLFunctionFactory {
register(OLuceneSearchOnIndexFunction.NAME, new OLuceneSearchOnIndexFunction());
register(OLuceneSearchOnFieldsFunction.NAME, new OLuceneSearchOnFieldsFunction());
register(OLuceneSearchOnClassFunction.NAME, new OLuceneSearchOnClassFunction());
register(OLuceneSearchMoreLikeThisFunction.NAME, new OLuceneSearchMoreLikeThisFunction());
}

public static void register(final String name, final Object function) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,20 @@ public static MemoryIndex getOrCreateMemoryIndex(OCommandContext ctx) {
return memoryIndex;
}

/**
 * Returns a copy of {@code s} in which every reserved character is prefixed with two
 * backslash characters; all other characters are copied through unchanged.
 *
 * <p>The reserved characters are: {@code \ + - ! ( ) : ^ [ ] " { } ~ * ? | & /}.
 * NOTE(review): this looks like the character set escaped by Lucene's classic query
 * parser - confirm before relying on that equivalence.
 *
 * @param s the text to escape; must not be {@code null}
 * @return the escaped text
 */
public static String doubleEscape(String s) {
  // Every character listed here receives the double-backslash prefix.
  final String reserved = "\\+-!():^[]\"{}~*?|&/";
  final StringBuilder escaped = new StringBuilder();

  for (char ch : s.toCharArray()) {
    if (reserved.indexOf(ch) >= 0) {
      escaped.append("\\\\");
    }
    escaped.append(ch);
  }

  return escaped.toString();
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,248 @@
package com.orientechnologies.lucene.functions;

import com.orientechnologies.common.log.OLogManager;
import com.orientechnologies.lucene.collections.OLuceneCompositeKey;
import com.orientechnologies.lucene.index.OLuceneFullTextIndex;
import com.orientechnologies.orient.core.command.OCommandContext;
import com.orientechnologies.orient.core.db.record.OIdentifiable;
import com.orientechnologies.orient.core.id.ORID;
import com.orientechnologies.orient.core.metadata.OMetadata;
import com.orientechnologies.orient.core.record.OElement;
import com.orientechnologies.orient.core.record.impl.ODocument;
import com.orientechnologies.orient.core.sql.executor.OResult;
import com.orientechnologies.orient.core.sql.functions.OIndexableSQLFunction;
import com.orientechnologies.orient.core.sql.functions.OSQLFunctionAbstract;
import com.orientechnologies.orient.core.sql.parser.*;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;

import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

import static com.orientechnologies.lucene.functions.OLuceneFunctionsUtils.getOrCreateMemoryIndex;

/**
 * SQL function {@code search_more}: a "more like this" search executed against the single
 * Lucene full-text index defined on the target class.
 *
 * <p>Given a list of record ids, the indexed execution path ({@link #searchFromTarget}) builds a
 * Lucene {@link MoreLikeThis} query from the indexed field values of those records and returns the
 * matching records, excluding the input records themselves.
 *
 * Created by frank on 15/01/2017.
 */
public class OLuceneSearchMoreLikeThisFunction extends OSQLFunctionAbstract implements OIndexableSQLFunction {

  public static final String NAME = "search_more";

  /** Registers the function under {@link #NAME}, accepting between one and two parameters. */
  public OLuceneSearchMoreLikeThisFunction() {
    super(NAME, 1, 2);
  }

  @Override
  public String getName() {
    return NAME;
  }

  /**
   * Non-indexed execution: indexes the current record into an in-memory Lucene index and checks
   * whether the query given as first parameter matches it.
   *
   * @return {@code true}/{@code false} for match/no match, {@code false} when the record has no
   * schema class or its class has no Lucene full-text index, {@code null} when the query cannot
   * be parsed
   */
  @Override
  public Object execute(Object iThis,
      OIdentifiable iCurrentRecord,
      Object iCurrentResult,
      Object[] params,
      OCommandContext ctx) {

    OResult result = (OResult) iThis;

    OElement element = result.toElement();

    // A record without a schema class cannot have a class-level index: nothing to match against.
    if (!element.getSchemaType().isPresent()) {
      return false;
    }

    String className = element.getSchemaType().get().getName();

    OLuceneFullTextIndex index = searchForIndex(ctx, className);

    if (index == null) {
      return false;
    }

    // NOTE(review): the first parameter is treated as a query string here, while the indexed path
    // (searchFromTarget) treats it as a list of RIDs - confirm which contract is intended.
    String query = (String) params[0];

    MemoryIndex memoryIndex = getOrCreateMemoryIndex(ctx);

    List<Object> key = index.getDefinition().getFields()
        .stream()
        .map(s -> element.getProperty(s))
        .collect(Collectors.toList());

    try {
      for (IndexableField field : index.buildDocument(key).getFields()) {
        memoryIndex.addField(field, index.indexAnalyzer());
      }

      return memoryIndex.search(index.buildQuery(query)) > 0.0f;
    } catch (ParseException e) {
      OLogManager.instance().error(this, "error occurred while building query", e);
    }
    return null;
  }

  @Override
  public String getSyntax() {
    return "SEARCH_MORE( [rids], [ metadata {} ] )";
  }

  @Override
  public boolean filterResult() {
    return true;
  }

  /**
   * Indexed execution: returns the records that are similar to the records identified by the RID
   * list in {@code args[0]}.
   *
   * @param target   the FROM clause; its class must own exactly one Lucene full-text index
   * @param operator unused
   * @param rightValue unused
   * @param ctx      the command context
   * @param args     {@code args[0]} evaluates to a list of RIDs; optional {@code args[1]} is a
   *                 JSON metadata document (parsed, not yet applied)
   * @return the similar records, or an empty set when there is no usable index or no input RID
   */
  @Override
  public Iterable<OIdentifiable> searchFromTarget(OFromClause target,
      OBinaryCompareOperator operator,
      Object rightValue,
      OCommandContext ctx,
      OExpression... args) {

    OLuceneFullTextIndex index = searchForIndex(target, ctx);

    // Guard before the first dereference of the index.
    if (index == null) {
      return Collections.emptySet();
    }

    OExpression expression = args[0];

    @SuppressWarnings("unchecked")
    List<ORID> rids = (List<ORID>) expression.execute((OIdentifiable) null, ctx);

    // Without input records there is nothing to be "like"; also avoids building "RID:( )".
    if (rids == null || rids.isEmpty()) {
      return Collections.emptySet();
    }

    List<String> ridsAsString = rids.stream()
        .map(r -> r.toString())
        .collect(Collectors.toList());

    String queryOthers =
        "RID:( " + QueryParser.escape(String.join(" ", ridsAsString)) + ")";

    Set<OIdentifiable> oIdentifiables = index.get(queryOthers);

    IndexSearcher searcher = index.searcher();

    List<String> fieldNames = index.getDefinition().getFields();

    MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader());

    mlt.setAnalyzer(index.queryAnalyzer());
    mlt.setFieldNames(fieldNames.toArray(new String[] {}));
    mlt.setMinTermFreq(1);
    mlt.setMinDocFreq(1);

    BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();

    // One optional "like" clause per indexed field of every input record.
    for (OIdentifiable oi : oIdentifiables) {
      // Load the record once, not once per field.
      OElement element = oi.getRecord().load();

      for (String fieldName : fieldNames) {
        Object property = element.getProperty(fieldName);

        // A record may lack a value for an indexed field: it contributes no terms.
        if (property == null) {
          continue;
        }

        try {
          Query fieldQuery = mlt.like(fieldName, new StringReader(property.toString()));

          queryBuilder.add(fieldQuery, BooleanClause.Occur.SHOULD);
        } catch (IOException e) {
          OLogManager.instance()
              .error(this, "Error while building more-like-this query on field '%s'", e, fieldName);
        }
      }
    }

    // The input records themselves must not be part of the result.
    for (String rid : ridsAsString) {
      // The term text is escaped because the query is turned into a string and parsed again below.
      Term ridTerm = new Term("RID", QueryParser.escape(rid));
      queryBuilder.add(new TermQuery(ridTerm), BooleanClause.Occur.MUST_NOT);
    }

    Query mltQuery = queryBuilder.build();

    if (args.length == 2) {
      // TODO handle metadata: for now it is only parsed, so malformed JSON fails fast.
      new ODocument().fromJSON(args[1].toString());
    }

    Set<OIdentifiable> luceneResultSet = index
        .get(new OLuceneCompositeKey(Arrays.asList(mltQuery.toString())).setContext(ctx));

    return luceneResultSet;
  }

  /**
   * Resolves the Lucene full-text index of the class named in the FROM clause.
   *
   * @return the index, or {@code null} when the target names no class or the class has no such
   * index
   */
  private OLuceneFullTextIndex searchForIndex(OFromClause target, OCommandContext ctx) {
    OFromItem item = target.getItem();

    // Targets that carry no identifier cannot be mapped to a class-level index.
    if (item == null || item.getIdentifier() == null) {
      return null;
    }

    String className = item.getIdentifier().getStringValue();

    return searchForIndex(ctx, className);
  }

  /**
   * Looks up the Lucene full-text index defined on {@code className}.
   *
   * @return the index, or {@code null} when the class has none
   * @throws IllegalArgumentException when the class has more than one Lucene full-text index
   */
  private OLuceneFullTextIndex searchForIndex(OCommandContext ctx, String className) {
    OMetadata dbMetadata = ctx.getDatabase().activateOnCurrentThread().getMetadata();

    List<OLuceneFullTextIndex> indices = dbMetadata
        .getSchema()
        .getClass(className)
        .getIndexes()
        .stream()
        .filter(idx -> idx instanceof OLuceneFullTextIndex)
        .map(idx -> (OLuceneFullTextIndex) idx)
        .collect(Collectors.toList());

    if (indices.size() > 1) {
      throw new IllegalArgumentException("too many full-text indices on given class: " + className);
    }

    return indices.isEmpty() ? null : indices.get(0);
  }

  /** Estimates the result size as the size of the index, or 0 when no index is available. */
  @Override
  public long estimate(OFromClause target, OBinaryCompareOperator operator, Object rightValue, OCommandContext ctx,
      OExpression... args) {
    OLuceneFullTextIndex index = searchForIndex(target, ctx);

    if (index != null) {
      return index.getSize();
    }
    return 0;
  }

  // NOTE(review): this answers "can run without index" with "an index exists" - confirm intent.
  @Override
  public boolean canExecuteWithoutIndex(OFromClause target, OBinaryCompareOperator operator, Object rightValue,
      OCommandContext ctx, OExpression... args) {
    return allowsIndexedExecution(target, operator, rightValue, ctx, args);
  }

  /** Indexed execution is possible only when the target class has a Lucene full-text index. */
  @Override
  public boolean allowsIndexedExecution(OFromClause target, OBinaryCompareOperator operator, Object rightValue,
      OCommandContext ctx, OExpression... args) {

    OLuceneFullTextIndex index = searchForIndex(target, ctx);

    return index != null;
  }

  @Override
  public boolean shouldExecuteAfterSearch(OFromClause target, OBinaryCompareOperator operator, Object rightValue,
      OCommandContext ctx, OExpression... args) {
    return false;
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ public Iterable<OIdentifiable> searchFromTarget(OFromClause target,

OExpression expression = args[0];
String query = (String) expression.execute((OIdentifiable) null, ctx);

if (index != null) {

if (args.length == 2) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -428,18 +428,15 @@ public boolean supportsOrderedIterations() {
}

@Override
public IndexSearcher searcher() throws IOException {
public IndexSearcher searcher() {
while (true) {
try {
return storage.callIndexEngine(false, false, indexId, new OIndexEngineCallback<IndexSearcher>() {
@Override
public IndexSearcher callEngine(OIndexEngine engine) {
OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine;
try {
return indexEngine.searcher();
} catch (IOException e) {
throw OException.wrapException(new OIndexException("Cannot get searcher from index " + getName()), e);
}
return storage.callIndexEngine(false, false, indexId, engine -> {
OLuceneIndexEngine indexEngine = (OLuceneIndexEngine) engine;
try {
return indexEngine.searcher();
} catch (IOException e) {
throw OException.wrapException(new OIndexException("Cannot get searcher from index " + getName()), e);
}
});
} catch (OInvalidIndexEngineIdException e) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package com.orientechnologies.lucene.functions;

import com.orientechnologies.lucene.test.BaseLuceneTest;
import com.orientechnologies.orient.core.id.ORecordId;
import com.orientechnologies.orient.core.sql.executor.OResultSet;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import java.io.InputStream;

import static org.assertj.core.api.Assertions.assertThat;

/**
 * Tests for the {@code SEARCH_MORE} SQL function, run against the data set loaded from
 * {@code testLuceneIndex.sql} with a Lucene full-text index on {@code Song.title}.
 *
 * Created by frank on 15/01/2017.
 */
public class OLuceneSearchMoreLikeThisFunctionTest extends BaseLuceneTest {

  /** Loads the test data set and creates the Lucene full-text index used by the tests. */
  @Before
  public void setUp() throws Exception {
    // Close the script stream once it has been read, even if reading or execution fails.
    try (InputStream stream = ClassLoader.getSystemResourceAsStream("testLuceneIndex.sql")) {
      db.execute("sql", getScriptFromStream(stream));
    }

    db.command("create index Song.title on Song (title) FULLTEXT ENGINE LUCENE ");
  }

  @Test
  public void shouldSearchMoreLikeThisWithRid() throws Exception {

    OResultSet resultSet = db
        .query("SELECT from Song where SEARCH_More([#25:2, #25:3] ) = true");

    // Release the result set even when the assertion fails.
    try {
      assertThat(resultSet).hasSize(48);
    } finally {
      resultSet.close();
    }
  }

  // NOTE(review): ignored without a stated reason - presumably inner queries are not supported
  // as SEARCH_MORE arguments yet; confirm and document.
  @Test
  @Ignore
  public void shouldSearchMoreLikeThisWithInnerQuery() throws Exception {

    OResultSet resultSet = db
        .query("SELECT from Song where SEARCH_More( (LET SELECT @RID FROM SONG WHERE AUTHOR = \"hunter\" ) ) = true");

    // Release the result set even when the assertion fails.
    try {
      assertThat(resultSet).hasSize(2);
    } finally {
      resultSet.close();
    }
  }

}
Loading

0 comments on commit 6212d1f

Please sign in to comment.