Skip to content

Commit

Permalink
Merge pull request #16 from AndBible/update_lucene
Browse files Browse the repository at this point in the history
Update lucene to version 8.11.2
  • Loading branch information
tuomas2 authored Nov 14, 2024
2 parents 007c06f + b2989d4 commit 1445a75
Show file tree
Hide file tree
Showing 48 changed files with 386 additions and 1,786 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,5 @@ rebel.xml
/.gradle/
/build/
atlassian-ide-plugin.xml
.DS_Store
local.properties
12 changes: 6 additions & 6 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ tasks.withType<Test>() {
}

group = "org.crosswire"
version = "2.3"
version = "2.4"

repositories {
mavenCentral()
Expand All @@ -25,13 +25,13 @@ repositories {
dependencies {
// implementation("org.jetbrains.kotlin:kotlin-stdlib")
implementation("org.apache.commons:commons-compress:1.12")
implementation("com.chenlb.mmseg4j:mmseg4j-analysis:1.8.6")
implementation("com.chenlb.mmseg4j:mmseg4j-dic:1.8.6")

implementation("org.jdom:jdom2:2.0.6.1")
implementation("org.apache.lucene:lucene-analyzers:3.6.2")
// To upgrade Lucene, change to
// implementation("org.apache.lucene:lucene-analyzers-common:x")
implementation("org.apache.lucene:lucene-analyzers-common:8.11.2")
implementation("org.apache.lucene:lucene-analyzers-smartcn:8.11.2")
implementation("org.apache.lucene:lucene-analyzers-kuromoji:8.11.2")

implementation("org.apache.lucene:lucene-queryparser:8.11.2")

//implementation("org.slf4j:slf4j-api:1.7.6")
implementation("org.slf4j:slf4j-api:1.7.6")
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/crosswire/common/util/CWClassLoader.java
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public final class CWClassLoader extends ClassLoader {
* @return the CrossWire Class Loader
*/
public static CWClassLoader instance(Class<?> resourceOwner) {
return AccessController.doPrivileged(new PrivilegedLoader<CWClassLoader>(resourceOwner));
return new CWClassLoader(resourceOwner);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.crosswire.jsword.book.sword.Backend;
import org.crosswire.jsword.book.sword.processing.NoOpRawTextProcessor;
import org.crosswire.jsword.book.sword.processing.RawTextToXmlProcessor;
import org.crosswire.jsword.index.IndexManagerFactory;
import org.crosswire.jsword.index.IndexStatus;
import org.crosswire.jsword.index.IndexStatusEvent;
import org.crosswire.jsword.index.IndexStatusListener;
Expand Down Expand Up @@ -186,6 +187,9 @@ public boolean match(String name) {
* @see org.crosswire.jsword.book.Book#getIndexStatus()
*/
public IndexStatus getIndexStatus() {
// A book whose index the index manager reports as needing a rebuild is
// reported as INVALID, regardless of the status recorded on bmd.
if (IndexManagerFactory.getIndexManager().needsReindexing(this)) {
return IndexStatus.INVALID;
}
// Otherwise defer to the status held by bmd.
return bmd.getIndexStatus();
}

Expand Down
1 change: 1 addition & 0 deletions src/main/java/org/crosswire/jsword/index/Index.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ public interface Index {
* @throws BookException
*/
Key find(String query) throws BookException;
Key find(String query, boolean fullText) throws BookException;

/**
* An index must be able to create KeyLists for users in a similar way to
Expand Down
37 changes: 10 additions & 27 deletions src/main/java/org/crosswire/jsword/index/lucene/IndexMetadata.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.crosswire.jsword.index.lucene;

import java.io.IOException;
import java.util.Objects;

import org.crosswire.common.util.PropertyMap;
import org.crosswire.common.util.ResourceUtil;
Expand All @@ -41,17 +42,20 @@
public final class IndexMetadata {

/** latest version on top */
public static final float INDEX_VERSION_1_2 = 1.2f;
public static final float INDEX_VERSION_1_3 = 1.3f;

/**
* A prior version.
*
* @deprecated do not use
*/
@Deprecated
public static final float INDEX_VERSION_1_2 = 1.2f;

@Deprecated
public static final float INDEX_VERSION_1_1 = 1.1f;

public static final String LATEST_INDEX_VERSION = "Latest.Index.Version";
public static final String LUCENE_VERSION = "Lucene.Version";

public static final String PREFIX_LATEST_INDEX_VERSION_BOOK_OVERRIDE = "Latest.Index.Version.Book.";
/**
Expand All @@ -69,38 +73,20 @@ public static IndexMetadata instance() {
return myInstance;
}

/**
* default Installed IndexVersion
*
* @return the index version
* @deprecated see InstalledIndex.java
*/
@Deprecated
public float getInstalledIndexVersion() {
String value = props.get(INDEX_VERSION, "1.1"); // todo At some point
// default should be 1.2
return Float.parseFloat(value);
}

// Default Latest IndexVersion : Default version number of Latest indexing
// schema: PerBook index version must be equal or greater than this
public float getLatestIndexVersion() {
String value = props.get(LATEST_INDEX_VERSION, "1.2");
return Float.parseFloat(value);
}
public String getLatestIndexVersionStr() {
String value = props.get(LATEST_INDEX_VERSION, "1.2");
return value;
String value = props.get(LATEST_INDEX_VERSION);
return (value == null) ? InstalledIndex.DEFAULT_INSTALLED_INDEX_VERSION : Float.parseFloat(value);
}

public float getLatestIndexVersion(Book b) {
if (b == null) {
return getLatestIndexVersion();
}

String value = props.get(PREFIX_LATEST_INDEX_VERSION_BOOK_OVERRIDE + IndexMetadata.getBookIdentifierPropSuffix(b.getBookMetaData()),
props.get(LATEST_INDEX_VERSION));
return Float.parseFloat(value);
String value = props.get(PREFIX_LATEST_INDEX_VERSION_BOOK_OVERRIDE + IndexMetadata.getBookIdentifierPropSuffix(b.getBookMetaData()));
return (value == null) ? getLatestIndexVersion() : Float.parseFloat(value);
}

// used in property keys e.g. Installed.Index.Version.Book.ESV[1.0.1]
Expand All @@ -112,9 +98,6 @@ public static String getBookIdentifierPropSuffix(BookMetaData meta) {
return meta.getInitials() + moduleVer;
}

public float getLuceneVersion() {
return Float.parseFloat(props.get(LUCENE_VERSION));
}
private IndexMetadata() {
try {
props = ResourceUtil.getProperties(getClass());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public final class InstalledIndex {
public static final String PREFIX_INSTALLED_INDEX_VERSION_BOOK_OVERRIDE = "Installed.Index.Version.Book.";
// TODO(Sijo): change this value on lucene upgrade
/** The Index version for new indexes */
public static final float DEFAULT_INSTALLED_INDEX_VERSION = IndexMetadata.INDEX_VERSION_1_2;
public static final float DEFAULT_INSTALLED_INDEX_VERSION = IndexMetadata.INDEX_VERSION_1_3;

/**
* All access through this single instance.
Expand Down
98 changes: 61 additions & 37 deletions src/main/java/org/crosswire/jsword/index/lucene/LuceneIndex.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,21 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.crosswire.common.progress.JobManager;
import org.crosswire.common.progress.Progress;
import org.crosswire.common.util.FileUtil;
Expand All @@ -55,7 +59,7 @@
import org.crosswire.jsword.index.AbstractIndex;
import org.crosswire.jsword.index.IndexPolicy;
import org.crosswire.jsword.index.IndexStatus;
import org.crosswire.jsword.index.lucene.analysis.LuceneAnalyzer;
import org.crosswire.jsword.index.lucene.analysis.AnalyzerFactory;
import org.crosswire.jsword.index.search.SearchModifier;
import org.crosswire.jsword.passage.AbstractPassage;
import org.crosswire.jsword.passage.Key;
Expand Down Expand Up @@ -125,6 +129,11 @@ public class LuceneIndex extends AbstractIndex implements Closeable {
*/
public static final String FIELD_MORPHOLOGY = "morph";

/**
* Full text without tokenization.
*/
public static final String FIELD_FULL_TEXT = "full_text";

/**
* Combines the strong numbers with the morphology field
*/
Expand Down Expand Up @@ -206,17 +215,17 @@ public LuceneIndex(Book book, URI storage, IndexPolicy policy) throws BookExcept

try {
// When misconfigured, this can throw errors.
Analyzer analyzer = new LuceneAnalyzer(book);

Analyzer analyzer = AnalyzerFactory.getInstance().createAnalyzer(book);

book.setIndexStatus(IndexStatus.CREATING);

IndexWriter writer = null;
try {
// Write the core index to disk.
final Directory destination = FSDirectory.open(new File(tempPath.getCanonicalPath()));
writer = new IndexWriter(destination, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
writer.setRAMBufferSizeMB(policy.getRAMBufferSize());
final Directory destination = FSDirectory.open(new File(tempPath.getCanonicalPath()).toPath());
IndexWriterConfig config = new IndexWriterConfig(analyzer);
config.setRAMBufferSizeMB(policy.getRAMBufferSize());
writer = new IndexWriter(destination, config);

generateSearchIndexImpl(job, errors, writer, book.getGlobalKeyList(), 0, policy);

Expand Down Expand Up @@ -269,17 +278,22 @@ public LuceneIndex(Book book, URI storage, IndexPolicy policy) throws BookExcept
*/
private void initDirectoryAndSearcher() {
try {
directory = FSDirectory.open(new File(path));
searcher = new IndexSearcher(directory, true);
directory = FSDirectory.open(new File(path).toPath());
reader = DirectoryReader.open(directory);
searcher = new IndexSearcher(reader);
} catch (IOException ex) {
log.warn("second load failure", ex);
}
}

/**
 * Convenience overload that searches with {@code fullText} set to
 * {@code false}; simply delegates to {@link #find(String, boolean)}.
 *
 * @param search the query string, passed through unchanged
 * @return whatever the two-argument overload returns for this query
 * @throws BookException propagated from the two-argument overload
 */
public Key find(String search) throws BookException {
return find(search, false);
}

/* (non-Javadoc)
* @see org.crosswire.jsword.index.Index#find(java.lang.String, boolean)
*/
public Key find(String search) throws BookException {
public Key find(String search, boolean fullText) throws BookException {
String v11nName = book.getBookMetaData().getProperty("Versification").toString();
Versification v11n = Versifications.instance().getVersification(v11nName);

Expand All @@ -289,12 +303,12 @@ public Key find(String search) throws BookException {
if (search != null) {
Throwable theCause = null;
try {
Analyzer analyzer = new LuceneAnalyzer(book);
Analyzer analyzer = AnalyzerFactory.getInstance().createAnalyzer(book);

QueryParser parser = new QueryParser(Version.LUCENE_29, LuceneIndex.FIELD_BODY, analyzer);
QueryParser parser = new QueryParser(fullText ? LuceneIndex.FIELD_FULL_TEXT : LuceneIndex.FIELD_BODY, analyzer);
parser.setAllowLeadingWildcard(true);
Query query = parser.parse(search);
log.info("ParsedQuery- {}", query.toString());
log.info("ParsedQuery {} {}", query.getClass().toString(), query);

// For ranking we use a PassageTally
if (modifier != null && modifier.isRanked()) {
Expand All @@ -303,7 +317,7 @@ public Key find(String search) throws BookException {
tally.raiseNormalizeProtection();
results = tally;

TopScoreDocCollector collector = TopScoreDocCollector.create(modifier.getMaxResults(), false);
TopScoreDocCollector collector = TopScoreDocCollector.create(modifier.getMaxResults(), modifier.getMaxResults());
searcher.search(query, collector);
tally.setTotal(collector.getTotalHits());
ScoreDoc[] hits = collector.topDocs().scoreDocs;
Expand Down Expand Up @@ -371,7 +385,7 @@ public Key getKey(String name) throws NoSuchKeyException {
* @see org.crosswire.jsword.index.Index#close()
*/
public final void close() {
IOUtil.close(searcher);
IOUtil.close(reader);
searcher = null;
IOUtil.close(directory);
directory = null;
Expand All @@ -395,23 +409,29 @@ private void generateSearchIndexImpl(Progress job, List<Key> errors, IndexWriter

String oldRootName = "";
int percent = 0;
String rootName = "";
BookData data = null;
Element osis = null;
String rootName;
BookData data;
Element osis;

// Set up for reuse.
Document doc = new Document();
Field keyField = new Field(FIELD_KEY, "", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO);
Field bodyField = new Field(FIELD_BODY, "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
Field bodyStemField = new Field(FIELD_BODY_STEM, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
Field introField = new Field(FIELD_INTRO, "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
Field introStemField = new Field(FIELD_INTRO_STEM, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
Field strongField = new Field(FIELD_STRONG, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
Field xrefField = new Field(FIELD_XREF, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
Field noteField = new Field(FIELD_NOTE, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
Field headingField = new Field(FIELD_HEADING, "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
Field headingStemField = new Field(FIELD_HEADING_STEM, "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
Field morphologyField = new Field(FIELD_MORPHOLOGY , "", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO);
FieldType storedNotAnalyzed = new FieldType(StringField.TYPE_STORED);
storedNotAnalyzed.setOmitNorms(false);
FieldType strongFieldType = new FieldType(TextField.TYPE_NOT_STORED);
strongFieldType.setStoreTermVectors(true);
// For this change, see 9de01b56ebf252ffefe05e606e330a1787b94c9d:lucene/MIGRATE.txt
Field keyField = new Field(FIELD_KEY, "", storedNotAnalyzed);
Field bodyField = new TextField(FIELD_BODY, "", Field.Store.YES);
Field bodyStemField = new TextField(FIELD_BODY_STEM, "", Field.Store.NO);
Field introField = new TextField(FIELD_INTRO, "", Field.Store.YES);
Field introStemField = new TextField(FIELD_INTRO_STEM, "", Field.Store.NO);
Field strongField = new Field(FIELD_STRONG, "", strongFieldType);
Field xrefField = new TextField(FIELD_XREF, "", Field.Store.NO);
Field noteField = new TextField(FIELD_NOTE, "", Field.Store.NO);
Field headingField = new TextField(FIELD_HEADING, "", Field.Store.YES);
Field headingStemField = new TextField(FIELD_HEADING_STEM, "", Field.Store.NO);
Field morphologyField = new TextField(FIELD_MORPHOLOGY , "", Field.Store.NO);
Field fullText = new StringField(FIELD_FULL_TEXT, "", Field.Store.YES);

int size = key.getCardinality();
int subCount = count;
Expand All @@ -435,11 +455,11 @@ private void generateSearchIndexImpl(Progress job, List<Key> errors, IndexWriter
}

// Remove all fields from the document
doc.getFields().clear();
doc.clear();

// Do the actual indexing
// Always add the key
keyField.setValue(subkey.getOsisRef());
keyField.setStringValue(subkey.getOsisRef());
doc.add(keyField);

final String canonicalText = OSISUtil.getCanonicalText(osis);
Expand All @@ -450,6 +470,9 @@ private void generateSearchIndexImpl(Progress job, List<Key> errors, IndexWriter
addField(doc, bodyField, canonicalText);
addField(doc, bodyStemField, canonicalText);
}
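// osis.getValue() differs from getCanonicalText in that special characters are not separated from words by whitespace.
// If regex search should be case sensitive, remove toLowerCase here.
// NOTE(review): toLowerCase() is called without a Locale, so it uses the JVM default locale; consider Locale.ROOT so the indexed text does not vary by device locale.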
addField(doc, fullText, osis.getValue().toLowerCase());

if (includeStrongs) {
addField(doc, strongField, OSISUtil.getStrongsNumbers(osis));
Expand Down Expand Up @@ -515,7 +538,7 @@ private void generateSearchIndexImpl(Progress job, List<Key> errors, IndexWriter
*/
private void addField(Document doc, Field field, String text) {
if (text != null && text.length() > 0) {
field.setValue(text);
field.setStringValue(text);
doc.add(field);
}
}
Expand All @@ -528,7 +551,7 @@ private void addField(Document doc, Field field, String text) {
* See {@link org.crosswire.jsword.index.IndexManager#closeAllIndexes()} for more information
* @return the searcher
*/
public Searcher getSearcher() {
public IndexSearcher getSearcher() {
return searcher;
}

Expand All @@ -547,10 +570,11 @@ public Searcher getSearcher() {
*/
private Directory directory;

private IndexReader reader;
/**
* The Lucene search engine
*/
private Searcher searcher;
private IndexSearcher searcher;

/**
* A synchronization lock point to prevent us from doing 2 index runs at a
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ public boolean needsReindexing(Book book) {
//should Clients use IndexStatus.INVALID
float installedV = InstalledIndex.instance().getInstalledIndexVersion(book);
if (installedV < IndexMetadata.instance().getLatestIndexVersion(book)) {
log.info("{}: needs reindexing, Installed index version @{}", book.getBookMetaData().getInitials(), Float.toString(installedV));
log.info("{}: needs reindexing, Installed index version @{}", book.getBookMetaData().getInitials(), installedV);
return true;
}
return false;
Expand Down
Loading

0 comments on commit 1445a75

Please sign in to comment.