Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cleanup index when opening a library #8962

Merged
merged 3 commits into from
Jul 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We fixed an issue where removing several groups deletes only one of them. [#8390](https://github.com/JabRef/jabref/issues/8390)
- We fixed an issue where the Sidepane (groups, web search and open office) width is not remembered after restarting JabRef. [#8907](https://github.com/JabRef/jabref/issues/8907)
- We fixed a bug where switching between themes will cause an error/exception. [#8939](https://github.com/JabRef/jabref/pull/8939)
- We fixed a bug where files that were deleted in the source bibtex file were kept in the index. [#8962](https://github.com/JabRef/jabref/pull/8962)

### Removed

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/jabref/gui/LibraryTab.java
Original file line number Diff line number Diff line change
Expand Up @@ -857,7 +857,7 @@ private class IndexUpdateListener {
public IndexUpdateListener() {
if (preferencesService.getFilePreferences().shouldFulltextIndexLinkedFiles()) {
try {
indexingTaskManager.addToIndex(PdfIndexer.of(bibDatabaseContext, preferencesService.getFilePreferences()), bibDatabaseContext);
indexingTaskManager.updateIndex(PdfIndexer.of(bibDatabaseContext, preferencesService.getFilePreferences()), bibDatabaseContext);
} catch (IOException e) {
LOGGER.error("Cannot access lucene index", e);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ private void rebuildIndex() {
}
try {
currentLibraryTab.get().getIndexingTaskManager().createIndex(PdfIndexer.of(databaseContext, filePreferences));
currentLibraryTab.get().getIndexingTaskManager().addToIndex(PdfIndexer.of(databaseContext, filePreferences), databaseContext);
currentLibraryTab.get().getIndexingTaskManager().updateIndex(PdfIndexer.of(databaseContext, filePreferences), databaseContext);
} catch (IOException e) {
dialogService.notify(Localization.lang("Failed to access fulltext search index"));
LOGGER.error("Failed to access fulltext search index", e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.util.List;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ConcurrentLinkedQueue;

import org.jabref.gui.util.BackgroundTask;
Expand Down Expand Up @@ -88,12 +89,17 @@ public void createIndex(PdfIndexer indexer) {
enqueueTask(() -> indexer.createIndex());
}

public void addToIndex(PdfIndexer indexer, BibDatabaseContext databaseContext) {
/**
 * Brings the index in line with the given library: queues an indexing task for every file
 * linked from an entry, then queues a removal task for every path that the index reports
 * but that no entry links to.
 *
 * @param indexer         the indexer used to read the indexed paths and to run the add/remove operations
 * @param databaseContext the library whose entries and linked files are walked
 */
public void updateIndex(PdfIndexer indexer, BibDatabaseContext databaseContext) {
    // Begin with every path the index reports; each path still linked is struck off below,
    // so what remains afterwards is exactly the set of paths to drop.
    Set<String> stalePaths = indexer.getListOfFilePaths();
    databaseContext.getEntries().forEach(entry ->
            entry.getFiles().forEach(linkedFile -> {
                enqueueTask(() -> indexer.addToIndex(entry, linkedFile, databaseContext));
                stalePaths.remove(linkedFile.getLink());
            }));
    stalePaths.forEach(stalePath -> enqueueTask(() -> indexer.removeFromIndex(stalePath)));
}

public void addToIndex(PdfIndexer indexer, BibEntry entry, BibDatabaseContext databaseContext) {
Expand All @@ -108,7 +114,7 @@ public void addToIndex(PdfIndexer indexer, BibEntry entry, List<LinkedFile> link

public void removeFromIndex(PdfIndexer indexer, BibEntry entry, List<LinkedFile> linkedFiles) {
for (LinkedFile file : linkedFiles) {
enqueueTask(() -> indexer.removeFromIndex(entry, file));
enqueueTask(() -> indexer.removeFromIndex(file.getLink()));
}
}

Expand Down
38 changes: 30 additions & 8 deletions src/main/java/org/jabref/logic/pdf/search/indexing/PdfIndexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.TimeUnit;

import org.jabref.gui.LibraryTab;
Expand All @@ -25,6 +27,8 @@
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
Expand Down Expand Up @@ -110,19 +114,16 @@ public void addToIndex(BibEntry entry, LinkedFile linkedFile, BibDatabaseContext
}

/**
* Removes a pdf file linked to one entry in the database from the index
* Removes a pdf file identified by its path from the index
*
* @param entry the entry the file is linked to
* @param linkedFile the link to the file to be removed
* @param linkedFilePath the path to the file to be removed
*/
public void removeFromIndex(BibEntry entry, LinkedFile linkedFile) {
public void removeFromIndex(String linkedFilePath) {
try (IndexWriter indexWriter = new IndexWriter(
directoryToIndex,
new IndexWriterConfig(
new EnglishStemAnalyzer()).setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
if (!entry.getFiles().isEmpty()) {
indexWriter.deleteDocuments(new Term(SearchFieldConstants.PATH, linkedFile.getLink()));
}
indexWriter.deleteDocuments(new Term(SearchFieldConstants.PATH, linkedFilePath));
indexWriter.commit();
} catch (IOException e) {
LOGGER.warn("Could not initialize the IndexWriter!", e);
Expand All @@ -145,7 +146,7 @@ public void removeFromIndex(BibEntry entry) {
*/
public void removeFromIndex(BibEntry entry, List<LinkedFile> linkedFiles) {
for (LinkedFile linkedFile : linkedFiles) {
removeFromIndex(entry, linkedFile);
removeFromIndex(linkedFile.getLink());
}
}

Expand Down Expand Up @@ -224,4 +225,25 @@ private void writeToIndex(BibEntry entry, LinkedFile linkedFile) {
LOGGER.warn("Could not add the document {} to the index!", linkedFile.getLink(), e);
}
}

/**
 * Lists the paths of all the files that are stored in the index.
 * <p>
 * The result is a freshly created, mutable set, so callers may remove elements from it.
 * If the index cannot be opened or read, the paths collected up to that point are returned
 * (an empty set when the index could not be opened at all).
 *
 * @return all file paths found in the index, without duplicates; never {@code null}
 */
public Set<String> getListOfFilePaths() {
    Set<String> paths = new HashSet<>();
    try (IndexReader reader = DirectoryReader.open(directoryToIndex)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        MatchAllDocsQuery query = new MatchAllDocsQuery();
        TopDocs allDocs = searcher.search(query, Integer.MAX_VALUE);
        // Only the path is needed, so do not load the other stored fields of each document.
        Set<String> fieldsToLoad = Set.of(SearchFieldConstants.PATH);
        for (ScoreDoc scoreDoc : allDocs.scoreDocs) {
            Document doc = reader.document(scoreDoc.doc, fieldsToLoad);
            String path = doc.get(SearchFieldConstants.PATH);
            // A document without a path field must not break the listing (or add null to the set).
            if (path != null) {
                paths.add(path);
            }
        }
    } catch (IOException e) {
        // Best effort: keep returning what was collected, but leave a trace instead of failing silently.
        LOGGER.debug("Could not read the list of indexed file paths", e);
    }
    return paths;
}
}