Skip to content

Commit

Permalink
fix #2798 Enhance IndexingHelper with additional JUnit tests
Browse files Browse the repository at this point in the history
  • Loading branch information
marevol committed Jan 15, 2024
1 parent b68736b commit 938f4a4
Show file tree
Hide file tree
Showing 2 changed files with 603 additions and 22 deletions.
92 changes: 70 additions & 22 deletions src/main/java/org/codelibs/fess/helper/IndexingHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.search.TotalHits;
import org.codelibs.fess.es.client.SearchEngineClient;
import org.codelibs.fess.mylasta.direction.FessConfig;
import org.codelibs.fess.thumbnail.ThumbnailManager;
import org.codelibs.fess.util.ComponentUtil;
import org.codelibs.fess.util.DocList;
import org.codelibs.fess.util.MemoryUtil;
import org.opensearch.action.admin.indices.refresh.RefreshResponse;
import org.opensearch.action.search.SearchResponse;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryBuilders;
Expand All @@ -52,19 +54,23 @@ public void sendDocuments(final SearchEngineClient searchEngineClient, final Doc
try {
if (fessConfig.isThumbnailCrawlerEnabled()) {
final ThumbnailManager thumbnailManager = ComponentUtil.getThumbnailManager();
final String thumbnailField = fessConfig.getIndexFieldThumbnail();
docList.stream().forEach(doc -> {
if (!thumbnailManager.offer(doc)) {
if (logger.isDebugEnabled()) {
logger.debug("Removing {}={} from doc[{}]", fessConfig.getIndexFieldThumbnail(),
doc.get(fessConfig.getIndexFieldThumbnail()), doc.get(fessConfig.getIndexFieldUrl()));
logger.debug("Removing {}={} from doc[{}]", thumbnailField, doc.get(thumbnailField),
doc.get(fessConfig.getIndexFieldUrl()));
}
doc.remove(fessConfig.getIndexFieldThumbnail());
doc.remove(thumbnailField);
}
});
}
final CrawlingConfigHelper crawlingConfigHelper = ComponentUtil.getCrawlingConfigHelper();
synchronized (searchEngineClient) {
deleteOldDocuments(searchEngineClient, docList);
final long deletedDocCount = deleteOldDocuments(searchEngineClient, docList);
if (logger.isDebugEnabled()) {
logger.debug("Deleted {} old docs", deletedDocCount);
}
searchEngineClient.addAll(fessConfig.getIndexDocumentUpdateIndex(), docList, (doc, builder) -> {
final String configId = (String) doc.get(fessConfig.getIndexFieldConfigId());
crawlingConfigHelper.getPipeline(configId).ifPresent(s -> builder.setPipeline(s));
Expand All @@ -85,7 +91,7 @@ public void sendDocuments(final SearchEngineClient searchEngineClient, final Doc
}
}

private void deleteOldDocuments(final SearchEngineClient searchEngineClient, final DocList docList) {
protected long deleteOldDocuments(final SearchEngineClient searchEngineClient, final DocList docList) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();

final List<String> docIdList = new ArrayList<>();
Expand All @@ -108,7 +114,7 @@ private void deleteOldDocuments(final SearchEngineClient searchEngineClient, fin
new String[] { fessConfig.getIndexFieldId(), fessConfig.getIndexFieldDocId() });
for (final Map<String, Object> doc : docs) {
final Object oldIdValue = doc.get(fessConfig.getIndexFieldId());
if (!idValue.equals(oldIdValue) && oldIdValue != null) {
if (oldIdValue != null && !idValue.equals(oldIdValue)) {
final Object oldDocIdValue = doc.get(fessConfig.getIndexFieldDocId());
if (oldDocIdValue != null) {
docIdList.add(oldDocIdValue.toString());
Expand All @@ -120,9 +126,10 @@ private void deleteOldDocuments(final SearchEngineClient searchEngineClient, fin
}
}
if (!docIdList.isEmpty()) {
searchEngineClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(),
return deleteDocumentByQuery(searchEngineClient, fessConfig.getIndexDocumentUpdateIndex(),
QueryBuilders.termsQuery(fessConfig.getIndexFieldDocId(), docIdList.stream().toArray(n -> new String[n])));
}
return 0L;
}

public boolean updateDocument(final SearchEngineClient searchEngineClient, final String id, final String field, final Object value) {
Expand All @@ -137,19 +144,23 @@ public boolean deleteDocument(final SearchEngineClient searchEngineClient, final

/**
 * Deletes the documents in the update index whose URL field equals the given URL.
 *
 * @param searchEngineClient client used to issue the delete-by-query request
 * @param url value matched (term query) against the field named by {@code fessConfig.getIndexFieldUrl()}
 * @return the count returned by the delete-by-query call
 */
public long deleteDocumentByUrl(final SearchEngineClient searchEngineClient, final String url) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
// NOTE(review): the next line appears to be the pre-commit side of this diff hunk (direct client call) - confirm.
return searchEngineClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(),
// NOTE(review): the next line appears to be the post-commit side: it delegates to the three-argument deleteDocumentByQuery overload.
return deleteDocumentByQuery(searchEngineClient, fessConfig.getIndexDocumentUpdateIndex(),
QueryBuilders.termQuery(fessConfig.getIndexFieldUrl(), url));
}

/**
 * Deletes the documents in the update index whose doc-id field matches any of the given ids.
 *
 * @param searchEngineClient client used to issue the delete-by-query request
 * @param docIdList doc ids, converted to a {@code String[]} and used in a terms query on
 *        the field named by {@code fessConfig.getIndexFieldDocId()}
 * @return the count returned by the delete-by-query call
 */
public long deleteDocumentsByDocId(final SearchEngineClient searchEngineClient, final List<String> docIdList) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
// NOTE(review): the next line appears to be the pre-commit side of this diff hunk (direct client call) - confirm.
return searchEngineClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(),
// NOTE(review): the next line appears to be the post-commit side: it delegates to the three-argument deleteDocumentByQuery overload.
return deleteDocumentByQuery(searchEngineClient, fessConfig.getIndexDocumentUpdateIndex(),
QueryBuilders.termsQuery(fessConfig.getIndexFieldDocId(), docIdList.stream().toArray(n -> new String[n])));
}

/**
 * Deletes the documents in the update index that match the given query.
 *
 * @param searchEngineClient client used to issue the delete-by-query request
 * @param queryBuilder query selecting the documents to delete
 * @return the count returned by the delete-by-query call
 */
public long deleteDocumentByQuery(final SearchEngineClient searchEngineClient, final QueryBuilder queryBuilder) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
// NOTE(review): the next line appears to be the pre-commit side of this diff hunk (direct client call) - confirm.
return searchEngineClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(), queryBuilder);
// NOTE(review): the next line appears to be the post-commit side: it delegates to the three-argument overload with the update index.
return deleteDocumentByQuery(searchEngineClient, fessConfig.getIndexDocumentUpdateIndex(), queryBuilder);
}

/**
 * Runs a delete-by-query against the given index.
 *
 * @param searchEngineClient client that executes the request
 * @param index name of the index to delete from
 * @param queryBuilder query selecting the documents to delete
 * @return the value returned by {@code searchEngineClient.deleteByQuery(index, queryBuilder)}
 */
protected long deleteDocumentByQuery(final SearchEngineClient searchEngineClient, final String index, final QueryBuilder queryBuilder) {
    final long deletedCount = searchEngineClient.deleteByQuery(index, queryBuilder);
    return deletedCount;
}

public Map<String, Object> getDocument(final SearchEngineClient searchEngineClient, final String id, final String[] fields) {
Expand All @@ -168,9 +179,9 @@ public List<Map<String, Object>> getDocumentListByPrefixId(final SearchEngineCli
return getDocumentListByQuery(searchEngineClient, queryBuilder, fields);
}

/**
 * Deletes the documents in the update index whose parent-id field equals the given id.
 *
 * @param searchEngineClient client used to issue the delete-by-query request
 * @param id value matched (term query) against the field named by {@code fessConfig.getIndexFieldParentId()}
 */
// NOTE(review): two signatures follow; the {@code void} one appears to be the pre-commit side of this diff hunk and
// the {@code long} one the post-commit side, which returns the delete-by-query result - confirm.
public void deleteChildDocument(final SearchEngineClient searchEngineClient, final String id) {
public long deleteChildDocument(final SearchEngineClient searchEngineClient, final String id) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
// NOTE(review): pre-commit statement (result discarded), followed by its post-commit replacement (result returned).
searchEngineClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(),
return searchEngineClient.deleteByQuery(fessConfig.getIndexDocumentUpdateIndex(),
QueryBuilders.termQuery(fessConfig.getIndexFieldParentId(), id));
}

Expand All @@ -185,9 +196,7 @@ protected List<Map<String, Object>> getDocumentListByQuery(final SearchEngineCli
final String[] fields) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();

final SearchResponse countResponse = searchEngineClient.prepareSearch(fessConfig.getIndexDocumentUpdateIndex())
.setQuery(queryBuilder).setSize(0).execute().actionGet(fessConfig.getIndexSearchTimeout());
final long numFound = countResponse.getHits().getTotalHits().value;
final long numFound = getDocumentSizeByQuery(searchEngineClient, queryBuilder, fessConfig);
final long maxSearchDocSize = fessConfig.getIndexerMaxSearchDocSizeAsInteger().longValue();
final boolean exceeded = numFound > maxSearchDocSize;
if (exceeded) {
Expand Down Expand Up @@ -217,35 +226,74 @@ protected List<Map<String, Object>> getDocumentListByQuery(final SearchEngineCli
});
}

/**
 * Counts the documents in the update index that match the given query.
 *
 * <p>Issues a size-0 search with total-hit tracking enabled and reads the total from the response.</p>
 *
 * @param searchEngineClient client used to run the search
 * @param queryBuilder query whose matches are counted
 * @param fessConfig supplies the update index name and the search timeout
 * @return the total hit value, or {@code 0} when the response carries no total hits
 */
protected long getDocumentSizeByQuery(final SearchEngineClient searchEngineClient, final QueryBuilder queryBuilder,
        final FessConfig fessConfig) {
    final SearchResponse response = searchEngineClient.prepareSearch(fessConfig.getIndexDocumentUpdateIndex())
            .setQuery(queryBuilder)
            .setSize(0)
            .setTrackTotalHits(true)
            .execute()
            .actionGet(fessConfig.getIndexSearchTimeout());
    final TotalHits hits = response.getHits().getTotalHits();
    return hits == null ? 0L : hits.value;
}

/**
 * Deletes documents for the given session id from the update index, using the
 * search engine client and configuration obtained from {@code ComponentUtil}.
 *
 * @param sessionId session id passed through to the three-argument overload
 * @return the value returned by the three-argument overload
 */
public long deleteBySessionId(final String sessionId) {
    final SearchEngineClient client = ComponentUtil.getSearchEngineClient();
    final String updateIndex = ComponentUtil.getFessConfig().getIndexDocumentUpdateIndex();
    return deleteBySessionId(client, updateIndex, sessionId);
}

/**
 * Deletes the documents in the given index whose segment field equals the session id.
 *
 * @param searchEngineClient client used to issue the request
 * @param index name of the index to delete from
 * @param sessionId value matched (term query) against the field named by {@code fessConfig.getIndexFieldSegment()}
 * @return the count returned by {@code deleteByQueryBuilder}
 */
public long deleteBySessionId(final SearchEngineClient searchEngineClient, final String index, final String sessionId) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final QueryBuilder queryBuilder = QueryBuilders.termQuery(fessConfig.getIndexFieldSegment(), sessionId);
// NOTE(review): the first return appears to be the pre-commit side of this diff hunk (two-argument helper);
// the second is the post-commit side, which passes the client explicitly - confirm.
return deleteByQueryBuilder(index, queryBuilder);
return deleteByQueryBuilder(searchEngineClient, index, queryBuilder);
}

/**
 * Deletes documents for the given config id from the update index, using the
 * search engine client and configuration obtained from {@code ComponentUtil}.
 *
 * @param configId config id passed through to the three-argument overload
 * @return the value returned by the three-argument overload
 */
public long deleteByConfigId(final String configId) {
    final SearchEngineClient client = ComponentUtil.getSearchEngineClient();
    final String updateIndex = ComponentUtil.getFessConfig().getIndexDocumentUpdateIndex();
    return deleteByConfigId(client, updateIndex, configId);
}

/**
 * Deletes the documents in the given index whose config-id field equals the given config id.
 *
 * @param searchEngineClient client used to issue the request
 * @param index name of the index to delete from
 * @param configId value matched (term query) against the field named by {@code fessConfig.getIndexFieldConfigId()}
 * @return the count returned by {@code deleteByQueryBuilder}
 */
public long deleteByConfigId(final SearchEngineClient searchEngineClient, final String index, final String configId) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final QueryBuilder queryBuilder = QueryBuilders.termQuery(fessConfig.getIndexFieldConfigId(), configId);
// NOTE(review): the first return appears to be the pre-commit side of this diff hunk (two-argument helper);
// the second is the post-commit side, which passes the client explicitly - confirm.
return deleteByQueryBuilder(index, queryBuilder);
return deleteByQueryBuilder(searchEngineClient, index, queryBuilder);
}

/**
 * Deletes documents for the given virtual host from the update index, using the
 * search engine client and configuration obtained from {@code ComponentUtil}.
 *
 * @param virtualHost virtual host value passed through to the three-argument overload
 * @return the value returned by the three-argument overload
 */
public long deleteByVirtualHost(final String virtualHost) {
    final SearchEngineClient client = ComponentUtil.getSearchEngineClient();
    final String updateIndex = ComponentUtil.getFessConfig().getIndexDocumentUpdateIndex();
    return deleteByVirtualHost(client, updateIndex, virtualHost);
}

/**
 * Deletes the documents in the given index whose virtual-host field equals the given value.
 *
 * @param searchEngineClient client used to issue the request
 * @param index name of the index to delete from
 * @param virtualHost value matched (term query) against the field named by {@code fessConfig.getIndexFieldVirtualHost()}
 * @return the count returned by {@code deleteByQueryBuilder}
 */
public long deleteByVirtualHost(final SearchEngineClient searchEngineClient, final String index, final String virtualHost) {
final FessConfig fessConfig = ComponentUtil.getFessConfig();
final QueryBuilder queryBuilder = QueryBuilders.termQuery(fessConfig.getIndexFieldVirtualHost(), virtualHost);
// NOTE(review): the first return appears to be the pre-commit side of this diff hunk (two-argument helper);
// the second is the post-commit side, which passes the client explicitly - confirm.
return deleteByQueryBuilder(index, queryBuilder);
return deleteByQueryBuilder(searchEngineClient, index, queryBuilder);
}

/**
 * Refreshes the given index, then deletes the documents matching the query and logs the count.
 *
 * @param index name of the index to refresh and delete from
 * @param queryBuilder query selecting the documents to delete
 * @return the count returned by {@code searchEngineClient.deleteByQuery}
 */
// NOTE(review): this span interleaves both sides of a diff hunk. The first signature and the two statements
// after it appear to be the pre-commit version (client looked up via ComponentUtil, inline refresh); the second
// signature, which takes the client as a parameter and calls refreshIndex, appears to be the post-commit version - confirm.
protected long deleteByQueryBuilder(final String index, final QueryBuilder queryBuilder) {
final SearchEngineClient searchEngineClient = ComponentUtil.getSearchEngineClient();
searchEngineClient.admin().indices().prepareRefresh(index).execute().actionGet();
protected long deleteByQueryBuilder(final SearchEngineClient searchEngineClient, final String index, final QueryBuilder queryBuilder) {
// Refresh before deleting; presumably so recently indexed documents are visible to the query - TODO confirm.
refreshIndex(searchEngineClient, index);
final long numOfDeleted = searchEngineClient.deleteByQuery(index, queryBuilder);
// NOTE(review): the info-level log appears to be the pre-commit line; the debug-guarded log below replaces it.
logger.info("Deleted {} old docs.", numOfDeleted);
if (logger.isDebugEnabled()) {
logger.debug("Deleted {} old docs.", numOfDeleted);
}
return numOfDeleted;
}

/**
 * Refreshes the given index and reports the outcome.
 *
 * <p>When debug logging is enabled, the response status and the total, successful
 * and failed shard counts are logged.</p>
 *
 * @param searchEngineClient client whose admin API performs the refresh
 * @param index name of the index to refresh
 * @return the integer status code of the refresh response
 */
protected int refreshIndex(final SearchEngineClient searchEngineClient, final String index) {
    final RefreshResponse refreshResult = searchEngineClient.admin()
            .indices()
            .prepareRefresh(index)
            .execute()
            .actionGet();
    if (logger.isDebugEnabled()) {
        logger.debug("[{}] refresh status: {} ({}/{}/{})", index, refreshResult.getStatus(),
                refreshResult.getTotalShards(), refreshResult.getSuccessfulShards(), refreshResult.getFailedShards());
    }
    return refreshResult.getStatus().getStatus();
}

/**
 * Computes the size of a document map by delegating to {@code MemoryUtil.sizeOf}.
 *
 * @param dataMap document fields to measure
 * @return the size reported by {@code MemoryUtil.sizeOf(dataMap)}
 */
public long calculateDocumentSize(final Map<String, Object> dataMap) {
    final long size = MemoryUtil.sizeOf(dataMap);
    return size;
}
Expand Down
Loading

0 comments on commit 938f4a4

Please sign in to comment.