Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Solr: Try Soft Commit on Indexing #10547

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
6a333e1
try soft commit
qqmyers May 8, 2024
fb322a9
Merge remote-tracking branch 'IQSS/develop' into solr-soft-commit-orig
qqmyers May 20, 2024
1e5b167
keep softcommit short to avoid delays in visibility
qqmyers May 13, 2024
40d85de
add test delay for autosoft, make hardcommit 30s like auto setting
qqmyers May 13, 2024
d25daa7
add 1-2 second delays in tests for softAutocomplete at 1s
qqmyers May 13, 2024
3b2657f
more sleeps
qqmyers May 17, 2024
2d2aacc
more delays
qqmyers May 17, 2024
d64e973
remove commented out deletes
qqmyers May 22, 2024
139c833
more commented out code to remove
qqmyers May 22, 2024
b0f8223
add 1 sec on failing tests
qqmyers May 22, 2024
317038a
add missing perm reindex
qqmyers May 22, 2024
51b89b6
change waiting
qqmyers May 22, 2024
94cb9b0
fix index object and add null check for unit test
qqmyers May 22, 2024
fcaa51e
remove test-specific null check
qqmyers May 22, 2024
d9ccfbd
reindex linking dv
qqmyers May 22, 2024
608444f
general solr release note
qqmyers May 23, 2024
afc5842
more fixes
qqmyers May 23, 2024
b69e7b2
revert change - was correct
qqmyers May 23, 2024
8ec281a
another sleepforsearch
qqmyers May 23, 2024
da768b4
test adding explicit reindexing
qqmyers May 23, 2024
1327cd9
avoid other uses of cache in test that looks for exact counts
qqmyers May 23, 2024
7f898d7
Adding longer max sleep, add count param to sleep method
qqmyers May 29, 2024
2ff37bb
Revert "add missing perm reindex"
qqmyers May 30, 2024
0579fed
Merge remote-tracking branch 'IQSS/develop' into solr-soft-commit-orig
qqmyers May 30, 2024
1c6fd45
Merge remote-tracking branch 'IQSS/develop' into solr-soft-commit-orig
qqmyers Jun 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions conf/solr/9.3.0/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@
have some sort of hard autoCommit to limit the log size.
-->
<autoCommit>
<maxTime>${solr.autoCommit.maxTime:15000}</maxTime>
<maxTime>${solr.autoCommit.maxTime:30000}</maxTime>
<openSearcher>false</openSearcher>
</autoCommit>

Expand All @@ -301,7 +301,7 @@
-->

<autoSoftCommit>
<maxTime>${solr.autoSoftCommit.maxTime:-1}</maxTime>
<maxTime>${solr.autoSoftCommit.maxTime:1000}</maxTime>
</autoSoftCommit>

<!-- Update Related Event Listeners
Expand Down
1 change: 1 addition & 0 deletions doc/release-notes/10547-solr-updates.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Multiple improvements have been made to the way Solr indexing and searching is done. Response times should be significantly improved. See the individual PRs in this release for details.
Original file line number Diff line number Diff line change
Expand Up @@ -308,7 +308,7 @@ public Future<String> indexDataverse(Dataverse dataverse, boolean processPaths)
String status;
try {
if (dataverse.getId() != null) {
solrClientService.getSolrClient().add(docs);
solrClientService.getSolrClient().add(docs, COMMIT_WITHIN);
} else {
logger.info("WARNING: indexing of a dataverse with no id attempted");
}
Expand All @@ -317,14 +317,6 @@ public Future<String> indexDataverse(Dataverse dataverse, boolean processPaths)
logger.info(status);
return new AsyncResult<>(status);
}
try {
solrClientService.getSolrClient().commit();
} catch (SolrServerException | IOException ex) {
status = ex.toString();
logger.info(status);
return new AsyncResult<>(status);
}

dvObjectService.updateContentIndexTime(dataverse);
IndexResponse indexResponse = solrIndexService.indexPermissionsForOneDvObject(dataverse);
String msg = "indexed dataverse " + dataverse.getId() + ":" + dataverse.getAlias() + ". Response from permission indexing: " + indexResponse.getMessage();
Expand All @@ -349,6 +341,7 @@ public void indexDatasetInNewTransaction(Long datasetId) { //Dataset dataset) {
private static final Map<Long, Boolean> INDEXING_NOW = new ConcurrentHashMap<>();
// semaphore for async indexing
private static final Semaphore ASYNC_INDEX_SEMAPHORE = new Semaphore(JvmSettings.MAX_ASYNC_INDEXES.lookupOptional(Integer.class).orElse(4), true);
static final int COMMIT_WITHIN = 30000; //Same as current autoHardIndex time

@Inject
@Metric(name = "index_permit_wait_time", absolute = true, unit = MetricUnits.NANOSECONDS,
Expand Down Expand Up @@ -1525,8 +1518,7 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset, Set<Long> d
final SolrInputDocuments docs = toSolrDocs(indexableDataset, datafilesInDraftVersion);

try {
solrClientService.getSolrClient().add(docs.getDocuments());
solrClientService.getSolrClient().commit();
solrClientService.getSolrClient().add(docs.getDocuments(), COMMIT_WITHIN);
} catch (SolrServerException | IOException ex) {
if (ex.getCause() instanceof SolrServerException) {
throw new SolrServerException(ex);
Expand Down Expand Up @@ -1778,8 +1770,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc

sid.removeField(SearchFields.SUBTREE);
sid.addField(SearchFields.SUBTREE, paths);
UpdateResponse addResponse = solrClientService.getSolrClient().add(sid);
UpdateResponse commitResponse = solrClientService.getSolrClient().commit();
UpdateResponse addResponse = solrClientService.getSolrClient().add(sid, COMMIT_WITHIN);
if (object.isInstanceofDataset()) {
for (DataFile df : dataset.getFiles()) {
solrQuery.setQuery(SearchUtil.constructQuery(SearchFields.ENTITY_ID, df.getId().toString()));
Expand All @@ -1792,8 +1783,7 @@ private void updatePathForExistingSolrDocs(DvObject object) throws SolrServerExc
}
sid.removeField(SearchFields.SUBTREE);
sid.addField(SearchFields.SUBTREE, paths);
addResponse = solrClientService.getSolrClient().add(sid);
commitResponse = solrClientService.getSolrClient().commit();
addResponse = solrClientService.getSolrClient().add(sid, COMMIT_WITHIN);
}
}
}
Expand Down Expand Up @@ -1835,12 +1825,7 @@ public String delete(Dataverse doomed) {
logger.fine("deleting Solr document for dataverse " + doomed.getId());
UpdateResponse updateResponse;
try {
updateResponse = solrClientService.getSolrClient().deleteById(solrDocIdentifierDataverse + doomed.getId());
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
try {
solrClientService.getSolrClient().commit();
updateResponse = solrClientService.getSolrClient().deleteById(solrDocIdentifierDataverse + doomed.getId(), COMMIT_WITHIN);
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
Expand All @@ -1860,12 +1845,7 @@ public String removeSolrDocFromIndex(String doomed) {
logger.fine("deleting Solr document: " + doomed);
UpdateResponse updateResponse;
try {
updateResponse = solrClientService.getSolrClient().deleteById(doomed);
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
try {
solrClientService.getSolrClient().commit();
updateResponse = solrClientService.getSolrClient().deleteById(doomed, COMMIT_WITHIN);
} catch (SolrServerException | IOException ex) {
return ex.toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,8 +356,7 @@ private void persistToSolr(Collection<SolrInputDocument> docs) throws SolrServer
/**
* @todo Do something with these responses from Solr.
*/
UpdateResponse addResponse = solrClientService.getSolrClient().add(docs);
UpdateResponse commitResponse = solrClientService.getSolrClient().commit();
UpdateResponse addResponse = solrClientService.getSolrClient().add(docs, IndexServiceBean.COMMIT_WITHIN);
}

public IndexResponse indexPermissionsOnSelfAndChildren(long definitionPointId) {
Expand Down Expand Up @@ -497,26 +496,20 @@ public IndexResponse deleteMultipleSolrIds(List<String> solrIdsToDelete) {
return new IndexResponse("nothing to delete");
}
try {
solrClientService.getSolrClient().deleteById(solrIdsToDelete);
solrClientService.getSolrClient().deleteById(solrIdsToDelete, IndexServiceBean.COMMIT_WITHIN);
} catch (SolrServerException | IOException ex) {
/**
* @todo mark these for re-deletion
*/
return new IndexResponse("problem deleting the following documents from Solr: " + solrIdsToDelete);
}
try {
solrClientService.getSolrClient().commit();
} catch (SolrServerException | IOException ex) {
return new IndexResponse("problem committing deletion of the following documents from Solr: " + solrIdsToDelete);
}
return new IndexResponse("no known problem deleting the following documents from Solr:" + solrIdsToDelete);
}

public JsonObjectBuilder deleteAllFromSolrAndResetIndexTimes() throws SolrServerException, IOException {
JsonObjectBuilder response = Json.createObjectBuilder();
logger.info("attempting to delete all Solr documents before a complete re-index");
solrClientService.getSolrClient().deleteByQuery("*:*");
solrClientService.getSolrClient().commit();
solrClientService.getSolrClient().deleteByQuery("*:*", IndexServiceBean.COMMIT_WITHIN);
int numRowsAffected = dvObjectService.clearAllIndexTimes();
response.add(numRowsClearedByClearAllIndexTimes, numRowsAffected);
response.add(messageString, "Solr index and database index timestamps cleared.");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ public void testMoveDataverse() {
while (checkIndex) {
try {
try {
Thread.sleep(4000);
Thread.sleep(6000);
} catch (InterruptedException ex) {
}
Response search = UtilIT.search("id:dataverse_" + dataverseId + "&subtree=" + dataverseAlias2, apiToken);
Expand Down
6 changes: 6 additions & 0 deletions src/test/java/edu/harvard/iq/dataverse/api/LinkIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import static jakarta.ws.rs.core.Response.Status.FORBIDDEN;
import static jakarta.ws.rs.core.Response.Status.OK;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.junit.jupiter.api.Assertions.assertTrue;

import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

Expand Down Expand Up @@ -163,6 +165,8 @@ public void testDeepLinks() {
.statusCode(OK.getStatusCode())
.body("data.message", equalTo("Dataverse " + level1a + " linked successfully to " + level1b));

assertTrue(UtilIT.sleepForSearch("*", apiToken, "&subtree="+level1b, 1, UtilIT.GENERAL_LONG_DURATION), "Zero counts in level1b");

Response searchLevel1toLevel1 = UtilIT.search("*", apiToken, "&subtree=" + level1b);
searchLevel1toLevel1.prettyPrint();
searchLevel1toLevel1.then().assertThat()
Expand All @@ -184,6 +188,8 @@ public void testDeepLinks() {
.statusCode(OK.getStatusCode())
.body("data.message", equalTo("Dataverse " + level2a + " linked successfully to " + level2b));

assertTrue(UtilIT.sleepForSearch("*", apiToken, "&subtree=" + level2b, 1, UtilIT.GENERAL_LONG_DURATION), "Never found linked dataverse: " + level2b);

Response searchLevel2toLevel2 = UtilIT.search("*", apiToken, "&subtree=" + level2b);
searchLevel2toLevel2.prettyPrint();
searchLevel2toLevel2.then().assertThat()
Expand Down
37 changes: 12 additions & 25 deletions src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ public void testSearchPermisions() throws InterruptedException {
assertEquals(200, grantUser2AccessOnDataset.getStatusCode());

String searchPart = "id:dataset_" + datasetId1 + "_draft";
assertTrue(UtilIT.sleepForSearch(searchPart, apiToken2, "", UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if search exceeds max duration " + searchPart);
assertTrue(UtilIT.sleepForSearch(searchPart, apiToken2, "", 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if search exceeds max duration " + searchPart);

Response shouldBeVisibleToUser2 = UtilIT.search("id:dataset_" + datasetId1 + "_draft", apiToken2);
shouldBeVisibleToUser2.prettyPrint();
Expand Down Expand Up @@ -793,14 +793,9 @@ public void testNestedSubtree() {
Response createDataverseResponse2 = UtilIT.createSubDataverse("subDV" + UtilIT.getRandomIdentifier(), null, apiToken, dataverseAlias);
createDataverseResponse2.prettyPrint();
String dataverseAlias2 = UtilIT.getAliasFromResponse(createDataverseResponse2);

String searchPart = "*";

Response searchUnpublishedSubtree = UtilIT.search(searchPart, apiToken, "&subtree="+dataverseAlias);
searchUnpublishedSubtree.prettyPrint();
searchUnpublishedSubtree.then().assertThat()
.statusCode(OK.getStatusCode())
.body("data.total_count", CoreMatchers.equalTo(1));
assertTrue(UtilIT.sleepForSearch(searchPart, apiToken, "&subtree=" + dataverseAlias, 1, UtilIT.GENERAL_LONG_DURATION), "Missing subDV");

Response searchUnpublishedSubtree2 = UtilIT.search(searchPart, apiToken, "&subtree="+dataverseAlias2);
searchUnpublishedSubtree2.prettyPrint();
Expand Down Expand Up @@ -863,18 +858,8 @@ public void testNestedSubtree() {
publishDataset.then().assertThat()
.statusCode(OK.getStatusCode());
UtilIT.sleepForReindex(datasetPid, apiToken, 5);
Response searchPublishedSubtreeWDS = UtilIT.search(searchPart, apiToken, "&subtree="+dataverseAlias);
searchPublishedSubtreeWDS.prettyPrint();
searchPublishedSubtreeWDS.then().assertThat()
.statusCode(OK.getStatusCode())
.body("data.total_count", CoreMatchers.equalTo(2));

Response searchPublishedSubtreeWDS2 = UtilIT.search(searchPart, apiToken, "&subtree="+dataverseAlias2);
searchPublishedSubtreeWDS2.prettyPrint();
searchPublishedSubtreeWDS2.then().assertThat()
.statusCode(OK.getStatusCode())
.body("data.total_count", CoreMatchers.equalTo(1));

assertTrue(UtilIT.sleepForSearch(searchPart, apiToken, "&subtree=" + dataverseAlias, 2, UtilIT.GENERAL_LONG_DURATION), "Did not find 2 children");
assertTrue(UtilIT.sleepForSearch(searchPart, apiToken, "&subtree=" + dataverseAlias2, 1, UtilIT.GENERAL_LONG_DURATION), "Did not find 1 child");
}

//If this test fails it'll fail inconsistently as it tests underlying async role code
Expand Down Expand Up @@ -906,16 +891,16 @@ public void testCuratorCardDataversePopulation() throws InterruptedException {
String subDataverseAlias = "dv" + UtilIT.getRandomIdentifier();
Response createSubDataverseResponse = UtilIT.createSubDataverse(subDataverseAlias, null, apiTokenSuper, parentDataverseAlias);
createSubDataverseResponse.prettyPrint();
//UtilIT.getAliasFromResponse(createSubDataverseResponse);


Response grantRoleOnDataverseResponse = UtilIT.grantRoleOnDataverse(subDataverseAlias, "curator", "@" + username, apiTokenSuper);
grantRoleOnDataverseResponse.then().assertThat()
.statusCode(OK.getStatusCode());

String searchPart = "*";

assertTrue(UtilIT.sleepForSearch(searchPart, apiToken, "&subtree="+parentDataverseAlias, 1, UtilIT.GENERAL_LONG_DURATION), "Failed test if search exceeds max duration " + searchPart);

Response searchPublishedSubtreeSuper = UtilIT.search(searchPart, apiTokenSuper, "&subtree="+parentDataverseAlias);
assertTrue(UtilIT.sleepForSearch(searchPart, apiToken, "&subtree="+parentDataverseAlias, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Failed test if search exceeds max duration " + searchPart);
searchPublishedSubtreeSuper.prettyPrint();
searchPublishedSubtreeSuper.then().assertThat()
.statusCode(OK.getStatusCode())
Expand Down Expand Up @@ -968,7 +953,7 @@ public void testSubtreePermissions() {
.statusCode(OK.getStatusCode());

// Wait a little while for the index to pick up the datasets, otherwise timing issue with searching for it.
UtilIT.sleepForReindex(datasetId2.toString(), apiToken, 2);
UtilIT.sleepForReindex(datasetId2.toString(), apiToken, 3);

String identifier = JsonPath.from(datasetAsJson.getBody().asString()).getString("data.identifier");
String identifier2 = JsonPath.from(datasetAsJson2.getBody().asString()).getString("data.identifier");
Expand Down Expand Up @@ -1077,6 +1062,8 @@ public void testSubtreePermissions() {
.statusCode(OK.getStatusCode())
.body("data.total_count", CoreMatchers.equalTo(2));

assertTrue(UtilIT.sleepForSearch(searchPart, null, "&subtree=" + dataverseAlias2, 1, UtilIT.MAXIMUM_INGEST_LOCK_DURATION), "Missing dataset w/no apiKey");

Response searchPublishedSubtreesNoAPI = UtilIT.search(searchPart, null, "&subtree="+dataverseAlias+"&subtree="+dataverseAlias2);
searchPublishedSubtreesNoAPI.prettyPrint();
searchPublishedSubtreesNoAPI.then().assertThat()
Expand Down
19 changes: 16 additions & 3 deletions src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public class UtilIT {
private static final String EMPTY_STRING = "";
public static final int MAXIMUM_INGEST_LOCK_DURATION = 15;
public static final int MAXIMUM_PUBLISH_LOCK_DURATION = 20;
public static final int GENERAL_LONG_DURATION = 45; //Useful when multiple adds/publishes, etc. all get done in sequence
public static final int MAXIMUM_IMPORT_DURATION = 1;

private static SwordConfigurationImpl swordConfiguration = new SwordConfigurationImpl();
Expand Down Expand Up @@ -2844,6 +2845,13 @@ static boolean sleepForReindex(String idOrPersistentId, String apiToken, int dur
i = repeats + 1;
}
} while ((i <= repeats) && stale);
try {
Thread.sleep(1000); //Current autoSoftIndexTime - which adds a delay to when the new docs are visible
i++;
} catch (InterruptedException ex) {
Logger.getLogger(UtilIT.class.getName()).log(Level.SEVERE, null, ex);
i = repeats + 1;
}
System.out.println("Waited " + (i * (sleepStep / 1000.0)) + " seconds");
return i <= repeats;

Expand Down Expand Up @@ -2899,10 +2907,15 @@ static Boolean sleepForDeadlock(int duration) {

//Helper function that returns true if a given search returns a non-zero response within a fixed time limit
// a given duration returns false if still zero results after given duration
static Boolean sleepForSearch(String searchPart, String apiToken, String subTree, int duration) {
static Boolean sleepForSearch(String searchPart, String apiToken, String subTree, int count, int duration) {


Response searchResponse = UtilIT.search(searchPart, apiToken, subTree);
//Leave early if search isn't working
if(searchResponse.statusCode()!=200) {
logger.warning("Non-200 status in sleepForSearch: " + searchResponse.statusCode());
return false;
}
int i = 0;
do {
try {
Expand All @@ -2915,8 +2928,8 @@ static Boolean sleepForSearch(String searchPart, String apiToken, String subTre
} catch (InterruptedException ex) {
Logger.getLogger(UtilIT.class.getName()).log(Level.SEVERE, null, ex);
}
} while (UtilIT.getSearchCountFromResponse(searchResponse) == 0);

} while (UtilIT.getSearchCountFromResponse(searchResponse) != count);
logger.info("Waited " + i + " seconds in sleepForSearch");
return i <= duration;

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseRoleServiceBean;
import edu.harvard.iq.dataverse.DvObject;
import edu.harvard.iq.dataverse.RoleAssignment;
import edu.harvard.iq.dataverse.authorization.DataverseRole;
import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser;
Expand All @@ -11,9 +13,14 @@
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.privateurl.PrivateUrl;
import edu.harvard.iq.dataverse.privateurl.PrivateUrlServiceBean;
import edu.harvard.iq.dataverse.search.IndexResponse;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.search.SolrIndexServiceBean;
import edu.harvard.iq.dataverse.util.SystemConfig;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Future;

import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

Expand Down Expand Up @@ -89,6 +96,16 @@ public String getDataverseSiteUrl() {
};

}

@Override
public SolrIndexServiceBean solrIndex() {
return new SolrIndexServiceBean(){
@Override
public IndexResponse indexPermissionsOnSelfAndChildren(DvObject definitionPoint) {
return null;
}
};
}

}
);
Expand Down
Loading
Loading