Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added local file caching to improve performance across repeated runs #1205

Open
wants to merge 10 commits into
base: stage
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ protected void index() {
List<GeneDiseaseAnnotationDocument> list = createGeneDiseaseAnnotationDocuments();
createDiseaseAnnotationsFromOrthology();

List<GeneDiseaseAnnotationDocument> viaOrthologyList = getGeneDiseaseAnnotationViaOrthologyDocuments();
List<GeneDiseaseAnnotationDocument> viaOrthologyList = createGeneDiseaseAnnotationViaOrthologyDocuments();
list.addAll(viaOrthologyList);
log.info("Indexing " + list.size() + " gene documents");
indexDocuments(list);
Expand All @@ -123,10 +123,6 @@ private void createDiseaseAnnotationsFromOrthology() {
geneViaOrthologyMap = geneService.getOrthologousGeneDiseaseAnnotations(generatedImplicatedGeneMap);
}

/**
 * Returns the list produced by {@code createGeneDiseaseAnnotationViaOrthologyDocuments()}.
 * This method adds no logic of its own; it only forwards the call.
 *
 * @return the documents built by {@code createGeneDiseaseAnnotationViaOrthologyDocuments()}
 */
private List<GeneDiseaseAnnotationDocument> getGeneDiseaseAnnotationViaOrthologyDocuments() {
return createGeneDiseaseAnnotationViaOrthologyDocuments();
}

private List<GeneDiseaseAnnotationDocument> createGeneDiseaseAnnotationViaOrthologyDocuments() {
ProcessDisplayHelper ph = new ProcessDisplayHelper(10000);
ph.startProcess("Creating Gene Disease Annotations via Orthology", geneViaOrthologyMap.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
@Consumes({"application/json"})
public interface AGMDiseaseAnnotationInterface extends BaseIdCrudInterface<AGMDiseaseAnnotation> {

@Override
@POST
@Path("/findForPublic")
@JsonView({View.DiseaseAnnotationForPublic.class})
@JsonView({View.ForPublic.class})
SearchResponse<AGMDiseaseAnnotation> findForPublic(@DefaultValue("0") @QueryParam("page") Integer page, @DefaultValue("10") @QueryParam("limit") Integer limit, HashMap<String, Object> params);

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
@Consumes({"application/json"})
public interface AlleleDiseaseAnnotationInterface extends BaseIdCrudInterface<AlleleDiseaseAnnotation> {

@Override
@POST
@Path("/findForPublic")
@JsonView({View.DiseaseAnnotationForPublic.class})
@JsonView({View.ForPublic.class})
SearchResponse<AlleleDiseaseAnnotation> findForPublic(@DefaultValue("0") @QueryParam("page") Integer page, @DefaultValue("10") @QueryParam("limit") Integer limit, HashMap<String, Object> params);

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
@Consumes({"application/json"})
public interface GeneDiseaseAnnotationInterface extends BaseIdCrudInterface<GeneDiseaseAnnotation> {

@Override
@POST
@Path("/findForPublic")
@JsonView({View.DiseaseAnnotationForPublic.class})
@JsonView({View.ForPublic.class})
SearchResponse<GeneDiseaseAnnotation> findForPublic(@DefaultValue("0") @QueryParam("page") Integer page, @DefaultValue("10") @QueryParam("limit") Integer limit, HashMap<String, Object> params);

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.alliancegenome.indexer.indexers.curation.service;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

Expand All @@ -21,9 +22,11 @@ public class AGMDiseaseAnnotationService extends BaseDiseaseAnnotationService {

public List<AGMDiseaseAnnotation> getFiltered() {

List<AGMDiseaseAnnotation> ret = readFromCache(cacheFileName, AGMDiseaseAnnotation.class);
if (ret.size() > 0) {
List<AGMDiseaseAnnotation> ret = readFromCache(cacheFileName, List.class);
if (ret != null && ret.size() > 0) {
return ret;
} else {
ret = new ArrayList<>();
}

ProcessDisplayHelper display = new ProcessDisplayHelper(10000);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.alliancegenome.indexer.indexers.curation.service;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

Expand All @@ -21,11 +22,14 @@ public class AlleleDiseaseAnnotationService extends BaseDiseaseAnnotationService

public List<AlleleDiseaseAnnotation> getFiltered() {

List<AlleleDiseaseAnnotation> ret = readFromCache(cacheFileName, AlleleDiseaseAnnotation.class);
if (ret.size() > 0) {
List<AlleleDiseaseAnnotation> ret = readFromCache(cacheFileName, List.class);

if (ret != null && ret.size() > 0) {
return ret;
} else {
ret = new ArrayList<>();
}

ProcessDisplayHelper display = new ProcessDisplayHelper(10000);

int batchSize = 1000;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public BaseDiseaseAnnotationService() {
GeneRepository geneRepository = new GeneRepository();

String alleleIdsFileName = "allele_ids.gz";
List<String> alleleList = readFromCache(alleleIdsFileName, String.class);
List<String> alleleList = readFromCache(alleleIdsFileName, List.class);

if (CollectionUtils.isNotEmpty(alleleList)) {
allAlleleIds = new HashSet<>(alleleList);
Expand All @@ -44,7 +44,7 @@ public BaseDiseaseAnnotationService() {
}

String geneIdsFileName = "gene_ids.gz";
List<String> geneList = readFromCache(geneIdsFileName, String.class);
List<String> geneList = readFromCache(geneIdsFileName, List.class);

if (CollectionUtils.isNotEmpty(geneList)) {
allGeneIDs = new HashSet<>(geneList);
Expand All @@ -55,7 +55,7 @@ public BaseDiseaseAnnotationService() {
log.info("Number of all Gene IDs from Neo4j: " + allGeneIDs.size());

String modelIdsFileName = "model_ids.gz";
List<String> modelList = readFromCache(modelIdsFileName, String.class);
List<String> modelList = readFromCache(modelIdsFileName, List.class);

if (CollectionUtils.isNotEmpty(modelList)) {
allModelIDs = new HashSet<>(modelList);
Expand Down Expand Up @@ -107,14 +107,14 @@ protected boolean hasNoObsoletedOrInternalEntities(DiseaseAnnotation da) {
entitiesToBeValidated.addAll(da.getDiseaseGeneticModifiers());
}
AtomicBoolean hasNoObsoletedOrInternalEntities = new AtomicBoolean(true);
entitiesToBeValidated.forEach(auditedObject -> {
for (AuditedObject auditedObject: entitiesToBeValidated) {
if (auditedObject.getObsolete()) {
hasNoObsoletedOrInternalEntities.set(false);
}
if (auditedObject.getInternal()) {
hasNoObsoletedOrInternalEntities.set(false);
}
});
}
return hasNoObsoletedOrInternalEntities.get();
}

Expand Down Expand Up @@ -169,23 +169,23 @@ protected static boolean isValidEntity(HashSet<String> allEntityIds, String curi
return allEntityIds.contains(curie);
}

/**
 * Loads cached content from the file at {@code fileName} through an {@code ObjectFileStorage},
 * but only when that file exists.
 *
 * NOTE(review): this span appears to be a rendered diff with the +/- markers stripped, so the
 * signature, the read call and the fallback return each show up twice. Presumably the first
 * line of each pair is the pre-change version and the second is its replacement — confirm
 * against the pull request before relying on either.
 *
 * @param fileName path of the cache file to look for
 * @param clazz    type token for {@code E}; it is not referenced anywhere in the visible body
 * @return whatever the storage read call returns when the file exists; otherwise the fallback
 *         value (an empty {@code ArrayList} in the first variant, {@code null} in the second)
 */
protected <E> List<E> readFromCache(String fileName, Class<E> clazz) {
protected <E> E readFromCache(String fileName, Class<E> clazz) {
try {
ObjectFileStorage<E> storage = new ObjectFileStorage<>();
File cache = new File(fileName);
// A missing file is not treated as an error: the read is skipped and the fallback below is returned.
if (cache.exists()) {
return storage.readObjectsFromFile(cache);
return storage.readObjectFromFile(cache);
}
} catch (Exception e) {
// Any exception is only printed via printStackTrace() and is not rethrown; control then falls
// through to the fallback return, so callers cannot distinguish "no cache" from "read failed".
e.printStackTrace();
}
// NOTE(review): with the second variant callers receive null here and must null-check the result.
return new ArrayList<>();
return null;
}

protected <E> void writeToCache(String fileName, List<E> objects) {
protected <E> void writeToCache(String fileName, E object) {
try {
ObjectFileStorage<E> storage = new ObjectFileStorage<>();
storage.writeObjectsToFile(objects, fileName);
storage.writeObjectToFile(object, fileName);
} catch (Exception e) {
e.printStackTrace();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,13 @@ public class GeneDiseaseAnnotationService extends BaseDiseaseAnnotationService {

public List<GeneDiseaseAnnotation> getFiltered() {
ProcessDisplayHelper display = new ProcessDisplayHelper(2000);
List<GeneDiseaseAnnotation> ret = new ArrayList<>();
log.info("Gene IDs #: " + allGeneIDs);
log.info("AGM IDs #: " + allModelIDs);

List<GeneDiseaseAnnotation> ret = readFromCache(cacheFileName, List.class);
if (ret != null && ret.size() > 0) {
return ret;
} else {
ret = new ArrayList<>();
}

int batchSize = 1000;
int page = 0;
Expand Down Expand Up @@ -87,6 +91,18 @@ public List<GeneDiseaseAnnotation> getFiltered() {
public Map<Gene, List<DiseaseAnnotation>> getOrthologousGeneDiseaseAnnotations(Map<String, Pair<Gene, ArrayList<DiseaseAnnotation>>> geneMap) {
ProcessDisplayHelper display = new ProcessDisplayHelper(10000);

String orthoCacheFileName = "gene_disease_via_orthology_annotation.json.gz";

HashMap<Gene, List<DiseaseAnnotation>> newDAMap = readFromCache(orthoCacheFileName, HashMap.class);

if (newDAMap == null) {
newDAMap = new HashMap<>();
}

if (newDAMap != null && newDAMap.size() > 0) {
return newDAMap;
}

HashMap<String, Object> params = new HashMap<>();
params.put("internal", false);
params.put("obsolete", false);
Expand All @@ -98,7 +114,7 @@ public Map<Gene, List<DiseaseAnnotation>> getOrthologousGeneDiseaseAnnotations(M
// hard code MGI:6194238 with corresponding AGRKB ID
Reference allianceReference = referenceService.getReference("AGRKB:101000000828456");

Map<Gene, List<DiseaseAnnotation>> newDAMap = new HashMap<>();

display.startProcess("Creating Gene DA's via orthology", geneMap.size());
// loop over all Markers of validated DiseaseAnnotation records
Set<String> geneIDs = geneMap.keySet();
Expand All @@ -117,7 +133,9 @@ public Map<Gene, List<DiseaseAnnotation>> getOrthologousGeneDiseaseAnnotations(M
continue;
}
// create orthologous DAs for each focus DA
focusDiseaseAnnotations.forEach(focusDiseaseAnnotation -> {

for (DiseaseAnnotation focusDiseaseAnnotation: focusDiseaseAnnotations) {

DiseaseAnnotation gda = null;
if (focusDiseaseAnnotation instanceof AGMDiseaseAnnotation agmda) {
AGMDiseaseAnnotation da = new AGMDiseaseAnnotation();
Expand Down Expand Up @@ -150,11 +168,12 @@ public Map<Gene, List<DiseaseAnnotation>> getOrthologousGeneDiseaseAnnotations(M
gda.setDiseaseAnnotationObject(focusDiseaseAnnotation.getDiseaseAnnotationObject());
gda.setEvidenceCodes(List.of(ecoTermIEA));
gda.setDiseaseQualifiers(focusDiseaseAnnotation.getDiseaseQualifiers());

List<DiseaseAnnotation> geneAnnotations = newDAMap.computeIfAbsent(orthologousGene, k -> new ArrayList<>());
geneAnnotations.add(gda);
});
}
}
display.progressProcess(1L);
display.progressProcess();
}
display.finishProcess();
// consolidating DAs:
Expand All @@ -177,7 +196,10 @@ public Map<Gene, List<DiseaseAnnotation>> getOrthologousGeneDiseaseAnnotations(M

}));
});
System.out.println("Number of orthologous genes generating new DAs: " + newDAMap.size());
log.info("Number of orthologous genes generating new DAs: " + newDAMap.size());

writeToCache(orthoCacheFileName, newDAMap);

return newDAMap;
}
}
8 changes: 7 additions & 1 deletion agr_java_core/pom.xml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.alliancegenome</groupId>
Expand Down Expand Up @@ -133,6 +134,11 @@
<artifactId>commons-math3</artifactId>
<version>3.6.1</version>
</dependency>
<dependency>
<groupId>net.nilosplace</groupId>
<artifactId>process-display</artifactId>
<version>1.0.12</version>
</dependency>
<dependency>
<groupId>org.knowm.xchart</groupId>
<artifactId>xchart</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,11 @@ protected void buildSharedSearchableDocumentMappings() throws IOException {
.standardBigrams()
.build();
new FieldBuilder(builder, "object.name", "text").keyword().sort().build();
new FieldBuilder(builder, "object.curie", "text").keyword().sort().build();
new FieldBuilder(builder, "subject.alleleSymbol.displayText", "text").keyword().sort().build();
new FieldBuilder(builder, "subject.geneSymbol.displayText", "text").keyword().sort().build();
new FieldBuilder(builder, "subject.name", "text").keyword().sort().build();
new FieldBuilder(builder, "subject.modEntityId", "text").keyword().sort().build();
new FieldBuilder(builder, "phenotypeStatements", "text")
.keyword()
.build();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@ public Result getAllGenes() {
query += " OPTIONAL MATCH p2=(g:Gene)-[:ANNOTATED_TO]-(so:SOTerm)";
query += " OPTIONAL MATCH p3=(g:Gene)-[:ALSO_KNOWN_AS]-(syn:Synonym)";
query += " OPTIONAL MATCH p4=(g:Gene)-[:CROSS_REFERENCE]-(cr:CrossReference)";
query += " RETURN g.primaryKey, g.modLocalId, collect(distinct replace(syn.name, ',', '')) as synonyms, collect(distinct cr.name) as crossrefs, g.name, g.symbol, g.geneSynopsis, g.automatedGeneSynopsis, s.primaryKey, gl.chromosome, gl.start, gl.end, gl.strand, so.name";
//query += " RETURN g.primaryKey, g.modLocalId, collect(distinct replace(syn.name, ',', '')) as synonyms, collect(distinct cr.name) as crossrefs, g.name, g.symbol, replace(g.geneSynopsis, '\n', ''), replace(g.automatedGeneSynopsis, '\n', ''), s.primaryKey, gl.chromosome, gl.start, gl.end, gl.strand, so.name";
query += " RETURN g.primaryKey, g.modLocalId, collect(distinct replace(syn.name, ',', '')) as synonyms, collect(distinct cr.name) as crossrefs, g.name, g.symbol, g.geneSynopsis, g.automatedGeneSynopsis, s.primaryKey, gl.chromosome, gl.start, gl.end, gl.strand, so.name";
//query += " RETURN g.primaryKey, g.modLocalId, collect(distinct replace(syn.name, ',', '')) as synonyms, collect(distinct cr.name) as crossrefs, g.name, g.symbol, replace(g.geneSynopsis, '\n', ''), replace(g.automatedGeneSynopsis, '\n', ''), s.primaryKey, gl.chromosome, gl.start, gl.end, gl.strand, so.name";

return queryForResult(query);

Expand Down
Loading
Loading