Skip to content

Commit

Permalink
Fixed a bug where not all arguments would have their preferred name set.
Browse files Browse the repository at this point in the history
The issue was that the EventPostProcessingService built a Set<Argument>
of all event arguments and gave each member of this set its preferred
name. However, each event has its very own set of Argument instances.
Assume that two events share one argument with name 'AHR'. Then, both
events will have their own Argument object representing 'AHR', a and a'.
When a and a' are put into a set, only one of the two will actually be
contained in the set in the end, because they compare as equal. Thus, only
a or a' gets its preferred name; the other one does not.
  • Loading branch information
khituras committed Jan 22, 2018
1 parent 566c0d0 commit 28ae7ea
Show file tree
Hide file tree
Showing 8 changed files with 106 additions and 90 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,11 @@ public enum ComparisonMode {
private String text;
private ComparisonMode comparisonMode = ComparisonMode.TOP_HOMOLOGY;

public Argument(String geneId, String conceptId, String topHomologyId, String preferredName, String text) {
public Argument(String geneId, String conceptId, String topHomologyId, String text) {
super();
this.geneId = geneId;
this.conceptId = conceptId;
this.topHomologyId = topHomologyId;
this.preferredName = preferredName;
this.text = text;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package de.julielab.gepi.core.retrieval.services;

import static java.util.stream.Collectors.toSet;
import static org.neo4j.driver.v1.Values.parameters;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
Expand All @@ -19,16 +19,19 @@
import org.neo4j.driver.v1.StatementResult;
import org.neo4j.driver.v1.Transaction;
import org.neo4j.driver.v1.TransactionWork;
import org.neo4j.driver.v1.Value;
import org.slf4j.Logger;

import com.google.common.collect.Sets;

import de.julielab.gepi.core.retrieval.data.Argument;
import de.julielab.gepi.core.retrieval.data.Event;

public class EventPostProcessingService implements IEventPostProcessingService {

private Logger log;

private String BASE_NEO4J_URL = "bolt://dawkins:7687";
private String BASE_NEO4J_URL = "bolt://darwin:7687";

public EventPostProcessingService(Logger log) {
this.log = log;
Expand All @@ -42,32 +45,18 @@ public EventPostProcessingService(Logger log) {
@Log
@Override
public List<Event> setPreferredNameFromGeneId(List<Event> ev) {

log.trace("Number of events for post processing: {}", ev.size());
// the following hashmap maps gene ids as they appear in the previous hashmaps
// to their respective preferred name as it is written in the neo4j database
Map<String, String> geneIdPrefNameMap = new HashMap<>();

Set<Argument> args = new HashSet<Argument>();
Set<String> conceptIds = new HashSet<String>();
for (Event e : ev)
for (Argument a : e.getArguments()) {
args.add(a);
if (a.getConceptId() != null)
conceptIds.add(a.getConceptId());
else {
log.warn("Event {} has a null geneId, check why!", e);
}
}

// get preferred names from neo4j database
geneIdPrefNameMap = getGeneIdPrefNameMap(geneIdPrefNameMap, conceptIds);
Map<String, String> geneIdPrefNameMap = getGeneIdPrefNameMap(ev.stream().flatMap(e -> e.getArguments().stream().map(Argument::getConceptId)).collect(toSet()));

for (Argument a : args) {
ev.stream().flatMap(e -> e.getArguments().stream()).forEach(a -> {
String preferredName = geneIdPrefNameMap.get(a.getConceptId());
if (preferredName != null)
assert preferredName != null : "Could not find the preferred name for the concept ID " + a.getConceptId();
a.setPreferredName(preferredName);
else
a.setPreferredName("<unknown>");
}
});

return ev;
}
Expand All @@ -80,7 +69,9 @@ public List<Event> setPreferredNameFromGeneId(List<Event> ev) {
* @param geneIdPrefNameMap
* @param conceptIds
*/
private Map<String, String> getGeneIdPrefNameMap(Map<String, String> geneIdPrefNameMap, Set<String> conceptIds) {
private Map<String, String> getGeneIdPrefNameMap(Set<String> conceptIds) {

Map<String, String> geneIdPrefNameMap = new HashMap<>();

Config neo4jconf = Config.build().withoutEncryption().toConfig();
Driver driver = GraphDatabase.driver(this.BASE_NEO4J_URL, AuthTokens.basic("neo4j", "julielab"), neo4jconf);
Expand All @@ -91,27 +82,32 @@ private Map<String, String> getGeneIdPrefNameMap(Map<String, String> geneIdPrefN
@Override
public Map<String, String> execute(Transaction tx) {
Record record;
String[] searchInput = new String[conceptIds.size()];
searchInput = conceptIds.toArray(new String[conceptIds.size()]);

StatementResult result = tx.run("MATCH (t:ID_MAP_NCBI_GENES) where t.id IN {entrezIds} "
+ "WITH t "
+ "OPTIONAL MATCH (t:ID_MAP_NCBI_GENES)-[:HAS_ELEMENT*2]-(n:AGGREGATE_TOP_HOMOLOGY) "
+ "WHERE t.id IN {entrezIds} " + "return DISTINCT t.id AS ENTREZ_ID, "
+ "COALESCE(n.preferredName, t.preferredName) AS PNAME",
parameters("entrezIds", searchInput));

String statementTemplate = "MATCH (t:ID_MAP_NCBI_GENES) where t.id IN {entrezIds} " + "WITH t "
+ "OPTIONAL MATCH (t)-[:HAS_ELEMENT*2]-(n:AGGREGATE_TOP_HOMOLOGY) "
+ "return DISTINCT t.id AS ENTREZ_ID, "
+ "COALESCE(n.preferredName, t.preferredName) AS PNAME";
Value parameters = parameters("entrezIds", conceptIds);
log.trace("Cypher query to obtain preferred names: {} with parameters {}", statementTemplate, parameters);
StatementResult result = tx.run(statementTemplate, parameters);
int numReceived = 0;
while (result.hasNext()) {
record = result.next();

geneIdPrefNameMap.put(record.get("ENTREZ_ID").toString().replaceAll("\"", ""),
record.get("PNAME").toString().replaceAll("\"", ""));
++numReceived;
}
log.trace("Received {} concept ID - preferred name mapping", numReceived);
return geneIdPrefNameMap;
}
});
}

geneIdPrefNameMap.entrySet().stream().map(Entry::toString).forEach(log::trace);
assert conceptIds.size() == geneIdPrefNameMap.size() : conceptIds.size() + " concept IDs were given but only for " + geneIdPrefNameMap.size() + ", their preferred name was fetched. Missing concept IDs: " + Sets.difference(conceptIds, geneIdPrefNameMap.keySet());

if (log.isTraceEnabled())
geneIdPrefNameMap.entrySet().stream().map(Entry::toString).forEach(log::trace);

return geneIdPrefNameMap;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import static de.julielab.gepi.core.retrieval.services.EventRetrievalService.FIELD_EVENT_ARG_CONCEPT_IDS;
import static de.julielab.gepi.core.retrieval.services.EventRetrievalService.FIELD_EVENT_ARG_GENE_IDS;
import static de.julielab.gepi.core.retrieval.services.EventRetrievalService.FIELD_EVENT_ARG_PREFERRED_NAME;
import static de.julielab.gepi.core.retrieval.services.EventRetrievalService.FIELD_EVENT_ARG_TEXT;
import static de.julielab.gepi.core.retrieval.services.EventRetrievalService.FIELD_EVENT_ARG_TOP_HOMOLOGY_IDS;
import static de.julielab.gepi.core.retrieval.services.EventRetrievalService.FIELD_EVENT_LIKELIHOOD;
Expand All @@ -15,6 +14,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Stream;

Expand All @@ -24,15 +24,15 @@

import de.julielab.elastic.query.components.data.ISearchServerDocument;
import de.julielab.elastic.query.services.ISearchServerResponse;
import de.julielab.gepi.core.retrieval.data.Argument;
import de.julielab.gepi.core.retrieval.data.Event;
import de.julielab.gepi.core.retrieval.data.EventRetrievalResult;
import de.julielab.gepi.core.retrieval.data.Argument;

public class EventResponseProcessingService implements IEventResponseProcessingService {

@Inject
private IEventPostProcessingService eventPPService;

private Logger log;

public EventResponseProcessingService(Logger log) {
Expand All @@ -54,6 +54,7 @@ public EventRetrievalResult getEventRetrievalResult(ISearchServerResponse respon
Stream<Event> eventStream = resultDocuments2Events(getEventDocuments(response));
EventRetrievalResult eventRetrievalResult = new EventRetrievalResult();
eventRetrievalResult.setEvents(eventStream);
log.trace("Size of the event retrieval result (number of events): {}", eventRetrievalResult.getEventList().size());
// postprocess eventPreferred names first with given neo4j information
eventPPService.setPreferredNameFromGeneId(eventRetrievalResult.getEventList());
return eventRetrievalResult;
Expand All @@ -68,8 +69,6 @@ private Stream<Event> resultDocuments2Events(Stream<ISearchServerDocument> docum
List<Object> topHomologyIds = eventDocument.getFieldValues(FIELD_EVENT_ARG_TOP_HOMOLOGY_IDS)
.orElse(Collections.emptyList());
List<Object> texts = eventDocument.getFieldValues(FIELD_EVENT_ARG_TEXT).orElse(Collections.emptyList());
List<Object> preferredNames = eventDocument.getFieldValues(FIELD_EVENT_ARG_PREFERRED_NAME)
.orElse(Collections.emptyList());
Optional<String> mainEventType = eventDocument.get(FIELD_EVENT_MAINEVENTTYPE);
Optional<Integer> likelihood = eventDocument.get(FIELD_EVENT_LIKELIHOOD);
Optional<String> sentence = eventDocument.get(FIELD_EVENT_SENTENCE);
Expand All @@ -86,9 +85,22 @@ private Stream<Event> resultDocuments2Events(Stream<ISearchServerDocument> docum
String geneId = i < geneIds.size() ? (String) geneIds.get(i) : null;
String topHomologyId = i < topHomologyIds.size() ? (String) topHomologyIds.get(i) : null;
String text = i < texts.size() ? (String) texts.get(i) : null;
String preferredName = i < preferredNames.size() ? (String) preferredNames.get(i) : null;

arguments.add(new Argument(geneId, conceptId, topHomologyId, preferredName, text));
if (conceptId != null) {

// assert conceptId != null : "No concept ID received from event document with
// ID " + eventDocument.getId() + ":\n" + eventDocument;
assert geneId != null;
assert topHomologyId != null;
assert text != null;

arguments.add(new Argument(geneId, conceptId, topHomologyId, text));
} else {
log.warn(
"Came over event document where the concept Id of an argument missing. Document is skipped. This must be fixed in the index. The document was {}",
eventDocument);
return null;
}
}

Event event = new Event();
Expand All @@ -105,7 +117,7 @@ private Stream<Event> resultDocuments2Events(Stream<ISearchServerDocument> docum
event.setSentence(sentence.get());

return event;
});
}).filter(Objects::nonNull);
}

private Stream<ISearchServerDocument> getEventDocuments(ISearchServerResponse response) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import de.julielab.elastic.query.components.ISearchServerComponent;
import de.julielab.elastic.query.components.data.SearchCarrier;
import de.julielab.elastic.query.components.data.SearchServerCommand;
import de.julielab.elastic.query.components.data.SortCommand.SortOrder;
import de.julielab.elastic.query.components.data.query.BoolClause;
import de.julielab.elastic.query.components.data.query.BoolClause.Occur;
import de.julielab.elastic.query.components.data.query.BoolQuery;
Expand Down Expand Up @@ -65,7 +66,7 @@ public class EventRetrievalService implements IEventRetrievalService {
private ISearchServerComponent searchServerComponent;

private String documentIndex;

private static final int SCROLL_SIZE = 500;
private IEventResponseProcessingService eventResponseProcessingService;

public EventRetrievalService(@Symbol(GepiCoreSymbolConstants.INDEX_DOCUMENTS) String documentIndex, Logger log,
Expand Down Expand Up @@ -136,9 +137,10 @@ public CompletableFuture<EventRetrievalResult> getBipartiteEvents(Stream<String>
SearchServerCommand serverCmd = new SearchServerCommand();
serverCmd.query = nestedQuery;
serverCmd.index = documentIndex;
serverCmd.rows = 5;
serverCmd.rows = SCROLL_SIZE;
serverCmd.fieldsToReturn = Collections.emptyList();
serverCmd.downloadCompleteResults = true;
serverCmd.addSortCommand("_doc", SortOrder.ASCENDING);

SearchCarrier carrier = new SearchCarrier("BipartiteEvents");
carrier.addSearchServerCommand(serverCmd);
Expand All @@ -155,10 +157,10 @@ public CompletableFuture<EventRetrievalResult> getBipartiteEvents(Stream<String>
}

/**
* Reorders the arguments of the events to make the first argument
* correspond to the A ID list and the second argument to the B ID list.
* Also adds new events in case of more than two ID hits in the same so we
* can handle all results as binary events.
* Reorders the arguments of the events to make the first argument correspond to
* the A ID list and the second argument to the B ID list. Also adds new events
* in case of more than two ID hits in the same event so we can handle all
* results as binary events.
*
* @param idSetA
* The set of list A query IDs.
Expand Down Expand Up @@ -276,13 +278,14 @@ public CompletableFuture<EventRetrievalResult> getOutsideEvents(Stream<String> i
nestedQuery.innerHits.addField(FIELD_EVENT_NUMDISTINCTARGUMENTS);

log.trace("The nestedQuery object has the fields: {}", nestedQuery.innerHits.fields);

SearchServerCommand serverCmd = new SearchServerCommand();
serverCmd.query = nestedQuery;
serverCmd.index = documentIndex;
serverCmd.rows = 5;
serverCmd.rows = SCROLL_SIZE;
serverCmd.fieldsToReturn = Collections.emptyList();
serverCmd.downloadCompleteResults = true;
serverCmd.addSortCommand("_doc", SortOrder.ASCENDING);

SearchCarrier carrier = new SearchCarrier("OutsideEvents");
carrier.addSearchServerCommand(serverCmd);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.tapestry5.json.JSONArray;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import de.julielab.gepi.core.retrieval.data.Argument;
import de.julielab.gepi.core.retrieval.data.Argument.ComparisonMode;
import de.julielab.gepi.core.retrieval.data.Event;

public class GoogleChartsDataManager implements IGoogleChartsDataManager {

private static final Logger log = LoggerFactory.getLogger(GoogleChartsDataManager.class);
private Map<Argument, Integer> singleArgCount;
private Map<Pair<Argument, Argument>, Integer> pairedArgCount;
private JSONArray singleArgCountJson;
Expand All @@ -44,7 +47,6 @@ public JSONArray getTargetArgCount(List<Event> evtList) {
a.setComparisonMode(ComparisonMode.TOP_HOMOLOGY);
arguments.add(a);
}
;
});

// get the counts of how often event arguments appear
Expand Down Expand Up @@ -78,19 +80,13 @@ public JSONArray getTargetArgCount(List<Event> evtList) {
public JSONArray getPairedArgsCount(List<Event> evtList) {
List<Pair<Argument, Argument>> atids = new ArrayList<>();

log.trace("Number of events for pair counting: {}", evtList.size());

// get all atid atid pairs in one list
evtList.forEach(e -> {
if (e.getNumArguments() == 2) {
atids.add(new ImmutablePair<Argument, Argument>(e.getFirstArgument(), e.getSecondArgument()));
}
if (e.getFirstArgument().getPreferredName() == null) {
System.out.println("Concept ID: " + e.getFirstArgument().getConceptId() + ", Gene ID: " + e.getFirstArgument().getGeneId() + ", Top Homology ID: " + e.getFirstArgument().getTopHomologyId() + ": " + e.getFirstArgument().getPreferredName());
System.exit(1);
}
if (e.getSecondArgument().getPreferredName() == null) {
System.out.println("Concept ID: " + e.getSecondArgument().getConceptId() + ", Gene ID: " + e.getSecondArgument().getGeneId() + ", Top Homology ID: " + e.getSecondArgument().getTopHomologyId() + ": " + e.getSecondArgument().getPreferredName());
System.exit(1);
}
});

// get the count for how often pairs appear
Expand Down Expand Up @@ -120,15 +116,6 @@ public JSONArray getPairedArgsCount(List<Event> evtList) {
pairedArgCountJson.put(tmp);
});

// for (Pair<Argument, Argument> p : pairedArgCount.keySet()) {
// if (p.getLeft() == null || p.getRight() == null || pairedArgCount.get(p) == null || p.getLeft().getPreferredName() == null || p.getRight().getPreferredName() == null ) {
// System.out.println("Arguments: " + p);
// System.out.println("Count: " + pairedArgCount.get(p));
// System.exit(1);
// }
//
// }

return pairedArgCountJson;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,6 @@ public void testReorderBipartiteEventResultArguments3() throws Exception {
}

private Argument getArg(String id) {
return new Argument(id, null, null, "id: " + id, null);
return new Argument(id, null, "id: " + id, null);
}
}
1 change: 1 addition & 0 deletions gepi/gepi-webapp/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
/.settings/
/.classpath
/.project
/gepi-webapp.log
Loading

0 comments on commit 28ae7ea

Please sign in to comment.