Skip to content

Commit

Permalink
add funder consolidation with crossref rest api
Browse files Browse the repository at this point in the history
  • Loading branch information
kermitt2 committed Aug 12, 2023
1 parent e799571 commit 77d809a
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,12 @@ public StringBuilder toTEIHeader(BiblioItem biblio,
}

for (Map.Entry<Funder, List<Funding>> entry : fundingRelation.entrySet()) {
tei.append(entry.getKey().toTEI());

Funder consolidatedFunder = Consolidation.getInstance().consolidateFunder(entry.getKey());
if (consolidatedFunder != null)
tei.append(consolidatedFunder.toTEI());
else
tei.append(entry.getKey().toTEI());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.grobid.core.data.BibDataSet;
import org.grobid.core.data.BiblioItem;
import org.grobid.core.data.Funder;
import org.grobid.core.utilities.counters.CntManager;
import org.grobid.core.utilities.crossref.CrossrefClient;
import org.grobid.core.utilities.crossref.CrossrefRequestListener;
import org.grobid.core.utilities.crossref.WorkDeserializer;
import org.grobid.core.utilities.crossref.FunderDeserializer;
import org.grobid.core.utilities.glutton.GluttonClient;
import org.grobid.core.utilities.TextUtilities;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Option;
Expand All @@ -33,6 +36,7 @@ public class Consolidation {

private CrossrefClient client = null;
private WorkDeserializer workDeserializer = null;
private FunderDeserializer funderDeserializer = null;
private CntManager cntManager = null;

public enum GrobidConsolidationService {
Expand Down Expand Up @@ -89,6 +93,7 @@ private Consolidation() {
else
client = CrossrefClient.getInstance();
workDeserializer = new WorkDeserializer();
funderDeserializer = new FunderDeserializer();
}

public void setCntManager(CntManager cntManager) {
Expand Down Expand Up @@ -563,4 +568,46 @@ private double ratcliffObershelpDistance(String string1, String string2, boolean
return similarity;
}

/**
 * Try to match an extracted funder against the funder records exposed by the
 * consolidation service, querying the "funders" route with the funder full name.
 *
 * No pairwise post-validation of the candidates is performed here: the first
 * usable candidate returned by the service is taken as is.
 *
 * @param funder the extracted funder to consolidate, may be null
 * @return the first non-null candidate returned by the service, or null when the
 *         funder has no usable full name, when the service returns nothing or
 *         when the request fails
 */
public Funder consolidateFunder(Funder funder) {
    // without a name there is nothing to query: skip the remote call entirely
    if (funder == null) {
        return null;
    }
    final String fullName = funder.getFullName();
    if (fullName == null || fullName.trim().isEmpty()) {
        return null;
    }

    final List<Funder> results = new ArrayList<>();

    Map<String, String> arguments = new HashMap<>();
    arguments.put("query", fullName);
    // we just request the top-10 results, because the funder search results are noisy
    // and several candidates would be needed for a pairwise comparison step
    arguments.put("rows", "10");

    long threadId = Thread.currentThread().getId();

    try {
        client.pushRequest("funders", arguments, funderDeserializer, threadId, new CrossrefRequestListener<Funder>(0) {
            @Override
            public void onSuccess(List<Funder> res) {
                if (res == null) {
                    return;
                }
                // NOTE(review): a pairwise post-check of the candidates (to avoid false
                // positives) is still to be added here; for now candidates are kept in
                // the order returned by the service
                for (Funder oneRes : res) {
                    // the deserializer returns null for items that are not JSON objects
                    if (oneRes != null) {
                        results.add(oneRes);
                    }
                }
            }

            @Override
            public void onError(int status, String message, Exception exception) {
                LOGGER.info("Funder consolidation service returns error ("+status+") : "+message, exception);
            }
        });
    } catch (Exception e) {
        LOGGER.info("Funder consolidation error - ", e);
    } finally {
        // always signal the end of the requests pushed for this thread, even on failure
        // (presumably this waits for the pending requests - confirm against CrossrefClient)
        client.finish(threadId);
    }

    return results.isEmpty() ? null : results.get(0);
}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
package org.grobid.core.utilities.crossref;

import java.util.Iterator;

import org.grobid.core.data.BiblioItem;
import org.grobid.core.data.Person;
import org.grobid.core.data.Date;
import org.grobid.core.data.Funder;
import org.grobid.core.utilities.TextUtilities;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.fasterxml.jackson.databind.node.ArrayNode;

/**
 * Convert a JSON Funder model - from a glutton or crossref response - to a GROBID
 * Funder object.
 *
 * Input JSON format is from the REST API query. For example:
 * https://api.crossref.org/funders?query=agence+nationale+de+la+recherche
 *
 * For better data (Crossref funder registry one), we can then use the data API:
 * http://data.crossref.org/fundingdata/funder/10.13039/501100001665
 *
 */
public class FunderDeserializer extends CrossrefDeserializer<Funder> {

    /** Resolver prefixes removed from the "uri" field to obtain the bare DOI. */
    private static final String[] DOI_RESOLVER_PREFIXES = {
        "http://dx.doi.org/",
        "https://dx.doi.org/",
        "http://doi.org/",
        "https://doi.org/"
    };

    /** An alternative name is kept as acronym only when it is shorter than this. */
    private static final int MAX_ACRONYM_LENGTH = 10;

    /**
     * Build a Funder from one item of the funder search response.
     *
     * @param item one JSON item of the response
     * @return the populated Funder, or null when the item is not a JSON object
     */
    @Override
    protected Funder deserializeOneItem(JsonNode item) {
        if (item == null || !item.isObject()) {
            return null;
        }

        Funder funder = new Funder();

        String location = textOrNull(item.get("location"));
        if (location != null) {
            funder.setCountry(location);
        }

        // we always have a uri field, and we can get the DOI from this...
        // surprisingly no DOI field !
        String uri = textOrNull(item.get("uri"));
        if (uri != null) {
            funder.setDoi(stripDoiResolver(uri));
        }

        String name = textOrNull(item.get("name"));
        if (name != null) {
            funder.setFullName(name);
        }

        JsonNode altNamesNode = item.get("alt-names");
        if (altNamesNode != null && altNamesNode.isArray()) {
            // here we just keep an acronym form - better names with lang info via the data API
            for (JsonNode altNameNode : altNamesNode) {
                String altName = textOrNull(altNameNode);
                if (altName != null
                        && TextUtilities.isAllUpperCase(altName)
                        && altName.length() < MAX_ACRONYM_LENGTH) {
                    funder.setAbbreviatedName(altName);
                    break;
                }
            }
        }

        return funder;
    }

    /**
     * Return the text value of a node, or null when the node is absent, missing or an
     * explicit JSON null (for which asText() would give the literal string "null").
     */
    private static String textOrNull(JsonNode node) {
        if (node == null || node.isMissingNode() || node.isNull()) {
            return null;
        }
        return node.asText();
    }

    /** Remove a leading DOI resolver prefix, if any, so that only the DOI itself remains. */
    private static String stripDoiResolver(String uri) {
        for (String prefix : DOI_RESOLVER_PREFIXES) {
            if (uri.startsWith(prefix)) {
                return uri.substring(prefix.length());
            }
        }
        return uri;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

/**
* Convert a JSON Work model - from a glutton or crossref response - to a BiblioItem
* (understandable by this stupid GROBID
* (understandable by this stupid GROBID)
*
*/
public class WorkDeserializer extends CrossrefDeserializer<BiblioItem> {
Expand Down

0 comments on commit 77d809a

Please sign in to comment.