Skip to content

Commit

Permalink
additional training data
Browse files (browse the repository at this point in the history)
  • Loading branch information
kermitt2 committed Aug 5, 2023
1 parent 08c3c6d commit cee9468
Show file tree
Hide file tree
Showing 4 changed files with 17,015 additions and 12,515 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,7 +67,7 @@ protected FundingAcknowledgementParser() {
try {
String featureVector = FeaturesVectorFunding.addFeatures(tokenizationFunding, null);
res = label(featureVector);
System.out.println(res);
//System.out.println(res);
} catch (Exception e) {
throw new GrobidException("CRF labeling with table model fails.", e);
}
Expand Down Expand Up @@ -125,17 +125,23 @@ public Pair<String, Triple<List<Funding>,List<Person>,List<Affiliation>>> proces
TaggingLabel previousLabel = null;

Element curParagraph = teiElement("p");
int posTokenization = 0;

for (TaggingTokenCluster cluster : clusters) {
if (cluster == null) {
continue;
}

boolean spaceBefore = false;
if (posTokenization > 0 && tokenizations.size()>=posTokenization && tokenizations.get(posTokenization-1).getText().equals(" ")) {
spaceBefore = true;
}

TaggingLabel clusterLabel = cluster.getTaggingLabel();
Engine.getCntManager().i(clusterLabel);

List<LayoutToken> tokens = cluster.concatTokens();
String clusterContent = LayoutTokensUtil.normalizeText(LayoutTokensUtil.toText(tokens));
String clusterContent = LayoutTokensUtil.normalizeText(LayoutTokensUtil.toText(tokens));

if (clusterLabel.equals(FUNDING_FUNDER_NAME)) {
Funder localFunder = funding.getFunder();
Expand Down Expand Up @@ -163,6 +169,10 @@ public Pair<String, Triple<List<Funding>,List<Person>,List<Affiliation>>> proces
entity.addAttribute(new Attribute("type", "funder"));
entity.appendChild(clusterContent);

if (spaceBefore)
curParagraph.appendChild(new Text(" "));
curParagraph.appendChild(entity);

} else if (clusterLabel.equals(FUNDING_GRANT_NAME)) {
if (StringUtils.isNotBlank(funding.getGrantName())) {
if (funding.isValid()) {
Expand All @@ -180,6 +190,10 @@ public Pair<String, Triple<List<Funding>,List<Person>,List<Affiliation>>> proces
entity.addAttribute(new Attribute("type", "grantName"));
entity.appendChild(clusterContent);

if (spaceBefore)
curParagraph.appendChild(new Text(" "));
curParagraph.appendChild(entity);

} else if (clusterLabel.equals(FUNDING_PERSON)) {
if (StringUtils.isNotBlank(person.getRawName())) {
if (person.isValid()) {
Expand All @@ -196,6 +210,10 @@ public Pair<String, Triple<List<Funding>,List<Person>,List<Affiliation>>> proces
entity.addAttribute(new Attribute("type", "person"));
entity.appendChild(clusterContent);

if (spaceBefore)
curParagraph.appendChild(new Text(" "));
curParagraph.appendChild(entity);

} else if (clusterLabel.equals(FUNDING_AFFILIATION)) {
if (StringUtils.isNotBlank(affiliation.getAffiliationString())) {
if (affiliation.notNull()) {
Expand All @@ -212,6 +230,10 @@ public Pair<String, Triple<List<Funding>,List<Person>,List<Affiliation>>> proces
entity.addAttribute(new Attribute("type", "affiliation"));
entity.appendChild(clusterContent);

if (spaceBefore)
curParagraph.appendChild(new Text(" "));
curParagraph.appendChild(entity);

} else if (clusterLabel.equals(FUNDING_INSTITUTION)) {
if (StringUtils.isNotBlank(institution.getAffiliationString())) {
if (institution.notNull()) {
Expand All @@ -228,6 +250,10 @@ public Pair<String, Triple<List<Funding>,List<Person>,List<Affiliation>>> proces
entity.addAttribute(new Attribute("type", "institution"));
entity.appendChild(clusterContent);

if (spaceBefore)
curParagraph.appendChild(new Text(" "));
curParagraph.appendChild(entity);

} else if (clusterLabel.equals(FUNDING_GRANT_NUMBER)) {
if (StringUtils.isNotBlank(funding.getGrantNumber())) {
if (funding.isValid()) {
Expand All @@ -245,6 +271,10 @@ public Pair<String, Triple<List<Funding>,List<Person>,List<Affiliation>>> proces
entity.addAttribute(new Attribute("type", "grantNumber"));
entity.appendChild(clusterContent);

if (spaceBefore)
curParagraph.appendChild(new Text(" "));
curParagraph.appendChild(entity);

} else if (clusterLabel.equals(FUNDING_PROGRAM_NAME)) {
if (StringUtils.isNotBlank(funding.getProgramFullName())) {
if (funding.isValid()) {
Expand All @@ -262,6 +292,10 @@ public Pair<String, Triple<List<Funding>,List<Person>,List<Affiliation>>> proces
entity.addAttribute(new Attribute("type", "programName"));
entity.appendChild(clusterContent);

if (spaceBefore)
curParagraph.appendChild(new Text(" "));
curParagraph.appendChild(entity);

} else if (clusterLabel.equals(FUNDING_PROJECT_NAME)) {
if (StringUtils.isNotBlank(funding.getProjectFullName())) {
if (funding.isValid()) {
Expand All @@ -279,21 +313,26 @@ public Pair<String, Triple<List<Funding>,List<Person>,List<Affiliation>>> proces
entity.addAttribute(new Attribute("type", "projectName"));
entity.appendChild(clusterContent);

if (spaceBefore)
curParagraph.appendChild(new Text(" "));
curParagraph.appendChild(entity);

} else if (clusterLabel.equals(FUNDING_OTHER)) {
if (spaceBefore)
curParagraph.appendChild(new Text(" "));
curParagraph.appendChild(textNode(clusterContent));
} else {
LOGGER.warn("Unexpected funding model label - " + clusterLabel.getLabel() + " for " + clusterContent);
}

// last funding
if (funding.isValid())
fundings.add(funding);

previousLabel = clusterLabel;
posTokenization += tokens.size();
}

// last funding, person, institution/affiliation
if (funding.isValid())
fundings.add(funding);

if (institutions != null && institutions.size() > 0)
affiliations.addAll(institutions);

Expand Down
Loading

0 comments on commit cee9468

Please sign in to comment.