Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add funding statement in TEI output #959

Merged
merged 10 commits into from
Oct 19, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ public Document processing(DocumentSource documentSource,
if (abstractProcessed != null) {
// neutralize figure and table annotations (will be considered as paragraphs)
String labeledAbstract = abstractProcessed.getLeft();
labeledAbstract = postProcessLabeledAbstract(labeledAbstract);
labeledAbstract = postProcessFullTextLabeledText(labeledAbstract);
resHeader.setLabeledAbstract(labeledAbstract);
resHeader.setLayoutTokensForLabel(abstractProcessed.getRight(), TaggingLabels.HEADER_ABSTRACT);
}
Expand Down Expand Up @@ -418,18 +418,23 @@ public Pair<String, List<LayoutToken>> processShort(List<LayoutToken> tokens, Do
layoutTokenization = layouts.getTokenization();
if ( (featuredText != null) && (featuredText.trim().length() > 0) ) {
res = label(featuredText);
res = postProcessFullTextLabeledText(res);
}
}

return Pair.of(res, layoutTokenization);
}

static protected String postProcessLabeledAbstract(String labeledAbstract) {
if (labeledAbstract == null)
/**
* Post-process text labeled by the fulltext model for chunks that are known to be plain text (no table or figure).
* Table and figure labels found in such chunks are converted to paragraph labels.
*/
protected static String postProcessFullTextLabeledText(String fulltextLabeledText) {
if (fulltextLabeledText == null)
return null;
StringBuilder result = new StringBuilder();

String[] lines = labeledAbstract.split("\n");
String[] lines = fulltextLabeledText.split("\n");
String previousLabel = null;
for(int i=0; i<lines.length; i++) {
String line = lines[i];
Expand Down Expand Up @@ -2466,16 +2471,20 @@ private void toTEI(Document doc,
tei.append("\t\t<back>\n");

// acknowledgement is in the back
tei.append(getSectionAsTEI("acknowledgement", "\t\t\t",doc, SegmentationLabels.ACKNOWLEDGEMENT,
teiFormatter, resCitations, config));
StringBuilder acknowledgmentStmt = getSectionAsTEI("acknowledgement", "\t\t\t", doc, SegmentationLabels.ACKNOWLEDGEMENT,
teiFormatter, resCitations, config);

if (acknowledgmentStmt.length() > 0) {
tei.append(acknowledgmentStmt);
}

// availability statements in header
StringBuilder availabilityStmt = new StringBuilder();
if (StringUtils.isNotBlank(resHeader.getAvailabilityStmt())) {
List<LayoutToken> headerAvailabilityStatementTokens = resHeader.getLayoutTokens(TaggingLabels.HEADER_AVAILABILITY);
Pair<String, List<LayoutToken>> headerAvailabilityProcessed = processShort(headerAvailabilityStatementTokens, doc);
if (headerAvailabilityProcessed != null) {
availabilityStmt = teiFormatter.processTEIDivSection("availability",
availabilityStmt = teiFormatter.processTEIDivSection("availability",
"\t\t\t",
headerAvailabilityProcessed.getLeft(),
headerAvailabilityProcessed.getRight(),
Expand All @@ -2488,7 +2497,7 @@ private void toTEI(Document doc,
}

// availability statements in non-header part
availabilityStmt = getSectionAsTEI("availability",
availabilityStmt = getSectionAsTEI("availability",
"\t\t\t",
doc,
SegmentationLabels.AVAILABILITY,
Expand All @@ -2499,6 +2508,36 @@ private void toTEI(Document doc,
tei.append(availabilityStmt.toString());
}

// funding in header
StringBuilder fundingStmt = new StringBuilder();
if (StringUtils.isNotBlank(resHeader.getFunding())) {
List<LayoutToken> headerFundingTokens = resHeader.getLayoutTokens(TaggingLabels.HEADER_FUNDING);
Pair<String, List<LayoutToken>> headerFundingProcessed = processShort(headerFundingTokens, doc);
if (headerFundingProcessed != null) {
fundingStmt = teiFormatter.processTEIDivSection("funding",
"\t\t\t",
headerFundingProcessed.getLeft(),
headerFundingProcessed.getRight(),
resCitations,
config);
}
if (fundingStmt.length() > 0) {
tei.append(fundingStmt.toString());
}
}

// funding statements in non-header part
fundingStmt = getSectionAsTEI("funding",
"\t\t\t",
doc,
SegmentationLabels.FUNDING,
teiFormatter,
resCitations,
config);
if (fundingStmt.length() > 0) {
tei.append(fundingStmt);
}

tei = teiFormatter.toTEIAnnex(tei, reseAnnex, resHeader, resCitations,
tokenizationsAnnex, markerTypes, doc, config);

Expand Down Expand Up @@ -2542,6 +2581,7 @@ private StringBuilder getSectionAsTEI(String xmlType,
String resultLabelling = null;
if (StringUtils.isNotBlank(text) ) {
resultLabelling = label(text);
resultLabelling = postProcessFullTextLabeledText(resultLabelling);
}
output = teiFormatter.processTEIDivSection(xmlType, indentation, resultLabelling, tokens, resCitations, config);
}
Expand Down
Loading