Skip to content

Commit

Permalink
Fix DublinCoreExtractor (and add more tests)
Browse files Browse the repository at this point in the history
  • Loading branch information
koppor committed Nov 9, 2019
1 parent 9382185 commit 568ccda
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 16 deletions.
24 changes: 13 additions & 11 deletions src/main/java/org/jabref/logic/xmp/DublinCoreExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,10 @@ private void extractYearAndMonth() {
}
if (calender != null) {
bibEntry.setField(StandardField.YEAR, String.valueOf(calender.get(Calendar.YEAR)));
int monthNumber = calender.get(Calendar.MONTH) + 1;
// not the 1st of January
if (!((calender.get(Calendar.MONTH) == 0) && (calender.get(Calendar.DAY_OF_MONTH) == 1))) {
Month.getMonthByNumber(calender.get(Calendar.MONTH) + 1)
if (!((monthNumber == 1) && (calender.get(Calendar.DAY_OF_MONTH) == 1))) {
Month.getMonthByNumber(monthNumber)
.ifPresent(month -> bibEntry.setMonth(month));
}
}
Expand Down Expand Up @@ -125,16 +126,13 @@ private void extractPublisher() {
}

/**
* This method sets all fields, which are custom in bibtext and therefore supported by jabref, but which are not
* This method sets all fields, which are custom in BibTeX and therefore supported by JabRef, but which are not
* included in the DublinCore format.
* <p/>
* <p>
* The relation attribute of DublinCore is abused to insert these custom fields.
*/
private void extractBibTexFields() {
List<String> relationships = dcSchema.getRelations();

Predicate<String> isBibTeXElement = s -> s.startsWith("bibtex/");

Consumer<String> splitBibTeXElement = s -> {
// the default pattern is bibtex/key/value, but some fields contain URLs etc.
// so the value property contains additional slashes, which makes the usage of
Expand All @@ -151,10 +149,11 @@ private void extractBibTexFields() {
// see also DublinCoreExtractor#extractYearAndMonth
if (StandardField.MONTH.equals(key)) {
Optional<Month> parsedMonth = Month.parse(value);
parsedMonth.ifPresent(month -> bibEntry.setField(key, month.getShortName()));
parsedMonth.ifPresent(bibEntry::setMonth);
}
}
};
List<String> relationships = dcSchema.getRelations();
if (relationships != null) {
relationships.stream()
.filter(isBibTeXElement)
Expand Down Expand Up @@ -221,19 +220,22 @@ private void extractType() {
* To understand how to get hold of a DublinCore have a look in the test cases for XMPUtil.
* <p>
* The BibEntry is built by mapping individual fields in the dublin core (like creator, title, subject) to fields in
* a bibtex bibEntry.
* a bibtex bibEntry. In case special "bibtex/" entries are contained, the normal dublin core fields take
* precedence. For instance, the dublin core date takes precedence over bibtex/month.
*
* @return The bibtex bibEntry found in the document information.
* @return The bibEntry extracted from the document information.
*/
public Optional<BibEntry> extractBibtexEntry() {
// first extract "bibtex/" entries
this.extractBibTexFields();

// then extract all "standard" dublin core entries
this.extractEditor();
this.extractAuthor();
this.extractYearAndMonth();
this.extractAbstract();
this.extractDOI();
this.extractPublisher();
this.extractBibTexFields();
this.extractRights();
this.extractSource();
this.extractSubject();
Expand Down
12 changes: 7 additions & 5 deletions src/main/java/org/jabref/model/entry/CanonicalBibtexEntry.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@ private CanonicalBibtexEntry() {
}

/**
* This returns a canonical BibTeX serialization. Special characters such as "{" or "&" are NOT escaped, but written
* as is
* This returns a canonical BibTeX serialization. Serializes all fields, even the JabRef internal ones. Does NOT
* serialize "KEY_FIELD" as field, but as key
*
* Serializes all fields, even the JabRef internal ones. Does NOT serialize "KEY_FIELD" as field, but as key
* <ul>
* <li>Special characters such as "{" or "&" are NOT escaped, but written as is</li>
* <li>String constants are not handled. That means, <code>month = apr</code> in a bib file gets <code>month = {#apr#}</code>. This indicates that the month field is correctly stored</li>
* </ul>
*/
public static String getCanonicalRepresentation(BibEntry entry) {
StringBuilder sb = new StringBuilder();
Expand Down Expand Up @@ -50,7 +53,7 @@ public static String getCanonicalRepresentation(BibEntry entry) {
// generate field entries
StringJoiner sj = new StringJoiner(",\n", "", "\n");
for (String fieldName : sortedFields) {
String line = String.format(" %s = {%s}", fieldName, String.valueOf(mapFieldToValue.get(fieldName)).replaceAll("\\r\\n","\n"));
String line = String.format(" %s = {%s}", fieldName, String.valueOf(mapFieldToValue.get(fieldName)).replaceAll("\\r\\n", "\n"));
sj.add(line);
}
sb.append(sj);
Expand All @@ -59,5 +62,4 @@ public static String getCanonicalRepresentation(BibEntry entry) {
sb.append('}');
return sb.toString();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -1784,4 +1784,13 @@ void bibTeXConstantAprilIsDisplayedAsConstant() throws ParseException {

assertEquals("#apr#", result.get().getField(StandardField.MONTH).get());
}

/**
 * Parses an entry whose month is given as the bare token {@code apr} and checks that reading the month field
 * yields the string-constant form {@code #apr#}.
 */
@Test
void bibTeXConstantAprilIsParsedAsStringMonthAprilWhenReadingTheField() throws ParseException {
    Optional<BibEntry> parsed = parser.parseSingleEntry("@Misc{m, month = apr }");

    // unwrap first so that a failed parse surfaces before the field comparison
    BibEntry entry = parsed.get();

    assertEquals(Optional.of("#apr#"), entry.getField(StandardField.MONTH));
}


}
17 changes: 17 additions & 0 deletions src/test/java/org/jabref/model/entry/CanonicalBibtexEntryTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package org.jabref.model.entry;

import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Tests for the canonical BibTeX serialization produced by {@link CanonicalBibtexEntry}.
 */
class CanonicalBibtexEntryTest {

    /**
     * An entry whose only content is a month set via {@code setMonth(Month.MAY)} must serialize with the month
     * written as {@code #may#} inside the braces.
     */
    @Test
    void canonicalRepresentationIsCorrectForStringMonth() {
        BibEntry mayEntry = new BibEntry();
        mayEntry.setMonth(Month.MAY);

        String serialized = CanonicalBibtexEntry.getCanonicalRepresentation(mayEntry);

        String expected = "@misc{,\n"
                + " month = {#may#}\n"
                + "}";
        assertEquals(expected, serialized);
    }
}

0 comments on commit 568ccda

Please sign in to comment.