Skip to content

Commit

Permalink
Fixes #1181: Improved "Normalize to BibTeX name format" (#1470)
Browse files Browse the repository at this point in the history
  • Loading branch information
bruehldev authored and koppor committed Sep 12, 2016
1 parent 99333a9 commit d9dc3a8
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 28 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `#
- Fixed [#1882](https://github.com/JabRef/jabref/issues/1882): Crash after saving illegal bibtexkey in entry editor
- Fixed: a `location` field containing only a city was not exported correctly to the MS-Office 2007 XML format
- Fixed: the `key` field was not exported to the MS-Office 2008 XML format
- Fixed [#1181](https://github.com/JabRef/jabref/issues/1181) and [#1504](https://github.com/JabRef/jabref/issues/1504): Improved "Normalize to BibTeX name format": Support separated names with commas and colons. Considered name affixes such as "Jr".
- Fixed: downloading files failed silently when an invalid directory was selected
- Fixed [#1949](https://github.com/JabRef/jabref/issues/1949): Error message directs to the wrong preference tab
- Fixed InvalidBackgroundColor flickering with Ctrl-s and File > Save database
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package net.sf.jabref.logic.formatter.bibtexfields;

import java.util.Objects;

import net.sf.jabref.logic.formatter.Formatter;
import net.sf.jabref.logic.l10n.Localization;
import net.sf.jabref.model.entry.AuthorList;
Expand All @@ -20,8 +22,9 @@ public String getKey() {
}

@Override
public String format(String value) {
AuthorList authorList = AuthorList.parse(value);
public String format(String nameList) {
Objects.requireNonNull(nameList);
AuthorList authorList = AuthorList.parse(nameList);
return authorList.getAsLastFirstNamesWithAnd(false);
}

Expand Down
77 changes: 77 additions & 0 deletions src/main/java/net/sf/jabref/model/entry/AuthorList.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package net.sf.jabref.model.entry;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.WeakHashMap;
import java.util.stream.Collectors;
Expand Down Expand Up @@ -130,6 +134,9 @@ public class AuthorList {

private static final WeakHashMap<String, AuthorList> AUTHOR_CACHE = new WeakHashMap<>();

// Lower-case terms (suffixes such as "jr" and particles such as "von") that must not be counted
// as a separate name part when a comma-separated author list is split in parse(String).
// Candidates are lower-cased before they are looked up in this collection.
private final static Collection<String> avoidTermsInLowerCase = Arrays.asList("jr", "sr", "jnr", "snr", "von", "zu", "van", "der");

/**
* Creates a new list of authors.
* <p>
Expand Down Expand Up @@ -157,6 +164,46 @@ protected AuthorList(Author author) {
public static AuthorList parse(String authors) {
Objects.requireNonNull(authors);

// Handle case names in order lastname, firstname and separated by ","
// E.g., Ali Babar, M., Dingsøyr, T., Lago, P., van der Vliet, H.
if (!authors.toUpperCase(Locale.ENGLISH).contains(" AND ") && !authors.contains("{") && !authors.contains(";")) {
List<String> arrayNameList = Arrays.asList(authors.split(","));

// Delete spaces for correct case identification
arrayNameList.replaceAll(String::trim);

// Looking for space between pre- and lastname
boolean spaceInAllParts = arrayNameList.stream().filter(name -> name.contains(" ")).collect(Collectors
.toList()).size() == arrayNameList.size();

// We hit the comma name separator case
// Usually the getAsLastFirstNamesWithAnd method would separate them if pre- and lastname are separated with "and"
// If not, we check if spaces separate pre- and lastname
if (spaceInAllParts) {
authors = authors.replaceAll(",", " and");
} else {
// Looking for name affixes to avoid
// The size of arrayNameList needs to be reduced by the count of avoided terms
// valuePartsCount holds the count of name parts without the avoided terms

int valuePartsCount = arrayNameList.size();
// Holds the index of each term which needs to be avoided
Collection<Integer> avoidIndex = new HashSet<>();

for (int i = 0; i < arrayNameList.size(); i++) {
if (avoidTermsInLowerCase.contains(arrayNameList.get(i).toLowerCase())) {
avoidIndex.add(i);
valuePartsCount--;
}
}

if ((valuePartsCount % 2) == 0) {
// We hit the described special case with name affix like Jr
authors = buildWithAffix(avoidIndex, arrayNameList).toString();
}
}
}

AuthorList authorList = AUTHOR_CACHE.get(authors);
if (authorList == null) {
AuthorListParser parser = new AuthorListParser();
Expand Down Expand Up @@ -577,4 +624,34 @@ public String getForAlphabetization() {
return authorsAlph;
}

/**
 * Re-joins the parts of a comma-separated name list so that the parts belonging to one name
 * stay separated by commas while complete names are terminated by a semicolon.
 * <p>
 * Parts whose index is contained in {@code indexArray} are name affixes (e.g., "Jr"). An affix
 * is always followed by a comma and shifts the last name/first name alternation of the
 * following parts by one. Every part, including the last one, is followed by a separator.
 *
 * @param indexArray indices of the parts in {@code nameList} that are name affixes
 * @param nameList   the individual parts of the original, comma-separated name list
 * @return a builder containing the name list with the corrected separation
 */
private static StringBuilder buildWithAffix(Collection<Integer> indexArray, List<String> nameList) {
    StringBuilder stringBuilder = new StringBuilder();
    // avoidedTimes counts the affixes seen so far; it corrects the odd/even calculation below
    int avoidedTimes = 0;
    for (int i = 0; i < nameList.size(); i++) {
        stringBuilder.append(nameList.get(i));
        if (indexArray.contains(i)) {
            // We hit a name affix: it stays attached to the current name
            stringBuilder.append(',');
            avoidedTimes++;
        } else if (((i + avoidedTimes) % 2) == 0) {
            // Separation between last name and first name --> comma has to be kept
            stringBuilder.append(',');
        } else {
            // Separation between full names (e.g., Ali Babar, M. and Dingsøyr, T.) --> semicolon has to be used
            // Will be treated correctly by AuthorList.parse(authors);
            stringBuilder.append(';');
        }
    }
    return stringBuilder;
}

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package net.sf.jabref.logic.formatter.bibtexfields;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

Expand All @@ -20,91 +19,160 @@ public void setUp() {

/**
 * Checks the normalization of several author list notations: names without commas,
 * names separated by "and" and/or semicolons, initials written without dots,
 * and a name containing a LaTeX-escaped character.
 */
@Test
public void testNormalizeAuthorList() {
    assertEquals("Bilbo, Staci D.", formatter.format("Staci D Bilbo"));
    assertEquals("Bilbo, Staci D.", formatter.format("Staci D. Bilbo"));

    assertEquals("Bilbo, Staci D. and Smith, S. H. and Schwarz, Jaclyn M.", formatter.format("Staci D Bilbo and Smith SH and Jaclyn M Schwarz"));

    assertEquals("Ølver, M. A.", formatter.format("Ølver MA"));

    assertEquals("Ølver, M. A. and Øie, G. G. and Øie, G. G. and Alfredsen, J. Å. Å. and Alfredsen, Jo and Olsen, Y. Y. and Olsen, Y. Y.",
            formatter.format("Ølver MA; GG Øie; Øie GG; Alfredsen JÅÅ; Jo Alfredsen; Olsen Y.Y. and Olsen YY."));

    assertEquals("Ølver, M. A. and Øie, G. G. and Øie, G. G. and Alfredsen, J. Å. Å. and Alfredsen, Jo and Olsen, Y. Y. and Olsen, Y. Y.",
            formatter.format("Ølver MA; GG Øie; Øie GG; Alfredsen JÅÅ; Jo Alfredsen; Olsen Y.Y.; Olsen YY."));

    assertEquals("Alver, Morten and Alver, Morten O. and Alfredsen, J. A. and Olsen, Y. Y.", formatter.format("Alver, Morten and Alver, Morten O and Alfredsen, JA and Olsen, Y.Y."));

    assertEquals("Alver, M. A. and Alfredsen, J. A. and Olsen, Y. Y.", formatter.format("Alver, MA; Alfredsen, JA; Olsen Y.Y."));

    assertEquals("Kolb, Stefan and Lenhard, J{\\\"o}rg and Wirtz, Guido", formatter.format("Kolb, Stefan and J{\\\"o}rg Lenhard and Wirtz, Guido"));
}

/**
 * Two "First Last" names separated by a semicolon are normalized to
 * "Last, First" names joined by " and ".
 */
@Test
public void twoAuthorsSeperatedByColon() {
    assertEquals("Bilbo, Staci and Alver, Morten", formatter.format("Staci Bilbo; Morten Alver"));
}

/**
 * Three "First Last" names separated by semicolons are normalized to
 * "Last, First" names joined by " and ".
 */
@Test
public void threeAuthorsSeperatedByColon() {
    assertEquals("Bilbo, Staci and Alver, Morten and Name, Test", formatter.format("Staci Bilbo; Morten Alver; Test Name"));
}

// Test for https://github.com/JabRef/jabref/issues/318
// Three "First Last" names joined by "and"; one first name contains a LaTeX-escaped umlaut.
@Test
public void threeAuthorsSeperatedByAnd() {
    assertEquals("Kolb, Stefan and Lenhard, J{\\\"o}rg and Wirtz, Guido", formatter.format("Stefan Kolb and J{\\\"o}rg Lenhard and Guido Wirtz"));
}

// Test for https://github.com/JabRef/jabref/issues/318
// Names joined by "and" that contain hyphenated first names and hyphenated initials.
@Test
public void threeAuthorsSeperatedByAndWithDash() {
    assertEquals("Jian, Heng-Yu and Xu, Z. and Chang, M.-C. F.", formatter.format("Heng-Yu Jian and Xu, Z. and Chang, M.-C.F."));
}

// Test for https://github.com/JabRef/jabref/issues/318
// Names joined by "and"; the last first name contains a LaTeX command in braces.
@Test
public void threeAuthorsSeperatedByAndWithLatex() {
    assertEquals("Gustafsson, Oscar and DeBrunner, Linda S. and DeBrunner, Victor and Johansson, H{\\aa}kan",
            formatter.format("Oscar Gustafsson and Linda S. DeBrunner and Victor DeBrunner and H{\\aa}kan Johansson"));
}

/** A last name followed by a single initial without a dot gets a comma and a dotted initial. */
@Test
public void lastThenInitial() {
    assertEquals("Smith, S.", formatter.format("Smith S"));
}

/** A last name followed by two undotted initials gets a comma and separate dotted initials. */
@Test
public void lastThenInitials() {
    assertEquals("Smith, S. H.", formatter.format("Smith SH"));
}

/** A single undotted initial followed by the last name is reordered to "Last, I.". */
@Test
public void initialThenLast() {
    assertEquals("Smith, S.", formatter.format("S Smith"));
}

/** A single dotted initial followed by the last name is reordered to "Last, I.". */
@Test
public void initialDotThenLast() {
    assertEquals("Smith, S.", formatter.format("S. Smith"));
}

/** Two undotted initials followed by the last name are reordered to "Last, I. I.". */
@Test
public void initialsThenLast() {
    assertEquals("Smith, S. H.", formatter.format("SH Smith"));
}

/** A three-part, comma-separated name is expected to be returned unchanged. */
@Test
public void lastThenJuniorThenFirst() {
    assertEquals("Name, della, first", formatter.format("Name, della, first"));
}

private void expectCorrect(String input, String expected) {
Assert.assertEquals(expected, formatter.format(input));
/**
 * "Last, First" pairs that are themselves separated by commas are split into
 * individual names joined by " and "; a single pair is returned unchanged.
 */
@Test
public void testConcatenationOfAuthorsWithCommas() {
    String fourNames = "Ali Babar, M., Dingsøyr, T., Lago, P., van der Vliet, H.";
    String expectedFourNames = "Ali Babar, M. and Dingsøyr, T. and Lago, P. and van der Vliet, H.";
    assertEquals(expectedFourNames, formatter.format(fourNames));

    String singleName = "Ali Babar, M.";
    assertEquals("Ali Babar, M.", formatter.format(singleName));
}

/**
 * The input splits into an odd number (five) of comma-separated parts, so it cannot be
 * paired up as "Last, First" names; this pins the output expected for that case.
 */
@Test
public void testOddCountOfCommas() {
    String input = "Ali Babar, M., Dingsøyr, T., Lago P.";
    String expected = "Ali Babar, M., Dingsøyr T. Lago P.";
    assertEquals(expected, formatter.format(input));
}

/** The formatter's own example input must normalize to the expected "Last, First" list. */
@Test
public void formatExample() {
    assertEquals("Einstein, Albert and Turing, Alan", formatter.format(formatter.getExampleInput()));
}

/**
 * A comma-separated list in which one name carries the affix "jr": the affix stays
 * with its name and the two names are joined by " and ".
 */
@Test
public void testNameAffixe() {
    String input = "Surname, jr, First, Surname2, First2";
    String expected = "Surname, jr, First and Surname2, First2";
    assertEquals(expected, formatter.format(input));
}

/** An input containing an opening brace and a semicolon is expected to come back unchanged. */
@Test
public void testAvoidSpecialCharacter() {
    String input = "Surname, {, First; Surname2, First2";
    String expected = "Surname, {, First; Surname2, First2";
    assertEquals(expected, formatter.format(input));
}

/** Pins the output for a comma-separated list in which one part is the word "and". */
@Test
public void testAndInName() {
    String input = "Surname, and , First, Surname2, First2";
    String expected = "Surname and , First, Surname2 First2";
    assertEquals(expected, formatter.format(input));
}

/**
 * Two comma-separated names that each carry an affix ("Jr", "Sr") keep their affixes
 * and are joined by " and ".
 */
@Test
public void testMultipleNameAffixes() {
    String input = "Mair, Jr, Daniel, Brühl, Sr, Daniel";
    String expected = "Mair, Jr, Daniel and Brühl, Sr, Daniel";
    assertEquals(expected, formatter.format(input));
}

/** Eight "First Last" names separated only by commas are split and reordered to "Last, First". */
@Test
public void testCommaSeperatedNames() {
    String input = "Cristina Bosoi, Mariana Oliveira, Rafael Ochoa Sanchez, Mélanie Tremblay, Gabrie TenHave, Nicoolas Deutz, Christopher F. Rose, Chantal Bemeur";
    String expected = "Bosoi, Cristina and Oliveira, Mariana and Sanchez, Rafael Ochoa and Tremblay, Mélanie and TenHave, Gabrie and Deutz, Nicoolas and Rose, Christopher F. and Bemeur, Chantal";
    assertEquals(expected, formatter.format(input));
}

/**
 * Same names as in the comma-separated case, but with extra spaces before some commas;
 * the extra whitespace must not change the result.
 */
@Test
public void testMultipleSpaces() {
    String input = "Cristina Bosoi, Mariana Oliveira, Rafael Ochoa Sanchez , Mélanie Tremblay , Gabrie TenHave, Nicoolas Deutz, Christopher F. Rose, Chantal Bemeur";
    String expected = "Bosoi, Cristina and Oliveira, Mariana and Sanchez, Rafael Ochoa and Tremblay, Mélanie and TenHave, Gabrie and Deutz, Nicoolas and Rose, Christopher F. and Bemeur, Chantal";
    assertEquals(expected, formatter.format(input));
}

/**
 * Comma-separated "First particle Last" names: the particles "von", "van" and "zu"
 * stay in front of the last name in the normalized output.
 */
@Test
public void testAvoidPreposition() {
    String input = "Hans von Zimmer, Michael van Oberbergern, Kevin zu Berger";
    String expected = "von Zimmer, Hans and van Oberbergern, Michael and zu Berger, Kevin";
    assertEquals(expected, formatter.format(input));
}

/**
 * Comma-separated names containing the particles "von", "van" and "zu".
 * NOTE(review): input and expectation are identical to testAvoidPreposition — consider merging.
 */
@Test
public void testPreposition() {
    String input = "Hans von Zimmer, Michael van Oberbergern, Kevin zu Berger";
    String expected = "von Zimmer, Hans and van Oberbergern, Michael and zu Berger, Kevin";
    assertEquals(expected, formatter.format(input));
}

/** Comma-separated names containing the particle "der", which stays in front of the last name. */
@Test
public void testAvoidNameAffixes() {
    String input = "Canon der Barbar, Alexander der Große";
    String expected = "der Barbar, Canon and der Große, Alexander";
    assertEquals(expected, formatter.format(input));
}

/** The "and" separator must be recognized regardless of its letter case. */
@Test
public void testUpperCaseSensitiveList() {
    String expected = "der Barbar, Canon and der Große, Alexander";
    String[] inputs = {
            "Canon der Barbar AND Alexander der Große",
            "Canon der Barbar aNd Alexander der Große",
            "Canon der Barbar AnD Alexander der Große"
    };
    for (String input : inputs) {
        assertEquals(expected, formatter.format(input));
    }
}

/**
 * Names separated by semicolons, where individual names are given as "Last, First",
 * "Last, Jr, First", "First Last" or only "Last".
 */
@Test
public void testSemiCorrectNamesWithSemicolon() {
    String allLastFirst = "Last, First; Last2, First2; Last3, First3";
    assertEquals("Last, First and Last2, First2 and Last3, First3", formatter.format(allLastFirst));

    String withAffix = "Last, Jr, First; Last2, First2";
    assertEquals("Last, Jr, First and Last2, First2", formatter.format(withAffix));

    String lastInFirstLastOrder = "Last, First; Last2, First2; Last3, First3; First4 Last4";
    assertEquals("Last, First and Last2, First2 and Last3, First3 and Last4, First4", formatter.format(lastInFirstLastOrder));

    String firstWithoutFirstName = "Last; Last2, First2; Last3, First3; Last4, First4";
    assertEquals("Last and Last2, First2 and Last3, First3 and Last4, First4", formatter.format(firstWithoutFirstName));
}
}
10 changes: 10 additions & 0 deletions src/test/java/net/sf/jabref/model/entry/AuthorListTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -524,6 +524,15 @@ public void testGetAuthorsLastFirstAnds() {

}

@Test
public void testGetAuthorsLastFirstAndsCaching() {
    // getAsLastFirstNamesWithAnd caches its results, so every variant is requested twice
    // with the same arguments: first with full first names, then with abbreviated ones.
    final String name = "John Smith";
    for (int attempt = 0; attempt < 2; attempt++) {
        Assert.assertEquals("Smith, John", AuthorList.parse(name).getAsLastFirstNamesWithAnd(false));
    }
    for (int attempt = 0; attempt < 2; attempt++) {
        Assert.assertEquals("Smith, J.", AuthorList.parse(name).getAsLastFirstNamesWithAnd(true));
    }
}

@Test
public void testGetAuthorsFirstFirst() {

Expand Down Expand Up @@ -636,4 +645,5 @@ public void parseNameWithBraces() throws Exception {
Author expected = new Author("H{e}lene", "H.", null, "Fiaux", null);
Assert.assertEquals(new AuthorList(expected), AuthorList.parse("H{e}lene Fiaux"));
}

}

0 comments on commit d9dc3a8

Please sign in to comment.