From b28a926bb8a8443191012adbea9df171d01ab11f Mon Sep 17 00:00:00 2001 From: bruehldev Date: Mon, 27 Jun 2016 20:00:35 +0200 Subject: [PATCH] Fixes #1181 and #1504: Improved "Normalize to BibTeX name format" Added the jr, sr,... special cases for semicolon partition. Fixed to avoid the "and", "{", ";" cases. Added Test for every case. --- CHANGELOG.md | 1 + .../bibtexfields/NormalizeNamesFormatter.java | 86 +++++++++++++++++++ .../NormalizeNamesFormatterTest.java | 45 +++++++++- .../sf/jabref/model/entry/AuthorListTest.java | 10 +++ 4 files changed, 141 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7a0e66edd97c..216b86a60a92 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ We refer to [GitHub issues](https://github.com/JabRef/jabref/issues) by using `# - Fixed [#1430](https://github.com/JabRef/jabref/issues/1430): "review changes" did misinterpret changes - Fixed [#1434](https://github.com/JabRef/jabref/issues/1434): Static groups are now longer displayed as dynamic ones - Fixed [#1482](https://github.com/JabRef/jabref/issues/1482): Correct number of matched entries is displayed for refining subgroups +- Fixed [#1181](https://github.com/JabRef/jabref/issues/1181) and [#1504](https://github.com/JabRef/jabref/issues/1504): Improved "Normalize to BibTeX name format": Support separated names with commas and semicolons. Considered name affixes such as "Jr". 
### Removed diff --git a/src/main/java/net/sf/jabref/logic/formatter/bibtexfields/NormalizeNamesFormatter.java b/src/main/java/net/sf/jabref/logic/formatter/bibtexfields/NormalizeNamesFormatter.java index a71a1d74f22e..e7b1f5b5cfc8 100644 --- a/src/main/java/net/sf/jabref/logic/formatter/bibtexfields/NormalizeNamesFormatter.java +++ b/src/main/java/net/sf/jabref/logic/formatter/bibtexfields/NormalizeNamesFormatter.java @@ -15,6 +15,12 @@ */ package net.sf.jabref.logic.formatter.bibtexfields; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashSet; +import java.util.Objects; +import java.util.StringJoiner; + import net.sf.jabref.logic.formatter.Formatter; import net.sf.jabref.logic.l10n.Localization; import net.sf.jabref.model.entry.AuthorList; @@ -24,6 +30,9 @@ */ public class NormalizeNamesFormatter implements Formatter { + // Avoid partition where these values are contained + private final Collection avoidTermsInLowerCase = Arrays.asList("jr", "sr", "jnr", "snr", "von", "zu", "van", "der"); + @Override public String getName() { return Localization.lang("Normalize names of persons"); @@ -36,6 +45,73 @@ public String getKey() { @Override public String format(String value) { + Objects.requireNonNull(value); + // Handle case names in order lastname, firstname and separated by "," + // E.g., Ali Babar, M., Dingsøyr, T., Lago, P., van der Vliet, H. 
+ if (!value.contains(" and ") && !value.contains("{") && !value.contains(";")) { + String[] valueParts = value.split(","); + // Delete spaces for correct case identification + for(int i=0; i < valueParts.length; i++) { + valueParts[i] = valueParts[i].trim(); + } + // Looking for space between pre- and lastname + boolean spaceInAllParts = false; + for (int i=0; i avoidIndex = new HashSet<>(); + + for (int i = 0; i < valueParts.length; i++) { + if (avoidTermsInLowerCase.contains(valueParts[i].toLowerCase())) { + avoidIndex.add(i); + valuePartsCount--; + } + } + + if ((valuePartsCount % 2) == 0) { + // We hit the described special case with name affix like Jr + StringBuilder stringBuilder = new StringBuilder(); + // avoidedTimes need to increase the count of avoided terms for correct module calculation + int avoidedTimes = 0; + for (int i = 0; i < valueParts.length; i++) { + if (avoidIndex.contains(i)) { + // We hit a name affix + stringBuilder.append(valueParts[i]); + stringBuilder.append(','); + avoidedTimes++; + } else { + stringBuilder.append(valueParts[i]); + if (((i + avoidedTimes) % 2) == 0) { + // Hit separation between last name and firstname --> comma has to be kept + stringBuilder.append(','); + } else { + // Hit separation between full names (e.g., Ali Babar, M. and Dingsøyr, T.) 
--> semicolon has to be used + // Will be treated correctly by AuthorList.parse(value); + stringBuilder.append(';'); + } + } + } + value = stringBuilder.toString(); + } + } + } + AuthorList authorList = AuthorList.parse(value); return authorList.getAsLastFirstNamesWithAnd(false); } @@ -50,4 +126,14 @@ public String getExampleInput() { return "Albert Einstein and Alan Turing"; } + private static boolean contains(final String[] array, final String[] searchTerms) { + for (String currentTerm : array) { + for (String beCompared : searchTerms) { + if (beCompared.trim().toLowerCase().equals(currentTerm.trim().toLowerCase())) { + return true; + } + } + } + return false; + } } diff --git a/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/NormalizeNamesFormatterTest.java b/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/NormalizeNamesFormatterTest.java index a62d73b1f8e9..1605d11177e8 100644 --- a/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/NormalizeNamesFormatterTest.java +++ b/src/test/java/net/sf/jabref/logic/formatter/bibtexfields/NormalizeNamesFormatterTest.java @@ -92,6 +92,18 @@ public void lastThenJuniorThenFirst() { expectCorrect("Name, della, first", "Name, della, first"); } + @Test + public void testConcatenationOfAuthorsWithCommas() { + expectCorrect("Ali Babar, M., Dingsøyr, T., Lago, P., van der Vliet, H.", + "Ali Babar, M. and Dingsøyr, T. and Lago, P. and van der Vliet, H."); + expectCorrect("Ali Babar, M.", "Ali Babar, M."); + } + + @Test + public void testOddCountOfCommas() { + expectCorrect("Ali Babar, M., Dingsøyr, T., Lago P.", "Ali Babar, M., Dingsøyr T. 
Lago P."); + } + private void expectCorrect(String input, String expected) { Assert.assertEquals(expected, formatter.format(input)); } @@ -101,4 +113,35 @@ public void formatExample() { assertEquals("Einstein, Albert and Turing, Alan", formatter.format(formatter.getExampleInput())); } -} \ No newline at end of file + @Test + public void testNameAffixe() { + expectCorrect("Surname, jr, First, Surname2, First2", "Surname, jr, First and Surname2, First2"); + } + + @Test + public void testAvoidSpecialCharacter() { + expectCorrect("Surname, {, First; Surname2, First2", "Surname, {, First; Surname2, First2"); + } + + @Test + public void testAndInName() { + expectCorrect("Surname, and , First, Surname2, First2", "Surname and , First, Surname2 First2"); + } + + @Test + public void testMultipleNameAffixes() { + expectCorrect("Mair, Jr, Daniel, Brühl, Sr, Daniel", "Mair, Jr, Daniel and Brühl, Sr, Daniel"); + } + + @Test + public void testCommaSeperatedNames() { + expectCorrect("Cristina Bosoi, Mariana Oliveira, Rafael Ochoa Sanchez, Mélanie Tremblay, Gabrie TenHave, Nicoolas Deutz, Christopher F. Rose, Chantal Bemeur", + "Bosoi, Cristina and Oliveira, Mariana and Sanchez, Rafael Ochoa and Tremblay, Mélanie and TenHave, Gabrie and Deutz, Nicoolas and Rose, Christopher F. and Bemeur, Chantal"); + } + + @Test + public void testMultipleSpaces() { + expectCorrect("Cristina Bosoi, Mariana Oliveira, Rafael Ochoa Sanchez , Mélanie Tremblay , Gabrie TenHave, Nicoolas Deutz, Christopher F. Rose, Chantal Bemeur", + "Bosoi, Cristina and Oliveira, Mariana and Sanchez, Rafael Ochoa and Tremblay, Mélanie and TenHave, Gabrie and Deutz, Nicoolas and Rose, Christopher F. 
and Bemeur, Chantal"); + } +} diff --git a/src/test/java/net/sf/jabref/model/entry/AuthorListTest.java b/src/test/java/net/sf/jabref/model/entry/AuthorListTest.java index 859e2ce84a67..e745581f956e 100644 --- a/src/test/java/net/sf/jabref/model/entry/AuthorListTest.java +++ b/src/test/java/net/sf/jabref/model/entry/AuthorListTest.java @@ -500,6 +500,15 @@ public void testGetAuthorsLastFirstAnds() { } + @Test + public void testGetAuthorsLastFirstAndsCaching() { + // getAsLastFirstNamesWithAnd caches its results, therefore we call the method twice using the same arguments + Assert.assertEquals("Smith, John", AuthorList.parse("John Smith").getAsLastFirstNamesWithAnd(false)); + Assert.assertEquals("Smith, John", AuthorList.parse("John Smith").getAsLastFirstNamesWithAnd(false)); + Assert.assertEquals("Smith, J.", AuthorList.parse("John Smith").getAsLastFirstNamesWithAnd(true)); + Assert.assertEquals("Smith, J.", AuthorList.parse("John Smith").getAsLastFirstNamesWithAnd(true)); + } + @Test public void testGetAuthorsFirstFirst() { @@ -611,4 +620,5 @@ public void parseNameWithBraces() throws Exception { Author expected = new Author("H{e}lene", "H.", null, "Fiaux", null); Assert.assertEquals(new AuthorList(expected), AuthorList.parse("H{e}lene Fiaux")); } + }