Skip to content

Commit

Permalink
fix ID consideration in DuplicateCheck
Browse files Browse the repository at this point in the history
  • Loading branch information
matthiasgeiger committed Apr 5, 2017
1 parent b714206 commit a1f4101
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 12 deletions.
18 changes: 6 additions & 12 deletions src/main/java/org/jabref/logic/bibtex/DuplicateCheck.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,27 +59,21 @@ private DuplicateCheck() {}
/**
* Checks if the two entries represent the same publication.
*
* Requirements:
* 1. Equal entry type
*
* Checks:
* 1. Equal identifier (DOI)
*
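* @param one the first entry to compare
* @param two the second entry to compare
* @param bibDatabaseMode the database mode used to look up the entry type definition
* @return {@code true} if the two entries are considered to represent the same publication, {@code false} otherwise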
*/
public static boolean isDuplicate(BibEntry one, BibEntry two, BibDatabaseMode bibDatabaseMode) {
// same entry type
if (!one.getType().equals(two.getType())) {
return false;
}

// same identifier
if (hasSameIdentifier(one, two)) {
return true;
}

// same entry type
if (!one.getType().equals(two.getType())) {
return false;
}

EntryType type = EntryTypes.getTypeOrDefault(one.getType(), bibDatabaseMode);
// The check if they have the same required fields:
List<String> var = type.getRequiredFieldsFlat();
Expand All @@ -106,7 +100,7 @@ public static boolean isDuplicate(BibEntry one, BibEntry two, BibDatabaseMode bi

private static boolean hasSameIdentifier(BibEntry one, BibEntry two) {
for (String name : FieldName.getIdentifierFieldNames()) {
if (compareSingleField(name, one, two) == 1) {
if (one.getField(name).isPresent() && one.getField(name).equals(two.getField(name))) {
return true;
}
}
Expand Down
64 changes: 64 additions & 0 deletions src/test/java/org/jabref/logic/bibtex/DuplicateCheckTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

import org.jabref.model.database.BibDatabaseMode;
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.BibtexEntryType;
import org.jabref.model.entry.BibtexEntryTypes;
import org.jabref.model.entry.FieldName;

import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.assertEquals;
Expand All @@ -12,6 +15,21 @@

public class DuplicateCheckTest {

private BibEntry simpleArticle;
private BibEntry unrelatedArticle;

@Before
public void setUp() {
    // Both fixtures are articles; they differ in author, title and year.
    final String articleType = BibtexEntryTypes.ARTICLE.getName();

    simpleArticle = new BibEntry(articleType)
            .withField(FieldName.AUTHOR, "Single Author")
            .withField(FieldName.TITLE, "A serious paper about something")
            .withField(FieldName.YEAR, "2017");

    unrelatedArticle = new BibEntry(articleType)
            .withField(FieldName.AUTHOR, "Completely Different")
            .withField(FieldName.TITLE, "Holy Moly Uffdada und Trallalla")
            .withField(FieldName.YEAR, "1992");
}

@Test
public void testDuplicateDetection() {
BibEntry one = new BibEntry(BibtexEntryTypes.ARTICLE.getName());
Expand Down Expand Up @@ -79,4 +97,50 @@ public void testWordCorrelation() {
assertEquals(0.78, (DuplicateCheck.correlateByWords(d2, d3)), 0.01);
}

@Test
public void twoUnrelatedEntriesAreNoDuplicates() {
    // The fixtures differ in author, title and year, and neither has an identifier field set.
    boolean flaggedAsDuplicate =
            DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX);

    assertFalse(flaggedAsDuplicate);
}

@Test
public void twoUnrelatedEntriesWithDifferentDoisAreNoDuplicates() {
    // The two DOIs differ only in their last character.
    final String firstDoi = "10.1016/j.is.2004.02.002";
    final String secondDoi = "10.1016/j.is.2004.02.00X";
    simpleArticle.setField(FieldName.DOI, firstDoi);
    unrelatedArticle.setField(FieldName.DOI, secondDoi);

    boolean flaggedAsDuplicate =
            DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX);

    assertFalse(flaggedAsDuplicate);
}

@Test
public void twoUnrelatedEntriesWithEqualDoisAreDuplicates() {
    // Give both otherwise unrelated entries the same DOI.
    final String sharedDoi = "10.1016/j.is.2004.02.002";
    simpleArticle.setField(FieldName.DOI, sharedDoi);
    unrelatedArticle.setField(FieldName.DOI, sharedDoi);

    boolean flaggedAsDuplicate =
            DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX);

    assertTrue(flaggedAsDuplicate);
}

@Test
public void twoUnrelatedEntriesWithEqualPmidAreDuplicates() {
    // Give both otherwise unrelated entries the same PMID.
    final String sharedPmid = "12345678";
    simpleArticle.setField(FieldName.PMID, sharedPmid);
    unrelatedArticle.setField(FieldName.PMID, sharedPmid);

    boolean flaggedAsDuplicate =
            DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX);

    assertTrue(flaggedAsDuplicate);
}

@Test
public void twoUnrelatedEntriesWithEqualEprintAreDuplicates() {
    // Give both otherwise unrelated entries the same eprint value.
    final String sharedEprint = "12345678";
    simpleArticle.setField(FieldName.EPRINT, sharedEprint);
    unrelatedArticle.setField(FieldName.EPRINT, sharedEprint);

    boolean flaggedAsDuplicate =
            DuplicateCheck.isDuplicate(simpleArticle, unrelatedArticle, BibDatabaseMode.BIBTEX);

    assertTrue(flaggedAsDuplicate);
}

@Test
public void twoEntriesWithSameDoiButDifferentTypesAreDuplicates() {
    simpleArticle.setField(FieldName.DOI, "10.1016/j.is.2004.02.002");

    // Clone only after the DOI is set, then change nothing but the entry type of the copy.
    BibEntry retypedCopy = (BibEntry) simpleArticle.clone();
    retypedCopy.setType(BibtexEntryTypes.INCOLLECTION);

    boolean flaggedAsDuplicate =
            DuplicateCheck.isDuplicate(simpleArticle, retypedCopy, BibDatabaseMode.BIBTEX);

    assertTrue(flaggedAsDuplicate);
}

}

0 comments on commit a1f4101

Please sign in to comment.