Skip to content

Commit

Permalink
add Cleanup for copying over Physical Review article id as the page n… (
Browse files Browse the repository at this point in the history
#7025)

* add Cleanup for copying over Physical Review article id as the page number

* remove PageFieldCleanup, add private methods to DoiFetcher

* remove comment from DoiFetcherTest

* replace regex with substring and string comparison

* fix checkstyle issues

* fix checkstyle issues

* add regex to check aps doi format

* move suffix pattern to private static field

* add changelog entry

* add issue link to changelog entry
  • Loading branch information
tmrd993 authored Oct 20, 2020
1 parent 79f0221 commit a7b05d0
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve
- We added a query parser and mapping layer to enable conversion of queries formulated in simplified lucene syntax by the user into api queries. [#6799](https://github.com/JabRef/jabref/pull/6799)
- We added some basic functionality to customise the look of JabRef by importing a css theme file. [#5790](https://github.com/JabRef/jabref/issues/5790)
- We added connection check function in network preference setting [#6560](https://github.com/JabRef/jabref/issues/6560)
- We added a DOI format and organization check that detects [American Physical Society](https://journals.aps.org/) journals and copies the article ID
to the page field when the page numbers are missing. [#7019](https://github.com/JabRef/jabref/issues/7019)
- We added a new fetcher to enable users to search jstor.org [#6627](https://github.com/JabRef/jabref/issues/6627)

### Changed
Expand Down
30 changes: 30 additions & 0 deletions src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.regex.Pattern;

import org.jabref.logic.cleanup.FieldFormatterCleanup;
import org.jabref.logic.formatter.bibtexfields.ClearFormatter;
Expand All @@ -22,6 +23,7 @@
import org.jabref.model.entry.BibEntry;
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;
import org.jabref.model.entry.types.StandardEntryType;
import org.jabref.model.util.DummyFileUpdateMonitor;
import org.jabref.model.util.OptionalUtil;

Expand All @@ -32,8 +34,13 @@
import org.slf4j.LoggerFactory;

public class DoiFetcher implements IdBasedFetcher, EntryBasedFetcher {

public static final String NAME = "DOI";

// Organization id that identifies American Physical Society DOIs; compared against the part of the DOI between its first '.' and its first '/'
private static final String APS_JOURNAL_ORG_DOI_ID = "1103";
// Expected shape of the DOI suffix (the part after the last '/'): three dot-separated groups of word characters, e.g. "physreva.102.023315"
private static final String APS_SUFFIX = "([\\w]+\\.)([\\w]+\\.)([\\w]+)";
// Precompiled form of APS_SUFFIX, used to match the complete DOI suffix
private static final Pattern APS_SUFFIX_PATTERN = Pattern.compile(APS_SUFFIX);

private static final Logger LOGGER = LoggerFactory.getLogger(DoiFetcher.class);

private final ImportFormatPreferences preferences;
Expand Down Expand Up @@ -75,6 +82,14 @@ public Optional<BibEntry> performSearchById(String identifier) throws FetcherExc
fetchedEntry = BibtexParser.singleFromString(bibtexString, preferences, new DummyFileUpdateMonitor());
fetchedEntry.ifPresent(this::doPostCleanup);

// Check if the entry is an APS journal and add the article id as the page count if page field is missing
if (fetchedEntry.isPresent() && fetchedEntry.get().hasField(StandardField.DOI)) {
BibEntry entry = fetchedEntry.get();
if (isAPSJournal(entry, entry.getField(StandardField.DOI).get()) && !entry.hasField(StandardField.PAGES)) {
setPageCountToArticleId(entry, entry.getField(StandardField.DOI).get());
}
}

return fetchedEntry;
} else {
throw new FetcherException(Localization.lang("Invalid DOI: '%0'.", identifier));
Expand Down Expand Up @@ -123,4 +138,19 @@ public Optional<String> getAgency(DOI doi) throws IOException {

return agency;
}

/**
 * Writes the article id of the given DOI into the pages field of the entry.
 * The article id is everything that follows the last '.' of the DOI string.
 *
 * @param entry       the entry whose pages field is set
 * @param doiAsString the DOI the article id is taken from
 */
private void setPageCountToArticleId(BibEntry entry, String doiAsString) {
    int articleIdStart = doiAsString.lastIndexOf('.') + 1;
    entry.setField(StandardField.PAGES, doiAsString.substring(articleIdStart));
}

/**
 * Checks whether the entry looks like an article of an APS journal by comparing the
 * organization id of the DOI and the format of the DOI suffix.
 * <p>
 * The organization id is the text between the first '.' and the first '/' of the DOI,
 * the suffix is the text after the last '/'.
 *
 * @param entry       the entry to check; only entries of type article can match
 * @param doiAsString the DOI of the entry as plain string, e.g. "10.1103/physreva.102.023315"
 * @return true if the entry is an article, the organization id equals the APS id and the suffix
 *         matches the APS suffix pattern; false otherwise, including for strings that do not
 *         have the "prefix.organization/suffix" shape
 */
private boolean isAPSJournal(BibEntry entry, String doiAsString) {
    if (!entry.getType().equals(StandardEntryType.Article)) {
        return false;
    }

    int firstDot = doiAsString.indexOf('.');
    int firstSlash = doiAsString.indexOf('/');
    // Without a '.' followed later by a '/' there is no organization id to extract. Bail out here,
    // otherwise substring would throw a StringIndexOutOfBoundsException (e.g. for a DOI given as URL).
    if (firstDot < 0 || firstSlash <= firstDot) {
        return false;
    }

    String organizationId = doiAsString.substring(firstDot + 1, firstSlash);
    String suffix = doiAsString.substring(doiAsString.lastIndexOf('/') + 1);
    return organizationId.equals(APS_JOURNAL_ORG_DOI_ID) && APS_SUFFIX_PATTERN.matcher(suffix).matches();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public class DoiFetcherTest {
private BibEntry bibEntryBurd2011;
private BibEntry bibEntryDecker2007;
private BibEntry bibEntryIannarelli2019;
private BibEntry bibEntryStenzel2020;

@BeforeEach
public void setUp() {
Expand Down Expand Up @@ -68,6 +69,20 @@ public void setUp() {
.withField(StandardField.JOURNAL, "Chemical Engineering Transactions")
.withField(StandardField.PAGES, "871-876")
.withField(StandardField.VOLUME, "77");
bibEntryStenzel2020 = new BibEntry();
bibEntryStenzel2020.setType(StandardEntryType.Article);
bibEntryStenzel2020.setCitationKey("Stenzel_2020");
bibEntryStenzel2020.setField(StandardField.AUTHOR, "L. Stenzel and A. L. C. Hayward and U. Schollwöck and F. Heidrich-Meisner");
bibEntryStenzel2020.setField(StandardField.JOURNAL, "Physical Review A");
bibEntryStenzel2020.setField(StandardField.TITLE, "Topological phases in the Fermi-Hofstadter-Hubbard model on hybrid-space ladders");
bibEntryStenzel2020.setField(StandardField.YEAR, "2020");
bibEntryStenzel2020.setField(StandardField.MONTH, "aug");
bibEntryStenzel2020.setField(StandardField.VOLUME, "102");
bibEntryStenzel2020.setField(StandardField.DOI, "10.1103/physreva.102.023315");
bibEntryStenzel2020.setField(StandardField.PUBLISHER, "American Physical Society ({APS})");
bibEntryStenzel2020.setField(StandardField.PAGES, "023315");
bibEntryStenzel2020.setField(StandardField.NUMBER, "2");

}

@Test
Expand Down Expand Up @@ -108,4 +123,10 @@ public void testPerformSearchNonTrimmedDOI() throws FetcherException {
Optional<BibEntry> fetchedEntry = fetcher.performSearchById("http s://doi.org/ 10.1109 /ICWS .2007.59 ");
assertEquals(Optional.of(bibEntryDecker2007), fetchedEntry);
}

@Test
public void testAPSJournalCopiesArticleIdToPageField() throws FetcherException {
    // The expected entry carries the article id "023315" in its pages field
    final String apsDoi = "10.1103/physreva.102.023315";
    Optional<BibEntry> actualEntry = fetcher.performSearchById(apsDoi);
    assertEquals(Optional.of(bibEntryStenzel2020), actualEntry);
}
}

0 comments on commit a7b05d0

Please sign in to comment.