From c9b528436d212d3223f8dbea248a360598b111b6 Mon Sep 17 00:00:00 2001 From: tmrd993 Date: Sun, 18 Oct 2020 15:29:04 +0200 Subject: [PATCH 01/10] add Cleanup for copying over physical review article id as the page number --- .../logic/cleanup/PageFieldCleanup.java | 34 +++++++++++++++++++ .../logic/importer/fetcher/DoiFetcher.java | 2 ++ 2 files changed, 36 insertions(+) create mode 100644 src/main/java/org/jabref/logic/cleanup/PageFieldCleanup.java diff --git a/src/main/java/org/jabref/logic/cleanup/PageFieldCleanup.java b/src/main/java/org/jabref/logic/cleanup/PageFieldCleanup.java new file mode 100644 index 00000000000..03431808616 --- /dev/null +++ b/src/main/java/org/jabref/logic/cleanup/PageFieldCleanup.java @@ -0,0 +1,34 @@ +package org.jabref.logic.cleanup; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +import org.jabref.model.FieldChange; +import org.jabref.model.entry.BibEntry; +import org.jabref.model.entry.field.StandardField; + +/** + * adds the article ID of a journal as the page count, but only if the page field is empty + */ +public class PageFieldCleanup implements CleanupJob { + + @Override + public List cleanup(BibEntry entry) { + List changes = new ArrayList<>(); + Optional doiAsString = entry.getField(StandardField.DOI); + + if (doiAsString.isPresent() && !entry.hasField(StandardField.PAGES)) { + String articleId = new String(); + int index = doiAsString.get().length() - 1; + while (Character.isDigit(doiAsString.get().charAt(index))) { + articleId = doiAsString.get().charAt(index--) + articleId; + } + entry.setField(StandardField.PAGES, articleId); + FieldChange change = new FieldChange(entry, StandardField.PAGES, "", articleId); + changes.add(change); + } + + return changes; + } +} diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java index ae1e3ab5961..6afb54c9e09 100644 --- 
a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java @@ -7,6 +7,7 @@ import java.util.Optional; import org.jabref.logic.cleanup.FieldFormatterCleanup; +import org.jabref.logic.cleanup.PageFieldCleanup; import org.jabref.logic.formatter.bibtexfields.ClearFormatter; import org.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter; import org.jabref.logic.help.HelpFile; @@ -89,6 +90,7 @@ public Optional performSearchById(String identifier) throws FetcherExc } private void doPostCleanup(BibEntry entry) { + new PageFieldCleanup().cleanup(entry); new FieldFormatterCleanup(StandardField.PAGES, new NormalizePagesFormatter()).cleanup(entry); new FieldFormatterCleanup(StandardField.URL, new ClearFormatter()).cleanup(entry); } From 3856f25edd2d53f3b10fdaeeee4e926b6279f135 Mon Sep 17 00:00:00 2001 From: tmrd993 Date: Mon, 19 Oct 2020 10:27:45 +0200 Subject: [PATCH 02/10] remove PageFieldCleanup, add private methods to DoiFetcher --- .../logic/cleanup/PageFieldCleanup.java | 34 ----------------- .../logic/importer/fetcher/DoiFetcher.java | 26 ++++++++++++- .../importer/fetcher/DoiFetcherTest.java | 37 +++++++++++++++++++ 3 files changed, 61 insertions(+), 36 deletions(-) delete mode 100644 src/main/java/org/jabref/logic/cleanup/PageFieldCleanup.java diff --git a/src/main/java/org/jabref/logic/cleanup/PageFieldCleanup.java b/src/main/java/org/jabref/logic/cleanup/PageFieldCleanup.java deleted file mode 100644 index 03431808616..00000000000 --- a/src/main/java/org/jabref/logic/cleanup/PageFieldCleanup.java +++ /dev/null @@ -1,34 +0,0 @@ -package org.jabref.logic.cleanup; - -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; - -import org.jabref.model.FieldChange; -import org.jabref.model.entry.BibEntry; -import org.jabref.model.entry.field.StandardField; - -/** - * adds the article ID of a journal as the page count, but only if the page field is empty - */ -public 
class PageFieldCleanup implements CleanupJob { - - @Override - public List cleanup(BibEntry entry) { - List changes = new ArrayList<>(); - Optional doiAsString = entry.getField(StandardField.DOI); - - if (doiAsString.isPresent() && !entry.hasField(StandardField.PAGES)) { - String articleId = new String(); - int index = doiAsString.get().length() - 1; - while (Character.isDigit(doiAsString.get().charAt(index))) { - articleId = doiAsString.get().charAt(index--) + articleId; - } - entry.setField(StandardField.PAGES, articleId); - FieldChange change = new FieldChange(entry, StandardField.PAGES, "", articleId); - changes.add(change); - } - - return changes; - } -} diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java index 6afb54c9e09..e27b2f1e15f 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java @@ -5,9 +5,9 @@ import java.util.Collections; import java.util.List; import java.util.Optional; +import java.util.regex.Pattern; import org.jabref.logic.cleanup.FieldFormatterCleanup; -import org.jabref.logic.cleanup.PageFieldCleanup; import org.jabref.logic.formatter.bibtexfields.ClearFormatter; import org.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter; import org.jabref.logic.help.HelpFile; @@ -23,6 +23,7 @@ import org.jabref.model.entry.BibEntry; import org.jabref.model.entry.field.StandardField; import org.jabref.model.entry.identifier.DOI; +import org.jabref.model.entry.types.StandardEntryType; import org.jabref.model.util.DummyFileUpdateMonitor; import org.jabref.model.util.OptionalUtil; @@ -76,6 +77,14 @@ public Optional performSearchById(String identifier) throws FetcherExc fetchedEntry = BibtexParser.singleFromString(bibtexString, preferences, new DummyFileUpdateMonitor()); fetchedEntry.ifPresent(this::doPostCleanup); + // Check if the entry is an APS journal and add the 
article id as the page count if page field is missing + if (fetchedEntry.isPresent() && fetchedEntry.get().hasField(StandardField.DOI)) { + BibEntry entry = fetchedEntry.get(); + if (isAPSJournal(entry, entry.getField(StandardField.DOI).get()) && !entry.hasField(StandardField.PAGES)) { + setPageCountToArticleId(entry, entry.getField(StandardField.DOI).get()); + } + } + return fetchedEntry; } else { throw new FetcherException(Localization.lang("Invalid DOI: '%0'.", identifier)); @@ -90,7 +99,6 @@ public Optional performSearchById(String identifier) throws FetcherExc } private void doPostCleanup(BibEntry entry) { - new PageFieldCleanup().cleanup(entry); new FieldFormatterCleanup(StandardField.PAGES, new NormalizePagesFormatter()).cleanup(entry); new FieldFormatterCleanup(StandardField.URL, new ClearFormatter()).cleanup(entry); } @@ -125,4 +133,18 @@ public Optional getAgency(DOI doi) throws IOException { return agency; } + + private void setPageCountToArticleId(BibEntry entry, String doiAsString) { + String articleId = doiAsString.substring(doiAsString.lastIndexOf('.') + 1); + entry.setField(StandardField.PAGES, articleId); + } + + private boolean isAPSJournal(BibEntry entry, String doiAsString) { + if (!entry.getType().equals(StandardEntryType.Article)) { + return false; + } + Pattern apsJournalSuffixPattern = Pattern.compile("([\\w]+\\.)([\\w]+\\.)([\\w]+)"); + String suffix = doiAsString.substring(doiAsString.lastIndexOf('/') + 1); + return apsJournalSuffixPattern.matcher(suffix).matches(); + } } diff --git a/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java index 70ff718193f..fa38c455391 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java @@ -24,6 +24,7 @@ public class DoiFetcherTest { private BibEntry bibEntryBurd2011; private BibEntry bibEntryDecker2007; private BibEntry 
bibEntryIannarelli2019; + private BibEntry bibEntryStenzel2020; @BeforeEach public void setUp() { @@ -68,6 +69,36 @@ public void setUp() { .withField(StandardField.JOURNAL, "Chemical Engineering Transactions") .withField(StandardField.PAGES, "871-876") .withField(StandardField.VOLUME, "77"); + bibEntryStenzel2020 = new BibEntry(); + bibEntryStenzel2020.setType(StandardEntryType.Article); + bibEntryStenzel2020.setCitationKey("Stenzel_2020"); + bibEntryStenzel2020.setField(StandardField.AUTHOR, "L. Stenzel and A. L. C. Hayward and U. Schollwöck and F. Heidrich-Meisner"); + bibEntryStenzel2020.setField(StandardField.JOURNAL, "Physical Review A"); + bibEntryStenzel2020.setField(StandardField.TITLE, "Topological phases in the Fermi-Hofstadter-Hubbard model on hybrid-space ladders"); + bibEntryStenzel2020.setField(StandardField.YEAR, "2020"); + bibEntryStenzel2020.setField(StandardField.MONTH, "aug"); + bibEntryStenzel2020.setField(StandardField.VOLUME, "102"); + bibEntryStenzel2020.setField(StandardField.DOI, "10.1103/physreva.102.023315"); + bibEntryStenzel2020.setField(StandardField.PUBLISHER, "American Physical Society ({APS})"); + bibEntryStenzel2020.setField(StandardField.PAGES, "023315"); + bibEntryStenzel2020.setField(StandardField.NUMBER, "2"); + + /* + * + * @Article{Stenzel2020, + author = {L. Stenzel and A. L. C. Hayward and U. Schollwöck and F. 
Heidrich-Meisner}, + journal = {Physical Review A}, + title = {Topological phases in the Fermi-Hofstadter-Hubbard model on hybrid-space ladders}, + year = {2020}, + month = {aug}, + number = {2}, + volume = {102}, + doi = {10.1103/physreva.102.023315}, + publisher = {American Physical Society ({APS})}, + } + + */ + } @Test @@ -108,4 +139,10 @@ public void testPerformSearchNonTrimmedDOI() throws FetcherException { Optional fetchedEntry = fetcher.performSearchById("http s://doi.org/ 10.1109 /ICWS .2007.59 "); assertEquals(Optional.of(bibEntryDecker2007), fetchedEntry); } + + @Test + public void testAPSJournalCopiesArticleIdToPageField() throws FetcherException { + Optional fetchedEntry = fetcher.performSearchById("10.1103/physreva.102.023315"); + assertEquals(Optional.of(bibEntryStenzel2020), fetchedEntry); + } } From 342cacbcad2c6f9a425e1b9accce0fa6485ce0e3 Mon Sep 17 00:00:00 2001 From: tmrd993 Date: Mon, 19 Oct 2020 10:30:19 +0200 Subject: [PATCH 03/10] remove comment from DoiFetcherTest --- .../logic/importer/fetcher/DoiFetcherTest.java | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java b/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java index fa38c455391..d824e77cc4c 100644 --- a/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java +++ b/src/test/java/org/jabref/logic/importer/fetcher/DoiFetcherTest.java @@ -83,22 +83,6 @@ public void setUp() { bibEntryStenzel2020.setField(StandardField.PAGES, "023315"); bibEntryStenzel2020.setField(StandardField.NUMBER, "2"); - /* - * - * @Article{Stenzel2020, - author = {L. Stenzel and A. L. C. Hayward and U. Schollwöck and F. 
Heidrich-Meisner}, - journal = {Physical Review A}, - title = {Topological phases in the Fermi-Hofstadter-Hubbard model on hybrid-space ladders}, - year = {2020}, - month = {aug}, - number = {2}, - volume = {102}, - doi = {10.1103/physreva.102.023315}, - publisher = {American Physical Society ({APS})}, - } - - */ - } @Test From e69989a44c5c15b1ac75a95d1cd585f786eb6a72 Mon Sep 17 00:00:00 2001 From: tmrd993 Date: Mon, 19 Oct 2020 16:02:49 +0200 Subject: [PATCH 04/10] replace regex with substring and string comparison --- .../org/jabref/logic/importer/fetcher/DoiFetcher.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java index e27b2f1e15f..64bc738bf2d 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java @@ -5,8 +5,6 @@ import java.util.Collections; import java.util.List; import java.util.Optional; -import java.util.regex.Pattern; - import org.jabref.logic.cleanup.FieldFormatterCleanup; import org.jabref.logic.formatter.bibtexfields.ClearFormatter; import org.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter; @@ -34,6 +32,9 @@ import org.slf4j.LoggerFactory; public class DoiFetcher implements IdBasedFetcher, EntryBasedFetcher { + + private static final String APS_JOURNAL_ORG_DOI_ID = "1103"; + public static final String NAME = "DOI"; private static final Logger LOGGER = LoggerFactory.getLogger(DoiFetcher.class); @@ -143,8 +144,7 @@ private boolean isAPSJournal(BibEntry entry, String doiAsString) { if (!entry.getType().equals(StandardEntryType.Article)) { return false; } - Pattern apsJournalSuffixPattern = Pattern.compile("([\\w]+\\.)([\\w]+\\.)([\\w]+)"); - String suffix = doiAsString.substring(doiAsString.lastIndexOf('/') + 1); - return apsJournalSuffixPattern.matcher(suffix).matches(); + String 
organizationId = doiAsString.substring(doiAsString.indexOf('.') + 1, doiAsString.indexOf('/')); + return organizationId.equals(APS_JOURNAL_ORG_DOI_ID); } } From 1779e423bdff56193830f9d777ddf88c1cc5c760 Mon Sep 17 00:00:00 2001 From: tmrd993 Date: Mon, 19 Oct 2020 16:10:14 +0200 Subject: [PATCH 05/10] fix checkstyle issues --- .../java/org/jabref/logic/importer/fetcher/DoiFetcher.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java index 64bc738bf2d..8c2b0d49b78 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java @@ -5,6 +5,7 @@ import java.util.Collections; import java.util.List; import java.util.Optional; + import org.jabref.logic.cleanup.FieldFormatterCleanup; import org.jabref.logic.formatter.bibtexfields.ClearFormatter; import org.jabref.logic.formatter.bibtexfields.NormalizePagesFormatter; @@ -28,15 +29,16 @@ import kong.unirest.json.JSONArray; import kong.unirest.json.JSONException; import kong.unirest.json.JSONObject; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class DoiFetcher implements IdBasedFetcher, EntryBasedFetcher { - private static final String APS_JOURNAL_ORG_DOI_ID = "1103"; - public static final String NAME = "DOI"; + private static final String APS_JOURNAL_ORG_DOI_ID = "1103"; + private static final Logger LOGGER = LoggerFactory.getLogger(DoiFetcher.class); private final ImportFormatPreferences preferences; From 37042695c9688a96b7ec128942eaeef42a5f8171 Mon Sep 17 00:00:00 2001 From: tmrd993 Date: Mon, 19 Oct 2020 16:13:26 +0200 Subject: [PATCH 06/10] fix checkstyle issues --- src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java 
b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java index 8c2b0d49b78..07ca1dd7b82 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java @@ -29,7 +29,6 @@ import kong.unirest.json.JSONArray; import kong.unirest.json.JSONException; import kong.unirest.json.JSONObject; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; From 1853df7c6f4a5ee7ebf4c01d4e436e045009a55e Mon Sep 17 00:00:00 2001 From: tmrd993 Date: Tue, 20 Oct 2020 11:04:30 +0200 Subject: [PATCH 07/10] add regex to check aps doi format --- .../java/org/jabref/logic/importer/fetcher/DoiFetcher.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java index 07ca1dd7b82..bee371260b5 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java @@ -5,6 +5,7 @@ import java.util.Collections; import java.util.List; import java.util.Optional; +import java.util.regex.Pattern; import org.jabref.logic.cleanup.FieldFormatterCleanup; import org.jabref.logic.formatter.bibtexfields.ClearFormatter; @@ -37,6 +38,7 @@ public class DoiFetcher implements IdBasedFetcher, EntryBasedFetcher { public static final String NAME = "DOI"; private static final String APS_JOURNAL_ORG_DOI_ID = "1103"; + private static final String APS_SUFFIX = "([\\w]+\\.)([\\w]+\\.)([\\w]+)"; private static final Logger LOGGER = LoggerFactory.getLogger(DoiFetcher.class); @@ -141,11 +143,14 @@ private void setPageCountToArticleId(BibEntry entry, String doiAsString) { entry.setField(StandardField.PAGES, articleId); } + // checks if the entry is an APS journal by comparing the organization id and the suffix format private boolean isAPSJournal(BibEntry entry, String doiAsString) { if 
(!entry.getType().equals(StandardEntryType.Article)) { return false; } + Pattern apsSuffixPattern = Pattern.compile(APS_SUFFIX); + String suffix = doiAsString.substring(doiAsString.lastIndexOf('/') + 1); String organizationId = doiAsString.substring(doiAsString.indexOf('.') + 1, doiAsString.indexOf('/')); - return organizationId.equals(APS_JOURNAL_ORG_DOI_ID); + return organizationId.equals(APS_JOURNAL_ORG_DOI_ID) && apsSuffixPattern.matcher(suffix).matches(); } } From 983962f3c2fa72dcaa0543b7caddfdf45db5dfcf Mon Sep 17 00:00:00 2001 From: tmrd993 Date: Tue, 20 Oct 2020 11:42:02 +0200 Subject: [PATCH 08/10] move suffix pattern to private static field --- .../java/org/jabref/logic/importer/fetcher/DoiFetcher.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java index bee371260b5..617c0985814 100644 --- a/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java +++ b/src/main/java/org/jabref/logic/importer/fetcher/DoiFetcher.java @@ -39,6 +39,7 @@ public class DoiFetcher implements IdBasedFetcher, EntryBasedFetcher { private static final String APS_JOURNAL_ORG_DOI_ID = "1103"; private static final String APS_SUFFIX = "([\\w]+\\.)([\\w]+\\.)([\\w]+)"; + private static final Pattern APS_SUFFIX_PATTERN = Pattern.compile(APS_SUFFIX); private static final Logger LOGGER = LoggerFactory.getLogger(DoiFetcher.class); @@ -148,9 +149,8 @@ private boolean isAPSJournal(BibEntry entry, String doiAsString) { if (!entry.getType().equals(StandardEntryType.Article)) { return false; } - Pattern apsSuffixPattern = Pattern.compile(APS_SUFFIX); String suffix = doiAsString.substring(doiAsString.lastIndexOf('/') + 1); String organizationId = doiAsString.substring(doiAsString.indexOf('.') + 1, doiAsString.indexOf('/')); - return organizationId.equals(APS_JOURNAL_ORG_DOI_ID) && apsSuffixPattern.matcher(suffix).matches(); + return 
organizationId.equals(APS_JOURNAL_ORG_DOI_ID) && APS_SUFFIX_PATTERN.matcher(suffix).matches(); } } From a1fe707781c1e05282d06ca043a0bb61d9fa3367 Mon Sep 17 00:00:00 2001 From: tmrd993 Date: Tue, 20 Oct 2020 12:13:38 +0200 Subject: [PATCH 09/10] add changelog entry --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a1922cfff3..6be49ebbec1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We added a query parser and mapping layer to enable conversion of queries formulated in simplified lucene syntax by the user into api queries. [#6799](https://github.com/JabRef/jabref/pull/6799) - We added some basic functionality to customise the look of JabRef by importing a css theme file. [#5790](https://github.com/JabRef/jabref/issues/5790) - We added connection check function in network preference setting [#6560](https://github.com/JabRef/jabref/issues/6560) +- We added a DOI format and organization check to detect [American Physical Society](https://journals.aps.org/) journals to copy the article ID +to the page field for cases where the page numbers are missing ### Changed From 593c86d4537bcd8f0a70197a02d3bbee47f5ab9c Mon Sep 17 00:00:00 2001 From: tmrd993 Date: Tue, 20 Oct 2020 12:15:19 +0200 Subject: [PATCH 10/10] add issue link to changelog entry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6be49ebbec1..8dd344e2cad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ Note that this project **does not** adhere to [Semantic Versioning](http://semve - We added some basic functionality to customise the look of JabRef by importing a css theme file. 
[#5790](https://github.com/JabRef/jabref/issues/5790) - We added connection check function in network preference setting [#6560](https://github.com/JabRef/jabref/issues/6560) - We added a DOI format and organization check to detect [American Physical Society](https://journals.aps.org/) journals to copy the article ID -to the page field for cases where the page numbers are missing +to the page field for cases where the page numbers are missing. [#7019](https://github.com/JabRef/jabref/issues/7019) ### Changed