From e295ca298af5a6f9d718c9384df07fb7c7644b38 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Sat, 15 Oct 2022 18:07:26 +1100 Subject: [PATCH 01/12] Sample Push to See GH Actions --- update_mathscinet.py | 1 + 1 file changed, 1 insertion(+) diff --git a/update_mathscinet.py b/update_mathscinet.py index 384f0bd..a1c7da6 100755 --- a/update_mathscinet.py +++ b/update_mathscinet.py @@ -19,3 +19,4 @@ # Save the end file in the same path as the old one df.to_csv(file_out, sep=";", escapechar="\\", index=False, header=False) + From 6d94f12d2666727237c8e78d0823e406983f0bef Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Sat, 15 Oct 2022 18:11:47 +1100 Subject: [PATCH 02/12] Fix Actions --- LICENSE.md | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/LICENSE.md b/LICENSE.md index 7682507..3784e18 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -2,7 +2,7 @@ CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. -### Statement of Purpose +## Statement of Purpose The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). diff --git a/README.md b/README.md index 623a4e7..b90f552 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,6 @@ In case of duplicate appearances in the journal lists, the last occuring abbrevi * Frontend: * API: -It takes the official list of ISO4 abbreviations of single words, plus the general rules defined in the ISO4 specifications to deduce the abbreviation for any journal name you input. +It takes the official list of ISO4 abbreviations of single words, plus the general rules defined in the ISO4 specifications to deduce the abbreviation for any journal name you input. Could be an alternative or complementary (when missing in the lists) approach to abbreviate journal names. But of course, it does not handle unabbreviation, for which there is no alternative to lists. It can also be a way to check the consistency of existing lists and it might make sense to link to the frontend on the abbrv.jabref website, so that people who want to add abbreviations can check for the correct one. From 65e2a291a5afd62f9b5cb18b68c585381734e4a7 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Sat, 15 Oct 2022 18:56:23 +1100 Subject: [PATCH 03/12] Added Core Ampersands Checker --- .github/workflows/tests.yml | 7 +++++++ check_ampersands.py | 18 ++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 check_ampersands.py diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d2e15a7..063d6bf 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -18,3 +18,10 @@ jobs: with: config: './.markdownlint.yml' args: . + + journal-check: + name: Check Ampersands Un-escaped + runs-on: ubuntu-latest + steps: + - name: Run Python Ampersands Script + run: python3 check_ampersands.py \ No newline at end of file diff --git a/check_ampersands.py b/check_ampersands.py new file mode 100644 index 0000000..60181e5 --- /dev/null +++ b/check_ampersands.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python3 + +import os +import itertools + +path = "./journals/" + +fileNames = next(itertools.islice(os.walk(path), 0, None))[2] + +for file in fileNames: + if (file.endswith(".csv")): + with open(path + file, "r") as f: + if ('\&' in f.read()): + raise ValueError("Found an escaped Ampersand in: " + file) + + + + From 8cde5d33100f00161537c9f151201d0b35dbd98f Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Sat, 15 Oct 2022 19:00:19 +1100 Subject: [PATCH 04/12] Running Python After Checkout --- .github/workflows/tests.yml | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 063d6bf..26cf3a6 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ on: # always run on pull requests jobs: markdown-check: - name: Check markdown + name: Check markdown and CSV formatting runs-on: ubuntu-latest steps: - name: Checkout source @@ -18,10 +18,6 @@ jobs: with: config: './.markdownlint.yml' args: . - - journal-check: - name: Check Ampersands Un-escaped - runs-on: ubuntu-latest - steps: - name: Run Python Ampersands Script - run: python3 check_ampersands.py \ No newline at end of file + run: python3 check_ampersands.py + \ No newline at end of file From 7621b14f7c97f74cf83a4c794db9855798e01c49 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Sun, 16 Oct 2022 10:00:05 +1100 Subject: [PATCH 05/12] Added positional info to ValueError() exception --- check_ampersands.py | 50 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/check_ampersands.py b/check_ampersands.py index 60181e5..dbab104 100644 --- a/check_ampersands.py +++ b/check_ampersands.py @@ -1,18 +1,50 @@ #!/usr/bin/env python3 +""" +Python script for checking if all Ampersands in .csv journal abbreviation files are +unescaped. This convention is enforced to ensure that abbreviations of journal titles +can be done without error. + +The script will raise a ValueError() in case escaped ampersands are found, and will +also provide the row and column in which they were found (1 -indexed). The script does +NOT automatically fix these errors. This should be done manually. + +The script will automatically run whenever there is a push to the main branch of the +abbreviations repo (abbrv.jabref.org) using GitHub Actions. +""" + import os import itertools -path = "./journals/" +# Get all file names in journal folders +PATH_TO_JOURNALS = "./journals/" +fileNames = next(itertools.islice(os.walk(PATH_TO_JOURNALS), 0, None))[2] -fileNames = next(itertools.islice(os.walk(path), 0, None))[2] +# Store ALL locations of escaped ampersands so they can all be printed upon failure +errFileNames = [] +errRows = [] +errCols = [] for file in fileNames: if (file.endswith(".csv")): - with open(path + file, "r") as f: - if ('\&' in f.read()): - raise ValueError("Found an escaped Ampersand in: " + file) - - - - + # For each .csv file in the folder, open in read mode + with open(PATH_TO_JOURNALS + file, "r") as f: + for i, line in enumerate(f): + # For each line, if it has \&, store the fname, row and columns + if ('\&' in line): + errFileNames.append(file) + errRows.append(i + 1) + errCols.append([index + 1 for index in range(len(line)) if line.startswith('\&', index)]) + + +# In the case where we do find escaped &, the len() will be non-zero +if (len(errFileNames) > 0): + err_msg = "[" + # For each file, append every row:col location to the error message + for i, fname in enumerate(errFileNames): + for col in errCols[i]: + err_msg += "("+ fname + ", " + str(errRows[i]) + ":" + str(col) + "), " + # Format end of string and return as Value Error to 'fail' GitHub Actions process + err_msg = err_msg[:len(err_msg) - 2] + err_msg += "]" + raise ValueError("Found Escaped Ampersands at: " + err_msg) \ No newline at end of file From dd3581722f9df60b257dd10bf5141fac1e268d38 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Sun, 16 Oct 2022 10:13:44 +1100 Subject: [PATCH 06/12] Fixed \& + Updated CHANGELOG for #107 --- CHANGELOG.md | 10 ++++++++++ journals/journal_abbreviations_dainst.csv | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a2afa8c..2e04fe2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,4 +7,14 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). Initial tagged release +## 2022-10 +Added Escaped Ampersands Checker +### Added +- check_ampersands.py which checks all csv journals in the journals folder to make +sure all instances of ampersands are unescaped +### Changed +- `.github/workflows/tests.yml` added the above script to the GitHub workflow so the check runs every time the main branch is pushed to +- Minor format changes in `README.md` and `LISENSE.md` as the old GitHub actions check was already failing +- Found an escaped ampersands using the new script in `journal_abbreviations_dainst.csv` so this was ammended + diff --git a/journals/journal_abbreviations_dainst.csv b/journals/journal_abbreviations_dainst.csv index 1cde5d5..2a4634d 100644 --- a/journals/journal_abbreviations_dainst.csv +++ b/journals/journal_abbreviations_dainst.csv @@ -55,7 +55,7 @@ Acta Universitatis Nicolai Copernici. Archaeologia;ActaTorunA Acta Universitatis Nicolai Copernici. Historia;ActaTorunHist Antike Denkmäler;AD Abhandlungen des Deutschen Archäologischen Instituts, Abteilung Kairo;ADAIK -Adalya. Annual of the Suna \& Inan Kiraç-Research Institute on Mediterranean Civilizations;Adalya +Adalya. Annual of the Suna & Inan Kiraç-Research Institute on Mediterranean Civilizations;Adalya Αρχαιολογικόν Δελτίον (Μελέτες);ADelt A Αρχαιολογικόν Δελτίον (Χρονικά);Adelt B Arkeoloji dergisi. Ege Üniversitesi Edebiyat Fakültesi;ADerg From a286113755d07145c333bf123ce7daf890d26646 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Sun, 16 Oct 2022 10:19:23 +1100 Subject: [PATCH 07/12] Fixed Header Formatting --- CHANGELOG.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2e04fe2..fe04d14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,11 +8,16 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). Initial tagged release ## 2022-10 + Added Escaped Ampersands Checker + ### Added + - check_ampersands.py which checks all csv journals in the journals folder to make sure all instances of ampersands are unescaped + ### Changed + - `.github/workflows/tests.yml` added the above script to the GitHub workflow so the check runs every time the main branch is pushed to - Minor format changes in `README.md` and `LISENSE.md` as the old GitHub actions check was already failing - Found an escaped ampersands using the new script in `journal_abbreviations_dainst.csv` so this was ammended From d002b38e38f7a5f14baea314fec2954e23ae38f4 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Sun, 16 Oct 2022 10:21:02 +1100 Subject: [PATCH 08/12] Remove Trailing Spaces --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fe04d14..9abd957 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,10 +16,10 @@ Added Escaped Ampersands Checker - check_ampersands.py which checks all csv journals in the journals folder to make sure all instances of ampersands are unescaped -### Changed +### Changed - `.github/workflows/tests.yml` added the above script to the GitHub workflow so the check runs every time the main branch is pushed to - Minor format changes in `README.md` and `LISENSE.md` as the old GitHub actions check was already failing -- Found an escaped ampersands using the new script in `journal_abbreviations_dainst.csv` so this was ammended +- Found an escaped ampersands using the new script in `journal_abbreviations_dainst.csv` so this was ammended From 0c3c31caa17986722ac8c743e8aa4ad7a6716ca4 Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Tue, 25 Oct 2022 08:51:49 +1100 Subject: [PATCH 09/12] Fixed Changelog + Attempt to Separate Jobs --- .github/workflows/tests.yml | 9 ++++++++- CHANGELOG.md | 10 ++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 26cf3a6..6b19ecd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -8,7 +8,7 @@ on: # always run on pull requests jobs: markdown-check: - name: Check markdown and CSV formatting + name: Check Markdown runs-on: ubuntu-latest steps: - name: Checkout source @@ -18,6 +18,13 @@ jobs: with: config: './.markdownlint.yml' args: . + + ampersands-check: + name: Check Ampersands are Unescaped + runs-on: ubuntu-latest + steps: + - name: Checkout source + uses: actions/checkout@v2 - name: Run Python Ampersands Script run: python3 check_ampersands.py \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 9abd957..1c59a9f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,10 +3,6 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). -## 2021-09 - -Initial tagged release - ## 2022-10 Added Escaped Ampersands Checker @@ -22,4 +18,10 @@ sure all instances of ampersands are unescaped - Minor format changes in `README.md` and `LISENSE.md` as the old GitHub actions check was already failing - Found an escaped ampersands using the new script in `journal_abbreviations_dainst.csv` so this was ammended + +## 2021-09 + +Initial tagged release + + From 527fe01299783fb6a694859d202d31ef6d096b2a Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Wed, 26 Oct 2022 15:04:40 +1100 Subject: [PATCH 10/12] Added Unreleased Tag - Format to Check --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1c59a9f..c78deaf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,8 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## [UNRELEASED] + ## 2022-10 Added Escaped Ampersands Checker From ff61d3fe2a539df804c4b1126d0d1ca11535924f Mon Sep 17 00:00:00 2001 From: Akshat Jain Date: Thu, 27 Oct 2022 23:09:18 +1100 Subject: [PATCH 11/12] Fixed Changelog --- CHANGELOG.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c78deaf..7dd61c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,11 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). -## [UNRELEASED] - ## 2022-10 -Added Escaped Ampersands Checker - ### Added +- Added Escaped Ampersands Checker - check_ampersands.py which checks all csv journals in the journals folder to make sure all instances of ampersands are unescaped @@ -24,6 +21,4 @@ sure all instances of ampersands are unescaped ## 2021-09 Initial tagged release - - From 46856e1ac5568a7cd4ae3ecf5300529467b7528f Mon Sep 17 00:00:00 2001 From: Oliver Kopp Date: Thu, 27 Oct 2022 22:51:59 +0200 Subject: [PATCH 12/12] Fix CHANGELOG --- CHANGELOG.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dd61c9..c8aee22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,22 +3,23 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). -## 2022-10 +## [Unreleased] ### Added -- Added Escaped Ampersands Checker -- check_ampersands.py which checks all csv journals in the journals folder to make +- Added checker "Escaped Ampersands": `check_ampersands.py` which checks all csv journals in the journals folder to make sure all instances of ampersands are unescaped ### Changed -- `.github/workflows/tests.yml` added the above script to the GitHub workflow so the check runs every time the main branch is pushed to +- `.github/workflows/tests.yml` contains the script `check_ampersands.py` - Minor format changes in `README.md` and `LISENSE.md` as the old GitHub actions check was already failing - Found an escaped ampersands using the new script in `journal_abbreviations_dainst.csv` so this was ammended - ## 2021-09 Initial tagged release + + +[Unreleased]: https://github.com/JabRef/abbrv.jabref.org/compare/2021-09...main