Skip to content

Commit

Permalink
Merge branch 'release/v0.1.16'
Browse files (browse the repository at this point in the history)
  • Loading branch information
rhigman committed Oct 21, 2024
2 parents f1795d0 + a210076 commit edac5eb
Show file tree
Hide file tree
Showing 19 changed files with 65 additions and 46 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/disseminate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
with:
name: ${{ inputs.work-id }}
path: ${{ inputs.work-id }}
retention-days: 1
retention-days: 7
if-no-files-found: ignore
overwrite: false

Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [[0.1.16]](https://github.com/thoth-pub/thoth-dissemination/releases/tag/v0.1.16) - 2024-10-21
### Changed
- Finalised JSTOR upload workflow
- Upgraded dependencies: thothlibrary v0.26.2, internetarchive v4.1.0, requests v2.32.3

## [[0.1.15]](https://github.com/thoth-pub/thoth-dissemination/releases/tag/v0.1.15) - 2024-08-06
### Added
- Support for uploading files and metadata to Project MUSE, JSTOR, EBSCOHost, ProQuest (Ebook Central)
Expand Down
13 changes: 10 additions & 3 deletions config.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,24 @@ cul_pilot_user=
cul_pilot_pw=

# JSTOR FTP server credentials
# TODO not yet not yet confirmed whether credentials will be per-publisher
# or a single Thoth user
jstor_ftp_user=
jstor_ftp_pw=

# JSTOR FTP server publisher-specific folder names
# Each folder name must be given in the format below, i.e. with the
# Thoth publisher ID suffixed (omitting the square brackets).
# Note that the hyphens (-) in the Thoth publisher ID must be replaced
# with underscores (_). This is to allow compatibility with GitHub Actions,
# and make it easier to provide credentials as inline environment variables
# (as env vars with names containing hyphens require use of `env` program).
jstor_ftp_folder_[publisher_id]=

# EBSCOHost FTP server credentials
ebsco_ftp_user=
ebsco_ftp_pw=

# ProQuest Ebook Central FTP server credentials
# TODO not yet not yet confirmed whether credentials will be per-publisher
# TODO not yet confirmed whether credentials will be per-publisher
# or a single Thoth user
proquest_ftp_user=
proquest_ftp_pw=
Expand Down
12 changes: 9 additions & 3 deletions crossrefuploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import logging
import sys
import requests
from errors import DisseminationError
from uploader import Uploader


Expand All @@ -29,9 +30,9 @@ def upload_to_platform(self):
# Check that Crossref credentials have been provided for this publisher
publisher_id = self.get_publisher_id()
try:
login_id = self.get_credential_from_env(
login_id = self.get_variable_from_env(
'crossref_user_' + publisher_id.replace('-', '_'), 'Crossref')
login_passwd = self.get_credential_from_env(
login_passwd = self.get_variable_from_env(
'crossref_pw_' + publisher_id.replace('-', '_'), 'Crossref')
except DisseminationError as error:
logging.error(error)
Expand All @@ -45,7 +46,12 @@ def upload_to_platform(self):
# DOI must not be None or deposit file request above would have failed
# (Thoth database guarantees consistent DOI URL format)
doi_prefix = doi.replace('https://doi.org/', '').split('/')[0]
doi_rsp = requests.get('{}/{}'.format(CR_PREFIX_ENDPOINT, doi_prefix))
doi_rsp = requests.get(
url='{}/{}'.format(CR_PREFIX_ENDPOINT, doi_prefix),
# Crossref REST API requests containing a mailto header get preferentially load-balanced
# (https://www.crossref.org/blog/rebalancing-our-rest-api-traffic/)
headers={'mailto': 'distribution@thoth.pub'},
)
if doi_rsp.status_code != 200:
logging.error(
'Not a valid Crossref DOI prefix: {}'.format(doi_prefix)
Expand Down
2 changes: 1 addition & 1 deletion disseminator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
and upload them in the appropriate format to various platforms.
"""

__version__ = '0.1.15'
__version__ = '0.1.16'

import argparse
import logging
Expand Down
4 changes: 2 additions & 2 deletions ebscouploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ def upload_to_platform(self):

# Check that EBSCOHost credentials have been provided
try:
username = self.get_credential_from_env('ebsco_ftp_user', 'EBSCOHost')
password = self.get_credential_from_env('ebsco_ftp_pw', 'EBSCOHost')
username = self.get_variable_from_env('ebsco_ftp_user', 'EBSCOHost')
password = self.get_variable_from_env('ebsco_ftp_pw', 'EBSCOHost')
except DisseminationError as error:
logging.error(error)
sys.exit(1)
Expand Down
2 changes: 1 addition & 1 deletion fsuploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def __init__(self, work_id, export_url, client_url, version):
"""Instantiate class for accessing Figshare API."""
super().__init__(work_id, export_url, client_url, version)
try:
api_token = self.get_credential_from_env(
api_token = self.get_variable_from_env(
'figshare_token', 'Figshare')
except DisseminationError as error:
logging.error(error)
Expand Down
4 changes: 2 additions & 2 deletions iauploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ def upload_to_platform(self):

# Fast-fail if credentials for upload are missing
try:
access_key = self.get_credential_from_env(
access_key = self.get_variable_from_env(
'ia_s3_access', 'Internet Archive')
secret_key = self.get_credential_from_env(
secret_key = self.get_variable_from_env(
'ia_s3_secret', 'Internet Archive')
except DisseminationError as error:
logging.error(error)
Expand Down
19 changes: 10 additions & 9 deletions jstoruploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,31 +21,31 @@ def upload_to_platform(self):
Content required: PDF work file plus JPG cover file
Metadata required: JSTOR ONIX 3.0 export
Naming convention: Use PDF ISBN for all filename roots
Upload directory: per-publisher folder, `books` subfolder
Upload directory: per-publisher folder (named ad-hoc), `books` subfolder
"""

# Check that JSTOR credentials have been provided for this publisher
# TODO not yet confirmed whether credentials will be per-publisher
# or a single Thoth user
# Check that JSTOR credentials and publisher folder name have been provided
publisher_id = self.get_publisher_id()
try:
username = self.get_credential_from_env('jstor_ftp_user', 'JSTOR')
password = self.get_credential_from_env('jstor_ftp_pw', 'JSTOR')
username = self.get_variable_from_env('jstor_ftp_user', 'JSTOR')
password = self.get_variable_from_env('jstor_ftp_pw', 'JSTOR')
publisher_dir = self.get_variable_from_env(
'jstor_ftp_folder_' + publisher_id.replace('-', '_'), 'JSTOR')
except DisseminationError as error:
logging.error(error)
sys.exit(1)

filename = self.get_isbn('PDF')
publisher_dir = 'TBD' # TODO
collection_dir = 'books'

metadata_bytes = self.get_formatted_metadata('onix_3.0::jstor')
# Only .jpg cover files are supported
cover_bytes = self.get_cover_image('jpg')
pdf = self.get_publication_details('PDF').bytes
pdf = self.get_publication_details('PDF')
files = [
('{}.xml'.format(filename), BytesIO(metadata_bytes)),
('{}.jpg'.format(filename), BytesIO(cover_bytes)),
('{}.pdf'.format(filename), pdf_bytes),
('{}{}'.format(filename, pdf.file_ext), BytesIO(pdf.bytes)),
]

try:
Expand All @@ -55,6 +55,7 @@ def upload_to_platform(self):
host='ftp.jstor.org',
username=username,
password=password,
port=2222,
cnopts=cnopts,
) as sftp:
try:
Expand Down
4 changes: 2 additions & 2 deletions museuploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,9 @@ def upload_to_platform(self):
# Check that Project MUSE credentials have been provided for this publisher
publisher_id = self.get_publisher_id()
try:
username = self.get_credential_from_env(
username = self.get_variable_from_env(
'muse_ftp_user_' + publisher_id.replace('-', '_'), 'Project MUSE')
password = self.get_credential_from_env(
password = self.get_variable_from_env(
'muse_ftp_pw_' + publisher_id.replace('-', '_'), 'Project MUSE')
except DisseminationError as error:
logging.error(error)
Expand Down
4 changes: 2 additions & 2 deletions oapenuploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ def upload_to_platform(self):

# Fast-fail if credentials for upload are missing
try:
user = self.get_credential_from_env('oapen_ftp_user', 'OAPEN')
passwd = self.get_credential_from_env('oapen_ftp_pw', 'OAPEN')
user = self.get_variable_from_env('oapen_ftp_user', 'OAPEN')
passwd = self.get_variable_from_env('oapen_ftp_pw', 'OAPEN')
except DisseminationError as error:
logging.error(error)
sys.exit(1)
Expand Down
6 changes: 3 additions & 3 deletions proquestuploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def upload_to_platform(self):
# TODO not yet confirmed whether credentials will be per-publisher
# or a single Thoth user
try:
username = self.get_credential_from_env('proquest_ftp_user', 'ProQuest Ebook Central')
password = self.get_credential_from_env('proquest_ftp_pw', 'ProQuest Ebook Central')
username = self.get_variable_from_env('proquest_ftp_user', 'ProQuest Ebook Central')
password = self.get_variable_from_env('proquest_ftp_pw', 'ProQuest Ebook Central')
except DisseminationError as error:
logging.error(error)
sys.exit(1)
Expand Down Expand Up @@ -88,7 +88,7 @@ def upload_to_platform(self):
cnopts=cnopts,
) as sftp:
try:
sftp.cwd(collection_dir)
sftp.cwd(root_dir)
except FileNotFoundError:
logging.error(
'Could not find folder "upload" on ProQuest Ebook Central SFTP server')
Expand Down
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
internetarchive==3.0.2
internetarchive==4.1.0
pysftp==0.2.9
python-dotenv==0.19.2
requests==2.31.0
requests==2.32.3
sword2==0.3.0
thothlibrary==0.26.0
thothlibrary==0.26.2
4 changes: 2 additions & 2 deletions requirements_obtain_new_ids.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
internetarchive==3.0.2
thothlibrary==0.26.0
internetarchive==4.1.0
thothlibrary==0.26.2
2 changes: 1 addition & 1 deletion requirements_write_locations.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
thothlibrary==0.26.0
thothlibrary==0.26.2
4 changes: 2 additions & 2 deletions souploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@ def upload_to_platform(self):

# Fast-fail if credentials for upload are missing
try:
username = self.get_credential_from_env(
username = self.get_variable_from_env(
'so_ftp_user', 'ScienceOpen')
password = self.get_credential_from_env('so_ftp_pw', 'ScienceOpen')
password = self.get_variable_from_env('so_ftp_pw', 'ScienceOpen')
except DisseminationError as error:
logging.error(error)
sys.exit(1)
Expand Down
4 changes: 2 additions & 2 deletions swordv2uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,9 @@ def __init__(
"""Create connection to SWORD v2 endpoint"""
super().__init__(work_id, export_url, client_url, version)
try:
user_name = self.get_credential_from_env(
user_name = self.get_variable_from_env(
user_name_string, 'SWORD v2')
user_pass = self.get_credential_from_env(
user_pass = self.get_variable_from_env(
user_pass_string, 'SWORD v2')
except DisseminationError as error:
logging.error(error)
Expand Down
12 changes: 6 additions & 6 deletions uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,13 @@ def get_data_from_url(url, expected_format=None):
url, url_content.text, url_content.status_code))

@staticmethod
def get_credential_from_env(credential_name, platform_name):
"""Retrieve specified credential from the environment"""
def get_variable_from_env(variable_name, platform_name):
"""Retrieve specified variable from the environment"""

credential = environ.get(credential_name)
variable = environ.get(variable_name)

if credential is None or len(credential) < 1:
if variable is None or len(variable) < 1:
raise DisseminationError(
'Error uploading to {}: missing credential {}'.format(platform_name, credential_name))
'Error uploading to {}: missing value for {}'.format(platform_name, variable_name))
else:
return credential
return variable
2 changes: 1 addition & 1 deletion zenodouploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(self, work_id, export_url, client_url, version):
"""Instantiate class for accessing Zenodo API."""
super().__init__(work_id, export_url, client_url, version)
try:
api_token = self.get_credential_from_env(
api_token = self.get_variable_from_env(
'zenodo_token', 'Zenodo')
except DisseminationError as error:
logging.error(error)
Expand Down

0 comments on commit edac5eb

Please sign in to comment.