Skip to content

Commit

Permalink
Merge branch 'release/v0.1.15'
Browse files Browse the repository at this point in the history
  • Loading branch information
rhigman committed Aug 6, 2024
2 parents 4566d23 + d367b17 commit f1795d0
Show file tree
Hide file tree
Showing 16 changed files with 520 additions and 50 deletions.
12 changes: 8 additions & 4 deletions .github/workflows/bulk_disseminate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ on:
type: string

jobs:
run-script:
obtain-new-ids:
runs-on: ubuntu-latest
outputs:
NEW_IDS: ${{ steps.get-ids.outputs.NEW_IDS }}
Expand All @@ -42,10 +42,14 @@ jobs:
ENV_EXCEPTIONS: ${{ inputs.env_exceptions }}

bulk-disseminate:
needs: run-script
if: needs.run-script.outputs.NEW_IDS != '[]'
needs: obtain-new-ids
if: needs.obtain-new-ids.outputs.NEW_IDS != '[]'
strategy:
fail-fast: false
matrix:
work-id: ${{ fromJSON(needs.obtain-new-ids.outputs.NEW_IDS) }}
uses: ./.github/workflows/disseminate.yml
with:
work-ids: ${{ needs.run-script.outputs.NEW_IDS }}
work-id: ${{ matrix.work-id }}
platform: ${{ inputs.platform }}
secrets: inherit
22 changes: 7 additions & 15 deletions .github/workflows/disseminate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ name: disseminate
on:
workflow_call:
inputs:
work-ids:
work-id:
required: true
type: string
platform:
Expand All @@ -17,10 +17,6 @@ jobs:
runs-on: ubuntu-latest
container:
image: openbookpublishers/thoth-dissemination:latest
strategy:
fail-fast: false
matrix:
work-id: ${{ fromJSON(inputs.work-ids) }}
steps:
- name: Make all platform credentials available to later steps (with names lowercased)
uses: oNaiPs/secrets-to-env-action@v1.5
Expand All @@ -31,16 +27,16 @@ jobs:
- name: Run disseminator using Dockerhub image
run: |
/disseminator.py \
--work ${{ matrix.work-id }} \
--work ${{ inputs.work-id }} \
--platform ${{ inputs.platform }} \
> ${{ matrix.work-id }}
> ${{ inputs.work-id }}
- name: Upload output to artifact
uses: actions/upload-artifact@v4
if: contains(fromJSON('["InternetArchive", "CUL", "Figshare", "Zenodo"]'), inputs.platform)
with:
name: ${{ matrix.work-id }}
path: ${{ matrix.work-id }}
name: ${{ inputs.work-id }}
path: ${{ inputs.work-id }}
retention-days: 1
if-no-files-found: ignore
overwrite: false
Expand All @@ -49,10 +45,6 @@ jobs:
runs-on: ubuntu-latest
needs: disseminate
if: contains(fromJSON('["InternetArchive", "CUL", "Figshare", "Zenodo"]'), inputs.platform)
strategy:
fail-fast: false
matrix:
work-id: ${{ fromJSON(inputs.work-ids) }}
steps:
- name: Checkout
# This step deletes existing directory contents, so must be done before artifact download
Expand All @@ -61,7 +53,7 @@ jobs:
- name: Download disseminator output artifact containing location info
uses: actions/download-artifact@v4
with:
name: ${{ matrix.work-id }}
name: ${{ inputs.work-id }}

- name: Set up Python
uses: actions/setup-python@v5
Expand All @@ -72,7 +64,7 @@ jobs:
run: pip install -r requirements_write_locations.txt

- name: Write locations to Thoth using Python script
run: python write_locations.py ${{ matrix.work-id }}
run: python write_locations.py ${{ inputs.work-id }}
env:
THOTH_EMAIL: ${{ secrets.THOTH_EMAIL }}
THOTH_PWD: ${{ secrets.THOTH_PWD }}
6 changes: 5 additions & 1 deletion .github/workflows/manual_disseminate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,12 @@ on:

jobs:
manual_disseminate:
strategy:
fail-fast: false
matrix:
work-id: ${{ fromJSON(github.event.inputs.workIds) }}
uses: ./.github/workflows/disseminate.yml
with:
work-ids: ${{ github.event.inputs.workIds }}
work-id: ${{ matrix.work-id }}
platform: ${{ github.event.inputs.platform }}
secrets: inherit
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [[0.1.15]](https://github.com/thoth-pub/thoth-dissemination/releases/tag/v0.1.15) - 2024-08-06
### Added
- Support for uploading files and metadata to Project MUSE, JSTOR, EBSCOHost, ProQuest (Ebook Central)
### Fixed
- Minor fixes/improvements to GitHub Actions (job dependencies, environment variables)

## [[0.1.14]](https://github.com/thoth-pub/thoth-dissemination/releases/tag/v0.1.14) - 2024-07-24
### Changed
- Upgraded thothlibrary dependency to release v0.26.0 (includes improved error handling)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,6 @@ docker run --rm --env-file config.env openbookpublishers/thoth-dissemination:lat
### Options
`--work` = Thoth ID of work to be disseminated

`--platform` = Destination distribution/archiving platform (one of `InternetArchive`, `OAPEN`, `ScienceOpen`, `CUL`, `Crossref`, `Figshare`, `Zenodo`)
`--platform` = Destination distribution/archiving platform (one of `InternetArchive`, `OAPEN`, `ScienceOpen`, `CUL`, `Crossref`, `Figshare`, `Zenodo`, `ProjectMUSE`, `JSTOR`, `EBSCOHost`, `ProQuest`)

See also `--help`.
27 changes: 27 additions & 0 deletions config.env.template
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,33 @@ oapen_ftp_pw=
cul_pilot_user=
cul_pilot_pw=

# JSTOR FTP server credentials
# TODO not yet confirmed whether credentials will be per-publisher
# or a single Thoth user
jstor_ftp_user=
jstor_ftp_pw=

# EBSCOHost FTP server credentials
ebsco_ftp_user=
ebsco_ftp_pw=

# ProQuest Ebook Central FTP server credentials
# TODO not yet confirmed whether credentials will be per-publisher
# or a single Thoth user
proquest_ftp_user=
proquest_ftp_pw=

# Project MUSE FTP server credentials
# Separate credentials must be provided for each publisher whose works are to be submitted.
# Each set of credentials must be given in the format below, i.e. with the
# Thoth publisher ID suffixed (omitting the square brackets).
# Note that the hyphens (-) in the Thoth publisher ID must be replaced
# with underscores (_). This is to allow compatibility with GitHub Actions,
# and make it easier to provide credentials as inline environment variables
# (as env vars with names containing hyphens require use of `env` program).
muse_ftp_user_[publisher_id]=
muse_ftp_pw_[publisher_id]=

# Crossref user credentials
# Separate credentials must be provided for each publisher whose works are to be submitted.
# Each set of credentials must be given in the format below, i.e. with the
Expand Down
10 changes: 9 additions & 1 deletion disseminator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
and upload them in the appropriate format to various platforms.
"""

__version__ = '0.1.14'
__version__ = '0.1.15'

import argparse
import logging
Expand All @@ -20,6 +20,10 @@
from crossrefuploader import CrossrefUploader
from fsuploader import FigshareUploader
from zenodouploader import ZenodoUploader
from museuploader import MUSEUploader
from jstoruploader import JSTORUploader
from ebscouploader import EBSCOUploader
from proquestuploader import ProquestUploader

UPLOADERS = {
"InternetArchive": IAUploader,
Expand All @@ -29,6 +33,10 @@
"Crossref": CrossrefUploader,
"Figshare": FigshareUploader,
"Zenodo": ZenodoUploader,
"ProjectMUSE": MUSEUploader,
"JSTOR": JSTORUploader,
"EBSCOHost": EBSCOUploader,
"ProQuest": ProquestUploader,
}

UPLOADERS_STR = ', '.join("%s" % (key) for (key, _) in UPLOADERS.items())
Expand Down
101 changes: 101 additions & 0 deletions ebscouploader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
Retrieve and disseminate files and metadata to EBSCOHost
"""

import logging
import sys
import pysftp
from datetime import date
from io import BytesIO
from errors import DisseminationError
from uploader import Uploader


class EBSCOUploader(Uploader):
    """Dissemination logic for EBSCOHost"""

    def upload_to_platform(self):
        """
        Upload work in required format to EBSCOHost.
        Content required: PDF and/or EPUB work file
        Metadata required: EBSCOHost ONIX 2.1 export
        Naming convention: Use "corresponding eISBN" for content filename roots
        (can be either PDF or EPUB as long as both are in ONIX)
        Metadata filename not strictly controlled; date recommended
        Upload directory: TBC # TODO

        Logs the error and exits the process with status 1 if credentials
        are missing, if neither a PDF nor an EPUB can be retrieved, or if
        connecting to / uploading to the SFTP server fails.
        """

        # Check that EBSCOHost credentials have been provided
        try:
            username = self.get_credential_from_env('ebsco_ftp_user', 'EBSCOHost')
            password = self.get_credential_from_env('ebsco_ftp_pw', 'EBSCOHost')
        except DisseminationError as error:
            logging.error(error)
            sys.exit(1)

        # Root used for every uploaded filename (content files and metadata)
        filename = None
        # List of (remote filename, file-like object) pairs to upload
        files = []

        # Can't continue if neither PDF nor EPUB file is present
        pdf_error = None
        epub_error = None
        try:
            pdf = self.get_publication_details('PDF')
            filename = self.get_isbn('PDF')
            files.append(('{}{}'.format(filename, pdf.file_ext), BytesIO(pdf.bytes)))
        except DisseminationError as error:
            pdf_error = error
        try:
            epub = self.get_publication_details('EPUB')
            # Default to using PDF ISBN for filename unless no PDF is present
            if not filename:
                filename = self.get_isbn('EPUB')
            files.append(('{}{}'.format(filename, epub.file_ext), BytesIO(epub.bytes)))
        except DisseminationError as error:
            epub_error = error
        if pdf_error and epub_error:
            logging.error(pdf_error)
            logging.error(epub_error)
            sys.exit(1)

        metadata_bytes = self.get_formatted_metadata('onix_2.1::ebsco_host')
        files.append(('{}_{}.xml'.format(filename, date.today().isoformat()),
                      BytesIO(metadata_bytes)))

        try:
            cnopts = pysftp.CnOpts()
            # NOTE(review): setting hostkeys to None turns off SSH host key
            # verification, so the server's identity is not checked.
            # Consider pinning the EBSCOHost host key instead.
            cnopts.hostkeys = None
            with pysftp.Connection(
                host='sftp.epnet.com',
                username=username,
                password=password,
                cnopts=cnopts,
            ) as sftp:
                for remote_name, content in files:
                    try:
                        sftp.putfo(flo=content, remotepath=remote_name)
                    # SFTP transfer failures surface as IOError/OSError;
                    # TypeError alone would never trigger the cleanup below
                    except (TypeError, OSError) as error:
                        logging.error(
                            'Error uploading to EBSCOHost SFTP server: {}'.format(error))
                        # Attempt to delete any partially-uploaded items
                        # (not confirmed whether EBSCOHost system automatically begins
                        # processing on upload - cf museuploader)
                        for uploaded_name, _ in files:
                            try:
                                sftp.remove(uploaded_name)
                            except FileNotFoundError:
                                # This file never reached the server
                                pass
                            except OSError as cleanup_error:
                                # Cleanup is best-effort: report and carry on
                                logging.warning(
                                    'Could not remove {} from EBSCOHost SFTP server: {}'
                                    .format(uploaded_name, cleanup_error))
                        sys.exit(1)
        # Also cover unreachable host / SSH negotiation failures, not only
        # rejected credentials, so they are logged rather than raised raw
        except (pysftp.AuthenticationException,
                pysftp.ConnectionException,
                pysftp.SSHException) as error:
            logging.error(
                'Could not connect to EBSCOHost SFTP server: {}'.format(error))
            sys.exit(1)

        logging.info('Successfully uploaded to EBSCOHost SFTP server')

    def parse_metadata(self):
        """Convert work metadata into EBSCOHost format"""
        # Not required for EBSCOHost - only the metadata file is required
        pass
6 changes: 3 additions & 3 deletions fsuploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ def create_article(self, metadata, project_id):
'location'], json_body=metadata)
except DisseminationError as error:
raise DisseminationError(
'Creating article failed: {}'.format(error))
'Creating article failed: {} ({})'.format(error, metadata.get('title')))
# Derive 'entity_id' from 'location'
article_id = article_url.split('/')[-1]
# Figshare default behaviour (confirmed under support ticket #438719)
Expand Down Expand Up @@ -495,7 +495,7 @@ def publish_article(self, article_id):
self.issue_request('POST', url, 201)
except DisseminationError as error:
raise DisseminationError(
'Publishing article failed: {}'.format(error))
'Publishing article {} failed: {}'.format(article_id, error))

def clean_up(self, project_id):
"""
Expand Down Expand Up @@ -531,7 +531,7 @@ def upload_file(self, file_bytes, file_name, article_id):
# Check that the data was processed successfully.
return self.check_upload_status(file_url)
except DisseminationError as error:
raise DisseminationError('Uploading file failed: {}'.format(error))
raise DisseminationError('Uploading file failed: {} ({})'.format(error, file_name))

def initiate_new_upload(self, article_id, file_bytes, file_name):
"""
Expand Down
Loading

0 comments on commit f1795d0

Please sign in to comment.