Skip to content

Download and process #75

Download and process

Download and process #75

name: Download and process
on:
workflow_dispatch:
inputs:
colId:
description: 'Collection ID'
required: true
default: '162553'
colId2:
description: 'Collection ID 2'
required: true
default: '284592'
jobs:
build_pages:
name: Download METS files from Transkribus and convert them to TEIs
runs-on: ubuntu-latest
env:
ANT_OPTS: -Xmx5g
TR_USER: ${{ secrets.TR_USER }}
TR_PW: ${{ secrets.TR_PW}}
COL_ID: ${{ github.event.inputs.colId }}
COL_ID2: ${{ github.event.inputs.colId2 }}
steps:
- name: Perform Checkout
uses: actions/checkout@v4
- name: Directory Setup and Page2Tei Download
run: |
mkdir -p mets
mkdir -p tei
git clone --depth=1 --branch skurzinz-patch-1 --single-branch https://github.com/skurzinz/page2tei.git
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: '3.10'
- name: Download METS Files
run: |
pip install acdh-transkribus-utils
python ./scripts/fetch_mets.py
- name: Setup Java / Ant
uses: actions/setup-java@v4
with:
distribution: 'zulu'
java-version: '11'
- name: Install Ant
run: |
apt-get update && apt-get install ant -y --no-install-recommend
wget https://sourceforge.net/projects/saxon/files/Saxon-HE/9.9/SaxonHE9-9-1-7J.zip/download && unzip download -d saxon && rm -rf download
- name: Transform to TEI
run: |
ant -DCOL_ID=${COL_ID} -DCOL_ID2=${COL_ID2} -f ./build_tei.xml
- uses: stefanzweifel/git-auto-commit-action@v4
with:
commit_message: Exported and Transformed ColId ${{ github.event.inputs.colId }}