Skip to content

add mets, cleaning

add mets, cleaning #24

Workflow file for this run

name: gtrepo
on:
push:
tags:
- 'v[0-9]+.[0-9]+.[0-9]+'
workflow_dispatch:
inputs:
tag-name:
description: Name of the release tag
jobs:
job1:
name: uniTest
runs-on: ubuntu-latest
permissions:
checks: write
contents: write
# Map a step output to a job output
outputs:
output1: ${{ steps.step4.outputs.test }}
output2: ${{ steps.step4.outputs.test2 }}
steps:
- name: Git checkout
id: step1
uses: actions/checkout@v4
# Installation Styles and Saxon
- name: install analyse xsl-styles
id: step2
run: |
git clone https://github.com/tboenig/gt-repo-scripts.git
mv gt-repo-scripts/scripts scripts/
rm -r gt-repo-scripts
- name: Download and install saxon
id: step3
run: |
wget https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE12-3/SaxonHE12-3J.zip
unzip SaxonHE12-3J.zip
# Installation and Directories
- name: make gh-pages_out
run: mkdir ghout
- name: transform METADATA.yml to METADATA.json
uses: mikefarah/yq@master
with:
cmd: |
yq -o=json METADATA.yml > METADATA.json
- name: PathTest
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_unitTest.xsl \
output=unitTest1 \
-s:scripts/gt-overview_unitTest.xsl -o:ghout/pathtest.md
shell: bash
# Test GT-Page Folder Repo Structure
- name: Empty
id: step4
run: |
[ -s ghout/pathtest.md ] || echo "test=empty" >> $GITHUB_OUTPUT
[ ! -s ghout/pathtest.md ] || echo "test2=full" >> $GITHUB_OUTPUT
# Error Logview
- name: uniTestError
id: step5
if: ${{steps.step4.outputs.test2 == 'full'}}
run: |
less ghout/pathtest.md
job2:
name: analyse_and_makebagit
needs: job1
if: ${{needs.job1.outputs.output1 == 'empty'}}
runs-on: ubuntu-latest
permissions:
checks: write
contents: write
steps:
- name: Using tag name from ref name
if: github.event.inputs.tag-name == ''
run: echo "TAG_NAME=$GITHUB_REF_NAME" >> $GITHUB_ENV
- name: Using tag name from input param
if: github.event.inputs.tag-name != ''
run: echo "TAG_NAME=${{ github.event.inputs.tag-name}}" >> $GITHUB_ENV
- name: Git checkout
uses: actions/checkout@v4
# Installation Styles
- name: install analyse xsl-styles
run: |
git clone https://github.com/tboenig/gt-repo-scripts.git
mv gt-repo-scripts/scripts scripts/
rm -r gt-repo-scripts
# Transfer megarules.xml
- name: install megarules.xml
run: |
git clone --branch gh-pages --single-branch https://github.com/OCR-D/gt-MufiLevelRules.git
mv gt-MufiLevelRules/rules/megalevelrules.xml scripts/megalevelrules.xml
rm -r gt-MufiLevelRules
# Installation GT-Labelling Documentation
- name: install labeling
run: |
git clone https://github.com/tboenig/gt-guidelines.git
# Installation and Directories
- name: install jq
run: sudo apt-get install jq
- name: Download and install saxon
run: |
wget https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE12-3/SaxonHE12-3J.zip
unzip SaxonHE12-3J.zip
- name: make metadata_out
run: mkdir metadata_out
- name: make ocrdzip_out
run: mkdir ocrdzip_out
- name: make gh-pages_out
run: mkdir ghout
- name: make readme_out
run: sh scripts/readmefolder.sh
- name: readme.xml file
run: sh scripts/xreadme.sh
# Transformation and analyzing
- name: transform METADATA.yml to METADATA.json
uses: mikefarah/yq@master
with:
cmd: |
yq -o=json METADATA.yml > METADATA.json
- name: transform METADATA and make GT-Overview
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
output=METADATA repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
-s:scripts/gt-overview_metadata.xsl -o:ghout/metadata.md
shell: bash
- name: make Compressed table view
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
output=TABLE repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY \
-s:scripts/gt-overview_metadata.xsl -o:ghout/table.md
shell: bash
- name: detailed table view
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
output=OVERVIEW repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY \
-s:scripts/gt-overview_metadata.xsl -o:ghout/overview.md
shell: bash
- name: leveling the volume and documents
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-level_parser.xsl \
repoName=$GITHUB_REPOSITORY \
-s:scripts/gt-level_parser.xsl -o:ghout/overview-level.md
shell: bash
- name: generate mets.sh
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
output=METS repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY \
-s:scripts/gt-overview_metadata.xsl -o:scripts/mets.sh
shell: bash
- name: generate Metadata JSON file
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
output=METAJSON repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
-s:scripts/gt-overview_metadata.xsl -o:metadata_out/metadata_l.json
shell: bash
- name: format json file and copy to gh branch
run: |
jq '.' metadata_out/metadata_l.json > metadata_out/metadata.json
cp metadata_out/metadata.json ghout/
rm metadata_out/metadata_l.json
- name: generate README
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
output=README repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY \
-s:scripts/gt-overview_metadata.xsl -o:README.md
shell: bash
- name: generate METADATA_htr_united.yml
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-metadata_htr_united.xsl repoName=${{ github.event.repository.name }} \
-s:scripts/gt-metadata_htr_united.xsl
shell: bash
- name: generate METS Volume File
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
output=METSvolume repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
-s:scripts/gt-overview_metadata.xsl -o:metadata_out/mets.xml
shell: bash
- name: generate release download List
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
output=download repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
-s:scripts/gt-overview_metadata.xsl -o:ghout/download.txt
shell: bash
- name: delete fileGrp DEFAULT
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
output=METSdefault repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
-s:scripts/gt-overview_metadata.xsl
shell: bash
- name: generate CITATION.cff
run: |
java -jar saxon-he-12.3.jar -xsl:scripts/gt-overview_metadata.xsl \
output=CITATION repoBase=${{ env.TAG_NAME }} repoName=$GITHUB_REPOSITORY bagitDumpNum=$GITHUB_RUN_NUMBER releaseTag=${{ env.TAG_NAME }} \
-s:scripts/gt-overview_metadata.xsl -o:rawCITATION.cff
shell: bash
- name: formating CITATION.cff
uses: mikefarah/yq@master
with:
cmd: |
yq -I4 rawCITATION.cff > CITATION.cff
rm rawCITATION.cff
- name: Index-link
run: |
cd ghout
ln -s metadata.md index.md
# Mets handling, Install OCR-D and Bagit
- name: del invalidMets
run: sh -ex scripts/data_mets.sh
shell: bash
- name: install ocrd, make validMets and bagit
run: |
sudo apt-get install -y python3 imagemagick libgeos-dev
python3 -m venv venv
source venv/bin/activate
pip install -U pip 'setuptools>=61'
pip install ocrd
ocrd --version
- name: make validMets
run: |
source venv/bin/activate
sh -ex scripts/mets.sh
- name: make bagit
run: |
source venv/bin/activate
sh scripts/data_structure.sh
- name: copy css styles, js javascript and yml files to ghout
run: |
cp scripts/table_hide.css ghout/
cp scripts/levelparser.css ghout/
cp scripts/lang.js ghout/
cp scripts/_config.yml ghout/
- name: archive the metadata files from metadata_out folder
uses: thedoctor0/zip-release@master
with:
filename: metadata-v${{ github.run_number }}.zip
path: 'metadata_out'
- name: copy metadata.zip to ocrdzip_out
run: |
cp metadata-v${{ github.run_number }}.zip ocrdzip_out/
- name: Upload Release
uses: ncipollo/release-action@v1
if: env.TAG_NAME != ''
with:
allowUpdates: true
artifacts: 'ocrdzip_out/*.zip'
artifactContentType: application/zip
body: |
<dl>
<dt>Version:</dt>
<dd>${{ env.TAG_NAME }}</dd>
<dt>Info:</dt>
<dd>
To make use of Ground Truth, please download the provided zip files.<br/>
The 'ocrd.zip' files are ocr-d-bagit files.<br/>
The 'metadata-v${{ github.run_number }}.zip' file contains metadata for the Ground Truth corpus in both METS and JSON format.<br/>
The 'mets.xml' file enumerates all the documents and BagIt files contained within.<br/>
The bagits correspond to the <a href="https://ocr-d.de/de/spec/ocrd_zip.html">OCR-D Bagit Spec</a>.<br/>
The source-code-zip and source-code-tar.gz files only provide metadata, citations, license and readme information.<br/>
If you want to use the source files, please clone the repository.
</dd>
</dl>
</dl>
name: Release ${{ github.run_number }}_${{ env.TAG_NAME }}
omitNameDuringUpdate: true
tag: ${{ env.TAG_NAME }}
token: ${{ secrets.GITHUB_TOKEN }}
- name: Commit README
run: |
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
git add README.md
git commit -m "[Automatic] Update readme files" || echo "Nothing to update"
git push origin HEAD:main
- name: Commit METADATA_htr_united.yml
run: |
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
git add ${{ github.event.repository.name }}_METADATA_htr_united.yml
git commit -m "[Automatic] Update METADATA_htr_united.yml files" || echo "Nothing to update"
git push origin HEAD:main
- name: Commit CITATION.cff
run: |
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
git config --local user.name "github-actions[bot]"
git add CITATION.cff
git commit -m "[Automatic] Update CITATION.cff files" || echo "Nothing to update"
git push origin HEAD:main
- name: Deploy GT-Overview to GitHub Pages 🚀
uses: JamesIves/github-pages-deploy-action@v4
with:
branch: gh-pages # The branch the action should deploy to.
folder: ghout # The folder the action should deploy.