Skip to content

Commit

Permalink
add XSD validators for PAGE and METS, OCR-D#449
Browse files: browse the repository at this point in the history
  • Loading branch information
kba committed Apr 29, 2020
1 parent 0cded79 commit 11e2733
Show file tree
Hide file tree
Showing 15 changed files with 4,755 additions and 5 deletions.
1 change: 1 addition & 0 deletions .envrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export PYLINTRC=$PWD/.pylintrc
5 changes: 5 additions & 0 deletions .vimrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
set path+=ocrd
set path+=ocrd_models
set path+=ocrd_modelfactory
set path+=ocrd_validators
set path+=ocrd_utils
8 changes: 7 additions & 1 deletion ocrd_validators/ocrd_validators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
'PageValidator',
'OcrdToolValidator',
'OcrdZipValidator',
'ValidationReport'
'ValidationReport',
'XsdValidator',
'XsdMetsValidator',
'XsdPageValidator',
]

from .report import ValidationReport
Expand All @@ -16,3 +19,6 @@
from .page_validator import PageValidator
from .ocrd_tool_validator import OcrdToolValidator
from .ocrd_zip_validator import OcrdZipValidator
from .xsd_validator import XsdValidator
from .xsd_mets_validator import XsdMetsValidator
from .xsd_page_validator import XsdPageValidator
2 changes: 1 addition & 1 deletion ocrd_validators/ocrd_validators/bagit-profile.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
BagIt-Profile-Info:
BagIt-Profile-Identifier: https://ocr-d.de/bagit-profile.json
BagIt-Profile-Identifier: https://ocr-d.github.io/bagit-profile.json
BagIt-Profile-Version: '1.2.0'
Source-Organization: OCR-D
External-Description: BagIt profile for OCR data
Expand Down
7 changes: 6 additions & 1 deletion ocrd_validators/ocrd_validators/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Constants for ocrd_validators.
"""
import yaml
from pkg_resources import resource_string
from pkg_resources import resource_string, resource_filename

OCRD_TOOL_SCHEMA = yaml.safe_load(resource_string(__name__, 'ocrd_tool.schema.yml'))
OCRD_BAGIT_PROFILE = yaml.safe_load(resource_string(__name__, 'bagit-profile.yml'))
Expand All @@ -12,3 +12,8 @@
FILE_GROUP_CATEGORIES = ['IMG', 'SEG', 'OCR', 'COR', 'GT']
TMP_BAGIT_PREFIX = 'ocrd-bagit-'
OCRD_BAGIT_PROFILE_URL = 'https://ocr-d.github.io/bagit-profile.json'
XSD_METS_URL = 'https://www.loc.gov/standards/mets/mets.xsd'
XSD_PAGE_URL = 'http://www.primaresearch.org/schema/PAGE/gts/pagecontent/2019-07-15/pagecontent.xsd'
XSD_PATHS = {}
XSD_PATHS[XSD_METS_URL] = resource_filename(__name__, 'xsd/mets.xsd')
XSD_PATHS[XSD_PAGE_URL] = resource_filename(__name__, 'xsd/page.xsd')
Empty file.
1,854 changes: 1,854 additions & 0 deletions ocrd_validators/ocrd_validators/xsd/mets.xsd

Large diffs are not rendered by default.

Loading

0 comments on commit 11e2733

Please sign in to comment.