From bda92c7e6d9b270e017c8bceb54d2d835724251a Mon Sep 17 00:00:00 2001 From: Milos Jakubicek Date: Thu, 4 Apr 2024 09:52:40 +0200 Subject: [PATCH] added XML schema and JSON schema, examples fixed accordingly --- dmlex-v1.0/schemas/JSON/dmlex.schema.json | 946 ++++++++++++++++++ .../JSON/dmlex_no-crosslingual.schema.json | 786 +++++++++++++++ dmlex-v1.0/schemas/XML/dmlex.xsd | 674 +++++++++++++ .../schemas/XML/dmlex_no-crosslingual.xsd | 572 +++++++++++ .../schemas/helper_scripts/validate_json.py | 11 + .../schemas/helper_scripts/validate_xml.py | 8 + .../examples/examples/source/0.xml.xml | 3 +- .../examples/examples/source/10.xml.xml | 3 +- .../examples/examples/source/14.xml.xml | 2 +- .../examples/examples/source/20.xml.xml | 2 +- .../examples/examples/source/21.xml.xml | 4 +- .../examples/examples/source/24.xml.xml | 12 +- .../examples/examples/source/5.xml.xml | 12 +- .../examples/examples/source/7.xml.xml | 3 +- .../examples/examples/source/8.xml.xml | 8 +- .../examples/examples/source/9.xml.xml | 6 +- 16 files changed, 3023 insertions(+), 29 deletions(-) create mode 100644 dmlex-v1.0/schemas/JSON/dmlex.schema.json create mode 100644 dmlex-v1.0/schemas/JSON/dmlex_no-crosslingual.schema.json create mode 100644 dmlex-v1.0/schemas/XML/dmlex.xsd create mode 100644 dmlex-v1.0/schemas/XML/dmlex_no-crosslingual.xsd create mode 100755 dmlex-v1.0/schemas/helper_scripts/validate_json.py create mode 100755 dmlex-v1.0/schemas/helper_scripts/validate_xml.py diff --git a/dmlex-v1.0/schemas/JSON/dmlex.schema.json b/dmlex-v1.0/schemas/JSON/dmlex.schema.json new file mode 100644 index 0000000..f1233e5 --- /dev/null +++ b/dmlex-v1.0/schemas/JSON/dmlex.schema.json @@ -0,0 +1,946 @@ +{ + "$id": "http://docs.oasis-open.org/lexidma/ns/dmlex-1.0", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "DMLex", + "description": "DMLex JSON serialization. 
This variant of the schema is for documents implementing the Crosslingual Module (and possibly some other modules).", + "$comment": "This schema can only establish uniqueness for arrays in which the item is the key, i.e. labels, partsOfSpeech, sameAs, and translationLanguages. Uniqueness of IDs and validity of ID references cannot be checked at all.", + "type": "object", + "oneOf": [ + { + "$ref": "#/$defs/lexicographicResource" + }, + { + "$ref": "#/$defs/entry" + } + ], + "$defs": { + "lexicographicResource": { + "type": "object", + "required": ["langCode", "translationLanguages"], + "properties": { + "title": { + "type": "string", + "minLength": 1 + }, + "uri": { + "type": "string", + "format": "uri" + }, + "langCode": { + "type": "string" + }, + "entries": { + "type": "array", + "items": { + "$ref": "#/$defs/entry" + } + }, + "translationLanguages": { + "type": "array", + "items": { + "$ref": "#/$defs/translationLanguage" + }, + "minItems": 1, + "uniqueItems": true + }, + "definitionTypeTags": { + "type": "array", + "items": { + "$ref": "#/$defs/definitionTypeTag" + } + }, + "inflectedFormTags": { + "type": "array", + "items": { + "$ref": "#/$defs/inflectedFormTag" + } + }, + "labelTags": { + "type": "array", + "items": { + "$ref": "#/$defs/labelTag" + } + }, + "labelTypeTags": { + "type": "array", + "items": { + "$ref": "#/$defs/labelTypeTag" + } + }, + "partOfSpeechTags": { + "type": "array", + "items": { + "$ref": "#/$defs/partOfSpeechTag" + } + }, + "sourceIdentityTags": { + "type": "array", + "items": { + "$ref": "#/$defs/sourceIdentityTag" + } + }, + "transcriptionSchemeTags": { + "type": "array", + "items": { + "$ref": "#/$defs/transcriptionSchemeTag" + } + }, + "relations": { + "type": "array", + "items": { + "$ref": "#/$defs/relation" + } + }, + "relationTypes": { + "type": "array", + "items": { + "$ref": "#/$defs/relationType" + } + }, + "etymonLanguages": { + "type": "array", + "items": { + "$ref": "#/$defs/etymonLanguage" + } + }, + "etymonTypes": { 
+ "type": "array", + "items": { + "$ref": "#/$defs/etymonType" + } + } + }, + "additionalProperties": false, + "if": { + "not": { + "required": ["translationLanguages"], + "properties": { + "translationLanguages": { + "type": "array", + "minItems": 1, + "maxItems": 1 + } + } + } + }, + "then": { + "properties": { + "entries": { + "items": { + "properties": { + "senses": { + "items": { + "properties": { + "headwordTranslations": { + "items": { + "required": ["langCode"] + } + }, + "headwordExplanations": { + "items": { + "required": ["langCode"] + } + } + } + } + } + } + } + } + } + } + }, + "entry": { + "type": "object", + "required": ["headword"], + "properties": { + "headword": { + "type": "string", + "minLength": 1 + }, + "homographNumber": { + "type": "string" + }, + "partsOfSpeech": { + "type": "array", + "items": { + "$ref": "#/$defs/partOfSpeech" + }, + "uniqueItems": true + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "pronunciations": { + "type": "array", + "items": { + "$ref": "#/$defs/pronunciation" + } + }, + "inflectedForms": { + "type": "array", + "items": { + "$ref": "#/$defs/inflectedForm" + } + }, + "senses": { + "type": "array", + "items": { + "$ref": "#/$defs/sense" + } + }, + "id": { + "type": "string" + }, + "placeholderMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/placeholderMarker" + } + }, + "etymologies": { + "type": "array", + "items": { + "$ref": "#/$defs/etymology" + } + } + }, + "additionalProperties": false + }, + "inflectedForm": { + "type": "object", + "required": ["text"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "text": { + "type": "string", + "minLength": 1 + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "pronunciations": { + "type": "array", + "items": { + "$ref": "#/$defs/pronunciation" + } + } + }, + "additionalProperties": false + }, + "sense": { + 
"type": "object", + "properties": { + "indicator": { + "type": "string" + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "definitions": { + "type": "array", + "items": { + "$ref": "#/$defs/definition" + } + }, + "examples": { + "type": "array", + "items": { + "$ref": "#/$defs/example" + } + }, + "id": { + "type": "string" + }, + "headwordExplanations": { + "type": "array", + "items": { + "$ref": "#/$defs/headwordExplanation" + } + }, + "headwordTranslations": { + "type": "array", + "items": { + "$ref": "#/$defs/headwordTranslation" + } + } + }, + "additionalProperties": false + }, + "definition": { + "type": "object", + "required": ["text"], + "properties": { + "definitionType": { + "type": "string" + }, + "text": { + "type": "string", + "minLength": 1 + }, + "headwordMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/headwordMarker" + } + }, + "collocateMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/collocateMarker" + } + } + }, + "additionalProperties": false + }, + "pronunciation": { + "type": "object", + "anyOf": [ + { + "required": ["soundFile"] + }, + { + "required": ["transcriptions"], + "properties": { + "transcriptions": { + "minItems": 1 + } + } + } + ], + "properties": { + "soundFile": { + "type": "string" + }, + "transcriptions": { + "type": "array", + "items": { + "$ref": "#/$defs/transcription" + } + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "transcription": { + "type": "object", + "required": ["text"], + "properties": { + "scheme": { + "type": "string" + }, + "text": { + "type": "string", + "minLength": 1 + } + }, + "additionalProperties": false + }, + "example": { + "type": "object", + "required": ["text"], + "properties": { + "sourceIdentity": { + "type": "string" + }, + "sourceElaboration": { + "type": "string", + "minLength": 1 + }, + "soundFile": { + 
"type": "string" + }, + "text": { + "type": "string", + "minLength": 1 + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "headwordMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/headwordMarker" + } + }, + "collocateMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/collocateMarker" + } + }, + "exampleTranslations": { + "type": "array", + "items": { + "$ref": "#/$defs/exampleTranslation" + } + } + }, + "additionalProperties": false + }, + "headwordTranslation": { + "type": "object", + "required": ["text"], + "properties": { + "langCode": { + "type": "string" + }, + "text": { + "type": "string", + "minLength": 1 + }, + "partsOfSpeech": { + "type": "array", + "items": { + "$ref": "#/$defs/partOfSpeech" + }, + "uniqueItems": true + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "pronunciations": { + "type": "array", + "items": { + "$ref": "#/$defs/pronunciation" + } + }, + "inflectedForms": { + "type": "array", + "items": { + "$ref": "#/$defs/inflectedForm" + } + }, + "placeholderMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/placeholderMarker" + } + } + }, + "additionalProperties": false + }, + "headwordExplanation": { + "type": "object", + "required": ["text"], + "properties": { + "langCode": { + "type": "string" + }, + "text": { + "type": "string", + "minLength": 1 + } + }, + "additionalProperties": false + }, + "exampleTranslation": { + "type": "object", + "required": ["text"], + "properties": { + "langCode": { + "type": "string" + }, + "text": { + "type": "string", + "minLength": 1 + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "headwordMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/headwordMarker" + } + }, + "collocateMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/collocateMarker" + } + } + }, + 
"additionalProperties": false + }, + "partOfSpeechTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "for": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "inflectedFormTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "for": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "definitionTypeTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "labelTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "typeTag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "for": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "labelTypeTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "sourceIdentityTag": { + "type": 
"object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "transcriptionSchemeTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string" + }, + "description": { + "type": "string", + "minLength": 1 + }, + "for": { + "type": "string", + "minLength": 1 + } + }, + "additionalProperties": false + }, + "relation": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "members": { + "type": "array", + "items": { + "$ref": "#/$defs/member" + }, + "minItems": 2 + } + }, + "additionalProperties": false + }, + "member": { + "type": "object", + "required": ["ref"], + "properties": { + "ref": { + "type": "string" + }, + "role": { + "type": "string", + "minLength": 1 + }, + "obverseListingOrder": { + "type": "integer" + } + }, + "additionalProperties": false + }, + "relationType": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "scopeRestriction": { + "type": "string", + "minLength": 1, + "enum": ["sameEntry", "sameResource", "any"] + }, + "description": { + "type": "string", + "minLength": 1 + }, + "memberTypes": { + "type": "array", + "items": { + "$ref": "#/$defs/memberType" + } + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "memberType": { + "type": "object", + "required": ["type"], + "properties": { + "role": { + "type": "string" + }, + "type": { + "type": "string", + "minLength": 1, + "enum": ["sense", "entry", "collocate"] + }, + "min": { + "type": "integer", + "minimum": 0 + }, + 
"max": { + "type": "integer", + "minimum": 0 + }, + "hint": { + "type": "string", + "minLength": 1, + "enum": ["embed", "navigate", "none"] + }, + "description": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "placeholderMarker": { + "type": "object", + "required": ["startIndex", "endIndex"], + "properties": { + "startIndex": { + "type": "integer", + "minimum": 0 + }, + "endIndex": { + "type": "integer", + "minimum": 0 + } + }, + "additionalProperties": false + }, + "headwordMarker": { + "type": "object", + "required": ["startIndex", "endIndex"], + "properties": { + "startIndex": { + "type": "integer", + "minimum": 0 + }, + "endIndex": { + "type": "integer", + "minimum": 0 + } + }, + "additionalProperties": false + }, + "collocateMarker": { + "type": "object", + "required": ["startIndex", "endIndex"], + "properties": { + "startIndex": { + "type": "integer", + "minimum": 0 + }, + "endIndex": { + "type": "integer", + "minimum": 0 + }, + "lemma": { + "type": "string", + "minLength": 1 + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "id": { + "type": "string" + } + }, + "additionalProperties": false + }, + "etymology": { + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "etymons": { + "type": "array", + "items": { + "$ref": "#/$defs/etymon" + } + } + }, + "additionalProperties": false + }, + "etymon": { + "type": "object", + "required": ["etymonUnits"], + "properties": { + "when": { + "type": "string" + }, + "type": { + "type": "string" + }, + "note": { + "type": "string" + }, + "etymonUnits": { + "type": "array", + "items": { + "$ref": "#/$defs/etymonUnit" + }, + "minItems": 1 + }, + "translation": { + "type": "string" + } + }, + "additionalProperties": false + }, + "etymonUnit": { + "type": "object", + "required": ["langCode", 
"text"], + "properties": { + "langCode": { + "type": "string" + }, + "reconstructed": { + "type": "boolean" + }, + "text": { + "type": "string" + }, + "partsOfSpeech": { + "type": "array", + "items": { + "$ref": "#/$defs/partOfSpeech" + }, + "uniqueItems": true + }, + "translation": { + "type": "string" + } + }, + "additionalProperties": false + }, + "etymonType": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + } + }, + "additionalProperties": false + }, + "etymonLanguage": { + "type": "object", + "required": ["langCode"], + "properties": { + "langCode": { + "type": "string" + }, + "displayName": { + "type": "string" + } + }, + "additionalProperties": false + }, + "label": { + "type": "string", + "minLength": 1 + }, + "partOfSpeech": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "string" + }, + "translationLanguage": { + "type": "string" + } + } +} + diff --git a/dmlex-v1.0/schemas/JSON/dmlex_no-crosslingual.schema.json b/dmlex-v1.0/schemas/JSON/dmlex_no-crosslingual.schema.json new file mode 100644 index 0000000..d63815e --- /dev/null +++ b/dmlex-v1.0/schemas/JSON/dmlex_no-crosslingual.schema.json @@ -0,0 +1,786 @@ +{ + "$id": "http://docs.oasis-open.org/lexidma/ns/dmlex-1.0", + "$schema": "https://json-schema.org/draft/2020-12/schema", + "title": "DMLex", + "description": "DMLex JSON serialization. This variant of the schema is for documents *not* implementing the Crosslingual Module (but possibly implementing some other modules).", + "$comment": "This schema can only establish uniqueness for arrays in which the item is the key, i.e. labels, partsOfSpeech, and sameAs. 
Uniqueness of IDs and validity of ID references cannot be checked at all.", + "type": "object", + "oneOf": [ + { + "$ref": "#/$defs/lexicographicResource" + }, + { + "$ref": "#/$defs/entry" + } + ], + "$defs": { + "lexicographicResource": { + "type": "object", + "required": ["langCode"], + "properties": { + "title": { + "type": "string", + "minLength": 1 + }, + "uri": { + "type": "string", + "format": "uri" + }, + "langCode": { + "type": "string" + }, + "entries": { + "type": "array", + "items": { + "$ref": "#/$defs/entry" + } + }, + "definitionTypeTags": { + "type": "array", + "items": { + "$ref": "#/$defs/definitionTypeTag" + } + }, + "inflectedFormTags": { + "type": "array", + "items": { + "$ref": "#/$defs/inflectedFormTag" + } + }, + "labelTags": { + "type": "array", + "items": { + "$ref": "#/$defs/labelTag" + } + }, + "labelTypeTags": { + "type": "array", + "items": { + "$ref": "#/$defs/labelTypeTag" + } + }, + "partOfSpeechTags": { + "type": "array", + "items": { + "$ref": "#/$defs/partOfSpeechTag" + } + }, + "sourceIdentityTags": { + "type": "array", + "items": { + "$ref": "#/$defs/sourceIdentityTag" + } + }, + "transcriptionSchemeTags": { + "type": "array", + "items": { + "$ref": "#/$defs/transcriptionSchemeTag" + } + }, + "relations": { + "type": "array", + "items": { + "$ref": "#/$defs/relation" + } + }, + "relationTypes": { + "type": "array", + "items": { + "$ref": "#/$defs/relationType" + } + }, + "etymonLanguages": { + "type": "array", + "items": { + "$ref": "#/$defs/etymonLanguage" + } + }, + "etymonTypes": { + "type": "array", + "items": { + "$ref": "#/$defs/etymonType" + } + } + }, + "additionalProperties": false + }, + "entry": { + "type": "object", + "required": ["headword"], + "properties": { + "headword": { + "type": "string", + "minLength": 1 + }, + "homographNumber": { + "type": "string" + }, + "partsOfSpeech": { + "type": "array", + "items": { + "$ref": "#/$defs/partOfSpeech" + }, + "uniqueItems": true + }, + "labels": { + "type": "array", + 
"items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "pronunciations": { + "type": "array", + "items": { + "$ref": "#/$defs/pronunciation" + } + }, + "inflectedForms": { + "type": "array", + "items": { + "$ref": "#/$defs/inflectedForm" + } + }, + "senses": { + "type": "array", + "items": { + "$ref": "#/$defs/sense" + } + }, + "id": { + "type": "string" + }, + "placeholderMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/placeholderMarker" + } + }, + "etymologies": { + "type": "array", + "items": { + "$ref": "#/$defs/etymology" + } + } + }, + "additionalProperties": false + }, + "inflectedForm": { + "type": "object", + "required": ["text"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "text": { + "type": "string", + "minLength": 1 + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "pronunciations": { + "type": "array", + "items": { + "$ref": "#/$defs/pronunciation" + } + } + }, + "additionalProperties": false + }, + "sense": { + "type": "object", + "properties": { + "indicator": { + "type": "string" + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "definitions": { + "type": "array", + "items": { + "$ref": "#/$defs/definition" + } + }, + "examples": { + "type": "array", + "items": { + "$ref": "#/$defs/example" + } + }, + "id": { + "type": "string" + } + }, + "additionalProperties": false + }, + "definition": { + "type": "object", + "required": ["text"], + "properties": { + "definitionType": { + "type": "string" + }, + "text": { + "type": "string", + "minLength": 1 + }, + "headwordMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/headwordMarker" + } + }, + "collocateMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/collocateMarker" + } + } + }, + "additionalProperties": false + }, + "pronunciation": { + "type": "object", + "anyOf": [ + { + "required": ["soundFile"] 
+ }, + { + "required": ["transcriptions"], + "properties": { + "transcriptions": { + "minItems": 1 + } + } + } + ], + "properties": { + "soundFile": { + "type": "string" + }, + "transcriptions": { + "type": "array", + "items": { + "$ref": "#/$defs/transcription" + } + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "transcription": { + "type": "object", + "required": ["text"], + "properties": { + "scheme": { + "type": "string" + }, + "text": { + "type": "string", + "minLength": 1 + } + }, + "additionalProperties": false + }, + "example": { + "type": "object", + "required": ["text"], + "properties": { + "sourceIdentity": { + "type": "string" + }, + "sourceElaboration": { + "type": "string", + "minLength": 1 + }, + "soundFile": { + "type": "string" + }, + "text": { + "type": "string", + "minLength": 1 + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "headwordMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/headwordMarker" + } + }, + "collocateMarkers": { + "type": "array", + "items": { + "$ref": "#/$defs/collocateMarker" + } + } + }, + "additionalProperties": false + }, + "partOfSpeechTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "for": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "inflectedFormTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "for": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + 
"uniqueItems": true + } + }, + "additionalProperties": false + }, + "definitionTypeTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "labelTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "typeTag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "for": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "labelTypeTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "sourceIdentityTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "transcriptionSchemeTag": { + "type": "object", + "required": ["tag"], + "properties": { + "tag": { + "type": "string" + }, + "description": { + "type": "string", + "minLength": 1 + }, + "for": { + "type": "string", + "minLength": 1 + } + }, + "additionalProperties": false + }, + "relation": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + 
"minLength": 1 + }, + "members": { + "type": "array", + "items": { + "$ref": "#/$defs/member" + }, + "minItems": 2 + } + }, + "additionalProperties": false + }, + "member": { + "type": "object", + "required": ["ref"], + "properties": { + "ref": { + "type": "string" + }, + "role": { + "type": "string", + "minLength": 1 + }, + "obverseListingOrder": { + "type": "integer" + } + }, + "additionalProperties": false + }, + "relationType": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "scopeRestriction": { + "type": "string", + "minLength": 1, + "enum": ["sameEntry", "sameResource", "any"] + }, + "description": { + "type": "string", + "minLength": 1 + }, + "memberTypes": { + "type": "array", + "items": { + "$ref": "#/$defs/memberType" + } + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "memberType": { + "type": "object", + "required": ["type"], + "properties": { + "role": { + "type": "string" + }, + "type": { + "type": "string", + "minLength": 1, + "enum": ["sense", "entry", "collocate"] + }, + "min": { + "type": "integer", + "minimum": 0 + }, + "max": { + "type": "integer", + "minimum": 0 + }, + "hint": { + "type": "string", + "minLength": 1, + "enum": ["embed", "navigate", "none"] + }, + "description": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "array", + "items": { + "$ref": "#/$defs/sameAs" + }, + "uniqueItems": true + } + }, + "additionalProperties": false + }, + "placeholderMarker": { + "type": "object", + "required": ["startIndex", "endIndex"], + "properties": { + "startIndex": { + "type": "integer", + "minimum": 0 + }, + "endIndex": { + "type": "integer", + "minimum": 0 + } + }, + "additionalProperties": false + }, + "headwordMarker": { + "type": "object", + "required": ["startIndex", "endIndex"], + "properties": { + "startIndex": { + "type": "integer", + 
"minimum": 0 + }, + "endIndex": { + "type": "integer", + "minimum": 0 + } + }, + "additionalProperties": false + }, + "collocateMarker": { + "type": "object", + "required": ["startIndex", "endIndex"], + "properties": { + "startIndex": { + "type": "integer", + "minimum": 0 + }, + "endIndex": { + "type": "integer", + "minimum": 0 + }, + "lemma": { + "type": "string", + "minLength": 1 + }, + "labels": { + "type": "array", + "items": { + "$ref": "#/$defs/label" + }, + "uniqueItems": true + }, + "id": { + "type": "string" + } + }, + "additionalProperties": false + }, + "etymology": { + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "etymons": { + "type": "array", + "items": { + "$ref": "#/$defs/etymon" + } + } + }, + "additionalProperties": false + }, + "etymon": { + "type": "object", + "required": ["etymonUnits"], + "properties": { + "when": { + "type": "string" + }, + "type": { + "type": "string" + }, + "note": { + "type": "string" + }, + "etymonUnits": { + "type": "array", + "items": { + "$ref": "#/$defs/etymonUnit" + }, + "minItems": 1 + }, + "translation": { + "type": "string" + } + }, + "additionalProperties": false + }, + "etymonUnit": { + "type": "object", + "required": ["langCode", "text"], + "properties": { + "langCode": { + "type": "string" + }, + "reconstructed": { + "type": "boolean" + }, + "text": { + "type": "string" + }, + "partsOfSpeech": { + "type": "array", + "items": { + "$ref": "#/$defs/partOfSpeech" + }, + "uniqueItems": true + }, + "translation": { + "type": "string" + } + }, + "additionalProperties": false + }, + "etymonType": { + "type": "object", + "required": ["type"], + "properties": { + "type": { + "type": "string", + "minLength": 1 + }, + "description": { + "type": "string", + "minLength": 1 + } + }, + "additionalProperties": false + }, + "etymonLanguage": { + "type": "object", + "required": ["langCode"], + "properties": { + "langCode": { + "type": "string" + }, + "displayName": { + "type": "string" + } + 
}, + "additionalProperties": false + }, + "label": { + "type": "string", + "minLength": 1 + }, + "partOfSpeech": { + "type": "string", + "minLength": 1 + }, + "sameAs": { + "type": "string" + } + } +} + diff --git a/dmlex-v1.0/schemas/XML/dmlex.xsd b/dmlex-v1.0/schemas/XML/dmlex.xsd new file mode 100644 index 0000000..fbf2294 --- /dev/null +++ b/dmlex-v1.0/schemas/XML/dmlex.xsd @@ -0,0 +1,674 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dmlex-v1.0/schemas/XML/dmlex_no-crosslingual.xsd b/dmlex-v1.0/schemas/XML/dmlex_no-crosslingual.xsd new file mode 100644 index 0000000..f73eefa --- /dev/null +++ b/dmlex-v1.0/schemas/XML/dmlex_no-crosslingual.xsd @@ -0,0 +1,572 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/dmlex-v1.0/schemas/helper_scripts/validate_json.py b/dmlex-v1.0/schemas/helper_scripts/validate_json.py new file mode 100755 index 0000000..3df0bca --- /dev/null +++ b/dmlex-v1.0/schemas/helper_scripts/validate_json.py @@ -0,0 +1,11 @@ +#!/usr/bin/python3 +import json +from jsonschema import validate +import glob +schema = json.load(open('dmlex.schema.json')) +for file in glob.glob("*.json"): + if file.endswith('.schema.json'): + continue + print(file) + validate(json.load(open(file)), schema) + diff --git a/dmlex-v1.0/schemas/helper_scripts/validate_xml.py b/dmlex-v1.0/schemas/helper_scripts/validate_xml.py new file mode 100755 index 0000000..c3af065 --- /dev/null +++ b/dmlex-v1.0/schemas/helper_scripts/validate_xml.py @@ -0,0 +1,8 @@ +#!/usr/bin/python3 +import xmlschema +import glob +schema = xmlschema.XMLSchema11('dmlex.xsd') +for file in glob.glob("*.xml"): + print(file) + schema.validate(file) + diff --git a/dmlex-v1.0/specification/examples/examples/source/0.xml.xml 
b/dmlex-v1.0/specification/examples/examples/source/0.xml.xml index 8baf418..72907c7 100644 --- a/dmlex-v1.0/specification/examples/examples/source/0.xml.xml +++ b/dmlex-v1.0/specification/examples/examples/source/0.xml.xml @@ -1,6 +1,5 @@ -<lexicographicResource uri="http://example.com" langCode="en"> - <title>Example Dictionary</title> +<lexicographicResource title="Example Dictionary" uri="http://example.com" langCode="en"> <entry id="abandon-verb"> <headword>abandon</headword> <partOfSpeech tag="verb"/> diff --git a/dmlex-v1.0/specification/examples/examples/source/10.xml.xml b/dmlex-v1.0/specification/examples/examples/source/10.xml.xml index 3e13294..ef2f5ae 100644 --- a/dmlex-v1.0/specification/examples/examples/source/10.xml.xml +++ b/dmlex-v1.0/specification/examples/examples/source/10.xml.xml @@ -1,6 +1,5 @@ -<lexicographicResource uri="http://example.com" langCode="ga"> - <title>My Irish-Multilingual Dictionary</title> +<lexicographicResource title="My Irish-Multilingual Dictionary" uri="http://example.com" langCode="ga"> <translationLanguage langCode="en"/> <translationLanguage langCode="de"/> <translationLanguage langCode="cs"/> diff --git a/dmlex-v1.0/specification/examples/examples/source/14.xml.xml b/dmlex-v1.0/specification/examples/examples/source/14.xml.xml index d202425..a4f728d 100644 --- a/dmlex-v1.0/specification/examples/examples/source/14.xml.xml +++ b/dmlex-v1.0/specification/examples/examples/source/14.xml.xml @@ -1,6 +1,5 @@ <lexicographicResource uri="http://example.com" langCode="de"> - <translationLanguage langCode="en"/> <entry id="die-see"> <headword>See</headword> <partOfSpeech tag="n-fem"/> @@ -22,6 +21,7 @@ <headwordTranslation><text>ocean</text></headwordTranslation> </sense> </entry> + <translationLanguage langCode="en"/> <relation type="synonyms"> <description>words that mean sea and ocean</description> <member ref="die-see-1"/> diff --git a/dmlex-v1.0/specification/examples/examples/source/20.xml.xml 
b/dmlex-v1.0/specification/examples/examples/source/20.xml.xml index 30e9860..3e3a703 100644 --- a/dmlex-v1.0/specification/examples/examples/source/20.xml.xml +++ b/dmlex-v1.0/specification/examples/examples/source/20.xml.xml @@ -3,7 +3,7 @@ <headword>beat <placeholderMarker>sb.</placeholderMarker> up</headword > <sense id="beat-up-1"> - <headwordTranslation> + <headwordTranslation langCode="de"> <text><placeholderMarker>jemanden</placeholderMarker> verprügeln</ text> </headwordTranslation> diff --git a/dmlex-v1.0/specification/examples/examples/source/21.xml.xml b/dmlex-v1.0/specification/examples/examples/source/21.xml.xml index 2f046a4..b7632ae 100644 --- a/dmlex-v1.0/specification/examples/examples/source/21.xml.xml +++ b/dmlex-v1.0/specification/examples/examples/source/21.xml.xml @@ -2,15 +2,15 @@ <entry id="autopsy"> <headword>autopsy</headword> <sense id="autopsy-1"> - <headwordTranslation><text>pitva</text></headwordTranslation> <example> <text>The coroner performed an <headwordMarker>autopsy</headwo rdMarker>.</text> - <exampleTranslation> + <exampleTranslation langCode="cs"> <text>Koroner provedl <headwordMarker>pitvu</headwordMarke r>.</text> </exampleTranslation> </example> + <headwordTranslation langCode="cs"><text>pitva</text></headwordTranslation> </sense> </entry> diff --git a/dmlex-v1.0/specification/examples/examples/source/24.xml.xml b/dmlex-v1.0/specification/examples/examples/source/24.xml.xml index 39fd4aa..84eca35 100644 --- a/dmlex-v1.0/specification/examples/examples/source/24.xml.xml +++ b/dmlex-v1.0/specification/examples/examples/source/24.xml.xml @@ -24,6 +24,12 @@ ion> </etymon> </etymology> </entry> + <etymonLanguage langCode="dum"> + <displayName>Middle Dutch</displayName> + </etymonLanguage> + <etymonLanguage langCode="gem-pro"> + <displayName>Proto-Germanic</displayName> + </etymonLanguage> <etymonType type="derivation"> <description>A historical form of the word</description> </etymonType> @@ -31,12 +37,6 @@ ion> 
<description>Two words derived from the same etymological root</de scription> </etymonType> - <etymonLanguage langCode="dum"> - <displayName>Middle Dutch</displayName> - </etymonLanguage> - <etymonLanguage langCode="gem-pro"> - <displayName>Proto-Germanic</displayName> - </etymonLanguage> </lexicographicResource> diff --git a/dmlex-v1.0/specification/examples/examples/source/5.xml.xml b/dmlex-v1.0/specification/examples/examples/source/5.xml.xml index 66c4780..de87633 100644 --- a/dmlex-v1.0/specification/examples/examples/source/5.xml.xml +++ b/dmlex-v1.0/specification/examples/examples/source/5.xml.xml @@ -10,17 +10,17 @@ <text>folúsghlantóirí</text> </inflectedForm> </entry> - <partOfSpeechTag tag="n-masc"> - <description>masculine noun</description> - </partOfSpeechTag> - <partOfSpeechTag tag="n-fem"> - <description>feminine noun</description> - </partOfSpeechTag> <inflectedFormTag tag="sg-gen"> <description>singular genitive</description> </inflectedFormTag> <inflectedFormTag tag="pl"> <description>plural</description> </inflectedFormTag> + <partOfSpeechTag tag="n-masc"> + <description>masculine noun</description> + </partOfSpeechTag> + <partOfSpeechTag tag="n-fem"> + <description>feminine noun</description> + </partOfSpeechTag> </lexicographicResource> diff --git a/dmlex-v1.0/specification/examples/examples/source/7.xml.xml b/dmlex-v1.0/specification/examples/examples/source/7.xml.xml index 09e7a3f..f668a18 100644 --- a/dmlex-v1.0/specification/examples/examples/source/7.xml.xml +++ b/dmlex-v1.0/specification/examples/examples/source/7.xml.xml @@ -1,6 +1,5 @@ -<lexicographicResource uri="http://example.com" langCode="de"> - <title>My German-English Dictionary</title> +<lexicographicResource title="My German-English Dictionary" uri="http://example.com" langCode="de"> <translationLanguage langCode="en"/> </lexicographicResource> diff --git a/dmlex-v1.0/specification/examples/examples/source/8.xml.xml b/dmlex-v1.0/specification/examples/examples/source/8.xml.xml 
index bd97621..eb272f2 100644 --- a/dmlex-v1.0/specification/examples/examples/source/8.xml.xml +++ b/dmlex-v1.0/specification/examples/examples/source/8.xml.xml @@ -3,22 +3,22 @@ <headword>doctor</headword> <sense id="doctor-n-1"> <indicator>medical doctor</indicator> - <headwordTranslation> + <headwordTranslation langCode="de"> <text>Arzt</text> <partOfSpeech tag="n-masc"/> </headwordTranslation> - <headwordTranslation> + <headwordTranslation langCode="de"> <text>Ärztin</text> <partOfSpeech tag="n-fem"/> </headwordTranslation> </sense> <sense id="doctor-n-2"> <indicator>academic title</indicator> - <headwordTranslation> + <headwordTranslation langCode="de"> <text>Doktor</text> <partOfSpeech tag="n-masc"/> </headwordTranslation> - <headwordTranslation> + <headwordTranslation langCode="de"> <text>Doktorin</text> <partOfSpeech tag="n-fem"/> </headwordTranslation> diff --git a/dmlex-v1.0/specification/examples/examples/source/9.xml.xml b/dmlex-v1.0/specification/examples/examples/source/9.xml.xml index 8c3811a..7194058 100644 --- a/dmlex-v1.0/specification/examples/examples/source/9.xml.xml +++ b/dmlex-v1.0/specification/examples/examples/source/9.xml.xml @@ -3,9 +3,9 @@ <headword>Treppenwitz</headword> <partOfSpeech tag="n-masc"/> <sense id="treppenwitz-1"> - <headwordExplanation>belated realisation of what one could have sa -id</headwordExplanation> - <headwordTranslation> + <headwordExplanation langCode="en">belated realisation of what one + could have said</headwordExplanation> + <headwordTranslation langCode="en"> <text>staircase wit</text> </headwordTranslation> </sense>