Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add fileGrp cardinality to ocrd-tool.json schema, deprecate fileGrp names #255

Merged
merged 6 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ocrd_tool.schema.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"type": "object", "description": "Schema for tools by OCR-D MP", "required": ["version", "git_url", "tools"], "additionalProperties": false, "properties": {"version": {"description": "Version of the tool, expressed as MAJOR.MINOR.PATCH.", "type": "string", "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$"}, "git_url": {"description": "GitHub/GitLab URL", "type": "string", "format": "url"}, "dockerhub": {"description": "DockerHub image", "type": "string"}, "tools": {"type": "object", "additionalProperties": false, "patternProperties": {"ocrd-.*": {"type": "object", "additionalProperties": false, "required": ["description", "steps", "executable", "categories", "input_file_grp"], "properties": {"executable": {"description": "The name of the CLI executable in $PATH", "type": "string"}, "input_file_grp": {"description": "Input fileGrp@USE this tool expects by default", "type": "array", "items": {"type": "string"}}, "output_file_grp": {"description": "Output fileGrp@USE this tool produces by default", "type": "array", "items": {"type": "string"}}, "parameters": {"description": "Object describing the parameters of a tool. 
Keys are parameter names, values sub-schemas.", "type": "object", "default": {}, "patternProperties": {".*": {"type": "object", "additionalProperties": false, "required": ["description", "type"], "properties": {"type": {"type": "string", "description": "Data type of this parameter", "enum": ["string", "number", "boolean", "object", "array"]}, "format": {"description": "Subtype, such as `float` for type `number` or `uri` for type `string`."}, "description": {"description": "Concise description of syntax and semantics of this parameter"}, "items": {"type": "object", "description": "describe the items of an array further"}, "minimum": {"type": "number", "description": "Minimum value for number parameters, including the minimum"}, "maximum": {"type": "number", "description": "Maximum value for number parameters, including the maximum"}, "exclusiveMinimum": {"type": "number", "description": "Minimum value for number parameters, excluding the minimum"}, "exclusiveMaximum": {"type": "number", "description": "Maximum value for number parameters, excluding the maximum"}, "multipleOf": {"type": "number", "description": "For number values, those values must be multiple of this number"}, "properties": {"type": "object", "description": "Describe the properties of an object value"}, "additionalProperties": {"type": "boolean", "description": "Whether an object value may contain properties not explicitly defined"}, "required": {"type": "boolean", "description": "Whether this parameter is required"}, "default": {"description": "Default value when not provided by the user"}, "enum": {"type": "array", "description": "List the allowed values if a fixed list."}, "content-type": {"type": "string", "default": "application/octet-stream", "description": "The media type of resources this processor expects for this parameter. Most processors use files for resources (e.g. `*.traineddata` for `ocrd-tesserocr-recognize`) while others use directories of files (e.g. 
`default` for `ocrd-eynollah-segment`). If a parameter requires directories, it must set `content-type` to `text/directory`.\n"}, "cacheable": {"type": "boolean", "description": "If parameter is reference to file: Whether the file should be cached, e.g. because it is large and won't change.", "default": false}}}}}, "description": {"description": "Concise description of what the tool does"}, "categories": {"description": "Tools belong to these categories, representing modules within the OCR-D project structure", "type": "array", "items": {"type": "string", "enum": ["Image preprocessing", "Layout analysis", "Text recognition and optimization", "Model training", "Long-term preservation", "Quality assurance"]}}, "steps": {"description": "This tool can be used at these steps in the OCR-D functional model", "type": "array", "items": {"type": "string", "enum": ["preprocessing/characterization", "preprocessing/optimization", "preprocessing/optimization/cropping", "preprocessing/optimization/deskewing", "preprocessing/optimization/despeckling", "preprocessing/optimization/dewarping", "preprocessing/optimization/binarization", "preprocessing/optimization/grayscale_normalization", "recognition/text-recognition", "recognition/font-identification", "recognition/post-correction", "layout/segmentation", "layout/segmentation/text-nontext", "layout/segmentation/region", "layout/segmentation/line", "layout/segmentation/word", "layout/segmentation/classification", "layout/analysis"]}}, "resource_locations": {"type": "array", "description": "The locations in the filesystem this processor supports for resource lookup", "default": ["data", "cwd", "system", "module"], "items": {"type": "string", "enum": ["data", "cwd", "system", "module"]}}, "resources": {"type": "array", "description": "Resources for this processor", "items": {"type": "object", "additionalProperties": false, "required": ["url", "description", "name", "size"], "properties": {"url": {"type": "string", "description": "URLs 
of all components of this resource"}, "description": {"type": "string", "description": "A description of the resource"}, "name": {"type": "string", "description": "Name to store the resource as"}, "type": {"type": "string", "enum": ["file", "directory", "archive"], "default": "file", "description": "Type of the URL"}, "parameter_usage": {"type": "string", "description": "Defines how the parameter is to be used", "enum": ["as-is", "without-extension"], "default": "as-is"}, "path_in_archive": {"type": "string", "description": "If type is archive, the resource is at this location in the archive", "default": "."}, "version_range": {"type": "string", "description": "Range of supported versions, syntax like in PEP 440", "default": ">= 0.0.1"}, "size": {"type": "number", "description": "Size of the resource in bytes to be retrieved (for archives: size of the archive)"}}}}}}}}}}
{"type": "object", "description": "Schema for tools by OCR-D MP", "required": ["version", "git_url", "tools"], "additionalProperties": false, "properties": {"version": {"description": "Version of the tool, expressed as MAJOR.MINOR.PATCH.", "type": "string", "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$"}, "git_url": {"description": "GitHub/GitLab URL", "type": "string", "format": "url"}, "dockerhub": {"description": "DockerHub image", "type": "string"}, "tools": {"type": "object", "additionalProperties": false, "patternProperties": {"ocrd-.*": {"type": "object", "additionalProperties": false, "required": ["description", "steps", "executable", "categories", "input_file_grp_cardinality", "output_file_grp_cardinality"], "properties": {"executable": {"description": "The name of the CLI executable in $PATH", "type": "string"}, "input_file_grp": {"deprecated": true, "description": "(DEPRECATED) Input fileGrp@USE this tool expects by default", "type": "array", "items": {"type": "string"}}, "output_file_grp": {"deprecated": true, "description": "(DEPRECATED) Output fileGrp@USE this tool produces by default", "type": "array", "items": {"type": "string"}}, "input_file_grp_cardinality": {"description": "Number of (comma-separated) input fileGrp@USE this tool expects (either an exact value or a minimum,maximum list with -1 for unlimited)", "oneOf": [{"type": "number", "multipleOf": 1}, {"type": "array", "items": {"type": "number", "multipleOf": 1}, "minItems": 2, "maxItems": 2}], "default": 1}, "output_file_grp_cardinality": {"description": "Number of (comma-separated) output fileGrp@USE this tool produces (either an exact value or a minimum,maximum list with -1 for unlimited)", "oneOf": [{"type": "number", "multipleOf": 1}, {"type": "array", "items": {"type": "number", "multipleOf": 1}, "minItems": 2, "maxItems": 2}], "default": 1}, "parameters": {"description": "Object describing the parameters of a tool. 
Keys are parameter names, values sub-schemas.", "type": "object", "default": {}, "patternProperties": {".*": {"type": "object", "additionalProperties": false, "required": ["description", "type"], "properties": {"type": {"type": "string", "description": "Data type of this parameter", "enum": ["string", "number", "boolean", "object", "array"]}, "format": {"description": "Subtype, such as `float` for type `number` or `uri` for type `string`."}, "description": {"description": "Concise description of syntax and semantics of this parameter"}, "items": {"type": "object", "description": "describe the items of an array further"}, "minimum": {"type": "number", "description": "Minimum value for number parameters, including the minimum"}, "maximum": {"type": "number", "description": "Maximum value for number parameters, including the maximum"}, "exclusiveMinimum": {"type": "number", "description": "Minimum value for number parameters, excluding the minimum"}, "exclusiveMaximum": {"type": "number", "description": "Maximum value for number parameters, excluding the maximum"}, "multipleOf": {"type": "number", "description": "For number values, those values must be multiple of this number"}, "properties": {"type": "object", "description": "Describe the properties of an object value"}, "additionalProperties": {"type": "boolean", "description": "Whether an object value may contain properties not explicitly defined"}, "required": {"type": "boolean", "description": "Whether this parameter is required"}, "default": {"description": "Default value when not provided by the user"}, "enum": {"type": "array", "description": "List the allowed values if a fixed list."}, "content-type": {"type": "string", "default": "application/octet-stream", "description": "The media type of resources this processor expects for this parameter. Most processors use files for resources (e.g. `*.traineddata` for `ocrd-tesserocr-recognize`) while others use directories of files (e.g. 
`default` for `ocrd-eynollah-segment`). If a parameter requires directories, it must set `content-type` to `text/directory`.\n"}, "cacheable": {"type": "boolean", "description": "If parameter is reference to file: Whether the file should be cached, e.g. because it is large and won't change.", "default": false}}}}}, "description": {"description": "Concise description of what the tool does"}, "categories": {"description": "Tools belong to these categories, representing modules within the OCR-D project structure", "type": "array", "items": {"type": "string", "enum": ["Image preprocessing", "Layout analysis", "Text recognition and optimization", "Model training", "Long-term preservation", "Quality assurance"]}}, "steps": {"description": "This tool can be used at these steps in the OCR-D functional model", "type": "array", "items": {"type": "string", "enum": ["preprocessing/characterization", "preprocessing/optimization", "preprocessing/optimization/cropping", "preprocessing/optimization/deskewing", "preprocessing/optimization/despeckling", "preprocessing/optimization/dewarping", "preprocessing/optimization/binarization", "preprocessing/optimization/grayscale_normalization", "recognition/text-recognition", "recognition/font-identification", "recognition/post-correction", "layout/segmentation", "layout/segmentation/text-nontext", "layout/segmentation/region", "layout/segmentation/line", "layout/segmentation/word", "layout/segmentation/classification", "layout/analysis"]}}, "resource_locations": {"type": "array", "description": "The locations in the filesystem this processor supports for resource lookup", "default": ["data", "cwd", "system", "module"], "items": {"type": "string", "enum": ["data", "cwd", "system", "module"]}}, "resources": {"type": "array", "description": "Resources for this processor", "items": {"type": "object", "additionalProperties": false, "required": ["url", "description", "name", "size"], "properties": {"url": {"type": "string", "description": "URLs 
of all components of this resource"}, "description": {"type": "string", "description": "A description of the resource"}, "name": {"type": "string", "description": "Name to store the resource as"}, "type": {"type": "string", "enum": ["file", "directory", "archive"], "default": "file", "description": "Type of the URL"}, "parameter_usage": {"type": "string", "description": "Defines how the parameter is to be used", "enum": ["as-is", "without-extension"], "default": "as-is"}, "path_in_archive": {"type": "string", "description": "If type is archive, the resource is at this location in the archive", "default": "."}, "version_range": {"type": "string", "description": "Range of supported versions, syntax like in PEP 440", "default": ">= 0.0.1"}, "size": {"type": "number", "description": "Size of the resource in bytes to be retrieved (for archives: size of the archive)"}}}}}}}}}}
35 changes: 30 additions & 5 deletions ocrd_tool.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,25 +29,50 @@ properties:
- steps
- executable
- categories
- input_file_grp
# Not required because not all processors produce output files
# - output_file_grp
- input_file_grp_cardinality
- output_file_grp_cardinality
properties:
executable:
description: The name of the CLI executable in $PATH
type: string
input_file_grp:
description: Input fileGrp@USE this tool expects by default
deprecated: true
description: (DEPRECATED) Input fileGrp@USE this tool expects by default
type: array
items:
type: string
# pattern: '^OCR-D-[A-Z0-9-]+$'
output_file_grp:
description: Output fileGrp@USE this tool produces by default
deprecated: true
description: (DEPRECATED) Output fileGrp@USE this tool produces by default
type: array
items:
type: string
# pattern: '^OCR-D-[A-Z0-9-]+$'
input_file_grp_cardinality:
description: Number of (comma-separated) input fileGrp@USE this tool expects (either an exact value or a minimum,maximum list with -1 for unlimited)
oneOf:
- type: number
multipleOf: 1
- type: array
items:
type: number
multipleOf: 1
minItems: 2
maxItems: 2
default: 1
output_file_grp_cardinality:
description: Number of (comma-separated) output fileGrp@USE this tool produces (either an exact value or a minimum,maximum list with -1 for unlimited)
oneOf:
- type: number
multipleOf: 1
- type: array
items:
type: number
multipleOf: 1
minItems: 2
maxItems: 2
default: 1
parameters:
description: Object describing the parameters of a tool. Keys are parameter names, values sub-schemas.
type: object
Expand Down