diff --git a/ocrd_tool.schema.json b/ocrd_tool.schema.json index 0b75b9e..91ff151 100644 --- a/ocrd_tool.schema.json +++ b/ocrd_tool.schema.json @@ -1 +1 @@ -{"type": "object", "description": "Schema for tools by OCR-D MP", "required": ["version", "git_url", "tools"], "additionalProperties": false, "properties": {"version": {"description": "Version of the tool, expressed as MAJOR.MINOR.PATCH.", "type": "string", "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$"}, "git_url": {"description": "GitHub/GitLab URL", "type": "string", "format": "url"}, "dockerhub": {"description": "DockerHub image", "type": "string"}, "tools": {"type": "object", "additionalProperties": false, "patternProperties": {"ocrd-.*": {"type": "object", "additionalProperties": false, "required": ["description", "steps", "executable", "categories", "input_file_grp"], "properties": {"executable": {"description": "The name of the CLI executable in $PATH", "type": "string"}, "input_file_grp": {"description": "Input fileGrp@USE this tool expects by default", "type": "array", "items": {"type": "string"}}, "output_file_grp": {"description": "Output fileGrp@USE this tool produces by default", "type": "array", "items": {"type": "string"}}, "parameters": {"description": "Object describing the parameters of a tool. 
Keys are parameter names, values sub-schemas.", "type": "object", "default": {}, "patternProperties": {".*": {"type": "object", "additionalProperties": false, "required": ["description", "type"], "properties": {"type": {"type": "string", "description": "Data type of this parameter", "enum": ["string", "number", "boolean", "object", "array"]}, "format": {"description": "Subtype, such as `float` for type `number` or `uri` for type `string`."}, "description": {"description": "Concise description of syntax and semantics of this parameter"}, "items": {"type": "object", "description": "describe the items of an array further"}, "minimum": {"type": "number", "description": "Minimum value for number parameters, including the minimum"}, "maximum": {"type": "number", "description": "Maximum value for number parameters, including the maximum"}, "exclusiveMinimum": {"type": "number", "description": "Minimum value for number parameters, excluding the minimum"}, "exclusiveMaximum": {"type": "number", "description": "Maximum value for number parameters, excluding the maximum"}, "multipleOf": {"type": "number", "description": "For number values, those values must be multiple of this number"}, "properties": {"type": "object", "description": "Describe the properties of an object value"}, "additionalProperties": {"type": "boolean", "description": "Whether an object value may contain properties not explicitly defined"}, "required": {"type": "boolean", "description": "Whether this parameter is required"}, "default": {"description": "Default value when not provided by the user"}, "enum": {"type": "array", "description": "List the allowed values if a fixed list."}, "content-type": {"type": "string", "default": "application/octet-stream", "description": "The media type of resources this processor expects for this parameter. Most processors use files for resources (e.g. `*.traineddata` for `ocrd-tesserocr-recognize`) while others use directories of files (e.g. 
`default` for `ocrd-eynollah-segment`). If a parameter requires directories, it must set `content-type` to `text/directory`.\n"}, "cacheable": {"type": "boolean", "description": "If parameter is reference to file: Whether the file should be cached, e.g. because it is large and won't change.", "default": false}}}}}, "description": {"description": "Concise description of what the tool does"}, "categories": {"description": "Tools belong to these categories, representing modules within the OCR-D project structure", "type": "array", "items": {"type": "string", "enum": ["Image preprocessing", "Layout analysis", "Text recognition and optimization", "Model training", "Long-term preservation", "Quality assurance"]}}, "steps": {"description": "This tool can be used at these steps in the OCR-D functional model", "type": "array", "items": {"type": "string", "enum": ["preprocessing/characterization", "preprocessing/optimization", "preprocessing/optimization/cropping", "preprocessing/optimization/deskewing", "preprocessing/optimization/despeckling", "preprocessing/optimization/dewarping", "preprocessing/optimization/binarization", "preprocessing/optimization/grayscale_normalization", "recognition/text-recognition", "recognition/font-identification", "recognition/post-correction", "layout/segmentation", "layout/segmentation/text-nontext", "layout/segmentation/region", "layout/segmentation/line", "layout/segmentation/word", "layout/segmentation/classification", "layout/analysis"]}}, "resource_locations": {"type": "array", "description": "The locations in the filesystem this processor supports for resource lookup", "default": ["data", "cwd", "system", "module"], "items": {"type": "string", "enum": ["data", "cwd", "system", "module"]}}, "resources": {"type": "array", "description": "Resources for this processor", "items": {"type": "object", "additionalProperties": false, "required": ["url", "description", "name", "size"], "properties": {"url": {"type": "string", "description": "URLs 
of all components of this resource"}, "description": {"type": "string", "description": "A description of the resource"}, "name": {"type": "string", "description": "Name to store the resource as"}, "type": {"type": "string", "enum": ["file", "directory", "archive"], "default": "file", "description": "Type of the URL"}, "parameter_usage": {"type": "string", "description": "Defines how the parameter is to be used", "enum": ["as-is", "without-extension"], "default": "as-is"}, "path_in_archive": {"type": "string", "description": "If type is archive, the resource is at this location in the archive", "default": "."}, "version_range": {"type": "string", "description": "Range of supported versions, syntax like in PEP 440", "default": ">= 0.0.1"}, "size": {"type": "number", "description": "Size of the resource in bytes to be retrieved (for archives: size of the archive)"}}}}}}}}}} \ No newline at end of file +{"type": "object", "description": "Schema for tools by OCR-D MP", "required": ["version", "git_url", "tools"], "additionalProperties": false, "properties": {"version": {"description": "Version of the tool, expressed as MAJOR.MINOR.PATCH.", "type": "string", "pattern": "^[0-9]+\\.[0-9]+\\.[0-9]+$"}, "git_url": {"description": "GitHub/GitLab URL", "type": "string", "format": "url"}, "dockerhub": {"description": "DockerHub image", "type": "string"}, "tools": {"type": "object", "additionalProperties": false, "patternProperties": {"ocrd-.*": {"type": "object", "additionalProperties": false, "required": ["description", "steps", "executable", "categories", "input_file_grp_cardinality", "output_file_grp_cardinality"], "properties": {"executable": {"description": "The name of the CLI executable in $PATH", "type": "string"}, "input_file_grp": {"deprecated": true, "description": "(DEPRECATED) Input fileGrp@USE this tool expects by default", "type": "array", "items": {"type": "string"}}, "output_file_grp": {"deprecated": true, "description": "(DEPRECATED) Output fileGrp@USE this 
tool produces by default", "type": "array", "items": {"type": "string"}}, "input_file_grp_cardinality": {"description": "Number of (comma-separated) input fileGrp@USE this tool expects (either an exact value or a minimum,maximum list with -1 for unlimited)", "oneOf": [{"type": "number", "multipleOf": 1}, {"type": "array", "items": {"type": "number", "multipleOf": 1}, "minItems": 2, "maxItems": 2}], "default": 1}, "output_file_grp_cardinality": {"description": "Number of (comma-separated) output fileGrp@USE this tool produces (either an exact value or a minimum,maximum list with -1 for unlimited)", "oneOf": [{"type": "number", "multipleOf": 1}, {"type": "array", "items": {"type": "number", "multipleOf": 1}, "minItems": 2, "maxItems": 2}], "default": 1}, "parameters": {"description": "Object describing the parameters of a tool. Keys are parameter names, values sub-schemas.", "type": "object", "default": {}, "patternProperties": {".*": {"type": "object", "additionalProperties": false, "required": ["description", "type"], "properties": {"type": {"type": "string", "description": "Data type of this parameter", "enum": ["string", "number", "boolean", "object", "array"]}, "format": {"description": "Subtype, such as `float` for type `number` or `uri` for type `string`."}, "description": {"description": "Concise description of syntax and semantics of this parameter"}, "items": {"type": "object", "description": "describe the items of an array further"}, "minimum": {"type": "number", "description": "Minimum value for number parameters, including the minimum"}, "maximum": {"type": "number", "description": "Maximum value for number parameters, including the maximum"}, "exclusiveMinimum": {"type": "number", "description": "Minimum value for number parameters, excluding the minimum"}, "exclusiveMaximum": {"type": "number", "description": "Maximum value for number parameters, excluding the maximum"}, "multipleOf": {"type": "number", "description": "For number values, those values 
must be multiple of this number"}, "properties": {"type": "object", "description": "Describe the properties of an object value"}, "additionalProperties": {"type": "boolean", "description": "Whether an object value may contain properties not explicitly defined"}, "required": {"type": "boolean", "description": "Whether this parameter is required"}, "default": {"description": "Default value when not provided by the user"}, "enum": {"type": "array", "description": "List the allowed values if a fixed list."}, "content-type": {"type": "string", "default": "application/octet-stream", "description": "The media type of resources this processor expects for this parameter. Most processors use files for resources (e.g. `*.traineddata` for `ocrd-tesserocr-recognize`) while others use directories of files (e.g. `default` for `ocrd-eynollah-segment`). If a parameter requires directories, it must set `content-type` to `text/directory`.\n"}, "cacheable": {"type": "boolean", "description": "If parameter is reference to file: Whether the file should be cached, e.g. 
because it is large and won't change.", "default": false}}}}}, "description": {"description": "Concise description of what the tool does"}, "categories": {"description": "Tools belong to these categories, representing modules within the OCR-D project structure", "type": "array", "items": {"type": "string", "enum": ["Image preprocessing", "Layout analysis", "Text recognition and optimization", "Model training", "Long-term preservation", "Quality assurance"]}}, "steps": {"description": "This tool can be used at these steps in the OCR-D functional model", "type": "array", "items": {"type": "string", "enum": ["preprocessing/characterization", "preprocessing/optimization", "preprocessing/optimization/cropping", "preprocessing/optimization/deskewing", "preprocessing/optimization/despeckling", "preprocessing/optimization/dewarping", "preprocessing/optimization/binarization", "preprocessing/optimization/grayscale_normalization", "recognition/text-recognition", "recognition/font-identification", "recognition/post-correction", "layout/segmentation", "layout/segmentation/text-nontext", "layout/segmentation/region", "layout/segmentation/line", "layout/segmentation/word", "layout/segmentation/classification", "layout/analysis"]}}, "resource_locations": {"type": "array", "description": "The locations in the filesystem this processor supports for resource lookup", "default": ["data", "cwd", "system", "module"], "items": {"type": "string", "enum": ["data", "cwd", "system", "module"]}}, "resources": {"type": "array", "description": "Resources for this processor", "items": {"type": "object", "additionalProperties": false, "required": ["url", "description", "name", "size"], "properties": {"url": {"type": "string", "description": "URLs of all components of this resource"}, "description": {"type": "string", "description": "A description of the resource"}, "name": {"type": "string", "description": "Name to store the resource as"}, "type": {"type": "string", "enum": ["file", "directory", 
"archive"], "default": "file", "description": "Type of the URL"}, "parameter_usage": {"type": "string", "description": "Defines how the parameter is to be used", "enum": ["as-is", "without-extension"], "default": "as-is"}, "path_in_archive": {"type": "string", "description": "If type is archive, the resource is at this location in the archive", "default": "."}, "version_range": {"type": "string", "description": "Range of supported versions, syntax like in PEP 440", "default": ">= 0.0.1"}, "size": {"type": "number", "description": "Size of the resource in bytes to be retrieved (for archives: size of the archive)"}}}}}}}}}} \ No newline at end of file diff --git a/ocrd_tool.schema.yml b/ocrd_tool.schema.yml index 80e7963..5de65a0 100644 --- a/ocrd_tool.schema.yml +++ b/ocrd_tool.schema.yml @@ -29,25 +29,50 @@ properties: - steps - executable - categories - - input_file_grp - # Not required because not all processors produce output files - # - output_file_grp + - input_file_grp_cardinality + - output_file_grp_cardinality properties: executable: description: The name of the CLI executable in $PATH type: string input_file_grp: - description: Input fileGrp@USE this tool expects by default + deprecated: true + description: (DEPRECATED) Input fileGrp@USE this tool expects by default type: array items: type: string # pattern: '^OCR-D-[A-Z0-9-]+$' output_file_grp: - description: Output fileGrp@USE this tool produces by default + deprecated: true + description: (DEPRECATED) Output fileGrp@USE this tool produces by default type: array items: type: string # pattern: '^OCR-D-[A-Z0-9-]+$' + input_file_grp_cardinality: + description: Number of (comma-separated) input fileGrp@USE this tool expects (either an exact value or a minimum,maximum list with -1 for unlimited) + oneOf: + - type: number + multipleOf: 1 + - type: array + items: + type: number + multipleOf: 1 + minItems: 2 + maxItems: 2 + default: 1 + output_file_grp_cardinality: + description: Number of (comma-separated) 
output fileGrp@USE this tool produces (either an exact value or a minimum,maximum list with -1 for unlimited) + oneOf: + - type: number + multipleOf: 1 + - type: array + items: + type: number + multipleOf: 1 + minItems: 2 + maxItems: 2 + default: 1 parameters: description: Object describing the parameters of a tool. Keys are parameter names, values sub-schemas. type: object