diff --git a/docling_core/experimental/idoctags.py b/docling_core/experimental/idoctags.py index 9f497f14..1012f8cd 100644 --- a/docling_core/experimental/idoctags.py +++ b/docling_core/experimental/idoctags.py @@ -1,7 +1,7 @@ """Define classes for DocTags serialization.""" from enum import Enum -from typing import Any, Final, Optional +from typing import Any, Final, Optional, Tuple from xml.dom.minidom import parseString from pydantic import BaseModel @@ -38,7 +38,10 @@ TabularChartMetaField, ) from docling_core.types.doc.labels import DocItemLabel -from docling_core.types.doc.tokens import DocumentToken +from docling_core.types.doc.tokens import ( + _CodeLanguageToken, + _PictureClassificationToken, +) DOCTAGS_VERSION: Final = "1.0.0" @@ -61,6 +64,127 @@ class IDocTagsTableToken(str, Enum): OTSL_RHED = "" # - row header cell, OTSL_SROW = "" # - section row cell + @classmethod + def get_special_tokens( + cls, + ): + """Return all table-related special tokens. + + Includes the opening/closing OTSL tags and each enum token value. + """ + special_tokens: list[str] = ["", ""] + for token in cls: + special_tokens.append(f"{token.value}") + + return special_tokens + + +class IDocTagsToken(str, Enum): + """IDocTagsToken.""" + + _LOC_PREFIX = "loc_" + _SECTION_HEADER_PREFIX = "section_header_level_" + + DOCUMENT = "doctag" + VERSION = "version" + + OTSL = "otsl" + ORDERED_LIST = "ordered_list" + UNORDERED_LIST = "unordered_list" + + PAGE_BREAK = "page_break" + + CAPTION = "caption" + FOOTNOTE = "footnote" + FORMULA = "formula" + LIST_ITEM = "list_item" + PAGE_FOOTER = "page_footer" + PAGE_HEADER = "page_header" + PICTURE = "picture" + SECTION_HEADER = "section_header" + TABLE = "table" + TEXT = "text" + TITLE = "title" + DOCUMENT_INDEX = "document_index" + CODE = "code" + CHECKBOX_SELECTED = "checkbox_selected" + CHECKBOX_UNSELECTED = "checkbox_unselected" + FORM = "form" + EMPTY_VALUE = "empty_value" # used for empty value fields in fillable forms + + @classmethod + def get_special_tokens( + cls, + *, + page_dimension: Tuple[int, int] = (500, 500), + include_location_tokens: bool = True, + include_code_class: bool = False, + include_picture_class: bool = False, + ): + """Function to get all special document tokens.""" + special_tokens: list[str] = [] + for token in cls: + if not token.value.endswith("_"): + special_tokens.append(f"<{token.value}>") + special_tokens.append(f"") + + for i in range(6): + special_tokens += [ + f"<{IDocTagsToken._SECTION_HEADER_PREFIX.value}{i}>", + f"", + ] + + special_tokens.extend(IDocTagsTableToken.get_special_tokens()) + + if include_picture_class: + special_tokens.extend([t.value for t in _PictureClassificationToken]) + + if include_code_class: + special_tokens.extend([t.value for t in _CodeLanguageToken]) + + if include_location_tokens: + # Adding dynamically generated location-tokens + for i in range(0, max(page_dimension[0], page_dimension[1])): + special_tokens.append(f"<{IDocTagsToken._LOC_PREFIX.value}{i}/>") + + return special_tokens + + @classmethod + def create_token_name_from_doc_item_label(cls, label: str, level: int = 1) -> str: + """Get token corresponding to passed doc item label.""" + doc_token_by_item_label = { + DocItemLabel.CAPTION: IDocTagsToken.CAPTION, + DocItemLabel.FOOTNOTE: IDocTagsToken.FOOTNOTE, + DocItemLabel.FORMULA: IDocTagsToken.FORMULA, + DocItemLabel.LIST_ITEM: IDocTagsToken.LIST_ITEM, + DocItemLabel.PAGE_FOOTER: IDocTagsToken.PAGE_FOOTER, + DocItemLabel.PAGE_HEADER: IDocTagsToken.PAGE_HEADER, + DocItemLabel.PICTURE: IDocTagsToken.PICTURE, + DocItemLabel.TABLE: IDocTagsToken.TABLE, + DocItemLabel.TEXT: IDocTagsToken.TEXT, + DocItemLabel.TITLE: IDocTagsToken.TITLE, + DocItemLabel.DOCUMENT_INDEX: IDocTagsToken.DOCUMENT_INDEX, + DocItemLabel.CODE: IDocTagsToken.CODE, + DocItemLabel.CHECKBOX_SELECTED: IDocTagsToken.CHECKBOX_SELECTED, + DocItemLabel.CHECKBOX_UNSELECTED: IDocTagsToken.CHECKBOX_UNSELECTED, + DocItemLabel.FORM: IDocTagsToken.FORM, + # Fallback mappings for labels without dedicated tokens in IDocTagsToken + DocItemLabel.KEY_VALUE_REGION: IDocTagsToken.TEXT, + DocItemLabel.PARAGRAPH: IDocTagsToken.TEXT, + DocItemLabel.REFERENCE: IDocTagsToken.TEXT, + DocItemLabel.CHART: IDocTagsToken.PICTURE, + } + + res: str + if label == DocItemLabel.SECTION_HEADER: + res = f"{IDocTagsToken._SECTION_HEADER_PREFIX}{level}" + else: + try: + res = doc_token_by_item_label[DocItemLabel(label)].value + except KeyError as e: + raise RuntimeError(f"Unexpected DocItemLabel: {label}") from e + return res + class IDocTagsParams(DocTagsParams): """DocTags-specific serialization parameters.""" @@ -187,6 +311,8 @@ def serialize( otsl_content = temp_table.export_to_otsl( temp_doc, add_cell_location=False, + # Suppress chart cell text if global content is off + add_cell_text=params.add_content, self_closing=params.do_self_closing, table_token=IDocTagsTableToken, ) @@ -200,7 +326,7 @@ def serialize( text_res = "".join([r.text for r in res_parts]) if text_res: - token = DocumentToken.create_token_name_from_doc_item_label( + token = IDocTagsToken.create_token_name_from_doc_item_label( label=DocItemLabel.CHART if is_chart else DocItemLabel.PICTURE, ) text_res = _wrap(text=text_res, wrap_tag=token) @@ -238,12 +364,20 @@ def serialize_doc( text_res = delim.join([p.text for p in parts if p.text]) if self.params.add_page_break: - page_sep = f"<{DocumentToken.PAGE_BREAK.value}{'/' if self.params.do_self_closing else ''}>" + page_sep = f"<{IDocTagsToken.PAGE_BREAK.value}{'/' if self.params.do_self_closing else ''}>" for full_match, _, _ in self._get_page_breaks(text=text_res): text_res = text_res.replace(full_match, page_sep) - wrap_tag = DocumentToken.DOCUMENT.value - text_res = f"<{wrap_tag}>{DOCTAGS_VERSION}{text_res}{delim}" + # print(f"text-res-v1: {text_res}") + + tmp = f"<{IDocTagsToken.DOCUMENT.value}>" + tmp += f"<{IDocTagsToken.VERSION.value}>{DOCTAGS_VERSION}" + # text_res += f"{text_res}{delim}" + tmp += f"{text_res}" + tmp += f"" + + # print(f"text-res-v2: {tmp}") + text_res = tmp if self.params.pretty_indentation and ( my_root := parseString(text_res).documentElement @@ -252,4 +386,7 @@ def serialize_doc( text_res = "\n".join( [line for line in text_res.split("\n") if line.strip()] ) + + print(f"text-res-v3:\n{text_res}") + return create_ser_result(text=text_res, span_source=parts) diff --git a/docling_core/transforms/serializer/doctags.py b/docling_core/transforms/serializer/doctags.py index 807b7750..e93542e5 100644 --- a/docling_core/transforms/serializer/doctags.py +++ b/docling_core/transforms/serializer/doctags.py @@ -106,10 +106,16 @@ def serialize( """Serializes the passed item.""" my_visited = visited if visited is not None else set() params = DocTagsParams(**kwargs) - wrap_tag: Optional[str] = DocumentToken.create_token_name_from_doc_item_label( - label=item.label, - **({"level": item.level} if isinstance(item, SectionHeaderItem) else {}), + # Decide wrapping up-front so ListItem never gets wrapped here + wrap_tag_token: Optional[str] = ( + DocumentToken.create_token_name_from_doc_item_label( + label=item.label, + **( + {"level": item.level} if isinstance(item, SectionHeaderItem) else {} + ), + ) ) + wrap_tag: Optional[str] = None if isinstance(item, ListItem) else wrap_tag_token parts: list[str] = [] if item.meta: @@ -152,8 +158,6 @@ def serialize( text_part = f"{language_token}{text_part}" else: text_part = text_part.strip() - if isinstance(item, ListItem): - wrap_tag = None # deferring list item tags to list handling if text_part: parts.append(text_part) @@ -203,7 +207,8 @@ def serialize( otsl_text = item.export_to_otsl( doc=doc, add_cell_location=params.add_table_cell_location, - add_cell_text=params.add_table_cell_text, + # Suppress cell text when global content is disabled + add_cell_text=(params.add_table_cell_text and params.add_content), xsize=params.xsize, ysize=params.ysize, visited=visited, @@ -452,22 +457,87 @@ def serialize( """Serializes the passed item.""" my_visited = visited if visited is not None else set() params = DocTagsParams(**kwargs) - parts = doc_serializer.get_parts( - item=item, - list_level=list_level + 1, - is_inline_scope=is_inline_scope, - visited=my_visited, - **kwargs, - ) - delim = _get_delim(params=params) - if parts: - text_res = delim.join( - [ - t - for p in parts - if (t := _wrap(text=p.text, wrap_tag=DocumentToken.LIST_ITEM.value)) - ] + + # Build list children explicitly. Requirements: + # 1) / can be children of lists. + # 2) Do NOT wrap nested lists into , even if they are + # children of a ListItem in the logical structure. + # 3) Still ensure structural wrappers are preserved even when + # content is suppressed (e.g., add_content=False). + item_results: list[SerializationResult] = [] + child_results_wrapped: list[str] = [] + + excluded = doc_serializer.get_excluded_refs(**kwargs) + for child_ref in item.children: + child = child_ref.resolve(doc) + + # If a nested list group is present directly under this list group, + # emit it as a sibling (no wrapper). + if isinstance(child, ListGroup): + if child.self_ref in my_visited or child.self_ref in excluded: + continue + my_visited.add(child.self_ref) + sub_res = doc_serializer.serialize( + item=child, + list_level=list_level + 1, + is_inline_scope=is_inline_scope, + visited=my_visited, + **kwargs, + ) + if sub_res.text: + child_results_wrapped.append(sub_res.text) + item_results.append(sub_res) + continue + + # Normal case: ListItem under ListGroup + if not isinstance(child, ListItem): + continue + if child.self_ref in my_visited or child.self_ref in excluded: + continue + + my_visited.add(child.self_ref) + + # Serialize the list item content (DocTagsTextSerializer will not wrap it) + child_res = doc_serializer.serialize( + item=child, + list_level=list_level + 1, + is_inline_scope=is_inline_scope, + visited=my_visited, + **kwargs, + ) + item_results.append(child_res) + + # Wrap the content into , without any nested list content. + child_text_wrapped = _wrap( + text=f"{child_res.text}", + wrap_tag=DocumentToken.LIST_ITEM.value, ) + child_results_wrapped.append(child_text_wrapped) + + # After the , append any nested lists (children of this ListItem) + # as siblings at the same level (not wrapped in ). + for subref in child.children: + sub = subref.resolve(doc) + if ( + isinstance(sub, ListGroup) + and sub.self_ref not in my_visited + and sub.self_ref not in excluded + ): + my_visited.add(sub.self_ref) + sub_res = doc_serializer.serialize( + item=sub, + list_level=list_level + 1, + is_inline_scope=is_inline_scope, + visited=my_visited, + **kwargs, + ) + if sub_res.text: + child_results_wrapped.append(sub_res.text) + item_results.append(sub_res) + + delim = _get_delim(params=params) + if child_results_wrapped: + text_res = delim.join(child_results_wrapped) text_res = f"{text_res}{delim}" wrap_tag = ( DocumentToken.ORDERED_LIST.value @@ -477,7 +547,8 @@ def serialize( text_res = _wrap(text=text_res, wrap_tag=wrap_tag) else: text_res = "" - return create_ser_result(text=text_res, span_source=parts) + + return create_ser_result(text=text_res, span_source=item_results) class DocTagsInlineSerializer(BaseInlineSerializer): @@ -636,18 +707,19 @@ def serialize_captions( results: list[SerializationResult] = [] if item.captions: cap_res = super().serialize_captions(item, **kwargs) - if cap_res.text: - if params.add_location: - for caption in item.captions: - if caption.cref not in self.get_excluded_refs(**kwargs): - if isinstance(cap := caption.resolve(self.doc), DocItem): - loc_txt = cap.get_location_tokens( - doc=self.doc, - xsize=params.xsize, - ysize=params.ysize, - self_closing=params.do_self_closing, - ) - results.append(create_ser_result(text=loc_txt)) + if cap_res.text and params.add_location: + for caption in item.captions: + if caption.cref not in self.get_excluded_refs(**kwargs): + if isinstance(cap := caption.resolve(self.doc), DocItem): + loc_txt = cap.get_location_tokens( + doc=self.doc, + xsize=params.xsize, + ysize=params.ysize, + self_closing=params.do_self_closing, + ) + results.append(create_ser_result(text=loc_txt)) + # Only include caption textual content when add_content is True + if cap_res.text and params.add_content: results.append(cap_res) text_res = "".join([r.text for r in results]) if text_res: diff --git a/test/data/doc/constructed_doc.dt b/test/data/doc/constructed_doc.dt index ef0fcde4..5bb411e7 100644 --- a/test/data/doc/constructed_doc.dt +++ b/test/data/doc/constructed_doc.dt @@ -10,12 +10,12 @@ Affiliation 2 list item 1 list item 2 list item 3 -list item 3.a +list item 3.a list item 3.b list item 3.c -list item 3.c.i - - +list item 3.c.i + + list item 4 ProductYears20162017Apple49823695944This is the caption of table 1. @@ -28,7 +28,7 @@ Affiliation 2 item 1 of neighboring list item 2 of neighboring list -item 1 of sub list +item 1 of sub list Here a code snippet: <_Python_>print("Hello world") (to be displayed inline) @@ -37,7 +37,7 @@ Affiliation 2 E=mc^2 (to be displayed inline) - + Here a code block: <_Python_>print("Hello world") @@ -58,13 +58,13 @@ Affiliation 2 Item 1 in A Item 2 in A Item 3 in A -Item 1 in B +Item 1 in B Item 2 in B -Item 1 in C +Item 1 in C Item 2 in C - + Item 3 in B - + Item 4 in A List item without parent list group diff --git a/test/data/doc/constructed_doc.dt.gt b/test/data/doc/constructed_doc.dt.gt index ef0fcde4..5bb411e7 100644 --- a/test/data/doc/constructed_doc.dt.gt +++ b/test/data/doc/constructed_doc.dt.gt @@ -10,12 +10,12 @@ Affiliation 2 list item 1 list item 2 list item 3 -list item 3.a +list item 3.a list item 3.b list item 3.c -list item 3.c.i - - +list item 3.c.i + + list item 4 ProductYears20162017Apple49823695944This is the caption of table 1. @@ -28,7 +28,7 @@ Affiliation 2 item 1 of neighboring list item 2 of neighboring list -item 1 of sub list +item 1 of sub list Here a code snippet: <_Python_>print("Hello world") (to be displayed inline) @@ -37,7 +37,7 @@ Affiliation 2 E=mc^2 (to be displayed inline) - + Here a code block: <_Python_>print("Hello world") @@ -58,13 +58,13 @@ Affiliation 2 Item 1 in A Item 2 in A Item 3 in A -Item 1 in B +Item 1 in B Item 2 in B -Item 1 in C +Item 1 in C Item 2 in C - + Item 3 in B - + Item 4 in A List item without parent list group diff --git a/test/data/doc/constructed_document.yaml.dt b/test/data/doc/constructed_document.yaml.dt index 14df9817..ec16e825 100644 --- a/test/data/doc/constructed_document.yaml.dt +++ b/test/data/doc/constructed_document.yaml.dt @@ -10,12 +10,12 @@ Affiliation 2 list item 1 list item 2 list item 3 -list item 3.a +list item 3.a list item 3.b list item 3.c -list item 3.c.i - - +list item 3.c.i + + list item 4 ProductYears20162017Apple49823695944This is the caption of table 1. @@ -28,7 +28,7 @@ Affiliation 2 item 1 of neighboring list item 2 of neighboring list -item 1 of sub list +item 1 of sub list Here a code snippet: <_Python_>print("Hello world") (to be displayed inline) @@ -37,7 +37,7 @@ Affiliation 2 E=mc^2 (to be displayed inline) - + Here a code block: <_Python_>print("Hello world") @@ -58,13 +58,13 @@ Affiliation 2 Item 1 in A Item 2 in A Item 3 in A -Item 1 in B +Item 1 in B Item 2 in B -Item 1 in C +Item 1 in C Item 2 in C - + Item 3 in B - + Item 4 in A List item without parent list group diff --git a/test/data/doc/ddoc_0.json b/test/data/doc/ddoc_0.json new file mode 100644 index 00000000..7894bed1 --- /dev/null +++ b/test/data/doc/ddoc_0.json @@ -0,0 +1,2103 @@ +{ + "schema_name": "DoclingDocument", + "version": "1.8.0", + "name": "00073b00f3fbd33ef92f0c4902c5c7397c89f07f6a5528c5c97af53c67c4dcc7", + "furniture": { + "self_ref": "#/furniture", + "children": [], + "content_layer": "furniture", + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "children": [ + { + "$ref": "#/texts/0" + }, + { + "$ref": "#/tables/0" + }, + { + "$ref": "#/texts/1" + }, + { + "$ref": "#/groups/0" + }, + { + "$ref": "#/tables/1" + }, + { + "$ref": "#/texts/3" + }, + { + "$ref": "#/groups/1" + }, + { + "$ref": "#/tables/2" + }, + { + "$ref": "#/texts/5" + }, + { + "$ref": "#/groups/2" + }, + { + "$ref": "#/tables/3" + }, + { + "$ref": "#/texts/7" + }, + { + "$ref": "#/groups/3" + }, + { + "$ref": "#/tables/4" + }, + { + "$ref": "#/texts/9" + }, + { + "$ref": "#/groups/4" + }, + { + "$ref": "#/tables/5" + }, + { + "$ref": "#/texts/11" + }, + { + "$ref": "#/groups/5" + }, + { + "$ref": "#/tables/6" + }, + { + "$ref": "#/texts/13" + }, + { + "$ref": "#/texts/14" + } + ], + "content_layer": "body", + "name": "_root_", + "label": "unspecified" + }, + "groups": [ + { + "self_ref": "#/groups/0", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/2" + } + ], + "content_layer": "body", + "name": "list_standalone_10", + "label": "list" + }, + { + "self_ref": "#/groups/1", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/4" + } + ], + "content_layer": "body", + "name": "list_standalone_11", + "label": "list" + }, + { + "self_ref": "#/groups/2", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/6" + } + ], + "content_layer": "body", + "name": "list_standalone_12", + "label": "list" + }, + { + "self_ref": "#/groups/3", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/8" + } + ], + "content_layer": "body", + "name": "list_standalone_13", + "label": "list" + }, + { + "self_ref": "#/groups/4", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/10" + } + ], + "content_layer": "body", + "name": "list_standalone_14", + "label": "list" + }, + { + "self_ref": "#/groups/5", + "parent": { + "$ref": "#/body" + }, + "children": [ + { + "$ref": "#/texts/12" + } + ], + "content_layer": "body", + "name": "list_standalone_15", + "label": "list" + } + ], + "texts": [ + { + "self_ref": "#/texts/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "furniture", + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 412.099992, + "t": 1510.53408, + "r": 847.769328, + "b": 1489.28472, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 47 + ] + } + ], + "orig": "ndbinfo_select_all - Select From ndbinfo Tables", + "text": "ndbinfo_select_all - Select From ndbinfo Tables" + }, + { + "self_ref": "#/texts/1", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 223.603992, + "t": 1315.97136, + "r": 1144.0813679999999, + "b": 1294.722, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 104 + ] + } + ], + "orig": "This option sets the number of times to execute the select. Use --delay to set the time between loops.", + "text": "This option sets the number of times to execute the select. Use --delay to set the time between loops." + }, + { + "self_ref": "#/texts/2", + "parent": { + "$ref": "#/groups/0" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 204.531624, + "t": 1271.087136, + "r": 451.80899999999997, + "b": 1249.8377759999998, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 21 + ] + } + ], + "orig": "\u2022 --ndb-connectstring", + "text": "\u2022 --ndb-connectstring", + "enumerated": false, + "marker": "" + }, + { + "self_ref": "#/texts/3", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 224.271072, + "t": 1084.613904, + "r": 1142.0813520000002, + "b": 1039.4017920000001, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 143 + ] + } + ], + "orig": "Set connect string for connecting to ndb_mgmd. Syntax: \"[nodeid=id;][host=]hostname[:port]\". Overrides entries in NDB_CONNECTSTRING and my.cnf.", + "text": "Set connect string for connecting to ndb_mgmd. Syntax: \"[nodeid=id;][host=]hostname[:port]\". Overrides entries in NDB_CONNECTSTRING and my.cnf." + }, + { + "self_ref": "#/texts/4", + "parent": { + "$ref": "#/groups/1" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 204.740928, + "t": 1014.705648, + "r": 403.115832, + "b": 996.925248, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 17 + ] + } + ], + "orig": "\u2022 --ndb-mgmd-host", + "text": "\u2022 --ndb-mgmd-host", + "enumerated": false, + "marker": "" + }, + { + "self_ref": "#/texts/5", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 223.91855999999999, + "t": 829.993824, + "r": 541.732608, + "b": 808.744464, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 30 + ] + } + ], + "orig": "Same as --ndb-connectstring .", + "text": "Same as --ndb-connectstring ." + }, + { + "self_ref": "#/texts/6", + "parent": { + "$ref": "#/groups/2" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 203.139936, + "t": 783.451152, + "r": 367.819344, + "b": 769.089024, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 14 + ] + } + ], + "orig": "\u2022 --ndb-nodeid", + "text": "\u2022 --ndb-nodeid", + "enumerated": false, + "marker": "" + }, + { + "self_ref": "#/texts/7", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 224.521992, + "t": 598.5096480000001, + "r": 849.322584, + "b": 577.260288, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 72 + ] + } + ], + "orig": "Set node ID for this node, overriding any ID set by --ndb-connectstring.", + "text": "Set node ID for this node, overriding any ID set by --ndb-connectstring." + }, + { + "self_ref": "#/texts/8", + "parent": { + "$ref": "#/groups/3" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 204.8058, + "t": 552.36456, + "r": 584.730504, + "b": 536.87304, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 32 + ] + } + ], + "orig": "\u2022 --ndb-optimized-node-selection", + "text": "\u2022 --ndb-optimized-node-selection", + "enumerated": false, + "marker": "" + }, + { + "self_ref": "#/texts/9", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 223.919784, + "t": 434.44684800000005, + "r": 1102.77504, + "b": 388.3603679999999, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 136 + ] + } + ], + "orig": "Enable optimizations for selection of nodes for transactions. Enabled by default; use --skip-ndb- optimized-node-selection to disable.", + "text": "Enable optimizations for selection of nodes for transactions. Enabled by default; use --skip-ndb- optimized-node-selection to disable." + }, + { + "self_ref": "#/texts/10", + "parent": { + "$ref": "#/groups/4" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 204.31619999999998, + "t": 363.80044799999996, + "r": 377.693352, + "b": 349.65532800000005, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 15 + ] + } + ], + "orig": "\u2022 --no-defaults", + "text": "\u2022 --no-defaults", + "enumerated": false, + "marker": "" + }, + { + "self_ref": "#/texts/11", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 225.182952, + "t": 245.847888, + "r": 818.1216, + "b": 224.598528, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 71 + ] + } + ], + "orig": "Do not read default options from any option file other than login file.", + "text": "Do not read default options from any option file other than login file." + }, + { + "self_ref": "#/texts/12", + "parent": { + "$ref": "#/groups/5" + }, + "children": [], + "content_layer": "body", + "label": "list_item", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 204.862104, + "t": 199.52063999999996, + "r": 414.566352, + "b": 180.91339199999993, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 18 + ] + } + ], + "orig": "\u2022 --print-defaults", + "text": "\u2022 --print-defaults", + "enumerated": false, + "marker": "" + }, + { + "self_ref": "#/texts/13", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "furniture", + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 224.58808800000003, + "t": 80.88537599999995, + "r": 545.5931039999999, + "b": 59.61700799999994, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 37 + ] + } + ], + "orig": "Print program argument list and exit.", + "text": "Print program argument list and exit." + }, + { + "self_ref": "#/texts/14", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "furniture", + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 1105.84116, + "t": 93.37996799999996, + "r": 1151.544096, + "b": 77.81716800000004, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 4 + ] + } + ], + "orig": "4253", + "text": "4253" + } + ], + "pictures": [], + "tables": [ + { + "self_ref": "#/tables/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 222.250248, + "t": 1438.47, + "r": 1151.596728, + "b": 1340.410896, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 53 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 224.533008, + "t": 146.833632, + "r": 688.3127280000001, + "b": 176.839344, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Default Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.2906959999999, + "t": 146.833632, + "r": 1149.957792, + "b": 176.839344, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 224.51464800000002, + "t": 176.642928, + "r": 688.3127280000001, + "b": 209.54419199999998, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Minimum Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.2906959999999, + "t": 176.642928, + "r": 1149.957792, + "b": 209.54419199999998, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "0", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 224.553816, + "t": 209.526768, + "r": 688.3127280000001, + "b": 241.270128, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Maximum Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.2906959999999, + "t": 209.526768, + "r": 1149.957792, + "b": 242.42803199999997, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "MAX_INT", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + "num_rows": 3, + "num_cols": 2, + "grid": [ + [ + { + "bbox": { + "l": 224.533008, + "t": 146.833632, + "r": 688.3127280000001, + "b": 176.839344, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Default Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.2906959999999, + "t": 146.833632, + "r": 1149.957792, + "b": 176.839344, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "1", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + [ + { + "bbox": { + "l": 224.51464800000002, + "t": 176.642928, + "r": 688.3127280000001, + "b": 209.54419199999998, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Minimum Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.2906959999999, + "t": 176.642928, + "r": 1149.957792, + "b": 209.54419199999998, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "0", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + [ + { + "bbox": { + "l": 224.553816, + "t": 209.526768, + "r": 688.3127280000001, + "b": 241.270128, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Maximum Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.2906959999999, + "t": 209.526768, + "r": 1149.957792, + "b": 242.42803199999997, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "MAX_INT", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ] + ] + }, + "annotations": [] + }, + { + "self_ref": "#/tables/1", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 222.977304, + "t": 1210.340736, + "r": 1153.22832, + "b": 1109.547648, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 90 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 224.89898399999998, + "t": 375.309792, + "r": 686.81088, + "b": 408.21105600000004, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8488239999999, + "t": 375.309792, + "r": 1150.3458, + "b": 408.21105600000004, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--ndb-connectstring=connection-string", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 224.672544, + "t": 407.9196, + "r": 686.81088, + "b": 440.820864, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Type", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8488239999999, + "t": 407.9196, + "r": 1150.1193600000001, + "b": 440.820864, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "String", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 224.63092799999998, + "t": 440.996688, + "r": 686.81088, + "b": 472.518288, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Default Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8488239999999, + "t": 440.996688, + "r": 1150.0777440000002, + "b": 472.518288, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "[none]", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + "num_rows": 3, + "num_cols": 2, + "grid": [ + [ + { + "bbox": { + "l": 224.89898399999998, + "t": 375.309792, + "r": 686.81088, + "b": 408.21105600000004, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8488239999999, + "t": 375.309792, + "r": 1150.3458, + "b": 408.21105600000004, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--ndb-connectstring=connection-string", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + [ + { + "bbox": { + "l": 224.672544, + "t": 407.9196, + "r": 686.81088, + "b": 440.820864, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Type", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8488239999999, + "t": 407.9196, + "r": 1150.1193600000001, + "b": 440.820864, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "String", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + [ + { + "bbox": { + "l": 224.63092799999998, + "t": 440.996688, + "r": 686.81088, + "b": 472.518288, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Default Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8488239999999, + "t": 440.996688, + "r": 1150.0777440000002, + "b": 472.518288, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "[none]", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ] + ] + }, + "annotations": [] + }, + { + "self_ref": "#/tables/2", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 223.20129599999999, + "t": 955.31832, + "r": 1153.452312, + "b": 854.525232, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 86 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 225.70927200000003, + "t": 630.3464640000001, + "r": 688.6591199999999, + "b": 661.868064, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.993272, + "t": 630.3464640000001, + "r": 1151.156088, + "b": 661.868064, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--ndb-mgmd-host=connection-string", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 224.87083199999998, + "t": 661.646304, + "r": 688.6591199999999, + "b": 694.872288, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Type", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.993272, + "t": 661.646304, + "r": 1150.771752, + "b": 694.872288, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "String", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 225.110736, + "t": 695.24928, + "r": 688.6591199999999, + "b": 726.77088, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Default Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.993272, + "t": 695.24928, + "r": 1150.557552, + "b": 726.77088, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "[none]", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + "num_rows": 3, + "num_cols": 2, + "grid": [ + [ + { + "bbox": { + "l": 225.70927200000003, + "t": 630.3464640000001, + "r": 688.6591199999999, + "b": 661.868064, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.993272, + "t": 630.3464640000001, + "r": 1151.156088, + "b": 661.868064, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--ndb-mgmd-host=connection-string", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + [ + { + "bbox": { + "l": 224.87083199999998, + "t": 661.646304, + "r": 688.6591199999999, + "b": 694.872288, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Type", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.993272, + "t": 661.646304, + "r": 1150.771752, + "b": 694.872288, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "String", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + [ + { + "bbox": { + "l": 225.110736, + "t": 695.24928, + "r": 688.6591199999999, + "b": 726.77088, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Default Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 688.993272, + "t": 695.24928, + "r": 1150.557552, + "b": 726.77088, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "[none]", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ] + ] + }, + "annotations": [] + }, + { + "self_ref": "#/tables/3", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 222.433848, + "t": 724.960368, + "r": 1152.684864, + "b": 624.16728, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 68 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 224.98344, + "t": 860.6774879999999, + "r": 686.840256, + "b": 893.903472, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8684079999999, + "t": 860.6774879999999, + "r": 1150.917408, + "b": 893.903472, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--ndb-nodeid=#", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 224.908776, + "t": 893.849616, + "r": 686.840256, + "b": 927.0756, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Type", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8684079999999, + "t": 893.849616, + "r": 1150.842744, + "b": 927.0756, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Integer", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 225.055656, + "t": 927.378144, + "r": 686.840256, + "b": 957.981024, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Default Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8684079999999, + "t": 927.378144, + "r": 1150.960248, + "b": 958.152096, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "[none]", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + "num_rows": 3, + "num_cols": 2, + "grid": [ + [ + { + "bbox": { + "l": 224.98344, + "t": 860.6774879999999, + "r": 686.840256, + "b": 893.903472, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8684079999999, + "t": 860.6774879999999, + "r": 1150.917408, + "b": 893.903472, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--ndb-nodeid=#", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + [ + { + "bbox": { + "l": 224.908776, + "t": 893.849616, + "r": 686.840256, + "b": 927.0756, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Type", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8684079999999, + "t": 893.849616, + "r": 1150.842744, + "b": 927.0756, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Integer", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + [ + { + "bbox": { + "l": 225.055656, + "t": 927.378144, + "r": 686.840256, + "b": 957.981024, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Default Value", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.8684079999999, + "t": 927.378144, + "r": 1150.960248, + "b": 958.152096, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "[none]", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ] + ] + }, + "annotations": [] + }, + { + "self_ref": "#/tables/4", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 223.6248, + "t": 492.69369600000005, + "r": 1154.5906320000001, + "b": 459.70056, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 50 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 225.216, + "t": 1092.2915520000001, + "r": 686.2784399999999, + "b": 1123.383888, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.6272799999999, + "t": 1093.1120640000001, + "r": 1150.917408, + "b": 1122.805728, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--ndb-optimized-node-selection", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + "num_rows": 1, + "num_cols": 2, + "grid": [ + [ + { + "bbox": { + "l": 225.216, + "t": 1092.2915520000001, + "r": 686.2784399999999, + "b": 1123.383888, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 686.6272799999999, + "t": 1093.1120640000001, + "r": 1150.917408, + "b": 1122.805728, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--ndb-optimized-node-selection", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ] + ] + }, + "annotations": [] + }, + { + "self_ref": "#/tables/5", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 222.891624, + "t": 304.4162879999999, + "r": 1153.14264, + "b": 269.734608, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 33 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 224.082576, + "t": 1281.102768, + "r": 687.7031760000001, + "b": 1314.122832, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 687.775392, + "t": 1280.29968, + "r": 1150.0165439999998, + "b": 1314.122832, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--no-defaults", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + "num_rows": 1, + "num_cols": 2, + "grid": [ + [ + { + "bbox": { + "l": 224.082576, + "t": 1281.102768, + "r": 687.7031760000001, + "b": 1314.122832, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 687.775392, + "t": 1280.29968, + "r": 1150.0165439999998, + "b": 1314.122832, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--no-defaults", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ] + ] + }, + "annotations": [] + }, + { + "self_ref": "#/tables/6", + "parent": { + "$ref": "#/body" + }, + "children": [], + "content_layer": "body", + "label": "table", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 224.27352000000002, + "t": 138.81384000000003, + "r": 1153.274832, + "b": 105.69873600000005, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 36 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 224.298, + "t": 1445.525136, + "r": 689.071608, + "b": 1478.317104, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 689.0924160000001, + "t": 1445.525136, + "r": 1152.3348, + "b": 1478.248992, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--print-defaults", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ], + "num_rows": 1, + "num_cols": 2, + "grid": [ + [ + { + "bbox": { + "l": 224.298, + "t": 1445.525136, + "r": 689.071608, + "b": 1478.317104, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Command-Line Format", + "column_header": false, + "row_header": true, + "row_section": false, + "fillable": false + }, + { + "bbox": { + "l": 689.0924160000001, + "t": 1445.525136, + "r": 1152.3348, + "b": 1478.248992, + "coord_origin": "TOPLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "--print-defaults", + "column_header": false, + "row_header": false, + "row_section": false, + "fillable": false + } + ] + ] + }, + "annotations": [] + } + ], + "key_value_items": [], + "form_items": [], + "pages": { + "1": { + "size": { + "width": 1224.0, + "height": 1584.0 + }, + "image": { + "mimetype": "image/png", + "dpi": 72, + "size": { + "width": 1224.0, + "height": 1584.0 + }, + "uri": "GroundTruthPageImages/0" + }, + "page_no": 1 + } + } +} \ No newline at end of file diff --git a/test/test_serialization.py b/test/test_serialization.py index 3f17492e..249a660c 100644 --- a/test/test_serialization.py +++ b/test/test_serialization.py @@ -4,7 +4,7 @@ import pytest -from docling_core.experimental.idoctags import IDocTagsDocSerializer +from docling_core.experimental.idoctags import IDocTagsDocSerializer, IDocTagsParams from docling_core.transforms.serializer.common import _DEFAULT_LABELS from docling_core.transforms.serializer.doctags import DocTagsDocSerializer from docling_core.transforms.serializer.html import ( @@ -593,6 +593,32 @@ def test_doctags_meta(): # =============================== +def test_idoctags(): + src = Path("./test/data/doc/ddoc_0.json") + doc = DoclingDocument.load_from_json(src) + + if True: + params = IDocTagsParams() + params.add_content = False + + ser = IDocTagsDocSerializer(doc=doc, params=params) + actual = ser.serialize().text + print(actual) + + assert actual.startswith("") + + if True: + params = IDocTagsParams() + params.pretty_indentation = "" + params.add_content = False + + ser = IDocTagsDocSerializer(doc=doc, params=params) + actual = ser.serialize().text + print(actual) + + assert actual.startswith("") + + def test_idoctags_meta(): src = Path("./test/data/doc/dummy_doc_with_meta.yaml") doc = DoclingDocument.load_from_yaml(src)