From 6dd0f809f0ced8f00a43ec00608e2e9f1fe06381 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Fri, 18 Oct 2024 11:00:47 +0200 Subject: [PATCH 1/2] fix: fix legacy doc ref Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- docling/datamodel/document.py | 23 ++++++++++++++++------- docling/models/ds_glm_model.py | 22 +++++++++++++++------- tests/verify_utils.py | 2 +- 3 files changed, 32 insertions(+), 15 deletions(-) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 160a25d0..28f0b0f1 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -6,12 +6,6 @@ from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Tuple, Type, Union import filetype -from docling_core.types import BaseText -from docling_core.types import Document as DsDocument -from docling_core.types import DocumentDescription as DsDocumentDescription -from docling_core.types import FileInfoObject as DsFileInfoObject -from docling_core.types import PageDimensions, PageReference, Prov, Ref -from docling_core.types import Table as DsSchemaTable from docling_core.types.doc import ( DocItem, DocItemLabel, @@ -22,7 +16,22 @@ TextItem, ) from docling_core.types.doc.document import ListItem -from docling_core.types.legacy_doc.base import Figure, GlmTableCell, TableCell +from docling_core.types.legacy_doc.base import ( + BaseText, + Figure, + GlmTableCell, + PageDimensions, + PageReference, + Prov, + Ref, +) +from docling_core.types.legacy_doc.base import Table as DsSchemaTable +from docling_core.types.legacy_doc.base import TableCell +from docling_core.types.legacy_doc.document import ( + CCSDocumentDescription as DsDocumentDescription, +) +from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument +from docling_core.types.legacy_doc.document import FileInfoObject as DsFileInfoObject from docling_core.utils.file import resolve_file_source from pydantic import BaseModel from typing_extensions import deprecated diff --git a/docling/models/ds_glm_model.py b/docling/models/ds_glm_model.py index 74b3ddb0..2f7078d3 100644 --- a/docling/models/ds_glm_model.py +++ b/docling/models/ds_glm_model.py @@ -5,15 +5,23 @@ from deepsearch_glm.nlp_utils import init_nlp_model from deepsearch_glm.utils.doc_utils import to_docling_document from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models -from docling_core.types import BaseText -from docling_core.types import Document as DsDocument -from docling_core.types import DocumentDescription as DsDocumentDescription -from docling_core.types import FileInfoObject as DsFileInfoObject -from docling_core.types import PageDimensions, PageReference, Prov, Ref -from docling_core.types import Table as DsSchemaTable from docling_core.types.doc import BoundingBox, CoordOrigin, DoclingDocument from docling_core.types.legacy_doc.base import BoundingBox as DsBoundingBox -from docling_core.types.legacy_doc.base import Figure, TableCell +from docling_core.types.legacy_doc.base import ( + Figure, + PageDimensions, + PageReference, + Prov, + Ref, +) +from docling_core.types.legacy_doc.base import Table as DsSchemaTable +from docling_core.types.legacy_doc.base import TableCell +from docling_core.types.legacy_doc.document import BaseText +from docling_core.types.legacy_doc.document import ( + CCSDocumentDescription as DsDocumentDescription, +) +from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject +from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument from PIL import ImageDraw from pydantic import BaseModel, ConfigDict diff --git a/tests/verify_utils.py b/tests/verify_utils.py index 1349229e..20f5eef0 100644 --- a/tests/verify_utils.py +++ b/tests/verify_utils.py @@ -3,8 +3,8 @@ from pathlib import Path from typing import List -from docling_core.types import Document as DsDocument from docling_core.types.doc import DoclingDocument +from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument from pydantic import TypeAdapter from pydantic.json import pydantic_encoder From a5cf9298441b26d5b23ca9a02cc9b56318d210b3 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Fri, 18 Oct 2024 12:22:52 +0200 Subject: [PATCH 2/2] fix file info object type Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- docling/datamodel/document.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docling/datamodel/document.py b/docling/datamodel/document.py index 28f0b0f1..e688af20 100644 --- a/docling/datamodel/document.py +++ b/docling/datamodel/document.py @@ -30,8 +30,8 @@ from docling_core.types.legacy_doc.document import ( CCSDocumentDescription as DsDocumentDescription, ) +from docling_core.types.legacy_doc.document import CCSFileInfoObject as DsFileInfoObject from docling_core.types.legacy_doc.document import ExportedCCSDocument as DsDocument -from docling_core.types.legacy_doc.document import FileInfoObject as DsFileInfoObject from docling_core.utils.file import resolve_file_source from pydantic import BaseModel from typing_extensions import deprecated