From b533805e2dedaa50e99dbf927590fc54a5ad5982 Mon Sep 17 00:00:00 2001 From: shaohuzhang1 Date: Wed, 6 Nov 2024 14:18:10 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=97=A7word=E6=96=87?= =?UTF-8?q?=E6=A1=A3=E5=9B=BE=E7=89=87=E6=97=A0=E6=B3=95=E6=AD=A3=E5=B8=B8?= =?UTF-8?q?=E8=AF=86=E5=88=AB=20#1533?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- apps/common/handle/impl/doc_split_handle.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/apps/common/handle/impl/doc_split_handle.py b/apps/common/handle/impl/doc_split_handle.py index 0e0356ed8cb..c31c53ec123 100644 --- a/apps/common/handle/impl/doc_split_handle.py +++ b/apps/common/handle/impl/doc_split_handle.py @@ -14,9 +14,9 @@ from typing import List from docx import Document, ImagePart +from docx.oxml import ns from docx.table import Table from docx.text.paragraph import Paragraph -from docx.oxml import ns from common.handle.base_split_handle import BaseSplitHandle from common.util.split_model import SplitModel @@ -33,11 +33,8 @@ combine_nsmap = {**ns.nsmap, **old_docx_nsmap} -def image_to_mode(image, doc: Document, images_list, get_image_id, is_new_docx=True): - if is_new_docx: - image_ids = image.xpath('.//a:blip/@r:embed') - else: - image_ids = image.xpath('.//v:imagedata/@r:id', namespaces=combine_nsmap) +def image_to_mode(image, doc: Document, images_list, get_image_id): + image_ids = image['get_image_id_handle'](image.get('image')) for img_id in image_ids: # 获取图片id part = doc.part.related_parts[img_id] # 根据图片id获取对应的图片 if isinstance(part, ImagePart): @@ -49,14 +46,15 @@ def image_to_mode(image, doc: Document, images_list, get_image_id, is_new_docx=T def get_paragraph_element_images(paragraph_element, doc: Document, images_list, get_image_id): - images_xpath_list = [".//pic:pic", ".//w:pict"] + images_xpath_list = [(".//pic:pic", lambda img: img.xpath('.//a:blip/@r:embed')), + (".//w:pict", lambda img: img.xpath('.//v:imagedata/@r:id', namespaces=combine_nsmap))] images = [] - for images_xpath in images_xpath_list: + for images_xpath, get_image_id_handle in images_xpath_list: try: _images = paragraph_element.xpath(images_xpath) if _images is not None and len(_images) > 0: for image in _images: - images.append(image) + images.append({'image': image, 'get_image_id_handle': get_image_id_handle}) except Exception as e: pass return images