From b533805e2dedaa50e99dbf927590fc54a5ad5982 Mon Sep 17 00:00:00 2001
From: shaohuzhang1 <shaohu.zhang@fit2cloud.com>
Date: Wed, 6 Nov 2024 14:18:10 +0800
Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E6=97=A7word=E6=96=87?=
 =?UTF-8?q?=E6=A1=A3=E5=9B=BE=E7=89=87=E6=97=A0=E6=B3=95=E6=AD=A3=E5=B8=B8?=
 =?UTF-8?q?=E8=AF=86=E5=88=AB=20#1533?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 apps/common/handle/impl/doc_split_handle.py | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/apps/common/handle/impl/doc_split_handle.py b/apps/common/handle/impl/doc_split_handle.py
index 0e0356ed8cb..c31c53ec123 100644
--- a/apps/common/handle/impl/doc_split_handle.py
+++ b/apps/common/handle/impl/doc_split_handle.py
@@ -14,9 +14,9 @@
 from typing import List
 
 from docx import Document, ImagePart
+from docx.oxml import ns
 from docx.table import Table
 from docx.text.paragraph import Paragraph
-from docx.oxml import ns
 
 from common.handle.base_split_handle import BaseSplitHandle
 from common.util.split_model import SplitModel
@@ -33,11 +33,8 @@
 combine_nsmap = {**ns.nsmap, **old_docx_nsmap}
 
 
-def image_to_mode(image, doc: Document, images_list, get_image_id, is_new_docx=True):
-    if is_new_docx:
-        image_ids = image.xpath('.//a:blip/@r:embed')
-    else:
-        image_ids = image.xpath('.//v:imagedata/@r:id', namespaces=combine_nsmap)
+def image_to_mode(image, doc: Document, images_list, get_image_id):
+    image_ids = image['get_image_id_handle'](image.get('image'))
     for img_id in image_ids:  # 获取图片id
         part = doc.part.related_parts[img_id]  # 根据图片id获取对应的图片
         if isinstance(part, ImagePart):
@@ -49,14 +46,15 @@ def image_to_mode(image, doc: Document, images_list, get_image_id, is_new_docx=T
 
 
 def get_paragraph_element_images(paragraph_element, doc: Document, images_list, get_image_id):
-    images_xpath_list = [".//pic:pic", ".//w:pict"]
+    images_xpath_list = [(".//pic:pic", lambda img: img.xpath('.//a:blip/@r:embed')),  # (element xpath, id lookup)
+                         (".//w:pict", lambda img: img.xpath('.//v:imagedata/@r:id', namespaces=combine_nsmap))]
     images = []
-    for images_xpath in images_xpath_list:
+    for images_xpath, get_image_id_handle in images_xpath_list:  # try each image markup with its own id lookup
         try:
             _images = paragraph_element.xpath(images_xpath)
             if _images is not None and len(_images) > 0:
                 for image in _images:
-                    images.append(image)
+                    images.append({'image': image, 'get_image_id_handle': get_image_id_handle})  # used by image_to_mode
         except Exception as e:
             pass
     return images