44# Licensed under the MIT License. See License.txt in the project root for license information.
55# ======================================================================================================================
66import traceback
7- from typing import List , Callable , Tuple , Any
7+ from typing import List , Callable
88from urllib .parse import urlparse , parse_qs
99
10- from fitframework import fit_logger
1110from langchain_community .document_loaders import PyPDFLoader , PDFPlumberLoader , PyMuPDFLoader , PyPDFDirectoryLoader , \
1211 PyPDFium2Loader , PDFMinerLoader
1312from langchain_core .document_loaders import BaseLoader
1413
15- from .types .document import Document
14+ from fitframework .api .decorators import fitable
15+ from fitframework .api .logging import sys_plugin_logger
1616from .document_util import langchain_doc_to_document
17- from .callable_registers import register_callable_tool
17+ from .types . document import Document
1818
1919
@fitable("langchain.tool.py_pdf_loader", "default")
def py_pdf_loader(file_path: str) -> List[Document]:
    """Load a PDF with pypdf and return its content as a list of documents.

    Args:
        file_path: Location of the PDF. It is handed unchanged to
            ``__loader_handler``, which resolves the on-disk path before the
            loader is built.

    Returns:
        The documents produced by ``PyPDFLoader``, converted by
        ``__loader_handler``; that handler returns an empty list when loading
        fails.
    """
    # The loader class is itself a ``str -> BaseLoader`` callable, so it can be
    # passed directly instead of being wrapped in a lambda.
    return __loader_handler(PyPDFLoader, file_path)
2324
24-
@fitable("langchain.tool.pdfplumber_loader", "default")
def pdfplumber_loader(file_path: str) -> List[Document]:
    """Load a PDF with pdfplumber and return its content as a list of documents.

    Args:
        file_path: Location of the PDF. It is handed unchanged to
            ``__loader_handler``, which resolves the on-disk path before the
            loader is built.

    Returns:
        The documents produced by ``PDFPlumberLoader``, converted by
        ``__loader_handler``; that handler returns an empty list when loading
        fails.
    """
    # The loader class is itself a ``str -> BaseLoader`` callable, so it can be
    # passed directly instead of being wrapped in a lambda.
    return __loader_handler(PDFPlumberLoader, file_path)
2829
29-
@fitable("langchain.tool.py_mupdf_loader", "default")
def py_mupdf_loader(file_path: str) -> List[Document]:
    """Load a PDF with PyMuPDF and return its content as a list of documents.

    Args:
        file_path: Location of the PDF. It is handed unchanged to
            ``__loader_handler``, which resolves the on-disk path before the
            loader is built.

    Returns:
        The documents produced by ``PyMuPDFLoader``, converted by
        ``__loader_handler``; that handler returns an empty list when loading
        fails.
    """
    # The loader class is itself a ``str -> BaseLoader`` callable, so it can be
    # passed directly instead of being wrapped in a lambda.
    return __loader_handler(PyMuPDFLoader, file_path)
3334
34-
@fitable("langchain.tool.py_pdfium2_loader", "default")
def py_pdfium2_loader(file_path: str) -> List[Document]:
    """Load a PDF with pypdfium2 and return its content as a list of documents.

    Args:
        file_path: Location of the PDF. It is handed unchanged to
            ``__loader_handler``, which resolves the on-disk path before the
            loader is built.

    Returns:
        The documents produced by ``PyPDFium2Loader``, converted by
        ``__loader_handler``; that handler returns an empty list when loading
        fails.
    """
    # The loader class is itself a ``str -> BaseLoader`` callable, so it can be
    # passed directly instead of being wrapped in a lambda.
    return __loader_handler(PyPDFium2Loader, file_path)
3839
39-
@fitable("langchain.tool.py_miner_loader", "default")
def py_miner_loader(file_path: str) -> List[Document]:
    """Load a PDF with PDFMiner and return its content as a list of documents.

    Args:
        file_path: Location of the PDF. It is handed unchanged to
            ``__loader_handler``, which resolves the on-disk path before the
            loader is built.

    Returns:
        The documents produced by ``PDFMinerLoader``, converted by
        ``__loader_handler``; that handler returns an empty list when loading
        fails.
    """
    # The loader class is itself a ``str -> BaseLoader`` callable, so it can be
    # passed directly instead of being wrapped in a lambda.
    return __loader_handler(PDFMinerLoader, file_path)
4344
44-
@fitable("langchain.tool.py_pdf_directory_loader", "default")
def py_pdf_directory_loader(directory: str) -> List[Document]:
    """Load every PDF in a directory with pypdf and return the documents.

    Args:
        directory: Location of the directory holding the PDF files. It is
            handed unchanged to ``__loader_handler``, which resolves the
            on-disk path before the loader is built.

    Returns:
        The documents produced by ``PyPDFDirectoryLoader``, converted by
        ``__loader_handler``; that handler returns an empty list when loading
        fails.
    """
    # The loader class is itself a ``str -> BaseLoader`` callable, so it can be
    # passed directly instead of being wrapped in a lambda.
    return __loader_handler(PyPDFDirectoryLoader, directory)
4849
4950
5051def __loader_handler (loader_builder : Callable [[str ], BaseLoader ], file_url : str ) -> List [Document ]:
5152 try :
5253 # 解析文件路径
53- fit_logger .info ("file_url: " + file_url )
54+ sys_plugin_logger .info ("file_url: " + file_url )
5455 nfs_file_path = get_file_path (file_url )
55- fit_logger .info ("nfs_file_path: " + nfs_file_path )
56+ sys_plugin_logger .info ("nfs_file_path: " + nfs_file_path )
5657 pdf_loader = loader_builder (nfs_file_path )
5758 iterator = pdf_loader .lazy_load ()
5859 res = []
@@ -63,8 +64,8 @@ def __loader_handler(loader_builder: Callable[[str], BaseLoader], file_url: str)
6364 res .append (langchain_doc_to_document (doc ))
6465 return res
6566 except BaseException :
66- fit_logger .error ("Invoke file loader failed." )
67- fit_logger .exception ("Invoke file loader failed." )
67+ sys_plugin_logger .error ("Invoke file loader failed." )
68+ sys_plugin_logger .exception ("Invoke file loader failed." )
6869 traceback .print_exc ()
6970 return []
7071
@@ -77,27 +78,10 @@ def get_file_path(file_url: str):
7778 file_query_param = parse_qs (parsed_url .query ).get ('filePath' )
7879 if file_query_param is None or len (file_query_param ) == 0 :
7980 msg = "Invalid file url. missing query parameter [filePath]"
80- fit_logger .error (msg )
81+ sys_plugin_logger .error (msg )
8182 raise ValueError (msg )
8283 else :
8384 return file_query_param [0 ]
8485 except BaseException :
85- fit_logger .error ("Parse file path failed." )
86- return file_url
87-
88-
89- DOCUMENT_RETURN_DESC = "a piece of text and associated metadata."
90-
91- # 普通callable注册方式
92- # Tuple 结构: (tool_func, config_args, return_description)
93- loader_toolkit : List [Tuple [Callable [..., Any ], List [str ], str ]] = [
94- (py_pdf_loader , [], DOCUMENT_RETURN_DESC ),
95- (pdfplumber_loader , [], DOCUMENT_RETURN_DESC ),
96- (py_mupdf_loader , [], DOCUMENT_RETURN_DESC ),
97- (py_pdfium2_loader , [], DOCUMENT_RETURN_DESC ),
98- (py_miner_loader , [], DOCUMENT_RETURN_DESC ),
99- (py_pdf_directory_loader , [], DOCUMENT_RETURN_DESC ),
100- ]
101-
102- for tool in loader_toolkit :
103- register_callable_tool (tool , get_file_path .__module__ , 'langchain.tool' )
86+ sys_plugin_logger .error ("Parse file path failed." )
87+ return file_url
0 commit comments