Skip to content

Commit 2f92238

Browse files
authored
[fel] update langchain loader tools meta (#225)
* 修改langchain-loader-tools插件以及元数据
* 修改检视意见
1 parent 803a966 commit 2f92238

File tree

3 files changed

+573
-418
lines changed

3 files changed

+573
-418
lines changed

framework/fel/python/plugins/fel_langchain_loader_tools/callable_registers.py

Lines changed: 0 additions & 29 deletions
This file was deleted.

framework/fel/python/plugins/fel_langchain_loader_tools/langchain_loader_tools.py

Lines changed: 23 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -4,55 +4,56 @@
44
# Licensed under the MIT License. See License.txt in the project root for license information.
55
# ======================================================================================================================
66
import traceback
7-
from typing import List, Callable, Tuple, Any
7+
from typing import List, Callable
88
from urllib.parse import urlparse, parse_qs
99

10-
from fitframework import fit_logger
1110
from langchain_community.document_loaders import PyPDFLoader, PDFPlumberLoader, PyMuPDFLoader, PyPDFDirectoryLoader, \
1211
PyPDFium2Loader, PDFMinerLoader
1312
from langchain_core.document_loaders import BaseLoader
1413

15-
from .types.document import Document
14+
from fitframework.api.decorators import fitable
15+
from fitframework.api.logging import sys_plugin_logger
1616
from .document_util import langchain_doc_to_document
17-
from .callable_registers import register_callable_tool
17+
from .types.document import Document
1818

1919

20-
def py_pdf_loader(file_path: str, **kwargs) -> List[Document]:
20+
@fitable("langchain.tool.py_pdf_loader", "default")
21+
def py_pdf_loader(file_path: str) -> List[Document]:
2122
"""Load PDF using pypdf into list of documents."""
2223
return __loader_handler(lambda nfs_file_path: PyPDFLoader(nfs_file_path), file_path)
2324

24-
25-
def pdfplumber_loader(file_path: str, **kwargs) -> List[Document]:
25+
@fitable("langchain.tool.pdfplumber_loader", "default")
26+
def pdfplumber_loader(file_path: str) -> List[Document]:
2627
"""Load PDF using pdfplumber into list of documents"""
2728
return __loader_handler(lambda nfs_file_path: PDFPlumberLoader(nfs_file_path), file_path)
2829

29-
30-
def py_mupdf_loader(file_path: str, **kwargs) -> List[Document]:
30+
@fitable("langchain.tool.py_mupdf_loader", "default")
31+
def py_mupdf_loader(file_path: str) -> List[Document]:
3132
"""Load PDF using PyMuPDF into list of documents"""
3233
return __loader_handler(lambda nfs_file_path: PyMuPDFLoader(nfs_file_path), file_path)
3334

34-
35-
def py_pdfium2_loader(file_path: str, **kwargs) -> List[Document]:
35+
@fitable("langchain.tool.py_pdfium2_loader", "default")
36+
def py_pdfium2_loader(file_path: str) -> List[Document]:
3637
"""Load PDF using pypdfium2 into list of documents"""
3738
return __loader_handler(lambda nfs_file_path: PyPDFium2Loader(nfs_file_path), file_path)
3839

39-
40-
def py_miner_loader(file_path: str, **kwargs) -> List[Document]:
40+
@fitable("langchain.tool.py_miner_loader", "default")
41+
def py_miner_loader(file_path: str) -> List[Document]:
4142
"""Load PDF using PDFMiner into list of documents"""
4243
return __loader_handler(lambda nfs_file_path: PDFMinerLoader(nfs_file_path), file_path)
4344

44-
45-
def py_pdf_directory_loader(directory: str, **kwargs) -> List[Document]:
45+
@fitable("langchain.tool.py_pdf_directory_loader", "default")
46+
def py_pdf_directory_loader(directory: str) -> List[Document]:
4647
"""Load a directory with `PDF` files using `pypdf` and chunks at character level"""
4748
return __loader_handler(lambda nfs_file_dir: PyPDFDirectoryLoader(nfs_file_dir), directory)
4849

4950

5051
def __loader_handler(loader_builder: Callable[[str], BaseLoader], file_url: str) -> List[Document]:
5152
try:
5253
# 解析文件路径
53-
fit_logger.info("file_url: " + file_url)
54+
sys_plugin_logger.info("file_url: " + file_url)
5455
nfs_file_path = get_file_path(file_url)
55-
fit_logger.info("nfs_file_path: " + nfs_file_path)
56+
sys_plugin_logger.info("nfs_file_path: " + nfs_file_path)
5657
pdf_loader = loader_builder(nfs_file_path)
5758
iterator = pdf_loader.lazy_load()
5859
res = []
@@ -63,8 +64,8 @@ def __loader_handler(loader_builder: Callable[[str], BaseLoader], file_url: str)
6364
res.append(langchain_doc_to_document(doc))
6465
return res
6566
except BaseException:
66-
fit_logger.error("Invoke file loader failed.")
67-
fit_logger.exception("Invoke file loader failed.")
67+
sys_plugin_logger.error("Invoke file loader failed.")
68+
sys_plugin_logger.exception("Invoke file loader failed.")
6869
traceback.print_exc()
6970
return []
7071

@@ -77,27 +78,10 @@ def get_file_path(file_url: str):
7778
file_query_param = parse_qs(parsed_url.query).get('filePath')
7879
if file_query_param is None or len(file_query_param) == 0:
7980
msg = "Invalid file url. missing query parameter [filePath]"
80-
fit_logger.error(msg)
81+
sys_plugin_logger.error(msg)
8182
raise ValueError(msg)
8283
else:
8384
return file_query_param[0]
8485
except BaseException:
85-
fit_logger.error("Parse file path failed.")
86-
return file_url
87-
88-
89-
DOCUMENT_RETURN_DESC = "a piece of text and associated metadata."
90-
91-
# 普通callable注册方式
92-
# Tuple 结构: (tool_func, config_args, return_description)
93-
loader_toolkit: List[Tuple[Callable[..., Any], List[str], str]] = [
94-
(py_pdf_loader, [], DOCUMENT_RETURN_DESC),
95-
(pdfplumber_loader, [], DOCUMENT_RETURN_DESC),
96-
(py_mupdf_loader, [], DOCUMENT_RETURN_DESC),
97-
(py_pdfium2_loader, [], DOCUMENT_RETURN_DESC),
98-
(py_miner_loader, [], DOCUMENT_RETURN_DESC),
99-
(py_pdf_directory_loader, [], DOCUMENT_RETURN_DESC),
100-
]
101-
102-
for tool in loader_toolkit:
103-
register_callable_tool(tool, get_file_path.__module__, 'langchain.tool')
86+
sys_plugin_logger.error("Parse file path failed.")
87+
return file_url

0 commit comments

Comments
 (0)