-
Notifications
You must be signed in to change notification settings - Fork 121
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #162 from alipay/dev
feat: Version 0.0.13 Release
- Loading branch information
Showing
249 changed files
with
7,582 additions
and
10,372 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
7 changes: 7 additions & 0 deletions
7
agentuniverse/agent/action/knowledge/doc_processor/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
# !/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
|
||
# @Time : 2024/7/23 13:59 | ||
# @Author : fanen.lhy | ||
# @Email : fanen.lhy@antgroup.com | ||
# @FileName: __init__.py |
50 changes: 50 additions & 0 deletions
50
agentuniverse/agent/action/knowledge/doc_processor/character_text_splitter.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
# !/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
|
||
# @Time : 2024/8/5 14:37 | ||
# @Author : fanen.lhy | ||
# @Email : fanen.lhy@antgroup.com | ||
# @FileName: character_text_splitter.py | ||
from typing import List, Optional | ||
from langchain.text_splitter import CharacterTextSplitter as Splitter | ||
|
||
from agentuniverse.agent.action.knowledge.doc_processor.doc_processor import \ | ||
DocProcessor | ||
from agentuniverse.agent.action.knowledge.store.document import Document | ||
from agentuniverse.agent.action.knowledge.store.query import Query | ||
from agentuniverse.base.config.component_configer.component_configer import \ | ||
ComponentConfiger | ||
|
||
|
||
class CharacterTextSplitter(DocProcessor):
    """Doc processor that splits documents with LangChain's CharacterTextSplitter.

    The three tuning fields below are forwarded unchanged to the LangChain
    splitter whenever it is (re)built: once in ``__init__`` and again after
    the component configer has been applied.
    """

    # Forwarded to the LangChain splitter as ``chunk_size``.
    chunk_size: int = 200
    # Forwarded to the LangChain splitter as ``chunk_overlap``.
    chunk_overlap: int = 20
    # Paragraph break. The previous default "/n/n" used forward slashes, so it
    # was a literal four-character string that never matched a blank line.
    separator: str = "\n\n"
    # Built from the fields above; rebuilt whenever configuration changes.
    splitter: Optional[Splitter] = None

    def __init__(self, **kwargs):
        """Initialise the component and build the splitter from current fields."""
        super().__init__(**kwargs)
        self.splitter = self._build_splitter()

    def _build_splitter(self) -> Splitter:
        """Create a LangChain splitter from the current field values."""
        return Splitter(separator=self.separator,
                        chunk_size=self.chunk_size,
                        chunk_overlap=self.chunk_overlap)

    def _process_docs(self, origin_docs: List[Document],
                      query: Query = None) -> List[Document]:
        """Split every input document into chunks.

        Args:
            origin_docs: Documents to split.
            query: Unused by this processor; accepted for interface parity.

        Returns:
            The chunked documents, converted back from LangChain documents.
        """
        lc_doc_list = self.splitter.split_documents(
            Document.as_langchain_list(origin_docs)
        )
        return Document.from_langchain_list(lc_doc_list)

    def _initialize_by_component_configer(
            self,
            doc_processor_configer: ComponentConfiger) -> 'DocProcessor':
        """Apply configer overrides and rebuild the splitter.

        Args:
            doc_processor_configer: Configer whose optional ``chunk_size``,
                ``chunk_overlap`` and ``separator`` attributes override the
                class defaults when present.

        Returns:
            This instance, with ``splitter`` rebuilt from the final settings.
        """
        super()._initialize_by_component_configer(doc_processor_configer)
        # Only override fields the configer actually carries.
        for field_name in ("chunk_size", "chunk_overlap", "separator"):
            if hasattr(doc_processor_configer, field_name):
                setattr(self, field_name,
                        getattr(doc_processor_configer, field_name))
        # The splitter captured the old values in __init__, so rebuild it.
        self.splitter = self._build_splitter()
        return self
9 changes: 9 additions & 0 deletions
9
agentuniverse/agent/action/knowledge/doc_processor/character_text_splitter.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
name: 'character_text_splitter' | ||
description: 'langchain character text splitter' | ||
chunk_size: 200 | ||
chunk_overlap: 20 | ||
separator: "\n\n" | ||
metadata: | ||
type: 'DOC_PROCESSOR' | ||
module: 'agentuniverse.agent.action.knowledge.doc_processor.character_text_splitter' | ||
class: 'CharacterTextSplitter' |
67 changes: 67 additions & 0 deletions
67
agentuniverse/agent/action/knowledge/doc_processor/dashscope_reranker.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
# !/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
|
||
# @Time : 2024/8/5 15:48 | ||
# @Author : fanen.lhy | ||
# @Email : fanen.lhy@antgroup.com | ||
# @FileName: dashscope_reranker.py | ||
|
||
from typing import List, Optional | ||
import dashscope | ||
from http import HTTPStatus | ||
|
||
from agentuniverse.agent.action.knowledge.doc_processor.doc_processor import \ | ||
DocProcessor | ||
from agentuniverse.agent.action.knowledge.store.document import Document | ||
from agentuniverse.agent.action.knowledge.store.query import Query | ||
from agentuniverse.base.config.component_configer.component_configer import \ | ||
ComponentConfiger | ||
|
||
# Maps the model names accepted in configuration to the identifiers exposed
# by the dashscope SDK.
MODEL_NAME_MAP = {
    "gte_rerank": dashscope.TextReRank.Models.gte_rerank
}


class DashscopeReranker(DocProcessor):
    """Doc processor that reorders documents using the Dashscope rerank API."""

    # Key into MODEL_NAME_MAP; a name not in the map is sent to the API as-is.
    model_name: str = "gte_rerank"
    # Forwarded to the API as ``top_n``.
    top_n: int = 10

    def _process_docs(self, origin_docs: List[Document],
                      query: Query = None) -> List[Document]:
        """Rerank ``origin_docs`` against the query text.

        Each returned document gets a ``relevance_score`` entry written into
        its metadata; the input documents are mutated in place.

        Args:
            origin_docs: Candidate documents; their ``text`` is sent to the API.
            query: Query whose ``query_str`` is used as the rerank query.

        Returns:
            The documents referenced by the API results, in result order.
            An empty input is returned unchanged without calling the API.

        Raises:
            Exception: If no query string is supplied, or the API response
                status is not HTTP 200.
        """
        if not query or not query.query_str:
            raise Exception("Dashscope reranker need an origin string query.")
        if not origin_docs:
            return origin_docs

        resp = dashscope.TextReRank.call(
            # Fall back to the configured name itself so an unmapped model is
            # sent explicitly instead of silently becoming ``model=None``.
            model=MODEL_NAME_MAP.get(self.model_name, self.model_name),
            query=query.query_str,
            documents=[_doc.text for _doc in origin_docs],
            top_n=self.top_n,
            return_documents=False
        )
        if resp.status_code != HTTPStatus.OK:
            raise Exception(f"Dashscope rerank api call error: {resp}")

        rerank_docs = []
        for _result in resp.output.results:
            # Each result refers to its source document by input position.
            doc = origin_docs[_result.index]
            if doc.metadata:
                doc.metadata["relevance_score"] = _result.relevance_score
            else:
                doc.metadata = {"relevance_score": _result.relevance_score}
            rerank_docs.append(doc)
        return rerank_docs

    def _initialize_by_component_configer(
            self,
            doc_processor_configer: ComponentConfiger) -> 'DocProcessor':
        """Apply optional ``model_name`` and ``top_n`` overrides from the configer.

        Args:
            doc_processor_configer: Configer carrying the component settings.

        Returns:
            This instance.
        """
        super()._initialize_by_component_configer(doc_processor_configer)
        if hasattr(doc_processor_configer, "model_name"):
            self.model_name = doc_processor_configer.model_name
        if hasattr(doc_processor_configer, "top_n"):
            self.top_n = doc_processor_configer.top_n
        return self
6 changes: 6 additions & 0 deletions
6
agentuniverse/agent/action/knowledge/doc_processor/dashscope_reranker.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
name: 'dashscope_reranker' | ||
description: 'reranker use dashscope api' | ||
metadata: | ||
type: 'DOC_PROCESSOR' | ||
module: 'agentuniverse.agent.action.knowledge.doc_processor.dashscope_reranker' | ||
class: 'DashscopeReranker' |
56 changes: 56 additions & 0 deletions
56
agentuniverse/agent/action/knowledge/doc_processor/doc_processor.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# !/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
|
||
# @Time : 2024/7/23 14:00 | ||
# @Author : fanen.lhy | ||
# @Email : fanen.lhy@antgroup.com | ||
# @FileName: doc_processor.py | ||
|
||
from abc import abstractmethod | ||
from typing import List, Optional | ||
|
||
from agentuniverse.agent.action.knowledge.store.query import Query | ||
from agentuniverse.agent.action.knowledge.store.document import Document | ||
from agentuniverse.base.component.component_base import ComponentEnum | ||
from agentuniverse.base.component.component_base import ComponentBase | ||
from agentuniverse.base.config.component_configer.component_configer import \ | ||
ComponentConfiger | ||
|
||
|
||
class DocProcessor(ComponentBase):
    """Base class for document processors.

    Callers use ``process_docs``; concrete processors supply the actual work
    by implementing ``_process_docs``.
    """

    component_type: ComponentEnum = ComponentEnum.DOC_PROCESSOR
    name: Optional[str] = None
    description: Optional[str] = None

    class Config:
        arbitrary_types_allowed = True

    def process_docs(self, origin_docs: List[Document],
                     query: Query = None) -> List[Document]:
        """Run this processor over ``origin_docs``.

        Args:
            origin_docs: Documents to process.
            query: Optional query passed through to the implementation.

        Returns:
            Whatever ``_process_docs`` returns for the same arguments.
        """
        processed = self._process_docs(origin_docs, query)
        return processed

    @abstractmethod
    def _process_docs(self, origin_docs: List[Document],
                      query: Query = None) -> List[Document]:
        """Process the input documents; implementations return a document list."""

    def _initialize_by_component_configer(
            self,
            doc_processor_configer: ComponentConfiger) -> 'DocProcessor':
        """Copy the basic component info from the configer onto this instance.

        Args:
            doc_processor_configer (ComponentConfiger): Configer providing the
                ``name`` and ``description`` of the processor.

        Returns:
            DocProcessor: This instance.
        """
        for attr in ("name", "description"):
            setattr(self, attr, getattr(doc_processor_configer, attr))
        return self
20 changes: 20 additions & 0 deletions
20
agentuniverse/agent/action/knowledge/doc_processor/doc_processor_manager.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# !/usr/bin/env python3 | ||
# -*- coding:utf-8 -*- | ||
|
||
# @Time : 2024/7/23 14:12 | ||
# @Author : fanen.lhy | ||
# @Email : fanen.lhy@antgroup.com | ||
# @FileName: doc_processor_manager.py | ||
|
||
from agentuniverse.base.annotation.singleton import singleton | ||
from agentuniverse.base.component.component_enum import ComponentEnum | ||
from agentuniverse.base.component.component_manager_base import ComponentManagerBase | ||
from agentuniverse.agent.action.knowledge.doc_processor.doc_processor import DocProcessor | ||
|
||
|
||
@singleton
class DocProcessorManager(ComponentManagerBase[DocProcessor]):
    """Singleton manager for DocProcessor components."""

    def __init__(self) -> None:
        """Initialise the base manager with the DOC_PROCESSOR component type."""
        # Zero-argument super() is kept on purpose: the class name is rebound
        # by the @singleton decorator, so naming it here would be unsafe.
        super().__init__(ComponentEnum.DOC_PROCESSOR)
Oops, something went wrong.