Commit 38c7073

Merge pull request #315 from ohdearquant/merge_direct

v0.0.315 chunk / load

ohdearquant authored Mar 30, 2024
2 parents 5914863 + adbc2d9

Showing 15 changed files with 823 additions and 18 deletions.
3 changes: 3 additions & 0 deletions lionagi/__init__.py
@@ -8,6 +8,9 @@

from .core import direct, Branch, Session, func_to_tool
from .integrations.provider.services import Services
+from .integrations.chunker.chunk import chunk
+from .integrations.loader.load import load


logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
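Note: with these two exports, chunking and loading become part of the package's top-level API; both functions are defined in the new files later in this diff. A minimal sketch of what this enables (the directory path and extension are placeholders, and it assumes file_to_chunks accepts the DataNode dicts produced by load):

    from lionagi import chunk, load

    # Load files via the default llama-index SimpleDirectoryReader path,
    # then split the resulting DataNode documents with the built-in text chunker.
    docs = load(input_dir="path/to/docs", required_exts=[".md"])
    nodes = chunk(docs, chunker="text_chunker")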
1 change: 1 addition & 0 deletions lionagi/core/direct/cot.py
@@ -0,0 +1 @@
# TODO: chain of thoughts
Empty file added lionagi/core/direct/plan.py
2 changes: 1 addition & 1 deletion lionagi/core/direct/react.py
@@ -159,7 +159,7 @@ async def _inner(i=0):
            return_branch=return_branch,
            **kwargs,
        )

    if num_instances == 1:
        return await _inner()

4 changes: 1 addition & 3 deletions lionagi/core/direct/select.py
@@ -39,9 +39,7 @@ class SelectTemplate(ScoredTemplate):
    answer: Enum | str = Field(
        default_factory=str, description="selection from given choices"
    )
-    choices: list = Field(
-        default_factory=list, description="the given choices"
-    )
+    choices: list = Field(default_factory=list, description="the given choices")

    signature: str = "sentence -> answer"

1 change: 1 addition & 0 deletions lionagi/core/direct/sentiment.py
@@ -0,0 +1 @@
# TODO: sentiment analysis
15 changes: 6 additions & 9 deletions lionagi/integrations/bridge/llamaindex_/node_parser.py
@@ -29,19 +29,18 @@ def get_llama_index_node_parser(node_parser: Any):
    import llama_index.core.node_parser

    if not isinstance(node_parser, str) and not issubclass(node_parser, NodeParser):
-        raise TypeError(f"node_parser must be a string or NodeParser.")
+        raise TypeError("node_parser must be a string or NodeParser.")

    if isinstance(node_parser, str):
        if node_parser == "CodeSplitter":
            SysUtil.check_import("tree_sitter_languages")

        try:
-            parser = getattr(llama_index.core.node_parser, node_parser)
-            return parser
+            return getattr(llama_index.core.node_parser, node_parser)
        except Exception as e:
            raise AttributeError(
                f"llama_index_core has no such attribute:" f" {node_parser}, Error: {e}"
-            )
+            ) from e

    elif isinstance(node_parser, NodeParser):
        return node_parser
@@ -75,10 +74,8 @@ def llama_index_parse_node(
        parser = get_llama_index_node_parser(node_parser)
        try:
            parser = parser(*parser_args, **parser_kwargs)
-        except:
+        except Exception:
            parser = parser.from_defaults(*parser_args, **parser_kwargs)
-        nodes = parser.get_nodes_from_documents(documents)
-        return nodes
-
+        return parser.get_nodes_from_documents(documents)
    except Exception as e:
-        raise ValueError(f"Failed to parse. Error: {e}")
+        raise ValueError(f"Failed to parse. Error: {e}") from e
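Note: llama_index_parse_node now returns the parsed nodes directly and chains exceptions with "from e". A hedged usage sketch (the parser-argument parameter names are inferred from the call sites in this diff; the sample text and chunk sizes are illustrative):

    from llama_index.core import Document

    from lionagi.integrations.bridge.llamaindex_.node_parser import llama_index_parse_node

    docs = [Document(text="LionAGI bridges llama-index node parsers.")]
    # "SentenceSplitter" is resolved by name via getattr; if the constructor
    # call fails, the code falls back to from_defaults().
    nodes = llama_index_parse_node(
        docs, "SentenceSplitter", parser_kwargs={"chunk_size": 256, "chunk_overlap": 16}
    )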
Empty file.
175 changes: 175 additions & 0 deletions lionagi/integrations/chunker/chunk.py
@@ -0,0 +1,175 @@
from typing import Union, Callable

from lionagi.libs import func_call
from lionagi.core.schema import DataNode
from ..bridge.langchain_.langchain_bridge import LangchainBridge
from ..bridge.llamaindex_.llama_index_bridge import LlamaIndexBridge


from ..loader.load_util import ChunkerType, file_to_chunks, _datanode_parser


def datanodes_convert(documents, chunker_type):

    for i in range(len(documents)):
        if type(documents[i]) == DataNode:
            if chunker_type == ChunkerType.LLAMAINDEX:
                documents[i] = documents[i].to_llama_index()
            elif chunker_type == ChunkerType.LANGCHAIN:
                documents[i] = documents[i].to_langchain()
    return documents


def text_chunker(documents, args, kwargs):

    def chunk_node(node):
        chunks = file_to_chunks(node.to_dict(), *args, **kwargs)
        func_call.lcall(chunks, lambda chunk: chunk.pop("node_id"))
        return [DataNode.from_obj({**chunk}) for chunk in chunks]

    return [chunk_node(doc) for doc in documents]


def chunk(
    documents,
    chunker,
    chunker_type=ChunkerType.PLAIN,
    chunker_args=None,
    chunker_kwargs=None,
    chunking_kwargs=None,
    documents_convert_func=None,
    to_datanode: bool | Callable = True,
):

    if chunker_args is None:
        chunker_args = []
    if chunker_kwargs is None:
        chunker_kwargs = {}
    if chunking_kwargs is None:
        chunking_kwargs = {}

    if chunker_type == ChunkerType.PLAIN:
        return chunk_funcs[ChunkerType.PLAIN](
            documents, chunker, chunker_args, chunker_kwargs
        )

    elif chunker_type == ChunkerType.LANGCHAIN:
        return chunk_funcs[ChunkerType.LANGCHAIN](
            documents,
            documents_convert_func,
            chunker,
            chunker_args,
            chunker_kwargs,
            to_datanode,
        )

    elif chunker_type == ChunkerType.LLAMAINDEX:
        return chunk_funcs[ChunkerType.LLAMAINDEX](
            documents,
            documents_convert_func,
            chunker,
            chunker_args,
            chunker_kwargs,
            to_datanode,
        )

    elif chunker_type == ChunkerType.SELFDEFINED:
        return chunk_funcs[ChunkerType.SELFDEFINED](
            documents,
            chunker,
            chunker_args,
            chunker_kwargs,
            chunking_kwargs,
            to_datanode,
        )

    else:
        raise ValueError(
            f"{chunker_type} is not supported. Please choose from {list(ChunkerType)}"
        )


def _self_defined_chunker(
    documents,
    chunker,
    chunker_args,
    chunker_kwargs,
    chunking_kwargs,
    to_datanode: bool | Callable,
):
    try:
        splitter = chunker(*chunker_args, **chunker_kwargs)
        nodes = splitter.split(documents, **chunking_kwargs)
    except Exception as e:
        raise ValueError(
            f"Self defined chunker {chunker} is not valid. Error: {e}"
        ) from e

    if isinstance(to_datanode, bool) and to_datanode is True:
        raise ValueError("Please define a valid parser to DataNode.")
    elif isinstance(to_datanode, Callable):
        nodes = _datanode_parser(nodes, to_datanode)
    return nodes


def _llama_index_chunker(
    documents,
    documents_convert_func,
    chunker,
    chunker_args,
    chunker_kwargs,
    to_datanode: bool | Callable,
):
    if documents_convert_func:
        documents = documents_convert_func(documents, "llama_index")
    nodes = LlamaIndexBridge.llama_index_parse_node(
        documents, chunker, chunker_args, chunker_kwargs
    )

    if isinstance(to_datanode, bool) and to_datanode is True:
        nodes = [DataNode.from_llama_index(i) for i in nodes]
    elif isinstance(to_datanode, Callable):
        nodes = _datanode_parser(nodes, to_datanode)
    return nodes


def _langchain_chunker(
    documents,
    documents_convert_func,
    chunker,
    chunker_args,
    chunker_kwargs,
    to_datanode: bool | Callable,
):
    if documents_convert_func:
        documents = documents_convert_func(documents, "langchain")
    nodes = LangchainBridge.langchain_text_splitter(
        documents, chunker, chunker_args, chunker_kwargs
    )
    if isinstance(to_datanode, bool) and to_datanode is True:
        if isinstance(documents, str):
            nodes = [DataNode(content=i) for i in nodes]
        else:
            nodes = [DataNode.from_langchain(i) for i in nodes]
    elif isinstance(to_datanode, Callable):
        nodes = _datanode_parser(nodes, to_datanode)
    return nodes


def _plain_chunker(documents, chunker, chunker_args, chunker_kwargs):
    try:
        if chunker == "text_chunker":
            chunker = text_chunker
        return chunker(documents, chunker_args, chunker_kwargs)
    except Exception as e:
        raise ValueError(
            f"Reader {chunker} is currently not supported. Error: {e}"
        ) from e


chunk_funcs = {
    ChunkerType.PLAIN: _plain_chunker,
    ChunkerType.LANGCHAIN: _langchain_chunker,
    ChunkerType.LLAMAINDEX: _llama_index_chunker,
    ChunkerType.SELFDEFINED: _self_defined_chunker,
}
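Note: chunk() dispatches on ChunkerType through the chunk_funcs table above. A usage sketch under stated assumptions (FixedSizeSplitter and the lambda parser are invented for illustration; it assumes file_to_chunks accepts a DataNode dict and that _datanode_parser applies the callable to the raw chunks):

    from lionagi import chunk
    from lionagi.core.schema import DataNode
    from lionagi.integrations.loader.load_util import ChunkerType

    # Plain path: the string "text_chunker" resolves to the built-in text_chunker.
    docs = [DataNode(content="a long document body ...")]
    nodes = chunk(docs, chunker="text_chunker")

    # Self-defined path: any object exposing .split() is accepted, but a parser
    # callable must be provided, since to_datanode=True raises for this type.
    class FixedSizeSplitter:
        def __init__(self, size: int = 100):
            self.size = size

        def split(self, text: str, **kwargs):
            return [text[i : i + self.size] for i in range(0, len(text), self.size)]

    nodes = chunk(
        "some long text ...",
        chunker=FixedSizeSplitter,
        chunker_type=ChunkerType.SELFDEFINED,
        chunker_kwargs={"size": 100},
        to_datanode=lambda chunks: [DataNode(content=c) for c in chunks],
    )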
Empty file.
152 changes: 152 additions & 0 deletions lionagi/integrations/loader/load.py
@@ -0,0 +1,152 @@
from typing import Callable

from lionagi.core.schema import DataNode
from ..bridge.langchain_.langchain_bridge import LangchainBridge
from ..bridge.llamaindex_.llama_index_bridge import LlamaIndexBridge

from .load_util import dir_to_nodes, ReaderType, _datanode_parser


def text_reader(args, kwargs):
    """
    Reads text files from a directory and converts them to DataNode instances.

    Args:
        args: Positional arguments for the dir_to_nodes function.
        kwargs: Keyword arguments for the dir_to_nodes function.

    Returns:
        A list of DataNode instances.

    Example usage:
        >>> args = ['path/to/text/files']
        >>> kwargs = {'file_extension': 'txt'}
        >>> nodes = text_reader(args, kwargs)
    """
    return dir_to_nodes(*args, **kwargs)


def load(
    reader: str | Callable = "SimpleDirectoryReader",
    input_dir=None,
    input_files=None,
    recursive: bool = False,
    required_exts: list[str] = None,
    reader_type=ReaderType.LLAMAINDEX,
    reader_args=None,
    reader_kwargs=None,
    load_args=None,
    load_kwargs=None,
    to_datanode: bool | Callable = True,
):

    if reader_args is None:
        reader_args = []
    if reader_kwargs is None:
        reader_kwargs = {}
    if load_args is None:
        load_args = []
    if load_kwargs is None:
        load_kwargs = {}

    if reader_type == ReaderType.PLAIN:
        return read_funcs[ReaderType.PLAIN](reader, reader_args, reader_kwargs)

    if reader_type == ReaderType.LANGCHAIN:
        return read_funcs[ReaderType.LANGCHAIN](
            reader, reader_args, reader_kwargs, to_datanode
        )

    elif reader_type == ReaderType.LLAMAINDEX:
        if input_dir is not None:
            reader_kwargs["input_dir"] = input_dir
        if input_files is not None:
            reader_kwargs["input_files"] = input_files
        if recursive:
            reader_kwargs["recursive"] = True
        if required_exts is not None:
            reader_kwargs["required_exts"] = required_exts

        return read_funcs[ReaderType.LLAMAINDEX](
            reader, reader_args, reader_kwargs, load_args, load_kwargs, to_datanode
        )

    elif reader_type == ReaderType.SELFDEFINED:
        return read_funcs[ReaderType.SELFDEFINED](
            reader, reader_args, reader_kwargs, load_args, load_kwargs, to_datanode
        )

    else:
        raise ValueError(
            f"{reader_type} is not supported. Please choose from {list(ReaderType)}"
        )


def _plain_reader(reader, reader_args, reader_kwargs):
    try:
        if reader == "text_reader":
            reader = text_reader
        return reader(reader_args, reader_kwargs)
    except Exception as e:
        raise ValueError(
            f"Reader {reader} is currently not supported. Error: {e}"
        ) from e


def _langchain_reader(reader, reader_args, reader_kwargs, to_datanode: bool | Callable):
    nodes = LangchainBridge.langchain_loader(reader, reader_args, reader_kwargs)
    if isinstance(to_datanode, bool) and to_datanode is True:
        nodes = [DataNode.from_langchain(i) for i in nodes]

    elif isinstance(to_datanode, Callable):
        nodes = _datanode_parser(nodes, to_datanode)
    return nodes


def _llama_index_reader(
    reader,
    reader_args,
    reader_kwargs,
    load_args,
    load_kwargs,
    to_datanode: bool | Callable,
):
    nodes = LlamaIndexBridge.llama_index_read_data(
        reader, reader_args, reader_kwargs, load_args, load_kwargs
    )
    if isinstance(to_datanode, bool) and to_datanode is True:
        nodes = [DataNode.from_llama_index(i) for i in nodes]
    elif isinstance(to_datanode, Callable):
        nodes = _datanode_parser(nodes, to_datanode)
    return nodes


def _self_defined_reader(
    reader,
    reader_args,
    reader_kwargs,
    load_args,
    load_kwargs,
    to_datanode: bool | Callable,
):
    try:
        loader = reader(*reader_args, **reader_kwargs)
        nodes = loader.load(*load_args, **load_kwargs)
    except Exception as e:
        raise ValueError(
            f"Self defined reader {reader} is not valid. Error: {e}"
        ) from e

    if isinstance(to_datanode, bool) and to_datanode is True:
        raise ValueError("Please define a valid parser to DataNode.")
    elif isinstance(to_datanode, Callable):
        nodes = _datanode_parser(nodes, to_datanode)
    return nodes


read_funcs = {
    ReaderType.PLAIN: _plain_reader,
    ReaderType.LANGCHAIN: _langchain_reader,
    ReaderType.LLAMAINDEX: _llama_index_reader,
    ReaderType.SELFDEFINED: _self_defined_reader,
}
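Note: load() mirrors chunk(), dispatching through the read_funcs table. A sketch of the two most direct paths (directory paths are placeholders; the plain-path arguments follow the text_reader docstring above):

    from lionagi import load
    from lionagi.integrations.loader.load_util import ReaderType

    # Default path: llama-index SimpleDirectoryReader, returned as DataNodes.
    nodes = load(input_dir="path/to/docs", recursive=True, required_exts=[".txt"])

    # Plain path: the built-in text_reader wraps dir_to_nodes.
    nodes = load(
        reader="text_reader",
        reader_type=ReaderType.PLAIN,
        reader_args=["path/to/text/files"],
        reader_kwargs={"file_extension": "txt"},
    )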