Bump llamacloud index and fix issues #456

Merged · 11 commits · Dec 3, 2024
Changes from 6 commits
5 changes: 5 additions & 0 deletions .changeset/shaggy-rats-draw.md
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

Bump the LlamaCloud library and fix breaking changes (Python).
30 changes: 15 additions & 15 deletions helpers/python.ts
@@ -37,21 +37,21 @@ const getAdditionalDependencies = (
case "mongo": {
dependencies.push({
name: "llama-index-vector-stores-mongodb",
version: "^0.3.1",
version: "^0.6.0",
});
break;
}
case "pg": {
dependencies.push({
name: "llama-index-vector-stores-postgres",
version: "^0.2.5",
version: "^0.3.2",
});
break;
}
case "pinecone": {
dependencies.push({
name: "llama-index-vector-stores-pinecone",
version: "^0.2.1",
version: "^0.4.1",
constraints: {
python: ">=3.11,<3.13",
},
@@ -61,7 +61,7 @@ const getAdditionalDependencies = (
case "milvus": {
dependencies.push({
name: "llama-index-vector-stores-milvus",
version: "^0.2.0",
version: "^0.3.0",
});
dependencies.push({
name: "pymilvus",
@@ -72,14 +72,14 @@ const getAdditionalDependencies = (
case "astra": {
dependencies.push({
name: "llama-index-vector-stores-astra-db",
version: "^0.2.0",
version: "^0.4.0",
});
break;
}
case "qdrant": {
dependencies.push({
name: "llama-index-vector-stores-qdrant",
version: "^0.3.0",
version: "^0.4.0",
constraints: {
python: ">=3.11,<3.13",
},
@@ -89,21 +89,21 @@ const getAdditionalDependencies = (
case "chroma": {
dependencies.push({
name: "llama-index-vector-stores-chroma",
version: "^0.2.0",
version: "^0.4.0",
});
break;
}
case "weaviate": {
dependencies.push({
name: "llama-index-vector-stores-weaviate",
version: "^1.1.1",
version: "^1.2.3",
});
break;
}
case "llamacloud":
dependencies.push({
name: "llama-index-indices-managed-llama-cloud",
version: "^0.6.0",
version: "^0.6.3",
});
break;
}
@@ -122,13 +122,13 @@ const getAdditionalDependencies = (
case "web":
dependencies.push({
name: "llama-index-readers-web",
version: "^0.2.2",
version: "^0.3.0",
});
break;
case "db":
dependencies.push({
name: "llama-index-readers-database",
version: "^0.2.0",
version: "^0.3.0",
});
dependencies.push({
name: "pymysql",
@@ -167,15 +167,15 @@ const getAdditionalDependencies = (
if (templateType !== "multiagent") {
dependencies.push({
name: "llama-index-llms-openai",
version: "^0.2.0",
version: "^0.3.2",
});
dependencies.push({
name: "llama-index-embeddings-openai",
version: "^0.2.3",
version: "^0.3.1",
});
dependencies.push({
name: "llama-index-agent-openai",
version: "^0.3.0",
version: "^0.4.0",
});
}
break;
@@ -524,7 +524,7 @@ export const installPythonTemplate = async ({
if (observability === "llamatrace") {
addOnDependencies.push({
name: "llama-index-callbacks-arize-phoenix",
version: "^0.2.1",
version: "^0.3.0",
constraints: {
python: ">=3.11,<3.13",
},
4 changes: 2 additions & 2 deletions helpers/tools.ts
@@ -41,7 +41,7 @@ export const supportedTools: Tool[] = [
dependencies: [
{
name: "llama-index-tools-google",
version: "^0.2.0",
version: "^0.3.0",
},
],
supportedFrameworks: ["fastapi"],
@@ -82,7 +82,7 @@ For better results, you can specify the region parameter to get results from a s
dependencies: [
{
name: "llama-index-tools-wikipedia",
version: "^0.2.0",
version: "^0.3.0",
},
],
supportedFrameworks: ["fastapi", "express", "nextjs"],
54 changes: 7 additions & 47 deletions templates/components/vectordbs/python/llamacloud/generate.py
@@ -1,68 +1,28 @@
# flake8: noqa: E402
import os

from dotenv import load_dotenv

load_dotenv()

import logging

from app.engine.index import get_client, get_index
from llama_index.core.readers import SimpleDirectoryReader

from app.engine.index import get_index
from app.engine.service import LLamaCloudFileService # type: ignore
from app.settings import init_settings
from llama_cloud import PipelineType
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.settings import Settings

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()


def ensure_index(index):
project_id = index._get_project_id()
client = get_client()
pipelines = client.pipelines.search_pipelines(
project_id=project_id,
pipeline_name=index.name,
pipeline_type=PipelineType.MANAGED.value,
)
if len(pipelines) == 0:
from llama_index.embeddings.openai import OpenAIEmbedding

if not isinstance(Settings.embed_model, OpenAIEmbedding):
raise ValueError(
"Creating a new pipeline with a non-OpenAI embedding model is not supported."
)
client.pipelines.upsert_pipeline(
project_id=project_id,
request={
"name": index.name,
"embedding_config": {
"type": "OPENAI_EMBEDDING",
"component": {
"api_key": os.getenv("OPENAI_API_KEY"), # editable
"model_name": os.getenv("EMBEDDING_MODEL"),
},
},
"transform_config": {
"mode": "auto",
"config": {
"chunk_size": Settings.chunk_size, # editable
"chunk_overlap": Settings.chunk_overlap, # editable
},
},
},
)


def generate_datasource():
init_settings()
logger.info("Generate index for the provided data")

index = get_index()
ensure_index(index)
project_id = index._get_project_id()
pipeline_id = index._get_pipeline_id()
index = get_index(create_if_missing=True)
if index is None:
raise ValueError("Index not found and could not be created")

# use SimpleDirectoryReader to retrieve the files to process
reader = SimpleDirectoryReader(
@@ -78,7 +38,7 @@ def generate_datasource():
f"Adding file {input_file} to pipeline {index.name} in project {index.project_name}"
)
LLamaCloudFileService.add_file_to_pipeline(
project_id, pipeline_id, f, custom_metadata={}
index.project.id, index.pipeline.id, f, custom_metadata={}
)

logger.info("Finished generating the index")
59 changes: 55 additions & 4 deletions templates/components/vectordbs/python/llamacloud/index.py
@@ -2,10 +2,12 @@
import os
from typing import Optional

from llama_cloud import PipelineType
from llama_index.core.callbacks import CallbackManager
from llama_index.core.ingestion.api_utils import (
get_client as llama_cloud_get_client,
)
from llama_index.core.settings import Settings
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
from pydantic import BaseModel, Field, field_validator

@@ -82,14 +84,63 @@ def to_index_kwargs(self) -> dict:
}


def get_index(config: IndexConfig = None):
def get_index(
config: IndexConfig = None,
create_if_missing: bool = False,
):
if config is None:
config = IndexConfig()
index = LlamaCloudIndex(**config.to_index_kwargs())
Collaborator (author) comment: The updated LlamaCloudIndex now resolves the index automatically and raises if it does not exist, see: run-llama/llama_index@a56c3e0#diff-52ef3b6a93ae4dc22f541ee65207a1f7a22b4724ec0f757e3b7f221dd480b2b6R90 (a minimal usage sketch follows this file's diff below).

return index
# Check whether the index exists
try:
index = LlamaCloudIndex(**config.to_index_kwargs())
return index
except ValueError:
logger.warning("Index not found")
if create_if_missing:
logger.info("Creating index")
_create_index(config)
return LlamaCloudIndex(**config.to_index_kwargs())
return None


def get_client():
config = LlamaCloudConfig()
return llama_cloud_get_client(**config.to_client_kwargs())


def _create_index(
config: IndexConfig,
):
client = get_client()
pipeline_name = config.llama_cloud_pipeline_config.pipeline

pipelines = client.pipelines.search_pipelines(
pipeline_name=pipeline_name,
pipeline_type=PipelineType.MANAGED.value,
)
if len(pipelines) == 0:
from llama_index.embeddings.openai import OpenAIEmbedding

if not isinstance(Settings.embed_model, OpenAIEmbedding):
raise ValueError(
"Creating a new pipeline with a non-OpenAI embedding model is not supported."
)
client.pipelines.upsert_pipeline(
request={
"name": pipeline_name,
"embedding_config": {
"type": "OPENAI_EMBEDDING",
"component": {
"api_key": os.getenv("OPENAI_API_KEY"), # editable
"model_name": os.getenv("EMBEDDING_MODEL"),
},
},
"transform_config": {
"mode": "auto",
"config": {
"chunk_size": Settings.chunk_size, # editable
"chunk_overlap": Settings.chunk_overlap, # editable
},
},
},
)
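
For clarity on the new get_index signature above, here is a minimal usage sketch. It is not part of the PR diff; the import path, attribute names, and error messages mirror the template code shown in index.py and generate.py above, and the read-path handling is an assumed example, not prescribed by this PR.

# Illustration only: callers of the updated app.engine.index.get_index
from app.engine.index import get_index

# Read path (e.g. the chat engine): keep the default and handle a missing index.
index = get_index()
if index is None:
    # Assumed handling for this sketch; the template may handle this differently.
    raise RuntimeError("LlamaCloud index not found - run the generate script first")

# Ingestion path (generate.py): create the managed pipeline if it does not exist yet.
index = get_index(create_if_missing=True)
if index is None:
    raise ValueError("Index not found and could not be created")

# The project and pipeline ids are read from the index, as in the generate.py diff.
print(f"Using pipeline {index.pipeline.id} in project {index.project.id}")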
21 changes: 12 additions & 9 deletions templates/components/vectordbs/python/llamacloud/service.py
@@ -1,18 +1,18 @@
from io import BytesIO
import logging
import os
import time
from typing import Any, Dict, List, Optional, Set, Tuple, Union
import typing
from io import BytesIO
from typing import Any, Dict, List, Optional, Set, Tuple, Union

import requests
from fastapi import BackgroundTasks
from llama_cloud import ManagedIngestionStatus, PipelineFileCreateCustomMetadataValue
from llama_index.core.schema import NodeWithScore
from pydantic import BaseModel
import requests

from app.api.routers.models import SourceNodes
from app.engine.index import get_client
from llama_index.core.schema import NodeWithScore


logger = logging.getLogger("uvicorn")

@@ -67,10 +67,11 @@ def add_file_to_pipeline(
) -> str:
client = get_client()
file = client.files.upload_file(project_id=project_id, upload_file=upload_file)
file_id = file.id
files = [
{
"file_id": file.id,
"custom_metadata": {"file_id": file.id, **(custom_metadata or {})},
"file_id": file_id,
"custom_metadata": {"file_id": file_id, **(custom_metadata or {})},
}
]
files = client.pipelines.add_files_to_pipeline(pipeline_id, request=files)
@@ -79,12 +80,14 @@ def add_file_to_pipeline(
max_attempts = 20
attempt = 0
while attempt < max_attempts:
result = client.pipelines.get_pipeline_file_status(pipeline_id, file.id)
result = client.pipelines.get_pipeline_file_status(
file_id=file_id, pipeline_id=pipeline_id
)
if result.status == ManagedIngestionStatus.ERROR:
raise Exception(f"File processing failed: {str(result)}")
if result.status == ManagedIngestionStatus.SUCCESS:
# File is ingested - return the file id
return file.id
return file_id
attempt += 1
time.sleep(0.1) # Sleep for 100ms
raise Exception(
2 changes: 1 addition & 1 deletion templates/types/streaming/fastapi/pyproject.toml
@@ -19,7 +19,7 @@ python-dotenv = "^1.0.0"
pydantic = "<2.10"
aiostream = "^0.5.2"
cachetools = "^5.3.3"
llama-index = "^0.11.17"
llama-index = "^0.12.1"
rich = "^13.9.4"

[tool.poetry.group.dev.dependencies]