Skip to content

Commit

Permalink
bump llama_cloud and update pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
leehuwuj committed Dec 3, 2024
1 parent a07f320 commit ab9ad19
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 58 deletions.
2 changes: 1 addition & 1 deletion helpers/python.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ const getAdditionalDependencies = (
case "llamacloud":
dependencies.push({
name: "llama-index-indices-managed-llama-cloud",
version: "^0.6.0",
version: "^0.6.3",
});
break;
}
Expand Down
54 changes: 7 additions & 47 deletions templates/components/vectordbs/python/llamacloud/generate.py
Original file line number Diff line number Diff line change
@@ -1,68 +1,28 @@
# flake8: noqa: E402
import os

from dotenv import load_dotenv

load_dotenv()

import logging

from app.engine.index import get_client, get_index
from llama_index.core.readers import SimpleDirectoryReader

from app.engine.index import get_index
from app.engine.service import LLamaCloudFileService # type: ignore
from app.settings import init_settings
from llama_cloud import PipelineType
from llama_index.core.readers import SimpleDirectoryReader
from llama_index.core.settings import Settings

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()


def ensure_index(index):
    """Make sure a managed LlamaCloud pipeline exists for *index*.

    Searches the index's project for a managed pipeline with the same name
    and, when none is found, creates one via ``upsert_pipeline``. Creating a
    new pipeline is only supported with an OpenAI embedding model.

    Raises:
        ValueError: if a pipeline must be created but ``Settings.embed_model``
            is not an ``OpenAIEmbedding`` instance.
    """
    proj_id = index._get_project_id()
    api = get_client()

    existing = api.pipelines.search_pipelines(
        project_id=proj_id,
        pipeline_name=index.name,
        pipeline_type=PipelineType.MANAGED.value,
    )
    if existing:
        # A matching managed pipeline is already present; nothing to do.
        return

    # Imported lazily so the OpenAI embedding package is only required when a
    # pipeline actually has to be created.
    from llama_index.embeddings.openai import OpenAIEmbedding

    if not isinstance(Settings.embed_model, OpenAIEmbedding):
        raise ValueError(
            "Creating a new pipeline with a non-OpenAI embedding model is not supported."
        )

    api.pipelines.upsert_pipeline(
        project_id=proj_id,
        request={
            "name": index.name,
            "embedding_config": {
                "type": "OPENAI_EMBEDDING",
                "component": {
                    "api_key": os.getenv("OPENAI_API_KEY"),  # editable
                    "model_name": os.getenv("EMBEDDING_MODEL"),
                },
            },
            "transform_config": {
                "mode": "auto",
                "config": {
                    "chunk_size": Settings.chunk_size,  # editable
                    "chunk_overlap": Settings.chunk_overlap,  # editable
                },
            },
        },
    )


def generate_datasource():
init_settings()
logger.info("Generate index for the provided data")

index = get_index()
ensure_index(index)
project_id = index._get_project_id()
pipeline_id = index._get_pipeline_id()
index = get_index(create_if_missing=True)
if index is None:
raise ValueError("Index not found and could not be created")

# use SimpleDirectoryReader to retrieve the files to process
reader = SimpleDirectoryReader(
Expand All @@ -78,7 +38,7 @@ def generate_datasource():
f"Adding file {input_file} to pipeline {index.name} in project {index.project_name}"
)
LLamaCloudFileService.add_file_to_pipeline(
project_id, pipeline_id, f, custom_metadata={}
index.project.id, index.pipeline.id, f, custom_metadata={}
)

logger.info("Finished generating the index")
Expand Down
60 changes: 56 additions & 4 deletions templates/components/vectordbs/python/llamacloud/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
import os
from typing import Optional

from llama_cloud import PipelineType
from llama_index.core.callbacks import CallbackManager
from llama_index.core.ingestion.api_utils import (
get_client as llama_cloud_get_client,
)
from llama_index.core.settings import Settings
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
from pydantic import BaseModel, Field, field_validator

Expand Down Expand Up @@ -82,14 +84,64 @@ def to_index_kwargs(self) -> dict:
}


def get_index(
    config: IndexConfig = None,
    create_if_missing: bool = False,
):
    """Return a ``LlamaCloudIndex`` built from *config* (default ``IndexConfig()``).

    ``LlamaCloudIndex`` raises ``ValueError`` when the backing LlamaCloud
    pipeline does not exist. With ``create_if_missing=True`` the pipeline is
    created via ``_create_index`` and the lookup is retried; otherwise this
    function returns ``None`` instead of propagating the error.

    Args:
        config: index configuration; a default ``IndexConfig`` is used when None.
        create_if_missing: create the managed pipeline if it does not exist yet.

    Returns:
        The ``LlamaCloudIndex``, or ``None`` when the index is missing and
        ``create_if_missing`` is False.
    """
    if config is None:
        config = IndexConfig()

    # Check whether the index exists; the constructor validates the pipeline.
    try:
        return LlamaCloudIndex(**config.to_index_kwargs())
    except ValueError:
        if not create_if_missing:
            return None
        # NOTE(review): assumes a module-level `logger` is defined above this
        # block (not visible in this diff view) — confirm.
        logger.info(
            f"Index {config.llama_cloud_pipeline_config.pipeline} not found, creating it"
        )
        _create_index(config)
        return LlamaCloudIndex(**config.to_index_kwargs())


def get_client():
    """Build and return a LlamaCloud API client from the default config."""
    return llama_cloud_get_client(**LlamaCloudConfig().to_client_kwargs())


def _create_index(
    config: IndexConfig,
):
    """Create the managed LlamaCloud pipeline named in *config* if absent.

    Looks the pipeline up by name first and returns without side effects when
    it already exists. New pipelines are only supported with an OpenAI
    embedding model.

    Raises:
        ValueError: if a pipeline must be created but ``Settings.embed_model``
            is not an ``OpenAIEmbedding`` instance.
    """
    api = get_client()
    name = config.llama_cloud_pipeline_config.pipeline

    matches = api.pipelines.search_pipelines(
        pipeline_name=name,
        pipeline_type=PipelineType.MANAGED.value,
    )
    if matches:
        # Pipeline already exists; nothing to create.
        return

    # Lazy import: the OpenAI embedding package is only needed on creation.
    from llama_index.embeddings.openai import OpenAIEmbedding

    if not isinstance(Settings.embed_model, OpenAIEmbedding):
        raise ValueError(
            "Creating a new pipeline with a non-OpenAI embedding model is not supported."
        )

    api.pipelines.upsert_pipeline(
        request={
            "name": name,
            "embedding_config": {
                "type": "OPENAI_EMBEDDING",
                "component": {
                    "api_key": os.getenv("OPENAI_API_KEY"),  # editable
                    "model_name": os.getenv("EMBEDDING_MODEL"),
                },
            },
            "transform_config": {
                "mode": "auto",
                "config": {
                    "chunk_size": Settings.chunk_size,  # editable
                    "chunk_overlap": Settings.chunk_overlap,  # editable
                },
            },
        },
    )
10 changes: 5 additions & 5 deletions templates/components/vectordbs/python/llamacloud/service.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
from io import BytesIO
import logging
import os
import time
from typing import Any, Dict, List, Optional, Set, Tuple, Union
import typing
from io import BytesIO
from typing import Any, Dict, List, Optional, Set, Tuple, Union

import requests
from fastapi import BackgroundTasks
from llama_cloud import ManagedIngestionStatus, PipelineFileCreateCustomMetadataValue
from llama_index.core.schema import NodeWithScore
from pydantic import BaseModel
import requests

from app.api.routers.models import SourceNodes
from app.engine.index import get_client
from llama_index.core.schema import NodeWithScore


logger = logging.getLogger("uvicorn")

Expand Down
2 changes: 1 addition & 1 deletion templates/types/streaming/fastapi/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ python-dotenv = "^1.0.0"
pydantic = "<2.10"
aiostream = "^0.5.2"
cachetools = "^5.3.3"
llama-index = "^0.11.17"
llama-index = "^0.12.1"
rich = "^13.9.4"

[tool.poetry.group.dev.dependencies]
Expand Down

0 comments on commit ab9ad19

Please sign in to comment.