Skip to content

Commit

Permalink
use cli args for secrets
Browse files (browse the repository at this point in the history)
  • Loading branch information
mishig25 committed Jun 21, 2024
1 parent fa3fa2f commit fd12450
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 7 deletions.
15 changes: 8 additions & 7 deletions src/doc_builder/build_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import concurrent
import importlib
import os
import re
from collections import namedtuple
from concurrent.futures import ThreadPoolExecutor
Expand Down Expand Up @@ -364,16 +363,14 @@ def chunks_to_embeddings(client, chunks) -> List[Embedding]:
return embeddings


def call_embedding_inference(chunks: List[Chunk]) -> List[Embedding]:
def call_embedding_inference(chunks: List[Chunk], hf_ie_name, hf_ie_namespace, hf_ie_token) -> List[Embedding]:
"""
Using https://huggingface.co/inference-endpoints with a text embedding model
"""
batch_size = 20
embeddings = []

endpoint = get_inference_endpoint(
name=os.environ["HF_IE_NAME"], namespace=os.environ["HF_IE_NAMESPACE"], token=os.environ["HF_IE_TOKEN"]
)
endpoint = get_inference_endpoint(name=hf_ie_name, namespace=hf_ie_namespace, token=hf_ie_token)
if endpoint.status != "running":
print("[inference endpoint] restarting...")
endpoint.resume().wait()
Expand Down Expand Up @@ -401,6 +398,10 @@ def call_embedding_inference(chunks: List[Chunk]) -> List[Embedding]:
def build_embeddings(
package_name,
doc_folder,
hf_ie_name,
hf_ie_namespace,
hf_ie_token,
meilisearch_key,
version="main",
version_tag="main",
language="en",
Expand Down Expand Up @@ -454,10 +455,10 @@ def build_embeddings(
return

# Step 2: create embeddings
embeddings = call_embedding_inference(chunks)
embeddings = call_embedding_inference(chunks, hf_ie_name, hf_ie_namespace, hf_ie_token)

# Step 3: push embeddings to vector database (meilisearch)
client = meilisearch.Client("https://edge.meilisearch.com", os.environ["MEILISEARCH_KEY"])
client = meilisearch.Client("https://edge.meilisearch.com", meilisearch_key)
index_name = "docs-embed"

payload_docs_size = 50
Expand Down
8 changes: 8 additions & 0 deletions src/doc_builder/commands/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ def embeddings_command(args):
build_embeddings(
args.library_name,
args.path_to_docs,
args.hf_ie_name,
args.hf_ie_namespace,
args.hf_ie_token,
args.meilisearch_key,
version=version,
version_tag=version_tag,
language=args.language,
Expand All @@ -78,6 +82,10 @@ def embeddings_command_parser(subparsers=None):
help="Local path to library documentation. The library should be cloned, and the folder containing the "
"documentation files should be indicated here.",
)
parser.add_argument("--hf_ie_name", type=str, help="Inference Endpoints name.")
parser.add_argument("--hf_ie_namespace", type=str, help="Inference Endpoints namespace.")
parser.add_argument("--hf_ie_token", type=str, help="Hugging Face token.")
parser.add_argument("--meilisearch_key", type=str, help="Meilisearch key.")
parser.add_argument("--language", type=str, help="Language of the documentation to generate", default="en")
parser.add_argument(
"--version",
Expand Down

0 comments on commit fd12450

Please sign in to comment.