From 61e3794ea91c1d8d91ed237a2aa107d79b5703aa Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Tue, 10 Sep 2024 21:07:34 -0400 Subject: [PATCH 1/3] Enhance HuggingFaceInferenceAPIEmbeddings component Enhance HuggingFaceInferenceAPIEmbeddings component - Update display name and description for clarity - Add API URL validation method - Implement local URL detection and dummy API key for local deployments - Improve error handling for API key and URL requirements - Update documentation link --- .../HuggingFaceInferenceAPIEmbeddings.py | 41 +++++++++++++++---- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py b/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py index b2b15c6ef37f..7e0dd6b0bf2c 100644 --- a/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py +++ b/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py @@ -1,5 +1,7 @@ from langchain_community.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings from pydantic.v1.types import SecretStr +import requests +from urllib.parse import urlparse from langflow.base.models.model import LCModelComponent from langflow.field_typing import Embeddings @@ -7,15 +9,15 @@ class HuggingFaceInferenceAPIEmbeddingsComponent(LCModelComponent): - display_name = "HuggingFace Embeddings" - description = "Generate embeddings using Hugging Face Inference API models." - documentation = "https://github.com/huggingface/text-embeddings-inference" + display_name = "HuggingFace Embeddings Inference" + description = "Generate embeddings using HuggingFace Text Embeddings Inference (TEI)" + documentation = "https://huggingface.co/docs/text-embeddings-inference/en/index" icon = "HuggingFace" name = "HuggingFaceInferenceAPIEmbeddings" inputs = [ SecretStrInput(name="api_key", display_name="API Key"), - MessageTextInput(name="api_url", display_name="API URL", advanced=True, value="http://localhost:8080"), + MessageTextInput(name="api_url", display_name="API URL", required=True, value="http://localhost:8080"), MessageTextInput(name="model_name", display_name="Model Name", value="BAAI/bge-large-en-v1.5"), ] @@ -23,10 +25,33 @@ class HuggingFaceInferenceAPIEmbeddingsComponent(LCModelComponent): Output(display_name="Embeddings", name="embeddings", method="build_embeddings"), ] - def build_embeddings(self) -> Embeddings: - if not self.api_key: - raise ValueError("API Key is required") + def validate_api_url(self, api_url: str) -> bool: + parsed_url = urlparse(api_url) + if not all([parsed_url.scheme, parsed_url.netloc]): + raise ValueError("Invalid API URL format") + + try: + response = requests.get(f"{api_url}/health", timeout=5) + return response.status_code == 200 + except requests.RequestException: + return False - api_key = SecretStr(self.api_key) + def build_embeddings(self) -> Embeddings: + if not self.api_url: + raise ValueError("API URL is required") + + if not self.validate_api_url(self.api_url): + raise ValueError("API URL is invalid or the service is not responding") + + # Check if the API URL is local + is_local_url = self.api_url.startswith(("http://localhost", "http://127.0.0.1")) + + # Use a dummy key for local URLs if no key is provided + if not self.api_key and is_local_url: + api_key = SecretStr("DummyAPIKeyForLocalDeployment") + elif not self.api_key: + raise ValueError("API Key is required for non-local API URLs") + else: + api_key = SecretStr(self.api_key) return HuggingFaceInferenceAPIEmbeddings(api_key=api_key, api_url=self.api_url, model_name=self.model_name) From 33ef4b9d4ebbe664052b55e2583b88ec63274380 Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Wed, 11 Sep 2024 15:13:11 -0400 Subject: [PATCH 2/3] updated the info in api key --- .../embeddings/HuggingFaceInferenceAPIEmbeddings.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py b/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py index 7e0dd6b0bf2c..48acc9952623 100644 --- a/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py +++ b/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py @@ -16,7 +16,9 @@ class HuggingFaceInferenceAPIEmbeddingsComponent(LCModelComponent): name = "HuggingFaceInferenceAPIEmbeddings" inputs = [ - SecretStrInput(name="api_key", display_name="API Key"), + SecretStrInput( + name="api_key", display_name="API Key", advanced=True, info="The API key is required for non-local API URLs" + ), MessageTextInput(name="api_url", display_name="API URL", required=True, value="http://localhost:8080"), MessageTextInput(name="model_name", display_name="Model Name", value="BAAI/bge-large-en-v1.5"), ] From 2f435fbf516fc2f177c0a4322a77ce4b30c29c82 Mon Sep 17 00:00:00 2001 From: Edwin Jose Date: Thu, 12 Sep 2024 03:45:39 -0400 Subject: [PATCH 3/3] Refactor HuggingFaceInferenceAPIEmbeddings component - Update base class from LCModelComponent to LCEmbeddingsModel - Rename 'api_url' to 'inference_endpoint' for clarity - Improve error messages and validation for inference endpoint - Update documentation link - Enhance comments and code formatting --- .../HuggingFaceInferenceAPIEmbeddings.py | 71 +++++++++++++------ 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py b/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py index 48acc9952623..589043c6bc96 100644 --- a/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py +++ b/src/backend/base/langflow/components/embeddings/HuggingFaceInferenceAPIEmbeddings.py @@ -1,59 +1,84 @@ +from urllib.parse import urlparse + +import requests from langchain_community.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings from pydantic.v1.types import SecretStr -import requests -from urllib.parse import urlparse -from langflow.base.models.model import LCModelComponent +from langflow.base.embeddings.model import LCEmbeddingsModel from langflow.field_typing import Embeddings from langflow.io import MessageTextInput, Output, SecretStrInput -class HuggingFaceInferenceAPIEmbeddingsComponent(LCModelComponent): +class HuggingFaceInferenceAPIEmbeddingsComponent(LCEmbeddingsModel): display_name = "HuggingFace Embeddings Inference" description = "Generate embeddings using HuggingFace Text Embeddings Inference (TEI)" - documentation = "https://huggingface.co/docs/text-embeddings-inference/en/index" + documentation = "https://huggingface.co/docs/text-embeddings-inference/index" icon = "HuggingFace" name = "HuggingFaceInferenceAPIEmbeddings" inputs = [ SecretStrInput( - name="api_key", display_name="API Key", advanced=True, info="The API key is required for non-local API URLs" + name="api_key", + display_name="API Key", + advanced=True, + info="Required for non-local inference endpoints. Local inference does not require an API Key.", + ), + MessageTextInput( + name="inference_endpoint", + display_name="Inference Endpoint", + required=True, + value="http://localhost:8080", + info="Custom inference endpoint URL.", + ), + MessageTextInput( + name="model_name", + display_name="Model Name", + value="BAAI/bge-large-en-v1.5", + info="The name of the model to use for text embeddings.", ), - MessageTextInput(name="api_url", display_name="API URL", required=True, value="http://localhost:8080"), - MessageTextInput(name="model_name", display_name="Model Name", value="BAAI/bge-large-en-v1.5"), ] outputs = [ Output(display_name="Embeddings", name="embeddings", method="build_embeddings"), ] - def validate_api_url(self, api_url: str) -> bool: - parsed_url = urlparse(api_url) + def validate_inference_endpoint(self, inference_endpoint: str) -> bool: + parsed_url = urlparse(inference_endpoint) if not all([parsed_url.scheme, parsed_url.netloc]): - raise ValueError("Invalid API URL format") + raise ValueError( + f"Invalid inference endpoint format: '{self.inference_endpoint}'. Please ensure the URL includes both a scheme (e.g., 'http://' or 'https://') and a domain name. Example: 'http://localhost:8080' or 'https://api.example.com'" + ) try: - response = requests.get(f"{api_url}/health", timeout=5) - return response.status_code == 200 + response = requests.get(f"{inference_endpoint}/health", timeout=5) except requests.RequestException: - return False + raise ValueError( + f"Inference endpoint '{inference_endpoint}' is not responding. Please ensure the URL is correct and the service is running." + ) + + if response.status_code != 200: + raise ValueError(f"HuggingFace health check failed: {response.status_code}") + # returning True to solve linting error + return True def build_embeddings(self) -> Embeddings: - if not self.api_url: - raise ValueError("API URL is required") + if not self.inference_endpoint: + raise ValueError("Inference endpoint is required") - if not self.validate_api_url(self.api_url): - raise ValueError("API URL is invalid or the service is not responding") + self.validate_inference_endpoint(self.inference_endpoint) - # Check if the API URL is local - is_local_url = self.api_url.startswith(("http://localhost", "http://127.0.0.1")) + # Check if the inference endpoint is local + is_local_url = self.inference_endpoint.startswith(("http://localhost", "http://127.0.0.1")) - # Use a dummy key for local URLs if no key is provided + # Use a dummy key for local URLs if no key is provided. + # Refer https://python.langchain.com/v0.2/api_reference/community/embeddings/langchain_community.embeddings.huggingface.HuggingFaceInferenceAPIEmbeddings.html if not self.api_key and is_local_url: api_key = SecretStr("DummyAPIKeyForLocalDeployment") elif not self.api_key: - raise ValueError("API Key is required for non-local API URLs") + raise ValueError("API Key is required for non-local inference endpoints") else: api_key = SecretStr(self.api_key) - return HuggingFaceInferenceAPIEmbeddings(api_key=api_key, api_url=self.api_url, model_name=self.model_name) + return HuggingFaceInferenceAPIEmbeddings( + api_key=api_key, api_url=self.inference_endpoint, model_name=self.model_name + )