working on support for localai

valentinfrlch · May 19, 2024 · f38dfb8 · f38dfb8
1 parent 5cc7f29
commit f38dfb8
Show file tree

Hide file tree

Showing 11 changed files with 370 additions and 50 deletions.
diff --git a/custom_components/gpt4vision/__init__.py b/custom_components/gpt4vision/__init__.py
@@ -1,24 +1,50 @@
 # Declare variables
-from .const import DOMAIN, CONF_API_KEY, CONF_MAXTOKENS, CONF_TARGET_WIDTH, CONF_MODEL, CONF_MESSAGE, CONF_IMAGE_FILE
+from .const import DOMAIN, CONF_API_KEY, CONF_MAXTOKENS, CONF_TARGET_WIDTH, CONF_MODEL, CONF_MESSAGE, CONF_IMAGE_FILE, CONF_MODE, CONF_IP_ADDRESS, CONF_PORT
 import base64
 import io
 import os
+import logging
 from homeassistant.helpers.aiohttp_client import async_get_clientsession
 from homeassistant.core import SupportsResponse
 from homeassistant.exceptions import ServiceValidationError
 from PIL import Image
 
+_LOGGER = logging.getLogger(__name__)
+
 
 async def async_setup_entry(hass, entry):
     """Set up gpt4vision from a config entry."""
     # Get the API key from the configuration entry
-    api_key = entry.data[CONF_API_KEY]
+    mode = entry.data.get(CONF_MODE)
+    # Key the stored dict by the CONF_* constants: image_analyzer reads it
+    # back with CONF_MODE / CONF_API_KEY / CONF_IP_ADDRESS / CONF_PORT.
+    data = {CONF_MODE: mode}
+
+    if mode == "OpenAI":
+        data[CONF_API_KEY] = entry.data[CONF_API_KEY]
+    else:
+        # Every other mode is handled as LocalAI, which needs the server
+        # address and port instead of an API key.
+        data[CONF_IP_ADDRESS] = entry.data[CONF_IP_ADDRESS]
+        data[CONF_PORT] = entry.data[CONF_PORT]
+
+    # Store the data dictionary in hass.data
+    hass.data[DOMAIN] = data
+
+    return True
 
-    # Store the API key in hass.data
-    hass.data[DOMAIN] = {
-        "api_key": api_key
-    }
 
+async def validate_data(data):
+    if data[CONF_MODE] == "OpenAI":
+        if not data[CONF_API_KEY]:
+            raise ServiceValidationError("empty_api_key")
+    elif data[CONF_MODE] == "LocalAI":
+        if not data[CONF_IP_ADDRESS]:
+            raise ServiceValidationError("empty_ip_address")
+        if not data[CONF_PORT]:
+            raise ServiceValidationError("empty_port")
+    else:
+        raise ServiceValidationError("empty_mode")
     return True
 
 
@@ -30,26 +56,40 @@ async def image_analyzer(data_call):
             json: response_text
         """
 
-        # Try to get the API key from hass.data
-        api_key = hass.data.get(DOMAIN, {}).get("api_key")
-
-        # Check if api key is present
-        if not api_key:
-            raise ServiceValidationError(
-                "API key is required. Please set up the integration again.")
+        # Read from configuration (hass.data)
+        api_key = hass.data.get(DOMAIN, {}).get(CONF_API_KEY)
+        ip_address = hass.data.get(DOMAIN, {}).get(CONF_IP_ADDRESS)
+        port = hass.data.get(DOMAIN, {}).get(CONF_PORT)
+        mode = hass.data.get(DOMAIN, {}).get(CONF_MODE)
+
+        validate = {
+            CONF_MODE: mode,
+            CONF_API_KEY: api_key,
+            CONF_IP_ADDRESS: ip_address,
+            CONF_PORT: port
+        }
+        try:
+            await validate_data(validate)
+        except ServiceValidationError as e:
+            _LOGGER.error(f"Validation failed: {e}")
 
         # Read data from service call
         # Resolution (width only) of the image. Example: 1280 for 720p etc.
         target_width = data_call.data.get(CONF_TARGET_WIDTH, 1280)
         # Local path to your image. Example: "/config/www/images/garage.jpg"
         image_path = data_call.data.get(CONF_IMAGE_FILE)
-        # Maximum number of tokens used by model. Default is 100.
-        max_tokens = int(data_call.data.get(CONF_MAXTOKENS))
-        # GPT model: Default model is gpt-4o
-        model = str(data_call.data.get(CONF_MODEL, "gpt-4o"))
         # Message to be sent to AI model
         message = str(data_call.data.get(CONF_MESSAGE)[0:2000])
 
+        if mode == "OpenAI":
+            # Maximum number of tokens used by model. Default is 100.
+            max_tokens = int(data_call.data.get(CONF_MAXTOKENS))
+            # GPT model: Default model is gpt-4o for OpenAI
+            model = str(data_call.data.get(CONF_MODEL, "gpt-4o"))
+        if mode == "LocalAI":
+            # GPT model: Default model is gpt-4-vision-preview for LocalAI
+            model = str(data_call.data.get(CONF_MODEL, "gpt-4-vision-preview"))
+
         # Check if image file exists
         if not os.path.exists(image_path):
             raise ServiceValidationError(
@@ -88,30 +128,42 @@ def encode_image(image_path):
         # Get the base64 string from the image
         base64_image = encode_image(image_path)
 
-        # HTTP Request for AI API
-        # Header Parameters
+        # Get the Home Assistant http client
+        session = async_get_clientsession(hass)
+
+        if mode == "LocalAI":
+            response_text = await handle_localai_request(data_call, session, model, message, base64_image, ip_address, port)
+
+        elif mode == "OpenAI":
+            response_text = await handle_openai_request(data_call, session, model, message, base64_image, api_key, max_tokens)
+
+        return {"response_text": response_text}
+
+    async def handle_localai_request(data_call, session, model, message, base64_image, ip_address, port):
+        """POST message and image to the LocalAI server at ip_address:port; return the reply text."""
+        data = {"model": model, "messages": [{"role": "user", "content": [{"type": "text", "text": message},
+                                                                          {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}]}]}
+        # ip_address and port already hold the configured values; they are not keys into data_call.data
+        response = await session.post(
+            f"http://{ip_address}:{port}/v1/chat/completions", json=data)
+        if response.status != 200:
+            raise ServiceValidationError(
+                f"Request failed with status code {response.status}")
+        return (await response.json()).get("choices")[0].get("message").get("content")
+
+    async def handle_openai_request(data_call, session, model, message, base64_image, api_key, max_tokens):
         headers = {'Content-type': 'application/json',
                    'Authorization': 'Bearer ' + api_key}
-
-        # Body Parameters
         data = {"model": model, "messages": [{"role": "user", "content": [{"type": "text", "text": message},
                                                                           {"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}}]}], "max_tokens": max_tokens}
-
-        # Get the Home Assistant http client
-        session = async_get_clientsession(hass)
-
-        # Get response from OpenAI and read content inside message
         response = await session.post(
             "https://api.openai.com/v1/chat/completions", headers=headers, json=data)
-
-        # Check if response is successful
         if response.status != 200:
             raise ServiceValidationError(
                 (await response.json()).get('error').get('message'))
-
         response_text = (await response.json()).get(
             "choices")[0].get("message").get("content")
-        return {"response_text": response_text}
+        return response_text
 
     hass.services.register(
         DOMAIN, "image_analyzer", image_analyzer,

diff --git a/custom_components/gpt4vision/config_flow.py b/custom_components/gpt4vision/config_flow.py
@@ -1,22 +1,115 @@
 from homeassistant import config_entries
-from .const import DOMAIN, CONF_API_KEY
+from homeassistant.helpers.selector import selector
+from homeassistant.exceptions import ServiceValidationError
+from .const import DOMAIN, CONF_API_KEY, CONF_MODE, CONF_IP_ADDRESS, CONF_PORT
 import voluptuous as vol
+import logging
+
+_LOGGER = logging.getLogger(__name__)
+
+
+async def validate_mode(user_input: dict):
+    # check CONF_MODE is not empty
+    _LOGGER.debug(f"Validating mode: {user_input[CONF_MODE]}")
+    if not user_input[CONF_MODE]:
+        raise ServiceValidationError("empty_mode")
+
+
+async def validate_localai(user_input: dict):
+    # check CONF_IP_ADDRESS is not empty
+    _LOGGER.debug(f"Validating IP Address: {user_input[CONF_IP_ADDRESS]}")
+    if not user_input[CONF_IP_ADDRESS]:
+        raise ServiceValidationError("empty_ip_address")
+
+    # check CONF_PORT is not empty
+    _LOGGER.debug(f"Validating Port: {user_input[CONF_PORT]}")
+    if not user_input[CONF_PORT]:
+        raise ServiceValidationError("empty_port")
+
+
+async def validate_openai(user_input: dict):
+    # check CONF_API_KEY is not empty; the key is a secret, so its value is never logged
+    _LOGGER.debug("Validating API Key (value not logged)")
+    if not user_input[CONF_API_KEY]:
+        raise ServiceValidationError("empty_api_key")
 
 
 class gpt4visionConfigFlow(config_entries.ConfigFlow, domain=DOMAIN):
+    """Config flow: pick a provider, then collect that provider's connection settings."""
     VERSION = 1
 
     async def async_step_user(self, user_input=None):
         data_schema = vol.Schema({
-            vol.Required(CONF_API_KEY): str
+            vol.Required(CONF_MODE, default="OpenAI"): selector({
+                "select": {
+                    "options": ["OpenAI", "LocalAI"],
+                    "mode": "dropdown",
+                    "sort": True,
+                    "custom_value": False
+                }
+            }),
         })
 
         if user_input is not None:
-            # Save the API key
-            return self.async_create_entry(title="GPT4Vision Configuration", data=user_input)
+            self.init_info = user_input
+            if user_input[CONF_MODE] == "LocalAI":
+                _LOGGER.debug("LocalAI selected")
+                return await self.async_step_localai()
+            else:
+                _LOGGER.debug("OpenAI selected")
+                return await self.async_step_openai()
 
         return self.async_show_form(
             step_id="user",
             data_schema=data_schema,
             description_placeholders=user_input
         )
+
+
+    async def async_step_localai(self, user_input=None):
+        data_schema = vol.Schema({
+            vol.Required(CONF_IP_ADDRESS): str,
+            vol.Required(CONF_PORT, default=8080): int,
+        })
+
+        if user_input is not None:
+            try:
+                await validate_localai(user_input)
+                # add the mode to user_input
+                user_input[CONF_MODE] = self.init_info[CONF_MODE]
+                return self.async_create_entry(title="GPT4Vision LocalAI", data=user_input)
+            except ServiceValidationError as e:
+                return self.async_show_form(
+                    step_id="localai",
+                    data_schema=data_schema,
+                    errors={"base": str(e)}  # the error key as a string, e.g. "empty_port"
+                )
+
+        return self.async_show_form(
+            step_id="localai",
+            data_schema=data_schema
+        )
+
+
+    async def async_step_openai(self, user_input=None):
+        data_schema = vol.Schema({
+            vol.Required(CONF_API_KEY): str,
+        })
+
+        if user_input is not None:
+            try:
+                await validate_openai(user_input)
+                # add the mode to user_input
+                user_input[CONF_MODE] = self.init_info[CONF_MODE]
+                return self.async_create_entry(title="GPT4Vision OpenAI", data=user_input)
+            except ServiceValidationError as e:
+                return self.async_show_form(
+                    step_id="openai",
+                    data_schema=data_schema,
+                    errors={"base": str(e)}  # the error key as a string, e.g. "empty_api_key"
+                )
+
+        return self.async_show_form(
+            step_id="openai",
+            data_schema=data_schema
+        )
diff --git a/custom_components/gpt4vision/const.py b/custom_components/gpt4vision/const.py
@@ -2,8 +2,11 @@
 
 DOMAIN = "gpt4vision"
 CONF_API_KEY = 'api_key'
+CONF_IP_ADDRESS = 'localai_ip'
+CONF_PORT = 'localai_port'
 CONF_MAXTOKENS = 'max_tokens'
 CONF_TARGET_WIDTH = 'target_width'
 CONF_MODEL = 'model'
 CONF_MESSAGE = 'message'
-CONF_IMAGE_FILE = 'image_file'
+CONF_IMAGE_FILE = 'image_file'
+CONF_MODE = 'mode'
diff --git a/custom_components/gpt4vision/localai.py b/custom_components/gpt4vision/localai.py
@@ -0,0 +1,46 @@
+"""Send a request to localai API '/v1/chat/completions' endpoint"""
+
+
+import requests
+import json
+import base64
+
+
+def localai_analyzer(image_path, message, model, ip_address="localhost", port=8080):
+    """Send a request to localai API '/v1/chat/completions' endpoint
+
+    Args:
+        image_path (string): path where image is stored e.g.: "/config/www/tmp/image.jpg"
+        message (string): message to be sent to AI model
+        model (string): name of the model to request from the LocalAI server
+        ip_address (string): host of the LocalAI server. Default is "localhost".
+        port (int): port of the LocalAI server. Default is 8080.
+
+    Returns:
+        dict: the parsed JSON body of the response
+    """
+
+    # Open the image file
+    with open(image_path, "rb") as image_file:
+        # Encode the image as base64
+        image_base64 = base64.b64encode(image_file.read()).decode("utf-8")
+
+    data = {"model": model, "messages": [{"role": "user", "content": [{"type": "text", "text": message},
+                                                                      {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64," + image_base64}}]}]}
+
+    # Send a POST request to the localai API
+    response = requests.post(
+        f"http://{ip_address}:{port}/v1/chat/completions", json=data)
+
+    # Check if the request was successful
+    if response.status_code != 200:
+        raise Exception(
+            f"Request failed with status code {response.status_code}")
+
+    # Parse the response as JSON
+    return json.loads(response.text)
+
+
+# Manual smoke test with a developer-local image path: run it only when this
+# file is executed as a script, never when the module is imported.
+if __name__ == "__main__":
+    print(localai_analyzer("C:/Users/valen/Pictures/Screenshots/test.png", "What is in this image?", "gpt-4-vision-preview"))
diff --git a/custom_components/gpt4vision/manifest.json b/custom_components/gpt4vision/manifest.json
@@ -6,5 +6,5 @@
     "issue_tracker": "https://github.com/valentinfrlch/ha-gpt4vision/issues",
     "documentation": "https://github.com/valentinfrlch/ha-gpt4vision",
     "iot_class": "cloud_polling",
-    "version": "0.2.2"
+    "version": "0.3.0"
 }
diff --git a/custom_components/gpt4vision/strings.json b/custom_components/gpt4vision/strings.json
@@ -2,12 +2,33 @@
     "config": {
         "step": {
             "user": {
-                "title": "Add API key",
-                "description": "Provide your API key. See docs for more information.",
+                "title": "Choose your provider",
+                "description": "Select the provider you want to use for your AI.",
+                "data": {
+                    "mode": "Provider"
+                }
+            },
+            "localai": {
+                "title": "Connect to your LocalAI server",
+                "description": "Provide the IP address and port of your LocalAI server.",
+                "data": {
+                    "localai_ip": "IP address",
+                    "localai_port": "Port"
+                }
+            },
+            "openai": {
+                "title": "Add OpenAI API key",
+                "description": "Provide a valid OpenAI API key.",
                 "data": {
                     "api_key": "Your API key"
                 }
             }
+        },
+        "error": {
+            "empty_mode": "Please select a provider.",
+            "empty_ip_address": "IP address cannot be empty",
+            "empty_port": "Port cannot be empty",
+            "empty_api_key": "API key is required."
         }
     }
 }