Merge branch 'main' into feat/new-login

* main: (77 commits) feat: add voyage ai as a new model provider (#8747) docs: add english versions for the files customizable_model_scale_out and predefined_model_scale_out (#8871) fix: #8843 event: tts_message_end always return in api streaming resp… (#8846) Add Jamba and Llama3.2 model support (#8878) fix(workflow): update tagging logic in GitHub Actions (#8882) chore: bump ruff to 0.6.8 for fixing violation in SIM910 (#8869) refactor: update Callback to an abstract class (#8868) feat: deprecate gte-Qwen2-7B-instruct embedding model (#8866) feat: add internlm2.5-20b and qwen2.5-coder-7b model (#8862) fix: customize model credentials were invalid despite the provider credentials being active (#8864) fix: update qwen2.5-coder-7b model name (#8861) fix(workflow/nodes/knowledge-retrieval/use-config): Preserve rerankin… (#8842) chore: fix wrong VectorType match case (#8857) feat: add min-connection and max-connection for pgvector (#8841) feat(Tools): add feishu tools (#8800) fix: delete harm catalog settings for gemini (#8829) Add Llama3.2 models in Groq provider (#8831) feat: deprecate mistral model for siliconflow (#8828) fix: AnalyticdbVector retrieval scores (#8803) fix: close log status option raise error (#8826) ...
langgenius · Sep 29, 2024 · cd88f27 · cd88f27
2 parents d481a1b + fb49413
commit cd88f27
Show file tree

Hide file tree

Showing 381 changed files with 8,348 additions and 1,057 deletions.
diff --git a/.github/workflows/build-push.yml b/.github/workflows/build-push.yml
@@ -125,7 +125,7 @@ jobs:
         with:
           images: ${{ env[matrix.image_name_env] }}
           tags: |
-            type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }}
+            type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }}
             type=ref,event=branch
             type=sha,enable=true,priority=100,prefix=,suffix=,format=long
             type=raw,value=${{ github.ref_name }},enable=${{ startsWith(github.ref, 'refs/tags/') }}

diff --git a/.github/workflows/web-tests.yml b/.github/workflows/web-tests.yml
@@ -0,0 +1,46 @@
+name: Web Tests
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - web/**
+
+concurrency:
+  group: web-tests-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  test:
+    name: Web Tests
+    runs-on: ubuntu-latest
+    defaults:
+      run:
+        working-directory: ./web
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Check changed files
+        id: changed-files
+        uses: tj-actions/changed-files@v45
+        with:
+          files: web/**
+
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
+        if: steps.changed-files.outputs.any_changed == 'true'
+        with:
+          node-version: 20
+          cache: yarn
+          cache-dependency-path: ./web/package.json
+
+      - name: Install dependencies
+        if: steps.changed-files.outputs.any_changed == 'true'
+        run: yarn install --frozen-lockfile
+
+      - name: Run tests
+        if: steps.changed-files.outputs.any_changed == 'true'
+        run: yarn test
diff --git a/api/.env.example b/api/.env.example
@@ -162,6 +162,8 @@ PGVECTOR_PORT=5433
 PGVECTOR_USER=postgres
 PGVECTOR_PASSWORD=postgres
 PGVECTOR_DATABASE=postgres
+PGVECTOR_MIN_CONNECTION=1
+PGVECTOR_MAX_CONNECTION=5
 
 # Tidb Vector configuration
 TIDB_VECTOR_HOST=xxx.eu-central-1.xxx.aws.tidbcloud.com

diff --git a/api/app.py b/api/app.py
@@ -53,11 +53,9 @@
 
 warnings.simplefilter("ignore", ResourceWarning)
 
-# fix windows platform
-if os.name == "nt":
-    os.system('tzutil /s "UTC"')
-else:
-    os.environ["TZ"] = "UTC"
+os.environ["TZ"] = "UTC"
+# time.tzset() is not available on Windows, so only call it when it exists
+if hasattr(time, "tzset"):
     time.tzset()
 
 

diff --git a/api/commands.py b/api/commands.py
@@ -652,7 +652,7 @@ def fix_app_site_missing():
                         app_was_created.send(app, account=account)
                 except Exception as e:
                     failed_app_ids.append(app_id)
-                    click.echo(click.style("FFailed to fix missing site for app {}".format(app_id), fg="red"))
+                    click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red"))
                     logging.exception(f"Fix app related site missing issue failed, error: {e}")
                     continue
 

diff --git a/api/configs/middleware/vdb/pgvector_config.py b/api/configs/middleware/vdb/pgvector_config.py
@@ -33,3 +33,13 @@ class PGVectorConfig(BaseSettings):
         description="Name of the PostgreSQL database to connect to",
         default=None,
     )
+
+    PGVECTOR_MIN_CONNECTION: PositiveInt = Field(
+        description="Min connection of the PostgreSQL database",
+        default=1,
+    )
+
+    PGVECTOR_MAX_CONNECTION: PositiveInt = Field(
+        description="Max connection of the PostgreSQL database",
+        default=5,
+    )
diff --git a/api/controllers/console/datasets/datasets.py b/api/controllers/console/datasets/datasets.py
@@ -563,10 +563,10 @@ def get(self):
             case (
                 VectorType.MILVUS
                 | VectorType.RELYT
-                | VectorType.PGVECTOR
                 | VectorType.TIDB_VECTOR
                 | VectorType.CHROMA
                 | VectorType.TENCENT
+                | VectorType.PGVECTO_RS
             ):
                 return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
             case (
@@ -577,6 +577,7 @@ def get(self):
                 | VectorType.MYSCALE
                 | VectorType.ORACLE
                 | VectorType.ELASTICSEARCH
+                | VectorType.PGVECTOR
             ):
                 return {
                     "retrieval_method": [

diff --git a/api/core/app/apps/advanced_chat/generate_task_pipeline.py b/api/core/app/apps/advanced_chat/generate_task_pipeline.py
@@ -231,7 +231,8 @@ def _wrapper_process_stream_response(
             except Exception as e:
                 logger.error(e)
                 break
-        yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
+        if tts_publisher:
+            yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
 
     def _process_stream_response(
         self,

diff --git a/api/core/app/apps/base_app_generate_response_converter.py b/api/core/app/apps/base_app_generate_response_converter.py
@@ -75,10 +75,10 @@ def _get_simple_metadata(cls, metadata: dict[str, Any]):
         :return:
         """
         # show_retrieve_source
+        updated_resources = []
         if "retriever_resources" in metadata:
-            metadata["retriever_resources"] = []
             for resource in metadata["retriever_resources"]:
-                metadata["retriever_resources"].append(
+                updated_resources.append(
                     {
                         "segment_id": resource["segment_id"],
                         "position": resource["position"],
@@ -87,6 +87,7 @@ def _get_simple_metadata(cls, metadata: dict[str, Any]):
                         "content": resource["content"],
                     }
                 )
+            metadata["retriever_resources"] = updated_resources
 
         # show annotation reply
         if "annotation_reply" in metadata:

diff --git a/api/core/app/apps/base_app_runner.py b/api/core/app/apps/base_app_runner.py
@@ -309,7 +309,7 @@ def _handle_invoke_result_stream(
             if not prompt_messages:
                 prompt_messages = result.prompt_messages
 
-            if not usage and result.delta.usage:
+            if result.delta.usage:
                 usage = result.delta.usage
 
         if not usage:

diff --git a/api/core/app/apps/workflow/generate_task_pipeline.py b/api/core/app/apps/workflow/generate_task_pipeline.py
@@ -212,7 +212,8 @@ def _wrapper_process_stream_response(
             except Exception as e:
                 logger.error(e)
                 break
-        yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
+        if tts_publisher:
+            yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
 
     def _process_stream_response(
         self,

diff --git a/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py b/api/core/app/task_pipeline/easy_ui_based_generate_task_pipeline.py
@@ -248,7 +248,8 @@ def _wrapper_process_stream_response(
             else:
                 start_listener_time = time.time()
                 yield MessageAudioStreamResponse(audio=audio.audio, task_id=task_id)
-        yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
+        if publisher:
+            yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
 
     def _process_stream_response(
         self, publisher: AppGeneratorTTSPublisher, trace_manager: Optional[TraceQueueManager] = None

diff --git a/api/core/embedding/cached_embedding.py b/api/core/embedding/cached_embedding.py
@@ -5,6 +5,7 @@
 import numpy as np
 from sqlalchemy.exc import IntegrityError
 
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.model_manager import ModelInstance
 from core.model_runtime.entities.model_entities import ModelPropertyKey
 from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
@@ -56,7 +57,9 @@ def embed_documents(self, texts: list[str]) -> list[list[float]]:
                 for i in range(0, len(embedding_queue_texts), max_chunks):
                     batch_texts = embedding_queue_texts[i : i + max_chunks]
 
-                    embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
+                    embedding_result = self._model_instance.invoke_text_embedding(
+                        texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
+                    )
 
                     for vector in embedding_result.embeddings:
                         try:
@@ -100,7 +103,9 @@ def embed_query(self, text: str) -> list[float]:
             redis_client.expire(embedding_cache_key, 600)
             return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
         try:
-            embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
+            embedding_result = self._model_instance.invoke_text_embedding(
+                texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
+            )
 
             embedding_results = embedding_result.embeddings[0]
             embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()

diff --git a/api/core/embedding/embedding_constant.py b/api/core/embedding/embedding_constant.py
@@ -0,0 +1,10 @@
+from enum import Enum
+
+
+class EmbeddingInputType(Enum):
+    """
+    Enum for embedding input type.
+    """
+
+    DOCUMENT = "document"
+    QUERY = "query"
diff --git a/api/core/entities/provider_configuration.py b/api/core/entities/provider_configuration.py
@@ -119,7 +119,7 @@ def get_current_credentials(self, model_type: ModelType, model: str) -> Optional
                         credentials = model_configuration.credentials
                         break
 
-            if self.custom_configuration.provider:
+            if not credentials and self.custom_configuration.provider:
                 credentials = self.custom_configuration.provider.credentials
 
             return credentials

diff --git a/api/core/llm_generator/prompts.py b/api/core/llm_generator/prompts.py
@@ -65,7 +65,6 @@
     "Please help me predict the three most likely questions that human would ask, "
     "and keeping each question under 20 characters.\n"
     "MAKE SURE your output is the SAME language as the Assistant's latest response"
-    "(if the main response is written in Chinese, then the language of your output must be using Chinese.)!\n"
     "The output must be an array in JSON format following the specified schema:\n"
     '["question1","question2","question3"]\n'
 )

diff --git a/api/core/model_manager.py b/api/core/model_manager.py
@@ -3,6 +3,7 @@
 from collections.abc import Callable, Generator, Sequence
 from typing import IO, Optional, Union, cast
 
+from core.embedding.embedding_constant import EmbeddingInputType
 from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
 from core.entities.provider_entities import ModelLoadBalancingConfiguration
 from core.errors.error import ProviderTokenNotInitError
@@ -158,12 +159,15 @@ def get_llm_num_tokens(
             tools=tools,
         )
 
-    def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
+    def invoke_text_embedding(
+        self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
+    ) -> TextEmbeddingResult:
         """
         Invoke large language model
 
         :param texts: texts to embed
         :param user: unique user id
+        :param input_type: embedding input type (document or query); defaults to document
         :return: embeddings result
         """
         if not isinstance(self.model_type_instance, TextEmbeddingModel):
@@ -176,6 +180,7 @@ def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) ->
             credentials=self.credentials,
             texts=texts,
             user=user,
+            input_type=input_type,
         )
 
     def get_text_embedding_num_tokens(self, texts: list[str]) -> int:

diff --git a/api/core/model_runtime/callbacks/base_callback.py b/api/core/model_runtime/callbacks/base_callback.py
@@ -1,3 +1,4 @@
+from abc import ABC, abstractmethod
 from typing import Optional
 
 from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
@@ -13,14 +14,15 @@
 }
 
 
-class Callback:
+class Callback(ABC):
     """
     Base class for callbacks.
     Only for LLM.
     """
 
     raise_error: bool = False
 
+    @abstractmethod
     def on_before_invoke(
         self,
         llm_instance: AIModel,
@@ -48,6 +50,7 @@ def on_before_invoke(
         """
         raise NotImplementedError()
 
+    @abstractmethod
     def on_new_chunk(
         self,
         llm_instance: AIModel,
@@ -77,6 +80,7 @@ def on_new_chunk(
         """
         raise NotImplementedError()
 
+    @abstractmethod
     def on_after_invoke(
         self,
         llm_instance: AIModel,
@@ -106,6 +110,7 @@ def on_after_invoke(
         """
         raise NotImplementedError()
 
+    @abstractmethod
     def on_invoke_error(
         self,
         llm_instance: AIModel,