Skip to content

Commit

Permalink
Merge branch 'main' into feat/new-login
Browse files Browse the repository at this point in the history
* main: (77 commits)
  feat: add voyage ai as a new model provider (#8747)
  docs: add english versions for the files customizable_model_scale_out and predefined_model_scale_out (#8871)
  fix: #8843 event: tts_message_end always return in api streaming resp… (#8846)
  Add Jamba and Llama3.2 model support (#8878)
  fix(workflow): update tagging logic in GitHub Actions (#8882)
  chore: bump ruff to 0.6.8 for fixing violation in SIM910 (#8869)
  refactor: update Callback to an abstract class (#8868)
  feat: deprecate gte-Qwen2-7B-instruct embedding model (#8866)
  feat: add internlm2.5-20b and qwen2.5-coder-7b model (#8862)
  fix: customize model credentials were invalid despite the provider credentials being active (#8864)
  fix: update qwen2.5-coder-7b model name (#8861)
  fix(workflow/nodes/knowledge-retrieval/use-config): Preserve rerankin… (#8842)
  chore: fix wrong VectorType match case (#8857)
  feat: add min-connection and max-connection for pgvector (#8841)
  feat(Tools): add feishu tools (#8800)
  fix: delete harm catalog settings for gemini (#8829)
  Add Llama3.2 models in Groq provider (#8831)
  feat: deprecate mistral model for siliconflow (#8828)
  fix: AnalyticdbVector retrieval scores (#8803)
  fix: close log status option raise error (#8826)
  ...
  • Loading branch information
ZhouhaoJiang committed Sep 29, 2024
2 parents d481a1b + fb49413 commit cd88f27
Show file tree
Hide file tree
Showing 381 changed files with 8,348 additions and 1,057 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ jobs:
with:
images: ${{ env[matrix.image_name_env] }}
tags: |
type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') }}
type=raw,value=latest,enable=${{ startsWith(github.ref, 'refs/tags/') && !contains(github.ref, '-') }}
type=ref,event=branch
type=sha,enable=true,priority=100,prefix=,suffix=,format=long
type=raw,value=${{ github.ref_name }},enable=${{ startsWith(github.ref, 'refs/tags/') }}
Expand Down
46 changes: 46 additions & 0 deletions .github/workflows/web-tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Runs the front-end test suite for pull requests that touch the web app.
name: Web Tests

on:
  pull_request:
    branches:
      - main
    # Only trigger when something under web/ changed.
    paths:
      - web/**

# One active run per PR branch; a newer push cancels the run in progress.
concurrency:
  group: web-tests-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  test:
    name: Web Tests
    runs-on: ubuntu-latest
    # Applies to `run:` steps only; `uses:` steps are unaffected.
    defaults:
      run:
        working-directory: ./web

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # NOTE(review): third-party action referenced by a mutable tag; consider
      # pinning it to a full commit SHA.
      - name: Check changed files
        id: changed-files
        uses: tj-actions/changed-files@v45
        with:
          files: web/**

      - name: Setup Node.js
        uses: actions/setup-node@v4
        if: steps.changed-files.outputs.any_changed == 'true'
        with:
          node-version: 20
          cache: yarn
          # Key the cache on the lockfile so it is invalidated exactly when the
          # resolved dependency set changes (install uses --frozen-lockfile).
          cache-dependency-path: ./web/yarn.lock

      - name: Install dependencies
        if: steps.changed-files.outputs.any_changed == 'true'
        run: yarn install --frozen-lockfile

      - name: Run tests
        if: steps.changed-files.outputs.any_changed == 'true'
        run: yarn test
2 changes: 2 additions & 0 deletions api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,8 @@ PGVECTOR_PORT=5433
PGVECTOR_USER=postgres
PGVECTOR_PASSWORD=postgres
PGVECTOR_DATABASE=postgres
PGVECTOR_MIN_CONNECTION=1
PGVECTOR_MAX_CONNECTION=5

# Tidb Vector configuration
TIDB_VECTOR_HOST=xxx.eu-central-1.xxx.aws.tidbcloud.com
Expand Down
8 changes: 3 additions & 5 deletions api/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,9 @@

warnings.simplefilter("ignore", ResourceWarning)

# fix windows platform
if os.name == "nt":
os.system('tzutil /s "UTC"')
else:
os.environ["TZ"] = "UTC"
os.environ["TZ"] = "UTC"
# Windows does not support time.tzset
if hasattr(time, "tzset"):
time.tzset()


Expand Down
2 changes: 1 addition & 1 deletion api/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,7 @@ def fix_app_site_missing():
app_was_created.send(app, account=account)
except Exception as e:
failed_app_ids.append(app_id)
click.echo(click.style("FFailed to fix missing site for app {}".format(app_id), fg="red"))
click.echo(click.style("Failed to fix missing site for app {}".format(app_id), fg="red"))
logging.exception(f"Fix app related site missing issue failed, error: {e}")
continue

Expand Down
10 changes: 10 additions & 0 deletions api/configs/middleware/vdb/pgvector_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,13 @@ class PGVectorConfig(BaseSettings):
description="Name of the PostgreSQL database to connect to",
default=None,
)

PGVECTOR_MIN_CONNECTION: PositiveInt = Field(
description="Min connection of the PostgreSQL database",
default=1,
)

PGVECTOR_MAX_CONNECTION: PositiveInt = Field(
description="Max connection of the PostgreSQL database",
default=5,
)
3 changes: 2 additions & 1 deletion api/controllers/console/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -563,10 +563,10 @@ def get(self):
case (
VectorType.MILVUS
| VectorType.RELYT
| VectorType.PGVECTOR
| VectorType.TIDB_VECTOR
| VectorType.CHROMA
| VectorType.TENCENT
| VectorType.PGVECTO_RS
):
return {"retrieval_method": [RetrievalMethod.SEMANTIC_SEARCH.value]}
case (
Expand All @@ -577,6 +577,7 @@ def get(self):
| VectorType.MYSCALE
| VectorType.ORACLE
| VectorType.ELASTICSEARCH
| VectorType.PGVECTOR
):
return {
"retrieval_method": [
Expand Down
3 changes: 2 additions & 1 deletion api/core/app/apps/advanced_chat/generate_task_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,7 +231,8 @@ def _wrapper_process_stream_response(
except Exception as e:
logger.error(e)
break
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
if tts_publisher:
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)

def _process_stream_response(
self,
Expand Down
5 changes: 3 additions & 2 deletions api/core/app/apps/base_app_generate_response_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,10 @@ def _get_simple_metadata(cls, metadata: dict[str, Any]):
:return:
"""
# show_retrieve_source
updated_resources = []
if "retriever_resources" in metadata:
metadata["retriever_resources"] = []
for resource in metadata["retriever_resources"]:
metadata["retriever_resources"].append(
updated_resources.append(
{
"segment_id": resource["segment_id"],
"position": resource["position"],
Expand All @@ -87,6 +87,7 @@ def _get_simple_metadata(cls, metadata: dict[str, Any]):
"content": resource["content"],
}
)
metadata["retriever_resources"] = updated_resources

# show annotation reply
if "annotation_reply" in metadata:
Expand Down
2 changes: 1 addition & 1 deletion api/core/app/apps/base_app_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ def _handle_invoke_result_stream(
if not prompt_messages:
prompt_messages = result.prompt_messages

if not usage and result.delta.usage:
if result.delta.usage:
usage = result.delta.usage

if not usage:
Expand Down
3 changes: 2 additions & 1 deletion api/core/app/apps/workflow/generate_task_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,8 @@ def _wrapper_process_stream_response(
except Exception as e:
logger.error(e)
break
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
if tts_publisher:
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)

def _process_stream_response(
self,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,8 @@ def _wrapper_process_stream_response(
else:
start_listener_time = time.time()
yield MessageAudioStreamResponse(audio=audio.audio, task_id=task_id)
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)
if publisher:
yield MessageAudioEndStreamResponse(audio="", task_id=task_id)

def _process_stream_response(
self, publisher: AppGeneratorTTSPublisher, trace_manager: Optional[TraceQueueManager] = None
Expand Down
9 changes: 7 additions & 2 deletions api/core/embedding/cached_embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import numpy as np
from sqlalchemy.exc import IntegrityError

from core.embedding.embedding_constant import EmbeddingInputType
from core.model_manager import ModelInstance
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
Expand Down Expand Up @@ -56,7 +57,9 @@ def embed_documents(self, texts: list[str]) -> list[list[float]]:
for i in range(0, len(embedding_queue_texts), max_chunks):
batch_texts = embedding_queue_texts[i : i + max_chunks]

embedding_result = self._model_instance.invoke_text_embedding(texts=batch_texts, user=self._user)
embedding_result = self._model_instance.invoke_text_embedding(
texts=batch_texts, user=self._user, input_type=EmbeddingInputType.DOCUMENT
)

for vector in embedding_result.embeddings:
try:
Expand Down Expand Up @@ -100,7 +103,9 @@ def embed_query(self, text: str) -> list[float]:
redis_client.expire(embedding_cache_key, 600)
return list(np.frombuffer(base64.b64decode(embedding), dtype="float"))
try:
embedding_result = self._model_instance.invoke_text_embedding(texts=[text], user=self._user)
embedding_result = self._model_instance.invoke_text_embedding(
texts=[text], user=self._user, input_type=EmbeddingInputType.QUERY
)

embedding_results = embedding_result.embeddings[0]
embedding_results = (embedding_results / np.linalg.norm(embedding_results)).tolist()
Expand Down
10 changes: 10 additions & 0 deletions api/core/embedding/embedding_constant.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from enum import Enum


class EmbeddingInputType(Enum):
    """Kind of text handed to an embedding model.

    Callers pass one of these members as ``input_type`` when requesting
    embeddings, to say whether the text is a document or a query.
    """

    # Text being embedded as a document.
    DOCUMENT = "document"
    # Text being embedded as a query.
    QUERY = "query"
2 changes: 1 addition & 1 deletion api/core/entities/provider_configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def get_current_credentials(self, model_type: ModelType, model: str) -> Optional
credentials = model_configuration.credentials
break

if self.custom_configuration.provider:
if not credentials and self.custom_configuration.provider:
credentials = self.custom_configuration.provider.credentials

return credentials
Expand Down
1 change: 0 additions & 1 deletion api/core/llm_generator/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@
"Please help me predict the three most likely questions that human would ask, "
"and keeping each question under 20 characters.\n"
"MAKE SURE your output is the SAME language as the Assistant's latest response"
"(if the main response is written in Chinese, then the language of your output must be using Chinese.)!\n"
"The output must be an array in JSON format following the specified schema:\n"
'["question1","question2","question3"]\n'
)
Expand Down
7 changes: 6 additions & 1 deletion api/core/model_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from collections.abc import Callable, Generator, Sequence
from typing import IO, Optional, Union, cast

from core.embedding.embedding_constant import EmbeddingInputType
from core.entities.provider_configuration import ProviderConfiguration, ProviderModelBundle
from core.entities.provider_entities import ModelLoadBalancingConfiguration
from core.errors.error import ProviderTokenNotInitError
Expand Down Expand Up @@ -158,12 +159,15 @@ def get_llm_num_tokens(
tools=tools,
)

def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) -> TextEmbeddingResult:
def invoke_text_embedding(
self, texts: list[str], user: Optional[str] = None, input_type: EmbeddingInputType = EmbeddingInputType.DOCUMENT
) -> TextEmbeddingResult:
"""
Invoke large language model
:param texts: texts to embed
:param user: unique user id
:param input_type: input type
:return: embeddings result
"""
if not isinstance(self.model_type_instance, TextEmbeddingModel):
Expand All @@ -176,6 +180,7 @@ def invoke_text_embedding(self, texts: list[str], user: Optional[str] = None) ->
credentials=self.credentials,
texts=texts,
user=user,
input_type=input_type,
)

def get_text_embedding_num_tokens(self, texts: list[str]) -> int:
Expand Down
7 changes: 6 additions & 1 deletion api/core/model_runtime/callbacks/base_callback.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from abc import ABC, abstractmethod
from typing import Optional

from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk
Expand All @@ -13,14 +14,15 @@
}


class Callback:
class Callback(ABC):
"""
Base class for callbacks.
Only for LLM.
"""

raise_error: bool = False

@abstractmethod
def on_before_invoke(
self,
llm_instance: AIModel,
Expand Down Expand Up @@ -48,6 +50,7 @@ def on_before_invoke(
"""
raise NotImplementedError()

@abstractmethod
def on_new_chunk(
self,
llm_instance: AIModel,
Expand Down Expand Up @@ -77,6 +80,7 @@ def on_new_chunk(
"""
raise NotImplementedError()

@abstractmethod
def on_after_invoke(
self,
llm_instance: AIModel,
Expand Down Expand Up @@ -106,6 +110,7 @@ def on_after_invoke(
"""
raise NotImplementedError()

@abstractmethod
def on_invoke_error(
self,
llm_instance: AIModel,
Expand Down
Loading

0 comments on commit cd88f27

Please sign in to comment.