diff --git a/agent/__init__.py b/agent/__init__.py
index e69de29bb2d..6bfdd33d0ba 100644
--- a/agent/__init__.py
+++ b/agent/__init__.py
@@ -0,0 +1,2 @@
+from beartype.claw import beartype_this_package
+beartype_this_package()
diff --git a/api/__init__.py b/api/__init__.py
index e69de29bb2d..6bfdd33d0ba 100644
--- a/api/__init__.py
+++ b/api/__init__.py
@@ -0,0 +1,2 @@
+from beartype.claw import beartype_this_package
+beartype_this_package()
diff --git a/deepdoc/__init__.py b/deepdoc/__init__.py
index e69de29bb2d..6bfdd33d0ba 100644
--- a/deepdoc/__init__.py
+++ b/deepdoc/__init__.py
@@ -0,0 +1,2 @@
+from beartype.claw import beartype_this_package
+beartype_this_package()
diff --git a/docs/guides/configure_knowledge_base.md b/docs/guides/configure_knowledge_base.md
index 457062b34c5..801163d6690 100644
--- a/docs/guides/configure_knowledge_base.md
+++ b/docs/guides/configure_knowledge_base.md
@@ -58,7 +58,7 @@ You can also change the chunk template for a particular file on the **Datasets**
### Select embedding model
-An embedding model converts chunks into embeddings. It cannot be changed once the knowledge base has chunks. To switch to a different embedding model, You must delete all chunks in the knowledge base. The obvious reason is that we *must* ensure that files in a specific knowledge base are converted to embeddings using the *same* embedding model (ensure that they are compared in the same embedding space).
+An embedding model converts chunks into embeddings. It cannot be changed once the knowledge base has chunks. To switch to a different embedding model, you must delete all existing chunks in the knowledge base. The obvious reason is that we *must* ensure that files in a specific knowledge base are converted to embeddings using the *same* embedding model (ensure that they are compared in the same embedding space).
The following embedding models can be deployed locally:
diff --git a/docs/release_notes.md b/docs/release_notes.md
index b59ebacb391..7af4232413a 100644
--- a/docs/release_notes.md
+++ b/docs/release_notes.md
@@ -13,7 +13,7 @@ Released on November 29, 2024.
### Improvements
-Adds [Infinity's configuration file](https://github.com/infiniflow/ragflow/blob/main/docker/infinity_conf.toml) to facilitate integration and customization of Infinity as a document engine. From this release onwards, updates to Infinity's configuration can be made directly within RAGFlow and will take effect immediately after restarting RAGFlow using `docker compose`. [#3715](https://github.com/infiniflow/ragflow/pull/3715)
+Adds [Infinity's configuration file](https://github.com/infiniflow/ragflow/blob/main/docker/infinity_conf.toml) to facilitate integration and customization of [Infinity](https://github.com/infiniflow/infinity) as a document engine. From this release onwards, updates to Infinity's configuration can be made directly within RAGFlow and will take effect immediately after restarting RAGFlow using `docker compose`. [#3715](https://github.com/infiniflow/ragflow/pull/3715)
### Fixed issues
@@ -137,7 +137,7 @@ See [Upgrade RAGFlow](https://ragflow.io/docs/dev/upgrade_ragflow) for instructi
## v0.11.0
-Released on September 14, 2024
+Released on September 14, 2024.
### New features
@@ -152,4 +152,100 @@ Released on September 14, 2024
- Supports running retrieval benchmarking on the following datasets:
- [ms_marco_v1.1](https://huggingface.co/datasets/microsoft/ms_marco)
- [trivia_qa](https://huggingface.co/datasets/mandarjoshi/trivia_qa)
- - [miracl](https://huggingface.co/datasets/miracl/miracl)
\ No newline at end of file
+ - [miracl](https://huggingface.co/datasets/miracl/miracl)
+
+## v0.10.0
+
+Released on August 26, 2024.
+
+### New features
+
+- Introduces a text-to-SQL template in the Agent UI.
+- Implements Agent APIs.
+- Incorporates monitoring for the task executor.
+- Introduces Agent tools **GitHub**, **DeepL**, **BaiduFanyi**, **QWeather**, and **GoogleScholar**.
+- Supports chunking of EML files.
+- Supports more LLMs or model services: **GPT-4o-mini**, **PerfXCloud**, **TogetherAI**, **Upstage**, **Novita.AI**, **01.AI**, **SiliconFlow**, **XunFei Spark**, **Baidu Yiyan**, and **Tencent Hunyuan**.
+
+## v0.9.0
+
+Released on August 6, 2024.
+
+### New features
+
+- Supports GraphRAG as a chunk method.
+- Introduces Agent component **Keyword** and search tools, including **Baidu**, **DuckDuckGo**, **PubMed**, **Wikipedia**, **Bing**, and **Google**.
+- Supports speech-to-text recognition for audio files.
+- Supports model vendors **Gemini** and **Groq**.
+- Supports inference frameworks, engines, and services including **LM studio**, **OpenRouter**, **LocalAI**, and **Nvidia API**.
+- Supports using reranker models in Xinference.
+
+## v0.8.0
+
+Released on July 8, 2024.
+
+### New features
+
+- Supports Agentic RAG, enabling graph-based workflow construction for RAG and agents.
+- Supports model vendors **Mistral**, **MiniMax**, **Bedrock**, and **Azure OpenAI**.
+- Supports DOCX files in the MANUAL chunk method.
+- Supports DOCX, MD, and PDF files in the Q&A chunk method.
+
+## v0.7.0
+
+Released on May 31, 2024.
+
+### New features
+
+- Supports the use of reranker models.
+- Integrates reranker and embedding models: [BCE](https://github.com/netease-youdao/BCEmbedding), [BGE](https://github.com/FlagOpen/FlagEmbedding), and [Jina](https://jina.ai/embeddings/).
+- Supports LLMs Baichuan and VolcanoArk.
+- Implements [RAPTOR](https://arxiv.org/html/2401.18059v1) for improved text retrieval.
+- Supports HTML files in the GENERAL chunk method.
+- Provides HTTP and Python APIs for deleting documents by ID.
+- Supports ARM64 platforms.
+
+:::danger IMPORTANT
+While we also test RAGFlow on ARM64 platforms, we do not plan to maintain RAGFlow Docker images for ARM.
+
+If you are on an ARM platform, follow [this guide](https://ragflow.io/docs/dev/build_docker_image) to build a RAGFlow Docker image.
+:::
+
+### Related APIs
+
+#### HTTP API
+
+- [Delete documents](https://ragflow.io/docs/dev/http_api_reference#delete-documents)
+
+#### Python API
+
+- [Delete documents](https://ragflow.io/docs/dev/python_api_reference#delete-documents)
+
+## v0.6.0
+
+Released on May 21, 2024.
+
+### New features
+
+- Supports streaming output.
+- Provides HTTP and Python APIs for retrieving document chunks.
+- Supports monitoring of system components, including Elasticsearch, MySQL, Redis, and MinIO.
+- Supports disabling **Layout Recognition** in the GENERAL chunk method to reduce file chunking time.
+
+### Related APIs
+
+#### HTTP API
+
+- [Retrieve chunks](https://ragflow.io/docs/dev/http_api_reference#retrieve-chunks)
+
+#### Python API
+
+- [Retrieve chunks](https://ragflow.io/docs/dev/python_api_reference#retrieve-chunks)
+
+## v0.5.0
+
+Released on May 8, 2024.
+
+### New features
+
+- Supports LLM DeepSeek.
diff --git a/intergrations/chatgpt-on-wechat/plugins/__init__.py b/intergrations/chatgpt-on-wechat/plugins/__init__.py
index c1c3a156841..28032b8b8e8 100644
--- a/intergrations/chatgpt-on-wechat/plugins/__init__.py
+++ b/intergrations/chatgpt-on-wechat/plugins/__init__.py
@@ -1,3 +1,6 @@
+from beartype.claw import beartype_this_package
+beartype_this_package()
+
from .ragflow_chat import RAGFlowChat
__all__ = [
diff --git a/rag/__init__.py b/rag/__init__.py
index e69de29bb2d..6bfdd33d0ba 100644
--- a/rag/__init__.py
+++ b/rag/__init__.py
@@ -0,0 +1,2 @@
+from beartype.claw import beartype_this_package
+beartype_this_package()
diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py
index 227c0bc6a4b..ba1ae2ff38b 100644
--- a/rag/utils/infinity_conn.py
+++ b/rag/utils/infinity_conn.py
@@ -350,8 +350,9 @@ def insert(
assert "_id" not in d
assert "id" in d
for k, v in d.items():
- if k.endswith("_kwd") and isinstance(v, list):
- d[k] = " ".join(v)
+ if k in ["important_kwd", "question_kwd", "entities_kwd"]:
+ assert isinstance(v, list)
+ d[k] = "###".join(v)
elif k == 'kb_id':
if isinstance(d[k], list):
d[k] = d[k][0] # since d[k] is a list, but we need a str
@@ -443,9 +444,9 @@ def getFields(self, res, fields: list[str]) -> list[str, dict]:
v = res[fieldnm][i]
if isinstance(v, Series):
v = list(v)
- elif fieldnm.endswith("_kwd"):
+ elif fieldnm in ["important_kwd", "question_kwd", "entities_kwd"]:
assert isinstance(v, str)
- v = v.split()
+ v = [kwd for kwd in v.split("###") if kwd]
elif fieldnm == "position_int":
assert isinstance(v, str)
if v:
diff --git a/sdk/python/ragflow_sdk/__init__.py b/sdk/python/ragflow_sdk/__init__.py
index f8df6aaf056..b7cf31e1235 100644
--- a/sdk/python/ragflow_sdk/__init__.py
+++ b/sdk/python/ragflow_sdk/__init__.py
@@ -1,3 +1,6 @@
+from beartype.claw import beartype_this_package
+beartype_this_package()
+
import importlib.metadata
from .ragflow import RAGFlow
diff --git a/web/reducer.js b/web/reducer.js
deleted file mode 100644
index 5a8d5cc6fb9..00000000000
--- a/web/reducer.js
+++ /dev/null
@@ -1,26 +0,0 @@
-import React, { useReducer } from 'react';
-const CHANGE_LOCALE = 'CHANGE_LOCALE';
-
-const mainContext = React.createContext();
-
-const reducer = (state, action) => {
- switch (action.type) {
- case CHANGE_LOCALE:
- return { ...state, locale: action.locale || 'zh' };
- default:
- return state;
- }
-};
-
-const ContextProvider = (props) => {
- const [state, dispatch] = useReducer(reducer, {
- locale: 'zh',
- });
- return (
-
Supported file formats are DOCX, EXCEL, PPT, IMAGE, PDF, TXT, MD, JSON, EML -
This approach chunks files using the 'naive'/'General' method. It splits a document into segements and then combines adjacent segments until the token count exceeds the threshold specified by 'Chunk token number', at which point a chunk is created.
+This approach chunks files using the 'naive'/'General' method. It splits a document into segments and then combines adjacent segments until the token count exceeds the threshold specified by 'Chunk token number', at which point a chunk is created.
The chunks are then fed to the LLM to extract entities and relationships for a knowledge graph and a mind map.
Ensure that you set the Entity types.
`, useRaptor: 'Use RAPTOR to enhance retrieval', useRaptorTip: - 'Recursive Abstractive Processing for Tree-Organized Retrieval, see https://huggingface.co/papers/2401.18059 for more information', + 'Recursive Abstractive Processing for Tree-Organized Retrieval, see https://huggingface.co/papers/2401.18059 for more information.', prompt: 'Prompt', promptTip: 'LLM prompt used for summarization.', promptMessage: 'Prompt is required', @@ -305,7 +305,7 @@ The above is the content you need to summarize.`, entityTypes: 'Entity types', vietnamese: 'Vietamese', pageRank: 'Page rank', - pageRankTip: `This is used to boost the relevance score. The relevance score with all the retrieved chunks will plus this number, When you want to search the given knowledge base at first place, set a higher pagerank score than others.`, + pageRankTip: `This increases the relevance score of the knowledge base. Its value will be added to the relevance score of all retrieved chunks from this knowledge base. Useful when you are searching within multiple knowledge bases and wanting to assign a higher pagerank score to a specific one.`, }, chunk: { chunk: 'Chunk', diff --git a/web/src/pages/chat/markdown-content/index.tsx b/web/src/pages/chat/markdown-content/index.tsx index 5e0d30c2097..d4acf95e678 100644 --- a/web/src/pages/chat/markdown-content/index.tsx +++ b/web/src/pages/chat/markdown-content/index.tsx @@ -20,9 +20,10 @@ import { useTranslation } from 'react-i18next'; import 'katex/dist/katex.min.css'; // `rehype-katex` does not import the CSS for you +import { replaceTextByOldReg } from '../utils'; import styles from './index.less'; -const reg = /(#{2}\d+\${2})/g; +const reg = /(#{2}\d+@{2})/g; const curReg = /(~{2}\d+\${2})/g; const getChunkIndex = (match: string) => Number(match.slice(2, -2)); @@ -156,7 +157,9 @@ const MarkdownContent = ({ const renderReference = useCallback( (text: string) => { - let replacedText = reactStringReplace(text, reg, (match, i) => { + const nextText = 
replaceTextByOldReg(text); + + let replacedText = reactStringReplace(nextText, reg, (match, i) => { const chunkIndex = getChunkIndex(match); return (