Skip to content

Commit

Permalink
Merge branch 'infiniflow:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
isthaison authored Dec 17, 2024
2 parents 0696135 + 8ea631a commit 80e7966
Show file tree
Hide file tree
Showing 17 changed files with 80 additions and 91 deletions.
5 changes: 5 additions & 0 deletions README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,11 @@
| nightly | ≈9 | :heavy_check_mark: | *Unstable* nightly build |
| nightly-slim | ≈2 || *Unstable* nightly build |

> [!TIP]
> 如果你遇到 Docker 镜像拉不下来的问题,可以在 **docker/.env** 文件内根据变量 `RAGFLOW_IMAGE` 的注释提示选择华为云或者阿里云的相应镜像。
> - 华为云镜像名:`swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow`
> - 阿里云镜像名:`registry.cn-hangzhou.aliyuncs.com/infiniflow/ragflow`
4. 服务器启动成功后再次确认服务器状态:

```bash
Expand Down
20 changes: 17 additions & 3 deletions agent/component/exesql.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import psycopg2
from agent.component.base import ComponentBase, ComponentParamBase
import pyodbc

import logging

class ExeSQLParam(ComponentParamBase):
"""
Expand Down Expand Up @@ -65,13 +65,26 @@ def _run(self, history, **kwargs):
self._loop += 1

ans = self.get_input()


ans = "".join([str(a) for a in ans["content"]]) if "content" in ans else ""
ans = re.sub(r'^.*?SELECT ', 'SELECT ', repr(ans), flags=re.IGNORECASE)
if self._param.db_type == 'mssql':
# improve the information extraction, most llm return results in markdown format ```sql query ```
match = re.search(r"```sql\s*(.*?)\s*```", ans, re.DOTALL)
if match:
ans = match.group(1) # Query content
print(ans)
else:
print("no markdown")
ans = re.sub(r'^.*?SELECT ', 'SELECT ', (ans), flags=re.IGNORECASE)
else:
ans = re.sub(r'^.*?SELECT ', 'SELECT ', repr(ans), flags=re.IGNORECASE)
ans = re.sub(r';.*?SELECT ', '; SELECT ', ans, flags=re.IGNORECASE)
ans = re.sub(r';[^;]*$', r';', ans)
if not ans:
raise Exception("SQL statement not found!")

logging.info("db_type: ",self._param.db_type)
if self._param.db_type in ["mysql", "mariadb"]:
db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host,
port=self._param.port, password=self._param.password)
Expand All @@ -96,11 +109,12 @@ def _run(self, history, **kwargs):
if not single_sql:
continue
try:
logging.info("single_sql: ",single_sql)
cursor.execute(single_sql)
if cursor.rowcount == 0:
sql_res.append({"content": "\nTotal: 0\n No record in the database!"})
continue
single_res = pd.DataFrame([i for i in cursor.fetchmany(size=self._param.top_n)])
single_res = pd.DataFrame([i for i in cursor.fetchmany(self._param.top_n)])
single_res.columns = [i[0] for i in cursor.description]
sql_res.append({"content": "\nTotal: " + str(cursor.rowcount) + "\n" + single_res.to_markdown()})
except Exception as e:
Expand Down
2 changes: 0 additions & 2 deletions api/apps/dialog_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,6 @@ def set_dialog():
req["rerank_id"] = ""
similarity_threshold = req.get("similarity_threshold", 0.1)
vector_similarity_weight = req.get("vector_similarity_weight", 0.3)
if vector_similarity_weight is None:
vector_similarity_weight = 0.3
llm_setting = req.get("llm_setting", {})
default_prompt = {
"system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。
Expand Down
3 changes: 1 addition & 2 deletions api/apps/document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,12 +356,11 @@ def run():
try:
for id in req["doc_ids"]:
info = {"run": str(req["run"]), "progress": 0}
if str(req["run"]) == TaskStatus.RUNNING.value:
if str(req["run"]) == TaskStatus.RUNNING.value and req.get("delete", False):
info["progress_msg"] = ""
info["chunk_num"] = 0
info["token_num"] = 0
DocumentService.update_by_id(id, info)
# if str(req["run"]) == TaskStatus.CANCEL.value:
tenant_id = DocumentService.get_tenant_id(id)
if not tenant_id:
return get_data_error_result(message="Tenant not found!")
Expand Down
4 changes: 1 addition & 3 deletions api/apps/llm_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,9 +255,7 @@ def apikey_json(keys):
)
try:
img_url = (
"https://upload.wikimedia.org/wikipedia/comm"
"ons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/256"
"0px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
"https://www.8848seo.cn/zb_users/upload/2022/07/20220705101240_99378.jpg"
)
res = requests.get(img_url)
if res.status_code == 200:
Expand Down
11 changes: 8 additions & 3 deletions api/db/services/task_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,15 +248,17 @@ def new_task():

prev_tasks = TaskService.get_tasks(doc["id"])
if prev_tasks:
ck_num = 0
for task in tsks:
reuse_prev_task_chunks(task, prev_tasks, chunking_config)
ck_num += reuse_prev_task_chunks(task, prev_tasks, chunking_config)
TaskService.filter_delete([Task.doc_id == doc["id"]])
chunk_ids = []
for task in prev_tasks:
if task["chunk_ids"]:
chunk_ids.extend(task["chunk_ids"].split())
if chunk_ids:
settings.docStoreConn.delete({"id": chunk_ids}, search.index_name(chunking_config["tenant_id"]), chunking_config["kb_id"])
DocumentService.update_by_id(doc["id"], {"chunk_num": ck_num})

bulk_insert_into_db(Task, tsks, True)
DocumentService.begin2parse(doc["id"])
Expand All @@ -267,14 +269,17 @@ def new_task():
SVR_QUEUE_NAME, message=t
), "Can't access Redis. Please check the Redis' status."


def reuse_prev_task_chunks(task: dict, prev_tasks: list[dict], chunking_config: dict):
idx = bisect.bisect_left(prev_tasks, task["from_page"], key=lambda x: x["from_page"])
if idx >= len(prev_tasks):
return
return 0
prev_task = prev_tasks[idx]
if prev_task["progress"] < 1.0 or prev_task["digest"] != task["digest"] or not prev_task["chunk_ids"]:
return
return 0
task["chunk_ids"] = prev_task["chunk_ids"]
task["progress"] = 1.0
task["progress_msg"] = f"Page({task['from_page']}~{task['to_page']}): reused previous task's chunks"
prev_task["chunk_ids"] = ""

return len(task["chunk_ids"].split())
9 changes: 6 additions & 3 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
- **docker-compose-base.yml**
Sets up environment for RAGFlow's dependencies: Elasticsearch/[Infinity](https://github.com/infiniflow/infinity), MySQL, MinIO, and Redis.

> [!CAUTION]
> We do not actively maintain **docker-compose-CN-oc9.yml**, **docker-compose-gpu-CN-oc9.yml**, or **docker-compose-gpu.yml**, so use them at your own risk. However, you are welcome to file a pull request to improve any of them.
## 🐬 Docker environment variables

The [.env](./.env) file contains important environment variables for Docker.
Expand All @@ -27,7 +30,7 @@ The [.env](./.env) file contains important environment variables for Docker.
- `ES_PORT`
The port used to expose the Elasticsearch service to the host machine, allowing **external** access to the service running inside the Docker container. Defaults to `1200`.
- `ELASTIC_PASSWORD`
The password for Elasticsearch.
The password for Elasticsearch.

### Kibana

Expand All @@ -46,7 +49,7 @@ The [.env](./.env) file contains important environment variables for Docker.
### MySQL

- `MYSQL_PASSWORD`
The password for MySQL.
The password for MySQL.
- `MYSQL_PORT`
The port used to expose the MySQL service to the host machine, allowing **external** access to the MySQL database running inside the Docker container. Defaults to `5455`.

Expand All @@ -59,7 +62,7 @@ The [.env](./.env) file contains important environment variables for Docker.
- `MINIO_USER`
The username for MinIO.
- `MINIO_PASSWORD`
The password for MinIO.
The password for MinIO.

### Redis

Expand Down
2 changes: 2 additions & 0 deletions docker/docker-compose-CN-oc9.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# The RAGFlow team does not actively maintain docker-compose-CN-oc9.yml, so use it at your own risk.
# However, you are welcome to file a pull request to improve it.
include:
- ./docker-compose-base.yml

Expand Down
37 changes: 0 additions & 37 deletions docker/docker-compose-admin-tool.yml

This file was deleted.

2 changes: 2 additions & 0 deletions docker/docker-compose-gpu-CN-oc9.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# The RAGFlow team does not actively maintain docker-compose-gpu-CN-oc9.yml, so use it at your own risk.
# However, you are welcome to file a pull request to improve it.
include:
- ./docker-compose-base.yml

Expand Down
2 changes: 2 additions & 0 deletions docker/docker-compose-gpu.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# The RAGFlow team does not actively maintain docker-compose-gpu.yml, so use it at your own risk.
# However, you are welcome to file a pull request to improve it.
include:
- ./docker-compose-base.yml

Expand Down
6 changes: 5 additions & 1 deletion docs/configurations.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ docker compose -f docker/docker-compose.yml up -d
- **docker-compose-base.yml**
Sets up environment for RAGFlow's dependencies: Elasticsearch/[Infinity](https://github.com/infiniflow/infinity), MySQL, MinIO, and Redis.

:::danger IMPORTANT
We do not actively maintain **docker-compose-CN-oc9.yml**, **docker-compose-gpu-CN-oc9.yml**, or **docker-compose-gpu.yml**, so use them at your own risk. However, you are welcome to file a pull request to improve any of them.
:::

## Docker environment variables

The [.env](https://github.com/infiniflow/ragflow/blob/main/docker/.env) file contains important environment variables for Docker.
Expand All @@ -45,7 +49,7 @@ The [.env](https://github.com/infiniflow/ragflow/blob/main/docker/.env) file con
- `ES_PORT`
The port used to expose the Elasticsearch service to the host machine, allowing **external** access to the service running inside the Docker container. Defaults to `1200`.
- `ELASTIC_PASSWORD`
The password for Elasticsearch.
The password for Elasticsearch.

### Kibana

Expand Down
8 changes: 6 additions & 2 deletions docs/guides/develop/launch_ragflow_from_source.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,14 @@ cd ragflow/
```

3. Install Python dependencies:

- slim:
```bash
~/.local/bin/poetry install --sync --no-root
```
- full:
```bash
~/.local/bin/poetry install --sync --no-root --with full
```
*A virtual environment named `.venv` is created, and all Python dependencies are installed into the new environment.*

### Launch Third-party Services
Expand Down Expand Up @@ -138,4 +142,4 @@ In your web browser, enter `http://127.0.0.1:<PORT>/`, ensuring the port number
2. Stop the RAGFlow backend service:
```bash
pkill -f "docker/entrypoint.sh"
```
```
16 changes: 8 additions & 8 deletions docs/quickstart.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -196,14 +196,14 @@ This section provides instructions on setting up the RAGFlow server on Linux. If
<APITable>
```

| RAGFlow image tag | Image size (GB) | Has embedding models and Python packages? | Stable? |
| ------------------- | --------------- | ----------------------------------------- | ------------------------ |
| `v0.14.1` | &approx;9 | :heavy_check_mark: | Stable release |
| `v0.14.1-slim` | &approx;2 || Stable release |
| `v0.15.0-dev1` | &approx;9 | :heavy_check_mark: | *Unstable* beta release |
| `v0.15.0-dev1-slim` | &approx;2 || *Unstable* beta release |
| `nightly` | &approx;9 | :heavy_check_mark: | *Unstable* nightly build |
| `nightly-slim` | &approx;2 || *Unstable* nightly build |
| RAGFlow image tag | Image size (GB) | Has embedding models and Python packages? | Stable? |
| ------------------- | --------------- | ----------------------------------------- | ------------------------ |
| `v0.14.1` | &approx;9 | :heavy_check_mark: | Stable release |
| `v0.14.1-slim` | &approx;2 || Stable release |
| `v0.15.0-dev1` | &approx;9 | :heavy_check_mark: | *Unstable* beta release |
| `v0.15.0-dev1-slim` | &approx;2 || *Unstable* beta release |
| `nightly` | &approx;9 | :heavy_check_mark: | *Unstable* nightly build |
| `nightly-slim` | &approx;2 || *Unstable* nightly build |

```mdx-code-block
</APITable>
Expand Down
5 changes: 2 additions & 3 deletions web/src/hooks/chunk-hooks.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,8 @@ export const useFetchNextChunkList = (): ResponseGetType<{
debouncedSearchString,
available,
],

initialData: { data: [], total: 0, documentInfo: {} },
// placeholderData: keepPreviousData,
placeholderData: (previousData) =>
previousData ?? { data: [], total: 0, documentInfo: {} }, // https://github.com/TanStack/query/issues/8183
gcTime: 0,
queryFn: async () => {
const { data } = await kbService.chunk_list({
Expand Down
Loading

0 comments on commit 80e7966

Please sign in to comment.