diff --git a/agent/component/exesql.py b/agent/component/exesql.py index d3b5e914cd1..1b48b6b6006 100644 --- a/agent/component/exesql.py +++ b/agent/component/exesql.py @@ -20,7 +20,7 @@ import psycopg2 from agent.component.base import ComponentBase, ComponentParamBase import pyodbc - +import logging class ExeSQLParam(ComponentParamBase): """ @@ -65,13 +65,26 @@ def _run(self, history, **kwargs): self._loop += 1 ans = self.get_input() + + ans = "".join([str(a) for a in ans["content"]]) if "content" in ans else "" - ans = re.sub(r'^.*?SELECT ', 'SELECT ', repr(ans), flags=re.IGNORECASE) + if self._param.db_type == 'mssql': + # Most LLMs return the query inside a markdown fence (```sql ... ```), so extract the fenced query first + match = re.search(r"```sql\s*(.*?)\s*```", ans, re.DOTALL) + if match: + ans = match.group(1) # the SQL text captured between the fences + logging.debug("extracted sql: %s", ans) + else: + logging.debug("no markdown") + ans = re.sub(r'^.*?SELECT ', 'SELECT ', (ans), flags=re.IGNORECASE) + else: + ans = re.sub(r'^.*?SELECT ', 'SELECT ', repr(ans), flags=re.IGNORECASE) ans = re.sub(r';.*?SELECT ', '; SELECT ', ans, flags=re.IGNORECASE) ans = re.sub(r';[^;]*$', r';', ans) if not ans: raise Exception("SQL statement not found!") + logging.info("db_type: %s", self._param.db_type) if self._param.db_type in ["mysql", "mariadb"]: db = pymysql.connect(db=self._param.database, user=self._param.username, host=self._param.host, port=self._param.port, password=self._param.password) @@ -96,11 +109,12 @@ def _run(self, history, **kwargs): if not single_sql: continue try: + logging.info("single_sql: %s", single_sql) cursor.execute(single_sql) if cursor.rowcount == 0: sql_res.append({"content": "\nTotal: 0\n No record in the database!"}) continue - single_res = pd.DataFrame([i for i in cursor.fetchmany(size=self._param.top_n)]) + single_res = pd.DataFrame([i for i in cursor.fetchmany(self._param.top_n)]) single_res.columns = [i[0] for i in cursor.description] sql_res.append({"content": "\nTotal: " + str(cursor.rowcount) + "\n" + 
single_res.to_markdown()}) except Exception as e: diff --git a/api/apps/dialog_app.py b/api/apps/dialog_app.py index 4b1be30a64a..0599c4da271 100644 --- a/api/apps/dialog_app.py +++ b/api/apps/dialog_app.py @@ -41,8 +41,6 @@ def set_dialog(): req["rerank_id"] = "" similarity_threshold = req.get("similarity_threshold", 0.1) vector_similarity_weight = req.get("vector_similarity_weight", 0.3) - if vector_similarity_weight is None: - vector_similarity_weight = 0.3 llm_setting = req.get("llm_setting", {}) default_prompt = { "system": """你是一个智能助手,请总结知识库的内容来回答问题,请列举知识库中的数据详细回答。当所有知识库内容都与问题无关时,你的回答必须包括“知识库中未找到您要的答案!”这句话。回答需要考虑聊天历史。 diff --git a/api/apps/document_app.py b/api/apps/document_app.py index 4b1c3e69995..afae536520f 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -356,12 +356,11 @@ def run(): try: for id in req["doc_ids"]: info = {"run": str(req["run"]), "progress": 0} - if str(req["run"]) == TaskStatus.RUNNING.value: + if str(req["run"]) == TaskStatus.RUNNING.value and req.get("delete", False): info["progress_msg"] = "" info["chunk_num"] = 0 info["token_num"] = 0 DocumentService.update_by_id(id, info) - # if str(req["run"]) == TaskStatus.CANCEL.value: tenant_id = DocumentService.get_tenant_id(id) if not tenant_id: return get_data_error_result(message="Tenant not found!") diff --git a/api/db/services/task_service.py b/api/db/services/task_service.py index f73afa0bbde..a39a4e84f86 100644 --- a/api/db/services/task_service.py +++ b/api/db/services/task_service.py @@ -248,8 +248,9 @@ def new_task(): prev_tasks = TaskService.get_tasks(doc["id"]) if prev_tasks: + ck_num = 0 for task in tsks: - reuse_prev_task_chunks(task, prev_tasks, chunking_config) + ck_num += reuse_prev_task_chunks(task, prev_tasks, chunking_config) TaskService.filter_delete([Task.doc_id == doc["id"]]) chunk_ids = [] for task in prev_tasks: @@ -257,6 +258,7 @@ def new_task(): chunk_ids.extend(task["chunk_ids"].split()) if chunk_ids: settings.docStoreConn.delete({"id": 
chunk_ids}, search.index_name(chunking_config["tenant_id"]), chunking_config["kb_id"]) + DocumentService.update_by_id(doc["id"], {"chunk_num": ck_num}) bulk_insert_into_db(Task, tsks, True) DocumentService.begin2parse(doc["id"]) @@ -267,14 +269,17 @@ def new_task(): SVR_QUEUE_NAME, message=t ), "Can't access Redis. Please check the Redis' status." + def reuse_prev_task_chunks(task: dict, prev_tasks: list[dict], chunking_config: dict): idx = bisect.bisect_left(prev_tasks, task["from_page"], key=lambda x: x["from_page"]) if idx >= len(prev_tasks): - return + return 0 prev_task = prev_tasks[idx] if prev_task["progress"] < 1.0 or prev_task["digest"] != task["digest"] or not prev_task["chunk_ids"]: - return + return 0 task["chunk_ids"] = prev_task["chunk_ids"] task["progress"] = 1.0 task["progress_msg"] = f"Page({task['from_page']}~{task['to_page']}): reused previous task's chunks" prev_task["chunk_ids"] = "" + + return len(task["chunk_ids"].split()) \ No newline at end of file diff --git a/web/src/hooks/chunk-hooks.ts b/web/src/hooks/chunk-hooks.ts index 374f1c98335..052f2d17c62 100644 --- a/web/src/hooks/chunk-hooks.ts +++ b/web/src/hooks/chunk-hooks.ts @@ -45,9 +45,8 @@ export const useFetchNextChunkList = (): ResponseGetType<{ debouncedSearchString, available, ], - - initialData: { data: [], total: 0, documentInfo: {} }, - // placeholderData: keepPreviousData, + placeholderData: (previousData) => + previousData ?? 
{ data: [], total: 0, documentInfo: {} }, // https://github.com/TanStack/query/issues/8183 gcTime: 0, queryFn: async () => { const { data } = await kbService.chunk_list({ diff --git a/web/src/pages/add-knowledge/components/knowledge-chunk/components/document-preview/preview.tsx b/web/src/pages/add-knowledge/components/knowledge-chunk/components/document-preview/preview.tsx index 54b204602d9..6bce02396d5 100644 --- a/web/src/pages/add-knowledge/components/knowledge-chunk/components/document-preview/preview.tsx +++ b/web/src/pages/add-knowledge/components/knowledge-chunk/components/document-preview/preview.tsx @@ -32,7 +32,6 @@ const HighlightPopup = ({ // TODO: merge with DocumentPreviewer const Preview = ({ highlights: state, setWidthAndHeight }: IProps) => { const url = useGetDocumentUrl(); - useCatchDocumentError(url); const ref = useRef<(highlight: IHighlight) => void>(() => {}); const error = useCatchDocumentError(url); @@ -119,12 +118,4 @@ const Preview = ({ highlights: state, setWidthAndHeight }: IProps) => { ); }; -const compare = (oldProps: IProps, newProps: IProps) => { - const arePropsEqual = - oldProps.highlights === newProps.highlights || - (oldProps.highlights.length === 0 && newProps.highlights.length === 0); - - return arePropsEqual; -}; - export default memo(Preview);