From 52b3492b18eddae47ee8b766908277708273fa52 Mon Sep 17 00:00:00 2001 From: balibabu Date: Thu, 28 Nov 2024 10:51:30 +0800 Subject: [PATCH 01/34] Feat: Scrolling knowledge base list #3695 (#3703) ### What problem does this PR solve? Feat: Scrolling knowledge base list #3695 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- api/apps/kb_app.py | 7 ++- api/db/services/knowledgebase_service.py | 27 ++++++--- web/.umirc.ts | 2 +- web/package-lock.json | 20 +++++++ web/package.json | 1 + web/src/hooks/knowledge-hooks.ts | 55 ++++++++++++++++- web/src/locales/en.ts | 1 + web/src/locales/zh-traditional.ts | 1 + web/src/locales/zh.ts | 1 + web/src/pages/knowledge/index.less | 1 + web/src/pages/knowledge/index.tsx | 75 +++++++++++++++++------- 11 files changed, 155 insertions(+), 36 deletions(-) diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py index ebac350f10b..fa867493245 100644 --- a/api/apps/kb_app.py +++ b/api/apps/kb_app.py @@ -125,15 +125,16 @@ def detail(): @manager.route('/list', methods=['GET']) @login_required def list_kbs(): + keywords = request.args.get("keywords", "") page_number = int(request.args.get("page", 1)) items_per_page = int(request.args.get("page_size", 150)) orderby = request.args.get("orderby", "create_time") desc = request.args.get("desc", True) try: tenants = TenantService.get_joined_tenants_by_user_id(current_user.id) - kbs = KnowledgebaseService.get_by_tenant_ids( - [m["tenant_id"] for m in tenants], current_user.id, page_number, items_per_page, orderby, desc) - return get_json_result(data=kbs) + kbs, total = KnowledgebaseService.get_by_tenant_ids( + [m["tenant_id"] for m in tenants], current_user.id, page_number, items_per_page, orderby, desc, keywords) + return get_json_result(data={"kbs": kbs, "total": total}) except Exception as e: return server_error_response(e) diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py index 1b4c82cf178..e05a14a166d 100644 --- a/api/db/services/knowledgebase_service.py +++ b/api/db/services/knowledgebase_service.py @@ -34,7 +34,7 @@ def list_documents_by_ids(cls,kb_ids): @classmethod @DB.connection_context() def get_by_tenant_ids(cls, joined_tenant_ids, user_id, - page_number, items_per_page, orderby, desc): + page_number, items_per_page, orderby, desc, keywords): fields = [ cls.model.id, cls.model.avatar, @@ -51,20 +51,31 @@ def get_by_tenant_ids(cls, joined_tenant_ids, user_id, User.avatar.alias('tenant_avatar'), cls.model.update_time ] - kbs = cls.model.select(*fields).join(User, on=(cls.model.tenant_id == User.id)).where( - ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == - TenantPermission.TEAM.value)) | ( - cls.model.tenant_id == user_id)) - & (cls.model.status == StatusEnum.VALID.value) - ) + if keywords: + kbs = cls.model.select(*fields).join(User, on=(cls.model.tenant_id == User.id)).where( + ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == + TenantPermission.TEAM.value)) | ( + cls.model.tenant_id == user_id)) + & (cls.model.status == StatusEnum.VALID.value), + (fn.LOWER(cls.model.name).contains(keywords.lower())) + ) + else: + kbs = cls.model.select(*fields).join(User, on=(cls.model.tenant_id == User.id)).where( + ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == + TenantPermission.TEAM.value)) | ( + cls.model.tenant_id == user_id)) + & (cls.model.status == StatusEnum.VALID.value) + ) if desc: kbs = kbs.order_by(cls.model.getter_by(orderby).desc()) else: kbs = 
kbs.order_by(cls.model.getter_by(orderby).asc()) + count = kbs.count() + kbs = kbs.paginate(page_number, items_per_page) - return list(kbs.dicts()) + return list(kbs.dicts()), count @classmethod @DB.connection_context() diff --git a/web/.umirc.ts b/web/.umirc.ts index 8de9ff1e152..b96d84c1bb9 100644 --- a/web/.umirc.ts +++ b/web/.umirc.ts @@ -34,7 +34,7 @@ export default defineConfig({ proxy: [ { context: ['/api', '/v1'], - target: 'http://127.0.0.1:9456/', + target: 'http://127.0.0.1:9380/', changeOrigin: true, ws: true, logger: console, diff --git a/web/package-lock.json b/web/package-lock.json index 779be63867c..65421945fba 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -57,6 +57,7 @@ "react-force-graph": "^1.44.4", "react-hook-form": "^7.53.1", "react-i18next": "^14.0.0", + "react-infinite-scroll-component": "^6.1.0", "react-markdown": "^9.0.1", "react-pdf-highlighter": "^6.1.0", "react-string-replace": "^1.1.1", @@ -24705,6 +24706,25 @@ } } }, + "node_modules/react-infinite-scroll-component": { + "version": "6.1.0", + "resolved": "https://registry.npmmirror.com/react-infinite-scroll-component/-/react-infinite-scroll-component-6.1.0.tgz", + "integrity": "sha512-SQu5nCqy8DxQWpnUVLx7V7b7LcA37aM7tvoWjTLZp1dk6EJibM5/4EJKzOnl07/BsM1Y40sKLuqjCwwH/xV0TQ==", + "dependencies": { + "throttle-debounce": "^2.1.0" + }, + "peerDependencies": { + "react": ">=16.0.0" + } + }, + "node_modules/react-infinite-scroll-component/node_modules/throttle-debounce": { + "version": "2.3.0", + "resolved": "https://registry.npmmirror.com/throttle-debounce/-/throttle-debounce-2.3.0.tgz", + "integrity": "sha512-H7oLPV0P7+jgvrk+6mwwwBDmxTaxnu9HMXmloNLXwnNO0ZxZ31Orah2n8lU1eMPvsaowP2CX+USCgyovXfdOFQ==", + "engines": { + "node": ">=8" + } + }, "node_modules/react-is": { "version": "18.2.0", "resolved": "https://registry.npmmirror.com/react-is/-/react-is-18.2.0.tgz", diff --git a/web/package.json b/web/package.json index 061b17352fa..3a226d97f8a 100644 --- a/web/package.json +++ b/web/package.json @@ -68,6 +68,7 @@ "react-force-graph": "^1.44.4", "react-hook-form": "^7.53.1", "react-i18next": "^14.0.0", + "react-infinite-scroll-component": "^6.1.0", "react-markdown": "^9.0.1", "react-pdf-highlighter": "^6.1.0", "react-string-replace": "^1.1.1", diff --git a/web/src/hooks/knowledge-hooks.ts b/web/src/hooks/knowledge-hooks.ts index 0dc245c06d1..8b460c123b8 100644 --- a/web/src/hooks/knowledge-hooks.ts +++ b/web/src/hooks/knowledge-hooks.ts @@ -3,14 +3,17 @@ import { IKnowledge, ITestingResult } from '@/interfaces/database/knowledge'; import i18n from '@/locales/config'; import kbService from '@/services/knowledge-service'; import { + useInfiniteQuery, useIsMutating, useMutation, useMutationState, useQuery, useQueryClient, } from '@tanstack/react-query'; +import { useDebounce } from 'ahooks'; import { message } from 'antd'; import { useSearchParams } from 'umi'; +import { useHandleSearchChange } from './logic-hooks'; import { useSetPaginationParams } from './route-hook'; export const useKnowledgeBaseId = (): string => { @@ -50,7 +53,7 @@ export const useNextFetchKnowledgeList = ( gcTime: 0, // https://tanstack.com/query/latest/docs/framework/react/guides/caching?from=reactQueryV3 queryFn: async () => { const { data } = await kbService.getList(); - const list = data?.data ?? []; + const list = data?.data?.kbs ?? []; return shouldFilterListWithoutDocument ? 
list.filter((x: IKnowledge) => x.chunk_num > 0) : list; @@ -60,6 +63,52 @@ export const useNextFetchKnowledgeList = ( return { list: data, loading }; }; +export const useInfiniteFetchKnowledgeList = () => { + const { searchString, handleInputChange } = useHandleSearchChange(); + const debouncedSearchString = useDebounce(searchString, { wait: 500 }); + + const PageSize = 10; + const { + data, + error, + fetchNextPage, + hasNextPage, + isFetching, + isFetchingNextPage, + status, + } = useInfiniteQuery({ + queryKey: ['infiniteFetchKnowledgeList', debouncedSearchString], + queryFn: async ({ pageParam }) => { + const { data } = await kbService.getList({ + page: pageParam, + page_size: PageSize, + keywords: debouncedSearchString, + }); + const list = data?.data ?? []; + return list; + }, + initialPageParam: 1, + getNextPageParam: (lastPage, pages, lastPageParam) => { + if (lastPageParam * PageSize <= lastPage.total) { + return lastPageParam + 1; + } + return undefined; + }, + }); + return { + data, + loading: isFetching, + error, + fetchNextPage, + hasNextPage, + isFetching, + isFetchingNextPage, + status, + handleInputChange, + searchString, + }; +}; + export const useCreateKnowledge = () => { const queryClient = useQueryClient(); const { @@ -95,7 +144,9 @@ export const useDeleteKnowledge = () => { const { data } = await kbService.rmKb({ kb_id: id }); if (data.code === 0) { message.success(i18n.t(`message.deleted`)); - queryClient.invalidateQueries({ queryKey: ['fetchKnowledgeList'] }); + queryClient.invalidateQueries({ + queryKey: ['infiniteFetchKnowledgeList'], + }); } return data?.data ?? []; }, diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 57253d33a31..59b8d0871f8 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -75,6 +75,7 @@ export default { namePlaceholder: 'Please input name!', doc: 'Docs', searchKnowledgePlaceholder: 'Search', + noMoreData: 'It is all, nothing more', }, knowledgeDetails: { dataset: 'Dataset', diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index 9f92d9ffaa5..bd9602e97ee 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -75,6 +75,7 @@ export default { namePlaceholder: '請輸入名稱', doc: '文件', searchKnowledgePlaceholder: '搜索', + noMoreData: 'It is all, nothing more', }, knowledgeDetails: { dataset: '數據集', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 59c0639ac75..6ffdf246464 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -75,6 +75,7 @@ export default { namePlaceholder: '请输入名称', doc: '文档', searchKnowledgePlaceholder: '搜索', + noMoreData: '沒有更多的數據了', }, knowledgeDetails: { dataset: '数据集', diff --git a/web/src/pages/knowledge/index.less b/web/src/pages/knowledge/index.less index 2479453fa1f..25bcb00deb1 100644 --- a/web/src/pages/knowledge/index.less +++ b/web/src/pages/knowledge/index.less @@ -2,6 +2,7 @@ .knowledge { padding: 48px 0; + overflow: auto; } .topWrapper { diff --git a/web/src/pages/knowledge/index.tsx b/web/src/pages/knowledge/index.tsx index 64c1d6c73ba..d41d6416d2e 100644 --- a/web/src/pages/knowledge/index.tsx +++ b/web/src/pages/knowledge/index.tsx @@ -1,18 +1,26 @@ -import { useNextFetchKnowledgeList } from '@/hooks/knowledge-hooks'; +import { useInfiniteFetchKnowledgeList } from '@/hooks/knowledge-hooks'; import { useFetchUserInfo } from '@/hooks/user-setting-hooks'; import { PlusOutlined, SearchOutlined } from '@ant-design/icons'; -import { Button, Empty, Flex, Input, Space, Spin } from 'antd'; +import { + 
Button, + Divider, + Empty, + Flex, + Input, + Skeleton, + Space, + Spin, +} from 'antd'; +import { useTranslation } from 'react-i18next'; +import InfiniteScroll from 'react-infinite-scroll-component'; +import { useSaveKnowledge } from './hooks'; import KnowledgeCard from './knowledge-card'; import KnowledgeCreatingModal from './knowledge-creating-modal'; -import { useTranslation } from 'react-i18next'; -import { useSaveKnowledge, useSearchKnowledge } from './hooks'; +import { useMemo } from 'react'; import styles from './index.less'; const KnowledgeList = () => { - const { searchString, handleInputChange } = useSearchKnowledge(); - const { loading, list: data } = useNextFetchKnowledgeList(); - const list = data.filter((x) => x.name.includes(searchString)); const { data: userInfo } = useFetchUserInfo(); const { t } = useTranslation('translation', { keyPrefix: 'knowledgeList' }); const { @@ -22,9 +30,23 @@ const KnowledgeList = () => { onCreateOk, loading: creatingLoading, } = useSaveKnowledge(); + const { + fetchNextPage, + data, + hasNextPage, + searchString, + handleInputChange, + loading, + } = useInfiniteFetchKnowledgeList(); + console.log('🚀 ~ KnowledgeList ~ data:', data); + const nextList = data?.pages?.flatMap((x) => x.kbs) ?? []; + + const total = useMemo(() => { + return data?.pages.at(-1).total ?? 0; + }, [data?.pages]); return ( - +
@@ -53,21 +75,30 @@ const KnowledgeList = () => {
- } + endMessage={total && {t('noMoreData')} 🤐} + scrollableTarget="scrollableDiv" > - {list.length > 0 ? ( - list.map((item: any) => { - return ( - - ); - }) - ) : ( - - )} - + + {nextList?.length > 0 ? ( + nextList.map((item: any) => { + return ( + + ); + }) + ) : ( + + )} + + Date: Thu, 28 Nov 2024 11:44:23 +0800 Subject: [PATCH 02/34] Revert "Feat: Scrolling knowledge base list #3695" (#3708) Reverts infiniflow/ragflow#3703 --- api/apps/kb_app.py | 7 +-- api/db/services/knowledgebase_service.py | 27 +++------ web/.umirc.ts | 2 +- web/package-lock.json | 20 ------- web/package.json | 1 - web/src/hooks/knowledge-hooks.ts | 55 +---------------- web/src/locales/en.ts | 1 - web/src/locales/zh-traditional.ts | 1 - web/src/locales/zh.ts | 1 - web/src/pages/knowledge/index.less | 1 - web/src/pages/knowledge/index.tsx | 75 +++++++----------------- 11 files changed, 36 insertions(+), 155 deletions(-) diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py index fa867493245..ebac350f10b 100644 --- a/api/apps/kb_app.py +++ b/api/apps/kb_app.py @@ -125,16 +125,15 @@ def detail(): @manager.route('/list', methods=['GET']) @login_required def list_kbs(): - keywords = request.args.get("keywords", "") page_number = int(request.args.get("page", 1)) items_per_page = int(request.args.get("page_size", 150)) orderby = request.args.get("orderby", "create_time") desc = request.args.get("desc", True) try: tenants = TenantService.get_joined_tenants_by_user_id(current_user.id) - kbs, total = KnowledgebaseService.get_by_tenant_ids( - [m["tenant_id"] for m in tenants], current_user.id, page_number, items_per_page, orderby, desc, keywords) - return get_json_result(data={"kbs": kbs, "total": total}) + kbs = KnowledgebaseService.get_by_tenant_ids( + [m["tenant_id"] for m in tenants], current_user.id, page_number, items_per_page, orderby, desc) + return get_json_result(data=kbs) except Exception as e: return server_error_response(e) diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py index e05a14a166d..1b4c82cf178 100644 --- a/api/db/services/knowledgebase_service.py +++ b/api/db/services/knowledgebase_service.py @@ -34,7 +34,7 @@ def list_documents_by_ids(cls,kb_ids): @classmethod @DB.connection_context() def get_by_tenant_ids(cls, joined_tenant_ids, user_id, - page_number, items_per_page, orderby, desc, keywords): + page_number, items_per_page, orderby, desc): fields = [ cls.model.id, cls.model.avatar, @@ -51,31 +51,20 @@ def get_by_tenant_ids(cls, joined_tenant_ids, user_id, User.avatar.alias('tenant_avatar'), cls.model.update_time ] - if keywords: - kbs = cls.model.select(*fields).join(User, on=(cls.model.tenant_id == User.id)).where( - ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == - TenantPermission.TEAM.value)) | ( - cls.model.tenant_id == user_id)) - & (cls.model.status == StatusEnum.VALID.value), - (fn.LOWER(cls.model.name).contains(keywords.lower())) - ) - else: - kbs = cls.model.select(*fields).join(User, on=(cls.model.tenant_id == User.id)).where( - ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == - TenantPermission.TEAM.value)) | ( - cls.model.tenant_id == user_id)) - & (cls.model.status == StatusEnum.VALID.value) - ) + kbs = cls.model.select(*fields).join(User, on=(cls.model.tenant_id == User.id)).where( + ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == + TenantPermission.TEAM.value)) | ( + cls.model.tenant_id == user_id)) + & (cls.model.status == StatusEnum.VALID.value) + ) if desc: kbs = 
kbs.order_by(cls.model.getter_by(orderby).desc()) else: kbs = kbs.order_by(cls.model.getter_by(orderby).asc()) - count = kbs.count() - kbs = kbs.paginate(page_number, items_per_page) - return list(kbs.dicts()), count + return list(kbs.dicts()) @classmethod @DB.connection_context() diff --git a/web/.umirc.ts b/web/.umirc.ts index b96d84c1bb9..8de9ff1e152 100644 --- a/web/.umirc.ts +++ b/web/.umirc.ts @@ -34,7 +34,7 @@ export default defineConfig({ proxy: [ { context: ['/api', '/v1'], - target: 'http://127.0.0.1:9380/', + target: 'http://127.0.0.1:9456/', changeOrigin: true, ws: true, logger: console, diff --git a/web/package-lock.json b/web/package-lock.json index 65421945fba..779be63867c 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -57,7 +57,6 @@ "react-force-graph": "^1.44.4", "react-hook-form": "^7.53.1", "react-i18next": "^14.0.0", - "react-infinite-scroll-component": "^6.1.0", "react-markdown": "^9.0.1", "react-pdf-highlighter": "^6.1.0", "react-string-replace": "^1.1.1", @@ -24706,25 +24705,6 @@ } } }, - "node_modules/react-infinite-scroll-component": { - "version": "6.1.0", - "resolved": "https://registry.npmmirror.com/react-infinite-scroll-component/-/react-infinite-scroll-component-6.1.0.tgz", - "integrity": "sha512-SQu5nCqy8DxQWpnUVLx7V7b7LcA37aM7tvoWjTLZp1dk6EJibM5/4EJKzOnl07/BsM1Y40sKLuqjCwwH/xV0TQ==", - "dependencies": { - "throttle-debounce": "^2.1.0" - }, - "peerDependencies": { - "react": ">=16.0.0" - } - }, - "node_modules/react-infinite-scroll-component/node_modules/throttle-debounce": { - "version": "2.3.0", - "resolved": "https://registry.npmmirror.com/throttle-debounce/-/throttle-debounce-2.3.0.tgz", - "integrity": "sha512-H7oLPV0P7+jgvrk+6mwwwBDmxTaxnu9HMXmloNLXwnNO0ZxZ31Orah2n8lU1eMPvsaowP2CX+USCgyovXfdOFQ==", - "engines": { - "node": ">=8" - } - }, "node_modules/react-is": { "version": "18.2.0", "resolved": "https://registry.npmmirror.com/react-is/-/react-is-18.2.0.tgz", diff --git a/web/package.json b/web/package.json index 3a226d97f8a..061b17352fa 100644 --- a/web/package.json +++ b/web/package.json @@ -68,7 +68,6 @@ "react-force-graph": "^1.44.4", "react-hook-form": "^7.53.1", "react-i18next": "^14.0.0", - "react-infinite-scroll-component": "^6.1.0", "react-markdown": "^9.0.1", "react-pdf-highlighter": "^6.1.0", "react-string-replace": "^1.1.1", diff --git a/web/src/hooks/knowledge-hooks.ts b/web/src/hooks/knowledge-hooks.ts index 8b460c123b8..0dc245c06d1 100644 --- a/web/src/hooks/knowledge-hooks.ts +++ b/web/src/hooks/knowledge-hooks.ts @@ -3,17 +3,14 @@ import { IKnowledge, ITestingResult } from '@/interfaces/database/knowledge'; import i18n from '@/locales/config'; import kbService from '@/services/knowledge-service'; import { - useInfiniteQuery, useIsMutating, useMutation, useMutationState, useQuery, useQueryClient, } from '@tanstack/react-query'; -import { useDebounce } from 'ahooks'; import { message } from 'antd'; import { useSearchParams } from 'umi'; -import { useHandleSearchChange } from './logic-hooks'; import { useSetPaginationParams } from './route-hook'; export const useKnowledgeBaseId = (): string => { @@ -53,7 +50,7 @@ export const useNextFetchKnowledgeList = ( gcTime: 0, // https://tanstack.com/query/latest/docs/framework/react/guides/caching?from=reactQueryV3 queryFn: async () => { const { data } = await kbService.getList(); - const list = data?.data?.kbs ?? []; + const list = data?.data ?? []; return shouldFilterListWithoutDocument ? 
list.filter((x: IKnowledge) => x.chunk_num > 0) : list; @@ -63,52 +60,6 @@ export const useNextFetchKnowledgeList = ( return { list: data, loading }; }; -export const useInfiniteFetchKnowledgeList = () => { - const { searchString, handleInputChange } = useHandleSearchChange(); - const debouncedSearchString = useDebounce(searchString, { wait: 500 }); - - const PageSize = 10; - const { - data, - error, - fetchNextPage, - hasNextPage, - isFetching, - isFetchingNextPage, - status, - } = useInfiniteQuery({ - queryKey: ['infiniteFetchKnowledgeList', debouncedSearchString], - queryFn: async ({ pageParam }) => { - const { data } = await kbService.getList({ - page: pageParam, - page_size: PageSize, - keywords: debouncedSearchString, - }); - const list = data?.data ?? []; - return list; - }, - initialPageParam: 1, - getNextPageParam: (lastPage, pages, lastPageParam) => { - if (lastPageParam * PageSize <= lastPage.total) { - return lastPageParam + 1; - } - return undefined; - }, - }); - return { - data, - loading: isFetching, - error, - fetchNextPage, - hasNextPage, - isFetching, - isFetchingNextPage, - status, - handleInputChange, - searchString, - }; -}; - export const useCreateKnowledge = () => { const queryClient = useQueryClient(); const { @@ -144,9 +95,7 @@ export const useDeleteKnowledge = () => { const { data } = await kbService.rmKb({ kb_id: id }); if (data.code === 0) { message.success(i18n.t(`message.deleted`)); - queryClient.invalidateQueries({ - queryKey: ['infiniteFetchKnowledgeList'], - }); + queryClient.invalidateQueries({ queryKey: ['fetchKnowledgeList'] }); } return data?.data ?? []; }, diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 59b8d0871f8..57253d33a31 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -75,7 +75,6 @@ export default { namePlaceholder: 'Please input name!', doc: 'Docs', searchKnowledgePlaceholder: 'Search', - noMoreData: 'It is all, nothing more', }, knowledgeDetails: { dataset: 'Dataset', diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index bd9602e97ee..9f92d9ffaa5 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -75,7 +75,6 @@ export default { namePlaceholder: '請輸入名稱', doc: '文件', searchKnowledgePlaceholder: '搜索', - noMoreData: 'It is all, nothing more', }, knowledgeDetails: { dataset: '數據集', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 6ffdf246464..59c0639ac75 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -75,7 +75,6 @@ export default { namePlaceholder: '请输入名称', doc: '文档', searchKnowledgePlaceholder: '搜索', - noMoreData: '沒有更多的數據了', }, knowledgeDetails: { dataset: '数据集', diff --git a/web/src/pages/knowledge/index.less b/web/src/pages/knowledge/index.less index 25bcb00deb1..2479453fa1f 100644 --- a/web/src/pages/knowledge/index.less +++ b/web/src/pages/knowledge/index.less @@ -2,7 +2,6 @@ .knowledge { padding: 48px 0; - overflow: auto; } .topWrapper { diff --git a/web/src/pages/knowledge/index.tsx b/web/src/pages/knowledge/index.tsx index d41d6416d2e..64c1d6c73ba 100644 --- a/web/src/pages/knowledge/index.tsx +++ b/web/src/pages/knowledge/index.tsx @@ -1,26 +1,18 @@ -import { useInfiniteFetchKnowledgeList } from '@/hooks/knowledge-hooks'; +import { useNextFetchKnowledgeList } from '@/hooks/knowledge-hooks'; import { useFetchUserInfo } from '@/hooks/user-setting-hooks'; import { PlusOutlined, SearchOutlined } from '@ant-design/icons'; -import { - Button, - Divider, - Empty, - Flex, - Input, - Skeleton, - Space, - 
Spin, -} from 'antd'; -import { useTranslation } from 'react-i18next'; -import InfiniteScroll from 'react-infinite-scroll-component'; -import { useSaveKnowledge } from './hooks'; +import { Button, Empty, Flex, Input, Space, Spin } from 'antd'; import KnowledgeCard from './knowledge-card'; import KnowledgeCreatingModal from './knowledge-creating-modal'; -import { useMemo } from 'react'; +import { useTranslation } from 'react-i18next'; +import { useSaveKnowledge, useSearchKnowledge } from './hooks'; import styles from './index.less'; const KnowledgeList = () => { + const { searchString, handleInputChange } = useSearchKnowledge(); + const { loading, list: data } = useNextFetchKnowledgeList(); + const list = data.filter((x) => x.name.includes(searchString)); const { data: userInfo } = useFetchUserInfo(); const { t } = useTranslation('translation', { keyPrefix: 'knowledgeList' }); const { @@ -30,23 +22,9 @@ const KnowledgeList = () => { onCreateOk, loading: creatingLoading, } = useSaveKnowledge(); - const { - fetchNextPage, - data, - hasNextPage, - searchString, - handleInputChange, - loading, - } = useInfiniteFetchKnowledgeList(); - console.log('🚀 ~ KnowledgeList ~ data:', data); - const nextList = data?.pages?.flatMap((x) => x.kbs) ?? []; - - const total = useMemo(() => { - return data?.pages.at(-1).total ?? 0; - }, [data?.pages]); return ( - +
@@ -75,30 +53,21 @@ const KnowledgeList = () => {
- } - endMessage={total && {t('noMoreData')} 🤐} - scrollableTarget="scrollableDiv" + - - {nextList?.length > 0 ? ( - nextList.map((item: any) => { - return ( - - ); - }) - ) : ( - - )} - - + {list.length > 0 ? ( + list.map((item: any) => { + return ( + + ); + }) + ) : ( + + )} + Date: Thu, 28 Nov 2024 13:00:38 +0800 Subject: [PATCH 03/34] Edit chunk shall update instead of insert it (#3709) ### What problem does this PR solve? Edit chunk shall update instead of insert it. Close #3679 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- api/apps/chunk_app.py | 2 +- api/apps/kb_app.py | 4 +++- api/apps/user_app.py | 2 +- deepdoc/parser/docx_parser.py | 2 +- deepdoc/parser/pdf_parser.py | 10 +++++----- deepdoc/parser/resume/entities/corporations.py | 2 +- deepdoc/parser/resume/entities/schools.py | 2 +- deepdoc/parser/resume/step_one.py | 4 ++-- deepdoc/parser/resume/step_two.py | 6 +++--- deepdoc/vision/table_structure_recognizer.py | 2 +- rag/app/paper.py | 4 ++-- rag/app/picture.py | 2 +- rag/nlp/__init__.py | 6 +++--- rag/nlp/query.py | 10 +++++----- rag/nlp/rag_tokenizer.py | 6 +++--- rag/nlp/search.py | 18 +++++++++--------- rag/nlp/term_weight.py | 8 ++++---- rag/utils/es_conn.py | 5 ++++- rag/utils/infinity_conn.py | 2 +- 19 files changed, 51 insertions(+), 46 deletions(-) diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py index 4606c8b1d4d..0863df133c6 100644 --- a/api/apps/chunk_app.py +++ b/api/apps/chunk_app.py @@ -155,7 +155,7 @@ def set(): v, c = embd_mdl.encode([doc.name, req["content_with_weight"]]) v = 0.1 * v[0] + 0.9 * v[1] if doc.parser_id != ParserType.QA else v[1] d["q_%d_vec" % len(v)] = v.tolist() - settings.docStoreConn.insert([d], search.index_name(tenant_id), doc.kb_id) + settings.docStoreConn.update({"id": req["chunk_id"]}, d, search.index_name(tenant_id), doc.kb_id) return get_json_result(data=True) except Exception as e: return server_error_response(e) diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py index ebac350f10b..1bb86a5d513 100644 --- a/api/apps/kb_app.py +++ b/api/apps/kb_app.py @@ -168,7 +168,9 @@ def rm(): if not KnowledgebaseService.delete_by_id(req["kb_id"]): return get_data_error_result( message="Database error (Knowledgebase removal)!") - settings.docStoreConn.delete({"kb_id": req["kb_id"]}, search.index_name(kbs[0].tenant_id), req["kb_id"]) + for kb in kbs: + settings.docStoreConn.delete({"kb_id": kb.id}, search.index_name(kb.tenant_id), kb.id) + settings.docStoreConn.deleteIdx(search.index_name(kb.tenant_id), kb.id) return get_json_result(data=True) except Exception as e: return server_error_response(e) diff --git a/api/apps/user_app.py b/api/apps/user_app.py index 66cae41519c..cc0506316fb 100644 --- a/api/apps/user_app.py +++ b/api/apps/user_app.py @@ -252,7 +252,7 @@ def feishu_callback(): if res["code"] != 0: return redirect("/?error=%s" % res["message"]) - if "contact:user.email:readonly" not in res["data"]["scope"].split(" "): + if "contact:user.email:readonly" not in res["data"]["scope"].split(): return redirect("/?error=contact:user.email:readonly not in scope") session["access_token"] = res["data"]["access_token"] session["access_token_from"] = "feishu" diff --git a/deepdoc/parser/docx_parser.py b/deepdoc/parser/docx_parser.py index 1c1c14d3041..1910f4380c4 100644 --- a/deepdoc/parser/docx_parser.py +++ b/deepdoc/parser/docx_parser.py @@ -47,7 +47,7 @@ def blockType(b): for p, n in patt: if re.search(p, b): return n - tks = [t for t in rag_tokenizer.tokenize(b).split(" ") if len(t) > 1] + tks = [t for 
t in rag_tokenizer.tokenize(b).split() if len(t) > 1] if len(tks) > 3: if len(tks) < 12: return "Tx" diff --git a/deepdoc/parser/pdf_parser.py b/deepdoc/parser/pdf_parser.py index 331d5da1726..9c6139469d5 100644 --- a/deepdoc/parser/pdf_parser.py +++ b/deepdoc/parser/pdf_parser.py @@ -108,13 +108,13 @@ def _updown_concat_features(self, up, down): h = max(self.__height(up), self.__height(down)) y_dis = self._y_dis(up, down) LEN = 6 - tks_down = rag_tokenizer.tokenize(down["text"][:LEN]).split(" ") - tks_up = rag_tokenizer.tokenize(up["text"][-LEN:]).split(" ") + tks_down = rag_tokenizer.tokenize(down["text"][:LEN]).split() + tks_up = rag_tokenizer.tokenize(up["text"][-LEN:]).split() tks_all = up["text"][-LEN:].strip() \ + (" " if re.match(r"[a-zA-Z0-9]+", up["text"][-1] + down["text"][0]) else "") \ + down["text"][:LEN].strip() - tks_all = rag_tokenizer.tokenize(tks_all).split(" ") + tks_all = rag_tokenizer.tokenize(tks_all).split() fea = [ up.get("R", -1) == down.get("R", -1), y_dis / h, @@ -565,13 +565,13 @@ def _filter_forpages(self): if i >= len(self.boxes): break prefix = self.boxes[i]["text"].strip()[:3] if not eng else " ".join( - self.boxes[i]["text"].strip().split(" ")[:2]) + self.boxes[i]["text"].strip().split()[:2]) while not prefix: self.boxes.pop(i) if i >= len(self.boxes): break prefix = self.boxes[i]["text"].strip()[:3] if not eng else " ".join( - self.boxes[i]["text"].strip().split(" ")[:2]) + self.boxes[i]["text"].strip().split()[:2]) self.boxes.pop(i) if i >= len(self.boxes) or not prefix: break diff --git a/deepdoc/parser/resume/entities/corporations.py b/deepdoc/parser/resume/entities/corporations.py index c26f58aebd4..142b0f5e492 100644 --- a/deepdoc/parser/resume/entities/corporations.py +++ b/deepdoc/parser/resume/entities/corporations.py @@ -47,7 +47,7 @@ def corpNorm(nm, add_region=True): nm = re.sub(r"(计算机|技术|(技术|科技|网络)*有限公司|公司|有限|研发中心|中国|总部)$", "", nm, 10000, re.IGNORECASE) if not nm or (len(nm)<5 and not regions.isName(nm[0:2])):return nm - tks = rag_tokenizer.tokenize(nm).split(" ") + tks = rag_tokenizer.tokenize(nm).split() reg = [t for i,t in enumerate(tks) if regions.isName(t) and (t != "中国" or i > 0)] nm = "" for t in tks: diff --git a/deepdoc/parser/resume/entities/schools.py b/deepdoc/parser/resume/entities/schools.py index 598d7ae83f5..31662cde94f 100644 --- a/deepdoc/parser/resume/entities/schools.py +++ b/deepdoc/parser/resume/entities/schools.py @@ -44,7 +44,7 @@ def loadRank(fnm): def split(txt): tks = [] - for t in re.sub(r"[ \t]+", " ",txt).split(" "): + for t in re.sub(r"[ \t]+", " ",txt).split(): if tks and re.match(r".*[a-zA-Z]$", tks[-1]) and \ re.match(r"[a-zA-Z]", t) and tks: tks[-1] = tks[-1] + " " + t diff --git a/deepdoc/parser/resume/step_one.py b/deepdoc/parser/resume/step_one.py index 90e52e45078..96cc668d316 100644 --- a/deepdoc/parser/resume/step_one.py +++ b/deepdoc/parser/resume/step_one.py @@ -80,7 +80,7 @@ def deal_obj(obj, k, kk): def loadjson(line): try: return json.loads(line) - except Exception as e: + except Exception: pass return {} @@ -183,4 +183,4 @@ def arr2str(a): "\r", "\\n")) # print(df.values.tolist()) - return dict(zip([n.split(" ")[0] for n in FIELDS], df.values.tolist()[0])) + return dict(zip([n.split()[0] for n in FIELDS], df.values.tolist()[0])) diff --git a/deepdoc/parser/resume/step_two.py b/deepdoc/parser/resume/step_two.py index afc5fb47da7..7d429777423 100644 --- a/deepdoc/parser/resume/step_two.py +++ b/deepdoc/parser/resume/step_two.py @@ -100,7 +100,7 @@ def forEdu(cv): if n.get("school_name") and 
isinstance(n["school_name"], str): sch.append(re.sub(r"(211|985|重点大学|[,&;;-])", "", n["school_name"])) e["sch_nm_kwd"] = sch[-1] - fea.append(rag_tokenizer.fine_grained_tokenize(rag_tokenizer.tokenize(n.get("school_name", ""))).split(" ")[-1]) + fea.append(rag_tokenizer.fine_grained_tokenize(rag_tokenizer.tokenize(n.get("school_name", ""))).split()[-1]) if n.get("discipline_name") and isinstance(n["discipline_name"], str): maj.append(n["discipline_name"]) @@ -485,7 +485,7 @@ def hasValues(flds): nm = re.sub(r"[\n——\-\((\+].*", "", cv["name"].strip()) nm = re.sub(r"[ \t ]+", " ", nm) if re.match(r"[a-zA-Z ]+$", nm): - if len(nm.split(" ")) > 1: + if len(nm.split()) > 1: cv["name"] = nm else: nm = "" @@ -503,7 +503,7 @@ def hasValues(flds): for py in PY.get_pinyins(nm[:20], ''): for i in range(2, len(py) + 1): cv["name_py_pref_tks"] += " " + py[:i] for py in PY.get_pinyins(nm[:20], ' '): - py = py.split(" ") + py = py.split() for i in range(1, len(py) + 1): cv["name_py_pref0_tks"] += " " + "".join(py[:i]) cv["name_kwd"] = name diff --git a/deepdoc/vision/table_structure_recognizer.py b/deepdoc/vision/table_structure_recognizer.py index 5759c0f644b..be48ca95e4c 100644 --- a/deepdoc/vision/table_structure_recognizer.py +++ b/deepdoc/vision/table_structure_recognizer.py @@ -117,7 +117,7 @@ def blockType(b): for p, n in patt: if re.search(p, b["text"].strip()): return n - tks = [t for t in rag_tokenizer.tokenize(b["text"]).split(" ") if len(t) > 1] + tks = [t for t in rag_tokenizer.tokenize(b["text"]).split() if len(t) > 1] if len(tks) > 3: if len(tks) < 12: return "Tx" diff --git a/rag/app/paper.py b/rag/app/paper.py index 1be93be8656..23483cc0482 100644 --- a/rag/app/paper.py +++ b/rag/app/paper.py @@ -99,11 +99,11 @@ def _begin(txt): i += 1 txt = b["text"].lower().strip() if re.match("(abstract|摘要)", txt): - if len(txt.split(" ")) > 32 or len(txt) > 64: + if len(txt.split()) > 32 or len(txt) > 64: abstr = txt + self._line_tag(b, zoomin) break txt = self.boxes[i]["text"].lower().strip() - if len(txt.split(" ")) > 32 or len(txt) > 64: + if len(txt.split()) > 32 or len(txt) > 64: abstr = txt + self._line_tag(self.boxes[i], zoomin) i += 1 break diff --git a/rag/app/picture.py b/rag/app/picture.py index fa4862b296a..8d5df5219a7 100644 --- a/rag/app/picture.py +++ b/rag/app/picture.py @@ -33,7 +33,7 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs): txt = "\n".join([t[0] for _, t in bxs if t[0]]) eng = lang.lower() == "english" callback(0.4, "Finish OCR: (%s ...)" % txt[:12]) - if (eng and len(txt.split(" ")) > 32) or len(txt) > 32: + if (eng and len(txt.split()) > 32) or len(txt) > 32: tokenize(doc, txt, eng) callback(0.8, "OCR results is too long to use CV LLM.") return [doc] diff --git a/rag/nlp/__init__.py b/rag/nlp/__init__.py index 27233874ce2..41b895978a7 100644 --- a/rag/nlp/__init__.py +++ b/rag/nlp/__init__.py @@ -325,12 +325,12 @@ def get(i): sections.pop(i) if i >= len(sections): break - prefix = get(i)[:3] if not eng else " ".join(get(i).split(" ")[:2]) + prefix = get(i)[:3] if not eng else " ".join(get(i).split()[:2]) while not prefix: sections.pop(i) if i >= len(sections): break - prefix = get(i)[:3] if not eng else " ".join(get(i).split(" ")[:2]) + prefix = get(i)[:3] if not eng else " ".join(get(i).split()[:2]) sections.pop(i) if i >= len(sections) or not prefix: break @@ -389,7 +389,7 @@ def title_frequency(bull, sections): def not_title(txt): if re.match(r"第[零一二三四五六七八九十百0-9]+条", txt): return False - if len(txt.split(" ")) > 12 or (txt.find(" ") < 0 and 
len(txt) >= 32): + if len(txt.split()) > 12 or (txt.find(" ") < 0 and len(txt) >= 32): return True return re.search(r"[,;,。;!!]", txt) diff --git a/rag/nlp/query.py b/rag/nlp/query.py index 63fed29b8aa..9a0ceafae73 100644 --- a/rag/nlp/query.py +++ b/rag/nlp/query.py @@ -74,7 +74,7 @@ def question(self, txt, tbl="qa", min_match:float=0.6): if not self.isChinese(txt): txt = FulltextQueryer.rmWWW(txt) - tks = rag_tokenizer.tokenize(txt).split(" ") + tks = rag_tokenizer.tokenize(txt).split() keywords = [t for t in tks if t] tks_w = self.tw.weights(tks, preprocess=False) tks_w = [(re.sub(r"[ \\\"'^]", "", tk), w) for tk, w in tks_w] @@ -83,7 +83,7 @@ def question(self, txt, tbl="qa", min_match:float=0.6): syns = [] for tk, w in tks_w: syn = self.syn.lookup(tk) - syn = rag_tokenizer.tokenize(" ".join(syn)).split(" ") + syn = rag_tokenizer.tokenize(" ".join(syn)).split() keywords.extend(syn) syn = ["\"{}\"^{:.4f}".format(s, w / 4.) for s in syn] syns.append(" ".join(syn)) @@ -114,7 +114,7 @@ def need_fine_grained_tokenize(tk): txt = FulltextQueryer.rmWWW(txt) qs, keywords = [], [] - for tt in self.tw.split(txt)[:256]: # .split(" "): + for tt in self.tw.split(txt)[:256]: # .split(): if not tt: continue keywords.append(tt) @@ -125,7 +125,7 @@ def need_fine_grained_tokenize(tk): tms = [] for tk, w in sorted(twts, key=lambda x: x[1] * -1): sm = ( - rag_tokenizer.fine_grained_tokenize(tk).split(" ") + rag_tokenizer.fine_grained_tokenize(tk).split() if need_fine_grained_tokenize(tk) else [] ) @@ -194,7 +194,7 @@ def token_similarity(self, atks, btkss): def toDict(tks): d = {} if isinstance(tks, str): - tks = tks.split(" ") + tks = tks.split() for t, c in self.tw.weights(tks, preprocess=False): if t not in d: d[t] = 0 diff --git a/rag/nlp/rag_tokenizer.py b/rag/nlp/rag_tokenizer.py index 75541f599a4..0815daaeeef 100644 --- a/rag/nlp/rag_tokenizer.py +++ b/rag/nlp/rag_tokenizer.py @@ -192,7 +192,7 @@ def merge_(self, tks): # if split chars is part of token res = [] - tks = re.sub(r"[ ]+", " ", tks).split(" ") + tks = re.sub(r"[ ]+", " ", tks).split() s = 0 while True: if s >= len(tks): @@ -329,7 +329,7 @@ def tokenize(self, line): return self.merge_(res) def fine_grained_tokenize(self, tks): - tks = tks.split(" ") + tks = tks.split() zh_num = len([1 for c in tks if c and is_chinese(c[0])]) if zh_num < len(tks) * 0.2: res = [] @@ -393,7 +393,7 @@ def is_alphabet(s): def naiveQie(txt): tks = [] - for t in txt.split(" "): + for t in txt.split(): if tks and re.match(r".*[a-zA-Z]$", tks[-1] ) and re.match(r".*[a-zA-Z]$", t): tks.append(" ") diff --git a/rag/nlp/search.py b/rag/nlp/search.py index eb389bdd85b..154d5850d4d 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -114,7 +114,7 @@ def search(self, req, idx_names: str | list[str], kb_ids: list[str], emb_mdl=Non for k in keywords: kwds.add(k) - for kk in rag_tokenizer.fine_grained_tokenize(k).split(" "): + for kk in rag_tokenizer.fine_grained_tokenize(k).split(): if len(kk) < 2: continue if kk in kwds: @@ -186,7 +186,7 @@ def insert_citations(self, answer, chunks, chunk_v, assert len(ans_v[0]) == len(chunk_v[0]), "The dimension of query and chunk do not match: {} vs. 
{}".format( len(ans_v[0]), len(chunk_v[0])) - chunks_tks = [rag_tokenizer.tokenize(self.qryr.rmWWW(ck)).split(" ") + chunks_tks = [rag_tokenizer.tokenize(self.qryr.rmWWW(ck)).split() for ck in chunks] cites = {} thr = 0.63 @@ -195,7 +195,7 @@ def insert_citations(self, answer, chunks, chunk_v, sim, tksim, vtsim = self.qryr.hybrid_similarity(ans_v[i], chunk_v, rag_tokenizer.tokenize( - self.qryr.rmWWW(pieces_[i])).split(" "), + self.qryr.rmWWW(pieces_[i])).split(), chunks_tks, tkweight, vtweight) mx = np.max(sim) * 0.99 @@ -244,8 +244,8 @@ def rerank(self, sres, query, tkweight=0.3, sres.field[i]["important_kwd"] = [sres.field[i]["important_kwd"]] ins_tw = [] for i in sres.ids: - content_ltks = sres.field[i][cfield].split(" ") - title_tks = [t for t in sres.field[i].get("title_tks", "").split(" ") if t] + content_ltks = sres.field[i][cfield].split() + title_tks = [t for t in sres.field[i].get("title_tks", "").split() if t] important_kwd = sres.field[i].get("important_kwd", []) tks = content_ltks + title_tks + important_kwd ins_tw.append(tks) @@ -265,8 +265,8 @@ def rerank_by_model(self, rerank_mdl, sres, query, tkweight=0.3, sres.field[i]["important_kwd"] = [sres.field[i]["important_kwd"]] ins_tw = [] for i in sres.ids: - content_ltks = sres.field[i][cfield].split(" ") - title_tks = [t for t in sres.field[i].get("title_tks", "").split(" ") if t] + content_ltks = sres.field[i][cfield].split() + title_tks = [t for t in sres.field[i].get("title_tks", "").split() if t] important_kwd = sres.field[i].get("important_kwd", []) tks = content_ltks + title_tks + important_kwd ins_tw.append(tks) @@ -279,8 +279,8 @@ def rerank_by_model(self, rerank_mdl, sres, query, tkweight=0.3, def hybrid_similarity(self, ans_embd, ins_embd, ans, inst): return self.qryr.hybrid_similarity(ans_embd, ins_embd, - rag_tokenizer.tokenize(ans).split(" "), - rag_tokenizer.tokenize(inst).split(" ")) + rag_tokenizer.tokenize(ans).split(), + rag_tokenizer.tokenize(inst).split()) def retrieval(self, question, embd_mdl, tenant_ids, kb_ids, page, page_size, similarity_threshold=0.2, vector_similarity_weight=0.3, top=1024, doc_ids=None, aggs=True, rerank_mdl=None, highlight=False): diff --git a/rag/nlp/term_weight.py b/rag/nlp/term_weight.py index 810f6b88176..bbf446ddfb6 100644 --- a/rag/nlp/term_weight.py +++ b/rag/nlp/term_weight.py @@ -99,7 +99,7 @@ def pretoken(self, txt, num=False, stpwd=True): txt = re.sub(p, r, txt) res = [] - for t in rag_tokenizer.tokenize(txt).split(" "): + for t in rag_tokenizer.tokenize(txt).split(): tk = t if (stpwd and tk in self.stop_words) or ( re.match(r"[0-9]$", tk) and not num): @@ -150,7 +150,7 @@ def ner(self, t): def split(self, txt): tks = [] - for t in re.sub(r"[ \t]+", " ", txt).split(" "): + for t in re.sub(r"[ \t]+", " ", txt).split(): if tks and re.match(r".*[a-zA-Z]$", tks[-1]) and \ re.match(r".*[a-zA-Z]$", t) and tks and \ self.ne.get(t, "") != "func" and self.ne.get(tks[-1], "") != "func": @@ -198,7 +198,7 @@ def freq(t): s = 0 if not s and len(t) >= 4: - s = [tt for tt in rag_tokenizer.fine_grained_tokenize(t).split(" ") if len(tt) > 1] + s = [tt for tt in rag_tokenizer.fine_grained_tokenize(t).split() if len(tt) > 1] if len(s) > 1: s = np.min([freq(tt) for tt in s]) / 6. else: @@ -214,7 +214,7 @@ def df(t): elif re.match(r"[a-z. 
-]+$", t): return 300 elif len(t) >= 4: - s = [tt for tt in rag_tokenizer.fine_grained_tokenize(t).split(" ") if len(tt) > 1] + s = [tt for tt in rag_tokenizer.fine_grained_tokenize(t).split() if len(tt) > 1] if len(s) > 1: return max(3, np.min([df(tt) for tt in s]) / 6.) diff --git a/rag/utils/es_conn.py b/rag/utils/es_conn.py index e964a60f714..09981fbab45 100644 --- a/rag/utils/es_conn.py +++ b/rag/utils/es_conn.py @@ -85,6 +85,9 @@ def createIdx(self, indexName: str, knowledgebaseId: str, vectorSize: int): logging.exception("ESConnection.createIndex error %s" % (indexName)) def deleteIdx(self, indexName: str, knowledgebaseId: str): + if len(knowledgebaseId) > 0: + # The index need to be alive after any kb deletion since all kb under this tenant are in one index. + return try: self.es.indices.delete(index=indexName, allow_no_indices=True) except NotFoundError: @@ -400,7 +403,7 @@ def getHighlight(self, res, keywords: list[str], fieldnm: str): if not hlts: continue txt = "...".join([a for a in list(hlts.items())[0][1]]) - if not is_english(txt.split(" ")): + if not is_english(txt.split()): ans[d["_id"]] = txt continue diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py index 1c0ec8fb29e..699e279e3b2 100644 --- a/rag/utils/infinity_conn.py +++ b/rag/utils/infinity_conn.py @@ -419,7 +419,7 @@ def getFields(self, res, fields: list[str]) -> list[str, dict]: v = list(v) elif fieldnm == "important_kwd": assert isinstance(v, str) - v = v.split(" ") + v = v.split() else: if not isinstance(v, str): v = str(v) From e678819f706686229fa23a7680b80f0903f0dbe4 Mon Sep 17 00:00:00 2001 From: kuschzzp <38914005+kuschzzp@users.noreply.github.com> Date: Thu, 28 Nov 2024 13:09:02 +0800 Subject: [PATCH 04/34] Fix RGBA error (#3707) ### What problem does this PR solve? **Passing cv_mdl.describe() is not an RGB converted image** ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/app/picture.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rag/app/picture.py b/rag/app/picture.py index 8d5df5219a7..fb9b1e26952 100644 --- a/rag/app/picture.py +++ b/rag/app/picture.py @@ -41,7 +41,10 @@ def chunk(filename, binary, tenant_id, lang, callback=None, **kwargs): try: callback(0.4, "Use CV LLM to describe the picture.") cv_mdl = LLMBundle(tenant_id, LLMType.IMAGE2TEXT, lang=lang) - ans = cv_mdl.describe(binary) + img_binary = io.BytesIO() + img.save(img_binary, format='JPEG') + img_binary.seek(0) + ans = cv_mdl.describe(img_binary.read()) callback(0.8, "CV LLM respond: %s ..." % ans[:32]) txt += "\n" + ans tokenize(doc, txt, eng) From 43e367f2eabe8082fb0a7aaf90d4222ec6a49e1b Mon Sep 17 00:00:00 2001 From: Zhichang Yu Date: Thu, 28 Nov 2024 14:10:22 +0800 Subject: [PATCH 05/34] Detect shape error of embedding (#3710) ### What problem does this PR solve? Detect shape error of embedding. 
Close #2997 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/nlp/search.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/rag/nlp/search.py b/rag/nlp/search.py index 154d5850d4d..a24f7a71d87 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -46,6 +46,9 @@ class SearchResult: def get_vector(self, txt, emb_mdl, topk=10, similarity=0.1): qv, _ = emb_mdl.encode_queries(txt) + shape = np.array(qv).shape + if len(shape) > 1: + raise Exception(f"Dealer.get_vector returned array's shape {shape} doesn't match expectation(exact one dimension).") embedding_data = [float(v) for v in qv] vector_column_name = f"q_{len(embedding_data)}_vec" return MatchDenseExpr(vector_column_name, embedding_data, 'float', 'cosine', topk, {"similarity": similarity}) From 7ae8828e616b794f6725cae9d0fd02f36d4c543d Mon Sep 17 00:00:00 2001 From: writinwaters <93570324+writinwaters@users.noreply.github.com> Date: Thu, 28 Nov 2024 14:57:50 +0800 Subject: [PATCH 06/34] Added release notes v0.12.0 (#3711) ### What problem does this PR solve? ### Type of change - [x] Documentation Update --- docs/guides/upgrade_ragflow.mdx | 6 ++++- docs/release_notes.md | 45 ++++++++++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/docs/guides/upgrade_ragflow.mdx b/docs/guides/upgrade_ragflow.mdx index bdb5218adb5..31d19d91d6f 100644 --- a/docs/guides/upgrade_ragflow.mdx +++ b/docs/guides/upgrade_ragflow.mdx @@ -11,7 +11,9 @@ Upgrade RAGFlow to `dev-slim`/`dev` or the latest, published release. ## Upgrade RAGFlow to `dev-slim`/`dev`, the most recent, tested Docker image -`dev-slim` refers to the RAGFlow Docker image *without* embedding models, while `dev` refers to the RAGFlow Docker image with embedding models. For details on their differences, see **docker/.env**. +`dev-slim` refers to the RAGFlow Docker image *without* embedding models, while `dev` refers to the RAGFlow Docker image with embedding models. For details on their differences, see [ragflow/docker/.env](https://github.com/infiniflow/ragflow/blob/main/docker/.env). + +To upgrade RAGFlow, you must upgrade **both** your code **and** your Docker image: 1. Clone the repo @@ -52,6 +54,8 @@ RAGFLOW_IMAGE=infiniflow/ragflow:dev ## Upgrade RAGFlow to the most recent, officially published release +To upgrade RAGFlow, you must upgrade **both** your code **and** your Docker image: + 1. Clone the repo ```bash diff --git a/docs/release_notes.md b/docs/release_notes.md index 8b489a35317..a05515c7257 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -24,14 +24,14 @@ Released on November 26, 2024. - Replaces Redis with Valkey. - Adds three new UI languages (*contributed by the community*): Indonesian, Spanish, and Vietnamese. -### Compatability changes +### Compatibility changes As of this release, **service_config.yaml.template** replaces **service_config.yaml** for configuring backend services. Upon Docker container startup, the environment variables defined in this template file are automatically populated and a **service_config.yaml** is auto-generated from it. [#3341](https://github.com/infiniflow/ragflow/pull/3341) This approach eliminates the need to manually update **service_config.yaml** after making changes to **.env**, facilitating dynamic environment configurations. 
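The template mechanism described above amounts to environment-variable substitution at container startup. A minimal sketch of how such population could work, assuming a shell-style `${VAR:-default}` placeholder syntax (the actual entrypoint script and placeholder format may differ):

```python
import os
import re

def render_template(template_path: str, output_path: str) -> None:
    """Fill ${VAR:-default} placeholders from the process environment."""
    with open(template_path) as f:
        text = f.read()

    def substitute(match: re.Match) -> str:
        name, default = match.group(1), match.group(2)
        # Fall back to the inline default when the variable is unset.
        return os.environ.get(name, default if default is not None else "")

    rendered = re.sub(r"\$\{(\w+)(?::-([^}]*))?\}", substitute, text)
    with open(output_path, "w") as f:
        f.write(rendered)

# e.g. render_template("service_config.yaml.template", "service_config.yaml")
```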
-:::danger -Ensure that you [upgrade both your code and Docker image to this release](https://ragflow.io/docs/dev/upgrade_ragflow#upgrade-ragflow-to-the-most-recent-officially-published-release) before trying this new approach. +:::danger IMPORTANT +Ensure that you [upgrade **both** your code **and** Docker image to this release](https://ragflow.io/docs/dev/upgrade_ragflow#upgrade-ragflow-to-the-most-recent-officially-published-release) before trying this new approach. ::: ### Related APIs @@ -56,6 +56,10 @@ Ensure that you [upgrade both your code and Docker image to this release](https: ## v0.13.0 +Released on October 31, 2024. + +### New features + - Adds the team management functionality for all users. - Updates the Agent UI to improve usability. - Adds support for Markdown chunking in the **General** chunk method. @@ -78,4 +82,37 @@ pip install ragflow-sdk==0.13.0 - [Acquire a RAGFlow API key](https://ragflow.io/docs/dev/acquire_ragflow_api_key) - [HTTP API Reference](https://ragflow.io/docs/dev/http_api_reference) -- [Python API Reference](https://ragflow.io/docs/dev/python_api_reference) \ No newline at end of file +- [Python API Reference](https://ragflow.io/docs/dev/python_api_reference) + +## v0.12.0 + +Released on September 30, 2024. + +### New features + +- Offers slim editions of RAGFlow's Docker images, which do not include built-in BGE/BCE embedding or reranking models. +- Improves the results of multi-round dialogues. +- Enables users to remove added LLM vendors. +- Adds support for OpenTTS and SparkTTS models. +- Implements an **Excel to HTML** toggle in the **General** chunk method, allowing users to parse an spreadsheet into either an HTML table or key-value pairs by row. +- Adds agent tools **YahooFance** and **Jin10**. +- Adds a template for an investment advisor agent. + +### Compatibility changes + +As of this release, RAGFlow offers slim editions of its Docker images to improve the experience for users with limited Internet access. A slim edition of RAGFlow's Docker image does not include built-in BGE/BCE embedding models and has a size of about 1GB; a full edition of RAGFlow is approximately 9GB and includes both built-in embedding models and embedding models that will be downloaded once you select them in the RAGFlow UI. + +The default Docker image edition is `dev-slim`. The following list clarifies the differences between various editions: + +- `dev-slim`: The slim edition of the most recent tested Docker image. +- `v0.12.0-slim`: The slim edition of the most recent **officially released** Docker image. +- `dev`: The full edition of the most recent tested Docker image. +- `v0.12.0`: The full edition of the most recent **officially released** Docker image. + +See [Upgrade RAGFlow](https://ragflow.io/docs/dev/upgrade_ragflow) for instructions on upgrading. + +### Documentation + +#### Added documents + +- [Upgrade RAGFlow](https://ragflow.io/docs/dev/upgrade_ragflow) \ No newline at end of file From ec560cc99dcd40f6bcdc12aea2e3d1d45c67c190 Mon Sep 17 00:00:00 2001 From: balibabu Date: Thu, 28 Nov 2024 15:25:38 +0800 Subject: [PATCH 07/34] Feat: Scrolling knowledge base list and set the number of entries per page to 30 #3695 (#3712) ### What problem does this PR solve? 
Feat: Scrolling knowledge base list and set the number of entries per page to 30 #3695 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- api/apps/kb_app.py | 7 +- api/db/services/knowledgebase_service.py | 28 +++++-- .../test/test_frontend_api/test_dataset.py | 4 +- web/.umirc.ts | 2 +- web/package-lock.json | 20 +++++ web/package.json | 1 + web/src/hooks/knowledge-hooks.ts | 55 +++++++++++++- web/src/locales/en.ts | 1 + web/src/locales/zh-traditional.ts | 1 + web/src/locales/zh.ts | 1 + web/src/pages/knowledge/index.less | 1 + web/src/pages/knowledge/index.tsx | 75 +++++++++++++------ 12 files changed, 158 insertions(+), 38 deletions(-) diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py index 1bb86a5d513..ff7f0ae135a 100644 --- a/api/apps/kb_app.py +++ b/api/apps/kb_app.py @@ -125,15 +125,16 @@ def detail(): @manager.route('/list', methods=['GET']) @login_required def list_kbs(): + keywords = request.args.get("keywords", "") page_number = int(request.args.get("page", 1)) items_per_page = int(request.args.get("page_size", 150)) orderby = request.args.get("orderby", "create_time") desc = request.args.get("desc", True) try: tenants = TenantService.get_joined_tenants_by_user_id(current_user.id) - kbs = KnowledgebaseService.get_by_tenant_ids( - [m["tenant_id"] for m in tenants], current_user.id, page_number, items_per_page, orderby, desc) - return get_json_result(data=kbs) + kbs, total = KnowledgebaseService.get_by_tenant_ids( + [m["tenant_id"] for m in tenants], current_user.id, page_number, items_per_page, orderby, desc, keywords) + return get_json_result(data={"kbs": kbs, "total": total}) except Exception as e: return server_error_response(e) diff --git a/api/db/services/knowledgebase_service.py b/api/db/services/knowledgebase_service.py index 1b4c82cf178..47105e7499d 100644 --- a/api/db/services/knowledgebase_service.py +++ b/api/db/services/knowledgebase_service.py @@ -16,6 +16,7 @@ from api.db import StatusEnum, TenantPermission from api.db.db_models import Knowledgebase, DB, Tenant, User, UserTenant,Document from api.db.services.common_service import CommonService +from peewee import fn class KnowledgebaseService(CommonService): @@ -34,7 +35,7 @@ def list_documents_by_ids(cls,kb_ids): @classmethod @DB.connection_context() def get_by_tenant_ids(cls, joined_tenant_ids, user_id, - page_number, items_per_page, orderby, desc): + page_number, items_per_page, orderby, desc, keywords): fields = [ cls.model.id, cls.model.avatar, @@ -51,20 +52,31 @@ def get_by_tenant_ids(cls, joined_tenant_ids, user_id, User.avatar.alias('tenant_avatar'), cls.model.update_time ] - kbs = cls.model.select(*fields).join(User, on=(cls.model.tenant_id == User.id)).where( - ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == - TenantPermission.TEAM.value)) | ( - cls.model.tenant_id == user_id)) - & (cls.model.status == StatusEnum.VALID.value) - ) + if keywords: + kbs = cls.model.select(*fields).join(User, on=(cls.model.tenant_id == User.id)).where( + ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == + TenantPermission.TEAM.value)) | ( + cls.model.tenant_id == user_id)) + & (cls.model.status == StatusEnum.VALID.value), + (fn.LOWER(cls.model.name).contains(keywords.lower())) + ) + else: + kbs = cls.model.select(*fields).join(User, on=(cls.model.tenant_id == User.id)).where( + ((cls.model.tenant_id.in_(joined_tenant_ids) & (cls.model.permission == + TenantPermission.TEAM.value)) | ( + cls.model.tenant_id == user_id)) + & (cls.model.status == 
StatusEnum.VALID.value) + ) if desc: kbs = kbs.order_by(cls.model.getter_by(orderby).desc()) else: kbs = kbs.order_by(cls.model.getter_by(orderby).asc()) + count = kbs.count() + kbs = kbs.paginate(page_number, items_per_page) - return list(kbs.dicts()) + return list(kbs.dicts()), count @classmethod @DB.connection_context() diff --git a/sdk/python/test/test_frontend_api/test_dataset.py b/sdk/python/test/test_frontend_api/test_dataset.py index 52b2c2d1c4c..c78d8e0df07 100644 --- a/sdk/python/test/test_frontend_api/test_dataset.py +++ b/sdk/python/test/test_frontend_api/test_dataset.py @@ -13,7 +13,7 @@ def test_dataset(get_auth): while True: res = list_dataset(get_auth, page_number) data = res.get("data") - for item in data: + for item in data.get("kbs"): dataset_id = item.get("id") dataset_list.append(dataset_id) if len(dataset_list) < page_number * 150: @@ -42,7 +42,7 @@ def test_dataset_1k_dataset(get_auth): while True: res = list_dataset(get_auth, page_number) data = res.get("data") - for item in data: + for item in data.get("kbs"): dataset_id = item.get("id") dataset_list.append(dataset_id) if len(dataset_list) < page_number * 150: diff --git a/web/.umirc.ts b/web/.umirc.ts index 8de9ff1e152..b96d84c1bb9 100644 --- a/web/.umirc.ts +++ b/web/.umirc.ts @@ -34,7 +34,7 @@ export default defineConfig({ proxy: [ { context: ['/api', '/v1'], - target: 'http://127.0.0.1:9456/', + target: 'http://127.0.0.1:9380/', changeOrigin: true, ws: true, logger: console, diff --git a/web/package-lock.json b/web/package-lock.json index 779be63867c..65421945fba 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -57,6 +57,7 @@ "react-force-graph": "^1.44.4", "react-hook-form": "^7.53.1", "react-i18next": "^14.0.0", + "react-infinite-scroll-component": "^6.1.0", "react-markdown": "^9.0.1", "react-pdf-highlighter": "^6.1.0", "react-string-replace": "^1.1.1", @@ -24705,6 +24706,25 @@ } } }, + "node_modules/react-infinite-scroll-component": { + "version": "6.1.0", + "resolved": "https://registry.npmmirror.com/react-infinite-scroll-component/-/react-infinite-scroll-component-6.1.0.tgz", + "integrity": "sha512-SQu5nCqy8DxQWpnUVLx7V7b7LcA37aM7tvoWjTLZp1dk6EJibM5/4EJKzOnl07/BsM1Y40sKLuqjCwwH/xV0TQ==", + "dependencies": { + "throttle-debounce": "^2.1.0" + }, + "peerDependencies": { + "react": ">=16.0.0" + } + }, + "node_modules/react-infinite-scroll-component/node_modules/throttle-debounce": { + "version": "2.3.0", + "resolved": "https://registry.npmmirror.com/throttle-debounce/-/throttle-debounce-2.3.0.tgz", + "integrity": "sha512-H7oLPV0P7+jgvrk+6mwwwBDmxTaxnu9HMXmloNLXwnNO0ZxZ31Orah2n8lU1eMPvsaowP2CX+USCgyovXfdOFQ==", + "engines": { + "node": ">=8" + } + }, "node_modules/react-is": { "version": "18.2.0", "resolved": "https://registry.npmmirror.com/react-is/-/react-is-18.2.0.tgz", diff --git a/web/package.json b/web/package.json index 061b17352fa..3a226d97f8a 100644 --- a/web/package.json +++ b/web/package.json @@ -68,6 +68,7 @@ "react-force-graph": "^1.44.4", "react-hook-form": "^7.53.1", "react-i18next": "^14.0.0", + "react-infinite-scroll-component": "^6.1.0", "react-markdown": "^9.0.1", "react-pdf-highlighter": "^6.1.0", "react-string-replace": "^1.1.1", diff --git a/web/src/hooks/knowledge-hooks.ts b/web/src/hooks/knowledge-hooks.ts index 0dc245c06d1..1e00e8c0c0f 100644 --- a/web/src/hooks/knowledge-hooks.ts +++ b/web/src/hooks/knowledge-hooks.ts @@ -3,14 +3,17 @@ import { IKnowledge, ITestingResult } from '@/interfaces/database/knowledge'; import i18n from '@/locales/config'; import kbService 
from '@/services/knowledge-service'; import { + useInfiniteQuery, useIsMutating, useMutation, useMutationState, useQuery, useQueryClient, } from '@tanstack/react-query'; +import { useDebounce } from 'ahooks'; import { message } from 'antd'; import { useSearchParams } from 'umi'; +import { useHandleSearchChange } from './logic-hooks'; import { useSetPaginationParams } from './route-hook'; export const useKnowledgeBaseId = (): string => { @@ -50,7 +53,7 @@ export const useNextFetchKnowledgeList = ( gcTime: 0, // https://tanstack.com/query/latest/docs/framework/react/guides/caching?from=reactQueryV3 queryFn: async () => { const { data } = await kbService.getList(); - const list = data?.data ?? []; + const list = data?.data?.kbs ?? []; return shouldFilterListWithoutDocument ? list.filter((x: IKnowledge) => x.chunk_num > 0) : list; @@ -60,6 +63,52 @@ export const useNextFetchKnowledgeList = ( return { list: data, loading }; }; +export const useInfiniteFetchKnowledgeList = () => { + const { searchString, handleInputChange } = useHandleSearchChange(); + const debouncedSearchString = useDebounce(searchString, { wait: 500 }); + + const PageSize = 30; + const { + data, + error, + fetchNextPage, + hasNextPage, + isFetching, + isFetchingNextPage, + status, + } = useInfiniteQuery({ + queryKey: ['infiniteFetchKnowledgeList', debouncedSearchString], + queryFn: async ({ pageParam }) => { + const { data } = await kbService.getList({ + page: pageParam, + page_size: PageSize, + keywords: debouncedSearchString, + }); + const list = data?.data ?? []; + return list; + }, + initialPageParam: 1, + getNextPageParam: (lastPage, pages, lastPageParam) => { + if (lastPageParam * PageSize <= lastPage.total) { + return lastPageParam + 1; + } + return undefined; + }, + }); + return { + data, + loading: isFetching, + error, + fetchNextPage, + hasNextPage, + isFetching, + isFetchingNextPage, + status, + handleInputChange, + searchString, + }; +}; + export const useCreateKnowledge = () => { const queryClient = useQueryClient(); const { @@ -95,7 +144,9 @@ export const useDeleteKnowledge = () => { const { data } = await kbService.rmKb({ kb_id: id }); if (data.code === 0) { message.success(i18n.t(`message.deleted`)); - queryClient.invalidateQueries({ queryKey: ['fetchKnowledgeList'] }); + queryClient.invalidateQueries({ + queryKey: ['infiniteFetchKnowledgeList'], + }); } return data?.data ?? 
[]; }, diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 57253d33a31..59b8d0871f8 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -75,6 +75,7 @@ export default { namePlaceholder: 'Please input name!', doc: 'Docs', searchKnowledgePlaceholder: 'Search', + noMoreData: 'It is all, nothing more', }, knowledgeDetails: { dataset: 'Dataset', diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index 9f92d9ffaa5..bd9602e97ee 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -75,6 +75,7 @@ export default { namePlaceholder: '請輸入名稱', doc: '文件', searchKnowledgePlaceholder: '搜索', + noMoreData: 'It is all, nothing more', }, knowledgeDetails: { dataset: '數據集', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 59c0639ac75..6ffdf246464 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -75,6 +75,7 @@ export default { namePlaceholder: '请输入名称', doc: '文档', searchKnowledgePlaceholder: '搜索', + noMoreData: '沒有更多的數據了', }, knowledgeDetails: { dataset: '数据集', diff --git a/web/src/pages/knowledge/index.less b/web/src/pages/knowledge/index.less index 2479453fa1f..25bcb00deb1 100644 --- a/web/src/pages/knowledge/index.less +++ b/web/src/pages/knowledge/index.less @@ -2,6 +2,7 @@ .knowledge { padding: 48px 0; + overflow: auto; } .topWrapper { diff --git a/web/src/pages/knowledge/index.tsx b/web/src/pages/knowledge/index.tsx index 64c1d6c73ba..d41d6416d2e 100644 --- a/web/src/pages/knowledge/index.tsx +++ b/web/src/pages/knowledge/index.tsx @@ -1,18 +1,26 @@ -import { useNextFetchKnowledgeList } from '@/hooks/knowledge-hooks'; +import { useInfiniteFetchKnowledgeList } from '@/hooks/knowledge-hooks'; import { useFetchUserInfo } from '@/hooks/user-setting-hooks'; import { PlusOutlined, SearchOutlined } from '@ant-design/icons'; -import { Button, Empty, Flex, Input, Space, Spin } from 'antd'; +import { + Button, + Divider, + Empty, + Flex, + Input, + Skeleton, + Space, + Spin, +} from 'antd'; +import { useTranslation } from 'react-i18next'; +import InfiniteScroll from 'react-infinite-scroll-component'; +import { useSaveKnowledge } from './hooks'; import KnowledgeCard from './knowledge-card'; import KnowledgeCreatingModal from './knowledge-creating-modal'; -import { useTranslation } from 'react-i18next'; -import { useSaveKnowledge, useSearchKnowledge } from './hooks'; +import { useMemo } from 'react'; import styles from './index.less'; const KnowledgeList = () => { - const { searchString, handleInputChange } = useSearchKnowledge(); - const { loading, list: data } = useNextFetchKnowledgeList(); - const list = data.filter((x) => x.name.includes(searchString)); const { data: userInfo } = useFetchUserInfo(); const { t } = useTranslation('translation', { keyPrefix: 'knowledgeList' }); const { @@ -22,9 +30,23 @@ const KnowledgeList = () => { onCreateOk, loading: creatingLoading, } = useSaveKnowledge(); + const { + fetchNextPage, + data, + hasNextPage, + searchString, + handleInputChange, + loading, + } = useInfiniteFetchKnowledgeList(); + console.log('🚀 ~ KnowledgeList ~ data:', data); + const nextList = data?.pages?.flatMap((x) => x.kbs) ?? []; + + const total = useMemo(() => { + return data?.pages.at(-1).total ?? 0; + }, [data?.pages]); return ( - +
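For reference, the paginated contract this page now consumes is the reshaped `GET /v1/kb/list` response introduced above: `{"data": {"kbs": [...], "total": N}}`, with `page`, `page_size`, and `keywords` as query parameters. A minimal sketch of walking that contract from a script (the iterator helper is illustrative and not part of the codebase; the host and header conventions follow the frontend-API test helpers later in this series):

```python
import requests

HOST_ADDRESS = "http://127.0.0.1:9380"

def iter_knowledge_bases(auth, keywords="", page_size=30):
    """Yield knowledge bases page by page from /v1/kb/list.

    Stops once page * page_size reaches the reported total -- the
    server-side counterpart of the hook's getNextPageParam check.
    """
    headers = {"Authorization": auth}
    page = 1
    while True:
        res = requests.get(
            f"{HOST_ADDRESS}/v1/kb/list",
            headers=headers,
            params={"page": page, "page_size": page_size, "keywords": keywords},
        ).json()
        data = res.get("data", {})
        yield from data.get("kbs", [])
        if page * page_size >= data.get("total", 0):
            break
        page += 1
```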
@@ -53,21 +75,30 @@ const KnowledgeList = () => {
- } + endMessage={total && {t('noMoreData')} 🤐} + scrollableTarget="scrollableDiv" > - {list.length > 0 ? ( - list.map((item: any) => { - return ( - - ); - }) - ) : ( - - )} - + + {nextList?.length > 0 ? ( + nextList.map((item: any) => { + return ( + + ); + }) + ) : ( + + )} + + Date: Thu, 28 Nov 2024 15:46:35 +0800 Subject: [PATCH 08/34] Add more web test cases (#3702) ### What problem does this PR solve? Test cases about dataset ### Type of change - [x] Other (please describe): test cases --------- Signed-off-by: jinhai --- api/apps/kb_app.py | 27 ++++-- api/constants.py | 2 + rag/utils/infinity_conn.py | 4 +- sdk/python/test/test_frontend_api/common.py | 8 ++ .../test/test_frontend_api/test_dataset.py | 84 +++++++++++++++++-- printEnvironment.sh => show_env.sh | 6 +- 6 files changed, 113 insertions(+), 18 deletions(-) rename printEnvironment.sh => show_env.sh (95%) diff --git a/api/apps/kb_app.py b/api/apps/kb_app.py index ff7f0ae135a..c8086aa4209 100644 --- a/api/apps/kb_app.py +++ b/api/apps/kb_app.py @@ -29,6 +29,7 @@ from api.utils.api_utils import get_json_result from api import settings from rag.nlp import search +from api.constants import DATASET_NAME_LIMIT @manager.route('/create', methods=['post']) @@ -36,10 +37,19 @@ @validate_request("name") def create(): req = request.json - req["name"] = req["name"].strip() - req["name"] = duplicate_name( + dataset_name = req["name"] + if not isinstance(dataset_name, str): + return get_data_error_result(message="Dataset name must be string.") + if dataset_name == "": + return get_data_error_result(message="Dataset name can't be empty.") + if len(dataset_name) >= DATASET_NAME_LIMIT: + return get_data_error_result( + message=f"Dataset name length is {len(dataset_name)} which is large than {DATASET_NAME_LIMIT}") + + dataset_name = dataset_name.strip() + dataset_name = duplicate_name( KnowledgebaseService.query, - name=req["name"], + name=dataset_name, tenant_id=current_user.id, status=StatusEnum.VALID.value) try: @@ -73,7 +83,8 @@ def update(): if not KnowledgebaseService.query( created_by=current_user.id, id=req["kb_id"]): return get_json_result( - data=False, message='Only owner of knowledgebase authorized for this operation.', code=settings.RetCode.OPERATING_ERROR) + data=False, message='Only owner of knowledgebase authorized for this operation.', + code=settings.RetCode.OPERATING_ERROR) e, kb = KnowledgebaseService.get_by_id(req["kb_id"]) if not e: @@ -81,7 +92,8 @@ def update(): message="Can't find this knowledgebase!") if req["name"].lower() != kb.name.lower() \ - and len(KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) > 1: + and len( + KnowledgebaseService.query(name=req["name"], tenant_id=current_user.id, status=StatusEnum.VALID.value)) > 1: return get_data_error_result( message="Duplicated knowledgebase name.") @@ -152,10 +164,11 @@ def rm(): ) try: kbs = KnowledgebaseService.query( - created_by=current_user.id, id=req["kb_id"]) + created_by=current_user.id, id=req["kb_id"]) if not kbs: return get_json_result( - data=False, message='Only owner of knowledgebase authorized for this operation.', code=settings.RetCode.OPERATING_ERROR) + data=False, message='Only owner of knowledgebase authorized for this operation.', + code=settings.RetCode.OPERATING_ERROR) for doc in DocumentService.query(kb_id=req["kb_id"]): if not DocumentService.remove_document(doc, kbs[0].tenant_id): diff --git a/api/constants.py b/api/constants.py index 8d72c7e85aa..e6a97e2c1b1 100644 --- a/api/constants.py +++ 
b/api/constants.py @@ -23,3 +23,5 @@ RAG_FLOW_SERVICE_NAME = "ragflow" REQUEST_WAIT_SEC = 2 REQUEST_MAX_WAIT_SEC = 300 + +DATASET_NAME_LIMIT = 128 diff --git a/rag/utils/infinity_conn.py b/rag/utils/infinity_conn.py index 699e279e3b2..6f508e8bdde 100644 --- a/rag/utils/infinity_conn.py +++ b/rag/utils/infinity_conn.py @@ -310,7 +310,9 @@ def get( table_name = f"{indexName}_{knowledgebaseId}" table_instance = db_instance.get_table(table_name) kb_res = table_instance.output(["*"]).filter(f"id = '{chunkId}'").to_pl() - df_list.append(kb_res) + if len(kb_res) != 0 and kb_res.shape[0] > 0: + df_list.append(kb_res) + self.connPool.release_conn(inf_conn) res = concat_dataframes(df_list, ["id"]) res_fields = self.getFields(res, res.columns) diff --git a/sdk/python/test/test_frontend_api/common.py b/sdk/python/test/test_frontend_api/common.py index 4e44812b256..aa6e258e089 100644 --- a/sdk/python/test/test_frontend_api/common.py +++ b/sdk/python/test/test_frontend_api/common.py @@ -3,6 +3,8 @@ HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380') +DATASET_NAME_LIMIT = 128 + def create_dataset(auth, dataset_name): authorization = {"Authorization": auth} url = f"{HOST_ADDRESS}/v1/kb/create" @@ -24,3 +26,9 @@ def rm_dataset(auth, dataset_id): json = {"kb_id": dataset_id} res = requests.post(url=url, headers=authorization, json=json) return res.json() + +def update_dataset(auth, json_req): + authorization = {"Authorization": auth} + url = f"{HOST_ADDRESS}/v1/kb/update" + res = requests.post(url=url, headers=authorization, json=json_req) + return res.json() diff --git a/sdk/python/test/test_frontend_api/test_dataset.py b/sdk/python/test/test_frontend_api/test_dataset.py index c78d8e0df07..c6e62fc2a7a 100644 --- a/sdk/python/test/test_frontend_api/test_dataset.py +++ b/sdk/python/test/test_frontend_api/test_dataset.py @@ -1,6 +1,8 @@ -from common import HOST_ADDRESS, create_dataset, list_dataset, rm_dataset -import requests - +from common import HOST_ADDRESS, create_dataset, list_dataset, rm_dataset, update_dataset, DATASET_NAME_LIMIT +import re +import pytest +import random +import string def test_dataset(get_auth): # create dataset @@ -56,8 +58,76 @@ def test_dataset_1k_dataset(get_auth): assert res.get("code") == 0, f"{res.get('message')}" print(f"{len(dataset_list)} datasets are deleted") -# delete dataset -# create invalid name dataset +def test_duplicated_name_dataset(get_auth): + # create dataset + for i in range(20): + res = create_dataset(get_auth, "test_create_dataset") + assert res.get("code") == 0, f"{res.get('message')}" + + # list dataset + res = list_dataset(get_auth, 1) + data = res.get("data") + dataset_list = [] + pattern = r'^test_create_dataset.*' + for item in data: + dataset_name = item.get("name") + dataset_id = item.get("id") + dataset_list.append(dataset_id) + match = re.match(pattern, dataset_name) + assert match != None + + for dataset_id in dataset_list: + res = rm_dataset(get_auth, dataset_id) + assert res.get("code") == 0, f"{res.get('message')}" + print(f"{len(dataset_list)} datasets are deleted") + +def test_invalid_name_dataset(get_auth): + # create dataset + # with pytest.raises(Exception) as e: + res = create_dataset(get_auth, 0) + assert res['code'] == 102 + + res = create_dataset(get_auth, "") + assert res['code'] == 102 + + long_string = "" + + while len(long_string) <= DATASET_NAME_LIMIT: + long_string += random.choice(string.ascii_letters + string.digits) + + res = create_dataset(get_auth, long_string) + assert res['code'] == 102 + print(res) + +def 
test_update_different_params_dataset(get_auth): + # create dataset + res = create_dataset(get_auth, "test_create_dataset") + assert res.get("code") == 0, f"{res.get('message')}" + + # list dataset + page_number = 1 + dataset_list = [] + while True: + res = list_dataset(get_auth, page_number) + data = res.get("data") + for item in data: + dataset_id = item.get("id") + dataset_list.append(dataset_id) + if len(dataset_list) < page_number * 150: + break + page_number += 1 + + print(f"found {len(dataset_list)} datasets") + dataset_id = dataset_list[0] + + json_req = {"kb_id": dataset_id, "name": "test_update_dataset", "description": "test", "permission": "me", "parser_id": "presentation"} + res = update_dataset(get_auth, json_req) + assert res.get("code") == 0, f"{res.get('message')}" + + # delete dataset + for dataset_id in dataset_list: + res = rm_dataset(get_auth, dataset_id) + assert res.get("code") == 0, f"{res.get('message')}" + print(f"{len(dataset_list)} datasets are deleted") + # update dataset with different parameters -# create duplicated name dataset -# diff --git a/printEnvironment.sh b/show_env.sh similarity index 95% rename from printEnvironment.sh rename to show_env.sh index 28bf3db6f3d..83c47635cbf 100644 --- a/printEnvironment.sh +++ b/show_env.sh @@ -15,7 +15,7 @@ get_distro_info() { echo "$distro_id $distro_version (Kernel version: $kernel_version)" } -# get Git repo name +# get Git repository name git_repo_name='' if git rev-parse --is-inside-work-tree > /dev/null 2>&1; then git_repo_name=$(basename "$(git rev-parse --show-toplevel)") @@ -48,8 +48,8 @@ else python_version="Python not installed" fi -# Print all infomation -echo "Current Repo: $git_repo_name" +# Print all information +echo "Current Repository: $git_repo_name" # get Commit ID git_version=$(git log -1 --pretty=format:'%h') From 964a6f4ec4d9de17efffa1018daa02bd396b7f8d Mon Sep 17 00:00:00 2001 From: Zhichang Yu Date: Thu, 28 Nov 2024 15:59:00 +0800 Subject: [PATCH 09/34] Added an infinity configuration file to easily customize the settings of Infinity (#3715) ### What problem does this PR solve? 
Added an infinity configuration file to easily customize the settings of Infinity ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- docker/docker-compose-base.yml | 2 ++ docker/infinity_conf.toml | 66 ++++++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 docker/infinity_conf.toml diff --git a/docker/docker-compose-base.yml b/docker/docker-compose-base.yml index 2a1130b2b35..2d482a3ed34 100644 --- a/docker/docker-compose-base.yml +++ b/docker/docker-compose-base.yml @@ -39,6 +39,8 @@ services: image: infiniflow/infinity:v0.5.0-dev5 volumes: - infinity_data:/var/infinity + - ./infinity_conf.toml:/infinity_conf.toml + command: ["-f", "/infinity_conf.toml"] ports: - ${INFINITY_THRIFT_PORT}:23817 - ${INFINITY_HTTP_PORT}:23820 diff --git a/docker/infinity_conf.toml b/docker/infinity_conf.toml new file mode 100644 index 00000000000..cd0a886ac5c --- /dev/null +++ b/docker/infinity_conf.toml @@ -0,0 +1,66 @@ +[general] +version = "0.5.0" +time_zone = "utc-8" + +[network] +server_address = "0.0.0.0" +postgres_port = 5432 +http_port = 23820 +client_port = 23817 +connection_pool_size = 128 + +[log] +log_filename = "infinity.log" +log_dir = "/var/infinity/log" +log_to_stdout = true +log_file_max_size = "100MB" +log_file_rotate_count = 10 + +# trace/debug/info/warning/error/critical 6 log levels, default: info +log_level = "info" + +[storage] +persistence_dir = "/var/infinity/persistence" +data_dir = "/var/infinity/data" +# periodically activates garbage collection: +# 0 means real-time, +# s means seconds, for example "60s", 60 seconds +# m means minutes, for example "60m", 60 minutes +# h means hours, for example "1h", 1 hour +optimize_interval = "10s" +cleanup_interval = "60s" +compact_interval = "120s" +storage_type = "local" + +# dump memory index entry when it reachs the capacity +mem_index_capacity = 65536 + +# S3 storage config example: +# [storage.object_storage] +# url = "127.0.0.1:9000" +# bucket_name = "infinity" +# access_key = "minioadmin" +# secret_key = "minioadmin" +# enable_https = false + +[buffer] +buffer_manager_size = "8GB" +lru_num = 7 +temp_dir = "/var/infinity/tmp" +result_cache = "off" +memindex_memory_quota = "4GB" + +[wal] +wal_dir = "/var/infinity/wal" +full_checkpoint_interval = "30s" +delta_checkpoint_interval = "5s" +# delta_checkpoint_threshold = 1000000000 +wal_compact_threshold = "1GB" + +# flush_at_once: write and flush log each commit +# only_write: write log, OS control when to flush the log, default +# flush_per_second: logs are written after each commit and flushed to disk per second. +wal_flush = "only_write" + +[resource] +resource_dir = "/var/infinity/resource" From cdae8d28fe20bc2431a4f31214311bc9ff1f1f61 Mon Sep 17 00:00:00 2001 From: Jin Hai Date: Thu, 28 Nov 2024 17:37:46 +0800 Subject: [PATCH 10/34] Fix test cases (#3718) ### What problem does this PR solve? Fix test cases ### Type of change - [x] Other (please describe): Fix error cases --------- Signed-off-by: jinhai --- README.md | 3 +-- README_id.md | 5 ++--- README_ja.md | 3 +-- README_ko.md | 3 +-- README_zh.md | 3 +-- sdk/python/test/conftest.py | 5 ++++- sdk/python/test/test_frontend_api/test_dataset.py | 12 ++++++------ sdk/python/test/test_sdk_api/t_chunk.py | 5 ++++- 8 files changed, 20 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 63160e255fc..1314ad7e80e 100644 --- a/README.md +++ b/README.md @@ -333,8 +333,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev . 
cd web npm install --force ``` -7. Configure frontend to update `proxy.target` in **.umirc.ts** to `http://127.0.0.1:9380`: -8. Launch frontend service: +7. Launch frontend service: ```bash npm run dev ``` diff --git a/README_id.md b/README_id.md index 0b739d42548..a11f51a481e 100644 --- a/README_id.md +++ b/README_id.md @@ -307,9 +307,8 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev . ```bash cd web npm install --force - ``` -7. Konfigurasikan frontend untuk memperbarui `proxy.target` di **.umirc.ts** menjadi `http://127.0.0.1:9380`: -8. Jalankan aplikasi frontend: + ``` +7. Jalankan aplikasi frontend: ```bash npm run dev ``` diff --git a/README_ja.md b/README_ja.md index 28f8ccf2c57..1d2afc233a4 100644 --- a/README_ja.md +++ b/README_ja.md @@ -289,8 +289,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev . cd web npm install --force ``` -7. フロントエンドを設定し、**.umirc.ts** の `proxy.target` を `http://127.0.0.1:9380` に更新します: -8. フロントエンドサービスを起動する: +7. フロントエンドサービスを起動する: ```bash npm run dev ``` diff --git a/README_ko.md b/README_ko.md index f558f4c75d8..923b10362a0 100644 --- a/README_ko.md +++ b/README_ko.md @@ -291,8 +291,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev . cd web npm install --force ``` -7. **.umirc.ts** 에서 `proxy.target` 을 `http://127.0.0.1:9380` 으로 업데이트합니다: -8. 프론트엔드 서비스를 시작합니다: +7. 프론트엔드 서비스를 시작합니다: ```bash npm run dev ``` diff --git a/README_zh.md b/README_zh.md index e06e7feae6f..f2710d81e55 100644 --- a/README_zh.md +++ b/README_zh.md @@ -296,8 +296,7 @@ docker build -f Dockerfile -t infiniflow/ragflow:dev . cd web npm install --force ``` -7. 配置前端,将 **.umirc.ts** 的 `proxy.target` 更新为 `http://127.0.0.1:9380`: -8. 启动前端服务: +7. 启动前端服务: ```bash npm run dev ``` diff --git a/sdk/python/test/conftest.py b/sdk/python/test/conftest.py index 511252271ab..cd4df05f383 100644 --- a/sdk/python/test/conftest.py +++ b/sdk/python/test/conftest.py @@ -40,7 +40,10 @@ def login(): @pytest.fixture(scope="session") def get_api_key_fixture(): - register() + try: + register() + except Exception as e: + print(e) auth = login() url = HOST_ADDRESS + "/v1/system/new_token" auth = {"Authorization": auth} diff --git a/sdk/python/test/test_frontend_api/test_dataset.py b/sdk/python/test/test_frontend_api/test_dataset.py index c6e62fc2a7a..f9421d2beac 100644 --- a/sdk/python/test/test_frontend_api/test_dataset.py +++ b/sdk/python/test/test_frontend_api/test_dataset.py @@ -14,8 +14,8 @@ def test_dataset(get_auth): dataset_list = [] while True: res = list_dataset(get_auth, page_number) - data = res.get("data") - for item in data.get("kbs"): + data = res.get("data").get("kbs") + for item in data: dataset_id = item.get("id") dataset_list.append(dataset_id) if len(dataset_list) < page_number * 150: @@ -43,8 +43,8 @@ def test_dataset_1k_dataset(get_auth): dataset_list = [] while True: res = list_dataset(get_auth, page_number) - data = res.get("data") - for item in data.get("kbs"): + data = res.get("data").get("kbs") + for item in data: dataset_id = item.get("id") dataset_list.append(dataset_id) if len(dataset_list) < page_number * 150: @@ -66,7 +66,7 @@ def test_duplicated_name_dataset(get_auth): # list dataset res = list_dataset(get_auth, 1) - data = res.get("data") + data = res.get("data").get("kbs") dataset_list = [] pattern = r'^test_create_dataset.*' for item in data: @@ -109,7 +109,7 @@ def test_update_different_params_dataset(get_auth): dataset_list = [] while True: res = list_dataset(get_auth, page_number) - data = res.get("data") + data = res.get("data").get("kbs") for item in 
data: dataset_id = item.get("id") dataset_list.append(dataset_id) diff --git a/sdk/python/test/test_sdk_api/t_chunk.py b/sdk/python/test/test_sdk_api/t_chunk.py index 13b4c06d781..cfbfe65b07a 100644 --- a/sdk/python/test/test_sdk_api/t_chunk.py +++ b/sdk/python/test/test_sdk_api/t_chunk.py @@ -190,4 +190,7 @@ def test_retrieve_chunks(get_api_key_fixture): docs = ds.upload_documents(documents) doc = docs[0] doc.add_chunk(content="This is a chunk addition test") - rag.retrieve(dataset_ids=[ds.id],document_ids=[doc.id]) \ No newline at end of file + rag.retrieve(dataset_ids=[ds.id],document_ids=[doc.id]) + rag.delete_datasets(ids=[ds.id]) + +# test different parameters for the retrieval From 4e8e4fe53fd04d6f7a9d35c9fd6d011eabd952f9 Mon Sep 17 00:00:00 2001 From: balibabu Date: Thu, 28 Nov 2024 18:44:36 +0800 Subject: [PATCH 11/34] Feat: Add Dataset page #3221 (#3721) ### What problem does this PR solve? Feat: Add Dataset page #3221 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- web/package-lock.json | 32 +++ web/package.json | 1 + web/src/components/list-filter-bar.tsx | 25 ++ web/src/interfaces/database/document.ts | 10 +- .../pages/dataset/dataset/dataset-table.tsx | 268 ++++++++++++++++++ web/src/pages/dataset/dataset/index.tsx | 14 +- web/src/pages/dataset/index.tsx | 2 +- web/src/pages/datasets/index.tsx | 24 +- 8 files changed, 351 insertions(+), 25 deletions(-) create mode 100644 web/src/components/list-filter-bar.tsx create mode 100644 web/src/pages/dataset/dataset/dataset-table.tsx diff --git a/web/package-lock.json b/web/package-lock.json index 65421945fba..ed605f52f5b 100644 --- a/web/package-lock.json +++ b/web/package-lock.json @@ -30,6 +30,7 @@ "@tailwindcss/line-clamp": "^0.4.4", "@tanstack/react-query": "^5.40.0", "@tanstack/react-query-devtools": "^5.51.5", + "@tanstack/react-table": "^8.20.5", "@uiw/react-markdown-preview": "^5.1.3", "ahooks": "^3.7.10", "antd": "^5.12.7", @@ -5609,6 +5610,37 @@ "url": "https://github.com/sponsors/tannerlinsley" } }, + "node_modules/@tanstack/react-table": { + "version": "8.20.5", + "resolved": "https://registry.npmmirror.com/@tanstack/react-table/-/react-table-8.20.5.tgz", + "integrity": "sha512-WEHopKw3znbUZ61s9i0+i9g8drmDo6asTWbrQh8Us63DAk/M0FkmIqERew6P71HI75ksZ2Pxyuf4vvKh9rAkiA==", + "dependencies": { + "@tanstack/table-core": "8.20.5" + }, + "engines": { + "node": ">=12" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + }, + "peerDependencies": { + "react": ">=16.8", + "react-dom": ">=16.8" + } + }, + "node_modules/@tanstack/table-core": { + "version": "8.20.5", + "resolved": "https://registry.npmmirror.com/@tanstack/table-core/-/table-core-8.20.5.tgz", + "integrity": "sha512-P9dF7XbibHph2PFRz8gfBKEXEY/HJPOhym8CHmjF8y3q5mWpKx9xtZapXQUWCgkqvsK0R46Azuz+VaxD4Xl+Tg==", + "engines": { + "node": ">=12" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/tannerlinsley" + } + }, "node_modules/@testing-library/dom": { "version": "10.1.0", "resolved": "https://registry.npmmirror.com/@testing-library/dom/-/dom-10.1.0.tgz", diff --git a/web/package.json b/web/package.json index 3a226d97f8a..74932c24004 100644 --- a/web/package.json +++ b/web/package.json @@ -41,6 +41,7 @@ "@tailwindcss/line-clamp": "^0.4.4", "@tanstack/react-query": "^5.40.0", "@tanstack/react-query-devtools": "^5.51.5", + "@tanstack/react-table": "^8.20.5", "@uiw/react-markdown-preview": "^5.1.3", "ahooks": "^3.7.10", "antd": "^5.12.7", diff --git 
a/web/src/components/list-filter-bar.tsx b/web/src/components/list-filter-bar.tsx new file mode 100644 index 00000000000..a6949098c24 --- /dev/null +++ b/web/src/components/list-filter-bar.tsx @@ -0,0 +1,25 @@ +import { Filter, Search } from 'lucide-react'; +import { PropsWithChildren } from 'react'; +import { Button } from './ui/button'; + +interface IProps { + title: string; +} + +export default function ListFilterBar({ + title, + children, +}: PropsWithChildren) { + return ( +
+ {title} +
+ + + +
+
+ ); +} diff --git a/web/src/interfaces/database/document.ts b/web/src/interfaces/database/document.ts index 7b1bce95ec2..81f3968defa 100644 --- a/web/src/interfaces/database/document.ts +++ b/web/src/interfaces/database/document.ts @@ -11,7 +11,7 @@ export interface IDocumentInfo { name: string; parser_config: IParserConfig; parser_id: string; - process_begin_at: null; + process_begin_at?: string; process_duation: number; progress: number; progress_msg: string; @@ -27,11 +27,11 @@ export interface IDocumentInfo { } export interface IParserConfig { - delimiter: string; - html4excel: boolean; - layout_recognize: boolean; + delimiter?: string; + html4excel?: boolean; + layout_recognize?: boolean; pages: any[]; - raptor: Raptor; + raptor?: Raptor; } interface Raptor { diff --git a/web/src/pages/dataset/dataset/dataset-table.tsx b/web/src/pages/dataset/dataset/dataset-table.tsx new file mode 100644 index 00000000000..bc9dd95010e --- /dev/null +++ b/web/src/pages/dataset/dataset/dataset-table.tsx @@ -0,0 +1,268 @@ +'use client'; + +import { + ColumnDef, + ColumnFiltersState, + SortingState, + VisibilityState, + flexRender, + getCoreRowModel, + getFilteredRowModel, + getPaginationRowModel, + getSortedRowModel, + useReactTable, +} from '@tanstack/react-table'; +import { ArrowUpDown, MoreHorizontal } from 'lucide-react'; +import * as React from 'react'; + +import { Button } from '@/components/ui/button'; +import { Checkbox } from '@/components/ui/checkbox'; +import { + DropdownMenu, + DropdownMenuContent, + DropdownMenuItem, + DropdownMenuLabel, + DropdownMenuSeparator, + DropdownMenuTrigger, +} from '@/components/ui/dropdown-menu'; +import { + Table, + TableBody, + TableCell, + TableHead, + TableHeader, + TableRow, +} from '@/components/ui/table'; +import { RunningStatus } from '@/constants/knowledge'; +import { IDocumentInfo } from '@/interfaces/database/document'; + +const data: IDocumentInfo[] = [ + { + chunk_num: 1, + create_date: 'Thu, 28 Nov 2024 17:10:22 GMT', + create_time: 1732785022792, + created_by: 'b0975cb4bc3111ee9b830aef05f5e94f', + id: '990cb30ead6811efb9b9fa163e197198', + kb_id: '25a8cfbe9cd411efbc12fa163e197198', + location: 'mian.jpg', + name: 'mian.jpg', + parser_config: { + pages: [[1, 1000000]], + }, + parser_id: 'picture', + process_begin_at: 'Thu, 28 Nov 2024 17:10:25 GMT', + process_duation: 8.46185, + progress: 1, + progress_msg: + '\nTask has been received.\nPage(1~100000001): Finish OCR: (用小麦粉\n金\nONGXI ...)\nPage(1~100000001): OCR results is too long to use CV LLM.\nPage(1~100000001): Finished slicing files (1 chunks in 0.34s). Start to embedding the content.\nPage(1~100000001): Finished embedding (in 0.35s)! Start to build index!\nPage(1~100000001): Indexing elapsed in 0.02s.\nPage(1~100000001): Done!', + run: RunningStatus.RUNNING, + size: 19692, + source_type: 'local', + status: '1', + thumbnail: + '/v1/document/image/25a8cfbe9cd411efbc12fa163e197198-thumbnail_990cb30ead6811efb9b9fa163e197198.png', + token_num: 115, + type: 'visual', + update_date: 'Thu, 28 Nov 2024 17:10:33 GMT', + update_time: 1732785033462, + }, +]; + +export const columns: ColumnDef[] = [ + { + id: 'select', + header: ({ table }) => ( + table.toggleAllPageRowsSelected(!!value)} + aria-label="Select all" + /> + ), + cell: ({ row }) => ( + row.toggleSelected(!!value)} + aria-label="Select row" + /> + ), + enableSorting: false, + enableHiding: false, + }, + { + accessorKey: 'status', + header: 'Status', + cell: ({ row }) => ( +
<div className="capitalize">{row.getValue('status')}</div>
+ ), + }, + { + accessorKey: 'email', + header: ({ column }) => { + return ( + + ); + }, + cell: ({ row }) =>
<div className="lowercase">{row.getValue('email')}</div>
, + }, + { + accessorKey: 'amount', + header: () =>
<div className="text-right">Amount</div>
, + cell: ({ row }) => { + const amount = parseFloat(row.getValue('amount')); + + // Format the amount as a dollar amount + const formatted = new Intl.NumberFormat('en-US', { + style: 'currency', + currency: 'USD', + }).format(amount); + + return
<div className="text-right font-medium">{formatted}</div>
; + }, + }, + { + id: 'actions', + enableHiding: false, + cell: ({ row }) => { + const payment = row.original; + + return ( + + + + + + Actions + navigator.clipboard.writeText(payment.id)} + > + Copy payment ID + + + View customer + View payment details + + + ); + }, + }, +]; + +export function DatasetTable() { + const [sorting, setSorting] = React.useState([]); + const [columnFilters, setColumnFilters] = React.useState( + [], + ); + const [columnVisibility, setColumnVisibility] = + React.useState({}); + const [rowSelection, setRowSelection] = React.useState({}); + + const table = useReactTable({ + data, + columns, + onSortingChange: setSorting, + onColumnFiltersChange: setColumnFilters, + getCoreRowModel: getCoreRowModel(), + getPaginationRowModel: getPaginationRowModel(), + getSortedRowModel: getSortedRowModel(), + getFilteredRowModel: getFilteredRowModel(), + onColumnVisibilityChange: setColumnVisibility, + onRowSelectionChange: setRowSelection, + state: { + sorting, + columnFilters, + columnVisibility, + rowSelection, + }, + }); + + return ( +
+
+ + + {table.getHeaderGroups().map((headerGroup) => ( + + {headerGroup.headers.map((header) => { + return ( + + {header.isPlaceholder + ? null + : flexRender( + header.column.columnDef.header, + header.getContext(), + )} + + ); + })} + + ))} + + + {table.getRowModel().rows?.length ? ( + table.getRowModel().rows.map((row) => ( + + {row.getVisibleCells().map((cell) => ( + + {flexRender( + cell.column.columnDef.cell, + cell.getContext(), + )} + + ))} + + )) + ) : ( + + + No results. + + + )} + +
+
+
+
+ {table.getFilteredSelectedRowModel().rows.length} of{' '} + {table.getFilteredRowModel().rows.length} row(s) selected. +
+
+ + +
+
+
+ ); +} diff --git a/web/src/pages/dataset/dataset/index.tsx b/web/src/pages/dataset/dataset/index.tsx index 914ef81d362..d9d4e356b89 100644 --- a/web/src/pages/dataset/dataset/index.tsx +++ b/web/src/pages/dataset/dataset/index.tsx @@ -1,3 +1,15 @@ +import ListFilterBar from '@/components/list-filter-bar'; +import { Upload } from 'lucide-react'; +import { DatasetTable } from './dataset-table'; + export default function Dataset() { - return
<div>Outset</div>
; + return ( +
+ + + Upload file + + +
+ ); } diff --git a/web/src/pages/dataset/index.tsx b/web/src/pages/dataset/index.tsx index bd4ab0cc98d..7090b049b2e 100644 --- a/web/src/pages/dataset/index.tsx +++ b/web/src/pages/dataset/index.tsx @@ -5,7 +5,7 @@ export default function DatasetWrapper() { return (
-
+
diff --git a/web/src/pages/datasets/index.tsx b/web/src/pages/datasets/index.tsx index 558243aeaca..2e6d188f8da 100644 --- a/web/src/pages/datasets/index.tsx +++ b/web/src/pages/datasets/index.tsx @@ -1,12 +1,7 @@ +import ListFilterBar from '@/components/list-filter-bar'; import { Button } from '@/components/ui/button'; import { Card, CardContent } from '@/components/ui/card'; -import { - ChevronRight, - Filter, - MoreHorizontal, - Plus, - Search, -} from 'lucide-react'; +import { ChevronRight, MoreHorizontal, Plus } from 'lucide-react'; const datasets = [ { @@ -86,17 +81,10 @@ const datasets = [ export default function Datasets() { return (
-
- Datasets -
- - - -
-
+ + + Create dataset +
{datasets.map((dataset) => ( Date: Thu, 28 Nov 2024 18:56:10 +0800 Subject: [PATCH 12/34] Fix error response (#3719) ### What problem does this PR solve? ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Co-authored-by: Jin Hai --- api/apps/chunk_app.py | 2 +- rag/llm/rerank_model.py | 2 ++ sdk/python/test/test_frontend_api/test_dataset.py | 4 ++++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/api/apps/chunk_app.py b/api/apps/chunk_app.py index 0863df133c6..1d21bc103e9 100644 --- a/api/apps/chunk_app.py +++ b/api/apps/chunk_app.py @@ -96,7 +96,7 @@ def get(): kb_ids = KnowledgebaseService.get_kb_ids(tenant_id) chunk = settings.docStoreConn.get(chunk_id, search.index_name(tenant_id), kb_ids) if chunk is None: - return server_error_response("Chunk not found") + return server_error_response(Exception("Chunk not found")) k = [] for n in chunk.keys(): if re.search(r"(_vec$|_sm_|_tks|_ltks)", n): diff --git a/rag/llm/rerank_model.py b/rag/llm/rerank_model.py index e2eb3a93db2..28420daab8a 100644 --- a/rag/llm/rerank_model.py +++ b/rag/llm/rerank_model.py @@ -158,6 +158,8 @@ class XInferenceRerank(Base): def __init__(self, key="xxxxxxx", model_name="", base_url=""): if base_url.find("/v1") == -1: base_url = urljoin(base_url, "/v1/rerank") + if base_url.find("/rerank") == -1: + base_url = urljoin(base_url, "/v1/rerank") self.model_name = model_name self.base_url = base_url self.headers = { diff --git a/sdk/python/test/test_frontend_api/test_dataset.py b/sdk/python/test/test_frontend_api/test_dataset.py index f9421d2beac..d4e69c7aa0c 100644 --- a/sdk/python/test/test_frontend_api/test_dataset.py +++ b/sdk/python/test/test_frontend_api/test_dataset.py @@ -4,6 +4,7 @@ import random import string + def test_dataset(get_auth): # create dataset res = create_dataset(get_auth, "test_create_dataset") @@ -58,6 +59,7 @@ def test_dataset_1k_dataset(get_auth): assert res.get("code") == 0, f"{res.get('message')}" print(f"{len(dataset_list)} datasets are deleted") + def test_duplicated_name_dataset(get_auth): # create dataset for i in range(20): @@ -81,6 +83,7 @@ def test_duplicated_name_dataset(get_auth): assert res.get("code") == 0, f"{res.get('message')}" print(f"{len(dataset_list)} datasets are deleted") + def test_invalid_name_dataset(get_auth): # create dataset # with pytest.raises(Exception) as e: @@ -99,6 +102,7 @@ def test_invalid_name_dataset(get_auth): assert res['code'] == 102 print(res) + def test_update_different_params_dataset(get_auth): # create dataset res = create_dataset(get_auth, "test_create_dataset") From 112ef42a1933a11b3a9b57433d6175e05f7a544a Mon Sep 17 00:00:00 2001 From: Zhichang Yu Date: Thu, 28 Nov 2024 19:15:31 +0800 Subject: [PATCH 13/34] Ensure thumbnail be smaller than 64K (#3722) ### What problem does this PR solve? Ensure thumbnail be smaller than 64K. 
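The fix below applies a bounded geometric back-off: encode the thumbnail, measure the result, halve the render resolution (or scale) if it is still too large, and give up after ten attempts. A minimal standalone sketch of the same pattern using Pillow (the 64,000-byte budget and the ten-try bound come from the diff; the helper itself is illustrative):

```python
from io import BytesIO
from PIL import Image

IMG_LIMIT = 64000  # keep the encoded PNG safely below the 64K column limit

def shrink_to_limit(blob: bytes, side: int = 128, tries: int = 10):
    """Re-encode an image, halving its bounding box until the PNG fits."""
    img = None
    for _ in range(tries):
        image = Image.open(BytesIO(blob))
        image.thumbnail((side, side))  # in-place resize, preserves aspect ratio
        buffered = BytesIO()
        image.save(buffered, format="png")
        img = buffered.getvalue()
        if len(img) < IMG_LIMIT or side <= 1:
            break
        side //= 2  # geometric back-off, mirroring the resolution/scale halving below
    return img
```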
Close #1443 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Co-authored-by: Kevin Hu --- api/utils/file_utils.py | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/api/utils/file_utils.py b/api/utils/file_utils.py index 5ab61b63bc4..ed014ac0e52 100644 --- a/api/utils/file_utils.py +++ b/api/utils/file_utils.py @@ -170,29 +170,52 @@ def filename_type(filename): return FileType.OTHER.value def thumbnail_img(filename, blob): + """ + MySQL LongText max length is 65535 + """ filename = filename.lower() if re.match(r".*\.pdf$", filename): pdf = pdfplumber.open(BytesIO(blob)) buffered = BytesIO() - pdf.pages[0].to_image(resolution=32).annotated.save(buffered, format="png") - return buffered.getvalue() + resolution = 32 + img = None + for _ in range(10): + # https://github.com/jsvine/pdfplumber?tab=readme-ov-file#creating-a-pageimage-with-to_image + pdf.pages[0].to_image(resolution=resolution).annotated.save(buffered, format="png") + img = buffered.getvalue() + if len(img) >= 64000 and resolution >= 2: + resolution = resolution / 2 + buffered = BytesIO() + else: + break + return img - if re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename): + elif re.match(r".*\.(jpg|jpeg|png|tif|gif|icon|ico|webp)$", filename): image = Image.open(BytesIO(blob)) image.thumbnail((30, 30)) buffered = BytesIO() image.save(buffered, format="png") return buffered.getvalue() - if re.match(r".*\.(ppt|pptx)$", filename): + elif re.match(r".*\.(ppt|pptx)$", filename): import aspose.slides as slides import aspose.pydrawing as drawing try: with slides.Presentation(BytesIO(blob)) as presentation: buffered = BytesIO() - presentation.slides[0].get_thumbnail(0.03, 0.03).save( - buffered, drawing.imaging.ImageFormat.png) - return buffered.getvalue() + scale = 0.03 + img = None + for _ in range(10): + # https://reference.aspose.com/slides/python-net/aspose.slides/slide/get_thumbnail/#float-float + presentation.slides[0].get_thumbnail(scale, scale).save( + buffered, drawing.imaging.ImageFormat.png) + img = buffered.getvalue() + if len(img) >= 64000: + scale = scale / 2.0 + buffered = BytesIO() + else: + break + return img except Exception: pass return None From 966bcda6b99b9aaee4a11eb397a1d7efdea3abe8 Mon Sep 17 00:00:00 2001 From: writinwaters <93570324+writinwaters@users.noreply.github.com> Date: Thu, 28 Nov 2024 19:32:50 +0800 Subject: [PATCH 14/34] Updated descriptions for the Agent components (#3728) ### What problem does this PR solve? ### Type of change - [x] Documentation Update --- agent/templates/interpreter.json | 4 ++-- agent/templates/seo_blog.json | 2 +- docs/release_notes.md | 2 +- web/src/locales/en.ts | 18 +++++++++--------- web/src/pages/flow/constant.tsx | 4 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) diff --git a/agent/templates/interpreter.json b/agent/templates/interpreter.json index 7ea83d2dc47..edb3b8be5ef 100644 --- a/agent/templates/interpreter.json +++ b/agent/templates/interpreter.json @@ -1,7 +1,7 @@ { "id": 4, "title": "Interpreter", - "description": "Translation Agent: Agentic translation using reflection workflow.\n\nThis is inspired by Andrew NG's project: https://github.com/andrewyng/translation-agent\n\n", + "description": "A translation agent based on a reflection agentic workflow, inspired by Andrew Ng's project: https://github.com/andrewyng/translation-agent\n\n1. Prompt an LLM to translate a text into the target language.\n2. 
Have the LLM reflect on the translation and provide constructive suggestions for improvement.\n3. Use these suggestions to improve the translation.", "canvas_type": "chatbot", "dsl": { "answer": [], @@ -473,7 +473,7 @@ { "data": { "form": { - "text": "Translation Agent: Agentic translation using reflection workflow\n\nThis is inspired by Andrew NG's project: https://github.com/andrewyng/translation-agent\n\n1. Prompt an LLM to translate a text to target_language;\n2. Have the LLM reflect on the translation to come up with constructive suggestions for improving it;\n3. Use the suggestions to improve the translation." + "text": "Translation Agent: Agentic translation using reflection workflow\n\nThis is inspired by Andrew NG's project: https://github.com/andrewyng/translation-agent\n\n1. Prompt an LLM to translate a text into the target language;\n2. Have the LLM reflect on the translation and provide constructive suggestions for improvement;\n3. Use these suggestions to improve the translation." }, "label": "Note", "name": "Breif" diff --git a/agent/templates/seo_blog.json b/agent/templates/seo_blog.json index f5995f225c7..1be6d6729b5 100644 --- a/agent/templates/seo_blog.json +++ b/agent/templates/seo_blog.json @@ -1,7 +1,7 @@ { "id": 9, "title": "SEO Blog Generator", - "description": "A generator for Blogs with SEO just by giving title or keywords.", + "description": "A blog generator that creates SEO-optimized content based on your chosen title or keywords.", "canvas_type": "chatbot", "dsl": { "answer": [], diff --git a/docs/release_notes.md b/docs/release_notes.md index a05515c7257..61435818c7c 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -94,7 +94,7 @@ Released on September 30, 2024. - Improves the results of multi-round dialogues. - Enables users to remove added LLM vendors. - Adds support for OpenTTS and SparkTTS models. -- Implements an **Excel to HTML** toggle in the **General** chunk method, allowing users to parse an spreadsheet into either an HTML table or key-value pairs by row. +- Implements an **Excel to HTML** toggle in the **General** chunk method, allowing users to parse a spreadsheet into either HTML tables or key-value pairs by row. - Adds agent tools **YahooFance** and **Jin10**. - Adds a template for an investment advisor agent. diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts index 59b8d0871f8..aa4b801e976 100644 --- a/web/src/locales/en.ts +++ b/web/src/locales/en.ts @@ -158,7 +158,7 @@ export default { topKTip: `K chunks will be fed into rerank models.`, delimiter: `Delimiter`, html4excel: 'Excel to HTML', - html4excelTip: `Excel will be parsed into HTML table or not. If it's FALSE, every row in Excel will be formed as a chunk.`, + html4excelTip: `When enabled, the spreadsheet will be parsed into HTML tables; otherwise, it will be parsed into key-value pairs by row.`, autoKeywords: 'Auto-keyword', autoKeywordsTip: `Extract N keywords for each chunk to increase their ranking for queries containing those keywords. You can check or update the added keywords for a chunk from the chunk list. Be aware that extra tokens will be consumed by the LLM specified in 'System model settings'.`, autoQuestions: 'Auto-question', @@ -694,7 +694,7 @@ The above is the content you need to summarize.`, "A component that sends out a static message. If multiple messages are supplied, it randomly selects one to send. 
Ensure its downstream is 'Answer', the interface component.", keywordDescription: `A component that retrieves top N search results from user's input. Ensure the TopN value is set properly before use.`, switchDescription: `A component that evaluates conditions based on the output of previous components and directs the flow of execution accordingly. It allows for complex branching logic by defining cases and specifying actions for each case or default action if no conditions are met.`, - wikipediaDescription: `This component is used to get search result from wikipedia.org. Typically, it performs as a supplement to knowledgebases. Top N specifies the number of search results you need to adapt.`, + wikipediaDescription: `A component that searches from wikipedia.org, using TopN to specify the number of search results. It supplements the existing knowledge bases.`, promptText: `Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following: {input} The above is the content you need to summarize.`, @@ -718,10 +718,10 @@ The above is the content you need to summarize.`, keywordExtract: 'Keyword', keywordExtractDescription: `A component that extracts keywords from a user query, with Top N specifing the number of keywords to extract.`, baidu: 'Baidu', - baiduDescription: `This component is used to get search result from www.baidu.com. Typically, it performs as a supplement to knowledgebases. Top N specifies the number of search results you need to adapt.`, + baiduDescription: `A component that searches from baidu.com, using TopN to specify the number of search results. It supplements the existing knowledge bases.`, duckDuckGo: 'DuckDuckGo', duckDuckGoDescription: - 'A component that retrieves search results from duckduckgo.com, with TopN specifying the number of search results. It supplements existing knowledge bases.', + 'A component that searches from duckduckgo.com, allowing you to specify the number of search results using TopN. It supplements the existing knowledge bases.', channel: 'Channel', channelTip: `Perform text search or news search on the component's input`, text: 'Text', @@ -732,23 +732,23 @@ The above is the content you need to summarize.`, wikipedia: 'Wikipedia', pubMed: 'PubMed', pubMedDescription: - 'This component is used to get search result from https://pubmed.ncbi.nlm.nih.gov/. Typically, it performs as a supplement to knowledgebases. Top N specifies the number of search results you need to adapt. E-mail is a required field.', + 'A component that searches from https://pubmed.ncbi.nlm.nih.gov/, allowing you to specify the number of search results using TopN. It supplements the existing knowledge bases.', email: 'Email', emailTip: - 'This component is used to get search result from https://pubmed.ncbi.nlm.nih.gov/. Typically, it performs as a supplement to knowledgebases. Top N specifies the number of search results you need to adapt. E-mail is a required field.', + 'E-mail is a required field. You must input an E-mail address here.', arXiv: 'ArXiv', arXivDescription: - 'This component is used to get search result from https://arxiv.org/. Typically, it performs as a supplement to knowledgebases. Top N specifies the number of search results you need to adapt.', + 'A component that searches from https://arxiv.org/, allowing you to specify the number of search results using TopN. 
It supplements the existing knowledge bases.', sortBy: 'Sort by', submittedDate: 'Submitted date', lastUpdatedDate: 'Last updated date', relevance: 'Relevance', google: 'Google', googleDescription: - 'This component is used to get search result fromhttps://www.google.com/ . Typically, it performs as a supplement to knowledgebases. Top N and SerpApi API key specifies the number of search results you need to adapt.', + 'A component that searches from https://www.google.com/, allowing you to specify the number of search results using TopN. It supplements the existing knowledge bases. Please note that this requires an API key from serpapi.com.', bing: 'Bing', bingDescription: - 'This component is used to get search result from https://www.bing.com/. Typically, it performs as a supplement to knowledgebases. Top N and Bing Subscription-Key specifies the number of search results you need to adapt.', + 'A component that searches from https://www.bing.com/, allowing you to specify the number of search results using TopN. It supplements the existing knowledge bases. Please note that this requires an API key from microsoft.com.', apiKey: 'API KEY', country: 'Country&Region', language: 'Language', diff --git a/web/src/pages/flow/constant.tsx b/web/src/pages/flow/constant.tsx index e06398f63e7..5571bfc705e 100644 --- a/web/src/pages/flow/constant.tsx +++ b/web/src/pages/flow/constant.tsx @@ -455,7 +455,7 @@ export const initialArXivValues = { export const initialGoogleValues = { top_n: 10, - api_key: 'Xxx(get from https://serpapi.com/manage-api-key)', + api_key: 'YOUR_API_KEY (obtained from https://serpapi.com/manage-api-key)', country: 'cn', language: 'en', ...initialQueryBaseValues, @@ -465,7 +465,7 @@ export const initialBingValues = { top_n: 10, channel: 'Webpages', api_key: - '"YOUR_ACCESS_KEY"(get from https://www.microsoft.com/en-us/bing/apis/bing-web-search-api)', + 'YOUR_API_KEY (obtained from https://www.microsoft.com/en-us/bing/apis/bing-web-search-api)', country: 'CH', language: 'en', ...initialQueryBaseValues, From 80af3cc2d4499bc1f924b88470544c3384c238f9 Mon Sep 17 00:00:00 2001 From: Zhichang Yu Date: Thu, 28 Nov 2024 19:37:01 +0800 Subject: [PATCH 15/34] Don't log exception if object doesn't exist (#3724) ### What problem does this PR solve? Don't log exception if object doesn't exist. Close #1483 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/utils/minio_conn.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rag/utils/minio_conn.py b/rag/utils/minio_conn.py index 11682988e35..4662c7c8340 100644 --- a/rag/utils/minio_conn.py +++ b/rag/utils/minio_conn.py @@ -1,6 +1,7 @@ import logging import time from minio import Minio +from minio.error import S3Error from io import BytesIO from rag import settings from rag.utils import singleton @@ -84,8 +85,11 @@ def obj_exist(self, bucket, filename): return True else: return False + except S3Error as e: + if e.code in ["NoSuchKey", "NoSuchBucket", "ResourceNotFound"]: + return False except Exception: - logging.exception(f"Not found: {bucket}/{filename}") + logging.exception(f"obj_exist {bucket}/{filename} got exception") return False def get_presigned_url(self, bucket, fnm, expires): From a3e0ac9c0b1180c22a9b3c687453abae2a47ff04 Mon Sep 17 00:00:00 2001 From: balibabu Date: Thu, 28 Nov 2024 20:08:06 +0800 Subject: [PATCH 16/34] Fix: Clicking the checkbox of the pop-up window for editing chunk is invalid #3726 (#3727) ### What problem does this PR solve? 
Fix: Clicking the checkbox of the pop-up window for editing chunk is invalid #3726 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- web/src/locales/zh-traditional.ts | 2 +- web/src/locales/zh.ts | 2 +- .../components/chunk-creating-modal/index.tsx | 21 +++++++++++++------ .../components/knowledge-chunk/hooks.ts | 11 +++++++++- 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts index bd9602e97ee..518ba559bfc 100644 --- a/web/src/locales/zh-traditional.ts +++ b/web/src/locales/zh-traditional.ts @@ -296,7 +296,7 @@ export default { search: '搜尋', all: '所有', enabled: '啟用', - disabled: '禁用的', + disabled: '禁用', keyword: '關鍵詞', function: '函數', chunkMessage: '請輸入值!', diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts index 6ffdf246464..86040a8a304 100644 --- a/web/src/locales/zh.ts +++ b/web/src/locales/zh.ts @@ -313,7 +313,7 @@ export default { search: '搜索', all: '所有', enabled: '启用', - disabled: '禁用的', + disabled: '禁用', keyword: '关键词', function: '函数', chunkMessage: '请输入值!', diff --git a/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx b/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx index 7540c95545f..791d14ee4e1 100644 --- a/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx +++ b/web/src/pages/add-knowledge/components/knowledge-chunk/components/chunk-creating-modal/index.tsx @@ -2,7 +2,7 @@ import EditTag from '@/components/edit-tag'; import { useFetchChunk } from '@/hooks/chunk-hooks'; import { IModalProps } from '@/interfaces/common'; import { DeleteOutlined } from '@ant-design/icons'; -import { Checkbox, Divider, Form, Input, Modal, Space } from 'antd'; +import { Divider, Form, Input, Modal, Space, Switch } from 'antd'; import React, { useEffect, useState } from 'react'; import { useTranslation } from 'react-i18next'; import { useDeleteChunkByIds } from '../../hooks'; @@ -31,9 +31,14 @@ const ChunkCreatingModal: React.FC & kFProps> = ({ useEffect(() => { if (data?.code === 0) { - const { content_with_weight, important_kwd = [] } = data.data; + const { + content_with_weight, + important_kwd = [], + available_int, + } = data.data; form.setFieldsValue({ content: content_with_weight }); setKeywords(important_kwd); + setChecked(available_int === 1); } if (!chunkId) { @@ -48,6 +53,7 @@ const ChunkCreatingModal: React.FC & kFProps> = ({ onOk?.({ content: values.content, keywords, // keywords + available_int: checked ? 1 : 0, // available_int }); } catch (errorInfo) { console.log('Failed:', errorInfo); @@ -82,16 +88,19 @@ const ChunkCreatingModal: React.FC & kFProps> = ({
-

{t('chunk.keyword')} *

+

{t('chunk.keyword')} *

{chunkId && (
- - {t('chunk.enabled')} - + {t('common.delete')} diff --git a/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts b/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts index 0e4a2742622..70e3dc3645b 100644 --- a/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts +++ b/web/src/pages/add-knowledge/components/knowledge-chunk/hooks.ts @@ -95,12 +95,21 @@ export const useUpdateChunk = () => { const { documentId } = useGetKnowledgeSearchParams(); const onChunkUpdatingOk = useCallback( - async ({ content, keywords }: { content: string; keywords: string }) => { + async ({ + content, + keywords, + available_int, + }: { + content: string; + keywords: string; + available_int: number; + }) => { const code = await createChunk({ content_with_weight: content, doc_id: documentId, chunk_id: chunkId, important_kwd: keywords, // keywords + available_int, }); if (code === 0) { From 834c4d81f38d18704684f0d74ec235368b080023 Mon Sep 17 00:00:00 2001 From: Jin Hai Date: Thu, 28 Nov 2024 20:09:20 +0800 Subject: [PATCH 17/34] Update version info to v0.14.1 (#3720) ### What problem does this PR solve? Update version info to v0.14.1 ### Type of change - [x] Documentation Update --------- Signed-off-by: jinhai --- README.md | 6 +++--- README_id.md | 6 +++--- README_ja.md | 6 +++--- README_ko.md | 6 +++--- README_zh.md | 6 +++--- docs/guides/configure_knowledge_base.md | 6 +++--- docs/guides/deploy_local_llm.mdx | 4 ++-- docs/guides/develop/acquire_ragflow_api_key.md | 2 +- docs/guides/manage_files.md | 2 +- docs/guides/manage_team_members.md | 2 +- docs/guides/run_health_check.md | 4 ++-- docs/guides/start_chat.md | 2 +- docs/guides/upgrade_ragflow.mdx | 6 +++--- docs/quickstart.mdx | 8 ++++---- 14 files changed, 33 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index 1314ad7e80e..106d7b5e594 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ Static Badge - docker pull infiniflow/ragflow:v0.14.0 + docker pull infiniflow/ragflow:v0.14.1 Latest Release @@ -176,14 +176,14 @@ releases! 🌟 ``` > - To download a RAGFlow slim Docker image of a specific version, update the `RAGFLOW_IMAGE` variable in * - *docker/.env** to your desired version. For example, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0-slim`. After + *docker/.env** to your desired version. For example, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1-slim`. After making this change, rerun the command above to initiate the download. > - To download the dev version of RAGFlow Docker image *including* embedding models and Python libraries, update the `RAGFLOW_IMAGE` variable in **docker/.env** to `RAGFLOW_IMAGE=infiniflow/ragflow:dev`. After making this change, rerun the command above to initiate the download. > - To download a specific version of RAGFlow Docker image *including* embedding models and Python libraries, update the `RAGFLOW_IMAGE` variable in **docker/.env** to your desired version. For example, - `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0`. After making this change, rerun the command above to initiate the + `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1`. After making this change, rerun the command above to initiate the download. 
> **NOTE:** A RAGFlow Docker image that includes embedding models and Python libraries is approximately 9GB in size diff --git a/README_id.md b/README_id.md index a11f51a481e..3fa6262c068 100644 --- a/README_id.md +++ b/README_id.md @@ -20,7 +20,7 @@ Lencana Daring - docker pull infiniflow/ragflow:v0.14.0 + docker pull infiniflow/ragflow:v0.14.1 Rilis Terbaru @@ -169,14 +169,14 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io). ``` > - Untuk mengunduh versi tertentu dari image Docker RAGFlow slim, perbarui variabel `RAGFlow_IMAGE` di * - *docker/.env** sesuai dengan versi yang diinginkan. Misalnya, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0-slim`. + *docker/.env** sesuai dengan versi yang diinginkan. Misalnya, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1-slim`. Setelah mengubah ini, jalankan ulang perintah di atas untuk memulai unduhan. > - Untuk mengunduh versi dev dari image Docker RAGFlow *termasuk* model embedding dan library Python, perbarui variabel `RAGFlow_IMAGE` di **docker/.env** menjadi `RAGFLOW_IMAGE=infiniflow/ragflow:dev`. Setelah mengubah ini, jalankan ulang perintah di atas untuk memulai unduhan. > - Untuk mengunduh versi tertentu dari image Docker RAGFlow *termasuk* model embedding dan library Python, perbarui variabel `RAGFlow_IMAGE` di **docker/.env** sesuai dengan versi yang diinginkan. Misalnya, - `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0`. Setelah mengubah ini, jalankan ulang perintah di atas untuk memulai unduhan. + `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1`. Setelah mengubah ini, jalankan ulang perintah di atas untuk memulai unduhan. > **CATATAN:** Image Docker RAGFlow yang mencakup model embedding dan library Python berukuran sekitar 9GB dan mungkin memerlukan waktu lebih lama untuk dimuat. diff --git a/README_ja.md b/README_ja.md index 1d2afc233a4..e1854d1547b 100644 --- a/README_ja.md +++ b/README_ja.md @@ -20,7 +20,7 @@ Static Badge - docker pull infiniflow/ragflow:v0.14.0 + docker pull infiniflow/ragflow:v0.14.1 Latest Release @@ -148,9 +148,9 @@ $ docker compose -f docker-compose.yml up -d ``` - > - 特定のバージョンのRAGFlow slim Dockerイメージをダウンロードするには、**docker/.env**内の`RAGFlow_IMAGE`変数を希望のバージョンに更新します。例えば、`RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0`とします。この変更を行った後、上記のコマンドを再実行してダウンロードを開始してください。 + > - 特定のバージョンのRAGFlow slim Dockerイメージをダウンロードするには、**docker/.env**内の`RAGFlow_IMAGE`変数を希望のバージョンに更新します。例えば、`RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1`とします。この変更を行った後、上記のコマンドを再実行してダウンロードを開始してください。 > - RAGFlowの埋め込みモデルとPythonライブラリを含む開発版Dockerイメージをダウンロードするには、**docker/.env**内の`RAGFlow_IMAGE`変数を`RAGFLOW_IMAGE=infiniflow/ragflow:dev`に更新します。この変更を行った後、上記のコマンドを再実行してダウンロードを開始してください。 - > - 特定のバージョンのRAGFlow Dockerイメージ(埋め込みモデルとPythonライブラリを含む)をダウンロードするには、**docker/.env**内の`RAGFlow_IMAGE`変数を希望のバージョンに更新します。例えば、`RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0`とします。この変更を行った後、上記のコマンドを再実行してダウンロードを開始してください。 + > - 特定のバージョンのRAGFlow Dockerイメージ(埋め込みモデルとPythonライブラリを含む)をダウンロードするには、**docker/.env**内の`RAGFlow_IMAGE`変数を希望のバージョンに更新します。例えば、`RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1`とします。この変更を行った後、上記のコマンドを再実行してダウンロードを開始してください。 > **NOTE:** 埋め込みモデルとPythonライブラリを含むRAGFlow Dockerイメージのサイズは約9GBであり、読み込みにかなりの時間がかかる場合があります。 diff --git a/README_ko.md b/README_ko.md index 923b10362a0..0ad751e21e3 100644 --- a/README_ko.md +++ b/README_ko.md @@ -20,7 +20,7 @@ Static Badge - docker pull infiniflow/ragflow:v0.14.0 + docker pull infiniflow/ragflow:v0.14.1 Latest Release @@ -152,9 +152,9 @@ $ docker compose -f docker-compose.yml up -d ``` - > - 특정 버전의 RAGFlow slim Docker 이미지를 다운로드하려면, **docker/.env**에서 `RAGFlow_IMAGE` 변수를 원하는 버전으로 
업데이트하세요. 예를 들어, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0-slim`으로 설정합니다. 이 변경을 완료한 후, 위의 명령을 다시 실행하여 다운로드를 시작하세요. + > - 특정 버전의 RAGFlow slim Docker 이미지를 다운로드하려면, **docker/.env**에서 `RAGFlow_IMAGE` 변수를 원하는 버전으로 업데이트하세요. 예를 들어, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1-slim`으로 설정합니다. 이 변경을 완료한 후, 위의 명령을 다시 실행하여 다운로드를 시작하세요. > - RAGFlow의 임베딩 모델과 Python 라이브러리를 포함한 개발 버전 Docker 이미지를 다운로드하려면, **docker/.env**에서 `RAGFlow_IMAGE` 변수를 `RAGFLOW_IMAGE=infiniflow/ragflow:dev`로 업데이트하세요. 이 변경을 완료한 후, 위의 명령을 다시 실행하여 다운로드를 시작하세요. - > - 특정 버전의 RAGFlow Docker 이미지를 임베딩 모델과 Python 라이브러리를 포함하여 다운로드하려면, **docker/.env**에서 `RAGFlow_IMAGE` 변수를 원하는 버전으로 업데이트하세요. 예를 들어, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0` 로 설정합니다. 이 변경을 완료한 후, 위의 명령을 다시 실행하여 다운로드를 시작하세요. + > - 특정 버전의 RAGFlow Docker 이미지를 임베딩 모델과 Python 라이브러리를 포함하여 다운로드하려면, **docker/.env**에서 `RAGFlow_IMAGE` 변수를 원하는 버전으로 업데이트하세요. 예를 들어, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1` 로 설정합니다. 이 변경을 완료한 후, 위의 명령을 다시 실행하여 다운로드를 시작하세요. > **NOTE:** 임베딩 모델과 Python 라이브러리를 포함한 RAGFlow Docker 이미지의 크기는 약 9GB이며, 로드하는 데 상당히 오랜 시간이 걸릴 수 있습니다. diff --git a/README_zh.md b/README_zh.md index f2710d81e55..ee22fa597d6 100644 --- a/README_zh.md +++ b/README_zh.md @@ -20,7 +20,7 @@ Static Badge - docker pull infiniflow/ragflow:v0.14.0 + docker pull infiniflow/ragflow:v0.14.1 Latest Release @@ -149,9 +149,9 @@ $ docker compose -f docker-compose.yml up -d ``` - > - 如果你想下载并运行特定版本的 RAGFlow slim Docker 镜像,请在 **docker/.env** 文件中找到 `RAGFLOW_IMAGE` 变量,将其改为对应版本。例如 `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0-slim`,然后再运行上述命令。 + > - 如果你想下载并运行特定版本的 RAGFlow slim Docker 镜像,请在 **docker/.env** 文件中找到 `RAGFLOW_IMAGE` 变量,将其改为对应版本。例如 `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1-slim`,然后再运行上述命令。 > - 如果您想安装内置 embedding 模型和 Python 库的 dev 版本的 Docker 镜像,需要将 **docker/.env** 文件中的 `RAGFLOW_IMAGE` 变量修改为: `RAGFLOW_IMAGE=infiniflow/ragflow:dev`。 - > - 如果您想安装内置 embedding 模型和 Python 库的指定版本的 RAGFlow Docker 镜像,需要将 **docker/.env** 文件中的 `RAGFLOW_IMAGE` 变量修改为: `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0`。修改后,再运行上面的命令。 + > - 如果您想安装内置 embedding 模型和 Python 库的指定版本的 RAGFlow Docker 镜像,需要将 **docker/.env** 文件中的 `RAGFLOW_IMAGE` 变量修改为: `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1`。修改后,再运行上面的命令。 > **注意:** 安装内置 embedding 模型和 Python 库的指定版本的 RAGFlow Docker 镜像大小约 9 GB,可能需要更长时间下载,请耐心等待。 4. 服务器启动成功后再次确认服务器状态: diff --git a/docs/guides/configure_knowledge_base.md b/docs/guides/configure_knowledge_base.md index aef6ad5165e..457062b34c5 100644 --- a/docs/guides/configure_knowledge_base.md +++ b/docs/guides/configure_knowledge_base.md @@ -103,7 +103,7 @@ RAGFlow features visibility and explainability, allowing you to view the chunkin 2. Hover over each snapshot for a quick view of each chunk. -3. Double click the chunked texts to add keywords or make *manual* changes where necessary: +3. Double-click the chunked texts to add keywords or make *manual* changes where necessary: ![update chunk](https://github.com/infiniflow/ragflow/assets/93570324/1d84b408-4e9f-46fd-9413-8c1059bf9c76) @@ -111,7 +111,7 @@ RAGFlow features visibility and explainability, allowing you to view the chunkin You can add keywords to a file chunk to increase its ranking for queries containing those keywords. This action increases its keyword weight and can improve its position in search list. ::: -4. In Retrieval testing, ask a quick question in **Test text** to double check if your configurations work: +4. 
In Retrieval testing, ask a quick question in **Test text** to double-check if your configurations work: _As you can tell from the following, RAGFlow responds with truthful citations._ @@ -128,7 +128,7 @@ RAGFlow uses multiple recall of both full-text search and vector search in its c ## Search for knowledge base -As of RAGFlow v0.14.0, the search feature is still in a rudimentary form, supporting only knowledge base search by name. +As of RAGFlow v0.14.1, the search feature is still in a rudimentary form, supporting only knowledge base search by name. ![search knowledge base](https://github.com/infiniflow/ragflow/assets/93570324/836ae94c-2438-42be-879e-c7ad2a59693e) diff --git a/docs/guides/deploy_local_llm.mdx b/docs/guides/deploy_local_llm.mdx index 5dcaefc5a09..76c8543c6e9 100644 --- a/docs/guides/deploy_local_llm.mdx +++ b/docs/guides/deploy_local_llm.mdx @@ -108,7 +108,7 @@ Click on your logo **>** **Model Providers** **>** **System Model Settings** to Update your chat model accordingly in **Chat Configuration**: -> If your local model is an embedding model, update it on the configruation page of your knowledge base. +> If your local model is an embedding model, update it on the configuration page of your knowledge base. ## Deploy a local model using Xinference @@ -161,7 +161,7 @@ Click on your logo **>** **Model Providers** **>** **System Model Settings** to Update your chat model accordingly in **Chat Configuration**: -> If your local model is an embedding model, update it on the configruation page of your knowledge base. +> If your local model is an embedding model, update it on the configuration page of your knowledge base. ## Deploy a local model using IPEX-LLM diff --git a/docs/guides/develop/acquire_ragflow_api_key.md b/docs/guides/develop/acquire_ragflow_api_key.md index 82243fff8a9..b5d1bccc3fa 100644 --- a/docs/guides/develop/acquire_ragflow_api_key.md +++ b/docs/guides/develop/acquire_ragflow_api_key.md @@ -7,7 +7,7 @@ slug: /acquire_ragflow_api_key A key is required for the RAGFlow server to authenticate your requests via HTTP or a Python API. This documents provides instructions on obtaining a RAGFlow API key. -1. Click your avatar on the top right corner of the RAGFlow UI to access the configuration page. +1. Click your avatar in the top right corner of the RAGFlow UI to access the configuration page. 2. Click **API** to switch to the **API** page. 3. Obtain a RAGFlow API key: diff --git a/docs/guides/manage_files.md b/docs/guides/manage_files.md index 9756c62beb3..0e13998adf8 100644 --- a/docs/guides/manage_files.md +++ b/docs/guides/manage_files.md @@ -81,4 +81,4 @@ RAGFlow's file management allows you to download an uploaded file: ![download_file](https://github.com/infiniflow/ragflow/assets/93570324/cf3b297f-7d9b-4522-bf5f-4f45743e4ed5) -> As of RAGFlow v0.14.0, bulk download is not supported, nor can you download an entire folder. +> As of RAGFlow v0.14.1, bulk download is not supported, nor can you download an entire folder. diff --git a/docs/guides/manage_team_members.md b/docs/guides/manage_team_members.md index f87377411e3..f6c958b93fa 100644 --- a/docs/guides/manage_team_members.md +++ b/docs/guides/manage_team_members.md @@ -17,7 +17,7 @@ By default, each RAGFlow user is assigned a single team named after their name. Team members are currently *not* allowed to invite users to your team, and only you, the team owner, is permitted to do so. 
::: -To enter the **Team** page, click on your avatar on the top right corner of the page **>** Team: +To enter the **Team** page, click on your avatar in the top right corner of the page **>** Team: ![team](https://github.com/user-attachments/assets/0eac2503-26bc-4568-b3f2-bcd84069a07a) diff --git a/docs/guides/run_health_check.md b/docs/guides/run_health_check.md index fece2575825..af6d8491a3c 100644 --- a/docs/guides/run_health_check.md +++ b/docs/guides/run_health_check.md @@ -5,7 +5,7 @@ slug: /run_health_check # Run health check on RAGFlow's dependencies -Double check the health status of RAGFlow's dependencies. +Double-check the health status of RAGFlow's dependencies. The operation of RAGFlow depends on four services: @@ -16,7 +16,7 @@ The operation of RAGFlow depends on four services: If an exception or error occurs related to any of the above services, such as `Exception: Can't connect to ES cluster`, refer to this document to check their health status. -You can also click you avatar on the top right corner of the page **>** System to view the visualized health status of RAGFlow's core services. The following screenshot shows that all services are 'green' (running healthily). The task executor displays the *cumulative* number of completed and failed document parsing tasks from the past 30 minutes: +You can also click you avatar in the top right corner of the page **>** System to view the visualized health status of RAGFlow's core services. The following screenshot shows that all services are 'green' (running healthily). The task executor displays the *cumulative* number of completed and failed document parsing tasks from the past 30 minutes: ![system_status_page](https://github.com/user-attachments/assets/b0c1a11e-93e3-4947-b17a-1bfb4cdab6e4) diff --git a/docs/guides/start_chat.md b/docs/guides/start_chat.md index 81115bd8061..2120cca802b 100644 --- a/docs/guides/start_chat.md +++ b/docs/guides/start_chat.md @@ -19,7 +19,7 @@ You start an AI conversation by creating an assistant. - **Assistant name** is the name of your chat assistant. Each assistant corresponds to a dialogue with a unique combination of knowledge bases, prompts, hybrid search configurations, and large model settings. - **Empty response**: - - If you wish to *confine* RAGFlow's answers to your knowledge bases, leave a response here. Then when it doesn't retrieve an answer, it *uniformly* responds with what you set here. + - If you wish to *confine* RAGFlow's answers to your knowledge bases, leave a response here. Then, when it doesn't retrieve an answer, it *uniformly* responds with what you set here. - If you wish RAGFlow to *improvise* when it doesn't retrieve an answer from your knowledge bases, leave it blank, which may give rise to hallucinations. - **Show Quote**: This is a key feature of RAGFlow and enabled by default. RAGFlow does not work like a black box. instead, it clearly shows the sources of information that its responses are based on. - Select the corresponding knowledge bases. You can select one or multiple knowledge bases, but ensure that they use the same embedding model, otherwise an error would occur. diff --git a/docs/guides/upgrade_ragflow.mdx b/docs/guides/upgrade_ragflow.mdx index 31d19d91d6f..e329e579a3b 100644 --- a/docs/guides/upgrade_ragflow.mdx +++ b/docs/guides/upgrade_ragflow.mdx @@ -62,16 +62,16 @@ To upgrade RAGFlow, you must upgrade **both** your code **and** your Docker imag git clone https://github.com/infiniflow/ragflow.git ``` -2. 
Switch to the latest, officially published release, e.g., `v0.14.0`: +2. Switch to the latest, officially published release, e.g., `v0.14.1`: ```bash - git checkout v0.14.0 + git checkout v0.14.1 ``` 3. Update **ragflow/docker/.env** as follows: ```bash - RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0 + RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1 ``` 4. Update the RAGFlow image and restart RAGFlow: diff --git a/docs/quickstart.mdx b/docs/quickstart.mdx index 355747f0554..b3c6721e83e 100644 --- a/docs/quickstart.mdx +++ b/docs/quickstart.mdx @@ -32,9 +32,9 @@ This section provides instructions on setting up the RAGFlow server on Linux. If
1. Ensure vm.max_map_count ≥ 262144:

-   `vm.max_map_count`. This value sets the maximum number of memory map areas a process may have. Its default value is 65530. While most applications require fewer than a thousand maps, reducing this value can result in abmornal behaviors, and the system will throw out-of-memory errors when a process reaches the limitation.
+   `vm.max_map_count`. This value sets the maximum number of memory map areas a process may have. Its default value is 65530. While most applications require fewer than a thousand maps, reducing this value can result in abnormal behaviors, and the system will throw out-of-memory errors when a process reaches the limitation.

-   RAGFlow v0.14.0 uses Elasticsearch for multiple recall. Setting the value of `vm.max_map_count` correctly is crucial to the proper functioning of the Elasticsearch component.
+   RAGFlow v0.14.1 uses Elasticsearch or [Infinity](https://github.com/infiniflow/infinity) for multiple recall. Setting the value of `vm.max_map_count` correctly is crucial to the proper functioning of the Elasticsearch component.

-   To download a RAGFlow slim Docker image of a specific version, update the `RAGFlOW_IMAGE` variable in **docker/.env** to your desired version. For example, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0-slim`. After making this change, rerun the command above to initiate the download.
+   > - To download a RAGFlow slim Docker image of a specific version, update the `RAGFLOW_IMAGE` variable in **docker/.env** to your desired version. For example, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1-slim`. After making this change, rerun the command above to initiate the download.
    > - To download the dev version of RAGFlow Docker image *including* embedding models and Python libraries, update the `RAGFlOW_IMAGE` variable in **docker/.env** to `RAGFLOW_IMAGE=infiniflow/ragflow:dev`. After making this change, rerun the command above to initiate the download.
-   > - To download a specific version of RAGFlow Docker image *including* embedding models and Python libraries, update the `RAGFlOW_IMAGE` variable in **docker/.env** to your desired version. For example, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.0`. After making this change, rerun the command above to initiate the download.
+   > - To download a specific version of the RAGFlow Docker image *including* embedding models and Python libraries, update the `RAGFLOW_IMAGE` variable in **docker/.env** to your desired version. For example, `RAGFLOW_IMAGE=infiniflow/ragflow:v0.14.1`. After making this change, rerun the command above to initiate the download.

:::tip NOTE
A RAGFlow Docker image that includes embedding models and Python libraries is approximately 9GB in size and may take significantly longer time to load.

From fcdc6ad08552158116e37ede3c21677d1b7e2c1d Mon Sep 17 00:00:00 2001
From: kunkeji <49889250+kunkeji@users.noreply.github.com>
Date: Fri, 29 Nov 2024 10:36:48 +0800
Subject: [PATCH 18/34] Fix the issue where the agent interface cannot call the context (#3725)

### What problem does this PR solve?
Fix the agent interface so that, when called from the web test page, it uses the context record of the user chat instead of the web-testing context. Also remove the duplicated appending of messages; the stored messages can be inspected in the `message` section of the `api_4_conversation` database table.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---------

Co-authored-by: Kevin Hu 
Co-authored-by: Jin Hai 
---
 api/apps/sdk/session.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index fff67707b58..3874035bf74 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -308,7 +308,6 @@ def rename_field(ans):
         if 'docnm_kwd' in chunk_i:
             chunk_i['doc_name'] = chunk_i['docnm_kwd']
             chunk_i.pop('docnm_kwd')
-        conv.message.append(msg[-1])

     if not conv.reference:
         conv.reference = []
@@ -317,7 +316,6 @@ def rename_field(ans):

     final_ans = {"reference": [], "content": ""}

-    canvas.messages.append(msg[-1])
     canvas.add_user_input(msg[-1]["content"])

     if stream:

From b89f7c69adf9422080ab0db089406eb05d76c5df Mon Sep 17 00:00:00 2001
From: Kevin Hu 
Date: Fri, 29 Nov 2024 10:37:09 +0800
Subject: [PATCH 19/34] Fix image_id absence issue (#3735)

### What problem does this PR solve?

#3731

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

---
 api/apps/sdk/session.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/apps/sdk/session.py b/api/apps/sdk/session.py
index 3874035bf74..c5a7c4991d3 100644
--- a/api/apps/sdk/session.py
+++ b/api/apps/sdk/session.py
@@ -413,7 +413,7 @@ def list_session(chat_id,tenant_id):
                     "document_id": chunk["doc_id"],
                     "document_name": chunk["docnm_kwd"],
                     "dataset_id": chunk["kb_id"],
-                    "image_id": chunk["image_id"],
+                    "image_id": chunk.get("image_id", ""),
                     "similarity": chunk["similarity"],
                     "vector_similarity": chunk["vector_similarity"],
                     "term_similarity": chunk["term_similarity"],

From a0c0a957b4cc7ac0b68483a93e76fbd398f66668 Mon Sep 17 00:00:00 2001
From: Jin Hai 
Date: Fri, 29 Nov 2024 10:49:15 +0800
Subject: [PATCH 20/34] Fix GPU docker compose file (#3736)

### What problem does this PR solve?

_Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

Signed-off-by: Jin Hai 
---
 docker/docker-compose-gpu.yml | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/docker/docker-compose-gpu.yml b/docker/docker-compose-gpu.yml
index 1f0fe8a4c85..fc82f03f498 100644
--- a/docker/docker-compose-gpu.yml
+++ b/docker/docker-compose-gpu.yml
@@ -1,14 +1,11 @@
 include:
-  - path: ./docker-compose-base.yml
-    env_file: ./.env
+  - ./docker-compose-base.yml

 services:
   ragflow:
     depends_on:
       mysql:
         condition: service_healthy
-      es01:
-        condition: service_healthy
     image: ${RAGFLOW_IMAGE}
     container_name: ragflow-server
     ports:
@@ -20,13 +17,18 @@ services:
       - ./nginx/ragflow.conf:/etc/nginx/conf.d/ragflow.conf
       - ./nginx/proxy.conf:/etc/nginx/proxy.conf
       - ./nginx/nginx.conf:/etc/nginx/nginx.conf
+    env_file: .env
     environment:
       - TZ=${TIMEZONE}
-      - HF_ENDPOINT=https://huggingface.co
+      - HF_ENDPOINT=${HF_ENDPOINT}
      - MACOS=${MACOS}
    networks:
      - ragflow
-    restart: always
+    restart: on-failure
+    # https://docs.docker.com/engine/daemon/prometheus/#create-a-prometheus-configuration
+    # If you're using Docker Desktop, the --add-host flag is optional. This flag makes sure that the host's internal IP gets exposed to the Prometheus container. 
+ extra_hosts: + - "host.docker.internal:host-gateway" deploy: resources: reservations: From 27cd765d6fe522c789f1833437593a125a74d6d9 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Fri, 29 Nov 2024 11:55:41 +0800 Subject: [PATCH 21/34] Fix raptor issue (#3737) ### What problem does this PR solve? #3732 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/raptor.py | 30 +++++++++++++++++------------- rag/svr/task_executor.py | 2 +- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/rag/raptor.py b/rag/raptor.py index 5974e371d48..51f1ad1177d 100644 --- a/rag/raptor.py +++ b/rag/raptor.py @@ -33,7 +33,7 @@ def __init__(self, max_cluster, llm_model, embd_model, prompt, max_token=512, th self._prompt = prompt self._max_token = max_token - def _get_optimal_clusters(self, embeddings: np.ndarray, random_state:int): + def _get_optimal_clusters(self, embeddings: np.ndarray, random_state: int): max_clusters = min(self._max_cluster, len(embeddings)) n_clusters = np.arange(1, max_clusters) bics = [] @@ -44,7 +44,7 @@ def _get_optimal_clusters(self, embeddings: np.ndarray, random_state:int): optimal_clusters = n_clusters[np.argmin(bics)] return optimal_clusters - def __call__(self, chunks: tuple[str, np.ndarray], random_state, callback=None): + def __call__(self, chunks, random_state, callback=None): layers = [(0, len(chunks))] start, end = 0, len(chunks) if len(chunks) <= 1: return @@ -54,13 +54,15 @@ def summarize(ck_idx, lock): nonlocal chunks try: texts = [chunks[i][0] for i in ck_idx] - len_per_chunk = int((self._llm_model.max_length - self._max_token)/len(texts)) + len_per_chunk = int((self._llm_model.max_length - self._max_token) / len(texts)) cluster_content = "\n".join([truncate(t, max(1, len_per_chunk)) for t in texts]) cnt = self._llm_model.chat("You're a helpful assistant.", - [{"role": "user", "content": self._prompt.format(cluster_content=cluster_content)}], - {"temperature": 0.3, "max_tokens": self._max_token} - ) - cnt = re.sub("(······\n由于长度的原因,回答被截断了,要继续吗?|For the content length reason, it stopped, continue?)", "", cnt) + [{"role": "user", + "content": self._prompt.format(cluster_content=cluster_content)}], + {"temperature": 0.3, "max_tokens": self._max_token} + ) + cnt = re.sub("(······\n由于长度的原因,回答被截断了,要继续吗?|For the content length reason, it stopped, continue?)", "", + cnt) logging.debug(f"SUM: {cnt}") embds, _ = self._embd_model.encode([cnt]) with lock: @@ -74,10 +76,10 @@ def summarize(ck_idx, lock): while end - start > 1: embeddings = [embd for _, embd in chunks[start: end]] if len(embeddings) == 2: - summarize([start, start+1], Lock()) + summarize([start, start + 1], Lock()) if callback: - callback(msg="Cluster one layer: {} -> {}".format(end-start, len(chunks)-end)) - labels.extend([0,0]) + callback(msg="Cluster one layer: {} -> {}".format(end - start, len(chunks) - end)) + labels.extend([0, 0]) layers.append((end, len(chunks))) start = end end = len(chunks) @@ -85,7 +87,7 @@ def summarize(ck_idx, lock): n_neighbors = int((len(embeddings) - 1) ** 0.8) reduced_embeddings = umap.UMAP( - n_neighbors=max(2, n_neighbors), n_components=min(12, len(embeddings)-2), metric="cosine" + n_neighbors=max(2, n_neighbors), n_components=min(12, len(embeddings) - 2), metric="cosine" ).fit_transform(embeddings) n_clusters = self._get_optimal_clusters(reduced_embeddings, random_state) if n_clusters == 1: @@ -100,7 +102,7 @@ def summarize(ck_idx, lock): with ThreadPoolExecutor(max_workers=12) as executor: threads = [] for c in range(n_clusters): - ck_idx = 
[i+start for i in range(len(lbls)) if lbls[i] == c] + ck_idx = [i + start for i in range(len(lbls)) if lbls[i] == c] threads.append(executor.submit(summarize, ck_idx, lock)) wait(threads, return_when=ALL_COMPLETED) logging.debug(str([t.result() for t in threads])) @@ -109,7 +111,9 @@ def summarize(ck_idx, lock): labels.extend(lbls) layers.append((end, len(chunks))) if callback: - callback(msg="Cluster one layer: {} -> {}".format(end-start, len(chunks)-end)) + callback(msg="Cluster one layer: {} -> {}".format(end - start, len(chunks) - end)) start = end end = len(chunks) + return chunks + diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index d7be6fa818c..cc69bdaa6fb 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -344,7 +344,7 @@ def run_raptor(row, chat_mdl, embd_mdl, callback=None): row["parser_config"]["raptor"]["threshold"] ) original_length = len(chunks) - raptor(chunks, row["parser_config"]["raptor"]["random_seed"], callback) + chunks = raptor(chunks, row["parser_config"]["raptor"]["random_seed"], callback) doc = { "doc_id": row["doc_id"], "kb_id": [str(row["kb_id"])], From 9a3febb7c52232caf78a7930309b1812972fc635 Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Fri, 29 Nov 2024 13:37:50 +0800 Subject: [PATCH 22/34] Refactor dockerfile (#3741) ### What problem does this PR solve? ### Type of change - [x] Refactoring --- Dockerfile | 66 +++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3595720e088..985eb061bbd 100644 --- a/Dockerfile +++ b/Dockerfile @@ -59,6 +59,21 @@ USER root WORKDIR /ragflow +# install dependencies from poetry.lock file +COPY pyproject.toml poetry.toml poetry.lock ./ + +RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \ + if [ "$LIGHTEN" == "1" ]; then \ + poetry install --no-root; \ + else \ + poetry install --no-root --with=full; \ + fi + +COPY web web +COPY docs docs +RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \ + cd web && npm install --force && npm run build + COPY .git /ragflow/.git RUN current_commit=$(git rev-parse --short HEAD); \ @@ -78,28 +93,16 @@ RUN current_commit=$(git rev-parse --short HEAD); \ echo "RAGFlow version: $version_info"; \ echo $version_info > /ragflow/VERSION -COPY web web -COPY docs docs -RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \ - cd web && npm install --force && npm run build - -# install dependencies from poetry.lock file -COPY pyproject.toml poetry.toml poetry.lock ./ - -RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \ - if [ "$LIGHTEN" == "1" ]; then \ - poetry install --no-root; \ - else \ - poetry install --no-root --with=full; \ - fi - # production stage FROM base AS production USER root WORKDIR /ragflow -COPY --from=builder /ragflow/VERSION /ragflow/VERSION +# Copy Python environment and packages +ENV VIRTUAL_ENV=/ragflow/.venv +COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV} +ENV PATH="${VIRTUAL_ENV}/bin:${PATH}" # Install python packages' dependencies # cv2 requires libGL.so.1 @@ -107,15 +110,6 @@ RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=l apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \ rm -rf /var/lib/apt/lists/* -COPY web web -COPY api api -COPY conf conf -COPY deepdoc deepdoc -COPY rag rag -COPY agent agent -COPY graphrag graphrag -COPY pyproject.toml 
poetry.toml poetry.lock ./
-
 # Copy models downloaded via download_deps.py
 RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
 RUN --mount=type=bind,source=huggingface.co,target=/huggingface.co \
@@ -153,18 +147,24 @@ RUN --mount=type=bind,source=chromedriver-linux64-121-0-6167-85,target=/chromedr
     mv chromedriver /usr/local/bin/ && \
     rm -f /usr/bin/google-chrome

-# Copy compiled web pages
-COPY --from=builder /ragflow/web/dist /ragflow/web/dist
-
-# Copy Python environment and packages
-ENV VIRTUAL_ENV=/ragflow/.venv
-COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
-ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
-
 ENV PYTHONPATH=/ragflow/

+COPY web web
+COPY api api
+COPY conf conf
+COPY deepdoc deepdoc
+COPY rag rag
+COPY agent agent
+COPY graphrag graphrag
+COPY pyproject.toml poetry.toml poetry.lock ./
+
 COPY docker/service_conf.yaml.template ./conf/service_conf.yaml.template
 COPY docker/entrypoint.sh ./entrypoint.sh
 RUN chmod +x ./entrypoint.sh

+# Copy compiled web pages
+COPY --from=builder /ragflow/web/dist /ragflow/web/dist
+
+COPY --from=builder /ragflow/VERSION /ragflow/VERSION
+
 ENTRYPOINT ["./entrypoint.sh"]

From 06a21d2031c96e1fed607c36342820c560d40abe Mon Sep 17 00:00:00 2001
From: kunkeji <49889250+kunkeji@users.noreply.github.com>
Date: Fri, 29 Nov 2024 13:45:31 +0800
Subject: [PATCH 23/34] Change Traditional Chinese to Simplified Chinese (#3742)

### What problem does this PR solve?

Change Traditional Chinese to Simplified Chinese

### Type of change

- [x] Other (please describe):

---
 web/src/locales/zh.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts
index 86040a8a304..0aeb2541c8a 100644
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@@ -75,7 +75,7 @@ export default {
     namePlaceholder: '请输入名称',
     doc: '文档',
     searchKnowledgePlaceholder: '搜索',
-    noMoreData: '沒有更多的數據了',
+    noMoreData: '没有更多数据了',
   },
   knowledgeDetails: {
     dataset: '数据集',

From 0a62dd7a7ee9d2a0e4d311c13faec688c26c654b Mon Sep 17 00:00:00 2001
From: Jin Hai 
Date: Fri, 29 Nov 2024 14:50:45 +0800
Subject: [PATCH 24/34] Update document (#3746)

### What problem does this PR solve?

Fix description on local LLM deployment case

### Type of change

- [x] Documentation Update

---------

Signed-off-by: jinhai 
Co-authored-by: writinwaters <93570324+writinwaters@users.noreply.github.com>
---
 docs/guides/deploy_local_llm.mdx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/guides/deploy_local_llm.mdx b/docs/guides/deploy_local_llm.mdx
index 76c8543c6e9..1c7b856d476 100644
--- a/docs/guides/deploy_local_llm.mdx
+++ b/docs/guides/deploy_local_llm.mdx
@@ -74,9 +74,9 @@ In the popup window, complete basic settings for Ollama:
 4. OPTIONAL: Switch on the toggle under **Does it support Vision?** if your model includes an image-to-text model.

 :::caution NOTE
+- If RAGFlow is in Docker and Ollama runs on the same host machine, use `http://host.docker.internal:11434` as base URL.
 - If your Ollama and RAGFlow run on the same machine, use `http://localhost:11434` as base URL.
-- If your Ollama and RAGFlow run on the same machine and Ollama is in Docker, use `http://host.docker.internal:11434` as base URL.
-- If your Ollama runs on a different machine from RAGFlow, use `http://<IP_OF_OLLAMA_MACHINE>:11434` as base URL.
+- If your Ollama runs on a different machine from RAGFlow, use `http://<IP_OF_OLLAMA_MACHINE>:11434` as base URL. 
::: :::danger WARNING From d94386e00a9cb3a9e5d3f66e3947ed0da2a9105b Mon Sep 17 00:00:00 2001 From: Zhichang Yu Date: Fri, 29 Nov 2024 14:52:27 +0800 Subject: [PATCH 25/34] Pass top_p to ollama (#3744) ### What problem does this PR solve? Pass top_p to ollama. Close #1769 ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --- rag/llm/chat_model.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py index 9dea59a72a5..90786c58faa 100644 --- a/rag/llm/chat_model.py +++ b/rag/llm/chat_model.py @@ -356,7 +356,7 @@ def chat(self, system, history, gen_conf): options = {} if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"] if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"] - if "top_p" in gen_conf: options["top_k"] = gen_conf["top_p"] + if "top_p" in gen_conf: options["top_p"] = gen_conf["top_p"] if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"] if "frequency_penalty" in gen_conf: options["frequency_penalty"] = gen_conf["frequency_penalty"] response = self.client.chat( @@ -376,7 +376,7 @@ def chat_streamly(self, system, history, gen_conf): options = {} if "temperature" in gen_conf: options["temperature"] = gen_conf["temperature"] if "max_tokens" in gen_conf: options["num_predict"] = gen_conf["max_tokens"] - if "top_p" in gen_conf: options["top_k"] = gen_conf["top_p"] + if "top_p" in gen_conf: options["top_p"] = gen_conf["top_p"] if "presence_penalty" in gen_conf: options["presence_penalty"] = gen_conf["presence_penalty"] if "frequency_penalty" in gen_conf: options["frequency_penalty"] = gen_conf["frequency_penalty"] ans = "" @@ -430,7 +430,7 @@ def do_rpc(*args, **kwargs): try: self._connection.send(pickle.dumps((name, args, kwargs))) return pickle.loads(self._connection.recv()) - except Exception as e: + except Exception: self.__conn() raise Exception("RPC connection lost!") @@ -442,7 +442,7 @@ def __init__(self, key, model_name): self.client = Client(port=12345, protocol="grpc", asyncio=True) def _prepare_prompt(self, system, history, gen_conf): - from rag.svr.jina_server import Prompt, Generation + from rag.svr.jina_server import Prompt if system: history.insert(0, {"role": "system", "content": system}) if "max_tokens" in gen_conf: @@ -450,7 +450,7 @@ def _prepare_prompt(self, system, history, gen_conf): return Prompt(message=history, gen_conf=gen_conf) def _stream_response(self, endpoint, prompt): - from rag.svr.jina_server import Prompt, Generation + from rag.svr.jina_server import Generation answer = "" try: res = self.client.stream_doc( From 1e0fc76efa2e16c79f433803611b2bb5b55445bd Mon Sep 17 00:00:00 2001 From: writinwaters <93570324+writinwaters@users.noreply.github.com> Date: Fri, 29 Nov 2024 16:00:42 +0800 Subject: [PATCH 26/34] Added release notes v0.11.0 (#3745) ### What problem does this PR solve? ### Type of change - [x] Documentation Update --- docker/.env | 6 +----- docker/README.md | 6 +++--- docs/release_notes.md | 25 ++++++++++++++++++++++--- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/docker/.env b/docker/.env index 6f8ce6a4c59..d64cd2be779 100644 --- a/docker/.env +++ b/docker/.env @@ -22,7 +22,6 @@ ES_HOST=es01 ES_PORT=1200 # The password for Elasticsearch. -# When updated, you must revise the `es.password` entry in service_conf.yaml accordingly. 
ELASTIC_PASSWORD=infini_rag_flow # The port used to expose the Kibana service to the host machine, @@ -44,7 +43,6 @@ INFINITY_HTTP_PORT=23820 INFINITY_PSQL_PORT=5432 # The password for MySQL. -# When updated, you must revise the `mysql.password` entry in service_conf.yaml. MYSQL_PASSWORD=infini_rag_flow # The hostname where the MySQL service is exposed MYSQL_HOST=mysql @@ -75,7 +73,6 @@ REDIS_HOST=redis # allowing EXTERNAL access to the Redis service running inside the Docker container. REDIS_PORT=6379 # The password for Redis. -# When updated, you must revise the `redis.password` entry in service_conf.yaml accordingly. REDIS_PASSWORD=infini_rag_flow # The port used to expose RAGFlow's HTTP API service to the host machine, @@ -118,7 +115,6 @@ RAGFLOW_IMAGE=infiniflow/ragflow:dev-slim # RAGFLOW_IMAGE=swr.cn-north-4.myhuaweicloud.com/infiniflow/ragflow:dev # RAGFLOW_IMAGE=registry.cn-hangzhou.aliyuncs.com/infiniflow/ragflow:dev - # The local time zone. TIMEZONE='Asia/Shanghai' @@ -130,6 +126,6 @@ TIMEZONE='Asia/Shanghai' # MACOS=1 # The maximum file size for each uploaded file, in bytes. -# You can uncomment this line and update the value if you wish to change 128M file size limit +# You can uncomment this line and update the value if you wish to change the 128M file size limit # MAX_CONTENT_LENGTH=134217728 diff --git a/docker/README.md b/docker/README.md index bab35dbe9e7..da51bbf6d36 100644 --- a/docker/README.md +++ b/docker/README.md @@ -133,14 +133,14 @@ The [.env](./.env) file contains important environment variables for Docker. - `mysql` - `name`: The MySQL database name. Defaults to `rag_flow`. - `user`: The username for MySQL. - - `password`: The password for MySQL. When updated, you must revise the `MYSQL_PASSWORD` variable in [.env](./.env) accordingly. + - `password`: The password for MySQL. - `port`: The MySQL serving port inside the Docker container. Defaults to `3306`. - `max_connections`: The maximum number of concurrent connections to the MySQL database. Defaults to `100`. - `stale_timeout`: Timeout in seconds. - `minio` - - `user`: The username for MinIO. When updated, you must revise the `MINIO_USER` variable in [.env](./.env) accordingly. - - `password`: The password for MinIO. When updated, you must revise the `MINIO_PASSWORD` variable in [.env](./.env) accordingly. + - `user`: The username for MinIO. + - `password`: The password for MinIO. - `host`: The MinIO serving IP *and* port inside the Docker container. Defaults to `minio:9000`. - `oauth` diff --git a/docs/release_notes.md b/docs/release_notes.md index 61435818c7c..a5463015e13 100644 --- a/docs/release_notes.md +++ b/docs/release_notes.md @@ -93,10 +93,10 @@ Released on September 30, 2024. - Offers slim editions of RAGFlow's Docker images, which do not include built-in BGE/BCE embedding or reranking models. - Improves the results of multi-round dialogues. - Enables users to remove added LLM vendors. -- Adds support for OpenTTS and SparkTTS models. +- Adds support for **OpenTTS** and **SparkTTS** models. - Implements an **Excel to HTML** toggle in the **General** chunk method, allowing users to parse a spreadsheet into either HTML tables or key-value pairs by row. - Adds agent tools **YahooFance** and **Jin10**. -- Adds a template for an investment advisor agent. +- Adds an investment advisor agent template. 
### Compatibility changes @@ -115,4 +115,23 @@ See [Upgrade RAGFlow](https://ragflow.io/docs/dev/upgrade_ragflow) for instructi #### Added documents -- [Upgrade RAGFlow](https://ragflow.io/docs/dev/upgrade_ragflow) \ No newline at end of file +- [Upgrade RAGFlow](https://ragflow.io/docs/dev/upgrade_ragflow) + +## v0.11.0 + +Released on September 14, 2024 + +### New features + +- Introduces an AI search interface within the RAGFlow UI. +- Supports audio output via **FishAudio** or **Tongyi Qwen TTS**. +- Allows the use of Postgres for metadata storage, in addition to MySQL. +- Supports object storage options with S3 or Azure Blob. +- Supports model vendors: **Anthropic**, **Voyage AI**, and **Google Cloud**. +- Supports the use of **Tencent Cloud ASR** for audio content recognition. +- Adds finance-specific agent components: **WenCai**, **AkShare**, **YahooFinance**, and **TuShare**. +- Adds a medical consultant agent template. +- Supports running retrieval benchmarking on the following datasets: + - [ms_marco_v1.1](https://huggingface.co/datasets/microsoft/ms_marco) + - [trivia_qa](https://huggingface.co/datasets/mandarjoshi/trivia_qa) + - [miracl](https://huggingface.co/datasets/miracl/miracl) \ No newline at end of file From c93e0355c3bce1db9d1fdcc92670a240799cd4e2 Mon Sep 17 00:00:00 2001 From: balibabu Date: Fri, 29 Nov 2024 16:05:46 +0800 Subject: [PATCH 27/34] Feat: Add DatasetTable #3221 (#3743) ### What problem does this PR solve? Feat: Add DatasetTable #3221 ### Type of change - [x] New Feature (non-breaking change which adds functionality) --- web/src/components/ui/switch.tsx | 4 +- web/src/layouts/next.tsx | 2 +- .../pages/dataset/dataset/dataset-table.tsx | 208 ++++++++++-------- web/src/pages/dataset/index.tsx | 2 +- web/src/pages/dataset/sidebar/index.tsx | 11 +- web/tailwind.config.js | 1 + web/tailwind.css | 2 + 7 files changed, 132 insertions(+), 98 deletions(-) diff --git a/web/src/components/ui/switch.tsx b/web/src/components/ui/switch.tsx index fa53e7cd429..9c523adabfc 100644 --- a/web/src/components/ui/switch.tsx +++ b/web/src/components/ui/switch.tsx @@ -11,7 +11,7 @@ const Switch = React.forwardRef< >(({ className, ...props }, ref) => ( diff --git a/web/src/layouts/next.tsx b/web/src/layouts/next.tsx index a4fbef17dab..d92a3ff0712 100644 --- a/web/src/layouts/next.tsx +++ b/web/src/layouts/next.tsx @@ -3,7 +3,7 @@ import { Header } from './next-header'; export default function NextLayout() { return ( -
+
diff --git a/web/src/pages/dataset/dataset/dataset-table.tsx b/web/src/pages/dataset/dataset/dataset-table.tsx index bc9dd95010e..3856a287206 100644 --- a/web/src/pages/dataset/dataset/dataset-table.tsx +++ b/web/src/pages/dataset/dataset/dataset-table.tsx @@ -12,7 +12,7 @@ import { getSortedRowModel, useReactTable, } from '@tanstack/react-table'; -import { ArrowUpDown, MoreHorizontal } from 'lucide-react'; +import { ArrowUpDown, MoreHorizontal, Pencil } from 'lucide-react'; import * as React from 'react'; import { Button } from '@/components/ui/button'; @@ -25,6 +25,7 @@ import { DropdownMenuSeparator, DropdownMenuTrigger, } from '@/components/ui/dropdown-menu'; +import { Switch } from '@/components/ui/switch'; import { Table, TableBody, @@ -35,6 +36,7 @@ import { } from '@/components/ui/table'; import { RunningStatus } from '@/constants/knowledge'; import { IDocumentInfo } from '@/interfaces/database/document'; +import { useTranslation } from 'react-i18next'; const data: IDocumentInfo[] = [ { @@ -68,97 +70,6 @@ const data: IDocumentInfo[] = [ }, ]; -export const columns: ColumnDef[] = [ - { - id: 'select', - header: ({ table }) => ( - table.toggleAllPageRowsSelected(!!value)} - aria-label="Select all" - /> - ), - cell: ({ row }) => ( - row.toggleSelected(!!value)} - aria-label="Select row" - /> - ), - enableSorting: false, - enableHiding: false, - }, - { - accessorKey: 'status', - header: 'Status', - cell: ({ row }) => ( -
{row.getValue('status')}
- ), - }, - { - accessorKey: 'email', - header: ({ column }) => { - return ( - - ); - }, - cell: ({ row }) =>
{row.getValue('email')}
, - }, - { - accessorKey: 'amount', - header: () =>
Amount
, - cell: ({ row }) => { - const amount = parseFloat(row.getValue('amount')); - - // Format the amount as a dollar amount - const formatted = new Intl.NumberFormat('en-US', { - style: 'currency', - currency: 'USD', - }).format(amount); - - return
{formatted}
; - }, - }, - { - id: 'actions', - enableHiding: false, - cell: ({ row }) => { - const payment = row.original; - - return ( - - - - - - Actions - navigator.clipboard.writeText(payment.id)} - > - Copy payment ID - - - View customer - View payment details - - - ); - }, - }, -]; - export function DatasetTable() { const [sorting, setSorting] = React.useState([]); const [columnFilters, setColumnFilters] = React.useState( @@ -167,6 +78,119 @@ export function DatasetTable() { const [columnVisibility, setColumnVisibility] = React.useState({}); const [rowSelection, setRowSelection] = React.useState({}); + const { t } = useTranslation('translation', { + keyPrefix: 'knowledgeDetails', + }); + + const columns: ColumnDef[] = [ + { + id: 'select', + header: ({ table }) => ( + table.toggleAllPageRowsSelected(!!value)} + aria-label="Select all" + /> + ), + cell: ({ row }) => ( + row.toggleSelected(!!value)} + aria-label="Select row" + /> + ), + enableSorting: false, + enableHiding: false, + }, + { + accessorKey: 'name', + header: ({ column }) => { + return ( + + ); + }, + cell: ({ row }) => ( +
{row.getValue('name')}
+ ), + }, + { + accessorKey: 'create_time', + header: ({ column }) => { + return ( + + ); + }, + cell: ({ row }) => ( +
{row.getValue('create_time')}
+ ), + }, + { + accessorKey: 'parser_id', + header: t('chunkMethod'), + cell: ({ row }) => ( +
{row.getValue('parser_id')}
+ ), + }, + { + accessorKey: 'run', + header: t('parsingStatus'), + cell: ({ row }) => ( + + ), + }, + { + id: 'actions', + header: t('action'), + enableHiding: false, + cell: ({ row }) => { + const payment = row.original; + + return ( +
+ + + + + + + + Actions + navigator.clipboard.writeText(payment.id)} + > + Copy payment ID + + + View customer + View payment details + + +
+ ); + }, + }, + ]; const table = useReactTable({ data, diff --git a/web/src/pages/dataset/index.tsx b/web/src/pages/dataset/index.tsx index 7090b049b2e..d79abb76160 100644 --- a/web/src/pages/dataset/index.tsx +++ b/web/src/pages/dataset/index.tsx @@ -3,7 +3,7 @@ import { SideBar } from './sidebar'; export default function DatasetWrapper() { return ( -
+
diff --git a/web/src/pages/dataset/sidebar/index.tsx b/web/src/pages/dataset/sidebar/index.tsx index 2fb76f97255..e3711bc384b 100644 --- a/web/src/pages/dataset/sidebar/index.tsx +++ b/web/src/pages/dataset/sidebar/index.tsx @@ -2,7 +2,7 @@ import { Button } from '@/components/ui/button'; import { KnowledgeRouteKey } from '@/constants/knowledge'; import { useSecondPathName } from '@/hooks/route-hook'; import { cn } from '@/lib/utils'; -import { Banknote, LayoutGrid, User } from 'lucide-react'; +import { Banknote, LayoutGrid, Trash2, User } from 'lucide-react'; import { useHandleMenuClick } from './hooks'; const items = [ @@ -29,7 +29,7 @@ export function SideBar() { const { handleMenuClick } = useHandleMenuClick(); return ( -
{item.name} -

{item.description}

+

{item.description}


From a19210daf1ce7caeae2358f3e12f8a14d38343de Mon Sep 17 00:00:00 2001
From: balibabu 
Date: Fri, 29 Nov 2024 18:13:59 +0800
Subject: [PATCH 34/34] Feat: Add tooltip to delimiter field #1909 (#3758)

### What problem does this PR solve?

Feat: Add tooltip to delimiter field #1909

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
---
 web/src/components/delimiter.tsx  | 1 +
 web/src/locales/en.ts             | 2 ++
 web/src/locales/zh-traditional.ts | 2 ++
 web/src/locales/zh.ts             | 2 ++
 4 files changed, 7 insertions(+)

diff --git a/web/src/components/delimiter.tsx b/web/src/components/delimiter.tsx
index 5605763de5f..25e3128bde2 100644
--- a/web/src/components/delimiter.tsx
+++ b/web/src/components/delimiter.tsx
@@ -25,6 +25,7 @@ const Delimiter = () => {
       label={t('knowledgeDetails.delimiter')}
       initialValue={`\\n!?;。;!?`}
       rules={[{ required: true }]}
+      tooltip={t('knowledgeDetails.delimiterTip')}
     >
       
     
diff --git a/web/src/locales/en.ts b/web/src/locales/en.ts
index aa4b801e976..ce0525dbefd 100644
--- a/web/src/locales/en.ts
+++ b/web/src/locales/en.ts
@@ -157,6 +157,8 @@ export default {
     topK: 'Top-K',
     topKTip: `K chunks will be fed into rerank models.`,
     delimiter: `Delimiter`,
+    delimiterTip:
+      'Supports multiple characters as delimiters. Wrap a multi-character delimiter in backticks (`). For example, when configured as \n`##`;, the text is first split at line breaks, at every "##", and at semicolons, and the resulting pieces are then assembled according to the "token number" size.',
     html4excel: 'Excel to HTML',
     html4excelTip: `When enabled, the spreadsheet will be parsed into HTML tables; otherwise, it will be parsed into key-value pairs by row.`,
     autoKeywords: 'Auto-keyword',
diff --git a/web/src/locales/zh-traditional.ts b/web/src/locales/zh-traditional.ts
index 518ba559bfc..2753e781ed4 100644
--- a/web/src/locales/zh-traditional.ts
+++ b/web/src/locales/zh-traditional.ts
@@ -153,6 +153,8 @@ export default {
     topK: 'Top-K',
     topKTip: `K塊將被送入Rerank型號。`,
     delimiter: `分段標識符`,
+    delimiterTip:
+      '支援多字元作為分隔符,多字元分隔符用`包裹。如配置成這樣:\n`##`;那麼就會用換行,兩個#以及分號先對文字進行分割,然後按照「 token number」大小進行拼裝。',
     html4excel: '表格轉HTML',
     html4excelTip: `Excel 是否會被解析為 HTML 表格。如果為 FALSE,Excel 中的每一行都會形成一個區塊。`,
     autoKeywords: '自動關鍵字',
diff --git a/web/src/locales/zh.ts b/web/src/locales/zh.ts
index 0aeb2541c8a..8a3fe97bd97 100644
--- a/web/src/locales/zh.ts
+++ b/web/src/locales/zh.ts
@@ -154,6 +154,8 @@ export default {
     topK: 'Top-K',
     topKTip: `K块将被送入Rerank型号。`,
     delimiter: `分段标识符`,
+    delimiterTip:
+      '支持多字符作为分隔符,多字符分隔符用`包裹。如配置成这样:\n`##`;那么就会用换行,两个#以及分号先对文本进行分割,然后按照“ token number”大小进行拼装。',
    html4excel: '表格转HTML',
    html4excelTip: `Excel 是否将被解析为 HTML 表。如果为 FALSE,Excel 中的每一行都将形成一个块。`,
    autoKeywords: '自动关键词',