From 14fc219668ff34cc6d0093c816c9debeae472be5 Mon Sep 17 00:00:00 2001
From: chenshuizhong <guoyuhao2330@outlook.com>
Date: Wed, 15 May 2024 15:18:17 +0800
Subject: [PATCH 1/8] Updated conversation_api.md document/upload

---
 docs/conversation_api.md | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/docs/conversation_api.md b/docs/conversation_api.md
index 2dd258fd6f1..13acec8d9e6 100644
--- a/docs/conversation_api.md
+++ b/docs/conversation_api.md
@@ -315,10 +315,12 @@ This is usually used when upload a file to.
 
 ### Parameter:
 
-| name    | type   | optional | description                            |
-|---------|--------|----------|----------------------------------------|
-| file    | file   | No       | Upload file.                           |
-| kb_name | string | No       | Choose the upload knowledge base name. |
+| name      | type   | optional | description                                             |
+|-----------|--------|----------|---------------------------------------------------------|
+| file      | file   | No       | Upload file.                                            |
+| kb_name   | string | No       | Choose the upload knowledge base name.                  |
+| parser_id | string | Yes      | Choose the parsing method.                              |
+| run       | string | Yes      | Parsing will start automatically when the value is "1". |
 
 ### Response 
 ```json

From c1d3286fa4dfed79d94b159ee0fad6befcd7d0f8 Mon Sep 17 00:00:00 2001
From: guoyuhao2330 <guoyuhao2330@outlook.com>
Date: Fri, 17 May 2024 14:10:04 +0800
Subject: [PATCH 2/8] Add /list_chunks API endpoint and DocumentService.get_tenant_id_by_name

---
 api/apps/api_app.py                 | 43 +++++++++++++++++++++++++++++
 api/db/services/document_service.py | 13 +++++++++
 2 files changed, 56 insertions(+)

diff --git a/api/apps/api_app.py b/api/apps/api_app.py
index bc4fadf5e29..c2568a2ab22 100644
--- a/api/apps/api_app.py
+++ b/api/apps/api_app.py
@@ -39,6 +39,9 @@
 from api.utils.file_utils import filename_type, thumbnail
 from rag.utils.minio_conn import MINIO
 
+from rag.utils.es_conn import ELASTICSEARCH
+from rag.nlp import search
+from elasticsearch_dsl import Q
 
 def generate_confirmation_token(tenent_id):
     serializer = URLSafeTimedSerializer(tenent_id)
@@ -347,3 +350,43 @@ def upload():
                  return server_error_response(e)
 
     return get_json_result(data=doc_result.to_json())
+
+
+@manager.route('/list_chunks', methods=['POST'])
+# @login_required
+def list_chunks():
+    """Return the chunks of one document, looked up by name or doc_id, for a valid API token."""
+    auth = request.headers.get('Authorization', '').split()  # expected form: "<scheme> <token>"
+    if len(auth) < 2 or not APIToken.query(token=auth[1]):
+        return get_json_result(
+            data=False, retmsg='Token is not valid!', retcode=RetCode.AUTHENTICATION_ERROR)
+
+    form_data = request.form
+
+    try:
+        if "filename" in form_data.keys():
+            tenant_id = DocumentService.get_tenant_id_by_name(form_data['filename'])
+            q = Q("match", docnm_kwd=form_data['filename'])
+
+        elif "doc_id" in form_data.keys():
+            tenant_id = DocumentService.get_tenant_id(form_data['doc_id'])
+            q = Q("match", doc_id=form_data['doc_id'])
+        else:
+            return get_json_result(
+                data=False,retmsg="Can't find filename or doc_id"
+            )
+
+        res_es_search = ELASTICSEARCH.search(q,idxnm=search.index_name(tenant_id),timeout="600s")
+        # Expose only the public fields of each hit; img_id is copied only when the hit carries one.
+        res = []
+        for hit in res_es_search['hits']['hits']:
+            source = hit['_source']
+            item = {'doc_name': source['docnm_kwd'], 'content': source['content_with_weight']}
+            if 'img_id' in source:
+                item['img_id'] = source['img_id']
+            res.append(item)
+
+    except Exception as e:
+        return server_error_response(e)
+
+    return get_json_result(data=res)
diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index d85c15070fc..23e6b0597bc 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -180,6 +180,19 @@ def get_tenant_id(cls, doc_id):
         if not docs:
             return
         return docs[0]["tenant_id"]
+	
+	@classmethod
+    @DB.connection_context()
+    def get_tenant_id_by_name(cls, name):
+        docs = cls.model.select(
+            Knowledgebase.tenant_id).join(
+            Knowledgebase, on=(
+                    Knowledgebase.id == cls.model.kb_id)).where(  # join each document to its knowledge base
+            cls.model.name == name, Knowledgebase.status == StatusEnum.VALID.value)  # match by document name, valid KBs only
+        docs = docs.dicts()
+        if not docs:
+            return  # returns None when the query yields nothing
+        return docs[0]["tenant_id"]  # only the first matching row is used
 
     @classmethod
     @DB.connection_context()

From 53a7b1c414cbccb110c9d791462a59030242c49e Mon Sep 17 00:00:00 2001
From: guoyuhao2330 <guoyuhao2330@outlook.com>
Date: Fri, 17 May 2024 14:42:39 +0800
Subject: [PATCH 3/8] list_chunks: remove stray tab on blank line in document_service.py

---
 api/db/services/document_service.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index 23e6b0597bc..0e344dce02e 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -180,7 +180,7 @@ def get_tenant_id(cls, doc_id):
         if not docs:
             return
         return docs[0]["tenant_id"]
-	
+
 	@classmethod
     @DB.connection_context()
     def get_tenant_id_by_name(cls, name):

From 2d75c91742ac6959e26ade5ce0cf7f382bb6565c Mon Sep 17 00:00:00 2001
From: guoyuhao2330 <guoyuhao2330@outlook.com>
Date: Fri, 17 May 2024 14:45:12 +0800
Subject: [PATCH 4/8] list_chunks: fix tab-indented @classmethod decorator in document_service.py

---
 api/db/services/document_service.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/api/db/services/document_service.py b/api/db/services/document_service.py
index 0e344dce02e..02f3197de96 100644
--- a/api/db/services/document_service.py
+++ b/api/db/services/document_service.py
@@ -181,7 +181,7 @@ def get_tenant_id(cls, doc_id):
             return
         return docs[0]["tenant_id"]
 
-	@classmethod
+    @classmethod
     @DB.connection_context()
     def get_tenant_id_by_name(cls, name):
         docs = cls.model.select(

From ec592e1544c29e4e03d39d9126fcbb67972ca598 Mon Sep 17 00:00:00 2001
From: guoyuhao2330 <guoyuhao2330@outlook.com>
Date: Fri, 17 May 2024 15:00:04 +0800
Subject: [PATCH 5/8] list_chunks: document the endpoint in conversation_api.md

---
 docs/conversation_api.md | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/docs/conversation_api.md b/docs/conversation_api.md
index 13acec8d9e6..6e9e83f9773 100644
--- a/docs/conversation_api.md
+++ b/docs/conversation_api.md
@@ -364,3 +364,38 @@ This is usually used when upload a file to.
 }
 
 ```
+
+## Get document chunks
+
+Get the chunks of the document based on filename or doc_id.
+### Path: /api/list_chunks/
+### Method: POST
+
+### Parameter:
+
+| name     | type   | optional | description                                             |
+|----------|--------|----------|---------------------------------------------------------|
+| filename | string | Yes      |                                           |
+| doc_id   | string | Yes      |                   |
+
+
+### Response 
+```json
+{
+    "data": [
+        {
+            "content": "Figure 14: Per-request neural-net processingof RL-Cache.\n103\n(sn)\nCPU\n 102\nGPU\n8101\n100\n8\n16 64 256 1K\n4K",
+            "doc_name": "RL-Cache.pdf",
+            "img_id": "0335167613f011ef91240242ac120006-b46c3524952f82dbe061ce9b123f2211"
+        },
+        {
+            "content": "4.3 ProcessingOverheadof RL-CacheACKNOWLEDGMENTSThis section evaluates how e￿ectively our RL-Cache implemen-tation leverages modern multi-core CPUs and GPUs to keep the per-request neural-net processing overhead low. Figure 14 depictsThis researchwas supported inpart by the Regional Government of Madrid (grant P2018/TCS-4499, EdgeData-CM)andU.S. National Science Foundation (grants CNS-1763617 andCNS-1717179).REFERENCES",
+            "doc_name": "RL-Cache.pdf",
+            "img_id": "0335167613f011ef91240242ac120006-d4c12c43938eb55d2d8278eea0d7e6d7"
+        }
+    ],
+    "retcode": 0,
+    "retmsg": "success"
+}
+
+```

From 329c598673e878fe908cbabf99ad7329703cdb6f Mon Sep 17 00:00:00 2001
From: guoyuhao2330 <guoyuhao2330@outlook.com>
Date: Fri, 17 May 2024 15:14:17 +0800
Subject: [PATCH 6/8] list_chunks: rename the filename parameter to doc_name

---
 api/apps/api_app.py      |  8 ++++----
 docs/conversation_api.md | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/api/apps/api_app.py b/api/apps/api_app.py
index c2568a2ab22..04944904193 100644
--- a/api/apps/api_app.py
+++ b/api/apps/api_app.py
@@ -364,16 +364,16 @@ def list_chunks():
     form_data = request.form
 
     try:
-        if "filename" in form_data.keys():
-            tenant_id = DocumentService.get_tenant_id_by_name(form_data['filename'])
-            q = Q("match", docnm_kwd=form_data['filename'])
+        if "doc_name" in form_data.keys():
+            tenant_id = DocumentService.get_tenant_id_by_name(form_data['doc_name'])
+            q = Q("match", docnm_kwd=form_data['doc_name'])
 
         elif "doc_id" in form_data.keys():
             tenant_id = DocumentService.get_tenant_id(form_data['doc_id'])
             q = Q("match", doc_id=form_data['doc_id'])
         else:
             return get_json_result(
-                data=False,retmsg="Can't find filename or doc_id"
+                data=False,retmsg="Can't find doc_name or doc_id"
             )
 
         res_es_search = ELASTICSEARCH.search(q,idxnm=search.index_name(tenant_id),timeout="600s")
diff --git a/docs/conversation_api.md b/docs/conversation_api.md
index 6e9e83f9773..6310a2fab9c 100644
--- a/docs/conversation_api.md
+++ b/docs/conversation_api.md
@@ -367,16 +367,16 @@ This is usually used when upload a file to.
 
 ## Get document chunks
 
-Get the chunks of the document based on filename or doc_id.
+Get the chunks of the document based on doc_name or doc_id.
 ### Path: /api/list_chunks/
 ### Method: POST
 
 ### Parameter:
 
-| name     | type   | optional | description                                             |
-|----------|--------|----------|---------------------------------------------------------|
-| filename | string | Yes      |                                           |
-| doc_id   | string | Yes      |                   |
+| name     | type   | optional | description                     |
+|----------|--------|----------|---------------------------------|
+| doc_name | string | Yes      | The doc name in knowledge base. |
+| doc_id   | string | Yes      | The doc_id of doc.              |
 
 
 ### Response 

From dc8ced17fb62d93728c86f0f2e95334aa7ced72b Mon Sep 17 00:00:00 2001
From: GYH <43509927+guoyuhao2330@users.noreply.github.com>
Date: Fri, 17 May 2024 15:46:40 +0800
Subject: [PATCH 7/8] Update conversation_api.md

---
 docs/conversation_api.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/conversation_api.md b/docs/conversation_api.md
index 6310a2fab9c..c3f708f527f 100644
--- a/docs/conversation_api.md
+++ b/docs/conversation_api.md
@@ -375,8 +375,8 @@ Get the chunks of the document based on doc_name or doc_id.
 
 | name     | type   | optional | description                     |
 |----------|--------|----------|---------------------------------|
-| doc_name | string | Yes      | The doc name in knowledge base. |
-| doc_id   | string | Yes      | The doc_id of doc.              |
+| doc_name | string | Yes      | The doc name in knowledge. It can not be empty without doc_id.|
+| doc_id   | string | Yes      | The doc_id of doc. It can not be empty without doc_name.|
 
 
 ### Response 

From 1f23c031160bfab0d0a63553b2506c303594897a Mon Sep 17 00:00:00 2001
From: GYH <43509927+guoyuhao2330@users.noreply.github.com>
Date: Fri, 17 May 2024 15:57:40 +0800
Subject: [PATCH 8/8] Update conversation_api.md

---
 docs/conversation_api.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/conversation_api.md b/docs/conversation_api.md
index c3f708f527f..bc40983e378 100644
--- a/docs/conversation_api.md
+++ b/docs/conversation_api.md
@@ -373,10 +373,10 @@ Get the chunks of the document based on doc_name or doc_id.
 
 ### Parameter:
 
-| name     | type   | optional | description                     |
+| Name     | Type   | Optional | Description                     |
 |----------|--------|----------|---------------------------------|
-| doc_name | string | Yes      | The doc name in knowledge. It can not be empty without doc_id.|
-| doc_id   | string | Yes      | The doc_id of doc. It can not be empty without doc_name.|
+| `doc_name` | string | Yes      | The name of the document in the knowledge base. It must not be empty if `doc_id` is not set.|
+| `doc_id`   | string | Yes      | The ID of the document in the knowledge base. It must not be empty if `doc_name` is not set.|
 
 
 ### Response