Skip to content

Commit

Permalink
Fix document count mismatch issue (infiniflow#822)
Browse files (browse the repository at this point in the history)
### What problem does this PR solve?

infiniflow#620 

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
  • Loading branch information
KevinHuSh authored May 17, 2024
1 parent ced6c35 commit 02d3e2f
Show file tree
Hide file tree
Showing 5 changed files with 7 additions and 43 deletions.
6 changes: 1 addition & 5 deletions api/apps/document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,13 +250,9 @@ def rm():
if not tenant_id:
return get_data_error_result(retmsg="Tenant not found!")

ELASTICSEARCH.deleteByQuery(
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))

DocumentService.clear_chunk_num(doc_id)
b, n = File2DocumentService.get_minio_address(doc_id=doc_id)

if not DocumentService.delete(doc):
if not DocumentService.remove_document(doc, tenant_id):
return get_data_error_result(
retmsg="Database error (Document removal)!")

Expand Down
12 changes: 2 additions & 10 deletions api/apps/file2document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,7 @@ def convert():
tenant_id = DocumentService.get_tenant_id(doc_id)
if not tenant_id:
return get_data_error_result(retmsg="Tenant not found!")
ELASTICSEARCH.deleteByQuery(
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
DocumentService.increment_chunk_num(
doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
if not DocumentService.delete(doc):
if not DocumentService.remove_document(doc, tenant_id):
return get_data_error_result(
retmsg="Database error (Document removal)!")
File2DocumentService.delete_by_file_id(id)
Expand Down Expand Up @@ -125,11 +121,7 @@ def rm():
tenant_id = DocumentService.get_tenant_id(doc_id)
if not tenant_id:
return get_data_error_result(retmsg="Tenant not found!")
ELASTICSEARCH.deleteByQuery(
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
DocumentService.increment_chunk_num(
doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
if not DocumentService.delete(doc):
if not DocumentService.remove_document(doc, tenant_id):
return get_data_error_result(
retmsg="Database error (Document removal)!")
return get_json_result(data=True)
Expand Down
6 changes: 1 addition & 5 deletions api/apps/file_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,11 +277,7 @@ def rm():
tenant_id = DocumentService.get_tenant_id(doc_id)
if not tenant_id:
return get_data_error_result(retmsg="Tenant not found!")
ELASTICSEARCH.deleteByQuery(
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
DocumentService.increment_chunk_num(
doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
if not DocumentService.delete(doc):
if not DocumentService.remove_document(doc, tenant_id):
return get_data_error_result(
retmsg="Database error (Document removal)!")
File2DocumentService.delete_by_file_id(file_id)
Expand Down
7 changes: 1 addition & 6 deletions api/apps/kb_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,7 @@ def rm():
data=False, retmsg=f'Only owner of knowledgebase authorized for this operation.', retcode=RetCode.OPERATING_ERROR)

for doc in DocumentService.query(kb_id=req["kb_id"]):
ELASTICSEARCH.deleteByQuery(
Q("match", doc_id=doc.id), idxnm=search.index_name(kbs[0].tenant_id))

DocumentService.increment_chunk_num(
doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
if not DocumentService.delete(doc):
if not DocumentService.remove_document(doc, kbs[0].tenant_id):
return get_data_error_result(
retmsg="Database error (Document removal)!")

Expand Down
19 changes: 2 additions & 17 deletions api/db/services/document_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,27 +70,12 @@ def insert(cls, doc):
raise RuntimeError("Database error (Knowledgebase)!")
return doc

@classmethod
@DB.connection_context()
def delete(cls, doc):
e, kb = KnowledgebaseService.get_by_id(doc.kb_id)
if not KnowledgebaseService.update_by_id(
kb.id, {"doc_num": max(0, kb.doc_num - 1)}):
raise RuntimeError("Database error (Knowledgebase)!")
return cls.delete_by_id(doc.id)

@classmethod
@DB.connection_context()
def remove_document(cls, doc, tenant_id):
ELASTICSEARCH.deleteByQuery(
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))

cls.increment_chunk_num(
doc.id, doc.kb_id, doc.token_num * -1, doc.chunk_num * -1, 0)
if not cls.delete(doc):
raise RuntimeError("Database error (Document removal)!")

MINIO.rm(doc.kb_id, doc.location)
Q("match", doc_id=doc.id), idxnm=search.index_name(tenant_id))
cls.clear_chunk_num(doc.id)
return cls.delete_by_id(doc.id)

@classmethod
Expand Down

0 comments on commit 02d3e2f

Please sign in to comment.