Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: Multi channel recall, adding display of knowledge base titles and other information #2309

Merged
merged 2 commits into from
Feb 18, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,14 @@ def merge_reranker_list(reranker_list, result=None):
merge_reranker_list(document, result)
elif isinstance(document, dict):
content = document.get('title', '') + document.get('content', '')
result.append(str(document) if len(content) == 0 else content)
title = document.get("title")
dataset_name = document.get("dataset_name")
document_name = document.get('document_name')
result.append(
Document(page_content=str(document) if len(content) == 0 else content,
metadata={'title': title, 'dataset_name': dataset_name, 'document_name': document_name}))
else:
result.append(str(document))
result.append(Document(page_content=str(document), metadata={}))
return result


Expand All @@ -43,6 +48,21 @@ def filter_result(document_list: List[Document], max_paragraph_char_number, top_
return result


def reset_result_list(result_list: List[Document], document_list: List[Document]):
    """Re-attach source metadata to reranked documents.

    Each entry of ``result_list`` is paired with the first not-yet-used entry
    of ``document_list`` that has the same ``page_content``. A paired entry is
    emitted as a new ``Document`` carrying the source metadata plus the
    ``relevance_score`` read from the result's metadata; an unpaired result is
    passed through unchanged. The caller's ``document_list`` is not modified.
    """
    # Work on a shallow copy so consuming matches never touches the caller's list.
    remaining = document_list.copy()
    merged = []
    for reranked in result_list:
        candidates = [source for source in remaining if source.page_content == reranked.page_content]
        if len(candidates) == 0:
            merged.append(reranked)
            continue
        source = candidates[0]
        # Consume the match so duplicate texts pair with distinct source documents.
        remaining.remove(source)
        enriched_metadata = {**source.metadata, 'relevance_score': reranked.metadata.get('relevance_score')}
        merged.append(Document(page_content=source.page_content, metadata=enriched_metadata))
    return merged


class BaseRerankerNode(IRerankerNode):
def save_context(self, details, workflow_manage):
self.context['document_list'] = details.get('document_list', [])
Expand All @@ -55,16 +75,18 @@ def execute(self, question, reranker_setting, reranker_list, reranker_model_id,
**kwargs) -> NodeResult:
documents = merge_reranker_list(reranker_list)
top_n = reranker_setting.get('top_n', 3)
self.context['document_list'] = documents
self.context['document_list'] = [{'page_content': document.page_content, 'metadata': document.metadata} for
document in documents]
self.context['question'] = question
reranker_model = get_model_instance_by_model_user_id(reranker_model_id,
self.flow_params_serializer.data.get('user_id'),
top_n=top_n)
result = reranker_model.compress_documents(
[Document(page_content=document) for document in documents if document is not None and len(document) > 0],
documents,
question)
similarity = reranker_setting.get('similarity', 0.6)
max_paragraph_char_number = reranker_setting.get('max_paragraph_char_number', 5000)
result = reset_result_list(result, documents)
r = filter_result(result, max_paragraph_char_number, top_n, similarity)
return NodeResult({'result_list': r, 'result': ''.join([item.get('page_content') for item in r])}, {})

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The provided code could be improved in several ways to enhance its readability, maintainability, and efficiency:

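  1. Use of copy Method: The line document_list = document_list.copy() in the reset_result_list function creates an extra copy of document_list. It can be removed, but only if the function also stops calling document_list.remove(...) on its argument; otherwise the caller's list is mutated as a side effect.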

  2. Inline Dictionary Literal: In merge_reranker_list, consider building the metadata dictionary inline at the call site instead of through intermediate variables.

  3. Consistent Use of F-Strings: For string formatting consistency, you can use f-strings instead of concatenating strings with '+'. This can improve code readability.

Here's the revised version:

from typing import List

class Document:
    """Minimal container pairing a text chunk with its metadata.

    Args:
        page_content: The text of the document.
        metadata: Optional mapping of extra attributes. When omitted, each
            instance receives its own empty dict.
    """

    def __init__(self, page_content="", metadata=None):
        self.page_content = page_content
        # A literal `{}` default is created once and shared by every instance,
        # so a write through one document would show up on all the others.
        self.metadata = {} if metadata is None else metadata

def merge_reranker_list(reranker_list, result=None):
    """Flatten nested reranker inputs into a flat list of ``Document`` objects.

    Args:
        reranker_list: An iterable whose items are lists (flattened
            recursively), dicts, or any other value.
        result: Accumulator used by the recursive calls; callers normally
            leave it as ``None`` to start from a fresh list.

    Returns:
        The accumulator list. A dict item becomes a ``Document`` whose text is
        its title followed by its content (or ``str(item)`` when both are
        empty) and whose metadata holds ``title``, ``dataset_name`` and
        ``document_name``. Any other item becomes a ``Document`` of
        ``str(item)`` with empty metadata.
    """
    if result is None:
        result = []
    for document in reranker_list:
        if isinstance(document, list):
            # Nested lists append into the same accumulator.
            merge_reranker_list(document, result)
        elif isinstance(document, dict):
            # `or ''` also covers keys that are present but hold None, which
            # would otherwise raise TypeError on concatenation.
            content = (document.get('title') or '') + (document.get('content') or '')
            metadata = {
                'title': document.get('title'),
                'dataset_name': document.get('dataset_name'),
                'document_name': document.get('document_name'),
            }
            result.append(Document(content if content else str(document), metadata))
        else:
            result.append(Document(str(document), {}))
    return result

def filter_result(document_list: List[Document], max_paragraph_char_number, top_n, similarity):
    """Return the first ``top_n`` documents whose text fits the length limit.

    Documents whose ``page_content`` is longer than
    ``max_paragraph_char_number`` are dropped; input order is preserved.

    NOTE(review): ``similarity`` is accepted but never consulted here, so no
    score threshold is applied by this function. Confirm whether that is
    intended.
    """
    within_limit = []
    for doc in document_list:
        if len(doc.page_content) > max_paragraph_char_number:
            continue
        within_limit.append(doc)
    # Slice after filtering so `top_n` counts only documents that passed.
    return within_limit[:top_n]

def reset_result_list(result_list: List[Document], document_list: List[Document]):
    """Re-attach source metadata to reranked documents.

    Each entry of ``result_list`` is paired with the first not-yet-used entry
    of ``document_list`` that has the same ``page_content``. A paired entry is
    emitted as a new ``Document`` carrying the source metadata plus the
    ``relevance_score`` read from the result's metadata; an unpaired result is
    passed through unchanged.

    Args:
        result_list: Documents to enrich, in the order they should be returned.
        document_list: Source documents to take metadata from. It is only
            read, never modified.

    Returns:
        A new list with one entry per item of ``result_list``.
    """
    # Track consumed positions instead of removing items: this needs no copy
    # of `document_list` and leaves the caller's list untouched.
    consumed = set()
    merged = []
    for result in result_list:
        match_index = next(
            (index for index, candidate in enumerate(document_list)
             if index not in consumed and candidate.page_content == result.page_content),
            None,
        )
        if match_index is None:
            merged.append(result)
            continue
        # Mark the match as used so duplicate texts pair with distinct sources.
        consumed.add(match_index)
        item = document_list[match_index]
        merged.append(
            Document(item.page_content,
                     {**item.metadata, 'relevance_score': result.metadata.get('relevance_score')})
        )
    return merged

# Assuming BaseRerankerNode class definition here...

Summary Changes:

  1. Removed redundant copying of document_list.
  2. Used inline dictionary literals where applicable.
  3. Simplified the logic for filtering results without making copies.
  4. Ensured consistent use of formatted strings using f-strings.

These changes should help improve the clarity and performance of the code while maintaining functionality.

Expand Down
2 changes: 1 addition & 1 deletion ui/src/components/ai-chat/ExecutionDetailDialog.vue
Original file line number Diff line number Diff line change
Expand Up @@ -429,7 +429,7 @@
<MdPreview
ref="editorRef"
editorId="preview-only"
:modelValue="paragraph"
:modelValue="paragraph.page_content"
/>
</el-scrollbar>
</template>
Expand Down