From 7ce99979d016c8e577069b45e485dcb6993ceb34 Mon Sep 17 00:00:00 2001 From: Janhvi Patil Date: Fri, 19 Jan 2024 15:47:58 +0530 Subject: [PATCH] added html stripped content for better search --- raven/api/search.py | 4 +-- raven/patches.txt | 1 + ...all_messages_to_include_message_content.py | 26 +++++++++++++++++++ .../doctype/raven_message/raven_message.json | 11 ++++++-- .../doctype/raven_message/raven_message.py | 17 +++++++----- 5 files changed, 49 insertions(+), 10 deletions(-) create mode 100644 raven/patches/v1_3/update_all_messages_to_include_message_content.py diff --git a/raven/api/search.py b/raven/api/search.py index d43225016..25b95794e 100644 --- a/raven/api/search.py +++ b/raven/api/search.py @@ -20,7 +20,7 @@ def get_search_result(filter_type, doctype, search_text=None, from_user=None, in } query = frappe.qb.from_(doctype).select( - doctype.name, doctype.file, doctype.owner, doctype.creation, doctype.message_type, doctype.channel_id, doctype.text).join(channel, JoinType.left).on(doctype.channel_id == channel.name).join(channel_member, JoinType.left).on( + doctype.name, doctype.file, doctype.owner, doctype.creation, doctype.message_type, doctype.channel_id, doctype.text, doctype.content).join(channel, JoinType.left).on(doctype.channel_id == channel.name).join(channel_member, JoinType.left).on( channel_member.channel_id == doctype.channel_id).where((channel.type != 'Private') | (channel_member.user_id == frappe.session.user)) if filter_type == 'File': @@ -40,7 +40,7 @@ def get_search_result(filter_type, doctype, search_text=None, from_user=None, in query = query.where(doctype.file.like( "/private/files/%" + search_text + "%")) elif filter_type == 'Message': - query = query.where(doctype.text.like("%" + search_text + "%")) + query = query.where(doctype.content.like("%" + search_text + "%")) elif filter_type == 'Channel': query = query.where( doctype.channel_name.like("%" + search_text + "%")) diff --git a/raven/patches.txt b/raven/patches.txt index 
b617b5528..e8b60c652 100644 --- a/raven/patches.txt +++ b/raven/patches.txt @@ -3,3 +3,4 @@ [post_model_sync] raven.patches.v1_2.create_raven_users raven.patches.v1_3.create_raven_message_indexes #23 +raven.patches.v1_3.update_all_messages_to_include_message_content #1 \ No newline at end of file diff --git a/raven/patches/v1_3/update_all_messages_to_include_message_content.py b/raven/patches/v1_3/update_all_messages_to_include_message_content.py new file mode 100644 index 000000000..30c69b3ff --- /dev/null +++ b/raven/patches/v1_3/update_all_messages_to_include_message_content.py @@ -0,0 +1,26 @@ +import frappe +from frappe.utils import strip_html_tags + + +def execute(): + update_old_messages_to_include_message_content() + + +def update_old_messages_to_include_message_content(): + ''' + Update all old messages to include message content + Message content is required for search + It is basically the message's text content but without any html tags + (this is done to improve search results) + This is a one-time operation, not required for new messages + ''' + messages = frappe.db.get_all('Raven Message', fields=[ + 'name', 'text', 'message_type']) + for message in messages: + if message.text: + cleaned_text = strip_html_tags(message.text).replace( + '\ufeff', '').replace(' ', ' ') + content = cleaned_text + frappe.db.set_value( + 'Raven Message', message.name, 'content', content) + frappe.db.commit() diff --git a/raven/raven_messaging/doctype/raven_message/raven_message.json b/raven/raven_messaging/doctype/raven_message/raven_message.json index 7eee68fda..ff7d32024 100644 --- a/raven/raven_messaging/doctype/raven_message/raven_message.json +++ b/raven/raven_messaging/doctype/raven_message/raven_message.json @@ -22,7 +22,8 @@ "is_reply", "linked_message", "link_doctype", - "link_document" + "link_document", + "content" ], "fields": [ { @@ -109,11 +110,17 @@ "fieldtype": "Dynamic Link", "label": "Link Document", "options": "link_doctype" + }, + { + "fieldname": 
"content", + "fieldtype": "Long Text", + "label": "Content", + "read_only": 1 } ], "index_web_pages_for_search": 1, "links": [], - "modified": "2023-12-22 18:25:02.237368", + "modified": "2024-01-19 14:56:44.534433", "modified_by": "Administrator", "module": "Raven Messaging", "name": "Raven Message", diff --git a/raven/raven_messaging/doctype/raven_message/raven_message.py b/raven/raven_messaging/doctype/raven_message/raven_message.py index 4699f3ba1..9cb564acc 100644 --- a/raven/raven_messaging/doctype/raven_message/raven_message.py +++ b/raven/raven_messaging/doctype/raven_message/raven_message.py @@ -3,9 +3,10 @@ import frappe from frappe import _ from frappe.model.document import Document +from frappe.utils import strip_html_tags from datetime import timedelta from frappe.query_builder.functions import Count, Coalesce -from frappe.query_builder import Case, Order,JoinType +from frappe.query_builder import Case, Order, JoinType from collections.abc import Iterable import json from raven.raven_channel_management.doctype.raven_channel.raven_channel import get_peer_user_id @@ -14,6 +15,7 @@ message = frappe.qb.DocType('Raven Message') user = frappe.qb.DocType("User") + class RavenMessage(Document): # begin: auto-generated types # This code is auto-generated. Do not modify anything in this block. 
@@ -24,6 +26,7 @@ class RavenMessage(Document): from frappe.types import DF channel_id: DF.Link + content: DF.LongText | None file: DF.Attach | None file_thumbnail: DF.Attach | None image_height: DF.Data | None @@ -148,6 +151,7 @@ def send_message(channel_id, text, is_reply, linked_message=None, json=None): 'doctype': 'Raven Message', 'channel_id': channel_id, 'text': clean_text, + 'content': strip_html_tags(clean_text).replace('\ufeff', '').replace(' ', ' '), 'message_type': 'Text', 'is_reply': is_reply, 'linked_message': linked_message, @@ -158,6 +162,7 @@ def send_message(channel_id, text, is_reply, linked_message=None, json=None): 'doctype': 'Raven Message', 'channel_id': channel_id, 'text': clean_text, + 'content': strip_html_tags(clean_text).replace('\ufeff', '').replace(' ', ' '), 'message_type': 'Text', 'json': json }) @@ -191,11 +196,11 @@ def fetch_recent_files(channel_id): def get_messages(channel_id): messages = frappe.db.get_all('Raven Message', - filters={'channel_id': channel_id}, - fields=['name', 'owner', 'creation', 'text', - 'file', 'message_type', 'message_reactions', 'is_reply', 'linked_message', '_liked_by', 'channel_id', 'thumbnail_width', 'thumbnail_height', 'file_thumbnail', 'link_doctype', 'link_document'], - order_by='creation asc' - ) + filters={'channel_id': channel_id}, + fields=['name', 'owner', 'creation', 'text', + 'file', 'message_type', 'message_reactions', 'is_reply', 'linked_message', '_liked_by', 'channel_id', 'thumbnail_width', 'thumbnail_height', 'file_thumbnail', 'link_doctype', 'link_document'], + order_by='creation asc' + ) return messages