From ea288e92d8cbd275cdb010c6ab39cc1f964eaef3 Mon Sep 17 00:00:00 2001 From: Bob Bobs Date: Sun, 14 Sep 2025 13:49:19 -0700 Subject: [PATCH 1/8] perf: create sqlite indexes for common columns --- src/tagstudio/core/library/alchemy/library.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/tagstudio/core/library/alchemy/library.py b/src/tagstudio/core/library/alchemy/library.py index a25231e95..3c5c4872a 100644 --- a/src/tagstudio/core/library/alchemy/library.py +++ b/src/tagstudio/core/library/alchemy/library.py @@ -555,6 +555,20 @@ def open_sqlite_library(self, library_dir: Path, is_new: bool) -> LibraryStatus: # Convert file extension list to ts_ignore file, if a .ts_ignore file does not exist self.migrate_sql_to_ts_ignore(library_dir) + session.execute( + text("CREATE INDEX IF NOT EXISTS idx_tags_name_shorthand ON tags (name, shorthand)") + ) + session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_tag_parents_child_id ON tag_parents (child_id)" + ) + ) + session.execute( + text( + "CREATE INDEX IF NOT EXISTS idx_tag_entries_entry_id ON tag_entries (entry_id)" + ) + ) + # Update DB_VERSION if loaded_db_version < DB_VERSION: self.set_version(DB_VERSION_CURRENT_KEY, DB_VERSION) From 1d7a267122026ee69b2233ab9a16a37230fb0d39 Mon Sep 17 00:00:00 2001 From: Bob Bobs Date: Sun, 14 Sep 2025 13:51:59 -0700 Subject: [PATCH 2/8] perf: optimize Library.search_tags --- src/tagstudio/core/library/alchemy/library.py | 43 ++++++------------- 1 file changed, 14 insertions(+), 29 deletions(-) diff --git a/src/tagstudio/core/library/alchemy/library.py b/src/tagstudio/core/library/alchemy/library.py index 3c5c4872a..011e1b652 100644 --- a/src/tagstudio/core/library/alchemy/library.py +++ b/src/tagstudio/core/library/alchemy/library.py @@ -75,7 +75,6 @@ DB_VERSION_LEGACY_KEY, JSON_FILENAME, SQL_FILENAME, - TAG_CHILDREN_QUERY, ) from tagstudio.core.library.alchemy.db import make_tables from tagstudio.core.library.alchemy.enums import ( @@ -1071,13 +1070,10 @@ def search_library( def search_tags(self, name: str | None, limit: int = 100) -> list[set[Tag]]: """Return a list of Tag records matching the query.""" with Session(self.engine) as session: - query = select(Tag).outerjoin(TagAlias).order_by(func.lower(Tag.name)) - query = query.options( - selectinload(Tag.parent_tags), - selectinload(Tag.aliases), - ) - if limit > 0: - query = query.limit(limit) + query = select(Tag.id, Tag.name).outerjoin(TagAlias) + + if limit > 0 and (not name or len(name) == 1): + query = query.limit(limit).order_by(func.lower(Tag.name)) if name: query = query.where( @@ -1088,35 +1084,24 @@ def search_tags(self, name: str | None, limit: int = 100) -> list[set[Tag]]: ) ) - direct_tags = set(session.scalars(query)) - ancestor_tag_ids: list[Tag] = [] - for tag in direct_tags: - ancestor_tag_ids.extend( - list(session.scalars(TAG_CHILDREN_QUERY, {"tag_id": tag.id})) - ) - - ancestor_tags = session.scalars( - select(Tag) - .where(Tag.id.in_(ancestor_tag_ids)) - .options(selectinload(Tag.parent_tags), selectinload(Tag.aliases)) - ) - - res = [ - direct_tags, - {at for at in ancestor_tags if at not in direct_tags}, - ] - + tags = list(session.execute(query)) logger.info( "searching tags", search=name, limit=limit, statement=str(query), - results=len(res), + results=len(tags), ) - session.expunge_all() + tags.sort(key=lambda t: t[1].lower()) + if limit <= 0: + limit = len(tags) + tag_ids = [t[0] for t in tags[:limit]] - return res + hierarchy = self.get_tag_hierarchy(tag_ids) + direct_tags = {hierarchy.pop(id) for id in tag_ids} + ancestor_tags = set(hierarchy.values()) + return [direct_tags, ancestor_tags] def update_entry_path(self, entry_id: int | Entry, path: Path) -> bool: """Set the path field of an entry. From 8ce773b92ded95dc3cbd14fdf5a5c39e0891ced2 Mon Sep 17 00:00:00 2001 From: Bob Bobs Date: Sun, 14 Sep 2025 14:43:46 -0700 Subject: [PATCH 3/8] fix(tag_search): do ordering before applying limit --- src/tagstudio/core/library/alchemy/library.py | 42 ++++++++++++++----- src/tagstudio/qt/mixed/tag_search.py | 27 +++--------- 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/src/tagstudio/core/library/alchemy/library.py b/src/tagstudio/core/library/alchemy/library.py index 011e1b652..eb8917f05 100644 --- a/src/tagstudio/core/library/alchemy/library.py +++ b/src/tagstudio/core/library/alchemy/library.py @@ -1067,12 +1067,18 @@ def search_library( return res - def search_tags(self, name: str | None, limit: int = 100) -> list[set[Tag]]: + def search_tags(self, name: str | None, limit: int = 100) -> tuple[list[Tag], list[Tag]]: """Return a list of Tag records matching the query.""" + name = name or "" + name = name.lower() + + def sort_key(text: str): + return (not text.startswith(name), text) + with Session(self.engine) as session: - query = select(Tag.id, Tag.name).outerjoin(TagAlias) + query = select(Tag.id, Tag.name) - if limit > 0 and (not name or len(name) == 1): + if limit > 0 and not name: query = query.limit(limit).order_by(func.lower(Tag.name)) if name: @@ -1080,28 +1086,42 @@ def search_tags(self, name: str | None, limit: int = 100) -> list[set[Tag]]: or_( Tag.name.icontains(name), Tag.shorthand.icontains(name), - TagAlias.name.icontains(name), ) ) tags = list(session.execute(query)) + + if name: + query = select(TagAlias.tag_id, TagAlias.name).where(TagAlias.name.icontains(name)) + tags.extend(session.execute(query)) + + tags.sort(key=lambda t: sort_key(t[1])) + seen_ids = set() + tag_ids = [] + for row in tags: + id = row[0] + if id in seen_ids: + continue + tag_ids.append(id) + seen_ids.add(id) + logger.info( "searching tags", search=name, limit=limit, statement=str(query), - results=len(tags), + results=len(tag_ids), ) - tags.sort(key=lambda t: t[1].lower()) if limit <= 0: - limit = len(tags) - tag_ids = [t[0] for t in tags[:limit]] + limit = len(tag_ids) + tag_ids = tag_ids[:limit] hierarchy = self.get_tag_hierarchy(tag_ids) - direct_tags = {hierarchy.pop(id) for id in tag_ids} - ancestor_tags = set(hierarchy.values()) - return [direct_tags, ancestor_tags] + direct_tags = [hierarchy.pop(id) for id in tag_ids] + ancestor_tags = list(hierarchy.values()) + ancestor_tags.sort(key=lambda t: sort_key(t.name)) + return direct_tags, ancestor_tags def update_entry_path(self, entry_id: int | Entry, path: Path) -> bool: """Set the path field of an entry. diff --git a/src/tagstudio/qt/mixed/tag_search.py b/src/tagstudio/qt/mixed/tag_search.py index 53990c378..9a825dc62 100644 --- a/src/tagstudio/qt/mixed/tag_search.py +++ b/src/tagstudio/qt/mixed/tag_search.py @@ -218,32 +218,15 @@ def update_tags(self, query: str | None = None): self.scroll_layout.takeAt(self.scroll_layout.count() - 1).widget().deleteLater() self.create_button_in_layout = False - # Get results for the search query - query_lower = "" if not query else query.lower() # Only use the tag limit if it's an actual number (aka not "All Tags") tag_limit = TagSearchPanel.tag_limit if isinstance(TagSearchPanel.tag_limit, int) else -1 - tag_results: list[set[Tag]] = self.lib.search_tags(name=query, limit=tag_limit) - if self.exclude: - tag_results[0] = {t for t in tag_results[0] if t.id not in self.exclude} - tag_results[1] = {t for t in tag_results[1] if t.id not in self.exclude} - - # Sort and prioritize the results - results_0 = list(tag_results[0]) - results_0.sort(key=lambda tag: tag.name.lower()) - results_1 = list(tag_results[1]) - results_1.sort(key=lambda tag: tag.name.lower()) - raw_results = list(results_0 + results_1) - priority_results: set[Tag] = set() - all_results: list[Tag] = [] + direct_tags, ancestor_tags = self.lib.search_tags(name=query, limit=tag_limit) - if query and query.strip(): - for tag in raw_results: - if tag.name.lower().startswith(query_lower): - priority_results.add(tag) + all_results = [t for t in direct_tags if t.id not in self.exclude] + for tag in ancestor_tags: + if tag.id not in self.exclude: + all_results.append(tag) - all_results = sorted(list(priority_results), key=lambda tag: len(tag.name)) + [ - r for r in raw_results if r not in priority_results - ] if tag_limit > 0: all_results = all_results[:tag_limit] From a1dd8d48984bd17d7045064654dae400b9ef7fba Mon Sep 17 00:00:00 2001 From: Bob Bobs Date: Sun, 14 Sep 2025 14:47:22 -0700 Subject: [PATCH 4/8] tag_search: order shorter tag names first --- src/tagstudio/core/library/alchemy/library.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tagstudio/core/library/alchemy/library.py b/src/tagstudio/core/library/alchemy/library.py index eb8917f05..5157de64e 100644 --- a/src/tagstudio/core/library/alchemy/library.py +++ b/src/tagstudio/core/library/alchemy/library.py @@ -1073,7 +1073,7 @@ def search_tags(self, name: str | None, limit: int = 100) -> tuple[list[Tag], li name = name.lower() def sort_key(text: str): - return (not text.startswith(name), text) + return (not text.startswith(name), len(text), text) with Session(self.engine) as session: query = select(Tag.id, Tag.name) From 30d9403452cd2ec04ecf155ef9dbe551b9699a76 Mon Sep 17 00:00:00 2001 From: Bob Bobs Date: Sun, 14 Sep 2025 15:11:55 -0700 Subject: [PATCH 5/8] update tests --- tests/test_library.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_library.py b/tests/test_library.py index 447344512..111e36116 100644 --- a/tests/test_library.py +++ b/tests/test_library.py @@ -131,10 +131,10 @@ def test_library_search(library: Library, entry_full: Entry): def test_tag_search(library: Library): tag = library.tags[0] - assert library.search_tags(tag.name.lower()) - assert library.search_tags(tag.name.upper()) - assert library.search_tags(tag.name[2:-2]) - assert library.search_tags(tag.name * 2) == [set(), set()] + assert library.search_tags(tag.name.lower())[0] + assert library.search_tags(tag.name.upper())[0] + assert library.search_tags(tag.name[2:-2])[0] + assert library.search_tags(tag.name * 2) == ([], []) def test_get_entry(library: Library, entry_min: Entry): From 06dc5e858a500a06302e5bd0ed9898cf5605b748 Mon Sep 17 00:00:00 2001 From: Bob Bobs Date: Tue, 16 Sep 2025 10:50:59 -0700 Subject: [PATCH 6/8] cleanup --- src/tagstudio/core/library/alchemy/library.py | 11 +++++------ src/tagstudio/qt/mixed/tag_search.py | 4 +--- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/tagstudio/core/library/alchemy/library.py b/src/tagstudio/core/library/alchemy/library.py index 5157de64e..39d86e156 100644 --- a/src/tagstudio/core/library/alchemy/library.py +++ b/src/tagstudio/core/library/alchemy/library.py @@ -1098,12 +1098,11 @@ def sort_key(text: str): tags.sort(key=lambda t: sort_key(t[1])) seen_ids = set() tag_ids = [] - for row in tags: - id = row[0] - if id in seen_ids: - continue - tag_ids.append(id) - seen_ids.add(id) + # Use order from Tag.name or TagAlias.name depending on which comes first for each tag. + for id, _ in tags: + if id not in seen_ids: + seen_ids.add(id) + tag_ids.append(id) logger.info( "searching tags", diff --git a/src/tagstudio/qt/mixed/tag_search.py b/src/tagstudio/qt/mixed/tag_search.py index 9a825dc62..a0f2b0402 100644 --- a/src/tagstudio/qt/mixed/tag_search.py +++ b/src/tagstudio/qt/mixed/tag_search.py @@ -223,9 +223,7 @@ def update_tags(self, query: str | None = None): direct_tags, ancestor_tags = self.lib.search_tags(name=query, limit=tag_limit) all_results = [t for t in direct_tags if t.id not in self.exclude] - for tag in ancestor_tags: - if tag.id not in self.exclude: - all_results.append(tag) + all_results.extend(t for t in ancestor_tags if t.id not in self.exclude) if tag_limit > 0: all_results = all_results[:tag_limit] From 9ff33648a0d4b4c97d30aa7ac2a4195c39b8d0a2 Mon Sep 17 00:00:00 2001 From: Bob Bobs Date: Tue, 16 Sep 2025 10:54:05 -0700 Subject: [PATCH 7/8] tag_search: use same sorting order when returning all tags --- src/tagstudio/core/library/alchemy/library.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/tagstudio/core/library/alchemy/library.py b/src/tagstudio/core/library/alchemy/library.py index 39d86e156..0cd153893 100644 --- a/src/tagstudio/core/library/alchemy/library.py +++ b/src/tagstudio/core/library/alchemy/library.py @@ -9,6 +9,7 @@ import re import shutil +import sys import time import unicodedata from collections.abc import Iterable, Iterator @@ -1073,13 +1074,15 @@ def search_tags(self, name: str | None, limit: int = 100) -> tuple[list[Tag], li name = name.lower() def sort_key(text: str): - return (not text.startswith(name), len(text), text) + priority = text.startswith(name) + p_ordering = len(text) if priority else sys.maxsize + return (not priority, p_ordering, text) with Session(self.engine) as session: query = select(Tag.id, Tag.name) if limit > 0 and not name: - query = query.limit(limit).order_by(func.lower(Tag.name)) + query = query.order_by(Tag.name).limit(limit) if name: query = query.where( From dba9e24c6c4bcbe6e55ba7dc65139479f659fe03 Mon Sep 17 00:00:00 2001 From: Bob Bobs Date: Tue, 16 Sep 2025 12:36:40 -0700 Subject: [PATCH 8/8] use dict for deduplicating tags --- src/tagstudio/core/library/alchemy/library.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/tagstudio/core/library/alchemy/library.py b/src/tagstudio/core/library/alchemy/library.py index 0cd153893..66ead4e6e 100644 --- a/src/tagstudio/core/library/alchemy/library.py +++ b/src/tagstudio/core/library/alchemy/library.py @@ -1099,13 +1099,9 @@ def sort_key(text: str): tags.extend(session.execute(query)) tags.sort(key=lambda t: sort_key(t[1])) - seen_ids = set() - tag_ids = [] # Use order from Tag.name or TagAlias.name depending on which comes first for each tag. - for id, _ in tags: - if id not in seen_ids: - seen_ids.add(id) - tag_ids.append(id) + # Value=0 to avoid unnecessary copying of tag names. + tag_ids = list(dict((id, 0) for id, _ in tags).keys()) logger.info( "searching tags",