From 2df3731780665f65090fdcd925c1f79b461d68e4 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Thu, 13 Apr 2023 10:51:29 +0200 Subject: [PATCH 01/36] Creating postgres migration script and starting to set up to detect database --- ...7.04_b3dbb554ba53_postgres_fuzzy_search.py | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py diff --git a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py new file mode 100644 index 0000000000..dd6db10c44 --- /dev/null +++ b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py @@ -0,0 +1,35 @@ +"""postgres fuzzy search + +Revision ID: b3dbb554ba53 +Revises: 38514b39a824 +Create Date: 2023-04-13 06:47:04.617131 + +""" +import sqlalchemy as sa + +import mealie.db.migration_types +from alembic import op +import alembic.context as context +from mealie.core.config import get_app_settings + +# revision identifiers, used by Alembic. 
+revision = "b3dbb554ba53" +down_revision = "38514b39a824" +branch_labels = None +depends_on = None + + +def get_db_type(): + return "sqlite" if "sqlite" in self.settings.DB_URL else "postgres" + + +def upgrade(): + dbtype = get_db_type() + if dbtype == "postgres": + pass + else: + pass + + +def downgrade(): + pass From 72e70f5325f61ef714de9bb5c09c9ad1a4976bc7 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Thu, 13 Apr 2023 11:37:34 +0200 Subject: [PATCH 02/36] non-working placeholders for postgres pg_tgrm --- ...2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py index dd6db10c44..c29b95d515 100644 --- a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py +++ b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py @@ -23,6 +23,11 @@ def get_db_type(): return "sqlite" if "sqlite" in self.settings.DB_URL else "postgres" +# def setup_pg_tgrm(): +# db.session.execute('SET pg_trgm.similarity_threshold = 0.7;') +# CREATE EXTENSION pg_trgm; + + def upgrade(): dbtype = get_db_type() if dbtype == "postgres": From a8b7ea8a1b8291d45acd0d01327658dafd9779bd Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Thu, 13 Apr 2023 13:50:38 +0000 Subject: [PATCH 03/36] First draft of some indexes --- ...7.04_b3dbb554ba53_postgres_fuzzy_search.py | 78 +++++++++++++++++-- 1 file changed, 70 insertions(+), 8 deletions(-) diff --git a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py index c29b95d515..1df75e5876 100644 --- a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py +++ b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py @@ -20,21 +20,83 @@ def get_db_type(): - return "sqlite" if 
"sqlite" in self.settings.DB_URL else "postgres" + return op.get_context().dialect.name -# def setup_pg_tgrm(): -# db.session.execute('SET pg_trgm.similarity_threshold = 0.7;') -# CREATE EXTENSION pg_trgm; +def setup_postgres_trigrams(): + op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;") + op.execute("SET pg_trgm.word_similarity_threshold = 0.7;") + # text <% text + op.create_index( + "ix_recipe_name_gin", + table_name="recipe", + columns=["name"], + unique=False, + postgresql_using="gin", + postgresql_ops={ + "name": "gin_trgm_ops", + }, + ) + op.create_index( + "ix_recipe_description_gin", + table_name="recipe", + columns=["description"], + unique=False, + postgresql_using="gin", + postgresql_ops={ + "description": "gin_trgm_ops", + }, + ) + op.create_index( + "ix_recipe_ingredients_note_gin", + table_name="recipe_instructions", + columns=["note"], + unique=False, + postgresql_using="gin", + postgresql_ops={ + "note": "gin_trgm_ops", + }, + ) + op.create_index( + "ix_recipe_ingredients_description_gin", + table_name="recipe_instructions", + columns=["original_text"], + unique=False, + postgresql_using="gin", + postgresql_ops={ + "original_text": "gin_trgm_ops", + }, + ) + + +def remove_postgres_trigrams(): + op.drop_index("ix_recipe_name_gin", table_name="recipe") + op.drop_index("ix_recipe_description_gin", table_name="recipe") + op.drop_index("ix_recipe_ingredients_note_gin", table_name="recipe_instructions") + op.drop_index("ix_recipe_ingredients_description_gin", table_name="recipe_instructions") + + +def setup_sqlite_trigrams(): + pass + + +def remove_sqlite_trigrams(): + pass def upgrade(): - dbtype = get_db_type() - if dbtype == "postgres": - pass + if get_db_type() == "postgres": + setup_postgres_trigrams() + elif get_db_type() == "sqlite": + setup_sqlite_trigrams() else: pass def downgrade(): - pass + if get_db_type() == "postgres": + remove_postgres_trigrams() + elif get_db_type() == "sqlite": + remove_sqlite_trigrams() + else: + pass From 
80d1b5be47f8090b9ea9a9ffcaa9923219dc3423 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Fri, 14 Apr 2023 07:58:05 +0000 Subject: [PATCH 04/36] non-working commit of postgres indexing --- mealie/db/models/recipe/recipe.py | 53 +++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/mealie/db/models/recipe/recipe.py b/mealie/db/models/recipe/recipe.py index 2f97981606..eb03126c4e 100644 --- a/mealie/db/models/recipe/recipe.py +++ b/mealie/db/models/recipe/recipe.py @@ -192,6 +192,59 @@ def __init__( if description is not None: self.description_normalized = unidecode(description).lower().strip() + if session.get_bind().name == "postgres": + __table_args__ = ( + sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"), + sa.Index( + "ix_recipe_instructions_text", + "recipe_instructions", + unique=False, + postgresql_using="gin", + postgresql_ops={ + "name": "gin_trgm_ops", + }, + ), + sa.Index( + "ix_recipes_name", + "name", + unique=False, + postgresql_using="gin", + postgresql_ops={ + "name": "gin_trgm_ops", + }, + ), + sa.Index( + "ix_recipes_description", + "description", + unique=False, + postgresql_using="gin", + postgresql_ops={ + "name": "gin_trgm_ops", + }, + ), + sa.Index( + "ix_recipes_ingredients_note", + "recipe_ingredients", + unique=False, + postgresql_using="gin", + postgresql_ops={ + "name": "gin_trgm_ops", + }, + ), + sa.Index( + "ix_recipes_ingredients_original_text", + "recipes_ingredients", + ["original_text"], + unique=False, + postgresql_using="gin", + postgresql_ops={ + "name": "gin_trgm_ops", + }, + ), + ) + else: + __table_args__ = (sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"),) + @event.listens_for(RecipeModel.name, "set") def receive_name(target: RecipeModel, value: str, oldvalue, initiator): From 3fe9da0c10b17d5858974e2058a5ffc335a4fc7e Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Fri, 14 Apr 2023 09:12:31 +0000 Subject: [PATCH 05/36] Further non-working edits to 
db-centric fuzzy search --- mealie/db/models/recipe/ingredient.py | 38 +++++++++++++++++++++- mealie/db/models/recipe/recipe.py | 46 +++++++++++++++++---------- mealie/repos/repository_recipes.py | 2 ++ 3 files changed, 68 insertions(+), 18 deletions(-) diff --git a/mealie/db/models/recipe/ingredient.py b/mealie/db/models/recipe/ingredient.py index d3c8c1c873..643b6b0691 100644 --- a/mealie/db/models/recipe/ingredient.py +++ b/mealie/db/models/recipe/ingredient.py @@ -1,5 +1,6 @@ from typing import TYPE_CHECKING +import sqlalchemy as sa from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, event, orm from sqlalchemy.orm import Mapped, mapped_column from text_unidecode import unidecode @@ -87,7 +88,7 @@ class RecipeIngredientModel(SqlAlchemyBase, BaseMixins): original_text_normalized: Mapped[str | None] = mapped_column(String, index=True) @auto_init() - def __init__(self, note: str | None = None, orginal_text: str | None = None, **_) -> None: + def __init__(self, session, note: str | None = None, orginal_text: str | None = None, **_) -> None: # SQLAlchemy events do not seem to register things that are set during auto_init if note is not None: self.note_normalized = unidecode(note).lower().strip() @@ -95,6 +96,41 @@ def __init__(self, note: str | None = None, orginal_text: str | None = None, **_ if orginal_text is not None: self.orginal_text = unidecode(orginal_text).lower().strip() + if session.get_bind().name == "postgres": + self.__table_args__ = ( + sa.Index( + "ix_recipes_ingredients_note_normalized", + "recipe_ingredients", + unique=False, + postgresql_using="gin", + postgresql_ops={ + "name": "gin_trgm_ops", + }, + ), + sa.Index( + "ix_recipes_ingredients_original_text_normalized", + "recipes_ingredients", + unique=False, + postgresql_using="gin", + postgresql_ops={ + "name": "gin_trgm_ops", + }, + ), + ) + else: # sqlite case + self.__table_args__ = ( + sa.Index( + "ix_recipes_ingredients_note_normalized", + "recipe_ingredients", + unique=False, 
+ ), + sa.Index( + "ix_recipes_ingredients_original_text_normalized", + "recipes_ingredients", + unique=False, + ), + ) + @event.listens_for(RecipeIngredientModel.note, "set") def receive_note(target: RecipeIngredientModel, value: str, oldvalue, initiator): diff --git a/mealie/db/models/recipe/recipe.py b/mealie/db/models/recipe/recipe.py index eb03126c4e..cd8f076655 100644 --- a/mealie/db/models/recipe/recipe.py +++ b/mealie/db/models/recipe/recipe.py @@ -193,11 +193,11 @@ def __init__( self.description_normalized = unidecode(description).lower().strip() if session.get_bind().name == "postgres": - __table_args__ = ( + self.__table_args__ = ( sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"), sa.Index( - "ix_recipe_instructions_text", - "recipe_instructions", + "ix_recipes_name_normalized", + "name_normalized", unique=False, postgresql_using="gin", postgresql_ops={ @@ -205,8 +205,8 @@ def __init__( }, ), sa.Index( - "ix_recipes_name", - "name", + "ix_recipes_description_normalized", + "description_normalized", unique=False, postgresql_using="gin", postgresql_ops={ @@ -214,8 +214,8 @@ def __init__( }, ), sa.Index( - "ix_recipes_description", - "description", + "ix_recipes_ingredients_note_normalized", + "recipe_ingredients", unique=False, postgresql_using="gin", postgresql_ops={ @@ -223,27 +223,39 @@ def __init__( }, ), sa.Index( - "ix_recipes_ingredients_note", - "recipe_ingredients", + "ix_recipes_ingredients_original_text_normalized", + "recipes_ingredients", unique=False, postgresql_using="gin", postgresql_ops={ "name": "gin_trgm_ops", }, ), + ) + else: # sqlite case + self.__table_args__ = ( + sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"), + sa.Index( + "ix_recipes_name_normalized", + "name_normalized", + unique=False, + ), sa.Index( - "ix_recipes_ingredients_original_text", + "ix_recipes_description_normalized", + "description_normalized", + unique=False, + ), + sa.Index( + 
"ix_recipes_ingredients_note_normalized", + "recipe_ingredients", + unique=False, + ), + sa.Index( + "ix_recipes_ingredients_original_text_normalized", "recipes_ingredients", - ["original_text"], unique=False, - postgresql_using="gin", - postgresql_ops={ - "name": "gin_trgm_ops", - }, ), ) - else: - __table_args__ = (sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"),) @event.listens_for(RecipeModel.name, "set") diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index 947b75a334..1f543e96de 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -154,6 +154,8 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: normalized_search = unidecode(search).lower().strip() # I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is + if self.session.get_bind().name == "postgres": + pass ingredient_ids = ( self.session.execute( select(RecipeIngredientModel.id).filter( From 04f9896f5f212a8496859c1b1be73f4446790c96 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Fri, 14 Apr 2023 11:31:06 +0000 Subject: [PATCH 06/36] update alembic for extensions --- ...7.04_b3dbb554ba53_postgres_fuzzy_search.py | 51 ++----------------- 1 file changed, 3 insertions(+), 48 deletions(-) diff --git a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py index 1df75e5876..7fbcc3e7c1 100644 --- a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py +++ b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py @@ -25,63 +25,18 @@ def get_db_type(): def setup_postgres_trigrams(): op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;") - op.execute("SET pg_trgm.word_similarity_threshold = 0.7;") - # text <% 
text - op.create_index( - "ix_recipe_name_gin", - table_name="recipe", - columns=["name"], - unique=False, - postgresql_using="gin", - postgresql_ops={ - "name": "gin_trgm_ops", - }, - ) - op.create_index( - "ix_recipe_description_gin", - table_name="recipe", - columns=["description"], - unique=False, - postgresql_using="gin", - postgresql_ops={ - "description": "gin_trgm_ops", - }, - ) - op.create_index( - "ix_recipe_ingredients_note_gin", - table_name="recipe_instructions", - columns=["note"], - unique=False, - postgresql_using="gin", - postgresql_ops={ - "note": "gin_trgm_ops", - }, - ) - op.create_index( - "ix_recipe_ingredients_description_gin", - table_name="recipe_instructions", - columns=["original_text"], - unique=False, - postgresql_using="gin", - postgresql_ops={ - "original_text": "gin_trgm_ops", - }, - ) def remove_postgres_trigrams(): - op.drop_index("ix_recipe_name_gin", table_name="recipe") - op.drop_index("ix_recipe_description_gin", table_name="recipe") - op.drop_index("ix_recipe_ingredients_note_gin", table_name="recipe_instructions") - op.drop_index("ix_recipe_ingredients_description_gin", table_name="recipe_instructions") + op.execute("DROP EXTENSION IF EXISTS pg_trgm;") def setup_sqlite_trigrams(): - pass + op.execute("CREATE VIRTUAL TABLE IF NOT EXISTS email USING fts5(sender, title, body);") def remove_sqlite_trigrams(): - pass + op.execute("DROP VIRTUAL TABLE IF EXISTS email USING fts5(sender, title, body);") def upgrade(): From 54c1cacdbb94a18b337ee272b2d4444611c6d169 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Fri, 14 Apr 2023 15:53:39 +0000 Subject: [PATCH 07/36] More non-working setup --- mealie/db/db_setup.py | 7 +++++++ mealie/db/init_db.py | 8 ++++++++ mealie/repos/repository_recipes.py | 1 + 3 files changed, 16 insertions(+) diff --git a/mealie/db/db_setup.py b/mealie/db/db_setup.py index e70f36849e..d29a26eac8 100644 --- a/mealie/db/db_setup.py +++ b/mealie/db/db_setup.py @@ -4,6 +4,7 @@ import sqlalchemy as sa from 
sqlalchemy.orm import sessionmaker from sqlalchemy.orm.session import Session +from sqlalchemy import event from mealie.core.config import get_app_settings @@ -17,6 +18,12 @@ def sql_global_init(db_url: str): engine = sa.create_engine(db_url, echo=False, connect_args=connect_args, pool_pre_ping=True, future=True) + @event.listens_for(engine, "connect") + def receive_connect(connection, _) -> None: + connection.enable_load_extension(True) + connection.execute("SELECT load_extension('fts5');") + connection.enable_load_extension(False) + SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine, future=True) return SessionLocal, engine diff --git a/mealie/db/init_db.py b/mealie/db/init_db.py index 30f67e55de..c7a7365232 100644 --- a/mealie/db/init_db.py +++ b/mealie/db/init_db.py @@ -88,6 +88,14 @@ def main(): alembic_cfg = Config(str(PROJECT_DIR / "alembic.ini")) if db_is_at_head(alembic_cfg): logger.debug("Migration not needed.") + # if session.get_bind().dialect.name == "sqlite": + # session.enable_load_extension(True) + # session.execute("SELECT load_extension('fts5');") + # session.enable_load_extension(False) + + if session.get_bind().name == "sqlite": + session.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;") + else: logger.info("Migration needed. 
Performing migration...") command.upgrade(alembic_cfg, "head") diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index 1f543e96de..7ba748caea 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -156,6 +156,7 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is if self.session.get_bind().name == "postgres": pass + # Product.name.op('%>')(word) ingredient_ids = ( self.session.execute( select(RecipeIngredientModel.id).filter( From d4c72567f25efc251f50e09f18577cfd4d77a1c9 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Sat, 15 Apr 2023 07:23:08 +0200 Subject: [PATCH 08/36] Move db type check to init_db --- mealie/db/db_setup.py | 6 ------ mealie/db/init_db.py | 11 +++-------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/mealie/db/db_setup.py b/mealie/db/db_setup.py index d29a26eac8..a30dd8d77f 100644 --- a/mealie/db/db_setup.py +++ b/mealie/db/db_setup.py @@ -18,12 +18,6 @@ def sql_global_init(db_url: str): engine = sa.create_engine(db_url, echo=False, connect_args=connect_args, pool_pre_ping=True, future=True) - @event.listens_for(engine, "connect") - def receive_connect(connection, _) -> None: - connection.enable_load_extension(True) - connection.execute("SELECT load_extension('fts5');") - connection.enable_load_extension(False) - SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine, future=True) return SessionLocal, engine diff --git a/mealie/db/init_db.py b/mealie/db/init_db.py index c7a7365232..41f95cb791 100644 --- a/mealie/db/init_db.py +++ b/mealie/db/init_db.py @@ -88,14 +88,6 @@ def main(): alembic_cfg = Config(str(PROJECT_DIR / "alembic.ini")) if db_is_at_head(alembic_cfg): logger.debug("Migration not needed.") - # if session.get_bind().dialect.name == "sqlite": - # session.enable_load_extension(True) - # 
session.execute("SELECT load_extension('fts5');") - # session.enable_load_extension(False) - - if session.get_bind().name == "sqlite": - session.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;") - else: logger.info("Migration needed. Performing migration...") command.upgrade(alembic_cfg, "head") @@ -110,6 +102,9 @@ def main(): safe_try(lambda: fix_slug_food_names(db)) + if session.get_bind().name == "postgresql": # needed for fuzzy search and fast GIN text indices + session.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;")) + if __name__ == "__main__": main() From 853c43f59be0bb9d7b667619145b0cf4a4f4d6b6 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Sat, 15 Apr 2023 07:23:21 +0200 Subject: [PATCH 09/36] fix typo in db name check --- mealie/db/models/recipe/ingredient.py | 2 +- mealie/db/models/recipe/recipe.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/mealie/db/models/recipe/ingredient.py b/mealie/db/models/recipe/ingredient.py index 643b6b0691..b82161288f 100644 --- a/mealie/db/models/recipe/ingredient.py +++ b/mealie/db/models/recipe/ingredient.py @@ -96,7 +96,7 @@ def __init__(self, session, note: str | None = None, orginal_text: str | None = if orginal_text is not None: self.orginal_text = unidecode(orginal_text).lower().strip() - if session.get_bind().name == "postgres": + if session.get_bind().name == "postgresql": self.__table_args__ = ( sa.Index( "ix_recipes_ingredients_note_normalized", diff --git a/mealie/db/models/recipe/recipe.py b/mealie/db/models/recipe/recipe.py index cd8f076655..27995230c7 100644 --- a/mealie/db/models/recipe/recipe.py +++ b/mealie/db/models/recipe/recipe.py @@ -192,7 +192,7 @@ def __init__( if description is not None: self.description_normalized = unidecode(description).lower().strip() - if session.get_bind().name == "postgres": + if session.get_bind().name == "postgresql": self.__table_args__ = ( sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"), sa.Index( From 
86c6fca774367de2741c5c9c5074742857d858b7 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Sat, 15 Apr 2023 23:42:07 +0200 Subject: [PATCH 10/36] Add sqlite token search and postgres full text search --- mealie/db/models/recipe/ingredient.py | 30 ++++++++---- mealie/db/models/recipe/recipe.py | 30 +++--------- mealie/repos/repository_recipes.py | 67 +++++++++++++++++++-------- 3 files changed, 76 insertions(+), 51 deletions(-) diff --git a/mealie/db/models/recipe/ingredient.py b/mealie/db/models/recipe/ingredient.py index b82161288f..7620c90ae7 100644 --- a/mealie/db/models/recipe/ingredient.py +++ b/mealie/db/models/recipe/ingredient.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING import sqlalchemy as sa -from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, event, orm +from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, event, orm, func from sqlalchemy.orm import Mapped, mapped_column from text_unidecode import unidecode @@ -100,33 +100,45 @@ def __init__(self, session, note: str | None = None, orginal_text: str | None = self.__table_args__ = ( sa.Index( "ix_recipes_ingredients_note_normalized", - "recipe_ingredients", + "note_normalized", unique=False, postgresql_using="gin", postgresql_ops={ - "name": "gin_trgm_ops", + "note_normalized": "gin_trgm_ops", }, ), sa.Index( "ix_recipes_ingredients_original_text_normalized", - "recipes_ingredients", + "original_text", unique=False, postgresql_using="gin", postgresql_ops={ - "name": "gin_trgm_ops", + "original_text": "gin_trgm_ops", }, ), + sa.Index( + "ix_recipes_ingredients_note_normalized", + func.to_tsvector("english", self.note_normalized), + unique=False, + postgresql_using="gin", + ), + sa.Index( + "ix_recipes_ingredients_original_text_normalized_fulltext", + func.to_tsvector("english", self.original_text), + unique=False, + postgresql_using="gin", + ), ) else: # sqlite case self.__table_args__ = ( sa.Index( "ix_recipes_ingredients_note_normalized", - "recipe_ingredients", + 
"note_normalized", unique=False, ), sa.Index( "ix_recipes_ingredients_original_text_normalized", - "recipes_ingredients", + "original_text_normalized", unique=False, ), ) @@ -135,9 +147,9 @@ def __init__(self, session, note: str | None = None, orginal_text: str | None = @event.listens_for(RecipeIngredientModel.note, "set") def receive_note(target: RecipeIngredientModel, value: str, oldvalue, initiator): if value is not None: - target.name_normalized = unidecode(value).lower().strip() + target.note_normalized = unidecode(value).lower().strip() else: - target.name_normalized = None + target.note_normalized = None @event.listens_for(RecipeIngredientModel.original_text, "set") diff --git a/mealie/db/models/recipe/recipe.py b/mealie/db/models/recipe/recipe.py index 27995230c7..51cba8354d 100644 --- a/mealie/db/models/recipe/recipe.py +++ b/mealie/db/models/recipe/recipe.py @@ -3,7 +3,7 @@ import sqlalchemy as sa import sqlalchemy.orm as orm -from sqlalchemy import event +from sqlalchemy import event, func from sqlalchemy.ext.orderinglist import ordering_list from sqlalchemy.orm import Mapped, mapped_column, validates from text_unidecode import unidecode @@ -201,7 +201,7 @@ def __init__( unique=False, postgresql_using="gin", postgresql_ops={ - "name": "gin_trgm_ops", + "name_normalized": "gin_trgm_ops", }, ), sa.Index( @@ -210,26 +210,20 @@ def __init__( unique=False, postgresql_using="gin", postgresql_ops={ - "name": "gin_trgm_ops", + "description_normalized": "gin_trgm_ops", }, ), sa.Index( - "ix_recipes_ingredients_note_normalized", - "recipe_ingredients", + "ix_recipes_name_normalized_fulltext", + func.to_tsvector("english", self.name_normalized), unique=False, postgresql_using="gin", - postgresql_ops={ - "name": "gin_trgm_ops", - }, ), sa.Index( - "ix_recipes_ingredients_original_text_normalized", - "recipes_ingredients", + "ix_recipes_description_normalized_fulltext", + func.to_tsvector("english", self.description_normalized), unique=False, postgresql_using="gin", 
- postgresql_ops={ - "name": "gin_trgm_ops", - }, ), ) else: # sqlite case @@ -245,16 +239,6 @@ def __init__( "description_normalized", unique=False, ), - sa.Index( - "ix_recipes_ingredients_note_normalized", - "recipe_ingredients", - unique=False, - ), - sa.Index( - "ix_recipes_ingredients_original_text_normalized", - "recipes_ingredients", - unique=False, - ), ) diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index 7ba748caea..b54d66b353 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -152,31 +152,60 @@ def _uuids_for_items(self, items: list[UUID | str] | None, model: type[SqlAlchem def _add_search_to_query(self, query: Select, search: str) -> Select: normalized_search = unidecode(search).lower().strip() + normalized_search_list = normalized_search.split() # I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is - if self.session.get_bind().name == "postgres": - pass - # Product.name.op('%>')(word) - ingredient_ids = ( - self.session.execute( - select(RecipeIngredientModel.id).filter( - or_( - RecipeIngredientModel.note_normalized.like(f"%{normalized_search}%"), - RecipeIngredientModel.original_text_normalized.like(f"%{normalized_search}%"), + if self.session.get_bind().name == "postgresql": + ingredient_ids = ( + self.session.execute( + select(RecipeIngredientModel.id).filter( + or_( + RecipeIngredientModel.note_normalized.op("%>")(normalized_search), + RecipeIngredientModel.note_normalized.match(normalized_search), + RecipeIngredientModel.original_text_normalized.op("%>")(normalized_search), + RecipeIngredientModel.original_text_normalized.match(normalized_search), + ) ) ) + .scalars() + .all() ) - .scalars() - .all() - ) - - q = query.filter( - or_( - RecipeModel.name_normalized.like(f"%{normalized_search}%"), - 
RecipeModel.description_normalized.like(f"%{normalized_search}%"), - RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)), + else: + ingredient_ids = ( + self.session.execute( + select(RecipeIngredientModel.id).filter( + or_( + *[RecipeIngredientModel.note_normalized.like(f"%{ns}%") for ns in normalized_search_list], + *[ + RecipeIngredientModel.original_text_normalized.like(f"%{ns}%") + for ns in normalized_search_list + ], + ) + ) + ) + .scalars() + .all() ) - ).order_by(desc(RecipeModel.name_normalized.like(f"%{normalized_search}%"))) + + if self.session.get_bind().name == "postgresql": + q = query.filter( + or_( + RecipeModel.name_normalized.op("%>")(normalized_search), + RecipeModel.name_normalized.match(normalized_search), + RecipeModel.description_normalized.op("%>")(normalized_search), + RecipeModel.description_normalized.match(normalized_search), + RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)), + ) + ).order_by(func.levenshtein(RecipeModel.name_normalized, normalized_search)) + else: + q = query.filter( + or_( + *[RecipeModel.name_normalized.like(f"%{ns}%") for ns in normalized_search_list], + *[RecipeModel.description_normalized.like(f"%{ns}%") for ns in normalized_search_list], + RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)), + ) + ).order_by(desc(RecipeModel.name_normalized.like(f"%{normalized_search}%"))) + return q def page_all( From 1378be7e8b0b90bf2528a10be2b0eac64f0502d9 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Sat, 15 Apr 2023 23:57:18 +0200 Subject: [PATCH 11/36] reorder search to hit exact matches faster --- mealie/repos/repository_recipes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index b54d66b353..848d64ae34 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -160,10 +160,10 @@ def 
_add_search_to_query(self, query: Select, search: str) -> Select: self.session.execute( select(RecipeIngredientModel.id).filter( or_( - RecipeIngredientModel.note_normalized.op("%>")(normalized_search), RecipeIngredientModel.note_normalized.match(normalized_search), - RecipeIngredientModel.original_text_normalized.op("%>")(normalized_search), RecipeIngredientModel.original_text_normalized.match(normalized_search), + RecipeIngredientModel.note_normalized.op("%>")(normalized_search), + RecipeIngredientModel.original_text_normalized.op("%>")(normalized_search), ) ) ) @@ -190,11 +190,11 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: if self.session.get_bind().name == "postgresql": q = query.filter( or_( - RecipeModel.name_normalized.op("%>")(normalized_search), RecipeModel.name_normalized.match(normalized_search), - RecipeModel.description_normalized.op("%>")(normalized_search), RecipeModel.description_normalized.match(normalized_search), RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)), + RecipeModel.name_normalized.op("%>")(normalized_search), + RecipeModel.description_normalized.op("%>")(normalized_search), ) ).order_by(func.levenshtein(RecipeModel.name_normalized, normalized_search)) else: From 4ddaa8f29283259c755c62e80c362f5d42b24304 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Sun, 16 Apr 2023 22:25:51 +0200 Subject: [PATCH 12/36] Add settings and docs for POSTGRES_LANGUAGE (full text search) --- .../installation/backend-config.md | 1 + mealie/core/settings/settings.py | 33 +++++++++++++++++++ template.env | 1 + 3 files changed, 35 insertions(+) diff --git a/docs/docs/documentation/getting-started/installation/backend-config.md b/docs/docs/documentation/getting-started/installation/backend-config.md index 263ba0d879..d4a890633e 100644 --- a/docs/docs/documentation/getting-started/installation/backend-config.md +++ b/docs/docs/documentation/getting-started/installation/backend-config.md @@ -29,6 +29,7 @@ 
| Variables | Default | Description | | ----------------- | :------: | -------------------------------- | | DB_ENGINE | sqlite | Optional: 'sqlite', 'postgres' | +| POSTGRES_LANGUAGE | english | Postgres language to use for full text search. Can be one of the following: simple, arabic, armenian, basque, catalan, danish, dutch, english, finnish, french, german, greek, hindi, hungarian, indonesian, irish, italian, lithuanian, nepali, norwegian, portuguese, romanian, russian, spanish, swedish, tamil, turkish, yiddish | | POSTGRES_USER | mealie | Postgres database user | | POSTGRES_PASSWORD | mealie | Postgres database password | | POSTGRES_SERVER | postgres | Postgres database server address | diff --git a/mealie/core/settings/settings.py b/mealie/core/settings/settings.py index 37f93738ca..d698cc6238 100644 --- a/mealie/core/settings/settings.py +++ b/mealie/core/settings/settings.py @@ -2,6 +2,7 @@ from pathlib import Path from pydantic import BaseSettings, NoneStr, validator +from typing import Literal from .db_providers import AbstractDBProvider, db_provider_factory @@ -77,6 +78,38 @@ def DB_URL(self) -> str | None: def DB_URL_PUBLIC(self) -> str | None: return self.DB_PROVIDER.db_url_public if self.DB_PROVIDER else None + POSTGRES_LANGUAGE: Literal[ + "simple", + "arabic", + "armenian", + "basque", + "catalan", + "danish", + "dutch", + "english", + "finnish", + "french", + "german", + "greek", + "hindi", + "hungarian", + "indonesian", + "irish", + "italian", + "lithuanian", + "nepali", + "norwegian", + "portuguese", + "romanian", + "russian", + "serbian", + "spanish", + "swedish", + "tamil", + "turkish", + "yiddish", + ] = "english" + DEFAULT_GROUP: str = "Home" DEFAULT_EMAIL: str = "changeme@email.com" DEFAULT_PASSWORD: str = "MyPassword" diff --git a/template.env b/template.env index aad72dba3a..6c3c788130 100644 --- a/template.env +++ b/template.env @@ -16,6 +16,7 @@ API_DOCS=True # Sets the Database type to use. 
Note that in order for Postgres URI to be created, you must set DB_ENGINE=postgres DB_ENGINE=sqlite # Optional: 'sqlite', 'postgres' +POSTGRES_LANGUAGE=english POSTGRES_USER=mealie POSTGRES_PASSWORD=mealie POSTGRES_SERVER=postgres From 22256af5fe4762fbb4e225e8f419ab246bf29b89 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Sun, 16 Apr 2023 22:26:24 +0200 Subject: [PATCH 13/36] Use user-specified POSTGRES_LANGUAGE in search --- mealie/db/models/recipe/ingredient.py | 8 ++++++-- mealie/db/models/recipe/recipe.py | 7 +++++-- mealie/repos/repository_recipes.py | 17 +++++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/mealie/db/models/recipe/ingredient.py b/mealie/db/models/recipe/ingredient.py index 7620c90ae7..4eb26fdc0a 100644 --- a/mealie/db/models/recipe/ingredient.py +++ b/mealie/db/models/recipe/ingredient.py @@ -8,6 +8,7 @@ from mealie.db.models._model_base import BaseMixins, SqlAlchemyBase from mealie.db.models.labels import MultiPurposeLabel from mealie.db.models.recipe.api_extras import IngredientFoodExtras, api_extras +from mealie.core.config import get_app_settings from .._model_utils import auto_init from .._model_utils.guid import GUID @@ -97,6 +98,9 @@ def __init__(self, session, note: str | None = None, orginal_text: str | None = self.orginal_text = unidecode(orginal_text).lower().strip() if session.get_bind().name == "postgresql": + settings = get_app_settings() + language = settings.POSTGRES_LANGUAGE + self.__table_args__ = ( sa.Index( "ix_recipes_ingredients_note_normalized", @@ -118,13 +122,13 @@ def __init__(self, session, note: str | None = None, orginal_text: str | None = ), sa.Index( "ix_recipes_ingredients_note_normalized", - func.to_tsvector("english", self.note_normalized), + func.to_tsvector(language, self.note_normalized), unique=False, postgresql_using="gin", ), sa.Index( "ix_recipes_ingredients_original_text_normalized_fulltext", - func.to_tsvector("english", self.original_text), + func.to_tsvector(language, 
self.original_text), unique=False, postgresql_using="gin", ), diff --git a/mealie/db/models/recipe/recipe.py b/mealie/db/models/recipe/recipe.py index 51cba8354d..68b3aaf155 100644 --- a/mealie/db/models/recipe/recipe.py +++ b/mealie/db/models/recipe/recipe.py @@ -9,6 +9,7 @@ from text_unidecode import unidecode from mealie.db.models._model_utils.guid import GUID +from mealie.core.config import get_app_settings from .._model_base import BaseMixins, SqlAlchemyBase from .._model_utils import auto_init @@ -193,6 +194,8 @@ def __init__( self.description_normalized = unidecode(description).lower().strip() if session.get_bind().name == "postgresql": + settings = get_app_settings() + language = settings.POSTGRES_LANGUAGE self.__table_args__ = ( sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"), sa.Index( @@ -215,13 +218,13 @@ def __init__( ), sa.Index( "ix_recipes_name_normalized_fulltext", - func.to_tsvector("english", self.name_normalized), + func.to_tsvector(language, self.name_normalized), unique=False, postgresql_using="gin", ), sa.Index( "ix_recipes_description_normalized_fulltext", - func.to_tsvector("english", self.description_normalized), + func.to_tsvector(language, self.description_normalized), unique=False, postgresql_using="gin", ), diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index 848d64ae34..3f36d1f139 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -27,6 +27,8 @@ from mealie.schema.recipe.recipe_category import CategoryBase, TagBase from mealie.schema.response.pagination import PaginationQuery +from mealie.core.config import get_app_settings + from ..db.models._model_base import SqlAlchemyBase from ..schema._mealie.mealie_model import extract_uuids from .repository_generic import RepositoryGeneric @@ -156,12 +158,19 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: # I would prefer to just do this in the recipe_ingredient.any 
part of the main query, but it turns out # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is if self.session.get_bind().name == "postgresql": + settings = get_app_settings() + language = settings.POSTGRES_LANGUAGE + ingredient_ids = ( self.session.execute( select(RecipeIngredientModel.id).filter( or_( - RecipeIngredientModel.note_normalized.match(normalized_search), - RecipeIngredientModel.original_text_normalized.match(normalized_search), + RecipeIngredientModel.note_normalized.match( + normalized_search, postgresql_regconfig=language + ), + RecipeIngredientModel.original_text_normalized.match( + normalized_search, postgresql_regconfig=language + ), RecipeIngredientModel.note_normalized.op("%>")(normalized_search), RecipeIngredientModel.original_text_normalized.op("%>")(normalized_search), ) @@ -190,8 +199,8 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: if self.session.get_bind().name == "postgresql": q = query.filter( or_( - RecipeModel.name_normalized.match(normalized_search), - RecipeModel.description_normalized.match(normalized_search), + RecipeModel.name_normalized.match(normalized_search, postgresql_regconfig=language), + RecipeModel.description_normalized.match(normalized_search, postgresql_regconfig=language), RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)), RecipeModel.name_normalized.op("%>")(normalized_search), RecipeModel.description_normalized.op("%>")(normalized_search), From 98f6a28d79aac3a3fdf8eec6665253cc079fa57e Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Mon, 17 Apr 2023 07:07:16 +0200 Subject: [PATCH 14/36] fix fuzzy search typo --- mealie/repos/repository_recipes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index 3f36d1f139..510ef19e05 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py 
@@ -171,8 +171,8 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: RecipeIngredientModel.original_text_normalized.match( normalized_search, postgresql_regconfig=language ), - RecipeIngredientModel.note_normalized.op("%>")(normalized_search), - RecipeIngredientModel.original_text_normalized.op("%>")(normalized_search), + RecipeIngredientModel.note_normalized.op("<%")(normalized_search), + RecipeIngredientModel.original_text_normalized.op("<%")(normalized_search), ) ) ) @@ -202,8 +202,8 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: RecipeModel.name_normalized.match(normalized_search, postgresql_regconfig=language), RecipeModel.description_normalized.match(normalized_search, postgresql_regconfig=language), RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)), - RecipeModel.name_normalized.op("%>")(normalized_search), - RecipeModel.description_normalized.op("%>")(normalized_search), + RecipeModel.name_normalized.op("<%")(normalized_search), + RecipeModel.description_normalized.op("<%")(normalized_search), ) ).order_by(func.levenshtein(RecipeModel.name_normalized, normalized_search)) else: From e64082f96d71a2ada42faa51c5000f4d3315a962 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Mon, 17 Apr 2023 08:57:45 +0200 Subject: [PATCH 15/36] Remove full text search and instead order by trigram match --- .../installation/backend-config.md | 1 - mealie/core/settings/settings.py | 32 ------------------- mealie/repos/repository_recipes.py | 17 ++++------ template.env | 1 - 4 files changed, 7 insertions(+), 44 deletions(-) diff --git a/docs/docs/documentation/getting-started/installation/backend-config.md b/docs/docs/documentation/getting-started/installation/backend-config.md index d4a890633e..263ba0d879 100644 --- a/docs/docs/documentation/getting-started/installation/backend-config.md +++ b/docs/docs/documentation/getting-started/installation/backend-config.md @@ -29,7 +29,6 @@ | Variables | Default | 
Description | | ----------------- | :------: | -------------------------------- | | DB_ENGINE | sqlite | Optional: 'sqlite', 'postgres' | -| POSTGRES_LANGUAGE | english | Postgres language to use for full text search. Can be one of the following: simple, arabic, armenian, basque, catalan, danish, dutch, english, finnish, french, german, greek, hindi, hungarian, indonesian, irish, italian, lithuanian, nepali, norwegian, portuguese, romanian, russian, spanish, swedish, tamil, turkish, yiddish | | POSTGRES_USER | mealie | Postgres database user | | POSTGRES_PASSWORD | mealie | Postgres database password | | POSTGRES_SERVER | postgres | Postgres database server address | diff --git a/mealie/core/settings/settings.py b/mealie/core/settings/settings.py index d698cc6238..72f3358159 100644 --- a/mealie/core/settings/settings.py +++ b/mealie/core/settings/settings.py @@ -78,38 +78,6 @@ def DB_URL(self) -> str | None: def DB_URL_PUBLIC(self) -> str | None: return self.DB_PROVIDER.db_url_public if self.DB_PROVIDER else None - POSTGRES_LANGUAGE: Literal[ - "simple", - "arabic", - "armenian", - "basque", - "catalan", - "danish", - "dutch", - "english", - "finnish", - "french", - "german", - "greek", - "hindi", - "hungarian", - "indonesian", - "irish", - "italian", - "lithuanian", - "nepali", - "norwegian", - "portuguese", - "romanian", - "russian", - "serbian", - "spanish", - "swedish", - "tamil", - "turkish", - "yiddish", - ] = "english" - DEFAULT_GROUP: str = "Home" DEFAULT_EMAIL: str = "changeme@email.com" DEFAULT_PASSWORD: str = "MyPassword" diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index 510ef19e05..5357ffc722 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -165,12 +165,6 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: self.session.execute( select(RecipeIngredientModel.id).filter( or_( - RecipeIngredientModel.note_normalized.match( - normalized_search, 
postgresql_regconfig=language - ), - RecipeIngredientModel.original_text_normalized.match( - normalized_search, postgresql_regconfig=language - ), RecipeIngredientModel.note_normalized.op("<%")(normalized_search), RecipeIngredientModel.original_text_normalized.op("<%")(normalized_search), ) @@ -199,13 +193,16 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: if self.session.get_bind().name == "postgresql": q = query.filter( or_( - RecipeModel.name_normalized.match(normalized_search, postgresql_regconfig=language), - RecipeModel.description_normalized.match(normalized_search, postgresql_regconfig=language), - RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)), RecipeModel.name_normalized.op("<%")(normalized_search), RecipeModel.description_normalized.op("<%")(normalized_search), + RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)), ) - ).order_by(func.levenshtein(RecipeModel.name_normalized, normalized_search)) + ).order_by( + func.least( + RecipeModel.name_normalized.op("<->>")(normalized_search), + RecipeModel.description_normalized.op("<->>")(normalized_search), + ) + ) else: q = query.filter( or_( diff --git a/template.env b/template.env index 6c3c788130..aad72dba3a 100644 --- a/template.env +++ b/template.env @@ -16,7 +16,6 @@ API_DOCS=True # Sets the Database type to use. 
Note that in order for Postgres URI to be created, you must set DB_ENGINE=postgres DB_ENGINE=sqlite # Optional: 'sqlite', 'postgres' -POSTGRES_LANGUAGE=english POSTGRES_USER=mealie POSTGRES_PASSWORD=mealie POSTGRES_SERVER=postgres From 3346811359aebdc8a4033efe5a0cf00da1ac7be4 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Mon, 17 Apr 2023 10:15:31 +0200 Subject: [PATCH 16/36] cleaner adding of indices, remove fulltext --- ...7.04_b3dbb554ba53_postgres_fuzzy_search.py | 56 +++++++++--- mealie/db/models/recipe/ingredient.py | 82 ++++++++---------- mealie/db/models/recipe/recipe.py | 85 ++++++++----------- 3 files changed, 116 insertions(+), 107 deletions(-) diff --git a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py index 7fbcc3e7c1..c465e5bf84 100644 --- a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py +++ b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py @@ -25,25 +25,59 @@ def get_db_type(): def setup_postgres_trigrams(): op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;") + op.create_index( + "ix_recipe_name_normalized_gin", + table_name="recipe", + columns=["name_normalized"], + unique=False, + postgresql_using="gin", + postgresql_ops={ + "name_normalized": "gin_trgm_ops", + }, + ) + op.create_index( + "ix_recipe_description_normalized_gin", + table_name="recipe", + columns=["description_normalized"], + unique=False, + postgresql_using="gin", + postgresql_ops={ + "description_normalized": "gin_trgm_ops", + }, + ) + op.create_index( + "ix_recipe_ingredients_note_normalized_gin", + table_name="recipe_instructions", + columns=["note_normalized"], + unique=False, + postgresql_using="gin", + postgresql_ops={ + "note_normalized": "gin_trgm_ops", + }, + ) + op.create_index( + "ix_recipe_ingredients_original_text_normalized_gin", + table_name="recipe_instructions", + columns=["original_text_normalized"], + 
unique=False, + postgresql_using="gin", + postgresql_ops={ + "original_text_normalized": "gin_trgm_ops", + }, + ) def remove_postgres_trigrams(): op.execute("DROP EXTENSION IF EXISTS pg_trgm;") - - -def setup_sqlite_trigrams(): - op.execute("CREATE VIRTUAL TABLE IF NOT EXISTS email USING fts5(sender, title, body);") - - -def remove_sqlite_trigrams(): - op.execute("DROP VIRTUAL TABLE IF EXISTS email USING fts5(sender, title, body);") + op.drop_index("ix_recipe_name_normalized_gin", table_name="recipe") + op.drop_index("ix_recipe_description_normalized_gin", table_name="recipe") + op.drop_index("ix_recipe_ingredients_note_normalized_gin", table_name="recipe_instructions") + op.drop_index("ix_recipe_ingredients_original_text_normalized_gin", table_name="recipe_instructions") def upgrade(): if get_db_type() == "postgres": setup_postgres_trigrams() - elif get_db_type() == "sqlite": - setup_sqlite_trigrams() else: pass @@ -51,7 +85,5 @@ def upgrade(): def downgrade(): if get_db_type() == "postgres": remove_postgres_trigrams() - elif get_db_type() == "sqlite": - remove_sqlite_trigrams() else: pass diff --git a/mealie/db/models/recipe/ingredient.py b/mealie/db/models/recipe/ingredient.py index 4eb26fdc0a..5c8e921f6c 100644 --- a/mealie/db/models/recipe/ingredient.py +++ b/mealie/db/models/recipe/ingredient.py @@ -97,55 +97,43 @@ def __init__(self, session, note: str | None = None, orginal_text: str | None = if orginal_text is not None: self.orginal_text = unidecode(orginal_text).lower().strip() + tableargs = [ # base set of indices + sa.Index( + "ix_recipes_ingredients_note_normalized", + "note_normalized", + unique=False, + ), + sa.Index( + "ix_recipes_ingredients_original_text_normalized", + "original_text_normalized", + unique=False, + ), + ] if session.get_bind().name == "postgresql": - settings = get_app_settings() - language = settings.POSTGRES_LANGUAGE - - self.__table_args__ = ( - sa.Index( - "ix_recipes_ingredients_note_normalized", - "note_normalized", - 
unique=False, - postgresql_using="gin", - postgresql_ops={ - "note_normalized": "gin_trgm_ops", - }, - ), - sa.Index( - "ix_recipes_ingredients_original_text_normalized", - "original_text", - unique=False, - postgresql_using="gin", - postgresql_ops={ - "original_text": "gin_trgm_ops", - }, - ), - sa.Index( - "ix_recipes_ingredients_note_normalized", - func.to_tsvector(language, self.note_normalized), - unique=False, - postgresql_using="gin", - ), - sa.Index( - "ix_recipes_ingredients_original_text_normalized_fulltext", - func.to_tsvector(language, self.original_text), - unique=False, - postgresql_using="gin", - ), - ) - else: # sqlite case - self.__table_args__ = ( - sa.Index( - "ix_recipes_ingredients_note_normalized", - "note_normalized", - unique=False, - ), - sa.Index( - "ix_recipes_ingredients_original_text_normalized", - "original_text_normalized", - unique=False, - ), + tableargs.extend( + [ + sa.Index( + "ix_recipes_ingredients_note_normalized_gin", + "note_normalized", + unique=False, + postgresql_using="gin", + postgresql_ops={ + "note_normalized": "gin_trgm_ops", + }, + ), + sa.Index( + "ix_recipes_ingredients_original_text_normalized_gin", + "original_text", + unique=False, + postgresql_using="gin", + postgresql_ops={ + "original_text": "gin_trgm_ops", + }, + ), + ] ) + # add indices + self.__table_args__ = tuple(tableargs) @event.listens_for(RecipeIngredientModel.note, "set") diff --git a/mealie/db/models/recipe/recipe.py b/mealie/db/models/recipe/recipe.py index 68b3aaf155..916ffd2a48 100644 --- a/mealie/db/models/recipe/recipe.py +++ b/mealie/db/models/recipe/recipe.py @@ -193,56 +193,45 @@ def __init__( if description is not None: self.description_normalized = unidecode(description).lower().strip() + tableargs = [ # base set of indices + sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"), + sa.Index( + "ix_recipes_name_normalized", + "name_normalized", + unique=False, + ), + sa.Index( + "ix_recipes_description_normalized", + 
"description_normalized", + unique=False, + ), + ] + if session.get_bind().name == "postgresql": - settings = get_app_settings() - language = settings.POSTGRES_LANGUAGE - self.__table_args__ = ( - sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"), - sa.Index( - "ix_recipes_name_normalized", - "name_normalized", - unique=False, - postgresql_using="gin", - postgresql_ops={ - "name_normalized": "gin_trgm_ops", - }, - ), - sa.Index( - "ix_recipes_description_normalized", - "description_normalized", - unique=False, - postgresql_using="gin", - postgresql_ops={ - "description_normalized": "gin_trgm_ops", - }, - ), - sa.Index( - "ix_recipes_name_normalized_fulltext", - func.to_tsvector(language, self.name_normalized), - unique=False, - postgresql_using="gin", - ), - sa.Index( - "ix_recipes_description_normalized_fulltext", - func.to_tsvector(language, self.description_normalized), - unique=False, - postgresql_using="gin", - ), - ) - else: # sqlite case - self.__table_args__ = ( - sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"), - sa.Index( - "ix_recipes_name_normalized", - "name_normalized", - unique=False, - ), - sa.Index( - "ix_recipes_description_normalized", - "description_normalized", - unique=False, - ), + tableargs.extend( + [ + sa.Index( + "ix_recipes_name_normalized_gin", + "name_normalized", + unique=False, + postgresql_using="gin", + postgresql_ops={ + "name_normalized": "gin_trgm_ops", + }, + ), + sa.Index( + "ix_recipes_description_normalized_gin", + "description_normalized", + unique=False, + postgresql_using="gin", + postgresql_ops={ + "description_normalized": "gin_trgm_ops", + }, + ), + ] ) + # add indices + self.__table_args__ = tuple(tableargs) @event.listens_for(RecipeModel.name, "set") From 41dda03fd516ba194fd814c92698442fd377dfd8 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Mon, 17 Apr 2023 10:22:42 +0200 Subject: [PATCH 17/36] Cleanup old import of getting app settings --- 
mealie/db/models/recipe/ingredient.py | 1 - mealie/db/models/recipe/recipe.py | 1 - 2 files changed, 2 deletions(-) diff --git a/mealie/db/models/recipe/ingredient.py b/mealie/db/models/recipe/ingredient.py index 5c8e921f6c..a39e536982 100644 --- a/mealie/db/models/recipe/ingredient.py +++ b/mealie/db/models/recipe/ingredient.py @@ -8,7 +8,6 @@ from mealie.db.models._model_base import BaseMixins, SqlAlchemyBase from mealie.db.models.labels import MultiPurposeLabel from mealie.db.models.recipe.api_extras import IngredientFoodExtras, api_extras -from mealie.core.config import get_app_settings from .._model_utils import auto_init from .._model_utils.guid import GUID diff --git a/mealie/db/models/recipe/recipe.py b/mealie/db/models/recipe/recipe.py index 916ffd2a48..27bdda2f99 100644 --- a/mealie/db/models/recipe/recipe.py +++ b/mealie/db/models/recipe/recipe.py @@ -9,7 +9,6 @@ from text_unidecode import unidecode from mealie.db.models._model_utils.guid import GUID -from mealie.core.config import get_app_settings from .._model_base import BaseMixins, SqlAlchemyBase from .._model_utils import auto_init From 53e7fa0e2904982452f0c8ce3aa584892acdb53b Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Mon, 17 Apr 2023 10:27:28 +0200 Subject: [PATCH 18/36] Fix typo in index --- mealie/db/models/recipe/ingredient.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mealie/db/models/recipe/ingredient.py b/mealie/db/models/recipe/ingredient.py index a39e536982..264653436a 100644 --- a/mealie/db/models/recipe/ingredient.py +++ b/mealie/db/models/recipe/ingredient.py @@ -126,7 +126,7 @@ def __init__(self, session, note: str | None = None, orginal_text: str | None = unique=False, postgresql_using="gin", postgresql_ops={ - "original_text": "gin_trgm_ops", + "original_text_normalized": "gin_trgm_ops", }, ), ] From 236e1663596b8d73e687eceaaaed6318be34ff2b Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Mon, 17 Apr 2023 11:54:18 +0200 Subject: [PATCH 19/36] Fix some 
alembic fuzzy typos --- ...7.04_b3dbb554ba53_postgres_fuzzy_search.py | 28 ++++++++++--------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py index c465e5bf84..2c4ccb2624 100644 --- a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py +++ b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py @@ -26,8 +26,8 @@ def get_db_type(): def setup_postgres_trigrams(): op.execute("CREATE EXTENSION IF NOT EXISTS pg_trgm;") op.create_index( - "ix_recipe_name_normalized_gin", - table_name="recipe", + "ix_recipes_name_normalized_gin", + table_name="recipes", columns=["name_normalized"], unique=False, postgresql_using="gin", @@ -36,8 +36,8 @@ def setup_postgres_trigrams(): }, ) op.create_index( - "ix_recipe_description_normalized_gin", - table_name="recipe", + "ix_recipes_description_normalized_gin", + table_name="recipes", columns=["description_normalized"], unique=False, postgresql_using="gin", @@ -46,8 +46,8 @@ def setup_postgres_trigrams(): }, ) op.create_index( - "ix_recipe_ingredients_note_normalized_gin", - table_name="recipe_instructions", + "ix_recipes_ingredients_note_normalized_gin", + table_name="recipes_ingredients", columns=["note_normalized"], unique=False, postgresql_using="gin", @@ -56,8 +56,8 @@ def setup_postgres_trigrams(): }, ) op.create_index( - "ix_recipe_ingredients_original_text_normalized_gin", - table_name="recipe_instructions", + "ix_recipes_ingredients_original_text_normalized_gin", + table_name="recipes_ingredients", columns=["original_text_normalized"], unique=False, postgresql_using="gin", @@ -69,14 +69,16 @@ def setup_postgres_trigrams(): def remove_postgres_trigrams(): op.execute("DROP EXTENSION IF EXISTS pg_trgm;") - op.drop_index("ix_recipe_name_normalized_gin", table_name="recipe") - 
op.drop_index("ix_recipe_description_normalized_gin", table_name="recipe") - op.drop_index("ix_recipe_ingredients_note_normalized_gin", table_name="recipe_instructions") - op.drop_index("ix_recipe_ingredients_original_text_normalized_gin", table_name="recipe_instructions") + op.drop_index("ix_recipes_name_normalized_gin", table_name="recipe") + op.drop_index("ix_recipes_description_normalized_gin", table_name="recipe") + op.drop_index("ix_recipes_ingredients_note_normalized_gin", table_name="recipes_ingredients") + op.drop_index("ix_recipes_ingredients_original_text_normalized_gin", table_name="recipes_ingredients") def upgrade(): - if get_db_type() == "postgres": + print(get_db_type()) + if get_db_type() == "postgresql": + print("Using postgres") setup_postgres_trigrams() else: pass From adce758f954289973aa667980f7fc11cbf22ff8c Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Mon, 17 Apr 2023 11:55:13 +0200 Subject: [PATCH 20/36] Remove diagnostic printing from alembic migration --- .../2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py index 2c4ccb2624..159087bc86 100644 --- a/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py +++ b/alembic/versions/2023-04-13-06.47.04_b3dbb554ba53_postgres_fuzzy_search.py @@ -76,9 +76,7 @@ def remove_postgres_trigrams(): def upgrade(): - print(get_db_type()) if get_db_type() == "postgresql": - print("Using postgres") setup_postgres_trigrams() else: pass From 021bb2bf94dadede0843564fe88a9dae870f25b8 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Mon, 17 Apr 2023 12:34:36 +0200 Subject: [PATCH 21/36] Fix mixed up commutator for trigram operator and relax criteria --- mealie/repos/repository_recipes.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git 
a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index 5357ffc722..f3d98caa59 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -4,7 +4,7 @@ from pydantic import UUID4 from slugify import slugify -from sqlalchemy import Select, and_, desc, func, or_, select +from sqlalchemy import Select, and_, desc, func, or_, select, text from sqlalchemy.exc import IntegrityError from sqlalchemy.orm import joinedload from text_unidecode import unidecode @@ -158,15 +158,12 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: # I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is if self.session.get_bind().name == "postgresql": - settings = get_app_settings() - language = settings.POSTGRES_LANGUAGE - ingredient_ids = ( self.session.execute( select(RecipeIngredientModel.id).filter( or_( - RecipeIngredientModel.note_normalized.op("<%")(normalized_search), - RecipeIngredientModel.original_text_normalized.op("<%")(normalized_search), + RecipeIngredientModel.note_normalized.op("%>")(normalized_search), + RecipeIngredientModel.original_text_normalized.op("%>")(normalized_search), ) ) ) @@ -191,10 +188,12 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: ) if self.session.get_bind().name == "postgresql": + print("fuzzy searching with postgres") + self.session.execute(text("set pg_trgm.word_similarity_threshold = 0.3;")) q = query.filter( or_( - RecipeModel.name_normalized.op("<%")(normalized_search), - RecipeModel.description_normalized.op("<%")(normalized_search), + RecipeModel.name_normalized.op("%>")(normalized_search), + RecipeModel.description_normalized.op("%>")(normalized_search), RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)), ) ).order_by( @@ -203,6 +202,7 @@ def 
_add_search_to_query(self, query: Select, search: str) -> Select: RecipeModel.description_normalized.op("<->>")(normalized_search), ) ) + print(q) else: q = query.filter( or_( From 0ee9d5590cc42bb131851c0e62a2fb14ba41edbf Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Mon, 17 Apr 2023 21:01:40 +0200 Subject: [PATCH 22/36] forgot to remove query debug --- mealie/repos/repository_recipes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index f3d98caa59..60cb3e3d98 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -189,7 +189,7 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: if self.session.get_bind().name == "postgresql": print("fuzzy searching with postgres") - self.session.execute(text("set pg_trgm.word_similarity_threshold = 0.3;")) + self.session.execute(text("set pg_trgm.word_similarity_threshold = 0.5;")) q = query.filter( or_( RecipeModel.name_normalized.op("%>")(normalized_search), @@ -202,7 +202,6 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: RecipeModel.description_normalized.op("<->>")(normalized_search), ) ) - print(q) else: q = query.filter( or_( From 8f1d1daaf1d6e66c97d5db643bd362a9b20d497a Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Mon, 17 Apr 2023 22:06:33 +0200 Subject: [PATCH 23/36] sort only on name --- mealie/db/init_db.py | 6 +++--- mealie/repos/repository_recipes.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/mealie/db/init_db.py b/mealie/db/init_db.py index 41f95cb791..5837c735d5 100644 --- a/mealie/db/init_db.py +++ b/mealie/db/init_db.py @@ -85,6 +85,9 @@ def main(): if max_retry == 0: raise ConnectionError("Database connection failed - exiting application.") + if session.get_bind().name == "postgresql": # needed for fuzzy search and fast GIN text indices + session.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;")) + 
alembic_cfg = Config(str(PROJECT_DIR / "alembic.ini")) if db_is_at_head(alembic_cfg): logger.debug("Migration not needed.") @@ -102,9 +105,6 @@ def main(): safe_try(lambda: fix_slug_food_names(db)) - if session.get_bind().name == "postgresql": # needed for fuzzy search and fast GIN text indices - session.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;")) - if __name__ == "__main__": main() diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index 60cb3e3d98..d3be04e302 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -199,7 +199,6 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: ).order_by( func.least( RecipeModel.name_normalized.op("<->>")(normalized_search), - RecipeModel.description_normalized.op("<->>")(normalized_search), ) ) else: From 81947f495c98e55738b40667c7d6e95e4ec86dd8 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Mon, 17 Apr 2023 22:18:56 +0200 Subject: [PATCH 24/36] token and fuzzy search tests --- .../test_recipe_repository.py | 23 +++++++++++++++---- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tests/unit_tests/repository_tests/test_recipe_repository.py b/tests/unit_tests/repository_tests/test_recipe_repository.py index bc424f4eaa..554143b1f1 100644 --- a/tests/unit_tests/repository_tests/test_recipe_repository.py +++ b/tests/unit_tests/repository_tests/test_recipe_repository.py @@ -432,14 +432,16 @@ def test_recipe_repo_pagination_by_foods(database: AllRepositories, unique_user: def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): - ingredient_1 = random_string(10) - ingredient_2 = random_string(10) - name_part_1 = random_string(10) + ingredient_1 = "aubergine" + ingredient_2 = "kumquat" + name_part_1 = "Steinbock" + fuzzy_name_part_1 = "Steinbuck" name_1 = f"{name_part_1} soup" - name_part_2 = random_string(10) + name_part_2 = "fiddlehead" name_2 = f"Rustic {name_part_2} stew" name_3 = 
f"{ingredient_1} Soup" - description_part_1 = random_string(10) + description_part_1 = "string of roses" + misordered_token_description_part_1 = "roses string" recipes = [ Recipe( user_id=unique_user.user_id, @@ -506,3 +508,14 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): print([r.name for r in normalized_result]) assert len(normalized_result) == 1 assert normalized_result[0].name == "Rátàtôuile" + + # Test token separation + token_result = database.recipes.page_all(pagination_query, search=misordered_token_description_part_1).items + assert len(token_result) == 1 + assert token_result[0].name == name_1 + + # Test fuzzy search + if database.session.get_bind().name == "postgresql": + fuzzy_result = database.recipes.page_all(pagination_query, search=fuzzy_name_part_1).items + assert len(fuzzy_result) == 1 + assert fuzzy_result[0].name == name_1 From ba250c4b87d788f083630df05b5a7a16bfe1d877 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Tue, 18 Apr 2023 08:06:10 +0200 Subject: [PATCH 25/36] Refactor recipe search test to avoid rare random string cross-matches. 
--- .../test_recipe_repository.py | 48 ++++++++----------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/tests/unit_tests/repository_tests/test_recipe_repository.py b/tests/unit_tests/repository_tests/test_recipe_repository.py index 554143b1f1..b04dcb4067 100644 --- a/tests/unit_tests/repository_tests/test_recipe_repository.py +++ b/tests/unit_tests/repository_tests/test_recipe_repository.py @@ -432,38 +432,28 @@ def test_recipe_repo_pagination_by_foods(database: AllRepositories, unique_user: def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): - ingredient_1 = "aubergine" - ingredient_2 = "kumquat" - name_part_1 = "Steinbock" - fuzzy_name_part_1 = "Steinbuck" - name_1 = f"{name_part_1} soup" - name_part_2 = "fiddlehead" - name_2 = f"Rustic {name_part_2} stew" - name_3 = f"{ingredient_1} Soup" - description_part_1 = "string of roses" - misordered_token_description_part_1 = "roses string" recipes = [ Recipe( user_id=unique_user.user_id, group_id=unique_user.group_id, - name=name_1, - description=f"My favorite {description_part_1}", + name="Steinbock Soup", + description=f"My favorite horns are delicious", recipe_ingredient=[ - RecipeIngredient(note=ingredient_1), + RecipeIngredient(note="goat"), ], ), Recipe( user_id=unique_user.user_id, group_id=unique_user.group_id, - name=name_2, + name="Fiddlehead Fern Stir Fry", recipe_ingredient=[ - RecipeIngredient(note=ingredient_2), + RecipeIngredient(note="kumquats"), ], ), Recipe( user_id=unique_user.user_id, group_id=unique_user.group_id, - name=name_3, + name="Goat Soup", ), # Test diacritics Recipe( @@ -483,25 +473,25 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): assert len(empty_result) == 0 # Search by title - title_result = database.recipes.page_all(pagination_query, search=name_part_2).items + title_result = database.recipes.page_all(pagination_query, search="Steinbock").items assert len(title_result) == 1 - assert title_result[0].name 
== name_2 + assert title_result[0].name == "Steinbock Soup" # Search by description - description_result = database.recipes.page_all(pagination_query, search=description_part_1).items + description_result = database.recipes.page_all(pagination_query, search="horns").items assert len(description_result) == 1 - assert description_result[0].name == name_1 + assert description_result[0].name == "Steinbock Soup" # Search by ingredient - ingredient_result = database.recipes.page_all(pagination_query, search=ingredient_2).items + ingredient_result = database.recipes.page_all(pagination_query, search="kumquats").items assert len(ingredient_result) == 1 - assert ingredient_result[0].name == name_2 + assert ingredient_result[0].name == "Fiddlehead Fern Stir Fry" # Make sure title matches are ordered in front - ordered_result = database.recipes.page_all(pagination_query, search=ingredient_1).items + ordered_result = database.recipes.page_all(pagination_query, search="goat").items assert len(ordered_result) == 2 - assert ordered_result[0].name == name_3 - assert ordered_result[1].name == name_1 + assert ordered_result[0].name == "Goat Soup" + assert ordered_result[1].name == "Steinbock Soup" # Test string normalization normalized_result = database.recipes.page_all(pagination_query, search="ratat").items @@ -510,12 +500,12 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): assert normalized_result[0].name == "Rátàtôuile" # Test token separation - token_result = database.recipes.page_all(pagination_query, search=misordered_token_description_part_1).items + token_result = database.recipes.page_all(pagination_query, search="delicious horns").items assert len(token_result) == 1 - assert token_result[0].name == name_1 + assert token_result[0].name == "Steinbock Soup" # Test fuzzy search if database.session.get_bind().name == "postgresql": - fuzzy_result = database.recipes.page_all(pagination_query, search=fuzzy_name_part_1).items + fuzzy_result = 
database.recipes.page_all(pagination_query, search="Steinbuck").items assert len(fuzzy_result) == 1 - assert fuzzy_result[0].name == name_1 + assert fuzzy_result[0].name == "Steinbock Soup" From 605b2def657b43914d8d520ebeeff03dbe507866 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Tue, 18 Apr 2023 11:47:40 +0100 Subject: [PATCH 26/36] Add ability to quote parts of search for exact match --- mealie/repos/repository_recipes.py | 30 ++++++++++++++----- .../test_recipe_repository.py | 4 +-- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index d3be04e302..4efd9f84d1 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -1,6 +1,7 @@ from collections.abc import Sequence from random import randint from uuid import UUID +import re from pydantic import UUID4 from slugify import slugify @@ -154,10 +155,25 @@ def _uuids_for_items(self, items: list[UUID | str] | None, model: type[SqlAlchem def _add_search_to_query(self, query: Select, search: str) -> Select: normalized_search = unidecode(search).lower().strip() - normalized_search_list = normalized_search.split() + + # keep quoted phrases together as literal portions of the search string + literal_search = False + quoted_regex = re.compile(r"""(["'])(?:(?=(\\?))\2.)*?\1""") # thank you stack exchange! 
+ if quoted_regex.search(normalized_search): + literal_search = True + temp_search = normalized_search + quoted_search_list = [match.group() for match in quoted_regex.finditer(temp_search)] # all quoted strings + temp_search = quoted_regex.sub("", temp_search) + unquoted_search_list = temp_search.split() # all other strings + normalized_search_list = quoted_search_list + unquoted_search_list + normalized_search_list = [re.sub(r"""['"]""", "", x) for x in normalized_search_list] # no more quotes + else: + normalized_search_list = normalized_search.split() + normalized_search_list = [x.strip() for x in normalized_search_list] # user might have whitespace inside quotes + print(normalized_search_list) # I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is - if self.session.get_bind().name == "postgresql": + if (self.session.get_bind().name == "postgresql") & (literal_search == False): # fuzzy search ingredient_ids = ( self.session.execute( select(RecipeIngredientModel.id).filter( @@ -170,7 +186,7 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: .scalars() .all() ) - else: + else: # exact token search ingredient_ids = ( self.session.execute( select(RecipeIngredientModel.id).filter( @@ -187,8 +203,8 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: .all() ) - if self.session.get_bind().name == "postgresql": - print("fuzzy searching with postgres") + if (self.session.get_bind().name == "postgresql") & (literal_search == False): # fuzzy search + # default = 0.7 is too strict for effective fuzzing self.session.execute(text("set pg_trgm.word_similarity_threshold = 0.5;")) q = query.filter( or_( @@ -196,12 +212,12 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: RecipeModel.description_normalized.op("%>")(normalized_search), 
RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)), ) - ).order_by( + ).order_by( # trigram ordering could be too slow on million record db, but is fine with thousands. revisit this if giant use cases evolve func.least( RecipeModel.name_normalized.op("<->>")(normalized_search), ) ) - else: + else: # exact token search q = query.filter( or_( *[RecipeModel.name_normalized.like(f"%{ns}%") for ns in normalized_search_list], diff --git a/tests/unit_tests/repository_tests/test_recipe_repository.py b/tests/unit_tests/repository_tests/test_recipe_repository.py index b04dcb4067..52e67cbb76 100644 --- a/tests/unit_tests/repository_tests/test_recipe_repository.py +++ b/tests/unit_tests/repository_tests/test_recipe_repository.py @@ -447,7 +447,7 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): group_id=unique_user.group_id, name="Fiddlehead Fern Stir Fry", recipe_ingredient=[ - RecipeIngredient(note="kumquats"), + RecipeIngredient(note="moss"), ], ), Recipe( @@ -483,7 +483,7 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): assert description_result[0].name == "Steinbock Soup" # Search by ingredient - ingredient_result = database.recipes.page_all(pagination_query, search="kumquats").items + ingredient_result = database.recipes.page_all(pagination_query, search="moss").items assert len(ingredient_result) == 1 assert ingredient_result[0].name == "Fiddlehead Fern Stir Fry" From 54aad083f72c9980b0b469b5d9cbabff6059d99e Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Tue, 18 Apr 2023 13:05:27 +0100 Subject: [PATCH 27/36] Remove internal punctuation, unless it's quoted for literal search --- mealie/repos/repository_recipes.py | 40 +++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index 4efd9f84d1..46a57bc711 100644 --- a/mealie/repos/repository_recipes.py +++ 
b/mealie/repos/repository_recipes.py @@ -154,26 +154,42 @@ def _uuids_for_items(self, items: list[UUID | str] | None, model: type[SqlAlchem return ids + additional_ids def _add_search_to_query(self, query: Select, search: str) -> Select: - normalized_search = unidecode(search).lower().strip() + """ + 0. fuzzy search (postgres only) and tokenized search are performed separately (fuzzy search is inherently tokenized) + 1. take search string and do a little pre-normalization + 2. look for internal quoted strings and keep them together as literal parts of the search + 3. if there are internal quotes, do token search to be sure literals are kept intact + 4. token search looks for any individual exact hit in name, description, and ingredients + 5. fuzzy search looks for trigram hits in name, description, and ingredients + 6. Sort order is determined by closeness to the recipe name + Should tags be added? + """ + normalized_search = unidecode(search).lower().strip() + punctuation = "!\#$%&()*+,-./:;<=>?@[\\]^_`{|}~" # string.punctuation with ' & " removed # keep quoted phrases together as literal portions of the search string - literal_search = False + literal = False quoted_regex = re.compile(r"""(["'])(?:(?=(\\?))\2.)*?\1""") # thank you stack exchange! 
if quoted_regex.search(normalized_search): - literal_search = True - temp_search = normalized_search - quoted_search_list = [match.group() for match in quoted_regex.finditer(temp_search)] # all quoted strings - temp_search = quoted_regex.sub("", temp_search) - unquoted_search_list = temp_search.split() # all other strings + literal = True + temp = normalized_search + quoted_search_list = [match.group() for match in quoted_regex.finditer(temp)] # all quoted strings + temp = quoted_regex.sub("", temp) # remove all quoted strings + temp = temp.translate( + str.maketrans(punctuation, " " * len(punctuation)) + ) # punctuation->spaces for splitting, but only on unquoted strings + unquoted_search_list = temp.split() # all other strings normalized_search_list = quoted_search_list + unquoted_search_list - normalized_search_list = [re.sub(r"""['"]""", "", x) for x in normalized_search_list] # no more quotes + normalized_search_list = [re.sub(r"""['"]""", "", x) for x in normalized_search_list] # remove quotes else: + # + normalized_search = normalized_search.translate(str.maketrans(punctuation, " " * len(punctuation))) normalized_search_list = normalized_search.split() - normalized_search_list = [x.strip() for x in normalized_search_list] # user might have whitespace inside quotes - print(normalized_search_list) + normalized_search_list = [x.strip() for x in normalized_search_list] # remove padding whitespace inside quotes + # I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is - if (self.session.get_bind().name == "postgresql") & (literal_search == False): # fuzzy search + if (self.session.get_bind().name == "postgresql") & (literal == False): # fuzzy search ingredient_ids = ( self.session.execute( select(RecipeIngredientModel.id).filter( @@ -203,7 +219,7 @@ def _add_search_to_query(self, query: Select, search: str) 
-> Select: .all() ) - if (self.session.get_bind().name == "postgresql") & (literal_search == False): # fuzzy search + if (self.session.get_bind().name == "postgresql") & (literal == False): # fuzzy search # default = 0.7 is too strict for effective fuzzing self.session.execute(text("set pg_trgm.word_similarity_threshold = 0.5;")) q = query.filter( From 6cfe202e2c75e91657b6b3b47b68dc41e98e4f54 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Tue, 18 Apr 2023 15:52:19 +0100 Subject: [PATCH 28/36] Add tests for special character removal and literal search --- .../repository_tests/test_recipe_repository.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/tests/unit_tests/repository_tests/test_recipe_repository.py b/tests/unit_tests/repository_tests/test_recipe_repository.py index 52e67cbb76..d3b0c314b6 100644 --- a/tests/unit_tests/repository_tests/test_recipe_repository.py +++ b/tests/unit_tests/repository_tests/test_recipe_repository.py @@ -488,11 +488,22 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): assert ingredient_result[0].name == "Fiddlehead Fern Stir Fry" # Make sure title matches are ordered in front - ordered_result = database.recipes.page_all(pagination_query, search="goat").items + ordered_result = database.recipes.page_all(pagination_query, search="goat soup").items assert len(ordered_result) == 2 assert ordered_result[0].name == "Goat Soup" assert ordered_result[1].name == "Steinbock Soup" + # Test literal search + literal_result = database.recipes.page_all(pagination_query, search='"goat soup"').items + assert len(literal_result) == 1 + assert literal_result[0].name == "Goat Soup" + + # Test special character removal from non-literal searches + character_result = database.recipes.page_all(pagination_query, search="goat-soup").items + assert len(character_result) == 2 + assert character_result[0].name == "Goat Soup" + assert character_result[1].name == "Steinbock Soup" + # Test string 
normalization normalized_result = database.recipes.page_all(pagination_query, search="ratat").items print([r.name for r in normalized_result]) From dfc602be9b252572a1e80c9c402dfaa86df581b6 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Tue, 18 Apr 2023 16:05:14 +0100 Subject: [PATCH 29/36] Remove the outer double quotes from searches, but leave internal single quotes alone. --- mealie/repos/repository_recipes.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index 46a57bc711..fec87e516a 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -157,8 +157,8 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: """ 0. fuzzy search (postgres only) and tokenized search are performed separately (fuzzy search is inherently tokenized) 1. take search string and do a little pre-normalization - 2. look for internal quoted strings and keep them together as literal parts of the search - 3. if there are internal quotes, do token search to be sure literals are kept intact + 2. look for internal quoted strings and keep them together as "literal" parts of the search + 3. remove special characters from each non-literal search string 4. token search looks for any individual exact hit in name, description, and ingredients 5. fuzzy search looks for trigram hits in name, description, and ingredients 6. 
Sort order is determined by closeness to the recipe name @@ -174,19 +174,20 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: literal = True temp = normalized_search quoted_search_list = [match.group() for match in quoted_regex.finditer(temp)] # all quoted strings - temp = quoted_regex.sub("", temp) # remove all quoted strings + quoted_search_list = [ + re.sub(r"""['"](.*)['"]""", "\\1", x) for x in quoted_search_list + ] # remove outer quotes + temp = quoted_regex.sub("", temp) # remove all quoted strings, leaving just non-quoted temp = temp.translate( str.maketrans(punctuation, " " * len(punctuation)) ) # punctuation->spaces for splitting, but only on unquoted strings - unquoted_search_list = temp.split() # all other strings + unquoted_search_list = temp.split() # all unquoted strings normalized_search_list = quoted_search_list + unquoted_search_list - normalized_search_list = [re.sub(r"""['"]""", "", x) for x in normalized_search_list] # remove quotes else: # normalized_search = normalized_search.translate(str.maketrans(punctuation, " " * len(punctuation))) normalized_search_list = normalized_search.split() normalized_search_list = [x.strip() for x in normalized_search_list] # remove padding whitespace inside quotes - # I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is if (self.session.get_bind().name == "postgresql") & (literal == False): # fuzzy search From 37b2ba6e6bc9c4d606924aefe12eb556fe6aed3c Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Tue, 18 Apr 2023 16:53:37 +0100 Subject: [PATCH 30/36] Update tests to avoid intra-test name collisions --- .../repository_tests/test_recipe_repository.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/unit_tests/repository_tests/test_recipe_repository.py 
b/tests/unit_tests/repository_tests/test_recipe_repository.py index d3b0c314b6..9d91088f08 100644 --- a/tests/unit_tests/repository_tests/test_recipe_repository.py +++ b/tests/unit_tests/repository_tests/test_recipe_repository.py @@ -439,7 +439,7 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): name="Steinbock Soup", description=f"My favorite horns are delicious", recipe_ingredient=[ - RecipeIngredient(note="goat"), + RecipeIngredient(note="alpine animal"), ], ), Recipe( @@ -453,7 +453,7 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): Recipe( user_id=unique_user.user_id, group_id=unique_user.group_id, - name="Goat Soup", + name="Animal Soup", ), # Test diacritics Recipe( @@ -488,20 +488,20 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): assert ingredient_result[0].name == "Fiddlehead Fern Stir Fry" # Make sure title matches are ordered in front - ordered_result = database.recipes.page_all(pagination_query, search="goat soup").items + ordered_result = database.recipes.page_all(pagination_query, search="animal soup").items assert len(ordered_result) == 2 - assert ordered_result[0].name == "Goat Soup" + assert ordered_result[0].name == "Animal Soup" assert ordered_result[1].name == "Steinbock Soup" # Test literal search - literal_result = database.recipes.page_all(pagination_query, search='"goat soup"').items + literal_result = database.recipes.page_all(pagination_query, search='"Animal soup"').items assert len(literal_result) == 1 - assert literal_result[0].name == "Goat Soup" + assert literal_result[0].name == "Animal Soup" # Test special character removal from non-literal searches - character_result = database.recipes.page_all(pagination_query, search="goat-soup").items + character_result = database.recipes.page_all(pagination_query, search="animal-soup").items assert len(character_result) == 2 - assert character_result[0].name == "Goat Soup" + assert 
character_result[0].name == "Animal Soup" assert character_result[1].name == "Steinbock Soup" # Test string normalization From 2a4a6a1309de5bf4127a73ed4801aee45c0ac82f Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Tue, 18 Apr 2023 23:05:40 +0100 Subject: [PATCH 31/36] Fixing leftovers highlighted by lint --- mealie/core/settings/settings.py | 1 - mealie/db/db_setup.py | 1 - mealie/db/models/recipe/ingredient.py | 2 +- mealie/repos/repository_recipes.py | 12 +++++------- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/mealie/core/settings/settings.py b/mealie/core/settings/settings.py index 72f3358159..37f93738ca 100644 --- a/mealie/core/settings/settings.py +++ b/mealie/core/settings/settings.py @@ -2,7 +2,6 @@ from pathlib import Path from pydantic import BaseSettings, NoneStr, validator -from typing import Literal from .db_providers import AbstractDBProvider, db_provider_factory diff --git a/mealie/db/db_setup.py b/mealie/db/db_setup.py index a30dd8d77f..e70f36849e 100644 --- a/mealie/db/db_setup.py +++ b/mealie/db/db_setup.py @@ -4,7 +4,6 @@ import sqlalchemy as sa from sqlalchemy.orm import sessionmaker from sqlalchemy.orm.session import Session -from sqlalchemy import event from mealie.core.config import get_app_settings diff --git a/mealie/db/models/recipe/ingredient.py b/mealie/db/models/recipe/ingredient.py index 264653436a..6fc89754b7 100644 --- a/mealie/db/models/recipe/ingredient.py +++ b/mealie/db/models/recipe/ingredient.py @@ -1,7 +1,7 @@ from typing import TYPE_CHECKING import sqlalchemy as sa -from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, event, orm, func +from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, event, orm from sqlalchemy.orm import Mapped, mapped_column from text_unidecode import unidecode diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index fec87e516a..351c7a6cb6 100644 --- a/mealie/repos/repository_recipes.py +++ 
b/mealie/repos/repository_recipes.py @@ -28,8 +28,6 @@ from mealie.schema.recipe.recipe_category import CategoryBase, TagBase from mealie.schema.response.pagination import PaginationQuery -from mealie.core.config import get_app_settings - from ..db.models._model_base import SqlAlchemyBase from ..schema._mealie.mealie_model import extract_uuids from .repository_generic import RepositoryGeneric @@ -155,14 +153,14 @@ def _uuids_for_items(self, items: list[UUID | str] | None, model: type[SqlAlchem def _add_search_to_query(self, query: Select, search: str) -> Select: """ - 0. fuzzy search (postgres only) and tokenized search are performed separately (fuzzy search is inherently tokenized) + 0. fuzzy search (postgres only) and tokenized search are performed separately 1. take search string and do a little pre-normalization 2. look for internal quoted strings and keep them together as "literal" parts of the search 3. remove special characters from each non-literal search string 4. token search looks for any individual exact hit in name, description, and ingredients 5. fuzzy search looks for trigram hits in name, description, and ingredients 6. Sort order is determined by closeness to the recipe name - Should tags be added? + Should search also look at tags? 
""" normalized_search = unidecode(search).lower().strip() @@ -190,7 +188,7 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: normalized_search_list = [x.strip() for x in normalized_search_list] # remove padding whitespace inside quotes # I would prefer to just do this in the recipe_ingredient.any part of the main query, but it turns out # that at least sqlite wont use indexes for that correctly anymore and takes a big hit, so prefiltering it is - if (self.session.get_bind().name == "postgresql") & (literal == False): # fuzzy search + if (self.session.get_bind().name == "postgresql") & (literal is False): # fuzzy search ingredient_ids = ( self.session.execute( select(RecipeIngredientModel.id).filter( @@ -220,7 +218,7 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: .all() ) - if (self.session.get_bind().name == "postgresql") & (literal == False): # fuzzy search + if (self.session.get_bind().name == "postgresql") & (literal is False): # fuzzy search # default = 0.7 is too strict for effective fuzzing self.session.execute(text("set pg_trgm.word_similarity_threshold = 0.5;")) q = query.filter( @@ -229,7 +227,7 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: RecipeModel.description_normalized.op("%>")(normalized_search), RecipeModel.recipe_ingredient.any(RecipeIngredientModel.id.in_(ingredient_ids)), ) - ).order_by( # trigram ordering could be too slow on million record db, but is fine with thousands. revisit this if giant use cases evolve + ).order_by( # trigram ordering could be too slow on million record db, but is fine with thousands. 
func.least( RecipeModel.name_normalized.op("<->>")(normalized_search), ) From e7d7da9957df053e1554d0145d1eaa04169f3545 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Wed, 19 Apr 2023 09:53:59 +0100 Subject: [PATCH 32/36] cleanup linting and mypy errors --- mealie/db/models/recipe/recipe.py | 7 +++++-- mealie/repos/repository_recipes.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/mealie/db/models/recipe/recipe.py b/mealie/db/models/recipe/recipe.py index 27bdda2f99..f28d2a961e 100644 --- a/mealie/db/models/recipe/recipe.py +++ b/mealie/db/models/recipe/recipe.py @@ -3,7 +3,7 @@ import sqlalchemy as sa import sqlalchemy.orm as orm -from sqlalchemy import event, func +from sqlalchemy import event from sqlalchemy.ext.orderinglist import ordering_list from sqlalchemy.orm import Mapped, mapped_column, validates from text_unidecode import unidecode @@ -35,7 +35,9 @@ class RecipeModel(SqlAlchemyBase, BaseMixins): __tablename__ = "recipes" - __table_args__ = (sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"),) + __table_args__: tuple[sa.UniqueConstraint, ...] 
= ( + sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"), + ) id: Mapped[GUID] = mapped_column(GUID, primary_key=True, default=GUID.generate) slug: Mapped[str | None] = mapped_column(sa.String, index=True) @@ -192,6 +194,7 @@ def __init__( if description is not None: self.description_normalized = unidecode(description).lower().strip() + # list[sa.UniqueConstraint | sa.Index] = [ tableargs = [ # base set of indices sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"), sa.Index( diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index 351c7a6cb6..deb6bf9d00 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -1,7 +1,7 @@ +import re as re from collections.abc import Sequence from random import randint from uuid import UUID -import re from pydantic import UUID4 from slugify import slugify From 0eccb76fabf16733a369c1d1994439201ceca957 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Wed, 19 Apr 2023 10:19:11 +0100 Subject: [PATCH 33/36] Fix test cross-matching on dirty db (leftovers from bulk import) --- .../test_recipe_repository.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/unit_tests/repository_tests/test_recipe_repository.py b/tests/unit_tests/repository_tests/test_recipe_repository.py index 9d91088f08..5ffcc23292 100644 --- a/tests/unit_tests/repository_tests/test_recipe_repository.py +++ b/tests/unit_tests/repository_tests/test_recipe_repository.py @@ -436,7 +436,7 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): Recipe( user_id=unique_user.user_id, group_id=unique_user.group_id, - name="Steinbock Soup", + name="Steinbock Sloop", description=f"My favorite horns are delicious", recipe_ingredient=[ RecipeIngredient(note="alpine animal"), @@ -453,7 +453,7 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): Recipe( user_id=unique_user.user_id, 
group_id=unique_user.group_id, - name="Animal Soup", + name="Animal Sloop", ), # Test diacritics Recipe( @@ -475,12 +475,12 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): # Search by title title_result = database.recipes.page_all(pagination_query, search="Steinbock").items assert len(title_result) == 1 - assert title_result[0].name == "Steinbock Soup" + assert title_result[0].name == "Steinbock Sloop" # Search by description description_result = database.recipes.page_all(pagination_query, search="horns").items assert len(description_result) == 1 - assert description_result[0].name == "Steinbock Soup" + assert description_result[0].name == "Steinbock Sloop" # Search by ingredient ingredient_result = database.recipes.page_all(pagination_query, search="moss").items @@ -488,21 +488,21 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): assert ingredient_result[0].name == "Fiddlehead Fern Stir Fry" # Make sure title matches are ordered in front - ordered_result = database.recipes.page_all(pagination_query, search="animal soup").items + ordered_result = database.recipes.page_all(pagination_query, search="animal sloop").items assert len(ordered_result) == 2 - assert ordered_result[0].name == "Animal Soup" - assert ordered_result[1].name == "Steinbock Soup" + assert ordered_result[0].name == "Animal Sloop" + assert ordered_result[1].name == "Steinbock Sloop" # Test literal search - literal_result = database.recipes.page_all(pagination_query, search='"Animal soup"').items + literal_result = database.recipes.page_all(pagination_query, search='"Animal Sloop"').items assert len(literal_result) == 1 - assert literal_result[0].name == "Animal Soup" + assert literal_result[0].name == "Animal Sloop" # Test special character removal from non-literal searches - character_result = database.recipes.page_all(pagination_query, search="animal-soup").items + character_result = database.recipes.page_all(pagination_query, 
search="animal-sloop").items assert len(character_result) == 2 - assert character_result[0].name == "Animal Soup" - assert character_result[1].name == "Steinbock Soup" + assert character_result[0].name == "Animal Sloop" + assert character_result[1].name == "Steinbock Sloop" # Test string normalization normalized_result = database.recipes.page_all(pagination_query, search="ratat").items @@ -513,10 +513,10 @@ def test_recipe_repo_search(database: AllRepositories, unique_user: TestUser): # Test token separation token_result = database.recipes.page_all(pagination_query, search="delicious horns").items assert len(token_result) == 1 - assert token_result[0].name == "Steinbock Soup" + assert token_result[0].name == "Steinbock Sloop" # Test fuzzy search if database.session.get_bind().name == "postgresql": fuzzy_result = database.recipes.page_all(pagination_query, search="Steinbuck").items assert len(fuzzy_result) == 1 - assert fuzzy_result[0].name == "Steinbock Soup" + assert fuzzy_result[0].name == "Steinbock Sloop" From 7492a7fce799b54fc0c61c10b10f3941b4075146 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Wed, 19 Apr 2023 10:27:59 +0100 Subject: [PATCH 34/36] forgot to cleanup something when debugging mypy errors --- mealie/db/models/recipe/recipe.py | 1 - 1 file changed, 1 deletion(-) diff --git a/mealie/db/models/recipe/recipe.py b/mealie/db/models/recipe/recipe.py index f28d2a961e..b181a23208 100644 --- a/mealie/db/models/recipe/recipe.py +++ b/mealie/db/models/recipe/recipe.py @@ -194,7 +194,6 @@ def __init__( if description is not None: self.description_normalized = unidecode(description).lower().strip() - # list[sa.UniqueConstraint | sa.Index] = [ tableargs = [ # base set of indices sa.UniqueConstraint("slug", "group_id", name="recipe_slug_group_id_key"), sa.Index( From 2095a47c3112eb853911de3a5da0d190dd841af0 Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Wed, 19 Apr 2023 13:14:56 +0100 Subject: [PATCH 35/36] re-order pg_trgm loading in postgres --- 
mealie/db/init_db.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mealie/db/init_db.py b/mealie/db/init_db.py index 5837c735d5..90dd91a8a2 100644 --- a/mealie/db/init_db.py +++ b/mealie/db/init_db.py @@ -85,9 +85,6 @@ def main(): if max_retry == 0: raise ConnectionError("Database connection failed - exiting application.") - if session.get_bind().name == "postgresql": # needed for fuzzy search and fast GIN text indices - session.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;")) - alembic_cfg = Config(str(PROJECT_DIR / "alembic.ini")) if db_is_at_head(alembic_cfg): logger.debug("Migration not needed.") @@ -95,6 +92,9 @@ def main(): logger.info("Migration needed. Performing migration...") command.upgrade(alembic_cfg, "head") + if session.get_bind().name == "postgresql": # needed for fuzzy search and fast GIN text indices + session.execute(text("CREATE EXTENSION IF NOT EXISTS pg_trgm;")) + db = get_repositories(session) if db.users.get_all(): From dc2d56c1f2671d4337e51d9050dfbb3bcd7daddf Mon Sep 17 00:00:00 2001 From: Jacob Corn Date: Sun, 14 May 2023 06:02:56 +0200 Subject: [PATCH 36/36] address comments --- docs/docs/documentation/getting-started/faq.md | 7 +++++++ mealie/db/models/recipe/ingredient.py | 3 ++- mealie/repos/repository_recipes.py | 5 ++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docs/docs/documentation/getting-started/faq.md b/docs/docs/documentation/getting-started/faq.md index 7b1bf96a11..591e6362bf 100644 --- a/docs/docs/documentation/getting-started/faq.md +++ b/docs/docs/documentation/getting-started/faq.md @@ -68,6 +68,13 @@ Yes, you can install Mealie on your local machine. HOWEVER, it is recommended th - [Advanced Installation](../installation/advanced/) +## What is fuzzy search and how do I use it? +Mealie can use fuzzy search, which is robust to minor typos. For example, searching for "brocolli" will still find your recipe for "broccoli soup". 
But fuzzy search is only functional on a Postgres database backend. To enable fuzzy search you will need to migrate to Postgres: + +1. Back up your database and download the .zip file (same as when [migrating](./migrating-to-mealie-v1.md)) +2. Set up a [Postgres](./installation/postgres.md) instance of Mealie +3. Upload the backup .zip and click to apply it (as a migration) + ## How i can attach an image or video to a Recipe? Yes. Mealie's Recipe Steps and other fields support the markdown syntax and therefor supports images and videos. To attach an image to the recipe, you can upload it as an asset and use the provided copy button to generate the html image tag required to render the image. For videos, Mealie provides no way to host videos. You'll need to host your videos with another provider and embed them in your recipe. Generally, the video provider will provide a link to the video and the html tag required to render the video. For example, youtube provides the following link that works inside a step. You can adjust the width and height attributes as necessary to ensure a fit. 
diff --git a/mealie/db/models/recipe/ingredient.py b/mealie/db/models/recipe/ingredient.py index 6fc89754b7..8813603b2d 100644 --- a/mealie/db/models/recipe/ingredient.py +++ b/mealie/db/models/recipe/ingredient.py @@ -3,6 +3,7 @@ import sqlalchemy as sa from sqlalchemy import Boolean, Float, ForeignKey, Integer, String, event, orm from sqlalchemy.orm import Mapped, mapped_column +from sqlalchemy.orm.session import Session from text_unidecode import unidecode from mealie.db.models._model_base import BaseMixins, SqlAlchemyBase @@ -88,7 +89,7 @@ class RecipeIngredientModel(SqlAlchemyBase, BaseMixins): original_text_normalized: Mapped[str | None] = mapped_column(String, index=True) @auto_init() - def __init__(self, session, note: str | None = None, orginal_text: str | None = None, **_) -> None: + def __init__(self, session: Session, note: str | None = None, orginal_text: str | None = None, **_) -> None: # SQLAlchemy events do not seem to register things that are set during auto_init if note is not None: self.note_normalized = unidecode(note).lower().strip() diff --git a/mealie/repos/repository_recipes.py b/mealie/repos/repository_recipes.py index deb6bf9d00..a782daf471 100644 --- a/mealie/repos/repository_recipes.py +++ b/mealie/repos/repository_recipes.py @@ -168,13 +168,12 @@ def _add_search_to_query(self, query: Select, search: str) -> Select: # keep quoted phrases together as literal portions of the search string literal = False quoted_regex = re.compile(r"""(["'])(?:(?=(\\?))\2.)*?\1""") # thank you stack exchange! 
+ removequotes_regex = re.compile(r"""['"](.*)['"]""") if quoted_regex.search(normalized_search): literal = True temp = normalized_search quoted_search_list = [match.group() for match in quoted_regex.finditer(temp)] # all quoted strings - quoted_search_list = [ - re.sub(r"""['"](.*)['"]""", "\\1", x) for x in quoted_search_list - ] # remove outer quotes + quoted_search_list = [removequotes_regex.sub("\\1", x) for x in quoted_search_list] # remove outer quotes temp = quoted_regex.sub("", temp) # remove all quoted strings, leaving just non-quoted temp = temp.translate( str.maketrans(punctuation, " " * len(punctuation))