diff --git a/migrations/versions/1038c2174f5d_make_case_insensitive_hash_of_query_text.py b/migrations/versions/1038c2174f5d_make_case_insensitive_hash_of_query_text.py
new file mode 100644
--- /dev/null
+++ b/migrations/versions/1038c2174f5d_make_case_insensitive_hash_of_query_text.py
@@ -0,0 +1,64 @@
+"""Make case insensitive hash of query text
+
+NOTE: despite its name, this revision makes ``query_hash`` case *sensitive*.
+Upgrading re-hashes every query text as stored, since ``gen_query_hash`` no
+longer lower-cases its input; downgrading re-hashes the lower-cased text to
+restore the previous, case-insensitive hashes.
+
+Revision ID: 1038c2174f5d
+Revises: fd4fc850d7ea
+Create Date: 2023-07-16 23:10:12.885949
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.sql import table
+
+from redash.utils import gen_query_hash
+
+# revision identifiers, used by Alembic.
+revision = '1038c2174f5d'
+down_revision = 'fd4fc850d7ea'
+branch_labels = None
+depends_on = None
+
+
+def _queries_table():
+    """Return a minimal ``queries`` table: only the columns used here."""
+    return table(
+        'queries',
+        sa.Column('id', sa.Integer, primary_key=True),
+        sa.Column('query', sa.Text),
+        # md5 hexdigest is 32 characters; no DDL is emitted from this
+        # definition, so the length is descriptive only.
+        sa.Column('query_hash', sa.String(length=32)))
+
+
+def change_query_hash(conn, table, query_text_to):
+    """Recompute ``query_hash`` for every row of ``table``.
+
+    ``query_text_to`` is applied to each row's query text before hashing:
+    ``str`` keeps it unchanged, ``str.lower`` lower-cases it.
+    """
+    for record in conn.execute(table.select()):
+        if record.query is None:
+            # No text to hash: ``str(None)`` would hash the literal "None"
+            # and ``str.lower(None)`` would raise TypeError.
+            continue
+        query_text = query_text_to(record.query)
+        conn.execute(
+            table
+            .update()
+            .where(table.c.id == record.id)
+            .values(query_hash=gen_query_hash(query_text)))
+
+
+def upgrade():
+    """Re-hash every query from its text as stored (case-sensitive)."""
+    change_query_hash(op.get_bind(), _queries_table(), query_text_to=str)
+
+
+def downgrade():
+    """Re-hash every query from its lower-cased text (previous behaviour)."""
+    change_query_hash(
+        op.get_bind(), _queries_table(), query_text_to=str.lower)
diff --git a/redash/utils/__init__.py b/redash/utils/__init__.py
index 0dad9111a6..5c81e43a64 100644
--- a/redash/utils/__init__.py
+++ b/redash/utils/__init__.py
@@ -51,14 +51,14 @@ def slugify(s):
 
 def gen_query_hash(sql):
     """Return hash of the given query after stripping all comments, line breaks
-    and multiple spaces, and lower casing all text.
+    and multiple spaces.
 
-    TODO: possible issue - the following queries will get the same id:
+    The following queries will get different ids:
         1. SELECT 1 FROM table WHERE column='Value';
         2. SELECT 1 FROM table where column='value';
     """
     sql = COMMENTS_REGEX.sub("", sql)
-    sql = "".join(sql.split()).lower()
+    sql = "".join(sql.split())
     return hashlib.md5(sql.encode("utf-8")).hexdigest()
 
 