Skip to content

Commit 3992920

Browse files
authored
perf: Optimize content_type filtering with computed column and indexes (#868)
1 parent c63d6b3 commit 3992920

File tree

3 files changed

+46
-4
lines changed

3 files changed

+46
-4
lines changed
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
"""Add content>>type as field
2+
3+
Revision ID: 83a04f64a1db
4+
Revises: d0e1f2a3b4c5
5+
Create Date: 2025-10-14 09:26:24.239634
6+
7+
"""
8+
from alembic import op
9+
import sqlalchemy as sa
10+
11+
12+
# revision identifiers, used by Alembic.
13+
revision = '83a04f64a1db'
14+
down_revision = 'd0e1f2a3b4c5'
15+
branch_labels = None
16+
depends_on = None
17+
18+
19+
def upgrade() -> None:
    """Add the generated ``content_type`` column to ``messages`` and index it.

    The new column is a persisted computed column defined by the SQL
    expression ``content->>'type'``. Two non-unique indexes are then
    created: one on ``content_type`` alone and one on
    ``(sender, content_type)``.
    """
    content_type_column = sa.Column(
        'content_type',
        sa.String(),
        sa.Computed("content->>'type'", persisted=True),
        nullable=True,
    )
    op.add_column('messages', content_type_column)

    op.create_index(
        'ix_messages_content_type',
        'messages',
        ['content_type'],
        unique=False,
    )

    # The content type is often filtered together with the sender, so the
    # pair gets its own composite index as well.
    op.create_index(
        'ix_messages_sender_content_type',
        'messages',
        ['sender', 'content_type'],
        unique=False,
    )
29+
30+
31+
def downgrade() -> None:
    """Drop the two ``content_type`` indexes, then the column itself."""
    # The composite index is dropped first, then the single-column one.
    for index_name in (
        'ix_messages_sender_content_type',
        'ix_messages_content_type',
    ):
        op.drop_index(index_name, 'messages')

    op.drop_column('messages', 'content_type')

src/aleph/db/accessors/messages.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,7 @@ def make_matching_messages_query(
130130
MessageDb.content["item_hash"].astext.in_(content_hashes)
131131
)
132132
if content_types:
133-
select_stmt = select_stmt.where(
134-
MessageDb.content["type"].astext.in_(content_types)
135-
)
133+
select_stmt = select_stmt.where(MessageDb.content_type.in_(content_types))
136134
if tags:
137135
select_stmt = select_stmt.where(
138136
MessageDb.content["content"]["tags"].has_any(array(tags))
@@ -316,7 +314,7 @@ def get_unconfirmed_messages(
316314
def make_message_upsert_query(message: MessageDb) -> Insert:
317315
return (
318316
insert(MessageDb)
319-
.values(message.to_dict())
317+
.values(message.to_dict(exclude={"content_type"}))
320318
.on_conflict_do_update(
321319
constraint="messages_pkey",
322320
set_={"time": func.least(MessageDb.time, message.time)},

src/aleph/db/models/messages.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
ARRAY,
1919
TIMESTAMP,
2020
Column,
21+
Computed,
2122
ForeignKey,
23+
Index,
2224
Integer,
2325
String,
2426
Table,
@@ -99,6 +101,14 @@ class MessageDb(Base):
99101
time: dt.datetime = Column(TIMESTAMP(timezone=True), nullable=False, index=True)
100102
channel: Optional[Channel] = Column(String, nullable=True, index=True)
101103
size: int = Column(Integer, nullable=False)
104+
content_type: Optional[str] = Column(
105+
String, Computed("content->>'type'", persisted=True)
106+
)
107+
108+
__table_args__ = (
109+
Index("ix_messages_content_type", content_type),
110+
Index("ix_messages_sender_content_type", sender, content_type),
111+
)
102112

103113
confirmations: "List[ChainTxDb]" = relationship(
104114
"ChainTxDb", secondary=message_confirmations

0 commit comments

Comments
 (0)