Skip to content

Commit

Permalink
Fix, add alembic script
Browse files Browse the repository at this point in the history
  • Loading branch information
olliestanley committed Jun 11, 2023
1 parent 85d3ada commit 5598e00
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 21 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Add search vector update date, indexes
Revision ID: d18213c629be
Revises: c181661eba3a
Create Date: 2023-06-11 15:44:49.911455
"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "d18213c629be"
down_revision = "c181661eba3a"
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Apply this revision to the schema.

    Adds a nullable, timezone-aware ``search_vector_update_date`` column to
    the ``message`` table, then creates non-unique indexes on that column and
    on ``message_revision.created_date``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # The column is nullable and has no default, so existing rows are left with NULL.
    op.add_column("message", sa.Column("search_vector_update_date", sa.DateTime(timezone=True), nullable=True))
    op.create_index(
        op.f("ix_message_search_vector_update_date"), "message", ["search_vector_update_date"], unique=False
    )
    op.create_index(op.f("ix_message_revision_created_date"), "message_revision", ["created_date"], unique=False)
    # ### end Alembic commands ###


def downgrade() -> None:
    """Revert this revision.

    Drops the two indexes created by ``upgrade`` and then removes the
    ``search_vector_update_date`` column from the ``message`` table.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    # Reverse order of upgrade(): indexes are dropped before the column they cover.
    op.drop_index(op.f("ix_message_revision_created_date"), table_name="message_revision")
    op.drop_index(op.f("ix_message_search_vector_update_date"), table_name="message")
    op.drop_column("message", "search_vector_update_date")
    # ### end Alembic commands ###
52 changes: 31 additions & 21 deletions backend/oasst_backend/scheduled_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,32 +72,42 @@ def update_search_vectors(batch_size: int) -> None:
try:
with default_session_factory() as session:
while True:
to_update: list[Message] = (
session.query(Message)
.outerjoin(
MessageRevision,
query = session.query(Message)

# Subquery to obtain creation date of most recent revision for a message
latest_revision_date_subquery = (
session.query(func.max(MessageRevision.created_date))
.filter(MessageRevision.message_id == Message.id)
.correlate(Message)
.as_scalar()
)

# Outerjoin messages to their most recent revisions
query = query.outerjoin(
MessageRevision,
and_(
Message.id == MessageRevision.message_id,
MessageRevision.created_date == latest_revision_date_subquery,
),
)

# Filter for only messages where we want to update the search vector
# The core components are when search vector is null, or there is a revision since last vector update
# We also add the case where there is a revision and no vector update date
# This accounts for messages where the vector was generated before vector update dates were added
query = query.filter(
or_(
Message.search_vector.is_(None),
MessageRevision.created_date > Message.search_vector_update_date,
and_(
Message.id == MessageRevision.message_id,
MessageRevision.created_date
== session.query(func.max(MessageRevision.created_date))
.filter(MessageRevision.message_id == Message.id)
.as_scalar(),
Message.search_vector_update_date.is_(None),
MessageRevision.created_date.isnot(None),
),
)
.filter(
or_(
Message.search_vector.is_(None),
MessageRevision.created_date > Message.search_vector_update_date,
and_(
Message.search_vector_update_date.is_(None),
MessageRevision.created_date.isnot(None),
),
)
)
.limit(batch_size)
.all()
)

to_update: list[Message] = query.limit(batch_size).all()

if not to_update:
break

Expand Down

0 comments on commit 5598e00

Please sign in to comment.