Skip to content

Commit

Permalink
Avoid Sync message overflow (distributed-system-analysis#3389)
Browse files Browse the repository at this point in the history
PBENCH-1120

A SQL error was observed in deployment where `pbench-index` logged an error on
the `INDEX` sync object because a tarball was somehow not present. The message
string generated by `indexing_tarballs.py` exceeded the `VARCHAR(255)` column
specification.

This isn't an attempt to address the root problem, but to address the symptom
of overloading the operation table message column in the future so at least
errors are properly recorded.

This reworks some of the `indexing_tarballs.py` messages to avoid redundancy
(e.g., naming the dataset or tarball isn't necessary as the records are linked
to the `Dataset`), but also removes the limit on the message column as a
precaution.
  • Loading branch information
dbutenhof committed Apr 20, 2023
1 parent 3f84f33 commit f17fcda
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 8 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Remove size limit on Sync messages
Revision ID: 5679217a62bb
Revises: 80c8c690f09b
Create Date: 2023-04-18 20:03:26.080554
"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
revision = "5679217a62bb"
down_revision = "80c8c690f09b"
branch_labels = None
depends_on = None


def upgrade() -> None:
op.alter_column(
"dataset_operations",
"message",
existing_type=sa.VARCHAR(length=255),
type_=sa.Text(),
existing_nullable=True,
)
# ### end Alembic commands ###


def downgrade() -> None:
op.alter_column(
"dataset_operations",
"message",
existing_type=sa.Text(),
type_=sa.VARCHAR(length=255),
existing_nullable=True,
)
# ### end Alembic commands ###
4 changes: 2 additions & 2 deletions lib/pbench/server/database/models/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from typing import Any, Dict, List, Optional, Union

from dateutil import parser as date_parser
from sqlalchemy import Column, Enum, event, ForeignKey, Integer, JSON, String
from sqlalchemy import Column, Enum, event, ForeignKey, Integer, JSON, String, Text
from sqlalchemy.exc import DataError, IntegrityError, SQLAlchemyError
from sqlalchemy.orm import Query, relationship, validates

Expand Down Expand Up @@ -519,7 +519,7 @@ class Operation(Database.Base):
id = Column(Integer, primary_key=True, autoincrement=True)
name = Column(Enum(OperationName), index=True)
state = Column(Enum(OperationState))
message = Column(String(255))
message = Column(Text)
dataset_ref = Column(Integer, ForeignKey("datasets.id"))
dataset = relationship("Dataset", back_populates="operations")

Expand Down
8 changes: 3 additions & 5 deletions lib/pbench/server/indexing_tarballs.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,16 +178,14 @@ def collect_tb(self) -> Tuple[int, List[TarballData]]:
for dataset in self.sync.next():
tb = Metadata.getvalue(dataset, Metadata.TARBALL_PATH)
if not tb:
self.sync.error(dataset, f"{dataset} does not have a tarball-path")
self.sync.error(dataset, "Dataset does not have a tarball-path")
continue

try:
# get size
size = os.stat(tb).st_size
except OSError as e:
self.sync.error(
dataset, f"Could not fetch tar ball size for {tb}: {e}"
)
self.sync.error(dataset, f"Could not fetch tarball size: {e!s}")
continue
else:
tarballs.append(TarballData(dataset=dataset, tarball=tb, size=size))
Expand Down Expand Up @@ -379,7 +377,7 @@ def sighup_handler(*args):
except TarballNotFound as e:
self.sync.error(
dataset,
f"Unable to unpack dataset: {e!r}",
f"Unable to unpack dataset: {e!s}",
)
continue

Expand Down
2 changes: 1 addition & 1 deletion lib/pbench/test/unit/server/test_indexing_tarballs.py
Original file line number Diff line number Diff line change
Expand Up @@ -412,7 +412,7 @@ def test_collect_tb_missing_tb(self, mocks, index):
FakeMetadata.no_tarball = ["ds2"]
tb_list = index.collect_tb()
assert FakeSync.called == ["next-INDEX"]
assert FakeSync.errors["ds2"] == "ds2 does not have a tarball-path"
assert FakeSync.errors["ds2"] == "Dataset does not have a tarball-path"
assert tb_list == (0, [tarball_1])

def test_collect_tb_fail(self, mocks, index):
Expand Down
28 changes: 28 additions & 0 deletions lib/pbench/test/unit/server/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,19 @@ def test_error_new(self, make_logger, more_datasets):
"UPLOAD": {"state": "FAILED", "message": "this is an error"}
}

def test_error_long(self, make_logger, more_datasets):
"""Test handling of long error messages"""
drb = Dataset.query(name="drb")
sync = Sync(make_logger, OperationName.UPLOAD)
sync.update(drb, enabled=[OperationName.UPLOAD])
assert Metadata.getvalue(drb, "dataset.operations") == {
"UPLOAD": {"state": "READY", "message": None}
}
sync.error(drb, "tenletters" * 26)
assert Metadata.getvalue(drb, "dataset.operations") == {
"UPLOAD": {"state": "FAILED", "message": "tenletters" * 26}
}

def test_update_state(self, make_logger, more_datasets):
"""Test that the sync update operation sets the component state as
specified.
Expand Down Expand Up @@ -109,6 +122,21 @@ def test_update_state_update(self, make_logger, more_datasets):
"INDEX": {"state": "FAILED", "message": "failed"},
}

def test_update_state_update_long(self, make_logger, more_datasets):
"""Test that sync update allows messages over 255 characters."""
drb = Dataset.query(name="drb")
sync = Sync(make_logger, OperationName.INDEX)
sync.update(drb, None, [OperationName.DELETE, OperationName.INDEX])
assert Metadata.getvalue(drb, "dataset.operations") == {
"DELETE": {"state": "READY", "message": None},
"INDEX": {"state": "READY", "message": None},
}
sync.update(drb, state=OperationState.FAILED, message="tenletters" * 26)
assert Metadata.getvalue(drb, "dataset.operations") == {
"DELETE": {"state": "READY", "message": None},
"INDEX": {"state": "FAILED", "message": "tenletters" * 26},
}

def test_update_enable(self, make_logger, more_datasets):
"""Test that sync update correctly enables specified operations."""
drb = Dataset.query(name="drb")
Expand Down

0 comments on commit f17fcda

Please sign in to comment.