Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

checks for generators in database functions #11564

Merged
merged 5 commits into from
Dec 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/11564.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add some safety checks that storage functions are used correctly.
50 changes: 46 additions & 4 deletions synapse/storage/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import inspect
import logging
import time
import types
from collections import defaultdict
from sys import intern
from time import monotonic as monotonic_time
Expand Down Expand Up @@ -526,6 +528,12 @@ def new_transaction(
the function will correctly handle being aborted and retried half way
through its execution.

Similarly, the arguments to `func` (`args`, `kwargs`) should not be generators,
since they could be evaluated multiple times (which would produce an empty
result on the second or subsequent evaluation). Likewise, the closure of `func`
must not reference any generators. This method attempts to detect such usage
and will log an error.

Args:
conn
desc
Expand All @@ -536,6 +544,39 @@ def new_transaction(
**kwargs
"""

# Robustness check: ensure that none of the arguments are generators, since that
# will fail if we have to repeat the transaction.
# For now, we just log an error, and hope that it works on the first attempt.
Comment on lines +547 to +549
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we update the docstring of this method to specifically call out not using generators?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fair, will do.

# TODO: raise an exception.
for i, arg in enumerate(args):
if inspect.isgenerator(arg):
logger.error(
"Programming error: generator passed to new_transaction as "
"argument %i to function %s",
i,
func,
)
for name, val in kwargs.items():
if inspect.isgenerator(val):
logger.error(
"Programming error: generator passed to new_transaction as "
"argument %s to function %s",
name,
func,
)
# also check variables referenced in func's closure
if inspect.isfunction(func):
f = cast(types.FunctionType, func)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

mypy doesn't understand that FunctionType is a particular type of Callable, hence this business.

if f.__closure__:
for i, cell in enumerate(f.__closure__):
if inspect.isgenerator(cell.cell_contents):
logger.error(
"Programming error: function %s references generator %s "
"via its closure",
f,
f.__code__.co_freevars[i],
clokep marked this conversation as resolved.
Show resolved Hide resolved
)

start = monotonic_time()
txn_id = self._TXN_ID

Expand Down Expand Up @@ -1226,9 +1267,9 @@ async def simple_upsert_many(
self,
table: str,
key_names: Collection[str],
key_values: Collection[Iterable[Any]],
key_values: Collection[Collection[Any]],
value_names: Collection[str],
value_values: Iterable[Iterable[Any]],
value_values: Collection[Collection[Any]],
Comment on lines +1270 to +1272
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does simple_upsert_many_txn need to match these?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so, because simple_upsert_many_txn only goes through the iterables once.

desc: str,
) -> None:
"""
Expand Down Expand Up @@ -1920,7 +1961,7 @@ async def simple_delete_many(
self,
table: str,
column: str,
iterable: Iterable[Any],
iterable: Collection[Any],
keyvalues: Dict[str, Any],
desc: str,
) -> int:
Expand All @@ -1931,7 +1972,8 @@ async def simple_delete_many(
Args:
table: string giving the table name
column: column name to test for inclusion against `iterable`
iterable: list
iterable: list of values to match against `column`. NB cannot be a generator
as it may be evaluated multiple times.
keyvalues: dict of column names and values to select the rows with
desc: description of the transaction, for logging and metrics

Expand Down
5 changes: 2 additions & 3 deletions synapse/storage/databases/main/presence.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ async def add_users_to_send_full_presence_to(self, user_ids: Iterable[str]):
"""
# Add user entries to the table, updating the presence_stream_id column if the user already
# exists in the table.
presence_stream_id = self._presence_id_gen.get_current_token()
await self.db_pool.simple_upsert_many(
table="users_to_send_full_presence_to",
key_names=("user_id",),
Expand All @@ -279,9 +280,7 @@ async def add_users_to_send_full_presence_to(self, user_ids: Iterable[str]):
# devices at different times, each device will receive full presence once - when
# the presence stream ID in their sync token is less than the one in the table
# for their user ID.
value_values=(
(self._presence_id_gen.get_current_token(),) for _ in user_ids
),
value_values=[(presence_stream_id,) for _ in user_ids],
desc="add_users_to_send_full_presence_to",
)

Expand Down