diff --git a/Lib/sqlite3/dump.py b/Lib/sqlite3/dump.py
index ead3360ce67608..481d605194c7fe 100644
--- a/Lib/sqlite3/dump.py
+++ b/Lib/sqlite3/dump.py
@@ -7,6 +7,10 @@
 # future enhancements, you should normally quote any identifier that
 # is an English language word, even if you do not have to."
 
+
+from contextlib import contextmanager
+
+
 def _quote_name(name):
     return '"{0}"'.format(name.replace('"', '""'))
 
@@ -15,6 +19,24 @@ def _quote_value(value):
     return "'{0}'".format(value.replace("'", "''"))
 
 
+def _force_decode(bs, *args, **kwargs):
+    """Return bs.decode(*args, **kwargs), or a Latin-1 decode if that fails."""
+    try:
+        return bs.decode(*args, **kwargs)
+    except UnicodeDecodeError:
+        return bs.decode("latin-1")  # gh-108590: keep invalid bytes 1:1
+
+
+@contextmanager
+def _text_factory(con, factory):
+    """Set con.text_factory to factory for the with-block, then restore it."""
+    saved_factory, con.text_factory = con.text_factory, factory
+    try:
+        yield
+    finally:
+        con.text_factory = saved_factory
+
+
 def _iterdump(connection):
     """
     Returns an iterator to the dump of the database in an SQL text format.
@@ -74,8 +96,9 @@ def _iterdump(connection): ) ) query_res = cu.execute(q) - for row in query_res: - yield("{0};".format(row[0])) + with _text_factory(connection, bytes): + for row in query_res: + yield("{0};".format(_force_decode(row[0]))) # Now when the type is 'index', 'trigger', or 'view' q = """ diff --git a/Lib/test/test_sqlite3/test_dump.py b/Lib/test/test_sqlite3/test_dump.py index 3107e1b165d950..6402e9f0c390fa 100644 --- a/Lib/test/test_sqlite3/test_dump.py +++ b/Lib/test/test_sqlite3/test_dump.py @@ -133,6 +133,34 @@ def test_dump_virtual_tables(self): actual = list(self.cx.iterdump()) self.assertEqual(expected, actual) + def test_dump_unicode_invalid(self): + # gh-108590 + expected = [ + "BEGIN TRANSACTION;", + "CREATE TABLE foo (data TEXT);", + "INSERT INTO \"foo\" VALUES('a\x9f');", + "COMMIT;", + ] + self.cu.executescript(""" + CREATE TABLE foo (data TEXT); + INSERT INTO foo VALUES (CAST(X'619f' AS TEXT)); + """) + actual = list(self.cx.iterdump()) + self.assertEqual(expected, actual) + + def test_dump_recreation(self): + self.cu.executescript(""" + CREATE TABLE foo (id INTEGER, text TEXT, blob BLOB); + INSERT INTO foo VALUES (0, CAST(X'619f' AS TEXT), X'619f'); + INSERT INTO foo VALUES (1, 'Hello SQLite!', X'98194eff46ab29f79064'); + """) + original_dump = list(self.cx.iterdump()) + with memory_database() as cx2: + query = "".join(original_dump) + cx2.executescript(query) + recreation_dump = list(cx2.iterdump()) + self.assertEqual(original_dump, recreation_dump) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst b/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst new file mode 100644 index 00000000000000..50b41f2a94d9be --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-29-22-53-48.gh-issue-108590.6k0pOl.rst @@ -0,0 +1 @@ +Fixed an issue where :meth:`sqlite3.Connection.iterdump` would fail and leave an incomplete SQL dump if a table includes 
invalid Unicode sequences. Patch by Corvin McPherson.