forked from django/django
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fixed #35936 -- Used unnest for bulk inserts on Postgres when possible.
This should make bulk_create significantly faster on Postgres when provided only literal values. Thanks James Sewell for writing about this technique, Tom Forbes for validating the performance benefits, David Sanders and Mariusz Felisiak for the review.
- Loading branch information
1 parent
2638b75
commit a16eedc
Showing
3 changed files
with
86 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from django.db.models.sql.compiler import ( | ||
SQLAggregateCompiler, | ||
SQLCompiler, | ||
SQLDeleteCompiler, | ||
) | ||
from django.db.models.sql.compiler import SQLInsertCompiler as BaseSQLInsertCompiler | ||
from django.db.models.sql.compiler import SQLUpdateCompiler | ||
|
||
__all__ = [ | ||
"SQLAggregateCompiler", | ||
"SQLCompiler", | ||
"SQLDeleteCompiler", | ||
"SQLInsertCompiler", | ||
"SQLUpdateCompiler", | ||
] | ||
|
||
|
||
class InsertUnnest(list): | ||
""" | ||
Sentinel value to signal DatabaseOperations.bulk_insert_sql() that the | ||
UNNEST strategy should be used for the bulk insert. | ||
""" | ||
|
||
def __str__(self): | ||
return "UNNEST(%s)" % ", ".join(self) | ||
|
||
|
||
class SQLInsertCompiler(BaseSQLInsertCompiler): | ||
def assemble_as_sql(self, fields, value_rows): | ||
# Specialize bulk-insertion of literal non-array values through | ||
# UNNEST to reduce the time spent planning the query. | ||
if ( | ||
# The optimization is not worth doing if there is a single | ||
# row as it will result in the same number of placeholders. | ||
len(value_rows) <= 1 | ||
# Lack of fields denote the usage of the DEFAULT keyword | ||
# for the insertion of empty rows. | ||
or any(field is None for field in fields) | ||
# Compilable cannot be combined in an array of literal values. | ||
or any(any(hasattr(value, "as_sql") for value in row) for row in value_rows) | ||
): | ||
return super().assemble_as_sql(fields, value_rows) | ||
db_types = [field.db_type(self.connection) for field in fields] | ||
# Abort if any of the fields are arrays as UNNEST indiscriminately | ||
# flatten them instead of reducing their nesting by one. | ||
if any(db_type.endswith("[]") for db_type in db_types): | ||
return super().assemble_as_sql(fields, value_rows) | ||
return InsertUnnest(["(%%s)::%s[]" % db_type for db_type in db_types]), [ | ||
list(map(list, zip(*value_rows))) | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import unittest | ||
|
||
from django.db import connection | ||
from django.db.models.expressions import RawSQL | ||
from django.test import TestCase | ||
|
||
from ..models import Square | ||
|
||
|
||
@unittest.skipUnless(connection.vendor == "postgresql", "PostgreSQL tests") | ||
class BulkCreateUnnestTests(TestCase): | ||
def test_single_object(self): | ||
with self.assertNumQueries(1) as ctx: | ||
Square.objects.bulk_create([Square(root=2, square=4)]) | ||
self.assertNotIn("UNNEST", ctx[0]["sql"]) | ||
|
||
def test_non_literal(self): | ||
with self.assertNumQueries(1) as ctx: | ||
Square.objects.bulk_create( | ||
[Square(root=2, square=RawSQL("%s", (4,))), Square(root=3, square=9)] | ||
) | ||
self.assertNotIn("UNNEST", ctx[0]["sql"]) | ||
|
||
def test_unnest_eligible(self): | ||
with self.assertNumQueries(1) as ctx: | ||
Square.objects.bulk_create( | ||
[Square(root=2, square=4), Square(root=3, square=9)] | ||
) | ||
self.assertIn("UNNEST", ctx[0]["sql"]) |