Query: Investigate the perf of different shape of TPT queries #21509

smitpatel · 2020-07-03T19:39:45Z

Essentially perf analysis of #2266 (comment) to decide our direction.

smitpatel · 2020-07-03T19:40:31Z

Co-assigned to @roji for perf investigation. Once we decide the shape, I will make changes to implement it.

roji · 2020-07-15T12:01:47Z

tl;dr

UNION JOIN generally seems to be preferrable, but it isn't totally conclusive (see notes below)
There's no significant difference between projecting a single string discriminator or multiple boolean values.

Database setup

SQL Server

IF OBJECT_ID('dbo.L4A', 'U') IS NOT NULL
    DROP TABLE L4A;
IF OBJECT_ID('dbo.L4B', 'U') IS NOT NULL
    DROP TABLE L4B;
IF OBJECT_ID('dbo.L4C', 'U') IS NOT NULL
    DROP TABLE L4C;
IF OBJECT_ID('dbo.L4D', 'U') IS NOT NULL
    DROP TABLE L4D;
IF OBJECT_ID('dbo.L3A', 'U') IS NOT NULL
    DROP TABLE L3A;
IF OBJECT_ID('dbo.L3B', 'U') IS NOT NULL
    DROP TABLE L3B;
IF OBJECT_ID('dbo.L2', 'U') IS NOT NULL
    DROP TABLE L2;
IF OBJECT_ID('dbo.L1', 'U') IS NOT NULL
    DROP TABLE L1;

CREATE TABLE L1
(
    Id  INT PRIMARY KEY,
    L1P VARCHAR(MAX)
);

CREATE TABLE L2
(
    Id   INT PRIMARY KEY,
    L1Id INT NOT NULL REFERENCES L1 (Id),
    L2P  VARCHAR(MAX)
);
CREATE INDEX IX_L2 ON L2 (L1Id);

CREATE TABLE L3A
(
    Id   INT PRIMARY KEY,
    L2Id INT NOT NULL REFERENCES L2 (Id),
    L3AP VARCHAR(MAX)
);
CREATE INDEX IX_L3A ON L3A (L2Id);

CREATE TABLE L3B
(
    Id    INT PRIMARY KEY,
    L2Id  INT NOT NULL REFERENCES L2 (Id),
    L3BP  VARCHAR(MAX)
);
CREATE INDEX IX_L3B ON L3B (L2Id);

CREATE TABLE L4A
(
    Id    INT PRIMARY KEY,
    L3AId INT NOT NULL REFERENCES L3A (Id),
    L4AP  VARCHAR(MAX),
);
CREATE INDEX IX_L4A ON L4A (L3AId);

CREATE TABLE L4B
(
    Id    INT PRIMARY KEY,
    L3AId INT NOT NULL REFERENCES L3A (Id),
    L4BP  VARCHAR(MAX),
);
CREATE INDEX IX_L4B ON L4B (L3AId);

CREATE TABLE L4C
(
    Id    INT PRIMARY KEY,
    L3BId INT NOT NULL REFERENCES L3B (Id),
    L4CP  VARCHAR(MAX),
);
CREATE INDEX IX_L4C ON L4C (L3BId);

CREATE TABLE L4D
(
    Id    INT PRIMARY KEY,
    L3BId INT NOT NULL REFERENCES L3B (Id),
    L4DP  VARCHAR(MAX),
);
CREATE INDEX IX_L4D ON L4D (L3BId);

BEGIN TRANSACTION;
DECLARE @i INT = 1;
WHILE @i <= 10000
    BEGIN
        INSERT INTO L1 (Id, L1P) VALUES (@i, @i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (@i, @i, @i);
        INSERT INTO L3A (Id, L2Id, L3AP) VALUES (@i, @i, @i);
        INSERT INTO L4A (Id, L3AId, L4AP) VALUES (@i, @i, @i);
        SET @i = @i + 1;
    END;
WHILE @i <= 20000
    BEGIN
        INSERT INTO L1 (Id, L1P) VALUES (@i, @i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (@i, @i, @i);
        INSERT INTO L3A (Id, L2Id, L3AP) VALUES (@i, @i, @i);
        INSERT INTO L4B (Id, L3AId, L4BP) VALUES (@i, @i, @i);
        SET @i = @i + 1;
    END;
WHILE @i <= 30000
    BEGIN
        INSERT INTO L1 (Id, L1P) VALUES (@i, @i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (@i, @i, @i);
        INSERT INTO L3B (Id, L2Id, L3BP) VALUES (@i, @i, @i);
        INSERT INTO L4C (Id, L3BId, L4CP) VALUES (@i, @i, @i);
        SET @i = @i + 1;
    END;
WHILE @i <= 40000
    BEGIN
        INSERT INTO L1 (Id, L1P) VALUES (@i, @i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (@i, @i, @i);
        INSERT INTO L3B (Id, L2Id, L3BP) VALUES (@i, @i, @i);
        INSERT INTO L4D (Id, L3BId, L4DP) VALUES (@i, @i, @i);
        SET @i = @i + 1;
    END;
COMMIT;

PostgreSQL

DROP TABLE IF EXISTS L4A;
DROP TABLE IF EXISTS L4B;
DROP TABLE IF EXISTS L4C;
DROP TABLE IF EXISTS L4D;
DROP TABLE IF EXISTS L3A;
DROP TABLE IF EXISTS L3B;
DROP TABLE IF EXISTS L2;
DROP TABLE IF EXISTS L1;

CREATE TABLE L1
(
    Id  INT PRIMARY KEY,
    L1P TEXT
);

CREATE TABLE L2
(
    Id   INT PRIMARY KEY,
    L1Id INT NOT NULL REFERENCES L1 (Id),
    L2P  TEXT
);
CREATE INDEX IX_L2 ON L2 (L1Id);

CREATE TABLE L3A
(
    Id   INT PRIMARY KEY,
    L2Id INT NOT NULL REFERENCES L2 (Id),
    L3AP TEXT
);
CREATE INDEX IX_L3A ON L3A (L2Id);

CREATE TABLE L3B
(
    Id   INT PRIMARY KEY,
    L2Id INT NOT NULL REFERENCES L2 (Id),
    L3BP TEXT
);
CREATE INDEX IX_L3B ON L3B (L2Id);

CREATE TABLE L4A
(
    Id    INT PRIMARY KEY,
    L3AId INT NOT NULL REFERENCES L3A (Id),
    L4AP  TEXT
);
CREATE INDEX IX_L4A ON L4A (L3AId);

CREATE TABLE L4B
(
    Id    INT PRIMARY KEY,
    L3AId INT NOT NULL REFERENCES L3A (Id),
    L4BP  TEXT
);
CREATE INDEX IX_L4B ON L4B (L3AId);

CREATE TABLE L4C
(
    Id    INT PRIMARY KEY,
    L3BId INT NOT NULL REFERENCES L3B (Id),
    L4CP  TEXT
);
CREATE INDEX IX_L4C ON L4C (L3BId);

CREATE TABLE L4D
(
    Id    INT PRIMARY KEY,
    L3BId INT NOT NULL REFERENCES L3B (Id),
    L4DP  TEXT
);
CREATE INDEX IX_L4D ON L4D (L3BId);

DO $$BEGIN
    FOR i IN 1..10000 LOOP
        INSERT INTO L1 (Id, L1P) VALUES (@i, @i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (@i, @i, @i);
        INSERT INTO L3A (Id, L2Id, L3AP) VALUES (@i, @i, @i);
        INSERT INTO L4A (Id, L3AId, L4AP) VALUES (@i, @i, @i);
    END LOOP;
    FOR i IN 10001..20000 LOOP
        INSERT INTO L1 (Id, L1P) VALUES (@i, @i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (@i, @i, @i);
        INSERT INTO L3A (Id, L2Id, L3AP) VALUES (@i, @i, @i);
        INSERT INTO L4B (Id, L3AId, L4BP) VALUES (@i, @i, @i);
    END LOOP;
    FOR i IN 20001..30000 LOOP
        INSERT INTO L1 (Id, L1P) VALUES (@i, @i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (@i, @i, @i);
        INSERT INTO L3B (Id, L2Id, L3BP) VALUES (@i, @i, @i);
        INSERT INTO L4C (Id, L3BId, L4CP) VALUES (@i, @i, @i);
    END LOOP;
    FOR i IN 30001..40000 LOOP
        INSERT INTO L1 (Id, L1P) VALUES (@i, @i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (@i, @i, @i);
        INSERT INTO L3B (Id, L2Id, L3BP) VALUES (@i, @i, @i);
        INSERT INTO L4D (Id, L3BId, L4DP) VALUES (@i, @i, @i);
    END LOOP;
END$$;

MySQL

DROP TABLE IF EXISTS L4A;
DROP TABLE IF EXISTS L4B;
DROP TABLE IF EXISTS L4C;
DROP TABLE IF EXISTS L4D;
DROP TABLE IF EXISTS L3A;
DROP TABLE IF EXISTS L3B;
DROP TABLE IF EXISTS L2;
DROP TABLE IF EXISTS L1;

CREATE TABLE L1
(
    Id  INT PRIMARY KEY,
    L1P TEXT
);

CREATE TABLE L2
(
    Id   INT PRIMARY KEY,
    L1Id INT NOT NULL REFERENCES L1 (Id),
    L2P  TEXT
);
CREATE INDEX IX_L2 ON L2 (L1Id);

CREATE TABLE L3A
(
    Id   INT PRIMARY KEY,
    L2Id INT NOT NULL REFERENCES L2 (Id),
    L3AP TEXT
);
CREATE INDEX IX_L3A ON L3A (L2Id);

CREATE TABLE L3B
(
    Id   INT PRIMARY KEY,
    L2Id INT NOT NULL REFERENCES L2 (Id),
    L3BP TEXT
);
CREATE INDEX IX_L3B ON L3B (L2Id);

CREATE TABLE L4A
(
    Id    INT PRIMARY KEY,
    L3AId INT NOT NULL REFERENCES L3A (Id),
    L4AP  TEXT
);
CREATE INDEX IX_L4A ON L4A (L3AId);

CREATE TABLE L4B
(
    Id    INT PRIMARY KEY,
    L3AId INT NOT NULL REFERENCES L3A (Id),
    L4BP  TEXT
);
CREATE INDEX IX_L4B ON L4B (L3AId);

CREATE TABLE L4C
(
    Id    INT PRIMARY KEY,
    L3BId INT NOT NULL REFERENCES L3B (Id),
    L4CP  TEXT
);
CREATE INDEX IX_L4C ON L4C (L3BId);

CREATE TABLE L4D
(
    Id    INT PRIMARY KEY,
    L3BId INT NOT NULL REFERENCES L3B (Id),
    L4DP  TEXT
);
CREATE INDEX IX_L4D ON L4D (L3BId);

DROP PROCEDURE IF EXISTS myloop;
DELIMITER //
CREATE PROCEDURE myloop()
BEGIN
    DECLARE i INT DEFAULT 1;

    START TRANSACTION;
    WHILE (i <= 10000) DO
        INSERT INTO L1 (Id, L1P) VALUES (i, i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (i, i, i);
        INSERT INTO L3A (Id, L2Id, L3AP) VALUES (i, i, i);
        INSERT INTO L4A (Id, L3AId, L4AP) VALUES (i, i, i);
        SET i = i+1;
    END WHILE;
    WHILE (i <= 20000) DO
        INSERT INTO L1 (Id, L1P) VALUES (i, i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (i, i, i);
        INSERT INTO L3A (Id, L2Id, L3AP) VALUES (i, i, i);
        INSERT INTO L4B (Id, L3AId, L4BP) VALUES (i, i, i);
        SET i = i+1;
    END WHILE;
    WHILE (i <= 30000) DO
        INSERT INTO L1 (Id, L1P) VALUES (i, i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (i, i, i);
        INSERT INTO L3B (Id, L2Id, L3BP) VALUES (i, i, i);
        INSERT INTO L4C (Id, L3BId, L4CP) VALUES (i, i, i);
        SET i = i+1;
    END WHILE;
    WHILE (i <= 40000) DO
        INSERT INTO L1 (Id, L1P) VALUES (i, i);
        INSERT INTO L2 (Id, L1Id, L2P) VALUES (i, i, i);
        INSERT INTO L3B (Id, L2Id, L3BP) VALUES (i, i, i);
        INSERT INTO L4D (Id, L3BId, L4DP) VALUES (i, i, i);
        SET i = i+1;
    END WHILE;
COMMIT;
END;
//

CALL myloop();

Sqlite

DROP TABLE IF EXISTS L4A;
DROP TABLE IF EXISTS L4B;
DROP TABLE IF EXISTS L4C;
DROP TABLE IF EXISTS L4D;
DROP TABLE IF EXISTS L3A;
DROP TABLE IF EXISTS L3B;
DROP TABLE IF EXISTS L2;
DROP TABLE IF EXISTS L1;

CREATE TABLE L1
(
    Id  INTEGER PRIMARY KEY,
    L1P TEXT
);

CREATE TABLE L2
(
    Id   INTEGER PRIMARY KEY,
    L1Id INTEGER NOT NULL REFERENCES L1 (Id),
    L2P  TEXT
);
CREATE INDEX IX_L2 ON L2 (L1Id);

CREATE TABLE L3A
(
    Id   INTEGER PRIMARY KEY,
    L2Id INTEGER NOT NULL REFERENCES L2 (Id),
    L3AP TEXT
);
CREATE INDEX IX_L3A ON L3A (L2Id);

CREATE TABLE L3B
(
    Id   INTEGER PRIMARY KEY,
    L2Id INTEGER NOT NULL REFERENCES L2 (Id),
    L3BP TEXT
);
CREATE INDEX IX_L3B ON L3B (L2Id);

CREATE TABLE L4A
(
    Id    INT PRIMARY KEY,
    L3AId INT NOT NULL REFERENCES L3A (Id),
    L4AP  TEXT
);
CREATE INDEX IX_L4A ON L4A (L3AId);

CREATE TABLE L4B
(
    Id    INT PRIMARY KEY,
    L3AId INT NOT NULL REFERENCES L3A (Id),
    L4BP  TEXT
);
CREATE INDEX IX_L4B ON L4B (L3AId);

CREATE TABLE L4C
(
    Id    INT PRIMARY KEY,
    L3BId INT NOT NULL REFERENCES L3B (Id),
    L4CP  TEXT
);
CREATE INDEX IX_L4C ON L4C (L3BId);

CREATE TABLE L4D
(
    Id    INT PRIMARY KEY,
    L3BId INT NOT NULL REFERENCES L3B (Id),
    L4DP  TEXT
);
CREATE INDEX IX_L4D ON L4D (L3BId);

INSERT INTO L1
SELECT x, x FROM (
    WITH RECURSIVE
        cnt(x) AS (
            SELECT 1
            UNION ALL
            SELECT x+1 FROM cnt
            LIMIT 40000
        )
    SELECT x FROM cnt
);

INSERT INTO L2
SELECT x, x, x FROM (
    WITH RECURSIVE
        cnt(x) AS (
            SELECT 1
            UNION ALL
            SELECT x+1 FROM cnt
            LIMIT 40000
        )
    SELECT x FROM cnt
);

INSERT INTO L3A
SELECT x, x, x FROM (
    WITH RECURSIVE
        cnt(x) AS (
            SELECT 1
            UNION ALL
            SELECT x+1 FROM cnt
            LIMIT 20000
        )
    SELECT x FROM cnt
);

INSERT INTO L3B
SELECT x, x, x FROM (
    WITH RECURSIVE
        cnt(x) AS (
            SELECT 1
            UNION ALL
            SELECT x+1 FROM cnt
            LIMIT 20000
        )
    SELECT x FROM cnt
);

INSERT INTO L4A
SELECT x, x, x FROM (
    WITH RECURSIVE
        cnt(x) AS (
            SELECT 1
            UNION ALL
            SELECT x+1 FROM cnt
            LIMIT 10000
                  )
    SELECT x FROM cnt
);

INSERT INTO L4B
SELECT x, x, x FROM (
    WITH RECURSIVE
        cnt(x) AS (
            SELECT 1
            UNION ALL
            SELECT x+1 FROM cnt
            LIMIT 10000
                  )
    SELECT x FROM cnt
                    );

INSERT INTO L4C
SELECT x, x, x FROM (
    WITH RECURSIVE
        cnt(x) AS (
            SELECT 1
            UNION ALL
            SELECT x+1 FROM cnt
            LIMIT 10000
                  )
    SELECT x FROM cnt
                    );

INSERT INTO L4D
SELECT x, x, x FROM (
    WITH RECURSIVE
        cnt(x) AS (
            SELECT 1
            UNION ALL
            SELECT x+1 FROM cnt
            LIMIT 10000
                  )
    SELECT x FROM cnt
                    );

UNION JOIN

BenchmarkDotNet=v0.12.1, OS=ubuntu 20.04
Intel Core i7-9750H CPU 2.60GHz, 1 CPU, 12 logical and 6 physical cores
.NET Core SDK=5.0.100-preview.6.20318.15
  [Host]     : .NET Core 3.1.3 (CoreCLR 4.700.20.11803, CoreFX 4.700.20.12001), X64 RyuJIT
  DefaultJob : .NET Core 3.1.3 (CoreCLR 4.700.20.11803, CoreFX 4.700.20.12001), X64 RyuJIT

Method	Database	Levels	Mean	Error	StdDev
MultipleJoins	MySQL	3	130.56 ms	1.260 ms	0.984 ms
UnionAll	MySQL	3	114.71 ms	2.256 ms	2.598 ms
UnionAllDiagonal	MySQL	3	NA	NA	NA
MultipleJoins	MySQL	4	251.19 ms	1.501 ms	1.330 ms
UnionAll	MySQL	4	178.52 ms	3.562 ms	3.812 ms
UnionAllDiagonal	MySQL	4	186.89 ms	1.256 ms	1.175 ms
MultipleJoins	PostgreSQL	3	39.43 ms	0.756 ms	0.900 ms
UnionAll	PostgreSQL	3	35.19 ms	0.491 ms	0.435 ms
UnionAllDiagonal	PostgreSQL	3	NA	NA	NA
MultipleJoins	PostgreSQL	4	59.77 ms	1.157 ms	1.188 ms
UnionAll	PostgreSQL	4	51.39 ms	1.007 ms	1.378 ms
UnionAllDiagonal	PostgreSQL	4	56.68 ms	1.037 ms	1.109 ms
MultipleJoins	SqlServer	3	260.53 ms	2.030 ms	1.799 ms
UnionAll	SqlServer	3	102.89 ms	2.012 ms	3.073 ms
UnionAllDiagonal	SqlServer	3	NA	NA	NA
MultipleJoins	SqlServer	4	1,153.38 ms	18.177 ms	15.179 ms
UnionAll	SqlServer	4	396.17 ms	7.599 ms	8.130 ms
UnionAllDiagonal	SqlServer	4	382.28 ms	7.092 ms	8.167 ms
MultipleJoins	Sqlite	3	29.94 ms	0.064 ms	0.057 ms
UnionAll	Sqlite	3	51.10 ms	0.046 ms	0.039 ms
UnionAllDiagonal	Sqlite	3	NA	NA	NA
MultipleJoins	Sqlite	4	50.25 ms	0.155 ms	0.145 ms
UnionAll	Sqlite	4	119.61 ms	2.187 ms	2.046 ms
UnionAllDiagonal	Sqlite	4	122.08 ms	0.800 ms	0.749 ms

I tested two models; both start with single L1 and L2 tables, and then branch off to L3A/L3B (for the 3-level model), and from there to L4A/L4B and L4C/L4C (for the 4-level model). Below are the queries for the 4-level model, and here's the full benchmark code:

Multiple joins:

SELECT L1.Id, L1P, L2P, L3AP, L3BP, L4AP, L4BP, L4CP, L4DP
FROM L2
JOIN      L1 ON L1.Id = L2.L1Id
LEFT JOIN L3A ON L3A.L2Id = L2.Id
LEFT JOIN L3B ON L3B.L2Id = L2.Id
LEFT JOIN L4A ON L4A.L3AId = L3A.Id
LEFT JOIN L4B ON L4B.L3AId = L3A.Id
LEFT JOIN L4C ON L4C.L3BId = L3B.Id
LEFT JOIN L4D ON L4D.L3BId = L3B.Id

UNION JOIN:

SELECT L1.Id, L1P, L2P, L3AP, L3BP, L4AP, L4BP, L4CP, L4DP
FROM L2
JOIN L1 ON L1.Id = L2.L1Id
LEFT JOIN
(
    SELECT L2Id, L3AP,         NULL AS L3BP
    FROM L3A
UNION ALL
    SELECT L2Id, NULL AS L3AP, L3BP
    FROM L3B
) AS L3 ON L3.L2Id = L2.Id
LEFT JOIN
(
    SELECT L3AId AS Id, L4AP,         NULL AS L4BP, NULL AS L4CP, NULL AS L4DP
    FROM L4A
UNION ALL
    SELECT L3AId AS Id, NULL AS L4AP, L4BP,         NULL AS L4CP, NULL AS L4DP
    FROM L4B
UNION ALL
    SELECT L3BId AS Id, NULL AS L4AP, NULL AS L4BP, L4CP,         NULL AS L4DP
    FROM L4C
UNION ALL
    SELECT L3BId AS Id, NULL AS L4AP, NULL AS L4BP, NULL AS L4CP, L4DP
    FROM L4D
) AS L4 ON L4.Id = L2.Id

Interestingly, the UNION strategy performs much better on SQL Server, slightly better on PG/MySQL, and worse on Sqlite. The same results show up when switching from the 3-level model to the 4-level one.

EF6 does multiple joins for concrete types, but when intermediate types are marked as abstract, it switches to a "diagonal" UNION strategy; this means that in the 4-level model, L3A is UNIONed with L4C/L4D, and the results are joined against L3B UNIONed with L4A/L4B. I suspect this is done to balance the number of UNIONs within the JOINs by mixing the different levels together (probably more tables are needed to see the effect of this. However, the precise algorithm and logic here aren't completely clear to me (EF6 rocket science). I'm also not sure why this is only done with abstract immediate types.

Discriminator strategy

Method	Database	Mean	Error	StdDev
SingleDiscriminatorColumn	MySQL	192.6 ms	3.48 ms	3.41 ms
MultipleDiscriminatorColumns	MySQL	200.4 ms	2.23 ms	2.08 ms
SingleDiscriminatorColumn	PostgreSQL	NA	NA	NA
MultipleDiscriminatorColumns	PostgreSQL	NA	NA	NA
SingleDiscriminatorColumn	SqlServer	414.1 ms	3.67 ms	3.43 ms
MultipleDiscriminatorColumns	SqlServer	419.8 ms	8.29 ms	8.87 ms
SingleDiscriminatorColumn	Sqlite	134.3 ms	0.63 ms	0.55 ms
MultipleDiscriminatorColumns	Sqlite	138.8 ms	1.54 ms	1.44 ms

It should be possible to manufacture cases where multiple discriminators would be slightly faster (i.e. long discriminator strings, few entities so few boolean columns), but overall there doesn't appear to be a significant difference. For simplicity and alignment with TPH I'd propose we do a single string discriminator.

PG is failing because of an annoying type inference issue - you have to add CAST(NULL AS INT) on the first SELECT in the UNION (hope I don't have to start a file on this db too).

Benchmark code

public class Discriminators
{
    [Params("SqlServer", "PostgreSQL", "MySQL", "Sqlite")]
    public string Database { get; set; }

    DbProviderFactory _factory;
    DbConnection _connection;
    DbCommand _command;

    #region Setup

    public void SharedSetup()
    {
        switch (Database)
        {
            case "SqlServer":
                _factory = SqlClientFactory.Instance;
                _connection = _factory.CreateConnection();
                _connection.ConnectionString = "Server=localhost;Database=test;User=SA;Password=****;Connect Timeout=60;ConnectRetryCount=0";
                break;
            case "PostgreSQL":
                _factory = NpgsqlFactory.Instance;
                _connection = _factory.CreateConnection();
                _connection.ConnectionString = "Host=localhost;Username=test;Password=test";
                break;
            case "MySQL":
                _factory = MySqlConnectorFactory.Instance;
                _connection = _factory.CreateConnection();
                _connection.ConnectionString = "Server=localhost;User ID=roji;Password=****;Database=test";
                break;
            case "Sqlite":
                _factory = SqliteFactory.Instance;
                _connection = _factory.CreateConnection();
                _connection.ConnectionString = "Data Source=/home/roji/tmp/test.sqlite";
                break;
            default:
                throw new ArgumentException();
        }
        _connection.Open();
        _command = _factory.CreateCommand();
        _command.Connection = _connection;
    }

    [GlobalSetup(Target = nameof(SingleDiscriminatorColumn))]
    public void SetupSingleDiscriminatorColumn()
    {
        SharedSetup();

        _command.CommandText = @"
SELECT L1.Id, L1P, L2P, L3AP, L3BP, L4AP, L4BP, L4CP, L4DP,
   CASE
       WHEN L4AId IS NOT NULL THEN 'L4A'
       WHEN L4BId IS NOT NULL THEN 'L4B'
       WHEN L4CId IS NOT NULL THEN 'L4C'
       WHEN L4DId IS NOT NULL THEN 'L4D'
   END AS Discriminator
FROM L2
JOIN L1 ON L1.Id = L2.L1Id
LEFT JOIN
(
SELECT L2Id, L3AP,         NULL AS L3BP
FROM L3A
UNION ALL
SELECT L2Id, NULL AS L3AP, L3BP
FROM L3B
) AS L3 ON L3.L2Id = L2.Id
LEFT JOIN
(
SELECT Id AS L4AId,   NULL AS L4BId, NULL AS L4CId, NULL AS L4DId, L3AId AS L3Id, L4AP,         NULL AS L4BP, NULL AS L4CP, NULL AS L4DP
FROM L4A
UNION ALL
SELECT NULL AS L4AId, Id AS L4BId,   NULL AS L4CId, NULL AS L4DId, L3AId AS L3Id, NULL AS L4AP, L4BP,         NULL AS L4CP, NULL AS L4DP
FROM L4B
UNION ALL
SELECT NULL AS L4AId, NULL AS L4BId, Id AS L4CId,   NULL AS L4DId, L3BId AS L3Id, NULL AS L4AP, NULL AS L4BP, L4CP,         NULL AS L4DP
FROM L4C
UNION ALL
SELECT NULL AS L4AId, NULL AS L4BId, NULL AS L4CId, Id AS L4DId,   L3BId AS L3Id, NULL AS L4AP, NULL AS L4BP, NULL AS L4CP, L4DP
FROM L4D
) AS L4 ON L4.L3Id = L2.Id";
    }

    [GlobalSetup(Target = nameof(MultipleDiscriminatorColumns))]
    public void SetupMultipleDiscriminatorColumns()
    {
        SharedSetup();

        _command.CommandText = @"
SELECT L1.Id, L1P, L2P, L3AP, L3BP, L4AP, L4BP, L4CP, L4DP,
   CASE WHEN L4AId IS NULL THEN NULL ELSE 'L4A' END AS IsL4A,
   CASE WHEN L4BId IS NULL THEN NULL ELSE 'L4B' END AS IsL4B,
   CASE WHEN L4CId IS NULL THEN NULL ELSE 'L4C' END AS IsL4C,
   CASE WHEN L4DId IS NULL THEN NULL ELSE 'L4D' END AS IsL4D
FROM L2
JOIN L1 ON L1.Id = L2.L1Id
LEFT JOIN
(
SELECT L2Id, L3AP,         NULL AS L3BP
FROM L3A
UNION ALL
SELECT L2Id, NULL AS L3AP, L3BP
FROM L3B
) AS L3 ON L3.L2Id = L2.Id
LEFT JOIN
(
SELECT Id AS L4AId,   NULL AS L4BId, CAST(NULL AS INT) AS L4CId, CAST(NULL AS INT) AS L4DId, L3AId AS L3Id, L4AP,         NULL AS L4BP, NULL AS L4CP, NULL AS L4DP
FROM L4A
UNION ALL
SELECT NULL AS L4AId, Id AS L4BId,   NULL AS L4CId, NULL AS L4DId, L3AId AS L3Id, NULL AS L4AP, L4BP,         NULL AS L4CP, NULL AS L4DP
FROM L4B
UNION ALL
SELECT NULL AS L4AId, NULL AS L4BId, Id AS L4CId,   NULL AS L4DId, L3BId AS L3Id, NULL AS L4AP, NULL AS L4BP, L4CP,         NULL AS L4DP
FROM L4C
UNION ALL
SELECT NULL AS L4AId, NULL AS L4BId, NULL AS L4CId, Id AS L4DId,   L3BId AS L3Id, NULL AS L4AP, NULL AS L4BP, NULL AS L4CP, L4DP
FROM L4D
) AS L4 ON L4.L3Id = L2.Id";
    }

    #endregion

    [Benchmark]
    public async Task SingleDiscriminatorColumn()
    {
        using var reader = await _command.ExecuteReaderAsync();
        while (reader.Read()) {}
    }

    [Benchmark]
    public async Task MultipleDiscriminatorColumns()
    {
        using var reader = await _command.ExecuteReaderAsync();
        while (reader.Read()) {}
    }
}

roji · 2020-07-15T12:03:36Z

Note: I have all the benchmarking infrastructure so it's easy for me to tweak and retest on request - let me know. It's also a good idea to make sure that I haven't missed anything in my schema/queries above!

smitpatel · 2020-07-24T20:10:54Z

Tables are wrong structures. TPT tables are always connected by PK to PK relationship, there is no additional column for that.

The strategy used for Union all is not appropriate from code maintenance perspective. It traverse every level fully. It should be recursively building up based on sub-tree in hierarchy.

Part of #21509

Feedback of #21784 Part of #21509

roji · 2020-07-27T08:57:47Z

Tables are wrong structures. TPT tables are always connected by PK to PK relationship, there is no additional column for that.

Makes sense, I can adjust and re-benchmark.

The strategy used for Union all is not appropriate from code maintenance perspective. It traverse every level fully. It should be recursively building up based on sub-tree in hierarchy.

Can you provide the SQL you'd like me to benchmark?

smitpatel assigned smitpatel and roji Jul 3, 2020

smitpatel added area-perf area-query type-enhancement labels Jul 3, 2020

This was referenced Jul 3, 2020

Relational: TPT inheritance mapping pattern #2266

Closed

Relational: TPC inheritance mapping pattern #3170

Closed

roji mentioned this issue Jul 14, 2020

Significant Query Slowdown When Using Multiple Joins Due To Changes In 3.0 #18022

Closed

ajcvickers unassigned roji Jul 17, 2020

ajcvickers added this to the 5.0.0 milestone Jul 17, 2020

smitpatel removed this from the 5.0.0 milestone Jul 25, 2020

smitpatel added a commit that referenced this issue Jul 25, 2020

Query: Change TPT to use string discriminator column

6d53311

Part of #21509

smitpatel mentioned this issue Jul 25, 2020

Query: Change TPT to use string discriminator column #21784

Merged

smitpatel added a commit that referenced this issue Jul 25, 2020

Query: Change TPT to use string discriminator column

8ad34a9

Part of #21509

ghost pushed a commit that referenced this issue Jul 25, 2020

Query: Change TPT to use string discriminator column (#21784)

f203e1f

Part of #21509

smitpatel added a commit that referenced this issue Jul 25, 2020

Query: Change alias of the column for TPT Discriminator

48cef48

Feedback of #21784 Part of #21509

smitpatel mentioned this issue Jul 25, 2020

Query: Change alias of the column for TPT Discriminator #21787

Merged

smitpatel added a commit that referenced this issue Jul 26, 2020

Query: Change alias of the column for TPT Discriminator

f02e05a

Feedback of #21784 Part of #21509

ghost pushed a commit that referenced this issue Jul 26, 2020

Query: Change alias of the column for TPT Discriminator (#21787)

2a3ff06

Feedback of #21784 Part of #21509

ajcvickers added this to the Backlog milestone Jul 31, 2020

smitpatel removed their assignment Aug 27, 2020

ajcvickers added the area-relational-mapping label Oct 11, 2020

ajcvickers mentioned this issue Oct 11, 2020

Improvements to relational database mappings #22953

Open

63 tasks

roji added the area-benchmarks label Jul 29, 2021

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Query: Investigate the perf of different shape of TPT queries #21509

Query: Investigate the perf of different shape of TPT queries #21509

smitpatel commented Jul 3, 2020 •

edited

Loading

smitpatel commented Jul 3, 2020

roji commented Jul 15, 2020

roji commented Jul 15, 2020

smitpatel commented Jul 24, 2020

roji commented Jul 27, 2020

Query: Investigate the perf of different shape of TPT queries #21509

Query: Investigate the perf of different shape of TPT queries #21509

Comments

smitpatel commented Jul 3, 2020 • edited Loading

smitpatel commented Jul 3, 2020

roji commented Jul 15, 2020

tl;dr

Database setup

UNION JOIN

Discriminator strategy

roji commented Jul 15, 2020

smitpatel commented Jul 24, 2020

roji commented Jul 27, 2020

smitpatel commented Jul 3, 2020 •

edited

Loading