Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Possible optimization of split querys with window functions in included entities #34362

Open
georg-jung opened this issue Aug 5, 2024 · 3 comments

Comments

@georg-jung
Copy link

georg-jung commented Aug 5, 2024

Some combinations of AsSplitQuery() and FirstOrDefault() with Take() in Include() produce querys that execute slowly on SQLite and Postgres (I didn't test others). I'm unsure if EF Core does the kind of optimization I think of in general, but it seems easy to handcraft SQL that is semantically equalivalent and executes much faster.

Repro

using Bogus;
using Microsoft.Data.Sqlite;
using Microsoft.EntityFrameworkCore;

const string ConnectionString = @"Data Source=InMemorySample;Mode=Memory;Cache=Shared";
// const string NpgsqlConnectionString = @"Host=localhost;Database=splitqueryperformance;Username=npgsql_tests;Password=npgsql_tests;Include Error Detail=True";

await using var keepOpenCon = new SqliteConnection(ConnectionString);
await keepOpenCon.OpenAsync();

await using var ctx = new ReproContext(ConnectionString);
await ctx.Database.EnsureDeletedAsync();
await ctx.Database.EnsureCreatedAsync();

Console.WriteLine(ctx.Database.GenerateCreateScript());
Console.WriteLine("----");

Randomizer.Seed = new Random(8675309);

Faker<Blog> blogFaker = new();
blogFaker.RuleFor(b => b.Content, f => f.Rant.Review());

Faker<Reviewer> reviewerFaker = new();
reviewerFaker.RuleFor(r => r.Name, f => f.Name.FullName());

var blogs = blogFaker.GenerateBetween(10_000, 15_000);
var reviewers = reviewerFaker.GenerateBetween(30_000, 45_000);
var faker = new Faker();

foreach (var blog in blogs)
{
    var cnt = faker.Random.Int(0, 20);
    blog.Reviewers = [.. faker.PickRandom(reviewers, cnt)];
}

ctx.Blogs.AddRange(blogs);
ctx.Reviewers.AddRange(reviewers);
await ctx.SaveChangesAsync();

Console.WriteLine("-- Slow FirstOrDefault --");
var toSelect = 1824; //faker.PickRandom(blogs).Id;
var q = ctx.Blogs
    .Include(b => b.Reviewers.OrderBy(r => r.Name).ThenBy(r => r.Id).Take(5))
    .Where(b => b.Id == toSelect)
    .AsSplitQuery();

Console.WriteLine();
Console.WriteLine("FirstOrDefault:");
await q.FirstOrDefaultAsync();

Console.WriteLine();
Console.WriteLine("ToList:");
await q.ToListAsync();

// naively reduce the chance ToListAsync was just able to re-use something cached
Console.WriteLine();
Console.WriteLine("FirstOrDefault 2nd:");
await q.FirstOrDefaultAsync();

Console.WriteLine("-- Fast FirstOrDefault --");
var toSelect2 = toSelect + 1000; //faker.PickRandom(blogs).Id;
var q2 = ctx.Blogs
    .Include(b => b.Reviewers.OrderBy(r => r.Name).ThenBy(r => r.Id)) // no Take(5) here
    .Where(b => b.Id == toSelect2)
    .AsSplitQuery();

Console.WriteLine();
Console.WriteLine("FirstOrDefault:");
await q2.FirstOrDefaultAsync();

Console.WriteLine();
Console.WriteLine("ToList:");
await q2.ToListAsync();

// naively reduce the chance ToListAsync was just able to re-use something cached
Console.WriteLine();
Console.WriteLine("FirstOrDefault 2nd:");
await q2.FirstOrDefaultAsync();

// This translates to a more complex query that should be sematically equivalent to the slower q query.
// On my machine it executes ~3x faster with SQLite and a bit more than 10x faster on Postgres.
Console.WriteLine("-- Workaround Fast FirstOrDefault --");
var toSelect3 = toSelect2 + 1000; //faker.PickRandom(blogs).Id;
var q3 = ctx.Blogs
    .Include(b => b.Reviewers.Where(r => r.BlogReviewers.Any(x => x.BlogId == toSelect3)).OrderBy(r => r.Name).ThenBy(r => r.Id).Take(5))
    .Where(b => b.Id == toSelect3)
    .AsSplitQuery();

Console.WriteLine();
Console.WriteLine("FirstOrDefault:");
await q3.FirstOrDefaultAsync();

Console.WriteLine();
Console.WriteLine("ToList:");
await q3.ToListAsync();

// naively reduce the chance ToListAsync was just able to re-use something cached
Console.WriteLine();
Console.WriteLine("FirstOrDefault 2nd:");
await q3.FirstOrDefaultAsync();

Console.WriteLine("-- Raw Sql --");
var toSelect4 = toSelect3 + 1000; //faker.PickRandom(blogs).Id;

await ctx.Database.ExecuteSqlAsync(@$"SELECT ""s0"".""BlogId"", ""s0"".""ReviewerId"", ""s0"".""Id"", ""s0"".""Name"", ""b1"".""Id""
FROM (
    SELECT ""b"".""Id""
    FROM ""Blogs"" AS ""b""
    WHERE ""b"".""Id"" = {toSelect4}
    LIMIT 1
) AS ""b1""
INNER JOIN (
    SELECT ""s"".""BlogId"", ""s"".""ReviewerId"", ""s"".""Id"", ""s"".""Name""
    FROM (
        SELECT ""b0"".""BlogId"", ""b0"".""ReviewerId"", ""r"".""Id"", ""r"".""Name"", ROW_NUMBER() OVER(PARTITION BY ""b0"".""BlogId"" ORDER BY ""r"".""Name"", ""r"".""Id"") AS ""row""
        FROM ""BlogReviewer"" AS ""b0""
        INNER JOIN ""Reviewers"" AS ""r"" ON ""b0"".""ReviewerId"" = ""r"".""Id""
    ) AS ""s""
    WHERE ""s"".""row"" <= 5 and ""s"".""BlogId"" = {toSelect4}
) AS ""s0"" ON ""b1"".""Id"" = ""s0"".""BlogId""
ORDER BY ""b1"".""Id"", ""s0"".""BlogId"", ""s0"".""Name"", ""s0"".""Id""
");

public class ReproContext(string ConnectionString) : DbContext
{
    public DbSet<Blog> Blogs { get; set; }

    public DbSet<Reviewer> Reviewers { get; set; }

    protected override void OnModelCreating(ModelBuilder modelBuilder)
    {
        modelBuilder.Entity<Blog>()
            .HasMany(b => b.Reviewers)
            .WithMany(r => r.Blogs)
            .UsingEntity<BlogReviewer>();
    }

    protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
        => optionsBuilder.UseSqlite(ConnectionString).LogTo(Console.WriteLine, Microsoft.Extensions.Logging.LogLevel.Information, Microsoft.EntityFrameworkCore.Diagnostics.DbContextLoggerOptions.None);
        //=> optionsBuilder.UseNpgsql(ConnectionString).LogTo(Console.WriteLine, Microsoft.Extensions.Logging.LogLevel.Information, Microsoft.EntityFrameworkCore.Diagnostics.DbContextLoggerOptions.None);
}

public class Blog
{
    public int Id { get; set; }
    public required string Content { get; set; }
    public List<Reviewer> Reviewers { get; set; } = [];
    public List<BlogReviewer> BlogReviewers { get; set; } = [];
}

public class Reviewer
{
    public int Id { get; set; }
    public required string Name { get; set; }
    public List<Blog> Blogs { get; set; } = [];
    public List<BlogReviewer> BlogReviewers { get; set; } = [];
}

public class BlogReviewer
{
    public int BlogId { get; set; }
    public int ReviewerId { get; set; }
}
<Project Sdk="Microsoft.NET.Sdk">
	<PropertyGroup>
		<OutputType>Exe</OutputType>
		<TargetFramework>net8.0</TargetFramework>
		<ImplicitUsings>enable</ImplicitUsings>
		<Nullable>enable</Nullable>
	</PropertyGroup>

	<ItemGroup>
		<PackageReference Include="Microsoft.EntityFrameworkCore.Sqlite" Version="9.0.0-preview.6.24327.4" />
		<!--<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="9.0.0-preview.3" />-->
		<PackageReference Include="Bogus" Version="35.6.0" />
	</ItemGroup>
</Project>

Output with SQLite

-- Slow FirstOrDefault --

FirstOrDefault:
Executed DbCommand (0ms) [Parameters=[@__toSelect_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "b"."Id", "b"."Content"
FROM "Blogs" AS "b"
WHERE "b"."Id" = @__toSelect_0
ORDER BY "b"."Id"
LIMIT 1
Executed DbCommand (333ms) [Parameters=[@__toSelect_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s0"."BlogId", "s0"."ReviewerId", "s0"."Id", "s0"."Name", "b1"."Id"
FROM (
    SELECT "b"."Id"
    FROM "Blogs" AS "b"
    WHERE "b"."Id" = @__toSelect_0
    LIMIT 1
) AS "b1"
INNER JOIN (
    SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name"
    FROM (
        SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name", ROW_NUMBER() OVER(PARTITION BY "b0"."BlogId" ORDER BY "r"."Name", "r"."Id") AS "row"
        FROM "BlogReviewer" AS "b0"
        INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
    ) AS "s"
    WHERE "s"."row" <= 5
) AS "s0" ON "b1"."Id" = "s0"."BlogId"
ORDER BY "b1"."Id", "s0"."BlogId", "s0"."Name", "s0"."Id"

ToList:
Executed DbCommand (0ms) [Parameters=[@__toSelect_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "b"."Id", "b"."Content"
FROM "Blogs" AS "b"
WHERE "b"."Id" = @__toSelect_0
ORDER BY "b"."Id"
Executed DbCommand (0ms) [Parameters=[@__toSelect_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s0"."BlogId", "s0"."ReviewerId", "s0"."Id", "s0"."Name", "b"."Id"
FROM "Blogs" AS "b"
INNER JOIN (
    SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name"
    FROM (
        SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name", ROW_NUMBER() OVER(PARTITION BY "b0"."BlogId" ORDER BY "r"."Name", "r"."Id") AS "row"
        FROM "BlogReviewer" AS "b0"
        INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
    ) AS "s"
    WHERE "s"."row" <= 5
) AS "s0" ON "b"."Id" = "s0"."BlogId"
WHERE "b"."Id" = @__toSelect_0
ORDER BY "b"."Id", "s0"."BlogId", "s0"."Name", "s0"."Id"

FirstOrDefault 2nd:
Executed DbCommand (0ms) [Parameters=[@__toSelect_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "b"."Id", "b"."Content"
FROM "Blogs" AS "b"
WHERE "b"."Id" = @__toSelect_0
ORDER BY "b"."Id"
LIMIT 1
Executed DbCommand (301ms) [Parameters=[@__toSelect_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s0"."BlogId", "s0"."ReviewerId", "s0"."Id", "s0"."Name", "b1"."Id"
FROM (
    SELECT "b"."Id"
    FROM "Blogs" AS "b"
    WHERE "b"."Id" = @__toSelect_0
    LIMIT 1
) AS "b1"
INNER JOIN (
    SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name"
    FROM (
        SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name", ROW_NUMBER() OVER(PARTITION BY "b0"."BlogId" ORDER BY "r"."Name", "r"."Id") AS "row"
        FROM "BlogReviewer" AS "b0"
        INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
    ) AS "s"
    WHERE "s"."row" <= 5
) AS "s0" ON "b1"."Id" = "s0"."BlogId"
ORDER BY "b1"."Id", "s0"."BlogId", "s0"."Name", "s0"."Id"
-- Fast FirstOrDefault --

FirstOrDefault:
Executed DbCommand (0ms) [Parameters=[@__toSelect2_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "b"."Id", "b"."Content"
FROM "Blogs" AS "b"
WHERE "b"."Id" = @__toSelect2_0
ORDER BY "b"."Id"
LIMIT 1
Executed DbCommand (0ms) [Parameters=[@__toSelect2_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name", "b1"."Id"
FROM (
    SELECT "b"."Id"
    FROM "Blogs" AS "b"
    WHERE "b"."Id" = @__toSelect2_0
    LIMIT 1
) AS "b1"
INNER JOIN (
    SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name"
    FROM "BlogReviewer" AS "b0"
    INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
) AS "s" ON "b1"."Id" = "s"."BlogId"
ORDER BY "b1"."Id", "s"."Name", "s"."Id"

ToList:
Executed DbCommand (0ms) [Parameters=[@__toSelect2_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "b"."Id", "b"."Content"
FROM "Blogs" AS "b"
WHERE "b"."Id" = @__toSelect2_0
ORDER BY "b"."Id"
Executed DbCommand (0ms) [Parameters=[@__toSelect2_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name", "b"."Id"
FROM "Blogs" AS "b"
INNER JOIN (
    SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name"
    FROM "BlogReviewer" AS "b0"
    INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
) AS "s" ON "b"."Id" = "s"."BlogId"
WHERE "b"."Id" = @__toSelect2_0
ORDER BY "b"."Id", "s"."Name", "s"."Id"

FirstOrDefault 2nd:
Executed DbCommand (0ms) [Parameters=[@__toSelect2_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "b"."Id", "b"."Content"
FROM "Blogs" AS "b"
WHERE "b"."Id" = @__toSelect2_0
ORDER BY "b"."Id"
LIMIT 1
Executed DbCommand (0ms) [Parameters=[@__toSelect2_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name", "b1"."Id"
FROM (
    SELECT "b"."Id"
    FROM "Blogs" AS "b"
    WHERE "b"."Id" = @__toSelect2_0
    LIMIT 1
) AS "b1"
INNER JOIN (
    SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name"
    FROM "BlogReviewer" AS "b0"
    INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
) AS "s" ON "b1"."Id" = "s"."BlogId"
ORDER BY "b1"."Id", "s"."Name", "s"."Id"
-- Workaround Fast FirstOrDefault --

FirstOrDefault:
Executed DbCommand (0ms) [Parameters=[@__toSelect3_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "b"."Id", "b"."Content"
FROM "Blogs" AS "b"
WHERE "b"."Id" = @__toSelect3_0
ORDER BY "b"."Id"
LIMIT 1
Executed DbCommand (92ms) [Parameters=[@__toSelect3_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s0"."BlogId", "s0"."ReviewerId", "s0"."Id", "s0"."Name", "b2"."Id"
FROM (
    SELECT "b"."Id"
    FROM "Blogs" AS "b"
    WHERE "b"."Id" = @__toSelect3_0
    LIMIT 1
) AS "b2"
INNER JOIN (
    SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name"
    FROM (
        SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name", ROW_NUMBER() OVER(PARTITION BY "b0"."BlogId" ORDER BY "r"."Name", "r"."Id") AS "row"
        FROM "BlogReviewer" AS "b0"
        INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
        WHERE EXISTS (
            SELECT 1
            FROM "BlogReviewer" AS "b1"
            WHERE "r"."Id" = "b1"."ReviewerId" AND "b1"."BlogId" = @__toSelect3_0)
    ) AS "s"
    WHERE "s"."row" <= 5
) AS "s0" ON "b2"."Id" = "s0"."BlogId"
ORDER BY "b2"."Id", "s0"."BlogId", "s0"."Name", "s0"."Id"

ToList:
Executed DbCommand (0ms) [Parameters=[@__toSelect3_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "b"."Id", "b"."Content"
FROM "Blogs" AS "b"
WHERE "b"."Id" = @__toSelect3_0
ORDER BY "b"."Id"
Executed DbCommand (0ms) [Parameters=[@__toSelect3_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s0"."BlogId", "s0"."ReviewerId", "s0"."Id", "s0"."Name", "b"."Id"
FROM "Blogs" AS "b"
INNER JOIN (
    SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name"
    FROM (
        SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name", ROW_NUMBER() OVER(PARTITION BY "b0"."BlogId" ORDER BY "r"."Name", "r"."Id") AS "row"
        FROM "BlogReviewer" AS "b0"
        INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
        WHERE EXISTS (
            SELECT 1
            FROM "BlogReviewer" AS "b1"
            WHERE "r"."Id" = "b1"."ReviewerId" AND "b1"."BlogId" = @__toSelect3_0)
    ) AS "s"
    WHERE "s"."row" <= 5
) AS "s0" ON "b"."Id" = "s0"."BlogId"
WHERE "b"."Id" = @__toSelect3_0
ORDER BY "b"."Id", "s0"."BlogId", "s0"."Name", "s0"."Id"

FirstOrDefault 2nd:
Executed DbCommand (0ms) [Parameters=[@__toSelect3_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "b"."Id", "b"."Content"
FROM "Blogs" AS "b"
WHERE "b"."Id" = @__toSelect3_0
ORDER BY "b"."Id"
LIMIT 1
Executed DbCommand (90ms) [Parameters=[@__toSelect3_0='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s0"."BlogId", "s0"."ReviewerId", "s0"."Id", "s0"."Name", "b2"."Id"
FROM (
    SELECT "b"."Id"
    FROM "Blogs" AS "b"
    WHERE "b"."Id" = @__toSelect3_0
    LIMIT 1
) AS "b2"
INNER JOIN (
    SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name"
    FROM (
        SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name", ROW_NUMBER() OVER(PARTITION BY "b0"."BlogId" ORDER BY "r"."Name", "r"."Id") AS "row"
        FROM "BlogReviewer" AS "b0"
        INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
        WHERE EXISTS (
            SELECT 1
            FROM "BlogReviewer" AS "b1"
            WHERE "r"."Id" = "b1"."ReviewerId" AND "b1"."BlogId" = @__toSelect3_0)
    ) AS "s"
    WHERE "s"."row" <= 5
) AS "s0" ON "b2"."Id" = "s0"."BlogId"
ORDER BY "b2"."Id", "s0"."BlogId", "s0"."Name", "s0"."Id"
-- Raw Sql --
Executed DbCommand (0ms) [Parameters=[@p0='?' (DbType = Int32), @p1='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s0"."BlogId", "s0"."ReviewerId", "s0"."Id", "s0"."Name", "b1"."Id"
FROM (
    SELECT "b"."Id"
    FROM "Blogs" AS "b"
    WHERE "b"."Id" = @p0
    LIMIT 1
) AS "b1"
INNER JOIN (
    SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name"
    FROM (
        SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name", ROW_NUMBER() OVER(PARTITION BY "b0"."BlogId" ORDER BY "r"."Name", "r"."Id") AS "row"
        FROM "BlogReviewer" AS "b0"
        INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
    ) AS "s"
    WHERE "s"."row" <= 5 and "s"."BlogId" = @p1
) AS "s0" ON "b1"."Id" = "s0"."BlogId"
ORDER BY "b1"."Id", "s0"."BlogId", "s0"."Name", "s0"."Id"

Possible optimization

It seems to me that the query planner of SQLite and Postgres alike is not able to use an index on BlogReviewers.BlogId effectively for queries like this. If we add a predicate to the subquery manually that filters for the same id as is already done in the main query, the index will be used and the query executes much faster. Is this something general enough so that it can be done in EF or is this an optimization that is to specific to my circumstances?

Workaround

Doing e.g. (await q.ToListAsync()).FirstOrDefault() seems counter intuitive first but results in a better query plan and probably the same amount of data on the wire as with EF-side FirstOrDefaultAsync().

@roji
Copy link
Member

roji commented Aug 7, 2024

@georg-jung EF doesn't currently perform such optimizations, of implicitly lowering the external predicate into the subquery; we generally don't go too far in to the domain of "taking the place of the planner" (i.e. transform the query so that it makes the database planner produce better plans) - though this is something we could consider for the future, as a more advanced form of optimization category. I'd still need to fully think about this - the danger with such optimizations is that they concievably might regress performance for other query shapes (or other databases), so one must be very careful.

Putting on the backlog to consider for future work - thanks for the suggestion.

@roji roji added this to the Backlog milestone Aug 7, 2024
@roji
Copy link
Member

roji commented Aug 7, 2024

Note also #12776 (comment), which is an alternative implementation of split query which wouldn't embed the principal query in the dependent one, that could mitigate this particular case (though the general optimization may still may sense in other situations).

@georg-jung
Copy link
Author

Thanks for the explanation and for pointing to #12776. I think that it would indeed help in this case.

Probably this doesn't change too much about the dangers and complexities you mentioned, but just as a side note: At least in this particular case lowering the external predicate isn't the only option that helps. Lifting and repeating it in the main query would go quite far too:

-- Raw Sql --
Executed DbCommand (0ms) [Parameters=[@p0='?' (DbType = Int32), @p1='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s0"."BlogId", "s0"."ReviewerId", "s0"."Id", "s0"."Name", "b1"."Id"
FROM (
    SELECT "b"."Id"
    FROM "Blogs" AS "b"
    WHERE "b"."Id" = @p0
    LIMIT 1
) AS "b1"
INNER JOIN (
    SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name"
    FROM (
        SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name", ROW_NUMBER() OVER(PARTITION BY "b0"."BlogId" ORDER BY "r"."Name", "r"."Id") AS "row"
        FROM "BlogReviewer" AS "b0"
        INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
    ) AS "s"
    WHERE "s"."row" <= 5 and "s"."BlogId" = @p1
) AS "s0" ON "b1"."Id" = "s0"."BlogId"
ORDER BY "b1"."Id", "s0"."BlogId", "s0"."Name", "s0"."Id"

-- Raw Sql 2 --
Executed DbCommand (3ms) [Parameters=[@p0='?' (DbType = Int32), @p1='?' (DbType = Int32)], CommandType='Text', CommandTimeout='30']
SELECT "s0"."BlogId", "s0"."ReviewerId", "s0"."Id", "s0"."Name", "b1"."Id"
FROM (
    SELECT "b"."Id"
    FROM "Blogs" AS "b"
    WHERE "b"."Id" = @p0
    LIMIT 1
) AS "b1"
INNER JOIN (
    SELECT "s"."BlogId", "s"."ReviewerId", "s"."Id", "s"."Name"
    FROM (
        SELECT "b0"."BlogId", "b0"."ReviewerId", "r"."Id", "r"."Name", ROW_NUMBER() OVER(PARTITION BY "b0"."BlogId" ORDER BY "r"."Name", "r"."Id") AS "row"
        FROM "BlogReviewer" AS "b0"
        INNER JOIN "Reviewers" AS "r" ON "b0"."ReviewerId" = "r"."Id"
    ) AS "s"
    WHERE "s"."row" <= 5
) AS "s0" ON "b1"."Id" = "s0"."BlogId"
WHERE "b1"."Id" = @p1
ORDER BY "b1"."Id", "s0"."BlogId", "s0"."Name", "s0"."Id"

Seems like lifting and lowering the same predicate in the same query is escpecially hard for the planner. Maybe lifting and repeating is safer to do as an optimization from the EF perspective.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

2 participants