Skip to content

Commit

Permalink
Fix to #6647 - Left Join (GroupJoin) always materializes elements res…
Browse files Browse the repository at this point in the history
…ulting in unnecessary data pulling

Problem was that for GroupJoin we would always force materialization on participating query sources.
We were doing this because we are not always able to correctly divide outer elements of the GroupJoin into correct groups.
However, if the GroupJoin clause is wrapped around SelectMany clause the groups don't matter because they are getting flattened by SelectMany anyway.

Fix is to recognize those scenarios and only force materialization when the correct grouping actually matters.
We can avoid materialization if the GroupJoin is followed by SelectMany clause (that references the grouping) and that the grouping itself is not present anywhere else in the query.
This addresses optional navigations, which is the 80% case. Manually created GroupJoins that are not modeling LeftOuterJoins still require additional materialization, but this can be addressed later as the priority is not nearly as high.

Other bugs fixed alongside the main change:

#7722 - Query : error during compilation for queries with navigation properties and First/Single/client method operators inside a subquery.

Problem here was that for some queries we don't know how to properly bind to a value buffer (when the result of binding is subquery, and not qsre).
Fix/mitigation is to recognize those scenarios and force materialization on the subqueries. This can be properly addressed in later commit (i.e. by improving the binding logic)

#7497 - Error calling Count() after GroupJoin()

(and removed the temporary fix to #7348 and implemented a proper one)
This turned out to be a chicken-and-egg problem - we had discrepancy between query sources that we marked for materialization in case of GroupJoin + result operator, and the actual client side operation that had to be performed.
We had to fix the group join materialization to allow for translation of those result operators, but without fixing this issue also we would get invalid queries in some cases.
Proper fix is to force client result operator only if the GroupJoin cannot be flattened. We also now do this for all result operators, not just a subset because it was possible to generate invalid queries with operaors like Count().
This is now consistent with the behavior of RequiresMaterializationExpressionVisitor.

Also fixed several minor issues that were encountered once no longer do extensive materialization.
  • Loading branch information
maumar committed Mar 11, 2017
1 parent b883abe commit 26d6fee
Show file tree
Hide file tree
Showing 20 changed files with 851 additions and 496 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Linq.Expressions;
using System.Reflection;
Expand All @@ -11,12 +12,14 @@
using Microsoft.EntityFrameworkCore.Metadata;
using Microsoft.EntityFrameworkCore.Metadata.Internal;
using Microsoft.EntityFrameworkCore.Query.Expressions;
using Microsoft.EntityFrameworkCore.Query.Expressions.Internal;
using Microsoft.EntityFrameworkCore.Query.ExpressionVisitors.Internal;
using Microsoft.EntityFrameworkCore.Query.Internal;
using Microsoft.EntityFrameworkCore.Query.ResultOperators.Internal;
using Microsoft.EntityFrameworkCore.Query.Sql;
using Microsoft.EntityFrameworkCore.Storage;
using Microsoft.EntityFrameworkCore.Utilities;
using Remotion.Linq;
using Remotion.Linq.Clauses;
using Remotion.Linq.Clauses.Expressions;

Expand Down Expand Up @@ -71,8 +74,13 @@ protected override Expression VisitSubQuery(SubQueryExpression expression)

var queryModelVisitor = (RelationalQueryModelVisitor)CreateQueryModelVisitor();

var queryModelMapping = new Dictionary<QueryModel, QueryModel>();
expression.QueryModel.PopulateQueryModelMapping(queryModelMapping);

queryModelVisitor.VisitQueryModel(expression.QueryModel);

expression.QueryModel.RecreateQueryModelFromMapping(queryModelMapping);

if (_querySource != null)
{
QueryModelVisitor.RegisterSubQueryVisitor(_querySource, queryModelVisitor);
Expand Down
29 changes: 0 additions & 29 deletions src/EFCore.Relational/Query/Expressions/LeftOuterJoinExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,35 +40,6 @@ protected override Expression Accept(ExpressionVisitor visitor)
: base.Accept(visitor);
}

/// <summary>
/// Determines whether or not this LeftOuterJoinExpression handles the given query source.
/// </summary>
/// <param name="querySource"> The query source. </param>
/// <returns>
/// true if the supplied query source is handled by this LeftOuterJoinExpression; otherwise false.
/// </returns>
public override bool HandlesQuerySource(IQuerySource querySource)
{
if (querySource is AdditionalFromClause additionalFromClause)
{
var subQueryModel = (additionalFromClause.FromExpression as SubQueryExpression)?.QueryModel;
if (subQueryModel != null
&& !subQueryModel.BodyClauses.Any()
&& subQueryModel.ResultOperators.Count == 1
&& subQueryModel.ResultOperators[0] is DefaultIfEmptyResultOperator
&& (subQueryModel.SelectClause.Selector as QuerySourceReferenceExpression)?.ReferencedQuerySource == subQueryModel.MainFromClause
&& (subQueryModel.MainFromClause.FromExpression as QuerySourceReferenceExpression)?.ReferencedQuerySource is GroupJoinClause targetGroupJoin)
{
if (QuerySource == targetGroupJoin.JoinClause)
{
return true;
}
}
}

return base.HandlesQuerySource(querySource);
}

/// <summary>
/// Creates a <see cref="string" /> representation of the Expression.
/// </summary>
Expand Down
4 changes: 3 additions & 1 deletion src/EFCore.Relational/Query/Expressions/SelectExpression.cs
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,9 @@ public override bool HandlesQuerySource([NotNull] IQuerySource querySource)
{
Check.NotNull(querySource, nameof(querySource));

return _tables.Any(te => te.QuerySource == querySource || te.HandlesQuerySource(querySource));
var processedQuerySource = PreProcessQuerySource(querySource);

return _tables.Any(te => te.QuerySource == processedQuerySource || te.HandlesQuerySource(processedQuerySource));
}

/// <summary>
Expand Down
15 changes: 14 additions & 1 deletion src/EFCore.Relational/Query/Expressions/TableExpressionBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@
// Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Linq;
using System.Linq.Expressions;
using JetBrains.Annotations;
using Microsoft.EntityFrameworkCore.Utilities;
using Remotion.Linq.Clauses;
using Remotion.Linq.Clauses.Expressions;
using Remotion.Linq.Clauses.ResultOperators;
using Microsoft.EntityFrameworkCore.Query.Internal;

namespace Microsoft.EntityFrameworkCore.Query.Expressions
{
Expand Down Expand Up @@ -88,7 +92,16 @@ public virtual bool HandlesQuerySource([NotNull] IQuerySource querySource)
{
Check.NotNull(querySource, nameof(querySource));

return _querySource == querySource;
return _querySource == PreProcessQuerySource(querySource);
}

protected virtual IQuerySource PreProcessQuerySource([NotNull] IQuerySource querySource)
{
Check.NotNull(querySource, nameof(querySource));

var newQuerySource = (querySource as AdditionalFromClause)?.TryGetFlattenedGroupJoinClause() ?? querySource;

return (newQuerySource as GroupJoinClause)?.JoinClause ?? newQuerySource;
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,19 +155,6 @@ var handlerContext
return handlerContext.EvalOnClient();
}

if (relationalQueryModelVisitor.RequiresClientSingleColumnResultOperator
&& !(resultOperator is SkipResultOperator
|| resultOperator is TakeResultOperator
|| resultOperator is FirstResultOperator
|| resultOperator is SingleResultOperator
|| resultOperator is CountResultOperator
|| resultOperator is AllResultOperator
|| resultOperator is AnyResultOperator
|| resultOperator is GroupResultOperator))
{
return handlerContext.EvalOnClient();
}

return resultHandler(handlerContext);
}

Expand Down Expand Up @@ -666,7 +653,7 @@ private static Expression HandleSum(HandlerContext handlerContext)

if (!(expression.RemoveConvert() is SelectExpression))
{
var sumExpression = new SqlFunctionExpression("SUM", expression.Type, new [] { expression });
var sumExpression = new SqlFunctionExpression("SUM", handlerContext.QueryModel.SelectClause.Selector.Type, new [] { expression });

handlerContext.SelectExpression.SetProjectionExpression(sumExpression);

Expand Down
30 changes: 6 additions & 24 deletions src/EFCore.Relational/Query/RelationalQueryModelVisitor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@ private readonly Dictionary<IQuerySource, RelationalQueryModelVisitor> _subQuery
private bool _requiresClientProjection;
private bool _requiresClientOrderBy;
private bool _requiresClientResultOperator;
private bool _requiresClientSingleColumnResultOperator;

private Dictionary<IncludeSpecification, List<int>> _navigationIndexMap = new Dictionary<IncludeSpecification, List<int>>();
private List<GroupJoinClause> _unflattenedGroupJoinClauses = new List<GroupJoinClause>();

/// <summary>
/// This API supports the Entity Framework Core infrastructure and is not intended to be used
Expand Down Expand Up @@ -169,23 +169,10 @@ public virtual bool RequiresClientProjection
/// </value>
public virtual bool RequiresClientResultOperator
{
get { return _requiresClientResultOperator || RequiresClientEval; }
get { return _unflattenedGroupJoinClauses.Any() || _requiresClientResultOperator || RequiresClientEval; }
set { _requiresClientResultOperator = value; }
}

/// <summary>
/// Gets or sets a value indicating whether the query requires client evaluation for result operators potentially apply to a subset of
/// columns rather than entire row.
/// </summary>
/// <value>
/// true if the query requires client single column result operator, false if not.
/// </value>
internal virtual bool RequiresClientSingleColumnResultOperator
{
get { return _requiresClientSingleColumnResultOperator || _requiresClientResultOperator || RequiresClientEval; }
set { _requiresClientSingleColumnResultOperator = value; }
}

/// <summary>
/// Gets or sets a value indicating whether this query model visitor will be
/// able to bind directly to properties from its parent query without requiring
Expand Down Expand Up @@ -272,9 +259,6 @@ public virtual SelectExpression TryGetQuery([NotNull] IQuerySource querySource)
{
Check.NotNull(querySource, nameof(querySource));

querySource
= (querySource as GroupJoinClause)?.JoinClause ?? querySource;

SelectExpression selectExpression;
return QueriesBySource.TryGetValue(querySource, out selectExpression)
? selectExpression
Expand Down Expand Up @@ -559,6 +543,8 @@ public override void VisitGroupJoinClause(

base.VisitGroupJoinClause(groupJoinClause, queryModel, index);

_unflattenedGroupJoinClauses.Add(groupJoinClause);

if (!TryFlattenGroupJoin(
groupJoinClause,
queryModel,
Expand All @@ -574,12 +560,6 @@ public override void VisitGroupJoinClause(
{
WarnClientEval(groupJoinClause.JoinClause);
}

// Workaround until #6647 is addressed - GroupJoin requires materialization of entire entity which results in all columns of that entity being projected
// this in turn causes result operators to be applied on all of those columns, even if the query specifies a subset of columns to perform the operation on
// this could lead to incorrect results (e.g. for Distinct)
// This however is safe to do for some operators, e.g. FirstOrDefault, Count(), Take() because their result is the same whether they are applied on single column or entire row
RequiresClientSingleColumnResultOperator = true;
}

private Dictionary<IQuerySource, Expression> SnapshotQuerySourceMapping(QueryModel queryModel)
Expand Down Expand Up @@ -1597,6 +1577,8 @@ var referencedQuerySource
queryModel.BodyClauses.Remove(groupJoinClause);
queryModel.BodyClauses.Remove(additionalFromClause);

_unflattenedGroupJoinClauses.Remove(groupJoinClause);

var querySourceMapping = new QuerySourceMapping();

querySourceMapping.AddMapping(
Expand Down
155 changes: 153 additions & 2 deletions src/EFCore.Specification.Tests/ComplexNavigationsQueryTestBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3119,6 +3119,157 @@ public virtual void Complex_query_with_optional_navigations_and_client_side_eval
(e, a) => Assert.Equal(e.Id, a.Id));
}

[ConditionalFact]
public virtual void Required_navigation_on_a_subquery_with_First_in_projection()
{
AssertQuery<Level2>(
l2s => l2s
.Where(l2o => l2o.Id == 7)
.Select(l2o => l2s.OrderBy(l2i => l2i.Id).First().OneToOne_Required_FK_Inverse.Name));
}

[ConditionalFact]
public virtual void Required_navigation_on_a_subquery_with_complex_projection_and_First()
{
AssertQuery<Level1, Level2>(
(l1s, l2s) =>
from l2o in l2s
where l2o.Id == 7
select
(from l2i in l2s
join l1i in l1s
on l2i.Level1_Required_Id equals l1i.Id
orderby l2i.Id ascending
select new { Navigation = l2i.OneToOne_Required_FK_Inverse, Contant = 7 }).First().Navigation.Name);
}

[ConditionalFact]
public virtual void Required_navigation_on_a_subquery_with_First_in_predicate()
{
AssertQuery<Level2>(
l2s => l2s
.Where(l2o => l2o.Id == 7)
.Where(l1 => EF.Property<string>(l2s.OrderBy(l2i => l2i.Id).First().OneToOne_Required_FK_Inverse, "Name") == "L1 02"),
l2s => l2s
.Where(l2o => l2o.Id == 7)
.Where(l1 => l2s.OrderBy(l2i => l2i.Id).First().OneToOne_Required_FK_Inverse.Name == "L1 02"));
}

////[ConditionalFact] issue #7761
public virtual void GroupJoin_with_complex_subquery_with_joins_does_not_get_flattened()
{
using (var ctx = CreateContext())
{
var query = from l1_outer in ctx.LevelOne
join subquery in
(
from l2_inner in ctx.LevelTwo
join l1_inner in ctx.LevelOne on l2_inner.Level1_Required_Id equals l1_inner.Id
select l2_inner
)
on l1_outer.Id equals subquery.Level1_Optional_Id into grouping
from subquery in grouping.DefaultIfEmpty()
select (int?)subquery.Id;

var result = query.ToList();
}
}

[ConditionalFact]
public virtual void GroupJoin_with_complex_subquery_with_joins_with_reference_to_grouping1()
{
AssertQueryScalar<Level1, Level2, int>(
(l1s, l2s) =>
from l1_outer in l1s
join subquery in
(
from l2_inner in l2s
join l1_inner in l1s on l2_inner.Level1_Required_Id equals l1_inner.Id
select l2_inner
)
on l1_outer.Id equals subquery.Level1_Optional_Id into grouping
where grouping.Any()
from subquery in grouping.DefaultIfEmpty()
select subquery.Id);
}

[ConditionalFact]
public virtual void GroupJoin_with_complex_subquery_with_joins_with_reference_to_grouping2()
{
AssertQueryScalar<Level1, Level2, int>(
(l1s, l2s) =>
from l1_outer in l1s
join subquery in
(
from l2_inner in l2s
join l1_inner in l1s on l2_inner.Level1_Required_Id equals l1_inner.Id
select l2_inner
)
on l1_outer.Id equals subquery.Level1_Optional_Id into grouping
from subquery in grouping.DefaultIfEmpty()
where grouping.Any()
select subquery.Id);
}

////[ConditionalFact] issue #7770
public virtual void GroupJoin_on_a_subquery_containing_another_GroupJoin_projecting_outer()
{
using (var ctx = CreateContext())
{
var query = from x in
(from l1 in ctx.LevelOne
join l2 in ctx.LevelTwo on l1.Id equals l2.Level1_Optional_Id into grouping
from l2 in grouping.DefaultIfEmpty()
select l1).Take(2)
join l2_outer in ctx.LevelTwo on x.Id equals l2_outer.Level1_Optional_Id into grouping_outer
from l2_outer in grouping_outer.DefaultIfEmpty()
select l2_outer.Name;

var result = query.ToList();
}
}

////[ConditionalFact] issue #7770
public virtual void GroupJoin_on_a_subquery_containing_another_GroupJoin_projecting_outer_with_client_method()
{
using (var ctx = CreateContext())
{
var query = from x in
(from l1 in ctx.LevelOne
join l2 in ctx.LevelTwo on l1.Id equals l2.Level1_Optional_Id into grouping
from l2 in grouping.DefaultIfEmpty()
select ClientLevel1(l1)).Take(2)
join l2_outer in ctx.LevelTwo on x.Id equals l2_outer.Level1_Optional_Id into grouping_outer
from l2_outer in grouping_outer.DefaultIfEmpty()
select l2_outer.Name;

var result = query.ToList();
}
}

private Level1 ClientLevel1(Level1 arg)
{
return arg;
}

////[ConditionalFact] issue #7770
public virtual void GroupJoin_on_a_subquery_containing_another_GroupJoin_projecting_inner()
{
using (var ctx = CreateContext())
{
var query = from x in
(from l1 in ctx.LevelOne
join l2 in ctx.LevelTwo on l1.Id equals l2.Level1_Optional_Id into grouping
from l2 in grouping.DefaultIfEmpty()
select l2).Take(2)
join l1_outer in ctx.LevelOne on x.Level1_Optional_Id equals l1_outer.Id into grouping_outer
from l1_outer in grouping_outer.DefaultIfEmpty()
select l1_outer.Name;

var result = query.ToList();
}
}

[ConditionalFact]
public virtual void GroupJoin_on_left_side_being_a_subquery()
{
Expand All @@ -3140,13 +3291,13 @@ public virtual void GroupJoin_on_right_side_being_a_subquery()
from l2 in l2s
join l1 in l1s.OrderBy(x => x.OneToOne_Optional_FK.Name).Take(2) on l2.Level1_Optional_Id equals l1.Id into grouping
from l1 in grouping.DefaultIfEmpty()
select new { Id = l2.Id, Nane = l1 != null ? l1.Name : null },
select new { Id = l2.Id, Name = l1 != null ? l1.Name : null },
(l1s, l2s) =>
from l2 in l2s
join l1 in l1s.OrderBy(x => Maybe(x.OneToOne_Optional_FK, () => x.OneToOne_Optional_FK.Name)).Take(2)
on l2.Level1_Optional_Id equals l1.Id into grouping
from l1 in grouping.DefaultIfEmpty()
select new { Id = l2.Id, Nane = l1 != null ? l1.Name : null },
select new { Id = l2.Id, Name = l1 != null ? l1.Name : null },
e => e.Id);
}

Expand Down
Loading

0 comments on commit 26d6fee

Please sign in to comment.