Skip to content

Commit

Permalink
Optimizing Sitemap Creation with Batched Content Items (#16636)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Sébastien Ros <sebastienros@gmail.com>
  • Loading branch information
MikeAlhayek and sebastienros authored Aug 30, 2024
1 parent 0fa8799 commit 8fd5c88
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 78 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,40 +13,41 @@ namespace OrchardCore.ContentLocalization.Sitemaps;

public class LocalizedContentItemsQueryProvider : IContentItemsQueryProvider
{
private readonly ISession _session;
private readonly IStore _store;
private readonly IRouteableContentTypeCoordinator _routeableContentTypeCoordinator;
private readonly ILocalizationService _localizationService;

public LocalizedContentItemsQueryProvider(
ISession session,
IStore store,
IRouteableContentTypeCoordinator routeableContentTypeCoordinator,
ILocalizationService localizationService
)
{
_session = session;
_store = store;
_routeableContentTypeCoordinator = routeableContentTypeCoordinator;
_localizationService = localizationService;
}

public async Task GetContentItemsAsync(ContentTypesSitemapSource source, ContentItemsQueryContext queryContext)
public async Task GetContentItemsAsync(ContentTypesSitemapSource source, ContentItemsQueryContext context, int? skip = null, int? take = null)
{
var routeableContentTypeDefinitions = await _routeableContentTypeCoordinator.ListRoutableTypeDefinitionsAsync();
using var session = _store.CreateSession(withTracking: false);

IEnumerable<ContentItem> contentItems = null;

if (source.IndexAll)
{
// Assumption here is that at least one content type will be localized.
var ctdNames = routeableContentTypeDefinitions.Select(ctd => ctd.Name);

var queryResults = await _session.Query<ContentItem>()
contentItems = await session.Query<ContentItem>()
.With<ContentItemIndex>(x => x.Published && x.ContentType.IsIn(ctdNames))
.OrderBy(x => x.CreatedUtc)
.ThenBy(x => x.Id)
.Skip(skip ?? 0)
.Take(take ?? 0)
.ListAsync();

queryContext.ContentItems = queryResults;

// Provide all content items with localization as reference content items.
queryContext.ReferenceContentItems = queryResults
.Where(ci => ci.Has<LocalizationPart>());
}
else if (source.LimitItems)
{
Expand All @@ -61,41 +62,31 @@ public async Task GetContentItemsAsync(ContentTypesSitemapSource source, Content

if (contentType.Parts.Any(ctd => string.Equals(ctd.Name, nameof(LocalizationPart), StringComparison.Ordinal)))
{
// Get all content items here for reference. Then reduce by default culture.
// We know that the content item should be localized.
// If it doesn't have a localization part, the content item should have been saved.
var queryResults = await _session.Query<ContentItem>()
.With<ContentItemIndex>(ci => ci.ContentType == source.LimitedContentType.ContentTypeName && ci.Published)
.OrderBy(ci => ci.CreatedUtc)
.With<LocalizedContentItemIndex>()
.ListAsync();

// When limiting items, a content item is valid if it is for the default culture.
var defaultCulture = await _localizationService.GetDefaultCultureAsync();

// Reduce by default culture.
var items = queryResults
.Where(ci => string.Equals(ci.As<LocalizationPart>().Culture, defaultCulture, StringComparison.Ordinal))
.Skip(source.LimitedContentType.Skip)
.Take(source.LimitedContentType.Take);

queryContext.ContentItems = items;

// Provide all content items with localization as reference content items.
queryContext.ReferenceContentItems = queryResults
.Where(ci => ci.Has<LocalizationPart>());
// Get all content items here for reference. Then reduce by default culture.
// We know that the content item should be localized.
// If it doesn't have a localization part, the content item should have been saved.
contentItems = await session.Query<ContentItem>()
.With<ContentItemIndex>(ci => ci.ContentType == source.LimitedContentType.ContentTypeName && ci.Published)
.OrderBy(ci => ci.CreatedUtc)
.ThenBy(ci => ci.Id)
.With<LocalizedContentItemIndex>(x => x.Culture == defaultCulture)
.Take(take ?? 0)
.Skip(skip ?? 0)
.ListAsync();
}
else
{
// Content type is not localized. Produce standard results.
var queryResults = await _session.Query<ContentItem>()
contentItems = await session.Query<ContentItem>()
.With<ContentItemIndex>(x => x.ContentType == source.LimitedContentType.ContentTypeName && x.Published)
.OrderBy(x => x.CreatedUtc)
.Skip(source.LimitedContentType.Skip)
.Take(source.LimitedContentType.Take)
.Skip(skip ?? 0)
.Take(take ?? 0)
.ListAsync();

queryContext.ContentItems = queryResults;
}
}
else
Expand All @@ -106,16 +97,23 @@ public async Task GetContentItemsAsync(ContentTypesSitemapSource source, Content
.Select(x => x.Name);

// No advantage here in reducing with localized index.
var queryResults = await _session.Query<ContentItem>()

contentItems = await session.Query<ContentItem>()
.With<ContentItemIndex>(x => x.ContentType.IsIn(typesToIndex) && x.Published)
.OrderBy(x => x.CreatedUtc)
.ThenBy(x => x.Id)
.Skip(skip ?? 0)
.Take(take ?? 0)
.ListAsync();

queryContext.ContentItems = queryResults;
}

if (contentItems != null)
{
context.ContentItems = contentItems;

// Provide all content items with localization as reference content items.
queryContext.ReferenceContentItems = queryResults
.Where(ci => ci.Has<LocalizationPart>());
context.ReferenceContentItems = contentItems.Where(ci => ci.Has<LocalizationPart>());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ public async Task<bool> ApplyExtendedMetadataAsync(
XElement url)
{
var part = contentItem.As<LocalizationPart>();
if (part == null)
if (part == null ||
queryContext.ReferenceContentItems == null ||
!queryContext.ReferenceContentItems.Any())
{
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ namespace OrchardCore.Contents.Sitemaps;

public class ContentTypesSitemapSourceBuilder : SitemapSourceBuilderBase<ContentTypesSitemapSource>
{
private const int _batchSize = 500;

private static readonly XNamespace _namespace = "http://www.sitemaps.org/schemas/sitemap/0.9";

private readonly IRouteableContentTypeCoordinator _routeableContentTypeCoordinator;
Expand All @@ -21,8 +23,7 @@ public ContentTypesSitemapSourceBuilder(
IRouteableContentTypeCoordinator routeableContentTypeCoordinator,
IContentManager contentManager,
IContentItemsQueryProvider contentItemsQueryProvider,
IEnumerable<ISitemapContentItemExtendedMetadataProvider> sitemapContentItemExtendedMetadataProviders
)
IEnumerable<ISitemapContentItemExtendedMetadataProvider> sitemapContentItemExtendedMetadataProviders)
{
_routeableContentTypeCoordinator = routeableContentTypeCoordinator;
_contentManager = contentManager;
Expand All @@ -32,22 +33,66 @@ IEnumerable<ISitemapContentItemExtendedMetadataProvider> sitemapContentItemExten

/// <summary>
/// Adds a <c>url</c> element to the sitemap response for each content item returned by the
/// content items query provider, querying the items in batches of <c>_batchSize</c>.
/// </summary>
/// <param name="source">The sitemap source; its <c>LimitedContentType</c>, when set, supplies the initial skip and the maximum number of items.</param>
/// <param name="context">The builder context whose response element receives the generated entries.</param>
public override async Task BuildSourceAsync(ContentTypesSitemapSource source, SitemapBuilderContext context)
{
    foreach (var sciemp in _sitemapContentItemExtendedMetadataProviders)
    {
        context.Response.ResponseElement.Add(sciemp.GetExtendedAttribute);
    }

    var maxAllowed = int.MaxValue;
    var skip = 0;

    if (source.LimitedContentType != null)
    {
        skip = source.LimitedContentType.Skip;

        // A non-positive Take leaves the number of items unlimited.
        if (source.LimitedContentType.Take > 0)
        {
            maxAllowed = source.LimitedContentType.Take;
        }
    }

    var total = 0;
    var take = _batchSize;
    var isLastBatch = false;

    while (true)
    {
        // Written as a subtraction so the comparison cannot overflow when maxAllowed is int.MaxValue.
        var remaining = maxAllowed - total;

        if (take >= remaining)
        {
            // Request only what is left of the allowance, not the amount by which
            // a full batch would overshoot it.
            take = remaining;

            isLastBatch = true;
        }

        var queryContext = new ContentItemsQueryContext();

        await _contentItemsQueryProvider.GetContentItemsAsync(source, queryContext, skip, take);

        if (queryContext.ContentItems == null)
        {
            break;
        }

        // Count while iterating so the result sequence is enumerated only once.
        var totalFound = 0;

        foreach (var contentItem in queryContext.ContentItems)
        {
            totalFound++;

            var url = new XElement(_namespace + "url");

            if (await BuildUrlsetMetadataAsync(source, context, queryContext, contentItem, url))
            {
                context.Response.ResponseElement.Add(url);
            }
        }

        // An empty batch means there is nothing left to read.
        if (totalFound == 0 || isLastBatch)
        {
            break;
        }

        total += totalFound;
        skip += take;
    }
}

Expand All @@ -58,7 +103,9 @@ private async Task<bool> BuildUrlsetMetadataAsync(ContentTypesSitemapSource sour
if (await BuildExtendedMetadataAsync(context, queryContext, contentItem, url))
{
PopulateLastMod(contentItem, url);

await PopulateChangeFrequencyPriority(source, contentItem, url);

return true;
}

Expand All @@ -71,13 +118,15 @@ private async Task<bool> BuildUrlsetMetadataAsync(ContentTypesSitemapSource sour
// Applies every registered extended metadata provider to the url element.
// Returns true only when all providers report success.
private async Task<bool> BuildExtendedMetadataAsync(SitemapBuilderContext context, ContentItemsQueryContext queryContext, ContentItem contentItem, XElement url)
{
    var allApplied = true;

    foreach (var provider in _sitemapContentItemExtendedMetadataProviders)
    {
        // No early exit: every provider is still invoked after one reports failure.
        var applied = await provider.ApplyExtendedMetadataAsync(context, queryContext, contentItem, url);

        allApplied = allApplied && applied;
    }

    return allApplied;
}

Expand All @@ -94,6 +143,7 @@ private async Task<bool> BuildUrlAsync(SitemapBuilderContext context, ContentIte
var loc = new XElement(_namespace + "loc");
loc.Add(locValue);
url.Add(loc);

return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,50 +10,46 @@ namespace OrchardCore.Contents.Sitemaps;

public class DefaultContentItemsQueryProvider : IContentItemsQueryProvider
{
private readonly ISession _session;
private readonly IStore _store;
private readonly IRouteableContentTypeCoordinator _routeableContentTypeCoordinator;

public DefaultContentItemsQueryProvider(
ISession session,
IRouteableContentTypeCoordinator routeableContentTypeCoordinator
)
IStore store,
IRouteableContentTypeCoordinator routeableContentTypeCoordinator)
{
_session = session;
_store = store;
_routeableContentTypeCoordinator = routeableContentTypeCoordinator;
}

public async Task GetContentItemsAsync(ContentTypesSitemapSource source, ContentItemsQueryContext context)
public async Task GetContentItemsAsync(ContentTypesSitemapSource source, ContentItemsQueryContext context, int? skip = null, int? take = null)
{
ArgumentNullException.ThrowIfNull(source);
ArgumentNullException.ThrowIfNull(context);

var routeableContentTypeDefinitions = await _routeableContentTypeCoordinator.ListRoutableTypeDefinitionsAsync();

using var session = _store.CreateSession(withTracking: false);

var query = session.Query<ContentItem, ContentItemIndex>();

if (source.IndexAll)
{
var rctdNames = routeableContentTypeDefinitions.Select(rctd => rctd.Name);

var queryResults = await _session.Query<ContentItem>()
.With<ContentItemIndex>(x => x.Published && x.ContentType.IsIn(rctdNames))
.OrderBy(x => x.CreatedUtc)
.ListAsync();

context.ContentItems = queryResults;
query = query.Where(x => x.Published && x.ContentType.IsIn(rctdNames));
}
else if (source.LimitItems)
{
// Test that content type is still valid to include in sitemap.
var typeIsValid = routeableContentTypeDefinitions
.Any(ctd => string.Equals(source.LimitedContentType.ContentTypeName, ctd.Name, StringComparison.Ordinal));

if (typeIsValid)
if (!typeIsValid)
{
var queryResults = await _session.Query<ContentItem>()
.With<ContentItemIndex>(x => x.ContentType == source.LimitedContentType.ContentTypeName && x.Published)
.OrderBy(x => x.CreatedUtc)
.Skip(source.LimitedContentType.Skip)
.Take(source.LimitedContentType.Take)
.ListAsync();

context.ContentItems = queryResults;
return;
}

query = query.Where(x => x.ContentType == source.LimitedContentType.ContentTypeName && x.Published);
}
else
{
Expand All @@ -62,12 +58,14 @@ public async Task GetContentItemsAsync(ContentTypesSitemapSource source, Content
.Where(ctd => source.ContentTypes.Any(s => string.Equals(ctd.Name, s.ContentTypeName, StringComparison.Ordinal)))
.Select(x => x.Name);

var queryResults = await _session.Query<ContentItem>()
.With<ContentItemIndex>(x => x.ContentType.IsIn(typesToIndex) && x.Published)
.OrderBy(x => x.CreatedUtc)
.ListAsync();

context.ContentItems = queryResults;
query = query.Where(x => x.ContentType.IsIn(typesToIndex) && x.Published);
}

context.ContentItems = await query
.OrderBy(x => x.CreatedUtc)
.ThenBy(x => x.Id)
.Take(take ?? 0)
.Skip(skip ?? 0)
.ListAsync();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,5 @@ public interface IContentItemsQueryProvider
/// <summary>
/// Get content items to evaluate for inclusion in a sitemap.
/// </summary>
Task GetContentItemsAsync(ContentTypesSitemapSource source, ContentItemsQueryContext context);
Task GetContentItemsAsync(ContentTypesSitemapSource source, ContentItemsQueryContext context, int? skip = null, int? take = null);
}
3 changes: 1 addition & 2 deletions src/docs/releases/2.0.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ Previously, the `CreateContentTask`, `RetrieveContentTask`, and `UpdateContentTa

Additionally, a new workflow-scoped script function `setCorrelationId(id:string): void` was added, that you can use to update the workflow's CorrelationId.


## Change Logs

### Azure AI Search Module
Expand Down Expand Up @@ -437,8 +438,6 @@ Additionally, if an error occurs, a new custom exception, RecipeExecutionExcepti

- `UserConfirmedEvent`: this event triggers when a user successfully confirms their email address after registration.



### GraphQL Module

When identifying content types for GraphQL exposure, we identify those without a stereotype to provide you with control over the behavior of stereotyped content types. A new option, `DiscoverableSterotypes`, has been introduced in `GraphQLContentOptions`. This allows you to specify stereotypes that should be discoverable by default.
Expand Down

0 comments on commit 8fd5c88

Please sign in to comment.