Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimizing Sitemap Creation with Batched Content Items #16636

Merged
merged 13 commits into from
Aug 30, 2024
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -13,109 +13,120 @@ namespace OrchardCore.ContentLocalization.Sitemaps;

public class LocalizedContentItemsQueryProvider : IContentItemsQueryProvider
{
private readonly ISession _session;
private readonly IStore _store;
private readonly IRouteableContentTypeCoordinator _routeableContentTypeCoordinator;
private readonly ILocalizationService _localizationService;

public LocalizedContentItemsQueryProvider(
ISession session,
IStore store,
IRouteableContentTypeCoordinator routeableContentTypeCoordinator,
ILocalizationService localizationService
)
{
_session = session;
_store = store;
_routeableContentTypeCoordinator = routeableContentTypeCoordinator;
_localizationService = localizationService;
}

/// <summary>
/// Loads one page of published content items for the given sitemap source.
/// </summary>
/// <param name="source">
/// The sitemap source. <c>IndexAll</c> selects every routable content type,
/// <c>LimitItems</c> selects the single type named by <c>LimitedContentType</c>,
/// otherwise the types listed in <c>ContentTypes</c> are used.
/// </param>
/// <param name="context">Paging window: <c>Skip</c> rows are skipped and at most <c>Take</c> rows are returned.</param>
/// <returns>
/// The page of content items, plus the subset of that same page that carries a
/// <see cref="LocalizationPart"/> as reference items. Reference items are empty when the
/// limited content type is not localized or is no longer routable.
/// </returns>
public async Task<ContentItemsQueryResult> GetContentItemsAsync(ContentTypesSitemapSource source, ContentItemsQueryContext context)
{
    var routeableContentTypeDefinitions = await _routeableContentTypeCoordinator.ListRoutableTypeDefinitionsAsync();

    // A dedicated read-only session is created per call, so it must be released here.
    // Every query below is awaited (ListAsync) before the method returns.
    await using var session = _store.CreateSession(withTracking: false);

    if (source.IndexAll)
    {
        // Assumption here is that at least one content type will be localized.
        var ctdNames = routeableContentTypeDefinitions.Select(ctd => ctd.Name);

        // CreatedUtc alone is not unique; Id is the tie-breaker that keeps paging stable.
        var results = await session.Query<ContentItem>()
            .With<ContentItemIndex>(x => x.Published && x.ContentType.IsIn(ctdNames))
            .OrderBy(x => x.CreatedUtc)
            .ThenBy(x => x.Id)
            .Skip(context.Skip)
            .Take(context.Take)
            .ListAsync();

        return new ContentItemsQueryResult
        {
            ContentItems = results,

            // Provide all content items with localization as reference content items.
            ReferenceContentItems = results.Where(ci => ci.Has<LocalizationPart>()),
        };
    }

    if (source.LimitItems)
    {
        // Test that content type is still valid to include in sitemap.
        var contentType = routeableContentTypeDefinitions
            .FirstOrDefault(ctd => string.Equals(source.LimitedContentType.ContentTypeName, ctd.Name, StringComparison.Ordinal));

        if (contentType == null)
        {
            return new ContentItemsQueryResult
            {
                ContentItems = [],
                ReferenceContentItems = [],
            };
        }

        if (contentType.Parts.Any(ctd => string.Equals(ctd.Name, nameof(LocalizationPart), StringComparison.Ordinal)))
        {
            // When limiting items, a content item is valid only if it is for the default culture.
            var defaultCulture = await _localizationService.GetDefaultCultureAsync();

            // The culture filter is applied in the query itself so that Skip/Take
            // page over default-culture items only.
            var contentItems = await session.Query<ContentItem>()
                .With<ContentItemIndex>(ci => ci.ContentType == source.LimitedContentType.ContentTypeName && ci.Published)
                .OrderBy(ci => ci.CreatedUtc)
                .ThenBy(ci => ci.Id)
                .With<LocalizedContentItemIndex>(x => x.Culture == defaultCulture)
                .Skip(context.Skip)
                .Take(context.Take)
                .ListAsync();

            return new ContentItemsQueryResult
            {
                ContentItems = contentItems,
                ReferenceContentItems = contentItems.Where(ci => ci.Has<LocalizationPart>()),
            };
        }

        // Content type is not localized. Produce standard results.
        // Ordered by CreatedUtc then Id, like the other branches, so that rows sharing
        // a CreatedUtc value cannot be duplicated or dropped between batches.
        var items = await session.Query<ContentItem>()
            .With<ContentItemIndex>(x => x.ContentType == source.LimitedContentType.ContentTypeName && x.Published)
            .OrderBy(x => x.CreatedUtc)
            .ThenBy(x => x.Id)
            .Skip(context.Skip)
            .Take(context.Take)
            .ListAsync();

        return new ContentItemsQueryResult
        {
            ContentItems = items,
            ReferenceContentItems = [],
        };
    }

    // Test that content types are still valid to include in sitemap.
    var typesToIndex = routeableContentTypeDefinitions
        .Where(ctd => source.ContentTypes.Any(s => string.Equals(ctd.Name, s.ContentTypeName, StringComparison.Ordinal)))
        .Select(x => x.Name);

    // No advantage here in reducing with localized index.
    var queryResults = await session.Query<ContentItem>()
        .With<ContentItemIndex>(x => x.ContentType.IsIn(typesToIndex) && x.Published)
        .OrderBy(x => x.CreatedUtc)
        .ThenBy(x => x.Id)
        .Skip(context.Skip)
        .Take(context.Take)
        .ListAsync();

    return new ContentItemsQueryResult
    {
        ContentItems = queryResults,

        // Provide all content items with localization as reference content items.
        ReferenceContentItems = queryResults.Where(ci => ci.Has<LocalizationPart>()),
    };
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,19 @@ IRouteableContentTypeCoordinator routeableContentTypeCoordinator

public async Task<bool> ApplyExtendedMetadataAsync(
SitemapBuilderContext context,
ContentItemsQueryContext queryContext,
ContentItemsQueryResult queryResult,
ContentItem contentItem,
XElement url)
{
var part = contentItem.As<LocalizationPart>();
if (part == null)
if (part == null ||
queryResult.ReferenceContentItems == null ||
!queryResult.ReferenceContentItems.Any())
{
return true;
}

var localizedContentParts = queryContext.ReferenceContentItems
var localizedContentParts = queryResult.ReferenceContentItems
.Select(ci => ci.As<LocalizationPart>())
.Where(cp => cp.LocalizationSet == part.LocalizationSet);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ namespace OrchardCore.Contents.Sitemaps;

public class ContentTypesSitemapSourceBuilder : SitemapSourceBuilderBase<ContentTypesSitemapSource>
{
private const int _batchSize = 500;

private static readonly XNamespace _namespace = "http://www.sitemaps.org/schemas/sitemap/0.9";

private readonly IRouteableContentTypeCoordinator _routeableContentTypeCoordinator;
Expand All @@ -21,8 +23,7 @@ public ContentTypesSitemapSourceBuilder(
IRouteableContentTypeCoordinator routeableContentTypeCoordinator,
IContentManager contentManager,
IContentItemsQueryProvider contentItemsQueryProvider,
IEnumerable<ISitemapContentItemExtendedMetadataProvider> sitemapContentItemExtendedMetadataProviders
)
IEnumerable<ISitemapContentItemExtendedMetadataProvider> sitemapContentItemExtendedMetadataProviders)
{
_routeableContentTypeCoordinator = routeableContentTypeCoordinator;
_contentManager = contentManager;
Expand All @@ -32,33 +33,80 @@ IEnumerable<ISitemapContentItemExtendedMetadataProvider> sitemapContentItemExten

/// <summary>
/// Writes the <c>url</c> elements for a content-types sitemap source, loading the
/// content items in batches of <c>_batchSize</c> instead of all at once.
/// </summary>
/// <param name="source">The sitemap source describing which content items to include.</param>
/// <param name="context">The builder context whose response element receives the generated entries.</param>
public override async Task BuildSourceAsync(ContentTypesSitemapSource source, SitemapBuilderContext context)
{
    foreach (var sciemp in _sitemapContentItemExtendedMetadataProviders)
    {
        context.Response.ResponseElement.Add(sciemp.GetExtendedAttribute);
    }

    var queryContext = new ContentItemsQueryContext()
    {
        Take = _batchSize,
    };

    var maxAllowed = int.MaxValue;

    // NOTE(review): the query provider only treats LimitedContentType as relevant when
    // source.LimitItems is set (and IndexAll is not); here the Skip/Take limit is applied
    // whenever LimitedContentType is non-null. Confirm that this is intended.
    if (source.LimitedContentType != null)
    {
        queryContext.Skip = source.LimitedContentType.Skip;

        if (source.LimitedContentType.Take > 0)
        {
            maxAllowed = source.LimitedContentType.Take;
        }
    }

    var total = 0;

    while (total < maxAllowed)
    {
        // Never request more than what is still allowed: the final batch is
        // trimmed to the remaining quota (maxAllowed - total).
        var remaining = maxAllowed - total;
        if (remaining < queryContext.Take)
        {
            queryContext.Take = remaining;
        }

        var requested = queryContext.Take;

        var result = await _contentItemsQueryProvider.GetContentItemsAsync(source, queryContext);

        // Count while iterating so the result sequence is enumerated only once.
        var found = 0;

        if (result?.ContentItems != null)
        {
            foreach (var contentItem in result.ContentItems)
            {
                found++;

                var url = new XElement(_namespace + "url");

                if (await BuildUrlsetMetadataAsync(source, context, result, contentItem, url))
                {
                    context.Response.ResponseElement.Add(url);
                }
            }
        }

        total += found;

        // An empty or short batch means the query has no more rows to return.
        if (found < requested)
        {
            break;
        }

        queryContext.Skip += requested;
    }
}

private async Task<bool> BuildUrlsetMetadataAsync(ContentTypesSitemapSource source, SitemapBuilderContext context, ContentItemsQueryContext queryContext, ContentItem contentItem, XElement url)
private async Task<bool> BuildUrlsetMetadataAsync(ContentTypesSitemapSource source, SitemapBuilderContext context, ContentItemsQueryResult queryResult, ContentItem contentItem, XElement url)
{
if (await BuildUrlAsync(context, contentItem, url))
{
if (await BuildExtendedMetadataAsync(context, queryContext, contentItem, url))
if (await BuildExtendedMetadataAsync(context, queryResult, contentItem, url))
{
PopulateLastMod(contentItem, url);

await PopulateChangeFrequencyPriority(source, contentItem, url);

return true;
}

Expand All @@ -68,12 +116,12 @@ private async Task<bool> BuildUrlsetMetadataAsync(ContentTypesSitemapSource sour
return false;
}

private async Task<bool> BuildExtendedMetadataAsync(SitemapBuilderContext context, ContentItemsQueryContext queryContext, ContentItem contentItem, XElement url)
private async Task<bool> BuildExtendedMetadataAsync(SitemapBuilderContext context, ContentItemsQueryResult queryResult, ContentItem contentItem, XElement url)
{
var succeeded = true;
foreach (var sc in _sitemapContentItemExtendedMetadataProviders)
{
if (!await sc.ApplyExtendedMetadataAsync(context, queryContext, contentItem, url))
if (!await sc.ApplyExtendedMetadataAsync(context, queryResult, contentItem, url))
{
succeeded = false;
}
Expand Down
Loading
Loading