Skip to content

Commit

Permalink
+ class ThreadLatestReplierSaver to fix lumina37/aiotieba#208
Browse files Browse the repository at this point in the history
+ entity class `LatestReplier` that has one-to-one relationship with entity `ThreadPost.LatestReplier`
* replace field `LatestReplierUid` with foreign key to entity `LatestReplier` @ ThreadPost.cs, also affects `ThreadRevision`, `ThreadSaver` & `CrawlPost.CrawlReplies()`

* replace field `_latestRepliers` with `_latestRepliersKeyByUnique` to reuse latest repliers with same `UniqueLatestReplier` for `FillFromRequestingWith602()` @ ThreadCrawlFacade.cs

* now will invoke `ThreadLatestReplierSaver.Save()`
FieldRevisionIgnorance @ `Save()`
* no longer ignore revision for field `ThreadPost.LatestReplierUid` @ `FieldRevisionIgnorance()`
@ ThreadSaver.cs
@ c#
  • Loading branch information
n0099 committed Jul 10, 2024
1 parent 10d5cbd commit c8f4920
Show file tree
Hide file tree
Showing 9 changed files with 100 additions and 27 deletions.
5 changes: 5 additions & 0 deletions c#/crawler/src/Db/CrawlerDbContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public class CrawlerDbContext(ILogger<CrawlerDbContext> logger, Fid fid = 0)

public Fid Fid { get; } = fid;
public DbSet<User> Users => Set<User>();
public DbSet<LatestReplier> LatestRepliers => Set<LatestReplier>();
public DbSet<AuthorExpGradeRevision> AuthorExpGradeRevisions => Set<AuthorExpGradeRevision>();
public DbSet<ForumModeratorRevision> ForumModeratorRevisions => Set<ForumModeratorRevision>();
public DbSet<ThreadPost> Threads => Set<ThreadPost>();
Expand Down Expand Up @@ -67,6 +68,10 @@ protected override void OnModelCreating(ModelBuilder b)
base.OnModelCreating(b);
OnModelCreatingWithFid(b, Fid);
b.Entity<User>().ToTable("tbmc_user");
b.Entity<LatestReplier>().ToTable("tbmc_user_latestReplier");
b.Entity<LatestReplier>().Property(e => e.DisplayName).HasConversion<byte[]>();
b.Entity<LatestReplier>().HasOne<ThreadPost>().WithOne(e => e.LatestReplier)
.HasForeignKey<ThreadPost>(e => e.LatestReplierId);
b.Entity<ThreadPost>().ToTable($"tbmc_f{Fid}_thread");
b.Entity<ThreadMissingFirstReply>().ToTable("tbmc_thread_missingFirstReply");
b.Entity<ReplyPost>().ToTable($"tbmc_f{Fid}_reply");
Expand Down
8 changes: 8 additions & 0 deletions c#/crawler/src/Db/LatestReplier.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace tbm.Crawler.Db;

/// <summary>
/// Database entity for the user who posted the latest reply in a thread.
/// It is referenced from <c>ThreadPost.LatestReplier</c> through the <c>ThreadPost.LatestReplierId</c> foreign key.
/// NOTE(review): members not declared here (callers set <c>Name</c> and <c>DisplayName</c>)
/// are presumably inherited from <see cref="BaseUser"/> - confirm against that class.
/// </summary>
public class LatestReplier : BaseUser
{
/// <summary>Primary key, declared as a database-generated identity value.</summary>
[Key] [DatabaseGenerated(DatabaseGeneratedOption.Identity)]
public uint Id { get; set; }

Check notice on line 6 in c#/crawler/src/Db/LatestReplier.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / ReSharper

"[PropertyCanBeMadeInitOnly.Global] Property can be made init-only" on /Users/runner/work/open-tbm/open-tbm/c#/crawler/src/Db/LatestReplier.cs(6,157)

Check notice on line 6 in c#/crawler/src/Db/LatestReplier.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / ReSharper

"[PropertyCanBeMadeInitOnly.Global] Property can be made init-only" on /home/runner/work/open-tbm/open-tbm/c#/crawler/src/Db/LatestReplier.cs(6,157)

Check notice on line 6 in c#/crawler/src/Db/LatestReplier.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / ReSharper

"[PropertyCanBeMadeInitOnly.Global] Property can be made init-only" on D:\a\open-tbm\open-tbm\c#\crawler\src\Db\LatestReplier.cs(6,157)
/// <summary>
/// Nullable user id of the replier.
/// NOTE(review): presumably the Tieba uid; the visible construction site in ThreadCrawlFacade
/// only sets Name and DisplayName, so this appears to stay null there - confirm.
/// </summary>
public long? Uid { get; set; }

Check notice on line 7 in c#/crawler/src/Db/LatestReplier.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / ReSharper

"[UseSymbolAlias] Use type alias 'Uid'" on /Users/runner/work/open-tbm/open-tbm/c#/crawler/src/Db/LatestReplier.cs(7,175)

Check notice on line 7 in c#/crawler/src/Db/LatestReplier.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / ReSharper

"[PropertyCanBeMadeInitOnly.Global] Property can be made init-only" on /Users/runner/work/open-tbm/open-tbm/c#/crawler/src/Db/LatestReplier.cs(7,192)

Check notice on line 7 in c#/crawler/src/Db/LatestReplier.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / ReSharper

"[UseSymbolAlias] Use type alias 'Uid'" on /home/runner/work/open-tbm/open-tbm/c#/crawler/src/Db/LatestReplier.cs(7,175)

Check notice on line 7 in c#/crawler/src/Db/LatestReplier.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / ReSharper

"[PropertyCanBeMadeInitOnly.Global] Property can be made init-only" on /home/runner/work/open-tbm/open-tbm/c#/crawler/src/Db/LatestReplier.cs(7,192)

Check notice on line 7 in c#/crawler/src/Db/LatestReplier.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / ReSharper

"[UseSymbolAlias] Use type alias 'Uid'" on D:\a\open-tbm\open-tbm\c#\crawler\src\Db\LatestReplier.cs(7,175)

Check notice on line 7 in c#/crawler/src/Db/LatestReplier.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / ReSharper

"[PropertyCanBeMadeInitOnly.Global] Property can be made init-only" on D:\a\open-tbm\open-tbm\c#\crawler\src\Db\LatestReplier.cs(7,192)
}
3 changes: 2 additions & 1 deletion c#/crawler/src/Db/Post/ThreadPost.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ public class ThreadPost : BasePost
public string? AuthorPhoneType { get; set; }
public uint PostedAt { get; set; }
public uint LatestReplyPostedAt { get; set; }
public long? LatestReplierUid { get; set; }
public uint? LatestReplierId { get; set; }
public LatestReplier? LatestReplier { get; set; }
public uint? ReplyCount { get; set; }
public uint? ViewCount { get; set; }
public uint? ShareCount { get; set; }
Expand Down
4 changes: 2 additions & 2 deletions c#/crawler/src/Db/Revision/Splitting/ThreadRevisions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class ThreadRevision : BaseThreadRevision
public string? TopicType { get; set; }
public byte? IsGood { get; set; }
public uint? LatestReplyPostedAt { get; set; }
public long? LatestReplierUid { get; set; }
public uint? LatestReplierId { get; set; }
public uint? ReplyCount { get; set; }

[NotMapped]
Expand All @@ -39,7 +39,7 @@ public override bool IsAllFieldsIsNullExceptSplit() =>
TopicType,
IsGood,
LatestReplyPostedAt,
LatestReplierUid,
LatestReplierId,
ReplyCount,
ShareCount,
AgreeCount,
Expand Down
1 change: 1 addition & 0 deletions c#/crawler/src/EntryPoint.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ protected override void ConfigureContainer(HostBuilderContext context, Container
builder.RegisterType<UserParser>();
builder.RegisterType<ThreadLateCrawler>();
builder.RegisterType<ThreadLateCrawlFacade>();
builder.RegisterType<ThreadLatestReplierSaver>();
builder.RegisterType<SonicPusher>();
builder.RegisterType<CrawlPost>();
builder.RegisterGeneric(typeof(SaverLocks<>));
Expand Down
12 changes: 5 additions & 7 deletions c#/crawler/src/Tieba/Crawl/CrawlPost.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,11 @@ public async Task<SavedRepliesKeyByTid> CrawlReplies
.Aggregate(new HashSet<Tid>(), (shouldCrawl, threads) =>
{
shouldCrawl.UnionWith(threads.NewlyAdded.Select(th => th.Tid));
shouldCrawl.UnionWith(threads.Existing.Where(t =>
{
var (before, after) = t;
return before.ReplyCount != after.ReplyCount
|| before.LatestReplyPostedAt != after.LatestReplyPostedAt
|| before.LatestReplierUid != after.LatestReplierUid;
}).Select(t => t.Before.Tid));
shouldCrawl.UnionWith(threads.Existing
.Where(t => t.Before.ReplyCount != t.After.ReplyCount
|| t.Before.LatestReplyPostedAt != t.After.LatestReplyPostedAt
|| t.Before.LatestReplierId != t.After.LatestReplierId)
.Select(t => t.Before.Tid));
return shouldCrawl;
});
var savedRepliesKeyByTid = new SavedRepliesKeyByTid();
Expand Down
15 changes: 13 additions & 2 deletions c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ public class ThreadCrawlFacade(
postParser, postSaverFactory.Invoke,
userParserFactory.Invoke, userSaverFactory.Invoke)
{
private readonly Dictionary<Uid, User> _latestRepliers = [];
private readonly Dictionary<ThreadLatestReplierSaver.UniqueLatestReplier, LatestReplier?> _latestRepliersKeyByUnique = [];

public delegate ThreadCrawlFacade New(Fid fid, string forumName);

Expand Down Expand Up @@ -44,8 +44,19 @@ join parsed in Posts.Values on (Tid)inResponse.Tid equals parsed.Tid
{ // replace with more detailed location.name in the 6.0.2 response
t.parsed.Geolocation = Helper.SerializedProtoBufOrNullIfEmpty(t.inResponse.Location);
}
var name = t.inResponse.LastReplyer.Name.NullIfEmpty();
var nameShow = t.inResponse.LastReplyer.NameShow.NullIfEmpty();

// LastReplyer will be null when LivePostType != "", but LastTimeInt will have expected timestamp value
t.parsed.LatestReplierUid = t.inResponse.LastReplyer?.Uid;
var latestReplierEntity = t.inResponse.LastReplyer == null ? null : new LatestReplier()

Check failure on line 51 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Remove unnecessary parenthesis. (https://code-cracker.github.io/diagnostics/CC0015.html)

Check failure on line 51 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Remove unnecessary parenthesis. (https://code-cracker.github.io/diagnostics/CC0015.html)

Check failure on line 51 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Remove unnecessary parenthesis. (https://code-cracker.github.io/diagnostics/CC0015.html)

Check failure on line 51 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Remove unnecessary parenthesis. (https://code-cracker.github.io/diagnostics/CC0015.html)

Check failure on line 51 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Remove unnecessary parenthesis. (https://code-cracker.github.io/diagnostics/CC0015.html)

Check failure on line 51 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Remove unnecessary parenthesis. (https://code-cracker.github.io/diagnostics/CC0015.html)
{
Name = name,
DisplayName = name == nameShow ? null : nameShow

Check failure on line 54 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Extract this nested ternary operation into an independent statement. (https://rules.sonarsource.com/csharp/RSPEC-3358)

Check failure on line 54 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Extract this nested ternary operation into an independent statement. (https://rules.sonarsource.com/csharp/RSPEC-3358)

Check failure on line 54 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Extract this nested ternary operation into an independent statement. (https://rules.sonarsource.com/csharp/RSPEC-3358)

Check failure on line 54 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Extract this nested ternary operation into an independent statement. (https://rules.sonarsource.com/csharp/RSPEC-3358)

Check failure on line 54 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Extract this nested ternary operation into an independent statement. (https://rules.sonarsource.com/csharp/RSPEC-3358)

Check failure on line 54 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Extract this nested ternary operation into an independent statement. (https://rules.sonarsource.com/csharp/RSPEC-3358)
};
var uniqueLatestReplier = ThreadLatestReplierSaver.UniqueLatestReplier.FromLatestReplier(latestReplierEntity);

t.parsed.LatestReplier = _latestRepliersKeyByUnique.TryGetValue(uniqueLatestReplier, out var existingLatestReplier)

Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

'ThreadPost.LatestReplier' and 'Dictionary<ThreadLatestReplierSaver.UniqueLatestReplier, LatestReplier?>.this[ThreadLatestReplierSaver.UniqueLatestReplier]' are assigned in a single statement (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1522.md)

Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

'ThreadPost.LatestReplier' and 'Dictionary<ThreadLatestReplierSaver.UniqueLatestReplier, LatestReplier?>.this[ThreadLatestReplierSaver.UniqueLatestReplier]' are assigned in a single statement (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1522.md)

Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

'ThreadPost.LatestReplier' and 'Dictionary<ThreadLatestReplierSaver.UniqueLatestReplier, LatestReplier?>.this[ThreadLatestReplierSaver.UniqueLatestReplier]' are assigned in a single statement (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1522.md)

Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

'ThreadPost.LatestReplier' and 'Dictionary<ThreadLatestReplierSaver.UniqueLatestReplier, LatestReplier?>.this[ThreadLatestReplierSaver.UniqueLatestReplier]' are assigned in a single statement (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1522.md)

Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

'ThreadPost.LatestReplier' and 'Dictionary<ThreadLatestReplierSaver.UniqueLatestReplier, LatestReplier?>.this[ThreadLatestReplierSaver.UniqueLatestReplier]' are assigned in a single statement (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1522.md)

Check failure on line 58 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

'ThreadPost.LatestReplier' and 'Dictionary<ThreadLatestReplierSaver.UniqueLatestReplier, LatestReplier?>.this[ThreadLatestReplierSaver.UniqueLatestReplier]' are assigned in a single statement (https://github.com/dennisdoomen/CSharpGuidelines/blob/5.7.0/_rules/1522.md)
? existingLatestReplier
: _latestRepliersKeyByUnique[uniqueLatestReplier] = latestReplierEntity;

Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Extract the assignment of '_latestRepliersKeyByUnique[uniqueLatestReplier]' from this expression. (https://rules.sonarsource.com/csharp/RSPEC-1121)

Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (macos-latest) / build (crawler)

Extract the assignment of '_latestRepliersKeyByUnique[uniqueLatestReplier]' from this expression. (https://rules.sonarsource.com/csharp/RSPEC-1121)

Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Extract the assignment of '_latestRepliersKeyByUnique[uniqueLatestReplier]' from this expression. (https://rules.sonarsource.com/csharp/RSPEC-1121)

Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (ubuntu-latest) / build (crawler)

Extract the assignment of '_latestRepliersKeyByUnique[uniqueLatestReplier]' from this expression. (https://rules.sonarsource.com/csharp/RSPEC-1121)

Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Extract the assignment of '_latestRepliersKeyByUnique[uniqueLatestReplier]' from this expression. (https://rules.sonarsource.com/csharp/RSPEC-1121)

Check failure on line 60 in c#/crawler/src/Tieba/Crawl/Facade/ThreadCrawlFacade.cs

View workflow job for this annotation

GitHub Actions / runs-on (windows-latest) / build (crawler)

Extract the assignment of '_latestRepliersKeyByUnique[uniqueLatestReplier]' from this expression. (https://rules.sonarsource.com/csharp/RSPEC-1121)
});
}
35 changes: 20 additions & 15 deletions c#/crawler/src/Tieba/Crawl/Saver/Post/ThreadSaver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,24 @@ namespace tbm.Crawler.Tieba.Crawl.Saver.Post;
public partial class ThreadSaver(
ILogger<ThreadSaver> logger,
ConcurrentDictionary<Tid, ThreadPost> posts,
ThreadLatestReplierSaver threadLatestReplierSaver,
AuthorRevisionSaver.New authorRevisionSaverFactory)
: PostSaver<ThreadPost, BaseThreadRevision, Tid>(
logger, posts, authorRevisionSaverFactory, PostType.Thread)
{
public delegate ThreadSaver New(ConcurrentDictionary<Tid, ThreadPost> posts);

public override SaverChangeSet<ThreadPost> Save(CrawlerDbContext db) =>
Save(db, th => th.Tid,
public override SaverChangeSet<ThreadPost> Save(CrawlerDbContext db)
{
var changeSet = Save(db,
th => th.Tid,
th => new ThreadRevision {TakenAt = th.UpdatedAt ?? th.CreatedAt, Tid = th.Tid},
PredicateBuilder.New<ThreadPost>(th => Posts.Keys.Contains(th.Tid)));

PostSaveHandlers += threadLatestReplierSaver.Save(db, changeSet.AllAfter).Invoke;

return changeSet;
}
}
public partial class ThreadSaver
{
Expand Down Expand Up @@ -74,32 +82,29 @@ when newValue is ""

// when the latest reply post is deleted and there's no new reply after delete
// this field but not LatestReplyPostedAt will be null
nameof(ThreadPost.LatestReplierUid) when newValue is null => true,
nameof(ThreadPost.LatestReplierId) when newValue is null => true,
_ => false
};

protected override bool FieldRevisionIgnorance
(string propName, object? oldValue, object? newValue) => propName switch
{ // empty string from response has been updated by ReplyCrawlFacade.OnPostParse()
nameof(ThreadPost.Title) when oldValue is "" => true,

// null values will be later set by tieba client 6.0.2 response at ThreadParser.ParseInternal()
nameof(ThreadPost.LatestReplierUid) when oldValue is null => true,
_ => false
};

[SuppressMessage("StyleCop.CSharp.SpacingRules", "SA1025:Code should not contain multiple whitespace in a row")]
protected override NullFieldsBitMask GetRevisionNullFieldBitMask(string fieldName) => fieldName switch
{
nameof(ThreadPost.StickyType) => 1,
nameof(ThreadPost.TopicType) => 1 << 1,
nameof(ThreadPost.IsGood) => 1 << 2,
nameof(ThreadPost.LatestReplierUid) => 1 << 4,
nameof(ThreadPost.ReplyCount) => 1 << 5,
nameof(ThreadPost.ShareCount) => 1 << 7,
nameof(ThreadPost.AgreeCount) => 1 << 8,
nameof(ThreadPost.DisagreeCount) => 1 << 9,
nameof(ThreadPost.Geolocation) => 1 << 10,
nameof(ThreadPost.StickyType) => 1,
nameof(ThreadPost.TopicType) => 1 << 1,
nameof(ThreadPost.IsGood) => 1 << 2,
nameof(ThreadPost.LatestReplierId) => 1 << 4,
nameof(ThreadPost.ReplyCount) => 1 << 5,
nameof(ThreadPost.ShareCount) => 1 << 7,
nameof(ThreadPost.AgreeCount) => 1 << 8,
nameof(ThreadPost.DisagreeCount) => 1 << 9,
nameof(ThreadPost.Geolocation) => 1 << 10,
_ => 0
};
}
44 changes: 44 additions & 0 deletions c#/crawler/src/Tieba/Crawl/Saver/ThreadLatestReplierSaver.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
namespace tbm.Crawler.Tieba.Crawl.Saver;

/// <summary>
/// Matches the <see cref="LatestReplier"/> of crawled threads against rows already stored in
/// <c>CrawlerDbContext.LatestRepliers</c> and points each thread at the stored row when one exists.
/// Repliers are identified by <see cref="UniqueLatestReplier"/>, i.e. the (Name, DisplayName) pair.
/// </summary>
public class ThreadLatestReplierSaver(SaverLocks<ThreadLatestReplierSaver.UniqueLatestReplier>.New saverLocksFactory)
{
    // One set shared by all instances; it is handed to every SaverLocks created by the factory below.
    private static readonly HashSet<UniqueLatestReplier> GlobalLockedLatestRepliers = [];

    // Created lazily so the factory is only invoked the first time Save() needs the locks.
    private readonly Lazy<SaverLocks<UniqueLatestReplier>> _saverLocks =
        new(() => saverLocksFactory(GlobalLockedLatestRepliers));

    /// <summary>
    /// Replaces <c>ThreadPost.LatestReplier</c> with the stored row that has the same
    /// (Name, DisplayName) pair, then acquires saver locks for the pairs that have no stored row yet.
    /// </summary>
    /// <param name="db">Context whose <c>LatestRepliers</c> set is queried without change tracking.</param>
    /// <param name="threads">Threads to inspect; those without a latest replier are left untouched.</param>
    /// <returns>
    /// The <c>Dispose</c> method of the underlying saver locks, for the caller to invoke later
    /// (<c>ThreadSaver.Save()</c> subscribes it to <c>PostSaveHandlers</c>).
    /// </returns>
    public Action Save(CrawlerDbContext db, IReadOnlyCollection<ThreadPost> threads)
    {
        // Only threads that actually carry a latest replier take part: a thread without one maps to the
        // (null, null) key and must not be matched against a stored row whose Name and DisplayName are both null.
        var threadsWithLatestReplier = threads
            .Where(th => th.LatestReplier is not null)
            .ToList();

        // Distinct() relies on the record's value equality and keeps the SQL predicate
        // to one OR branch per replier instead of one per thread.
        var uniqueLatestRepliers = threadsWithLatestReplier
            .Select(UniqueLatestReplier.FromThread)
            .Distinct()
            .ToList();

        // Nothing to look up means nothing can exist: skip the database round trip entirely.
        List<LatestReplier> existingLatestRepliers = [];
        if (uniqueLatestRepliers.Count != 0)
        {
            existingLatestRepliers = db.LatestRepliers.AsNoTracking()
                .Where(uniqueLatestRepliers.Aggregate(
                    LinqKit.PredicateBuilder.New<LatestReplier>(),
                    (predicate, newOrExisting) =>
                        predicate.Or(LinqKit.PredicateBuilder
                            .New<LatestReplier>(existing =>
                                existing.Name == newOrExisting.Name)
                            .And(existing =>
                                existing.DisplayName == newOrExisting.DisplayName))))
                .ToList();
        }

        var matches =
            from existing in existingLatestRepliers
            join thread in threadsWithLatestReplier
                on UniqueLatestReplier.FromLatestReplier(existing) equals UniqueLatestReplier.FromThread(thread)
            select (existing, thread);
        foreach (var (stored, matchedThread) in matches)
        {
            matchedThread.LatestReplier = stored;
        }

        // Lock only the repliers that are not stored yet; the return value of Acquire() is deliberately discarded.
        _ = _saverLocks.Value.Acquire(uniqueLatestRepliers
            .Except(existingLatestRepliers.Select(UniqueLatestReplier.FromLatestReplier))
            .ToList());
        return _saverLocks.Value.Dispose;
    }

    /// <summary>
    /// Value-equality key that identifies a latest replier by its <c>Name</c> and <c>DisplayName</c>.
    /// </summary>
    public record UniqueLatestReplier(string? Name, string? DisplayName)
    {
        /// <summary>Builds the key from an entity; a null entity yields the (null, null) key.</summary>
        public static UniqueLatestReplier FromLatestReplier(LatestReplier? latestReplier) =>
            new(latestReplier?.Name, latestReplier?.DisplayName);

        /// <summary>Builds the key from the latest replier currently attached to <paramref name="thread"/>.</summary>
        public static UniqueLatestReplier FromThread(ThreadPost thread) =>
            FromLatestReplier(thread.LatestReplier);
    }
}

0 comments on commit c8f4920

Please sign in to comment.