From 1413923bdb2dd82b690997038ba8e084742d6fc9 Mon Sep 17 00:00:00 2001
From: n0099
Date: Sat, 18 May 2024 17:47:42 +0800
Subject: [PATCH] * fix every existing entity will be in modified state since
 prop `RowVersionedEntity.Version` will always be reset to `0`

* narrow generic constraint for `TEntity` from `class` to `RowVersionedEntity`
* rename param `existingOrNewLookup` to `isExistingEntityLookup` @ `SaverWithRevision.SaveEntitiesWithRevision()`
* rename primary ctor param `registeredLocksLookup` to `registeredLocksKeyByType` @ RetryCrawlWorker.cs
* inline `using LinqKit` @ (Sub)ReplySaver.cs @ crawler
* now will log the already retried times and no exception @ `TbmDbContext.LogDbUpdateConcurrencyException()` @ shared
* enable inspection `resharper_unnecessary_whitespace_highlighting` @ .editorconfig @ c#
---
 c#/.editorconfig                                 |  1 +
 c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs |  3 ++-
 .../src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs |  2 +-
 .../src/Tieba/Crawl/Saver/Post/ReplySaver.cs     |  4 +---
 .../src/Tieba/Crawl/Saver/Post/SubReplySaver.cs  |  4 +---
 .../src/Tieba/Crawl/Saver/SaverWithRevision.cs   | 12 ++++++------
 c#/crawler/src/Worker/RetryCrawlWorker.cs        |  4 ++--
 c#/shared/src/Db/TbmDbContext.cs                 | 15 ++++++++++-----
 8 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/c#/.editorconfig b/c#/.editorconfig
index 34beb3a8..cc48d3d2 100644
--- a/c#/.editorconfig
+++ b/c#/.editorconfig
@@ -143,6 +143,7 @@ resharper_suggest_var_or_type_simple_types_highlighting = hint
 resharper_entity_framework_model_validation_unlimited_string_length_highlighting = none
 resharper_move_local_function_after_jump_statement_highlighting = none
 resharper_separate_local_functions_with_jump_statement_highlighting = none
+resharper_unnecessary_whitespace_highlighting = warning
 
 ###############################
 # .NET Coding Conventions     #
diff --git a/c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs
index 4eb87ecc..ce2d3610 100644
--- a/c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs
+++ b/c#/crawler/src/Tieba/Crawl/Facade/CrawlFacade.cs
@@ -41,6 +41,7 @@ public virtual void Dispose()
     [SuppressMessage("Major Bug", "S1751:Loops with at most one iteration should be refactored")]
     public SaverChangeSet? SaveCrawled(CancellationToken stoppingToken = default)
     {
+        var retryTimes = 0;
         while (true)
         {
             using var db = DbContextFactory(Fid); // dispose after each loop when retrying
@@ -66,7 +67,7 @@ public virtual void Dispose()
             }
             catch (DbUpdateConcurrencyException e)
             {
-                db.LogDbUpdateConcurrencyException(e);
+                db.LogDbUpdateConcurrencyException(e, ref retryTimes);
             }
             finally
             {
diff --git a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs
index 584ecf1e..2e506423 100644
--- a/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs
+++ b/c#/crawler/src/Tieba/Crawl/Saver/AuthorRevisionSaver.cs
@@ -66,7 +66,7 @@ private static void SaveAuthorRevisions(
         var newRevisionOfExistingUsers = existingRevisionOfExistingUsers
 
             // filter out revisions with the same DiscoveredAt to prevent duplicate keys
-            // when some fields get updated more than one time in a second
+            // when some fields get updated more than once in a second
             .Where(t => t.Existing.DiscoveredAt != t.NewInPost.DiscoveredAt
                         && isValueChangedPredicate(t.Existing.Value, t.NewInPost.Value))
             .Select(t => (t.Uid, t.NewInPost.Value, t.NewInPost.DiscoveredAt));
diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs
index 45ee4b0c..d44ba4ea 100644
--- a/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs
+++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/ReplySaver.cs
@@ -1,5 +1,3 @@
-using PredicateBuilder = LinqKit.PredicateBuilder;
-
 namespace tbm.Crawler.Tieba.Crawl.Saver.Post;
 
 public class ReplySaver(
@@ -49,7 +47,7 @@ public override SaverChangeSet Save(CrawlerDbContext db)
     {
         var changeSet = Save(db, r => r.Pid,
             r => new ReplyRevision {TakenAt = r.UpdatedAt ?? r.CreatedAt, Pid = r.Pid},
-            PredicateBuilder.New(r => Posts.Keys.Contains(r.Pid)));
+            LinqKit.PredicateBuilder.New(r => Posts.Keys.Contains(r.Pid)));
 
         replyContentImageSaver.Save(db, changeSet.NewlyAdded);
         PostSaveHandlers += AuthorRevisionSaver.SaveAuthorExpGradeRevisions(db, changeSet.AllAfter).Invoke;
diff --git a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs
index b87d556c..65909144 100644
--- a/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs
+++ b/c#/crawler/src/Tieba/Crawl/Saver/Post/SubReplySaver.cs
@@ -1,5 +1,3 @@
-using LinqKit;
-
 namespace tbm.Crawler.Tieba.Crawl.Saver.Post;
 
 public class SubReplySaver(
@@ -44,7 +42,7 @@ public override SaverChangeSet Save(CrawlerDbContext db)
     {
         var changeSet = Save(db, sr => sr.Spid,
             sr => new SubReplyRevision {TakenAt = sr.UpdatedAt ?? sr.CreatedAt, Spid = sr.Spid},
-            PredicateBuilder.New(sr => Posts.Keys.Contains(sr.Spid)));
+            LinqKit.PredicateBuilder.New(sr => Posts.Keys.Contains(sr.Spid)));
 
         PostSaveHandlers += AuthorRevisionSaver.SaveAuthorExpGradeRevisions(db, changeSet.AllAfter).Invoke;
         return changeSet;
diff --git a/c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs b/c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs
index c90576e9..d545475a 100644
--- a/c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs
+++ b/c#/crawler/src/Tieba/Crawl/Saver/SaverWithRevision.cs
@@ -25,21 +25,21 @@ public abstract partial class SaverWithRevision
     protected void SaveEntitiesWithRevision<TEntity, TRevision>(
         CrawlerDbContext db,
         Func<TEntity, TRevision> revisionFactory,
-        ILookup<bool, TEntity> existingOrNewLookup,
+        ILookup<bool, TEntity> isExistingEntityLookup,
         Func<TEntity, TEntity> existingSelector,
         UserSaver.FieldChangeIgnorance? userFieldUpdateIgnorance = null,
         UserSaver.FieldChangeIgnorance? userFieldRevisionIgnorance = null)
-        where TEntity : class
+        where TEntity : RowVersionedEntity
         where TRevision : BaseRevisionWithSplitting
     {
-        db.Set<TEntity>().AddRange(existingOrNewLookup[false]); // newly added
-        var newRevisions = existingOrNewLookup[true].Select(newEntity =>
+        db.Set<TEntity>().AddRange(isExistingEntityLookup[false]); // newly added
+        var newRevisions = isExistingEntityLookup[true].Select(newEntity =>
         {
             var entityInTracking = existingSelector(newEntity);
             var entityEntry = db.Entry(entityInTracking);
-            // this will mutate existingEntity which is referenced by entry
-            entityEntry.CurrentValues.SetValues(newEntity);
+            entityEntry.CurrentValues.SetValues(newEntity); // mutate existingEntity that is referenced by entry
+            entityEntry.Property(e => e.Version).IsModified = false; // newEntity.Version will always be default 0
 
             bool IsTimestampingFieldName(string name) => name
                 is nameof(BasePost.LastSeenAt) or nameof(TimestampedEntity.CreatedAt)
                 or nameof(TimestampedEntity.UpdatedAt);
diff --git a/c#/crawler/src/Worker/RetryCrawlWorker.cs b/c#/crawler/src/Worker/RetryCrawlWorker.cs
index 6ba72f28..329d4012 100644
--- a/c#/crawler/src/Worker/RetryCrawlWorker.cs
+++ b/c#/crawler/src/Worker/RetryCrawlWorker.cs
@@ -2,7 +2,7 @@ namespace tbm.Crawler.Worker;
 
 public class RetryCrawlWorker(
     ILogger logger,
-    IIndex<CrawlerLocks.Type, CrawlerLocks> registeredLocksLookup,
+    IIndex<CrawlerLocks.Type, CrawlerLocks> registeredLocksKeyByType,
     CrawlPost crawlPost,
     Func> dbContextDefaultFactory,
     Func> threadLateCrawlFacadeFactory,
@@ -16,7 +16,7 @@ protected override async Task DoWork(CancellationToken stoppingToken)
         foreach (var lockType in Enum.GetValues<CrawlerLocks.Type>())
         {
             if (stoppingToken.IsCancellationRequested) return;
-            var failed = registeredLocksLookup[lockType].RetryAllFailed();
+            var failed = registeredLocksKeyByType[lockType].RetryAllFailed();
             if (failed.Count == 0) continue; // skip current lock type if there's nothing needs to retry
             if (lockType == CrawlerLocks.Type.ThreadLate)
             {
diff --git a/c#/shared/src/Db/TbmDbContext.cs b/c#/shared/src/Db/TbmDbContext.cs
index 3891c38c..68355e07 100644
--- a/c#/shared/src/Db/TbmDbContext.cs
+++ b/c#/shared/src/Db/TbmDbContext.cs
@@ -11,14 +11,18 @@ namespace tbm.Shared.Db;
 
 public abstract class TbmDbContext(ILogger logger) : DbContext
 {
-    public void LogDbUpdateConcurrencyException(DbUpdateConcurrencyException e) =>
-        logger.LogWarning(e, "DbUpdateConcurrencyException: {}",
-            SharedHelper.UnescapedJsonSerialize(e.Entries
+    public void LogDbUpdateConcurrencyException(DbUpdateConcurrencyException e, ref int retryTimes)
+    {
+        retryTimes++;
+        logger.LogWarning("Failed to update following entities after {} times: {}",
+            retryTimes, SharedHelper.UnescapedJsonSerialize(e.Entries
                 .GroupBy(ee => ee.Entity.GetType().Name)
                 .ToDictionary(g => g.Key, g => g.Count())));
+    }
 
     public int SaveChangesForUpdate()
     {
+        var retryTimes = 0;
         while (true)
         {
             try
@@ -27,7 +31,7 @@ public int SaveChangesForUpdate()
             }
             catch (DbUpdateConcurrencyException e)
             {
-                LogDbUpdateConcurrencyException(e);
+                LogDbUpdateConcurrencyException(e, ref retryTimes);
                 foreach (var entry in e.Entries)
                 {
                     var existing = entry.GetDatabaseValues();
@@ -40,6 +44,7 @@ public int SaveChangesForUpdate()
 
     public async Task SaveChangesForUpdateAsync(CancellationToken stoppingToken = default)
     {
+        var retryTimes = 0;
         while (true)
         {
             try
@@ -48,7 +53,7 @@ public async Task SaveChangesForUpdateAsync(CancellationToken stoppingToken
             }
             catch (DbUpdateConcurrencyException e)
             {
-                LogDbUpdateConcurrencyException(e);
+                LogDbUpdateConcurrencyException(e, ref retryTimes);
                 foreach (var entry in e.Entries)
                 {
                     var existing = await entry.GetDatabaseValuesAsync(stoppingToken);