From 1bccc4ebb9fa595b004d3470b5f74156a2985a6b Mon Sep 17 00:00:00 2001 From: n0099 Date: Tue, 2 Apr 2024 00:14:10 +0800 Subject: [PATCH] * prefer implemented interface over derived interface or classes from `List<>` for type of method param or return value + extension method `ICollection.AddRange()` @ `crawler.ExtensionMethods` @ c# --- c#/crawler/src/Helper.cs | 4 +-- c#/crawler/src/SonicPusher.cs | 4 +-- c#/crawler/src/Tieba/Crawl/CrawlPost.cs | 2 +- .../src/Tieba/Crawl/Crawler/BaseCrawler.cs | 6 ++-- .../src/Tieba/Crawl/Crawler/ReplyCrawler.cs | 2 +- .../Tieba/Crawl/Crawler/SubReplyCrawler.cs | 2 +- .../src/Tieba/Crawl/Crawler/ThreadCrawler.cs | 2 +- .../src/Tieba/Crawl/Facade/BaseCrawlFacade.cs | 6 ++-- .../Tieba/Crawl/Facade/ReplyCrawlFacade.cs | 2 +- .../Tieba/Crawl/Parser/Post/BasePostParser.cs | 6 ++-- .../Tieba/Crawl/Parser/Post/ReplyParser.cs | 2 +- .../Tieba/Crawl/Parser/Post/SubReplyParser.cs | 2 +- .../Tieba/Crawl/Parser/Post/ThreadParser.cs | 2 +- .../src/Tieba/Crawl/Saver/SaverChangeSet.cs | 28 +++++++++---------- c#/crawler/src/Worker/RetryCrawlWorker.cs | 6 ++-- .../src/ImageBatchConsumingWorker.cs | 20 ++++++------- c#/imagePipeline/src/Ocr/JointRecognizer.cs | 2 +- c#/shared/src/ExtensionMethods.cs | 8 ++++++ 18 files changed, 55 insertions(+), 51 deletions(-) diff --git a/c#/crawler/src/Helper.cs b/c#/crawler/src/Helper.cs index a39893c6..e3aa7cdf 100644 --- a/c#/crawler/src/Helper.cs +++ b/c#/crawler/src/Helper.cs @@ -18,10 +18,10 @@ public static byte[]? SerializedProtoBufWrapperOrNullIfEmpty ? null : SerializedProtoBufOrNullIfEmpty(wrapperFactory()); - public static IReadOnlyList? ParseThenUnwrapPostContent(byte[]? serializedProtoBuf) => + public static IEnumerable? ParseThenUnwrapPostContent(byte[]? serializedProtoBuf) => serializedProtoBuf == null ? null : PostContentWrapper.Parser.ParseFrom(serializedProtoBuf).Value; - public static PostContentWrapper? WrapPostContent(IReadOnlyList? contents) => + public static PostContentWrapper? WrapPostContent(IEnumerable? contents) => contents == null ? null : new() {Value = {contents}}; public static void GetNowTimestamp(out Time now) => now = GetNowTimestamp(); diff --git a/c#/crawler/src/SonicPusher.cs b/c#/crawler/src/SonicPusher.cs index 358a09e4..6b3101a3 100644 --- a/c#/crawler/src/SonicPusher.cs +++ b/c#/crawler/src/SonicPusher.cs @@ -22,7 +22,7 @@ public SonicPusher(ILogger logger, IConfiguration config) public void Dispose() => Ingest.Dispose(); - public float PushPost(Fid fid, string type, PostId id, IReadOnlyList? content) + public float PushPost(Fid fid, string type, PostId id, IEnumerable? content) { if (!_config.GetValue("Enabled", false)) return 0; var stopwatch = new Stopwatch(); @@ -61,7 +61,7 @@ public float PushPost(Fid fid, string type, PostId id, IReadOnlyList? c public void PushPostWithCancellationToken( IReadOnlyCollection posts, Fid fid, string postType, Func postIdSelector, - Func?> postContentSelector, + Func?> postContentSelector, CancellationToken stoppingToken = default) { try diff --git a/c#/crawler/src/Tieba/Crawl/CrawlPost.cs b/c#/crawler/src/Tieba/Crawl/CrawlPost.cs index afd5491f..e2bce116 100644 --- a/c#/crawler/src/Tieba/Crawl/CrawlPost.cs +++ b/c#/crawler/src/Tieba/Crawl/CrawlPost.cs @@ -4,7 +4,7 @@ namespace tbm.Crawler.Tieba.Crawl; #pragma warning disable SA1135 // Using directives should be qualified #pragma warning disable SA1200 // Using directives should be placed correctly using SavedRepliesKeyByTid = ConcurrentDictionary>; -using SavedThreadsList = IList>; +using SavedThreadsList = IReadOnlyCollection>; public class CrawlPost( Func> dbContextFactory, diff --git a/c#/crawler/src/Tieba/Crawl/Crawler/BaseCrawler.cs b/c#/crawler/src/Tieba/Crawl/Crawler/BaseCrawler.cs index 1a7f3c22..1bde1cf6 100644 --- a/c#/crawler/src/Tieba/Crawl/Crawler/BaseCrawler.cs +++ b/c#/crawler/src/Tieba/Crawl/Crawler/BaseCrawler.cs @@ -5,9 +5,9 @@ public abstract partial class BaseCrawler public abstract Exception FillExceptionData(Exception e); // ReSharper disable once UnusedParameter.Global - public abstract IReadOnlyList GetValidPosts(TResponse response, CrawlRequestFlag flag); + public abstract IReadOnlyCollection GetValidPosts(TResponse response, CrawlRequestFlag flag); public abstract TbClient.Page? GetResponsePage(TResponse response); - protected abstract IReadOnlyList GetResponsePostList(TResponse response); + protected abstract IReadOnlyCollection GetResponsePostList(TResponse response); protected abstract int GetResponseErrorCode(TResponse response); protected abstract IEnumerable GetRequestsForPage(Page page, CancellationToken stoppingToken = default); @@ -31,7 +31,7 @@ protected void ValidateOtherErrorCode(TResponse response) throw new TiebaException("Error from tieba client.") {Data = {{"raw", response}}}; } - protected IReadOnlyList EnsureNonEmptyPostList(TResponse response, string exceptionMessage) + protected IReadOnlyCollection EnsureNonEmptyPostList(TResponse response, string exceptionMessage) { var posts = GetResponsePostList(response); return posts.Count != 0 ? posts : throw new EmptyPostListException(exceptionMessage); diff --git a/c#/crawler/src/Tieba/Crawl/Crawler/ReplyCrawler.cs b/c#/crawler/src/Tieba/Crawl/Crawler/ReplyCrawler.cs index eaf8da86..400c5db2 100644 --- a/c#/crawler/src/Tieba/Crawl/Crawler/ReplyCrawler.cs +++ b/c#/crawler/src/Tieba/Crawl/Crawler/ReplyCrawler.cs @@ -10,7 +10,7 @@ public override Exception FillExceptionData(Exception e) return e; } - public override IReadOnlyList GetValidPosts(ReplyResponse response, CrawlRequestFlag flag) + public override IReadOnlyCollection GetValidPosts(ReplyResponse response, CrawlRequestFlag flag) { if (response.Error.Errorno is 4 or 350008) throw new EmptyPostListException("Thread already deleted when crawling reply."); diff --git a/c#/crawler/src/Tieba/Crawl/Crawler/SubReplyCrawler.cs b/c#/crawler/src/Tieba/Crawl/Crawler/SubReplyCrawler.cs index e6185f6a..f2b1dde0 100644 --- a/c#/crawler/src/Tieba/Crawl/Crawler/SubReplyCrawler.cs +++ b/c#/crawler/src/Tieba/Crawl/Crawler/SubReplyCrawler.cs @@ -11,7 +11,7 @@ public override Exception FillExceptionData(Exception e) return e; } - public override IReadOnlyList GetValidPosts(SubReplyResponse response, CrawlRequestFlag flag) + public override IReadOnlyCollection GetValidPosts(SubReplyResponse response, CrawlRequestFlag flag) { switch (response.Error.Errorno) { diff --git a/c#/crawler/src/Tieba/Crawl/Crawler/ThreadCrawler.cs b/c#/crawler/src/Tieba/Crawl/Crawler/ThreadCrawler.cs index 109dd183..b9a00a6d 100644 --- a/c#/crawler/src/Tieba/Crawl/Crawler/ThreadCrawler.cs +++ b/c#/crawler/src/Tieba/Crawl/Crawler/ThreadCrawler.cs @@ -13,7 +13,7 @@ public override Exception FillExceptionData(Exception e) return e; } - public override IReadOnlyList GetValidPosts(ThreadResponse response, CrawlRequestFlag flag) + public override IReadOnlyCollection GetValidPosts(ThreadResponse response, CrawlRequestFlag flag) { ValidateOtherErrorCode(response); return EnsureNonEmptyPostList(response, diff --git a/c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs index f60fbbe5..4ddd4374 100644 --- a/c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs +++ b/c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs @@ -103,7 +103,7 @@ public async Task?> RetryThenSave - (IList pages, Func failureCountSelector, CancellationToken stoppingToken = default) + (IReadOnlyList pages, Func failureCountSelector, CancellationToken stoppingToken = default) { if (_lockingPages.Count != 0) ThrowHelper.ThrowInvalidOperationException( "RetryPages() can only be called once, a instance of BaseCrawlFacade shouldn't be reuse for other crawls."); @@ -136,14 +136,14 @@ private void ValidateThenParse(BaseCrawler.Response re parsedPostsInResponse.ForEach(pair => Posts[pair.Key] = pair.Value); if (flag == CrawlRequestFlag.None) { - if (postsEmbeddedUsers.Count == 0 && postsInResponse.Any()) ThrowIfEmptyUsersEmbedInPosts(); + if (postsEmbeddedUsers.Count == 0 && postsInResponse.Count != 0) ThrowIfEmptyUsersEmbedInPosts(); if (postsEmbeddedUsers.Count != 0) UserParser.Parse(postsEmbeddedUsers); } PostParseHook(response, flag, parsedPostsInResponse); } private async Task CrawlPages( - IList pages, + IReadOnlyList pages, Func? previousFailureCountSelector = null, CancellationToken stoppingToken = default) { diff --git a/c#/crawler/src/Tieba/Crawl/Facade/ReplyCrawlFacade.cs b/c#/crawler/src/Tieba/Crawl/Facade/ReplyCrawlFacade.cs index aeea60af..ae2d274c 100644 --- a/c#/crawler/src/Tieba/Crawl/Facade/ReplyCrawlFacade.cs +++ b/c#/crawler/src/Tieba/Crawl/Facade/ReplyCrawlFacade.cs @@ -21,7 +21,7 @@ public class ReplyCrawlFacade( protected override void PostParseHook( ReplyResponse response, CrawlRequestFlag flag, - IDictionary parsedPostsInResponse) + IReadOnlyDictionary parsedPostsInResponse) { parsedPostsInResponse.Values.ForEach(r => r.Tid = tid); var data = response.Data; diff --git a/c#/crawler/src/Tieba/Crawl/Parser/Post/BasePostParser.cs b/c#/crawler/src/Tieba/Crawl/Parser/Post/BasePostParser.cs index e8b296d9..f5621743 100644 --- a/c#/crawler/src/Tieba/Crawl/Parser/Post/BasePostParser.cs +++ b/c#/crawler/src/Tieba/Crawl/Parser/Post/BasePostParser.cs @@ -5,8 +5,8 @@ public abstract class BasePostParser where TPostProtoBuf : class, IMessage { public void Parse( - CrawlRequestFlag requestFlag, IReadOnlyList inPosts, - out IDictionary outPosts, out IList outUsers) + CrawlRequestFlag requestFlag, IReadOnlyCollection inPosts, + out IReadOnlyDictionary outPosts, out IReadOnlyCollection outUsers) { if (ShouldSkipParse(requestFlag)) { @@ -29,7 +29,7 @@ public void Parse( // ReSharper disable once UnusedMemberInSuper.Global protected abstract TPost Convert(TPostProtoBuf inPost); protected abstract IEnumerable ParseInternal - (IReadOnlyList inPosts, IList outUsers); + (IReadOnlyCollection inPosts, ICollection outUsers); protected virtual bool ShouldSkipParse(CrawlRequestFlag requestFlag) => false; protected abstract PostId PostIdSelector(TPost post); } diff --git a/c#/crawler/src/Tieba/Crawl/Parser/Post/ReplyParser.cs b/c#/crawler/src/Tieba/Crawl/Parser/Post/ReplyParser.cs index 4edd8a2d..d75b654d 100644 --- a/c#/crawler/src/Tieba/Crawl/Parser/Post/ReplyParser.cs +++ b/c#/crawler/src/Tieba/Crawl/Parser/Post/ReplyParser.cs @@ -10,7 +10,7 @@ public partial class ReplyParser(ILogger logger) protected override PostId PostIdSelector(ReplyPost post) => post.Pid; protected override IEnumerable ParseInternal - (IReadOnlyList inPosts, IList outUsers) => inPosts.Select(Convert); + (IReadOnlyCollection inPosts, ICollection outUsers) => inPosts.Select(Convert); protected override ReplyPost Convert(Reply inPost) { diff --git a/c#/crawler/src/Tieba/Crawl/Parser/Post/SubReplyParser.cs b/c#/crawler/src/Tieba/Crawl/Parser/Post/SubReplyParser.cs index 168fdd09..8e35e514 100644 --- a/c#/crawler/src/Tieba/Crawl/Parser/Post/SubReplyParser.cs +++ b/c#/crawler/src/Tieba/Crawl/Parser/Post/SubReplyParser.cs @@ -5,7 +5,7 @@ public class SubReplyParser : BasePostParser protected override PostId PostIdSelector(SubReplyPost post) => post.Spid; protected override IEnumerable ParseInternal - (IReadOnlyList inPosts, IList outUsers) + (IReadOnlyCollection inPosts, ICollection outUsers) { outUsers.AddRange(inPosts.Select(sr => sr.Author)); return inPosts.Select(Convert); diff --git a/c#/crawler/src/Tieba/Crawl/Parser/Post/ThreadParser.cs b/c#/crawler/src/Tieba/Crawl/Parser/Post/ThreadParser.cs index cab526d0..167325ae 100644 --- a/c#/crawler/src/Tieba/Crawl/Parser/Post/ThreadParser.cs +++ b/c#/crawler/src/Tieba/Crawl/Parser/Post/ThreadParser.cs @@ -8,7 +8,7 @@ protected override bool ShouldSkipParse(CrawlRequestFlag requestFlag) => requestFlag == CrawlRequestFlag.ThreadClientVersion602; protected override IEnumerable ParseInternal - (IReadOnlyList inPosts, IList outUsers) => inPosts.Select(Convert); + (IReadOnlyCollection inPosts, ICollection outUsers) => inPosts.Select(Convert); protected override ThreadPost Convert(Thread inPost) { diff --git a/c#/crawler/src/Tieba/Crawl/Saver/SaverChangeSet.cs b/c#/crawler/src/Tieba/Crawl/Saver/SaverChangeSet.cs index 6cc06340..772f28cc 100644 --- a/c#/crawler/src/Tieba/Crawl/Saver/SaverChangeSet.cs +++ b/c#/crawler/src/Tieba/Crawl/Saver/SaverChangeSet.cs @@ -5,26 +5,24 @@ namespace tbm.Crawler.Tieba.Crawl.Saver; public class SaverChangeSet where TPost : class, IPost { public SaverChangeSet( - ICollection existingBefore, + IReadOnlyCollection existingBefore, ICollection existingAfterAndNewlyAdded, Func postIdSelector) { - var existingAfter = existingAfterAndNewlyAdded - .IntersectBy(existingBefore.Select(postIdSelector), postIdSelector) - .OrderBy(postIdSelector).ToList(); - Existing = new(existingBefore + Existing = existingBefore .OrderBy(postIdSelector) - .EquiZip(existingAfter, (before, after) => (before, after)) - .ToList()); - NewlyAdded = new(existingAfterAndNewlyAdded + .EquiZip(existingAfterAndNewlyAdded + .IntersectBy(existingBefore.Select(postIdSelector), postIdSelector) + .OrderBy(postIdSelector), + (before, after) => (before, after)) + .ToList().AsReadOnly(); + NewlyAdded = existingAfterAndNewlyAdded .ExceptBy(existingBefore.Select(postIdSelector), postIdSelector) - .ToList()); - AllAfter = new([.. existingAfterAndNewlyAdded]); + .ToList().AsReadOnly(); + AllAfter = existingAfterAndNewlyAdded.ToList().AsReadOnly(); } - public ReadOnlyCollection<(TPost Before, TPost After)> Existing { get; } - public ReadOnlyCollection NewlyAdded { get; } - - // ReSharper disable once CollectionNeverUpdated.Global - public ReadOnlyCollection AllAfter { get; } + public IReadOnlyCollection<(TPost Before, TPost After)> Existing { get; } + public IReadOnlyCollection NewlyAdded { get; } + public IReadOnlyCollection AllAfter { get; } } diff --git a/c#/crawler/src/Worker/RetryCrawlWorker.cs b/c#/crawler/src/Worker/RetryCrawlWorker.cs index 96e5e722..31918d51 100644 --- a/c#/crawler/src/Worker/RetryCrawlWorker.cs +++ b/c#/crawler/src/Worker/RetryCrawlWorker.cs @@ -77,7 +77,7 @@ FailureCount FailureCountSelector(Tid tid) => private async Task RetryThread( Fid fid, - IList pages, + IReadOnlyList pages, int failureCount, Func failureCountSelector, CancellationToken stoppingToken = default) @@ -106,7 +106,7 @@ from f in dbFactory.Value().Forums.AsNoTracking() private async Task RetryReply( Fid fid, Tid tid, - IList pages, + IReadOnlyList pages, int failureCount, Func failureCountSelector, CancellationToken stoppingToken = default) @@ -123,7 +123,7 @@ private async Task RetryReply( private async Task RetrySubReply( Fid fid, Tid tid, Pid pid, - IList pages, + IReadOnlyList pages, int failureCount, Func failureCountSelector, CancellationToken stoppingToken = default) diff --git a/c#/imagePipeline/src/ImageBatchConsumingWorker.cs b/c#/imagePipeline/src/ImageBatchConsumingWorker.cs index 94ac996d..511edc5e 100644 --- a/c#/imagePipeline/src/ImageBatchConsumingWorker.cs +++ b/c#/imagePipeline/src/ImageBatchConsumingWorker.cs @@ -204,7 +204,7 @@ private async Task ConsumeOcrConsumer( foreach (var scriptsGroupByFid in scriptGroupings) { var fid = scriptsGroupByFid.Key; - List GetImagesInCurrentFid() + IEnumerable GetImagesInCurrentFid() { // dispose the scope of Owned after return to prevent long-life idle connection using var dbFactory = dbContextFactory(); var db = dbFactory.Value(fid, ""); @@ -214,18 +214,16 @@ List GetImagesInCurrentFid() #pragma warning restore IDISP004 // Don't ignore created IDisposable // try to know which fid owns current image batch - return imageKeysWithMatrix - .IntersectBy( - from replyContentImage in db.ReplyContentImages - where imageKeysWithMatrix - .Select(imageKeyWithMatrix => imageKeyWithMatrix.ImageId) - .Contains(replyContentImage.ImageId) - select replyContentImage.ImageId, - imageKeyWithMatrix => imageKeyWithMatrix.ImageId) - .ToList(); + return imageKeysWithMatrix.IntersectBy( + from replyContentImage in db.ReplyContentImages + where imageKeysWithMatrix + .Select(imageKeyWithMatrix => imageKeyWithMatrix.ImageId) + .Contains(replyContentImage.ImageId) + select replyContentImage.ImageId, + imageKeyWithMatrix => imageKeyWithMatrix.ImageId); } - var imagesInCurrentFid = GetImagesInCurrentFid(); + var imagesInCurrentFid = GetImagesInCurrentFid().ToList(); if (imagesInCurrentFid.Count == 0) continue; foreach (var script in scriptsGroupByFid) { diff --git a/c#/imagePipeline/src/Ocr/JointRecognizer.cs b/c#/imagePipeline/src/Ocr/JointRecognizer.cs index 790afbdc..92f39a4d 100644 --- a/c#/imagePipeline/src/Ocr/JointRecognizer.cs +++ b/c#/imagePipeline/src/Ocr/JointRecognizer.cs @@ -34,7 +34,7 @@ public class JointRecognizer( public async Task InitializePaddleOcr(CancellationToken stoppingToken = default) => await _paddleOcrProvider.Initialize(stoppingToken); - public IReadOnlyList> RecognizeMatrices + public IReadOnlyCollection> RecognizeMatrices (IReadOnlyDictionary matricesKeyByImageKey, CancellationToken stoppingToken = default) { var recognizedEithersViaPaddleOcr = _paddleOcrProvider diff --git a/c#/shared/src/ExtensionMethods.cs b/c#/shared/src/ExtensionMethods.cs index 01e42b2f..16ed8f39 100644 --- a/c#/shared/src/ExtensionMethods.cs +++ b/c#/shared/src/ExtensionMethods.cs @@ -27,6 +27,14 @@ public static void AddRange(this IList list, IEnumerable items) if (list is List asList) asList.AddRange(items); else foreach (var item in items) list.Add(item); } + + /// https://stackoverflow.com/questions/1474863/addrange-to-a-collection/26360010#26360010 + [System.Diagnostics.CodeAnalysis.SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1618:Generic type parameters should be documented")] + public static void AddRange(this ICollection list, IEnumerable items) + { + if (list is List asList) asList.AddRange(items); + else foreach (var item in items) list.Add(item); + } } public static partial class ExtensionMethods {