Skip to content

Commit

Permalink
* prefer implemented interface over derived interface or classes from…
Browse files Browse the repository at this point in the history
… `List<>` for type of method param or return value

+ extension method `ICollection.AddRange()` @ `crawler.ExtensionMethods`
@ c#
  • Loading branch information
n0099 committed Apr 1, 2024
1 parent 07b6acc commit 1bccc4e
Show file tree
Hide file tree
Showing 18 changed files with 55 additions and 51 deletions.
4 changes: 2 additions & 2 deletions c#/crawler/src/Helper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ public static byte[]? SerializedProtoBufWrapperOrNullIfEmpty<T>
? null
: SerializedProtoBufOrNullIfEmpty(wrapperFactory());

public static IReadOnlyList<Content>? ParseThenUnwrapPostContent(byte[]? serializedProtoBuf) =>
public static IEnumerable<Content>? ParseThenUnwrapPostContent(byte[]? serializedProtoBuf) =>
serializedProtoBuf == null ? null : PostContentWrapper.Parser.ParseFrom(serializedProtoBuf).Value;

public static PostContentWrapper? WrapPostContent(IReadOnlyList<Content>? contents) =>
public static PostContentWrapper? WrapPostContent(IEnumerable<Content>? contents) =>
contents == null ? null : new() {Value = {contents}};

public static void GetNowTimestamp(out Time now) => now = GetNowTimestamp();
Expand Down
4 changes: 2 additions & 2 deletions c#/crawler/src/SonicPusher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public SonicPusher(ILogger<SonicPusher> logger, IConfiguration config)

public void Dispose() => Ingest.Dispose();

public float PushPost(Fid fid, string type, PostId id, IReadOnlyList<Content>? content)
public float PushPost(Fid fid, string type, PostId id, IEnumerable<Content>? content)
{
if (!_config.GetValue("Enabled", false)) return 0;
var stopwatch = new Stopwatch();
Expand Down Expand Up @@ -61,7 +61,7 @@ public float PushPost(Fid fid, string type, PostId id, IReadOnlyList<Content>? c
public void PushPostWithCancellationToken<T>(
IReadOnlyCollection<T> posts, Fid fid, string postType,
Func<T, PostId> postIdSelector,
Func<T, IReadOnlyList<Content>?> postContentSelector,
Func<T, IEnumerable<Content>?> postContentSelector,
CancellationToken stoppingToken = default)
{
try
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/CrawlPost.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ namespace tbm.Crawler.Tieba.Crawl;
#pragma warning disable SA1135 // Using directives should be qualified
#pragma warning disable SA1200 // Using directives should be placed correctly
using SavedRepliesKeyByTid = ConcurrentDictionary<Tid, SaverChangeSet<ReplyPost>>;
using SavedThreadsList = IList<SaverChangeSet<ThreadPost>>;
using SavedThreadsList = IReadOnlyCollection<SaverChangeSet<ThreadPost>>;

public class CrawlPost(
Func<Owned<CrawlerDbContext.New>> dbContextFactory,
Expand Down
6 changes: 3 additions & 3 deletions c#/crawler/src/Tieba/Crawl/Crawler/BaseCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ public abstract partial class BaseCrawler<TResponse, TPostProtoBuf>
public abstract Exception FillExceptionData(Exception e);

// ReSharper disable once UnusedParameter.Global
public abstract IReadOnlyList<TPostProtoBuf> GetValidPosts(TResponse response, CrawlRequestFlag flag);
public abstract IReadOnlyCollection<TPostProtoBuf> GetValidPosts(TResponse response, CrawlRequestFlag flag);
public abstract TbClient.Page? GetResponsePage(TResponse response);
protected abstract IReadOnlyList<TPostProtoBuf> GetResponsePostList(TResponse response);
protected abstract IReadOnlyCollection<TPostProtoBuf> GetResponsePostList(TResponse response);
protected abstract int GetResponseErrorCode(TResponse response);
protected abstract IEnumerable<Request> GetRequestsForPage(Page page, CancellationToken stoppingToken = default);

Expand All @@ -31,7 +31,7 @@ protected void ValidateOtherErrorCode(TResponse response)
throw new TiebaException("Error from tieba client.") {Data = {{"raw", response}}};
}

protected IReadOnlyList<TPostProtoBuf> EnsureNonEmptyPostList(TResponse response, string exceptionMessage)
protected IReadOnlyCollection<TPostProtoBuf> EnsureNonEmptyPostList(TResponse response, string exceptionMessage)
{
var posts = GetResponsePostList(response);
return posts.Count != 0 ? posts : throw new EmptyPostListException(exceptionMessage);
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Crawler/ReplyCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public override Exception FillExceptionData(Exception e)
return e;
}

public override IReadOnlyList<Reply> GetValidPosts(ReplyResponse response, CrawlRequestFlag flag)
public override IReadOnlyCollection<Reply> GetValidPosts(ReplyResponse response, CrawlRequestFlag flag)
{
if (response.Error.Errorno is 4 or 350008)
throw new EmptyPostListException("Thread already deleted when crawling reply.");
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Crawler/SubReplyCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ public override Exception FillExceptionData(Exception e)
return e;
}

public override IReadOnlyList<SubReply> GetValidPosts(SubReplyResponse response, CrawlRequestFlag flag)
public override IReadOnlyCollection<SubReply> GetValidPosts(SubReplyResponse response, CrawlRequestFlag flag)
{
switch (response.Error.Errorno)
{
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Crawler/ThreadCrawler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public override Exception FillExceptionData(Exception e)
return e;
}

public override IReadOnlyList<Thread> GetValidPosts(ThreadResponse response, CrawlRequestFlag flag)
public override IReadOnlyCollection<Thread> GetValidPosts(ThreadResponse response, CrawlRequestFlag flag)
{
ValidateOtherErrorCode(response);
return EnsureNonEmptyPostList(response,
Expand Down
6 changes: 3 additions & 3 deletions c#/crawler/src/Tieba/Crawl/Facade/BaseCrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public async Task<BaseCrawlFacade<TPost, TBaseRevision, TResponse, TPostProtoBuf
}

public async Task<SaverChangeSet<TPost>?> RetryThenSave
(IList<Page> pages, Func<Page, FailureCount> failureCountSelector, CancellationToken stoppingToken = default)
(IReadOnlyList<Page> pages, Func<Page, FailureCount> failureCountSelector, CancellationToken stoppingToken = default)
{
if (_lockingPages.Count != 0) ThrowHelper.ThrowInvalidOperationException(
"RetryPages() can only be called once, a instance of BaseCrawlFacade shouldn't be reuse for other crawls.");
Expand Down Expand Up @@ -136,14 +136,14 @@ private void ValidateThenParse(BaseCrawler<TResponse, TPostProtoBuf>.Response re
parsedPostsInResponse.ForEach(pair => Posts[pair.Key] = pair.Value);
if (flag == CrawlRequestFlag.None)
{
if (postsEmbeddedUsers.Count == 0 && postsInResponse.Any()) ThrowIfEmptyUsersEmbedInPosts();
if (postsEmbeddedUsers.Count == 0 && postsInResponse.Count != 0) ThrowIfEmptyUsersEmbedInPosts();
if (postsEmbeddedUsers.Count != 0) UserParser.Parse(postsEmbeddedUsers);
}
PostParseHook(response, flag, parsedPostsInResponse);
}

private async Task CrawlPages(
IList<Page> pages,
IReadOnlyList<Page> pages,
Func<Page, FailureCount>? previousFailureCountSelector = null,
CancellationToken stoppingToken = default)
{
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Facade/ReplyCrawlFacade.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public class ReplyCrawlFacade(
protected override void PostParseHook(
ReplyResponse response,
CrawlRequestFlag flag,
IDictionary<PostId, ReplyPost> parsedPostsInResponse)
IReadOnlyDictionary<PostId, ReplyPost> parsedPostsInResponse)
{
parsedPostsInResponse.Values.ForEach(r => r.Tid = tid);
var data = response.Data;
Expand Down
6 changes: 3 additions & 3 deletions c#/crawler/src/Tieba/Crawl/Parser/Post/BasePostParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ public abstract class BasePostParser<TPost, TPostProtoBuf>
where TPostProtoBuf : class, IMessage<TPostProtoBuf>
{
public void Parse(
CrawlRequestFlag requestFlag, IReadOnlyList<TPostProtoBuf> inPosts,
out IDictionary<PostId, TPost> outPosts, out IList<TbClient.User> outUsers)
CrawlRequestFlag requestFlag, IReadOnlyCollection<TPostProtoBuf> inPosts,
out IReadOnlyDictionary<PostId, TPost> outPosts, out IReadOnlyCollection<TbClient.User> outUsers)
{
if (ShouldSkipParse(requestFlag))
{
Expand All @@ -29,7 +29,7 @@ public void Parse(
// ReSharper disable once UnusedMemberInSuper.Global
protected abstract TPost Convert(TPostProtoBuf inPost);
protected abstract IEnumerable<TPost> ParseInternal
(IReadOnlyList<TPostProtoBuf> inPosts, IList<TbClient.User?> outUsers);
(IReadOnlyCollection<TPostProtoBuf> inPosts, ICollection<TbClient.User?> outUsers);
protected virtual bool ShouldSkipParse(CrawlRequestFlag requestFlag) => false;
protected abstract PostId PostIdSelector(TPost post);
}
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Parser/Post/ReplyParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public partial class ReplyParser(ILogger<ReplyParser> logger)
protected override PostId PostIdSelector(ReplyPost post) => post.Pid;

protected override IEnumerable<ReplyPost> ParseInternal
(IReadOnlyList<Reply> inPosts, IList<TbClient.User?> outUsers) => inPosts.Select(Convert);
(IReadOnlyCollection<Reply> inPosts, ICollection<TbClient.User?> outUsers) => inPosts.Select(Convert);

protected override ReplyPost Convert(Reply inPost)
{
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Parser/Post/SubReplyParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ public class SubReplyParser : BasePostParser<SubReplyPost, SubReply>
protected override PostId PostIdSelector(SubReplyPost post) => post.Spid;

protected override IEnumerable<SubReplyPost> ParseInternal
(IReadOnlyList<SubReply> inPosts, IList<TbClient.User?> outUsers)
(IReadOnlyCollection<SubReply> inPosts, ICollection<TbClient.User?> outUsers)
{
outUsers.AddRange(inPosts.Select(sr => sr.Author));
return inPosts.Select(Convert);
Expand Down
2 changes: 1 addition & 1 deletion c#/crawler/src/Tieba/Crawl/Parser/Post/ThreadParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ protected override bool ShouldSkipParse(CrawlRequestFlag requestFlag) =>
requestFlag == CrawlRequestFlag.ThreadClientVersion602;

protected override IEnumerable<ThreadPost> ParseInternal
(IReadOnlyList<Thread> inPosts, IList<TbClient.User?> outUsers) => inPosts.Select(Convert);
(IReadOnlyCollection<Thread> inPosts, ICollection<TbClient.User?> outUsers) => inPosts.Select(Convert);

protected override ThreadPost Convert(Thread inPost)
{
Expand Down
28 changes: 13 additions & 15 deletions c#/crawler/src/Tieba/Crawl/Saver/SaverChangeSet.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,24 @@ namespace tbm.Crawler.Tieba.Crawl.Saver;
public class SaverChangeSet<TPost> where TPost : class, IPost
{
public SaverChangeSet(
ICollection<TPost> existingBefore,
IReadOnlyCollection<TPost> existingBefore,
ICollection<TPost> existingAfterAndNewlyAdded,
Func<TPost, PostId> postIdSelector)
{
var existingAfter = existingAfterAndNewlyAdded
.IntersectBy(existingBefore.Select(postIdSelector), postIdSelector)
.OrderBy(postIdSelector).ToList();
Existing = new(existingBefore
Existing = existingBefore
.OrderBy(postIdSelector)
.EquiZip(existingAfter, (before, after) => (before, after))
.ToList());
NewlyAdded = new(existingAfterAndNewlyAdded
.EquiZip(existingAfterAndNewlyAdded
.IntersectBy(existingBefore.Select(postIdSelector), postIdSelector)
.OrderBy(postIdSelector),
(before, after) => (before, after))
.ToList().AsReadOnly();
NewlyAdded = existingAfterAndNewlyAdded
.ExceptBy(existingBefore.Select(postIdSelector), postIdSelector)
.ToList());
AllAfter = new([.. existingAfterAndNewlyAdded]);
.ToList().AsReadOnly();
AllAfter = existingAfterAndNewlyAdded.ToList().AsReadOnly();
}

public ReadOnlyCollection<(TPost Before, TPost After)> Existing { get; }
public ReadOnlyCollection<TPost> NewlyAdded { get; }

// ReSharper disable once CollectionNeverUpdated.Global
public ReadOnlyCollection<TPost> AllAfter { get; }
public IReadOnlyCollection<(TPost Before, TPost After)> Existing { get; }
public IReadOnlyCollection<TPost> NewlyAdded { get; }
public IReadOnlyCollection<TPost> AllAfter { get; }
}
6 changes: 3 additions & 3 deletions c#/crawler/src/Worker/RetryCrawlWorker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ FailureCount FailureCountSelector(Tid tid) =>

private async Task RetryThread(
Fid fid,
IList<Page> pages,
IReadOnlyList<Page> pages,
int failureCount,
Func<Page, FailureCount> failureCountSelector,
CancellationToken stoppingToken = default)
Expand Down Expand Up @@ -106,7 +106,7 @@ from f in dbFactory.Value().Forums.AsNoTracking()

private async Task RetryReply(
Fid fid, Tid tid,
IList<Page> pages,
IReadOnlyList<Page> pages,
int failureCount,
Func<Page, FailureCount> failureCountSelector,
CancellationToken stoppingToken = default)
Expand All @@ -123,7 +123,7 @@ private async Task RetryReply(

private async Task RetrySubReply(
Fid fid, Tid tid, Pid pid,
IList<Page> pages,
IReadOnlyList<Page> pages,
int failureCount,
Func<Page, FailureCount> failureCountSelector,
CancellationToken stoppingToken = default)
Expand Down
20 changes: 9 additions & 11 deletions c#/imagePipeline/src/ImageBatchConsumingWorker.cs
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ private async Task ConsumeOcrConsumer(
foreach (var scriptsGroupByFid in scriptGroupings)
{
var fid = scriptsGroupByFid.Key;
List<ImageKeyWithMatrix> GetImagesInCurrentFid()
IEnumerable<ImageKeyWithMatrix> GetImagesInCurrentFid()
{ // dispose the scope of Owned<DbContext> after return to prevent long-life idle connection
using var dbFactory = dbContextFactory();
var db = dbFactory.Value(fid, "");
Expand All @@ -214,18 +214,16 @@ List<ImageKeyWithMatrix> GetImagesInCurrentFid()
#pragma warning restore IDISP004 // Don't ignore created IDisposable

// try to know which fid owns current image batch
return imageKeysWithMatrix
.IntersectBy(
from replyContentImage in db.ReplyContentImages
where imageKeysWithMatrix
.Select(imageKeyWithMatrix => imageKeyWithMatrix.ImageId)
.Contains(replyContentImage.ImageId)
select replyContentImage.ImageId,
imageKeyWithMatrix => imageKeyWithMatrix.ImageId)
.ToList();
return imageKeysWithMatrix.IntersectBy(
from replyContentImage in db.ReplyContentImages
where imageKeysWithMatrix
.Select(imageKeyWithMatrix => imageKeyWithMatrix.ImageId)
.Contains(replyContentImage.ImageId)
select replyContentImage.ImageId,
imageKeyWithMatrix => imageKeyWithMatrix.ImageId);
}

var imagesInCurrentFid = GetImagesInCurrentFid();
var imagesInCurrentFid = GetImagesInCurrentFid().ToList();
if (imagesInCurrentFid.Count == 0) continue;
foreach (var script in scriptsGroupByFid)
{
Expand Down
2 changes: 1 addition & 1 deletion c#/imagePipeline/src/Ocr/JointRecognizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public class JointRecognizer(
public async Task InitializePaddleOcr(CancellationToken stoppingToken = default) =>
await _paddleOcrProvider.Initialize(stoppingToken);

public IReadOnlyList<Either<ImageId, IRecognitionResult>> RecognizeMatrices
public IReadOnlyCollection<Either<ImageId, IRecognitionResult>> RecognizeMatrices
(IReadOnlyDictionary<ImageKey, Mat> matricesKeyByImageKey, CancellationToken stoppingToken = default)
{
var recognizedEithersViaPaddleOcr = _paddleOcrProvider
Expand Down
8 changes: 8 additions & 0 deletions c#/shared/src/ExtensionMethods.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ public static void AddRange<T>(this IList<T> list, IEnumerable<T> items)
if (list is List<T> asList) asList.AddRange(items);
else foreach (var item in items) list.Add(item);
}

/// <summary>
/// Appends every element of <paramref name="items"/> to <paramref name="list"/>.
/// When the target is a <see cref="List{T}"/> its own <c>AddRange</c> is used;
/// any other collection receives the elements one at a time through <c>Add</c>.
/// </summary>
/// <param name="list">The collection that receives the elements.</param>
/// <param name="items">The elements to append, enumerated once.</param>
/// <remarks>
/// Based on <see href="https://stackoverflow.com/questions/1474863/addrange-to-a-collection/26360010#26360010"/>.
/// </remarks>
[System.Diagnostics.CodeAnalysis.SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1618:Generic type parameters should be documented")]
public static void AddRange<T>(this ICollection<T> list, IEnumerable<T> items)
{
    // Generic path: the target is not a List<T>, so fall back to per-element Add.
    if (list is not List<T> concreteList)
    {
        foreach (var element in items)
        {
            list.Add(element);
        }

        return;
    }

    // Fast path: let List<T> handle the whole sequence in a single call.
    concreteList.AddRange(items);
}
}
public static partial class ExtensionMethods
{
Expand Down

0 comments on commit 1bccc4e

Please sign in to comment.