-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhandle_links.go
77 lines (65 loc) · 1.83 KB
/
handle_links.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
package crawler
import (
"context"
"github.com/thewizardplusplus/go-crawler/models"
syncutils "github.com/thewizardplusplus/go-sync-utils"
)
// HandleLinkDependencies is the dependency set passed to
// HandleLinksConcurrently, HandleLinks and HandleLink.
//
// It embeds CrawlDependencies and adds a wait group. The handling functions
// reach LinkExtractor, LinkHandler, LinkChecker and Logger through this
// struct; presumably they are promoted from the embedded CrawlDependencies,
// which is declared elsewhere (confirm there).
type HandleLinkDependencies struct {
CrawlDependencies
// Waiter counts links that are still to be handled: HandleLink calls Add(1)
// for every extracted link that passes the check, and Done once for the link
// it has just handled.
Waiter syncutils.WaitGroup
}
// HandleLinksConcurrently spawns concurrencyFactor goroutines, each running
// HandleLinks on the same links channel with the same dependencies.
//
// Workers receive thread IDs 0 through concurrencyFactor-1. The function
// returns as soon as the goroutines are started and does not wait for them;
// completion is tracked via dependencies.Waiter. A non-positive
// concurrencyFactor starts no workers.
func HandleLinksConcurrently(
	ctx context.Context,
	concurrencyFactor int,
	links chan string,
	dependencies HandleLinkDependencies,
) {
	threadID := 0
	for threadID < concurrencyFactor {
		// the arguments are evaluated here, so every worker gets its own ID
		go HandleLinks(ctx, threadID, links, dependencies)
		threadID++
	}
}
// HandleLinks is the body of a single worker: it receives links from the
// links channel until the channel is closed, passes each one to HandleLink,
// and feeds every link returned by HandleLink back into the same channel.
//
// threadID is forwarded unchanged to HandleLink.
func HandleLinks(
	ctx context.Context,
	threadID int,
	links chan string,
	dependencies HandleLinkDependencies,
) {
	for {
		link, ok := <-links
		if !ok {
			// the channel has been closed, so there is nothing left to handle
			return
		}

		discovered := HandleLink(ctx, threadID, link, dependencies)
		for _, nextLink := range discovered {
			// The links go back via syncutils.UnboundedSend rather than a plain
			// channel send in order to avoid a deadlock (presumably because the
			// workers that send to this channel are also its only readers;
			// confirm against the go-sync-utils documentation).
			syncutils.UnboundedSend(links, nextLink)
		}
	}
}
// HandleLink processes one link and returns the links extracted from it that
// passed the link checker.
//
// The steps are:
//   - extract links via dependencies.LinkExtractor; on failure the error is
//     logged through dependencies.Logger and nil is returned;
//   - pass every extracted link, paired with its source link, to
//     dependencies.LinkHandler;
//   - keep only the links accepted by dependencies.LinkChecker, calling
//     dependencies.Waiter.Add(1) for each of them.
//
// dependencies.Waiter.Done() is called exactly once on return, including the
// error path. The result is nil when no link was accepted.
func HandleLink(
	ctx context.Context,
	threadID int,
	link string,
	dependencies HandleLinkDependencies,
) []string {
	// deferred so that it runs only after every Waiter.Add(1) call below
	defer dependencies.Waiter.Done()

	found, err := dependencies.LinkExtractor.ExtractLinks(ctx, threadID, link)
	if err != nil {
		dependencies.Logger.Logf("unable to extract links for link %q: %s", link, err)
		return nil
	}

	var accepted []string
	for _, candidate := range found {
		sourced := models.SourcedLink{SourceLink: link, Link: candidate}

		// the handler sees every extracted link, whether or not it is accepted
		dependencies.LinkHandler.HandleLink(ctx, sourced)

		if dependencies.LinkChecker.CheckLink(ctx, sourced) {
			// register the accepted link before the deferred Waiter.Done() call
			dependencies.Waiter.Add(1)
			accepted = append(accepted, candidate)
		}
	}

	return accepted
}