-
Notifications
You must be signed in to change notification settings - Fork 40
Expand file tree
/
Copy pathlychee.toml
More file actions
88 lines (64 loc) · 2.53 KB
/
lychee.toml
File metadata and controls
88 lines (64 loc) · 2.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Lychee link checker configuration
# https://lychee.cli.rs/
# Origin used to resolve root-relative links found in local files
# (for example /agentready/demos/).
# NOTE(review): the `exclude` list in this file contains patterns that match
# /demos/ and /tbench paths - confirm whether those links are meant to be
# checked or skipped.
base_url = "https://ambient-code.github.io"

# Upper bound on simultaneous network requests.
max_concurrency = 10

# HTTP status codes that count as a successful check. 403 is listed so that
# sites which block bots do not show up as false positives.
accept = [
    200,
    206,
    301,
    302,
    308,
    403,
    429,
]

timeout = 10         # seconds allowed for each request
max_retries = 2      # retries for a failed request
retry_wait_time = 1  # seconds to wait before retrying
# URL patterns (regular expressions) that are never checked. The list favours
# a stable run: only links considered critical are left to be checked.
#
# Patterns are written as TOML literal strings ('...') so each regex escape
# needs a single backslash instead of the doubled form that basic strings
# ("...") require.
exclude = [
    # Localhost URLs
    '^http://localhost',
    '^http://127\.0\.0\.1',

    # File protocol URLs (Jekyll-generated files)
    '^file://',

    # GitHub issue creation pages (always fail)
    '^https://github\.com/.*/issues/new',

    # Jekyll site paths (will be checked once deployed).
    # These two patterns match any URL containing "/demos/" or "/tbench",
    # which includes the /agentready/demos/ and /agentready/tbench paths, so
    # no separate /agentready/... entries are needed.
    '.*/demos/.*',
    '.*/tbench.*',

    # Jinja2/Liquid template variables
    '\{\{.*\}\}',

    # GitHub discussions (may be private)
    '^https://github\.com/ambient-code/agentready/discussions',

    # License file (avoid false positives)
    '^https://github\.com/ambient-code/agentready/blob/main/LICENSE',

    # === Sites that commonly block bots ===

    # Claude AI (blocks automated scrapers)
    '^https://claude\.ai',

    # Academic publishers (often block bots with 403)
    '^https://dl\.acm\.org',
    '^https://ieeexplore\.ieee\.org',
    '^https://link\.springer\.com',

    # Research preprints (unstable, placeholder URLs).
    # Matches every arXiv identifier that starts with "25" (2501.* included).
    # NOTE(review): this is broad - narrow it if real arXiv links with such
    # identifiers should be checked.
    '^https://arxiv\.org/abs/25',

    # Microsoft Research (commonly blocks scrapers)
    '^https://www\.microsoft\.com/en-us/research/publication/',

    # Anthropic research (may not exist yet)
    '^https://www\.anthropic\.com/research/',
    '^https://anthropic\.com/research/',

    # GitHub blog (placeholder posts)
    '^https://github\.blog/2024.*automated.*',
    '^https://github\.blog/.*ai-workflows',

    # GitHub Apps (may not be public)
    '^https://github\.com/apps/claude-ai',
]
include_mail = false       # mail addresses are not included in the check
include_fragments = false  # fragments (anchors) in URLs are not checked

# Redirect following is not configured here: it is implicit (enabled by default).

# User agent string for the link checker.
user_agent = 'lychee/agentready-link-checker'