-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathllmstxtcheck.sh
More file actions
executable file
·40 lines (34 loc) · 1.11 KB
/
llmstxtcheck.sh
File metadata and controls
executable file
·40 lines (34 loc) · 1.11 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env bash
#
# llmstxtcheck.sh - report links in a site's llms.txt that answer HTTP 404.
#
# Usage: ./llmstxtcheck.sh base_url [output_filename]
#
#   base_url         Host (and optional path prefix) without a scheme; the
#                    page https://{base_url}/llms.txt is downloaded.
#   output_filename  Optional name for the report; ".txt" is appended.
#                    Defaults to "llms.txt" when omitted.
#
# Every http(s) URL found in the downloaded page is requested once. URLs that
# answer 404 are appended to the report file, so results from earlier runs
# against the same file are kept.
#
# Exit status: 1 when base_url is missing or llms.txt cannot be downloaded,
# 0 otherwise.

# Check input and print help as needed
if [[ -z "${1:-}" ]]; then
  echo "Error: First argument is missing."
  echo "Usage: ./llmstxtcheck.sh base_url [output_filename]"
  echo "The page 'https://{base_url}/llms.txt' will be fetched and all 404'd URLs will be saved to output_filename.txt or llms.txt."
  exit 1
fi

# Assign optional filename or default.
# -z (empty string) is the right test here; -x would ask whether $2 names an
# executable file and would never select the default for a missing argument.
if [[ -z "${2:-}" ]]; then
  output_filename="llms.txt"
else
  output_filename="${2}.txt"
fi

# Download the file; stop early instead of "checking" an empty list when the
# download itself fails (-f makes curl return non-zero on HTTP errors).
echo "Loading https://$1/llms.txt..."
if ! page=$(curl -fsSL "https://$1/llms.txt"); then
  echo "Error: could not download https://$1/llms.txt" >&2
  exit 1
fi

# Extract all URLs: everything from the scheme up to the next ')' or space.
urls=$(grep -Eo 'https?://[^) ]+' <<<"$page")
if [[ -z "$urls" ]]; then
  echo "No URLs found in https://$1/llms.txt."
  exit 0
fi

# grep -c '' counts lines without the padding some wc implementations add.
total=$(grep -c '' <<<"$urls")
count=0
failcount=0
echo "Checking $total URLs for 404 errors..."
echo

# Loop through each URL and check HTTP status.
# The list is fed through a here-string rather than a pipe so the loop runs in
# the current shell and failcount is still set when the summary is printed.
while IFS= read -r url; do
  count=$((count + 1))
  code=$(curl -o /dev/null -s -w "%{http_code}" "$url")
  printf "[%3d/%3d] %s -> %s\n" "$count" "$total" "$url" "$code"
  if [[ "$code" == "404" ]]; then
    echo "❌ 404: $url" >>"$output_filename"
    failcount=$((failcount + 1))
  fi
done <<<"$urls"

echo
echo "Done. Found $failcount failed URLs. Any 404 URLs are saved in $output_filename"