-
Notifications
You must be signed in to change notification settings - Fork 4
/
pdfdir-verify
executable file
·89 lines (71 loc) · 2.39 KB
/
pdfdir-verify
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/bash
#
# pdfdir-verify http://github.com/bronson/pdfdir
# Scott Bronson
# 26 Mar 2009
#
# This script tries to determine if your pdf files are good.
# It prints warnings if it suspects any of them are flawed or corrupt.
# -q --quick: only use the quick-n-dirty algorithm. It's much faster
# but some of your PDFs will probably appear to have 0 pages.
# -v --verbose: print the result for each file. Normally this utility
# only produces output when its tests fail.
#
# Thanks to fpmurphy's fast PDF page counting snippet:
# http://www.unix.com/shell-programming-scripting/55661-how-get-number-pages-pdf-file.html
# Parses the command line into the globals used by the rest of the script.
#
# Arguments (any order):
#   -q, --quick     sets quick=true   (skip the slow pdf2ps page count)
#   -v, --verbose   sets verbose=true (print a line for every file)
#   DIR             directory to scan; stored in root. If several are
#                   given the last one wins. Defaults to "." when absent
#                   or empty.
# Unknown options are reported on stderr and otherwise ignored.
#
# Globals written: quick, verbose, root
parse_args() {
  quick=false
  verbose=false
  root=""
  while (( $# )); do
    case "$1" in
      -v | --verbose) verbose=true ;;
      -q | --quick) quick=true ;;
      -*) printf '%s: ignoring unknown option: %s\n' "${0##*/}" "$1" >&2 ;;
      # Anything that is not an option is the directory to scan. The
      # previous loop shifted every argument away before reading it, so
      # a directory given on the command line was never used.
      *) root="$1" ;;
    esac
    shift
  done
  if [[ -z "$root" ]]; then
    root="."
  fi
}
parse_args "$@"
# Snapshot of IFS; nothing in the visible script reads it back, it is kept
# only so the set of globals the script defines stays unchanged.
oldifs="$IFS"
# Given the name of a pdf file, outputs the number of pages in the file.
# This is just a quick-n-dirty algorithm; it's known to fail.
#
# It scans the printable strings of the file for "/Count <digits>" and
# reports the largest value found (0 when there is none). Only the first
# "/Count " on each line is examined.
#
# Arguments: $1 - path of the file to inspect
# Outputs:   the count on stdout, without a trailing newline
count_pdf_pages() {
  local page_count=0
  local num
  while IFS= read -r num; do
    # Turns "BLA BLA/Count 21314BLA BLA" into 21314
    num="${num#*/Count }"
    num="${num%%[!0-9]*}"
    # No digits directly after "/Count " (for example a negative count):
    # nothing to compare, and an empty operand would be an arithmetic
    # syntax error.
    [[ -n "$num" ]] || continue
    # Implausibly long digit runs would overflow shell arithmetic.
    (( ${#num} <= 18 )) || continue
    # 10# forces decimal so that leading zeros are not read as octal.
    if (( 10#$num > page_count )); then
      page_count=$(( 10#$num ))
    fi
  done < <(strings "$1" | grep -F -- "/Count ")
  printf '%s' "$page_count"
}
# Verification pass: every *.pdf below $root is checked, in sorted order.
#
# For each file:
#   1. file(1) must identify it as a PDF document.
#   2. count_pdf_pages must yield a number (the quick count).
#   3. Unless $quick is true, the pages emitted by pdf2ps are counted (the
#      slow count) and compared with the quick count. A quick count of 0
#      is treated as a failure of the quick algorithm and replaced by the
#      slow count, with a warning.
# Problems are printed as "ERROR: ..." lines; with $verbose true each file
# that passes is printed together with its page count.
#
# Exit status: 0 when no ERROR was reported, 1 otherwise.
#
# Paths are read one per line, so file names containing newlines are not
# supported.

# Not anchored at the end of the line so that any details printed after
# the version number do not make a valid PDF look unrecognized.
# NOTE(review): some file(1) releases are believed to append such details
# (e.g. a page count) -- confirm against the versions you target.
filepat=': PDF document, version [0-9.]*'
# Anchored: the whole value must be digits, not merely contain one.
numpat='^[0-9]+$'

# The brace group is the last stage of the pipeline and therefore runs in
# a subshell; its "exit" provides the status of the whole pipeline.
find "$root" -name "*.pdf" -print | sort | {
  failed=0
  # IFS= and -r keep backslashes and leading/trailing blanks in paths.
  while IFS= read -r path; do
    # Skip blank lines instead of abandoning the rest of the scan.
    [ -z "$path" ] && continue

    type=$(file -- "$path")
    # This RE is probably way too restrictive. I'll accept patches.
    if [[ ! "$type" =~ $filepat ]]; then
      printf '%s\n' "ERROR: $type <-- bad or unrecognized file type!"
      failed=1
      continue
    fi

    quick_count=$(count_pdf_pages "$path")
    if [[ ! "$quick_count" =~ $numpat ]]; then
      printf '%s\n' "ERROR: $path: bad quick count '$quick_count'"
      failed=1
      continue
    fi

    if ! $quick; then
      # stdin is redirected so the converter can never consume the list
      # of paths this loop is reading.
      slow_count=$(pdf2ps -sOutputFile=- "$path" < /dev/null | grep -c "%%Page: ")
      if [[ ! "$slow_count" =~ $numpat ]]; then
        printf '%s\n' "ERROR: $path: bad slow count '$slow_count'"
        failed=1
        continue
      fi

      if [ "$quick_count" -lt 1 ]; then
        # the quick count algorithm sometimes fails
        # we just have to assume the slow count is correct
        printf '%s\n' "warning: $path: quick count failed ($quick_count). No problem."
        quick_count="$slow_count"
      fi

      if [ "$quick_count" -ne "$slow_count" ]; then
        printf '%s\n' "ERROR: $path: quick count ($quick_count) and slow count ($slow_count) differ!"
        failed=1
        continue
      fi
    fi

    # An if statement (rather than "$verbose && ...") keeps a false
    # $verbose from becoming the exit status of the loop.
    if $verbose; then
      printf '%s\n' "$path: $quick_count"
    fi
  done
  exit "$failed"
}