Skip to content

Commit ac7bd09

Browse files
kvanhees authored and masahir0y committed
scripts: add verifier script for builtin module range data
The modules.builtin.ranges offset range data for builtin modules is generated at compile time based on the list of built-in modules and the vmlinux.map and vmlinux.o.map linker maps. This data can be used to determine whether a symbol at a particular address belongs to module code that was configured to be compiled into the kernel proper as a built-in module (rather than as a standalone module). This patch adds a script that uses the generated modules.builtin.ranges data to annotate the symbols in the System.map with module names if their address falls within a range that belongs to one or more built-in modules. It then processes the vmlinux.map (and if needed, vmlinux.o.map) to verify the annotation: - For each top-level section: - For each object in the section: - Determine whether the object is part of a built-in module (using modules.builtin and the .*.cmd file used to compile the object as suggested in [0]) - For each symbol in that object, verify that the built-in module association (or lack thereof) matches the annotation given to the symbol. Signed-off-by: Kris Van Hees <[email protected]> Reviewed-by: Nick Alcock <[email protected]> Reviewed-by: Alan Maguire <[email protected]> Tested-by: Sam James <[email protected]> Reviewed-by: Sami Tolvanen <[email protected]> Tested-by: Sami Tolvanen <[email protected]> Signed-off-by: Masahiro Yamada <[email protected]>
1 parent 5f5e734 commit ac7bd09

File tree

1 file changed

+370
-0
lines changed

1 file changed

+370
-0
lines changed

scripts/verify_builtin_ranges.awk

Lines changed: 370 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,370 @@
1+
#!/usr/bin/gawk -f
2+
# SPDX-License-Identifier: GPL-2.0
3+
# verify_builtin_ranges.awk: Verify address range data for builtin modules
4+
# Written by Kris Van Hees <[email protected]>
5+
#
6+
# Usage: verify_builtin_ranges.awk modules.builtin.ranges System.map \
7+
# modules.builtin vmlinux.map vmlinux.o.map
8+
#
9+
10+
# Return the module name(s) (if any) associated with the given object.
11+
#
12+
# If we have seen this object before, return information from the cache.
13+
# Otherwise, retrieve it from the corresponding .cmd file.
14+
#
15+
function get_module_info(fn, mod, obj, s) {
	# Fast path: this object was resolved by an earlier call.
	if (fn in omod)
		return omod[fn];

	# An object name without a directory component is reported as not
	# belonging to any module (this answer is not cached).
	if (!match(fn, /\/[^/]+$/))
		return "";

	obj = fn;
	mod = "";

	# Turn <dir>/<name> into <dir>/.<name>.cmd.  RSTART is the position of
	# the last '/' as set by the match() above.
	fn = substr(fn, 1, RSTART) "." substr(fn, RSTART + 1) ".cmd";

	# Only the first line of the .cmd file is consulted.  If it cannot be
	# read, complain and abort the run; total is zeroed so that the END
	# rule does not print a report.
	if ((getline s <fn) != 1) {
		print "ERROR: Failed to read: " fn "\n\n" \
		      " For kernels built with O=<objdir>, cd to <objdir>\n" \
		      " and execute this script as ./source/scripts/..." \
		      >"/dev/stderr";
		close(fn);
		total = 0;
		exit(1);
	}
	close(fn);

	# 16 and 13 are the lengths of "DKBUILD_MODFILE=" and "RUST_MODFILE=",
	# i.e. the part of the match that precedes the value.
	if (match(s, /DKBUILD_MODFILE=['"]+[^'"]+/)) {
		mod = substr(s, RSTART + 16, RLENGTH - 16);
		gsub(/['"]/, "", mod);
	} else if (match(s, /RUST_MODFILE=[^ ]+/)) {
		mod = substr(s, RSTART + 13, RLENGTH - 13);
	}

	# A value without spaces names a single module file.  It only counts
	# if it is listed in mods[] (the built-in modules); otherwise the
	# object is treated as not being part of a module.
	if (mod !~ / / && !(mod in mods))
		mod = "";

	# Reduce each path to its final component and use '_' instead of '-'.
	gsub(/([^/ ]*\/)+/, "", mod);
	gsub(/-/, "_", mod);

	# Remember the answer under the original object name.
	omod[obj] = mod;

	return mod;
}
60+
61+
# Return a representative integer value for a given hexadecimal address.
62+
#
63+
# Since all kernel addresses fall within the same memory region, we can safely
64+
# strip off the first 4 hex digits before performing the hex-to-dec conversion,
65+
# thereby avoiding integer overflows.
66+
#
67+
function addr2val(val) {
	# Drop an optional "0x" prefix so that only the hex digits remain.
	sub(/^0x/, "", val);

	# A full 16-digit address loses its 4 leading digits before the
	# conversion; anything shorter is converted as is.
	return strtonum("0x" (length(val) == 16 ? substr(val, 5) : val));
}
73+
74+
# Verify that all five input files were given on the command line.
75+
#
76+
BEGIN {
	# ARGC counts the program name plus the five expected input files.
	if (ARGC < 6) {
		print "Syntax: verify_builtin_ranges.awk <ranges-file> <system-map>\n" \
		      " <builtin-file> <vmlinux-map> <vmlinux-o-map>\n" \
		      >"/dev/stderr";

		# With total at zero the END rule prints no report.
		total = 0;
		exit(1);
	}
}
85+
86+
# (1) Load the built-in module address range data.
87+
#
88+
ARGIND == 1 {
	# Keep every record of the first file, indexed by its line number,
	# and count how many were stored.
	rcnt++;
	ranges[FNR] = $0;

	next;
}
93+
94+
# (2) Annotate System.map symbols with module names.
95+
#
96+
ARGIND == 2 {
	# Save what we need from the System.map record; $0 is overwritten with
	# range records further down.
	addr = addr2val($1);
	name = $3;

	# Consume range records until the current range ends beyond this
	# symbol (or the range data runs out).
	while (addr >= mod_eaddr) {
		# A section record is pending: its base address can only be
		# computed once its anchor symbol is seen.  Skip symbols until
		# then.
		if (sect_symb) {
			if (sect_symb != name)
				next;

			sect_base = addr - sect_off;
			if (dbg)
				printf "[%s] BASE (%s) %016x - %016x = %016x\n", sect_name, sect_symb, addr, sect_off, sect_base >"/dev/stderr";
			sect_symb = 0;
		}

		ridx++;
		if (ridx > rcnt)
			break;

		# Re-split the next range record, breaking up "<start>-<end>".
		$0 = ranges[ridx];
		sub(/-/, " ");
		if ($4 == "=") {
			# Section record: name, offset and anchor symbol.
			sect_name = $1;
			sect_off = strtonum("0x" $2);
			sect_symb = $5;
		} else {
			# Module range record: addresses are relative to the
			# section base; whatever follows them is the module
			# name (or list of names).
			sub(/-/, " ");
			mod_saddr = strtonum("0x" $2) + sect_base;
			mod_eaddr = strtonum("0x" $3) + sect_base;
			$1 = $2 = $3 = "";
			sub(/^ +/, "");
			mod_name = $0;

			if (dbg)
				printf "[%s] %s from %016x to %016x\n", sect_name, mod_name, mod_saddr, mod_eaddr >"/dev/stderr";
		}
	}

	# Annotate the symbol if it lies within the current module range.
	idx = addr "-" name;
	if (addr >= mod_saddr && addr < mod_eaddr)
		sym2mod[idx] = mod_name;

	next;
}
139+
140+
# Once we are done annotating the System.map, we no longer need the ranges data.
141+
#
142+
ARGIND == 3 && FNR == 1 {
	# First record of the third file: the range data is no longer used.
	delete ranges;
}
145+
146+
# (3) Build a lookup map of built-in module names.
147+
#
148+
# Lines from modules.builtin will be like:
149+
# kernel/crypto/lzo-rle.ko
150+
# and we record the object name "crypto/lzo-rle".
151+
#
152+
ARGIND == 3 {
	# Reduce the record to the bare object name: drop the first "kernel/"
	# and then a trailing ".ko".
	sub(/kernel\//, "");
	sub(/\.ko$/, "");

	# Record the name so membership can be tested with "in".
	mods[$1] = 1;

	next;
}
159+
160+
# (4) Get a list of symbols (per object).
161+
#
162+
# Symbols by object are read from vmlinux.map, with fallback to vmlinux.o.map
163+
# if vmlinux is found to have linked in vmlinux.o.
164+
#
165+
166+
# If we were able to get the data we need from vmlinux.map, there is no need to
167+
# process vmlinux.o.map.
168+
#
169+
ARGIND == 5 && FNR == 1 && total > 0 {
	# Symbols were already counted, so the fifth file is not read at all;
	# exit passes control to the END rule.
	if (dbg)
		printf "Note: %s is not needed.\n", FILENAME >"/dev/stderr";

	exit;
}
174+
175+
# First determine whether we are dealing with a GNU ld or LLVM lld linker map.
176+
#
177+
ARGIND >= 4 && FNR == 1 && NF == 7 && $1 == "VMA" && $7 == "Symbol" {
	# A 7-column header line starting with "VMA" and ending in "Symbol"
	# marks the map as being in lld format.
	map_is_lld = 1;

	next;
}
181+
182+
# (LLD) Convert a section record from lld format to ld format.
183+
#
184+
ARGIND >= 4 && map_is_lld && NF == 5 && /[0-9] [^ ]+$/ {
	# Rebuild the record as: <name> 0x<field 1> 0x<field 3> load address
	# 0x<field 2>, so that the rules below can treat it like an ld record.
	$0 = sprintf("%s 0x%s 0x%s load address 0x%s", $5, $1, $3, $2);
}
187+
188+
# (LLD) Convert an object record from lld format to ld format.
189+
#
190+
ARGIND >= 4 && map_is_lld && NF == 5 && $5 ~ /:\(/ {
	# Members of any archive other than vmlinux.a are dropped.
	if (/\.a\(/ && !/ vmlinux\.a\(/)
		next;

	# Strip the parentheses, split "<object>:(<section>" into two fields
	# and remove the "vmlinux.a(" wrapper.  The order of these
	# substitutions matters.
	gsub(/\)/, "");
	sub(/:\(/, " ");
	sub(/ vmlinux\.a\(/, " ");

	# After re-splitting, $5 is the object and $6 is the section.
	$0 = sprintf(" %s 0x%s 0x%s %s", $6, $1, $3, $5);
}
199+
200+
# (LLD) Convert a symbol record from lld format to ld format.
201+
#
202+
ARGIND >= 4 && map_is_lld && NF == 5 && $5 ~ /^[A-Za-z_][A-Za-z0-9_]*$/ {
	# Keep only the address (field 1) and the symbol name (field 5).
	$0 = sprintf(" 0x%s %s", $1, $5);
}
205+
206+
# (LLD) We do not need any other lld linker map records.
207+
#
208+
ARGIND >= 4 && map_is_lld && /^[0-9a-f]{16} / {
	# Records that still start with a 16-digit hex address were not
	# converted by the rules above and are discarded.
	next;
}
211+
212+
# Handle section records with long section names (spilling onto a 2nd line).
213+
#
214+
ARGIND >= 4 && !map_is_lld && NF == 1 && /^[^ ]/ {
	# A lone word starting in the first column: hold on to it, read the
	# following line and join the two into a single record.
	s = $0;
	getline;
	$0 = s " " $0;
}
219+
220+
# Next section - previous one is done.
221+
#
222+
ARGIND >= 4 && /^[^ ]/ {
	# Any record starting in the first column ends the current section.
	sect = 0;
}
225+
226+
# Get the (top level) section name.
227+
#
228+
ARGIND >= 4 && /^\./ {
	# Sections that are explicitly of no interest here.
	if ($1 ~ /^\.orc_|_sites$|\.percpu/)
		next;

	# In vmlinux.map, a section with an all-zero address is ignored too;
	# every other section becomes the current one.
	if (!(ARGIND == 4 && $2 ~ /^0x0+$/))
		sect = $1;

	next;
}
241+
242+
# If we are not currently in a section we care about, ignore records.
243+
#
244+
!sect {
	# No current section is set: nothing below applies to this record.
	next;
}
247+
248+
# Handle object records with long section names (spilling onto a 2nd line).
249+
#
250+
ARGIND >= 4 && /^ [^ \*]/ && NF == 1 {
	# Only the (long) section name is on this line; the rest of the object
	# record is on the next one.  Join them into a single record.
	s = $0;
	getline;
	$0 = s " " $0;
}
257+
258+
# Objects linked in from static libraries are ignored.
259+
# If the object is vmlinux.o, we need to consult vmlinux.o.map for per-object
260+
# symbol information
261+
#
262+
ARGIND == 4 && NF == 4 && /^ [^ ]/ {
	# Archive members are not considered.
	if ($4 ~ /\.a\(/)
		next;

	# Record the address of the first object record seen for each
	# <top-level section>:<object section> pair.
	idx = sect ":" $1;
	if (!(idx in sect_addend)) {
		sect_addend[idx] = addr2val($2);
		if (dbg)
			printf "ADDEND %s = %016x\n", idx, sect_addend[idx] >"/dev/stderr";
	}

	# vmlinux.o as the object means the per-object symbol data has to
	# come from vmlinux.o.map instead.
	if ($4 == "vmlinux.o") {
		need_o_map = 1;
		next;
	}
}
277+
278+
# If data from vmlinux.o.map is needed, we only process section and object
279+
# records from vmlinux.map to determine which section we need to pay attention
280+
# to in vmlinux.o.map. So skip everything else from vmlinux.map.
281+
#
282+
ARGIND == 4 && need_o_map {
	# Once vmlinux.o.map is known to be needed, nothing further is taken
	# from this vmlinux.map record.
	next;
}
285+
286+
# Get module information for the current object.
287+
#
288+
ARGIND >= 4 && NF == 4 && /^ [^ ]/ {
	# Remember the object's section and the address just past its end
	# (start + size), then look up which module(s) the object belongs to.
	msect = $1;
	mod_eaddr = addr2val($2) + addr2val($3);
	mod_name = get_module_info($4);

	next;
}
295+
296+
# Process a symbol record.
297+
#
298+
# Evaluate the module information obtained from vmlinux.map (or vmlinux.o.map)
299+
# as follows:
300+
# - For all symbols in a given object:
301+
# - If the symbol is annotated with the same module name(s) that the object
302+
# belongs to, count it as a match.
303+
# - Otherwise:
304+
# - If the symbol is known to have duplicates of which at least one is
305+
# in a built-in module, disregard it.
306+
# - If the symbol is not annotated with any module name(s) AND the
307+
# object belongs to built-in modules, count it as missing.
308+
# - Otherwise, count it as a mismatch.
309+
#
310+
ARGIND >= 4 && NF == 2 && /^ / && $1 ~ /^0x/ {
	# Only symbols in a section for which an addend was recorded are
	# verified.
	idx = sect ":" msect;
	if (!(idx in sect_addend))
		next;

	addr = addr2val($1);

	# A symbol sitting exactly at the end address of an object that
	# belongs to a module is ignored: it lies just beyond the object, and
	# its address may be the start of another built-in module.
	if (mod_name && addr == mod_eaddr)
		next;

	# Addresses in vmlinux.o.map are relative; apply the section base.
	if (ARGIND == 5)
		addr += sect_addend[idx];

	# Same key format as used when annotating the System.map symbols.
	idx = addr "-" $2;
	mod = "";
	if (!(idx in sym2mod)) {
		# The symbol carries no annotation.
		if (mod_name != "") {
			print $2 " should be in " mod_name;
			missing++;
		} else {
			matches++;
		}
	} else {
		# The symbol is annotated: compare with the object's module(s).
		mod = sym2mod[idx];
		if (mod == mod_name) {
			mod_matches++;
			matches++;
		} else {
			if (mod_name == "")
				print $2 " in " mod " (should NOT be)";
			else
				print $2 " in " mod " (should be " mod_name ")";
			mismatches++;
		}
	}

	total++;

	next;
}
356+
357+
# Issue the comparison report.
358+
#
359+
END {
	# total is zero when no symbol was verified, or when it was reset by
	# one of the error paths; no report is printed in that case.
	if (total) {
		printf "Verification of %s:\n", ARGV[1];
		printf " Correct matches: %6d (%d%% of total)\n", matches, 100 * matches / total;

		# matches can be zero even though total is not (every symbol
		# mismatched or missing).  Dividing by it would be a fatal
		# error in awk and would abort the report, so report 0% then.
		printf " Module matches: %6d (%d%% of matches)\n", mod_matches, (matches ? 100 * mod_matches / matches : 0);
		printf " Mismatches: %6d (%d%% of total)\n", mismatches, 100 * mismatches / total;
		printf " Missing: %6d (%d%% of total)\n", missing, 100 * missing / total;

		# Any discrepancy makes the verification fail.
		if (mismatches || missing)
			exit(1);
	}
}

0 commit comments

Comments
 (0)