Skip to content

Commit 69d495d

Browse files
author
njn
committedJun 30, 2010
Added cg_diff.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@11193 a5019735-40e9-0310-863c-91ae7b9d1cf9
1 parent 69ad7d8 commit 69d495d

File tree

6 files changed

+482
-19
lines changed

6 files changed

+482
-19
lines changed
 

‎NEWS

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,18 @@ Release 3.6.0 (???)
33
~~~~~~~~~~~~~~~~~~~
44
Improvements:
55
- XXX: ARM support
6+
- XXX: Mac OS 10.6 support (32 and 64 bit)
7+
- XXX: Much faster startup on Mac OS 10.5 for 64-bit programs.
68

79
- --smc-check=all is much faster
810

11+
- Cachegrind has a new processing script, cg_diff, which finds the
12+
difference between two profiles. It's very useful for evaluating the
13+
performance effects of a change in a program.
14+
15+
Related to this change, the meaning of cg_annotate's (rarely-used)
16+
--threshold option has changed; this is unlikely to affect many people, but if
17+
you do use it please see the user manual for details.
918

1019

1120
Release 3.5.0 (19 August 2009)

‎cachegrind/Makefile.am

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ EXTRA_DIST = \
88
# Headers, etc
99
#----------------------------------------------------------------------------
1010

11-
bin_SCRIPTS = cg_annotate
11+
bin_SCRIPTS = cg_annotate cg_diff
1212

1313
noinst_HEADERS = \
1414
cg_arch.h \

‎cachegrind/cg_annotate.in

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ my @sort_order;
120120
# handled this proportion of all the events thresholded.
121121
my @thresholds;
122122

123-
my $default_threshold = 99;
123+
my $default_threshold = 0.1;
124124

125125
my $single_threshold = $default_threshold;
126126

@@ -149,8 +149,8 @@ usage: cg_annotate [options] cachegrind-out-file [source-files...]
149149
--version show version
150150
--show=A,B,C only show figures for events A,B,C [all]
151151
--sort=A,B,C sort columns by events A,B,C [event column order]
152-
--threshold=<0--100> percentage of counts (of primary sort event) we
153-
are interested in [$default_threshold%]
152+
--threshold=<0--20> a function is shown if it accounts for more than x% of
153+
the counts of the primary sort event [$default_threshold]
154154
--auto=yes|no annotate all source files containing functions
155155
that helped reach the event count threshold [no]
156156
--context=N print N lines of context before and after
@@ -217,7 +217,7 @@ sub process_cmd_line()
217217
# --threshold=X (tolerates a trailing '%')
218218
} elsif ($arg =~ /^--threshold=([\d\.]+)%?$/) {
219219
$single_threshold = $1;
220-
($1 >= 0 && $1 <= 100) or die($usage);
220+
($1 >= 0 && $1 <= 20) or die($usage);
221221

222222
# --auto=yes|no
223223
} elsif ($arg =~ /^--auto=yes$/) {
@@ -377,7 +377,7 @@ sub read_input_file()
377377
# the primary sort event, and 0% for the rest.
378378
if (not @thresholds) {
379379
foreach my $e (@sort_order) {
380-
push(@thresholds, 0);
380+
push(@thresholds, 100);
381381
}
382382
$thresholds[0] = $single_threshold;
383383
}
@@ -617,17 +617,18 @@ sub print_summary_and_fn_totals ()
617617
# Print functions, stopping when the threshold has been reached.
618618
foreach my $fn_name (@fn_fullnames) {
619619

620+
my $fn_CC = $fn_totals{$fn_name};
621+
620622
# Stop when we've reached all the thresholds
621-
my $reached_all_thresholds = 1;
623+
my $any_thresholds_exceeded = 0;
622624
foreach my $i (0 .. scalar @thresholds - 1) {
623-
my $prop = safe_div(abs($curr_totals[$i] * 100),
625+
my $prop = safe_div(abs($fn_CC->[$sort_order[$i]] * 100),
624626
abs($summary_CC->[$sort_order[$i]]));
625-
$reached_all_thresholds &&= ($prop >= $thresholds[$i]);
627+
$any_thresholds_exceeded ||= ($prop >= $thresholds[$i]);
626628
}
627-
last if $reached_all_thresholds;
629+
last if not $any_thresholds_exceeded;
628630

629631
# Print function results
630-
my $fn_CC = $fn_totals{$fn_name};
631632
print_CC($fn_CC, $fn_CC_col_widths);
632633
print(" $fn_name\n");
633634

‎cachegrind/cg_diff.in

Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
#! @PERL@
2+
3+
##--------------------------------------------------------------------##
4+
##--- Cachegrind's differencer. cg_diff.in ---##
5+
##--------------------------------------------------------------------##
6+
7+
# This file is part of Cachegrind, a Valgrind tool for cache
8+
# profiling programs.
9+
#
10+
# Copyright (C) 2002-2010 Nicholas Nethercote
11+
# njn@valgrind.org
12+
#
13+
# This program is free software; you can redistribute it and/or
14+
# modify it under the terms of the GNU General Public License as
15+
# published by the Free Software Foundation; either version 2 of the
16+
# License, or (at your option) any later version.
17+
#
18+
# This program is distributed in the hope that it will be useful, but
19+
# WITHOUT ANY WARRANTY; without even the implied warranty of
20+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21+
# General Public License for more details.
22+
#
23+
# You should have received a copy of the GNU General Public License
24+
# along with this program; if not, write to the Free Software
25+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26+
# 02111-1307, USA.
27+
#
28+
# The GNU General Public License is contained in the file COPYING.
29+
30+
#----------------------------------------------------------------------------
31+
# This is a very cut-down and modified version of cg_annotate.
32+
#----------------------------------------------------------------------------
33+
34+
use warnings;
35+
use strict;
36+
37+
#----------------------------------------------------------------------------
38+
# Global variables
39+
#----------------------------------------------------------------------------
40+
41+
# Version number
42+
my $version = "@VERSION@";
43+
44+
# Usage message.
45+
my $usage = <<END
46+
usage: cg_diff [options] <cachegrind-out-file1> <cachegrind-out-file2>
47+
48+
options for the user, with defaults in [ ], are:
49+
-h --help show this message
50+
-v --version show version
51+
--mod-filename=<expr> a Perl search-and-replace expression that is applied
52+
to filenames, eg. --mod-filename='s/prog[0-9]/projN/'
53+
54+
cg_diff is Copyright (C) 2010-2010 Nicholas Nethercote.
55+
and licensed under the GNU General Public License, version 2.
56+
Bug reports, feedback, admiration, abuse, etc, to: njn\@valgrind.org.
57+
58+
END
59+
;
60+
61+
# --mod-filename expression
62+
my $mod_filename = undef;
63+
64+
#-----------------------------------------------------------------------------
65+
# Argument and option handling
66+
#-----------------------------------------------------------------------------
67+
sub process_cmd_line()
68+
{
69+
my ($file1, $file2) = (undef, undef);
70+
71+
for my $arg (@ARGV) {
72+
73+
if ($arg =~ /^-/) {
74+
# --version
75+
if ($arg =~ /^-v$|^--version$/) {
76+
die("cg_diff-$version\n");
77+
78+
} elsif ($arg =~ /^--mod-filename=(.*)/) {
79+
$mod_filename = $1;
80+
81+
} else { # -h and --help fall under this case
82+
die($usage);
83+
}
84+
85+
} elsif (not defined($file1)) {
86+
$file1 = $arg;
87+
88+
} elsif (not defined($file2)) {
89+
$file2 = $arg;
90+
91+
} else {
92+
die($usage);
93+
}
94+
}
95+
96+
# Must have specified two input files.
97+
if (not defined $file1 or not defined $file2) {
98+
die($usage);
99+
}
100+
101+
return ($file1, $file2);
102+
}
103+
104+
#-----------------------------------------------------------------------------
105+
# Reading of input file
106+
#-----------------------------------------------------------------------------
107+
sub max ($$)
108+
{
109+
my ($x, $y) = @_;
110+
return ($x > $y ? $x : $y);
111+
}
112+
113+
# Add the two arrays; any '.' entries are ignored. Two tricky things:
114+
# 1. If $b->[$i] is undefined, it defaults to 0 which is what we want; we turn
115+
# off warnings to allow this. This makes things about 10% faster than
116+
# checking for definedness ourselves.
117+
# 2. We don't add an undefined count or a ".", even though its value is 0,
118+
# because we don't want to make a $b->[$i] that is undef become 0
119+
# unnecessarily.
120+
sub add_array_a_to_b ($$)
121+
{
122+
my ($a, $b) = @_;
123+
124+
my $n = max(scalar @$a, scalar @$b);
125+
$^W = 0;
126+
foreach my $i (0 .. $n-1) {
127+
$b->[$i] += $a->[$i] if (defined $a->[$i] && "." ne $a->[$i]);
128+
}
129+
$^W = 1;
130+
}
131+
132+
sub sub_array_b_from_a ($$)
133+
{
134+
my ($a, $b) = @_;
135+
136+
my $n = max(scalar @$a, scalar @$b);
137+
$^W = 0;
138+
foreach my $i (0 .. $n-1) {
139+
$a->[$i] -= $b->[$i]; # XXX: doesn't handle '.' entries
140+
}
141+
$^W = 1;
142+
}
143+
144+
# Add each event count to the CC array. '.' counts become undef, as do
145+
# missing entries (implicitly).
146+
sub line_to_CC ($$)
147+
{
148+
my ($line, $numEvents) = @_;
149+
150+
my @CC = (split /\s+/, $line);
151+
(@CC <= $numEvents) or die("Line $.: too many event counts\n");
152+
return \@CC;
153+
}
154+
155+
sub read_input_file($)
156+
{
157+
my ($input_file) = @_;
158+
159+
open(INPUTFILE, "< $input_file")
160+
|| die "Cannot open $input_file for reading\n";
161+
162+
# Read "desc:" lines.
163+
my $desc;
164+
my $line;
165+
while ($line = <INPUTFILE>) {
166+
if ($line =~ s/desc:\s+//) {
167+
$desc .= $line;
168+
} else {
169+
last;
170+
}
171+
}
172+
173+
# Read "cmd:" line (Nb: will already be in $line from "desc:" loop above).
174+
($line =~ s/^cmd:\s+//) or die("Line $.: missing command line\n");
175+
my $cmd = $line;
176+
chomp($cmd); # Remove newline
177+
178+
# Read "events:" line. We make a temporary hash in which the Nth event's
179+
# value is N, which is useful for handling --show/--sort options below.
180+
$line = <INPUTFILE>;
181+
(defined $line && $line =~ s/^events:\s+//)
182+
or die("Line $.: missing events line\n");
183+
my @events = split(/\s+/, $line);
184+
my $numEvents = scalar @events;
185+
186+
my $currFileName;
187+
my $currFileFuncName;
188+
189+
my %CCs; # hash("$filename#$funcname" => CC array)
190+
my $currCC = undef; # CC array
191+
192+
my $summaryCC;
193+
194+
# Read body of input file.
195+
while (<INPUTFILE>) {
196+
s/#.*$//; # remove comments
197+
if (s/^(\d+)\s+//) {
198+
my $CC = line_to_CC($_, $numEvents);
199+
defined($currCC) || die;
200+
add_array_a_to_b($CC, $currCC);
201+
202+
} elsif (s/^fn=(.*)$//) {
203+
defined($currFileName) || die;
204+
$currFileFuncName = "$currFileName#$1";
205+
$currCC = $CCs{$currFileFuncName};
206+
if (not defined $currCC) {
207+
$currCC = [];
208+
$CCs{$currFileFuncName} = $currCC;
209+
}
210+
211+
} elsif (s/^fl=(.*)$//) {
212+
$currFileName = $1;
213+
if (defined $mod_filename) {
214+
eval "\$currFileName =~ $mod_filename";
215+
}
216+
# Assume that a "fl=" line is followed by a "fn=" line.
217+
$currFileFuncName = undef;
218+
219+
} elsif (s/^\s*$//) {
220+
# blank, do nothing
221+
222+
} elsif (s/^summary:\s+//) {
223+
$summaryCC = line_to_CC($_, $numEvents);
224+
(scalar(@$summaryCC) == @events)
225+
or die("Line $.: summary event and total event mismatch\n");
226+
227+
} else {
228+
warn("WARNING: line $. malformed, ignoring\n");
229+
}
230+
}
231+
232+
# Check if summary line was present
233+
if (not defined $summaryCC) {
234+
die("missing final summary line, aborting\n");
235+
}
236+
237+
close(INPUTFILE);
238+
239+
return ($cmd, \@events, \%CCs, $summaryCC);
240+
}
241+
242+
#----------------------------------------------------------------------------
243+
# "main()"
244+
#----------------------------------------------------------------------------
245+
# Commands seen in the files. Need not match.
246+
my $cmd1;
247+
my $cmd2;
248+
249+
# Events seen in the files. They must match.
250+
my $events1;
251+
my $events2;
252+
253+
# Individual CCs, organised by filename/funcname/line_num.
254+
# hashref("$filename#$funcname", CC array)
255+
my $CCs1;
256+
my $CCs2;
257+
258+
# Total counts for summary (an arrayref).
259+
my $summaryCC1;
260+
my $summaryCC2;
261+
262+
#----------------------------------------------------------------------------
263+
# Read the input files
264+
#----------------------------------------------------------------------------
265+
my ($file1, $file2) = process_cmd_line();
266+
($cmd1, $events1, $CCs1, $summaryCC1) = read_input_file($file1);
267+
($cmd2, $events2, $CCs2, $summaryCC2) = read_input_file($file2);
268+
269+
#----------------------------------------------------------------------------
270+
# Check the events match
271+
#----------------------------------------------------------------------------
272+
my $n = max(scalar @$events1, scalar @$events2);
273+
$^W = 0; # turn off warnings, because we might hit undefs
274+
foreach my $i (0 .. $n-1) {
275+
($events1->[$i] eq $events2->[$i]) || die "events don't match, aborting\n";
276+
}
277+
$^W = 1;
278+
279+
#----------------------------------------------------------------------------
280+
# Do the subtraction: CCs2 -= CCs1
281+
#----------------------------------------------------------------------------
282+
while (my ($filefuncname, $CC1) = each(%$CCs1)) {
283+
my $CC2 = $CCs2->{$filefuncname};
284+
if (not defined $CC2) {
285+
$CC2 = [];
286+
sub_array_b_from_a($CC2, $CC1); # CC2 -= CC1
287+
$CCs2->{$filefuncname} = $CC2;
288+
} else {
289+
sub_array_b_from_a($CC2, $CC1); # CC2 -= CC1
290+
}
291+
}
292+
sub_array_b_from_a($summaryCC2, $summaryCC1);
293+
294+
#----------------------------------------------------------------------------
295+
# Print the result, in CCs2
296+
#----------------------------------------------------------------------------
297+
print("desc: Files compared: $file1; $file2\n");
298+
print("cmd: $cmd1; $cmd2\n");
299+
print("events: ");
300+
for my $e (@$events1) {
301+
print(" $e");
302+
}
303+
print("\n");
304+
305+
while (my ($filefuncname, $CC) = each(%$CCs2)) {
306+
307+
my @x = split(/#/, $filefuncname);
308+
(scalar @x == 2) || die;
309+
310+
print("fl=$x[0]\n");
311+
print("fn=$x[1]\n");
312+
313+
print("0");
314+
foreach my $n (@$CC) {
315+
print(" $n");
316+
}
317+
print("\n");
318+
}
319+
320+
print("summary:");
321+
foreach my $n (@$summaryCC2) {
322+
print(" $n");
323+
}
324+
print("\n");
325+
326+
##--------------------------------------------------------------------##
327+
##--- end ---##
328+
##--------------------------------------------------------------------##

‎cachegrind/docs/cg-manual.xml

Lines changed: 132 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,10 @@ be normally run.</para>
9898
<para>Then, you need to run Cachegrind itself to gather the profiling
9999
information, and then run cg_annotate to get a detailed presentation of that
100100
information. As an optional intermediate step, you can use cg_merge to sum
101-
together the outputs of multiple Cachegrind runs, into a single file which
102-
you then use as the input for cg_annotate.</para>
101+
together the outputs of multiple Cachegrind runs into a single file which
102+
you then use as the input for cg_annotate. Alternatively, you can use
103+
cg_diff to difference the outputs of two Cachegrind runs into a single file
104+
which you then use as the input for cg_annotate.</para>
103105

104106

105107
<sect2 id="cg-manual.running-cachegrind" xreflabel="Running Cachegrind">
@@ -697,6 +699,85 @@ fail these checks.</para>
697699
</sect2>
698700

699701

702+
<sect2 id="cg-manual.cg_diff" xreflabel="cg_diff">
703+
<title>Differencing Profiles with cg_diff</title>
704+
705+
<para>
706+
cg_diff is a simple program which
707+
reads two profile files, as created by Cachegrind, finds the difference
708+
between them, and writes the results into another file in the same format.
709+
You can then examine the differenced results using
710+
<computeroutput>cg_annotate &lt;filename&gt;</computeroutput>, as
711+
described above. This is very useful if you want to measure how a change to
712+
a program affected its performance.
713+
</para>
714+
715+
<para>
716+
cg_diff is invoked as follows:
717+
</para>
718+
719+
<programlisting><![CDATA[
720+
cg_diff file1 file2]]></programlisting>
721+
722+
<para>
723+
It reads and checks <computeroutput>file1</computeroutput>, then reads
724+
and checks <computeroutput>file2</computeroutput>, then computes the
725+
difference (effectively <computeroutput>file2</computeroutput> -
726+
<computeroutput>file1</computeroutput>). The final results are written to
727+
standard output.</para>
728+
729+
<para>
730+
Costs are summed on a per-function basis. Per-line costs are not summed,
731+
because doing so is too difficult. For example, consider differencing two
732+
profiles, one from a single-file program A, and one from the same program A
733+
where a single blank line was inserted at the top of the file. Every single
734+
per-line count has changed. In comparison, the per-function counts have not
735+
changed. The per-function count differences are still very useful for
736+
determining differences between programs. Note that because the result is
737+
the difference of two profiles, many of the counts will be negative; this
738+
indicates that the counts for the relevant function are fewer in the second
739+
version than those in the first version.</para>
740+
741+
<para>
742+
cg_diff does not attempt to check
743+
that the input files come from runs of the same executable. It will
744+
happily difference profile files from completely unrelated
745+
programs. It does however check that the
746+
<computeroutput>Events:</computeroutput> lines of all the inputs are
747+
identical, so as to ensure that the subtraction of costs makes sense.
748+
For example, it would be nonsensical for it to subtract a number indicating
749+
D1 read references from a number in a different file indicating L2
750+
write misses.</para>
751+
752+
<para>
753+
A number of other syntax and sanity checks are done whilst reading the
754+
inputs. cg_diff will stop and
755+
attempt to print a helpful error message if any of the input files
756+
fail these checks.</para>
757+
758+
<para>
759+
Sometimes you will want to compare Cachegrind profiles of two versions of a
760+
program that you have sitting side-by-side. For example, you might have
761+
<computeroutput>version1/prog.c</computeroutput> and
762+
<computeroutput>version2/prog.c</computeroutput>, where the second is
763+
slightly different to the first. A straight comparison of the two will not
764+
be useful -- because functions are qualified with filenames, a function
765+
<function>f</function> will be listed as
766+
<computeroutput>version1/prog.c:f</computeroutput> for the first version but
767+
<computeroutput>version2/prog.c:f</computeroutput> for the second
768+
version.</para>
769+
770+
<para>
771+
When this happens, you can use the <option>--mod-filename</option> option.
772+
Its argument is a Perl search-and-replace expression that will be applied
773+
to all the filenames in both Cachegrind output files. It can be used to
774+
remove minor differences in filenames. For example, the option
775+
<option>--mod-filename='s/version[0-9]/versionN/'</option> will suffice for
776+
this case.</para>
777+
778+
</sect2>
779+
780+
700781
</sect1>
701782

702783

@@ -842,21 +923,21 @@ fail these checks.</para>
842923

843924
<varlistentry>
844925
<term>
845-
<option><![CDATA[--threshold=X [default: 99%] ]]></option>
926+
<option><![CDATA[--threshold=X [default: 0.1%] ]]></option>
846927
</term>
847928
<listitem>
848929
<para>Sets the threshold for the function-by-function
849-
summary. Functions are shown that account for more than X%
850-
of the primary sort event. If auto-annotating, also affects
851-
which files are annotated.</para>
930+
summary. A function is shown if it accounts for more than X%
931+
of the counts for the primary sort event. If auto-annotating, also
932+
affects which files are annotated.</para>
852933

853934
<para>Note: thresholds can be set for more than one of the
854935
events by appending any events for the
855936
<option>--sort</option> option with a colon
856937
and a number (no spaces, though). E.g. if you want to see
857-
the functions that cover 99% of L2 read misses and 99% of L2
938+
each function that covers more than 1% of L2 read misses or 1% of L2
858939
write misses, use this option:</para>
859-
<para><option>--sort=D2mr:99,D2mw:99</option></para>
940+
<para><option>--sort=D2mr:1,D2mw:1</option></para>
860941
</listitem>
861942
</varlistentry>
862943

@@ -900,6 +981,49 @@ fail these checks.</para>
900981
</sect1>
901982

902983

984+
<sect1 id="cg-manual.diffopts" xreflabel="cg_diff Command-line Options">
985+
<title>cg_diff Command-line Options</title>
986+
987+
<!-- start of xi:include in the manpage -->
988+
<variablelist id="cg_diff.opts.list">
989+
990+
<varlistentry>
991+
<term>
992+
<option><![CDATA[-h --help ]]></option>
993+
</term>
994+
<listitem>
995+
<para>Show the help message.</para>
996+
</listitem>
997+
</varlistentry>
998+
999+
<varlistentry>
1000+
<term>
1001+
<option><![CDATA[-v --version ]]></option>
1002+
</term>
1003+
<listitem>
1004+
<para>Show the version number.</para>
1005+
</listitem>
1006+
</varlistentry>
1007+
1008+
<varlistentry>
1009+
<term>
1010+
<option><![CDATA[--mod-filename=<expr> [default: none]]]></option>
1011+
</term>
1012+
<listitem>
1013+
<para>Specifies a Perl search-and-replace expression that is applied
1014+
to all filenames. Useful for removing minor differences in paths
1015+
between two different versions of a program that are sitting in
1016+
different directories.</para>
1017+
</listitem>
1018+
</varlistentry>
1019+
1020+
</variablelist>
1021+
<!-- end of xi:include in the manpage -->
1022+
1023+
</sect1>
1024+
1025+
1026+
9031027

9041028
<sect1 id="cg-manual.acting-on"
9051029
xreflabel="Acting on Cachegrind's Information">

‎configure.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,6 +1827,7 @@ AC_CONFIG_FILES([
18271827
cachegrind/tests/Makefile
18281828
cachegrind/tests/x86/Makefile
18291829
cachegrind/cg_annotate
1830+
cachegrind/cg_diff
18301831
callgrind/Makefile
18311832
callgrind/callgrind_annotate
18321833
callgrind/callgrind_control

0 commit comments

Comments
 (0)
Please sign in to comment.