-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgit-find-reviewers
executable file
·282 lines (231 loc) · 11 KB
/
git-find-reviewers
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#!/usr/bin/env python3
"""Find the best reviewer(s) for a given changeset.
This works under both git and mercurial.
1) Runs 'hg diff <argv>' or 'git diff <argv>' (so by default, diffs all
currently edited files).
2) Analyzes the diff to find out exact lines that have changed.
3) Runs 'hg/git blame' to figure out who last modified those lines.
4) For each file, prints the usernames who last-modified any of the
diffed lines in the file, along with how many of the lines they
modified.
The idea is that if one user has modified all the lines you are
editing, they are a good candidate to review your change.
"""
import os
import re
import sys
import subprocess
# The line we care about in the diff output is
#    @@ -<startline>,<numlines> ...
# or @@ -<startline> ...         # in which <numlines> is taken to be 1
# (Everything else is header or diff content, which we ignore.)
# Raw strings are used so that '\d' reaches the regex engine as-is rather
# than being treated as an (invalid) string escape sequence.
_DIFFLINE_RE = re.compile(r'^@@ -(\d+)(?:,(\d+))? ')
# Matches a '--- <path>' line; group 1 is everything after the '--- '.
_NEWFILE_RE = re.compile(r'^--- (.*)')
class Mercurial(object):
    """Mercurial backend for findreviewers().

    Holds the `ui` and `repo` objects it is constructed with and uses
    them to list lines, diff against a base revision, and look up who
    last touched each line.
    """

    def __init__(self, ui, repo):
        self.ui = ui
        self.repo = repo

    def write(self, msg):
        """Send `msg` to the user through the ui object."""
        self.ui.write(msg)

    def find_wholefile_lines(self, files, revision='.'):
        """Return a map from abspath -> set-of-all-linenumbers in the file.

        The line count is the number of newline characters in the file's
        content at `revision`.
        """
        # NOTE(review): data() presumably returns bytes under Python 3
        # Mercurial, in which case count('\n') would raise -- confirm.
        base_ctx = self.repo[revision]
        matcher = base_ctx.match(files, None, None, 'relpath')
        return {
            path: set(range(
                1, base_ctx.filectx(path).data().count('\n') + 1))
            for path in base_ctx.walk(matcher)
        }

    def find_modified_lines(self, files, revision='.'):
        """Return a map from abspath -> set-of-linenumbers changed.

        Line numbers refer to the file as it is at `revision`, i.e. the
        "before" side of the diff against the working state.
        """
        import mercurial.mdiff

        base_ctx = self.repo[revision]
        working_ctx = self.repo[None]    # base revision + local edits

        # Restrict attention to the requested files that status() reports
        # in the first slot of its result.
        matcher = base_ctx.match(files, None, None, 'relpath')
        changed_paths = self.repo.status(base_ctx, None, match=matcher)[0]

        # Zero lines of context, so every hunk header covers exactly the
        # lines that changed.
        opts = mercurial.mdiff.diffopts(context=0, nodates=True)

        result = {}
        for path in changed_paths:
            old_text = base_ctx.filectx(path).data()
            new_text = working_ctx.filectx(path).data()
            unified = mercurial.mdiff.unidiff(
                old_text, None, new_text, None, path, path, opts=opts)

            touched = result.setdefault(path, set())
            for diff_line in unified.splitlines():
                hunk = _DIFFLINE_RE.match(diff_line)
                if not hunk:
                    continue    # header or diff content: not a hunk line
                first = int(hunk.group(1))
                count = int(hunk.group(2) or '1')
                touched.update(range(first, first + count))
        return result

    def get_annotation_info(self, abspaths, revision='.'):
        """Return a map abspath -> list-of-author-names.

        In the returned map, entry [filename][i] is the (shortened) name
        of whoever the annotation credits with line i.  Lines are
        1-indexed, so slot 0 of every list is a None placeholder.
        """
        short_names = {}    # full user string -> ui.shortuser() result
        base_ctx = self.repo[revision]
        authors_by_path = {}
        for path in abspaths:
            authors = authors_by_path[path] = [None]
            for anno_line in base_ctx[path].annotate(follow=True):
                full_name = anno_line[0].user()
                if full_name not in short_names:
                    short_names[full_name] = self.ui.shortuser(full_name)
                authors.append(short_names[full_name])
        return authors_by_path
class Git(object):
    """Git backend for findreviewers(): shells out to the `git` binary.

    NOTE(review): relative paths are turned into absolute ones with
    os.path.abspath (i.e. against the current directory), while git
    presumably prints and resolves them relative to the repository root.
    Confirm this is only meant to be run from the top of the work tree.
    """

    def write(self, msg):
        """Print `msg` on standard output."""
        sys.stdout.write(msg)

    @staticmethod
    def _count_lines(text):
        """Return the number of lines in `text` (bytes or str).

        A final line that lacks a trailing newline still counts as a line.
        """
        newline = b'\n' if isinstance(text, bytes) else '\n'
        num_lines = text.count(newline)
        if text and not text.endswith(newline):
            num_lines += 1
        return num_lines

    def find_wholefile_lines(self, files, revision='HEAD'):
        """Return a map from abspath -> set-of-all-linenumbers in the file.

        Arguments:
            files: iterable of paths to look up in `revision`.
            revision: the commit whose version of each file is measured.

        Raises:
            subprocess.CalledProcessError: if `git show` fails.
        """
        all_lines = {}
        for f in files:
            # check_output() returns bytes; _count_lines copes with that
            # without having to guess the file's encoding.
            before_text = subprocess.check_output(
                ['git', 'show', '%s:%s' % (revision, f)])
            num_lines = self._count_lines(before_text)
            all_lines[os.path.abspath(f)] = set(range(1, num_lines + 1))
        return all_lines

    def find_modified_lines(self, files, revision='HEAD'):
        """Return a map from abspath -> set-of-linenumbers changed.

        Line numbers refer to the file as it is in `revision` (the '-'
        side of each hunk header).

        Raises:
            subprocess.CalledProcessError: if `git diff` fails.
        """
        modified_lines = {}
        # Only count Deleted and Modified files.  -U0 drops context lines
        # so each hunk header covers exactly the changed lines.
        # list(files) lets callers pass a tuple as well as a list.
        # errors='replace': only the ASCII header lines are parsed, so
        # undecodable bytes in file content must not abort the run.
        diff_output = subprocess.check_output(
            ['git', 'diff', '-U0', '--diff-filter=DM',
             '--no-ext-diff', '--no-prefix', revision, '--'] + list(files),
        ).decode('utf-8', errors='replace')

        abspath = None
        for line in diff_output.splitlines():
            m = _NEWFILE_RE.match(line)
            if m:
                abspath = os.path.abspath(m.group(1))
                modified_lines[abspath] = set()
                continue
            m = _DIFFLINE_RE.match(line)
            if m:
                assert abspath, line   # filename comes before diff info
                startline, n = int(m.group(1)), int(m.group(2) or '1')
                modified_lines[abspath].update(
                    range(startline, startline + n))
        return modified_lines

    def get_annotation_info(self, abspaths, revision='HEAD'):
        """Return a map abspath -> list-of-author-names.

        retval[filename][i] says who wrote the i-th line of the file.
        Line numbers start at 1, so retval[filename][0] is always None.
        Authors are identified by the address on the 'author-mail' line
        of the blame output, minus any '@khanacademy.org' suffix.

        Raises:
            subprocess.CalledProcessError: if `git blame` fails.
        """
        retval = {}
        author_re = re.compile(r'author-mail <([^>]*)>')
        for abspath in abspaths:
            retval[abspath] = [None]
            # errors='replace' for the same reason as in
            # find_modified_lines: file content may not be valid UTF-8.
            blame_output = subprocess.check_output(
                ['git', 'blame', '-M', '-C', '--line-porcelain',
                 revision, '--', abspath],
            ).decode('utf-8', errors='replace')
            for line in blame_output.splitlines():
                m = author_re.match(line)
                if m:
                    author = m.group(1)
                    # Just to make the common-case output prettier.
                    if author.endswith('@khanacademy.org'):
                        author = author[:-len('@khanacademy.org')]
                    retval[abspath].append(author)
        return retval
def findreviewers(vcs, files, revision=None, num_reviewers=3,
                  whole_file=False, output_per_file=False, ignore=[]):
    """Print the people best placed to review a set of changes.

    Asks `vcs` which lines of `files` are of interest (either the lines
    that differ from `revision`, or every line), asks it who last
    touched each of those lines, tallies lines per author, and writes
    the top authors out via vcs.write().

    Arguments:
        vcs: either a Mercurial or a Git instance, from this file.
        files: a list of filenames to find reviewer information for
        revision: what revision to diff against when looking for
           reviewers (typically '.' for mercurial or 'HEAD' for git).
           Declared as a keyword argument but must be supplied.
        num_reviewers: the number of reviewers to suggest for each file.
           3 is a reasonable value.
        whole_file: if True, return reviewer information for the input
           files as a whole, not just for the diff vs 'revision'.  This
           is useful when you want to know who is 'most responsible' for
           a file.
        output_per_file: if True, instead of printing the best reviewers
           for the set of input files as a whole, prints a separate list
           of best reviewers for each file in the input.
        ignore: a set/list of revisions to ignore when finding blame info.
           TODO(csilvers): implement this.
    """
    # revision is only a kwarg because of how this function gets called;
    # callers are still required to pass it.
    assert revision, 'revision argument cannot be None!'

    line_finder = (vcs.find_wholefile_lines if whole_file
                   else vcs.find_modified_lines)
    lines_by_path = line_finder(files, revision)
    authors_by_path = vcs.get_annotation_info(lines_by_path.keys(), revision)

    # tallies maps a bucket key to {author: num_lines}.  In per-file mode
    # there is one bucket per path; otherwise a single bucket keyed None.
    if output_per_file:
        tallies = {path: {} for path in lines_by_path}
    else:
        tallies = {None: {}}

    for path, linenums in lines_by_path.items():
        bucket = tallies[path if output_per_file else None]
        for linenum in linenums:
            who = authors_by_path[path][linenum]
            bucket[who] = bucket.get(who, 0) + 1

    # Report each bucket, biggest contributors first.
    for path in sorted(tallies):
        if path:
            vcs.write('\n--- %s\n' % path)
        counts = tallies[path]
        total = sum(counts.values())
        ranked = sorted(counts.items(),
                        key=lambda pair: pair[1],
                        reverse=True)
        for (who, owned) in ranked[:num_reviewers]:
            vcs.write('%s: %s lines (%.1f%%)\n'
                      % (who, owned, owned * 100.0 / total))
# How hg uses this script: via the cmdtable hook.
#
# Each option is a (short flag, long name, default, help text) tuple.
# The __main__ block of this file reads the same list to build the
# command-line parser used for git.
_FINDREVIEWERS_OPTIONS = [
    ('f', 'output-per-file', None, 'Print results per input file'),
    ('n', 'num-reviewers', 3, 'How many reviewers to show'),
    ('w', 'whole-file', None, 'Calculate reviewers based on entire file'),
    ('i', 'ignore', [], 'TODO: Revisions to ignore when annotating'),
    ('r', 'revision', '.', 'Revision to use as base'),
]

_FINDREVIEWERS_SYNOPSIS = '[-f] [-n #] [-w] [-i <commit_id> ...] [FILE...]'

# Command name -> (callable, option list, synopsis string).
cmdtable = {
    'findreviewers': (
        # Wrap the ui/repo pair in a Mercurial object and forward the
        # file arguments and parsed options to findreviewers().
        lambda ui, repo, *files, **opts: (
            findreviewers(Mercurial(ui, repo), files, **opts)),
        _FINDREVIEWERS_OPTIONS,
        _FINDREVIEWERS_SYNOPSIS,
    ),
}
# How git uses this script: via __main__
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument('files', nargs='*')

    # Build the command-line flags from the same option table that the
    # mercurial hook uses, so both front ends accept the same options.
    for (shortname, longname, default, help_text) in (
            cmdtable['findreviewers'][1]):
        flags = ('--%s' % longname, '-%s' % shortname)
        if default is None:
            # A default of None marks an on/off switch.
            parser.add_argument(*flags, help=help_text, default=default,
                                action='store_true')
        elif isinstance(default, list):
            # A list default marks a repeatable option.  type=list would
            # instead split a single value into its characters.
            parser.add_argument(*flags, help=help_text, default=default,
                                action='append')
        else:
            # Otherwise coerce the value to the type of the default.
            parser.add_argument(*flags, help=help_text, default=default,
                                type=type(default))

    # The one place we differ from the mercurial defaults.
    parser.set_defaults(revision='HEAD')

    args = parser.parse_args()
    findreviewers(Git(), **vars(args))